Jetzt habe ich selber gerechnet... und erhalte immer, dass 0 am seltensten gezogen wird und 13 am häufigsten. Die übrigen Werte scheinen normal. Kann das jemand bestätigen? Ist das dieser berühmt berüchtigte "Bias", von dem hier schon oft die Rede war?
Edit: Code noch einmal angepasst und Kommentare hinzugefügt.
C++:
#include <cstdint>
#include <cmath>
#include <chrono>
#include <iostream>
#include <iomanip>
#include <vector>
#include <algorithm>
#include <numeric>
#include <random>
#include <unordered_map>
using namespace std;
template <typename E>
void select(const vector<E> &in, vector<E> &out, const uint32_t n)
{
if (n > in.size())
{
throw invalid_argument("n cannot be greater than the size of input vector");
}
out.clear();
out.reserve(n);
if (n == 0)
{
return;
}
static auto gen = default_random_engine{random_device{}()};
uint32_t left = n;
const double offset = (double)in.size() / n;
const double step = offset * 2.0;
double last = -offset;
auto dist = uniform_real_distribution<double>(0, step);
while (left > 0)
{
int32_t next = round(last + dist(gen));
if (next < 0 || next >= in.size())
continue;
out.push_back(in[next]);
last = (n - left--) * offset + offset;
}
}
void test_select()
{
const double frac = 0.1; // selection fraction
const uint32_t n1 = 1000; // input size
const uint32_t n2 = 150000; // number of selections
const uint32_t n3 = round(n1 * frac); // selection size
const uint32_t n4 = round(n1 * frac * n2); // total selections
const uint32_t n5 = round(n1 * frac * n2 / n1); // expected average selections per item
unordered_map<uint32_t, uint32_t> counts;
vector<uint32_t> in(n1);
iota(begin(in), end(in), 0);
for (uint32_t i = 0; i < n2; i++)
{
vector<uint32_t> out = {};
select(in, out, n3);
for (const auto &item : out)
{
counts[item]++;
}
}
vector<pair<uint32_t, uint32_t>> pairs;
for (uint32_t i = 0; i < in.size(); i++)
{
pairs.push_back({i, counts[i]});
}
sort(pairs.begin(), pairs.end(), [=](std::pair<uint32_t, uint32_t> &a, std::pair<uint32_t, uint32_t> &b)
{ return a.second < b.second; });
cout << pairs.front().first << " with " << pairs.front().second << endl;
cout << pairs.back().first << " with " << pairs.back().second << endl;
uint32_t min = n5 * 0.95, max = n5 * 1.05;
uint32_t count = 0;
for (const auto &pair : counts)
{
if (pair.second >= min && pair.second <= max)
{
count++;
}
}
cout << "[" << min << "," << max << "] = " << (double)count / n1 * 100.0 << " %" << endl;
}
int main()
{
test_select();
return 0;
}
Edit: Code noch einmal angepasst und Kommentare hinzugefügt.
Zuletzt bearbeitet: