Sampling a random subset from an array

In my opinion, I do not think shuffling the entire deck necessary. You just need to make sure your sample is random not your deck. What you can do, is select the size amount from the front then swap each one in the sampling array with another position in it. So, if you allow replacement you get more and more shuffled.

function getRandom(length) { return Math.floor(Math.random()*(length)); }

function getRandomSample(array, size) {
    var length = array.length;

    for(var i = size; i--;) {
        var index = getRandom(length);
        var temp = array[index];
        array[index] = array[i];
        array[i] = temp;
    }

    return array.slice(0, size);
}

This algorithm is only 2*size steps, if you include the slice method, to select the random sample.

More Random

To make the sample more random, we can randomly select the starting point of the sample. But it is a little more expensive to get the sample.

function getRandomSample(array, size) {
    var length = array.length, start = getRandom(length);

    for(var i = size; i--;) {
        var index = (start + i)%length, rindex = getRandom(length);
        var temp = array[rindex];
        array[rindex] = array[index];
        array[index] = temp;
    }
    var end = start + size, sample = array.slice(start, end);
    if(end > length)
        sample = sample.concat(array.slice(0, end - length));
    return sample;
}

What makes this more random is the fact that when you always just shuffling the front items you tend to not get them very often in the sample if the sampling array is large and the sample is small. This would not be a problem if the array was not supposed to always be the same. So, what this method does is change up this position where the shuffled region starts.

No Replacement

To not have to copy the sampling array and not worry about replacement, you can do the following but it does give you 3*size vs the 2*size.

function getRandomSample(array, size) {
    var length = array.length, swaps = [], i = size, temp;

    while(i--) {
        var rindex = getRandom(length);
        temp = array[rindex];
        array[rindex] = array[i];
        array[i] = temp;
        swaps.push({ from: i, to: rindex });
    }

    var sample = array.slice(0, size);

    // Put everything back.
    i = size;
    while(i--) {
         var pop = swaps.pop();
         temp = array[pop.from];
         array[pop.from] = array[pop.to];
         array[pop.to] = temp;
    }

    return sample;
}

No Replacement and More Random

To apply the algorithm that gave a little bit more random samples to the no replacement function:

function getRandomSample(array, size) {
    var length = array.length, start = getRandom(length),
        swaps = [], i = size, temp;

    while(i--) {
        var index = (start + i)%length, rindex = getRandom(length);
        temp = array[rindex];
        array[rindex] = array[index];
        array[index] = temp;
        swaps.push({ from: index, to: rindex });
    }

    var end = start + size, sample = array.slice(start, end);
    if(end > length)
        sample = sample.concat(array.slice(0, end - length));

    // Put everything back.
    i = size;
    while(i--) {
         var pop = swaps.pop();
         temp = array[pop.from];
         array[pop.from] = array[pop.to];
         array[pop.to] = temp;
    }

    return sample;
}

Faster...

Like all of these post, this uses the Fisher-Yates Shuffle. But, I removed the over head of copying the array.

function getRandomSample(array, size) {
    var r, i = array.length, end = i - size, temp, swaps = getRandomSample.swaps;

    while (i-- > end) {
        r = getRandom(i + 1);
        temp = array[r];
        array[r] = array[i];
        array[i] = temp;
        swaps.push(i);
        swaps.push(r);
    }

    var sample = array.slice(end);

    while(size--) {
        i = swaps.pop();
        r = swaps.pop();
        temp = array[i];
        array[i] = array[r];
        array[r] = temp;
    }

    return sample;
}
getRandomSample.swaps = [];

I suggest shuffling a copy of the array using the Fisher-Yates shuffle and taking a slice:

function getRandomSubarray(arr, size) {
    var shuffled = arr.slice(0), i = arr.length, temp, index;
    while (i--) {
        index = Math.floor((i + 1) * Math.random());
        temp = shuffled[index];
        shuffled[index] = shuffled[i];
        shuffled[i] = temp;
    }
    return shuffled.slice(0, size);
}

var x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15];
var fiveRandomMembers = getRandomSubarray(x, 5);

Note that this will not be the most efficient method for getting a small random subset of a large array because it shuffles the whole array unnecessarily. For better performance you could do a partial shuffle instead:

function getRandomSubarray(arr, size) {
    var shuffled = arr.slice(0), i = arr.length, min = i - size, temp, index;
    while (i-- > min) {
        index = Math.floor((i + 1) * Math.random());
        temp = shuffled[index];
        shuffled[index] = shuffled[i];
        shuffled[i] = temp;
    }
    return shuffled.slice(min);
}

Or... if you use underscore.js...

_und = require('underscore');

...

function sample(a, n) {
    return _und.take(_und.shuffle(a), n);
}

Simple enough.

A little late to the party but this could be solved with underscore's new sample method (underscore 1.5.2 - Sept 2013):

var x = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15];

var randomFiveNumbers = _.sample(x, 5);

Sampling a random subset from an array

More Random

No Replacement

No Replacement and More Random

Faster...

Tags:

Javascript

Arrays

Random

Numerical Methods

Related

Recent Posts