How to count collisions in unordered_set c++

The functions you use do not provide collision counts; you may like to read their documentation on cppreference (https://en.cppreference.com/w/cpp/container/unordered_set).

One way to calculate the bucket collision statistics is to examine the number of elements in each bucket:

/// Bucket occupancy and collision statistics for a std::unordered_set.
///
/// A "collision" here is every element beyond the first that shares a bucket:
/// a bucket holding n > 1 elements contributes n - 1 collisions.
struct BucketStats {
    size_t occupied = 0;          ///< Number of buckets holding at least one element.
    size_t total_collisions = 0;  ///< Sum of (bucket size - 1) over buckets with more than one element.
    size_t max_collisions = 0;    ///< Largest (bucket size - 1) seen in any single bucket.

    /// Walks every bucket of @p c once and accumulates the statistics.
    /// @param c Set to inspect; it is only read, never modified.
    template<class... Args>
    BucketStats(std::unordered_set<Args...> const& c) {
        // Count down from bucket_count() - 1 to 0; the post-decrement ends the loop at zero.
        for(auto bucket = c.bucket_count(); bucket--;) {
            auto const bucket_size = c.bucket_size(bucket);
            occupied += bucket_size > 0;  // bool converts to 0 or 1
            if(bucket_size > 1) {
                auto const collisions = bucket_size - 1;
                total_collisions += collisions;
                // Explicit template argument so the call deduces even if the
                // container's size_type is not exactly size_t.
                max_collisions = std::max<size_t>(max_collisions, collisions);
            }
        }
    }

    /// @return total_collisions divided by the number of occupied buckets,
    ///         or 0 when no bucket is occupied (avoids dividing by zero).
    double avg_collisions() const {
        return occupied ? static_cast<double>(total_collisions) / occupied : 0;
    }

    /// Writes all statistics to @p s on a single line, without a trailing newline.
    /// @return The stream @p s, so insertions can be chained.
    friend std::ostream& operator<<(std::ostream& s, BucketStats const& b) {
        return s
            << "used buckets: " << b.occupied
            << "; total collisions: " << b.total_collisions
            << "; max collisions in a bucket: " << b.max_collisions
            << "; avg collisions per bucket: " << b.avg_collisions();
    }
};

// ...

    // Print one summary line per container; `a` and `b` come from the elided code above.
    std::cout << BucketStats(a) << '\n'
              << BucketStats(b) << '\n';


used buckets: 1; total collisions: 9999; max collisions in a bucket: 9999; avg collisions per bucket: 9999
used buckets: 10000; total collisions: 0; max collisions in a bucket: 0; avg collisions per bucket: 0

std::unordered_set (like std::unordered_map) increases bucket_count as elements are inserted, so that load_factor stays at or below max_load_factor.

That means that bucket_count depends only on the number of elements in the container, and is unaffected by the number of collisions.

To check for collisions, examine every bucket: a bucket holding n > 1 elements contributes n - 1 collisions, and a bucket holding no elements counts as empty.

size_t collisions = 0, empty = 0;
for (auto bucket = a.bucket_count(); bucket--;) {
    if (a.bucket_size(bucket) == 0)
        collisions += a.bucket_size(bucket) - 1;
std::cout << "a = " << a.max_load_factor() << ' ' << a.load_factor() << ' '
    << ' ' << a.bucket_count() << ' ' << collisions << ' ' << empty << '\n';
empty = 0, collisions = 0;
for (auto bucket = b.bucket_count(); bucket--;) {
    if (b.bucket_size(bucket) == 0)
        collisions += b.bucket_size(bucket) - 1;
std::cout << "b = " << b.max_load_factor() << ' ' << b.load_factor() << ' '
    << ' ' << b.bucket_count() << ' ' << collisions << ' ' << empty << '\n';


a = 1 0.610352  16384 9999 16383
b = 1 0.610352  16384 4773 11157

That is, with a bad hashing function there are 9999 collisions and 16383 out of 16384 empty buckets.

Unrelated: if you care about hash table performance, have a look at Google's dense_hash_map (from the sparsehash library), which stores elements in a flat array using open addressing instead of per-bucket chaining, for much better performance.