/* Examine density of expanding hash implementations. */
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
#include <stdbool.h>
#include <err.h>
#include <assert.h>

static size_t allocated;
static unsigned int levels[65];

/* Strategies when a group fills: */
enum explode_mode {
	/* make a new subhash for all of them. */
	ALL,
	/* make a new subhash for the most popular. */
	DENSEST,
	/* make a new subhash for the one just inserted. */
	INSERTED,
	/* share a subhash, unshare once a group fills. */
	SPLIT_ONCE,
	/* share a subhash, halve sharing on group fill. */
	SPLIT_LOG,
	INVALID,
};

static uint64_t *alloc_level(unsigned int bits_per_level)
{
	allocated += sizeof(uint64_t) << bits_per_level;
	return calloc(1 << bits_per_level, sizeof(uint64_t));
}

static void free_level(uint64_t *level, unsigned int bits_per_level)
{
	allocated -= sizeof(uint64_t) << bits_per_level;
	free(level);
}

static bool is_shared(uint64_t prev_htable[], unsigned prev_bucket,
		      unsigned groupsize)
{
	unsigned i, start;
	if (!prev_htable || groupsize == 1)
		return false;

	start = (prev_bucket - (prev_bucket % groupsize));
	for (i = start; i < start + groupsize; i++) {
		if (i != prev_bucket
		    && prev_htable[i] == prev_htable[prev_bucket])
			return true;
	}
	return false;
}

static unsigned int happy, unhappy;

static void insert(uint64_t htable[],
		   unsigned toplevel_bits,
		   unsigned sublevel_bits, unsigned groupsize,
		   uint64_t hash, unsigned level, enum explode_mode exmode,
		   uint64_t prev_htable[], unsigned prev_bucket)
{
	uint64_t *subtable, vals[groupsize];
	unsigned int bucket, start, i, bits_used, thislevel_bits;

	assert(hash && !(hash & (1ULL << 63)));

	if (level) {
		bits_used = toplevel_bits + (level - 1) * sublevel_bits;
		thislevel_bits = sublevel_bits;
	} else {
		bits_used = 0;
		thislevel_bits = toplevel_bits;
	}

	/* Ran out of hash? */
	if (bits_used >= 64) {
		/* Go linear. */
		levels[level]++;
		for (i = 0; i < (1 << sublevel_bits) - 1; i++) {
			if (!htable[i]) {
				htable[i] = hash;
				return;
			}
		}
		errx(1, "Implement linear overflow!");
	}

	bucket = (hash >> bits_used) % (1 << thislevel_bits);
	if (!htable[bucket]) {
		levels[level]++;
		htable[bucket] = hash;
		return;
	}

	if (htable[bucket] & (1ULL << 63)) {
		subtable = (uint64_t *)(intptr_t)(htable[bucket]
						  & ((1ULL << 63)-1));

		insert(subtable, toplevel_bits, sublevel_bits, groupsize,
		       hash, level+1, exmode, htable, bucket);
		return;
	}

	/* Collision: search rest of group. */
	start = (bucket - (bucket % groupsize));
	for (i = 0; i < groupsize; i++) {
		if (htable[start + i] == 0) {
			levels[level]++;
			htable[start + i] = hash;
			return;
		}
	}

	/* Save buckets. */
	memcpy(vals, &htable[start], sizeof(vals));

	if (exmode == ALL) {
		/* Overflow, explode bucket, call ourselves again. */
		for (i = 0; i < groupsize; i++)
			htable[start + i] = ((intptr_t)alloc_level(sublevel_bits) | (1ULL << 63));
		for (i = 0; i < groupsize; i++)
			insert(htable, toplevel_bits, sublevel_bits, groupsize,
			       vals[i], level, exmode, prev_htable, prev_bucket);
	} else if (exmode == DENSEST) {
		/* Explode most popular bucket. */
		unsigned counts[groupsize];
		unsigned best;

		memset(counts, 0, sizeof(counts));
		/* Count the new one. */
		counts[bucket % groupsize]++;
		best = bucket % groupsize;

		/* Count the existing ones. */
		for (i = 0; i < groupsize; i++) {
			unsigned b;
			if (vals[i] & (1ULL << 63)) {
				/* Ignore this: it's expanded already. */
				continue;
			}
			b = (vals[i] >> bits_used) % (1 << thislevel_bits);
			if (b % groupsize == i)
				happy++;
			else
				unhappy++;
			counts[b % groupsize]++;
			if (counts[b % groupsize] > counts[best])
				best = b % groupsize;
			/* Erase from table. */
			htable[start + i] = 0;
		}
		/* Explode most-used bucket. */
		htable[start + best] = ((intptr_t)alloc_level(sublevel_bits) | (1ULL << 63));

		/* Re-add. */
		for (i = 0; i < groupsize; i++) {
			if (!(vals[i] & (1ULL << 63)))
				insert(htable, toplevel_bits, sublevel_bits,
				       groupsize, vals[i], level, exmode,
				       prev_htable, prev_bucket);
		}
	} else if (exmode == INSERTED) {
		/* Explode the one we were going to place into. */
		/* Clear the existing ones. */
		for (i = 0; i < groupsize; i++) {
			if (!(vals[i] & (1ULL << 63))) {
				unsigned b;
				b = (vals[i] >> bits_used) % (1 << thislevel_bits);
				if (b % groupsize == i)
					happy++;
				else
					unhappy++;
				htable[start + i] = 0;
			}
		}
		/* Explode most-used bucket. */
		htable[bucket] = ((intptr_t)alloc_level(sublevel_bits) | (1ULL << 63));
		/* Re-add. */
		for (i = 0; i < groupsize; i++) {
			if (!(vals[i] & (1ULL << 63)))
				insert(htable, toplevel_bits, sublevel_bits,
				       groupsize, vals[i], level, exmode,
				       prev_htable, prev_bucket);
		}
	} else if (!is_shared(prev_htable, prev_bucket, groupsize)) {
		/* Expand all buckets into the same subhash. */
		htable[start] = ((intptr_t)alloc_level(sublevel_bits)
				 | (1ULL << 63));
		for (i = 1; i < groupsize; i++)
			htable[start + i] = htable[start];

		for (i = 0; i < groupsize; i++)
			insert(htable, toplevel_bits, sublevel_bits,
			       groupsize, vals[i], level, exmode,
			       prev_htable, prev_bucket);
	} else {
		if (exmode == SPLIT_ONCE) {
			unsigned int first;

			first = (prev_bucket - (prev_bucket % groupsize));
			/* Explode all upper buckets. */
			for (i = 0; i < groupsize; i++)
				prev_htable[first + i]
					= ((intptr_t)alloc_level(sublevel_bits)
					   | (1ULL << 63));
		} else if (exmode == SPLIT_LOG) {
			unsigned int first, num;

			/* Find number of hashes we share with. */
			for (first = prev_bucket; first > 0; first--) {
				if (prev_htable[first-1]
				    != prev_htable[prev_bucket])
					break;
			}

			for (num = 0; num < groupsize; num++) {
				if (prev_htable[first+num]
				    != prev_htable[first])
					break;
			}
			assert(num > 1);

			/* OK, split in half. */
			prev_htable[first]
				= ((intptr_t)alloc_level(sublevel_bits)
				   | (1ULL << 63));
			prev_htable[first + num / 2]
				= ((intptr_t)alloc_level(sublevel_bits)
				   | (1ULL << 63));

			for (i = 0; i < num / 2; i++) {
				prev_htable[first + i] = prev_htable[first];
				prev_htable[first + num/2 + i]
					= prev_htable[first + num/2];
			}
		}

		/* Re-add all our values, one level above. */
		level--;
		for (i = 0; i < (1 << thislevel_bits); i++) {
			if (htable[i])
				insert(prev_htable, toplevel_bits,
				       sublevel_bits, groupsize,
				       htable[i], level, exmode, NULL, 0);
		}
		free_level(htable, thislevel_bits);
		htable = prev_htable;
	}

	/* Now insert the one we wanted! */
	insert(htable, toplevel_bits, sublevel_bits, groupsize, hash, level,
	       exmode, prev_htable, prev_bucket);
}

#define HAVE_BUILTIN_CLZL 1

static unsigned fls64(uint64_t val)
{
#if HAVE_BUILTIN_CLZL
	if (val <= ULONG_MAX) {
		/* This is significantly faster! */
		return val ? sizeof(long) * CHAR_BIT - __builtin_clzl(val) : 0;
	} else {
#endif
	uint64_t r = 64;

	if (!val)
		return 0;
	if (!(val & 0xffffffff00000000ull)) {
		val <<= 32;
		r -= 32;
	}
	if (!(val & 0xffff000000000000ull)) {
		val <<= 16;
		r -= 16;
	}
	if (!(val & 0xff00000000000000ull)) {
		val <<= 8;
		r -= 8;
	}
	if (!(val & 0xf000000000000000ull)) {
		val <<= 4;
		r -= 4;
	}
	if (!(val & 0xc000000000000000ull)) {
		val <<= 2;
		r -= 2;
	}
	if (!(val & 0x8000000000000000ull)) {
		val <<= 1;
		r -= 1;
	}
	return r;
#if HAVE_BUILTIN_CLZL
	}
#endif
}

static uint64_t rand64(void)
{
	unsigned int i;
	uint64_t randbits = 0;

	for (i = 0; i < 64; i += fls64(RAND_MAX)) 
		randbits ^= ((uint64_t)random()) << i;
	return randbits;
}

static bool lookup(uint64_t htable[],
		   unsigned toplevel_bits,
		   unsigned sublevel_bits, unsigned groupsize,
		   uint64_t hash, unsigned level)
{
	uint64_t *subtable;
	unsigned int bucket, start, i, bits_used, thislevel_bits;

	assert(!(hash & (1ULL << 63)));

	if (level) {
		bits_used = toplevel_bits + (level - 1) * sublevel_bits;
		thislevel_bits = sublevel_bits;
	} else {
		bits_used = 0;
		thislevel_bits = toplevel_bits;
	}

	/* Ran out of hash? */
	if (bits_used >= 64) {
		/* Go linear. */
		levels[level]++;
		for (i = 0; i < (1 << sublevel_bits) - 1; i++) {
			if (htable[i] == hash)
				return true;
		}
		return false;
	}

	bucket = (hash >> bits_used) % (1 << thislevel_bits);
	if (htable[bucket] & (1ULL << 63)) {
		subtable = (uint64_t *)(intptr_t)(htable[bucket]
						  & ((1ULL << 63)-1));

		return lookup(subtable, toplevel_bits, sublevel_bits, groupsize,
		       hash, level+1);
	}

	/* Search entire group. */
	start = (bucket - (bucket % groupsize));
	for (i = 0; i < groupsize; i++) {
		if (htable[start + i] == hash)
			return true;
	}
	return false;
}

static unsigned int total_depth(uint64_t *htable,
				unsigned bits_this_level,
				unsigned bits_next_level,
				unsigned int depth)
{
	unsigned int i, total = 0;

	for (i = 0; i < (1 << bits_this_level); i++) {
		if (htable[i] & (1ULL << 63)) {
			uint64_t *subtable;
			subtable = (uint64_t *)(intptr_t)(htable[i]
							  & ((1ULL << 63)-1));
			/* Don't double-account shared hashes. */
			if (i == 0 || htable[i] != htable[i-1]) {
				total += total_depth(subtable, bits_next_level,
						     bits_next_level, depth+1);
			}
		} else if (htable[i])
			total += depth;
	}
	return total;
}

static void print_summary(unsigned int num, double density)
{
	unsigned int i;
	printf("After %i, allocated %zu bytes, density = %lf%%\n",
	       num, allocated, density * 100);

	for (i = 0; i < sizeof(levels)/sizeof(levels[0]); i++) {
		if (levels[i])
			printf("level %u: %u elements\n", i, levels[i]);
	}
}

static enum explode_mode parse_exmode(const char *exmode)
{
	if (strcmp(exmode, "explode-all") == 0) 
		return ALL;
	else if (strcmp(exmode, "explode-current") == 0)
		return INSERTED;
	else if (strcmp(exmode, "explode-densest") == 0)
		return DENSEST;
	else if (strcmp(exmode, "split-once") == 0)
		return SPLIT_ONCE;
	else if (strcmp(exmode, "split-log") == 0)
		return SPLIT_LOG;
	return INVALID;
}

int main(int argc, char *argv[])
{
	unsigned int i, groupsize, toplevel_bits, sublevel_bits, num;
	uint64_t *htable;
	double total_density = 0.0, best_density = 0, worst_density = 1, prev;
	enum explode_mode exmode;

	if (argc != 5
	    || (exmode = parse_exmode(argv[1])) == INVALID
	    || (toplevel_bits = atoi(argv[2])) == 0
	    || (sublevel_bits = atoi(argv[3])) == 0
	    || (groupsize = atoi(argv[4])) == 0) {
		errx(1, "Usage: hash-density <explode-all|explode-current|explode-densest|split-once|split-log> <toplevel-bits> <sublevel-bits> <groupsize>");
	}
	srandom(0);

	htable = alloc_level(toplevel_bits);
	for (i = 0; allocated < 2U*1024*1024*1024; i++) {
		double density;

		insert(htable, toplevel_bits, sublevel_bits, groupsize,
		       rand64() & ((1ULL << 63)-1), 0, exmode, NULL, 0);
		density = (double)i * sizeof(uint64_t) / allocated;

		/* Don't count first inserts. */
		if (i < (1 << toplevel_bits)) {
			prev = density;
			continue;
		}

		total_density += density;
		if (density < worst_density) {
			worst_density = density;
		}
		if (density > best_density) {
			best_density = density;
		}
		if ((int)(density * 100) != (int)(prev * 100))
			printf("After %i, density = %.0f%%\n",
			       i, density * 100);
		prev = density;
#if 0
		if (allocated / (2U*1024*1024*1024 / 5) != report) {
			report = allocated / (2U*1024*1024*1024 / 5);
			print_summary(i, density);
		}
#endif
	}

	srandom(0);
	num = i;
	for (i = 0; i < num; i++) {
		assert(lookup(htable, toplevel_bits, sublevel_bits, groupsize,
			      rand64() & ((1ULL << 63)-1), 0));
	}

	print_summary(i, (double)i * sizeof(uint64_t) / allocated);
	printf("Average depth now = %.2f\n",
	       total_depth(htable, toplevel_bits, sublevel_bits, 0) / (double)i);
	printf("Density: worst/average/best = %.1f%%/%.1f%%/%.1f%%\n",
	       worst_density * 100,
	       total_density / (i - (1 << toplevel_bits)) * 100,
	       best_density * 100);
	printf("%u happy, %u unhappy\n", happy, unhappy);
	return 0;
}
