Count arrays of periods

C#, n=128 in about 2:40

using System;
using System.Collections.Generic;
using System.Linq;

namespace Sandbox
{
    class PPCG137436
    {
        public static void Main(string[] args)
        {
            if (args.Length == 0) args = new string[] { "1", "2", "4", "8", "16", "32", "64", "128" };

            foreach (string arg in args)
            {
                Console.WriteLine(Count(new int[(int)(0.5 + Math.Log(int.Parse(arg)) / Math.Log(2))], 0));
            }
        }

        static int Count(int[] periods, int idx)
        {
            if (idx == periods.Length)
            {
                //Console.WriteLine(string.Join(", ", periods));
                return 1;
            }

            int count = 0;
            int p = idx == 0 ? 1 : periods[idx - 1];
            for (int q = p; q <= 1 << (idx + 1); q++)
            {
                periods[idx] = q;
                if (q == p || q > 1 << idx || p + q - Gcd(p, q) > 1 << idx && UnificationPasses(periods, idx, q)) count += Count(periods, idx + 1);
            }

            return count;
        }

        private static int Gcd(int a, int b)
        {
            while (a > 0) { int tmp = a; a = b % a; b = tmp; }
            return b;
        }

        private static bool UnificationPasses(int[] periods, int idx, int q)
        {
            UnionSet union = new UnionSet(1 << idx);
            for (int i = 0; i <= idx; i++)
            {
                for (int j = 0; j + periods[i] < Math.Min(2 << i, 1 << idx); j++) union.Unify(j, j + periods[i]);
            }

            IDictionary<int, long> rev = new Dictionary<int, long>();
            for (int k = 0; k < 1 << idx; k++) rev[union.Find(k)] = 0L;
            for (int k = 0; k < 1 << idx; k++) rev[union.Find(k)] |= 1L << k;

            long zeroes = rev[union.Find(0)]; // wlog the value at position 0 is 0

            ISet<int> onesIndex = new HashSet<int>();

            // This can be seen as the special case of the next loop where j == -1.
            for (int i = 0; i < idx; i++)
            {
                if (periods[i] == 2 << i) onesIndex.Add((2 << i) - 1);
            }
            for (int j = 0; j < idx - 1 && periods[j] == 2 << j; j++)
            {
                for (int i = j + 1; i < idx; i++)
                {
                    if (periods[i] == 2 << i)
                    {
                        for (int k = (1 << j) + 1; k <= 2 << j; k++) onesIndex.Add((2 << i) - k);
                    }
                }
            }

            for (int i = 1; i < idx; i++)
            {
                if (periods[i] == 1) continue;

                int d = (2 << i) - periods[i];
                long dmask = (1L << d) - 1;
                if (((zeroes >> 1) & (zeroes >> periods[i]) & dmask) == dmask) onesIndex.Add(periods[i] - 1);
            }

            long ones = 0L;
            foreach (var key in onesIndex) ones |= rev[union.Find(key)];

            if ((zeroes & ones) != 0) return false; // Definite contradiction!

            rev.Remove(union.Find(0));
            foreach (var key in onesIndex) rev.Remove(key);

            long[] masks = System.Linq.Enumerable.ToArray(rev.Values);

            int numFilteredMasks = 0;
            long set = 0;
            long M = 0;
            for (int i = 1; i <= idx; i++)
            {
                if (periods[i - 1] == 1) continue;

                // Sort the relevant masks to the start
                if (i == idx) numFilteredMasks = masks.Length; // Minor optimisation: skip the filter because we know we need all the masks
                long filter = (1L << (1 << i)) - 1;
                for (int j = numFilteredMasks; j < masks.Length; j++)
                {
                    if ((masks[j] & filter) != 0)
                    {
                        var tmp = masks[j];
                        masks[j] = masks[numFilteredMasks];
                        masks[numFilteredMasks++] = tmp;
                    }
                }

                // Search for a successful assignment, using the information from the previous search to skip a few initial values in this one.
                set |= (1L << numFilteredMasks) - 1 - M;
                M = (1L << numFilteredMasks) - 1;
                while (true)
                {
                    if (TestAssignment(periods, i, ones, masks, set)) break;
                    if (set == 0) return false; // No suitable assignment found

                    // Gosper's hack with variant to reduce the number of bits on overflow
                    long c = set & -set;
                    long r = set + c;
                    set = (((r ^ set) >> 2) / c) | (r & M);
                }
            }

            return true;
        }

        private static bool TestAssignment(int[] periods, int idx, long ones, long[] masks, long assignment)
        {
            for (int j = 0; j < masks.Length; j++, assignment >>= 1) ones |= masks[j] & -(assignment & 1);
            for (int i = idx - 1; i > 0; i--) // i == 0 is already handled in the unification process.
            {
                if (Period(ones, 2 << i, periods[i - 1]) < periods[i]) return false;
            }

            return true;
        }

        private static int Period(long arr, int n, int min)
        {
            for (int p = min; p <= n; p++)
            {
                // If the bottom n bits have period p then the bottom (n-p) bits equal the bottom (n-p) bits of the integer shifted right p
                long mask = (1L << (n - p)) - 1L;
                if ((arr & mask) == ((arr >> p) & mask)) return p;
            }

            throw new Exception("Unreachable");
        }

        class UnionSet
        {
            private int[] _Lookup;

            public UnionSet(int size)
            {
                _Lookup = new int[size];
                for (int k = 0; k < size; k++) _Lookup[k] = k;
            }

            public int Find(int key)
            {
                var l = _Lookup[key];
                if (l != key) _Lookup[key] = l = Find(l);
                return l;
            }

            public void Unify(int key1, int key2)
            {
                int root1 = Find(key1);
                int root2 = Find(key2);

                if (root1 < root2) _Lookup[root2] = root1;
                else _Lookup[root1] = root2;
            }
        }
    }
}

Extending to n=256 would require switching to BigInteger for the masks, which probably kills the performance too much for n=128 to pass without new ideas, let alone n=256.

Under Linux, compile with mono-csc and execute with mono.

Basic explanation

I'm not going to do a line-by-line dissection, just an overview of the concepts.

As a rule of thumb, I'm happy to iterate through on the order of 250 elements in a brute-force combinatoric program. To get to n=128 it is therefore necessary to use an approach which doesn't analyse every bitstring. So rather than working forwards from bit strings to period sequences, I work backwards: given a period sequence, is there a bitstring which realises it? For n=2x there's an easy upper bound of 2x(x+1)/2 period sequences (vs 22x bitstrings).

Some of the arguments use the string periodicity lemma:

Let p and q be two periods of a string of length n. If p + q ≤ n + gcd(p, q) then gcd(p, q) is also a period of the string.

Wlog I will assume that all of the bitstrings under consideration start with 0.

Given a period sequence [p1 p2 ... pk] where pi is the period of the prefix of length 2i (p0 = 1 always), there are some easy observations about possible values of pk+1:

  • pk+1 ≥ pk since a period of a string S is also a period of any prefix of S.

  • pk+1 = pk is always a possible extension: just repeat the same primitive string for twice as many characters.

  • 2k < pk+1 ≤ 2k+1 is always a possible extension. It suffices to show this for pk+1 = 2k+1 because an aperiodic string of length L can be extended to an aperiodic string of length L+1 by appending any letter which isn't its first letter.

    Take a string Sx of length 2k whose period is pk and consider the string SxyS of length 2k+1. Clearly SxyS has a period 2k+1. Suppose its period q is smaller.

    Then 2k+1 + q ≤ 2k+1+1 ≤ 2k+1 + gcd(2k+1, q) so by the periodicity lemma gcd(2k+1, q) is also a period of SxyS, and since the greatest divisor is less than or equal to its arguments and q is the smallest period, we require q to be a proper factor of 2k+1. Since its quotient cannot be 2, we have q ≤ (2k+1)/3.

    Now, since q ≤ 2k is a period of SxyS it must be a period of Sx. But the period of Sx is pk. We have two cases:

    1. gcd(pk, q) = pk, or equivalently pk divides exactly into q.
    2. pk + q > 2k + gcd(pk, q) so that the periodicity lemma doesn't force a smaller period.

    Consider first the second case. pk > 2k + gcd(pk, q) - q ≥ 2k+1 - q ≥ 2k+1 - (2k+1)/3 ≥ 2q, contradicting the definition of pk as the period of Sx. Therefore we are forced into the conclusion that pk is a factor of q.

    But since q is a period of Sx and pk is the period of Sx, the prefix of length q is just q/pk copies of the prefix of length pk, so we see that pk is also a period of SxyS.

    Therefore the period of SxyS is either pk or 2k+1. But we have two options for y! At most one choice of y will give period pk, so at least one will give period 2k+1. QED.

  • The periodicity lemma allows us to reject some of the remaining possible extensions.

  • Any extension which hasn't passed a quick-accept or a quick-reject test needs to be tested constructively.

The construction of a bitstring given a period sequence is essentially a satisifiability problem, but it has a lot of structure. There are 2k - pk simple equality constraints implied by each prefix period, so I use a union set data structure to combine bits into independent clusters. This was enough to tackle n=64, but for n=128 it was necessary to go further. I employ two useful lines of argument:

  1. If the prefix of length M is 01M-1 and the prefix of length L > M has period L then the prefix of length L must end in 1M. This is most powerful precisely in the cases which would otherwise have most independent clusters, which is convenient.
  2. If the prefix of length M is 0M and the prefix of length L > M has period L - d with d < M and ends in 0d then it must in fact end in 10d. This is most powerful at the opposite extreme, when the period sequence starts with a lot of ones.

If we don't get an immediate contradiction by forcing the cluster with the first bit (assumed to be zero) to be one then we brute force (with some micro-optimisations) over the possible values for the unforced clusters. Note that the order is in descending number of ones because if the ith bit is a one then the period cannot be i and we want to avoid periods which are shorter than the ones which are already enforced by the clustering. Going down increases the chances of finding a valid assignment early.


Rust, 32, 10s 11s 29s on my laptop

Call it with the bitsize as the command line argument.

Clever techniques: represent bitstrings directly as numbers, use bittwiddling to check for cycles. Only search the first half of the bitstrings, those starting with 0, because the array of periods of a bitstring and its inverse (0s swapped for 1s) are identical. If every possibility for the final position has already occured, I don't search it.

More clever stuff:

To deduplicate each block (strings where the first half of the bits are the same) I use a bitvector, which is much faster than a hashset, since the final cycle lengths don't need hashing.

Also, I skip the first steps of the cycle checking, because I know that the final cycle can't be shorter than the second-to-last cycle.

After lots of profiling, I can now tell that almost all of the time is being used productively, so algorithmic improvements will be required to improve from here, I think. I also switched to 32 bit integers to save a little more time.

//extern crate cpuprofiler;
//use cpuprofiler::PROFILER;

extern crate bit_vec;
use bit_vec::BitVec;

use std::collections::HashSet;

fn cycle_len(num: u32, mask: u32, skip_steps: usize) -> usize {
    let mut left = num >> skip_steps;
    let mut mask = mask >> skip_steps;
    let mut steps = skip_steps;
    loop {
        left >>= 1;
        if left == (num & mask) {
            return steps;
        }
        mask >>= 1;
        steps += 1;
    }
}

fn all_cycles(size_log: usize) -> HashSet<Vec<usize>> {
    let mut set = HashSet::new();
    if size_log == 0 {
        set.insert(vec![]);
        return set;
    } else if size_log == 1 {
        set.insert(vec![0]);
        set.insert(vec![1]);
        return set;
    }
    let size: usize = 1 << size_log;
    let half_size: usize = 1 << size_log - 1;
    let shift_and_mask: Vec<(usize, u32)> = (1..size_log)
        .map(|subsize_log| {
            let subsize = 1 << subsize_log;
            (size - subsize, (1 << (subsize - 1)) - 1)
        })
        .collect();
    let size_mask = (1 << (size - 1)) - 1;
    for block in 0..(1 << (half_size - 1)) as u32 {
        let start: u32 = block << half_size;
        if block % 1024 == 0 {
            eprintln!(
                "{} ({:.2}%): {}",
                start,
                start as f64 / (1u64 << size - 1) as f64 * 100f64,
                set.len()
            );
        }
        let leader = {
            let mut cycles = Vec::new();
            for &(shift, mask) in &shift_and_mask {
                let subnum = start >> shift;
                cycles.push(cycle_len(subnum, mask, 0));
            }
            cycles
        };
        let &end = leader.last().unwrap();
        if (end..size).all(|count| {
            let mut new = leader.clone();
            new.push(count);
            set.contains(&new)
        })
        {
            continue;
        }
        let mut subset = BitVec::from_elem(size, false);
        for num in start..start + (1 << half_size) {
            subset.set(cycle_len(num, size_mask, end), true);
        }
        for (unique_cycle_len, _) in subset.into_iter().enumerate().filter(|x| x.1) {
            let mut new_l = leader.clone();
            new_l.push(unique_cycle_len);
            set.insert(new_l);
        }
    }
    set
}

fn main() {
    let size: f32 = std::env::args().nth(1).unwrap().parse().unwrap();
    let size_log = size.log2() as usize;
    //PROFILER.lock().unwrap().start("./my-prof.profile").unwrap();
    let cycles = all_cycles(size_log);
    //PROFILER.lock().unwrap().stop().unwrap();
    println!(
        "Number of distinct arrays of periods of bitstrings of length {} is {}",
        1 << size_log,
        cycles.len()
    );
}

Put bit-vec = "0.4.4" in your Cargo.toml

If you'd like to run this, clone this: github.com/isaacg1/cycle then Cargo build --release to build, then Cargo run --release 32 to run.


Rust, 16

use std::collections::HashSet;
use std::io;

fn main() {
	print!("Enter a pow of two:");
	let mut input_text = String::new();
    io::stdin()
        .read_line(&mut input_text)
        .expect("failed to read from stdin");

    let n_as_string = input_text.trim();
	match n_as_string.parse::<usize>() {
		Ok(n) => {
			let log2 = (n as f64).log(2_f64) as usize;
			if n != 1 << log2 {
				panic!("{} is not a power of two", n);
			}
			let count = compute_count_array(log2, n);
			println!("n = {} -> count = {}", n, count);
		}
		Err(_) => { panic!("{} is not a number", n_as_string); }
	}
}

fn compute_count_array(log2:usize, n: usize) -> usize {
	let mut z = HashSet::new();

	let mut s:Vec<bool> = vec!(false; n);
	loop {
		let mut y:Vec<usize> = vec!();
		for j in 0..log2+1 {
			let p = find_period(&s[0..1<<j]);
			y.push(p);
		}		
		z.insert(y);
		if !next(&mut s) {
			break;
		}
	}
	z.len()
}

#[inline]
fn find_period(s: &[bool]) -> usize {
	let n=s.len();
	let mut j=1;
	while j<n {
		if s[0..n-j] == s[j..n] {
			return j;
		}
		j+=1;
    }
	n
}	

#[inline]
fn next(s:&mut Vec<bool>) -> bool {
	if s[0] {
		s[0] = false;
		for i in 1..s.len() {
			if s[i] {
				s[i] = false;
			} else {
				s[i] = true;
				return true;
			}
		}
		return false
	} else {
		s[0] = true;
	}
	true
}

Try it online!

Compilation: rustc -O <name>.rs

The string is implemented as a Bool vector.

  • The next function iterate through the combinations ;

  • The find_period takes a Bool slice and returns the period ;

  • The compute_count_array does the job for each "power of two" subsequence of each combinations of Bools.

Theoretically, no overflow is expected until 2^n exceeds the u64 maximum value, ie n > 64. This limit could be outreached with an expensive test on s=[true, true, ..., true].

Bad news is: it returns 317 for n=16, but I don't know why. I don't know either if it will make it in ten minutes for n=32, since the Vec<bool> is not optimized for this kind of computation.

EDIT

  1. I've managed to remove the limit of 64 for n. Now, it won't crash until n is greater than the max usize integer.

  2. I found why the previous code returned 317 for n=32. I was counting sets of periods and not arrays of periods. There were three arrays with the same elements:

    [1, 2, 3, 3, 8] -> {1, 2, 3, 8}
    [1, 2, 3, 8, 8] -> {1, 2, 3, 8}
    [1, 1, 3, 3, 7] -> {1, 3, 7}
    [1, 1, 3, 7, 7] -> {1, 3, 7}
    [1, 1, 3, 3, 8] -> {1, 3, 8}
    [1, 1, 3, 8, 8] -> {1, 3, 8}
    

Now it works. It is still slow but it works.