Finding approximate correlations

Rust, n ≈ 30 or 31 or 32

On my laptop (two cores, i5-6200U), this gets through n = 1, …, 31 in 53 seconds, using about 2.5 GiB of memory, or through n = 1, …, 32 in 105 seconds, using about 5 GiB of memory. Compile with cargo build --release and run target/release/correlations.

src/main.rs

extern crate rayon;

type S = u32;
const S_BITS: u32 = 32;

fn cat(mut a: Vec<S>, mut b: Vec<S>) -> Vec<S> {
    if a.capacity() >= b.capacity() {
        a.append(&mut b);
        a
    } else {
        b.append(&mut a);
        b
    }
}

fn search(n: u32, i: u32, ss: Vec<S>) -> u32 {
    if ss.is_empty() {
        0
    } else if 2 * i + 1 > n {
        search_end(n, i, ss)
    } else if 2 * i + 1 == n {
        search2(n, i, ss.into_iter().flat_map(|s| vec![s, s | 1 << i]))
    } else {
        search2(n,
                i,
                ss.into_iter()
                    .flat_map(|s| {
                                  vec![s,
                                       s | 1 << i,
                                       s | 1 << n - i - 1,
                                       s | 1 << i | 1 << n - i - 1]
                              }))
    }
}

fn search2<SS: Iterator<Item = S>>(n: u32, i: u32, ss: SS) -> u32 {
    let (shift, mask) = (n - i - 1, !(!(0 as S) << i + 1));
    let close = |s: S| {
        let x = (s ^ s >> shift) & mask;
        x & x.wrapping_sub(1) == 0
    };
    let (ssy, ssn) = ss.partition(|&s| close(s));
    let (cy, cn) = rayon::join(|| search(n, i + 1, ssy), || search(n, i + 1, ssn));
    cy + cn
}

fn search_end(n: u32, i: u32, ss: Vec<S>) -> u32 {
    if i >= n - 1 { 1 } else { search_end2(n, i, ss) }
}

fn search_end2(n: u32, i: u32, mut ss: Vec<S>) -> u32 {
    let (shift, mask) = (n - i - 1, !(!(0 as S) << i + 1));
    let close = |s: S| {
        let x = (s ^ s >> shift) & mask;
        x & x.wrapping_sub(1) == 0
    };
    match ss.iter().position(|&s| close(s)) {
        Some(0) => {
            match ss.iter().position(|&s| !close(s)) {
                Some(p) => {
                    let (ssy, ssn) = ss.drain(p..).partition(|&s| close(s));
                    let (cy, cn) = rayon::join(|| search_end(n, i + 1, cat(ss, ssy)),
                                               || search_end(n, i + 1, ssn));
                    cy + cn
                }
                None => search_end(n, i + 1, ss),
            }
        }
        Some(p) => {
            let (ssy, ssn) = ss.drain(p..).partition(|&s| close(s));
            let (cy, cn) = rayon::join(|| search_end(n, i + 1, ssy),
                                       || search_end(n, i + 1, cat(ss, ssn)));
            cy + cn
        }
        None => search_end(n, i + 1, ss),
    }
}

fn main() {
    for n in 1..S_BITS + 1 {
        println!("{}: {}", n, search(n, 1, vec![0, 1]));
    }
}

Cargo.toml

[package]
name = "correlations"
version = "0.1.0"
authors = ["Anders Kaseorg <[email protected]>"]

[dependencies]
rayon = "0.7.0"

Try it online!

I also have a slightly slower variant using very much less memory.


Rust + CryptoMiniSat, n ≈ 41

src/main.rs

extern crate cryptominisat;
extern crate itertools;

use std::iter::once;
use cryptominisat::{Lbool, Lit, Solver};
use itertools::Itertools;

fn make_solver(n: usize) -> (Solver, Vec<Lit>) {
    let mut solver = Solver::new();
    let s: Vec<Lit> = (1..n).map(|_| solver.new_var()).collect();
    let d: Vec<Vec<Lit>> = (1..n - 1)
        .map(|k| {
                 (0..n - k)
                     .map(|i| (if i == 0 { s[k - 1] } else { solver.new_var() }))
                     .collect()
             })
        .collect();
    let a: Vec<Lit> = (1..n - 1).map(|_| solver.new_var()).collect();
    for k in 1..n - 1 {
        for i in 1..n - k {
            solver.add_xor_literal_clause(&[s[i - 1], s[k + i - 1], d[k - 1][i]], true);
        }
        for t in (0..n - k).combinations(2) {
            solver.add_clause(&t.iter()
                                   .map(|&i| d[k - 1][i])
                                   .chain(once(!a[k - 1]))
                                   .collect::<Vec<_>>()
                                   [..]);
        }
        for t in (0..n - k).combinations(n - k - 1) {
            solver.add_clause(&t.iter()
                                   .map(|&i| !d[k - 1][i])
                                   .chain(once(a[k - 1]))
                                   .collect::<Vec<_>>()
                                   [..]);
        }
    }
    (solver, a)
}

fn search(n: usize,
          solver: &mut Solver,
          a: &Vec<Lit>,
          assumptions: &mut Vec<Lit>,
          k: usize)
          -> usize {
    match solver.solve_with_assumptions(assumptions) {
        Lbool::True => search_sat(n, solver, a, assumptions, k),
        Lbool::False => 0,
        Lbool::Undef => panic!(),
    }
}

fn search_sat(n: usize,
              solver: &mut Solver,
              a: &Vec<Lit>,
              assumptions: &mut Vec<Lit>,
              k: usize)
              -> usize {
    if k >= n - 1 {
        1
    } else {
        let s = solver.is_true(a[k - 1]);
        assumptions.push(if s { a[k - 1] } else { !a[k - 1] });
        let c = search_sat(n, solver, a, assumptions, k + 1);
        assumptions.pop();
        assumptions.push(if s { !a[k - 1] } else { a[k - 1] });
        let c1 = search(n, solver, a, assumptions, k + 1);
        assumptions.pop();
        c + c1
    }
}

fn f(n: usize) -> usize {
    let (mut solver, proj) = make_solver(n);
    search(n, &mut solver, &proj, &mut vec![], 1)
}

fn main() {
    for n in 1.. {
        println!("{}: {}", n, f(n));
    }
}

Cargo.toml

[package]
name = "correlations-cms"
version = "0.1.0"
authors = ["Anders Kaseorg <[email protected]>"]

[dependencies]
cryptominisat = "5.0.1"
itertools = "0.6.0"

How it works

This does a recursive search through the tree of all partial assignments to prefixes of the Y/N array, using a SAT solver to check at each step whether the current partial assignment is consistent and prune the search if not. CryptoMiniSat is the right SAT solver for this job due to its special optimizations for XOR clauses.

The three families of constraints are

SiSk + iDki, for 1 ≤ kn − 2, 0 ≤ i ≤ nk;
Dki1Dki2 ∨ ¬Ak, for 1 ≤ kn − 2, 0 ≤ i1 < i2nk;
¬Dki1 ∨ ⋯ ∨ ¬Dkink − 1Ak, for 1 ≤ kn − 2, 0 ≤ i1 < ⋯ < ink − 1nk;

except that, as an optimization, S0 is forced to false, so that Dk0 is simply equal to Sk.


C++ using the BuDDy library

A different approach: have a binary formula (as binary decision diagram) that takes the bits of S as input and is true iff that gives some fixed values of Y or N at certain selected positions. If that formula is not constant false, select a free position and recurse, trying both Y and N. If there is no free position, we have found a possible output value. If the formula is constant false, backtrack.

This works relatively reasonable because there are so few possible values so that we can often backtrack early. I tried a similar idea with a SAT solver, but that was less successful.

#include<vector>
#include<iostream>
#include<bdd.h>

// does vars[0..i-1] differ from vars[n-i..n-1] in at least two positions?
bdd cond(int i, int n, const std::vector<bdd>& vars){
  bdd x1 { bddfalse };
  bdd xs { bddfalse };
  for(int k=0; k<i; ++k){
    bdd d { vars[k] ^ vars[n-i+k] };
    xs |= d & x1;
    x1 |= d;
  }
  return xs;
}

void expand(int i, int n, int &c, const std::vector<bdd>& conds, bdd x){
  if (x==bddfalse)
    return;
  if (i==n-2){
    ++c;
    return;
  }

  expand(i+1,n,c,conds, x & conds[2*i]);
  x &= conds[2*i+1];
  expand(i+1,n,c,conds, x);
}

int count(int n){
  if (n==1)   // handle trivial case
    return 1;
  bdd_setvarnum(n-1);
  std::vector<bdd> vars {};
  vars.push_back(bddtrue); // assume first bit is 1
  for(int i=0; i<n-1; ++i)
    if (i%2==0)            // vars in mixed order
      vars.push_back(bdd_ithvar(i/2));
    else
      vars.push_back(bdd_ithvar(n-2-i/2));
  std::vector<bdd> conds {};
  for(int i=n-1; i>1; --i){ // handle long blocks first
    bdd cnd { cond(i,n,vars) };
    conds.push_back( cnd );
    conds.push_back( !cnd );
  }
  int c=0;
  expand(0,n,c,conds,bddtrue);
  return c;
}

int main(void){
  bdd_init(20000000,1000000);
  bdd_gbc_hook(nullptr); // comment out to see GC messages
  for(int n=1; ; ++n){
    std::cout << n << " " << count(n) << "\n" ;
  }
}

To compile with debian 8 (jessie), install libbdd-dev and do g++ -std=c++11 -O3 -o hb hb.cpp -lbdd. It might be useful to increase the first argument to bdd_init even more.