Express a number - a modern "Des Chiffres et des Lettres"

C++17, score .0086

This program has non-deterministic penalty score due to thread races, so I'm declaring based on an average of three runs, each of which handled the test suite in under a minute:

score 0.000071 for 14(11*13) = 14143
score 0.000019 for (696699+66)*69 = 48076785
score 0.000069 for 333333+333333333+33333 = 333699999
score 0.000975 for 5(1((((555555255-1-1-4-5-5-5-5-4-4-4-4-4-4-4-4-4-4-4-4-4-5-3-3-3-3-3-3-3-3-3-3-3-3-3-5)/2*3/2-2)/2*3+2+1+1+1+1-1-1)/2*2/2/2/2)/2) = 589062470
score 0.000462 for (((199181197*41-193-191-179-173-167-163-157-151-149-139-137-131-127-113-109-107-103-101-97-89-83-79-73-71-67-61-59-53-47-43-17-3)/5*7+23)/2/11*13+19)/31*37 = 8063447296
score 0.000118 for (992930870*72+812+756+702+650+600+552+506+462+420+380+342-42-56-182-12-210-156-90-20-272-30-6-306)/240*132*2 = 78640130184
score 0.000512 for (((317811*832040*3-39088169-24157817-14930352-9227465-5702887-2178309-1346269-3524578-514229-196418-121393-17711-233-75025-46368-89-28657-4181-10946-6765-34-987-2584-13-610-8-1)/2-377-144)/5-1597)1 = 793193194211
score 0.005725 for 2(20((120000000*20000+50000000+10000000+5000000+2000000+100000+50000+10000+5000+2000-500-1000)/50)/5)+200+100+10 = 2409600268972
total score = .007951

real    0m57.876s
user    4m24.396s
sys     0m0.684s

score 0.000071 for 14(11*13) = 14143
score 0.000019 for (696699+66)*69 = 48076785
score 0.000069 for 333333+333333333+33333 = 333699999
score 0.001675 for (3((((((((555555455+5+5+5+5-1-1-4-4-4-4-4-4-4-4-4-1-4-4-4-4-5-3-3-3-3-3-4)/2*3/2-1)*2+5)/3*3+3)/2-3-3)/2*3/2*2+2)/2*2/2*3+2+1)/5/2)-1-1-1-1-1-1-1-1-1-2)/2*3 = 590624943
score 0.000973 for ((199181197*41-193-191-179-173-167-163-157-151-149-139-137-131-127-113-107-101-59-97-79-3-71-67-83-2-47-37-73-89-103-19-11-29)/5*7+109-23)/61*43 = 8059325224
score 0.000118 for ((992930870*72+812+756+702+650+600+552+506+462+420+380+342+306+272+240+210+182-0-56-110-20-90)/2-42-156)/30*132/12*6 = 78640132296
score 0.000512 for (((317811*832040*3-39088169-24157817-14930352-9227465-5702887-3524578-514229-196418-2178309-1346269-121393-75025-28657-10946-233-46368-89-17711-2584-6765-610-4181-34-987-55-1)/2-8-144-377)/5-1597)1 = 793193194161
score 0.004734 for 2(20((120000000*20000+50000000+10000000+5000000+2000000+100000+50000+10000+5000+2000-100-1000-500)/200*50/10)/5) = 2412000335827
total score = .008171

real    0m45.636s
user    3m30.272s
sys     0m0.720s

score 0.000071 for 14(11*13) = 14143
score 0.000019 for (696699+66)*69 = 48076785
score 0.000069 for 333333+333333333+33333 = 333699999
score 0.002963 for 1(((((((555555555+5+5+5+5+5+5+4+4+4+4-1-2-4-4-4-4-4-4-4-4-4-4-4-3-3-3-3)/2*3+3+2)/2*2+3+3)/2*2/2/2*3+3)/2-3-3)*3/2-1-3)/2*3/2/2)/2 = 587890622
score 0.000069 for ((((199181197*41-193-191-179-173-167-163-157-151-149-139-137-131-127-113-109-107-103-101-97-89-83-79-73-71-67-61-59-53-47-43-37-11)/7)2+3)/23*17-13-5)/31*29 = 8066615553
score 0.000118 for ((992930870*72+812+756+702+650+600+552+506+462+420+380-0-6-90-56-42-272-182-110-210-342-30-306)*2+12)/240*132 = 78640129524
score 0.000512 for (((317811*832040*3-39088169-24157817-14930352-9227465-5702887-2178309-1346269-3524578-514229-196418-121393-75025-46368-28657-144-55-17711-2584-10946-4181-6765-21-610-987-377-8-1)/2-89-13)/5-233-1597)1 = 793193192491
score 0.005725 for 2(20((120000000*20000+50000000+10000000+5000000+2000000+100000+50000+10000+5000+2000-500-1000)/50)/5)+200+100+10 = 2409600268972
total score = .009546

real    0m57.289s
user    4m19.488s
sys     0m0.708s

Here's the program; explanation is provided in comments. You can define CONCAT_NONE for traditional Countdown rules that don't permit concatenation, or CONCAT_DIGITS to allow concatenation of the input values, but not of any intermediate results. By default, without either defined, the most liberal rules are used.

#include <omp.h>

#include <algorithm>
#include <cmath>
#include <memory>
#include <set>
#include <string>
#include <utility>
#include <vector>

// We apply some principles to help us arrive at a good enough solution
// in a reasonable time:

// 1. Ruthlessly prune duplicate expressions from the candidate
//    list.  If we've seen a+b, then there's no need to consider
//    b+a.  Similarly, having seen (a+b)+c, then (a+c)+b can be
//    discounted.
// 2. Detect duplicates by storing batches of part-processed results
//    in sets before sending to the next pass.
// 3. Sort our candidates so that those containing a term near to the
//    target are first in line for further processing.
// 4. Gradually widen our acceptance margin as we proceed.  This
//    allows us to terminate quickly without exhaustively searching
//    the full problem space.
// 5. Parallelize the generation of candidate solutions using OpenMP.

// Define precedence values for our operators, so that we can print
// with the minimum sufficient parentheses.  The values are grouped
// into tens so that add/10 == subtract/10 and mult/10 == divide/10 -
// the operators use that for avoiding duplicate expressions.
static const int PREC_ADD = 26;
static const int PREC_SUBTRACT = 24;
static const int PREC_MULT = 16;
static const int PREC_DIVIDE = 14;
static const int PREC_CONCAT = 2;
static const int PREC_LITERAL = 0;

static const int PREC_MAX = 1000;

class LiteralTerm;

struct Term
{
    long value;
    int precedence;

    Term(long value, int precedence)
        : value(value), precedence(precedence)
    {}
    Term(const Term&) = default;
    virtual ~Term() = default;

    virtual std::string to_string(int p = PREC_MAX) const = 0;
    virtual LiteralTerm as_literal() const = 0;

    long distance(long target) const { return std::abs(value - target); }

    // We sort large values first, in the hope that this will approach
    // the target faster.
    bool operator<(const Term& o) const { return value > o.value; }
};


// We have two kinds of Term: a LiteralTerm is a leaf node of the
// expression tree, and a BinaryTerm is an internal node.
struct Operator;

class LiteralTerm : public Term
{
    std::string s;
public:
    LiteralTerm(std::string s) : Term(std::stol(s), 0), s(s) {}
    LiteralTerm(std::string s, long value) : Term(value, 0), s(s) {}
    std::string to_string(int = PREC_MAX) const override { return s; }
    LiteralTerm as_literal() const override { return *this; }
};

struct BinaryTerm : public Term
{
    Operator const *op;

    std::shared_ptr<const Term> a;
    std::shared_ptr<const Term> b;

    BinaryTerm(long value, const Operator* op, std::shared_ptr<const Term> a, std::shared_ptr<const Term> b);
    BinaryTerm(const BinaryTerm&) = default;
    BinaryTerm& operator=(const BinaryTerm&) = default;

    std::string to_string(int p = PREC_MAX) const;

    LiteralTerm as_literal() const override { return { to_string(), value }; }
};

struct TermList {
    std::vector<std::shared_ptr<const Term>> terms;
    std::vector<long> values;
    long target_value;
    long badness;

    TermList(std::vector<std::shared_ptr<const Term>> terms, long target_value)
        : terms(std::move(terms)),
          values(),
          target_value(target_value),
          badness(min_badness(this->terms, target_value))
    {
        values.reserve(terms.size());
        std::transform(terms.begin(), terms.end(),
                       std::back_inserter(values), [](auto t) { return t->value; });
        // Literals that begin with "0" need to be distinct from (but
        // adjacent to) equivalent non-literals.  Append a negative
        // value for each term with leading zeros.  There's an edge
        // case involving multiple leading zeros, but we'll ignore
        // that.
        for (const auto& v: terms)
            if (v->precedence <= PREC_CONCAT && v->value > 0 && v->to_string()[0] == '0')
                values.push_back(-v->value);
    }

    // Sort according to the term that's nearest to the target.
    bool operator<(const TermList& o) const
    {
        return std::make_tuple(std::cref(badness),   std::cref(values))
            <  std::make_tuple(std::cref(o.badness), std::cref(o.values));
    }

private:
    static long min_badness(const std::vector<std::shared_ptr<const Term>>& t, long target_value)
    {
        auto less_bad = [target_value](const auto& a, const auto&b)
            { return a->distance(target_value) < b->distance(target_value); };
        auto const& e = *std::min_element(t.begin(), t.end(), less_bad);
        return std::abs(e->value - target_value);
    }
};

using Set = std::set<TermList>;

// Detect duplicate expressions.  This will discount "3+2-3", "8*5*2/3/5"
// and similar expressions that contain simple pairs of inverse operands.
static bool contains_value(const Term& t, int precedence, long value)
{
    auto *const b = dynamic_cast<const BinaryTerm*>(&t);
    if (t.precedence == precedence)
        return t.value == value
            || b && b->b->value < value
            || b && contains_value(*b->a, precedence, value)
            || b && contains_value(*b->b, precedence, value);
    if (t.precedence/10 == precedence/10)
        // Advance through the subtractions to inspect the additions
        // (or through the divides to inspect the multiplications).
        return b && contains_value(*b->a, precedence, value);
    return false;
}

// An Operator is a factory producing binary terms of a given type,
// and for printing those terms.  Here's the abstract base class.
struct Operator
{
    using TermPointer = std::shared_ptr<const Term>;
    using BinaryTermPointer = std::shared_ptr<const BinaryTerm>;

    int const precedence;
    std::string const joiner;

    virtual std::string to_string(const Term &a, const Term &b) const {
        return a.to_string(precedence) + joiner + b.to_string(precedence);
    }

    virtual BinaryTermPointer make_term(TermPointer a, TermPointer b) const {
        long r = evaluate(*a, *b);
        return r ? std::make_shared<BinaryTerm>(r, this, a, b) : BinaryTermPointer();
    }

    virtual ~Operator() = default;

protected:
    Operator(int precedence, std::string joiner) : precedence(precedence), joiner(joiner) {}

    virtual long evaluate(const Term& a, const Term& b) const = 0;
};

// Now we define a subclass for each permitted operator
struct AddOperator : Operator
{
    AddOperator() : Operator(PREC_ADD, "+") {}

    long evaluate(const Term& a, const Term& b) const override
    {
        const auto *d = dynamic_cast<const BinaryTerm*>(&a);
        long r;
        return b.precedence/10 != PREC_ADD/10
            && a.precedence != PREC_SUBTRACT
            && b.value > 0
            && ! (d && d->precedence == this->precedence && d->b->value < b.value)
            && !__builtin_add_overflow(a.value, b.value, &r)
            ? r : 0;
    }
};
struct SubtractOperator : Operator
{
    SubtractOperator() : Operator(PREC_SUBTRACT, "-") {}

    long evaluate(const Term& a, const Term& b) const override
    {
        return b.precedence < PREC_SUBTRACT
            && a.value > b.value
            && !contains_value(a, PREC_ADD, b.value)
            ? a.value - b.value : 0;
    }
};
struct MultiplyOperator : Operator
{
    MultiplyOperator() : Operator(PREC_MULT, "*") {}

    long evaluate(const Term& a, const Term& b) const override
    {
        const auto *d = dynamic_cast<const BinaryTerm*>(&a);
        long r;
        return b.precedence/10 != PREC_MULT/10
            && b.value > 1
            && (b.value > 2 || a.value > 2)
            && ! (d && d->precedence == this->precedence && d->b->value < b.value)
            && !__builtin_mul_overflow(a.value, b.value, &r)
            ? r : 0;
    }
};
struct DivideOperator : Operator
{
    DivideOperator() : Operator(PREC_DIVIDE, "/") {}

    long evaluate(const Term& a, const Term& b) const override
    {
        return b.precedence/10 != PREC_DIVIDE/10 && b.value > 1
            && a.value % b.value == 0
            && !contains_value(a, PREC_MULT, b.value)
            ? a.value / b.value : 0;
    }
};

struct ConcatOperator : Operator
{
    ConcatOperator() : Operator(PREC_CONCAT, "") {}

    long evaluate(const Term& a, const Term& b) const override
    {
#ifdef CONCAT_DIGITS
        if (a.precedence > PREC_CONCAT || a.value == 0 || b.precedence >= PREC_CONCAT)
            return 0;
#else  // CONCAT_FULL
        if (b.precedence == PREC_CONCAT || a.value == 0)
            return 0;
#endif
        long bv = b.value, av = a.value, x = 1, r;
        if (b.precedence > PREC_CONCAT) while (x <= bv) x*= 10;
        else { int d = b.to_string().length(); while (d--) x*= 10; }
        return __builtin_mul_overflow(av, x, &r) || __builtin_add_overflow(r, bv, &r) ? 0 : r;
    }
};
struct ReverseConcatOperator : ConcatOperator
{
    BinaryTermPointer make_term(TermPointer a, TermPointer b) const override
    {
        return ConcatOperator::make_term(b, a);
    }
};

static const std::vector<std::shared_ptr<const Operator>> ops{
#ifndef CONCAT_NONE
        std::make_shared<ConcatOperator>(),
        std::make_shared<ReverseConcatOperator>(),
#endif
        std::make_shared<MultiplyOperator>(),
        std::make_shared<AddOperator>(),
        std::make_shared<SubtractOperator>(),
        std::make_shared<DivideOperator>(),
};


// Implement the BinaryTerm members that use Operator
BinaryTerm::BinaryTerm(long value, const Operator* op, std::shared_ptr<const Term> a, std::shared_ptr<const Term> b)
    : Term(value, op->precedence), op(op), a(std::move(a)), b(std::move(b))
{}

std::string BinaryTerm::to_string(int p) const
{
    auto const s = op->to_string(*a, *b);
    return (p/10) < (precedence/10) ? "("+s+")" : s;
}


// An object to represent our target value, and how close we have
// reached so far.
struct Target
{
    const long value;
    double max_badness = 0;

    LiteralTerm best = {"0"};
    long best_badness = value;

    bool done() const { return best_badness < max_badness; }
    double score() const { return 1.*best_badness/value; }

    void update(const Term& t)
    {
        auto badness = std::abs(t.value - value);
        if (badness < best_badness) {
            best = t.as_literal();
            best_badness = badness;
        }
    }

    void update(const TermList& terms)
    {
        for (auto t: terms.terms)
            update(*t);
    }

    void increase_threshold(size_t items_seen)
    {
        // Adjust our acceptance threshold nearer to accepting 0 by
        // 0.01% for every million values seen.
        max_badness += (value - max_badness) * .0001 * std::exp(items_seen / 1000000);
    }
};

// OpenMP reduction for sets
auto merge(auto& a, auto& b)
{
    auto it = a.begin();
    for (auto&& e: b)
        it = a.insert(std::move(e)).first;
    return a;
}
#pragma omp declare reduction(merge: Set: merge<Set>(omp_out, omp_in) ) \
    initializer(omp_priv = Set())


// We run a cascade of pair-wise combination steps, where for each
// input TermList, we generate every possible allowed pairing of its
// terms, and pass that through (in batches) to the next stage.
struct Combiner
{
    std::unique_ptr<Combiner> const next;
    Target& target;
    size_t const max_output_size;
    size_t const nterms;

    Set input = {};
    size_t output_size = 0;

    Combiner(Target& target, size_t nterms, size_t max_output_size)
        : next(nterms > 0 ? std::make_unique<Combiner>(target, nterms-1, max_output_size) : nullptr),
          target(target),
          max_output_size(max_output_size),
          nterms(nterms)
    {}

    inline void insert(const TermList&& t)
    {
        target.update(t);
        if (target.done()) return;
        if (next) {
            if (input.insert(t).second)
                output_size += count_distinct_pairs(t);
            if (output_size >= max_output_size)
                process_input();
        }
    }

    void finish()
    {
        process_input();
        if (next)
            next->finish();
    }

private:
    // Here's where we do the real work - generating and sifting the
    // combined terms for the next pass.
    void process_input()
    {
        if (target.done()) {
            return;
        }

        if (!next)
            return;

        // Move the elements into a vector, so we can parallelize the
        // for-loop.
        auto in = std::vector<Set::value_type>();
        in.reserve(input.size());
        std::move(input.begin(), input.end(), std::back_inserter(in));
        input.clear();
        output_size = 0;

        auto out = Set();

#pragma omp parallel reduction(merge:out)
        {
#pragma omp for
            for (auto it = in.begin();  it < in.end();  ++it)
            {
                try {
                    const auto end = it->terms.cend();
                    for (auto i = it->terms.cbegin();  i != end;  i = std::upper_bound(i, end, *i))
                        for (auto j = i+1;  j != end;  j = std::upper_bound(j, end, *j)) {
                            for (const auto& op: ops) {
                                auto x = op->make_term(*i, *j);
                                if (x) out.insert(replace(*it, i, j, x));
                            }
                        }
                } catch (const std::bad_alloc&) {
                    // Ignore it; process what we've generated so far.
                }
            }
        }

        // Now we're in single-threaded code, we can pass the combined
        // results to the next combiner.
        for (auto& o: out)
            next->insert(std::move(o));

        target.increase_threshold(out.size());
    }


    // Helper methods used by the above

    // An upper bound on the possible number of output TermLists,
    // assuming every combination is valid.  If all n terms in the
    // input list are distinct, that's just ½n(n-1), but if values
    // are duplicated, we need to reduce n to the number of distinct
    // values, and then add in the cases where we pick two of the
    // same value.
    static int count_distinct_pairs(const TermList& terms)
    {
        int distinct = 0, duplicated = 0;
        auto it = terms.terms.begin(),
            end = terms.terms.end();
        while (it != end) {
            ++distinct;
            auto const& v = (*it)->value;
            if (++it == end || (*it)->value != v) continue;
            ++duplicated;
            while (++it != end && (*it)->value == v)
                ;
        }
        return distinct * (distinct - 1) / 2 + duplicated;
    }

    // Create a new TermList from o by replacing elements i and j with
    // newly-created term n.
    static TermList replace(const TermList& o, auto i, auto j, std::shared_ptr<const Term> n)
    {
        std::vector<std::shared_ptr<const Term>> r;
        r.reserve(o.terms.size() - 1);
        auto added = false;
        for (auto k = o.terms.begin();  k != o.terms.end();  ++k) {
            if (!added && (*k)->value < n->value) { r.push_back(n); added = true; }
            if (k != i && k != j) r.push_back(*k);
        }
        if (!added) r.push_back(n);
        return { r, o.target_value };
    }
};


#include <iostream>
std::ostream& operator<<(std::ostream& o, const Term& t)
{
    return o << t.to_string()<< " = " << t.value;
}
std::ostream& operator<<(std::ostream& o, const TermList& t)
{
    auto *sep = "";
    o << "[" << t.badness << "] ";
    for (auto const& x: t.terms)
        o << sep << *x, sep = ", ";
    return o;
}

int main(int argc, char **argv)
{
    if (argc < 3) {
        std::cerr << "Usage: " << argv[0] << " target term ...";
        return EXIT_FAILURE;
    }
    auto target = Target{std::stol(*++argv)};

    std::vector<std::shared_ptr<const Term>> terms;
    while (*++argv) {
        auto t = std::make_shared<LiteralTerm>(*argv);
        target.update(*t);
        terms.push_back(t);
    }
    std::sort(terms.begin(), terms.end());

    // Construct the sieve
    Combiner search{target, terms.size(), 2500000/terms.size() + 1}; // tunable - max set size
    search.insert({terms, target.value});
    search.finish();

    std::cout << "score " << std::fixed << target.score() << " for " << target.best << std::endl;
}

I compiled this using GCC 6.2, using g++ -std=c++17 -fopenmp -march=native -O3 (along with some debugging and warning options).


Python 2.7. Score: 1,67039106

So, I decided to have a throw at it myself. Nothing too fancy. This program sorts the numbers in reverse order (big to small), and tries all operators sequentially on the numbers. Blazing fast, but a performance that deserves to be superseded.

Here is the program:

import sys

def score(current,target):
    return abs(1.0-current/float(target))

# Process input and init variables
targetvalue=int(sys.argv[1].strip(','))
numbers=[int(a.strip(',')) for a in sys.argv[2:]]
numbers.sort(reverse=True)
expression='('+str(numbers[0])+')'
currentvalue=nextvalue=testvalue=numbers[0]

# Loop over all values (except the first one)
for value in numbers[1:]:
    # Set multiplication as the reference operator...
    testvalue=currentvalue*value
    minscore=score(testvalue,targetvalue)
    operator="*"
    nextvalue=testvalue
    
    # then try division (only if result is integer and not divided by zero)...
    if value!=0 and currentvalue%value==0:
        testvalue=currentvalue/value
        if score(testvalue,targetvalue)<minscore:
            operator="/"
            minscore=score(testvalue,targetvalue)
            nextvalue=testvalue

    # and addition...
    testvalue=currentvalue+value
    if score(testvalue,targetvalue)<minscore:
        operator="+"
        minscore=score(testvalue,targetvalue)
        nextvalue=testvalue

    # and subtraction...
    testvalue=currentvalue-value
    if score(testvalue,targetvalue)<minscore:
        operator="-"
        minscore=score(testvalue,targetvalue)
        nextvalue=testvalue

    # and concatenation
    testvalue=int(str(currentvalue)+str(value))
    if score(testvalue,targetvalue)<minscore:
        operator=""
        minscore=score(testvalue,targetvalue)
        nextvalue=testvalue

    # finally check if any operator improces the score. If so, append to the expression
    if score(nextvalue,targetvalue)<score(currentvalue,targetvalue):
        expression='('+expression+operator+str(value)+')'
        currentvalue=nextvalue

print(expression)

The output for all the test cases is:

((((((15)14)*13)-12)-11)-10)
((((((((((((999)996)+969)+966)+699)+696)+669)+666)*69)-66)-9)-6)
(((((((((333333333)+333333)+33333)+3333)+333)+33)+3)+3)+3)
(((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((((5)5)5)5)5)5)5)5)5)+5)+5)+5)+5)+5)+5)+4)+4)+4)+4)+4)+4)+4)+4)+4)+4)+4)+4)+4)+4)+4)+3)+3)+3)+3)+3)+3)+3)+3)+3)+3)+3)+3)+3)+3)+3)+2)+2)+2)+2)+2)+2)+2)+2)+2)+2)+2)+2)+2)+2)+2)+1)+1)+1)+1)+1)+1)+1)+1)+1)+1)+1)+1)+1)+1)+1)
((((((((((((((((((((((((((((((((((((((((((((((199)197)193)+191)+181)+179)+173)+167)+163)+157)+151)+149)+139)+137)+131)+127)+113)+109)+107)+103)+101)+97)+89)+83)*79)-73)-71)-67)-61)-59)-53)-47)-43)-41)-37)-31)-29)-23)-19)-17)-13)-11)-7)-5)-3)/2)
(((((((((((((((((((((((((((((((992)930)870)+812)+756)+702)+650)+600)+552)+506)+462)+420)+380)+342)+306)+272)+240)+210)+182)*156)-132)-110)-90)-72)-56)-42)-30)/20)*12)-6)-2)
((((((((((((((((((((((((((((((((((((((39088169)+24157817)+14930352)+9227465)+5702887)+3524578)+2178309)+1346269)+832040)+514229)+317811)+196418)+121393)+75025)+46368)+28657)+17711)*10946)-6765)-4181)-2584)-1597)-987)-610)-377)-233)-144)-89)-55)-34)-21)-13)-8)-5)-3)/2)+1)+1)
(((((((((((((((((((((50000000)+20000000)+10000000)+5000000)+2000000)+100000)*50000)-20000)-10000)-5000)-2000)-1000)-500)-200)-100)-50)-20)-10)/5)*2)+1)