KOTH - Loaded RPS

Statistician (no longer playing)

import random
import collections

R, P, S = moves = range(3)
move_idx = {"R": R, "P": P, "S": S}
name = "RPS"
beat = (P, S, R)
beaten = (S, R, P)

def react(_0, _1, _2, _3, _4, opp_history):
    if not opp_history:
        return random.randrange(0, 3)
    return beat[opp_history[-1]]

def anti_react(_0, _1, _2, _3, _4, opp_history):
    if not opp_history:
        return random.randrange(0, 3)
    return beaten[opp_history[-1]]

def random_max(scores):
    scores = [s + random.normalvariate(0, 1) for s in scores]
    return scores.index(max(scores))

def greedy_margin(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
    scores = [my_loaded[move] - opp_loaded[beat[move]] for move in moves]
    return random_max(scores)

def anti_greedy(my_points, opp_pints, my_loaded, opp_loaded, my_history, opp_history):
    scores = [-my_loaded[move] for move in moves]
    return random_max(scores)

def recent_stats(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
    opp_history = opp_history[-10:-1]
    counts = collections.Counter(opp_history)
    scores = [(counts[beaten[move]] + 1) * my_loaded[move] - 
              (counts[beat[move]] + 1) * opp_loaded[move] for move in moves]
    return random_max(scores)

def statistician(_0, _1, _2, _3, my_history, opp_history):
    m1 = []
    o1 = []
    my_loaded = [0] * 3
    opp_loaded = [0] * 3
    my_points = 0
    opp_points = 0
    strategies = [react, anti_react, greedy_margin, anti_greedy, recent_stats]
    strategy_scores = [0 for _ in strategies]
    for i, (mx, ox) in enumerate(zip(my_history, opp_history)):
        mx = move_idx[mx]
        ox = move_idx[ox]
        for j, strategy in enumerate(strategies):
            strategy_scores[j] *= 0.98
            move = strategy(my_points, opp_points, my_loaded, opp_loaded, m1, o1)
            if move == beat[ox]:
                strategy_scores[j] += my_loaded[move]
            elif move == beaten[ox]:
                strategy_scores[j] -= opp_loaded[ox]
        m1.append(mx)
        o1.append(ox)
        if mx == beat[ox]:
            opp_loaded[ox] += 1
            my_points += my_loaded[mx]
        elif mx == beaten[ox]:
            my_loaded[mx] += 1
            opp_points += opp_loaded[ox]
        else:
            my_loaded[mx] += 0.5
            opp_loaded[ox] += 0.5
    strategy = strategies[random_max(strategy_scores)]
    return name[strategy(my_points, opp_points, my_loaded, opp_loaded, m1, o1)]

Switches between a few simple strategies based on expected past performance

Statistician 2

import random
import collections
import numpy as np

R, P, S = moves = range(3)
move_idx = {"R": R, "P": P, "S": S}
names = "RPS"
beat = (P, S, R)
beaten = (S, R, P)

def react(my_loaded, opp_loaded, my_history, opp_history):
    if not opp_history:
        return random.randrange(0, 3)
    counts = [0, 0, 0]
    counts[beat[opp_history[-1]]] += 1
    return counts

def random_max(scores):
    scores = [s + random.normalvariate(0, 1) for s in scores]
    return scores.index(max(scores))

def argmax(scores):
    m = max(scores)
    return [s == m for s in scores]

def greedy_margin(my_loaded, opp_loaded, my_history, opp_history):
    scores = [my_loaded[move] - opp_loaded[beat[move]] for move in moves]
    return argmax(scores)

recent_counts = None

def best_move(counts, my_loaded, opp_loaded):
    scores = [(counts[beaten[move]] + 0.5) * my_loaded[move] - 
              (counts[beat[move]] + 0.5) * opp_loaded[move] for move in moves]
    return argmax(scores)

def recent_stats(my_loaded, opp_loaded, my_history, opp_history):
    if len(opp_history) >= 10:
        recent_counts[opp_history[-10]] -= 1
    recent_counts[opp_history[-1]] += 1
    return best_move(recent_counts, my_loaded, opp_loaded)

order2_counts = None

def order2(my_loaded, opp_loaded, my_history, opp_history):
    if len(my_history) >= 2:
        base0 = 9 * my_history[-2] + 3 * opp_history[-2]
        order2_counts[base0 + opp_history[-1]] += 1
    base1 = 9 * my_history[-1] + 3 * opp_history[-1]
    counts = [order2_counts[base1 + move] for move in moves]
    return best_move(counts, my_loaded, opp_loaded)

def nash(my_loaded, opp_loaded, my_history, opp_history):
    third = 1.0 / 3
    p = np.full(3, third)
    q = np.full(3, third)
    u = np.array(my_loaded)
    v = np.array(opp_loaded)
    m0 = np.zeros(3)
    m1 = np.zeros(3)
    lr = 0.2
    for _ in range(10):
        de0 = u * np.roll(q, 1) - np.roll(v * q, 2)
        de1 = v * np.roll(p, 1) - np.roll(u * p, 2)
        m0 = 0.9 * m0 + 0.1 * de0
        m1 = 0.9 * m1 + 0.1 * de1
        p += lr * m0
        q += lr * m1
        p[p < 0] = 0
        q[q < 0] = 0
        tp, tq = np.sum(p), np.sum(q)
        if tp == 0 or tq == 0:
            return np.full(3, third)
        p /= tp
        q /= tq
        lr *= 0.9
    return p
   
strategies = [react, greedy_margin, recent_stats, order2, nash]

predictions = strategy_scores = mh = oh = None

def statistician2func(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
    global strategy_scores, history, recent_counts, mh, oh, predictions, order2_counts
    if not opp_history:
        strategy_scores = [0 for _ in strategies]
        recent_counts = collections.Counter()
        order2_counts = collections.Counter()
        mh, oh = [], []
        predictions = None
        return random.choice(names)
    my_move = move_idx[my_history[-1]]
    opp_move = move_idx[opp_history[-1]]
    if predictions is not None:
        for j, p in enumerate(predictions):
            good = beat[opp_move]
            bad = beaten[opp_move]
            strategy_scores[j] += (my_loaded[good] * p[good] - opp_loaded[opp_move] * p[bad]) / sum(p)
    mh.append(my_move)
    oh.append(opp_move)
    predictions = [strategy(my_loaded, opp_loaded, mh, oh) for strategy in strategies]
    strategy = random_max(strategy_scores)
    p = predictions[strategy]
    r = random.random()
    for i, pi in enumerate(p):
        r -= pi
        if r <= 0:
            break
    return names[i]

Nash

import numpy as np
import random

def nashfunc(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
    third = 1.0 / 3
    p = np.full(3, third)
    q = np.full(3, third)
    u = np.array(my_loaded)
    v = np.array(opp_loaded)
    m0 = np.zeros(3)
    m1 = np.zeros(3)
    lr = 0.2
    for _ in range(10):
        de0 = u * np.roll(q, 1) - np.roll(v * q, 2)
        de1 = v * np.roll(p, 1) - np.roll(u * p, 2)
        m0 = 0.9 * m0 + 0.1 * de0
        m1 = 0.9 * m1 + 0.1 * de1
        p += lr * m0
        q += lr * m1
        p[p < 0] = 0
        q[q < 0] = 0
        tp, tq = np.sum(p), np.sum(q)
        if tp == 0 or tq == 0:
            return random.choice("RPS")
        p /= tp
        q /= tq
        lr *= 0.9
    r = random.random()
    for i, pi in enumerate(p):
        r -= pi
        if r <= 0:
            break
    return "RPS"[i]

Computes an approximate Nash equilibrium by gradient descent.


Weigher

I lost track of reasoning while experimenting with the code, but the basic idea is to estimate opponent's move probability by last 3 moves using some weights and multiply them by another weight which depends on loads. I thought that I can somehow use my_loaded too, but I couldn't decide how, so left it out.

def weigher(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
    idx = {"R": 0, "P": 1, "S": 2}
    sc = [0, 0, 0]
    for i, m in enumerate(reversed(opp_history[-3:])):
        sc[idx[m]] += (1 / (1 + i))

    for i in range(3):
        sc[i] *= (opp_loaded[i] ** 2)

    return "PSR"[sc.index(max(sc))]

Satan

Probably will be disqualified, because it's kind of cheating and it makes some assumptions about the testing function (it has to have opponent's function in a variable on its stack frame), but it doesn't technically break any current rules — it doesn't redefine or rewrite anything. It simply uses black magic to execute the opponent function to see what turn did/will they do. It cannot deal with randomness, but deterministic bots have no chance to defeat Satan.

def satan(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
    import inspect, types
    f = inspect.currentframe()
    s = f.f_code.co_name
    try:
        for v in f.f_back.f_locals.values():
            if isinstance(v, types.FunctionType) and v.__name__ != s:
                try:
                    return "PSR"[{"R": 0, "P": 1, "S": 2}[
                        v(opp_points, my_points, opp_loaded, my_loaded, opp_history, my_history)]]
                except:
                    continue
    finally:
        del f

Fitter

This Bot improves Pattern and fuses it with Economist (Pattern and Economist will no longer participate)

The improvement of Pattern is that the Bot now looks for two two kinds of patterns: Opponent reacting to his last play and opponent reacting to my last play. Then evaluates both predictions to use the one that fits the best.

From that pattern the Bot has now the probability for R, P and S. Taking that into account and the expected value of each play (as Economist did), the Bot plays the one that gives the most value.

import random
import numpy as np
def fitterfunc(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
        t = len(opp_history)
        RPS = ["R","P","S"]
        if t <= 2:
                return RPS[t]
        elif t == 3:
                return random.choice(RPS)

        def n(c): return RPS.index(c)

        total_me = np.zeros(shape=(3,3))
        total_opp= np.zeros(shape=(3,3))
        p_me = np.array([[1/3]*3]*3)
        p_opp = np.array([[1/3]*3]*3)

        for i in range(1, t):
                total_me[n(my_history[i-1]), n(opp_history[i])] += 1
                total_opp[n(opp_history[i-1]), n(opp_history[i])] += 1
        for i in range(3):
                if np.sum(total_me[i,:]) != 0:
                        p_me[i,:] = total_me[i,:] / np.sum(total_me[i,:])
                if np.sum(total_opp[i,:]) != 0:
                        p_opp[i,:] = total_opp[i,:] / np.sum(total_opp[i,:])

        error_me = 0
        error_opp = 0

        for i in range(1, t):
                diff = 1 - p_me[n(my_history[i-1]), n(opp_history[i])]
                error_me += diff * diff
                diff = 1 - p_opp[n(opp_history[i-1]), n(opp_history[i])]
                error_opp += diff * diff

        if error_me < error_opp:
                p = p_me[n(my_history[-1]),:]
        else:
                p = p_opp[n(opp_history[-1]),:]


# From here, right now I weight values, though not 100% is the best idea, I leave the alternative in case I'd feel like changing it
        value = [(p[2]*my_loaded[0] - p[1]*opp_loaded[1], "R"), (p[0]*my_loaded[1] - p[2]*opp_loaded[2], "P"), (p[1]*my_loaded[2] - p[0]*opp_loaded[0], "S")]
        value.sort()

        if value[-1][0] > value[-2][0]:
                return value[-1][1]
        elif value[-1][0] > value[-3][0]:
                return random.choice([value[-1][1], value[-2][1]])
        else:
                return random.choice(RPS)

#       idx = p.tolist().index(max(p))
#       return ["P", "S", "R"][idx]

Here are the two old codes

Pattern (no longer playing)

The Pattern tries to find patterns on his opponent. It looks what the opponent had played after the last play he did (giving more weight to the latter plays). Through that, it guesses what the opponent will play, and plays the countermatch to that.

import random
import numpy as np
def patternfunc(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
        if len(opp_history) == 0:
                return random.choice(["R","P","S"])
        elif len(opp_history) == 1:
                if opp_history == "R":
                        return "P"
                elif opp_history == "P":
                        return "S"
                elif opp_history == "S":
                        return "R"

        p = np.array([1/3]*3)
        c = opp_history[-1]
        for i in range(1, len(opp_history)):
                c0 = opp_history[i-1]
                c1 = opp_history[i]
                if c0 == c:
                        p *= .9
                        if c1 == "R":
                                p[0] += .1
                        elif c1 == "P":
                                p[1] += .1
                        elif c1 == "S":
                                p[2] += .1

        idx = p.tolist().index(max(p))
        return ["P", "S", "R"][idx]

Economist (no longer playing)

The Economist does the following: Guesses the probability of each play by the opponent by watching what he had played the last 9 turns. From that, computes the expected benefit of each play and goes with the one that has the best expected value.

import random
def economistfunc(my_points, opp_points, my_loaded, opp_loaded, my_history, opp_history):
        if len(opp_history) == 0:
                return random.choice(["R","P","S"])
        if len(opp_history) > 9:
                opp_history = opp_history[-10:-1]
        p = [opp_history.count("R"), opp_history.count("P"), opp_history.count("S")]

        value = [(p[2]*my_loaded[0] - p[1]*opp_loaded[1], "R"), (p[0]*my_loaded[1] - p[2]*opp_loaded[2], "P"), (p[1]*my_loaded[2] - p[0]*opp_loaded[0], "S")]
        value.sort()

        if value[-1][0] > value[-2][0]:
                return value[-1][1]
        elif value[-1][0] > value[-3][0]:
                return random.choice([value[-1][1], value[-2][1]])
        else:
                return random.choice(["R","P","S"])