import random

class Tsetlin:
    """Two-action learning automaton with a linear chain of states.

    States at or below ``n`` select action 1; states above ``n`` select
    action 2. Rewards push the state away from the ``n`` / ``n + 1``
    boundary (stopping at 1 and ``2 * n``), penalties push it towards and
    across that boundary.
    """

    def __init__(self, n):
        """Store the half-width ``n`` and start next to the action boundary."""
        self.n = n
        # The initial state is picked at random from the two states that sit
        # on either side of the boundary between the actions.
        boundary_states = [self.n, self.n + 1]
        self.state = random.choice(boundary_states)

    def reward(self):
        """Reinforce the current action by moving one step from the boundary.

        The state is left unchanged once it has reached the end of its side
        (1 on the action-1 side, ``2 * n`` on the action-2 side).
        """
        on_first_side = self.state <= self.n
        if on_first_side:
            if self.state > 1:
                self.state -= 1
        elif self.state < 2 * self.n:
            self.state += 1

    def penalize(self):
        """Weaken the current action by moving one step towards the boundary.

        From state ``n`` this crosses over to ``n + 1`` and vice versa, which
        switches the selected action.
        """
        self.state += 1 if self.state <= self.n else -1

    def makeDecision(self):
        """Return 1 when the state is at most ``n``, otherwise 2."""
        return 1 if self.state <= self.n else 2

class Referee:
    """Hands out rewards at random, with odds set by the yes-vote count."""

    def reward(self, yes_votes):
        """Decide randomly whether to reward, given the number of yes votes.

        The reward probability is 0.3, 0.5, 0.7 and 0.4 for 0, 1, 2 and 3
        yes votes respectively. Returns True for a reward and False for a
        penalty. For any other vote count no random number is drawn and
        None is returned.
        """
        # Position k holds the reward probability for k yes votes.
        reward_chance = (0.3, 0.5, 0.7, 0.4)
        for count, chance in enumerate(reward_chance):
            if yes_votes == count:
                return random.random() <= chance
        return None
            
# Main
#
# Simulation driver: three Tsetlin automata (each created with n = 10) vote
# for 1000 rounds. In every round the number of automata choosing action 2
# ("yes") is counted, the referee then rewards or penalizes each automaton
# based on that count, and a tally of yes-vote counts is printed at the end.
#
# NOTE: this section uses Python 2 print statements. A trailing comma after
# a print statement suppresses the newline, so several prints share a line.

ref = Referee()

# Placeholders; las is filled with automata below, and votes[k] will count
# the rounds in which exactly k automata voted yes (k = 0..3).
las = [0]*3
votes = [0]*4

for i in range(3):
    las[i] = Tsetlin(10)
    
print "**** Start ****\n"

for i in range(1000):
    yes_votes = 0

    # Show every automaton's current state and collect the votes.
    print "States: ", 
    for la in las:
        print la.state,
        # Decision 2 is counted as a yes vote.
        if la.makeDecision() == 2:
            yes_votes += 1
    print "\n"

    print yes_votes, " :", 

    # The referee is consulted separately for each automaton, so automata in
    # the same round can receive different outcomes for the same vote count.
    for la in las:
        if ref.reward(yes_votes):
            print "\tReward ",
            la.reward()
        else:
            la.penalize()
            print "\tPenalty",
    print "\n"
    # Record how many yes votes this round produced.
    votes[yes_votes] += 1

# Final histogram: index k holds the number of rounds with k yes votes.
print votes
