# markov.py Copyright Daniel Casner 2007 # # This work is licensed under a Creative Commons Attribution-Noncommercial- # Share Alike 3.0 United States License. See: # http://creativecommons.org/licenses/by-nc-sa/3.0/us/ # for details. My name and / or a link to www.danielcasner.org must be # maintained if you use this module or create a derivative work which copies # code substantially from this module. # # Questions regarding comertial use of the software should be directed to the # author at daniel@danielcasner.org # # This program is distributed in the hope that it will be useful, but WITHOUT # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS # FOR A PARTICULAR PURPOSE. __doc__ = """Generalized Markov chain classes This module is licensed under a Creative Commons Attribution-Noncommercial- Share Alike 3.0 United States License. See: http://creativecommons.org/licenses/by-nc-sa/3.0/us/ for details. This module contains a number of classes for providing generalized Markov chain functionality. Chainer is the base class, most simple and most general. The subclasses provide various application specific enhancements. I've tried to keep these implementations as general as possible so that this module can be used as a framework for any number of Markov chain based projects. Efficiency has also been a programming goal here (as much as it can be in an interpreted language) so these chainers will hopefully be relitively fast. """ __author__ = "Daniel Casner " __version__ = 0.3 from random import Random from sys import maxint from threading import Thread class Chainer: """A generalized class represetation of a Markov chain""" def __init__(self, dict={}, history=[]): """Sets up the chainer dict: Specifies the state transitions as a dictionary where each key is the present state (or a touple of present and past states in the case of higher order Markov chains) and the value is the next state. If no dict is specified you must train the chainer some how. history: Optionally initalize memory of the last several states we've been in. If dict is specified this should be at least the order of the Markov chain long. """ self.dict = dict self.memory = history def __initTraining__(self, history, order): """Internal method used to get training started""" if type(order) != int or order < 1: desc = 'Invalid order specified %i' % order raise ChainerException(desc, self), desc return False elif len(history) < order+1: desc = 'Not enough history to train with order=%i' % order raise ChainerException(desc, self), desc return False else: # If we haven't been given a memory yet use the end of the training # data for the purpose. if not self.memory: if order == 1: self.memory = history[-1] else: self.memory = history[-order:] return True def __updateMemory__(self, next): """Internal method for keeping track of past transitions. next should be the next single state. """ if type(self.memory) == list: self.memory = self.memory[1:] + [next] else: self.memory = next def __trans__(self, state): """Internal method for subclass use only""" # Process input state if state: self.memory = state # Generate successor next = None if type(self.memory) == list: key = tuple(self.memory) else: key = self.memory try: next = self.dict[key] except KeyError: next = None return next def train(self, history, order=1): """Trains the chainer based on a past set of transitions. If the chainer already has a transition dictionary then this training data will be used to augment / overwrite the existing dictionary. history is an iterable of states in the order of their occurances. order: Specifies the desired order of the Markov chain (how many past states are needed to determine the next state), default 1. """ if self.__initTraining__(history, order): if order == 1: # Most common, order==1 case memory = history[0] for state in history[1:]: self.dict[memory] = state memory = state # If we don't have a memory yet, use the last training state if not self.memory: self.memory = history[-1] return True else: # order > 1 # Initalize the memory with the first order states of history memory = history[:order] # And train over the all this history for state in history[order:]: self.dict[tuple(memory)] = state memory = memory[1:] + [state] # Slide the state forward # If we haven't been given a memory yet use the end of the training # data for the purpose. if len(self.memory) < order: self.memory = history[-order:] return True else: return False def transition(self, state=None): """Returns the next state based on a specified state (or tuple of past states for higher order chains). If called with no state then internal memory of the curent state will be used to generate the next state and so on. If no transition is known for the specified state None is returned. """ next = self.__trans__(state) # Update internal memory self.__updateMemory__(next) # And return the new state return next def clear(self): """Clears the transition dictionary. Provided just in case someone finds a use for it. """ self.dict.clear() self.memory = [] class BranchingChainer(Chainer): """Extends the basic Chainer class with the posibility that a state could have multiple possible successors. The values in the transition dictionary are then lists of possible successors. transition returns a list of possible successors for you to choose from. train is enhanced to deal with the possibility of multiple outcomes. """ def train(self, history, order=1): """Trains the chainer based on a past set of transitions. If the chainer already has a transition dictionary then this training data will be used to augment the existing dictionary. history is an iterable of states in the order of their occurances. order: Specifies the desired order of the Markov chain (how many past states are needed to determine the next state), default 1. """ if self.__initTraining__(history, order): if order == 1: # Most common, order==1 case memory = history[0] for state in history[1:]: if self.dict.has_key(memory): if not state in self.dict[memory]: self.dict[memory].append(state) else: self.dict[memory] = [state] memory = state # If we don't have a memory yet, use the last training state if not self.memory: self.memory = history[-1] return True else: # order > 1 # Initalize the memory with the first order states of history memory = history[:order] # And train over the all this history for state in history[order:]: key = tuple(memory) if self.dict.has_key(key): if not state in self.dict[key]: self.dict[key].append(state) else: self.dict[key] = [state] memory = memory[1:] + [state] # Slide the state forward return True else: return False def transition(self, state): """Returns a list of possible successor states. Note that unlike a basic Chainer. BranchingChainer.transition cannot be called without an argument since it doesn't know which one of the possible successors was selected. """ if state == None: raise ValueError(), 'BranchingCainer.transition requires a state' else: return Chainer.__trans__(self, state) class RandomChainer(BranchingChainer): """Like the BrachingChainer class except that the transition menthod returns one of the possible successor states selected at random. """ def __init__(self, dict={}, memory=[]): "See Chainer.__init__" BranchingChainer.__init__(self, dict, memory) self.random = Random() def transition(self, state=None): """Returns one possible successor state chosen at random. See the class description and Chain.transition for more details.""" options = self.__trans__(state) next = None if options != None: next = self.random.choice(options) self.__updateMemory__(next) return next class WeightedChainer(RandomChainer): """A branching Markov chain with weighted probabilities for each possible successor. For this chainer, the values in the transition dict are dictionaries them- selves with the keys being possible states and the values being positive integer weights associated with those values. """ def train(self, history, order=1): """Trains the chainer based on a past set of transitions. If the chainer already has a transition dictionary then this training data will be used to augment the existing dictionary. Weights are set by the relitive number of times a given outcome occures from a state. history is an iterable of states in the order of their occurances. order: Specifies the desired order of the Markov chain, default 1. order must be at least one. If there is already a transition dict and order does not match the order of that dict an exception will be thrown. """ if self.__initTraining__(history, order): if order == 1: # Most common, order==1 case memory = history[0] for state in history[1:]: if self.dict.has_key(memory): if self.dict[memory].has_key(state): self.dict[memory][state] += 1 else: self.dict[memory][state] = 1 else: self.dict[memory] = {state: 1} memory = state # If we don't have a memory yet, use the last training state if not self.memory: self.memory = history[-1] return True else: # order > 1 # Initalize the memory with the first order states of history memory = history[:order] # And train over the all this history for state in history[order:]: key = tuple(memory) if self.dict.has_key(key): if self.dict[key].has_key(state): self.dict[key][state] += 1 else: self.dict[key][state] = 1 else: self.dict[key] = {state: 1} memory = memory[1:] + [state] # Slide the state forward # If we don't have a memory yet, use the last training states if not self.memory: self.memory = history[-order:] return True else: return False def transition(self, state=None): """Returns a successor state like a basic Chainer with the probability of a given one of the possible successor states being chosen determined by the weights in the transition dictionary.""" # Process input state options = self.__trans__(state) if options: range = 0 table = [] # Create an increasing table of values with each possible successor # getting its weight's area. for key in options.keys(): range += options[key] table.append((key, range)) # Pick a random number in the total area of the table select = self.random.randint(1,range) # Find which state that goes to for next, val in table: if select <= val: self.__updateMemory__(next) return next else: return None def reinforce(self, state, successor, delta): """Reinforces the weight (positively or negitavely) of the transition from state to successor by delta. The adjustment may be either positive or negative but the weight will not exceed the range 0 to sys.maxint. If the result after applying delta is less than 1 the transition is removed.""" if not self.dict.has_key(state): if delta > 0: self.dict[state] = {successor: delta} elif not self.dict[state].has_key(successor): if delta > 0: self.dict[state][successor] = delta else: # Keep things in range if self.dict[state][successor] + delta < 1: self.dict[state].pop(successor) elif self.dict[state][successor] + delta > maxint: self.dict[state][successor] = maxint else: self.dict[state][successor] += delta class ThreadedWeightedChainer(WeightedChainer, Thread): """An extension to the WeightedChainer class which prepares the next transition in advance by assuming it follows the last one.""" def __init__(self, dict={}, memory=[]): self.next = None # Default value # Initalize parents WeightedChainer.__init__(self, dict, memory) Thread.__init__(self) # If we have something to work with, get started right away if memory: self.start() def run(self): "The exicutable part for the thread." self.next = WeightedChainer.transition(self) def transition(self, state=None): "Returns the prepared state if valid or the normal WeightedChainer result" if state == None and self.next != None: retval = self.next self.start() return retval else: return WeightedChainer.transition(self, state) class ChainerException(Exception): """Exception class for Markov chainer related events""" def __init__(self, description, chainer): self.desc = description self.chainer = chainer def __getitem__(self): return chainer def __str__(self): return 'Exception in chainer: ' + str(self.chainer) + '\n' + description