#!/bin/python
################################################################################
#
# MarkovChar 2.21.09 - Joe Mariglio
#
#===============================================================================
#
# analyzes a text file for transitions and produces a dictionary
# units are measured in chars, not words
# variable depth of memory units (in chars)
# uses the dictionary to generate a markov chain of a length (in units)
#
#===============================================================================
#
# usage: markovChar.py INPATH DEPTH LENGTH
#
################################################################################

import getopt
import os.path
import random
import re
import string
import sys

# Transition table shared by analyze() and generate(): maps a single character
# to the list of `depth`-character strings that were seen right after it.
# Duplicates are kept on purpose so random.choice() is frequency-weighted.
dic = {}


def parse(inpath):
    """Return the full contents of the text file at *inpath* as one string."""
    # The context manager closes the handle even if read() raises.
    with open(inpath, "r") as infile:
        return infile.read()


def analyze(lines, depth):
    """Record in ``dic`` every character -> following-context transition.

    Newlines are stripped from *lines* first.  For each character that is
    followed by at least *depth* more characters, the *depth* characters
    after it are appended to that character's entry in ``dic``.  Entries
    accumulate across calls; call ``dic.clear()`` to start over.

    Raises ValueError if *depth* is smaller than 1.
    """
    if depth < 1:
        raise ValueError("depth must be at least 1")
    line = lines.replace('\n', '')
    # Position i has a full context when i + 1 + depth <= len(line), i.e.
    # i < len(line) - depth.  (The previous strict comparison dropped the
    # final full-length context.)
    usable = max(len(line) - depth, 0)
    for i, char in enumerate(line[:usable]):
        addContext(char, line[i + 1:i + 1 + depth])


def addContext(token, context):
    """Append *context* to the transitions of *token*, creating the entry
    in ``dic`` if *token* has not been seen yet."""
    dic.setdefault(token, []).append(context)


def generate(depth, length):
    """Return a Markov chain built from ``dic``.

    *length* units are chained, each unit being one stored context; the
    character at index ``depth - 1`` of a unit selects the next unit, so
    *depth* should be the value that was passed to analyze().  Returns an
    empty string when ``dic`` holds no transitions.
    """
    if not dic:
        return ""
    # list() is required on Python 3, where dict views are not indexable.
    keys = list(dic)
    start = random.choice(keys)
    units = []
    for _ in range(length):
        if start not in dic:
            # Dead end: this character was only seen too close to the end
            # of the text to have a context.  Restart from a random key
            # instead of failing with KeyError.
            start = random.choice(keys)
        value = random.choice(dic[start])
        start = value[depth - 1]
        units.append(value)
    return "".join(units)


def _print_usage():
    """Print the command-line help text."""
    print("usage: markovChar.py INPATH DEPTH LENGTH")
    print("where DEPTH is number of chars per unit")
    print("and LENGTH is the output length in units.")


def main(argv=None):
    """Command-line entry point.

    *argv* is the argument list without the program name (defaults to
    ``sys.argv[1:]``): input path, depth and length.  Prints the generated
    chain, or the usage text when the arguments are missing or invalid.
    """
    if argv is None:
        argv = sys.argv[1:]
    if len(argv) != 3:
        _print_usage()
        return
    inpath = argv[0]
    try:
        depth = int(argv[1])
        length = int(argv[2])
    except ValueError:
        _print_usage()
        return
    if depth < 1:
        _print_usage()
        return
    lines = parse(inpath)
    analyze(lines, depth)
    print(generate(depth, length))


if __name__ == '__main__':
    main(sys.argv[1:])