#!/bin/python
################################################################################
#
# Concordance - 2.21.09 Joe Mariglio
#
#===============================================================================
#
# builds a dictionary mapping unique words to their containing lines
# if duplicates are found, add the new context to the old unique word
#
#===============================================================================
#
# usage: concordance.py <infile> <word>
#
################################################################################

import re
import sys
import string
import os.path
import getopt

# Runs of non-word characters separate the tokens of a line.
rx0 = re.compile(r'\W+')
# A newline separates one context (line) from the next.
rx1 = re.compile(r'\n')
# Maps each word to the list of lines in which it was seen.
dic = {}


def parse(inpath):
    """Return the entire contents of the file at *inpath* as one string."""
    # The context manager closes the handle even if read() raises.
    with open(inpath, "r") as infile:
        return infile.read()


def splt(line):
    """Split *line* on runs of non-word characters and return the pieces.

    The result may contain empty strings when *line* is empty or begins or
    ends with a non-word character; that is how ``re.split`` behaves.
    """
    return rx0.split(line)


def addLine(line):
    """Record *line* as a context for every word it contains.

    A word that occurs several times in *line* gets *line* recorded once per
    occurrence.
    """
    for token in splt(line):
        # Skip the empty strings the split yields for blank lines and
        # leading/trailing punctuation; they are not words.
        if token:
            addContext(token, line)


def addContext(token, context):
    """Append *context* to the list stored for *token* in ``dic``.

    The list is created on first use, so the token need not already exist.
    """
    dic.setdefault(token, []).append(context)


def main(argv):
    """Index a file and print the lines that contain a given word.

    *argv* holds the command-line arguments without the program name:
    ``[infile, word]``. With any other number of arguments a usage message
    is printed instead. A word that never occurs prints an empty list.
    """
    if len(argv) == 2:
        inpath, word = argv
        for line in rx1.split(parse(inpath)):
            addLine(line)
        # .get() keeps an unknown word from ending in a KeyError traceback.
        print(dic.get(word, []))
    else:
        print("usage: concordance.py <infile> <word>")
        print("to find context (enclosing line) of <word> in <infile>")


if __name__ == '__main__':
    main(sys.argv[1:])