#!/usr/bin/env python3

"""
Markov chain simulation of words or characters.
"""
| 6 | n/a | |
|---|
class Markov:
    """Markov chain over sliceable, hashable sequences.

    The model works on any sequence type that supports slicing,
    concatenation and use as a dictionary key (for instance ``str`` for
    character chains or ``tuple`` for word chains).

    histsize -- maximum number of trailing items that form a state
    choice   -- callable that picks one element from a list of successors
    """

    def __init__(self, histsize, choice):
        self.histsize = histsize
        self.choice = choice
        # Maps a state to the list of successors observed after it.
        # Repeated observations are stored repeatedly.
        self.trans = {}

    def add(self, state, next):
        """Record that *next* was observed directly after *state*."""
        if state not in self.trans:
            self.trans[state] = []
        self.trans[state].append(next)

    def put(self, seq):
        """Feed one training sequence into the transition table."""
        width = self.histsize
        length = len(seq)
        record = self.add
        # A state of None marks the start of a sequence; its successor is
        # the empty sequence of the same type as seq.
        record(None, seq[:0])
        for pos in range(length):
            start = pos - width
            if start < 0:
                start = 0
            # State: up to `width` items before pos; successor: the
            # one-item slice at pos.
            record(seq[start:pos], seq[pos:pos + 1])
        # A successor of None marks the end of a sequence.
        record(seq[length - width:], None)

    def get(self):
        """Generate one sequence by walking the transition table.

        Starts from a successor of the None state and keeps appending
        chosen successors until a falsy successor (the end marker) is
        picked.  Raises KeyError if the current state was never recorded.
        """
        pick = self.choice
        table = self.trans
        width = self.histsize
        result = pick(table[None])
        while True:
            tail = result[max(0, len(result) - width):]
            step = pick(table[tail])
            if not step:
                return result
            result += step
|---|
| 37 | n/a | |
|---|
| 38 | n/a | |
|---|
def test():
    """Command-line driver: train a Markov chain on text and print output.

    Options are read from ``sys.argv`` (an invalid option prints the usage
    text and exits with status 2).  Every paragraph of every input file is
    fed to a Markov model, either as a string of characters or as a tuple
    of words.  Randomly generated paragraphs are then printed, wrapped at
    72 columns, in an endless loop that runs until the process is
    interrupted.
    """
    import sys
    import random
    import getopt

    args = sys.argv[1:]
    try:
        opts, args = getopt.getopt(args, '0123456789cdwq')
    except getopt.error:
        print('Usage: %s [-#] [-cdqw] [file] ...' % sys.argv[0])
        print('Options:')
        print('-#: 1-digit history size (default 2)')
        print('-c: characters (default)')
        print('-w: words')
        print('-d: more debugging output')
        print('-q: no debugging output')
        print('Input files (default stdin) are split in paragraphs')
        print('separated by blank lines and each paragraph is split')
        print('in words by whitespace, then reconcatenated with')
        print('exactly one space separating words.')
        print('Output consists of paragraphs separated by blank')
        print('lines, where lines are no longer than 72 characters.')
        sys.exit(2)

    histsize = 2
    do_words = False
    debug = 1
    for o, _ in opts:
        if '-0' <= o <= '-9':
            histsize = int(o[1:])
        elif o == '-c':
            do_words = False
        elif o == '-d':
            debug += 1
        elif o == '-q':
            debug = 0
        elif o == '-w':
            do_words = True
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                if sys.stdin.isatty():
                    print('Sorry, need stdin from file')
                    continue
                if debug:
                    print('processing', filename, '...')
                # stdin is not ours to close: leaving it open keeps a
                # repeated '-' argument from failing on a closed file.
                text = sys.stdin.read()
            else:
                # The with-block closes the file even if read() raises
                # or the user interrupts.
                with open(filename, 'r') as f:
                    if debug:
                        print('processing', filename, '...')
                    text = f.read()
            for para in text.split('\n\n'):
                if debug > 1:
                    print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug:
        print('done.')

    if debug > 1:
        # Dump only the start state and the states shorter than the
        # history size.
        for key in m.trans:
            if key is None or len(key) < histsize:
                print(repr(key), m.trans[key])
        if histsize == 0:
            # The empty state has the type of the training data: a tuple
            # in word mode, a string in character mode.
            empty = () if do_words else ''
            print(repr(empty), m.trans[empty])
        print()

    limit = 72
    while True:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        n = 0  # columns used on the current output line
        for w in words:
            if n + len(w) > limit:
                print()
                n = 0
            print(w, end=' ')
            n += len(w) + 1
        print()
        print()
|---|
| 123 | n/a | |
|---|
# Run the command-line driver only when executed as a script, not on import.
if __name__ == "__main__":
    test()
|---|