1 | n/a | #!/usr/bin/env python3 |
---|
2 | n/a | |
---|
3 | n/a | """ |
---|
4 | n/a | Markov chain simulation of words or characters. |
---|
5 | n/a | """ |
---|
6 | n/a | |
---|
class Markov:
    """Markov chain model over sliceable sequences (e.g. str or tuple).

    States are slices of the training sequences of at most ``histsize``
    items; they are used as dictionary keys, so slices of the sequence
    type must be hashable.  Two sentinel uses of ``None`` exist in
    ``trans``: the key ``None`` maps to the possible (empty) starting
    sequences, and a ``None`` successor marks the end of a sequence.
    """

    def __init__(self, histsize, choice):
        """Create an empty model.

        histsize -- maximum number of trailing items used as the state.
        choice   -- callable taking a non-empty list of successors and
                    returning one of them (e.g. ``random.choice``).
        """
        self.histsize = histsize
        self.choice = choice
        # Maps state -> list of observed successors (duplicates are kept,
        # so frequent successors are proportionally more likely to be
        # picked by a uniform ``choice``).
        self.trans = {}

    def add(self, state, next):
        """Record that ``next`` was observed following ``state``."""
        self.trans.setdefault(state, []).append(next)

    def put(self, seq):
        """Feed one training sequence into the model."""
        n = self.histsize
        add = self.add
        # The empty slice has the same type as seq, so get() can start
        # from it and extend it with ``+=``.
        add(None, seq[:0])
        for i in range(len(seq)):
            add(seq[max(0, i - n):i], seq[i:i + 1])
        # Clamp the start index at 0: when seq is shorter than the history
        # size, a negative start would wrap around and register the
        # terminator under a truncated key that get() never looks up.
        add(seq[max(0, len(seq) - n):], None)

    def get(self):
        """Generate and return one sequence by walking the chain.

        Raises KeyError if no sequence has been fed via put() yet.
        """
        choice = self.choice
        trans = self.trans
        n = self.histsize
        seq = choice(trans[None])
        while True:
            state = seq[max(0, len(seq) - n):]
            successor = choice(trans[state])
            # A None successor is the end-of-sequence marker; real
            # successors are one-item slices and therefore truthy.
            if not successor:
                break
            seq += successor
        return seq
---|
37 | n/a | |
---|
38 | n/a | |
---|
def test():
    """Command-line driver: train a Markov model and print generated text.

    Options and input files are taken from ``sys.argv``; see the usage
    text printed on an option error.  After the input has been read,
    generated paragraphs are printed in an endless loop, so the function
    only returns early when no input could be read.
    """
    import getopt
    import random
    import sys

    args = sys.argv[1:]
    try:
        opts, args = getopt.getopt(args, '0123456789cdwq')
    except getopt.error:
        print('Usage: %s [-#] [-cddqw] [file] ...' % sys.argv[0])
        print('Options:')
        print('-#: 1-digit history size (default 2)')
        print('-c: characters (default)')
        print('-w: words')
        print('-d: more debugging output')
        print('-q: no debugging output')
        print('Input files (default stdin) are split in paragraphs')
        print('separated blank lines and each paragraph is split')
        print('in words by whitespace, then reconcatenated with')
        print('exactly one space separating words.')
        print('Output consists of paragraphs separated by blank')
        print('lines, where lines are no longer than 72 characters.')
        sys.exit(2)

    histsize = 2
    do_words = False
    debug = 1
    for o, _ in opts:
        if '-0' <= o <= '-9':
            histsize = int(o[1:])
        elif o == '-c':
            do_words = False
        elif o == '-d':
            debug += 1
        elif o == '-q':
            debug = 0
        elif o == '-w':
            do_words = True
    if not args:
        args = ['-']

    m = Markov(histsize, random.choice)
    try:
        for filename in args:
            if filename == '-':
                if sys.stdin.isatty():
                    print('Sorry, need stdin from file')
                    continue
                if debug:
                    print('processing', filename, '...')
                # stdin is deliberately left open so that a repeated '-'
                # argument does not fail on a closed file.
                text = sys.stdin.read()
            else:
                # The context manager closes the file even if read() fails.
                with open(filename, 'r') as f:
                    if debug:
                        print('processing', filename, '...')
                    text = f.read()
            for para in text.split('\n\n'):
                if debug > 1:
                    print('feeding ...')
                words = para.split()
                if words:
                    if do_words:
                        data = tuple(words)
                    else:
                        data = ' '.join(words)
                    m.put(data)
    except KeyboardInterrupt:
        print('Interrupted -- continue with data read so far')
    if not m.trans:
        print('No valid input files')
        return
    if debug:
        print('done.')

    if debug > 1:
        for key in m.trans:
            if key is None or len(key) < histsize:
                print(repr(key), m.trans[key])
        if histsize == 0:
            # With no history the only real state is the empty sequence,
            # whose type depends on the mode: () for words, '' for
            # characters.  Looking up '' unconditionally fails in -w mode.
            empty = () if do_words else ''
            print(repr(empty), m.trans.get(empty, []))
        print()

    limit = 72
    while True:
        data = m.get()
        if do_words:
            words = data
        else:
            words = data.split()
        n = 0
        for w in words:
            # Start a new output line once the next word would exceed
            # the width limit.
            if n + len(w) > limit:
                print()
                n = 0
            print(w, end=' ')
            n += len(w) + 1
        print()
        print()
---|
123 | n/a | |
---|
# Script entry point: run the command-line demo only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    test()
---|