Merge remote-tracking branch 'flapflap/de-network_configuration'
[tails-test.git] / config / chroot_local-includes / usr / local / bin / lc.py
blob2e83ec9d62dfd765fd6e01152c43c3e892fd0bde
1 #!/usr/bin/python
3 # The MIT License
4 #
5 # Copyright (c) 2011 Christopher Pound
6 #
7 # Permission is hereby granted, free of charge, to any person obtaining a copy
8 # of this software and associated documentation files (the "Software"), to deal
9 # in the Software without restriction, including without limitation the rights
10 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11 # copies of the Software, and to permit persons to whom the Software is
12 # furnished to do so, subject to the following conditions:
14 # The above copyright notice and this permission notice shall be included in
15 # all copies or substantial portions of the Software.
17 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
20 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23 # THE SOFTWARE.
25 # lc.py -- language confluxer (http://www.ruf.rice.edu/~pound/lc.py)
27 # - Written by Christopher Pound (pound@rice.edu), July 1993.
28 # - Loren Miller suggested I make sure lc starts by picking a
29 # letter pair that was at the beginning of a data word, Oct 95.
30 # - Cleaned it up a little bit, March 95; more, September 01
31 # - Python version, Jul 09
33 # The datafile should be a bunch of words from some language
34 # with minimal punctuation or garbage (# starts a comment).
36 from __future__ import with_statement
37 from optparse import OptionParser
38 import random
39 import re
40 import sys
class Pseudolanguage:
    """Letter-pair model of a language, built from lists of sample words.

    Attributes:
        name:   label given at construction time (printed by dump()).
        parsed: True while inits/pairs reflect the current contents of data.
        data:   maps each loaded file name to the list of words read from it.
        inits:  maps the first two letters of every word of three or more
                letters to the list of third letters seen after them.
        pairs:  maps every two-letter sequence found in a word to the list
                of characters seen right after it; a space marks the end
                of a word.
    """

    def __init__(self, **kwargs):
        """Set up a new, empty pseudolanguage.

        Only the ``name`` keyword is used (default ``''``); any other
        keyword arguments are accepted and ignored.
        """
        self.name = kwargs.get('name', '')
        self.parsed = False
        self.data = {}
        self.inits = {}
        self.pairs = {}

    def incorporate(self, files):
        """Load a list of files for this pseudolanguage into self.data.

        Each file is read line by line. Everything from a ``#`` to the end
        of the line is discarded and the remainder is split on whitespace.
        Loading a file name that is already present replaces its words.
        Errors raised by open() propagate to the caller.
        """
        self.parsed = False
        for f in files:
            words = []
            with open(f) as text:
                for line in text:
                    line = re.sub(r"#.*", "", line.strip())
                    # re.split() yields '' for blank or comment-only lines
                    # and after trailing whitespace; those are not words.
                    words.extend(w for w in re.split(r"\s+", line) if w)
            self.data[f] = words

    def delete(self, files):
        """Delete a list of previously loaded files from self.data.

        Raises KeyError if one of the names was never incorporated.
        """
        self.parsed = False
        for f in files:
            del self.data[f]

    def parse(self):
        """Parse the loaded data into self.inits and self.pairs.

        Does nothing when the tables are already up to date
        (self.parsed is True).
        """
        if self.parsed:
            return
        self.inits.clear()
        self.pairs.clear()
        for f in self.data:
            for word in self.data[f]:
                # The trailing space is the end-of-word marker that
                # generate() looks for.
                word += ' '
                if len(word) > 3:
                    self.inits.setdefault(word[0:2], []).append(word[2:3])
                for pos in range(len(word) - 2):
                    pair = word[pos:pos + 2]
                    self.pairs.setdefault(pair, []).append(word[pos + 2])
        self.parsed = True

    def dump(self):
        """Print the current parsed data; use pickle for inflatable dumps."""
        self.parse()
        # Each line is formatted into a single string so that the output is
        # the same whether print is a statement or a function.
        print('name = """ %s """' % (self.name,))
        print("dump = { 'inits':  %s ," % (self.inits,))
        print("'pairs':  %s  }" % (self.pairs,))

    def generate(self, number, min, max):
        """Generate a list of random words of min to max letters.

        Args:
            number: how many words to return; None or a value <= 0 gives
                an empty list.
            min: smallest accepted word length (inclusive).
            max: largest accepted word length (inclusive).

        Returns:
            A list of ``number`` generated words.

        Raises:
            ValueError: if words were requested but min > max, which no
                word could ever satisfy.
            IndexError: if words were requested but the loaded data holds
                no word of three or more letters to start from.

        Candidates outside the length bounds are thrown away and retried,
        so the call keeps running for as long as the loaded data fails to
        produce enough words of an acceptable length.
        """
        self.parse()
        wordlist = []
        if number is None or number <= 0:
            return wordlist
        if min > max:
            raise ValueError("min (%r) is greater than max (%r)" % (min, max))
        if not self.inits:
            raise IndexError("no starting letter pairs: the loaded data "
                             "contains no word of three or more letters")
        starts = list(self.inits)
        while len(wordlist) < number:
            word = random.choice(starts)
            # Keep appending letters until the end-of-word marker is drawn.
            while ' ' not in word:
                word += random.choice(self.pairs[word[-2:]])
            word = word.strip()
            if min <= len(word) <= max:
                wordlist.append(word)
        return wordlist
def main(argv=None):
    """Command-line entry point: dump the parsed model or print words.

    argv is the list of command-line arguments without the program name;
    None makes optparse read them from sys.argv. Invalid invocations end
    through parser.error(), which prints the usage text and exits.
    """
    usage = "usage: %prog [options] datafile1 [datafile2 ...]"
    parser = OptionParser(usage=usage, version="%prog 1.0")
    parser.add_option("-d", "--dump", action="store_true",
                      dest="dump", default=False,
                      help="Dump internal representation of the pseudolanguage")
    parser.add_option("-g", "--generate", type="int", dest="num",
                      help="Generate specified number of words")
    parser.add_option("--min", type="int", dest="min", default=3,
                      help="Set the minimum length of each word")
    parser.add_option("--max", type="int", dest="max", default=9,
                      help="Set the maximum length of each word")
    parser.add_option("--name", dest="name", default=' ',
                      help="Set the name of the pseudolanguage")
    (options, args) = parser.parse_args(argv)

    # Reject unusable invocations before touching any file.
    if not args:
        parser.error("at least one datafile is required")
    if not options.dump:
        if options.num is None:
            parser.error("nothing to do: use -d/--dump or -g/--generate NUM")
        if options.min > options.max:
            # No word can satisfy these bounds, so generation would never end.
            parser.error("--min must not be greater than --max")

    aLanguage = Pseudolanguage(name=options.name)
    aLanguage.incorporate(args)
    if options.dump:
        aLanguage.dump()
    else:
        results = aLanguage.generate(options.num, options.min, options.max)
        for word in results:
            print(word)


if __name__ == '__main__':
    main()