5 const Start : token = 0
8 type token_prediction = struct
9 /* Total sample size */
12 /* ("t", relative probability of "t") */
13 possibilities : (token, int)[:]
19 tok_from_str : std.htab(byte[:], token)#
20 str_from_tok : std.htab(token, byte[:])#
23 /* Possible followups for a token sequence */
24 options : std.htab(token[:], token_prediction#)#
26 /* When generating, how many past tokens to keep */
30 Technical detail: when tokens are passed in, we
31 slice them up in many, many different ways. For
32 memory purposes, we want to use many slices of
33 a single slice. Therefore, we (mostly) copy the
34 input slice. But at clean-up time, we have to
35 have something to delete. These are those. They
36 are here to free at the end.
38 full_storage : token[:][:]
44 rng : std.option(std.rng#)
46 impl iterable mciter -> byte[:]
47 impl disposable mciter
50 const mk : (lookback_num : int -> chain#)
52 /* Then call seed() once for each complete string of tokens you have */
53 const seed : (c : chain#, s : byte[:][:] -> void)
55 /* Call generate() to get an iterator */
56 const generate : (c : chain#, rng_seed : std.option(uint32) -> mciter)
58 /* complete() is like generate(), but from a given starting point */
59 const complete : (c : chain#, start : byte[:][:], rng_seed : std.option(uint32) -> mciter)
61 /* Finally, call cleanup() to free memory */
62 const cleanup : (c : chain# -> void)
65 impl iterable mciter -> byte[:] =
66 __iternext__ = {mci, valp
68 First, using the tokens stored in mci.past, look
71 var wanted_token = End
72 match std.htget(mci.c.options, mci.past)
74 /* This should be impossible. */
79 | `std.None: k = std.randnum() % tp.total_seen
80 | `std.Some rng: k = std.rngrandnum(rng) % tp.total_seen
82 k = (k + tp.total_seen) % tp.total_seen
84 /* Figure out which bucket we picked */
86 for (p_t, p_n) : tp.possibilities
95 if wanted_token == Start || wanted_token == End
99 match std.htget(mci.c.str_from_tok, wanted_token)
100 | `std.None: -> false
101 | `std.Some s: valp# = s
105 /* Second, cycle mci.past */
106 if mci.past.len >= mci.c.lookback_num
107 for var j : std.size = 0; j + 1< mci.past.len; j++
108 mci.past[j] = mci.past[j + 1]
110 mci.past[mci.past.len - 1] = wanted_token
112 std.slpush(&mci.past, wanted_token)
118 __iterfin__ = {mci, valp
122 impl disposable mciter =
126 | `std.Some rng: std.freerng(rng)
132 const mk = {lookback_num : int
133 lookback_num = std.max(lookback_num, 1)
135 .options = std.mkht(),
136 .tok_from_str = std.mkht(),
137 .str_from_tok = std.mkht(),
138 .full_storage = std.slalloc(0),
139 .lookback_num = lookback_num,
145 const seed = {c : chain#, s : byte[:][:] -> void
146 /* Translate s to tokens (otherwise memory explodes) */
147 var toks : token[:] = std.slalloc(0)
148 std.slpush(&toks, Start)
150 var this_t : token = c.next_tok
151 var dup : byte[:] = std.sldup(ss)
152 match std.htget(c.tok_from_str, dup)
154 std.htput(c.tok_from_str, dup, this_t)
155 std.htput(c.str_from_tok, this_t, dup)
160 std.slpush(&toks, this_t)
162 std.slpush(&toks, End)
163 std.slpush(&c.full_storage, toks)
165 /* Record having seen tokens */
168 First, the beginning sequence, which is special because
169 we might not have enough tokens yet
171 for var len = 2; len < std.min(c.lookback_num + 1, toks.len); ++len
172 see_sequence(c, toks[:len])
175 /* Now the intermediate sequences */
176 for var j = 0; j + c.lookback_num + 1 <= toks.len; ++j
177 see_sequence(c, toks[j:j + c.lookback_num + 1])
181 const generate = {c : chain#, rng_seed : std.option(uint32)
182 var rng : std.option(std.rng#)
184 | `std.None: rng = `std.None
185 | `std.Some i: rng = `std.Some std.mksrng(i)
187 -> [.c = c, .rng = rng, .past = std.sldup([Start][:])]
190 const complete = {c : chain#, start : byte[:][:], rng_seed : std.option(uint32)
191 var rng : std.option(std.rng#)
193 | `std.None: rng = `std.None
194 | `std.Some i: rng = `std.Some std.mksrng(i)
196 var past : token[:] = [][:]
197 if start.len >= c.lookback_num
198 for var j = start.len - c.lookback_num; j < start.len; ++j
200 match std.htget(c.tok_from_str, start[j])
201 | `std.Some tt: t = tt
202 | `std.None: /* well, this is going to fail */
207 std.slpush(&past, Start)
210 match std.htget(c.tok_from_str, s)
211 | `std.Some tt: t = tt
212 | `std.None: /* well, this is going to fail */
217 -> [.c = c, .rng = rng, .past = past]
220 const see_sequence = {c : chain#, t : token[:]
221 var k : token[:] = t[:t.len - 1]
222 var next_t : token = t[t.len - 1]
223 match std.htget(c.options, k)
226 for var j : std.size = 0; j < tp.possibilities.len; ++j
229 (tt, n) = tp.possibilities[j]
230 if std.eq(tt, next_t)
231 tp.possibilities[j] = (tt, n + 1)
235 std.slpush(&tp.possibilities, (next_t, 1))
237 var p : (token, int)[:] = std.slalloc(1)
239 var tp : token_prediction# = std.mk([.total_seen = 1, .possibilities = p])
240 std.htput(c.options, k, tp)
244 const cleanup = {c : chain#
247 for (k, v) : std.byhtkeyvals(c.options)
248 std.slfree(v.possibilities)
251 std.htfree(c.options)
253 /* The byte are owned by str_from_tok */
254 std.htfree(c.tok_from_str)
255 for (k, v) : std.byhtkeyvals(c.str_from_tok)
258 std.htfree(c.str_from_tok)
260 /* Delete the base memory that the options' keys are in */
261 for f : c.full_storage
264 std.slfree(c.full_storage)