cosmetix in locals support words
[urforth.git] / level1 / 96_prng.f
blobcc9e154c3c26172fa5ee5f34297e13a7b0dd952e
1 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
2 ;; UrForth level 1: self-hosting 32-bit Forth compiler
3 ;; Copyright (C) 2020 Ketmar Dark // Invisible Vector
4 ;; GPLv3 ONLY
5 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
7 vocabulary PRNG
8 voc-set-active PRNG
11 absolutely non-scientific stupid benchmark:
13 bjprng : 115,356,877 values/second
14 bjprng-3rots : 115,109,543 values/second
15 xorshift*-64/32: 114,961,650 values/second
16 pcg32 : 107,718,882 values/second
17 pcg32ex : 95,801,375 values/second
19 as you can see, 2-rot bjprng is the fastest one, and it is quite good.
20 pcg32 (with fixed stream) is quite fast, and it is good too.
21 so i left only two of those in the kernel, and moved others to lib.
22 you can use
23 include !libs/ext/prngs.f
24 to include rest of the generators.
28 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
29 ;; generate 64-bit seed (not cryptographically strong!)
30 ;; also, don't call this repeatedly, it will produce bad seeds
31 : GEN-DSEED ( dlo dhi -- )
32 os:clock-monotonic os:clock-gettime u32hash swap u32hash xor os:get-pid xor u32hash ;; low seed
33 os:clock-monotonic os:clock-gettime u32hash swap u32hash xor ;; high seed
36 ;; useful to store 16-byte seeds
37 code: 4! ( a b c d addr -- )
38 lea edi,[TOS+12]
39 std
40 pop eax
41 stosd
42 pop eax
43 stosd
44 pop eax
45 stosd
46 pop eax
47 stosd
48 cld
49 pop TOS
50 urnext
51 endcode
53 code: 4@ ( addr -- a b c d )
54 xchg esi,TOS
55 lodsd
56 push eax
57 lodsd
58 push eax
59 lodsd
60 push eax
61 lodsd
62 push eax
63 xchg esi,TOS
64 pop TOS
65 urnext
66 endcode
69 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
70 ;; generate 32-bit random number, with first stream
71 ;; it uses smaller 64 bit state, and slightly faster
73 ;; code for:
74 ;; oldstate = state;
75 ;; state = oldstate*6364136223846793005UL+((42<<1)|1);
76 ;; u32 xorshifted = ((oldstate>>18)^oldstate)>>27;
77 ;; u32 rot = oldstate>>59;
78 ;; res = (xorshifted>>rot)|(xorshifted<<((-rot)&31));
79 code: PCG32-NEXT ( statelo statehi -- newstatelo newstatehi u32prv )
80 push TOS
81 push EIP ;; it will be used in the code
82 sub esp,8 ;; and two temp vars
83 ;; [esp+0]: tmpvar0
84 ;; [esp+4]: tmpvar1
85 ;; [esp+8]: EIP
86 ;; [esp+12]: statehi
87 ;; [esp+16]: statelo
88 ld edx,[esp+12] ;; statehi
89 ld eax,[esp+16] ;; statelo
90 ld [esp+0],edx ;; tmpvar0
91 imul edx,edx,0x4c957f2d
92 imul ecx,eax,0x5851f42d
93 add ecx,edx
94 ld edx,0x4c957f2d
95 ld [esp+4],eax ;; tmpvar1
96 mul edx
97 add edx,ecx
98 add eax,85 ;; inclo
99 adc edx,0 ;; inchi
100 ld [esp+12],edx ;; statehi
101 ld edx,[esp+0] ;; tmpvar0
102 ld [esp+16],eax ;; statelo
103 ld eax,[esp+4] ;; tmpvar1
104 shrd eax,edx,0x12
105 shr edx,0x12
106 xor eax,[esp+4] ;; tmpvar1
107 xor edx,[esp+0] ;; tmpvar0
108 shrd eax,edx,0x1b
109 shr edx,0x1b
110 ld esi,eax
111 ld edx,[esp+0] ;; tmpvar0
112 ld eax,[esp+4] ;; tmpvar1
113 shr edx,0x1b
114 ld eax,edx
115 ld edi,eax
116 ld eax,esi
117 ld ecx,edi
118 xor edx,edx
119 shr eax,cl
120 ld ecx,edi
121 neg ecx
122 ld edx,esi
123 and ecx,0x1f
124 shl edx,cl
125 or eax,edx
126 ld TOS,eax ;; u32prv
127 add esp,8
128 pop EIP
129 urnext
130 endcode
132 : PCG32-SEED-U64 ( dlo dhi -- statelo statehi )
133 0 0 pcg32-next drop d+ pcg32-next drop
137 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
138 ;; Bob Jenkins small PRNG -- http://burtleburtle.net/bob/rand/smallprng.html
140 (* original:
141 e = a-ROT(b, 27);
142 a = b^ROT(c, 17);
143 b = c+d;
144 c = e+d;
145 d = e+a; -- u32prv
147 code: BJ-NEXT ( a b c d -- a b c d u32prv )
148 push TOS
149 ;; [esp+0]: d
150 ;; [esp+4]: c
151 ;; [esp+8]: b
152 ;; [esp+12]: a
153 ld eax,[esp+8]
154 ld edx,eax ;; save b, to use it later
155 ld edi,[esp+12]
156 rol eax,27
157 sub edi,eax
158 ;; EDX: b
159 ;; ECX: d
160 ;; EDI: e
161 ld eax,[esp+4]
162 ld ebx,eax ;; save c, to use it later
163 rol eax,17
164 xor edx,eax
165 ld [esp+12],edx
166 ;; EBX: c
167 ;; ECX: d
168 ;; EDI: e
169 add ebx,ecx
170 ld [esp+8],ebx
171 ;; ECX: d
172 ;; EDI: e
173 add ecx,edi
174 ld [esp+4],ecx
175 ;; EDI: e
176 add edi,[esp+12]
177 ld TOS,edi
178 ld [esp+0],edi
179 urnext
180 endcode
182 : BJ-SEED-U32 ( u -- a b c d )
183 ;; dup u32hash bj-seed-u64
184 ;; this is how BJ does it for 32-bit seeds
185 0xf1ea5eed swap dup dup ;; initial seed
186 20 for bj-next drop endfor ;; skip first 20 values, to perturb seed
189 ;; k8: this sux, don't use it!
190 : BJ-SEED-U64 ( du -- a b c d )
191 u32hash swap u32hash xor bj-seed-u32
195 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
196 ;; produce biased result [0..range)
197 ;; this is faster than division, tho
198 ;; it multiplies range and prv, and takes the high 32 bits of 64-bit result
199 ;; this (seemingly) performs worser than modulo on PRNGs with non-32-bit range
200 ;; if you need biased ranged result, and you are unsure, use "UMOD"
201 code: SMALL-BIASED-RANGE ( u32prv urange -- u1 )
202 pop eax
203 mul TOS
204 ld TOS,edx
205 urnext
206 endcode
209 voc-set-active FORTH