Bug 1928997: Update tabs icon in Unified Search popup r=desktop-theme-reviewers,daleh...
[gecko.git] / other-licenses / 7zstub / src / Asm / x86 / LzmaDecOpt.asm
blob0a89eb735f17fb54e066fa3316fa4a676ed81879
1 ; LzmaDecOpt.asm -- ASM version of LzmaDec_DecodeReal_3() function
2 ; 2018-02-06: Igor Pavlov : Public domain
4 ; 3 - is the code compatibility version of LzmaDec_DecodeReal_*()
5 ; function for check at link time.
6 ; That code is tightly coupled with LzmaDec_TryDummy()
7 ; and with another functions in LzmaDec.c file.
8 ; CLzmaDec structure, (probs) array layout, input and output of
9 ; LzmaDec_DecodeReal_*() must be equal in both versions (C / ASM).
; Build guard: tests whether x64 is defined. Both the default assignment and
; the hard error inside are commented out, so this block currently does nothing.
11 ifndef x64
12 ; x64=1
13 ; .err <x64_IS_REQUIRED>
14 endif
; NOTE(review): 7zAsm.asm is not visible here; presumably it supplies MY_PROC,
; MY_ASM_START, REG_PARAM_* and the x0/r0-style register names used below.
16 include 7zAsm.asm
18 MY_ASM_START
; Dedicated code segment for this decoder, 64-byte aligned; closed by the
; matching ENDS at the end of the file.
20 _TEXT$LZMADECOPT SEGMENT ALIGN(64) 'CODE'
; MY_ALIGN num: emit an `align num` directive at the current location.
22 MY_ALIGN macro num:req
23 align num
24 endm
; Fixed-size convenience wrappers around MY_ALIGN: 16, 32 and 64 bytes.
26 MY_ALIGN_16 macro
27 MY_ALIGN 16
28 endm
30 MY_ALIGN_32 macro
31 MY_ALIGN 32
32 endm
34 MY_ALIGN_64 macro
35 MY_ALIGN 64
36 endm
39 ; _LZMA_SIZE_OPT equ 1
41 ; _LZMA_PROB32 equ 1
; Probability storage width.
;   _LZMA_PROB32 defined : each probability is a dword, PSHIFT = 2.
;   otherwise            : each probability is a word,  PSHIFT = 1.
; PLOAD  dest, mem : load one probability from [mem] into dest.
; PSTORE src,  mem : store one probability from src into [mem].
43 ifdef _LZMA_PROB32
44 PSHIFT equ 2
45 PLOAD macro dest, mem
46 mov dest, dword ptr [mem]
47 endm
48 PSTORE macro src, mem
49 mov dword ptr [mem], src
50 endm
51 else
52 PSHIFT equ 1
53 PLOAD macro dest, mem
; zero-extend the 16-bit probability into dest
54 movzx dest, word ptr [mem]
55 endm
56 PSTORE macro src, mem
; @CatStr appends _W to the operand name to select its 16-bit alias
; (for example t0 becomes t0_W, probBranch becomes probBranch_W)
57 mov word ptr [mem], @CatStr(src, _W)
58 endm
59 endif
; Byte sizes derived from PSHIFT: one probability, half of one, two of them.
61 PMULT equ (1 SHL PSHIFT)
62 PMULT_HALF equ (1 SHL (PSHIFT - 1))
63 PMULT_2 equ (1 SHL (PSHIFT + 1))
66 ; x0 range
67 ; x1 pbPos / (prob) TREE
68 ; x2 probBranch / prm (MATCHED) / pbPos / cnt
69 ; x3 sym
70 ;====== r4 === RSP
71 ; x5 cod
72 ; x6 t1 NORM_CALC / probs_state / dist
73 ; x7 t0 NORM_CALC / prob2 IF_BIT_1
74 ; x8 state
75 ; x9 match (MATCHED) / sym2 / dist2 / lpMask_reg
76 ; x10 kBitModelTotal_reg
77 ; r11 probs
78 ; x12 offs (MATCHED) / dic / len_temp
79 ; x13 processedPos
80 ; x14 bit (MATCHED) / dicPos
81 ; r15 buf
; Symbolic names for the numbered registers listed in the table above.
; Several names intentionally share one register (probBranch / cnt / prm are all
; register 2; match / sym2 / dist2 / lpMask_reg are all register 9; offs / dic /
; len_temp are all register 12; bit / dicPos are both register 14), so each name
; is only meaningful in the phase that uses it.
; NOTE(review): the x*, r*, *_W and *_L names are defined outside this file
; (presumably 32-bit, 64-bit, 16-bit and 8-bit views of one register) - confirm
; against 7zAsm.asm.
84 cod equ x5
85 cod_L equ x5_L
86 range equ x0
87 state equ x8
88 state_R equ r8
89 buf equ r15
90 processedPos equ x13
91 kBitModelTotal_reg equ x10
93 probBranch equ x2
94 probBranch_R equ r2
95 probBranch_W equ x2_W
97 pbPos equ x1
98 pbPos_R equ r1
100 cnt equ x2
101 cnt_R equ r2
103 lpMask_reg equ x9
104 dicPos equ r14
106 sym equ x3
107 sym_R equ r3
108 sym_L equ x3_L
110 probs equ r11
111 dic equ r12
; t0 / t1 are the scratch registers written by NORM_CALC
113 t0 equ x7
114 t0_W equ x7_W
115 t0_R equ r7
; prob2 is another name for t0, used by the branch macros
117 prob2 equ t0
118 prob2_W equ t0_W
120 t1 equ x6
121 t1_R equ r6
; probs_state is another name for t1
123 probs_state equ t1
124 probs_state_R equ t1_R
; names used by the matched-literal macros (LITM_0 / LITM / LITM_2)
126 prm equ r2
127 match equ x9
128 match_R equ r9
129 offs equ x12
130 offs_R equ r12
131 bit equ x14
132 bit_R equ r14
134 sym2 equ x9
135 sym2_R equ r9
137 len_temp equ x12
; dist shares the sym register
139 dist equ sym
140 dist2 equ x9
; Range-coder constants.
;   kBitModelTotal  = 1 SHL 11 = 2048 : upper end of the probability scale
;   kNumMoveBits    = 5               : shift used when adapting a probability
;   kBitModelOffset = (1 SHL 5) - 1 = 31
;   kTopValue       = 1 SHL 24        : NORM refills input when range is below this
144 kNumBitModelTotalBits equ 11
145 kBitModelTotal equ (1 SHL kNumBitModelTotalBits)
146 kNumMoveBits equ 5
147 kBitModelOffset equ ((1 SHL kNumMoveBits) - 1)
148 kTopValue equ (1 SHL 24)
; NORM_2: unconditional refill step.
; Shifts cod and range left by 8, places the next input byte from [buf] in the
; low byte of cod, and advances buf by one.
150 NORM_2 macro
151 ; movzx t0, BYTE PTR [buf]
152 shl cod, 8
; the low 8 bits of cod are zero after the shift, so a byte move fills them
153 mov cod_L, BYTE PTR [buf]
154 shl range, 8
155 ; or cod, t0
156 inc buf
157 endm
; NORM: conditional refill.
; When range is at least kTopValue nothing happens; otherwise NORM_2 shifts one
; more input byte into cod and scales range by 256.
160 NORM macro
161 cmp range, kTopValue
162 jae SHORT @F
163 NORM_2
; Anonymous label targeted by the `jae SHORT @F` above. Without it the jump
; would bind to whatever @@: label happens to follow the macro expansion.
164 @@:
165 endm
168 ; ---------- Branch MACROS ----------
; UPDATE_0 probsArray, probOffset, probDisp
; Probability update for the "bit is 0" outcome of a CMP_COD test:
;   probBranch += (kBitModelTotal_reg - probBranch) >> kNumMoveBits
; and the result is stored back at [probOffset + probsArray + probDisp * PMULT].
; range is left untouched: CMP_COD already replaced it with the bound.
; Clobbers prob2 (t0) and probBranch.
170 UPDATE_0 macro probsArray:req, probOffset:req, probDisp:req
171 mov prob2, kBitModelTotal_reg
172 sub prob2, probBranch
173 shr prob2, kNumMoveBits
174 add probBranch, prob2
175 PSTORE probBranch, probOffset * 1 + probsArray + probDisp * PMULT
176 endm
; UPDATE_1 probsArray, probOffset, probDisp
; State update for the "bit is 1" outcome of a CMP_COD test.
; On entry prob2 holds the range from before CMP_COD and range holds the bound.
;   range = old range - bound
;   cod   = cod - bound
;   prob  = probBranch - (probBranch >> kNumMoveBits), stored back to memory
; Clobbers prob2 (t0) and probBranch.
179 UPDATE_1 macro probsArray:req, probOffset:req, probDisp:req
180 sub prob2, range
181 sub cod, range
182 mov range, prob2
183 mov prob2, probBranch
184 shr probBranch, kNumMoveBits
185 sub prob2, probBranch
186 PSTORE prob2, probOffset * 1 + probsArray + probDisp * PMULT
187 endm
; CMP_COD probsArray, probOffset, probDisp
; Loads the probability at [probOffset + probsArray + probDisp * PMULT] into
; probBranch, normalizes, then computes the bound and compares cod against it.
; On exit:
;   prob2 = range before the split
;   range = (range >> kNumBitModelTotalBits) * probBranch   (the bound)
;   flags = result of `cmp cod, range` (below: bit 0, above-or-equal: bit 1)
190 CMP_COD macro probsArray:req, probOffset:req, probDisp:req
191 PLOAD probBranch, probOffset * 1 + probsArray + probDisp * PMULT
192 NORM
193 mov prob2, range
194 shr range, kNumBitModelTotalBits
195 imul range, probBranch
196 cmp cod, range
197 endm
; IF_BIT_1_NOUP probsArray, probOffset, probDisp, toLabel
; Runs CMP_COD and jumps to toLabel when cod >= bound (decoded bit is 1).
; No probability update is done on either path (NOUP); the caller must follow
; up with UPDATE_0 or UPDATE_1.
200 IF_BIT_1_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
201 CMP_COD probsArray, probOffset, probDisp
202 jae toLabel
203 endm
; IF_BIT_1 probsArray, probOffset, probDisp, toLabel
; Jumps to toLabel when the decoded bit is 1 (no update done on that path).
; On the fall-through path (bit 0) the probability is updated with UPDATE_0.
206 IF_BIT_1 macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
207 IF_BIT_1_NOUP probsArray, probOffset, probDisp, toLabel
208 UPDATE_0 probsArray, probOffset, probDisp
209 endm
; IF_BIT_0_NOUP probsArray, probOffset, probDisp, toLabel
; Runs CMP_COD and jumps to toLabel when cod < bound (decoded bit is 0).
; No probability update is done on either path.
212 IF_BIT_0_NOUP macro probsArray:req, probOffset:req, probDisp:req, toLabel:req
213 CMP_COD probsArray, probOffset, probDisp
214 jb toLabel
215 endm
218 ; ---------- CMOV MACROS ----------
; NORM_CALC prob
; Branch-free split used by the CMOV-style macros. After normalizing:
;   range = bound = (range >> kNumBitModelTotalBits) * prob
;   t0    = old range - bound      (the range to use if the bit is 1)
;   t1    = cod before subtraction (the value to restore if the bit is 0)
;   cod   = cod - bound
; The carry flag from the final `sub cod, range` is set when cod < bound
; (bit 0). The callers consume it with cmovb / cmovae / sbb, so no
; flag-changing instruction may be placed between this macro and those uses.
220 NORM_CALC macro prob:req
221 NORM
222 mov t0, range
223 shr range, kNumBitModelTotalBits
224 imul range, prob
225 sub t0, range
226 mov t1, cod
227 sub cod, range
228 endm
; PUP prob, probPtr
; Probability update: stores prob + ((t0 - prob) >> kNumMoveBits) at [probPtr],
; using an arithmetic shift. The callers in this file load t0 with
; kBitModelOffset or kBitModelTotal_reg beforehand. Clobbers t0.
231 PUP macro prob:req, probPtr:req
232 sub t0, prob
233 ; only sar works for both 16/32 bit prob modes
234 sar t0, kNumMoveBits
235 add t0, prob
236 PSTORE t0, probPtr
237 endm
; PUP_SUB prob, probPtr, symSub
; Folds the decoded bit into sym, then updates the probability with PUP.
; `sbb` computes sym = sym - symSub - CF, with CF still holding the borrow from
; NORM_CALC. With symSub = -1 that is sym + 1 - CF.
240 PUP_SUB macro prob:req, probPtr:req, symSub:req
241 sbb sym, symSub
242 PUP prob, probPtr
243 endm
; PUP_COD prob, probPtr, symSub
; Tail shared by the CMOV-style bit macros, executed after NORM_CALC:
;   - restores cod from t1 when the bit was 0 (carry set)
;   - copies sym into t1 before sym is modified; callers in this file address
;     the probability through t1_R in probPtr
;   - selects the update target in t0: kBitModelTotal_reg for bit 0,
;     kBitModelOffset for bit 1
;   - hands over to PUP_SUB to fold the bit into sym and store the probability
; mov and cmov do not modify flags, so the carry reaches the sbb in PUP_SUB.
246 PUP_COD macro prob:req, probPtr:req, symSub:req
247 mov t0, kBitModelOffset
248 cmovb cod, t1
249 mov t1, sym
250 cmovb t0, kBitModelTotal_reg
251 PUP_SUB prob, probPtr, symSub
252 endm
; BIT_0 prob, probNext
; Decodes the first bit of a bit tree rooted at probs.
; prob receives the probability at index 1 and is updated in place; probNext
; ends up holding the probability for the next node: index 2 if the bit is 0,
; index 3 if it is 1. On exit sym is 2 (bit 0) or 3 (bit 1).
255 BIT_0 macro prob:req, probNext:req
256 PLOAD prob, probs + 1 * PMULT
257 PLOAD probNext, probs + 1 * PMULT_2
259 NORM_CALC prob
; bit 1: take the upper part of the range and the index-3 probability
261 cmovae range, t0
262 PLOAD t0, probs + 1 * PMULT_2 + PMULT
263 cmovae probNext, t0
264 mov t0, kBitModelOffset
; bit 0: undo the subtraction done by NORM_CALC
265 cmovb cod, t1
266 cmovb t0, kBitModelTotal_reg
267 mov sym, 2
; sbb with -1: sym = 2 + 1 - CF
268 PUP_SUB prob, probs + 1 * PMULT, 0 - 1
269 endm
; BIT_1 prob, probNext
; Decodes one inner bit of a bit tree rooted at probs.
; prob is the probability for the current node sym. Both child probabilities
; are read (index 2*sym and 2*sym + 1) and the one matching the decoded bit is
; left in probNext. On exit sym = 2*sym + bit.
272 BIT_1 macro prob:req, probNext:req
; child for bit 0: index 2*sym
273 PLOAD probNext, probs + sym_R * PMULT_2
274 add sym, sym
276 NORM_CALC prob
278 cmovae range, t0
; sym is already doubled here, so this reads index 2*sym_old + 1
279 PLOAD t0, probs + sym_R * PMULT + PMULT
280 cmovae probNext, t0
; PUP_COD copies the doubled sym into t1, so t1 * PMULT_HALF addresses the
; node that was just decoded (sym_old * PMULT)
281 PUP_COD prob, probs + t1_R * PMULT_HALF, 0 - 1
282 endm
; BIT_2 prob, symSub
; Decodes the last bit of a bit tree: no child probability is fetched.
; On exit sym = 2*sym - symSub - CF, which lets the caller remove the tree's
; leading marker bit and add a bias in the same instruction.
285 BIT_2 macro prob:req, symSub:req
286 add sym, sym
288 NORM_CALC prob
290 cmovae range, t0
291 PUP_COD prob, probs + t1_R * PMULT_HALF, symSub
292 endm
295 ; ---------- MATCHED LITERAL ----------
; LITM_0: first bit of a matched literal.
; Expects the match byte in `match`. offs starts at 256 * PMULT and `bit` is the
; masked top bit of the match byte, pre-scaled so it can be added directly to a
; probability address. The probability is read from
; probs + 256 * PMULT + bit + 1 * PMULT and prm keeps that address for the update.
; Uses x1 for the probability; clobbers t0, t1, offs, bit, match, prm.
297 LITM_0 macro
298 mov offs, 256 * PMULT
299 shl match, (PSHIFT + 1)
300 mov bit, offs
301 and bit, match
302 PLOAD x1, probs + 256 * PMULT + bit_R * 1 + 1 * PMULT
303 lea prm, [probs + 256 * PMULT + bit_R * 1 + 1 * PMULT]
304 ; lea prm, [probs + 256 * PMULT + 1 * PMULT]
305 ; add prm, bit_R
; offs ^= bit here; the cmovae below replaces it with bit when the decoded bit is 1
306 xor offs, bit
; move the next match bit into position for the following LITM step
307 add match, match
309 NORM_CALC x1
311 cmovae offs, bit
312 mov bit, match
313 cmovae range, t0
314 mov t0, kBitModelOffset
315 cmovb cod, t1
316 cmovb t0, kBitModelTotal_reg
; mov does not touch flags, so the carry from NORM_CALC still reaches the sbb
317 mov sym, 0
; sbb with -3: sym = 0 + 3 - CF, giving 2 for bit 0 and 3 for bit 1
318 PUP_SUB x1, prm, -2-1
319 endm
; LITM: one inner bit of a matched literal.
; The probability address is probs + offs + (bit AND offs) + sym * PMULT.
; After decoding, offs is updated from the comparison between the decoded bit
; and the match bit, match is shifted by one more bit, and sym = 2*sym + bit.
322 LITM macro
323 and bit, offs
324 lea prm, [probs + offs_R * 1]
325 add prm, bit_R
326 PLOAD x1, prm + sym_R * PMULT
327 xor offs, bit
328 add sym, sym
329 add match, match
331 NORM_CALC x1
333 cmovae offs, bit
334 mov bit, match
335 cmovae range, t0
; t1 receives the doubled sym inside PUP_COD, so t1 * PMULT_HALF = sym_old * PMULT
336 PUP_COD x1, prm + t1_R * PMULT_HALF, - 1
337 endm
; LITM_2: last bit of a matched literal.
; Same addressing as LITM, but offs and match are not advanced because no
; further bit follows. symSub = 256 - 1 gives sym = 2*sym - 255 - CF, which
; removes the tree's leading marker bit.
340 LITM_2 macro
341 and bit, offs
342 lea prm, [probs + offs_R * 1]
343 add prm, bit_R
344 PLOAD x1, prm + sym_R * PMULT
345 add sym, sym
347 NORM_CALC x1
349 cmovae range, t0
350 PUP_COD x1, prm + t1_R * PMULT_HALF, 256 - 1
351 endm
354 ; ---------- REVERSE BITS ----------
; REV_0 prob, probNext
; First step of the reverse bit-tree decode.
; Expects prob to already hold the probability at probs + 1 * PMULT and sym2_R
; to point at probs + 2 * PMULT (see the commented-out lines; the caller does
; both). On exit probNext / sym2_R refer to index 2 (bit 0) or index 3 (bit 1).
; sym is not changed here; the bits are recovered from sym2_R in REV_2.
356 REV_0 macro prob:req, probNext:req
357 ; PLOAD prob, probs + 1 * PMULT
358 ; lea sym2_R, [probs + 2 * PMULT]
359 ; PLOAD probNext, probs + 2 * PMULT
360 PLOAD probNext, sym2_R
362 NORM_CALC prob
364 cmovae range, t0
365 PLOAD t0, probs + 3 * PMULT
366 cmovae probNext, t0
367 cmovb cod, t1
368 mov t0, kBitModelOffset
369 cmovb t0, kBitModelTotal_reg
; lea does not modify flags, so the cmovae below still sees NORM_CALC's carry
370 lea t1_R, [probs + 3 * PMULT]
371 cmovae sym2_R, t1_R
372 PUP prob, probs + 1 * PMULT
373 endm
; REV_1 prob, probNext, step
; Middle step of the reverse bit-tree decode.
; sym2_R is advanced by step * PMULT for the bit-0 child and by a further
; step * PMULT when the decoded bit is 1; probNext receives the matching
; probability. The probability just decoded lives at t1_R - step * PMULT_2,
; i.e. the value sym2_R had on entry.
376 REV_1 macro prob:req, probNext:req, step:req
377 add sym2_R, step * PMULT
378 PLOAD probNext, sym2_R
380 NORM_CALC prob
382 cmovae range, t0
383 PLOAD t0, sym2_R + step * PMULT
384 cmovae probNext, t0
385 cmovb cod, t1
386 mov t0, kBitModelOffset
387 cmovb t0, kBitModelTotal_reg
388 lea t1_R, [sym2_R + step * PMULT]
389 cmovae sym2_R, t1_R
390 PUP prob, t1_R - step * PMULT_2
391 endm
; REV_2 prob, step
; Final step of the reverse bit-tree decode.
; Converts sym2_R from a pointer back into a probability index
; ((sym2_R - probs) >> PSHIFT) and ORs it into sym. When the last bit is 0,
; step is subtracted from sym again. The probability is updated at
; probs + sym2_R * PMULT.
394 REV_2 macro prob:req, step:req
395 sub sym2_R, probs
396 shr sym2, PSHIFT
397 or sym, sym2
399 NORM_CALC prob
401 cmovae range, t0
; lea keeps the flags from NORM_CALC intact
402 lea t0, [sym - step]
403 cmovb sym, t0
404 cmovb cod, t1
405 mov t0, kBitModelOffset
406 cmovb t0, kBitModelTotal_reg
407 PUP prob, probs + sym2_R * PMULT
408 endm
; REV_1_VAR prob
; One step of a reverse bit-tree decode with a variable step, used in a loop.
; On entry sym_R is the address of the current probability and sym2 is the
; current step in bytes. probs is overwritten with that address for the update,
; so the caller must reload probs afterwards. sym_R advances by sym2 for bit 0
; and by 2 * sym2 for bit 1; sym2 is then doubled for the next iteration.
411 REV_1_VAR macro prob:req
412 PLOAD prob, sym_R
413 mov probs, sym_R
414 add sym_R, sym2_R
416 NORM_CALC prob
418 cmovae range, t0
419 lea t0_R, [sym_R + sym2_R]
420 cmovae sym_R, t0_R
421 mov t0, kBitModelOffset
422 cmovb cod, t1
423 ; mov t1, kBitModelTotal
424 ; cmovb t0, t1
425 cmovb t0, kBitModelTotal_reg
; NOTE(review): this add changes the flags, and PUP does not read them
426 add sym2, sym2
427 PUP prob, probs
428 endm
; LIT_PROBS lpMaskParam
; Sets probs to the literal coder's probability table for the current position
; and finishes the pending IsMatch "bit 0" update.
; On entry sym holds the previous output byte. lpMaskParam is the literal
; position mask, passed as a register or a memory operand.
; Side effects: probs_state_R += pbPos_R, processedPos is incremented, and x1,
; t0 and sym are clobbered.
433 LIT_PROBS macro lpMaskParam:req
434 ; prob += (UInt32)3 * ((((processedPos << 8) + dic[(dicPos == 0 ? dicBufSize : dicPos) - 1]) & lpMask) << lc);
435 mov t0, processedPos
436 shl t0, 8
437 add sym, t0
438 and sym, lpMaskParam
; pbPos is an alias of x1, so it must be consumed before x1 is reloaded below
439 add probs_state_R, pbPos_R
; lc2 is lc + PSHIFT, so the shift below also scales by the probability size
440 mov x1, LOC lc2
; sym *= 3
441 lea sym, dword ptr[sym_R + 2 * sym_R]
442 add probs, Literal * PMULT
443 shl sym, x1_L
444 add probs, sym_R
; probs_state_R already includes pbPos_R, hence the zero offset argument
445 UPDATE_0 probs_state_R, 0, IsMatch
446 inc processedPos
447 endm
; LZMA model constants and the layout of the probability array.
; All offsets below are in probability units, relative to a base pointer that
; sits kStartOffset (1664) entries into the array, so SpecPos and the tables
; that follow it have negative offsets.
451 kNumPosBitsMax equ 4
452 kNumPosStatesMax equ (1 SHL kNumPosBitsMax)
; length coder geometry
454 kLenNumLowBits equ 3
455 kLenNumLowSymbols equ (1 SHL kLenNumLowBits)
456 kLenNumHighBits equ 8
457 kLenNumHighSymbols equ (1 SHL kLenNumHighBits)
458 kNumLenProbs equ (2 * kLenNumLowSymbols * kNumPosStatesMax + kLenNumHighSymbols)
; offsets inside one length coder; LenChoice shares offset 0 with LenLow
460 LenLow equ 0
461 LenChoice equ LenLow
462 LenChoice2 equ (LenLow + kLenNumLowSymbols)
463 LenHigh equ (LenLow + 2 * kLenNumLowSymbols * kNumPosStatesMax)
; kNumStates2 (16) is the value used for table sizing below; kNumStates is 12
465 kNumStates equ 12
466 kNumStates2 equ 16
467 kNumLitStates equ 7
469 kStartPosModelIndex equ 4
470 kEndPosModelIndex equ 14
471 kNumFullDistances equ (1 SHL (kEndPosModelIndex SHR 1))
473 kNumPosSlotBits equ 6
474 kNumLenToPosStates equ 4
476 kNumAlignBits equ 4
477 kAlignTableSize equ (1 SHL kNumAlignBits)
479 kMatchMinLen equ 2
480 kMatchSpecLenStart equ (kMatchMinLen + kLenNumLowSymbols * 2 + kLenNumHighSymbols)
; table offsets, laid out consecutively starting at -kStartOffset
482 kStartOffset equ 1664
483 SpecPos equ (-kStartOffset)
484 IsRep0Long equ (SpecPos + kNumFullDistances)
485 RepLenCoder equ (IsRep0Long + (kNumStates2 SHL kNumPosBitsMax))
486 LenCoder equ (RepLenCoder + kNumLenProbs)
487 IsMatch equ (LenCoder + kNumLenProbs)
488 kAlign equ (IsMatch + (kNumStates2 SHL kNumPosBitsMax))
489 IsRep equ (kAlign + kAlignTableSize)
490 IsRepG0 equ (IsRep + kNumStates)
491 IsRepG1 equ (IsRepG0 + kNumStates)
492 IsRepG2 equ (IsRepG1 + kNumStates)
493 PosSlot equ (IsRepG2 + kNumStates)
494 Literal equ (PosSlot + (kNumLenToPosStates SHL kNumPosSlotBits))
495 NUM_BASE_PROBS equ (Literal + kStartOffset)
; Assembly-time layout checks: the code relies on kAlign being exactly offset 0
; and on 1984 base probabilities in total.
497 if kAlign ne 0
498 .err <Stop_Compiling_Bad_LZMA_kAlign>
499 endif
501 if NUM_BASE_PROBS ne 1984
502 .err <Stop_Compiling_Bad_LZMA_PROBS>
503 endif
; PTR_FIELD: declaration text for one pointer-sized (8-byte) struct field.
506 PTR_FIELD equ dq ?
; CLzmaDec_Asm: assembly view of the decoder state passed in by the caller.
; Per the note at the top of this file it must match the CLzmaDec structure
; used by the C code.
508 CLzmaDec_Asm struct
; LZMA properties, one byte each
509 lc db ?
510 lp db ?
511 pb db ?
512 _pad_ db ?
513 dicSize dd ?
515 probs_Spec PTR_FIELD
; probability pointer used by this routine as its base (read into probs)
516 probs_1664 PTR_FIELD
517 dic_Spec PTR_FIELD
518 dicBufSize PTR_FIELD
; stored as an offset from dic_Spec (dic is added on load and subtracted on store)
519 dicPos_Spec PTR_FIELD
520 buf_Spec PTR_FIELD
522 range_Spec dd ?
523 code_Spec dd ?
524 processedPos_Spec dd ?
525 checkDicSize dd ?
526 rep0 dd ?
527 rep1 dd ?
528 rep2 dd ?
529 rep3 dd ?
530 state_Spec dd ?
531 remainLen dd ?
532 CLzmaDec_Asm ends
; CLzmaDec_Asm_Loc: the routine's private stack frame, addressed through RSP.
; It caches values from CLzmaDec_Asm plus values derived in the prologue.
535 CLzmaDec_Asm_Loc struct
; caller's stack pointer, restored on exit
536 OLD_RSP PTR_FIELD
; pointer to the caller's CLzmaDec_Asm, used when writing results back
537 lzmaPtr PTR_FIELD
538 _pad0_ PTR_FIELD
539 _pad1_ PTR_FIELD
540 _pad2_ PTR_FIELD
541 dicBufSize PTR_FIELD
542 probs_Spec PTR_FIELD
543 dic_Spec PTR_FIELD
; absolute end pointers for the output (limit) and input (bufLimit)
545 limit PTR_FIELD
546 bufLimit PTR_FIELD
; lc2 = lc + PSHIFT; lpMask and pbMask are computed in the prologue
547 lc2 dd ?
548 lpMask dd ?
549 pbMask dd ?
550 checkDicSize dd ?
552 _pad_ dd ?
553 remainLen dd ?
554 dicPos_Spec PTR_FIELD
555 rep0 dd ?
556 rep1 dd ?
557 rep2 dd ?
558 rep3 dd ?
559 CLzmaDec_Asm_Loc ends
; Field-access prefixes; each is followed by a field name at the point of use.
;   GLOB_2 : caller's CLzmaDec_Asm through sym_R (used in the prologue)
;   GLOB   : caller's CLzmaDec_Asm through r1    (used in the epilogue)
;   LOC_0  : local frame through r0 (used while RSP is being switched)
;   LOC    : local frame through RSP
562 GLOB_2 equ [sym_R].CLzmaDec_Asm.
563 GLOB equ [r1].CLzmaDec_Asm.
564 LOC_0 equ [r0].CLzmaDec_Asm_Loc.
565 LOC equ [RSP].CLzmaDec_Asm_Loc.
; COPY_VAR name: copy a 32-bit field from the caller's structure (via sym_R)
; into the same-named field of the local frame (via r0). Clobbers t0.
568 COPY_VAR macro name
569 mov t0, GLOB_2 name
570 mov LOC_0 name, t0
571 endm
; RESTORE_VAR name: copy a 32-bit field from the local frame (via RSP) back
; into the same-named field of the caller's structure (via r1). Clobbers t0.
574 RESTORE_VAR macro name
575 mov t0, LOC name
576 mov GLOB name, t0
577 endm
; IsMatchBranch_Pre
; Prepares the two address components used by the IsMatch test:
;   pbPos         = (processedPos AND pbMask) << (kLenNumLowBits + 1 + PSHIFT)
;   probs_state_R = probs + state_R   (state is kept pre-scaled by PMULT)
; The `reg` parameter is declared but not used in the body.
581 IsMatchBranch_Pre macro reg
582 ; prob = probs + IsMatch + (state << kNumPosBitsMax) + posState;
583 mov pbPos, LOC pbMask
584 and pbPos, processedPos
585 shl pbPos, (kLenNumLowBits + 1 + PSHIFT)
586 lea probs_state_R, [probs + state_R]
587 endm
; IsMatchBranch
; Full IsMatch test: computes the addresses with IsMatchBranch_Pre, then jumps
; to IsMatch_label when the decoded bit is 1; on bit 0 the probability is
; updated and execution falls through. The `reg` parameter is not used.
590 IsMatchBranch macro reg
591 IsMatchBranch_Pre
592 IF_BIT_1 probs_state_R, pbPos_R, IsMatch, IsMatch_label
593 endm
; CheckLimits
; Leaves the decode loop through fin_OK when the input pointer has reached
; bufLimit or the output pointer has reached limit (unsigned compares).
; The `reg` parameter is not used.
596 CheckLimits macro reg
597 cmp buf, LOC bufLimit
598 jae fin_OK
599 cmp dicPos, LOC limit
600 jae fin_OK
601 endm
605 ; RSP is (16x + 8) bytes aligned in WIN64-x64
606 ; LocalSize equ ((((SIZEOF CLzmaDec_Asm_Loc) + 7) / 16 * 16) + 8)
; Names for the three incoming arguments of LzmaDec_DecodeReal_3.
; NOTE(review): REG_PARAM_0..2 are defined outside this file (presumably the
; first three argument registers of the target ABI) - confirm in 7zAsm.asm.
608 PARAM_lzma equ REG_PARAM_0
609 PARAM_limit equ REG_PARAM_1
610 PARAM_bufLimit equ REG_PARAM_2
612 ; MY_ALIGN_64
; LzmaDec_DecodeReal_3(lzma, limit, bufLimit)
;   PARAM_lzma     : pointer to the decoder state (CLzmaDec_Asm layout)
;   PARAM_limit    : output limit, relative to the dictionary base
;   PARAM_bufLimit : input limit pointer
; Returns a status in x0: 0 from fin_OK, 1 from fin_ERROR.
; This part builds the local frame, caches the decoder state in registers and
; in the frame, and dispatches into the main loop according to the saved state.
613 MY_PROC LzmaDec_DecodeReal_3, 3
614 MY_PUSH_PRESERVED_REGS
; carve out the local frame below RSP, rounded down to a 128-byte boundary,
; and remember the original RSP inside it
616 lea r0, [RSP - (SIZEOF CLzmaDec_Asm_Loc)]
617 and r0, -128
618 mov r5, RSP
619 mov RSP, r0
620 mov LOC_0 Old_RSP, r5
621 mov LOC_0 lzmaPtr, PARAM_lzma
623 mov LOC_0 remainLen, 0 ; remainLen must be ZERO
625 mov LOC_0 bufLimit, PARAM_bufLimit
626 mov sym_R, PARAM_lzma ; CLzmaDec_Asm_Loc pointer for GLOB_2
627 mov dic, GLOB_2 dic_Spec
; turn the relative output limit into an absolute pointer
628 add PARAM_limit, dic
629 mov LOC_0 limit, PARAM_limit
631 COPY_VAR(rep0)
632 COPY_VAR(rep1)
633 COPY_VAR(rep2)
634 COPY_VAR(rep3)
; dicPos is kept as an absolute pointer (dic + offset) while decoding
636 mov dicPos, GLOB_2 dicPos_Spec
637 add dicPos, dic
638 mov LOC_0 dicPos_Spec, dicPos
639 mov LOC_0 dic_Spec, dic
; pbMask = (1 << pb) - 1
641 mov x1_L, GLOB_2 pb
642 mov t0, 1
643 shl t0, x1_L
644 dec t0
645 mov LOC_0 pbMask, t0
647 ; unsigned pbMask = ((unsigned)1 << (p->prop.pb)) - 1;
648 ; unsigned lc = p->prop.lc;
649 ; unsigned lpMask = ((unsigned)0x100 << p->prop.lp) - ((unsigned)0x100 >> lc);
651 mov x1_L, GLOB_2 lc
652 mov x2, 100h
653 mov t0, x2
654 shr x2, x1_L
655 ; inc x1
; lc2 = lc + PSHIFT, so one shift applies both lc and the probability size
656 add x1_L, PSHIFT
657 mov LOC_0 lc2, x1
658 mov x1_L, GLOB_2 lp
659 shl t0, x1_L
660 sub t0, x2
661 mov LOC_0 lpMask, t0
662 mov lpMask_reg, t0
664 ; mov probs, GLOB_2 probs_Spec
665 ; add probs, kStartOffset SHL PSHIFT
666 mov probs, GLOB_2 probs_1664
667 mov LOC_0 probs_Spec, probs
; t0_R keeps dicBufSize for the previous-byte lookup below
669 mov t0_R, GLOB_2 dicBufSize
670 mov LOC_0 dicBufSize, t0_R
672 mov x1, GLOB_2 checkDicSize
673 mov LOC_0 checkDicSize, x1
675 mov processedPos, GLOB_2 processedPos_Spec
; state is kept pre-multiplied by PMULT so it can be used directly in addresses
677 mov state, GLOB_2 state_Spec
678 shl state, PSHIFT
680 mov buf, GLOB_2 buf_Spec
681 mov range, GLOB_2 range_Spec
682 mov cod, GLOB_2 code_Spec
683 mov kBitModelTotal_reg, kBitModelTotal
; sym = previous byte, or 0 when nothing has been decoded yet
684 xor sym, sym
686 ; if (processedPos != 0 || checkDicSize != 0)
687 or x1, processedPos
688 jz @f
; t0_R = (dicPos == dic) ? dic + dicBufSize : dicPos, then read the byte before it
690 add t0_R, dic
691 cmp dicPos, dic
692 cmovnz t0_R, dicPos
693 movzx sym, byte ptr[t0_R - 1]
; Target of the `jz @f` above. Without this label the jump would bind to a
; later @@: label and skip the state dispatch below.
694 @@:
696 IsMatchBranch_Pre
; enter the main loop at the point matching the saved state
697 cmp state, 4 * PMULT
698 jb lit_end
699 cmp state, kNumLitStates * PMULT
700 jb lit_matched_end
701 jmp lz_end
706 ; ---------- LITERAL ----------
; Decodes one unmatched literal as an 8-bit tree and writes it to the output.
; lit_start is entered with the previous byte in sym; state is reset to 0.
; lit_start_2 is the entry that keeps the caller's state.
707 MY_ALIGN_64
708 lit_start:
709 xor state, state
710 lit_start_2:
711 LIT_PROBS lpMask_reg
713 ifdef _LZMA_SIZE_OPT
; size-optimized form: loop over the first seven tree levels
715 PLOAD x1, probs + 1 * PMULT
716 mov sym, 1
717 MY_ALIGN_16
718 lit_loop:
719 BIT_1 x1, x2
720 mov x1, x2
721 cmp sym, 127
722 jbe lit_loop
724 else
; unrolled form: x1 and x2 alternate as current / next probability
726 BIT_0 x1, x2
727 BIT_1 x2, x1
728 BIT_1 x1, x2
729 BIT_1 x2, x1
730 BIT_1 x1, x2
731 BIT_1 x2, x1
732 BIT_1 x1, x2
734 endif
; last bit; subtracting 256 - 1 removes the tree's leading marker bit
736 BIT_2 x2, 256 - 1
738 ; mov dic, LOC dic_Spec
; LIT_PROBS moved probs into the literal table; restore the base pointer
739 mov probs, LOC probs_Spec
740 IsMatchBranch_Pre
741 mov byte ptr[dicPos], sym_L
742 inc dicPos
744 CheckLimits
; bit 0 means another literal follows; otherwise fall through to IsMatch_label
745 lit_end:
746 IF_BIT_0_NOUP probs_state_R, pbPos_R, IsMatch, lit_start
748 ; jmp IsMatch_label
750 ; ---------- MATCHES ----------
751 ; MY_ALIGN_32
; Reached when the IsMatch bit decoded as 1. Completes that bit's update, then
; tests IsRep. For a new match probs is pointed at LenCoder and kNumStates * PMULT
; is added to state, which the compare at the end of the length decode uses to
; tell a new match from a rep match.
752 IsMatch_label:
753 UPDATE_1 probs_state_R, pbPos_R, IsMatch
754 IF_BIT_1 probs_state_R, 0, IsRep, IsRep_label
756 add probs, LenCoder * PMULT
757 add state, kNumStates * PMULT
759 ; ---------- LEN DECODE ----------
; Decodes the match length into sym. len_temp carries the bias that the final
; BIT_2 subtracts; it differs for the low, mid and high ranges.
760 len_decode:
761 mov len_temp, 8 - 1 - kMatchMinLen
; first choice bit: 0 selects the low range
762 IF_BIT_0_NOUP probs, 0, 0, len_mid_0
763 UPDATE_1 probs, 0, 0
764 add probs, (1 SHL (kLenNumLowBits + PSHIFT))
765 mov len_temp, -1 - kMatchMinLen
; second choice bit: 0 selects the mid range
766 IF_BIT_0_NOUP probs, 0, 0, len_mid_0
767 UPDATE_1 probs, 0, 0
; high range: 8-bit tree at LenHigh; the loop handles the first six levels
768 add probs, LenHigh * PMULT - (1 SHL (kLenNumLowBits + PSHIFT))
769 mov sym, 1
770 PLOAD x1, probs + 1 * PMULT
772 MY_ALIGN_32
773 len8_loop:
774 BIT_1 x1, x2
775 mov x1, x2
776 cmp sym, 64
777 jb len8_loop
779 mov len_temp, (kLenNumHighSymbols - kLenNumLowSymbols * 2) - 1 - kMatchMinLen
780 jmp len_mid_2
782 MY_ALIGN_32
; low / mid range: 3-bit tree selected by the position state
783 len_mid_0:
784 UPDATE_0 probs, 0, 0
785 add probs, pbPos_R
786 BIT_0 x2, x1
; shared tail: the last two tree levels
787 len_mid_2:
788 BIT_1 x1, x2
789 BIT_2 x2, len_temp
790 mov probs, LOC probs_Spec
; rep matches (state below kNumStates * PMULT) already have their distance
791 cmp state, kNumStates * PMULT
792 jb copy_match
795 ; ---------- DECODE DISTANCE ----------
; Selects the PosSlot table from the length, decodes the 6-bit slot, then
; branches to the short-distance or direct-bits path.
796 ; probs + PosSlot + ((len < kNumLenToPosStates ? len : kNumLenToPosStates - 1) << kNumPosSlotBits);
; t0 = min(sym, 3 + kMatchMinLen); the kMatchMinLen bias is removed again by
; the constant subtracted from probs
798 mov t0, 3 + kMatchMinLen
799 cmp sym, 3 + kMatchMinLen
800 cmovb t0, sym
801 add probs, PosSlot * PMULT - (kMatchMinLen SHL (kNumPosSlotBits + PSHIFT))
802 shl t0, (kNumPosSlotBits + PSHIFT)
803 add probs, t0_R
805 ; sym = Len
806 ; mov LOC remainLen, sym
; keep the length in len_temp while sym is reused for the distance
807 mov len_temp, sym
809 ifdef _LZMA_SIZE_OPT
811 PLOAD x1, probs + 1 * PMULT
812 mov sym, 1
813 MY_ALIGN_16
814 slot_loop:
815 BIT_1 x1, x2
816 mov x1, x2
817 cmp sym, 32
818 jb slot_loop
820 else
822 BIT_0 x1, x2
823 BIT_1 x2, x1
824 BIT_1 x1, x2
825 BIT_1 x2, x1
826 BIT_1 x1, x2
828 endif
; x1 = tree value before the last bit; it still carries the marker bit (32)
830 mov x1, sym
831 BIT_2 x2, 64-1
; sym is now the slot; only its low two bits are needed from here on
833 and sym, 3
834 mov probs, LOC probs_Spec
835 cmp x1, 32 + kEndPosModelIndex / 2
836 jb short_dist
838 ; unsigned numDirectBits = (unsigned)(((distance >> 1) - 1));
; x1 becomes the number of direct bits to read before the align bits
839 sub x1, (32 + 1 + kNumAlignBits)
840 ; distance = (2 | (distance & 1));
841 or sym, 2
; preload what REV_0 expects: the probability at index 1 and a pointer to index 2
842 PLOAD x2, probs + 1 * PMULT
843 shl sym, kNumAlignBits + 1
844 lea sym2_R, [probs + 2 * PMULT]
846 jmp direct_norm
847 ; lea t1, [sym_R + (1 SHL kNumAlignBits)]
848 ; cmp range, kTopValue
849 ; jb direct_norm
851 ; ---------- DIRECT DISTANCE ----------
; Reads x1 raw (equiprobable) bits into the distance. Each iteration halves
; range; the bit is 1 when cod - range is not negative. t1 is prepared at
; direct_norm as sym + (1 SHL kNumAlignBits) and replaces sym for a 1 bit.
852 MY_ALIGN_32
853 direct_loop:
854 shr range, 1
855 mov t0, cod
856 sub cod, range
; negative result: bit is 0, restore cod; otherwise take t1 as the new sym
857 cmovs cod, t0
858 cmovns sym, t1
; Alternative branch-free variant, kept disabled inside a MASM comment block.
860 comment ~
861 sub cod, range
862 mov x2, cod
863 sar x2, 31
864 lea sym, dword ptr [r2 + sym_R * 2 + 1]
865 and x2, range
866 add cod, x2
; Terminator of the comment block opened above. Without it the loop counter
; and everything up to the next delimiter character would be commented out.
867 ~
868 dec x1
869 je direct_end
871 add sym, sym
872 direct_norm:
873 lea t1, [sym_R + (1 SHL kNumAlignBits)]
874 cmp range, kTopValue
875 jae near ptr direct_loop
876 ; we align for 32 here with "near ptr" command above
877 NORM_2
878 jmp direct_loop
880 MY_ALIGN_32
; The four low (align) bits of the distance, decoded as a reverse bit tree.
; x2 and sym2_R were preloaded before the direct-bit loop.
881 direct_end:
882 ; prob = + kAlign;
883 ; distance <<= kNumAlignBits;
884 REV_0 x2, x1
885 REV_1 x1, x2, 2
886 REV_1 x2, x1, 4
887 REV_2 x1, 8
; Common end of all distance paths: sym holds the decoded distance.
889 decode_dist_end:
891 ; if (distance >= (checkDicSize == 0 ? processedPos: checkDicSize))
; an out-of-range distance is either the end marker or an error; both cases
; are sorted out at end_of_payload
893 mov t0, LOC checkDicSize
894 test t0, t0
895 cmove t0, processedPos
896 cmp sym, t0
897 jae end_of_payload
899 ; rep3 = rep2;
900 ; rep2 = rep1;
901 ; rep1 = rep0;
902 ; rep0 = distance + 1;
904 inc sym
905 mov t0, LOC rep0
906 mov t1, LOC rep1
907 mov x1, LOC rep2
908 mov LOC rep0, sym
909 ; mov sym, LOC remainLen
; sym goes back to holding the match length for copy_match
910 mov sym, len_temp
911 mov LOC rep1, t0
912 mov LOC rep2, t1
913 mov LOC rep3, x1
915 ; state = (state < kNumStates + kNumLitStates) ? kNumLitStates : kNumLitStates + 3;
; mov does not change flags, so cmovae still sees the result of the cmp
916 cmp state, (kNumStates + kNumLitStates) * PMULT
917 mov state, kNumLitStates * PMULT
918 mov t0, (kNumLitStates + 3) * PMULT
919 cmovae state, t0
922 ; ---------- COPY MATCH ----------
; Copies a match of sym bytes from distance rep0 back in the dictionary.
; The copy is clipped to the space left before limit; any remainder is saved
; in remainLen. When the source range would run past the end of the
; dictionary buffer, copy_match_cross handles the wrap-around.
923 copy_match:
925 ; len += kMatchMinLen;
926 ; add sym, kMatchMinLen
928 ; if ((rem = limit - dicPos) == 0)
930 ; p->dicPos = dicPos;
931 ; return SZ_ERROR_DATA;
933 mov cnt_R, LOC limit
934 sub cnt_R, dicPos
935 jz fin_ERROR
937 ; curLen = ((rem < len) ? (unsigned)rem : len);
938 cmp cnt_R, sym_R
939 ; cmovae cnt_R, sym_R ; 64-bit
940 cmovae cnt, sym ; 32-bit
942 mov dic, LOC dic_Spec
943 mov x1, LOC rep0
; t0_R = current output position; dicPos moves to the end of the copy
945 mov t0_R, dicPos
946 add dicPos, cnt_R
947 ; processedPos += curLen;
948 add processedPos, cnt
949 ; len -= curLen;
950 sub sym, cnt
951 mov LOC remainLen, sym
; make the position relative to the dictionary base
953 sub t0_R, dic
955 ; pos = dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0);
956 sub t0_R, r1
957 jae @f
; source starts before the buffer start: wrap it, then check whether the copy
; fits in the bytes remaining up to the end of the buffer
959 mov r1, LOC dicBufSize
960 add t0_R, r1
961 sub r1, t0_R
962 cmp cnt_R, r1
963 ja copy_match_cross
; Target of the `jae @f` above (no wrap-around needed); also reached by
; fall-through when the wrapped source fits before the end of the buffer.
964 @@:
965 ; if (curLen <= dicBufSize - pos)
967 ; ---------- COPY MATCH FAST ----------
968 ; Byte *dest = dic + dicPos;
969 ; mov r1, dic
970 ; ptrdiff_t src = (ptrdiff_t)pos - (ptrdiff_t)dicPos;
971 ; sub t0_R, dicPos
972 ; dicPos += curLen;
974 ; const Byte *lim = dest + curLen;
; preload the first source byte; t0_R becomes the source end pointer and
; cnt_R a negative index that counts up towards zero
975 add t0_R, dic
976 movzx sym, byte ptr[t0_R]
977 add t0_R, cnt_R
978 neg cnt_R
979 ; lea r1, [dicPos - 1]
980 copy_common:
; dicPos points one byte before the end, so the last byte is stored at
; [dicPos] by the code at lz_end_match
981 dec dicPos
982 ; cmp LOC rep0, 1
983 ; je rep0Label
985 ; t0_R - src_lim
986 ; r1 - dest_lim - 1
987 ; cnt_R - (-cnt)
989 IsMatchBranch_Pre
990 inc cnt_R
991 jz copy_end
992 MY_ALIGN_16
; Head of the byte-copy loop, targeted by the `jnz @b` below: store the byte
; already loaded, then load the next one.
993 @@:
994 mov byte ptr[cnt_R * 1 + dicPos], sym_L
995 movzx sym, byte ptr[cnt_R * 1 + t0_R]
996 inc cnt_R
997 jnz @b
999 copy_end:
1000 lz_end_match:
; store the final byte; sym keeps it as the "previous byte" for a later literal
1001 mov byte ptr[dicPos], sym_L
1002 inc dicPos
1004 ; IsMatchBranch_Pre
1005 CheckLimits
1006 lz_end:
1007 IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label
1011 ; ---------- LITERAL MATCHED ----------
; Literal that follows a match. It is decoded with the "matched" probability
; sets, selected bit by bit from the byte found at distance rep0.
1013 LIT_PROBS LOC lpMask
1015 ; matchByte = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
1016 mov x1, LOC rep0
1017 ; mov dic, LOC dic_Spec
; dicPos shares its register with `bit`, so park it in the frame for now
1018 mov LOC dicPos_Spec, dicPos
1020 ; state -= (state < 10) ? 3 : 6;
1021 lea t0, [state_R - 6 * PMULT]
1022 sub state, 3 * PMULT
1023 cmp state, 7 * PMULT
1024 cmovae state, t0
; dicPos = offset of the match byte, wrapped into the buffer if it went negative
1026 sub dicPos, dic
1027 sub dicPos, r1
1028 jae @f
1029 add dicPos, LOC dicBufSize
; Target of the `jae @f` above (no wrap-around needed).
1030 @@:
; Alternative wrap computation, kept disabled inside a MASM comment block.
1031 comment ~
1032 xor t0, t0
1033 sub dicPos, r1
1034 cmovb t0_R, LOC dicBufSize
; Terminator of the comment block opened above. Without it the match-byte
; load below would be swallowed by the comment.
1035 ~
1037 movzx match, byte ptr[dic + dicPos * 1]
1039 ifdef _LZMA_SIZE_OPT
1041 mov offs, 256 * PMULT
1042 shl match, (PSHIFT + 1)
1043 mov bit, match
1044 mov sym, 1
1045 MY_ALIGN_16
1046 litm_loop:
1047 LITM
1048 cmp sym, 256
1049 jb litm_loop
1050 sub sym, 256
1052 else
; unrolled form: first bit, six inner bits, last bit
1054 LITM_0
1055 LITM
1056 LITM
1057 LITM
1058 LITM
1059 LITM
1060 LITM
1061 LITM_2
1063 endif
1065 mov probs, LOC probs_Spec
1066 IsMatchBranch_Pre
1067 ; mov dic, LOC dic_Spec
; bring the output pointer back and store the decoded byte
1068 mov dicPos, LOC dicPos_Spec
1069 mov byte ptr[dicPos], sym_L
1070 inc dicPos
1072 CheckLimits
1073 lit_matched_end:
1074 IF_BIT_1_NOUP probs_state_R, pbPos_R, IsMatch, IsMatch_label
1075 ; IsMatchBranch
; another literal follows: lpMask_reg shares its register with `match`, so
; reload it before entering the plain literal path
1076 mov lpMask_reg, LOC lpMask
1077 sub state, 3 * PMULT
1078 jmp lit_start_2
1082 ; ---------- REP 0 LITERAL ----------
; "Short rep": a one-byte match at distance rep0. The byte is fetched into sym
; and the store is done by the shared tail at lz_end_match.
1083 MY_ALIGN_32
1084 IsRep0Short_label:
1085 UPDATE_0 probs_state_R, pbPos_R, IsRep0Long
1087 ; dic[dicPos] = dic[dicPos - rep0 + (dicPos < rep0 ? dicBufSize : 0)];
1088 mov dic, LOC dic_Spec
1089 mov t0_R, dicPos
; probBranch is free after UPDATE_0 and serves as a scratch register for rep0
1090 mov probBranch, LOC rep0
1091 sub t0_R, dic
; undo the RepLenCoder offset that IsRep_label added to probs
1093 sub probs, RepLenCoder * PMULT
1094 inc processedPos
1095 ; state = state < kNumLitStates ? 9 : 11;
; state is 8 * PMULT or 11 * PMULT here (set at IsRep_label), so OR-ing in
; 1 * PMULT yields 9 or 11
1096 or state, 1 * PMULT
; IsMatchBranch_Pre uses only mov / and / shl / lea and does not touch t0_R
1097 IsMatchBranch_Pre
1099 sub t0_R, probBranch_R
1100 jae @f
1101 add t0_R, LOC dicBufSize
; Target of the `jae @f` above (source position did not wrap).
1102 @@:
1103 movzx sym, byte ptr[dic + t0_R * 1]
1104 jmp lz_end_match
1107 MY_ALIGN_32
; Rep-match handling: decides which of rep0..rep3 supplies the distance,
; rotates the chosen value into rep0, then decodes the length at len_decode
; with probs pointing at RepLenCoder.
1108 IsRep_label:
1109 UPDATE_1 probs_state_R, 0, IsRep
1111 ; The (checkDicSize == 0 && processedPos == 0) case was checked before in LzmaDec.c with kBadRepCode.
1112 ; So we don't check it here.
1114 ; mov t0, processedPos
1115 ; or t0, LOC checkDicSize
1116 ; jz fin_ERROR_2
1118 ; state = state < kNumLitStates ? 8 : 11;
; probBranch is used as scratch; mov leaves the flags of the cmp intact
1119 cmp state, kNumLitStates * PMULT
1120 mov state, 8 * PMULT
1121 mov probBranch, 11 * PMULT
1122 cmovae state, probBranch
1124 ; prob = probs + RepLenCoder;
1125 add probs, RepLenCoder * PMULT
; IsRepG0 = 0: the distance is rep0; then IsRep0Long picks short rep or a full match
1127 IF_BIT_1 probs_state_R, 0, IsRepG0, IsRepG0_label
1128 IF_BIT_0_NOUP probs_state_R, pbPos_R, IsRep0Long, IsRep0Short_label
1129 UPDATE_1 probs_state_R, pbPos_R, IsRep0Long
1130 jmp len_decode
1132 MY_ALIGN_32
; IsRepG0 = 1: old rep0 moves to rep1; dist holds old rep1 as the candidate
1133 IsRepG0_label:
1134 UPDATE_1 probs_state_R, 0, IsRepG0
1135 mov dist2, LOC rep0
1136 mov dist, LOC rep1
1137 mov LOC rep1, dist2
1139 IF_BIT_1 probs_state_R, 0, IsRepG1, IsRepG1_label
1140 mov LOC rep0, dist
1141 jmp len_decode
1143 ; MY_ALIGN_32
; IsRepG1 = 1: old rep1 moves to rep2; dist2 holds old rep2 as the candidate
1144 IsRepG1_label:
1145 UPDATE_1 probs_state_R, 0, IsRepG1
1146 mov dist2, LOC rep2
1147 mov LOC rep2, dist
1149 IF_BIT_1 probs_state_R, 0, IsRepG2, IsRepG2_label
1150 mov LOC rep0, dist2
1151 jmp len_decode
1153 ; MY_ALIGN_32
; IsRepG2 = 1: old rep2 moves to rep3 and old rep3 becomes rep0
1154 IsRepG2_label:
1155 UPDATE_1 probs_state_R, 0, IsRepG2
1156 mov dist, LOC rep3
1157 mov LOC rep3, dist2
1158 mov LOC rep0, dist
1159 jmp len_decode
1163 ; ---------- SPEC SHORT DISTANCE ----------
; Distances from small slots. x1 still carries the marker bit (32); after the
; subtraction it is the number of extra bits, and a result of zero or a
; borrow means sym (slot AND 3) is already the complete distance.
1165 MY_ALIGN_32
1166 short_dist:
1167 sub x1, 32 + 1
1168 jbe decode_dist_end
1169 or sym, 2
1170 shl sym, x1_L
; sym_R becomes a pointer into the SpecPos table; sym2 is the step in bytes
1171 lea sym_R, [probs + sym_R * PMULT + SpecPos * PMULT + 1 * PMULT]
1172 mov sym2, PMULT ; step
1173 MY_ALIGN_32
1174 spec_loop:
1175 REV_1_VAR x2
1176 dec x1
1177 jnz spec_loop
; REV_1_VAR overwrote probs; restore it, then convert sym_R from a pointer
; back into a distance value
1179 mov probs, LOC probs_Spec
1180 sub sym, sym2
1181 sub sym, SpecPos * PMULT
1182 sub sym_R, probs
1183 shr sym, PSHIFT
1185 jmp decode_dist_end
1188 ; ---------- COPY MATCH CROSS ----------
; Match copy whose source range crosses the end of the dictionary buffer.
; The loop copies the part up to the end of the buffer; the remainder is
; handed to copy_common with the source restarted at the buffer start.
1189 copy_match_cross:
1190 ; t0_R - src pos
1191 ; r1 - len to dicBufSize
1192 ; cnt_R - total copy len
1194 mov t1_R, t0_R ; srcPos
1195 mov t0_R, dic
1196 mov r1, LOC dicBufSize ;
1197 neg cnt_R
; Head of the copy loop, targeted by the `jne @b` below. It runs until the
; source offset t1_R reaches dicBufSize.
1198 @@:
1199 movzx sym, byte ptr[t1_R * 1 + t0_R]
1200 inc t1_R
1201 mov byte ptr[cnt_R * 1 + dicPos], sym_L
1202 inc cnt_R
1203 cmp t1_R, r1
1204 jne @b
; continue from the first byte of the buffer; t0_R = dic - cnt_R is the
; source end pointer that copy_common expects
1206 movzx sym, byte ptr[t0_R]
1207 sub t0_R, cnt_R
1208 jmp copy_common
; Exit paths. sym carries the return value: 1 from fin_ERROR, 0 from fin_OK.
1213 fin_ERROR:
; keep the pending length so the caller can see it in remainLen
1214 mov LOC remainLen, len_temp
1215 ; fin_ERROR_2:
1216 mov sym, 1
1217 jmp fin
; Reached when the decoded distance is out of range. Only 0FFFFFFFFh is
; accepted: remainLen is set to kMatchSpecLenStart and the kNumStates * PMULT
; bias added at IsMatch_label is removed from state.
1219 end_of_payload:
1220 cmp sym, 0FFFFFFFFh ; -1
1221 jne fin_ERROR
1223 mov LOC remainLen, kMatchSpecLenStart
1224 sub state, kNumStates * PMULT
1226 fin_OK:
1227 xor sym, sym
; Common epilogue: normalize once more, then write the register state back
; into the caller's structure.
1229 fin:
1230 NORM
1232 mov r1, LOC lzmaPtr
; dicPos goes back to being an offset from the dictionary base
1234 sub dicPos, LOC dic_Spec
1235 mov GLOB dicPos_Spec, dicPos
1236 mov GLOB buf_Spec, buf
1237 mov GLOB range_Spec, range
1238 mov GLOB code_Spec, cod
; undo the PMULT pre-scaling of state
1239 shr state, PSHIFT
1240 mov GLOB state_Spec, state
1241 mov GLOB processedPos_Spec, processedPos
1243 RESTORE_VAR(remainLen)
1244 RESTORE_VAR(rep0)
1245 RESTORE_VAR(rep1)
1246 RESTORE_VAR(rep2)
1247 RESTORE_VAR(rep3)
; return value
1249 mov x0, sym
; switch back to the caller's stack saved in the prologue
1251 mov RSP, LOC Old_RSP
1253 MY_POP_PRESERVED_REGS
1254 MY_ENDP
1256 _TEXT$LZMADECOPT ENDS