1 /* match.s -- optional optimized asm version of longest match in deflate.c
3 Copyright (C) 2002, 2006, 2009-2024 Free Software Foundation, Inc.
4 Copyright (C) 1992-1993 Jean-loup Gailly
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3, or (at your option)
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
21 * The 68020 version has been written by Francesco Potortì <pot@cnuce.cnr.it>
22 * with adaptations by Carsten Steger <stegerc@informatik.tu-muenchen.de>,
23 * Andreas Schwab <schwab@lamothe.informatik.uni-dortmund.de> and
24 * Kristoffer Eriksson <ske@pkmab.se>
26 * The ia64 version has been written by Sverre Jarp (HP Labs) 2001-2002.
27 * Unwind directives and some reformatting for better readability added by
28 * David Mosberger-Tang <davidm@hpl.hp.com>.
31 /* Preprocess with -DNO_UNDERLINE if your C compiler does not prefix
32 * external symbols with an underline character '_'.
 * The definitions that follow map underscore-prefixed names onto the
 * corresponding names without the leading underscore.
36 # define _window window
37 # define _match_start match_start
38 # define _prev_length prev_length
39 # define _good_match good_match
40 # define _nice_match nice_match
41 # define _strstart strstart
42 # define _max_chain_length max_chain_length
44 # define _match_init match_init
45 # define _longest_match longest_match
49 error
: DYN_ALLOC
not yet supported in match
.s
52 /* On x86-64, Sun C 5.13 (Oracle Solaris Studio 12.4) 'cc -E -m64'
53 defines i386 when compiling .s or .S files! Luckily it also
54 defines __x86_64__. See Bug#23133. */
55 #if ((defined i386 || defined _I386 || defined __i386 || defined __i386__) \
56 && !defined __x86_64__)
58 /* This version is for 386 Unix or OS/2 in 32 bit mode.
59 * Warning: it uses the AT&T syntax: mov source,dest
60 * This file is only optional. If you want to force the C version,
61 * add -DNO_ASM to CFLAGS in Makefile and set OBJA to an empty string.
62 * If you have reduced WSIZE in gzip.h, then change its value below.
63 * This version assumes static allocation of the arrays (-DDYN_ALLOC not used).
69 #define MAX_MATCH2 $128 /* MAX_MATCH/2-1 */
/* MAX_DIST expands to a bare (unparenthesized) expression; in this section it
 * is used as a whole instruction operand (sub MAX_DIST,%edx). */
72 #define MAX_DIST WSIZE - MAX_MATCH - MIN_MATCH - 1
82 /*-----------------------------------------------------------------------
83 * Set match_start to the longest match starting at the given string and
84 * return its length. Matches shorter or equal to prev_length are discarded,
85 * in which case the result is equal to prev_length and match_start is
87 * IN assertions: cur_match is the head of the hash chain for the current
88 * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
91 _longest_match
: /* int longest_match(cur_match) */
/* Register roles, as stated by the per-instruction comments below:
 *   ebp = chain_length, edx = limit, ebx = best_len,
 *   edi = scan+2, esi = cur_match (and, during the compare, the match pointer),
 *   eax = result (best_len) on return. */
93 #define cur_match 20(%esp)
94 /* return address */ /* esp+16 */
95 push
%ebp
/* esp+12 */
103 * chain_length equ ebp
108 mov _max_chain_length
,%ebp
/* chain_length = max_chain_length */
111 sub MAX_DIST
,%edx
/* limit = strstart-MAX_DIST */
113 sub
%edx
,%edx
/* limit = NIL */
115 add $
2+_window
,%edi
/* edi = offset(window+strstart+2) */
116 mov _prev_length
,%ebx
/* best_len = prev_length */
117 movw
-3(%ebx
,%edi
),%ax
/* ax = scan[best_len-1..best_len] */
118 movw
-2(%edi
),%cx
/* cx = scan[0..1] */
119 cmp _good_match
,%ebx
/* do we have a good match already? */
121 shr $
2,%ebp
/* chain_length >>= 2 */
126 /* at this point, edi == scan+2, esi == cur_match */
127 movw
-3(%ebx
,%edi
),%ax
/* ax = scan[best_len-1..best_len] */
128 movw
-2(%edi
),%cx
/* cx = scan[0..1] */
131 * at this point, di == scan+2, si == cur_match,
132 * ax = scan[best_len-1..best_len] and cx = scan[0..1]
135 movw
_prev(%esi
,%esi
),%si
/* cur_match = prev[cur_match] */
136 /* top word of esi is still 0 */
137 cmp
%edx
,%esi
/* cur_match <= limit ? */
139 dec
%ebp
/* --chain_length */
142 cmpw _window
-1(%ebx
,%esi
),%ax
/* check match at best_len-1 */
144 cmpw
_window(%esi
),%cx
/* check min_match_length match */
147 lea _window
+2(%esi
),%esi
/* si = match */
148 mov
%edi
,%eax
/* eax = scan+2 (start of the comparison, restored by the xchg below) */
149 mov MAX_MATCH2
,%ecx
/* scan for at most MAX_MATCH bytes */
150 rep
; cmpsw
/* loop until mismatch */
151 je maxmatch
/* match of length MAX_MATCH? */
153 movb
-2(%edi
),%cl
/* mismatch on first or second byte? */
154 subb
-2(%esi
),%cl
/* cl = 0 if first bytes equal */
155 xchg
%edi
,%eax
/* edi = scan+2, eax = end of scan */
156 sub
%edi
,%eax
/* eax = len */
157 sub
%eax
,%esi
/* esi = cur_match + 2 + offset(window) */
158 sub $
2+_window
,%esi
/* esi = cur_match */
159 subb $
1,%cl
/* set carry if cl == 0 (cannot use DEC: it does not update the carry flag) */
160 adc $
0,%eax
/* eax = carry ? len+1 : len */
161 cmp
%ebx
,%eax
/* len > best_len ? */
163 mov
%esi
,_match_start
/* match_start = cur_match */
164 mov
%eax
,%ebx
/* ebx = best_len = len */
165 cmp _nice_match
,%eax
/* len >= nice_match ? */
168 mov
%ebx
,%eax
/* result = eax = best_len */
180 /* ======================== 680x0 version ================================= */
182 #if defined(m68k)||defined(mc68k)||defined(__mc68000__)||defined(__MC68000__)
188 #if defined(__mc68020__) || defined(__MC68020__) || defined(sysV68)
194 #if defined(mc68020) || defined(mc68000)
196 #if (defined(mc68020) || defined(NeXT)) && !defined(UNALIGNED_OK)
197 # define UNALIGNED_OK
/* The macros below hide assembler-syntax differences so that the code can be
 * written once: the sysV68 branch produces Motorola-style operands such as
 * (bd,An), the default branch produces the An@(bd) style. invert_maybe()
 * swaps the operand order between the two styles; in this file it is only
 * applied to the operands of compare instructions. */
200 #ifdef sysV68 /* Try Motorola Delta style */
202 # define GLOBAL(symbol) global symbol
204 # define FILE(filename) file filename
205 # define invert_maybe(src,dst) dst,src
206 # define imm(data) &data
207 # define reg(register) %register
210 # define addql addq.l
215 # define cmpmb cmpm.b
220 # define movel move.l
221 # define movew move.w
222 # define moveb move.b
223 # define moveml movem.l
226 # define subql subq.l
228 # define IndBase(bd,An) (bd,An)
229 # define IndBaseNdxl(bd,An,Xn) (bd,An,Xn.l)
230 # define IndBaseNdxw(bd,An,Xn) (bd,An,Xn.w)
231 # define predec(An) -(An)
232 # define postinc(An) (An)+
234 #else /* default style (Sun 3, NeXT, Atari) */
236 # define GLOBAL(symbol) .globl symbol
238 # define FILE(filename) .even
239 # define invert_maybe(src,dst) src,dst
240 # if defined(sun) || defined(mc68k)
241 # define imm(data) #data
243 # define imm(data) \#data
245 # define reg(register) register
248 # if defined(sun) || defined(mc68k)
253 # define IndBase(bd,An) An@(bd)
254 # define IndBaseNdxl(bd,An,Xn) An@(bd,Xn:l)
255 # define IndBaseNdxw(bd,An,Xn) An@(bd,Xn:w)
256 # define predec(An) An@-
257 # define postinc(An) An@+
/* Symbolic register assignment; the trailing comment on each line gives the
 * C type of the value the register holds. */
261 #define Best_Len reg(d0) /* unsigned */
262 #define Cur_Match reg(d1) /* IPos */
263 #define Loop_Counter reg(d2) /* int */
264 #define Scan_Start reg(d3) /* unsigned short */
265 #define Scan_End reg(d4) /* unsigned short */
266 #define Limit reg(d5) /* IPos */
267 #define Chain_Length reg(d6) /* unsigned */
268 #define Scan_Test reg(d7) /* scratch word assembled from two match bytes for the byte-wise compares */
269 #define Scan reg(a0) /* *uch */
270 #define Match reg(a1) /* *uch */
271 #define Prev_Address reg(a2) /* *Pos */
272 #define Scan_Ini reg(a3) /* *uch */
273 #define Match_Ini reg(a4) /* *uch */
274 #define Stack_Pointer reg(sp)
276 #define MAX_MATCH 258
279 #define MAX_DIST (WSIZE - MAX_MATCH - MIN_MATCH - 1)
282 GLOBAL (_longest_match
)
291 /*-----------------------------------------------------------------------
292 * Set match_start to the longest match starting at the given string and
293 * return its length. Matches shorter or equal to prev_length are discarded,
294 * in which case the result is equal to prev_length and match_start is
296 * IN assertions: cur_match is the head of the hash chain for the current
297 * string (strstart) and its distance is <= MAX_DIST, and prev_length >= 1
300 /* int longest_match (cur_match) */
303 # define pushreg 15928 /* d2-d6/a2-a4 */
306 # define pushreg 16184 /* d2-d7/a2-a4 */
/* Entry: fetch the cur_match argument from 4(sp), then push the registers
 * selected by pushreg. NOTE(review): the conditionals choosing between the
 * two pushreg masks are outside this excerpt. */
311 movel
IndBase(4,Stack_Pointer
),Cur_Match
312 moveml
imm(pushreg
),predec(Stack_Pointer
)
/* Cache the tuning globals and table addresses in registers. */
313 movel _max_chain_length
,Chain_Length
314 movel _prev_length
,Best_Len
315 movel
imm(_prev
),Prev_Address
316 movel
imm(_window
+MIN_MATCH
),Match_Ini
317 movel _strstart
,Limit
318 movel Match_Ini
,Scan_Ini
/* Limit = strstart - MAX_DIST. */
320 subw
imm(MAX_DIST
),Limit
/* Compare best_len with good_match; the instructions after the compare
 * divide chain_length by 4 and then subtract 1 from it. NOTE(review): the
 * branch between the compare and the shift is not visible here; the
 * decrement is presumably because Chain_Length drives the dbls loop below. */
324 cmpl
invert_maybe(_good_match
,Best_Len
)
326 lsrl
imm(2),Chain_Length
328 subql
imm(1),Chain_Length
/* Load Scan_Start from Scan_Ini-MIN_MATCH and Scan_End from
 * Scan_Ini-MIN_MATCH-1+Best_Len, first with word moves and then again
 * byte by byte. NOTE(review): the conditionals selecting one variant are
 * outside this excerpt (presumably UNALIGNED_OK). */
330 movew
IndBase(-MIN_MATCH
,Scan_Ini
),Scan_Start
331 movew
IndBaseNdxw(-MIN_MATCH
-1,Scan_Ini
,Best_Len
),Scan_End
333 moveb
IndBase(-MIN_MATCH
,Scan_Ini
),Scan_Start
334 lslw
imm(8),Scan_Start
335 moveb
IndBase(-MIN_MATCH
+1,Scan_Ini
),Scan_Start
336 moveb
IndBaseNdxw(-MIN_MATCH
-1,Scan_Ini
,Best_Len
),Scan_End
338 moveb
IndBaseNdxw(-MIN_MATCH
,Scan_Ini
,Best_Len
),Scan_End
/* Reload Scan_End for the current Best_Len (word and byte-wise variants). */
344 movew
IndBaseNdxw(-MIN_MATCH
-1,Scan_Ini
,Best_Len
),Scan_End
346 moveb
IndBaseNdxw(-MIN_MATCH
-1,Scan_Ini
,Best_Len
),Scan_End
348 moveb
IndBaseNdxw(-MIN_MATCH
,Scan_Ini
,Best_Len
),Scan_End
/* Follow the chain: double Cur_Match to index 16-bit entries, load the next
 * candidate from the prev table, and compare it with Limit. */
352 lslw
imm(1),Cur_Match
353 movew
IndBaseNdxl(0,Prev_Address
,Cur_Match
),Cur_Match
354 cmpw
invert_maybe(Limit
,Cur_Match
)
355 dbls Chain_Length
,L__do_scan
359 movel Match_Ini
,Match
/* Quick checks before the full compare: the candidate's word at
 * Best_Len-1 against Scan_End and its first word against Scan_Start,
 * done with word compares and then byte-wise through Scan_Test. */
362 cmpw
invert_maybe(IndBaseNdxw(-MIN_MATCH
-1,Match
,Best_Len
),Scan_End
)
364 cmpw
invert_maybe(IndBase(-MIN_MATCH
,Match
),Scan_Start
)
367 moveb
IndBaseNdxw(-MIN_MATCH
-1,Match
,Best_Len
),Scan_Test
368 lslw
imm(8),Scan_Test
369 moveb
IndBaseNdxw(-MIN_MATCH
,Match
,Best_Len
),Scan_Test
370 cmpw
invert_maybe(Scan_Test
,Scan_End
)
372 moveb
IndBase(-MIN_MATCH
,Match
),Scan_Test
373 lslw
imm(8),Scan_Test
374 moveb
IndBase(-MIN_MATCH
+1,Match
),Scan_Test
375 cmpw
invert_maybe(Scan_Test
,Scan_Start
)
/* Byte compare loop: Loop_Counter starts at (MAX_MATCH-MIN_MATCH+1)-1 and
 * dbne repeats the cmpmb until a mismatch or until the counter runs out. */
379 movew
imm((MAX_MATCH
-MIN_MATCH
+1)-1),Loop_Counter
382 cmpmb
postinc(Match
),postinc(Scan
)
383 dbne Loop_Counter
,L__scan_loop
386 addql
imm(MIN_MATCH
-1),Scan
387 cmpl
invert_maybe(Best_Len
,Scan
)
/* Record the new best candidate and test it against nice_match. */
390 movel Cur_Match
,_match_start
391 cmpl
invert_maybe(_nice_match
,Best_Len
)
/* Restore the registers saved on entry. */
394 moveml
postinc(Stack_Pointer
),imm(popreg
)
399 # if defined (__ia64__)
401 /* ======================== ia64 version ================================= */
404 * 'longest_match.S' (assembly program for gzip for the IA-64 architecture)
406 * Optimized for McKinley, but with Merced-compatibility, such as
407 * MIB+MIB, used wherever possible.
409 * Copyright: Sverre Jarp (HP Labs) 2001-2002
411 * See deflate.c for c-version
412 * Version 2 - Optimize the outer loop
// Select the byte-order-dependent definitions. The constant must be spelled
// __BIG_ENDIAN (two leading underscores): an identifier that is not a macro
// evaluates to 0 inside #if, so a misspelled name makes this test false on
// every target and the big-endian variant can never be chosen.
417 #if __BYTE_ORDER == __BIG_ENDIAN
427 // 24 rotating registers (r32 - r55)
429 #define s_vmatch0 r32
430 #define s_vmatch1 r33
431 #define s_vmatbst r34
432 #define s_vmatbst1 r35
433 #define s_amatblen r36
445 #define s_vstrstart r30
446 #define s_vchainlen r29
447 #define s_awinbest r28
448 #define s_vcurmatch r27
450 #define s_vscanend r25
451 #define s_vscanend1 r24
452 #define s_anicematch r23
457 #define s_awindow r19
458 #define s_amatchstart r18
462 #define s_ascanend r14
464 #define s_vspec_cmatch r11 // next iteration
467 #define s_vbestlen r8 // return register
475 #define p_bn2 p5 // Use in loop (indicating bestlen != 2)
477 #define p_nbs p9 // not new best_len
478 #define p_nnc p10 // not nice_length
482 #define MAX_MATCH 258
485 #define MAX_DIST WSIZE - MAX_MATCH - MIN_MATCH - 1
490 #define R_ROTATING 24
// Shorthands for the unconditional-form compares used below.
496 #define cgtu cmp.gt.unc
497 #define cgeu cmp.ge.unc
498 #define cneu cmp.ne.unc
// int longest_match(cur_match) for ia64: cur_match arrives in in0 and the
// best length found is returned in r8 (s_vbestlen).
500 .global longest_match
507 alloc r2
=ar
.pfs
,R_INPUT
,R_LOCAL
,R_OUTPUT
,R_ROTATING
508 .rotr scan
[MLAT
+2], match
[MLAT
+2], shscan0
[SHLAT
+1], \
509 shscan1
[SHLAT
+1], shmatch0
[SHLAT
+1], shmatch1
[SHLAT
+1]
510 .rotp lc
[MLAT
+SHLAT
+2]
511 mova s_vspec_cmatch
=in0
// cur_match from input register
512 add s_tm1
=@
gprel(strstart
),gp
// a(a(strstart))
514 add s_tm3
=@
gprel(prev_length
),gp
// a(a(prev_length))
515 add s_tm5
=@
ltoff(window
),gp
// a(a(window))
516 add s_tm6
=@
ltoff(prev
),gp
// a(a(prev))
519 ld4 s_vstrstart
=[s_tm1
] // strstart
520 ld4 s_vbestlen
=[s_tm3
] // best_len = prev_length
521 brp
.loop
.imp
.cmploop
,.cmploop
+48
523 add s_tm2
=@
gprel(max_chain_length
),gp
// a(a(max_chain_length))
527 ld8 s_aprev
=[s_tm6
] // a(prev)
528 ld8 s_awindow
=[s_tm5
] // a(window)
530 movi0 s_prsave
=pr
// save predicates
532 add s_tm4
=@
gprel(good_match
),gp
// a(a(good_match))
533 add s_tm7
=@
ltoff(nice_match
),gp
// a(a(nice_match))
534 add s_tm8
=@
ltoff(match_start
),gp
// a(match_start)
537 ld8 s_anicematch
=[s_tm7
] // a(nice_match)
538 ld8 s_amatchstart
=[s_tm8
] // a(match_start)
539 .save ar
.lc
, s_lcsave
540 movi0 s_lcsave
=ar
.lc
// save loop count register
543 add s_tm1
=-(MAX_MATCH
+ MIN_MATCH
),s_wmask
// maxdist
544 cmp
.eq p_ll
,p0
=r0
,r0
// parallel compare initialized as 'true'
545 mova s_vcurmatch
=s_vspec_cmatch
548 ld4 s_vchainlen
=[s_tm2
] // chain_length=max_chain_length
549 ld4 s_tm4
=[s_tm4
] // v(good_match)
550 add s_ascan
=s_awindow
,s_vstrstart
// scan=window + strstart
552 sub s_vlimit
=s_vstrstart
, s_tm1
// limit=strstart - MAX_DIST
553 add s_amatch
=s_awindow
,s_vspec_cmatch
// match=window + cur_match
554 and s_vspec_cmatch
=s_vspec_cmatch
,s_wmask
557 add s_amatblen
=s_amatch
,s_vbestlen
// a(match) + best_len
558 cneu p_bn2
,p0
=2,s_vbestlen
// set if bestlen != 2
559 add s_ascanend
=s_ascan
,s_vbestlen
// compute a(scan) + best_len
561 ld1 s_vscan0
=[s_ascan
],1 // NB: s_ascan++
562 ld1 s_vmatch0
=[s_amatch
],1
563 cgtu p0
,p_no
=s_vlimit
,r0
// is result positive ?
566 ld1
.nt1 s_vscan1
=[s_ascan
],2 // NB: s_ascan+3 in total
567 ld1
.nt1 s_vmatch1
=[s_amatch
],2
568 add s_awinbest
=s_awindow
,s_vbestlen
// a(window) + best_len
571 ld1
.nt1 s_vscanend
=[s_ascanend
],-1 // scan_end=scan[best_len]
572 ld1
.nt1 s_vmatbst
=[s_amatblen
],-1
573 (p_no
) mova s_vlimit
=r0
576 (p_bn2
) ld1
.nt1 s_vscanend1
=[s_ascanend
],1 // scan_end1=scan[best_len-1]
577 (p_bn2
) ld1
.nt1 s_vmatbst1
=[s_amatblen
]
578 shladd s_vspec_cmatch
=s_vspec_cmatch
,1,s_aprev
580 cgeu p_shf
,p0
=s_vbestlen
,s_tm4
// is (prev_length >= good_match) ?
583 ld1
.nt1 s_vscan3
=[s_ascan
]
584 ld2
.nt1 s_vspec_cmatch
=[s_vspec_cmatch
]
587 (p_shf
) shr
.u s_vchainlen
=s_vchainlen
,2 // chain_length >>= 2
590 ld1
.nt1 s_vmatch3
=[s_amatch
]
591 // p_ll was initialized 'true'; the and-type compares clear it on the first mismatch:
592 cmp
.eq
.and p_ll
,p0
=s_vmatch0
,s_vscan0
593 cmp
.eq
.and p_ll
,p0
=s_vmatbst
,s_vscanend
595 cmp
.eq
.and p_ll
,p0
=s_vmatch1
,s_vscan1
596 (p_bn2
) cmp
.eq
.and p_ll
,p0
=s_vmatbst1
,s_vscanend1
597 (p_ll
) br
.cond
.dpnt
.many
.test_more
603 add s_amatch
=s_awindow
,s_vspec_cmatch
// match=window + cur_match
604 mov s_vcurmatch
=s_vspec_cmatch
// current value
605 add s_vchainlen
=-1,s_vchainlen
// --chain_length
607 cmp
.le
.unc p_end
,p0
=s_vspec_cmatch
,s_vlimit
608 and s_vspec_cmatch
=s_vspec_cmatch
,s_wmask
609 (p_end
) br
.cond
.dptk
.many
.terminate
612 ld1 s_vmatch0
=[s_amatch
],1 // load match[0]
613 // compute prev[cur_match]:
614 shladd s_vspec_cmatch
=s_vspec_cmatch
,1,s_aprev
615 cmp
.eq
.unc p_end
,p0
=s_vchainlen
,r0
618 add s_amatblen
=s_awinbest
,s_vcurmatch
// a(match[best_len]) = window + best_len + cur_match
619 (p_end
) br
.cond
.dptk
.many
.terminate
621 }{.mmi
// Cycle 2 (short)
622 ld2
.nt1 s_vspec_cmatch
=[s_vspec_cmatch
] // get next cur_match
624 }{.mmi
// Cycle 3 (short)
625 ld1
.nt1 s_vmatbst
=[s_amatblen
],-1 // load match[best_len]
626 cmp
.ne
.unc p_ll
,p0
=r0
,r0
// parallel compare initialized as 'false'
628 }{.mmi
// Cycle 4 (short)
629 // load match[1] - - note: match += 3 (in total):
630 ld1
.nt1 s_vmatch1
=[s_amatch
],2
633 (p_bn2
) ld1
.nt1 s_vmatbst1
=[s_amatblen
] // load match[best_len-1]
634 }{.mib
// Here we (MOST LIKELY) pay a L2-fetch stall
635 // p_ll switched on as soon as we get a mismatch:
636 cmp
.ne
.or p_ll
,p0
=s_vmatch0
,s_vscan0
637 cmp
.ne
.or p_ll
,p0
=s_vmatbst
,s_vscanend
638 (p_ll
) br
.cond
.dptk
.many
.next_iter
641 ld1
.nt1 s_vmatch3
=[s_amatch
]
645 cmp
.ne
.or p_ll
,p0
=s_vmatch1
,s_vscan1
646 (p_bn2
) cmp
.ne
.or p_ll
,p0
=s_vmatbst1
,s_vscanend1
647 (p_ll
) br
.cond
.dptk
.many
.next_iter
651 // We have passed the first hurdle - Are there additional matches ???
655 and s_tm3
=7,s_ascan
// get byte offset
656 and s_tm4
=7,s_amatch
// get byte offset
657 movi0 ar
.ec
=MLAT
+SHLAT
+2 // NB: One trip more than usual
659 cmp
.ne
.unc p_no
,p0
=s_vscan3
,s_vmatch3
// does the next byte differ?
660 (p_no
) br
.cond
.dptk
.many
.only3
663 and s_tm1
=-8,s_ascan
// get aligned address
664 shladd s_tm3
=s_tm3
,3,r0
665 movi0 ar
.lc
=31 // 32 times around the loop (8B at a time)
667 and s_tm2
=-8,s_amatch
// get aligned address
668 shladd s_tm4
=s_tm4
,3,r0
672 ld8
.nt1 scan
[1]=[s_tm1
],8 // load first chunk
673 sub s_tm5
=64,s_tm3
// 64 - amount
676 ld8
.nt1 match
[1]=[s_tm2
],8 // load first chunk
677 sub s_tm6
=64,s_tm4
// 64 - amount
678 add s_vlen
=-8,s_vlen
// will be updated at least once
684 (lc
[0]) ld8 scan
[0]=[s_tm1
],8 // next scan chunk
685 (lc
[MLAT
+SHLAT
+1]) add s_vlen
=8,s_vlen
686 (lc
[MLAT
]) first shscan0
[0]=scan
[MLAT
+1],s_tm3
688 (lc
[MLAT
+SHLAT
+1]) cmp
.ne
.unc p_no
,p0
=s_tm7
,s_tm8
// break search if !=
689 (lc
[MLAT
]) first shmatch0
[0]=match
[MLAT
+1],s_tm4
690 (p_no
) br
.cond
.dpnt
.many
.mismatch
693 (lc
[0]) ld8 match
[0]=[s_tm2
],8
694 // shift left(le) or right(be):
695 (lc
[MLAT
]) second shscan1
[0]=scan
[MLAT
],s_tm5
696 (lc
[MLAT
]) second shmatch1
[0]=match
[MLAT
],s_tm6
698 (lc
[MLAT
+SHLAT
]) or s_tm7
=shscan0
[SHLAT
],shscan1
[SHLAT
]
699 (lc
[MLAT
+SHLAT
]) or s_tm8
=shmatch0
[SHLAT
],shmatch1
[SHLAT
]
700 br
.ctop
.dptk
.many
.cmploop
710 {.mii
// Cycle 0 (short)
711 (p_no
) pcmp1
.eq s_tm2
=s_tm7
,s_tm8
// find first non-matching character
715 (p_no
) count s_tm1
=s_tm2
717 }{.mib
// Cycle 2 (short)
718 (p_no
) add s_vlen
=s_vlen
,s_tm1
// effective length
725 {.mib
// Cycle 0 (short)
726 cmp
.gt
.unc p0
,p_nbs
=s_vlen
,s_vbestlen
// (len > best_len) ?
727 (p_nbs
) br
.cond
.dpnt
.many
.next_iter
// if not, reiterate
729 }{.mmi
// Cycle 1 (short)
730 ld4 s_tm7
=[s_anicematch
] // nice_match
731 st4
[s_amatchstart
]= s_vcurmatch
732 add s_ascanend
=s_ascan
,s_vlen
// reset with best_len
734 }{.mmi
// Cycle 2 (short)
735 mova s_vbestlen
=s_vlen
736 add s_ascanend
=-3,s_ascanend
// remember extra offset
738 }{.mmi
// Cycle 3 (short)
739 ld1 s_vscanend
=[s_ascanend
],-1 // scan_end=scan[best_len]
740 add s_awinbest
=s_awindow
,s_vbestlen
// update with new best_len
741 cmp
.ne
.unc p_bn2
,p0
=2,s_vbestlen
// set if bestlen != 2
743 }{.mib
// Cycle 4 (short)
744 // scan_end1=scan[best_len-1] NB: s_ascanend reset:
745 ld1
.nt1 s_vscanend1
=[s_ascanend
],1
746 cmp
.lt
.unc p_nnc
,p0
=s_vlen
,s_tm7
// compare with nice_match
747 (p_nnc
) br
.cond
.dptk
.many
.next_iter
758 br
.ret
.sptk
.many rp
// ret0 is identical to best_len
/* Fallback for targets matched by none of the i386, 680x0 or ia64 conditionals
 * closed below: the text that follows is a deliberate build-time error. */
771 error
: this asm version is
for 386 or 680x0
or ia64 only
772 # endif /* __ia64__ */
773 #endif /* mc68000 || mc68020 */
774 #endif /* i386 || _I386 */