2 # Copyright (C) 2001-2005, Parrot Foundation.
7 use lib qw( . lib ../lib ../../lib );
9 use Parrot::Test tests => 11;
14 t/op/cclass.t - character class tests
22 Tests find_cclass, find_not_cclass, and is_cclass.
26 pir_output_is( <<'CODE', <<'OUT', "find_cclass, ascii" );
27 .include "cclass.pasm"
29 $S0 = ascii:"test_func(1)"
30 test( .CCLASS_WORD, $S0 )
32 $S0 = ascii:"ab\nC_X34.\0 \t!"
33 test( .CCLASS_NUMERIC, $S0 )
34 test( .CCLASS_LOWERCASE, $S0 )
35 test( .CCLASS_PUNCTUATION, $S0 )
43 $I1 = find_cclass flags, str, $I0, 100
47 if $I0 <= $I2 goto loop
52 0;1;2;3;4;5;6;7;8;10;10;12;12;
53 6;6;6;6;6;6;6;7;13;13;13;13;13;13;
54 0;1;13;13;13;13;13;13;13;13;13;13;13;13;
55 4;4;4;4;4;8;8;8;8;12;12;12;12;13;
58 pir_output_is( <<'CODE', <<'OUT', "find_not_cclass, ascii" );
59 .include "cclass.pasm"
61 $S0 = ascii:"test_func(1)"
62 test( .CCLASS_WORD, $S0 )
64 $S0 = ascii:"ab\nC_X34.\0 \t!"
65 test( .CCLASS_NUMERIC, $S0 )
66 test( .CCLASS_LOWERCASE, $S0 )
67 test( .CCLASS_PUNCTUATION, $S0 )
75 $I1 = find_not_cclass flags, str, $I0, 100
79 if $I0 <= $I2 goto loop
84 9;9;9;9;9;9;9;9;9;9;11;11;12;
85 0;1;2;3;4;5;8;8;8;9;10;11;12;13;
86 2;2;2;3;4;5;6;7;8;9;10;11;12;13;
87 0;1;2;3;5;5;6;7;9;9;10;11;13;13;
90 pir_output_is( <<'CODE', <<'OUT', "find_cclass, iso-8859-1" );
91 .include "cclass.pasm"
93 $S0 = iso-8859-1:"test_func(1)"
94 test( .CCLASS_WORD, $S0 )
96 $S0 = iso-8859-1:"ab\nC_X34.\0 \t!"
97 test( .CCLASS_NUMERIC, $S0 )
98 test( .CCLASS_LOWERCASE, $S0 )
99 test( .CCLASS_PUNCTUATION, $S0 )
107 $I1 = find_cclass flags, str, $I0, 100
111 if $I0 <= $I2 goto loop
116 0;1;2;3;4;5;6;7;8;10;10;12;12;
117 6;6;6;6;6;6;6;7;13;13;13;13;13;13;
118 0;1;13;13;13;13;13;13;13;13;13;13;13;13;
119 4;4;4;4;4;8;8;8;8;12;12;12;12;13;
122 pir_output_is( <<'CODE', <<'OUT', "find_not_cclass, iso-8859-1" );
123 .include "cclass.pasm"
125 $S0 = iso-8859-1:"test_func(1)"
126 test( .CCLASS_WORD, $S0 )
128 $S0 = iso-8859-1:"ab\nC_X34.\0 \t!"
129 test( .CCLASS_NUMERIC, $S0 )
130 test( .CCLASS_LOWERCASE, $S0 )
131 test( .CCLASS_PUNCTUATION, $S0 )
139 $I1 = find_not_cclass flags, str, $I0, 100
143 if $I0 <= $I2 goto loop
148 9;9;9;9;9;9;9;9;9;9;11;11;12;
149 0;1;2;3;4;5;8;8;8;9;10;11;12;13;
150 2;2;2;3;4;5;6;7;8;9;10;11;12;13;
151 0;1;2;3;5;5;6;7;9;9;10;11;13;13;
154 pir_output_is( <<'CODE', <<'OUT', "is_cclass, ascii" );
155 .include "cclass.pasm"
157 $S1 = ascii:"ab\nC_X34.\0 \t!"
162 test2( str, .CCLASS_UPPERCASE)
163 test2( str, .CCLASS_LOWERCASE)
164 test2( str, .CCLASS_ALPHABETIC)
165 test2( str, .CCLASS_NUMERIC)
166 test2( str, .CCLASS_HEXADECIMAL)
167 test2( str, .CCLASS_WHITESPACE)
168 test2( str, .CCLASS_PRINTING)
169 test2( str, .CCLASS_GRAPHICAL)
170 test2( str, .CCLASS_BLANK)
171 test2( str, .CCLASS_CONTROL)
172 test2( str, .CCLASS_PUNCTUATION)
173 test2( str, .CCLASS_ALPHANUMERIC)
174 test2( str, .CCLASS_NEWLINE)
175 test2( str, .CCLASS_WORD)
177 $I0 = .CCLASS_NEWLINE|.CCLASS_WHITESPACE
179 $I0 = .CCLASS_WHITESPACE|.CCLASS_LOWERCASE
181 $I0 = .CCLASS_UPPERCASE|.CCLASS_PUNCTUATION
191 $I2 = is_cclass code, str, $I0
194 if $I0 <= $I1 goto loop
217 pir_output_is( <<'CODE', <<'OUT', "is_cclass, iso-8859-1" );
218 .include "cclass.pasm"
220 $S1 = iso-8859-1:"ab\nC_X34.\0 \t!"
225 test2( str, .CCLASS_UPPERCASE)
226 test2( str, .CCLASS_LOWERCASE)
227 test2( str, .CCLASS_ALPHABETIC)
228 test2( str, .CCLASS_NUMERIC)
229 test2( str, .CCLASS_HEXADECIMAL)
230 test2( str, .CCLASS_WHITESPACE)
231 test2( str, .CCLASS_PRINTING)
232 test2( str, .CCLASS_GRAPHICAL)
233 test2( str, .CCLASS_BLANK)
234 test2( str, .CCLASS_CONTROL)
235 test2( str, .CCLASS_PUNCTUATION)
236 test2( str, .CCLASS_ALPHANUMERIC)
237 test2( str, .CCLASS_NEWLINE)
238 test2( str, .CCLASS_WORD)
240 $I0 = .CCLASS_NEWLINE|.CCLASS_WHITESPACE
242 $I0 = .CCLASS_WHITESPACE|.CCLASS_LOWERCASE
244 $I0 = .CCLASS_UPPERCASE|.CCLASS_PUNCTUATION
254 $I2 = is_cclass code, str, $I0
257 if $I0 <= $I1 goto loop
280 ## setup for unicode whitespace tests
281 ## see http://www.unicode.org/Public/UNIDATA/PropList.txt for White_Space list
282 ## see also t/p6rules/metachars.t
284 horizontal_ascii => [qw/ \u0009 \u0020 \u00a0 /],
285 horizontal_unicode => [
287 \u1680 \u180e \u2000 \u2001 \u2002 \u2003 \u2004 \u2005
288 \u2006 \u2007 \u2008 \u2009 \u200a \u202f \u205f \u3000
291 vertical_ascii => [qw/ \u000a \u000b \u000c \u000d \u0085 /],
292 vertical_unicode => [qw/ \u2028 \u2029 /],
# Build the derived whitespace lists in $ws by concatenating the base lists
# (horizontal_ascii, horizontal_unicode, vertical_ascii, vertical_unicode).
# Each push appends to the array stored under the left-hand key; presumably
# those keys are autovivified here, since the full $ws initializer is not
# visible from this section -- TODO confirm.
#
# NOTE(review): the *_ascii base lists contain \u00a0 and \u0085, which lie
# outside 7-bit ASCII, so "ascii" appears to mean "first 256 code points"
# rather than strict ASCII -- confirm before relying on the name.
#
# horizontal: horizontal_ascii followed by horizontal_unicode.
295 push @{ $ws->{horizontal} } => @{ $ws->{horizontal_ascii} },
296 @{ $ws->{horizontal_unicode} };
# vertical: vertical_ascii followed by vertical_unicode.
298 push @{ $ws->{vertical} } => @{ $ws->{vertical_ascii} },
299 @{ $ws->{vertical_unicode} };
# whitespace_ascii: horizontal_ascii followed by vertical_ascii.
301 push @{ $ws->{whitespace_ascii} } => @{ $ws->{horizontal_ascii} },
302 @{ $ws->{vertical_ascii} };
# whitespace_unicode: horizontal_unicode followed by vertical_unicode.
304 push @{ $ws->{whitespace_unicode} } => @{ $ws->{horizontal_unicode} },
305 @{ $ws->{vertical_unicode} };
# whitespace: whitespace_ascii followed by whitespace_unicode. This ordering
# must be built after the two lists above, because it reads them.
307 push @{ $ws->{whitespace} } => @{ $ws->{whitespace_ascii} },
308 @{ $ws->{whitespace_unicode} };
312 'unicode:"' . join( '', @{ $ws->{$which} } ) . '"';
315 my $all_ws = string('whitespace');
318 skip 'unicode support unavailable' => 3
319 unless $PConfig{has_icu};
320 pir_output_is( <<"CODE", <<'OUT', "unicode is_cclass whitespace" );
322 .include "cclass.pasm"
323 .local int result, char, len, i
329 result = is_cclass .CCLASS_WHITESPACE, s, i
334 \$P0 = new 'ResizablePMCArray'
336 \$S0 = sprintf "\\nchar %#x not reported as ws\\n", \$P0
344 11111111111111111111111111
347 pir_output_is( <<"CODE", <<'OUT', "unicode find_ccclass whitespace" );
349 .include "cclass.pasm"
350 .local int result, char, len, i
353 s = unicode:"abc" . s
355 result = find_cclass .CCLASS_WHITESPACE, s, 0, len
363 pir_output_is( <<"CODE", <<'OUT', "unicode find_not_ccclass whitespace" );
365 .include "cclass.pasm"
366 .local int result, char, len, i
371 result = find_not_cclass .CCLASS_WHITESPACE, s, 0, len
382 # The following should pass even if ICU is unavailable (pmichaud, 2005-11-3)
383 pir_output_is( <<"CODE", <<'OUT', "unicode 0-127 find_*_cclass whitespace" );
385 .include "cclass.pasm"
386 .local int result, char, len, i
388 s = unicode:"abc def"
390 result = find_cclass .CCLASS_WHITESPACE, s, 0, len
394 result = find_not_cclass .CCLASS_WHITESPACE, s, 3, len
403 pir_output_is( <<'CODE', <<'OUT', "is_cclass, unicode first codepage" );
404 .include "cclass.pasm"
406 $S1 = unicode:"ab\nC_X34.\0 \t!"
411 test2( str, .CCLASS_UPPERCASE)
412 test2( str, .CCLASS_LOWERCASE)
413 test2( str, .CCLASS_ALPHABETIC)
414 test2( str, .CCLASS_NUMERIC)
415 test2( str, .CCLASS_HEXADECIMAL)
416 test2( str, .CCLASS_WHITESPACE)
417 test2( str, .CCLASS_PRINTING)
418 test2( str, .CCLASS_GRAPHICAL)
419 test2( str, .CCLASS_BLANK)
420 test2( str, .CCLASS_CONTROL)
421 test2( str, .CCLASS_PUNCTUATION)
422 test2( str, .CCLASS_ALPHANUMERIC)
423 test2( str, .CCLASS_NEWLINE)
424 test2( str, .CCLASS_WORD)
426 $I0 = .CCLASS_NEWLINE|.CCLASS_WHITESPACE
428 $I0 = .CCLASS_WHITESPACE|.CCLASS_LOWERCASE
430 $I0 = .CCLASS_UPPERCASE|.CCLASS_PUNCTUATION
440 $I2 = is_cclass code, str, $I0
443 if $I0 <= $I1 goto loop
468 # cperl-indent-level: 4
471 # vim: expandtab shiftwidth=4: