1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
2 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
16 * The Original Code is [Open Source Virtual Machine.].
18 * The Initial Developer of the Original Code is
19 * Adobe System Incorporated.
20 * Portions created by the Initial Developer are Copyright (C) 2008
21 * the Initial Developer. All Rights Reserved.
26 * Alternatively, the contents of this file may be used under the terms of
27 * either the GNU General Public License Version 2 or later (the "GPL"), or
28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
46 /* Rudimentary Unicode support - enough to handle identifier lexing. These tables
47 * are slightly optimized for space, but more could be done. In particular, delta
48 * coding can be useful because most deltas - both in character ranges and between
49 * ranges - fit in a single byte. Delta coding might shrink the tables by slightly
50 * less than a factor of 2. As it is, the total size of these tables is about 2KB.
52 * Speed is not a big issue here because almost no programs have identifiers that
53 * contain characters outside the ASCII range, and ASCII is handled specially
54 * outside this file using faster code paths (see code in eval-lex.cpp).
56 * The tables are generated from the Unicode data file by generate-unicode-tables.c
72 uint16_t const * singletons
;
75 // UnicodeLetter (Lu Ll Lt Lm Lo Nl)
// Interval part of the UnicodeLetter data. unicodeLookup reads Range entries
// through their lo and hi fields and treats both ends as inclusive.
76 static const Range unicodeLetter_ranges
[] = {
// 16-bit code points of the same class that are listed one at a time
// (presumably those that do not form part of a longer interval - confirm
// against the table generator).
342 static const uint16_t unicodeLetter_singletons
[] = {
// Descriptor that ties the two arrays above together. Its address is what the
// letter and identifier predicates hand to unicodeLookup.
// NOTE(review): unicodeLookup also reads nranges and nsingletons from the
// descriptor, so the element counts are expected in this initializer too.
416 static const UnicodeTable unicodeLetter
= {
418 unicodeLetter_ranges
,
420 unicodeLetter_singletons
423 // UnicodeCombiningMark (Mn, Mc)
425 // UnicodeConnectorPunctuation (Pc)
// Interval part of the data for characters that are accepted after the first
// character of an identifier in addition to letters. unicodeLookup reads Range
// entries through their lo and hi fields and treats both ends as inclusive.
426 static const Range identifier_subsequent_ranges
[] = {
// 16-bit code points of the same category set that are listed one at a time
// (presumably those that do not form part of a longer interval - confirm
// against the table generator).
577 static const uint16_t identifier_subsequent_singletons
[] = {
// Descriptor that ties the two arrays above together. Its address is passed to
// unicodeLookup by isNonASCIIIdentifierSubsequent.
// NOTE(review): unicodeLookup also reads nranges and nsingletons from the
// descriptor, so the element counts are expected in this initializer too.
617 static const UnicodeTable identifier_subsequent
= {
619 identifier_subsequent_ranges
,
621 identifier_subsequent_singletons
// Tests whether character c is covered by the table tbl.
//
// Two passes are made: first over tbl->ranges (tbl->nranges entries), looking
// for an entry whose inclusive [lo, hi] interval contains c, and then over
// tbl->singletons (tbl->nsingletons entries), looking for an entry equal to c.
// Each pass probes the midpoint of a [lo, hi] index window and compares c
// against the probed entry.
//
// NOTE(review): the midpoint probing looks like a binary search, which would
// require both arrays to be sorted in ascending order - confirm against the
// table generator.
624 static bool unicodeLookup(const UnicodeTable
* tbl
, wchar c
)
// Upper end of the index window: the last entry of the range array.
627 int32_t hi
= tbl
->nranges
-1;
// Probe the entry in the middle of the current index window.
630 int32_t mid
= (lo
+ hi
) / 2;
// Does c fall inside the probed range (both ends inclusive)?
631 if (tbl
->ranges
[mid
].lo
<= c
&& c
<= tbl
->ranges
[mid
].hi
)
// Otherwise: does c lie below the start of the probed range?
633 if (c
< tbl
->ranges
[mid
].lo
)
// Second pass: hi is reused as the last index of the singleton array.
640 hi
= tbl
->nsingletons
-1;
// Probe the entry in the middle of the current index window.
642 int32_t mid
= (lo
+ hi
) / 2;
// Exact match against the probed singleton?
643 if (tbl
->singletons
[mid
] == c
)
// Otherwise: does c lie below the probed singleton?
645 if (c
< tbl
->singletons
[mid
])
// Returns whether c may start an identifier, decided solely by looking c up in
// the unicodeLetter table. As the name and the file header indicate, ASCII
// characters are expected to be classified by the caller's own fast path.
654 bool isNonASCIIIdentifierStart(wchar c
)
656 return unicodeLookup(&unicodeLetter
, c
);
// Returns whether c may appear after the first character of an identifier:
// true if c is found in the unicodeLetter table or, failing that, in the
// identifier_subsequent table. The second lookup only runs when the first
// one does not match (short-circuit ||).
659 bool isNonASCIIIdentifierSubsequent(wchar c
)
661 return unicodeLookup(&unicodeLetter
, c
) ||
662 unicodeLookup(&identifier_subsequent
, c
);
// Returns whether c is found in the unicodeLetter table. This performs the
// same lookup as isNonASCIIIdentifierStart.
665 bool isUnicodeLetter(wchar c
)
667 return unicodeLookup(&unicodeLetter
, c
);
// Returns whether c is a digit. Only the ASCII digits '0' through '9' are
// recognized at present; every other character, including non-ASCII decimal
// digits, yields false (see the FIXME below).
670 bool isUnicodeDigit(wchar c
)
672 // FIXME: not quite right, we want a proper lookup table for unicodeDigit
673 return c
>= '0' && c
<= '9';