1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "ui/base/ime/chromeos/character_composer.h"
10 #include "base/strings/utf_string_conversions.h"
11 #include "base/third_party/icu/icu_utf.h"
12 // Note for Gtk removal: gdkkeysyms.h only contains a set of
13 // '#define GDK_KeyName 0xNNNN' macros and does not #include any Gtk headers.
14 #include "third_party/gtk+/gdk/gdkkeysyms.h"
16 #include "ui/base/glib/glib_integers.h"
17 #include "ui/events/event.h"
18 #include "ui/events/keycodes/keyboard_codes.h"
20 // Note for Gtk removal: gtkimcontextsimpleseqs.h does not #include any Gtk
21 // headers and only contains one big guint16 array |gtk_compose_seqs_compact|
22 // which defines the main compose table. The table has internal linkage.
23 // The order of header inclusion is out of order because
24 // gtkimcontextsimpleseqs.h depends on guint16, which is defined in
25 // "ui/base/glib/glib_integers.h".
26 #include "third_party/gtk+/gtk/gtkimcontextsimpleseqs.h"
30 // A black list for not composing dead keys. Once the key combination is listed
31 // below, the dead key won't work even when this is listed in
32 // gtkimcontextsimpleseqs.h. This only supports two key event sequences.
33 // TODO(nona): Remove this hack.
34 const struct BlackListedDeadKey
{
35 uint32 first_key
; // target first key event.
36 uint32 second_key
; // target second key event.
37 uint32 output_char
; // the character to be inserted if the filter is matched.
38 bool consume
; // true if the original key event will be consumed.
39 } kBlackListedDeadKeys
[] = {
40 { GDK_KEY_dead_acute
, GDK_KEY_m
, GDK_KEY_apostrophe
, false },
41 { GDK_KEY_dead_acute
, GDK_KEY_s
, GDK_KEY_apostrophe
, false },
42 { GDK_KEY_dead_acute
, GDK_KEY_t
, GDK_KEY_apostrophe
, false },
43 { GDK_KEY_dead_acute
, GDK_KEY_v
, GDK_KEY_apostrophe
, false },
44 { GDK_KEY_dead_acute
, GDK_KEY_dead_acute
, GDK_KEY_apostrophe
, true },
// Buffer of key values typed so far in the current composition.
typedef std::vector<unsigned int> ComposeBufferType;
49 // An iterator class to apply std::lower_bound for composition table.
50 class SequenceIterator
51 : public std::iterator
<std::random_access_iterator_tag
, const uint16
*> {
53 SequenceIterator() : ptr_(NULL
), stride_(0) {}
54 SequenceIterator(const uint16
* ptr
, int stride
)
55 : ptr_(ptr
), stride_(stride
) {}
57 const uint16
* ptr() const {return ptr_
;}
58 int stride() const {return stride_
;}
60 SequenceIterator
& operator++() {
64 SequenceIterator
& operator+=(int n
) {
69 const uint16
* operator*() const {return ptr_
;}
76 inline SequenceIterator
operator+(const SequenceIterator
& l
, int r
) {
77 return SequenceIterator(l
) += r
;
80 inline int operator-(const SequenceIterator
& l
, const SequenceIterator
& r
) {
81 const int d
= l
.ptr() - r
.ptr();
82 DCHECK(l
.stride() == r
.stride() && l
.stride() > 0 && d
%l
.stride() == 0);
86 inline bool operator==(const SequenceIterator
& l
, const SequenceIterator
& r
) {
87 DCHECK(l
.stride() == r
.stride());
88 return l
.ptr() == r
.ptr();
91 inline bool operator!=(const SequenceIterator
& l
, const SequenceIterator
& r
) {
// Three-way comparison of two key values.
// Returns 1 when |l| > |r|, -1 when |l| < |r| and 0 when they are equal.
inline int CompareSequenceValue(unsigned int l, unsigned int r) {
  if (l > r)
    return 1;
  if (l < r)
    return -1;
  return 0;
}
100 // A template to make |CompareFunc| work like operator<.
101 // |CompareFunc| is required to implement a member function,
102 // int operator()(const ComposeBufferType& l, const uint16* r) const.
103 template<typename CompareFunc
>
104 struct ComparatorAdoptor
{
105 bool operator()(const ComposeBufferType
& l
, const uint16
* r
) const {
106 return CompareFunc()(l
, r
) == -1;
108 bool operator()(const uint16
* l
, const ComposeBufferType
& r
) const {
109 return CompareFunc()(r
, l
) == 1;
113 class ComposeChecker
{
115 // This class does not take the ownership of |data|, |data| should be alive
116 // for the lifetime of the object.
117 // |data| is a pointer to the head of an array of
118 // length (|max_sequence_length| + 2)*|n_sequences|.
119 // Every (|max_sequence_length| + 2) elements of |data| represent an entry.
120 // First |max_sequence_length| elements of an entry are the sequence which
121 // composes the character represented by the last two elements of the entry.
122 ComposeChecker(const uint16
* data
, int max_sequence_length
, int n_sequences
);
123 bool CheckSequence(const ComposeBufferType
& sequence
,
124 uint32
* composed_character
) const;
127 struct CompareSequence
{
128 int operator()(const ComposeBufferType
& l
, const uint16
* r
) const;
131 // This class does not take the ownership of |data_|,
132 // the dtor does not delete |data_|.
134 int max_sequence_length_
;
138 DISALLOW_COPY_AND_ASSIGN(ComposeChecker
);
141 ComposeChecker::ComposeChecker(const uint16
* data
,
142 int max_sequence_length
,
145 max_sequence_length_(max_sequence_length
),
146 n_sequences_(n_sequences
),
147 row_stride_(max_sequence_length
+ 2) {
150 bool ComposeChecker::CheckSequence(const ComposeBufferType
& sequence
,
151 uint32
* composed_character
) const {
152 const int sequence_length
= sequence
.size();
153 if (sequence_length
> max_sequence_length_
)
155 // Find sequence in the table.
156 const SequenceIterator
begin(data_
, row_stride_
);
157 const SequenceIterator end
= begin
+ n_sequences_
;
158 const SequenceIterator found
= std::lower_bound(
159 begin
, end
, sequence
, ComparatorAdoptor
<CompareSequence
>());
160 if (found
== end
|| CompareSequence()(sequence
, *found
) != 0)
163 if (sequence_length
== max_sequence_length_
||
164 (*found
)[sequence_length
] == 0) {
165 // |found| is not partially matching. It's fully matching.
166 if (found
+ 1 == end
||
167 CompareSequence()(sequence
, *(found
+ 1)) != 0) {
168 // There is no composition longer than |found| which matches to
170 const uint32 value
= ((*found
)[max_sequence_length_
] << 16) |
171 (*found
)[max_sequence_length_
+ 1];
172 *composed_character
= value
;
178 int ComposeChecker::CompareSequence::operator()(const ComposeBufferType
& l
,
179 const uint16
* r
) const {
180 for(size_t i
= 0; i
< l
.size(); ++i
) {
181 const int compare_result
= CompareSequenceValue(l
[i
], r
[i
]);
183 return compare_result
;
189 class ComposeCheckerWithCompactTable
{
191 // This class does not take the ownership of |data|, |data| should be alive
192 // for the lifetime of the object.
193 // First |index_size|*|index_stride| elements of |data| are an index table.
194 // Every |index_stride| elements of an index table are an index entry.
195 // If you are checking with a sequence of length N beginning with character C,
196 // you have to find an index entry whose first element is C, then get the N-th
197 // element of the index entry as the index.
198 // The index points to the element of |data| where the composition table for
199 // sequences of length N beginning with C is placed.
201 ComposeCheckerWithCompactTable(const uint16
* data
,
202 int max_sequence_length
,
205 bool CheckSequence(const ComposeBufferType
& sequence
,
206 uint32
* composed_character
) const;
209 struct CompareSequenceFront
{
210 int operator()(const ComposeBufferType
& l
, const uint16
* r
) const;
212 struct CompareSequenceSkipFront
{
213 int operator()(const ComposeBufferType
& l
, const uint16
* r
) const;
216 // This class does not take the ownership of |data_|,
217 // the dtor does not delete |data_|.
219 int max_sequence_length_
;
224 ComposeCheckerWithCompactTable::ComposeCheckerWithCompactTable(
226 int max_sequence_length
,
230 max_sequence_length_(max_sequence_length
),
231 index_size_(index_size
),
232 index_stride_(index_stride
) {
235 bool ComposeCheckerWithCompactTable::CheckSequence(
236 const ComposeBufferType
& sequence
,
237 uint32
* composed_character
) const {
238 const int compose_length
= sequence
.size();
239 if (compose_length
> max_sequence_length_
)
241 // Find corresponding index for the first keypress.
242 const SequenceIterator
index_begin(data_
, index_stride_
);
243 const SequenceIterator index_end
= index_begin
+ index_size_
;
244 const SequenceIterator index
=
245 std::lower_bound(index_begin
, index_end
, sequence
,
246 ComparatorAdoptor
<CompareSequenceFront
>());
247 if (index
== index_end
|| CompareSequenceFront()(sequence
, *index
) != 0)
249 if (compose_length
== 1)
251 // Check for composition sequences.
252 for (int length
= compose_length
- 1; length
< max_sequence_length_
;
254 const uint16
* table
= data_
+ (*index
)[length
];
255 const uint16
* table_next
= data_
+ (*index
)[length
+ 1];
256 if (table_next
> table
) {
257 // There are composition sequences for this |length|.
258 const int row_stride
= length
+ 1;
259 const int n_sequences
= (table_next
- table
)/row_stride
;
260 const SequenceIterator
table_begin(table
, row_stride
);
261 const SequenceIterator table_end
= table_begin
+ n_sequences
;
262 const SequenceIterator found
=
263 std::lower_bound(table_begin
, table_end
, sequence
,
264 ComparatorAdoptor
<CompareSequenceSkipFront
>());
265 if (found
!= table_end
&&
266 CompareSequenceSkipFront()(sequence
, *found
) == 0) {
267 if (length
== compose_length
- 1) // Exact match.
268 *composed_character
= (*found
)[length
];
276 int ComposeCheckerWithCompactTable::CompareSequenceFront::operator()(
277 const ComposeBufferType
& l
, const uint16
* r
) const {
278 return CompareSequenceValue(l
[0], r
[0]);
281 int ComposeCheckerWithCompactTable::CompareSequenceSkipFront::operator()(
282 const ComposeBufferType
& l
, const uint16
* r
) const {
283 for(size_t i
= 1; i
< l
.size(); ++i
) {
284 const int compare_result
= CompareSequenceValue(l
[i
], r
[i
- 1]);
286 return compare_result
;
294 // The difference between this and the default input method is the handling
295 // of C+acute - this method produces C WITH CEDILLA rather than C WITH ACUTE.
296 // For languages that use CCedilla and not acute, this is the preferred mapping,
297 // and is particularly important for pt_BR, where the us-intl keyboard is
300 const uint16 cedilla_compose_seqs
[] = {
301 // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
302 GDK_KEY_dead_acute
, GDK_KEY_C
, 0, 0, 0, 0x00C7,
303 // LATIN_SMALL_LETTER_C_WITH_CEDILLA
304 GDK_KEY_dead_acute
, GDK_KEY_c
, 0, 0, 0, 0x00E7,
305 // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
306 GDK_KEY_Multi_key
, GDK_KEY_apostrophe
, GDK_KEY_C
, 0, 0, 0x00C7,
307 // LATIN_SMALL_LETTER_C_WITH_CEDILLA
308 GDK_KEY_Multi_key
, GDK_KEY_apostrophe
, GDK_KEY_c
, 0, 0, 0x00E7,
309 // LATIN_CAPITAL_LETTER_C_WITH_CEDILLA
310 GDK_KEY_Multi_key
, GDK_KEY_C
, GDK_KEY_apostrophe
, 0, 0, 0x00C7,
311 // LATIN_SMALL_LETTER_C_WITH_CEDILLA
312 GDK_KEY_Multi_key
, GDK_KEY_c
, GDK_KEY_apostrophe
, 0, 0, 0x00E7,
315 bool KeypressShouldBeIgnored(unsigned int keyval
) {
317 case GDK_KEY_Shift_L
:
318 case GDK_KEY_Shift_R
:
319 case GDK_KEY_Control_L
:
320 case GDK_KEY_Control_R
:
321 case GDK_KEY_Caps_Lock
:
322 case GDK_KEY_Shift_Lock
:
327 case GDK_KEY_Super_L
:
328 case GDK_KEY_Super_R
:
329 case GDK_KEY_Hyper_L
:
330 case GDK_KEY_Hyper_R
:
331 case GDK_KEY_Mode_switch
:
332 case GDK_KEY_ISO_Level3_Shift
:
339 bool CheckCharacterComposeTable(const ComposeBufferType
& sequence
,
340 uint32
* composed_character
) {
341 // Check cedilla compose table.
342 const ComposeChecker
kCedillaComposeChecker(
343 cedilla_compose_seqs
, 4, arraysize(cedilla_compose_seqs
)/(4 + 2));
344 if (kCedillaComposeChecker
.CheckSequence(sequence
, composed_character
))
347 // Check main compose table.
348 const ComposeCheckerWithCompactTable
kMainComposeChecker(
349 gtk_compose_seqs_compact
, 5, 24, 6);
350 if (kMainComposeChecker
.CheckSequence(sequence
, composed_character
))
356 // Converts |character| to UTF16 string.
357 // Returns false when |character| is not a valid character.
358 bool UTF32CharacterToUTF16(uint32 character
, base::string16
* output
) {
360 // Reject invalid character. (e.g. codepoint greater than 0x10ffff)
361 if (!CBU_IS_UNICODE_CHAR(character
))
364 output
->resize(CBU16_LENGTH(character
));
366 CBU16_APPEND_UNSAFE(&(*output
)[0], i
, character
);
371 // Returns an hexadecimal digit integer (0 to 15) corresponding to |keyval|.
372 // -1 is returned when |keyval| cannot be a hexadecimal digit.
373 int KeyvalToHexDigit(unsigned int keyval
) {
374 if (GDK_KEY_0
<= keyval
&& keyval
<= GDK_KEY_9
)
375 return keyval
- GDK_KEY_0
;
376 if (GDK_KEY_a
<= keyval
&& keyval
<= GDK_KEY_f
)
377 return keyval
- GDK_KEY_a
+ 10;
378 if (GDK_KEY_A
<= keyval
&& keyval
<= GDK_KEY_F
)
379 return keyval
- GDK_KEY_A
+ 10;
380 return -1; // |keyval| cannot be a hexadecimal digit.
383 // Returns an hexadecimal digit integer (0 to 15) corresponding to |keycode|.
384 // -1 is returned when |keycode| cannot be a hexadecimal digit.
385 int KeycodeToHexDigit(unsigned int keycode
) {
386 if (ui::VKEY_0
<= keycode
&& keycode
<= ui::VKEY_9
)
387 return keycode
- ui::VKEY_0
;
388 if (ui::VKEY_A
<= keycode
&& keycode
<= ui::VKEY_F
)
389 return keycode
- ui::VKEY_A
+ 10;
390 return -1; // |keycode| cannot be a hexadecimal digit.
397 CharacterComposer::CharacterComposer() : composition_mode_(KEY_SEQUENCE_MODE
) {}
399 CharacterComposer::~CharacterComposer() {}
401 void CharacterComposer::Reset() {
402 compose_buffer_
.clear();
403 composed_character_
.clear();
404 preedit_string_
.clear();
405 composition_mode_
= KEY_SEQUENCE_MODE
;
408 bool CharacterComposer::FilterKeyPress(const ui::KeyEvent
& event
) {
409 uint32 keyval
= event
.platform_keycode();
411 (event
.type() != ET_KEY_PRESSED
&& event
.type() != ET_KEY_RELEASED
))
414 return FilterKeyPressInternal(keyval
, event
.key_code(), event
.flags());
418 bool CharacterComposer::FilterKeyPressInternal(unsigned int keyval
,
419 unsigned int keycode
,
421 composed_character_
.clear();
422 preedit_string_
.clear();
424 // We don't care about modifier key presses.
425 if(KeypressShouldBeIgnored(keyval
))
428 // When the user presses Ctrl+Shift+U, maybe switch to HEX_MODE.
429 // We don't care about other modifiers like Alt. When CapsLock is down, we
430 // do nothing because what we receive is Ctrl+Shift+u (not U).
431 if (keyval
== GDK_KEY_U
&& (flags
& EF_SHIFT_DOWN
) &&
432 (flags
& EF_CONTROL_DOWN
)) {
433 if (composition_mode_
== KEY_SEQUENCE_MODE
&& compose_buffer_
.empty()) {
434 // There is no ongoing composition. Let's switch to HEX_MODE.
435 composition_mode_
= HEX_MODE
;
436 UpdatePreeditStringHexMode();
441 // Filter key press in an appropriate manner.
442 switch (composition_mode_
) {
443 case KEY_SEQUENCE_MODE
:
444 return FilterKeyPressSequenceMode(keyval
, flags
);
446 return FilterKeyPressHexMode(keyval
, keycode
, flags
);
453 bool CharacterComposer::FilterKeyPressSequenceMode(unsigned int keyval
,
455 DCHECK(composition_mode_
== KEY_SEQUENCE_MODE
);
456 compose_buffer_
.push_back(keyval
);
458 if (compose_buffer_
.size() == 2U) {
459 for (size_t i
= 0; i
< arraysize(kBlackListedDeadKeys
); ++i
) {
460 if (compose_buffer_
[0] == kBlackListedDeadKeys
[i
].first_key
&&
461 compose_buffer_
[1] == kBlackListedDeadKeys
[i
].second_key
) {
463 composed_character_
.push_back(kBlackListedDeadKeys
[i
].output_char
);
464 return kBlackListedDeadKeys
[i
].consume
;
469 // Check compose table.
470 uint32 composed_character_utf32
= 0;
471 if (CheckCharacterComposeTable(compose_buffer_
, &composed_character_utf32
)) {
472 // Key press is recognized as a part of composition.
473 if (composed_character_utf32
!= 0) {
474 // We get a composed character.
475 compose_buffer_
.clear();
476 UTF32CharacterToUTF16(composed_character_utf32
, &composed_character_
);
480 // Key press is not a part of composition.
481 compose_buffer_
.pop_back(); // Remove the keypress added this time.
482 if (!compose_buffer_
.empty()) {
483 compose_buffer_
.clear();
489 bool CharacterComposer::FilterKeyPressHexMode(unsigned int keyval
,
490 unsigned int keycode
,
492 DCHECK(composition_mode_
== HEX_MODE
);
493 const size_t kMaxHexSequenceLength
= 8;
494 int hex_digit
= KeyvalToHexDigit(keyval
);
496 // With 101 keyboard, control + shift + 3 produces '#', but a user may
497 // have intended to type '3'. So, if a hexadecimal character was not found,
498 // suppose a user is holding shift key (and possibly control key, too) and
499 // try a character with modifier keys removed.
500 hex_digit
= KeycodeToHexDigit(keycode
);
503 if (keyval
== GDK_KEY_Escape
) {
504 // Cancel composition when ESC is pressed.
506 } else if (keyval
== GDK_KEY_Return
|| keyval
== GDK_KEY_KP_Enter
||
507 keyval
== GDK_KEY_ISO_Enter
||
508 keyval
== GDK_KEY_space
|| keyval
== GDK_KEY_KP_Space
) {
509 // Commit the composed character when Enter or space is pressed.
511 } else if (keyval
== GDK_KEY_BackSpace
) {
512 // Pop back the buffer when Backspace is pressed.
513 if (!compose_buffer_
.empty()) {
514 compose_buffer_
.pop_back();
516 // If there is no character in |compose_buffer_|, cancel composition.
519 } else if (hex_digit
>= 0 &&
520 compose_buffer_
.size() < kMaxHexSequenceLength
) {
521 // Add the key to the buffer if it is a hex digit.
522 compose_buffer_
.push_back(hex_digit
);
525 UpdatePreeditStringHexMode();
530 void CharacterComposer::CommitHex() {
531 DCHECK(composition_mode_
== HEX_MODE
);
532 uint32 composed_character_utf32
= 0;
533 for (size_t i
= 0; i
!= compose_buffer_
.size(); ++i
) {
534 const uint32 digit
= compose_buffer_
[i
];
535 DCHECK(0 <= digit
&& digit
< 16);
536 composed_character_utf32
<<= 4;
537 composed_character_utf32
|= digit
;
540 UTF32CharacterToUTF16(composed_character_utf32
, &composed_character_
);
543 void CharacterComposer::UpdatePreeditStringHexMode() {
544 if (composition_mode_
!= HEX_MODE
) {
545 preedit_string_
.clear();
548 std::string
preedit_string_ascii("u");
549 for (size_t i
= 0; i
!= compose_buffer_
.size(); ++i
) {
550 const int digit
= compose_buffer_
[i
];
551 DCHECK(0 <= digit
&& digit
< 16);
552 preedit_string_ascii
+= digit
<= 9 ? ('0' + digit
) : ('a' + (digit
- 10));
554 preedit_string_
= base::ASCIIToUTF16(preedit_string_ascii
);