tests/euc-mb

   1 #!/bin/sh
   2 # test that matches starting in the middle of a multibyte char aren't rejected
   3 # too greedily.
   4 # Derived from https://savannah.gnu.org/bugs/?23814
   5 . "${srcdir=.}/init.sh"; path_prepend_ ../src
   6
   7 # Add "." to PATH for the use of get-mb-cur-max.
   8 path_prepend_ .
   9
  10 require_compiled_in_MB_support
  11
  12 locale=ja_JP.EUC-JP
  13
  14 make_input () {
  15   echo "$1" | tr AB '\244\263'
  16 }
  17
  18 euc_grep () {
  19   pat=$(make_input "$1")
  20   LC_ALL=$locale grep "$pat"
  21 }
  22
  23 case $(get-mb-cur-max $locale) in
  24   2|3) ;;
  25   *) skip_ 'EUC-JP locale not found' ;;
  26 esac
  27
  28 fail=0
  29
  30 # Does EUC-JP work at all?
  31 make_input BABA |euc_grep AB && fail=1
  32
  33 # Here are two cases in which a KWSet search matches in the middle
  34 # of a multibyte character.  The first ensures that the DFA matcher
  35 # finds the real match at the end of line.  The second ensures that
  36 # while the KWSet match found a false positive, the DFA matcher
  37 # determines there is no match after all.
  38 make_input BABAAB |euc_grep AB > out || fail=1
  39 make_input BABAAB > exp || framework_failure_
  40 compare exp out || fail=1
  41 make_input BABABA |returns_ 1 euc_grep AB || fail=1
  42 make_input BABABA |returns_ 1 euc_grep '^x\|AB' || fail=1
  43
  44 # -P supports only unibyte and UTF-8 locales.
  45 returns_ 2 env LC_ALL=$locale grep -P x /dev/null || fail=1
  46
  47 Exit $fail