1 https://github.com/google/mozc/issues/462
3 --- /src/dictionary/gen_pos_map.py
4 +++ /src/dictionary/gen_pos_map.py
6 from build_tools import code_generator_util
9 -HEADER = """// Copyright 2009 Google Inc. All Rights Reserved.
10 +HEADER = b"""// Copyright 2009 Google Inc. All Rights Reserved.
13 #ifndef MOZC_DICTIONARY_POS_MAP_H_
15 // POS conversion rules
16 const POSMap kPOSMap[] = {
21 #endif // MOZC_DICTIONARY_POS_MAP_H_
24 def ParseUserPos(user_pos_file):
25 - with open(user_pos_file, 'r') as stream:
26 + with open(user_pos_file, 'rb') as stream:
27 stream = code_generator_util.SkipLineComment(stream)
28 stream = code_generator_util.ParseColumnStream(stream, num_column=2)
29 return dict((key, enum_value) for key, enum_value in stream)
31 user_pos_map = ParseUserPos(user_pos_file)
34 - with open(third_party_pos_map_file, 'r') as stream:
35 + with open(third_party_pos_map_file, 'rb') as stream:
36 stream = code_generator_util.SkipLineComment(stream)
37 for columns in code_generator_util.ParseColumnStream(stream, num_column=2):
38 third_party_pos_name, mozc_pos = (columns + [None])[:2]
40 result[third_party_pos_name] = mozc_pos
42 # Create mozc_pos to mozc_pos map.
43 - for key, value in user_pos_map.iteritems():
44 + for key, value in user_pos_map.items():
46 assert (result[key] == value)
52 - 'static_cast< ::mozc::user_dictionary::UserDictionary::PosType>(-1)')
53 + b'static_cast< ::mozc::user_dictionary::UserDictionary::PosType>(-1)')
55 - value = '::mozc::user_dictionary::UserDictionary::' + value
56 - output.write(' { %s, %s },\n' % (key, value))
57 + value = b'::mozc::user_dictionary::UserDictionary::' + value
58 + output.write(b' { %s, %s },\n' % (key, value))
63 pos_map = GeneratePosMap(options.third_party_pos_map_file,
64 options.user_pos_file)
66 - with open(options.output, 'w') as stream:
67 + with open(options.output, 'wb') as stream:
68 OutputPosMap(pos_map, stream)
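
The pattern in this hunk (and in most of the dictionary hunks below) is to keep the generator in bytes end to end: inputs are opened with 'rb', literals become b"...", and the output is written with 'wb'. A minimal sketch, not mozc code, of why mixing the two types no longer works under Python 3:

    key = b'NOUN'
    value = b'::mozc::user_dictionary::UserDictionary::NOUN'
    with open('pos_map_demo.h', 'wb') as out:
        # bytes %-formatting exists since Python 3.5, which is what the
        # b' { %s, %s },\n' % (key, value) line above relies on.
        out.write(b' { %s, %s },\n' % (key, value))
        # out.write(' { %s, %s },\n' % (key, value))
        #   -> TypeError: a bytes-like object is required, not 'str'
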
71 --- /src/dictionary/gen_pos_rewrite_rule.py
72 +++ /src/dictionary/gen_pos_rewrite_rule.py
76 def LoadRewriteMapRule(filename):
78 + fh = open(filename, 'rb')
81 - line = line.rstrip('\n')
82 - if not line or line.startswith('#'):
83 + line = line.rstrip(b'\n')
84 + if not line or line.startswith(b'#'):
87 rule.append([fields[0], fields[1]])
92 def ReadPOSID(id_file, special_pos_file):
95 - for line in open(id_file, 'r'):
96 + fh = open(id_file, 'rb')
99 pos_list.append(fields[1])
102 - for line in open(special_pos_file, 'r'):
103 - if len(line) <= 1 or line[0] == '#':
104 + fh = open(special_pos_file, 'rb')
106 + if len(line) <= 1 or line[0:1] == b'#':
108 fields = line.split()
109 pos_list.append(fields[0])
117 with open(opts.output, 'wb') as f:
118 - f.write(''.join(chr(id) for id in ids))
119 + f.write(bytes(ids))
122 if __name__ == '__main__':
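
Two details here are easy to miss. Indexing a bytes object in Python 3 returns an int, which is why the comment check becomes a one-byte slice comparison, and bytes(ids) reproduces the Python 2 ''.join(chr(id) for id in ids) output byte for byte even when an id is 128 or larger (encoding via UTF-8 would expand such values to two bytes). A minimal sketch, not mozc code:

    line = b'# comment line'
    assert line[0] == 0x23      # indexing bytes yields an int in Python 3
    assert line[0:1] == b'#'    # a length-1 slice keeps the bytes type

    ids = [1, 5, 200]
    assert bytes(ids) == b'\x01\x05\xc8'                # one byte per id
    assert ''.join(chr(i) for i in ids).encode('utf-8') == b'\x01\x05\xc3\x88'
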
123 --- /src/dictionary/gen_suffix_data.py
124 +++ /src/dictionary/gen_suffix_data.py
126 opts = _ParseOptions()
129 - with open(opts.input, 'r') as stream:
130 + with open(opts.input, 'rb') as stream:
132 - line = line.rstrip('\r\n')
133 - fields = line.split('\t')
134 + line = line.rstrip(b'\r\n')
135 + fields = line.split(b'\t')
146 result.append((key, value, lid, rid, cost))
148 --- /src/dictionary/gen_user_pos_data.py
149 +++ /src/dictionary/gen_user_pos_data.py
151 f.write(struct.pack('<H', conjugation_id))
153 serialized_string_array_builder.SerializeToFile(
154 - sorted(string_index.iterkeys()), output_string_array)
155 + sorted(x.encode('utf-8') for x in string_index.keys()), output_string_array)
161 if options.output_pos_list:
162 serialized_string_array_builder.SerializeToFile(
163 - [pos for (pos, _) in user_pos.data], options.output_pos_list)
164 + [pos.encode('utf-8') for (pos, _) in user_pos.data], options.output_pos_list)
167 if __name__ == '__main__':
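
serialized_string_array_builder.SerializeToFile now receives byte strings, so the str keys and POS names are encoded up front. Encoding before sorting is safe because UTF-8 byte order matches code-point order; a minimal sketch, not mozc code:

    words = ['サ変名詞', '動詞', '名詞']
    # encode-then-sort and sort-then-encode give the same sequence
    assert (sorted(w.encode('utf-8') for w in words)
            == [w.encode('utf-8') for w in sorted(words)])
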
168 --- /src/dictionary/gen_zip_code_seed.py
169 +++ /src/dictionary/gen_zip_code_seed.py
171 address = unicodedata.normalize('NFKC', self.address)
172 line = '\t'.join([zip_code, '0', '0', str(ZIP_CODE_COST),
173 address, ZIP_CODE_LABEL])
174 - print line.encode('utf-8')
175 + print(line)
178 def ProcessZipCodeCSV(file_name):
179 @@ -105,26 +105,26 @@
181 def ReadZipCodeEntries(zip_code, level1, level2, level3):
182 """Read zip code entries."""
183 - return [ZipEntry(zip_code, u''.join([level1, level2, town]))
184 + return [ZipEntry(zip_code, ''.join([level1, level2, town]))
185 for town in ParseTownName(level3)]
188 def ReadJigyosyoEntry(zip_code, level1, level2, level3, name):
189 """Read jigyosyo entry."""
190 return ZipEntry(zip_code,
191 - u''.join([level1, level2, level3, u' ', name]))
192 + ''.join([level1, level2, level3, ' ', name]))
195 def ParseTownName(level3):
196 """Parse town name."""
197 - if level3.find(u'以下に掲載がない場合') != -1:
198 + if level3.find('以下に掲載がない場合') != -1:
201 assert CanParseAddress(level3), ('failed to be merged %s'
202 % level3.encode('utf-8'))
204 # We ignore additional information here.
205 - level3 = re.sub(u'（.*）', u'', level3, re.U)
206 + level3 = re.sub('（.*）', '', level3, re.U)
208 # For 地割, we have these cases.
213 # We simply use XX for them.
214 - chiwari_match = re.match(u'(\D*?)第?\d+地割.*', level3, re.U)
215 + chiwari_match = re.match(r'(\D*?)第?\d+地割.*', level3, re.U)
217 town = chiwari_match.group(1)
219 @@ -144,21 +144,21 @@
220 # -> XX町YY and (XX町)ZZ
223 - chou_match = re.match(u'(.*町)?(.*)', level3, re.U)
224 + chou_match = re.match('(.*町)?(.*)', level3, re.U)
228 if chou_match.group(1):
229 chou = chou_match.group(1)
230 rests = chou_match.group(2)
231 - return [chou + rest for rest in rests.split(u'、')]
232 + return [chou + rest for rest in rests.split('、')]
237 def CanParseAddress(address):
238 """Return true for valid address."""
239 - return (address.find(u'（') == -1 or
240 - address.find(u'）') != -1)
241 + return (address.find('（') == -1 or
242 + address.find('）') != -1)
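
Under Python 3 every str literal is already unicode, so the u'' prefixes simply disappear. The one subtle spot is stdout: printing an encoded value would emit the b'...' repr, which is why the entry line above is printed as a str. If the seed output must be UTF-8 bytes regardless of the build machine's locale, a hypothetical alternative (not part of this patch) is to write to the binary buffer directly:

    import sys

    def emit(line):
        # Hypothetical helper: bypass the text layer and force UTF-8 output.
        sys.stdout.buffer.write(line.encode('utf-8') + b'\n')

    emit('1000001\t0\t0\t7000\t東京都千代田区千代田\tZIP_CODE')  # sample values only
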
246 --- /src/dictionary/zip_code_util.py
247 +++ /src/dictionary/zip_code_util.py
252 - SpecialMergeZip(u'5900111', u'大阪府', u'堺市中区', [u'三原台']),
253 - SpecialMergeZip(u'8710046', u'大分県', u'中津市',
254 - [u'金谷', u'西堀端', u'東堀端', u'古金谷']),
255 - SpecialMergeZip(u'9218046', u'石川県', u'金沢市',
256 - [u'大桑町', u'三小牛町']),
257 + SpecialMergeZip('5900111', '大阪府', '堺市中区', ['三原台']),
258 + SpecialMergeZip('8710046', '大分県', '中津市',
259 + ['金谷', '西堀端', '東堀端', '古金谷']),
260 + SpecialMergeZip('9218046', '石川県', '金沢市',
265 --- /src/gui/character_pad/data/gen_cp932_map.py
266 +++ /src/gui/character_pad/data/gen_cp932_map.py
273 kUnicodePat = re.compile(r'0x[0-9A-Fa-f]{2,4}')
274 def IsValidUnicode(n):
276 fh = open(sys.argv[1])
278 for line in fh.readlines():
282 - array = string.split(line)
283 + array = line.split()
286 if eval(sjis) < 32 or not IsValidUnicode(ucs2):
288 result.setdefault(ucs2, sjis)
291 keys = sorted(result.keys())
293 - print "struct CP932MapData {"
294 - print " unsigned int ucs4;"
295 - print " unsigned short int sjis;"
298 - print "static const size_t kCP932MapDataSize = %d;" % (len(keys))
299 - print "static const CP932MapData kCP932MapData[] = {"
300 + print("struct CP932MapData {")
301 + print(" unsigned int ucs4;")
302 + print(" unsigned short int sjis;")
305 + print("static const size_t kCP932MapDataSize = %d;" % (len(keys)))
306 + print("static const CP932MapData kCP932MapData[] = {")
308 - print " { %s, %s }," % (n ,result[n])
311 + print(" { %s, %s }," % (n ,result[n]))
312 + print(" { 0, 0 }");
315 if __name__ == "__main__":
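
The module-level helpers string.split(), string.replace(), etc. were removed in Python 3; the str methods take over, and the now-unused 'import string' can be dropped along with them. Minimal sketch, not mozc code:

    # Python 2:  import string; array = string.split(line)
    # Python 3:  use the method on the object itself.
    line = '0x8140 0x3000 # IDEOGRAPHIC SPACE'
    array = line.split()
    assert array[:2] == ['0x8140', '0x3000']
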
317 --- /src/gui/character_pad/data/gen_local_character_map.py
318 +++ /src/gui/character_pad/data/gen_local_character_map.py
330 for line in fh.readlines():
334 - array = string.split(line)
335 + array = line.split()
336 jis = array[0].replace('0x', '')
337 ucs2 = array[1].replace('0x', '')
341 if IsValidUnicode(ucs2):
342 result.append([jis, ucs2])
345 return ["JISX0201", result]
350 for line in fh.readlines():
355 jis = array[1].replace('0x', '')
356 ucs2 = array[2].replace('0x', '')
357 if IsValidUnicode(ucs2):
358 result.append([jis, ucs2])
361 return ["JISX0208", result]
366 for line in fh.readlines():
371 jis = array[0].replace('0x', '')
372 ucs2 = array[1].replace('0x', '')
373 if IsValidUnicode(ucs2):
374 result.append([jis, ucs2])
377 return ["JISX0212", result]
382 for line in fh.readlines():
387 sjis = array[0].replace('0x', '')
388 @@ -100,19 +102,20 @@
390 if IsValidUnicode(ucs2):
391 result.append([sjis, ucs2])
394 return ["CP932", result]
399 - print "static const size_t k%sMapSize = %d;" % (name, len(result))
400 - print "static const mozc::gui::CharacterPalette::LocalCharacterMap k%sMap[] = {" % (name)
401 + print("static const size_t k%sMapSize = %d;" % (name, len(result)))
402 + print("static const mozc::gui::CharacterPalette::LocalCharacterMap k%sMap[] = {" % (name))
404 - print " { 0x%s, 0x%s }," % (n[0] ,n[1])
408 + print(" { 0x%s, 0x%s }," % (n[0] ,n[1]))
409 + print(" { 0, 0 }");
413 if __name__ == "__main__":
414 Output(LoadJISX0201(sys.argv[1]))
415 --- /src/gui/character_pad/data/gen_unicode_blocks.py
416 +++ /src/gui/character_pad/data/gen_unicode_blocks.py
421 -re = re.compile('^(.....?)\.\.(.....?); (.+)')
422 +re = re.compile(r'^(.....?)\.\.(.....?); (.+)')
425 - print "static const mozc::gui::CharacterPalette::UnicodeBlock kUnicodeBlockTable[] = {"
426 + print("static const mozc::gui::CharacterPalette::UnicodeBlock kUnicodeBlockTable[] = {")
427 fh = open(sys.argv[1])
428 for line in fh.readlines():
435 end = int(m.group(2), 16)
437 if start <= 0x2FFFF and end <= 0x2FFFF:
438 - print " { \"%s\", { %d, %d } }," % (name, start, end)
439 + print(" { \"%s\", { %d, %d } }," % (name, start, end))
442 - print " { NULL, { 0, 0 } }"
445 + print(" { NULL, { 0, 0 } }")
449 if __name__ == "__main__":
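
The added r prefix matters because '\.' inside a plain string literal is a deprecated escape sequence from Python 3.6 on (and slated to become a SyntaxError); the compiled pattern itself is unchanged. A minimal sketch, not mozc code, using the Blocks.txt line format the script parses:

    import re

    pattern = re.compile(r'^(.....?)\.\.(.....?); (.+)')
    m = pattern.match('0000..007F; Basic Latin')
    assert (m.group(1), m.group(2), m.group(3)) == ('0000', '007F', 'Basic Latin')
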
451 --- /src/gui/character_pad/data/gen_unicode_data.py
452 +++ /src/gui/character_pad/data/gen_unicode_data.py
456 results.append(" { %d, \"%s\" }," % (code, desc))
459 - print "struct UnicodeData {";
460 - print " char32 ucs4;";
461 - print " const char *description;";
464 - print "static const size_t kUnicodeDataSize = %d;" % (len(results))
465 - print "static const UnicodeData kUnicodeData[] = {";
466 + print("struct UnicodeData {");
467 + print(" char32 ucs4;");
468 + print(" const char *description;");
471 + print("static const size_t kUnicodeDataSize = %d;" % (len(results)))
472 + print("static const UnicodeData kUnicodeData[] = {");
475 - print " { 0, NULL }";
478 + print(" { 0, NULL }");
481 if __name__ == "__main__":
483 --- /src/gui/character_pad/data/gen_unihan_data.py
484 +++ /src/gui/character_pad/data/gen_unihan_data.py
494 - if n is not "NULL":
496 return "\"%s\"" % (n)
501 - if n is not "NULL":
502 - n = string.replace(n, '0-', 'JIS X 0208: 0x')
503 - n = string.replace(n, '1-', 'JIS X 0212: 0x')
504 - n = string.replace(n, '3-', 'JIS X 0213: 0x')
505 - n = string.replace(n, '4-', 'JIS X 0213: 0x')
506 - n = string.replace(n, 'A-', 'Vendors Ideographs: 0x')
507 - n = string.replace(n, '3A', 'JIS X 0213 2000: 0x')
509 + n = n.replace('0-', 'JIS X 0208: 0x')
510 + n = n.replace('1-', 'JIS X 0212: 0x')
511 + n = n.replace('3-', 'JIS X 0213: 0x')
512 + n = n.replace('4-', 'JIS X 0213: 0x')
513 + n = n.replace('A-', 'Vendors Ideographs: 0x')
514 + n = n.replace('3A', 'JIS X 0213 2000: 0x')
520 pat = re.compile(r'^(\d+)\.')
521 - if n is not "NULL":
525 result = rs[m.group(1)]
526 - return "\"%s\"" % (result.encode('string_escape'))
527 + return "\"%s\"" % result
538 pat = re.compile(r'^U\+(\S+)\s+(kTotalStrokes|kJapaneseKun|kJapaneseOn|kRSUnicode|kIRG_JSource)\t(.+)')
540 n = int(m.group(1), 16)
542 dic.setdefault(key, {}).setdefault(field, value)
545 keys = sorted(dic.keys())
547 - print "struct UnihanData {";
548 - print " unsigned int ucs4;";
549 + print("struct UnihanData {");
550 + print(" unsigned int ucs4;");
551 # Since the total strokes defined in Unihan data is Chinese-based
552 # number, we can't use it.
553 # print " unsigned char total_strokes;";
554 - print " const char *japanese_kun;";
555 - print " const char *japanese_on;";
556 + print(" const char *japanese_kun;");
557 + print(" const char *japanese_on;");
558 # Since the radical information defined in Unihan data is Chinese-based
559 # number, we can't use it.
560 # print " const char *radical;";
561 - print " const char *IRG_jsource;";
563 - print "static const size_t kUnihanDataSize = %d;" % (len(keys))
564 - print "static const UnihanData kUnihanData[] = {"
565 + print(" const char *IRG_jsource;");
567 + print("static const size_t kUnihanDataSize = %d;" % (len(keys)))
568 + print("static const UnihanData kUnihanData[] = {")
571 total_strokes = dic[key].get("kTotalStrokes", "0")
573 rad = GetRadical(dic[key].get("kRSUnicode", "NULL"))
574 code = GetCode(dic[key].get("kIRG_JSource", "NULL"))
575 # print " { 0x%s, %s, %s, %s, %s, %s }," % (key, total_strokes, kun, on, rad, code)
576 - print " { 0x%s, %s, %s, %s }," % (key, kun, on, code)
577 + print(" { 0x%s, %s, %s, %s }," % (key, kun, on, code))
582 if __name__ == "__main__":
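
Two final Python 3 details in this file: comparing against a string literal with is / is not relies on interning and raises a SyntaxWarning from Python 3.8 on, so the checks should read n != "NULL"; and the 'string_escape' codec no longer exists, which is why the .encode('string_escape') call is dropped. A minimal sketch of the shape of the check (hypothetical helper, not the actual mozc function):

    def quote_or_null(n):
        # Equality, not identity: `n is not "NULL"` warns under Python 3.8+.
        if n != 'NULL':
            return '"%s"' % n
        return n

    assert quote_or_null('NULL') == 'NULL'
    assert quote_or_null('0-3021') == '"0-3021"'
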