jblite/jmdict.py

   1 # -*- coding: utf-8 -*-
   2 """JMdict support."""
   3
   4 # This could be a bit cleaner if I used something like SQLalchemy
   5 # perhaps...  The create/insert/index bits were done decent enough,
   6 # but lookups are done in straight SQL due to the potential
   7 # complexity, and this sadly does break the abstraction of the table
   8 # objects...
   9
  10 from __future__ import print_function
  11 from __future__ import with_statement
  12
  13 import os, sys, re, sqlite3
  14 from cStringIO import StringIO
  15 from xml.etree.cElementTree import ElementTree
  16 from helpers import gzread
  17 from table import Table, KeyValueTable
  18
  19 import gettext
  20 #t = gettext.translation("jblite")
  21 #_ = t.ugettext
  22 gettext.install("jblite")
  23
  24 # Full expansion of xml:lang
  25 XML_LANG = "{http://www.w3.org/XML/1998/namespace}lang"
  26
  27
  28 # Copied from kd2.py...
  29 get_encoding = sys.getfilesystemencoding
  30
  31 # FORMAT OF TABLE MAP:
  32 # dictionary entry: table: (children | None)
  33 # table: table_name | (table_name, table_type, *args, **kwargs)
  34 #
  35 # Ideas:
  36 # Value = dict: take keys as child tables, lookup all rows, and take values as grandchildren.
  37 # Value = list: take items as child tables, lookup all rows, assume no children.
  38 #
  39 #
  40 # entry:
  41 # data = tables["entry"].lookup()
  42 # children_map = TABLE_MAP["entry"]
  43 # children = get_data(children_map["k_ele"])
  44 # result = TableData(data, children)
  45 #
  46 #
  47 # {"k_ele": {"data": [...],
  48 #            "children": {...}}}
  49
  50 # Table data object:
  51 #   obj.data: {},  # single db row
  52 #   obj.children: {"key": table_object}
  53
  54
  55 # breadth first creation?  depth?
  56
  57 class Record(object):
  58
  59     """Represents a row in a table, plus all data it is a 'parent' of.
  60
  61     Each Record may be linked to multiple Records in child tables.
  62
  63     """
  64
  65     def __init__(self, data=None, children=None):
  66         self.data = data if data is not None else {}
  67         self.children = children if children is not None else {}
  68
  69 # Map of tables to their children maps.  Empty {} means no children.
  70 TABLE_MAP = {
  71     "entry": {
  72         "k_ele": {
  73             "ke_inf": {},
  74             "ke_pri": {},
  75             },
  76         "r_ele": {
  77             "re_restr": {},
  78             "re_inf": {},
  79             "re_pri": {},
  80             },
  81         "links": {},
  82         "bibl": {},
  83         "etym": {},
  84         "audit": {},
  85         "sense": {
  86             "pos": {},
  87             "field": {},
  88             "misc": {},
  89             "dial": {},
  90             "stagk": {},
  91             "stagr": {},
  92             "xref": {},
  93             "ant": {},
  94             "s_inf": {},
  95             "example": {},
  96             "lsource": {},
  97             "gloss": {
  98                 "pri": {},
  99                 }
 100             }
 101         }
 102     }
 103
 104
 105 class Entry(object):
 106
 107     def __init__(self, data_d):
 108         self._d = data_d
 109
 110     def __unicode__(self):
 111         """Basic string representation of the entry."""
 112         d = self._d
 113         lines = []
 114
 115         k_eles = d.get("k_ele", [])
 116         if len(k_eles) > 0:
 117             lines.append(_(u"Kanji readings:"))
 118         for k_ele_index, k_ele in enumerate(k_eles):
 119             k_ele_index += 1
 120             lines.append(_(u"  Reading %d:") % k_ele_index)
 121             lines.append(_(u"    Blob: %s") % k_ele['keb'])
 122
 123         r_eles = d.get("r_ele", [])
 124         if len(r_eles) > 0:
 125             lines.append(_(u"Kana readings:"))
 126         for r_ele_index, r_ele in enumerate(r_eles):
 127             r_ele_index += 1
 128             lines.append(_(u"  Reading %d:") % r_ele_index)
 129             lines.append(_(u"    Blob: %s") % r_ele['reb'])
 130
 131         senses = d.get("sense", [])
 132         if len(senses) > 0:
 133             lines.append(_(u"Glosses:"))
 134         for sense_index, sense in enumerate(senses):
 135             sense_index += 1
 136             lines.append(_(u"  Sense %d:") % sense_index)
 137             glosses = sense.get("gloss", {})
 138             # Output glosses by language
 139             for lang in sorted(glosses.keys()):
 140                 values = glosses[lang]
 141                 lines.append(_(u"    Lang: %s") % lang)
 142                 for val_index, val_d in enumerate(values):
 143                     val_index += 1
 144                     val = val_d['value']
 145                     lines.append(_(u"      Gloss %d: %s") % (val_index, val))
 146         return u"\n".join(lines)
 147
 148     def __repr__(self):
 149         return repr(self._d)
 150
 151
 152 class Database(object):
 153
 154     """Top level object for SQLite 3-based JMdict database."""
 155
 156     def __init__(self, filename, init_from_file=None):
 157         self.conn = sqlite3.connect(filename)
 158         self.conn.row_factory = sqlite3.Row  # keyword accessors for rows
 159         self.cursor = self.conn.cursor()
 160         self.tables = self._create_table_objects()
 161         if init_from_file is not None:
 162             raw_data = gzread(init_from_file)
 163
 164             entities = self._get_entities(raw_data)
 165             infile = StringIO(raw_data)
 166             etree = ElementTree(file=infile)
 167             infile.close()
 168
 169             self._create_new_tables()
 170             self._populate_database(etree, entities)
 171             self.conn.commit()
 172
 173     def search(self, query, lang=None):
 174         # Search
 175         # Two main methods: to and from Japanese.
 176         # 1. Guess which direction we're searching.
 177         # 2. Search preferred method.
 178         # 3. Search remaining method.
 179         entries_from = self.search_from_japanese(query)
 180         entries_to = self.search_to_japanese(query, lang=lang)
 181
 182         results = entries_from + entries_to
 183         return results
 184
 185     def search_from_japanese(self, query):
 186         # Japanese search locations:
 187         # 1. Kanji elements
 188         # 2. Reading elements
 189         # 3. Any indices (none yet)
 190         #
 191         # Preferred orderings
 192         # 1. Location of query in result
 193         #    1. Exact match
 194         #    2. Begins with
 195         #    3. Anywhere
 196         # 2. Ranking of usage (the (P) option in EDICT, for example)
 197         #
 198         # FOR NOW: just get the searching working.
 199         # This puts us on roughly the same level as J-Ben 1.2.x.
 200         encoding = get_encoding()
 201         wrapped_query = "%%%s%%" % query  # Wrap in wildcards
 202         unicode_query = wrapped_query.decode(encoding)
 203
 204         entries_by_keb = self._search_keb(unicode_query)
 205         entries_by_reb = self._search_reb(unicode_query)
 206         #entries_by_indices = self._search_indices_from_ja(unicode_query)
 207
 208         # Merge results into one list and return.
 209         results = []
 210         for lst in (entries_by_keb, entries_by_reb):
 211             for o in lst:
 212                 if o not in results:
 213                     results.append(o)
 214         return results
 215
 216     def _search_keb(self, unicode_query):
 217         """Searches kanji elements (Japanese readings with kanji).
 218
 219         Returns a list of entry IDs.
 220
 221         """
 222         # keb: entry.id -> k_ele.fk, k_ele.value
 223         query = "SELECT fk FROM k_ele WHERE value LIKE ?"
 224         args = (unicode_query,)
 225         self.cursor.execute(query, args)
 226         rows = self.cursor.fetchall()
 227         return [row[0] for row in rows]
 228
 229     def _search_reb(self, unicode_query):
 230         """Searches reading elements (Japanese readings without kanji).
 231
 232         Returns a list of entry IDs.
 233
 234         """
 235         # reb: entry.id -> r_ele.fk, r_ele.value
 236         query = "SELECT fk FROM r_ele WHERE value LIKE ?"
 237         args = (unicode_query,)
 238         self.cursor.execute(query, args)
 239         rows = self.cursor.fetchall()
 240         return [row[0] for row in rows]
 241
 242     def _search_indices_from_ja(self, unicode_query):
 243         raise NotImplementedError
 244
 245     def search_to_japanese(self, query, lang):
 246         # Foreign language search locations:
 247         # 1. Glosses
 248         # 2. Any indices (none yet)
 249         #
 250         # For other considerations, see search_from_japanese().
 251         encoding = get_encoding()
 252         wrapped_query = "%%%s%%" % query  # Wrap in wildcards
 253         unicode_query = wrapped_query.decode(encoding)
 254
 255         entries_by_glosses = self._search_glosses(unicode_query, lang)
 256         #entries_by_indices = self._search_indices_to_ja(unicode_query, lang)
 257
 258         # Merge results into one list and return.
 259         results = []
 260         for lst in (entries_by_glosses,):
 261             for o in lst:
 262                 if o not in results:
 263                     results.append(o)
 264         return results
 265
 266
 267     def _search_glosses(self, unicode_query, lang):
 268         """Searches foreign language glosses.
 269
 270         If lang is not None, only entries which match the lang
 271         parameter are returned.
 272
 273         Returns a list of entry IDs.
 274
 275         """
 276         # entry.id -> sense.fk, sense.id -> gloss.fk
 277         if lang is not None:
 278             query = (
 279                 "SELECT e.id FROM gloss g, sense s, entry e "
 280                 "WHERE g.lang = ? AND g.value LIKE ? "
 281                 "AND g.fk = s.id AND s.fk = e.id"
 282             )
 283             args = (lang, unicode_query)
 284         else:
 285             query = (
 286                 "SELECT e.id FROM gloss g, sense s, entry e "
 287                 "WHERE g.value LIKE ?"
 288             )
 289             args = (unicode_query,)
 290
 291         self.cursor.execute(query, args)
 292         rows = self.cursor.fetchall()
 293         return [row[0] for row in rows]
 294
 295     def _search_indices_to_ja(self, unicode_query, lang):
 296         raise NotImplementedError
 297
 298     def query_db(self, *args, **kwargs):
 299         """Helper.  Wraps the execute/fetchall idiom on the DB cursor."""
 300         self.cursor.execute(*args, **kwargs)
 301         return self.cursor.fetchall()
 302
 303     def _convert_entities(self, entities):
 304         """Expands a list of entities.
 305
 306         Returns a list of the entity expansions.  The order of the
 307         returned expansions matches the order of the input entities.
 308
 309         """
 310         args = list(sorted(set(entities)))
 311         template = ", ".join(["?"] * len(args))
 312         query = "SELECT entity, expansion " \
 313             "FROM entity WHERE entity IN (%s)" % template
 314         rows = self.query_db(query, args)
 315         d = {}
 316         for entity, expansion in rows:
 317             d[entity] = expansion
 318         result = [d[entity] for entity in entities]
 319         return result
 320
 321     # lookup entry by id
 322     # lookup all other data by fk
 323
 324     def lookup(self, entry_id):
 325         """Creates an entry object.
 326
 327         Returns a Record instance from the entry table, with all data
 328         linked as children to this record.
 329
 330         """
 331         # Lookup data in entry table.
 332         rows = self.tables['entry'].lookup_by_id(entry_id)
 333         data = rows[0]  # only 1 row
 334         # Lookup child data using the entry_id as a foreign key.
 335         children = self._lookup_children(TABLE_MAP['entry'], data['id'])
 336         record = Record(data, children)
 337         return record
 338
 339     def _lookup_children(self, children_map, fk):
 340         children = {}
 341         for child_table in children_map:
 342             grandchild_map = children_map[child_table]
 343             children[child_table] = self._lookup_by_fk(
 344                 child_table, children_map[child_table], fk)
 345         return children
 346
 347     def _lookup_by_fk(self, table_name, children_map, fk):
 348         """Looks up data from a table and related 'child' tables.
 349
 350         table_name: name of the table to query.
 351         children_map: a dictionary of child table mappings, or None if
 352             no children are present.
 353         fk: foreign key used in table query.
 354
 355         """
 356         rows = self.tables[table_name].lookup_by_fk(fk)
 357         results = []
 358         for row in rows:
 359             children = self._lookup_children(children_map, row['id'])
 360             record = Record(row, children)
 361             results.append(record)
 362         return results
 363
 364     def _old_lookup(self):
 365         # AT LEAST (for now...):
 366         # 1. readings
 367         # 1.1. k_ele
 368         rows = self.query_db("SELECT id, value FROM k_ele WHERE fk = ?",
 369                              (entry_id,))
 370         k_ele = []
 371         for k_ele_id, keb in rows:
 372             # ke_inf
 373             query = "SELECT entity FROM ke_inf WHERE fk = ?"
 374             args = (k_ele_id,)
 375             rows = self.query_db(query, args)
 376             ke_inf = self._convert_entities([row[0] for row in rows])
 377
 378             # ke_pri
 379             query = "SELECT value FROM ke_pri WHERE fk = ?"
 380             args = (k_ele_id,)
 381             rows = self.query_db(query, args)
 382             ke_pri = [row[0] for row in rows]
 383
 384             # merge results
 385             k_ele_d = {}
 386             k_ele_d['keb'] = keb
 387             k_ele_d['ke_pri'] = ke_pri
 388             k_ele_d['ke_inf'] = ke_inf
 389             k_ele.append(k_ele_d)
 390         result['k_ele'] = k_ele
 391         # 1.2. r_ele
 392         query = "SELECT id, value, nokanji FROM r_ele WHERE fk = ?"
 393         args = (entry_id,)
 394         rows = self.query_db(query, args)
 395         r_ele = []
 396         for r_ele_id, reb, nokanji in rows:
 397             # re_restr
 398             query = "SELECT value FROM re_restr WHERE fk = ?"
 399             args = (r_ele_id,)
 400             rows = self.query_db(query, args)
 401             re_restr = [row[0] for row in rows]
 402
 403             # re_inf
 404             query = "SELECT entity FROM re_inf WHERE fk = ?"
 405             args = (r_ele_id,)
 406             rows = self.query_db(query, args)
 407             re_inf = self._convert_entities([row[0] for row in rows])
 408
 409             # re_pri
 410             query = "SELECT value FROM re_pri WHERE fk = ?"
 411             args = (r_ele_id,)
 412             rows = self.query_db(query, args)
 413             re_pri = [row[0] for row in rows]
 414
 415             # merge results
 416             r_ele_d = {}
 417             r_ele_d['reb'] = reb
 418             r_ele_d['nokanji'] = nokanji
 419             r_ele_d['re_restr'] = re_restr
 420             r_ele_d['re_pri'] = re_pri
 421             r_ele_d['re_inf'] = re_inf
 422             r_ele.append(r_ele_d)
 423         result['r_ele'] = r_ele
 424
 425         # 2. glosses
 426         query = "SELECT id FROM sense WHERE fk = ?"
 427         args = (entry_id,)
 428         rows = self.query_db(query, args)
 429         sense_ids = [row[0] for row in rows]
 430
 431         sense = []
 432         for sense_id in sense_ids:
 433             # gloss
 434             query = "SELECT lang, value, g_gend, pri FROM gloss WHERE fk = ?"
 435             args = (sense_id,)
 436             rows = self.query_db(query, args)
 437             gloss = {}
 438             for lang, value, g_gend, pri in rows:
 439                 lst = gloss.setdefault(lang, [])
 440                 lst.append(
 441                     {"value": value, "g_gend": g_gend, "pri": pri})
 442             sense_d = {}
 443             sense_d['gloss'] = gloss
 444             sense.append(sense_d)
 445         result['sense'] = sense
 446
 447         return Entry(result)
 448
 449     def _create_table_objects(self):
 450         """Creates table objects.
 451
 452         Returns a dictionary of table name to table object.
 453
 454         """
 455         class_mappings = {
 456             "entry": EntryTable,     # key->int ID
 457             "r_ele": REleTable,      # key-value plus nokanji flag
 458             "sense": SenseTable,     # one-many group mapping for sense info
 459             "audit": AuditTable,     # key->(update_date, update_details)
 460             "lsource": LSourceTable, # key -> lang, type=full/part, wasei=t/f
 461             "gloss": GlossTable,     # key -> lang, g_gend, value, pri flag
 462             "links": LinksTable,     # key -> tag, desc, uri
 463             "bibl": BiblTable,       # key -> tag, txt
 464             "entity": EntityTable,   # Info from JMdict XML entities
 465             }
 466
 467         # Set up key/value and key/entity tables
 468         kv_tables = [ # key-value tables (id -> text blob)
 469             "k_ele",
 470             "ke_pri",
 471             "re_restr",
 472             "re_pri",
 473             "etym",
 474             "stagk",
 475             "stagr",
 476             "xref",  # (#PCDATA)* - why the *?
 477             "ant",   # (#PCDATA)* - why the *?
 478             "s_inf",
 479             "example",
 480             "pri",
 481             ]
 482         kv_entity_tables = [ # key-value tables where val == entity
 483             "ke_inf",
 484             "re_inf",
 485             "dial",
 486             "field",
 487             "misc",
 488             "pos",
 489             ]
 490         for tbl in kv_tables:
 491             class_mappings[tbl] = KeyValueTable
 492         for tbl in kv_entity_tables:
 493             class_mappings[tbl] = KeyEntityTable
 494
 495         # Create all table objects
 496         table_mappings = {}
 497         for tbl, cls in class_mappings.iteritems():
 498             table_mappings[tbl] = cls(self.cursor, tbl)
 499
 500         return table_mappings
 501
 502     def _create_new_tables(self):
 503         """(Re)creates the database tables."""
 504         for tbl, tbl_obj in self.tables.iteritems():
 505             self.cursor.execute("DROP TABLE IF EXISTS %s" % tbl)
 506             tbl_obj.create()
 507
 508     def _populate_database(self, etree, entities):
 509         """Imports XML data into SQLite database.
 510
 511         table_d: table to table_object dictionary
 512         etree: ElementTree object for JMdict
 513         entities: entity name to description dictionary
 514
 515         """
 516         # NOTE: this is waaay too long.  Should be broken up somehow.
 517         # For now this will work though...
 518
 519         # Populate entities table and get integer keys
 520         # NOTE: we'll be mapping from *expanded* entities to ints.
 521         entity_int_d = {}
 522         tbl = self.tables['entity']
 523         for entity, expansion in entities.iteritems():
 524             i = tbl.insert(entity, expansion)
 525             entity_int_d[expansion] = i
 526
 527         # Iterate through each entry
 528         for entry in etree.findall("entry"):
 529
 530             # entry table
 531             ent_seq = entry.find("ent_seq")
 532             entry_id = self.tables["entry"].insert(int(ent_seq.text))
 533
 534             for k_ele in entry.findall("k_ele"):
 535                 # k_ele
 536                 value = k_ele.find("keb").text
 537                 k_ele_id = self.tables["k_ele"].insert(entry_id, value)
 538
 539                 # ke_inf
 540                 for ke_inf in k_ele.findall("ke_inf"):
 541                     value = ke_inf.text.strip()
 542                     entity_id = entity_int_d[value]
 543                     self.tables["ke_inf"].insert(k_ele_id, entity_id)
 544
 545                 # ke_pri
 546                 for ke_pri in k_ele.findall("ke_pri"):
 547                     value = ke_pri.text
 548                     self.tables["ke_pri"].insert(k_ele_id, value)
 549
 550             for r_ele in entry.findall("r_ele"):
 551                 # r_ele
 552                 value = r_ele.find("reb").text
 553                 # For nokanji: currently it's an empty tag, so
 554                 # treating it as true/false.
 555                 nokanji = 1 if r_ele.find("nokanji") is not None else 0
 556                 r_ele_id = self.tables["r_ele"].insert(entry_id, value, nokanji)
 557
 558                 # re_restr
 559                 for re_restr in r_ele.findall("re_restr"):
 560                     value = re_restr.text
 561                     self.tables["re_restr"].insert(r_ele_id, value)
 562
 563                 # re_inf
 564                 for re_inf in r_ele.findall("re_inf"):
 565                     value = re_inf.text.strip()
 566                     entity_id = entity_int_d[value]
 567                     self.tables["re_inf"].insert(r_ele_id, entity_id)
 568
 569                 # re_pri
 570                 for re_pri in r_ele.findall("re_pri"):
 571                     value = re_pri.text
 572                     self.tables["re_pri"].insert(r_ele_id, value)
 573
 574             # info
 575             # (Although children of an info node, since there's only
 576             # one per entry, let's connect directly to the entry.)
 577             info = entry.find("info")
 578             if info is not None:
 579                 for links in info.findall("links"):
 580                     link_tag = links.find("link_tag").text
 581                     link_desc = links.find("link_desc").text
 582                     link_uri = links.find("link_uri").text
 583                     self.tables["links"].insert(entry_id, link_tag, link_desc,
 584                                                 link_uri)
 585                 for bibl in info.findall("bibl"):
 586                     bib_tag = links.find("bib_tag")
 587                     bib_txt = links.find("bib_txt")
 588                     bib_tag = bib_tag.text if bib_tag is not None else None
 589                     bib_txt = bib_txt.text if bib_txt is not None else None
 590                     self.tables["bibl"].insert(entry_id, bib_tag, bib_txt)
 591                 for etym in info.findall("etym"):
 592                     self.tables["etym"].insert(entry_id, etym.text)
 593                 for audit in info.findall("audit"):
 594                     upd_date = audit.find("upd_date").text
 595                     upd_detl = audit.find("upd_detl").text
 596                     self.tables["audit"].insert(entry_id, upd_date, upd_detl)
 597
 598             # sense
 599             key_entity_tables = ["pos", "field", "misc", "dial"]
 600             key_value_tables = ["stagk", "stagr", "xref", "ant", "s_inf", "example"]
 601
 602             for sense in entry.findall("sense"):
 603                 # Each sense gets its own ID, for grouping purposes
 604                 sense_id = self.tables["sense"].insert(entry_id)
 605
 606                 for elem_name in key_value_tables:
 607                     for element in sense.findall(elem_name):
 608                         self.tables[elem_name].insert(sense_id, element.text)
 609
 610                 for elem_name in key_entity_tables:
 611                     for element in sense.findall(elem_name):
 612                         entity_id = entity_int_d[element.text.strip()]
 613                         self.tables[elem_name].insert(sense_id, entity_id)
 614
 615                 for lsource in sense.findall("lsource"):
 616                     lang = lsource.get(XML_LANG, "eng")
 617                     ls_type = lsource.get("ls_type")  # implied "full" if absent, "part" otherwise
 618                     ls_wasei = lsource.get("ls_wasei") # usually "y"... just a flag.
 619
 620                     partial = 1 if ls_type is not None else 0
 621                     if ls_wasei is None:
 622                         wasei = 0
 623                     elif ls_wasei == "y":
 624                         wasei = 1
 625                     else:
 626                         raise ValueError(
 627                             'Only known valid ls_wasei attribute value '
 628                             'is "y", found:', ls_wasei.text)
 629
 630                     self.tables["lsource"].insert(sense_id,
 631                                                   lang, partial, wasei)
 632                 for gloss in sense.findall("gloss"):
 633                     lang = gloss.get(XML_LANG, "eng")
 634                     g_gend = gloss.get("g_gend")
 635                     pri_list = gloss.getchildren()
 636                     if len(pri_list) > 1:
 637                         gloss_id = self.tables['gloss'].insert(
 638                             sense_id, lang, g_gend, gloss.text, 1)
 639                         for pri in pri_list:
 640                             self.tables['pri'].insert(gloss_id, pri.text)
 641                     else:
 642                         self.tables['gloss'].insert(sense_id, lang, g_gend,
 643                                                     gloss.text, 0)
 644
 645     def _get_entities(self, xml_data):
 646         """Gets the ENTITY definitions from JMdict.
 647
 648         Finds the built-in DTD and extracts all ENTITY definitions.
 649
 650         """
 651         dtd = self._get_dtd(xml_data)
 652         # do some logic to find all entities...
 653         entities = {}
 654         regex = '<!ENTITY[ ]+([a-zA-Z0-9-]+)[ ]+"(.*?)">'
 655         for match in re.finditer(regex, xml_data):
 656             key, value = match.groups()[0:2]
 657             entities[key] = value
 658         return entities
 659
 660     def _get_dtd(self, xml_data):
 661         """Gets the DTD from JMdict."""
 662         # This works for JMdict (as it is at the time of writing), but is
 663         # not a general solution.
 664         start_index = xml_data.find("<!DOCTYPE")
 665         if start_index == -1:
 666             raise Exception("Could not find start of internal DTD")
 667         end_index = xml_data.find("]>")
 668         if end_index == -1:
 669             raise Exception("Could not find end ofinternal DTD")
 670         end_index += 2
 671         dtd = xml_data[start_index:end_index]
 672         return dtd
 673
 674
 675 class EntryTable(Table):
 676     create_query = ("CREATE TABLE %s "
 677                     "(id INTEGER PRIMARY KEY, ent_seq INTEGER)")
 678     insert_query = "INSERT INTO %s VALUES (NULL, ?)"
 679     index_queries = [
 680         "CREATE INDEX %s_seq ON %s (ent_seq)",
 681         ]
 682
 683
 684 class KeyEntityTable(KeyValueTable):
 685     """Just like a KeyValueTable, but with 'entity' instead of 'value'."""
 686     create_query = ("CREATE TABLE %s "
 687                     "(id INTEGER PRIMARY KEY, fk INTEGER, entity INTEGER)")
 688
 689
 690 class REleTable(Table):
 691     create_query = ("CREATE TABLE %s "
 692                     "(id INTEGER PRIMARY KEY, fk INTEGER,"
 693                     " value TEXT, nokanji INTEGER)")
 694     insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?)"
 695     index_queries = [
 696         "CREATE INDEX %s_fk ON %s (fk)",
 697         ]
 698
 699
 700 class SenseTable(Table):
 701     """Corresponds to <sense> tag.  Functions as group for glosses, etc."""
 702     create_query = ("CREATE TABLE %s (id INTEGER PRIMARY KEY, fk INTEGER)")
 703     insert_query = "INSERT INTO %s VALUES (NULL, ?)"
 704     index_queries = [
 705         "CREATE INDEX %s_fk ON %s (fk)",
 706         ]
 707
 708
 709 class AuditTable(Table):
 710     create_query = ("CREATE TABLE %s "
 711                     "(id INTEGER PRIMARY KEY, fk INTEGER,"
 712                     " update_date TEXT, update_details TEXT)")
 713     insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?)"
 714     index_queries = [
 715         "CREATE INDEX %s_fk ON %s (fk)",
 716         ]
 717
 718
 719 class LSourceTable(Table):
 720     """Represents the <lsource> element from JMdict.
 721
 722     Important changes:
 723     ls_type=full/part => partial=1/0
 724     ls_wasei=y/null => wasei=1/0
 725
 726     """
 727     create_query = ("CREATE TABLE %s "
 728                     "(id INTEGER PRIMARY KEY, fk INTEGER,"
 729                     " lang TEXT, partial INTEGER, wasei INTEGER)")
 730     insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?, ?)"
 731     index_queries = [
 732         "CREATE INDEX %s_fk ON %s (fk)",
 733         ]
 734
 735
 736 class GlossTable(Table):
 737     create_query = ("CREATE TABLE %s "
 738                     "(id INTEGER PRIMARY KEY, fk INTEGER,"
 739                     " lang TEXT, g_gend TEXT, value TEXT, pri INTEGER)")
 740     insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?, ?, ?)"
 741     index_queries = [
 742         "CREATE INDEX %s_fk ON %s (fk)",
 743         "CREATE INDEX %s_lang ON %s (lang)",
 744         "CREATE INDEX %s_value ON %s (value)",
 745         ]
 746
 747
 748 class LinksTable(Table):
 749     create_query = ("CREATE TABLE %s "
 750                     "(id INTEGER PRIMARY KEY, fk INTEGER,"
 751                     " tag TEXT, desc TEXT, uri TEXT)")
 752     insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?, ?)"
 753     index_queries = [
 754         "CREATE INDEX %s_fk ON %s (fk)",
 755         ]
 756
 757
 758 class BiblTable(Table):
 759     create_query = ("CREATE TABLE %s "
 760                     "(id INTEGER PRIMARY KEY, fk INTEGER,"
 761                     " tag TEXT, txt TEXT)")
 762     insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?, ?)"
 763     index_queries = [
 764         "CREATE INDEX %s_fk ON %s (fk)",
 765         ]
 766
 767
 768 class EntityTable(Table):
 769     create_query = ("CREATE TABLE %s "
 770                     "(id INTEGER PRIMARY KEY, entity TEXT, expansion TEXT)")
 771     insert_query = "INSERT INTO %s VALUES (NULL, ?, ?)"
 772
 773
 774 ######################################################################
 775
 776 def parse_args():
 777     from optparse import OptionParser
 778     op = OptionParser(usage="%prog [options] <db_filename> [search_query]")
 779     op.add_option("-i", "--initialize",
 780                   dest="init_fname", metavar="XML_SOURCE",
 781                   help=_("Initialize database from file."))
 782     op.add_option("-L", "--lang",
 783                   help=_("Specify preferred language for searching."))
 784     options, args = op.parse_args()
 785     if len(args) < 1:
 786         op.print_help()
 787         exit(-1)
 788     return (options, args)
 789
 790 def main():
 791     # Copied *almost* verbatim from kd2.py.
 792     options, args = parse_args()
 793     db_fname = args[0]
 794
 795     if options.init_fname is not None:
 796         db = Database(db_fname, init_from_file=options.init_fname)
 797     else:
 798         db = Database(db_fname)
 799
 800     results = []
 801     if len(args) > 1:
 802         # Do search
 803         # To be nice, we'll join all remaining args with spaces.
 804         search_query = " ".join(args[1:])
 805         if options.lang is not None:
 806             results = db.search(search_query, lang=options.lang)
 807         else:
 808             results = db.search(search_query)
 809
 810     if len(results) > 0:
 811         from pprint import pprint
 812         encoding = get_encoding()
 813         for index, result in enumerate(results):
 814             index += 1
 815             print(_("[Entry %d]") % index)
 816             entry = db.lookup(result)
 817             try:
 818                 print(unicode(entry).encode(encoding))
 819             except:
 820                 print(repr(entry))
 821             print()
 822     else:
 823         print(_("No results found."))
 824
 825 if __name__ == "__main__":
 826     main()