jblite/jmdict.py

   1 # -*- coding: utf-8 -*-
   2 """JMdict support."""
   3
   4 # This could be a bit cleaner if I used something like SQLalchemy
   5 # perhaps...  The create/insert/index bits were done decent enough,
   6 # but lookups are done in straight SQL due to the potential
   7 # complexity, and this sadly does break the abstraction of the table
   8 # objects...
   9
  10 from __future__ import print_function
  11 from __future__ import with_statement
  12
  13 import os, sys, re, sqlite3
  14 from cStringIO import StringIO
  15 from xml.etree.cElementTree import ElementTree
  16 from helpers import gzread
  17 from db import Database as BaseDatabase
  18 from table import Table, ChildTable, KeyValueTable
  19
  20 import gettext
  21 #t = gettext.translation("jblite")
  22 #_ = t.ugettext
  23 gettext.install("jblite")
  24
  25 # Full expansion of xml:lang
  26 XML_LANG = "{http://www.w3.org/XML/1998/namespace}lang"
  27
  28
  29 # Copied from kd2.py...
  30 get_encoding = sys.getfilesystemencoding
  31
  32 # FORMAT OF TABLE MAP:
  33 # dictionary entry: table: (children | None)
  34 # table: table_name | (table_name, table_type, *args, **kwargs)
  35 #
  36 # Ideas:
  37 # Value = dict: take keys as child tables, lookup all rows, and take values as grandchildren.
  38 # Value = list: take items as child tables, lookup all rows, assume no children.
  39 #
  40 #
  41 # entry:
  42 # data = tables["entry"].lookup()
  43 # children_map = TABLE_MAP["entry"]
  44 # children = get_data(children_map["k_ele"])
  45 # result = TableData(data, children)
  46 #
  47 #
  48 # {"k_ele": {"data": [...],
  49 #            "children": {...}}}
  50
  51 # Table data object:
  52 #   obj.data: {},  # single db row
  53 #   obj.children: {"key": table_object}
  54
  55
  56 # breadth first creation?  depth?
  57
  58 # Map of tables to their children maps.  Empty {} means no children.
  59
  60
  61 class Entry(object):
  62
  63     def __init__(self, record):
  64         self._record = record
  65
  66     def __unicode__(self):
  67         """Basic string representation of the entry."""
  68         rec = self._record
  69         lines = []
  70
  71         k_eles = rec.find_children("k_ele")
  72         if len(k_eles) > 0:
  73             lines.append(_(u"Kanji readings:"))
  74         for k_ele_index, k_ele in enumerate(k_eles):
  75             k_ele_index += 1
  76             lines.append(_(u"  Reading %d:") % k_ele_index)
  77             lines.append(_(u"    Blob: %s") % k_ele.data['value'])
  78
  79         r_eles = rec.find_children("r_ele")
  80         if len(r_eles) > 0:
  81             lines.append(_(u"Kana readings:"))
  82         for r_ele_index, r_ele in enumerate(r_eles):
  83             r_ele_index += 1
  84             lines.append(_(u"  Reading %d:") % r_ele_index)
  85             lines.append(_(u"    Blob: %s") % r_ele.data['value'])
  86
  87         senses = rec.find_children("sense")
  88         if len(senses) > 0:
  89             lines.append(_(u"Glosses:"))
  90         for sense_index, sense in enumerate(senses):
  91             sense_index += 1
  92             lines.append(_(u"  Sense %d:") % sense_index)
  93             glosses = sense.find_children("gloss")
  94
  95             gloss_d = {}
  96             for gloss in glosses:
  97                 gloss_d.setdefault(gloss.data["lang"], []).append(gloss)
  98             # Output glosses by language
  99             for lang in sorted(gloss_d.keys()):
 100                 gloss_recs = gloss_d[lang]
 101                 lines.append(_(u"    Lang: %s") % lang)
 102                 for gloss_index, gloss in enumerate(gloss_recs):
 103                     gloss_index += 1
 104                     val = gloss.data['value']
 105                     lines.append(_(u"      Gloss %d: %s") % (gloss_index, val))
 106         return u"\n".join(lines)
 107
 108     def __repr__(self):
 109         return repr(self._record)
 110
 111
 112 class Database(BaseDatabase):
 113
 114     """Top level object for SQLite 3-based JMdict database."""
 115
 116     entry_class = Entry
 117     table_map = {
 118         u"entry": {
 119             u"k_ele": {
 120                 u"ke_inf": {},
 121                 u"ke_pri": {},
 122                 },
 123             u"r_ele": {
 124                 u"re_restr": {},
 125                 u"re_inf": {},
 126                 u"re_pri": {},
 127                 },
 128             u"links": {},
 129             u"bibl": {},
 130             u"etym": {},
 131             u"audit": {},
 132             u"sense": {
 133                 u"pos": {},
 134                 u"field": {},
 135                 u"misc": {},
 136                 u"dial": {},
 137                 u"stagk": {},
 138                 u"stagr": {},
 139                 u"xref": {},
 140                 u"ant": {},
 141                 u"s_inf": {},
 142                 u"example": {},
 143                 u"lsource": {},
 144                 u"gloss": {
 145                     u"pri": {},
 146                     }
 147                 }
 148             }
 149         }
 150
 151     def __init__(self, filename, init_from_file=None):
 152         self.conn = sqlite3.connect(filename)
 153         self.conn.row_factory = sqlite3.Row  # keyword accessors for rows
 154         self.cursor = self.conn.cursor()
 155         self.tables = self._create_table_objects()
 156         if init_from_file is not None:
 157             raw_data = gzread(init_from_file)
 158
 159             entities = self._get_entities(raw_data)
 160             infile = StringIO(raw_data)
 161             etree = ElementTree(file=infile)
 162             infile.close()
 163
 164             self._create_new_tables()
 165             self._populate_database(etree, entities)
 166             self.conn.commit()
 167
 168     def search(self, query, lang=None):
 169         # Search
 170         # Two main methods: to and from Japanese.
 171         # 1. Guess which direction we're searching.
 172         # 2. Search preferred method.
 173         # 3. Search remaining method.
 174         entries_from = self.search_from_japanese(query)
 175         entries_to = self.search_to_japanese(query, lang=lang)
 176
 177         results = entries_from + entries_to
 178         return results
 179
 180     def search_from_japanese(self, query):
 181         # Japanese search locations:
 182         # 1. Kanji elements
 183         # 2. Reading elements
 184         # 3. Any indices (none yet)
 185         #
 186         # Preferred orderings
 187         # 1. Location of query in result
 188         #    1. Exact match
 189         #    2. Begins with
 190         #    3. Anywhere
 191         # 2. Ranking of usage (the (P) option in EDICT, for example)
 192         #
 193         # FOR NOW: just get the searching working.
 194         # This puts us on roughly the same level as J-Ben 1.2.x.
 195         encoding = get_encoding()
 196         wrapped_query = "%%%s%%" % query  # Wrap in wildcards
 197         unicode_query = wrapped_query.decode(encoding)
 198
 199         entries_by_keb = self._search_keb(unicode_query)
 200         entries_by_reb = self._search_reb(unicode_query)
 201         #entries_by_indices = self._search_indices_from_ja(unicode_query)
 202
 203         # Merge results into one list and return.
 204         results = []
 205         for lst in (entries_by_keb, entries_by_reb):
 206             for o in lst:
 207                 if o not in results:
 208                     results.append(o)
 209         return results
 210
 211     def _search_keb(self, unicode_query):
 212         """Searches kanji elements (Japanese readings with kanji).
 213
 214         Returns a list of entry IDs.
 215
 216         """
 217         # keb: entry.id -> k_ele.fk, k_ele.value
 218         query = "SELECT fk FROM k_ele WHERE value LIKE ?"
 219         args = (unicode_query,)
 220         self.cursor.execute(query, args)
 221         rows = self.cursor.fetchall()
 222         return [row[0] for row in rows]
 223
 224     def _search_reb(self, unicode_query):
 225         """Searches reading elements (Japanese readings without kanji).
 226
 227         Returns a list of entry IDs.
 228
 229         """
 230         # reb: entry.id -> r_ele.fk, r_ele.value
 231         query = "SELECT fk FROM r_ele WHERE value LIKE ?"
 232         args = (unicode_query,)
 233         self.cursor.execute(query, args)
 234         rows = self.cursor.fetchall()
 235         return [row[0] for row in rows]
 236
 237     def _search_indices_from_ja(self, unicode_query):
 238         raise NotImplementedError
 239
 240     def search_to_japanese(self, query, lang):
 241         # Foreign language search locations:
 242         # 1. Glosses
 243         # 2. Any indices (none yet)
 244         #
 245         # For other considerations, see search_from_japanese().
 246         encoding = get_encoding()
 247         wrapped_query = "%%%s%%" % query  # Wrap in wildcards
 248         unicode_query = wrapped_query.decode(encoding)
 249
 250         entries_by_glosses = self._search_glosses(unicode_query, lang)
 251         #entries_by_indices = self._search_indices_to_ja(unicode_query, lang)
 252
 253         # Merge results into one list and return.
 254         results = []
 255         for lst in (entries_by_glosses,):
 256             for o in lst:
 257                 if o not in results:
 258                     results.append(o)
 259         return results
 260
 261
 262     def _search_glosses(self, unicode_query, lang):
 263         """Searches foreign language glosses.
 264
 265         If lang is not None, only entries which match the lang
 266         parameter are returned.
 267
 268         Returns a list of entry IDs.
 269
 270         """
 271         # entry.id -> sense.fk, sense.id -> gloss.fk
 272         if lang is not None:
 273             query = (
 274                 "SELECT e.id FROM gloss g, sense s, entry e "
 275                 "WHERE g.lang = ? AND g.value LIKE ? "
 276                 "AND g.fk = s.id AND s.fk = e.id"
 277             )
 278             args = (lang, unicode_query)
 279         else:
 280             query = (
 281                 "SELECT e.id FROM gloss g, sense s, entry e "
 282                 "WHERE g.value LIKE ?"
 283             )
 284             args = (unicode_query,)
 285
 286         self.cursor.execute(query, args)
 287         rows = self.cursor.fetchall()
 288         return [row[0] for row in rows]
 289
 290     def _search_indices_to_ja(self, unicode_query, lang):
 291         raise NotImplementedError
 292
 293     def lookup(self, id):
 294         return BaseDatabase.lookup(self, "entry", id)
 295
 296     def query_db(self, *args, **kwargs):
 297         """Helper.  Wraps the execute/fetchall idiom on the DB cursor."""
 298         self.cursor.execute(*args, **kwargs)
 299         return self.cursor.fetchall()
 300
 301     def _convert_entities(self, entities):
 302         """Expands a list of entities.
 303
 304         Returns a list of the entity expansions.  The order of the
 305         returned expansions matches the order of the input entities.
 306
 307         """
 308         args = list(sorted(set(entities)))
 309         template = ", ".join(["?"] * len(args))
 310         query = "SELECT entity, expansion " \
 311             "FROM entity WHERE entity IN (%s)" % template
 312         rows = self.query_db(query, args)
 313         d = {}
 314         for entity, expansion in rows:
 315             d[entity] = expansion
 316         result = [d[entity] for entity in entities]
 317         return result
 318
 319     def _create_table_objects(self):
 320         """Creates table objects.
 321
 322         Returns a dictionary of table name to table object.
 323
 324         """
 325         class_mappings = {
 326             "entry": EntryTable,     # key->int ID
 327             "r_ele": REleTable,      # key-value plus nokanji flag
 328             "sense": SenseTable,     # one-many group mapping for sense info
 329             "audit": AuditTable,     # key->(update_date, update_details)
 330             "lsource": LSourceTable, # key -> lang, type=full/part, wasei=t/f
 331             "gloss": GlossTable,     # key -> lang, g_gend, value, pri flag
 332             "links": LinksTable,     # key -> tag, desc, uri
 333             "bibl": BiblTable,       # key -> tag, txt
 334             "entity": EntityTable,   # Info from JMdict XML entities
 335             }
 336
 337         # Set up key/value and key/entity tables
 338         kv_tables = [ # key-value tables (id -> text blob)
 339             "k_ele",
 340             "ke_pri",
 341             "re_restr",
 342             "re_pri",
 343             "etym",
 344             "stagk",
 345             "stagr",
 346             "xref",  # (#PCDATA)* - why the *?
 347             "ant",   # (#PCDATA)* - why the *?
 348             "s_inf",
 349             "example",
 350             "pri",
 351             ]
 352         kv_entity_tables = [ # key-value tables where val == entity
 353             "ke_inf",
 354             "re_inf",
 355             "dial",
 356             "field",
 357             "misc",
 358             "pos",
 359             ]
 360         for tbl in kv_tables:
 361             class_mappings[tbl] = KeyValueTable
 362         for tbl in kv_entity_tables:
 363             class_mappings[tbl] = KeyEntityTable
 364
 365         # Create all table objects
 366         table_mappings = {}
 367         for tbl, cls in class_mappings.iteritems():
 368             table_mappings[tbl] = cls(self.cursor, tbl)
 369
 370         return table_mappings
 371
 372     def _create_new_tables(self):
 373         """(Re)creates the database tables."""
 374         for tbl, tbl_obj in self.tables.iteritems():
 375             self.cursor.execute("DROP TABLE IF EXISTS %s" % tbl)
 376             tbl_obj.create()
 377
 378     def _populate_database(self, etree, entities):
 379         """Imports XML data into SQLite database.
 380
 381         table_d: table to table_object dictionary
 382         etree: ElementTree object for JMdict
 383         entities: entity name to description dictionary
 384
 385         """
 386         # NOTE: this is waaay too long.  Should be broken up somehow.
 387         # For now this will work though...
 388
 389         # Populate entities table and get integer keys
 390         # NOTE: we'll be mapping from *expanded* entities to ints.
 391         entity_int_d = {}
 392         tbl = self.tables['entity']
 393         for entity, expansion in entities.iteritems():
 394             i = tbl.insert(entity, expansion)
 395             entity_int_d[expansion] = i
 396
 397         # Iterate through each entry
 398         for entry in etree.findall("entry"):
 399
 400             # entry table
 401             ent_seq = entry.find("ent_seq")
 402             entry_id = self.tables["entry"].insert(int(ent_seq.text))
 403
 404             for k_ele in entry.findall("k_ele"):
 405                 # k_ele
 406                 value = k_ele.find("keb").text
 407                 k_ele_id = self.tables["k_ele"].insert(entry_id, value)
 408
 409                 # ke_inf
 410                 for ke_inf in k_ele.findall("ke_inf"):
 411                     value = ke_inf.text.strip()
 412                     entity_id = entity_int_d[value]
 413                     self.tables["ke_inf"].insert(k_ele_id, entity_id)
 414
 415                 # ke_pri
 416                 for ke_pri in k_ele.findall("ke_pri"):
 417                     value = ke_pri.text
 418                     self.tables["ke_pri"].insert(k_ele_id, value)
 419
 420             for r_ele in entry.findall("r_ele"):
 421                 # r_ele
 422                 value = r_ele.find("reb").text
 423                 # For nokanji: currently it's an empty tag, so
 424                 # treating it as true/false.
 425                 nokanji = 1 if r_ele.find("nokanji") is not None else 0
 426                 r_ele_id = self.tables["r_ele"].insert(entry_id, value, nokanji)
 427
 428                 # re_restr
 429                 for re_restr in r_ele.findall("re_restr"):
 430                     value = re_restr.text
 431                     self.tables["re_restr"].insert(r_ele_id, value)
 432
 433                 # re_inf
 434                 for re_inf in r_ele.findall("re_inf"):
 435                     value = re_inf.text.strip()
 436                     entity_id = entity_int_d[value]
 437                     self.tables["re_inf"].insert(r_ele_id, entity_id)
 438
 439                 # re_pri
 440                 for re_pri in r_ele.findall("re_pri"):
 441                     value = re_pri.text
 442                     self.tables["re_pri"].insert(r_ele_id, value)
 443
 444             # info
 445             # (Although children of an info node, since there's only
 446             # one per entry, let's connect directly to the entry.)
 447             info = entry.find("info")
 448             if info is not None:
 449                 for links in info.findall("links"):
 450                     link_tag = links.find("link_tag").text
 451                     link_desc = links.find("link_desc").text
 452                     link_uri = links.find("link_uri").text
 453                     self.tables["links"].insert(entry_id, link_tag, link_desc,
 454                                                 link_uri)
 455                 for bibl in info.findall("bibl"):
 456                     bib_tag = links.find("bib_tag")
 457                     bib_txt = links.find("bib_txt")
 458                     bib_tag = bib_tag.text if bib_tag is not None else None
 459                     bib_txt = bib_txt.text if bib_txt is not None else None
 460                     self.tables["bibl"].insert(entry_id, bib_tag, bib_txt)
 461                 for etym in info.findall("etym"):
 462                     self.tables["etym"].insert(entry_id, etym.text)
 463                 for audit in info.findall("audit"):
 464                     upd_date = audit.find("upd_date").text
 465                     upd_detl = audit.find("upd_detl").text
 466                     self.tables["audit"].insert(entry_id, upd_date, upd_detl)
 467
 468             # sense
 469             key_entity_tables = ["pos", "field", "misc", "dial"]
 470             key_value_tables = ["stagk", "stagr", "xref", "ant", "s_inf", "example"]
 471
 472             for sense in entry.findall("sense"):
 473                 # Each sense gets its own ID, for grouping purposes
 474                 sense_id = self.tables["sense"].insert(entry_id)
 475
 476                 for elem_name in key_value_tables:
 477                     for element in sense.findall(elem_name):
 478                         self.tables[elem_name].insert(sense_id, element.text)
 479
 480                 for elem_name in key_entity_tables:
 481                     for element in sense.findall(elem_name):
 482                         entity_id = entity_int_d[element.text.strip()]
 483                         self.tables[elem_name].insert(sense_id, entity_id)
 484
 485                 for lsource in sense.findall("lsource"):
 486                     lang = lsource.get(XML_LANG, "eng")
 487                     ls_type = lsource.get("ls_type")  # implied "full" if absent, "part" otherwise
 488                     ls_wasei = lsource.get("ls_wasei") # usually "y"... just a flag.
 489
 490                     partial = 1 if ls_type is not None else 0
 491                     if ls_wasei is None:
 492                         wasei = 0
 493                     elif ls_wasei == "y":
 494                         wasei = 1
 495                     else:
 496                         raise ValueError(
 497                             'Only known valid ls_wasei attribute value '
 498                             'is "y", found:', ls_wasei.text)
 499
 500                     self.tables["lsource"].insert(sense_id,
 501                                                   lang, partial, wasei)
 502                 for gloss in sense.findall("gloss"):
 503                     lang = gloss.get(XML_LANG, "eng")
 504                     g_gend = gloss.get("g_gend")
 505                     pri_list = gloss.getchildren()
 506                     if len(pri_list) > 1:
 507                         gloss_id = self.tables['gloss'].insert(
 508                             sense_id, lang, g_gend, gloss.text, 1)
 509                         for pri in pri_list:
 510                             self.tables['pri'].insert(gloss_id, pri.text)
 511                     else:
 512                         self.tables['gloss'].insert(sense_id, lang, g_gend,
 513                                                     gloss.text, 0)
 514
 515     def _get_entities(self, xml_data):
 516         """Gets the ENTITY definitions from JMdict.
 517
 518         Finds the built-in DTD and extracts all ENTITY definitions.
 519
 520         """
 521         dtd = self._get_dtd(xml_data)
 522         # do some logic to find all entities...
 523         entities = {}
 524         regex = '<!ENTITY[ ]+([a-zA-Z0-9-]+)[ ]+"(.*?)">'
 525         for match in re.finditer(regex, xml_data):
 526             key, value = match.groups()[0:2]
 527             entities[key] = value
 528         return entities
 529
 530     def _get_dtd(self, xml_data):
 531         """Gets the DTD from JMdict."""
 532         # This works for JMdict (as it is at the time of writing), but is
 533         # not a general solution.
 534         start_index = xml_data.find("<!DOCTYPE")
 535         if start_index == -1:
 536             raise Exception("Could not find start of internal DTD")
 537         end_index = xml_data.find("]>")
 538         if end_index == -1:
 539             raise Exception("Could not find end ofinternal DTD")
 540         end_index += 2
 541         dtd = xml_data[start_index:end_index]
 542         return dtd
 543
 544
 545 class EntryTable(Table):
 546     create_query = ("CREATE TABLE %s "
 547                     "(id INTEGER PRIMARY KEY, ent_seq INTEGER)")
 548     insert_query = "INSERT INTO %s VALUES (NULL, ?)"
 549     index_queries = [
 550         "CREATE INDEX %s_seq ON %s (ent_seq)",
 551         ]
 552
 553
 554 class KeyEntityTable(KeyValueTable):
 555     """Just like a KeyValueTable, but with 'entity' instead of 'value'."""
 556     create_query = ("CREATE TABLE %s "
 557                     "(id INTEGER PRIMARY KEY, fk INTEGER, entity INTEGER)")
 558
 559
 560 class REleTable(ChildTable):
 561     create_query = ("CREATE TABLE %s "
 562                     "(id INTEGER PRIMARY KEY, fk INTEGER,"
 563                     " value TEXT, nokanji INTEGER)")
 564     insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?)"
 565     index_queries = [
 566         "CREATE INDEX %s_fk ON %s (fk)",
 567         ]
 568
 569
 570 class SenseTable(ChildTable):
 571     """Corresponds to <sense> tag.  Functions as group for glosses, etc."""
 572     create_query = ("CREATE TABLE %s (id INTEGER PRIMARY KEY, fk INTEGER)")
 573     insert_query = "INSERT INTO %s VALUES (NULL, ?)"
 574     index_queries = [
 575         "CREATE INDEX %s_fk ON %s (fk)",
 576         ]
 577
 578
 579 class AuditTable(ChildTable):
 580     create_query = ("CREATE TABLE %s "
 581                     "(id INTEGER PRIMARY KEY, fk INTEGER,"
 582                     " update_date TEXT, update_details TEXT)")
 583     insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?)"
 584     index_queries = [
 585         "CREATE INDEX %s_fk ON %s (fk)",
 586         ]
 587
 588
 589 class LSourceTable(ChildTable):
 590     """Represents the <lsource> element from JMdict.
 591
 592     Important changes:
 593     ls_type=full/part => partial=1/0
 594     ls_wasei=y/null => wasei=1/0
 595
 596     """
 597     create_query = ("CREATE TABLE %s "
 598                     "(id INTEGER PRIMARY KEY, fk INTEGER,"
 599                     " lang TEXT, partial INTEGER, wasei INTEGER)")
 600     insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?, ?)"
 601     index_queries = [
 602         "CREATE INDEX %s_fk ON %s (fk)",
 603         ]
 604
 605
 606 class GlossTable(ChildTable):
 607     create_query = ("CREATE TABLE %s "
 608                     "(id INTEGER PRIMARY KEY, fk INTEGER,"
 609                     " lang TEXT, g_gend TEXT, value TEXT, pri INTEGER)")
 610     insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?, ?, ?)"
 611     index_queries = [
 612         "CREATE INDEX %s_fk ON %s (fk)",
 613         "CREATE INDEX %s_lang ON %s (lang)",
 614         "CREATE INDEX %s_value ON %s (value)",
 615         ]
 616
 617
 618 class LinksTable(ChildTable):
 619     create_query = ("CREATE TABLE %s "
 620                     "(id INTEGER PRIMARY KEY, fk INTEGER,"
 621                     " tag TEXT, desc TEXT, uri TEXT)")
 622     insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?, ?)"
 623     index_queries = [
 624         "CREATE INDEX %s_fk ON %s (fk)",
 625         ]
 626
 627
 628 class BiblTable(ChildTable):
 629     create_query = ("CREATE TABLE %s "
 630                     "(id INTEGER PRIMARY KEY, fk INTEGER,"
 631                     " tag TEXT, txt TEXT)")
 632     insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?, ?)"
 633     index_queries = [
 634         "CREATE INDEX %s_fk ON %s (fk)",
 635         ]
 636
 637
 638 class EntityTable(Table):
 639     create_query = ("CREATE TABLE %s "
 640                     "(id INTEGER PRIMARY KEY, entity TEXT, expansion TEXT)")
 641     insert_query = "INSERT INTO %s VALUES (NULL, ?, ?)"
 642
 643
 644 ######################################################################
 645
 646 def parse_args():
 647     from optparse import OptionParser
 648     op = OptionParser(usage="%prog [options] <db_filename> [search_query]")
 649     op.add_option("-i", "--initialize",
 650                   dest="init_fname", metavar="XML_SOURCE",
 651                   help=_("Initialize database from file."))
 652     op.add_option("-L", "--lang",
 653                   help=_("Specify preferred language for searching."))
 654     options, args = op.parse_args()
 655     if len(args) < 1:
 656         op.print_help()
 657         exit(-1)
 658     return (options, args)
 659
 660 def main():
 661     # Copied *almost* verbatim from kd2.py.
 662     options, args = parse_args()
 663     db_fname = args[0]
 664
 665     if options.init_fname is not None:
 666         db = Database(db_fname, init_from_file=options.init_fname)
 667     else:
 668         db = Database(db_fname)
 669
 670     results = []
 671     if len(args) > 1:
 672         # Do search
 673         # To be nice, we'll join all remaining args with spaces.
 674         search_query = " ".join(args[1:])
 675         if options.lang is not None:
 676             results = db.search(search_query, lang=options.lang)
 677         else:
 678             results = db.search(search_query)
 679
 680     if len(results) > 0:
 681         from pprint import pprint
 682         encoding = get_encoding()
 683         for index, result in enumerate(results):
 684             index += 1
 685             print(_("[Entry %d]") % index)
 686             entry = db.lookup(result)
 687
 688             print(unicode(entry).encode(encoding))
 689             print()
 690     else:
 691         print(_("No results found."))
 692
 693 if __name__ == "__main__":
 694     main()