Cleanup, refactoring, etc.
[jblite.git] / jblite / jmdict.py
blob2782dc3fc037a7c0c2e459a63fbdae1cd6ab7270
1 # -*- coding: utf-8 -*-
2 """JMdict support."""
4 # This could be a bit cleaner if I used something like SQLAlchemy,
5 # perhaps... The create/insert/index bits were done decently enough,
6 # but lookups are done in straight SQL due to the potential
7 # complexity, and this sadly does break the abstraction of the table
8 # objects...
10 from __future__ import print_function
11 from __future__ import with_statement
13 import os, sys, re, sqlite3
14 from cStringIO import StringIO
15 from xml.etree.cElementTree import ElementTree
16 from helpers import gzread
17 from table import Table, KeyValueTable
19 import gettext
20 #t = gettext.translation("jblite")
21 #_ = t.ugettext
22 gettext.install("jblite")
24 # Full expansion of xml:lang
25 XML_LANG = "{http://www.w3.org/XML/1998/namespace}lang"
28 # Copied from kd2.py...
29 get_encoding = sys.getfilesystemencoding
31 # FORMAT OF TABLE MAP:
32 # dictionary entry: table: (children | None)
33 # table: table_name | (table_name, table_type, *args, **kwargs)
35 # Ideas:
36 # Value = dict: take keys as child tables, lookup all rows, and take values as grandchildren.
37 # Value = list: take items as child tables, lookup all rows, assume no children.
40 # entry:
41 # data = tables["entry"].lookup()
42 # children_map = TABLE_MAP["entry"]
43 # children = get_data(children_map["k_ele"])
44 # result = TableData(data, children)
47 # {"k_ele": {"data": [...],
48 # "children": {...}}}
50 # Table data object:
51 # obj.data: {}, # single db row
52 # obj.children: {"key": table_object}
55 # breadth first creation? depth?
class Record(object):

    """A single table row bundled with the data it is the 'parent' of.

    ``data`` holds the row itself; ``children`` holds whatever was
    looked up in child tables for that row, keyed by name.

    """

    def __init__(self, data=None, children=None):
        # A fresh dict is created per instance when an argument is
        # omitted, so two Records never share a default container.
        if data is None:
            data = {}
        if children is None:
            children = {}
        self.data = data
        self.children = children
# Map of tables to their children maps.  Empty {} means no children.
# Each key is a table name; each value is the map of the tables whose
# rows refer back to that table through their foreign key.
TABLE_MAP = {
    "entry": {
        "k_ele": {
            "ke_inf": {},
            "ke_pri": {},
        },
        "r_ele": {
            "re_restr": {},
            "re_inf": {},
            "re_pri": {},
        },
        "links": {},
        "bibl": {},
        "etym": {},
        "audit": {},
        "sense": {
            "pos": {},
            "field": {},
            "misc": {},
            "dial": {},
            "stagk": {},
            "stagr": {},
            "xref": {},
            "ant": {},
            "s_inf": {},
            "example": {},
            "lsource": {},
            "gloss": {
                "pri": {},
            },
        },
    },
}
class Entry(object):

    """Wraps the nested dictionary that describes one dictionary entry."""

    def __init__(self, data_d):
        self._d = data_d

    def __unicode__(self):
        """Basic string representation of the entry."""
        out = []
        add = out.append

        # Kanji elements: a header, then a numbered block per element.
        kanji = self._d.get("k_ele", [])
        if kanji:
            add(_(u"Kanji readings:"))
        for number, k_ele in enumerate(kanji, 1):
            add(_(u" Reading %d:") % number)
            add(_(u" Blob: %s") % k_ele['keb'])

        # Reading elements follow the same layout.
        kana = self._d.get("r_ele", [])
        if kana:
            add(_(u"Kana readings:"))
        for number, r_ele in enumerate(kana, 1):
            add(_(u" Reading %d:") % number)
            add(_(u" Blob: %s") % r_ele['reb'])

        senses = self._d.get("sense", [])
        if senses:
            add(_(u"Glosses:"))
        for sense_number, sense in enumerate(senses, 1):
            add(_(u" Sense %d:") % sense_number)
            by_lang = sense.get("gloss", {})
            # Output glosses by language
            for lang in sorted(by_lang):
                add(_(u" Lang: %s") % lang)
                for gloss_number, gloss in enumerate(by_lang[lang], 1):
                    add(_(u" Gloss %d: %s")
                        % (gloss_number, gloss['value']))
        return u"\n".join(out)

    def __repr__(self):
        return repr(self._d)
152 class Database(object):
154 """Top level object for SQLite 3-based JMdict database."""
def __init__(self, filename, init_from_file=None):
    """Opens the SQLite database, optionally rebuilding it from a file.

    filename: passed straight to sqlite3.connect().
    init_from_file: when not None, the file is read through gzread(),
        all tables are dropped and recreated, the XML content is
        imported and the result is committed.

    """
    connection = sqlite3.connect(filename)
    connection.row_factory = sqlite3.Row  # keyword accessors for rows
    self.conn = connection
    self.cursor = connection.cursor()
    self.tables = self._create_table_objects()
    if init_from_file is None:
        return

    raw_data = gzread(init_from_file)

    # The entity definitions are pulled out of the raw text before the
    # same text is handed to the XML parser.
    entities = self._get_entities(raw_data)
    infile = StringIO(raw_data)
    etree = ElementTree(file=infile)
    infile.close()

    self._create_new_tables()
    self._populate_database(etree, entities)
    self.conn.commit()
def search(self, query, lang=None):
    """Searches in both directions and returns the combined entry IDs.

    The from-Japanese hits come first, followed by the to-Japanese
    hits.  The two lists are simply concatenated, so an ID found by
    both searches appears twice.

    """
    # Planned approach (only the plain double search exists so far):
    # 1. Guess which direction we're searching.
    # 2. Search preferred method.
    # 3. Search remaining method.
    return (self.search_from_japanese(query)
            + self.search_to_japanese(query, lang=lang))
def search_from_japanese(self, query):
    """Searches kanji and reading elements for *query*.

    query: the search text.  A byte string is decoded using the
        encoding reported by get_encoding(); text that is already
        decoded is used as is.

    Returns a list of entry IDs without duplicates, kanji element hits
    first, then reading element hits.

    """
    # Japanese search locations:
    # 1. Kanji elements
    # 2. Reading elements
    # 3. Any indices (none yet)

    # Preferred orderings
    # 1. Location of query in result
    #    1. Exact match
    #    2. Begins with
    #    3. Anywhere
    # 2. Ranking of usage (the (P) option in EDICT, for example)

    # FOR NOW: just get the searching working.
    # This puts us on roughly the same level as J-Ben 1.2.x.

    # Only byte strings need decoding; calling .decode() on text that
    # is already unicode fails for non-ASCII input under Python 2.
    if isinstance(query, bytes):
        query = query.decode(get_encoding())
    unicode_query = u"%%%s%%" % query  # Wrap in wildcards

    entries_by_keb = self._search_keb(unicode_query)
    entries_by_reb = self._search_reb(unicode_query)
    #entries_by_indices = self._search_indices_from_ja(unicode_query)

    # Merge results into one list, keeping first-seen order.  The set
    # makes the duplicate check O(1) instead of scanning the list.
    results = []
    seen = set()
    for lst in (entries_by_keb, entries_by_reb):
        for entry_id in lst:
            if entry_id not in seen:
                seen.add(entry_id)
                results.append(entry_id)
    return results
216 def _search_keb(self, unicode_query):
217 """Searches kanji elements (Japanese readings with kanji).
219 Returns a list of entry IDs.
222 # keb: entry.id -> k_ele.fk, k_ele.value
223 query = "SELECT fk FROM k_ele WHERE value LIKE ?"
224 args = (unicode_query,)
225 self.cursor.execute(query, args)
226 rows = self.cursor.fetchall()
227 return [row[0] for row in rows]
229 def _search_reb(self, unicode_query):
230 """Searches reading elements (Japanese readings without kanji).
232 Returns a list of entry IDs.
235 # reb: entry.id -> r_ele.fk, r_ele.value
236 query = "SELECT fk FROM r_ele WHERE value LIKE ?"
237 args = (unicode_query,)
238 self.cursor.execute(query, args)
239 rows = self.cursor.fetchall()
240 return [row[0] for row in rows]
242 def _search_indices_from_ja(self, unicode_query):
243 raise NotImplementedError
def search_to_japanese(self, query, lang):
    """Searches foreign language glosses for *query*.

    query: the search text.  A byte string is decoded using the
        encoding reported by get_encoding(); text that is already
        decoded is used as is.
    lang: passed on to the gloss search; None means any language.

    Returns a list of entry IDs without duplicates, in first-seen
    order.

    """
    # Foreign language search locations:
    # 1. Glosses
    # 2. Any indices (none yet)

    # For other considerations, see search_from_japanese().

    # Only byte strings need decoding; calling .decode() on text that
    # is already unicode fails for non-ASCII input under Python 2.
    if isinstance(query, bytes):
        query = query.decode(get_encoding())
    unicode_query = u"%%%s%%" % query  # Wrap in wildcards

    entries_by_glosses = self._search_glosses(unicode_query, lang)
    #entries_by_indices = self._search_indices_to_ja(unicode_query, lang)

    # Merge results into one list, keeping first-seen order.  The set
    # makes the duplicate check O(1) instead of scanning the list.
    results = []
    seen = set()
    for lst in (entries_by_glosses,):
        for entry_id in lst:
            if entry_id not in seen:
                seen.add(entry_id)
                results.append(entry_id)
    return results
267 def _search_glosses(self, unicode_query, lang):
268 """Searches foreign language glosses.
270 If lang is not None, only entries which match the lang
271 parameter are returned.
273 Returns a list of entry IDs.
276 # entry.id -> sense.fk, sense.id -> gloss.fk
277 if lang is not None:
278 query = (
279 "SELECT e.id FROM gloss g, sense s, entry e "
280 "WHERE g.lang = ? AND g.value LIKE ? "
281 "AND g.fk = s.id AND s.fk = e.id"
283 args = (lang, unicode_query)
284 else:
285 query = (
286 "SELECT e.id FROM gloss g, sense s, entry e "
287 "WHERE g.value LIKE ?"
289 args = (unicode_query,)
291 self.cursor.execute(query, args)
292 rows = self.cursor.fetchall()
293 return [row[0] for row in rows]
295 def _search_indices_to_ja(self, unicode_query, lang):
296 raise NotImplementedError
def query_db(self, *args, **kwargs):
    """Helper.  Runs execute() on the DB cursor and returns fetchall().

    All arguments are forwarded to the cursor's execute() unchanged.

    """
    cur = self.cursor
    cur.execute(*args, **kwargs)
    return cur.fetchall()
303 def _convert_entities(self, entities):
304 """Expands a list of entities.
306 Returns a list of the entity expansions. The order of the
307 returned expansions matches the order of the input entities.
310 args = list(sorted(set(entities)))
311 template = ", ".join(["?"] * len(args))
312 query = "SELECT entity, expansion " \
313 "FROM entity WHERE entity IN (%s)" % template
314 rows = self.query_db(query, args)
315 d = {}
316 for entity, expansion in rows:
317 d[entity] = expansion
318 result = [d[entity] for entity in entities]
319 return result
321 # lookup entry by id
322 # lookup all other data by fk
def lookup(self, entry_id):
    """Creates an entry object.

    Returns a Record instance from the entry table, with all data
    linked as children to this record.

    """
    # Lookup data in entry table; only the first row is used.
    entry_row = self.tables['entry'].lookup_by_id(entry_id)[0]
    # The row's own id is the foreign key for every child table.
    linked = self._lookup_children(TABLE_MAP['entry'], entry_row['id'])
    return Record(entry_row, linked)
339 def _lookup_children(self, children_map, fk):
340 children = {}
341 for child_table in children_map:
342 grandchild_map = children_map[child_table]
343 children[child_table] = self._lookup_by_fk(
344 child_table, children_map[child_table], fk)
345 return children
def _lookup_by_fk(self, table_name, children_map, fk):
    """Looks up data from a table and related 'child' tables.

    table_name: name of the table to query.
    children_map: a dictionary of child table mappings; an empty
        dictionary when the table has no children.
    fk: foreign key used in table query.

    Returns a list with one Record per matching row.

    """
    # Each row's own id becomes the foreign key for its children.
    return [Record(row, self._lookup_children(children_map, row['id']))
            for row in self.tables[table_name].lookup_by_fk(fk)]
def _old_lookup(self, entry_id):
    """Builds an Entry for entry_id using hand-written SQL (legacy).

    entry_id: id of the row in the entry table.  The body always used
        this name but it was never bound, so it is now an explicit
        parameter.

    Returns an Entry wrapping a dictionary with the keys 'k_ele',
    'r_ele' and 'sense'.

    """
    def first_column(query, fk):
        # Every detail query selects a single column by foreign key.
        return [row[0] for row in self.query_db(query, (fk,))]

    result = {}

    # AT LEAST (for now...):
    # 1. readings
    # 1.1. k_ele
    k_ele = []
    k_rows = self.query_db("SELECT id, value FROM k_ele WHERE fk = ?",
                           (entry_id,))
    for k_ele_id, keb in k_rows:
        ke_inf = self._convert_entities(first_column(
            "SELECT entity FROM ke_inf WHERE fk = ?", k_ele_id))
        ke_pri = first_column(
            "SELECT value FROM ke_pri WHERE fk = ?", k_ele_id)
        k_ele.append({'keb': keb, 'ke_pri': ke_pri, 'ke_inf': ke_inf})
    result['k_ele'] = k_ele

    # 1.2. r_ele
    r_ele = []
    r_rows = self.query_db(
        "SELECT id, value, nokanji FROM r_ele WHERE fk = ?", (entry_id,))
    for r_ele_id, reb, nokanji in r_rows:
        re_restr = first_column(
            "SELECT value FROM re_restr WHERE fk = ?", r_ele_id)
        re_inf = self._convert_entities(first_column(
            "SELECT entity FROM re_inf WHERE fk = ?", r_ele_id))
        re_pri = first_column(
            "SELECT value FROM re_pri WHERE fk = ?", r_ele_id)
        r_ele.append({
            'reb': reb,
            'nokanji': nokanji,
            're_restr': re_restr,
            're_pri': re_pri,
            're_inf': re_inf,
        })
    result['r_ele'] = r_ele

    # 2. glosses, grouped per sense and then per language
    sense = []
    for sense_id in first_column(
            "SELECT id FROM sense WHERE fk = ?", entry_id):
        gloss = {}
        gloss_rows = self.query_db(
            "SELECT lang, value, g_gend, pri FROM gloss WHERE fk = ?",
            (sense_id,))
        for lang, value, g_gend, pri in gloss_rows:
            gloss.setdefault(lang, []).append(
                {"value": value, "g_gend": g_gend, "pri": pri})
        sense.append({'gloss': gloss})
    result['sense'] = sense

    return Entry(result)
def _create_table_objects(self):
    """Creates table objects.

    Every table object is constructed with the shared cursor and its
    table name.

    Returns a dictionary of table name to table object.

    """
    # Tables with a dedicated class of their own.
    class_mappings = {
        "entry": EntryTable,      # key->int ID
        "r_ele": REleTable,       # key-value plus nokanji flag
        "sense": SenseTable,      # one-many group mapping for sense info
        "audit": AuditTable,      # key->(update_date, update_details)
        "lsource": LSourceTable,  # key -> lang, type=full/part, wasei=t/f
        "gloss": GlossTable,      # key -> lang, g_gend, value, pri flag
        "links": LinksTable,      # key -> tag, desc, uri
        "bibl": BiblTable,        # key -> tag, txt
        "entity": EntityTable,    # Info from JMdict XML entities
    }

    # key-value tables (id -> text blob)
    kv_tables = [
        "k_ele",
        "ke_pri",
        "re_restr",
        "re_pri",
        "etym",
        "stagk",
        "stagr",
        "xref",  # (#PCDATA)* - why the *?
        "ant",   # (#PCDATA)* - why the *?
        "s_inf",
        "example",
        "pri",
    ]
    # key-value tables where val == entity
    kv_entity_tables = [
        "ke_inf",
        "re_inf",
        "dial",
        "field",
        "misc",
        "pos",
    ]

    # Set up key/value and key/entity tables
    generic_groups = (
        (kv_tables, KeyValueTable),
        (kv_entity_tables, KeyEntityTable),
    )
    for names, table_class in generic_groups:
        for name in names:
            class_mappings[name] = table_class

    # Create all table objects
    return dict((name, table_class(self.cursor, name))
                for name, table_class in class_mappings.iteritems())
502 def _create_new_tables(self):
503 """(Re)creates the database tables."""
504 for tbl, tbl_obj in self.tables.iteritems():
505 self.cursor.execute("DROP TABLE IF EXISTS %s" % tbl)
506 tbl_obj.create()
508 def _populate_database(self, etree, entities):
509 """Imports XML data into SQLite database.
511 table_d: table to table_object dictionary
512 etree: ElementTree object for JMdict
513 entities: entity name to description dictionary
516 # NOTE: this is waaay too long. Should be broken up somehow.
517 # For now this will work though...
519 # Populate entities table and get integer keys
520 # NOTE: we'll be mapping from *expanded* entities to ints.
521 entity_int_d = {}
522 tbl = self.tables['entity']
523 for entity, expansion in entities.iteritems():
524 i = tbl.insert(entity, expansion)
525 entity_int_d[expansion] = i
527 # Iterate through each entry
528 for entry in etree.findall("entry"):
530 # entry table
531 ent_seq = entry.find("ent_seq")
532 entry_id = self.tables["entry"].insert(int(ent_seq.text))
534 for k_ele in entry.findall("k_ele"):
535 # k_ele
536 value = k_ele.find("keb").text
537 k_ele_id = self.tables["k_ele"].insert(entry_id, value)
539 # ke_inf
540 for ke_inf in k_ele.findall("ke_inf"):
541 value = ke_inf.text.strip()
542 entity_id = entity_int_d[value]
543 self.tables["ke_inf"].insert(k_ele_id, entity_id)
545 # ke_pri
546 for ke_pri in k_ele.findall("ke_pri"):
547 value = ke_pri.text
548 self.tables["ke_pri"].insert(k_ele_id, value)
550 for r_ele in entry.findall("r_ele"):
551 # r_ele
552 value = r_ele.find("reb").text
553 # For nokanji: currently it's an empty tag, so
554 # treating it as true/false.
555 nokanji = 1 if r_ele.find("nokanji") is not None else 0
556 r_ele_id = self.tables["r_ele"].insert(entry_id, value, nokanji)
558 # re_restr
559 for re_restr in r_ele.findall("re_restr"):
560 value = re_restr.text
561 self.tables["re_restr"].insert(r_ele_id, value)
563 # re_inf
564 for re_inf in r_ele.findall("re_inf"):
565 value = re_inf.text.strip()
566 entity_id = entity_int_d[value]
567 self.tables["re_inf"].insert(r_ele_id, entity_id)
569 # re_pri
570 for re_pri in r_ele.findall("re_pri"):
571 value = re_pri.text
572 self.tables["re_pri"].insert(r_ele_id, value)
574 # info
575 # (Although children of an info node, since there's only
576 # one per entry, let's connect directly to the entry.)
577 info = entry.find("info")
578 if info is not None:
579 for links in info.findall("links"):
580 link_tag = links.find("link_tag").text
581 link_desc = links.find("link_desc").text
582 link_uri = links.find("link_uri").text
583 self.tables["links"].insert(entry_id, link_tag, link_desc,
584 link_uri)
585 for bibl in info.findall("bibl"):
586 bib_tag = links.find("bib_tag")
587 bib_txt = links.find("bib_txt")
588 bib_tag = bib_tag.text if bib_tag is not None else None
589 bib_txt = bib_txt.text if bib_txt is not None else None
590 self.tables["bibl"].insert(entry_id, bib_tag, bib_txt)
591 for etym in info.findall("etym"):
592 self.tables["etym"].insert(entry_id, etym.text)
593 for audit in info.findall("audit"):
594 upd_date = audit.find("upd_date").text
595 upd_detl = audit.find("upd_detl").text
596 self.tables["audit"].insert(entry_id, upd_date, upd_detl)
598 # sense
599 key_entity_tables = ["pos", "field", "misc", "dial"]
600 key_value_tables = ["stagk", "stagr", "xref", "ant", "s_inf", "example"]
602 for sense in entry.findall("sense"):
603 # Each sense gets its own ID, for grouping purposes
604 sense_id = self.tables["sense"].insert(entry_id)
606 for elem_name in key_value_tables:
607 for element in sense.findall(elem_name):
608 self.tables[elem_name].insert(sense_id, element.text)
610 for elem_name in key_entity_tables:
611 for element in sense.findall(elem_name):
612 entity_id = entity_int_d[element.text.strip()]
613 self.tables[elem_name].insert(sense_id, entity_id)
615 for lsource in sense.findall("lsource"):
616 lang = lsource.get(XML_LANG, "eng")
617 ls_type = lsource.get("ls_type") # implied "full" if absent, "part" otherwise
618 ls_wasei = lsource.get("ls_wasei") # usually "y"... just a flag.
620 partial = 1 if ls_type is not None else 0
621 if ls_wasei is None:
622 wasei = 0
623 elif ls_wasei == "y":
624 wasei = 1
625 else:
626 raise ValueError(
627 'Only known valid ls_wasei attribute value '
628 'is "y", found:', ls_wasei.text)
630 self.tables["lsource"].insert(sense_id,
631 lang, partial, wasei)
632 for gloss in sense.findall("gloss"):
633 lang = gloss.get(XML_LANG, "eng")
634 g_gend = gloss.get("g_gend")
635 pri_list = gloss.getchildren()
636 if len(pri_list) > 1:
637 gloss_id = self.tables['gloss'].insert(
638 sense_id, lang, g_gend, gloss.text, 1)
639 for pri in pri_list:
640 self.tables['pri'].insert(gloss_id, pri.text)
641 else:
642 self.tables['gloss'].insert(sense_id, lang, g_gend,
643 gloss.text, 0)
645 def _get_entities(self, xml_data):
646 """Gets the ENTITY definitions from JMdict.
648 Finds the built-in DTD and extracts all ENTITY definitions.
651 dtd = self._get_dtd(xml_data)
652 # do some logic to find all entities...
653 entities = {}
654 regex = '<!ENTITY[ ]+([a-zA-Z0-9-]+)[ ]+"(.*?)">'
655 for match in re.finditer(regex, xml_data):
656 key, value = match.groups()[0:2]
657 entities[key] = value
658 return entities
660 def _get_dtd(self, xml_data):
661 """Gets the DTD from JMdict."""
662 # This works for JMdict (as it is at the time of writing), but is
663 # not a general solution.
664 start_index = xml_data.find("<!DOCTYPE")
665 if start_index == -1:
666 raise Exception("Could not find start of internal DTD")
667 end_index = xml_data.find("]>")
668 if end_index == -1:
669 raise Exception("Could not find end ofinternal DTD")
670 end_index += 2
671 dtd = xml_data[start_index:end_index]
672 return dtd
class EntryTable(Table):

    """Entry table: a generated id plus the ent_seq number."""

    create_query = "CREATE TABLE %s (id INTEGER PRIMARY KEY, ent_seq INTEGER)"
    insert_query = "INSERT INTO %s VALUES (NULL, ?)"
    index_queries = ["CREATE INDEX %s_seq ON %s (ent_seq)"]
class KeyEntityTable(KeyValueTable):

    """A KeyValueTable whose data column is named 'entity', not 'value'.

    The entity column is declared INTEGER.

    """

    create_query = (
        "CREATE TABLE %s (id INTEGER PRIMARY KEY, fk INTEGER, entity INTEGER)"
    )
class REleTable(Table):

    """Reading element table: key, text value and a nokanji flag."""

    create_query = (
        "CREATE TABLE %s (id INTEGER PRIMARY KEY, fk INTEGER,"
        " value TEXT, nokanji INTEGER)"
    )
    insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?)"
    index_queries = ["CREATE INDEX %s_fk ON %s (fk)"]
class SenseTable(Table):

    """Corresponds to <sense> tag.  Functions as group for glosses, etc.

    A row carries nothing but its own id and the foreign key.

    """

    create_query = "CREATE TABLE %s (id INTEGER PRIMARY KEY, fk INTEGER)"
    insert_query = "INSERT INTO %s VALUES (NULL, ?)"
    index_queries = ["CREATE INDEX %s_fk ON %s (fk)"]
class AuditTable(Table):

    """Audit table: key -> (update_date, update_details)."""

    create_query = (
        "CREATE TABLE %s (id INTEGER PRIMARY KEY, fk INTEGER,"
        " update_date TEXT, update_details TEXT)"
    )
    insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?)"
    index_queries = ["CREATE INDEX %s_fk ON %s (fk)"]
class LSourceTable(Table):

    """Represents the <lsource> element from JMdict.

    Important changes compared with the XML attributes:
      ls_type=part/full => partial=1/0
      ls_wasei=y/null   => wasei=1/0

    """

    create_query = (
        "CREATE TABLE %s (id INTEGER PRIMARY KEY, fk INTEGER,"
        " lang TEXT, partial INTEGER, wasei INTEGER)"
    )
    insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?, ?)"
    index_queries = ["CREATE INDEX %s_fk ON %s (fk)"]
class GlossTable(Table):

    """Gloss table: key -> lang, g_gend, value and a pri flag.

    Indexed on the foreign key, the language and the gloss text.

    """

    create_query = (
        "CREATE TABLE %s (id INTEGER PRIMARY KEY, fk INTEGER,"
        " lang TEXT, g_gend TEXT, value TEXT, pri INTEGER)"
    )
    insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?, ?, ?)"
    index_queries = [
        "CREATE INDEX %s_fk ON %s (fk)",
        "CREATE INDEX %s_lang ON %s (lang)",
        "CREATE INDEX %s_value ON %s (value)",
    ]
class LinksTable(Table):

    """Links table: key -> tag, desc, uri."""

    create_query = (
        "CREATE TABLE %s (id INTEGER PRIMARY KEY, fk INTEGER,"
        " tag TEXT, desc TEXT, uri TEXT)"
    )
    insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?, ?)"
    index_queries = ["CREATE INDEX %s_fk ON %s (fk)"]
class BiblTable(Table):

    """Bibliography table: key -> tag, txt."""

    create_query = ("CREATE TABLE %s "
                    "(id INTEGER PRIMARY KEY, fk INTEGER,"
                    " tag TEXT, txt TEXT)")
    # One NULL for the generated id plus one placeholder per remaining
    # column (fk, tag, txt).  A fourth placeholder would supply five
    # values to a four-column table.
    insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?)"
    index_queries = [
        "CREATE INDEX %s_fk ON %s (fk)",
    ]
class EntityTable(Table):

    """Entity table: entity name and its expansion text."""

    create_query = (
        "CREATE TABLE %s (id INTEGER PRIMARY KEY, entity TEXT, expansion TEXT)"
    )
    insert_query = "INSERT INTO %s VALUES (NULL, ?, ?)"
774 ######################################################################
def parse_args():
    """Parses the command line.

    Returns an (options, args) tuple as produced by optparse.  args
    always holds at least the database filename; when it is missing
    the help text is printed and the process exits with status -1.

    """
    from optparse import OptionParser
    op = OptionParser(usage="%prog [options] <db_filename> [search_query]")
    op.add_option("-i", "--initialize",
                  dest="init_fname", metavar="XML_SOURCE",
                  help=_("Initialize database from file."))
    op.add_option("-L", "--lang",
                  help=_("Specify preferred language for searching."))
    options, args = op.parse_args()
    if len(args) < 1:
        op.print_help()
        # sys.exit is used rather than the exit() helper, which is
        # only injected by the site module.
        sys.exit(-1)
    return (options, args)
def main():
    """Command line entry point.

    Opens (and optionally initializes) the database named by the first
    argument.  Any remaining arguments are joined into one search
    query, and each hit is looked up and printed.

    """
    # Copied *almost* verbatim from kd2.py.
    options, args = parse_args()
    db_fname = args[0]

    # init_fname is None unless -i was given, which is also the
    # constructor's default, so no separate branch is needed.
    db = Database(db_fname, init_from_file=options.init_fname)

    results = []
    if len(args) > 1:
        # Do search
        # To be nice, we'll join all remaining args with spaces.
        search_query = " ".join(args[1:])
        results = db.search(search_query, lang=options.lang)

    if not results:
        print(_("No results found."))
        return

    encoding = get_encoding()
    for index, result in enumerate(results, 1):
        print(_("[Entry %d]") % index)
        entry = db.lookup(result)
        try:
            print(unicode(entry).encode(encoding))
        except Exception:
            # Best effort: fall back to the repr when the entry cannot
            # be rendered in the output encoding.  KeyboardInterrupt
            # and SystemExit still propagate.
            print(repr(entry))
        print()
# Run the command line interface only when executed as a script.
if __name__ == "__main__":
    main()