1 from __future__
import print_function
2 from __future__
import with_statement
4 import os
, sys
, re
, sqlite3
5 from cStringIO
import StringIO
6 from xml
.etree
.cElementTree
import ElementTree
7 from helpers
import gzread
10 #t = gettext.translation("jblite")
12 gettext
.install("jblite")
20 class Database(object):
22 def __init__(self
, filename
, init_from_file
=None):
23 self
.conn
= sqlite3
.connect(filename
)
24 self
.cursor
= self
.conn
.cursor()
25 if init_from_file
is not None:
26 self
._reset
_database
()
27 self
._init
_from
_file
(init_from_file
)
def search(self, query, pref_lang=None):
    """Placeholder for dictionary search; not implemented yet.

    Always raises NotImplementedError, whatever the arguments.

    """
    raise NotImplementedError
32 def _reset_database(self
):
34 "entry": EntryTable
, # key->int ID
35 "r_ele": REleTable
, # key-value plus nokanji flag
36 "audit": AuditTable
, # key->(update_date, update_details)
37 "lsource": LSourceTable
, # key -> lang, type=full/part, wasei=t/f
38 "gloss": GlossTable
, # key -> lang, g_gend, value, pri flag
39 "links": LinksTable
, # key -> tag, desc, uri
40 "bibl": BiblTable
, # key -> tag, txt
41 #"etym", # not used yet
42 "entities": EntityTable
, # Info from JMdict XML entities
44 kv_tables
= [ # key-value tables (id -> text blob)
57 kv_entity_tables
= [ # key-value tables where val == entity
66 # Drop any existing tables
67 all_tables
= other_tables
.keys() + kv_tables
+ kv_entity_tables
68 for tbl
in all_tables
:
69 self
.cursor
.execute("DROP TABLE IF EXISTS %s" % tbl
)
71 # Create mappings of table name to class
72 class_mappings
= other_tables
74 class_mappings
[tbl
] = KeyValueTable
75 for tbl
in kv_entity_tables
:
76 class_mappings
[tbl
] = KeyEntityTable
78 # Create all table objects
80 for tbl
, cls
in class_mappings
.iteritems():
81 table_mappings
[tbl
] = cls(self
.cursor
, tbl
)
83 # Create all tables in DB
84 for tbl_obj
in table_mappings
.itervalues():
87 def _init_from_file(self
, jmdict_src
):
88 raw_data
= gzread(jmdict_src
)
89 entities
= self
._get
_entities
(raw_data
)
91 infile
= StringIO(raw_data
)
92 etree
= ElementTree(file=infile
)
95 self
._process
_etree
(etree
, entities
)
97 def _get_entities(self
, xml_data
):
98 """Gets the ENTITY definitions from JMdict.
100 Finds the built-in DTD and extracts all ENTITY definitions.
103 dtd
= self
._get
_dtd
(xml_data
)
104 # do some logic to find all entities...
106 regex
= "<!ENTITY[ ]+([a-zA-Z-]+)[ ]+['\"](.*?)['\"]>"
107 for match
in re
.finditer(regex
, xml_data
):
108 key
, value
= match
.groups()[0:2]
109 key
= "&%s;" % key
# Convert to &entity; format
110 entities
[key
] = value
113 def _get_dtd(self
, xml_data
):
114 """Gets the DTD from JMdict."""
115 # This works for JMdict (as it is at the time of writing), but is
116 # not a general solution.
117 start_index
= xml_data
.find("<!DOCTYPE")
118 if start_index
== -1:
119 raise Exception("Could not find start of internal DTD")
120 end_index
= xml_data
.find("]>")
122 raise Exception("Could not find end ofinternal DTD")
124 dtd
= xml_data
[start_index
:end_index
]
127 def _process_etree(self
, etree
, entities
):
128 for i
, elem
in enumerate(etree
.getiterator("entry")):
138 1. Need objects from DB
139 2. Need to parse XML and:
140 1. Directly create DB
141 2. Create entry objects, and create DB from them.
142 - Strength: allows opportunity for modifying database in the future.
143 - Weakness: Need two ways to create objects: from DB and from
144 SQL. This complicates things...
151 """Base class for tables."""
153 # These queries must be specified in child classes.
158 def __init__(self
, cursor
, table_name
):
161 self
.table_name
= table_name
164 """Creates table, plus indices if supplied in class definition."""
165 self
.cursor
.execute(self
._get
_create
_query
())
166 index_queries
= self
._get
_index
_queries
()
167 for query
in index_queries
:
168 self
.cursor
.execute(query
)
def insert(self, *args):
    """Run this table's insert query, binding args to its placeholders."""
    execute = self.cursor.execute
    execute(self._get_insert_query(), args)
173 def _get_create_query(self
):
174 if self
.table_name
is None:
176 "table_name must be specified in class definition")
177 return self
.create_query
% self
.table_name
179 def _get_insert_query(self
):
180 if self
.table_name
is None:
182 "table_name must be specified in class definition")
183 return self
.insert_query
% self
.table_name
185 def _get_index_queries(self
):
186 if self
.table_name
is None:
188 "table_name must be specified in class definition")
189 if (not (isinstance(self
.index_queries
, list))
190 or (len(self
.index_queries
) == 0)):
193 # Each query needs to have the table name merged in two
195 queries
= [q
% (self
.table_name
, self
.table_name
)
196 for q
in self
.index_queries
]
200 class EntryTable(Table
):
201 create_query
= ("CREATE TABLE %s "
202 "(id INTEGER PRIMARY KEY, ent_seq INTEGER)")
203 insert_query
= "INSERT INTO %s VALUES (NULL, ?)"
205 "CREATE INDEX %s_seq ON %s (ent_seq)",
209 class KeyValueTable(Table
):
210 """General key/value table for one-many relations."""
211 create_query
= ("CREATE TABLE %s "
212 "(id INTEGER PRIMARY KEY, fk INTEGER, value TEXT)")
213 insert_query
= "INSERT INTO %s VALUES (NULL, ?, ?)"
215 "CREATE INDEX %s_fk ON %s (fk)",
class KeyEntityTable(KeyValueTable):

    """Key/value table variant whose payload column is named 'entity'.

    Only the CREATE statement is overridden here; all other attributes
    are inherited from KeyValueTable.

    """

    create_query = (
        "CREATE TABLE %s (id INTEGER PRIMARY KEY, fk INTEGER, entity INTEGER)"
    )
225 class REleTable(Table
):
226 create_query
= ("CREATE TABLE %s "
227 "(id INTEGER PRIMARY KEY, fk INTEGER,"
228 " value TEXT, nokanji INTEGER)")
229 insert_query
= "INSERT INTO %s VALUES (NULL, ?, ?, ?)"
231 "CREATE INDEX %s_fk ON %s (fk)",
235 class AuditTable(Table
):
236 create_query
= ("CREATE TABLE %s "
237 "(id INTEGER PRIMARY KEY, fk INTEGER,"
238 " update_date TEXT, update_details TEXT)")
239 insert_query
= "INSERT INTO %s VALUES (NULL, ?, ?, ?)"
241 "CREATE INDEX %s_fk ON %s (fk)",
245 class LSourceTable(Table
):
246 """Represents the <lsource> element from JMdict.
249 ls_type=full/part => partial=1/0
252 create_query
= ("CREATE TABLE %s "
253 "(id INTEGER PRIMARY KEY, fk INTEGER,"
254 " lang TEXT, partial INTEGER, wasei INTEGER)")
255 insert_query
= "INSERT INTO %s VALUES (NULL, ?, ?, ?, ?)"
257 "CREATE INDEX %s_fk ON %s (fk)",
261 class GlossTable(Table
):
262 create_query
= ("CREATE TABLE %s "
263 "(id INTEGER PRIMARY KEY, fk INTEGER,"
264 " lang TEXT, g_gend TEXT, value TEXT, pri INTEGER)")
265 insert_query
= "INSERT INTO %s VALUES (NULL, ?, ?, ?, ?, ?)"
267 "CREATE INDEX %s_fk ON %s (fk)",
268 "CREATE INDEX %s_lang ON %s (lang)",
269 "CREATE INDEX %s_value ON %s (value)",
273 class LinksTable(Table
):
274 create_query
= ("CREATE TABLE %s "
275 "(id INTEGER PRIMARY KEY, fk INTEGER,"
276 " tag TEXT, desc TEXT, uri TEXT)")
277 insert_query
= "INSERT INTO %s VALUES (NULL, ?, ?, ?, ?)"
279 "CREATE INDEX %s_fk ON %s (fk)",
class BiblTable(Table):

    """Table holding (fk, tag, txt) rows.

    The %s in each query is filled with the table name by the base class
    helpers; index templates take the table name twice.

    """

    create_query = ("CREATE TABLE %s "
                    "(id INTEGER PRIMARY KEY, fk INTEGER,"
                    " tag TEXT, txt TEXT)")
    # NULL for the id column plus three bound values (fk, tag, txt): the
    # value count must equal the four columns declared in create_query,
    # otherwise SQLite rejects every insert.
    insert_query = "INSERT INTO %s VALUES (NULL, ?, ?, ?)"
    index_queries = [
        "CREATE INDEX %s_fk ON %s (fk)",
    ]
293 class EntityTable(Table
):
294 table_name
= "entities"
295 create_query
= ("CREATE TABLE %s "
296 "(id INTEGER PRIMARY KEY, entity TEXT, expansion TEXT)")
297 insert_query
= "INSERT INTO %s VALUES (NULL, ?, ?)"
299 "CREATE INDEX %s_entity ON %s (entity)",
304 if len(sys
.argv
) < 3:
305 print(_("Please specify"), file=sys
.stderr
)
306 db
= Database(sys
.argv
[1], init_from_file
=sys
.argv
[2])
308 if __name__
== "__main__":