from __future__ import with_statement
import random, re, os, datetime, hashlib, thread
try:
	import MySQLdb
except ImportError, e:
	raise ImportError("MYSQL ERROR: "+str(e))
import apsw
import warnings
from limoutil import log
warnings.filterwarnings('ignore', '^.*exists$') # ignore the warnings generated by 'create X if not exists' statements
16 """ The Model object is a way to create database schema incrementally.
18 The goal is to allow disparate modules to require different tables, or different columns on existing tables.
20 1) Create the test database.
22 Clear the test database forcefully.
24 >>> q=Query("drop database test; create database test; use test", dsn='test')
26 2) Define the database schema.
28 .database() - Ensures the database exists.
29 If there was no call to .database() is made in a sequence, then the db from the dsn passed to the Model() constructor is assumed for future operations.
30 .table() - Ensures the table exists.
32 .col() - Ensures that the column exists.
34 .data() - Ensures that some default data is in place.
36 The execute() on the end will build only the pieces that are missing.
38 >>> start = datetime.datetime.now()
39 >>> s=Model(dsn="test")\
42 .col("uid int primary key auto_increment")\
43 .col("name text not null")\
44 .data([1, 'anonymous'])\
45 .data([2, 'jldailey'])\
47 .col("gid int primary key auto_increment")\
48 .col("name text not null")\
51 .col("rid int primary key auto_increment")\
52 .col("name text not null")\
54 .table("group_roles")\
55 .col("gid int not null")\
56 .col("rid int not null")\
57 .col("primary key (gid, rid)")\
59 .table("group_users")\
60 .col("gid int not null")\
61 .col("uid int not null")\
64 .col("uid int not null")\
65 .col("rid int not null")\
68 3) Extending the existing model
70 The reason to use the schema object (rather than directly modifying the db) is that you can come along later
71 and extend the existing model without clobbering (or knowing much about) what already exists.
73 So this example adds 2 fields to the users table.
75 >>> s=Model(dsn="test")\
78 .col("email varchar(128)")\
79 .col("passwd varchar(32) not null default ''")\
82 >>> Query("show create table test.users", dsn="test")[0][1]
83 u"CREATE TABLE `users` (\\n `uid` int(11) NOT NULL AUTO_INCREMENT,\\n `name` text NOT NULL,\\n `email` varchar(128) DEFAULT NULL,\\n `passwd` varchar(32) NOT NULL DEFAULT '',\\n PRIMARY KEY (`uid`)\\n) ENGINE=MyISAM AUTO_INCREMENT=3 DEFAULT CHARSET=latin1"
85 If code in 2 separate places requires the same schema, it has no effect.
87 So, adding the email column again here has no effect.
89 Think of the column definitions as column requirements, ensuring that the column exists as defined.
91 >>> s=Model(dsn="test")\
94 .col("email varchar(128)")\
95 .col("unique index uq_email (email)")\
96 .col("index ix_email_uid (email, uid)")\
98 >>> Query("show create table test.users", dsn="test")[0][1]
99 u"CREATE TABLE `users` (\\n `uid` int(11) NOT NULL AUTO_INCREMENT,\\n `name` text NOT NULL,\\n `email` varchar(128) DEFAULT NULL,\\n `passwd` varchar(32) NOT NULL DEFAULT '',\\n PRIMARY KEY (`uid`),\\n UNIQUE KEY `uq_email` (`email`),\\n KEY `ix_email_uid` (`email`,`uid`)\\n) ENGINE=MyISAM AUTO_INCREMENT=3 DEFAULT CHARSET=latin1"
101 >>> s=Model(dsn="test").defaultTable("pages")\
102 .column("pid int primary key auto_increment")\
103 .column("url varchar(256) not null")\
104 .column("name varchar(256) not null")\
106 >>> s=Model(dsn="test").defaultTable("pages")\
107 .column("pid int primary key auto_increment")\
108 .column("url varchar(256) not null")\
109 .column("name varchar(256) not null")\
113 4) You can forcefully modify an existing column definition by specifying a version on the column.
115 Also in this example you can see that passing the database() explicitly is not required, it can be inferred from the dsn passed to the Model() constructor.
117 >>> s=Model(dsn="test")\
119 .col("email varchar(256)", version=1)\
121 >>> Query("show create table test.users", dsn="test")[0][1]
122 u"CREATE TABLE `users` (\\n `uid` int(11) NOT NULL AUTO_INCREMENT,\\n `name` text NOT NULL,\\n `email` varchar(256) DEFAULT NULL,\\n `passwd` varchar(32) NOT NULL DEFAULT '',\\n PRIMARY KEY (`uid`),\\n UNIQUE KEY `uq_email` (`email`),\\n KEY `ix_email_uid` (`email`,`uid`)\\n) ENGINE=MyISAM AUTO_INCREMENT=3 DEFAULT CHARSET=latin1"
124 >>> s=Model(dsn="test")\
126 .col("name varchar(64) not null", version=1)\
127 .col("unique index (name)")\
129 >>> Query("show create table test.users", dsn="test")[0][1]
130 u"CREATE TABLE `users` (\\n `uid` int(11) NOT NULL AUTO_INCREMENT,\\n `name` varchar(64) NOT NULL,\\n `email` varchar(256) DEFAULT NULL,\\n `passwd` varchar(32) NOT NULL DEFAULT '',\\n PRIMARY KEY (`uid`),\\n UNIQUE KEY `uq_name` (`name`),\\n UNIQUE KEY `uq_email` (`email`),\\n KEY `ix_email_uid` (`email`,`uid`)\\n) ENGINE=MyISAM AUTO_INCREMENT=3 DEFAULT CHARSET=latin1"
132 This test always fails.
134 >>> _ms_elapsed(start)
	def __init__(self, dsn="default", skipExecute=False):
		self.dsn = dsn
		self.db = {}
		self._order = {}
		self._data = {}
		self._version = {}
		self._table_opts = {}
		# only update the tables the first time
		if not self._init.get(self.__class__.__name__, False):
			self._init[self.__class__.__name__] = True
161 .table("schema_history")\
162 .col("`table_name` varchar(128) NOT NULL")\
163 .col("`column_name` varchar(128) NOT NULL")\
164 .col("`column_definition` text NOT NULL")\
165 .col("`version` int(11) NOT NULL")\
166 .col("`entered` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP")\
	def defaultDatabase(self, name):
		""" Require that a database exists, creating if necessary. """
		if self.db.get(name, None) is None:
			self.db[name] = {}
		self._db = name
		return self

	def database(self, name):
		""" Alias for defaultDatabase() """
		return self.defaultDatabase(name)
	def defaultTable(self, name, options=None):
		""" Require that a certain table be present. Uses the most recent call to .defaultDatabase() to get the context for the table.
		If there is no most recent call to .defaultDatabase(), parse the database from the datasource definition.
		Subsequent calls to defaultColumn() can be used to ensure the table's content.
		"""
		if self._db is None:
			try:
				(prot, user, password, host, port, db) = re.findall(Query._db_cfg_regex, Query._cfg[self.dsn])[0]
			except Exception, e:
				raise Exception("no default database found: dsn: %s cfg: %s -> %s" % ( str(self.dsn), str(Query._cfg[self.dsn]), str(e)))
			self.defaultDatabase(db)
		if self.db[self._db].get(name, None) is None:
			self.db[self._db][name] = {}
		self._table = name
		if options is not None:
			self._table_opts[name] = options
		key = self._db
		if self._order.get(key, None) is None:
			self._order[key] = []
		self._order[key].append(self._table)
		self.log("Requiring table %s: [%s] = %s" % (name, key, name))
		return self

	def table(self, name, options=None):
		""" Alias for defaultTable() """
		return self.defaultTable(name, options)
	def defaultColumn(self, definition, version=0, skipNormalize=False):
		"""
		Require that a certain column be present.
		Uses the most recent call to .defaultDatabase() and .defaultTable() to get the context for where this column should exist.
		"""
		if not skipNormalize:
			definition = self._normalize_column_definition(definition)
		# after a column definition is normalized, it might actually turn out to be more than one column (a data column, and an index)
		if type(definition) == list:
			# if so, recursively add the column and then the index
			self.log("Column requirement is a list: %s" % ( str(definition) ) )
			for d in definition:
				self.defaultColumn(d.lower(), version=version, skipNormalize=True)
			return self
		definition = definition.lower()
		name = _getNameFromDefn(definition)
		# self.log("NAME: %s FROM: %s" % (name, definition))
		if self.db[self._db][self._table].get(name, None) is None:
			self.db[self._db][self._table][name] = definition
		key = self._db+self._table
		self.log("Saving _version argument: [%s] = %s" % (key+name, version))
		self._version[key+name] = version
		if self._order.get(key, None) is None:
			self._order[key] = []
		# self.log("Appending column %s to key %s (from definition: %s)" % (name, key, definition))
		self._order[key].append((name,definition))
		self.log("Requiring column %s: [%s] = %s" % (name, key, definition))
		return self

	def column(self, d, version=0): return self.defaultColumn(d,version=version)
	def col(self, d, version=0): return self.defaultColumn(d,version=version)
	def c(self, d, version=0): return self.defaultColumn(d,version=version)
	def defaultData(self, row):
		key = self._db+self._table
		if self._data.get(key, None) is None:
			self._data[key] = [row]
		else:
			self._data[key].append(row)
		return self

	def data(self, row): return self.defaultData(row)
	def log(self, msg):
		with open("db.log", "a+") as f:
			f.write(str(msg) + "\n")
	def execute(self):
		skipPKLater = True
		for db, tables in self.db.items():
			FastQuery("create database if not exists %s" % db, dsn=self.dsn)
			# pull out all the tables in the order they were specified
			for table_name in self._order[db]:
				columns = self.db[db][table_name]
				FastQuery("create table if not exists %s.%s ( __filler__ int null ) %s" %
					(db, table_name, self._table_opts.get(table_name,'')), dsn=self.dsn)
				table = _TableInfo(db+"."+table_name, dsn=self.dsn)
				assert len(table.lines.keys()) > 0
				key = db+table_name
				# pull out all the columns in the order they were specified
				for (column_name, column_definition) in self._order.get(key, []):
					# self.log("EXECUTING: '%s' from %s" % (column_name, column_definition))
					if table.names.get(unicode(column_name), False): # if the table already has this column
						if table.names[unicode(column_name)] == column_definition: # if the existing column is identical
							# self.log("COLUMN ALREADY EXISTS: %s as %s" % (column_name, column_definition))
							continue
						# retrieve the version specified for this column update
						version = self._version.get(db+table_name+column_name, None)
						self.log('DETECTED VERSION CHANGE [%s] \'%s\' :: %s != %s (version:%s)' % (table_name+"."+column_name, unicode(column_name), table.names[unicode(column_name)], column_definition, version))
						# if none was specified then we can't do versioning, so bail
						if version is None or version == 0:
							e = ModelConflictError("%s already defined as %s, and no new version number was specified" % (column_definition, table.names[unicode(column_name)]))
							# self.log('SCHEMA CONFLICT ERROR: %s' % str(e))
							raise e
						# get the current version of the column as it is on the table
						current_definition = _TableInfo(db+"."+table_name, dsn=self.dsn).getName('primary key')
						self.log("CURRENT DEFINITION: %s" % current_definition)
						current_version = Query("select coalesce(max(version),0) from schema_history where table_name = '%s' and column_name = '%s' and column_definition = '%s'" % (table_name, column_name, current_definition), dsn=self.dsn)[0][0]
						current_version = int(current_version)
						# if the specified version is newer
						if version > current_version:
							# TODO: save version 0
							FastQuery("replace into schema_history (table_name, column_name, column_definition, version) values ('%s', '%s', '%s', %d)" \
								% (table_name, column_name, safe(column_definition), version), dsn=self.dsn)
							# and then modify the existing column
							if column_definition.startswith("primary key"):
								FastQuery("alter table %s.%s drop primary key" % (db, table_name))
								FastQuery("alter table %s.%s add %s" % (db, table_name, column_definition))
							else:
								FastQuery("alter table %s.%s modify %s" % (db, table_name, column_definition), dsn=self.dsn)
						# skip obsolete column definition
					else:
						# self.log("COLUMN DID NOT PREVIOUSLY EXIST: %s in %s" % (column_definition, str(table.names.keys())))
						# the column didn't exist, so just add it
						# HACK: to fix bug with primary key columns
						if skipPKLater and column_definition.find("primary key") > -1:
							continue
						if column_definition.find("auto_increment") > -1:
							column_definition = column_definition.replace("auto_increment", "auto_increment primary key")
						try:
							FastQuery("alter table %s.%s add %s" % (db, table_name, column_definition), dsn=self.dsn)
						except Exception, e:
							self.log("FAILED: %s (sql: %s)" % ( str(e), "alter table %s.%s add %s" % (db, table_name, column_definition)))
				if table.names.get("__filler__", False):
					FastQuery("alter table %s.%s drop __filler__" % (db, table_name))
				if self._data.get(key, None) is not None:
					data = self._data[key]
					self.log("Key: %s has required data: %s" % (key, str(data)))
					table = _TableInfo(db+"."+table_name, dsn=self.dsn) # reload the table definition
					for row in data:
						cols = ', '.join([c for c in table.order[0:len(row)] if c != 'primary key'])
						for r in range(len(row)):
							row[r] = safe(str(row[r]))
						values = ', '.join(["'%s'"%s for s in row])
						FastQuery("insert ignore into %s ( %s ) values ( %s )" % (table_name, cols, values), dsn=self.dsn)
	def _normalize_column_definition(self, defn):
		"""
		This creates an empty 'probe' table in the database with just the necessary columns.
		Then calls 'show create table' to get the server to produce the standard version of that sql.

		>>> Model()._normalize_column_definition('email varchar(32)')
		u'`email` varchar(32) default null'

		If a single line column definition actually results in more than one column on the resulting table, a list of columns is returned.

		>>> Model()._normalize_column_definition('uid int primary key auto_increment')
		[u'`uid` int(11) not null auto_increment', u'primary key (`uid`)']

		>>> Model()._normalize_column_definition('primary key (id, name)')
		u'primary key (`id`,`name`)'

		>>> Model()._normalize_column_definition('unique index (vid, url(32), sid)')
		u'unique key `uq_vid_url_sid` (`vid`,`url`(32),`sid`)'
		"""
		# self.log("_normalize_column_definition(%s)" % defn)
		t = 'probe_table_%d' % random.randrange(0,9999999)
		columns = [ x.strip() for x in defn[defn.find('(')+1:defn.rfind(')')].split(',') ]
		is_compound_pk = (re.search('\S\sprimary key', defn) is not None)
		if re.search("(?:index|key)\s*\(", defn) is not None:
			prefix = "ix_"
			if defn.find("unique") > -1:
				prefix = "uq_"
			name = prefix + "_".join([ re.sub("\(\d+\)","",x) for x in columns ]).replace("`","").replace("'","")
			defn = re.sub("(?:index|key)", "key `%s`" % name, defn)
		# if it's a simple index column, then the probe table needs the indexed columns to be built
		if _match_any(defn, ('unique ', 'key ', 'index ', 'fulltext ')) and not is_compound_pk:
			if _match_any(defn, ('fulltext',)):
			for col in columns:
				col = col.replace(" ","")
				# if the column being indexed specified a prefix, then it must refer to a varchar field that is long enough
				open = col.find('(')
				close = col.rfind(')')
				# build the new type with a long enough varchar
				type = "varchar(%d)" % (int(col[open+1:close]) + 1)
				# set the column name to be the name without the prefix
				col = col[0:open]
				# prepend this to the existing list of columns
				defn = "%s %s, %s "%(col, type, defn)
		# create the probe table
		Query("create table %s ( %s )" % (t, defn))
		# read back the table definition
		table = _TableInfo(t)
		Query("drop table %s" % t)
		# pull out just the column text
		ret = []
		for name in table.order:
			line = table.names[name]
			if name not in columns: # don't return columns that were generated to satisfy an index
				# self.log("ADDING COLUMN: %s from %s not in %s" % (name, line, str(columns)))
				ret.append(line)
			# self.log("IGNORING COLUMN: %s from %s in %s" % (name, line, str(columns)))
		if len(ret) > 1:
			ret = [r.strip().lower() for r in ret]
		else:
			ret = ret[-1].strip().lower()
		return ret
	@staticmethod
	def int_uid(from_string):
		return Model.uid(from_string).__hash__()

	@staticmethod
	def uid(from_string):
		return hashlib.sha1(from_string).hexdigest()
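	# Illustrative note: Model.uid("some string") yields the 40-character hex sha1 digest of the string,
	# and Model.int_uid("some string") reduces that digest to a Python integer via its hash.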

class Query(object):
	""" A good non-dbapi db system should:

	Get its configuration from a db.cfg file, no connect params in code:

		default="mysql://user:password@localhost:3306/db1"
		foo="mysql://user:password@localhost:3306/foo"

	1) Allow direct, named, query access:

	>>> result = Query("select * from users where 1 = 0",dsn="test")
	>>> type(result)
	<class '__main__.Query'>

	2) Support iteration:

	>>> for row in Query("select * from users",dsn="test"):
	... 	print "uid: %d name: %s rowIndex: %d" % (int(row.uid), row.name, row.rowIndex)
	uid: 1 name: anonymous rowIndex: 0
	uid: 2 name: jldailey rowIndex: 1

	3) Allow indexed access by columns in a wide variety of ways:

	>>> result = Query("select uid, name from users",dsn="test")

	>>> result[0]['name']

	3a) Using the name as the first indexer creates a projection of the data, and further indexers pull rows from that projection

	>>> result['name'][0]

	3b) Using the name as an attribute selects a single column

	>>> result[1]['name']

	3c) You can see any individual Row object in its entirety

	>>> type(result[0])
	<class '__main__.Row'>

	3d) You can pull out multi-dimensional projections of the data using comma-separated string indexes

	>>> result[0]['uid, name']

	>>> result[0:2]['uid, name']
	[[1L, u'anonymous'], [2L, u'jldailey']]

	>>> for i in result[0:2].uid:
	... 	print i

	>>> for i in result.uid:
	... 	print i

	>>> for i in result['uid, name']:
	... 	print i
	{Row:'uid': 1L,'name': u'anonymous'}
	{Row:'uid': 2L,'name': u'jldailey'}

	>>> result['uid, name']
	(RowList:{Row:'uid': 1L,'name': u'anonymous'},{Row:'uid': 2L,'name': u'jldailey'})
	>>> result['uid, name'][0]
	{Row:'uid': 1L,'name': u'anonymous'}
	>>> result['uid, name'][0:1]
	(RowList:{Row:'uid': 1L,'name': u'anonymous'})
	>>> result['uid, name'][0:2]
	(RowList:{Row:'uid': 1L,'name': u'anonymous'},{Row:'uid': 2L,'name': u'jldailey'})

	NOTE: The critical difference is whether you use names first: names select columns, numbers select rows.
	If you select rows first, you get back just the raw data in a tuple.
	If you select the columns first, you get back real Row objects.
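
	For example (illustrative only, not run as part of the doctest):

		result[0]['uid, name']     ->  plain values, e.g. [1L, u'anonymous']
		result['uid, name'][0]     ->  a Row object, e.g. {Row:'uid': 1L,'name': u'anonymous'}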

	4) Allow query of queries

	First do something to pull back a data set

	>>> result = Query('''\
	... 	select u.name as user_name, r.name as role_name from users u\
	... 	left join user_roles ur on ur.uid = u.uid\
	... 	left join roles r on ur.rid = r.rid\
	... 	union\
	... 	select u.name as user_name, r.name as role_name from users u\
	... 	left join group_users gu on gu.uid = u.uid\
	... 	left join groups g on gu.gid = g.gid\
	... 	left join group_roles gr on gr.gid = g.gid\
	... 	left join roles r on r.rid = gr.rid\
	... 	''', dsn="test")
	>>> result['user_name, role_name']
	(RowList:{Row:'user_name': u'anonymous','role_name': u'None'},{Row:'user_name': u'jldailey','role_name': u'None'},{Row:'user_name': u'jldailey','role_name': u'admin'})

	Then execute queries against that resultset.

	>>> result2 = result.Query("select distinct role_name from __self__")
	>>> type(result2)
	<class '__main__.Query'>

	>>> result2['role_name']
	(RowList:u'None',u'admin')

	>>> for i in result2:
	... 	print i
	{Row:'role_name': u'None'}
	{Row:'role_name': u'admin'}

	>>> assert(u'admin' == result2[1].role_name)
	>>> assert(u'None' == result2[0].role_name)

	>>> count = result.Query("select count( distinct role_name ) from __self__")

	>>> count = result.Query("select count( distinct role_name ) as qty from __self__")

	4a) All sub-queries support the same operations as real queries, except they lose type information (a temporary limitation of using pysqlite2 instead of 3, waiting for sqlite3/python2.5 to be fixed).
	Everything that comes out of a sub-query will be a unicode string.

	>>> int(count[0].qty)
	2

	4b) Query of queries can join 2 result sets together

	>>> get_users = Query(name="get_users", sql="select * from users",dsn="test")
	>>> get_groups = Query(name="get_groups", sql="select * from group_users",dsn="test")
	>>> result = get_users.Query("select u.uid, u.name, r.gid from get_users u join get_groups r on r.uid = u.uid")
	>>> result[0]
	{Row:'uid': u'2','name': u'jldailey','gid': u'1'}

	If you don't want to specify a name for the query, you can use the special token __self__ in the query.
	The __self__ token will be replaced by the name of the parent query.
	You can also access a query's _name property to get the name of the backing table directly.
	(If you don't specify a name, this will be an autogenerated value like 'autoname_<40 character sha1 hash>'.)

	>>> get_users = Query("select * from users", dsn="test")
	>>> get_groups = Query("select * from group_users",dsn="test")
	>>> result = get_users.Query("select u.uid, u.name, r.gid from __self__ u join %s r on r.uid = u.uid" % get_groups._name)
	>>> result[0]
	{Row:'uid': u'2','name': u'jldailey','gid': u'1'}

	5) Get a log of all queries executed:

	>>> for (sql, duration) in Query.getQueryLog():
	... 	pass # don't verify the output, since there is too much of it

	5a) Clear the log of query data.

	>>> Query.clearQueryLog()
	"""
	_db_cfg_regex = '(mysql)://(\w+):(\w*)@([^:]+):(\d+)/(\w+)'
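	# Each dsn value in db.cfg must match this pattern; an illustrative example entry:
	#   test="mysql://someuser:somepassword@localhost:3306/test"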
		if Query._cfg is None:
			Query._cfg = {}
		# read the cfg if modified
		db_cfg = os.path.sep.join(__file__.split(os.path.sep)[:-1] + ["db.cfg"])
		m = os.path.getmtime( db_cfg )
		if m > Query._cfg_mtime:
			Query._cfg_mtime = m
			s = open(db_cfg).read()
			for p in [p.split('=') for p in s.replace('\r','').split('\n') if len(p) > 0]:
				Query._cfg[p[0]] = p[1]
				if Query._db.get(p[0], None) is not None:
					Query._db[p[0]].close()
					Query._db[p[0]] = None
	@staticmethod
	def _update_connections(dsn):
		""" Connect to the sqlite results datasource, and make sure that we have a mysql connection.
		Returns the mysql connection.
		"""
		tid = thread.get_ident()
		if Query._sqlite.get(tid, None) is None:
			Query._sqlite[tid] = apsw.Connection(":memory:")
		if Query._db.get(tid,None) is None:
			Query._db[tid] = {}
		if Query._db[tid].get(dsn,None) is None: # instantiate the needed connections
			try:
				(prot, user, password, host, port, db) = re.findall(Query._db_cfg_regex, Query._cfg[dsn])[0]
			except (KeyError, IndexError):
				raise Exception("DSN argument passed to Query (dsn='%s') was not found in the cfg (available: '%s')" % (dsn, str(Query._cfg.keys())))
			if not port.isdigit():
				raise ValueError("DSN specified in db.cfg has non-integer port value: '%s'" % port)
			port = int(port)
			try:
				log("FastQuery: Connecting to db... %s %s %s" % (host,db,user))
				Query._db[tid][dsn] = MySQLdb.connect(host=host, port=port, db=db, user=user, passwd=password)
				log("FastQuery: Connected...")
			except MySQLdb.OperationalError:
				# try re-connecting without specifying the database
				Query._db[tid][dsn] = MySQLdb.connect(host=host, user=user, passwd=password)
				# then create it and connect to the empty db
				Query._db[tid][dsn].cursor().execute("create database if not exists %s" % db)
				Query._db[tid][dsn].close()
				Query._db[tid][dsn] = MySQLdb.connect(host=host, db=db, user=user, passwd=password)
		return Query._db[tid][dsn]
	@staticmethod
	def _close_connection(dsn):
		tid = thread.get_ident()
		cxt = Query._db.get(tid,{}).get(dsn,None)
		if cxt is not None:
			cxt.close()
			Query._db[tid][dsn] = None
	def __init__(self, sql=None, name=None, dsn="default", cursor=None, cache=False):
		"""
		Can be instantiated in 2 ways: with a sql string, which will be executed against the remote data source.
		Or, with a cursor object that is already full of results. (.description and .fetchall() must work)
		"""
		if name is None:
			if sql is not None:
				name = "autoname_%s" % hashlib.sha1(sql).hexdigest()
			else:
				name = "autoname_%s" % hashlib.sha1(str(random.randrange(0,99999999))).hexdigest()
		self._name = name
		self.sql = sql
		self.dsn = dsn
		# if we were called with just sql, this means we want to run statements directly on the real back end
		if sql is not None and cursor is None:
			if cache and Query._cache.get(sql, None) is not None:
				start = datetime.datetime.now()
				(other_name, self.columns, self.rowCount) = Query._cache[sql]
				if self._name.startswith('autoname'):
					# since the caller didn't specify a name, they don't care what the name is, so we can share a backing table with the cached data
					self._name = other_name
				else:
					# since we are giving this data a new name, it needs a new backing table, in case we need to join against it
					# TODO: a dict full of redirections would serve? Difficulty: any number of autoname queries could reference the same backing table, but once that table gets a real name, you have to start copying, because someone could issue an update statement on it, and change other result sets.
					c = Query._sqlite_cursor()
					c.setexectrace(self.log_sqlite)
					c.execute("create table %s as select * from %s" % (self._name, other_name))
				Query.logQuery("CACHED: "+sql, _ms_elapsed(start))
				return
			# read the cfg file if it has changed
			# get a db connection
			self.db = Query._update_connections(dsn)
			start = datetime.datetime.now()
			try:
				c = self.db.cursor()
				c.execute(sql)
			except MySQLdb.OperationalError:
				self.log("LOST CONNECTION TO DB: RECONNECTING...")
				Query._close_connection(self.dsn)
				self.db = Query._update_connections(self.dsn)
				c = self.db.cursor()
				c.execute(sql)
			except Exception, e:
				raise SQLException(str(e), sql)
			Query.__init__(self, sql=sql, name=self._name, cursor=c)
			Query.logQuery(sql, _ms_elapsed(start))
		elif cursor is not None: # otherwise, we are loading the raw way, using the result of some other query
			try:
				# a dbapi cursor (e.g. MySQLdb)
				self.columns = cursor.description
				self.rows = cursor.fetchall()
			except AttributeError:
				# an apsw cursor
				self.columns = cursor.getdescription()
				self.rows = []
				try:
					for row in cursor:
						self.rows.append(Row(cursor, row, index=len(self.rows)))
				except apsw.ExecutionCompleteError:
					pass
			self.rowCount = len(self.rows)
			c = Query._sqlite_cursor()
			c.setexectrace(self.log_sqlite)
			if self.columns is not None and len(self.columns) > 0:
				create = "create table %s ( %s )" % (self._name, ','.join(["[%s] %s"%(x[0], self._get_sqlite_typename(x[1])) for x in self.columns]))
				try:
					c.execute(create)
				except apsw.SchemaChangeError, e: # some versions of apsw raise SchemaChangeError, some versions raise SQLError, so we have to catch both
					s = str(e)
					if s.endswith('already exists'):
						pass
				except apsw.SQLError, e:
					s = str(e)
					if s.endswith('already exists'):
						pass
				if len(self.rows) > 0:
					num_inserts = int(len(self.rows)/max_rows_per) + 1
					for n in range(num_inserts):
						a = n*max_rows_per
						b = min(len(self.rows),(n+1)*max_rows_per)
						insert = u"insert or ignore into %s ( %s ) %s" % (
							self._name,
							u','.join([u"[%s]"%x[0] for x in self.columns]),
							u' union all '.join([u'select '+u','.join([u"%s" % self._sqlrepr(s) for s in row]) for row in self.rows[a:b]]))
						try:
							c.execute(insert)
						except Exception:
							raise Exception("FAILED INSERT: (a: %s, b: %s len(self.rows[a:b]): %s)" % (a,b,len(self.rows[a:b])))
			if sql.lower().startswith('select'):
				Query._cache[sql] = (self._name, self.columns, self.rowCount)

	def __del__(self):
		c = Query._sqlite_cursor()
		c.setexectrace(self.log_sqlite)
		self.log_sqlite("IN __DEL__", None)
		c.execute("drop table if exists %s" % self._name)
	@staticmethod
	def logQuery(sql, duration):
		Query.log("[%.2f] %s" % (duration, sql))
		if Query._sqlite is None: return
		# prepare the query log
		c2 = Query._sqlite_cursor()
		c2.execute(u"create table if not exists query_log ( sql text not null, duration int )")
		sql = sql.replace("'","''")
		c2.execute("insert into query_log ( sql, duration ) values ( '%s', %f )" % (sql, duration))
	@staticmethod
	def getQueryLog():
		if Query._sqlite is None: return
		c2 = Query._sqlite_cursor()
		c2.execute("select sql, duration from query_log order by rowid asc")
		for (sql, duration) in c2:
			yield (sql, duration)

	@staticmethod
	def clearQueryLog():
		if Query._sqlite is None: return
		c2 = Query._sqlite_cursor()
		c2.execute("delete from query_log")
	def _sqlrepr(self, o):
		return u"'%s'" % str(o).decode('iso-8859-2').replace("'","''")

	def _get_sqlite_typename(self, t):
		if t == MySQLdb.NUMBER:
			return "integer"
		return "text"
	@staticmethod
	def _update_sqlite():
		tid = thread.get_ident()
		if Query._sqlite.get(tid, None) is None:
			Query._sqlite[tid] = apsw.Connection(":memory:")

	@staticmethod
	def _sqlite_cursor():
		tid = thread.get_ident()
		return Query._sqlite[tid].cursor()
	def Query(self, sql=None, name=None):
		"""
		Returns a new Query that selects from this result set as if it were a table.
		- Can join to other resultsets by name.
		"""
		c = Query._sqlite_cursor()
		c.setexectrace(self.log_sqlite)
		sql = sql.replace("__self__", self._name)
		c.execute(sql)
		return Query(name=name, sql=sql, cursor=c)
	def __getitem__(self, key):
		if isinstance(key, slice):
			i = key.indices(len(self))
			return self._rows("*", range(i[0],i[1],i[2]))
		elif isinstance(key, str):
			return self._rows(key, range(0,len(self)))
		elif isinstance(key, int):
			return self._rows("*", [key], single=True)
		else:
			raise TypeError("Unknown type: "+str(type(key)))
, name
):
838 return object.__getattribute
__(self
, name
)
839 except AttributeError:
841 return self
._rows
(name
, range(0,object.__getattribute
__(self
, "rowCount")))
842 except apsw
.SQLError
, e
:
843 raise AttributeError(name
)
	def _rows(self, columns, keys, single=False, rowIndex=-1):
		c = Query._sqlite_cursor()
		c.setexectrace(self.log_sqlite)
		ret = []
		try:
			for row in c.execute("select %s from %s where (rowid-1) in (%s)" % (columns, self._name, ','.join([str(x) for x in keys]))):
				ret.append(Row(c,row, index=len(ret)))
		except apsw.SQLError, e:
			raise #apsw.SQLError("SQLite error. Original query that generated sqlitetable: %s SQLite error: %s" % (str(self.sql), str(e)))
		if single:
			if len(ret) > 0:
				ret[0].rowIndex = rowIndex
				return ret[0]
			raise IndexError("columns: %s keys: %s expected a result and got none" % (columns, keys))
		# assert(len(r) == len(ret))
		return RowList(ret)

	def __iter__(self):
		self.currentRow = -1
		return self

	def next(self):
		self.currentRow += 1
		if self.currentRow >= self.rowCount:
			raise StopIteration
		return self._rows("*",[self.currentRow, ], single=True, rowIndex=self.currentRow)

	def __repr__(self):
		return "{Query: %s backed by table: %s}" % (self.sql, self._name)

	def __str__(self):
		return str(self[0:len(self)])

	def log_sqlite(self, sql, bindings,unknown=None):
		# self.log("SQLITE: "+sql+" BINDINGS: "+str(bindings))
		return True

	@staticmethod
	def log(msg):
		with open("db.log", "a+") as f:
			f.write(str(msg) + "\n")

class RowList(object):
	def __init__(self, list):
		""" Takes a list of Row objects """
		self.__list = []
		for i in list:
			if i.columncount() == 1:
				self.__list.append(i[0])
			else:
				self.__list.append(i)
		assert(len(self.__list) == len(list))

	def __getitem__(self, key):
		if isinstance(key, int):
			return self.__list[key]
		elif isinstance(key, slice):
			return RowList(self.__list[key])
		else:
			ret = [i.__getitem__(key) for i in self.__list]
			return ret

	def __getattribute__(self, key):
		try:
			return object.__getattribute__(self, key)
		except AttributeError:
			return [i.__getattribute__(key) for i in self.__list]

	def __repr__(self):
		return self.__str__()

	def __str__(self):
		return "(RowList:"+','.join([repr(x) for x in self.__list])+")"

	def __len__(self):
		return len(self.__list)

	def __iter__(self):
		return self.__list.__iter__()

class Row(object):
	def __init__(self, cursor, row, index=None):
		self.rowIndex = index
		self.__c = []
		self.__t = []
		for col in cursor.getdescription():
			self.__c.append(col[0])
			self.__t.append(col[1])
		self.__d = [(d if d is not None else 0) for d in row]

	def columncount(self):
		return len(self.__c)

	def __repr__(self):
		return self.__str__()

	def __str__(self):
		return "{Row:"+','.join(["'%s': %s" % (self.__c[i],repr(self.__d[i])) for i in range(0, len(self.__c))])+"}"

	def __getattribute__(self, key):
		try:
			return object.__getattribute__(self, key)
		except AttributeError:
			try:
				i = self.__c.index(key)
				return self.__d[i]
			except ValueError:
				raise AttributeError(key)

	def __getitem__(self, key):
		if isinstance(key, str):
			try:
				i = self.__c.index(key)
				return self.__d[i]
			except ValueError:
				if key.find(',') > -1:
					return [self.__getitem__(k.replace(' ','')) for k in key.split(',')]
				raise KeyError(key)
		elif isinstance(key, int):
			return self.__d[key]
		elif isinstance(key, slice):
			return self.__d[key]
		for i in range(0,len(self.__c)):

class FastQuery(object):
	""" FastQuery is a way to shortcut all the bulky components in Query that allow re-querying, slicing, etc.
	It still uses dsn="..." and the db.cfg file, but it only supports iterating over the results.
	"""
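	# Typical use (illustrative sketch; assumes a 'test' dsn defined in db.cfg and the users table from the Model doctests):
	#
	#   for row in FastQuery("select uid, name from users", dsn="test"):
	#       print row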
	def __init__(self, sql, dsn="default"):
		self.sql = sql
		self.dsn = dsn
		# get a db connection
		db = Query._update_connections(dsn)
		self.start = datetime.datetime.now()
		try:
			self.c = db.cursor()
			self.c.execute(sql)
		except MySQLdb.OperationalError:
			# the connection was lost, so connect again and retry once
			Query._close_connection(self.dsn)
			self.c = Query._update_connections(self.dsn).cursor()
			self.c.execute(sql)
		except Exception, e:
			raise SQLException(str(e), sql)
		Query.logQuery("FAST: "+self.sql, _ms_elapsed(self.start))

	def __iter__(self):
		return self

	def next(self):
		row = self.c.fetchone()
		if row is None:
			raise StopIteration
		return [ (d if d is not None else 0) for d in row ]

def _getNameFromDefn(defn):
	"""
	>>> Model()._getNameFromDefn("`email` varchar(256) not null")
	'email'

	>>> Model()._getNameFromDefn("`email` text not null")
	'email'

	>>> Model()._getNameFromDefn("primary key (`foo`)")
	'primary key'

	>>> Model()._getNameFromDefn("index `foo` (`bar`)")
	'foo'

	>>> Model()._getNameFromDefn("index (`bar`)")
	'bar'
	"""
	# primary keys have a constant name
	if re.search('^\s*primary key\s*\(', defn, re.I) is not None:
		return "primary key"
	# normal columns just get the name parsed off
	ret = defn.split("`")
	try:
		return ret[1].lower()
	except IndexError, e:
		raise IndexError("%s did not split on `" % defn)

class _TableInfo(object):
	def __init__(self, name, dsn="default"):
		self.names = {}
		self.lines = {}
		self.order = []
		try:
			self.text = Query("show create table %s" % name, dsn=dsn, cache=False)
			assert type(self.text) == Query, "Unexpected type: %s" % str(type(self.text))
			self.text = self.text[0]
			assert type(self.text) == Row, "Unexpected type: %s" % str(type(self.text))
			self.text = self.text[1]
			assert type(self.text) == unicode, "Unexpected type: %s" % str(type(self.text))
			self.text = self.text.lower()
			for line in self.text.split('\n')[1:-1]:
				name = _getNameFromDefn(line)# line.split('`')[1]
				name = re.sub( ',*\s*$', '', re.sub('^\s*','', name))
				line = re.sub( ',*\s*$', '', re.sub('^\s*','', line))
				self.names[name] = line
				self.lines[line] = name
				self.order.append(name)
		except MySQLdb.ProgrammingError:
			pass

	def getName(self, name):
		return self.names.get(name, None)

	def hasDefn(self, defn):
		return self.lines.get(defn, None)
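
# Illustrative sketch of what _TableInfo ends up holding for the users table built in the Model doctests:
#   t = _TableInfo("test.users", dsn="test")
#   t.order           e.g. [u'uid', u'name', u'email', u'passwd', u'primary key', ...]
#   t.names[u'uid']   e.g. u'`uid` int(11) not null auto_increment'
#   t.lines maps each lowercased column definition line back to its column name.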

def safe(s):
	"""
	Returns a db-injection safe copy of the given string.
	"""
	# TODO: more filters
	return re.sub("([^'])'",r"\1''", s)
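# e.g. safe("it's") -> "it''s"  (embedded single quotes are doubled for use inside SQL string literals)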

class ModelConflictError(Exception):
	pass

class SQLException(Exception):
	"""
	>>> Query("this is not a valid sql statement", dsn="test") #doctest: +ELLIPSIS
	Traceback (most recent call last):
		...
	SQLException: ...
	"""
	def __init__(self, msg, sql):
		Exception.__init__(self, msg)
		self.sql = sql

	def __str__(self):
		return Exception.__str__(self)+" (sql: %s)" % (self.sql)

	def __unicode__(self):
		return unicode(self.__str__())

	def __repr__(self):
		return self.__str__()

def _ms_elapsed(since):
	d = (datetime.datetime.now() - since)
	return (d.seconds*1000.0) + (d.microseconds / 1000.0)
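# e.g. a timedelta of 1 second and 250000 microseconds yields 1250.0 (milliseconds)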

def _match_any(s, l):
	# True if any of the substrings in l occurs in s
	return any(s.find(x) > -1 for x in l)

if __name__ == "__main__":
	# run the tests the hard way, to force the order
	import doctest, unittest
	suite = unittest.TestSuite()
	finder = doctest.DocTestFinder()
	tests = []
	tests.extend(finder.find(Model))
	tests.extend(finder.find(Query))
	runner = doctest.DocTestRunner()