# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import cPickle
import logging

import googledatastore as datastore

from future import Future

# N.B.: In order to use this module you should have a working cloud development
# environment configured with the googledatastore module installed.
#
# Please see https://cloud.google.com/datastore/docs/getstarted/start_python/

_DATASET_NAME = 'chrome-apps-doc'
_PERSISTENT_OBJECT_KIND = 'PersistentObjectStoreItem'
_VALUE_PROPERTY_NAME = 'pickled_value'

# The max number of entities to include in a single request. This is capped at
# 500 by the service. In practice we may send fewer due to _MAX_REQUEST_SIZE.
_MAX_BATCH_SIZE = 500

# The maximum entity size allowed by Datastore.
_MAX_ENTITY_SIZE = 1024*1024

# The maximum request size (in bytes) to send to Datastore. This is an
# approximate size based on the sum of entity blob_value sizes.
_MAX_REQUEST_SIZE = 5*1024*1024

def _CreateEntity(name, value):
  entity = datastore.Entity()
  # The key is a single path element whose kind is the persistent object kind
  # and whose name is the (namespaced) item name.
  path = entity.key.path_element.add()
  path.kind = _PERSISTENT_OBJECT_KIND
  path.name = name
  # Store the pickled value as a single unindexed blob property.
  pickled_value_property = entity.property.add()
  pickled_value_property.name = _VALUE_PROPERTY_NAME
  pickled_value_property.value.indexed = False
  pickled_value_property.value.blob_value = value
  return entity
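
# Illustrative sketch (not part of the original file): _CreateEntity(
# 'fruit/apple', cPickle.dumps(1234)) produces an entity whose key path is a
# single element with kind 'PersistentObjectStoreItem' and name 'fruit/apple',
# matching the key IDs listed in PushData's docstring below.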

def _CreateBatches(data):
  '''Constructs batches of at most _MAX_BATCH_SIZE entities to cover all
  entities defined in |data| without exceeding the transaction size limit.
  This is a generator yielding (batch, emitted_so_far, total_entities) tuples,
  where |batch| is a list of entities.
  '''
  def get_size(entity):
    # The entity's only property is the pickled blob, so its size dominates
    # the request size.
    return len(entity.property[0].value.blob_value)

  entities = [_CreateEntity(name, value) for name, value in data.iteritems()]
  if not entities:
    # Nothing to batch (e.g. everything was culled); avoid indexing into an
    # empty list below.
    return

  batch_start = 0
  batch_end = 1
  batch_size = get_size(entities[0])
  while batch_end < len(entities):
    next_size = get_size(entities[batch_end])
    if (batch_size + next_size > _MAX_REQUEST_SIZE or
        batch_end - batch_start >= _MAX_BATCH_SIZE):
      # Adding the next entity would exceed a limit; emit the current batch
      # with progress counters, then start a new one.
      yield entities[batch_start:batch_end], batch_end, len(entities)
      batch_start = batch_end
      batch_size = 0
    batch_size += next_size
    batch_end += 1

  # Emit any remaining entities as the final batch.
  if batch_end > batch_start and batch_start < len(entities):
    yield entities[batch_start:batch_end], batch_end, len(entities)

def PushData(data, original_data={}):
  '''Pushes a bunch of data into the datastore. The data should be a dict. Each
  key is treated as a namespace, and each value is also a dict. A new datastore
  entry is upserted for every inner key, with the value pickled into the
  |pickled_value| field.

  For example, if given the dictionary:

    {
      'fruit': {
        'apple': 1234,
        'banana': 'yellow',
        'trolling carrot': { 'arbitrarily complex': ['value', 'goes', 'here'] }
      },
      'animal': {
        'sheep': 'baaah',
        'dog': 'woof',
        'trolling cat': 'moo'
      }
    }

  this would result in a push of 6 keys in total, with the following IDs:

    Key('PersistentObjectStoreItem', 'fruit/apple')
    Key('PersistentObjectStoreItem', 'fruit/banana')
    Key('PersistentObjectStoreItem', 'fruit/trolling carrot')
    Key('PersistentObjectStoreItem', 'animal/sheep')
    Key('PersistentObjectStoreItem', 'animal/dog')
    Key('PersistentObjectStoreItem', 'animal/trolling cat')

  If given |original_data|, this will only push key-value pairs for entries
  that are either new or have changed from their original (pickled) value.

  Caveat: Pickling and unpickling a dictionary can (but does not always)
  change its key order, so objects will often be seen as changed even when
  they haven't changed.
  '''
  datastore.set_options(dataset=_DATASET_NAME)

  def flatten(dataset):
    # Flatten {namespace: {key: value}} into {'namespace/key': pickled_value}.
    flat = {}
    for namespace, items in dataset.iteritems():
      for k, v in items.iteritems():
        flat['%s/%s' % (namespace, k)] = cPickle.dumps(v)
    return flat

  logging.info('Flattening data sets...')
  data = flatten(data)
  original_data = flatten(original_data)

  logging.info('Culling new data...')
  for k in data.keys():
    # Drop entries that are unchanged from their original pickled value, and
    # entries too large to store in a single Datastore entity.
    if ((k in original_data and original_data[k] == data[k]) or
        (len(data[k]) > _MAX_ENTITY_SIZE)):
      del data[k]

  for batch, n, total in _CreateBatches(data):
    commit_request = datastore.CommitRequest()
    # Each entity is upserted independently; no transaction is needed.
    commit_request.mode = datastore.CommitRequest.NON_TRANSACTIONAL
    commit_request.mutation.upsert.extend(list(batch))

    logging.info('Committing %s/%s entities...' % (n, total))
    datastore.commit(commit_request)
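

# A minimal usage sketch (an addition for illustration, not part of the
# original module). It assumes the googledatastore environment described in
# the header comment is configured; the dict literal mirrors the example in
# PushData's docstring.
if __name__ == '__main__':
  logging.basicConfig(level=logging.INFO)
  PushData({
    'fruit': {'apple': 1234, 'banana': 'yellow'},
    'animal': {'sheep': 'baaah', 'dog': 'woof'},
  })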