# Copyright 2015 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import cPickle
import googledatastore as datastore
import logging

from future import Future

# N.B.: In order to use this module you should have a working cloud development
# environment configured with the googledatastore module installed.
#
# Please see https://cloud.google.com/datastore/docs/getstarted/start_python/


_DATASET_NAME = 'chrome-apps-doc'
_PERSISTENT_OBJECT_KIND = 'PersistentObjectStoreItem'
_VALUE_PROPERTY_NAME = 'pickled_value'

# The max number of entities to include in a single request. This is capped at
# 500 by the service. In practice we may send fewer due to _MAX_REQUEST_SIZE
_MAX_BATCH_SIZE = 500


# The maximum entity size allowed by Datastore.
_MAX_ENTITY_SIZE = 1024*1024


# The maximum request size (in bytes) to send Datastore. This is an approximate
# size based on the sum of entity blob_value sizes.
_MAX_REQUEST_SIZE = 5*1024*1024


def _CreateEntity(name, value):
  '''Creates a PersistentObjectStoreItem entity keyed by |name| whose
  unindexed |pickled_value| blob property holds the (already pickled) |value|.
  '''
  entity = datastore.Entity()
  path = entity.key.path_element.add()
  path.kind = _PERSISTENT_OBJECT_KIND
  path.name = name
  pickled_value_property = entity.property.add()
  pickled_value_property.name = _VALUE_PROPERTY_NAME
  pickled_value_property.value.indexed = False
  pickled_value_property.value.blob_value = value
  return entity


def _CreateBatches(data):
  '''Constructs batches of at most _MAX_BATCH_SIZE entities to cover all
  entities defined in |data| without exceeding the request size limit.
  This is a generator emitting (batch, n, total) tuples, where |batch| is a
  list of entities, |n| is the number of entities emitted so far and |total|
  is the total number of entities.
  '''
  def get_size(entity):
    return len(entity.property[0].value.blob_value)

  entities = [_CreateEntity(name, value) for name, value in data.iteritems()]
  if not entities:
    # Nothing to push; avoid indexing into an empty list below.
    return
  batch_start = 0
  batch_end = 1
  batch_size = get_size(entities[0])
  while batch_end < len(entities):
    next_size = get_size(entities[batch_end])
    if (batch_size + next_size > _MAX_REQUEST_SIZE or
        batch_end - batch_start >= _MAX_BATCH_SIZE):
      # The next entity doesn't fit; emit the current batch and start a new,
      # empty one at |batch_end|.
      yield entities[batch_start:batch_end], batch_end, len(entities)
      batch_start = batch_end
      batch_size = 0
    else:
      batch_size += next_size
      batch_end = batch_end + 1

  # Emit whatever is left over as the final batch.
  if batch_end > batch_start and batch_start < len(entities):
    yield entities[batch_start:batch_end], batch_end, len(entities)
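
# A rough walkthrough of the batching above, using hypothetical sizes (not
# taken from any real data set): with _MAX_REQUEST_SIZE of 5MB and three
# entities of 2MB each, the generator yields [e0, e1] (4MB, since adding e2
# would exceed the limit) and then yields the leftover [e2] from the final
# yield after the loop.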


def PushData(data, original_data={}):
  '''Pushes a bunch of data into the datastore. The data should be a dict. Each
  key is treated as a namespace, and each value is also a dict. A new datastore
  entry is upserted for every inner key, with the value pickled into the
  |pickled_value| field.

  For example, if given the dictionary:

  {
    'fruit': {
      'apple': 1234,
      'banana': 'yellow',
      'trolling carrot': { 'arbitrarily complex': ['value', 'goes', 'here'] }
    },
    'animal': {
      'sheep': 'baaah',
      'dog': 'woof',
      'trolling cat': 'moo'
    }
  }

  this would result in a push of 6 keys in total, with the following IDs:

    Key('PersistentObjectStoreItem', 'fruit/apple')
    Key('PersistentObjectStoreItem', 'fruit/banana')
    Key('PersistentObjectStoreItem', 'fruit/trolling carrot')
    Key('PersistentObjectStoreItem', 'animal/sheep')
    Key('PersistentObjectStoreItem', 'animal/dog')
    Key('PersistentObjectStoreItem', 'animal/trolling cat')

  If given |original_data|, this will only push key-value pairs for entries that
  are either new or have changed from their original (pickled) value.

  Caveat: Pickling and unpickling a dictionary can (but does not always) change
  its key order. This means that objects will often be seen as changed even when
  they haven't changed.
  '''
  datastore.set_options(dataset=_DATASET_NAME)

  def flatten(dataset):
    flat = {}
    for namespace, items in dataset.iteritems():
      for k, v in items.iteritems():
        flat['%s/%s' % (namespace, k)] = cPickle.dumps(v)
    return flat

  logging.info('Flattening data sets...')
  data = flatten(data)
  original_data = flatten(original_data)

  logging.info('Culling new data...')
  # Drop entries that are unchanged from |original_data|, and entries whose
  # pickled value is too large to fit in a single Datastore entity.
  for k in data.keys():
    if ((k in original_data and original_data[k] == data[k]) or
        (len(data[k]) > _MAX_ENTITY_SIZE)):
      del data[k]

  for batch, n, total in _CreateBatches(data):
    commit_request = datastore.CommitRequest()
    commit_request.mode = datastore.CommitRequest.NON_TRANSACTIONAL
    commit_request.mutation.upsert.extend(list(batch))

    logging.info('Committing %s/%s entities...' % (n, total))
    datastore.commit(commit_request)
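

# A minimal usage sketch, assuming the cloud environment described at the top
# of this module is configured. The namespaces and values below are
# hypothetical and only illustrate the 'namespace/key' flattening and the
# incremental behaviour of |original_data| described in PushData's docstring.
if __name__ == '__main__':
  logging.basicConfig(level=logging.INFO)
  _EXAMPLE_DATA = {
    'fruit': {'apple': 1234, 'banana': 'yellow'},
    'animal': {'sheep': 'baaah', 'dog': 'woof'},
  }
  # The first push upserts every entry.
  PushData(_EXAMPLE_DATA)
  # The second push only re-pushes what changed ('fruit/banana' here).
  _UPDATED_DATA = {
    'fruit': {'apple': 1234, 'banana': 'green'},
    'animal': {'sheep': 'baaah', 'dog': 'woof'},
  }
  PushData(_UPDATED_DATA, original_data=_EXAMPLE_DATA)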