2 +----------------------------------------------------------------------+
4 +----------------------------------------------------------------------+
5 | Copyright (c) 2010-present Facebook, Inc. (http://www.facebook.com) |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
16 #include "hphp/hhbbc/misc.h"
18 #include "hphp/hhbbc/options.h"
19 #include "hphp/hhbbc/parallel.h"
21 #include "hphp/runtime/base/string-data.h"
23 #include "hphp/util/hash.h"
25 //////////////////////////////////////////////////////////////////////
27 namespace HPHP::HHBBC
{
29 //////////////////////////////////////////////////////////////////////
31 std::vector
<std::vector
<SString
>>
32 consistently_bucketize(const std::vector
<SString
>& items
, size_t bucketSize
) {
33 using namespace folly::gen
;
35 // Calculate the number of buckets we need, assuming each bucket
36 // will contain "bucketSize" number of elements (rounding up).
37 assertx(bucketSize
> 0);
38 auto const numBuckets
= (items
.size() + bucketSize
- 1) / bucketSize
;
39 return consistently_bucketize_by_num_buckets(items
, numBuckets
);
42 std::vector
<std::vector
<SString
>>
43 consistently_bucketize_by_num_buckets(const std::vector
<SString
>& items
, size_t numBuckets
) {
44 using namespace folly::gen
;
45 if (numBuckets
== items
.size()) {
47 | map([] (SString s
) { return singleton_vec(s
); })
50 if (numBuckets
== 1) return singleton_vec(items
);
52 // Consistently hash the strings into their buckets indices.
53 auto const indices
= parallel::map(
56 auto const idx
= consistent_hash(s
->hashStatic(), numBuckets
);
57 assertx(idx
< numBuckets
);
62 // Then partition the strings into the buckets using the indices.
63 std::vector
<std::vector
<SString
>> buckets
;
64 buckets
.resize(numBuckets
);
65 assertx(indices
.size() == items
.size());
66 for (size_t i
= 0, size
= indices
.size(); i
< size
; ++i
) {
67 buckets
[indices
[i
]].emplace_back(items
[i
]);
70 // Sort each bucket to keep things consistent.
73 [] (std::vector
<SString
>& bucket
) {
74 std::sort(begin(bucket
), end(bucket
), string_data_lt_type
{});
78 // Finally, remove any empty buckets.
83 [] (const std::vector
<SString
>& b
) { return b
.empty(); }
90 //////////////////////////////////////////////////////////////////////