1 // Copyright (c) 2012-2016 The Bitcoin Core developers
2 // Distributed under the MIT software license, see the accompanying
3 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
7 #include "primitives/transaction.h"
9 #include "script/script.h"
10 #include "script/standard.h"
17 #include <boost/foreach.hpp>
19 #define LN2SQUARED 0.4804530139182014246671025263266649717305529515945455
20 #define LN2 0.6931471805599453094172321214581765680755001343602552
22 CBloomFilter::CBloomFilter(unsigned int nElements
, double nFPRate
, unsigned int nTweakIn
, unsigned char nFlagsIn
) :
24 * The ideal size for a bloom filter with a given number of elements and false positive rate is:
25 * - nElements * log(fp rate) / ln(2)^2
26 * We ignore filter parameters which will create a bloom filter larger than the protocol limits
28 vData(std::min((unsigned int)(-1 / LN2SQUARED
* nElements
* log(nFPRate
)), MAX_BLOOM_FILTER_SIZE
* 8) / 8),
30 * The ideal number of hash functions is filter size * ln(2) / number of elements
31 * Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
32 * See https://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
36 nHashFuncs(std::min((unsigned int)(vData
.size() * 8 / nElements
* LN2
), MAX_HASH_FUNCS
)),
42 // Private constructor used by CRollingBloomFilter
43 CBloomFilter::CBloomFilter(unsigned int nElements
, double nFPRate
, unsigned int nTweakIn
) :
44 vData((unsigned int)(-1 / LN2SQUARED
* nElements
* log(nFPRate
)) / 8),
47 nHashFuncs((unsigned int)(vData
.size() * 8 / nElements
* LN2
)),
49 nFlags(BLOOM_UPDATE_NONE
)
53 inline unsigned int CBloomFilter::Hash(unsigned int nHashNum
, const std::vector
<unsigned char>& vDataToHash
) const
55 // 0xFBA4C795 chosen as it guarantees a reasonable bit difference between nHashNum values.
56 return MurmurHash3(nHashNum
* 0xFBA4C795 + nTweak
, vDataToHash
) % (vData
.size() * 8);
59 void CBloomFilter::insert(const std::vector
<unsigned char>& vKey
)
63 for (unsigned int i
= 0; i
< nHashFuncs
; i
++)
65 unsigned int nIndex
= Hash(i
, vKey
);
66 // Sets bit nIndex of vData
67 vData
[nIndex
>> 3] |= (1 << (7 & nIndex
));
72 void CBloomFilter::insert(const COutPoint
& outpoint
)
74 CDataStream
stream(SER_NETWORK
, PROTOCOL_VERSION
);
76 std::vector
<unsigned char> data(stream
.begin(), stream
.end());
80 void CBloomFilter::insert(const uint256
& hash
)
82 std::vector
<unsigned char> data(hash
.begin(), hash
.end());
86 bool CBloomFilter::contains(const std::vector
<unsigned char>& vKey
) const
92 for (unsigned int i
= 0; i
< nHashFuncs
; i
++)
94 unsigned int nIndex
= Hash(i
, vKey
);
95 // Checks bit nIndex of vData
96 if (!(vData
[nIndex
>> 3] & (1 << (7 & nIndex
))))
102 bool CBloomFilter::contains(const COutPoint
& outpoint
) const
104 CDataStream
stream(SER_NETWORK
, PROTOCOL_VERSION
);
106 std::vector
<unsigned char> data(stream
.begin(), stream
.end());
107 return contains(data
);
110 bool CBloomFilter::contains(const uint256
& hash
) const
112 std::vector
<unsigned char> data(hash
.begin(), hash
.end());
113 return contains(data
);
116 void CBloomFilter::clear()
118 vData
.assign(vData
.size(),0);
123 void CBloomFilter::reset(unsigned int nNewTweak
)
129 bool CBloomFilter::IsWithinSizeConstraints() const
131 return vData
.size() <= MAX_BLOOM_FILTER_SIZE
&& nHashFuncs
<= MAX_HASH_FUNCS
;
134 bool CBloomFilter::IsRelevantAndUpdate(const CTransaction
& tx
)
137 // Match if the filter contains the hash of tx
138 // for finding tx when they appear in a block
143 const uint256
& hash
= tx
.GetHash();
147 for (unsigned int i
= 0; i
< tx
.vout
.size(); i
++)
149 const CTxOut
& txout
= tx
.vout
[i
];
150 // Match if the filter contains any arbitrary script data element in any scriptPubKey in tx
151 // If this matches, also add the specific output that was matched.
152 // This means clients don't have to update the filter themselves when a new relevant tx
153 // is discovered in order to find spending transactions, which avoids round-tripping and race conditions.
154 CScript::const_iterator pc
= txout
.scriptPubKey
.begin();
155 std::vector
<unsigned char> data
;
156 while (pc
< txout
.scriptPubKey
.end())
159 if (!txout
.scriptPubKey
.GetOp(pc
, opcode
, data
))
161 if (data
.size() != 0 && contains(data
))
164 if ((nFlags
& BLOOM_UPDATE_MASK
) == BLOOM_UPDATE_ALL
)
165 insert(COutPoint(hash
, i
));
166 else if ((nFlags
& BLOOM_UPDATE_MASK
) == BLOOM_UPDATE_P2PUBKEY_ONLY
)
169 std::vector
<std::vector
<unsigned char> > vSolutions
;
170 if (Solver(txout
.scriptPubKey
, type
, vSolutions
) &&
171 (type
== TX_PUBKEY
|| type
== TX_MULTISIG
))
172 insert(COutPoint(hash
, i
));
182 BOOST_FOREACH(const CTxIn
& txin
, tx
.vin
)
184 // Match if the filter contains an outpoint tx spends
185 if (contains(txin
.prevout
))
188 // Match if the filter contains any arbitrary script data element in any scriptSig in tx
189 CScript::const_iterator pc
= txin
.scriptSig
.begin();
190 std::vector
<unsigned char> data
;
191 while (pc
< txin
.scriptSig
.end())
194 if (!txin
.scriptSig
.GetOp(pc
, opcode
, data
))
196 if (data
.size() != 0 && contains(data
))
204 void CBloomFilter::UpdateEmptyFull()
208 for (unsigned int i
= 0; i
< vData
.size(); i
++)
210 full
&= vData
[i
] == 0xff;
211 empty
&= vData
[i
] == 0;
217 CRollingBloomFilter::CRollingBloomFilter(unsigned int nElements
, double fpRate
)
219 double logFpRate
= log(fpRate
);
220 /* The optimal number of hash functions is log(fpRate) / log(0.5), but
221 * restrict it to the range 1-50. */
222 nHashFuncs
= std::max(1, std::min((int)round(logFpRate
/ log(0.5)), 50));
223 /* In this rolling bloom filter, we'll store between 2 and 3 generations of nElements / 2 entries. */
224 nEntriesPerGeneration
= (nElements
+ 1) / 2;
225 uint32_t nMaxElements
= nEntriesPerGeneration
* 3;
226 /* The maximum fpRate = pow(1.0 - exp(-nHashFuncs * nMaxElements / nFilterBits), nHashFuncs)
227 * => pow(fpRate, 1.0 / nHashFuncs) = 1.0 - exp(-nHashFuncs * nMaxElements / nFilterBits)
228 * => 1.0 - pow(fpRate, 1.0 / nHashFuncs) = exp(-nHashFuncs * nMaxElements / nFilterBits)
229 * => log(1.0 - pow(fpRate, 1.0 / nHashFuncs)) = -nHashFuncs * nMaxElements / nFilterBits
230 * => nFilterBits = -nHashFuncs * nMaxElements / log(1.0 - pow(fpRate, 1.0 / nHashFuncs))
231 * => nFilterBits = -nHashFuncs * nMaxElements / log(1.0 - exp(logFpRate / nHashFuncs))
233 uint32_t nFilterBits
= (uint32_t)ceil(-1.0 * nHashFuncs
* nMaxElements
/ log(1.0 - exp(logFpRate
/ nHashFuncs
)));
235 /* For each data element we need to store 2 bits. If both bits are 0, the
236 * bit is treated as unset. If the bits are (01), (10), or (11), the bit is
237 * treated as set in generation 1, 2, or 3 respectively.
238 * These bits are stored in separate integers: position P corresponds to bit
239 * (P & 63) of the integers data[(P >> 6) * 2] and data[(P >> 6) * 2 + 1]. */
240 data
.resize(((nFilterBits
+ 63) / 64) << 1);
244 /* Similar to CBloomFilter::Hash */
245 static inline uint32_t RollingBloomHash(unsigned int nHashNum
, uint32_t nTweak
, const std::vector
<unsigned char>& vDataToHash
) {
246 return MurmurHash3(nHashNum
* 0xFBA4C795 + nTweak
, vDataToHash
);
249 void CRollingBloomFilter::insert(const std::vector
<unsigned char>& vKey
)
251 if (nEntriesThisGeneration
== nEntriesPerGeneration
) {
252 nEntriesThisGeneration
= 0;
254 if (nGeneration
== 4) {
257 uint64_t nGenerationMask1
= 0 - (uint64_t)(nGeneration
& 1);
258 uint64_t nGenerationMask2
= 0 - (uint64_t)(nGeneration
>> 1);
259 /* Wipe old entries that used this generation number. */
260 for (uint32_t p
= 0; p
< data
.size(); p
+= 2) {
261 uint64_t p1
= data
[p
], p2
= data
[p
+ 1];
262 uint64_t mask
= (p1
^ nGenerationMask1
) | (p2
^ nGenerationMask2
);
264 data
[p
+ 1] = p2
& mask
;
267 nEntriesThisGeneration
++;
269 for (int n
= 0; n
< nHashFuncs
; n
++) {
270 uint32_t h
= RollingBloomHash(n
, nTweak
, vKey
);
272 uint32_t pos
= (h
>> 6) % data
.size();
273 /* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */
274 data
[pos
& ~1] = (data
[pos
& ~1] & ~(((uint64_t)1) << bit
)) | ((uint64_t)(nGeneration
& 1)) << bit
;
275 data
[pos
| 1] = (data
[pos
| 1] & ~(((uint64_t)1) << bit
)) | ((uint64_t)(nGeneration
>> 1)) << bit
;
279 void CRollingBloomFilter::insert(const uint256
& hash
)
281 std::vector
<unsigned char> vData(hash
.begin(), hash
.end());
285 bool CRollingBloomFilter::contains(const std::vector
<unsigned char>& vKey
) const
287 for (int n
= 0; n
< nHashFuncs
; n
++) {
288 uint32_t h
= RollingBloomHash(n
, nTweak
, vKey
);
290 uint32_t pos
= (h
>> 6) % data
.size();
291 /* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */
292 if (!(((data
[pos
& ~1] | data
[pos
| 1]) >> bit
) & 1)) {
299 bool CRollingBloomFilter::contains(const uint256
& hash
) const
301 std::vector
<unsigned char> vData(hash
.begin(), hash
.end());
302 return contains(vData
);
305 void CRollingBloomFilter::reset()
307 nTweak
= GetRand(std::numeric_limits
<unsigned int>::max());
308 nEntriesThisGeneration
= 0;
310 for (std::vector
<uint64_t>::iterator it
= data
.begin(); it
!= data
.end(); it
++) {