1 // Copyright (c) 2012-2015 The Bitcoin Core developers
2 // Distributed under the MIT software license, see the accompanying
3 // file COPYING or http://www.opensource.org/licenses/mit-license.php.
7 #include "primitives/transaction.h"
9 #include "script/script.h"
10 #include "script/standard.h"
17 #include <boost/foreach.hpp>
19 #define LN2SQUARED 0.4804530139182014246671025263266649717305529515945455
20 #define LN2 0.6931471805599453094172321214581765680755001343602552
24 CBloomFilter::CBloomFilter(unsigned int nElements
, double nFPRate
, unsigned int nTweakIn
, unsigned char nFlagsIn
) :
26 * The ideal size for a bloom filter with a given number of elements and false positive rate is:
27 * - nElements * log(fp rate) / ln(2)^2
28 * We ignore filter parameters which will create a bloom filter larger than the protocol limits
30 vData(min((unsigned int)(-1 / LN2SQUARED
* nElements
* log(nFPRate
)), MAX_BLOOM_FILTER_SIZE
* 8) / 8),
32 * The ideal number of hash functions is filter size * ln(2) / number of elements
33 * Again, we ignore filter parameters which will create a bloom filter with more hash functions than the protocol limits
34 * See https://en.wikipedia.org/wiki/Bloom_filter for an explanation of these formulas
38 nHashFuncs(min((unsigned int)(vData
.size() * 8 / nElements
* LN2
), MAX_HASH_FUNCS
)),
44 // Private constructor used by CRollingBloomFilter
45 CBloomFilter::CBloomFilter(unsigned int nElements
, double nFPRate
, unsigned int nTweakIn
) :
46 vData((unsigned int)(-1 / LN2SQUARED
* nElements
* log(nFPRate
)) / 8),
49 nHashFuncs((unsigned int)(vData
.size() * 8 / nElements
* LN2
)),
51 nFlags(BLOOM_UPDATE_NONE
)
55 inline unsigned int CBloomFilter::Hash(unsigned int nHashNum
, const std::vector
<unsigned char>& vDataToHash
) const
57 // 0xFBA4C795 chosen as it guarantees a reasonable bit difference between nHashNum values.
58 return MurmurHash3(nHashNum
* 0xFBA4C795 + nTweak
, vDataToHash
) % (vData
.size() * 8);
61 void CBloomFilter::insert(const vector
<unsigned char>& vKey
)
65 for (unsigned int i
= 0; i
< nHashFuncs
; i
++)
67 unsigned int nIndex
= Hash(i
, vKey
);
68 // Sets bit nIndex of vData
69 vData
[nIndex
>> 3] |= (1 << (7 & nIndex
));
74 void CBloomFilter::insert(const COutPoint
& outpoint
)
76 CDataStream
stream(SER_NETWORK
, PROTOCOL_VERSION
);
78 vector
<unsigned char> data(stream
.begin(), stream
.end());
82 void CBloomFilter::insert(const uint256
& hash
)
84 vector
<unsigned char> data(hash
.begin(), hash
.end());
88 bool CBloomFilter::contains(const vector
<unsigned char>& vKey
) const
94 for (unsigned int i
= 0; i
< nHashFuncs
; i
++)
96 unsigned int nIndex
= Hash(i
, vKey
);
97 // Checks bit nIndex of vData
98 if (!(vData
[nIndex
>> 3] & (1 << (7 & nIndex
))))
104 bool CBloomFilter::contains(const COutPoint
& outpoint
) const
106 CDataStream
stream(SER_NETWORK
, PROTOCOL_VERSION
);
108 vector
<unsigned char> data(stream
.begin(), stream
.end());
109 return contains(data
);
112 bool CBloomFilter::contains(const uint256
& hash
) const
114 vector
<unsigned char> data(hash
.begin(), hash
.end());
115 return contains(data
);
118 void CBloomFilter::clear()
120 vData
.assign(vData
.size(),0);
125 void CBloomFilter::reset(unsigned int nNewTweak
)
131 bool CBloomFilter::IsWithinSizeConstraints() const
133 return vData
.size() <= MAX_BLOOM_FILTER_SIZE
&& nHashFuncs
<= MAX_HASH_FUNCS
;
136 bool CBloomFilter::IsRelevantAndUpdate(const CTransaction
& tx
)
139 // Match if the filter contains the hash of tx
140 // for finding tx when they appear in a block
145 const uint256
& hash
= tx
.GetHash();
149 for (unsigned int i
= 0; i
< tx
.vout
.size(); i
++)
151 const CTxOut
& txout
= tx
.vout
[i
];
152 // Match if the filter contains any arbitrary script data element in any scriptPubKey in tx
153 // If this matches, also add the specific output that was matched.
154 // This means clients don't have to update the filter themselves when a new relevant tx
155 // is discovered in order to find spending transactions, which avoids round-tripping and race conditions.
156 CScript::const_iterator pc
= txout
.scriptPubKey
.begin();
157 vector
<unsigned char> data
;
158 while (pc
< txout
.scriptPubKey
.end())
161 if (!txout
.scriptPubKey
.GetOp(pc
, opcode
, data
))
163 if (data
.size() != 0 && contains(data
))
166 if ((nFlags
& BLOOM_UPDATE_MASK
) == BLOOM_UPDATE_ALL
)
167 insert(COutPoint(hash
, i
));
168 else if ((nFlags
& BLOOM_UPDATE_MASK
) == BLOOM_UPDATE_P2PUBKEY_ONLY
)
171 vector
<vector
<unsigned char> > vSolutions
;
172 if (Solver(txout
.scriptPubKey
, type
, vSolutions
) &&
173 (type
== TX_PUBKEY
|| type
== TX_MULTISIG
))
174 insert(COutPoint(hash
, i
));
184 BOOST_FOREACH(const CTxIn
& txin
, tx
.vin
)
186 // Match if the filter contains an outpoint tx spends
187 if (contains(txin
.prevout
))
190 // Match if the filter contains any arbitrary script data element in any scriptSig in tx
191 CScript::const_iterator pc
= txin
.scriptSig
.begin();
192 vector
<unsigned char> data
;
193 while (pc
< txin
.scriptSig
.end())
196 if (!txin
.scriptSig
.GetOp(pc
, opcode
, data
))
198 if (data
.size() != 0 && contains(data
))
206 void CBloomFilter::UpdateEmptyFull()
210 for (unsigned int i
= 0; i
< vData
.size(); i
++)
212 full
&= vData
[i
] == 0xff;
213 empty
&= vData
[i
] == 0;
219 CRollingBloomFilter::CRollingBloomFilter(unsigned int nElements
, double fpRate
)
221 double logFpRate
= log(fpRate
);
222 /* The optimal number of hash functions is log(fpRate) / log(0.5), but
223 * restrict it to the range 1-50. */
224 nHashFuncs
= std::max(1, std::min((int)round(logFpRate
/ log(0.5)), 50));
225 /* In this rolling bloom filter, we'll store between 2 and 3 generations of nElements / 2 entries. */
226 nEntriesPerGeneration
= (nElements
+ 1) / 2;
227 uint32_t nMaxElements
= nEntriesPerGeneration
* 3;
228 /* The maximum fpRate = pow(1.0 - exp(-nHashFuncs * nMaxElements / nFilterBits), nHashFuncs)
229 * => pow(fpRate, 1.0 / nHashFuncs) = 1.0 - exp(-nHashFuncs * nMaxElements / nFilterBits)
230 * => 1.0 - pow(fpRate, 1.0 / nHashFuncs) = exp(-nHashFuncs * nMaxElements / nFilterBits)
231 * => log(1.0 - pow(fpRate, 1.0 / nHashFuncs)) = -nHashFuncs * nMaxElements / nFilterBits
232 * => nFilterBits = -nHashFuncs * nMaxElements / log(1.0 - pow(fpRate, 1.0 / nHashFuncs))
233 * => nFilterBits = -nHashFuncs * nMaxElements / log(1.0 - exp(logFpRate / nHashFuncs))
235 uint32_t nFilterBits
= (uint32_t)ceil(-1.0 * nHashFuncs
* nMaxElements
/ log(1.0 - exp(logFpRate
/ nHashFuncs
)));
237 /* For each data element we need to store 2 bits. If both bits are 0, the
238 * bit is treated as unset. If the bits are (01), (10), or (11), the bit is
239 * treated as set in generation 1, 2, or 3 respectively.
240 * These bits are stored in separate integers: position P corresponds to bit
241 * (P & 63) of the integers data[(P >> 6) * 2] and data[(P >> 6) * 2 + 1]. */
242 data
.resize(((nFilterBits
+ 63) / 64) << 1);
246 /* Similar to CBloomFilter::Hash */
247 static inline uint32_t RollingBloomHash(unsigned int nHashNum
, uint32_t nTweak
, const std::vector
<unsigned char>& vDataToHash
) {
248 return MurmurHash3(nHashNum
* 0xFBA4C795 + nTweak
, vDataToHash
);
251 void CRollingBloomFilter::insert(const std::vector
<unsigned char>& vKey
)
253 if (nEntriesThisGeneration
== nEntriesPerGeneration
) {
254 nEntriesThisGeneration
= 0;
256 if (nGeneration
== 4) {
259 uint64_t nGenerationMask1
= -(uint64_t)(nGeneration
& 1);
260 uint64_t nGenerationMask2
= -(uint64_t)(nGeneration
>> 1);
261 /* Wipe old entries that used this generation number. */
262 for (uint32_t p
= 0; p
< data
.size(); p
+= 2) {
263 uint64_t p1
= data
[p
], p2
= data
[p
+ 1];
264 uint64_t mask
= (p1
^ nGenerationMask1
) | (p2
^ nGenerationMask2
);
266 data
[p
+ 1] = p2
& mask
;
269 nEntriesThisGeneration
++;
271 for (int n
= 0; n
< nHashFuncs
; n
++) {
272 uint32_t h
= RollingBloomHash(n
, nTweak
, vKey
);
274 uint32_t pos
= (h
>> 6) % data
.size();
275 /* The lowest bit of pos is ignored, and set to zero for the first bit, and to one for the second. */
276 data
[pos
& ~1] = (data
[pos
& ~1] & ~(((uint64_t)1) << bit
)) | ((uint64_t)(nGeneration
& 1)) << bit
;
277 data
[pos
| 1] = (data
[pos
| 1] & ~(((uint64_t)1) << bit
)) | ((uint64_t)(nGeneration
>> 1)) << bit
;
281 void CRollingBloomFilter::insert(const uint256
& hash
)
283 vector
<unsigned char> vData(hash
.begin(), hash
.end());
287 bool CRollingBloomFilter::contains(const std::vector
<unsigned char>& vKey
) const
289 for (int n
= 0; n
< nHashFuncs
; n
++) {
290 uint32_t h
= RollingBloomHash(n
, nTweak
, vKey
);
292 uint32_t pos
= (h
>> 6) % data
.size();
293 /* If the relevant bit is not set in either data[pos & ~1] or data[pos | 1], the filter does not contain vKey */
294 if (!(((data
[pos
& ~1] | data
[pos
| 1]) >> bit
) & 1)) {
301 bool CRollingBloomFilter::contains(const uint256
& hash
) const
303 vector
<unsigned char> vData(hash
.begin(), hash
.end());
304 return contains(vData
);
307 void CRollingBloomFilter::reset()
309 nTweak
= GetRand(std::numeric_limits
<unsigned int>::max());
310 nEntriesThisGeneration
= 0;
312 for (std::vector
<uint64_t>::iterator it
= data
.begin(); it
!= data
.end(); it
++) {