2 * @brief Serialise floating point values to string which sort the same way.
4 /* Copyright (C) 2007,2009,2015,2016 Olly Betts
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
23 #include <xapian/queryparser.h>
25 // Disable these assertions when building the library as these functions are
26 // marked not to throw exceptions in the API headers. In unittest we define
27 // UNITTEST_ASSERT_NOTHROW to set a variable to the exception message and
28 // return, then the harness checks if that variable has been set.
29 #ifndef XAPIAN_UNITTEST
30 # define UNITTEST_ASSERT_NOTHROW(COND,RET)
42 # error Code currently assumes FLT_RADIX == 2
46 // Disable warning about negating an unsigned type, which we do deliberately.
47 # pragma warning(disable:4146)
51 Xapian::sortable_serialise_(double value
, char * buf
) XAPIAN_NOEXCEPT
57 if (value
< -DBL_MAX
) return 0;
59 mantissa
= frexp(value
, &exponent
);
61 /* Deal with zero specially.
63 * IEEE representation of doubles uses 11 bits for the exponent, with a
64 * bias of 1023. We bias this by subtracting 8, and non-IEEE
65 * representations may allow higher exponents, so allow exponents down to
66 * -2039 - if smaller exponents are possible anywhere, we underflow such
69 if (mantissa
== 0.0 || exponent
< -2039) {
74 bool negative
= (mantissa
< 0);
75 if (negative
) mantissa
= -mantissa
;
77 // Infinity, or extremely large non-IEEE representation.
78 if (value
> DBL_MAX
|| exponent
> 2055) {
80 // This can only happen with a non-IEEE representation, because
81 // we've already tested for value < -DBL_MAX
84 memset(buf
, '\xff', 9);
91 // [ 7 | 6 | 5 | 4 3 2 1 0]
94 // Sm stores the sign of the mantissa: 1 = positive or zero, 0 = negative.
95 // Se stores the sign of the exponent: Sm for positive/zero, !Sm for neg.
96 // Le stores the length of the exponent: !Se for 3 bits, Se for 11 bits.
97 unsigned char next
= (negative
? 0 : 0xe0);
99 // Bias the exponent by 8 so that more small integers get short encodings.
101 bool exponent_negative
= (exponent
< 0);
102 if (exponent_negative
) {
103 exponent
= -exponent
;
109 /* We store the exponent in 3 or 11 bits. If the number is negative, we
110 * flip all the bits of the exponent, since larger negative numbers should
113 * If the exponent is negative, we flip the bits of the exponent, since
114 * larger negative exponents should sort first (unless the number is
115 * negative, in which case they should sort later).
117 UNITTEST_ASSERT_NOTHROW(exponent
>= 0, 0);
120 next
|= static_cast<unsigned char>(exponent
<< 2);
121 if (negative
^ exponent_negative
) next
^= 0x1c;
123 UNITTEST_ASSERT_NOTHROW((exponent
>> 11) == 0, 0);
124 // Put the top 5 bits of the exponent into the lower 5 bits of the
126 next
|= static_cast<unsigned char>(exponent
>> 6);
127 if (negative
^ exponent_negative
) next
^= 0x1f;
129 // And the lower 6 bits of the exponent go into the upper 6 bits
130 // of the second byte:
131 next
= static_cast<unsigned char>(exponent
<< 2);
132 if (negative
^ exponent_negative
) next
^= 0xfc;
135 // Convert the 52 (or 53) bits of the mantissa into two 32-bit words.
136 mantissa
*= 1 << (negative
? 26 : 27);
137 unsigned word1
= static_cast<unsigned>(mantissa
);
139 unsigned word2
= static_cast<unsigned>(mantissa
* 4294967296.0); // 1<<32
140 // If the number is positive, the first bit will always be set because 0.5
141 // <= mantissa < 1, unless mantissa is zero, which we handle specially
142 // above). If the number is negative, we negate the mantissa instead of
143 // flipping all the bits, so in the case of 0.5, the first bit isn't set
144 // so we need to store it explicitly. But for the cost of one extra
145 // leading bit, we can save several trailing 0xff bytes in lots of common
147 UNITTEST_ASSERT_NOTHROW(negative
|| (word1
& (1<<26)), 0);
149 // We negate the mantissa for negative numbers, so that the sort order
150 // is reversed (since larger negative numbers should come first).
152 if (word2
!= 0) ++word1
;
157 next
|= static_cast<unsigned char>(word1
>> 24);
159 buf
[len
++] = char(word1
>> 16);
160 buf
[len
++] = char(word1
>> 8);
161 buf
[len
++] = char(word1
);
163 buf
[len
++] = char(word2
>> 24);
164 buf
[len
++] = char(word2
>> 16);
165 buf
[len
++] = char(word2
>> 8);
166 buf
[len
++] = char(word2
);
168 // Finally, we can chop off any trailing zero bytes.
169 while (len
> 0 && buf
[len
- 1] == '\0') {
176 /// Get a number from the character at a given position in a string, returning
177 /// 0 if the string isn't long enough.
178 static inline unsigned char
179 numfromstr(const std::string
& str
, std::string::size_type pos
)
181 return (pos
< str
.size()) ? static_cast<unsigned char>(str
[pos
]) : '\0';
185 Xapian::sortable_unserialise(const std::string
& value
) XAPIAN_NOEXCEPT
188 if (value
.size() == 1 && value
[0] == '\x80') return 0.0;
190 // Positive infinity.
191 if (value
.size() == 9 &&
192 memcmp(value
.data(), "\xff\xff\xff\xff\xff\xff\xff\xff\xff", 9) == 0) {
194 // INFINITY is C99. Oddly, it's of type "float" so sanity check in
195 // case it doesn't cast to double as infinity (apparently some
196 // implementations have this problem).
197 if (double(INFINITY
) > HUGE_VAL
) return INFINITY
;
202 // Negative infinity.
205 if (double(INFINITY
) > HUGE_VAL
) return -INFINITY
;
210 unsigned char first
= numfromstr(value
, 0);
213 first
^= static_cast<unsigned char>(first
& 0xc0) >> 1;
214 bool negative
= !(first
& 0x80);
215 bool exponent_negative
= (first
& 0x40);
216 bool explen
= !(first
& 0x20);
217 int exponent
= first
& 0x1f;
220 if (negative
^ exponent_negative
) exponent
^= 0x07;
222 first
= numfromstr(value
, ++i
);
224 exponent
|= (first
>> 2);
225 if (negative
^ exponent_negative
) exponent
^= 0x07ff;
229 word1
= (unsigned(first
& 0x03) << 24);
230 word1
|= numfromstr(value
, ++i
) << 16;
231 word1
|= numfromstr(value
, ++i
) << 8;
232 word1
|= numfromstr(value
, ++i
);
235 if (i
< value
.size()) {
236 word2
= numfromstr(value
, ++i
) << 24;
237 word2
|= numfromstr(value
, ++i
) << 16;
238 word2
|= numfromstr(value
, ++i
) << 8;
239 word2
|= numfromstr(value
, ++i
);
244 if (word2
!= 0) ++word1
;
246 UNITTEST_ASSERT_NOTHROW((word1
& 0xf0000000) != 0, 0);
249 if (!negative
) word1
|= 1<<26;
252 if (word2
) mantissa
= word2
/ 4294967296.0; // 1<<32
254 mantissa
/= 1 << (negative
? 26 : 27);
256 if (exponent_negative
) exponent
= -exponent
;
259 if (negative
) mantissa
= -mantissa
;
261 // We use scalbn() since it's equivalent to ldexp() when FLT_RADIX == 2
262 // (which we currently assume), except that ldexp() will set errno if the
263 // result overflows or underflows, which isn't really desirable here.
264 return scalbn(mantissa
, exponent
);