2 * @brief OmegaScript $sort function
4 /* Copyright (C) 2001,2004,2012,2016,2018 Olly Betts
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License as
8 * published by the Free Software Foundation; either version 2 of the
9 * License, or (at your option) any later version.
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
26 // For option["decimal"].
28 #include "stringutils.h"
36 // Return -1, 0 or 1 depending on sign of floating point number starting at
37 // index i of string s. Non-numbers give 0.
39 string_sign(const string
& s
, const string
& decimal
, size_t i
= 0)
49 if (s
.compare(i
, decimal
.size(), decimal
) == 0) {
55 return C_isdigit(s
[i
]) ? r
: 0;
58 // Compare strings of two non-zero floating point numbers, without loss of
59 // precision, and ignoring their signs.
61 // The format handled is (as a Perl regex):
63 // ^[ \t]*(-?[0-9]*($decimal[0-9]*)?)
65 // where $decimal is option["decimal"].
67 // Values which are equal by the numeric test are compared as strings.
69 ncmp(const string
& a
, const string
& b
, const string
& decimal
)
71 // Ignore leading blanks, an optional minus sign, and leading zeros.
72 size_t ai
= a
.find_first_not_of(" \t");
73 if (a
[ai
] == '-') ++ai
;
74 while (a
[ai
] == '0') ++ai
;
76 size_t bi
= b
.find_first_not_of(" \t");
77 if (b
[bi
] == '-') ++bi
;
78 while (b
[bi
] == '0') ++bi
;
80 while (a
[ai
] == b
[bi
] && C_isdigit(a
[ai
])) {
86 if (C_isdigit(a
[ai
])) {
87 if (!C_isdigit(b
[bi
])) {
92 int r
= a
[ai
] - b
[bi
];
93 bool a_digit
, b_digit
;
97 a_digit
= C_isdigit(a
[ai
]);
98 b_digit
= C_isdigit(b
[bi
]);
99 } while (a_digit
&& b_digit
);
101 if (!a_digit
&& !b_digit
) {
102 // Same number of digits, but not the same digits.
105 return a_digit
? 1 : -1;
108 if (C_isdigit(b
[bi
])) {
113 // Non-fractional parts are the same - compare any fractional parts.
114 bool a_frac
= (a
.compare(ai
, decimal
.size(), decimal
) == 0);
115 if (a_frac
) ai
+= decimal
.size();
116 bool b_frac
= (b
.compare(bi
, decimal
.size(), decimal
) == 0);
117 if (b_frac
) bi
+= decimal
.size();
118 if (!a_frac
&& !b_frac
) return 0;
120 // Check if a's fractional part is zero.
121 while (a
[ai
] == '0') ++ai
;
122 return C_isdigit(a
[ai
]) ? 1 : 0;
125 // Check if b's fractional part is zero.
126 while (b
[bi
] == '0') ++bi
;
127 return C_isdigit(b
[bi
]) ? -1 : 0;
130 // Both have fractional parts, so compare.
131 while (a
[ai
] == b
[bi
] && C_isdigit(a
[ai
])) {
135 if (C_isdigit(a
[ai
])) return 1;
136 if (C_isdigit(b
[bi
])) return -1;
141 natcmp(const string
& a
, const string
& b
)
143 size_t shorter
= min(a
.size(), b
.size());
145 while (i
!= shorter
) {
146 int cha
= static_cast<unsigned char>(a
[i
]);
147 int chb
= static_cast<unsigned char>(b
[i
]);
148 if (!C_isdigit(cha
)) {
150 // Matching non-digits.
155 if (C_isdigit(chb
)) return 1;
159 // Sort embedded digit spans by numeric value and before non-digits.
160 if (!C_isdigit(chb
)) return -1;
162 // Skip any leading zeros on each.
164 while (a
[sa
] == '0') ++sa
;
166 while (b
[sb
] == '0') ++sb
;
172 if (!C_isdigit(a
[ea
])) {
173 if (C_isdigit(b
[eb
])) {
174 // Number in b is longer and so larger.
177 // Digit sequences are the same length.
180 if (a
[ea
] != b
[eb
]) {
181 if (!C_isdigit(b
[eb
])) {
182 // Number in a is longer and so larger.
185 // Record first difference between digits.
186 if (res
== 0) res
= a
[ea
] - b
[eb
];
193 // More leading zeros sorts first.
194 res
= int(sb
) - int(sa
);
198 // Digit sequences were identical, so keep going.
201 return int(a
.size()) - int(b
.size());
205 omegascript_sort(const vector
<string
>& args
,
208 const string
&list
= args
[0];
209 if (list
.empty()) return;
211 // 0 => string sort, '#' => natural, 'n' => numeric.
215 if (args
.size() > 1) {
216 for (auto opt_ch
: args
[1]) {
221 string m
= "Invalid $sort option combination: ";
234 throw string("Unknown $sort option: ") + opt_ch
;
238 vector
<string
> items
;
239 string::size_type split
= 0, split2
;
241 split2
= list
.find('\t', split
);
242 items
.emplace_back(list
, split
, split2
- split
);
243 split
= UNSIGNED_OVERFLOW_OK(split2
+ 1);
244 } while (split2
!= string::npos
);
250 sort(items
.begin(), items
.end());
252 sort(items
.begin(), items
.end(),
253 [](const string
& a
, const string
& b
) {
258 // "Natural" sort - embedded natural numbers are handled specially.
260 sort(items
.begin(), items
.end(),
261 [](const string
& a
, const string
& b
) {
262 return natcmp(a
, b
) < 0;
265 sort(items
.begin(), items
.end(),
266 [](const string
& a
, const string
& b
) {
267 return natcmp(a
, b
) > 0;
272 value
.reserve(list
.size());
274 const string
* prev
= nullptr;
275 for (auto&& item
: items
) {
276 // Skip duplicates if "u" flag specified.
277 if (prev
&& *prev
== item
) {
296 // We first partition items such that all the negative values come first,
297 // then all the zero values, then all the positive values (for a forward
298 // search that is - for a reverse search the order of the partitions is
301 // Then we sort within each partition.
302 const string
& decimal
= option
["decimal"];
304 size_t part_f
= items
.size();
306 int first
= rev
? 1 : -1;
308 // Scan linearly through items, swapping to create the desired
312 int sign
= string_sign(items
[i
], decimal
);
314 // Swap value to end of the first partition.
316 swap(items
[part_z
], items
[i
]);
319 } else if (sign
== 0) {
320 // Extend middle partition to include zero value.
323 // Swap value to start of final partition.
324 swap(items
[i
], items
[--part_f
]);
328 // Need to preserve order within each partition.
329 auto z
= stable_partition(items
.begin(), items
.end(),
330 [&decimal
, first
](const string
& a
) {
331 return string_sign(a
, decimal
) == first
;
333 part_z
= z
- items
.begin();
334 auto f
= stable_partition(z
, items
.end(),
335 [&decimal
](const string
& a
) {
336 return string_sign(a
, decimal
) == 0;
338 part_f
= f
- items
.begin();
341 value
.reserve(list
.size());
344 const string
* prev
= nullptr;
346 // Sort the first partition.
349 sort(items
.begin(), items
.begin() + part_z
,
350 [&decimal
](const string
& a
, const string
& b
) {
351 int r
= ncmp(a
, b
, decimal
);
352 return r
? r
> 0 : a
< b
;
355 sort(items
.begin(), items
.begin() + part_z
,
356 [&decimal
](const string
& a
, const string
& b
) {
357 int r
= ncmp(a
, b
, decimal
);
358 return r
? r
> 0 : a
> b
;
362 stable_sort(items
.begin(), items
.begin() + part_z
,
363 [&decimal
](const string
& a
, const string
& b
) {
364 return ncmp(a
, b
, decimal
) > 0;
367 for (size_t i
= 0; i
!= part_z
; ++i
) {
368 const string
& item
= items
[i
];
370 if (prev
&& ncmp(*prev
, item
, decimal
) == 0) {
385 if (part_z
!= part_f
) {
386 // Handle all the zero values.
389 sort(items
.begin() + part_z
, items
.begin() + part_f
,
390 [](const string
& a
, const string
& b
) {
394 sort(items
.begin() + part_z
, items
.begin() + part_f
,
395 [](const string
& a
, const string
& b
) {
405 // No need to actually sort this partition when uniq is true -
406 // we just take the first element.
407 value
+= items
[part_z
];
411 if (part_f
!= items
.size()) {
412 // Sort the final partition.
415 sort(items
.begin() + part_f
, items
.end(),
416 [&decimal
](const string
& a
, const string
& b
) {
417 int r
= ncmp(a
, b
, decimal
);
418 return r
? r
< 0 : a
< b
;
421 sort(items
.begin() + part_f
, items
.end(),
422 [&decimal
](const string
& a
, const string
& b
) {
423 int r
= ncmp(a
, b
, decimal
);
424 return r
? r
< 0 : a
> b
;
428 stable_sort(items
.begin() + part_f
, items
.end(),
429 [&decimal
](const string
& a
, const string
& b
) {
430 return ncmp(a
, b
, decimal
) < 0;
433 for (size_t i
= part_f
; i
!= items
.size(); ++i
) {
434 const string
& item
= items
[i
];
436 if (prev
&& ncmp(*prev
, item
, decimal
) == 0) {
451 // If uniq is true we already assembled the output in value as we processed
454 for (auto&& item
: items
) {