1 // Formatting library for C++ - scanning API proof of concept
3 // Copyright (c) 2019 - present, Victor Zverovich
4 // All rights reserved.
6 // For the license information refer to format.h.
13 #include "fmt/format-inl.h"
// Returns true if c is one of the two characters the scanner treats as
// whitespace: a space or a line feed. Every other character, including tab
// and carriage return, yields false.
inline auto is_whitespace(char c) -> bool {
  switch (c) {
  case ' ':
  case '\n':
    return true;
  default:
    return false;
  }
}
20 // If c is a hex digit returns its numeric value, otherwise -1.
21 inline auto to_hex_digit(char c
) -> int {
22 if (c
>= '0' && c
<= '9') return c
- '0';
23 if (c
>= 'a' && c
<= 'f') return c
- 'a' + 10;
24 if (c
>= 'A' && c
<= 'F') return c
- 'A' + 10;
28 struct maybe_contiguous_range
{
32 explicit operator bool() const { return begin
!= nullptr; }
42 scan_buffer(const char* ptr
, const char* end
, bool contiguous
)
43 : ptr_(ptr
), end_(end
), contiguous_(contiguous
) {}
44 ~scan_buffer() = default;
46 void set(span
<const char> buf
) {
48 end_
= buf
.data
+ buf
.size
;
51 auto ptr() const -> const char* { return ptr_
; }
54 scan_buffer(const scan_buffer
&) = delete;
55 void operator=(const scan_buffer
&) = delete;
57 // Fills the buffer with more input if available.
58 virtual void consume() = 0;
65 scan_buffer
* buf_
; // This could be merged with ptr_.
68 static auto get_sentinel() -> const char** {
69 static const char* ptr
= nullptr;
73 friend class scan_buffer
;
75 friend auto operator==(iterator lhs
, sentinel
) -> bool {
76 return *lhs
.ptr_
== nullptr;
78 friend auto operator!=(iterator lhs
, sentinel
) -> bool {
79 return *lhs
.ptr_
!= nullptr;
82 iterator(scan_buffer
* buf
) : buf_(buf
) {
83 if (buf
->ptr_
== buf
->end_
) {
84 ptr_
= get_sentinel();
91 friend scan_buffer
& get_buffer(iterator it
) { return *it
.buf_
; }
94 iterator() : ptr_(get_sentinel()), buf_(nullptr) {}
96 auto operator++() -> iterator
& {
97 if (!buf_
->try_consume()) ptr_
= get_sentinel();
101 auto operator++(int) -> iterator
{
102 iterator copy
= *this;
106 auto operator*() const -> char { return value_
; }
108 auto base() const -> const char* { return buf_
->ptr_
; }
110 friend auto to_contiguous(iterator it
) -> maybe_contiguous_range
;
111 friend auto advance(iterator it
, size_t n
) -> iterator
;
114 friend auto to_contiguous(iterator it
) -> maybe_contiguous_range
{
115 if (it
.buf_
->is_contiguous()) return {it
.buf_
->ptr_
, it
.buf_
->end_
};
116 return {nullptr, nullptr};
118 friend auto advance(iterator it
, size_t n
) -> iterator
{
119 FMT_ASSERT(it
.buf_
->is_contiguous(), "");
120 const char*& ptr
= it
.buf_
->ptr_
;
123 if (ptr
== it
.buf_
->end_
) it
.ptr_
= iterator::get_sentinel();
127 auto begin() -> iterator
{ return this; }
128 auto end() -> sentinel
{ return {}; }
130 auto is_contiguous() const -> bool { return contiguous_
; }
132 // Tries consuming a single code unit. Returns true iff there is more input.
133 auto try_consume() -> bool {
134 FMT_ASSERT(ptr_
!= end_
, "");
136 if (ptr_
!= end_
) return true;
142 using scan_iterator
= scan_buffer::iterator
;
143 using scan_sentinel
= scan_buffer::sentinel
;
145 class string_scan_buffer final
: public scan_buffer
{
147 void consume() override
{}
150 explicit string_scan_buffer(string_view s
)
151 : scan_buffer(s
.begin(), s
.end(), true) {}
154 class file_scan_buffer final
: public scan_buffer
{
156 template <typename F
, FMT_ENABLE_IF(sizeof(F::_IO_read_ptr
) != 0 &&
157 !FMT_USE_FALLBACK_FILE
)>
158 static auto get_file(F
* f
, int) -> glibc_file
<F
> {
161 template <typename F
,
162 FMT_ENABLE_IF(sizeof(F::_p
) != 0 && !FMT_USE_FALLBACK_FILE
)>
163 static auto get_file(F
* f
, int) -> apple_file
<F
> {
166 static auto get_file(FILE* f
, ...) -> fallback_file
<FILE> { return f
; }
168 decltype(get_file(static_cast<FILE*>(nullptr), 0)) file_
;
170 // Fills the buffer if it is empty.
172 span
<const char> buf
= file_
.get_read_buffer();
175 // Put the character back since we are only filling the buffer.
176 if (c
!= EOF
) file_
.unget(static_cast<char>(c
));
177 buf
= file_
.get_read_buffer();
182 void consume() override
{
183 // Consume the current buffer content.
184 size_t n
= to_unsigned(ptr() - file_
.get_read_buffer().data
);
185 for (size_t i
= 0; i
!= n
; ++i
) file_
.get();
190 explicit file_scan_buffer(FILE* f
)
191 : scan_buffer(nullptr, nullptr, false), file_(f
) {
195 ~file_scan_buffer() {
200 } // namespace detail
202 template <typename T
, typename Char
= char> struct scanner
{
203 // A deleted default constructor indicates a disabled scanner.
207 class scan_parse_context
{
212 using iterator
= string_view::iterator
;
214 explicit FMT_CONSTEXPR
scan_parse_context(string_view format
)
217 FMT_CONSTEXPR
auto begin() const -> iterator
{ return format_
.begin(); }
218 FMT_CONSTEXPR
auto end() const -> iterator
{ return format_
.end(); }
220 void advance_to(iterator it
) {
221 format_
.remove_prefix(detail::to_unsigned(it
- begin()));
226 enum class scan_type
{
237 template <typename Context
> struct custom_scan_arg
{
239 void (*scan
)(void* arg
, scan_parse_context
& parse_ctx
, Context
& ctx
);
241 } // namespace detail
243 // A scan argument. Context is a template parameter for the compiled API where
244 // output can be unbuffered.
245 template <typename Context
> class basic_scan_arg
{
247 using scan_type
= detail::scan_type
;
251 unsigned* uint_value_
;
252 long long* long_long_value_
;
253 unsigned long long* ulong_long_value_
;
254 std::string
* string_
;
255 string_view
* string_view_
;
256 detail::custom_scan_arg
<Context
> custom_
;
260 template <typename T
>
261 static void scan_custom_arg(void* arg
, scan_parse_context
& parse_ctx
,
263 auto s
= scanner
<T
>();
264 parse_ctx
.advance_to(s
.parse(parse_ctx
));
265 ctx
.advance_to(s
.scan(*static_cast<T
*>(arg
), ctx
));
269 FMT_CONSTEXPR
basic_scan_arg()
270 : type_(scan_type::none_type
), int_value_(nullptr) {}
271 FMT_CONSTEXPR
basic_scan_arg(int& value
)
272 : type_(scan_type::int_type
), int_value_(&value
) {}
273 FMT_CONSTEXPR
basic_scan_arg(unsigned& value
)
274 : type_(scan_type::uint_type
), uint_value_(&value
) {}
275 FMT_CONSTEXPR
basic_scan_arg(long long& value
)
276 : type_(scan_type::long_long_type
), long_long_value_(&value
) {}
277 FMT_CONSTEXPR
basic_scan_arg(unsigned long long& value
)
278 : type_(scan_type::ulong_long_type
), ulong_long_value_(&value
) {}
279 FMT_CONSTEXPR
basic_scan_arg(std::string
& value
)
280 : type_(scan_type::string_type
), string_(&value
) {}
281 FMT_CONSTEXPR
basic_scan_arg(string_view
& value
)
282 : type_(scan_type::string_view_type
), string_view_(&value
) {}
283 template <typename T
>
284 FMT_CONSTEXPR
basic_scan_arg(T
& value
) : type_(scan_type::custom_type
) {
285 custom_
.value
= &value
;
286 custom_
.scan
= scan_custom_arg
<T
>;
289 constexpr explicit operator bool() const noexcept
{
290 return type_
!= scan_type::none_type
;
293 auto type() const -> detail::scan_type
{ return type_
; }
295 template <typename Visitor
>
296 auto visit(Visitor
&& vis
) -> decltype(vis(monostate())) {
298 case scan_type::none_type
:
300 case scan_type::int_type
:
301 return vis(*int_value_
);
302 case scan_type::uint_type
:
303 return vis(*uint_value_
);
304 case scan_type::long_long_type
:
305 return vis(*long_long_value_
);
306 case scan_type::ulong_long_type
:
307 return vis(*ulong_long_value_
);
308 case scan_type::string_type
:
309 return vis(*string_
);
310 case scan_type::string_view_type
:
311 return vis(*string_view_
);
312 case scan_type::custom_type
:
315 return vis(monostate());
318 auto scan_custom(const char* parse_begin
, scan_parse_context
& parse_ctx
,
319 Context
& ctx
) const -> bool {
320 if (type_
!= scan_type::custom_type
) return false;
321 parse_ctx
.advance_to(parse_begin
);
322 custom_
.scan(custom_
.value
, parse_ctx
, ctx
);
328 using scan_arg
= basic_scan_arg
<scan_context
>;
332 const scan_arg
* data
;
335 FMT_CONSTEXPR
scan_args(const std::array
<scan_arg
, N
>& store
)
336 : size(N
), data(store
.data()) {
337 static_assert(N
< INT_MAX
, "too many arguments");
343 detail::scan_buffer
& buf_
;
347 using iterator
= detail::scan_iterator
;
348 using sentinel
= detail::scan_sentinel
;
350 explicit FMT_CONSTEXPR
scan_context(detail::scan_buffer
& buf
, scan_args args
)
351 : buf_(buf
), args_(args
) {}
353 FMT_CONSTEXPR
auto arg(int id
) const -> scan_arg
{
354 return id
< args_
.size
? args_
.data
[id
] : scan_arg();
357 auto begin() const -> iterator
{ return buf_
.begin(); }
358 auto end() const -> sentinel
{ return {}; }
360 void advance_to(iterator
) { buf_
.consume(); }
365 const char* parse_scan_specs(const char* begin
, const char* end
,
366 format_specs
& specs
, scan_type
) {
367 while (begin
!= end
) {
368 switch (to_ascii(*begin
)) {
369 // TODO: parse more scan format specifiers
371 specs
.type
= presentation_type::hex
;
381 template <typename T
, FMT_ENABLE_IF(std::is_unsigned
<T
>::value
)>
382 auto read(scan_iterator it
, T
& value
) -> scan_iterator
{
383 if (it
== scan_sentinel()) return it
;
385 if (c
< '0' || c
> '9') report_error("invalid input");
392 n
= n
* 10 + static_cast<unsigned>(c
- '0');
396 if (c
< '0' || c
> '9') break;
397 } while (it
!= scan_sentinel());
400 if (num_digits
<= std::numeric_limits
<int>::digits10
) {
404 unsigned max
= to_unsigned((std::numeric_limits
<int>::max
)());
405 if (num_digits
== std::numeric_limits
<int>::digits10
+ 1 &&
406 prev
* 10ull + unsigned(prev_digit
- '0') <= max
) {
409 report_error("number is too big");
414 template <typename T
, FMT_ENABLE_IF(std::is_unsigned
<T
>::value
)>
415 auto read_hex(scan_iterator it
, T
& value
) -> scan_iterator
{
416 if (it
== scan_sentinel()) return it
;
417 int digit
= to_hex_digit(*it
);
418 if (digit
< 0) report_error("invalid input");
423 n
= (n
<< 4) + static_cast<unsigned>(digit
);
425 digit
= to_hex_digit(*++it
);
426 if (digit
< 0) break;
427 } while (it
!= scan_sentinel());
430 if (num_digits
<= (std::numeric_limits
<T
>::digits
>> 2))
433 report_error("number is too big");
437 template <typename T
, FMT_ENABLE_IF(std::is_unsigned
<T
>::value
)>
438 auto read(scan_iterator it
, T
& value
, const format_specs
& specs
)
440 if (specs
.type
== presentation_type::hex
) return read_hex(it
, value
);
441 return read(it
, value
);
444 template <typename T
, FMT_ENABLE_IF(std::is_signed
<T
>::value
)>
445 auto read(scan_iterator it
, T
& value
, const format_specs
& specs
= {})
447 bool negative
= it
!= scan_sentinel() && *it
== '-';
450 if (it
== scan_sentinel()) report_error("invalid input");
452 using unsigned_type
= typename
std::make_unsigned
<T
>::type
;
453 unsigned_type abs_value
= 0;
454 it
= read(it
, abs_value
, specs
);
455 auto n
= static_cast<T
>(abs_value
);
456 value
= negative
? -n
: n
;
460 auto read(scan_iterator it
, std::string
& value
, const format_specs
& = {})
462 while (it
!= scan_sentinel() && *it
!= ' ') value
.push_back(*it
++);
466 auto read(scan_iterator it
, string_view
& value
, const format_specs
& = {})
468 auto range
= to_contiguous(it
);
469 // This could also be checked at compile time in scan.
470 if (!range
) report_error("string_view requires contiguous input");
471 auto p
= range
.begin
;
472 while (p
!= range
.end
&& *p
!= ' ') ++p
;
473 size_t size
= to_unsigned(p
- range
.begin
);
474 value
= {range
.begin
, size
};
475 return advance(it
, size
);
478 auto read(scan_iterator it
, monostate
, const format_specs
& = {})
483 // An argument scanner that uses the default format, e.g. decimal for integers.
484 struct default_arg_scanner
{
487 template <typename T
> FMT_INLINE
auto operator()(T
&& value
) -> scan_iterator
{
488 return read(it
, value
);
492 // An argument scanner with format specifiers.
495 const format_specs
& specs
;
497 template <typename T
> auto operator()(T
&& value
) -> scan_iterator
{
498 return read(it
, value
, specs
);
502 struct scan_handler
{
504 scan_parse_context parse_ctx_
;
505 scan_context scan_ctx_
;
508 using sentinel
= scan_buffer::sentinel
;
511 FMT_CONSTEXPR
scan_handler(string_view format
, scan_buffer
& buf
,
513 : parse_ctx_(format
), scan_ctx_(buf
, args
), next_arg_id_(0) {}
515 auto pos() const -> scan_buffer::iterator
{ return scan_ctx_
.begin(); }
517 void on_text(const char* begin
, const char* end
) {
518 if (begin
== end
) return;
519 auto it
= scan_ctx_
.begin();
520 for (; begin
!= end
; ++begin
, ++it
) {
521 if (it
== sentinel() || *begin
!= *it
) on_error("invalid input");
523 scan_ctx_
.advance_to(it
);
526 FMT_CONSTEXPR
auto on_arg_id() -> int { return on_arg_id(next_arg_id_
++); }
527 FMT_CONSTEXPR
auto on_arg_id(int id
) -> int {
528 if (!scan_ctx_
.arg(id
)) on_error("argument index out of range");
531 FMT_CONSTEXPR
auto on_arg_id(string_view id
) -> int {
532 if (id
.data()) on_error("invalid format");
536 void on_replacement_field(int arg_id
, const char* begin
) {
537 scan_arg arg
= scan_ctx_
.arg(arg_id
);
538 if (arg
.scan_custom(begin
, parse_ctx_
, scan_ctx_
)) return;
539 auto it
= scan_ctx_
.begin();
540 while (it
!= sentinel() && is_whitespace(*it
)) ++it
;
541 scan_ctx_
.advance_to(arg
.visit(default_arg_scanner
{it
}));
544 auto on_format_specs(int arg_id
, const char* begin
, const char* end
) -> const
546 scan_arg arg
= scan_ctx_
.arg(arg_id
);
547 if (arg
.scan_custom(begin
, parse_ctx_
, scan_ctx_
))
548 return parse_ctx_
.begin();
549 auto specs
= format_specs();
550 begin
= parse_scan_specs(begin
, end
, specs
, arg
.type());
551 if (begin
== end
|| *begin
!= '}') on_error("missing '}' in format string");
552 scan_ctx_
.advance_to(arg
.visit(arg_scanner
{scan_ctx_
.begin(), specs
}));
556 FMT_NORETURN
void on_error(const char* message
) { report_error(message
); }
559 void vscan(detail::scan_buffer
& buf
, string_view fmt
, scan_args args
) {
560 auto h
= detail::scan_handler(fmt
, buf
, args
);
561 detail::parse_format_string
<false>(fmt
, h
);
564 template <size_t I
, typename
... T
, FMT_ENABLE_IF(I
== sizeof...(T
))>
565 void make_args(std::array
<scan_arg
, sizeof...(T
)>&, std::tuple
<T
...>&) {}
567 template <size_t I
, typename
... T
, FMT_ENABLE_IF(I
< sizeof...(T
))>
568 void make_args(std::array
<scan_arg
, sizeof...(T
)>& args
,
569 std::tuple
<T
...>& values
) {
570 using element_type
= typename
std::tuple_element
<I
, std::tuple
<T
...>>::type
;
571 static_assert(std::is_same
<remove_cvref_t
<element_type
>, element_type
>::value
,
573 args
[I
] = std::get
<I
>(values
);
574 make_args
<I
+ 1>(args
, values
);
576 } // namespace detail
578 template <typename Range
, typename
... T
> class scan_data
{
580 std::tuple
<T
...> values_
;
584 scan_data() = default;
585 scan_data(T
... values
) : values_(std::move(values
)...) {}
587 auto value() const -> decltype(std::get
<0>(values_
)) {
588 return std::get
<0>(values_
);
591 auto values() const -> const std::tuple
<T
...>& { return values_
; }
593 auto make_args() -> std::array
<scan_arg
, sizeof...(T
)> {
594 auto args
= std::array
<scan_arg
, sizeof...(T
)>();
595 detail::make_args
<0>(args
, values_
);
599 auto range() const -> Range
{ return range_
; }
601 auto begin() const -> decltype(range_
.begin()) { return range_
.begin(); }
602 auto end() const -> decltype(range_
.end()) { return range_
.end(); }
605 template <typename
... T
>
606 auto make_scan_args(T
&... args
) -> std::array
<scan_arg
, sizeof...(T
)> {
612 // A rudimentary version of std::expected for testing the API shape.
613 template <typename T
, typename E
> class expected
{
616 bool has_value_
= true;
619 expected(T value
) : value_(std::move(value
)) {}
621 explicit operator bool() const { return has_value_
; }
623 auto operator->() const -> const T
* { return &value_
; }
625 auto error() -> E
const { return E(); }
628 template <typename Range
, typename
... T
>
629 using scan_result
= expected
<scan_data
<Range
, T
...>, scan_error
>;
631 auto vscan(string_view input
, string_view fmt
, scan_args args
)
632 -> string_view::iterator
{
633 auto&& buf
= detail::string_scan_buffer(input
);
634 detail::vscan(buf
, fmt
, args
);
635 return input
.begin() + (buf
.begin().base() - input
.data());
638 // Scans the input and stores the results (in)to args.
639 template <typename
... T
>
640 auto scan_to(string_view input
, string_view fmt
, T
&... args
)
641 -> string_view::iterator
{
642 return vscan(input
, fmt
, make_scan_args(args
...));
645 template <typename
... T
>
646 auto scan(string_view input
, string_view fmt
)
647 -> scan_result
<string_view
, T
...> {
648 auto data
= scan_data
<string_view
, T
...>();
649 vscan(input
, fmt
, data
.make_args());
653 template <typename Range
, typename
... T
,
654 FMT_ENABLE_IF(!std::is_convertible
<Range
, string_view
>::value
)>
655 auto scan_to(Range
&& input
, string_view fmt
, T
&... args
)
656 -> decltype(std::begin(input
)) {
657 auto it
= std::begin(input
);
658 detail::vscan(get_buffer(it
), fmt
, make_scan_args(args
...));
662 template <typename
... T
>
663 auto scan_to(FILE* f
, string_view fmt
, T
&... args
) -> bool {
664 auto&& buf
= detail::file_scan_buffer(f
);
665 detail::vscan(buf
, fmt
, make_scan_args(args
...));
666 return buf
.begin() != buf
.end();