led_ctrl/include/toml11/impl/syntax_impl.hpp

894 lines
26 KiB
C++
Raw Normal View History

2025-06-05 09:03:12 +08:00
#ifndef TOML11_SYNTAX_IMPL_HPP
#define TOML11_SYNTAX_IMPL_HPP
#include "../fwd/syntax_fwd.hpp"
#include "../scanner.hpp"
#include "../spec.hpp"
#include "../version.hpp"
namespace toml
{
inline namespace TOML11_INLINE_VERSION_NAMESPACE
{
namespace detail
{
namespace syntax
{
using char_type = location::char_type;
// Memoizes the scanner produced by `F` for the most recently requested spec.
// Only one (spec, scanner) pair is kept; asking for a different spec rebuilds
// the scanner and replaces the stored pair.
template<typename F>
struct syntax_cache
{
    // The scanner type that the generator `F` yields for a `const spec&`.
    using value_type = cxx::return_type_of_t<F, const spec&>;
    static_assert(std::is_base_of<scanner_base, value_type>::value, "");

    explicit syntax_cache(F f)
        : func_(std::move(f)), cache_(cxx::make_nullopt())
    {}

    // Returns the scanner for `s`, regenerating it when nothing is stored yet
    // or when the stored entry was built for a different spec.
    value_type const& at(const spec& s)
    {
        const bool stale = ( ! this->cache_.has_value()) ||
                           (this->cache_.value().first != s);
        if(stale)
        {
            this->cache_ = std::make_pair(s, func_(s));
        }
        return this->cache_.value().second;
    }

  private:

    F func_; // generator invoked on a cache miss
    cxx::optional<std::pair<spec, value_type>> cache_; // last (spec, scanner)
};
// Wraps a scanner generator in a syntax_cache, deducing the functor type
// with cv/ref qualifiers stripped.
template<typename F>
syntax_cache<cxx::remove_cvref_t<F>> make_cache(F&& f)
{
    using generator_type = cxx::remove_cvref_t<F>;
    return syntax_cache<generator_type>(std::forward<F>(f));
}
// ===========================================================================
// UTF-8
// avoid redundant representation and out-of-unicode sequence
// Scanner for a one-byte UTF-8 unit: any byte in 0x00..0x7F.
// The spec argument is ignored; one instance is kept per thread.
TOML11_INLINE character_in_range const& utf8_1byte(const spec&)
{
    static thread_local character_in_range single_byte(0x00, 0x7F);
    return single_byte;
}
// Scanner for a two-byte UTF-8 sequence. The lead byte starts at 0xC2, so
// 0xC0 and 0xC1 are not accepted as lead bytes.
TOML11_INLINE sequence const& utf8_2bytes(const spec&)
{
    static thread_local sequence two_byte_form(
        /*lead byte    = */ character_in_range(0xC2, 0xDF),
        /*continuation = */ character_in_range(0x80, 0xBF));
    return two_byte_form;
}
// Scanner for a three-byte UTF-8 sequence. The allowed range of the second
// byte depends on the lead byte (0xE0 needs A0..BF, 0xED needs 80..9F).
TOML11_INLINE sequence const& utf8_3bytes(const spec&)
{
    static thread_local sequence three_byte_form(
        /*1~2 bytes = */ either(
            sequence(character         (0xE0),       character_in_range(0xA0, 0xBF)),
            sequence(character_in_range(0xE1, 0xEC), character_in_range(0x80, 0xBF)),
            sequence(character         (0xED),       character_in_range(0x80, 0x9F)),
            sequence(character_in_range(0xEE, 0xEF), character_in_range(0x80, 0xBF))
        ),
        /*3rd byte  = */ character_in_range(0x80, 0xBF));
    return three_byte_form;
}
// Scanner for a four-byte UTF-8 sequence. The allowed range of the second
// byte depends on the lead byte (0xF0 needs 90..BF, 0xF4 needs 80..8F).
TOML11_INLINE sequence const& utf8_4bytes(const spec&)
{
    static thread_local sequence four_byte_form(
        /*1~2 bytes = */ either(
            sequence(character         (0xF0),       character_in_range(0x90, 0xBF)),
            sequence(character_in_range(0xF1, 0xF3), character_in_range(0x80, 0xBF)),
            sequence(character         (0xF4),       character_in_range(0x80, 0x8F))
        ),
        /*3rd byte  = */ character_in_range(0x80, 0xBF),
        /*4th byte  = */ character_in_range(0x80, 0xBF));
    return four_byte_form;
}
// ===========================================================================
// Whitespace
// Scanner for one whitespace character: a space or a horizontal tab.
TOML11_INLINE character_either const& wschar(const spec&)
{
    static thread_local character_either blank(" \t");
    return blank;
}
// Scanner for zero or more whitespace characters, cached per thread and
// rebuilt when a different spec is requested.
TOML11_INLINE repeat_at_least const& ws(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return repeat_at_least(0, wschar(cfg));
    });
    return memo.at(sp);
}
// ===========================================================================
// Newline
// Scanner for a line break: either a lone LF or the CR LF pair.
TOML11_INLINE either const& newline(const spec&)
{
    static thread_local either line_break(
        /*LF   = */ character(char_type('\n')),
        /*CRLF = */ literal("\r\n"));
    return line_break;
}
// ===========================================================================
// Comments
// Scanner for one character permitted inside a comment. The accepted ASCII
// set depends on `v1_1_0_allow_control_characters_in_comments`; non-ASCII
// characters are accepted in both modes.
TOML11_INLINE either const& allowed_comment_char(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        if( ! cfg.v1_1_0_allow_control_characters_in_comments)
        {
            // tab plus 0x20..0x7E only
            return either(
                character(0x09),
                character_in_range(0x20, 0x7E),
                non_ascii(cfg)
            );
        }
        // relaxed set: 0x01..0x09 and 0x0E..0x7F
        return either(
            character_in_range(0x01, 0x09),
            character_in_range(0x0E, 0x7F),
            non_ascii(cfg)
        );
    });
    return memo.at(sp);
}
// XXX Note that it does not take newline
// Scanner for a comment: '#' followed by any number of allowed comment
// characters. The terminating newline is not part of the match.
TOML11_INLINE sequence const& comment(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            /*marker = */ character(char_type('#')),
            /*body   = */ repeat_at_least(0, allowed_comment_char(cfg)));
    });
    return memo.at(sp);
}
// ===========================================================================
// Boolean
// Scanner for a boolean literal: exactly "true" or "false".
TOML11_INLINE either const& boolean(const spec&)
{
    static thread_local either bool_literal(literal("true"), literal("false"));
    return bool_literal;
}
// ===========================================================================
// Integer
// non-digit-graph = ([a-zA-Z]|unicode mb char)
// graph = ([a-zA-Z0-9]|unicode mb char)
// suffix = _ non-digit-graph (graph | _graph)*
// Scanner for a numeric suffix: an underscore, then one non-digit graph
// character (alpha or non-ASCII), then zero or more graph characters
// (alpha, digit or non-ASCII), each optionally preceded by one underscore.
TOML11_INLINE sequence const& num_suffix(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            character(char_type('_')),
            // first character after the underscore must not be a digit
            either(alpha(cfg), non_ascii(cfg)),
            repeat_at_least(0,
                either(
                    sequence(
                        character(char_type('_')),
                        either(alpha(cfg), digit(cfg), non_ascii(cfg))
                    ),
                    either(alpha(cfg), digit(cfg), non_ascii(cfg))
                )
            )
        );
    });
    return memo.at(sp);
}
// Scanner for a decimal integer: optional sign, then either a multi-digit
// number starting with 1..9 (digits may be separated by single underscores)
// or a single digit.
TOML11_INLINE sequence const& dec_int(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            maybe(character_either("+-")),
            either(
                // the multi-digit form is tried first
                sequence(
                    character_in_range(char_type('1'), char_type('9')),
                    repeat_at_least(1,
                        either(
                            digit(cfg),
                            sequence(character(char_type('_')), digit(cfg))
                        )
                    )
                ),
                digit(cfg)
            )
        );
    });
    return memo.at(sp);
}
// Scanner for a hexadecimal integer: "0x", one hex digit, then any number
// of hex digits, each optionally preceded by a single underscore.
TOML11_INLINE sequence const& hex_int(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            /*prefix = */ literal("0x"),
            /*first  = */ hexdig(cfg),
            /*rest   = */ repeat_at_least(0,
                either(
                    hexdig(cfg),
                    sequence(character(char_type('_')), hexdig(cfg))
                )
            )
        );
    });
    return memo.at(sp);
}
// Scanner for an octal integer: "0o", one digit in 0..7, then any number of
// such digits, each optionally preceded by a single underscore.
TOML11_INLINE sequence const& oct_int(const spec& s)
{
    static thread_local auto memo = make_cache([](const spec&) {
        return sequence(
            /*prefix = */ literal("0o"),
            /*first  = */ character_in_range(char_type('0'), char_type('7')),
            /*rest   = */ repeat_at_least(0,
                either(
                    character_in_range(char_type('0'), char_type('7')),
                    sequence(
                        character(char_type('_')),
                        character_in_range(char_type('0'), char_type('7'))
                    )
                )
            )
        );
    });
    return memo.at(s);
}
// Scanner for a binary integer: "0b", one digit 0 or 1, then any number of
// such digits, each optionally preceded by a single underscore.
TOML11_INLINE sequence const& bin_int(const spec& s)
{
    static thread_local auto memo = make_cache([](const spec&) {
        return sequence(
            /*prefix = */ literal("0b"),
            /*first  = */ character_either("01"),
            /*rest   = */ repeat_at_least(0,
                either(
                    character_either("01"),
                    sequence(character(char_type('_')), character_either("01"))
                )
            )
        );
    });
    return memo.at(s);
}
// Scanner for any integer literal. Prefixed forms (hex, octal, binary) are
// tried before the decimal form.
TOML11_INLINE either const& integer(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return either(hex_int(cfg), oct_int(cfg), bin_int(cfg), dec_int(cfg));
    });
    return memo.at(sp);
}
// ===========================================================================
// Floating
// Scanner for a digit run that may start with zero: one digit, then any
// number of digits, each optionally preceded by a single underscore.
TOML11_INLINE sequence const& zero_prefixable_int(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            /*first = */ digit(cfg),
            /*rest  = */ repeat_at_least(0,
                either(
                    digit(cfg),
                    sequence(character('_'), digit(cfg))
                )
            )
        );
    });
    return memo.at(sp);
}
// Scanner for the fractional part of a float: '.' followed by a
// zero-prefixable digit run.
TOML11_INLINE sequence const& fractional_part(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(character('.'), zero_prefixable_int(cfg));
    });
    return memo.at(sp);
}
// Scanner for the exponent part of a float: 'e' or 'E', an optional sign,
// then a zero-prefixable digit run.
TOML11_INLINE sequence const& exponent_part(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            /*marker = */ character_either("eE"),
            /*sign   = */ maybe(character_either("+-")),
            /*digits = */ zero_prefixable_int(cfg));
    });
    return memo.at(sp);
}
// Scanner for a C99-style hexadecimal float (the "%a" format):
//   [+-]? 0[xX] ( hex* '.' hex+ | hex+ '.'? ) [pP] [+-]? [0-9]+
// Accepted mantissa shapes:
//   0x(int).(frac)p..   0x(int).p..   0x.(frac)p..   0x(int)p..
TOML11_INLINE sequence const& hex_floating(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            /*sign     = */ maybe(character_either("+-")),
            /*prefix   = */ character('0'), character_either("xX"),
            /*mantissa = */ either(
                // form with digits required after the dot
                sequence(
                    repeat_at_least(0, hexdig(cfg)),
                    character('.'),
                    repeat_at_least(1, hexdig(cfg))
                ),
                // form with digits required before an optional dot
                sequence(
                    repeat_at_least(1, hexdig(cfg)),
                    maybe(character('.'))
                )
            ),
            /*exponent = */ character_either("pP"),
            maybe(character_either("+-")),
            repeat_at_least(1, character_in_range('0', '9'))
        );
    });
    return memo.at(sp);
}
// Scanner for a float: a decimal integer followed by an exponent or by a
// fraction with optional exponent, or an optionally signed "inf" / "nan".
TOML11_INLINE either const& floating(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return either(
            // ordinary number: integer part plus exponent and/or fraction
            sequence(
                dec_int(cfg),
                either(
                    exponent_part(cfg),
                    sequence(fractional_part(cfg), maybe(exponent_part(cfg)))
                )
            ),
            // special values
            sequence(
                maybe(character_either("+-")),
                either(literal("inf"), literal("nan"))
            )
        );
    });
    return memo.at(sp);
}
// ===========================================================================
// Datetime
// Scanner for a date written as four digits, '-', two digits, '-', two
// digits. Only the shape is matched here; values are not range-checked.
TOML11_INLINE sequence const& local_date(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            /*year  = */ repeat_exact(4, digit(cfg)), character('-'),
            /*month = */ repeat_exact(2, digit(cfg)), character('-'),
            /*day   = */ repeat_exact(2, digit(cfg)));
    });
    return memo.at(sp);
}
// Scanner for a time of day. With `v1_1_0_make_seconds_optional` the
// ":SS[.frac]" tail may be omitted; otherwise seconds are required and only
// the fractional part is optional.
TOML11_INLINE sequence const& local_time(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        if( ! cfg.v1_1_0_make_seconds_optional)
        {
            // HH:MM:SS[.frac]
            return sequence(
                repeat_exact(2, digit(cfg)), character(':'),
                repeat_exact(2, digit(cfg)), character(':'),
                repeat_exact(2, digit(cfg)),
                maybe(sequence(character('.'), repeat_at_least(1, digit(cfg)))));
        }
        // HH:MM[:SS[.frac]]
        return sequence(
            repeat_exact(2, digit(cfg)), character(':'),
            repeat_exact(2, digit(cfg)),
            maybe(sequence(
                character(':'),
                repeat_exact(2, digit(cfg)),
                maybe(sequence(character('.'), repeat_at_least(1, digit(cfg))))
            ))
        );
    });
    return memo.at(sp);
}
// Scanner for a time offset: 'z' / 'Z', or a sign followed by two digits,
// ':', and two digits.
TOML11_INLINE either const& time_offset(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return either(
            /*zulu    = */ character_either("zZ"),
            /*numeric = */ sequence(
                character_either("+-"),
                repeat_exact(2, digit(cfg)),
                character(':'),
                repeat_exact(2, digit(cfg))
            )
        );
    });
    return memo.at(sp);
}
// Scanner for a time of day immediately followed by a time offset.
TOML11_INLINE sequence const& full_time(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            /*time   = */ local_time(cfg),
            /*offset = */ time_offset(cfg));
    });
    return memo.at(sp);
}
// Scanner for the separator between date and time: 'T', 't' or a space.
TOML11_INLINE character_either const& time_delim(const spec& sp)
{
    static thread_local auto memo = make_cache(
        [](const spec&) { return character_either("Tt "); });
    return memo.at(sp);
}
// Scanner for a date, a date/time delimiter, and a time without offset.
TOML11_INLINE sequence const& local_datetime(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            /*date  = */ local_date(cfg),
            /*delim = */ time_delim(cfg),
            /*time  = */ local_time(cfg));
    });
    return memo.at(sp);
}
// Scanner for a date, a date/time delimiter, and a time with offset.
TOML11_INLINE sequence const& offset_datetime(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            /*date  = */ local_date(cfg),
            /*delim = */ time_delim(cfg),
            /*time  = */ full_time(cfg));
    });
    return memo.at(sp);
}
// ===========================================================================
// String
// Scanner for the tail of a "\xHH" escape: 'x' then exactly two hex digits.
// The leading backslash is not matched here.
TOML11_INLINE sequence const& escaped_x2(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            /*tag    = */ character('x'),
            /*digits = */ repeat_exact(2, hexdig(cfg)));
    });
    return memo.at(sp);
}
// Scanner for the tail of a "\uHHHH" escape: 'u' then exactly four hex
// digits. The leading backslash is not matched here.
TOML11_INLINE sequence const& escaped_u4(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            /*tag    = */ character('u'),
            /*digits = */ repeat_exact(4, hexdig(cfg)));
    });
    return memo.at(sp);
}
// Scanner for the tail of a "\UHHHHHHHH" escape: 'U' then exactly eight hex
// digits. The leading backslash is not matched here.
TOML11_INLINE sequence const& escaped_U8(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            /*tag    = */ character('U'),
            /*digits = */ repeat_exact(8, hexdig(cfg)));
    });
    return memo.at(sp);
}
// Scanner for a complete escape sequence: a backslash followed by a
// single-character escape, "uHHHH", "UHHHHHHHH", or (only when
// `v1_1_0_add_escape_sequence_x` is set) "xHH". The 'e' single-character
// escape is accepted only when `v1_1_0_add_escape_sequence_e` is set.
TOML11_INLINE sequence const& escaped(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        // set of one-letter escapes, with or without 'e'
        const auto single_letter = [&cfg] {
            if( ! cfg.v1_1_0_add_escape_sequence_e)
            {
                return character_either("\"\\bfnrt");
            }
            return character_either("\"\\bfnrte");
        };

        if(cfg.v1_1_0_add_escape_sequence_x)
        {
            return sequence(
                character('\\'),
                either(
                    single_letter(),
                    escaped_u4(cfg),
                    escaped_U8(cfg),
                    escaped_x2(cfg)
                )
            );
        }
        return sequence(
            character('\\'),
            either(
                single_letter(),
                escaped_u4(cfg),
                escaped_U8(cfg)
            )
        );
    });
    return memo.at(sp);
}
// Scanner for one character of a basic (double-quoted) string: either an
// unescaped character or an escape sequence.
TOML11_INLINE either const& basic_char(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return either(
            // unescaped: everything printable except '"' and '\'
            either(
                wschar(cfg),
                character(0x21),                // 22 is "
                character_in_range(0x23, 0x5B), // 5C is backslash
                character_in_range(0x5D, 0x7E), // 7F is DEL
                non_ascii(cfg)
            ),
            escaped(cfg)
        );
    });
    return memo.at(sp);
}
// Scanner for a single-line basic string: '"', any number of basic
// characters, then the closing '"'.
TOML11_INLINE sequence const& basic_string(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            /*open  = */ character('"'),
            /*body  = */ repeat_at_least(0, basic_char(cfg)),
            /*close = */ character('"'));
    });
    return memo.at(sp);
}
// ---------------------------------------------------------------------------
// multiline string
// Scanner for a line-ending backslash in a multi-line basic string: '\',
// optional whitespace, a newline, then any run of whitespace and newlines.
TOML11_INLINE sequence const& escaped_newline(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            character('\\'),
            ws(cfg),
            newline(cfg),
            // following blank space, including further line breaks
            repeat_at_least(0, either(wschar(cfg), newline(cfg)))
        );
    });
    return memo.at(sp);
}
// Scanner for a multi-line basic string delimited by three double quotes.
// One or two quote characters may appear inside the body when followed by
// content, and up to two extra quotes may directly follow the closing
// delimiter.
TOML11_INLINE sequence const& ml_basic_string(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            literal("\"\"\""),
            maybe(newline(cfg)),
            // leading content
            repeat_at_least(0,
                either(basic_char(cfg), newline(cfg), escaped_newline(cfg))
            ),
            // embedded one or two quotes, each followed by more content
            repeat_at_least(0,
                sequence(
                    either(literal("\"\""), character('\"')),
                    repeat_at_least(1,
                        either(basic_char(cfg), newline(cfg), escaped_newline(cfg))
                    )
                )
            ),
            // XXX the closing """ is deliberately placed before the optional
            // trailing quotes so that those quotes do not match unexpectedly
            literal("\"\"\""),
            maybe(either(literal("\"\""), character('\"')))
        );
    });
    return memo.at(sp);
}
// ---------------------------------------------------------------------------
// literal string
// Scanner for one character of a literal (single-quoted) string: tab,
// 0x20..0x7E except the apostrophe (0x27), or a non-ASCII character.
TOML11_INLINE either const& literal_char(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return either(
            character         (0x09),
            character_in_range(0x20, 0x26), // 27 is '
            character_in_range(0x28, 0x7E),
            non_ascii(cfg)
        );
    });
    return memo.at(sp);
}
// Scanner for a single-line literal string: an apostrophe, any number of
// literal characters, then the closing apostrophe.
TOML11_INLINE sequence const& literal_string(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            /*open  = */ character('\''),
            /*body  = */ repeat_at_least(0, literal_char(cfg)),
            /*close = */ character('\''));
    });
    return memo.at(sp);
}
// Scanner for a multi-line literal string delimited by three apostrophes.
// One or two apostrophes may appear inside the body when followed by
// content, and up to two extra apostrophes may directly follow the closing
// delimiter.
TOML11_INLINE sequence const& ml_literal_string(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            literal("'''"),
            maybe(newline(cfg)),
            // leading content
            repeat_at_least(0, either(literal_char(cfg), newline(cfg))),
            // embedded one or two apostrophes, each followed by more content
            repeat_at_least(0,
                sequence(
                    either(literal("''"), character('\'')),
                    repeat_at_least(1, either(literal_char(cfg), newline(cfg)))
                )
            ),
            // XXX the closing ''' is deliberately placed before the optional
            // trailing apostrophes so that those do not match unexpectedly
            literal("'''"),
            maybe(either(literal("''"), character('\'')))
        );
    });
    return memo.at(sp);
}
// Scanner for any string form. The multi-line variants are tried before
// their single-line counterparts.
TOML11_INLINE either const& string(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return either(
            /*multi-line  = */ ml_basic_string(cfg), ml_literal_string(cfg),
            /*single-line = */ basic_string(cfg),    literal_string(cfg));
    });
    return memo.at(sp);
}
// ===========================================================================
// Keys
// to keep `expected_chars` simple
// Constructs the bare-key non-ASCII scanner. The spec is only inspected by
// a debug assertion that non-English bare keys are enabled.
TOML11_INLINE non_ascii_key_char::non_ascii_key_char(const spec& s) noexcept
{
    (void)s; // keeps the parameter "used" when assert() compiles away (NDEBUG)
    assert(s.v1_1_0_allow_non_english_in_bare_keys);
}
// Decodes one UTF-8 encoded code point starting at `loc` and advances `loc`
// past the bytes it consumed.
//
// Returns the decoded code point, or 0xFFFFFFFF when the input is not a
// well-formed sequence: unexpected end of input, an invalid lead byte, a
// continuation byte that is not of the form 10xx_xxxx, or an overlong
// encoding. On failure `loc` may have been advanced part-way; callers that
// need to backtrack must restore it themselves.
//
// Surrogates and values above U+10FFFF are not rejected here.
TOML11_INLINE std::uint32_t non_ascii_key_char::read_utf8(location& loc) const
{
    // U+0000   ... U+007F  ; 0xxx_xxxx
    // U+0080   ... U+07FF  ; 110y_yyyx 10xx_xxxx;
    // U+0800   ... U+FFFF  ; 1110_yyyy 10yx_xxxx 10xx_xxxx
    // U+010000 ... U+10FFFF; 1111_0yyy 10yy_xxxx 10xx_xxxx 10xx_xxxx

    const std::uint32_t invalid = 0xFFFFFFFF;

    // Consumes one continuation byte and stores its low 6 bits in `payload`.
    // Fails, without consuming, at end of input or when the byte does not
    // carry the 10xx_xxxx marker.
    const auto take_continuation = [&loc](std::uint32_t& payload) -> bool
    {
        if(loc.eof()) {return false;}
        const unsigned char b = loc.current();
        if((b >> 6) != 2) {return false;} // 0b10 == 2
        loc.advance(1);
        payload = static_cast<std::uint32_t>(b & ((1 << 6) - 1));
        return true;
    };

    if(loc.eof()) {return invalid;}

    const unsigned char b1 = loc.current(); loc.advance(1);

    if(b1 < 0x80)
    {
        return static_cast<std::uint32_t>(b1);
    }
    else if((b1 >> 5) == 6) // 0b110 == 6
    {
        const std::uint32_t c1 = b1 & ((1 << 5) - 1);
        std::uint32_t c2 = 0;
        if( ! take_continuation(c2)) {return invalid;}

        const std::uint32_t codep = (c1 << 6) + c2;
        if(codep < 0x80) // overlong encoding
        {
            return invalid;
        }
        return codep;
    }
    else if((b1 >> 4) == 14) // 0b1110 == 14
    {
        const std::uint32_t c1 = b1 & ((1 << 4) - 1);
        std::uint32_t c2 = 0;
        std::uint32_t c3 = 0;
        if( ! take_continuation(c2)) {return invalid;}
        if( ! take_continuation(c3)) {return invalid;}

        const std::uint32_t codep = (c1 << 12) + (c2 << 6) + c3;
        if(codep < 0x800) // overlong encoding
        {
            return invalid;
        }
        return codep;
    }
    else if((b1 >> 3) == 30) // 0b11110 == 30
    {
        const std::uint32_t c1 = b1 & ((1 << 3) - 1);
        std::uint32_t c2 = 0;
        std::uint32_t c3 = 0;
        std::uint32_t c4 = 0;
        if( ! take_continuation(c2)) {return invalid;}
        if( ! take_continuation(c3)) {return invalid;}
        if( ! take_continuation(c4)) {return invalid;}

        const std::uint32_t codep = (c1 << 18) + (c2 << 12) + (c3 << 6) + c4;
        if(codep < 0x10000) // overlong encoding
        {
            return invalid;
        }
        return codep;
    }
    else // not a Unicode codepoint in UTF-8
    {
        return invalid;
    }
}
// Tries to match one non-ASCII character that is allowed in a bare key.
//
// On success returns the region covering the character and leaves `loc`
// just past it. On failure (end of input, undecodable UTF-8, or a code
// point outside the allowed ranges) returns an empty region and leaves
// `loc` where it was on entry.
TOML11_INLINE region non_ascii_key_char::scan(location& loc) const
{
    if(loc.eof()) {return region{};}

    const auto first = loc;

    const auto cp = read_utf8(loc);

    if(cp == 0xFFFFFFFF)
    {
        // read_utf8 may have consumed part of a broken sequence; rewind so
        // that a failed match never moves the cursor.
        loc = first;
        return region{};
    }

    // ALPHA / DIGIT / %x2D / %x5F    ; a-z A-Z 0-9 - _
    // / %xB2 / %xB3 / %xB9 / %xBC-BE ; superscript digits, fractions
    // / %xC0-D6 / %xD8-F6 / %xF8-37D ; non-symbol chars in Latin block
    // / %x37F-1FFF                   ; exclude GREEK QUESTION MARK, which is basically a semi-colon
    // / %x200C-200D / %x203F-2040    ; from General Punctuation Block, include the two tie symbols and ZWNJ, ZWJ
    // / %x2070-218F / %x2460-24FF    ; include super-/subscripts, letterlike/numberlike forms, enclosed alphanumerics
    // / %x2C00-2FEF / %x3001-D7FF    ; skip arrows, math, box drawing etc, skip 2FF0-3000 ideographic up/down markers and spaces
    // / %xF900-FDCF / %xFDF0-FFFD    ; skip D800-DFFF surrogate block, E000-F8FF Private Use area, FDD0-FDEF intended for process-internal use (unicode)
    // / %x10000-EFFFF                ; all chars outside BMP range, excluding Private Use planes (F0000-10FFFF)
    //
    // Only the non-ASCII part of that list is tested below.
    if(cp == 0xB2 || cp == 0xB3 || cp == 0xB9 || (0xBC <= cp && cp <= 0xBE) ||
       (0xC0    <= cp && cp <= 0xD6   ) || (0xD8 <= cp && cp <= 0xF6) || (0xF8 <= cp && cp <= 0x37D) ||
       (0x37F   <= cp && cp <= 0x1FFF ) ||
       (0x200C  <= cp && cp <= 0x200D ) || (0x203F <= cp && cp <= 0x2040) ||
       (0x2070  <= cp && cp <= 0x218F ) || (0x2460 <= cp && cp <= 0x24FF) ||
       (0x2C00  <= cp && cp <= 0x2FEF ) || (0x3001 <= cp && cp <= 0xD7FF) ||
       (0xF900  <= cp && cp <= 0xFDCF ) || (0xFDF0 <= cp && cp <= 0xFFFD) ||
       (0x10000 <= cp && cp <= 0xEFFFF) )
    {
        return region(first, loc);
    }

    // decoded fine, but the code point is not permitted in a bare key
    loc = first;
    return region{};
}
// Scanner for a bare key: one or more of letters, digits, '-' (0x2D) and
// '_' (0x5F). When `v1_1_0_allow_non_english_in_bare_keys` is set, the
// non-ASCII key characters are accepted as well.
TOML11_INLINE repeat_at_least const& unquoted_key(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        if(cfg.v1_1_0_allow_non_english_in_bare_keys)
        {
            return repeat_at_least(1,
                either(alpha(cfg), digit(cfg), character{0x2D}, character{0x5F},
                       non_ascii_key_char(cfg)));
        }
        return repeat_at_least(1,
            either(alpha(cfg), digit(cfg), character{0x2D}, character{0x5F}));
    });
    return memo.at(sp);
}
// Scanner for a quoted key: a single-line basic string or a single-line
// literal string.
TOML11_INLINE either const& quoted_key(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return either(
            /*double-quoted = */ basic_string(cfg),
            /*single-quoted = */ literal_string(cfg));
    });
    return memo.at(sp);
}
// Scanner for one key segment: a bare key (tried first) or a quoted key.
TOML11_INLINE either const& simple_key(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return either(
            /*bare   = */ unquoted_key(cfg),
            /*quoted = */ quoted_key(cfg));
    });
    return memo.at(sp);
}
// Scanner for the separator of a dotted key: '.' with optional whitespace
// on both sides.
TOML11_INLINE sequence const& dot_sep(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            ws(cfg),
            character('.'),
            ws(cfg));
    });
    return memo.at(sp);
}
// Scanner for a dotted key: a key segment followed by at least one
// "separator + key segment" pair.
TOML11_INLINE sequence const& dotted_key(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            /*head = */ simple_key(cfg),
            /*tail = */ repeat_at_least(1,
                sequence(dot_sep(cfg), simple_key(cfg))));
    });
    return memo.at(sp);
}
// Scanner for the key/value separator: '=' with optional whitespace on
// both sides.
TOML11_INLINE sequence const& keyval_sep(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            ws(cfg),
            character('='),
            ws(cfg));
    });
    return memo.at(sp);
}
// ===========================================================================
// Table key
// Scanner for a table header: '[', optional whitespace, a key, optional
// whitespace, ']'.
TOML11_INLINE sequence const& std_table(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            /*open  = */ character('['),
            ws(cfg), key(cfg), ws(cfg),
            /*close = */ character(']'));
    });
    return memo.at(sp);
}
// Scanner for an array-of-tables header: "[[", optional whitespace, a key,
// optional whitespace, "]]".
TOML11_INLINE sequence const& array_table(const spec& sp)
{
    static thread_local auto memo = make_cache([](const spec& cfg) {
        return sequence(
            /*open  = */ literal("[["),
            ws(cfg), key(cfg), ws(cfg),
            /*close = */ literal("]]"));
    });
    return memo.at(sp);
}
// ===========================================================================
// extension: null
// Scanner for the extension value written literally as "null".
// The spec argument is ignored; one instance is kept per thread.
TOML11_INLINE literal const& null_value(const spec&)
{
    static thread_local literal null_literal("null");
    return null_literal;
}
} // namespace syntax
} // namespace detail
} // TOML11_INLINE_VERSION_NAMESPACE
} // namespace toml
#endif // TOML11_SYNTAX_IMPL_HPP