diff options
Diffstat (limited to 'include/lib/modernjson/detail/input')
-rw-r--r-- | include/lib/modernjson/detail/input/binary_reader.hpp | 1984 | ||||
-rw-r--r-- | include/lib/modernjson/detail/input/input_adapters.hpp | 398 | ||||
-rw-r--r-- | include/lib/modernjson/detail/input/json_sax.hpp | 701 | ||||
-rw-r--r-- | include/lib/modernjson/detail/input/lexer.hpp | 1506 | ||||
-rw-r--r-- | include/lib/modernjson/detail/input/parser.hpp | 504 | ||||
-rw-r--r-- | include/lib/modernjson/detail/input/position_t.hpp | 27 |
6 files changed, 0 insertions, 5120 deletions
diff --git a/include/lib/modernjson/detail/input/binary_reader.hpp b/include/lib/modernjson/detail/input/binary_reader.hpp deleted file mode 100644 index 637569a..0000000 --- a/include/lib/modernjson/detail/input/binary_reader.hpp +++ /dev/null @@ -1,1984 +0,0 @@ -#pragma once - -#include <algorithm> // generate_n -#include <array> // array -#include <cassert> // assert -#include <cmath> // ldexp -#include <cstddef> // size_t -#include <cstdint> // uint8_t, uint16_t, uint32_t, uint64_t -#include <cstdio> // snprintf -#include <cstring> // memcpy -#include <iterator> // back_inserter -#include <limits> // numeric_limits -#include <string> // char_traits, string -#include <utility> // make_pair, move - -#include <lib/modernjson/detail/input/input_adapters.hpp> -#include <lib/modernjson/detail/input/json_sax.hpp> -#include <lib/modernjson/detail/exceptions.hpp> -#include <lib/modernjson/detail/macro_scope.hpp> -#include <lib/modernjson/detail/meta/is_sax.hpp> -#include <lib/modernjson/detail/value_t.hpp> - -namespace nlohmann -{ -namespace detail -{ -/////////////////// -// binary reader // -/////////////////// - -/*! -@brief deserialization of CBOR, MessagePack, and UBJSON values -*/ -template<typename BasicJsonType, typename SAX = json_sax_dom_parser<BasicJsonType>> -class binary_reader -{ - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using json_sax_t = SAX; - - public: - /*! - @brief create a binary reader - - @param[in] adapter input adapter to read from - */ - explicit binary_reader(input_adapter_t adapter) : ia(std::move(adapter)) - { - (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {}; - assert(ia); - } - - /*! - @param[in] format the binary format to parse - @param[in] sax_ a SAX event processor - @param[in] strict whether to expect the input to be consumed completed - - @return - */ - bool sax_parse(const input_format_t format, - json_sax_t* sax_, - const bool strict = true) - { - sax = sax_; - bool result = false; - - switch (format) - { - case input_format_t::bson: - result = parse_bson_internal(); - break; - - case input_format_t::cbor: - result = parse_cbor_internal(); - break; - - case input_format_t::msgpack: - result = parse_msgpack_internal(); - break; - - case input_format_t::ubjson: - result = parse_ubjson_internal(); - break; - - // LCOV_EXCL_START - default: - assert(false); - // LCOV_EXCL_STOP - } - - // strict mode: next byte must be EOF - if (result and strict) - { - if (format == input_format_t::ubjson) - { - get_ignore_noop(); - } - else - { - get(); - } - - if (JSON_UNLIKELY(current != std::char_traits<char>::eof())) - { - return sax->parse_error(chars_read, get_token_string(), - parse_error::create(110, chars_read, exception_message(format, "expected end of input; last byte: 0x" + get_token_string(), "value"))); - } - } - - return result; - } - - /*! - @brief determine system byte order - - @return true if and only if system's byte order is little endian - - @note from http://stackoverflow.com/a/1001328/266378 - */ - static constexpr bool little_endianess(int num = 1) noexcept - { - return (*reinterpret_cast<char*>(&num) == 1); - } - - private: - ////////// - // BSON // - ////////// - - /*! - @brief Reads in a BSON-object and passes it to the SAX-parser. - @return whether a valid BSON-value was passed to the SAX parser - */ - bool parse_bson_internal() - { - std::int32_t document_size; - get_number<std::int32_t, true>(input_format_t::bson, document_size); - - if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) - { - return false; - } - - return sax->end_object(); - } - - /*! - @brief Parses a C-style string from the BSON input. - @param[in, out] result A reference to the string variable where the read - string is to be stored. - @return `true` if the \x00-byte indicating the end of the string was - encountered before the EOF; false` indicates an unexpected EOF. - */ - bool get_bson_cstr(string_t& result) - { - auto out = std::back_inserter(result); - while (true) - { - get(); - if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "cstring"))) - { - return false; - } - if (current == 0x00) - { - return true; - } - *out++ = static_cast<char>(current); - } - - return true; - } - - /*! - @brief Parses a zero-terminated string of length @a len from the BSON - input. - @param[in] len The length (including the zero-byte at the end) of the - string to be read. - @param[in, out] result A reference to the string variable where the read - string is to be stored. - @tparam NumberType The type of the length @a len - @pre len >= 1 - @return `true` if the string was successfully parsed - */ - template<typename NumberType> - bool get_bson_string(const NumberType len, string_t& result) - { - if (JSON_UNLIKELY(len < 1)) - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "string length must be at least 1, is " + std::to_string(len), "string"))); - } - - return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) and get() != std::char_traits<char>::eof(); - } - - /*! - @brief Read a BSON document element of the given @a element_type. - @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html - @param[in] element_type_parse_position The position in the input stream, - where the `element_type` was read. - @warning Not all BSON element types are supported yet. An unsupported - @a element_type will give rise to a parse_error.114: - Unsupported BSON record type 0x... - @return whether a valid BSON-object/array was passed to the SAX parser - */ - bool parse_bson_element_internal(const int element_type, - const std::size_t element_type_parse_position) - { - switch (element_type) - { - case 0x01: // double - { - double number; - return get_number<double, true>(input_format_t::bson, number) and sax->number_float(static_cast<number_float_t>(number), ""); - } - - case 0x02: // string - { - std::int32_t len; - string_t value; - return get_number<std::int32_t, true>(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value); - } - - case 0x03: // object - { - return parse_bson_internal(); - } - - case 0x04: // array - { - return parse_bson_array(); - } - - case 0x08: // boolean - { - return sax->boolean(get() != 0); - } - - case 0x0A: // null - { - return sax->null(); - } - - case 0x10: // int32 - { - std::int32_t value; - return get_number<std::int32_t, true>(input_format_t::bson, value) and sax->number_integer(value); - } - - case 0x12: // int64 - { - std::int64_t value; - return get_number<std::int64_t, true>(input_format_t::bson, value) and sax->number_integer(value); - } - - default: // anything else not supported (yet) - { - char cr[3]; - (std::snprintf)(cr, sizeof(cr), "%.2hhX", static_cast<unsigned char>(element_type)); - return sax->parse_error(element_type_parse_position, std::string(cr), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr))); - } - } - } - - /*! - @brief Read a BSON element list (as specified in the BSON-spec) - - The same binary layout is used for objects and arrays, hence it must be - indicated with the argument @a is_array which one is expected - (true --> array, false --> object). - - @param[in] is_array Determines if the element list being read is to be - treated as an object (@a is_array == false), or as an - array (@a is_array == true). - @return whether a valid BSON-object/array was passed to the SAX parser - */ - bool parse_bson_element_list(const bool is_array) - { - string_t key; - while (int element_type = get()) - { - if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list"))) - { - return false; - } - - const std::size_t element_type_parse_position = chars_read; - if (JSON_UNLIKELY(not get_bson_cstr(key))) - { - return false; - } - - if (not is_array) - { - if (not sax->key(key)) - { - return false; - } - } - - if (JSON_UNLIKELY(not parse_bson_element_internal(element_type, element_type_parse_position))) - { - return false; - } - - // get_bson_cstr only appends - key.clear(); - } - - return true; - } - - /*! - @brief Reads an array from the BSON input and passes it to the SAX-parser. - @return whether a valid BSON-array was passed to the SAX parser - */ - bool parse_bson_array() - { - std::int32_t document_size; - get_number<std::int32_t, true>(input_format_t::bson, document_size); - - if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/true))) - { - return false; - } - - return sax->end_array(); - } - - ////////// - // CBOR // - ////////// - - /*! - @param[in] get_char whether a new character should be retrieved from the - input (true, default) or whether the last read - character should be considered instead - - @return whether a valid CBOR value was passed to the SAX parser - */ - bool parse_cbor_internal(const bool get_char = true) - { - switch (get_char ? get() : current) - { - // EOF - case std::char_traits<char>::eof(): - return unexpect_eof(input_format_t::cbor, "value"); - - // Integer 0x00..0x17 (0..23) - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x08: - case 0x09: - case 0x0A: - case 0x0B: - case 0x0C: - case 0x0D: - case 0x0E: - case 0x0F: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - return sax->number_unsigned(static_cast<number_unsigned_t>(current)); - - case 0x18: // Unsigned integer (one-byte uint8_t follows) - { - uint8_t number; - return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); - } - - case 0x19: // Unsigned integer (two-byte uint16_t follows) - { - uint16_t number; - return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); - } - - case 0x1A: // Unsigned integer (four-byte uint32_t follows) - { - uint32_t number; - return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); - } - - case 0x1B: // Unsigned integer (eight-byte uint64_t follows) - { - uint64_t number; - return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); - } - - // Negative integer -1-0x00..-1-0x17 (-1..-24) - case 0x20: - case 0x21: - case 0x22: - case 0x23: - case 0x24: - case 0x25: - case 0x26: - case 0x27: - case 0x28: - case 0x29: - case 0x2A: - case 0x2B: - case 0x2C: - case 0x2D: - case 0x2E: - case 0x2F: - case 0x30: - case 0x31: - case 0x32: - case 0x33: - case 0x34: - case 0x35: - case 0x36: - case 0x37: - return sax->number_integer(static_cast<int8_t>(0x20 - 1 - current)); - - case 0x38: // Negative integer (one-byte uint8_t follows) - { - uint8_t number; - return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); - } - - case 0x39: // Negative integer -1-n (two-byte uint16_t follows) - { - uint16_t number; - return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); - } - - case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) - { - uint32_t number; - return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); - } - - case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows) - { - uint64_t number; - return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - - static_cast<number_integer_t>(number)); - } - - // UTF-8 string (0x00..0x17 bytes follow) - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: - case 0x6C: - case 0x6D: - case 0x6E: - case 0x6F: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) - case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) - case 0x7F: // UTF-8 string (indefinite length) - { - string_t s; - return get_cbor_string(s) and sax->string(s); - } - - // array (0x00..0x17 data items follow) - case 0x80: - case 0x81: - case 0x82: - case 0x83: - case 0x84: - case 0x85: - case 0x86: - case 0x87: - case 0x88: - case 0x89: - case 0x8A: - case 0x8B: - case 0x8C: - case 0x8D: - case 0x8E: - case 0x8F: - case 0x90: - case 0x91: - case 0x92: - case 0x93: - case 0x94: - case 0x95: - case 0x96: - case 0x97: - return get_cbor_array(static_cast<std::size_t>(current & 0x1F)); - - case 0x98: // array (one-byte uint8_t for n follows) - { - uint8_t len; - return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); - } - - case 0x99: // array (two-byte uint16_t for n follow) - { - uint16_t len; - return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); - } - - case 0x9A: // array (four-byte uint32_t for n follow) - { - uint32_t len; - return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); - } - - case 0x9B: // array (eight-byte uint64_t for n follow) - { - uint64_t len; - return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); - } - - case 0x9F: // array (indefinite length) - return get_cbor_array(std::size_t(-1)); - - // map (0x00..0x17 pairs of data items follow) - case 0xA0: - case 0xA1: - case 0xA2: - case 0xA3: - case 0xA4: - case 0xA5: - case 0xA6: - case 0xA7: - case 0xA8: - case 0xA9: - case 0xAA: - case 0xAB: - case 0xAC: - case 0xAD: - case 0xAE: - case 0xAF: - case 0xB0: - case 0xB1: - case 0xB2: - case 0xB3: - case 0xB4: - case 0xB5: - case 0xB6: - case 0xB7: - return get_cbor_object(static_cast<std::size_t>(current & 0x1F)); - - case 0xB8: // map (one-byte uint8_t for n follows) - { - uint8_t len; - return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); - } - - case 0xB9: // map (two-byte uint16_t for n follow) - { - uint16_t len; - return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); - } - - case 0xBA: // map (four-byte uint32_t for n follow) - { - uint32_t len; - return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); - } - - case 0xBB: // map (eight-byte uint64_t for n follow) - { - uint64_t len; - return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); - } - - case 0xBF: // map (indefinite length) - return get_cbor_object(std::size_t(-1)); - - case 0xF4: // false - return sax->boolean(false); - - case 0xF5: // true - return sax->boolean(true); - - case 0xF6: // null - return sax->null(); - - case 0xF9: // Half-Precision Float (two-byte IEEE 754) - { - const int byte1_raw = get(); - if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "number"))) - { - return false; - } - const int byte2_raw = get(); - if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "number"))) - { - return false; - } - - const auto byte1 = static_cast<unsigned char>(byte1_raw); - const auto byte2 = static_cast<unsigned char>(byte2_raw); - - // code from RFC 7049, Appendix D, Figure 3: - // As half-precision floating-point numbers were only added - // to IEEE 754 in 2008, today's programming platforms often - // still only have limited support for them. It is very - // easy to include at least decoding support for them even - // without such support. An example of a small decoder for - // half-precision floating-point numbers in the C language - // is shown in Fig. 3. - const int half = (byte1 << 8) + byte2; - const double val = [&half] - { - const int exp = (half >> 10) & 0x1F; - const int mant = half & 0x3FF; - assert(0 <= exp and exp <= 32); - assert(0 <= mant and mant <= 1024); - switch (exp) - { - case 0: - return std::ldexp(mant, -24); - case 31: - return (mant == 0) - ? std::numeric_limits<double>::infinity() - : std::numeric_limits<double>::quiet_NaN(); - default: - return std::ldexp(mant + 1024, exp - 25); - } - }(); - return sax->number_float((half & 0x8000) != 0 - ? static_cast<number_float_t>(-val) - : static_cast<number_float_t>(val), ""); - } - - case 0xFA: // Single-Precision Float (four-byte IEEE 754) - { - float number; - return get_number(input_format_t::cbor, number) and sax->number_float(static_cast<number_float_t>(number), ""); - } - - case 0xFB: // Double-Precision Float (eight-byte IEEE 754) - { - double number; - return get_number(input_format_t::cbor, number) and sax->number_float(static_cast<number_float_t>(number), ""); - } - - default: // anything else (0xFF is handled inside the other types) - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value"))); - } - } - } - - /*! - @brief reads a CBOR string - - This function first reads starting bytes to determine the expected - string length and then copies this number of bytes into a string. - Additionally, CBOR's strings with indefinite lengths are supported. - - @param[out] result created string - - @return whether string creation completed - */ - bool get_cbor_string(string_t& result) - { - if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string"))) - { - return false; - } - - switch (current) - { - // UTF-8 string (0x00..0x17 bytes follow) - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: - case 0x6C: - case 0x6D: - case 0x6E: - case 0x6F: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - { - return get_string(input_format_t::cbor, current & 0x1F, result); - } - - case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - { - uint8_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - { - uint16_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) - { - uint32_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) - { - uint64_t len; - return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); - } - - case 0x7F: // UTF-8 string (indefinite length) - { - while (get() != 0xFF) - { - string_t chunk; - if (not get_cbor_string(chunk)) - { - return false; - } - result.append(chunk); - } - return true; - } - - default: - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"))); - } - } - } - - /*! - @param[in] len the length of the array or std::size_t(-1) for an - array of indefinite size - @return whether array creation completed - */ - bool get_cbor_array(const std::size_t len) - { - if (JSON_UNLIKELY(not sax->start_array(len))) - { - return false; - } - - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) - { - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - } - } - else - { - while (get() != 0xFF) - { - if (JSON_UNLIKELY(not parse_cbor_internal(false))) - { - return false; - } - } - } - - return sax->end_array(); - } - - /*! - @param[in] len the length of the object or std::size_t(-1) for an - object of indefinite size - @return whether object creation completed - */ - bool get_cbor_object(const std::size_t len) - { - if (not JSON_UNLIKELY(sax->start_object(len))) - { - return false; - } - - string_t key; - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) - { - get(); - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); - } - } - else - { - while (get() != 0xFF) - { - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); - } - } - - return sax->end_object(); - } - - ///////////// - // MsgPack // - ///////////// - - /*! - @return whether a valid MessagePack value was passed to the SAX parser - */ - bool parse_msgpack_internal() - { - switch (get()) - { - // EOF - case std::char_traits<char>::eof(): - return unexpect_eof(input_format_t::msgpack, "value"); - - // positive fixint - case 0x00: - case 0x01: - case 0x02: - case 0x03: - case 0x04: - case 0x05: - case 0x06: - case 0x07: - case 0x08: - case 0x09: - case 0x0A: - case 0x0B: - case 0x0C: - case 0x0D: - case 0x0E: - case 0x0F: - case 0x10: - case 0x11: - case 0x12: - case 0x13: - case 0x14: - case 0x15: - case 0x16: - case 0x17: - case 0x18: - case 0x19: - case 0x1A: - case 0x1B: - case 0x1C: - case 0x1D: - case 0x1E: - case 0x1F: - case 0x20: - case 0x21: - case 0x22: - case 0x23: - case 0x24: - case 0x25: - case 0x26: - case 0x27: - case 0x28: - case 0x29: - case 0x2A: - case 0x2B: - case 0x2C: - case 0x2D: - case 0x2E: - case 0x2F: - case 0x30: - case 0x31: - case 0x32: - case 0x33: - case 0x34: - case 0x35: - case 0x36: - case 0x37: - case 0x38: - case 0x39: - case 0x3A: - case 0x3B: - case 0x3C: - case 0x3D: - case 0x3E: - case 0x3F: - case 0x40: - case 0x41: - case 0x42: - case 0x43: - case 0x44: - case 0x45: - case 0x46: - case 0x47: - case 0x48: - case 0x49: - case 0x4A: - case 0x4B: - case 0x4C: - case 0x4D: - case 0x4E: - case 0x4F: - case 0x50: - case 0x51: - case 0x52: - case 0x53: - case 0x54: - case 0x55: - case 0x56: - case 0x57: - case 0x58: - case 0x59: - case 0x5A: - case 0x5B: - case 0x5C: - case 0x5D: - case 0x5E: - case 0x5F: - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: - case 0x6C: - case 0x6D: - case 0x6E: - case 0x6F: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - case 0x78: - case 0x79: - case 0x7A: - case 0x7B: - case 0x7C: - case 0x7D: - case 0x7E: - case 0x7F: - return sax->number_unsigned(static_cast<number_unsigned_t>(current)); - - // fixmap - case 0x80: - case 0x81: - case 0x82: - case 0x83: - case 0x84: - case 0x85: - case 0x86: - case 0x87: - case 0x88: - case 0x89: - case 0x8A: - case 0x8B: - case 0x8C: - case 0x8D: - case 0x8E: - case 0x8F: - return get_msgpack_object(static_cast<std::size_t>(current & 0x0F)); - - // fixarray - case 0x90: - case 0x91: - case 0x92: - case 0x93: - case 0x94: - case 0x95: - case 0x96: - case 0x97: - case 0x98: - case 0x99: - case 0x9A: - case 0x9B: - case 0x9C: - case 0x9D: - case 0x9E: - case 0x9F: - return get_msgpack_array(static_cast<std::size_t>(current & 0x0F)); - - // fixstr - case 0xA0: - case 0xA1: - case 0xA2: - case 0xA3: - case 0xA4: - case 0xA5: - case 0xA6: - case 0xA7: - case 0xA8: - case 0xA9: - case 0xAA: - case 0xAB: - case 0xAC: - case 0xAD: - case 0xAE: - case 0xAF: - case 0xB0: - case 0xB1: - case 0xB2: - case 0xB3: - case 0xB4: - case 0xB5: - case 0xB6: - case 0xB7: - case 0xB8: - case 0xB9: - case 0xBA: - case 0xBB: - case 0xBC: - case 0xBD: - case 0xBE: - case 0xBF: - { - string_t s; - return get_msgpack_string(s) and sax->string(s); - } - - case 0xC0: // nil - return sax->null(); - - case 0xC2: // false - return sax->boolean(false); - - case 0xC3: // true - return sax->boolean(true); - - case 0xCA: // float 32 - { - float number; - return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), ""); - } - - case 0xCB: // float 64 - { - double number; - return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), ""); - } - - case 0xCC: // uint 8 - { - uint8_t number; - return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); - } - - case 0xCD: // uint 16 - { - uint16_t number; - return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); - } - - case 0xCE: // uint 32 - { - uint32_t number; - return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); - } - - case 0xCF: // uint 64 - { - uint64_t number; - return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); - } - - case 0xD0: // int 8 - { - int8_t number; - return get_number(input_format_t::msgpack, number) and sax->number_integer(number); - } - - case 0xD1: // int 16 - { - int16_t number; - return get_number(input_format_t::msgpack, number) and sax->number_integer(number); - } - - case 0xD2: // int 32 - { - int32_t number; - return get_number(input_format_t::msgpack, number) and sax->number_integer(number); - } - - case 0xD3: // int 64 - { - int64_t number; - return get_number(input_format_t::msgpack, number) and sax->number_integer(number); - } - - case 0xD9: // str 8 - case 0xDA: // str 16 - case 0xDB: // str 32 - { - string_t s; - return get_msgpack_string(s) and sax->string(s); - } - - case 0xDC: // array 16 - { - uint16_t len; - return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len)); - } - - case 0xDD: // array 32 - { - uint32_t len; - return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len)); - } - - case 0xDE: // map 16 - { - uint16_t len; - return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len)); - } - - case 0xDF: // map 32 - { - uint32_t len; - return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len)); - } - - // negative fixint - case 0xE0: - case 0xE1: - case 0xE2: - case 0xE3: - case 0xE4: - case 0xE5: - case 0xE6: - case 0xE7: - case 0xE8: - case 0xE9: - case 0xEA: - case 0xEB: - case 0xEC: - case 0xED: - case 0xEE: - case 0xEF: - case 0xF0: - case 0xF1: - case 0xF2: - case 0xF3: - case 0xF4: - case 0xF5: - case 0xF6: - case 0xF7: - case 0xF8: - case 0xF9: - case 0xFA: - case 0xFB: - case 0xFC: - case 0xFD: - case 0xFE: - case 0xFF: - return sax->number_integer(static_cast<int8_t>(current)); - - default: // anything else - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value"))); - } - } - } - - /*! - @brief reads a MessagePack string - - This function first reads starting bytes to determine the expected - string length and then copies this number of bytes into a string. - - @param[out] result created string - - @return whether string creation completed - */ - bool get_msgpack_string(string_t& result) - { - if (JSON_UNLIKELY(not unexpect_eof(input_format_t::msgpack, "string"))) - { - return false; - } - - switch (current) - { - // fixstr - case 0xA0: - case 0xA1: - case 0xA2: - case 0xA3: - case 0xA4: - case 0xA5: - case 0xA6: - case 0xA7: - case 0xA8: - case 0xA9: - case 0xAA: - case 0xAB: - case 0xAC: - case 0xAD: - case 0xAE: - case 0xAF: - case 0xB0: - case 0xB1: - case 0xB2: - case 0xB3: - case 0xB4: - case 0xB5: - case 0xB6: - case 0xB7: - case 0xB8: - case 0xB9: - case 0xBA: - case 0xBB: - case 0xBC: - case 0xBD: - case 0xBE: - case 0xBF: - { - return get_string(input_format_t::msgpack, current & 0x1F, result); - } - - case 0xD9: // str 8 - { - uint8_t len; - return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); - } - - case 0xDA: // str 16 - { - uint16_t len; - return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); - } - - case 0xDB: // str 32 - { - uint32_t len; - return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); - } - - default: - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x" + last_token, "string"))); - } - } - } - - /*! - @param[in] len the length of the array - @return whether array creation completed - */ - bool get_msgpack_array(const std::size_t len) - { - if (JSON_UNLIKELY(not sax->start_array(len))) - { - return false; - } - - for (std::size_t i = 0; i < len; ++i) - { - if (JSON_UNLIKELY(not parse_msgpack_internal())) - { - return false; - } - } - - return sax->end_array(); - } - - /*! - @param[in] len the length of the object - @return whether object creation completed - */ - bool get_msgpack_object(const std::size_t len) - { - if (JSON_UNLIKELY(not sax->start_object(len))) - { - return false; - } - - string_t key; - for (std::size_t i = 0; i < len; ++i) - { - get(); - if (JSON_UNLIKELY(not get_msgpack_string(key) or not sax->key(key))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_msgpack_internal())) - { - return false; - } - key.clear(); - } - - return sax->end_object(); - } - - //////////// - // UBJSON // - //////////// - - /*! - @param[in] get_char whether a new character should be retrieved from the - input (true, default) or whether the last read - character should be considered instead - - @return whether a valid UBJSON value was passed to the SAX parser - */ - bool parse_ubjson_internal(const bool get_char = true) - { - return get_ubjson_value(get_char ? get_ignore_noop() : current); - } - - /*! - @brief reads a UBJSON string - - This function is either called after reading the 'S' byte explicitly - indicating a string, or in case of an object key where the 'S' byte can be - left out. - - @param[out] result created string - @param[in] get_char whether a new character should be retrieved from the - input (true, default) or whether the last read - character should be considered instead - - @return whether string creation completed - */ - bool get_ubjson_string(string_t& result, const bool get_char = true) - { - if (get_char) - { - get(); // TODO: may we ignore N here? - } - - if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "value"))) - { - return false; - } - - switch (current) - { - case 'U': - { - uint8_t len; - return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); - } - - case 'i': - { - int8_t len; - return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); - } - - case 'I': - { - int16_t len; - return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); - } - - case 'l': - { - int32_t len; - return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); - } - - case 'L': - { - int64_t len; - return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); - } - - default: - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token, "string"))); - } - } - - /*! - @param[out] result determined size - @return whether size determination completed - */ - bool get_ubjson_size_value(std::size_t& result) - { - switch (get_ignore_noop()) - { - case 'U': - { - uint8_t number; - if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) - { - return false; - } - result = static_cast<std::size_t>(number); - return true; - } - - case 'i': - { - int8_t number; - if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) - { - return false; - } - result = static_cast<std::size_t>(number); - return true; - } - - case 'I': - { - int16_t number; - if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) - { - return false; - } - result = static_cast<std::size_t>(number); - return true; - } - - case 'l': - { - int32_t number; - if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) - { - return false; - } - result = static_cast<std::size_t>(number); - return true; - } - - case 'L': - { - int64_t number; - if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) - { - return false; - } - result = static_cast<std::size_t>(number); - return true; - } - - default: - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size"))); - } - } - } - - /*! - @brief determine the type and size for a container - - In the optimized UBJSON format, a type and a size can be provided to allow - for a more compact representation. - - @param[out] result pair of the size and the type - - @return whether pair creation completed - */ - bool get_ubjson_size_type(std::pair<std::size_t, int>& result) - { - result.first = string_t::npos; // size - result.second = 0; // type - - get_ignore_noop(); - - if (current == '$') - { - result.second = get(); // must not ignore 'N', because 'N' maybe the type - if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "type"))) - { - return false; - } - - get_ignore_noop(); - if (JSON_UNLIKELY(current != '#')) - { - if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "value"))) - { - return false; - } - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "expected '#' after type information; last byte: 0x" + last_token, "size"))); - } - - return get_ubjson_size_value(result.first); - } - else if (current == '#') - { - return get_ubjson_size_value(result.first); - } - return true; - } - - /*! - @param prefix the previously read or set type prefix - @return whether value creation completed - */ - bool get_ubjson_value(const int prefix) - { - switch (prefix) - { - case std::char_traits<char>::eof(): // EOF - return unexpect_eof(input_format_t::ubjson, "value"); - - case 'T': // true - return sax->boolean(true); - case 'F': // false - return sax->boolean(false); - - case 'Z': // null - return sax->null(); - - case 'U': - { - uint8_t number; - return get_number(input_format_t::ubjson, number) and sax->number_unsigned(number); - } - - case 'i': - { - int8_t number; - return get_number(input_format_t::ubjson, number) and sax->number_integer(number); - } - - case 'I': - { - int16_t number; - return get_number(input_format_t::ubjson, number) and sax->number_integer(number); - } - - case 'l': - { - int32_t number; - return get_number(input_format_t::ubjson, number) and sax->number_integer(number); - } - - case 'L': - { - int64_t number; - return get_number(input_format_t::ubjson, number) and sax->number_integer(number); - } - - case 'd': - { - float number; - return get_number(input_format_t::ubjson, number) and sax->number_float(static_cast<number_float_t>(number), ""); - } - - case 'D': - { - double number; - return get_number(input_format_t::ubjson, number) and sax->number_float(static_cast<number_float_t>(number), ""); - } - - case 'C': // char - { - get(); - if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "char"))) - { - return false; - } - if (JSON_UNLIKELY(current > 127)) - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token, "char"))); - } - string_t s(1, static_cast<char>(current)); - return sax->string(s); - } - - case 'S': // string - { - string_t s; - return get_ubjson_string(s) and sax->string(s); - } - - case '[': // array - return get_ubjson_array(); - - case '{': // object - return get_ubjson_object(); - - default: // anything else - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "invalid byte: 0x" + last_token, "value"))); - } - } - } - - /*! - @return whether array creation completed - */ - bool get_ubjson_array() - { - std::pair<std::size_t, int> size_and_type; - if (JSON_UNLIKELY(not get_ubjson_size_type(size_and_type))) - { - return false; - } - - if (size_and_type.first != string_t::npos) - { - if (JSON_UNLIKELY(not sax->start_array(size_and_type.first))) - { - return false; - } - - if (size_and_type.second != 0) - { - if (size_and_type.second != 'N') - { - for (std::size_t i = 0; i < size_and_type.first; ++i) - { - if (JSON_UNLIKELY(not get_ubjson_value(size_and_type.second))) - { - return false; - } - } - } - } - else - { - for (std::size_t i = 0; i < size_and_type.first; ++i) - { - if (JSON_UNLIKELY(not parse_ubjson_internal())) - { - return false; - } - } - } - } - else - { - if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) - { - return false; - } - - while (current != ']') - { - if (JSON_UNLIKELY(not parse_ubjson_internal(false))) - { - return false; - } - get_ignore_noop(); - } - } - - return sax->end_array(); - } - - /*! - @return whether object creation completed - */ - bool get_ubjson_object() - { - std::pair<std::size_t, int> size_and_type; - if (JSON_UNLIKELY(not get_ubjson_size_type(size_and_type))) - { - return false; - } - - string_t key; - if (size_and_type.first != string_t::npos) - { - if (JSON_UNLIKELY(not sax->start_object(size_and_type.first))) - { - return false; - } - - if (size_and_type.second != 0) - { - for (std::size_t i = 0; i < size_and_type.first; ++i) - { - if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(key))) - { - return false; - } - if (JSON_UNLIKELY(not get_ubjson_value(size_and_type.second))) - { - return false; - } - key.clear(); - } - } - else - { - for (std::size_t i = 0; i < size_and_type.first; ++i) - { - if (JSON_UNLIKELY(not get_ubjson_string(key) or not sax->key(key))) - { - return false; - } - if (JSON_UNLIKELY(not parse_ubjson_internal())) - { - return false; - } - key.clear(); - } - } - } - else - { - if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) - { - return false; - } - - while (current != '}') - { - if (JSON_UNLIKELY(not get_ubjson_string(key, false) or not sax->key(key))) - { - return false; - } - if (JSON_UNLIKELY(not parse_ubjson_internal())) - { - return false; - } - get_ignore_noop(); - key.clear(); - } - } - - return sax->end_object(); - } - - /////////////////////// - // Utility functions // - /////////////////////// - - /*! - @brief get next character from the input - - This function provides the interface to the used input adapter. It does - not throw in case the input reached EOF, but returns a -'ve valued - `std::char_traits<char>::eof()` in that case. - - @return character read from the input - */ - int get() - { - ++chars_read; - return (current = ia->get_character()); - } - - /*! - @return character read from the input after ignoring all 'N' entries - */ - int get_ignore_noop() - { - do - { - get(); - } - while (current == 'N'); - - return current; - } - - /* - @brief read a number from the input - - @tparam NumberType the type of the number - @param[in] format the current format (for diagnostics) - @param[out] result number of type @a NumberType - - @return whether conversion completed - - @note This function needs to respect the system's endianess, because - bytes in CBOR, MessagePack, and UBJSON are stored in network order - (big endian) and therefore need reordering on little endian systems. - */ - template<typename NumberType, bool InputIsLittleEndian = false> - bool get_number(const input_format_t format, NumberType& result) - { - // step 1: read input into array with system's byte order - std::array<uint8_t, sizeof(NumberType)> vec; - for (std::size_t i = 0; i < sizeof(NumberType); ++i) - { - get(); - if (JSON_UNLIKELY(not unexpect_eof(format, "number"))) - { - return false; - } - - // reverse byte order prior to conversion if necessary - if (is_little_endian && !InputIsLittleEndian) - { - vec[sizeof(NumberType) - i - 1] = static_cast<uint8_t>(current); - } - else - { - vec[i] = static_cast<uint8_t>(current); // LCOV_EXCL_LINE - } - } - - // step 2: convert array into number of type T and return - std::memcpy(&result, vec.data(), sizeof(NumberType)); - return true; - } - - /*! - @brief create a string by reading characters from the input - - @tparam NumberType the type of the number - @param[in] format the current format (for diagnostics) - @param[in] len number of characters to read - @param[out] result string created by reading @a len bytes - - @return whether string creation completed - - @note We can not reserve @a len bytes for the result, because @a len - may be too large. Usually, @ref unexpect_eof() detects the end of - the input before we run out of string memory. - */ - template<typename NumberType> - bool get_string(const input_format_t format, - const NumberType len, - string_t& result) - { - bool success = true; - std::generate_n(std::back_inserter(result), len, [this, &success, &format]() - { - get(); - if (JSON_UNLIKELY(not unexpect_eof(format, "string"))) - { - success = false; - } - return static_cast<char>(current); - }); - return success; - } - - /*! - @param[in] format the current format (for diagnostics) - @param[in] context further context information (for diagnostics) - @return whether the last read character is not EOF - */ - bool unexpect_eof(const input_format_t format, const char* context) const - { - if (JSON_UNLIKELY(current == std::char_traits<char>::eof())) - { - return sax->parse_error(chars_read, "<end of file>", - parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context))); - } - return true; - } - - /*! - @return a string representation of the last read byte - */ - std::string get_token_string() const - { - char cr[3]; - (std::snprintf)(cr, 3, "%.2hhX", static_cast<unsigned char>(current)); - return std::string{cr}; - } - - /*! - @param[in] format the current format - @param[in] detail a detailed error message - @param[in] context further contect information - @return a message string to use in the parse_error exceptions - */ - std::string exception_message(const input_format_t format, - const std::string& detail, - const std::string& context) const - { - std::string error_msg = "syntax error while parsing "; - - switch (format) - { - case input_format_t::cbor: - error_msg += "CBOR"; - break; - - case input_format_t::msgpack: - error_msg += "MessagePack"; - break; - - case input_format_t::ubjson: - error_msg += "UBJSON"; - break; - - case input_format_t::bson: - error_msg += "BSON"; - break; - - // LCOV_EXCL_START - default: - assert(false); - // LCOV_EXCL_STOP - } - - return error_msg + " " + context + ": " + detail; - } - - private: - /// input adapter - input_adapter_t ia = nullptr; - - /// the current character - int current = std::char_traits<char>::eof(); - - /// the number of characters read - std::size_t chars_read = 0; - - /// whether we can assume little endianess - const bool is_little_endian = little_endianess(); - - /// the SAX parser - json_sax_t* sax = nullptr; -}; -} // namespace detail -} // namespace nlohmann diff --git a/include/lib/modernjson/detail/input/input_adapters.hpp b/include/lib/modernjson/detail/input/input_adapters.hpp deleted file mode 100644 index dfb8caf..0000000 --- a/include/lib/modernjson/detail/input/input_adapters.hpp +++ /dev/null @@ -1,398 +0,0 @@ -#pragma once - -#include <cassert> // assert -#include <cstddef> // size_t -#include <cstring> // strlen -#include <istream> // istream -#include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next -#include <memory> // shared_ptr, make_shared, addressof -#include <numeric> // accumulate -#include <string> // string, char_traits -#include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer -#include <utility> // pair, declval - -#include <lib/modernjson/detail/macro_scope.hpp> - -namespace nlohmann -{ -namespace detail -{ -/// the supported input formats -enum class input_format_t { json, cbor, msgpack, ubjson, bson }; - -//////////////////// -// input adapters // -//////////////////// - -/*! -@brief abstract input adapter interface - -Produces a stream of std::char_traits<char>::int_type characters from a -std::istream, a buffer, or some other input type. Accepts the return of -exactly one non-EOF character for future input. The int_type characters -returned consist of all valid char values as positive values (typically -unsigned char), plus an EOF value outside that range, specified by the value -of the function std::char_traits<char>::eof(). This value is typically -1, but -could be any arbitrary value which is not a valid char value. -*/ -struct input_adapter_protocol -{ - /// get a character [0,255] or std::char_traits<char>::eof(). - virtual std::char_traits<char>::int_type get_character() = 0; - virtual ~input_adapter_protocol() = default; -}; - -/// a type to simplify interfaces -using input_adapter_t = std::shared_ptr<input_adapter_protocol>; - -/*! -Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at -beginning of input. Does not support changing the underlying std::streambuf -in mid-input. Maintains underlying std::istream and std::streambuf to support -subsequent use of standard std::istream operations to process any input -characters following those used in parsing the JSON input. Clears the -std::istream flags; any input errors (e.g., EOF) will be detected by the first -subsequent call for input from the std::istream. -*/ -class input_stream_adapter : public input_adapter_protocol -{ - public: - ~input_stream_adapter() override - { - // clear stream flags; we use underlying streambuf I/O, do not - // maintain ifstream flags, except eof - is.clear(is.rdstate() & std::ios::eofbit); - } - - explicit input_stream_adapter(std::istream& i) - : is(i), sb(*i.rdbuf()) - {} - - // delete because of pointer members - input_stream_adapter(const input_stream_adapter&) = delete; - input_stream_adapter& operator=(input_stream_adapter&) = delete; - input_stream_adapter(input_stream_adapter&&) = delete; - input_stream_adapter& operator=(input_stream_adapter&&) = delete; - - // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to - // ensure that std::char_traits<char>::eof() and the character 0xFF do not - // end up as the same value, eg. 0xFFFFFFFF. - std::char_traits<char>::int_type get_character() override - { - auto res = sb.sbumpc(); - // set eof manually, as we don't use the istream interface. - if (res == EOF) - { - is.clear(is.rdstate() | std::ios::eofbit); - } - return res; - } - - private: - /// the associated input stream - std::istream& is; - std::streambuf& sb; -}; - -/// input adapter for buffer input -class input_buffer_adapter : public input_adapter_protocol -{ - public: - input_buffer_adapter(const char* b, const std::size_t l) noexcept - : cursor(b), limit(b + l) - {} - - // delete because of pointer members - input_buffer_adapter(const input_buffer_adapter&) = delete; - input_buffer_adapter& operator=(input_buffer_adapter&) = delete; - input_buffer_adapter(input_buffer_adapter&&) = delete; - input_buffer_adapter& operator=(input_buffer_adapter&&) = delete; - ~input_buffer_adapter() override = default; - - std::char_traits<char>::int_type get_character() noexcept override - { - if (JSON_LIKELY(cursor < limit)) - { - return std::char_traits<char>::to_int_type(*(cursor++)); - } - - return std::char_traits<char>::eof(); - } - - private: - /// pointer to the current character - const char* cursor; - /// pointer past the last character - const char* const limit; -}; - -template<typename WideStringType, size_t T> -struct wide_string_input_helper -{ - // UTF-32 - static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array<std::char_traits<char>::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) - { - utf8_bytes_index = 0; - - if (current_wchar == str.size()) - { - utf8_bytes[0] = std::char_traits<char>::eof(); - utf8_bytes_filled = 1; - } - else - { - // get the current character - const auto wc = static_cast<int>(str[current_wchar++]); - - // UTF-32 to UTF-8 encoding - if (wc < 0x80) - { - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; - } - else if (wc <= 0x7FF) - { - utf8_bytes[0] = 0xC0 | ((wc >> 6) & 0x1F); - utf8_bytes[1] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 2; - } - else if (wc <= 0xFFFF) - { - utf8_bytes[0] = 0xE0 | ((wc >> 12) & 0x0F); - utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[2] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 3; - } - else if (wc <= 0x10FFFF) - { - utf8_bytes[0] = 0xF0 | ((wc >> 18) & 0x07); - utf8_bytes[1] = 0x80 | ((wc >> 12) & 0x3F); - utf8_bytes[2] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[3] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 4; - } - else - { - // unknown character - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; - } - } - } -}; - -template<typename WideStringType> -struct wide_string_input_helper<WideStringType, 2> -{ - // UTF-16 - static void fill_buffer(const WideStringType& str, size_t& current_wchar, std::array<std::char_traits<char>::int_type, 4>& utf8_bytes, size_t& utf8_bytes_index, size_t& utf8_bytes_filled) - { - utf8_bytes_index = 0; - - if (current_wchar == str.size()) - { - utf8_bytes[0] = std::char_traits<char>::eof(); - utf8_bytes_filled = 1; - } - else - { - // get the current character - const auto wc = static_cast<int>(str[current_wchar++]); - - // UTF-16 to UTF-8 encoding - if (wc < 0x80) - { - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; - } - else if (wc <= 0x7FF) - { - utf8_bytes[0] = 0xC0 | ((wc >> 6)); - utf8_bytes[1] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 2; - } - else if (0xD800 > wc or wc >= 0xE000) - { - utf8_bytes[0] = 0xE0 | ((wc >> 12)); - utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); - utf8_bytes[2] = 0x80 | (wc & 0x3F); - utf8_bytes_filled = 3; - } - else - { - if (current_wchar < str.size()) - { - const auto wc2 = static_cast<int>(str[current_wchar++]); - const int charcode = 0x10000 + (((wc & 0x3FF) << 10) | (wc2 & 0x3FF)); - utf8_bytes[0] = 0xf0 | (charcode >> 18); - utf8_bytes[1] = 0x80 | ((charcode >> 12) & 0x3F); - utf8_bytes[2] = 0x80 | ((charcode >> 6) & 0x3F); - utf8_bytes[3] = 0x80 | (charcode & 0x3F); - utf8_bytes_filled = 4; - } - else - { - // unknown character - ++current_wchar; - utf8_bytes[0] = wc; - utf8_bytes_filled = 1; - } - } - } - } -}; - -template<typename WideStringType> -class wide_string_input_adapter : public input_adapter_protocol -{ - public: - explicit wide_string_input_adapter(const WideStringType& w) noexcept - : str(w) - {} - - std::char_traits<char>::int_type get_character() noexcept override - { - // check if buffer needs to be filled - if (utf8_bytes_index == utf8_bytes_filled) - { - fill_buffer<sizeof(typename WideStringType::value_type)>(); - - assert(utf8_bytes_filled > 0); - assert(utf8_bytes_index == 0); - } - - // use buffer - assert(utf8_bytes_filled > 0); - assert(utf8_bytes_index < utf8_bytes_filled); - return utf8_bytes[utf8_bytes_index++]; - } - - private: - template<size_t T> - void fill_buffer() - { - wide_string_input_helper<WideStringType, T>::fill_buffer(str, current_wchar, utf8_bytes, utf8_bytes_index, utf8_bytes_filled); - } - - /// the wstring to process - const WideStringType& str; - - /// index of the current wchar in str - std::size_t current_wchar = 0; - - /// a buffer for UTF-8 bytes - std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}}; - - /// index to the utf8_codes array for the next valid byte - std::size_t utf8_bytes_index = 0; - /// number of valid bytes in the utf8_codes array - std::size_t utf8_bytes_filled = 0; -}; - -class input_adapter -{ - public: - // native support - - /// input adapter for input stream - input_adapter(std::istream& i) - : ia(std::make_shared<input_stream_adapter>(i)) {} - - /// input adapter for input stream - input_adapter(std::istream&& i) - : ia(std::make_shared<input_stream_adapter>(i)) {} - - input_adapter(const std::wstring& ws) - : ia(std::make_shared<wide_string_input_adapter<std::wstring>>(ws)) {} - - input_adapter(const std::u16string& ws) - : ia(std::make_shared<wide_string_input_adapter<std::u16string>>(ws)) {} - - input_adapter(const std::u32string& ws) - : ia(std::make_shared<wide_string_input_adapter<std::u32string>>(ws)) {} - - /// input adapter for buffer - template<typename CharT, - typename std::enable_if< - std::is_pointer<CharT>::value and - std::is_integral<typename std::remove_pointer<CharT>::type>::value and - sizeof(typename std::remove_pointer<CharT>::type) == 1, - int>::type = 0> - input_adapter(CharT b, std::size_t l) - : ia(std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(b), l)) {} - - // derived support - - /// input adapter for string literal - template<typename CharT, - typename std::enable_if< - std::is_pointer<CharT>::value and - std::is_integral<typename std::remove_pointer<CharT>::type>::value and - sizeof(typename std::remove_pointer<CharT>::type) == 1, - int>::type = 0> - input_adapter(CharT b) - : input_adapter(reinterpret_cast<const char*>(b), - std::strlen(reinterpret_cast<const char*>(b))) {} - - /// input adapter for iterator range with contiguous storage - template<class IteratorType, - typename std::enable_if< - std::is_same<typename std::iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value, - int>::type = 0> - input_adapter(IteratorType first, IteratorType last) - { -#ifndef NDEBUG - // assertion to check that the iterator range is indeed contiguous, - // see http://stackoverflow.com/a/35008842/266378 for more discussion - const auto is_contiguous = std::accumulate( - first, last, std::pair<bool, int>(true, 0), - [&first](std::pair<bool, int> res, decltype(*first) val) - { - res.first &= (val == *(std::next(std::addressof(*first), res.second++))); - return res; - }).first; - assert(is_contiguous); -#endif - - // assertion to check that each element is 1 byte long - static_assert( - sizeof(typename std::iterator_traits<IteratorType>::value_type) == 1, - "each element in the iterator range must have the size of 1 byte"); - - const auto len = static_cast<size_t>(std::distance(first, last)); - if (JSON_LIKELY(len > 0)) - { - // there is at least one element: use the address of first - ia = std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(&(*first)), len); - } - else - { - // the address of first cannot be used: use nullptr - ia = std::make_shared<input_buffer_adapter>(nullptr, len); - } - } - - /// input adapter for array - template<class T, std::size_t N> - input_adapter(T (&array)[N]) - : input_adapter(std::begin(array), std::end(array)) {} - - /// input adapter for contiguous container - template<class ContiguousContainer, typename - std::enable_if<not std::is_pointer<ContiguousContainer>::value and - std::is_base_of<std::random_access_iterator_tag, typename std::iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value, - int>::type = 0> - input_adapter(const ContiguousContainer& c) - : input_adapter(std::begin(c), std::end(c)) {} - - operator input_adapter_t() - { - return ia; - } - - private: - /// the actual adapter - input_adapter_t ia = nullptr; -}; -} // namespace detail -} // namespace nlohmann diff --git a/include/lib/modernjson/detail/input/json_sax.hpp b/include/lib/modernjson/detail/input/json_sax.hpp deleted file mode 100644 index e1d48a2..0000000 --- a/include/lib/modernjson/detail/input/json_sax.hpp +++ /dev/null @@ -1,701 +0,0 @@ -#pragma once - -#include <cstddef> -#include <string> -#include <vector> - -#include <lib/modernjson/detail/input/parser.hpp> -#include <lib/modernjson/detail/exceptions.hpp> - -namespace nlohmann -{ - -/*! -@brief SAX interface - -This class describes the SAX interface used by @ref nlohmann::json::sax_parse. -Each function is called in different situations while the input is parsed. The -boolean return value informs the parser whether to continue processing the -input. -*/ -template<typename BasicJsonType> -struct json_sax -{ - /// type for (signed) integers - using number_integer_t = typename BasicJsonType::number_integer_t; - /// type for unsigned integers - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - /// type for floating-point numbers - using number_float_t = typename BasicJsonType::number_float_t; - /// type for strings - using string_t = typename BasicJsonType::string_t; - - /*! - @brief a null value was read - @return whether parsing should proceed - */ - virtual bool null() = 0; - - /*! - @brief a boolean value was read - @param[in] val boolean value - @return whether parsing should proceed - */ - virtual bool boolean(bool val) = 0; - - /*! - @brief an integer number was read - @param[in] val integer value - @return whether parsing should proceed - */ - virtual bool number_integer(number_integer_t val) = 0; - - /*! - @brief an unsigned integer number was read - @param[in] val unsigned integer value - @return whether parsing should proceed - */ - virtual bool number_unsigned(number_unsigned_t val) = 0; - - /*! - @brief an floating-point number was read - @param[in] val floating-point value - @param[in] s raw token value - @return whether parsing should proceed - */ - virtual bool number_float(number_float_t val, const string_t& s) = 0; - - /*! - @brief a string was read - @param[in] val string value - @return whether parsing should proceed - @note It is safe to move the passed string. - */ - virtual bool string(string_t& val) = 0; - - /*! - @brief the beginning of an object was read - @param[in] elements number of object elements or -1 if unknown - @return whether parsing should proceed - @note binary formats may report the number of elements - */ - virtual bool start_object(std::size_t elements) = 0; - - /*! - @brief an object key was read - @param[in] val object key - @return whether parsing should proceed - @note It is safe to move the passed string. - */ - virtual bool key(string_t& val) = 0; - - /*! - @brief the end of an object was read - @return whether parsing should proceed - */ - virtual bool end_object() = 0; - - /*! - @brief the beginning of an array was read - @param[in] elements number of array elements or -1 if unknown - @return whether parsing should proceed - @note binary formats may report the number of elements - */ - virtual bool start_array(std::size_t elements) = 0; - - /*! - @brief the end of an array was read - @return whether parsing should proceed - */ - virtual bool end_array() = 0; - - /*! - @brief a parse error occurred - @param[in] position the position in the input where the error occurs - @param[in] last_token the last read token - @param[in] ex an exception object describing the error - @return whether parsing should proceed (must return false) - */ - virtual bool parse_error(std::size_t position, - const std::string& last_token, - const detail::exception& ex) = 0; - - virtual ~json_sax() = default; -}; - - -namespace detail -{ -/*! -@brief SAX implementation to create a JSON value from SAX events - -This class implements the @ref json_sax interface and processes the SAX events -to create a JSON value which makes it basically a DOM parser. The structure or -hierarchy of the JSON value is managed by the stack `ref_stack` which contains -a pointer to the respective array or object for each recursion depth. - -After successful parsing, the value that is passed by reference to the -constructor contains the parsed value. - -@tparam BasicJsonType the JSON type -*/ -template<typename BasicJsonType> -class json_sax_dom_parser -{ - public: - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - - /*! - @param[in, out] r reference to a JSON value that is manipulated while - parsing - @param[in] allow_exceptions_ whether parse errors yield exceptions - */ - explicit json_sax_dom_parser(BasicJsonType& r, const bool allow_exceptions_ = true) - : root(r), allow_exceptions(allow_exceptions_) - {} - - bool null() - { - handle_value(nullptr); - return true; - } - - bool boolean(bool val) - { - handle_value(val); - return true; - } - - bool number_integer(number_integer_t val) - { - handle_value(val); - return true; - } - - bool number_unsigned(number_unsigned_t val) - { - handle_value(val); - return true; - } - - bool number_float(number_float_t val, const string_t& /*unused*/) - { - handle_value(val); - return true; - } - - bool string(string_t& val) - { - handle_value(val); - return true; - } - - bool start_object(std::size_t len) - { - ref_stack.push_back(handle_value(BasicJsonType::value_t::object)); - - if (JSON_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, - "excessive object size: " + std::to_string(len))); - } - - return true; - } - - bool key(string_t& val) - { - // add null at given key and store the reference for later - object_element = &(ref_stack.back()->m_value.object->operator[](val)); - return true; - } - - bool end_object() - { - ref_stack.pop_back(); - return true; - } - - bool start_array(std::size_t len) - { - ref_stack.push_back(handle_value(BasicJsonType::value_t::array)); - - if (JSON_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, - "excessive array size: " + std::to_string(len))); - } - - return true; - } - - bool end_array() - { - ref_stack.pop_back(); - return true; - } - - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, - const detail::exception& ex) - { - errored = true; - if (allow_exceptions) - { - // determine the proper exception type from the id - switch ((ex.id / 100) % 100) - { - case 1: - JSON_THROW(*reinterpret_cast<const detail::parse_error*>(&ex)); - case 4: - JSON_THROW(*reinterpret_cast<const detail::out_of_range*>(&ex)); - // LCOV_EXCL_START - case 2: - JSON_THROW(*reinterpret_cast<const detail::invalid_iterator*>(&ex)); - case 3: - JSON_THROW(*reinterpret_cast<const detail::type_error*>(&ex)); - case 5: - JSON_THROW(*reinterpret_cast<const detail::other_error*>(&ex)); - default: - assert(false); - // LCOV_EXCL_STOP - } - } - return false; - } - - constexpr bool is_errored() const - { - return errored; - } - - private: - /*! - @invariant If the ref stack is empty, then the passed value will be the new - root. - @invariant If the ref stack contains a value, then it is an array or an - object to which we can add elements - */ - template<typename Value> - BasicJsonType* handle_value(Value&& v) - { - if (ref_stack.empty()) - { - root = BasicJsonType(std::forward<Value>(v)); - return &root; - } - - assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); - - if (ref_stack.back()->is_array()) - { - ref_stack.back()->m_value.array->emplace_back(std::forward<Value>(v)); - return &(ref_stack.back()->m_value.array->back()); - } - else - { - assert(object_element); - *object_element = BasicJsonType(std::forward<Value>(v)); - return object_element; - } - } - - /// the parsed JSON value - BasicJsonType& root; - /// stack to model hierarchy of values - std::vector<BasicJsonType*> ref_stack; - /// helper to hold the reference for the next object element - BasicJsonType* object_element = nullptr; - /// whether a syntax error occurred - bool errored = false; - /// whether to throw exceptions in case of errors - const bool allow_exceptions = true; -}; - -template<typename BasicJsonType> -class json_sax_dom_callback_parser -{ - public: - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using parser_callback_t = typename BasicJsonType::parser_callback_t; - using parse_event_t = typename BasicJsonType::parse_event_t; - - json_sax_dom_callback_parser(BasicJsonType& r, - const parser_callback_t cb, - const bool allow_exceptions_ = true) - : root(r), callback(cb), allow_exceptions(allow_exceptions_) - { - keep_stack.push_back(true); - } - - bool null() - { - handle_value(nullptr); - return true; - } - - bool boolean(bool val) - { - handle_value(val); - return true; - } - - bool number_integer(number_integer_t val) - { - handle_value(val); - return true; - } - - bool number_unsigned(number_unsigned_t val) - { - handle_value(val); - return true; - } - - bool number_float(number_float_t val, const string_t& /*unused*/) - { - handle_value(val); - return true; - } - - bool string(string_t& val) - { - handle_value(val); - return true; - } - - bool start_object(std::size_t len) - { - // check callback for object start - const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::object_start, discarded); - keep_stack.push_back(keep); - - auto val = handle_value(BasicJsonType::value_t::object, true); - ref_stack.push_back(val.second); - - // check object limit - if (ref_stack.back()) - { - if (JSON_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, - "excessive object size: " + std::to_string(len))); - } - } - - return true; - } - - bool key(string_t& val) - { - BasicJsonType k = BasicJsonType(val); - - // check callback for key - const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::key, k); - key_keep_stack.push_back(keep); - - // add discarded value at given key and store the reference for later - if (keep and ref_stack.back()) - { - object_element = &(ref_stack.back()->m_value.object->operator[](val) = discarded); - } - - return true; - } - - bool end_object() - { - if (ref_stack.back()) - { - if (not callback(static_cast<int>(ref_stack.size()) - 1, parse_event_t::object_end, *ref_stack.back())) - { - // discard object - *ref_stack.back() = discarded; - } - } - - assert(not ref_stack.empty()); - assert(not keep_stack.empty()); - ref_stack.pop_back(); - keep_stack.pop_back(); - - if (not ref_stack.empty() and ref_stack.back()) - { - // remove discarded value - if (ref_stack.back()->is_object()) - { - for (auto it = ref_stack.back()->begin(); it != ref_stack.back()->end(); ++it) - { - if (it->is_discarded()) - { - ref_stack.back()->erase(it); - break; - } - } - } - } - - return true; - } - - bool start_array(std::size_t len) - { - const bool keep = callback(static_cast<int>(ref_stack.size()), parse_event_t::array_start, discarded); - keep_stack.push_back(keep); - - auto val = handle_value(BasicJsonType::value_t::array, true); - ref_stack.push_back(val.second); - - // check array limit - if (ref_stack.back()) - { - if (JSON_UNLIKELY(len != std::size_t(-1) and len > ref_stack.back()->max_size())) - { - JSON_THROW(out_of_range::create(408, - "excessive array size: " + std::to_string(len))); - } - } - - return true; - } - - bool end_array() - { - bool keep = true; - - if (ref_stack.back()) - { - keep = callback(static_cast<int>(ref_stack.size()) - 1, parse_event_t::array_end, *ref_stack.back()); - if (not keep) - { - // discard array - *ref_stack.back() = discarded; - } - } - - assert(not ref_stack.empty()); - assert(not keep_stack.empty()); - ref_stack.pop_back(); - keep_stack.pop_back(); - - // remove discarded value - if (not keep and not ref_stack.empty()) - { - if (ref_stack.back()->is_array()) - { - ref_stack.back()->m_value.array->pop_back(); - } - } - - return true; - } - - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, - const detail::exception& ex) - { - errored = true; - if (allow_exceptions) - { - // determine the proper exception type from the id - switch ((ex.id / 100) % 100) - { - case 1: - JSON_THROW(*reinterpret_cast<const detail::parse_error*>(&ex)); - case 4: - JSON_THROW(*reinterpret_cast<const detail::out_of_range*>(&ex)); - // LCOV_EXCL_START - case 2: - JSON_THROW(*reinterpret_cast<const detail::invalid_iterator*>(&ex)); - case 3: - JSON_THROW(*reinterpret_cast<const detail::type_error*>(&ex)); - case 5: - JSON_THROW(*reinterpret_cast<const detail::other_error*>(&ex)); - default: - assert(false); - // LCOV_EXCL_STOP - } - } - return false; - } - - constexpr bool is_errored() const - { - return errored; - } - - private: - /*! - @param[in] v value to add to the JSON value we build during parsing - @param[in] skip_callback whether we should skip calling the callback - function; this is required after start_array() and - start_object() SAX events, because otherwise we would call the - callback function with an empty array or object, respectively. - - @invariant If the ref stack is empty, then the passed value will be the new - root. - @invariant If the ref stack contains a value, then it is an array or an - object to which we can add elements - - @return pair of boolean (whether value should be kept) and pointer (to the - passed value in the ref_stack hierarchy; nullptr if not kept) - */ - template<typename Value> - std::pair<bool, BasicJsonType*> handle_value(Value&& v, const bool skip_callback = false) - { - assert(not keep_stack.empty()); - - // do not handle this value if we know it would be added to a discarded - // container - if (not keep_stack.back()) - { - return {false, nullptr}; - } - - // create value - auto value = BasicJsonType(std::forward<Value>(v)); - - // check callback - const bool keep = skip_callback or callback(static_cast<int>(ref_stack.size()), parse_event_t::value, value); - - // do not handle this value if we just learnt it shall be discarded - if (not keep) - { - return {false, nullptr}; - } - - if (ref_stack.empty()) - { - root = std::move(value); - return {true, &root}; - } - - // skip this value if we already decided to skip the parent - // (https://github.com/nlohmann/json/issues/971#issuecomment-413678360) - if (not ref_stack.back()) - { - return {false, nullptr}; - } - - // we now only expect arrays and objects - assert(ref_stack.back()->is_array() or ref_stack.back()->is_object()); - - if (ref_stack.back()->is_array()) - { - ref_stack.back()->m_value.array->push_back(std::move(value)); - return {true, &(ref_stack.back()->m_value.array->back())}; - } - else - { - // check if we should store an element for the current key - assert(not key_keep_stack.empty()); - const bool store_element = key_keep_stack.back(); - key_keep_stack.pop_back(); - - if (not store_element) - { - return {false, nullptr}; - } - - assert(object_element); - *object_element = std::move(value); - return {true, object_element}; - } - } - - /// the parsed JSON value - BasicJsonType& root; - /// stack to model hierarchy of values - std::vector<BasicJsonType*> ref_stack; - /// stack to manage which values to keep - std::vector<bool> keep_stack; - /// stack to manage which object keys to keep - std::vector<bool> key_keep_stack; - /// helper to hold the reference for the next object element - BasicJsonType* object_element = nullptr; - /// whether a syntax error occurred - bool errored = false; - /// callback function - const parser_callback_t callback = nullptr; - /// whether to throw exceptions in case of errors - const bool allow_exceptions = true; - /// a discarded value for the callback - BasicJsonType discarded = BasicJsonType::value_t::discarded; -}; - -template<typename BasicJsonType> -class json_sax_acceptor -{ - public: - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - - bool null() - { - return true; - } - - bool boolean(bool /*unused*/) - { - return true; - } - - bool number_integer(number_integer_t /*unused*/) - { - return true; - } - - bool number_unsigned(number_unsigned_t /*unused*/) - { - return true; - } - - bool number_float(number_float_t /*unused*/, const string_t& /*unused*/) - { - return true; - } - - bool string(string_t& /*unused*/) - { - return true; - } - - bool start_object(std::size_t /*unused*/ = std::size_t(-1)) - { - return true; - } - - bool key(string_t& /*unused*/) - { - return true; - } - - bool end_object() - { - return true; - } - - bool start_array(std::size_t /*unused*/ = std::size_t(-1)) - { - return true; - } - - bool end_array() - { - return true; - } - - bool parse_error(std::size_t /*unused*/, const std::string& /*unused*/, const detail::exception& /*unused*/) - { - return false; - } -}; -} // namespace detail - -} // namespace nlohmann diff --git a/include/lib/modernjson/detail/input/lexer.hpp b/include/lib/modernjson/detail/input/lexer.hpp deleted file mode 100644 index b61e289..0000000 --- a/include/lib/modernjson/detail/input/lexer.hpp +++ /dev/null @@ -1,1506 +0,0 @@ -#pragma once - -#include <clocale> // localeconv -#include <cstddef> // size_t -#include <cstdlib> // strtof, strtod, strtold, strtoll, strtoull -#include <cstdio> // snprintf -#include <initializer_list> // initializer_list -#include <string> // char_traits, string -#include <vector> // vector - -#include <lib/modernjson/detail/macro_scope.hpp> -#include <lib/modernjson/detail/input/input_adapters.hpp> -#include <lib/modernjson/detail/input/position_t.hpp> - -namespace nlohmann -{ -namespace detail -{ -/////////// -// lexer // -/////////// - -/*! -@brief lexical analysis - -This class organizes the lexical analysis during JSON deserialization. -*/ -template<typename BasicJsonType> -class lexer -{ - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - - public: - /// token types for the parser - enum class token_type - { - uninitialized, ///< indicating the scanner is uninitialized - literal_true, ///< the `true` literal - literal_false, ///< the `false` literal - literal_null, ///< the `null` literal - value_string, ///< a string -- use get_string() for actual value - value_unsigned, ///< an unsigned integer -- use get_number_unsigned() for actual value - value_integer, ///< a signed integer -- use get_number_integer() for actual value - value_float, ///< an floating point number -- use get_number_float() for actual value - begin_array, ///< the character for array begin `[` - begin_object, ///< the character for object begin `{` - end_array, ///< the character for array end `]` - end_object, ///< the character for object end `}` - name_separator, ///< the name separator `:` - value_separator, ///< the value separator `,` - parse_error, ///< indicating a parse error - end_of_input, ///< indicating the end of the input buffer - literal_or_value ///< a literal or the begin of a value (only for diagnostics) - }; - - /// return name of values of type token_type (only used for errors) - static const char* token_type_name(const token_type t) noexcept - { - switch (t) - { - case token_type::uninitialized: - return "<uninitialized>"; - case token_type::literal_true: - return "true literal"; - case token_type::literal_false: - return "false literal"; - case token_type::literal_null: - return "null literal"; - case token_type::value_string: - return "string literal"; - case lexer::token_type::value_unsigned: - case lexer::token_type::value_integer: - case lexer::token_type::value_float: - return "number literal"; - case token_type::begin_array: - return "'['"; - case token_type::begin_object: - return "'{'"; - case token_type::end_array: - return "']'"; - case token_type::end_object: - return "'}'"; - case token_type::name_separator: - return "':'"; - case token_type::value_separator: - return "','"; - case token_type::parse_error: - return "<parse error>"; - case token_type::end_of_input: - return "end of input"; - case token_type::literal_or_value: - return "'[', '{', or a literal"; - // LCOV_EXCL_START - default: // catch non-enum values - return "unknown token"; - // LCOV_EXCL_STOP - } - } - - explicit lexer(detail::input_adapter_t&& adapter) - : ia(std::move(adapter)), decimal_point_char(get_decimal_point()) {} - - // delete because of pointer members - lexer(const lexer&) = delete; - lexer(lexer&&) = delete; - lexer& operator=(lexer&) = delete; - lexer& operator=(lexer&&) = delete; - ~lexer() = default; - - private: - ///////////////////// - // locales - ///////////////////// - - /// return the locale-dependent decimal point - static char get_decimal_point() noexcept - { - const auto loc = localeconv(); - assert(loc != nullptr); - return (loc->decimal_point == nullptr) ? '.' : *(loc->decimal_point); - } - - ///////////////////// - // scan functions - ///////////////////// - - /*! - @brief get codepoint from 4 hex characters following `\u` - - For input "\u c1 c2 c3 c4" the codepoint is: - (c1 * 0x1000) + (c2 * 0x0100) + (c3 * 0x0010) + c4 - = (c1 << 12) + (c2 << 8) + (c3 << 4) + (c4 << 0) - - Furthermore, the possible characters '0'..'9', 'A'..'F', and 'a'..'f' - must be converted to the integers 0x0..0x9, 0xA..0xF, 0xA..0xF, resp. The - conversion is done by subtracting the offset (0x30, 0x37, and 0x57) - between the ASCII value of the character and the desired integer value. - - @return codepoint (0x0000..0xFFFF) or -1 in case of an error (e.g. EOF or - non-hex character) - */ - int get_codepoint() - { - // this function only makes sense after reading `\u` - assert(current == 'u'); - int codepoint = 0; - - const auto factors = { 12, 8, 4, 0 }; - for (const auto factor : factors) - { - get(); - - if (current >= '0' and current <= '9') - { - codepoint += ((current - 0x30) << factor); - } - else if (current >= 'A' and current <= 'F') - { - codepoint += ((current - 0x37) << factor); - } - else if (current >= 'a' and current <= 'f') - { - codepoint += ((current - 0x57) << factor); - } - else - { - return -1; - } - } - - assert(0x0000 <= codepoint and codepoint <= 0xFFFF); - return codepoint; - } - - /*! - @brief check if the next byte(s) are inside a given range - - Adds the current byte and, for each passed range, reads a new byte and - checks if it is inside the range. If a violation was detected, set up an - error message and return false. Otherwise, return true. - - @param[in] ranges list of integers; interpreted as list of pairs of - inclusive lower and upper bound, respectively - - @pre The passed list @a ranges must have 2, 4, or 6 elements; that is, - 1, 2, or 3 pairs. This precondition is enforced by an assertion. - - @return true if and only if no range violation was detected - */ - bool next_byte_in_range(std::initializer_list<int> ranges) - { - assert(ranges.size() == 2 or ranges.size() == 4 or ranges.size() == 6); - add(current); - - for (auto range = ranges.begin(); range != ranges.end(); ++range) - { - get(); - if (JSON_LIKELY(*range <= current and current <= *(++range))) - { - add(current); - } - else - { - error_message = "invalid string: ill-formed UTF-8 byte"; - return false; - } - } - - return true; - } - - /*! - @brief scan a string literal - - This function scans a string according to Sect. 7 of RFC 7159. While - scanning, bytes are escaped and copied into buffer token_buffer. Then the - function returns successfully, token_buffer is *not* null-terminated (as it - may contain \0 bytes), and token_buffer.size() is the number of bytes in the - string. - - @return token_type::value_string if string could be successfully scanned, - token_type::parse_error otherwise - - @note In case of errors, variable error_message contains a textual - description. - */ - token_type scan_string() - { - // reset token_buffer (ignore opening quote) - reset(); - - // we entered the function by reading an open quote - assert(current == '\"'); - - while (true) - { - // get next character - switch (get()) - { - // end of file while parsing string - case std::char_traits<char>::eof(): - { - error_message = "invalid string: missing closing quote"; - return token_type::parse_error; - } - - // closing quote - case '\"': - { - return token_type::value_string; - } - - // escapes - case '\\': - { - switch (get()) - { - // quotation mark - case '\"': - add('\"'); - break; - // reverse solidus - case '\\': - add('\\'); - break; - // solidus - case '/': - add('/'); - break; - // backspace - case 'b': - add('\b'); - break; - // form feed - case 'f': - add('\f'); - break; - // line feed - case 'n': - add('\n'); - break; - // carriage return - case 'r': - add('\r'); - break; - // tab - case 't': - add('\t'); - break; - - // unicode escapes - case 'u': - { - const int codepoint1 = get_codepoint(); - int codepoint = codepoint1; // start with codepoint1 - - if (JSON_UNLIKELY(codepoint1 == -1)) - { - error_message = "invalid string: '\\u' must be followed by 4 hex digits"; - return token_type::parse_error; - } - - // check if code point is a high surrogate - if (0xD800 <= codepoint1 and codepoint1 <= 0xDBFF) - { - // expect next \uxxxx entry - if (JSON_LIKELY(get() == '\\' and get() == 'u')) - { - const int codepoint2 = get_codepoint(); - - if (JSON_UNLIKELY(codepoint2 == -1)) - { - error_message = "invalid string: '\\u' must be followed by 4 hex digits"; - return token_type::parse_error; - } - - // check if codepoint2 is a low surrogate - if (JSON_LIKELY(0xDC00 <= codepoint2 and codepoint2 <= 0xDFFF)) - { - // overwrite codepoint - codepoint = - // high surrogate occupies the most significant 22 bits - (codepoint1 << 10) - // low surrogate occupies the least significant 15 bits - + codepoint2 - // there is still the 0xD800, 0xDC00 and 0x10000 noise - // in the result so we have to subtract with: - // (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00 - - 0x35FDC00; - } - else - { - error_message = "invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF"; - return token_type::parse_error; - } - } - else - { - error_message = "invalid string: surrogate U+DC00..U+DFFF must be followed by U+DC00..U+DFFF"; - return token_type::parse_error; - } - } - else - { - if (JSON_UNLIKELY(0xDC00 <= codepoint1 and codepoint1 <= 0xDFFF)) - { - error_message = "invalid string: surrogate U+DC00..U+DFFF must follow U+D800..U+DBFF"; - return token_type::parse_error; - } - } - - // result of the above calculation yields a proper codepoint - assert(0x00 <= codepoint and codepoint <= 0x10FFFF); - - // translate codepoint into bytes - if (codepoint < 0x80) - { - // 1-byte characters: 0xxxxxxx (ASCII) - add(codepoint); - } - else if (codepoint <= 0x7FF) - { - // 2-byte characters: 110xxxxx 10xxxxxx - add(0xC0 | (codepoint >> 6)); - add(0x80 | (codepoint & 0x3F)); - } - else if (codepoint <= 0xFFFF) - { - // 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx - add(0xE0 | (codepoint >> 12)); - add(0x80 | ((codepoint >> 6) & 0x3F)); - add(0x80 | (codepoint & 0x3F)); - } - else - { - // 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx - add(0xF0 | (codepoint >> 18)); - add(0x80 | ((codepoint >> 12) & 0x3F)); - add(0x80 | ((codepoint >> 6) & 0x3F)); - add(0x80 | (codepoint & 0x3F)); - } - - break; - } - - // other characters after escape - default: - error_message = "invalid string: forbidden character after backslash"; - return token_type::parse_error; - } - - break; - } - - // invalid control characters - case 0x00: - { - error_message = "invalid string: control character U+0000 (NUL) must be escaped to \\u0000"; - return token_type::parse_error; - } - - case 0x01: - { - error_message = "invalid string: control character U+0001 (SOH) must be escaped to \\u0001"; - return token_type::parse_error; - } - - case 0x02: - { - error_message = "invalid string: control character U+0002 (STX) must be escaped to \\u0002"; - return token_type::parse_error; - } - - case 0x03: - { - error_message = "invalid string: control character U+0003 (ETX) must be escaped to \\u0003"; - return token_type::parse_error; - } - - case 0x04: - { - error_message = "invalid string: control character U+0004 (EOT) must be escaped to \\u0004"; - return token_type::parse_error; - } - - case 0x05: - { - error_message = "invalid string: control character U+0005 (ENQ) must be escaped to \\u0005"; - return token_type::parse_error; - } - - case 0x06: - { - error_message = "invalid string: control character U+0006 (ACK) must be escaped to \\u0006"; - return token_type::parse_error; - } - - case 0x07: - { - error_message = "invalid string: control character U+0007 (BEL) must be escaped to \\u0007"; - return token_type::parse_error; - } - - case 0x08: - { - error_message = "invalid string: control character U+0008 (BS) must be escaped to \\u0008 or \\b"; - return token_type::parse_error; - } - - case 0x09: - { - error_message = "invalid string: control character U+0009 (HT) must be escaped to \\u0009 or \\t"; - return token_type::parse_error; - } - - case 0x0A: - { - error_message = "invalid string: control character U+000A (LF) must be escaped to \\u000A or \\n"; - return token_type::parse_error; - } - - case 0x0B: - { - error_message = "invalid string: control character U+000B (VT) must be escaped to \\u000B"; - return token_type::parse_error; - } - - case 0x0C: - { - error_message = "invalid string: control character U+000C (FF) must be escaped to \\u000C or \\f"; - return token_type::parse_error; - } - - case 0x0D: - { - error_message = "invalid string: control character U+000D (CR) must be escaped to \\u000D or \\r"; - return token_type::parse_error; - } - - case 0x0E: - { - error_message = "invalid string: control character U+000E (SO) must be escaped to \\u000E"; - return token_type::parse_error; - } - - case 0x0F: - { - error_message = "invalid string: control character U+000F (SI) must be escaped to \\u000F"; - return token_type::parse_error; - } - - case 0x10: - { - error_message = "invalid string: control character U+0010 (DLE) must be escaped to \\u0010"; - return token_type::parse_error; - } - - case 0x11: - { - error_message = "invalid string: control character U+0011 (DC1) must be escaped to \\u0011"; - return token_type::parse_error; - } - - case 0x12: - { - error_message = "invalid string: control character U+0012 (DC2) must be escaped to \\u0012"; - return token_type::parse_error; - } - - case 0x13: - { - error_message = "invalid string: control character U+0013 (DC3) must be escaped to \\u0013"; - return token_type::parse_error; - } - - case 0x14: - { - error_message = "invalid string: control character U+0014 (DC4) must be escaped to \\u0014"; - return token_type::parse_error; - } - - case 0x15: - { - error_message = "invalid string: control character U+0015 (NAK) must be escaped to \\u0015"; - return token_type::parse_error; - } - - case 0x16: - { - error_message = "invalid string: control character U+0016 (SYN) must be escaped to \\u0016"; - return token_type::parse_error; - } - - case 0x17: - { - error_message = "invalid string: control character U+0017 (ETB) must be escaped to \\u0017"; - return token_type::parse_error; - } - - case 0x18: - { - error_message = "invalid string: control character U+0018 (CAN) must be escaped to \\u0018"; - return token_type::parse_error; - } - - case 0x19: - { - error_message = "invalid string: control character U+0019 (EM) must be escaped to \\u0019"; - return token_type::parse_error; - } - - case 0x1A: - { - error_message = "invalid string: control character U+001A (SUB) must be escaped to \\u001A"; - return token_type::parse_error; - } - - case 0x1B: - { - error_message = "invalid string: control character U+001B (ESC) must be escaped to \\u001B"; - return token_type::parse_error; - } - - case 0x1C: - { - error_message = "invalid string: control character U+001C (FS) must be escaped to \\u001C"; - return token_type::parse_error; - } - - case 0x1D: - { - error_message = "invalid string: control character U+001D (GS) must be escaped to \\u001D"; - return token_type::parse_error; - } - - case 0x1E: - { - error_message = "invalid string: control character U+001E (RS) must be escaped to \\u001E"; - return token_type::parse_error; - } - - case 0x1F: - { - error_message = "invalid string: control character U+001F (US) must be escaped to \\u001F"; - return token_type::parse_error; - } - - // U+0020..U+007F (except U+0022 (quote) and U+005C (backspace)) - case 0x20: - case 0x21: - case 0x23: - case 0x24: - case 0x25: - case 0x26: - case 0x27: - case 0x28: - case 0x29: - case 0x2A: - case 0x2B: - case 0x2C: - case 0x2D: - case 0x2E: - case 0x2F: - case 0x30: - case 0x31: - case 0x32: - case 0x33: - case 0x34: - case 0x35: - case 0x36: - case 0x37: - case 0x38: - case 0x39: - case 0x3A: - case 0x3B: - case 0x3C: - case 0x3D: - case 0x3E: - case 0x3F: - case 0x40: - case 0x41: - case 0x42: - case 0x43: - case 0x44: - case 0x45: - case 0x46: - case 0x47: - case 0x48: - case 0x49: - case 0x4A: - case 0x4B: - case 0x4C: - case 0x4D: - case 0x4E: - case 0x4F: - case 0x50: - case 0x51: - case 0x52: - case 0x53: - case 0x54: - case 0x55: - case 0x56: - case 0x57: - case 0x58: - case 0x59: - case 0x5A: - case 0x5B: - case 0x5D: - case 0x5E: - case 0x5F: - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: - case 0x6C: - case 0x6D: - case 0x6E: - case 0x6F: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - case 0x78: - case 0x79: - case 0x7A: - case 0x7B: - case 0x7C: - case 0x7D: - case 0x7E: - case 0x7F: - { - add(current); - break; - } - - // U+0080..U+07FF: bytes C2..DF 80..BF - case 0xC2: - case 0xC3: - case 0xC4: - case 0xC5: - case 0xC6: - case 0xC7: - case 0xC8: - case 0xC9: - case 0xCA: - case 0xCB: - case 0xCC: - case 0xCD: - case 0xCE: - case 0xCF: - case 0xD0: - case 0xD1: - case 0xD2: - case 0xD3: - case 0xD4: - case 0xD5: - case 0xD6: - case 0xD7: - case 0xD8: - case 0xD9: - case 0xDA: - case 0xDB: - case 0xDC: - case 0xDD: - case 0xDE: - case 0xDF: - { - if (JSON_UNLIKELY(not next_byte_in_range({0x80, 0xBF}))) - { - return token_type::parse_error; - } - break; - } - - // U+0800..U+0FFF: bytes E0 A0..BF 80..BF - case 0xE0: - { - if (JSON_UNLIKELY(not (next_byte_in_range({0xA0, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } - - // U+1000..U+CFFF: bytes E1..EC 80..BF 80..BF - // U+E000..U+FFFF: bytes EE..EF 80..BF 80..BF - case 0xE1: - case 0xE2: - case 0xE3: - case 0xE4: - case 0xE5: - case 0xE6: - case 0xE7: - case 0xE8: - case 0xE9: - case 0xEA: - case 0xEB: - case 0xEC: - case 0xEE: - case 0xEF: - { - if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } - - // U+D000..U+D7FF: bytes ED 80..9F 80..BF - case 0xED: - { - if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x9F, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } - - // U+10000..U+3FFFF F0 90..BF 80..BF 80..BF - case 0xF0: - { - if (JSON_UNLIKELY(not (next_byte_in_range({0x90, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } - - // U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF - case 0xF1: - case 0xF2: - case 0xF3: - { - if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0xBF, 0x80, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } - - // U+100000..U+10FFFF F4 80..8F 80..BF 80..BF - case 0xF4: - { - if (JSON_UNLIKELY(not (next_byte_in_range({0x80, 0x8F, 0x80, 0xBF, 0x80, 0xBF})))) - { - return token_type::parse_error; - } - break; - } - - // remaining bytes (80..C1 and F5..FF) are ill-formed - default: - { - error_message = "invalid string: ill-formed UTF-8 byte"; - return token_type::parse_error; - } - } - } - } - - static void strtof(float& f, const char* str, char** endptr) noexcept - { - f = std::strtof(str, endptr); - } - - static void strtof(double& f, const char* str, char** endptr) noexcept - { - f = std::strtod(str, endptr); - } - - static void strtof(long double& f, const char* str, char** endptr) noexcept - { - f = std::strtold(str, endptr); - } - - /*! - @brief scan a number literal - - This function scans a string according to Sect. 6 of RFC 7159. - - The function is realized with a deterministic finite state machine derived - from the grammar described in RFC 7159. Starting in state "init", the - input is read and used to determined the next state. Only state "done" - accepts the number. State "error" is a trap state to model errors. In the - table below, "anything" means any character but the ones listed before. - - state | 0 | 1-9 | e E | + | - | . | anything - ---------|----------|----------|----------|---------|---------|----------|----------- - init | zero | any1 | [error] | [error] | minus | [error] | [error] - minus | zero | any1 | [error] | [error] | [error] | [error] | [error] - zero | done | done | exponent | done | done | decimal1 | done - any1 | any1 | any1 | exponent | done | done | decimal1 | done - decimal1 | decimal2 | [error] | [error] | [error] | [error] | [error] | [error] - decimal2 | decimal2 | decimal2 | exponent | done | done | done | done - exponent | any2 | any2 | [error] | sign | sign | [error] | [error] - sign | any2 | any2 | [error] | [error] | [error] | [error] | [error] - any2 | any2 | any2 | done | done | done | done | done - - The state machine is realized with one label per state (prefixed with - "scan_number_") and `goto` statements between them. The state machine - contains cycles, but any cycle can be left when EOF is read. Therefore, - the function is guaranteed to terminate. - - During scanning, the read bytes are stored in token_buffer. This string is - then converted to a signed integer, an unsigned integer, or a - floating-point number. - - @return token_type::value_unsigned, token_type::value_integer, or - token_type::value_float if number could be successfully scanned, - token_type::parse_error otherwise - - @note The scanner is independent of the current locale. Internally, the - locale's decimal point is used instead of `.` to work with the - locale-dependent converters. - */ - token_type scan_number() // lgtm [cpp/use-of-goto] - { - // reset token_buffer to store the number's bytes - reset(); - - // the type of the parsed number; initially set to unsigned; will be - // changed if minus sign, decimal point or exponent is read - token_type number_type = token_type::value_unsigned; - - // state (init): we just found out we need to scan a number - switch (current) - { - case '-': - { - add(current); - goto scan_number_minus; - } - - case '0': - { - add(current); - goto scan_number_zero; - } - - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any1; - } - - // LCOV_EXCL_START - default: - { - // all other characters are rejected outside scan_number() - assert(false); - } - // LCOV_EXCL_STOP - } - -scan_number_minus: - // state: we just parsed a leading minus sign - number_type = token_type::value_integer; - switch (get()) - { - case '0': - { - add(current); - goto scan_number_zero; - } - - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any1; - } - - default: - { - error_message = "invalid number; expected digit after '-'"; - return token_type::parse_error; - } - } - -scan_number_zero: - // state: we just parse a zero (maybe with a leading minus sign) - switch (get()) - { - case '.': - { - add(decimal_point_char); - goto scan_number_decimal1; - } - - case 'e': - case 'E': - { - add(current); - goto scan_number_exponent; - } - - default: - goto scan_number_done; - } - -scan_number_any1: - // state: we just parsed a number 0-9 (maybe with a leading minus sign) - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any1; - } - - case '.': - { - add(decimal_point_char); - goto scan_number_decimal1; - } - - case 'e': - case 'E': - { - add(current); - goto scan_number_exponent; - } - - default: - goto scan_number_done; - } - -scan_number_decimal1: - // state: we just parsed a decimal point - number_type = token_type::value_float; - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_decimal2; - } - - default: - { - error_message = "invalid number; expected digit after '.'"; - return token_type::parse_error; - } - } - -scan_number_decimal2: - // we just parsed at least one number after a decimal point - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_decimal2; - } - - case 'e': - case 'E': - { - add(current); - goto scan_number_exponent; - } - - default: - goto scan_number_done; - } - -scan_number_exponent: - // we just parsed an exponent - number_type = token_type::value_float; - switch (get()) - { - case '+': - case '-': - { - add(current); - goto scan_number_sign; - } - - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any2; - } - - default: - { - error_message = - "invalid number; expected '+', '-', or digit after exponent"; - return token_type::parse_error; - } - } - -scan_number_sign: - // we just parsed an exponent sign - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any2; - } - - default: - { - error_message = "invalid number; expected digit after exponent sign"; - return token_type::parse_error; - } - } - -scan_number_any2: - // we just parsed a number after the exponent or exponent sign - switch (get()) - { - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - { - add(current); - goto scan_number_any2; - } - - default: - goto scan_number_done; - } - -scan_number_done: - // unget the character after the number (we only read it to know that - // we are done scanning a number) - unget(); - - char* endptr = nullptr; - errno = 0; - - // try to parse integers first and fall back to floats - if (number_type == token_type::value_unsigned) - { - const auto x = std::strtoull(token_buffer.data(), &endptr, 10); - - // we checked the number format before - assert(endptr == token_buffer.data() + token_buffer.size()); - - if (errno == 0) - { - value_unsigned = static_cast<number_unsigned_t>(x); - if (value_unsigned == x) - { - return token_type::value_unsigned; - } - } - } - else if (number_type == token_type::value_integer) - { - const auto x = std::strtoll(token_buffer.data(), &endptr, 10); - - // we checked the number format before - assert(endptr == token_buffer.data() + token_buffer.size()); - - if (errno == 0) - { - value_integer = static_cast<number_integer_t>(x); - if (value_integer == x) - { - return token_type::value_integer; - } - } - } - - // this code is reached if we parse a floating-point number or if an - // integer conversion above failed - strtof(value_float, token_buffer.data(), &endptr); - - // we checked the number format before - assert(endptr == token_buffer.data() + token_buffer.size()); - - return token_type::value_float; - } - - /*! - @param[in] literal_text the literal text to expect - @param[in] length the length of the passed literal text - @param[in] return_type the token type to return on success - */ - token_type scan_literal(const char* literal_text, const std::size_t length, - token_type return_type) - { - assert(current == literal_text[0]); - for (std::size_t i = 1; i < length; ++i) - { - if (JSON_UNLIKELY(get() != literal_text[i])) - { - error_message = "invalid literal"; - return token_type::parse_error; - } - } - return return_type; - } - - ///////////////////// - // input management - ///////////////////// - - /// reset token_buffer; current character is beginning of token - void reset() noexcept - { - token_buffer.clear(); - token_string.clear(); - token_string.push_back(std::char_traits<char>::to_char_type(current)); - } - - /* - @brief get next character from the input - - This function provides the interface to the used input adapter. It does - not throw in case the input reached EOF, but returns a - `std::char_traits<char>::eof()` in that case. Stores the scanned characters - for use in error messages. - - @return character read from the input - */ - std::char_traits<char>::int_type get() - { - ++position.chars_read_total; - ++position.chars_read_current_line; - - if (next_unget) - { - // just reset the next_unget variable and work with current - next_unget = false; - } - else - { - current = ia->get_character(); - } - - if (JSON_LIKELY(current != std::char_traits<char>::eof())) - { - token_string.push_back(std::char_traits<char>::to_char_type(current)); - } - - if (current == '\n') - { - ++position.lines_read; - ++position.chars_read_current_line = 0; - } - - return current; - } - - /*! - @brief unget current character (read it again on next get) - - We implement unget by setting variable next_unget to true. The input is not - changed - we just simulate ungetting by modifying chars_read_total, - chars_read_current_line, and token_string. The next call to get() will - behave as if the unget character is read again. - */ - void unget() - { - next_unget = true; - - --position.chars_read_total; - - // in case we "unget" a newline, we have to also decrement the lines_read - if (position.chars_read_current_line == 0) - { - if (position.lines_read > 0) - { - --position.lines_read; - } - } - else - { - --position.chars_read_current_line; - } - - if (JSON_LIKELY(current != std::char_traits<char>::eof())) - { - assert(token_string.size() != 0); - token_string.pop_back(); - } - } - - /// add a character to token_buffer - void add(int c) - { - token_buffer.push_back(std::char_traits<char>::to_char_type(c)); - } - - public: - ///////////////////// - // value getters - ///////////////////// - - /// return integer value - constexpr number_integer_t get_number_integer() const noexcept - { - return value_integer; - } - - /// return unsigned integer value - constexpr number_unsigned_t get_number_unsigned() const noexcept - { - return value_unsigned; - } - - /// return floating-point value - constexpr number_float_t get_number_float() const noexcept - { - return value_float; - } - - /// return current string value (implicitly resets the token; useful only once) - string_t& get_string() - { - return token_buffer; - } - - ///////////////////// - // diagnostics - ///////////////////// - - /// return position of last read token - constexpr position_t get_position() const noexcept - { - return position; - } - - /// return the last read token (for errors only). Will never contain EOF - /// (an arbitrary value that is not a valid char value, often -1), because - /// 255 may legitimately occur. May contain NUL, which should be escaped. - std::string get_token_string() const - { - // escape control characters - std::string result; - for (const auto c : token_string) - { - if ('\x00' <= c and c <= '\x1F') - { - // escape control characters - char cs[9]; - (std::snprintf)(cs, 9, "<U+%.4X>", static_cast<unsigned char>(c)); - result += cs; - } - else - { - // add character as is - result.push_back(c); - } - } - - return result; - } - - /// return syntax error message - constexpr const char* get_error_message() const noexcept - { - return error_message; - } - - ///////////////////// - // actual scanner - ///////////////////// - - /*! - @brief skip the UTF-8 byte order mark - @return true iff there is no BOM or the correct BOM has been skipped - */ - bool skip_bom() - { - if (get() == 0xEF) - { - // check if we completely parse the BOM - return get() == 0xBB and get() == 0xBF; - } - - // the first character is not the beginning of the BOM; unget it to - // process is later - unget(); - return true; - } - - token_type scan() - { - // initially, skip the BOM - if (position.chars_read_total == 0 and not skip_bom()) - { - error_message = "invalid BOM; must be 0xEF 0xBB 0xBF if given"; - return token_type::parse_error; - } - - // read next character and ignore whitespace - do - { - get(); - } - while (current == ' ' or current == '\t' or current == '\n' or current == '\r'); - - switch (current) - { - // structural characters - case '[': - return token_type::begin_array; - case ']': - return token_type::end_array; - case '{': - return token_type::begin_object; - case '}': - return token_type::end_object; - case ':': - return token_type::name_separator; - case ',': - return token_type::value_separator; - - // literals - case 't': - return scan_literal("true", 4, token_type::literal_true); - case 'f': - return scan_literal("false", 5, token_type::literal_false); - case 'n': - return scan_literal("null", 4, token_type::literal_null); - - // string - case '\"': - return scan_string(); - - // number - case '-': - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - return scan_number(); - - // end of input (the null byte is needed when parsing from - // string literals) - case '\0': - case std::char_traits<char>::eof(): - return token_type::end_of_input; - - // error - default: - error_message = "invalid literal"; - return token_type::parse_error; - } - } - - private: - /// input adapter - detail::input_adapter_t ia = nullptr; - - /// the current character - std::char_traits<char>::int_type current = std::char_traits<char>::eof(); - - /// whether the next get() call should just return current - bool next_unget = false; - - /// the start position of the current token - position_t position; - - /// raw input token string (for error messages) - std::vector<char> token_string {}; - - /// buffer for variable-length tokens (numbers, strings) - string_t token_buffer {}; - - /// a description of occurred lexer errors - const char* error_message = ""; - - // number values - number_integer_t value_integer = 0; - number_unsigned_t value_unsigned = 0; - number_float_t value_float = 0; - - /// the decimal point - const char decimal_point_char = '.'; -}; -} // namespace detail -} // namespace nlohmann diff --git a/include/lib/modernjson/detail/input/parser.hpp b/include/lib/modernjson/detail/input/parser.hpp deleted file mode 100644 index d205bbb..0000000 --- a/include/lib/modernjson/detail/input/parser.hpp +++ /dev/null @@ -1,504 +0,0 @@ -#pragma once - -#include <cassert> // assert -#include <cmath> // isfinite -#include <cstdint> // uint8_t -#include <functional> // function -#include <string> // string -#include <utility> // move - -#include <lib/modernjson/detail/exceptions.hpp> -#include <lib/modernjson/detail/macro_scope.hpp> -#include <lib/modernjson/detail/meta/is_sax.hpp> -#include <lib/modernjson/detail/input/input_adapters.hpp> -#include <lib/modernjson/detail/input/json_sax.hpp> -#include <lib/modernjson/detail/input/lexer.hpp> -#include <lib/modernjson/detail/value_t.hpp> - -namespace nlohmann -{ -namespace detail -{ -//////////// -// parser // -//////////// - -/*! -@brief syntax analysis - -This class implements a recursive decent parser. -*/ -template<typename BasicJsonType> -class parser -{ - using number_integer_t = typename BasicJsonType::number_integer_t; - using number_unsigned_t = typename BasicJsonType::number_unsigned_t; - using number_float_t = typename BasicJsonType::number_float_t; - using string_t = typename BasicJsonType::string_t; - using lexer_t = lexer<BasicJsonType>; - using token_type = typename lexer_t::token_type; - - public: - enum class parse_event_t : uint8_t - { - /// the parser read `{` and started to process a JSON object - object_start, - /// the parser read `}` and finished processing a JSON object - object_end, - /// the parser read `[` and started to process a JSON array - array_start, - /// the parser read `]` and finished processing a JSON array - array_end, - /// the parser read a key of a value in an object - key, - /// the parser finished reading a JSON value - value - }; - - using parser_callback_t = - std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>; - - /// a parser reading from an input adapter - explicit parser(detail::input_adapter_t&& adapter, - const parser_callback_t cb = nullptr, - const bool allow_exceptions_ = true) - : callback(cb), m_lexer(std::move(adapter)), allow_exceptions(allow_exceptions_) - { - // read first token - get_token(); - } - - /*! - @brief public parser interface - - @param[in] strict whether to expect the last token to be EOF - @param[in,out] result parsed JSON value - - @throw parse_error.101 in case of an unexpected token - @throw parse_error.102 if to_unicode fails or surrogate error - @throw parse_error.103 if to_unicode fails - */ - void parse(const bool strict, BasicJsonType& result) - { - if (callback) - { - json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions); - sax_parse_internal(&sdp); - result.assert_invariant(); - - // in strict mode, input must be completely read - if (strict and (get_token() != token_type::end_of_input)) - { - sdp.parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), - exception_message(token_type::end_of_input, "value"))); - } - - // in case of an error, return discarded value - if (sdp.is_errored()) - { - result = value_t::discarded; - return; - } - - // set top-level value to null if it was discarded by the callback - // function - if (result.is_discarded()) - { - result = nullptr; - } - } - else - { - json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions); - sax_parse_internal(&sdp); - result.assert_invariant(); - - // in strict mode, input must be completely read - if (strict and (get_token() != token_type::end_of_input)) - { - sdp.parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), - exception_message(token_type::end_of_input, "value"))); - } - - // in case of an error, return discarded value - if (sdp.is_errored()) - { - result = value_t::discarded; - return; - } - } - } - - /*! - @brief public accept interface - - @param[in] strict whether to expect the last token to be EOF - @return whether the input is a proper JSON text - */ - bool accept(const bool strict = true) - { - json_sax_acceptor<BasicJsonType> sax_acceptor; - return sax_parse(&sax_acceptor, strict); - } - - template <typename SAX> - bool sax_parse(SAX* sax, const bool strict = true) - { - (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {}; - const bool result = sax_parse_internal(sax); - - // strict mode: next byte must be EOF - if (result and strict and (get_token() != token_type::end_of_input)) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), - exception_message(token_type::end_of_input, "value"))); - } - - return result; - } - - private: - template <typename SAX> - bool sax_parse_internal(SAX* sax) - { - // stack to remember the hierarchy of structured values we are parsing - // true = array; false = object - std::vector<bool> states; - // value to avoid a goto (see comment where set to true) - bool skip_to_state_evaluation = false; - - while (true) - { - if (not skip_to_state_evaluation) - { - // invariant: get_token() was called before each iteration - switch (last_token) - { - case token_type::begin_object: - { - if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) - { - return false; - } - - // closing } -> we are done - if (get_token() == token_type::end_object) - { - if (JSON_UNLIKELY(not sax->end_object())) - { - return false; - } - break; - } - - // parse key - if (JSON_UNLIKELY(last_token != token_type::value_string)) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), - exception_message(token_type::value_string, "object key"))); - } - if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) - { - return false; - } - - // parse separator (:) - if (JSON_UNLIKELY(get_token() != token_type::name_separator)) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), - exception_message(token_type::name_separator, "object separator"))); - } - - // remember we are now inside an object - states.push_back(false); - - // parse values - get_token(); - continue; - } - - case token_type::begin_array: - { - if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) - { - return false; - } - - // closing ] -> we are done - if (get_token() == token_type::end_array) - { - if (JSON_UNLIKELY(not sax->end_array())) - { - return false; - } - break; - } - - // remember we are now inside an array - states.push_back(true); - - // parse values (no need to call get_token) - continue; - } - - case token_type::value_float: - { - const auto res = m_lexer.get_number_float(); - - if (JSON_UNLIKELY(not std::isfinite(res))) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'")); - } - else - { - if (JSON_UNLIKELY(not sax->number_float(res, m_lexer.get_string()))) - { - return false; - } - break; - } - } - - case token_type::literal_false: - { - if (JSON_UNLIKELY(not sax->boolean(false))) - { - return false; - } - break; - } - - case token_type::literal_null: - { - if (JSON_UNLIKELY(not sax->null())) - { - return false; - } - break; - } - - case token_type::literal_true: - { - if (JSON_UNLIKELY(not sax->boolean(true))) - { - return false; - } - break; - } - - case token_type::value_integer: - { - if (JSON_UNLIKELY(not sax->number_integer(m_lexer.get_number_integer()))) - { - return false; - } - break; - } - - case token_type::value_string: - { - if (JSON_UNLIKELY(not sax->string(m_lexer.get_string()))) - { - return false; - } - break; - } - - case token_type::value_unsigned: - { - if (JSON_UNLIKELY(not sax->number_unsigned(m_lexer.get_number_unsigned()))) - { - return false; - } - break; - } - - case token_type::parse_error: - { - // using "uninitialized" to avoid "expected" message - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), - exception_message(token_type::uninitialized, "value"))); - } - - default: // the last token was unexpected - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), - exception_message(token_type::literal_or_value, "value"))); - } - } - } - else - { - skip_to_state_evaluation = false; - } - - // we reached this line after we successfully parsed a value - if (states.empty()) - { - // empty stack: we reached the end of the hierarchy: done - return true; - } - else - { - if (states.back()) // array - { - // comma -> next value - if (get_token() == token_type::value_separator) - { - // parse a new value - get_token(); - continue; - } - - // closing ] - if (JSON_LIKELY(last_token == token_type::end_array)) - { - if (JSON_UNLIKELY(not sax->end_array())) - { - return false; - } - - // We are done with this array. Before we can parse a - // new value, we need to evaluate the new state first. - // By setting skip_to_state_evaluation to false, we - // are effectively jumping to the beginning of this if. - assert(not states.empty()); - states.pop_back(); - skip_to_state_evaluation = true; - continue; - } - else - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), - exception_message(token_type::end_array, "array"))); - } - } - else // object - { - // comma -> next value - if (get_token() == token_type::value_separator) - { - // parse key - if (JSON_UNLIKELY(get_token() != token_type::value_string)) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), - exception_message(token_type::value_string, "object key"))); - } - else - { - if (JSON_UNLIKELY(not sax->key(m_lexer.get_string()))) - { - return false; - } - } - - // parse separator (:) - if (JSON_UNLIKELY(get_token() != token_type::name_separator)) - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), - exception_message(token_type::name_separator, "object separator"))); - } - - // parse values - get_token(); - continue; - } - - // closing } - if (JSON_LIKELY(last_token == token_type::end_object)) - { - if (JSON_UNLIKELY(not sax->end_object())) - { - return false; - } - - // We are done with this object. Before we can parse a - // new value, we need to evaluate the new state first. - // By setting skip_to_state_evaluation to false, we - // are effectively jumping to the beginning of this if. - assert(not states.empty()); - states.pop_back(); - skip_to_state_evaluation = true; - continue; - } - else - { - return sax->parse_error(m_lexer.get_position(), - m_lexer.get_token_string(), - parse_error::create(101, m_lexer.get_position(), - exception_message(token_type::end_object, "object"))); - } - } - } - } - } - - /// get next token from lexer - token_type get_token() - { - return (last_token = m_lexer.scan()); - } - - std::string exception_message(const token_type expected, const std::string& context) - { - std::string error_msg = "syntax error "; - - if (not context.empty()) - { - error_msg += "while parsing " + context + " "; - } - - error_msg += "- "; - - if (last_token == token_type::parse_error) - { - error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" + - m_lexer.get_token_string() + "'"; - } - else - { - error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token)); - } - - if (expected != token_type::uninitialized) - { - error_msg += "; expected " + std::string(lexer_t::token_type_name(expected)); - } - - return error_msg; - } - - private: - /// callback function - const parser_callback_t callback = nullptr; - /// the type of the last read token - token_type last_token = token_type::uninitialized; - /// the lexer - lexer_t m_lexer; - /// whether to throw exceptions in case of errors - const bool allow_exceptions = true; -}; -} // namespace detail -} // namespace nlohmann diff --git a/include/lib/modernjson/detail/input/position_t.hpp b/include/lib/modernjson/detail/input/position_t.hpp deleted file mode 100644 index 37f4ab1..0000000 --- a/include/lib/modernjson/detail/input/position_t.hpp +++ /dev/null @@ -1,27 +0,0 @@ -#pragma once - -#include <cstddef> // size_t - -namespace nlohmann -{ -namespace detail -{ -/// struct to capture the start position of the current token -struct position_t -{ - /// the total number of characters read - std::size_t chars_read_total = 0; - /// the number of characters read in the current line - std::size_t chars_read_current_line = 0; - /// the number of lines read - std::size_t lines_read = 0; - - /// conversion to size_t to preserve SAX interface - constexpr operator size_t() const - { - return chars_read_total; - } -}; - -} -} |