diff options
Diffstat (limited to 'include/lib/modernjson/detail/input/binary_reader.hpp')
-rw-r--r-- | include/lib/modernjson/detail/input/binary_reader.hpp | 1025 |
1 files changed, 656 insertions, 369 deletions
diff --git a/include/lib/modernjson/detail/input/binary_reader.hpp b/include/lib/modernjson/detail/input/binary_reader.hpp index 103cbf7..637569a 100644 --- a/include/lib/modernjson/detail/input/binary_reader.hpp +++ b/include/lib/modernjson/detail/input/binary_reader.hpp @@ -68,6 +68,10 @@ class binary_reader switch (format) { + case input_format_t::bson: + result = parse_bson_internal(); + break; + case input_format_t::cbor: result = parse_cbor_internal(); break; @@ -100,7 +104,8 @@ class binary_reader if (JSON_UNLIKELY(current != std::char_traits<char>::eof())) { - return sax->parse_error(chars_read, get_token_string(), parse_error::create(110, chars_read, "expected end of input")); + return sax->parse_error(chars_read, get_token_string(), + parse_error::create(110, chars_read, exception_message(format, "expected end of input; last byte: 0x" + get_token_string(), "value"))); } } @@ -120,6 +125,225 @@ class binary_reader } private: + ////////// + // BSON // + ////////// + + /*! + @brief Reads in a BSON-object and passes it to the SAX-parser. + @return whether a valid BSON-value was passed to the SAX parser + */ + bool parse_bson_internal() + { + std::int32_t document_size; + get_number<std::int32_t, true>(input_format_t::bson, document_size); + + if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1)))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/false))) + { + return false; + } + + return sax->end_object(); + } + + /*! + @brief Parses a C-style string from the BSON input. + @param[in, out] result A reference to the string variable where the read + string is to be stored. + @return `true` if the \x00-byte indicating the end of the string was + encountered before the EOF; false` indicates an unexpected EOF. + */ + bool get_bson_cstr(string_t& result) + { + auto out = std::back_inserter(result); + while (true) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "cstring"))) + { + return false; + } + if (current == 0x00) + { + return true; + } + *out++ = static_cast<char>(current); + } + + return true; + } + + /*! + @brief Parses a zero-terminated string of length @a len from the BSON + input. + @param[in] len The length (including the zero-byte at the end) of the + string to be read. + @param[in, out] result A reference to the string variable where the read + string is to be stored. + @tparam NumberType The type of the length @a len + @pre len >= 1 + @return `true` if the string was successfully parsed + */ + template<typename NumberType> + bool get_bson_string(const NumberType len, string_t& result) + { + if (JSON_UNLIKELY(len < 1)) + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::bson, "string length must be at least 1, is " + std::to_string(len), "string"))); + } + + return get_string(input_format_t::bson, len - static_cast<NumberType>(1), result) and get() != std::char_traits<char>::eof(); + } + + /*! + @brief Read a BSON document element of the given @a element_type. + @param[in] element_type The BSON element type, c.f. http://bsonspec.org/spec.html + @param[in] element_type_parse_position The position in the input stream, + where the `element_type` was read. + @warning Not all BSON element types are supported yet. An unsupported + @a element_type will give rise to a parse_error.114: + Unsupported BSON record type 0x... + @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_internal(const int element_type, + const std::size_t element_type_parse_position) + { + switch (element_type) + { + case 0x01: // double + { + double number; + return get_number<double, true>(input_format_t::bson, number) and sax->number_float(static_cast<number_float_t>(number), ""); + } + + case 0x02: // string + { + std::int32_t len; + string_t value; + return get_number<std::int32_t, true>(input_format_t::bson, len) and get_bson_string(len, value) and sax->string(value); + } + + case 0x03: // object + { + return parse_bson_internal(); + } + + case 0x04: // array + { + return parse_bson_array(); + } + + case 0x08: // boolean + { + return sax->boolean(get() != 0); + } + + case 0x0A: // null + { + return sax->null(); + } + + case 0x10: // int32 + { + std::int32_t value; + return get_number<std::int32_t, true>(input_format_t::bson, value) and sax->number_integer(value); + } + + case 0x12: // int64 + { + std::int64_t value; + return get_number<std::int64_t, true>(input_format_t::bson, value) and sax->number_integer(value); + } + + default: // anything else not supported (yet) + { + char cr[3]; + (std::snprintf)(cr, sizeof(cr), "%.2hhX", static_cast<unsigned char>(element_type)); + return sax->parse_error(element_type_parse_position, std::string(cr), parse_error::create(114, element_type_parse_position, "Unsupported BSON record type 0x" + std::string(cr))); + } + } + } + + /*! + @brief Read a BSON element list (as specified in the BSON-spec) + + The same binary layout is used for objects and arrays, hence it must be + indicated with the argument @a is_array which one is expected + (true --> array, false --> object). + + @param[in] is_array Determines if the element list being read is to be + treated as an object (@a is_array == false), or as an + array (@a is_array == true). + @return whether a valid BSON-object/array was passed to the SAX parser + */ + bool parse_bson_element_list(const bool is_array) + { + string_t key; + while (int element_type = get()) + { + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::bson, "element list"))) + { + return false; + } + + const std::size_t element_type_parse_position = chars_read; + if (JSON_UNLIKELY(not get_bson_cstr(key))) + { + return false; + } + + if (not is_array) + { + if (not sax->key(key)) + { + return false; + } + } + + if (JSON_UNLIKELY(not parse_bson_element_internal(element_type, element_type_parse_position))) + { + return false; + } + + // get_bson_cstr only appends + key.clear(); + } + + return true; + } + + /*! + @brief Reads an array from the BSON input and passes it to the SAX-parser. + @return whether a valid BSON-array was passed to the SAX parser + */ + bool parse_bson_array() + { + std::int32_t document_size; + get_number<std::int32_t, true>(input_format_t::bson, document_size); + + if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1)))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_bson_element_list(/*is_array*/true))) + { + return false; + } + + return sax->end_array(); + } + + ////////// + // CBOR // + ////////// + /*! @param[in] get_char whether a new character should be retrieved from the input (true, default) or whether the last read @@ -133,7 +357,7 @@ class binary_reader { // EOF case std::char_traits<char>::eof(): - return unexpect_eof(); + return unexpect_eof(input_format_t::cbor, "value"); // Integer 0x00..0x17 (0..23) case 0x00: @@ -165,25 +389,25 @@ class binary_reader case 0x18: // Unsigned integer (one-byte uint8_t follows) { uint8_t number; - return get_number(number) and sax->number_unsigned(number); + return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); } case 0x19: // Unsigned integer (two-byte uint16_t follows) { uint16_t number; - return get_number(number) and sax->number_unsigned(number); + return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); } case 0x1A: // Unsigned integer (four-byte uint32_t follows) { uint32_t number; - return get_number(number) and sax->number_unsigned(number); + return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); } case 0x1B: // Unsigned integer (eight-byte uint64_t follows) { uint64_t number; - return get_number(number) and sax->number_unsigned(number); + return get_number(input_format_t::cbor, number) and sax->number_unsigned(number); } // Negative integer -1-0x00..-1-0x17 (-1..-24) @@ -216,25 +440,25 @@ class binary_reader case 0x38: // Negative integer (one-byte uint8_t follows) { uint8_t number; - return get_number(number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); + return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); } case 0x39: // Negative integer -1-n (two-byte uint16_t follows) { uint16_t number; - return get_number(number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); + return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); } case 0x3A: // Negative integer -1-n (four-byte uint32_t follows) { uint32_t number; - return get_number(number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); + return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - number); } case 0x3B: // Negative integer -1-n (eight-byte uint64_t follows) { uint64_t number; - return get_number(number) and sax->number_integer(static_cast<number_integer_t>(-1) + return get_number(input_format_t::cbor, number) and sax->number_integer(static_cast<number_integer_t>(-1) - static_cast<number_integer_t>(number)); } @@ -303,25 +527,25 @@ class binary_reader case 0x98: // array (one-byte uint8_t for n follows) { uint8_t len; - return get_number(len) and get_cbor_array(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); } case 0x99: // array (two-byte uint16_t for n follow) { uint16_t len; - return get_number(len) and get_cbor_array(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); } case 0x9A: // array (four-byte uint32_t for n follow) { uint32_t len; - return get_number(len) and get_cbor_array(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); } case 0x9B: // array (eight-byte uint64_t for n follow) { uint64_t len; - return get_number(len) and get_cbor_array(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_cbor_array(static_cast<std::size_t>(len)); } case 0x9F: // array (indefinite length) @@ -357,25 +581,25 @@ class binary_reader case 0xB8: // map (one-byte uint8_t for n follows) { uint8_t len; - return get_number(len) and get_cbor_object(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); } case 0xB9: // map (two-byte uint16_t for n follow) { uint16_t len; - return get_number(len) and get_cbor_object(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); } case 0xBA: // map (four-byte uint32_t for n follow) { uint32_t len; - return get_number(len) and get_cbor_object(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); } case 0xBB: // map (eight-byte uint64_t for n follow) { uint64_t len; - return get_number(len) and get_cbor_object(static_cast<std::size_t>(len)); + return get_number(input_format_t::cbor, len) and get_cbor_object(static_cast<std::size_t>(len)); } case 0xBF: // map (indefinite length) @@ -392,17 +616,20 @@ class binary_reader case 0xF9: // Half-Precision Float (two-byte IEEE 754) { - const int byte1 = get(); - if (JSON_UNLIKELY(not unexpect_eof())) + const int byte1_raw = get(); + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "number"))) { return false; } - const int byte2 = get(); - if (JSON_UNLIKELY(not unexpect_eof())) + const int byte2_raw = get(); + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "number"))) { return false; } + const auto byte1 = static_cast<unsigned char>(byte1_raw); + const auto byte2 = static_cast<unsigned char>(byte2_raw); + // code from RFC 7049, Appendix D, Figure 3: // As half-precision floating-point numbers were only added // to IEEE 754 in 2008, today's programming platforms often @@ -438,24 +665,209 @@ class binary_reader case 0xFA: // Single-Precision Float (four-byte IEEE 754) { float number; - return get_number(number) and sax->number_float(static_cast<number_float_t>(number), ""); + return get_number(input_format_t::cbor, number) and sax->number_float(static_cast<number_float_t>(number), ""); } case 0xFB: // Double-Precision Float (eight-byte IEEE 754) { double number; - return get_number(number) and sax->number_float(static_cast<number_float_t>(number), ""); + return get_number(input_format_t::cbor, number) and sax->number_float(static_cast<number_float_t>(number), ""); } default: // anything else (0xFF is handled inside the other types) { auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "error reading CBOR; last byte: 0x" + last_token)); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::cbor, "invalid byte: 0x" + last_token, "value"))); + } + } + } + + /*! + @brief reads a CBOR string + + This function first reads starting bytes to determine the expected + string length and then copies this number of bytes into a string. + Additionally, CBOR's strings with indefinite lengths are supported. + + @param[out] result created string + + @return whether string creation completed + */ + bool get_cbor_string(string_t& result) + { + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::cbor, "string"))) + { + return false; + } + + switch (current) + { + // UTF-8 string (0x00..0x17 bytes follow) + case 0x60: + case 0x61: + case 0x62: + case 0x63: + case 0x64: + case 0x65: + case 0x66: + case 0x67: + case 0x68: + case 0x69: + case 0x6A: + case 0x6B: + case 0x6C: + case 0x6D: + case 0x6E: + case 0x6F: + case 0x70: + case 0x71: + case 0x72: + case 0x73: + case 0x74: + case 0x75: + case 0x76: + case 0x77: + { + return get_string(input_format_t::cbor, current & 0x1F, result); + } + + case 0x78: // UTF-8 string (one-byte uint8_t for n follows) + { + uint8_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x79: // UTF-8 string (two-byte uint16_t for n follow) + { + uint16_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) + { + uint32_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) + { + uint64_t len; + return get_number(input_format_t::cbor, len) and get_string(input_format_t::cbor, len, result); + } + + case 0x7F: // UTF-8 string (indefinite length) + { + while (get() != 0xFF) + { + string_t chunk; + if (not get_cbor_string(chunk)) + { + return false; + } + result.append(chunk); + } + return true; + } + + default: + { + auto last_token = get_token_string(); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::cbor, "expected length specification (0x60-0x7B) or indefinite string type (0x7F); last byte: 0x" + last_token, "string"))); } } } /*! + @param[in] len the length of the array or std::size_t(-1) for an + array of indefinite size + @return whether array creation completed + */ + bool get_cbor_array(const std::size_t len) + { + if (JSON_UNLIKELY(not sax->start_array(len))) + { + return false; + } + + if (len != std::size_t(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + } + } + else + { + while (get() != 0xFF) + { + if (JSON_UNLIKELY(not parse_cbor_internal(false))) + { + return false; + } + } + } + + return sax->end_array(); + } + + /*! + @param[in] len the length of the object or std::size_t(-1) for an + object of indefinite size + @return whether object creation completed + */ + bool get_cbor_object(const std::size_t len) + { + if (not JSON_UNLIKELY(sax->start_object(len))) + { + return false; + } + + string_t key; + if (len != std::size_t(-1)) + { + for (std::size_t i = 0; i < len; ++i) + { + get(); + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); + } + } + else + { + while (get() != 0xFF) + { + if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) + { + return false; + } + + if (JSON_UNLIKELY(not parse_cbor_internal())) + { + return false; + } + key.clear(); + } + } + + return sax->end_object(); + } + + ///////////// + // MsgPack // + ///////////// + + /*! @return whether a valid MessagePack value was passed to the SAX parser */ bool parse_msgpack_internal() @@ -464,7 +876,7 @@ class binary_reader { // EOF case std::char_traits<char>::eof(): - return unexpect_eof(); + return unexpect_eof(input_format_t::msgpack, "value"); // positive fixint case 0x00: @@ -685,61 +1097,61 @@ class binary_reader case 0xCA: // float 32 { float number; - return get_number(number) and sax->number_float(static_cast<number_float_t>(number), ""); + return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), ""); } case 0xCB: // float 64 { double number; - return get_number(number) and sax->number_float(static_cast<number_float_t>(number), ""); + return get_number(input_format_t::msgpack, number) and sax->number_float(static_cast<number_float_t>(number), ""); } case 0xCC: // uint 8 { uint8_t number; - return get_number(number) and sax->number_unsigned(number); + return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); } case 0xCD: // uint 16 { uint16_t number; - return get_number(number) and sax->number_unsigned(number); + return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); } case 0xCE: // uint 32 { uint32_t number; - return get_number(number) and sax->number_unsigned(number); + return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); } case 0xCF: // uint 64 { uint64_t number; - return get_number(number) and sax->number_unsigned(number); + return get_number(input_format_t::msgpack, number) and sax->number_unsigned(number); } case 0xD0: // int 8 { int8_t number; - return get_number(number) and sax->number_integer(number); + return get_number(input_format_t::msgpack, number) and sax->number_integer(number); } case 0xD1: // int 16 { int16_t number; - return get_number(number) and sax->number_integer(number); + return get_number(input_format_t::msgpack, number) and sax->number_integer(number); } case 0xD2: // int 32 { int32_t number; - return get_number(number) and sax->number_integer(number); + return get_number(input_format_t::msgpack, number) and sax->number_integer(number); } case 0xD3: // int 64 { int64_t number; - return get_number(number) and sax->number_integer(number); + return get_number(input_format_t::msgpack, number) and sax->number_integer(number); } case 0xD9: // str 8 @@ -753,25 +1165,25 @@ class binary_reader case 0xDC: // array 16 { uint16_t len; - return get_number(len) and get_msgpack_array(static_cast<std::size_t>(len)); + return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len)); } case 0xDD: // array 32 { uint32_t len; - return get_number(len) and get_msgpack_array(static_cast<std::size_t>(len)); + return get_number(input_format_t::msgpack, len) and get_msgpack_array(static_cast<std::size_t>(len)); } case 0xDE: // map 16 { uint16_t len; - return get_number(len) and get_msgpack_object(static_cast<std::size_t>(len)); + return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len)); } case 0xDF: // map 32 { uint32_t len; - return get_number(len) and get_msgpack_object(static_cast<std::size_t>(len)); + return get_number(input_format_t::msgpack, len) and get_msgpack_object(static_cast<std::size_t>(len)); } // negative fixint @@ -812,302 +1224,12 @@ class binary_reader default: // anything else { auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "error reading MessagePack; last byte: 0x" + last_token)); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::msgpack, "invalid byte: 0x" + last_token, "value"))); } } } /*! - @param[in] get_char whether a new character should be retrieved from the - input (true, default) or whether the last read - character should be considered instead - - @return whether a valid UBJSON value was passed to the SAX parser - */ - bool parse_ubjson_internal(const bool get_char = true) - { - return get_ubjson_value(get_char ? get_ignore_noop() : current); - } - - /*! - @brief get next character from the input - - This function provides the interface to the used input adapter. It does - not throw in case the input reached EOF, but returns a -'ve valued - `std::char_traits<char>::eof()` in that case. - - @return character read from the input - */ - int get() - { - ++chars_read; - return (current = ia->get_character()); - } - - /*! - @return character read from the input after ignoring all 'N' entries - */ - int get_ignore_noop() - { - do - { - get(); - } - while (current == 'N'); - - return current; - } - - /* - @brief read a number from the input - - @tparam NumberType the type of the number - @param[out] result number of type @a NumberType - - @return whether conversion completed - - @note This function needs to respect the system's endianess, because - bytes in CBOR, MessagePack, and UBJSON are stored in network order - (big endian) and therefore need reordering on little endian systems. - */ - template<typename NumberType> - bool get_number(NumberType& result) - { - // step 1: read input into array with system's byte order - std::array<uint8_t, sizeof(NumberType)> vec; - for (std::size_t i = 0; i < sizeof(NumberType); ++i) - { - get(); - if (JSON_UNLIKELY(not unexpect_eof())) - { - return false; - } - - // reverse byte order prior to conversion if necessary - if (is_little_endian) - { - vec[sizeof(NumberType) - i - 1] = static_cast<uint8_t>(current); - } - else - { - vec[i] = static_cast<uint8_t>(current); // LCOV_EXCL_LINE - } - } - - // step 2: convert array into number of type T and return - std::memcpy(&result, vec.data(), sizeof(NumberType)); - return true; - } - - /*! - @brief create a string by reading characters from the input - - @tparam NumberType the type of the number - @param[in] len number of characters to read - @param[out] string created by reading @a len bytes - - @return whether string creation completed - - @note We can not reserve @a len bytes for the result, because @a len - may be too large. Usually, @ref unexpect_eof() detects the end of - the input before we run out of string memory. - */ - template<typename NumberType> - bool get_string(const NumberType len, string_t& result) - { - bool success = true; - std::generate_n(std::back_inserter(result), len, [this, &success]() - { - get(); - if (JSON_UNLIKELY(not unexpect_eof())) - { - success = false; - } - return static_cast<char>(current); - }); - return success; - } - - /*! - @brief reads a CBOR string - - This function first reads starting bytes to determine the expected - string length and then copies this number of bytes into a string. - Additionally, CBOR's strings with indefinite lengths are supported. - - @param[out] result created string - - @return whether string creation completed - */ - bool get_cbor_string(string_t& result) - { - if (JSON_UNLIKELY(not unexpect_eof())) - { - return false; - } - - switch (current) - { - // UTF-8 string (0x00..0x17 bytes follow) - case 0x60: - case 0x61: - case 0x62: - case 0x63: - case 0x64: - case 0x65: - case 0x66: - case 0x67: - case 0x68: - case 0x69: - case 0x6A: - case 0x6B: - case 0x6C: - case 0x6D: - case 0x6E: - case 0x6F: - case 0x70: - case 0x71: - case 0x72: - case 0x73: - case 0x74: - case 0x75: - case 0x76: - case 0x77: - { - return get_string(current & 0x1F, result); - } - - case 0x78: // UTF-8 string (one-byte uint8_t for n follows) - { - uint8_t len; - return get_number(len) and get_string(len, result); - } - - case 0x79: // UTF-8 string (two-byte uint16_t for n follow) - { - uint16_t len; - return get_number(len) and get_string(len, result); - } - - case 0x7A: // UTF-8 string (four-byte uint32_t for n follow) - { - uint32_t len; - return get_number(len) and get_string(len, result); - } - - case 0x7B: // UTF-8 string (eight-byte uint64_t for n follow) - { - uint64_t len; - return get_number(len) and get_string(len, result); - } - - case 0x7F: // UTF-8 string (indefinite length) - { - while (get() != 0xFF) - { - string_t chunk; - if (not get_cbor_string(chunk)) - { - return false; - } - result.append(chunk); - } - return true; - } - - default: - { - auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "expected a CBOR string; last byte: 0x" + last_token)); - } - } - } - - /*! - @param[in] len the length of the array or std::size_t(-1) for an - array of indefinite size - @return whether array creation completed - */ - bool get_cbor_array(const std::size_t len) - { - if (JSON_UNLIKELY(not sax->start_array(len))) - { - return false; - } - - if (len != std::size_t(-1)) - for (std::size_t i = 0; i < len; ++i) - { - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - } - else - { - while (get() != 0xFF) - { - if (JSON_UNLIKELY(not parse_cbor_internal(false))) - { - return false; - } - } - } - - return sax->end_array(); - } - - /*! - @param[in] len the length of the object or std::size_t(-1) for an - object of indefinite size - @return whether object creation completed - */ - bool get_cbor_object(const std::size_t len) - { - if (not JSON_UNLIKELY(sax->start_object(len))) - { - return false; - } - - string_t key; - if (len != std::size_t(-1)) - { - for (std::size_t i = 0; i < len; ++i) - { - get(); - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); - } - } - else - { - while (get() != 0xFF) - { - if (JSON_UNLIKELY(not get_cbor_string(key) or not sax->key(key))) - { - return false; - } - - if (JSON_UNLIKELY(not parse_cbor_internal())) - { - return false; - } - key.clear(); - } - } - - return sax->end_object(); - } - - /*! @brief reads a MessagePack string This function first reads starting bytes to determine the expected @@ -1119,7 +1241,7 @@ class binary_reader */ bool get_msgpack_string(string_t& result) { - if (JSON_UNLIKELY(not unexpect_eof())) + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::msgpack, "string"))) { return false; } @@ -1160,31 +1282,31 @@ class binary_reader case 0xBE: case 0xBF: { - return get_string(current & 0x1F, result); + return get_string(input_format_t::msgpack, current & 0x1F, result); } case 0xD9: // str 8 { uint8_t len; - return get_number(len) and get_string(len, result); + return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); } case 0xDA: // str 16 { uint16_t len; - return get_number(len) and get_string(len, result); + return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); } case 0xDB: // str 32 { uint32_t len; - return get_number(len) and get_string(len, result); + return get_number(input_format_t::msgpack, len) and get_string(input_format_t::msgpack, len, result); } default: { auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "expected a MessagePack string; last byte: 0x" + last_token)); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::msgpack, "expected length specification (0xA0-0xBF, 0xD9-0xDB); last byte: 0x" + last_token, "string"))); } } } @@ -1241,6 +1363,22 @@ class binary_reader return sax->end_object(); } + //////////// + // UBJSON // + //////////// + + /*! + @param[in] get_char whether a new character should be retrieved from the + input (true, default) or whether the last read + character should be considered instead + + @return whether a valid UBJSON value was passed to the SAX parser + */ + bool parse_ubjson_internal(const bool get_char = true) + { + return get_ubjson_value(get_char ? get_ignore_noop() : current); + } + /*! @brief reads a UBJSON string @@ -1262,7 +1400,7 @@ class binary_reader get(); // TODO: may we ignore N here? } - if (JSON_UNLIKELY(not unexpect_eof())) + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "value"))) { return false; } @@ -1272,36 +1410,36 @@ class binary_reader case 'U': { uint8_t len; - return get_number(len) and get_string(len, result); + return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); } case 'i': { int8_t len; - return get_number(len) and get_string(len, result); + return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); } case 'I': { int16_t len; - return get_number(len) and get_string(len, result); + return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); } case 'l': { int32_t len; - return get_number(len) and get_string(len, result); + return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); } case 'L': { int64_t len; - return get_number(len) and get_string(len, result); + return get_number(input_format_t::ubjson, len) and get_string(input_format_t::ubjson, len, result); } default: auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "expected a UBJSON string; last byte: 0x" + last_token)); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L); last byte: 0x" + last_token, "string"))); } } @@ -1316,7 +1454,7 @@ class binary_reader case 'U': { uint8_t number; - if (JSON_UNLIKELY(not get_number(number))) + if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) { return false; } @@ -1327,7 +1465,7 @@ class binary_reader case 'i': { int8_t number; - if (JSON_UNLIKELY(not get_number(number))) + if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) { return false; } @@ -1338,7 +1476,7 @@ class binary_reader case 'I': { int16_t number; - if (JSON_UNLIKELY(not get_number(number))) + if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) { return false; } @@ -1349,7 +1487,7 @@ class binary_reader case 'l': { int32_t number; - if (JSON_UNLIKELY(not get_number(number))) + if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) { return false; } @@ -1360,7 +1498,7 @@ class binary_reader case 'L': { int64_t number; - if (JSON_UNLIKELY(not get_number(number))) + if (JSON_UNLIKELY(not get_number(input_format_t::ubjson, number))) { return false; } @@ -1371,7 +1509,7 @@ class binary_reader default: { auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "byte after '#' must denote a number type; last byte: 0x" + last_token)); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "expected length type specification (U, i, I, l, L) after '#'; last byte: 0x" + last_token, "size"))); } } } @@ -1396,7 +1534,7 @@ class binary_reader if (current == '$') { result.second = get(); // must not ignore 'N', because 'N' maybe the type - if (JSON_UNLIKELY(not unexpect_eof())) + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "type"))) { return false; } @@ -1404,12 +1542,12 @@ class binary_reader get_ignore_noop(); if (JSON_UNLIKELY(current != '#')) { - if (JSON_UNLIKELY(not unexpect_eof())) + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "value"))) { return false; } auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "expected '#' after UBJSON type information; last byte: 0x" + last_token)); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "expected '#' after type information; last byte: 0x" + last_token, "size"))); } return get_ubjson_size_value(result.first); @@ -1430,7 +1568,7 @@ class binary_reader switch (prefix) { case std::char_traits<char>::eof(): // EOF - return unexpect_eof(); + return unexpect_eof(input_format_t::ubjson, "value"); case 'T': // true return sax->boolean(true); @@ -1443,56 +1581,56 @@ class binary_reader case 'U': { uint8_t number; - return get_number(number) and sax->number_unsigned(number); + return get_number(input_format_t::ubjson, number) and sax->number_unsigned(number); } case 'i': { int8_t number; - return get_number(number) and sax->number_integer(number); + return get_number(input_format_t::ubjson, number) and sax->number_integer(number); } case 'I': { int16_t number; - return get_number(number) and sax->number_integer(number); + return get_number(input_format_t::ubjson, number) and sax->number_integer(number); } case 'l': { int32_t number; - return get_number(number) and sax->number_integer(number); + return get_number(input_format_t::ubjson, number) and sax->number_integer(number); } case 'L': { int64_t number; - return get_number(number) and sax->number_integer(number); + return get_number(input_format_t::ubjson, number) and sax->number_integer(number); } case 'd': { float number; - return get_number(number) and sax->number_float(static_cast<number_float_t>(number), ""); + return get_number(input_format_t::ubjson, number) and sax->number_float(static_cast<number_float_t>(number), ""); } case 'D': { double number; - return get_number(number) and sax->number_float(static_cast<number_float_t>(number), ""); + return get_number(input_format_t::ubjson, number) and sax->number_float(static_cast<number_float_t>(number), ""); } case 'C': // char { get(); - if (JSON_UNLIKELY(not unexpect_eof())) + if (JSON_UNLIKELY(not unexpect_eof(input_format_t::ubjson, "char"))) { return false; } if (JSON_UNLIKELY(current > 127)) { auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token)); + return sax->parse_error(chars_read, last_token, parse_error::create(113, chars_read, exception_message(input_format_t::ubjson, "byte after 'C' must be in range 0x00..0x7F; last byte: 0x" + last_token, "char"))); } string_t s(1, static_cast<char>(current)); return sax->string(s); @@ -1513,7 +1651,7 @@ class binary_reader default: // anything else { auto last_token = get_token_string(); - return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, "error reading UBJSON; last byte: 0x" + last_token)); + return sax->parse_error(chars_read, last_token, parse_error::create(112, chars_read, exception_message(input_format_t::ubjson, "invalid byte: 0x" + last_token, "value"))); } } } @@ -1655,14 +1793,124 @@ class binary_reader return sax->end_object(); } + /////////////////////// + // Utility functions // + /////////////////////// + + /*! + @brief get next character from the input + + This function provides the interface to the used input adapter. It does + not throw in case the input reached EOF, but returns a -'ve valued + `std::char_traits<char>::eof()` in that case. + + @return character read from the input + */ + int get() + { + ++chars_read; + return (current = ia->get_character()); + } + + /*! + @return character read from the input after ignoring all 'N' entries + */ + int get_ignore_noop() + { + do + { + get(); + } + while (current == 'N'); + + return current; + } + + /* + @brief read a number from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[out] result number of type @a NumberType + + @return whether conversion completed + + @note This function needs to respect the system's endianess, because + bytes in CBOR, MessagePack, and UBJSON are stored in network order + (big endian) and therefore need reordering on little endian systems. + */ + template<typename NumberType, bool InputIsLittleEndian = false> + bool get_number(const input_format_t format, NumberType& result) + { + // step 1: read input into array with system's byte order + std::array<uint8_t, sizeof(NumberType)> vec; + for (std::size_t i = 0; i < sizeof(NumberType); ++i) + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "number"))) + { + return false; + } + + // reverse byte order prior to conversion if necessary + if (is_little_endian && !InputIsLittleEndian) + { + vec[sizeof(NumberType) - i - 1] = static_cast<uint8_t>(current); + } + else + { + vec[i] = static_cast<uint8_t>(current); // LCOV_EXCL_LINE + } + } + + // step 2: convert array into number of type T and return + std::memcpy(&result, vec.data(), sizeof(NumberType)); + return true; + } + + /*! + @brief create a string by reading characters from the input + + @tparam NumberType the type of the number + @param[in] format the current format (for diagnostics) + @param[in] len number of characters to read + @param[out] result string created by reading @a len bytes + + @return whether string creation completed + + @note We can not reserve @a len bytes for the result, because @a len + may be too large. Usually, @ref unexpect_eof() detects the end of + the input before we run out of string memory. + */ + template<typename NumberType> + bool get_string(const input_format_t format, + const NumberType len, + string_t& result) + { + bool success = true; + std::generate_n(std::back_inserter(result), len, [this, &success, &format]() + { + get(); + if (JSON_UNLIKELY(not unexpect_eof(format, "string"))) + { + success = false; + } + return static_cast<char>(current); + }); + return success; + } + /*! + @param[in] format the current format (for diagnostics) + @param[in] context further context information (for diagnostics) @return whether the last read character is not EOF */ - bool unexpect_eof() const + bool unexpect_eof(const input_format_t format, const char* context) const { if (JSON_UNLIKELY(current == std::char_traits<char>::eof())) { - return sax->parse_error(chars_read, "<end of file>", parse_error::create(110, chars_read, "unexpected end of input")); + return sax->parse_error(chars_read, "<end of file>", + parse_error::create(110, chars_read, exception_message(format, "unexpected end of input", context))); } return true; } @@ -1673,10 +1921,49 @@ class binary_reader std::string get_token_string() const { char cr[3]; - snprintf(cr, 3, "%.2hhX", static_cast<unsigned char>(current)); + (std::snprintf)(cr, 3, "%.2hhX", static_cast<unsigned char>(current)); return std::string{cr}; } + /*! + @param[in] format the current format + @param[in] detail a detailed error message + @param[in] context further contect information + @return a message string to use in the parse_error exceptions + */ + std::string exception_message(const input_format_t format, + const std::string& detail, + const std::string& context) const + { + std::string error_msg = "syntax error while parsing "; + + switch (format) + { + case input_format_t::cbor: + error_msg += "CBOR"; + break; + + case input_format_t::msgpack: + error_msg += "MessagePack"; + break; + + case input_format_t::ubjson: + error_msg += "UBJSON"; + break; + + case input_format_t::bson: + error_msg += "BSON"; + break; + + // LCOV_EXCL_START + default: + assert(false); + // LCOV_EXCL_STOP + } + + return error_msg + " " + context + ": " + detail; + } + private: /// input adapter input_adapter_t ia = nullptr; @@ -1693,5 +1980,5 @@ class binary_reader /// the SAX parser json_sax_t* sax = nullptr; }; -} -} +} // namespace detail +} // namespace nlohmann |