From 1542f34f0e0fc53324f6fdc5905f4b77b252a789 Mon Sep 17 00:00:00 2001 From: Daniel Friesel Date: Mon, 26 Nov 2018 09:06:31 +0100 Subject: update nlohmann modernjson to v3.4 (with bson support) --- .../lib/modernjson/detail/output/binary_writer.hpp | 630 +++++++++++++++++---- .../modernjson/detail/output/output_adapters.hpp | 16 +- .../lib/modernjson/detail/output/serializer.hpp | 150 ++++- 3 files changed, 651 insertions(+), 145 deletions(-) (limited to 'include/lib/modernjson/detail/output') diff --git a/include/lib/modernjson/detail/output/binary_writer.hpp b/include/lib/modernjson/detail/output/binary_writer.hpp index 1726e0c..1020e69 100644 --- a/include/lib/modernjson/detail/output/binary_writer.hpp +++ b/include/lib/modernjson/detail/output/binary_writer.hpp @@ -23,6 +23,8 @@ namespace detail template class binary_writer { + using string_t = typename BasicJsonType::string_t; + public: /*! @brief create a binary writer @@ -35,7 +37,28 @@ class binary_writer } /*! - @brief[in] j JSON value to serialize + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson(const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::object: + { + write_bson_object(*j.m_value.object); + break; + } + + default: + { + JSON_THROW(type_error::create(317, "to serialize to BSON, top-level type must be object, but is " + std::string(j.type_name()))); + } + } + } + + /*! + @param[in] j JSON value to serialize */ void write_cbor(const BasicJsonType& j) { @@ -43,15 +66,15 @@ class binary_writer { case value_t::null: { - oa->write_character(static_cast(0xF6)); + oa->write_character(to_char_type(0xF6)); break; } case value_t::boolean: { oa->write_character(j.m_value.boolean - ? static_cast(0xF5) - : static_cast(0xF4)); + ? to_char_type(0xF5) + : to_char_type(0xF4)); break; } @@ -68,22 +91,22 @@ class binary_writer } else if (j.m_value.number_integer <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x18)); + oa->write_character(to_char_type(0x18)); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_integer <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x19)); + oa->write_character(to_char_type(0x19)); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_integer <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x1A)); + oa->write_character(to_char_type(0x1A)); write_number(static_cast(j.m_value.number_integer)); } else { - oa->write_character(static_cast(0x1B)); + oa->write_character(to_char_type(0x1B)); write_number(static_cast(j.m_value.number_integer)); } } @@ -98,22 +121,22 @@ class binary_writer } else if (positive_number <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x38)); + oa->write_character(to_char_type(0x38)); write_number(static_cast(positive_number)); } else if (positive_number <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x39)); + oa->write_character(to_char_type(0x39)); write_number(static_cast(positive_number)); } else if (positive_number <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x3A)); + oa->write_character(to_char_type(0x3A)); write_number(static_cast(positive_number)); } else { - oa->write_character(static_cast(0x3B)); + oa->write_character(to_char_type(0x3B)); write_number(static_cast(positive_number)); } } @@ -128,22 +151,22 @@ class binary_writer } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x18)); + oa->write_character(to_char_type(0x18)); write_number(static_cast(j.m_value.number_unsigned)); } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x19)); + oa->write_character(to_char_type(0x19)); write_number(static_cast(j.m_value.number_unsigned)); } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x1A)); + oa->write_character(to_char_type(0x1A)); write_number(static_cast(j.m_value.number_unsigned)); } else { - oa->write_character(static_cast(0x1B)); + oa->write_character(to_char_type(0x1B)); write_number(static_cast(j.m_value.number_unsigned)); } break; @@ -166,23 +189,23 @@ class binary_writer } else if (N <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x78)); + oa->write_character(to_char_type(0x78)); write_number(static_cast(N)); } else if (N <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x79)); + oa->write_character(to_char_type(0x79)); write_number(static_cast(N)); } else if (N <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x7A)); + oa->write_character(to_char_type(0x7A)); write_number(static_cast(N)); } // LCOV_EXCL_START else if (N <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x7B)); + oa->write_character(to_char_type(0x7B)); write_number(static_cast(N)); } // LCOV_EXCL_STOP @@ -204,23 +227,23 @@ class binary_writer } else if (N <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x98)); + oa->write_character(to_char_type(0x98)); write_number(static_cast(N)); } else if (N <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x99)); + oa->write_character(to_char_type(0x99)); write_number(static_cast(N)); } else if (N <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x9A)); + oa->write_character(to_char_type(0x9A)); write_number(static_cast(N)); } // LCOV_EXCL_START else if (N <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0x9B)); + oa->write_character(to_char_type(0x9B)); write_number(static_cast(N)); } // LCOV_EXCL_STOP @@ -243,23 +266,23 @@ class binary_writer } else if (N <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0xB8)); + oa->write_character(to_char_type(0xB8)); write_number(static_cast(N)); } else if (N <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0xB9)); + oa->write_character(to_char_type(0xB9)); write_number(static_cast(N)); } else if (N <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0xBA)); + oa->write_character(to_char_type(0xBA)); write_number(static_cast(N)); } // LCOV_EXCL_START else if (N <= (std::numeric_limits::max)()) { - oa->write_character(static_cast(0xBB)); + oa->write_character(to_char_type(0xBB)); write_number(static_cast(N)); } // LCOV_EXCL_STOP @@ -279,7 +302,7 @@ class binary_writer } /*! - @brief[in] j JSON value to serialize + @param[in] j JSON value to serialize */ void write_msgpack(const BasicJsonType& j) { @@ -287,15 +310,15 @@ class binary_writer { case value_t::null: // nil { - oa->write_character(static_cast(0xC0)); + oa->write_character(to_char_type(0xC0)); break; } case value_t::boolean: // true and false { oa->write_character(j.m_value.boolean - ? static_cast(0xC3) - : static_cast(0xC2)); + ? to_char_type(0xC3) + : to_char_type(0xC2)); break; } @@ -314,25 +337,25 @@ class binary_writer else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { // uint 8 - oa->write_character(static_cast(0xCC)); + oa->write_character(to_char_type(0xCC)); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { // uint 16 - oa->write_character(static_cast(0xCD)); + oa->write_character(to_char_type(0xCD)); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { // uint 32 - oa->write_character(static_cast(0xCE)); + oa->write_character(to_char_type(0xCE)); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { // uint 64 - oa->write_character(static_cast(0xCF)); + oa->write_character(to_char_type(0xCF)); write_number(static_cast(j.m_value.number_integer)); } } @@ -347,28 +370,28 @@ class binary_writer j.m_value.number_integer <= (std::numeric_limits::max)()) { // int 8 - oa->write_character(static_cast(0xD0)); + oa->write_character(to_char_type(0xD0)); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) { // int 16 - oa->write_character(static_cast(0xD1)); + oa->write_character(to_char_type(0xD1)); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) { // int 32 - oa->write_character(static_cast(0xD2)); + oa->write_character(to_char_type(0xD2)); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_integer >= (std::numeric_limits::min)() and j.m_value.number_integer <= (std::numeric_limits::max)()) { // int 64 - oa->write_character(static_cast(0xD3)); + oa->write_character(to_char_type(0xD3)); write_number(static_cast(j.m_value.number_integer)); } } @@ -385,25 +408,25 @@ class binary_writer else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { // uint 8 - oa->write_character(static_cast(0xCC)); + oa->write_character(to_char_type(0xCC)); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { // uint 16 - oa->write_character(static_cast(0xCD)); + oa->write_character(to_char_type(0xCD)); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { // uint 32 - oa->write_character(static_cast(0xCE)); + oa->write_character(to_char_type(0xCE)); write_number(static_cast(j.m_value.number_integer)); } else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { // uint 64 - oa->write_character(static_cast(0xCF)); + oa->write_character(to_char_type(0xCF)); write_number(static_cast(j.m_value.number_integer)); } break; @@ -428,19 +451,19 @@ class binary_writer else if (N <= (std::numeric_limits::max)()) { // str 8 - oa->write_character(static_cast(0xD9)); + oa->write_character(to_char_type(0xD9)); write_number(static_cast(N)); } else if (N <= (std::numeric_limits::max)()) { // str 16 - oa->write_character(static_cast(0xDA)); + oa->write_character(to_char_type(0xDA)); write_number(static_cast(N)); } else if (N <= (std::numeric_limits::max)()) { // str 32 - oa->write_character(static_cast(0xDB)); + oa->write_character(to_char_type(0xDB)); write_number(static_cast(N)); } @@ -463,13 +486,13 @@ class binary_writer else if (N <= (std::numeric_limits::max)()) { // array 16 - oa->write_character(static_cast(0xDC)); + oa->write_character(to_char_type(0xDC)); write_number(static_cast(N)); } else if (N <= (std::numeric_limits::max)()) { // array 32 - oa->write_character(static_cast(0xDD)); + oa->write_character(to_char_type(0xDD)); write_number(static_cast(N)); } @@ -493,13 +516,13 @@ class binary_writer else if (N <= (std::numeric_limits::max)()) { // map 16 - oa->write_character(static_cast(0xDE)); + oa->write_character(to_char_type(0xDE)); write_number(static_cast(N)); } else if (N <= (std::numeric_limits::max)()) { // map 32 - oa->write_character(static_cast(0xDF)); + oa->write_character(to_char_type(0xDF)); write_number(static_cast(N)); } @@ -532,7 +555,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast('Z')); + oa->write_character(to_char_type('Z')); } break; } @@ -540,9 +563,11 @@ class binary_writer case value_t::boolean: { if (add_prefix) + { oa->write_character(j.m_value.boolean - ? static_cast('T') - : static_cast('F')); + ? to_char_type('T') + : to_char_type('F')); + } break; } @@ -568,7 +593,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast('S')); + oa->write_character(to_char_type('S')); } write_number_with_ubjson_prefix(j.m_value.string->size(), true); oa->write_characters( @@ -581,7 +606,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast('[')); + oa->write_character(to_char_type('[')); } bool prefix_required = true; @@ -598,14 +623,14 @@ class binary_writer if (same_prefix) { prefix_required = false; - oa->write_character(static_cast('$')); + oa->write_character(to_char_type('$')); oa->write_character(first_prefix); } } if (use_count) { - oa->write_character(static_cast('#')); + oa->write_character(to_char_type('#')); write_number_with_ubjson_prefix(j.m_value.array->size(), true); } @@ -616,7 +641,7 @@ class binary_writer if (not use_count) { - oa->write_character(static_cast(']')); + oa->write_character(to_char_type(']')); } break; @@ -626,7 +651,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast('{')); + oa->write_character(to_char_type('{')); } bool prefix_required = true; @@ -643,14 +668,14 @@ class binary_writer if (same_prefix) { prefix_required = false; - oa->write_character(static_cast('$')); + oa->write_character(to_char_type('$')); oa->write_character(first_prefix); } } if (use_count) { - oa->write_character(static_cast('#')); + oa->write_character(to_char_type('#')); write_number_with_ubjson_prefix(j.m_value.object->size(), true); } @@ -665,7 +690,7 @@ class binary_writer if (not use_count) { - oa->write_character(static_cast('}')); + oa->write_character(to_char_type('}')); } break; @@ -677,33 +702,349 @@ class binary_writer } private: - /* - @brief write a number to output input + ////////// + // BSON // + ////////// - @param[in] n number of type @a NumberType - @tparam NumberType the type of the number + /*! + @return The size of a BSON document entry header, including the id marker + and the entry name size (and its null-terminator). + */ + static std::size_t calc_bson_entry_header_size(const string_t& name) + { + const auto it = name.find(static_cast(0)); + if (JSON_UNLIKELY(it != BasicJsonType::string_t::npos)) + { + JSON_THROW(out_of_range::create(409, + "BSON key cannot contain code point U+0000 (at byte " + std::to_string(it) + ")")); + } - @note This function needs to respect the system's endianess, because bytes - in CBOR, MessagePack, and UBJSON are stored in network order (big - endian) and therefore need reordering on little endian systems. + return /*id*/ 1ul + name.size() + /*zero-terminator*/1u; + } + + /*! + @brief Writes the given @a element_type and @a name to the output adapter */ - template - void write_number(const NumberType n) + void write_bson_entry_header(const string_t& name, + const std::uint8_t element_type) { - // step 1: write number to array of length NumberType - std::array vec; - std::memcpy(vec.data(), &n, sizeof(NumberType)); + oa->write_character(to_char_type(element_type)); // boolean + oa->write_characters( + reinterpret_cast(name.c_str()), + name.size() + 1u); + } - // step 2: write array to output (with possible reordering) - if (is_little_endian) + /*! + @brief Writes a BSON element with key @a name and boolean value @a value + */ + void write_bson_boolean(const string_t& name, + const bool value) + { + write_bson_entry_header(name, 0x08); + oa->write_character(value ? to_char_type(0x01) : to_char_type(0x00)); + } + + /*! + @brief Writes a BSON element with key @a name and double value @a value + */ + void write_bson_double(const string_t& name, + const double value) + { + write_bson_entry_header(name, 0x01); + write_number(value); + } + + /*! + @return The size of the BSON-encoded string in @a value + */ + static std::size_t calc_bson_string_size(const string_t& value) + { + return sizeof(std::int32_t) + value.size() + 1ul; + } + + /*! + @brief Writes a BSON element with key @a name and string value @a value + */ + void write_bson_string(const string_t& name, + const string_t& value) + { + write_bson_entry_header(name, 0x02); + + write_number(static_cast(value.size() + 1ul)); + oa->write_characters( + reinterpret_cast(value.c_str()), + value.size() + 1); + } + + /*! + @brief Writes a BSON element with key @a name and null value + */ + void write_bson_null(const string_t& name) + { + write_bson_entry_header(name, 0x0A); + } + + /*! + @return The size of the BSON-encoded integer @a value + */ + static std::size_t calc_bson_integer_size(const std::int64_t value) + { + if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) { - // reverse byte order prior to conversion if necessary - std::reverse(vec.begin(), vec.end()); + return sizeof(std::int32_t); } + else + { + return sizeof(std::int64_t); + } + } - oa->write_characters(vec.data(), sizeof(NumberType)); + /*! + @brief Writes a BSON element with key @a name and integer @a value + */ + void write_bson_integer(const string_t& name, + const std::int64_t value) + { + if ((std::numeric_limits::min)() <= value and value <= (std::numeric_limits::max)()) + { + write_bson_entry_header(name, 0x10); // int32 + write_number(static_cast(value)); + } + else + { + write_bson_entry_header(name, 0x12); // int64 + write_number(static_cast(value)); + } + } + + /*! + @return The size of the BSON-encoded unsigned integer in @a j + */ + static constexpr std::size_t calc_bson_unsigned_size(const std::uint64_t value) noexcept + { + return (value <= static_cast((std::numeric_limits::max)())) + ? sizeof(std::int32_t) + : sizeof(std::int64_t); + } + + /*! + @brief Writes a BSON element with key @a name and unsigned @a value + */ + void write_bson_unsigned(const string_t& name, + const std::uint64_t value) + { + if (value <= static_cast((std::numeric_limits::max)())) + { + write_bson_entry_header(name, 0x10 /* int32 */); + write_number(static_cast(value)); + } + else if (value <= static_cast((std::numeric_limits::max)())) + { + write_bson_entry_header(name, 0x12 /* int64 */); + write_number(static_cast(value)); + } + else + { + JSON_THROW(out_of_range::create(407, "integer number " + std::to_string(value) + " cannot be represented by BSON as it does not fit int64")); + } + } + + /*! + @brief Writes a BSON element with key @a name and object @a value + */ + void write_bson_object_entry(const string_t& name, + const typename BasicJsonType::object_t& value) + { + write_bson_entry_header(name, 0x03); // object + write_bson_object(value); + } + + /*! + @return The size of the BSON-encoded array @a value + */ + static std::size_t calc_bson_array_size(const typename BasicJsonType::array_t& value) + { + std::size_t embedded_document_size = 0ul; + std::size_t array_index = 0ul; + + for (const auto& el : value) + { + embedded_document_size += calc_bson_element_size(std::to_string(array_index++), el); + } + + return sizeof(std::int32_t) + embedded_document_size + 1ul; + } + + /*! + @brief Writes a BSON element with key @a name and array @a value + */ + void write_bson_array(const string_t& name, + const typename BasicJsonType::array_t& value) + { + write_bson_entry_header(name, 0x04); // array + write_number(static_cast(calc_bson_array_size(value))); + + std::size_t array_index = 0ul; + + for (const auto& el : value) + { + write_bson_element(std::to_string(array_index++), el); + } + + oa->write_character(to_char_type(0x00)); + } + + /*! + @brief Calculates the size necessary to serialize the JSON value @a j with its @a name + @return The calculated size for the BSON document entry for @a j with the given @a name. + */ + static std::size_t calc_bson_element_size(const string_t& name, + const BasicJsonType& j) + { + const auto header_size = calc_bson_entry_header_size(name); + switch (j.type()) + { + case value_t::object: + return header_size + calc_bson_object_size(*j.m_value.object); + + case value_t::array: + return header_size + calc_bson_array_size(*j.m_value.array); + + case value_t::boolean: + return header_size + 1ul; + + case value_t::number_float: + return header_size + 8ul; + + case value_t::number_integer: + return header_size + calc_bson_integer_size(j.m_value.number_integer); + + case value_t::number_unsigned: + return header_size + calc_bson_unsigned_size(j.m_value.number_unsigned); + + case value_t::string: + return header_size + calc_bson_string_size(*j.m_value.string); + + case value_t::null: + return header_size + 0ul; + + // LCOV_EXCL_START + default: + assert(false); + return 0ul; + // LCOV_EXCL_STOP + }; + } + + /*! + @brief Serializes the JSON value @a j to BSON and associates it with the + key @a name. + @param name The name to associate with the JSON entity @a j within the + current BSON document + @return The size of the BSON entry + */ + void write_bson_element(const string_t& name, + const BasicJsonType& j) + { + switch (j.type()) + { + case value_t::object: + return write_bson_object_entry(name, *j.m_value.object); + + case value_t::array: + return write_bson_array(name, *j.m_value.array); + + case value_t::boolean: + return write_bson_boolean(name, j.m_value.boolean); + + case value_t::number_float: + return write_bson_double(name, j.m_value.number_float); + + case value_t::number_integer: + return write_bson_integer(name, j.m_value.number_integer); + + case value_t::number_unsigned: + return write_bson_unsigned(name, j.m_value.number_unsigned); + + case value_t::string: + return write_bson_string(name, *j.m_value.string); + + case value_t::null: + return write_bson_null(name); + + // LCOV_EXCL_START + default: + assert(false); + return; + // LCOV_EXCL_STOP + }; } + /*! + @brief Calculates the size of the BSON serialization of the given + JSON-object @a j. + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + static std::size_t calc_bson_object_size(const typename BasicJsonType::object_t& value) + { + std::size_t document_size = std::accumulate(value.begin(), value.end(), 0ul, + [](size_t result, const typename BasicJsonType::object_t::value_type & el) + { + return result += calc_bson_element_size(el.first, el.second); + }); + + return sizeof(std::int32_t) + document_size + 1ul; + } + + /*! + @param[in] j JSON value to serialize + @pre j.type() == value_t::object + */ + void write_bson_object(const typename BasicJsonType::object_t& value) + { + write_number(static_cast(calc_bson_object_size(value))); + + for (const auto& el : value) + { + write_bson_element(el.first, el.second); + } + + oa->write_character(to_char_type(0x00)); + } + + ////////// + // CBOR // + ////////// + + static constexpr CharType get_cbor_float_prefix(float /*unused*/) + { + return to_char_type(0xFA); // Single-Precision Float + } + + static constexpr CharType get_cbor_float_prefix(double /*unused*/) + { + return to_char_type(0xFB); // Double-Precision Float + } + + ///////////// + // MsgPack // + ///////////// + + static constexpr CharType get_msgpack_float_prefix(float /*unused*/) + { + return to_char_type(0xCA); // float 32 + } + + static constexpr CharType get_msgpack_float_prefix(double /*unused*/) + { + return to_char_type(0xCB); // float 64 + } + + //////////// + // UBJSON // + //////////// + // UBJSON: write number (floating point) template::value, int>::type = 0> @@ -727,7 +1068,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast('i')); // int8 + oa->write_character(to_char_type('i')); // int8 } write_number(static_cast(n)); } @@ -735,7 +1076,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast('U')); // uint8 + oa->write_character(to_char_type('U')); // uint8 } write_number(static_cast(n)); } @@ -743,7 +1084,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast('I')); // int16 + oa->write_character(to_char_type('I')); // int16 } write_number(static_cast(n)); } @@ -751,7 +1092,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast('l')); // int32 + oa->write_character(to_char_type('l')); // int32 } write_number(static_cast(n)); } @@ -759,13 +1100,13 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast('L')); // int64 + oa->write_character(to_char_type('L')); // int64 } write_number(static_cast(n)); } else { - JSON_THROW(out_of_range::create(407, "number overflow serializing " + std::to_string(n))); + JSON_THROW(out_of_range::create(407, "integer number " + std::to_string(n) + " cannot be represented by UBJSON as it does not fit int64")); } } @@ -780,7 +1121,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast('i')); // int8 + oa->write_character(to_char_type('i')); // int8 } write_number(static_cast(n)); } @@ -788,7 +1129,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast('U')); // uint8 + oa->write_character(to_char_type('U')); // uint8 } write_number(static_cast(n)); } @@ -796,7 +1137,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast('I')); // int16 + oa->write_character(to_char_type('I')); // int16 } write_number(static_cast(n)); } @@ -804,7 +1145,7 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast('l')); // int32 + oa->write_character(to_char_type('l')); // int32 } write_number(static_cast(n)); } @@ -812,14 +1153,14 @@ class binary_writer { if (add_prefix) { - oa->write_character(static_cast('L')); // int64 + oa->write_character(to_char_type('L')); // int64 } write_number(static_cast(n)); } // LCOV_EXCL_START else { - JSON_THROW(out_of_range::create(407, "number overflow serializing " + std::to_string(n))); + JSON_THROW(out_of_range::create(407, "integer number " + std::to_string(n) + " cannot be represented by UBJSON as it does not fit int64")); } // LCOV_EXCL_STOP } @@ -849,22 +1190,20 @@ class binary_writer { return 'i'; } - else if ((std::numeric_limits::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits::max)()) + if ((std::numeric_limits::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits::max)()) { return 'U'; } - else if ((std::numeric_limits::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits::max)()) + if ((std::numeric_limits::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits::max)()) { return 'I'; } - else if ((std::numeric_limits::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits::max)()) + if ((std::numeric_limits::min)() <= j.m_value.number_integer and j.m_value.number_integer <= (std::numeric_limits::max)()) { return 'l'; } - else // no check and assume int64_t (see note above) - { - return 'L'; - } + // no check and assume int64_t (see note above) + return 'L'; } case value_t::number_unsigned: @@ -873,22 +1212,20 @@ class binary_writer { return 'i'; } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { return 'U'; } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { return 'I'; } - else if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) + if (j.m_value.number_unsigned <= (std::numeric_limits::max)()) { return 'l'; } - else // no check and assume int64_t (see note above) - { - return 'L'; - } + // no check and assume int64_t (see note above) + return 'L'; } case value_t::number_float: @@ -908,34 +1245,87 @@ class binary_writer } } - static constexpr CharType get_cbor_float_prefix(float) + static constexpr CharType get_ubjson_float_prefix(float /*unused*/) { - return static_cast(0xFA); // Single-Precision Float + return 'd'; // float 32 + } + + static constexpr CharType get_ubjson_float_prefix(double /*unused*/) + { + return 'D'; // float 64 } - static constexpr CharType get_cbor_float_prefix(double) + /////////////////////// + // Utility functions // + /////////////////////// + + /* + @brief write a number to output input + @param[in] n number of type @a NumberType + @tparam NumberType the type of the number + @tparam OutputIsLittleEndian Set to true if output data is + required to be little endian + + @note This function needs to respect the system's endianess, because bytes + in CBOR, MessagePack, and UBJSON are stored in network order (big + endian) and therefore need reordering on little endian systems. + */ + template + void write_number(const NumberType n) { - return static_cast(0xFB); // Double-Precision Float + // step 1: write number to array of length NumberType + std::array vec; + std::memcpy(vec.data(), &n, sizeof(NumberType)); + + // step 2: write array to output (with possible reordering) + if (is_little_endian and not OutputIsLittleEndian) + { + // reverse byte order prior to conversion if necessary + std::reverse(vec.begin(), vec.end()); + } + + oa->write_characters(vec.data(), sizeof(NumberType)); } - static constexpr CharType get_msgpack_float_prefix(float) + public: + // The following to_char_type functions are implement the conversion + // between uint8_t and CharType. In case CharType is not unsigned, + // such a conversion is required to allow values greater than 128. + // See for a discussion. + template < typename C = CharType, + enable_if_t < std::is_signed::value and std::is_signed::value > * = nullptr > + static constexpr CharType to_char_type(std::uint8_t x) noexcept { - return static_cast(0xCA); // float 32 + return *reinterpret_cast(&x); } - static constexpr CharType get_msgpack_float_prefix(double) + template < typename C = CharType, + enable_if_t < std::is_signed::value and std::is_unsigned::value > * = nullptr > + static CharType to_char_type(std::uint8_t x) noexcept { - return static_cast(0xCB); // float 64 + static_assert(sizeof(std::uint8_t) == sizeof(CharType), "size of CharType must be equal to std::uint8_t"); + static_assert(std::is_pod::value, "CharType must be POD"); + CharType result; + std::memcpy(&result, &x, sizeof(x)); + return result; } - static constexpr CharType get_ubjson_float_prefix(float) + template::value>* = nullptr> + static constexpr CharType to_char_type(std::uint8_t x) noexcept { - return 'd'; // float 32 + return x; } - static constexpr CharType get_ubjson_float_prefix(double) + template < typename InputCharType, typename C = CharType, + enable_if_t < + std::is_signed::value and + std::is_signed::value and + std::is_same::type>::value + > * = nullptr > + static constexpr CharType to_char_type(InputCharType x) noexcept { - return 'D'; // float 64 + return x; } private: @@ -945,5 +1335,5 @@ class binary_writer /// the output output_adapter_t oa = nullptr; }; -} -} +} // namespace detail +} // namespace nlohmann diff --git a/include/lib/modernjson/detail/output/output_adapters.hpp b/include/lib/modernjson/detail/output/output_adapters.hpp index ff86a6e..699757d 100644 --- a/include/lib/modernjson/detail/output/output_adapters.hpp +++ b/include/lib/modernjson/detail/output/output_adapters.hpp @@ -30,7 +30,9 @@ template class output_vector_adapter : public output_adapter_protocol { public: - explicit output_vector_adapter(std::vector& vec) : v(vec) {} + explicit output_vector_adapter(std::vector& vec) noexcept + : v(vec) + {} void write_character(CharType c) override { @@ -51,7 +53,9 @@ template class output_stream_adapter : public output_adapter_protocol { public: - explicit output_stream_adapter(std::basic_ostream& s) : stream(s) {} + explicit output_stream_adapter(std::basic_ostream& s) noexcept + : stream(s) + {} void write_character(CharType c) override { @@ -72,7 +76,9 @@ template> class output_string_adapter : public output_adapter_protocol { public: - explicit output_string_adapter(StringType& s) : str(s) {} + explicit output_string_adapter(StringType& s) noexcept + : str(s) + {} void write_character(CharType c) override { @@ -109,5 +115,5 @@ class output_adapter private: output_adapter_t oa = nullptr; }; -} -} +} // namespace detail +} // namespace nlohmann diff --git a/include/lib/modernjson/detail/output/serializer.hpp b/include/lib/modernjson/detail/output/serializer.hpp index 7c6abb8..a34a452 100644 --- a/include/lib/modernjson/detail/output/serializer.hpp +++ b/include/lib/modernjson/detail/output/serializer.hpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -28,6 +29,14 @@ namespace detail // serialization // /////////////////// +/// how to treat decoding errors +enum class error_handler_t +{ + strict, ///< throw a type_error exception in case of invalid UTF-8 + replace, ///< replace invalid UTF-8 sequences with U+FFFD + ignore ///< ignore invalid UTF-8 sequences +}; + template class serializer { @@ -42,17 +51,25 @@ class serializer /*! @param[in] s output stream to serialize to @param[in] ichar indentation character to use + @param[in] error_handler_ how to react on decoding errors */ - serializer(output_adapter_t s, const char ichar) - : o(std::move(s)), loc(std::localeconv()), - thousands_sep(loc->thousands_sep == nullptr ? '\0' : * (loc->thousands_sep)), - decimal_point(loc->decimal_point == nullptr ? '\0' : * (loc->decimal_point)), - indent_char(ichar), indent_string(512, indent_char) + serializer(output_adapter_t s, const char ichar, + error_handler_t error_handler_ = error_handler_t::strict) + : o(std::move(s)) + , loc(std::localeconv()) + , thousands_sep(loc->thousands_sep == nullptr ? '\0' : * (loc->thousands_sep)) + , decimal_point(loc->decimal_point == nullptr ? '\0' : * (loc->decimal_point)) + , indent_char(ichar) + , indent_string(512, indent_char) + , error_handler(error_handler_) {} // delete because of pointer members serializer(const serializer&) = delete; serializer& operator=(const serializer&) = delete; + serializer(serializer&&) = delete; + serializer& operator=(serializer&&) = delete; + ~serializer() = default; /*! @brief internal implementation of the serialization function @@ -284,6 +301,10 @@ class serializer uint8_t state = UTF8_ACCEPT; std::size_t bytes = 0; // number of bytes written to string_buffer + // number of bytes written at the point of the last valid byte + std::size_t bytes_after_last_accept = 0; + std::size_t undumped_chars = 0; + for (std::size_t i = 0; i < s.size(); ++i) { const auto byte = static_cast(s[i]); @@ -351,15 +372,15 @@ class serializer { if (codepoint <= 0xFFFF) { - std::snprintf(string_buffer.data() + bytes, 7, "\\u%04x", - static_cast(codepoint)); + (std::snprintf)(string_buffer.data() + bytes, 7, "\\u%04x", + static_cast(codepoint)); bytes += 6; } else { - std::snprintf(string_buffer.data() + bytes, 13, "\\u%04x\\u%04x", - static_cast(0xD7C0 + (codepoint >> 10)), - static_cast(0xDC00 + (codepoint & 0x3FF))); + (std::snprintf)(string_buffer.data() + bytes, 13, "\\u%04x\\u%04x", + static_cast(0xD7C0 + (codepoint >> 10)), + static_cast(0xDC00 + (codepoint & 0x3FF))); bytes += 12; } } @@ -381,14 +402,69 @@ class serializer o->write_characters(string_buffer.data(), bytes); bytes = 0; } + + // remember the byte position of this accept + bytes_after_last_accept = bytes; + undumped_chars = 0; break; } case UTF8_REJECT: // decode found invalid UTF-8 byte { - std::string sn(3, '\0'); - snprintf(&sn[0], sn.size(), "%.2X", byte); - JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + sn)); + switch (error_handler) + { + case error_handler_t::strict: + { + std::string sn(3, '\0'); + (std::snprintf)(&sn[0], sn.size(), "%.2X", byte); + JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + sn)); + } + + case error_handler_t::ignore: + case error_handler_t::replace: + { + // in case we saw this character the first time, we + // would like to read it again, because the byte + // may be OK for itself, but just not OK for the + // previous sequence + if (undumped_chars > 0) + { + --i; + } + + // reset length buffer to the last accepted index; + // thus removing/ignoring the invalid characters + bytes = bytes_after_last_accept; + + if (error_handler == error_handler_t::replace) + { + // add a replacement character + if (ensure_ascii) + { + string_buffer[bytes++] = '\\'; + string_buffer[bytes++] = 'u'; + string_buffer[bytes++] = 'f'; + string_buffer[bytes++] = 'f'; + string_buffer[bytes++] = 'f'; + string_buffer[bytes++] = 'd'; + } + else + { + string_buffer[bytes++] = detail::binary_writer::to_char_type('\xEF'); + string_buffer[bytes++] = detail::binary_writer::to_char_type('\xBF'); + string_buffer[bytes++] = detail::binary_writer::to_char_type('\xBD'); + } + bytes_after_last_accept = bytes; + } + + undumped_chars = 0; + + // continue processing the string + state = UTF8_ACCEPT; + break; + } + } + break; } default: // decode found yet incomplete multi-byte code point @@ -398,11 +474,13 @@ class serializer // code point will not be escaped - copy byte to buffer string_buffer[bytes++] = s[i]; } + ++undumped_chars; break; } } } + // we finished processing the string if (JSON_LIKELY(state == UTF8_ACCEPT)) { // write buffer @@ -414,9 +492,38 @@ class serializer else { // we finish reading, but do not accept: string was incomplete - std::string sn(3, '\0'); - snprintf(&sn[0], sn.size(), "%.2X", static_cast(s.back())); - JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + sn)); + switch (error_handler) + { + case error_handler_t::strict: + { + std::string sn(3, '\0'); + (std::snprintf)(&sn[0], sn.size(), "%.2X", static_cast(s.back())); + JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + sn)); + } + + case error_handler_t::ignore: + { + // write all accepted bytes + o->write_characters(string_buffer.data(), bytes_after_last_accept); + break; + } + + case error_handler_t::replace: + { + // write all accepted bytes + o->write_characters(string_buffer.data(), bytes_after_last_accept); + // add a replacement character + if (ensure_ascii) + { + o->write_characters("\\ufffd", 6); + } + else + { + o->write_characters("\xEF\xBF\xBD", 3); + } + break; + } + } } } @@ -442,7 +549,7 @@ class serializer return; } - const bool is_negative = (x <= 0) and (x != 0); // see issue #755 + const bool is_negative = std::is_same::value and not (x >= 0); // see issue #755 std::size_t i = 0; while (x != 0) @@ -509,7 +616,7 @@ class serializer static constexpr auto d = std::numeric_limits::max_digits10; // the actual conversion - std::ptrdiff_t len = snprintf(number_buffer.data(), number_buffer.size(), "%.*g", d, x); + std::ptrdiff_t len = (std::snprintf)(number_buffer.data(), number_buffer.size(), "%.*g", d, x); // negative value indicates an error assert(len > 0); @@ -626,6 +733,9 @@ class serializer const char indent_char; /// the indentation string string_t indent_string; + + /// error_handler how to react on decoding errors + const error_handler_t error_handler; }; -} -} +} // namespace detail +} // namespace nlohmann -- cgit v1.2.3