summaryrefslogtreecommitdiff
path: root/include/lib/modernjson/detail/output/serializer.hpp
diff options
context:
space:
mode:
authorDaniel Friesel <derf@finalrewind.org>2018-11-26 09:06:31 +0100
committerDaniel Friesel <derf@finalrewind.org>2018-11-26 09:06:31 +0100
commit1542f34f0e0fc53324f6fdc5905f4b77b252a789 (patch)
treed2126bc53f8759c36809ff25b9ae3a19fd7aa362 /include/lib/modernjson/detail/output/serializer.hpp
parente7711c06640f098323cab80934c198090e9120a3 (diff)
update nlohmann modernjson to v3.4 (with bson support)
Diffstat (limited to 'include/lib/modernjson/detail/output/serializer.hpp')
-rw-r--r--include/lib/modernjson/detail/output/serializer.hpp150
1 files changed, 130 insertions, 20 deletions
diff --git a/include/lib/modernjson/detail/output/serializer.hpp b/include/lib/modernjson/detail/output/serializer.hpp
index 7c6abb8..a34a452 100644
--- a/include/lib/modernjson/detail/output/serializer.hpp
+++ b/include/lib/modernjson/detail/output/serializer.hpp
@@ -17,6 +17,7 @@
#include <lib/modernjson/detail/conversions/to_chars.hpp>
#include <lib/modernjson/detail/macro_scope.hpp>
#include <lib/modernjson/detail/meta/cpp_future.hpp>
+#include <lib/modernjson/detail/output/binary_writer.hpp>
#include <lib/modernjson/detail/output/output_adapters.hpp>
#include <lib/modernjson/detail/value_t.hpp>
@@ -28,6 +29,14 @@ namespace detail
// serialization //
///////////////////
+/// how to treat decoding errors
+enum class error_handler_t
+{
+ strict, ///< throw a type_error exception in case of invalid UTF-8
+ replace, ///< replace invalid UTF-8 sequences with U+FFFD
+ ignore ///< ignore invalid UTF-8 sequences
+};
+
template<typename BasicJsonType>
class serializer
{
@@ -42,17 +51,25 @@ class serializer
/*!
@param[in] s output stream to serialize to
@param[in] ichar indentation character to use
+ @param[in] error_handler_ how to react on decoding errors
*/
- serializer(output_adapter_t<char> s, const char ichar)
- : o(std::move(s)), loc(std::localeconv()),
- thousands_sep(loc->thousands_sep == nullptr ? '\0' : * (loc->thousands_sep)),
- decimal_point(loc->decimal_point == nullptr ? '\0' : * (loc->decimal_point)),
- indent_char(ichar), indent_string(512, indent_char)
+ serializer(output_adapter_t<char> s, const char ichar,
+ error_handler_t error_handler_ = error_handler_t::strict)
+ : o(std::move(s))
+ , loc(std::localeconv())
+ , thousands_sep(loc->thousands_sep == nullptr ? '\0' : * (loc->thousands_sep))
+ , decimal_point(loc->decimal_point == nullptr ? '\0' : * (loc->decimal_point))
+ , indent_char(ichar)
+ , indent_string(512, indent_char)
+ , error_handler(error_handler_)
{}
// delete because of pointer members
serializer(const serializer&) = delete;
serializer& operator=(const serializer&) = delete;
+ serializer(serializer&&) = delete;
+ serializer& operator=(serializer&&) = delete;
+ ~serializer() = default;
/*!
@brief internal implementation of the serialization function
@@ -284,6 +301,10 @@ class serializer
uint8_t state = UTF8_ACCEPT;
std::size_t bytes = 0; // number of bytes written to string_buffer
+ // number of bytes written at the point of the last valid byte
+ std::size_t bytes_after_last_accept = 0;
+ std::size_t undumped_chars = 0;
+
for (std::size_t i = 0; i < s.size(); ++i)
{
const auto byte = static_cast<uint8_t>(s[i]);
@@ -351,15 +372,15 @@ class serializer
{
if (codepoint <= 0xFFFF)
{
- std::snprintf(string_buffer.data() + bytes, 7, "\\u%04x",
- static_cast<uint16_t>(codepoint));
+ (std::snprintf)(string_buffer.data() + bytes, 7, "\\u%04x",
+ static_cast<uint16_t>(codepoint));
bytes += 6;
}
else
{
- std::snprintf(string_buffer.data() + bytes, 13, "\\u%04x\\u%04x",
- static_cast<uint16_t>(0xD7C0 + (codepoint >> 10)),
- static_cast<uint16_t>(0xDC00 + (codepoint & 0x3FF)));
+ (std::snprintf)(string_buffer.data() + bytes, 13, "\\u%04x\\u%04x",
+ static_cast<uint16_t>(0xD7C0 + (codepoint >> 10)),
+ static_cast<uint16_t>(0xDC00 + (codepoint & 0x3FF)));
bytes += 12;
}
}
@@ -381,14 +402,69 @@ class serializer
o->write_characters(string_buffer.data(), bytes);
bytes = 0;
}
+
+ // remember the byte position of this accept
+ bytes_after_last_accept = bytes;
+ undumped_chars = 0;
break;
}
case UTF8_REJECT: // decode found invalid UTF-8 byte
{
- std::string sn(3, '\0');
- snprintf(&sn[0], sn.size(), "%.2X", byte);
- JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + sn));
+ switch (error_handler)
+ {
+ case error_handler_t::strict:
+ {
+ std::string sn(3, '\0');
+ (std::snprintf)(&sn[0], sn.size(), "%.2X", byte);
+ JSON_THROW(type_error::create(316, "invalid UTF-8 byte at index " + std::to_string(i) + ": 0x" + sn));
+ }
+
+ case error_handler_t::ignore:
+ case error_handler_t::replace:
+ {
+ // in case we saw this character the first time, we
+ // would like to read it again, because the byte
+ // may be OK for itself, but just not OK for the
+ // previous sequence
+ if (undumped_chars > 0)
+ {
+ --i;
+ }
+
+ // reset length buffer to the last accepted index;
+ // thus removing/ignoring the invalid characters
+ bytes = bytes_after_last_accept;
+
+ if (error_handler == error_handler_t::replace)
+ {
+ // add a replacement character
+ if (ensure_ascii)
+ {
+ string_buffer[bytes++] = '\\';
+ string_buffer[bytes++] = 'u';
+ string_buffer[bytes++] = 'f';
+ string_buffer[bytes++] = 'f';
+ string_buffer[bytes++] = 'f';
+ string_buffer[bytes++] = 'd';
+ }
+ else
+ {
+ string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xEF');
+ string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xBF');
+ string_buffer[bytes++] = detail::binary_writer<BasicJsonType, char>::to_char_type('\xBD');
+ }
+ bytes_after_last_accept = bytes;
+ }
+
+ undumped_chars = 0;
+
+ // continue processing the string
+ state = UTF8_ACCEPT;
+ break;
+ }
+ }
+ break;
}
default: // decode found yet incomplete multi-byte code point
@@ -398,11 +474,13 @@ class serializer
// code point will not be escaped - copy byte to buffer
string_buffer[bytes++] = s[i];
}
+ ++undumped_chars;
break;
}
}
}
+ // we finished processing the string
if (JSON_LIKELY(state == UTF8_ACCEPT))
{
// write buffer
@@ -414,9 +492,38 @@ class serializer
else
{
// we finish reading, but do not accept: string was incomplete
- std::string sn(3, '\0');
- snprintf(&sn[0], sn.size(), "%.2X", static_cast<uint8_t>(s.back()));
- JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + sn));
+ switch (error_handler)
+ {
+ case error_handler_t::strict:
+ {
+ std::string sn(3, '\0');
+ (std::snprintf)(&sn[0], sn.size(), "%.2X", static_cast<uint8_t>(s.back()));
+ JSON_THROW(type_error::create(316, "incomplete UTF-8 string; last byte: 0x" + sn));
+ }
+
+ case error_handler_t::ignore:
+ {
+ // write all accepted bytes
+ o->write_characters(string_buffer.data(), bytes_after_last_accept);
+ break;
+ }
+
+ case error_handler_t::replace:
+ {
+ // write all accepted bytes
+ o->write_characters(string_buffer.data(), bytes_after_last_accept);
+ // add a replacement character
+ if (ensure_ascii)
+ {
+ o->write_characters("\\ufffd", 6);
+ }
+ else
+ {
+ o->write_characters("\xEF\xBF\xBD", 3);
+ }
+ break;
+ }
+ }
}
}
@@ -442,7 +549,7 @@ class serializer
return;
}
- const bool is_negative = (x <= 0) and (x != 0); // see issue #755
+ const bool is_negative = std::is_same<NumberType, number_integer_t>::value and not (x >= 0); // see issue #755
std::size_t i = 0;
while (x != 0)
@@ -509,7 +616,7 @@ class serializer
static constexpr auto d = std::numeric_limits<number_float_t>::max_digits10;
// the actual conversion
- std::ptrdiff_t len = snprintf(number_buffer.data(), number_buffer.size(), "%.*g", d, x);
+ std::ptrdiff_t len = (std::snprintf)(number_buffer.data(), number_buffer.size(), "%.*g", d, x);
// negative value indicates an error
assert(len > 0);
@@ -626,6 +733,9 @@ class serializer
const char indent_char;
/// the indentation string
string_t indent_string;
+
+ /// error_handler how to react on decoding errors
+ const error_handler_t error_handler;
};
-}
-}
+} // namespace detail
+} // namespace nlohmann