diff options
Diffstat (limited to 'include/lib/modernjson/detail/input/input_adapters.hpp')
-rw-r--r-- | include/lib/modernjson/detail/input/input_adapters.hpp | 377 |
1 files changed, 377 insertions, 0 deletions
diff --git a/include/lib/modernjson/detail/input/input_adapters.hpp b/include/lib/modernjson/detail/input/input_adapters.hpp new file mode 100644 index 0000000..5abaee3 --- /dev/null +++ b/include/lib/modernjson/detail/input/input_adapters.hpp @@ -0,0 +1,377 @@ +#pragma once + +#include <cassert> // assert +#include <cstddef> // size_t +#include <cstring> // strlen +#include <istream> // istream +#include <iterator> // begin, end, iterator_traits, random_access_iterator_tag, distance, next +#include <memory> // shared_ptr, make_shared, addressof +#include <numeric> // accumulate +#include <string> // string, char_traits +#include <type_traits> // enable_if, is_base_of, is_pointer, is_integral, remove_pointer +#include <utility> // pair, declval + +#include <lib/modernjson/detail/macro_scope.hpp> + +namespace nlohmann +{ +namespace detail +{ +/// the supported input formats +enum class input_format_t { json, cbor, msgpack, ubjson }; + +//////////////////// +// input adapters // +//////////////////// + +/*! +@brief abstract input adapter interface + +Produces a stream of std::char_traits<char>::int_type characters from a +std::istream, a buffer, or some other input type. Accepts the return of +exactly one non-EOF character for future input. The int_type characters +returned consist of all valid char values as positive values (typically +unsigned char), plus an EOF value outside that range, specified by the value +of the function std::char_traits<char>::eof(). This value is typically -1, but +could be any arbitrary value which is not a valid char value. +*/ +struct input_adapter_protocol +{ + /// get a character [0,255] or std::char_traits<char>::eof(). + virtual std::char_traits<char>::int_type get_character() = 0; + virtual ~input_adapter_protocol() = default; +}; + +/// a type to simplify interfaces +using input_adapter_t = std::shared_ptr<input_adapter_protocol>; + +/*! +Input adapter for a (caching) istream. Ignores a UFT Byte Order Mark at +beginning of input. Does not support changing the underlying std::streambuf +in mid-input. Maintains underlying std::istream and std::streambuf to support +subsequent use of standard std::istream operations to process any input +characters following those used in parsing the JSON input. Clears the +std::istream flags; any input errors (e.g., EOF) will be detected by the first +subsequent call for input from the std::istream. +*/ +class input_stream_adapter : public input_adapter_protocol +{ + public: + ~input_stream_adapter() override + { + // clear stream flags; we use underlying streambuf I/O, do not + // maintain ifstream flags + is.clear(); + } + + explicit input_stream_adapter(std::istream& i) + : is(i), sb(*i.rdbuf()) + {} + + // delete because of pointer members + input_stream_adapter(const input_stream_adapter&) = delete; + input_stream_adapter& operator=(input_stream_adapter&) = delete; + + // std::istream/std::streambuf use std::char_traits<char>::to_int_type, to + // ensure that std::char_traits<char>::eof() and the character 0xFF do not + // end up as the same value, eg. 0xFFFFFFFF. + std::char_traits<char>::int_type get_character() override + { + return sb.sbumpc(); + } + + private: + /// the associated input stream + std::istream& is; + std::streambuf& sb; +}; + +/// input adapter for buffer input +class input_buffer_adapter : public input_adapter_protocol +{ + public: + input_buffer_adapter(const char* b, const std::size_t l) + : cursor(b), limit(b + l) + {} + + // delete because of pointer members + input_buffer_adapter(const input_buffer_adapter&) = delete; + input_buffer_adapter& operator=(input_buffer_adapter&) = delete; + + std::char_traits<char>::int_type get_character() noexcept override + { + if (JSON_LIKELY(cursor < limit)) + { + return std::char_traits<char>::to_int_type(*(cursor++)); + } + + return std::char_traits<char>::eof(); + } + + private: + /// pointer to the current character + const char* cursor; + /// pointer past the last character + const char* const limit; +}; + +template<typename WideStringType> +class wide_string_input_adapter : public input_adapter_protocol +{ + public: + explicit wide_string_input_adapter(const WideStringType& w) : str(w) {} + + std::char_traits<char>::int_type get_character() noexcept override + { + // check if buffer needs to be filled + if (utf8_bytes_index == utf8_bytes_filled) + { + if (sizeof(typename WideStringType::value_type) == 2) + { + fill_buffer_utf16(); + } + else + { + fill_buffer_utf32(); + } + + assert(utf8_bytes_filled > 0); + assert(utf8_bytes_index == 0); + } + + // use buffer + assert(utf8_bytes_filled > 0); + assert(utf8_bytes_index < utf8_bytes_filled); + return utf8_bytes[utf8_bytes_index++]; + } + + private: + void fill_buffer_utf16() + { + utf8_bytes_index = 0; + + if (current_wchar == str.size()) + { + utf8_bytes[0] = std::char_traits<char>::eof(); + utf8_bytes_filled = 1; + } + else + { + // get the current character + const int wc = static_cast<int>(str[current_wchar++]); + + // UTF-16 to UTF-8 encoding + if (wc < 0x80) + { + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + else if (wc <= 0x7FF) + { + utf8_bytes[0] = 0xC0 | ((wc >> 6)); + utf8_bytes[1] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 2; + } + else if (0xD800 > wc or wc >= 0xE000) + { + utf8_bytes[0] = 0xE0 | ((wc >> 12)); + utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[2] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 3; + } + else + { + if (current_wchar < str.size()) + { + const int wc2 = static_cast<int>(str[current_wchar++]); + const int charcode = 0x10000 + (((wc & 0x3FF) << 10) | (wc2 & 0x3FF)); + utf8_bytes[0] = 0xf0 | (charcode >> 18); + utf8_bytes[1] = 0x80 | ((charcode >> 12) & 0x3F); + utf8_bytes[2] = 0x80 | ((charcode >> 6) & 0x3F); + utf8_bytes[3] = 0x80 | (charcode & 0x3F); + utf8_bytes_filled = 4; + } + else + { + // unknown character + ++current_wchar; + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + } + } + } + + void fill_buffer_utf32() + { + utf8_bytes_index = 0; + + if (current_wchar == str.size()) + { + utf8_bytes[0] = std::char_traits<char>::eof(); + utf8_bytes_filled = 1; + } + else + { + // get the current character + const int wc = static_cast<int>(str[current_wchar++]); + + // UTF-32 to UTF-8 encoding + if (wc < 0x80) + { + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + else if (wc <= 0x7FF) + { + utf8_bytes[0] = 0xC0 | ((wc >> 6) & 0x1F); + utf8_bytes[1] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 2; + } + else if (wc <= 0xFFFF) + { + utf8_bytes[0] = 0xE0 | ((wc >> 12) & 0x0F); + utf8_bytes[1] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[2] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 3; + } + else if (wc <= 0x10FFFF) + { + utf8_bytes[0] = 0xF0 | ((wc >> 18 ) & 0x07); + utf8_bytes[1] = 0x80 | ((wc >> 12) & 0x3F); + utf8_bytes[2] = 0x80 | ((wc >> 6) & 0x3F); + utf8_bytes[3] = 0x80 | (wc & 0x3F); + utf8_bytes_filled = 4; + } + else + { + // unknown character + utf8_bytes[0] = wc; + utf8_bytes_filled = 1; + } + } + } + + private: + /// the wstring to process + const WideStringType& str; + + /// index of the current wchar in str + std::size_t current_wchar = 0; + + /// a buffer for UTF-8 bytes + std::array<std::char_traits<char>::int_type, 4> utf8_bytes = {{0, 0, 0, 0}}; + + /// index to the utf8_codes array for the next valid byte + std::size_t utf8_bytes_index = 0; + /// number of valid bytes in the utf8_codes array + std::size_t utf8_bytes_filled = 0; +}; + +class input_adapter +{ + public: + // native support + + /// input adapter for input stream + input_adapter(std::istream& i) + : ia(std::make_shared<input_stream_adapter>(i)) {} + + /// input adapter for input stream + input_adapter(std::istream&& i) + : ia(std::make_shared<input_stream_adapter>(i)) {} + + input_adapter(const std::wstring& ws) + : ia(std::make_shared<wide_string_input_adapter<std::wstring>>(ws)) {} + + input_adapter(const std::u16string& ws) + : ia(std::make_shared<wide_string_input_adapter<std::u16string>>(ws)) {} + + input_adapter(const std::u32string& ws) + : ia(std::make_shared<wide_string_input_adapter<std::u32string>>(ws)) {} + + /// input adapter for buffer + template<typename CharT, + typename std::enable_if< + std::is_pointer<CharT>::value and + std::is_integral<typename std::remove_pointer<CharT>::type>::value and + sizeof(typename std::remove_pointer<CharT>::type) == 1, + int>::type = 0> + input_adapter(CharT b, std::size_t l) + : ia(std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(b), l)) {} + + // derived support + + /// input adapter for string literal + template<typename CharT, + typename std::enable_if< + std::is_pointer<CharT>::value and + std::is_integral<typename std::remove_pointer<CharT>::type>::value and + sizeof(typename std::remove_pointer<CharT>::type) == 1, + int>::type = 0> + input_adapter(CharT b) + : input_adapter(reinterpret_cast<const char*>(b), + std::strlen(reinterpret_cast<const char*>(b))) {} + + /// input adapter for iterator range with contiguous storage + template<class IteratorType, + typename std::enable_if< + std::is_same<typename std::iterator_traits<IteratorType>::iterator_category, std::random_access_iterator_tag>::value, + int>::type = 0> + input_adapter(IteratorType first, IteratorType last) + { +#ifndef NDEBUG + // assertion to check that the iterator range is indeed contiguous, + // see http://stackoverflow.com/a/35008842/266378 for more discussion + const auto is_contiguous = std::accumulate( + first, last, std::pair<bool, int>(true, 0), + [&first](std::pair<bool, int> res, decltype(*first) val) + { + res.first &= (val == *(std::next(std::addressof(*first), res.second++))); + return res; + }).first; + assert(is_contiguous); +#endif + + // assertion to check that each element is 1 byte long + static_assert( + sizeof(typename std::iterator_traits<IteratorType>::value_type) == 1, + "each element in the iterator range must have the size of 1 byte"); + + const auto len = static_cast<size_t>(std::distance(first, last)); + if (JSON_LIKELY(len > 0)) + { + // there is at least one element: use the address of first + ia = std::make_shared<input_buffer_adapter>(reinterpret_cast<const char*>(&(*first)), len); + } + else + { + // the address of first cannot be used: use nullptr + ia = std::make_shared<input_buffer_adapter>(nullptr, len); + } + } + + /// input adapter for array + template<class T, std::size_t N> + input_adapter(T (&array)[N]) + : input_adapter(std::begin(array), std::end(array)) {} + + /// input adapter for contiguous container + template<class ContiguousContainer, typename + std::enable_if<not std::is_pointer<ContiguousContainer>::value and + std::is_base_of<std::random_access_iterator_tag, typename std::iterator_traits<decltype(std::begin(std::declval<ContiguousContainer const>()))>::iterator_category>::value, + int>::type = 0> + input_adapter(const ContiguousContainer& c) + : input_adapter(std::begin(c), std::end(c)) {} + + operator input_adapter_t() + { + return ia; + } + + private: + /// the actual adapter + input_adapter_t ia = nullptr; +}; +} +} |