summaryrefslogtreecommitdiff
path: root/include/lib/modernjson/detail/input/parser.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'include/lib/modernjson/detail/input/parser.hpp')
-rw-r--r--include/lib/modernjson/detail/input/parser.hpp504
1 files changed, 504 insertions, 0 deletions
diff --git a/include/lib/modernjson/detail/input/parser.hpp b/include/lib/modernjson/detail/input/parser.hpp
new file mode 100644
index 0000000..d205bbb
--- /dev/null
+++ b/include/lib/modernjson/detail/input/parser.hpp
@@ -0,0 +1,504 @@
+#pragma once
+
+#include <cassert> // assert
+#include <cmath> // isfinite
+#include <cstdint> // uint8_t
+#include <functional> // function
+#include <string> // string
+#include <utility> // move
+
+#include <lib/modernjson/detail/exceptions.hpp>
+#include <lib/modernjson/detail/macro_scope.hpp>
+#include <lib/modernjson/detail/meta/is_sax.hpp>
+#include <lib/modernjson/detail/input/input_adapters.hpp>
+#include <lib/modernjson/detail/input/json_sax.hpp>
+#include <lib/modernjson/detail/input/lexer.hpp>
+#include <lib/modernjson/detail/value_t.hpp>
+
+namespace nlohmann
+{
+namespace detail
+{
+////////////
+// parser //
+////////////
+
+/*!
+@brief syntax analysis
+
+This class implements a recursive decent parser.
+*/
+template<typename BasicJsonType>
+class parser
+{
+ using number_integer_t = typename BasicJsonType::number_integer_t;
+ using number_unsigned_t = typename BasicJsonType::number_unsigned_t;
+ using number_float_t = typename BasicJsonType::number_float_t;
+ using string_t = typename BasicJsonType::string_t;
+ using lexer_t = lexer<BasicJsonType>;
+ using token_type = typename lexer_t::token_type;
+
+ public:
+ enum class parse_event_t : uint8_t
+ {
+ /// the parser read `{` and started to process a JSON object
+ object_start,
+ /// the parser read `}` and finished processing a JSON object
+ object_end,
+ /// the parser read `[` and started to process a JSON array
+ array_start,
+ /// the parser read `]` and finished processing a JSON array
+ array_end,
+ /// the parser read a key of a value in an object
+ key,
+ /// the parser finished reading a JSON value
+ value
+ };
+
+ using parser_callback_t =
+ std::function<bool(int depth, parse_event_t event, BasicJsonType& parsed)>;
+
+ /// a parser reading from an input adapter
+ explicit parser(detail::input_adapter_t&& adapter,
+ const parser_callback_t cb = nullptr,
+ const bool allow_exceptions_ = true)
+ : callback(cb), m_lexer(std::move(adapter)), allow_exceptions(allow_exceptions_)
+ {
+ // read first token
+ get_token();
+ }
+
+ /*!
+ @brief public parser interface
+
+ @param[in] strict whether to expect the last token to be EOF
+ @param[in,out] result parsed JSON value
+
+ @throw parse_error.101 in case of an unexpected token
+ @throw parse_error.102 if to_unicode fails or surrogate error
+ @throw parse_error.103 if to_unicode fails
+ */
+ void parse(const bool strict, BasicJsonType& result)
+ {
+ if (callback)
+ {
+ json_sax_dom_callback_parser<BasicJsonType> sdp(result, callback, allow_exceptions);
+ sax_parse_internal(&sdp);
+ result.assert_invariant();
+
+ // in strict mode, input must be completely read
+ if (strict and (get_token() != token_type::end_of_input))
+ {
+ sdp.parse_error(m_lexer.get_position(),
+ m_lexer.get_token_string(),
+ parse_error::create(101, m_lexer.get_position(),
+ exception_message(token_type::end_of_input, "value")));
+ }
+
+ // in case of an error, return discarded value
+ if (sdp.is_errored())
+ {
+ result = value_t::discarded;
+ return;
+ }
+
+ // set top-level value to null if it was discarded by the callback
+ // function
+ if (result.is_discarded())
+ {
+ result = nullptr;
+ }
+ }
+ else
+ {
+ json_sax_dom_parser<BasicJsonType> sdp(result, allow_exceptions);
+ sax_parse_internal(&sdp);
+ result.assert_invariant();
+
+ // in strict mode, input must be completely read
+ if (strict and (get_token() != token_type::end_of_input))
+ {
+ sdp.parse_error(m_lexer.get_position(),
+ m_lexer.get_token_string(),
+ parse_error::create(101, m_lexer.get_position(),
+ exception_message(token_type::end_of_input, "value")));
+ }
+
+ // in case of an error, return discarded value
+ if (sdp.is_errored())
+ {
+ result = value_t::discarded;
+ return;
+ }
+ }
+ }
+
+ /*!
+ @brief public accept interface
+
+ @param[in] strict whether to expect the last token to be EOF
+ @return whether the input is a proper JSON text
+ */
+ bool accept(const bool strict = true)
+ {
+ json_sax_acceptor<BasicJsonType> sax_acceptor;
+ return sax_parse(&sax_acceptor, strict);
+ }
+
+ template <typename SAX>
+ bool sax_parse(SAX* sax, const bool strict = true)
+ {
+ (void)detail::is_sax_static_asserts<SAX, BasicJsonType> {};
+ const bool result = sax_parse_internal(sax);
+
+ // strict mode: next byte must be EOF
+ if (result and strict and (get_token() != token_type::end_of_input))
+ {
+ return sax->parse_error(m_lexer.get_position(),
+ m_lexer.get_token_string(),
+ parse_error::create(101, m_lexer.get_position(),
+ exception_message(token_type::end_of_input, "value")));
+ }
+
+ return result;
+ }
+
+ private:
+ template <typename SAX>
+ bool sax_parse_internal(SAX* sax)
+ {
+ // stack to remember the hierarchy of structured values we are parsing
+ // true = array; false = object
+ std::vector<bool> states;
+ // value to avoid a goto (see comment where set to true)
+ bool skip_to_state_evaluation = false;
+
+ while (true)
+ {
+ if (not skip_to_state_evaluation)
+ {
+ // invariant: get_token() was called before each iteration
+ switch (last_token)
+ {
+ case token_type::begin_object:
+ {
+ if (JSON_UNLIKELY(not sax->start_object(std::size_t(-1))))
+ {
+ return false;
+ }
+
+ // closing } -> we are done
+ if (get_token() == token_type::end_object)
+ {
+ if (JSON_UNLIKELY(not sax->end_object()))
+ {
+ return false;
+ }
+ break;
+ }
+
+ // parse key
+ if (JSON_UNLIKELY(last_token != token_type::value_string))
+ {
+ return sax->parse_error(m_lexer.get_position(),
+ m_lexer.get_token_string(),
+ parse_error::create(101, m_lexer.get_position(),
+ exception_message(token_type::value_string, "object key")));
+ }
+ if (JSON_UNLIKELY(not sax->key(m_lexer.get_string())))
+ {
+ return false;
+ }
+
+ // parse separator (:)
+ if (JSON_UNLIKELY(get_token() != token_type::name_separator))
+ {
+ return sax->parse_error(m_lexer.get_position(),
+ m_lexer.get_token_string(),
+ parse_error::create(101, m_lexer.get_position(),
+ exception_message(token_type::name_separator, "object separator")));
+ }
+
+ // remember we are now inside an object
+ states.push_back(false);
+
+ // parse values
+ get_token();
+ continue;
+ }
+
+ case token_type::begin_array:
+ {
+ if (JSON_UNLIKELY(not sax->start_array(std::size_t(-1))))
+ {
+ return false;
+ }
+
+ // closing ] -> we are done
+ if (get_token() == token_type::end_array)
+ {
+ if (JSON_UNLIKELY(not sax->end_array()))
+ {
+ return false;
+ }
+ break;
+ }
+
+ // remember we are now inside an array
+ states.push_back(true);
+
+ // parse values (no need to call get_token)
+ continue;
+ }
+
+ case token_type::value_float:
+ {
+ const auto res = m_lexer.get_number_float();
+
+ if (JSON_UNLIKELY(not std::isfinite(res)))
+ {
+ return sax->parse_error(m_lexer.get_position(),
+ m_lexer.get_token_string(),
+ out_of_range::create(406, "number overflow parsing '" + m_lexer.get_token_string() + "'"));
+ }
+ else
+ {
+ if (JSON_UNLIKELY(not sax->number_float(res, m_lexer.get_string())))
+ {
+ return false;
+ }
+ break;
+ }
+ }
+
+ case token_type::literal_false:
+ {
+ if (JSON_UNLIKELY(not sax->boolean(false)))
+ {
+ return false;
+ }
+ break;
+ }
+
+ case token_type::literal_null:
+ {
+ if (JSON_UNLIKELY(not sax->null()))
+ {
+ return false;
+ }
+ break;
+ }
+
+ case token_type::literal_true:
+ {
+ if (JSON_UNLIKELY(not sax->boolean(true)))
+ {
+ return false;
+ }
+ break;
+ }
+
+ case token_type::value_integer:
+ {
+ if (JSON_UNLIKELY(not sax->number_integer(m_lexer.get_number_integer())))
+ {
+ return false;
+ }
+ break;
+ }
+
+ case token_type::value_string:
+ {
+ if (JSON_UNLIKELY(not sax->string(m_lexer.get_string())))
+ {
+ return false;
+ }
+ break;
+ }
+
+ case token_type::value_unsigned:
+ {
+ if (JSON_UNLIKELY(not sax->number_unsigned(m_lexer.get_number_unsigned())))
+ {
+ return false;
+ }
+ break;
+ }
+
+ case token_type::parse_error:
+ {
+ // using "uninitialized" to avoid "expected" message
+ return sax->parse_error(m_lexer.get_position(),
+ m_lexer.get_token_string(),
+ parse_error::create(101, m_lexer.get_position(),
+ exception_message(token_type::uninitialized, "value")));
+ }
+
+ default: // the last token was unexpected
+ {
+ return sax->parse_error(m_lexer.get_position(),
+ m_lexer.get_token_string(),
+ parse_error::create(101, m_lexer.get_position(),
+ exception_message(token_type::literal_or_value, "value")));
+ }
+ }
+ }
+ else
+ {
+ skip_to_state_evaluation = false;
+ }
+
+ // we reached this line after we successfully parsed a value
+ if (states.empty())
+ {
+ // empty stack: we reached the end of the hierarchy: done
+ return true;
+ }
+ else
+ {
+ if (states.back()) // array
+ {
+ // comma -> next value
+ if (get_token() == token_type::value_separator)
+ {
+ // parse a new value
+ get_token();
+ continue;
+ }
+
+ // closing ]
+ if (JSON_LIKELY(last_token == token_type::end_array))
+ {
+ if (JSON_UNLIKELY(not sax->end_array()))
+ {
+ return false;
+ }
+
+ // We are done with this array. Before we can parse a
+ // new value, we need to evaluate the new state first.
+ // By setting skip_to_state_evaluation to false, we
+ // are effectively jumping to the beginning of this if.
+ assert(not states.empty());
+ states.pop_back();
+ skip_to_state_evaluation = true;
+ continue;
+ }
+ else
+ {
+ return sax->parse_error(m_lexer.get_position(),
+ m_lexer.get_token_string(),
+ parse_error::create(101, m_lexer.get_position(),
+ exception_message(token_type::end_array, "array")));
+ }
+ }
+ else // object
+ {
+ // comma -> next value
+ if (get_token() == token_type::value_separator)
+ {
+ // parse key
+ if (JSON_UNLIKELY(get_token() != token_type::value_string))
+ {
+ return sax->parse_error(m_lexer.get_position(),
+ m_lexer.get_token_string(),
+ parse_error::create(101, m_lexer.get_position(),
+ exception_message(token_type::value_string, "object key")));
+ }
+ else
+ {
+ if (JSON_UNLIKELY(not sax->key(m_lexer.get_string())))
+ {
+ return false;
+ }
+ }
+
+ // parse separator (:)
+ if (JSON_UNLIKELY(get_token() != token_type::name_separator))
+ {
+ return sax->parse_error(m_lexer.get_position(),
+ m_lexer.get_token_string(),
+ parse_error::create(101, m_lexer.get_position(),
+ exception_message(token_type::name_separator, "object separator")));
+ }
+
+ // parse values
+ get_token();
+ continue;
+ }
+
+ // closing }
+ if (JSON_LIKELY(last_token == token_type::end_object))
+ {
+ if (JSON_UNLIKELY(not sax->end_object()))
+ {
+ return false;
+ }
+
+ // We are done with this object. Before we can parse a
+ // new value, we need to evaluate the new state first.
+ // By setting skip_to_state_evaluation to false, we
+ // are effectively jumping to the beginning of this if.
+ assert(not states.empty());
+ states.pop_back();
+ skip_to_state_evaluation = true;
+ continue;
+ }
+ else
+ {
+ return sax->parse_error(m_lexer.get_position(),
+ m_lexer.get_token_string(),
+ parse_error::create(101, m_lexer.get_position(),
+ exception_message(token_type::end_object, "object")));
+ }
+ }
+ }
+ }
+ }
+
+ /// get next token from lexer
+ token_type get_token()
+ {
+ return (last_token = m_lexer.scan());
+ }
+
+ std::string exception_message(const token_type expected, const std::string& context)
+ {
+ std::string error_msg = "syntax error ";
+
+ if (not context.empty())
+ {
+ error_msg += "while parsing " + context + " ";
+ }
+
+ error_msg += "- ";
+
+ if (last_token == token_type::parse_error)
+ {
+ error_msg += std::string(m_lexer.get_error_message()) + "; last read: '" +
+ m_lexer.get_token_string() + "'";
+ }
+ else
+ {
+ error_msg += "unexpected " + std::string(lexer_t::token_type_name(last_token));
+ }
+
+ if (expected != token_type::uninitialized)
+ {
+ error_msg += "; expected " + std::string(lexer_t::token_type_name(expected));
+ }
+
+ return error_msg;
+ }
+
+ private:
+ /// callback function
+ const parser_callback_t callback = nullptr;
+ /// the type of the last read token
+ token_type last_token = token_type::uninitialized;
+ /// the lexer
+ lexer_t m_lexer;
+ /// whether to throw exceptions in case of errors
+ const bool allow_exceptions = true;
+};
+} // namespace detail
+} // namespace nlohmann