1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #ifndef RAPIDJSON_READER_H_
16 #define RAPIDJSON_READER_H_
17 
18 /*! \file reader.h */
19 
20 #include "rapidjson.h"
21 #include "encodings.h"
22 #include "internal/meta.h"
23 #include "internal/stack.h"
24 #include "internal/strtod.h"
25 
26 #if defined(RAPIDJSON_SIMD) && defined(_MSC_VER)
27 #include <intrin.h>
28 #pragma intrinsic(_BitScanForward)
29 #endif
30 #ifdef RAPIDJSON_SSE42
31 #include <nmmintrin.h>
32 #elif defined(RAPIDJSON_SSE2)
33 #include <emmintrin.h>
34 #endif
35 
36 #ifdef _MSC_VER
37 RAPIDJSON_DIAG_PUSH
38 RAPIDJSON_DIAG_OFF(4127)  // conditional expression is constant
39 RAPIDJSON_DIAG_OFF(4702)  // unreachable code
40 #endif
41 
42 #ifdef __GNUC__
43 RAPIDJSON_DIAG_PUSH
44 RAPIDJSON_DIAG_OFF(effc++)
45 #endif
46 
47 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
// Expands to nothing; passed as the "value" when returning early from a void function.
#define RAPIDJSON_NOTHING /* deliberately empty */

// Return `value` from the enclosing function when HasParseError() reports an error.
// Only defined here when no definition has been supplied beforehand.
#if !defined(RAPIDJSON_PARSE_ERROR_EARLY_RETURN)
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN(value) \
    RAPIDJSON_MULTILINEMACRO_BEGIN                \
    if (HasParseError())                          \
        return value;                             \
    RAPIDJSON_MULTILINEMACRO_END
#endif

// Early-return variant for functions that return void.
#define RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID \
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN(RAPIDJSON_NOTHING)
57 //!@endcond
58 
59 /*! \def RAPIDJSON_PARSE_ERROR_NORETURN
60     \ingroup RAPIDJSON_ERRORS
61     \brief Macro to indicate a parse error.
62     \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
63     \param offset  position of the error in JSON input (\c size_t)
64 
65     This macro can be used as a customization point for the internal
66     error handling mechanism of RapidJSON.
67 
68     A common usage model is to throw an exception instead of requiring the
69     caller to explicitly check the \ref rapidjson::GenericReader::Parse's
70     return value:
71 
72     \code
73     #define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode,offset) \
74        throw ParseException(parseErrorCode, #parseErrorCode, offset)
75 
76     #include <stdexcept>               // std::runtime_error
77     #include "rapidjson/error/error.h" // rapidjson::ParseResult
78 
79     struct ParseException : std::runtime_error, rapidjson::ParseResult {
80       ParseException(rapidjson::ParseErrorCode code, const char* msg, size_t offset)
81         : std::runtime_error(msg), ParseResult(code, offset) {}
82     };
83 
84     #include "rapidjson/reader.h"
85     \endcode
86 
87     \see RAPIDJSON_PARSE_ERROR, rapidjson::GenericReader::Parse
88  */
// Default definition: record the error via SetParseError() without leaving the calling function.
// Skipped when RAPIDJSON_PARSE_ERROR_NORETURN has already been defined.
#if !defined(RAPIDJSON_PARSE_ERROR_NORETURN)
#define RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset)   \
    RAPIDJSON_MULTILINEMACRO_BEGIN                               \
    /* An error may only be assigned once. */                    \
    RAPIDJSON_ASSERT(!HasParseError());                          \
    SetParseError(parseErrorCode, offset);                       \
    RAPIDJSON_MULTILINEMACRO_END
#endif
96 
97 /*! \def RAPIDJSON_PARSE_ERROR
98     \ingroup RAPIDJSON_ERRORS
99     \brief (Internal) macro to indicate and handle a parse error.
100     \param parseErrorCode \ref rapidjson::ParseErrorCode of the error
101     \param offset  position of the error in JSON input (\c size_t)
102 
103     Invokes RAPIDJSON_PARSE_ERROR_NORETURN and stops the parsing.
104 
105     \see RAPIDJSON_PARSE_ERROR_NORETURN
106     \hideinitializer
107  */
// Default definition: record the error, then leave the (void) calling function if one is set.
// Skipped when RAPIDJSON_PARSE_ERROR has already been defined.
#if !defined(RAPIDJSON_PARSE_ERROR)
#define RAPIDJSON_PARSE_ERROR(parseErrorCode, offset)          \
    RAPIDJSON_MULTILINEMACRO_BEGIN                             \
    RAPIDJSON_PARSE_ERROR_NORETURN(parseErrorCode, offset);    \
    RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;                   \
    RAPIDJSON_MULTILINEMACRO_END
#endif
115 
116 #include "error/error.h" // ParseErrorCode, ParseResult
117 
118 RAPIDJSON_NAMESPACE_BEGIN
119 
120 ///////////////////////////////////////////////////////////////////////////////
121 // ParseFlag
122 
123 /*! \def RAPIDJSON_PARSE_DEFAULT_FLAGS
124     \ingroup RAPIDJSON_CONFIG
125     \brief User-defined kParseDefaultFlags definition.
126 
127     User can define this as any \c ParseFlag combinations.
128 */
#if !defined(RAPIDJSON_PARSE_DEFAULT_FLAGS)
#define RAPIDJSON_PARSE_DEFAULT_FLAGS kParseNoFlags
#endif

//! Combination of parseFlags
/*! Each option occupies its own bit, so options can be combined.
    \see Reader::Parse, Document::Parse, Document::ParseInsitu, Document::ParseStream
 */
enum ParseFlag {
    //! No flags are set.
    kParseNoFlags = 0,
    //! In-situ(destructive) parsing.
    kParseInsituFlag = 1 << 0,
    //! Validate encoding of JSON strings.
    kParseValidateEncodingFlag = 1 << 1,
    //! Iterative(constant complexity in terms of function call stack size) parsing.
    kParseIterativeFlag = 1 << 2,
    //! After parsing a complete JSON root from stream, stop further processing the rest of stream. When this flag is used, parser will not generate kParseErrorDocumentRootNotSingular error.
    kParseStopWhenDoneFlag = 1 << 3,
    //! Parse number in full precision (but slower).
    kParseFullPrecisionFlag = 1 << 4,
    //! Default parse flags. Can be customized by defining RAPIDJSON_PARSE_DEFAULT_FLAGS
    kParseDefaultFlags = RAPIDJSON_PARSE_DEFAULT_FLAGS
};
145 
146 ///////////////////////////////////////////////////////////////////////////////
147 // Handler
148 
149 /*! \class rapidjson::Handler
150     \brief Concept for receiving events from GenericReader upon parsing.
151     The functions return true if no error occurs. If they return false,
152     the event publisher should terminate the process.
153 \code
154 concept Handler {
155     typename Ch;
156 
157     bool Null();
158     bool Bool(bool b);
159     bool Int(int i);
160     bool Uint(unsigned i);
161     bool Int64(int64_t i);
162     bool Uint64(uint64_t i);
163     bool Double(double d);
164     bool String(const Ch* str, SizeType length, bool copy);
165     bool StartObject();
166     bool Key(const Ch* str, SizeType length, bool copy);
167     bool EndObject(SizeType memberCount);
168     bool StartArray();
169     bool EndArray(SizeType elementCount);
170 };
171 \endcode
172 */
173 ///////////////////////////////////////////////////////////////////////////////
174 // BaseReaderHandler
175 
176 //! Default implementation of Handler.
177 /*! This can be used as base class of any reader handler.
178     \note implements Handler concept
179 */
180 template<typename Encoding = UTF8<>, typename Derived = void>
181 struct BaseReaderHandler {
182     typedef typename Encoding::Ch Ch;
183 
184     typedef typename internal::SelectIf<internal::IsSame<Derived, void>, BaseReaderHandler, Derived>::Type Override;
185 
DefaultBaseReaderHandler186     bool Default() { return true; }
NullBaseReaderHandler187     bool Null() { return static_cast<Override&>(*this).Default(); }
BoolBaseReaderHandler188     bool Bool(bool) { return static_cast<Override&>(*this).Default(); }
IntBaseReaderHandler189     bool Int(int) { return static_cast<Override&>(*this).Default(); }
UintBaseReaderHandler190     bool Uint(unsigned) { return static_cast<Override&>(*this).Default(); }
Int64BaseReaderHandler191     bool Int64(int64_t) { return static_cast<Override&>(*this).Default(); }
Uint64BaseReaderHandler192     bool Uint64(uint64_t) { return static_cast<Override&>(*this).Default(); }
DoubleBaseReaderHandler193     bool Double(double) { return static_cast<Override&>(*this).Default(); }
StringBaseReaderHandler194     bool String(const Ch*, SizeType, bool) { return static_cast<Override&>(*this).Default(); }
StartObjectBaseReaderHandler195     bool StartObject() { return static_cast<Override&>(*this).Default(); }
KeyBaseReaderHandler196     bool Key(const Ch* str, SizeType len, bool copy) { return static_cast<Override&>(*this).String(str, len, copy); }
EndObjectBaseReaderHandler197     bool EndObject(SizeType) { return static_cast<Override&>(*this).Default(); }
StartArrayBaseReaderHandler198     bool StartArray() { return static_cast<Override&>(*this).Default(); }
EndArrayBaseReaderHandler199     bool EndArray(SizeType) { return static_cast<Override&>(*this).Default(); }
200 };
201 
202 ///////////////////////////////////////////////////////////////////////////////
203 // StreamLocalCopy
204 
205 namespace internal {
206 
207 template<typename Stream, int = StreamTraits<Stream>::copyOptimization>
208 class StreamLocalCopy;
209 
210 //! Do copy optimization.
211 template<typename Stream>
212 class StreamLocalCopy<Stream, 1> {
213 public:
StreamLocalCopy(Stream & original)214     explicit StreamLocalCopy(Stream& original) : s(original), original_(original) {}
~StreamLocalCopy()215     ~StreamLocalCopy() { original_ = s; }
216 
217     Stream s;
218 
219 private:
220     StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
221 
222     Stream& original_;
223 };
224 
225 //! Keep reference.
226 template<typename Stream>
227 class StreamLocalCopy<Stream, 0> {
228 public:
StreamLocalCopy(Stream & original)229     explicit StreamLocalCopy(Stream& original) : s(original) {}
230 
231     Stream& s;
232 
233 private:
234     StreamLocalCopy& operator=(const StreamLocalCopy&) /* = delete */;
235 };
236 
237 } // namespace internal
238 
239 ///////////////////////////////////////////////////////////////////////////////
240 // SkipWhitespace
241 
242 //! Skip the JSON white spaces in a stream.
243 /*! \param is A input stream for skipping white spaces.
244     \note This function has SSE2/SSE4.2 specialization.
245 */
246 template<typename InputStream>
SkipWhitespace(InputStream & is)247 void SkipWhitespace(InputStream& is) {
248     internal::StreamLocalCopy<InputStream> copy(is);
249     InputStream& s(copy.s);
250 
251     while (s.Peek() == ' ' || s.Peek() == '\n' || s.Peek() == '\r' || s.Peek() == '\t')
252         s.Take();
253 }
254 
255 #ifdef RAPIDJSON_SSE42
256 //! Skip whitespace with SSE 4.2 pcmpistrm instruction, testing 16 8-byte characters at once.
SkipWhitespace_SIMD(const char * p)257 inline const char *SkipWhitespace_SIMD(const char* p) {
258     // Fast return for single non-whitespace
259     if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
260         ++p;
261     else
262         return p;
263 
264     // 16-byte align to the next boundary
265     const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & ~15);
266     while (p != nextAligned)
267         if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
268             ++p;
269         else
270             return p;
271 
272     // The rest of string using SIMD
273     static const char whitespace[16] = " \n\r\t";
274     const __m128i w = _mm_loadu_si128((const __m128i *)&whitespace[0]);
275 
276     for (;; p += 16) {
277         const __m128i s = _mm_load_si128((const __m128i *)p);
278         const unsigned r = _mm_cvtsi128_si32(_mm_cmpistrm(w, s, _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_ANY | _SIDD_BIT_MASK | _SIDD_NEGATIVE_POLARITY));
279         if (r != 0) {   // some of characters is non-whitespace
280 #ifdef _MSC_VER         // Find the index of first non-whitespace
281             unsigned long offset;
282             _BitScanForward(&offset, r);
283             return p + offset;
284 #else
285             return p + __builtin_ffs(r) - 1;
286 #endif
287         }
288     }
289 }
290 
291 #elif defined(RAPIDJSON_SSE2)
292 
293 //! Skip whitespace with SSE2 instructions, testing 16 8-byte characters at once.
SkipWhitespace_SIMD(const char * p)294 inline const char *SkipWhitespace_SIMD(const char* p) {
295     // Fast return for single non-whitespace
296     if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
297         ++p;
298     else
299         return p;
300 
301     // 16-byte align to the next boundary
302     const char* nextAligned = reinterpret_cast<const char*>((reinterpret_cast<size_t>(p) + 15) & ~15);
303     while (p != nextAligned)
304         if (*p == ' ' || *p == '\n' || *p == '\r' || *p == '\t')
305             ++p;
306         else
307             return p;
308 
309     // The rest of string
310     static const char whitespaces[4][17] = {
311         "                ",
312         "\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n",
313         "\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r\r",
314         "\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t"};
315 
316         const __m128i w0 = _mm_loadu_si128((const __m128i *)&whitespaces[0][0]);
317         const __m128i w1 = _mm_loadu_si128((const __m128i *)&whitespaces[1][0]);
318         const __m128i w2 = _mm_loadu_si128((const __m128i *)&whitespaces[2][0]);
319         const __m128i w3 = _mm_loadu_si128((const __m128i *)&whitespaces[3][0]);
320 
321     for (;; p += 16) {
322         const __m128i s = _mm_load_si128((const __m128i *)p);
323         __m128i x = _mm_cmpeq_epi8(s, w0);
324         x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w1));
325         x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w2));
326         x = _mm_or_si128(x, _mm_cmpeq_epi8(s, w3));
327         unsigned short r = (unsigned short)~_mm_movemask_epi8(x);
328         if (r != 0) {   // some of characters may be non-whitespace
329 #ifdef _MSC_VER         // Find the index of first non-whitespace
330             unsigned long offset;
331             _BitScanForward(&offset, r);
332             return p + offset;
333 #else
334             return p + __builtin_ffs(r) - 1;
335 #endif
336         }
337     }
338 }
339 
340 #endif // RAPIDJSON_SSE2
341 
342 #ifdef RAPIDJSON_SIMD
343 //! Template function specialization for InsituStringStream
SkipWhitespace(InsituStringStream & is)344 template<> inline void SkipWhitespace(InsituStringStream& is) {
345     is.src_ = const_cast<char*>(SkipWhitespace_SIMD(is.src_));
346 }
347 
348 //! Template function specialization for StringStream
SkipWhitespace(StringStream & is)349 template<> inline void SkipWhitespace(StringStream& is) {
350     is.src_ = SkipWhitespace_SIMD(is.src_);
351 }
352 #endif // RAPIDJSON_SIMD
353 
354 ///////////////////////////////////////////////////////////////////////////////
355 // GenericReader
356 
357 //! SAX-style JSON parser. Use \ref Reader for UTF8 encoding and default allocator.
358 /*! GenericReader parses JSON text from a stream, and sends events synchronously to an
359     object implementing Handler concept.
360 
361     It needs to allocate a stack for storing a single decoded string during
362     non-destructive parsing.
363 
364     For in-situ parsing, the decoded string is directly written to the source
365     text string, no temporary buffer is required.
366 
367     A GenericReader object can be reused for parsing multiple JSON texts.
368 
369     \tparam SourceEncoding Encoding of the input stream.
370     \tparam TargetEncoding Encoding of the parse output.
371     \tparam StackAllocator Allocator type for stack.
372 */
373 template <typename SourceEncoding, typename TargetEncoding, typename StackAllocator = CrtAllocator>
374 class GenericReader {
375 public:
376     typedef typename SourceEncoding::Ch Ch; //!< SourceEncoding character type
377 
378     //! Constructor.
379     /*! \param stackAllocator Optional allocator for allocating stack memory. (Only use for non-destructive parsing)
380         \param stackCapacity stack capacity in bytes for storing a single decoded string.  (Only use for non-destructive parsing)
381     */
stack_(stackAllocator,stackCapacity)382     explicit GenericReader(StackAllocator* stackAllocator = 0, size_t stackCapacity = kDefaultStackCapacity) : stack_(stackAllocator, stackCapacity), parseResult_() {}
383 
384     //! Parse JSON text.
385     /*! \tparam parseFlags Combination of \ref ParseFlag.
386         \tparam InputStream Type of input stream, implementing Stream concept.
387         \tparam Handler Type of handler, implementing Handler concept.
388         \param is Input stream to be parsed.
389         \param handler The handler to receive events.
390         \return Whether the parsing is successful.
391     */
392     template <unsigned parseFlags, typename InputStream, typename Handler>
Parse(InputStream & is,Handler & handler)393     ParseResult Parse(InputStream& is, Handler& handler) {
394         if (parseFlags & kParseIterativeFlag)
395             return IterativeParse<parseFlags>(is, handler);
396 
397         parseResult_.Clear();
398 
399         ClearStackOnExit scope(*this);
400 
401         SkipWhitespace(is);
402 
403         if (is.Peek() == '\0') {
404             RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentEmpty, is.Tell());
405             RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
406         }
407         else {
408             ParseValue<parseFlags>(is, handler);
409             RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
410 
411             if (!(parseFlags & kParseStopWhenDoneFlag)) {
412                 SkipWhitespace(is);
413 
414                 if (is.Peek() != '\0') {
415                     RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorDocumentRootNotSingular, is.Tell());
416                     RAPIDJSON_PARSE_ERROR_EARLY_RETURN(parseResult_);
417                 }
418             }
419         }
420 
421         return parseResult_;
422     }
423 
424     //! Parse JSON text (with \ref kParseDefaultFlags)
425     /*! \tparam InputStream Type of input stream, implementing Stream concept
426         \tparam Handler Type of handler, implementing Handler concept.
427         \param is Input stream to be parsed.
428         \param handler The handler to receive events.
429         \return Whether the parsing is successful.
430     */
431     template <typename InputStream, typename Handler>
Parse(InputStream & is,Handler & handler)432     ParseResult Parse(InputStream& is, Handler& handler) {
433         return Parse<kParseDefaultFlags>(is, handler);
434     }
435 
436     //! Whether a parse error has occured in the last parsing.
HasParseError()437     bool HasParseError() const { return parseResult_.IsError(); }
438 
439     //! Get the \ref ParseErrorCode of last parsing.
GetParseErrorCode()440     ParseErrorCode GetParseErrorCode() const { return parseResult_.Code(); }
441 
442     //! Get the position of last parsing error in input, 0 otherwise.
GetErrorOffset()443     size_t GetErrorOffset() const { return parseResult_.Offset(); }
444 
445 protected:
SetParseError(ParseErrorCode code,size_t offset)446     void SetParseError(ParseErrorCode code, size_t offset) { parseResult_.Set(code, offset); }
447 
448 private:
449     // Prohibit copy constructor & assignment operator.
450     GenericReader(const GenericReader&);
451     GenericReader& operator=(const GenericReader&);
452 
ClearStack()453     void ClearStack() { stack_.Clear(); }
454 
455     // clear stack on any exit from ParseStream, e.g. due to exception
456     struct ClearStackOnExit {
ClearStackOnExitClearStackOnExit457         explicit ClearStackOnExit(GenericReader& r) : r_(r) {}
~ClearStackOnExitClearStackOnExit458         ~ClearStackOnExit() { r_.ClearStack(); }
459     private:
460         GenericReader& r_;
461         ClearStackOnExit(const ClearStackOnExit&);
462         ClearStackOnExit& operator=(const ClearStackOnExit&);
463     };
464 
465     // Parse object: { string : value, ... }
466     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseObject(InputStream & is,Handler & handler)467     void ParseObject(InputStream& is, Handler& handler) {
468         RAPIDJSON_ASSERT(is.Peek() == '{');
469         is.Take();  // Skip '{'
470 
471         if (!handler.StartObject())
472             RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
473 
474         SkipWhitespace(is);
475 
476         if (is.Peek() == '}') {
477             is.Take();
478             if (!handler.EndObject(0))  // empty object
479                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
480             return;
481         }
482 
483         for (SizeType memberCount = 0;;) {
484             if (is.Peek() != '"')
485                 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell());
486 
487             ParseString<parseFlags>(is, handler, true);
488             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
489 
490             SkipWhitespace(is);
491 
492             if (is.Take() != ':')
493                 RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell());
494 
495             SkipWhitespace(is);
496 
497             ParseValue<parseFlags>(is, handler);
498             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
499 
500             SkipWhitespace(is);
501 
502             ++memberCount;
503 
504             switch (is.Take()) {
505                 case ',': SkipWhitespace(is); break;
506                 case '}':
507                     if (!handler.EndObject(memberCount))
508                         RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
509                     return;
510                 default:  RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell());
511             }
512         }
513     }
514 
515     // Parse array: [ value, ... ]
516     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseArray(InputStream & is,Handler & handler)517     void ParseArray(InputStream& is, Handler& handler) {
518         RAPIDJSON_ASSERT(is.Peek() == '[');
519         is.Take();  // Skip '['
520 
521         if (!handler.StartArray())
522             RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
523 
524         SkipWhitespace(is);
525 
526         if (is.Peek() == ']') {
527             is.Take();
528             if (!handler.EndArray(0)) // empty array
529                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
530             return;
531         }
532 
533         for (SizeType elementCount = 0;;) {
534             ParseValue<parseFlags>(is, handler);
535             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
536 
537             ++elementCount;
538             SkipWhitespace(is);
539 
540             switch (is.Take()) {
541                 case ',': SkipWhitespace(is); break;
542                 case ']':
543                     if (!handler.EndArray(elementCount))
544                         RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
545                     return;
546                 default:  RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell());
547             }
548         }
549     }
550 
551     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseNull(InputStream & is,Handler & handler)552     void ParseNull(InputStream& is, Handler& handler) {
553         RAPIDJSON_ASSERT(is.Peek() == 'n');
554         is.Take();
555 
556         if (is.Take() == 'u' && is.Take() == 'l' && is.Take() == 'l') {
557             if (!handler.Null())
558                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
559         }
560         else
561             RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1);
562     }
563 
564     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseTrue(InputStream & is,Handler & handler)565     void ParseTrue(InputStream& is, Handler& handler) {
566         RAPIDJSON_ASSERT(is.Peek() == 't');
567         is.Take();
568 
569         if (is.Take() == 'r' && is.Take() == 'u' && is.Take() == 'e') {
570             if (!handler.Bool(true))
571                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
572         }
573         else
574             RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1);
575     }
576 
577     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseFalse(InputStream & is,Handler & handler)578     void ParseFalse(InputStream& is, Handler& handler) {
579         RAPIDJSON_ASSERT(is.Peek() == 'f');
580         is.Take();
581 
582         if (is.Take() == 'a' && is.Take() == 'l' && is.Take() == 's' && is.Take() == 'e') {
583             if (!handler.Bool(false))
584                 RAPIDJSON_PARSE_ERROR(kParseErrorTermination, is.Tell());
585         }
586         else
587             RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, is.Tell() - 1);
588     }
589 
590     // Helper function to parse four hexidecimal digits in \uXXXX in ParseString().
591     template<typename InputStream>
ParseHex4(InputStream & is)592     unsigned ParseHex4(InputStream& is) {
593         unsigned codepoint = 0;
594         for (int i = 0; i < 4; i++) {
595             Ch c = is.Take();
596             codepoint <<= 4;
597             codepoint += static_cast<unsigned>(c);
598             if (c >= '0' && c <= '9')
599                 codepoint -= '0';
600             else if (c >= 'A' && c <= 'F')
601                 codepoint -= 'A' - 10;
602             else if (c >= 'a' && c <= 'f')
603                 codepoint -= 'a' - 10;
604             else {
605                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorStringUnicodeEscapeInvalidHex, is.Tell() - 1);
606                 RAPIDJSON_PARSE_ERROR_EARLY_RETURN(0);
607             }
608         }
609         return codepoint;
610     }
611 
612     template <typename CharType>
613     class StackStream {
614     public:
615         typedef CharType Ch;
616 
StackStream(internal::Stack<StackAllocator> & stack)617         explicit StackStream(internal::Stack<StackAllocator>& stack) : stack_(stack), length_(0) {}
Put(Ch c)618         RAPIDJSON_FORCEINLINE void Put(Ch c) {
619             *stack_.template Push<Ch>() = c;
620             ++length_;
621         }
Length()622         size_t Length() const { return length_; }
Pop()623         Ch* Pop() {
624             return stack_.template Pop<Ch>(length_);
625         }
626 
627     private:
628         StackStream(const StackStream&);
629         StackStream& operator=(const StackStream&);
630 
631         internal::Stack<StackAllocator>& stack_;
632         SizeType length_;
633     };
634 
635     // Parse string and generate String event. Different code paths for kParseInsituFlag.
636     template<unsigned parseFlags, typename InputStream, typename Handler>
637     void ParseString(InputStream& is, Handler& handler, bool isKey = false) {
638         internal::StreamLocalCopy<InputStream> copy(is);
639         InputStream& s(copy.s);
640 
641         bool success = false;
642         if (parseFlags & kParseInsituFlag) {
643             typename InputStream::Ch *head = s.PutBegin();
644             ParseStringToStream<parseFlags, SourceEncoding, SourceEncoding>(s, s);
645             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
646             size_t length = s.PutEnd(head) - 1;
647             RAPIDJSON_ASSERT(length <= 0xFFFFFFFF);
648             const typename TargetEncoding::Ch* const str = (typename TargetEncoding::Ch*)head;
649             success = (isKey ? handler.Key(str, SizeType(length), false) : handler.String(str, SizeType(length), false));
650         }
651         else {
652             StackStream<typename TargetEncoding::Ch> stackStream(stack_);
653             ParseStringToStream<parseFlags, SourceEncoding, TargetEncoding>(s, stackStream);
654             RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
655             SizeType length = static_cast<SizeType>(stackStream.Length()) - 1;
656             const typename TargetEncoding::Ch* const str = stackStream.Pop();
657             success = (isKey ? handler.Key(str, length, true) : handler.String(str, length, true));
658         }
659         if (!success)
660             RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
661     }
662 
663     // Parse string to an output is
664     // This function handles the prefix/suffix double quotes, escaping, and optional encoding validation.
665     template<unsigned parseFlags, typename SEncoding, typename TEncoding, typename InputStream, typename OutputStream>
ParseStringToStream(InputStream & is,OutputStream & os)666     RAPIDJSON_FORCEINLINE void ParseStringToStream(InputStream& is, OutputStream& os) {
667 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
668 #define Z16 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
669         static const char escape[256] = {
670             Z16, Z16, 0, 0,'\"', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'/',
671             Z16, Z16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,'\\', 0, 0, 0,
672             0, 0,'\b', 0, 0, 0,'\f', 0, 0, 0, 0, 0, 0, 0,'\n', 0,
673             0, 0,'\r', 0,'\t', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
674             Z16, Z16, Z16, Z16, Z16, Z16, Z16, Z16
675         };
676 #undef Z16
677 //!@endcond
678 
679         RAPIDJSON_ASSERT(is.Peek() == '\"');
680         is.Take();  // Skip '\"'
681 
682         for (;;) {
683             Ch c = is.Peek();
684             if (c == '\\') {    // Escape
685                 is.Take();
686                 Ch e = is.Take();
687                 if ((sizeof(Ch) == 1 || unsigned(e) < 256) && escape[(unsigned char)e]) {
688                     os.Put(escape[(unsigned char)e]);
689                 }
690                 else if (e == 'u') {    // Unicode
691                     unsigned codepoint = ParseHex4(is);
692                     RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
693                     if (codepoint >= 0xD800 && codepoint <= 0xDBFF) {
694                         // Handle UTF-16 surrogate pair
695                         if (is.Take() != '\\' || is.Take() != 'u')
696                             RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, is.Tell() - 2);
697                         unsigned codepoint2 = ParseHex4(is);
698                         RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
699                         if (codepoint2 < 0xDC00 || codepoint2 > 0xDFFF)
700                             RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, is.Tell() - 2);
701                         codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
702                     }
703                     TEncoding::Encode(os, codepoint);
704                 }
705                 else
706                     RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell() - 1);
707             }
708             else if (c == '"') {    // Closing double quote
709                 is.Take();
710                 os.Put('\0');   // null-terminate the string
711                 return;
712             }
713             else if (c == '\0')
714                 RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell() - 1);
715             else if ((unsigned)c < 0x20) // RFC 4627: unescaped = %x20-21 / %x23-5B / %x5D-10FFFF
716                 RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell() - 1);
717             else {
718                 if (parseFlags & kParseValidateEncodingFlag ?
719                     !Transcoder<SEncoding, TEncoding>::Validate(is, os) :
720                     !Transcoder<SEncoding, TEncoding>::Transcode(is, os))
721                     RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell());
722             }
723         }
724     }
725 
726     template<typename InputStream, bool backup>
727     class NumberStream;
728 
729     template<typename InputStream>
730     class NumberStream<InputStream, false> {
731     public:
NumberStream(GenericReader & reader,InputStream & s)732         NumberStream(GenericReader& reader, InputStream& s) : is(s) { (void)reader;  }
~NumberStream()733         ~NumberStream() {}
734 
Peek()735         RAPIDJSON_FORCEINLINE Ch Peek() const { return is.Peek(); }
TakePush()736         RAPIDJSON_FORCEINLINE Ch TakePush() { return is.Take(); }
Take()737         RAPIDJSON_FORCEINLINE Ch Take() { return is.Take(); }
Tell()738         size_t Tell() { return is.Tell(); }
Length()739         size_t Length() { return 0; }
Pop()740         const char* Pop() { return 0; }
741 
742     protected:
743         NumberStream& operator=(const NumberStream&);
744 
745         InputStream& is;
746     };
747 
748     template<typename InputStream>
749     class NumberStream<InputStream, true> : public NumberStream<InputStream, false> {
750         typedef NumberStream<InputStream, false> Base;
751     public:
NumberStream(GenericReader & reader,InputStream & is)752         NumberStream(GenericReader& reader, InputStream& is) : NumberStream<InputStream, false>(reader, is), stackStream(reader.stack_) {}
~NumberStream()753         ~NumberStream() {}
754 
TakePush()755         RAPIDJSON_FORCEINLINE Ch TakePush() {
756             stackStream.Put((char)Base::is.Peek());
757             return Base::is.Take();
758         }
759 
Length()760         size_t Length() { return stackStream.Length(); }
761 
Pop()762         const char* Pop() {
763             stackStream.Put('\0');
764             return stackStream.Pop();
765         }
766 
767     private:
768         StackStream<char> stackStream;
769     };
770 
771     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseNumber(InputStream & is,Handler & handler)772     void ParseNumber(InputStream& is, Handler& handler) {
773         internal::StreamLocalCopy<InputStream> copy(is);
774         NumberStream<InputStream, (parseFlags & kParseFullPrecisionFlag) != 0> s(*this, copy.s);
775 
776         // Parse minus
777         bool minus = false;
778         if (s.Peek() == '-') {
779             minus = true;
780             s.Take();
781         }
782 
783         // Parse int: zero / ( digit1-9 *DIGIT )
784         unsigned i = 0;
785         uint64_t i64 = 0;
786         bool use64bit = false;
787         int significandDigit = 0;
788         if (s.Peek() == '0') {
789             i = 0;
790             s.TakePush();
791         }
792         else if (s.Peek() >= '1' && s.Peek() <= '9') {
793             i = static_cast<unsigned>(s.TakePush() - '0');
794 
795             if (minus)
796                 while (s.Peek() >= '0' && s.Peek() <= '9') {
797                     if (i >= 214748364) { // 2^31 = 2147483648
798                         if (i != 214748364 || s.Peek() > '8') {
799                             i64 = i;
800                             use64bit = true;
801                             break;
802                         }
803                     }
804                     i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
805                     significandDigit++;
806                 }
807             else
808                 while (s.Peek() >= '0' && s.Peek() <= '9') {
809                     if (i >= 429496729) { // 2^32 - 1 = 4294967295
810                         if (i != 429496729 || s.Peek() > '5') {
811                             i64 = i;
812                             use64bit = true;
813                             break;
814                         }
815                     }
816                     i = i * 10 + static_cast<unsigned>(s.TakePush() - '0');
817                     significandDigit++;
818                 }
819         }
820         else
821             RAPIDJSON_PARSE_ERROR(kParseErrorValueInvalid, s.Tell());
822 
823         // Parse 64bit int
824         bool useDouble = false;
825         double d = 0.0;
826         if (use64bit) {
827             if (minus)
828                 while (s.Peek() >= '0' && s.Peek() <= '9') {
829                      if (i64 >= RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC)) // 2^63 = 9223372036854775808
830                         if (i64 != RAPIDJSON_UINT64_C2(0x0CCCCCCC, 0xCCCCCCCC) || s.Peek() > '8') {
831                             d = i64;
832                             useDouble = true;
833                             break;
834                         }
835                     i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
836                     significandDigit++;
837                 }
838             else
839                 while (s.Peek() >= '0' && s.Peek() <= '9') {
840                     if (i64 >= RAPIDJSON_UINT64_C2(0x19999999, 0x99999999)) // 2^64 - 1 = 18446744073709551615
841                         if (i64 != RAPIDJSON_UINT64_C2(0x19999999, 0x99999999) || s.Peek() > '5') {
842                             d = i64;
843                             useDouble = true;
844                             break;
845                         }
846                     i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
847                     significandDigit++;
848                 }
849         }
850 
851         // Force double for big integer
852         if (useDouble) {
853             while (s.Peek() >= '0' && s.Peek() <= '9') {
854                 if (d >= 1.7976931348623157e307) // DBL_MAX / 10.0
855                     RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, s.Tell());
856                 d = d * 10 + (s.TakePush() - '0');
857             }
858         }
859 
860         // Parse frac = decimal-point 1*DIGIT
861         int expFrac = 0;
862         size_t decimalPosition;
863         if (s.Peek() == '.') {
864             s.Take();
865             decimalPosition = s.Length();
866 
867             if (!(s.Peek() >= '0' && s.Peek() <= '9'))
868                 RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissFraction, s.Tell());
869 
870             if (!useDouble) {
871 #if RAPIDJSON_64BIT
872                 // Use i64 to store significand in 64-bit architecture
873                 if (!use64bit)
874                     i64 = i;
875 
876                 while (s.Peek() >= '0' && s.Peek() <= '9') {
877                     if (i64 > RAPIDJSON_UINT64_C2(0x1FFFFF, 0xFFFFFFFF)) // 2^53 - 1 for fast path
878                         break;
879                     else {
880                         i64 = i64 * 10 + static_cast<unsigned>(s.TakePush() - '0');
881                         --expFrac;
882                         if (i64 != 0)
883                             significandDigit++;
884                     }
885                 }
886 
887                 d = (double)i64;
888 #else
889                 // Use double to store significand in 32-bit architecture
890                 d = use64bit ? (double)i64 : (double)i;
891 #endif
892                 useDouble = true;
893             }
894 
895             while (s.Peek() >= '0' && s.Peek() <= '9') {
896                 if (significandDigit < 17) {
897                     d = d * 10.0 + (s.TakePush() - '0');
898                     --expFrac;
899                     if (d > 0.0)
900                         significandDigit++;
901                 }
902                 else
903                     s.TakePush();
904             }
905         }
906         else
907             decimalPosition = s.Length(); // decimal position at the end of integer.
908 
909         // Parse exp = e [ minus / plus ] 1*DIGIT
910         int exp = 0;
911         if (s.Peek() == 'e' || s.Peek() == 'E') {
912             if (!useDouble) {
913                 d = use64bit ? i64 : i;
914                 useDouble = true;
915             }
916             s.Take();
917 
918             bool expMinus = false;
919             if (s.Peek() == '+')
920                 s.Take();
921             else if (s.Peek() == '-') {
922                 s.Take();
923                 expMinus = true;
924             }
925 
926             if (s.Peek() >= '0' && s.Peek() <= '9') {
927                 exp = s.Take() - '0';
928                 if (expMinus) {
929                     while (s.Peek() >= '0' && s.Peek() <= '9') {
930                         exp = exp * 10 + (s.Take() - '0');
931                         if (exp >= 214748364) {                         // Issue #313: prevent overflow exponent
932                             while (s.Peek() >= '0' && s.Peek() <= '9')  // Consume the rest of exponent
933                                 s.Take();
934                         }
935                     }
936                 }
937                 else {  // positive exp
938                     int maxExp = 308 - expFrac;
939                     while (s.Peek() >= '0' && s.Peek() <= '9') {
940                         exp = exp * 10 + (s.Take() - '0');
941                         if (exp > maxExp)
942                             RAPIDJSON_PARSE_ERROR(kParseErrorNumberTooBig, s.Tell());
943                     }
944                 }
945             }
946             else
947                 RAPIDJSON_PARSE_ERROR(kParseErrorNumberMissExponent, s.Tell());
948 
949             if (expMinus)
950                 exp = -exp;
951         }
952 
953         // Finish parsing, call event according to the type of number.
954         bool cont = true;
955         size_t length = s.Length();
956         const char* decimal = s.Pop();  // Pop stack no matter if it will be used or not.
957 
958         if (useDouble) {
959             int p = exp + expFrac;
960             if (parseFlags & kParseFullPrecisionFlag)
961                 d = internal::StrtodFullPrecision(d, p, decimal, length, decimalPosition, exp);
962             else
963                 d = internal::StrtodNormalPrecision(d, p);
964 
965             cont = handler.Double(minus ? -d : d);
966         }
967         else {
968             if (use64bit) {
969                 if (minus)
970                     cont = handler.Int64(static_cast<int64_t>(~i64 + 1));
971                 else
972                     cont = handler.Uint64(i64);
973             }
974             else {
975                 if (minus)
976                     cont = handler.Int(static_cast<int32_t>(~i + 1));
977                 else
978                     cont = handler.Uint(i);
979             }
980         }
981         if (!cont)
982             RAPIDJSON_PARSE_ERROR(kParseErrorTermination, s.Tell());
983     }
984 
985     // Parse any JSON value
986     template<unsigned parseFlags, typename InputStream, typename Handler>
ParseValue(InputStream & is,Handler & handler)987     void ParseValue(InputStream& is, Handler& handler) {
988         switch (is.Peek()) {
989             case 'n': ParseNull  <parseFlags>(is, handler); break;
990             case 't': ParseTrue  <parseFlags>(is, handler); break;
991             case 'f': ParseFalse <parseFlags>(is, handler); break;
992             case '"': ParseString<parseFlags>(is, handler); break;
993             case '{': ParseObject<parseFlags>(is, handler); break;
994             case '[': ParseArray <parseFlags>(is, handler); break;
995             default : ParseNumber<parseFlags>(is, handler);
996         }
997     }
998 
999     // Iterative Parsing
1000 
1001     // States
1002     enum IterativeParsingState {
1003         IterativeParsingStartState = 0,
1004         IterativeParsingFinishState,
1005         IterativeParsingErrorState,
1006 
1007         // Object states
1008         IterativeParsingObjectInitialState,
1009         IterativeParsingMemberKeyState,
1010         IterativeParsingKeyValueDelimiterState,
1011         IterativeParsingMemberValueState,
1012         IterativeParsingMemberDelimiterState,
1013         IterativeParsingObjectFinishState,
1014 
1015         // Array states
1016         IterativeParsingArrayInitialState,
1017         IterativeParsingElementState,
1018         IterativeParsingElementDelimiterState,
1019         IterativeParsingArrayFinishState,
1020 
1021         // Single value state
1022         IterativeParsingValueState,
1023 
1024         cIterativeParsingStateCount
1025     };
1026 
1027     // Tokens
1028     enum Token {
1029         LeftBracketToken = 0,
1030         RightBracketToken,
1031 
1032         LeftCurlyBracketToken,
1033         RightCurlyBracketToken,
1034 
1035         CommaToken,
1036         ColonToken,
1037 
1038         StringToken,
1039         FalseToken,
1040         TrueToken,
1041         NullToken,
1042         NumberToken,
1043 
1044         kTokenCount
1045     };
1046 
Tokenize(Ch c)1047     RAPIDJSON_FORCEINLINE Token Tokenize(Ch c) {
1048 
1049 //!@cond RAPIDJSON_HIDDEN_FROM_DOXYGEN
1050 #define N NumberToken
1051 #define N16 N,N,N,N,N,N,N,N,N,N,N,N,N,N,N,N
1052         // Maps from ASCII to Token
1053         static const unsigned char tokenMap[256] = {
1054             N16, // 00~0F
1055             N16, // 10~1F
1056             N, N, StringToken, N, N, N, N, N, N, N, N, N, CommaToken, N, N, N, // 20~2F
1057             N, N, N, N, N, N, N, N, N, N, ColonToken, N, N, N, N, N, // 30~3F
1058             N16, // 40~4F
1059             N, N, N, N, N, N, N, N, N, N, N, LeftBracketToken, N, RightBracketToken, N, N, // 50~5F
1060             N, N, N, N, N, N, FalseToken, N, N, N, N, N, N, N, NullToken, N, // 60~6F
1061             N, N, N, N, TrueToken, N, N, N, N, N, N, LeftCurlyBracketToken, N, RightCurlyBracketToken, N, N, // 70~7F
1062             N16, N16, N16, N16, N16, N16, N16, N16 // 80~FF
1063         };
1064 #undef N
1065 #undef N16
1066 //!@endcond
1067 
1068         if (sizeof(Ch) == 1 || unsigned(c) < 256)
1069             return (Token)tokenMap[(unsigned char)c];
1070         else
1071             return NumberToken;
1072     }
1073 
    // Look up the candidate next state for the current `state` and the one-token
    // lookahead `token`.
    //
    // The result is simply G[state][token]: rows follow the declaration order of
    // IterativeParsingState and columns follow the declaration order of Token, so
    // the table must be kept in sync with both enums. Every combination that is
    // not listed as a valid move maps to IterativeParsingErrorState.
    //
    // The returned state is only a candidate; Transit() performs the transition
    // and may return a different state (for example when a scope is closed).
    RAPIDJSON_FORCEINLINE IterativeParsingState Predict(IterativeParsingState state, Token token) {
        // current state x one lookahead token -> new state
        static const char G[cIterativeParsingStateCount][kTokenCount] = {
            // Start
            {
                IterativeParsingArrayInitialState,  // Left bracket
                IterativeParsingErrorState,         // Right bracket
                IterativeParsingObjectInitialState, // Left curly bracket
                IterativeParsingErrorState,         // Right curly bracket
                IterativeParsingErrorState,         // Comma
                IterativeParsingErrorState,         // Colon
                IterativeParsingValueState,         // String
                IterativeParsingValueState,         // False
                IterativeParsingValueState,         // True
                IterativeParsingValueState,         // Null
                IterativeParsingValueState          // Number
            },
            // Finish(sink state)
            {
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState
            },
            // Error(sink state)
            {
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState
            },
            // ObjectInitial
            {
                IterativeParsingErrorState,         // Left bracket
                IterativeParsingErrorState,         // Right bracket
                IterativeParsingErrorState,         // Left curly bracket
                IterativeParsingObjectFinishState,  // Right curly bracket
                IterativeParsingErrorState,         // Comma
                IterativeParsingErrorState,         // Colon
                IterativeParsingMemberKeyState,     // String
                IterativeParsingErrorState,         // False
                IterativeParsingErrorState,         // True
                IterativeParsingErrorState,         // Null
                IterativeParsingErrorState          // Number
            },
            // MemberKey
            {
                IterativeParsingErrorState,             // Left bracket
                IterativeParsingErrorState,             // Right bracket
                IterativeParsingErrorState,             // Left curly bracket
                IterativeParsingErrorState,             // Right curly bracket
                IterativeParsingErrorState,             // Comma
                IterativeParsingKeyValueDelimiterState, // Colon
                IterativeParsingErrorState,             // String
                IterativeParsingErrorState,             // False
                IterativeParsingErrorState,             // True
                IterativeParsingErrorState,             // Null
                IterativeParsingErrorState              // Number
            },
            // KeyValueDelimiter
            {
                IterativeParsingArrayInitialState,      // Left bracket(push MemberValue state)
                IterativeParsingErrorState,             // Right bracket
                IterativeParsingObjectInitialState,     // Left curly bracket(push MemberValue state)
                IterativeParsingErrorState,             // Right curly bracket
                IterativeParsingErrorState,             // Comma
                IterativeParsingErrorState,             // Colon
                IterativeParsingMemberValueState,       // String
                IterativeParsingMemberValueState,       // False
                IterativeParsingMemberValueState,       // True
                IterativeParsingMemberValueState,       // Null
                IterativeParsingMemberValueState        // Number
            },
            // MemberValue
            {
                IterativeParsingErrorState,             // Left bracket
                IterativeParsingErrorState,             // Right bracket
                IterativeParsingErrorState,             // Left curly bracket
                IterativeParsingObjectFinishState,      // Right curly bracket
                IterativeParsingMemberDelimiterState,   // Comma
                IterativeParsingErrorState,             // Colon
                IterativeParsingErrorState,             // String
                IterativeParsingErrorState,             // False
                IterativeParsingErrorState,             // True
                IterativeParsingErrorState,             // Null
                IterativeParsingErrorState              // Number
            },
            // MemberDelimiter
            {
                IterativeParsingErrorState,         // Left bracket
                IterativeParsingErrorState,         // Right bracket
                IterativeParsingErrorState,         // Left curly bracket
                IterativeParsingErrorState,         // Right curly bracket
                IterativeParsingErrorState,         // Comma
                IterativeParsingErrorState,         // Colon
                IterativeParsingMemberKeyState,     // String
                IterativeParsingErrorState,         // False
                IterativeParsingErrorState,         // True
                IterativeParsingErrorState,         // Null
                IterativeParsingErrorState          // Number
            },
            // ObjectFinish(sink state)
            {
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState
            },
            // ArrayInitial
            {
                IterativeParsingArrayInitialState,      // Left bracket(push Element state)
                IterativeParsingArrayFinishState,       // Right bracket
                IterativeParsingObjectInitialState,     // Left curly bracket(push Element state)
                IterativeParsingErrorState,             // Right curly bracket
                IterativeParsingErrorState,             // Comma
                IterativeParsingErrorState,             // Colon
                IterativeParsingElementState,           // String
                IterativeParsingElementState,           // False
                IterativeParsingElementState,           // True
                IterativeParsingElementState,           // Null
                IterativeParsingElementState            // Number
            },
            // Element
            {
                IterativeParsingErrorState,             // Left bracket
                IterativeParsingArrayFinishState,       // Right bracket
                IterativeParsingErrorState,             // Left curly bracket
                IterativeParsingErrorState,             // Right curly bracket
                IterativeParsingElementDelimiterState,  // Comma
                IterativeParsingErrorState,             // Colon
                IterativeParsingErrorState,             // String
                IterativeParsingErrorState,             // False
                IterativeParsingErrorState,             // True
                IterativeParsingErrorState,             // Null
                IterativeParsingErrorState              // Number
            },
            // ElementDelimiter
            {
                IterativeParsingArrayInitialState,      // Left bracket(push Element state)
                IterativeParsingErrorState,             // Right bracket
                IterativeParsingObjectInitialState,     // Left curly bracket(push Element state)
                IterativeParsingErrorState,             // Right curly bracket
                IterativeParsingErrorState,             // Comma
                IterativeParsingErrorState,             // Colon
                IterativeParsingElementState,           // String
                IterativeParsingElementState,           // False
                IterativeParsingElementState,           // True
                IterativeParsingElementState,           // Null
                IterativeParsingElementState            // Number
            },
            // ArrayFinish(sink state)
            {
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState
            },
            // Single Value (sink state)
            {
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState, IterativeParsingErrorState,
                IterativeParsingErrorState
            }
        }; // End of G

        // Table entries are stored as char; convert back to the enum type.
        return (IterativeParsingState)G[state][token];
    }
1237 
1238     // Make an advance in the token stream and state based on the candidate destination state which was returned by Transit().
1239     // May return a new state on state pop.
1240     template <unsigned parseFlags, typename InputStream, typename Handler>
Transit(IterativeParsingState src,Token token,IterativeParsingState dst,InputStream & is,Handler & handler)1241     RAPIDJSON_FORCEINLINE IterativeParsingState Transit(IterativeParsingState src, Token token, IterativeParsingState dst, InputStream& is, Handler& handler) {
1242         (void)token;
1243 
1244         switch (dst) {
1245         case IterativeParsingErrorState:
1246             return dst;
1247 
1248         case IterativeParsingObjectInitialState:
1249         case IterativeParsingArrayInitialState:
1250         {
1251             // Push the state(Element or MemeberValue) if we are nested in another array or value of member.
1252             // In this way we can get the correct state on ObjectFinish or ArrayFinish by frame pop.
1253             IterativeParsingState n = src;
1254             if (src == IterativeParsingArrayInitialState || src == IterativeParsingElementDelimiterState)
1255                 n = IterativeParsingElementState;
1256             else if (src == IterativeParsingKeyValueDelimiterState)
1257                 n = IterativeParsingMemberValueState;
1258             // Push current state.
1259             *stack_.template Push<SizeType>(1) = n;
1260             // Initialize and push the member/element count.
1261             *stack_.template Push<SizeType>(1) = 0;
1262             // Call handler
1263             bool hr = (dst == IterativeParsingObjectInitialState) ? handler.StartObject() : handler.StartArray();
1264             // On handler short circuits the parsing.
1265             if (!hr) {
1266                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1267                 return IterativeParsingErrorState;
1268             }
1269             else {
1270                 is.Take();
1271                 return dst;
1272             }
1273         }
1274 
1275         case IterativeParsingMemberKeyState:
1276             ParseString<parseFlags>(is, handler, true);
1277             if (HasParseError())
1278                 return IterativeParsingErrorState;
1279             else
1280                 return dst;
1281 
1282         case IterativeParsingKeyValueDelimiterState:
1283             RAPIDJSON_ASSERT(token == ColonToken);
1284             is.Take();
1285             return dst;
1286 
1287         case IterativeParsingMemberValueState:
1288             // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1289             ParseValue<parseFlags>(is, handler);
1290             if (HasParseError()) {
1291                 return IterativeParsingErrorState;
1292             }
1293             return dst;
1294 
1295         case IterativeParsingElementState:
1296             // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1297             ParseValue<parseFlags>(is, handler);
1298             if (HasParseError()) {
1299                 return IterativeParsingErrorState;
1300             }
1301             return dst;
1302 
1303         case IterativeParsingMemberDelimiterState:
1304         case IterativeParsingElementDelimiterState:
1305             is.Take();
1306             // Update member/element count.
1307             *stack_.template Top<SizeType>() = *stack_.template Top<SizeType>() + 1;
1308             return dst;
1309 
1310         case IterativeParsingObjectFinishState:
1311         {
1312             // Get member count.
1313             SizeType c = *stack_.template Pop<SizeType>(1);
1314             // If the object is not empty, count the last member.
1315             if (src == IterativeParsingMemberValueState)
1316                 ++c;
1317             // Restore the state.
1318             IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
1319             // Transit to Finish state if this is the topmost scope.
1320             if (n == IterativeParsingStartState)
1321                 n = IterativeParsingFinishState;
1322             // Call handler
1323             bool hr = handler.EndObject(c);
1324             // On handler short circuits the parsing.
1325             if (!hr) {
1326                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1327                 return IterativeParsingErrorState;
1328             }
1329             else {
1330                 is.Take();
1331                 return n;
1332             }
1333         }
1334 
1335         case IterativeParsingArrayFinishState:
1336         {
1337             // Get element count.
1338             SizeType c = *stack_.template Pop<SizeType>(1);
1339             // If the array is not empty, count the last element.
1340             if (src == IterativeParsingElementState)
1341                 ++c;
1342             // Restore the state.
1343             IterativeParsingState n = static_cast<IterativeParsingState>(*stack_.template Pop<SizeType>(1));
1344             // Transit to Finish state if this is the topmost scope.
1345             if (n == IterativeParsingStartState)
1346                 n = IterativeParsingFinishState;
1347             // Call handler
1348             bool hr = handler.EndArray(c);
1349             // On handler short circuits the parsing.
1350             if (!hr) {
1351                 RAPIDJSON_PARSE_ERROR_NORETURN(kParseErrorTermination, is.Tell());
1352                 return IterativeParsingErrorState;
1353             }
1354             else {
1355                 is.Take();
1356                 return n;
1357             }
1358         }
1359 
1360         default:
1361             // This branch is for IterativeParsingValueState actually.
1362             // Use `default:` rather than
1363             // `case IterativeParsingValueState:` is for code coverage.
1364 
1365             // The IterativeParsingStartState is not enumerated in this switch-case.
1366             // It is impossible for that case. And it can be caught by following assertion.
1367 
1368             // The IterativeParsingFinishState is not enumerated in this switch-case either.
1369             // It is a "derivative" state which cannot triggered from Predict() directly.
1370             // Therefore it cannot happen here. And it can be caught by following assertion.
1371             RAPIDJSON_ASSERT(dst == IterativeParsingValueState);
1372 
1373             // Must be non-compound value. Or it would be ObjectInitial or ArrayInitial state.
1374             ParseValue<parseFlags>(is, handler);
1375             if (HasParseError()) {
1376                 return IterativeParsingErrorState;
1377             }
1378             return IterativeParsingFinishState;
1379         }
1380     }
1381 
1382     template <typename InputStream>
HandleError(IterativeParsingState src,InputStream & is)1383     void HandleError(IterativeParsingState src, InputStream& is) {
1384         if (HasParseError()) {
1385             // Error flag has been set.
1386             return;
1387         }
1388 
1389         switch (src) {
1390         case IterativeParsingStartState:            RAPIDJSON_PARSE_ERROR(kParseErrorDocumentEmpty, is.Tell()); return;
1391         case IterativeParsingFinishState:           RAPIDJSON_PARSE_ERROR(kParseErrorDocumentRootNotSingular, is.Tell()); return;
1392         case IterativeParsingObjectInitialState:
1393         case IterativeParsingMemberDelimiterState:  RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissName, is.Tell()); return;
1394         case IterativeParsingMemberKeyState:        RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissColon, is.Tell()); return;
1395         case IterativeParsingMemberValueState:      RAPIDJSON_PARSE_ERROR(kParseErrorObjectMissCommaOrCurlyBracket, is.Tell()); return;
1396         case IterativeParsingElementState:          RAPIDJSON_PARSE_ERROR(kParseErrorArrayMissCommaOrSquareBracket, is.Tell()); return;
1397         default:                                    RAPIDJSON_PARSE_ERROR(kParseErrorUnspecificSyntaxError, is.Tell());
1398         }
1399     }
1400 
1401     template <unsigned parseFlags, typename InputStream, typename Handler>
IterativeParse(InputStream & is,Handler & handler)1402     ParseResult IterativeParse(InputStream& is, Handler& handler) {
1403         parseResult_.Clear();
1404         ClearStackOnExit scope(*this);
1405         IterativeParsingState state = IterativeParsingStartState;
1406 
1407         SkipWhitespace(is);
1408         while (is.Peek() != '\0') {
1409             Token t = Tokenize(is.Peek());
1410             IterativeParsingState n = Predict(state, t);
1411             IterativeParsingState d = Transit<parseFlags>(state, t, n, is, handler);
1412 
1413             if (d == IterativeParsingErrorState) {
1414                 HandleError(state, is);
1415                 break;
1416             }
1417 
1418             state = d;
1419 
1420             // Do not further consume streams if a root JSON has been parsed.
1421             if ((parseFlags & kParseStopWhenDoneFlag) && state == IterativeParsingFinishState)
1422                 break;
1423 
1424             SkipWhitespace(is);
1425         }
1426 
1427         // Handle the end of file.
1428         if (state != IterativeParsingFinishState)
1429             HandleError(state, is);
1430 
1431         return parseResult_;
1432     }
1433 
1434     static const size_t kDefaultStackCapacity = 256;    //!< Default stack capacity in bytes for storing a single decoded string.
1435     internal::Stack<StackAllocator> stack_;  //!< A stack for storing decoded string temporarily during non-destructive parsing.
1436     ParseResult parseResult_;  //!< Result of the current parse; IterativeParse() clears it on entry and returns it.
1437 }; // class GenericReader
1438 
1439 //! Reader with UTF8 encoding and default allocator (GenericReader with UTF8<> given for both explicit template arguments).
1440 typedef GenericReader<UTF8<>, UTF8<> > Reader;
1441 
1442 RAPIDJSON_NAMESPACE_END
1443 
1444 #ifdef __GNUC__
1445 RAPIDJSON_DIAG_POP
1446 #endif
1447 
1448 #ifdef _MSC_VER
1449 RAPIDJSON_DIAG_POP
1450 #endif
1451 
1452 #endif // RAPIDJSON_READER_H_
1453