1 /*
2  * Copyright (C) 2010 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package android.util;
18 
19 import libcore.internal.StringPool;
20 
21 import java.io.Closeable;
22 import java.io.EOFException;
23 import java.io.IOException;
24 import java.io.Reader;
25 import java.util.ArrayList;
26 import java.util.List;
27 
28 
29 /**
30  * Reads a JSON (<a href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>)
31  * encoded value as a stream of tokens. This stream includes both literal
32  * values (strings, numbers, booleans, and nulls) as well as the begin and
33  * end delimiters of objects and arrays. The tokens are traversed in
34  * depth-first order, the same order that they appear in the JSON document.
 * Within JSON objects, each name is reported as its own token, followed by the token(s) of its value.
36  *
37  * <h3>Parsing JSON</h3>
38  * To create a recursive descent parser for your own JSON streams, first create
39  * an entry point method that creates a {@code JsonReader}.
40  *
41  * <p>Next, create handler methods for each structure in your JSON text. You'll
42  * need a method for each object type and for each array type.
43  * <ul>
44  *   <li>Within <strong>array handling</strong> methods, first call {@link
45  *       #beginArray} to consume the array's opening bracket. Then create a
46  *       while loop that accumulates values, terminating when {@link #hasNext}
47  *       is false. Finally, read the array's closing bracket by calling {@link
48  *       #endArray}.
49  *   <li>Within <strong>object handling</strong> methods, first call {@link
50  *       #beginObject} to consume the object's opening brace. Then create a
51  *       while loop that assigns values to local variables based on their name.
52  *       This loop should terminate when {@link #hasNext} is false. Finally,
53  *       read the object's closing brace by calling {@link #endObject}.
54  * </ul>
55  * <p>When a nested object or array is encountered, delegate to the
56  * corresponding handler method.
57  *
58  * <p>When an unknown name is encountered, strict parsers should fail with an
59  * exception. Lenient parsers should call {@link #skipValue()} to recursively
60  * skip the value's nested tokens, which may otherwise conflict.
61  *
62  * <p>If a value may be null, you should first check using {@link #peek()}.
63  * Null literals can be consumed using either {@link #nextNull()} or {@link
64  * #skipValue()}.
65  *
66  * <h3>Example</h3>
67  * Suppose we'd like to parse a stream of messages such as the following: <pre> {@code
68  * [
69  *   {
70  *     "id": 912345678901,
71  *     "text": "How do I read JSON on Android?",
72  *     "geo": null,
73  *     "user": {
74  *       "name": "android_newb",
75  *       "followers_count": 41
76  *      }
77  *   },
78  *   {
79  *     "id": 912345678902,
80  *     "text": "@android_newb just use android.util.JsonReader!",
81  *     "geo": [50.454722, -104.606667],
82  *     "user": {
83  *       "name": "jesse",
84  *       "followers_count": 2
85  *     }
86  *   }
87  * ]}</pre>
88  * This code implements the parser for the above structure: <pre>   {@code
89  *
90  *   public List<Message> readJsonStream(InputStream in) throws IOException {
91  *     JsonReader reader = new JsonReader(new InputStreamReader(in, "UTF-8"));
92  *     try {
93  *       return readMessagesArray(reader);
94  *     } finally {
95  *       reader.close();
96  *     }
97  *   }
98  *
99  *   public List<Message> readMessagesArray(JsonReader reader) throws IOException {
100  *     List<Message> messages = new ArrayList<Message>();
101  *
102  *     reader.beginArray();
103  *     while (reader.hasNext()) {
104  *       messages.add(readMessage(reader));
105  *     }
106  *     reader.endArray();
107  *     return messages;
108  *   }
109  *
110  *   public Message readMessage(JsonReader reader) throws IOException {
111  *     long id = -1;
112  *     String text = null;
113  *     User user = null;
114  *     List<Double> geo = null;
115  *
116  *     reader.beginObject();
117  *     while (reader.hasNext()) {
118  *       String name = reader.nextName();
119  *       if (name.equals("id")) {
120  *         id = reader.nextLong();
121  *       } else if (name.equals("text")) {
122  *         text = reader.nextString();
123  *       } else if (name.equals("geo") && reader.peek() != JsonToken.NULL) {
124  *         geo = readDoublesArray(reader);
125  *       } else if (name.equals("user")) {
126  *         user = readUser(reader);
127  *       } else {
128  *         reader.skipValue();
129  *       }
130  *     }
131  *     reader.endObject();
132  *     return new Message(id, text, user, geo);
133  *   }
134  *
135  *   public List<Double> readDoublesArray(JsonReader reader) throws IOException {
136  *     List<Double> doubles = new ArrayList<Double>();
137  *
138  *     reader.beginArray();
139  *     while (reader.hasNext()) {
140  *       doubles.add(reader.nextDouble());
141  *     }
142  *     reader.endArray();
143  *     return doubles;
144  *   }
145  *
146  *   public User readUser(JsonReader reader) throws IOException {
147  *     String username = null;
148  *     int followersCount = -1;
149  *
150  *     reader.beginObject();
151  *     while (reader.hasNext()) {
152  *       String name = reader.nextName();
153  *       if (name.equals("name")) {
154  *         username = reader.nextString();
155  *       } else if (name.equals("followers_count")) {
156  *         followersCount = reader.nextInt();
157  *       } else {
158  *         reader.skipValue();
159  *       }
160  *     }
161  *     reader.endObject();
162  *     return new User(username, followersCount);
163  *   }}</pre>
164  *
165  * <h3>Number Handling</h3>
166  * This reader permits numeric values to be read as strings and string values to
167  * be read as numbers. For example, both elements of the JSON array {@code
168  * [1, "1"]} may be read using either {@link #nextInt} or {@link #nextString}.
169  * This behavior is intended to prevent lossy numeric conversions: double is
170  * JavaScript's only numeric type and very large values like {@code
171  * 9007199254740993} cannot be represented exactly on that platform. To minimize
172  * precision loss, extremely large values should be written and read as strings
173  * in JSON.
174  *
175  * <p>Each {@code JsonReader} may be used to read a single JSON stream. Instances
176  * of this class are not thread safe.
177  */
178 public final class JsonReader implements Closeable {
179 
180     private static final String TRUE = "true";
181     private static final String FALSE = "false";
182 
183     private final StringPool stringPool = new StringPool();
184 
185     /** The input JSON. */
186     private final Reader in;
187 
188     /** True to accept non-spec compliant JSON */
189     private boolean lenient = false;
190 
191     /**
192      * Use a manual buffer to easily read and unread upcoming characters, and
193      * also so we can create strings without an intermediate StringBuilder.
194      * We decode literals directly out of this buffer, so it must be at least as
195      * long as the longest token that can be reported as a number.
196      */
197     private final char[] buffer = new char[1024];
198     private int pos = 0;
199     private int limit = 0;
200 
201     /*
202      * The offset of the first character in the buffer.
203      */
204     private int bufferStartLine = 1;
205     private int bufferStartColumn = 1;
206 
207     private final List<JsonScope> stack = new ArrayList<JsonScope>();
208     {
209         push(JsonScope.EMPTY_DOCUMENT);
210     }
211 
212     /**
213      * The type of the next token to be returned by {@link #peek} and {@link
214      * #advance}. If null, peek() will assign a value.
215      */
216     private JsonToken token;
217 
218     /** The text of the next name. */
219     private String name;
220 
221     /*
222      * For the next literal value, we may have the text value, or the position
223      * and length in the buffer.
224      */
225     private String value;
226     private int valuePos;
227     private int valueLength;
228 
229     /** True if we're currently handling a skipValue() call. */
230     private boolean skipping = false;
231 
/**
 * Builds a reader that parses the JSON-encoded text supplied by {@code in}.
 *
 * @param in the character stream to read; must not be null.
 * @throws NullPointerException if {@code in} is null.
 */
public JsonReader(Reader in) {
    if (null == in) {
        throw new NullPointerException("in == null");
    }

    this.in = in;
}
241 
242     /**
243      * Configure this parser to be  be liberal in what it accepts. By default,
244      * this parser is strict and only accepts JSON as specified by <a
245      * href="http://www.ietf.org/rfc/rfc4627.txt">RFC 4627</a>. Setting the
246      * parser to lenient causes it to ignore the following syntax errors:
247      *
248      * <ul>
249      *   <li>End of line comments starting with {@code //} or {@code #} and
250      *       ending with a newline character.
251      *   <li>C-style comments starting with {@code /*} and ending with
252      *       {@code *}{@code /}. Such comments may not be nested.
253      *   <li>Names that are unquoted or {@code 'single quoted'}.
254      *   <li>Strings that are unquoted or {@code 'single quoted'}.
255      *   <li>Array elements separated by {@code ;} instead of {@code ,}.
256      *   <li>Unnecessary array separators. These are interpreted as if null
257      *       was the omitted value.
258      *   <li>Names and values separated by {@code =} or {@code =>} instead of
259      *       {@code :}.
260      *   <li>Name/value pairs separated by {@code ;} instead of {@code ,}.
261      * </ul>
262      */
setLenient(boolean lenient)263     public void setLenient(boolean lenient) {
264         this.lenient = lenient;
265     }
266 
267     /**
268      * Returns true if this parser is liberal in what it accepts.
269      */
isLenient()270     public boolean isLenient() {
271         return lenient;
272     }
273 
274     /**
275      * Consumes the next token from the JSON stream and asserts that it is the
276      * beginning of a new array.
277      */
beginArray()278     public void beginArray() throws IOException {
279         expect(JsonToken.BEGIN_ARRAY);
280     }
281 
282     /**
283      * Consumes the next token from the JSON stream and asserts that it is the
284      * end of the current array.
285      */
endArray()286     public void endArray() throws IOException {
287         expect(JsonToken.END_ARRAY);
288     }
289 
290     /**
291      * Consumes the next token from the JSON stream and asserts that it is the
292      * beginning of a new object.
293      */
beginObject()294     public void beginObject() throws IOException {
295         expect(JsonToken.BEGIN_OBJECT);
296     }
297 
298     /**
299      * Consumes the next token from the JSON stream and asserts that it is the
300      * end of the current object.
301      */
endObject()302     public void endObject() throws IOException {
303         expect(JsonToken.END_OBJECT);
304     }
305 
306     /**
307      * Consumes {@code expected}.
308      */
expect(JsonToken expected)309     private void expect(JsonToken expected) throws IOException {
310         peek();
311         if (token != expected) {
312             throw new IllegalStateException("Expected " + expected + " but was " + peek());
313         }
314         advance();
315     }
316 
317     /**
318      * Returns true if the current array or object has another element.
319      */
hasNext()320     public boolean hasNext() throws IOException {
321         peek();
322         return token != JsonToken.END_OBJECT && token != JsonToken.END_ARRAY;
323     }
324 
325     /**
326      * Returns the type of the next token without consuming it.
327      */
peek()328     public JsonToken peek() throws IOException {
329         if (token != null) {
330           return token;
331         }
332 
333         switch (peekStack()) {
334             case EMPTY_DOCUMENT:
335                 replaceTop(JsonScope.NONEMPTY_DOCUMENT);
336                 JsonToken firstToken = nextValue();
337                 if (!lenient && token != JsonToken.BEGIN_ARRAY && token != JsonToken.BEGIN_OBJECT) {
338                     throw new IOException(
339                             "Expected JSON document to start with '[' or '{' but was " + token);
340                 }
341                 return firstToken;
342             case EMPTY_ARRAY:
343                 return nextInArray(true);
344             case NONEMPTY_ARRAY:
345                 return nextInArray(false);
346             case EMPTY_OBJECT:
347                 return nextInObject(true);
348             case DANGLING_NAME:
349                 return objectValue();
350             case NONEMPTY_OBJECT:
351                 return nextInObject(false);
352             case NONEMPTY_DOCUMENT:
353                 try {
354                     JsonToken token = nextValue();
355                     if (lenient) {
356                         return token;
357                     }
358                     throw syntaxError("Expected EOF");
359                 } catch (EOFException e) {
360                     return token = JsonToken.END_DOCUMENT; // TODO: avoid throwing here?
361                 }
362             case CLOSED:
363                 throw new IllegalStateException("JsonReader is closed");
364             default:
365                 throw new AssertionError();
366         }
367     }
368 
369     /**
370      * Advances the cursor in the JSON stream to the next token.
371      */
advance()372     private JsonToken advance() throws IOException {
373         peek();
374 
375         JsonToken result = token;
376         token = null;
377         value = null;
378         name = null;
379         return result;
380     }
381 
382     /**
383      * Returns the next token, a {@link JsonToken#NAME property name}, and
384      * consumes it.
385      *
386      * @throws IOException if the next token in the stream is not a property
387      *     name.
388      */
nextName()389     public String nextName() throws IOException {
390         peek();
391         if (token != JsonToken.NAME) {
392             throw new IllegalStateException("Expected a name but was " + peek());
393         }
394         String result = name;
395         advance();
396         return result;
397     }
398 
399     /**
400      * Returns the {@link JsonToken#STRING string} value of the next token,
401      * consuming it. If the next token is a number, this method will return its
402      * string form.
403      *
404      * @throws IllegalStateException if the next token is not a string or if
405      *     this reader is closed.
406      */
nextString()407     public String nextString() throws IOException {
408         peek();
409         if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
410             throw new IllegalStateException("Expected a string but was " + peek());
411         }
412 
413         String result = value;
414         advance();
415         return result;
416     }
417 
418     /**
419      * Returns the {@link JsonToken#BOOLEAN boolean} value of the next token,
420      * consuming it.
421      *
422      * @throws IllegalStateException if the next token is not a boolean or if
423      *     this reader is closed.
424      */
nextBoolean()425     public boolean nextBoolean() throws IOException {
426         peek();
427         if (token != JsonToken.BOOLEAN) {
428             throw new IllegalStateException("Expected a boolean but was " + token);
429         }
430 
431         boolean result = (value == TRUE);
432         advance();
433         return result;
434     }
435 
436     /**
437      * Consumes the next token from the JSON stream and asserts that it is a
438      * literal null.
439      *
440      * @throws IllegalStateException if the next token is not null or if this
441      *     reader is closed.
442      */
nextNull()443     public void nextNull() throws IOException {
444         peek();
445         if (token != JsonToken.NULL) {
446             throw new IllegalStateException("Expected null but was " + token);
447         }
448 
449         advance();
450     }
451 
452     /**
453      * Returns the {@link JsonToken#NUMBER double} value of the next token,
454      * consuming it. If the next token is a string, this method will attempt to
455      * parse it as a double using {@link Double#parseDouble(String)}.
456      *
457      * @throws IllegalStateException if the next token is not a literal value.
458      */
nextDouble()459     public double nextDouble() throws IOException {
460         peek();
461         if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
462             throw new IllegalStateException("Expected a double but was " + token);
463         }
464 
465         double result = Double.parseDouble(value);
466         advance();
467         return result;
468     }
469 
470     /**
471      * Returns the {@link JsonToken#NUMBER long} value of the next token,
472      * consuming it. If the next token is a string, this method will attempt to
473      * parse it as a long. If the next token's numeric value cannot be exactly
474      * represented by a Java {@code long}, this method throws.
475      *
476      * @throws IllegalStateException if the next token is not a literal value.
477      * @throws NumberFormatException if the next literal value cannot be parsed
478      *     as a number, or exactly represented as a long.
479      */
nextLong()480     public long nextLong() throws IOException {
481         peek();
482         if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
483             throw new IllegalStateException("Expected a long but was " + token);
484         }
485 
486         long result;
487         try {
488             result = Long.parseLong(value);
489         } catch (NumberFormatException ignored) {
490             double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
491             result = (long) asDouble;
492             if ((double) result != asDouble) {
493                 throw new NumberFormatException(value);
494             }
495         }
496 
497         advance();
498         return result;
499     }
500 
501     /**
502      * Returns the {@link JsonToken#NUMBER int} value of the next token,
503      * consuming it. If the next token is a string, this method will attempt to
504      * parse it as an int. If the next token's numeric value cannot be exactly
505      * represented by a Java {@code int}, this method throws.
506      *
507      * @throws IllegalStateException if the next token is not a literal value.
508      * @throws NumberFormatException if the next literal value cannot be parsed
509      *     as a number, or exactly represented as an int.
510      */
nextInt()511     public int nextInt() throws IOException {
512         peek();
513         if (token != JsonToken.STRING && token != JsonToken.NUMBER) {
514             throw new IllegalStateException("Expected an int but was " + token);
515         }
516 
517         int result;
518         try {
519             result = Integer.parseInt(value);
520         } catch (NumberFormatException ignored) {
521             double asDouble = Double.parseDouble(value); // don't catch this NumberFormatException
522             result = (int) asDouble;
523             if ((double) result != asDouble) {
524                 throw new NumberFormatException(value);
525             }
526         }
527 
528         advance();
529         return result;
530     }
531 
532     /**
533      * Closes this JSON reader and the underlying {@link Reader}.
534      */
close()535     public void close() throws IOException {
536         value = null;
537         token = null;
538         stack.clear();
539         stack.add(JsonScope.CLOSED);
540         in.close();
541     }
542 
543     /**
544      * Skips the next value recursively. If it is an object or array, all nested
545      * elements are skipped. This method is intended for use when the JSON token
546      * stream contains unrecognized or unhandled values.
547      */
skipValue()548     public void skipValue() throws IOException {
549         skipping = true;
550         try {
551             if (!hasNext() || peek() == JsonToken.END_DOCUMENT) {
552                 throw new IllegalStateException("No element left to skip");
553             }
554             int count = 0;
555             do {
556                 JsonToken token = advance();
557                 if (token == JsonToken.BEGIN_ARRAY || token == JsonToken.BEGIN_OBJECT) {
558                     count++;
559                 } else if (token == JsonToken.END_ARRAY || token == JsonToken.END_OBJECT) {
560                     count--;
561                 }
562             } while (count != 0);
563         } finally {
564             skipping = false;
565         }
566     }
567 
peekStack()568     private JsonScope peekStack() {
569         return stack.get(stack.size() - 1);
570     }
571 
pop()572     private JsonScope pop() {
573         return stack.remove(stack.size() - 1);
574     }
575 
push(JsonScope newTop)576     private void push(JsonScope newTop) {
577         stack.add(newTop);
578     }
579 
580     /**
581      * Replace the value on the top of the stack with the given value.
582      */
replaceTop(JsonScope newTop)583     private void replaceTop(JsonScope newTop) {
584         stack.set(stack.size() - 1, newTop);
585     }
586 
nextInArray(boolean firstElement)587     private JsonToken nextInArray(boolean firstElement) throws IOException {
588         if (firstElement) {
589             replaceTop(JsonScope.NONEMPTY_ARRAY);
590         } else {
591             /* Look for a comma before each element after the first element. */
592             switch (nextNonWhitespace()) {
593                 case ']':
594                     pop();
595                     return token = JsonToken.END_ARRAY;
596                 case ';':
597                     checkLenient(); // fall-through
598                 case ',':
599                     break;
600                 default:
601                     throw syntaxError("Unterminated array");
602             }
603         }
604 
605         switch (nextNonWhitespace()) {
606             case ']':
607                 if (firstElement) {
608                     pop();
609                     return token = JsonToken.END_ARRAY;
610                 }
611                 // fall-through to handle ",]"
612             case ';':
613             case ',':
614                 /* In lenient mode, a 0-length literal means 'null' */
615                 checkLenient();
616                 pos--;
617                 value = "null";
618                 return token = JsonToken.NULL;
619             default:
620                 pos--;
621                 return nextValue();
622         }
623     }
624 
nextInObject(boolean firstElement)625     private JsonToken nextInObject(boolean firstElement) throws IOException {
626         /*
627          * Read delimiters. Either a comma/semicolon separating this and the
628          * previous name-value pair, or a close brace to denote the end of the
629          * object.
630          */
631         if (firstElement) {
632             /* Peek to see if this is the empty object. */
633             switch (nextNonWhitespace()) {
634                 case '}':
635                     pop();
636                     return token = JsonToken.END_OBJECT;
637                 default:
638                     pos--;
639             }
640         } else {
641             switch (nextNonWhitespace()) {
642                 case '}':
643                     pop();
644                     return token = JsonToken.END_OBJECT;
645                 case ';':
646                 case ',':
647                     break;
648                 default:
649                     throw syntaxError("Unterminated object");
650             }
651         }
652 
653         /* Read the name. */
654         int quote = nextNonWhitespace();
655         switch (quote) {
656             case '\'':
657                 checkLenient(); // fall-through
658             case '"':
659                 name = nextString((char) quote);
660                 break;
661             default:
662                 checkLenient();
663                 pos--;
664                 name = nextLiteral(false);
665                 if (name.isEmpty()) {
666                     throw syntaxError("Expected name");
667                 }
668         }
669 
670         replaceTop(JsonScope.DANGLING_NAME);
671         return token = JsonToken.NAME;
672     }
673 
objectValue()674     private JsonToken objectValue() throws IOException {
675         /*
676          * Read the name/value separator. Usually a colon ':'. In lenient mode
677          * we also accept an equals sign '=', or an arrow "=>".
678          */
679         switch (nextNonWhitespace()) {
680             case ':':
681                 break;
682             case '=':
683                 checkLenient();
684                 if ((pos < limit || fillBuffer(1)) && buffer[pos] == '>') {
685                     pos++;
686                 }
687                 break;
688             default:
689                 throw syntaxError("Expected ':'");
690         }
691 
692         replaceTop(JsonScope.NONEMPTY_OBJECT);
693         return nextValue();
694     }
695 
nextValue()696     private JsonToken nextValue() throws IOException {
697         int c = nextNonWhitespace();
698         switch (c) {
699             case '{':
700                 push(JsonScope.EMPTY_OBJECT);
701                 return token = JsonToken.BEGIN_OBJECT;
702 
703             case '[':
704                 push(JsonScope.EMPTY_ARRAY);
705                 return token = JsonToken.BEGIN_ARRAY;
706 
707             case '\'':
708                 checkLenient(); // fall-through
709             case '"':
710                 value = nextString((char) c);
711                 return token = JsonToken.STRING;
712 
713             default:
714                 pos--;
715                 return readLiteral();
716         }
717     }
718 
719     /**
720      * Returns true once {@code limit - pos >= minimum}. If the data is
721      * exhausted before that many characters are available, this returns
722      * false.
723      */
fillBuffer(int minimum)724     private boolean fillBuffer(int minimum) throws IOException {
725         // Before clobbering the old characters, update where buffer starts
726         for (int i = 0; i < pos; i++) {
727             if (buffer[i] == '\n') {
728                 bufferStartLine++;
729                 bufferStartColumn = 1;
730             } else {
731                 bufferStartColumn++;
732             }
733         }
734 
735         if (limit != pos) {
736             limit -= pos;
737             System.arraycopy(buffer, pos, buffer, 0, limit);
738         } else {
739             limit = 0;
740         }
741 
742         pos = 0;
743         int total;
744         while ((total = in.read(buffer, limit, buffer.length - limit)) != -1) {
745             limit += total;
746 
747             // if this is the first read, consume an optional byte order mark (BOM) if it exists
748                 if (bufferStartLine == 1 && bufferStartColumn == 1
749                         && limit > 0 && buffer[0] == '\ufeff') {
750                 pos++;
751                 bufferStartColumn--;
752             }
753 
754             if (limit >= minimum) {
755                 return true;
756             }
757         }
758         return false;
759     }
760 
getLineNumber()761     private int getLineNumber() {
762         int result = bufferStartLine;
763         for (int i = 0; i < pos; i++) {
764             if (buffer[i] == '\n') {
765                 result++;
766             }
767         }
768         return result;
769     }
770 
getColumnNumber()771     private int getColumnNumber() {
772         int result = bufferStartColumn;
773         for (int i = 0; i < pos; i++) {
774             if (buffer[i] == '\n') {
775                 result = 1;
776             } else {
777                 result++;
778             }
779         }
780         return result;
781     }
782 
nextNonWhitespace()783     private int nextNonWhitespace() throws IOException {
784         while (pos < limit || fillBuffer(1)) {
785             int c = buffer[pos++];
786             switch (c) {
787                 case '\t':
788                 case ' ':
789                 case '\n':
790                 case '\r':
791                     continue;
792 
793                 case '/':
794                     if (pos == limit && !fillBuffer(1)) {
795                         return c;
796                     }
797 
798                     checkLenient();
799                     char peek = buffer[pos];
800                     switch (peek) {
801                         case '*':
802                             // skip a /* c-style comment */
803                             pos++;
804                             if (!skipTo("*/")) {
805                                 throw syntaxError("Unterminated comment");
806                             }
807                             pos += 2;
808                             continue;
809 
810                         case '/':
811                             // skip a // end-of-line comment
812                             pos++;
813                             skipToEndOfLine();
814                             continue;
815 
816                         default:
817                             return c;
818                     }
819 
820                 case '#':
821                     /*
822                      * Skip a # hash end-of-line comment. The JSON RFC doesn't
823                      * specify this behaviour, but it's required to parse
824                      * existing documents. See http://b/2571423.
825                      */
826                     checkLenient();
827                     skipToEndOfLine();
828                     continue;
829 
830                 default:
831                     return c;
832             }
833         }
834 
835         throw new EOFException("End of input");
836     }
837 
checkLenient()838     private void checkLenient() throws IOException {
839         if (!lenient) {
840             throw syntaxError("Use JsonReader.setLenient(true) to accept malformed JSON");
841         }
842     }
843 
844     /**
845      * Advances the position until after the next newline character. If the line
846      * is terminated by "\r\n", the '\n' must be consumed as whitespace by the
847      * caller.
848      */
skipToEndOfLine()849     private void skipToEndOfLine() throws IOException {
850         while (pos < limit || fillBuffer(1)) {
851             char c = buffer[pos++];
852             if (c == '\r' || c == '\n') {
853                 break;
854             }
855         }
856     }
857 
skipTo(String toFind)858     private boolean skipTo(String toFind) throws IOException {
859         outer:
860         for (; pos + toFind.length() <= limit || fillBuffer(toFind.length()); pos++) {
861             for (int c = 0; c < toFind.length(); c++) {
862                 if (buffer[pos + c] != toFind.charAt(c)) {
863                     continue outer;
864                 }
865             }
866             return true;
867         }
868         return false;
869     }
870 
871     /**
872      * Returns the string up to but not including {@code quote}, unescaping any
873      * character escape sequences encountered along the way. The opening quote
874      * should have already been read. This consumes the closing quote, but does
875      * not include it in the returned string.
876      *
877      * @param quote either ' or ".
878      * @throws NumberFormatException if any unicode escape sequences are
879      *     malformed.
880      */
nextString(char quote)881     private String nextString(char quote) throws IOException {
882         StringBuilder builder = null;
883         do {
884             /* the index of the first character not yet appended to the builder. */
885             int start = pos;
886             while (pos < limit) {
887                 int c = buffer[pos++];
888 
889                 if (c == quote) {
890                     if (skipping) {
891                         return "skipped!";
892                     } else if (builder == null) {
893                         return stringPool.get(buffer, start, pos - start - 1);
894                     } else {
895                         builder.append(buffer, start, pos - start - 1);
896                         return builder.toString();
897                     }
898 
899                 } else if (c == '\\') {
900                     if (builder == null) {
901                         builder = new StringBuilder();
902                     }
903                     builder.append(buffer, start, pos - start - 1);
904                     builder.append(readEscapeCharacter());
905                     start = pos;
906                 }
907             }
908 
909             if (builder == null) {
910                 builder = new StringBuilder();
911             }
912             builder.append(buffer, start, pos - start);
913         } while (fillBuffer(1));
914 
915         throw syntaxError("Unterminated string");
916     }
917 
918     /**
919      * Reads the value up to but not including any delimiter characters. This
920      * does not consume the delimiter character.
921      *
922      * @param assignOffsetsOnly true for this method to only set the valuePos
923      *     and valueLength fields and return a null result. This only works if
924      *     the literal is short; a string is returned otherwise.
925      */
nextLiteral(boolean assignOffsetsOnly)926     private String nextLiteral(boolean assignOffsetsOnly) throws IOException {
927         StringBuilder builder = null;
928         valuePos = -1;
929         valueLength = 0;
930         int i = 0;
931 
932         findNonLiteralCharacter:
933         while (true) {
934             for (; pos + i < limit; i++) {
935                 switch (buffer[pos + i]) {
936                 case '/':
937                 case '\\':
938                 case ';':
939                 case '#':
940                 case '=':
941                     checkLenient(); // fall-through
942                 case '{':
943                 case '}':
944                 case '[':
945                 case ']':
946                 case ':':
947                 case ',':
948                 case ' ':
949                 case '\t':
950                 case '\f':
951                 case '\r':
952                 case '\n':
953                     break findNonLiteralCharacter;
954                 }
955             }
956 
957             /*
958              * Attempt to load the entire literal into the buffer at once. If
959              * we run out of input, add a non-literal character at the end so
960              * that decoding doesn't need to do bounds checks.
961              */
962             if (i < buffer.length) {
963                 if (fillBuffer(i + 1)) {
964                     continue;
965                 } else {
966                     buffer[limit] = '\0';
967                     break;
968                 }
969             }
970 
971             // use a StringBuilder when the value is too long. It must be an unquoted string.
972             if (builder == null) {
973                 builder = new StringBuilder();
974             }
975             builder.append(buffer, pos, i);
976             valueLength += i;
977             pos += i;
978             i = 0;
979             if (!fillBuffer(1)) {
980                 break;
981             }
982         }
983 
984         String result;
985         if (assignOffsetsOnly && builder == null) {
986             valuePos = pos;
987             result = null;
988         } else if (skipping) {
989             result = "skipped!";
990         } else if (builder == null) {
991             result = stringPool.get(buffer, pos, i);
992         } else {
993             builder.append(buffer, pos, i);
994             result = builder.toString();
995         }
996         valueLength += i;
997         pos += i;
998         return result;
999     }
1000 
toString()1001     @Override public String toString() {
1002         return getClass().getSimpleName() + " near " + getSnippet();
1003     }
1004 
1005     /**
1006      * Unescapes the character identified by the character or characters that
1007      * immediately follow a backslash. The backslash '\' should have already
1008      * been read. This supports both unicode escapes "u000A" and two-character
1009      * escapes "\n".
1010      *
1011      * @throws NumberFormatException if any unicode escape sequences are
1012      *     malformed.
1013      */
readEscapeCharacter()1014     private char readEscapeCharacter() throws IOException {
1015         if (pos == limit && !fillBuffer(1)) {
1016             throw syntaxError("Unterminated escape sequence");
1017         }
1018 
1019         char escaped = buffer[pos++];
1020         switch (escaped) {
1021             case 'u':
1022                 if (pos + 4 > limit && !fillBuffer(4)) {
1023                     throw syntaxError("Unterminated escape sequence");
1024                 }
1025                 String hex = stringPool.get(buffer, pos, 4);
1026                 pos += 4;
1027                 return (char) Integer.parseInt(hex, 16);
1028 
1029             case 't':
1030                 return '\t';
1031 
1032             case 'b':
1033                 return '\b';
1034 
1035             case 'n':
1036                 return '\n';
1037 
1038             case 'r':
1039                 return '\r';
1040 
1041             case 'f':
1042                 return '\f';
1043 
1044             case '\'':
1045             case '"':
1046             case '\\':
1047             default:
1048                 return escaped;
1049         }
1050     }
1051 
1052     /**
1053      * Reads a null, boolean, numeric or unquoted string literal value.
1054      */
readLiteral()1055     private JsonToken readLiteral() throws IOException {
1056         value = nextLiteral(true);
1057         if (valueLength == 0) {
1058             throw syntaxError("Expected literal value");
1059         }
1060         token = decodeLiteral();
1061         if (token == JsonToken.STRING) {
1062           checkLenient();
1063         }
1064         return token;
1065     }
1066 
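    /**
     * Returns the token type of the literal described by {@code valuePos} and
     * {@code valueLength}, and assigns {@code value} to match unless the
     * literal was too long to fit in the buffer.
     */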
decodeLiteral()1070     private JsonToken decodeLiteral() throws IOException {
1071         if (valuePos == -1) {
1072             // it was too long to fit in the buffer so it can only be a string
1073             return JsonToken.STRING;
1074         } else if (valueLength == 4
1075                 && ('n' == buffer[valuePos    ] || 'N' == buffer[valuePos    ])
1076                 && ('u' == buffer[valuePos + 1] || 'U' == buffer[valuePos + 1])
1077                 && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
1078                 && ('l' == buffer[valuePos + 3] || 'L' == buffer[valuePos + 3])) {
1079             value = "null";
1080             return JsonToken.NULL;
1081         } else if (valueLength == 4
1082                 && ('t' == buffer[valuePos    ] || 'T' == buffer[valuePos    ])
1083                 && ('r' == buffer[valuePos + 1] || 'R' == buffer[valuePos + 1])
1084                 && ('u' == buffer[valuePos + 2] || 'U' == buffer[valuePos + 2])
1085                 && ('e' == buffer[valuePos + 3] || 'E' == buffer[valuePos + 3])) {
1086             value = TRUE;
1087             return JsonToken.BOOLEAN;
1088         } else if (valueLength == 5
1089                 && ('f' == buffer[valuePos    ] || 'F' == buffer[valuePos    ])
1090                 && ('a' == buffer[valuePos + 1] || 'A' == buffer[valuePos + 1])
1091                 && ('l' == buffer[valuePos + 2] || 'L' == buffer[valuePos + 2])
1092                 && ('s' == buffer[valuePos + 3] || 'S' == buffer[valuePos + 3])
1093                 && ('e' == buffer[valuePos + 4] || 'E' == buffer[valuePos + 4])) {
1094             value = FALSE;
1095             return JsonToken.BOOLEAN;
1096         } else {
1097             value = stringPool.get(buffer, valuePos, valueLength);
1098             return decodeNumber(buffer, valuePos, valueLength);
1099         }
1100     }
1101 
    /**
     * Determines whether the characters form a JSON number. Numbers are of the
     * form -12.34e+56. Fractional and exponential parts are optional. Leading
     * zeroes are not allowed in the integer part, but are allowed in the
     * fraction and in the exponent.
     */
decodeNumber(char[] chars, int offset, int length)1108     private JsonToken decodeNumber(char[] chars, int offset, int length) {
1109         int i = offset;
1110         int c = chars[i];
1111 
1112         if (c == '-') {
1113             c = chars[++i];
1114         }
1115 
1116         if (c == '0') {
1117             c = chars[++i];
1118         } else if (c >= '1' && c <= '9') {
1119             c = chars[++i];
1120             while (c >= '0' && c <= '9') {
1121                 c = chars[++i];
1122             }
1123         } else {
1124             return JsonToken.STRING;
1125         }
1126 
1127         if (c == '.') {
1128             c = chars[++i];
1129             while (c >= '0' && c <= '9') {
1130                 c = chars[++i];
1131             }
1132         }
1133 
1134         if (c == 'e' || c == 'E') {
1135             c = chars[++i];
1136             if (c == '+' || c == '-') {
1137                 c = chars[++i];
1138             }
1139             if (c >= '0' && c <= '9') {
1140                 c = chars[++i];
1141                 while (c >= '0' && c <= '9') {
1142                     c = chars[++i];
1143                 }
1144             } else {
1145                 return JsonToken.STRING;
1146             }
1147         }
1148 
1149         if (i == offset + length) {
1150             return JsonToken.NUMBER;
1151         } else {
1152             return JsonToken.STRING;
1153         }
1154     }
1155 
    /**
     * Throws a new IO exception with the given message followed by this
     * reader's line and column number.
     */
syntaxError(String message)1160     private IOException syntaxError(String message) throws IOException {
1161         throw new MalformedJsonException(message
1162                 + " at line " + getLineNumber() + " column " + getColumnNumber());
1163     }
1164 
getSnippet()1165     private CharSequence getSnippet() {
1166         StringBuilder snippet = new StringBuilder();
1167         int beforePos = Math.min(pos, 20);
1168         snippet.append(buffer, pos - beforePos, beforePos);
1169         int afterPos = Math.min(limit - pos, 20);
1170         snippet.append(buffer, pos, afterPos);
1171         return snippet;
1172     }
1173 }
1174