1 /*
2  * Copyright (C) 2018 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package com.android.powermodel;
18 
19 import java.io.InputStream;
20 import java.io.IOException;
21 import java.nio.charset.Charset;
22 import java.nio.charset.StandardCharsets;
23 import java.util.ArrayList;
24 
25 /**
26  * Parses CSV.
27  * <p>
28  * Call parse() with an InputStream.
29  * <p>
30  * CsvLineProcessor.onLine() will be called for each line in the source document.
31  * <p>
32  * To simplify parsing and to protect against using too much memory for bad
33  * data, the maximum field length is {@link #MAX_FIELD_SIZE}.
34  */
35 class CsvParser {
36     /**
37      * The maximum size of a single field in bytes.
38      */
39     public static final int MAX_FIELD_SIZE = (8*1024)-1;
40 
41     /**
42      * Callback interface for each line of CSV as it is parsed.
43      */
44     interface LineProcessor {
45         /**
46          * A line of CSV was parsed.
47          *
48          * @param lineNumber the line number in the file, starting at 1
49          * @param fields the comma separated fields for the line
50          */
onLine(int lineNumber, ArrayList<String> fields)51         void onLine(int lineNumber, ArrayList<String> fields) throws ParseException;
52     }
53 
54     /**
55      * Parse the CSV text in input, calling onto processor for each row.
56      */
parse(InputStream input, LineProcessor processor)57     public static void parse(InputStream input, LineProcessor processor)
58             throws IOException, ParseException {
59         final Charset utf8 = StandardCharsets.UTF_8;
60         final byte[] buf = new byte[MAX_FIELD_SIZE+1];
61         int lineNumber = 1;
62         int readPos = 0;
63         int prev = 0;
64         ArrayList<String> fields = new ArrayList<String>();
65         boolean finalBuffer = false;
66         boolean escaping = false;
67         boolean sawQuote = false;
68 
69         while (!finalBuffer) {
70             int amt = input.read(buf, readPos, buf.length-readPos);
71             if (amt < 0) {
72                 // No more data. Process whatever's left from before.
73                 amt = readPos;
74                 finalBuffer = true;
75             } else {
76                 // Process whatever's left from before, plus the new data.
77                 amt += readPos;
78                 finalBuffer = false;
79             }
80 
81             // Process as much of this buffer as we can.
82             int fieldStart = 0;
83             int index = readPos;
84             int escapeIndex = escaping ? readPos : -1;
85             while (index < amt) {
86                 byte c = buf[index];
87                 if (c == '\r' || c == '\n') {
88                     if (escaping) {
89                         // TODO: Quotes do not escape newlines in our CSV dialect,
90                         // but we actually see some data where it should.
91                         fields.add(new String(buf, fieldStart, escapeIndex-fieldStart));
92                         escapeIndex = -1;
93                         escaping = false;
94                         sawQuote = false;
95                     } else {
96                         fields.add(new String(buf, fieldStart, index-fieldStart));
97                     }
98                     // Don't report blank lines
99                     if (fields.size() > 1 || (fields.size() == 1 && fields.get(0).length() > 0)) {
100                         processor.onLine(lineNumber, fields);
101                     }
102                     fields = new ArrayList<String>();
103                     if (!(c == '\n' && prev == '\r')) {
104                         // Don't double increment for dos line endings.
105                         lineNumber++;
106                     }
107                     fieldStart = index = index + 1;
108                 } else {
109                     if (escaping) {
110                         // Field started with a " so quotes are escaped with " and commas
111                         // don't matter except when following a single quote.
112                         if (c == '"') {
113                             if (sawQuote) {
114                                 buf[escapeIndex] = buf[index];
115                                 escapeIndex++;
116                                 sawQuote = false;
117                             } else {
118                                 sawQuote = true;
119                             }
120                             index++;
121                         } else if (sawQuote && c == ',') {
122                             fields.add(new String(buf, fieldStart, escapeIndex-fieldStart));
123                             fieldStart = index = index + 1;
124                             escapeIndex = -1;
125                             escaping = false;
126                             sawQuote = false;
127                         } else {
128                             buf[escapeIndex] = buf[index];
129                             escapeIndex++;
130                             index++;
131                             sawQuote = false;
132                         }
133                     } else {
134                         if (c == ',') {
135                             fields.add(new String(buf, fieldStart, index-fieldStart));
136                             fieldStart = index + 1;
137                         } else if (c == '"' && fieldStart == index) {
138                             // First character is a "
139                             escaping = true;
140                             fieldStart = escapeIndex = index + 1;
141                         }
142                         index++;
143                     }
144                 }
145                 prev = c;
146             }
147 
148             // A single field is greater than buf.length, so fail.
149             if (fieldStart == 0 && index == buf.length) {
150                 throw new ParseException(lineNumber, "Line is too long: "
151                         + new String(buf, 0, 20, utf8) + "...");
152             }
153 
154             // Move whatever we didn't process to the beginning of the buffer
155             // and try again.
156             if (fieldStart != amt) {
157                 readPos = (escaping ? escapeIndex : index) - fieldStart;
158                 System.arraycopy(buf, fieldStart, buf, 0, readPos);
159             } else {
160                 readPos = 0;
161             }
162 
163             // Process whatever's left over
164             if (finalBuffer) {
165                 fields.add(new String(buf, 0, readPos));
166                 // If there is any content, return the last line.
167                 if (fields.size() > 1 || (fields.size() == 1 && fields.get(0).length() > 0)) {
168                     processor.onLine(lineNumber, fields);
169                 }
170             }
171         }
172     }
173 }
174