1 // Tencent is pleased to support the open source community by making RapidJSON available.
2 //
3 // Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
4 //
5 // Licensed under the MIT License (the "License"); you may not use this file except
6 // in compliance with the License. You may obtain a copy of the License at
7 //
8 // http://opensource.org/licenses/MIT
9 //
10 // Unless required by applicable law or agreed to in writing, software distributed
11 // under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
13 // specific language governing permissions and limitations under the License.
14 
15 #ifndef RAPIDJSON_ENCODINGS_H_
16 #define RAPIDJSON_ENCODINGS_H_
17 
18 #include "rapidjson.h"
19 
20 #ifdef _MSC_VER
21 RAPIDJSON_DIAG_PUSH
22 RAPIDJSON_DIAG_OFF(4244) // conversion from 'type1' to 'type2', possible loss of data
23 RAPIDJSON_DIAG_OFF(4702)  // unreachable code
24 #elif defined(__GNUC__)
25 RAPIDJSON_DIAG_PUSH
26 RAPIDJSON_DIAG_OFF(effc++)
27 RAPIDJSON_DIAG_OFF(overflow)
28 #endif
29 
30 RAPIDJSON_NAMESPACE_BEGIN
31 
32 ///////////////////////////////////////////////////////////////////////////////
33 // Encoding
34 
35 /*! \class rapidjson::Encoding
36     \brief Concept for encoding of Unicode characters.
37 
38 \code
39 concept Encoding {
40     typename Ch;    //! Type of character. A "character" is actually a code unit in unicode's definition.
41 
42     enum { supportUnicode = 1 }; // or 0 if not supporting unicode
43 
44     //! \brief Encode a Unicode codepoint to an output stream.
45     //! \param os Output stream.
    //! \param codepoint A Unicode codepoint, ranging from 0x0 to 0x10FFFF inclusive.
47     template<typename OutputStream>
48     static void Encode(OutputStream& os, unsigned codepoint);
49 
50     //! \brief Decode a Unicode codepoint from an input stream.
51     //! \param is Input stream.
52     //! \param codepoint Output of the unicode codepoint.
53     //! \return true if a valid codepoint can be decoded from the stream.
54     template <typename InputStream>
55     static bool Decode(InputStream& is, unsigned* codepoint);
56 
57     //! \brief Validate one Unicode codepoint from an encoded stream.
58     //! \param is Input stream to obtain codepoint.
59     //! \param os Output for copying one codepoint.
60     //! \return true if it is valid.
    //! \note This function only validates and copies the codepoint; it does not actually decode it.
62     template <typename InputStream, typename OutputStream>
63     static bool Validate(InputStream& is, OutputStream& os);
64 
    // The following functions deal with byte streams.
66 
67     //! Take a character from input byte stream, skip BOM if exist.
68     template <typename InputByteStream>
    static Ch TakeBOM(InputByteStream& is);
70 
71     //! Take a character from input byte stream.
72     template <typename InputByteStream>
73     static Ch Take(InputByteStream& is);
74 
75     //! Put BOM to output byte stream.
76     template <typename OutputByteStream>
77     static void PutBOM(OutputByteStream& os);
78 
79     //! Put a character to output byte stream.
80     template <typename OutputByteStream>
81     static void Put(OutputByteStream& os, Ch c);
82 };
83 \endcode
84 */
85 
86 ///////////////////////////////////////////////////////////////////////////////
87 // UTF8
88 
89 //! UTF-8 encoding.
90 /*! http://en.wikipedia.org/wiki/UTF-8
91     http://tools.ietf.org/html/rfc3629
92     \tparam CharType Code unit for storing 8-bit UTF-8 data. Default is char.
93     \note implements Encoding concept
94 */
95 template<typename CharType = char>
96 struct UTF8 {
97     typedef CharType Ch;
98 
99     enum { supportUnicode = 1 };
100 
101     template<typename OutputStream>
EncodeUTF8102     static void Encode(OutputStream& os, unsigned codepoint) {
103         if (codepoint <= 0x7F)
104             os.Put(static_cast<Ch>(codepoint & 0xFF));
105         else if (codepoint <= 0x7FF) {
106             os.Put(static_cast<Ch>(0xC0 | ((codepoint >> 6) & 0xFF)));
107             os.Put(static_cast<Ch>(0x80 | ((codepoint & 0x3F))));
108         }
109         else if (codepoint <= 0xFFFF) {
110             os.Put(static_cast<Ch>(0xE0 | ((codepoint >> 12) & 0xFF)));
111             os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
112             os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
113         }
114         else {
115             RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
116             os.Put(static_cast<Ch>(0xF0 | ((codepoint >> 18) & 0xFF)));
117             os.Put(static_cast<Ch>(0x80 | ((codepoint >> 12) & 0x3F)));
118             os.Put(static_cast<Ch>(0x80 | ((codepoint >> 6) & 0x3F)));
119             os.Put(static_cast<Ch>(0x80 | (codepoint & 0x3F)));
120         }
121     }
122 
123     template <typename InputStream>
DecodeUTF8124     static bool Decode(InputStream& is, unsigned* codepoint) {
125 #define COPY() c = is.Take(); *codepoint = (*codepoint << 6) | ((unsigned char)c & 0x3Fu)
126 #define TRANS(mask) result &= ((GetRange((unsigned char)c) & (mask)) != 0)
127 #define TAIL() COPY(); TRANS(0x70)
128         Ch c = is.Take();
129         if (!(c & 0x80)) {
130             *codepoint = (unsigned char)c;
131             return true;
132         }
133 
134         unsigned char type = GetRange((unsigned char)c);
135         *codepoint = (0xFF >> type) & (unsigned char)c;
136         bool result = true;
137         switch (type) {
138         case 2: TAIL(); return result;
139         case 3: TAIL(); TAIL(); return result;
140         case 4: COPY(); TRANS(0x50); TAIL(); return result;
141         case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result;
142         case 6: TAIL(); TAIL(); TAIL(); return result;
143         case 10: COPY(); TRANS(0x20); TAIL(); return result;
144         case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result;
145         default: return false;
146         }
147 #undef COPY
148 #undef TRANS
149 #undef TAIL
150     }
151 
152     template <typename InputStream, typename OutputStream>
ValidateUTF8153     static bool Validate(InputStream& is, OutputStream& os) {
154 #define COPY() os.Put(c = is.Take())
155 #define TRANS(mask) result &= ((GetRange((unsigned char)c) & (mask)) != 0)
156 #define TAIL() COPY(); TRANS(0x70)
157         Ch c;
158         COPY();
159         if (!(c & 0x80))
160             return true;
161 
162         bool result = true;
163         switch (GetRange((unsigned char)c)) {
164         case 2: TAIL(); return result;
165         case 3: TAIL(); TAIL(); return result;
166         case 4: COPY(); TRANS(0x50); TAIL(); return result;
167         case 5: COPY(); TRANS(0x10); TAIL(); TAIL(); return result;
168         case 6: TAIL(); TAIL(); TAIL(); return result;
169         case 10: COPY(); TRANS(0x20); TAIL(); return result;
170         case 11: COPY(); TRANS(0x60); TAIL(); TAIL(); return result;
171         default: return false;
172         }
173 #undef COPY
174 #undef TRANS
175 #undef TAIL
176     }
177 
GetRangeUTF8178     static unsigned char GetRange(unsigned char c) {
179         // Referring to DFA of http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
180         // With new mapping 1 -> 0x10, 7 -> 0x20, 9 -> 0x40, such that AND operation can test multiple types.
181         static const unsigned char type[] = {
182             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
183             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
184             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
185             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,  0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
186             0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,0x10,
187             0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,0x40,
188             0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
189             0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,0x20,
190             8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,  2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
191             10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8,
192         };
193         return type[c];
194     }
195 
196     template <typename InputByteStream>
TakeBOMUTF8197     static CharType TakeBOM(InputByteStream& is) {
198         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
199         Ch c = Take(is);
200         if ((unsigned char)c != 0xEFu) return c;
201         c = is.Take();
202         if ((unsigned char)c != 0xBBu) return c;
203         c = is.Take();
204         if ((unsigned char)c != 0xBFu) return c;
205         c = is.Take();
206         return c;
207     }
208 
209     template <typename InputByteStream>
TakeUTF8210     static Ch Take(InputByteStream& is) {
211         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
212         return is.Take();
213     }
214 
215     template <typename OutputByteStream>
PutBOMUTF8216     static void PutBOM(OutputByteStream& os) {
217         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
218         os.Put(0xEFu); os.Put(0xBBu); os.Put(0xBFu);
219     }
220 
221     template <typename OutputByteStream>
PutUTF8222     static void Put(OutputByteStream& os, Ch c) {
223         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
224         os.Put(static_cast<typename OutputByteStream::Ch>(c));
225     }
226 };
227 
228 ///////////////////////////////////////////////////////////////////////////////
229 // UTF16
230 
231 //! UTF-16 encoding.
232 /*! http://en.wikipedia.org/wiki/UTF-16
233     http://tools.ietf.org/html/rfc2781
234     \tparam CharType Type for storing 16-bit UTF-16 data. Default is wchar_t. C++11 may use char16_t instead.
235     \note implements Encoding concept
236 
    \note For in-memory access there is no need to be concerned with endianness: code units and code points are represented in the CPU's native endianness.
238     For streaming, use UTF16LE and UTF16BE, which handle endianness.
239 */
240 template<typename CharType = wchar_t>
241 struct UTF16 {
242     typedef CharType Ch;
243     RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 2);
244 
245     enum { supportUnicode = 1 };
246 
247     template<typename OutputStream>
EncodeUTF16248     static void Encode(OutputStream& os, unsigned codepoint) {
249         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
250         if (codepoint <= 0xFFFF) {
251             RAPIDJSON_ASSERT(codepoint < 0xD800 || codepoint > 0xDFFF); // Code point itself cannot be surrogate pair
252             os.Put(static_cast<typename OutputStream::Ch>(codepoint));
253         }
254         else {
255             RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
256             unsigned v = codepoint - 0x10000;
257             os.Put(static_cast<typename OutputStream::Ch>((v >> 10) | 0xD800));
258             os.Put((v & 0x3FF) | 0xDC00);
259         }
260     }
261 
262     template <typename InputStream>
DecodeUTF16263     static bool Decode(InputStream& is, unsigned* codepoint) {
264         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
265         Ch c = is.Take();
266         if (c < 0xD800 || c > 0xDFFF) {
267             *codepoint = c;
268             return true;
269         }
270         else if (c <= 0xDBFF) {
271             *codepoint = (c & 0x3FF) << 10;
272             c = is.Take();
273             *codepoint |= (c & 0x3FF);
274             *codepoint += 0x10000;
275             return c >= 0xDC00 && c <= 0xDFFF;
276         }
277         return false;
278     }
279 
280     template <typename InputStream, typename OutputStream>
ValidateUTF16281     static bool Validate(InputStream& is, OutputStream& os) {
282         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 2);
283         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 2);
284         Ch c;
285         os.Put(c = is.Take());
286         if (c < 0xD800 || c > 0xDFFF)
287             return true;
288         else if (c <= 0xDBFF) {
289             os.Put(c = is.Take());
290             return c >= 0xDC00 && c <= 0xDFFF;
291         }
292         return false;
293     }
294 };
295 
296 //! UTF-16 little endian encoding.
297 template<typename CharType = wchar_t>
298 struct UTF16LE : UTF16<CharType> {
299     template <typename InputByteStream>
TakeBOMUTF16LE300     static CharType TakeBOM(InputByteStream& is) {
301         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
302         CharType c = Take(is);
303         return (unsigned short)c == 0xFEFFu ? Take(is) : c;
304     }
305 
306     template <typename InputByteStream>
TakeUTF16LE307     static CharType Take(InputByteStream& is) {
308         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
309         CharType c = (unsigned char)is.Take();
310         c |= (unsigned char)is.Take() << 8;
311         return c;
312     }
313 
314     template <typename OutputByteStream>
PutBOMUTF16LE315     static void PutBOM(OutputByteStream& os) {
316         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
317         os.Put(0xFFu); os.Put(0xFEu);
318     }
319 
320     template <typename OutputByteStream>
PutUTF16LE321     static void Put(OutputByteStream& os, CharType c) {
322         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
323         os.Put(c & 0xFFu);
324         os.Put((c >> 8) & 0xFFu);
325     }
326 };
327 
328 //! UTF-16 big endian encoding.
329 template<typename CharType = wchar_t>
330 struct UTF16BE : UTF16<CharType> {
331     template <typename InputByteStream>
TakeBOMUTF16BE332     static CharType TakeBOM(InputByteStream& is) {
333         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
334         CharType c = Take(is);
335         return (unsigned short)c == 0xFEFFu ? Take(is) : c;
336     }
337 
338     template <typename InputByteStream>
TakeUTF16BE339     static CharType Take(InputByteStream& is) {
340         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
341         CharType c = (unsigned char)is.Take() << 8;
342         c |= (unsigned char)is.Take();
343         return c;
344     }
345 
346     template <typename OutputByteStream>
PutBOMUTF16BE347     static void PutBOM(OutputByteStream& os) {
348         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
349         os.Put(0xFEu); os.Put(0xFFu);
350     }
351 
352     template <typename OutputByteStream>
PutUTF16BE353     static void Put(OutputByteStream& os, CharType c) {
354         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
355         os.Put((c >> 8) & 0xFFu);
356         os.Put(c & 0xFFu);
357     }
358 };
359 
360 ///////////////////////////////////////////////////////////////////////////////
361 // UTF32
362 
363 //! UTF-32 encoding.
364 /*! http://en.wikipedia.org/wiki/UTF-32
365     \tparam CharType Type for storing 32-bit UTF-32 data. Default is unsigned. C++11 may use char32_t instead.
366     \note implements Encoding concept
367 
    \note For in-memory access there is no need to be concerned with endianness: code units and code points are represented in the CPU's native endianness.
369     For streaming, use UTF32LE and UTF32BE, which handle endianness.
370 */
371 template<typename CharType = unsigned>
372 struct UTF32 {
373     typedef CharType Ch;
374     RAPIDJSON_STATIC_ASSERT(sizeof(Ch) >= 4);
375 
376     enum { supportUnicode = 1 };
377 
378     template<typename OutputStream>
EncodeUTF32379     static void Encode(OutputStream& os, unsigned codepoint) {
380         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputStream::Ch) >= 4);
381         RAPIDJSON_ASSERT(codepoint <= 0x10FFFF);
382         os.Put(codepoint);
383     }
384 
385     template <typename InputStream>
DecodeUTF32386     static bool Decode(InputStream& is, unsigned* codepoint) {
387         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
388         Ch c = is.Take();
389         *codepoint = c;
390         return c <= 0x10FFFF;
391     }
392 
393     template <typename InputStream, typename OutputStream>
ValidateUTF32394     static bool Validate(InputStream& is, OutputStream& os) {
395         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputStream::Ch) >= 4);
396         Ch c;
397         os.Put(c = is.Take());
398         return c <= 0x10FFFF;
399     }
400 };
401 
//! UTF-32 little endian encoding.
403 template<typename CharType = unsigned>
404 struct UTF32LE : UTF32<CharType> {
405     template <typename InputByteStream>
TakeBOMUTF32LE406     static CharType TakeBOM(InputByteStream& is) {
407         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
408         CharType c = Take(is);
409         return (unsigned)c == 0x0000FEFFu ? Take(is) : c;
410     }
411 
412     template <typename InputByteStream>
TakeUTF32LE413     static CharType Take(InputByteStream& is) {
414         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
415         CharType c = (unsigned char)is.Take();
416         c |= (unsigned char)is.Take() << 8;
417         c |= (unsigned char)is.Take() << 16;
418         c |= (unsigned char)is.Take() << 24;
419         return c;
420     }
421 
422     template <typename OutputByteStream>
PutBOMUTF32LE423     static void PutBOM(OutputByteStream& os) {
424         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
425         os.Put(0xFFu); os.Put(0xFEu); os.Put(0x00u); os.Put(0x00u);
426     }
427 
428     template <typename OutputByteStream>
PutUTF32LE429     static void Put(OutputByteStream& os, CharType c) {
430         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
431         os.Put(c & 0xFFu);
432         os.Put((c >> 8) & 0xFFu);
433         os.Put((c >> 16) & 0xFFu);
434         os.Put((c >> 24) & 0xFFu);
435     }
436 };
437 
438 //! UTF-32 big endian encoding.
439 template<typename CharType = unsigned>
440 struct UTF32BE : UTF32<CharType> {
441     template <typename InputByteStream>
TakeBOMUTF32BE442     static CharType TakeBOM(InputByteStream& is) {
443         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
444         CharType c = Take(is);
445         return (unsigned)c == 0x0000FEFFu ? Take(is) : c;
446     }
447 
448     template <typename InputByteStream>
TakeUTF32BE449     static CharType Take(InputByteStream& is) {
450         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
451         CharType c = (unsigned char)is.Take() << 24;
452         c |= (unsigned char)is.Take() << 16;
453         c |= (unsigned char)is.Take() << 8;
454         c |= (unsigned char)is.Take();
455         return c;
456     }
457 
458     template <typename OutputByteStream>
PutBOMUTF32BE459     static void PutBOM(OutputByteStream& os) {
460         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
461         os.Put(0x00u); os.Put(0x00u); os.Put(0xFEu); os.Put(0xFFu);
462     }
463 
464     template <typename OutputByteStream>
PutUTF32BE465     static void Put(OutputByteStream& os, CharType c) {
466         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
467         os.Put((c >> 24) & 0xFFu);
468         os.Put((c >> 16) & 0xFFu);
469         os.Put((c >> 8) & 0xFFu);
470         os.Put(c & 0xFFu);
471     }
472 };
473 
474 ///////////////////////////////////////////////////////////////////////////////
475 // ASCII
476 
477 //! ASCII encoding.
478 /*! http://en.wikipedia.org/wiki/ASCII
479     \tparam CharType Code unit for storing 7-bit ASCII data. Default is char.
480     \note implements Encoding concept
481 */
482 template<typename CharType = char>
483 struct ASCII {
484     typedef CharType Ch;
485 
486     enum { supportUnicode = 0 };
487 
488     template<typename OutputStream>
EncodeASCII489     static void Encode(OutputStream& os, unsigned codepoint) {
490         RAPIDJSON_ASSERT(codepoint <= 0x7F);
491         os.Put(static_cast<Ch>(codepoint & 0xFF));
492     }
493 
494     template <typename InputStream>
DecodeASCII495     static bool Decode(InputStream& is, unsigned* codepoint) {
496         unsigned char c = static_cast<unsigned char>(is.Take());
497         *codepoint = c;
498         return c <= 0X7F;
499     }
500 
501     template <typename InputStream, typename OutputStream>
ValidateASCII502     static bool Validate(InputStream& is, OutputStream& os) {
503         unsigned char c = is.Take();
504         os.Put(c);
505         return c <= 0x7F;
506     }
507 
508     template <typename InputByteStream>
TakeBOMASCII509     static CharType TakeBOM(InputByteStream& is) {
510         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
511         Ch c = Take(is);
512         return c;
513     }
514 
515     template <typename InputByteStream>
TakeASCII516     static Ch Take(InputByteStream& is) {
517         RAPIDJSON_STATIC_ASSERT(sizeof(typename InputByteStream::Ch) == 1);
518         return is.Take();
519     }
520 
521     template <typename OutputByteStream>
PutBOMASCII522     static void PutBOM(OutputByteStream& os) {
523         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
524         (void)os;
525     }
526 
527     template <typename OutputByteStream>
PutASCII528     static void Put(OutputByteStream& os, Ch c) {
529         RAPIDJSON_STATIC_ASSERT(sizeof(typename OutputByteStream::Ch) == 1);
530         os.Put(static_cast<typename OutputByteStream::Ch>(c));
531     }
532 };
533 
534 ///////////////////////////////////////////////////////////////////////////////
535 // AutoUTF
536 
537 //! Runtime-specified UTF encoding type of a stream.
enum UTFType {
    //! UTF-8.
    kUTF8 = 0,
    //! UTF-16 little endian.
    kUTF16LE = 1,
    //! UTF-16 big endian.
    kUTF16BE = 2,
    //! UTF-32 little endian.
    kUTF32LE = 3,
    //! UTF-32 big endian.
    kUTF32BE = 4
};
545 
546 //! Dynamically select encoding according to stream's runtime-specified UTF encoding type.
/*! \note This class can be used with AutoUTFInputStream and AutoUTFOutputStream, which provide GetType().
548 */
549 template<typename CharType>
550 struct AutoUTF {
551     typedef CharType Ch;
552 
553     enum { supportUnicode = 1 };
554 
555 #define RAPIDJSON_ENCODINGS_FUNC(x) UTF8<Ch>::x, UTF16LE<Ch>::x, UTF16BE<Ch>::x, UTF32LE<Ch>::x, UTF32BE<Ch>::x
556 
557     template<typename OutputStream>
EncodeAutoUTF558     RAPIDJSON_FORCEINLINE static void Encode(OutputStream& os, unsigned codepoint) {
559         typedef void (*EncodeFunc)(OutputStream&, unsigned);
560         static const EncodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Encode) };
561         (*f[os.GetType()])(os, codepoint);
562     }
563 
564     template <typename InputStream>
DecodeAutoUTF565     RAPIDJSON_FORCEINLINE static bool Decode(InputStream& is, unsigned* codepoint) {
566         typedef bool (*DecodeFunc)(InputStream&, unsigned*);
567         static const DecodeFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Decode) };
568         return (*f[is.GetType()])(is, codepoint);
569     }
570 
571     template <typename InputStream, typename OutputStream>
ValidateAutoUTF572     RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
573         typedef bool (*ValidateFunc)(InputStream&, OutputStream&);
574         static const ValidateFunc f[] = { RAPIDJSON_ENCODINGS_FUNC(Validate) };
575         return (*f[is.GetType()])(is, os);
576     }
577 
578 #undef RAPIDJSON_ENCODINGS_FUNC
579 };
580 
581 ///////////////////////////////////////////////////////////////////////////////
582 // Transcoder
583 
584 //! Encoding conversion.
585 template<typename SourceEncoding, typename TargetEncoding>
586 struct Transcoder {
587     //! Take one Unicode codepoint from source encoding, convert it to target encoding and put it to the output stream.
588     template<typename InputStream, typename OutputStream>
TranscodeTranscoder589     RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) {
590         unsigned codepoint;
591         if (!SourceEncoding::Decode(is, &codepoint))
592             return false;
593         TargetEncoding::Encode(os, codepoint);
594         return true;
595     }
596 
597     //! Validate one Unicode codepoint from an encoded stream.
598     template<typename InputStream, typename OutputStream>
ValidateTranscoder599     RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
600         return Transcode(is, os);   // Since source/target encoding is different, must transcode.
601     }
602 };
603 
604 //! Specialization of Transcoder with same source and target encoding.
605 template<typename Encoding>
606 struct Transcoder<Encoding, Encoding> {
607     template<typename InputStream, typename OutputStream>
608     RAPIDJSON_FORCEINLINE static bool Transcode(InputStream& is, OutputStream& os) {
609         os.Put(is.Take());  // Just copy one code unit. This semantic is different from primary template class.
610         return true;
611     }
612 
613     template<typename InputStream, typename OutputStream>
614     RAPIDJSON_FORCEINLINE static bool Validate(InputStream& is, OutputStream& os) {
615         return Encoding::Validate(is, os);  // source/target encoding are the same
616     }
617 };
618 
619 RAPIDJSON_NAMESPACE_END
620 
621 #if defined(__GNUC__) || defined(_MSV_VER)
622 RAPIDJSON_DIAG_POP
623 #endif
624 
625 #endif // RAPIDJSON_ENCODINGS_H_
626