1 /* 2 * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. 3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 4 * 5 * This code is free software; you can redistribute it and/or modify it 6 * under the terms of the GNU General Public License version 2 only, as 7 * published by the Free Software Foundation. Oracle designates this 8 * particular file as subject to the "Classpath" exception as provided 9 * by Oracle in the LICENSE file that accompanied this code. 10 * 11 * This code is distributed in the hope that it will be useful, but WITHOUT 12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 * version 2 for more details (a copy is included in the LICENSE file that 15 * accompanied this code). 16 * 17 * You should have received a copy of the GNU General Public License version 18 * 2 along with this work; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 20 * 21 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA 22 * or visit www.oracle.com if you need additional information or have any 23 * questions. 24 */ 25 26 package java.lang; 27 28 import java.util.NoSuchElementException; 29 import java.util.PrimitiveIterator; 30 import java.util.Spliterator; 31 import java.util.Spliterators; 32 import java.util.function.IntConsumer; 33 import java.util.stream.IntStream; 34 import java.util.stream.StreamSupport; 35 36 /** 37 * A <tt>CharSequence</tt> is a readable sequence of <code>char</code> values. This 38 * interface provides uniform, read-only access to many different kinds of 39 * <code>char</code> sequences. 40 * A <code>char</code> value represents a character in the <i>Basic 41 * Multilingual Plane (BMP)</i> or a surrogate. Refer to <a 42 * href="Character.html#unicode">Unicode Character Representation</a> for details. 43 * 44 * <p> This interface does not refine the general contracts of the {@link 45 * java.lang.Object#equals(java.lang.Object) equals} and {@link 46 * java.lang.Object#hashCode() hashCode} methods. The result of comparing two 47 * objects that implement <tt>CharSequence</tt> is therefore, in general, 48 * undefined. Each object may be implemented by a different class, and there 49 * is no guarantee that each class will be capable of testing its instances 50 * for equality with those of the other. It is therefore inappropriate to use 51 * arbitrary <tt>CharSequence</tt> instances as elements in a set or as keys in 52 * a map. </p> 53 * 54 * @author Mike McCloskey 55 * @since 1.4 56 * @spec JSR-51 57 */ 58 59 public interface CharSequence { 60 61 /** 62 * Returns the length of this character sequence. The length is the number 63 * of 16-bit <code>char</code>s in the sequence. 64 * 65 * @return the number of <code>char</code>s in this sequence 66 */ length()67 int length(); 68 69 /** 70 * Returns the <code>char</code> value at the specified index. An index ranges from zero 71 * to <tt>length() - 1</tt>. The first <code>char</code> value of the sequence is at 72 * index zero, the next at index one, and so on, as for array 73 * indexing. 74 * 75 * <p>If the <code>char</code> value specified by the index is a 76 * <a href="{@docRoot}/java/lang/Character.html#unicode">surrogate</a>, the surrogate 77 * value is returned. 78 * 79 * @param index the index of the <code>char</code> value to be returned 80 * 81 * @return the specified <code>char</code> value 82 * 83 * @throws IndexOutOfBoundsException 84 * if the <tt>index</tt> argument is negative or not less than 85 * <tt>length()</tt> 86 */ charAt(int index)87 char charAt(int index); 88 89 /** 90 * Returns a <code>CharSequence</code> that is a subsequence of this sequence. 91 * The subsequence starts with the <code>char</code> value at the specified index and 92 * ends with the <code>char</code> value at index <tt>end - 1</tt>. The length 93 * (in <code>char</code>s) of the 94 * returned sequence is <tt>end - start</tt>, so if <tt>start == end</tt> 95 * then an empty sequence is returned. 96 * 97 * @param start the start index, inclusive 98 * @param end the end index, exclusive 99 * 100 * @return the specified subsequence 101 * 102 * @throws IndexOutOfBoundsException 103 * if <tt>start</tt> or <tt>end</tt> are negative, 104 * if <tt>end</tt> is greater than <tt>length()</tt>, 105 * or if <tt>start</tt> is greater than <tt>end</tt> 106 */ subSequence(int start, int end)107 CharSequence subSequence(int start, int end); 108 109 /** 110 * Returns a string containing the characters in this sequence in the same 111 * order as this sequence. The length of the string will be the length of 112 * this sequence. 113 * 114 * @return a string consisting of exactly this sequence of characters 115 */ toString()116 public String toString(); 117 118 /** 119 * Returns a stream of {@code int} zero-extending the {@code char} values 120 * from this sequence. Any char which maps to a <a 121 * href="{@docRoot}/java/lang/Character.html#unicode">surrogate code 122 * point</a> is passed through uninterpreted. 123 * 124 * <p>If the sequence is mutated while the stream is being read, the 125 * result is undefined. 126 * 127 * @return an IntStream of char values from this sequence 128 * @since 1.8 129 */ chars()130 public default IntStream chars() { 131 class CharIterator implements PrimitiveIterator.OfInt { 132 int cur = 0; 133 134 public boolean hasNext() { 135 return cur < length(); 136 } 137 138 public int nextInt() { 139 if (hasNext()) { 140 return charAt(cur++); 141 } else { 142 throw new NoSuchElementException(); 143 } 144 } 145 146 @Override 147 public void forEachRemaining(IntConsumer block) { 148 for (; cur < length(); cur++) { 149 block.accept(charAt(cur)); 150 } 151 } 152 } 153 154 return StreamSupport.intStream(() -> 155 Spliterators.spliterator( 156 new CharIterator(), 157 length(), 158 Spliterator.ORDERED), 159 Spliterator.SUBSIZED | Spliterator.SIZED | Spliterator.ORDERED, 160 false); 161 } 162 163 /** 164 * Returns a stream of code point values from this sequence. Any surrogate 165 * pairs encountered in the sequence are combined as if by {@linkplain 166 * Character#toCodePoint Character.toCodePoint} and the result is passed 167 * to the stream. Any other code units, including ordinary BMP characters, 168 * unpaired surrogates, and undefined code units, are zero-extended to 169 * {@code int} values which are then passed to the stream. 170 * 171 * <p>If the sequence is mutated while the stream is being read, the result 172 * is undefined. 173 * 174 * @return an IntStream of Unicode code points from this sequence 175 * @since 1.8 176 */ codePoints()177 public default IntStream codePoints() { 178 class CodePointIterator implements PrimitiveIterator.OfInt { 179 int cur = 0; 180 181 @Override 182 public void forEachRemaining(IntConsumer block) { 183 final int length = length(); 184 int i = cur; 185 try { 186 while (i < length) { 187 char c1 = charAt(i++); 188 if (!Character.isHighSurrogate(c1) || i >= length) { 189 block.accept(c1); 190 } else { 191 char c2 = charAt(i); 192 if (Character.isLowSurrogate(c2)) { 193 i++; 194 block.accept(Character.toCodePoint(c1, c2)); 195 } else { 196 block.accept(c1); 197 } 198 } 199 } 200 } finally { 201 cur = i; 202 } 203 } 204 205 public boolean hasNext() { 206 return cur < length(); 207 } 208 209 public int nextInt() { 210 final int length = length(); 211 212 if (cur >= length) { 213 throw new NoSuchElementException(); 214 } 215 char c1 = charAt(cur++); 216 if (Character.isHighSurrogate(c1) && cur < length) { 217 char c2 = charAt(cur); 218 if (Character.isLowSurrogate(c2)) { 219 cur++; 220 return Character.toCodePoint(c1, c2); 221 } 222 } 223 return c1; 224 } 225 } 226 227 return StreamSupport.intStream(() -> 228 Spliterators.spliteratorUnknownSize( 229 new CodePointIterator(), 230 Spliterator.ORDERED), 231 Spliterator.ORDERED, 232 false); 233 } 234 } 235