1 /* 2 * Copyright (C) 2019 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package libcore.util; 18 19 /** 20 * <p>The {@code FP16} class is a wrapper and a utility class to manipulate half-precision 16-bit 21 * <a href="https://en.wikipedia.org/wiki/Half-precision_floating-point_format">IEEE 754</a> 22 * floating point data types (also called fp16 or binary16). A half-precision float can be 23 * created from or converted to single-precision floats, and is stored in a short data type. 24 * 25 * <p>The IEEE 754 standard specifies an fp16 as having the following format:</p> 26 * <ul> 27 * <li>Sign bit: 1 bit</li> 28 * <li>Exponent width: 5 bits</li> 29 * <li>Significand: 10 bits</li> 30 * </ul> 31 * 32 * <p>The format is laid out as follows:</p> 33 * <pre> 34 * 1 11111 1111111111 35 * ^ --^-- -----^---- 36 * sign | |_______ significand 37 * | 38 * -- exponent 39 * </pre> 40 * 41 * <p>Half-precision floating points can be useful to save memory and/or 42 * bandwidth at the expense of range and precision when compared to single-precision 43 * floating points (fp32).</p> 44 * <p>To help you decide whether fp16 is the right storage type for you need, please 45 * refer to the table below that shows the available precision throughout the range of 46 * possible values. The <em>precision</em> column indicates the step size between two 47 * consecutive numbers in a specific part of the range.</p> 48 * 49 * <table summary="Precision of fp16 across the range"> 50 * <tr><th>Range start</th><th>Precision</th></tr> 51 * <tr><td>0</td><td>1 ⁄ 16,777,216</td></tr> 52 * <tr><td>1 ⁄ 16,384</td><td>1 ⁄ 16,777,216</td></tr> 53 * <tr><td>1 ⁄ 8,192</td><td>1 ⁄ 8,388,608</td></tr> 54 * <tr><td>1 ⁄ 4,096</td><td>1 ⁄ 4,194,304</td></tr> 55 * <tr><td>1 ⁄ 2,048</td><td>1 ⁄ 2,097,152</td></tr> 56 * <tr><td>1 ⁄ 1,024</td><td>1 ⁄ 1,048,576</td></tr> 57 * <tr><td>1 ⁄ 512</td><td>1 ⁄ 524,288</td></tr> 58 * <tr><td>1 ⁄ 256</td><td>1 ⁄ 262,144</td></tr> 59 * <tr><td>1 ⁄ 128</td><td>1 ⁄ 131,072</td></tr> 60 * <tr><td>1 ⁄ 64</td><td>1 ⁄ 65,536</td></tr> 61 * <tr><td>1 ⁄ 32</td><td>1 ⁄ 32,768</td></tr> 62 * <tr><td>1 ⁄ 16</td><td>1 ⁄ 16,384</td></tr> 63 * <tr><td>1 ⁄ 8</td><td>1 ⁄ 8,192</td></tr> 64 * <tr><td>1 ⁄ 4</td><td>1 ⁄ 4,096</td></tr> 65 * <tr><td>1 ⁄ 2</td><td>1 ⁄ 2,048</td></tr> 66 * <tr><td>1</td><td>1 ⁄ 1,024</td></tr> 67 * <tr><td>2</td><td>1 ⁄ 512</td></tr> 68 * <tr><td>4</td><td>1 ⁄ 256</td></tr> 69 * <tr><td>8</td><td>1 ⁄ 128</td></tr> 70 * <tr><td>16</td><td>1 ⁄ 64</td></tr> 71 * <tr><td>32</td><td>1 ⁄ 32</td></tr> 72 * <tr><td>64</td><td>1 ⁄ 16</td></tr> 73 * <tr><td>128</td><td>1 ⁄ 8</td></tr> 74 * <tr><td>256</td><td>1 ⁄ 4</td></tr> 75 * <tr><td>512</td><td>1 ⁄ 2</td></tr> 76 * <tr><td>1,024</td><td>1</td></tr> 77 * <tr><td>2,048</td><td>2</td></tr> 78 * <tr><td>4,096</td><td>4</td></tr> 79 * <tr><td>8,192</td><td>8</td></tr> 80 * <tr><td>16,384</td><td>16</td></tr> 81 * <tr><td>32,768</td><td>32</td></tr> 82 * </table> 83 * 84 * <p>This table shows that numbers higher than 1024 lose all fractional precision.</p> 85 * 86 * @hide 87 */ 88 89 @libcore.api.CorePlatformApi 90 public class FP16 { 91 /** 92 * The number of bits used to represent a half-precision float value. 93 */ 94 @libcore.api.CorePlatformApi 95 public static final int SIZE = 16; 96 97 /** 98 * Epsilon is the difference between 1.0 and the next value representable 99 * by a half-precision floating-point. 100 */ 101 @libcore.api.CorePlatformApi 102 public static final short EPSILON = (short) 0x1400; 103 104 /** 105 * Maximum exponent a finite half-precision float may have. 106 */ 107 @libcore.api.CorePlatformApi 108 public static final int MAX_EXPONENT = 15; 109 /** 110 * Minimum exponent a normalized half-precision float may have. 111 */ 112 @libcore.api.CorePlatformApi 113 public static final int MIN_EXPONENT = -14; 114 115 /** 116 * Smallest negative value a half-precision float may have. 117 */ 118 @libcore.api.CorePlatformApi 119 public static final short LOWEST_VALUE = (short) 0xfbff; 120 /** 121 * Maximum positive finite value a half-precision float may have. 122 */ 123 @libcore.api.CorePlatformApi 124 public static final short MAX_VALUE = (short) 0x7bff; 125 /** 126 * Smallest positive normal value a half-precision float may have. 127 */ 128 @libcore.api.CorePlatformApi 129 public static final short MIN_NORMAL = (short) 0x0400; 130 /** 131 * Smallest positive non-zero value a half-precision float may have. 132 */ 133 @libcore.api.CorePlatformApi 134 public static final short MIN_VALUE = (short) 0x0001; 135 /** 136 * A Not-a-Number representation of a half-precision float. 137 */ 138 @libcore.api.CorePlatformApi 139 public static final short NaN = (short) 0x7e00; 140 /** 141 * Negative infinity of type half-precision float. 142 */ 143 @libcore.api.CorePlatformApi 144 public static final short NEGATIVE_INFINITY = (short) 0xfc00; 145 /** 146 * Negative 0 of type half-precision float. 147 */ 148 @libcore.api.CorePlatformApi 149 public static final short NEGATIVE_ZERO = (short) 0x8000; 150 /** 151 * Positive infinity of type half-precision float. 152 */ 153 @libcore.api.CorePlatformApi 154 public static final short POSITIVE_INFINITY = (short) 0x7c00; 155 /** 156 * Positive 0 of type half-precision float. 157 */ 158 @libcore.api.CorePlatformApi 159 public static final short POSITIVE_ZERO = (short) 0x0000; 160 161 @libcore.api.CorePlatformApi 162 public static final int SIGN_SHIFT = 15; 163 @libcore.api.CorePlatformApi 164 public static final int EXPONENT_SHIFT = 10; 165 @libcore.api.CorePlatformApi 166 public static final int SIGN_MASK = 0x8000; 167 @libcore.api.CorePlatformApi 168 public static final int SHIFTED_EXPONENT_MASK = 0x1f; 169 @libcore.api.CorePlatformApi 170 public static final int SIGNIFICAND_MASK = 0x3ff; 171 @libcore.api.CorePlatformApi 172 public static final int EXPONENT_SIGNIFICAND_MASK = 0x7fff; 173 @libcore.api.CorePlatformApi 174 public static final int EXPONENT_BIAS = 15; 175 176 private static final int FP32_SIGN_SHIFT = 31; 177 private static final int FP32_EXPONENT_SHIFT = 23; 178 private static final int FP32_SHIFTED_EXPONENT_MASK = 0xff; 179 private static final int FP32_SIGNIFICAND_MASK = 0x7fffff; 180 private static final int FP32_EXPONENT_BIAS = 127; 181 private static final int FP32_QNAN_MASK = 0x400000; 182 private static final int FP32_DENORMAL_MAGIC = 126 << 23; 183 private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC); 184 185 /** Hidden constructor to prevent instantiation. */ FP16()186 private FP16() {} 187 188 /** 189 * <p>Compares the two specified half-precision float values. The following 190 * conditions apply during the comparison:</p> 191 * 192 * <ul> 193 * <li>{@link #NaN} is considered by this method to be equal to itself and greater 194 * than all other half-precision float values (including {@code #POSITIVE_INFINITY})</li> 195 * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than 196 * {@link #NEGATIVE_ZERO}.</li> 197 * </ul> 198 * 199 * @param x The first half-precision float value to compare. 200 * @param y The second half-precision float value to compare 201 * 202 * @return The value {@code 0} if {@code x} is numerically equal to {@code y}, a 203 * value less than {@code 0} if {@code x} is numerically less than {@code y}, 204 * and a value greater than {@code 0} if {@code x} is numerically greater 205 * than {@code y} 206 */ 207 @libcore.api.CorePlatformApi compare(short x, short y)208 public static int compare(short x, short y) { 209 if (less(x, y)) return -1; 210 if (greater(x, y)) return 1; 211 212 // Collapse NaNs, akin to halfToIntBits(), but we want to keep 213 // (signed) short value types to preserve the ordering of -0.0 214 // and +0.0 215 short xBits = isNaN(x) ? NaN : x; 216 short yBits = isNaN(y) ? NaN : y; 217 218 return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1)); 219 } 220 221 /** 222 * Returns the closest integral half-precision float value to the specified 223 * half-precision float value. Special values are handled in the 224 * following ways: 225 * <ul> 226 * <li>If the specified half-precision float is NaN, the result is NaN</li> 227 * <li>If the specified half-precision float is infinity (negative or positive), 228 * the result is infinity (with the same sign)</li> 229 * <li>If the specified half-precision float is zero (negative or positive), 230 * the result is zero (with the same sign)</li> 231 * </ul> 232 * 233 * @param h A half-precision float value 234 * @return The value of the specified half-precision float rounded to the nearest 235 * half-precision float value 236 */ 237 @libcore.api.CorePlatformApi rint(short h)238 public static short rint(short h) { 239 int bits = h & 0xffff; 240 int abs = bits & EXPONENT_SIGNIFICAND_MASK; 241 int result = bits; 242 243 if (abs < 0x3c00) { 244 result &= SIGN_MASK; 245 if (abs > 0x3800){ 246 result |= 0x3c00; 247 } 248 } else if (abs < 0x6400) { 249 int exp = 25 - (abs >> 10); 250 int mask = (1 << exp) - 1; 251 result += ((1 << (exp - 1)) - (~(abs >> exp) & 1)); 252 result &= ~mask; 253 } 254 if (isNaN((short) result)) { 255 // if result is NaN mask with qNaN 256 // (i.e. mask the most significant mantissa bit with 1) 257 // to comply with hardware implementations (ARM64, Intel, etc). 258 result |= NaN; 259 } 260 261 return (short) result; 262 } 263 264 /** 265 * Returns the smallest half-precision float value toward negative infinity 266 * greater than or equal to the specified half-precision float value. 267 * Special values are handled in the following ways: 268 * <ul> 269 * <li>If the specified half-precision float is NaN, the result is NaN</li> 270 * <li>If the specified half-precision float is infinity (negative or positive), 271 * the result is infinity (with the same sign)</li> 272 * <li>If the specified half-precision float is zero (negative or positive), 273 * the result is zero (with the same sign)</li> 274 * </ul> 275 * 276 * @param h A half-precision float value 277 * @return The smallest half-precision float value toward negative infinity 278 * greater than or equal to the specified half-precision float value 279 */ 280 @libcore.api.CorePlatformApi ceil(short h)281 public static short ceil(short h) { 282 int bits = h & 0xffff; 283 int abs = bits & EXPONENT_SIGNIFICAND_MASK; 284 int result = bits; 285 286 if (abs < 0x3c00) { 287 result &= SIGN_MASK; 288 result |= 0x3c00 & -(~(bits >> 15) & (abs != 0 ? 1 : 0)); 289 } else if (abs < 0x6400) { 290 abs = 25 - (abs >> 10); 291 int mask = (1 << abs) - 1; 292 result += mask & ((bits >> 15) - 1); 293 result &= ~mask; 294 } 295 if (isNaN((short) result)) { 296 // if result is NaN mask with qNaN 297 // (i.e. mask the most significant mantissa bit with 1) 298 // to comply with hardware implementations (ARM64, Intel, etc). 299 result |= NaN; 300 } 301 302 return (short) result; 303 } 304 305 /** 306 * Returns the largest half-precision float value toward positive infinity 307 * less than or equal to the specified half-precision float value. 308 * Special values are handled in the following ways: 309 * <ul> 310 * <li>If the specified half-precision float is NaN, the result is NaN</li> 311 * <li>If the specified half-precision float is infinity (negative or positive), 312 * the result is infinity (with the same sign)</li> 313 * <li>If the specified half-precision float is zero (negative or positive), 314 * the result is zero (with the same sign)</li> 315 * </ul> 316 * 317 * @param h A half-precision float value 318 * @return The largest half-precision float value toward positive infinity 319 * less than or equal to the specified half-precision float value 320 */ 321 @libcore.api.CorePlatformApi floor(short h)322 public static short floor(short h) { 323 int bits = h & 0xffff; 324 int abs = bits & EXPONENT_SIGNIFICAND_MASK; 325 int result = bits; 326 327 if (abs < 0x3c00) { 328 result &= SIGN_MASK; 329 result |= 0x3c00 & (bits > 0x8000 ? 0xffff : 0x0); 330 } else if (abs < 0x6400) { 331 abs = 25 - (abs >> 10); 332 int mask = (1 << abs) - 1; 333 result += mask & -(bits >> 15); 334 result &= ~mask; 335 } 336 if (isNaN((short) result)) { 337 // if result is NaN mask with qNaN 338 // i.e. (Mask the most significant mantissa bit with 1) 339 result |= NaN; 340 } 341 342 return (short) result; 343 } 344 345 /** 346 * Returns the truncated half-precision float value of the specified 347 * half-precision float value. Special values are handled in the following ways: 348 * <ul> 349 * <li>If the specified half-precision float is NaN, the result is NaN</li> 350 * <li>If the specified half-precision float is infinity (negative or positive), 351 * the result is infinity (with the same sign)</li> 352 * <li>If the specified half-precision float is zero (negative or positive), 353 * the result is zero (with the same sign)</li> 354 * </ul> 355 * 356 * @param h A half-precision float value 357 * @return The truncated half-precision float value of the specified 358 * half-precision float value 359 */ 360 @libcore.api.CorePlatformApi trunc(short h)361 public static short trunc(short h) { 362 int bits = h & 0xffff; 363 int abs = bits & EXPONENT_SIGNIFICAND_MASK; 364 int result = bits; 365 366 if (abs < 0x3c00) { 367 result &= SIGN_MASK; 368 } else if (abs < 0x6400) { 369 abs = 25 - (abs >> 10); 370 int mask = (1 << abs) - 1; 371 result &= ~mask; 372 } 373 374 return (short) result; 375 } 376 377 /** 378 * Returns the smaller of two half-precision float values (the value closest 379 * to negative infinity). Special values are handled in the following ways: 380 * <ul> 381 * <li>If either value is NaN, the result is NaN</li> 382 * <li>{@link #NEGATIVE_ZERO} is smaller than {@link #POSITIVE_ZERO}</li> 383 * </ul> 384 * 385 * @param x The first half-precision value 386 * @param y The second half-precision value 387 * @return The smaller of the two specified half-precision values 388 */ 389 @libcore.api.CorePlatformApi min(short x, short y)390 public static short min(short x, short y) { 391 if (isNaN(x)) return NaN; 392 if (isNaN(y)) return NaN; 393 394 if ((x & EXPONENT_SIGNIFICAND_MASK) == 0 && (y & EXPONENT_SIGNIFICAND_MASK) == 0) { 395 return (x & SIGN_MASK) != 0 ? x : y; 396 } 397 398 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) < 399 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y; 400 } 401 402 /** 403 * Returns the larger of two half-precision float values (the value closest 404 * to positive infinity). Special values are handled in the following ways: 405 * <ul> 406 * <li>If either value is NaN, the result is NaN</li> 407 * <li>{@link #POSITIVE_ZERO} is greater than {@link #NEGATIVE_ZERO}</li> 408 * </ul> 409 * 410 * @param x The first half-precision value 411 * @param y The second half-precision value 412 * 413 * @return The larger of the two specified half-precision values 414 */ 415 @libcore.api.CorePlatformApi max(short x, short y)416 public static short max(short x, short y) { 417 if (isNaN(x)) return NaN; 418 if (isNaN(y)) return NaN; 419 420 if ((x & EXPONENT_SIGNIFICAND_MASK) == 0 && (y & EXPONENT_SIGNIFICAND_MASK) == 0) { 421 return (x & SIGN_MASK) != 0 ? y : x; 422 } 423 424 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) > 425 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y; 426 } 427 428 /** 429 * Returns true if the first half-precision float value is less (smaller 430 * toward negative infinity) than the second half-precision float value. 431 * If either of the values is NaN, the result is false. 432 * 433 * @param x The first half-precision value 434 * @param y The second half-precision value 435 * 436 * @return True if x is less than y, false otherwise 437 */ 438 @libcore.api.CorePlatformApi less(short x, short y)439 public static boolean less(short x, short y) { 440 if (isNaN(x)) return false; 441 if (isNaN(y)) return false; 442 443 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) < 444 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 445 } 446 447 /** 448 * Returns true if the first half-precision float value is less (smaller 449 * toward negative infinity) than or equal to the second half-precision 450 * float value. If either of the values is NaN, the result is false. 451 * 452 * @param x The first half-precision value 453 * @param y The second half-precision value 454 * 455 * @return True if x is less than or equal to y, false otherwise 456 */ 457 @libcore.api.CorePlatformApi lessEquals(short x, short y)458 public static boolean lessEquals(short x, short y) { 459 if (isNaN(x)) return false; 460 if (isNaN(y)) return false; 461 462 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <= 463 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 464 } 465 466 /** 467 * Returns true if the first half-precision float value is greater (larger 468 * toward positive infinity) than the second half-precision float value. 469 * If either of the values is NaN, the result is false. 470 * 471 * @param x The first half-precision value 472 * @param y The second half-precision value 473 * 474 * @return True if x is greater than y, false otherwise 475 */ 476 @libcore.api.CorePlatformApi greater(short x, short y)477 public static boolean greater(short x, short y) { 478 if (isNaN(x)) return false; 479 if (isNaN(y)) return false; 480 481 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) > 482 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 483 } 484 485 /** 486 * Returns true if the first half-precision float value is greater (larger 487 * toward positive infinity) than or equal to the second half-precision float 488 * value. If either of the values is NaN, the result is false. 489 * 490 * @param x The first half-precision value 491 * @param y The second half-precision value 492 * 493 * @return True if x is greater than y, false otherwise 494 */ 495 @libcore.api.CorePlatformApi greaterEquals(short x, short y)496 public static boolean greaterEquals(short x, short y) { 497 if (isNaN(x)) return false; 498 if (isNaN(y)) return false; 499 500 return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >= 501 ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff); 502 } 503 504 /** 505 * Returns true if the two half-precision float values are equal. 506 * If either of the values is NaN, the result is false. {@link #POSITIVE_ZERO} 507 * and {@link #NEGATIVE_ZERO} are considered equal. 508 * 509 * @param x The first half-precision value 510 * @param y The second half-precision value 511 * 512 * @return True if x is equal to y, false otherwise 513 */ 514 @libcore.api.CorePlatformApi equals(short x, short y)515 public static boolean equals(short x, short y) { 516 if (isNaN(x)) return false; 517 if (isNaN(y)) return false; 518 519 return x == y || ((x | y) & EXPONENT_SIGNIFICAND_MASK) == 0; 520 } 521 522 /** 523 * Returns true if the specified half-precision float value represents 524 * infinity, false otherwise. 525 * 526 * @param h A half-precision float value 527 * @return True if the value is positive infinity or negative infinity, 528 * false otherwise 529 */ 530 @libcore.api.CorePlatformApi isInfinite(short h)531 public static boolean isInfinite(short h) { 532 return (h & EXPONENT_SIGNIFICAND_MASK) == POSITIVE_INFINITY; 533 } 534 535 /** 536 * Returns true if the specified half-precision float value represents 537 * a Not-a-Number, false otherwise. 538 * 539 * @param h A half-precision float value 540 * @return True if the value is a NaN, false otherwise 541 */ 542 @libcore.api.CorePlatformApi isNaN(short h)543 public static boolean isNaN(short h) { 544 return (h & EXPONENT_SIGNIFICAND_MASK) > POSITIVE_INFINITY; 545 } 546 547 /** 548 * Returns true if the specified half-precision float value is normalized 549 * (does not have a subnormal representation). If the specified value is 550 * {@link #POSITIVE_INFINITY}, {@link #NEGATIVE_INFINITY}, 551 * {@link #POSITIVE_ZERO}, {@link #NEGATIVE_ZERO}, NaN or any subnormal 552 * number, this method returns false. 553 * 554 * @param h A half-precision float value 555 * @return True if the value is normalized, false otherwise 556 */ 557 @libcore.api.CorePlatformApi isNormalized(short h)558 public static boolean isNormalized(short h) { 559 return (h & POSITIVE_INFINITY) != 0 && (h & POSITIVE_INFINITY) != POSITIVE_INFINITY; 560 } 561 562 /** 563 * <p>Converts the specified half-precision float value into a 564 * single-precision float value. The following special cases are handled:</p> 565 * <ul> 566 * <li>If the input is {@link #NaN}, the returned value is {@link Float#NaN}</li> 567 * <li>If the input is {@link #POSITIVE_INFINITY} or 568 * {@link #NEGATIVE_INFINITY}, the returned value is respectively 569 * {@link Float#POSITIVE_INFINITY} or {@link Float#NEGATIVE_INFINITY}</li> 570 * <li>If the input is 0 (positive or negative), the returned value is +/-0.0f</li> 571 * <li>Otherwise, the returned value is a normalized single-precision float value</li> 572 * </ul> 573 * 574 * @param h The half-precision float value to convert to single-precision 575 * @return A normalized single-precision float value 576 */ 577 @libcore.api.CorePlatformApi toFloat(short h)578 public static float toFloat(short h) { 579 int bits = h & 0xffff; 580 int s = bits & SIGN_MASK; 581 int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK; 582 int m = (bits ) & SIGNIFICAND_MASK; 583 584 int outE = 0; 585 int outM = 0; 586 587 if (e == 0) { // Denormal or 0 588 if (m != 0) { 589 // Convert denorm fp16 into normalized fp32 590 float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m); 591 o -= FP32_DENORMAL_FLOAT; 592 return s == 0 ? o : -o; 593 } 594 } else { 595 outM = m << 13; 596 if (e == 0x1f) { // Infinite or NaN 597 outE = 0xff; 598 if (outM != 0) { // SNaNs are quieted 599 outM |= FP32_QNAN_MASK; 600 } 601 } else { 602 outE = e - EXPONENT_BIAS + FP32_EXPONENT_BIAS; 603 } 604 } 605 606 int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM; 607 return Float.intBitsToFloat(out); 608 } 609 610 /** 611 * <p>Converts the specified single-precision float value into a 612 * half-precision float value. The following special cases are handled:</p> 613 * <ul> 614 * <li>If the input is NaN (see {@link Float#isNaN(float)}), the returned 615 * value is {@link #NaN}</li> 616 * <li>If the input is {@link Float#POSITIVE_INFINITY} or 617 * {@link Float#NEGATIVE_INFINITY}, the returned value is respectively 618 * {@link #POSITIVE_INFINITY} or {@link #NEGATIVE_INFINITY}</li> 619 * <li>If the input is 0 (positive or negative), the returned value is 620 * {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li> 621 * <li>If the input is a less than {@link #MIN_VALUE}, the returned value 622 * is flushed to {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li> 623 * <li>If the input is a less than {@link #MIN_NORMAL}, the returned value 624 * is a denorm half-precision float</li> 625 * <li>Otherwise, the returned value is rounded to the nearest 626 * representable half-precision float value</li> 627 * </ul> 628 * 629 * @param f The single-precision float value to convert to half-precision 630 * @return A half-precision float value 631 */ 632 @libcore.api.CorePlatformApi toHalf(float f)633 public static short toHalf(float f) { 634 int bits = Float.floatToRawIntBits(f); 635 int s = (bits >>> FP32_SIGN_SHIFT ); 636 int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_SHIFTED_EXPONENT_MASK; 637 int m = (bits ) & FP32_SIGNIFICAND_MASK; 638 639 int outE = 0; 640 int outM = 0; 641 642 if (e == 0xff) { // Infinite or NaN 643 outE = 0x1f; 644 outM = m != 0 ? 0x200 : 0; 645 } else { 646 e = e - FP32_EXPONENT_BIAS + EXPONENT_BIAS; 647 if (e >= 0x1f) { // Overflow 648 outE = 0x1f; 649 } else if (e <= 0) { // Underflow 650 if (e < -10) { 651 // The absolute fp32 value is less than MIN_VALUE, flush to +/-0 652 } else { 653 // The fp32 value is a normalized float less than MIN_NORMAL, 654 // we convert to a denorm fp16 655 m = m | 0x800000; 656 int shift = 14 - e; 657 outM = m >> shift; 658 659 int lowm = m & ((1 << shift) - 1); 660 int hway = 1 << (shift - 1); 661 // if above halfway or exactly halfway and outM is odd 662 if (lowm + (outM & 1) > hway){ 663 // Round to nearest even 664 // Can overflow into exponent bit, which surprisingly is OK. 665 // This increment relies on the +outM in the return statement below 666 outM++; 667 } 668 } 669 } else { 670 outE = e; 671 outM = m >> 13; 672 // if above halfway or exactly halfway and outM is odd 673 if ((m & 0x1fff) + (outM & 0x1) > 0x1000) { 674 // Round to nearest even 675 // Can overflow into exponent bit, which surprisingly is OK. 676 // This increment relies on the +outM in the return statement below 677 outM++; 678 } 679 } 680 } 681 // The outM is added here as the +1 increments for outM above can 682 // cause an overflow in the exponent bit which is OK. 683 return (short) ((s << SIGN_SHIFT) | (outE << EXPONENT_SHIFT) + outM); 684 } 685 686 /** 687 * <p>Returns a hexadecimal string representation of the specified half-precision 688 * float value. If the value is a NaN, the result is <code>"NaN"</code>, 689 * otherwise the result follows this format:</p> 690 * <ul> 691 * <li>If the sign is positive, no sign character appears in the result</li> 692 * <li>If the sign is negative, the first character is <code>'-'</code></li> 693 * <li>If the value is inifinity, the string is <code>"Infinity"</code></li> 694 * <li>If the value is 0, the string is <code>"0x0.0p0"</code></li> 695 * <li>If the value has a normalized representation, the exponent and 696 * significand are represented in the string in two fields. The significand 697 * starts with <code>"0x1."</code> followed by its lowercase hexadecimal 698 * representation. Trailing zeroes are removed unless all digits are 0, then 699 * a single zero is used. The significand representation is followed by the 700 * exponent, represented by <code>"p"</code>, itself followed by a decimal 701 * string of the unbiased exponent</li> 702 * <li>If the value has a subnormal representation, the significand starts 703 * with <code>"0x0."</code> followed by its lowercase hexadecimal 704 * representation. Trailing zeroes are removed unless all digits are 0, then 705 * a single zero is used. The significand representation is followed by the 706 * exponent, represented by <code>"p-14"</code></li> 707 * </ul> 708 * 709 * @param h A half-precision float value 710 * @return A hexadecimal string representation of the specified value 711 */ 712 @libcore.api.CorePlatformApi toHexString(short h)713 public static String toHexString(short h) { 714 StringBuilder o = new StringBuilder(); 715 716 int bits = h & 0xffff; 717 int s = (bits >>> SIGN_SHIFT ); 718 int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK; 719 int m = (bits ) & SIGNIFICAND_MASK; 720 721 if (e == 0x1f) { // Infinite or NaN 722 if (m == 0) { 723 if (s != 0) o.append('-'); 724 o.append("Infinity"); 725 } else { 726 o.append("NaN"); 727 } 728 } else { 729 if (s == 1) o.append('-'); 730 if (e == 0) { 731 if (m == 0) { 732 o.append("0x0.0p0"); 733 } else { 734 o.append("0x0."); 735 String significand = Integer.toHexString(m); 736 o.append(significand.replaceFirst("0{2,}$", "")); 737 o.append("p-14"); 738 } 739 } else { 740 o.append("0x1."); 741 String significand = Integer.toHexString(m); 742 o.append(significand.replaceFirst("0{2,}$", "")); 743 o.append('p'); 744 o.append(Integer.toString(e - EXPONENT_BIAS)); 745 } 746 } 747 748 return o.toString(); 749 } 750 } 751