1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 package libcore.util;
18 
19 /**
20  * <p>The {@code FP16} class is a wrapper and a utility class to manipulate half-precision 16-bit
21  * <a href="https://en.wikipedia.org/wiki/Half-precision_floating-point_format">IEEE 754</a>
22  * floating point data types (also called fp16 or binary16). A half-precision float can be
23  * created from or converted to single-precision floats, and is stored in a short data type.
24  *
25  * <p>The IEEE 754 standard specifies an fp16 as having the following format:</p>
26  * <ul>
27  * <li>Sign bit: 1 bit</li>
28  * <li>Exponent width: 5 bits</li>
29  * <li>Significand: 10 bits</li>
30  * </ul>
31  *
32  * <p>The format is laid out as follows:</p>
33  * <pre>
34  * 1   11111   1111111111
35  * ^   --^--   -----^----
36  * sign  |          |_______ significand
37  *       |
38  *       -- exponent
39  * </pre>
40  *
41  * <p>Half-precision floating points can be useful to save memory and/or
42  * bandwidth at the expense of range and precision when compared to single-precision
43  * floating points (fp32).</p>
 * <p>To help you decide whether fp16 is the right storage type for your needs, please
 * refer to the table below that shows the available precision throughout the range of
 * possible values. The <em>precision</em> column indicates the step size between two
 * consecutive numbers in a specific part of the range.</p>
48  *
49  * <table summary="Precision of fp16 across the range">
50  *     <tr><th>Range start</th><th>Precision</th></tr>
51  *     <tr><td>0</td><td>1 &frasl; 16,777,216</td></tr>
52  *     <tr><td>1 &frasl; 16,384</td><td>1 &frasl; 16,777,216</td></tr>
53  *     <tr><td>1 &frasl; 8,192</td><td>1 &frasl; 8,388,608</td></tr>
54  *     <tr><td>1 &frasl; 4,096</td><td>1 &frasl; 4,194,304</td></tr>
55  *     <tr><td>1 &frasl; 2,048</td><td>1 &frasl; 2,097,152</td></tr>
56  *     <tr><td>1 &frasl; 1,024</td><td>1 &frasl; 1,048,576</td></tr>
57  *     <tr><td>1 &frasl; 512</td><td>1 &frasl; 524,288</td></tr>
58  *     <tr><td>1 &frasl; 256</td><td>1 &frasl; 262,144</td></tr>
59  *     <tr><td>1 &frasl; 128</td><td>1 &frasl; 131,072</td></tr>
60  *     <tr><td>1 &frasl; 64</td><td>1 &frasl; 65,536</td></tr>
61  *     <tr><td>1 &frasl; 32</td><td>1 &frasl; 32,768</td></tr>
62  *     <tr><td>1 &frasl; 16</td><td>1 &frasl; 16,384</td></tr>
63  *     <tr><td>1 &frasl; 8</td><td>1 &frasl; 8,192</td></tr>
64  *     <tr><td>1 &frasl; 4</td><td>1 &frasl; 4,096</td></tr>
65  *     <tr><td>1 &frasl; 2</td><td>1 &frasl; 2,048</td></tr>
66  *     <tr><td>1</td><td>1 &frasl; 1,024</td></tr>
67  *     <tr><td>2</td><td>1 &frasl; 512</td></tr>
68  *     <tr><td>4</td><td>1 &frasl; 256</td></tr>
69  *     <tr><td>8</td><td>1 &frasl; 128</td></tr>
70  *     <tr><td>16</td><td>1 &frasl; 64</td></tr>
71  *     <tr><td>32</td><td>1 &frasl; 32</td></tr>
72  *     <tr><td>64</td><td>1 &frasl; 16</td></tr>
73  *     <tr><td>128</td><td>1 &frasl; 8</td></tr>
74  *     <tr><td>256</td><td>1 &frasl; 4</td></tr>
75  *     <tr><td>512</td><td>1 &frasl; 2</td></tr>
76  *     <tr><td>1,024</td><td>1</td></tr>
77  *     <tr><td>2,048</td><td>2</td></tr>
78  *     <tr><td>4,096</td><td>4</td></tr>
79  *     <tr><td>8,192</td><td>8</td></tr>
80  *     <tr><td>16,384</td><td>16</td></tr>
81  *     <tr><td>32,768</td><td>32</td></tr>
82  * </table>
83  *
84  * <p>This table shows that numbers higher than 1024 lose all fractional precision.</p>
85  *
86  * @hide
87  */
88 
89 @libcore.api.CorePlatformApi
90 public class FP16 {
91     /**
92      * The number of bits used to represent a half-precision float value.
93      */
94     @libcore.api.CorePlatformApi
95     public static final int SIZE = 16;
96 
97     /**
98      * Epsilon is the difference between 1.0 and the next value representable
99      * by a half-precision floating-point.
100      */
101     @libcore.api.CorePlatformApi
102     public static final short EPSILON = (short) 0x1400;
103 
104     /**
105      * Maximum exponent a finite half-precision float may have.
106      */
107     @libcore.api.CorePlatformApi
108     public static final int MAX_EXPONENT = 15;
109     /**
110      * Minimum exponent a normalized half-precision float may have.
111      */
112     @libcore.api.CorePlatformApi
113     public static final int MIN_EXPONENT = -14;
114 
115     /**
116      * Smallest negative value a half-precision float may have.
117      */
118     @libcore.api.CorePlatformApi
119     public static final short LOWEST_VALUE = (short) 0xfbff;
120     /**
121      * Maximum positive finite value a half-precision float may have.
122      */
123     @libcore.api.CorePlatformApi
124     public static final short MAX_VALUE = (short) 0x7bff;
125     /**
126      * Smallest positive normal value a half-precision float may have.
127      */
128     @libcore.api.CorePlatformApi
129     public static final short MIN_NORMAL = (short) 0x0400;
130     /**
131      * Smallest positive non-zero value a half-precision float may have.
132      */
133     @libcore.api.CorePlatformApi
134     public static final short MIN_VALUE = (short) 0x0001;
135     /**
136      * A Not-a-Number representation of a half-precision float.
137      */
138     @libcore.api.CorePlatformApi
139     public static final short NaN = (short) 0x7e00;
140     /**
141      * Negative infinity of type half-precision float.
142      */
143     @libcore.api.CorePlatformApi
144     public static final short NEGATIVE_INFINITY = (short) 0xfc00;
145     /**
146      * Negative 0 of type half-precision float.
147      */
148     @libcore.api.CorePlatformApi
149     public static final short NEGATIVE_ZERO = (short) 0x8000;
150     /**
151      * Positive infinity of type half-precision float.
152      */
153     @libcore.api.CorePlatformApi
154     public static final short POSITIVE_INFINITY = (short) 0x7c00;
155     /**
156      * Positive 0 of type half-precision float.
157      */
158     @libcore.api.CorePlatformApi
159     public static final short POSITIVE_ZERO = (short) 0x0000;
160 
161     @libcore.api.CorePlatformApi
162     public static final int SIGN_SHIFT                = 15;
163     @libcore.api.CorePlatformApi
164     public static final int EXPONENT_SHIFT            = 10;
165     @libcore.api.CorePlatformApi
166     public static final int SIGN_MASK                 = 0x8000;
167     @libcore.api.CorePlatformApi
168     public static final int SHIFTED_EXPONENT_MASK     = 0x1f;
169     @libcore.api.CorePlatformApi
170     public static final int SIGNIFICAND_MASK          = 0x3ff;
171     @libcore.api.CorePlatformApi
172     public static final int EXPONENT_SIGNIFICAND_MASK = 0x7fff;
173     @libcore.api.CorePlatformApi
174     public static final int EXPONENT_BIAS             = 15;
175 
176     private static final int FP32_SIGN_SHIFT            = 31;
177     private static final int FP32_EXPONENT_SHIFT        = 23;
178     private static final int FP32_SHIFTED_EXPONENT_MASK = 0xff;
179     private static final int FP32_SIGNIFICAND_MASK      = 0x7fffff;
180     private static final int FP32_EXPONENT_BIAS         = 127;
181     private static final int FP32_QNAN_MASK             = 0x400000;
182     private static final int FP32_DENORMAL_MAGIC = 126 << 23;
183     private static final float FP32_DENORMAL_FLOAT = Float.intBitsToFloat(FP32_DENORMAL_MAGIC);
184 
185     /** Hidden constructor to prevent instantiation. */
FP16()186     private FP16() {}
187 
188     /**
189      * <p>Compares the two specified half-precision float values. The following
190      * conditions apply during the comparison:</p>
191      *
192      * <ul>
193      * <li>{@link #NaN} is considered by this method to be equal to itself and greater
194      * than all other half-precision float values (including {@code #POSITIVE_INFINITY})</li>
195      * <li>{@link #POSITIVE_ZERO} is considered by this method to be greater than
196      * {@link #NEGATIVE_ZERO}.</li>
197      * </ul>
198      *
199      * @param x The first half-precision float value to compare.
200      * @param y The second half-precision float value to compare
201      *
202      * @return  The value {@code 0} if {@code x} is numerically equal to {@code y}, a
203      *          value less than {@code 0} if {@code x} is numerically less than {@code y},
204      *          and a value greater than {@code 0} if {@code x} is numerically greater
205      *          than {@code y}
206      */
207     @libcore.api.CorePlatformApi
compare(short x, short y)208     public static int compare(short x, short y) {
209         if (less(x, y)) return -1;
210         if (greater(x, y)) return 1;
211 
212         // Collapse NaNs, akin to halfToIntBits(), but we want to keep
213         // (signed) short value types to preserve the ordering of -0.0
214         // and +0.0
215         short xBits = isNaN(x) ? NaN : x;
216         short yBits = isNaN(y) ? NaN : y;
217 
218         return (xBits == yBits ? 0 : (xBits < yBits ? -1 : 1));
219     }
220 
221     /**
222      * Returns the closest integral half-precision float value to the specified
223      * half-precision float value. Special values are handled in the
224      * following ways:
225      * <ul>
226      * <li>If the specified half-precision float is NaN, the result is NaN</li>
227      * <li>If the specified half-precision float is infinity (negative or positive),
228      * the result is infinity (with the same sign)</li>
229      * <li>If the specified half-precision float is zero (negative or positive),
230      * the result is zero (with the same sign)</li>
231      * </ul>
232      *
233      * @param h A half-precision float value
234      * @return The value of the specified half-precision float rounded to the nearest
235      *         half-precision float value
236      */
237     @libcore.api.CorePlatformApi
rint(short h)238     public static short rint(short h) {
239         int bits = h & 0xffff;
240         int abs = bits & EXPONENT_SIGNIFICAND_MASK;
241         int result = bits;
242 
243         if (abs < 0x3c00) {
244             result &= SIGN_MASK;
245             if (abs > 0x3800){
246                 result |= 0x3c00;
247             }
248         } else if (abs < 0x6400) {
249             int exp = 25 - (abs >> 10);
250             int mask = (1 << exp) - 1;
251             result += ((1 << (exp - 1)) - (~(abs >> exp) & 1));
252             result &= ~mask;
253         }
254         if (isNaN((short) result)) {
255             // if result is NaN mask with qNaN
256             // (i.e. mask the most significant mantissa bit with 1)
257             // to comply with hardware implementations (ARM64, Intel, etc).
258             result |= NaN;
259         }
260 
261         return (short) result;
262     }
263 
264     /**
265      * Returns the smallest half-precision float value toward negative infinity
266      * greater than or equal to the specified half-precision float value.
267      * Special values are handled in the following ways:
268      * <ul>
269      * <li>If the specified half-precision float is NaN, the result is NaN</li>
270      * <li>If the specified half-precision float is infinity (negative or positive),
271      * the result is infinity (with the same sign)</li>
272      * <li>If the specified half-precision float is zero (negative or positive),
273      * the result is zero (with the same sign)</li>
274      * </ul>
275      *
276      * @param h A half-precision float value
277      * @return The smallest half-precision float value toward negative infinity
278      *         greater than or equal to the specified half-precision float value
279      */
280     @libcore.api.CorePlatformApi
ceil(short h)281     public static short ceil(short h) {
282         int bits = h & 0xffff;
283         int abs = bits & EXPONENT_SIGNIFICAND_MASK;
284         int result = bits;
285 
286         if (abs < 0x3c00) {
287             result &= SIGN_MASK;
288             result |= 0x3c00 & -(~(bits >> 15) & (abs != 0 ? 1 : 0));
289         } else if (abs < 0x6400) {
290             abs = 25 - (abs >> 10);
291             int mask = (1 << abs) - 1;
292             result += mask & ((bits >> 15) - 1);
293             result &= ~mask;
294         }
295         if (isNaN((short) result)) {
296             // if result is NaN mask with qNaN
297             // (i.e. mask the most significant mantissa bit with 1)
298             // to comply with hardware implementations (ARM64, Intel, etc).
299             result |= NaN;
300         }
301 
302         return (short) result;
303     }
304 
305     /**
306      * Returns the largest half-precision float value toward positive infinity
307      * less than or equal to the specified half-precision float value.
308      * Special values are handled in the following ways:
309      * <ul>
310      * <li>If the specified half-precision float is NaN, the result is NaN</li>
311      * <li>If the specified half-precision float is infinity (negative or positive),
312      * the result is infinity (with the same sign)</li>
313      * <li>If the specified half-precision float is zero (negative or positive),
314      * the result is zero (with the same sign)</li>
315      * </ul>
316      *
317      * @param h A half-precision float value
318      * @return The largest half-precision float value toward positive infinity
319      *         less than or equal to the specified half-precision float value
320      */
321     @libcore.api.CorePlatformApi
floor(short h)322     public static short floor(short h) {
323         int bits = h & 0xffff;
324         int abs = bits & EXPONENT_SIGNIFICAND_MASK;
325         int result = bits;
326 
327         if (abs < 0x3c00) {
328             result &= SIGN_MASK;
329             result |= 0x3c00 & (bits > 0x8000 ? 0xffff : 0x0);
330         } else if (abs < 0x6400) {
331             abs = 25 - (abs >> 10);
332             int mask = (1 << abs) - 1;
333             result += mask & -(bits >> 15);
334             result &= ~mask;
335         }
336         if (isNaN((short) result)) {
337             // if result is NaN mask with qNaN
338             // i.e. (Mask the most significant mantissa bit with 1)
339             result |= NaN;
340         }
341 
342         return (short) result;
343     }
344 
345     /**
346      * Returns the truncated half-precision float value of the specified
347      * half-precision float value. Special values are handled in the following ways:
348      * <ul>
349      * <li>If the specified half-precision float is NaN, the result is NaN</li>
350      * <li>If the specified half-precision float is infinity (negative or positive),
351      * the result is infinity (with the same sign)</li>
352      * <li>If the specified half-precision float is zero (negative or positive),
353      * the result is zero (with the same sign)</li>
354      * </ul>
355      *
356      * @param h A half-precision float value
357      * @return The truncated half-precision float value of the specified
358      *         half-precision float value
359      */
360     @libcore.api.CorePlatformApi
trunc(short h)361     public static short trunc(short h) {
362         int bits = h & 0xffff;
363         int abs = bits & EXPONENT_SIGNIFICAND_MASK;
364         int result = bits;
365 
366         if (abs < 0x3c00) {
367             result &= SIGN_MASK;
368         } else if (abs < 0x6400) {
369             abs = 25 - (abs >> 10);
370             int mask = (1 << abs) - 1;
371             result &= ~mask;
372         }
373 
374         return (short) result;
375     }
376 
377     /**
378      * Returns the smaller of two half-precision float values (the value closest
379      * to negative infinity). Special values are handled in the following ways:
380      * <ul>
381      * <li>If either value is NaN, the result is NaN</li>
382      * <li>{@link #NEGATIVE_ZERO} is smaller than {@link #POSITIVE_ZERO}</li>
383      * </ul>
384      *
385      * @param x The first half-precision value
386      * @param y The second half-precision value
387      * @return The smaller of the two specified half-precision values
388      */
389     @libcore.api.CorePlatformApi
min(short x, short y)390     public static short min(short x, short y) {
391         if (isNaN(x)) return NaN;
392         if (isNaN(y)) return NaN;
393 
394         if ((x & EXPONENT_SIGNIFICAND_MASK) == 0 && (y & EXPONENT_SIGNIFICAND_MASK) == 0) {
395             return (x & SIGN_MASK) != 0 ? x : y;
396         }
397 
398         return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <
399                ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y;
400     }
401 
402     /**
403      * Returns the larger of two half-precision float values (the value closest
404      * to positive infinity). Special values are handled in the following ways:
405      * <ul>
406      * <li>If either value is NaN, the result is NaN</li>
407      * <li>{@link #POSITIVE_ZERO} is greater than {@link #NEGATIVE_ZERO}</li>
408      * </ul>
409      *
410      * @param x The first half-precision value
411      * @param y The second half-precision value
412      *
413      * @return The larger of the two specified half-precision values
414      */
415     @libcore.api.CorePlatformApi
max(short x, short y)416     public static short max(short x, short y) {
417         if (isNaN(x)) return NaN;
418         if (isNaN(y)) return NaN;
419 
420         if ((x & EXPONENT_SIGNIFICAND_MASK) == 0 && (y & EXPONENT_SIGNIFICAND_MASK) == 0) {
421             return (x & SIGN_MASK) != 0 ? y : x;
422         }
423 
424         return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >
425                ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff) ? x : y;
426     }
427 
428     /**
429      * Returns true if the first half-precision float value is less (smaller
430      * toward negative infinity) than the second half-precision float value.
431      * If either of the values is NaN, the result is false.
432      *
433      * @param x The first half-precision value
434      * @param y The second half-precision value
435      *
436      * @return True if x is less than y, false otherwise
437      */
438     @libcore.api.CorePlatformApi
less(short x, short y)439     public static boolean less(short x, short y) {
440         if (isNaN(x)) return false;
441         if (isNaN(y)) return false;
442 
443         return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <
444                ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
445     }
446 
447     /**
448      * Returns true if the first half-precision float value is less (smaller
449      * toward negative infinity) than or equal to the second half-precision
450      * float value. If either of the values is NaN, the result is false.
451      *
452      * @param x The first half-precision value
453      * @param y The second half-precision value
454      *
455      * @return True if x is less than or equal to y, false otherwise
456      */
457     @libcore.api.CorePlatformApi
lessEquals(short x, short y)458     public static boolean lessEquals(short x, short y) {
459         if (isNaN(x)) return false;
460         if (isNaN(y)) return false;
461 
462         return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) <=
463                ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
464     }
465 
466     /**
467      * Returns true if the first half-precision float value is greater (larger
468      * toward positive infinity) than the second half-precision float value.
469      * If either of the values is NaN, the result is false.
470      *
471      * @param x The first half-precision value
472      * @param y The second half-precision value
473      *
474      * @return True if x is greater than y, false otherwise
475      */
476     @libcore.api.CorePlatformApi
greater(short x, short y)477     public static boolean greater(short x, short y) {
478         if (isNaN(x)) return false;
479         if (isNaN(y)) return false;
480 
481         return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >
482                ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
483     }
484 
485     /**
486      * Returns true if the first half-precision float value is greater (larger
487      * toward positive infinity) than or equal to the second half-precision float
488      * value. If either of the values is NaN, the result is false.
489      *
490      * @param x The first half-precision value
491      * @param y The second half-precision value
492      *
493      * @return True if x is greater than y, false otherwise
494      */
495     @libcore.api.CorePlatformApi
greaterEquals(short x, short y)496     public static boolean greaterEquals(short x, short y) {
497         if (isNaN(x)) return false;
498         if (isNaN(y)) return false;
499 
500         return ((x & SIGN_MASK) != 0 ? 0x8000 - (x & 0xffff) : x & 0xffff) >=
501                ((y & SIGN_MASK) != 0 ? 0x8000 - (y & 0xffff) : y & 0xffff);
502     }
503 
504     /**
505      * Returns true if the two half-precision float values are equal.
506      * If either of the values is NaN, the result is false. {@link #POSITIVE_ZERO}
507      * and {@link #NEGATIVE_ZERO} are considered equal.
508      *
509      * @param x The first half-precision value
510      * @param y The second half-precision value
511      *
512      * @return True if x is equal to y, false otherwise
513      */
514     @libcore.api.CorePlatformApi
equals(short x, short y)515     public static boolean equals(short x, short y) {
516         if (isNaN(x)) return false;
517         if (isNaN(y)) return false;
518 
519         return x == y || ((x | y) & EXPONENT_SIGNIFICAND_MASK) == 0;
520     }
521 
522     /**
523      * Returns true if the specified half-precision float value represents
524      * infinity, false otherwise.
525      *
526      * @param h A half-precision float value
527      * @return True if the value is positive infinity or negative infinity,
528      *         false otherwise
529      */
530     @libcore.api.CorePlatformApi
isInfinite(short h)531     public static boolean isInfinite(short h) {
532         return (h & EXPONENT_SIGNIFICAND_MASK) == POSITIVE_INFINITY;
533     }
534 
535     /**
536      * Returns true if the specified half-precision float value represents
537      * a Not-a-Number, false otherwise.
538      *
539      * @param h A half-precision float value
540      * @return True if the value is a NaN, false otherwise
541      */
542     @libcore.api.CorePlatformApi
isNaN(short h)543     public static boolean isNaN(short h) {
544         return (h & EXPONENT_SIGNIFICAND_MASK) > POSITIVE_INFINITY;
545     }
546 
547     /**
548      * Returns true if the specified half-precision float value is normalized
549      * (does not have a subnormal representation). If the specified value is
550      * {@link #POSITIVE_INFINITY}, {@link #NEGATIVE_INFINITY},
551      * {@link #POSITIVE_ZERO}, {@link #NEGATIVE_ZERO}, NaN or any subnormal
552      * number, this method returns false.
553      *
554      * @param h A half-precision float value
555      * @return True if the value is normalized, false otherwise
556      */
557     @libcore.api.CorePlatformApi
isNormalized(short h)558     public static boolean isNormalized(short h) {
559         return (h & POSITIVE_INFINITY) != 0 && (h & POSITIVE_INFINITY) != POSITIVE_INFINITY;
560     }
561 
562     /**
563      * <p>Converts the specified half-precision float value into a
564      * single-precision float value. The following special cases are handled:</p>
565      * <ul>
566      * <li>If the input is {@link #NaN}, the returned value is {@link Float#NaN}</li>
567      * <li>If the input is {@link #POSITIVE_INFINITY} or
568      * {@link #NEGATIVE_INFINITY}, the returned value is respectively
569      * {@link Float#POSITIVE_INFINITY} or {@link Float#NEGATIVE_INFINITY}</li>
570      * <li>If the input is 0 (positive or negative), the returned value is +/-0.0f</li>
571      * <li>Otherwise, the returned value is a normalized single-precision float value</li>
572      * </ul>
573      *
574      * @param h The half-precision float value to convert to single-precision
575      * @return A normalized single-precision float value
576      */
577     @libcore.api.CorePlatformApi
toFloat(short h)578     public static float toFloat(short h) {
579         int bits = h & 0xffff;
580         int s = bits & SIGN_MASK;
581         int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK;
582         int m = (bits                        ) & SIGNIFICAND_MASK;
583 
584         int outE = 0;
585         int outM = 0;
586 
587         if (e == 0) { // Denormal or 0
588             if (m != 0) {
589                 // Convert denorm fp16 into normalized fp32
590                 float o = Float.intBitsToFloat(FP32_DENORMAL_MAGIC + m);
591                 o -= FP32_DENORMAL_FLOAT;
592                 return s == 0 ? o : -o;
593             }
594         } else {
595             outM = m << 13;
596             if (e == 0x1f) { // Infinite or NaN
597                 outE = 0xff;
598                 if (outM != 0) { // SNaNs are quieted
599                     outM |= FP32_QNAN_MASK;
600                 }
601             } else {
602                 outE = e - EXPONENT_BIAS + FP32_EXPONENT_BIAS;
603             }
604         }
605 
606         int out = (s << 16) | (outE << FP32_EXPONENT_SHIFT) | outM;
607         return Float.intBitsToFloat(out);
608     }
609 
610     /**
611      * <p>Converts the specified single-precision float value into a
612      * half-precision float value. The following special cases are handled:</p>
613      * <ul>
614      * <li>If the input is NaN (see {@link Float#isNaN(float)}), the returned
615      * value is {@link #NaN}</li>
616      * <li>If the input is {@link Float#POSITIVE_INFINITY} or
617      * {@link Float#NEGATIVE_INFINITY}, the returned value is respectively
618      * {@link #POSITIVE_INFINITY} or {@link #NEGATIVE_INFINITY}</li>
619      * <li>If the input is 0 (positive or negative), the returned value is
620      * {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li>
621      * <li>If the input is a less than {@link #MIN_VALUE}, the returned value
622      * is flushed to {@link #POSITIVE_ZERO} or {@link #NEGATIVE_ZERO}</li>
623      * <li>If the input is a less than {@link #MIN_NORMAL}, the returned value
624      * is a denorm half-precision float</li>
625      * <li>Otherwise, the returned value is rounded to the nearest
626      * representable half-precision float value</li>
627      * </ul>
628      *
629      * @param f The single-precision float value to convert to half-precision
630      * @return A half-precision float value
631      */
632     @libcore.api.CorePlatformApi
toHalf(float f)633     public static short toHalf(float f) {
634         int bits = Float.floatToRawIntBits(f);
635         int s = (bits >>> FP32_SIGN_SHIFT    );
636         int e = (bits >>> FP32_EXPONENT_SHIFT) & FP32_SHIFTED_EXPONENT_MASK;
637         int m = (bits                        ) & FP32_SIGNIFICAND_MASK;
638 
639         int outE = 0;
640         int outM = 0;
641 
642         if (e == 0xff) { // Infinite or NaN
643             outE = 0x1f;
644             outM = m != 0 ? 0x200 : 0;
645         } else {
646             e = e - FP32_EXPONENT_BIAS + EXPONENT_BIAS;
647             if (e >= 0x1f) { // Overflow
648                 outE = 0x1f;
649             } else if (e <= 0) { // Underflow
650                 if (e < -10) {
651                     // The absolute fp32 value is less than MIN_VALUE, flush to +/-0
652                 } else {
653                     // The fp32 value is a normalized float less than MIN_NORMAL,
654                     // we convert to a denorm fp16
655                     m = m | 0x800000;
656                     int shift = 14 - e;
657                     outM = m >> shift;
658 
659                     int lowm = m & ((1 << shift) - 1);
660                     int hway = 1 << (shift - 1);
661                     // if above halfway or exactly halfway and outM is odd
662                     if (lowm + (outM & 1) > hway){
663                         // Round to nearest even
664                         // Can overflow into exponent bit, which surprisingly is OK.
665                         // This increment relies on the +outM in the return statement below
666                         outM++;
667                     }
668                 }
669             } else {
670                 outE = e;
671                 outM = m >> 13;
672                 // if above halfway or exactly halfway and outM is odd
673                 if ((m & 0x1fff) + (outM & 0x1) > 0x1000) {
674                     // Round to nearest even
675                     // Can overflow into exponent bit, which surprisingly is OK.
676                     // This increment relies on the +outM in the return statement below
677                     outM++;
678                 }
679             }
680         }
681         // The outM is added here as the +1 increments for outM above can
682         // cause an overflow in the exponent bit which is OK.
683         return (short) ((s << SIGN_SHIFT) | (outE << EXPONENT_SHIFT) + outM);
684     }
685 
686     /**
687      * <p>Returns a hexadecimal string representation of the specified half-precision
688      * float value. If the value is a NaN, the result is <code>"NaN"</code>,
689      * otherwise the result follows this format:</p>
690      * <ul>
691      * <li>If the sign is positive, no sign character appears in the result</li>
692      * <li>If the sign is negative, the first character is <code>'-'</code></li>
693      * <li>If the value is inifinity, the string is <code>"Infinity"</code></li>
694      * <li>If the value is 0, the string is <code>"0x0.0p0"</code></li>
695      * <li>If the value has a normalized representation, the exponent and
696      * significand are represented in the string in two fields. The significand
697      * starts with <code>"0x1."</code> followed by its lowercase hexadecimal
698      * representation. Trailing zeroes are removed unless all digits are 0, then
699      * a single zero is used. The significand representation is followed by the
700      * exponent, represented by <code>"p"</code>, itself followed by a decimal
701      * string of the unbiased exponent</li>
702      * <li>If the value has a subnormal representation, the significand starts
703      * with <code>"0x0."</code> followed by its lowercase hexadecimal
704      * representation. Trailing zeroes are removed unless all digits are 0, then
705      * a single zero is used. The significand representation is followed by the
706      * exponent, represented by <code>"p-14"</code></li>
707      * </ul>
708      *
709      * @param h A half-precision float value
710      * @return A hexadecimal string representation of the specified value
711      */
712     @libcore.api.CorePlatformApi
toHexString(short h)713     public static String toHexString(short h) {
714         StringBuilder o = new StringBuilder();
715 
716         int bits = h & 0xffff;
717         int s = (bits >>> SIGN_SHIFT    );
718         int e = (bits >>> EXPONENT_SHIFT) & SHIFTED_EXPONENT_MASK;
719         int m = (bits                   ) & SIGNIFICAND_MASK;
720 
721         if (e == 0x1f) { // Infinite or NaN
722             if (m == 0) {
723                 if (s != 0) o.append('-');
724                 o.append("Infinity");
725             } else {
726                 o.append("NaN");
727             }
728         } else {
729             if (s == 1) o.append('-');
730             if (e == 0) {
731                 if (m == 0) {
732                     o.append("0x0.0p0");
733                 } else {
734                     o.append("0x0.");
735                     String significand = Integer.toHexString(m);
736                     o.append(significand.replaceFirst("0{2,}$", ""));
737                     o.append("p-14");
738                 }
739             } else {
740                 o.append("0x1.");
741                 String significand = Integer.toHexString(m);
742                 o.append(significand.replaceFirst("0{2,}$", ""));
743                 o.append('p');
744                 o.append(Integer.toString(e - EXPONENT_BIAS));
745             }
746         }
747 
748         return o.toString();
749     }
750 }
751