1 /*------------------------------------------------------------------------
2 / OCB Version 3 Reference Code (Optimized C)     Last modified 12-JUN-2013
3 /-------------------------------------------------------------------------
4 / Copyright (c) 2013 Ted Krovetz.
5 /
6 / Permission to use, copy, modify, and/or distribute this software for any
7 / purpose with or without fee is hereby granted, provided that the above
8 / copyright notice and this permission notice appear in all copies.
9 /
10 / THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 / WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 / MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 / ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 / WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 / ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 / OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 /
18 / Phillip Rogaway holds patents relevant to OCB. See the following for
19 / his patent grant: http://www.cs.ucdavis.edu/~rogaway/ocb/grant.htm
20 /
21 / Special thanks to Keegan McAllister for suggesting several good improvements
22 /
23 / Comments are welcome: Ted Krovetz <ted@krovetz.net> - Dedicated to Laurel K
24 /------------------------------------------------------------------------- */
25 
26 /* ----------------------------------------------------------------------- */
27 /* Usage notes                                                             */
28 /* ----------------------------------------------------------------------- */
29 
30 /* - When AE_PENDING is passed as the 'final' parameter of any function,
31 /    the length parameters must be a multiple of (BPI*16).
32 /  - When available, SSE or AltiVec registers are used to manipulate data.
33 /    So, when on machines with these facilities, all pointers passed to
34 /    any function should be 16-byte aligned.
35 /  - Plaintext and ciphertext pointers may be equal (ie, plaintext gets
36 /    encrypted in-place), but no other pair of pointers may be equal.
37 /  - This code assumes all x86 processors have SSE2 and SSSE3 instructions
38 /    when compiling under MSVC. If untrue, alter the #define.
39 /  - This code is tested for C99 and recent versions of GCC and MSVC.      */
40 
41 /* ----------------------------------------------------------------------- */
42 /* User configuration options                                              */
43 /* ----------------------------------------------------------------------- */
44 
45 /* Set the AES key length to use and length of authentication tag to produce.
46 /  Setting either to 0 requires the value be set at runtime via ae_init().
47 /  Some optimizations occur for each when set to a fixed value.            */
48 #define OCB_KEY_LEN 16 /* 0, 16, 24 or 32. 0 means set in ae_init */
49 #define OCB_TAG_LEN 16 /* 0 to 16. 0 means set in ae_init         */
50 
51 /* This implementation has built-in support for multiple AES APIs. Set any
52 /  one of the following to non-zero to specify which to use.               */
53 #define USE_OPENSSL_AES 1   /* http://openssl.org                      */
54 #define USE_REFERENCE_AES 0 /* Internet search: rijndael-alg-fst.c     */
55 #define USE_AES_NI 0        /* Uses compiler's intrinsics              */
56 
57 /* During encryption and decryption, various "L values" are required.
58 /  The L values can be precomputed during initialization (requiring extra
59 /  space in ae_ctx), generated as needed (slightly slowing encryption and
60 /  decryption), or some combination of the two. L_TABLE_SZ specifies how many
61 /  L values to precompute. L_TABLE_SZ must be at least 3. L_TABLE_SZ*16 bytes
62 /  are used for L values in ae_ctx. Plaintext and ciphertexts shorter than
63 /  2^L_TABLE_SZ blocks need no L values calculated dynamically.            */
64 #define L_TABLE_SZ 16
65 
66 /* Set L_TABLE_SZ_IS_ENOUGH non-zero iff you know that all plaintexts
67 /  will be shorter than 2^(L_TABLE_SZ+4) bytes in length. This results
68 /  in better performance.                                                  */
69 #define L_TABLE_SZ_IS_ENOUGH 1
70 
71 /* ----------------------------------------------------------------------- */
72 /* Includes and compiler specific definitions                              */
73 /* ----------------------------------------------------------------------- */
74 
75 #include <keymaster/key_blob_utils/ae.h>
76 #include <malloc.h>
77 #include <stdlib.h>
78 #include <string.h>
79 
80 /* Define standard sized integers                                          */
81 #if defined(_MSC_VER) && (_MSC_VER < 1600)
82 typedef unsigned __int8 uint8_t;
83 typedef unsigned __int32 uint32_t;
84 typedef unsigned __int64 uint64_t;
85 typedef __int64 int64_t;
86 #else
87 #include <stdint.h>
88 #endif
89 
90 /* Compiler-specific intrinsics and fixes: bswap64, ntz                    */
91 #if _MSC_VER
92 #define inline __inline                           /* MSVC doesn't recognize "inline" in C */
93 #define restrict __restrict                       /* MSVC doesn't recognize "restrict" in C */
94 #define __SSE2__ (_M_IX86 || _M_AMD64 || _M_X64)  /* Assume SSE2  */
95 #define __SSSE3__ (_M_IX86 || _M_AMD64 || _M_X64) /* Assume SSSE3 */
96 #include <intrin.h>
97 #pragma intrinsic(_byteswap_uint64, _BitScanForward, memcpy)
98 #define bswap64(x) _byteswap_uint64(x)
/* Count trailing zeros using the MSVC intrinsic.
   NOTE: _BitScanForward's first parameter is `unsigned long*`; the original
   passed `&x` where x is `unsigned int`, which is a pointer-type mismatch
   (same width on Windows, but a constraint violation). Use a proper
   unsigned long index. Result is undefined when x == 0 (never passed here). */
static inline unsigned ntz(unsigned x) {
    unsigned long index;
    _BitScanForward(&index, x);
    return (unsigned)index;
}
103 #elif __GNUC__
104 #define inline __inline__                   /* No "inline" in GCC ansi C mode */
105 #define restrict __restrict__               /* No "restrict" in GCC ansi C mode */
106 #define bswap64(x) __builtin_bswap64(x)     /* Assuming GCC 4.3+ */
107 #define ntz(x) __builtin_ctz((unsigned)(x)) /* Assuming GCC 3.4+ */
108 #else /* Assume some C99 features: stdint.h, inline, restrict */
/* Portable 32-bit byte swap */
#define bswap32(x)                                                                                 \
    ((((x)&0xff000000u) >> 24) | (((x)&0x00ff0000u) >> 8) | (((x)&0x0000ff00u) << 8) |             \
     (((x)&0x000000ffu) << 24))

/* Portable 64-bit byte swap: reverse each 32-bit half, then exchange the
   halves. Pure shift/mask arithmetic; no type punning required. */
static inline uint64_t bswap64(uint64_t x) {
    uint64_t swapped_low = bswap32((uint32_t)x);
    uint64_t swapped_high = bswap32((uint32_t)(x >> 32));
    return (swapped_low << 32) | swapped_high;
}
123 
124 #if (L_TABLE_SZ <= 9) && (L_TABLE_SZ_IS_ENOUGH) /* < 2^13 byte texts */
/* Trailing-zero count via direct table lookup on x/4.
   NOTE(review): the table is indexed by x >> 2 and appears to assume
   callers only pass multiples of 4 (block counters advance in multiples
   of BPI) -- confirm against the offset-update call sites. Valid for
   x < 480, sufficient when L_TABLE_SZ <= 9 and L_TABLE_SZ_IS_ENOUGH. */
static inline unsigned ntz(unsigned x) {
    static const unsigned char trailing_zeros[] = {
        0, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 6, 2, 3, 2, 4, 2, 3, 2, 5, 2,
        3, 2, 4, 2, 3, 2, 7, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 6, 2, 3, 2,
        4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 8, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2,
        3, 2, 6, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2, 7, 2, 3, 2, 4, 2, 3, 2,
        5, 2, 3, 2, 4, 2, 3, 2, 6, 2, 3, 2, 4, 2, 3, 2, 5, 2, 3, 2, 4, 2, 3, 2};
    return trailing_zeros[x >> 2];
}
134 #else                                           /* From http://supertech.csail.mit.edu/papers/debruijn.pdf */
/* Trailing-zero count via de Bruijn multiplication:
   http://supertech.csail.mit.edu/papers/debruijn.pdf
   Isolate the lowest set bit, multiply by the de Bruijn constant so the
   top 5 bits index a position table. Undefined for x == 0. */
static inline unsigned ntz(unsigned x) {
    static const unsigned char debruijn_pos[32] = {0,  1,  28, 2,  29, 14, 24, 3,  30, 22, 20,
                                                   15, 25, 17, 4,  8,  31, 27, 13, 23, 21, 19,
                                                   16, 7,  26, 12, 18, 6,  11, 5,  10, 9};
    uint32_t lsb = (uint32_t)x & (0u - (uint32_t)x); /* lowest set bit of x */
    return debruijn_pos[(uint32_t)(lsb * 0x077CB531u) >> 27];
}
141 #endif
142 #endif
143 
144 /* ----------------------------------------------------------------------- */
145 /* Define blocks and operations -- Patch if incorrect on your compiler.    */
146 /* ----------------------------------------------------------------------- */
147 
148 #if __SSE2__ && !KEYMASTER_CLANG_TEST_BUILD
149 #include <xmmintrin.h> /* SSE instructions and _mm_malloc */
150 #include <emmintrin.h> /* SSE2 instructions               */
151 typedef __m128i block;
152 #define xor_block(x, y) _mm_xor_si128(x, y)
153 #define zero_block() _mm_setzero_si128()
154 #define unequal_blocks(x, y) (_mm_movemask_epi8(_mm_cmpeq_epi8(x, y)) != 0xffff)
155 #if __SSSE3__ || USE_AES_NI
156 #include <tmmintrin.h> /* SSSE3 instructions              */
157 #define swap_if_le(b)                                                                              \
158     _mm_shuffle_epi8(b, _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15))
159 #else
/* Byte-reverse a 16-byte block on little-endian x86 without SSSE3.
   Three shuffles reverse the block at 16-bit granularity; the final xor of
   an 8-bit right and left shift swaps the two bytes inside each 16-bit
   lane, completing a full 16-byte reversal. */
static inline block swap_if_le(block b) {
    block a = _mm_shuffle_epi32(b, _MM_SHUFFLE(0, 1, 2, 3));          /* reverse the four 32-bit words */
    a = _mm_shufflehi_epi16(a, _MM_SHUFFLE(2, 3, 0, 1));              /* swap 16-bit pairs, high qword */
    a = _mm_shufflelo_epi16(a, _MM_SHUFFLE(2, 3, 0, 1));              /* swap 16-bit pairs, low qword */
    return _mm_xor_si128(_mm_srli_epi16(a, 8), _mm_slli_epi16(a, 8)); /* swap bytes within each lane */
}
166 #endif
/* Build Offset = (Ktop <<< bot) from the 192-bit stretched value KtopStr.
   hi and lo are overlapping 128-bit windows, so one 64-bit funnel shift per
   lane yields both halves of the rotation. bot == 0 needs no special case:
   SSE shifts produce zero for counts >= 64, so the srl term vanishes.
   KtopStr is register correct; the returned block is memory correct. */
static inline block gen_offset(uint64_t KtopStr[3], unsigned bot) {
    block hi = _mm_load_si128((__m128i*)(KtopStr + 0));  /* hi = B A */
    block lo = _mm_loadu_si128((__m128i*)(KtopStr + 1)); /* lo = C B */
    __m128i lshift = _mm_cvtsi32_si128(bot);
    __m128i rshift = _mm_cvtsi32_si128(64 - bot);
    lo = _mm_xor_si128(_mm_sll_epi64(hi, lshift), _mm_srl_epi64(lo, rshift));
#if __SSSE3__ || USE_AES_NI
    /* One byte-shuffle converts to memory-correct (byte-reversed) order */
    return _mm_shuffle_epi8(lo, _mm_set_epi8(8, 9, 10, 11, 12, 13, 14, 15, 0, 1, 2, 3, 4, 5, 6, 7));
#else
    /* Swap the two qwords, then byte-reverse via swap_if_le */
    return swap_if_le(_mm_shuffle_epi32(lo, _MM_SHUFFLE(1, 0, 3, 2)));
#endif
}
/* GF(2^128) doubling with SSE2 32-bit operations: srai broadcasts each
   dword's sign (carry) bit across the lane, the mask reduces each carry to
   its constant (135 for the dword whose carry wraps to the low end, 1 for
   the cross-lane carries), the dword rotate moves each carry into the
   receiving lane, and the shifted block absorbs the carries via xor. */
static inline block double_block(block bl) {
    const __m128i mask = _mm_set_epi32(135, 1, 1, 1);
    __m128i tmp = _mm_srai_epi32(bl, 31);             /* lane = all-ones iff its MSB set */
    tmp = _mm_and_si128(tmp, mask);                   /* per-lane carry constants */
    tmp = _mm_shuffle_epi32(tmp, _MM_SHUFFLE(2, 1, 0, 3)); /* rotate carries to their target lanes */
    bl = _mm_slli_epi32(bl, 1);                       /* shift each dword left one bit */
    return _mm_xor_si128(bl, tmp);                    /* fold carries back in */
}
187 #elif __ALTIVEC__
188 #include <altivec.h>
189 typedef vector unsigned block;
190 #define xor_block(x, y) vec_xor(x, y)
191 #define zero_block() vec_splat_u32(0)
192 #define unequal_blocks(x, y) vec_any_ne(x, y)
193 #define swap_if_le(b) (b)
194 #if __PPC64__
gen_offset(uint64_t KtopStr[3],unsigned bot)195 block gen_offset(uint64_t KtopStr[3], unsigned bot) {
196     union {
197         uint64_t u64[2];
198         block bl;
199     } rval;
200     rval.u64[0] = (KtopStr[0] << bot) | (KtopStr[1] >> (64 - bot));
201     rval.u64[1] = (KtopStr[1] << bot) | (KtopStr[2] >> (64 - bot));
202     return rval.bl;
203 }
204 #else
205 /* Special handling: Shifts are mod 32, and no 64-bit types */
/* Offset = (Ktop <<< bot) on 32-bit AltiVec: there are no 64-bit scalar
   shifts and vector shift counts are taken mod 32, so the rotation is
   built from whole-word vec_sld element shifts plus a sub-32-bit funnel
   shift of the remaining bot bits. */
block gen_offset(uint64_t KtopStr[3], unsigned bot) {
    const vector unsigned k32 = {32, 32, 32, 32};
    vector unsigned hi = *(vector unsigned*)(KtopStr + 0);
    vector unsigned lo = *(vector unsigned*)(KtopStr + 2);
    vector unsigned bot_vec;
    if (bot < 32) {
        lo = vec_sld(hi, lo, 4); /* align lo window one word into hi:lo */
    } else {
        /* Rotate by one whole 32-bit word, then reduce bot below 32 */
        vector unsigned t = vec_sld(hi, lo, 4);
        lo = vec_sld(hi, lo, 8);
        hi = t;
        bot = bot - 32;
    }
    if (bot == 0)
        return hi;
    *(unsigned*)&bot_vec = bot; /* write element 0 only; splatted below */
    vector unsigned lshift = vec_splat(bot_vec, 0);
    vector unsigned rshift = vec_sub(k32, lshift);
    hi = vec_sl(hi, lshift);
    lo = vec_sr(lo, rshift);
    return vec_xor(hi, lo); /* funnel the two shifted windows together */
}
228 #endif
/* GF(2^128) doubling, byte-wise: vec_sra by 7 broadcasts each byte's MSB
   (its carry) across the byte, the mask maps byte 0's carry to the
   reduction constant 135 and every other carry to 1, the permute rotates
   each carry into the byte that receives it (byte 0's wraps to byte 15),
   and the left-shifted block absorbs the carries via xor. */
static inline block double_block(block b) {
    const vector unsigned char mask = {135, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
    const vector unsigned char perm = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0};
    const vector unsigned char shift7 = vec_splat_u8(7);
    const vector unsigned char shift1 = vec_splat_u8(1);
    vector unsigned char c = (vector unsigned char)b;
    vector unsigned char t = vec_sra(c, shift7); /* byte = 0xFF iff its MSB set */
    t = vec_and(t, mask);                        /* per-byte carry constants */
    t = vec_perm(t, t, perm);                    /* rotate carries down one byte */
    c = vec_sl(c, shift1);                       /* shift every byte left one bit */
    return (block)vec_xor(c, t);
}
241 #elif __ARM_NEON__
242 #include <arm_neon.h>
243 typedef int8x16_t block __attribute__ ((aligned (16))); /* Yay! Endian-neutral reads! */
244 #define xor_block(x, y) veorq_s8(x, y)
245 #define zero_block() vdupq_n_s8(0)
/* Return nonzero iff the two blocks differ: xor them and test whether
   either 64-bit half of the result is nonzero. */
static inline int unequal_blocks(block a, block b) {
    int64x2_t t = veorq_s64((int64x2_t)a, (int64x2_t)b);
    return (vgetq_lane_s64(t, 0) | vgetq_lane_s64(t, 1)) != 0;
}
250 #define swap_if_le(b) (b) /* Using endian-neutral int8x16_t */
251 /* KtopStr is reg correct by 64 bits, return mem correct */
/* KtopStr is reg correct by 64 bits, return mem correct.
   Offset = (Ktop <<< bot): overlapping 128-bit windows hi/lo are combined
   with a per-lane 64-bit funnel shift. NEON vshlq shifts RIGHT for negative
   counts, so rs = bot - 64 supplies the right-shift leg of the funnel. */
block gen_offset(uint64_t KtopStr[3], unsigned bot) {
    const union {
        unsigned x;
        unsigned char endian;
    } little = {1}; /* runtime endianness probe: first byte of 1 */
    const int64x2_t k64 = {-64, -64};
    /* Copy hi and lo into local variables to ensure proper alignment */
    uint64x2_t hi = vld1q_u64(KtopStr + 0); /* hi = A B */
    uint64x2_t lo = vld1q_u64(KtopStr + 1); /* lo = B C */
    int64x2_t ls = vdupq_n_s64(bot);
    int64x2_t rs = vqaddq_s64(k64, ls);     /* bot - 64: negative => right shift */
    block rval = (block)veorq_u64(vshlq_u64(hi, ls), vshlq_u64(lo, rs));
    if (little.endian)
        rval = vrev64q_s8(rval);            /* byte-reverse each qword to memory order */
    return rval;
}
/* GF(2^128) doubling on endian-neutral bytes: vshrq broadcasts each byte's
   MSB (its carry), the mask maps byte 0's carry to the reduction constant
   135 and all other carries to 1, vextq rotates each carry into the byte
   that receives it (byte 0's wraps to the last byte), and the left-shifted
   block absorbs the carries via xor. */
static inline block double_block(block b) {
    const block mask = {135, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1};
    block tmp = vshrq_n_s8(b, 7);  /* byte = 0xFF iff its MSB set */
    tmp = vandq_s8(tmp, mask);     /* per-byte carry constants */
    tmp = vextq_s8(tmp, tmp, 1);   /* Rotate high byte to end */
    b = vshlq_n_s8(b, 1);          /* shift every byte left one bit */
    return veorq_s8(tmp, b);
}
276 #else
/* Portable fallback: a 16-byte block stored as a pair of 64-bit words. */
typedef struct { uint64_t l, r; } block;

/* XOR two blocks and return the result. */
static inline block xor_block(block x, block y) {
    block combined;
    combined.l = x.l ^ y.l;
    combined.r = x.r ^ y.r;
    return combined;
}
zero_block(void)283 static inline block zero_block(void) {
284     const block t = {0, 0};
285     return t;
286 }
287 #define unequal_blocks(x, y) ((((x).l ^ (y).l) | ((x).r ^ (y).r)) != 0)
/* Byte-reverse the block on little-endian machines; identity on big-endian.
   Endianness is detected at runtime by probing the first byte of an int. */
static inline block swap_if_le(block b) {
    const union {
        unsigned whole;
        unsigned char first_byte;
    } probe = {1};
    if (!probe.first_byte)
        return b; /* big-endian: already memory correct */
    {
        block flipped;
        flipped.l = bswap64(b.l);
        flipped.r = bswap64(b.r);
        return flipped;
    }
}
301 
302 /* KtopStr is reg correct by 64 bits, return mem correct */
gen_offset(uint64_t KtopStr[3],unsigned bot)303 block gen_offset(uint64_t KtopStr[3], unsigned bot) {
304     block rval;
305     if (bot != 0) {
306         rval.l = (KtopStr[0] << bot) | (KtopStr[1] >> (64 - bot));
307         rval.r = (KtopStr[1] << bot) | (KtopStr[2] >> (64 - bot));
308     } else {
309         rval.l = KtopStr[0];
310         rval.r = KtopStr[1];
311     }
312     return swap_if_le(rval);
313 }
314 
315 #if __GNUC__ && __arm__
/* GF(2^128) doubling via the ARM carry chain: four add-with-carry
   instructions double the 128-bit value starting from the low words of r
   (the least-significant half) up through l; %H names the high 32-bit word
   of a 64-bit operand pair. If a bit carries out of the top, the
   conditional eor folds the reduction constant 135 into the low word. */
static inline block double_block(block b) {
    __asm__("adds %1,%1,%1\n\t"
            "adcs %H1,%H1,%H1\n\t"
            "adcs %0,%0,%0\n\t"
            "adcs %H0,%H0,%H0\n\t"
            "it cs\n\t"
            "eorcs %1,%1,#135"
            : "+r"(b.l), "+r"(b.r)
            :
            : "cc");
    return b;
}
328 #else
/* GF(2^128) doubling: shift the 128-bit value (l is the high half) left one
   bit; if the shifted-out bit was set, xor the reduction constant 135
   (x^7 + x^2 + x + 1) into the low byte. FIX: the original derived the
   carry mask via an arithmetic right shift of a negative signed value,
   whose result is implementation-defined in C; use a plain unsigned
   0/1 carry multiplied by the constant instead -- identical results on
   conforming implementations, fully defined everywhere. */
static inline block double_block(block b) {
    uint64_t carry = b.l >> 63;        /* top bit of the 128-bit value: 0 or 1 */
    b.l = (b.l << 1) ^ (b.r >> 63);    /* shift left, pulling in r's top bit */
    b.r = (b.r << 1) ^ (carry * 135);  /* conditional reduction */
    return b;
}
335 #endif
336 
337 #endif
338 
339 #ifndef __has_attribute
340 #define __has_attribute(x) 0
341 #endif
342 
343 #if __has_attribute(fallthrough)
344 #define __fallthrough __attribute__((__fallthrough__));
345 #else
346 #define __fallthrough
347 #endif
348 
349 /* ----------------------------------------------------------------------- */
350 /* AES - Code uses OpenSSL API. Other implementations get mapped to it.    */
351 /* ----------------------------------------------------------------------- */
352 
353 /*---------------*/
354 #if USE_OPENSSL_AES
355 /*---------------*/
356 
357 #include <openssl/aes.h> /* http://openssl.org/ */
358 
359 /* How to ECB encrypt an array of blocks, in place                         */
/* ECB encrypt an array of blocks in place: one AES_encrypt call per block,
   processed from the last block down to the first. */
static inline void AES_ecb_encrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    unsigned i;
    for (i = nblks; i > 0; i--) {
        unsigned char* buf = (unsigned char*)(blks + (i - 1));
        AES_encrypt(buf, buf, key);
    }
}
366 
/* ECB decrypt an array of blocks in place: one AES_decrypt call per block,
   processed from the last block down to the first. */
static inline void AES_ecb_decrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    unsigned i;
    for (i = nblks; i > 0; i--) {
        unsigned char* buf = (unsigned char*)(blks + (i - 1));
        AES_decrypt(buf, buf, key);
    }
}
373 
374 #define BPI 4 /* Number of blocks in buffer per ECB call */
375 
376 /*-------------------*/
377 #elif USE_REFERENCE_AES
378 /*-------------------*/
379 
380 #include "rijndael-alg-fst.h" /* Barreto's Public-Domain Code */
381 #if (OCB_KEY_LEN == 0)
382 typedef struct {
383     uint32_t rd_key[60];
384     int rounds;
385 } AES_KEY;
386 #define ROUNDS(ctx) ((ctx)->rounds)
387 #define AES_set_encrypt_key(x, y, z)                                                               \
388     do {                                                                                           \
389         rijndaelKeySetupEnc((z)->rd_key, x, y);                                                    \
390         (z)->rounds = y / 32 + 6;                                                                  \
391     } while (0)
392 #define AES_set_decrypt_key(x, y, z)                                                               \
393     do {                                                                                           \
394         rijndaelKeySetupDec((z)->rd_key, x, y);                                                    \
395         (z)->rounds = y / 32 + 6;                                                                  \
396     } while (0)
397 #else
398 typedef struct { uint32_t rd_key[OCB_KEY_LEN + 28]; } AES_KEY;
399 #define ROUNDS(ctx) (6 + OCB_KEY_LEN / 4)
400 #define AES_set_encrypt_key(x, y, z) rijndaelKeySetupEnc((z)->rd_key, x, y)
401 #define AES_set_decrypt_key(x, y, z) rijndaelKeySetupDec((z)->rd_key, x, y)
402 #endif
403 #define AES_encrypt(x, y, z) rijndaelEncrypt((z)->rd_key, ROUNDS(z), x, y)
404 #define AES_decrypt(x, y, z) rijndaelDecrypt((z)->rd_key, ROUNDS(z), x, y)
405 
/* ECB encrypt an array of blocks in place using the reference cipher. */
static void AES_ecb_encrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    unsigned i;
    for (i = nblks; i > 0; i--) {
        unsigned char* buf = (unsigned char*)(blks + (i - 1));
        AES_encrypt(buf, buf, key);
    }
}
412 
/* ECB decrypt an array of blocks, in place.
   FIX: marked static to match the sibling AES_ecb_encrypt_blks above and
   keep this internal helper out of the global symbol namespace. */
static void AES_ecb_decrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    while (nblks) {
        --nblks;
        AES_decrypt((unsigned char*)(blks + nblks), (unsigned char*)(blks + nblks), key);
    }
}
419 
420 #define BPI 4 /* Number of blocks in buffer per ECB call */
421 
422 /*----------*/
423 #elif USE_AES_NI
424 /*----------*/
425 
426 #include <wmmintrin.h>
427 
428 #if (OCB_KEY_LEN == 0)
429 typedef struct {
430     __m128i rd_key[15];
431     int rounds;
432 } AES_KEY;
433 #define ROUNDS(ctx) ((ctx)->rounds)
434 #else
435 typedef struct { __m128i rd_key[7 + OCB_KEY_LEN / 4]; } AES_KEY;
436 #define ROUNDS(ctx) (6 + OCB_KEY_LEN / 4)
437 #endif
438 
439 #define EXPAND_ASSIST(v1, v2, v3, v4, shuff_const, aes_const)                                      \
440     v2 = _mm_aeskeygenassist_si128(v4, aes_const);                                                 \
441     v3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v3), _mm_castsi128_ps(v1), 16));         \
442     v1 = _mm_xor_si128(v1, v3);                                                                    \
443     v3 = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(v3), _mm_castsi128_ps(v1), 140));        \
444     v1 = _mm_xor_si128(v1, v3);                                                                    \
445     v2 = _mm_shuffle_epi32(v2, shuff_const);                                                       \
446     v1 = _mm_xor_si128(v1, v2)
447 
448 #define EXPAND192_STEP(idx, aes_const)                                                             \
449     EXPAND_ASSIST(x0, x1, x2, x3, 85, aes_const);                                                  \
450     x3 = _mm_xor_si128(x3, _mm_slli_si128(x3, 4));                                                 \
451     x3 = _mm_xor_si128(x3, _mm_shuffle_epi32(x0, 255));                                            \
452     kp[idx] = _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(x0), 68));   \
453     kp[idx + 1] =                                                                                  \
454         _mm_castps_si128(_mm_shuffle_ps(_mm_castsi128_ps(x0), _mm_castsi128_ps(x3), 78));          \
455     EXPAND_ASSIST(x0, x1, x2, x3, 85, (aes_const * 2));                                            \
456     x3 = _mm_xor_si128(x3, _mm_slli_si128(x3, 4));                                                 \
457     x3 = _mm_xor_si128(x3, _mm_shuffle_epi32(x0, 255));                                            \
458     kp[idx + 2] = x0;                                                                              \
459     tmp = x3
460 
/* Expand a 128-bit user key into the 11 round keys of the AES-128 schedule
   using AESKEYGENASSIST. Each EXPAND_ASSIST performs one round of the
   Rijndael key-schedule recurrence with shuffle/xor chains; the final
   constant for each step is the round constant (1, 2, 4, ... 27, 54). */
static void AES_128_Key_Expansion(const unsigned char* userkey, void* key) {
    __m128i x0, x1, x2;
    __m128i* kp = (__m128i*)key;
    kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey); /* round 0 key = user key */
    x2 = _mm_setzero_si128();
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 1);
    kp[1] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 2);
    kp[2] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 4);
    kp[3] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 8);
    kp[4] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 16);
    kp[5] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 32);
    kp[6] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 64);
    kp[7] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 128);
    kp[8] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 27);
    kp[9] = x0;
    EXPAND_ASSIST(x0, x1, x2, x0, 255, 54);
    kp[10] = x0;
}
487 
/* Expand a 192-bit user key into the 13 round keys of the AES-192 schedule.
   The 192-bit schedule does not align with 128-bit slots, so each
   EXPAND192_STEP produces three consecutive slots, splicing 64-bit halves
   across them via shuffle_ps. */
static void AES_192_Key_Expansion(const unsigned char* userkey, void* key) {
    __m128i x0, x1, x2, x3, tmp, *kp = (__m128i*)key;
    kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey);
    tmp = x3 = _mm_loadu_si128((__m128i*)(userkey + 16)); /* upper 64 bits used */
    x2 = _mm_setzero_si128();
    EXPAND192_STEP(1, 1);
    EXPAND192_STEP(4, 4);
    EXPAND192_STEP(7, 16);
    EXPAND192_STEP(10, 64);
}
498 
/* Expand a 256-bit user key into the 15 round keys of the AES-256 schedule.
   Even slots derive from x0 with shuffle constant 255 (round-constant
   step); odd slots derive from x3 with constant 170 and round constant
   reused (the AES-256 schedule applies SubWord without a round constant on
   the intermediate step). */
static void AES_256_Key_Expansion(const unsigned char* userkey, void* key) {
    __m128i x0, x1, x2, x3, *kp = (__m128i*)key;
    kp[0] = x0 = _mm_loadu_si128((__m128i*)userkey);
    kp[1] = x3 = _mm_loadu_si128((__m128i*)(userkey + 16));
    x2 = _mm_setzero_si128();
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 1);
    kp[2] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 1);
    kp[3] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 2);
    kp[4] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 2);
    kp[5] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 4);
    kp[6] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 4);
    kp[7] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 8);
    kp[8] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 8);
    kp[9] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 16);
    kp[10] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 16);
    kp[11] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 32);
    kp[12] = x0;
    EXPAND_ASSIST(x3, x1, x2, x0, 170, 32);
    kp[13] = x3;
    EXPAND_ASSIST(x0, x1, x2, x3, 255, 64);
    kp[14] = x0;
}
531 
/* Expand userKey into an AES-NI round-key schedule.
   Returns 0 on success, -2 if bits is not 128/192/256 (mirrors OpenSSL's
   convention of a negative result for an unsupported key length).
   FIX: the original fell through for unsupported bit lengths and returned
   success with an uninitialized key schedule. */
static int AES_set_encrypt_key(const unsigned char* userKey, const int bits, AES_KEY* key) {
    if (bits == 128) {
        AES_128_Key_Expansion(userKey, key);
    } else if (bits == 192) {
        AES_192_Key_Expansion(userKey, key);
    } else if (bits == 256) {
        AES_256_Key_Expansion(userKey, key);
    } else {
        return -2; /* unsupported key length; schedule left untouched */
    }
#if (OCB_KEY_LEN == 0)
    key->rounds = 6 + bits / 32;
#endif
    return 0;
}
545 
/* Derive an AES-NI decryption schedule from an encryption schedule
   ("equivalent inverse cipher"): round keys in reverse order, with
   InvMixColumns (aesimc) applied to every key except the first and last. */
static void AES_set_decrypt_key_fast(AES_KEY* dkey, const AES_KEY* ekey) {
    int j = 0;
    int i = ROUNDS(ekey);
#if (OCB_KEY_LEN == 0)
    dkey->rounds = i;
#endif
    dkey->rd_key[i--] = ekey->rd_key[j++]; /* last slot <- round 0 key, untransformed */
    while (i)
        dkey->rd_key[i--] = _mm_aesimc_si128(ekey->rd_key[j++]);
    dkey->rd_key[i] = ekey->rd_key[j];     /* first slot <- final round key, untransformed */
}
557 
/* Build a decryption schedule directly from a user key: expand an
   encryption schedule into a temporary, then invert it in place.
   Propagates the expansion status instead of discarding it, and skips the
   inversion when expansion fails (previously the result of
   AES_set_encrypt_key was ignored). */
static int AES_set_decrypt_key(const unsigned char* userKey, const int bits, AES_KEY* key) {
    AES_KEY temp_key;
    int rc = AES_set_encrypt_key(userKey, bits, &temp_key);
    if (rc == 0)
        AES_set_decrypt_key_fast(key, &temp_key);
    return rc;
}
564 
/* Encrypt one 16-byte block with AES-NI. in and out must be 16-byte
   aligned: aligned load/store intrinsics are used. in may equal out. */
static inline void AES_encrypt(const unsigned char* in, unsigned char* out, const AES_KEY* key) {
    int j, rnds = ROUNDS(key);
    const __m128i* sched = ((__m128i*)(key->rd_key));
    __m128i tmp = _mm_load_si128((__m128i*)in);
    tmp = _mm_xor_si128(tmp, sched[0]);        /* initial whitening */
    for (j = 1; j < rnds; j++)
        tmp = _mm_aesenc_si128(tmp, sched[j]); /* full rounds */
    tmp = _mm_aesenclast_si128(tmp, sched[j]); /* final round (no MixColumns) */
    _mm_store_si128((__m128i*)out, tmp);
}
575 
/* Decrypt one 16-byte block with AES-NI using a schedule prepared by
   AES_set_decrypt_key_fast. in and out must be 16-byte aligned; in may
   equal out. */
static inline void AES_decrypt(const unsigned char* in, unsigned char* out, const AES_KEY* key) {
    int j, rnds = ROUNDS(key);
    const __m128i* sched = ((__m128i*)(key->rd_key));
    __m128i tmp = _mm_load_si128((__m128i*)in);
    tmp = _mm_xor_si128(tmp, sched[0]);        /* undo final-round whitening */
    for (j = 1; j < rnds; j++)
        tmp = _mm_aesdec_si128(tmp, sched[j]); /* inverse full rounds */
    tmp = _mm_aesdeclast_si128(tmp, sched[j]); /* inverse final round */
    _mm_store_si128((__m128i*)out, tmp);
}
586 
/* ECB-encrypt nblks blocks in place. Each round is applied across all
   blocks before advancing, so the independent aesenc instructions can
   overlap in the pipeline (the reason BPI is 4 or 8). */
static inline void AES_ecb_encrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    unsigned i, j, rnds = ROUNDS(key);
    const __m128i* sched = ((__m128i*)(key->rd_key));
    for (i = 0; i < nblks; ++i)
        blks[i] = _mm_xor_si128(blks[i], sched[0]); /* whitening for every block */
    for (j = 1; j < rnds; ++j)
        for (i = 0; i < nblks; ++i)
            blks[i] = _mm_aesenc_si128(blks[i], sched[j]);
    for (i = 0; i < nblks; ++i)
        blks[i] = _mm_aesenclast_si128(blks[i], sched[j]);
}
598 
/* ECB-decrypt nblks blocks in place; mirror of AES_ecb_encrypt_blks with
   the rounds interleaved across blocks for pipelining. */
static inline void AES_ecb_decrypt_blks(block* blks, unsigned nblks, AES_KEY* key) {
    unsigned i, j, rnds = ROUNDS(key);
    const __m128i* sched = ((__m128i*)(key->rd_key));
    for (i = 0; i < nblks; ++i)
        blks[i] = _mm_xor_si128(blks[i], sched[0]);
    for (j = 1; j < rnds; ++j)
        for (i = 0; i < nblks; ++i)
            blks[i] = _mm_aesdec_si128(blks[i], sched[j]);
    for (i = 0; i < nblks; ++i)
        blks[i] = _mm_aesdeclast_si128(blks[i], sched[j]);
}
610 
611 #define BPI 8 /* Number of blocks in buffer per ECB call   */
612 /* Set to 4 for Westmere, 8 for Sandy Bridge */
613 
614 #endif
615 
616 /* ----------------------------------------------------------------------- */
617 /* Define OCB context structure.                                           */
618 /* ----------------------------------------------------------------------- */
619 
620 /*------------------------------------------------------------------------
621 / Each item in the OCB context is stored either "memory correct" or
622 / "register correct". On big-endian machines, this is identical. On
623 / little-endian machines, one must choose whether the byte-string
624 / is in the correct order when it resides in memory or in registers.
625 / It must be register correct whenever it is to be manipulated
626 / arithmetically, but must be memory correct whenever it interacts
627 / with the plaintext or ciphertext.
628 /------------------------------------------------------------------------- */
629 
/* OCB mode state. On SSE/AltiVec builds this struct must live at a 16-byte
   aligned address (see ae_allocate). "Memory correct" / "register correct"
   are defined in the note above. */
struct _ae_ctx {
    block offset;        /* Memory correct               */
    block checksum;      /* Memory correct               */
    block Lstar;         /* Memory correct               */
    block Ldollar;       /* Memory correct               */
    block L[L_TABLE_SZ]; /* Memory correct               */
    block ad_checksum;   /* Memory correct               */
    block ad_offset;     /* Memory correct               */
    block cached_Top;    /* Memory correct               */
    uint64_t KtopStr[3]; /* Register correct, each item  */
    uint32_t ad_blocks_processed; /* presumably a running AD block counter for streaming calls -- confirm in ae_encrypt/ae_decrypt */
    uint32_t blocks_processed;    /* presumably a running text block counter for streaming calls -- confirm in ae_encrypt/ae_decrypt */
    AES_KEY decrypt_key;          /* schedule for AES_decrypt */
    AES_KEY encrypt_key;          /* schedule for AES_encrypt */
#if (OCB_TAG_LEN == 0)
    unsigned tag_len;             /* runtime-selected tag length, bytes (when not fixed at compile time) */
#endif
};
648 
649 /* ----------------------------------------------------------------------- */
650 /* L table lookup (or on-the-fly generation)                               */
651 /* ----------------------------------------------------------------------- */
652 
653 #if L_TABLE_SZ_IS_ENOUGH
654 #define getL(_ctx, _tz) ((_ctx)->L[_tz])
655 #else
/* Return L_{tz}: a table lookup when precomputed, otherwise derived by
   repeated GF(2^128) doubling of the largest cached entry. */
static block getL(const ae_ctx* ctx, unsigned tz) {
    if (tz < L_TABLE_SZ)
        return ctx->L[tz];
    else {
        unsigned i;
        /* Bring L[MAX] into registers, make it register correct */
        block rval = swap_if_le(ctx->L[L_TABLE_SZ - 1]);
        rval = double_block(rval); /* now rval = L_{L_TABLE_SZ} */
        /* Each further doubling advances one index; after the loop rval = L_{tz} */
        for (i = L_TABLE_SZ; i < tz; i++)
            rval = double_block(rval);
        return swap_if_le(rval); /* To memory correct */
    }
}
669 #endif
670 
671 /* ----------------------------------------------------------------------- */
672 /* Public functions                                                        */
673 /* ----------------------------------------------------------------------- */
674 
675 /* 32-bit SSE2 and Altivec systems need to be forced to allocate memory
676    on 16-byte alignments. (I believe all major 64-bit systems do already.) */
677 
/* Allocate an ae_ctx with the alignment the active block type needs.
   32-bit SSE2/AltiVec/NEON builds force 16-byte alignment explicitly;
   elsewhere plain malloc is assumed sufficient. Returns NULL on failure;
   pair with ae_free, which uses the matching deallocator. */
ae_ctx* ae_allocate(void* misc) {
    void* p;
    (void)misc; /* misc unused in this implementation */
#if (__SSE2__ && !_M_X64 && !_M_AMD64 && !__amd64__)
    p = _mm_malloc(sizeof(ae_ctx), 16); /* must be released with _mm_free */
#elif(__ALTIVEC__ && !__PPC64__)
    if (posix_memalign(&p, 16, sizeof(ae_ctx)) != 0)
        p = NULL;
#elif __ARM_NEON__
    p = memalign(16, sizeof(ae_ctx));
#else
    p = malloc(sizeof(ae_ctx));
#endif
    return (ae_ctx*)p;
}
693 
/* Release a context obtained from ae_allocate, using the deallocator that
   matches the allocator chosen there (_mm_malloc requires _mm_free). */
void ae_free(ae_ctx* ctx) {
#if (__SSE2__ && !_M_X64 && !_M_AMD64 && !__amd64__)
    _mm_free(ctx);
#else
    free(ctx);
#endif
}
701 
702 /* ----------------------------------------------------------------------- */
703 
int ae_clear(ae_ctx* ctx) /* Zero ae_ctx and undo initialization          */
{
    /* NOTE(review): plain memset is used to wipe key material.  Since ctx
     * escapes to the caller the compiler is unlikely to elide it, but an
     * explicit-zeroization primitive would be more robust -- confirm. */
    memset(ctx, 0, sizeof(ae_ctx));
    return AE_SUCCESS;
}
709 
/* Size of ae_ctx, for callers that allocate their own storage.  Such
 * storage must still honor the 16-byte alignment noted in the header. */
int ae_ctx_sizeof(void) {
    return (int)sizeof(ae_ctx);
}
713 
714 /* ----------------------------------------------------------------------- */
715 
/* Initialize ctx with the given key and tag length.  Only 12-byte nonces
 * are supported; anything else returns AE_NOT_SUPPORTED.
 * NOTE(review): the return value of AES_set_encrypt_key is ignored and,
 * when OCB_KEY_LEN == 0, key_len is used unvalidated -- confirm callers
 * sanitize it. */
int ae_init(ae_ctx* ctx, const void* key, int key_len, int nonce_len, int tag_len) {
    unsigned i;
    block tmp_blk;

    if (nonce_len != 12)
        return AE_NOT_SUPPORTED;

/* Initialize encryption & decryption keys */
#if (OCB_KEY_LEN > 0)
    /* Compile-time-fixed key size: the caller's key_len is overridden. */
    key_len = OCB_KEY_LEN;
#endif
    AES_set_encrypt_key((unsigned char*)key, key_len * 8, &ctx->encrypt_key);
#if USE_AES_NI
    AES_set_decrypt_key_fast(&ctx->decrypt_key, &ctx->encrypt_key);
#else
    AES_set_decrypt_key((unsigned char*)key, (int)(key_len * 8), &ctx->decrypt_key);
#endif

    /* Zero things that need zeroing */
    ctx->cached_Top = ctx->ad_checksum = zero_block();
    ctx->ad_blocks_processed = 0;

    /* Compute key-dependent values.  cached_Top is all-zero here, so this
     * sets Lstar = E_K(0); then Ldollar = double(Lstar), L[0] =
     * double(Ldollar), and L[i] = double(L[i-1]).  Doubling happens in
     * "register order" (tmp_blk); the table is stored in "memory order". */
    AES_encrypt((unsigned char*)&ctx->cached_Top, (unsigned char*)&ctx->Lstar, &ctx->encrypt_key);
    tmp_blk = swap_if_le(ctx->Lstar);
    tmp_blk = double_block(tmp_blk);
    ctx->Ldollar = swap_if_le(tmp_blk);
    tmp_blk = double_block(tmp_blk);
    ctx->L[0] = swap_if_le(tmp_blk);
    for (i = 1; i < L_TABLE_SZ; i++) {
        tmp_blk = double_block(tmp_blk);
        ctx->L[i] = swap_if_le(tmp_blk);
    }

#if (OCB_TAG_LEN == 0)
    /* Runtime-selected tag length (compile-time builds hard-code it). */
    ctx->tag_len = tag_len;
#else
    (void)tag_len; /* Suppress var not used error */
#endif

    return AE_SUCCESS;
}
758 
759 /* ----------------------------------------------------------------------- */
760 
/* Derive a message's initial offset from its 12-byte nonce.  The block
 * taglen-bits | 0..01 | nonce is formed, its low 6 bits are split off as
 * an index, and the remainder ("Top") is encrypted to Ktop.  Ktop plus its
 * 8-bit-shifted "stretch" are cached in ctx->KtopStr so successive nonces
 * sharing a Top reuse one AES call. */
static block gen_offset_from_nonce(ae_ctx* ctx, const void* nonce) {
    const union {
        unsigned x;
        unsigned char endian;
    } little = {1}; /* runtime endianness probe: endian==1 iff little-endian */
    union {
        uint32_t u32[4];
        uint8_t u8[16];
        block bl;
    } tmp;
    unsigned idx;

/* Replace cached nonce Top if needed */
#if (OCB_TAG_LEN > 0)
    if (little.endian)
        tmp.u32[0] = 0x01000000 + ((OCB_TAG_LEN * 8 % 128) << 1);
    else
        tmp.u32[0] = 0x00000001 + ((OCB_TAG_LEN * 8 % 128) << 25);
#else
    if (little.endian)
        tmp.u32[0] = 0x01000000 + ((ctx->tag_len * 8 % 128) << 1);
    else
        tmp.u32[0] = 0x00000001 + ((ctx->tag_len * 8 % 128) << 25);
#endif
    /* NOTE(review): these word loads assume the nonce pointer is at least
     * 4-byte aligned; the usage notes require 16-byte alignment for SIMD
     * builds -- confirm for other callers. */
    tmp.u32[1] = ((uint32_t*)nonce)[0];
    tmp.u32[2] = ((uint32_t*)nonce)[1];
    tmp.u32[3] = ((uint32_t*)nonce)[2];
    idx = (unsigned)(tmp.u8[15] & 0x3f);           /* Get low 6 bits of nonce  */
    tmp.u8[15] = tmp.u8[15] & 0xc0;                /* Zero low 6 bits of nonce */
    if (unequal_blocks(tmp.bl, ctx->cached_Top)) { /* Cached?       */
        ctx->cached_Top = tmp.bl;                  /* Update cache, KtopStr    */
        AES_encrypt(tmp.u8, (unsigned char*)&ctx->KtopStr, &ctx->encrypt_key);
        if (little.endian) { /* Make Register Correct    */
            ctx->KtopStr[0] = bswap64(ctx->KtopStr[0]);
            ctx->KtopStr[1] = bswap64(ctx->KtopStr[1]);
        }
        /* Stretch word: high 64 bits of Ktop ^ (Ktop << 8) */
        ctx->KtopStr[2] = ctx->KtopStr[0] ^ (ctx->KtopStr[0] << 8) ^ (ctx->KtopStr[1] >> 56);
    }
    return gen_offset(ctx->KtopStr, idx); /* Offset = Stretch[idx .. idx+127] */
}
801 
/* Absorb associated data into the running AD checksum (PMAC-style).
 * Processes ad_len bytes from ad, BPI blocks at a time; when 'final' is
 * set, the tail (whole blocks plus an optional partial block padded with
 * 10*) is folded in too.  Incremental state (ad_offset, ad_checksum,
 * ad_blocks_processed) lives in ctx so AE_PENDING calls can resume. */
static void process_ad(ae_ctx* ctx, const void* ad, int ad_len, int final) {
    union {
        uint32_t u32[4];
        uint8_t u8[16];
        block bl;
    } tmp;
    block ad_offset, ad_checksum;
    const block* adp = (block*)ad;
    unsigned i, k, tz, remaining;

    ad_offset = ctx->ad_offset;
    ad_checksum = ctx->ad_checksum;
    i = ad_len / (BPI * 16);
    if (i) {
        unsigned ad_block_num = ctx->ad_blocks_processed;
        do {
            block ta[BPI], oa[BPI];
            ad_block_num += BPI;
            /* ntz of the running block count selects the L value that
             * closes the Gray-code offset sequence for this group. */
            tz = ntz(ad_block_num);
            oa[0] = xor_block(ad_offset, ctx->L[0]);
            ta[0] = xor_block(oa[0], adp[0]);
            oa[1] = xor_block(oa[0], ctx->L[1]);
            ta[1] = xor_block(oa[1], adp[1]);
            oa[2] = xor_block(ad_offset, ctx->L[1]);
            ta[2] = xor_block(oa[2], adp[2]);
#if BPI == 4
            ad_offset = xor_block(oa[2], getL(ctx, tz));
            ta[3] = xor_block(ad_offset, adp[3]);
#elif BPI == 8
            oa[3] = xor_block(oa[2], ctx->L[2]);
            ta[3] = xor_block(oa[3], adp[3]);
            oa[4] = xor_block(oa[1], ctx->L[2]);
            ta[4] = xor_block(oa[4], adp[4]);
            oa[5] = xor_block(oa[0], ctx->L[2]);
            ta[5] = xor_block(oa[5], adp[5]);
            oa[6] = xor_block(ad_offset, ctx->L[2]);
            ta[6] = xor_block(oa[6], adp[6]);
            ad_offset = xor_block(oa[6], getL(ctx, tz));
            ta[7] = xor_block(ad_offset, adp[7]);
#endif
            AES_ecb_encrypt_blks(ta, BPI, &ctx->encrypt_key);
            /* AD checksum is the XOR of all E_K(A_i ^ Offset_i) values. */
            ad_checksum = xor_block(ad_checksum, ta[0]);
            ad_checksum = xor_block(ad_checksum, ta[1]);
            ad_checksum = xor_block(ad_checksum, ta[2]);
            ad_checksum = xor_block(ad_checksum, ta[3]);
#if (BPI == 8)
            ad_checksum = xor_block(ad_checksum, ta[4]);
            ad_checksum = xor_block(ad_checksum, ta[5]);
            ad_checksum = xor_block(ad_checksum, ta[6]);
            ad_checksum = xor_block(ad_checksum, ta[7]);
#endif
            adp += BPI;
        } while (--i);
        ctx->ad_blocks_processed = ad_block_num;
        ctx->ad_offset = ad_offset;
        ctx->ad_checksum = ad_checksum;
    }

    if (final) {
        block ta[BPI];

        /* Process remaining associated data, compute its tag contribution */
        remaining = ((unsigned)ad_len) % (BPI * 16);
        if (remaining) {
            k = 0;
#if (BPI == 8)
            if (remaining >= 64) {
                tmp.bl = xor_block(ad_offset, ctx->L[0]);
                ta[0] = xor_block(tmp.bl, adp[0]);
                tmp.bl = xor_block(tmp.bl, ctx->L[1]);
                ta[1] = xor_block(tmp.bl, adp[1]);
                ad_offset = xor_block(ad_offset, ctx->L[1]);
                ta[2] = xor_block(ad_offset, adp[2]);
                ad_offset = xor_block(ad_offset, ctx->L[2]);
                ta[3] = xor_block(ad_offset, adp[3]);
                remaining -= 64;
                k = 4;
            }
#endif
            if (remaining >= 32) {
                ad_offset = xor_block(ad_offset, ctx->L[0]);
                ta[k] = xor_block(ad_offset, adp[k]);
                ad_offset = xor_block(ad_offset, getL(ctx, ntz(k + 2)));
                ta[k + 1] = xor_block(ad_offset, adp[k + 1]);
                remaining -= 32;
                k += 2;
            }
            if (remaining >= 16) {
                ad_offset = xor_block(ad_offset, ctx->L[0]);
                ta[k] = xor_block(ad_offset, adp[k]);
                remaining = remaining - 16;
                ++k;
            }
            if (remaining) {
                /* Partial final block: offset uses Lstar and the data is
                 * padded with a single 0x80 byte then zeros (10* padding). */
                ad_offset = xor_block(ad_offset, ctx->Lstar);
                tmp.bl = zero_block();
                memcpy(tmp.u8, adp + k, remaining);
                tmp.u8[remaining] = (unsigned char)0x80u;
                ta[k] = xor_block(ad_offset, tmp.bl);
                ++k;
            }
            AES_ecb_encrypt_blks(ta, k, &ctx->encrypt_key);
            /* Fold the k encrypted blocks into the checksum (fallthrough
             * is intentional: each case adds one more block). */
            switch (k) {
#if (BPI == 8)
            case 8:
                ad_checksum = xor_block(ad_checksum, ta[7]);
                __fallthrough;
            case 7:
                ad_checksum = xor_block(ad_checksum, ta[6]);
                __fallthrough;
            case 6:
                ad_checksum = xor_block(ad_checksum, ta[5]);
                __fallthrough;
            case 5:
                ad_checksum = xor_block(ad_checksum, ta[4]);
                __fallthrough;
#endif
            case 4:
                ad_checksum = xor_block(ad_checksum, ta[3]);
                __fallthrough;
            case 3:
                ad_checksum = xor_block(ad_checksum, ta[2]);
                __fallthrough;
            case 2:
                ad_checksum = xor_block(ad_checksum, ta[1]);
                __fallthrough;
            case 1:
                ad_checksum = xor_block(ad_checksum, ta[0]);
            }
            ctx->ad_checksum = ad_checksum;
        }
    }
}
935 
936 /* ----------------------------------------------------------------------- */
937 
/* OCB3 encryption.  Encrypts pt_len bytes of pt into ct, absorbing ad_len
 * bytes of associated data.  A non-NULL nonce starts a new message; NULL
 * continues an AE_PENDING message (such chunks must be multiples of
 * BPI*16 bytes -- see usage notes).  When 'final' is set, the tag is
 * written to 'tag', or appended to ct when tag==NULL.  Returns the number
 * of bytes written to ct. */
int ae_encrypt(ae_ctx* ctx, const void* nonce, const void* pt, int pt_len, const void* ad,
               int ad_len, void* ct, void* tag, int final) {
    union {
        uint32_t u32[4];
        uint8_t u8[16];
        block bl;
    } tmp;
    block offset, checksum;
    unsigned i, k;
    block* ctp = (block*)ct;
    const block* ptp = (block*)pt;

    /* Non-null nonce means start of new message, init per-message values */
    if (nonce) {
        ctx->offset = gen_offset_from_nonce(ctx, nonce);
        ctx->ad_offset = ctx->checksum = zero_block();
        ctx->ad_blocks_processed = ctx->blocks_processed = 0;
        /* Negative ad_len means "no AD for this message": keep the
         * existing ad_checksum; zero or more resets it. */
        if (ad_len >= 0)
            ctx->ad_checksum = zero_block();
    }

    /* Process associated data */
    if (ad_len > 0)
        process_ad(ctx, ad, ad_len, final);

    /* Encrypt plaintext data BPI blocks at a time */
    offset = ctx->offset;
    checksum = ctx->checksum;
    i = pt_len / (BPI * 16);
    if (i) {
        block oa[BPI];
        unsigned block_num = ctx->blocks_processed;
        oa[BPI - 1] = offset;
        do {
            block ta[BPI];
            block_num += BPI;
            /* Offsets follow the OCB Gray-code sequence; oa[BPI-1] carries
             * the running offset in from the previous iteration. */
            oa[0] = xor_block(oa[BPI - 1], ctx->L[0]);
            ta[0] = xor_block(oa[0], ptp[0]);
            checksum = xor_block(checksum, ptp[0]);
            oa[1] = xor_block(oa[0], ctx->L[1]);
            ta[1] = xor_block(oa[1], ptp[1]);
            checksum = xor_block(checksum, ptp[1]);
            oa[2] = xor_block(oa[1], ctx->L[0]);
            ta[2] = xor_block(oa[2], ptp[2]);
            checksum = xor_block(checksum, ptp[2]);
#if BPI == 4
            oa[3] = xor_block(oa[2], getL(ctx, ntz(block_num)));
            ta[3] = xor_block(oa[3], ptp[3]);
            checksum = xor_block(checksum, ptp[3]);
#elif BPI == 8
            oa[3] = xor_block(oa[2], ctx->L[2]);
            ta[3] = xor_block(oa[3], ptp[3]);
            checksum = xor_block(checksum, ptp[3]);
            oa[4] = xor_block(oa[1], ctx->L[2]);
            ta[4] = xor_block(oa[4], ptp[4]);
            checksum = xor_block(checksum, ptp[4]);
            oa[5] = xor_block(oa[0], ctx->L[2]);
            ta[5] = xor_block(oa[5], ptp[5]);
            checksum = xor_block(checksum, ptp[5]);
            /* oa[7] still holds the previous iteration's final offset here;
             * it is not overwritten until three lines below. */
            oa[6] = xor_block(oa[7], ctx->L[2]);
            ta[6] = xor_block(oa[6], ptp[6]);
            checksum = xor_block(checksum, ptp[6]);
            oa[7] = xor_block(oa[6], getL(ctx, ntz(block_num)));
            ta[7] = xor_block(oa[7], ptp[7]);
            checksum = xor_block(checksum, ptp[7]);
#endif
            AES_ecb_encrypt_blks(ta, BPI, &ctx->encrypt_key);
            /* C_i = E_K(P_i ^ Offset_i) ^ Offset_i */
            ctp[0] = xor_block(ta[0], oa[0]);
            ctp[1] = xor_block(ta[1], oa[1]);
            ctp[2] = xor_block(ta[2], oa[2]);
            ctp[3] = xor_block(ta[3], oa[3]);
#if (BPI == 8)
            ctp[4] = xor_block(ta[4], oa[4]);
            ctp[5] = xor_block(ta[5], oa[5]);
            ctp[6] = xor_block(ta[6], oa[6]);
            ctp[7] = xor_block(ta[7], oa[7]);
#endif
            ptp += BPI;
            ctp += BPI;
        } while (--i);
        ctx->offset = offset = oa[BPI - 1];
        ctx->blocks_processed = block_num;
        ctx->checksum = checksum;
    }

    if (final) {
        block ta[BPI + 1], oa[BPI];

        /* Process remaining plaintext and compute its tag contribution    */
        unsigned remaining = ((unsigned)pt_len) % (BPI * 16);
        k = 0; /* How many blocks in ta[] need ECBing */
        if (remaining) {
#if (BPI == 8)
            if (remaining >= 64) {
                oa[0] = xor_block(offset, ctx->L[0]);
                ta[0] = xor_block(oa[0], ptp[0]);
                checksum = xor_block(checksum, ptp[0]);
                oa[1] = xor_block(oa[0], ctx->L[1]);
                ta[1] = xor_block(oa[1], ptp[1]);
                checksum = xor_block(checksum, ptp[1]);
                oa[2] = xor_block(oa[1], ctx->L[0]);
                ta[2] = xor_block(oa[2], ptp[2]);
                checksum = xor_block(checksum, ptp[2]);
                offset = oa[3] = xor_block(oa[2], ctx->L[2]);
                ta[3] = xor_block(offset, ptp[3]);
                checksum = xor_block(checksum, ptp[3]);
                remaining -= 64;
                k = 4;
            }
#endif
            if (remaining >= 32) {
                oa[k] = xor_block(offset, ctx->L[0]);
                ta[k] = xor_block(oa[k], ptp[k]);
                checksum = xor_block(checksum, ptp[k]);
                offset = oa[k + 1] = xor_block(oa[k], ctx->L[1]);
                ta[k + 1] = xor_block(offset, ptp[k + 1]);
                checksum = xor_block(checksum, ptp[k + 1]);
                remaining -= 32;
                k += 2;
            }
            if (remaining >= 16) {
                offset = oa[k] = xor_block(offset, ctx->L[0]);
                ta[k] = xor_block(offset, ptp[k]);
                checksum = xor_block(checksum, ptp[k]);
                remaining -= 16;
                ++k;
            }
            if (remaining) {
                /* Partial final block: checksum gets P||10*; ta[k] is
                 * Offset^Lstar, whose encryption (Pad) is XORed with the
                 * partial plaintext after the ECB call below. */
                tmp.bl = zero_block();
                memcpy(tmp.u8, ptp + k, remaining);
                tmp.u8[remaining] = (unsigned char)0x80u;
                checksum = xor_block(checksum, tmp.bl);
                ta[k] = offset = xor_block(offset, ctx->Lstar);
                ++k;
            }
        }
        offset = xor_block(offset, ctx->Ldollar); /* Part of tag gen */
        ta[k] = xor_block(offset, checksum);      /* Part of tag gen */
        AES_ecb_encrypt_blks(ta, k + 1, &ctx->encrypt_key);
        offset = xor_block(ta[k], ctx->ad_checksum); /* Part of tag gen */
        if (remaining) {
            /* Truncate Pad ^ (P||10*) to 'remaining' bytes of ciphertext */
            --k;
            tmp.bl = xor_block(tmp.bl, ta[k]);
            memcpy(ctp + k, tmp.u8, remaining);
        }
        /* Emit the k remaining whole ciphertext blocks (fallthrough is
         * intentional: each case emits one more block). */
        switch (k) {
#if (BPI == 8)
        case 7:
            ctp[6] = xor_block(ta[6], oa[6]);
            __fallthrough;
        case 6:
            ctp[5] = xor_block(ta[5], oa[5]);
            __fallthrough;
        case 5:
            ctp[4] = xor_block(ta[4], oa[4]);
            __fallthrough;
        case 4:
            ctp[3] = xor_block(ta[3], oa[3]);
            __fallthrough;
#endif
        case 3:
            ctp[2] = xor_block(ta[2], oa[2]);
            __fallthrough;
        case 2:
            ctp[1] = xor_block(ta[1], oa[1]);
            __fallthrough;
        case 1:
            ctp[0] = xor_block(ta[0], oa[0]);
        }

        /* Tag is placed at the correct location
         */
        if (tag) {
#if (OCB_TAG_LEN == 16)
            *(block*)tag = offset;
#elif(OCB_TAG_LEN > 0)
            memcpy((char*)tag, &offset, OCB_TAG_LEN);
#else
            memcpy((char*)tag, &offset, ctx->tag_len);
#endif
        } else {
            /* No tag buffer supplied: append the tag to the ciphertext */
#if (OCB_TAG_LEN > 0)
            memcpy((char*)ct + pt_len, &offset, OCB_TAG_LEN);
            pt_len += OCB_TAG_LEN;
#else
            memcpy((char*)ct + pt_len, &offset, ctx->tag_len);
            pt_len += ctx->tag_len;
#endif
        }
    }
    return (int)pt_len;
}
1130 
1131 /* ----------------------------------------------------------------------- */
1132 
1133 /* Compare two regions of memory, taking a constant amount of time for a
1134    given buffer size -- under certain assumptions about the compiler
1135    and machine, of course.
1136 
1137    Use this to avoid timing side-channel attacks.
1138 
1139    Returns 0 for memory regions with equal contents; non-zero otherwise. */
static int constant_time_memcmp(const void* av, const void* bv, size_t n) {
    const uint8_t* a = (const uint8_t*)av;
    const uint8_t* b = (const uint8_t*)bv;
    uint8_t diff = 0;
    size_t i;

    /* Accumulate differences with OR instead of branching, so the loop's
       timing depends only on n and never on where the buffers differ. */
    for (i = 0; i < n; i++)
        diff |= (uint8_t)(a[i] ^ b[i]);

    return (int)diff;
}
1154 
/* OCB3 decryption/verification.  Decrypts ct_len bytes of ct (minus the
 * bundled tag when final && tag==NULL) into pt and, when 'final' is set,
 * checks the authentication tag.  Returns the plaintext length, or
 * AE_INVALID on authentication failure.
 * NOTE(review): plaintext is written to pt BEFORE the tag is verified;
 * callers must discard pt whenever AE_INVALID is returned. */
int ae_decrypt(ae_ctx* ctx, const void* nonce, const void* ct, int ct_len, const void* ad,
               int ad_len, void* pt, const void* tag, int final) {
    union {
        uint32_t u32[4];
        uint8_t u8[16];
        block bl;
    } tmp;
    block offset, checksum;
    unsigned i, k;
    block* ctp = (block*)ct;
    block* ptp = (block*)pt;

    /* Reduce ct_len tag bundled in ct */
    if ((final) && (!tag))
#if (OCB_TAG_LEN > 0)
        ct_len -= OCB_TAG_LEN;
#else
        ct_len -= ctx->tag_len;
#endif

    /* Non-null nonce means start of new message, init per-message values */
    if (nonce) {
        ctx->offset = gen_offset_from_nonce(ctx, nonce);
        ctx->ad_offset = ctx->checksum = zero_block();
        ctx->ad_blocks_processed = ctx->blocks_processed = 0;
        /* Negative ad_len means "no AD for this message": keep the
         * existing ad_checksum; zero or more resets it. */
        if (ad_len >= 0)
            ctx->ad_checksum = zero_block();
    }

    /* Process associated data */
    if (ad_len > 0)
        process_ad(ctx, ad, ad_len, final);

    /* Decrypt ciphertext data BPI blocks at a time */
    offset = ctx->offset;
    checksum = ctx->checksum;
    i = ct_len / (BPI * 16);
    if (i) {
        block oa[BPI];
        unsigned block_num = ctx->blocks_processed;
        oa[BPI - 1] = offset;
        do {
            block ta[BPI];
            block_num += BPI;
            /* Same Gray-code offset sequence as encryption; oa[BPI-1]
             * carries the running offset from the previous iteration. */
            oa[0] = xor_block(oa[BPI - 1], ctx->L[0]);
            ta[0] = xor_block(oa[0], ctp[0]);
            oa[1] = xor_block(oa[0], ctx->L[1]);
            ta[1] = xor_block(oa[1], ctp[1]);
            oa[2] = xor_block(oa[1], ctx->L[0]);
            ta[2] = xor_block(oa[2], ctp[2]);
#if BPI == 4
            oa[3] = xor_block(oa[2], getL(ctx, ntz(block_num)));
            ta[3] = xor_block(oa[3], ctp[3]);
#elif BPI == 8
            oa[3] = xor_block(oa[2], ctx->L[2]);
            ta[3] = xor_block(oa[3], ctp[3]);
            oa[4] = xor_block(oa[1], ctx->L[2]);
            ta[4] = xor_block(oa[4], ctp[4]);
            oa[5] = xor_block(oa[0], ctx->L[2]);
            ta[5] = xor_block(oa[5], ctp[5]);
            /* oa[7] still holds the previous iteration's final offset here */
            oa[6] = xor_block(oa[7], ctx->L[2]);
            ta[6] = xor_block(oa[6], ctp[6]);
            oa[7] = xor_block(oa[6], getL(ctx, ntz(block_num)));
            ta[7] = xor_block(oa[7], ctp[7]);
#endif
            AES_ecb_decrypt_blks(ta, BPI, &ctx->decrypt_key);
            /* P_i = D_K(C_i ^ Offset_i) ^ Offset_i; checksum is over P */
            ptp[0] = xor_block(ta[0], oa[0]);
            checksum = xor_block(checksum, ptp[0]);
            ptp[1] = xor_block(ta[1], oa[1]);
            checksum = xor_block(checksum, ptp[1]);
            ptp[2] = xor_block(ta[2], oa[2]);
            checksum = xor_block(checksum, ptp[2]);
            ptp[3] = xor_block(ta[3], oa[3]);
            checksum = xor_block(checksum, ptp[3]);
#if (BPI == 8)
            ptp[4] = xor_block(ta[4], oa[4]);
            checksum = xor_block(checksum, ptp[4]);
            ptp[5] = xor_block(ta[5], oa[5]);
            checksum = xor_block(checksum, ptp[5]);
            ptp[6] = xor_block(ta[6], oa[6]);
            checksum = xor_block(checksum, ptp[6]);
            ptp[7] = xor_block(ta[7], oa[7]);
            checksum = xor_block(checksum, ptp[7]);
#endif
            ptp += BPI;
            ctp += BPI;
        } while (--i);
        ctx->offset = offset = oa[BPI - 1];
        ctx->blocks_processed = block_num;
        ctx->checksum = checksum;
    }

    if (final) {
        block ta[BPI + 1], oa[BPI];

        /* Process remaining ciphertext and compute its tag contribution    */
        unsigned remaining = ((unsigned)ct_len) % (BPI * 16);
        k = 0; /* How many blocks in ta[] need ECBing */
        if (remaining) {
#if (BPI == 8)
            if (remaining >= 64) {
                oa[0] = xor_block(offset, ctx->L[0]);
                ta[0] = xor_block(oa[0], ctp[0]);
                oa[1] = xor_block(oa[0], ctx->L[1]);
                ta[1] = xor_block(oa[1], ctp[1]);
                oa[2] = xor_block(oa[1], ctx->L[0]);
                ta[2] = xor_block(oa[2], ctp[2]);
                offset = oa[3] = xor_block(oa[2], ctx->L[2]);
                ta[3] = xor_block(offset, ctp[3]);
                remaining -= 64;
                k = 4;
            }
#endif
            if (remaining >= 32) {
                oa[k] = xor_block(offset, ctx->L[0]);
                ta[k] = xor_block(oa[k], ctp[k]);
                offset = oa[k + 1] = xor_block(oa[k], ctx->L[1]);
                ta[k + 1] = xor_block(offset, ctp[k + 1]);
                remaining -= 32;
                k += 2;
            }
            if (remaining >= 16) {
                offset = oa[k] = xor_block(offset, ctx->L[0]);
                ta[k] = xor_block(offset, ctp[k]);
                remaining -= 16;
                ++k;
            }
            if (remaining) {
                /* Partial final block: Pad = E_K(Offset ^ Lstar); the
                 * partial plaintext is C ^ Pad, and the checksum absorbs
                 * the recovered plaintext with 10* padding. */
                block pad;
                offset = xor_block(offset, ctx->Lstar);
                AES_encrypt((unsigned char*)&offset, tmp.u8, &ctx->encrypt_key);
                pad = tmp.bl;
                memcpy(tmp.u8, ctp + k, remaining);
                tmp.bl = xor_block(tmp.bl, pad);
                tmp.u8[remaining] = (unsigned char)0x80u;
                memcpy(ptp + k, tmp.u8, remaining);
                checksum = xor_block(checksum, tmp.bl);
            }
        }
        /* NOTE(review): reached with k == 0 when ct_len is a multiple of
         * BPI*16; assumes AES_ecb_decrypt_blks tolerates a zero count --
         * confirm for each backend. */
        AES_ecb_decrypt_blks(ta, k, &ctx->decrypt_key);
        /* Recover the k whole plaintext blocks and fold them into the
         * checksum (fallthrough is intentional). */
        switch (k) {
#if (BPI == 8)
        case 7:
            ptp[6] = xor_block(ta[6], oa[6]);
            checksum = xor_block(checksum, ptp[6]);
            __fallthrough;
        case 6:
            ptp[5] = xor_block(ta[5], oa[5]);
            checksum = xor_block(checksum, ptp[5]);
            __fallthrough;
        case 5:
            ptp[4] = xor_block(ta[4], oa[4]);
            checksum = xor_block(checksum, ptp[4]);
            __fallthrough;
        case 4:
            ptp[3] = xor_block(ta[3], oa[3]);
            checksum = xor_block(checksum, ptp[3]);
            __fallthrough;
#endif
        case 3:
            ptp[2] = xor_block(ta[2], oa[2]);
            checksum = xor_block(checksum, ptp[2]);
            __fallthrough;
        case 2:
            ptp[1] = xor_block(ta[1], oa[1]);
            checksum = xor_block(checksum, ptp[1]);
            __fallthrough;
        case 1:
            ptp[0] = xor_block(ta[0], oa[0]);
            checksum = xor_block(checksum, ptp[0]);
        }

        /* Calculate expected tag: E_K(Offset ^ Ldollar ^ Checksum) ^ AD */
        offset = xor_block(offset, ctx->Ldollar);
        tmp.bl = xor_block(offset, checksum);
        AES_encrypt(tmp.u8, tmp.u8, &ctx->encrypt_key);
        tmp.bl = xor_block(tmp.bl, ctx->ad_checksum); /* Full tag */

        /* Compare with proposed tag, change ct_len if invalid */
        /* NOTE(review): the 16-byte fast path uses unequal_blocks, which
         * may not be constant-time like the memcmp paths -- confirm the
         * timing of tag comparison is acceptable. */
        if ((OCB_TAG_LEN == 16) && tag) {
            if (unequal_blocks(tmp.bl, *(block*)tag))
                ct_len = AE_INVALID;
        } else {
#if (OCB_TAG_LEN > 0)
            int len = OCB_TAG_LEN;
#else
            int len = ctx->tag_len;
#endif
            if (tag) {
                if (constant_time_memcmp(tag, tmp.u8, len) != 0)
                    ct_len = AE_INVALID;
            } else {
                /* Tag is bundled after the ciphertext in ct */
                if (constant_time_memcmp((char*)ct + ct_len, tmp.u8, len) != 0)
                    ct_len = AE_INVALID;
            }
        }
    }
    return ct_len;
}
1354 
1355 /* ----------------------------------------------------------------------- */
1356 /* Simple test program                                                     */
1357 /* ----------------------------------------------------------------------- */
1358 
1359 #if 0
1360 
1361 #include <stdio.h>
1362 #include <time.h>
1363 
/* Portable n-byte alignment attribute for the test program's stack buffers */
#if __GNUC__
#define ALIGN(n) __attribute__((aligned(n)))
#elif _MSC_VER
#define ALIGN(n) __declspec(align(n))
#else /* Not GNU/Microsoft: delete alignment uses.     */
#define ALIGN(n)
#endif
1371 
/* Print an optional label s followed by len bytes of p in uppercase hex. */
static void pbuf(void *p, unsigned len, const void *s)
{
    const unsigned char *bytes = (const unsigned char *)p;
    unsigned i;

    if (s != NULL)
        printf("%s", (const char *)s);
    for (i = 0; i < len; i++)
        printf("%02X", (unsigned)bytes[i]);
    printf("\n");
}
1381 
1382 static void vectors(ae_ctx *ctx, int len)
1383 {
1384     ALIGN(16) char pt[128];
1385     ALIGN(16) char ct[144];
1386     ALIGN(16) char nonce[] = {0,1,2,3,4,5,6,7,8,9,10,11};
1387     int i;
1388     for (i=0; i < 128; i++) pt[i] = i;
1389     i = ae_encrypt(ctx,nonce,pt,len,pt,len,ct,NULL,AE_FINALIZE);
1390     printf("P=%d,A=%d: ",len,len); pbuf(ct, i, NULL);
1391     i = ae_encrypt(ctx,nonce,pt,0,pt,len,ct,NULL,AE_FINALIZE);
1392     printf("P=%d,A=%d: ",0,len); pbuf(ct, i, NULL);
1393     i = ae_encrypt(ctx,nonce,pt,len,pt,0,ct,NULL,AE_FINALIZE);
1394     printf("P=%d,A=%d: ",len,0); pbuf(ct, i, NULL);
1395 }
1396 
1397 void validate()
1398 {
1399     ALIGN(16) char pt[1024];
1400     ALIGN(16) char ct[1024];
1401     ALIGN(16) char tag[16];
1402     ALIGN(16) char nonce[12] = {0,};
1403     ALIGN(16) char key[32] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31};
1404     ae_ctx ctx;
1405     char *val_buf, *next;
1406     int i, len;
1407 
1408     val_buf = (char *)malloc(22400 + 16);
1409     next = val_buf = (char *)(((size_t)val_buf + 16) & ~((size_t)15));
1410 
1411     if (0) {
1412 		ae_init(&ctx, key, 16, 12, 16);
1413 		/* pbuf(&ctx, sizeof(ctx), "CTX: "); */
1414 		vectors(&ctx,0);
1415 		vectors(&ctx,8);
1416 		vectors(&ctx,16);
1417 		vectors(&ctx,24);
1418 		vectors(&ctx,32);
1419 		vectors(&ctx,40);
1420     }
1421 
1422     memset(key,0,32);
1423     memset(pt,0,128);
1424     ae_init(&ctx, key, OCB_KEY_LEN, 12, OCB_TAG_LEN);
1425 
1426     /* RFC Vector test */
1427     for (i = 0; i < 128; i++) {
1428         int first = ((i/3)/(BPI*16))*(BPI*16);
1429         int second = first;
1430         int third = i - (first + second);
1431 
1432         nonce[11] = i;
1433 
1434         if (0) {
1435             ae_encrypt(&ctx,nonce,pt,i,pt,i,ct,NULL,AE_FINALIZE);
1436             memcpy(next,ct,(size_t)i+OCB_TAG_LEN);
1437             next = next+i+OCB_TAG_LEN;
1438 
1439             ae_encrypt(&ctx,nonce,pt,i,pt,0,ct,NULL,AE_FINALIZE);
1440             memcpy(next,ct,(size_t)i+OCB_TAG_LEN);
1441             next = next+i+OCB_TAG_LEN;
1442 
1443             ae_encrypt(&ctx,nonce,pt,0,pt,i,ct,NULL,AE_FINALIZE);
1444             memcpy(next,ct,OCB_TAG_LEN);
1445             next = next+OCB_TAG_LEN;
1446         } else {
1447             ae_encrypt(&ctx,nonce,pt,first,pt,first,ct,NULL,AE_PENDING);
1448             ae_encrypt(&ctx,NULL,pt+first,second,pt+first,second,ct+first,NULL,AE_PENDING);
1449             ae_encrypt(&ctx,NULL,pt+first+second,third,pt+first+second,third,ct+first+second,NULL,AE_FINALIZE);
1450             memcpy(next,ct,(size_t)i+OCB_TAG_LEN);
1451             next = next+i+OCB_TAG_LEN;
1452 
1453             ae_encrypt(&ctx,nonce,pt,first,pt,0,ct,NULL,AE_PENDING);
1454             ae_encrypt(&ctx,NULL,pt+first,second,pt,0,ct+first,NULL,AE_PENDING);
1455             ae_encrypt(&ctx,NULL,pt+first+second,third,pt,0,ct+first+second,NULL,AE_FINALIZE);
1456             memcpy(next,ct,(size_t)i+OCB_TAG_LEN);
1457             next = next+i+OCB_TAG_LEN;
1458 
1459             ae_encrypt(&ctx,nonce,pt,0,pt,first,ct,NULL,AE_PENDING);
1460             ae_encrypt(&ctx,NULL,pt,0,pt+first,second,ct,NULL,AE_PENDING);
1461             ae_encrypt(&ctx,NULL,pt,0,pt+first+second,third,ct,NULL,AE_FINALIZE);
1462             memcpy(next,ct,OCB_TAG_LEN);
1463             next = next+OCB_TAG_LEN;
1464         }
1465 
1466     }
1467     nonce[11] = 0;
1468     ae_encrypt(&ctx,nonce,NULL,0,val_buf,next-val_buf,ct,tag,AE_FINALIZE);
1469     pbuf(tag,OCB_TAG_LEN,0);
1470 
1471 
1472     /* Encrypt/Decrypt test */
1473     for (i = 0; i < 128; i++) {
1474         int first = ((i/3)/(BPI*16))*(BPI*16);
1475         int second = first;
1476         int third = i - (first + second);
1477 
1478         nonce[11] = i%128;
1479 
1480         if (1) {
1481             len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,i,ct,tag,AE_FINALIZE);
1482             len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,-1,ct,tag,AE_FINALIZE);
1483             len = ae_decrypt(&ctx,nonce,ct,len,val_buf,-1,pt,tag,AE_FINALIZE);
1484             if (len == -1) { printf("Authentication error: %d\n", i); return; }
1485             if (len != i) { printf("Length error: %d\n", i); return; }
1486             if (memcmp(val_buf,pt,i)) { printf("Decrypt error: %d\n", i); return; }
1487         } else {
1488             len = ae_encrypt(&ctx,nonce,val_buf,i,val_buf,i,ct,NULL,AE_FINALIZE);
1489             ae_decrypt(&ctx,nonce,ct,first,val_buf,first,pt,NULL,AE_PENDING);
1490             ae_decrypt(&ctx,NULL,ct+first,second,val_buf+first,second,pt+first,NULL,AE_PENDING);
1491             len = ae_decrypt(&ctx,NULL,ct+first+second,len-(first+second),val_buf+first+second,third,pt+first+second,NULL,AE_FINALIZE);
1492             if (len == -1) { printf("Authentication error: %d\n", i); return; }
1493             if (memcmp(val_buf,pt,i)) { printf("Decrypt error: %d\n", i); return; }
1494         }
1495 
1496     }
1497     printf("Decrypt: PASS\n");
1498 }
1499 
/* Driver for the (normally disabled) self-test build: run validate(). */
int main()
{
    validate();
    return 0;
}
1505 #endif
1506 
/* Human-readable identifier of the AES backend compiled into this build */
#if USE_AES_NI
char infoString[] = "OCB3 (AES-NI)";
#elif USE_REFERENCE_AES
char infoString[] = "OCB3 (Reference)";
#elif USE_OPENSSL_AES
char infoString[] = "OCB3 (OpenSSL)";
#endif
1514