1 /*
2  ---------------------------------------------------------------------------
3  Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
4 
5  LICENSE TERMS
6 
7  The redistribution and use of this software (with or without changes)
8  is allowed without the payment of fees or royalties provided that:
9 
10   1. source code distributions include the above copyright notice, this
11      list of conditions and the following disclaimer;
12 
13   2. binary distributions include the above copyright notice, this list
14      of conditions and the following disclaimer in their documentation;
15 
16   3. the name of the copyright holder is not used to endorse products
17      built using this software without specific written permission.
18 
19  DISCLAIMER
20 
21  This software is provided 'as is' with no explicit or implied warranties
22  in respect of its properties, including, but not limited to, correctness
23  and/or fitness for purpose.
24  ---------------------------------------------------------------------------
25  Issue 09/09/2006
26 
27  This is an AES implementation that uses only 8-bit byte operations on the
28  cipher state (there are options to use 32-bit types if available).
29 
30  The combination of mix columns and byte substitution used here is based on
31  that developed by Karl Malbrain. His contribution is acknowledged.
32  */
33 
34 /* define if you have a fast memcpy function on your system */
35 #if 1
36 #define HAVE_MEMCPY
37 #include <string.h>
38 #if 0
39 #if defined(_MSC_VER)
40 #include <intrin.h>
41 #pragma intrinsic(memcpy)
42 #endif
43 #endif
44 #endif
45 
46 #include <stdint.h>
47 #include <stdlib.h>
48 
49 /* define if you have fast 32-bit types on your system */
50 #if 1
51 #define HAVE_UINT_32T
52 #endif
53 
/* define if you want the table-driven implementation (USE_TABLES) */
55 #if 1
56 #define USE_TABLES
57 #endif
58 
59 /*  On Intel Core 2 duo VERSION_1 is faster */
60 
61 /* alternative versions (test for performance on your system) */
62 #if 1
63 #define VERSION_1
64 #endif
65 
66 #include "aes.h"
67 
68 #if defined(HAVE_UINT_32T)
69 typedef uint32_t uint_32t;
70 #endif
71 
72 /* functions for finite field multiplication in the AES Galois field    */
73 
74 #define WPOLY 0x011b
75 #define BPOLY 0x1b
76 #define DPOLY 0x008d
77 
78 #define f1(x) (x)
79 #define f2(x) (((x) << 1) ^ ((((x) >> 7) & 1) * WPOLY))
80 #define f4(x) (((x) << 2) ^ ((((x) >> 6) & 1) * WPOLY) ^ ((((x) >> 6) & 2) * WPOLY))
81 #define f8(x) (((x) << 3) ^ ((((x) >> 5) & 1) * WPOLY) ^ ((((x) >> 5) & 2) * WPOLY) ^ ((((x) >> 5) & 4) * WPOLY))
82 #define d2(x) (((x) >> 1) ^ ((x)&1 ? DPOLY : 0))
83 
84 #define f3(x) (f2(x) ^ (x))
85 #define f9(x) (f8(x) ^ (x))
86 #define fb(x) (f8(x) ^ f2(x) ^ (x))
87 #define fd(x) (f8(x) ^ f4(x) ^ (x))
88 #define fe(x) (f8(x) ^ f4(x) ^ f2(x))
89 
90 #if defined(USE_TABLES)
91 
92 #define sb_data(w)                                                                                                  \
93   { /* S Box data values */                                                                                         \
94     w(0x63), w(0x7c), w(0x77), w(0x7b), w(0xf2), w(0x6b), w(0x6f), w(0xc5), w(0x30), w(0x01), w(0x67), w(0x2b),     \
95         w(0xfe), w(0xd7), w(0xab), w(0x76), w(0xca), w(0x82), w(0xc9), w(0x7d), w(0xfa), w(0x59), w(0x47), w(0xf0), \
96         w(0xad), w(0xd4), w(0xa2), w(0xaf), w(0x9c), w(0xa4), w(0x72), w(0xc0), w(0xb7), w(0xfd), w(0x93), w(0x26), \
97         w(0x36), w(0x3f), w(0xf7), w(0xcc), w(0x34), w(0xa5), w(0xe5), w(0xf1), w(0x71), w(0xd8), w(0x31), w(0x15), \
98         w(0x04), w(0xc7), w(0x23), w(0xc3), w(0x18), w(0x96), w(0x05), w(0x9a), w(0x07), w(0x12), w(0x80), w(0xe2), \
99         w(0xeb), w(0x27), w(0xb2), w(0x75), w(0x09), w(0x83), w(0x2c), w(0x1a), w(0x1b), w(0x6e), w(0x5a), w(0xa0), \
100         w(0x52), w(0x3b), w(0xd6), w(0xb3), w(0x29), w(0xe3), w(0x2f), w(0x84), w(0x53), w(0xd1), w(0x00), w(0xed), \
101         w(0x20), w(0xfc), w(0xb1), w(0x5b), w(0x6a), w(0xcb), w(0xbe), w(0x39), w(0x4a), w(0x4c), w(0x58), w(0xcf), \
102         w(0xd0), w(0xef), w(0xaa), w(0xfb), w(0x43), w(0x4d), w(0x33), w(0x85), w(0x45), w(0xf9), w(0x02), w(0x7f), \
103         w(0x50), w(0x3c), w(0x9f), w(0xa8), w(0x51), w(0xa3), w(0x40), w(0x8f), w(0x92), w(0x9d), w(0x38), w(0xf5), \
104         w(0xbc), w(0xb6), w(0xda), w(0x21), w(0x10), w(0xff), w(0xf3), w(0xd2), w(0xcd), w(0x0c), w(0x13), w(0xec), \
105         w(0x5f), w(0x97), w(0x44), w(0x17), w(0xc4), w(0xa7), w(0x7e), w(0x3d), w(0x64), w(0x5d), w(0x19), w(0x73), \
106         w(0x60), w(0x81), w(0x4f), w(0xdc), w(0x22), w(0x2a), w(0x90), w(0x88), w(0x46), w(0xee), w(0xb8), w(0x14), \
107         w(0xde), w(0x5e), w(0x0b), w(0xdb), w(0xe0), w(0x32), w(0x3a), w(0x0a), w(0x49), w(0x06), w(0x24), w(0x5c), \
108         w(0xc2), w(0xd3), w(0xac), w(0x62), w(0x91), w(0x95), w(0xe4), w(0x79), w(0xe7), w(0xc8), w(0x37), w(0x6d), \
109         w(0x8d), w(0xd5), w(0x4e), w(0xa9), w(0x6c), w(0x56), w(0xf4), w(0xea), w(0x65), w(0x7a), w(0xae), w(0x08), \
110         w(0xba), w(0x78), w(0x25), w(0x2e), w(0x1c), w(0xa6), w(0xb4), w(0xc6), w(0xe8), w(0xdd), w(0x74), w(0x1f), \
111         w(0x4b), w(0xbd), w(0x8b), w(0x8a), w(0x70), w(0x3e), w(0xb5), w(0x66), w(0x48), w(0x03), w(0xf6), w(0x0e), \
112         w(0x61), w(0x35), w(0x57), w(0xb9), w(0x86), w(0xc1), w(0x1d), w(0x9e), w(0xe1), w(0xf8), w(0x98), w(0x11), \
113         w(0x69), w(0xd9), w(0x8e), w(0x94), w(0x9b), w(0x1e), w(0x87), w(0xe9), w(0xce), w(0x55), w(0x28), w(0xdf), \
114         w(0x8c), w(0xa1), w(0x89), w(0x0d), w(0xbf), w(0xe6), w(0x42), w(0x68), w(0x41), w(0x99), w(0x2d), w(0x0f), \
115         w(0xb0), w(0x54), w(0xbb), w(0x16)                                                                          \
116   }
117 
118 #define isb_data(w)                                                                                                 \
119   { /* inverse S Box data values */                                                                                 \
120     w(0x52), w(0x09), w(0x6a), w(0xd5), w(0x30), w(0x36), w(0xa5), w(0x38), w(0xbf), w(0x40), w(0xa3), w(0x9e),     \
121         w(0x81), w(0xf3), w(0xd7), w(0xfb), w(0x7c), w(0xe3), w(0x39), w(0x82), w(0x9b), w(0x2f), w(0xff), w(0x87), \
122         w(0x34), w(0x8e), w(0x43), w(0x44), w(0xc4), w(0xde), w(0xe9), w(0xcb), w(0x54), w(0x7b), w(0x94), w(0x32), \
123         w(0xa6), w(0xc2), w(0x23), w(0x3d), w(0xee), w(0x4c), w(0x95), w(0x0b), w(0x42), w(0xfa), w(0xc3), w(0x4e), \
124         w(0x08), w(0x2e), w(0xa1), w(0x66), w(0x28), w(0xd9), w(0x24), w(0xb2), w(0x76), w(0x5b), w(0xa2), w(0x49), \
125         w(0x6d), w(0x8b), w(0xd1), w(0x25), w(0x72), w(0xf8), w(0xf6), w(0x64), w(0x86), w(0x68), w(0x98), w(0x16), \
126         w(0xd4), w(0xa4), w(0x5c), w(0xcc), w(0x5d), w(0x65), w(0xb6), w(0x92), w(0x6c), w(0x70), w(0x48), w(0x50), \
127         w(0xfd), w(0xed), w(0xb9), w(0xda), w(0x5e), w(0x15), w(0x46), w(0x57), w(0xa7), w(0x8d), w(0x9d), w(0x84), \
128         w(0x90), w(0xd8), w(0xab), w(0x00), w(0x8c), w(0xbc), w(0xd3), w(0x0a), w(0xf7), w(0xe4), w(0x58), w(0x05), \
129         w(0xb8), w(0xb3), w(0x45), w(0x06), w(0xd0), w(0x2c), w(0x1e), w(0x8f), w(0xca), w(0x3f), w(0x0f), w(0x02), \
130         w(0xc1), w(0xaf), w(0xbd), w(0x03), w(0x01), w(0x13), w(0x8a), w(0x6b), w(0x3a), w(0x91), w(0x11), w(0x41), \
131         w(0x4f), w(0x67), w(0xdc), w(0xea), w(0x97), w(0xf2), w(0xcf), w(0xce), w(0xf0), w(0xb4), w(0xe6), w(0x73), \
132         w(0x96), w(0xac), w(0x74), w(0x22), w(0xe7), w(0xad), w(0x35), w(0x85), w(0xe2), w(0xf9), w(0x37), w(0xe8), \
133         w(0x1c), w(0x75), w(0xdf), w(0x6e), w(0x47), w(0xf1), w(0x1a), w(0x71), w(0x1d), w(0x29), w(0xc5), w(0x89), \
134         w(0x6f), w(0xb7), w(0x62), w(0x0e), w(0xaa), w(0x18), w(0xbe), w(0x1b), w(0xfc), w(0x56), w(0x3e), w(0x4b), \
135         w(0xc6), w(0xd2), w(0x79), w(0x20), w(0x9a), w(0xdb), w(0xc0), w(0xfe), w(0x78), w(0xcd), w(0x5a), w(0xf4), \
136         w(0x1f), w(0xdd), w(0xa8), w(0x33), w(0x88), w(0x07), w(0xc7), w(0x31), w(0xb1), w(0x12), w(0x10), w(0x59), \
137         w(0x27), w(0x80), w(0xec), w(0x5f), w(0x60), w(0x51), w(0x7f), w(0xa9), w(0x19), w(0xb5), w(0x4a), w(0x0d), \
138         w(0x2d), w(0xe5), w(0x7a), w(0x9f), w(0x93), w(0xc9), w(0x9c), w(0xef), w(0xa0), w(0xe0), w(0x3b), w(0x4d), \
139         w(0xae), w(0x2a), w(0xf5), w(0xb0), w(0xc8), w(0xeb), w(0xbb), w(0x3c), w(0x83), w(0x53), w(0x99), w(0x61), \
140         w(0x17), w(0x2b), w(0x04), w(0x7e), w(0xba), w(0x77), w(0xd6), w(0x26), w(0xe1), w(0x69), w(0x14), w(0x63), \
141         w(0x55), w(0x21), w(0x0c), w(0x7d)                                                                          \
142   }
143 
144 #define mm_data(w)                                                                                                  \
145   { /* basic data for forming finite field tables */                                                                \
146     w(0x00), w(0x01), w(0x02), w(0x03), w(0x04), w(0x05), w(0x06), w(0x07), w(0x08), w(0x09), w(0x0a), w(0x0b),     \
147         w(0x0c), w(0x0d), w(0x0e), w(0x0f), w(0x10), w(0x11), w(0x12), w(0x13), w(0x14), w(0x15), w(0x16), w(0x17), \
148         w(0x18), w(0x19), w(0x1a), w(0x1b), w(0x1c), w(0x1d), w(0x1e), w(0x1f), w(0x20), w(0x21), w(0x22), w(0x23), \
149         w(0x24), w(0x25), w(0x26), w(0x27), w(0x28), w(0x29), w(0x2a), w(0x2b), w(0x2c), w(0x2d), w(0x2e), w(0x2f), \
150         w(0x30), w(0x31), w(0x32), w(0x33), w(0x34), w(0x35), w(0x36), w(0x37), w(0x38), w(0x39), w(0x3a), w(0x3b), \
151         w(0x3c), w(0x3d), w(0x3e), w(0x3f), w(0x40), w(0x41), w(0x42), w(0x43), w(0x44), w(0x45), w(0x46), w(0x47), \
152         w(0x48), w(0x49), w(0x4a), w(0x4b), w(0x4c), w(0x4d), w(0x4e), w(0x4f), w(0x50), w(0x51), w(0x52), w(0x53), \
153         w(0x54), w(0x55), w(0x56), w(0x57), w(0x58), w(0x59), w(0x5a), w(0x5b), w(0x5c), w(0x5d), w(0x5e), w(0x5f), \
154         w(0x60), w(0x61), w(0x62), w(0x63), w(0x64), w(0x65), w(0x66), w(0x67), w(0x68), w(0x69), w(0x6a), w(0x6b), \
155         w(0x6c), w(0x6d), w(0x6e), w(0x6f), w(0x70), w(0x71), w(0x72), w(0x73), w(0x74), w(0x75), w(0x76), w(0x77), \
156         w(0x78), w(0x79), w(0x7a), w(0x7b), w(0x7c), w(0x7d), w(0x7e), w(0x7f), w(0x80), w(0x81), w(0x82), w(0x83), \
157         w(0x84), w(0x85), w(0x86), w(0x87), w(0x88), w(0x89), w(0x8a), w(0x8b), w(0x8c), w(0x8d), w(0x8e), w(0x8f), \
158         w(0x90), w(0x91), w(0x92), w(0x93), w(0x94), w(0x95), w(0x96), w(0x97), w(0x98), w(0x99), w(0x9a), w(0x9b), \
159         w(0x9c), w(0x9d), w(0x9e), w(0x9f), w(0xa0), w(0xa1), w(0xa2), w(0xa3), w(0xa4), w(0xa5), w(0xa6), w(0xa7), \
160         w(0xa8), w(0xa9), w(0xaa), w(0xab), w(0xac), w(0xad), w(0xae), w(0xaf), w(0xb0), w(0xb1), w(0xb2), w(0xb3), \
161         w(0xb4), w(0xb5), w(0xb6), w(0xb7), w(0xb8), w(0xb9), w(0xba), w(0xbb), w(0xbc), w(0xbd), w(0xbe), w(0xbf), \
162         w(0xc0), w(0xc1), w(0xc2), w(0xc3), w(0xc4), w(0xc5), w(0xc6), w(0xc7), w(0xc8), w(0xc9), w(0xca), w(0xcb), \
163         w(0xcc), w(0xcd), w(0xce), w(0xcf), w(0xd0), w(0xd1), w(0xd2), w(0xd3), w(0xd4), w(0xd5), w(0xd6), w(0xd7), \
164         w(0xd8), w(0xd9), w(0xda), w(0xdb), w(0xdc), w(0xdd), w(0xde), w(0xdf), w(0xe0), w(0xe1), w(0xe2), w(0xe3), \
165         w(0xe4), w(0xe5), w(0xe6), w(0xe7), w(0xe8), w(0xe9), w(0xea), w(0xeb), w(0xec), w(0xed), w(0xee), w(0xef), \
166         w(0xf0), w(0xf1), w(0xf2), w(0xf3), w(0xf4), w(0xf5), w(0xf6), w(0xf7), w(0xf8), w(0xf9), w(0xfa), w(0xfb), \
167         w(0xfc), w(0xfd), w(0xfe), w(0xff)                                                                          \
168   }
169 
/* forward and inverse S-boxes, expanded from the data above        */
/* (f1 is the identity, so the raw table entries are used as-is)    */
static const uint_8t sbox[256] = sb_data(f1);
static const uint_8t isbox[256] = isb_data(f1);

/* S-box entries pre-multiplied by 2 and 3 in GF(2^8): these fuse   */
/* byte substitution with the mix-columns step during encryption    */
static const uint_8t gfm2_sbox[256] = sb_data(f2);
static const uint_8t gfm3_sbox[256] = sb_data(f3);

/* GF(2^8) multiplication tables for 0x09, 0x0b, 0x0d and 0x0e --   */
/* the coefficients of the inverse mix-columns matrix               */
static const uint_8t gfmul_9[256] = mm_data(f9);
static const uint_8t gfmul_b[256] = mm_data(fb);
static const uint_8t gfmul_d[256] = mm_data(fd);
static const uint_8t gfmul_e[256] = mm_data(fe);

/* table-driven forms of the primitive cipher operations            */
#define s_box(x) sbox[(x)]
#define is_box(x) isbox[(x)]
#define gfm2_sb(x) gfm2_sbox[(x)]
#define gfm3_sb(x) gfm3_sbox[(x)]
#define gfm_9(x) gfmul_9[(x)]
#define gfm_b(x) gfmul_b[(x)]
#define gfm_d(x) gfmul_d[(x)]
#define gfm_e(x) gfmul_e[(x)]
189 
190 #else
191 
192 /* this is the high bit of x right shifted by 1 */
193 /* position. Since the starting polynomial has  */
194 /* 9 bits (0x11b), this right shift keeps the   */
195 /* values of all top bits within a byte         */
196 
/* Return the highest set bit of (x >> 1) as a power of two; 0 for  */
/* inputs below 2.  Used to track the degree of the polynomials in  */
/* gf_inv below.                                                    */
static uint_8t hibit(const uint_8t x) {
  uint_8t r = (uint_8t)((x >> 1) | (x >> 2));

  /* smear the top bit of r into every lower position ...           */
  r |= (r >> 2);
  r |= (r >> 4);
  /* ... then isolate it again: (all-ones-below + 1) >> 1           */
  return (r + 1) >> 1;
}
204 
205 /* return the inverse of the finite field element x */
206 
/* Return the multiplicative inverse of x in GF(2^8), computed with */
/* the extended Euclidean algorithm on the field polynomials.  n1   */
/* and n2 hold the leading bits of p1 and p2 as powers of two, so   */
/* 'n2 /= n1' yields the shift (again a power of two) that aligns   */
/* the leading terms.  0 and 1 are returned unchanged.              */
static uint_8t gf_inv(const uint_8t x) {
  uint_8t p1 = x, p2 = BPOLY, n1 = hibit(x), n2 = 0x80, v1 = 1, v2 = 0;

  if (x < 2) return x;

  for (;;) {
    if (n1)
      while (n2 >= n1) /* divide polynomial p2 by p1    */
      {
        n2 /= n1;               /* shift smaller polynomial left */
        p2 ^= (p1 * n2) & 0xff; /* and remove from larger one    */
        v2 ^= (v1 * n2);        /* shift accumulated value and   */
        n2 = hibit(p2);         /* add into result               */
      }
    else
      return v1; /* p1 has reached 1: v1 is the inverse */

    if (n2) /* repeat with values swapped    */
      while (n1 >= n2) {
        n1 /= n2;
        p1 ^= p2 * n1;
        v1 ^= v2 * n1;
        n1 = hibit(p1);
      }
    else
      return v2; /* p2 has reached 1: v2 is the inverse */
  }
}
235 
236 /* The forward and inverse affine transformations used in the S-box */
/* Forward affine transformation of the AES S-box: XOR of x with    */
/* its four left-rotations, then the constant 0x63.                 */
uint_8t fwd_affine(const uint_8t x) {
#if defined(HAVE_UINT_32T)
  /* accumulate the shifted copies in a 32-bit word, then fold the  */
  /* overflow byte back down (w >> 8) to realize the rotation       */
  uint_32t w = x;
  w ^= (w << 1) ^ (w << 2) ^ (w << 3) ^ (w << 4);
  return 0x63 ^ ((w ^ (w >> 8)) & 0xff);
#else
  return 0x63 ^ x ^ (x << 1) ^ (x << 2) ^ (x << 3) ^ (x << 4) ^ (x >> 7) ^ (x >> 6) ^ (x >> 5) ^ (x >> 4);
#endif
}
246 
/* Inverse affine transformation: XOR of the rotations of x by 1,   */
/* 3 and 6 bit positions, then the constant 0x05.                   */
uint_8t inv_affine(const uint_8t x) {
#if defined(HAVE_UINT_32T)
  /* same fold-the-overflow-byte trick as fwd_affine                */
  uint_32t w = x;
  w = (w << 1) ^ (w << 3) ^ (w << 6);
  return 0x05 ^ ((w ^ (w >> 8)) & 0xff);
#else
  return 0x05 ^ (x << 1) ^ (x << 3) ^ (x << 6) ^ (x >> 7) ^ (x >> 5) ^ (x >> 2);
#endif
}
256 
257 #define s_box(x) fwd_affine(gf_inv(x))
258 #define is_box(x) gf_inv(inv_affine(x))
259 #define gfm2_sb(x) f2(s_box(x))
260 #define gfm3_sb(x) f3(s_box(x))
261 #define gfm_9(x) f9(x)
262 #define gfm_b(x) fb(x)
263 #define gfm_d(x) fd(x)
264 #define gfm_e(x) fe(x)
265 
266 #endif
267 
268 #if defined(HAVE_MEMCPY)
269 #define block_copy_nn(d, s, l) memcpy(d, s, l)
270 #define block_copy(d, s) memcpy(d, s, N_BLOCK)
271 #else
272 #define block_copy_nn(d, s, l) copy_block_nn(d, s, l)
273 #define block_copy(d, s) copy_block(d, s)
274 #endif
275 
276 #if !defined(HAVE_MEMCPY)
/* Copy one 16-byte block from s to d (fallback used only when      */
/* HAVE_MEMCPY is not defined).                                     */
/* NOTE(review): the 32-bit path type-puns through uint_32t and so  */
/* assumes both pointers are 4-byte aligned -- confirm that callers */
/* only pass suitably aligned state/key buffers.                    */
static void copy_block(void* d, const void* s) {
#if defined(HAVE_UINT_32T)
  ((uint_32t*)d)[0] = ((uint_32t*)s)[0];
  ((uint_32t*)d)[1] = ((uint_32t*)s)[1];
  ((uint_32t*)d)[2] = ((uint_32t*)s)[2];
  ((uint_32t*)d)[3] = ((uint_32t*)s)[3];
#else
  ((uint_8t*)d)[0] = ((uint_8t*)s)[0];
  ((uint_8t*)d)[1] = ((uint_8t*)s)[1];
  ((uint_8t*)d)[2] = ((uint_8t*)s)[2];
  ((uint_8t*)d)[3] = ((uint_8t*)s)[3];
  ((uint_8t*)d)[4] = ((uint_8t*)s)[4];
  ((uint_8t*)d)[5] = ((uint_8t*)s)[5];
  ((uint_8t*)d)[6] = ((uint_8t*)s)[6];
  ((uint_8t*)d)[7] = ((uint_8t*)s)[7];
  ((uint_8t*)d)[8] = ((uint_8t*)s)[8];
  ((uint_8t*)d)[9] = ((uint_8t*)s)[9];
  ((uint_8t*)d)[10] = ((uint_8t*)s)[10];
  ((uint_8t*)d)[11] = ((uint_8t*)s)[11];
  ((uint_8t*)d)[12] = ((uint_8t*)s)[12];
  ((uint_8t*)d)[13] = ((uint_8t*)s)[13];
  ((uint_8t*)d)[14] = ((uint_8t*)s)[14];
  ((uint_8t*)d)[15] = ((uint_8t*)s)[15];
#endif
}
302 
/* Copy nn bytes from s to d, one byte at a time (fallback used     */
/* only when HAVE_MEMCPY is not defined).  Rewritten with explicit  */
/* byte pointers: the original '*((uint_8t*)d)++' increments the    */
/* result of a cast, which is not an lvalue in ISO C (it only       */
/* compiled as a legacy GCC extension).                             */
static void copy_block_nn(void* d, const void* s, uint_8t nn) {
  uint_8t* dp = (uint_8t*)d;
  const uint_8t* sp = (const uint_8t*)s;

  while (nn--) *dp++ = *sp++;
}
306 #endif
307 
/* XOR the 16-byte block at s into the block at d (d ^= s).         */
/* NOTE(review): the 32-bit path assumes 4-byte alignment of both   */
/* pointers -- confirm for the buffers passed by callers.           */
static void xor_block(void* d, const void* s) {
#if defined(HAVE_UINT_32T)
  ((uint_32t*)d)[0] ^= ((uint_32t*)s)[0];
  ((uint_32t*)d)[1] ^= ((uint_32t*)s)[1];
  ((uint_32t*)d)[2] ^= ((uint_32t*)s)[2];
  ((uint_32t*)d)[3] ^= ((uint_32t*)s)[3];
#else
  ((uint_8t*)d)[0] ^= ((uint_8t*)s)[0];
  ((uint_8t*)d)[1] ^= ((uint_8t*)s)[1];
  ((uint_8t*)d)[2] ^= ((uint_8t*)s)[2];
  ((uint_8t*)d)[3] ^= ((uint_8t*)s)[3];
  ((uint_8t*)d)[4] ^= ((uint_8t*)s)[4];
  ((uint_8t*)d)[5] ^= ((uint_8t*)s)[5];
  ((uint_8t*)d)[6] ^= ((uint_8t*)s)[6];
  ((uint_8t*)d)[7] ^= ((uint_8t*)s)[7];
  ((uint_8t*)d)[8] ^= ((uint_8t*)s)[8];
  ((uint_8t*)d)[9] ^= ((uint_8t*)s)[9];
  ((uint_8t*)d)[10] ^= ((uint_8t*)s)[10];
  ((uint_8t*)d)[11] ^= ((uint_8t*)s)[11];
  ((uint_8t*)d)[12] ^= ((uint_8t*)s)[12];
  ((uint_8t*)d)[13] ^= ((uint_8t*)s)[13];
  ((uint_8t*)d)[14] ^= ((uint_8t*)s)[14];
  ((uint_8t*)d)[15] ^= ((uint_8t*)s)[15];
#endif
}
333 
/* d = s ^ k over one 16-byte block: copy the state and apply a     */
/* round key in a single pass.  Same alignment caveat as xor_block  */
/* for the 32-bit path.                                             */
static void copy_and_key(void* d, const void* s, const void* k) {
#if defined(HAVE_UINT_32T)
  ((uint_32t*)d)[0] = ((uint_32t*)s)[0] ^ ((uint_32t*)k)[0];
  ((uint_32t*)d)[1] = ((uint_32t*)s)[1] ^ ((uint_32t*)k)[1];
  ((uint_32t*)d)[2] = ((uint_32t*)s)[2] ^ ((uint_32t*)k)[2];
  ((uint_32t*)d)[3] = ((uint_32t*)s)[3] ^ ((uint_32t*)k)[3];
#elif 1
  ((uint_8t*)d)[0] = ((uint_8t*)s)[0] ^ ((uint_8t*)k)[0];
  ((uint_8t*)d)[1] = ((uint_8t*)s)[1] ^ ((uint_8t*)k)[1];
  ((uint_8t*)d)[2] = ((uint_8t*)s)[2] ^ ((uint_8t*)k)[2];
  ((uint_8t*)d)[3] = ((uint_8t*)s)[3] ^ ((uint_8t*)k)[3];
  ((uint_8t*)d)[4] = ((uint_8t*)s)[4] ^ ((uint_8t*)k)[4];
  ((uint_8t*)d)[5] = ((uint_8t*)s)[5] ^ ((uint_8t*)k)[5];
  ((uint_8t*)d)[6] = ((uint_8t*)s)[6] ^ ((uint_8t*)k)[6];
  ((uint_8t*)d)[7] = ((uint_8t*)s)[7] ^ ((uint_8t*)k)[7];
  ((uint_8t*)d)[8] = ((uint_8t*)s)[8] ^ ((uint_8t*)k)[8];
  ((uint_8t*)d)[9] = ((uint_8t*)s)[9] ^ ((uint_8t*)k)[9];
  ((uint_8t*)d)[10] = ((uint_8t*)s)[10] ^ ((uint_8t*)k)[10];
  ((uint_8t*)d)[11] = ((uint_8t*)s)[11] ^ ((uint_8t*)k)[11];
  ((uint_8t*)d)[12] = ((uint_8t*)s)[12] ^ ((uint_8t*)k)[12];
  ((uint_8t*)d)[13] = ((uint_8t*)s)[13] ^ ((uint_8t*)k)[13];
  ((uint_8t*)d)[14] = ((uint_8t*)s)[14] ^ ((uint_8t*)k)[14];
  ((uint_8t*)d)[15] = ((uint_8t*)s)[15] ^ ((uint_8t*)k)[15];
#else
  block_copy(d, s);
  xor_block(d, k);
#endif
}
362 
/* AddRoundKey: XOR the round key k into the state d, in place */
static void add_round_key(uint_8t d[N_BLOCK], const uint_8t k[N_BLOCK]) {
  xor_block(d, k);
}
366 
/* Combined SubBytes + ShiftRows, in place.  The state is held      */
/* column-major (st[0..3] is column 0), so row r is the bytes       */
/* st[r], st[r+4], st[r+8], st[r+12], and row r rotates left by r.  */
static void shift_sub_rows(uint_8t st[N_BLOCK]) {
  uint_8t tt;

  /* row 0: no rotation, substitution only */
  st[0] = s_box(st[0]);
  st[4] = s_box(st[4]);
  st[8] = s_box(st[8]);
  st[12] = s_box(st[12]);

  /* row 1: rotate left by one position */
  tt = st[1];
  st[1] = s_box(st[5]);
  st[5] = s_box(st[9]);
  st[9] = s_box(st[13]);
  st[13] = s_box(tt);

  /* row 2: rotate by two (two independent swaps) */
  tt = st[2];
  st[2] = s_box(st[10]);
  st[10] = s_box(tt);
  tt = st[6];
  st[6] = s_box(st[14]);
  st[14] = s_box(tt);

  /* row 3: rotate left by three (= right by one) */
  tt = st[15];
  st[15] = s_box(st[11]);
  st[11] = s_box(st[7]);
  st[7] = s_box(st[3]);
  st[3] = s_box(tt);
}
394 
/* Combined InvSubBytes + InvShiftRows, in place: each row rotates  */
/* in the opposite direction to shift_sub_rows, using the inverse   */
/* S-box.                                                           */
static void inv_shift_sub_rows(uint_8t st[N_BLOCK]) {
  uint_8t tt;

  /* row 0: no rotation */
  st[0] = is_box(st[0]);
  st[4] = is_box(st[4]);
  st[8] = is_box(st[8]);
  st[12] = is_box(st[12]);

  /* row 1: rotate right by one */
  tt = st[13];
  st[13] = is_box(st[9]);
  st[9] = is_box(st[5]);
  st[5] = is_box(st[1]);
  st[1] = is_box(tt);

  /* row 2: rotate by two (self-inverse swaps) */
  tt = st[2];
  st[2] = is_box(st[10]);
  st[10] = is_box(tt);
  tt = st[6];
  st[6] = is_box(st[14]);
  st[14] = is_box(tt);

  /* row 3: rotate right by three (= left by one) */
  tt = st[3];
  st[3] = is_box(st[7]);
  st[7] = is_box(st[11]);
  st[11] = is_box(st[15]);
  st[15] = is_box(tt);
}
422 
/*  Combined SubBytes + ShiftRows + MixColumns for one middle round.
    With VERSION_1 the state dt is transformed in place via a private
    copy; otherwise the caller supplies the source state st and a
    separate destination.  The scattered st indices (0,5,10,15 etc.)
    realize ShiftRows on the column-major state, and the 2,3,1,1
    coefficient pattern is the MixColumns matrix applied to s-boxed
    bytes.                                                           */
#if defined(VERSION_1)
static void mix_sub_columns(uint_8t dt[N_BLOCK]) {
  uint_8t st[N_BLOCK];
  block_copy(st, dt);
#else
static void mix_sub_columns(uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK]) {
#endif
  /* output column 0 <- shifted input bytes st[0], st[5], st[10], st[15] */
  dt[0] = gfm2_sb(st[0]) ^ gfm3_sb(st[5]) ^ s_box(st[10]) ^ s_box(st[15]);
  dt[1] = s_box(st[0]) ^ gfm2_sb(st[5]) ^ gfm3_sb(st[10]) ^ s_box(st[15]);
  dt[2] = s_box(st[0]) ^ s_box(st[5]) ^ gfm2_sb(st[10]) ^ gfm3_sb(st[15]);
  dt[3] = gfm3_sb(st[0]) ^ s_box(st[5]) ^ s_box(st[10]) ^ gfm2_sb(st[15]);

  /* output column 1 */
  dt[4] = gfm2_sb(st[4]) ^ gfm3_sb(st[9]) ^ s_box(st[14]) ^ s_box(st[3]);
  dt[5] = s_box(st[4]) ^ gfm2_sb(st[9]) ^ gfm3_sb(st[14]) ^ s_box(st[3]);
  dt[6] = s_box(st[4]) ^ s_box(st[9]) ^ gfm2_sb(st[14]) ^ gfm3_sb(st[3]);
  dt[7] = gfm3_sb(st[4]) ^ s_box(st[9]) ^ s_box(st[14]) ^ gfm2_sb(st[3]);

  /* output column 2 */
  dt[8] = gfm2_sb(st[8]) ^ gfm3_sb(st[13]) ^ s_box(st[2]) ^ s_box(st[7]);
  dt[9] = s_box(st[8]) ^ gfm2_sb(st[13]) ^ gfm3_sb(st[2]) ^ s_box(st[7]);
  dt[10] = s_box(st[8]) ^ s_box(st[13]) ^ gfm2_sb(st[2]) ^ gfm3_sb(st[7]);
  dt[11] = gfm3_sb(st[8]) ^ s_box(st[13]) ^ s_box(st[2]) ^ gfm2_sb(st[7]);

  /* output column 3 */
  dt[12] = gfm2_sb(st[12]) ^ gfm3_sb(st[1]) ^ s_box(st[6]) ^ s_box(st[11]);
  dt[13] = s_box(st[12]) ^ gfm2_sb(st[1]) ^ gfm3_sb(st[6]) ^ s_box(st[11]);
  dt[14] = s_box(st[12]) ^ s_box(st[1]) ^ gfm2_sb(st[6]) ^ gfm3_sb(st[11]);
  dt[15] = gfm3_sb(st[12]) ^ s_box(st[1]) ^ s_box(st[6]) ^ gfm2_sb(st[11]);
}
450 
/*  Combined InvMixColumns + InvSubBytes for one middle round of
    decryption, with InvShiftRows realized by the scattered dt
    indices.  The e,b,d,9 coefficients are the inverse MixColumns
    matrix; the inverse S-box is applied after the column mix.       */
#if defined(VERSION_1)
static void inv_mix_sub_columns(uint_8t dt[N_BLOCK]) {
  uint_8t st[N_BLOCK];
  block_copy(st, dt);
#else
static void inv_mix_sub_columns(uint_8t dt[N_BLOCK], uint_8t st[N_BLOCK]) {
#endif
  /* input column 0 -> output bytes 0, 5, 10, 15 */
  dt[0] = is_box(gfm_e(st[0]) ^ gfm_b(st[1]) ^ gfm_d(st[2]) ^ gfm_9(st[3]));
  dt[5] = is_box(gfm_9(st[0]) ^ gfm_e(st[1]) ^ gfm_b(st[2]) ^ gfm_d(st[3]));
  dt[10] = is_box(gfm_d(st[0]) ^ gfm_9(st[1]) ^ gfm_e(st[2]) ^ gfm_b(st[3]));
  dt[15] = is_box(gfm_b(st[0]) ^ gfm_d(st[1]) ^ gfm_9(st[2]) ^ gfm_e(st[3]));

  /* input column 1 */
  dt[4] = is_box(gfm_e(st[4]) ^ gfm_b(st[5]) ^ gfm_d(st[6]) ^ gfm_9(st[7]));
  dt[9] = is_box(gfm_9(st[4]) ^ gfm_e(st[5]) ^ gfm_b(st[6]) ^ gfm_d(st[7]));
  dt[14] = is_box(gfm_d(st[4]) ^ gfm_9(st[5]) ^ gfm_e(st[6]) ^ gfm_b(st[7]));
  dt[3] = is_box(gfm_b(st[4]) ^ gfm_d(st[5]) ^ gfm_9(st[6]) ^ gfm_e(st[7]));

  /* input column 2 */
  dt[8] = is_box(gfm_e(st[8]) ^ gfm_b(st[9]) ^ gfm_d(st[10]) ^ gfm_9(st[11]));
  dt[13] = is_box(gfm_9(st[8]) ^ gfm_e(st[9]) ^ gfm_b(st[10]) ^ gfm_d(st[11]));
  dt[2] = is_box(gfm_d(st[8]) ^ gfm_9(st[9]) ^ gfm_e(st[10]) ^ gfm_b(st[11]));
  dt[7] = is_box(gfm_b(st[8]) ^ gfm_d(st[9]) ^ gfm_9(st[10]) ^ gfm_e(st[11]));

  /* input column 3 */
  dt[12] = is_box(gfm_e(st[12]) ^ gfm_b(st[13]) ^ gfm_d(st[14]) ^ gfm_9(st[15]));
  dt[1] = is_box(gfm_9(st[12]) ^ gfm_e(st[13]) ^ gfm_b(st[14]) ^ gfm_d(st[15]));
  dt[6] = is_box(gfm_d(st[12]) ^ gfm_9(st[13]) ^ gfm_e(st[14]) ^ gfm_b(st[15]));
  dt[11] = is_box(gfm_b(st[12]) ^ gfm_d(st[13]) ^ gfm_9(st[14]) ^ gfm_e(st[15]));
}
478 
479 #if defined(AES_ENC_PREKEYED) || defined(AES_DEC_PREKEYED)
480 
481 /*  Set the cipher key for the pre-keyed version */
482 /*  NOTE: If the length_type used for the key length is an
483     unsigned 8-bit character, a key length of 256 bits must
484     be entered as a length in bytes (valid inputs are hence
485     128, 192, 16, 24 and 32).
486 */
487 
/*  Expand 'key' into ctx->ksch and record the round count in
    ctx->rnd (10/12/14).  Returns 0 on success, (return_type)-1 on
    an invalid length (the context is then marked unkeyed).          */
return_type aes_set_key(const unsigned char key[], length_type keylen, aes_context ctx[1]) {
  uint_8t cc, rc, hi;

  switch (keylen) {
    case 16:
    case 128: /* length in bits (128 = 8*16) */
      keylen = 16;
      break;
    case 24:
    case 192: /* length in bits (192 = 8*24) */
      keylen = 24;
      break;
    case 32:
      /*    case 256:           length in bits (256 = 8*32) */
      keylen = 32;
      break;
    default:
      ctx->rnd = 0; /* mark the context as unkeyed */
      return (return_type)-1;
  }
  block_copy_nn(ctx->ksch, key, keylen);
  hi = (keylen + 28) << 2;  /* schedule size in bytes: 16 * (rounds + 1) */
  ctx->rnd = (hi >> 4) - 1; /* 10, 12 or 14 rounds */
  /* generate one 4-byte schedule word per iteration */
  for (cc = keylen, rc = 1; cc < hi; cc += 4) {
    uint_8t tt, t0, t1, t2, t3;

    t0 = ctx->ksch[cc - 4]; /* the previous schedule word */
    t1 = ctx->ksch[cc - 3];
    t2 = ctx->ksch[cc - 2];
    t3 = ctx->ksch[cc - 1];
    if (cc % keylen == 0) { /* first word of a group: RotWord + SubWord + rcon */
      tt = t0;
      t0 = s_box(t1) ^ rc;
      t1 = s_box(t2);
      t2 = s_box(t3);
      t3 = s_box(tt);
      rc = f2(rc); /* next round constant: doubling in GF(2^8) */
    } else if (keylen > 24 && cc % keylen == 16) { /* extra SubWord for 256-bit keys */
      t0 = s_box(t0);
      t1 = s_box(t1);
      t2 = s_box(t2);
      t3 = s_box(t3);
    }
    tt = cc - keylen; /* XOR in the word one key length back */
    ctx->ksch[cc + 0] = ctx->ksch[tt + 0] ^ t0;
    ctx->ksch[cc + 1] = ctx->ksch[tt + 1] ^ t1;
    ctx->ksch[cc + 2] = ctx->ksch[tt + 2] ^ t2;
    ctx->ksch[cc + 3] = ctx->ksch[tt + 3] ^ t3;
  }
  return 0;
}
539 
540 #endif
541 
542 #if defined(AES_ENC_PREKEYED)
543 
544 /*  Encrypt a single block of 16 bytes */
545 
/*  Encrypt one 16-byte block using the pre-expanded schedule in
    ctx.  Returns 0 on success, (return_type)-1 if the context was
    never keyed (ctx->rnd == 0).  'in' and 'out' may alias: all
    work happens in the local state s1.                              */
return_type aes_encrypt(const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], const aes_context ctx[1]) {
  if (ctx->rnd) {
    uint_8t s1[N_BLOCK], r;
    copy_and_key(s1, in, ctx->ksch); /* initial AddRoundKey */

    /* middle rounds */
    for (r = 1; r < ctx->rnd; ++r)
#if defined(VERSION_1)
    {
      mix_sub_columns(s1);
      add_round_key(s1, ctx->ksch + r * N_BLOCK);
    }
#else
    {
      uint_8t s2[N_BLOCK];
      mix_sub_columns(s2, s1);
      copy_and_key(s1, s2, ctx->ksch + r * N_BLOCK);
    }
#endif
    /* final round: no MixColumns */
    shift_sub_rows(s1);
    copy_and_key(out, s1, ctx->ksch + r * N_BLOCK);
  } else
    return (return_type)-1;
  return 0;
}
570 
571 /* CBC encrypt a number of blocks (input and return an IV) */
572 
573 return_type aes_cbc_encrypt(
574     const unsigned char* in, unsigned char* out, int n_block, unsigned char iv[N_BLOCK], const aes_context ctx[1]) {
575   while (n_block--) {
576     xor_block(iv, in);
577     if (aes_encrypt(iv, iv, ctx) != EXIT_SUCCESS) return EXIT_FAILURE;
578     memcpy(out, iv, N_BLOCK);
579     in += N_BLOCK;
580     out += N_BLOCK;
581   }
582   return EXIT_SUCCESS;
583 }
584 
585 #endif
586 
587 #if defined(AES_DEC_PREKEYED)
588 
589 /*  Decrypt a single block of 16 bytes */
590 
/*  Decrypt one 16-byte block using the pre-expanded schedule in
    ctx, walking the round keys backwards.  Returns 0 on success,
    (return_type)-1 if the context was never keyed.  'in' and 'out'
    may alias: all work happens in the local state s1.               */
return_type aes_decrypt(const unsigned char in[N_BLOCK], unsigned char out[N_BLOCK], const aes_context ctx[1]) {
  if (ctx->rnd) {
    uint_8t s1[N_BLOCK], r;
    /* undo the final round: last round key, then inverse rows/bytes */
    copy_and_key(s1, in, ctx->ksch + ctx->rnd * N_BLOCK);
    inv_shift_sub_rows(s1);

    /* middle rounds, schedule indices ctx->rnd-1 .. 1 */
    for (r = ctx->rnd; --r;)
#if defined(VERSION_1)
    {
      add_round_key(s1, ctx->ksch + r * N_BLOCK);
      inv_mix_sub_columns(s1);
    }
#else
    {
      uint_8t s2[N_BLOCK];
      copy_and_key(s2, s1, ctx->ksch + r * N_BLOCK);
      inv_mix_sub_columns(s1, s2);
    }
#endif
    /* undo the initial AddRoundKey */
    copy_and_key(out, s1, ctx->ksch);
  } else
    return (return_type)-1;
  return 0;
}
615 
616 /* CBC decrypt a number of blocks (input and return an IV) */
617 
618 return_type aes_cbc_decrypt(
619     const unsigned char* in, unsigned char* out, int n_block, unsigned char iv[N_BLOCK], const aes_context ctx[1]) {
620   while (n_block--) {
621     uint_8t tmp[N_BLOCK];
622 
623     memcpy(tmp, in, N_BLOCK);
624     if (aes_decrypt(in, out, ctx) != EXIT_SUCCESS) return EXIT_FAILURE;
625     xor_block(out, iv);
626     memcpy(iv, tmp, N_BLOCK);
627     in += N_BLOCK;
628     out += N_BLOCK;
629   }
630   return EXIT_SUCCESS;
631 }
632 
633 #endif
634 
635 #if defined(AES_ENC_128_OTFK)
636 
/*  The 'on the fly' encryption key update for 128 bit keys */
638 
639 static void update_encrypt_key_128(uint_8t k[N_BLOCK], uint_8t* rc) {
640   uint_8t cc;
641 
642   k[0] ^= s_box(k[13]) ^ *rc;
643   k[1] ^= s_box(k[14]);
644   k[2] ^= s_box(k[15]);
645   k[3] ^= s_box(k[12]);
646   *rc = f2(*rc);
647 
648   for (cc = 4; cc < 16; cc += 4) {
649     k[cc + 0] ^= k[cc - 4];
650     k[cc + 1] ^= k[cc - 3];
651     k[cc + 2] ^= k[cc - 2];
652     k[cc + 3] ^= k[cc - 1];
653   }
654 }
655 
656 /*  Encrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
657 
/*  Encrypt one block with 128-bit 'on the fly' keying: the caller's
    key is copied into o_key (when the buffers differ) and evolved
    round by round, so on return o_key holds the final round key.    */
void aes_encrypt_128(
    const unsigned char in[N_BLOCK],
    unsigned char out[N_BLOCK],
    const unsigned char key[N_BLOCK],
    unsigned char o_key[N_BLOCK]) {
  uint_8t s1[N_BLOCK], r, rc = 1;

  if (o_key != key) block_copy(o_key, key);
  copy_and_key(s1, in, o_key); /* initial AddRoundKey */

  /* nine middle rounds */
  for (r = 1; r < 10; ++r)
#if defined(VERSION_1)
  {
    mix_sub_columns(s1);
    update_encrypt_key_128(o_key, &rc);
    add_round_key(s1, o_key);
  }
#else
  {
    uint_8t s2[N_BLOCK];
    mix_sub_columns(s2, s1);
    update_encrypt_key_128(o_key, &rc);
    copy_and_key(s1, s2, o_key);
  }
#endif

  /* final round: no MixColumns */
  shift_sub_rows(s1);
  update_encrypt_key_128(o_key, &rc);
  copy_and_key(out, s1, o_key);
}
688 
689 #endif
690 
691 #if defined(AES_DEC_128_OTFK)
692 
/*  The 'on the fly' decryption key update for 128 bit keys */
694 
695 static void update_decrypt_key_128(uint_8t k[N_BLOCK], uint_8t* rc) {
696   uint_8t cc;
697 
698   for (cc = 12; cc > 0; cc -= 4) {
699     k[cc + 0] ^= k[cc - 4];
700     k[cc + 1] ^= k[cc - 3];
701     k[cc + 2] ^= k[cc - 2];
702     k[cc + 3] ^= k[cc - 1];
703   }
704   *rc = d2(*rc);
705   k[0] ^= s_box(k[13]) ^ *rc;
706   k[1] ^= s_box(k[14]);
707   k[2] ^= s_box(k[15]);
708   k[3] ^= s_box(k[12]);
709 }
710 
711 /*  Decrypt a single block of 16 bytes with 'on the fly' 128 bit keying */
712 
/*  Decrypt one block with 128-bit 'on the fly' keying.  'key' must
    be the FINAL round key of the forward schedule (as left in o_key
    by aes_encrypt_128); it is stepped backwards each round.  rc
    starts at 0x6c, the round-constant state reached after the ten
    forward updates (d2 halves it before each backwards step).       */
void aes_decrypt_128(
    const unsigned char in[N_BLOCK],
    unsigned char out[N_BLOCK],
    const unsigned char key[N_BLOCK],
    unsigned char o_key[N_BLOCK]) {
  uint_8t s1[N_BLOCK], r, rc = 0x6c;
  if (o_key != key) block_copy(o_key, key);

  /* undo the final round */
  copy_and_key(s1, in, o_key);
  inv_shift_sub_rows(s1);

  /* nine middle rounds, keys regenerated in reverse */
  for (r = 10; --r;)
#if defined(VERSION_1)
  {
    update_decrypt_key_128(o_key, &rc);
    add_round_key(s1, o_key);
    inv_mix_sub_columns(s1);
  }
#else
  {
    uint_8t s2[N_BLOCK];
    update_decrypt_key_128(o_key, &rc);
    copy_and_key(s2, s1, o_key);
    inv_mix_sub_columns(s1, s2);
  }
#endif
  /* undo the initial AddRoundKey */
  update_decrypt_key_128(o_key, &rc);
  copy_and_key(out, s1, o_key);
}
742 
743 #endif
744 
745 #if defined(AES_ENC_256_OTFK)
746 
/*  The 'on the fly' encryption key update for 256 bit keys */
748 
749 static void update_encrypt_key_256(uint_8t k[2 * N_BLOCK], uint_8t* rc) {
750   uint_8t cc;
751 
752   k[0] ^= s_box(k[29]) ^ *rc;
753   k[1] ^= s_box(k[30]);
754   k[2] ^= s_box(k[31]);
755   k[3] ^= s_box(k[28]);
756   *rc = f2(*rc);
757 
758   for (cc = 4; cc < 16; cc += 4) {
759     k[cc + 0] ^= k[cc - 4];
760     k[cc + 1] ^= k[cc - 3];
761     k[cc + 2] ^= k[cc - 2];
762     k[cc + 3] ^= k[cc - 1];
763   }
764 
765   k[16] ^= s_box(k[12]);
766   k[17] ^= s_box(k[13]);
767   k[18] ^= s_box(k[14]);
768   k[19] ^= s_box(k[15]);
769 
770   for (cc = 20; cc < 32; cc += 4) {
771     k[cc + 0] ^= k[cc - 4];
772     k[cc + 1] ^= k[cc - 3];
773     k[cc + 2] ^= k[cc - 2];
774     k[cc + 3] ^= k[cc - 1];
775   }
776 }
777 
778 /*  Encrypt a single block of 16 bytes with 'on the fly' 256 bit keying */
779 
/*  Encrypt one block with 256-bit 'on the fly' keying.  o_key holds
    two 16-byte round keys: odd rounds use the upper half, and the
    schedule is advanced only before even rounds and the final key.  */
void aes_encrypt_256(
    const unsigned char in[N_BLOCK],
    unsigned char out[N_BLOCK],
    const unsigned char key[2 * N_BLOCK],
    unsigned char o_key[2 * N_BLOCK]) {
  uint_8t s1[N_BLOCK], r, rc = 1;
  if (o_key != key) {
    block_copy(o_key, key);
    block_copy(o_key + 16, key + 16);
  }
  copy_and_key(s1, in, o_key); /* initial AddRoundKey */

  /* thirteen middle rounds */
  for (r = 1; r < 14; ++r)
#if defined(VERSION_1)
  {
    mix_sub_columns(s1);
    if (r & 1)
      add_round_key(s1, o_key + 16); /* odd round: upper half, no update */
    else {
      update_encrypt_key_256(o_key, &rc);
      add_round_key(s1, o_key);
    }
  }
#else
  {
    uint_8t s2[N_BLOCK];
    mix_sub_columns(s2, s1);
    if (r & 1)
      copy_and_key(s1, s2, o_key + 16);
    else {
      update_encrypt_key_256(o_key, &rc);
      copy_and_key(s1, s2, o_key);
    }
  }
#endif

  /* final round: no MixColumns */
  shift_sub_rows(s1);
  update_encrypt_key_256(o_key, &rc);
  copy_and_key(out, s1, o_key);
}
820 
821 #endif
822 
823 #if defined(AES_DEC_256_OTFK)
824 
/*  The 'on the fly' decryption key update for 256 bit keys */
826 
827 static void update_decrypt_key_256(uint_8t k[2 * N_BLOCK], uint_8t* rc) {
828   uint_8t cc;
829 
830   for (cc = 28; cc > 16; cc -= 4) {
831     k[cc + 0] ^= k[cc - 4];
832     k[cc + 1] ^= k[cc - 3];
833     k[cc + 2] ^= k[cc - 2];
834     k[cc + 3] ^= k[cc - 1];
835   }
836 
837   k[16] ^= s_box(k[12]);
838   k[17] ^= s_box(k[13]);
839   k[18] ^= s_box(k[14]);
840   k[19] ^= s_box(k[15]);
841 
842   for (cc = 12; cc > 0; cc -= 4) {
843     k[cc + 0] ^= k[cc - 4];
844     k[cc + 1] ^= k[cc - 3];
845     k[cc + 2] ^= k[cc - 2];
846     k[cc + 3] ^= k[cc - 1];
847   }
848 
849   *rc = d2(*rc);
850   k[0] ^= s_box(k[29]) ^ *rc;
851   k[1] ^= s_box(k[30]);
852   k[2] ^= s_box(k[31]);
853   k[3] ^= s_box(k[28]);
854 }
855 
856 /*  Decrypt a single block of 16 bytes with 'on the fly'
857     256 bit keying
858 */
/*  Decrypt one block with 256-bit 'on the fly' keying.  'key' must
    be the final 32-byte key state left by aes_encrypt_256; it is
    stepped backwards on odd rounds.  rc starts at 0x80, the value
    reached after the seven forward updates.                         */
void aes_decrypt_256(
    const unsigned char in[N_BLOCK],
    unsigned char out[N_BLOCK],
    const unsigned char key[2 * N_BLOCK],
    unsigned char o_key[2 * N_BLOCK]) {
  uint_8t s1[N_BLOCK], r, rc = 0x80;

  if (o_key != key) {
    block_copy(o_key, key);
    block_copy(o_key + 16, key + 16);
  }

  /* undo the final round */
  copy_and_key(s1, in, o_key);
  inv_shift_sub_rows(s1);

  /* thirteen middle rounds, keys regenerated in reverse */
  for (r = 14; --r;)
#if defined(VERSION_1)
  {
    if ((r & 1)) {
      update_decrypt_key_256(o_key, &rc); /* odd round: step back, use upper half */
      add_round_key(s1, o_key + 16);
    } else
      add_round_key(s1, o_key);
    inv_mix_sub_columns(s1);
  }
#else
  {
    uint_8t s2[N_BLOCK];
    if ((r & 1)) {
      update_decrypt_key_256(o_key, &rc);
      copy_and_key(s2, s1, o_key + 16);
    } else
      copy_and_key(s2, s1, o_key);
    inv_mix_sub_columns(s1, s2);
  }
#endif
  /* undo the initial AddRoundKey */
  copy_and_key(out, s1, o_key);
}
897 
898 #endif
899