/* aes_dec-asm_faster.S */
/*
This file is part of the AVR-Crypto-Lib.
Copyright (C) 2008, 2009 Daniel Otte (daniel.otte@rub.de)
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
/**
* \file aes_dec-asm_faster.S
* \email daniel.otte@rub.de
* \author Daniel Otte
* \date 2009-01-10
* \license GPLv3 or later
*
*/
#include "avr-asm-macros.S"
/* Register numbers used as instruction operands in this file. */
.set A, 28              /* r28; not referenced by any instruction in this file */
.set B, 29              /* r29; not referenced by any instruction in this file */
.set P, 0               /* r0: receives lpm results during the column mixing */
.set xREDUCER, 25       /* r25: holds 0x1b, xored in after a left shift carries out */
/*
 * Entry points for the three AES key sizes.
 * Each one only loads the round count into r20, the register that
 * aes_decrypt_core reads its rounds argument from, and then continues in
 * aes_decrypt_core. r24:r25 and r22:r23 are not modified here, so they
 * reach aes_decrypt_core unchanged.
 */
.global aes256_dec
aes256_dec:
ldi r20, 14
rjmp aes_decrypt_core
.global aes192_dec
aes192_dec:
ldi r20, 12
rjmp aes_decrypt_core
.global aes128_dec
aes128_dec:
ldi r20, 10
/*
 * No jump here: execution falls through into aes_decrypt_core.
 * Only symbol assignments (which emit no code) may stand between this
 * point and the aes_decrypt_core label.
 */
/*
void aes_decrypt_core(aes_cipher_state_t* state, const aes_genctx_t* ks, uint8_t rounds)
*/
/* Scratch registers r2..r7. */
.set T0, 2
.set T1, 3
.set T2, 4
.set T3, 5
.set T4, 6
.set T5, 7              /* defined, but not referenced in this file */
/*
 * Cipher state in r8..r23.
 * STcr is loaded from (and stored back to) state byte 4*c + r.
 */
.set ST00, 8
.set ST01, 9
.set ST02, 10
.set ST03, 11
.set ST10, 12
.set ST11, 13
.set ST12, 14
.set ST13, 15
.set ST20, 16
.set ST21, 17
.set ST22, 18
.set ST23, 19
.set ST30, 20
.set ST31, 21
.set ST32, 22
.set ST33, 23
/* Round counter (r24). */
.set CTR, 24
/*
 * void aes_decrypt_core(aes_cipher_state_t* state, const aes_genctx_t* ks, uint8_t rounds)
 *
 * Decrypts the 16-byte block at state in place.
 *
 * param state:  r24:r25  16 bytes, read at entry and overwritten at exit
 * param ks:     r22:r23  round keys; the (rounds+1)*16 bytes starting at ks
 *                        are consumed from the last byte backwards
 * param rounds: r20      number of rounds (10, 12 or 14 when entered through
 *                        aes128_dec / aes192_dec / aes256_dec).
 *                        Must be in 1..14: the round loop decrements before
 *                        testing for zero, and swap only acts as a
 *                        multiplication by 16 while rounds+1 < 16.
 *
 * Saved and restored: r2-r17.
 * Overwritten:        r0, r18-r27, r30, r31 and the T flag.
 * r1 is expected to hold zero on entry (it is used as the carry-only addend).
 * r28:r29 are never touched by this routine and are therefore not saved.
 *
 * aes_invsbox is defined outside this file and has to start on a 256-byte
 * boundary: only its high address byte is loaded into r31, the low byte of
 * Z is the table index.
 */
.global aes_decrypt_core
aes_decrypt_core:
    push_range 2, 17
    push r24                    /* state pointer; popped into Z at exit */
    push r25
    movw r26, r22               /* X = ks */
    movw r30, r24               /* Z = state */
    mov CTR, r20
    inc r20
    swap r20                    /* r20 = (rounds+1)*16 */
    add r26, r20
    adc r27, r1                 /* X = one past the last round-key byte */
    clt                         /* T = 0: not in the final round */
    /* load the state; this overwrites r20..r23, which are no longer needed */
    .irp param, ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33
        ld \param, Z+
    .endr
    ldi xREDUCER, 0x1b          /* load reducer */
    /* add the last round key (read backwards through X) */
    .irp param, ST33, ST32, ST31, ST30, ST23, ST22, ST21, ST20, ST13, ST12, ST11, ST10, ST03, ST02, ST01, ST00
        ld r0, -X
        eor \param, r0
    .endr
1:
    dec CTR
    brne 2f
    set                         /* final round: T = 1 skips the column mixing */
2:
    ldi r31, hi8(aes_invsbox)
    /* substitute and invShift */
    /* row 0 is not shifted */
    .irp param, ST00, ST10, ST20, ST30
        mov r30, \param
        lpm \param, Z
    .endr
    /* row 1: every byte moves one column up (ST01 -> ST11 -> ST21 -> ST31 -> ST01) */
    mov r30, ST31
    lpm T0, Z
    mov r30, ST21
    lpm ST31, Z
    mov r30, ST11
    lpm ST21, Z
    mov r30, ST01
    lpm ST11, Z
    mov ST01, T0
    /* row 2: columns 0 <-> 2 and 1 <-> 3 are exchanged */
    mov r30, ST32
    lpm T0, Z
    mov r30, ST22
    lpm T1, Z
    mov r30, ST12
    lpm ST32, Z
    mov r30, ST02
    lpm ST22, Z
    mov ST12, T0
    mov ST02, T1
    /* row 3: every byte moves one column down (ST33 -> ST23 -> ST13 -> ST03 -> ST33) */
    mov r30, ST03
    lpm T0, Z
    mov r30, ST13
    lpm ST03, Z
    mov r30, ST23
    lpm ST13, Z
    mov r30, ST33
    lpm ST23, Z
    mov ST33, T0
    /* key addition */
    .irp param, ST33, ST32, ST31, ST30, ST23, ST22, ST21, ST20, ST13, ST12, ST11, ST10, ST03, ST02, ST01, ST00
        ld r0, -X
        eor \param, r0
    .endr
    brtc 2f                     /* T = 0: continue with the column mixing */
exit:
    /* the pushed r24:r25 come back as Z = state */
    pop r31
    pop r30
    .irp param, ST00, ST01, ST02, ST03, ST10, ST11, ST12, ST13, ST20, ST21, ST22, ST23, ST30, ST31, ST32, ST33
        st Z+, \param
    .endr
    pop_range 2, 17
    ret
/*
 * aes_dec_invmixcol s0, s1, s2, s3
 *
 * Inverse column mixing of the four state registers s0..s3, in place.
 * All products are in GF(2^8); a doubling is lsl followed by a conditional
 * xor with xREDUCER, products by 9 and 4 come from the lookup tables.
 *
 *   t  = s2 ^ s3
 *   u  = s0 ^ s1
 *   v0 = 9*(t ^ u)
 *   w  = v0 ^ 4*(s0 ^ s2)
 *   v  = v0 ^ 4*(s1 ^ s3)
 *   s3 ^= 2*(s0 ^ s3) ^ v
 *   s1 ^= 2*(s1 ^ s2) ^ v
 *   s2 ^= 2*t ^ w
 *   s0 ^= 2*u ^ w
 *
 * which amounts to
 *   s0 = 0e*s0 ^ 0b*s1 ^ 0d*s2 ^ 09*s3
 *   s1 = 09*s0 ^ 0e*s1 ^ 0b*s2 ^ 0d*s3
 *   s2 = 0d*s0 ^ 09*s1 ^ 0e*s2 ^ 0b*s3
 *   s3 = 0b*s0 ^ 0d*s1 ^ 09*s2 ^ 0e*s3
 *
 * Overwrites T0..T4, P (r0), r30, r31 and the status flags.
 * Expects xREDUCER to hold 0x1b.
 */
.macro aes_dec_invmixcol s0, s1, s2, s3
    /* preparing */
    ldi r31, hi8(lut_gf256mul_0x09)
    mov T0, \s3
    eor T0, \s2                 /* T0 = t */
    mov T1, \s0
    eor T1, \s1                 /* T1 = u */
    mov r30, T0
    eor r30, T1
    lpm T2, Z                   /* T2 = v0 */
    ldi r31, hi8(lut_gf256mul_0x04)
    mov r30, \s2
    eor r30, \s0
    lpm T3, Z
    eor T3, T2                  /* T3 = w */
    mov r30, \s3
    eor r30, \s1
    lpm P, Z
    eor T2, P                   /* T2 = v */
    /* now the big move */
    mov T4, \s0
    eor T4, \s3
    lsl T4
    brcc 3f
    eor T4, xREDUCER
3:  eor T4, T2
    eor \s3, T4
    mov T4, \s2
    eor T4, \s1
    lsl T4
    brcc 3f
    eor T4, xREDUCER
3:  eor T4, T2
    eor \s1, T4
    lsl T0
    brcc 3f
    eor T0, xREDUCER
3:  eor T0, T3
    eor \s2, T0
    lsl T1
    brcc 3f
    eor T1, xREDUCER
3:  eor T1, T3
    eor \s0, T1
.endm
2:
    /* inverse column mixing of all four state columns, then the next round */
    aes_dec_invmixcol ST00, ST01, ST02, ST03
    aes_dec_invmixcol ST10, ST11, ST12, ST13
    aes_dec_invmixcol ST20, ST21, ST22, ST23
    aes_dec_invmixcol ST30, ST31, ST32, ST33
    rjmp 1b
/*
 * 256-byte lookup table in program memory, read with lpm: entry i is the
 * GF(2^8) product 9*i (e.g. entry 0x20 is 0x3B).
 * The 256-byte alignment is required: the reader loads only hi8(table)
 * into r31 and puts the index into r30.
 */
.balign 256
lut_gf256mul_0x09:
.byte 0x00, 0x09, 0x12, 0x1B, 0x24, 0x2D, 0x36, 0x3F
.byte 0x48, 0x41, 0x5A, 0x53, 0x6C, 0x65, 0x7E, 0x77
.byte 0x90, 0x99, 0x82, 0x8B, 0xB4, 0xBD, 0xA6, 0xAF
.byte 0xD8, 0xD1, 0xCA, 0xC3, 0xFC, 0xF5, 0xEE, 0xE7
.byte 0x3B, 0x32, 0x29, 0x20, 0x1F, 0x16, 0x0D, 0x04
.byte 0x73, 0x7A, 0x61, 0x68, 0x57, 0x5E, 0x45, 0x4C
.byte 0xAB, 0xA2, 0xB9, 0xB0, 0x8F, 0x86, 0x9D, 0x94
.byte 0xE3, 0xEA, 0xF1, 0xF8, 0xC7, 0xCE, 0xD5, 0xDC
.byte 0x76, 0x7F, 0x64, 0x6D, 0x52, 0x5B, 0x40, 0x49
.byte 0x3E, 0x37, 0x2C, 0x25, 0x1A, 0x13, 0x08, 0x01
.byte 0xE6, 0xEF, 0xF4, 0xFD, 0xC2, 0xCB, 0xD0, 0xD9
.byte 0xAE, 0xA7, 0xBC, 0xB5, 0x8A, 0x83, 0x98, 0x91
.byte 0x4D, 0x44, 0x5F, 0x56, 0x69, 0x60, 0x7B, 0x72
.byte 0x05, 0x0C, 0x17, 0x1E, 0x21, 0x28, 0x33, 0x3A
.byte 0xDD, 0xD4, 0xCF, 0xC6, 0xF9, 0xF0, 0xEB, 0xE2
.byte 0x95, 0x9C, 0x87, 0x8E, 0xB1, 0xB8, 0xA3, 0xAA
.byte 0xEC, 0xE5, 0xFE, 0xF7, 0xC8, 0xC1, 0xDA, 0xD3
.byte 0xA4, 0xAD, 0xB6, 0xBF, 0x80, 0x89, 0x92, 0x9B
.byte 0x7C, 0x75, 0x6E, 0x67, 0x58, 0x51, 0x4A, 0x43
.byte 0x34, 0x3D, 0x26, 0x2F, 0x10, 0x19, 0x02, 0x0B
.byte 0xD7, 0xDE, 0xC5, 0xCC, 0xF3, 0xFA, 0xE1, 0xE8
.byte 0x9F, 0x96, 0x8D, 0x84, 0xBB, 0xB2, 0xA9, 0xA0
.byte 0x47, 0x4E, 0x55, 0x5C, 0x63, 0x6A, 0x71, 0x78
.byte 0x0F, 0x06, 0x1D, 0x14, 0x2B, 0x22, 0x39, 0x30
.byte 0x9A, 0x93, 0x88, 0x81, 0xBE, 0xB7, 0xAC, 0xA5
.byte 0xD2, 0xDB, 0xC0, 0xC9, 0xF6, 0xFF, 0xE4, 0xED
.byte 0x0A, 0x03, 0x18, 0x11, 0x2E, 0x27, 0x3C, 0x35
.byte 0x42, 0x4B, 0x50, 0x59, 0x66, 0x6F, 0x74, 0x7D
.byte 0xA1, 0xA8, 0xB3, 0xBA, 0x85, 0x8C, 0x97, 0x9E
.byte 0xE9, 0xE0, 0xFB, 0xF2, 0xCD, 0xC4, 0xDF, 0xD6
.byte 0x31, 0x38, 0x23, 0x2A, 0x15, 0x1C, 0x07, 0x0E
.byte 0x79, 0x70, 0x6B, 0x62, 0x5D, 0x54, 0x4F, 0x46
/*
 * 256-byte lookup table in program memory, read with lpm: entry i is the
 * GF(2^8) product 4*i (e.g. entry 0x40 is 0x1B).
 * There is no alignment directive of its own: the table has to stay
 * directly behind the 256-byte aligned, 256-byte long lut_gf256mul_0x09 so
 * that it also starts on a 256-byte boundary (the reader loads only
 * hi8(table) into r31 and puts the index into r30).
 */
lut_gf256mul_0x04:
.byte 0x00, 0x04, 0x08, 0x0C, 0x10, 0x14, 0x18, 0x1C
.byte 0x20, 0x24, 0x28, 0x2C, 0x30, 0x34, 0x38, 0x3C
.byte 0x40, 0x44, 0x48, 0x4C, 0x50, 0x54, 0x58, 0x5C
.byte 0x60, 0x64, 0x68, 0x6C, 0x70, 0x74, 0x78, 0x7C
.byte 0x80, 0x84, 0x88, 0x8C, 0x90, 0x94, 0x98, 0x9C
.byte 0xA0, 0xA4, 0xA8, 0xAC, 0xB0, 0xB4, 0xB8, 0xBC
.byte 0xC0, 0xC4, 0xC8, 0xCC, 0xD0, 0xD4, 0xD8, 0xDC
.byte 0xE0, 0xE4, 0xE8, 0xEC, 0xF0, 0xF4, 0xF8, 0xFC
.byte 0x1B, 0x1F, 0x13, 0x17, 0x0B, 0x0F, 0x03, 0x07
.byte 0x3B, 0x3F, 0x33, 0x37, 0x2B, 0x2F, 0x23, 0x27
.byte 0x5B, 0x5F, 0x53, 0x57, 0x4B, 0x4F, 0x43, 0x47
.byte 0x7B, 0x7F, 0x73, 0x77, 0x6B, 0x6F, 0x63, 0x67
.byte 0x9B, 0x9F, 0x93, 0x97, 0x8B, 0x8F, 0x83, 0x87
.byte 0xBB, 0xBF, 0xB3, 0xB7, 0xAB, 0xAF, 0xA3, 0xA7
.byte 0xDB, 0xDF, 0xD3, 0xD7, 0xCB, 0xCF, 0xC3, 0xC7
.byte 0xFB, 0xFF, 0xF3, 0xF7, 0xEB, 0xEF, 0xE3, 0xE7
.byte 0x36, 0x32, 0x3E, 0x3A, 0x26, 0x22, 0x2E, 0x2A
.byte 0x16, 0x12, 0x1E, 0x1A, 0x06, 0x02, 0x0E, 0x0A
.byte 0x76, 0x72, 0x7E, 0x7A, 0x66, 0x62, 0x6E, 0x6A
.byte 0x56, 0x52, 0x5E, 0x5A, 0x46, 0x42, 0x4E, 0x4A
.byte 0xB6, 0xB2, 0xBE, 0xBA, 0xA6, 0xA2, 0xAE, 0xAA
.byte 0x96, 0x92, 0x9E, 0x9A, 0x86, 0x82, 0x8E, 0x8A
.byte 0xF6, 0xF2, 0xFE, 0xFA, 0xE6, 0xE2, 0xEE, 0xEA
.byte 0xD6, 0xD2, 0xDE, 0xDA, 0xC6, 0xC2, 0xCE, 0xCA
.byte 0x2D, 0x29, 0x25, 0x21, 0x3D, 0x39, 0x35, 0x31
.byte 0x0D, 0x09, 0x05, 0x01, 0x1D, 0x19, 0x15, 0x11
.byte 0x6D, 0x69, 0x65, 0x61, 0x7D, 0x79, 0x75, 0x71
.byte 0x4D, 0x49, 0x45, 0x41, 0x5D, 0x59, 0x55, 0x51
.byte 0xAD, 0xA9, 0xA5, 0xA1, 0xBD, 0xB9, 0xB5, 0xB1
.byte 0x8D, 0x89, 0x85, 0x81, 0x9D, 0x99, 0x95, 0x91
.byte 0xED, 0xE9, 0xE5, 0xE1, 0xFD, 0xF9, 0xF5, 0xF1
.byte 0xCD, 0xC9, 0xC5, 0xC1, 0xDD, 0xD9, 0xD5, 0xD1