642 lines
11 KiB
ArmAsm
642 lines
11 KiB
ArmAsm
|
/* noekeon_asm.S */
|
||
|
/*
|
||
|
This file is part of the Crypto-avr-lib/microcrypt-lib.
|
||
|
Copyright (C) 2008 Daniel Otte (daniel.otte@rub.de)
|
||
|
|
||
|
This program is free software: you can redistribute it and/or modify
|
||
|
it under the terms of the GNU General Public License as published by
|
||
|
the Free Software Foundation, either version 3 of the License, or
|
||
|
(at your option) any later version.
|
||
|
|
||
|
This program is distributed in the hope that it will be useful,
|
||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
GNU General Public License for more details.
|
||
|
|
||
|
You should have received a copy of the GNU General Public License
|
||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||
|
*/
|
||
|
/*
|
||
|
* noekeon assembler implementation for avr
|
||
|
* author: Daniel Otte
|
||
|
* email: daniel.otte@rub.de
|
||
|
* license: GPLv3
|
||
|
*/
|
||
|
|
||
|
#include <avr/io.h>
|
||
|
|
||
|
/*
 * push_all - push r2..r17 followed by r28 and r29 onto the stack,
 * in ascending register order.
 */
.macro push_all
	.irp reg,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,28,29
	push r\reg
	.endr
.endm
|
||
|
|
||
|
/*
 * pop_all - pop r29, r28 and then r17 down to r2 from the stack
 * (descending register order), and finally clear r1.
 */
.macro pop_all
	.irp reg,29,28,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2
	pop r\reg
	.endr
	clr r1
.endm
|
||
|
|
||
|
/*
 * push_all_func - subroutine form of the register save sequence.
 *
 * The two bytes on top of the stack (the return address left by rcall) are
 * popped into Z, r2..r17/r28/r29 are pushed, and control returns to the
 * caller through ijmp, so the saved registers stay on the stack.
 * Clobbers: Z (r30, r31).
 * NOTE(review): pops exactly two bytes of return address, so this presumably
 * targets devices with a 16-bit program counter - confirm.
 */
push_all_func:
	pop r31
	pop r30
	.irp reg,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,28,29
	push r\reg
	.endr
	ijmp
|
||
|
|
||
|
/*
 * pop_all_func - subroutine form of the register restore sequence.
 *
 * The two bytes on top of the stack (the return address left by rcall) are
 * popped into Z, r29/r28/r17..r2 are popped, r1 is cleared, and control
 * returns to the caller through ijmp.
 * Clobbers: Z (r30, r31).
 * NOTE(review): pops exactly two bytes of return address, so this presumably
 * targets devices with a 16-bit program counter - confirm.
 */
pop_all_func:
	pop r31
	pop r30
	.irp reg,29,28,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2
	pop r\reg
	.endr
	clr r1
	ijmp
|
||
|
|
||
|
/*
 * xchg a b - swap the contents of two registers with three XORs.
 * No scratch register is used. If both operands name the same register
 * the register ends up zero.
 */
.macro xchg a b
	eor \a, \b
	eor \b, \a
	eor \a, \b
.endm

/*
 * op32_4t op a b c d w x y z - apply a two-operand instruction to four
 * explicitly named operand pairs, in this order:
 *   op a,w   op b,x   op c,y   op d,z
 */
.macro op32_4t op a b c d w x y z
	\op \a, \w
	\op \b, \x
	\op \c, \y
	\op \d, \z
.endm

/*
 * op32 op a b - apply a two-operand instruction byte by byte to two
 * 32-bit values whose bytes are named <a>_0..<a>_3 and <b>_0..<b>_3.
 */
.macro op32 op a b
	op32_4t \op, \a\()_0, \a\()_1, \a\()_2, \a\()_3, \b\()_0, \b\()_1, \b\()_2, \b\()_3
.endm

/*
 * op32_prefix op p q a b c d w x y z - like op32_4t, but every destination
 * name is built as <p><a..d> and every source name as <q><w..z>.
 */
.macro op32_prefix op p q a b c d w x y z
	op32_4t \op, \p\()\a, \p\()\b, \p\()\c, \p\()\d, \q\()\w, \q\()\x, \q\()\y, \q\()\z
.endm
|
||
|
|
||
|
; === bigendian_rotl32 ===
; Rotates a 32-bit big-endian word n bits to the left.
;  param1: the 32-bit value
;          given in r25,r24,r23,r22 (r22 is most significant)
;  param2: the 8-bit parameter giving the number of bits to rotate
;          given in r20 (0 returns the value unchanged)
;  return: the rotated 32-bit word
;          given in r25,r24,r23,r22
;  clobbers: r20 (0 on return), r1 (cleared on return), SREG including T
;
; The bit that wraps around is sampled from the live most significant byte
; on every iteration, so the result is correct for any rotate count and not
; only for counts up to 8.

bigendian_rotl32:
	tst r20
	breq bigendian_rotl32_exit	/* nothing to do for a count of 0 */
2:
	bst r22, 7			/* T = current MSB, wraps into bit 0 */
	lsl r25
	rol r24
	rol r23
	rol r22
	bld r25, 0			/* insert the wrapped bit */

	dec r20
	brne 2b
bigendian_rotl32_exit:
	clr r1				/* r1 is zero on return, as before */
	ret
|
||
|
|
||
|
|
||
|
/******************************************************************************/
|
||
|
|
||
|
; === bigendian_rotr32 ===
; Rotates a 32-bit big-endian word n bits to the right.
;  param1: the 32-bit value
;          given in r25,r24,r23,r22 (r22 is most significant)
;  param2: the 8-bit parameter giving the number of bits to rotate
;          given in r20 (0 returns the value unchanged)
;  return: the rotated 32-bit word
;          given in r25,r24,r23,r22
;  clobbers: r20 (0 on return), r1 (cleared on return), SREG including T
;
; The bit that wraps around is sampled from the live least significant byte
; on every iteration, so the result is correct for any rotate count and not
; only for counts up to 8.

bigendian_rotr32:
	tst r20
	breq bigendian_rotr32_exit	/* nothing to do for a count of 0 */
2:
	bst r25, 0			/* T = current LSB, wraps into bit 31 */
	lsr r22
	ror r23
	ror r24
	ror r25
	bld r22, 7			/* insert the wrapped bit */

	dec r20
	brne 2b
bigendian_rotr32_exit:
	clr r1				/* r1 is zero on return, as before */
	ret
|
||
|
|
||
|
/******************************************************************************/
|
||
|
/*
|
||
|
void theta(uint32_t* k, uint32_t* a){
|
||
|
uint32_t temp;
|
||
|
temp = a[0] ^ a[2]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
|
||
|
a[1] ^= temp;
|
||
|
a[3] ^= temp;
|
||
|
|
||
|
a[0] ^= k[0];
|
||
|
a[1] ^= k[1];
|
||
|
a[2] ^= k[2];
|
||
|
a[3] ^= k[3];
|
||
|
|
||
|
temp = a[1] ^ a[3]; temp ^= ROTR32(temp, 8) ^ ROTL32(temp, 8);
|
||
|
a[0] ^= temp;
|
||
|
a[2] ^= temp;
|
||
|
}
|
||
|
*/
|
||
|
|
||
|
/*
 * Round constant table, 16 bytes. The encryption and decryption loops in
 * this file read it from program memory with lpm.
 */
round_const:
	.byte 0x1B, 0x36, 0x6C, 0xD8, 0xAB, 0x4D, 0x9A, 0x2F
	.byte 0x5E, 0xBC, 0x63, 0xC6, 0x97, 0x35, 0x6A, 0xD4
|
||
|
|
||
|
/*
 * Register allocation of the 128-bit cipher state: word a[w], byte b is
 * held in register number state<w>_<b>, i.e. r2..r17 in memory order.
 */
;-- a[0]
.equ state0_0, 2
.equ state0_1, 3
.equ state0_2, 4
.equ state0_3, 5
;-- a[1]
.equ state1_0, 6
.equ state1_1, 7
.equ state1_2, 8
.equ state1_3, 9
;-- a[2]
.equ state2_0, 10
.equ state2_1, 11
.equ state2_2, 12
.equ state2_3, 13
;-- a[3]
.equ state3_0, 14
.equ state3_1, 15
.equ state3_2, 16
.equ state3_3, 17
|
||
|
|
||
|
; === theta ===
;
;  param1: the state in r2-r17
;  param2: pointer to k in X (r26,r27)
;
;  The state is updated in place. X is advanced over the 16 key bytes and
;  then moved back, so it points at k again on return.
;  clobbers: r0, r1 (cleared on return), r18-r21, Y (r28,r29), SREG
;  The key is XORed in through Y = 0x0002, i.e. the state registers r2-r17
;  are accessed through the data address space.
;
.equ temp_a, 18
.equ temp_b, 19
.equ temp_c, 20
.equ temp_d, 21

theta:
	/* temp = a[0] ^ a[2] */
	mov temp_a, state0_0
	mov temp_b, state0_1
	mov temp_c, state0_2
	mov temp_d, state0_3
	eor temp_a, state2_0
	eor temp_b, state2_1
	eor temp_c, state2_2
	eor temp_d, state2_3

	/* r1 = XOR of all four bytes of temp */
	mov r1, temp_a
	eor r1, temp_b
	eor r1, temp_c
	eor r1, temp_d

	/* temp ^= temp>>>8 ^ temp<<<8: each byte becomes the XOR of the
	   other three original bytes */
	eor temp_a, r1
	eor temp_b, r1
	eor temp_c, r1
	eor temp_d, r1

	/* result bytes 0..3 now live in temp_c, temp_d, temp_a, temp_b */
	/* a[1] ^= temp */
	op32_4t eor, state1_0, state1_1, state1_2, state1_3, temp_c, temp_d, temp_a, temp_b
	/* a[3] ^= temp */
	op32_4t eor, state3_0, state3_1, state3_2, state3_3, temp_c, temp_d, temp_a, temp_b

	/* state ^= k (X points to k) */
	clr r29
	ldi r28, 2		/* Y points to r2 aka state0_0 */
	ldi temp_a, 16
1:
	ld r0, Y
	ld r1, X+
	eor r1, r0
	st Y+, r1
	dec temp_a
	brne 1b
	sbiw r26, 16		/* set X back to key */

	/* temp = a[1] ^ a[3]; temp ^= temp>>>8 ^ temp<<<8 */
	op32_prefix mov, temp_, state1_, a,b,c,d, 0,1,2,3
	op32_prefix eor, temp_, state3_, a,b,c,d, 0,1,2,3

	mov r1, temp_a
	eor r1, temp_b
	eor r1, temp_c
	eor r1, temp_d

	op32_prefix eor, temp_, r, a,b,c,d, 1,1,1,1

	/* result bytes 0..3 now live in temp_c, temp_d, temp_a, temp_b */
	/* a[0] ^= temp */
	op32_4t eor, state0_0, state0_1, state0_2, state0_3, temp_c, temp_d, temp_a, temp_b
	/* a[2] ^= temp */
	op32_4t eor, state2_0, state2_1, state2_2, state2_3, temp_c, temp_d, temp_a, temp_b

	clr r1
	ret
|
||
|
|
||
|
/******************************************************************************/
|
||
|
#ifndef NOEKEON_NO_ENC
; === noekeon_enc ===
;
;  param1: pointer to buffer (r24,r25)
;          16 bytes, read into r2-r17 and written back at the end
;  param2: pointer to k (r22,r23)
;
;  Stack layout while the round loop runs (top first): round counter,
;  then the buffer pointer. round() consumes the two round-constant bytes
;  that are pushed in front of every call.
;
.global noekeon_enc
noekeon_enc:
	rcall push_all_func

	movw r30, r24		/* Z points at the buffer */
	movw r26, r22		/* X points at the key, as theta expects */
	clr r29
	ldi r28, 2		/* Y points at r2 aka state0_0 */
	push r30		/* keep the buffer pointer for the write-back */
	push r31
	ldi r22, 16
	push r22		/* 16 is also the number of rounds and gets pushed here */
	/* load state */
1:
	ld r0, Z+
	st Y+, r0
	dec r22
	brne 1b
	/* state loaded */

	/* first round: the constant applied after theta is taken from r1,
	   the constant applied before theta is 0x80 */
	push r1			/* push round constant 2 (r1, expected to be 0x00) */
	ldi r20, 0x80
	push r20		/* push round constant 1 (0x80) */
	rjmp 3f
2:
	/* later rounds: r22 holds the remaining count (15..1) */
	clr r1
	push r1			/* push round constant 2 (0x00) */
	ldi r30, lo8(round_const+15)
	ldi r31, hi8(round_const+15)
	sub r30, r22
	sbci r31, 0		/* Z = round_const + 15 - r22 */
	lpm r0, Z
	push r0			/* push round constant 1 (table entry) */
3:
	rcall round		/* pops rc2 & rc1 */
	pop r22
	dec r22
	push r22
	brne 2b

	pop r22			/* drop the round counter */

	/* final step: a[0] ^= 0xD4, then one more theta */
	ldi r22, 0xD4
	eor state0_3, r22
	rcall theta

	/* write the state back to the buffer */
	pop r31
	pop r30
	ldi r28, 2
	clr r29
	ldi r22, 16
1:
	ld r0, Y+
	st Z+, r0
	dec r22
	brne 1b

	rcall pop_all_func
	ret
#endif
|
||
|
/******************************************************************************/
|
||
|
/******************************************************************************/
|
||
|
#ifndef NOEKEON_NO_DEC

; === noekeon_dec ===
;
;  param1: pointer to buffer/state (r24,r25)
;          16 bytes, read into r2-r17 and written back at the end
;  param2: pointer to k (r22,r23)
;
;  A 16-byte working key is built on the stack: the key is loaded into the
;  state registers and run through theta with an all-zero key, and the
;  result is stored in the stack buffer. X points at that buffer for all
;  later theta calls.
;
;  The stack pointer is changed with interrupts masked between the SPH and
;  SPL writes (SREG saved in r0 and restored), so an interrupt can never
;  run on a half-updated stack pointer.
;
.global noekeon_dec
noekeon_dec:
	rcall push_all_func
	/* allocate 16 bytes on the stack */
	in r30, _SFR_IO_ADDR(SPL)
	in r31, _SFR_IO_ADDR(SPH)
	sbiw r30, 16
	in r0, _SFR_IO_ADDR(SREG)
	cli
	out _SFR_IO_ADDR(SPH), r31
	out _SFR_IO_ADDR(SREG), r0	/* restores the previous I flag */
	out _SFR_IO_ADDR(SPL), r30

	adiw r30, 1		/* Z points at the first allocated byte */
	/* push state pointer */
	push r24
	push r25
	movw r26, r22		/* move key ptr to X */

	/* set stackkey to zero (r1 is expected to be 0 here) */
	ldi r22, 16
1:	st Z+, r1
	dec r22
	brne 1b

	/* copy key to state */
	clr r29
	ldi r28, 2
	ldi r22, 16
1:	ld r0, X+
	st Y+, r0
	dec r22
	brne 1b

	movw r26, r30
	sbiw r26, 16		/* set X back to beginning of stack key */
	rcall theta

	/* move state to stackkey */
	clr r29
	ldi r28, 2
	ldi r22, 16
1:	ld r0, Y+
	st X+, r0
	dec r22
	brne 1b
	sbiw r26, 16		/* set X back to beginning of stack key */

	/* move data from stateptr to state */
	pop r31
	pop r30
	push r30
	push r31
	clr r29
	ldi r28, 2
	ldi r22, 16
	push r22		/* 16 is also the round counter */
1:	ld r0, Z+
	st Y+, r0
	dec r22
	brne 1b

	/* first round: constant after theta is 0xD4, constant before is 0x00 */
	ldi r20, 0xD4
	push r20		/* push round constant 2 (0xD4) */
	push r22		/* push round constant 1 (0x00, r22 is 0 after the loop) */
	rjmp 3f
2:
	/* later rounds: r22 holds the remaining count (15..1) */
	ldi r30, lo8(round_const-1)
	ldi r31, hi8(round_const-1)
	clr r1
	add r30, r22
	adc r31, r1		/* Z = round_const - 1 + r22 */
	lpm r0, Z
	push r0			/* push round constant 2 (table entry) */
	push r1			/* push round constant 1 (0x00) */
3:
	rcall round		/* pops rc2 & rc1 */
	pop r22
	dec r22
	push r22
	brne 2b

	pop r22			/* drop the round counter */

	/* final step: one more theta, then a[0] ^= 0x80 */
	rcall theta
	ldi r22, 0x80
	eor state0_3, r22

write_state_back:
	/* write state back */
	pop r31			/* pop state pointer */
	pop r30
	clr r29
	ldi r28, 2
	ldi r22, 16
1:
	ld r0, Y+
	st Z+, r0
	dec r22
	brne 1b

	/* remove key from stack */
	in r30, _SFR_IO_ADDR(SPL)
	in r31, _SFR_IO_ADDR(SPH)
	adiw r30, 16
	in r0, _SFR_IO_ADDR(SREG)
	cli
	out _SFR_IO_ADDR(SPH), r31
	out _SFR_IO_ADDR(SREG), r0	/* restores the previous I flag */
	out _SFR_IO_ADDR(SPL), r30
	rcall pop_all_func
	ret
#endif
|
||
|
/******************************************************************************/
|
||
|
|
||
|
|
||
|
/*
 * round - one cipher round on the state in r2-r17.
 *
 * The caller pushes two round-constant bytes before the rcall; the byte
 * pushed last is XORed into state0_3 before theta, the byte pushed first
 * after theta. Both bytes are removed from the stack here.
 * X must point at the key for theta.
 * Clobbers: r1, r20, r22-r25, Z, plus everything theta clobbers.
 * NOTE(review): the return address is moved aside with two pops, so this
 * presumably targets devices with a 16-bit program counter - confirm.
 */
round:
	pop r24			/* take the return address off the stack */
	pop r25
	pop r1			/* round constant 1 */
	eor state0_3, r1
	rcall theta
	pop r1			/* round constant 2 */
	eor state0_3, r1
	push r25		/* put the return address back */
	push r24
pi_gamma_pi:
	/* pi1: rotate the words left */
	ldi r31, pm_hi8(bigendian_rotl32)
	ldi r30, pm_lo8(bigendian_rotl32)
	rcall pi
	/* pi1 done; now gamma */
	rcall gamma_1
	/* a[0] <-> a[3] */
	op32 xchg, state0, state3
	/* a[2] ^= a[0] ^ a[1] ^ a[3] */
	op32 eor, state2, state0
	op32 eor, state2, state1
	op32 eor, state2, state3

	rcall gamma_1
	/* pi2: rotate the words right */
	ldi r31, pm_hi8(bigendian_rotr32)
	ldi r30, pm_lo8(bigendian_rotr32)
	rcall pi
	ret
|
||
|
|
||
|
/*
 * gamma_1 - nonlinear half-step on the state in r2-r17:
 *   a[1] ^= ~(a[3] | a[2])
 *   a[0] ^=   a[2] & a[1]
 * Works byte by byte with r1 as scratch. r1 is NOT cleared on return.
 */
gamma_1:
	/* a[1] ^= ~(a[3]|a[2]) */
	.irp i,0,1,2,3
	mov r1, state3_\i
	or r1, state2_\i
	com r1
	eor state1_\i, r1
	.endr

	/* a[0] ^= a[2]&a[1] */
	.irp i,0,1,2,3
	mov r1, state2_\i
	and r1, state1_\i
	eor state0_\i, r1
	.endr
	ret
|
||
|
|
||
|
/*
 * pi_rotate_word idx bits - helper for pi: copy state word a[idx] into
 * r22..r25 (byte 0 in r22), load the rotate count into r20, call the
 * routine Z points at, and copy r22..r25 back into the word.
 */
.macro pi_rotate_word idx bits
	mov r22, state\idx\()_0
	mov r23, state\idx\()_1
	mov r24, state\idx\()_2
	mov r25, state\idx\()_3
	ldi r20, \bits
	icall
	mov state\idx\()_0, r22
	mov state\idx\()_1, r23
	mov state\idx\()_2, r24
	mov state\idx\()_3, r25
.endm

/*
 * pi - rotate a[1], a[2] and a[3] by 1, 5 and 2 bits respectively.
 *
 * The direction is chosen by the caller: Z must hold the program-memory
 * word address of the rotate routine, which is called through icall with
 * the word in r22..r25 and the count in r20. a[0] is left untouched.
 * Clobbers: r20, r22-r25, plus whatever the rotate routine clobbers.
 */
pi:
	/* a[1] rotated by 1 */
	pi_rotate_word 1, 1
	/* a[2] rotated by 5 */
	pi_rotate_word 2, 5
	/* a[3] rotated by 2 */
	pi_rotate_word 3, 2
	ret
|
||
|
|
||
|
/******************************************************************************/
|
||
|
|
||
|
/*
|
||
|
void noekeon_init(void* key, noekeon_ctx_t* ctx){
|
||
|
uint8_t nullv[16];
|
||
|
|
||
|
memset(nullv, 0, 16);
|
||
|
memcpy(ctx, key, 16);
|
||
|
noekeon_enc(ctx, nullv);
|
||
|
}
|
||
|
*/
|
||
|
|
||
|
#ifndef NOEKEON_NO_INIT

; === noekeon_init ===
;
;  param1: pointer to key (r24,r25)
;  param2: pointer to context (r22,r23)
;
;  Copies the 16 key bytes into the context and then encrypts the context
;  in place with noekeon_enc, using a 16-byte all-zero key that lives in a
;  temporary stack buffer.
;
;  The stack pointer is changed with interrupts masked between the SPH and
;  SPL writes (SREG saved in r0 and restored), so an interrupt can never
;  run on a half-updated stack pointer.
;
.global noekeon_init
noekeon_init:
	/* allocate 16 bytes on the stack */
	in r30, _SFR_IO_ADDR(SPL)
	in r31, _SFR_IO_ADDR(SPH)
	sbiw r30, 16
	in r0, _SFR_IO_ADDR(SREG)
	cli
	out _SFR_IO_ADDR(SPH), r31
	out _SFR_IO_ADDR(SREG), r0	/* restores the previous I flag */
	out _SFR_IO_ADDR(SPL), r30

	movw r26, r22		/* X points at the context */
	adiw r30, 1		/* Z points at the first allocated byte */
	movw r22, r30		/* second argument for noekeon_enc: the zero key */
	/* set nullv(stack) to zero (r1 is expected to be 0 here) */
	ldi r20, 16
1:	st Z+, r1
	dec r20
	brne 1b

	/* copy key data to ctx */
	movw r30, r24
	ldi r20, 16
1:	ld r1, Z+
	st X+, r1
	dec r20
	brne 1b
	clr r1			/* r1 was used as scratch; make it zero again */

	sbiw r26, 16		/* X back to the start of the context */
	movw r24, r26		/* first argument for noekeon_enc: the context */
	rcall noekeon_enc

	/* release the stack buffer */
	in r30, _SFR_IO_ADDR(SPL)
	in r31, _SFR_IO_ADDR(SPH)
	adiw r30, 16
	in r0, _SFR_IO_ADDR(SREG)
	cli
	out _SFR_IO_ADDR(SPH), r31
	out _SFR_IO_ADDR(SREG), r0	/* restores the previous I flag */
	out _SFR_IO_ADDR(SPL), r30
	ret

#endif
|
||
|
|
||
|
|