Commit f8e6561a authored by bg nerilex

more optimization for pi16cipher

parent 99398ae8
......@@ -32,15 +32,6 @@ ctx_ctr:
ctx_end:
ctx_size:
; NOTE(review): this span comes from a rendered diff hunk whose +/- markers were
; lost. The hunk header (15 old lines -> 6 new lines) suggests these tables are
; being removed by the commit -- confirm against the real file.
.data
; Four 16-bit words placed in the .data section under the label mu16_const.
mu16_const:
.word 0xF0E8, 0xE4E2, 0xE1D8, 0xD4D2
; Four 16-bit words under the label ny16_const. The last three values are the
; same as the ny16_const1..ny16_const3 assembler symbols that appear later in
; this file.
ny16_const:
.word 0xD1CC, 0xCAC9, 0xC6C5, 0xC3B8
.text
.macro addi16 p1:req, p2:req
......@@ -158,9 +149,7 @@ ny16_const1 = 0xCAC9
ny16_const2 = 0xC6C5
ny16_const3 = 0xC3B8
new_ast16:
ast16:
; loading
.macro ast16_prolog
push_range 8, 10
.if 0
clr r31
......@@ -175,8 +164,9 @@ ast16:
movw r26, r20
2: dec r18
brne 1b
movw r30, r24
.else
.elseif 1
movw r26, r22
.irp v, x0l, x0h, x1l, x1h, x2l, x2h, x3l, x3h
ld \v, X+
......@@ -185,9 +175,24 @@ ast16:
.irp v, y0l, y0h, y1l, y1h, y2l, y2h, y3l, y3h
ld \v, X+
.endr
movw r30, r24
.endif
.endm
; ast16_epilog: macro holding the store/restore sequence that also appears
; under the "; epilog" comment of ast16.
; It writes the eight bytes x0l..x3h through Z with post-increment, moves Z
; back by 8, and pops registers via pop_range (helper macro defined outside
; this view; presumably restores r8..r10 -- confirm).
; NOTE(review): taken from a diff hunk without +/- markers; it is unclear
; whether the "movw r30, r24" below belongs to the old or the new revision.
.macro ast16_epilog
; ".if 1" is always true, so the store sequence is always assembled.
.if 1
.irp v, x0l, x0h, x1l, x1h, x2l, x2h, x3l, x3h
st Z+, \v
.endr
; Undo the eight post-increments so Z points at the first stored byte again.
sbiw r30, 8
.endif
; Copy the register pair r25:r24 into Z (r31:r30).
movw r30, r24
pop_range 8, 10
.endm
new_ast16:
ast16:
; loading
; ast16_prolog
; mu
movw t1, x0
......@@ -253,11 +258,7 @@ ast16:
add16 x2, y2
add16 x3, y3
; epilog
.irp v, x0l, x0h, x1l, x1h, x2l, x2h, x3l, x3h
st Z+, \v
.endr
sbiw r30, 8
pop_range 8, 10
; ast16_epilog
ret
/******************************************************************************/
......@@ -281,41 +282,33 @@ void e1_16(
}
*/
; e1_16
; NOTE(review): this span is a rendered diff hunk whose +/- markers were lost.
; Lines of the earlier e1_16 function (global label, "rcall ast16",
; stack_alloc/stack_free, "ret") and lines of the e1_16 macro
; (".macro e1_16" ... ".endm") are interleaved, so the text cannot be assembled
; as shown. Which revision each line belongs to is an inference -- confirm
; against the actual file before changing anything here.
; push_, pop_, stack_alloc, stack_free and the x*/y* register aliases are
; defined outside this view.
.global e1_16
;.global e1_16
e1_16:
push_ 6, 7, 16
movw r8, r20
.macro e1_16
push_ 28, 29
; Z <- r23:r22
movw r30, r22
stack_alloc 8, reg1=r26, reg2=r27
adiw r26, 1
movw r22, r26
; Copy 8 bytes from program memory (Z) to data memory (X), one byte per pass.
ldi r18, 8
1:
lpm r0, Z+
st X+, r0
dec r18
brne 1b
/* --- */
ldi r18, 3
mov r10, r18
rcall ast16
; Y <- r21:r20
movw r28, r20
; Load the eight bytes x0l..x3h from program memory through Z.
.irp v, x0l, x0h, x1l, x1h, x2l, x2h, x3l, x3h
lpm \v, Z+
.endr
; r16 is the counter of the loop below (decremented and tested before "brne 1b").
ldi r16, 4
; Z <- r25:r24
movw r30, r24
1:
movw r22, r30
adiw r30, 8
movw r24, r30
movw r26, r8
adiw r26, 8
movw r20, r26
movw r8, r26
; Keep the loop counter on the stack across the body; it is popped back below.
push r16
; Load the eight bytes y0l..y3h from data memory through Y with post-increment.
.irp v, y0l, y0h, y1l, y1h, y2l, y2h, y3l, y3h
ld \v, Y+
.endr
rcall ast16
dec r10
; Store the eight bytes x0l..x3h through Z with post-increment.
.irp v, x0l, x0h, x1l, x1h, x2l, x2h, x3l, x3h
st Z+, \v
.endr
pop r16
dec r16
brne 1b
; Move Z back by 24 bytes.
sbiw r30, 3 * 4 * 2
/* --- */
stack_free 8, reg1=r26, reg2=r27
pop_ 16, 7, 6
ret
; Move Z back by 32 bytes, i.e. four passes of eight stored bytes.
sbiw r30, 4 * 4 * 2
pop_ 29, 28
.endm
; ret
/******************************************************************************/
/*
......@@ -336,45 +329,40 @@ void e2_16(
}
*/
; e2_16
; NOTE(review): this span is a rendered diff hunk whose +/- markers were lost.
; Lines of the earlier e2_16 function (global label, "rcall ast16",
; stack_alloc/stack_free, "ret") and lines of the e2_16 macro
; (".macro e2_16" ... ".endm") are interleaved, so the text cannot be assembled
; as shown. Which revision each line belongs to is an inference -- confirm
; against the actual file before changing anything here.
; push_, pop_, stack_alloc, stack_free and the x*/y* register aliases are
; defined outside this view.
.global e2_16
;.global e2_16
e2_16:
push_ 6, 7, 16
.macro e2_16
push_ 28, 29
; Z <- r23:r22
movw r30, r22
movw r26, r20
adiw r26, 24
movw r8, r26
movw r22, r26
stack_alloc 8, reg1 = r26, reg2 = r27
adiw r26, 1
movw r20, r26
; Copy 8 bytes from program memory (Z) to data memory (X), one byte per pass.
ldi r18, 8
1:
lpm r0, Z+
st X+, r0
dec r18
brne 1b
/* --- */
ldi r18, 3
mov r10, r18
adiw r24, 24
rcall ast16
; Y <- r21:r20
movw r28, r20
; Keep r25:r24 in r3:r2; it is copied into Z a few lines below.
movw r2, r24
; Load the eight bytes y0l..y3h from program memory through Z.
.irp v, y0l, y0h, y1l, y1h, y2l, y2h, y3l, y3h
lpm \v, Z+
.endr
; r26 is the counter of the loop below (decremented and tested before "brne 1b").
ldi r26, 4
movw r30, r2
; Advance Z and Y by 32 bytes each; the loop below walks both pointers
; downwards with pre-decrement accesses.
adiw r30, 4 * 4 * 2
adiw r28, 4 * 4 * 2
1:
movw r20, r30
sbiw r30, 8
movw r24, r30
movw r26, r8
sbiw r26, 8
movw r22, r26
movw r8, r26
; Keep the loop counter on the stack across the body; it is popped back below.
push r26
; Load x3h down to x0l through Y with pre-decrement (reverse byte order).
.irp v, x3h, x3l, x2h, x2l, x1h, x1l, x0h, x0l
ld \v, -Y
.endr
rcall ast16
dec r10
; Copy the four x register pairs into the four y register pairs.
movw y0, x0
movw y1, x1
movw y2, x2
movw y3, x3
; Store x3h down to x0l through Z with pre-decrement (reverse byte order).
.irp v, x3h, x3l, x2h, x2l, x1h, x1l, x0h, x0l
st -Z, \v
.endr
pop r26
dec r26
brne 1b
/* --- */
stack_free 8, reg1 = r26, reg2 = r27
pop_ 16, 7, 6
ret
pop_ 29, 28
.endm
; ret
/******************************************************************************/
/*
......@@ -477,15 +465,10 @@ ctr_trans:
.global pi
pi:
push r6
push r7
push r16
push r28
push r29
push_range 2, 7
push_range 8, 10
push_range 11, 17
push_range 2, 17
stack_alloc 32, reg1 = r28, reg2 = r29
adiw r28, 1
......@@ -494,34 +477,40 @@ pi:
ldi r29, hi8(PI_CONST - 8)
ldi r16, 3
movw r30, r24
1:
Lpi_loop:
push r16
movw r24, r6
movw r6, r30
movw r20, r30
adiw r28, 8
movw r22, r28
rcall e1_16
push_ 6, 7
e1_16
pop_ 7, 6
movw r24, r6
movw r6, r30
movw r20, r30
adiw r28, 8
movw r22, r28
rcall e2_16
push_ 6, 7
e2_16
pop_ 7, 6
pop r16
dec r16
brne 1b
breq 1f
rjmp Lpi_loop
/* --- */
1:
stack_free 32, reg1 = r26, reg2 = r27
pop_range 11, 17
pop_range 8, 10
pop_range 2, 7
pop_range 2, 17
pop r29
pop r28
pop r16
pop r7
pop r6
ret
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment