Skip to content

Commit 0d54839

Browse files
committed
Benchmark and optimize AddRoundKey
1 parent add0b41 commit 0d54839

File tree

2 files changed

+62
-1
lines changed

2 files changed

+62
-1
lines changed

Classes/FCryptoAES.uc

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ class FCryptoAES extends Object
3030
notplaceable
3131
abstract;
3232

33+
`include(FCrypto\Classes\FCryptoMacros.uci);
3334
`include(FCrypto\Classes\FCryptoAESMacros.uci);
3435

3536
var const array<byte> RCon;
@@ -551,8 +552,39 @@ static final function AddRoundKey(
551552
// Q[I] = Q[I] ^ SK[I];
552553
// }
553554

554-
// TODO: need to benchmark whether a temp var here is better.
555+
local int Offset_1;
556+
local int Offset_2;
557+
local int Offset_3;
558+
local int Offset_4;
559+
local int Offset_5;
560+
local int Offset_6;
561+
local int Offset_7;
562+
563+
Offset_1 = Offset + 1;
564+
Offset_2 = Offset + 2;
565+
Offset_3 = Offset + 3;
566+
Offset_4 = Offset + 4;
567+
Offset_5 = Offset + 5;
568+
Offset_6 = Offset + 6;
569+
Offset_7 = Offset + 7;
570+
571+
Q[Offset ] = Q[Offset ] ^ SK[Offset ];
572+
Q[Offset_1] = Q[Offset_1] ^ SK[Offset_1];
573+
Q[Offset_2] = Q[Offset_2] ^ SK[Offset_2];
574+
Q[Offset_3] = Q[Offset_3] ^ SK[Offset_3];
575+
Q[Offset_4] = Q[Offset_4] ^ SK[Offset_4];
576+
Q[Offset_5] = Q[Offset_5] ^ SK[Offset_5];
577+
Q[Offset_6] = Q[Offset_6] ^ SK[Offset_6];
578+
Q[Offset_7] = Q[Offset_7] ^ SK[Offset_7];
579+
}
555580

581+
`if(`isdefined(FCBENCHMARK))
582+
static final function AddRoundKey_NoTempVars(
583+
out array<int> Q,
584+
const out array<int> SK,
585+
optional int Offset = 0
586+
)
587+
{
556588
Q[Offset ] = Q[Offset ] ^ SK[Offset ];
557589
Q[Offset + 1] = Q[Offset + 1] ^ SK[Offset + 1];
558590
Q[Offset + 2] = Q[Offset + 2] ^ SK[Offset + 2];
@@ -562,6 +594,7 @@ static final function AddRoundKey(
562594
Q[Offset + 6] = Q[Offset + 6] ^ SK[Offset + 6];
563595
Q[Offset + 7] = Q[Offset + 7] ^ SK[Offset + 7];
564596
}
597+
`endif
565598

566599
// TODO: could this be made a macro for performance?
567600
static final function InvShiftRows(out array<int> Q)

Classes/FCryptoTestMutator.uc

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1401,9 +1401,11 @@ private final simulated function int TestSpeed()
14011401
local FCQWORD QW9;
14021402
local bool bQWCarry;
14031403
local float QWClock;
1404+
local float Q;
14041405
local int QWIdx;
14051406
local int BenchmarkRound;
14061407
local array<int> X;
1408+
local array<int> Y;
14071409

14081410
// TODO: Design for FCQWORD arithmetic.
14091411
Dummy = 0xFFFFFFFF;
@@ -1568,6 +1570,32 @@ private final simulated function int TestSpeed()
15681570
UnClock(QWClock);
15691571
`fclog("QWClock (decode2)=" $ QWClock);
15701572

1573+
Q = 0;
1574+
X.Length = 0;
1575+
X.Length = 1024;
1576+
Y.Length = 0;
1577+
Y.Length = 1024;
1578+
Clock(Q);
1579+
for (BenchmarkRound = 0; BenchmarkRound < 512; ++BenchmarkRound)
1580+
{
1581+
class'FCryptoAES'.static.AddRoundKey(X, Y);
1582+
}
1583+
UnClock(Q);
1584+
`fclog("Qclock (AddRoundKey (TempVars) )=" $ Q);
1585+
1586+
Q = 0;
1587+
X.Length = 0;
1588+
X.Length = 1024;
1589+
Y.Length = 0;
1590+
Y.Length = 1024;
1591+
Clock(Q);
1592+
for (BenchmarkRound = 0; BenchmarkRound < 512; ++BenchmarkRound)
1593+
{
1594+
class'FCryptoAES'.static.AddRoundKey_NoTempVars(X, Y);
1595+
}
1596+
UnClock(Q);
1597+
`fclog("Qclock (AddRoundKey (no temp vars))=" $ Q);
1598+
15711599
return 0;
15721600
}
15731601

0 commit comments

Comments
 (0)