Skip to content

Commit b781583

Browse files
committed
Implement aes_ct_dec.c in UScript
1 parent d07880d commit b781583

File tree

2 files changed

+257
-3
lines changed

2 files changed

+257
-3
lines changed

Classes/FCryptoAES.uc

Lines changed: 217 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -405,8 +405,7 @@ static final function int AesCtKeySched(
405405
Tmp = 0;
406406
for (I = 0; I < Nk; ++I)
407407
{
408-
// TODO: dedicated file for Enc/Dec functions?
409-
// Tmp = Dec32LE(Key, I << 2);
408+
Tmp = class'FCryptoEncDec'.static.Dec32LE(Key, I << 2);
410409
SKey[(I << 1) ] = Tmp;
411410
SKey[(I << 1) + 1] = Tmp;
412411
}
@@ -439,7 +438,7 @@ static final function int AesCtKeySched(
439438
J = 0;
440439
for (I = 0; I < Nkf; ++I)
441440
{
442-
CompSkey[I] = (SKey[J] & 0x55555555) | (SKey[J + 1] & 0xAAAAAAAA);
441+
CompSKey[I] = (SKey[J] & 0x55555555) | (SKey[J + 1] & 0xAAAAAAAA);
443442
J += 2;
444443
}
445444
return NumRounds;
@@ -471,6 +470,221 @@ static final function AesCtSKeyExpand(
471470
}
472471
}
473472

473+
static final function AesCtBitSliceInvSBox(out array<int> Q)
{
    /*
     * Bitsliced inverse AES S-box, applied in place to the 8 words of Q.
     *
     * The forward S-box is S(x) = A(I(x)) ^ 0x63, with I() the inversion
     * in GF(256) and A() a linear transform. Because inversion is an
     * involution, the inverse S-box can be built from the forward one:
     *   iS(x) = B(S(B(x ^ 0x63)) ^ 0x63)
     * where B() is the inverse of A().
     *
     * Both halves of that formula are the same "XOR with 0x63, then B()"
     * step, so it is written once and executed twice, with the forward
     * S-box call in between. Reusing the forward S-box keeps code size
     * down at the cost of some speed.
     */

    local int B0;
    local int B1;
    local int B2;
    local int B3;
    local int B4;
    local int B5;
    local int B6;
    local int B7;
    local int Pass;

    for (Pass = 0; Pass < 2; ++Pass)
    {
        // XOR with 0x63 (bits 0, 1, 5 and 6 set) is a complement of
        // the corresponding words of Q.
        B0 = ~Q[0];
        B1 = ~Q[1];
        B2 = Q[2];
        B3 = Q[3];
        B4 = Q[4];
        B5 = ~Q[5];
        B6 = ~Q[6];
        B7 = Q[7];

        // Linear transform B(); every input was captured above, so the
        // order of these stores does not matter.
        Q[0] = B2 ^ B5 ^ B7;
        Q[1] = B3 ^ B6 ^ B0;
        Q[2] = B4 ^ B7 ^ B1;
        Q[3] = B5 ^ B0 ^ B2;
        Q[4] = B6 ^ B1 ^ B3;
        Q[5] = B7 ^ B2 ^ B4;
        Q[6] = B0 ^ B3 ^ B5;
        Q[7] = B1 ^ B4 ^ B6;

        // The forward S-box sits between the two B() applications.
        if (Pass == 0)
        {
            AesCtBitSliceSBox(Q);
        }
    }
}
539+
540+
// TODO: can be made a macro for performance?
541+
/**
 * XORs one 8-word round key into the 8-word state Q, in place.
 *
 * @param Q      state words; only indices 0..7 are read and written.
 * @param SK     key schedule words; indices Offset..Offset + 7 are read.
 * @param Offset index of the first round key word inside SK. It selects
 *               the round key only and is never applied to Q. The state
 *               is always Q[0..7], whichever round key is being added.
 */
static final function AddRoundKey(
    out array<int> Q,
    const out array<int> SK,
    optional int Offset = 0
)
{
    // Kept unrolled on purpose.
    // TODO: need to benchmark whether a temp var here is better.

    Q[0] = Q[0] ^ SK[Offset + 0];
    Q[1] = Q[1] ^ SK[Offset + 1];
    Q[2] = Q[2] ^ SK[Offset + 2];
    Q[3] = Q[3] ^ SK[Offset + 3];
    Q[4] = Q[4] ^ SK[Offset + 4];
    Q[5] = Q[5] ^ SK[Offset + 5];
    Q[6] = Q[6] ^ SK[Offset + 6];
    Q[7] = Q[7] ^ SK[Offset + 7];
}
565+
566+
// TODO: can be made a macro for performance?
567+
/**
 * Applies the same fixed bit permutation to each of the 8 words of Q,
 * in place. The low byte of every word is kept as is; within each of
 * the three upper bytes the bits are rotated by 2, 4 and 6 positions
 * respectively.
 *
 * Written as a loop; each iteration performs exactly the per-word
 * expression of the unrolled form.
 */
static final function InvShiftRows(out array<int> Q)
{
    local int I;
    local int Word;

    for (I = 0; I < 8; ++I)
    {
        Word = Q[I];

        // >>> is the logical (zero-filling) right shift, so the sign bit
        // of Word is not smeared into the result.
        Q[I] = (Word & 0x000000FF)
            | ((Word & 0x00003F00) << 2) | ((Word & 0x0000C000) >>> 6)
            | ((Word & 0x000F0000) << 4) | ((Word & 0x00F00000) >>> 4)
            | ((Word & 0x03000000) << 6) | ((Word & 0xFC000000) >>> 2);
    }
}
614+
615+
// static final function int RotR16(int X)
616+
// {
617+
// return (X << 16) | (X >>> 16);
618+
// }
619+
// Rotates a 32-bit value by 16 bits (swaps its two 16-bit halves).
// The argument is wrapped in parentheses on each use: shift operators bind
// tighter than XOR, so without them an argument such as "A ^ B" would only
// have its last operand shifted.
// NOTE: the argument is expanded twice; pass side-effect-free expressions.
`define ROTR16(X) (((`X) << 16) | ((`X) >>> 16))
620+
621+
/**
 * Inverse MixColumns step on the bitsliced state, in place.
 *
 * Each output word is an XOR of input words Q0..Q7, of the same words
 * rotated right by 8 bits (R0..R7), and of a 16-bit rotation of a
 * further XOR combination.
 *
 * @param Q state words; indices 0..7 are read and then overwritten.
 */
static final function InvMixColumns(out array<int> Q)
{
    local int Q0;
    local int Q1;
    local int Q2;
    local int Q3;
    local int Q4;
    local int Q5;
    local int Q6;
    local int Q7;
    local int R0;
    local int R1;
    local int R2;
    local int R3;
    local int R4;
    local int R5;
    local int R6;
    local int R7;
    local int T;

    Q0 = Q[0];
    Q1 = Q[1];
    Q2 = Q[2];
    Q3 = Q[3];
    Q4 = Q[4];
    Q5 = Q[5];
    Q6 = Q[6];
    Q7 = Q[7];

    // Rotate right by 8. This must use the logical shift (>>>): int is
    // signed, and the arithmetic shift (>>) would fill the top byte with
    // copies of the sign bit, which the OR then cannot undo.
    R0 = (Q0 >>> 8) | (Q0 << 24);
    R1 = (Q1 >>> 8) | (Q1 << 24);
    R2 = (Q2 >>> 8) | (Q2 << 24);
    R3 = (Q3 >>> 8) | (Q3 << 24);
    R4 = (Q4 >>> 8) | (Q4 << 24);
    R5 = (Q5 >>> 8) | (Q5 << 24);
    R6 = (Q6 >>> 8) | (Q6 << 24);
    R7 = (Q7 >>> 8) | (Q7 << 24);

    // The value to rotate by 16 is computed into T first, so the macro
    // receives a single identifier: the XOR chain is evaluated once and
    // the result does not depend on how the macro parenthesizes its
    // argument.
    T = Q0 ^ Q5 ^ Q6 ^ R0 ^ R5;
    Q[0] = Q5 ^ Q6 ^ Q7 ^ R0 ^ R5 ^ R7 ^ `ROTR16(T);

    T = Q1 ^ Q5 ^ Q7 ^ R1 ^ R5 ^ R6;
    Q[1] = Q0 ^ Q5 ^ R0 ^ R1 ^ R5 ^ R6 ^ R7 ^ `ROTR16(T);

    T = Q0 ^ Q2 ^ Q6 ^ R2 ^ R6 ^ R7;
    Q[2] = Q0 ^ Q1 ^ Q6 ^ R1 ^ R2 ^ R6 ^ R7 ^ `ROTR16(T);

    T = Q0 ^ Q1 ^ Q3 ^ Q5 ^ Q6 ^ Q7 ^ R0 ^ R3 ^ R5 ^ R7;
    Q[3] = Q0 ^ Q1 ^ Q2 ^ Q5 ^ Q6 ^ R0 ^ R2 ^ R3 ^ R5 ^ `ROTR16(T);

    T = Q1 ^ Q2 ^ Q4 ^ Q5 ^ Q7 ^ R1 ^ R4 ^ R5 ^ R6;
    Q[4] = Q1 ^ Q2 ^ Q3 ^ Q5 ^ R1 ^ R3 ^ R4 ^ R5 ^ R6 ^ R7 ^ `ROTR16(T);

    T = Q2 ^ Q3 ^ Q5 ^ Q6 ^ R2 ^ R5 ^ R6 ^ R7;
    Q[5] = Q2 ^ Q3 ^ Q4 ^ Q6 ^ R2 ^ R4 ^ R5 ^ R6 ^ R7 ^ `ROTR16(T);

    T = Q3 ^ Q4 ^ Q6 ^ Q7 ^ R3 ^ R6 ^ R7;
    Q[6] = Q3 ^ Q4 ^ Q5 ^ Q7 ^ R3 ^ R5 ^ R6 ^ R7 ^ `ROTR16(T);

    T = Q4 ^ Q5 ^ Q7 ^ R4 ^ R7;
    Q[7] = Q4 ^ Q5 ^ Q6 ^ R4 ^ R6 ^ R7 ^ `ROTR16(T);
}
666+
667+
/**
 * Runs the bitsliced AES decryption rounds on the state Q, in place.
 *
 * Round keys are taken from SKey in blocks of 8 words, starting with the
 * block at index NumRounds << 3 and working down to the block at index 0.
 *
 * @param NumRounds number of rounds.
 * @param SKey      key schedule, 8 words per round key.
 * @param Q         state words, transformed in place.
 */
static final function AesCtBitSliceDecrypt(
    int NumRounds,
    const out array<int> SKey,
    out array<int> Q
)
{
    local int Round;

    // Start with the last round key.
    AddRoundKey(Q, SKey, NumRounds << 3);

    // Rounds NumRounds - 1 down to 1 include InvMixColumns.
    Round = NumRounds - 1;
    while (Round > 0)
    {
        InvShiftRows(Q);
        AesCtBitSliceInvSBox(Q);
        AddRoundKey(Q, SKey, Round << 3);
        InvMixColumns(Q);
        --Round;
    }

    // The final round has no InvMixColumns and uses the round key at the
    // start of SKey.
    InvShiftRows(Q);
    AesCtBitSliceInvSBox(Q);
    AddRoundKey(Q, SKey);
}
687+
474688
DefaultProperties
475689
{
476690
// 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36

Classes/FCryptoEncDec.uc

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
/*
2+
* Copyright (c) 2024 Tuomo Kriikkula <tuokri@tuta.io>
3+
* Copyright (c) 2017 Thomas Pornin <pornin@bolet.org>
4+
*
5+
* Permission is hereby granted, free of charge, to any person obtaining
6+
* a copy of this software and associated documentation files (the
7+
* "Software"), to deal in the Software without restriction, including
8+
* without limitation the rights to use, copy, modify, merge, publish,
9+
* distribute, sublicense, and/or sell copies of the Software, and to
10+
* permit persons to whom the Software is furnished to do so, subject to
11+
* the following conditions:
12+
*
13+
* The above copyright notice and this permission notice shall be
14+
* included in all copies or substantial portions of the Software.
15+
*
16+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17+
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18+
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19+
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
20+
* BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
21+
* ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22+
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23+
* SOFTWARE.
24+
*/
25+
26+
// TODO: move all encoding/decoding functions here.
27+
class FCryptoEncDec extends Object;
28+
29+
/**
 * Decodes 4 consecutive bytes of Src as a little-endian 32-bit integer.
 *
 * @param Src    source bytes.
 * @param Offset index of the least significant byte; Src[Offset + 3]
 *               is the most significant one.
 * @return the combined 32-bit value.
 */
static final function int Dec32LE(
    const out array<byte> Src,
    optional int Offset = 0
)
{
    local int Lo;
    local int MidLo;
    local int MidHi;
    local int Hi;

    Lo    = Src[Offset + 0];
    MidLo = Src[Offset + 1];
    MidHi = Src[Offset + 2];
    Hi    = Src[Offset + 3];

    return Lo | (MidLo << 8) | (MidHi << 16) | (Hi << 24);
}

0 commit comments

Comments
 (0)