@@ -405,8 +405,7 @@ static final function int AesCtKeySched(
405
405
Tmp = 0 ;
406
406
for (I = 0 ; I < Nk ; ++I )
407
407
{
408
- // TODO: dedicated file for Enc/Dec functions?
409
- // Tmp = Dec32LE(Key, I << 2);
408
+ Tmp = class 'FCryptoEncDec' .static .Dec32LE (Key , I << 2 );
410
409
SKey [(I << 1 ) ] = Tmp ;
411
410
SKey [(I << 1 ) + 1 ] = Tmp ;
412
411
}
@@ -439,7 +438,7 @@ static final function int AesCtKeySched(
439
438
J = 0 ;
440
439
for (I = 0 ; I < Nkf ; ++I )
441
440
{
442
- CompSkey [I ] = (SKey [J ] & 0x55555555 ) | (SKey [J + 1 ] & 0xAAAAAAAA );
441
+ CompSKey [I ] = (SKey [J ] & 0x55555555 ) | (SKey [J + 1 ] & 0xAAAAAAAA );
443
442
J += 2 ;
444
443
}
445
444
return NumRounds ;
@@ -471,6 +470,221 @@ static final function AesCtSKeyExpand(
471
470
}
472
471
}
473
472
473
/*
 * Applies the inverse AES S-box to the bitsliced state Q, in place.
 *
 * The forward S-box is S(x) = A(I(x)) ^ 0x63, where I() is inversion in
 * GF(256) (with 0 mapped to itself) and A() is a linear transform. Because
 * I() is an involution, the inverse S-box can be built around the forward
 * one, with B() denoting the inverse of A():
 *
 *     iS(x) = B(S(B(x ^ 0x63)) ^ 0x63)
 *
 * so that for any y: iS(S(y)) = y.
 *
 * The forward S-box implementation is reused on purpose to keep code size
 * down. Folding B() into the S-box circuit would be faster, but CBC
 * decryption is already quicker than CBC encryption since two blocks can
 * be processed in parallel.
 */
static final function AesCtBitSliceInvSBox(out array<int> Q)
{
    local int B0, B1, B2, B3, B4, B5, B6, B7;

    // x ^ 0x63 in bitsliced form: 0x63 has bits 0, 1, 5 and 6 set, so
    // exactly those four planes are complemented while being loaded.
    B0 = ~Q[0];
    B1 = ~Q[1];
    B2 = Q[2];
    B3 = Q[3];
    B4 = Q[4];
    B5 = ~Q[5];
    B6 = ~Q[6];
    B7 = Q[7];

    // B(): every output plane is the XOR of three input planes.
    Q[0] = B2 ^ B5 ^ B7;
    Q[1] = B3 ^ B6 ^ B0;
    Q[2] = B4 ^ B7 ^ B1;
    Q[3] = B5 ^ B0 ^ B2;
    Q[4] = B6 ^ B1 ^ B3;
    Q[5] = B7 ^ B2 ^ B4;
    Q[6] = B0 ^ B3 ^ B5;
    Q[7] = B1 ^ B4 ^ B6;

    AesCtBitSliceSBox(Q);

    // Second round of "^ 0x63 then B()", applied to the S-box output.
    B0 = ~Q[0];
    B1 = ~Q[1];
    B2 = Q[2];
    B3 = Q[3];
    B4 = Q[4];
    B5 = ~Q[5];
    B6 = ~Q[6];
    B7 = Q[7];

    Q[0] = B2 ^ B5 ^ B7;
    Q[1] = B3 ^ B6 ^ B0;
    Q[2] = B4 ^ B7 ^ B1;
    Q[3] = B5 ^ B0 ^ B2;
    Q[4] = B6 ^ B1 ^ B3;
    Q[5] = B7 ^ B2 ^ B4;
    Q[6] = B0 ^ B3 ^ B5;
    Q[7] = B1 ^ B4 ^ B6;
}
539
+
540
+ // TODO: can be made a macro for performance?
541
/*
 * XORs one 8-word round key into the bitsliced state.
 *
 * Q      - state; words 0..7 are updated in place.
 * SK     - key schedule holding the round keys; only read here.
 * Offset - index in SK of the first word of the round key to apply
 *          (defaults to 0, i.e. the first round key).
 */
static final function AddRoundKey(
    out array<int> Q,
    const out array<int> SK,
    optional int Offset = 0
)
{
    // Offset selects the round key inside SK only. The state always
    // occupies Q[0..7], so Q must never be indexed with Offset: doing so
    // would leave the real state untouched and touch elements past it.
    //
    // Kept unrolled (equivalent to: for I in 0..7, Q[I] ^= SK[Offset + I]).
    // TODO: need to benchmark whether a temp var here is better.
    Q[0] = Q[0] ^ SK[Offset];
    Q[1] = Q[1] ^ SK[Offset + 1];
    Q[2] = Q[2] ^ SK[Offset + 2];
    Q[3] = Q[3] ^ SK[Offset + 3];
    Q[4] = Q[4] ^ SK[Offset + 4];
    Q[5] = Q[5] ^ SK[Offset + 5];
    Q[6] = Q[6] ^ SK[Offset + 6];
    Q[7] = Q[7] ^ SK[Offset + 7];
}
565
+
566
+ // TODO: can be made a macro for performance?
567
/*
 * Inverse ShiftRows on the bitsliced state Q, in place.
 *
 * The same permutation is applied to each of the eight words. Within a
 * word the four bytes are handled independently:
 *   byte 0 (bits  0..7 ) is left unchanged,
 *   byte 1 (bits  8..15) is rotated left by 2 bits inside the byte,
 *   byte 2 (bits 16..23) is rotated by 4 bits inside the byte,
 *   byte 3 (bits 24..31) is rotated left by 6 bits inside the byte.
 *
 * Written as shift-then-mask; all right shifts are logical (>>>).
 * Deliberately unrolled instead of looping over the eight words.
 */
static final function InvShiftRows(out array<int> Q)
{
    local int W;

    W = Q[0];
    Q[0] = (W & 0x000000FF)
        | ((W << 2) & 0x0000FC00) | ((W >>> 6) & 0x00000300)
        | ((W << 4) & 0x00F00000) | ((W >>> 4) & 0x000F0000)
        | ((W << 6) & 0xC0000000) | ((W >>> 2) & 0x3F000000);

    W = Q[1];
    Q[1] = (W & 0x000000FF)
        | ((W << 2) & 0x0000FC00) | ((W >>> 6) & 0x00000300)
        | ((W << 4) & 0x00F00000) | ((W >>> 4) & 0x000F0000)
        | ((W << 6) & 0xC0000000) | ((W >>> 2) & 0x3F000000);

    W = Q[2];
    Q[2] = (W & 0x000000FF)
        | ((W << 2) & 0x0000FC00) | ((W >>> 6) & 0x00000300)
        | ((W << 4) & 0x00F00000) | ((W >>> 4) & 0x000F0000)
        | ((W << 6) & 0xC0000000) | ((W >>> 2) & 0x3F000000);

    W = Q[3];
    Q[3] = (W & 0x000000FF)
        | ((W << 2) & 0x0000FC00) | ((W >>> 6) & 0x00000300)
        | ((W << 4) & 0x00F00000) | ((W >>> 4) & 0x000F0000)
        | ((W << 6) & 0xC0000000) | ((W >>> 2) & 0x3F000000);

    W = Q[4];
    Q[4] = (W & 0x000000FF)
        | ((W << 2) & 0x0000FC00) | ((W >>> 6) & 0x00000300)
        | ((W << 4) & 0x00F00000) | ((W >>> 4) & 0x000F0000)
        | ((W << 6) & 0xC0000000) | ((W >>> 2) & 0x3F000000);

    W = Q[5];
    Q[5] = (W & 0x000000FF)
        | ((W << 2) & 0x0000FC00) | ((W >>> 6) & 0x00000300)
        | ((W << 4) & 0x00F00000) | ((W >>> 4) & 0x000F0000)
        | ((W << 6) & 0xC0000000) | ((W >>> 2) & 0x3F000000);

    W = Q[6];
    Q[6] = (W & 0x000000FF)
        | ((W << 2) & 0x0000FC00) | ((W >>> 6) & 0x00000300)
        | ((W << 4) & 0x00F00000) | ((W >>> 4) & 0x000F0000)
        | ((W << 6) & 0xC0000000) | ((W >>> 2) & 0x3F000000);

    W = Q[7];
    Q[7] = (W & 0x000000FF)
        | ((W << 2) & 0x0000FC00) | ((W >>> 6) & 0x00000300)
        | ((W << 4) & 0x00F00000) | ((W >>> 4) & 0x000F0000)
        | ((W << 6) & 0xC0000000) | ((W >>> 2) & 0x3F000000);
}
614
+
615
+ // static final function int RotR16(int X)
616
+ // {
617
+ // return (X << 16) | (X >>> 16);
618
+ // }
619
// Rotates a 32-bit value by 16 bits (swaps its two 16-bit halves).
// The argument is wrapped in parentheses because macro expansion is
// textual and << / >>> bind tighter than ^, & and |: without them an
// argument such as "A ^ B" would only have its last operand shifted.
// The argument is expanded twice, so it must be free of side effects.
`define ROTR16(X) (((`X) << 16) | ((`X) >>> 16))
620
+
621
/*
 * Inverse MixColumns on the bitsliced state Q, in place.
 *
 * Each output word is an XOR of input words Qn, their rotations right by
 * 8 bits Rn, and a 16-bit rotation of a further XOR combination.
 */
static final function InvMixColumns(out array<int> Q)
{
    local int Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7;
    local int R0, R1, R2, R3, R4, R5, R6, R7;
    local int T;

    Q0 = Q[0];
    Q1 = Q[1];
    Q2 = Q[2];
    Q3 = Q[3];
    Q4 = Q[4];
    Q5 = Q[5];
    Q6 = Q[6];
    Q7 = Q[7];

    // Rotate right by 8. The right shift has to be the logical one (>>>):
    // int is signed, and the arithmetic >> would smear the sign bit over
    // the top byte whenever bit 31 of the word is set.
    R0 = (Q0 >>> 8) | (Q0 << 24);
    R1 = (Q1 >>> 8) | (Q1 << 24);
    R2 = (Q2 >>> 8) | (Q2 << 24);
    R3 = (Q3 >>> 8) | (Q3 << 24);
    R4 = (Q4 >>> 8) | (Q4 << 24);
    R5 = (Q5 >>> 8) | (Q5 << 24);
    R6 = (Q6 >>> 8) | (Q6 << 24);
    R7 = (Q7 >>> 8) | (Q7 << 24);

    // The value to rotate is computed into T first and only then handed
    // to ROTR16. The macro expands textually, so passing a compound XOR
    // expression directly could regroup around the shifts inside it.

    T = Q0 ^ Q5 ^ Q6 ^ R0 ^ R5;
    Q[0] = Q5 ^ Q6 ^ Q7 ^ R0 ^ R5 ^ R7 ^ `ROTR16(T);

    T = Q1 ^ Q5 ^ Q7 ^ R1 ^ R5 ^ R6;
    Q[1] = Q0 ^ Q5 ^ R0 ^ R1 ^ R5 ^ R6 ^ R7 ^ `ROTR16(T);

    T = Q0 ^ Q2 ^ Q6 ^ R2 ^ R6 ^ R7;
    Q[2] = Q0 ^ Q1 ^ Q6 ^ R1 ^ R2 ^ R6 ^ R7 ^ `ROTR16(T);

    T = Q0 ^ Q1 ^ Q3 ^ Q5 ^ Q6 ^ Q7 ^ R0 ^ R3 ^ R5 ^ R7;
    Q[3] = Q0 ^ Q1 ^ Q2 ^ Q5 ^ Q6 ^ R0 ^ R2 ^ R3 ^ R5 ^ `ROTR16(T);

    T = Q1 ^ Q2 ^ Q4 ^ Q5 ^ Q7 ^ R1 ^ R4 ^ R5 ^ R6;
    Q[4] = Q1 ^ Q2 ^ Q3 ^ Q5 ^ R1 ^ R3 ^ R4 ^ R5 ^ R6 ^ R7 ^ `ROTR16(T);

    T = Q2 ^ Q3 ^ Q5 ^ Q6 ^ R2 ^ R5 ^ R6 ^ R7;
    Q[5] = Q2 ^ Q3 ^ Q4 ^ Q6 ^ R2 ^ R4 ^ R5 ^ R6 ^ R7 ^ `ROTR16(T);

    T = Q3 ^ Q4 ^ Q6 ^ Q7 ^ R3 ^ R6 ^ R7;
    Q[6] = Q3 ^ Q4 ^ Q5 ^ Q7 ^ R3 ^ R5 ^ R6 ^ R7 ^ `ROTR16(T);

    T = Q4 ^ Q5 ^ Q7 ^ R4 ^ R7;
    Q[7] = Q4 ^ Q5 ^ Q6 ^ R4 ^ R6 ^ R7 ^ `ROTR16(T);
}
666
+
667
/*
 * Runs the AES decryption rounds on the bitsliced state Q, in place.
 *
 * NumRounds - number of rounds.
 * SKey      - key schedule, read-only; the key for round R starts at
 *             index R << 3 (eight words per round key).
 * Q         - bitsliced state, transformed in place.
 */
static final function AesCtBitSliceDecrypt(
    int NumRounds,
    const out array<int> SKey,
    out array<int> Q
)
{
    local int Round;

    // Start with the last round key and walk the schedule backwards.
    AddRoundKey(Q, SKey, NumRounds << 3);

    Round = NumRounds - 1;
    while (Round > 0)
    {
        InvShiftRows(Q);
        AesCtBitSliceInvSBox(Q);
        AddRoundKey(Q, SKey, Round << 3);
        InvMixColumns(Q);
        --Round;
    }

    // Final step uses the first round key and has no InvMixColumns.
    InvShiftRows(Q);
    AesCtBitSliceInvSBox(Q);
    AddRoundKey(Q, SKey);
}
687
+
474
688
DefaultProperties
475
689
{
476
690
// 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1B, 0x36
0 commit comments