@@ -77,17 +77,14 @@ impl HuffmanTable {
77
77
///
78
78
/// It's possible we could avoid checking for 0 if we can guarantee a sane table.
79
79
/// TODO: Check if a smaller type for code_len helps performance.
80
- fn lookup ( & self , bit_buf : BitBuffer ) -> Option < ( i32 , u32 ) > {
80
+ fn lookup ( & self , bit_buf : BitBuffer ) -> ( i32 , u32 ) {
81
81
let symbol = self . fast_lookup ( bit_buf) . into ( ) ;
82
82
if symbol >= 0 {
83
83
let length = ( symbol >> 9 ) as u32 ;
84
- match length {
85
- 0 => None ,
86
- _ => Some ( ( symbol, length) ) ,
87
- }
84
+ ( symbol, length)
88
85
} else {
89
86
// We didn't get a symbol from the fast lookup table, so check the tree instead.
90
- Some ( self . tree_lookup ( symbol, bit_buf, FAST_LOOKUP_BITS ) )
87
+ self . tree_lookup ( symbol, bit_buf, FAST_LOOKUP_BITS )
91
88
}
92
89
}
93
90
}
@@ -189,6 +186,8 @@ pub struct DecompressorOxide {
189
186
/// 1 if the current block is the last block, 0 otherwise.
190
187
finish : u8 ,
191
188
/// The type of the current block.
189
+ /// or if in a dynamic block, which huffman table we are currently
190
+ // initializing.
192
191
block_type : u8 ,
193
192
/// 1 if the adler32 value should be checked.
194
193
check_adler32 : u32 ,
@@ -337,7 +336,6 @@ enum State {
337
336
BadCodeSizeDistPrevLookup ,
338
337
InvalidLitlen ,
339
338
InvalidDist ,
340
- InvalidCodeLen ,
341
339
}
342
340
343
341
impl State {
@@ -531,9 +529,9 @@ where
531
529
// /* bit buffer contains >=15 bits (deflate's max. Huffman code size). */
532
530
loop {
533
531
let mut temp = i32:: from ( r. tables [ table] . fast_lookup ( l. bit_buf ) ) ;
534
-
535
532
if temp >= 0 {
536
533
let code_len = ( temp >> 9 ) as u32 ;
534
+ // TODO: Is there any point to check for code_len != 0 here still?
537
535
if ( code_len != 0 ) && ( l. num_bits >= code_len) {
538
536
break ;
539
537
}
@@ -601,10 +599,6 @@ where
601
599
code_len = res. 1 ;
602
600
} ;
603
601
604
- if code_len == 0 {
605
- return Action :: Jump ( InvalidCodeLen ) ;
606
- }
607
-
608
602
l. bit_buf >>= code_len;
609
603
l. num_bits -= code_len;
610
604
f ( r, l, symbol)
@@ -739,16 +733,23 @@ fn init_tree(r: &mut DecompressorOxide, l: &mut LocalVars) -> Option<Action> {
739
733
let bt = r. block_type as usize ;
740
734
741
735
let code_sizes = match bt {
742
- 0 => & mut r. code_size_literal [ ..] ,
743
- 1 => & mut r. code_size_dist ,
744
- 2 => & mut r. code_size_huffman ,
736
+ LITLEN_TABLE => & mut r. code_size_literal [ ..] ,
737
+ DIST_TABLE => & mut r. code_size_dist ,
738
+ HUFFLEN_TABLE => & mut r. code_size_huffman ,
745
739
_ => return None ,
746
740
} ;
747
741
let table = & mut r. tables [ bt] ;
748
742
749
743
let mut total_symbols = [ 0u16 ; 16 ] ;
750
744
let mut next_code = [ 0u32 ; 17 ] ;
751
- memset ( & mut table. look_up [ ..] , 0 ) ;
745
+ const INVALID_CODE : i16 = 1 << 9 | 286 ;
746
+ // Set the values in the fast table to return a
747
+ // non-zero length and an invalid symbol instead of zero
748
+ // so that we do not have to have a check for a zero
749
+ // code length in the hot code path later
750
+ // and can instead error out on the invalid symbol check
751
+ // on bogus input.
752
+ memset ( & mut table. look_up [ ..] , INVALID_CODE ) ;
752
753
memset ( & mut table. tree [ ..] , 0 ) ;
753
754
754
755
let table_size = r. table_sizes [ bt] as usize ;
@@ -765,14 +766,28 @@ fn init_tree(r: &mut DecompressorOxide, l: &mut LocalVars) -> Option<Action> {
765
766
766
767
let mut used_symbols = 0 ;
767
768
let mut total = 0u32 ;
769
+ // Count up the total number of used lengths and check that the table is not under or over-subscribed.
768
770
for ( & ts, next) in total_symbols. iter ( ) . zip ( next_code[ 1 ..] . iter_mut ( ) ) . skip ( 1 ) {
769
771
used_symbols += ts;
770
772
total += u32:: from ( ts) ;
771
773
total <<= 1 ;
772
774
* next = total;
773
775
}
774
776
775
- if total != 65_536 && used_symbols > 1 {
777
+ //
778
+ // While it's not explicitly stated in the spec, a hufflen table
779
+ // with a single length (or none) would be invalid as there needs to be
780
+ // at minimum a length for both a non-zero length huffman code for the end of block symbol
781
+ // and one of the codes to represent 0 to make sense - so just reject that here as well.
782
+ //
783
+ // The distance table is allowed to have a single distance code though according to the spect it is
784
+ // supposed to be accompanied by a second dummy code. It can also be empty indicating no used codes.
785
+ //
786
+ // The literal/length table can not be empty as there has to be an end of block symbol,
787
+ // The standard doesn't specify that there should be a dummy code in case of a single
788
+ // symbol (i.e an empty block). Normally that's not an issue though the code will have
789
+ // to take that into account later on in case of malformed input.
790
+ if total != 65_536 && ( used_symbols > 1 || bt == HUFFLEN_TABLE ) {
776
791
return Some ( Action :: Jump ( BadTotalSymbols ) ) ;
777
792
}
778
793
@@ -809,7 +824,7 @@ fn init_tree(r: &mut DecompressorOxide, l: &mut LocalVars) -> Option<Action> {
809
824
}
810
825
811
826
let mut tree_cur = table. look_up [ ( rev_code & ( FAST_LOOKUP_SIZE - 1 ) ) as usize ] ;
812
- if tree_cur == 0 {
827
+ if tree_cur == INVALID_CODE {
813
828
table. look_up [ ( rev_code & ( FAST_LOOKUP_SIZE - 1 ) ) as usize ] = tree_next;
814
829
tree_cur = tree_next;
815
830
tree_next -= 2 ;
@@ -841,12 +856,12 @@ fn init_tree(r: &mut DecompressorOxide, l: &mut LocalVars) -> Option<Action> {
841
856
table. tree [ tree_index] = symbol_index as i16 ;
842
857
}
843
858
844
- if r. block_type == 2 {
859
+ if r. block_type == HUFFLEN_TABLE as u8 {
845
860
l. counter = 0 ;
846
861
return Some ( Action :: Jump ( ReadLitlenDistTablesCodeSize ) ) ;
847
862
}
848
863
849
- if r. block_type == 0 {
864
+ if r. block_type == LITLEN_TABLE as u8 {
850
865
break ;
851
866
}
852
867
r. block_type -= 1 ;
@@ -1036,43 +1051,35 @@ fn decompress_fast(
1036
1051
1037
1052
fill_bit_buffer ( & mut l, in_iter) ;
1038
1053
1039
- if let Some ( ( symbol, code_len) ) = r. tables [ LITLEN_TABLE ] . lookup ( l. bit_buf ) {
1040
- l. counter = symbol as u32 ;
1054
+ let ( symbol, code_len) = r. tables [ LITLEN_TABLE ] . lookup ( l. bit_buf ) ;
1055
+ l. counter = symbol as u32 ;
1056
+ l. bit_buf >>= code_len;
1057
+ l. num_bits -= code_len;
1058
+
1059
+ if ( l. counter & 256 ) != 0 {
1060
+ // The symbol is not a literal.
1061
+ break ;
1062
+ } else {
1063
+ // If we have a 32-bit buffer we need to read another two bytes now
1064
+ // to have enough bits to keep going.
1065
+ if cfg ! ( not( target_pointer_width = "64" ) ) {
1066
+ fill_bit_buffer ( & mut l, in_iter) ;
1067
+ }
1068
+
1069
+ let ( symbol, code_len) = r. tables [ LITLEN_TABLE ] . lookup ( l. bit_buf ) ;
1041
1070
l. bit_buf >>= code_len;
1042
1071
l. num_bits -= code_len;
1043
-
1044
- if ( l. counter & 256 ) != 0 {
1045
- // The symbol is not a literal.
1072
+ // The previous symbol was a literal, so write it directly and check
1073
+ // the next one.
1074
+ out_buf. write_byte ( l. counter as u8 ) ;
1075
+ if ( symbol & 256 ) != 0 {
1076
+ l. counter = symbol as u32 ;
1077
+ // The symbol is a length value.
1046
1078
break ;
1047
1079
} else {
1048
- // If we have a 32-bit buffer we need to read another two bytes now
1049
- // to have enough bits to keep going.
1050
- if cfg ! ( not( target_pointer_width = "64" ) ) {
1051
- fill_bit_buffer ( & mut l, in_iter) ;
1052
- }
1053
-
1054
- if let Some ( ( symbol, code_len) ) = r. tables [ LITLEN_TABLE ] . lookup ( l. bit_buf ) {
1055
- l. bit_buf >>= code_len;
1056
- l. num_bits -= code_len;
1057
- // The previous symbol was a literal, so write it directly and check
1058
- // the next one.
1059
- out_buf. write_byte ( l. counter as u8 ) ;
1060
- if ( symbol & 256 ) != 0 {
1061
- l. counter = symbol as u32 ;
1062
- // The symbol is a length value.
1063
- break ;
1064
- } else {
1065
- // The symbol is a literal, so write it directly and continue.
1066
- out_buf. write_byte ( symbol as u8 ) ;
1067
- }
1068
- } else {
1069
- state. begin ( InvalidCodeLen ) ;
1070
- break ' o TINFLStatus :: Failed ;
1071
- }
1080
+ // The symbol is a literal, so write it directly and continue.
1081
+ out_buf. write_byte ( symbol as u8 ) ;
1072
1082
}
1073
- } else {
1074
- state. begin ( InvalidCodeLen ) ;
1075
- break ' o TINFLStatus :: Failed ;
1076
1083
}
1077
1084
}
1078
1085
@@ -1113,22 +1120,18 @@ fn decompress_fast(
1113
1120
fill_bit_buffer ( & mut l, in_iter) ;
1114
1121
}
1115
1122
1116
- if let Some ( ( mut symbol, code_len) ) = r. tables [ DIST_TABLE ] . lookup ( l. bit_buf ) {
1117
- symbol &= 511 ;
1118
- l. bit_buf >>= code_len;
1119
- l. num_bits -= code_len;
1120
- if symbol > 29 {
1121
- state. begin ( InvalidDist ) ;
1122
- break ' o TINFLStatus :: Failed ;
1123
- }
1124
-
1125
- l. num_extra = num_extra_bits_for_distance_code ( symbol as u8 ) ;
1126
- l. dist = u32:: from ( DIST_BASE [ symbol as usize ] ) ;
1127
- } else {
1128
- state. begin ( InvalidCodeLen ) ;
1123
+ let ( mut symbol, code_len) = r. tables [ DIST_TABLE ] . lookup ( l. bit_buf ) ;
1124
+ symbol &= 511 ;
1125
+ l. bit_buf >>= code_len;
1126
+ l. num_bits -= code_len;
1127
+ if symbol > 29 {
1128
+ state. begin ( InvalidDist ) ;
1129
1129
break ' o TINFLStatus :: Failed ;
1130
1130
}
1131
1131
1132
+ l. num_extra = num_extra_bits_for_distance_code ( symbol as u8 ) ;
1133
+ l. dist = u32:: from ( DIST_BASE [ symbol as usize ] ) ;
1134
+
1132
1135
if l. num_extra != 0 {
1133
1136
fill_bit_buffer ( & mut l, in_iter) ;
1134
1137
let extra_bits = l. bit_buf & ( ( 1 << l. num_extra ) - 1 ) ;
@@ -1544,7 +1547,7 @@ pub fn decompress(
1544
1547
} else {
1545
1548
fill_bit_buffer( & mut l, & mut in_iter) ;
1546
1549
1547
- if let Some ( ( symbol, code_len) ) = r. tables[ LITLEN_TABLE ] . lookup( l. bit_buf) {
1550
+ let ( symbol, code_len) = r. tables[ LITLEN_TABLE ] . lookup( l. bit_buf) ;
1548
1551
1549
1552
l. counter = symbol as u32 ;
1550
1553
l. bit_buf >>= code_len;
@@ -1560,7 +1563,7 @@ pub fn decompress(
1560
1563
fill_bit_buffer( & mut l, & mut in_iter) ;
1561
1564
}
1562
1565
1563
- if let Some ( ( symbol, code_len) ) = r. tables[ LITLEN_TABLE ] . lookup( l. bit_buf) {
1566
+ let ( symbol, code_len) = r. tables[ LITLEN_TABLE ] . lookup( l. bit_buf) ;
1564
1567
1565
1568
l. bit_buf >>= code_len;
1566
1569
l. num_bits -= code_len;
@@ -1576,13 +1579,9 @@ pub fn decompress(
1576
1579
out_buf. write_byte( symbol as u8 ) ;
1577
1580
Action :: None
1578
1581
}
1579
- } else {
1580
- Action :: Jump ( InvalidCodeLen )
1581
- }
1582
- }
1583
- } else {
1584
- Action :: Jump ( InvalidCodeLen )
1582
+
1585
1583
}
1584
+
1586
1585
}
1587
1586
} ) ,
1588
1587
@@ -1925,7 +1924,7 @@ mod test {
1925
1924
}
1926
1925
1927
1926
fn masked_lookup ( table : & HuffmanTable , bit_buf : BitBuffer ) -> ( i32 , u32 ) {
1928
- let ret = table. lookup ( bit_buf) . unwrap ( ) ;
1927
+ let ret = table. lookup ( bit_buf) ;
1929
1928
( ret. 0 & 511 , ret. 1 )
1930
1929
}
1931
1930
@@ -2097,4 +2096,35 @@ mod test {
2097
2096
assert_eq ! ( dist, num_extra_bits_for_distance_code( i as u8 ) ) ;
2098
2097
}
2099
2098
}
2099
+
2100
+ #[ test]
2101
+ fn check_tree ( ) {
2102
+ let mut r = DecompressorOxide :: new ( ) ;
2103
+ let mut l = LocalVars {
2104
+ bit_buf : 0 ,
2105
+ num_bits : 0 ,
2106
+ dist : 0 ,
2107
+ counter : 0 ,
2108
+ num_extra : 0 ,
2109
+ } ;
2110
+
2111
+ r. code_size_huffman [ 0 ] = 1 ;
2112
+ r. code_size_huffman [ 1 ] = 1 ;
2113
+ //r.code_size_huffman[2] = 3;
2114
+ //r.code_size_huffman[3] = 3;
2115
+ //r.code_size_huffman[1] = 4;
2116
+ r. block_type = HUFFLEN_TABLE as u8 ;
2117
+ r. table_sizes [ HUFFLEN_TABLE ] = 4 ;
2118
+ let res = init_tree ( & mut r, & mut l) . unwrap ( ) ;
2119
+
2120
+ let status = match res {
2121
+ Action :: Jump ( s) => s,
2122
+ _ => {
2123
+ //println!("issue");
2124
+ return ;
2125
+ }
2126
+ } ;
2127
+ //println!("status {:?}", status);
2128
+ assert ! ( status != BadTotalSymbols ) ;
2129
+ }
2100
2130
}
0 commit comments