Skip to content

Commit 1966d11

Browse files
Add direct-mapped cache to decrease BRAM utilization.
Add rtl_src/cache_directly_mapped.sv and a simple testbench. The cache has been added to the design inside the basic block so as to have the smallest possible impact. Since the basic block can now, in principle, start processing instructions before the new current character is fetched, a new control signal (go) has been added to control dequeueing from cur_char_fifo. Go is generated by the regex_coprocessor FSM and is kept high only when status = S_EXEC. The cache is parametrized by the number of lines, expressed as a power-of-two exponent (CACHE_WIDTH_BITS), the width of the addresses it is meant to handle (ADDR_WIDTH), and the page size (DWIDTH). Pages are transferred in a single clock cycle. Parameters are exposed up to regex_coprocessor_* and specified by AXI_TOP. If a basic block is requested with CACHE_WIDTH_BITS less than or equal to zero, no cache is implemented (this is the case for regex_coprocessor_single_bb). Introduced a way to choose between regex_coprocessor_n_bb and *_single_bb using the number of basic blocks (BB_N). Tested and working on Zynq-z1. Closes #17 Closes #6 (even though it was addressed by the previous commit)
1 parent c7034a2 commit 1966d11

8 files changed

+477
-65
lines changed

re2compiler

rtl_src/AXI_top.sv

Lines changed: 50 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -199,47 +199,54 @@ bram #(
199199
.data_o( bram_out )
200200
);
201201

202-
/*
203-
regex_coprocessor_single_bb #(
204-
.PC_WIDTH (PC_WIDTH ),
205-
.CHARACTER_WIDTH (CHARACTER_WIDTH ),
206-
.MEMORY_WIDTH (BRAM_READ_WIDTH-BRAM_READ_WIDTH_PARITY),
207-
.MEMORY_ADDR_WIDTH(BRAM_ADDR_WIDTH )
208-
) a_regex_coprocessor (
209-
.clk (clk),
210-
.reset (reset_master),
211-
.memory_ready (memory_addr_from_coprocessor_ready ),
212-
.memory_addr (memory_addr_from_coprocessor ),
213-
.memory_data (bram_payload),
214-
.memory_valid (memory_addr_from_coprocessor_valid ),
215-
.start_ready (start_ready),
216-
.start_cc_pointer (start_cc_pointer),
217-
.start_valid (start_valid),
218-
.finish (finish),
219-
.accept (accept)
220-
);
221-
*/
222-
223-
regex_coprocessor_n_bb #(
224-
.PC_WIDTH (PC_WIDTH ),
225-
.CHARACTER_WIDTH (CHARACTER_WIDTH ),
226-
.MEMORY_WIDTH (BRAM_READ_WIDTH-BRAM_READ_WIDTH_PARITY),
227-
.MEMORY_ADDR_WIDTH (BRAM_ADDR_WIDTH ),
228-
.LATENCY_COUNT_WIDTH (7),
229-
.FIFO_COUNT_WIDTH (6),
230-
.BB_N (5)
231-
)a_regex_coprocessor (
232-
.clk (clk),
233-
.reset (reset_master),
234-
.memory_ready (memory_addr_from_coprocessor_ready ),
235-
.memory_addr (memory_addr_from_coprocessor ),
236-
.memory_data (bram_payload),
237-
.memory_valid (memory_addr_from_coprocessor_valid ),
238-
.start_ready (start_ready),
239-
.start_cc_pointer (start_cc_pointer),
240-
.start_valid (start_valid),
241-
.finish (finish),
242-
.accept (accept)
243-
);
244-
202+
localparam BB_N = 6;
203+
localparam FIFO_COUNT_WIDTH = 6;
204+
localparam CACHE_WIDTH_BITS = 0;
205+
if (BB_N == 1)
206+
begin
207+
regex_coprocessor_single_bb #(
208+
.PC_WIDTH (PC_WIDTH ),
209+
.CHARACTER_WIDTH (CHARACTER_WIDTH ),
210+
.MEMORY_WIDTH (BRAM_READ_WIDTH-BRAM_READ_WIDTH_PARITY),
211+
.MEMORY_ADDR_WIDTH (BRAM_ADDR_WIDTH ),
212+
.FIFO_COUNT_WIDTH (FIFO_COUNT_WIDTH )
213+
) a_regex_coprocessor (
214+
.clk (clk),
215+
.reset (reset_master),
216+
.memory_ready (memory_addr_from_coprocessor_ready ),
217+
.memory_addr (memory_addr_from_coprocessor ),
218+
.memory_data (bram_payload),
219+
.memory_valid (memory_addr_from_coprocessor_valid ),
220+
.start_ready (start_ready),
221+
.start_cc_pointer (start_cc_pointer),
222+
.start_valid (start_valid),
223+
.finish (finish),
224+
.accept (accept)
225+
);
226+
end
227+
else
228+
begin
229+
regex_coprocessor_n_bb #(
230+
.PC_WIDTH (PC_WIDTH ),
231+
.CHARACTER_WIDTH (CHARACTER_WIDTH ),
232+
.MEMORY_WIDTH (BRAM_READ_WIDTH-BRAM_READ_WIDTH_PARITY),
233+
.MEMORY_ADDR_WIDTH (BRAM_ADDR_WIDTH ),
234+
.LATENCY_COUNT_WIDTH (8 ),
235+
.FIFO_COUNT_WIDTH (FIFO_COUNT_WIDTH ),
236+
.BB_N (BB_N ),
237+
.CACHE_WIDTH_BITS (CACHE_WIDTH_BITS )
238+
)a_regex_coprocessor (
239+
.clk (clk),
240+
.reset (reset_master),
241+
.memory_ready (memory_addr_from_coprocessor_ready ),
242+
.memory_addr (memory_addr_from_coprocessor ),
243+
.memory_data (bram_payload),
244+
.memory_valid (memory_addr_from_coprocessor_valid ),
245+
.start_ready (start_ready),
246+
.start_cc_pointer (start_cc_pointer),
247+
.start_valid (start_valid),
248+
.finish (finish),
249+
.accept (accept)
250+
);
251+
end
245252
endmodule

rtl_src/basic_block.sv

Lines changed: 58 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,20 +9,24 @@
99
// +------------------------------------------>memory_valid
1010
//+----------------------------------|------------------------+ \---<memory_ready
1111
//| Basic block | |
12-
//| | |
12+
//| +- - - - - - - + |
13+
//| cache |
14+
//| | (optional) | |
15+
//| |
16+
//| +- - - - - - - + |
1317
//| | |
1418
//| +-------|------+ |
1519
//| | Regex_cpu | | /--->output_pc_valid
1620
//| +-----> | ------>----------------------------->output_pc_and current
1721
//| | | | | \---<output_pc_ready
1822
//| | +--------------+ |
1923
//| | input_pc_and_current[0]
20-
//| | +------------------+ +-----+ |
21-
//| +---- curr_char_fifo <-----|--+ | |
24+
//| | +------------------+ 1 +-----+ |
25+
//| +---- curr_char_fifo <--+--+--+ | |
2226
//| +------------------+ |demux| | | /---<input_pc_valid
23-
//| +------------------+ | <--------------------------<input_pc_and_current
24-
//| 0<--data_out_ready---| next_char_fifo <--------+ +----------------------->input_pc_ready
25-
//| +------------------+ | |
27+
//| +------------------+ | <--+-----------------------<input_pc_and_current
28+
//| 0 --data_out_ready-->| next_char_fifo <--+-----+ +----------------------->input_pc_ready
29+
//| +------------------+ 0 | |
2630
//| | |
2731
//| even_in_ready | |
2832
//| and ---------------------+ |
@@ -41,14 +45,16 @@ module basic_block #(
4145
parameter FIFO_COUNT_WIDTH = 6 ,
4246
parameter CHARACTER_WIDTH = 8 ,
4347
parameter MEMORY_WIDTH = 16,
44-
parameter MEMORY_ADDR_WIDTH = 11
48+
parameter MEMORY_ADDR_WIDTH = 11,
49+
parameter CACHE_WIDTH_BITS = 0
4550
)(
4651
input wire clk,
4752
input wire reset,
53+
output logic accepts,
4854
output logic running,
55+
input logic go,
4956
input logic cur_is_even_character,
5057
input logic[CHARACTER_WIDTH-1:0] current_character,
51-
output logic accepts,
5258

5359
input logic memory_ready,
5460
output logic[MEMORY_ADDR_WIDTH-1:0] memory_addr,
@@ -72,8 +78,14 @@ module basic_block #(
7278
//sub signals of input_pc_and_current, output_pc_and_current
7379
logic [PC_WIDTH-1:0] output_pc, input_pc;
7480
logic input_pc_is_directed_to_current, output_pc_is_directed_to_current;
75-
81+
//signals for regex_cpu
82+
logic regex_cpu_input_pc_ready,regex_cpu_input_pc_valid;
7683
//storage part of the basic block
84+
//cache wires
85+
wire regex_cpu_memory_ready ;
86+
wire [MEMORY_ADDR_WIDTH-1:0]regex_cpu_memory_addr ;
87+
wire [MEMORY_WIDTH-1 :0]regex_cpu_memory_data ;
88+
wire regex_cpu_memory_valid ;
7789
//FIFO even signal
7890
logic fifo_even_data_in_ready ;
7991
logic fifo_even_data_in_not_ready ;
@@ -242,7 +254,9 @@ module basic_block #(
242254
/////////////////////////////////////////////////////////////////////////////
243255
// Computing part of the basic block
244256
/////////////////////////////////////////////////////////////////////////////
245-
257+
// the go signal gates dequeueing from fifo_cur_char and thereby acts as an enable for regex_cpu
258+
assign fifo_cur_char_data_out_ready = go && regex_cpu_input_pc_ready ;
259+
assign regex_cpu_input_pc_valid = go && fifo_cur_char_data_out_valid;
246260
regex_cpu #(
247261
.PC_WIDTH (PC_WIDTH ),
248262
.CHARACTER_WIDTH (CHARACTER_WIDTH ),
@@ -252,17 +266,45 @@ module basic_block #(
252266
.clk (clk ),
253267
.reset (reset ),
254268
.current_character (current_character ),
255-
.input_pc_ready (fifo_cur_char_data_out_ready ),
269+
.input_pc_ready (regex_cpu_input_pc_ready ),
256270
.input_pc (fifo_cur_char_data_out ),
257-
.input_pc_valid (fifo_cur_char_data_out_valid ),
258-
.memory_ready (memory_ready ),
259-
.memory_addr (memory_addr ),
260-
.memory_data (memory_data ),
261-
.memory_valid (memory_valid ),
271+
.input_pc_valid (regex_cpu_input_pc_valid ),
272+
.memory_ready (regex_cpu_memory_ready ),
273+
.memory_addr (regex_cpu_memory_addr ),
274+
.memory_data (regex_cpu_memory_data ),
275+
.memory_valid (regex_cpu_memory_valid ),
262276
.output_pc_is_directed_to_current (output_pc_is_directed_to_current ),
263277
.output_pc_ready (output_pc_ready ),
264278
.output_pc (output_pc ),
265279
.output_pc_valid (output_pc_valid ),
266280
.accepts (accepts )
267281
);
282+
283+
//if CACHE_WIDTH_BITS <= 0 connect regex_cpu directly to the memory port, otherwise place a cache in between
284+
if (CACHE_WIDTH_BITS <= 0)
285+
begin
286+
assign memory_addr = regex_cpu_memory_addr ;
287+
assign memory_valid = regex_cpu_memory_valid;
288+
assign regex_cpu_memory_ready = memory_ready ;
289+
assign regex_cpu_memory_data = memory_data ;
290+
end
291+
else
292+
begin
293+
cache_directly_mapped #(
294+
.DWIDTH (MEMORY_WIDTH ),
295+
.CACHE_WIDTH_BITS (CACHE_WIDTH_BITS ),
296+
.ADDR_WIDTH (MEMORY_ADDR_WIDTH )
297+
) a_cache (
298+
.clk (clk ),
299+
.reset (reset ),
300+
.addr_in_valid (regex_cpu_memory_valid ),
301+
.addr_in (regex_cpu_memory_addr ),
302+
.addr_in_ready (regex_cpu_memory_ready ),
303+
.data_out (regex_cpu_memory_data ),
304+
.addr_out_valid (memory_valid ),
305+
.addr_out (memory_addr ),
306+
.addr_out_ready (memory_ready ),
307+
.data_in (memory_data )
308+
);
309+
end
268310
endmodule

rtl_src/cache_directly_mapped.sv

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
`timescale 1ns/1ps
2+
//A cache to decouple memory access between different
3+
//basic blocks.
4+
//Remember that in this context nothing writes to the memory,
5+
//hence no synchronization problem can happen.
6+
//1. the cache is supplied with an address (driving addr_in, raising addr_in_valid)
7+
//2.a if the cache holds the requested content, it answers positively by raising addr_in_ready.
8+
//2.b otherwise the cache relays the memory request on addr_out using the same protocol.
9+
//
10+
module cache_directly_mapped #(
11+
parameter DWIDTH = 16,
12+
parameter CACHE_WIDTH_BITS = 5,
13+
parameter ADDR_WIDTH = 16
14+
)(
15+
input logic clk,
16+
input logic reset,
17+
input logic addr_in_valid,
18+
input logic [ADDR_WIDTH-1:0] addr_in,
19+
output logic addr_in_ready,
20+
output logic [DWIDTH-1:0] data_out,
21+
22+
output logic addr_out_valid,
23+
output logic [ADDR_WIDTH-1:0] addr_out,
24+
input logic addr_out_ready,
25+
input logic [DWIDTH-1:0] data_in
26+
27+
);
28+
localparam TAG_WIDTH = ADDR_WIDTH-CACHE_WIDTH_BITS;
29+
30+
(* dont_touch = "true" *) logic [DWIDTH-1:0] content [2**CACHE_WIDTH_BITS-1:0];
31+
(* dont_touch = "true" *) logic [TAG_WIDTH-1:0] tag [2**CACHE_WIDTH_BITS-1:0];
32+
(* dont_touch = "true" *) logic is_present [2**CACHE_WIDTH_BITS-1:0];
33+
logic is_present_i_next ;
34+
logic [DWIDTH-1:0] content_i_next ;
35+
logic [DWIDTH-1:0] data_out_saved, data_out_saved_next ;
36+
37+
typedef enum logic { S_IDLE, S_WRITE } State;
38+
State curState, nextState;
39+
40+
//decompose addr_in into tag and cache_line
41+
logic [CACHE_WIDTH_BITS-1:0 ] cache_line_in, cache_line_in_saved , cache_line_in_saved_next;
42+
logic [TAG_WIDTH-1:0] tag_in , tag_in_saved , tag_in_saved_next ;
43+
assign cache_line_in = addr_in[0+:CACHE_WIDTH_BITS];
44+
assign tag_in = addr_in[ADDR_WIDTH-1-:TAG_WIDTH];
45+
//compute hit signal
46+
logic hit;
47+
assign hit = (tag[cache_line_in] == tag_in && is_present[cache_line_in]) ;
48+
49+
always_ff @( posedge clk ) begin
50+
if(reset == 1'b1)
51+
begin
52+
curState <= S_IDLE;
53+
54+
for(int i=0; i<2**CACHE_WIDTH_BITS; i++)
55+
begin
56+
is_present[i] <= {1'b0};
57+
end
58+
59+
data_out_saved <= {(DWIDTH){1'b0}};
60+
end
61+
else
62+
begin
63+
curState <= nextState;
64+
cache_line_in_saved <= cache_line_in_saved_next;
65+
tag_in_saved <= tag_in_saved_next;
66+
if(curState == S_WRITE)
67+
begin
68+
tag [cache_line_in_saved] <= tag_in_saved;
69+
is_present [cache_line_in_saved] <= is_present_i_next;
70+
content [cache_line_in_saved] <= content_i_next;
71+
end
72+
data_out_saved <= data_out_saved_next;
73+
end
74+
end
75+
76+
//next state
77+
always_comb begin
78+
//default next state signals
79+
nextState = curState ;
80+
is_present_i_next = 1'b0 ;
81+
content_i_next = {(DWIDTH) {1'b0}};
82+
data_out_saved_next = {(DWIDTH) {1'b0}};
83+
cache_line_in_saved_next = cache_line_in_saved ;
84+
tag_in_saved_next = tag_in_saved ;
85+
case(curState)
86+
S_IDLE:
87+
begin
88+
data_out_saved_next = content[cache_line_in];
89+
if( addr_in_valid && ~hit)
90+
begin
91+
92+
93+
if(addr_out_ready)
94+
begin
95+
tag_in_saved_next = tag_in ;
96+
cache_line_in_saved_next = cache_line_in;
97+
nextState = S_WRITE;
98+
end
99+
end
100+
end
101+
S_WRITE:
102+
begin
103+
nextState = S_IDLE ;
104+
is_present_i_next = 1'b1 ;
105+
content_i_next = data_in ;
106+
107+
end
108+
endcase
109+
end
110+
111+
//output function
112+
always_comb begin
113+
//a response to addr_in_valid
114+
if( addr_in_valid == 1'b1) addr_in_ready = hit ;
115+
else addr_in_ready = 1'b0;
116+
//default output
117+
addr_out = {(ADDR_WIDTH){1'b0}} ;
118+
addr_out_valid = 1'b0 ;
119+
data_out = data_out_saved ;
120+
case(curState)
121+
S_IDLE:
122+
begin
123+
if( addr_in_valid && ~hit)
124+
begin
125+
addr_out = addr_in ;
126+
addr_out_valid = 1'b1 ;
127+
if(addr_out_ready)
128+
begin
129+
addr_in_ready = 1'b1 ;
130+
end
131+
end
132+
end
133+
S_WRITE:
134+
begin
135+
data_out = data_in ;
136+
end
137+
endcase
138+
end
139+
140+
endmodule

0 commit comments

Comments
 (0)