-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathbiquad.v
236 lines (219 loc) · 7.65 KB
/
biquad.v
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
// Biquad IIR filter
//
// y(t) = u(t)*b0 + u(t-1)*b1 + u(t-2)*b2 + y(t-1)*(-a1) + y(t-2)*(-a2)
//
// Minimizes latency by computing all but u(t)*b0 at end of previous sample.
//
// Minimizes resource consumption by using internal
// DSP registers for between-sample storage.
//
// Writing a coefficient holds the filter in reset until address 7 is written.
// This allows usage of a consistent set of coefficients.
//
module biquad #(
    parameter DATA_WIDTH        = 28,       // Bits per sample, per lane
    parameter DATA_COUNT        = 1,        // Number of lanes packed into TDATA
    parameter COEFFICIENT_WIDTH = 25,       // Bits per coefficient
    parameter DEBUG             = "false"   // Value given to the mark_debug attributes
    ) (
    // Coefficient write port (sysClk domain).
    //   Address 0..4 : store sysCoefficientValue and put the filter in reset
    //   Address 7    : release the filter from reset
    //   Address 5, 6 : ignored
    input                         sysClk,
    input                         sysCoefficientStrobe,
    input                   [2:0] sysCoefficientAddress,
    input [COEFFICIENT_WIDTH-1:0] sysCoefficientValue,

    // Sample input and output streams (dataClk domain).
    input                                                          dataClk,
    (*mark_debug=DEBUG*) input       [(DATA_COUNT*DATA_WIDTH)-1:0] S_TDATA,
    (*mark_debug=DEBUG*) input                                     S_TVALID,
    (*mark_debug=DEBUG*) output reg                                S_TREADY,
    (*mark_debug=DEBUG*) output reg  [(DATA_COUNT*DATA_WIDTH)-1:0] M_TDATA,
    (*mark_debug=DEBUG*) output reg                                M_TVALID,
    (*mark_debug=DEBUG*) input                                     M_TREADY
    );

localparam MAC_WIDEN = 4;
localparam MAC_WIDTH = DATA_WIDTH + COEFFICIENT_WIDTH + MAC_WIDEN;

// Coefficient dual-port RAM 0:b0, 1:b1, 2:b2, 3:-a2, 4:-a1
// Coefficient range [-2,2) -- i.e. two bits to the left of the binary point
reg [COEFFICIENT_WIDTH-1:0] coefficientRAM [0:4], coefficientRAMq;

// Set by any coefficient write, cleared by a write to address 7.
// Starts at 1, so the filter is held in reset until address 7 is written.
reg sysReset = 1;
always @(posedge sysClk) begin
    if (sysCoefficientStrobe) begin
        if (sysCoefficientAddress <= 4) begin
            coefficientRAM[sysCoefficientAddress] <= sysCoefficientValue;
            sysReset <= 1;
        end else if (sysCoefficientAddress == 7) begin
            sysReset <= 0;
        end
    end
end

// I/O history
(*mark_debug=DEBUG*) reg [(DATA_COUNT*DATA_WIDTH)-1:0] u, uOld = 0, yOld = 0;

// MAC parameter input multiplexer
reg [2:0] state = 0;
wire [(DATA_COUNT*DATA_WIDTH)-1:0] parameterMux = (state == 1) ? u :
                                                  (state == 2) ? u :
                                                  (state == 3) ? uOld :
                                                  (state == 4) ? yOld : M_TDATA;

// Move sysReset to our clock domain
wire reset;
reg_tech_cdc reset_cdc(.I(sysReset), .C(dataClk), .O(reset));

// MAC control strobes: enMAC is the clock enable, ldMAC requests a load
// (restart of the accumulation) instead of an accumulate.
reg enMAC = 0, ldMAC = 0;

// Registered coefficient read, addressed by the state number.
always @(posedge dataClk) begin
    coefficientRAMq <= coefficientRAM[state];
end

// Computation state machine
always @(posedge dataClk) begin
    if (reset) begin
        state <= 0;
        u <= 0;
        uOld <= 0;
        yOld <= 0;
        S_TREADY <= 0;
        M_TVALID <= 0;
        // Reset can arrive in any state (any coefficient write asserts it),
        // so clear the MAC strobes too rather than leaving whatever the
        // interrupted computation had set.  The MAC is still flushed during
        // reset because its ce/sload inputs are ORed with 'reset'.
        enMAC <= 0;
        ldMAC <= 0;
    end
    else begin
        case (state)
        0: begin
            // Wait for an input sample; accept it on S_TVALID && S_TREADY.
            if (S_TVALID && S_TREADY) begin
                u <= S_TDATA;
                S_TREADY <= 0;
                enMAC <= 1;
                state <= 1;
            end
            else begin
                S_TREADY <= 1;
            end
        end
        1: begin // MAC inputs: u(t), b0
                 // Multiplier inputs: y(t-1), -a1
                 // Accumulator input: y(t-2)*-a2
                 // Clip input: u(t-2)*b2 + u(t-1)*b1
                 // M_TDATA: u(t-1)*b1
            state <= 2;
        end
        2: begin // MAC inputs: u(t-1), b1 for next cycle
                 // Multiplier inputs: u(t), b0
                 // Accumulator input: y(t-1)*-a1
                 // Clip input: y(t-2)*-a2 + u(t-2)*b2 + u(t-1)*b1
                 // M_TDATA: u(t-2)*b2 + u(t-1)*b1
            ldMAC <= 1;
            state <= 3;
        end
        3: begin // MAC inputs: u(t-2), b2 for next cycle
                 // Multiplier inputs: u(t-1), b1 for next cycle
                 // Accumulator input: u(t)*b0
                 // Clip input: y(t-1)*-a1 + y(t-2)*-a2 + u(t-2)*b2 + u(t-1)*b1
                 // M_TDATA: y(t-2)*-a2 + u(t-2)*b2 + u(t-1)*b1
                 // sload = 1
            ldMAC <= 0;
            uOld <= u;
            state <= 4;
        end
        4: begin // MAC inputs: y(t-2), -a2 for next cycle
                 // Multiplier inputs: u(t-2), b2 for next cycle
                 // Accumulator input: u(t-1)*b1 for next cycle
                 // Clip input: u(t)*b0+y(t-1)*-a1+y(t-2)*-a2+u(t-2)*b2+u(t-1)*b1
                 // M_TDATA: y(t-1)*-a1+y(t-2)*-a2+u(t-2)*b2+u(t-1)*b1
                 // sload_reg = 1
            M_TVALID <= 1;
            state <= 5;
        end
        5: begin // MAC inputs: y(t-1), -a1 for next cycle
                 // Multiplier inputs: y(t-2), -a2 for next cycle
                 // Accumulator input: u(t-2), b2 for next cycle
                 // Clip input: u(t-1)*b1 for next cycle
                 // M_TDATA: u(t)*b0+y(t-1)*-a1+y(t-2)*-a2+u(t-2)*b2+u(t-1)*b1
                 // M_TVALID = 1
            // The MAC is enabled for the first cycle spent here only;
            // after that it is frozen until the next sample is accepted.
            enMAC <= 0;
            yOld <= M_TDATA;
            if (M_TREADY) begin
                M_TVALID <= 0;
                S_TREADY <= 1;
                state <= 0;
            end
        end
        default: begin
            enMAC <= 0;
            ldMAC <= 0;
            S_TREADY <= 0;
            M_TVALID <= 0;
            state <= 0;
        end
        endcase
    end
end

///////////////////////////////////////////////////////////////////////////////
// Per-lane computation
genvar i;
generate
for (i = 0 ; i < DATA_COUNT ; i = i + 1) begin
    // Instantiate multiply-accumulate module
    // Module doesn't provide a reset port so fake one by enabling
    // the module in 'load' mode with the data input forced to 0.
    wire [MAC_WIDTH-1:0] accum_out;
    macc # (.SIZEA(DATA_WIDTH),
            .SIZEB(COEFFICIENT_WIDTH),
            .SIZEOUT(MAC_WIDTH))
      macc_i (
        .clk(dataClk),
        .ce(reset || enMAC),
        .sload(reset || ldMAC),
        .a(reset ? {DATA_WIDTH{1'b0}} : parameterMux[i*DATA_WIDTH+:DATA_WIDTH]),
        .b(coefficientRAMq),
        .accum_out(accum_out));

    // Clip accumulated result
    // The '-2' on the input width and input bit selection accounts
    // for the fact that the coefficient range is [-2,2).
    wire [DATA_WIDTH-1:0] accum_out_clipped;
    reduceWidth #(.IWIDTH(MAC_WIDTH-(COEFFICIENT_WIDTH-2)),
                  .OWIDTH(DATA_WIDTH))
      clipMAC (.I(accum_out[MAC_WIDTH-1:COEFFICIENT_WIDTH-2]),
               .O(accum_out_clipped));

    // Output register.  It follows the clipped accumulator except while
    // M_TVALID is asserted: the accumulator is restarted for the next
    // sample one clock after the result appears, so without the hold
    // M_TDATA (and the yOld copy taken from it) would change underneath a
    // consumer that has not yet raised M_TREADY.  M_TVALID is still 0 on
    // the edge that ends state 4, so the finished result is captured
    // before the hold takes effect.
    always @(posedge dataClk) begin
        if (!M_TVALID) begin
            M_TDATA[i*DATA_WIDTH+:DATA_WIDTH] <= accum_out_clipped;
        end
    end
end
endgenerate
endmodule
///////////////////////////////////////////////////////////////////////////////
// Multiply-accumulate unit
// Template from Vivado
// Pipelined signed multiply-accumulate.
//
// Parameters:
//   SIZEA, SIZEB - widths of the signed operands a and b
//   SIZEOUT      - width of the signed accumulator / accum_out
//
// Ports:
//   clk       - clock
//   ce        - clock enable; every register holds its value while ce is 0
//   sload     - registered with the operands; when the registered copy is 1
//               the accumulator is loaded with the product alone instead of
//               adding the product to the running sum
//   a, b      - signed operands
//   accum_out - registered accumulator value
//
// On each enabled clock: the operands are registered, the previously
// registered operands are multiplied, and the previous product is added
// to the accumulator feedback.
module macc #(
    parameter SIZEA   = 25,
              SIZEB   = 28,
              SIZEOUT = 55
    ) (
    input                       clk,
    input                       ce,
    input                       sload,
    input  signed   [SIZEA-1:0] a,
    input  signed   [SIZEB-1:0] b,
    output signed [SIZEOUT-1:0] accum_out
    );

// Declare registers for intermediate values
reg signed       [SIZEA-1:0] a_reg;
reg signed       [SIZEB-1:0] b_reg;
reg                          sload_reg;
reg signed [SIZEA+SIZEB-1:0] mult_reg;
reg signed     [SIZEOUT-1:0] adder_out, old_result;

// Accumulator feedback (purely combinational, so blocking assignments
// and an implicit sensitivity list are used).
always @* begin
    if (sload_reg) begin
        // Load: break the accumulation loop so that the next multiplier
        // output becomes the new accumulator value.
        old_result = 0;
    end
    else begin
        // Accumulate: feed the current sum back into the adder.
        old_result = adder_out;
    end
end

always @(posedge clk) begin
    if (ce) begin
        a_reg     <= a;
        b_reg     <= b;
        mult_reg  <= a_reg * b_reg;
        sload_reg <= sload;
        // Store accumulation result into a register
        adder_out <= old_result + mult_reg;
    end
end

// Output accumulation result
assign accum_out = adder_out;
endmodule