Skip to content

Commit 37f801b

Browse files
committed
phasedaccu: break into a pipelined version
1 parent 4d10976 commit 37f801b

File tree

2 files changed

+98
-48
lines changed

2 files changed

+98
-48
lines changed

misoc/cores/duc.py

Lines changed: 97 additions & 47 deletions
Original file line numberDiff line numberDiff line change
@@ -205,8 +205,6 @@ def __init__(self, width, constants):
205205

206206
###
207207

208-
self.latency = 2 if n > 8 else 0
209-
210208
# TODO: improve MCM
211209
assert n <= 16
212210
assert range(n) == constants
@@ -215,11 +213,39 @@ def __init__(self, width, constants):
215213

216214
# manually generated multiplication for small numbers,
217215
# if you use "x*y" Vivado will use a DSP48E1 instead
218-
for x in range(n):
219-
ctx += o[x].eq((x & 0x1) * i +
220-
((x & 0x2) >> 1) * (i << 1) +
221-
((x & 0x4) >> 2) * (i << 2) +
222-
((x & 0x8) >> 3) * (i << 3))
216+
ctx = self.comb
217+
if n > 0:
218+
ctx += o[0].eq(0)
219+
if n > 1:
220+
ctx += o[1].eq(i)
221+
if n > 2:
222+
ctx += o[2].eq(i << 1)
223+
if n > 3:
224+
ctx += o[3].eq(i + (i << 1))
225+
if n > 4:
226+
ctx += o[4].eq(i << 2)
227+
if n > 5:
228+
ctx += o[5].eq(i + (i << 2))
229+
if n > 6:
230+
ctx += o[6].eq(o[3] << 1)
231+
if n > 7:
232+
ctx += o[7].eq((i << 3) - i)
233+
if n > 8:
234+
ctx += o[8].eq(i << 3)
235+
if n > 9:
236+
ctx += o[9].eq(i + (i << 3))
237+
if n > 10:
238+
ctx += o[10].eq(o[5] << 1)
239+
if n > 11:
240+
ctx += o[11].eq(i + (i << 3) + (i << 1))
241+
if n > 12:
242+
ctx += o[12].eq(o[6] << 1)
243+
if n > 13:
244+
ctx += o[13].eq(i + (i << 3) + (i << 2))
245+
if n > 14:
246+
ctx += o[14].eq(o[7] << 1)
247+
if n > 15:
248+
ctx += o[15].eq((i << 4) - i)
223249

224250

225251
class PhasedAccu(Module):
@@ -239,53 +265,77 @@ def __init__(self, n, fwidth, pwidth):
239265
self.z = [Signal(pwidth, reset_less=True)
240266
for _ in range(n)]
241267

242-
self.submodules.mcm = MCM(fwidth, range(n+1))
268+
self.submodules.mcm = MCM(fwidth, range(n))
243269
# reset by clr
244270
qa = Signal(fwidth, reset_less=True)
245271
qb = Signal(fwidth, reset_less=True)
246272
clr_d = Signal(reset_less=True)
247273

248-
if n > 8:
249-
# additional pipelining for n > 8
250-
clr_d2 = Signal(reset_less=True)
251-
mcm_o_d = [Signal(fwidth, reset_less=True) for _ in range(n)]
252-
self.sync += [
253-
# Delay signals to match now increased mcm latency
254-
clr_d.eq(self.clr),
255-
clr_d2.eq(clr_d),
256-
[mcm_o_d[i].eq(self.mcm.o[i]) for i in range(n)],
257-
258-
qa.eq(qa + self.mcm.o[n]),
259-
self.mcm.i.eq(self.f),
260-
If(clr_d | clr_d2,
261-
qa.eq(0),
262-
),
263-
If(clr_d2,
264-
self.mcm.i.eq(0),
265-
),
266-
qb.eq(qa + (self.p << (fwidth - pwidth))),
274+
self.sync += [
275+
clr_d.eq(self.clr),
276+
qa.eq(qa + (self.f << log2_int(n))),
277+
self.mcm.i.eq(self.f),
278+
If(self.clr | clr_d,
279+
qa.eq(0),
280+
),
281+
If(clr_d,
282+
self.mcm.i.eq(0),
283+
),
284+
qb.eq(qa + (self.p << (fwidth - pwidth))),
267285

268-
# Use delayed signals in the final phase calculation
269-
[z.eq((qb + mcm_o_d[i])[fwidth - pwidth:])
270-
for i, z in enumerate(self.z)]
271-
]
272-
else:
273-
self.sync += [
274-
clr_d.eq(self.clr),
275-
qa.eq(qa + (self.f << log2_int(n))),
276-
self.mcm.i.eq(self.f),
277-
If(self.clr | clr_d,
278-
qa.eq(0),
279-
),
280-
If(clr_d,
281-
self.mcm.i.eq(0),
282-
),
283-
qb.eq(qa + (self.p << (fwidth - pwidth))),
286+
# Use non-delayed signals in the final phase calculation
287+
[z.eq((qb + oi)[fwidth - pwidth:])
288+
for oi, z in zip(self.mcm.o, self.z)]
289+
]
284290

285-
# Use non-delayed signals in the final phase calculation
286-
[z.eq((qb + oi)[fwidth - pwidth:])
287-
for oi, z in zip(self.mcm.o, self.z)]
288-
]
291+
class PhasedAccuPipelined(Module):
292+
"""Phase accumulator with multiple phased outputs.
293+
294+
Output data (across cycles and outputs) is such
295+
that there is always one frequency word offset between successive
296+
phase samples.
297+
298+
* Input frequency, phase offset, clear
299+
* Output `n` phase samples per cycle
300+
"""
301+
def __init__(self, n, fwidth, pwidth):
302+
self.f = Signal(fwidth)
303+
self.p = Signal(pwidth)
304+
self.clr = Signal(reset=1)
305+
self.z = [Signal(pwidth, reset_less=True)
306+
for _ in range(n)]
307+
308+
self.submodules.mcm = MCM(fwidth, range(n+1))
309+
# reset by clr
310+
qa = Signal(fwidth, reset_less=True)
311+
qb = Signal(fwidth, reset_less=True)
312+
clr_d = Signal(reset_less=True)
313+
314+
clr_d2 = Signal(reset_less=True)
315+
mcm_o_reg = [Signal(fwidth, reset_less=True) for _ in range(n+1)]
316+
mcm_o_d = [Signal(fwidth, reset_less=True) for _ in range(n)]
317+
self.sync += [
318+
# extra register layer to accommodate latency
319+
[mcm_o_reg[i].eq(self.mcm.o[i]) for i in range(n+1)],
320+
# Delay signals to match now increased mcm latency
321+
clr_d.eq(self.clr),
322+
clr_d2.eq(clr_d),
323+
[mcm_o_d[i].eq(mcm_o_reg[i]) for i in range(n)],
324+
325+
qa.eq(qa + mcm_o_reg[n]),
326+
self.mcm.i.eq(self.f),
327+
If(clr_d | clr_d2,
328+
qa.eq(0),
329+
),
330+
If(clr_d2,
331+
self.mcm.i.eq(0),
332+
),
333+
qb.eq(qa + (self.p << (fwidth - pwidth))),
334+
335+
# Use delayed signals in the final phase calculation
336+
[z.eq((qb + mcm_o_d[i])[fwidth - pwidth:])
337+
for i, z in enumerate(self.z)]
338+
]
289339

290340
class PhaseModulator(Module):
291341
"""Complex phase modulator/shifter.

misoc/test/test_duc.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -107,7 +107,7 @@ def gen():
107107

108108
class TestPhasedAccuNonLog(unittest.TestCase):
109109
def setUp(self):
110-
self.dut = duc.PhasedAccu(n=12, fwidth=32, pwidth=16)
110+
self.dut = duc.PhasedAccuPipelined(n=12, fwidth=32, pwidth=16)
111111

112112
def test_init(self):
113113
self.assertEqual(len(self.dut.f), 32)

0 commit comments

Comments
 (0)