
Commit 32f23d8

Use consistent syntax for macro definitions and invocations
Most assemblers seem to support .macro definitions and invocations both with and without commas between parameters. Some, however, require a comma. The code base currently mixes the two styles -- sometimes even within the same file. This commit adjusts autogen to automatically rewrite definitions and invocations of assembly macros to use comma-separated arguments.

Signed-off-by: Hanno Becker <[email protected]>
1 parent c6d7c93 commit 32f23d8
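The autogen change that performs this normalization is among the four changed files but falls outside this excerpt, which shows only the resulting .S diffs. As a rough illustration of the idea only -- MACRO_NAMES, normalize_args, and normalize_line are hypothetical names for this sketch, not the actual autogen code -- such a pass could rewrite parameter lists along these lines:

# Hypothetical sketch only -- not the actual autogen code, which is not
# shown in this excerpt. Normalizes .macro parameter lists and the
# invocations of known macros to comma-separated arguments.
import re

# Hard-coded here for the sketch; a real pass would collect these names
# from the .macro directives it encounters while scanning the file.
MACRO_NAMES = {"shuffle8", "shuffle4", "shuffle2", "butterfly"}

def normalize_args(args: str) -> str:
    # Split on any mix of commas and whitespace, rejoin with ", ".
    return ", ".join(t for t in re.split(r"[,\s]+", args.strip()) if t)

def normalize_line(line: str) -> str:
    # Definitions: ".macro shuffle8 r0,r1,r2,r3" -> ".macro shuffle8 r0, r1, r2, r3"
    m = re.match(r"^(\s*)\.macro\s+(\w+)\s+(.+)$", line)
    if m:
        indent, name, args = m.groups()
        return f"{indent}.macro {name} {normalize_args(args)}"
    # Invocations: "butterfly 4,5,1,3,2,15" -> "butterfly 4, 5, 1, 3, 2, 15"
    m = re.match(r"^(\s*)(\w+)\s+(.+)$", line)
    if m and m.group(2) in MACRO_NAMES:
        indent, name, args = m.groups()
        return f"{indent}{name} {normalize_args(args)}"
    return line

assert normalize_line(".macro butterfly l,h,zl0=1,zl1=1,zh0=2,zh1=2") == \
    ".macro butterfly l, h, zl0=1, zl1=1, zh0=2, zh1=2"
assert normalize_line("butterfly 10,11,1,3,2,15") == "butterfly 10, 11, 1, 3, 2, 15"
assert normalize_line("vmovshdup %ymm3,%ymm1") == "vmovshdup %ymm3,%ymm1"  # untouched

A pass like this leaves ordinary instructions (vpermq, vmovshdup, ...) untouched, since only .macro directives and invocations of the collected macro names are rewritten -- consistent with the diffs below, where every changed line is a macro definition or invocation.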

File tree

4 files changed: +197 -111 lines changed

dev/x86_64/src/intt.S
Lines changed: 48 additions & 48 deletions

@@ -24,17 +24,17 @@

 #include "consts.h"

-.macro shuffle8 r0,r1,r2,r3
+.macro shuffle8 r0, r1, r2, r3
 vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2
 vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3
 .endm

-.macro shuffle4 r0,r1,r2,r3
+.macro shuffle4 r0, r1, r2, r3
 vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2
 vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3
 .endm

-.macro shuffle2 r0,r1,r2,r3
+.macro shuffle2 r0, r1, r2, r3
 #vpsllq $32,%ymm\r1,%ymm\r2
 vmovsldup %ymm\r1,%ymm\r2
 vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2
@@ -50,7 +50,7 @@ vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3
 * |h'| < MONTMUL_BOUND
 * (See the end of this file for the exact value of MONTMUL_BOUND)
 */
-.macro butterfly l,h,zl0=1,zl1=1,zh0=2,zh1=2
+.macro butterfly l, h, zl0=1, zl1=1, zh0=2, zh1=2
 vpsubd %ymm\l,%ymm\h,%ymm12
 vpaddd %ymm\h,%ymm\l,%ymm\l

@@ -88,25 +88,25 @@ vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+296-8*\off-8)*4(%rsi),%ymm3
 vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+296-8*\off-8)*4(%rsi),%ymm15
 vmovshdup %ymm3,%ymm1
 vmovshdup %ymm15,%ymm2
-butterfly 4,5,1,3,2,15
+butterfly 4, 5, 1, 3, 2, 15

 vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+296-8*\off-40)*4(%rsi),%ymm3
 vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+296-8*\off-40)*4(%rsi),%ymm15
 vmovshdup %ymm3,%ymm1
 vmovshdup %ymm15,%ymm2
-butterfly 6,7,1,3,2,15
+butterfly 6, 7, 1, 3, 2, 15

 vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+296-8*\off-72)*4(%rsi),%ymm3
 vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+296-8*\off-72)*4(%rsi),%ymm15
 vmovshdup %ymm3,%ymm1
 vmovshdup %ymm15,%ymm2
-butterfly 8,9,1,3,2,15
+butterfly 8, 9, 1, 3, 2, 15

 vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+296-8*\off-104)*4(%rsi),%ymm3
 vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+296-8*\off-104)*4(%rsi),%ymm15
 vmovshdup %ymm3,%ymm1
 vmovshdup %ymm15,%ymm2
-butterfly 10,11,1,3,2,15
+butterfly 10, 11, 1, 3, 2, 15

 /*
 * Bounds: |ymm{i}| < 2q for i in 4, 6, 8, 10
@@ -129,15 +129,15 @@ vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+168-8*\off-8)*4(%rsi),%ymm3
 vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+168-8*\off-8)*4(%rsi),%ymm15
 vmovshdup %ymm3,%ymm1
 vmovshdup %ymm15,%ymm2
-butterfly 4,6,1,3,2,15
-butterfly 5,7,1,3,2,15
+butterfly 4, 6, 1, 3, 2, 15
+butterfly 5, 7, 1, 3, 2, 15

 vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+168-8*\off-40)*4(%rsi),%ymm3
 vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+168-8*\off-40)*4(%rsi),%ymm15
 vmovshdup %ymm3,%ymm1
 vmovshdup %ymm15,%ymm2
-butterfly 8,10,1,3,2,15
-butterfly 9,11,1,3,2,15
+butterfly 8, 10, 1, 3, 2, 15
+butterfly 9, 11, 1, 3, 2, 15

 /* Bounds: |ymm{i}| < 4q */

@@ -146,55 +146,55 @@ vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+104-8*\off-8)*4(%rsi),%ymm3
 vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+104-8*\off-8)*4(%rsi),%ymm15
 vmovshdup %ymm3,%ymm1
 vmovshdup %ymm15,%ymm2
-butterfly 4,8,1,3,2,15
-butterfly 5,9,1,3,2,15
-butterfly 6,10,1,3,2,15
-butterfly 7,11,1,3,2,15
+butterfly 4, 8, 1, 3, 2, 15
+butterfly 5, 9, 1, 3, 2, 15
+butterfly 6, 10, 1, 3, 2, 15
+butterfly 7, 11, 1, 3, 2, 15

 /* Bounds: |ymm{i}| < 8q */

 /* level 3 */
-shuffle2 4,5,3,5
-shuffle2 6,7,4,7
-shuffle2 8,9,6,9
-shuffle2 10,11,8,11
+shuffle2 4, 5, 3, 5
+shuffle2 6, 7, 4, 7
+shuffle2 8, 9, 6, 9
+shuffle2 10, 11, 8, 11

 vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+72-8*\off-8)*4(%rsi),%ymm1
 vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+72-8*\off-8)*4(%rsi),%ymm2
-butterfly 3,5
-butterfly 4,7
-butterfly 6,9
-butterfly 8,11
+butterfly 3, 5
+butterfly 4, 7
+butterfly 6, 9
+butterfly 8, 11

 /* Bounds: |ymm{i}| < 16q */

 /* level 4 */
-shuffle4 3,4,10,4
-shuffle4 6,8,3,8
-shuffle4 5,7,6,7
-shuffle4 9,11,5,11
+shuffle4 3, 4, 10, 4
+shuffle4 6, 8, 3, 8
+shuffle4 5, 7, 6, 7
+shuffle4 9, 11, 5, 11

 vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+40-8*\off-8)*4(%rsi),%ymm1
 vpermq $0x1B,(MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+40-8*\off-8)*4(%rsi),%ymm2
-butterfly 10,4
-butterfly 3,8
-butterfly 6,7
-butterfly 5,11
+butterfly 10, 4
+butterfly 3, 8
+butterfly 6, 7
+butterfly 5, 11

 /* Bounds: |ymm{i}| < 32q */

 /* level 5 */
-shuffle8 10,3,9,3
-shuffle8 6,5,10,5
-shuffle8 4,8,6,8
-shuffle8 7,11,4,11
+shuffle8 10, 3, 9, 3
+shuffle8 6, 5, 10, 5
+shuffle8 4, 8, 6, 8
+shuffle8 7, 11, 4, 11

 vpbroadcastd (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+7-\off)*4(%rsi),%ymm1
 vpbroadcastd (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+7-\off)*4(%rsi),%ymm2
-butterfly 9,3
-butterfly 10,5
-butterfly 6,8
-butterfly 4,11
+butterfly 9, 3
+butterfly 10, 5
+butterfly 6, 8
+butterfly 4, 11

 /* Bounds: |ymm{i}| < 64q */

@@ -221,24 +221,24 @@ vmovdqa 896+32*\off(%rdi),%ymm11
 /* level 6 */
 vpbroadcastd (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+3)*4(%rsi),%ymm1
 vpbroadcastd (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+3)*4(%rsi),%ymm2
-butterfly 4,6
-butterfly 5,7
+butterfly 4, 6
+butterfly 5, 7

 vpbroadcastd (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+2)*4(%rsi),%ymm1
 vpbroadcastd (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+2)*4(%rsi),%ymm2
-butterfly 8,10
-butterfly 9,11
+butterfly 8, 10
+butterfly 9, 11

 /* Bounds: |ymm{i}| < 128q */

 /* level 7 */
 vpbroadcastd (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+0)*4(%rsi),%ymm1
 vpbroadcastd (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+0)*4(%rsi),%ymm2

-butterfly 4,8
-butterfly 5,9
-butterfly 6,10
-butterfly 7,11
+butterfly 4, 8
+butterfly 5, 9
+butterfly 6, 10
+butterfly 7, 11

 /*
 * Bounds: |ymm{i}| < 256q for i in 4...7

dev/x86_64/src/ntt.S
Lines changed: 48 additions & 48 deletions

@@ -25,17 +25,17 @@

 #include "consts.h"

-.macro shuffle8 r0,r1,r2,r3
+.macro shuffle8 r0, r1, r2, r3
 vperm2i128 $0x20,%ymm\r1,%ymm\r0,%ymm\r2
 vperm2i128 $0x31,%ymm\r1,%ymm\r0,%ymm\r3
 .endm

-.macro shuffle4 r0,r1,r2,r3
+.macro shuffle4 r0, r1, r2, r3
 vpunpcklqdq %ymm\r1,%ymm\r0,%ymm\r2
 vpunpckhqdq %ymm\r1,%ymm\r0,%ymm\r3
 .endm

-.macro shuffle2 r0,r1,r2,r3
+.macro shuffle2 r0, r1, r2, r3
 #vpsllq $32,%ymm\r1,%ymm\r2
 vmovsldup %ymm\r1,%ymm\r2
 vpblendd $0xAA,%ymm\r2,%ymm\r0,%ymm\r2
@@ -54,7 +54,7 @@ vpblendd $0xAA,%ymm\r1,%ymm\r0,%ymm\r3
 * In conclusion, the magnitudes of all coefficients grow by at most q after
 * each layer.
 */
-.macro butterfly l,h,zl0=1,zl1=1,zh0=2,zh1=2
+.macro butterfly l, h, zl0=1, zl1=1, zh0=2, zh1=2
 vpmuldq %ymm\zl0,%ymm\h,%ymm13
 vmovshdup %ymm\h,%ymm12
 vpmuldq %ymm\zl1,%ymm12,%ymm14
@@ -133,23 +133,23 @@ vmovdqa 896+32*\off(%rdi),%ymm11

 /* Bounds: |ymm{i}| < q */

-butterfly 4,8
-butterfly 5,9
-butterfly 6,10
-butterfly 7,11
+butterfly 4, 8
+butterfly 5, 9
+butterfly 6, 10
+butterfly 7, 11

 /* Bounds: |ymm{i}| < 2q */

 /* level 1 */
 vpbroadcastd (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+2)*4(%rsi),%ymm1
 vpbroadcastd (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+2)*4(%rsi),%ymm2
-butterfly 4,6
-butterfly 5,7
+butterfly 4, 6
+butterfly 5, 7

 vpbroadcastd (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+3)*4(%rsi),%ymm1
 vpbroadcastd (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+3)*4(%rsi),%ymm2
-butterfly 8,10
-butterfly 9,11
+butterfly 8, 10
+butterfly 9, 11

 /* Bounds: |ymm{i}| < 3q */

@@ -177,47 +177,47 @@ vmovdqa 256*\off+224(%rdi),%ymm11
 vpbroadcastd (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+4+\off)*4(%rsi),%ymm1
 vpbroadcastd (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+4+\off)*4(%rsi),%ymm2

-butterfly 4,8
-butterfly 5,9
-butterfly 6,10
-butterfly 7,11
+butterfly 4, 8
+butterfly 5, 9
+butterfly 6, 10
+butterfly 7, 11

-shuffle8 4,8,3,8
-shuffle8 5,9,4,9
-shuffle8 6,10,5,10
-shuffle8 7,11,6,11
+shuffle8 4, 8, 3, 8
+shuffle8 5, 9, 4, 9
+shuffle8 6, 10, 5, 10
+shuffle8 7, 11, 6, 11

 /* Bounds: |ymm{i}| < 4q */

 /* level 3 */
 vmovdqa (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+8+8*\off)*4(%rsi),%ymm1
 vmovdqa (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+8+8*\off)*4(%rsi),%ymm2

-butterfly 3,5
-butterfly 8,10
-butterfly 4,6
-butterfly 9,11
+butterfly 3, 5
+butterfly 8, 10
+butterfly 4, 6
+butterfly 9, 11

-shuffle4 3,5,7,5
-shuffle4 8,10,3,10
-shuffle4 4,6,8,6
-shuffle4 9,11,4,11
+shuffle4 3, 5, 7, 5
+shuffle4 8, 10, 3, 10
+shuffle4 4, 6, 8, 6
+shuffle4 9, 11, 4, 11

 /* Bounds: |ymm{i}| < 5q */

 /* level 4 */
 vmovdqa (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+40+8*\off)*4(%rsi),%ymm1
 vmovdqa (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+40+8*\off)*4(%rsi),%ymm2

-butterfly 7,8
-butterfly 5,6
-butterfly 3,4
-butterfly 10,11
+butterfly 7, 8
+butterfly 5, 6
+butterfly 3, 4
+butterfly 10, 11

-shuffle2 7,8,9,8
-shuffle2 5,6,7,6
-shuffle2 3,4,5,4
-shuffle2 10,11,3,11
+shuffle2 7, 8, 9, 8
+shuffle2 5, 6, 7, 6
+shuffle2 3, 4, 5, 4
+shuffle2 10, 11, 3, 11

 /* Bounds: |ymm{i}| < 6q */

@@ -227,10 +227,10 @@ vmovdqa (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+72+8*\off)*4(%rsi),%ymm2
 vpsrlq $32,%ymm1,%ymm10
 vmovshdup %ymm2,%ymm15

-butterfly 9,5,1,10,2,15
-butterfly 8,4,1,10,2,15
-butterfly 7,3,1,10,2,15
-butterfly 6,11,1,10,2,15
+butterfly 9, 5, 1, 10, 2, 15
+butterfly 8, 4, 1, 10, 2, 15
+butterfly 7, 3, 1, 10, 2, 15
+butterfly 6, 11, 1, 10, 2, 15

 /* Bounds: |ymm{i}| < 7q */

@@ -239,15 +239,15 @@ vmovdqa (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+104+8*\off)*4(%rsi),%ymm1
 vmovdqa (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+104+8*\off)*4(%rsi),%ymm2
 vpsrlq $32,%ymm1,%ymm10
 vmovshdup %ymm2,%ymm15
-butterfly 9,7,1,10,2,15
-butterfly 8,6,1,10,2,15
+butterfly 9, 7, 1, 10, 2, 15
+butterfly 8, 6, 1, 10, 2, 15

 vmovdqa (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+104+8*\off+32)*4(%rsi),%ymm1
 vmovdqa (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+104+8*\off+32)*4(%rsi),%ymm2
 vpsrlq $32,%ymm1,%ymm10
 vmovshdup %ymm2,%ymm15
-butterfly 5,3,1,10,2,15
-butterfly 4,11,1,10,2,15
+butterfly 5, 3, 1, 10, 2, 15
+butterfly 4, 11, 1, 10, 2, 15

 /* Bounds: |ymm{i}| < 8q */

@@ -256,25 +256,25 @@ vmovdqa (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+168+8*\off)*4(%rsi),%ymm1
 vmovdqa (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+168+8*\off)*4(%rsi),%ymm2
 vpsrlq $32,%ymm1,%ymm10
 vmovshdup %ymm2,%ymm15
-butterfly 9,8,1,10,2,15
+butterfly 9, 8, 1, 10, 2, 15

 vmovdqa (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+168+8*\off+32)*4(%rsi),%ymm1
 vmovdqa (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+168+8*\off+32)*4(%rsi),%ymm2
 vpsrlq $32,%ymm1,%ymm10
 vmovshdup %ymm2,%ymm15
-butterfly 7,6,1,10,2,15
+butterfly 7, 6, 1, 10, 2, 15

 vmovdqa (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+168+8*\off+64)*4(%rsi),%ymm1
 vmovdqa (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+168+8*\off+64)*4(%rsi),%ymm2
 vpsrlq $32,%ymm1,%ymm10
 vmovshdup %ymm2,%ymm15
-butterfly 5,4,1,10,2,15
+butterfly 5, 4, 1, 10, 2, 15

 vmovdqa (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS_QINV+168+8*\off+96)*4(%rsi),%ymm1
 vmovdqa (MLD_AVX2_BACKEND_DATA_OFFSET_ZETAS+168+8*\off+96)*4(%rsi),%ymm2
 vpsrlq $32,%ymm1,%ymm10
 vmovshdup %ymm2,%ymm15
-butterfly 3,11,1,10,2,15
+butterfly 3, 11, 1, 10, 2, 15

 /* Bounds: |ymm{i}| < 9q */
