Skip to content

Commit 7edf398

Browse files
committed
Implement basic unit-stride vector load/store
Implement vle8_v, vle16_v, vle32_v, vse8_v, vse16_v, and vse32_v. The implementations use loop unrolling to process one 32-bit word at a time and assume VLEN = 128. Two kinds of illegal instructions are handled: 1. When eew is wider than sew (the code checks `ir->eew > sew`): set vill in vtype to 1 and all other bits to 0, and set csr_vl to 0. 2. When LMUL > 1 and the access would reach a vector register numbered above 31: this case is caught with an assert.
1 parent c4725d5 commit 7edf398

File tree

1 file changed

+210
-6
lines changed

1 file changed

+210
-6
lines changed

src/rv32_template.c

+210-6
Original file line numberDiff line numberDiff line change
@@ -3201,21 +3201,126 @@ RVOP(
32013201

32023202
RVOP(
32033203
vle8_v,
3204-
{ V_NOP; },
3204+
{
3205+
uint8_t sew = 8 << ((rv->csr_vtype >> 3) & 0b111);
3206+
uint32_t addr = rv->X[ir->rs1];
3207+
3208+
if (ir->eew > sew) {
3209+
/* Illegal */
3210+
rv->csr_vtype = 0x80000000;
3211+
rv->csr_vl = 0;
3212+
return true;
3213+
} else {
3214+
uint8_t i = 0;
3215+
uint8_t j = 0;
3216+
for (uint32_t cnt = 0; rv->csr_vl - cnt >= 4;) {
3217+
i %= VREG_U32_COUNT;
3218+
/* Set illegal when trying to access vector register that is
3219+
* larger then 31.
3220+
*/
3221+
assert(ir->vd + j < 32);
3222+
/* Process full 32-bit words */
3223+
rv->V[ir->vd + j][i] = 0;
3224+
rv->V[ir->vd + j][i] |= rv->io.mem_read_b(rv, addr);
3225+
rv->V[ir->vd + j][i] |= rv->io.mem_read_b(rv, addr + 1) << 8;
3226+
rv->V[ir->vd + j][i] |= rv->io.mem_read_b(rv, addr + 2) << 16;
3227+
rv->V[ir->vd + j][i] |= rv->io.mem_read_b(rv, addr + 3) << 24;
3228+
cnt += 4;
3229+
i++;
3230+
3231+
/* Move to next vector register after filling VLEN */
3232+
if (!(cnt % (VREG_U32_COUNT << 2))) {
3233+
j++;
3234+
i = 0;
3235+
}
3236+
addr += 4;
3237+
}
3238+
/* Clear corresponding bits of eews */
3239+
if (rv->csr_vl % 4) {
3240+
rv->V[ir->vd + j][i] %= 0xFFFFFFFF << ((rv->csr_vl % 4) << 3);
3241+
}
3242+
/* Handle eews that is narrower then a word */
3243+
for (uint32_t cnt = 0; cnt < (rv->csr_vl % 4); cnt++) {
3244+
assert(ir->vd + j < 32); /* Illegal */
3245+
rv->V[ir->vd + j][i] |= rv->io.mem_read_b(rv, addr + cnt)
3246+
<< (cnt << 3);
3247+
}
3248+
}
3249+
},
32053250
GEN({
32063251
assert; /* FIXME: Implement */
32073252
}))
32083253

32093254
RVOP(
32103255
vle16_v,
3211-
{ V_NOP; },
3256+
{
3257+
uint8_t sew = 8 << ((rv->csr_vtype >> 3) & 0b111);
3258+
uint32_t addr = rv->X[ir->rs1];
3259+
3260+
if (ir->eew > sew) {
3261+
/* Illegal */
3262+
rv->csr_vtype = 0x80000000;
3263+
rv->csr_vl = 0;
3264+
return true;
3265+
} else {
3266+
uint8_t i = 0;
3267+
uint8_t j = 0;
3268+
for (uint32_t cnt = 0; rv->csr_vl - cnt >= 2;) {
3269+
i %= VREG_U32_COUNT;
3270+
assert(ir->vd + j < 32);
3271+
/* Process full 32-bit words */
3272+
rv->V[ir->vd + j][i] = 0;
3273+
rv->V[ir->vd + j][i] |= rv->io.mem_read_s(rv, addr);
3274+
rv->V[ir->vd + j][i] |= rv->io.mem_read_s(rv, addr + 2) << 16;
3275+
cnt += 2;
3276+
i++;
3277+
3278+
/* Move to next vector register after filling VLEN */
3279+
if (!(cnt % (VREG_U32_COUNT << 1))) {
3280+
j++;
3281+
i = 0;
3282+
}
3283+
addr += 4;
3284+
}
3285+
if (rv->csr_vl % 2) {
3286+
assert(ir->vd + j < 32); /* Illegal */
3287+
rv->V[ir->vd + j][i] |= rv->io.mem_read_s(rv, addr);
3288+
}
3289+
}
3290+
},
32123291
GEN({
32133292
assert; /* FIXME: Implement */
32143293
}))
32153294

32163295
RVOP(
32173296
vle32_v,
3218-
{ V_NOP; },
3297+
{
3298+
uint8_t sew = 8 << ((rv->csr_vtype >> 3) & 0b111);
3299+
uint32_t addr = rv->X[ir->rs1];
3300+
3301+
if (ir->eew > sew) {
3302+
/* Illegal */
3303+
rv->csr_vtype = 0x80000000;
3304+
rv->csr_vl = 0;
3305+
return true;
3306+
} else {
3307+
uint8_t i = 0;
3308+
uint8_t j = 0;
3309+
for (uint32_t cnt = 0; rv->csr_vl > cnt;) {
3310+
i %= VREG_U32_COUNT;
3311+
assert(ir->vd + j < 32);
3312+
rv->V[ir->vd + j][i] = rv->io.mem_read_w(rv, addr);
3313+
cnt += 1;
3314+
i++;
3315+
3316+
if (!(cnt % VREG_U32_COUNT)) {
3317+
j++;
3318+
i = 0;
3319+
}
3320+
addr += 4;
3321+
}
3322+
}
3323+
},
32193324
GEN({
32203325
assert; /* FIXME: Implement */
32213326
}))
@@ -4440,21 +4545,120 @@ RVOP(
44404545

44414546
RVOP(
44424547
vse8_v,
4443-
{ V_NOP; },
4548+
{
4549+
uint8_t sew = 8 << ((rv->csr_vtype >> 3) & 0b111);
4550+
uint32_t addr = rv->X[ir->rs1];
4551+
4552+
if (ir->eew > sew) {
4553+
/* Illegal */
4554+
rv->csr_vtype = 0x80000000;
4555+
rv->csr_vl = 0;
4556+
return true;
4557+
} else {
4558+
uint8_t i = 0;
4559+
uint8_t j = 0;
4560+
for (uint32_t cnt = 0; rv->csr_vl - cnt >= 4;) {
4561+
i %= VREG_U32_COUNT;
4562+
/* Set illegal when trying to access vector register that is
4563+
* larger then 31.
4564+
*/
4565+
assert(ir->vs3 + j < 32);
4566+
uint32_t tmp = rv->V[ir->vs3 + j][i];
4567+
/* Process full 32-bit words */
4568+
rv->io.mem_write_b(rv, addr, (tmp) & 0xff);
4569+
rv->io.mem_write_b(rv, addr + 1, (tmp >> 8) & 0xff);
4570+
rv->io.mem_write_b(rv, addr + 2, (tmp >> 16) & 0xff);
4571+
rv->io.mem_write_b(rv, addr + 3, (tmp >> 24) & 0xff);
4572+
cnt += 4;
4573+
i++;
4574+
4575+
/* Move to next vector register after filling VLEN */
4576+
if (!(cnt % (VREG_U32_COUNT << 2))) {
4577+
j++;
4578+
i = 0;
4579+
}
4580+
addr += 4;
4581+
}
4582+
/* Handle eews that is narrower then a word */
4583+
for (uint32_t cnt = 0; cnt < (rv->csr_vl % 4); cnt++) {
4584+
assert(ir->vs3 + j < 32); /* Illegal */
4585+
uint8_t tmp = (rv->V[ir->vs3 + j][i] >> (cnt << 3)) & 0xff;
4586+
rv->io.mem_write_b(rv, addr + cnt, tmp);
4587+
}
4588+
}
4589+
},
44444590
GEN({
44454591
assert; /* FIXME: Implement */
44464592
}))
44474593

44484594
RVOP(
44494595
vse16_v,
4450-
{ V_NOP; },
4596+
{
4597+
uint8_t sew = 8 << ((rv->csr_vtype >> 3) & 0b111);
4598+
uint32_t addr = rv->X[ir->rs1];
4599+
4600+
if (ir->eew > sew) {
4601+
/* Illegal */
4602+
rv->csr_vtype = 0x80000000;
4603+
rv->csr_vl = 0;
4604+
return true;
4605+
} else {
4606+
uint8_t i = 0;
4607+
uint8_t j = 0;
4608+
for (uint32_t cnt = 0; rv->csr_vl - cnt >= 2;) {
4609+
i %= VREG_U32_COUNT;
4610+
assert(ir->vs3 + j < 32);
4611+
uint32_t tmp = rv->V[ir->vs3 + j][i];
4612+
/* Process full 32-bit words */
4613+
rv->io.mem_write_s(rv, addr, (tmp) & 0xffff);
4614+
rv->io.mem_write_s(rv, addr + 2, (tmp >> 16) & 0xffff);
4615+
cnt += 2;
4616+
i++;
4617+
4618+
if (!(cnt % (VREG_U32_COUNT << 1))) {
4619+
j++;
4620+
i = 0;
4621+
}
4622+
addr += 4;
4623+
}
4624+
if (rv->csr_vl % 2) {
4625+
rv->io.mem_write_s(rv, addr, rv->V[ir->vs3 + j][i] & 0xffff);
4626+
}
4627+
}
4628+
},
44514629
GEN({
44524630
assert; /* FIXME: Implement */
44534631
}))
44544632

44554633
RVOP(
44564634
vse32_v,
4457-
{ V_NOP; },
4635+
{
4636+
uint8_t sew = 8 << ((rv->csr_vtype >> 3) & 0b111);
4637+
uint32_t addr = rv->X[ir->rs1];
4638+
4639+
if (ir->eew > sew) {
4640+
/* Illegal */
4641+
rv->csr_vtype = 0x80000000;
4642+
rv->csr_vl = 0;
4643+
return true;
4644+
} else {
4645+
uint8_t i = 0;
4646+
uint8_t j = 0;
4647+
for (uint32_t cnt = 0; rv->csr_vl > cnt;) {
4648+
i %= VREG_U32_COUNT;
4649+
assert(ir->vs3 + j < 32);
4650+
rv->io.mem_write_w(rv, addr, rv->V[ir->vs3 + j][i]);
4651+
cnt += 1;
4652+
i++;
4653+
4654+
if (!(cnt % (VREG_U32_COUNT))) {
4655+
j++;
4656+
i = 0;
4657+
}
4658+
addr += 4;
4659+
}
4660+
}
4661+
},
44584662
GEN({
44594663
assert; /* FIXME: Implement */
44604664
}))

0 commit comments

Comments
 (0)