From 99a19249475a4c81971255836e6e31a82f028376 Mon Sep 17 00:00:00 2001 From: marvintau Date: Fri, 21 Oct 2022 16:45:24 +0800 Subject: [PATCH 1/3] done with frontend & backend --- .metals/metals.lock.db | 6 + .metals/metals.mv.db | Bin 0 -> 57344 bytes src/main/scala/nutcore/Bundle.scala | 2 + src/main/scala/nutcore/NutCore.scala | 13 +- .../nutcore/backend/BackendCommons.scala | 43 ++ .../Backend.scala => dynamic/Dynamic.scala} | 70 +-- .../nutcore/backend/{ooo => dynamic}/EP.scala | 5 +- .../backend/{ooo => dynamic}/ROB.scala | 5 +- .../nutcore/backend/{ooo => dynamic}/RS.scala | 4 +- src/main/scala/nutcore/backend/fu/ALU.scala | 2 + .../backend/{seq => sequential}/EXU.scala | 0 .../backend/{seq => sequential}/ISU.scala | 0 .../backend/sequential/Sequential.scala | 44 ++ .../backend/{seq => sequential}/WBU.scala | 0 src/main/scala/nutcore/frontend/BPU.scala | 476 ------------------ src/main/scala/nutcore/frontend/Dynamic.scala | 48 ++ .../scala/nutcore/frontend/Embedded.scala | 36 ++ .../scala/nutcore/frontend/Frontend.scala | 124 ----- .../nutcore/frontend/FrontendCommons.scala | 46 ++ .../scala/nutcore/frontend/Sequential.scala | 45 ++ .../nutcore/frontend/decode/Decode.scala | 66 +++ .../{IDU.scala => decode/DecodeUnit.scala} | 64 +-- .../{ => instr_align_buffer}/IBF.scala | 0 .../{ => instr_align_buffer}/NaiveIBF.scala | 0 .../{IFU.scala => instr_fetch/Dynamic.scala} | 179 +------ .../frontend/instr_fetch/Embedded.scala | 63 +++ .../instr_fetch/InstrFetchCommons.scala | 41 ++ .../frontend/instr_fetch/Sequential.scala | 102 ++++ .../branch_predict/BranchPredictCommons.scala | 60 +++ .../instr_fetch/branch_predict/Dummy.scala | 28 ++ .../instr_fetch/branch_predict/Dynamic.scala | 143 ++++++ .../instr_fetch/branch_predict/Embedded.scala | 97 ++++ .../instr_fetch/branch_predict/Legacy.scala | 39 ++ .../branch_predict/Sequential.scala | 162 ++++++ src/main/scala/nutcore/isa/Priviledged.scala | 16 +- src/main/scala/nutcore/isa/RVI.scala | 2 + .../scala/nutcore/mem/{ 
=> cache}/Cache.scala | 0 .../nutcore/utils/WritebackDelayer.scala | 2 + 38 files changed, 1124 insertions(+), 909 deletions(-) create mode 100644 .metals/metals.lock.db create mode 100644 .metals/metals.mv.db create mode 100644 src/main/scala/nutcore/backend/BackendCommons.scala rename src/main/scala/nutcore/backend/{ooo/Backend.scala => dynamic/Dynamic.scala} (92%) rename src/main/scala/nutcore/backend/{ooo => dynamic}/EP.scala (93%) rename src/main/scala/nutcore/backend/{ooo => dynamic}/ROB.scala (99%) rename src/main/scala/nutcore/backend/{ooo => dynamic}/RS.scala (99%) rename src/main/scala/nutcore/backend/{seq => sequential}/EXU.scala (100%) rename src/main/scala/nutcore/backend/{seq => sequential}/ISU.scala (100%) create mode 100644 src/main/scala/nutcore/backend/sequential/Sequential.scala rename src/main/scala/nutcore/backend/{seq => sequential}/WBU.scala (100%) delete mode 100644 src/main/scala/nutcore/frontend/BPU.scala create mode 100644 src/main/scala/nutcore/frontend/Dynamic.scala create mode 100644 src/main/scala/nutcore/frontend/Embedded.scala delete mode 100644 src/main/scala/nutcore/frontend/Frontend.scala create mode 100644 src/main/scala/nutcore/frontend/FrontendCommons.scala create mode 100644 src/main/scala/nutcore/frontend/Sequential.scala create mode 100644 src/main/scala/nutcore/frontend/decode/Decode.scala rename src/main/scala/nutcore/frontend/{IDU.scala => decode/DecodeUnit.scala} (77%) rename src/main/scala/nutcore/frontend/{ => instr_align_buffer}/IBF.scala (100%) rename src/main/scala/nutcore/frontend/{ => instr_align_buffer}/NaiveIBF.scala (100%) rename src/main/scala/nutcore/frontend/{IFU.scala => instr_fetch/Dynamic.scala} (56%) create mode 100644 src/main/scala/nutcore/frontend/instr_fetch/Embedded.scala create mode 100644 src/main/scala/nutcore/frontend/instr_fetch/InstrFetchCommons.scala create mode 100644 src/main/scala/nutcore/frontend/instr_fetch/Sequential.scala create mode 100644 
src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictCommons.scala create mode 100644 src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Dummy.scala create mode 100644 src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Dynamic.scala create mode 100644 src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Embedded.scala create mode 100644 src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Legacy.scala create mode 100644 src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Sequential.scala rename src/main/scala/nutcore/mem/{ => cache}/Cache.scala (100%) diff --git a/.metals/metals.lock.db b/.metals/metals.lock.db new file mode 100644 index 000000000..2ded17e26 --- /dev/null +++ b/.metals/metals.lock.db @@ -0,0 +1,6 @@ +#FileLock +#Fri Oct 21 14:12:57 CST 2022 +server=localhost\:40269 +hostName=localhost +method=file +id=183f92c5bf49f17491becab8aaffdbd093a600b4d36 diff --git a/.metals/metals.mv.db b/.metals/metals.mv.db new file mode 100644 index 0000000000000000000000000000000000000000..585dd68914f79ff802bc1b2c5da827d0a257fae9 GIT binary patch literal 57344 zcmeG_TW}=TRXwB8T9J00cz3;7JAU-euI&|`h<-oDsj#D&)@owCJTsDaA!O*+ti@_2 zA!(geiYgYIfD@;nDkiDeak)$&=7WObQ9KIxz!avaN>z$-1p+@%KtAAuDk@(Te1y~Y zG5wfssil=zd41<&ZuiXXd(S=R_PytI&zY`D6=!A5y-v&a^5^`jC<}x8Ru<{f*7#wNIN+U>IN+U>IN+ zU>IN+U>IN+U>IN+U>G<^3{WFK^#NG%SF2*>hP_^uY$$G3Wml-I`nR^Ka%J7V=5K(x`aE5GuCSI*|H{Lc6P;4HvY zd~v8+;6}Oo5H=@VQhKEH@4Mk|*%Qm&u>f-u;(fS~@GU7+eCOc8{YC*(#wFevdL^KY zR9RVm(Vqpfn1+C^7YI!d#SBC-(QpXKOhPh^kPJ~wksVLzlS~%H%FWfAo4!}uS|Yw2 zUZ03=B_N`xxN>F9b#JcQtL`>X{tW`0M}Tt@z=?*G4V`Zj=sW_QLZDO7e9|;e3!#&g z0qFcbbRLGzmciM@OB$c@BP@0OB>;87ta12uvOn6J}m8$RRp5xNzwrpp` zpOEzQ=X$oJN`iu(Q+?aDMbnP;XQV#|{TZeJZbAe7VK@QZ*u5wLr>7aV(!>45;Uv#V z>Ca{NhsP(&>CeT+X9%(*_rzlepWWy%|1w(jWo zF-(0efz1Boc0JKIV^oOtmTPW1l3^GlVW%3@ksAl^v1w91odO68m4~#8YqXR)I@Sh;YZ1o!u{bJ zk-$Rk$Qa0-601o@ME)Zq!gmtE61mf(-Nl-e<>+7fhtVLm`jhj1bl-oQ;6`?3Xb-5@iMNun? 
zMp1MMPEqU?{DNMT%%Ws--0(BUw}-gTV%J>$!=#R z_TuPXC*ZwK!fXOCaB-GSL@b@Es3zeLR9({GjmP{!z~IR#`?1^Kk9E8sn}QbI^W@(o z5E=wRU}9pdObUbwhESu7j1p&LB(zm3qAe0YO<&SACkki^TjTTrH3(ayNfAKh-rEFF zlK`qD7>5)Mjl>~g75TbvLER=Ho6$tN*9u))i_D6A^c*PAG2FpMAz;#Ja! zuad#p4fx2WNE9=UqY)rOuz+(X!anSvfu2T*EsE0D30Nut%Si0mG5J0QmLyU6p03_1 z-eg3yI%vLUQn*BsVlM?eXF78xfkFavQ({j7g#>|IvR!2?fD9CTzz)(nfkHw< zzXO_NR}sfB99U67+A&SnAEU(91W;6Pw_$1j8m!}g!4+Qn4&1}i{sX+!7ycJ7^+R9A zOMU6#)A!;k&&I?sz%altz%altz%altz%altfH8pO|4=nb#WEv2Pw$DI;LANNl-dvO zY83aM)OaFm@;-H&bOe3fz3vCGHaEEFE) zjs=DrQNV?W3ker8E)-m-xX^H+?4nQ3dbS~kRB@O*0 zqi+>`<6m@iZJ=)xeLLveMc+O)JQAZyhMhEK{qwQyDr&FQ90{iF;!1oZR7xHT(^7iRxY_is2d_J3n%`@g~L|HRwmF53T}X7+#Z zb|4F8%vdin`+vgzuNEaOaLGFHF8C!<`~U7X4#a*<7#X$>V&h=84eE4ykJ~o>XgAwN z)KU<0=GebkqZ)Hb2rUQ3E=G=C|0DK}x{-sL_(L%(ZiCU&oN4dK2PF-U>0T#pV1YrV zF|eW}EHK!jLaZ*>`2o3vW?Nc7Ci(yx(Yg2UCnxsTA@~~4b@7uU=oNxP>7es&U-nj4}1W4O$@$>q=pVK@VEO+V_m|wsBAoxFbx3M|Bqcd_OD#=EH^@IWsE0aSVAty40{M`S zXgUN`a}9L_LZTs8@!$ga#N!Kau0|I@v@~=PxOd|Vh-VgDpom|=1qwPfVvvw!BL?y5 z9)T|51i0^v__pG@iZ8(h4x2SXLz<0H?skL*a~5%?>sYL zPqQyigHmD$Fr^^SUmIR{h#5L-uV0vi7;z96&foA?UH?L`ARIUhdulM>ydE73-c|bf zc%DJdLiklQK8`E6_x*ffEb2c7kz`BUBq)2wKk*5EYGwPg_BP*!M7B3g`vY<6msGggC}YS)7JvfugAtEc|tpHA|%bC&pA9c9bKk8ECUKR0uAN$hs< zuU*v8=EUaDtdw5;-tYbAFlpewp@EnG@QF&TY)PHAzX4k)?=5e-YjCk0wDIXBsoTYW zbo+k<%}oCKpa1@E43TEO_{yuN{`Q@R&ezUc^8Bal4SsX`hO@SUpm=^s?snm~F8K{T z{8zvA=C=#v;cvmi|Nf6g>16GKr7TP>uU_*v);E?{x6tEPmXvNc`Xd^{KY#iAXUOBf z3y=TJADia`FTC=|&lS-TJy=zXkNg92Wf}UNuSAdN-ERyXjU#&axSxa%;jtrnsUcp8 z2M9Z&SEYPp?5G_(qL&CIe2;|ab#^|Y=ddGscm0Uo@s~<0L@#sE%NL>-qVyWTi+uN# zKFl4bmi=r?KKAf;_Tk}w)XmT=Aa5AQKX&>Jg^=Aba&W5Kckk$$rzvDf2OsJzA3OL) zVh<9-`5yg#l7{mz+H&6$C~7zl0uSOCv684L@>tYx9*rFv4KRp*$C(gfuJS0r;MZ}0 z!9V+DkU%bR#nW|M2J#2ogZu#{IQBgAT~x9!h5?2Fh5?2Fh5?2Fh5?2FhJm|*0c`&d z?eC%Wyl?+^4Y2?Fhid<)(aCA@7-aw7Ynm7edKhW^zzqMSA+Z0Se&O^x-0%hP{~sz1 zAG>elL!)EIPu&0Ek34Yl)aeJ$eDv%?4}a{DM<08fKUY3~fjh@Pd8KW2_ zVsoa!gS=7Z%O^fxz`yc~ZL7sMr}(+~4sTs)wmWUU+^&`Rc5SA^KPBo$&y&rb?5M8o 
z*#1-ebgezjKh-lu&2?4VFm>C6Bsq$%8;0hZh7IxOeI3%JX{OzCd|mV$LleE((N5&<-ys$Vq)2x>#$~|P0>TV*NRQCmJ zlPX^v4Uke^UftZXS5|zl3u(fxm-*&g$GQvy3Pv-xI5Shh)d+3Me0{pss?|GCW2x4< z+MK(5Q5D8XCC}euQ9|yDno1bd7+MV&I;2nF7zU>+BSi@Sd0PeZ^)oy!! zu~oO|`_-|obh?fC`r>RbK^9xhlsVTYGnt}jo#}RJt;^P~kBp9A1dRMt?wMgGFp&ay zc@XCDahMptk})v`o5-FN;Y{fQB(aH*((8_M!7Tp>5az922=n}W>)H0S1&AC3lPrOQ z9S^Q@NfL$4?n2Vh@uz`4Kb@OCQwjQn9i_mZMyU!=>a`3??bKK#$AMfp&Gk=BH|XIz(PG{<1Y^wSXIM@RL{j3|OtrH(~efG&@)Mw)Na1Y{XWC zg=k-GcdS{U5B{gC)=gQ3jNe^xQZc$pO;x)!O*Oirp^2g~rCZt*O#Q?wa#?P z{+X;6ym9@>MrS|F>qh|XKS9ur=p32U15+xwG!4LBM%~2b)uTX#zs^{qlF#RwTmZz> ze9LNHo(qJei>S*SQw$z!SyL9sa&-%&_`U=ko!n%)f_T+{b-!cr^;)}LYgm9xkiP5B zmL3CaIk_ikK6F(8n1F^*5~7cn9tWg-h*JYOtfo%E* zy>|(dQ>v~CQdcuXt*aRITGtR|z1yfs=9HwEa$Sn-UGVmnVOM&YE-*Q^rFk_-7rQA$gi*V(`cEl~nT zGe{qU)0Ou6DV-zHHo~Z&VNy`Y&K8+?^eL< z|1s~DK`nb!JIw6=$UlJD|2u*ALN4_yv;Si+&gg5z?ElRE&+PxZIl(gfKlR?uY4>FI ze`fy=UCXiIo!S5UCmoXRGR*#ukBUSL{v!C#L~cL8_J8iN2ZsyUc}A~2_?hDWLl%;+ zJpgF{`&meKk5z(Wmbm+fSwfu;LQQl$MRJPs!CudY;Rq2_O`MDu;r12B;rX#>zzCT| zEji$*B{a!vR#J@=lHW;Ajok^KnqfnyU}l9d$p&*@5lnUmGc}%x{08y%2#<2cUm#I& zqL8ZKxwBrC_8DV2`{fCvE=OJ-Nk~LnA+{UE6ATiCB}hyP*EA8i>671av=v$(LM%fF z%}dx-0-`$(Teney(&5<=WV9RZ{rv~Yc*n-dFu*XtFmMML!1n*p{vKM-`{(~X)swZu zjsNcx`@e@=PDvav8#$MXqD-7i{R+;`LGAy@yHpC965D`7@1gx4d``iK)Hn1yxBJr; z!2ZvbxnJOLaN*k=rH@-S?uwg^#^fI2yr)&1101Pe}G~P_W>_EV%GiGG#f* z6@fCHvEagMK~BHx{&tln(?EEk$ugormu0Df!?7$#Y0MY4hON8GlDh8=s@y_ z99xC-=hbek+l(04BpXPq+j#U~F#)d^|2sJxj|CUL>%oPGD@7Jum<1Qk7hKpZN;b&D z&m7+#3QrB?SJ+|2oyQb+SS_Mp)>8wO2|CxL5Y|(}uBSLJx+2=5f?ZEzId72$XU8bM z=^1c)Zxa=cNw{H!aKk$sz`_c#kVjVIJhFuJDG?s|K*9}EjTlqh`Ld#kmAs1Obj95_ zc2V4!u^7pL()-}_c3xlUovOE!U8+?K1;m6_5z>&M1A+^X(`U0P$DrG*=XO%77(y5N za!0g^f)QvHkrW8pLqjIoLv?>zMTpVNJ^d00cu)pn9h4x}K?z}<+h Module(new Frontend_embedded) - case (false, true) => Module(new Frontend_ooo) - case (false, false) => Module(new Frontend_inorder) + case (true, _) => Module(new FrontendEmbedded) + case (false, true) => Module(new FrontendDynamic) + case (false, false) => Module(new 
FrontendSequential) } // Backend if (EnableOutOfOrderExec) { val mmioXbar = Module(new SimpleBusCrossbarNto1(if (HasDcache) 2 else 3)) - val backend = Module(new Backend_ooo) + val backend = Module(new BackendDynamic) PipelineVector2Connect(new DecodeIO, frontend.io.out(0), frontend.io.out(1), backend.io.in(0), backend.io.in(1), frontend.io.flushVec(1), 16) backend.io.flush := frontend.io.flushVec(2) frontend.io.redirect <> backend.io.redirect @@ -143,7 +146,7 @@ class NutCore(implicit val p: NutCoreConfig) extends NutCoreModule { io.mmio <> mmioXbar.io.out } else { - val backend = Module(new Backend_inorder) + val backend = Module(new BackendSequential) PipelineVector2Connect(new DecodeIO, frontend.io.out(0), frontend.io.out(1), backend.io.in(0), backend.io.in(1), frontend.io.flushVec(1), 4) diff --git a/src/main/scala/nutcore/backend/BackendCommons.scala b/src/main/scala/nutcore/backend/BackendCommons.scala new file mode 100644 index 000000000..9c7c5c9ef --- /dev/null +++ b/src/main/scala/nutcore/backend/BackendCommons.scala @@ -0,0 +1,43 @@ +/************************************************************************************** +* Copyright (c) 2020 Institute of Computing Technology, CAS +* Copyright (c) 2020 University of Chinese Academy of Sciences +* +* NutShell is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +* FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. 
+***************************************************************************************/ + +package nutcore.backend + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import utils._ +import bus.simplebus._ +import difftest._ + +trait HasBackendConst{ + // val multiIssue = true + val robSize = 16 + val robWidth = 2 + val robInstCapacity = robSize * robWidth + val checkpointSize = 4 // register map checkpoint size + val brTagWidth = log2Up(checkpointSize) + val prfAddrWidth = log2Up(robSize) + log2Up(robWidth) // physical rf addr width + + val DispatchWidth = 2 + val CommitWidth = 2 + val RetireWidth = 2 + + val enableCheckpoint = true +} diff --git a/src/main/scala/nutcore/backend/ooo/Backend.scala b/src/main/scala/nutcore/backend/dynamic/Dynamic.scala similarity index 92% rename from src/main/scala/nutcore/backend/ooo/Backend.scala rename to src/main/scala/nutcore/backend/dynamic/Dynamic.scala index 9655c85a8..d8a7a893e 100644 --- a/src/main/scala/nutcore/backend/ooo/Backend.scala +++ b/src/main/scala/nutcore/backend/dynamic/Dynamic.scala @@ -1,47 +1,19 @@ -/************************************************************************************** -* Copyright (c) 2020 Institute of Computing Technology, CAS -* Copyright (c) 2020 University of Chinese Academy of Sciences -* -* NutShell is licensed under Mulan PSL v2. -* You can use this software according to the terms and conditions of the Mulan PSL v2. -* You may obtain a copy of Mulan PSL v2 at: -* http://license.coscl.org.cn/MulanPSL2 -* -* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR -* FIT FOR A PARTICULAR PURPOSE. -* -* See the Mulan PSL v2 for more details. 
-***************************************************************************************/ - -package nutcore + +package nutcore.backend import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils +import nutcore._ + import utils._ import bus.simplebus._ import difftest._ -trait HasBackendConst{ - // val multiIssue = true - val robSize = 16 - val robWidth = 2 - val robInstCapacity = robSize * robWidth - val checkpointSize = 4 // register map checkpoint size - val brTagWidth = log2Up(checkpointSize) - val prfAddrWidth = log2Up(robSize) + log2Up(robWidth) // physical rf addr width - - val DispatchWidth = 2 - val CommitWidth = 2 - val RetireWidth = 2 - - val enableCheckpoint = true -} // NutShell/Argo Out Of Order Execution Backend -class Backend_ooo(implicit val p: NutCoreConfig) extends NutCoreModule with HasRegFileParameter with HasBackendConst{ +class BackendDynamic(implicit val p: NutCoreConfig) extends NutCoreModule with HasRegFileParameter with HasBackendConst{ val io = IO(new Bundle { // EXU @@ -666,35 +638,3 @@ class Backend_ooo(implicit val p: NutCoreConfig) extends NutCoreModule with HasR } } - -class Backend_inorder(implicit val p: NutCoreConfig) extends NutCoreModule { - val io = IO(new Bundle { - val in = Vec(2, Flipped(Decoupled(new DecodeIO))) - val flush = Input(UInt(2.W)) - val dmem = new SimpleBusUC(addrBits = VAddrBits) - val memMMU = Flipped(new MemMMUIO) - - val redirect = new RedirectIO - }) - - val isu = Module(new ISU) - val exu = Module(new EXU) - val wbu = Module(new WBU) - - PipelineConnect(isu.io.out, exu.io.in, exu.io.out.fire(), io.flush(0)) - PipelineConnect(exu.io.out, wbu.io.in, true.B, io.flush(1)) - - isu.io.in <> io.in - - isu.io.flush := io.flush(0) - exu.io.flush := io.flush(1) - - isu.io.wb <> wbu.io.wb - io.redirect <> wbu.io.redirect - // forward - isu.io.forward <> exu.io.forward - - io.memMMU.imem <> exu.io.memMMU.imem - io.memMMU.dmem <> exu.io.memMMU.dmem - io.dmem <> exu.io.dmem -} \ No newline at 
end of file diff --git a/src/main/scala/nutcore/backend/ooo/EP.scala b/src/main/scala/nutcore/backend/dynamic/EP.scala similarity index 93% rename from src/main/scala/nutcore/backend/ooo/EP.scala rename to src/main/scala/nutcore/backend/dynamic/EP.scala index 94293f9c4..d666d283e 100644 --- a/src/main/scala/nutcore/backend/ooo/EP.scala +++ b/src/main/scala/nutcore/backend/dynamic/EP.scala @@ -1,9 +1,12 @@ -package nutcore + +package nutcore.backend import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils +import nutcore._ + import utils._ // Out of Order Execution Pipeline for NutShell/Argo diff --git a/src/main/scala/nutcore/backend/ooo/ROB.scala b/src/main/scala/nutcore/backend/dynamic/ROB.scala similarity index 99% rename from src/main/scala/nutcore/backend/ooo/ROB.scala rename to src/main/scala/nutcore/backend/dynamic/ROB.scala index 615c9b94b..8da0ce443 100644 --- a/src/main/scala/nutcore/backend/ooo/ROB.scala +++ b/src/main/scala/nutcore/backend/dynamic/ROB.scala @@ -14,15 +14,18 @@ * See the Mulan PSL v2 for more details. ***************************************************************************************/ -package nutcore +package nutcore.backend import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils +import nutcore._ + import utils._ import difftest._ + object physicalRFTools{ def getPRFAddr(robIndex: UInt, bank: UInt): UInt = { Cat(robIndex, bank(0)) diff --git a/src/main/scala/nutcore/backend/ooo/RS.scala b/src/main/scala/nutcore/backend/dynamic/RS.scala similarity index 99% rename from src/main/scala/nutcore/backend/ooo/RS.scala rename to src/main/scala/nutcore/backend/dynamic/RS.scala index 54ad1298f..10675c8e3 100644 --- a/src/main/scala/nutcore/backend/ooo/RS.scala +++ b/src/main/scala/nutcore/backend/dynamic/RS.scala @@ -14,12 +14,14 @@ * See the Mulan PSL v2 for more details. 
***************************************************************************************/ -package nutcore +package nutcore.backend import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils +import nutcore._ + import utils._ trait HasRSConst{ diff --git a/src/main/scala/nutcore/backend/fu/ALU.scala b/src/main/scala/nutcore/backend/fu/ALU.scala index 7b506c1e0..5071e5326 100644 --- a/src/main/scala/nutcore/backend/fu/ALU.scala +++ b/src/main/scala/nutcore/backend/fu/ALU.scala @@ -20,6 +20,8 @@ import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils +import nutcore.frontend.instr_fetch.branch_predict._ + import utils._ import difftest._ import top.Settings diff --git a/src/main/scala/nutcore/backend/seq/EXU.scala b/src/main/scala/nutcore/backend/sequential/EXU.scala similarity index 100% rename from src/main/scala/nutcore/backend/seq/EXU.scala rename to src/main/scala/nutcore/backend/sequential/EXU.scala diff --git a/src/main/scala/nutcore/backend/seq/ISU.scala b/src/main/scala/nutcore/backend/sequential/ISU.scala similarity index 100% rename from src/main/scala/nutcore/backend/seq/ISU.scala rename to src/main/scala/nutcore/backend/sequential/ISU.scala diff --git a/src/main/scala/nutcore/backend/sequential/Sequential.scala b/src/main/scala/nutcore/backend/sequential/Sequential.scala new file mode 100644 index 000000000..4348ba248 --- /dev/null +++ b/src/main/scala/nutcore/backend/sequential/Sequential.scala @@ -0,0 +1,44 @@ + +package nutcore.backend + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import utils._ +import bus.simplebus._ +import difftest._ + +class BackendSequential(implicit val p: NutCoreConfig) extends NutCoreModule { + val io = IO(new Bundle { + val in = Vec(2, Flipped(Decoupled(new DecodeIO))) + val flush = Input(UInt(2.W)) + val dmem = new SimpleBusUC(addrBits = VAddrBits) + val memMMU = Flipped(new MemMMUIO) + + val redirect = new 
RedirectIO + }) + + val isu = Module(new ISU) + val exu = Module(new EXU) + val wbu = Module(new WBU) + + PipelineConnect(isu.io.out, exu.io.in, exu.io.out.fire(), io.flush(0)) + PipelineConnect(exu.io.out, wbu.io.in, true.B, io.flush(1)) + + isu.io.in <> io.in + + isu.io.flush := io.flush(0) + exu.io.flush := io.flush(1) + + isu.io.wb <> wbu.io.wb + io.redirect <> wbu.io.redirect + // forward + isu.io.forward <> exu.io.forward + + io.memMMU.imem <> exu.io.memMMU.imem + io.memMMU.dmem <> exu.io.memMMU.dmem + io.dmem <> exu.io.dmem +} diff --git a/src/main/scala/nutcore/backend/seq/WBU.scala b/src/main/scala/nutcore/backend/sequential/WBU.scala similarity index 100% rename from src/main/scala/nutcore/backend/seq/WBU.scala rename to src/main/scala/nutcore/backend/sequential/WBU.scala diff --git a/src/main/scala/nutcore/frontend/BPU.scala b/src/main/scala/nutcore/frontend/BPU.scala deleted file mode 100644 index 470fe9ab1..000000000 --- a/src/main/scala/nutcore/frontend/BPU.scala +++ /dev/null @@ -1,476 +0,0 @@ -/************************************************************************************** -* Copyright (c) 2020 Institute of Computing Technology, CAS -* Copyright (c) 2020 University of Chinese Academy of Sciences -* -* NutShell is licensed under Mulan PSL v2. -* You can use this software according to the terms and conditions of the Mulan PSL v2. -* You may obtain a copy of Mulan PSL v2 at: -* http://license.coscl.org.cn/MulanPSL2 -* -* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR -* FIT FOR A PARTICULAR PURPOSE. -* -* See the Mulan PSL v2 for more details. 
-***************************************************************************************/ - -package nutcore - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils - -import utils._ -import top.Settings - -class TableAddr(val idxBits: Int) extends NutCoreBundle { - val padLen = if (Settings.get("IsRV32") || !Settings.get("EnableOutOfOrderExec")) 2 else 3 - def tagBits = VAddrBits - padLen - idxBits - - //val res = UInt((AddrBits - VAddrBits).W) - val tag = UInt(tagBits.W) - val idx = UInt(idxBits.W) - val pad = UInt(padLen.W) - - def fromUInt(x: UInt) = x.asTypeOf(UInt(VAddrBits.W)).asTypeOf(this) - def getTag(x: UInt) = fromUInt(x).tag - def getIdx(x: UInt) = fromUInt(x).idx -} - -object BTBtype { - def B = "b00".U // branch - def J = "b01".U // jump - def I = "b10".U // indirect - def R = "b11".U // return - - def apply() = UInt(2.W) -} - -class BPUUpdateReq extends NutCoreBundle { - val valid = Output(Bool()) - val pc = Output(UInt(VAddrBits.W)) - val isMissPredict = Output(Bool()) - val actualTarget = Output(UInt(VAddrBits.W)) - val actualTaken = Output(Bool()) // for branch - val fuOpType = Output(FuOpType()) - val btbType = Output(BTBtype()) - val isRVC = Output(Bool()) // for ras, save PC+2 to stack if is RVC -} - -// nextline predicter generates NPC from current NPC in 1 cycle -class BPU_ooo extends NutCoreModule { - val io = IO(new Bundle { - val in = new Bundle { val pc = Flipped(Valid((UInt(VAddrBits.W)))) } - val out = new RedirectIO - val flush = Input(Bool()) - val brIdx = Output(Vec(4, Bool())) - // val target = Output(Vec(4, UInt(VAddrBits.W))) - // val instValid = Output(UInt(4.W)) // now instValid is generated in IFU - val crosslineJump = Output(Bool()) - }) - - val flush = BoolStopWatch(io.flush, io.in.pc.valid, startHighPriority = true) - - // BTB - val NRbtb = 512 - val btbAddr = new TableAddr(log2Up(NRbtb >> 2)) - def btbEntry() = new Bundle { - val tag = UInt(btbAddr.tagBits.W) - val _type = UInt(2.W) - val 
target = UInt(VAddrBits.W) - val crosslineJump = Bool() - val valid = Bool() - } - - val btb = List.fill(4)(Module(new SRAMTemplate(btbEntry(), set = NRbtb >> 2, shouldReset = true, holdRead = true, singlePort = true))) - // flush BTB when executing fence.i - val flushBTB = WireInit(false.B) - val flushTLB = WireInit(false.B) - BoringUtils.addSink(flushBTB, "MOUFlushICache") - BoringUtils.addSink(flushTLB, "MOUFlushTLB") - (0 to 3).map(i => (btb(i).reset := reset.asBool || (flushBTB || flushTLB))) - - Debug(reset.asBool || (flushBTB || flushTLB), "[BPU-RESET] bpu-reset flushBTB:%d flushTLB:%d\n", flushBTB, flushTLB) - - (0 to 3).map(i => (btb(i).io.r.req.valid := io.in.pc.valid)) - (0 to 3).map(i => (btb(i).io.r.req.bits.setIdx := btbAddr.getIdx(io.in.pc.bits))) - - - val btbRead = Wire(Vec(4, btbEntry())) - (0 to 3).map(i => (btbRead(i) := btb(i).io.r.resp.data(0))) - // since there is one cycle latency to read SyncReadMem, - // we should latch the input pc for one cycle - val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.valid) - val btbHit = Wire(Vec(4, Bool())) - (0 to 3).map(i => btbHit(i) := btbRead(i).valid && btbRead(i).tag === btbAddr.getTag(pcLatch) && !flush && RegNext(btb(i).io.r.req.fire(), init = false.B)) - // btbHit will ignore pc(2,0). 
pc(2,0) is used to build brIdx - val crosslineJump = btbRead(3).crosslineJump && btbHit(3) && !io.brIdx(0) && !io.brIdx(1) && !io.brIdx(2) - io.crosslineJump := crosslineJump - // val crosslineJumpLatch = RegNext(crosslineJump) - // val crosslineJumpTarget = RegEnable(btbRead.target, crosslineJump) - - // PHT - val pht = List.fill(4)(Mem(NRbtb >> 2, UInt(2.W))) - val phtTaken = Wire(Vec(4, Bool())) - (0 to 3).map(i => (phtTaken(i) := RegEnable(pht(i).read(btbAddr.getIdx(io.in.pc.bits))(1), io.in.pc.valid))) - - // RAS - val NRras = 16 - val ras = Mem(NRras, UInt(VAddrBits.W)) - val sp = Counter(NRras) - val rasTarget = RegEnable(ras.read(sp.value), io.in.pc.valid) - - // update - val req = WireInit(0.U.asTypeOf(new BPUUpdateReq)) - val btbWrite = WireInit(0.U.asTypeOf(btbEntry())) - BoringUtils.addSink(req, "bpuUpdateReq") - - btbWrite.tag := btbAddr.getTag(req.pc) - btbWrite.target := req.actualTarget - btbWrite._type := req.btbType - btbWrite.crosslineJump := req.pc(2,1)==="h3".U && !req.isRVC // ((pc_offset % 8) == 6) && inst is 32bit in length - btbWrite.valid := true.B - // NOTE: We only update BTB at a miss prediction. - // If a miss prediction is found, the pipeline will be flushed - // in the next cycle. Therefore it is safe to use single-port - // SRAM to implement BTB, since write requests have higher priority - // than read request. Again, since the pipeline will be flushed - // in the next cycle, the read request will be useless. 
- (0 to 3).map(i => btb(i).io.w.req.valid := req.isMissPredict && req.valid && i.U === req.pc(2,1)) - (0 to 3).map(i => btb(i).io.w.req.bits.setIdx := btbAddr.getIdx(req.pc)) - (0 to 3).map(i => btb(i).io.w.req.bits.data := btbWrite) - - val getpht = LookupTree(req.pc(2,1), List.tabulate(4)(i => (i.U -> pht(i).read(btbAddr.getIdx(req.pc))))) - val cnt = RegNext(getpht) - val reqLatch = RegNext(req) - when (reqLatch.valid && ALUOpType.isBranch(reqLatch.fuOpType)) { - val taken = reqLatch.actualTaken - val newCnt = Mux(taken, cnt + 1.U, cnt - 1.U) - val wen = (taken && (cnt =/= "b11".U)) || (!taken && (cnt =/= "b00".U)) - when (wen) { - (0 to 3).map(i => when(i.U === reqLatch.pc(2,1)){pht(i).write(btbAddr.getIdx(reqLatch.pc), newCnt)}) - } - } - when (req.valid) { - when (req.fuOpType === ALUOpType.call) { - ras.write(sp.value + 1.U, Mux(req.isRVC, req.pc + 2.U, req.pc + 4.U)) - sp.value := sp.value + 1.U - } - .elsewhen (req.fuOpType === ALUOpType.ret) { - when(sp.value === 0.U) { - // RAS empty, do nothing - } - sp.value := Mux(sp.value===0.U, 0.U, sp.value - 1.U) - } - } - - def genInstValid(pc: UInt) = LookupTree(pc(2,1), List( - "b00".U -> "b1111".U, - "b01".U -> "b1110".U, - "b10".U -> "b1100".U, - "b11".U -> "b1000".U - )) - - val pcLatchValid = genInstValid(pcLatch) - - val target = Wire(Vec(4, UInt(VAddrBits.W))) - (0 to 3).map(i => target(i) := Mux(btbRead(i)._type === BTBtype.R, rasTarget, btbRead(i).target)) - (0 to 3).map(i => io.brIdx(i) := btbHit(i) && pcLatchValid(i).asBool && Mux(btbRead(i)._type === BTBtype.B, phtTaken(i), true.B) && btbRead(i).valid) - io.out.target := PriorityMux(io.brIdx, target) - io.out.valid := io.brIdx.asUInt.orR - io.out.rtype := 0.U - Debug(io.out.valid, "[BPU] pc %x io.brIdx.asUInt %b phtTaken %x %x %x %x valid %x %x %x %x\n", pcLatch, io.brIdx.asUInt, phtTaken(0), phtTaken(1), phtTaken(2), phtTaken(3), btbRead(0).valid, btbRead(1).valid, btbRead(2).valid, btbRead(3).valid) - - // io.out.valid := btbHit && 
Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !crosslineJump || crosslineJumpLatch && !flush && !crosslineJump - // Note: - // btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !crosslineJump : normal branch predict - // crosslineJumpLatch && !flush && !crosslineJump : cross line branch predict, bpu will require imem to fetch the next 16bit of current inst in next instline - // `&& !crosslineJump` is used to make sure this logic will run correctly when imem stalls (pcUpdate === false) - // by using `instline`, we mean a 64 bit instfetch result from imem - // ROCKET uses a 32 bit instline, and its IDU logic is more simple than this implentation. -} - -class BPU_embedded extends NutCoreModule { - val io = IO(new Bundle { - val in = new Bundle { val pc = Flipped(Valid((UInt(32.W)))) } - val out = new RedirectIO - val flush = Input(Bool()) - }) - - val flush = BoolStopWatch(io.flush, io.in.pc.valid, startHighPriority = true) - - // BTB - val NRbtb = 512 - val btbAddr = new TableAddr(log2Up(NRbtb)) - def btbEntry() = new Bundle { - val tag = UInt(btbAddr.tagBits.W) - val _type = UInt(2.W) - val target = UInt(32.W) - } - - val btb = Module(new SRAMTemplate(btbEntry(), set = NRbtb, shouldReset = true, holdRead = true, singlePort = true)) - btb.io.r.req.valid := io.in.pc.valid - btb.io.r.req.bits.setIdx := btbAddr.getIdx(io.in.pc.bits) - - val btbRead = Wire(btbEntry()) - btbRead := btb.io.r.resp.data(0) - // since there is one cycle latency to read SyncReadMem, - // we should latch the input pc for one cycle - val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.valid) - val btbHit = btbRead.tag === btbAddr.getTag(pcLatch) && !flush && RegNext(btb.io.r.req.ready, init = false.B) - - // PHT - val pht = Mem(NRbtb, UInt(2.W)) - val phtTaken = RegEnable(pht.read(btbAddr.getIdx(io.in.pc.bits))(1), io.in.pc.valid) - - // RAS - val NRras = 16 - val ras = Mem(NRras, UInt(32.W)) - val sp = Counter(NRras) - val rasTarget = RegEnable(ras.read(sp.value), 
io.in.pc.valid) - - // update - val req = WireInit(0.U.asTypeOf(new BPUUpdateReq)) - val btbWrite = WireInit(0.U.asTypeOf(btbEntry())) - BoringUtils.addSink(req, "bpuUpdateReq") - - btbWrite.tag := btbAddr.getTag(req.pc) - btbWrite.target := req.actualTarget - btbWrite._type := req.btbType - // NOTE: We only update BTB at a miss prediction. - // If a miss prediction is found, the pipeline will be flushed - // in the next cycle. Therefore it is safe to use single-port - // SRAM to implement BTB, since write requests have higher priority - // than read request. Again, since the pipeline will be flushed - // in the next cycle, the read request will be useless. - btb.io.w.req.valid := req.isMissPredict && req.valid - btb.io.w.req.bits.setIdx := btbAddr.getIdx(req.pc) - btb.io.w.req.bits.data := btbWrite - - val cnt = RegNext(pht.read(btbAddr.getIdx(req.pc))) - val reqLatch = RegNext(req) - when (reqLatch.valid && ALUOpType.isBranch(reqLatch.fuOpType)) { - val taken = reqLatch.actualTaken - val newCnt = Mux(taken, cnt + 1.U, cnt - 1.U) - val wen = (taken && (cnt =/= "b11".U)) || (!taken && (cnt =/= "b00".U)) - when (wen) { - pht.write(btbAddr.getIdx(reqLatch.pc), newCnt) - } - } - when (req.valid) { - when (req.fuOpType === ALUOpType.call) { - ras.write(sp.value + 1.U, req.pc + 4.U) - sp.value := sp.value + 1.U - } - .elsewhen (req.fuOpType === ALUOpType.ret) { - sp.value := sp.value - 1.U - } - } - - val flushBTB = WireInit(false.B) - val flushTLB = WireInit(false.B) - BoringUtils.addSink(flushBTB, "MOUFlushICache") - BoringUtils.addSink(flushTLB, "MOUFlushTLB") - - io.out.target := Mux(btbRead._type === BTBtype.R, rasTarget, btbRead.target) - io.out.valid := btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) - io.out.rtype := 0.U -} - -class BPU_inorder extends NutCoreModule { - val io = IO(new Bundle { - val in = new Bundle { val pc = Flipped(Valid((UInt(VAddrBits.W)))) } - val out = new RedirectIO - val flush = Input(Bool()) - val brIdx = Output(UInt(3.W)) 
- val crosslineJump = Output(Bool()) - }) - - val flush = BoolStopWatch(io.flush, io.in.pc.valid, startHighPriority = true) - - // BTB - val NRbtb = 512 - val btbAddr = new TableAddr(log2Up(NRbtb)) - def btbEntry() = new Bundle { - val tag = UInt(btbAddr.tagBits.W) - val _type = UInt(2.W) - val target = UInt(VAddrBits.W) - val brIdx = UInt(3.W) - val valid = Bool() - } - - val btb = Module(new SRAMTemplate(btbEntry(), set = NRbtb, shouldReset = true, holdRead = true, singlePort = true)) - // flush BTB when executing fence.i - val flushBTB = WireInit(false.B) - val flushTLB = WireInit(false.B) - BoringUtils.addSink(flushBTB, "MOUFlushICache") - BoringUtils.addSink(flushTLB, "MOUFlushTLB") - btb.reset := reset.asBool || (flushBTB || flushTLB) - Debug(reset.asBool || (flushBTB || flushTLB), "[BPU-RESET] bpu-reset flushBTB:%d flushTLB:%d\n", flushBTB, flushTLB) - - btb.io.r.req.valid := io.in.pc.valid - btb.io.r.req.bits.setIdx := btbAddr.getIdx(io.in.pc.bits) - - - val btbRead = Wire(btbEntry()) - btbRead := btb.io.r.resp.data(0) - // since there is one cycle latency to read SyncReadMem, - // we should latch the input pc for one cycle - val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.valid) - val btbHit = btbRead.valid && btbRead.tag === btbAddr.getTag(pcLatch) && !flush && RegNext(btb.io.r.req.fire(), init = false.B) && !(pcLatch(1) && btbRead.brIdx(0)) - // btbHit will ignore pc(1,0). 
pc(1,0) is used to build brIdx - // !(pcLatch(1) && btbRead.brIdx(0)) is used to deal with the following case: - // ------------------------------------------------- - // 0 jump rvc // marked as "take branch" in BTB - // 2 xxx rvc <-- pc // misrecognize this instr as "btb hit" with target of previous jump instr - // ------------------------------------------------- - val crosslineJump = btbRead.brIdx(2) && btbHit - io.crosslineJump := crosslineJump - // val crosslineJumpLatch = RegNext(crosslineJump) - // val crosslineJumpTarget = RegEnable(btbRead.target, crosslineJump) - Debug(btbHit, "[BTBHT1] %d pc=%x tag=%x,%x index=%x bridx=%x tgt=%x,%x flush %x type:%x\n", GTimer(), pcLatch, btbRead.tag, btbAddr.getTag(pcLatch), btbAddr.getIdx(pcLatch), btbRead.brIdx, btbRead.target, io.out.target, flush,btbRead._type) - Debug(btbHit, "[BTBHT2] btbRead.brIdx %x mask %x\n", btbRead.brIdx, Cat(crosslineJump, Fill(2, io.out.valid))) - // Debug(btbHit, "[BTBHT5] btbReqValid:%d btbReqSetIdx:%x\n",btb.io.r.req.valid, btb.io.r.req.bits.setId) - - // PHT - val pht = Mem(NRbtb, UInt(2.W)) - val phtTaken = RegEnable(pht.read(btbAddr.getIdx(io.in.pc.bits))(1), io.in.pc.valid) - - // RAS - - val NRras = 16 - val ras = Mem(NRras, UInt(VAddrBits.W)) - // val raBrIdxs = Mem(NRras, UInt(2.W)) - val sp = Counter(NRras) - val rasTarget = RegEnable(ras.read(sp.value), io.in.pc.valid) - // val rasBrIdx = RegEnable(raBrIdxs.read(sp.value), io.in.pc.valid) - - // update - val req = WireInit(0.U.asTypeOf(new BPUUpdateReq)) - val btbWrite = WireInit(0.U.asTypeOf(btbEntry())) - BoringUtils.addSink(req, "bpuUpdateReq") - - Debug(req.valid, "[BTBUP] pc=%x tag=%x index=%x bridx=%x tgt=%x type=%x\n", req.pc, btbAddr.getTag(req.pc), btbAddr.getIdx(req.pc), Cat(req.pc(1), ~req.pc(1)), req.actualTarget, req.btbType) - - //val fflag = req.btbType===3.U && btb.io.w.req.valid && btb.io.w.req.bits.setIdx==="hc9".U - //when(fflag && GTimer()>2888000.U) { - // Debug("%d\n", GTimer()) - // Debug("[BTBHT6] 
btbWrite.type is BTBtype.R/RET!!! Inpc:%x btbWrite.brIdx:%x setIdx:%x\n", io.in.pc.bits, btbWrite.brIdx, btb.io.w.req.bits.setIdx) - // Debug("[BTBHT6] tag:%x target:%x _type:%x bridx:%x\n", btbWrite.tag,btbWrite.target,btbWrite._type,btbWrite.brIdx) - // Debug(p"[BTBHT6] req:${req} \n") - //} - //Debug("[BTBHT5] tag: target:%x type:%d brIdx:%d\n", req.actualTarget, req.btbType, Cat(req.pc(2,0)==="h6".U && !req.isRVC, req.pc(1), ~req.pc(1))) - - btbWrite.tag := btbAddr.getTag(req.pc) - btbWrite.target := req.actualTarget - btbWrite._type := req.btbType - btbWrite.brIdx := Cat(req.pc(2,0)==="h6".U && !req.isRVC, req.pc(1), ~req.pc(1)) - btbWrite.valid := true.B - // NOTE: We only update BTB at a miss prediction. - // If a miss prediction is found, the pipeline will be flushed - // in the next cycle. Therefore it is safe to use single-port - // SRAM to implement BTB, since write requests have higher priority - // than read request. Again, since the pipeline will be flushed - // in the next cycle, the read request will be useless. 
- btb.io.w.req.valid := req.isMissPredict && req.valid - btb.io.w.req.bits.setIdx := btbAddr.getIdx(req.pc) - btb.io.w.req.bits.data := btbWrite - - //Debug(true) { - //when (btb.io.w.req.valid && btbWrite.tag === btbAddr.getTag("hffffffff803541a4".U)) { - // Debug("[BTBWrite] %d setIdx:%x req.valid:%d pc:%x target:%x bridx:%x\n", GTimer(), btbAddr.getIdx(req.pc), req.valid, req.pc, req.actualTarget, btbWrite.brIdx) - //} - //} - - //when (GTimer() > 77437484.U && btb.io.w.req.valid) { - // Debug("[BTBWrite-ALL] %d setIdx:%x req.valid:%d pc:%x target:%x bridx:%x\n", GTimer(), btbAddr.getIdx(req.pc), req.valid, req.pc, req.actualTarget, btbWrite.brIdx) - //} - - val cnt = RegNext(pht.read(btbAddr.getIdx(req.pc))) - val reqLatch = RegNext(req) - when (reqLatch.valid && ALUOpType.isBranch(reqLatch.fuOpType)) { - val taken = reqLatch.actualTaken - val newCnt = Mux(taken, cnt + 1.U, cnt - 1.U) - val wen = (taken && (cnt =/= "b11".U)) || (!taken && (cnt =/= "b00".U)) - when (wen) { - pht.write(btbAddr.getIdx(reqLatch.pc), newCnt) - //Debug(){ - //Debug("BPUPDATE: pc %x cnt %x\n", reqLatch.pc, newCnt) - //} - } - } - when (req.valid) { - when (req.fuOpType === ALUOpType.call) { - ras.write(sp.value + 1.U, Mux(req.isRVC, req.pc + 2.U, req.pc + 4.U)) - // raBrIdxs.write(sp.value + 1.U, Mux(req.pc(1), 2.U, 1.U)) - sp.value := sp.value + 1.U - } - .elsewhen (req.fuOpType === ALUOpType.ret) { - when(sp.value === 0.U) { - //Debug("ATTTTT: sp.value is 0.U\n") //TODO: sp.value may equal to 0.U - } - sp.value := Mux(sp.value===0.U, 0.U, sp.value - 1.U) //TODO: sp.value may less than 0.U - } - } - - io.out.target := Mux(btbRead._type === BTBtype.R, rasTarget, btbRead.target) - // io.out.target := Mux(crosslineJumpLatch && !flush, crosslineJumpTarget, Mux(btbRead._type === BTBtype.R, rasTarget, btbRead.target)) - // io.out.brIdx := btbRead.brIdx & Fill(3, io.out.valid) - io.brIdx := btbRead.brIdx & Cat(true.B, crosslineJump, Fill(2, io.out.valid)) - io.out.valid := btbHit && 
Mux(btbRead._type === BTBtype.B, phtTaken, true.B && rasTarget=/=0.U) //TODO: add rasTarget=/=0.U, need fix - io.out.rtype := 0.U - // io.out.valid := btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !crosslineJump || crosslineJumpLatch && !flush && !crosslineJump - // Note: - // btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !crosslineJump : normal branch predict - // crosslineJumpLatch && !flush && !crosslineJump : cross line branch predict, bpu will require imem to fetch the next 16bit of current inst in next instline - // `&& !crosslineJump` is used to make sure this logic will run correctly when imem stalls (pcUpdate === false) - // by using `instline`, we mean a 64 bit instfetch result from imem - // ROCKET uses a 32 bit instline, and its IDU logic is more simple than this implentation. -} - -class DummyPredicter extends NutCoreModule { - val io = IO(new Bundle { - val in = new Bundle { val pc = Flipped(Valid((UInt(VAddrBits.W)))) } - val out = new RedirectIO - val valid = Output(Bool()) - val flush = Input(Bool()) - val ignore = Input(Bool()) - val brIdx = Output(Vec(4, Bool())) - }) - // Note: when io.ignore, io.out.valid must be false.B for this pc - // This limitation is for cross instline inst fetch logic - io.valid := io.in.pc.valid // Predicter is returning a result - io.out.valid := false.B // Need redirect - io.out.target := DontCare // Redirect target - io.out.rtype := DontCare // Predicter does not need to care about it - io.brIdx := VecInit(Seq.fill(4)(false.B)) // Which inst triggers jump -} - -//---- Legacy BPUs ---- -/* -class BPU_nodelay extends NutCoreModule { - val io = IO(new Bundle { - val in = Flipped(Valid(new CtrlFlowIO)) - val out = new RedirectIO - }) - - val instr = io.in.bits.instr - val immJ = SignExt(Cat(instr(31), instr(19, 12), instr(20), instr(30, 21), 0.U(1.W)), XLEN) - val immB = SignExt(Cat(instr(31), instr(7), instr(30, 25), instr(11, 8), 0.U(1.W)), XLEN) - val table = Array( - 
RV32I_BRUInstr.JAL -> List(immJ, true.B), - RV32I_BRUInstr.BNE -> List(immB, instr(31)), - RV32I_BRUInstr.BEQ -> List(immB, instr(31)), - RV32I_BRUInstr.BLT -> List(immB, instr(31)), - RV32I_BRUInstr.BGE -> List(immB, instr(31)), - RV32I_BRUInstr.BLTU -> List(immB, instr(31)), - RV32I_BRUInstr.BGEU -> List(immB, instr(31)) - ) - val default = List(immB, false.B) - val offset :: predict :: Nil = ListLookup(instr, default, table) - - io.out.target := io.in.bits.pc + offset - io.out.valid := io.in.valid && predict(0) - io.out.rtype := 0.U -} -*/ \ No newline at end of file diff --git a/src/main/scala/nutcore/frontend/Dynamic.scala b/src/main/scala/nutcore/frontend/Dynamic.scala new file mode 100644 index 000000000..9d47d0cc0 --- /dev/null +++ b/src/main/scala/nutcore/frontend/Dynamic.scala @@ -0,0 +1,48 @@ + +package nutcore.frontend + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ +import nutcore.frontend.decode._ +import nutcore.frontend.instr_fetch._ + +import utils._ +import bus.simplebus._ +import chisel3.experimental.IO + +class FrontendDynamic(implicit val p: NutCoreConfig) extends NutCoreModule with HasFrontendIO { + def pipelineConnect2[T <: Data](left: DecoupledIO[T], right: DecoupledIO[T], + isFlush: Bool, entries: Int = 4, pipe: Boolean = false) = { + // NOTE: depend on https://github.com/chipsalliance/chisel3/pull/2245 + // right <> Queue(left, entries = entries, pipe = pipe, flush = Some(isFlush)) + right <> FlushableQueue(left, isFlush, entries = entries, pipe = pipe) + } + + val ifu = Module(new InstrFetchDynamic) + val ibf = Module(new IBF) + val idu = Module(new Decode) + + pipelineConnect2(ifu.io.out, ibf.io.in, ifu.io.flushVec(0)) + PipelineVector2Connect(new CtrlFlowIO, ibf.io.out(0), ibf.io.out(1), idu.io.in(0), idu.io.in(1), ifu.io.flushVec(1), if (EnableOutOfOrderExec) 8 else 4) + ibf.io.flush := ifu.io.flushVec(1) + + io.out <> idu.io.out + io.redirect <> ifu.io.redirect + io.flushVec 
<> ifu.io.flushVec + io.bpFlush <> ifu.io.bpFlush + io.ipf <> ifu.io.ipf + io.imem <> ifu.io.imem + + Debug("------------------------ FRONTEND:------------------------\n") + Debug("flush = %b, ifu:(%d,%d), ibf:(%d,%d), idu:(%d,%d)\n", + ifu.io.flushVec.asUInt, ifu.io.out.valid, ifu.io.out.ready, + ibf.io.in.valid, ibf.io.in.ready, idu.io.in(0).valid, idu.io.in(0).ready) + Debug(ifu.io.out.valid, "IFU: pc = 0x%x, instr = 0x%x\n", ifu.io.out.bits.pc, ifu.io.out.bits.instr) + Debug(ibf.io.in.valid, "IBF: pc = 0x%x, instr = 0x%x\n", ibf.io.in.bits.pc, ibf.io.in.bits.instr) + Debug(idu.io.in(0).valid, "IDU1: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu.io.in(0).bits.pc, idu.io.in(0).bits.instr, idu.io.in(0).bits.pnpc) + Debug(idu.io.in(1).valid, "IDU2: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu.io.in(1).bits.pc, idu.io.in(1).bits.instr, idu.io.in(1).bits.pnpc) +} + diff --git a/src/main/scala/nutcore/frontend/Embedded.scala b/src/main/scala/nutcore/frontend/Embedded.scala new file mode 100644 index 000000000..3ad4e710a --- /dev/null +++ b/src/main/scala/nutcore/frontend/Embedded.scala @@ -0,0 +1,36 @@ + +package nutcore.frontend + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ +import nutcore.frontend.decode._ +import nutcore.frontend.instr_fetch._ + +import utils._ +import bus.simplebus._ +import chisel3.experimental.IO + + +class FrontendEmbedded(implicit val p: NutCoreConfig) extends NutCoreModule with HasFrontendIO { + val ifu = Module(new InstrFetchEmbedded) + val idu = Module(new Decode) + + PipelineConnect(ifu.io.out, idu.io.in(0), idu.io.out(0).fire(), ifu.io.flushVec(0)) + idu.io.in(1) := DontCare + + io.out <> idu.io.out + io.redirect <> ifu.io.redirect + io.flushVec <> ifu.io.flushVec + io.bpFlush <> ifu.io.bpFlush + io.ipf <> ifu.io.ipf + io.imem <> ifu.io.imem + + Debug("------------------------ FRONTEND:------------------------\n") + Debug("flush = %b, ifu:(%d,%d), idu:(%d,%d)\n", + 
ifu.io.flushVec.asUInt, ifu.io.out.valid, ifu.io.out.ready, idu.io.in(0).valid, idu.io.in(0).ready) + Debug(ifu.io.out.valid, "IFU: pc = 0x%x, instr = 0x%x\n", ifu.io.out.bits.pc, ifu.io.out.bits.instr) + Debug(idu.io.in(0).valid, "IDU1: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu.io.in(0).bits.pc, idu.io.in(0).bits.instr, idu.io.in(0).bits.pnpc) +} diff --git a/src/main/scala/nutcore/frontend/Frontend.scala b/src/main/scala/nutcore/frontend/Frontend.scala deleted file mode 100644 index f34f160d0..000000000 --- a/src/main/scala/nutcore/frontend/Frontend.scala +++ /dev/null @@ -1,124 +0,0 @@ -/************************************************************************************** -* Copyright (c) 2020 Institute of Computing Technology, CAS -* Copyright (c) 2020 University of Chinese Academy of Sciences -* -* NutShell is licensed under Mulan PSL v2. -* You can use this software according to the terms and conditions of the Mulan PSL v2. -* You may obtain a copy of Mulan PSL v2 at: -* http://license.coscl.org.cn/MulanPSL2 -* -* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR -* FIT FOR A PARTICULAR PURPOSE. -* -* See the Mulan PSL v2 for more details. 
-***************************************************************************************/ - -package nutcore - -import chisel3._ -import chisel3.util._ -import chisel3.util.experimental.BoringUtils -import utils._ -import bus.simplebus._ -import chisel3.experimental.IO - -class FrontendIO(implicit val p: NutCoreConfig) extends Bundle with HasNutCoreConst { - val imem = new SimpleBusUC(userBits = ICacheUserBundleWidth, addrBits = VAddrBits) - val out = Vec(2, Decoupled(new DecodeIO)) - val flushVec = Output(UInt(4.W)) - val redirect = Flipped(new RedirectIO) - val bpFlush = Output(Bool()) - val ipf = Input(Bool()) -} - - -trait HasFrontendIO { - implicit val p: NutCoreConfig - val io = IO(new FrontendIO) -} - -class Frontend_ooo(implicit val p: NutCoreConfig) extends NutCoreModule with HasFrontendIO { - def pipelineConnect2[T <: Data](left: DecoupledIO[T], right: DecoupledIO[T], - isFlush: Bool, entries: Int = 4, pipe: Boolean = false) = { - // NOTE: depend on https://github.com/chipsalliance/chisel3/pull/2245 - // right <> Queue(left, entries = entries, pipe = pipe, flush = Some(isFlush)) - right <> FlushableQueue(left, isFlush, entries = entries, pipe = pipe) - } - - val ifu = Module(new IFU_ooo) - val ibf = Module(new IBF) - val idu = Module(new IDU) - - pipelineConnect2(ifu.io.out, ibf.io.in, ifu.io.flushVec(0)) - PipelineVector2Connect(new CtrlFlowIO, ibf.io.out(0), ibf.io.out(1), idu.io.in(0), idu.io.in(1), ifu.io.flushVec(1), if (EnableOutOfOrderExec) 8 else 4) - ibf.io.flush := ifu.io.flushVec(1) - - io.out <> idu.io.out - io.redirect <> ifu.io.redirect - io.flushVec <> ifu.io.flushVec - io.bpFlush <> ifu.io.bpFlush - io.ipf <> ifu.io.ipf - io.imem <> ifu.io.imem - - Debug("------------------------ FRONTEND:------------------------\n") - Debug("flush = %b, ifu:(%d,%d), ibf:(%d,%d), idu:(%d,%d)\n", - ifu.io.flushVec.asUInt, ifu.io.out.valid, ifu.io.out.ready, - ibf.io.in.valid, ibf.io.in.ready, idu.io.in(0).valid, idu.io.in(0).ready) - Debug(ifu.io.out.valid, 
"IFU: pc = 0x%x, instr = 0x%x\n", ifu.io.out.bits.pc, ifu.io.out.bits.instr) - Debug(ibf.io.in.valid, "IBF: pc = 0x%x, instr = 0x%x\n", ibf.io.in.bits.pc, ibf.io.in.bits.instr) - Debug(idu.io.in(0).valid, "IDU1: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu.io.in(0).bits.pc, idu.io.in(0).bits.instr, idu.io.in(0).bits.pnpc) - Debug(idu.io.in(1).valid, "IDU2: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu.io.in(1).bits.pc, idu.io.in(1).bits.instr, idu.io.in(1).bits.pnpc) -} - -class Frontend_embedded(implicit val p: NutCoreConfig) extends NutCoreModule with HasFrontendIO { - val ifu = Module(new IFU_embedded) - val idu = Module(new IDU) - - PipelineConnect(ifu.io.out, idu.io.in(0), idu.io.out(0).fire(), ifu.io.flushVec(0)) - idu.io.in(1) := DontCare - - io.out <> idu.io.out - io.redirect <> ifu.io.redirect - io.flushVec <> ifu.io.flushVec - io.bpFlush <> ifu.io.bpFlush - io.ipf <> ifu.io.ipf - io.imem <> ifu.io.imem - - Debug("------------------------ FRONTEND:------------------------\n") - Debug("flush = %b, ifu:(%d,%d), idu:(%d,%d)\n", - ifu.io.flushVec.asUInt, ifu.io.out.valid, ifu.io.out.ready, idu.io.in(0).valid, idu.io.in(0).ready) - Debug(ifu.io.out.valid, "IFU: pc = 0x%x, instr = 0x%x\n", ifu.io.out.bits.pc, ifu.io.out.bits.instr) - Debug(idu.io.in(0).valid, "IDU1: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu.io.in(0).bits.pc, idu.io.in(0).bits.instr, idu.io.in(0).bits.pnpc) -} - -class Frontend_inorder(implicit val p: NutCoreConfig) extends NutCoreModule with HasFrontendIO { - val ifu = Module(new IFU_inorder) - val ibf = Module(new NaiveRVCAlignBuffer) - val idu = Module(new IDU) - - def PipelineConnect2[T <: Data](left: DecoupledIO[T], right: DecoupledIO[T], - isFlush: Bool, entries: Int = 4, pipe: Boolean = false) = { - // NOTE: depend on https://github.com/chipsalliance/chisel3/pull/2245 - // right <> Queue(left, entries = entries, pipe = pipe, flush = Some(isFlush)) - right <> FlushableQueue(left, isFlush, entries = entries, pipe = pipe) - } - - 
PipelineConnect2(ifu.io.out, ibf.io.in, ifu.io.flushVec(0)) - PipelineConnect(ibf.io.out, idu.io.in(0), idu.io.out(0).fire(), ifu.io.flushVec(1)) - idu.io.in(1) := DontCare - - ibf.io.flush := ifu.io.flushVec(1) - io.out <> idu.io.out - io.redirect <> ifu.io.redirect - io.flushVec <> ifu.io.flushVec - io.bpFlush <> ifu.io.bpFlush - io.ipf <> ifu.io.ipf - io.imem <> ifu.io.imem - - Debug("------------------------ FRONTEND:------------------------\n") - Debug("flush = %b, ifu:(%d,%d), idu:(%d,%d)\n", - ifu.io.flushVec.asUInt, ifu.io.out.valid, ifu.io.out.ready, idu.io.in(0).valid, idu.io.in(0).ready) - Debug(ifu.io.out.valid, "IFU: pc = 0x%x, instr = 0x%x\n", ifu.io.out.bits.pc, ifu.io.out.bits.instr) - Debug(idu.io.in(0).valid, "IDU1: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu.io.in(0).bits.pc, idu.io.in(0).bits.instr, idu.io.in(0).bits.pnpc) -} \ No newline at end of file diff --git a/src/main/scala/nutcore/frontend/FrontendCommons.scala b/src/main/scala/nutcore/frontend/FrontendCommons.scala new file mode 100644 index 000000000..55c7b65b7 --- /dev/null +++ b/src/main/scala/nutcore/frontend/FrontendCommons.scala @@ -0,0 +1,46 @@ +/************************************************************************************** +* Copyright (c) 2020 Institute of Computing Technology, CAS +* Copyright (c) 2020 University of Chinese Academy of Sciences +* +* NutShell is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +* FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. 
+***************************************************************************************/ + +package nutcore.frontend + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ +import nutcore.frontend.decode._ +import nutcore.frontend.instr_fetch._ + +import utils._ +import bus.simplebus._ +import chisel3.experimental.IO + +class FrontendIO(implicit val p: NutCoreConfig) extends Bundle with HasNutCoreConst { + val imem = new SimpleBusUC(userBits = ICacheUserBundleWidth, addrBits = VAddrBits) + val out = Vec(2, Decoupled(new DecodeIO)) + val flushVec = Output(UInt(4.W)) + val redirect = Flipped(new RedirectIO) + val bpFlush = Output(Bool()) + val ipf = Input(Bool()) +} + + +trait HasFrontendIO { + implicit val p: NutCoreConfig + val io = IO(new FrontendIO) +} + + diff --git a/src/main/scala/nutcore/frontend/Sequential.scala b/src/main/scala/nutcore/frontend/Sequential.scala new file mode 100644 index 000000000..554a40543 --- /dev/null +++ b/src/main/scala/nutcore/frontend/Sequential.scala @@ -0,0 +1,45 @@ + +package nutcore.frontend + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ +import nutcore.frontend.decode._ +import nutcore.frontend.instr_fetch._ + +import utils._ +import bus.simplebus._ +import chisel3.experimental.IO + +class FrontendSequential(implicit val p: NutCoreConfig) extends NutCoreModule with HasFrontendIO { + val ifu = Module(new InstrFetchSequential) + val ibf = Module(new NaiveRVCAlignBuffer) + val idu = Module(new Decode) + + def PipelineConnect2[T <: Data](left: DecoupledIO[T], right: DecoupledIO[T], + isFlush: Bool, entries: Int = 4, pipe: Boolean = false) = { + // NOTE: depend on https://github.com/chipsalliance/chisel3/pull/2245 + // right <> Queue(left, entries = entries, pipe = pipe, flush = Some(isFlush)) + right <> FlushableQueue(left, isFlush, entries = entries, pipe = pipe) + } + + PipelineConnect2(ifu.io.out, ibf.io.in, 
ifu.io.flushVec(0)) + PipelineConnect(ibf.io.out, idu.io.in(0), idu.io.out(0).fire(), ifu.io.flushVec(1)) + idu.io.in(1) := DontCare + + ibf.io.flush := ifu.io.flushVec(1) + io.out <> idu.io.out + io.redirect <> ifu.io.redirect + io.flushVec <> ifu.io.flushVec + io.bpFlush <> ifu.io.bpFlush + io.ipf <> ifu.io.ipf + io.imem <> ifu.io.imem + + Debug("------------------------ FRONTEND:------------------------\n") + Debug("flush = %b, ifu:(%d,%d), idu:(%d,%d)\n", + ifu.io.flushVec.asUInt, ifu.io.out.valid, ifu.io.out.ready, idu.io.in(0).valid, idu.io.in(0).ready) + Debug(ifu.io.out.valid, "IFU: pc = 0x%x, instr = 0x%x\n", ifu.io.out.bits.pc, ifu.io.out.bits.instr) + Debug(idu.io.in(0).valid, "IDU1: pc = 0x%x, instr = 0x%x, pnpc = 0x%x\n", idu.io.in(0).bits.pc, idu.io.in(0).bits.instr, idu.io.in(0).bits.pnpc) +} \ No newline at end of file diff --git a/src/main/scala/nutcore/frontend/decode/Decode.scala b/src/main/scala/nutcore/frontend/decode/Decode.scala new file mode 100644 index 000000000..0c7e7dc37 --- /dev/null +++ b/src/main/scala/nutcore/frontend/decode/Decode.scala @@ -0,0 +1,66 @@ +/************************************************************************************** +* Copyright (c) 2020 Institute of Computing Technology, CAS +* Copyright (c) 2020 University of Chinese Academy of Sciences +* +* NutShell is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +* FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. 
+***************************************************************************************/ + +package nutcore.frontend.decode + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import utils._ +import difftest._ + +class Decode(implicit val p: NutCoreConfig) extends NutCoreModule with HasInstrType { + val io = IO(new Bundle { + val in = Vec(2, Flipped(Decoupled(new CtrlFlowIO))) + val out = Vec(2, Decoupled(new DecodeIO)) + }) + val decoder1 = Module(new DecodeUnit) + val decoder2 = Module(new DecodeUnit) + io.in(0) <> decoder1.io.in + io.in(1) <> decoder2.io.in + io.out(0) <> decoder1.io.out + io.out(1) <> decoder2.io.out + if(!EnableMultiIssue){ + io.in(1).ready := false.B + decoder2.io.in.valid := false.B + } + + val checkpoint_id = RegInit(0.U(64.W)) + + // debug runahead + val runahead = Module(new DifftestRunaheadEvent) + runahead.io.clock := clock + runahead.io.coreid := 0.U + runahead.io.valid := io.out(0).fire() + runahead.io.branch := decoder1.io.isBranch + runahead.io.pc := io.out(0).bits.cf.pc + runahead.io.checkpoint_id := checkpoint_id + when(runahead.io.valid && runahead.io.branch) { + checkpoint_id := checkpoint_id + 1.U // allocate a new checkpoint_id + } + io.out(0).bits.cf.isBranch := decoder1.io.isBranch + io.out(0).bits.cf.runahead_checkpoint_id := checkpoint_id + // when(runahead.io.valid) { + // printf("fire pc %x branch %x inst %x\n", runahead.io.pc, runahead.io.branch, io.out(0).bits.cf.instr) + // } + + if (!p.FPGAPlatform) { + BoringUtils.addSource(decoder1.io.isWFI | decoder2.io.isWFI, "isWFI") + } +} diff --git a/src/main/scala/nutcore/frontend/IDU.scala b/src/main/scala/nutcore/frontend/decode/DecodeUnit.scala similarity index 77% rename from src/main/scala/nutcore/frontend/IDU.scala rename to src/main/scala/nutcore/frontend/decode/DecodeUnit.scala index e9d9d7b85..3154d821e 100644 --- a/src/main/scala/nutcore/frontend/IDU.scala +++ 
b/src/main/scala/nutcore/frontend/decode/DecodeUnit.scala @@ -1,29 +1,17 @@ -/************************************************************************************** -* Copyright (c) 2020 Institute of Computing Technology, CAS -* Copyright (c) 2020 University of Chinese Academy of Sciences -* -* NutShell is licensed under Mulan PSL v2. -* You can use this software according to the terms and conditions of the Mulan PSL v2. -* You may obtain a copy of Mulan PSL v2 at: -* http://license.coscl.org.cn/MulanPSL2 -* -* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR -* FIT FOR A PARTICULAR PURPOSE. -* -* See the Mulan PSL v2 for more details. -***************************************************************************************/ - -package nutcore + +package nutcore.frontend.decode import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils +import nutcore._ + import utils._ import difftest._ -class Decoder(implicit val p: NutCoreConfig) extends NutCoreModule with HasInstrType { + +class DecodeUnit(implicit val p: NutCoreConfig) extends NutCoreModule with HasInstrType { val io = IO(new Bundle { val in = Flipped(Decoupled(new CtrlFlowIO)) val out = Decoupled(new DecodeIO) @@ -188,43 +176,3 @@ class Decoder(implicit val p: NutCoreConfig) extends NutCoreModule with HasInstr io.isBranch := VecInit(RV32I_BRUInstr.table.map(i => i._2.tail(1) === fuOpType)).asUInt.orR && fuType === FuType.bru } - -class IDU(implicit val p: NutCoreConfig) extends NutCoreModule with HasInstrType { - val io = IO(new Bundle { - val in = Vec(2, Flipped(Decoupled(new CtrlFlowIO))) - val out = Vec(2, Decoupled(new DecodeIO)) - }) - val decoder1 = Module(new Decoder) - val decoder2 = Module(new Decoder) - io.in(0) <> decoder1.io.in - io.in(1) <> decoder2.io.in - io.out(0) <> decoder1.io.out - io.out(1) <> decoder2.io.out - if(!EnableMultiIssue){ - 
io.in(1).ready := false.B - decoder2.io.in.valid := false.B - } - - val checkpoint_id = RegInit(0.U(64.W)) - - // debug runahead - val runahead = Module(new DifftestRunaheadEvent) - runahead.io.clock := clock - runahead.io.coreid := 0.U - runahead.io.valid := io.out(0).fire() - runahead.io.branch := decoder1.io.isBranch - runahead.io.pc := io.out(0).bits.cf.pc - runahead.io.checkpoint_id := checkpoint_id - when(runahead.io.valid && runahead.io.branch) { - checkpoint_id := checkpoint_id + 1.U // allocate a new checkpoint_id - } - io.out(0).bits.cf.isBranch := decoder1.io.isBranch - io.out(0).bits.cf.runahead_checkpoint_id := checkpoint_id - // when(runahead.io.valid) { - // printf("fire pc %x branch %x inst %x\n", runahead.io.pc, runahead.io.branch, io.out(0).bits.cf.instr) - // } - - if (!p.FPGAPlatform) { - BoringUtils.addSource(decoder1.io.isWFI | decoder2.io.isWFI, "isWFI") - } -} diff --git a/src/main/scala/nutcore/frontend/IBF.scala b/src/main/scala/nutcore/frontend/instr_align_buffer/IBF.scala similarity index 100% rename from src/main/scala/nutcore/frontend/IBF.scala rename to src/main/scala/nutcore/frontend/instr_align_buffer/IBF.scala diff --git a/src/main/scala/nutcore/frontend/NaiveIBF.scala b/src/main/scala/nutcore/frontend/instr_align_buffer/NaiveIBF.scala similarity index 100% rename from src/main/scala/nutcore/frontend/NaiveIBF.scala rename to src/main/scala/nutcore/frontend/instr_align_buffer/NaiveIBF.scala diff --git a/src/main/scala/nutcore/frontend/IFU.scala b/src/main/scala/nutcore/frontend/instr_fetch/Dynamic.scala similarity index 56% rename from src/main/scala/nutcore/frontend/IFU.scala rename to src/main/scala/nutcore/frontend/instr_fetch/Dynamic.scala index c585db2eb..62d3e2bbe 100644 --- a/src/main/scala/nutcore/frontend/IFU.scala +++ b/src/main/scala/nutcore/frontend/instr_fetch/Dynamic.scala @@ -1,43 +1,20 @@ -/************************************************************************************** -* Copyright (c) 2020 Institute of 
Computing Technology, CAS -* Copyright (c) 2020 University of Chinese Academy of Sciences -* -* NutShell is licensed under Mulan PSL v2. -* You can use this software according to the terms and conditions of the Mulan PSL v2. -* You may obtain a copy of Mulan PSL v2 at: -* http://license.coscl.org.cn/MulanPSL2 -* -* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR -* FIT FOR A PARTICULAR PURPOSE. -* -* See the Mulan PSL v2 for more details. -***************************************************************************************/ - -package nutcore + +package nutcore.frontend.instr_fetch import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils +import nutcore._ +import nutcore.frontend.instr_fetch.branch_predict._ + import utils._ import bus.simplebus._ import top.Settings import difftest._ -trait HasResetVector { - val resetVector = Settings.getLong("ResetVector") -} - -class ICacheUserBundle extends NutCoreBundle { - val pc = UInt(VAddrBits.W) - val brIdx = UInt(4.W) // mark if an inst is predicted to branch - val pnpc = UInt(VAddrBits.W) - val instValid = UInt(4.W) // mark which part of this inst line is valid -} -// Note: update ICacheUserBundleWidth when change ICacheUserBundle -class IFU_ooo extends NutCoreModule with HasResetVector { +class InstrFetchDynamic extends NutCoreModule with HasResetVector { val io = IO(new Bundle { val imem = new SimpleBusUC(userBits = ICacheUserBundleWidth, addrBits = VAddrBits) @@ -60,7 +37,7 @@ class IFU_ooo extends NutCoreModule with HasResetVector { // Note: we define instline as 8 Byte aligned data from icache // Next-line branch predictor - val nlp = Module(new BPU_ooo) + val nlp = Module(new BranchPredictDynamic) // nlpxxx_latch is used for the situation when I$ is disabled val nlpvalidreg = RegInit(false.B) @@ -145,7 +122,7 @@ class IFU_ooo extends NutCoreModule with 
HasResetVector { // Multi-cycle branch predictor // Multi-cycle branch predictor will not be synthesized if EnableMultiCyclePredictor is set to false - val mcp = Module(new DummyPredicter) + val mcp = Module(new BranchPredictDummy) mcp.io.in.pc.valid := io.imem.req.fire() mcp.io.in.pc.bits := pc mcp.io.flush := io.redirect.valid @@ -240,7 +217,7 @@ class IFU_ooo extends NutCoreModule with HasResetVector { val maybeBranch = Wire(Vec(4, Bool())) val brIdxByPredictor = Mux(validMCPRedirect, mcpResultQueue.io.deq.bits.brIdx.asUInt, io.imem.resp.bits.user.get(VAddrBits*2 + 3, VAddrBits*2)) (0 until 4).map(i => maybeBranch(i) := preDecodeIsBranch(io.out.bits.instr(16*(i+1)-1, 16*i))) //TODO: use icache pre-decode result - // When branch predicter set non-sequential npc for a non-branch inst, + // When branch predictor set non-sequential npc for a non-branch inst, // flush IFU, fetch sequential inst instead. when((brIdxByPredictor & ~maybeBranch.asUInt).orR && io.out.fire()){ Debug("[ERROR] FixInvalidBranchPredict\n") @@ -255,141 +232,3 @@ class IFU_ooo extends NutCoreModule with HasResetVector { BoringUtils.addSource(BoolStopWatch(io.imem.req.valid, io.imem.resp.fire()), "perfCntCondMimemStall") BoringUtils.addSource(io.flushVec.orR, "perfCntCondMifuFlush") } - -class IFU_embedded extends NutCoreModule with HasResetVector { - val io = IO(new Bundle { - val imem = new SimpleBusUC(userBits = 64, addrBits = VAddrBits) - val out = Decoupled(new CtrlFlowIO) - val redirect = Flipped(new RedirectIO) - val flushVec = Output(UInt(4.W)) - val bpFlush = Output(Bool()) - val ipf = Input(Bool()) - }) - - // pc - val pc = RegInit(resetVector.U(32.W)) - val pcUpdate = io.redirect.valid || io.imem.req.fire() - val snpc = pc + 4.U // sequential next pc - - val bpu = Module(new BPU_embedded) - - // predicted next pc - val pnpc = bpu.io.out.target - val npc = Mux(io.redirect.valid, io.redirect.target, Mux(bpu.io.out.valid, pnpc, snpc)) - - bpu.io.in.pc.valid := io.imem.req.fire() // only 
predict when Icache accepts a request - bpu.io.in.pc.bits := npc // predict one cycle early - bpu.io.flush := io.redirect.valid - - when (pcUpdate) { pc := npc } - - io.flushVec := Mux(io.redirect.valid, "b1111".U, 0.U) - io.bpFlush := false.B - - io.imem := DontCare - io.imem.req.bits.apply(addr = pc, size = "b10".U, cmd = SimpleBusCmd.read, wdata = 0.U, wmask = 0.U, user = Cat(pc, npc)) - io.imem.req.valid := io.out.ready - io.imem.resp.ready := io.out.ready || io.flushVec(0) - - io.out.bits := DontCare - io.out.bits.instr := io.imem.resp.bits.rdata - io.imem.resp.bits.user.map{ case x => - io.out.bits.pc := x(2*VAddrBits-1, VAddrBits) - io.out.bits.pnpc := x(VAddrBits-1, 0) - } - io.out.valid := io.imem.resp.valid && !io.flushVec(0) - - Debug(io.imem.req.fire(), "[IFI] pc=%x user=%x redirect %x npc %x pc %x pnpc %x\n", io.imem.req.bits.addr, io.imem.req.bits.user.getOrElse(0.U), io.redirect.valid, npc, pc, bpu.io.out.target) - Debug(io.out.fire(), "[IFO] pc=%x user=%x inst=%x npc=%x ipf %x\n", io.out.bits.pc, io.imem.resp.bits.user.get, io.out.bits.instr, io.out.bits.pnpc, io.ipf) - - BoringUtils.addSource(BoolStopWatch(io.imem.req.valid, io.imem.resp.fire()), "perfCntCondMimemStall") - BoringUtils.addSource(io.flushVec.orR, "perfCntCondMifuFlush") -} - -class IFU_inorder extends NutCoreModule with HasResetVector { - val io = IO(new Bundle { - - val imem = new SimpleBusUC(userBits = VAddrBits*2 + 4, addrBits = VAddrBits) - val out = Decoupled(new CtrlFlowIO) - - val redirect = Flipped(new RedirectIO) - val flushVec = Output(UInt(4.W)) - val bpFlush = Output(Bool()) - val ipf = Input(Bool()) - }) - - // pc - val pc = RegInit(resetVector.U(VAddrBits.W)) - val pcUpdate = io.redirect.valid || io.imem.req.fire() - val snpc = Mux(pc(1), pc + 2.U, pc + 4.U) // sequential next pc - - val bp1 = Module(new BPU_inorder) - - val crosslineJump = bp1.io.crosslineJump - val crosslineJumpLatch = RegInit(false.B) - when(pcUpdate || bp1.io.flush) { - crosslineJumpLatch := 
Mux(bp1.io.flush, false.B, crosslineJump && !crosslineJumpLatch) - } - val crosslineJumpTarget = RegEnable(bp1.io.out.target, crosslineJump) - val crosslineJumpForceSeq = crosslineJump && bp1.io.out.valid - val crosslineJumpForceTgt = crosslineJumpLatch && !bp1.io.flush - - // predicted next pc - val pnpc = Mux(crosslineJump, snpc, bp1.io.out.target) - val pbrIdx = bp1.io.brIdx - val npc = Mux(io.redirect.valid, io.redirect.target, Mux(crosslineJumpLatch, crosslineJumpTarget, Mux(bp1.io.out.valid, pnpc, snpc))) - val npcIsSeq = Mux(io.redirect.valid , false.B, Mux(crosslineJumpLatch, false.B, Mux(crosslineJump, true.B, Mux(bp1.io.out.valid, false.B, true.B)))) - // Debug("[NPC] %x %x %x %x %x %x\n",crosslineJumpLatch, crosslineJumpTarget, crosslineJump, bp1.io.out.valid, pnpc, snpc) - - // val npc = Mux(io.redirect.valid, io.redirect.target, Mux(io.redirectRVC.valid, io.redirectRVC.target, snpc)) - val brIdx = Wire(UInt(4.W)) - // brIdx(0) -> branch at pc offset 0 (mod 4) - // brIdx(1) -> branch at pc offset 2 (mod 4) - // brIdx(2) -> branch at pc offset 6 (mod 8), and this inst is not rvc inst - brIdx := Cat(npcIsSeq, Mux(io.redirect.valid, 0.U, pbrIdx)) - //TODO: BP will be disabled shortly after a redirect request - - bp1.io.in.pc.valid := io.imem.req.fire() // only predict when Icache accepts a request - bp1.io.in.pc.bits := npc // predict one cycle early - - // Debug(bp1.io.in.pc.valid, p"pc: ${Hexadecimal(pc)} npc: ${Hexadecimal(npc)}\n") - // Debug(bp1.io.out.valid, p"valid!!\n") - - bp1.io.flush := io.redirect.valid - - when (pcUpdate) { - pc := npc - // printf("[IF1] pc=%x\n", pc) - } - - Debug(pcUpdate, "[IFUPC] pc:%x pcUpdate:%d npc:%x RedValid:%d RedTarget:%x LJL:%d LJTarget:%x LJ:%d snpc:%x bpValid:%d pnpn:%x \n",pc, pcUpdate, npc, io.redirect.valid,io.redirect.target,crosslineJumpLatch,crosslineJumpTarget,crosslineJump,snpc,bp1.io.out.valid,pnpc) - - io.flushVec := Mux(io.redirect.valid, "b1111".U, 0.U) - io.bpFlush := false.B - - 
io.imem.req.bits.apply(addr = Cat(pc(VAddrBits-1,1),0.U(1.W)), //cache will treat it as Cat(pc(63,3),0.U(3.W)) - size = "b11".U, cmd = SimpleBusCmd.read, wdata = 0.U, wmask = 0.U, user = Cat(brIdx(3,0), npc(VAddrBits-1, 0), pc(VAddrBits-1, 0))) - io.imem.req.valid := io.out.ready - //TODO: add ctrlFlow.exceptionVec - io.imem.resp.ready := io.out.ready || io.flushVec(0) - - io.out.bits := DontCare - //inst path only uses 32bit inst, get the right inst according to pc(2) - - Debug(io.imem.req.fire(), "[IFI] pc=%x user=%x %x %x %x \n", io.imem.req.bits.addr, io.imem.req.bits.user.getOrElse(0.U), io.redirect.valid, pbrIdx, brIdx) - Debug(io.out.fire(), "[IFO] pc=%x inst=%x\n", io.out.bits.pc, io.out.bits.instr) - - // io.out.bits.instr := (if (XLEN == 64) io.imem.resp.bits.rdata.asTypeOf(Vec(2, UInt(32.W)))(io.out.bits.pc(2)) - // else io.imem.resp.bits.rdata) - io.out.bits.instr := io.imem.resp.bits.rdata - io.imem.resp.bits.user.map{ case x => - io.out.bits.pc := x(VAddrBits-1,0) - io.out.bits.pnpc := x(VAddrBits*2-1,VAddrBits) - io.out.bits.brIdx := x(VAddrBits*2 + 3, VAddrBits*2) - } - io.out.bits.exceptionVec(instrPageFault) := io.ipf - io.out.valid := io.imem.resp.valid && !io.flushVec(0) - - BoringUtils.addSource(BoolStopWatch(io.imem.req.valid, io.imem.resp.fire()), "perfCntCondMimemStall") - BoringUtils.addSource(io.flushVec.orR, "perfCntCondMifuFlush") -} \ No newline at end of file diff --git a/src/main/scala/nutcore/frontend/instr_fetch/Embedded.scala b/src/main/scala/nutcore/frontend/instr_fetch/Embedded.scala new file mode 100644 index 000000000..608049aad --- /dev/null +++ b/src/main/scala/nutcore/frontend/instr_fetch/Embedded.scala @@ -0,0 +1,63 @@ +package nutcore.frontend.instr_fetch + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ +import nutcore.frontend.instr_fetch.branch_predict._ + +import utils._ +import bus.simplebus._ +import top.Settings +import difftest._ + +class InstrFetchEmbedded 
extends NutCoreModule with HasResetVector { + val io = IO(new Bundle { + val imem = new SimpleBusUC(userBits = 64, addrBits = VAddrBits) + val out = Decoupled(new CtrlFlowIO) + val redirect = Flipped(new RedirectIO) + val flushVec = Output(UInt(4.W)) + val bpFlush = Output(Bool()) + val ipf = Input(Bool()) + }) + + // pc + val pc = RegInit(resetVector.U(32.W)) + val pcUpdate = io.redirect.valid || io.imem.req.fire() + val snpc = pc + 4.U // sequential next pc + + val bpu = Module(new BranchPredictEmbedded) + + // predicted next pc + val pnpc = bpu.io.out.target + val npc = Mux(io.redirect.valid, io.redirect.target, Mux(bpu.io.out.valid, pnpc, snpc)) + + bpu.io.in.pc.valid := io.imem.req.fire() // only predict when Icache accepts a request + bpu.io.in.pc.bits := npc // predict one cycle early + bpu.io.flush := io.redirect.valid + + when (pcUpdate) { pc := npc } + + io.flushVec := Mux(io.redirect.valid, "b1111".U, 0.U) + io.bpFlush := false.B + + io.imem := DontCare + io.imem.req.bits.apply(addr = pc, size = "b10".U, cmd = SimpleBusCmd.read, wdata = 0.U, wmask = 0.U, user = Cat(pc, npc)) + io.imem.req.valid := io.out.ready + io.imem.resp.ready := io.out.ready || io.flushVec(0) + + io.out.bits := DontCare + io.out.bits.instr := io.imem.resp.bits.rdata + io.imem.resp.bits.user.map{ case x => + io.out.bits.pc := x(2*VAddrBits-1, VAddrBits) + io.out.bits.pnpc := x(VAddrBits-1, 0) + } + io.out.valid := io.imem.resp.valid && !io.flushVec(0) + + Debug(io.imem.req.fire(), "[IFI] pc=%x user=%x redirect %x npc %x pc %x pnpc %x\n", io.imem.req.bits.addr, io.imem.req.bits.user.getOrElse(0.U), io.redirect.valid, npc, pc, bpu.io.out.target) + Debug(io.out.fire(), "[IFO] pc=%x user=%x inst=%x npc=%x ipf %x\n", io.out.bits.pc, io.imem.resp.bits.user.get, io.out.bits.instr, io.out.bits.pnpc, io.ipf) + + BoringUtils.addSource(BoolStopWatch(io.imem.req.valid, io.imem.resp.fire()), "perfCntCondMimemStall") + BoringUtils.addSource(io.flushVec.orR, "perfCntCondMifuFlush") +} diff --git 
a/src/main/scala/nutcore/frontend/instr_fetch/InstrFetchCommons.scala b/src/main/scala/nutcore/frontend/instr_fetch/InstrFetchCommons.scala new file mode 100644 index 000000000..511b30e9f --- /dev/null +++ b/src/main/scala/nutcore/frontend/instr_fetch/InstrFetchCommons.scala @@ -0,0 +1,41 @@ +/************************************************************************************** +* Copyright (c) 2020 Institute of Computing Technology, CAS +* Copyright (c) 2020 University of Chinese Academy of Sciences +* +* NutShell is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +* FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +***************************************************************************************/ + +package nutcore.frontend.instr_fetch + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ +import nutcore.frontend.instr_fetch.branch_predict._ + +import utils._ +import bus.simplebus._ +import top.Settings +import difftest._ + +trait HasResetVector { + val resetVector = Settings.getLong("ResetVector") +} + +class ICacheUserBundle extends NutCoreBundle { + val pc = UInt(VAddrBits.W) + val brIdx = UInt(4.W) // mark if an inst is predicted to branch + val pnpc = UInt(VAddrBits.W) + val instValid = UInt(4.W) // mark which part of this inst line is valid +} +// Note: update ICacheUserBundleWidth when change ICacheUserBundle diff --git a/src/main/scala/nutcore/frontend/instr_fetch/Sequential.scala b/src/main/scala/nutcore/frontend/instr_fetch/Sequential.scala new file mode 100644 index 000000000..d2059bd16 --- /dev/null +++ 
b/src/main/scala/nutcore/frontend/instr_fetch/Sequential.scala @@ -0,0 +1,102 @@ + +package nutcore.frontend.instr_fetch + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ +import nutcore.frontend.instr_fetch.branch_predict._ + +import utils._ +import bus.simplebus._ +import top.Settings +import difftest._ + +class InstrFetchSequential extends NutCoreModule with HasResetVector { + val io = IO(new Bundle { + + val imem = new SimpleBusUC(userBits = VAddrBits*2 + 4, addrBits = VAddrBits) + val out = Decoupled(new CtrlFlowIO) + + val redirect = Flipped(new RedirectIO) + val flushVec = Output(UInt(4.W)) + val bpFlush = Output(Bool()) + val ipf = Input(Bool()) + }) + + // pc + val pc = RegInit(resetVector.U(VAddrBits.W)) + val pcUpdate = io.redirect.valid || io.imem.req.fire() + val snpc = Mux(pc(1), pc + 2.U, pc + 4.U) // sequential next pc + + val bp1 = Module(new BranchPredictSequential) + + val crosslineJump = bp1.io.crosslineJump + val crosslineJumpLatch = RegInit(false.B) + when(pcUpdate || bp1.io.flush) { + crosslineJumpLatch := Mux(bp1.io.flush, false.B, crosslineJump && !crosslineJumpLatch) + } + val crosslineJumpTarget = RegEnable(bp1.io.out.target, crosslineJump) + val crosslineJumpForceSeq = crosslineJump && bp1.io.out.valid + val crosslineJumpForceTgt = crosslineJumpLatch && !bp1.io.flush + + // predicted next pc + val pnpc = Mux(crosslineJump, snpc, bp1.io.out.target) + val pbrIdx = bp1.io.brIdx + val npc = Mux(io.redirect.valid, io.redirect.target, Mux(crosslineJumpLatch, crosslineJumpTarget, Mux(bp1.io.out.valid, pnpc, snpc))) + val npcIsSeq = Mux(io.redirect.valid , false.B, Mux(crosslineJumpLatch, false.B, Mux(crosslineJump, true.B, Mux(bp1.io.out.valid, false.B, true.B)))) + // Debug("[NPC] %x %x %x %x %x %x\n",crosslineJumpLatch, crosslineJumpTarget, crosslineJump, bp1.io.out.valid, pnpc, snpc) + + // val npc = Mux(io.redirect.valid, io.redirect.target, Mux(io.redirectRVC.valid, 
io.redirectRVC.target, snpc)) + val brIdx = Wire(UInt(4.W)) + // brIdx(0) -> branch at pc offset 0 (mod 4) + // brIdx(1) -> branch at pc offset 2 (mod 4) + // brIdx(2) -> branch at pc offset 6 (mod 8), and this inst is not rvc inst + brIdx := Cat(npcIsSeq, Mux(io.redirect.valid, 0.U, pbrIdx)) + //TODO: BP will be disabled shortly after a redirect request + + bp1.io.in.pc.valid := io.imem.req.fire() // only predict when Icache accepts a request + bp1.io.in.pc.bits := npc // predict one cycle early + + // Debug(bp1.io.in.pc.valid, p"pc: ${Hexadecimal(pc)} npc: ${Hexadecimal(npc)}\n") + // Debug(bp1.io.out.valid, p"valid!!\n") + + bp1.io.flush := io.redirect.valid + + when (pcUpdate) { + pc := npc + // printf("[IF1] pc=%x\n", pc) + } + + Debug(pcUpdate, "[IFUPC] pc:%x pcUpdate:%d npc:%x RedValid:%d RedTarget:%x LJL:%d LJTarget:%x LJ:%d snpc:%x bpValid:%d pnpn:%x \n",pc, pcUpdate, npc, io.redirect.valid,io.redirect.target,crosslineJumpLatch,crosslineJumpTarget,crosslineJump,snpc,bp1.io.out.valid,pnpc) + + io.flushVec := Mux(io.redirect.valid, "b1111".U, 0.U) + io.bpFlush := false.B + + io.imem.req.bits.apply(addr = Cat(pc(VAddrBits-1,1),0.U(1.W)), //cache will treat it as Cat(pc(63,3),0.U(3.W)) + size = "b11".U, cmd = SimpleBusCmd.read, wdata = 0.U, wmask = 0.U, user = Cat(brIdx(3,0), npc(VAddrBits-1, 0), pc(VAddrBits-1, 0))) + io.imem.req.valid := io.out.ready + //TODO: add ctrlFlow.exceptionVec + io.imem.resp.ready := io.out.ready || io.flushVec(0) + + io.out.bits := DontCare + //inst path only uses 32bit inst, get the right inst according to pc(2) + + Debug(io.imem.req.fire(), "[IFI] pc=%x user=%x %x %x %x \n", io.imem.req.bits.addr, io.imem.req.bits.user.getOrElse(0.U), io.redirect.valid, pbrIdx, brIdx) + Debug(io.out.fire(), "[IFO] pc=%x inst=%x\n", io.out.bits.pc, io.out.bits.instr) + + // io.out.bits.instr := (if (XLEN == 64) io.imem.resp.bits.rdata.asTypeOf(Vec(2, UInt(32.W)))(io.out.bits.pc(2)) + // else io.imem.resp.bits.rdata) + io.out.bits.instr := 
io.imem.resp.bits.rdata + io.imem.resp.bits.user.map{ case x => + io.out.bits.pc := x(VAddrBits-1,0) + io.out.bits.pnpc := x(VAddrBits*2-1,VAddrBits) + io.out.bits.brIdx := x(VAddrBits*2 + 3, VAddrBits*2) + } + io.out.bits.exceptionVec(instrPageFault) := io.ipf + io.out.valid := io.imem.resp.valid && !io.flushVec(0) + + BoringUtils.addSource(BoolStopWatch(io.imem.req.valid, io.imem.resp.fire()), "perfCntCondMimemStall") + BoringUtils.addSource(io.flushVec.orR, "perfCntCondMifuFlush") +} \ No newline at end of file diff --git a/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictCommons.scala b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictCommons.scala new file mode 100644 index 000000000..9d5f75eb8 --- /dev/null +++ b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictCommons.scala @@ -0,0 +1,60 @@ +/************************************************************************************** +* Copyright (c) 2020 Institute of Computing Technology, CAS +* Copyright (c) 2020 University of Chinese Academy of Sciences +* +* NutShell is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2. +* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +* FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. 
+***************************************************************************************/ + +package nutcore.frontend.instr_fetch.branch_predict + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import utils._ +import top.Settings + +class TableAddr(val idxBits: Int) extends NutCoreBundle { + val padLen = if (Settings.get("IsRV32") || !Settings.get("EnableOutOfOrderExec")) 2 else 3 + def tagBits = VAddrBits - padLen - idxBits + + //val res = UInt((AddrBits - VAddrBits).W) + val tag = UInt(tagBits.W) + val idx = UInt(idxBits.W) + val pad = UInt(padLen.W) + + def fromUInt(x: UInt) = x.asTypeOf(UInt(VAddrBits.W)).asTypeOf(this) + def getTag(x: UInt) = fromUInt(x).tag + def getIdx(x: UInt) = fromUInt(x).idx +} + +object BTBtype { + def B = "b00".U // branch + def J = "b01".U // jump + def I = "b10".U // indirect + def R = "b11".U // return + + def apply() = UInt(2.W) +} + +class BPUUpdateReq extends NutCoreBundle { + val valid = Output(Bool()) + val pc = Output(UInt(VAddrBits.W)) + val isMissPredict = Output(Bool()) + val actualTarget = Output(UInt(VAddrBits.W)) + val actualTaken = Output(Bool()) // for branch + val fuOpType = Output(FuOpType()) + val btbType = Output(BTBtype()) + val isRVC = Output(Bool()) // for ras, save PC+2 to stack if is RVC +} diff --git a/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Dummy.scala b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Dummy.scala new file mode 100644 index 000000000..b1ce00d9d --- /dev/null +++ b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Dummy.scala @@ -0,0 +1,28 @@ +package nutcore.frontend.instr_fetch.branch_predict + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import utils._ +import top.Settings + +class BranchPredictDummy extends NutCoreModule { + val io = IO(new Bundle { + val in = new Bundle { val pc = Flipped(Valid((UInt(VAddrBits.W)))) } + val 
out = new RedirectIO + val valid = Output(Bool()) + val flush = Input(Bool()) + val ignore = Input(Bool()) + val brIdx = Output(Vec(4, Bool())) + }) + // Note: when io.ignore, io.out.valid must be false.B for this pc + // This limitation is for cross instline inst fetch logic + io.valid := io.in.pc.valid // Predictor is returning a result + io.out.valid := false.B // Need redirect + io.out.target := DontCare // Redirect target + io.out.rtype := DontCare // Predictor does not need to care about it + io.brIdx := VecInit(Seq.fill(4)(false.B)) // Which inst triggers jump +} diff --git a/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Dynamic.scala b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Dynamic.scala new file mode 100644 index 000000000..9c5b9e4ce --- /dev/null +++ b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Dynamic.scala @@ -0,0 +1,143 @@ +package nutcore.frontend.instr_fetch.branch_predict + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import utils._ +import top.Settings + +// nextline predictor generates NPC from current NPC in 1 cycle +class BranchPredictDynamic extends NutCoreModule { + val io = IO(new Bundle { + val in = new Bundle { val pc = Flipped(Valid((UInt(VAddrBits.W)))) } + val out = new RedirectIO + val flush = Input(Bool()) + val brIdx = Output(Vec(4, Bool())) + // val target = Output(Vec(4, UInt(VAddrBits.W))) + // val instValid = Output(UInt(4.W)) // now instValid is generated in IFU + val crosslineJump = Output(Bool()) + }) + + val flush = BoolStopWatch(io.flush, io.in.pc.valid, startHighPriority = true) + + // BTB + val NRbtb = 512 + val btbAddr = new TableAddr(log2Up(NRbtb >> 2)) + def btbEntry() = new Bundle { + val tag = UInt(btbAddr.tagBits.W) + val _type = UInt(2.W) + val target = UInt(VAddrBits.W) + val crosslineJump = Bool() + val valid = Bool() + } + + val btb = List.fill(4)(Module(new SRAMTemplate(btbEntry(), set = NRbtb >> 2, 
shouldReset = true, holdRead = true, singlePort = true))) + // flush BTB when executing fence.i + val flushBTB = WireInit(false.B) + val flushTLB = WireInit(false.B) + BoringUtils.addSink(flushBTB, "MOUFlushICache") + BoringUtils.addSink(flushTLB, "MOUFlushTLB") + (0 to 3).map(i => (btb(i).reset := reset.asBool || (flushBTB || flushTLB))) + + Debug(reset.asBool || (flushBTB || flushTLB), "[BPU-RESET] bpu-reset flushBTB:%d flushTLB:%d\n", flushBTB, flushTLB) + + (0 to 3).map(i => (btb(i).io.r.req.valid := io.in.pc.valid)) + (0 to 3).map(i => (btb(i).io.r.req.bits.setIdx := btbAddr.getIdx(io.in.pc.bits))) + + + val btbRead = Wire(Vec(4, btbEntry())) + (0 to 3).map(i => (btbRead(i) := btb(i).io.r.resp.data(0))) + // since there is one cycle latency to read SyncReadMem, + // we should latch the input pc for one cycle + val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.valid) + val btbHit = Wire(Vec(4, Bool())) + (0 to 3).map(i => btbHit(i) := btbRead(i).valid && btbRead(i).tag === btbAddr.getTag(pcLatch) && !flush && RegNext(btb(i).io.r.req.fire(), init = false.B)) + // btbHit will ignore pc(2,0). 
pc(2,0) is used to build brIdx + val crosslineJump = btbRead(3).crosslineJump && btbHit(3) && !io.brIdx(0) && !io.brIdx(1) && !io.brIdx(2) + io.crosslineJump := crosslineJump + // val crosslineJumpLatch = RegNext(crosslineJump) + // val crosslineJumpTarget = RegEnable(btbRead.target, crosslineJump) + + // PHT + val pht = List.fill(4)(Mem(NRbtb >> 2, UInt(2.W))) + val phtTaken = Wire(Vec(4, Bool())) + (0 to 3).map(i => (phtTaken(i) := RegEnable(pht(i).read(btbAddr.getIdx(io.in.pc.bits))(1), io.in.pc.valid))) + + // RAS + val NRras = 16 + val ras = Mem(NRras, UInt(VAddrBits.W)) + val sp = Counter(NRras) + val rasTarget = RegEnable(ras.read(sp.value), io.in.pc.valid) + + // update + val req = WireInit(0.U.asTypeOf(new BPUUpdateReq)) + val btbWrite = WireInit(0.U.asTypeOf(btbEntry())) + BoringUtils.addSink(req, "bpuUpdateReq") + + btbWrite.tag := btbAddr.getTag(req.pc) + btbWrite.target := req.actualTarget + btbWrite._type := req.btbType + btbWrite.crosslineJump := req.pc(2,1)==="h3".U && !req.isRVC // ((pc_offset % 8) == 6) && inst is 32bit in length + btbWrite.valid := true.B + // NOTE: We only update BTB at a miss prediction. + // If a miss prediction is found, the pipeline will be flushed + // in the next cycle. Therefore it is safe to use single-port + // SRAM to implement BTB, since write requests have higher priority + // than read request. Again, since the pipeline will be flushed + // in the next cycle, the read request will be useless. 
+ (0 to 3).map(i => btb(i).io.w.req.valid := req.isMissPredict && req.valid && i.U === req.pc(2,1)) + (0 to 3).map(i => btb(i).io.w.req.bits.setIdx := btbAddr.getIdx(req.pc)) + (0 to 3).map(i => btb(i).io.w.req.bits.data := btbWrite) + + val getpht = LookupTree(req.pc(2,1), List.tabulate(4)(i => (i.U -> pht(i).read(btbAddr.getIdx(req.pc))))) + val cnt = RegNext(getpht) + val reqLatch = RegNext(req) + when (reqLatch.valid && ALUOpType.isBranch(reqLatch.fuOpType)) { + val taken = reqLatch.actualTaken + val newCnt = Mux(taken, cnt + 1.U, cnt - 1.U) + val wen = (taken && (cnt =/= "b11".U)) || (!taken && (cnt =/= "b00".U)) + when (wen) { + (0 to 3).map(i => when(i.U === reqLatch.pc(2,1)){pht(i).write(btbAddr.getIdx(reqLatch.pc), newCnt)}) + } + } + when (req.valid) { + when (req.fuOpType === ALUOpType.call) { + ras.write(sp.value + 1.U, Mux(req.isRVC, req.pc + 2.U, req.pc + 4.U)) + sp.value := sp.value + 1.U + } + .elsewhen (req.fuOpType === ALUOpType.ret) { + when(sp.value === 0.U) { + // RAS empty, do nothing + } + sp.value := Mux(sp.value===0.U, 0.U, sp.value - 1.U) + } + } + + def genInstValid(pc: UInt) = LookupTree(pc(2,1), List( + "b00".U -> "b1111".U, + "b01".U -> "b1110".U, + "b10".U -> "b1100".U, + "b11".U -> "b1000".U + )) + + val pcLatchValid = genInstValid(pcLatch) + + val target = Wire(Vec(4, UInt(VAddrBits.W))) + (0 to 3).map(i => target(i) := Mux(btbRead(i)._type === BTBtype.R, rasTarget, btbRead(i).target)) + (0 to 3).map(i => io.brIdx(i) := btbHit(i) && pcLatchValid(i).asBool && Mux(btbRead(i)._type === BTBtype.B, phtTaken(i), true.B) && btbRead(i).valid) + io.out.target := PriorityMux(io.brIdx, target) + io.out.valid := io.brIdx.asUInt.orR + io.out.rtype := 0.U + Debug(io.out.valid, "[BPU] pc %x io.brIdx.asUInt %b phtTaken %x %x %x %x valid %x %x %x %x\n", pcLatch, io.brIdx.asUInt, phtTaken(0), phtTaken(1), phtTaken(2), phtTaken(3), btbRead(0).valid, btbRead(1).valid, btbRead(2).valid, btbRead(3).valid) + + // io.out.valid := btbHit && 
Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !crosslineJump || crosslineJumpLatch && !flush && !crosslineJump + // Note: + // btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !crosslineJump : normal branch predict + // crosslineJumpLatch && !flush && !crosslineJump : cross line branch predict, bpu will require imem to fetch the next 16bit of current inst in next instline + // `&& !crosslineJump` is used to make sure this logic will run correctly when imem stalls (pcUpdate === false) + // by using `instline`, we mean a 64 bit instfetch result from imem + // ROCKET uses a 32 bit instline, and its IDU logic is more simple than this implentation. +} diff --git a/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Embedded.scala b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Embedded.scala new file mode 100644 index 000000000..55332837d --- /dev/null +++ b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Embedded.scala @@ -0,0 +1,97 @@ +package nutcore.frontend.instr_fetch.branch_predict + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import utils._ +import top.Settings + +class BranchPredictEmbedded extends NutCoreModule { + val io = IO(new Bundle { + val in = new Bundle { val pc = Flipped(Valid((UInt(32.W)))) } + val out = new RedirectIO + val flush = Input(Bool()) + }) + + val flush = BoolStopWatch(io.flush, io.in.pc.valid, startHighPriority = true) + + // BTB + val NRbtb = 512 + val btbAddr = new TableAddr(log2Up(NRbtb)) + def btbEntry() = new Bundle { + val tag = UInt(btbAddr.tagBits.W) + val _type = UInt(2.W) + val target = UInt(32.W) + } + + val btb = Module(new SRAMTemplate(btbEntry(), set = NRbtb, shouldReset = true, holdRead = true, singlePort = true)) + btb.io.r.req.valid := io.in.pc.valid + btb.io.r.req.bits.setIdx := btbAddr.getIdx(io.in.pc.bits) + + val btbRead = Wire(btbEntry()) + btbRead := btb.io.r.resp.data(0) + // since there is one 
cycle latency to read SyncReadMem, + // we should latch the input pc for one cycle + val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.valid) + val btbHit = btbRead.tag === btbAddr.getTag(pcLatch) && !flush && RegNext(btb.io.r.req.ready, init = false.B) + + // PHT + val pht = Mem(NRbtb, UInt(2.W)) + val phtTaken = RegEnable(pht.read(btbAddr.getIdx(io.in.pc.bits))(1), io.in.pc.valid) + + // RAS + val NRras = 16 + val ras = Mem(NRras, UInt(32.W)) + val sp = Counter(NRras) + val rasTarget = RegEnable(ras.read(sp.value), io.in.pc.valid) + + // update + val req = WireInit(0.U.asTypeOf(new BPUUpdateReq)) + val btbWrite = WireInit(0.U.asTypeOf(btbEntry())) + BoringUtils.addSink(req, "bpuUpdateReq") + + btbWrite.tag := btbAddr.getTag(req.pc) + btbWrite.target := req.actualTarget + btbWrite._type := req.btbType + // NOTE: We only update BTB at a miss prediction. + // If a miss prediction is found, the pipeline will be flushed + // in the next cycle. Therefore it is safe to use single-port + // SRAM to implement BTB, since write requests have higher priority + // than read request. Again, since the pipeline will be flushed + // in the next cycle, the read request will be useless. 
+ btb.io.w.req.valid := req.isMissPredict && req.valid + btb.io.w.req.bits.setIdx := btbAddr.getIdx(req.pc) + btb.io.w.req.bits.data := btbWrite + + val cnt = RegNext(pht.read(btbAddr.getIdx(req.pc))) + val reqLatch = RegNext(req) + when (reqLatch.valid && ALUOpType.isBranch(reqLatch.fuOpType)) { + val taken = reqLatch.actualTaken + val newCnt = Mux(taken, cnt + 1.U, cnt - 1.U) + val wen = (taken && (cnt =/= "b11".U)) || (!taken && (cnt =/= "b00".U)) + when (wen) { + pht.write(btbAddr.getIdx(reqLatch.pc), newCnt) + } + } + when (req.valid) { + when (req.fuOpType === ALUOpType.call) { + ras.write(sp.value + 1.U, req.pc + 4.U) + sp.value := sp.value + 1.U + } + .elsewhen (req.fuOpType === ALUOpType.ret) { + sp.value := sp.value - 1.U + } + } + + val flushBTB = WireInit(false.B) + val flushTLB = WireInit(false.B) + BoringUtils.addSink(flushBTB, "MOUFlushICache") + BoringUtils.addSink(flushTLB, "MOUFlushTLB") + + io.out.target := Mux(btbRead._type === BTBtype.R, rasTarget, btbRead.target) + io.out.valid := btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) + io.out.rtype := 0.U +} diff --git a/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Legacy.scala b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Legacy.scala new file mode 100644 index 000000000..ddc41a6f1 --- /dev/null +++ b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Legacy.scala @@ -0,0 +1,39 @@ +// package nutcore.frontend.instr_fetch.branch_predict + +// import chisel3._ +// import chisel3.util._ +// import chisel3.util.experimental.BoringUtils + +// import nutcore._ + +// import utils._ +// import top.Settings + +//---- Legacy BPUs ---- +/* +class BranchPredictLegacy extends NutCoreModule { + val io = IO(new Bundle { + val in = Flipped(Valid(new CtrlFlowIO)) + val out = new RedirectIO + }) + + val instr = io.in.bits.instr + val immJ = SignExt(Cat(instr(31), instr(19, 12), instr(20), instr(30, 21), 0.U(1.W)), XLEN) + val immB = SignExt(Cat(instr(31), instr(7), 
instr(30, 25), instr(11, 8), 0.U(1.W)), XLEN) + val table = Array( + RV32I_BRUInstr.JAL -> List(immJ, true.B), + RV32I_BRUInstr.BNE -> List(immB, instr(31)), + RV32I_BRUInstr.BEQ -> List(immB, instr(31)), + RV32I_BRUInstr.BLT -> List(immB, instr(31)), + RV32I_BRUInstr.BGE -> List(immB, instr(31)), + RV32I_BRUInstr.BLTU -> List(immB, instr(31)), + RV32I_BRUInstr.BGEU -> List(immB, instr(31)) + ) + val default = List(immB, false.B) + val offset :: predict :: Nil = ListLookup(instr, default, table) + + io.out.target := io.in.bits.pc + offset + io.out.valid := io.in.valid && predict(0) + io.out.rtype := 0.U +} +*/ diff --git a/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Sequential.scala b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Sequential.scala new file mode 100644 index 000000000..314d83976 --- /dev/null +++ b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Sequential.scala @@ -0,0 +1,162 @@ +package nutcore.frontend.instr_fetch.branch_predict + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import utils._ +import top.Settings + +class BranchPredictSequential extends NutCoreModule { + val io = IO(new Bundle { + val in = new Bundle { val pc = Flipped(Valid((UInt(VAddrBits.W)))) } + val out = new RedirectIO + val flush = Input(Bool()) + val brIdx = Output(UInt(3.W)) + val crosslineJump = Output(Bool()) + }) + + val flush = BoolStopWatch(io.flush, io.in.pc.valid, startHighPriority = true) + + // BTB + val NRbtb = 512 + val btbAddr = new TableAddr(log2Up(NRbtb)) + def btbEntry() = new Bundle { + val tag = UInt(btbAddr.tagBits.W) + val _type = UInt(2.W) + val target = UInt(VAddrBits.W) + val brIdx = UInt(3.W) + val valid = Bool() + } + + val btb = Module(new SRAMTemplate(btbEntry(), set = NRbtb, shouldReset = true, holdRead = true, singlePort = true)) + // flush BTB when executing fence.i + val flushBTB = WireInit(false.B) + val flushTLB = WireInit(false.B) + 
BoringUtils.addSink(flushBTB, "MOUFlushICache") + BoringUtils.addSink(flushTLB, "MOUFlushTLB") + btb.reset := reset.asBool || (flushBTB || flushTLB) + Debug(reset.asBool || (flushBTB || flushTLB), "[BPU-RESET] bpu-reset flushBTB:%d flushTLB:%d\n", flushBTB, flushTLB) + + btb.io.r.req.valid := io.in.pc.valid + btb.io.r.req.bits.setIdx := btbAddr.getIdx(io.in.pc.bits) + + + val btbRead = Wire(btbEntry()) + btbRead := btb.io.r.resp.data(0) + // since there is one cycle latency to read SyncReadMem, + // we should latch the input pc for one cycle + val pcLatch = RegEnable(io.in.pc.bits, io.in.pc.valid) + val btbHit = btbRead.valid && btbRead.tag === btbAddr.getTag(pcLatch) && !flush && RegNext(btb.io.r.req.fire(), init = false.B) && !(pcLatch(1) && btbRead.brIdx(0)) + // btbHit will ignore pc(1,0). pc(1,0) is used to build brIdx + // !(pcLatch(1) && btbRead.brIdx(0)) is used to deal with the following case: + // ------------------------------------------------- + // 0 jump rvc // marked as "take branch" in BTB + // 2 xxx rvc <-- pc // misrecognize this instr as "btb hit" with target of previous jump instr + // ------------------------------------------------- + val crosslineJump = btbRead.brIdx(2) && btbHit + io.crosslineJump := crosslineJump + // val crosslineJumpLatch = RegNext(crosslineJump) + // val crosslineJumpTarget = RegEnable(btbRead.target, crosslineJump) + Debug(btbHit, "[BTBHT1] %d pc=%x tag=%x,%x index=%x bridx=%x tgt=%x,%x flush %x type:%x\n", GTimer(), pcLatch, btbRead.tag, btbAddr.getTag(pcLatch), btbAddr.getIdx(pcLatch), btbRead.brIdx, btbRead.target, io.out.target, flush,btbRead._type) + Debug(btbHit, "[BTBHT2] btbRead.brIdx %x mask %x\n", btbRead.brIdx, Cat(crosslineJump, Fill(2, io.out.valid))) + // Debug(btbHit, "[BTBHT5] btbReqValid:%d btbReqSetIdx:%x\n",btb.io.r.req.valid, btb.io.r.req.bits.setId) + + // PHT + val pht = Mem(NRbtb, UInt(2.W)) + val phtTaken = RegEnable(pht.read(btbAddr.getIdx(io.in.pc.bits))(1), io.in.pc.valid) + + // RAS + + val 
NRras = 16 + val ras = Mem(NRras, UInt(VAddrBits.W)) + // val raBrIdxs = Mem(NRras, UInt(2.W)) + val sp = Counter(NRras) + val rasTarget = RegEnable(ras.read(sp.value), io.in.pc.valid) + // val rasBrIdx = RegEnable(raBrIdxs.read(sp.value), io.in.pc.valid) + + // update + val req = WireInit(0.U.asTypeOf(new BPUUpdateReq)) + val btbWrite = WireInit(0.U.asTypeOf(btbEntry())) + BoringUtils.addSink(req, "bpuUpdateReq") + + Debug(req.valid, "[BTBUP] pc=%x tag=%x index=%x bridx=%x tgt=%x type=%x\n", req.pc, btbAddr.getTag(req.pc), btbAddr.getIdx(req.pc), Cat(req.pc(1), ~req.pc(1)), req.actualTarget, req.btbType) + + //val fflag = req.btbType===3.U && btb.io.w.req.valid && btb.io.w.req.bits.setIdx==="hc9".U + //when(fflag && GTimer()>2888000.U) { + // Debug("%d\n", GTimer()) + // Debug("[BTBHT6] btbWrite.type is BTBtype.R/RET!!! Inpc:%x btbWrite.brIdx:%x setIdx:%x\n", io.in.pc.bits, btbWrite.brIdx, btb.io.w.req.bits.setIdx) + // Debug("[BTBHT6] tag:%x target:%x _type:%x bridx:%x\n", btbWrite.tag,btbWrite.target,btbWrite._type,btbWrite.brIdx) + // Debug(p"[BTBHT6] req:${req} \n") + //} + //Debug("[BTBHT5] tag: target:%x type:%d brIdx:%d\n", req.actualTarget, req.btbType, Cat(req.pc(2,0)==="h6".U && !req.isRVC, req.pc(1), ~req.pc(1))) + + btbWrite.tag := btbAddr.getTag(req.pc) + btbWrite.target := req.actualTarget + btbWrite._type := req.btbType + btbWrite.brIdx := Cat(req.pc(2,0)==="h6".U && !req.isRVC, req.pc(1), ~req.pc(1)) + btbWrite.valid := true.B + // NOTE: We only update BTB at a miss prediction. + // If a miss prediction is found, the pipeline will be flushed + // in the next cycle. Therefore it is safe to use single-port + // SRAM to implement BTB, since write requests have higher priority + // than read request. Again, since the pipeline will be flushed + // in the next cycle, the read request will be useless. 
+ btb.io.w.req.valid := req.isMissPredict && req.valid + btb.io.w.req.bits.setIdx := btbAddr.getIdx(req.pc) + btb.io.w.req.bits.data := btbWrite + + //Debug(true) { + //when (btb.io.w.req.valid && btbWrite.tag === btbAddr.getTag("hffffffff803541a4".U)) { + // Debug("[BTBWrite] %d setIdx:%x req.valid:%d pc:%x target:%x bridx:%x\n", GTimer(), btbAddr.getIdx(req.pc), req.valid, req.pc, req.actualTarget, btbWrite.brIdx) + //} + //} + + //when (GTimer() > 77437484.U && btb.io.w.req.valid) { + // Debug("[BTBWrite-ALL] %d setIdx:%x req.valid:%d pc:%x target:%x bridx:%x\n", GTimer(), btbAddr.getIdx(req.pc), req.valid, req.pc, req.actualTarget, btbWrite.brIdx) + //} + + val cnt = RegNext(pht.read(btbAddr.getIdx(req.pc))) + val reqLatch = RegNext(req) + when (reqLatch.valid && ALUOpType.isBranch(reqLatch.fuOpType)) { + val taken = reqLatch.actualTaken + val newCnt = Mux(taken, cnt + 1.U, cnt - 1.U) + val wen = (taken && (cnt =/= "b11".U)) || (!taken && (cnt =/= "b00".U)) + when (wen) { + pht.write(btbAddr.getIdx(reqLatch.pc), newCnt) + //Debug(){ + //Debug("BPUPDATE: pc %x cnt %x\n", reqLatch.pc, newCnt) + //} + } + } + when (req.valid) { + when (req.fuOpType === ALUOpType.call) { + ras.write(sp.value + 1.U, Mux(req.isRVC, req.pc + 2.U, req.pc + 4.U)) + // raBrIdxs.write(sp.value + 1.U, Mux(req.pc(1), 2.U, 1.U)) + sp.value := sp.value + 1.U + } + .elsewhen (req.fuOpType === ALUOpType.ret) { + when(sp.value === 0.U) { + //Debug("ATTTTT: sp.value is 0.U\n") //TODO: sp.value may equal to 0.U + } + sp.value := Mux(sp.value===0.U, 0.U, sp.value - 1.U) //TODO: sp.value may less than 0.U + } + } + + io.out.target := Mux(btbRead._type === BTBtype.R, rasTarget, btbRead.target) + // io.out.target := Mux(crosslineJumpLatch && !flush, crosslineJumpTarget, Mux(btbRead._type === BTBtype.R, rasTarget, btbRead.target)) + // io.out.brIdx := btbRead.brIdx & Fill(3, io.out.valid) + io.brIdx := btbRead.brIdx & Cat(true.B, crosslineJump, Fill(2, io.out.valid)) + io.out.valid := btbHit && 
Mux(btbRead._type === BTBtype.B, phtTaken, true.B && rasTarget=/=0.U) //TODO: add rasTarget=/=0.U, need fix + io.out.rtype := 0.U + // io.out.valid := btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !crosslineJump || crosslineJumpLatch && !flush && !crosslineJump + // Note: + // btbHit && Mux(btbRead._type === BTBtype.B, phtTaken, true.B) && !crosslineJump : normal branch predict + // crosslineJumpLatch && !flush && !crosslineJump : cross line branch predict, bpu will require imem to fetch the next 16bit of current inst in next instline + // `&& !crosslineJump` is used to make sure this logic will run correctly when imem stalls (pcUpdate === false) + // by using `instline`, we mean a 64 bit instfetch result from imem + // ROCKET uses a 32 bit instline, and its IDU logic is simpler than this implementation. +} + diff --git a/src/main/scala/nutcore/isa/Priviledged.scala b/src/main/scala/nutcore/isa/Priviledged.scala index af483ee18..6c0067592 100644 --- a/src/main/scala/nutcore/isa/Priviledged.scala +++ b/src/main/scala/nutcore/isa/Priviledged.scala @@ -22,17 +22,17 @@ import chisel3.util._ import top.Settings object Priviledged extends HasInstrType { - def ECALL = BitPat("b000000000000_00000_000_00000_1110011") - def EBREAK = BitPat("b000000000001_00000_000_00000_1110011") - def MRET = BitPat("b001100000010_00000_000_00000_1110011") - def SRET = BitPat("b000100000010_00000_000_00000_1110011") - def SFANCE_VMA = BitPat("b0001001_?????_?????_000_00000_1110011") - def FENCE = BitPat("b????????????_?????_000_?????_0001111") - def WFI = BitPat("b0001000_00101_00000_000_00000_1110011") + def ECALL = BitPat("b000000000000_00000_000_00000_1110011") + def EBREAK = BitPat("b000000000001_00000_000_00000_1110011") + def MRET = BitPat("b001100000010_00000_000_00000_1110011") + def SRET = BitPat("b000100000010_00000_000_00000_1110011") + def SFENCE_VMA = BitPat("b0001001_?????_?????_000_00000_1110011") + def FENCE = 
BitPat("b????????????_?????_000_?????_0001111") + def WFI = BitPat("b0001000_00101_00000_000_00000_1110011") val table_s = Array( SRET -> List(InstrI, FuType.csr, CSROpType.jmp), - SFANCE_VMA -> List(InstrR, FuType.mou, MOUOpType.sfence_vma) + SFENCE_VMA -> List(InstrR, FuType.mou, MOUOpType.sfence_vma) ) val table = Array( diff --git a/src/main/scala/nutcore/isa/RVI.scala b/src/main/scala/nutcore/isa/RVI.scala index bdd214b64..e9d748cc7 100644 --- a/src/main/scala/nutcore/isa/RVI.scala +++ b/src/main/scala/nutcore/isa/RVI.scala @@ -19,6 +19,8 @@ package nutcore import chisel3._ import chisel3.util._ +import nutcore.frontend.instr_fetch.branch_predict._ + object RV32I_ALUInstr extends HasInstrType with HasNutCoreParameter { def ADDI = BitPat("b????????????_?????_000_?????_0010011") def SLLI = if (XLEN == 32) BitPat("b0000000?????_?????_001_?????_0010011") diff --git a/src/main/scala/nutcore/mem/Cache.scala b/src/main/scala/nutcore/mem/cache/Cache.scala similarity index 100% rename from src/main/scala/nutcore/mem/Cache.scala rename to src/main/scala/nutcore/mem/cache/Cache.scala diff --git a/src/main/scala/nutcore/utils/WritebackDelayer.scala b/src/main/scala/nutcore/utils/WritebackDelayer.scala index 1d1c3f3a3..fe7b78310 100644 --- a/src/main/scala/nutcore/utils/WritebackDelayer.scala +++ b/src/main/scala/nutcore/utils/WritebackDelayer.scala @@ -19,6 +19,8 @@ package nutcore import chisel3._ import chisel3.util._ +import nutcore.backend._ + import utils._ class WritebackDelayer(bru: Boolean = false, name: String = "unnamedDelayer") extends NutCoreModule with HasRSConst with HasBackendConst { From dd0ad14fe6e29eb17563ddf54d0854b3ceb21424 Mon Sep 17 00:00:00 2001 From: marvintau Date: Mon, 24 Oct 2022 12:53:30 +0800 Subject: [PATCH 2/3] done separating modules into files --- .gitignore | 1 + .metals/metals.lock.db | 6 +- .metals/metals.mv.db | Bin 57344 -> 53248 bytes src/main/scala/bus/simplebus/SimpleBus.scala | 2 +- src/main/scala/nutcore/NutCore.scala | 3 + 
.../scala/nutcore/backend/dynamic/ROB.scala | 2 +- src/main/scala/nutcore/mem/cache/Cache.scala | 372 +----------------- .../scala/nutcore/mem/cache/CacheCheck.scala | 89 +++++ .../nutcore/mem/cache/CacheMetaRead.scala | 54 +++ .../nutcore/mem/cache/CacheWriteBack.scala | 258 ++++++++++++ src/main/scala/nutcore/mem/tlb/Embedded.scala | 211 ++++++++++ .../EmbeddedExec.scala} | 201 +--------- .../scala/nutcore/mem/{ => tlb}/TLB.scala | 253 +----------- src/main/scala/nutcore/mem/tlb/TLBExec.scala | 254 ++++++++++++ src/main/scala/system/NutShell.scala | 2 + src/main/scala/system/Prefetcher.scala | 4 +- src/main/scala/utils/Debug.scala | 2 +- 17 files changed, 902 insertions(+), 812 deletions(-) create mode 100644 src/main/scala/nutcore/mem/cache/CacheCheck.scala create mode 100644 src/main/scala/nutcore/mem/cache/CacheMetaRead.scala create mode 100644 src/main/scala/nutcore/mem/cache/CacheWriteBack.scala create mode 100644 src/main/scala/nutcore/mem/tlb/Embedded.scala rename src/main/scala/nutcore/mem/{EmbeddedTLB.scala => tlb/EmbeddedExec.scala} (59%) rename src/main/scala/nutcore/mem/{ => tlb}/TLB.scala (50%) create mode 100644 src/main/scala/nutcore/mem/tlb/TLBExec.scala diff --git a/.gitignore b/.gitignore index 2d6a863fe..ab8faab70 100644 --- a/.gitignore +++ b/.gitignore @@ -350,3 +350,4 @@ fpga/.Xil/ # vscode temp file .vscode/ .bloop +.metals/ \ No newline at end of file diff --git a/.metals/metals.lock.db b/.metals/metals.lock.db index 2ded17e26..a8e3d074b 100644 --- a/.metals/metals.lock.db +++ b/.metals/metals.lock.db @@ -1,6 +1,6 @@ #FileLock -#Fri Oct 21 14:12:57 CST 2022 -server=localhost\:40269 +#Mon Oct 24 00:43:59 CST 2022 +server=localhost\:36367 hostName=localhost method=file -id=183f92c5bf49f17491becab8aaffdbd093a600b4d36 +id=18405baced7c341f59eba69cde4d0c59f48f797cf8c diff --git a/.metals/metals.mv.db b/.metals/metals.mv.db index 585dd68914f79ff802bc1b2c5da827d0a257fae9..dc14f78419341014a9cb59cb871a85a0d729519a 100644 GIT binary patch delta 4813 
zcmeHLU2GIZ9G~60-W_+q#k+olmbQ1-QUcwUot@piy(8KZ6O{-Gq)`JAvY%2ZZJ{42 zrqDA+K_3J=!KjIW8b3*lt%px0riSoQUMPtV#0QlJAAHdliBF2N-`fuhl?RDoUuJf0 zX7)e-`Tc(Ln=4I+N@qi-vNS|*pkCCFSde&*8P*RE zboY@lh#Blti;AY;sZCFAi~|62padw20w6pM0Dr?OsmrPQ#tJfhy8%FCYvgqbzclh% zOpBOJoL-I{xfP-s@n56$A;>l2`lIzUD5ODivrUxzn~8kMdol{ z-(Zn9m&@{kF7W<(1zll=dWQydZOh;@?z#ghV67BDURIbsRUPV=d(|UFq`vD$0G$YA zre}<#nIyY=Sl>=^3?-elUUpb9AUpp~h${Kbj*WL>vw{=y2nJ#kS-3DZ=x-0SV=E}o zimlcTj+M^2!EsJ-D*lztIq4BuCp(%G0dyjetn_fIAo|%6S>B1j1IJRyA~+&=5l|tJ zD;82PVM5JB8>jLp!P{%)HC0a7dqUa;$!Rz1y3-Cdoh5arzsR!$ft>&_kJ_b)Kk`oS zM@oX8u3b~|tD*5sWPJeq0WkZF;vkeXSs<`}3YRire7hqm`(UyN!_C>SWn;?wtPdda zwXH3LM3Ku81JJb~h6=ohpsvS|kT~$W$nltBvr#2%Oou50 zuRYp885RlYRdRXySS!6sp*fWd=TdtrKmu>e)<$g_8YbuHPpR?#YCM^Z;j5R@IN$bg ztp$n2_+#Lm{b~IDOoB!RKGT-N&*nzyPpI+PYJ7{W!*#QH+|d+SYW$9-hLG_NHJ+(n z;qpnv6EKbIGUC{L1R5SA3=4lqnS(#h#Gj3AB&sNZEZJi@lIJ<_Lry29Fc*GyJ5fo= zgkK4?0)`}-o(9wpt4TvApv4PzX>I^{CmTRYvKM>1rsR*}79KsJ@?s@$^YAF?MqZL- z6vX2`q6D^p3kwBYa7ipWB<5@qlXmkFw1AArP!RmD5_usM;7^rR{27k~j$VL;KL zFz&p#2Ulm?@Rp0~#2fE*ls=pwQcoa3@1j6rDOq;)8QsVFjGxFgxo5;Z2r%-KIZuEc z{Z$w0ud{!aUd=Z-5{9xH%dy1!Mnc3g)wp%;9OA=M#duFFCqT z@My`tgH2Lb=kU&!cSrUa3%`r#ez-(*e;WZKd7iJ-^pSSNVfbh*9v(gZz+B8ZtL_2Y z56s1#%|%N zt)EJk%YX&|w!z^WE92=BI#s|)1nlqV>zrK`nXVPUBkLH*<%TO(9`dPelGL6~x zm1)BbOLu01tZDA!vKgN0xCMndhpO2*FF(qAm-~b6Kuwzgv^v>JG46 zKGjLoelKwz08~6i>fsWo*nSn5PsKoG`TERP73%|N4j|I=R0YWMlA>`zPqi0%H=>}Y zvKg@Ki;?$L_u4}!|8#BmM7R}zRX(&`YXt8@jNgyf*fs7o)txy&p4FNl|#Lp+mV4F2iunsV3a4e>VTZx)|50nGbM;{;7oHSab} G#s3Dvv$t&k literal 57344 zcmeG_TW}=TRXwB8T9J00cz3;7JAU-euI&|`h<-oDsj#D&)@owCJTsDaA!O*+ti@_2 zA!(geiYgYIfD@;nDkiDeak)$&=7WObQ9KIxz!avaN>z$-1p+@%KtAAuDk@(Te1y~Y zG5wfssil=zd41<&ZuiXXd(S=R_PytI&zY`D6=!A5y-v&a^5^`jC<}x8Ru<{f*7#wNIN+U>IN+U>IN+ zU>IN+U>IN+U>IN+U>G<^3{WFK^#NG%SF2*>hP_^uY$$G3Wml-I`nR^Ka%J7V=5K(x`aE5GuCSI*|H{Lc6P;4HvY zd~v8+;6}Oo5H=@VQhKEH@4Mk|*%Qm&u>f-u;(fS~@GU7+eCOc8{YC*(#wFevdL^KY zR9RVm(Vqpfn1+C^7YI!d#SBC-(QpXKOhPh^kPJ~wksVLzlS~%H%FWfAo4!}uS|Yw2 
zUZ03=B_N`xxN>F9b#JcQtL`>X{tW`0M}Tt@z=?*G4V`Zj=sW_QLZDO7e9|;e3!#&g z0qFcbbRLGzmciM@OB$c@BP@0OB>;87ta12uvOn6J}m8$RRp5xNzwrpp` zpOEzQ=X$oJN`iu(Q+?aDMbnP;XQV#|{TZeJZbAe7VK@QZ*u5wLr>7aV(!>45;Uv#V z>Ca{NhsP(&>CeT+X9%(*_rzlepWWy%|1w(jWo zF-(0efz1Boc0JKIV^oOtmTPW1l3^GlVW%3@ksAl^v1w91odO68m4~#8YqXR)I@Sh;YZ1o!u{bJ zk-$Rk$Qa0-601o@ME)Zq!gmtE61mf(-Nl-e<>+7fhtVLm`jhj1bl-oQ;6`?3Xb-5@iMNun? zMp1MMPEqU?{DNMT%%Ws--0(BUw}-gTV%J>$!=#R z_TuPXC*ZwK!fXOCaB-GSL@b@Es3zeLR9({GjmP{!z~IR#`?1^Kk9E8sn}QbI^W@(o z5E=wRU}9pdObUbwhESu7j1p&LB(zm3qAe0YO<&SACkki^TjTTrH3(ayNfAKh-rEFF zlK`qD7>5)Mjl>~g75TbvLER=Ho6$tN*9u))i_D6A^c*PAG2FpMAz;#Ja! zuad#p4fx2WNE9=UqY)rOuz+(X!anSvfu2T*EsE0D30Nut%Si0mG5J0QmLyU6p03_1 z-eg3yI%vLUQn*BsVlM?eXF78xfkFavQ({j7g#>|IvR!2?fD9CTzz)(nfkHw< zzXO_NR}sfB99U67+A&SnAEU(91W;6Pw_$1j8m!}g!4+Qn4&1}i{sX+!7ycJ7^+R9A zOMU6#)A!;k&&I?sz%altz%altz%altz%altfH8pO|4=nb#WEv2Pw$DI;LANNl-dvO zY83aM)OaFm@;-H&bOe3fz3vCGHaEEFE) zjs=DrQNV?W3ker8E)-m-xX^H+?4nQ3dbS~kRB@O*0 zqi+>`<6m@iZJ=)xeLLveMc+O)JQAZyhMhEK{qwQyDr&FQ90{iF;!1oZR7xHT(^7iRxY_is2d_J3n%`@g~L|HRwmF53T}X7+#Z zb|4F8%vdin`+vgzuNEaOaLGFHF8C!<`~U7X4#a*<7#X$>V&h=84eE4ykJ~o>XgAwN z)KU<0=GebkqZ)Hb2rUQ3E=G=C|0DK}x{-sL_(L%(ZiCU&oN4dK2PF-U>0T#pV1YrV zF|eW}EHK!jLaZ*>`2o3vW?Nc7Ci(yx(Yg2UCnxsTA@~~4b@7uU=oNxP>7es&U-nj4}1W4O$@$>q=pVK@VEO+V_m|wsBAoxFbx3M|Bqcd_OD#=EH^@IWsE0aSVAty40{M`S zXgUN`a}9L_LZTs8@!$ga#N!Kau0|I@v@~=PxOd|Vh-VgDpom|=1qwPfVvvw!BL?y5 z9)T|51i0^v__pG@iZ8(h4x2SXLz<0H?skL*a~5%?>sYL zPqQyigHmD$Fr^^SUmIR{h#5L-uV0vi7;z96&foA?UH?L`ARIUhdulM>ydE73-c|bf zc%DJdLiklQK8`E6_x*ffEb2c7kz`BUBq)2wKk*5EYGwPg_BP*!M7B3g`vY<6msGggC}YS)7JvfugAtEc|tpHA|%bC&pA9c9bKk8ECUKR0uAN$hs< zuU*v8=EUaDtdw5;-tYbAFlpewp@EnG@QF&TY)PHAzX4k)?=5e-YjCk0wDIXBsoTYW zbo+k<%}oCKpa1@E43TEO_{yuN{`Q@R&ezUc^8Bal4SsX`hO@SUpm=^s?snm~F8K{T z{8zvA=C=#v;cvmi|Nf6g>16GKr7TP>uU_*v);E?{x6tEPmXvNc`Xd^{KY#iAXUOBf z3y=TJADia`FTC=|&lS-TJy=zXkNg92Wf}UNuSAdN-ERyXjU#&axSxa%;jtrnsUcp8 z2M9Z&SEYPp?5G_(qL&CIe2;|ab#^|Y=ddGscm0Uo@s~<0L@#sE%NL>-qVyWTi+uN# 
zKFl4bmi=r?KKAf;_Tk}w)XmT=Aa5AQKX&>Jg^=Aba&W5Kckk$$rzvDf2OsJzA3OL) zVh<9-`5yg#l7{mz+H&6$C~7zl0uSOCv684L@>tYx9*rFv4KRp*$C(gfuJS0r;MZ}0 z!9V+DkU%bR#nW|M2J#2ogZu#{IQBgAT~x9!h5?2Fh5?2Fh5?2Fh5?2FhJm|*0c`&d z?eC%Wyl?+^4Y2?Fhid<)(aCA@7-aw7Ynm7edKhW^zzqMSA+Z0Se&O^x-0%hP{~sz1 zAG>elL!)EIPu&0Ek34Yl)aeJ$eDv%?4}a{DM<08fKUY3~fjh@Pd8KW2_ zVsoa!gS=7Z%O^fxz`yc~ZL7sMr}(+~4sTs)wmWUU+^&`Rc5SA^KPBo$&y&rb?5M8o z*#1-ebgezjKh-lu&2?4VFm>C6Bsq$%8;0hZh7IxOeI3%JX{OzCd|mV$LleE((N5&<-ys$Vq)2x>#$~|P0>TV*NRQCmJ zlPX^v4Uke^UftZXS5|zl3u(fxm-*&g$GQvy3Pv-xI5Shh)d+3Me0{pss?|GCW2x4< z+MK(5Q5D8XCC}euQ9|yDno1bd7+MV&I;2nF7zU>+BSi@Sd0PeZ^)oy!! zu~oO|`_-|obh?fC`r>RbK^9xhlsVTYGnt}jo#}RJt;^P~kBp9A1dRMt?wMgGFp&ay zc@XCDahMptk})v`o5-FN;Y{fQB(aH*((8_M!7Tp>5az922=n}W>)H0S1&AC3lPrOQ z9S^Q@NfL$4?n2Vh@uz`4Kb@OCQwjQn9i_mZMyU!=>a`3??bKK#$AMfp&Gk=BH|XIz(PG{<1Y^wSXIM@RL{j3|OtrH(~efG&@)Mw)Na1Y{XWC zg=k-GcdS{U5B{gC)=gQ3jNe^xQZc$pO;x)!O*Oirp^2g~rCZt*O#Q?wa#?P z{+X;6ym9@>MrS|F>qh|XKS9ur=p32U15+xwG!4LBM%~2b)uTX#zs^{qlF#RwTmZz> ze9LNHo(qJei>S*SQw$z!SyL9sa&-%&_`U=ko!n%)f_T+{b-!cr^;)}LYgm9xkiP5B zmL3CaIk_ikK6F(8n1F^*5~7cn9tWg-h*JYOtfo%E* zy>|(dQ>v~CQdcuXt*aRITGtR|z1yfs=9HwEa$Sn-UGVmnVOM&YE-*Q^rFk_-7rQA$gi*V(`cEl~nT zGe{qU)0Ou6DV-zHHo~Z&VNy`Y&K8+?^eL< z|1s~DK`nb!JIw6=$UlJD|2u*ALN4_yv;Si+&gg5z?ElRE&+PxZIl(gfKlR?uY4>FI ze`fy=UCXiIo!S5UCmoXRGR*#ukBUSL{v!C#L~cL8_J8iN2ZsyUc}A~2_?hDWLl%;+ zJpgF{`&meKk5z(Wmbm+fSwfu;LQQl$MRJPs!CudY;Rq2_O`MDu;r12B;rX#>zzCT| zEji$*B{a!vR#J@=lHW;Ajok^KnqfnyU}l9d$p&*@5lnUmGc}%x{08y%2#<2cUm#I& zqL8ZKxwBrC_8DV2`{fCvE=OJ-Nk~LnA+{UE6ATiCB}hyP*EA8i>671av=v$(LM%fF z%}dx-0-`$(Teney(&5<=WV9RZ{rv~Yc*n-dFu*XtFmMML!1n*p{vKM-`{(~X)swZu zjsNcx`@e@=PDvav8#$MXqD-7i{R+;`LGAy@yHpC965D`7@1gx4d``iK)Hn1yxBJr; z!2ZvbxnJOLaN*k=rH@-S?uwg^#^fI2yr)&1101Pe}G~P_W>_EV%GiGG#f* z6@fCHvEagMK~BHx{&tln(?EEk$ugormu0Df!?7$#Y0MY4hON8GlDh8=s@y_ z99xC-=hbek+l(04BpXPq+j#U~F#)d^|2sJxj|CUL>%oPGD@7Jum<1Qk7hKpZN;b&D z&m7+#3QrB?SJ+|2oyQb+SS_Mp)>8wO2|CxL5Y|(}uBSLJx+2=5f?ZEzId72$XU8bM z=^1c)Zxa=cNw{H!aKk$sz`_c#kVjVIJhFuJDG?s|K*9}EjTlqh`Ld#kmAs1Obj95_ 
zc2V4!u^7pL()-}_c3xlUovOE!U8+?K1;m6_5z>&M1A+^X(`U0P$DrG*=XO%77(y5N za!0g^f)QvHkrW8pLqjIoLv?>zMTpVNJ^d00cu)pn9h4x}K?z}<+h - metaWay(i) := Mux(pickForwardMeta && w, forwardMeta.data, io.metaReadResp(i)) - } - - val hitVec = VecInit(metaWay.map(m => m.valid && (m.tag === addr.tag) && io.in.valid)).asUInt - val victimWaymask = if (Ways > 1) (1.U << LFSR64()(log2Up(Ways)-1,0)) else "b1".U - - val invalidVec = VecInit(metaWay.map(m => !m.valid)).asUInt - val hasInvalidWay = invalidVec.orR - val refillInvalidWaymask = Mux(invalidVec >= 8.U, "b1000".U, - Mux(invalidVec >= 4.U, "b0100".U, - Mux(invalidVec >= 2.U, "b0010".U, "b0001".U))) - - // val waymask = Mux(io.out.bits.hit, hitVec, victimWaymask) - val waymask = Mux(io.out.bits.hit, hitVec, Mux(hasInvalidWay, refillInvalidWaymask, victimWaymask)) - when(PopCount(waymask) > 1.U){ - metaWay.map(m => Debug("[ERROR] metaWay %x metat %x reqt %x\n", m.valid, m.tag, addr.tag)) - io.metaReadResp.map(m => Debug("[ERROR] metaReadResp %x metat %x reqt %x\n", m.valid, m.tag, addr.tag)) - Debug("[ERROR] forwardMetaReg isForwardMetaReg %x %x metat %x wm %b\n", isForwardMetaReg, forwardMetaReg.data.valid, forwardMetaReg.data.tag, forwardMetaReg.waymask.get) - Debug("[ERROR] forwardMeta isForwardMeta %x %x metat %x wm %b\n", isForwardMeta, io.metaWriteBus.req.bits.data.valid, io.metaWriteBus.req.bits.data.tag, io.metaWriteBus.req.bits.waymask.get) - } - when(PopCount(waymask) > 1.U){Debug("[ERROR] hit %b wmask %b hitvec %b\n", io.out.bits.hit, forwardMeta.waymask.getOrElse("1".U), hitVec)} - assert(!(io.in.valid && PopCount(waymask) > 1.U)) - - io.out.bits.metas := metaWay - io.out.bits.hit := io.in.valid && hitVec.orR - io.out.bits.waymask := waymask - io.out.bits.datas := io.dataReadResp - io.out.bits.mmio := AddressSpace.isMMIO(req.addr) - - val isForwardData = io.in.valid && (io.dataWriteBus.req match { case r => - r.valid && r.bits.setIdx === getDataIdx(req.addr) - }) - val isForwardDataReg = RegInit(false.B) - when 
(isForwardData) { isForwardDataReg := true.B } - when (io.in.fire() || !io.in.valid) { isForwardDataReg := false.B } - val forwardDataReg = RegEnable(io.dataWriteBus.req.bits, isForwardData) - io.out.bits.isForwardData := isForwardDataReg || isForwardData - io.out.bits.forwardData := Mux(isForwardData, io.dataWriteBus.req.bits, forwardDataReg) - - io.out.bits.req <> req - io.out.valid := io.in.valid - io.in.ready := !io.in.valid || io.out.fire() - - Debug("[isFD:%d isFDreg:%d inFire:%d invalid:%d \n", isForwardData, isForwardDataReg, io.in.fire(), io.in.valid) - Debug("[isFM:%d isFMreg:%d metawreq:%x widx:%x ridx:%x \n", isForwardMeta, isForwardMetaReg, io.metaWriteBus.req.valid, io.metaWriteBus.req.bits.setIdx, getMetaIdx(req.addr)) -} - -// writeback -sealed class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheModule { - class CacheStage3IO extends Bundle { - val in = Flipped(Decoupled(new Stage2IO)) - val out = Decoupled(new SimpleBusRespBundle(userBits = userBits, idBits = idBits)) - val isFinish = Output(Bool()) - val flush = Input(Bool()) - val dataReadBus = CacheDataArrayReadBus() - val dataWriteBus = CacheDataArrayWriteBus() - val metaWriteBus = CacheMetaArrayWriteBus() - - val mem = new SimpleBusUC - val mmio = new SimpleBusUC - val cohResp = Decoupled(new SimpleBusRespBundle) - - // use to distinguish prefetch request and normal request - val dataReadRespToL1 = Output(Bool()) - } - val io = IO(new CacheStage3IO) - - val metaWriteArb = Module(new Arbiter(CacheMetaArrayWriteBus().req.bits, 2)) - val dataWriteArb = Module(new Arbiter(CacheDataArrayWriteBus().req.bits, 2)) - - val req = io.in.bits.req - val addr = req.addr.asTypeOf(addrBundle) - val mmio = io.in.valid && io.in.bits.mmio - val hit = io.in.valid && io.in.bits.hit - val miss = io.in.valid && !io.in.bits.hit - val probe = io.in.valid && hasCoh.B && req.isProbe() - val hitReadBurst = hit && req.isReadBurst() - val meta = Mux1H(io.in.bits.waymask, io.in.bits.metas) - assert(!(mmio 
&& hit), "MMIO request should not hit in cache") - - - // this is ugly - if (cacheName == "dcache") { - BoringUtils.addSource(mmio, "lsuMMIO") - } - - val useForwardData = io.in.bits.isForwardData && io.in.bits.waymask === io.in.bits.forwardData.waymask.getOrElse("b1".U) - val dataReadArray = Mux1H(io.in.bits.waymask, io.in.bits.datas).data - val dataRead = Mux(useForwardData, io.in.bits.forwardData.data.data, dataReadArray) - val wordMask = Mux(!ro.B && req.isWrite(), MaskExpand(req.wmask), 0.U(DataBits.W)) - - val writeL2BeatCnt = Counter(LineBeats) - when(io.out.fire() && (req.cmd === SimpleBusCmd.writeBurst || req.isWriteLast())) { - writeL2BeatCnt.inc() - } - - val hitWrite = hit && req.isWrite() - val dataHitWriteBus = Wire(CacheDataArrayWriteBus()).apply( - data = Wire(new DataBundle).apply(MaskData(dataRead, req.wdata, wordMask)), - valid = hitWrite, setIdx = Cat(addr.index, Mux(req.cmd === SimpleBusCmd.writeBurst || req.isWriteLast(), writeL2BeatCnt.value, addr.wordIndex)), waymask = io.in.bits.waymask) - - val metaHitWriteBus = Wire(CacheMetaArrayWriteBus()).apply( - valid = hitWrite && !meta.dirty, setIdx = getMetaIdx(req.addr), waymask = io.in.bits.waymask, - data = Wire(new MetaBundle).apply(tag = meta.tag, valid = true.B, dirty = (!ro).B) - ) - - val s_idle :: s_memReadReq :: s_memReadResp :: s_memWriteReq :: s_memWriteResp :: s_mmioReq :: s_mmioResp :: s_wait_resp :: s_release :: Nil = Enum(9) - val state = RegInit(s_idle) - val needFlush = RegInit(false.B) - - when (io.flush && (state =/= s_idle)) { needFlush := true.B } - when (io.out.fire() && needFlush) { needFlush := false.B } - - val readBeatCnt = Counter(LineBeats) - val writeBeatCnt = Counter(LineBeats) - - val s2_idle :: s2_dataReadWait :: s2_dataOK :: Nil = Enum(3) - val state2 = RegInit(s2_idle) - - io.dataReadBus.apply(valid = (state === s_memWriteReq || state === s_release) && (state2 === s2_idle), - setIdx = Cat(addr.index, Mux(state === s_release, readBeatCnt.value, 
writeBeatCnt.value))) - val dataWay = RegEnable(io.dataReadBus.resp.data, state2 === s2_dataReadWait) - val dataHitWay = Mux1H(io.in.bits.waymask, dataWay).data - - switch (state2) { - is (s2_idle) { when (io.dataReadBus.req.fire()) { state2 := s2_dataReadWait } } - is (s2_dataReadWait) { state2 := s2_dataOK } - is (s2_dataOK) { when (io.mem.req.fire() || io.cohResp.fire() || hitReadBurst && io.out.ready) { state2 := s2_idle } } - } - - // critical word first read - val raddr = (if (XLEN == 64) Cat(req.addr(PAddrBits-1,3), 0.U(3.W)) - else Cat(req.addr(PAddrBits-1,2), 0.U(2.W))) - // dirty block addr - val waddr = Cat(meta.tag, addr.index, 0.U(OffsetBits.W)) - val cmd = Mux(state === s_memReadReq, SimpleBusCmd.readBurst, - Mux((writeBeatCnt.value === (LineBeats - 1).U), SimpleBusCmd.writeLast, SimpleBusCmd.writeBurst)) - io.mem.req.bits.apply(addr = Mux(state === s_memReadReq, raddr, waddr), - cmd = cmd, size = (if (XLEN == 64) "b11".U else "b10".U), - wdata = dataHitWay, wmask = Fill(DataBytes, 1.U)) - - io.mem.resp.ready := true.B - io.mem.req.valid := (state === s_memReadReq) || ((state === s_memWriteReq) && (state2 === s2_dataOK)) - - // mmio - io.mmio.req.bits := req - io.mmio.resp.ready := true.B - io.mmio.req.valid := (state === s_mmioReq) - - val afterFirstRead = RegInit(false.B) - val alreadyOutFire = RegEnable(true.B, init = false.B, io.out.fire()) - val readingFirst = !afterFirstRead && io.mem.resp.fire() && (state === s_memReadResp) - val inRdataRegDemand = RegEnable(Mux(mmio, io.mmio.resp.bits.rdata, io.mem.resp.bits.rdata), - Mux(mmio, state === s_mmioResp, readingFirst)) - - // probe - io.cohResp.valid := ((state === s_idle) && probe) || - ((state === s_release) && (state2 === s2_dataOK)) - io.cohResp.bits.rdata := dataHitWay - val releaseLast = Counter(state === s_release && io.cohResp.fire(), LineBeats)._2 - io.cohResp.bits.cmd := Mux(state === s_release, Mux(releaseLast, SimpleBusCmd.readLast, 0.U), - Mux(hit, SimpleBusCmd.probeHit, 
SimpleBusCmd.probeMiss)) - - val respToL1Fire = hitReadBurst && io.out.ready && state2 === s2_dataOK - val respToL1Last = Counter((state === s_idle || state === s_release && state2 === s2_dataOK) && hitReadBurst && io.out.ready, LineBeats)._2 - - switch (state) { - is (s_idle) { - afterFirstRead := false.B - alreadyOutFire := false.B - - when (probe) { - when (io.cohResp.fire()) { - state := Mux(hit, s_release, s_idle) - readBeatCnt.value := addr.wordIndex - } - } .elsewhen (hitReadBurst && io.out.ready) { - state := s_release - readBeatCnt.value := Mux(addr.wordIndex === (LineBeats - 1).U, 0.U, (addr.wordIndex + 1.U)) - } .elsewhen ((miss || mmio) && !io.flush) { - state := Mux(mmio, s_mmioReq, Mux(!ro.B && meta.dirty, s_memWriteReq, s_memReadReq)) - } - } - - is (s_mmioReq) { when (io.mmio.req.fire()) { state := s_mmioResp } } - is (s_mmioResp) { when (io.mmio.resp.fire()) { state := s_wait_resp } } - - is (s_release) { - when (io.cohResp.fire() || respToL1Fire) { readBeatCnt.inc() } - when (probe && io.cohResp.fire() && releaseLast || respToL1Fire && respToL1Last) { state := s_idle } - } - - is (s_memReadReq) { when (io.mem.req.fire()) { - state := s_memReadResp - readBeatCnt.value := addr.wordIndex - }} - - is (s_memReadResp) { - when (io.mem.resp.fire()) { - afterFirstRead := true.B - readBeatCnt.inc() - when (req.cmd === SimpleBusCmd.writeBurst) { writeL2BeatCnt.value := 0.U } - when (io.mem.resp.bits.isReadLast()) { state := s_wait_resp } - } - } - - is (s_memWriteReq) { - when (io.mem.req.fire()) { writeBeatCnt.inc() } - when (io.mem.req.bits.isWriteLast() && io.mem.req.fire()) { state := s_memWriteResp } - } - - is (s_memWriteResp) { when (io.mem.resp.fire()) { state := s_memReadReq } } - is (s_wait_resp) { when (io.out.fire() || needFlush || alreadyOutFire) { state := s_idle } } - } - - val dataRefill = MaskData(io.mem.resp.bits.rdata, req.wdata, Mux(readingFirst, wordMask, 0.U(DataBits.W))) - val dataRefillWriteBus = Wire(CacheDataArrayWriteBus).apply( - 
valid = (state === s_memReadResp) && io.mem.resp.fire(), setIdx = Cat(addr.index, readBeatCnt.value), - data = Wire(new DataBundle).apply(dataRefill), waymask = io.in.bits.waymask) - - dataWriteArb.io.in(0) <> dataHitWriteBus.req - dataWriteArb.io.in(1) <> dataRefillWriteBus.req - io.dataWriteBus.req <> dataWriteArb.io.out - - val metaRefillWriteBus = Wire(CacheMetaArrayWriteBus()).apply( - valid = (state === s_memReadResp) && io.mem.resp.fire() && io.mem.resp.bits.isReadLast(), - data = Wire(new MetaBundle).apply(valid = true.B, tag = addr.tag, dirty = !ro.B && req.isWrite()), - setIdx = getMetaIdx(req.addr), waymask = io.in.bits.waymask - ) - - metaWriteArb.io.in(0) <> metaHitWriteBus.req - metaWriteArb.io.in(1) <> metaRefillWriteBus.req - io.metaWriteBus.req <> metaWriteArb.io.out - - if (cacheLevel == 2) { - when ((state === s_memReadResp) && io.mem.resp.fire() && req.isReadBurst()) { - // readBurst request miss - io.out.bits.rdata := dataRefill - io.out.bits.cmd := Mux(io.mem.resp.bits.isReadLast(), SimpleBusCmd.readLast, SimpleBusCmd.readBurst) - }.elsewhen (req.isWriteLast() || req.cmd === SimpleBusCmd.writeBurst) { - // writeBurst/writeLast request, no matter hit or miss - io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand) - io.out.bits.cmd := DontCare - }.elsewhen (hitReadBurst && state === s_release) { - // readBurst request hit - io.out.bits.rdata := dataHitWay - io.out.bits.cmd := Mux(respToL1Last, SimpleBusCmd.readLast, SimpleBusCmd.readBurst) - }.otherwise { - io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand) - io.out.bits.cmd := req.cmd - } - } else { - io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand) - io.out.bits.cmd := Mux(io.in.bits.req.isRead(), SimpleBusCmd.readLast, Mux(io.in.bits.req.isWrite(), SimpleBusCmd.writeResp, DontCare))//DontCare, added by lemover - } - io.out.bits.user.zip(req.user).map { case (o,i) => o := i } - io.out.bits.id.zip(req.id).map { case (o,i) => o := i } - - io.out.valid := io.in.valid && 
Mux(req.isBurst() && (cacheLevel == 2).B, - Mux(req.isWrite() && (hit || !hit && state === s_wait_resp), true.B, (state === s_memReadResp && io.mem.resp.fire() && req.cmd === SimpleBusCmd.readBurst)) || (respToL1Fire && respToL1Last && state === s_release), - Mux(probe, false.B, Mux(hit, true.B, Mux(req.isWrite() || mmio, state === s_wait_resp, afterFirstRead && !alreadyOutFire))) - ) - - // With critical-word first, the pipeline registers between - // s2 and s3 can not be overwritten before a missing request - // is totally handled. We use io.isFinish to indicate when the - // request really ends. - io.isFinish := Mux(probe, io.cohResp.fire() && Mux(miss, state === s_idle, (state === s_release) && releaseLast), - Mux(hit || req.isWrite(), io.out.fire(), (state === s_wait_resp) && (io.out.fire() || alreadyOutFire)) - ) - - io.in.ready := io.out.ready && (state === s_idle && !hitReadBurst) && !miss && !probe - io.dataReadRespToL1 := hitReadBurst && (state === s_idle && io.out.ready || state === s_release && state2 === s2_dataOK) - - assert(!(metaHitWriteBus.req.valid && metaRefillWriteBus.req.valid)) - assert(!(dataHitWriteBus.req.valid && dataRefillWriteBus.req.valid)) - assert(!(!ro.B && io.flush), "only allow to flush icache") - Debug(" metaread idx %x waymask %b metas %x%x:%x %x%x:%x %x%x:%x %x%x:%x %x\n", getMetaIdx(req.addr), io.in.bits.waymask.asUInt, io.in.bits.metas(0).valid, io.in.bits.metas(0).dirty, io.in.bits.metas(0).tag, io.in.bits.metas(1).valid, io.in.bits.metas(1).dirty, io.in.bits.metas(1).tag, io.in.bits.metas(2).valid, io.in.bits.metas(2).dirty, io.in.bits.metas(2).tag, io.in.bits.metas(3).valid, io.in.bits.metas(3).dirty, io.in.bits.metas(3).tag, io.in.bits.datas.asUInt) - Debug(io.metaWriteBus.req.fire(), "%d: [" + cacheName + " S3]: metawrite idx %x wmask %b meta %x%x:%x\n", GTimer(), io.metaWriteBus.req.bits.setIdx, io.metaWriteBus.req.bits.waymask.get, io.metaWriteBus.req.bits.data.valid, io.metaWriteBus.req.bits.data.dirty, 
io.metaWriteBus.req.bits.data.tag) - Debug(" in.ready = %d, in.valid = %d, hit = %x, state = %d, addr = %x cmd:%d probe:%d isFinish:%d\n", io.in.ready, io.in.valid, hit, state, req.addr, req.cmd, probe, io.isFinish) - Debug(" out.valid:%d rdata:%x cmd:%d user:%x id:%x \n", io.out.valid, io.out.bits.rdata, io.out.bits.cmd, io.out.bits.user.getOrElse(0.U), io.out.bits.id.getOrElse(0.U)) - Debug(" DHW: (%d, %d), data:%x setIdx:%x MHW:(%d, %d)\n", dataHitWriteBus.req.valid, dataHitWriteBus.req.ready, dataHitWriteBus.req.bits.data.asUInt, dataHitWriteBus.req.bits.setIdx, metaHitWriteBus.req.valid, metaHitWriteBus.req.ready) - Debug(" DreadCache: %x \n", io.in.bits.datas.asUInt) - Debug(" useFD:%d isFD:%d FD:%x DreadArray:%x dataRead:%x inwaymask:%x FDwaymask:%x \n", useForwardData, io.in.bits.isForwardData, io.in.bits.forwardData.data.data, dataReadArray, dataRead, io.in.bits.waymask, io.in.bits.forwardData.waymask.getOrElse("b1".U)) - Debug(io.dataWriteBus.req.fire(), "[WB] waymask: %b data:%x setIdx:%x\n", - io.dataWriteBus.req.bits.waymask.get.asUInt, io.dataWriteBus.req.bits.data.asUInt, io.dataWriteBus.req.bits.setIdx) - Debug((state === s_memWriteReq) && io.mem.req.fire(), "[COUTW] cnt %x addr %x data %x cmd %x size %x wmask %x tag %x idx %x waymask %b \n", writeBeatCnt.value, io.mem.req.bits.addr, io.mem.req.bits.wdata, io.mem.req.bits.cmd, io.mem.req.bits.size, io.mem.req.bits.wmask, addr.tag, getMetaIdx(req.addr), io.in.bits.waymask) - Debug((state === s_memReadReq) && io.mem.req.fire(), "[COUTR] addr %x tag %x idx %x waymask %b \n", io.mem.req.bits.addr, addr.tag, getMetaIdx(req.addr), io.in.bits.waymask) - Debug((state === s_memReadResp) && io.mem.resp.fire(), "[COUTR] cnt %x data %x tag %x idx %x waymask %b \n", readBeatCnt.value, io.mem.resp.bits.rdata, addr.tag, getMetaIdx(req.addr), io.in.bits.waymask) -} - class Cache(implicit val cacheConfig: CacheConfig) extends CacheModule with HasCacheIO { // cpu pipeline val s1 = Module(new CacheStage1) diff --git 
a/src/main/scala/nutcore/mem/cache/CacheCheck.scala b/src/main/scala/nutcore/mem/cache/CacheCheck.scala new file mode 100644 index 000000000..2f62bd22b --- /dev/null +++ b/src/main/scala/nutcore/mem/cache/CacheCheck.scala @@ -0,0 +1,89 @@ + +package nutcore.mem.cache + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import bus.simplebus._ +import bus.axi4._ +import chisel3.experimental.IO +import utils._ +import top.Settings + + +// Cache pipeline stage 2: consumes the meta/data array read responses +// and performs the tag check (hit vector and way selection). +class CacheStage2(implicit val cacheConfig: CacheConfig) extends CacheModule { + class CacheStage2IO extends Bundle { + val in = Flipped(Decoupled(new Stage1IO)) + val out = Decoupled(new Stage2IO) + val metaReadResp = Flipped(Vec(Ways, new MetaBundle)) + val dataReadResp = Flipped(Vec(Ways, new DataBundle)) + val metaWriteBus = Input(CacheMetaArrayWriteBus()) + val dataWriteBus = Input(CacheDataArrayWriteBus()) + } + val io = IO(new CacheStage2IO) + + val req = io.in.bits.req + val addr = req.addr.asTypeOf(addrBundle) + + val isForwardMeta = io.in.valid && io.metaWriteBus.req.valid && io.metaWriteBus.req.bits.setIdx === getMetaIdx(req.addr) // a meta write to the set being checked is forwarded into this stage + val isForwardMetaReg = RegInit(false.B) + when (isForwardMeta) { isForwardMetaReg := true.B } + when (io.in.fire() || !io.in.valid) { isForwardMetaReg := false.B } + val forwardMetaReg = RegEnable(io.metaWriteBus.req.bits, isForwardMeta) + + val metaWay = Wire(Vec(Ways, chiselTypeOf(forwardMetaReg.data))) + val pickForwardMeta = isForwardMetaReg || isForwardMeta + val forwardMeta = Mux(isForwardMeta, io.metaWriteBus.req.bits, forwardMetaReg) + val forwardWaymask = forwardMeta.waymask.getOrElse("1".U).asBools + forwardWaymask.zipWithIndex.map { case (w, i) => + metaWay(i) := Mux(pickForwardMeta && w, forwardMeta.data, io.metaReadResp(i)) + } + + val hitVec = VecInit(metaWay.map(m => m.valid && (m.tag === addr.tag) && io.in.valid)).asUInt + val victimWaymask = if (Ways > 1) (1.U << LFSR64()(log2Up(Ways)-1,0)) else
"b1".U + + val invalidVec = VecInit(metaWay.map(m => !m.valid)).asUInt + val hasInvalidWay = invalidVec.orR + val refillInvalidWaymask = Mux(invalidVec >= 8.U, "b1000".U, // NOTE(review): the 4-bit literals look written for Ways == 4; confirm for other configurations + Mux(invalidVec >= 4.U, "b0100".U, + Mux(invalidVec >= 2.U, "b0010".U, "b0001".U))) + + // val waymask = Mux(io.out.bits.hit, hitVec, victimWaymask) + val waymask = Mux(io.out.bits.hit, hitVec, Mux(hasInvalidWay, refillInvalidWaymask, victimWaymask)) // on a miss, refill an invalid way if one exists, otherwise the victim way + when(PopCount(waymask) > 1.U){ + metaWay.map(m => Debug("[ERROR] metaWay %x metat %x reqt %x\n", m.valid, m.tag, addr.tag)) + io.metaReadResp.map(m => Debug("[ERROR] metaReadResp %x metat %x reqt %x\n", m.valid, m.tag, addr.tag)) + Debug("[ERROR] forwardMetaReg isForwardMetaReg %x %x metat %x wm %b\n", isForwardMetaReg, forwardMetaReg.data.valid, forwardMetaReg.data.tag, forwardMetaReg.waymask.get) + Debug("[ERROR] forwardMeta isForwardMeta %x %x metat %x wm %b\n", isForwardMeta, io.metaWriteBus.req.bits.data.valid, io.metaWriteBus.req.bits.data.tag, io.metaWriteBus.req.bits.waymask.get) + } + when(PopCount(waymask) > 1.U){Debug("[ERROR] hit %b wmask %b hitvec %b\n", io.out.bits.hit, forwardMeta.waymask.getOrElse("1".U), hitVec)} + assert(!(io.in.valid && PopCount(waymask) > 1.U)) // at most one way may be selected for a valid request + + io.out.bits.metas := metaWay + io.out.bits.hit := io.in.valid && hitVec.orR + io.out.bits.waymask := waymask + io.out.bits.datas := io.dataReadResp + io.out.bits.mmio := AddressSpace.isMMIO(req.addr) + + val isForwardData = io.in.valid && (io.dataWriteBus.req match { case r => + r.valid && r.bits.setIdx === getDataIdx(req.addr) + }) + val isForwardDataReg = RegInit(false.B) + when (isForwardData) { isForwardDataReg := true.B } + when (io.in.fire() || !io.in.valid) { isForwardDataReg := false.B } + val forwardDataReg = RegEnable(io.dataWriteBus.req.bits, isForwardData) + io.out.bits.isForwardData := isForwardDataReg || isForwardData + io.out.bits.forwardData := Mux(isForwardData, io.dataWriteBus.req.bits, forwardDataReg) + + io.out.bits.req <> req + io.out.valid :=
io.in.valid + io.in.ready := !io.in.valid || io.out.fire() // accept a new request when empty or when the current one leaves + + Debug("[isFD:%d isFDreg:%d inFire:%d invalid:%d \n", isForwardData, isForwardDataReg, io.in.fire(), io.in.valid) + Debug("[isFM:%d isFMreg:%d metawreq:%x widx:%x ridx:%x \n", isForwardMeta, isForwardMetaReg, io.metaWriteBus.req.valid, io.metaWriteBus.req.bits.setIdx, getMetaIdx(req.addr)) +} \ No newline at end of file diff --git a/src/main/scala/nutcore/mem/cache/CacheMetaRead.scala b/src/main/scala/nutcore/mem/cache/CacheMetaRead.scala new file mode 100644 index 000000000..9e73a9a71 --- /dev/null +++ b/src/main/scala/nutcore/mem/cache/CacheMetaRead.scala @@ -0,0 +1,54 @@ + +package nutcore.mem.cache + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import bus.simplebus._ +import bus.axi4._ +import chisel3.experimental.IO +import utils._ +import top.Settings + + +class Stage1IO(implicit val cacheConfig: CacheConfig) extends CacheBundle { + val req = new SimpleBusReqBundle(userBits = userBits, idBits = idBits) +} + +class CacheStage1(implicit val cacheConfig: CacheConfig) extends CacheModule { + class CacheStage1IO extends Bundle { + val in = Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits, idBits = idBits))) + val out = Decoupled(new Stage1IO) + val metaReadBus = CacheMetaArrayReadBus() + val dataReadBus = CacheDataArrayReadBus() + } + val io = IO(new CacheStage1IO) + + if (ro) when (io.in.fire()) { assert(!io.in.bits.isWrite()) } // a read-only cache must never see a write + Debug(io.in.fire(), "[L1$] cache stage1, addr in: %x, user: %x id: %x\n", io.in.bits.addr, io.in.bits.user.getOrElse(0.U), io.in.bits.id.getOrElse(0.U)) + + // issue the meta array and data array reads for the incoming request + val readBusValid = io.in.valid && io.out.ready + io.metaReadBus.apply(valid = readBusValid, setIdx = getMetaIdx(io.in.bits.addr)) + io.dataReadBus.apply(valid = readBusValid, setIdx = getDataIdx(io.in.bits.addr)) + + io.out.bits.req := io.in.bits + io.out.valid := io.in.valid && io.metaReadBus.req.ready &&
io.dataReadBus.req.ready + io.in.ready := (!io.in.valid || io.out.fire()) && io.metaReadBus.req.ready && io.dataReadBus.req.ready + + Debug("in.ready = %d, in.valid = %d, out.valid = %d, out.ready = %d, addr = %x, cmd = %x, dataReadBus.req.valid = %d\n", io.in.ready, io.in.valid, io.out.valid, io.out.ready, io.in.bits.addr, io.in.bits.cmd, io.dataReadBus.req.valid) +} + +class Stage2IO(implicit val cacheConfig: CacheConfig) extends CacheBundle { + val req = new SimpleBusReqBundle(userBits = userBits, idBits = idBits) + val metas = Vec(Ways, new MetaBundle) + val datas = Vec(Ways, new DataBundle) + val hit = Output(Bool()) + val waymask = Output(UInt(Ways.W)) // way selected by stage 2: the hit way, or the way chosen for refill + val mmio = Output(Bool()) + val isForwardData = Output(Bool()) + val forwardData = Output(CacheDataArrayWriteBus().req.bits) +} diff --git a/src/main/scala/nutcore/mem/cache/CacheWriteBack.scala b/src/main/scala/nutcore/mem/cache/CacheWriteBack.scala new file mode 100644 index 000000000..b70b01bfa --- /dev/null +++ b/src/main/scala/nutcore/mem/cache/CacheWriteBack.scala @@ -0,0 +1,258 @@ + +package nutcore.mem.cache + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import bus.simplebus._ +import bus.axi4._ +import chisel3.experimental.IO +import utils._ +import top.Settings + +// Cache pipeline stage 3: hit/miss handling, refill, write-back, MMIO and probe responses +class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheModule { + class CacheStage3IO extends Bundle { + val in = Flipped(Decoupled(new Stage2IO)) + val out = Decoupled(new SimpleBusRespBundle(userBits = userBits, idBits = idBits)) + val isFinish = Output(Bool()) + val flush = Input(Bool()) + val dataReadBus = CacheDataArrayReadBus() + val dataWriteBus = CacheDataArrayWriteBus() + val metaWriteBus = CacheMetaArrayWriteBus() + + val mem = new SimpleBusUC + val mmio = new SimpleBusUC + val cohResp = Decoupled(new SimpleBusRespBundle) + + // used to distinguish prefetch requests from normal requests + val dataReadRespToL1 = Output(Bool()) + } + val io =
IO(new CacheStage3IO) + + val metaWriteArb = Module(new Arbiter(CacheMetaArrayWriteBus().req.bits, 2)) + val dataWriteArb = Module(new Arbiter(CacheDataArrayWriteBus().req.bits, 2)) + + val req = io.in.bits.req + val addr = req.addr.asTypeOf(addrBundle) + val mmio = io.in.valid && io.in.bits.mmio + val hit = io.in.valid && io.in.bits.hit + val miss = io.in.valid && !io.in.bits.hit + val probe = io.in.valid && hasCoh.B && req.isProbe() + val hitReadBurst = hit && req.isReadBurst() + val meta = Mux1H(io.in.bits.waymask, io.in.bits.metas) // meta of the way selected by stage 2 + assert(!(mmio && hit), "MMIO request should not hit in cache") + + + // this is ugly: the dcache exports its MMIO flag through BoringUtils as "lsuMMIO" + if (cacheName == "dcache") { + BoringUtils.addSource(mmio, "lsuMMIO") + } + + val useForwardData = io.in.bits.isForwardData && io.in.bits.waymask === io.in.bits.forwardData.waymask.getOrElse("b1".U) + val dataReadArray = Mux1H(io.in.bits.waymask, io.in.bits.datas).data + val dataRead = Mux(useForwardData, io.in.bits.forwardData.data.data, dataReadArray) + val wordMask = Mux(!ro.B && req.isWrite(), MaskExpand(req.wmask), 0.U(DataBits.W)) + + val writeL2BeatCnt = Counter(LineBeats) + when(io.out.fire() && (req.cmd === SimpleBusCmd.writeBurst || req.isWriteLast())) { + writeL2BeatCnt.inc() + } + + val hitWrite = hit && req.isWrite() + val dataHitWriteBus = Wire(CacheDataArrayWriteBus()).apply( + data = Wire(new DataBundle).apply(MaskData(dataRead, req.wdata, wordMask)), + valid = hitWrite, setIdx = Cat(addr.index, Mux(req.cmd === SimpleBusCmd.writeBurst || req.isWriteLast(), writeL2BeatCnt.value, addr.wordIndex)), waymask = io.in.bits.waymask) + + val metaHitWriteBus = Wire(CacheMetaArrayWriteBus()).apply( + valid = hitWrite && !meta.dirty, setIdx = getMetaIdx(req.addr), waymask = io.in.bits.waymask, + data = Wire(new MetaBundle).apply(tag = meta.tag, valid = true.B, dirty = (!ro).B) + ) + + val s_idle :: s_memReadReq :: s_memReadResp :: s_memWriteReq :: s_memWriteResp :: s_mmioReq :: s_mmioResp :: s_wait_resp :: s_release :: Nil = Enum(9) + val
state = RegInit(s_idle) + val needFlush = RegInit(false.B) + + when (io.flush && (state =/= s_idle)) { needFlush := true.B } + when (io.out.fire() && needFlush) { needFlush := false.B } + + val readBeatCnt = Counter(LineBeats) + val writeBeatCnt = Counter(LineBeats) + + val s2_idle :: s2_dataReadWait :: s2_dataOK :: Nil = Enum(3) + val state2 = RegInit(s2_idle) // secondary FSM sequencing the data array reads used for write-back and release + + io.dataReadBus.apply(valid = (state === s_memWriteReq || state === s_release) && (state2 === s2_idle), + setIdx = Cat(addr.index, Mux(state === s_release, readBeatCnt.value, writeBeatCnt.value))) + val dataWay = RegEnable(io.dataReadBus.resp.data, state2 === s2_dataReadWait) + val dataHitWay = Mux1H(io.in.bits.waymask, dataWay).data + + switch (state2) { + is (s2_idle) { when (io.dataReadBus.req.fire()) { state2 := s2_dataReadWait } } + is (s2_dataReadWait) { state2 := s2_dataOK } + is (s2_dataOK) { when (io.mem.req.fire() || io.cohResp.fire() || hitReadBurst && io.out.ready) { state2 := s2_idle } } + } + + // critical-word-first read: the burst starts at the requested word (aligned to the bus width) + val raddr = (if (XLEN == 64) Cat(req.addr(PAddrBits-1,3), 0.U(3.W)) + else Cat(req.addr(PAddrBits-1,2), 0.U(2.W))) + // address of the dirty block being written back + val waddr = Cat(meta.tag, addr.index, 0.U(OffsetBits.W)) + val cmd = Mux(state === s_memReadReq, SimpleBusCmd.readBurst, + Mux((writeBeatCnt.value === (LineBeats - 1).U), SimpleBusCmd.writeLast, SimpleBusCmd.writeBurst)) + io.mem.req.bits.apply(addr = Mux(state === s_memReadReq, raddr, waddr), + cmd = cmd, size = (if (XLEN == 64) "b11".U else "b10".U), + wdata = dataHitWay, wmask = Fill(DataBytes, 1.U)) + + io.mem.resp.ready := true.B + io.mem.req.valid := (state === s_memReadReq) || ((state === s_memWriteReq) && (state2 === s2_dataOK)) + + // mmio + io.mmio.req.bits := req + io.mmio.resp.ready := true.B + io.mmio.req.valid := (state === s_mmioReq) + + val afterFirstRead = RegInit(false.B) + val alreadyOutFire = RegEnable(true.B, init = false.B, io.out.fire()) + val readingFirst = !afterFirstRead && io.mem.resp.fire() && (state ===
s_memReadResp) + val inRdataRegDemand = RegEnable(Mux(mmio, io.mmio.resp.bits.rdata, io.mem.resp.bits.rdata), + Mux(mmio, state === s_mmioResp, readingFirst)) + + // probe: coherence response (probe hit/miss first, then the released line data) + io.cohResp.valid := ((state === s_idle) && probe) || + ((state === s_release) && (state2 === s2_dataOK)) + io.cohResp.bits.rdata := dataHitWay + val releaseLast = Counter(state === s_release && io.cohResp.fire(), LineBeats)._2 + io.cohResp.bits.cmd := Mux(state === s_release, Mux(releaseLast, SimpleBusCmd.readLast, 0.U), + Mux(hit, SimpleBusCmd.probeHit, SimpleBusCmd.probeMiss)) + + val respToL1Fire = hitReadBurst && io.out.ready && state2 === s2_dataOK + val respToL1Last = Counter((state === s_idle || state === s_release && state2 === s2_dataOK) && hitReadBurst && io.out.ready, LineBeats)._2 + + switch (state) { + is (s_idle) { + afterFirstRead := false.B + alreadyOutFire := false.B + + when (probe) { + when (io.cohResp.fire()) { + state := Mux(hit, s_release, s_idle) + readBeatCnt.value := addr.wordIndex + } + } .elsewhen (hitReadBurst && io.out.ready) { + state := s_release + readBeatCnt.value := Mux(addr.wordIndex === (LineBeats - 1).U, 0.U, (addr.wordIndex + 1.U)) + } .elsewhen ((miss || mmio) && !io.flush) { + state := Mux(mmio, s_mmioReq, Mux(!ro.B && meta.dirty, s_memWriteReq, s_memReadReq)) // a dirty victim is written back before the refill + } + } + + is (s_mmioReq) { when (io.mmio.req.fire()) { state := s_mmioResp } } + is (s_mmioResp) { when (io.mmio.resp.fire()) { state := s_wait_resp } } + + is (s_release) { + when (io.cohResp.fire() || respToL1Fire) { readBeatCnt.inc() } + when (probe && io.cohResp.fire() && releaseLast || respToL1Fire && respToL1Last) { state := s_idle } + } + + is (s_memReadReq) { when (io.mem.req.fire()) { + state := s_memReadResp + readBeatCnt.value := addr.wordIndex + }} + + is (s_memReadResp) { + when (io.mem.resp.fire()) { + afterFirstRead := true.B + readBeatCnt.inc() + when (req.cmd === SimpleBusCmd.writeBurst) { writeL2BeatCnt.value := 0.U } + when (io.mem.resp.bits.isReadLast()) { state := s_wait_resp
} + } + } + + is (s_memWriteReq) { + when (io.mem.req.fire()) { writeBeatCnt.inc() } + when (io.mem.req.bits.isWriteLast() && io.mem.req.fire()) { state := s_memWriteResp } + } + + is (s_memWriteResp) { when (io.mem.resp.fire()) { state := s_memReadReq } } + is (s_wait_resp) { when (io.out.fire() || needFlush || alreadyOutFire) { state := s_idle } } + } + + val dataRefill = MaskData(io.mem.resp.bits.rdata, req.wdata, Mux(readingFirst, wordMask, 0.U(DataBits.W))) // store data is merged only into the first refill beat + val dataRefillWriteBus = Wire(CacheDataArrayWriteBus).apply( + valid = (state === s_memReadResp) && io.mem.resp.fire(), setIdx = Cat(addr.index, readBeatCnt.value), + data = Wire(new DataBundle).apply(dataRefill), waymask = io.in.bits.waymask) + + dataWriteArb.io.in(0) <> dataHitWriteBus.req + dataWriteArb.io.in(1) <> dataRefillWriteBus.req + io.dataWriteBus.req <> dataWriteArb.io.out + + val metaRefillWriteBus = Wire(CacheMetaArrayWriteBus()).apply( + valid = (state === s_memReadResp) && io.mem.resp.fire() && io.mem.resp.bits.isReadLast(), + data = Wire(new MetaBundle).apply(valid = true.B, tag = addr.tag, dirty = !ro.B && req.isWrite()), + setIdx = getMetaIdx(req.addr), waymask = io.in.bits.waymask + ) + + metaWriteArb.io.in(0) <> metaHitWriteBus.req + metaWriteArb.io.in(1) <> metaRefillWriteBus.req + io.metaWriteBus.req <> metaWriteArb.io.out + + if (cacheLevel == 2) { + when ((state === s_memReadResp) && io.mem.resp.fire() && req.isReadBurst()) { + // readBurst request miss + io.out.bits.rdata := dataRefill + io.out.bits.cmd := Mux(io.mem.resp.bits.isReadLast(), SimpleBusCmd.readLast, SimpleBusCmd.readBurst) + }.elsewhen (req.isWriteLast() || req.cmd === SimpleBusCmd.writeBurst) { + // writeBurst/writeLast request, no matter hit or miss + io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand) + io.out.bits.cmd := DontCare + }.elsewhen (hitReadBurst && state === s_release) { + // readBurst request hit + io.out.bits.rdata := dataHitWay + io.out.bits.cmd := Mux(respToL1Last, SimpleBusCmd.readLast,
SimpleBusCmd.readBurst) + }.otherwise { + io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand) + io.out.bits.cmd := req.cmd + } + } else { + io.out.bits.rdata := Mux(hit, dataRead, inRdataRegDemand) + io.out.bits.cmd := Mux(io.in.bits.req.isRead(), SimpleBusCmd.readLast, Mux(io.in.bits.req.isWrite(), SimpleBusCmd.writeResp, DontCare))//DontCare, added by lemover + } + io.out.bits.user.zip(req.user).map { case (o,i) => o := i } + io.out.bits.id.zip(req.id).map { case (o,i) => o := i } + + io.out.valid := io.in.valid && Mux(req.isBurst() && (cacheLevel == 2).B, + Mux(req.isWrite() && (hit || !hit && state === s_wait_resp), true.B, (state === s_memReadResp && io.mem.resp.fire() && req.cmd === SimpleBusCmd.readBurst)) || (respToL1Fire && respToL1Last && state === s_release), + Mux(probe, false.B, Mux(hit, true.B, Mux(req.isWrite() || mmio, state === s_wait_resp, afterFirstRead && !alreadyOutFire))) + ) + + // With critical-word first, the pipeline registers between + // s2 and s3 cannot be overwritten before a missing request + // is completely handled. We use io.isFinish to indicate when the + // request really ends.
+ io.isFinish := Mux(probe, io.cohResp.fire() && Mux(miss, state === s_idle, (state === s_release) && releaseLast), + Mux(hit || req.isWrite(), io.out.fire(), (state === s_wait_resp) && (io.out.fire() || alreadyOutFire)) + ) + + io.in.ready := io.out.ready && (state === s_idle && !hitReadBurst) && !miss && !probe + io.dataReadRespToL1 := hitReadBurst && (state === s_idle && io.out.ready || state === s_release && state2 === s2_dataOK) + + assert(!(metaHitWriteBus.req.valid && metaRefillWriteBus.req.valid)) // hit-write and refill must never request a meta write in the same cycle + assert(!(dataHitWriteBus.req.valid && dataRefillWriteBus.req.valid)) + assert(!(!ro.B && io.flush), "only allow to flush icache") + Debug(" metaread idx %x waymask %b metas %x%x:%x %x%x:%x %x%x:%x %x%x:%x %x\n", getMetaIdx(req.addr), io.in.bits.waymask.asUInt, io.in.bits.metas(0).valid, io.in.bits.metas(0).dirty, io.in.bits.metas(0).tag, io.in.bits.metas(1).valid, io.in.bits.metas(1).dirty, io.in.bits.metas(1).tag, io.in.bits.metas(2).valid, io.in.bits.metas(2).dirty, io.in.bits.metas(2).tag, io.in.bits.metas(3).valid, io.in.bits.metas(3).dirty, io.in.bits.metas(3).tag, io.in.bits.datas.asUInt) // NOTE(review): indexes metas(0) to metas(3) directly; presumably assumes Ways >= 4 - confirm + Debug(io.metaWriteBus.req.fire(), "%d: [" + cacheName + " S3]: metawrite idx %x wmask %b meta %x%x:%x\n", GTimer(), io.metaWriteBus.req.bits.setIdx, io.metaWriteBus.req.bits.waymask.get, io.metaWriteBus.req.bits.data.valid, io.metaWriteBus.req.bits.data.dirty, io.metaWriteBus.req.bits.data.tag) + Debug(" in.ready = %d, in.valid = %d, hit = %x, state = %d, addr = %x cmd:%d probe:%d isFinish:%d\n", io.in.ready, io.in.valid, hit, state, req.addr, req.cmd, probe, io.isFinish) + Debug(" out.valid:%d rdata:%x cmd:%d user:%x id:%x \n", io.out.valid, io.out.bits.rdata, io.out.bits.cmd, io.out.bits.user.getOrElse(0.U), io.out.bits.id.getOrElse(0.U)) + Debug(" DHW: (%d, %d), data:%x setIdx:%x MHW:(%d, %d)\n", dataHitWriteBus.req.valid, dataHitWriteBus.req.ready, dataHitWriteBus.req.bits.data.asUInt, dataHitWriteBus.req.bits.setIdx, metaHitWriteBus.req.valid,
metaHitWriteBus.req.ready) + Debug(" DreadCache: %x \n", io.in.bits.datas.asUInt) + Debug(" useFD:%d isFD:%d FD:%x DreadArray:%x dataRead:%x inwaymask:%x FDwaymask:%x \n", useForwardData, io.in.bits.isForwardData, io.in.bits.forwardData.data.data, dataReadArray, dataRead, io.in.bits.waymask, io.in.bits.forwardData.waymask.getOrElse("b1".U)) + Debug(io.dataWriteBus.req.fire(), "[WB] waymask: %b data:%x setIdx:%x\n", + io.dataWriteBus.req.bits.waymask.get.asUInt, io.dataWriteBus.req.bits.data.asUInt, io.dataWriteBus.req.bits.setIdx) + Debug((state === s_memWriteReq) && io.mem.req.fire(), "[COUTW] cnt %x addr %x data %x cmd %x size %x wmask %x tag %x idx %x waymask %b \n", writeBeatCnt.value, io.mem.req.bits.addr, io.mem.req.bits.wdata, io.mem.req.bits.cmd, io.mem.req.bits.size, io.mem.req.bits.wmask, addr.tag, getMetaIdx(req.addr), io.in.bits.waymask) + Debug((state === s_memReadReq) && io.mem.req.fire(), "[COUTR] addr %x tag %x idx %x waymask %b \n", io.mem.req.bits.addr, addr.tag, getMetaIdx(req.addr), io.in.bits.waymask) + Debug((state === s_memReadResp) && io.mem.resp.fire(), "[COUTR] cnt %x data %x tag %x idx %x waymask %b \n", readBeatCnt.value, io.mem.resp.bits.rdata, addr.tag, getMetaIdx(req.addr), io.in.bits.waymask) +} diff --git a/src/main/scala/nutcore/mem/tlb/Embedded.scala b/src/main/scala/nutcore/mem/tlb/Embedded.scala new file mode 100644 index 000000000..6c520a303 --- /dev/null +++ b/src/main/scala/nutcore/mem/tlb/Embedded.scala @@ -0,0 +1,211 @@ +/************************************************************************************** +* Copyright (c) 2020 Institute of Computing Technology, CAS +* Copyright (c) 2020 University of Chinese Academy of Sciences +* +* NutShell is licensed under Mulan PSL v2. +* You can use this software according to the terms and conditions of the Mulan PSL v2.
+* You may obtain a copy of Mulan PSL v2 at: +* http://license.coscl.org.cn/MulanPSL2 +* +* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER +* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR +* FIT FOR A PARTICULAR PURPOSE. +* +* See the Mulan PSL v2 for more details. +***************************************************************************************/ + +package nutcore.mem.tlb + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import bus.simplebus._ +import bus.axi4._ +import chisel3.experimental.IO +import utils._ +import top.Settings + +trait HasTLBIO extends HasNutCoreParameter with HasTlbConst with HasCSRConst { + class TLBIO extends Bundle { + val in = Flipped(new SimpleBusUC(userBits = userBits, addrBits = VAddrBits)) // upstream port; addresses are VAddrBits wide + val out = new SimpleBusUC(userBits = userBits) // downstream port + + val mem = new SimpleBusUC() // memory port handed to the TLB exec unit in EmbeddedTLB + val flush = Input(Bool()) + val csrMMU = new MMUIO + val cacheEmpty = Input(Bool()) + val ipf = Output(Bool()) // instruction page fault flag + } + val io = IO(new TLBIO) +} + +// Duplicates TLBMD. Consider eliminating one of the two.
+class EmbeddedTLBMD(implicit val tlbConfig: TLBConfig) extends TlbModule { + val io = IO(new Bundle { + val tlbmd = Output(Vec(Ways, UInt(tlbLen.W))) + val write = Flipped(new TLBMDWriteBundle(IndexBits = IndexBits, Ways = Ways, tlbLen = tlbLen)) + val rindex = Input(UInt(IndexBits.W)) + val ready = Output(Bool()) + }) + + //val tlbmd = Reg(Vec(Ways, UInt(tlbLen.W))) + val tlbmd = Mem(Sets, Vec(Ways, UInt(tlbLen.W))) + io.tlbmd := tlbmd(io.rindex) + + //val reset = WireInit(false.B) + val resetState = RegInit(true.B)//RegEnable(true.B, init = true.B, reset) + val (resetSet, resetFinish) = Counter(resetState, Sets) + when (resetFinish) { resetState := false.B } // after reset, every set is swept once (zero-filled) before normal writes are taken + + val writeWen = io.write.wen//WireInit(false.B) + val writeSetIdx = io.write.windex + val writeWayMask = io.write.waymask + val writeData = io.write.wdata + + val wen = Mux(resetState, true.B, writeWen) + val setIdx = Mux(resetState, resetSet, writeSetIdx) + val waymask = Mux(resetState, Fill(Ways, "b1".U), writeWayMask) + val dataword = Mux(resetState, 0.U, writeData) + val wdata = VecInit(Seq.fill(Ways)(dataword)) + + when (wen) { tlbmd.write(setIdx, wdata, waymask.asBools) } + + io.ready := !resetState // not ready while the reset sweep is running + def rready() = !resetState + def wready() = !resetState +} + +class EmbeddedTLB(implicit val tlbConfig: TLBConfig) extends TlbModule with HasTLBIO { + + val satp = WireInit(0.U(XLEN.W)) + BoringUtils.addSink(satp, "CSRSATP") + + // tlb exec + val tlbExec = Module(new EmbeddedTLBExec) + val tlbEmpty = Module(new EmbeddedTLBEmpty) + val mdTLB = Module(new EmbeddedTLBMD) + val mdUpdate = Wire(Bool()) + + tlbExec.io.flush := io.flush + tlbExec.io.satp := satp + tlbExec.io.mem <> io.mem + tlbExec.io.pf <> io.csrMMU + tlbExec.io.md <> RegEnable(mdTLB.io.tlbmd, mdUpdate) + tlbExec.io.mdReady := mdTLB.io.ready + mdTLB.io.rindex := getIndex(io.in.req.bits.addr) + mdTLB.io.write <> tlbExec.io.mdWrite + + io.ipf := false.B + + // meta reset: a TLB flush (MOUFlushTLB) also resets the metadata module + val flushTLB = WireInit(false.B) + BoringUtils.addSink(flushTLB,
"MOUFlushTLB") + mdTLB.reset := reset.asBool || flushTLB + + // VM enable: satp.mode === 8 and the privilege mode is below ModeM + val vmEnable = satp.asTypeOf(satpBundle).mode === 8.U && (io.csrMMU.priviledgeMode < ModeM) + + def PipelineConnectTLB[T <: Data](left: DecoupledIO[T], right: DecoupledIO[T], update: Bool, rightOutFire: Bool, isFlush: Bool, vmEnable: Bool) = { + val valid = RegInit(false.B) + when (rightOutFire) { valid := false.B } + when (left.valid && right.ready && vmEnable) { valid := true.B } + when (isFlush) { valid := false.B } + + left.ready := right.ready + right.bits <> RegEnable(left.bits, left.valid && right.ready) + right.valid := valid //&& !isFlush + + update := left.valid && right.ready + } + + tlbEmpty.io.in <> DontCare + tlbEmpty.io.out.ready := DontCare + PipelineConnectTLB(io.in.req, tlbExec.io.in, mdUpdate, tlbExec.io.isFinish, io.flush, vmEnable) + if(tlbname == "dtlb") { + PipelineConnect(tlbExec.io.out, tlbEmpty.io.in, tlbEmpty.io.out.fire(), io.flush) + } + when(!vmEnable) { // translation disabled: pass the request through, truncating the address to PAddrBits + tlbExec.io.out.ready := true.B // let the existing request go out + if( tlbname == "dtlb") { tlbEmpty.io.out.ready := true.B } + io.out.req.valid := io.in.req.valid + io.in.req.ready := io.out.req.ready + io.out.req.bits.addr := io.in.req.bits.addr(PAddrBits-1, 0) + io.out.req.bits.size := io.in.req.bits.size + io.out.req.bits.cmd := io.in.req.bits.cmd + io.out.req.bits.wmask := io.in.req.bits.wmask + io.out.req.bits.wdata := io.in.req.bits.wdata + io.out.req.bits.user.map(_ := io.in.req.bits.user.getOrElse(0.U)) + }.otherwise { + if (tlbname == "dtlb") { io.out.req <> tlbEmpty.io.out} + else { io.out.req <> tlbExec.io.out } + } + io.out.resp <> io.in.resp + + // the LSU needs these dtlb signals + if(tlbname == "dtlb") { + val alreadyOutFinish = RegEnable(true.B, init=false.B, tlbExec.io.out.valid && !tlbExec.io.out.ready) + when(alreadyOutFinish && tlbExec.io.out.fire()) { alreadyOutFinish := false.B} + val tlbFinish = (tlbExec.io.out.valid && !alreadyOutFinish) || tlbExec.io.pf.isPF() +
BoringUtils.addSource(tlbFinish, "DTLBFINISH") + BoringUtils.addSource(io.csrMMU.isPF(), "DTLBPF") + BoringUtils.addSource(vmEnable, "DTLBENABLE") + } + + // instruction page fault: hold back the downstream request and answer upstream once the cache is empty + if (tlbname == "itlb") { + when (tlbExec.io.ipf && vmEnable) { + tlbExec.io.out.ready := io.cacheEmpty && io.in.resp.ready + io.out.req.valid := false.B + } + + when (tlbExec.io.ipf && vmEnable && io.cacheEmpty) { + io.in.resp.valid := true.B + io.in.resp.bits.rdata := 0.U + io.in.resp.bits.cmd := SimpleBusCmd.readLast + io.in.resp.bits.user.map(_ := tlbExec.io.in.bits.user.getOrElse(0.U)) + io.ipf := tlbExec.io.ipf + } + } + + Debug("InReq(%d, %d) InResp(%d, %d) OutReq(%d, %d) OutResp(%d, %d) vmEnable:%d mode:%d\n", io.in.req.valid, io.in.req.ready, io.in.resp.valid, io.in.resp.ready, io.out.req.valid, io.out.req.ready, io.out.resp.valid, io.out.resp.ready, vmEnable, io.csrMMU.priviledgeMode) + Debug("InReq: addr:%x cmd:%d wdata:%x OutReq: addr:%x cmd:%x wdata:%x\n", io.in.req.bits.addr, io.in.req.bits.cmd, io.in.req.bits.wdata, io.out.req.bits.addr, io.out.req.bits.cmd, io.out.req.bits.wdata) + Debug("OutResp: rdata:%x cmd:%x Inresp: rdata:%x cmd:%x\n", io.out.resp.bits.rdata, io.out.resp.bits.cmd, io.in.resp.bits.rdata, io.in.resp.bits.cmd) + Debug("satp:%x flush:%d cacheEmpty:%d instrPF:%d loadPF:%d storePF:%d \n", satp, io.flush, io.cacheEmpty, io.ipf, io.csrMMU.loadPF, io.csrMMU.storePF) +} + +class EmbeddedTLBEmpty(implicit val tlbConfig: TLBConfig) extends TlbModule { + val io = IO(new Bundle { + val in = Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits))) + val out = Decoupled(new SimpleBusReqBundle(userBits = userBits)) + }) + + io.out <> io.in // plain pass-through +} + +class EmbeddedTLB_fake(implicit val tlbConfig: TLBConfig) extends TlbModule with HasTLBIO { + io.mem <> DontCare + io.out <> io.in // no translation: requests pass straight through + io.csrMMU.loadPF := false.B + io.csrMMU.storePF := false.B + io.csrMMU.addr := io.in.req.bits.addr + io.ipf := false.B +} + + +object EmbeddedTLB { + def apply(in: SimpleBusUC, mem:
SimpleBusUC, flush: Bool, csrMMU: MMUIO, enable: Boolean = true)(implicit tlbConfig: TLBConfig) = { + val tlb = if (enable) { // enable = false instantiates the pass-through EmbeddedTLB_fake instead + Module(new EmbeddedTLB) + } else { + Module(new EmbeddedTLB_fake) + } + tlb.io.in <> in + tlb.io.mem <> mem + tlb.io.flush := flush + tlb.io.csrMMU <> csrMMU + tlb + } +} \ No newline at end of file diff --git a/src/main/scala/nutcore/mem/EmbeddedTLB.scala b/src/main/scala/nutcore/mem/tlb/EmbeddedExec.scala similarity index 59% rename from src/main/scala/nutcore/mem/EmbeddedTLB.scala rename to src/main/scala/nutcore/mem/tlb/EmbeddedExec.scala index e54aa64a1..fef3f02a0 100644 --- a/src/main/scala/nutcore/mem/EmbeddedTLB.scala +++ b/src/main/scala/nutcore/mem/tlb/EmbeddedExec.scala @@ -1,177 +1,18 @@ -/************************************************************************************** -* Copyright (c) 2020 Institute of Computing Technology, CAS -* Copyright (c) 2020 University of Chinese Academy of Sciences -* -* NutShell is licensed under Mulan PSL v2. -* You can use this software according to the terms and conditions of the Mulan PSL v2. -* You may obtain a copy of Mulan PSL v2 at: -* http://license.coscl.org.cn/MulanPSL2 -* -* THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER -* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR -* FIT FOR A PARTICULAR PURPOSE. -* -* See the Mulan PSL v2 for more details.
-***************************************************************************************/ -package nutcore +package nutcore.mem.tlb import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils + +import nutcore._ + import bus.simplebus._ import bus.axi4._ import chisel3.experimental.IO import utils._ import top.Settings -trait HasTLBIO extends HasNutCoreParameter with HasTlbConst with HasCSRConst { - class TLBIO extends Bundle { - val in = Flipped(new SimpleBusUC(userBits = userBits, addrBits = VAddrBits)) - val out = new SimpleBusUC(userBits = userBits) - - val mem = new SimpleBusUC() - val flush = Input(Bool()) - val csrMMU = new MMUIO - val cacheEmpty = Input(Bool()) - val ipf = Output(Bool()) - } - val io = IO(new TLBIO) -} - -class EmbeddedTLBMD(implicit val tlbConfig: TLBConfig) extends TlbModule { - val io = IO(new Bundle { - val tlbmd = Output(Vec(Ways, UInt(tlbLen.W))) - val write = Flipped(new TLBMDWriteBundle(IndexBits = IndexBits, Ways = Ways, tlbLen = tlbLen)) - val rindex = Input(UInt(IndexBits.W)) - val ready = Output(Bool()) - }) - - //val tlbmd = Reg(Vec(Ways, UInt(tlbLen.W))) - val tlbmd = Mem(Sets, Vec(Ways, UInt(tlbLen.W))) - io.tlbmd := tlbmd(io.rindex) - - //val reset = WireInit(false.B) - val resetState = RegInit(true.B)//RegEnable(true.B, init = true.B, reset) - val (resetSet, resetFinish) = Counter(resetState, Sets) - when (resetFinish) { resetState := false.B } - - val writeWen = io.write.wen//WireInit(false.B) - val writeSetIdx = io.write.windex - val writeWayMask = io.write.waymask - val writeData = io.write.wdata - - val wen = Mux(resetState, true.B, writeWen) - val setIdx = Mux(resetState, resetSet, writeSetIdx) - val waymask = Mux(resetState, Fill(Ways, "b1".U), writeWayMask) - val dataword = Mux(resetState, 0.U, writeData) - val wdata = VecInit(Seq.fill(Ways)(dataword)) - - when (wen) { tlbmd.write(setIdx, wdata, waymask.asBools) } - - io.ready := !resetState - def rready() = !resetState - def wready() = 
!resetState -} - -class EmbeddedTLB(implicit val tlbConfig: TLBConfig) extends TlbModule with HasTLBIO { - - val satp = WireInit(0.U(XLEN.W)) - BoringUtils.addSink(satp, "CSRSATP") - - // tlb exec - val tlbExec = Module(new EmbeddedTLBExec) - val tlbEmpty = Module(new EmbeddedTLBEmpty) - val mdTLB = Module(new EmbeddedTLBMD) - val mdUpdate = Wire(Bool()) - - tlbExec.io.flush := io.flush - tlbExec.io.satp := satp - tlbExec.io.mem <> io.mem - tlbExec.io.pf <> io.csrMMU - tlbExec.io.md <> RegEnable(mdTLB.io.tlbmd, mdUpdate) - tlbExec.io.mdReady := mdTLB.io.ready - mdTLB.io.rindex := getIndex(io.in.req.bits.addr) - mdTLB.io.write <> tlbExec.io.mdWrite - - io.ipf := false.B - - // meta reset - val flushTLB = WireInit(false.B) - BoringUtils.addSink(flushTLB, "MOUFlushTLB") - mdTLB.reset := reset.asBool || flushTLB - - // VM enable && io - val vmEnable = satp.asTypeOf(satpBundle).mode === 8.U && (io.csrMMU.priviledgeMode < ModeM) - - def PipelineConnectTLB[T <: Data](left: DecoupledIO[T], right: DecoupledIO[T], update: Bool, rightOutFire: Bool, isFlush: Bool, vmEnable: Bool) = { - val valid = RegInit(false.B) - when (rightOutFire) { valid := false.B } - when (left.valid && right.ready && vmEnable) { valid := true.B } - when (isFlush) { valid := false.B } - - left.ready := right.ready - right.bits <> RegEnable(left.bits, left.valid && right.ready) - right.valid := valid //&& !isFlush - - update := left.valid && right.ready - } - - tlbEmpty.io.in <> DontCare - tlbEmpty.io.out.ready := DontCare - PipelineConnectTLB(io.in.req, tlbExec.io.in, mdUpdate, tlbExec.io.isFinish, io.flush, vmEnable) - if(tlbname == "dtlb") { - PipelineConnect(tlbExec.io.out, tlbEmpty.io.in, tlbEmpty.io.out.fire(), io.flush) - } - when(!vmEnable) { - tlbExec.io.out.ready := true.B // let existed request go out - if( tlbname == "dtlb") { tlbEmpty.io.out.ready := true.B } - io.out.req.valid := io.in.req.valid - io.in.req.ready := io.out.req.ready - io.out.req.bits.addr := 
io.in.req.bits.addr(PAddrBits-1, 0) - io.out.req.bits.size := io.in.req.bits.size - io.out.req.bits.cmd := io.in.req.bits.cmd - io.out.req.bits.wmask := io.in.req.bits.wmask - io.out.req.bits.wdata := io.in.req.bits.wdata - io.out.req.bits.user.map(_ := io.in.req.bits.user.getOrElse(0.U)) - }.otherwise { - if (tlbname == "dtlb") { io.out.req <> tlbEmpty.io.out} - else { io.out.req <> tlbExec.io.out } - } - io.out.resp <> io.in.resp - - // lsu need dtlb signals - if(tlbname == "dtlb") { - val alreadyOutFinish = RegEnable(true.B, init=false.B, tlbExec.io.out.valid && !tlbExec.io.out.ready) - when(alreadyOutFinish && tlbExec.io.out.fire()) { alreadyOutFinish := false.B} - val tlbFinish = (tlbExec.io.out.valid && !alreadyOutFinish) || tlbExec.io.pf.isPF() - BoringUtils.addSource(tlbFinish, "DTLBFINISH") - BoringUtils.addSource(io.csrMMU.isPF(), "DTLBPF") - BoringUtils.addSource(vmEnable, "DTLBENABLE") - } - - // instruction page fault - if (tlbname == "itlb") { - when (tlbExec.io.ipf && vmEnable) { - tlbExec.io.out.ready := io.cacheEmpty && io.in.resp.ready - io.out.req.valid := false.B - } - - when (tlbExec.io.ipf && vmEnable && io.cacheEmpty) { - io.in.resp.valid := true.B - io.in.resp.bits.rdata := 0.U - io.in.resp.bits.cmd := SimpleBusCmd.readLast - io.in.resp.bits.user.map(_ := tlbExec.io.in.bits.user.getOrElse(0.U)) - io.ipf := tlbExec.io.ipf - } - } - - Debug("InReq(%d, %d) InResp(%d, %d) OutReq(%d, %d) OutResp(%d, %d) vmEnable:%d mode:%d\n", io.in.req.valid, io.in.req.ready, io.in.resp.valid, io.in.resp.ready, io.out.req.valid, io.out.req.ready, io.out.resp.valid, io.out.resp.ready, vmEnable, io.csrMMU.priviledgeMode) - Debug("InReq: addr:%x cmd:%d wdata:%x OutReq: addr:%x cmd:%x wdata:%x\n", io.in.req.bits.addr, io.in.req.bits.cmd, io.in.req.bits.wdata, io.out.req.bits.addr, io.out.req.bits.cmd, io.out.req.bits.wdata) - Debug("OutResp: rdata:%x cmd:%x Inresp: rdata:%x cmd:%x\n", io.out.resp.bits.rdata, io.out.resp.bits.cmd, io.in.resp.bits.rdata, 
io.in.resp.bits.cmd) - Debug("satp:%x flush:%d cacheEmpty:%d instrPF:%d loadPF:%d storePF:%d \n", satp, io.flush, io.cacheEmpty, io.ipf, io.csrMMU.loadPF, io.csrMMU.storePF) -} - class EmbeddedTLBExec(implicit val tlbConfig: TLBConfig) extends TlbModule{ val io = IO(new Bundle { val in = Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits, addrBits = VAddrBits))) @@ -395,38 +236,4 @@ class EmbeddedTLBExec(implicit val tlbConfig: TLBConfig) extends TlbModule{ Debug("md: wen:%d windex:%x waymask:%x vpn:%x asid:%x mask:%x flag:%x asid:%x ppn:%x pteaddr:%x\n", io.mdWrite.wen, io.mdWrite.windex, io.mdWrite.waymask, io.mdWrite.wdata.asTypeOf(tlbBundle).vpn, io.mdWrite.wdata.asTypeOf(tlbBundle).asid, io.mdWrite.wdata.asTypeOf(tlbBundle).mask, io.mdWrite.wdata.asTypeOf(tlbBundle).flag, io.mdWrite.wdata.asTypeOf(tlbBundle).asid, io.mdWrite.wdata.asTypeOf(tlbBundle).ppn, io.mdWrite.wdata.asTypeOf(tlbBundle).pteaddr) Debug("MemReq(%d, %d) MemResp(%d, %d) addr:%x cmd:%d rdata:%x cmd:%d\n", io.mem.req.valid, io.mem.req.ready, io.mem.resp.valid, io.mem.resp.ready, io.mem.req.bits.addr, io.mem.req.bits.cmd, io.mem.resp.bits.rdata, io.mem.resp.bits.cmd) Debug("io.ipf:%d hitinstrPF:%d missIPF:%d pf.loadPF:%d pf.storePF:%d loadPF:%d storePF:%d\n", io.ipf, hitinstrPF, missIPF, io.pf.loadPF, io.pf.storePF, loadPF, storePF) -} - -class EmbeddedTLBEmpty(implicit val tlbConfig: TLBConfig) extends TlbModule { - val io = IO(new Bundle { - val in = Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits))) - val out = Decoupled(new SimpleBusReqBundle(userBits = userBits)) - }) - - io.out <> io.in -} - -class EmbeddedTLB_fake(implicit val tlbConfig: TLBConfig) extends TlbModule with HasTLBIO { - io.mem <> DontCare - io.out <> io.in - io.csrMMU.loadPF := false.B - io.csrMMU.storePF := false.B - io.csrMMU.addr := io.in.req.bits.addr - io.ipf := false.B -} - - -object EmbeddedTLB { - def apply(in: SimpleBusUC, mem: SimpleBusUC, flush: Bool, csrMMU: MMUIO, enable: Boolean = 
true)(implicit tlbConfig: TLBConfig) = { - val tlb = if (enable) { - Module(new EmbeddedTLB) - } else { - Module(new EmbeddedTLB_fake) - } - tlb.io.in <> in - tlb.io.mem <> mem - tlb.io.flush := flush - tlb.io.csrMMU <> csrMMU - tlb - } } \ No newline at end of file diff --git a/src/main/scala/nutcore/mem/TLB.scala b/src/main/scala/nutcore/mem/tlb/TLB.scala similarity index 50% rename from src/main/scala/nutcore/mem/TLB.scala rename to src/main/scala/nutcore/mem/tlb/TLB.scala index bcd452174..5d1c71564 100644 --- a/src/main/scala/nutcore/mem/TLB.scala +++ b/src/main/scala/nutcore/mem/tlb/TLB.scala @@ -14,18 +14,20 @@ * See the Mulan PSL v2 for more details. ***************************************************************************************/ -package nutcore +package nutcore.mem.tlb import chisel3._ import chisel3.util._ import chisel3.util.experimental.BoringUtils +import nutcore._ + import bus.simplebus._ import bus.axi4._ import utils._ import top.Settings -sealed trait Sv39Const extends HasNutCoreParameter{ +trait Sv39Const extends HasNutCoreParameter{ val Level = 3 val offLen = 12 val ppn0Len = 9 @@ -133,7 +135,7 @@ sealed trait Sv39Const extends HasNutCoreParameter{ } -sealed case class TLBConfig ( +case class TLBConfig ( name: String = "tlb", userBits: Int = 0, @@ -204,7 +206,7 @@ trait HasTlbConst extends Sv39Const{ abstract class TlbBundle(implicit tlbConfig: TLBConfig) extends NutCoreBundle with HasNutCoreParameter with HasTlbConst with Sv39Const abstract class TlbModule(implicit tlbConfig: TLBConfig) extends NutCoreModule with HasNutCoreParameter with HasTlbConst with Sv39Const with HasCSRConst -sealed class TLBMDWriteBundle (val IndexBits: Int, val Ways: Int, val tlbLen: Int) extends Bundle with HasNutCoreParameter with Sv39Const { +class TLBMDWriteBundle (val IndexBits: Int, val Ways: Int, val tlbLen: Int) extends Bundle with HasNutCoreParameter with Sv39Const { val wen = Output(Bool()) val windex = Output(UInt(IndexBits.W)) val waymask = 
Output(UInt(Ways.W)) @@ -218,7 +220,7 @@ sealed class TLBMDWriteBundle (val IndexBits: Int, val Ways: Int, val tlbLen: In } } -sealed class TLBMD(implicit val tlbConfig: TLBConfig) extends TlbModule { +class TLBMD(implicit val tlbConfig: TLBConfig) extends TlbModule { class TLBMDIO extends Bundle { val tlbmd = Output(Vec(Ways, UInt(tlbLen.W))) val write = Flipped(new TLBMDWriteBundle(IndexBits = IndexBits, Ways = Ways, tlbLen = tlbLen)) @@ -361,247 +363,8 @@ class TLB(implicit val tlbConfig: TLBConfig) extends TlbModule{ Debug("satp:%x flush:%d cacheEmpty:%d instrPF:%d loadPF:%d storePF:%d \n", satp, io.flush, io.cacheEmpty, io.ipf, io.csrMMU.loadPF, io.csrMMU.storePF) } -sealed class TLBExec(implicit val tlbConfig: TLBConfig) extends TlbModule{ - class TLBExecIO extends Bundle { - val in = Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits, addrBits = VAddrBits))) - val out = Decoupled(new SimpleBusReqBundle(userBits = userBits)) - - val md = Input(Vec(Ways, UInt(tlbLen.W))) - val mdWrite = new TLBMDWriteBundle(IndexBits = IndexBits, Ways = Ways, tlbLen = tlbLen) - val mdReady = Input(Bool()) - - val mem = new SimpleBusUC(userBits = userBits) - val flush = Input(Bool()) - val satp = Input(UInt(XLEN.W)) - val pf = new MMUIO - val ipf = Output(Bool()) - val isFinish = Output(Bool()) - } - val io = IO(new TLBExecIO) - - val md = io.md//RegEnable(mdTLB.io.tlbmd, io.in.ready) - - // lazy renaming - val req = io.in.bits - val vpn = req.addr.asTypeOf(vaBundle2).vpn.asTypeOf(vpnBundle) - val pf = io.pf - val satp = io.satp.asTypeOf(satpBundle) - val ifecth = if(tlbname == "itlb") true.B else false.B - - // pf init - pf.loadPF := false.B - pf.storePF := false.B - pf.addr := req.addr - - // check hit or miss - val hitVec = VecInit(md.map(m => m.asTypeOf(tlbBundle).flag.asTypeOf(flagBundle).v && (m.asTypeOf(tlbBundle).asid === satp.asid) && MaskEQ(m.asTypeOf(tlbBundle).mask, m.asTypeOf(tlbBundle).vpn, vpn.asUInt))).asUInt - val hit = io.in.valid && hitVec.orR - val 
miss = io.in.valid && !hitVec.orR - - val victimWaymask = if (Ways > 1) (1.U << LFSR64()(log2Up(Ways)-1,0)) else "b1".U - val waymask = Mux(hit, hitVec, victimWaymask) - - val loadPF = WireInit(false.B) - val storePF = WireInit(false.B) - - // hit - val hitMeta = Mux1H(waymask, md).asTypeOf(tlbBundle2).meta.asTypeOf(metaBundle) - val hitData = Mux1H(waymask, md).asTypeOf(tlbBundle2).data.asTypeOf(dataBundle) - val hitFlag = hitMeta.flag.asTypeOf(flagBundle) - val hitMask = hitMeta.mask - // hit write back pte.flag - val hitinstrPF = WireInit(false.B) - val hitWB = hit && (!hitFlag.a || !hitFlag.d && req.isWrite()) && !hitinstrPF && !(loadPF || storePF || io.pf.isPF()) - val hitRefillFlag = Cat(req.isWrite().asUInt, 1.U(1.W), 0.U(6.W)) | hitFlag.asUInt - val hitWBStore = RegEnable(Cat(0.U(10.W), hitData.ppn, 0.U(2.W), hitRefillFlag), hitWB) - - // hit permission check - val hitCheck = hit /*&& hitFlag.v */&& !(pf.priviledgeMode === ModeU && !hitFlag.u) && !(pf.priviledgeMode === ModeS && hitFlag.u && (!pf.status_sum || ifecth)) - val hitExec = hitCheck && hitFlag.x - val hitLoad = hitCheck && (hitFlag.r || pf.status_mxr && hitFlag.x) - val hitStore = hitCheck && hitFlag.w - - io.pf.loadPF := loadPF //RegNext(loadPF, init =false.B) - io.pf.storePF := storePF //RegNext(storePF, init = false.B) - - if (tlbname == "itlb") { hitinstrPF := !hitExec && hit} - if (tlbname == "dtlb") { - loadPF := !hitLoad && req.isRead() && hit - storePF := (!hitStore && req.isWrite() && hit) - // AMO pagefault type will be fixed in LSU - } - - // miss - val s_idle :: s_memReadReq :: s_memReadResp :: s_write_pte :: s_wait_resp :: s_miss_slpf :: Nil = Enum(6) - val state = RegInit(s_idle) - val level = RegInit(Level.U(log2Up(Level).W)) - - val memRespStore = Reg(UInt(XLEN.W)) - val missMask = WireInit("h3ffff".U(maskLen.W)) - val missMaskStore = Reg(UInt(maskLen.W)) - val missMetaRefill = WireInit(false.B) - val missRefillFlag = WireInit(0.U(8.W)) - val memRdata = 
io.mem.resp.bits.rdata.asTypeOf(pteBundle) - val raddr = Reg(UInt(PAddrBits.W)) - val alreadyOutFire = RegEnable(true.B, init = false.B, if(tlbname == "itlb") io.out.fire else io.out.valid) - - //handle flush - val needFlush = RegInit(false.B) - val ioFlush = io.flush - val isFlush = needFlush || ioFlush - when (ioFlush && (state =/= s_idle)) { needFlush := true.B} - if(tlbname == "itlb"){ - when (io.out.fire() && needFlush) { needFlush := false.B} - } - if(tlbname == "dtlb"){ - when (io.out.valid && needFlush) { needFlush := false.B} - } - - val missIPF = RegInit(false.B) - - // state machine to handle miss(ptw) and pte-writing-back - switch (state) { - is (s_idle) { - when (!ioFlush && hitWB) { - state := s_write_pte - needFlush := false.B - alreadyOutFire := false.B - }.elsewhen (miss && !ioFlush) { - state := s_memReadReq - raddr := paddrApply(satp.ppn, vpn.vpn2) // - level := Level.U - needFlush := false.B - alreadyOutFire := false.B - } - } - - is (s_memReadReq) { - when (isFlush) { - state := s_idle - needFlush := false.B - }.elsewhen (io.mem.req.fire()) { state := s_memReadResp} - } - - is (s_memReadResp) { - val missflag = memRdata.flag.asTypeOf(flagBundle) - when (io.mem.resp.fire()) { - when (isFlush) { - state := s_idle - needFlush := false.B - }.elsewhen (!(missflag.r || missflag.x) && (level===3.U || level===2.U)) { - when(!missflag.v || (!missflag.r && missflag.w)) { //TODO: fix needflush - if(tlbname == "itlb") { state := s_wait_resp } else { state := s_miss_slpf } - if(tlbname == "itlb") { missIPF := true.B } - if(tlbname == "dtlb") { - loadPF := req.isRead() - storePF := req.isWrite() - } - Debug("tlbException!!! 
") - Debug(false, p" req:${req} Memreq:${io.mem.req} MemResp:${io.mem.resp}") - Debug(false, " level:%d",level) - Debug(false, "\n") - }.otherwise { - state := s_memReadReq - raddr := paddrApply(memRdata.ppn, Mux(level === 3.U, vpn.vpn1, vpn.vpn0)) - } - }.elsewhen (level =/= 0.U) { //TODO: fix needFlush - val permCheck = missflag.v && !(pf.priviledgeMode === ModeU && !missflag.u) && !(pf.priviledgeMode === ModeS && missflag.u && (!pf.status_sum || ifecth)) - val permExec = permCheck && missflag.x - val permLoad = permCheck && (missflag.r || pf.status_mxr && missflag.x) - val permStore = permCheck && missflag.w - val updateAD = if (Settings.get("FPGAPlatform")) !missflag.a || (!missflag.d && req.isWrite()) else false.B - val updateData = Cat( 0.U(56.W), req.isWrite(), 1.U(1.W), 0.U(6.W) ) - missRefillFlag := Cat(req.isWrite(), 1.U(1.W), 0.U(6.W)) | missflag.asUInt - memRespStore := io.mem.resp.bits.rdata | updateData - if(tlbname == "itlb") { - when (!permExec) { missIPF := true.B ; state := s_wait_resp} - .otherwise { - state := Mux(updateAD, s_write_pte, s_wait_resp) - missMetaRefill := true.B - } - } - if(tlbname == "dtlb") { - when((!permLoad && req.isRead()) || (!permStore && req.isWrite())) { - state := s_miss_slpf - loadPF := req.isRead() - storePF := req.isWrite() - }.otherwise { - state := Mux(updateAD, s_write_pte, s_wait_resp) - missMetaRefill := true.B - } - } - missMask := Mux(level===3.U, 0.U(maskLen.W), Mux(level===2.U, "h3fe00".U(maskLen.W), "h3ffff".U(maskLen.W))) - missMaskStore := missMask - } - level := level - 1.U - } - } - - is (s_write_pte) { - when (isFlush) { - state := s_idle - needFlush := false.B - }.elsewhen (io.mem.req.fire()) { state := s_wait_resp } - } - - is (s_wait_resp) { - if(tlbname == "itlb"){ - when (io.out.fire() || ioFlush || alreadyOutFire){ - state := s_idle - missIPF := false.B - alreadyOutFire := false.B - } - } - if(tlbname == "dtlb"){ - state := s_idle - missIPF := false.B - alreadyOutFire := false.B - } - } - - is 
(s_miss_slpf) { - state := s_idle - } - } - - // mem - val cmd = Mux(state === s_write_pte, SimpleBusCmd.write, SimpleBusCmd.read) - io.mem.req.bits.apply(addr = Mux(hitWB, hitData.pteaddr, raddr), cmd = cmd, size = (if (XLEN == 64) "b11".U else "b10".U), wdata = Mux( hitWB, hitWBStore, memRespStore), wmask = 0xff.U) - io.mem.req.valid := ((state === s_memReadReq || state === s_write_pte) && !isFlush) - io.mem.resp.ready := true.B - - // tlb refill - io.mdWrite.apply(wen = RegNext((missMetaRefill && !isFlush) || (hitWB && state === s_idle && !isFlush), init = false.B), - windex = RegNext(getIndex(req.addr)), waymask = RegNext(waymask), vpn = RegNext(vpn.asUInt), - asid = RegNext(Mux(hitWB, hitMeta.asid, satp.asid)), mask = RegNext(Mux(hitWB, hitMask, missMask)), - flag = RegNext(Mux(hitWB, hitRefillFlag, missRefillFlag)), ppn = RegNext(Mux(hitWB, hitData.ppn, memRdata.ppn)), - pteaddr = RegNext((Mux(hitWB, hitData.pteaddr, raddr)))) - - // io - io.out.bits := req - io.out.bits.addr := Mux(hit, maskPaddr(hitData.ppn, req.addr(PAddrBits-1, 0), hitMask), maskPaddr(memRespStore.asTypeOf(pteBundle).ppn, req.addr(PAddrBits-1, 0), missMaskStore)) - io.out.valid := io.in.valid && Mux(hit && !hitWB, !(io.pf.isPF() || loadPF || storePF), state === s_wait_resp)// && !alreadyOutFire - - io.in.ready := io.out.ready && (state === s_idle) && !miss && !hitWB && io.mdReady && (!io.pf.isPF() && !loadPF && !storePF)//maybe be optimized - - io.ipf := Mux(hit, hitinstrPF, missIPF) - io.isFinish := io.out.fire() || io.pf.isPF() - - if(tlbname == "dtlb") { - io.isFinish := io.out.valid || io.pf.isPF() - io.out.valid := io.in.valid && (Mux(hit && !hitWB, true.B, state === s_wait_resp) || loadPF || storePF)// && !alreadyOutFire - } - Debug("In(%d, %d) Out(%d, %d) InAddr:%x OutAddr:%x cmd:%d \n", io.in.valid, io.in.ready, io.out.valid, io.out.ready, req.addr, io.out.bits.addr, req.cmd) - Debug("io.Flush:%d needFlush:%d alreadyOutFire:%d isFinish:%d\n", io.flush, needFlush, alreadyOutFire, 
io.isFinish) - Debug("hit:%d hitWB:%d hitVPN:%x hitFlag:%x hitPPN:%x hitRefillFlag:%x hitWBStore:%x hitCheck:%d hitExec:%d hitLoad:%d hitStore:%d\n", hit, hitWB, hitMeta.vpn, hitFlag.asUInt, hitData.ppn, hitRefillFlag, hitWBStore, hitCheck, hitExec, hitLoad, hitStore) - Debug("miss:%d state:%d level:%d raddr:%x memRdata:%x missMask:%x missRefillFlag:%x missMetaRefill:%d\n", miss, state, level, raddr, memRdata.asUInt, missMask, missRefillFlag, missMetaRefill) - Debug("meta/data: (0)%x|%b|%x (1)%x|%b|%x (2)%x|%b|%x (3)%x|%b|%x rread:%d\n", md(0).asTypeOf(tlbBundle).vpn, md(0).asTypeOf(tlbBundle).flag, md(0).asTypeOf(tlbBundle).ppn, md(1).asTypeOf(tlbBundle).vpn, md(1).asTypeOf(tlbBundle).flag, md(1).asTypeOf(tlbBundle).ppn, md(2).asTypeOf(tlbBundle).vpn, md(2).asTypeOf(tlbBundle).flag, md(2).asTypeOf(tlbBundle).ppn, md(3).asTypeOf(tlbBundle).vpn, md(3).asTypeOf(tlbBundle).flag, md(3).asTypeOf(tlbBundle).ppn, io.mdReady) - Debug("md: wen:%d windex:%x waymask:%x vpn:%x asid:%x mask:%x flag:%x asid:%x ppn:%x pteaddr:%x\n", io.mdWrite.wen, io.mdWrite.windex, io.mdWrite.waymask, io.mdWrite.wdata.asTypeOf(tlbBundle).vpn, io.mdWrite.wdata.asTypeOf(tlbBundle).asid, io.mdWrite.wdata.asTypeOf(tlbBundle).mask, io.mdWrite.wdata.asTypeOf(tlbBundle).flag, io.mdWrite.wdata.asTypeOf(tlbBundle).asid, io.mdWrite.wdata.asTypeOf(tlbBundle).ppn, io.mdWrite.wdata.asTypeOf(tlbBundle).pteaddr) - Debug("MemReq(%d, %d) MemResp(%d, %d) addr:%x cmd:%d rdata:%x cmd:%d\n", io.mem.req.valid, io.mem.req.ready, io.mem.resp.valid, io.mem.resp.ready, io.mem.req.bits.addr, io.mem.req.bits.cmd, io.mem.resp.bits.rdata, io.mem.resp.bits.cmd) - Debug("io.ipf:%d hitinstrPF:%d missIPF:%d pf.loadPF:%d pf.storePF:%d loadPF:%d storePF:%d\n", io.ipf, hitinstrPF, missIPF, io.pf.loadPF, io.pf.storePF, loadPF, storePF) -} -sealed class TLBEmpty(implicit val tlbConfig: TLBConfig) extends TlbModule { +class TLBEmpty(implicit val tlbConfig: TLBConfig) extends TlbModule { class TLBEmptyIO extends Bundle { val in = 
Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits))) val out = Decoupled(new SimpleBusReqBundle(userBits = userBits)) diff --git a/src/main/scala/nutcore/mem/tlb/TLBExec.scala b/src/main/scala/nutcore/mem/tlb/TLBExec.scala new file mode 100644 index 000000000..b0ae01427 --- /dev/null +++ b/src/main/scala/nutcore/mem/tlb/TLBExec.scala @@ -0,0 +1,254 @@ + +package nutcore.mem.tlb + +import chisel3._ +import chisel3.util._ +import chisel3.util.experimental.BoringUtils + +import nutcore._ + +import bus.simplebus._ +import bus.axi4._ +import utils._ +import top.Settings + + +class TLBExec(implicit val tlbConfig: TLBConfig) extends TlbModule{ + class TLBExecIO extends Bundle { + val in = Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits, addrBits = VAddrBits))) + val out = Decoupled(new SimpleBusReqBundle(userBits = userBits)) + + val md = Input(Vec(Ways, UInt(tlbLen.W))) + val mdWrite = new TLBMDWriteBundle(IndexBits = IndexBits, Ways = Ways, tlbLen = tlbLen) + val mdReady = Input(Bool()) + + val mem = new SimpleBusUC(userBits = userBits) + val flush = Input(Bool()) + val satp = Input(UInt(XLEN.W)) + val pf = new MMUIO + val ipf = Output(Bool()) + val isFinish = Output(Bool()) + } + val io = IO(new TLBExecIO) + + val md = io.md//RegEnable(mdTLB.io.tlbmd, io.in.ready) + + // lazy renaming + val req = io.in.bits + val vpn = req.addr.asTypeOf(vaBundle2).vpn.asTypeOf(vpnBundle) + val pf = io.pf + val satp = io.satp.asTypeOf(satpBundle) + val ifecth = if(tlbname == "itlb") true.B else false.B + + // pf init + pf.loadPF := false.B + pf.storePF := false.B + pf.addr := req.addr + + // check hit or miss + val hitVec = VecInit(md.map(m => m.asTypeOf(tlbBundle).flag.asTypeOf(flagBundle).v && (m.asTypeOf(tlbBundle).asid === satp.asid) && MaskEQ(m.asTypeOf(tlbBundle).mask, m.asTypeOf(tlbBundle).vpn, vpn.asUInt))).asUInt + val hit = io.in.valid && hitVec.orR + val miss = io.in.valid && !hitVec.orR + + val victimWaymask = if (Ways > 1) (1.U << 
LFSR64()(log2Up(Ways)-1,0)) else "b1".U + val waymask = Mux(hit, hitVec, victimWaymask) + + val loadPF = WireInit(false.B) + val storePF = WireInit(false.B) + + // hit + val hitMeta = Mux1H(waymask, md).asTypeOf(tlbBundle2).meta.asTypeOf(metaBundle) + val hitData = Mux1H(waymask, md).asTypeOf(tlbBundle2).data.asTypeOf(dataBundle) + val hitFlag = hitMeta.flag.asTypeOf(flagBundle) + val hitMask = hitMeta.mask + // hit write back pte.flag + val hitinstrPF = WireInit(false.B) + val hitWB = hit && (!hitFlag.a || !hitFlag.d && req.isWrite()) && !hitinstrPF && !(loadPF || storePF || io.pf.isPF()) + val hitRefillFlag = Cat(req.isWrite().asUInt, 1.U(1.W), 0.U(6.W)) | hitFlag.asUInt + val hitWBStore = RegEnable(Cat(0.U(10.W), hitData.ppn, 0.U(2.W), hitRefillFlag), hitWB) + + // hit permission check + val hitCheck = hit /*&& hitFlag.v */&& !(pf.priviledgeMode === ModeU && !hitFlag.u) && !(pf.priviledgeMode === ModeS && hitFlag.u && (!pf.status_sum || ifecth)) + val hitExec = hitCheck && hitFlag.x + val hitLoad = hitCheck && (hitFlag.r || pf.status_mxr && hitFlag.x) + val hitStore = hitCheck && hitFlag.w + + io.pf.loadPF := loadPF //RegNext(loadPF, init =false.B) + io.pf.storePF := storePF //RegNext(storePF, init = false.B) + + if (tlbname == "itlb") { hitinstrPF := !hitExec && hit} + if (tlbname == "dtlb") { + loadPF := !hitLoad && req.isRead() && hit + storePF := (!hitStore && req.isWrite() && hit) + // AMO pagefault type will be fixed in LSU + } + + // miss + val s_idle :: s_memReadReq :: s_memReadResp :: s_write_pte :: s_wait_resp :: s_miss_slpf :: Nil = Enum(6) + val state = RegInit(s_idle) + val level = RegInit(Level.U(log2Up(Level).W)) + + val memRespStore = Reg(UInt(XLEN.W)) + val missMask = WireInit("h3ffff".U(maskLen.W)) + val missMaskStore = Reg(UInt(maskLen.W)) + val missMetaRefill = WireInit(false.B) + val missRefillFlag = WireInit(0.U(8.W)) + val memRdata = io.mem.resp.bits.rdata.asTypeOf(pteBundle) + val raddr = Reg(UInt(PAddrBits.W)) + val alreadyOutFire = 
RegEnable(true.B, init = false.B, if(tlbname == "itlb") io.out.fire else io.out.valid) + + //handle flush + val needFlush = RegInit(false.B) + val ioFlush = io.flush + val isFlush = needFlush || ioFlush + when (ioFlush && (state =/= s_idle)) { needFlush := true.B} + if(tlbname == "itlb"){ + when (io.out.fire() && needFlush) { needFlush := false.B} + } + if(tlbname == "dtlb"){ + when (io.out.valid && needFlush) { needFlush := false.B} + } + + val missIPF = RegInit(false.B) + + // state machine to handle miss(ptw) and pte-writing-back + switch (state) { + is (s_idle) { + when (!ioFlush && hitWB) { + state := s_write_pte + needFlush := false.B + alreadyOutFire := false.B + }.elsewhen (miss && !ioFlush) { + state := s_memReadReq + raddr := paddrApply(satp.ppn, vpn.vpn2) // + level := Level.U + needFlush := false.B + alreadyOutFire := false.B + } + } + + is (s_memReadReq) { + when (isFlush) { + state := s_idle + needFlush := false.B + }.elsewhen (io.mem.req.fire()) { state := s_memReadResp} + } + + is (s_memReadResp) { + val missflag = memRdata.flag.asTypeOf(flagBundle) + when (io.mem.resp.fire()) { + when (isFlush) { + state := s_idle + needFlush := false.B + }.elsewhen (!(missflag.r || missflag.x) && (level===3.U || level===2.U)) { + when(!missflag.v || (!missflag.r && missflag.w)) { //TODO: fix needflush + if(tlbname == "itlb") { state := s_wait_resp } else { state := s_miss_slpf } + if(tlbname == "itlb") { missIPF := true.B } + if(tlbname == "dtlb") { + loadPF := req.isRead() + storePF := req.isWrite() + } + Debug("tlbException!!! 
") + Debug(false, p" req:${req} Memreq:${io.mem.req} MemResp:${io.mem.resp}") + Debug(false, " level:%d",level) + Debug(false, "\n") + }.otherwise { + state := s_memReadReq + raddr := paddrApply(memRdata.ppn, Mux(level === 3.U, vpn.vpn1, vpn.vpn0)) + } + }.elsewhen (level =/= 0.U) { //TODO: fix needFlush + val permCheck = missflag.v && !(pf.priviledgeMode === ModeU && !missflag.u) && !(pf.priviledgeMode === ModeS && missflag.u && (!pf.status_sum || ifecth)) + val permExec = permCheck && missflag.x + val permLoad = permCheck && (missflag.r || pf.status_mxr && missflag.x) + val permStore = permCheck && missflag.w + val updateAD = if (Settings.get("FPGAPlatform")) !missflag.a || (!missflag.d && req.isWrite()) else false.B + val updateData = Cat( 0.U(56.W), req.isWrite(), 1.U(1.W), 0.U(6.W) ) + missRefillFlag := Cat(req.isWrite(), 1.U(1.W), 0.U(6.W)) | missflag.asUInt + memRespStore := io.mem.resp.bits.rdata | updateData + if(tlbname == "itlb") { + when (!permExec) { missIPF := true.B ; state := s_wait_resp} + .otherwise { + state := Mux(updateAD, s_write_pte, s_wait_resp) + missMetaRefill := true.B + } + } + if(tlbname == "dtlb") { + when((!permLoad && req.isRead()) || (!permStore && req.isWrite())) { + state := s_miss_slpf + loadPF := req.isRead() + storePF := req.isWrite() + }.otherwise { + state := Mux(updateAD, s_write_pte, s_wait_resp) + missMetaRefill := true.B + } + } + missMask := Mux(level===3.U, 0.U(maskLen.W), Mux(level===2.U, "h3fe00".U(maskLen.W), "h3ffff".U(maskLen.W))) + missMaskStore := missMask + } + level := level - 1.U + } + } + + is (s_write_pte) { + when (isFlush) { + state := s_idle + needFlush := false.B + }.elsewhen (io.mem.req.fire()) { state := s_wait_resp } + } + + is (s_wait_resp) { + if(tlbname == "itlb"){ + when (io.out.fire() || ioFlush || alreadyOutFire){ + state := s_idle + missIPF := false.B + alreadyOutFire := false.B + } + } + if(tlbname == "dtlb"){ + state := s_idle + missIPF := false.B + alreadyOutFire := false.B + } + } + + is 
(s_miss_slpf) { + state := s_idle + } + } + + // mem + val cmd = Mux(state === s_write_pte, SimpleBusCmd.write, SimpleBusCmd.read) + io.mem.req.bits.apply(addr = Mux(hitWB, hitData.pteaddr, raddr), cmd = cmd, size = (if (XLEN == 64) "b11".U else "b10".U), wdata = Mux( hitWB, hitWBStore, memRespStore), wmask = 0xff.U) + io.mem.req.valid := ((state === s_memReadReq || state === s_write_pte) && !isFlush) + io.mem.resp.ready := true.B + + // tlb refill + io.mdWrite.apply(wen = RegNext((missMetaRefill && !isFlush) || (hitWB && state === s_idle && !isFlush), init = false.B), + windex = RegNext(getIndex(req.addr)), waymask = RegNext(waymask), vpn = RegNext(vpn.asUInt), + asid = RegNext(Mux(hitWB, hitMeta.asid, satp.asid)), mask = RegNext(Mux(hitWB, hitMask, missMask)), + flag = RegNext(Mux(hitWB, hitRefillFlag, missRefillFlag)), ppn = RegNext(Mux(hitWB, hitData.ppn, memRdata.ppn)), + pteaddr = RegNext((Mux(hitWB, hitData.pteaddr, raddr)))) + + // io + io.out.bits := req + io.out.bits.addr := Mux(hit, maskPaddr(hitData.ppn, req.addr(PAddrBits-1, 0), hitMask), maskPaddr(memRespStore.asTypeOf(pteBundle).ppn, req.addr(PAddrBits-1, 0), missMaskStore)) + io.out.valid := io.in.valid && Mux(hit && !hitWB, !(io.pf.isPF() || loadPF || storePF), state === s_wait_resp)// && !alreadyOutFire + + io.in.ready := io.out.ready && (state === s_idle) && !miss && !hitWB && io.mdReady && (!io.pf.isPF() && !loadPF && !storePF)//maybe be optimized + + io.ipf := Mux(hit, hitinstrPF, missIPF) + io.isFinish := io.out.fire() || io.pf.isPF() + + if(tlbname == "dtlb") { + io.isFinish := io.out.valid || io.pf.isPF() + io.out.valid := io.in.valid && (Mux(hit && !hitWB, true.B, state === s_wait_resp) || loadPF || storePF)// && !alreadyOutFire + } + Debug("In(%d, %d) Out(%d, %d) InAddr:%x OutAddr:%x cmd:%d \n", io.in.valid, io.in.ready, io.out.valid, io.out.ready, req.addr, io.out.bits.addr, req.cmd) + Debug("io.Flush:%d needFlush:%d alreadyOutFire:%d isFinish:%d\n", io.flush, needFlush, alreadyOutFire, 
io.isFinish) + Debug("hit:%d hitWB:%d hitVPN:%x hitFlag:%x hitPPN:%x hitRefillFlag:%x hitWBStore:%x hitCheck:%d hitExec:%d hitLoad:%d hitStore:%d\n", hit, hitWB, hitMeta.vpn, hitFlag.asUInt, hitData.ppn, hitRefillFlag, hitWBStore, hitCheck, hitExec, hitLoad, hitStore) + Debug("miss:%d state:%d level:%d raddr:%x memRdata:%x missMask:%x missRefillFlag:%x missMetaRefill:%d\n", miss, state, level, raddr, memRdata.asUInt, missMask, missRefillFlag, missMetaRefill) + Debug("meta/data: (0)%x|%b|%x (1)%x|%b|%x (2)%x|%b|%x (3)%x|%b|%x rread:%d\n", md(0).asTypeOf(tlbBundle).vpn, md(0).asTypeOf(tlbBundle).flag, md(0).asTypeOf(tlbBundle).ppn, md(1).asTypeOf(tlbBundle).vpn, md(1).asTypeOf(tlbBundle).flag, md(1).asTypeOf(tlbBundle).ppn, md(2).asTypeOf(tlbBundle).vpn, md(2).asTypeOf(tlbBundle).flag, md(2).asTypeOf(tlbBundle).ppn, md(3).asTypeOf(tlbBundle).vpn, md(3).asTypeOf(tlbBundle).flag, md(3).asTypeOf(tlbBundle).ppn, io.mdReady) + Debug("md: wen:%d windex:%x waymask:%x vpn:%x asid:%x mask:%x flag:%x asid:%x ppn:%x pteaddr:%x\n", io.mdWrite.wen, io.mdWrite.windex, io.mdWrite.waymask, io.mdWrite.wdata.asTypeOf(tlbBundle).vpn, io.mdWrite.wdata.asTypeOf(tlbBundle).asid, io.mdWrite.wdata.asTypeOf(tlbBundle).mask, io.mdWrite.wdata.asTypeOf(tlbBundle).flag, io.mdWrite.wdata.asTypeOf(tlbBundle).asid, io.mdWrite.wdata.asTypeOf(tlbBundle).ppn, io.mdWrite.wdata.asTypeOf(tlbBundle).pteaddr) + Debug("MemReq(%d, %d) MemResp(%d, %d) addr:%x cmd:%d rdata:%x cmd:%d\n", io.mem.req.valid, io.mem.req.ready, io.mem.resp.valid, io.mem.resp.ready, io.mem.req.bits.addr, io.mem.req.bits.cmd, io.mem.resp.bits.rdata, io.mem.resp.bits.cmd) + Debug("io.ipf:%d hitinstrPF:%d missIPF:%d pf.loadPF:%d pf.storePF:%d loadPF:%d storePF:%d\n", io.ipf, hitinstrPF, missIPF, io.pf.loadPF, io.pf.storePF, loadPF, storePF) +} diff --git a/src/main/scala/system/NutShell.scala b/src/main/scala/system/NutShell.scala index 4afa16e32..0566f7b2a 100644 --- a/src/main/scala/system/NutShell.scala +++ 
b/src/main/scala/system/NutShell.scala @@ -17,6 +17,8 @@ package system import nutcore._ +import nutcore.mem.cache._ + import bus.axi4.{AXI4, AXI4Lite} import bus.simplebus._ import device.{AXI4CLINT, AXI4PLIC} diff --git a/src/main/scala/system/Prefetcher.scala b/src/main/scala/system/Prefetcher.scala index a66b1f6a0..9b0778845 100644 --- a/src/main/scala/system/Prefetcher.scala +++ b/src/main/scala/system/Prefetcher.scala @@ -16,7 +16,9 @@ package system -import nutcore.{NutCore, NutCoreConfig, HasNutCoreParameter, AddressSpace, Cache, CacheConfig} +import nutcore.{NutCore, NutCoreConfig, HasNutCoreParameter, AddressSpace} +import nutcore.mem.cache._ + import bus.axi4.{AXI4, AXI4Lite} import bus.simplebus._ import utils._ diff --git a/src/main/scala/utils/Debug.scala b/src/main/scala/utils/Debug.scala index 5384ca707..318938ce0 100644 --- a/src/main/scala/utils/Debug.scala +++ b/src/main/scala/utils/Debug.scala @@ -59,7 +59,7 @@ object LogUtil { } } -sealed abstract class LogHelper(val logLevel: LogLevel) { +abstract class LogHelper(val logLevel: LogLevel) { def apply(cond: Bool, fmt: String, data: Bits*)(implicit name: String): Any = apply(cond, Printable.pack(fmt, data:_*)) From e8a31446e122e8825130e975290ac84e57fbc32f Mon Sep 17 00:00:00 2001 From: marvintau Date: Mon, 24 Oct 2022 23:28:37 +0800 Subject: [PATCH 3/3] retouched file naming --- .metals/metals.lock.db | 6 +++--- .metals/metals.mv.db | Bin 53248 -> 57344 bytes .../{Dynamic.scala => BackendDynamic.scala} | 0 ...quential.scala => BackendSequential.scala} | 0 .../{Dynamic.scala => FrontendDynamic.scala} | 0 ...{Embedded.scala => FrontendEmbedded.scala} | 0 ...uential.scala => FrontendSequential.scala} | 0 ...{Dynamic.scala => InstrFetchDynamic.scala} | 0 ...mbedded.scala => InstrFetchEmbedded.scala} | 0 ...ntial.scala => InstrFetchSequential.scala} | 0 .../{Dummy.scala => BranchPredictDummy.scala} | 0 ...namic.scala => BranchPredictDynamic.scala} | 0 ...dded.scala => BranchPredictEmbedded.scala} | 0 
...Legacy.scala => BranchPredictLegacy.scala} | 0 ...al.scala => BranchPredictSequential.scala} | 0 src/main/scala/nutcore/mem/cache/Cache.scala | 6 +++--- .../scala/nutcore/mem/cache/CacheCheck.scala | 6 +++--- .../nutcore/mem/cache/CacheMetaRead.scala | 6 +++--- .../nutcore/mem/cache/CacheWriteBack.scala | 6 +++--- 19 files changed, 15 insertions(+), 15 deletions(-) rename src/main/scala/nutcore/backend/dynamic/{Dynamic.scala => BackendDynamic.scala} (100%) rename src/main/scala/nutcore/backend/sequential/{Sequential.scala => BackendSequential.scala} (100%) rename src/main/scala/nutcore/frontend/{Dynamic.scala => FrontendDynamic.scala} (100%) rename src/main/scala/nutcore/frontend/{Embedded.scala => FrontendEmbedded.scala} (100%) rename src/main/scala/nutcore/frontend/{Sequential.scala => FrontendSequential.scala} (100%) rename src/main/scala/nutcore/frontend/instr_fetch/{Dynamic.scala => InstrFetchDynamic.scala} (100%) rename src/main/scala/nutcore/frontend/instr_fetch/{Embedded.scala => InstrFetchEmbedded.scala} (100%) rename src/main/scala/nutcore/frontend/instr_fetch/{Sequential.scala => InstrFetchSequential.scala} (100%) rename src/main/scala/nutcore/frontend/instr_fetch/branch_predict/{Dummy.scala => BranchPredictDummy.scala} (100%) rename src/main/scala/nutcore/frontend/instr_fetch/branch_predict/{Dynamic.scala => BranchPredictDynamic.scala} (100%) rename src/main/scala/nutcore/frontend/instr_fetch/branch_predict/{Embedded.scala => BranchPredictEmbedded.scala} (100%) rename src/main/scala/nutcore/frontend/instr_fetch/branch_predict/{Legacy.scala => BranchPredictLegacy.scala} (100%) rename src/main/scala/nutcore/frontend/instr_fetch/branch_predict/{Sequential.scala => BranchPredictSequential.scala} (100%) diff --git a/.metals/metals.lock.db b/.metals/metals.lock.db index a8e3d074b..cb74576f7 100644 --- a/.metals/metals.lock.db +++ b/.metals/metals.lock.db @@ -1,6 +1,6 @@ #FileLock -#Mon Oct 24 00:43:59 CST 2022 -server=localhost\:36367 +#Mon Oct 24 22:46:26 
CST 2022 +server=localhost\:42597 hostName=localhost method=file -id=18405baced7c341f59eba69cde4d0c59f48f797cf8c +id=1840a758cb97dd43e099648973046290a4902dd73eb diff --git a/.metals/metals.mv.db b/.metals/metals.mv.db index dc14f78419341014a9cb59cb871a85a0d729519a..0cb6c419009f9b925b7343d50e994c953b934e4f 100644 GIT binary patch delta 5928 zcmeHLU2GIp6uvXN+iiDCfwoX0t-Ga^pJobk=Wk~=T1pyGV#NMMDhW2Tvok)MnF|;Da=M@WlsAh!G$3$rv6G3=h7b@yzV}m98KQ4@5c- zy)(DFd(NEmedm1VPD}+RE(T7lp_I2<*4HbHF{J0Tz4~a9;}nh+4J9(Io?S}*TH1QI zPsAdF3X>YjYWtHMGo)vdm>E__k~{~?!KA^nOh!MLOY+Q!xGSSbZ#W)CgI%3q*>)je734e zw1Xm%uQ#@gLm13(U^!*SL$>1~72_dV@epJ-?T-hqi%dR~AJfxYb1yh?k-ZR!R*0}D z2uxPf@*`?SdnL(hrzy+PB$jL+RPtu;vT;Z(Gqmr3-fbmERmD}QV{6w2&R;sm+gw$X`(#5zW?9R^?xFu!$jqwW#U=~d-#4>r+9HK%{t9uokco3o0^?X@^{-~jt`G22hTt` zsDNq?eTH1>Y9K%N)sL6mvlf&9Uvk_w=C4MwSps|&0GQ0O+sw)*mr(+O6Ho$<15e=E zdzp-%T1Df0o%tjWB$K#QYI^&aUSarpiRRK+D z3JSGsN#LQ|14QxPa2Ih*ZnId?G6v2v6aN_c`S{*T! zmj^u`Y&_*92)t}K%Ij<$3?R#<#?5Ku#5XI$Fd)e(RdHn5E?3W&fbl(O3D*%%I9MjW zm-ZP1cuK6SP^9IRaaYXDV~!#%6(Jh(Wx5zD|0`5+aE zU~CH&nJk}MLyop3T)>nVJ4AA$Hy$v`0U3O!2r>(pjopdnBD5TIm#{{9y6a%j*b`N0E6FrINRT`fU9D6&yWp)4{E|xb*P@RD9Gf)bk+H9LJL5xB>s*!nx zj{Hz)xsVR6^AP9*k%wj}ky6-n!U60+;x^^!tGo9gJY7@nWM>&Te=%HfK3Iv+5)+7x zC4lH4Qd&ghgE@P+J0c&tvj>YJ&zXha?g94jT;8w^TwvZIY`&>5k-t^dZlAMwF!w># zV?{y{6r5HaRt(BEE7Jbyx~N7>R@~chglo|~YcQv4Q;URlhyFyU#w17Uoyg&^B(cbn xN0ucI!wH?QnLT5vwfkr4Q{?+Y>kA#9Y;~#XanDB_kE*gNDO_4JscOv+{{TX;4d?&> delta 3437 zcmeH~U1%It6vyw(W;e-hv)O!$nh&C>kc`RBo!R+HYd!>Plwu^Lh(+z4k4@4fyCEN? 
z1#uSzzgnfu#Wu0UMz9DzXu}{D`Vt?c55AZhL7$`#3L@%@K32Umb7yvvl%!S!AG$AR zXYS6N|2^ma&N*9LixgKPr`s9f%mWYA3tV=}nm5L+SxvQ6KC5Px_$sxJ+uII`fMFN_%GLl7zjZBht+u%} zI*n$!vjD_~Vs9|`)WoYvGv?famb)U@ZAX)3_KTy5_+Ek2r}D<62Dy-p5!{5P>{IA*7GKCdZ4a3v zto=*2iD6?2J2sA(2P=MJJ^cW%ea{g607TZq`VS;O!Y3zQOX{9{Bar`FglWO4Tr-<( z!7q(AGg;h|YqagrX@;#r_C~&owHF3Xw%|AWf0kqxD&)l*b#fH!0p-T{LT<8KrdFX= zr4~`EV}4ecdXI5`t;sY>iq1`0ITEa?iDTA5bX?<=blSZbMq}2T#&aah8id4 ztmE^VVsbO?I^<^a`FVFGoz93ERpjO;rY$WaLkZuwQU~QA4YC(lH7#*@!&sO>Ipc(u zh96Qzik~;!zurC}dJZL>Iuxlt$@2%(&PU`CPyMMDlqH>8$SurSriK(K@I1X5m_I~M zoE~UVXp$v}zglmoRH<(fug$erNS<#VuU&5{K+*98GXgJpJ0qS{{q{Ynt|!wl!kf-6 z(M==jIOAr453&IL927!H@N`KM4T(fUBG8bK#F3B4XG%hKUEUT16a)de(lV+NmL~bq zbXQtoQj@l5Q+#dPrI97~c%aRv1Cn#&NXwL@38J)9esoWof;OKOhEE9Ys~|6mLV~^* z>WV_|r=hMY3Ese|cN=@57ZI|Y27pu+(6vOcTm#J2x5;%sVCx_C?ER$Dkn70Di^LLW zdpB0mTk*xVScehh$FC1YsR-(Z2?Zl7yS1d)R)+uTDX%u^6SBt@{@|PxR6Nr=bx7dJ zBZrtT?JFx|^-L7EeR^NzSi|#dEejYHf1W<95fNu=Km=6XHrLq>3hZVk1t6>0|luc&_EzoGh)Pc`26b!Xw<;ObVe9Rq($cx7J! zfj8!MLCL9TzLYz{b9X5Ltl>H$T%f@q2;mz84T~EMxMJ{;M5!7Z_{d<3{loJ6(T)-{ zK4xYKGgB~0y0&0uQOs1YIN_lssm6cSaxt?M56n-7qhzjl*_$gOMKV|1LwYBfGrqVU znkx>(mc6+`^%^tU-k43VF`Kk8=NOh_?Qim(>|-hm5dhKNJtra{`HzXEznNUz4ePg{YZ7!@83Pvw+mk3f4hZ=2;6cX3e-5@L6{lMVEg1j4 z+}A>yEa-ZLv{+&t42dNe5=;8>NaGpwozrtmXqSKv=64H`VjzZ#K_SH;J5?;SG6`14TEXKj2qGS}6=#Df&kCVMnM~B#1!oUj>W@F{&Dea&wZ9W`E7*z(Gp1sg L?q88eTU-2ZSjS9Y diff --git a/src/main/scala/nutcore/backend/dynamic/Dynamic.scala b/src/main/scala/nutcore/backend/dynamic/BackendDynamic.scala similarity index 100% rename from src/main/scala/nutcore/backend/dynamic/Dynamic.scala rename to src/main/scala/nutcore/backend/dynamic/BackendDynamic.scala diff --git a/src/main/scala/nutcore/backend/sequential/Sequential.scala b/src/main/scala/nutcore/backend/sequential/BackendSequential.scala similarity index 100% rename from src/main/scala/nutcore/backend/sequential/Sequential.scala rename to src/main/scala/nutcore/backend/sequential/BackendSequential.scala diff --git 
a/src/main/scala/nutcore/frontend/Dynamic.scala b/src/main/scala/nutcore/frontend/FrontendDynamic.scala similarity index 100% rename from src/main/scala/nutcore/frontend/Dynamic.scala rename to src/main/scala/nutcore/frontend/FrontendDynamic.scala diff --git a/src/main/scala/nutcore/frontend/Embedded.scala b/src/main/scala/nutcore/frontend/FrontendEmbedded.scala similarity index 100% rename from src/main/scala/nutcore/frontend/Embedded.scala rename to src/main/scala/nutcore/frontend/FrontendEmbedded.scala diff --git a/src/main/scala/nutcore/frontend/Sequential.scala b/src/main/scala/nutcore/frontend/FrontendSequential.scala similarity index 100% rename from src/main/scala/nutcore/frontend/Sequential.scala rename to src/main/scala/nutcore/frontend/FrontendSequential.scala diff --git a/src/main/scala/nutcore/frontend/instr_fetch/Dynamic.scala b/src/main/scala/nutcore/frontend/instr_fetch/InstrFetchDynamic.scala similarity index 100% rename from src/main/scala/nutcore/frontend/instr_fetch/Dynamic.scala rename to src/main/scala/nutcore/frontend/instr_fetch/InstrFetchDynamic.scala diff --git a/src/main/scala/nutcore/frontend/instr_fetch/Embedded.scala b/src/main/scala/nutcore/frontend/instr_fetch/InstrFetchEmbedded.scala similarity index 100% rename from src/main/scala/nutcore/frontend/instr_fetch/Embedded.scala rename to src/main/scala/nutcore/frontend/instr_fetch/InstrFetchEmbedded.scala diff --git a/src/main/scala/nutcore/frontend/instr_fetch/Sequential.scala b/src/main/scala/nutcore/frontend/instr_fetch/InstrFetchSequential.scala similarity index 100% rename from src/main/scala/nutcore/frontend/instr_fetch/Sequential.scala rename to src/main/scala/nutcore/frontend/instr_fetch/InstrFetchSequential.scala diff --git a/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Dummy.scala b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictDummy.scala similarity index 100% rename from 
src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Dummy.scala rename to src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictDummy.scala diff --git a/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Dynamic.scala b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictDynamic.scala similarity index 100% rename from src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Dynamic.scala rename to src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictDynamic.scala diff --git a/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Embedded.scala b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictEmbedded.scala similarity index 100% rename from src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Embedded.scala rename to src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictEmbedded.scala diff --git a/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Legacy.scala b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictLegacy.scala similarity index 100% rename from src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Legacy.scala rename to src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictLegacy.scala diff --git a/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Sequential.scala b/src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictSequential.scala similarity index 100% rename from src/main/scala/nutcore/frontend/instr_fetch/branch_predict/Sequential.scala rename to src/main/scala/nutcore/frontend/instr_fetch/branch_predict/BranchPredictSequential.scala diff --git a/src/main/scala/nutcore/mem/cache/Cache.scala b/src/main/scala/nutcore/mem/cache/Cache.scala index 66d7a65a7..0aa528642 100644 --- a/src/main/scala/nutcore/mem/cache/Cache.scala +++ b/src/main/scala/nutcore/mem/cache/Cache.scala @@ -125,9 +125,9 @@ trait HasCacheIO { class Cache(implicit val 
cacheConfig: CacheConfig) extends CacheModule with HasCacheIO { // cpu pipeline - val s1 = Module(new CacheStage1) - val s2 = Module(new CacheStage2) - val s3 = Module(new CacheStage3) + val s1 = Module(new CacheStageMetaRead) + val s2 = Module(new CacheStageCheck) + val s3 = Module(new CacheStageWriteBack) val metaArray = Module(new SRAMTemplateWithArbiter(nRead = 1, new MetaBundle, set = Sets, way = Ways, shouldReset = true)) val dataArray = Module(new SRAMTemplateWithArbiter(nRead = 2, new DataBundle, set = Sets * LineBeats, way = Ways)) diff --git a/src/main/scala/nutcore/mem/cache/CacheCheck.scala b/src/main/scala/nutcore/mem/cache/CacheCheck.scala index 2f62bd22b..26bb942d4 100644 --- a/src/main/scala/nutcore/mem/cache/CacheCheck.scala +++ b/src/main/scala/nutcore/mem/cache/CacheCheck.scala @@ -16,8 +16,8 @@ import top.Settings // meta read // check -class CacheStage2(implicit val cacheConfig: CacheConfig) extends CacheModule { - class CacheStage2IO extends Bundle { +class CacheStageCheck(implicit val cacheConfig: CacheConfig) extends CacheModule { + class CacheStageCheckIO extends Bundle { val in = Flipped(Decoupled(new Stage1IO)) val out = Decoupled(new Stage2IO) val metaReadResp = Flipped(Vec(Ways, new MetaBundle)) @@ -25,7 +25,7 @@ class CacheStage2(implicit val cacheConfig: CacheConfig) extends CacheModule { val metaWriteBus = Input(CacheMetaArrayWriteBus()) val dataWriteBus = Input(CacheDataArrayWriteBus()) } - val io = IO(new CacheStage2IO) + val io = IO(new CacheStageCheckIO) val req = io.in.bits.req val addr = req.addr.asTypeOf(addrBundle) diff --git a/src/main/scala/nutcore/mem/cache/CacheMetaRead.scala b/src/main/scala/nutcore/mem/cache/CacheMetaRead.scala index 9e73a9a71..43583ed8d 100644 --- a/src/main/scala/nutcore/mem/cache/CacheMetaRead.scala +++ b/src/main/scala/nutcore/mem/cache/CacheMetaRead.scala @@ -18,14 +18,14 @@ class Stage1IO(implicit val cacheConfig: CacheConfig) extends CacheBundle { val req = new SimpleBusReqBundle(userBits = 
userBits, idBits = idBits) } -class CacheStage1(implicit val cacheConfig: CacheConfig) extends CacheModule { - class CacheStage1IO extends Bundle { +class CacheStageMetaRead(implicit val cacheConfig: CacheConfig) extends CacheModule { + class CacheStageMetaReadIO extends Bundle { val in = Flipped(Decoupled(new SimpleBusReqBundle(userBits = userBits, idBits = idBits))) val out = Decoupled(new Stage1IO) val metaReadBus = CacheMetaArrayReadBus() val dataReadBus = CacheDataArrayReadBus() } - val io = IO(new CacheStage1IO) + val io = IO(new CacheStageMetaReadIO) if (ro) when (io.in.fire()) { assert(!io.in.bits.isWrite()) } Debug(io.in.fire(), "[L1$] cache stage1, addr in: %x, user: %x id: %x\n", io.in.bits.addr, io.in.bits.user.getOrElse(0.U), io.in.bits.id.getOrElse(0.U)) diff --git a/src/main/scala/nutcore/mem/cache/CacheWriteBack.scala b/src/main/scala/nutcore/mem/cache/CacheWriteBack.scala index b70b01bfa..e571604b7 100644 --- a/src/main/scala/nutcore/mem/cache/CacheWriteBack.scala +++ b/src/main/scala/nutcore/mem/cache/CacheWriteBack.scala @@ -14,8 +14,8 @@ import utils._ import top.Settings // writeback -class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheModule { - class CacheStage3IO extends Bundle { +class CacheStageWriteBack(implicit val cacheConfig: CacheConfig) extends CacheModule { + class CacheStageWriteBackIO extends Bundle { val in = Flipped(Decoupled(new Stage2IO)) val out = Decoupled(new SimpleBusRespBundle(userBits = userBits, idBits = idBits)) val isFinish = Output(Bool()) @@ -31,7 +31,7 @@ class CacheStage3(implicit val cacheConfig: CacheConfig) extends CacheModule { // use to distinguish prefetch request and normal request val dataReadRespToL1 = Output(Bool()) } - val io = IO(new CacheStage3IO) + val io = IO(new CacheStageWriteBackIO) val metaWriteArb = Module(new Arbiter(CacheMetaArrayWriteBus().req.bits, 2)) val dataWriteArb = Module(new Arbiter(CacheDataArrayWriteBus().req.bits, 2))