Skip to content

Commit b19c2bb

Browse files
authored
Merge pull request #29 from tancheng/utilization
Report tile utilization with DVFS info
2 parents a6de261 + 5be5412 commit b19c2bb

File tree

246 files changed

+367238
-61
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

246 files changed

+367238
-61
lines changed

.github/workflows/cmake.yml

+14-9
Original file line numberDiff line numberDiff line change
@@ -36,20 +36,25 @@ jobs:
3636
# Build your program with the given configuration
3737
run: make
3838

39-
- name: Test Nonlinear Feature
40-
working-directory: ${{github.workspace}}/test/nonlinear_test
39+
# Testing.
40+
- name: Test fusion via Nonlinear kernel
41+
working-directory: ${{github.workspace}}/test/nonlinear
4142
run: |
4243
clang-12 -emit-llvm -O3 -fno-unroll-loops -fno-vectorize -o nonlinear_test.bc -c nonlinear_test.cpp
43-
opt-12 -load ../../build/src/libmapperPass.so -load ${LOCALBASE}/lib/libLLVMSupport.so -load ${LOCALBASE}/lib/libclangAST.so -mapperPass -verify -mapperPass nonlinear_test.bc
44-
sh test.sh
44+
opt-12 -load ../../build/src/libmapperPass.so -mapperPass nonlinear_test.bc
45+
sh verify.sh
4546
46-
- name: Test Idiv Feature
47-
working-directory: ${{github.workspace}}/test/idiv_test
47+
- name: Test split via Idiv kernel
48+
working-directory: ${{github.workspace}}/test/idiv
4849
run: |
4950
clang-12 -emit-llvm -O3 -fno-unroll-loops -fno-vectorize -o idiv_test.bc -c idiv_test.cpp
5051
opt-12 -load ../../build/src/libmapperPass.so -mapperPass idiv_test.bc
51-
sh test.sh
52-
52+
sh verify.sh
5353
54+
- name: Test DVFS
55+
working-directory: ${{github.workspace}}/test/dvfs
56+
run: |
57+
clang-12 -emit-llvm -O3 -fno-unroll-loops -fno-vectorize -o kernel.bc -c kernel.cpp
58+
opt-12 -load ../../build/src/libmapperPass.so -mapperPass kernel.bc > trace.log
59+
sh verify.sh
5460
55-

src/CGRA.cpp

+40-3
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,15 @@ using json = nlohmann::json;
1616

1717
CGRA::CGRA(int t_rows, int t_columns, bool t_diagonalVectorization,
1818
bool t_heterogeneity, bool t_parameterizableCGRA,
19-
map<string, list<int>*>* t_additionalFunc) {
19+
map<string, list<int>*>* t_additionalFunc,
20+
bool t_supportDVFS, int t_DVFSIslandDim) {
2021
m_rows = t_rows;
2122
m_columns = t_columns;
2223
m_FUCount = t_rows * t_columns;
24+
m_supportDVFS = t_supportDVFS;
25+
m_DVFSIslandDim = t_DVFSIslandDim;
2326
m_supportComplex = new list<string>();
24-
m_supportCall = new list<string>();
27+
m_supportCall = new list<string>();
2528
nodes = new CGRANode**[t_rows];
2629

2730
// Initialize the m_supportComplex & m_supportCall list.
@@ -185,7 +188,6 @@ CGRA::CGRA(int t_rows, int t_columns, bool t_diagonalVectorization,
185188
}
186189
}
187190

188-
189191
// Enable the heterogeneity.
190192
if (t_heterogeneity) {
191193
// for (int r=0; r<t_rows; ++r) {
@@ -240,6 +242,27 @@ CGRA::CGRA(int t_rows, int t_columns, bool t_diagonalVectorization,
240242
disableSpecificConnections();
241243
}
242244

245+
if (t_supportDVFS) {
246+
for (int r=0; r<t_rows; ++r) {
247+
for (int c=0; c<t_columns; ++c) {
248+
nodes[r][c]->enableDVFS();
249+
int DVFSIslandX = c / t_DVFSIslandDim;
250+
int DVFSIslandY = r / t_DVFSIslandDim;
251+
int DVFSIslandId = DVFSIslandX + DVFSIslandY * t_columns / t_DVFSIslandDim;
252+
nodes[r][c]->setDVFSIsland(DVFSIslandX, DVFSIslandY, DVFSIslandId);
253+
// Islandize the CGRA nodes. In the prototype, each set of 2x2 nodes is
254+
// grouped as one island. For example, a 4x4 CGRA has 2x2 islands, the
255+
// tiles of (0, 2), (0, 3), (1, 2), (1, 3) are viewed as the (0, 1) island.
256+
if (m_DVFSIslands.find(DVFSIslandId) != m_DVFSIslands.end()) {
257+
m_DVFSIslands[DVFSIslandId].push_back(nodes[r][c]);
258+
} else {
259+
vector<CGRANode*> tiles{nodes[r][c]};
260+
m_DVFSIslands[DVFSIslandId] = tiles;
261+
}
262+
}
263+
}
264+
}
265+
243266
/*
244267
cout<<"[connection] horizontal and vertical."<<endl;
245268
// Connect the CGRA nodes with diagonal links.
@@ -357,3 +380,17 @@ CGRALink* CGRA::getLink(CGRANode* t_n1, CGRANode* t_n2) {
357380
int CGRA::getLinkCount() {
358381
return m_LinkCount;
359382
}
383+
384+
map<int, vector<CGRANode*>> CGRA::getDVFSIslands() {
385+
return m_DVFSIslands;
386+
}
387+
388+
void CGRA::syncDVFSIsland(CGRANode* t_node) {
389+
int islandID = t_node->getDVFSIslandID();
390+
for (auto& nodeWithinIsland : m_DVFSIslands[islandID]) {
391+
nodeWithinIsland->setDVFSLatencyMultiple(t_node->getDVFSLatencyMultiple());
392+
nodeWithinIsland->syncDVFS();
393+
cout << "[cheng] synced for node: " << nodeWithinIsland->getID() << "; check synced: " << nodeWithinIsland->isSynced() << "; addr: " << nodeWithinIsland << endl;
394+
}
395+
}
396+

src/CGRA.h

+9-1
Original file line numberDiff line numberDiff line change
@@ -22,12 +22,15 @@ class CGRA {
2222
int m_LinkCount;
2323
int m_rows;
2424
int m_columns;
25+
bool m_supportDVFS;
26+
int m_DVFSIslandDim;
27+
map<int, vector<CGRANode*>> m_DVFSIslands;
2528
list<string>* m_supportComplex;
2629
list<string>* m_supportCall;
2730
void disableSpecificConnections();
2831

2932
public:
30-
CGRA(int, int, bool, bool, bool, map<string, list<int>*>*);
33+
CGRA(int, int, bool, bool, bool, map<string, list<int>*>*, bool, int);
3134
CGRANode ***nodes;
3235
CGRALink **links;
3336
int getFUCount();
@@ -40,6 +43,11 @@ class CGRA {
4043
void setBypassConstraint(int);
4144
void setCtrlMemConstraint(int);
4245
void setRegConstraint(int);
46+
map<int, vector<CGRANode*>> getDVFSIslands();
47+
// Aligns all the CGRA nodes within the same DVFS island to the
48+
// same DVFS level based on the DVFS level of the given CGRA node.
49+
void syncDVFSIsland(CGRANode*);
4350
list<string>* getSupportComplex();
4451
list<string>* getSupportCall();
4552
};
53+

src/CGRALink.cpp

+22
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ CGRALink::CGRALink(int t_linkId) {
2020
m_generatedOut = new bool[1];
2121
m_arrived = new bool[1];
2222
m_disabled = false;
23+
m_mapped = false;
2324
}
2425

2526
void CGRALink::setCtrlMemConstraint(int t_ctrlMemConstraint) {
@@ -52,6 +53,10 @@ void CGRALink::setID(int t_id) {
5253
m_id = t_id;
5354
}
5455

56+
bool CGRALink::isMapped() {
57+
return m_mapped;
58+
}
59+
5560
void CGRALink::constructMRRG(int t_CGRANodeCount, int t_II) {
5661
m_II = t_II;
5762
m_cycleBoundary = t_CGRANodeCount*t_II*t_II;
@@ -152,7 +157,23 @@ bool CGRALink::canOccupy(DFGNode* t_srcDFGNode, CGRANode* t_srcCGRANode,
152157
m_generatedOut[t]) {
153158
return false;
154159
}
160+
}
155161

162+
// Checks whether the DVFS long execution (low frequency indicates
163+
// `pseudo` multi-cycle execution) is enabled. If enabled, only the
164+
// last cycle (i.e., last pipe stage) can send out the data. This
165+
// feature is used to mimic the asynchronous buffer on the link.
166+
// if (getSrc()->isFrequencyLowered() && !getSrc()->isEndPipe(t_cycle, t_II)) {
167+
// FIXME: this should be revised to align with the sync().
168+
int cycle_in_II = (t_cycle+t_II) % t_II;
169+
if (getDst()->isDVFSEnabled() and getDst()->getDVFSLatencyMultiple() > 1 and
170+
cycle_in_II % getDst()->getDVFSLatencyMultiple() != 0) {
171+
// if (getDst()->isDVFSEnabled() and getDst()->getDVFSLatencyMultiple() > 1 and
172+
// cycle_in_II % getDst()->getDVFSLatencyMultiple() == 1) {
173+
// // The output should be blocked by the low frequency computation.
174+
// // This should roughly model the behavior of the DVFS asynchronous
175+
// // buffer.
176+
return false;
156177
}
157178

158179
return true;
@@ -196,6 +217,7 @@ bool CGRALink::isReused(int t_cycle) {
196217

197218
void CGRALink::occupy(DFGNode* t_srcDFGNode, int t_cycle, int duration,
198219
int t_II, bool t_isBypass, bool t_isGeneratedOut, bool t_isStaticElasticCGRA) {
220+
m_mapped = true;
199221
int interval = t_II;
200222
if (t_isStaticElasticCGRA) {
201223
interval = 1;

src/CGRALink.h

+2
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ class CGRALink
3232
int m_ctrlMemSize;
3333
int m_bypassConstraint;
3434
int m_currentCtrlMemItems;
35+
bool m_mapped;
3536

3637
int m_cycleBoundary;
3738
bool m_disabled;
@@ -66,6 +67,7 @@ class CGRALink
6667
void setBypassConstraint(int);
6768
int getBypassConstraint();
6869
void disable();
70+
bool isMapped();
6971
};
7072

7173
#endif

0 commit comments

Comments
 (0)