-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreconstruct_ESA.cpp
290 lines (269 loc) · 11.2 KB
/
reconstruct_ESA.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
#include <set>
#include <iostream>
#include <fstream>
#include <vector>
#include <unordered_set>
#include <tuple>
#include <string>
#include <utility>
#include <iterator>
#include <ctime>
#include <cmath>
#include <climits>
#include <algorithm>
#include <random>
#include <unordered_map>
std::ofstream outfileResults;
void SetToVec(std::unordered_set<int> &set, std::vector<int> &vec) {
vec.reserve(set.size()); // See https://stackoverflow.com/questions/42519867/efficiently-moving-contents-of-stdunordered-set-to-stdvector
for (auto it = set.begin(); it != set.end(); ) {
vec.push_back(std::move(set.extract(it++).value()));
}
} // Quickly writes the contents of a set into an empty vector of the same size.
class Graph
{
std::vector<std::vector<int>> edges; // Each index in the outer vector is a node in the graph. Each vector at edges[node] is a vector of neighbors connected to 'node'.
public:
size_t distanceQueryCount = 0;
size_t kthHopQueryCount = 0;
std::set<std::pair<int, int>> queriedPairs; // Stores vertex pairs that have already been queried
std::unordered_map<int, int> nodeIds; // Stores mapping of node id's in input file to 0-indexed node id's which we use in our implementation.
void SetNodes(int count) { // Initialize with a specified number of nodes.
edges = std::vector<std::vector<int>>(count, std::vector<int>());
}
void AddEdge(int node1, int node2) { // Add an undirected/directed connection between 2 nodes.
if (node1 == node2) {return;}
if (std::find(edges[node1].begin(), edges[node1].end(), node2) == edges[node1].end()) {
edges[node1].insert(lower_bound(edges[node1].begin(), edges[node1].end(), node2), node2);
edges[node2].insert(lower_bound(edges[node2].begin(), edges[node2].end(), node1), node1);
}
}
std::vector<int> ForceConnected() {
int attempts = 0;
int center;
std::vector<int> subgraph;
std::vector<int> connected;
do {
subgraph.clear();
center = std::rand() % GetNodeCount();
connected = GetDistances({center},0,{},true); // The reconstruction algorithms only work on connected graphs, and a very small fraction of the nodes in road network datasets are not connected to the whole.
for (size_t node = 0; node < GetNodeCount(); node++) {
if (connected[node] != INT_MAX) {subgraph.push_back(node);}
}
attempts++;
if (attempts > 10) {break;}
} while (subgraph.size() < GetNodeCount()/2);
for (size_t node = 0; node < GetNodeCount(); node++) {
if (connected[node] == INT_MAX) {edges[node] = {};}
}
return subgraph;
}
size_t GetNodeCount() {
return edges.size();
}
std::vector<int> GetNodeNeighbors(int node) {
return edges[node];
}
int GetMaximumDegree() {
int maximum = 0;
for (size_t node = 0; node < GetNodeCount(); node++) {
int degree = GetNodeNeighbors(node).size();
if (degree > maximum) {maximum=degree;}
}
return maximum;
}
void ClearCounters() {
distanceQueryCount = 0;
kthHopQueryCount = 0;
}
bool VerifyReconstruction(std::vector<std::vector<int>> reconstruction) {
// Checks if the edges in the reconstruction match the edges in the input graph.
for (size_t node = 0; node < GetNodeCount(); node++) {
std::sort(reconstruction[node].begin(), reconstruction[node].end());
}
for (size_t i = 0; i<reconstruction.size(); i++) {
for (size_t j = 0; j<reconstruction[i].size(); j++) {
if (reconstruction[i][j] != edges[i][j]) {
std::cout << reconstruction[i][j] << std::endl;
std::cout << edges[i][j] << std::endl;
}
}
}
return (reconstruction == edges);
}
void GetDistances(std::vector<int> sources, std::vector<int> &distances, int limit = 0, std::unordered_set<int> subgraph = {}, bool free = false, int queryCount = 0, int hops = 0) {
std::vector<int> previous;
BreadthFirstSearch(sources, distances, previous, limit, subgraph, free, queryCount, hops);
}
std::vector<int> GetDistances(std::vector<int> sources, int limit = 0, std::unordered_set<int> subgraph = {}, bool free = false, int queryCount = 0, int hops = 0) {
std::vector<int> distances(GetNodeCount(), INT_MAX); //By default, unreached nodes have a distance of INT_MAX.
GetDistances(sources, distances, limit, subgraph, free, queryCount, hops);
return distances;
}
void BreadthFirstSearch(std::vector<int> sources, std::vector<int> &distances, std::vector<int> &previous, int limit = 0, std::unordered_set<int> subgraph = {}, bool free = false, int queryCount = 0, int hops = 0) { //Breadth-first search flooding out from one or more source nodes. May optionally confine search to a subgraph. May optionally limit the depth of the search. Has both a version that assigns distances to an already allocated vector in-place, and a version that generates a new vector as a distance table.
if (!queryCount) {queryCount = sources.size();}
if (subgraph.size()) {
for (size_t node = 0; node < GetNodeCount(); node++) {
if (!subgraph.count(node)) {distances[node] = -1;} //If subgraph is specified, ignore nodes outside of that subgraph, assigning them a distance of -1.
}
}
std::vector<int> frontier = sources;
while (!frontier.empty()) {
for (int node : frontier) { //Update distances table with currently visited nodes.
distances[node] = hops;
if(!free) {
distanceQueryCount += queryCount; //One distance query from each of the source nodes.
kthHopQueryCount += hops*queryCount; //Since it's a breadth-first search, once we find a shortest path from any one source to the destination, we can stop making kth-hop queries to that destination.
if (hops == 0) {
distanceQueryCount--; //It is unnecessary to make a query to establish that a node has a distance of 0 with itself.
kthHopQueryCount--;
}
}
}
if (limit && hops == limit) {
if (!free) {
for (int distance : distances) { //Limiting the depth of the search is allowed for better running speed purposes, but we still have to count the queries that would have been necessary if we really only had a distance or kth_hop oracle.
if (distance == INT_MAX) {
distanceQueryCount += queryCount;
kthHopQueryCount += hops*queryCount;
}
}
}
break;
}
std::unordered_set<int> newFrontier; // Using an unordered set to prioritize fast insertion while filtering out duplicates.
for (int node : frontier) {
for (int neighbor : GetNodeNeighbors(node)) {
if (distances[neighbor] == INT_MAX) {
newFrontier.insert(neighbor); // New frontier = neighbors of frontier nodes that haven't yet been visited.
if (previous.size()) {previous[neighbor] = node;}
}
}
}
frontier.clear();
SetToVec(newFrontier, frontier); // Clear the frontier, then load the newFrontier into the frontier.
hops++;
}
}
};
// Load a graph from a file according to the following format:
// - first line contains the number of nodes in the graph
// - the rest of the lines contain edge information in the form of two node IDs per line
Graph LoadGraph(std::string fileName) {
Graph graph;
int currentNodeId = 0;
std::string line;
std::ifstream inFile;
inFile.open(fileName);
if (inFile.is_open()) {
std::getline(inFile,line);
graph.SetNodes(std::stoi(line));
while (std::getline(inFile,line)) {
if (std::count(line.begin(), line.end(), ' ') != 1) {continue;} // The only other relevant type of line in the file describes edges, which uniquely contains only two numbers seperated by exactly one space.
size_t split = line.find(' ');
int node1 = std::stoi(line.substr(0, split));
int node2 = std::stoi(line.substr(split+1));
int nodeId1, nodeId2;
// Give each node an id from 0 to num_nodes-1, since not all datasets have node IDs in this format.
if (graph.nodeIds.count(node1)) {
nodeId1 = graph.nodeIds[node1];
} else {
nodeId1 = currentNodeId++;
graph.nodeIds[node1] = nodeId1;
}
if (graph.nodeIds.count(node2)) {
nodeId2 = graph.nodeIds[node2];
} else {
nodeId2 = currentNodeId++;
graph.nodeIds[node2] = nodeId2;
}
graph.AddEdge(nodeId1, nodeId2);
}
inFile.close();
}
return graph;
}
std::pair<int, int> constructQueryPair(int node1, int node2) {
if (node1 < node2) {
return std::make_pair(node1, node2);
} else {
return std::make_pair(node2, node1);
}
}
std::vector<std::vector<int>> Reconstruct_ESA(Graph &graph, std::vector<int> &subgraph, int s) {
std::vector<std::vector<int>> edges(graph.GetNodeCount(), std::vector<int>());
std::vector<int> S;
while (s--) {
S.push_back(std::rand() % subgraph.size());
}
for (int node1: S) {
for (int node2: subgraph) {
std::pair<int, int> query = constructQueryPair(node1, node2);
if(node1 != node2 && !graph.queriedPairs.count(query)){
graph.distanceQueryCount += 1;
graph.queriedPairs.insert(query);
}
}
}
std::set<std::pair<int,int>> E_hat;
std::vector<std::vector<int>> dists(S.size(), std::vector<int>());
for (int i = 0; i<S.size(); i++) {
dists[i] = graph.GetDistances({S[i]},0,{},true);
}
for (int node1: subgraph) {
for (int node2: subgraph) {
bool add = true;
for (int i = 0; i<S.size(); i++) {
if (std::abs(dists[i][node1] - dists[i][node2]) > 1) {
add = false;
break;
}
}
if (add) {
E_hat.insert(constructQueryPair(node1, node2));
}
}
}
for (const auto& elem: E_hat) {
int node1 = elem.first; int node2 = elem.second;
std::vector<int> neighbors = graph.GetNodeNeighbors(node1);
for (int node: neighbors) {
if (node == node2) {
edges[node1].push_back(node2);
edges[node2].push_back(node1);
}
}
std::pair<int, int> query = constructQueryPair(node1, node2);
if(node1 != node2 && !graph.queriedPairs.count(query)){
graph.distanceQueryCount += 1;
graph.queriedPairs.insert(query);
}
}
return edges;
}
void ReconstructTest(Graph &graph) {
graph.ClearCounters();
std::vector<int> subgraph = graph.ForceConnected(); // Our algorithm reconstructs connected graphs only. If an unconnected graph is given as input, the largest connected component is reconstructed instead.
outfileResults << "Starting a new reconstruction...\n";
outfileResults << "\tNumber of Nodes: "+std::to_string(graph.GetNodeCount())+"\n";
outfileResults << "\tNumber of Connected Nodes: "+std::to_string(subgraph.size())+"\n";
outfileResults << "\tMaximum Degree: "+std::to_string(graph.GetMaximumDegree())+"\n";
clock_t time = clock();
// auto reconstruction = Reconstruct_ESA(graph, subgraph, std::sqrt(std::pow(std::log2(subgraph.size()), 2)*std::pow(subgraph.size(), 2.0/3.0)));
auto reconstruction = Reconstruct_ESA(graph, subgraph, std::pow(subgraph.size(), 2.0/3.0));
// auto reconstruction = Reconstruct_ESA(graph, subgraph, std::pow(std::log2(subgraph.size()), 2));
time = clock() - time;
outfileResults << "\tReconstruction correctness: "+std::to_string(graph.VerifyReconstruction(reconstruction))+"\n";
outfileResults << "\tReconstruction Time: "+std::to_string(((float)time)/CLOCKS_PER_SEC)+" seconds\n";
outfileResults << "\tDistance Query Count: "+std::to_string(graph.distanceQueryCount)+"\n";
outfileResults << "\tKth-Hop Query Count: "+std::to_string(graph.kthHopQueryCount)+"\n";
}
int main(int argc, char *argv[]) {
std::string file_name = "";
file_name += argv[1];
outfileResults.open("out_ESA/"+file_name+".results");
Graph graph = LoadGraph("data/"+file_name+".tmp");
ReconstructTest(graph);
outfileResults.close();
return 0;
}