-
Notifications
You must be signed in to change notification settings - Fork 339
/
Copy pathinterest-filter.js
141 lines (118 loc) · 3.81 KB
/
interest-filter.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
/* Libraries */
const jsonic = require('jsonic');
const jsonfile = require('jsonfile');
const _ = require('underscore');
const fs = require('fs');
/* Files: relative paths of the JSON files this module reads and writes */
// Input: synonym definitions, read by initData().
const synonymFile = '_data/synonyms.json';
// Output: grouped interests, written by writeToFile().
const interestFile = '_data/interests.json';
// Output: interests that matched no synonym, written by writeToFile().
const unclassifiedInterestsFile = '_data/unclassifiedInterests.json';
/* Data: module-level state shared by the functions in this file */
// Synonym entries filled in by initData(); groupInterest() reads each entry's `term` and `synonyms`.
const synonymData = [];
// One element per level, shaped as { level, data: [{ interest, names: [...] }] }.
const interestData = [];
// Interests for which groupInterest() found no matching synonym.
const unclassifiedInterests = [];
/**
 * Load the synonym definitions from `synonymFile` into `synonymData`.
 *
 * The file is read synchronously as UTF-8 and parsed with jsonic; every entry of the parsed
 * result is appended to `synonymData`, so calling this more than once appends the entries again.
 * Errors raised while reading or parsing are not caught here and propagate to the caller.
 */
function initData() {
  const parsedSynonyms = jsonic(fs.readFileSync(synonymFile, 'utf8'));
  _.each(parsedSynonyms, (entry) => {
    synonymData.push(entry);
  });
}
/**
 * Write the grouped interests and the unclassified interests out as JSON.
 *
 * Every level's groups are first sorted by their `interest` field. `interestData` then goes to
 * `interestFile` and `unclassifiedInterests` goes to `unclassifiedInterestsFile`, both through the
 * callback-based jsonfile.writeFile; a failed write is only reported with console.error.
 * The summary line is logged after both writes have been issued, not from their callbacks.
 */
function writeToFile() {
  const reportWriteError = (err) => {
    if (err != null) {
      console.error(err);
    }
  };

  for (const levelGroup of interestData) {
    levelGroup.data = _.sortBy(levelGroup.data, 'interest');
  }

  jsonfile.writeFile(interestFile, interestData, reportWriteError);
  jsonfile.writeFile(unclassifiedInterestsFile, unclassifiedInterests, reportWriteError);

  const summary = unclassifiedInterests.length > 0
    ? `Check ${unclassifiedInterestsFile} for unclassified interests`
    : 'All interests are mapped to a term';
  console.log(summary);
}
/**
 * Decide whether an interest refers to the same term as a synonym.
 *
 * Both strings are lower-cased and stripped of space characters before they are compared.
 * A synonym of at most three characters (measured on the raw synonym, spaces included) is
 * treated as an acronym and must equal the whole interest; a longer synonym only has to occur
 * somewhere inside the interest.
 *
 * @param {string} interest - Interest text to classify.
 * @param {string} synonym - Candidate synonym to test against.
 * @returns {boolean} True when the interest matches the synonym.
 */
function compareInterestAndSynonym(interest, synonym) {
  const normalize = (text) => text.toLowerCase().split(' ').join('');
  const haystack = normalize(interest);
  const needle = normalize(synonym);
  // Short synonyms are acronyms: a substring match would be far too permissive for them.
  const isAcronym = synonym.length <= 3;
  return isAcronym ? haystack === needle : haystack.includes(needle);
}
/**
 * Start a new interest group holding a single name and store it in `interestData`.
 *
 * @param {string} interest - Term the group is filed under.
 * @param {string} name - First name to put into the group.
 * @param {number} levelIndex - Index of the level inside `interestData`; a value that is not
 *   `>= 0` means the level does not exist yet.
 * @param {string} level - Level label, used only when a new level entry has to be created.
 */
function createInterestGroup(interest, name, levelIndex, level) {
  const group = { interest, names: [name] };

  if (levelIndex >= 0) {
    // The level already exists: just add the group to it.
    interestData[levelIndex].data.push(group);
    return;
  }

  // Unknown level: create it with this group as its only member.
  interestData.push({ level, data: [group] });
}
/**
 * Append an interest to the end of `unclassifiedInterests`.
 * No de-duplication is done: the same interest is stored once per call.
 *
 * @param {string} interest - Interest that could not be mapped to a term.
 */
function insertToUnclassifiedInterests(interest) {
  unclassifiedInterests[unclassifiedInterests.length] = interest;
}
/**
 * Look up the position of a level inside `interestData`, ignoring letter case.
 *
 * @param {string} level - Level label to search for.
 * @returns {number} Index of the first entry whose `level` matches, or -1 when there is none.
 */
function findLevelIndex(level) {
  return interestData.findIndex(
    (levelGroup) => levelGroup.level.toLowerCase() === level.toLowerCase(),
  );
}
/**
 * File a person's interest under the matching term for their level.
 *
 * The first entry of `synonymData` that has any synonym matching the interest (as judged by
 * compareInterestAndSynonym) supplies the term. The name is then added to the group for that
 * term within the given level; a missing group, or a missing level, is created on the spot.
 * When no synonym matches, the raw interest is recorded as unclassified and nothing is grouped.
 *
 * @param {string} interest - Interest text as entered.
 * @param {string} name - Name to record under the matched term.
 * @param {string} level - Level label the name belongs to (compared case-insensitively).
 */
function groupInterest(interest, name, level) {
  // Walk the synonym entries in order; the first one with a matching synonym wins.
  const matchedEntry = synonymData.find((entry) => {
    const hit = _.find(entry.synonyms, (synonym) => compareInterestAndSynonym(interest, synonym));
    return hit != null;
  });

  if (matchedEntry === undefined) {
    // No term covers this interest.
    insertToUnclassifiedInterests(interest);
    return;
  }

  const { term } = matchedEntry;
  const levelIndex = findLevelIndex(level);

  if (levelIndex >= 0) {
    const existingGroup = interestData[levelIndex].data.find(
      (group) => group.interest.toLowerCase() === term.toLowerCase(),
    );
    if (existingGroup !== undefined) {
      existingGroup.names.push(name);
      return;
    }
  }

  // Either the level or the term's group does not exist yet: start a new group.
  createInterestGroup(term, name, levelIndex, level);
}
/* Functions exported from this module; the other functions and the data arrays stay private to the file. */
module.exports = {
groupInterest,
writeToFile,
initData,
};