Skip to content

Commit e643ca5

Browse files
committed
rename files
1 parent 626bdd9 commit e643ca5

File tree

3 files changed

+218
-218
lines changed

3 files changed

+218
-218
lines changed

generic.html

-94
This file was deleted.

index.html

+42-124
Original file line numberDiff line numberDiff line change
@@ -2,112 +2,56 @@
22
<html>
33
<head>
44
<meta charset="utf-8">
5-
<title>Convert</title>
5+
<title>NDJson to CSV</title>
66
<script>
7-
const lines = [];
8-
function dodrop(event)
9-
{
10-
var dt = event.dataTransfer;
11-
var files = dt.files;
7+
8+
/**
 * Copy every leaf value of a (possibly nested) object into a flat target
 * object whose keys are the concatenated property path of each leaf.
 *
 * Nested objects and arrays are descended into (arrays contribute their
 * indices as path segments); primitives and `null` are stored as leaves.
 *
 * @param {Object} object - Value to flatten. `null`/`undefined` (e.g. a blank
 *     NDJSON line that was never parsed) is ignored instead of throwing.
 * @param {Object} target - Object that receives the flattened key/value pairs;
 *     it is mutated in place.
 * @param {string} [path] - Prefix prepended to every key written to `target`.
 * @param {string} [separator=''] - Text inserted between path segments. The
 *     default keeps the historical key format ("ab" for {a: {b: 1}}); pass
 *     e.g. '.' to avoid collisions such as {a: {bc: 1}} vs {ab: {c: 2}}.
 */
function flatten(object, target, path, separator = '') {
    // Object.keys/entries throw on null and undefined, which would abort the
    // whole conversion for a single empty input row.
    if (object === null || object === undefined) {
        return;
    }

    const prefix = path || '';

    for (const [key, value] of Object.entries(object)) {
        if (value && typeof value === 'object') {
            // Descend; the separator is only added between segments, never
            // after the final one.
            flatten(value, target, prefix + key + separator, separator);
        } else {
            target[prefix + key] = value;
        }
    }
}
18+
19+
function dodrop(event) {
20+
var dt = event.dataTransfer;
21+
var files = dt.files;
1222

13-
for (var i = 0; i < files.length; i++) {
14-
reader = new FileReader();
15-
reader.onload = function (event) {
1623

17-
const input = event.target.result;
18-
let result = input.split('\n').map(function(s) { if (s) { return JSON.parse(s); } });
1924

20-
const replacer = (key, value) => value === null ? '' : value // specify how you want to handle null values here
25+
for (var i = 0; i < files.length; i++) {
26+
reader = new FileReader();
27+
reader.onload = function (event) {
28+
29+
const input = event.target.result;
30+
let result = input.split('\n').map(function(s) { if (s) { return JSON.parse(s); } });
2131

22-
32+
const replacer = (key, value) => value === null ? '' : value // specify how you want to handle null values here
2333

24-
let header = [];
34+
const header = {}
35+
flatten(result[0], header)
36+
console.log(header);
2537

26-
if (result[result.length - 1] == undefined) {
27-
result.pop();
28-
}
29-
30-
result.forEach(function (row) {
31-
const timestamp = Date.parse(row["data"]["legacy"]["created_at"]);
32-
//const rows = [];
33-
const dt = new Date(row["data"]["legacy"]["created_at"]);
34-
const retweet = row["data"]["legacy"]["retweeted"]
35-
if (retweet) {
36-
retweet["result"] = retweet["result"]["tweet"]
37-
console.log(retweet);
38-
const rt_text = "RT @" + row["data"]["result"]["core"]["user_results"]["result"]["legacy"]["screen_name"] +
39-
": " + row["data"]["result"]["legacy"]["full_text"]
40-
row["legacy"]["full_text"] = rt_text
41-
}
38+
//get rid of any empty last lines
39+
if (result[result.length - 1] == undefined) {
40+
result.pop();
41+
}
4242

43-
const quote_tweet = row['data']["is_quote_status"];
44-
if (quote_tweet) {
45-
console.log(quote_tweet);
46-
}
43+
const csv = [
44+
Object.keys(header).join(','), // header row first
45+
...Object.values(result).map(function(r) {
46+
const row = {}
47+
flatten(r, row)
48+
return Object.keys(header).map(fieldName => JSON.stringify(row[fieldName]) ).join(',')
49+
})
50+
].join('\r\n')
51+
document.getElementById('csv').innerText = csv;
4752

48-
function escapeHTML(str){
49-
return new Option(str).innerHTML.replace(/\n/g,'\\n').replace(/\"/g, "\"\"");
50-
}
51-
let mentions = [];
52-
if (row["data"]["legacy"]["entities"]["user_mentions"]) {
53-
row["data"]["legacy"]["entities"]["user_mentions"].forEach(m => mentions.push(m["screen_name"]))
54-
}
55-
let videos = [];
56-
let photos = [];
57-
if (row["data"]["legacy"]["entities"]["media"]) {
58-
row["data"]["legacy"]["entities"]["media"].forEach(function(img) {
59-
if (img["type"] == "photo") { photos.push(img["media_url_https"]) }
60-
if (img["type"] == "video") { videos.push(img["media_url_https"]) }
61-
});
62-
}
63-
let tags = []
64-
if (row["data"]["legacy"]["entities"]["hashtags"]) {
65-
row["data"]["legacy"]["entities"]["hashtags"].forEach( t => tags.push(t.text))
66-
}
67-
const rows = {"id": row["data"]["rest_id"],
68-
"thread_id": row["data"]["legacy"]["conversation_id_str"],
69-
"timestamp": dt.getFullYear() + "-" + dt.getMonth() + "-" + dt.getDate() + " " + dt.getHours() + ":" + dt.getMinutes() + ":" + dt.getSeconds(),
70-
"unix_timestamp": timestamp,
71-
"link": "https://twitter.com/"+row["data"]['core']['user_results']['result']['legacy']['screen_name']+"/status/"+row['id'],
72-
"body": `\"${row["data"]["legacy"]["full_text"]}\"`,
73-
//"body": escapeHTML(row["data"]["legacy"]["full_text"]),
74-
"author": row["data"]["core"]["user_results"]["result"]["legacy"]["screen_name"],
75-
"author_fullname": row["data"]["core"]["user_results"]["result"]["legacy"]["name"],
76-
"author_id": row["data"]["legacy"]["user_id_str"],
77-
"source": row["source"],
78-
"language_guess": row["data"]["legacy"]["lang"],
79-
"possibly_sensitive": (row["data"]["possibly_sensitive"])? "yes" : "no",
80-
"retweet_count": row["data"]["legacy"]["retweet_count"],
81-
"reply_count": row["data"]["legacy"]["reply_count"],
82-
"like_count": row["data"]["legacy"]["favorite_count"],
83-
"quote_count": row["data"]["legacy"]["quote_count"],
84-
"impression_count": row["data"]["views"]["count"],
85-
"is_retweet": (retweet)? "yes": "no",
86-
"retweeted_user": (retweet) ? row["data"]["result"]["core"]["user_results"]["result"]["legacy"]["screen_name"]: "",
87-
"is_quote_tweet": (quote_tweet)? "yes": "no",
88-
"quoted_user": (quote_tweet) ? quote_tweet["result"]["core"]["user_results"]["result"]["legacy"]["screen_name"]: "",
89-
"is_reply": (row["data"]["legacy"]["conversation_id_str"].toString() != row["data"]["rest_id"].toString()) ? "yes" : "no",
90-
"replied_user": (row["data"]["legacy"]["in_reply_to_screen_name"])? row["data"]["legacy"]["in_reply_to_screen_name"]: "",
91-
"hashtags": (tags.length > 0) ? tags.join(";") : "",
92-
"urls": (row["data"]["legacy"]["entities"]["urls"]["expanded_url"]) ? row["data"]["legacy"]["entities"]["urls"]["expanded_url"].join(';').toString():"",
93-
"images": (photos.length > 0) ? photos.join(";") : "",
94-
"videos": (videos.length > 0) ? videos.join(";") : "",
95-
"mentions": (mentions.length > 0) ? mentions.join(";") : "",
96-
"place_name": (row["data"]["legacy"]["place"])? row["legacy"]["place"]["full_name"] : ""}
97-
98-
99-
100-
lines.push(Object.values(rows).join(','))
101-
if (header.length == 0) { header = Object.keys(rows);}
102-
} );
103-
104-
const csv = [
105-
header.join(','), // header row first
106-
lines.join('\n')
107-
].join('\n')
108-
document.getElementById('csv').innerText = csv;
109-
};
110-
reader.readAsText(files[i]);
53+
}
54+
reader.readAsText(files[i]);
11155
}
11256
}
11357

@@ -131,31 +75,6 @@
13175
downloadLink.click();
13276
}
13377

134-
/**
 * Build word n-grams from the rows previously collected in the file-level
 * `lines` array and log them to the console.
 *
 * Each entry of `lines` is a comma-joined CSV row; the sixth field (index 5)
 * is the "body" column. NOTE(review): the row is split on every comma, so a
 * body that itself contains commas is truncated at the first one — confirm
 * whether that is acceptable for the n-gram use case.
 *
 * @param {number} length - Number of consecutive words per n-gram.
 * @returns {Array<Array<string>>} One array of `length` words per n-gram, in
 *     order of appearance; empty when fewer than `length` words exist.
 */
function createNgrams (length) {
    const words = [];
    lines.forEach(function (line) {
        const body = line.split(',')[5];
        // Rows with fewer than six fields have no body to tokenize.
        if (!body) {
            return;
        }
        // Array.prototype.concat returns a new array, so the words have to be
        // pushed explicitly to accumulate them.
        for (const word of body.split(' ')) {
            words.push(word);
        }
    });
    console.log(words);

    const ngramsArray = [];
    for (let start = 0; start + length <= words.length; start++) {
        ngramsArray.push(words.slice(start, start + length));
    }
    console.log(ngramsArray);
    return ngramsArray;
}
15978
</script>
16079
</head>
16180
<body>
@@ -165,10 +84,9 @@
16584
ondragover="event.stopPropagation(); event.preventDefault();"
16685
ondrop="event.stopPropagation(); event.preventDefault();
16786
dodrop(event);">
168-
Drop your Zeeschuimer Twitter file here. Use the export button to download to CSV.
87+
Drop your NDJson file here. Use the export button to download to CSV.
16988
</div>
17089
<button onclick="downloadCSV('test.csv')">Export</button>
171-
<button onclick="createNgrams(2)">Create Ngrams</button>
17290
<div id="csv"></div>
17391

17492

0 commit comments

Comments
 (0)