|
2 | 2 | <html>
|
3 | 3 | <head>
|
4 | 4 | <meta charset="utf-8">
|
5 |
| - <title>Convert</title> |
| 5 | + <title>NDJson to CSV</title> |
6 | 6 | <script>
|
7 |
| - const lines = []; |
8 |
| - function dodrop(event) |
9 |
| -{ |
10 |
| - var dt = event.dataTransfer; |
11 |
| - var files = dt.files; |
| 7 | + |
/**
 * Recursively copies every leaf value of a nested object into `target`,
 * keyed by the dot-separated path that leads to it.
 *
 * Example: `{ a: { b: 1 }, c: [2, 3] }` becomes
 * `{ 'a.b': 1, 'c.0': 2, 'c.1': 3 }`.
 *
 * Path segments are joined with '.' so that distinct nested paths cannot
 * collapse into the same key (plain concatenation maps both `{a: {bc: 1}}`
 * and `{ab: {c: 2}}` to "abc").
 *
 * @param {*} object - Value to walk. Anything that is not a non-null object
 *   is ignored, so callers may pass `undefined` or `null` safely.
 * @param {Object} target - Object that receives the flattened key/value
 *   pairs; it is mutated in place.
 * @param {string} [path] - Prefix for generated keys; used by the recursion.
 * @returns {void}
 */
function flatten(object, target, path) {
  // Object.keys() throws on null/undefined, so bail out early.
  if (object === null || typeof object !== 'object') {
    return;
  }

  const prefix = path ? `${path}.` : '';

  for (const [key, value] of Object.entries(object)) {
    const name = prefix + key;
    // typeof null === 'object', so null has to be excluded explicitly;
    // it is stored as a leaf value instead of being descended into.
    if (value !== null && typeof value === 'object') {
      flatten(value, target, name);
    } else {
      target[name] = value;
    }
  }
}
| 18 | + |
/**
 * Drop handler for the drop zone: converts each dropped NDJSON file to CSV
 * and shows the result in the element with id "csv".
 *
 * Each non-blank line of a file is parsed as one JSON document and flattened
 * with `flatten()`. The CSV header is the union of the flattened keys of all
 * records, in first-seen order, so a column that only appears in a later
 * record is not lost.
 *
 * NOTE: every file writes to the same output element, so when several files
 * are dropped at once the one whose read finishes last is the one displayed.
 *
 * @param {DragEvent} event - The drop event; files are read from
 *   `event.dataTransfer.files`.
 * @returns {void}
 * @throws {SyntaxError} Asynchronously, from the reader's load handler, when
 *   a non-blank line is not valid JSON.
 */
function dodrop(event) {
  const files = event.dataTransfer.files;

  // CSV quoting (RFC 4180): wrap in double quotes and double any embedded
  // quote. JSON-style backslash escapes are not understood by CSV parsers.
  const quote = (text) => `"${text.replace(/"/g, '""')}"`;

  // Column names are only quoted when they contain a CSV metacharacter.
  const headerField = (name) => (/[",\r\n]/.test(name) ? quote(name) : name);

  // Missing and null values become empty fields; strings are always quoted.
  const valueField = (value) => {
    if (value === null || value === undefined) {
      return '';
    }
    if (typeof value === 'string') {
      return quote(value);
    }
    return String(value);
  };

  for (const file of files) {
    // One reader per file, scoped locally instead of leaking a global.
    const reader = new FileReader();

    reader.onload = (loadEvent) => {
      const rows = loadEvent.target.result
        .split('\n')
        // Skip blank lines anywhere in the file, including a trailing one
        // and lines that hold only "\r" from CRLF line endings.
        .filter((line) => line.trim() !== '')
        .map((line) => {
          const row = {};
          flatten(JSON.parse(line), row);
          return row;
        });

      const columns = [...new Set(rows.flatMap((row) => Object.keys(row)))];

      const csv = [
        columns.map(headerField).join(','), // header row first
        ...rows.map((row) => columns.map((name) => valueField(row[name])).join(',')),
      ].join('\r\n');

      document.getElementById('csv').innerText = csv;
    };

    reader.readAsText(file);
  }
}
|
113 | 57 |
|
|
131 | 75 | downloadLink.click();
|
132 | 76 | }
|
133 | 77 |
|
134 |
| - function createNgrams (length) { |
135 |
| - |
136 |
| - function flatten(arr) { |
137 |
| - return arr.reduce(function (flat, toFlatten) { |
138 |
| - return flat.concat(Array.isArray(toFlatten) ? flatten(toFlatten) : toFlatten); |
139 |
| - }, []); |
140 |
| -} |
141 |
| - |
142 |
| - let words = [] |
143 |
| - lines.forEach(function(l) { words.concat(l.split(',')[5].split(' ') ) } ); |
144 |
| - console.log(words); |
145 |
| - var ngramsArray = []; |
146 |
| - |
147 |
| - for (var i = 0; i < words.length - (length - 1); i++) { |
148 |
| - var subNgramsArray = []; |
149 |
| - |
150 |
| - for (var j = 0; j < length; j++) { |
151 |
| - subNgramsArray.push(words[i + j]) |
152 |
| - } |
153 |
| - |
154 |
| - ngramsArray.push(subNgramsArray); |
155 |
| - } |
156 |
| - console.log(ngramsArray); |
157 |
| - return ngramsArray; |
158 |
| - } |
159 | 78 | </script>
|
160 | 79 | </head>
|
161 | 80 | <body>
|
|
165 | 84 | ondragover="event.stopPropagation(); event.preventDefault();"
|
166 | 85 | ondrop="event.stopPropagation(); event.preventDefault();
|
167 | 86 | dodrop(event);">
|
168 |
| - Drop your Zeeschuimer Twitter file here. Use the export button to download to CSV. |
| 87 | + Drop your NDJson file here. Use the export button to download to CSV. |
169 | 88 | </div>
|
170 | 89 | <button onclick="downloadCSV('test.csv')">Export</button>
|
171 |
| - <button onclick="createNgrams(2)">Create Ngrams</button> |
172 | 90 | <div id="csv"></div>
|
173 | 91 |
|
174 | 92 |
|
|
0 commit comments