Skip to content

Commit 5c4add7

Browse files
committed
feat: add in charset, fetch, validate
1 parent 70834bd commit 5c4add7

33 files changed

+1770
-168
lines changed

lerna.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
22
"packages": ["packages/*"],
33
"useNx": false,
4-
"version": "0.0.2"
4+
"version": "0.0.3"
55
}

package-lock.json

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

package.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"name": "@datastream/monorepo",
3-
"version": "0.0.2",
3+
"version": "0.0.3",
44
"description": "Streams made easy.",
55
"type": "module",
66
"engines": {
@@ -13,7 +13,7 @@
1313
"pre-commit": "lint-staged",
1414
"install": "lerna bootstrap",
1515
"lint": "lint-staged",
16-
"test": "npm run build && node --test --conditions=node packages && node --test --conditions=webstream packages",
16+
"test": "npm run build && c8 node --test --conditions=node packages && c8 node --test --conditions=webstream packages",
1717
"build": "bin/esbuild",
1818
"release:tag": "git tag $npm_package_version && git push --tags",
1919
"lerna:rm": "npm run lerna:rm:node_modules && npm run lerna:rm:lock",
@@ -22,7 +22,7 @@
2222
"lerna:update": "lerna exec --bail --concurrency 5 npm update && npm install",
2323
"lerna:outdated": "lerna exec --concurrency 5 npm outdated",
2424
"lerna:audit": "lerna exec --concurrency 2 npm audit fix",
25-
"lerna:sync": "lerna exec --bail --concurrency 2 npm install && lerna publish --exact --yes --skip-npm --skip-git --repo-version $npm_package_version",
25+
"lerna:sync": "lerna publish --exact --yes --skip-npm --skip-git --repo-version $npm_package_version",
2626
"lerna:publish": "lerna publish --exact --yes --skip-git --repo-version $npm_package_version",
2727
"lerna:publish:next": "lerna publish --exact --yes --skip-git --repo-version $npm_package_version --dist-tag next"
2828
},

packages/charset/decode.js

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import { createTransformStream } from '@datastream/core'
2+
import iconv from 'iconv-lite' // doesn't support esm
3+
4+
/**
 * Map a charset label onto one that can be handed to iconv.
 *
 * 'ISO-8859-8-I' is rewritten to 'ISO-8859-8'; any label for which
 * `iconv.encodingExists` returns false is replaced by 'UTF-8'.
 *
 * @param {string} charset - requested charset label
 * @returns {string} the label to use
 */
const getSupportedEncoding = (charset) => {
  const label = charset === 'ISO-8859-8-I' ? 'ISO-8859-8' : charset
  return iconv.encodingExists(label) ? label : 'UTF-8'
}

/**
 * Create a stream that decodes input in the given charset.
 *
 * When the resolved charset is exactly 'UTF-8' (including the fallback for
 * unsupported labels) a `createTransformStream()` with no arguments is
 * returned instead of an iconv decoder.
 *
 * @param {string} charset - charset label of the incoming data
 * @returns {object} `iconv.decodeStream(...)` or a default transform stream
 */
export const charsetDecodeStream = (charset) => {
  const encoding = getSupportedEncoding(charset)
  return encoding === 'UTF-8'
    ? createTransformStream()
    : iconv.decodeStream(encoding)
}

export default charsetDecodeStream

packages/charset/detect.js

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,54 @@
1+
import { createTransformStream } from '@datastream/core'
2+
import detect from 'charset-detector'
3+
4+
/**
 * Charset names expected from the detector, each with a starting confidence
 * tally of zero. This is a read-only template: every stream works on its own
 * copy so tallies are never shared between streams. Key order matters, since
 * it decides which entry wins when confidences tie.
 */
const charsets = Object.freeze({
  'UTF-8': 0,
  'UTF-16BE': 0,
  'UTF-16LE': 0,
  'UTF-32BE': 0,
  'UTF-32LE': 0,
  Shift_JIS: 0,
  'ISO-2022-JP': 0,
  'ISO-2022-CN': 0,
  'ISO-2022-KR': 0,
  GB18030: 0,
  'EUC-JP': 0,
  'EUC-KR': 0,
  Big5: 0,
  'ISO-8859-1': 0,
  'ISO-8859-2': 0,
  'ISO-8859-5': 0,
  'ISO-8859-6': 0,
  'ISO-8859-7': 0,
  'ISO-8859-8-I': 0,
  'ISO-8859-8': 0,
  'windows-1251': 0,
  'windows-1256': 0,
  'windows-1252': 0,
  'windows-1254': 0,
  'windows-1250': 0,
  'KOI8-R': 0,
  'ISO-8859-9': 0
})

/**
 * Create a stream that runs charset detection on every chunk and
 * accumulates the reported confidence per charset name.
 *
 * @param {*} result - not read by this function; kept for signature
 *   compatibility
 * @param {object} [options] - also forwarded unchanged to
 *   `createTransformStream`
 * @param {string} [options.key='charset'] - key reported by `stream.result()`
 * @returns {object} the stream from `createTransformStream`, with an added
 *   `result()` method returning
 *   `{ key, value: { charset, confidence } }` for the highest tally so far
 */
export const charsetDetectStream = (result, options = { key: 'charset' }) => {
  // Default the key here as well, so options passed without `key` still work.
  const { key = 'charset' } = options
  // Per-stream copy: concurrent or successive streams must not share totals.
  const tally = { ...charsets }

  // Returns nothing; what happens to the chunk afterwards is decided by
  // createTransformStream, which is not visible in this file.
  const transform = (chunk) => {
    for (const { charsetName, confidence } of detect(chunk)) {
      // Names missing from the template start at 0 rather than producing NaN.
      const previous = Object.hasOwn(tally, charsetName)
        ? tally[charsetName]
        : 0
      tally[charsetName] = previous + confidence
    }
  }

  const stream = createTransformStream(transform, options)
  stream.result = () => {
    // Sort is stable, so ties keep the template's key order.
    const [value] = Object.entries(tally)
      .map(([charset, confidence]) => ({ charset, confidence }))
      .sort((a, b) => b.confidence - a.confidence)
    return { key, value }
  }
  return stream
}

export default charsetDetectStream

packages/charset/encode.js

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
import { createTransformStream } from '@datastream/core'
2+
import iconv from 'iconv-lite' // doesn't support esm
3+
4+
/**
 * Map a charset label onto one that can be handed to iconv.
 *
 * 'ISO-8859-8-I' is rewritten to 'ISO-8859-8'; any label for which
 * `iconv.encodingExists` returns false is replaced by 'UTF-8'.
 *
 * @param {string} charset - requested charset label
 * @returns {string} the label to use
 */
const getSupportedEncoding = (charset) => {
  const label = charset === 'ISO-8859-8-I' ? 'ISO-8859-8' : charset
  return iconv.encodingExists(label) ? label : 'UTF-8'
}

/**
 * Create a stream that encodes output into the given charset.
 *
 * When the resolved charset is exactly 'UTF-8' (including the fallback for
 * unsupported labels) a `createTransformStream()` with no arguments is
 * returned instead of an iconv encoder.
 *
 * @param {string} charset - charset label of the outgoing data
 * @returns {object} `iconv.encodeStream(...)` or a default transform stream
 */
export const charsetEncodeStream = (charset) => {
  const encoding = getSupportedEncoding(charset)
  return encoding === 'UTF-8'
    ? createTransformStream()
    : iconv.encodeStream(encoding)
}

export default charsetEncodeStream

packages/charset/index.js

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
import detectStream from '@datastream/charset/detect'
2+
import decodeStream from '@datastream/charset/decode'
3+
import encodeStream from '@datastream/charset/encode'
4+
5+
// The three stream factories, keyed by their short names; this object is
// also the package's default export.
const streams = { detectStream, decodeStream, encodeStream }

// Named exports expose the same factories under their `charset*` names.
export const {
  detectStream: charsetDetectStream,
  decodeStream: charsetDecodeStream,
  encodeStream: charsetEncodeStream
} = streams

export default streams

packages/charset/package-lock.json

Lines changed: 102 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/charset/package.json

Lines changed: 135 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,135 @@
1+
{
2+
"name": "@datastream/charset",
3+
"version": "0.0.3",
4+
"description": "",
5+
"type": "module",
6+
"engines": {
7+
"node": ">=18"
8+
},
9+
"engineStrict": true,
10+
"publishConfig": {
11+
"access": "public"
12+
},
13+
"main": "./index.web.mjs",
14+
"module": "./index.web.mjs",
15+
"exports": {
16+
".": {
17+
"node": {
18+
"webstream": {
19+
"types": "./index.d.ts",
20+
"default": "./index.web.mjs"
21+
},
22+
"import": {
23+
"types": "./index.d.ts",
24+
"default": "./index.node.mjs"
25+
},
26+
"require": {
27+
"types": "./index.d.ts",
28+
"default": "./index.node.cjs"
29+
}
30+
},
31+
"import": {
32+
"types": "./index.d.ts",
33+
"default": "./index.web.mjs"
34+
}
35+
},
36+
"./detect": {
37+
"node": {
38+
"webstream": {
39+
"types": "./detect.d.ts",
40+
"default": "./detect.web.mjs"
41+
},
42+
"import": {
43+
"types": "./detect.d.ts",
44+
"default": "./detect.node.mjs"
45+
},
46+
"require": {
47+
"types": "./detect.d.ts",
48+
"default": "./detect.node.cjs"
49+
}
50+
},
51+
"import": {
52+
"types": "./detect.d.ts",
53+
"default": "./detect.web.mjs"
54+
}
55+
},
56+
"./decode": {
57+
"node": {
58+
"webstream": {
59+
"types": "./decode.d.ts",
60+
"default": "./decode.web.mjs"
61+
},
62+
"import": {
63+
"types": "./decode.d.ts",
64+
"default": "./decode.node.mjs"
65+
},
66+
"require": {
67+
"types": "./decode.d.ts",
68+
"default": "./decode.node.cjs"
69+
}
70+
},
71+
"import": {
72+
"types": "./decode.d.ts",
73+
"default": "./decode.web.mjs"
74+
}
75+
},
76+
"./encode": {
77+
"node": {
78+
"webstream": {
79+
"types": "./encode.d.ts",
80+
"default": "./encode.web.mjs"
81+
},
82+
"import": {
83+
"types": "./encode.d.ts",
84+
"default": "./encode.node.mjs"
85+
},
86+
"require": {
87+
"types": "./encode.d.ts",
88+
"default": "./encode.node.cjs"
89+
}
90+
},
91+
"import": {
92+
"types": "./encode.d.ts",
93+
"default": "./encode.web.mjs"
94+
}
95+
}
96+
},
97+
"types": "index.d.ts",
98+
"files": [
99+
"*.mjs",
100+
"*.cjs",
101+
"*.map",
102+
"*.d.ts"
103+
],
104+
"scripts": {
105+
"test": "npm run test:unit",
106+
"test:unit": "ava",
107+
"test:benchmark": "node __benchmarks__/index.js"
108+
},
109+
"license": "MIT",
110+
"keywords": [
111+
"Web Stream API",
112+
"Node Stream API"
113+
],
114+
"author": {
115+
"name": "datastream contributors",
116+
"url": "https://github.com/willfarrell/datastream/graphs/contributors"
117+
},
118+
"repository": {
119+
"type": "git",
120+
"url": "github:willfarrell/datastream",
121+
"directory": "packages/charset"
122+
},
123+
"bugs": {
124+
"url": "https://github.com/willfarrell/datastream/issues"
125+
},
126+
"homepage": "https://datastream.js.org",
127+
"dependencies": {
128+
"@datastream/core": "0.0.3",
129+
"charset-detector": "0.0.2"
130+
},
131+
"devDependencies": {
132+
"@datastream/charset": "0.0.2"
133+
},
134+
"gitHead": "70834bdf6ea9d690ca90a079371b73a0c7ea4a14"
135+
}

0 commit comments

Comments
 (0)