Skip to content

Commit 2a65416

Browse files
committed
version bump 1.3.8: node inconsistency workaround
- Changing Buffer length is inconsistent, remedy is to explicitly slice buffer - updated test artifacts - added some notes to README
1 parent d0df77f commit 2a65416

40 files changed

+1462
-910
lines changed

README.md

+34-4
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
# Codepages for JS
22

33
[Codepages](https://en.wikipedia.org/wiki/Codepage) are character encodings. In
4-
many contexts, single-byte character sets are used in lieu of standard multibyte
5-
Unicode encodings. They use 256 characters with a simple mapping.
4+
many contexts, single- or double-byte character sets are used in lieu of Unicode
5+
encodings. The codepages map between characters and numbers.
66

77
[unicode.org](http://www.unicode.org/Public/MAPPINGS/) hosts lists of mappings.
88
The build script automatically downloads and parses the mappings in order to
@@ -81,7 +81,35 @@ In node:
8181

8282
var cptable = require('codepage/dist/cpexcel.full');
8383

84-
## Building the script
84+
## Rolling your own script
85+
86+
The `make.sh` script in the repo can take a manifest and generate JS source.
87+
88+
Usage:
89+
90+
bash make.sh path_to_manifest output_file_name JSVAR
91+
92+
where
93+
94+
- `JSVAR` is the name of the exported variable (generally `cptable`)
95+
- `output_file_name` is the output file (e.g. `cpexcel.js`, `cptable.js`)
96+
- `path_to_manifest` is the path to the manifest file.
97+
98+
The manifest file is expected to be a CSV with 3 columns:
99+
100+
<codepage number>,<source>,<size>
101+
102+
If a source is specified, the script will try to download the specified file and parse it.
103+
The file format is expected to follow the format from the unicode.org site.
104+
The size should be `1` for a single-byte codepage and `2` for a double-byte
105+
codepage. For mixed codepages (which use some single- and some double-byte
106+
codes), the script assumes the mapping is a prefix code and generates efficient
107+
JS code.
108+
109+
Generated scripts only include the mapping. `cat` a mapping with `cputils.js`
110+
to produce a complete script like `cpexcel.full.js`.
111+
112+
## Building the complete script
85113

86114
This script uses [voc](https://npm.im/voc). The script to build the codepage tables and
87115
the JS source is `codepage.md`, so building is as simple as `voc codepage.md`.
@@ -94,10 +122,11 @@ Some codepages are easier to implement algorithmically. Since these are
94122
hardcoded in utils, there is no corresponding entry (they are "magic").
95123

96124
| CP# | Information | Description |
97-
| --: | ----------- | ----------- |
125+
| --: | :----------: | :---------- |
98126
| 37| unicode.org |IBM EBCDIC US-Canada
99127
| 437| unicode.org |OEM United States
100128
| 500| unicode.org |IBM EBCDIC International
129+
| 620| NLS |Mazovia (Polish) MS-DOS
101130
| 708|MakeEncoding.cs|Arabic (ASMO 708)
102131
| 720|MakeEncoding.cs|Arabic (Transparent ASMO); Arabic (DOS)
103132
| 737| unicode.org |OEM Greek (formerly 437G); Greek (DOS)
@@ -118,6 +147,7 @@ hardcoded in utils, there is no corresponding entry (they are "magic")
118147
| 870|MakeEncoding.cs|IBM EBCDIC Multilingual/ROECE (Latin 2)
119148
| 874| unicode.org |Windows Thai
120149
| 875| unicode.org |IBM EBCDIC Greek Modern
150+
| 895| NLS |Kamenický (Czech) MS-DOS
121151
| 932| unicode.org |Japanese Shift-JIS
122152
| 936| unicode.org |Simplified Chinese GBK
123153
| 949| unicode.org |Korean

codepage.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -729,7 +729,7 @@ describe('failures', function() {
729729
```json>package.json
730730
{
731731
"name": "codepage",
732-
"version": "1.3.7",
732+
"version": "1.3.8",
733733
"author": "SheetJS",
734734
"description": "pure-JS library to handle codepages",
735735
"keywords": [ "codepage", "iconv", "convert", "strings" ],

cpexcel.js

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cptable.js

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

cputils.js

+16-10
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,9 @@
3232

3333
var sfcc = function sfcc(x) { return String.fromCharCode(x); };
3434
var cca = function cca(x){ return x.charCodeAt(0); };
35-
if(typeof Buffer !== 'undefined') {
35+
36+
var has_buf = (typeof Buffer !== 'undefined');
37+
if(has_buf) {
3638
var mdl = 1024, mdb = new Buffer(mdl);
3739
var make_EE = function make_EE(E){
3840
var EE = new Buffer(65536);
@@ -67,6 +69,7 @@
6769
}
6870
}
6971
out.length = j;
72+
out = out.slice(0,j);
7073
} else {
7174
out = Buffer(len);
7275
for(i = 0; i < len; ++i) out[i] = EE[data[i].charCodeAt(0)];
@@ -125,6 +128,7 @@
125128
out[k++] = EE[j+1] || EE[j]; if(EE[j+1] > 0) out[k++] = EE[j];
126129
}
127130
out.length = k;
131+
out = out.slice(0,k);
128132
} else if(Buffer.isBuffer(data)) {
129133
for(i = k = 0; i < len; ++i) {
130134
D = data[i];
@@ -139,6 +143,7 @@
139143
}
140144
}
141145
out.length = k;
146+
out = out.slice(0,k);
142147
} else {
143148
for(i = k = 0; i < len; i++) {
144149
j = data[i].charCodeAt(0)*2;
@@ -237,7 +242,7 @@
237242
}
238243

239244
var encache = function encache() {
240-
if(typeof Buffer !== 'undefined') {
245+
if(has_buf) {
241246
if(cpdcache[sbcs_cache[0]]) return;
242247
var i, s;
243248
for(i = 0; i < sbcs_cache.length; ++i) {
@@ -263,7 +268,7 @@
263268
};
264269
var cp_decache = function cp_decache(cp) { cpdcache[cp] = cpecache[cp] = undefined; };
265270
var decache = function decache() {
266-
if(typeof Buffer !== 'undefined') {
271+
if(has_buf) {
267272
if(!cpdcache[sbcs_cache[0]]) return;
268273
sbcs_cache.forEach(cp_decache);
269274
dbcs_cache.forEach(cp_decache);
@@ -286,9 +291,9 @@
286291
var encode = function encode(cp, data, ofmt) {
287292
if(cp === last_cp) { return last_enc(data, ofmt); }
288293
if(cpecache[cp] !== undefined) { last_enc = cpecache[last_cp=cp]; return last_enc(data, ofmt); }
289-
if(typeof Buffer !== 'undefined' && Buffer.isBuffer(data)) data = data.toString('utf8');
294+
if(has_buf && Buffer.isBuffer(data)) data = data.toString('utf8');
290295
var len = data.length;
291-
var out = typeof Buffer !== 'undefined' ? new Buffer(4*len) : [], w, i, j = 0, c, tt, ww;
296+
var out = has_buf ? new Buffer(4*len) : [], w, i, j = 0, c, tt, ww;
292297
var C = cpt[cp], E, M;
293298
if(C && (E=C.enc)) for(i = 0; i < len; ++i, ++j) {
294299
w = E[data[i]];
@@ -300,7 +305,7 @@
300305
}
301306
else if((M=magic[cp])) switch(M) {
302307
case "utf8":
303-
if(typeof Buffer !== 'undefined' && typeof data === "string") { out = new Buffer(data, M); j = out.length; break; }
308+
if(has_buf && typeof data === "string") { out = new Buffer(data, M); j = out.length; break; }
304309
for(i = 0; i < len; ++i, ++j) {
305310
w = data[i].charCodeAt(0);
306311
if(w <= 0x007F) out[j] = w;
@@ -322,15 +327,15 @@
322327
}
323328
break;
324329
case "ascii":
325-
if(typeof Buffer !== 'undefined' && typeof data === "string") { out = new Buffer(data, M); j = out.length; break; }
330+
if(has_buf && typeof data === "string") { out = new Buffer(data, M); j = out.length; break; }
326331
for(i = 0; i < len; ++i, ++j) {
327332
w = data[i].charCodeAt(0);
328333
if(w <= 0x007F) out[j] = w;
329334
else throw new Error("bad ascii " + w);
330335
}
331336
break;
332337
case "utf16le":
333-
if(typeof Buffer !== 'undefined' && typeof data === "string") { out = new Buffer(data, M); j = out.length; break; }
338+
if(has_buf && typeof data === "string") { out = new Buffer(data, M); j = out.length; break; }
334339
for(i = 0; i < len; ++i) {
335340
w = data[i].charCodeAt(0);
336341
out[j++] = w&255;
@@ -382,6 +387,7 @@
382387
}
383388
else throw new Error("Unrecognized CP: " + cp);
384389
out.length = j;
390+
out = out.slice(0,j);
385391
if(typeof Buffer === 'undefined') return (ofmt == 'str') ? out.map(sfcc).join("") : out;
386392
if(ofmt === undefined || ofmt === 'buf') return out;
387393
if(ofmt !== 'arr') return out.toString('binary');
@@ -422,13 +428,13 @@
422428
}
423429
break;
424430
case "ascii":
425-
if(typeof Buffer !== 'undefined' && Buffer.isBuffer(data)) return data.toString(M);
431+
if(has_buf && Buffer.isBuffer(data)) return data.toString(M);
426432
for(i = 0; i < len; i++) out[i] = String.fromCharCode(data[i]);
427433
k = len; break;
428434
case "utf16le":
429435
i = 0;
430436
if(len >= 2 && data[0] == 0xFF) if(data[1] == 0xFE) i = 2;
431-
if(typeof Buffer !== 'undefined' && Buffer.isBuffer(data)) return data.toString(M);
437+
if(has_buf && Buffer.isBuffer(data)) return data.toString(M);
432438
j = 2;
433439
for(; i < len; i+=j) {
434440
out[k++] = String.fromCharCode((data[i+1]<<8) + data[i]);

ctest/fixtures.js

+8-8
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

dist/cpexcel.full.js

+16-10
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)