Skip to content

Commit

Permalink
Remove diacritics from identifiers
Browse files Browse the repository at this point in the history
  • Loading branch information
smrohrer committed Nov 18, 2020
1 parent 00d43fd commit 6ea59f4
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 3 deletions.
5 changes: 3 additions & 2 deletions createInternetArchiveCsv.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ const { argv } = require('process');

const fields = require('./fields');
const airtableFields = require('./airtableFields');
const {normalize} = require('./helpers');

const year = argv[2];
const month = argv[3];
Expand Down Expand Up @@ -59,11 +60,11 @@ function getCsvRow (record) {
}, {});
}

const stagedDirectories = new Set(fs.readdirSync(process.env.IA_STAGING));
const stagedDirectories = new Set(fs.readdirSync(process.env.IA_STAGING).map(normalize));

getRecords().then(airtableRecords => {
const rows = airtableRecords
.filter(record => stagedDirectories.has(record.get('Identifier')))
.filter(record => stagedDirectories.has(normalize(record.get('Identifier'))))
.map(getCsvRow);

const header = fields.map(field => ({id: field.name, title: field.name}));
Expand Down
3 changes: 2 additions & 1 deletion fields.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
const Field = require('./field');
const {normalize} = require('./helpers');

const fileMapper = record => {
const id = record.get('Identifier');
Expand Down Expand Up @@ -45,7 +46,7 @@ const creatorMapper = record => {

module.exports = [
// reserved IA metadata fields
new Field('Identifier', 'identifier', record => `${record.get('Identifier').replace('+', '-')}`),
new Field('Identifier', 'identifier', record => `${normalize(record.get('Identifier')).replace('+', '-')}`),
new Field(null, 'file', fileMapper),
new Field('Title', 'title'),
new Field(null, 'creator', creatorMapper),
Expand Down
3 changes: 3 additions & 0 deletions helpers.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Remove diacritics
// https://stackoverflow.com/a/37511463
exports.normalize = str => str.normalize("NFD").replace(/[\u0300-\u036f]/g, "");

0 comments on commit 6ea59f4

Please sign in to comment.