Skip to content

Commit

Permalink
Add files via upload
Browse files Browse the repository at this point in the history
  • Loading branch information
smrohrer authored Nov 16, 2020
1 parent 975d458 commit 7f92b67
Show file tree
Hide file tree
Showing 8 changed files with 506 additions and 0 deletions.
74 changes: 74 additions & 0 deletions airtableFields.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Airtable column names requested for each record.
// createInternetArchiveCsv.js passes this list as the `fields` option of the
// Airtable `select` call, and the mappers in fields.js read these columns via
// `record.get(...)`. Each string must match the Airtable column name exactly.
module.exports = [
'Identifier',
'Title',
'Creator',
'Description',
'Subject: Top level genealogy per language',
'Subject: Language Continent of Origin',
'Subject: Language Nation of Origin',
'Subject: Speaker Genders',
'Contributor: Speakers',
'Contributor: Caption Authors',
'Contributor: Videographer',
'Contributor: Description',
'Date Created',
'Type',
'Format',
'Language names',
'Languages: Speaker preferred names',
'Languages: ISO Code (639-3)',
'Languages: Glottocode',
'Languages: Dialect Glottocode',
'Languages: Macrolanguage ISO Code',
'Caption Languages',
'Caption Languages: ISO Code (639-6)',
'Caption Languages: Glottocode',
'Caption File Identifier',
'Caption File Links',
'Coverage: Video Nation',
'Coverage: Video Territory',
'Coverage: Distribution',
'Rights',
'Publisher',
'Date Received',
'Encoded Data',
'Tagged Data',
'Duration',
'Format T',
'Format Profile',
'Codec ID',
'File size',
'Format Info',
'Format Settings',
'Format Settings CABAC',
'Format Settings ReFrames',
'Codec ID/Info',
'Bit rate',
'Width',
'Height',
'Display Aspect Ratio',
'Frame Rate',
'Standard',
'Color Space',
'Chroma Subsampling',
'Bit Depth',
'Scan Type',
'Bits (Pixel*Frame)',
'Stream size',
'Color range',
'Color primaries',
'Transfer characteristics',
'Matrix coefficients',
'Codec configuration box',
'Format audio',
'Format/Info Audio',
'Bit Rate Audio',
'Bit rate mode audio',
'Codec ID Audio',
'Channel(s)',
'Channel layout',
'Compression mode',
'Sampling rate',
'Stream size audio',
'Subjects Reference ID: Ethnologue'
];
79 changes: 79 additions & 0 deletions createInternetArchiveCsv.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
#!/usr/bin/env node

'use strict';
var Airtable = require('airtable');
const createCsvWriter = require('csv-writer').createObjectCsvWriter;
const fs = require('fs');
const { argv } = require('process');

const fields = require('./fields');
const airtableFields = require('./airtableFields');

const year = argv[2];
const month = argv[3];

// Optional Airtable filter: keep only records whose Identifier contains
// "_<year>" or "_<year><month>". Left undefined when no year argument is given.
let formula;
if (year) {
  const datePrefix = month ? `${year}${month}` : year;
  formula = `FIND("_${datePrefix}",Identifier)>0`;
}

/**
 * Fetches every record of the oral-histories table that passes `formula`,
 * walking all result pages.
 *
 * Credentials come from the APIKEY and BASE environment variables.
 * Any failure (client setup or a page request) is logged and terminates the
 * process with exit status 1, so the returned promise never rejects.
 *
 * @returns {Promise<Array>} resolves with all fetched Airtable records
 */
async function getRecords () {
  return new Promise(resolve => {
    const collected = [];

    // Shared failure path: log the error and abort the whole script.
    const abort = error => {
      console.error(error);
      return process.exit(1);
    };

    try {
      const base = new Airtable({apiKey: process.env.APIKEY}).base(process.env.BASE);

      const query = base('🍩 Oral Histories').select({
        view: "Archival View (Comprehensive)",
        cellFormat: "string",
        timeZone: "America/New_York",
        userLocale: "en-ca",
        filterByFormula: formula,
        fields: airtableFields
      });

      query.eachPage(
        (records, fetchNextPage) => {
          collected.push(...records);
          fetchNextPage();
        },
        err => {
          if (err) {
            return abort(err);
          }
          resolve(collected);
        }
      );
    } catch (e) {
      return abort(e);
    }
  });
}

/**
 * Builds one CSV row for a record: an object keyed by each field's output
 * name, holding the value that field's mapper extracts from the record.
 *
 * @param {object} record - Airtable record passed to every field's getValue
 * @returns {object} row object in the order of the `fields` list
 */
function getCsvRow (record) {
  const row = {};
  for (const field of fields) {
    row[field.name] = field.getValue(record);
  }
  return row;
}

// Names of the entries currently present in the staging directory; only
// records whose Identifier matches one of them are written to the CSV.
const stagedDirectories = new Set(fs.readdirSync(process.env.IA_STAGING));

getRecords().then(airtableRecords => {
  const rows = airtableRecords
    .filter(record => stagedDirectories.has(record.get('Identifier')))
    .map(getCsvRow);

  // Column ids and titles are both the output field names.
  const header = fields.map(field => ({id: field.name, title: field.name}));

  const csvWriter = createCsvWriter({
    path: 'upload.csv',
    header: header
  });

  return csvWriter.writeRecords(rows);
}).then(() => {
  console.log('Done!');
}).catch(err => {
  // Without this handler a mapper or CSV-write failure would be an unhandled
  // rejection; fail the same way getRecords does so callers see a non-zero
  // exit status.
  console.error(err);
  process.exit(1);
});
9 changes: 9 additions & 0 deletions field.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
// Default mapper factory: given a column name, returns a function that reads
// that column from a record and falls back to '' when the value is falsy.
const baseMapper = field => {
  return record => {
    const value = record.get(field);
    return value || '';
  };
};

module.exports = class Field {
constructor (wtField, iaField, mapper) {
this.wtField = wtField;
this.name = iaField;
this.getValue = mapper || baseMapper(wtField);
}
}
125 changes: 125 additions & 0 deletions fields.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
const Field = require('./field');

// Relative path to a record's edited video file:
// ./<Identifier>/data/<Identifier>__video_edited.mp4
const fileMapper = record => {
  const identifier = record.get('Identifier');
  const fileName = `${identifier}__video_edited.mp4`;
  return `./${identifier}/data/${fileName}`;
};

// Picks the record's date: 'Date Created' when set, otherwise
// 'Date Received', otherwise an empty string.
const dateMapper = record => {
  for (const column of ['Date Created', 'Date Received']) {
    const value = record.get(column);
    if (value) {
      return value;
    }
  }
  return '';
};

// Extracts the IA `coverage` value from 'Coverage: Video Territory'.
// The territory looks like "New-York_US"; IA coverage should be the
// ISO 3166 2-letter country code, i.e. the two capitals after the last "_".
// Returns '' when the territory is empty or has no such suffix.
const coverageMapper = record => {
  const territory = record.get('Coverage: Video Territory');
  if (!territory) {
    return '';
  }
  // String.prototype.match yields null (not an empty array) on no match,
  // so test the result itself before reading the capture group.
  const match = territory.match(/_([A-Z]{2})$/);
  return match ? match[1] : '';
};

// Turns the 'Creator' column into human-readable names.
// e.g. Daniel_BogreUdell_20130101 => Daniel Bogre Udell
// The column can also hold a ", "-separated list of ids; each id is
// converted and the list is re-joined with ", ".
const creatorMapper = record => {
  const rawCreators = record.get('Creator');
  if (!rawCreators) {
    return '';
  }

  const names = [];
  for (const id of rawCreators.split(', ')) {
    const parts = id.split('_');
    // Drop the trailing segment (the date in the example above).
    parts.pop();
    // Insert a space at each lower-to-upper case boundary.
    names.push(parts.join(' ').replace(/([a-z])([A-Z])/g, '$1 $2'));
  }
  return names.join(', ');
};

module.exports = [
// reserved IA metadata fields
new Field('Identifier', 'identifier', record => `TEST_${record.get('Identifier').replace('+', '-')}`),
new Field(null, 'file', fileMapper),
new Field('Title', 'title'),
new Field(null, 'creator', creatorMapper),
new Field(null, 'date', dateMapper),
new Field('Description', 'description'),
new Field('Display Aspect Ratio', 'aspect_ratio'),
new Field('Codec ID Audio', 'audio_codec'),
new Field('Sampling rate', 'audio_sample_rate'),
new Field(null, 'coverage', coverageMapper),
new Field('Frame Rate', 'frames_per_second'),
new Field('Language names', 'language'),
new Field(null, 'mediatype', () => 'movies'),
new Field(null, 'publisher', () => 'Wikitongues, Inc.'),
new Field('Rights', 'rights'),
new Field('Duration', 'runtime'),
new Field('Height', 'source_pixel_height'),
new Field('Width', 'source_pixel_width'),
// Wikitongues fields
new Field('Subject: Top level genealogy per language', 'subject_top_level_genealogy_per_language'),
new Field('Subject: Language Continent of Origin', 'subject_language_continent_of_origin'),
new Field('Subject: Language Nation of Origin', 'subject_language_nation_of_origin'),
new Field('Subject: Speaker Genders', 'subject_speaker_genders'),
new Field('Contributor: Speakers', 'contributor_speakers'),
new Field('Contributor: Caption Authors', 'contributor_caption_authors'),
new Field('Contributor: Videographer', 'contributor_videographer'),
new Field('Contributor: Description', 'contributor_description'),
new Field('Date Created', 'date_created'),
new Field('Type', 'type'),
new Field('Format', 'format'),
new Field('Languages: Speaker preferred names', 'languages_speaker_preferred_names'),
new Field('Languages: ISO Code (639-3)', 'languages_iso_code'),
new Field('Languages: Glottocode', 'languages_glottocode'),
new Field('Languages: Dialect Glottocode', 'languages_dialect_glottocode'),
new Field('Languages: Macrolanguage ISO Code', 'languages_macrolanguage_iso_code'),
new Field('Caption Languages', 'caption_languages'),
new Field('Caption Languages: ISO Code (639-6)', 'caption_languages_iso_code'),
new Field('Caption Languages: Glottocode', 'caption_languages_glottocode'),
new Field('Caption File Identifier', 'caption_file_identifier'),
new Field('Caption File Links', 'caption_file_links'),
new Field('Coverage: Video Nation', 'coverage_video_nation'),
new Field('Coverage: Video Territory', 'coverage_video_territory'),
new Field('Coverage: Distribution', 'coverage_distribution'),
new Field('Date Received', 'date_received'),
new Field('Encoded Data', 'encoded_data'),
new Field('Tagged Data', 'tagged_data'),
new Field('Format T', 'format_t'),
new Field('Format Profile', 'format_profile'),
new Field('Codec ID', 'codec_id'),
new Field('File size', 'file_size'),
new Field('Format Info', 'format_info'),
new Field('Format Settings', 'format_settings'),
new Field('Format Settings CABAC', 'format_settings_cabac'),
new Field('Format Settings ReFrames', 'format_settings_reframes'),
new Field('Codec ID/Info', 'codec_id_info'),
new Field('Bit rate', 'bit_rate'),
new Field('Standard', 'standard'),
new Field('Color Space', 'color_space'),
new Field('Chroma Subsampling', 'chroma_subsampling'),
new Field('Bit Depth', 'bit_depth'),
new Field('Scan Type', 'scan_type'),
new Field('Bits (Pixel*Frame)', 'bits'),
new Field('Stream size', 'stream_size'),
new Field('Color range', 'color_range'),
new Field('Color primaries', 'color_primaries'),
new Field('Transfer characteristics', 'transfer_characteristics'),
new Field('Matrix coefficients', 'matrix_coefficients'),
new Field('Codec configuration box', 'codec_configuration_box'),
new Field('Format audio', 'format_audio'),
new Field('Format/Info Audio', 'format_info_audio'),
new Field('Bit Rate Audio', 'bit_rate_audio'),
new Field('Bit rate mode audio', 'bit_rate_mode_audio'),
new Field('Channel(s)', 'channels'),
new Field('Channel layout', 'channel_layout'),
new Field('Compression mode', 'compression_mode'),
new Field('Stream size audio', 'stream_size_audio'),
new Field('Subjects Reference ID: Ethnologue', 'subject_reference_id_ethnologue')
];
14 changes: 14 additions & 0 deletions ia-install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
#!/bin/bash
#
# Installs every ia-* file from the current directory into /usr/local/bin
# (named after the part of the file name before the first '.'), then links
# the Node CSV creator as `ia-csv-creator`.

for file in ia-*; do
  # With no match the glob is left as the literal "ia-*"; skip it instead of
  # handing a non-existent path to chmod/cp.
  [ -e "$file" ] || continue

  # Strip everything from the first '.' onwards, e.g. ia-prepare.sh -> ia-prepare.
  filename="${file%%.*}"
  chmod 755 "$file"
  cp "$file" "/usr/local/bin/$filename"
  # File names are passed as arguments, not as part of the format string,
  # so a '%' or '\' in a name cannot corrupt the output.
  printf 'Installing %s to /usr/local/bin/%s.\n' "$file" "$filename"
done

# Install the node script for ia-csv-creator globally
npm link
ln -sf /usr/local/lib/node_modules/ia-csv-creator/createInternetArchiveCsv.js /usr/local/bin/ia-csv-creator

echo "Done!"
53 changes: 53 additions & 0 deletions ia-prepare.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
#!/bin/bash
#
# Generates the Internet Archive upload CSV for the staged oral histories.
#
# Usage: ia-prepare [-y YYYY [-m MM]]
#   -y  four-digit year used to filter records
#   -m  two-digit month (01-12); only valid together with -y
#
# Reads settings from ~/ia-config, runs ia-csv-creator from inside the
# staging directory, and writes that command's output to ~/ia-log.

# if [ -z "$1" ]; then
#   printf "Usage: $ ia-prepare <directory name>\nPlease make sure you reference a desired oral history directory to prepare.\n"
#   exit 1
# fi

if ! [[ -f ~/ia-config ]]; then
  echo "Couldn't find ia-config."
  exit 1
fi

month=''
year=''
while getopts 'm:y:' flag; do
  case "${flag}" in
    m) month="${OPTARG}" ;;
    y) year="${OPTARG}" ;;
    *) echo "Invalid flag ${flag}"
       exit 1 ;;
  esac
done

if [[ -n "$year" ]]; then
  # Year flag provided
  if ! [[ $year =~ ^[0-9]{4}$ ]]; then
    echo "Invalid year $year"
    exit 1
  fi

  if [[ -n "$month" ]]; then
    # Month flag provided
    # The alternation must be grouped: without the parentheses the anchors
    # bind to one branch each and values such as "019" or "x12" are accepted.
    if ! [[ $month =~ ^(0[1-9]|1[0-2])$ ]]; then
      echo "Invalid month $month"
      echo "Must be 01-12"
      exit 1
    fi
  fi
elif [[ -n "$month" ]]; then
  echo "Must set year, e.g. $ ia-prepare -m 10 -y 2020"
  exit 1
else
  # NOTE(review): this variable is not read anywhere else in this script.
  directories=$@
fi

source ~/ia-config
# Start each run with an empty log.
> ~/ia-log

# Abort if the staging directory is unavailable; otherwise the CSV would be
# written into whatever directory the script was started from.
if ! cd "$IA_Staging"; then
  echo "Couldn't change to staging directory '$IA_Staging'."
  exit 1
fi

# year/month are passed only when set, so the creator sees no empty arguments.
if ! APIKEY="$IA_AIRTABLE_APIKEY" BASE="$IA_AIRTABLE_BASE" IA_STAGING="$IA_Staging" \
    ia-csv-creator ${year:+"$year"} ${month:+"$month"} >> ~/ia-log 2>&1; then
  echo "ia-csv-creator failed; see ~/ia-log for details."
  exit 1
fi

echo "Done!"
Loading

0 comments on commit 7f92b67

Please sign in to comment.