-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
506 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,74 @@ | ||
// Airtable column names, exported as a flat list of strings. | ||
// Presumably this is the module loaded as './airtableFields' and passed as the | ||
// `fields` option of the Airtable select call - TODO confirm the file name. | ||
module.exports = [ | ||
'Identifier', | ||
'Title', | ||
'Creator', | ||
'Description', | ||
'Subject: Top level genealogy per language', | ||
'Subject: Language Continent of Origin', | ||
'Subject: Language Nation of Origin', | ||
'Subject: Speaker Genders', | ||
'Contributor: Speakers', | ||
'Contributor: Caption Authors', | ||
'Contributor: Videographer', | ||
'Contributor: Description', | ||
'Date Created', | ||
'Type', | ||
'Format', | ||
'Language names', | ||
'Languages: Speaker preferred names', | ||
'Languages: ISO Code (639-3)', | ||
'Languages: Glottocode', | ||
'Languages: Dialect Glottocode', | ||
'Languages: Macrolanguage ISO Code', | ||
'Caption Languages', | ||
// NOTE(review): the spoken-language column above says 639-3; confirm the Airtable column really is named 639-6. | ||
'Caption Languages: ISO Code (639-6)', | ||
'Caption Languages: Glottocode', | ||
'Caption File Identifier', | ||
'Caption File Links', | ||
'Coverage: Video Nation', | ||
'Coverage: Video Territory', | ||
'Coverage: Distribution', | ||
'Rights', | ||
'Publisher', | ||
'Date Received', | ||
'Encoded Data', | ||
'Tagged Data', | ||
'Duration', | ||
'Format T', | ||
'Format Profile', | ||
'Codec ID', | ||
'File size', | ||
'Format Info', | ||
'Format Settings', | ||
'Format Settings CABAC', | ||
'Format Settings ReFrames', | ||
'Codec ID/Info', | ||
'Bit rate', | ||
'Width', | ||
'Height', | ||
'Display Aspect Ratio', | ||
'Frame Rate', | ||
'Standard', | ||
'Color Space', | ||
'Chroma Subsampling', | ||
'Bit Depth', | ||
'Scan Type', | ||
'Bits (Pixel*Frame)', | ||
'Stream size', | ||
'Color range', | ||
'Color primaries', | ||
'Transfer characteristics', | ||
'Matrix coefficients', | ||
'Codec configuration box', | ||
'Format audio', | ||
'Format/Info Audio', | ||
'Bit Rate Audio', | ||
'Bit rate mode audio', | ||
'Codec ID Audio', | ||
'Channel(s)', | ||
'Channel layout', | ||
'Compression mode', | ||
'Sampling rate', | ||
'Stream size audio', | ||
'Subjects Reference ID: Ethnologue' | ||
]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
#!/usr/bin/env node | ||
|
||
'use strict'; | ||
var Airtable = require('airtable'); | ||
const createCsvWriter = require('csv-writer').createObjectCsvWriter; | ||
const fs = require('fs'); | ||
const { argv } = require('process'); | ||
|
||
const fields = require('./fields'); | ||
const airtableFields = require('./airtableFields'); | ||
|
||
// Optional command-line arguments: the third and fourth argv entries.
const [, , year, month] = argv;

// Airtable filter that keeps records whose Identifier contains "_<year>" or
// "_<year><month>". Stays undefined when no year was supplied.
let formula;
if (year) {
  const identifierFragment = month ? `_${year}${month}` : `_${year}`;
  formula = `FIND("${identifierFragment}",Identifier)>0`;
}
|
||
/**
 * Fetches all records of the '🍩 Oral Histories' table through the
 * "Archival View (Comprehensive)" view, page by page.
 *
 * The module-level `formula` is sent as `filterByFormula` only when it is set.
 * Any error reported by Airtable (or thrown while building the query) is
 * logged and ends the process with exit code 1, so the returned promise never
 * rejects.
 *
 * @returns {Promise<Array>} resolves with every record delivered by Airtable.
 */
async function getRecords () {
  return new Promise(resolve => {
    const allRecords = [];
    try {
      const base = new Airtable({apiKey: process.env.APIKEY}).base(process.env.BASE);

      const selectOptions = {
        view: "Archival View (Comprehensive)",
        cellFormat: "string",
        timeZone: "America/New_York",
        userLocale: "en-ca",
        fields: airtableFields
      };
      // Leave the key out entirely when no filter was requested.
      // NOTE(review): airtable.js appears to validate every supplied select
      // parameter, so an explicit `filterByFormula: undefined` is presumably
      // rejected as a non-string - verify against the library version in use.
      if (formula) {
        selectOptions.filterByFormula = formula;
      }

      base('🍩 Oral Histories').select(selectOptions).eachPage(function page(records, fetchNextPage) {
        allRecords.push(...records);
        fetchNextPage();
      }, function done(err) {
        if (err) {
          console.error(err);
          return process.exit(1);
        } else {
          resolve(allRecords);
        }
      });
    } catch (e) {
      console.error(e);
      return process.exit(1);
    }
  });
}
|
||
/**
 * Builds one CSV row for an Airtable record.
 *
 * @param {object} record - record handed to each field's `getValue`.
 * @returns {object} object keyed by each field's `name`, in `fields` order.
 */
function getCsvRow (record) {
  const csvRow = {};
  for (const column of fields) {
    csvRow[column.name] = column.getValue(record);
  }
  return csvRow;
}
|
||
// Names of the entries found in the staging directory; only records whose
// Identifier matches one of them are written to the CSV.
const stagedDirectories = new Set(fs.readdirSync(process.env.IA_STAGING));

getRecords().then(airtableRecords => {
  const rows = airtableRecords
    .filter(record => stagedDirectories.has(record.get('Identifier')))
    .map(getCsvRow);

  const header = fields.map(field => ({id: field.name, title: field.name}));

  const csvWriter = createCsvWriter({
    path: 'upload.csv',
    header: header
  });

  return csvWriter.writeRecords(rows);
}).then(() => {
  console.log('Done!');
}).catch(err => {
  // Without this handler a failure while mapping rows or writing the CSV
  // would surface only as an unhandled promise rejection.
  console.error(err);
  process.exit(1);
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
// Creates the default value getter for a column: it reads `field` from the
// record and substitutes an empty string for any falsy value.
function baseMapper (field) {
  return function readField (record) {
    const value = record.get(field);
    return value ? value : '';
  };
}
|
||
/**
 * Describes one output column: where its value comes from and what it is
 * called in the output.
 */
class Field {
  /**
   * @param {?string} wtField - source column name read by the default getter.
   * @param {string} iaField - output column name, stored as `name`.
   * @param {function} [mapper] - custom `record => value` getter; when
   *   omitted the default getter for `wtField` is used.
   */
  constructor (wtField, iaField, mapper) {
    this.wtField = wtField;
    this.name = iaField;
    this.getValue = mapper ? mapper : baseMapper(wtField);
  }
}

module.exports = Field;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,125 @@ | ||
const Field = require('./field'); | ||
|
||
// Returns the relative path of the edited video inside the record's own
// directory: ./<Identifier>/data/<Identifier>__video_edited.mp4
function fileMapper (record) {
  const identifier = record.get('Identifier');
  const dataDirectory = `./${identifier}/data`;
  return `${dataDirectory}/${identifier}__video_edited.mp4`;
}
|
||
// Returns the first non-empty value among 'Date Created' and 'Date Received',
// or an empty string when both are missing.
const dateMapper = record => {
  for (const column of ['Date Created', 'Date Received']) {
    const value = record.get(column);
    if (value) {
      return value;
    }
  }
  return '';
};
|
||
/**
 * Extracts the trailing two-letter country code from the
 * 'Coverage: Video Territory' value, e.g. "New-York_US" => "US".
 * IA coverage should be the ISO 3166 2-letter country code.
 *
 * @param {object} record - object exposing `get(columnName)`.
 * @returns {string} the two-letter code, or '' when the territory is empty or
 *   does not end in "_XX".
 */
const coverageMapper = record => {
  const territory = record.get('Coverage: Video Territory');
  if (!territory) {
    return '';
  }
  // String.prototype.match returns null (not an empty array) when nothing
  // matches, so the result must be null-checked before reading the group.
  const match = territory.match(/_([A-Z]{2})$/);
  return match ? match[1] : '';
};
|
||
// Turns the 'Creator' value into display names.
// e.g. Daniel_BogreUdell_20130101 => Daniel Bogre Udell
// The value may hold several ids separated by ", "; each is converted and the
// results are joined with ", " again.
const creatorMapper = record => {
  const rawCreators = record.get('Creator');
  if (!rawCreators) {
    return '';
  }

  const displayNames = [];
  for (const creatorId of rawCreators.split(', ')) {
    const parts = creatorId.split('_');
    // Drop the last underscore-separated segment (the date in the example).
    parts.pop();
    // Insert a space at every lower-case/upper-case boundary.
    displayNames.push(parts.join(' ').replace(/([a-z])([A-Z])/g, '$1 $2'));
  }
  return displayNames.join(', ');
};
|
||
module.exports = [ | ||
// reserved IA metadata fields | ||
new Field('Identifier', 'identifier', record => `TEST_${record.get('Identifier').replace('+', '-')}`), | ||
new Field(null, 'file', fileMapper), | ||
new Field('Title', 'title'), | ||
new Field(null, 'creator', creatorMapper), | ||
new Field(null, 'date', dateMapper), | ||
new Field('Description', 'description'), | ||
new Field('Display Aspect Ratio', 'aspect_ratio'), | ||
new Field('Codec ID Audio', 'audio_codec'), | ||
new Field('Sampling rate', 'audio_sample_rate'), | ||
new Field(null, 'coverage', coverageMapper), | ||
new Field('Frame Rate', 'frames_per_second'), | ||
new Field('Language names', 'language'), | ||
new Field(null, 'mediatype', () => 'movies'), | ||
new Field(null, 'publisher', () => 'Wikitongues, Inc.'), | ||
new Field('Rights', 'rights'), | ||
new Field('Duration', 'runtime'), | ||
new Field('Height', 'source_pixel_height'), | ||
new Field('Width', 'source_pixel_width'), | ||
// Wikitongues fields | ||
new Field('Subject: Top level genealogy per language', 'subject_top_level_genealogy_per_language'), | ||
new Field('Subject: Language Continent of Origin', 'subject_language_continent_of_origin'), | ||
new Field('Subject: Language Nation of Origin', 'subject_language_nation_of_origin'), | ||
new Field('Subject: Speaker Genders', 'subject_speaker_genders'), | ||
new Field('Contributor: Speakers', 'contributor_speakers'), | ||
new Field('Contributor: Caption Authors', 'contributor_caption_authors'), | ||
new Field('Contributor: Videographer', 'contributor_videographer'), | ||
new Field('Contributor: Description', 'contributor_description'), | ||
new Field('Date Created', 'date_created'), | ||
new Field('Type', 'type'), | ||
new Field('Format', 'format'), | ||
new Field('Languages: Speaker preferred names', 'languages_speaker_preferred_names'), | ||
new Field('Languages: ISO Code (639-3)', 'languages_iso_code'), | ||
new Field('Languages: Glottocode', 'languages_glottocode'), | ||
new Field('Languages: Dialect Glottocode', 'languages_dialect_glottocode'), | ||
new Field('Languages: Macrolanguage ISO Code', 'languages_macrolanguage_iso_code'), | ||
new Field('Caption Languages', 'caption_languages'), | ||
new Field('Caption Languages: ISO Code (639-6)', 'caption_languages_iso_code'), | ||
new Field('Caption Languages: Glottocode', 'caption_languages_glottocode'), | ||
new Field('Caption File Identifier', 'caption_file_identifier'), | ||
new Field('Caption File Links', 'caption_file_links'), | ||
new Field('Coverage: Video Nation', 'coverage_video_nation'), | ||
new Field('Coverage: Video Territory', 'coverage_video_territory'), | ||
new Field('Coverage: Distribution', 'coverage_distribution'), | ||
new Field('Date Received', 'date_received'), | ||
new Field('Encoded Data', 'encoded_data'), | ||
new Field('Tagged Data', 'tagged_data'), | ||
new Field('Format T', 'format_t'), | ||
new Field('Format Profile', 'format_profile'), | ||
new Field('Codec ID', 'codec_id'), | ||
new Field('File size', 'file_size'), | ||
new Field('Format Info', 'format_info'), | ||
new Field('Format Settings', 'format_settings'), | ||
new Field('Format Settings CABAC', 'format_settings_cabac'), | ||
new Field('Format Settings ReFrames', 'format_settings_reframes'), | ||
new Field('Codec ID/Info', 'codec_id_info'), | ||
new Field('Bit rate', 'bit_rate'), | ||
new Field('Standard', 'standard'), | ||
new Field('Color Space', 'color_space'), | ||
new Field('Chroma Subsampling', 'chroma_subsampling'), | ||
new Field('Bit Depth', 'bit_depth'), | ||
new Field('Scan Type', 'scan_type'), | ||
new Field('Bits (Pixel*Frame)', 'bits'), | ||
new Field('Stream size', 'stream_size'), | ||
new Field('Color range', 'color_range'), | ||
new Field('Color primaries', 'color_primaries'), | ||
new Field('Transfer characteristics', 'transfer_characteristics'), | ||
new Field('Matrix coefficients', 'matrix_coefficients'), | ||
new Field('Codec configuration box', 'codec_configuration_box'), | ||
new Field('Format audio', 'format_audio'), | ||
new Field('Format/Info Audio', 'format_info_audio'), | ||
new Field('Bit Rate Audio', 'bit_rate_audio'), | ||
new Field('Bit rate mode audio', 'bit_rate_mode_audio'), | ||
new Field('Channel(s)', 'channels'), | ||
new Field('Channel layout', 'channel_layout'), | ||
new Field('Compression mode', 'compression_mode'), | ||
new Field('Stream size audio', 'stream_size_audio'), | ||
new Field('Subjects Reference ID: Ethnologue', 'subject_reference_id_ethnologue') | ||
]; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
#!/bin/bash

# Copies every ia-* file in the current directory to /usr/local/bin (named by
# the part before the first '.') and links the ia-csv-creator node script.

for file in ia-*; do
  # When nothing matches, bash leaves the literal pattern "ia-*" in $file;
  # skip it instead of trying to install a non-existent file.
  [ -e "$file" ] || continue

  # Keep only the part of the name before the first '.'.
  filename="${file%%.*}"
  chmod 755 "$file"
  cp "$file" "/usr/local/bin/$filename"
  # File names are passed as arguments so a '%' or '\' in a name is not
  # interpreted as part of the printf format.
  printf 'Installing %s to /usr/local/bin/%s.\n' "$file" "$filename"
done

# Install the node script for ia-csv-creator globally
npm link
ln -sf /usr/local/lib/node_modules/ia-csv-creator/createInternetArchiveCsv.js /usr/local/bin/ia-csv-creator

echo "Done!"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
#!/bin/bash

# Validates the optional -y <year> / -m <month> flags, loads ~/ia-config,
# truncates ~/ia-log and runs ia-csv-creator from the staging directory with
# its output appended to that log.

# if [ -z "$1" ]; then
#   printf "Usage: $ ia-prepare <directory name>\nPlease make sure you reference a desired oral history directory to prepare.\n"
#   exit 1
# fi

if ! [[ -f ~/ia-config ]]; then
  echo "Couldn't find ia-config."
  exit 1
fi

month=''
year=''
while getopts 'm:y:' flag; do
  case "${flag}" in
    m) month="${OPTARG}" ;;
    y) year="${OPTARG}" ;;
    *) echo "Invalid flag ${flag}"
       exit 1 ;;
  esac
done

# The alternation must be grouped: without the parentheses the anchors bind to
# one branch each, so values such as "019" or "x12" would be accepted.
month_pattern='^(0[1-9]|1[0-2])$'

if [ -n "$year" ]; then
  # Year flag provided
  if ! [[ $year =~ ^[0-9]{4}$ ]]; then
    echo "Invalid year $year"
    exit 1
  fi

  if [ -n "$month" ]; then
    # Month flag provided
    if ! [[ $month =~ $month_pattern ]]; then
      echo "Invalid month $month"
      echo "Must be 01-12"
      exit 1
    fi
  fi
elif [ -n "$month" ]; then
  echo "Must set year, e.g. $ loc -m 10 -y 2020"
  exit 1
else
  # NOTE(review): not read anywhere else in this script - confirm whether it is still needed.
  directories=$@
fi

source ~/ia-config
> ~/ia-log

# Stop here if the staging directory is missing or unset, rather than running
# the CSV creator from whatever directory the script was started in.
if ! cd "$IA_Staging"; then
  echo "Couldn't change to staging directory '$IA_Staging'."
  exit 1
fi

APIKEY=$IA_AIRTABLE_APIKEY BASE=$IA_AIRTABLE_BASE IA_STAGING=$IA_Staging ia-csv-creator $year $month >> ~/ia-log 2>&1

echo "Done!"
Oops, something went wrong.