diff --git a/Vagrantfile b/Vagrantfile
index 1a61212..f288414 100644
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -1,12 +1,18 @@
 # Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
+#
+# Note:
+# edit /etc/apt/sources.list to uncomment the following
+# deb http://security.ubuntu.com/ubuntu trusty-security multiverse
+# deb-src http://security.ubuntu.com/ubuntu trusty-security multiverse
+
 VAGRANTFILE_API_VERSION = "2"
 
 Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|
 
-  config.vm.box = "ubuntu/vivid64"
+  config.vm.box = "ubuntu/trusty64"
 
   # Change this to be something relevant to your project
-  config.vm.hostname = "cityscrape"
+  config.vm.hostname = "city-scrape"
 
   config.vm.provision :shell, inline: "apt-get -y install --fix-missing"
   config.vm.provision :shell, inline: "apt-get -y install python"
diff --git a/config/cityscrape-config.sh b/config/cityscrape-config.sh
index 43264dc..56d9774 100644
--- a/config/cityscrape-config.sh
+++ b/config/cityscrape-config.sh
@@ -19,5 +19,6 @@ export SOURCEFILE_URL='http://dynamic.stlouis-mo.gov/citydata/downloads/'
 
 # temporary file download and extraction before loading into database
 export WORKDIR=$BASEDIR/workdir
 export DDL_FILES=$WORKDIR/ddl
-export $DDL_FILE_SUFFIX=".sql"
+export DDL_FILE_SUFFIX=""
+export SHAPEFILE_MANIFEST=shp_file.manifest
\ No newline at end of file
diff --git a/db-ingest.sh b/db-ingest.sh
new file mode 100755
--- /dev/null
+++ b/db-ingest.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+#
+# Load every Access database (*.mdb) in $WORKDIR into the PostgreSQL database
+# "city": first the schema, then the rows of each table.
+# Run from the project root so the relative config path resolves.
+#
+# Something is off with the postgres configuration; this session reaches a
+# postgres shell on the box:
+# vagrant@city-scrape:/vagrant$ sudo su
+# root@city-scrape:/vagrant# su postgres
+# postgres@city-scrape:/vagrant$
+
+# Set in the body rather than on the shebang line so it also applies when the
+# script is started as "bash db-ingest.sh".
+set -e
+
+CONFIGFILE="config/cityscrape-config.sh"
+
+. "$CONFIGFILE"
+
+echo "Running Cityscrape PostgreSQL Ingest"
+pushd "$WORKDIR"
+
+# Make an unmatched *.mdb expand to nothing instead of the literal pattern.
+shopt -s nullglob
+
+for f in *.mdb; do
+  echo "Extracting tables from $f"
+
+  # "-f -" makes psql read the DDL from the pipe; -f requires an argument.
+  mdb-schema "$f" postgres \
+    | sed 's/Char/Varchar/g' \
+    | sed 's/Postgres_Unknown 0x0c/text/g' \
+    | psql -U vagrant city -a -f -
+
+  # mdb-tables -1 prints one table name per line, which avoids picking a
+  # word out of the CREATE TABLE statements.
+  tables=$(mdb-tables -1 "$f")
+
+  while IFS= read -r i; do
+    [[ -n "$i" ]] || continue
+    echo "[File: $f ] [Table - $i]"
+
+    # -D is a strftime(3) format and -I a backend name. Use ASCII quotes:
+    # typographic quotes are passed through as literal characters.
+    mdb-export -D '%Y-%m-%d %H:%M:%S' -I postgres -q \' -R \; "$f" "$i" \
+      | psql -U vagrant city -w
+  done <<< "$tables"
+done
+
+# # return to project root $BASEDIR
+popd
diff --git a/generate-ddl.sh b/generate-ddl.sh
new file mode 100755
--- /dev/null
+++ b/generate-ddl.sh
@@ -0,0 +1,48 @@
+#!/bin/bash
+#
+# For every Access database (*.mdb) in $WORKDIR, write its schema to
+# ddl/<database>$DDL_FILE_SUFFIX and create one file per table, named
+# <table>$DDL_FILE_SUFFIX and containing the table name.
+# Run from the project root so the relative config path resolves.
+
+CONFIGFILE="config/cityscrape-config.sh"
+
+. "$CONFIGFILE"
+
+# Do not write output into whatever directory the script was started from.
+pushd "$WORKDIR" || exit 1
+
+# Expand the glob directly instead of parsing ls, so file names with
+# whitespace stay in one piece; nullglob gives an empty list on no match.
+shopt -s nullglob
+mdb_files=(*.mdb)
+
+if [[ ${#mdb_files[@]} -eq 0 ]]; then
+  echo "No *.mdb files found, exiting..."
+else
+  # The redirect into ddl/ fails if the directory does not exist yet.
+  mkdir -p ddl
+
+  for mdb_file in "${mdb_files[@]}"; do
+    echo "Extracting tables from $mdb_file"
+    ddl_file=$mdb_file$DDL_FILE_SUFFIX
+
+    mdb-schema "$mdb_file" \
+      | sed 's/Char/Varchar/g' \
+      | sed 's/Postgres_Unknown 0x0c/text/g' > "ddl/$ddl_file"
+
+    # mdb-tables -1 prints one table name per line. Taking the third word of
+    # 'CREATE TABLE IF NOT EXISTS "name"' would yield "IF", not the name.
+    tables=$(mdb-tables -1 "$mdb_file")
+
+    if [[ -z "$tables" ]]; then
+      echo "No tables found, skipping table ddl generation."
+    else
+      while IFS= read -r table; do
+        [[ -n "$table" ]] || continue
+        printf '%s\n' "$table" > "$table$DDL_FILE_SUFFIX"
+      done <<< "$tables"
+    fi
+  done
+fi
+popd
diff --git a/generate-schema-from-ddl.sh b/generate-schema-from-ddl.sh
new file mode 100755
--- /dev/null
+++ b/generate-schema-from-ddl.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+#
+# For each *.mdb-named DDL dump in $DDL_FILES, write <name>.sql with every
+# "[" and "]" character removed. Exits with status 3 when no dump is found.
+# Run from the project root so the relative config path resolves.
+
+CONFIGFILE="config/cityscrape-config.sh"
+
+. "$CONFIGFILE"
+
+pushd "$DDL_FILES" || exit 1
+
+# Expand the glob directly instead of parsing ls; nullglob gives an empty
+# list when nothing matches.
+shopt -s nullglob
+files=(*.mdb)
+
+if [[ ${#files[@]} -eq 0 ]]; then
+  echo "No Schema Definitions Found, Exiting..."
+  exit 3
+fi
+
+for file in "${files[@]}"; do
+  tr -d "[]" < "$file" > "$file.sql"
+done
+
+popd
diff --git a/generate-shapefile-manifest.sh b/generate-shapefile-manifest.sh
new file mode 100755
--- /dev/null
+++ b/generate-shapefile-manifest.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+#
+# Rebuild $SHAPEFILE_MANIFEST in $WORKDIR with one ogr2ogr command line per
+# *.shp file found there. The commands are only written down, not run.
+# Run from the project root so the relative config path resolves.
+
+CONFIGFILE="config/cityscrape-config.sh"
+
+. "$CONFIGFILE"
+
+: "${SHAPEFILE_MANIFEST:?SHAPEFILE_MANIFEST must be set by $CONFIGFILE}"
+
+# The rm below must not run in whatever directory the script started from.
+pushd "$WORKDIR" || exit 1
+
+# Check for the file itself; whether the variable is non-empty says nothing
+# about an old manifest being present.
+if [[ -f "$SHAPEFILE_MANIFEST" ]]; then
+  echo "Removeing old shapefile manifest"
+  rm -- "$SHAPEFILE_MANIFEST"
+fi
+
+shopt -s nullglob
+shp_files=(*.shp)
+
+if [[ ${#shp_files[@]} -eq 0 ]]; then
+  echo "No *.shp files found, skipping ogr2ogr..."
+else
+  for shp_file in "${shp_files[@]}"; do
+    # The double quotes are part of the recorded text and %q escapes the file
+    # name, so each line can be parsed again by a shell as one command.
+    printf 'ogr2ogr -overwrite -progress -skipfailures -f "PostgreSQL" PG:"host=localhost user=postgres dbname=city" %q\n' \
+      "$shp_file" >> "$SHAPEFILE_MANIFEST"
+  done
+fi
+
+popd
diff --git a/run-cityscrape-get.sh b/get.sh
similarity index 100%
rename from run-cityscrape-get.sh
rename to get.sh
diff --git a/run-cityscrape-postgresql-ingest.sh b/run-cityscrape-postgresql-ingest.sh
old mode 100755
new mode 100644
index b775787..7d4db2d
--- a/run-cityscrape-postgresql-ingest.sh
+++ b/run-cityscrape-postgresql-ingest.sh
@@ -40,9 +40,9 @@ for f in *.mdb
 
     mdb-export -D ‘%%Y-%%m-%%d %%H:%%M:%%S’ -I postgress -q \’ -R \; $f $i | psql -d city -U postgres -w -h localhost
 
-  done
+      done
 
-done
+  done
 
 # # return to project root $BASEDIR
 popd
diff --git a/run-cityscrape.sh b/run-cityscrape.sh
new file mode 100755
--- /dev/null
+++ b/run-cityscrape.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+#
+# Pipeline driver. Only step 5 is enabled; the other steps are commented out.
+# Run from the project root so the relative paths resolve.
+
+# Set in the body rather than on the shebang line so it also applies when the
+# script is started as "bash run-cityscrape.sh".
+set -e
+
+CONFIGFILE="config/cityscrape-config.sh"
+
+. "$CONFIGFILE"
+
+# echo "Step 1: Fetching Cityscrape data"
+# ./get.sh
+
+# echo "Step 2: Unzipping archives"
+# ./unzip.sh
+
+# echo "Step 3: Generating DDL files"
+# ./generate-ddl.sh
+
+# echo "Step 4: Generating Shapefile load commands"
+# ./generate-shapefile-manifest.sh
+
+echo "Step 5: Generating Schema from ddl definitions"
+./generate-schema-from-ddl.sh
+
+# echo "Step 6: Upload Shapefiles to database"
+# ./upload-shapefiles-from-shapefile-manifest.sh
diff --git a/run-ddl-generation.sh b/run-ddl-generation.sh
old mode 100755
new mode 100644
index 1aa86f8..b19abee
--- a/run-ddl-generation.sh
+++ b/run-ddl-generation.sh
@@ -4,33 +4,6 @@ CONFIGFILE="config/cityscrape-config.sh"
 
 . $CONFIGFILE
 
-echo "Running Cityscrape PostgreSQL Ingest"
-
-pushd $WORKDIR
-echo "Unzipping files..."
-
-zip_files=$(echo `ls *.zip 2>/dev/null`)
-if [ -z "$zip_files" ]; then
-  echo "No *.zip files found, skipping unzip..."
-else
-  for zip_file in $zip_files;
-  do
-    unzip -o $zip_file
-  done
-  echo "Unzip complete"
-fi
-
-shp_files=$(echo `ls *.shp 2>/dev/null`)
-if [ -z "$shp_files" ]; then
-  echo "No *.shp files found, skipping ogr2ogr..."
-else
-  for shp_file in $shp_files;
-  do
-    echo `ls $shp_file`
-    # ogr2ogr -overwrite -progress -skipfailures -f "PostgreSQL" PG:"host=localhost user=postgres dbname=city" $shp_file
-  done
-fi
-
 pushd $DDL_FILES
 echo "Building ddl sql files now..."
 
@@ -57,7 +30,4 @@ else
     done
   fi
 done
-fi
-
-popd
-
+fi
\ No newline at end of file
diff --git a/unzip.sh b/unzip.sh
new file mode 100755
--- /dev/null
+++ b/unzip.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+#
+# Extract every *.zip archive in $WORKDIR, overwriting existing files.
+# Run from the project root so the relative config path resolves.
+
+CONFIGFILE="config/cityscrape-config.sh"
+
+. "$CONFIGFILE"
+
+echo "Running Cityscrape PostgreSQL Ingest"
+
+pushd "$WORKDIR" || exit 1
+echo "Unzipping files..."
+
+shopt -s nullglob
+zip_files=(*.zip)
+
+if [[ ${#zip_files[@]} -eq 0 ]]; then
+  # unzip needs an archive argument, so skip it when nothing matches.
+  echo "No *.zip files found, skipping unzip..."
+else
+  for zip_file in "${zip_files[@]}"; do
+    unzip -o "$zip_file"
+  done
+fi
+popd
diff --git a/upload-shapefiles-from-shapefile-manifest.sh b/upload-shapefiles-from-shapefile-manifest.sh
new file mode 100755
--- /dev/null
+++ b/upload-shapefiles-from-shapefile-manifest.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+#
+# Run the command lines recorded in $WORKDIR/$SHAPEFILE_MANIFEST.
+# Run from the project root so the relative config path resolves.
+
+CONFIGFILE="config/cityscrape-config.sh"
+
+. "$CONFIGFILE"
+
+pushd "$WORKDIR" || exit 1
+
+# Each manifest line is a complete, shell-quoted command. Expanding it as a
+# bare $line would split a quoted argument such as the PG:"..." connection
+# string into several words, so hand the file to a shell to parse instead.
+# The manifest is executed as code: only use one generated locally.
+bash -- "$SHAPEFILE_MANIFEST"
+popd