Skip to content

Commit

Permalink
ddl generation and Access to Postgresql marshalling with bash utils
Browse files Browse the repository at this point in the history
  • Loading branch information
dylanraithel committed Sep 2, 2015
1 parent 0513905 commit f8ab55f
Show file tree
Hide file tree
Showing 12 changed files with 174 additions and 36 deletions.
10 changes: 8 additions & 2 deletions Vagrantfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@

# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
#
# Note:
# edit /etc/apt/sources.list to uncomment the following
# deb http://security.ubuntu.com/ubuntu trusty-security multiverse
# deb-src http://security.ubuntu.com/ubuntu trusty-security multiverse

VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|

config.vm.box = "ubuntu/vivid64"
config.vm.box = "ubuntu/trusty64"
# Change this to be something relevant to your project
config.vm.hostname = "cityscrape"
config.vm.hostname = "city-scrape"

config.vm.provision :shell, inline: "apt-get -y install --fix-missing"
config.vm.provision :shell, inline: "apt-get -y install python"
Expand Down
3 changes: 2 additions & 1 deletion config/cityscrape-config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,6 @@ export SOURCEFILE_URL='http://dynamic.stlouis-mo.gov/citydata/downloads/'
# temporary file download and extraction before loading into database
export WORKDIR=$BASEDIR/workdir
export DDL_FILES=$WORKDIR/ddl
export $DDL_FILE_SUFFIX=".sql"
export DDL_FILE_SUFFIX=""

export SHAPEFILE_MANIFEST=shp_file.manifest
37 changes: 37 additions & 0 deletions db-ingest.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/bash
#
# db-ingest.sh - load every Access database (*.mdb) found in $WORKDIR into
# the PostgreSQL database "city" as user "vagrant".
#
# For each *.mdb file:
#   1. mdb-schema emits the PostgreSQL DDL, which is piped into psql.
#   2. every table is exported with mdb-export as INSERT statements and
#      piped into psql.
#
# Reads WORKDIR from config/cityscrape-config.sh. The config path is
# relative, so run this script from the project root.

# Abort on the first failing command. Set here instead of in the shebang so
# that it still applies when the script is started as "bash db-ingest.sh".
set -e

# Known issue (author's note): something is off with the postgres
# configuration. Session recorded by the author:
# vagrant@city-scrape:/vagrant$ sudo su
# root@city-scrape:/vagrant# su postgres
# postgres@city-scrape:/vagrant$

CONFIGFILE="config/cityscrape-config.sh"

. "$CONFIGFILE"

echo "Running Cityscrape PostgreSQL Ingest"
pushd "$WORKDIR"

# Without nullglob an unmatched "*.mdb" would be iterated as a literal name.
shopt -s nullglob

for f in *.mdb; do
  echo "Extracting tables from $f"

  # Create the tables. The two sed passes rewrite "Char" to "Varchar" and
  # "Postgres_Unknown 0x0c" to "text" in the generated DDL.
  # "-f -" makes psql read the script from stdin (a bare "-f" is an error).
  mdb-schema "$f" postgres \
    | sed 's/Char/Varchar/g' \
    | sed 's/Postgres_Unknown 0x0c/text/g' \
    | psql -U vagrant -d city -a -f -

  # Collect the table names: the text between the first pair of double quotes
  # on each CREATE TABLE line. The optional "IF NOT EXISTS" is skipped so the
  # name (not the word "IF") is returned for either form of the statement.
  mapfile -t tables < <(
    mdb-schema "$f" postgres \
      | sed -n 's/^CREATE TABLE \(IF NOT EXISTS \)\{0,1\}"\([^"]*\)".*/\2/p'
  )

  for table in "${tables[@]}"; do
    echo "[File: $f ] [Table - $table]"

    # -D: strftime date format, -I: emit INSERT statements for the postgres
    # backend, -q: quote text with a single quote, -R: row delimiter.
    mdb-export -D '%Y-%m-%d %H:%M:%S' -I postgres -q "'" -R ';' "$f" "$table" \
      | psql -U vagrant -d city -w
  done
done

# return to project root $BASEDIR
popd

32 changes: 32 additions & 0 deletions generate-ddl.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
#
# generate-ddl.sh - for every Access database (*.mdb) in $WORKDIR, write its
# schema to $WORKDIR/ddl/<mdb file name>$DDL_FILE_SUFFIX and create one file
# per table (named <table>$DDL_FILE_SUFFIX, containing the table name).
#
# Reads WORKDIR and DDL_FILE_SUFFIX from config/cityscrape-config.sh. The
# config path is relative, so run this script from the project root.

CONFIGFILE="config/cityscrape-config.sh"

. "$CONFIGFILE" || exit 1

# Never continue in the wrong directory if WORKDIR is unset or missing.
pushd "${WORKDIR:?WORKDIR is not set}" || exit 1

# Let the shell glob instead of parsing ls; nullglob yields an empty array
# when nothing matches.
shopt -s nullglob
mdb_files=(*.mdb)
shopt -u nullglob

if (( ${#mdb_files[@]} == 0 )); then
  echo "No *.mdb files found, exiting..."
else
  # The redirect below fails if the output directory does not exist yet.
  mkdir -p ddl

  for mdb_file in "${mdb_files[@]}"; do
    echo "Extracting tables from $mdb_file"
    ddl_file=$mdb_file$DDL_FILE_SUFFIX

    # Schema dump with the default mdb-schema backend; the sed passes rewrite
    # "Char" to "Varchar" and "Postgres_Unknown 0x0c" to "text".
    mdb-schema "$mdb_file" \
      | sed 's/Char/Varchar/g' \
      | sed 's/Postgres_Unknown 0x0c/text/g' \
      > "ddl/$ddl_file"

    # Table names: the text between the first pair of double quotes on each
    # CREATE TABLE line. "IF NOT EXISTS" is optional in the pattern so the
    # name itself is captured rather than the word "IF".
    mapfile -t tables < <(
      mdb-schema "$mdb_file" postgres \
        | sed -n 's/^CREATE TABLE \(IF NOT EXISTS \)\{0,1\}"\([^"]*\)".*/\2/p'
    )

    if (( ${#tables[@]} == 0 )); then
      echo "No tables found, skipping table ddl generation."
    else
      for table in "${tables[@]}"; do
        # NOTE(review): this only writes the table name into the file; it
        # looks like a placeholder for real per-table DDL - confirm.
        printf '%s\n' "$table" > "${table}${DDL_FILE_SUFFIX}"
      done
    fi
  done
fi
popd
19 changes: 19 additions & 0 deletions generate-schema-from-ddl.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
#!/bin/bash
#
# generate-schema-from-ddl.sh - for every *.mdb-named DDL file in $DDL_FILES,
# write a copy with all '[' and ']' characters removed to <file>.sql.
#
# Reads DDL_FILES from config/cityscrape-config.sh. The config path is
# relative, so run this script from the project root.
#
# Exit status: 3 when no input files are found.

CONFIGFILE="config/cityscrape-config.sh"

. "$CONFIGFILE" || exit 1

# Never continue in the wrong directory if DDL_FILES is unset or missing.
pushd "${DDL_FILES:?DDL_FILES is not set}" || exit 1

# Let the shell glob instead of parsing ls; nullglob yields an empty array
# when nothing matches.
shopt -s nullglob
files=(*.mdb)
shopt -u nullglob

if (( ${#files[@]} == 0 )); then
  echo "No Schema Definitions Found, Exiting..."
  exit 3
fi

for file in "${files[@]}"; do
  # Delete every '[' and ']' (presumably the identifier quoting used in the
  # Access-style DDL - confirm against the generated files).
  tr -d '[]' < "$file" > "$file.sql"
done

popd
27 changes: 27 additions & 0 deletions generate-shapefile-manifest.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
#
# generate-shapefile-manifest.sh - write one ogr2ogr load command per *.shp
# file in $WORKDIR to the manifest file $SHAPEFILE_MANIFEST (inside $WORKDIR).
# Any previous manifest is removed first.
#
# Every manifest line is a complete shell command line: the multi-word PG:
# connection string stays quoted and the file name is shell-escaped, so the
# lines must be run through a shell parser (e.g. "bash <manifest>").
#
# Reads WORKDIR and SHAPEFILE_MANIFEST from config/cityscrape-config.sh. The
# config path is relative, so run this script from the project root.

CONFIGFILE="config/cityscrape-config.sh"

. "$CONFIGFILE" || exit 1

: "${SHAPEFILE_MANIFEST:?SHAPEFILE_MANIFEST is not set}"

# Never continue in the wrong directory if WORKDIR is unset or missing.
pushd "${WORKDIR:?WORKDIR is not set}" || exit 1

# Remove a stale manifest only if one actually exists.
if [[ -e "$SHAPEFILE_MANIFEST" ]]; then
  echo "Removing old shapefile manifest"
  rm -f -- "$SHAPEFILE_MANIFEST"
fi

# Let the shell glob instead of parsing ls; nullglob yields an empty array
# when nothing matches.
shopt -s nullglob
shp_files=(*.shp)
shopt -u nullglob

if (( ${#shp_files[@]} == 0 )); then
  echo "No *.shp files found, skipping ogr2ogr..."
else
  for shp_file in "${shp_files[@]}"; do
    # %q shell-escapes the file name; the quotes in the format string are
    # written to the manifest literally so the connection string remains a
    # single argument when the line is executed.
    printf 'ogr2ogr -overwrite -progress -skipfailures -f "PostgreSQL" PG:"host=localhost user=postgres dbname=city" %q\n' "$shp_file"
  done > "$SHAPEFILE_MANIFEST"
fi

popd
File renamed without changes.
4 changes: 2 additions & 2 deletions run-cityscrape-postgresql-ingest.sh
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ for f in *.mdb

mdb-export -D ‘%%Y-%%m-%%d %%H:%%M:%%S’ -I postgress -q \’ -R \; $f $i | psql -d city -U postgres -w -h localhost

done
done

done
done

# # return to project root $BASEDIR
popd
Expand Down
23 changes: 23 additions & 0 deletions run-cityscrape.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
#!/bin/bash
#
# run-cityscrape.sh - top-level driver that runs the individual pipeline
# steps in order. Only step 5 is currently enabled; the other steps are
# commented out.
#
# The config path and the step scripts are referenced relative to the current
# directory, so run this script from the project root.

# Abort on the first failing step. Set here instead of in the shebang so that
# it still applies when the script is started as "bash run-cityscrape.sh".
set -e

CONFIGFILE="config/cityscrape-config.sh"

. "$CONFIGFILE"

# echo "Step 1: Fetching Cityscrape data"
# ./get.sh

# echo "Step 2: Unzipping archives"
# ./unzip.sh

# echo "Step 3: Generating DDL files"
# ./generate-ddl.sh

# echo "Step 4: Generating Shapefile load commands"
# ./generate-shapefile-manifest.sh

echo "Step 5: Generating Schema from ddl definitions"
./generate-schema-from-ddl.sh

# echo "Step 6: Upload Shapefiles to database"
# ./upload-shapefiles-from-shapefile-manifest.sh
32 changes: 1 addition & 31 deletions run-ddl-generation.sh
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,6 @@ CONFIGFILE="config/cityscrape-config.sh"

. $CONFIGFILE

echo "Running Cityscrape PostgreSQL Ingest"

pushd $WORKDIR
echo "Unzipping files..."

zip_files=$(echo `ls *.zip 2>/dev/null`)
if [ -z "$zip_files" ]; then
echo "No *.zip files found, skipping unzip..."
else
for zip_file in $zip_files;
do
unzip -o $zip_file
done
echo "Unzip complete"
fi

shp_files=$(echo `ls *.shp 2>/dev/null`)
if [ -z "$shp_files" ]; then
echo "No *.shp files found, skipping ogr2ogr..."
else
for shp_file in $shp_files;
do
echo `ls $shp_file`
# ogr2ogr -overwrite -progress -skipfailures -f "PostgreSQL" PG:"host=localhost user=postgres dbname=city" $shp_file
done
fi

pushd $DDL_FILES
echo "Building ddl sql files now..."

Expand All @@ -57,7 +30,4 @@ else
done
fi
done
fi

popd

fi
13 changes: 13 additions & 0 deletions unzip.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash
#
# unzip.sh - extract every *.zip archive in $WORKDIR in place, overwriting
# existing files without prompting (unzip -o).
#
# Reads WORKDIR from config/cityscrape-config.sh. The config path is
# relative, so run this script from the project root.

CONFIGFILE="config/cityscrape-config.sh"

. "$CONFIGFILE" || exit 1

echo "Running Cityscrape PostgreSQL Ingest"

# Never continue in the wrong directory if WORKDIR is unset or missing.
pushd "${WORKDIR:?WORKDIR is not set}" || exit 1
echo "Unzipping files..."

# Let the shell glob instead of piping ls through xargs: file names with
# whitespace stay intact and unzip is never started without an archive.
shopt -s nullglob
zip_files=(*.zip)
shopt -u nullglob

if (( ${#zip_files[@]} == 0 )); then
  echo "No *.zip files found, skipping unzip..."
else
  for zip_file in "${zip_files[@]}"; do
    unzip -o "$zip_file"
  done
fi
popd
10 changes: 10 additions & 0 deletions upload-shapefiles-from-shapefile-manifest.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
#
# upload-shapefiles-from-shapefile-manifest.sh - run every command line listed
# in the manifest file $SHAPEFILE_MANIFEST from inside $WORKDIR.
#
# Reads WORKDIR and SHAPEFILE_MANIFEST from config/cityscrape-config.sh. The
# config path is relative, so run this script from the project root.

CONFIGFILE="config/cityscrape-config.sh"

. "$CONFIGFILE" || exit 1

: "${SHAPEFILE_MANIFEST:?SHAPEFILE_MANIFEST is not set}"

# Never continue in the wrong directory if WORKDIR is unset or missing.
pushd "${WORKDIR:?WORKDIR is not set}" || exit 1

# NOTE(review): every manifest line is executed as a shell command, so the
# manifest must only ever come from a trusted, locally generated source.
#
# Each line is handed to a shell ("bash -c") rather than expanded as a bare
# $line: plain word-splitting ignores quoting, so a quoted multi-word
# argument in a manifest line would be broken into several arguments.
# "|| [[ -n ... ]]" also processes a final line without a trailing newline.
while IFS= read -r line || [[ -n "$line" ]]; do
  [[ -z "$line" ]] && continue
  # stdin comes from /dev/null so the command cannot consume the manifest.
  bash -c "$line" < /dev/null || echo "Command failed: $line" >&2
done < "$SHAPEFILE_MANIFEST"
popd

0 comments on commit f8ab55f

Please sign in to comment.