Skip to content

Commit

Permalink
Merge pull request #7 from hopcity/ddl_generation
Browse files Browse the repository at this point in the history
Ddl generation
  • Loading branch information
hopcity authored Apr 29, 2017
2 parents 0513905 + 74718f4 commit afa908e
Show file tree
Hide file tree
Showing 15 changed files with 292 additions and 36 deletions.
10 changes: 8 additions & 2 deletions Vagrantfile
Original file line number Diff line number Diff line change
@@ -1,12 +1,18 @@

# Vagrantfile API/syntax version. Don't touch unless you know what you're doing!
#
# Note:
# edit /etc/apt/sources.list to uncomment the following
# deb http://security.ubuntu.com/ubuntu trusty-security multiverse
# deb-src http://security.ubuntu.com/ubuntu trusty-security multiverse

VAGRANTFILE_API_VERSION = "2"

Vagrant.configure(VAGRANTFILE_API_VERSION) do |config|

config.vm.box = "ubuntu/vivid64"
config.vm.box = "ubuntu/trusty64"
# Change this to be something relevant to your project
config.vm.hostname = "cityscrape"
config.vm.hostname = "city-scrape"

config.vm.provision :shell, inline: "apt-get -y install --fix-missing"
config.vm.provision :shell, inline: "apt-get -y install python"
Expand Down
3 changes: 2 additions & 1 deletion config/cityscrape-config.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,6 @@ export SOURCEFILE_URL='http://dynamic.stlouis-mo.gov/citydata/downloads/'
# Scratch area used for downloaded archives and their extracted contents
# before they are loaded into the database.
export WORKDIR="$BASEDIR/workdir"
# Directory inside WORKDIR that holds the generated DDL files.
export DDL_FILES="$WORKDIR/ddl"
# Suffix appended to generated DDL file names. It is currently empty, so a
# DDL file keeps the name of the file it was generated from.
# (A former `export $DDL_FILE_SUFFIX=".sql"` line was dropped: it expanded the
# still-unset variable and tried to export the invalid identifier `=.sql`.)
export DDL_FILE_SUFFIX=""

# Name of the manifest file that lists the shapefile load commands.
export SHAPEFILE_MANIFEST=shp_file.manifest
37 changes: 37 additions & 0 deletions db-ingest.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/bash
#
# Load every Access database (*.mdb) found in $WORKDIR into the PostgreSQL
# database "city": the schema is created first, then each table is exported
# as INSERT statements and piped into psql.
#
# Known issue: the postgres configuration on the Vagrant box is flaky.
# Switching to the postgres user by hand has been needed:
#   vagrant@city-scrape:/vagrant$ sudo su
#   root@city-scrape:/vagrant# su postgres
#   postgres@city-scrape:/vagrant$

# Set here rather than in the shebang so it also applies to `bash db-ingest.sh`.
set -e

CONFIGFILE="config/cityscrape-config.sh"

. "$CONFIGFILE"

echo "Running Cityscrape PostgreSQL Ingest"
pushd "$WORKDIR"

# Without nullglob an unmatched pattern would reach mdb-schema as the
# literal string "*.mdb".
shopt -s nullglob

for f in *.mdb; do
  echo "Extracting tables from $f"

  # Create the tables. `-f -` makes psql read the DDL from the pipe; a bare
  # `-f` has no file argument and psql refuses to start.
  mdb-schema "$f" postgres \
    | sed -e 's/Char/Varchar/g' -e 's/Postgres_Unknown 0x0c/text/g' \
    | psql -U vagrant city -a -f -

  # One table name per line. This does not depend on the exact wording of the
  # CREATE TABLE statements and keeps names that contain spaces intact.
  mapfile -t tables < <(mdb-tables -1 "$f")

  for i in "${tables[@]}"; do
    [[ -n "$i" ]] || continue
    echo "[File: $f ] [Table - $i]"

    # Plain ASCII quotes: typographic quotes are ordinary characters to the
    # shell, so the date format used to be split into two arguments.
    mdb-export -D '%Y-%m-%d %H:%M:%S' -I postgres -q "'" -R ';' "$f" "$i" \
      | psql -U vagrant city -w
  done
done

# return to project root $BASEDIR
popd

32 changes: 32 additions & 0 deletions generate-ddl.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
#!/bin/bash
#
# For every Access database (*.mdb) in $WORKDIR:
#   1. write its schema to ddl/<database>$DDL_FILE_SUFFIX, and
#   2. create one "<table>$DDL_FILE_SUFFIX" file per table in $WORKDIR.

CONFIGFILE="config/cityscrape-config.sh"

. "$CONFIGFILE"

# Everything below uses paths relative to $WORKDIR, so do not carry on from
# the wrong directory.
pushd "$WORKDIR" || exit 1

# Collect the databases with a glob instead of parsing `ls` output.
shopt -s nullglob
mdb_files=(*.mdb)
shopt -u nullglob

if (( ${#mdb_files[@]} == 0 )); then
  echo "No *.mdb files found, exiting..."
else
  # The redirection below fails if the target directory is missing.
  mkdir -p ddl

  for mdb_file in "${mdb_files[@]}"; do
    echo "Extracting tables from $mdb_file"
    ddl_file="${mdb_file}${DDL_FILE_SUFFIX}"

    mdb-schema "$mdb_file" \
      | sed -e 's/Char/Varchar/g' -e 's/Postgres_Unknown 0x0c/text/g' \
      > "ddl/$ddl_file"

    # One table name per line. The former grep/awk pipeline printed the third
    # word of `CREATE TABLE IF NOT EXISTS "name"`, which is the word "IF".
    mapfile -t tables < <(mdb-tables -1 "$mdb_file")

    if (( ${#tables[@]} == 0 )); then
      echo "No tables found, skipping table ddl generation."
    else
      for table in "${tables[@]}"; do
        [[ -n "$table" ]] || continue
        # NOTE(review): this file only receives the table name, not a DDL
        # statement. Confirm whether per-table DDL was intended here.
        printf '%s\n' "$table" > "${table}${DDL_FILE_SUFFIX}"
      done
    fi
  done
fi
popd
33 changes: 33 additions & 0 deletions generate-schema-from-ddl.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#!/bin/bash
#
# Turn the schema definitions in $DDL_FILES into SQL files and execute the
# PostgreSQL ones against the "city" database.
#
# Exit status: 3 when there is nothing to process, 1 when psql failed for at
# least one file, 0 otherwise.

CONFIGFILE="config/cityscrape-config.sh"

. "$CONFIGFILE"

pushd "$DDL_FILES" || exit 1

# Unmatched globs expand to nothing instead of the literal pattern.
shopt -s nullglob

# Create SQL files out of mdb files by stripping the square brackets.
mdb_files=(*.mdb)
if (( ${#mdb_files[@]} == 0 )); then
  echo "No MDB Schema Definitions Found, Exiting..."
  exit 3
fi

for mdb in "${mdb_files[@]}"; do
  tr -d "[]" < "$mdb" > "$mdb.sql"
done

# Pass SQL files to psql and execute against db
sql_files=(*postgres.sql)
if (( ${#sql_files[@]} == 0 )); then
  echo "No Postgres Schema DDL, Exiting..."
  exit 3
fi

status=0
for sql in "${sql_files[@]}"; do
  # Only claim success when psql itself reported success.
  if psql -U vagrant -d city -a -w -f "$sql"; then
    echo "[File: $sql] Completed Succesfully"
  else
    echo "[File: $sql] psql failed" >&2
    status=1
  fi
done

popd
exit "$status"
27 changes: 27 additions & 0 deletions generate-shapefile-manifest.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
#!/bin/bash
#
# Rebuild $SHAPEFILE_MANIFEST inside $WORKDIR: one ogr2ogr load command per
# shapefile (*.shp) found there.

CONFIGFILE="config/cityscrape-config.sh"

. "$CONFIGFILE"

# An empty manifest name would make both `rm` and `>>` below fail.
: "${SHAPEFILE_MANIFEST:?SHAPEFILE_MANIFEST must be set by $CONFIGFILE}"

pushd "$WORKDIR" || exit 1

# Start from a clean manifest. Test for the file itself: testing whether the
# variable is non-empty ran `rm` on a missing file on every first run.
if [[ -f "$SHAPEFILE_MANIFEST" ]]; then
  echo "Removeing old shapefile manifest"
  rm -- "$SHAPEFILE_MANIFEST"
fi

# Collect the shapefiles with a glob instead of parsing `ls` output.
shopt -s nullglob
shp_files=(*.shp)
shopt -u nullglob

if (( ${#shp_files[@]} == 0 )); then
  echo "No *.shp files found, skipping ogr2ogr..."
else
  for shp_file in "${shp_files[@]}"; do
    # The manifest line carries no quotes, exactly as before.
    # NOTE(review): a consumer that word-splits this line passes
    # "PG:host=localhost", "user=postgres" and "dbname=city" as three separate
    # arguments. Confirm how the manifest is meant to be executed.
    printf '%s\n' "ogr2ogr -overwrite -progress -skipfailures -f PostgreSQL PG:host=localhost user=postgres dbname=city $shp_file" \
      >> "$SHAPEFILE_MANIFEST"
  done
fi

popd
File renamed without changes.
16 changes: 16 additions & 0 deletions marshall-datatypes.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
#!/bin/bash
#
# Run src/marshall.py on every DDL file in $DDL_FILES whose name ends in
# "b.sql" (for example "<name>.mdb.sql").
#
# Exit status: 3 when no DDL file is found, 1 when marshall.py failed for at
# least one file, 0 otherwise.

CONFIGFILE="config/cityscrape-config.sh"

. "$CONFIGFILE"

pushd "$DDL_FILES" || exit 1

# Collect the files with a glob instead of parsing `ls` output.
shopt -s nullglob
sql_files=(*b.sql)
shopt -u nullglob

if (( ${#sql_files[@]} == 0 )); then
  echo "No DDL files found for marshalling, exiting..."
  exit 3
fi

status=0
for sql in "${sql_files[@]}"; do
  # One invocation per file. `echo ... | xargs -0` handed the whole
  # space-joined list, trailing newline included, to python as one argument.
  python ../../src/marshall.py "$sql" || status=1
done

popd
exit "$status"
4 changes: 2 additions & 2 deletions run-cityscrape-postgresql-ingest.sh
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ for f in *.mdb

mdb-export -D ‘%%Y-%%m-%%d %%H:%%M:%%S’ -I postgress -q \’ -R \; $f $i | psql -d city -U postgres -w -h localhost

done
done

done
done

# # return to project root $BASEDIR
popd
Expand Down
26 changes: 26 additions & 0 deletions run-cityscrape.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
#!/bin/bash
#
# Run the whole Cityscrape pipeline, one helper script per step, from the
# project root. The run stops at the first step that fails.

# Set here rather than in the shebang so it also applies to
# `bash run-cityscrape.sh`.
set -e

CONFIGFILE="config/cityscrape-config.sh"

. "$CONFIGFILE"

echo "Step 1: Fetching Cityscrape data"
./get.sh

echo "Step 2: Unzipping archives"
./unzip.sh

echo "Step 3: Generating DDL files"
./generate-ddl.sh

echo "Step 4: Generatign Shapefile load commands"
./generate-shapefile-manifest.sh

# NOTE(review): marshall-datatypes.sh exits with status 3 when it finds no
# *b.sql file, and those files appear to be produced by the first half of
# generate-schema-from-ddl.sh in the following step. Confirm the intended
# order of these two steps.
echo "Step 5: DataType Marshalling with RegEx"
./marshall-datatypes.sh

echo "Step 6: Generating Schema from ddl definitions"
./generate-schema-from-ddl.sh

# echo "Step 7: Upload Shapefiles to database"
# ./upload-shapefiles-from-manifest.sh
32 changes: 1 addition & 31 deletions run-ddl-generation.sh
100755 → 100644
Original file line number Diff line number Diff line change
Expand Up @@ -4,33 +4,6 @@ CONFIGFILE="config/cityscrape-config.sh"

. $CONFIGFILE

echo "Running Cityscrape PostgreSQL Ingest"

pushd $WORKDIR
echo "Unzipping files..."

zip_files=$(echo `ls *.zip 2>/dev/null`)
if [ -z "$zip_files" ]; then
echo "No *.zip files found, skipping unzip..."
else
for zip_file in $zip_files;
do
unzip -o $zip_file
done
echo "Unzip complete"
fi

shp_files=$(echo `ls *.shp 2>/dev/null`)
if [ -z "$shp_files" ]; then
echo "No *.shp files found, skipping ogr2ogr..."
else
for shp_file in $shp_files;
do
echo `ls $shp_file`
# ogr2ogr -overwrite -progress -skipfailures -f "PostgreSQL" PG:"host=localhost user=postgres dbname=city" $shp_file
done
fi

pushd $DDL_FILES
echo "Building ddl sql files now..."

Expand All @@ -57,7 +30,4 @@ else
done
fi
done
fi

popd

fi
Empty file added src/__init__.py
Empty file.
85 changes: 85 additions & 0 deletions src/marshall.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
'''
@author dylan.raithel
@date 2 Sep 2015
"Marshall" the Access types to Postgresql
'''

import re
import sys


def test_typeMap():
    '''
    Smoke test: run the type conversion on the sample DDL file.

    The bare file name is passed because typeMap prefixes it with
    typeMap.PATH ('../workdir/ddl/') itself; passing the full relative path
    doubled that prefix.
    '''
    # from marshall import typeMap

    file_name = 'prcl.mdb.sql'

    # typeMap requires the file name, and the public entry point is
    # convert_file() (there is no handle_file method).
    mapper = typeMap(file_name)

    mapper.convert_file()


class typeMap(object):
    '''
    Rewrite the Access column types in one DDL file to PostgreSQL types.

    The converted text is written to "<file_name>_<SUFFIX>", relative to the
    current working directory. The input file is left untouched.
    '''

    # Appended (after an underscore) to the input name to build the output name.
    SUFFIX = 'postgres.sql'
    # Fallback directory for input files that are not found as given.
    PATH = '../workdir/ddl/'

    def __init__(self, file_name):
        '''
        :param file_name: name of the DDL file to convert
        '''
        self.file_name = file_name

    def convert_file(self):
        '''
        Convert the DDL file this instance was created for.
        '''
        self._handle_file(self.file_name)

    def _handle_file(self, file_name):
        '''
        Replace every known Access type in ``file_name`` and write the result
        to "<file_name>_<SUFFIX>".

        :param file_name: DDL file to read; used as given when it exists,
            otherwise looked up under ``PATH``
        :raises IOError: if the input cannot be read or the output written
        '''
        import os

        self.mapper = self._access_to_postgres()

        # marshall-datatypes.sh runs this script from inside the ddl
        # directory, where PATH does not resolve. Prefer the name as given and
        # keep PATH only as a fallback.
        filepath = file_name
        if not os.path.isfile(filepath):
            filepath = self.PATH + file_name

        # Longest keys first, so the alternation order never depends on the
        # dictionary order and a longer type name always wins over a shorter
        # one that could match at the same position.
        keys = sorted(self.mapper, key=len, reverse=True)
        pattern = re.compile("|".join(re.escape(key) for key in keys))

        with open(filepath, 'r') as raw:
            text_stream = raw.read()

        text = pattern.sub(lambda m: self.mapper[m.group(0)], text_stream)

        newfilename = '{}_{}'.format(file_name, self.SUFFIX)

        with open(newfilename, 'w') as newfile:
            newfile.write(text)

    def _access_to_postgres(self):
        '''
        Return a map of Access data types to PostgreSQL data types.

        Every listed type currently maps to Varchar.
        '''
        dictmap = {"Double": "Varchar", "Integer": "Varchar",
                   "Byte": "Varchar", "Text (4)": "Varchar",
                   "Long Integer": "Varchar", "DateTime": "Varchar",
                   "Boolean NOT NULL": "Varchar",
                   "Text (2)": "Varchar", "Single": "Varchar",
                   "Text (22)": "Varchar",
                   "Text (8)": "Varchar", "Text (80)": "Varchar",
                   "Text (26)": "Varchar", "Text (18)": "Varchar",
                   "Currency": "Varchar"}
        return dictmap

def main():
    '''
    Command-line entry point: convert every DDL file named on the command
    line.

    Exits with status 2 and a usage message when no file name is given,
    instead of failing with an IndexError.
    '''
    file_names = sys.argv[1:]

    if not file_names:
        sys.stderr.write('usage: marshall.py DDL_FILE [DDL_FILE ...]\n')
        sys.exit(2)

    for file_name in file_names:
        mapper = typeMap(file_name)
        mapper.convert_file()


if __name__ == '__main__':
    main()
13 changes: 13 additions & 0 deletions unzip.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
#!/bin/bash

CONFIGFILE="config/cityscrape-config.sh"

. $CONFIGFILE

echo "Running Cityscrape PostgreSQL Ingest"

pushd $WORKDIR
echo "Unzipping files..."

echo `ls *.zip` | xargs -n 1 unzip -o
popd
10 changes: 10 additions & 0 deletions upload-shapefiles-from-shapefile-manifest.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
#!/bin/bash
#
# Execute every command line listed in $SHAPEFILE_MANIFEST, from inside
# $WORKDIR.

CONFIGFILE="config/cityscrape-config.sh"

. "$CONFIGFILE"

# The manifest is opened relative to $WORKDIR, so do not carry on from the
# wrong directory.
pushd "$WORKDIR" || exit 1

# -r keeps backslashes in the manifest literal; the `|| [[ -n ... ]]` part
# also picks up a last line that lacks a trailing newline.
while IFS= read -r line || [[ -n "$line" ]]; do
  # A blank line is not a command.
  [[ -n "${line//[[:space:]]/}" ]] || continue

  # Deliberately unquoted: the line is word-split into a command and its
  # arguments. Quotes inside the line are NOT interpreted by this expansion.
  $line
done < "$SHAPEFILE_MANIFEST"

popd

0 comments on commit afa908e

Please sign in to comment.