-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathget_data.py
61 lines (53 loc) · 2.18 KB
/
get_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#! python3
# Function to get SAP job data from various sources
from os import path
from datetime import datetime
import pandas as pd
# Current local date as 'YYYY-MM-DD', captured once when this module is imported.
today = datetime.today().date().isoformat()
def get_data(job, source, date=None):
    """Return SAP job data for *job* as a pandas DataFrame.

    Args:
        job: Search term. For the ``'csv'`` source it is the path of the
            file without its ``.csv`` extension.
        source: Where to read from: ``'scrape'`` (web scraper in
            ``get_jobs``), ``'api'`` (local REST service), ``'mysql'``
            (``SAP_jobs`` database) or ``'csv'`` (``<job>.csv`` file).
        date: ``'YYYY-MM-DD'`` string selecting the ``job@date`` table.
            Only the ``'mysql'`` source reads it. When omitted, the
            current date is resolved on every call, so a long-running
            process never works with a stale day.

    Returns:
        pandas.DataFrame holding the job data.

    Raises:
        ValueError: ``source`` is not one of the supported options.
        FileNotFoundError: ``'csv'`` source and ``<job>.csv`` is missing.
        LookupError: ``'mysql'`` source, the requested table is missing
            and ``date`` is not today's date.
    """
    if source == 'scrape':
        # Get data from https://jobs.sap.com/ using the web scraper.
        from get_jobs import get_jobs
        rows = get_jobs(job)
        # Row 0 holds the column names, the remaining rows are the records.
        return pd.DataFrame(rows[1:], columns=rows[0])

    if source == 'api':
        # RESTful API (Flask) - the web service must be started beforehand.
        import requests
        from urllib.parse import quote
        print('Fetching results...')  # display text while downloading
        # Percent-encode the whole term, not just spaces: characters such as
        # '#' or '?' would otherwise cut the path short (e.g. "C#" -> "C").
        url = 'http://localhost:5000/SAP/api/v1.0/jobs/{}'.format(
            quote(job.strip())
        )
        res = requests.get(url)
        # Fail loudly on 4xx/5xx instead of parsing an error page as data.
        res.raise_for_status()
        rows = res.json()
        print('Done.')
        print('Found {} results.'.format(len(rows) - 1))
        return pd.DataFrame(rows[1:], columns=rows[0])

    if source == 'mysql':
        import mysql.connector as sqldb
        current_day = datetime.today().strftime('%Y-%m-%d')
        if date is None:
            date = current_day
        # Connection to DB
        job_db = sqldb.connect(
            host='localhost',
            user='root',
            passwd='',
            database='SAP_jobs',
        )
        try:
            # Fetch all tables (format: 'job@date') in the database.
            cursor = job_db.cursor()
            try:
                cursor.execute("SHOW TABLES")
                tables = [table_name for (table_name,) in cursor.fetchall()]
            finally:
                cursor.close()
            job_date = job.strip().replace(' ', '_') + '@' + date
            if job_date not in tables:
                # Only today's table can be created on demand; any other
                # date has to exist already.
                if date != current_day:
                    raise LookupError("No job with this date in the database.")
                # NOTE(review): presumably this stores the results in a
                # table named like job_date - confirm in get_jobs.
                from get_jobs import get_jobs_to_db
                get_jobs_to_db(job)
            # Double any backtick so the name stays one quoted identifier.
            table = job_date.replace('`', '``')
            return pd.read_sql('SELECT * FROM `{}`'.format(table), con=job_db)
        finally:
            # Release the connection on success and on every error path.
            job_db.close()

    if source == 'csv':
        filename = '{}.csv'.format(job)
        # Check exactly the path that is read below; prefixing './' would
        # turn an absolute path into a different, relative one.
        if not path.isfile(filename):
            raise FileNotFoundError("No such file.")
        return pd.read_csv(filename)

    raise ValueError("Source options are 'scrape', 'api', 'mysql', 'csv'.")