-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcsv2db.py
93 lines (86 loc) · 2.91 KB
/
csv2db.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
import sys
from logging import getLogger
from configparser import ConfigParser
from argparse import ArgumentParser
from datetime import datetime
from urllib.parse import urlparse
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
from zope.sqlalchemy import register
import transaction
from tools import (
is_same,
plain_value,
)
from models import Product
from logger import setup_logging
default_csv_file = 'all.csv'
help_csv_file = 'default ' + default_csv_file
pars = ArgumentParser()
pars.add_argument('conf')
pars.add_argument('--csv-file', default=default_csv_file, help=help_csv_file)
option = pars.parse_args(sys.argv[1:])
setup_logging(option.conf)
log = getLogger(sys.argv[0])
conf = ConfigParser()
conf.read(option.conf)
db_url = conf.get('main', 'db_url')
engine = create_engine(db_url)
factory = sessionmaker(bind=engine)
db_session = factory()
register(db_session)
df = pd.read_csv(option.csv_file)
df = df.replace(np.nan, None, regex=True)
base_q = db_session.query(Product)
update_fields = (
'title', 'price', 'description', 'image', 'shop_name', 'shop_url', 'city',
'hostname', 'stock')
for i in df.index:
url = df['url'][i]
p = urlparse(url)
source = dict(
url=url, title=df['title'][i], price=float(df['price'][i]),
description=df['description'][i], image=df['image'][i],
shop_name=df['shop_name'][i], shop_url=df['shop_url'][i],
city=df['city'][i], hostname=p.netloc, stock=int(df['stock'][i]))
q = base_q.filter_by(url=url)
p = q.first()
target_update = dict()
target_insert = False
log_msg = []
if p:
target = p.to_dict()
for field in update_fields:
source_value = source[field]
target_value = target[field]
if is_same(source_value, target_value):
continue
target_update[field] = source_value
log_source_value = plain_value(source_value)
log_target_value = plain_value(target_value)
msg = '{f} {t} to be {s}'.format(
f=field, t=[log_target_value], s=[log_source_value])
log_msg.append(msg)
if target_update:
target_update['updated'] = datetime.now()
p.from_dict(target_update)
msg = ', '.join(log_msg)
msg = f'{url} UPDATE change {msg}'
log_func = log.info
else:
for field in update_fields:
source_value = source[field]
target_update[field] = plain_value(source_value)
msg = f'{url} already same {target_update}'
log_func = log.warning
else:
p = Product(**source)
target_insert = True
msg = f'INSERT {source}'
log_func = log.info
if target_insert or target_update:
with transaction.manager:
db_session.add(p)
log_func(msg)