-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathbigxmlcsv.py
More file actions
63 lines (53 loc) · 2.09 KB
/
Copy pathbigxmlcsv.py
File metadata and controls
63 lines (53 loc) · 2.09 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
__author__ = 'nbortolotti'
import codecs
import csv
import cStringIO
import io
import sys

from lxml import etree
class xmlcsv:
    """Convert attributes of XML elements into rows of a CSV file.

    All the work happens in the constructor: the XML file is parsed
    incrementally, one dict per row element is collected, and the
    collected rows are then written to the output file.
    """

    def __init__(self, input_file, output_file, row, schema):
        """Parse *input_file* and write the extracted rows to *output_file*.

        Args:
            input_file: Path of the XML file to read.
            output_file: Path of the CSV file to create (opened in 'wb' mode).
            row: Tag name handed to ``etree.iterparse`` to select elements.
            schema: Sequence of attribute names; it defines both which
                attributes are read and the column order of the output.
        """
        col = schema
        resultado = []

        # ``with`` guarantees the input handle is released even when the
        # parser raises on malformed XML (it was never closed before).
        with open(input_file, "r") as xml_file:
            for _, element in etree.iterparse(xml_file, tag=row):
                # Within each matched element, every element tagged 'row'
                # (the element itself included, if so tagged) becomes a record.
                for row_element in element.iter('row'):
                    # Attributes missing from the element are stored as None.
                    # NOTE(review): the writer will stringify that None --
                    # confirm whether an empty cell was intended instead.
                    resultado.append(
                        {name: row_element.get(name) for name in col})

        # Likewise, close the output file even if writing a row fails.
        with open(output_file, 'wb') as test_file:
            writer = UnicodeDictWriter(test_file, col)
            writer.writerows(resultado)
class UnicodeWriter(object):
    """CSV writer that encodes every row into a target encoding.

    Rows are first rendered by a regular ``csv.writer`` into an in-memory
    queue, then encoded and written to the byte stream *f*.

    A single incremental encoder is kept for the lifetime of the writer so
    that encodings with a byte-order mark (such as the default "utf-16")
    emit the BOM once at the start of the output instead of once per row.

    The class body runs on Python 2 (the file's original target) and does
    not depend on Python 2-only names when run on Python 3.
    """

    # csv.writer produces byte strings on Python 2 and text on Python 3.
    _PY2 = sys.version_info[0] == 2

    def __init__(self, f, dialect=csv.excel_tab, encoding="utf-16", **kwds):
        """Create a writer.

        Args:
            f: Writable binary stream that receives the encoded bytes.
            dialect: csv dialect used to format rows.
            encoding: Name of the target encoding.
            **kwds: Extra keyword arguments forwarded to ``csv.writer``.
        """
        # Redirect output to a queue
        self.queue = io.BytesIO() if self._PY2 else io.StringIO()
        self.writer = csv.writer(self.queue, dialect=dialect, **kwds)
        self.stream = f
        self.encoding = encoding
        # Stateful encoder: remembers that the BOM has already been written.
        self.encoder = codecs.getincrementalencoder(encoding)()

    def writerow(self, row):
        """Format *row* (an iterable of values) and write it to the stream.

        Every value is converted to text first, so non-string values such
        as ints are accepted.
        """
        if self._PY2:
            cells = [unicode(s).encode("utf-8") for s in row]  # noqa: F821
            self.writer.writerow(cells)
            # Fetch UTF-8 output from the queue and turn it back into text
            data = self.queue.getvalue().decode("utf-8")
        else:
            self.writer.writerow([str(s) for s in row])
            data = self.queue.getvalue()
        # Re-encode into the target encoding and write to the target stream
        self.stream.write(self.encoder.encode(data))
        # Empty the queue; rewind first because truncating alone does not
        # move the write position on ``io`` streams.
        self.queue.seek(0)
        self.queue.truncate()

    def writerows(self, rows):
        """Write every row of the iterable *rows* using ``writerow``."""
        for row in rows:
            self.writerow(row)
class UnicodeDictWriter(UnicodeWriter):
    """UnicodeWriter variant that accepts rows as dictionaries.

    The column order is fixed by *fields*; each dict row is flattened to a
    list in that order before being handed to the parent writer. The class
    defines no header-writing method.
    """

    def __init__(self, f, fields, dialect=csv.excel,
                 encoding="utf-8", **kwds):
        """Create a dict writer.

        Args:
            f: Writable stream passed through to ``UnicodeWriter``.
            fields: Sequence of keys that defines the column order.
            dialect: csv dialect used to format rows.
            encoding: Name of the target encoding.
            **kwds: Extra keyword arguments forwarded to the parent.
        """
        super(UnicodeDictWriter, self).__init__(f, dialect, encoding, **kwds)
        self.fields = fields

    def writerow(self, drow):
        """Write the mapping *drow* as one CSV row.

        Keys absent from *drow* produce an empty cell.
        """
        ordered = []
        for name in self.fields:
            ordered.append(drow.get(name, ''))
        super(UnicodeDictWriter, self).writerow(ordered)