-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathxlsx_dataextract_SOC.py
94 lines (74 loc) · 3.53 KB
/
xlsx_dataextract_SOC.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
"""
This program will read all xlsx files, search all sheets for the columns:
- SOC
- Voltage
- Current
- Temperature
When a columns has been found, it will be added to the respective list.
If a column is not found any where in the file, the file will be skipped, and a message will be printed to the console.
If all columns are successfully found, they will be added to a csv file, until the csv file has >= 100000 rows.
"""
import pandas as pd
import os
import csv
from scipy.io import loadmat
# Get all the excel files in the current directory
excel_files = [f for f in os.listdir('Data/philcar') if f.endswith('.xlsx')]
filename = 'Data/phil_socdata_train.csv'
# Open the csv file and write the column names
with open(filename, 'w', newline='') as csvfile:
writer = csv.writer(csvfile, delimiter=',')
writer.writerow(['SOC', 'V', 'I', 'T','P','V_avg_five','V_avg_one','I_avg'])
# Loop through all the excel files
for excel_file in excel_files:
# skip the excel file with name Phil_DC_93_10degC.xlsx as it will be used as test data
if excel_file == 'Phil_DC_93_10degC.xlsx' or excel_file == 'Phil_DC_86_10degC.xlsx':
continue
# Read the excel file into a pandas dataframe
df = pd.read_excel("Data/philcar/"+excel_file, sheet_name=None)
time = []
soc = []
v = []
i = []
t = []
v_avg_five = []
v_avg_one = []
i_avg = []
= []
# gather the values from the dataframe if they are not NaN
for sheet in df:
if 'HV Battery SOC [%]' in df[sheet].columns:
soc = df[sheet]['HV Battery SOC [%]']
if 'HV Battery Voltage [V]' in df[sheet].columns:
v = df[sheet]['HV Battery Voltage [V]']
if 'HV Battery Current [A]' in df[sheet].columns:
i = df[sheet]['HV Battery Current [A]']
if 'OAT [degC]' in df[sheet].columns:
t = df[sheet]['OAT [degC]']
if soc.empty or v.empty or i.empty or t.empty:
print('Error: NaN value in file ' + excel_file)
continue
else:
# print the current iteration and the file name
print(str(excel_files.index(excel_file)) + ': Found all columns in file:' + excel_file)
# iterate through the lists and write the data to the csv file, including the timestep of 1s using pandas
for x in range(len(soc)):
# double check that the data is not NaN
if soc[x] == soc[x] and v[x] == v[x] and i[x] == i[x] and t[x] == t[x]:
# v_avg_five is the average voltage of the 500 previous values including the current value
v_avg_five.append(sum(v[max(0, x-499):x+1])/min(500, x+1))
# v_avg_one is the average voltage of the 100 previous values including the current value
v_avg_one.append(sum(v[max(0, x-99):x+1])/min(100, x+1))
# i_avg is the average current of the 500 previous values including the current value
i_avg.append(sum(i[max(0, x-499):x+1])/min(500, x+1))
#writes the data to the csv file
writer.writerow([soc[x], v[x], i[x], t[x], v[x]*i[x], v_avg_five[x], v_avg_one[x], i_avg[x]])
# remove any rows where v < 300
df = pd.read_csv(filename)
df = df[df['V'] > 300]
df.to_csv(filename, index=False)
# remove any rows where V < 200
df = pd.read_csv(filename)
df = df[df.V > 300]
df.to_csv(filename, index=False)
print('Done!')