-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathneural-results-chart.py
executable file
·96 lines (87 loc) · 4.62 KB
/
neural-results-chart.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
#!/usr/bin/env python3
import argparse
import matplotlib.pyplot as plt
from matplotlib.axes import Axes
import pandas as pd
import sqlite3
import math
import sklearn.linear_model
def extrapolate(series: pd.Series) -> pd.DataFrame:
starting_point = math.log10(series.index.max())
ending_point = 7 ; # 10,000,000 nodes -- the size of the largest neural model
ending_point = 11 ; # hopefully large enough to sort of see some relationship
extrapolation_x = [starting_point]
extrapolation_x += range(int(starting_point)+1,ending_point+1)
extrapolation_dataframe = pd.DataFrame({'log_parameter_count': extrapolation_x})
training_data = pd.DataFrame({'loss': series})
training_data['parameter_count'] = training_data.index
training_data['log_parameter_count'] = training_data.parameter_count.map(math.log10)
ts = sklearn.linear_model.TheilSenRegressor()
ts.fit(training_data[['log_parameter_count']], training_data.loss)
extrapolation_dataframe['loss'] = ts.predict(extrapolation_dataframe[['log_parameter_count']])
extrapolation_dataframe['parameter_count'] = extrapolation_dataframe.log_parameter_count.map(lambda x: 10 ** x)
#print(ts.coef_)
return extrapolation_dataframe
def plot_data(df: pd.DataFrame, tree_df: pd.DataFrame, ax: Axes, column_name: str, column_title: str, do_extrapolate: bool = True, min_x : int = 0) -> None:
# just show the #1 model to keep the chart simple
tree_df = tree_df[tree_df.model_file.str.endswith('1.sqlite') | tree_df.model_file.str.contains(',') | tree_df.model_file.str.contains('careful10000')]
tree_df = tree_df[tree_df.model_node_count > min_x]
for name in sorted(tree_df.model_file.unique()):
sub_df = tree_df[tree_df.model_file == name]
if 'unannotated' in name:
label='Ultra-tree unannotated model'
continue
elif ',' in name:
label="Ensemble"
marker = '^'
color = "orange"
elif 'careful10000' in name:
label="Careful 10000"
marker = "+"
color = "purple"
else:
label='Ultra-tree sense annotated model'
continue
sub_df.set_index('model_node_count').sort_index()[column_name].plot(ax=ax, label=label, marker=marker, color=color)
#print(label)
if do_extrapolate:
extrapolation = extrapolate(sub_df.set_index('model_node_count')[column_name])
extrapolation.set_index('parameter_count').loss.plot(ax=ax, label=f"Extrapolation of {label}",
linestyle="dotted",
marker="", color=color)
#print(extrapolation)
for name in sorted(df.augmentation.unique()):
df[df.augmentation == name].set_index('model_parameter_count')[column_name].plot(ax=ax, marker='o', label=f"{name} neural")
ax.set_xlabel('Model Parameter Count')
ax.set_ylabel(f'{column_title}')
ax.set_title(f'{column_title} vs Model Parameter Count')
ax.set_xscale('log')
#ax.set_yscale('log')
ax.grid(True)
ax.legend()
def main() -> None:
parser = argparse.ArgumentParser(description='Plot total_loss vs model_parameter_count from a CSV file.')
parser.add_argument('--input', default='neural-results.csv', help='Path to the input CSV file.')
parser.add_argument('--output', default='neural-results.png', help='Path to the output PNG file.')
parser.add_argument("--noun-output", default="noun-baseline.png", help="Path to the output PNG file for losses on nouns")
parser.add_argument('--tree-data-input', default='inferences.sqlite', help="SQLite database with inference results")
args = parser.parse_args()
neural_df = pd.read_csv(args.input).sort_values('model_parameter_count')
conn = sqlite3.connect(args.tree_data_input)
tree_df = pd.read_sql("""
select evaluation_run_id, model_file, model_node_count, total_loss, sum(loss) as noun_loss
from inferences join evaluation_runs using (evaluation_run_id)
where model_node_count is not null and total_loss is not null
and correct_path like '1.%.%'
group by evaluation_run_id, model_file, model_node_count, total_loss
""", conn)
fig, ax = plt.subplots()
plot_data(neural_df, tree_df, ax, 'total_loss', "Total Loss")
fig.tight_layout()
fig.savefig(args.output)
fig, ax = plt.subplots()
plot_data(neural_df, tree_df, ax, 'noun_loss', "Noun Loss", do_extrapolate=False, min_x=25)
fig.tight_layout()
fig.savefig(args.noun_output)
if __name__ == '__main__':
main()