Skip to content

Commit 697df7a

Browse files
committed
wheeeeegit add -Agit add -A!
1 parent 8699e22 commit 697df7a

File tree

13 files changed

+553
-458
lines changed

13 files changed

+553
-458
lines changed

.idea/workspace.xml

+319-407
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

bogo/Guess.py

+79
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
from bogo.RandomTree import RandomTree
2+
from sympy_descent.model import Model, NoThetaException, DivergentModelException
3+
import sympy as sp
4+
5+
def match(data, *, iterations=300, verbose=False):
    """Randomly search for a symbolic expression that fits ``data``.

    Every round builds a random expression tree over the feature columns,
    fits its thetas with ``Model.descend('rand')`` and remembers the
    weighted model with the lowest ``solver.error()``.  Each candidate, its
    error and the overall winner are printed to stdout; nothing is returned.

    Args:
        data: table exposing ``.columns``.  Every column except ``'y'`` is
            offered to ``RandomTree`` as a feature; ``'y'`` is handed to
            ``Model`` as the target.
        iterations: number of random candidates to try (default 300, the
            value that used to be hard-coded).
        verbose: when true, also print the per-candidate diagnostics
            (separator, "no thetas", "model diverged", weighted model).
            They are silent by default, as before.
    """

    def log(*args):
        # Diagnostics are opt-in so the default output stays unchanged.
        if verbose:
            print(*args)

    best_error = float('inf')
    best_func = None

    features = [c for c in data.columns if c != 'y']

    for _ in range(iterations):
        tree = RandomTree(2, features=features)

        # to_sympy() runs sp.simplify, so evaluate it once and reuse the
        # result for both the printout and the model.
        candidate = tree.to_sympy()

        log('\n\n----------------------')
        print(candidate)

        try:
            solver = Model(candidate, data, 'y')
        except NoThetaException:
            # The random expression contains nothing to fit.
            log('no thetas')
            continue

        try:
            solver.descend('rand')
        except DivergentModelException:
            log('model diverged')
            continue

        log(solver.weighted_model)

        error = solver.error()
        print(error)
        print()

        if error < best_error:
            best_error = error
            best_func = solver.weighted_model

    if best_func is None:
        # Every candidate was rejected (no thetas, diverged, or an error
        # that never compared below infinity); there is nothing to simplify.
        print()
        print('best:', None)
        print('error:', best_error)
        return

    # NOTE(review): presumably the weighted model is a residual
    # (prediction - y), so adding y back yields the predictor -- confirm
    # against Model.
    best_func = sp.simplify(best_func + sp.Symbol('y'))

    print()
    print('best:', best_func)
    print('error:', best_error)
55+
56+
57+
58+
import pandas as pd
59+
60+
# Tiny sanity-check data set (y is exactly x1 + 2); not used below.
simple = pd.DataFrame(dict(
    x1=(1, 2, 3, 4, 5, 6),
    y=(3, 4, 5, 6, 7, 8),
))

# Noisier single-feature data set used for the actual search.
lumpy = pd.DataFrame(dict(
    n=(56, 56, 65, 65, 50, 25, 87, 44, 35),
    y=(87, 91, 85, 91, 75, 28, 122, 66, 58),
))

# Searching on the raw values is left disabled; the search below runs on the
# rescaled frame instead.

# Shift every column to start at 0, then divide by the shifted maximum so
# each column spans [0, 1].
lumpy = lumpy - lumpy.min()
lumpy = lumpy / lumpy.max()

match(lumpy)
76+
77+
78+
79+

bogo/RandomTree.py

+11-15
Original file line numberDiff line numberDiff line change
@@ -42,19 +42,15 @@ class Sum(Operation):
4242
class Product(Operation):
4343
func = sp.Mul
4444

45-
class Min(Operation):
46-
func = sp.Min
45+
# class Min(Operation):
46+
# func = sp.Min
4747

48-
class Max(Operation):
49-
func = sp.Max
48+
# class Max(Operation):
49+
# func = sp.Max
5050

51-
class Power(Operation):
52-
func = sp.Pow
53-
slot_limit = 2
54-
55-
class Log(Operation):
56-
func = sp.log
57-
slot_limit = 2
51+
# class Power(Operation):
52+
# func = sp.Pow
53+
# slot_limit = 2
5854

5955

6056
class RandomTree:
@@ -71,8 +67,6 @@ def __init__(self, operations=3,
7167
self.features = features
7268
self.fill(self.head)
7369

74-
print('done')
75-
7670
def random_child(self, node):
7771
r = randint(0, len(node.args) + node.empty)
7872
if r >= len(node.args):
@@ -91,8 +85,8 @@ def fill(self, node):
9185
while len(node.args) < 2:
9286
node.add(self.random_symbol())
9387

94-
while node.empty and random()<.5:
95-
node.add(self.random_symbol())
88+
while node.empty and random()<.4:
89+
node.add(sp.Symbol(choice(self.features)))
9690

9791
for child in node.args:
9892
if isinstance(child, Operation):
@@ -102,3 +96,5 @@ def to_sympy(self):
10296
return sp.simplify(self.head.serialize())
10397

10498

99+
100+

bogo/visualize.py

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# -0.10544438075674*n**3 + n + 0.0927434048707628
2+
3+
import pandas as pd
4+
import numpy as np
5+
6+
# Same single-feature data set that the search script fits.
lumpy = pd.DataFrame(dict(
    n=(56, 56, 65, 65, 50, 25, 87, 44, 35),
    y=(87, 91, 85, 91, 75, 28, 122, 66, 58),
))

# Shift every column to start at 0, then divide by the shifted maximum so
# each column spans [0, 1].
lumpy = lumpy - lumpy.min()
lumpy = lumpy / lumpy.max()

import matplotlib.pyplot as plt

# Raw (scaled) observations.
plt.scatter(lumpy['n'], lumpy['y'])

# Dense grid over the scaled feature range for drawing the fitted curve.
xaxis = np.arange(0, 1, .001)
22+
23+
def f(n):
    """Evaluate the fitted cubic ``-0.10544438075674*n**3 + n + 0.0927434048707628``.

    The coefficients are the ones recorded in the comment at the top of this
    file.  The expression is plain arithmetic, so it is applied to the whole
    array at once instead of element by element.

    Args:
        n: scalar or array-like of (scaled) feature values.

    Returns:
        numpy.ndarray with the same shape as ``n`` (0-d for a scalar).
    """
    n = np.asarray(n, dtype=float)
    return -0.10544438075674 * n**3 + n + 0.0927434048707628
26+
27+
# Overlay the fitted curve on the scatter plot, then open the window.
fitted = f(xaxis)
plt.plot(xaxis, fitted)

plt.show()
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{
2+
"testEngine.py::TestGradientDescent::test_double_thetas": true,
3+
"testEngine.py::TestGradientDescent::test_double_xs": true,
4+
"testEngine.py::TestGradientDescent::test_simplest": true
5+
}

symengine_descent/speed_test.py

-13
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,3 @@
1010
run = lambda f: f()
1111

1212

13-
14-
@run
15-
def w_engine():
16-
x = symengine.var("x")
17-
18-
f = x**2 + x
19-
f = f.subs(x, 5)
20-
21-
print(eval_double(f))
22-
23-
24-
25-

symengine_descent/symengine_model.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import numpy as np
22
import symengine as sym
33
from symengine import Symbol
4-
from sympy_descent.data_loader import DataLoader
4+
from sympy_descent.data_loader import DataLoaderDict
5+
6+
DataLoader = DataLoaderDict
57

68
THETA_PREFIX = 't'
79
def Theta(i:int):
@@ -82,7 +84,7 @@ def search(head):
8284
@np.vectorize
8385
def cost_partial(t):
8486
f = self.model * self.model.diff(t)
85-
return np.sum(f.subs(row) for row in self.data) #.simplify()
87+
return np.sum(f.subs(row) for row in self.data).simplify()
8688

8789
self.grad = cost_partial(self.thetas)
8890

@@ -123,7 +125,7 @@ def descend(self, initial_thetas,
123125
# =========================================================================
124126
@property
125127
def theta_dict(self):
126-
return tuple(zip(self.thetas, self.theta_vals))
128+
return dict(zip(self.thetas, self.theta_vals))
127129

128130
def save_weighted_model(self):
129131
self.weighted_model = self.model.subs(self.theta_dict).simplify()

symengine_descent/testEngine.py

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import unittest
2+
from symengine_descent.symengine_model import Model
3+
4+
from symengine import Symbol
5+
import pandas as pd
6+
7+
# Symbols shared by every test: two features, the target and three thetas.
x1, x2, y, t1, t2, t3 = (
    Symbol(name) for name in ('x1', 'x2', 'y', 't1', 't2', 't3')
)
13+
14+
15+
def disp(m, label='test1'):
    """Print a fitted model and its error under a heading.

    Args:
        m: object exposing ``weighted_model`` and an ``error()`` method.
        label: heading printed above the model.  Defaults to ``'test1'``,
            the text that used to be hard-coded for every caller.
    """
    print()
    print(label)
    print(m.weighted_model)
    print('error:', m.error())
20+
21+
22+
class TestGradientDescent(unittest.TestCase):
    """Smoke tests that fit small models and print the outcome.

    Each test builds a ``Model``, runs ``descend`` and hands the solver to
    ``disp``; no numeric assertions are made.
    """

    def test_simplest(self):
        """One theta scaling one feature, started from random thetas."""
        frame = pd.DataFrame({'x1': (1, 2, 3), 'y': (1.0, 1.9, 3.1)})
        solver = Model(t1 * x1, frame, target='y')

        solver.descend('rand')

        disp(solver)

    def test_double_thetas(self):
        """Two thetas, with ``y`` written into the expression itself."""
        frame = pd.DataFrame({'x1': (0, 1, 2), 'y': (1, 2, 5)})
        expression = t1*x1*x1 + t2 - y
        solver = Model(expression, frame)

        solver.descend([0.0, 0.0], alpha=.05, momentum=.5, threshold=.01)
        disp(solver)

    def test_double_xs(self):
        """Two features plus an intercept, all thetas starting at zero."""
        frame = pd.DataFrame({
            'x1': (0, 3, 6),
            'x2': (0, 1, 2),
            'y': (.1, .4, .8),
        })
        expression = t1*x1 + t2*x2 + t3
        solver = Model(expression, frame, target='y')

        solver.descend([0.0, 0.0, 0.0])
        disp(solver)


if __name__ == '__main__':
    unittest.main()

sympy_descent/data_loader.py

+8-1
Original file line numberDiff line numberDiff line change
@@ -12,4 +12,11 @@ def __init__(self, df:pd.DataFrame):
1212

1313
def __iter__(self):
1414
for row in self.df.iterrows():
15-
yield tuple(row[1].to_dict().items())
15+
yield tuple(row[1].to_dict().items())
16+
17+
18+
class DataLoaderDict(DataLoader):
    """``DataLoader`` variant whose rows are plain dicts.

    The parent yields each row as a tuple of ``(column, value)`` pairs; this
    subclass yields ``{column: value}`` instead.
    """

    def __iter__(self):
        # iterrows() produces (index, Series) pairs; only the Series is used.
        for _index, series in self.df.iterrows():
            yield series.to_dict()

sympy_descent/model.py

+22-10
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
from sympy_descent.helpers import *
22
from sympy_descent.data_loader import DataLoader
33

4+
5+
class NoThetaException(Exception):
    """No thetas found. Should start with `t`.

    Raised when a model expression contains no theta symbols to fit.  A
    default message is supplied so that a bare ``raise NoThetaException``
    still explains itself.
    """

    def __init__(self, message="No thetas found. Should start with `t`."):
        super().__init__(message)
7+
8+
class DivergentModelException(Exception):
    """Gradient descent diverged.

    Raised by ``Model.descend`` when a theta value grows beyond the allowed
    magnitude.  A default message is supplied so that a bare
    ``raise DivergentModelException()`` still explains itself.
    """

    def __init__(self, message="Model diverged: a theta value grew too large during descent."):
        super().__init__(message)
10+
11+
412
class Model:
513

614
# =========================================================================
@@ -37,7 +45,7 @@ def search(head):
3745
search(self.model)
3846

3947
if len(thetas) == 0:
40-
raise Exception(f'No Thetas found. Should start with {THETA_PREFIX}')
48+
raise NoThetaException
4149

4250
self.thetas = sorted(tuple(thetas), key=str)
4351

@@ -66,29 +74,34 @@ def cost_partial(t):
6674

6775
# =========================================================================
6876
def descend(self, initial_thetas,
69-
alpha=.05, momentum=0.5, threshold=.01):
77+
alpha=.01, momentum=0.5, threshold=.01):
78+
79+
if initial_thetas == 'rand':
80+
initial_thetas = list(np.random.rand(len(self.thetas)))
7081

7182
assert len(initial_thetas) == len(self.thetas)
7283

7384
self.theta_vals = initial_thetas
7485

7586
deltas = np.zeros(len(self.thetas))
7687

77-
for i in range(999):
78-
79-
self.save_weighted_model()
88+
for i in range(1,500):
8089

81-
# print(i, '\t', *(f'{float(s):.4f}'.ljust(8) for s in (*self.theta_vals, self.error())))
90+
# if not i%50:
91+
# self.save_weighted_model()
92+
# print(i, '\t', *(f'{float(s):.4f}'.ljust(8) for s in (*self.theta_vals, self.error())))
8293

8394
deltas = (momentum*deltas
8495
+ np.array([grad.subs(self.theta_dict) for grad in self.grad]))
8596

86-
if sum(abs(deltas)) < threshold: break
97+
if sum(abs(deltas)) < threshold:
98+
break
8799

88100
self.theta_vals -= deltas * alpha
89101

90-
else:
91-
raise Exception('maximum iterations exceeded. something is probably wrong')
102+
if any(abs(self.theta_vals) > 9999):
103+
raise DivergentModelException()
104+
92105

93106
self.save_weighted_model()
94107

@@ -100,7 +113,6 @@ def theta_dict(self):
100113
def save_weighted_model(self):
101114
self.weighted_model = simplify(self.model.subs(self.theta_dict))
102115

103-
104116
def error(self):
105117
return np.sum(self.weighted_model.subs(row)**2 for row in self.data)
106118

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{}

tests/testBogo.py

+5-3
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,12 @@
11
import unittest
2-
from bogo.BogoBoost import RandomTree
2+
from bogo.RandomTree import RandomTree, bag
33

44
class MyTestCase(unittest.TestCase):
5+
56
def test_something(self):
6-
t = RandomTree(3)
7-
print(t.head.serialize)
7+
t = RandomTree(1)
8+
print(t.head.serialize())
9+
810

911
if __name__ == '__main__':
1012
unittest.main()

0 commit comments

Comments
 (0)