-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgppretty.m
323 lines (257 loc) · 10.8 KB
/
gppretty.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
function [gene_latex_expr,full_latex_expr,expr_sym,cell_expr_sym]=gppretty(gp,ind,knockout)
%GPPRETTY GPTIPS Function to simplify multigene symbolic regression model,
%create symbolic model objects and create LaTeX versions of the expressions.
%
%   *REQUIRES SYMBOLIC MATH TOOLBOX*
%
%   Intended to simplify single and multigene symbolic regression
%   expressions created with GPTIPS using the REGRESSMULTI_FITFUN fitness
%   function.
%
%   It is assumed that the overall symbolic model is a linear superposition
%   of the M genes weighted by regression coefficients plus a bias (offset)
%   term.
%   I.e.
%   ypred = c0 + c1*tree1 + ... + cM*treeM
%   where c0 = bias and c1, ..., cM are the gene weights.
%
%   GPPRETTY(GP,IND) prettifies the population member with population index
%   IND in the GPTIPS datastructure GP.
%
%   GPPRETTY(GP,'BEST') prettifies the best individual of the run.
%
%   GPPRETTY(GP,'VALBEST') prettifies the individual that performed best
%   on the validation data (if it exists).
%
%   GPPRETTY can also accept an optional third argument KNOCKOUT which
%   should be a boolean vector with the same number of entries as genes in
%   the individual to be simplified. This simplifies the individual with
%   the indicated genes removed ('knocked out').
%   E.g. GPPRETTY(GP,'BEST',[1 0 0 1]) knocks out the 1st and 4th genes
%   from the best individual of run, then simplifies it. Note that the gene
%   weights are recomputed from the training data when genes are
%   knocked out.
%
%   GENE_LATEX_EXPR = GPPRETTY(GP,'BEST') returns a string GENE_LATEX_EXPR
%   containing the simplified LaTeX representation of the separate genes of
%   the multigene expression formatted as a LaTeX equation array. The bias
%   term is 'folded' in with the first gene. The genes are simplified
%   separately.
%
%   Remarks:
%   Each line of the LaTeX equation array is a simplified gene.
%   To be rendered, the equation array represented by the string
%   GENE_LATEX_EXPR must be copied and pasted into the correct context in
%   an appropriate LaTeX document, for example:
%
%   \documentclass{article}
%   \pagestyle{empty}
%   \begin{document}
%   \begin{eqnarray*}y&=& 6.565- 0.2017\,\tanh \left( {\it x_2} \right) \left( 0.8519\,{\it x_3}-{\it x_1} \right)\\&-& 0.3174\,\tanh \left( - 0.923039\,{\it x_3}\, \left( {\it x_3}-{\it x_1} \right) -{\it x_2} \right)\end{eqnarray*}
%   \end{document}
%
%   In the LaTeX code above, the line beginning "\begin{eqnarray*}" is
%   the string GENE_LATEX_EXPR that is generated by GPPRETTY. The rest you
%   must supply yourself.
%
%   [GENE_LATEX_EXPR,FULL_LATEX_EXPR] = GPPRETTY(GP,'BEST') also returns
%   FULL_LATEX_EXPR containing the simplified LaTeX representation of the
%   combined genes of the multigene expression. That is, the genes are
%   combined, then simplified.
%
%   Remarks:
%   Again, to be rendered, the equation represented by the string
%   FULL_LATEX_EXPR must be copied and pasted into the correct context in
%   an appropriate LaTeX document, for example:
%
%   \documentclass{article}
%   \pagestyle{empty}
%   \begin{document}
%   $
%   y= 7.255+ 0.2060\,{\it x_2}+ 0.2086\,{\it x_3}- 0.2086\,{\it x_1}- 0.2086\,\tanh \left( {\it x_1} \right)
%   $
%   \end{document}
%
%   Note:
%   MATLAB currently does not render LaTeX well. At time of writing a good
%   web-based LaTeX processor that outputs high quality images is available
%   at http://sciencesoft.at/latex/
%
%   Other usages:
%   [GENE_LATEX_EXPR,FULL_LATEX_EXPR,EXPR_SYM]=GPPRETTY(GP,'BEST') does
%   the above and returns the entire simplified symbolic expression as
%   an object of class 'sym' as EXPR_SYM.
%
%   [GENE_LATEX_EXPR,FULL_LATEX_EXPR,EXPR_SYM,CELL_EXPR_SYM]=GPPRETTY(GP,'BEST')
%   does the above and also returns the individual simplified gene
%   expressions as a cell array of 'sym' objects CELL_EXPR_SYM.
%
%   If the function exits early (too few arguments, no Symbolic Math
%   Toolbox licence, or no validation data for 'VALBEST') the requested
%   outputs are returned empty.
%
%   Side effects:
%   The symbolic display precision (DIGITS) is set to 4 while the function
%   runs and is restored to its previous value on exit, including when an
%   error occurs.
%   When IND is 'BEST', the unsimplified overall expression is displayed
%   and saved as variable F2 in the file F2.mat in the current directory.
%
%   Known problems:
%   Occasionally, the underlying symbolic math software causes MATLAB to
%   crash when attempting to simplify some expressions. The cause of this
%   is currently unknown but it tends to happen with deep trees.
%
%   When IND is a numeric population index and KNOCKOUT is supplied, the
%   genes are taken from the best individual of the run rather than from
%   population member IND (see the NOTE(review) comment in the code).
%
%
%   (c) Dominic Searson 2008
%
%   v1.0
%
%   See also REGRESSMULTI_FITFUN, POPBROWSER, GPREFORMAT, SYM, PRETTY, LATEX

% Pre-assign only the outputs the caller requested so that the early exits
% below do not raise "output argument not assigned". Nothing is assigned
% when nargout==0, so no 'ans' is echoed at the command line.
if nargout>0, gene_latex_expr=''; end
if nargout>1, full_latex_expr=''; end
if nargout>2, expr_sym=[]; end
if nargout>3, cell_expr_sym={}; end

if nargin<2
    disp('Usage is GPPRETTY(GP,IND) where IND is the population index of the desired individual');
    disp('or GPPRETTY(GP,''BEST'') to use the best individual of the run ');
    disp('or GPPRETTY(GP,''VALBEST'') uses the individual from the run that performed best on the validation set (if one is defined). ');
    return;
end

% a third argument (of any value) switches on gene knockout
doknockout=(nargin>=3);

if ~license('test','symbolic_toolbox')
    disp('You need the Symbolic Math Toolbox to use this function.');
    return;
end

% Work out which individual is wanted and validate the request BEFORE the
% symbolic display precision is changed, so that a bad request cannot leave
% the precision altered.
if isnumeric(ind)
    source='pop';
elseif ischar(ind) && strcmpi(ind,'best')
    source='best';
elseif ischar(ind) && strcmpi(ind,'valbest')
    source='valbest';
    % check that validation data is present
    if (~isfield(gp.userdata,'xval')) || (~isfield(gp.userdata,'yval')) || ...
            isempty(gp.userdata.xval) || isempty(gp.userdata.yval)
        disp('No validation data was found. Try gppretty(gp,''best'') instead.');
        return;
    end
else
    error('Illegal argument');
end

% Set the display precision for the symbolic toolbox. The onCleanup object
% restores the caller's previous setting when this function exits, whether
% normally or through an error.
prev_digits=digits;
digits(4);
restore_digits=onCleanup(@() digits(prev_digits)); %#ok<NASGU>

% Fetch the stored coefficients [bias, gene weights] and the encoded trees
% of the selected individual. Missing coefficients default to bias 0 and a
% unit weight on the first gene.
switch source
    case 'pop'
        if isempty(gp.fitness.returnvalues{ind})
            gp.fitness.returnvalues{ind}=[0 1];
        end
        coeffs=gp.fitness.returnvalues{ind};
        if doknockout
            % NOTE(review): this knocks genes out of the BEST individual of
            % the run, not population member IND, so the result does not
            % describe gp.pop{ind}. Left unchanged because correcting it
            % needs the evaluation strings of gp.pop{ind}, which are not
            % available in this file - confirm and fix against the rest of
            % the toolbox.
            trees=gp.results.best.individual;
            eval_trees=gp.results.best.eval_individual;
        else
            trees=gp.pop{ind};
        end
    case 'best'
        if isempty(gp.results.best.returnvalues)
            gp.results.best.returnvalues=[0 1];
        end
        coeffs=gp.results.best.returnvalues;
        trees=gp.results.best.individual;
        if doknockout
            eval_trees=gp.results.best.eval_individual;
        end
    otherwise % 'valbest'
        if isempty(gp.results.valbest.returnvalues)
            gp.results.valbest.returnvalues=[0 1];
        end
        coeffs=gp.results.valbest.returnvalues;
        trees=gp.results.valbest.individual;
        if doknockout
            eval_trees=gp.results.valbest.eval_individual;
        end
end

% knockout genes if required, this requires that coefficients are
% recomputed on the training data
if doknockout
    eval_trees=kogene(eval_trees,knockout);
    trees=kogene(trees,knockout);
    gp.state.run_completed=false; %trick fitness function into recomputing weights
    [fitness,gp,ypred,coeffs]=feval(gp.fitness.fitfun,eval_trees,gp); %#ok<ASGLU>
    % keep the local copy of gp in step with the recomputed coefficients
    switch source
        case 'pop'
            gp.fitness.returnvalues{ind}=coeffs;
        case 'best'
            gp.results.best.returnvalues=coeffs;
        otherwise
            gp.results.valbest.returnvalues=coeffs;
    end
end

ref_tree=gpreformat(gp,trees);

% construct full symbolic expression using gene weights and gene
% expressions; the bias term is folded in with the first gene
num_genes=length(ref_tree);
full_expr=sym(coeffs(1),'d')+sym(coeffs(2),'d')*sym(ref_tree{1});
expr_array=cell(1,num_genes);
expr_array{1}=simple(full_expr);
for g=2:num_genes
    gene_expr=sym(coeffs(g+1),'d')*sym(ref_tree{g});
    full_expr=full_expr+gene_expr;
    expr_array{g}=simple(gene_expr);
end

if strcmp(source,'best')
    % NOTE(review): displays the unsimplified expression and writes F2.mat
    % into the current directory on every 'best' call. This looks like
    % debugging residue but is preserved in case other scripts load F2.mat.
    F2=full_expr;
    disp(F2);
    save F2 F2;
end

disp(' ');
full_expr_simp=simple(full_expr);

% with no outputs requested, pretty-print the genes and the overall model
if nargout<1
    if length(expr_array)>1
        disp('Simplified genes:');
        disp('-----------------');
        disp(' ');
        disp('Gene 1 and bias term:');
        pretty(expr_array{1});
        disp(' ');
        for a=2:length(expr_array)
            disp(['Gene ' int2str(a) ':']);
            pretty(expr_array{a});
            disp( ' ');
        end
    end
    disp('Simplified overall GP expression:')
    disp('---------------------------------');
    pretty(full_expr_simp);
end

% regular expression that turns input variable names xN into x_{N}
pat='x(\d+)';

if nargout>0
    % process the LaTeX equation array
    % line up the initial '=' and subsequent '+' and '-' symbols
    % that mark the start of a new gene using &s
    latex_expr=['y=&&' deblank(latex(expr_array{1}))]; %creates line up point for genes
    latex_expr=regexprep(latex_expr,pat,'x_{$1}');
    for g=2:length(expr_array)
        % Trim blanks on both sides so the sign test sees the first real
        % character, and subscript THIS gene's variables (the accumulated
        % string has already been processed).
        lex=strtrim(latex(expr_array{g}));
        lex=regexprep(lex,pat,'x_{$1}');
        if ~isempty(lex) && lex(1)=='-'
            % move the leading minus into the alignment column
            op='-';
            lex=lex(2:end);
        else
            % positive gene: keep every character of the expression
            op='+';
        end
        latex_expr=[latex_expr '\\' '&' op '&' lex]; %#ok<AGROW> starts next line in array
    end
    gene_latex_expr=['\begin{eqnarray*}' latex_expr '\end{eqnarray*}'];
end

if nargout>1
    full_latex_expr=['y=' deblank(latex(full_expr_simp))];
    full_latex_expr=regexprep(full_latex_expr,pat,'x_{$1}');
end

if nargout>2
    expr_sym=full_expr_simp;
end

if nargout>3
    cell_expr_sym=expr_array;
end