-
Notifications
You must be signed in to change notification settings - Fork 183
/
Copy pathcharacter_demo.html
2027 lines (1945 loc) · 105 KB
/
character_demo.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<html>
<head>
<title>RecurrentJS Sentence Memorization Demo</title>
<style>
body {
font-family: Arial, "Helvetica Neue", Helvetica, sans-serif;
color: #333;
padding: 20px;
}
#argmax {
background-color: #DFD;
}
#ppl {
color: #090;
font-size: 20px;
}
#epoch {
color: #900;
font-size: 20px;
}
.apred {
padding: 2px;
margin: 5px;
overflow: hidden;
height: 20px;
font-size: 14px;
}
#prepro_status {
background-color: #FFD;
padding: 5px;
}
#status {
padding: 2px;
margin-top: 5px;
}
#controls {
margin: 5px;
}
.theslider {
width:90%;
display: inline-block;
}
.slider_value {
width: 9%;
display: inline-block;
}
#wrap {
width: 800px;
margin-right: auto;
margin-left: auto;
margin-bottom: 200px;
}
.abutton {
width: 120px;
height: 30px;
margin: 10px 10px 10px 0px;
}
.hh {
background-color: #EEE;
padding: 5px;
margin-top: 5px;
border-bottom: 1px solid #999;
margin-bottom: 2px;
}
#pplgraph {
float: right;
}
#intro {
text-align: justify;
}
</style>
<link href="external/jquery-ui.min.css" rel="stylesheet">
<script src="external/jquery-1.8.3.min.js"></script>
<script src="external/jquery-ui.min.js"></script>
<script src="src/recurrent.js"></script>
<script src="src/vis.js"></script>
<script type="text/javascript">
// Shared page state. Everything below is a plain global that the functions
// further down in this script read and overwrite.
// prediction params
var sample_softmax_temperature = 1.0; // how peaky model predictions should be
var max_chars_gen = 100; // max length of generated sentences
// various global var inits
// -1 is a "not computed yet" placeholder; initVocab() overwrites these three
var epoch_size = -1;
var input_size = -1;
var output_size = -1;
// character <-> integer index lookup tables and the list of known characters;
// rebuilt by initVocab() and replaced wholesale by loadModel()
var letterToIndex = {};
var indexToLetter = {};
var vocab = [];
// non-blank, trimmed training sentences; refilled by reinit()
var data_sents = [];
var solver = new R.Solver(); // should be class because it needs memory for step caches
// perplexity plot; tick() adds points to it and redraws it
var pplGraph = new Rvis.Graph();
// parameter name -> matrix; replaced by reinit() (via initModel) and by loadModel()
var model = {};
/**
 * Builds the character vocabulary for a list of sentences.
 *
 * Counts every character of the concatenated sentences, keeps the characters
 * seen at least `count_threshold` times, and rewrites these globals:
 * letterToIndex, indexToLetter, vocab, input_size, output_size, epoch_size.
 * The result is also reported in the #prepro_status element.
 *
 * Index 0 is never handed to a character: it is reserved for the START token
 * on the input side and the END token on the output side, which is why both
 * sizes are vocab.length + 1.
 *
 * @param {string[]} sents            training sentences
 * @param {number}   count_threshold  minimum occurrence count for a character to be kept
 */
var initVocab = function(sents, count_threshold) {
  // go over all characters and keep track of all unique ones seen
  var txt = sents.join(''); // concat all
  // count up all characters
  var d = {};
  for(var i=0,n=txt.length;i<n;i++) {
    var txti = txt[i];
    if(txti in d) { d[txti] += 1; }
    else { d[txti] = 1; }
  }
  // filter by count threshold and create pointers
  letterToIndex = {};
  indexToLetter = {};
  vocab = [];
  // NOTE: start at one because we will have START and END tokens!
  // that is, START token will be index 0 in model letter vectors
  // and END token will be index 0 in the next character softmax
  var q = 1;
  // `ch` is declared here; without the declaration it would leak as an
  // implicit global (and throw a ReferenceError in strict mode)
  for(var ch in d) {
    if(d.hasOwnProperty(ch)) {
      if(d[ch] >= count_threshold) {
        // add character to vocab
        letterToIndex[ch] = q;
        indexToLetter[q] = ch;
        vocab.push(ch);
        q++;
      }
    }
  }
  // globals written: indexToLetter, letterToIndex, vocab (list), and:
  input_size = vocab.length + 1;
  output_size = vocab.length + 1;
  epoch_size = sents.length;
  $("#prepro_status").text('found ' + vocab.length + ' distinct characters: ' + vocab.join(''));
}
/**
 * Copies every own enumerable property of `modelfrom` onto `modelto`.
 * Values are copied by reference (no cloning) and a key that already exists
 * on `modelto` is overwritten. Inherited properties are skipped.
 *
 * @param {Object} modelto    object that receives the entries
 * @param {Object} modelfrom  object whose own entries are copied
 */
var utilAddToModel = function(modelto, modelfrom) {
  for(var name in modelfrom) {
    if(!modelfrom.hasOwnProperty(name)) { continue; }
    // same key, same object reference
    modelto[name] = modelfrom[name];
  }
}
/**
 * Creates a fresh parameter set for the current configuration.
 *
 * Reads the globals input_size, letter_size, hidden_sizes, output_size and
 * generator. The result always contains 'Wil' (the letter embedding matrix,
 * one row per input index) plus every entry returned by R.initRNN when
 * generator is 'rnn', or by R.initLSTM for any other value.
 *
 * @returns {Object} parameter name -> matrix
 */
var initModel = function() {
  // letter embedding vectors come first so 'Wil' is the first key
  var params = { 'Wil': new R.RandMat(input_size, letter_size, 0, 0.08) };
  var net = (generator === 'rnn')
    ? R.initRNN(letter_size, hidden_sizes, output_size)
    : R.initLSTM(letter_size, hidden_sizes, output_size);
  utilAddToModel(params, net);
  return params;
}
/**
 * (Re)creates the #lr_slider widget and shows the current rate in #lr_text.
 *
 * The slider works on log10(learning_rate): its range runs from
 * log10(0.01) - 3 up to log10(0.01) + 0.05 in steps of 0.05. Dragging it
 * writes 10^value back into the global learning_rate and refreshes the label.
 */
var reinit_learning_rate_slider = function() {
  // learning_rate is a global variable; the label always shows 5 decimals
  var showRate = function() {
    $("#lr_text").text(learning_rate.toFixed(5));
  };
  var onSlide = function( event, ui ) {
    learning_rate = Math.pow(10, ui.value);
    showRate();
  };
  $("#lr_slider").slider({
    min: Math.log10(0.01) - 3.0,
    max: Math.log10(0.01) + 0.05,
    step: 0.05,
    value: Math.log10(learning_rate),
    slide: onSlide
  });
  showRate();
}
// Resets the whole demo from the current contents of the page: evaluates the
// options text, rebuilds the learning-rate slider, creates a fresh solver and
// perplexity graph, clears the training counters, re-reads the sentences from
// #ti, rebuilds the vocabulary and finally creates a new model.
var reinit = function() {
// note: reinit writes global vars
// eval options to set some globals
// NOTE(review): this is a direct eval of whatever text is in #newnet, executed
// inside this function's scope. It is only acceptable because the text is typed
// by the page's own user; never feed it content from another source.
// Presumably it assigns generator, hidden_sizes, letter_size, learning_rate,
// regc and clipval, which the rest of this script uses but never declares --
// confirm against the #newnet element.
eval($("#newnet").val());
reinit_learning_rate_slider();
solver = new R.Solver(); // reinit solver
pplGraph = new Rvis.Graph();
// restart the perplexity history and the iteration counter used by tick()
ppl_list = [];
tick_iter = 0;
// process the input, filter out blanks
var data_sents_raw = $('#ti').val().split('\n');
data_sents = [];
for(var i=0;i<data_sents_raw.length;i++) {
// surrounding whitespace is dropped; lines that end up empty are skipped
var sent = data_sents_raw[i].trim();
if(sent.length > 0) {
data_sents.push(sent);
}
}
initVocab(data_sents, 1); // takes count threshold for characters
// must run after initVocab: initModel reads input_size / output_size set there
model = initModel();
}
/**
 * Serializes the current network, solver state and vocabulary to JSON and
 * writes the string into the #tio element.
 *
 * The saved object has the keys hidden_sizes, generator, letter_size, model,
 * solver ({decay_rate, smooth_eps, step_cache}), letterToIndex, indexToLetter
 * and vocab. Every matrix in `model` and `solver.step_cache` is stored as the
 * value returned by its toJSON() method.
 */
var saveModel = function() {
  // converts a name -> matrix map into a name -> toJSON() map
  var matsToJSON = function(mats) {
    var json = {};
    for(var k in mats) {
      if(mats.hasOwnProperty(k)) {
        json[k] = mats[k].toJSON();
      }
    }
    return json;
  };

  var out = {};
  out['hidden_sizes'] = hidden_sizes;
  out['generator'] = generator;
  out['letter_size'] = letter_size;
  out['model'] = matsToJSON(model);

  var solver_out = {};
  solver_out['decay_rate'] = solver.decay_rate;
  solver_out['smooth_eps'] = solver.smooth_eps;
  // kept in a local: the previous version assigned an undeclared
  // step_cache_out, which created an accidental global
  solver_out['step_cache'] = matsToJSON(solver.step_cache);
  out['solver'] = solver_out;

  out['letterToIndex'] = letterToIndex;
  out['indexToLetter'] = indexToLetter;
  out['vocab'] = vocab;
  $("#tio").val(JSON.stringify(out));
}
/**
 * Restores the state written by saveModel from an already-parsed object.
 *
 * Overwrites the globals hidden_sizes, generator, letter_size, model, solver,
 * letterToIndex, indexToLetter and vocab, and resets ppl_list / tick_iter.
 *
 * @param {Object} j  parsed JSON with the keys hidden_sizes, generator,
 *                    letter_size, model, solver, letterToIndex, indexToLetter, vocab
 */
var loadModel = function(j) {
  // fills `target` with one matrix per own entry of `source`; each matrix is
  // created as a 1x1 placeholder and then populated through fromJSON
  var reviveInto = function(target, source) {
    for(var name in source) {
      if(!source.hasOwnProperty(name)) { continue; }
      target[name] = new R.Mat(1,1);
      target[name].fromJSON(source[name]);
    }
  };

  hidden_sizes = j.hidden_sizes;
  generator = j.generator;
  letter_size = j.letter_size;

  model = {};
  reviveInto(model, j.model);

  // have to reinit the solver since model changed
  solver = new R.Solver();
  solver.decay_rate = j.solver.decay_rate;
  solver.smooth_eps = j.solver.smooth_eps;
  solver.step_cache = {};
  reviveInto(solver.step_cache, j.solver.step_cache);

  letterToIndex = j['letterToIndex'];
  indexToLetter = j['indexToLetter'];
  vocab = j['vocab'];

  // reinit these
  ppl_list = [];
  tick_iter = 0;
}
/**
 * Runs one time step of the sequence model for a single character index.
 *
 * Row `ix` of model['Wil'] is plucked through the graph `G` and fed, together
 * with the previous step's result, to R.forwardRNN when the global generator
 * is 'rnn' and to R.forwardLSTM otherwise.
 *
 * @param {Object} G      graph used to record the operations
 * @param {Object} model  parameter name -> matrix
 * @param {number} ix     index of the input character (0 is the START token)
 * @param {Object} prev   result of the previous step ({} for the first step)
 * @returns {Object} whatever the forward function returns; callers read its `.o`
 */
var forwardIndex = function(G, model, ix, prev) {
  var letterVec = G.rowPluck(model['Wil'], ix);
  // forward prop the sequence learner
  if(generator === 'rnn') {
    return R.forwardRNN(G, model, hidden_sizes, letterVec, prev);
  }
  return R.forwardLSTM(G, model, hidden_sizes, letterVec, prev);
}
/**
 * Generates one sentence from the model, one character at a time.
 *
 * Starts from the START token (index 0), feeds each produced character back
 * in as the next input and stops when index 0 (END) is predicted or when the
 * text is already longer than the global max_chars_gen.
 *
 * @param {Object}  model          parameter name -> matrix
 * @param {boolean} [samplei]      true: sample from the predicted distribution;
 *                                 false (default): always take the most likely character
 * @param {number}  [temperature]  divisor applied to the raw outputs before the
 *                                 softmax; only used when sampling (default 1.0)
 * @returns {string} the generated text
 */
var predictSentence = function(model, samplei, temperature) {
  if(typeof samplei === 'undefined') { samplei = false; }
  if(typeof temperature === 'undefined') { temperature = 1.0; }
  // NOTE(review): the `false` argument presumably turns off backprop
  // bookkeeping in the graph -- confirm in recurrent.js
  var G = new R.Graph(false);
  var s = '';
  var prev = {};
  // declared once here; logprobs and probs used to be assigned without any
  // declaration, which leaked them as globals
  var ix, lh, logprobs, probs;
  while(true) {
    // RNN tick: input is START for the first step, else the last character
    ix = s.length === 0 ? 0 : letterToIndex[s[s.length-1]];
    lh = forwardIndex(G, model, ix, prev);
    prev = lh;
    // sample predicted letter
    logprobs = lh.o;
    if(temperature !== 1.0 && samplei) {
      // scale log probabilities by temperature and renormalize
      // if temperature is high, logprobs will go towards zero
      // and the softmax outputs will be more diffuse. if temperature is
      // very low, the softmax outputs will be more peaky
      for(var q=0,nq=logprobs.w.length;q<nq;q++) {
        logprobs.w[q] /= temperature;
      }
    }
    probs = R.softmax(logprobs);
    ix = samplei ? R.samplei(probs.w) : R.maxi(probs.w);
    if(ix === 0) { break; } // END token predicted, break out
    if(s.length > max_chars_gen) { break; } // something is wrong
    s += indexToLetter[ix];
  }
  return s;
}
/**
 * Runs the model over one sentence and accumulates the next-character loss.
 *
 * The sentence is framed by index 0 on both sides: the first input is the
 * START token and the last target is the END token, so a sentence of n
 * characters produces n + 1 predictions. After each step the softmax
 * gradient (probs - one_hot(target)) is written into the output's `.dw`;
 * calling `G.backward()` on the returned graph then backpropagates it.
 *
 * @param {Object} model  parameter name -> matrix
 * @param {string} sent   training sentence
 * @returns {{G: Object, ppl: number, cost: number}}
 *   G    - graph that recorded the forward pass
 *   ppl  - perplexity, 2^(mean negative log2 probability per prediction)
 *   cost - summed negative natural-log probability of the targets
 */
var costfun = function(model, sent) {
  var n = sent.length;
  var G = new R.Graph();
  var log2ppl = 0.0;
  var cost = 0.0;
  var prev = {};
  // locals; these used to be assigned without a declaration and leaked as globals
  var lh, logprobs, probs;
  for(var i=-1;i<n;i++) {
    // start and end tokens are zeros
    var ix_source = i === -1 ? 0 : letterToIndex[sent[i]]; // first step: start with START token
    var ix_target = i === n-1 ? 0 : letterToIndex[sent[i+1]]; // last step: end with END token
    lh = forwardIndex(G, model, ix_source, prev);
    prev = lh;
    // set gradients into logprobabilities
    logprobs = lh.o; // interpret output as logprobs
    probs = R.softmax(logprobs); // compute the softmax probabilities
    log2ppl += -Math.log2(probs.w[ix_target]); // accumulate base 2 log prob
    cost += -Math.log(probs.w[ix_target]);
    // write gradients into log probabilities
    // (dw deliberately aliases probs.w; probs is not used again after this)
    logprobs.dw = probs.w;
    logprobs.dw[ix_target] -= 1;
  }
  // average over the n + 1 predictions made above. Dividing by n - 1, as this
  // used to, overstates the perplexity, yields Infinity for a one-character
  // sentence and a value below 1 for an empty one.
  var ppl = Math.pow(2, log2ppl / (n + 1));
  return {'G':G, 'ppl':ppl, 'cost':cost};
}
/**
 * Returns the median of a list of numbers.
 *
 * The input array is left untouched: the values are sorted numerically on a
 * copy. For an even count the two middle values are averaged.
 *
 * @param {number[]} values
 * @returns {number} the median, or NaN when `values` is empty
 */
function median(values) {
  if(values.length === 0) { return NaN; } // nothing to take a median of
  // sort a copy so the caller's array keeps its order
  var sorted = values.slice().sort( function(a,b) {return a - b;} );
  var half = Math.floor(sorted.length/2);
  if(sorted.length % 2) { return sorted[half]; }
  return (sorted[half-1] + sorted[half]) / 2.0;
}
// perplexities collected since the last graph update (cleared every 100 ticks)
var ppl_list = [];
// number of training steps performed since the last reinit()/loadModel()
var tick_iter = 0;
/**
 * Performs one training step and periodically refreshes the page.
 *
 * Each call picks a random sentence, runs forward + backward on it and lets
 * the solver update the model. Every 10th call the argmax prediction and the
 * epoch / perplexity / timing labels are refreshed, every 50th call five
 * sampled sentences are redrawn, and every 100th call the median perplexity
 * of the last 100 steps is added to the graph.
 *
 * Does nothing while there are no training sentences.
 */
var tick = function() {
  // without sentences there is nothing to train on; returning here keeps the
  // interval timer from throwing on every tick
  if(data_sents.length === 0) { return; }

  // wraps a prediction in its own div. The text goes in through .text(), so
  // characters such as < or & coming from the training data are displayed
  // literally instead of being parsed as HTML.
  var makePredictionDiv = function(text) {
    return $('<div class="apred"></div>').text(text);
  };

  // sample sentence from data
  var sentix = R.randi(0,data_sents.length);
  var sent = data_sents[sentix];
  var t0 = +new Date(); // log start timestamp
  // evaluate cost function on a sentence
  var cost_struct = costfun(model, sent);
  // use built up graph to compute backprop (set .dw fields in mats)
  cost_struct.G.backward();
  // perform param update
  solver.step(model, learning_rate, regc, clipval);
  var t1 = +new Date();
  var tick_time = t1 - t0;
  ppl_list.push(cost_struct.ppl); // keep track of perplexity
  // evaluate now and then
  tick_iter += 1;
  if(tick_iter % 50 === 0) {
    // draw samples
    $('#samples').html('');
    for(var q=0;q<5;q++) {
      var sampled = predictSentence(model, true, sample_softmax_temperature);
      $('#samples').append(makePredictionDiv(sampled));
    }
  }
  if(tick_iter % 10 === 0) {
    // draw argmax prediction
    $('#argmax').html('');
    var best = predictSentence(model, false);
    $('#argmax').append(makePredictionDiv(best));
    // keep track of perplexity
    $('#epoch').text('epoch: ' + (tick_iter/epoch_size).toFixed(2));
    $('#ppl').text('perplexity: ' + cost_struct.ppl.toFixed(2));
    $('#ticktime').text('forw/bwd time per example: ' + tick_time.toFixed(1) + 'ms');
    if(tick_iter % 100 === 0) {
      // one graph point per 100 steps: the median is less noisy than single values
      var median_ppl = median(ppl_list);
      ppl_list = [];
      pplGraph.add(tick_iter, median_ppl);
      pplGraph.drawSelf(document.getElementById("pplgraph"));
    }
  }
}
/**
 * Debug helper: compares analytic gradients with central finite differences.
 *
 * Builds a fresh model, backpropagates once through a fixed probe sentence,
 * then nudges every single weight by +/- eps and recomputes the cost. A line
 * is logged to the console for each weight whose relative error
 * |numeric - analytic| / (|numeric| + |analytic|) is above 0.1.
 *
 * NOTE(review): the probe sentence is looked up through letterToIndex, so its
 * characters presumably have to be part of the current vocabulary -- confirm
 * before relying on the output.
 */
var gradCheck = function() {
  var model = initModel();
  var sent = '^test sentence$';
  var cost_struct = costfun(model, sent);
  // fills the .dw fields; the costfun calls below never run backward, so
  // these analytic gradients stay untouched while the weights are probed
  cost_struct.G.backward();
  var eps = 0.000001;
  for(var k in model) {
    if(!model.hasOwnProperty(k)) { continue; }
    var m = model[k]; // mat ref
    for(var i=0,n=m.w.length;i<n;i++) {
      var oldval = m.w[i]; // local; this used to leak as a global
      m.w[i] = oldval + eps;
      var c0 = costfun(model, sent);
      m.w[i] = oldval - eps;
      var c1 = costfun(model, sent);
      m.w[i] = oldval; // restore the weight
      var gnum = (c0.cost - c1.cost)/(2 * eps);
      var ganal = m.dw[i];
      var scale = Math.abs(gnum) + Math.abs(ganal);
      if(scale === 0) { continue; } // both gradients are exactly zero: they agree
      // absolute value: a signed error hid every case where the analytic
      // gradient was larger than the numeric one
      var relerr = Math.abs(gnum - ganal)/scale;
      if(relerr > 1e-1) {
        console.log(k + ': numeric: ' + gnum + ', analytic: ' + ganal + ', err: ' + relerr);
      }
    }
  }
}
// id of the running training timer, or null while training is stopped
var iid = null;
// Page wiring: runs once the DOM is ready, hooks up every button, starts
// training right away and finally creates the temperature slider.
$(function() {
  // cancels the running timer, if any (leaves iid itself untouched)
  var cancelTimer = function() {
    if(iid !== null) { clearInterval(iid); }
  };
  // schedules tick() to run back-to-back
  var startTimer = function() {
    iid = setInterval(tick, 0);
  };

  // attach button handlers
  // learn: rebuild everything from the page and restart training
  $('#learn').click(function(){
    reinit();
    cancelTimer();
    startTimer();
  });
  $('#stop').click(function(){
    cancelTimer();
    iid = null;
  });
  // resume: only starts a timer when none is running
  $("#resume").click(function(){
    if(iid !== null) { return; }
    startTimer();
  });
  $("#savemodel").click(saveModel);
  // loadmodel: restores from the JSON text currently in #tio
  $("#loadmodel").click(function(){
    loadModel(JSON.parse($("#tio").val()));
  });
  // loadpretrained: fetches a saved model and continues with a small learning rate
  $("#loadpretrained").click(function(){
    $.getJSON("lstm_100_model.json", function(data) {
      pplGraph = new Rvis.Graph();
      learning_rate = 0.0001;
      reinit_learning_rate_slider();
      loadModel(data);
    });
  });

  $("#learn").click(); // simulate click on startup
  //$('#gradcheck').click(gradCheck);

  // the slider position is log10 of the sampling temperature (0.1 .. ~11)
  $("#temperature_slider").slider({
    min: -1,
    max: 1.05,
    step: 0.05,
    value: 0,
    slide: function( event, ui ) {
      sample_softmax_temperature = Math.pow(10, ui.value);
      $("#temperature_text").text( sample_softmax_temperature.toFixed(2) );
    }
  });
});
</script>
</head>
<body>
<a href="https://github.com/karpathy/recurrentjs"><img style="position: absolute; top: 0; right: 0; border: 0;" src="https://s3.amazonaws.com/github/ribbons/forkme_right_darkblue_121621.png" alt="Fork me on GitHub"></a>
<div id="wrap">
<h1>Deep Recurrent Nets character generation demo</h1>
<div id="intro">
This demo shows usage of the <a href="https://github.com/karpathy/recurrentjs">recurrentjs library</a> that allows you to train deep Recurrent Neural Networks (RNN) and Long Short-Term Memory Networks (LSTM) in Javascript. But the core of the library is more general and allows you to set up arbitrary expression graphs that support fully automatic backpropagation.<br><br>
In this demo we take a dataset of sentences as input and learn to memorize the sentences character by character. That is, the RNN/LSTM takes a character, its context from previous time steps (as mediated by the hidden layers) and predicts the next character in the sequence. Here is an example: <br><br>
<div style="text-align:center;"><img src="eg.png"></div>
In the example image above that depicts a deep RNN, every character has an associated "letter vector" that we will train with backpropagation. These letter vectors are combined through a (learnable) Matrix-vector multiply transformation into the first hidden layer representation (yellow), then into second hidden layer representation (purple), and finally into the output space (blue). The output space has dimensionality equal to the number of characters in the dataset and every dimension provides the probability of the next character in the sequence. The network is therefore trained to always predict the next character (using Softmax + cross-entropy loss on all letters). The quantity we track during training is called the <b>perplexity</b>, which measures how surprised the network is to see the next character in a sequence. For example, if perplexity is 4.0 then it's as if the network was guessing uniformly at random from 4 possible characters for next letter (i.e. lowest it can be is 1). At test time, the prediction is currently done iteratively character by character in a greedy fashion, but I might eventually implement more sophisticated methods (e.g. beam search).<br><br>
The demo is pre-filled with sentences from <a href="http://www.paulgraham.com/articles.html">Paul Graham's essays</a>, in an attempt to encode Paul Graham's knowledge into the weights of the Recurrent Networks. The long-term goal of the project then is to generate startup wisdom at will. Feel free to train on whatever data you wish, and to experiment with the parameters. If you want more impressive models you have to increase the sizes of hidden layers, and maybe slightly the letter vectors. However, this will take longer to train.<br><br>
For suggestions/bugs ping me at <a href="https://twitter.com/karpathy">@karpathy</a>.<br><br>
</div>
<div>
<div class="hh">Input sentences:</div>
<textarea style="width:100%; height:200px;" id="ti">
the company has, say, 6 months of runway
or to put it more brutally, 6 months before they're out of business
they expect to avoid that by raising more from investors
that last sentence is the fatal one
it's hard to convince investors the first time too, but founders expect that
what bites them the second time is a confluence of three forces:
the company is spending more now than it did the first time it raised money
investors have much higher standards for companies that have already raised money
the company is now starting to read as a failure
the first time it raised money, it was neither a success nor a failure; it was too early to ask
i'm going to call the situation i described in the first paragraph "the fatal pinch
one of the things that makes the fatal pinch so dangerous is that it's self-reinforcing
y combinator tells founders who raise money to act as if it's the last they'll ever get
i will now, by an amazing feat of clairvoyance, do this for you: the probability is zero
you should shut down the company if you're certain it will fail no matter what you do
companies rarely have to fail though
what i'm really doing here is giving you the option of admitting you've already given up
if you don't want to shut down the company, that leaves increasing revenues and decreasing expenses
in most startups, expenses people and decreasing expenses firing people
if so, now's the time
which leaves two options, firing good people and making more money
you should lean more toward firing people if the source of your trouble is overhiring
plus those 15 people might not even be the ones you need for whatever you end up building
so the solution may be to shrink and then figure out what direction to grow in
it may seem facile to suggest a startup make more money, as if that could be done for the asking
usually a startup is already trying as hard as it can to sell whatever it sells
but only work on whatever will get you the most revenue the soonest
or you may have expertise in some new field they don't understand
and to the extent you can, try to avoid the worst pitfalls of consulting
you keep the ip and no billing by the hour
you just have to realize in time that you're near death
and if you're in the fatal pinch, you are
it struck me recently how few of the most successful people i know are mean
there are exceptions, but remarkably few
meanness isn't rare
in fact, one of the things the internet has shown us is how mean people can be
a few decades ago, only famous people and professional writers got to publish their opinions
now everyone can, and we can all see the long tail of meanness that had previously been hidden
what's going on here? are meanness and success inversely correlated?
part of what's going on, of course, is selection bias
i only know people who work in certain fields: startup founders, programmers, professors
i'm willing to believe that successful people in other fields are mean
maybe successful hedge fund managers are mean; i don't know enough to say
it seems quite likely that most successful drug lords are mean
being married to her is like standing next to an airport baggage scanner
why? i think there are several reasons
one is that being mean makes you stupid
that's why i hate fights
you never do your best work in a fight, because fights are not sufficiently general
winning is always a function of the situation and the people involved
and yet fighting is just as much work as thinking about real problems
startups don't win by attacking
they win by transcending
there are exceptions of course, but usually the way to win is to race ahead, not to stop and fight
another reason mean founders lose is that they can't get the best people to work for them
they can hire people who will put up with them because they need a job
but the best people have other options
a mean person can't convince the best people to work for him unless he is super convincing
and while having the best people helps any organization, it's critical for startups
the startup founders who end up richest are not the ones driven by money
[1] the ones who keep going are driven by something else
they may not say so explicitly, but they're usually trying to improve the world
which means people with a desire to improve the world have a natural advantage
this kind of work is the future
for most of history success meant control of scarce resources
for most of history, success meant success at zero-sum games
and in most of them meanness was not a handicap but probably an advantage
that is changing
increasingly the games that matter are not zero-sum
there have long been games where you won by having new ideas
in the third century bc archimedes won by doing that
at least until an invading roman army killed him
and not just not being at war
people need to feel that what they create can't be stolen
that has always been the case for thinkers, which is why this trend began with them
the exciting thing is that their m
seems to be spreading
so i'm really glad i stopped to think about this
jessica and i have always worked hard to teach our kids not to be mean
we tolerate noise and mess and junk food, but not meanness
startups are very counterintuitive
i'm not sure why
maybe it's just because knowledge about them hasn't permeated our culture yet
but whatever the reason, starting a startup is a task where you can't always trust your instincts
it's like skiing in that way
when you first try skiing and you want to slow down, your instinct is to lean back
but if you lean back on skis you fly down the hill out of control
so part of learning to ski is learning to suppress that impulse
eventually you get new habits, but at first it takes a conscious effort
at first there's a list of things you're trying to remember as you start down the hill
startups are as unnatural as skiing, so there's a similar list for startups
counterintuitive
if you know nothing more than this, you may at least pause before making them
it's really true
they seem wrong
so of course your first impulse is to disregard them
if founders' instincts already gave them the right answers, they wouldn't need us
you only need other people to give you advice that surprises you
that's why there are a lot of ski instructors and not many running instructors
you can, however, trust your instincts about people
and in fact one of the most common mistakes young founders make is not to do that enough
if someone seems slippery, or bogus, or a jerk, don't ignore it
this is one case where it pays to be self-indulgent
work with people you genuinely like, and you've known long enough to be sure
the second counterintuitive point is that it's not that important to know a lot about startups
mark zuckerberg didn't succeed because he was an expert on startups
if you don't know anything about, say, how to raise an angel round, don't feel bad on that account
that sort of thing you can learn when you need to, and forget after you've done it
" it would set off alarms
from the outside that seems like what startups do
we saw this happen so often that we made up a name for it: playing house
eventually i realized why it was happening
think about what you have to do to get into college, for example
extracurricular activities, check
even in college classes most of the work is as artificial as running laps
i'm not attacking the educational system for being this way
i confess i did it myself in college
it was like a game
then they want to know what the tricks are for growing fast
and we have to tell them the best way to do that is simply to make something people want
" and the partner replying "just
gaming the system may continue to work if you go to work for a big company
[2] but that doesn't work with startups
startups are as impersonal as physics
you have to make something people want, and you prosper only to the extent you do
the dangerous thing is, faking does work to some degree on investors
but it's not in your interest to
the company is ultimately doomed
all you're doing is wasting your own time riding it down
so stop looking for the trick
it's exciting that there even exist parts of the world where you win by doing good work
how do you win in each type of work, and what would you like to win by doing? [4]
all-consuming
that brings us to our fourth counterintuitive point: startups are all-consuming
if you start a startup, it will take over your life to a degree you cannot imagine
so there is a real opportunity cost here
larry page may seem to have an enviable life, but there are aspects of it that are unenviable
if he goes on vacation for even a week, a whole week's backlog of shit accumulates
it never gets any easier
the nature of the problems changes
but the total volume of worry never decreases; if anything it increases
many of which will make you a better parent when you do have kids
and since you can delay pushing the button for a while, most people in rich countries do
to be fair, the universities have their hand forced here
a lot of incoming students are interested in startups
universities are, at least de facto, expected to prepare them for their careers
so students who want to start startups hope universities can teach them about startups
can universities teach students about startups? yes and no
[5] so starting a startup is intrinsically something you can only really learn by doing it
you may be nominally a student for a bit, but you won't even be that for long
do not start a startup in college
starting a startup is like a brutally fast depth-first search
most people should still be searching breadth-first at 20
if you start a startup at 20 and you're sufficiently successful, you'll never get to do it
mark zuckerberg will never get to bum around a foreign country
he can do other things most people can't, like charter jets to fly him to foreign countries
but success has taken a lot of the serendipity out of his life
facebook is running him as much as he's running facebook
among other things it gives you more options to choose your life's work from
there's not even a tradeoff here
should you do it at any age? i realize i've made startups sound pretty hard
if i haven't, let me try again: starting a startup is really hard
what if it's too hard? how can you tell if you're up to this challenge?
the answer is the fifth counterintuitive point: you can't tell
starting a startup will change you a lot
it was easy to tell how smart they were, and most people reading this will be over that threshold
the hard part was predicting how tough and ambitious they would become
the founders sometimes think they know
if you're absolutely terrified of starting a startup, you probably shouldn't do it
but if you're merely unsure whether you're up to it, the only way to find out is to try
just not now
for getting both is the same
i've written a whole essay on this, so i won't repeat it all here
the way to come up with good startup ideas is to take a step back
in fact, so unconsciously that you don't even realize at first that they're startup ideas
this is not only possible, it's how apple, yahoo, google, and facebook all got started
none of these companies were even meant to be companies at first
they were all just side projects
the third part, incidentally, is how you get cofounders at the same time as the idea
" but that prescription, though sufficient, is too narrow
what was special about brian chesky and joe gebbia was not that they were experts in technology
what kind of problems are those? that is very hard to answer in the general case
so how do you know when you're working on real stuff? [8]
i know how i know
y combinator itself was something i only did because it seemed interesting
so i seem to have some sort of internal compass that helps me out
but i don't know what other people have in their heads
and indeed, probably also the best way to live
you may not realize they're startup ideas, but you'll know they're something that ought to exist
he didn't mean it to be a startup, and he never tried to turn it into one
" it's the classic version of college as education for its own sake
the component of entrepreneurship that really matters is domain expertise
the way to become larry page was to become an expert on search
at its best, starting a startup is merely an ulterior motive for curiosity
and you'll do it best if you introduce the ulterior motive toward the end of the process
most startups that raise money do it more than once
reality can be messier
some companies raise money twice in phase 2
others skip phase 1 and go straight to phase 2
but the three phase path is at least the one about which individual startups' paths oscillate
this essay focuses on phase 2 fundraising
that problem is irreducible; it should be hard
but much of the other kind of difficulty can be eliminated
you can't trust your intuitions
i'm going to give you a set of rules here that will get you through this process if anything will
at certain moments you'll be tempted to ignore them
so rule number zero is: these rules exist for a reason
the ultimate source of the forces acting on you is the forces acting on investors
but that fast growth means investors can't wait around
if you wait till a startup is obviously a success, it's too late
but that in turn makes investors nervous they're about to invest in a flop
as indeed they often are
what investors would like to do, if they could, is wait
but if you wait too long, other investors might take the deal away from you
and of course the other investors are all subject to the same forces
don't raise money unless you want it and it wants you
actually it isn't
rapid growth is what makes a company a startup
the other time not to raise money is when you won't be able to
be in fundraising mode or not
one of the things that surprises founders most about fundraising is how distracting it is
when you start fundraising, everything else grinds to a halt
the problem is not the time fundraising consumes but that it becomes the top idea in your mind
a startup can't endure that level of distraction for long
because fundraising is so distracting, a startup should either be in fundraising mode or not
you can take money from investors when you're not in fundraising mode
you just can't expend any attention on it
there are two things that take attention: convincing investors, and negotiating with them
[3] the terms will be whatever they turn out to be in your next equity round
investors will try to lure you into fundraising when you're not
it's great for them if they can, because they can thereby get a shot at you before everyone else
they'll send you emails saying they want to meet to learn more about you
deals don't happen that way
they may say they just want to meet and chat, but investors never just want to meet and chat
get introductions to investors
before you can talk to investors, you have to be introduced to them
if you're presenting at a demo day, you'll be introduced to a whole bunch simultaneously
but even if you are, you should supplement these with intros you collect yourself
do you have to be introduced? in phase 2, yes
intros vary greatly in effectiveness
the best type of intro is from a well-known investor who has just invested in you
so when you get an investor to commit, ask them to introduce you to other investors they respect
[7] the next best type of intro is from a founder of a company they've funded
you can also get intros from other people in the startup community, like lawyers and reporters
there are now sites like angellist, fundersclub, and wefunder that can introduce you to investors
we recommend startups treat them as auxiliary sources of money
raise money first from leads you get yourself
those will on average be better investors
hear no till you hear yes
i mentioned earlier that investors prefer to wait if they can
what's particularly dangerous for founders is the way they wait
essentially, they lead you on
they seem like they're about to invest right up till the moment they say no
if they even say no
some of the worse ones never actually do say no; they just stop replying to your emails
they hope that way to get a free option on investing
that's not the worst thing investors will do
and wishful thinking founders are happy to meet them half way
fortunately, the next rule is a tactic for neutralizing this behavior
but to work it depends on you not being tricked by the no that sounds like yes
if you believe an investor has committed, get them to confirm it
and till they confirm, regard them as saying no
do breadth-first search weighted by expected value
when you talk to investors your m.o. should be breadth-first search, weighted by expected value
you should always talk to investors in parallel rather than serially
meet such investors last, if at all
but you have to be disciplined about assigning probabilities
you can't let how much you want an investor influence your estimate of how much they want you
know where you stand
never leave a meeting with an investor without asking what happens next
if you're experienced at negotiations, you already know how to ask such questions
[13] if you're not, there's a trick you can use in this situation
investors know you're inexperienced at raising money
inexperience there doesn't make you unattractive
larry and sergey were noobs at fundraising
get the first commitment
the biggest factor in most investors' opinions of you is the opinion of other investors
once you start getting investors to commit, it becomes increasingly easy to get more to
but the other side of this coin is that it's often hard to get the first commitment
getting the first substantial offer can be half the total difficulty of fundraising
what counts as a substantial offer depends on who it's from and how much it is
money from friends and family doesn't usually count, no matter how much
close committed money
it's not a deal till the money's in the bank
and it's also one that furnishes them plenty of excuses to gratify it
the public markets snap startup investing around like a whip
if the chinese economy blows up tomorrow, all bets are off
tomorrow a big competitor could appear, or you could get c&ded, or your cofounder could quit
even a day's delay can bring news that causes an investor to change their mind
so when someone commits, get the money
knowing where you stand doesn't end when they say they'll invest
inexperienced investors are the ones most likely to get buyer's remorse
but i've heard of cases of even top-tier vc firms welching on deals
avoid investors who don't "lead"
some investors are known for deciding quickly, and those are extra valuable early on
conversely, an investor who will only invest once other investors have is worthless initially
you can recognize this contemptible subspecies of investor because they often talk about "leads"
they say that they don't lead, or that they'll invest once you have a lead
now there are rarely actual rounds before the a round, or leads for them
now startups simply raise money from investors one at a time till they feel they have enough
the spectral signature of all mediocre investors
have multiple plans
many investors will ask how much you're planning to raise
this question makes founders feel they should be planning to raise a specific amount
but in fact you shouldn't
it's a mistake to have fixed plans in an undertaking as unpredictable as fundraising
" i've known a handful of founders who could pull that off without having vcs laugh in their faces
different plans match different investors
$15k per month is high, so don't actually spend that much
but it's ok to use a high estimate when fundraising to add a margin for error
if you have additional expenses, like manufacturing, add in those at the end
underestimate how much you want
then when you reach $150k you're more than half done
whereas if you'd said you were raising $500k, you'd be less than a third done at $150k
if fundraising stalled there for an appreciable time, you'd start to read as a failure
saying initially that you're raising $250k doesn't limit you to raising that much
startups do that all the time
i'm not saying you should lie, but that you should lower your expectations initially
there is almost no downside in starting with a low number
it not only won't cap the amount you raise, but will on the whole tend to increase it
a good metaphor here is angle of attack
if you try to fly at too steep an angle of attack, you just stall
be profitable if you can
if you can make it to profitability without raising any additional money
there are many analogies between fundraising and dating, and this is one of the strongest
no one wants you if you seem desperate
and the best way not to seem desperate is not to be desperate
and they are then surprised how difficult and unpleasant it is
of course not all startups can make it to ramen profitability in a few months
don't optimize for valuation
founders who raise money at high valuations tend to be unduly proud of it
this is stupid, because fundraising is not the test that matters
the real test is revenue
fundraising is just a means to that end
being proud of how well you did at fundraising is like being proud of your college grades
number two is good investors
valuation is at best third
the empirical evidence shows just how unimportant it is
6 million respectively
so let that satisfy your competitiveness
you're doing better than dropbox and airbnb at a test that doesn't matter
it will be easier to raise money at a lower valuation
it shouldn't be, but it is
but although it's a mistake for investors to care about price, a significant number do
yes/no before valuation
some investors want to know what your valuation is before they even talk to you about investing
fortunately there is a way to avoid naming a price in this situation
and it is not just a negotiating trick; it's how you (both) should be operating
then if they decide they do want to invest, you can figure out a price
but first things first
this is a safe technique so long as you combine it with the next one
beware "valuation sensitive" investors
occasionally you'll encounter investors who describe themselves as "valuation sensitive"
you should therefore never approach such investors first
this way, you'll not only get market price, but it will also take less time
so you'd only want to talk to this sort of investor if you were about to do that anyway
if you're surprised by a lowball offer, treat it as a backup offer and delay responding to it
but lowballing you is a dick move that should be met with the corresponding countermove
accept offers greedily
a greedy algorithm takes the best of the options in front of it right now
and that is how startups should approach fundraising in phases 2 and later
if someone makes you an acceptable offer, take it
if you have multiple incompatible offers, take the best
don't reject an acceptable offer in the hope of getting a better one in the future
these simple rules cover a wide variety of cases
if you're raising money from many investors, roll them up as they say yes
as you start to feel you've raised enough, the threshold for acceptable will start to get higher
in practice offers exist for stretches of time, not points
so when you get an acceptable offer that would be incompatible with others (e
this could lose you some that might have made an offer if they had more time
but by definition you don't care; the initial offer was acceptable
a deadline of three working days is acceptable
you shouldn't need more than that if you've been talking to investors in parallel
but a deadline any shorter is a sign you're dealing with a sketchy investor
you can usually call their bluff, and you may need to
but if it does, "get the best investors" is in the average case bad advice
the best investors are also the most selective, because they get their pick of all the startups
(the situation is different in phase 1
there's no practical difficulty
if the smaller investments are on convertible notes, they'll just convert into the series a round
till they do, you don't know for sure they will, and the greedy algorithm tells you what to do
don't sell more than 25% in phase 2
if you do well, you will probably raise a series a round eventually
i say probably because things are changing with series a rounds
startups may start to skip them
which means you should avoid doing things in earlier rounds that will mess up raising an a round
guess conservatively
have one person handle fundraising
(if the founders mistrust one another, this could cause some friction
even if the ceo is a programmer and another founder is a salesperson? yes
but wait till that point
you'll need an executive summary and (maybe) a deck
traditionally phase 2 fundraising consists of presenting a slide deck in person to investors
a lot of the most successful startups we fund never make decks in phase 2
they just talk to investors and explain what they plan to do
but don't refuse on that account to give copies to investors you meet
you just have to treat such leaks as a cost of doing business
in practice it's not that high a cost
i wouldn't do that
it's a sign they're not really interested
stop fundraising when it stops working
when do you stop fundraising? ideally when you've raised enough
but what if you haven't raised as much as you'd like? when do you give up?
when your fundraising options run out, they usually run out in the same way
don't keep sucking on the straw if you're just getting air
it's not going to get better
don't get addicted to fundraising
the work at an early stage startup often consists of unglamorous schleps
whereas fundraising, when it's going well, can be quite the opposite
the danger of fundraising is particularly acute for people who are good at it
it's always fun to work on something you're good at
if you're one of these people, beware
fundraising is not what will make your company successful
listening to users complain about bugs in your software is what will make you successful
startups can be destroyed by this
don't raise too much
though only a handful of startups have to worry about this, it is possible to raise too much
the dangers of raising too much are subtle but insidious
one is that it will set impossibly high expectations
a company's valuation is expected to rise each time it raises money
if not it's a sign of a company in trouble, which makes you unattractive to investors
and you have to be doing really, really well to raise money at $50 million
but the money itself may be more dangerous than the valuation
so if you do raise a huge amount of money, don't spend it
startups raising money occasionally alienate investors by seeming arrogant
it's a mistake to behave arrogantly to investors
the only safe strategy is never to seem arrogant at all
so you must cushion the blow with soft words
at yc we tell startups they can blame us
and now that i've written this, everyone else can blame me if they want
the danger of behaving arrogantly is greatest when you're doing well
when everyone wants you, it's hard not to let it go to your head
especially if till recently no one wanted you
but restrain yourself
the startup world is a small place, and startups have lots of ups and downs
this is a domain where it's more true than usual that pride goeth before a fall
be nice when investors reject you as well
the best investors are not wedded to their initial opinion of you
if they reject you in phase 2 and you end up doing well, they'll often invest in phase 3
in fact investors who reject you are some of your warmest leads for future fundraising
any investor who spent significant time deciding probably came close to saying yes
the bar will be higher next time
assume the money you raise in phase 2 will be the last you ever raise
you must make it to profitability on this money if you can
this is probably the optimal strategy for investors
it's too hard to pick winners early on
better to let the market do it for you
but it often comes as a surprise to startups how much harder it is to raise money in phase 3
the next time you raise money, the experiment has to have worked
you have to be on a trajectory that leads to going public
and while there are some ideas where the proof that the experiment worked might consist of e.g. query response times, usually the proof is profitability
usually phase 3 fundraising has to be type a fundraising
in practice there are two ways startups hose themselves between phases 2 and 3
some are just too slow to become profitable
they raise enough money to last for two years
there doesn't seem any particular urgency to be profitable
so they don't make any effort to make money for a year
but by that time, not making money has become habitual
when they finally decide to try, they find they can't
the other way companies hose themselves is by letting their expenses grow too fast
which almost always means hiring too many people
you usually shouldn't go out and hire 8 people as soon as you raise money at phase 2
usually you want to wait till you have growth (and thus usually revenues) to justify them
a lot of vcs will encourage you to hire aggressively
don't listen to them
don't make things complicated
that's fundraising in one sentence
don't introduce complicated optimizations, and don't let investors introduce complications either
fundraising is not what will make you successful
it's just a means to an end
be good, take care of yourselves, and don't leave the path
the biggest component in most investors' opinion of you is the opinion of other investors
which is of course a recipe for exponential growth
but actually the two are not that highly correlated
if you understand them, you can at least avoid being surprised
raising money decreases the risk of failure