Skip to content

Commit f6e636d

Browse files
author
remi.paucher
committed
Code pour la cross-validation
1 parent b43ff79 commit f6e636d

File tree

5 files changed

+107
-33
lines changed

5 files changed

+107
-33
lines changed

Hotmail_2_GenerateDB.py

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ def Myexcepthook(type, value, tb):
2323
GENERATE_VALIDATION_SET = False
2424

2525
FONTS = [
26-
("Fonts/califb.ttf", (180,)),
27-
("Fonts/georgia.ttf", (180,)),
26+
("Fonts/califb.ttf", (180,)),
27+
#("Fonts/georgia.ttf", (180,)),
2828
("Fonts/sylfaen.ttf", (180,)),
2929
#("Fonts/BKANT.TTF", (180,)),
3030
]
@@ -37,19 +37,18 @@ def Myexcepthook(type, value, tb):
3737
##############################################################################
3838
"""
3939

40-
#GENERATE_CAPITAL_LETTERS = True
41-
#GENERATE_DIGITS = True
42-
#elem_to_gen = Generate_Element_List(GENERATE_CAPITAL_LETTERS, GENERATE_DIGITS)
43-
44-
#elem_to_gen = ['B', 'D', 'E', 'M', 'N', 'T', 'U', '2', '3', '8']
45-
elem_to_gen = '3DE2MT'
40+
GENERATE_CAPITAL_LETTERS = True
41+
GENERATE_DIGITS = True
42+
elem_to_gen = Generate_Element_List(GENERATE_CAPITAL_LETTERS, GENERATE_DIGITS)
43+
44+
#elem_to_gen = '3DE2MT'
4645

4746
DESTINATION_FOLDER = 'Hotmail/DBTraining'
4847
CLEAN_DESTINATION_FOLDER = True
4948
DISTORTION_W_MIN = 0
50-
DISTORTION_W_MAX = 8
49+
DISTORTION_W_MAX = 7
5150
DISTORTION_H_MIN = 0
52-
DISTORTION_H_MAX = 8
51+
DISTORTION_H_MAX = 7
5352
SCALE_MIN = 25
5453
SCALE_MAX = 29
5554
STEP = 1
@@ -90,11 +89,3 @@ def Myexcepthook(type, value, tb):
9089
Generate_Set(DESTINATION_FOLDER,CLEAN_DESTINATION_FOLDER,DISTORTION_W_MIN,DISTORTION_W_MAX,DISTORTION_H_MIN,
9190
DISTORTION_H_MAX,SCALE_MIN,SCALE_MAX,STEP, elem_to_gen, FONTS, ALIGN_RANGEX, ALIGN_RANGEY, DEFAULT_SIZE)
9291

93-
94-
95-
96-
97-
98-
99-
100-

Hotmail_4_GUI.py

Lines changed: 16 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,16 @@
1-
#!coding: utf-8
2-
3-
MODEL_FOLDER = "Hotmail/Models"
4-
DEFAULT_MODEL_FILE="model.svm"
5-
6-
CAPTCHA_FOLDER = "Hotmail/Rough Captchas"
7-
8-
DEFAULT_CAPTCHA_FILE = "Image011.jpg"
9-
10-
execfile("GUI.py")
11-
12-
13-
14-
15-
1+
#!coding: utf-8
2+
#!coding: utf-8
3+
4+
MODEL_FOLDER = "Hotmail/Models"
5+
DEFAULT_MODEL_FILE="model.svm"
6+
7+
CAPTCHA_FOLDER = "Hotmail/Rough Captchas"
8+
9+
DEFAULT_CAPTCHA_FILE = "Image011.jpg"
10+
11+
execfile("GUI.py")
12+
13+
14+
15+
16+

Hotmail_5_ComputeScores.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -156,7 +156,10 @@ def get_prediction(model, captcha, parent):
156156
for x in segs:
157157
for y in xrange(0, h):
158158
segmented_captcha.putpixel((x*parent.zoom, y), (255,0,0))
159+
159160
parent.SetGraphImage(segmented_captcha)
161+
parent.actif = False
162+
parent.launchPredictionButton.SetLabel("Lancer la prédiction")
160163

161164

162165

SVM_cross_validation.py

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
#!coding: utf-8
2+
from svm import *
3+
import os, sys
4+
import Image
5+
6+
import psyco
7+
psyco.full()
8+
9+
from Preprocess import load_image
10+
from cross_validation import *
11+
12+
13+
#TRACEBACK
14+
import traceback
15+
import sys
16+
def Myexcepthook(type, value, tb):
17+
lines=traceback.format_exception(type, value, tb)
18+
f=open('log.txt', 'a')
19+
f.write("\n".join(lines))
20+
f.close()
21+
print lines
22+
raw_input()
23+
sys.excepthook=Myexcepthook
24+
25+
26+
print "Loading characters..."
27+
28+
TRAINING_FOLDER = 'Egoshare/DBTraining-Captcha_based'
29+
30+
labels = []
31+
samples = []
32+
33+
for folder, subfolders, files in os.walk(TRAINING_FOLDER):
34+
if folder[0] != ".":
35+
loaded = False
36+
for file in [file for file in files if 'bmp' in file]:
37+
if not loaded:
38+
print "Loading ", folder
39+
loaded = True
40+
im = Image.open(os.path.join(folder, file))
41+
labels.append(ord(folder[-1])-65)
42+
samples.append(map(lambda e:e/255., list(im.getdata())))
43+
print "Loading done."
44+
45+
46+
print "\nStarting cross-validation..."
47+
rates = []
48+
CRANGE = [1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096, 8192, 16384, 32768, 65536]
49+
for C in CRANGE:
50+
param = svm_parameter(kernel_type = RBF, C=C, probability = 1) #, gamma=1./(2*1.25**2))
51+
rate = do_cross_validation(samples, labels, param, 10)
52+
53+
f=open('cross-validation_results.txt', 'a')
54+
f.write("C="+str(C)+"\tgamma=default\t=>\t"+str(rate)+'\n')
55+
f.close()
56+
57+
rates.append(rate)
58+
59+
60+
print "Cross-validation done\n"
61+
print "SUCCES RATES: ", rates
62+
print "\nOPTIMAL PARAMETERS: "
63+
index = rates.index(max(rates))
64+
print "C = ", CRANGE[index]
65+
print "Optimal success rate: ", max(rates)
66+
67+
raw_input()
68+

cross-validation_results.txt

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
HOTMAIL:
2+
--------
3+
#Cross-validation sur 580 caractères
4+
C=1 gamma=default => 97.2222222222
5+
C=10 gamma=default => 99.1319444444
6+
C=100 gamma=default => 98.9583333333
7+
C=1000 gamma=default => 98.7847222222
8+
C=10000 gamma=default => 99.1319444444
9+
10+
EGOSHARE (captcha-based):
11+
-------------------------

0 commit comments

Comments
 (0)