Skip to content

Commit

Permalink
Now the parsing model can continue by choosing the first legal action…
Browse files Browse the repository at this point in the history
… from a ranked list
  • Loading branch information
jiyfeng committed Sep 27, 2015
1 parent f7d6e98 commit d1f10ee
Show file tree
Hide file tree
Showing 13 changed files with 891 additions and 180 deletions.
40 changes: 30 additions & 10 deletions code/model.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
## model.py
## Author: Yangfeng Ji
## Date: 09-09-2014
## Time-stamp: <yangfeng 09/24/2015 15:59:14>
## Time-stamp: <yangfeng 09/27/2015 12:32:37>

""" As a parsing model, it includes the following functions
1, Mini-batch training on the data generated by the Data class
Expand All @@ -16,6 +16,7 @@
from tree import RSTTree
from util import *
from datastructure import ActionError
from operator import itemgetter
import gzip, sys

class ParsingModel(object):
Expand Down Expand Up @@ -73,6 +74,24 @@ def predict(self, features):
return self.labelmap[label[0]]


def rank_labels(self, features):
    """ Order every decision label by the classifier's
        confidence value, most confident first

    :param features: feature collection handed to vectorize()
                     together with self.vocab, self.dpvocab
                     and self.projmat

    :return: list of labels (the values of self.labelmap),
             sorted by descending decision-function score
    """
    vec = vectorize(features, self.vocab, self.dpvocab, self.projmat)
    scores = self.clf.decision_function(vec)
    # Only the first row of the score matrix is read; column idx
    # is paired with the label stored at self.labelmap[idx]
    score_of = {self.labelmap[idx]: scores[0, idx]
                for idx in range(len(self.labelmap))}
    # Sorting the keys by their score keeps the mapping's own
    # iteration order among labels whose scores are equal
    return sorted(score_of, key=score_of.get, reverse=True)


def savemodel(self, fname):
""" Save model and vocab
"""
Expand Down Expand Up @@ -117,15 +136,16 @@ def sr_parse(self, doc, bcvocab=None):
# same arguments as in data generation part
fg = FeatureGenerator(stack, queue, doc, bcvocab)
feat = fg.features()
label = self.predict(feat)
action = label2action(label)
# The best choice here is to choose the first
# legal action
try:
srparser.operate(action)
except ActionError:
print "Parsing action error with {}".format(action)
sys.exit()
# label = self.predict(feat)
labels = self.rank_labels(feat)
for label in labels:
action = label2action(label)
try:
srparser.operate(action)
break
except ActionError:
# print "Parsing action error with {}".format(action)
pass
tree = srparser.getparsetree()
rst = RSTTree()
rst.asign_tree(tree)
Expand Down
Binary file modified code/model.pyc
Binary file not shown.
6 changes: 6 additions & 0 deletions tmp/8Sep2005Obama665.txt.brackets
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
((1, 1), 'Nucleus', 'span')
((2, 2), 'Satellite', 'elaboration')
((1, 2), 'Nucleus', 'list')
((3, 3), 'Satellite', 'attribution')
((4, 4), 'Nucleus', 'span')
((3, 4), 'Nucleus', 'list')
357 changes: 357 additions & 0 deletions tmp/8Sep2005Obama665.txt.merge

Large diffs are not rendered by default.

Binary file renamed tmp/doc.txt.ps → tmp/8Sep2005Obama665.txt.ps
Binary file not shown.
26 changes: 26 additions & 0 deletions tmp/8Sep2006Obama502.txt.brackets
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
((1, 1), 'Nucleus', 'span')
((2, 2), 'Satellite', 'attribution')
((3, 3), 'Nucleus', 'span')
((4, 4), 'Nucleus', 'span')
((5, 5), 'Nucleus', 'span')
((6, 6), 'Satellite', 'attribution')
((7, 7), 'Nucleus', 'span')
((8, 8), 'Satellite', 'attribution')
((9, 9), 'Nucleus', 'contrast')
((10, 10), 'Nucleus', 'span')
((11, 11), 'Nucleus', 'span')
((12, 12), 'Nucleus', 'span')
((13, 13), 'Satellite', 'elaboration')
((12, 13), 'Satellite', 'elaboration')
((11, 13), 'Satellite', 'elaboration')
((10, 13), 'Nucleus', 'span')
((14, 14), 'Satellite', 'elaboration')
((10, 14), 'Nucleus', 'contrast')
((9, 14), 'Nucleus', 'span')
((8, 14), 'Satellite', 'elaboration')
((7, 14), 'Nucleus', 'span')
((6, 14), 'Satellite', 'elaboration')
((5, 14), 'Satellite', 'elaboration')
((4, 14), 'Satellite', 'elaboration')
((3, 14), 'Nucleus', 'span')
((2, 14), 'Satellite', 'elaboration')
Loading

0 comments on commit d1f10ee

Please sign in to comment.