Now the parsing model can continue by choosing the first legal action from a ranked list
jiyfeng · Sep 27, 2015 · d1f10ee · d1f10ee
1 parent f7d6e98
commit d1f10ee
Show file tree

Hide file tree

Showing 13 changed files with 891 additions and 180 deletions.
diff --git a/code/model.py b/code/model.py
@@ -1,7 +1,7 @@
 ## model.py
 ## Author: Yangfeng Ji
 ## Date: 09-09-2014
-## Time-stamp: <yangfeng 09/24/2015 15:59:14>
+## Time-stamp: <yangfeng 09/27/2015 12:32:37>
 
 """ As a parsing model, it includes the following functions
 1, Mini-batch training on the data generated by the Data class
@@ -16,6 +16,7 @@
 from tree import RSTTree
 from util import *
 from datastructure import ActionError
+from operator import itemgetter
 import gzip, sys
 
 class ParsingModel(object):
@@ -73,6 +74,24 @@ def predict(self, features):
         return self.labelmap[label[0]]
 
 
+    def rank_labels(self, features):
+        """ Rank the decision label with their confidence
+            value
+        """
+        vec = vectorize(features, self.vocab,
+                        self.dpvocab, self.projmat)
+        vals = self.clf.decision_function(vec)
+        # print vals.shape
+        # print len(self.labelmap)
+        labelvals = {}
+        for idx in range(len(self.labelmap)):
+            labelvals[self.labelmap[idx]] = vals[0,idx]
+        sortedlabels = sorted(labelvals.items(), key=itemgetter(1),
+                              reverse=True)
+        labels = [item[0] for item in sortedlabels]
+        return labels
+
+
     def savemodel(self, fname):
         """ Save model and vocab
         """
@@ -117,15 +136,16 @@ def sr_parse(self, doc, bcvocab=None):
             # same arguments as in data generation part
             fg = FeatureGenerator(stack, queue, doc, bcvocab)
             feat = fg.features()
-            label = self.predict(feat)
-            action = label2action(label)
-            # The best choice here is to choose the first
-            #   legal action
-            try:
-                srparser.operate(action)
-            except ActionError:
-                print "Parsing action error with {}".format(action)
-                sys.exit()
+            # label = self.predict(feat)
+            labels = self.rank_labels(feat)
+            for label in labels:
+                action = label2action(label)
+                try:
+                    srparser.operate(action)
+                    break
+                except ActionError:
+                    # print "Parsing action error with {}".format(action)
+                    pass
         tree = srparser.getparsetree()
         rst = RSTTree()
         rst.asign_tree(tree)

diff --git a/code/model.pyc b/code/model.pyc
diff --git a/tmp/8Sep2005Obama665.txt.brackets b/tmp/8Sep2005Obama665.txt.brackets
@@ -0,0 +1,6 @@
+((1, 1), 'Nucleus', 'span')
+((2, 2), 'Satellite', 'elaboration')
+((1, 2), 'Nucleus', 'list')
+((3, 3), 'Satellite', 'attribution')
+((4, 4), 'Nucleus', 'span')
+((3, 4), 'Nucleus', 'list')
diff --git a/tmp/8Sep2005Obama665.txt.merge b/tmp/8Sep2005Obama665.txt.merge
diff --git a/tmp/doc.txt.ps → tmp/8Sep2005Obama665.txt.ps b/tmp/doc.txt.ps → tmp/8Sep2005Obama665.txt.ps
diff --git a/tmp/8Sep2006Obama502.txt.brackets b/tmp/8Sep2006Obama502.txt.brackets
@@ -0,0 +1,26 @@
+((1, 1), 'Nucleus', 'span')
+((2, 2), 'Satellite', 'attribution')
+((3, 3), 'Nucleus', 'span')
+((4, 4), 'Nucleus', 'span')
+((5, 5), 'Nucleus', 'span')
+((6, 6), 'Satellite', 'attribution')
+((7, 7), 'Nucleus', 'span')
+((8, 8), 'Satellite', 'attribution')
+((9, 9), 'Nucleus', 'contrast')
+((10, 10), 'Nucleus', 'span')
+((11, 11), 'Nucleus', 'span')
+((12, 12), 'Nucleus', 'span')
+((13, 13), 'Satellite', 'elaboration')
+((12, 13), 'Satellite', 'elaboration')
+((11, 13), 'Satellite', 'elaboration')
+((10, 13), 'Nucleus', 'span')
+((14, 14), 'Satellite', 'elaboration')
+((10, 14), 'Nucleus', 'contrast')
+((9, 14), 'Nucleus', 'span')
+((8, 14), 'Satellite', 'elaboration')
+((7, 14), 'Nucleus', 'span')
+((6, 14), 'Satellite', 'elaboration')
+((5, 14), 'Satellite', 'elaboration')
+((4, 14), 'Satellite', 'elaboration')
+((3, 14), 'Nucleus', 'span')
+((2, 14), 'Satellite', 'elaboration')