forked from northanapon/learning_lstm
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathWordIndexer.lua
71 lines (59 loc) · 1.24 KB
/
WordIndexer.lua
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
--[[
Bi-directional indexer for word <-> index.

Instances hold two lookup tables, `w2idx` (word -> index) and `idx2w`
(index -> word), together with `size` (number of distinct words stored)
and `unk_idx` (the index reported for words that are not stored).
The class is declared through torch.class; methods are attached to the
returned class table below.
]]--
local WordIndexer = torch.class('WordIndexer')
-- Constructor: start with an empty vocabulary.
function WordIndexer:__init()
  -- Number of distinct words stored so far.
  self.size = 0
  -- Index reported by index() for words that are not stored.
  self.unk_idx = -1
  -- Forward (word -> index) and reverse (index -> word) lookup tables.
  self.idx2w = {}
  self.w2idx = {}
end
-- Number of distinct words stored in the indexer.
-- NOTE(review): on Lua 5.1 / LuaJIT the `#` operator may not consult
-- `__len` for table-based objects; calling `obj:__len()` directly or
-- reading `obj.size` is the safe way to get the count -- confirm for
-- the runtime in use.
-- @return number of stored words
function WordIndexer:__len()
  local count = self.size
  return count
end
-- Tell whether a word is stored in the indexer.
-- @param w word to look up
-- @return true when `w` has an index, false otherwise
function WordIndexer:contain(w)
  -- Double negation turns the lookup result into a strict boolean.
  return not not self.w2idx[w]
end
-- Bind word `w` to the explicit index `idx`.
--
-- A word that was not stored before increases `size` by one. When the
-- word was already bound to a different index, the reverse entry of that
-- old index is removed so that word(old_idx) no longer reports a word
-- whose index() has moved elsewhere.
--
-- If `idx` was already bound to another word, that other word keeps its
-- forward mapping (several words may share one index); only the reverse
-- lookup for `idx` is redirected to `w`.
--
-- @param idx index to assign
-- @param w   word to bind
function WordIndexer:set(idx, w)
  local previous = self.w2idx[w]
  if not previous then
    self.size = self.size + 1
  elseif previous ~= idx and self.idx2w[previous] == w then
    -- Drop the stale reverse mapping left behind by the re-binding.
    self.idx2w[previous] = nil
  end
  self.w2idx[w] = idx
  self.idx2w[idx] = w
end
-- Look up the index of a word.
-- @param w word to look up
-- @return the stored index of `w`, or `self.unk_idx` when `w` is not stored
function WordIndexer:index(w)
  return self.w2idx[w] or self.unk_idx
end
-- Convert a sequence of words into a torch.IntTensor of their indices.
-- Each word goes through self:index(), so words that are not stored
-- yield `self.unk_idx`.
-- @param words array-like table of words
-- @return torch.IntTensor with one entry per word
function WordIndexer:indexes(words)
  local n = #words
  local out = torch.IntTensor(n)
  local pos = 1
  while pos <= n do
    out[pos] = self:index(words[pos])
    pos = pos + 1
  end
  return out
end
-- Look up the word stored at an index.
-- @param i index to look up
-- @return the word bound to `i`, or nil when no word is bound to it
function WordIndexer:word(i)
  local w = self.idx2w[i]
  return w
end
-- Convert a sequence of indices back into a table of words.
--
-- Accepts either a plain Lua array of indices or a 1-D Torch tensor such
-- as the one returned by indexes(). For Torch tensors the `#` operator
-- yields a size object rather than a number, which cannot be used as a
-- numeric `for` limit, so the element count is taken from nElement()
-- in that case.
--
-- @param indexes array-like table or 1-D tensor of indices
-- @return table where entry i is self:word(indexes[i]) (nil for an index
--         with no word bound to it)
function WordIndexer:words(indexes)
  local count = #indexes
  if type(count) ~= 'number' then
    -- Tensor input: `#` did not give a plain number.
    count = indexes:nElement()
  end
  local words = {}
  for i = 1, count do
    words[i] = self:word(indexes[i])
  end
  return words
end
-- Store a word, assigning it the next free index, and return that index.
--
-- A word that is already stored keeps its index and nothing changes.
-- For a new word, `size` grows by one and the candidate index is the new
-- `size`. If that index is already bound to another word (possible when
-- indices were assigned explicitly through set()), the search moves
-- forward to the first unbound index instead of overwriting the existing
-- reverse mapping.
--
-- @param w word to store
-- @return index of `w`
function WordIndexer:add(w)
  local known = self.w2idx[w]
  if known then
    return known
  end
  self.size = self.size + 1
  local idx = self.size
  -- Skip indices already occupied by explicitly placed words.
  while self.idx2w[idx] ~= nil do
    idx = idx + 1
  end
  self.w2idx[w] = idx
  self.idx2w[idx] = w
  return idx
end