
Commit 67a2ef7: Update
1 parent 25dd54e

File tree: 4 files changed (60 additions, 10 deletions)

README.md

Lines changed: 1 addition & 1 deletion

```diff
@@ -1,6 +1,6 @@
 # text-globalmaxpool-visualization
 
-In NLP, AttentionPooling is often used to aggregate a sequence of word vectors into a sentence-vector representation; this is analyzed in [text-attentionpooling-visualization](https://github.com/allenwind/text-attentionpooling-visualization) with good results. MaxPooling is also commonly used to aggregate a word-vector sequence into a sentence vector, e.g. `tf.keras.layers.GlobalMaxPool1D` in TensorFlow; here we try to visualize the effect of MaxPooling. A word-vector sequence can be represented as a matrix of shape (maxlen, hdims); after MaxPooling it becomes a vector of shape (1, hdims), which can be regarded as the sentence representation. Each of the hdims values corresponds to the maximum of some word's vector; we take it as the weight of that word's importance in the downstream task, and visualize it to check whether the effect matches expectations.
+In NLP, AttentionPooling is often used to aggregate a sequence of word vectors into a sentence-vector representation; this is analyzed in [text-attentionpooling-visualization](https://github.com/allenwind/text-attentionpooling-visualization) with good results. MaxPooling and AveragePooling are also commonly used to aggregate a word-vector sequence into a sentence vector, where AveragePooling can be regarded as a special case of AttentionPooling. Here we only consider MaxPooling, e.g. `tf.keras.layers.GlobalMaxPool1D` in TensorFlow, and try to visualize its effect. A word-vector sequence can be represented as a matrix of shape (maxlen, hdims); after MaxPooling it becomes a vector of shape (1, hdims), which can be regarded as the sentence representation. Each of the hdims values corresponds to the maximum of some word's vector; we take it as the weight of that word's importance in the downstream task, and visualize it to check whether the effect matches expectations.
 
 
 
```
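As a quick sanity check of the idea in the README, here is a minimal NumPy sketch with toy data (not part of this commit): max-pool a (maxlen, hdims) matrix column-wise, then credit each word with the column maxima it contributes, mirroring the `ws` computation in pooling.py.

```python
import numpy as np

maxlen, hdims = 5, 8
x = np.random.randn(maxlen, hdims)  # toy word-vector sequence

sent = x.max(axis=0)                # GlobalMaxPool1D: sentence vector, shape (hdims,)

# importance weight per word: sum of the column maxima this word contributes
ws = np.where(x == sent, x, 0.0).sum(axis=1)
print(sent.shape, ws.shape)         # (8,) (5,)
```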

dataset.py

Lines changed: 8 additions & 1 deletion

```diff
@@ -7,14 +7,21 @@
 import numpy as np
 
 _THUCNews = "/home/zhiwen/workspace/dataset/THUCNews-title-label.txt"
-def load_THUCNews_title_label(file=_THUCNews):
+def load_THUCNews_title_label(file=_THUCNews, nobrackets=True):
     with open(file, encoding="utf-8") as fd:
         text = fd.read()
     lines = text.split("\n")[:-1]
+    np.random.shuffle(lines)
     titles = []
     labels = []
     for line in lines:
         title, label = line.split("\t")
+        if not title:
+            continue
+
+        # strip bracketed content from the title
+        if nobrackets:
+            title = re.sub(r"\(.+?\)", "", title)
         titles.append(title)
         labels.append(label)
     categoricals = list(set(labels))
```
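As a quick check of the new `nobrackets` branch (the example title is made up, and `dataset.py` also needs `import re` at the top, which is outside this hunk):

```python
import re

title = "皇马官方宣布引援(组图)"
print(re.sub(r"\(.+?\)", "", title))  # -> 皇马官方宣布引援
```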

model.py

Lines changed: 7 additions & 6 deletions

```diff
@@ -21,15 +21,15 @@
 from dataset import load_weibo_senti_100k
 from dataset import load_simplifyweibo_4_moods
 
-X, y, classes = load_weibo_senti_100k()
+X, y, classes = load_THUCNews_title_label()
 X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.8, random_state=7384672)
 
 num_classes = len(classes)
 tokenizer = SimpleTokenizer()
 tokenizer.fit(X_train)
 X_train = tokenizer.transform(X_train)
 
-# maxlen = 48
+maxlen = 48
 maxlen = find_best_maxlen(X_train)
 
 X_train = sequence.pad_sequences(
@@ -38,7 +38,7 @@
     dtype="int32",
     padding="post",
     truncating="post",
-    value=0
+    value=0.0
 )
 y_train = tf.keras.utils.to_categorical(y_train)
 
@@ -48,8 +48,9 @@
 inputs = Input(shape=(maxlen,))
 mask = Lambda(lambda x: tf.not_equal(x, 0))(inputs)
 x = Embedding(num_words, embedding_dims,
-              embeddings_initializer="glorot_normal",
-              input_length=maxlen)(inputs)
+              embeddings_initializer="normal",
+              input_length=maxlen,
+              mask_zero=True)(inputs)
 x = Dropout(0.2)(x)
 x = Conv1D(filters=128,
            kernel_size=3,
@@ -71,7 +72,7 @@
 model_w_outputs = Model(inputs, w)
 
 batch_size = 32
-epochs = 10
+epochs = 2
 callbacks = []
 model.fit(X_train, y_train,
           batch_size=batch_size,
```
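The hunks above stop short of the pooling call, but `w` and `model_w_outputs` imply the custom layer is applied to the Conv1D features. A hedged sketch of that wiring (layer names taken from the diff; everything else is an assumption):

```python
from pooling import MaskGlobalMaxPooling1D

# x: Conv1D features of shape (batch, maxlen, filters); mask from the Lambda layer above
x_pool, w = MaskGlobalMaxPooling1D()(x, mask=mask)  # sentence vector + per-token weights
outputs = Dense(num_classes, activation="softmax")(x_pool)
model = Model(inputs, outputs)
model_w_outputs = Model(inputs, w)                  # the weight-exposing model from the diff
```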

pooling.py

Lines changed: 44 additions & 2 deletions

```diff
@@ -1,6 +1,5 @@
 import tensorflow as tf
 
-
 class MaskGlobalMaxPooling1D(tf.keras.layers.Layer):
 
     def __init__(self, **kwargs):
@@ -11,11 +10,54 @@ def call(self, inputs, mask=None):
             mask = 1
         else:
             # expand dims for broadcasting
-            mask = tf.expand_dims(tf.cast(mask, "float32"), -1)
+            mask = tf.expand_dims(tf.cast(mask, tf.float32), -1)
         x = inputs
         x = x - (1 - mask) * 1e12  # mask with a large negative number
         x = tf.reduce_max(x, axis=1, keepdims=True)
         ws = tf.where(inputs == x, x, 0.0)
         ws = tf.reduce_sum(ws, axis=2)
         x = tf.squeeze(x, axis=1)
         return x, ws
+
+
+class MaskGlobalAveragePooling1D(tf.keras.layers.Layer):
+
+    def __init__(self, **kwargs):
+        super(MaskGlobalAveragePooling1D, self).__init__(**kwargs)
+
+    def call(self, inputs, mask=None):
+        if mask is None:
+            mask = tf.ones_like(inputs[..., :1])  # (batch, maxlen, 1)
+        else:
+            # expand dims for broadcasting
+            mask = tf.expand_dims(tf.cast(mask, tf.float32), -1)
+        x = inputs
+        x = x * mask
+        x = tf.reduce_sum(x, axis=1)
+        x = x / tf.reduce_sum(mask, axis=1)
+        ws = tf.square(inputs - tf.expand_dims(x, axis=1))
+        ws = tf.reduce_mean(ws, axis=2)
+        ws = ws + (1 - tf.squeeze(mask, axis=-1)) * 1e12  # mask padding before the reciprocal
+        ws = 1 / ws
+        return x, ws
+
+
+class MinVariancePooling(tf.keras.layers.Layer):
+    """Minimum-variance weighted average (inverse-variance weighting),
+    equivalent to the minimum-entropy weighted average of normal distributions"""
+
+    def __init__(self, **kwargs):
+        super(MinVariancePooling, self).__init__(**kwargs)
+
+    def build(self, input_shape):
+        d = tf.cast(input_shape[2], tf.float32)
+        self.alpha = 1 / (d - 1)
+
+    def call(self, inputs, mask=None):
+        if mask is None:
+            mask = 1
+        else:
+            mask = tf.expand_dims(tf.cast(mask, tf.float32), -1)
+        mu = tf.reduce_mean(inputs, axis=2, keepdims=True)  # per-position mean
+        var = self.alpha * tf.reduce_sum(tf.square(inputs - mu), axis=2, keepdims=True)  # unbiased variance estimate
+        var = var + (1 - mask) * 1e12  # mask padding before taking the reciprocal
+        ivar = 1 / var
+        ws = ivar / tf.reduce_sum(ivar, axis=1, keepdims=True)
+        return tf.reduce_sum(inputs * ws * mask, axis=1), ws
```
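A quick shape check of the three layers on random tensors (a sketch; the batch, maxlen, and hdims values are made up, and note that `MinVariancePooling` returns `ws` with a trailing axis of size 1):

```python
import tensorflow as tf
from pooling import (MaskGlobalMaxPooling1D,
                     MaskGlobalAveragePooling1D,
                     MinVariancePooling)

x = tf.random.normal((2, 5, 8))                   # (batch, maxlen, hdims)
mask = tf.cast(tf.constant([[1, 1, 1, 0, 0],
                            [1, 1, 1, 1, 1]]), tf.bool)  # True = real token

for layer in (MaskGlobalMaxPooling1D(),
              MaskGlobalAveragePooling1D(),
              MinVariancePooling()):
    sent, ws = layer(x, mask=mask)
    print(type(layer).__name__, sent.shape, ws.shape)
```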
