|
2 | 2 | import os
|
3 | 3 | import sys
|
4 | 4 |
|
5 |
| -from absl.testing import parameterized |
6 | 5 | from tensorflow.python.distribute import multi_process_lib
|
7 | 6 | import multiprocessing
|
8 | 7 | import tensorflow as tf
|
@@ -73,79 +72,119 @@ def tearDownClass(cls):
|
73 | 72 | super(ParameterServerStrategyV2Test, cls).tearDownClass()
|
74 | 73 | cls.cluster.stop()
|
75 | 74 |
|
76 |
| - #@parameterized.parameters(True, False) |
77 |
| - def testPerWorkerVariableCreation(self): |
| 75 | + def testPerWorkerTraining(self): |
78 | 76 | var_dtype = tf.dtypes.float32
|
79 | 77 | var_name = 'var'
|
80 |
| - shape = [1] #if define_shape else None |
81 |
| - |
82 |
| - # with self.strategy.scope(): |
83 |
| - var = variables.Variable(initial_value=[0.0], |
84 |
| - shape=shape, |
85 |
| - dtype=var_dtype, |
86 |
| - name=var_name, |
87 |
| - per_worker_de_variable=True) |
88 |
| - |
89 |
| - # Use per-worker variable as a capture |
90 |
| - @def_function.function |
91 |
| - def worker_fn(): |
92 |
| - var.assign_add(constant_op.constant([1.0])) |
93 |
| - return var |
94 |
| - |
95 |
| - num_closures = 10 |
96 |
| - for ix in range(num_closures): |
97 |
| - self.coordinator.schedule(worker_fn) |
98 |
| - # Read the PWV many times to ensure result is up-to-date |
99 |
| - self.coordinator.join() |
100 |
| - result_sum = sum(var.read_all()).numpy() |
101 |
| - self.assertEqual(result_sum, ix + 1) |
102 |
| - |
103 |
| - for _ in range(num_closures): |
104 |
| - self.coordinator.schedule(worker_fn) |
105 |
| - self.coordinator.join() |
106 |
| - |
107 |
| - # Verify placement of variables |
108 |
| - devices = [wv._get_values().device for wv in var._per_worker_vars._values] |
109 |
| - expected_devices = [ |
110 |
| - f'/job:worker/replica:0/task:{ix}/device:CPU:0' |
111 |
| - for ix in range(self.strategy._num_workers) |
112 |
| - ] # pylint: disable=protected-access |
113 |
| - self.assertAllEqual(devices, expected_devices) |
114 |
| - |
115 |
| - result_sum = sum(var.read_all()).numpy() |
116 |
| - self.assertEqual(result_sum, num_closures * 2) |
117 |
| - |
118 |
| - def testKerasFit(self): |
119 |
| - embed_dim = 8 |
| 78 | + shape = [1] |
120 | 79 | with self.strategy.scope():
|
121 |
| - model = Sequential([ |
122 |
| - layers.Input(shape=(1,), dtype=tf.int32), |
123 |
| - de.keras.layers.Embedding(embed_dim, key_dtype=tf.int32), |
124 |
| - layers.Flatten(), |
125 |
| - layers.Dense(1, activation='sigmoid') |
126 |
| - ]) |
127 |
| - optimizer = Adam(1E-3) |
128 |
| - optimizer = de.DynamicEmbeddingOptimizer(optimizer) |
129 |
| - model.compile(loss='binary_crossentropy', |
130 |
| - optimizer=optimizer, |
131 |
| - metrics=['accuracy']) |
132 |
| - |
133 |
| - ids = np.random.randint(0, 100, size=(64 * 2, 1)) |
134 |
| - labels = np.random.randint(0, 2, size=(64 * 2, 1)) |
135 |
| - |
136 |
| - def dataset_fn(input_context): |
137 |
| - global_batch_size = 32 |
138 |
| - batch_size = input_context.get_per_replica_batch_size(global_batch_size) |
139 |
| - dataset = tf.data.Dataset.from_tensor_slices((ids, labels)) |
140 |
| - dataset = dataset.shard(input_context.num_input_pipelines, |
141 |
| - input_context.input_pipeline_id) |
142 |
| - dataset = dataset.batch(batch_size).repeat() |
143 |
| - return dataset |
144 |
| - |
145 |
| - dataset = self.strategy.distribute_datasets_from_function(dataset_fn) |
146 |
| - |
147 |
| - history = model.fit(dataset, epochs=1, steps_per_epoch=len(ids) // 64) |
148 |
| - self.assertIn('loss', history.history) |
| 80 | + var = variables.Variable(initial_value=[0.0], |
| 81 | + shape=shape, |
| 82 | + dtype=var_dtype, |
| 83 | + name=var_name, |
| 84 | + per_worker_variable=True) |
| 85 | + var._trainable = True |
| 86 | + with backprop.GradientTape(persistent=True) as tape: |
| 87 | + |
| 88 | + # Define the training steps |
| 89 | + @tf.function |
| 90 | + def train_step(): |
| 91 | + with tf.GradientTape() as tape: |
| 92 | + # var._maybe_create_per_worker_vars() |
| 93 | + value = var.read_value() |
| 94 | + # if not var.trainable: |
| 95 | + tape.watch(value) # still need this with var._trainable = True set. |
| 96 | + y = value * 2.0 |
| 97 | + grad = tape.gradient(y, value) |
| 98 | + return grad |
| 99 | + |
| 100 | + @tf.function |
| 101 | + def train_step2(): |
| 102 | + with tf.GradientTape() as tape: |
| 103 | + var._maybe_create_per_worker_vars() |
| 104 | + value = var.value() |
| 105 | + # if not var.trainable: |
| 106 | + tape.watch(value) # still need this with var._trainable = True set. |
| 107 | + y = value * 2.0 |
| 108 | + grad = tape.gradient(y, value) |
| 109 | + return grad |
| 110 | + |
| 111 | + # Run and check the results |
| 112 | + grads = self.strategy.run(train_step2) |
| 113 | + print(f"grads :{grads}") |
| 114 | + print(f"var.read_all() {var.read_all()}") |
| 115 | + #@parameterized.parameters(True, False) |
| 116 | + # def testPerWorkerVariableCreation(self): |
| 117 | + # var_dtype = tf.dtypes.float32 |
| 118 | + # var_name = 'var' |
| 119 | + # shape = [1] #if define_shape else None |
| 120 | + # |
| 121 | + # with self.strategy.scope(): |
| 122 | + # var = variables.Variable(initial_value=[0.0], |
| 123 | + # shape=shape, |
| 124 | + # dtype=var_dtype, |
| 125 | + # name=var_name, |
| 126 | + # per_worker_de_variable=True) |
| 127 | + # |
| 128 | + # # Use per-worker variable as a capture |
| 129 | + # @def_function.function |
| 130 | + # def worker_fn(): |
| 131 | + # var.assign_add(constant_op.constant([1.0])) |
| 132 | + # return var |
| 133 | + # |
| 134 | + # num_closures = 10 |
| 135 | + # for ix in range(num_closures): |
| 136 | + # self.coordinator.schedule(worker_fn) |
| 137 | + # # Read the PWV many times to ensure result is up-to-date |
| 138 | + # self.coordinator.join() |
| 139 | + # result_sum = sum(var.read_all()).numpy() |
| 140 | + # self.assertEqual(result_sum, ix + 1) |
| 141 | + # |
| 142 | + # for _ in range(num_closures): |
| 143 | + # self.coordinator.schedule(worker_fn) |
| 144 | + # self.coordinator.join() |
| 145 | + # |
| 146 | + # # Verify placement of variables |
| 147 | + # devices = [wv._get_values().device for wv in var._per_worker_vars._values] |
| 148 | + # expected_devices = [ |
| 149 | + # f'/job:worker/replica:0/task:{ix}/device:CPU:0' |
| 150 | + # for ix in range(self.strategy._num_workers) |
| 151 | + # ] # pylint: disable=protected-access |
| 152 | + # self.assertAllEqual(devices, expected_devices) |
| 153 | + # |
| 154 | + # result_sum = sum(var.read_all()).numpy() |
| 155 | + # self.assertEqual(result_sum, num_closures * 2) |
| 156 | + |
| 157 | + # def testKerasFit(self): |
| 158 | + # embed_dim = 8 |
| 159 | + # with self.strategy.scope(): |
| 160 | + # model = Sequential([ |
| 161 | + # layers.Input(shape=(1,), dtype=tf.int32), |
| 162 | + # de.keras.layers.Embedding(embed_dim, key_dtype=tf.int32), |
| 163 | + # layers.Flatten(), |
| 164 | + # layers.Dense(1, activation='sigmoid') |
| 165 | + # ]) |
| 166 | + # optimizer = Adam(1E-3) |
| 167 | + # optimizer = de.DynamicEmbeddingOptimizer(optimizer) |
| 168 | + # model.compile(loss='binary_crossentropy', |
| 169 | + # optimizer=optimizer, |
| 170 | + # metrics=['accuracy']) |
| 171 | + # |
| 172 | + # ids = np.random.randint(0, 100, size=(64 * 2, 1)) |
| 173 | + # labels = np.random.randint(0, 2, size=(64 * 2, 1)) |
| 174 | + # |
| 175 | + # def dataset_fn(input_context): |
| 176 | + # global_batch_size = 32 |
| 177 | + # batch_size = input_context.get_per_replica_batch_size(global_batch_size) |
| 178 | + # dataset = tf.data.Dataset.from_tensor_slices((ids, labels)) |
| 179 | + # dataset = dataset.shard(input_context.num_input_pipelines, |
| 180 | + # input_context.input_pipeline_id) |
| 181 | + # dataset = dataset.batch(batch_size).repeat() |
| 182 | + # return dataset |
| 183 | + # |
| 184 | + # dataset = self.strategy.distribute_datasets_from_function(dataset_fn) |
| 185 | + # |
| 186 | + # history = model.fit(dataset, epochs=1, steps_per_epoch=len(ids) // 64) |
| 187 | + # self.assertIn('loss', history.history) |
149 | 188 |
|
150 | 189 |
|
151 | 190 | # Borrowed from multi_process_lib._set_spawn_exe_path and modified for tf_recommenders_addons
|
@@ -175,8 +214,8 @@ def guess_path(package_root):
|
175 | 214 | multiprocessing.get_context().set_executable(sys.argv[0])
|
176 | 215 |
|
177 | 216 |
|
178 |
| -# This is not for pytest |
179 |
| -# bazel test //tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests:parameter_server_bzl |
| 217 | +# This is not for pytest; run it with bazel instead: bazel clean --expunge |
| 218 | +# bazel test --test_output=all //tensorflow_recommenders_addons/dynamic_embedding/python/kernel_tests:parameter_server_bzl |
180 | 219 | if __name__ == "__main__":
|
181 | 220 | multi_process_lib._set_spawn_exe_path = custom_set_spawn_exe_path
|
182 | 221 | v2_compat.enable_v2_behavior()
|
|
0 commit comments