Skip to content

Commit 1324374

Browse files
jsimsa authored and tensorflower-gardener committed
[tf.data] Update map parallelization optimization to always default to preserving the order of elements.
PiperOrigin-RevId: 357026486
Change-Id: I86435cb1e5837e3ca27c77ab69968a0a0ce8ad54
1 parent 5eeb7c7 commit 1324374

File tree

3 files changed

+24
-21
lines changed

3 files changed

+24
-21
lines changed

tensorflow/core/grappler/optimizers/data/map_parallelization.cc

+3-2
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ namespace grappler {
3232
namespace {
3333

3434
constexpr char kMapDataset[] = "MapDataset";
35-
constexpr char kParallelMapDataset[] = "ParallelMapDataset";
35+
constexpr char kParallelMapDataset[] = "ParallelMapDatasetV2";
3636

3737
NodeDef MakeParallelMap(const string& name, MutableGraphView* graph) {
3838
// The inputs of the node to be parallelized could be changed by the
@@ -45,8 +45,9 @@ NodeDef MakeParallelMap(const string& name, MutableGraphView* graph) {
4545
&parallel_map);
4646
parallel_map.set_op(kParallelMapDataset);
4747
auto* num_parallel_calls = graph_utils::AddScalarConstNode(
48-
static_cast<int32>(data::model::kAutotune), graph);
48+
static_cast<int64>(data::model::kAutotune), graph);
4949
parallel_map.add_input(num_parallel_calls->name());
50+
AddNodeAttr("deterministic", "true", &parallel_map);
5051

5152
return parallel_map;
5253
}

tensorflow/core/grappler/optimizers/data/map_parallelization_test.cc

+3-3
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@ TEST_P(AutotuneSetting, MapParallelizationTest) {
6868

6969
GraphDef output;
7070
TF_ASSERT_OK(OptimizeWithMapParallelization(item, &output, autotune));
71-
EXPECT_EQ(graph_utils::ContainsNodeWithOp("ParallelMapDataset", output),
71+
EXPECT_EQ(graph_utils::ContainsNodeWithOp("ParallelMapDatasetV2", output),
7272
autotune);
7373
EXPECT_EQ(graph_utils::ContainsGraphNodeWithName("map", output), !autotune);
7474
}
@@ -99,7 +99,7 @@ TEST_P(FromFunctionDef, MapParallelizationTest) {
9999

100100
GraphDef output;
101101
TF_ASSERT_OK(OptimizeWithMapParallelization(item, &output, true));
102-
EXPECT_EQ(graph_utils::ContainsNodeWithOp("ParallelMapDataset", output),
102+
EXPECT_EQ(graph_utils::ContainsNodeWithOp("ParallelMapDatasetV2", output),
103103
!from_function_def);
104104
EXPECT_EQ(graph_utils::ContainsGraphNodeWithName("map", output),
105105
from_function_def);
@@ -131,7 +131,7 @@ TEST(ParallelizeAssert, MapParallelizationTest) {
131131

132132
GraphDef output;
133133
TF_ASSERT_OK(OptimizeWithMapParallelization(item, &output, true));
134-
EXPECT_TRUE(graph_utils::ContainsNodeWithOp("ParallelMapDataset", output));
134+
EXPECT_TRUE(graph_utils::ContainsNodeWithOp("ParallelMapDatasetV2", output));
135135
EXPECT_TRUE(graph_utils::ContainsGraphNodeWithName("map1", output));
136136
EXPECT_FALSE(graph_utils::ContainsGraphNodeWithName("map2", output));
137137
}

tensorflow/python/data/ops/dataset_ops.py

+18-16
Original file line numberDiff line numberDiff line change
@@ -1808,11 +1808,12 @@ def map(self, map_func, num_parallel_calls=None, deterministic=None):
18081808
If not specified, elements will be processed sequentially. If the value
18091809
`tf.data.AUTOTUNE` is used, then the number of parallel
18101810
calls is set dynamically based on available CPU.
1811-
deterministic: (Optional.) A boolean controlling whether determinism
1812-
should be traded for performance by allowing elements to be yielded out
1813-
of order. If `deterministic` is `None`, the
1814-
`tf.data.Options.experimental_deterministic` dataset option (`True` by
1815-
default) is used to decide whether to run deterministically.
1811+
deterministic: (Optional.) When `num_parallel_calls` is specified, this
1812+
boolean controls the order in which the transformation produces
1813+
elements. If set to `False`, the transformation is allowed to yield
1814+
elements out of order to trade determinism for performance. If not
1815+
specified, the `tf.data.Options.experimental_deterministic` option
1816+
(`True` by default) controls the behavior.
18161817
18171818
Returns:
18181819
Dataset: A `Dataset`.
@@ -1937,11 +1938,12 @@ def interleave(self,
19371938
from cycle elements synchronously with no parallelism. If the value
19381939
`tf.data.AUTOTUNE` is used, then the number of parallel
19391940
calls is set dynamically based on available CPU.
1940-
deterministic: (Optional.) A boolean controlling whether determinism
1941-
should be traded for performance by allowing elements to be produced out
1942-
of order. If `deterministic` is `None`, the
1943-
`tf.data.Options.experimental_deterministic` dataset option (`True` by
1944-
default) is used to decide whether to run deterministically.
1941+
deterministic: (Optional.) When `num_parallel_calls` is specified, this
1942+
boolean controls the order in which the transformation produces
1943+
elements. If set to `False`, the transformation is allowed to yield
1944+
elements out of order to trade determinism for performance. If not
1945+
specified, the `tf.data.Options.experimental_deterministic` option
1946+
(`True` by default) controls the behavior.
19451947
19461948
Returns:
19471949
Dataset: A `Dataset`.
@@ -2673,12 +2675,12 @@ def map_with_legacy_function(self,
26732675
If not specified, elements will be processed sequentially. If the value
26742676
`tf.data.AUTOTUNE` is used, then the number of parallel
26752677
calls is set dynamically based on available CPU.
2676-
deterministic: (Optional.) A boolean controlling whether determinism
2677-
should be traded for performance by allowing elements to be produced out
2678-
of order. If `deterministic` is `None`, the
2679-
`tf.data.Options.experimental_deterministic` dataset option (`True` by
2680-
default) is used to decide whether to produce elements
2681-
deterministically.
2678+
deterministic: (Optional.) When `num_parallel_calls` is specified, this
2679+
boolean controls the order in which the transformation produces
2680+
elements. If set to `False`, the transformation is allowed to yield
2681+
elements out of order to trade determinism for performance. If not
2682+
specified, the `tf.data.Options.experimental_deterministic` option
2683+
(`True` by default) controls the behavior.
26822684
26832685
Returns:
26842686
Dataset: A `Dataset`.

0 commit comments

Comments
 (0)