Skip to content

Commit 13545a4

Browse files
committed
[SPARK-54369][CONNECT][TESTS] Fix PythonPipelineSuite flakiness via Set instead of Seq
### What changes were proposed in this pull request?

This PR aims to fix `PythonPipelineSuite` flakiness by comparing flow identifiers as a `Set` instead of a `Seq` in multiple places.

### Why are the changes needed?

Currently, `PythonPipelineSuite` is flaky, as shown in the failure below. We should fix this flakiness.

- https://github.com/apache/spark/actions/runs/19396864076/job/55498096472

```
[info] - referencing internal datasets *** FAILED *** (821 milliseconds)
[info] List(`spark_catalog`.`default`.`src`, `spark_catalog`.`default`.`c`, `spark_catalog`.`default`.`a`) did not equal List(`spark_catalog`.`default`.`src`, `spark_catalog`.`default`.`a`, `spark_catalog`.`default`.`c`) (PythonPipelineSuite.scala:366)
```

The two lists contain the same identifiers and differ only in their order, so an order-insensitive comparison is sufficient for these assertions.

### Does this PR introduce _any_ user-facing change?

No.

### How was this patch tested?

Pass the CIs.

### Was this patch authored or co-authored using generative AI tooling?

No.

Closes #53080 from dongjoon-hyun/SPARK-XXX.

Authored-by: Dongjoon Hyun <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
1 parent 2e4708e commit 13545a4

File tree

1 file changed

+6
-3
lines changed

1 file changed

+6
-3
lines changed

sql/connect/server/src/test/scala/org/apache/spark/sql/connect/pipelines/PythonPipelineSuite.scala

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -364,11 +364,13 @@ class PythonPipelineSuite
364364

365365
val (streamingFlows, batchFlows) = graph.resolvedFlows.partition(_.df.isStreaming)
366366
assert(
367-
batchFlows.map(_.identifier) == Seq(
367+
batchFlows.map(_.identifier).toSet == Set(
368368
graphIdentifier("src"),
369369
graphIdentifier("a"),
370370
graphIdentifier("c")))
371-
assert(streamingFlows.map(_.identifier) == Seq(graphIdentifier("b"), graphIdentifier("d")))
371+
assert(
372+
streamingFlows.map(_.identifier).toSet ==
373+
Set(graphIdentifier("b"), graphIdentifier("d")))
372374
}
373375

374376
test("referencing external datasets") {
@@ -722,7 +724,8 @@ class PythonPipelineSuite
722724
assert(
723725
graph
724726
.flowsTo(graphIdentifier("a"))
725-
.map(_.identifier) == Seq(graphIdentifier("a"), graphIdentifier("something")))
727+
.map(_.identifier)
728+
.toSet == Set(graphIdentifier("a"), graphIdentifier("something")))
726729
}
727730

728731
test("groupby and rollup works with internal datasets, referencing with (col, str)") {

0 commit comments

Comments
 (0)