diff --git a/premerge/advisor/advisor_lib.py b/premerge/advisor/advisor_lib.py
index 341ac3281..520496abc 100644
--- a/premerge/advisor/advisor_lib.py
+++ b/premerge/advisor/advisor_lib.py
@@ -36,6 +36,7 @@ class TestExplanationRequest(TypedDict):
 }
 
 EXPLAINED_HEAD_MAX_COMMIT_INDEX_DIFFERENCE = 5
+EXPLAINED_FLAKY_MIN_COMMIT_RANGE = 200
 
 
 def _create_table(table_name: str, connection: sqlite3.Connection):
@@ -131,6 +132,50 @@ def _try_explain_failing_at_head(
     return None
 
 
+def _try_explain_flaky_failure(
+    db_connection: sqlite3.Connection,
+    test_failure: TestFailure,
+    platform: str,
+) -> FailureExplanation | None:
+    """See if a failure is a known flake at head.
+
+    This function looks at a test failure and tries to determine whether it
+    is a known flake at head. It does this heuristically, by checking whether
+    the same failure has occurred at least twice across a range of more than
+    200 commits. This has the advantage of being simple and performant. We do
+    not explicitly handle the case where a test has been failing continuously
+    for that entire span, as that is an order of magnitude longer than any
+    non-flaky test has stayed failing in tree.
+
+    Args:
+      db_connection: The database connection.
+      test_failure: The test failure to try to explain.
+      platform: The platform the test failed on.
+
+    Returns:
+      Either None, if the test could not be explained as flaky, or a
+      FailureExplanation object explaining the test failure.
+    """
+    test_name_matches = db_connection.execute(
+        "SELECT failure_message, commit_index FROM failures WHERE source_type='postcommit' AND platform=?",
+        (platform,),
+    ).fetchall()
+    commit_indices = []
+    for failure_message, commit_index in test_name_matches:
+        if failure_message == test_failure["message"]:
+            commit_indices.append(commit_index)
+    if len(commit_indices) == 0:
+        return None
+    commit_range = max(commit_indices) - min(commit_indices)
+    if commit_range > EXPLAINED_FLAKY_MIN_COMMIT_RANGE:
+        return {
+            "name": test_failure["name"],
+            "explained": True,
+            "reason": "This test is flaky in main.",
+        }
+    return None
+
+
 def explain_failures(
     explanation_request: TestExplanationRequest,
     repository_path: str,
@@ -138,13 +183,25 @@
 ) -> list[FailureExplanation]:
     explanations = []
     for test_failure in explanation_request["failures"]:
+        commit_index = git_utils.get_commit_index(
+            explanation_request["base_commit_sha"], repository_path, db_connection
+        )
+        # We want to try to explain flaky failures first. Otherwise we might
+        # explain a flaky failure as a failure at head if there is a recent
+        # failure in the last couple of commits.
+        explained_as_flaky = _try_explain_flaky_failure(
+            db_connection,
+            test_failure,
+            explanation_request["platform"],
+        )
+        if explained_as_flaky:
+            explanations.append(explained_as_flaky)
+            continue
         explained_at_head = _try_explain_failing_at_head(
             db_connection,
             test_failure,
             explanation_request["base_commit_sha"],
-            git_utils.get_commit_index(
-                explanation_request["base_commit_sha"], repository_path, db_connection
-            ),
+            commit_index,
             explanation_request["platform"],
         )
         if explained_at_head:
diff --git a/premerge/advisor/advisor_lib_test.py b/premerge/advisor/advisor_lib_test.py
index ebbfe59b5..d2b9ede1c 100644
--- a/premerge/advisor/advisor_lib_test.py
+++ b/premerge/advisor/advisor_lib_test.py
@@ -86,6 +86,9 @@ def setUp(self):
             [
                 ("8d29a3bb6f3d92d65bf5811b53bf42bf63685359", 1),
                 ("6b7064686b706f7064656d6f6e68756e74657273", 2),
+                ("6a6f73687561747265656a6f7368756174726565", 201),
+                ("6269677375726269677375726269677375726269", 202),
+                ("6d746c616e676c65796d746c616e676c65796d74", 203),
             ],
         )
         self.repository_path_dir = tempfile.TemporaryDirectory()
@@ -280,3 +283,103 @@ def test_no_explain_different_message(self):
                 }
             ],
         )
+
+    def _setup_flaky_test_info(
+        self,
+        source_type="postcommit",
+        message="failed in way 1",
+        second_failure_sha="6269677375726269677375726269677375726269",
+    ):
+        failures_info = [
+            {
+                "source_type": source_type,
+                "base_commit_sha": "8d29a3bb6f3d92d65bf5811b53bf42bf63685359",
+                "source_id": "10000",
+                "failures": [
+                    {"name": "a.ll", "message": message},
+                ],
+                "platform": "linux-x86_64",
+            },
+            {
+                "source_type": source_type,
+                "base_commit_sha": second_failure_sha,
+                "source_id": "100001",
+                "failures": [
+                    {"name": "a.ll", "message": message},
+                ],
+                "platform": "linux-x86_64",
+            },
+        ]
+        for failure_info in failures_info:
+            advisor_lib.upload_failures(
+                failure_info, self.db_connection, self.repository_path
+            )
+
+    def _get_flaky_test_explanations(self):
+        explanation_request = {
+            "failures": [{"name": "a.ll", "message": "failed in way 1"}],
+            "base_commit_sha": "6d746c616e676c65796d746c616e676c65796d74",
+            "platform": "linux-x86_64",
+        }
+        return advisor_lib.explain_failures(
+            explanation_request, self.repository_path, self.db_connection
+        )
+
+    def test_explain_flaky(self):
+        self._setup_flaky_test_info()
+        self.assertListEqual(
+            self._get_flaky_test_explanations(),
+            [
+                {
+                    "name": "a.ll",
+                    "explained": True,
+                    "reason": "This test is flaky in main.",
+                }
+            ],
+        )
+
+    def test_no_explain_flaky_different_message(self):
+        self._setup_flaky_test_info(message="failed in way 2")
+        self.assertListEqual(
+            self._get_flaky_test_explanations(),
+            [
+                {
+                    "name": "a.ll",
+                    "explained": False,
+                    "reason": None,
+                }
+            ],
+        )
+
+    # Test that we do not explain away flaky failures from pull request data.
+    # PRs might have the same failures multiple times across a large span of
+    # base commits, which might accidentally trigger the heuristic.
+    def test_no_explain_flaky_pullrequest_data(self):
+        self._setup_flaky_test_info(source_type="pull_request")
+        self.assertListEqual(
+            self._get_flaky_test_explanations(),
+            [
+                {
+                    "name": "a.ll",
+                    "explained": False,
+                    "reason": None,
+                }
+            ],
+        )
+
+    # Test that if all of the failures fall within a small commit range, we
+    # do not report the test as flaky.
+    def test_no_explain_flaky_small_range(self):
+        self._setup_flaky_test_info(
+            second_failure_sha="6b7064686b706f7064656d6f6e68756e74657273"
+        )
+        self.assertListEqual(
+            self._get_flaky_test_explanations(),
+            [
+                {
+                    "name": "a.ll",
+                    "explained": False,
+                    "reason": None,
+                }
+            ],
+        )
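
For reference, the heuristic added in _try_explain_flaky_failure reduces to a commit-range check over the matching post-commit failures. Below is a minimal standalone sketch of that check, illustrative only and not part of the patch; looks_flaky is a hypothetical helper, and the sample commit indices mirror the ones used in the new tests.

EXPLAINED_FLAKY_MIN_COMMIT_RANGE = 200  # same threshold as the patch


def looks_flaky(commit_indices: list[int]) -> bool:
    # A failure counts as flaky when it has been seen on commits that are
    # spread across more than EXPLAINED_FLAKY_MIN_COMMIT_RANGE commits.
    if not commit_indices:
        return False
    return max(commit_indices) - min(commit_indices) > EXPLAINED_FLAKY_MIN_COMMIT_RANGE


print(looks_flaky([1, 202]))  # True: sightings 201 commits apart, as in test_explain_flaky
print(looks_flaky([1, 2]))    # False: clustered sightings, as in test_no_explain_flaky_small_range
print(looks_flaky([]))        # False: no post-commit sightings at all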