From c2acc2b54c26b8e9aa7c8f9bebe9ee9f6b363afa Mon Sep 17 00:00:00 2001 From: anyncfunction <13813298288@139.com> Date: Sun, 7 Jun 2026 11:57:46 +0800 Subject: [PATCH] fix: Boyer-Moore bad character shift was dead code in for-loop The bad_character_heuristic() method used a for-loop with an assignment to the loop variable i, which was immediately overwritten by the next iteration. This caused the algorithm to degrade from O(n/m) to O(n*m) naive search. Changed to a while-loop so the shift actually takes effect. Added max(i+1, shift) guard to prevent backward skips when the mismatched character appears to the right of the mismatch in the pattern. Added edge case doctests. --- strings/boyer_moore_search.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/strings/boyer_moore_search.py b/strings/boyer_moore_search.py index ad14a504f792..8e6f79dbf75e 100644 --- a/strings/boyer_moore_search.py +++ b/strings/boyer_moore_search.py @@ -83,18 +83,30 @@ def bad_character_heuristic(self) -> list[int]: >>> bms = BoyerMooreSearch(text="ABAABA", pattern="AB") >>> bms.bad_character_heuristic() [0, 3] + + >>> bms = BoyerMooreSearch(text="AAAAA", pattern="AB") + >>> bms.bad_character_heuristic() + [] + + >>> bms = BoyerMooreSearch(text="ABABAB", pattern="ABA") + >>> bms.bad_character_heuristic() + [0, 2] + + >>> bms = BoyerMooreSearch(text="", pattern="AB") + >>> bms.bad_character_heuristic() + [] """ positions = [] - for i in range(self.textLen - self.patLen + 1): + i = 0 + while i <= self.textLen - self.patLen: mismatch_index = self.mismatch_in_text(i) if mismatch_index == -1: positions.append(i) + i += 1 else: match_index = self.match_in_pattern(self.text[mismatch_index]) - i = ( - mismatch_index - match_index - ) # shifting index lgtm [py/multiple-definition] + i = max(i + 1, mismatch_index - match_index) return positions