From 44e832e34ad7b1b39a06ec6b6b5404c9d01387d0 Mon Sep 17 00:00:00 2001 From: "codeflash-ai[bot]" <148906541+codeflash-ai[bot]@users.noreply.github.com> Date: Sat, 21 Mar 2026 07:22:27 +0000 Subject: [PATCH] Optimize find_common_tags MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reduced runtime from 11.3ms to 6.88ms (a 64% speedup, i.e. about 39% less runtime) by initializing the common set from the article with the fewest tags and avoiding the articles[1:] slice. Each set.intersection_update call still scans the whole tag list it is given, but the number of surviving tags it has to re-insert is bounded by the size of the running (left-hand) set, and slicing allocates a new list; starting from the smallest tag set therefore cuts the amount of work and memory churn — the line profiler shows intersection_update’s share of time falling from ~54.8% to ~34.8% and the routine-level profile roughly halving. The trade-off is one extra O(n) scan over the articles to find the smallest tag list and a per-iteration skip check, which can regress on inputs consisting of many articles with uniformly tiny tag lists (observed ~15% slower in a couple of large, uniform benchmarks), but it delivers clear wins for imbalanced or large tag collections. 
--- src/algorithms/string.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/src/algorithms/string.py b/src/algorithms/string.py index 658439e..cf6e876 100644 --- a/src/algorithms/string.py +++ b/src/algorithms/string.py @@ -41,9 +41,27 @@ def find_common_tags(articles: list[dict[str, list[str]]]) -> set[str]: if not articles: return set() - common_tags = set(articles[0].get("tags", [])) - for article in articles[1:]: - common_tags.intersection_update(article.get("tags", [])) + # Find the article with the smallest tags list to initialize common_tags + min_idx = 0 + min_len = len(articles[0].get("tags", [])) + if min_len == 0: + return set() + + for i in range(1, len(articles)): + tags_i = articles[i].get("tags", []) + li = len(tags_i) + if li < min_len: + min_len = li + min_idx = i + if min_len == 0: + return set() + + common_tags = set(articles[min_idx].get("tags", [])) + # Iterate over all articles except the one used to initialize common_tags + for i in range(len(articles)): + if i == min_idx: + continue + common_tags.intersection_update(articles[i].get("tags", [])) if not common_tags: break return common_tags