|
| 1 | +""" |
| 2 | +Tests for the sfm module's find_most_similar_words function. |
| 3 | +""" |
| 4 | + |
| 5 | +import pytest |
| 6 | + |
| 7 | +from utils.sfm import sfm as fsm |
| 8 | + |
| 9 | + |
def test_find_most_similar_words():
    """Exercise find_most_similar_words against a small fixed vocabulary.

    Covers a near-miss spelling, ascending ordering of the reported
    distances, an empty corpus, and a request for a reduced result count.
    """
    corpus = (
        "hello world foo bar baz qux quux corge grault garply waldo".split()
    )
    corpus_size = len(corpus)

    # A one-letter-off spelling should rank the intended word first.
    near_hello = fsm.find_most_similar_words("helo", corpus, corpus_size)
    top_hit = near_hello[0]
    assert top_hit.word == "hello"
    assert top_hit.distance == 1

    # A query that is absent from the corpus must still produce a ranking.
    near_yellow = fsm.find_most_similar_words("yellow", corpus, corpus_size)
    assert len(near_yellow) >= 1

    # Each adjacent pair of results must be in non-decreasing distance order.
    for left in range(len(near_yellow) - 1):
        assert near_yellow[left].distance <= near_yellow[left + 1].distance

    # An empty corpus (requested with a count of 0) yields no results.
    nothing_found = fsm.find_most_similar_words("test", [], 0)
    assert len(nothing_found) == 0

    # The third argument is treated here as an upper bound on result count.
    capped = fsm.find_most_similar_words("test", corpus, 3)
    assert len(capped) <= 3
| 50 | + |
| 51 | + |
def test_edge_cases():
    """Check find_most_similar_words with an empty query and an exact match."""
    corpus = ["hello", "world", "test"]
    corpus_size = len(corpus)

    # An empty query string is expected to still produce results.
    from_blank = fsm.find_most_similar_words("", corpus, corpus_size)
    assert len(from_blank) >= 1

    # A query identical to a corpus word should come first, at distance 0.
    from_exact = fsm.find_most_similar_words("hello", corpus, corpus_size)
    top_hit = from_exact[0]
    assert top_hit.word == "hello"
    assert top_hit.distance == 0
| 64 | + |
| 65 | + |
if __name__ == "__main__":
    # pytest.main() returns the session's exit code; raise it so that running
    # this file directly reports test failures to the calling shell instead
    # of always exiting with status 0.
    raise SystemExit(pytest.main())
0 commit comments