From b2361ce4267e6a2c51a31425718ff7cc83b67b15 Mon Sep 17 00:00:00 2001
From: Oleksandr Korovii <22007867+ZeusFSX@users.noreply.github.com>
Date: Thu, 5 Jun 2025 10:36:48 +0300
Subject: [PATCH] Add full XPath reduction test

---
 tests/test_robula.py      |  23 --------
 tests/test_robula_plus.py | 113 ++++++++++++++++++++++++++++++++++++++
 tests/test_xpath.py       |  27 +++++++++
 3 files changed, 140 insertions(+), 23 deletions(-)
 delete mode 100644 tests/test_robula.py
 create mode 100644 tests/test_robula_plus.py
 create mode 100644 tests/test_xpath.py
diff --git a/tests/test_robula.py b/tests/test_robula.py
deleted file mode 100644
index dba1e84..0000000
--- a/tests/test_robula.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import unittest
-from Robula.robula_plus import RobulaPlus
-from Robula.xpath import XPath
-
-from lxml.etree import HTML
-
-class TestRobula(unittest.TestCase):
-    def test_transf_add_attribute_set(self):
-        rp = RobulaPlus()
-        doc = rp.makeDocument('<html><body><div id="id1" class="foo"></div></body></html>')
-        el = doc.xpath('//div')[0]
-        results = rp.transfAddAttributeSet(XPath('//*'), el)
-        values = [x.getValue() for x in results]
-        self.assertIn("//*[@id='id1' and @class='foo']", values)
-
-    def test_head_has_position_predicate(self):
-        self.assertTrue(XPath('//div[2]').headHasPositionPredicate())
-        self.assertTrue(XPath('//*[last()]').headHasPositionPredicate())
-        self.assertTrue(XPath('//*[position()=1]').headHasPositionPredicate())
-        self.assertFalse(XPath('//*[@id="x"]').headHasPositionPredicate())
-
-if __name__ == '__main__':
-    unittest.main()
diff --git a/tests/test_robula_plus.py b/tests/test_robula_plus.py
new file mode 100644
index 0000000..d7e0e9d
--- /dev/null
+++ b/tests/test_robula_plus.py
@@ -0,0 +1,113 @@
+import pytest
+from lxml.etree import _Element as Element
+from Robula.robula_plus import RobulaPlus, cmp_to_key
+from Robula.xpath import XPath
+
+HTML_DOC = (  # two sibling divs, both class 'foo'; only the first has an id
+    "<html><body>"
+    "<div id='id1' class='foo' title='bar'><span>hello</span></div>"
+    "<div class='foo' name='second'><span>bye</span></div>"
+    "</body></html>"
+)
+
+FULL_HTML_DOC = (  # class 'product-link' is on a <div> and on the third <a>
+    "<html><body>"
+    "<div class='product-link'></div>"
+    "<div><div>"
+    "<li><a href='#'>one</a></li>"
+    "<li><a href='#'>two</a></li>"
+    "<li><a class='product-link'>three</a></li>"
+    "</div></div>"
+    "</body></html>"
+)
+
+@pytest.fixture
+def rp_doc():
+    """Provide a RobulaPlus instance together with HTML_DOC parsed by it."""
+    robula = RobulaPlus()
+    return robula, robula.makeDocument(HTML_DOC)
+
+def test_make_document_and_get_element_by_xpath(rp_doc):
+    """getElementByXPath returns an element, or raises ValueError on no match."""
+    robula, tree = rp_doc
+    assert isinstance(robula.getElementByXPath('//div', tree), Element)
+    with pytest.raises(ValueError):
+        robula.getElementByXPath('//unknown', tree)
+
+def test_uniquely_locate(rp_doc):
+    robula, tree = rp_doc  # RobulaPlus instance and the parsed HTML_DOC
+    first_div = tree.xpath('//div')[0]
+    assert robula.uniquelyLocate('//*[@id="id1"]', first_div, tree)  # single id1
+    assert not robula.uniquelyLocate('//div', first_div, tree)  # two divs match
+
+def test_get_ancestor_and_count(rp_doc):
+    """From the first span expect div, then body, and three ancestors overall."""
+    robula, tree = rp_doc
+    span = tree.xpath('//div/span')[0]
+    assert [robula.getAncestor(span, up).tag for up in (1, 2)] == ['div', 'body']
+    assert robula.getAncestorCount(span) == 3
+
+def test_transforms(rp_doc):
+    """Exercise each single-step RobulaPlus transformation on the HTML_DOC divs."""
+    robula, tree = rp_doc
+    first_div = tree.xpath('//div')[0]
+    # transfConvertStar: expect '*' -> tag name, and nothing for a concrete tag
+    starred = robula.transfConvertStar(XPath('//*'), first_div)
+    assert starred[0].getValue() == '//div'
+    assert robula.transfConvertStar(XPath('//div'), first_div) == []
+    # transfAddId: quote style is normalised before comparing
+    with_id = robula.transfAddId(XPath('//*'), first_div)
+    assert any(xp.getValue().replace('"', "'") == "//*[@id='id1']" for xp in with_id)
+    # transfAddAttribute: expect class, title and id candidates
+    attr_values = [xp.getValue() for xp in robula.transfAddAttribute(XPath('//*'), first_div)]
+    assert "//*[@class='foo']" in attr_values
+    assert "//*[@title='bar']" in attr_values
+    assert any(value.replace('"', "'") == "//*[@id='id1']" for value in attr_values)
+    # transfAddPosition: first div under '//*', second div under '//div'
+    positioned = robula.transfAddPosition(XPath('//*'), first_div)
+    assert positioned[0].getValue() == '//*[1]'
+    second_div = tree.xpath('//div')[1]
+    positioned_second = robula.transfAddPosition(XPath('//div'), second_div)
+    assert positioned_second[0].getValue() == '//div[2]'
+    # transfAddLevel: expect a wildcard step in front of the original path
+    levelled = robula.transfAddLevel(XPath('//div'), first_div)
+    assert levelled[0].getValue() == '//*/div'
+
+def test_generate_power_set_and_compare_functions():
+    """Pin the power-set ordering and the -1/0/1 results of both comparators."""
+    robula = RobulaPlus()
+    assert robula.generatePowerSet([1, 2]) == [[], [1], [2], [2, 1]]
+    name_attr, other_attr = {'name': 'name', 'value': 'x'}, {'name': 'other', 'value': 'y'}
+    assert robula.elementCompareFunction(name_attr, other_attr) == -1
+    assert robula.elementCompareFunction(other_attr, name_attr) == 1
+    assert robula.elementCompareFunction(other_attr, other_attr) == 0
+    assert robula.compareListElementAttributes([name_attr], [name_attr, other_attr]) == -1
+    assert robula.compareListElementAttributes([name_attr, other_attr], [name_attr]) == 1
+    assert robula.compareListElementAttributes([name_attr], [name_attr]) == 0
+
+def test_transf_add_attribute_set(rp_doc):
+    """The id+class conjunction must be among the attribute-set candidates."""
+    robula, tree = rp_doc
+    first_div = tree.xpath('//div')[0]
+    candidates = robula.transfAddAttributeSet(XPath('//*'), first_div)
+    assert "//*[@id='id1' and @class='foo']" in [xp.getValue() for xp in candidates]
+
+def test_get_robust_xpath(rp_doc):
+    robula, tree = rp_doc
+    with_id, without_id = tree.xpath('//div')  # HTML_DOC holds exactly two divs
+    assert robula.getRobustXPath(with_id, tree).replace('"', "'") == "//*[@id='id1']"
+    assert robula.getRobustXPath(without_id, tree) == '//*[2]'  # div has no id
+
+def test_cmp_to_key_sorting():
+    """cmp_to_key must turn a descending comparator into a usable sort key."""
+    def descending(left, right):
+        return right - left
+    ordered = sorted([1, 3, 2], key=cmp_to_key(descending))
+    assert ordered == [3, 2, 1]
+
+
+def test_full_example_xpath_reduction():
+    robula = RobulaPlus()
+    tree = robula.makeDocument(FULL_HTML_DOC)
+    target = tree.xpath('/html/body/div/div[1]/li[3]/a')[0]  # <a> with a class
+    assert robula.getRobustXPath(target, tree) == "//a[@class='product-link']"
diff --git a/tests/test_xpath.py b/tests/test_xpath.py
new file mode 100644
index 0000000..1f74fff
--- /dev/null
+++ b/tests/test_xpath.py
@@ -0,0 +1,27 @@
+import pytest
+from Robula.xpath import XPath
+
+
+def test_xpath_basic():
+    """Check accessors on a bare XPath, then again after adding a head predicate."""
+    path = XPath('//div')
+    assert path.getValue() == '//div'
+    assert path.startsWith('//')
+    assert path.substring(2) == 'div'
+    assert not path.headHasAnyPredicates()
+    path.addPredicateToHead("[@id='x']")
+    assert path.getValue() == "//div[@id='x']"
+    assert path.headHasAnyPredicates() and not path.headHasTextPredicate()
+
+
+def test_xpath_text_predicate_and_length():
+    path = XPath('//*[text()="a"]/span')  # text() predicate on the first step
+    assert path.headHasTextPredicate()
+    assert path.getLength() == 2  # expected: one per step, '*[...]' and 'span'
+
+
+def test_xpath_position_predicate():
+    """Index, last() and position() predicates are positional; @id is not."""
+    positional = ('//div[2]', '//*[last()]', '//*[position()=1]')
+    assert all(XPath(expr).headHasPositionPredicate() for expr in positional)
+    assert not XPath('//*[@id="x"]').headHasPositionPredicate()