From b2361ce4267e6a2c51a31425718ff7cc83b67b15 Mon Sep 17 00:00:00 2001
From: Oleksandr Korovii <22007867+ZeusFSX@users.noreply.github.com>
Date: Thu, 5 Jun 2025 10:36:48 +0300
Subject: [PATCH] Add full XPath reduction test
---
tests/test_robula.py | 23 --------
tests/test_robula_plus.py | 113 ++++++++++++++++++++++++++++++++++++++
tests/test_xpath.py | 27 +++++++++
3 files changed, 140 insertions(+), 23 deletions(-)
delete mode 100644 tests/test_robula.py
create mode 100644 tests/test_robula_plus.py
create mode 100644 tests/test_xpath.py
diff --git a/tests/test_robula.py b/tests/test_robula.py
deleted file mode 100644
index dba1e84..0000000
--- a/tests/test_robula.py
+++ /dev/null
@@ -1,23 +0,0 @@
-import unittest
-from Robula.robula_plus import RobulaPlus
-from Robula.xpath import XPath
-
-from lxml.etree import HTML
-
-class TestRobula(unittest.TestCase):
- def test_transf_add_attribute_set(self):
- rp = RobulaPlus()
- doc = rp.makeDocument('<div id="id1" class="foo"></div>')
- el = doc.xpath('//div')[0]
- results = rp.transfAddAttributeSet(XPath('//*'), el)
- values = [x.getValue() for x in results]
- self.assertIn("//*[@id='id1' and @class='foo']", values)
-
- def test_head_has_position_predicate(self):
- self.assertTrue(XPath('//div[2]').headHasPositionPredicate())
- self.assertTrue(XPath('//*[last()]').headHasPositionPredicate())
- self.assertTrue(XPath('//*[position()=1]').headHasPositionPredicate())
- self.assertFalse(XPath('//*[@id="x"]').headHasPositionPredicate())
-
-if __name__ == '__main__':
- unittest.main()
diff --git a/tests/test_robula_plus.py b/tests/test_robula_plus.py
new file mode 100644
index 0000000..d7e0e9d
--- /dev/null
+++ b/tests/test_robula_plus.py
@@ -0,0 +1,113 @@
+import pytest
+from lxml.etree import _Element as Element
+from Robula.robula_plus import RobulaPlus, cmp_to_key
+from Robula.xpath import XPath
+
+# NOTE(review): the tags inside these literals were missing in this copy of the
+# patch; the markup is rebuilt from what the tests query (two divs, id/class/title
+# on the first, one span in each) — confirm against the original commit.
+HTML_DOC = (
+    "<html><body>"
+    "<div id='id1' class='foo' title='bar'><span>hello</span></div>"
+    "<div><span>bye</span></div>"
+    "</body></html>"
+)
+
+# NOTE(review): all four literals are empty in this copy, yet
+# test_full_example_xpath_reduction looks up /html/body/div/div[1]/li[3]/a in this
+# document — the markup appears to have been stripped; restore it from the
+# original commit before applying.
+FULL_HTML_DOC = (
+ ""
+ ""
+ ""
+ ""
+)
+
+@pytest.fixture
+def rp_doc():
+    """Provide a RobulaPlus instance together with HTML_DOC parsed by it."""
+    robula = RobulaPlus()
+    return robula, robula.makeDocument(HTML_DOC)
+
+def test_make_document_and_get_element_by_xpath(rp_doc):
+    """getElementByXPath returns an lxml element on a hit and raises ValueError on a miss."""
+    rp, doc = rp_doc
+    el = rp.getElementByXPath('//div', doc)
+    assert isinstance(el, Element)
+    # The failing lookup has to be nested in the `with` body so that
+    # pytest.raises can observe the exception.
+    with pytest.raises(ValueError):
+        rp.getElementByXPath('//unknown', doc)
+
+def test_uniquely_locate(rp_doc):
+    """An id predicate singles out the first div; the bare tag path does not."""
+    robula, tree = rp_doc
+    first_div = tree.xpath('//div')[0]
+    located_by_id = robula.uniquelyLocate('//*[@id="id1"]', first_div, tree)
+    assert located_by_id
+    located_by_tag = robula.uniquelyLocate('//div', first_div, tree)
+    assert not located_by_tag
+
+def test_get_ancestor_and_count(rp_doc):
+    """Check ancestor tags at levels 1 and 2 and the ancestor count of the first //div/span."""
+    robula, tree = rp_doc
+    span = tree.xpath('//div/span')[0]
+    ancestor_1 = robula.getAncestor(span, 1)
+    assert ancestor_1.tag == 'div'
+    ancestor_2 = robula.getAncestor(span, 2)
+    assert ancestor_2.tag == 'body'
+    assert robula.getAncestorCount(span) == 3
+
+def test_transforms(rp_doc):
+    """Exercise each single-step transformation against the divs of HTML_DOC."""
+    robula, tree = rp_doc
+    first_div = tree.xpath('//div')[0]
+    id_locator = "//*[@id='id1']"
+
+    # transfConvertStar: '//*' is expected to become the tag path, '//div' to yield nothing
+    converted = robula.transfConvertStar(XPath('//*'), first_div)
+    assert converted[0].getValue() == '//div'
+    assert robula.transfConvertStar(XPath('//div'), first_div) == []
+
+    # transfAddId: quote style is normalised before comparing
+    with_id = robula.transfAddId(XPath('//*'), first_div)
+    assert any(
+        candidate.getValue().replace('"', "'") == id_locator
+        for candidate in with_id
+    )
+
+    # transfAddAttribute
+    with_attribute = robula.transfAddAttribute(XPath('//*'), first_div)
+    attribute_values = [candidate.getValue() for candidate in with_attribute]
+    assert "//*[@class='foo']" in attribute_values
+    assert "//*[@title='bar']" in attribute_values
+    assert any(
+        value.replace('"', "'") == id_locator for value in attribute_values
+    )
+
+    # transfAddPosition
+    positioned_first = robula.transfAddPosition(XPath('//*'), first_div)
+    assert positioned_first[0].getValue() == '//*[1]'
+    second_div = tree.xpath('//div')[1]
+    positioned_second = robula.transfAddPosition(XPath('//div'), second_div)
+    assert positioned_second[0].getValue() == '//div[2]'
+
+    # transfAddLevel
+    with_level = robula.transfAddLevel(XPath('//div'), first_div)
+    assert with_level[0].getValue() == '//*/div'
+
+def test_generate_power_set_and_compare_functions():
+    """Pin the power-set ordering and the -1/0/1 results of both comparators."""
+    robula = RobulaPlus()
+    assert robula.generatePowerSet([1, 2]) == [[], [1], [2], [2, 1]]
+
+    name_attr = {'name': 'name', 'value': 'x'}
+    other_attr = {'name': 'other', 'value': 'y'}
+
+    # single-attribute comparison
+    assert robula.elementCompareFunction(name_attr, other_attr) == -1
+    assert robula.elementCompareFunction(other_attr, name_attr) == 1
+    assert robula.elementCompareFunction(other_attr, other_attr) == 0
+
+    # attribute-list comparison
+    single = [name_attr]
+    pair = [name_attr, other_attr]
+    assert robula.compareListElementAttributes(single, pair) == -1
+    assert robula.compareListElementAttributes(pair, [name_attr]) == 1
+    assert robula.compareListElementAttributes([name_attr], [name_attr]) == 0
+
+def test_transf_add_attribute_set(rp_doc):
+    """The id+class conjunction must be among the attribute-set candidates for the first div."""
+    robula, tree = rp_doc
+    first_div = tree.xpath('//div')[0]
+    candidates = robula.transfAddAttributeSet(XPath('//*'), first_div)
+    candidate_values = [candidate.getValue() for candidate in candidates]
+    assert "//*[@id='id1' and @class='foo']" in candidate_values
+
+def test_get_robust_xpath(rp_doc):
+    """getRobustXPath gives the id locator for the first div and '//*[2]' for the second."""
+    robula, tree = rp_doc
+    first_div, second_div = tree.xpath('//div')
+    first_locator = robula.getRobustXPath(first_div, tree)
+    # Quote style is normalised so either quoting of the id value passes.
+    assert first_locator.replace('"', "'") == "//*[@id='id1']"
+    second_locator = robula.getRobustXPath(second_div, tree)
+    assert second_locator == '//*[2]'
+
+def test_cmp_to_key_sorting():
+    """cmp_to_key must turn a two-argument comparator into a usable sort key."""
+    def reverse_cmp(a, b):
+        # Positive when b > a, so larger values are ordered first.
+        return b - a
+
+    values = [1, 3, 2]
+    sorted_vals = sorted(values, key=cmp_to_key(reverse_cmp))
+    assert sorted_vals == [3, 2, 1]
+
+
+def test_full_example_xpath_reduction():
+    """The element at an absolute path in FULL_HTML_DOC reduces to a class-based locator."""
+    robula = RobulaPlus()
+    tree = robula.makeDocument(FULL_HTML_DOC)
+    matches = tree.xpath('/html/body/div/div[1]/li[3]/a')
+    target = matches[0]
+    robust = robula.getRobustXPath(target, tree)
+    assert robust == "//a[@class='product-link']"
diff --git a/tests/test_xpath.py b/tests/test_xpath.py
new file mode 100644
index 0000000..1f74fff
--- /dev/null
+++ b/tests/test_xpath.py
@@ -0,0 +1,27 @@
+import pytest
+from Robula.xpath import XPath
+
+
+def test_xpath_basic():
+    """Cover value access, the prefix/substring helpers and adding a predicate to the head."""
+    path = XPath('//div')
+
+    # before any predicate is attached
+    assert path.getValue() == '//div'
+    assert path.startsWith('//')
+    assert path.substring(2) == 'div'
+    assert not path.headHasAnyPredicates()
+
+    # after attaching an attribute predicate
+    path.addPredicateToHead("[@id='x']")
+    assert path.getValue() == "//div[@id='x']"
+    assert path.headHasAnyPredicates()
+    assert not path.headHasTextPredicate()
+
+
+def test_xpath_text_predicate_and_length():
+    """A text() predicate on the head is detected and getLength reports 2 for this path."""
+    path = XPath('//*[text()="a"]/span')
+    has_text_predicate = path.headHasTextPredicate()
+    assert has_text_predicate
+    length = path.getLength()
+    assert length == 2
+
+
+def test_xpath_position_predicate():
+    """Index, last() and position() heads are positional; an attribute head is not."""
+    for positional in ('//div[2]', '//*[last()]', '//*[position()=1]'):
+        assert XPath(positional).headHasPositionPredicate()
+    assert not XPath('//*[@id="x"]').headHasPositionPredicate()