diff --git a/tests/test_robula.py b/tests/test_robula.py deleted file mode 100644 index dba1e84..0000000 --- a/tests/test_robula.py +++ /dev/null @@ -1,23 +0,0 @@ -import unittest -from Robula.robula_plus import RobulaPlus -from Robula.xpath import XPath - -from lxml.etree import HTML - -class TestRobula(unittest.TestCase): - def test_transf_add_attribute_set(self): - rp = RobulaPlus() - doc = rp.makeDocument('
') - el = doc.xpath('//div')[0] - results = rp.transfAddAttributeSet(XPath('//*'), el) - values = [x.getValue() for x in results] - self.assertIn("//*[@id='id1' and @class='foo']", values) - - def test_head_has_position_predicate(self): - self.assertTrue(XPath('//div[2]').headHasPositionPredicate()) - self.assertTrue(XPath('//*[last()]').headHasPositionPredicate()) - self.assertTrue(XPath('//*[position()=1]').headHasPositionPredicate()) - self.assertFalse(XPath('//*[@id="x"]').headHasPositionPredicate()) - -if __name__ == '__main__': - unittest.main() diff --git a/tests/test_robula_plus.py b/tests/test_robula_plus.py new file mode 100644 index 0000000..d7e0e9d --- /dev/null +++ b/tests/test_robula_plus.py @@ -0,0 +1,113 @@ +import pytest +from lxml.etree import _Element as Element +from Robula.robula_plus import RobulaPlus, cmp_to_key +from Robula.xpath import XPath + +HTML_DOC = ( + "" + "
hello
" + "
bye
" + "" +) + +FULL_HTML_DOC = ( + "" + "" + "
" + "
  • one
  • " + "
  • two
  • " + "
  • three
  • " + "
    " + "" +) + +@pytest.fixture +def rp_doc(): + rp = RobulaPlus() + doc = rp.makeDocument(HTML_DOC) + return rp, doc + +def test_make_document_and_get_element_by_xpath(rp_doc): + rp, doc = rp_doc + el = rp.getElementByXPath('//div', doc) + assert isinstance(el, Element) + with pytest.raises(ValueError): + rp.getElementByXPath('//unknown', doc) + +def test_uniquely_locate(rp_doc): + rp, doc = rp_doc + el1 = doc.xpath('//div')[0] + assert rp.uniquelyLocate('//*[@id="id1"]', el1, doc) + assert not rp.uniquelyLocate('//div', el1, doc) + +def test_get_ancestor_and_count(rp_doc): + rp, doc = rp_doc + el = doc.xpath('//div/span')[0] + assert rp.getAncestor(el, 1).tag == 'div' + assert rp.getAncestor(el, 2).tag == 'body' + assert rp.getAncestorCount(el) == 3 + +def test_transforms(rp_doc): + rp, doc = rp_doc + el1 = doc.xpath('//div')[0] + # convert star + res = rp.transfConvertStar(XPath('//*'), el1) + assert res[0].getValue() == '//div' + assert rp.transfConvertStar(XPath('//div'), el1) == [] + # add id + res = rp.transfAddId(XPath('//*'), el1) + assert any(x.getValue().replace('"', "'") == "//*[@id='id1']" for x in res) + # add attribute + res = rp.transfAddAttribute(XPath('//*'), el1) + vals = [x.getValue() for x in res] + assert "//*[@class='foo']" in vals + assert "//*[@title='bar']" in vals + assert any(v.replace('"', "'") == "//*[@id='id1']" for v in vals) + # add position + res = rp.transfAddPosition(XPath('//*'), el1) + assert res[0].getValue() == '//*[1]' + el2 = doc.xpath('//div')[1] + res2 = rp.transfAddPosition(XPath('//div'), el2) + assert res2[0].getValue() == '//div[2]' + # add level + res = rp.transfAddLevel(XPath('//div'), el1) + assert res[0].getValue() == '//*/div' + +def test_generate_power_set_and_compare_functions(): + rp = RobulaPlus() + assert rp.generatePowerSet([1,2]) == [[], [1], [2], [2,1]] + a1 = {'name':'name','value':'x'} + a2 = {'name':'other','value':'y'} + assert rp.elementCompareFunction(a1,a2) == -1 + assert rp.elementCompareFunction(a2,a1) == 1 + assert rp.elementCompareFunction(a2,a2) == 0 + assert rp.compareListElementAttributes([a1], [a1,a2]) == -1 + assert rp.compareListElementAttributes([a1,a2], [a1]) == 1 + assert rp.compareListElementAttributes([a1], [a1]) == 0 + +def test_transf_add_attribute_set(rp_doc): + rp, doc = rp_doc + el = doc.xpath('//div')[0] + res = rp.transfAddAttributeSet(XPath('//*'), el) + values = [x.getValue() for x in res] + assert "//*[@id='id1' and @class='foo']" in values + +def test_get_robust_xpath(rp_doc): + rp, doc = rp_doc + el1, el2 = doc.xpath('//div') + assert rp.getRobustXPath(el1, doc).replace('"', "'") == "//*[@id='id1']" + assert rp.getRobustXPath(el2, doc) == '//*[2]' + +def test_cmp_to_key_sorting(): + def reverse_cmp(a, b): + return (b - a) + values = [1,3,2] + sorted_vals = sorted(values, key=cmp_to_key(reverse_cmp)) + assert sorted_vals == [3,2,1] + + +def test_full_example_xpath_reduction(): + rp = RobulaPlus() + doc = rp.makeDocument(FULL_HTML_DOC) + element = doc.xpath('/html/body/div/div[1]/li[3]/a')[0] + assert rp.getRobustXPath(element, doc) == "//a[@class='product-link']" diff --git a/tests/test_xpath.py b/tests/test_xpath.py new file mode 100644 index 0000000..1f74fff --- /dev/null +++ b/tests/test_xpath.py @@ -0,0 +1,27 @@ +import pytest +from Robula.xpath import XPath + + +def test_xpath_basic(): + xp = XPath('//div') + assert xp.getValue() == '//div' + assert xp.startsWith('//') + assert xp.substring(2) == 'div' + assert not xp.headHasAnyPredicates() + xp.addPredicateToHead("[@id='x']") + assert xp.getValue() == "//div[@id='x']" + assert xp.headHasAnyPredicates() + assert not xp.headHasTextPredicate() + + +def test_xpath_text_predicate_and_length(): + xp = XPath('//*[text()="a"]/span') + assert xp.headHasTextPredicate() + assert xp.getLength() == 2 + + +def test_xpath_position_predicate(): + assert XPath('//div[2]').headHasPositionPredicate() + assert XPath('//*[last()]').headHasPositionPredicate() + assert XPath('//*[position()=1]').headHasPositionPredicate() + assert not XPath('//*[@id="x"]').headHasPositionPredicate()