From 4f3ea3c1d2b15164bae3b460bfb4aefaff163b96 Mon Sep 17 00:00:00 2001 From: Pierre Penninckx Date: Fri, 26 Dec 2014 16:23:59 +0100 Subject: [PATCH 01/10] [fix] exponent letter can be uppercase --- baron/grouper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/baron/grouper.py b/baron/grouper.py index ec7e028c..21552658 100644 --- a/baron/grouper.py +++ b/baron/grouper.py @@ -51,7 +51,7 @@ def group_generator(sequence): current += next(iterator) if str(current).lower() in ["ur", "br"] and str(iterator.show_next()).startswith(('"', "'")): current += next(iterator) - if any([re.match(x, current) for x in (r'^\d+e$', r'^\d+\.\d*e$', r'^\.\d+e$')]): + if any([re.match(x, current) for x in (r'^\d+[eE]$', r'^\d+\.\d*[eE]$', r'^\.\d+[eE]$')]): current += next(iterator) current += next(iterator) From 1d276fe33d22933ef8c5b7c386d013959547f71c Mon Sep 17 00:00:00 2001 From: Pierre Penninckx Date: Fri, 26 Dec 2014 16:25:05 +0100 Subject: [PATCH 02/10] [fix] handle comma exponent refs #29 --- baron/grouper.py | 2 +- tests/test_grouper.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/baron/grouper.py b/baron/grouper.py index 21552658..3fb1c7ec 100644 --- a/baron/grouper.py +++ b/baron/grouper.py @@ -82,7 +82,7 @@ def group_generator(sequence): (current == "." 
and iterator.show_next() and re.match(r'^\d+[jJ]?$', iterator.show_next())): current += next(iterator) - if iterator.show_next() and re.match(r'^\d*[jJ]?$', iterator.show_next()) and re.match(r'^\d*[jJ]?$', iterator.show_next()).group(): + while iterator.show_next() and re.match(r'^\d*[jJeE]?$', iterator.show_next()) and re.match(r'^\d*[jJeE]?$', iterator.show_next()).group(): current += next(iterator) if re.match(r'^\d+\.$', current) and iterator.show_next() and re.match(r'^\d*[eE]\d*$', iterator.show_next()): diff --git a/tests/test_grouper.py b/tests/test_grouper.py index 84811766..fe8b577d 100644 --- a/tests/test_grouper.py +++ b/tests/test_grouper.py @@ -200,3 +200,4 @@ def test_float_exponant(): assert group(['1', '.', 'e', '-', '2']) == ['1.e-2'] assert group(['1', '.', 'e', '2']) == ['1.e2'] assert group(['1', '.', 'e2']) == ['1.e2'] + assert group(['.', '3', 'e', '5', '5']) == ['.3e55'] From db7ab911d026168b071037c57b750a1aa27ac0f1 Mon Sep 17 00:00:00 2001 From: Pierre Penninckx Date: Fri, 26 Dec 2014 16:38:09 +0100 Subject: [PATCH 03/10] [fix] handle complex number with negative exponent refs #29 --- baron/grouper.py | 6 +++--- tests/test_grouper.py | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/baron/grouper.py b/baron/grouper.py index 3fb1c7ec..2876ba79 100644 --- a/baron/grouper.py +++ b/baron/grouper.py @@ -78,11 +78,11 @@ def group_generator(sequence): if re.match(r'^\s+$', str(iterator.show_next())): current += next(iterator) - if (re.match(r'^\d+$', current) and iterator.show_next() and iterator.show_next() == ".") or\ - (current == "." and iterator.show_next() and re.match(r'^\d+[jJ]?$', iterator.show_next())): + if (re.match(r'^\d+$', current) and iterator.show_next() and iterator.show_next() in [".", "e", "E"]) or\ + (current == "." 
and iterator.show_next() and re.match(r'^\d+(-\d+)?[jJeE]?$', iterator.show_next())): current += next(iterator) - while iterator.show_next() and re.match(r'^\d*[jJeE]?$', iterator.show_next()) and re.match(r'^\d*[jJeE]?$', iterator.show_next()).group(): + while iterator.show_next() and re.match(r'^-?\d*[jJeE]?$', iterator.show_next()) and re.match(r'^-?\d*[jJeE]?$', iterator.show_next()).group(): current += next(iterator) if re.match(r'^\d+\.$', current) and iterator.show_next() and re.match(r'^\d*[eE]\d*$', iterator.show_next()): diff --git a/tests/test_grouper.py b/tests/test_grouper.py index fe8b577d..313d7280 100644 --- a/tests/test_grouper.py +++ b/tests/test_grouper.py @@ -183,6 +183,8 @@ def test_complex(): assert group(['1', '.', '1j']) == ['1.1j'] assert group(['1', '.', '1J']) == ['1.1J'] assert group(['1J']) == ['1J'] + assert group(['1', 'e', '-', '1', 'j']) == ['1e-1j'] + assert group(['1', 'e', '-1', 'j']) == ['1e-1j'] def test_float_exponant(): From 789be67a989b61e00ea5d392b7d57329f4940195 Mon Sep 17 00:00:00 2001 From: Pierre Penninckx Date: Fri, 26 Dec 2014 16:54:17 +0100 Subject: [PATCH 04/10] [mod] refactor matches using function --- baron/grouper.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/baron/grouper.py b/baron/grouper.py index 2876ba79..0210425c 100644 --- a/baron/grouper.py +++ b/baron/grouper.py @@ -35,6 +35,10 @@ def group(sequence): return list(group_generator(sequence)) +def match_on_next(regex, iterator): + return iterator.show_next() and re.match(regex, iterator.show_next()) + + def group_generator(sequence): iterator = FlexibleIterator(sequence) current = None @@ -78,20 +82,20 @@ def group_generator(sequence): if re.match(r'^\s+$', str(iterator.show_next())): current += next(iterator) - if (re.match(r'^\d+$', current) and iterator.show_next() and iterator.show_next() in [".", "e", "E"]) or\ - (current == "." 
and iterator.show_next() and re.match(r'^\d+(-\d+)?[jJeE]?$', iterator.show_next())): + if (re.match(r'^\d+$', current) and match_on_next(r'^[.eE]$', iterator)) or\ + (current == "." and match_on_next(r'^\d+(-\d+)?[jJeE]?$', iterator)): current += next(iterator) - while iterator.show_next() and re.match(r'^-?\d*[jJeE]?$', iterator.show_next()) and re.match(r'^-?\d*[jJeE]?$', iterator.show_next()).group(): + while match_on_next(r'^-?\d*[jJeE]?$', iterator) and match_on_next(r'^-?\d*[jJeE]?$', iterator).group(): current += next(iterator) - if re.match(r'^\d+\.$', current) and iterator.show_next() and re.match(r'^\d*[eE]\d*$', iterator.show_next()): + if re.match(r'^\d+\.$', current) and match_on_next(r'^\d*[eE]\d*$', iterator): current += next(iterator) - if re.match(r'^\d+\.?[eE]$', current) and iterator.show_next() and re.match(r'^\d+$', iterator.show_next()): + if re.match(r'^\d+\.?[eE]$', current) and match_on_next(r'^\d+$', iterator): current += next(iterator) - if re.match(r'^\d+\.?\d*[eE]$', current) and iterator.show_next() and iterator.show_next() in "-+" and re.match(r'^\d+$', iterator.show_next(2) if iterator.show_next(2) else ""): + if re.match(r'^\d+\.?\d*[eE]$', current) and match_on_next(r'^[-+]$', iterator) and iterator.show_next(2) and re.match(r'^\d+$', iterator.show_next(2)): current += next(iterator) current += next(iterator) From d012f0d6a4c879655ef0a5e6d4cdb87e92a101ca Mon Sep 17 00:00:00 2001 From: Pierre Penninckx Date: Fri, 26 Dec 2014 19:23:24 +0100 Subject: [PATCH 05/10] [mod] updates tests to match splitter's output --- baron/grouper.py | 8 ++++---- tests/test_grouper.py | 6 +++--- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/baron/grouper.py b/baron/grouper.py index 0210425c..1c2acc4f 100644 --- a/baron/grouper.py +++ b/baron/grouper.py @@ -61,7 +61,7 @@ def group_generator(sequence): # It's required in a case where I have something like that: # ['123.123e', '[+-]', '123'] - assert re.match(r'^\d+[eE][-+]?\d+$', current) 
or re.match(r'^\d*.\d*[eE][-+]?\d+$', current) + assert re.match(r'^\d+[eE][-+]?\d+[jJ]?$', current) or re.match(r'^\d*.\d*[eE][-+]?\d+[jJ]?$', current) if current == "\\" and iterator.show_next() in ('\n', '\r\n'): current += next(iterator) @@ -82,11 +82,11 @@ def group_generator(sequence): if re.match(r'^\s+$', str(iterator.show_next())): current += next(iterator) - if (re.match(r'^\d+$', current) and match_on_next(r'^[.eE]$', iterator)) or\ - (current == "." and match_on_next(r'^\d+(-\d+)?[jJeE]?$', iterator)): + if (re.match(r'^\d+$', current) and match_on_next(r'^\.$', iterator)) or\ + (current == "." and match_on_next(r'^\d+([jJ]|[eE]\d+)?$', iterator)): current += next(iterator) - while match_on_next(r'^-?\d*[jJeE]?$', iterator) and match_on_next(r'^-?\d*[jJeE]?$', iterator).group(): + if match_on_next(r'^\d*[jJ]?$', iterator) and match_on_next(r'^\d*[jJ]?$', iterator).group(): current += next(iterator) if re.match(r'^\d+\.$', current) and match_on_next(r'^\d*[eE]\d*$', iterator): diff --git a/tests/test_grouper.py b/tests/test_grouper.py index 313d7280..cfdcac4e 100644 --- a/tests/test_grouper.py +++ b/tests/test_grouper.py @@ -183,8 +183,7 @@ def test_complex(): assert group(['1', '.', '1j']) == ['1.1j'] assert group(['1', '.', '1J']) == ['1.1J'] assert group(['1J']) == ['1J'] - assert group(['1', 'e', '-', '1', 'j']) == ['1e-1j'] - assert group(['1', 'e', '-1', 'j']) == ['1e-1j'] + assert group(['1e', '-', '1j']) == ['1e-1j'] def test_float_exponant(): @@ -202,4 +201,5 @@ def test_float_exponant(): assert group(['1', '.', 'e', '-', '2']) == ['1.e-2'] assert group(['1', '.', 'e', '2']) == ['1.e2'] assert group(['1', '.', 'e2']) == ['1.e2'] - assert group(['.', '3', 'e', '5', '5']) == ['.3e55'] + assert group(['.', '3e55']) == ['.3e55'] + From dd2496cf653a9c0c6342d107bd269d7a04339945 Mon Sep 17 00:00:00 2001 From: Pierre Penninckx Date: Sat, 27 Dec 2014 01:07:51 +0100 Subject: [PATCH 06/10] [mod] update tests to check splitter output --- 
tests/test_grouper.py | 163 ++++++++++++++++++++++-------------------- 1 file changed, 84 insertions(+), 79 deletions(-) diff --git a/tests/test_grouper.py b/tests/test_grouper.py index cfdcac4e..e34e2f04 100644 --- a/tests/test_grouper.py +++ b/tests/test_grouper.py @@ -2,14 +2,21 @@ # -*- coding:Utf-8 -*- from baron.grouper import group +from baron.spliter import split +import pytest + + +def grouper_test(input, split_output, group_output): + assert split(input) == split_output + assert group(split_output) == group_output def test_empty(): - assert group([]) == [] + grouper_test("", [], []) def test_one(): - assert group(['a']) == ['a'] + grouper_test('a', ['a'], ['a']) def test_random(): @@ -17,189 +24,187 @@ def test_random(): def test_add_egual(): - assert group(["+", "="]) == ["+="] + grouper_test("+=", ["+", "="], ["+="]) def test_add_add(): - assert group(["+", "+"]) == ["+", "+"] + grouper_test("++", ["+", "+"], ["+", "+"]) def test_add_egual_double(): - assert group(["+", "=", "+", "="]) == ["+=", "+="] + grouper_test("+=+=", ["+", "=", "+", "="], ["+=", "+="]) def test_add_egual_random(): - assert group(list(" qsd += qsd")) == [" ", "q", "s", "d", " ", "+=", " ", "q", "s", "d"] + grouper_test(" qsd+=qsd", [' ', 'qsd', '+', '=', 'qsd'], [' ', 'qsd', '+=', 'qsd']) def test_minus_egual(): - assert group(["-", "="]) == ["-="] + grouper_test("-=", ["-", "="], ["-="]) def test_mult_egual(): - assert group(["*", "="]) == ["*="] + grouper_test("*=", ["*", "="], ["*="]) def test_div_egual(): - assert group(["/", "="]) == ["/="] + grouper_test("/=", ["/", "="], ["/="]) def test_modulo_egual(): - assert group(["%", "="]) == ["%="] + grouper_test("%=", ["%", "="], ["%="]) def test_amper_egual(): - assert group(["&", "="]) == ["&="] + grouper_test("&=", ["&", "="], ["&="]) def test_bar_egual(): - assert group(["|", "="]) == ["|="] + grouper_test("|=", ["|", "="], ["|="]) def test_power_egual(): - assert group(["^", "="]) == ["^="] + grouper_test("^=", ["^", "="], 
["^="]) def test_less_less(): - assert group(["<", "<"]) == ["<<"] + grouper_test("<<", ["<", "<"], ["<<"]) def test_more_more(): - assert group([">", ">"]) == [">>"] + grouper_test(">>", [">", ">"], [">>"]) def test_egual_egual(): - assert group(["=", "="]) == ["=="] + grouper_test("==", ["=", "="], ["=="]) def test_different(): - assert group(["!", "="]) == ["!="] + grouper_test("!=", ["!", "="], ["!="]) def test_inferior_egual(): - assert group([">", "="]) == [">="] + grouper_test(">=", [">", "="], [">="]) def test_superior_egual(): - assert group(["<", "="]) == ["<="] + grouper_test("<=", ["<", "="], ["<="]) def test_different_old_style(): - assert group(["<", ">"]) == ["<>"] + grouper_test("<>", ["<", ">"], ["<>"]) def test_power_power_egual(): - assert group(["*", "*", "="]) == ["**="] + grouper_test("**=", ["*", "*", "="], ["**="]) def test_div_div_egual(): - assert group(["/", "/", "="]) == ["//="] + grouper_test("//=", ["/", "/", "="], ["//="]) def test_less_less_egual(): - assert group(["<", "<", "="]) == ["<<="] + grouper_test("<<=", ["<", "<", "="], ["<<="]) def test_more_more_egual(): - assert group([">", ">", "="]) == [">>="] + grouper_test(">>=", [">", ">", "="], [">>="]) def test_decorator(): - assert group(["@", "pouet"]) == ["@", "pouet"] + grouper_test("@pouet", ["@", "pouet"], ["@", "pouet"]) def test_endl(): - assert group(["\r", "\n"]) == ["\r\n"] + grouper_test("\r\n", ["\r", "\n"], ["\r\n"]) def test_raw_string(): - assert group(["r", "'pouet'"]) == ["r'pouet'"] - assert group(["R", "'pouet'"]) == ["R'pouet'"] + grouper_test("r'pouet'", ["r", "'pouet'"], ["r'pouet'"]) + grouper_test("R'pouet'", ["R", "'pouet'"], ["R'pouet'"]) def test_unicode_string(): - assert group(["u", "'pouet'"]) == ["u'pouet'"] - assert group(["U", "'pouet'"]) == ["U'pouet'"] + grouper_test("u'pouet'", ["u", "'pouet'"], ["u'pouet'"]) + grouper_test("U'pouet'", ["U", "'pouet'"], ["U'pouet'"]) def test_binary_string(): - assert group(["b", "'pouet'"]) == ["b'pouet'"] - 
assert group(["B", "'pouet'"]) == ["B'pouet'"] + grouper_test("b'pouet'", ["b", "'pouet'"], ["b'pouet'"]) + grouper_test("B'pouet'", ["B", "'pouet'"], ["B'pouet'"]) def test_binary_raw_string(): - assert group(["br", "'pouet'"]) == ["br'pouet'"] - assert group(["Br", "'pouet'"]) == ["Br'pouet'"] - assert group(["bR", "'pouet'"]) == ["bR'pouet'"] - assert group(["BR", "'pouet'"]) == ["BR'pouet'"] + grouper_test("br'pouet'", ["br", "'pouet'"], ["br'pouet'"]) + grouper_test("Br'pouet'", ["Br", "'pouet'"], ["Br'pouet'"]) + grouper_test("bR'pouet'", ["bR", "'pouet'"], ["bR'pouet'"]) + grouper_test("BR'pouet'", ["BR", "'pouet'"], ["BR'pouet'"]) def test_unicode_raw_string(): - assert group(["ur", "'pouet'"]) == ["ur'pouet'"] - assert group(["Ur", "'pouet'"]) == ["Ur'pouet'"] - assert group(["uR", "'pouet'"]) == ["uR'pouet'"] - assert group(["UR", "'pouet'"]) == ["UR'pouet'"] + grouper_test("ur'pouet'", ["ur", "'pouet'"], ["ur'pouet'"]) + grouper_test("Ur'pouet'", ["Ur", "'pouet'"], ["Ur'pouet'"]) + grouper_test("uR'pouet'", ["uR", "'pouet'"], ["uR'pouet'"]) + grouper_test("UR'pouet'", ["UR", "'pouet'"], ["UR'pouet'"]) def test_exponant(): - assert group(['1e', '+', '123']) == ['1e+123'] - assert group(['1e', '-', '123']) == ['1e-123'] - assert group(['1.1e', '+', '123']) == ['1.1e+123'] - assert group(['1.1e', '-', '123']) == ['1.1e-123'] - assert group(['.1e', '+', '123']) == ['.1e+123'] - assert group(['.1e', '-', '123']) == ['.1e-123'] + grouper_test("1e+123", ['1e', '+', '123'], ['1e+123']) + grouper_test("1e-123", ['1e', '-', '123'], ['1e-123']) + grouper_test("1.1e+123", ['1', '.', '1e', '+', '123'], ['1.1e+123']) + grouper_test("1.1e-123", ['1', '.', '1e', '-', '123'], ['1.1e-123']) + grouper_test(".1e+123", ['.', '1e', '+', '123'], ['.1e+123']) + grouper_test(".1e-123", ['.', '1e', '-', '123'], ['.1e-123']) def test_endl_with_backslash(): - assert group(['\\', '\n']) == ['\\\n'] + grouper_test("\\\n", ['\\', '\n'], ['\\\n']) def test_space_endl_with_backslash(): 
- assert group([' ', '\\', '\n', ' ']) == [' \\\n '] - assert group([' ', '\\', '\n', 'pouet']) == [' \\\n', 'pouet'] + grouper_test(" \\\n ", [' ', '\\', '\n', ' '], [' \\\n ']) + grouper_test(" \\\npouet", [' ', '\\', '\n', 'pouet'], [' \\\n', 'pouet']) def test_regression(): - assert group(['0x045e', ':', ' ']) == ['0x045e', ':', ' '] - assert group(['180', '.', '\n']) == ['180.', '\n'] + grouper_test("0x045e: ", ['0x045e', ':', ' '], ['0x045e', ':', ' ']) + grouper_test("180.\n", ['180', '.', '\n'], ['180.', '\n']) def test_backslash_window_endl(): - assert group(['\\', '\r', '\n']) == ['\\\r\n'] + grouper_test("\\\r\n", ['\\', '\r', '\n'], ['\\\r\n']) def test_regression_float(): - assert group(['1', '.']) == ['1.'] - assert group(['.', '1']) == ['.1'] - assert group(['1', '.', '1']) == ['1.1'] - assert group(['7', '.', '629e', '-', '6']) == ['7.629e-6'] + grouper_test('1.', ['1', '.'], ['1.']) + grouper_test('.1', ['.', '1'], ['.1']) + grouper_test('1.1', ['1', '.', '1'], ['1.1']) + grouper_test('7.629e-6', ['7', '.', '629e', '-', '6'], ['7.629e-6']) def test_complex(): - assert group(['.', '1j']) == ['.1j'] - assert group(['.', '1J']) == ['.1J'] - assert group(['1', '.', 'j']) == ['1.j'] - assert group(['1', '.', 'J']) == ['1.J'] - assert group(['1', '.', '1j']) == ['1.1j'] - assert group(['1', '.', '1J']) == ['1.1J'] - assert group(['1J']) == ['1J'] - assert group(['1e', '-', '1j']) == ['1e-1j'] + grouper_test(".1j", ['.', '1j'], ['.1j']) + grouper_test(".1J", ['.', '1J'], ['.1J']) + grouper_test("1.j", ['1', '.', 'j'], ['1.j']) + grouper_test("1.J", ['1', '.', 'J'], ['1.J']) + grouper_test("1.1j", ['1', '.', '1j'], ['1.1j']) + grouper_test("1.1J", ['1', '.', '1J'], ['1.1J']) + grouper_test("1J", ['1J'], ['1J']) + grouper_test("1e-1j", ['1e', '-', '1j'], ['1e-1j']) def test_float_exponant(): - assert group(['1E1']) == ['1E1'] - assert group(['1E', '-', '2']) == ['1E-2'] - assert group(['1E', '+', '2']) == ['1E+2'] - assert group(['1', '.', 'E', '+', '2']) 
== ['1.E+2'] - assert group(['1', '.', 'E', '-', '2']) == ['1.E-2'] - assert group(['1', '.', 'E', '2']) == ['1.E2'] - assert group(['1', '.', 'E2']) == ['1.E2'] - assert group(['1e1']) == ['1e1'] - assert group(['1e', '-', '2']) == ['1e-2'] - assert group(['1e', '+', '2']) == ['1e+2'] - assert group(['1', '.', 'e', '+', '2']) == ['1.e+2'] - assert group(['1', '.', 'e', '-', '2']) == ['1.e-2'] - assert group(['1', '.', 'e', '2']) == ['1.e2'] - assert group(['1', '.', 'e2']) == ['1.e2'] - assert group(['.', '3e55']) == ['.3e55'] + grouper_test("1E1", ['1E1'], ['1E1']) + grouper_test("1E-2", ['1E', '-', '2'], ['1E-2']) + grouper_test("1E+2", ['1E', '+', '2'], ['1E+2']) + grouper_test("1.E+2", ['1', '.', 'E', '+', '2'], ['1.E+2']) + grouper_test("1.E-2", ['1', '.', 'E', '-', '2'], ['1.E-2']) + grouper_test("1.E2", ['1', '.', 'E2'], ['1.E2']) + grouper_test("1e1", ['1e1'], ['1e1']) + grouper_test("1e-2", ['1e', '-', '2'], ['1e-2']) + grouper_test("1e+2", ['1e', '+', '2'], ['1e+2']) + grouper_test("1.e+2", ['1', '.', 'e', '+', '2'], ['1.e+2']) + grouper_test("1.e-2", ['1', '.', 'e', '-', '2'], ['1.e-2']) + grouper_test("1.e2", ['1', '.', 'e2'], ['1.e2']) + grouper_test(".3e55", ['.', '3e55'], ['.3e55']) From 22c6e72ac96ec4a3b4bfe04b162a85712a53421d Mon Sep 17 00:00:00 2001 From: Pierre Penninckx Date: Sat, 27 Dec 2014 01:08:33 +0100 Subject: [PATCH 07/10] [fix] handle exponent beginning with dot --- baron/grouper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/baron/grouper.py b/baron/grouper.py index 1c2acc4f..5c146a80 100644 --- a/baron/grouper.py +++ b/baron/grouper.py @@ -83,7 +83,7 @@ def group_generator(sequence): current += next(iterator) if (re.match(r'^\d+$', current) and match_on_next(r'^\.$', iterator)) or\ - (current == "." and match_on_next(r'^\d+([jJ]|[eE]\d+)?$', iterator)): + (current == "." 
and match_on_next(r'^\d+([jJ]|[eE]\d*)?$', iterator)): current += next(iterator) if match_on_next(r'^\d*[jJ]?$', iterator) and match_on_next(r'^\d*[jJ]?$', iterator).group(): @@ -95,7 +95,7 @@ def group_generator(sequence): if re.match(r'^\d+\.?[eE]$', current) and match_on_next(r'^\d+$', iterator): current += next(iterator) - if re.match(r'^\d+\.?\d*[eE]$', current) and match_on_next(r'^[-+]$', iterator) and iterator.show_next(2) and re.match(r'^\d+$', iterator.show_next(2)): + if re.match(r'^\d*\.?\d*[eE]$', current) and match_on_next(r'^[-+]$', iterator) and iterator.show_next(2) and re.match(r'^\d+$', iterator.show_next(2)): current += next(iterator) current += next(iterator) From 485d0513a6a6940f66cc36202a79c65fea2762db Mon Sep 17 00:00:00 2001 From: Laurent Peuch Date: Sat, 10 Jan 2015 23:00:01 +0100 Subject: [PATCH 08/10] [enh] new token: FLOAT_EXPONANT_COMPLEX --- baron/tokenizer.py | 3 ++ tests/test_tokenizer.py | 87 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+) diff --git a/baron/tokenizer.py b/baron/tokenizer.py index 11e99ce3..b8c2325a 100644 --- a/baron/tokenizer.py +++ b/baron/tokenizer.py @@ -9,6 +9,9 @@ class UnknowItem(Exception): TOKENS = ( (r'[a-zA-Z_]\w*', 'NAME'), (r'0', 'INT'), + (r'[-+]?\d+[eE][-+]?\d+[jJ]', 'FLOAT_EXPONANT_COMPLEX'), + (r'[-+]?\d+.\d?[eE][-+]?\d+[jJ]', 'FLOAT_EXPONANT_COMPLEX'), + (r'[-+]?\d?.\d+[eE][-+]?\d+[jJ]', 'FLOAT_EXPONANT_COMPLEX'), (r'\d+[eE][-+]?\d*', 'FLOAT_EXPONANT'), (r'\d+\.\d*[eE][-+]?\d*', 'FLOAT_EXPONANT'), (r'\.\d+[eE][-+]?\d*', 'FLOAT_EXPONANT'), diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py index 06aebd1d..c37edc83 100644 --- a/tests/test_tokenizer.py +++ b/tests/test_tokenizer.py @@ -395,4 +395,91 @@ def test_float_advanced(): match(".1", "FLOAT") match("1.1", "FLOAT") + +def test_exponant_complex(): + match("1e1j", "FLOAT_EXPONANT_COMPLEX") + match("1.e1j", "FLOAT_EXPONANT_COMPLEX") + match("1.1e1j", "FLOAT_EXPONANT_COMPLEX") + match(".1e1j", 
"FLOAT_EXPONANT_COMPLEX") + match("-1.e1j", "FLOAT_EXPONANT_COMPLEX") + match("-1.1e1j", "FLOAT_EXPONANT_COMPLEX") + match("-.1e1j", "FLOAT_EXPONANT_COMPLEX") + match("1e-1j", "FLOAT_EXPONANT_COMPLEX") + match("1.e-1j", "FLOAT_EXPONANT_COMPLEX") + match("1.1e-1j", "FLOAT_EXPONANT_COMPLEX") + match(".1e-1j", "FLOAT_EXPONANT_COMPLEX") + match("-1.e-1j", "FLOAT_EXPONANT_COMPLEX") + match("-1.1e-1j", "FLOAT_EXPONANT_COMPLEX") + match("-.1e-1j", "FLOAT_EXPONANT_COMPLEX") + match("1e+1j", "FLOAT_EXPONANT_COMPLEX") + match("1.e+1j", "FLOAT_EXPONANT_COMPLEX") + match("1.1e+1j", "FLOAT_EXPONANT_COMPLEX") + match(".1e+1j", "FLOAT_EXPONANT_COMPLEX") + match("-1.e+1j", "FLOAT_EXPONANT_COMPLEX") + match("-1.1e+1j", "FLOAT_EXPONANT_COMPLEX") + match("-.1e+1j", "FLOAT_EXPONANT_COMPLEX") + match("1e1J", "FLOAT_EXPONANT_COMPLEX") + match("1.e1J", "FLOAT_EXPONANT_COMPLEX") + match("1.1e1J", "FLOAT_EXPONANT_COMPLEX") + match(".1e1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.e1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.1e1J", "FLOAT_EXPONANT_COMPLEX") + match("-.1e1J", "FLOAT_EXPONANT_COMPLEX") + match("1e-1J", "FLOAT_EXPONANT_COMPLEX") + match("1.e-1J", "FLOAT_EXPONANT_COMPLEX") + match("1.1e-1J", "FLOAT_EXPONANT_COMPLEX") + match(".1e-1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.e-1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.1e-1J", "FLOAT_EXPONANT_COMPLEX") + match("-.1e-1J", "FLOAT_EXPONANT_COMPLEX") + match("1e+1J", "FLOAT_EXPONANT_COMPLEX") + match("1.e+1J", "FLOAT_EXPONANT_COMPLEX") + match("1.1e+1J", "FLOAT_EXPONANT_COMPLEX") + match(".1e+1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.e+1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.1e+1J", "FLOAT_EXPONANT_COMPLEX") + match("-.1e+1J", "FLOAT_EXPONANT_COMPLEX") + match("1e1J", "FLOAT_EXPONANT_COMPLEX") + match("1.e1J", "FLOAT_EXPONANT_COMPLEX") + match("1.1e1J", "FLOAT_EXPONANT_COMPLEX") + match(".1e1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.e1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.1e1J", "FLOAT_EXPONANT_COMPLEX") + match("-.1e1J", 
"FLOAT_EXPONANT_COMPLEX") + match("1e-1J", "FLOAT_EXPONANT_COMPLEX") + match("1.e-1J", "FLOAT_EXPONANT_COMPLEX") + match("1.1e-1J", "FLOAT_EXPONANT_COMPLEX") + match(".1e-1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.e-1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.1e-1J", "FLOAT_EXPONANT_COMPLEX") + match("-.1e-1J", "FLOAT_EXPONANT_COMPLEX") + match("1e+1J", "FLOAT_EXPONANT_COMPLEX") + match("1.e+1J", "FLOAT_EXPONANT_COMPLEX") + match("1.1e+1J", "FLOAT_EXPONANT_COMPLEX") + match(".1e+1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.e+1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.1e+1J", "FLOAT_EXPONANT_COMPLEX") + match("-.1e+1J", "FLOAT_EXPONANT_COMPLEX") + match("1E1J", "FLOAT_EXPONANT_COMPLEX") + match("1.E1J", "FLOAT_EXPONANT_COMPLEX") + match("1.1E1J", "FLOAT_EXPONANT_COMPLEX") + match(".1E1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.E1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.1E1J", "FLOAT_EXPONANT_COMPLEX") + match("-.1E1J", "FLOAT_EXPONANT_COMPLEX") + match("1E-1J", "FLOAT_EXPONANT_COMPLEX") + match("1.E-1J", "FLOAT_EXPONANT_COMPLEX") + match("1.1E-1J", "FLOAT_EXPONANT_COMPLEX") + match(".1E-1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.E-1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.1E-1J", "FLOAT_EXPONANT_COMPLEX") + match("-.1E-1J", "FLOAT_EXPONANT_COMPLEX") + match("1E+1J", "FLOAT_EXPONANT_COMPLEX") + match("1.E+1J", "FLOAT_EXPONANT_COMPLEX") + match("1.1E+1J", "FLOAT_EXPONANT_COMPLEX") + match(".1E+1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.E+1J", "FLOAT_EXPONANT_COMPLEX") + match("-1.1E+1J", "FLOAT_EXPONANT_COMPLEX") + match("-.1E+1J", "FLOAT_EXPONANT_COMPLEX") + # TODO 1.1e1j From e4989f3f1f0842de7ab82093f0faff60adf02c7a Mon Sep 17 00:00:00 2001 From: Laurent Peuch Date: Sat, 10 Jan 2015 23:15:05 +0100 Subject: [PATCH 09/10] [enh] take new token into account for parser --- baron/grammator.py | 1 + 1 file changed, 1 insertion(+) diff --git a/baron/grammator.py b/baron/grammator.py index 78edd0cd..0fdacd43 100644 --- a/baron/grammator.py +++ b/baron/grammator.py @@ -619,6 
+619,7 @@ def testlist1(pack): @pg.production("atom : BINARY") @pg.production("atom : FLOAT") @pg.production("atom : FLOAT_EXPONANT") + @pg.production("atom : FLOAT_EXPONANT_COMPLEX") @pg.production("atom : COMPLEX") def int(pack): (int_,) = pack From 80aeb57081eaa2e1cc8e7fa2fa4658dae5aa3bd4 Mon Sep 17 00:00:00 2001 From: Laurent Peuch Date: Sat, 10 Jan 2015 23:36:55 +0100 Subject: [PATCH 10/10] [doc] add some comments in grouper --- baron/grouper.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/baron/grouper.py b/baron/grouper.py index 5c146a80..0840e2a5 100644 --- a/baron/grouper.py +++ b/baron/grouper.py @@ -47,14 +47,22 @@ def group_generator(sequence): return current = next(iterator) + + # classical grouping using to_group if current in to_group_keys and matching_found(to_group, current, iterator.show_next()): current += next(iterator) if current in to_group_keys and matching_found(to_group, current, iterator.show_next()): current += next(iterator) + + # unicode/raw/binary string notation if current in list('uUrRbB') and str(iterator.show_next()).startswith(('"', "'")): current += next(iterator) + + # in case of unicode_raw or binary_raw string notation if str(current).lower() in ["ur", "br"] and str(iterator.show_next()).startswith(('"', "'")): current += next(iterator) + + # float exponent notation if any([re.match(x, current) for x in (r'^\d+[eE]$', r'^\d+\.\d*[eE]$', r'^\.\d+[eE]$')]): current += next(iterator) current += next(iterator) @@ -63,17 +71,20 @@ def group_generator(sequence): # ['123.123e', '[+-]', '123'] assert re.match(r'^\d+[eE][-+]?\d+[jJ]?$', current) or re.match(r'^\d*.\d*[eE][-+]?\d+[jJ]?$', current) + # escaped endl if current == "\\" and iterator.show_next() in ('\n', '\r\n'): current += next(iterator) if re.match(r'^\s+$', str(iterator.show_next())): current += next(iterator) + # escaped endl in Windows notation if current == "\\" and iterator.show_next() == "\r" and iterator.show_next(2) == "\n": current += 
next(iterator) current += next(iterator) if re.match(r'^\s+$', str(iterator.show_next())): current += next(iterator) + # space before escaped endl if re.match(r'^\s+$', current) and iterator.show_next() == "\\": current += next(iterator) current += next(iterator) @@ -82,6 +93,7 @@ def group_generator(sequence): if re.match(r'^\s+$', str(iterator.show_next())): current += next(iterator) + # float/complex number notation around a decimal point if (re.match(r'^\d+$', current) and match_on_next(r'^\.$', iterator)) or\ (current == "." and match_on_next(r'^\d+([jJ]|[eE]\d*)?$', iterator)): current += next(iterator)