diff --git a/pyproject.toml b/pyproject.toml index d13e92d..4e7d8f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dynamic = ["version"] Homepage = "https://github.com/astanin/python-tabulate" [project.optional-dependencies] -widechars = ["wcwidth"] +widechars = ["wcwidth>=0.5.1"] [project.scripts] tabulate = "tabulate:_main" diff --git a/tabulate/__init__.py b/tabulate/__init__.py index e100c09..57e17d2 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -1116,6 +1116,9 @@ def _visible_width(s): # optional wide-character support if wcwidth is not None and WIDE_CHARS_MODE: len_fn = wcwidth.wcswidth + if hasattr(wcwidth, "width"): + # wcwidth >=0.3.0 handles ansi + return wcwidth.width(s) else: len_fn = len if isinstance(s, (str, bytes)): @@ -1588,9 +1591,11 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): if headers == "keys": headers = field_names rows = [ - [getattr(row, f) for f in field_names] - if not _is_separating_line(row) - else row + ( + [getattr(row, f) for f in field_names] + if not _is_separating_line(row) + else row + ) for row in rows ] @@ -1638,7 +1643,13 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): return rows, headers, headers_pad -def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long_words=_BREAK_LONG_WORDS, break_on_hyphens=_BREAK_ON_HYPHENS): +def _wrap_text_to_colwidths( + list_of_lists, + colwidths, + numparses=True, + break_long_words=_BREAK_LONG_WORDS, + break_on_hyphens=_BREAK_ON_HYPHENS, +): if len(list_of_lists): num_cols = len(list_of_lists[0]) else: @@ -1655,10 +1666,15 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long continue if width is not None: - wrapper = _CustomTextWrap(width=width, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens) + wrapper_wrap = partial( + _wrap_text, + width=width, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, + 
) casted_cell = str(cell) wrapped = [ - "\n".join(wrapper.wrap(line)) + "\n".join(wrapper_wrap(line)) for line in casted_cell.splitlines() if line.strip() != "" ] @@ -2258,7 +2274,11 @@ def tabulate( numparses = _expand_numparse(disable_numparse, num_cols) list_of_lists = _wrap_text_to_colwidths( - list_of_lists, maxcolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens + list_of_lists, + maxcolwidths, + numparses=numparses, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, ) if maxheadercolwidths is not None: @@ -2272,7 +2292,11 @@ def tabulate( numparses = _expand_numparse(disable_numparse, num_cols) headers = _wrap_text_to_colwidths( - [headers], maxheadercolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens + [headers], + maxheadercolwidths, + numparses=numparses, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, )[0] # empty values in the first column of RST tables should be escaped (issue #82) @@ -2672,6 +2696,26 @@ def _format_table( return "" +def _wrap_text(text, width, break_long_words=True, break_on_hyphens=True): + """Wrap text to width with wide character and ANSI code support.""" + # wcwidth >= 0.5.0 has wrap() with proper grapheme cluster support and + # propagate_sgr=True by default, which handles ANSI code propagation natively. + if wcwidth is not None and hasattr(wcwidth, "wrap"): + return wcwidth.wrap( + text, + width, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, + ) + else: + # Fallback when wcwidth is not installed or is too old to provide wrap() + return _CustomTextWrap( + width=width, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, + ).wrap(text) + + class _CustomTextWrap(textwrap.TextWrapper): """A custom implementation of CPython's textwrap.TextWrapper. This supports both wide characters (Korea, Japanese, Chinese) - including mixed string. 
@@ -2740,11 +2784,13 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): # of the next chunk onto the current line as will fit. if self.break_long_words: # Tabulate Custom: Build the string up piece-by-piece in order to - # take each charcter's width into account + # take each character's width into account chunk = reversed_chunks[-1] i = 1 # Only count printable characters, so strip_ansi first, index later. - while len(_strip_ansi(chunk)[:i]) <= space_left: + # Use self._len() instead of len() to account for displayed width, e.g. + # wide chars like CJK count as 2 when using wcwidth<0.3.0 without wrap() + while self._len(_strip_ansi(chunk)[:i]) <= space_left: i = i + 1 # Consider escape codes when breaking words up total_escape_len = 0 diff --git a/test/test_api.py b/test/test_api.py index f35d09a..8382262 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -1,11 +1,8 @@ -"""API properties. - -""" +"""API properties.""" from tabulate import tabulate, tabulate_formats, simple_separated_format from common import skip - try: from inspect import signature, _empty except ImportError: diff --git a/test/test_cli.py b/test/test_cli.py index e71572d..d558adc 100644 --- a/test/test_cli.py +++ b/test/test_cli.py @@ -1,6 +1,4 @@ -"""Command-line interface. 
- -""" +"""Command-line interface.""" import os import sys @@ -12,7 +10,6 @@ from common import assert_equal - SAMPLE_SIMPLE_FORMAT = "\n".join( [ "----- ------ -------------", diff --git a/test/test_grapheme_clusters.py b/test/test_grapheme_clusters.py new file mode 100644 index 0000000..db6474c --- /dev/null +++ b/test/test_grapheme_clusters.py @@ -0,0 +1,243 @@ +"""Tests for Unicode grapheme cluster handling in tabulate.""" + +import pytest + +from tabulate import tabulate + +try: + import wcwidth + + HAS_WCWIDTH = True + HAS_WCWIDTH_030 = hasattr(wcwidth, "wrap") +except ImportError: + wcwidth = None + HAS_WCWIDTH = False + HAS_WCWIDTH_030 = False + +requires_wcwidth = pytest.mark.skipif(not HAS_WCWIDTH, reason="requires wcwidth") + +requires_wcwidth_030 = pytest.mark.skipif( + not HAS_WCWIDTH_030, reason="requires wcwidth >= 0.3.0" +) + + +class TestGraphemeClusterWidth: + """Tests for correct width calculation of grapheme clusters.""" + + @requires_wcwidth + def test_zwj_family_emoji_width(self): + """ZWJ family emoji has display width 2.""" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" + assert wcwidth.wcswidth(family) == 2 + + @requires_wcwidth + def test_regional_indicator_flag_width(self): + """Regional indicator pair (flag) has display width 2.""" + us_flag = "\U0001f1fa\U0001f1f8" + assert wcwidth.wcswidth(us_flag) == 2 + + @requires_wcwidth + def test_vs16_emoji_width(self): + """VS16 variation selector creates wide emoji.""" + heart = "\u2764\ufe0f" + assert wcwidth.wcswidth(heart) == 2 + + +class TestGraphemeClusterAlignment: + """Tests for correct alignment of cells containing grapheme clusters.""" + + @requires_wcwidth + def test_zwj_alignment_in_grid(self): + """ZWJ emoji aligns correctly in grid format.""" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" + data = [ + ["ABC", "text"], + [family, "emoji"], + ] + result = tabulate(data, headers=["col", "desc"], tablefmt="grid") + lines = result.split("\n") + + border_width = 
len(lines[0]) + for line in lines: + from tabulate import _visible_width + + assert _visible_width(line) == border_width + + @requires_wcwidth + def test_flag_alignment_in_grid(self): + """Regional indicator flags align correctly in grid format.""" + us_flag = "\U0001f1fa\U0001f1f8" + data = [ + ["AB", "text"], + [us_flag, "flag"], + ] + result = tabulate(data, headers=["col", "desc"], tablefmt="grid") + lines = result.split("\n") + + border_width = len(lines[0]) + for line in lines: + from tabulate import _visible_width + + assert _visible_width(line) == border_width + + +class TestGraphemeClusterWrapping: + """Tests for grapheme cluster preservation during text wrapping. + + These tests require wcwidth >= 0.3.0 for iter_graphemes and wrap() APIs. + """ + + @requires_wcwidth_030 + def test_zwj_not_broken_during_wrap(self): + """ZWJ sequence preserved as single unit during wrap.""" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" + data = [[f"A{family}B"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=3) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert family in graphemes_in_result + + @requires_wcwidth_030 + def test_flag_not_broken_during_wrap(self): + """Regional indicator flag preserved as single unit during wrap.""" + us_flag = "\U0001f1fa\U0001f1f8" + gb_flag = "\U0001f1ec\U0001f1e7" + fr_flag = "\U0001f1eb\U0001f1f7" + flags = us_flag + gb_flag + fr_flag + + data = [[flags]] + result = tabulate(data, tablefmt="plain", maxcolwidths=5) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert us_flag in graphemes_in_result + assert gb_flag in graphemes_in_result + assert fr_flag in graphemes_in_result + + @requires_wcwidth_030 + def test_vs16_not_broken_during_wrap(self): + """VS16 variation selector kept with base character during wrap.""" + heart = 
"\u2764\ufe0f" + data = [[heart * 3]] + result = tabulate(data, tablefmt="plain", maxcolwidths=4) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + heart_count = sum(1 for g in graphemes_in_result if g == heart) + assert heart_count == 3 + + @requires_wcwidth_030 + def test_skin_tone_modifier_not_broken(self): + """Skin tone modifier preserved with emoji during wrap.""" + wave_light = "\U0001f44b\U0001f3fb" + data = [[f"Hi{wave_light}there"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=5) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert wave_light in graphemes_in_result + + +class TestComplexGraphemeClusters: + """Tests for complex grapheme cluster scenarios. + + These tests require wcwidth >= 0.3.0 for iter_graphemes API. + """ + + @requires_wcwidth_030 + def test_multiple_zwj_sequences_in_cell(self): + """Multiple ZWJ sequences in single cell handled correctly.""" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" + technologist = "\U0001f468\U0001f3fb\u200d\U0001f4bb" + data = [[f"{family} and {technologist}"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=15) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert family in graphemes_in_result + assert technologist in graphemes_in_result + + @requires_wcwidth_030 + def test_flags_with_text_wrap(self): + """Flags interspersed with text wrap correctly.""" + us_flag = "\U0001f1fa\U0001f1f8" + data = [[f"Visit {us_flag} USA today!"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=10) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert us_flag in graphemes_in_result + + @requires_wcwidth_030 + def 
test_combining_marks_preserved(self): + """Combining diacritical marks stay with base character.""" + e_acute = "e\u0301" + data = [[f"caf{e_acute} au lait"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=5) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert e_acute in graphemes_in_result + + +class TestAnsiWithGraphemeClusters: + """Tests for ANSI escape codes combined with grapheme clusters.""" + + @requires_wcwidth + def test_ansi_colored_zwj_width(self): + """ANSI colored ZWJ emoji has correct width.""" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" + colored = f"\x1b[31m{family}\x1b[0m" + + from tabulate import _visible_width + + assert _visible_width(colored) == 2 + + @requires_wcwidth + def test_ansi_colored_zwj_alignment(self): + """ANSI colored ZWJ emoji aligns correctly.""" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" + colored = f"\x1b[31m{family}\x1b[0m" + data = [ + ["AB", "text"], + [colored, "emoji"], + ] + result = tabulate(data, headers=["col", "desc"], tablefmt="grid") + lines = result.split("\n") + + from tabulate import _visible_width + + border_width = _visible_width(lines[0]) + for line in lines: + assert _visible_width(line) == border_width + + @requires_wcwidth_030 + def test_ansi_colored_flag_wrap(self): + """ANSI colored flag not broken during wrap.""" + us_flag = "\U0001f1fa\U0001f1f8" + colored = f"\x1b[34m{us_flag}\x1b[0m" + data = [[f"A{colored}B"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=4) + + assert "\U0001f1fa" in result + assert "\U0001f1f8" in result + lines = [line.strip() for line in result.split("\n") if line.strip()] + flag_parts_same_line = any( + "\U0001f1fa" in line and "\U0001f1f8" in line for line in lines + ) + assert flag_parts_same_line diff --git a/test/test_internal.py b/test/test_internal.py index e7564d3..f1960e7 100644 --- a/test/test_internal.py +++ 
b/test/test_internal.py @@ -180,7 +180,9 @@ def test_wrap_text_wide_chars(): except ImportError: skip("test_wrap_text_wide_chars is skipped") - rows = [["청자청자청자청자청자", "약간 감싸면 더 잘 보일 수있는 다소 긴 설명입니다"]] + rows = [ + ["청자청자청자청자청자", "약간 감싸면 더 잘 보일 수있는 다소 긴 설명입니다"] + ] widths = [5, 20] expected = [ [ @@ -244,7 +246,14 @@ def test_wrap_text_to_colwidths_colors_wide_char(): except ImportError: skip("test_wrap_text_to_colwidths_colors_wide_char is skipped") - data = [[("\033[31m약간 감싸면 더 잘 보일 수있는 다소 긴" " 설명입니다 설명입니다 설명입니다 설명입니다 설명\033[0m")]] + data = [ + [ + ( + "\033[31m약간 감싸면 더 잘 보일 수있는 다소 긴" + " 설명입니다 설명입니다 설명입니다 설명입니다 설명\033[0m" + ) + ] + ] result = T._wrap_text_to_colwidths(data, [30]) expected = [ @@ -275,13 +284,15 @@ def test_wrap_text_to_colwidths_multi_ansi_colors_full_cell(): ] result = T._wrap_text_to_colwidths(data, [30]) + # wcwidth.propagate_sgr combines separate SGR codes into single sequences + # (e.g., \x1b[31m\x1b[43m -> \x1b[31;43m), which is functionally equivalent expected = [ [ "\n".join( [ "\033[31m\033[43mThis is a rather long\033[0m", - "\033[31m\033[43mdescription that might look\033[0m", - "\033[31m\033[43mbetter if it is wrapped a bit\033[0m", + "\033[31;43mdescription that might look\033[0m", + "\033[31;43mbetter if it is wrapped a bit\033[0m", ] ) ] diff --git a/test/test_output.py b/test/test_output.py index 12dfc3a..d7c225b 100644 --- a/test/test_output.py +++ b/test/test_output.py @@ -3320,6 +3320,7 @@ def test_preserve_whitespace(): result = tabulate(test_table, table_headers, preserve_whitespace=False) assert_equal(expected, result) + def test_break_long_words(): "Output: Default table output, with breakwords true." table_headers = ["h1", "h2", "h3"] @@ -3335,6 +3336,7 @@ def test_break_long_words(): result = tabulate(test_table, table_headers, maxcolwidths=3, break_long_words=True) assert_equal(expected, result) + def test_break_on_hyphens(): "Output: Default table output, with break on hyphens true." 
table_headers = ["h1", "h2", "h3"] diff --git a/test/test_textwrapper.py b/test/test_textwrapper.py index 46dd818..b592270 100644 --- a/test/test_textwrapper.py +++ b/test/test_textwrapper.py @@ -1,11 +1,62 @@ """Discretely test functionality of our custom TextWrapper""" import datetime +from unittest.mock import patch, MagicMock -from tabulate import _CustomTextWrap as CTW, tabulate, _strip_ansi +import pytest + +from tabulate import _CustomTextWrap as CTW, _wrap_text, tabulate, _strip_ansi from textwrap import TextWrapper as OTW -from common import skip, assert_equal +from common import assert_equal + +try: + import wcwidth + + HAS_WCWIDTH = True + HAS_WCWIDTH_WRAP = hasattr(wcwidth, "wrap") +except ImportError: + wcwidth = None + HAS_WCWIDTH = False + HAS_WCWIDTH_WRAP = False + +requires_wcwidth = pytest.mark.skipif(not HAS_WCWIDTH, reason="requires wcwidth") + + +@pytest.fixture(params=["wcwidth_wrap", "custom_textwrap"]) +def wrap_backend(request): + """Fixture to test both wrap backends: wcwidth.wrap and _CustomTextWrap fallback.""" + # This ensures both code paths in _wrap_text() are tested: + # def _wrap_text(text, width, ...): + # if wcwidth is not None and hasattr(wcwidth, "wrap"): + # # Path 1: wcwidth.wrap (tested by wcwidth_wrap) + # return wcwidth.wrap(...) + # else: + # # Path 2: fallback (tested by custom_textwrap) + # return _CustomTextWrap(...).wrap(text) + # + # Tests using this fixture verify both backends produce identical results. 
+ if request.param == "wcwidth_wrap": + if not HAS_WCWIDTH_WRAP: + pytest.skip("wcwidth.wrap not available") + yield "wcwidth_wrap" + else: + # Mock wcwidth to not have wrap attribute, forcing _CustomTextWrap fallback + if not HAS_WCWIDTH: + pytest.skip("wcwidth not available") + mock_wcwidth = MagicMock(spec=["wcswidth", "wcwidth"]) + mock_wcwidth.wcswidth = wcwidth.wcswidth + mock_wcwidth.wcwidth = wcwidth.wcwidth + with patch("tabulate.wcwidth", mock_wcwidth): + yield "custom_textwrap" + + +def _nwrap(lines): + """Normalize wrapped lines for cross-version comparison. + + https://github.com/python/cpython/issues/140627 + """ + return [line.rstrip() for line in lines] def test_wrap_multiword_non_wide(): @@ -15,8 +66,8 @@ def test_wrap_multiword_non_wide(): orig = OTW(width=width) cust = CTW(width=width) - assert orig.wrap(data) == cust.wrap( - data + assert _nwrap(orig.wrap(data)) == _nwrap( + cust.wrap(data) ), "Failure on non-wide char multiword regression check for width " + str(width) @@ -27,8 +78,8 @@ def test_wrap_multiword_non_wide_with_hypens(): orig = OTW(width=width) cust = CTW(width=width) - assert orig.wrap(data) == cust.wrap( - data + assert _nwrap(orig.wrap(data)) == _nwrap( + cust.wrap(data) ), "Failure on non-wide char hyphen regression check for width " + str(width) @@ -39,51 +90,37 @@ def test_wrap_longword_non_wide(): orig = OTW(width=width) cust = CTW(width=width) - assert orig.wrap(data) == cust.wrap( - data + assert _nwrap(orig.wrap(data)) == _nwrap( + cust.wrap(data) ), "Failure on non-wide char longword regression check for width " + str(width) -def test_wrap_wide_char_multiword(): +@requires_wcwidth +def test_wrap_wide_char_multiword(wrap_backend): """TextWrapper: wrapping support for wide characters with multiple words""" - try: - import wcwidth # noqa - except ImportError: - skip("test_wrap_wide_char is skipped") - data = "약간 감싸면 더 잘 보일 수있는 다소 긴 설명입니다" expected = ["약간 감싸면 더", "잘 보일 수있는", "다소 긴", "설명입니다"] - wrapper = CTW(width=15) - 
result = wrapper.wrap(data) - assert_equal(expected, result) + result = _wrap_text(data, width=15) + assert result == expected -def test_wrap_wide_char_longword(): +@requires_wcwidth +def test_wrap_wide_char_longword(wrap_backend): """TextWrapper: wrapping wide char word that needs to be broken up""" - try: - import wcwidth # noqa - except ImportError: - skip("test_wrap_wide_char_longword is skipped") - data = "약간감싸면더잘보일수있" expected = ["약간", "감싸", "면더", "잘보", "일수", "있"] # Explicit odd number to ensure the 2 width is taken into account - wrapper = CTW(width=5) - result = wrapper.wrap(data) - assert_equal(expected, result) + result = _wrap_text(data, width=5) + assert result == expected -def test_wrap_mixed_string(): +@requires_wcwidth +def test_wrap_mixed_string(wrap_backend): """TextWrapper: wrapping string with mix of wide and non-wide chars""" - try: - import wcwidth # noqa - except ImportError: - skip("test_wrap_wide_char is skipped") - data = ( "This content of this string (この文字列のこの内容) contains " "multiple character types (複数の文字タイプが含まれています)" @@ -97,9 +134,8 @@ def test_wrap_mixed_string(): "types (複数の文字タイ", "プが含まれています)", ] - wrapper = CTW(width=21) - result = wrapper.wrap(data) - assert_equal(expected, result) + result = _wrap_text(data, width=21) + assert result == expected def test_wrapper_len_ignores_color_chars():