From a4cb45cd3be21e2372e0bb9b8d28fed0b8af2531 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sat, 24 Jan 2026 22:50:04 -0500 Subject: [PATCH 1/6] finish wrap() of wcwidth.wrap() --- pyproject.toml | 2 +- tabulate/__init__.py | 89 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 76 insertions(+), 15 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index d13e92d..e555fbe 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dynamic = ["version"] Homepage = "https://github.com/astanin/python-tabulate" [project.optional-dependencies] -widechars = ["wcwidth"] +widechars = ["wcwidth>=0.3.4"] [project.scripts] tabulate = "tabulate:_main" diff --git a/tabulate/__init__.py b/tabulate/__init__.py index e100c09..a232561 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -1111,17 +1111,24 @@ def _visible_width(s): >>> _visible_width('\x1b[31mhello\x1b[0m'), _visible_width("world") (5, 5) - """ # optional wide-character support if wcwidth is not None and WIDE_CHARS_MODE: - len_fn = wcwidth.wcswidth - else: - len_fn = len + # when already a string, it could contain terminal sequences, + # wcwidth >= 0.3.0 handles ANSI codes internally, + if hasattr(wcwidth, "width"): + return wcwidth.width(str(s)) + # while previous versions need them stripped first. + if isinstance(s, (str, bytes): + return wcwidth.wcswidth(_strip_ansi(str(s))) + + # Otherwise, coerce to string, guaranteed to be without any control codes or funny business, + # we can use wcswidth() directly. + return wcwidth.wcswidth(str(s)) if isinstance(s, (str, bytes)): - return len_fn(_strip_ansi(s)) + return len(_strip_ansi(s)) else: - return len_fn(str(s)) + return len(str(s)) def _is_multiline(s): @@ -1226,6 +1233,7 @@ def _align_column( s_widths = list(map(width_fn, strings)) maxwidth = max(max(_flat_list(s_widths)), minwidth) # TODO: refactor column alignment in single-line and multiline modes + if is_multiline: if not enable_widechars and not has_invisible: padded_strings = [ @@ -1233,14 +1241,12 @@ def _align_column( for ms in strings ] else: - # enable wide-character width corrections + # Width corrections for wide chars/ANSI codes s_lens = [[len(s) for s in re.split("[\r\n]", ms)] for ms in strings] visible_widths = [ [maxwidth - (w - l) for w, l in zip(mw, ml)] for mw, ml in zip(s_widths, s_lens) ] - # wcswidth and _visible_width don't count invisible characters; - # padfn doesn't need to apply another correction padded_strings = [ "\n".join([padfn(w, s) for s, w in zip((ms.splitlines() or ms), mw)]) for ms, mw in zip(strings, visible_widths) @@ -1249,11 +1255,9 @@ def _align_column( if not enable_widechars and not has_invisible: padded_strings = [padfn(maxwidth, s) for s in strings] else: - # enable wide-character width corrections + # Width corrections for wide chars/ANSI codes s_lens = list(map(len, strings)) visible_widths = [maxwidth - (w - l) for w, l in zip(s_widths, s_lens)] - # wcswidth and _visible_width don't count invisible characters; - # padfn doesn't need to apply another correction padded_strings = [padfn(w, s) for s, w in zip(strings, visible_widths)] return padded_strings @@ -1655,10 +1659,9 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long continue if width is not None: - wrapper = _CustomTextWrap(width=width, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens) casted_cell = str(cell) wrapped = [ - "\n".join(wrapper.wrap(line)) + "\n".join(_wrap_text(line, width, break_long_words, break_on_hyphens)) for line in casted_cell.splitlines() if line.strip() != "" ] @@ -2672,6 +2675,64 @@ def _format_table( return "" +def _propagate_ansi_codes(lines): + """Propagate ANSI color codes across wrapped lines. + + When text with ANSI codes is wrapped by wcwidth.width, adjust each line to: + - Start with any active color codes from previous lines + - End with a reset if colors are active (to prevent bleeding into other cells) + + This function implements some amount of _CustomTextWrap's behavior as a post-processing step of + the 3rd-party wcwidth.wrap(), which also preserves and parses sequences, but adjusts them to + match the behavior of _CustomTextWrap, but to also benefit by its grapheme, emoji/flags, wide + characters. + """ + result = [] + active_codes = [] + + for line in lines: + code_matches = list(_ansi_codes.finditer(line)) + color_codes = [code.string[code.span()[0]:code.span()[1]] for code in code_matches] + next_line = "".join(active_codes) + line + + # Track codes for subsequent lines + for code in color_codes: + if code == _ansi_color_reset_code: + active_codes = [] + continue + active_codes.append(code) + + # Append reset if any colors are active + if active_codes: + next_line += _ansi_color_reset_code + + result.append(next_line) + + return result + + +def _wrap_text(text, width, break_long_words=True, break_on_hyphens=True): + """Wrap text to width with wide character and ANSI code support.""" + if wcwidth is not None and hasattr(wcwidth, "wrap"): + # wcwidth >= 0.3.0 has wrap() with proper grapheme cluster support, but it doesn't break + # sequences the way this library requires, so _propagate_ansi_codes() is applied to the + # result to match. + wrapped_lines = wcwidth.wrap( + text, width, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens + ) + return _propagate_ansi_codes(wrapped_lines) + else: + # Fallback for wcwidth < 0.3.0 or no wcwidth + wrapper = _CustomTextWrap( + width=width, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens + ) + return wrapper.wrap(text) + + class _CustomTextWrap(textwrap.TextWrapper): """A custom implementation of CPython's textwrap.TextWrapper. This supports both wide characters (Korea, Japanese, Chinese) - including mixed string. From 33ebf9ae4d831c5c0c735e6c7d65cd184ab9b1a0 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 25 Jan 2026 00:29:15 -0500 Subject: [PATCH 2/6] cleanup diff --- pyproject.toml | 2 +- tabulate/__init__.py | 62 +++++++++++----------- test/test_textwrapper.py | 112 ++++++++++++++++++++++++++------------- 3 files changed, 108 insertions(+), 68 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index e555fbe..54d359f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dynamic = ["version"] Homepage = "https://github.com/astanin/python-tabulate" [project.optional-dependencies] -widechars = ["wcwidth>=0.3.4"] +widechars = ["wcwidth>=0.3.5"] [project.scripts] tabulate = "tabulate:_main" diff --git a/tabulate/__init__.py b/tabulate/__init__.py index a232561..a219c26 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -1111,24 +1111,20 @@ def _visible_width(s): >>> _visible_width('\x1b[31mhello\x1b[0m'), _visible_width("world") (5, 5) + """ # optional wide-character support if wcwidth is not None and WIDE_CHARS_MODE: - # when already a string, it could contain terminal sequences, - # wcwidth >= 0.3.0 handles ANSI codes internally, + len_fn = wcwidth.wcswidth if hasattr(wcwidth, "width"): - return wcwidth.width(str(s)) - # while previous versions need them stripped first. - if isinstance(s, (str, bytes): - return wcwidth.wcswidth(_strip_ansi(str(s))) - - # Otherwise, coerce to string, guaranteed to be without any control codes or funny business, - # we can use wcswidth() directly. - return wcwidth.wcswidth(str(s)) + # wcwidth >=0.3.0 handles ansi + return wcwidth.width(s) + else: + len_fn = len if isinstance(s, (str, bytes)): - return len(_strip_ansi(s)) + return len_fn(_strip_ansi(s)) else: - return len(str(s)) + return len_fn(str(s)) def _is_multiline(s): @@ -1233,7 +1229,6 @@ def _align_column( s_widths = list(map(width_fn, strings)) maxwidth = max(max(_flat_list(s_widths)), minwidth) # TODO: refactor column alignment in single-line and multiline modes - if is_multiline: if not enable_widechars and not has_invisible: padded_strings = [ @@ -1241,12 +1236,14 @@ def _align_column( for ms in strings ] else: - # Width corrections for wide chars/ANSI codes + # enable wide-character width corrections s_lens = [[len(s) for s in re.split("[\r\n]", ms)] for ms in strings] visible_widths = [ [maxwidth - (w - l) for w, l in zip(mw, ml)] for mw, ml in zip(s_widths, s_lens) ] + # wcswidth and _visible_width don't count invisible characters; + # padfn doesn't need to apply another correction padded_strings = [ "\n".join([padfn(w, s) for s, w in zip((ms.splitlines() or ms), mw)]) for ms, mw in zip(strings, visible_widths) @@ -1255,9 +1252,11 @@ def _align_column( if not enable_widechars and not has_invisible: padded_strings = [padfn(maxwidth, s) for s in strings] else: - # Width corrections for wide chars/ANSI codes + # enable wide-character width corrections s_lens = list(map(len, strings)) visible_widths = [maxwidth - (w - l) for w, l in zip(s_widths, s_lens)] + # wcswidth and _visible_width don't count invisible characters; + # padfn doesn't need to apply another correction padded_strings = [padfn(w, s) for s, w in zip(strings, visible_widths)] return padded_strings @@ -1659,9 +1658,12 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long continue if width is not None: + wrapper_wrap = functools.partial(_wrap_text, width=width, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens) casted_cell = str(cell) wrapped = [ - "\n".join(_wrap_text(line, width, break_long_words, break_on_hyphens)) + "\n".join(wrapper_wrap(line)) for line in casted_cell.splitlines() if line.strip() != "" ] @@ -2678,7 +2680,7 @@ def _format_table( def _propagate_ansi_codes(lines): """Propagate ANSI color codes across wrapped lines. - When text with ANSI codes is wrapped by wcwidth.width, adjust each line to: + When text with ANSI codes is wrapped by wcwidth.wrap, adjust each line to: - Start with any active color codes from previous lines - End with a reset if colors are active (to prevent bleeding into other cells) @@ -2711,26 +2713,24 @@ def _propagate_ansi_codes(lines): return result -def _wrap_text(text, width, break_long_words=True, break_on_hyphens=True): +def _wrap_text(text, width, break_long_words, break_on_hyphens): """Wrap text to width with wide character and ANSI code support.""" + # wcwidth >= 0.3.0 has wrap() with proper grapheme cluster support, if wcwidth is not None and hasattr(wcwidth, "wrap"): - # wcwidth >= 0.3.0 has wrap() with proper grapheme cluster support, but it doesn't break - # sequences the way this library requires, so _propagate_ansi_codes() is applied to the - # result to match. - wrapped_lines = wcwidth.wrap( + # but it doesn't break, reset, then continue sequences the way this library requires, so + # _propagate_ansi_codes() is applied afterwards to match the same result as the built-in + # non-wcwidth implementation below. + return _propagate_ansi_codes(wcwidth.wrap( text, width, break_long_words=break_long_words, - break_on_hyphens=break_on_hyphens - ) - return _propagate_ansi_codes(wrapped_lines) + break_on_hyphens=break_on_hyphens)) else: # Fallback for wcwidth < 0.3.0 or no wcwidth - wrapper = _CustomTextWrap( + return _CustomTextWrap( width=width, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens - ) - return wrapper.wrap(text) + ).wrap(text) class _CustomTextWrap(textwrap.TextWrapper): @@ -2801,11 +2801,13 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width): # of the next chunk onto the current line as will fit. if self.break_long_words: # Tabulate Custom: Build the string up piece-by-piece in order to - # take each charcter's width into account + # take each character's width into account chunk = reversed_chunks[-1] i = 1 # Only count printable characters, so strip_ansi first, index later. - while len(_strip_ansi(chunk)[:i]) <= space_left: + # Use self._len() instead of len() to account for displayed width, eg. + # wide chars like CJK count as 2 when using wcwidth<0.3.0 without wrap() + while self._len(_strip_ansi(chunk)[:i]) <= space_left: i = i + 1 # Consider escape codes when breaking words up total_escape_len = 0 diff --git a/test/test_textwrapper.py b/test/test_textwrapper.py index 46dd818..f39f782 100644 --- a/test/test_textwrapper.py +++ b/test/test_textwrapper.py @@ -1,12 +1,65 @@ """Discretely test functionality of our custom TextWrapper""" import datetime +from unittest.mock import patch, MagicMock -from tabulate import _CustomTextWrap as CTW, tabulate, _strip_ansi +import pytest + +from tabulate import _CustomTextWrap as CTW, _wrap_text, tabulate, _strip_ansi from textwrap import TextWrapper as OTW from common import skip, assert_equal +try: + import wcwidth + HAS_WCWIDTH = True + HAS_WCWIDTH_WRAP = hasattr(wcwidth, 'wrap') +except ImportError: + wcwidth = None + HAS_WCWIDTH = False + HAS_WCWIDTH_WRAP = False + +requires_wcwidth = pytest.mark.skipif(not HAS_WCWIDTH, reason="requires wcwidth") + + +def _normalize_wrap_result(lines): + """Normalize wrapped lines for cross-version comparison. + + CPython #140627: Older versions kept trailing whitespace when drop_whitespace=True. + Fixed in 3.13.11+, 3.14.2+, and 3.15+. Strip to normalize across versions. + """ + return [line.rstrip() for line in lines] + + +@pytest.fixture(params=['wcwidth_wrap', 'custom_textwrap']) +def wrap_backend(request): + """Fixture to test both wrap backends: wcwidth.wrap and _CustomTextWrap fallback.""" + # This ensures both code paths in _wrap_text() are tested: + # def _wrap_text(text, width, ...): + # if wcwidth is not None and hasattr(wcwidth, "wrap"): + # # Path 1: wcwidth.wrap (tested by wcwidth_wrap) + # return _propagate_ansi_codes(wcwidth.wrap(...)) + # else: + # # Path 2: fallback (tested by custom_textwrap) + # return _CustomTextWrap(...).wrap(text) + # + # and for tests that use it, eg. test_wrap_wide_char_multiword(wrap_backend), The tests assert + # the same expected output for both backends. This is good - it verifies both produce identical + # results. + if request.param == 'wcwidth_wrap': + if not HAS_WCWIDTH_WRAP: + pytest.skip("wcwidth.wrap not available") + yield 'wcwidth_wrap' + else: + # Mock wcwidth to not have wrap attribute, forcing _CustomTextWrap fallback + if not HAS_WCWIDTH: + pytest.skip("wcwidth not available") + mock_wcwidth = MagicMock(spec=['wcswidth', 'wcwidth']) + mock_wcwidth.wcswidth = wcwidth.wcswidth + mock_wcwidth.wcwidth = wcwidth.wcwidth + with patch('tabulate.wcwidth', mock_wcwidth): + yield 'custom_textwrap' + def test_wrap_multiword_non_wide(): """TextWrapper: non-wide character regression tests""" @@ -15,9 +68,9 @@ def test_wrap_multiword_non_wide(): orig = OTW(width=width) cust = CTW(width=width) - assert orig.wrap(data) == cust.wrap( - data - ), "Failure on non-wide char multiword regression check for width " + str(width) + assert _normalize_wrap_result(orig.wrap(data)) == _normalize_wrap_result( + cust.wrap(data) + ) def test_wrap_multiword_non_wide_with_hypens(): @@ -27,9 +80,9 @@ def test_wrap_multiword_non_wide_with_hypens(): orig = OTW(width=width) cust = CTW(width=width) - assert orig.wrap(data) == cust.wrap( - data - ), "Failure on non-wide char hyphen regression check for width " + str(width) + assert _normalize_wrap_result(orig.wrap(data)) == _normalize_wrap_result( + cust.wrap(data) + ) def test_wrap_longword_non_wide(): @@ -39,51 +92,37 @@ def test_wrap_longword_non_wide(): orig = OTW(width=width) cust = CTW(width=width) - assert orig.wrap(data) == cust.wrap( - data - ), "Failure on non-wide char longword regression check for width " + str(width) + assert _normalize_wrap_result(orig.wrap(data)) == _normalize_wrap_result( + cust.wrap(data) + ) -def test_wrap_wide_char_multiword(): +@requires_wcwidth +def test_wrap_wide_char_multiword(wrap_backend): """TextWrapper: wrapping support for wide characters with multiple words""" - try: - import wcwidth # noqa - except ImportError: - skip("test_wrap_wide_char is skipped") - data = "약간 감싸면 더 잘 보일 수있는 다소 긴 설명입니다" expected = ["약간 감싸면 더", "잘 보일 수있는", "다소 긴", "설명입니다"] - wrapper = CTW(width=15) - result = wrapper.wrap(data) - assert_equal(expected, result) + result = _wrap_text(data, width=15) + assert result == expected -def test_wrap_wide_char_longword(): +@requires_wcwidth +def test_wrap_wide_char_longword(wrap_backend): """TextWrapper: wrapping wide char word that needs to be broken up""" - try: - import wcwidth # noqa - except ImportError: - skip("test_wrap_wide_char_longword is skipped") - data = "약간감싸면더잘보일수있" expected = ["약간", "감싸", "면더", "잘보", "일수", "있"] # Explicit odd number to ensure the 2 width is taken into account - wrapper = CTW(width=5) - result = wrapper.wrap(data) - assert_equal(expected, result) + result = _wrap_text(data, width=5) + assert result == expected -def test_wrap_mixed_string(): +@requires_wcwidth +def test_wrap_mixed_string(wrap_backend): """TextWrapper: wrapping string with mix of wide and non-wide chars""" - try: - import wcwidth # noqa - except ImportError: - skip("test_wrap_wide_char is skipped") - data = ( "This content of this string (この文字列のこの内容) contains " "multiple character types (複数の文字タイプが含まれています)" @@ -97,9 +136,8 @@ def test_wrap_mixed_string(): "types (複数の文字タイ", "プが含まれています)", ] - wrapper = CTW(width=21) - result = wrapper.wrap(data) - assert_equal(expected, result) + result = _wrap_text(data, width=21) + assert result == expected def test_wrapper_len_ignores_color_chars(): From 34bb83e222b25ffeea70bfb745974448f84e9296 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 25 Jan 2026 00:41:59 -0500 Subject: [PATCH 3/6] functools fix and match origin/master better, citations --- tabulate/__init__.py | 4 +- test/test_grapheme_clusters.py | 240 +++++++++++++++++++++++++++++++++ test/test_textwrapper.py | 29 ++-- 3 files changed, 256 insertions(+), 17 deletions(-) create mode 100644 test/test_grapheme_clusters.py diff --git a/tabulate/__init__.py b/tabulate/__init__.py index a219c26..d1f07e6 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -1658,7 +1658,7 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long continue if width is not None: - wrapper_wrap = functools.partial(_wrap_text, width=width, + wrapper_wrap = partial(_wrap_text, width=width, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens) casted_cell = str(cell) @@ -2713,7 +2713,7 @@ def _propagate_ansi_codes(lines): return result -def _wrap_text(text, width, break_long_words, break_on_hyphens): +def _wrap_text(text, width, break_long_words=True, break_on_hyphens=True): """Wrap text to width with wide character and ANSI code support.""" # wcwidth >= 0.3.0 has wrap() with proper grapheme cluster support, if wcwidth is not None and hasattr(wcwidth, "wrap"): diff --git a/test/test_grapheme_clusters.py b/test/test_grapheme_clusters.py new file mode 100644 index 0000000..6148125 --- /dev/null +++ b/test/test_grapheme_clusters.py @@ -0,0 +1,240 @@ +"""Tests for Unicode grapheme cluster handling in tabulate.""" + +import pytest + +from tabulate import tabulate, WIDE_CHARS_MODE + +try: + import wcwidth + HAS_WCWIDTH = True + HAS_WCWIDTH_030 = hasattr(wcwidth, 'wrap') +except ImportError: + wcwidth = None + HAS_WCWIDTH = False + HAS_WCWIDTH_030 = False + +requires_wcwidth = pytest.mark.skipif( + not HAS_WCWIDTH, reason="requires wcwidth" +) + +requires_wcwidth_030 = pytest.mark.skipif( + not HAS_WCWIDTH_030, reason="requires wcwidth >= 0.3.0" +) + + +class TestGraphemeClusterWidth: + """Tests for correct width calculation of grapheme clusters.""" + + @requires_wcwidth + def test_zwj_family_emoji_width(self): + """ZWJ family emoji has display width 2.""" + family = "\U0001F468\u200D\U0001F469\u200D\U0001F467" + assert wcwidth.wcswidth(family) == 2 + + @requires_wcwidth + def test_regional_indicator_flag_width(self): + """Regional indicator pair (flag) has display width 2.""" + us_flag = "\U0001F1FA\U0001F1F8" + assert wcwidth.wcswidth(us_flag) == 2 + + @requires_wcwidth + def test_vs16_emoji_width(self): + """VS16 variation selector creates wide emoji.""" + heart = "\u2764\uFE0F" + assert wcwidth.wcswidth(heart) == 2 + + +class TestGraphemeClusterAlignment: + """Tests for correct alignment of cells containing grapheme clusters.""" + + @requires_wcwidth + def test_zwj_alignment_in_grid(self): + """ZWJ emoji aligns correctly in grid format.""" + family = "\U0001F468\u200D\U0001F469\u200D\U0001F467" + data = [ + ["ABC", "text"], + [family, "emoji"], + ] + result = tabulate(data, headers=["col", "desc"], tablefmt="grid") + lines = result.split("\n") + + border_width = len(lines[0]) + for line in lines: + from tabulate import _visible_width + assert _visible_width(line) == border_width + + @requires_wcwidth + def test_flag_alignment_in_grid(self): + """Regional indicator flags align correctly in grid format.""" + us_flag = "\U0001F1FA\U0001F1F8" + data = [ + ["AB", "text"], + [us_flag, "flag"], + ] + result = tabulate(data, headers=["col", "desc"], tablefmt="grid") + lines = result.split("\n") + + border_width = len(lines[0]) + for line in lines: + from tabulate import _visible_width + assert _visible_width(line) == border_width + + +class TestGraphemeClusterWrapping: + """Tests for grapheme cluster preservation during text wrapping. + + These tests require wcwidth >= 0.3.0 for iter_graphemes and wrap() APIs. + """ + + @requires_wcwidth_030 + def test_zwj_not_broken_during_wrap(self): + """ZWJ sequence preserved as single unit during wrap.""" + family = "\U0001F468\u200D\U0001F469\u200D\U0001F467" + data = [[f"A{family}B"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=3) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert family in graphemes_in_result + + @requires_wcwidth_030 + def test_flag_not_broken_during_wrap(self): + """Regional indicator flag preserved as single unit during wrap.""" + us_flag = "\U0001F1FA\U0001F1F8" + gb_flag = "\U0001F1EC\U0001F1E7" + fr_flag = "\U0001F1EB\U0001F1F7" + flags = us_flag + gb_flag + fr_flag + + data = [[flags]] + result = tabulate(data, tablefmt="plain", maxcolwidths=5) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert us_flag in graphemes_in_result + assert gb_flag in graphemes_in_result + assert fr_flag in graphemes_in_result + + @requires_wcwidth_030 + def test_vs16_not_broken_during_wrap(self): + """VS16 variation selector kept with base character during wrap.""" + heart = "\u2764\uFE0F" + data = [[heart * 3]] + result = tabulate(data, tablefmt="plain", maxcolwidths=4) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + heart_count = sum(1 for g in graphemes_in_result if g == heart) + assert heart_count == 3 + + @requires_wcwidth_030 + def test_skin_tone_modifier_not_broken(self): + """Skin tone modifier preserved with emoji during wrap.""" + wave_light = "\U0001F44B\U0001F3FB" + data = [[f"Hi{wave_light}there"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=5) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert wave_light in graphemes_in_result + + +class TestComplexGraphemeClusters: + """Tests for complex grapheme cluster scenarios. + + These tests require wcwidth >= 0.3.0 for iter_graphemes API. + """ + + @requires_wcwidth_030 + def test_multiple_zwj_sequences_in_cell(self): + """Multiple ZWJ sequences in single cell handled correctly.""" + family = "\U0001F468\u200D\U0001F469\u200D\U0001F467" + technologist = "\U0001F468\U0001F3FB\u200D\U0001F4BB" + data = [[f"{family} and {technologist}"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=15) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert family in graphemes_in_result + assert technologist in graphemes_in_result + + @requires_wcwidth_030 + def test_flags_with_text_wrap(self): + """Flags interspersed with text wrap correctly.""" + us_flag = "\U0001F1FA\U0001F1F8" + data = [[f"Visit {us_flag} USA today!"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=10) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert us_flag in graphemes_in_result + + @requires_wcwidth_030 + def test_combining_marks_preserved(self): + """Combining diacritical marks stay with base character.""" + e_acute = "e\u0301" + data = [[f"caf{e_acute} au lait"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=5) + + graphemes_in_result = [] + for line in result.split("\n"): + graphemes_in_result.extend(list(wcwidth.iter_graphemes(line.strip()))) + + assert e_acute in graphemes_in_result + + +class TestAnsiWithGraphemeClusters: + """Tests for ANSI escape codes combined with grapheme clusters.""" + + @requires_wcwidth + def test_ansi_colored_zwj_width(self): + """ANSI colored ZWJ emoji has correct width.""" + family = "\U0001F468\u200D\U0001F469\u200D\U0001F467" + colored = f"\x1b[31m{family}\x1b[0m" + + from tabulate import _visible_width + assert _visible_width(colored) == 2 + + @requires_wcwidth + def test_ansi_colored_zwj_alignment(self): + """ANSI colored ZWJ emoji aligns correctly.""" + family = "\U0001F468\u200D\U0001F469\u200D\U0001F467" + colored = f"\x1b[31m{family}\x1b[0m" + data = [ + ["AB", "text"], + [colored, "emoji"], + ] + result = tabulate(data, headers=["col", "desc"], tablefmt="grid") + lines = result.split("\n") + + from tabulate import _visible_width + border_width = _visible_width(lines[0]) + for line in lines: + assert _visible_width(line) == border_width + + @requires_wcwidth_030 + def test_ansi_colored_flag_wrap(self): + """ANSI colored flag not broken during wrap.""" + us_flag = "\U0001F1FA\U0001F1F8" + colored = f"\x1b[34m{us_flag}\x1b[0m" + data = [[f"A{colored}B"]] + result = tabulate(data, tablefmt="plain", maxcolwidths=4) + + assert "\U0001F1FA" in result + assert "\U0001F1F8" in result + lines = [line.strip() for line in result.split("\n") if line.strip()] + flag_parts_same_line = any( + "\U0001F1FA" in line and "\U0001F1F8" in line for line in lines + ) + assert flag_parts_same_line diff --git a/test/test_textwrapper.py b/test/test_textwrapper.py index f39f782..185fad4 100644 --- a/test/test_textwrapper.py +++ b/test/test_textwrapper.py @@ -22,15 +22,6 @@ requires_wcwidth = pytest.mark.skipif(not HAS_WCWIDTH, reason="requires wcwidth") -def _normalize_wrap_result(lines): - """Normalize wrapped lines for cross-version comparison. - - CPython #140627: Older versions kept trailing whitespace when drop_whitespace=True. - Fixed in 3.13.11+, 3.14.2+, and 3.15+. Strip to normalize across versions. - """ - return [line.rstrip() for line in lines] - - @pytest.fixture(params=['wcwidth_wrap', 'custom_textwrap']) def wrap_backend(request): """Fixture to test both wrap backends: wcwidth.wrap and _CustomTextWrap fallback.""" @@ -61,6 +52,14 @@ def wrap_backend(request): yield 'custom_textwrap' +def _nwrap(lines): + """Normalize wrapped lines for cross-version comparison. + + https://github.com/python/cpython/issues/140627 + """ + return [line.rstrip() for line in lines] + + def test_wrap_multiword_non_wide(): """TextWrapper: non-wide character regression tests""" data = "this is a test string for regression splitting" @@ -68,9 +67,9 @@ def test_wrap_multiword_non_wide(): orig = OTW(width=width) cust = CTW(width=width) - assert _normalize_wrap_result(orig.wrap(data)) == _normalize_wrap_result( + assert _nwrap(orig.wrap(data)) == _nwrap( cust.wrap(data) - ) + ), "Failure on non-wide char multiword regression check for width " + str(width) def test_wrap_multiword_non_wide_with_hypens(): @@ -80,9 +79,9 @@ def test_wrap_multiword_non_wide_with_hypens(): orig = OTW(width=width) cust = CTW(width=width) - assert _normalize_wrap_result(orig.wrap(data)) == _normalize_wrap_result( + assert _nwrap(orig.wrap(data)) == _nwrap( cust.wrap(data) - ) + ), "Failure on non-wide char hyphen regression check for width " + str(width) def test_wrap_longword_non_wide(): @@ -92,9 +91,9 @@ def test_wrap_longword_non_wide(): orig = OTW(width=width) cust = CTW(width=width) - assert _normalize_wrap_result(orig.wrap(data)) == _normalize_wrap_result( + assert _nwrap(orig.wrap(data)) == _nwrap( cust.wrap(data) - ) + ), "Failure on non-wide char longword regression check for width " + str(width) @requires_wcwidth From 90204334a75dc4b40103dbbff6f6a1e84db5e6d7 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 25 Jan 2026 01:03:31 -0500 Subject: [PATCH 4/6] I'm sorry but your pre-commits really did a number --- tabulate/__init__.py | 55 ++++++++++++++++++++++++---------- test/test_api.py | 5 +--- test/test_cli.py | 5 +--- test/test_grapheme_clusters.py | 53 ++++++++++++++++---------------- test/test_internal.py | 13 ++++++-- test/test_output.py | 2 ++ test/test_textwrapper.py | 21 +++++++------ 7 files changed, 93 insertions(+), 61 deletions(-) diff --git a/tabulate/__init__.py b/tabulate/__init__.py index d1f07e6..2be9eba 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -1591,9 +1591,11 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): if headers == "keys": headers = field_names rows = [ - [getattr(row, f) for f in field_names] - if not _is_separating_line(row) - else row + ( + [getattr(row, f) for f in field_names] + if not _is_separating_line(row) + else row + ) for row in rows ] @@ -1641,7 +1643,13 @@ def _normalize_tabular_data(tabular_data, headers, showindex="default"): return rows, headers, headers_pad -def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long_words=_BREAK_LONG_WORDS, break_on_hyphens=_BREAK_ON_HYPHENS): +def _wrap_text_to_colwidths( + list_of_lists, + colwidths, + numparses=True, + break_long_words=_BREAK_LONG_WORDS, + break_on_hyphens=_BREAK_ON_HYPHENS, +): if len(list_of_lists): num_cols = len(list_of_lists[0]) else: @@ -1658,9 +1666,12 @@ def _wrap_text_to_colwidths(list_of_lists, colwidths, numparses=True, break_long continue if width is not None: - wrapper_wrap = partial(_wrap_text, width=width, - break_long_words=break_long_words, - break_on_hyphens=break_on_hyphens) + wrapper_wrap = partial( + _wrap_text, + width=width, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, + ) casted_cell = str(cell) wrapped = [ "\n".join(wrapper_wrap(line)) @@ -2263,7 +2274,11 @@ def tabulate( numparses = _expand_numparse(disable_numparse, num_cols) list_of_lists = _wrap_text_to_colwidths( - list_of_lists, maxcolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens + list_of_lists, + maxcolwidths, + numparses=numparses, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, ) if maxheadercolwidths is not None: @@ -2277,7 +2292,11 @@ def tabulate( numparses = _expand_numparse(disable_numparse, num_cols) headers = _wrap_text_to_colwidths( - [headers], maxheadercolwidths, numparses=numparses, break_long_words=break_long_words, break_on_hyphens=break_on_hyphens + [headers], + maxheadercolwidths, + numparses=numparses, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, )[0] # empty values in the first column of RST tables should be escaped (issue #82) @@ -2694,7 +2713,9 @@ def _propagate_ansi_codes(lines): for line in lines: code_matches = list(_ansi_codes.finditer(line)) - color_codes = [code.string[code.span()[0]:code.span()[1]] for code in code_matches] + color_codes = [ + code.string[code.span()[0] : code.span()[1]] for code in code_matches + ] next_line = "".join(active_codes) + line # Track codes for subsequent lines @@ -2720,16 +2741,20 @@ def _wrap_text(text, width, break_long_words=True, break_on_hyphens=True): # but it doesn't break, reset, then continue sequences the way this library requires, so # _propagate_ansi_codes() is applied afterwards to match the same result as the built-in # non-wcwidth implementation below. - return _propagate_ansi_codes(wcwidth.wrap( - text, width, - break_long_words=break_long_words, - break_on_hyphens=break_on_hyphens)) + return _propagate_ansi_codes( + wcwidth.wrap( + text, + width, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, + ) + ) else: # Fallback for wcwidth < 0.3.0 or no wcwidth return _CustomTextWrap( width=width, break_long_words=break_long_words, - break_on_hyphens=break_on_hyphens + break_on_hyphens=break_on_hyphens, ).wrap(text) diff --git a/test/test_api.py b/test/test_api.py index f35d09a..8382262 100644 --- a/test/test_api.py +++ b/test/test_api.py @@ -1,11 +1,8 @@ -"""API properties. - -""" +"""API properties.""" from tabulate import tabulate, tabulate_formats, simple_separated_format from common import skip - try: from inspect import signature, _empty except ImportError: diff --git a/test/test_cli.py b/test/test_cli.py index e71572d..d558adc 100644 --- a/test/test_cli.py +++ b/test/test_cli.py @@ -1,6 +1,4 @@ -"""Command-line interface. - -""" +"""Command-line interface.""" import os import sys @@ -12,7 +10,6 @@ from common import assert_equal - SAMPLE_SIMPLE_FORMAT = "\n".join( [ "----- ------ -------------", diff --git a/test/test_grapheme_clusters.py b/test/test_grapheme_clusters.py index 6148125..db6474c 100644 --- a/test/test_grapheme_clusters.py +++ b/test/test_grapheme_clusters.py @@ -2,20 +2,19 @@ import pytest -from tabulate import tabulate, WIDE_CHARS_MODE +from tabulate import tabulate try: import wcwidth + HAS_WCWIDTH = True - HAS_WCWIDTH_030 = hasattr(wcwidth, 'wrap') + HAS_WCWIDTH_030 = hasattr(wcwidth, "wrap") except ImportError: wcwidth = None HAS_WCWIDTH = False HAS_WCWIDTH_030 = False -requires_wcwidth = pytest.mark.skipif( - not HAS_WCWIDTH, reason="requires wcwidth" -) +requires_wcwidth = pytest.mark.skipif(not HAS_WCWIDTH, reason="requires wcwidth") requires_wcwidth_030 = pytest.mark.skipif( not HAS_WCWIDTH_030, reason="requires wcwidth >= 0.3.0" @@ -28,19 +27,19 @@ class TestGraphemeClusterWidth: @requires_wcwidth def test_zwj_family_emoji_width(self): """ZWJ family emoji has display width 2.""" - family = "\U0001F468\u200D\U0001F469\u200D\U0001F467" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" assert wcwidth.wcswidth(family) == 2 @requires_wcwidth def test_regional_indicator_flag_width(self): """Regional indicator pair (flag) has display width 2.""" - us_flag = "\U0001F1FA\U0001F1F8" + us_flag = "\U0001f1fa\U0001f1f8" assert wcwidth.wcswidth(us_flag) == 2 @requires_wcwidth def test_vs16_emoji_width(self): """VS16 variation selector creates wide emoji.""" - heart = "\u2764\uFE0F" + heart = "\u2764\ufe0f" assert wcwidth.wcswidth(heart) == 2 @@ -50,7 +49,7 @@ class TestGraphemeClusterAlignment: @requires_wcwidth def test_zwj_alignment_in_grid(self): """ZWJ emoji aligns correctly in grid format.""" - family = "\U0001F468\u200D\U0001F469\u200D\U0001F467" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" data = [ ["ABC", "text"], [family, "emoji"], @@ -61,12 +60,13 @@ def test_zwj_alignment_in_grid(self): border_width = len(lines[0]) for line in lines: from tabulate import _visible_width + assert _visible_width(line) == border_width @requires_wcwidth def test_flag_alignment_in_grid(self): """Regional indicator flags align correctly in grid format.""" - us_flag = "\U0001F1FA\U0001F1F8" + us_flag = "\U0001f1fa\U0001f1f8" data = [ ["AB", "text"], [us_flag, "flag"], @@ -77,6 +77,7 @@ def test_flag_alignment_in_grid(self): border_width = len(lines[0]) for line in lines: from tabulate import _visible_width + assert _visible_width(line) == border_width @@ -89,7 +90,7 @@ class TestGraphemeClusterWrapping: @requires_wcwidth_030 def test_zwj_not_broken_during_wrap(self): """ZWJ sequence preserved as single unit during wrap.""" - family = "\U0001F468\u200D\U0001F469\u200D\U0001F467" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" data = [[f"A{family}B"]] result = tabulate(data, tablefmt="plain", maxcolwidths=3) @@ -102,9 +103,9 @@ def test_zwj_not_broken_during_wrap(self): @requires_wcwidth_030 def test_flag_not_broken_during_wrap(self): """Regional indicator flag preserved as single unit during wrap.""" - us_flag = "\U0001F1FA\U0001F1F8" - gb_flag = "\U0001F1EC\U0001F1E7" - fr_flag = "\U0001F1EB\U0001F1F7" + us_flag = "\U0001f1fa\U0001f1f8" + gb_flag = "\U0001f1ec\U0001f1e7" + fr_flag = "\U0001f1eb\U0001f1f7" flags = us_flag + gb_flag + fr_flag data = [[flags]] @@ -121,7 +122,7 @@ def test_flag_not_broken_during_wrap(self): @requires_wcwidth_030 def test_vs16_not_broken_during_wrap(self): """VS16 variation selector kept with base character during wrap.""" - heart = "\u2764\uFE0F" + heart = "\u2764\ufe0f" data = [[heart * 3]] result = tabulate(data, tablefmt="plain", maxcolwidths=4) @@ -135,7 +136,7 @@ def test_vs16_not_broken_during_wrap(self): @requires_wcwidth_030 def test_skin_tone_modifier_not_broken(self): """Skin tone modifier preserved with emoji during wrap.""" - wave_light = "\U0001F44B\U0001F3FB" + wave_light = "\U0001f44b\U0001f3fb" data = [[f"Hi{wave_light}there"]] result = tabulate(data, tablefmt="plain", maxcolwidths=5) @@ -155,8 +156,8 @@ class TestComplexGraphemeClusters: @requires_wcwidth_030 def test_multiple_zwj_sequences_in_cell(self): """Multiple ZWJ sequences in single cell handled correctly.""" - family = "\U0001F468\u200D\U0001F469\u200D\U0001F467" - technologist = "\U0001F468\U0001F3FB\u200D\U0001F4BB" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" + technologist = "\U0001f468\U0001f3fb\u200d\U0001f4bb" data = [[f"{family} and {technologist}"]] result = tabulate(data, tablefmt="plain", maxcolwidths=15) @@ -170,7 +171,7 @@ def test_multiple_zwj_sequences_in_cell(self): @requires_wcwidth_030 def test_flags_with_text_wrap(self): """Flags interspersed with text wrap correctly.""" - us_flag = "\U0001F1FA\U0001F1F8" + us_flag = "\U0001f1fa\U0001f1f8" data = [[f"Visit {us_flag} USA today!"]] result = tabulate(data, tablefmt="plain", maxcolwidths=10) @@ -200,16 +201,17 @@ class TestAnsiWithGraphemeClusters: @requires_wcwidth def test_ansi_colored_zwj_width(self): """ANSI colored ZWJ emoji has correct width.""" - family = "\U0001F468\u200D\U0001F469\u200D\U0001F467" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" colored = f"\x1b[31m{family}\x1b[0m" from tabulate import _visible_width + assert _visible_width(colored) == 2 @requires_wcwidth def test_ansi_colored_zwj_alignment(self): """ANSI colored ZWJ emoji aligns correctly.""" - family = "\U0001F468\u200D\U0001F469\u200D\U0001F467" + family = "\U0001f468\u200d\U0001f469\u200d\U0001f467" colored = f"\x1b[31m{family}\x1b[0m" data = [ ["AB", "text"], @@ -219,6 +221,7 @@ def test_ansi_colored_zwj_alignment(self): lines = result.split("\n") from tabulate import _visible_width + border_width = _visible_width(lines[0]) for line in lines: assert _visible_width(line) == border_width @@ -226,15 +229,15 @@ def test_ansi_colored_zwj_alignment(self): @requires_wcwidth_030 def test_ansi_colored_flag_wrap(self): """ANSI colored flag not broken during wrap.""" - us_flag = "\U0001F1FA\U0001F1F8" + us_flag = "\U0001f1fa\U0001f1f8" colored = f"\x1b[34m{us_flag}\x1b[0m" data = [[f"A{colored}B"]] result = tabulate(data, tablefmt="plain", maxcolwidths=4) - assert "\U0001F1FA" in result - assert "\U0001F1F8" in result + assert "\U0001f1fa" in result + assert "\U0001f1f8" in result lines = [line.strip() for line in result.split("\n") if line.strip()] flag_parts_same_line = any( - "\U0001F1FA" in line and "\U0001F1F8" in line for line in lines + "\U0001f1fa" in line and "\U0001f1f8" in line for line in lines ) assert flag_parts_same_line diff --git a/test/test_internal.py b/test/test_internal.py index e7564d3..17107c6 100644 --- a/test/test_internal.py +++ b/test/test_internal.py @@ -180,7 +180,9 @@ def test_wrap_text_wide_chars(): except ImportError: skip("test_wrap_text_wide_chars is skipped") - rows = [["청자청자청자청자청자", "약간 감싸면 더 잘 보일 수있는 다소 긴 설명입니다"]] + rows = [ + ["청자청자청자청자청자", "약간 감싸면 더 잘 보일 수있는 다소 긴 설명입니다"] + ] widths = [5, 20] expected = [ [ @@ -244,7 +246,14 @@ def test_wrap_text_to_colwidths_colors_wide_char(): except ImportError: skip("test_wrap_text_to_colwidths_colors_wide_char is skipped") - data = [[("\033[31m약간 감싸면 더 잘 보일 수있는 다소 긴" " 설명입니다 설명입니다 설명입니다 설명입니다 설명\033[0m")]] + data = [ + [ + ( + "\033[31m약간 감싸면 더 잘 보일 수있는 다소 긴" + " 설명입니다 설명입니다 설명입니다 설명입니다 설명\033[0m" + ) + ] + ] result = T._wrap_text_to_colwidths(data, [30]) expected = [ diff --git a/test/test_output.py b/test/test_output.py index 12dfc3a..d7c225b 100644 --- a/test/test_output.py +++ b/test/test_output.py @@ -3320,6 +3320,7 @@ def test_preserve_whitespace(): result = tabulate(test_table, table_headers, preserve_whitespace=False) assert_equal(expected, result) + def test_break_long_words(): "Output: Default table output, with breakwords true." table_headers = ["h1", "h2", "h3"] @@ -3335,6 +3336,7 @@ def test_break_long_words(): result = tabulate(test_table, table_headers, maxcolwidths=3, break_long_words=True) assert_equal(expected, result) + def test_break_on_hyphens(): "Output: Default table output, with break on hyphens true." table_headers = ["h1", "h2", "h3"] diff --git a/test/test_textwrapper.py b/test/test_textwrapper.py index 185fad4..cfee621 100644 --- a/test/test_textwrapper.py +++ b/test/test_textwrapper.py @@ -8,12 +8,13 @@ from tabulate import _CustomTextWrap as CTW, _wrap_text, tabulate, _strip_ansi from textwrap import TextWrapper as OTW -from common import skip, assert_equal +from common import assert_equal try: import wcwidth + HAS_WCWIDTH = True - HAS_WCWIDTH_WRAP = hasattr(wcwidth, 'wrap') + HAS_WCWIDTH_WRAP = hasattr(wcwidth, "wrap") except ImportError: wcwidth = None HAS_WCWIDTH = False @@ -22,7 +23,7 @@ requires_wcwidth = pytest.mark.skipif(not HAS_WCWIDTH, reason="requires wcwidth") -@pytest.fixture(params=['wcwidth_wrap', 'custom_textwrap']) +@pytest.fixture(params=["wcwidth_wrap", "custom_textwrap"]) def wrap_backend(request): """Fixture to test both wrap backends: wcwidth.wrap and _CustomTextWrap fallback.""" # This ensures both code paths in _wrap_text() are tested: @@ -34,22 +35,20 @@ def wrap_backend(request): # # Path 2: fallback (tested by custom_textwrap) # return _CustomTextWrap(...).wrap(text) # - # and for tests that use it, eg. test_wrap_wide_char_multiword(wrap_backend), The tests assert - # the same expected output for both backends. This is good - it verifies both produce identical - # results. - if request.param == 'wcwidth_wrap': + # Tests using this fixture verify both backends produce identical results. + if request.param == "wcwidth_wrap": if not HAS_WCWIDTH_WRAP: pytest.skip("wcwidth.wrap not available") - yield 'wcwidth_wrap' + yield "wcwidth_wrap" else: # Mock wcwidth to not have wrap attribute, forcing _CustomTextWrap fallback if not HAS_WCWIDTH: pytest.skip("wcwidth not available") - mock_wcwidth = MagicMock(spec=['wcswidth', 'wcwidth']) + mock_wcwidth = MagicMock(spec=["wcswidth", "wcwidth"]) mock_wcwidth.wcswidth = wcwidth.wcswidth mock_wcwidth.wcwidth = wcwidth.wcwidth - with patch('tabulate.wcwidth', mock_wcwidth): - yield 'custom_textwrap' + with patch("tabulate.wcwidth", mock_wcwidth): + yield "custom_textwrap" def _nwrap(lines): From 27e168610f3dfd97b7c7158c7b09fb6931164f92 Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Sun, 25 Jan 2026 02:46:23 -0500 Subject: [PATCH 5/6] Be more restrictive with API ("nuclear") --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 54d359f..af91890 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dynamic = ["version"] Homepage = "https://github.com/astanin/python-tabulate" [project.optional-dependencies] -widechars = ["wcwidth>=0.3.5"] +widechars = ["wcwidth>=0.3.5,<1"] [project.scripts] tabulate = "tabulate:_main" From 7403f4068d00a98dcfc415b3f4ac7d596288c9fd Mon Sep 17 00:00:00 2001 From: Jeff Quast Date: Tue, 27 Jan 2026 03:17:10 -0500 Subject: [PATCH 6/6] Integrate with wcwidth 0.5 (_propagate_ansi not needed) --- pyproject.toml | 2 +- tabulate/__init__.py | 56 +++++----------------------------------- test/test_internal.py | 6 +++-- test/test_textwrapper.py | 2 +- 4 files changed, 13 insertions(+), 53 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index af91890..4e7d8f3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,7 @@ dynamic = ["version"] Homepage = "https://github.com/astanin/python-tabulate" [project.optional-dependencies] -widechars = ["wcwidth>=0.3.5,<1"] +widechars = ["wcwidth>=0.5.1"] [project.scripts] tabulate = "tabulate:_main" diff --git a/tabulate/__init__.py b/tabulate/__init__.py index 2be9eba..57e17d2 100644 --- a/tabulate/__init__.py +++ b/tabulate/__init__.py @@ -2696,58 +2696,16 @@ def _format_table( return "" -def _propagate_ansi_codes(lines): - """Propagate ANSI color codes across wrapped lines. - - When text with ANSI codes is wrapped by wcwidth.wrap, adjust each line to: - - Start with any active color codes from previous lines - - End with a reset if colors are active (to prevent bleeding into other cells) - - This function implements some amount of _CustomTextWrap's behavior as a post-processing step of - the 3rd-party wcwidth.wrap(), which also preserves and parses sequences, but adjusts them to - match the behavior of _CustomTextWrap, but to also benefit by its grapheme, emoji/flags, wide - characters. - """ - result = [] - active_codes = [] - - for line in lines: - code_matches = list(_ansi_codes.finditer(line)) - color_codes = [ - code.string[code.span()[0] : code.span()[1]] for code in code_matches - ] - next_line = "".join(active_codes) + line - - # Track codes for subsequent lines - for code in color_codes: - if code == _ansi_color_reset_code: - active_codes = [] - continue - active_codes.append(code) - - # Append reset if any colors are active - if active_codes: - next_line += _ansi_color_reset_code - - result.append(next_line) - - return result - - def _wrap_text(text, width, break_long_words=True, break_on_hyphens=True): """Wrap text to width with wide character and ANSI code support.""" - # wcwidth >= 0.3.0 has wrap() with proper grapheme cluster support, + # wcwidth >= 0.5.0 has wrap() with proper grapheme cluster support and + # propagate_sgr=True by default, which handles ANSI code propagation natively. if wcwidth is not None and hasattr(wcwidth, "wrap"): - # but it doesn't break, reset, then continue sequences the way this library requires, so - # _propagate_ansi_codes() is applied afterwards to match the same result as the built-in - # non-wcwidth implementation below. - return _propagate_ansi_codes( - wcwidth.wrap( - text, - width, - break_long_words=break_long_words, - break_on_hyphens=break_on_hyphens, - ) + return wcwidth.wrap( + text, + width, + break_long_words=break_long_words, + break_on_hyphens=break_on_hyphens, ) else: # Fallback for wcwidth < 0.3.0 or no wcwidth diff --git a/test/test_internal.py b/test/test_internal.py index 17107c6..f1960e7 100644 --- a/test/test_internal.py +++ b/test/test_internal.py @@ -284,13 +284,15 @@ def test_wrap_text_to_colwidths_multi_ansi_colors_full_cell(): ] result = T._wrap_text_to_colwidths(data, [30]) + # wcwidth.propagate_sgr combines separate SGR codes into single sequences + # (e.g., \x1b[31m\x1b[43m -> \x1b[31;43m), which is functionally equivalent expected = [ [ "\n".join( [ "\033[31m\033[43mThis is a rather long\033[0m", - "\033[31m\033[43mdescription that might look\033[0m", - "\033[31m\033[43mbetter if it is wrapped a bit\033[0m", + "\033[31;43mdescription that might look\033[0m", + "\033[31;43mbetter if it is wrapped a bit\033[0m", ] ) ] diff --git a/test/test_textwrapper.py b/test/test_textwrapper.py index cfee621..b592270 100644 --- a/test/test_textwrapper.py +++ b/test/test_textwrapper.py @@ -30,7 +30,7 @@ def wrap_backend(request): # def _wrap_text(text, width, ...): # if wcwidth is not None and hasattr(wcwidth, "wrap"): # # Path 1: wcwidth.wrap (tested by wcwidth_wrap) - # return _propagate_ansi_codes(wcwidth.wrap(...)) + # return wcwidth.wrap(...) # else: # # Path 2: fallback (tested by custom_textwrap) # return _CustomTextWrap(...).wrap(text)