From 0f05b61d399f47e11843855a1ac1d6303352f79b Mon Sep 17 00:00:00 2001 From: ohmayr Date: Thu, 15 Jan 2026 19:24:49 +0000 Subject: [PATCH] improve docs performance --- gapic/utils/rst.py | 126 ++++++++++++++++++++++++++------------------- 1 file changed, 74 insertions(+), 52 deletions(-) diff --git a/gapic/utils/rst.py b/gapic/utils/rst.py index dd1d65d75d..44c9d53bc8 100644 --- a/gapic/utils/rst.py +++ b/gapic/utils/rst.py @@ -13,12 +13,53 @@ # limitations under the License. import re -from typing import Optional +from typing import Optional, Dict import pypandoc # type: ignore from gapic.utils.lines import wrap +# Cache for the few complex items we actually send to pandoc +_RAW_RST_CACHE: Dict[str, str] = {} + +def _tuned_fast_convert(text: str) -> Optional[str]: + """ + Converts Markdown to RST using pure Python. + Only falls back to Pandoc for Tables and Images. + """ + # --- 1. FALLBACKS --- + # Tables (pipe surrounded by spaces) or Images (![). + # We allow "][" (Reference Links) to be handled by Python now. + if (re.search(r" \| ", text) or re.search(r"\|\n", text)) or "![" in text: + return None + + # --- 2. CONVERSION --- + + # A. CODE BLOCKS: `code` -> ``code`` + # CRITICAL: Run this FIRST. This ensures we handle existing backticks + # before we create NEW backticks for links. + # (? `Text `__ + # We fix the broken documentation by converting these to valid RST links. + # Since step A is done, these new backticks will NOT be doubled. + converted = re.sub(r"\[([^\]]+)\]\[([^\]]+)\]", r"`\1 <\2>`__", converted) + + # C. STANDARD LINKS: [Text](URL) -> `Text `__ + converted = re.sub(r"\[([^\]]+)\]\(([^)]+)\)", r"`\1 <\2>`__", converted) + + # D. BOLD/ITALICS: + converted = re.sub(r"(? Heading\n======= + converted = re.sub(r"^# (.*)$", r"\1\n" + "=" * 10, converted, flags=re.MULTILINE) + converted = re.sub(r"^## (.*)$", r"\1\n" + "-" * 10, converted, flags=re.MULTILINE) + + # F. LISTS: Markdown (- item) needs a preceding newline for RST. + converted = re.sub(r"(\n[^-*].*)\n\s*([-*] )", r"\1\n\n\2", converted) + + return converted def rst( text: str, @@ -27,60 +68,41 @@ def rst( nl: Optional[bool] = None, source_format: str = "commonmark", ): - """Convert the given text to ReStructured Text. - - Args: - text (str): The text to convert. - width (int): The number of columns. - indent (int): The number of columns to indent each line of text - (except the first). - nl (bool): Whether to append a trailing newline. - Defaults to appending a newline if the result is more than - one line long. - source_format (str): The source format. This is ``commonmark`` by - default, which is what is used by convention in protocol buffers. - - Returns: - str: The same text, in RST format. - """ - # Quick check: If the text block does not appear to have any formatting, - # do not convert it. - # (This makes code generation significantly faster; calling out to pandoc - # is by far the most expensive thing we do.) - if not re.search(r"[|*`_[\]]", text): - answer = wrap( - text, - indent=indent, - offset=indent + 3, - width=width - indent, - ) + # 1. Super Fast Path: No special chars? Just wrap. + if not re.search(r"[|*`_[\]#]", text): + answer = wrap(text, indent=indent, offset=indent + 3, width=width - indent) + return _finalize(answer, nl, indent) + + # 2. Check Cache + if text in _RAW_RST_CACHE: + raw_rst = _RAW_RST_CACHE[text] else: - # Convert from CommonMark to ReStructured Text. - answer = ( - pypandoc.convert_text( - text, - "rst", - format=source_format, - verify_format=False, - extra_args=["--columns=%d" % (width - indent)], - ) - .strip() - .replace("\n", f"\n{' ' * indent}") - ) - - # Add a newline to the end of the document if any line breaks are - # already present. - # - # This causes the closing """ to be on the subsequent line only when - # appropriate. + # 3. Try Tuned Python Convert (Fastest) + fast_result = _tuned_fast_convert(text) + + if fast_result is not None: + raw_rst = fast_result.strip() + else: + # 4. Fallback to Pandoc (Only for Tables/Images) + raw_rst = pypandoc.convert_text( + text, "rst", format=source_format, extra_args=["--columns=1000"] + ).strip() + + _RAW_RST_CACHE[text] = raw_rst + + # 5. Python Formatting + if "::" in raw_rst or ".. code" in raw_rst: + answer = raw_rst.replace("\n", f"\n{' ' * indent}") + else: + answer = wrap(raw_rst, indent=indent, offset=indent, width=width - indent) + + return _finalize(answer, nl, indent) + + +def _finalize(answer, nl, indent): + """Helper to handle trailing newlines and quotes.""" if nl or ("\n" in answer and nl is None): answer += "\n" + " " * indent - - # If the text ends in a double-quote, append a period. - # This ensures that we do not get a parse error when this output is - # followed by triple-quotes. if answer.endswith('"'): answer += "." - - # Done; return the answer. return answer