From 7b57e91d503a9550a01180260f52f1cef23d51ba Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 29 Apr 2026 18:27:07 +0000 Subject: [PATCH 1/3] Initial plan From b31353c9ba1e7e45dca7dbad4c77bc925505b218 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 29 Apr 2026 18:41:13 +0000 Subject: [PATCH 2/3] Serve clean .md versions of every doc page Agent-Logs-Url: https://github.com/FalkorDB/docs/sessions/a22cc9fa-6b51-4c1d-821b-103e4833d324 Co-authored-by: gkorland <753206+gkorland@users.noreply.github.com> --- .github/workflows/pages.yml | 53 +++++++++ .gitignore | 6 + .wordlist.txt | 2 + Gemfile | 32 ++++++ README.md | 28 +++++ _config.yml | 1 + _includes/code_tabs.html | 15 +++ _plugins/markdown_pages.rb | 147 +++++++++++++++++++++++++ markdown-content-negotiation.worker.js | 79 +++++++++++++ 9 files changed, 363 insertions(+) create mode 100644 .github/workflows/pages.yml create mode 100644 Gemfile create mode 100644 _plugins/markdown_pages.rb create mode 100644 markdown-content-negotiation.worker.js diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml new file mode 100644 index 00000000..10a9012c --- /dev/null +++ b/.github/workflows/pages.yml @@ -0,0 +1,53 @@ +name: Deploy Jekyll site to Pages + +# Builds the Jekyll site (with custom plugins enabled, which the default +# GitHub Pages builder does not support) and deploys it to GitHub Pages. + +on: + push: + branches: [ "main" ] + workflow_dispatch: + +permissions: + contents: read + pages: write + id-token: write + +# Allow only one concurrent deployment, skipping runs queued between the +# run in-progress and latest queued. However, do NOT cancel in-progress +# runs as we want to allow these production deployments to complete. 
+concurrency: + group: "pages" + cancel-in-progress: false + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 + - name: Setup Ruby + uses: ruby/setup-ruby@c4e5b1316158f92e3d49443a9d58b31d25ac0f8f # v1.306.0 + with: + ruby-version: '3.2' + bundler-cache: true + - name: Setup Pages + id: pages + uses: actions/configure-pages@45bfe0192ca1faeb007ade9deae92b16b8254a0d # v6.0.0 + - name: Build with Jekyll + env: + JEKYLL_ENV: production + run: bundle exec jekyll build --baseurl "${{ steps.pages.outputs.base_path }}" + - name: Upload artifact + uses: actions/upload-pages-artifact@fc324d3547104276b827a68afc52ff2a11cc49c9 # v5.0.0 + + deploy: + environment: + name: github-pages + url: ${{ steps.deployment.outputs.page_url }} + runs-on: ubuntu-latest + needs: build + steps: + - name: Deploy to GitHub Pages + id: deployment + uses: actions/deploy-pages@cd2ce8fcbc39b97be8ca5fce6e763baed58fa128 # v5.0.0 diff --git a/.gitignore b/.gitignore index 6a11587b..3cbc540c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,8 @@ _site/ *.dic +.jekyll-cache/ +.jekyll-metadata +.sass-cache/ +Gemfile.lock +vendor/ +.bundle/ diff --git a/.wordlist.txt b/.wordlist.txt index 8c1a1543..20908419 100644 --- a/.wordlist.txt +++ b/.wordlist.txt @@ -828,3 +828,5 @@ ObjectPool QuickJS ACLs filesystem +CDN +Cloudflare diff --git a/Gemfile b/Gemfile new file mode 100644 index 00000000..2472a745 --- /dev/null +++ b/Gemfile @@ -0,0 +1,32 @@ +source "https://rubygems.org" + +# Jekyll and the Just-the-Docs theme. Pinned to the GitHub Pages +# compatible Jekyll 3.x line so that local builds match production. +gem "jekyll", "~> 3.10" +gem "just-the-docs" + +group :jekyll_plugins do + gem "jekyll-sitemap" + gem "jekyll-redirect-from" + gem "jekyll-seo-tag" + gem "jekyll-remote-theme" +end + +# Markdown processor used by GitHub Pages. 
+gem "kramdown-parser-gfm" + +# Windows and JRuby do not include zoneinfo files; bundle the tzinfo-data +# gem and associated library. +platforms :mingw, :x64_mingw, :mswin, :jruby do + gem "tzinfo", ">= 1", "< 3" + gem "tzinfo-data" +end + +# Performance booster for watching directories on Windows. +gem "wdm", "~> 0.1.1", :platforms => [:mingw, :x64_mingw, :mswin] + +# HTTP server adapter required by Ruby 3.x. +gem "webrick", "~> 1.8" + +# CSV gem is no longer in the default gem set on Ruby 3.4+. +gem "csv" diff --git a/README.md b/README.md index d987e97c..eada39d2 100644 --- a/README.md +++ b/README.md @@ -22,6 +22,34 @@ FalkorDB is a low-latency, scalable graph database with OpenCypher support. It p - **Operations**: Deployment, clustering, and infrastructure - **Integration**: Third-party tool integrations +## AI-Friendly Markdown Versions + +Every documentation page is also published as plain markdown so AI tools +and crawlers can ingest the content without parsing HTML. + +- **Per-page `.md` URLs** — append `.md` to any page URL (or replace + the trailing `.html`) to get the markdown source. For example: + - `https://docs.falkordb.com/commands/graph.query.html` + → `https://docs.falkordb.com/commands/graph.query.md` + - `https://docs.falkordb.com/` → `https://docs.falkordb.com/index.md` +- **Manifest** — a complete list of every markdown URL is published + at [`/.well-known/markdown-index.txt`](https://docs.falkordb.com/.well-known/markdown-index.txt). +- **Clean content** — the markdown export contains only the page + body. Front matter, navigation, footer and other site chrome are + stripped, and tabbed code-sample widgets are flattened into ordinary + fenced code blocks. +- **Content negotiation** — requests with an + `Accept: text/markdown` header are answered with the markdown version + of the requested page. 
This is implemented at the CDN edge; see + [`markdown-content-negotiation.worker.js`](markdown-content-negotiation.worker.js) + for the reference Cloudflare Worker. + +The markdown export is produced by the +[`_plugins/markdown_pages.rb`](_plugins/markdown_pages.rb) Jekyll +plugin during the GitHub Actions Pages build +([`.github/workflows/pages.yml`](.github/workflows/pages.yml)). To opt a +page out, set `markdown_skip: true` in its front matter. + ## Prerequisites To build and run the documentation locally, you need: diff --git a/_config.yml b/_config.yml index 1c5f50b5..297e942b 100644 --- a/_config.yml +++ b/_config.yml @@ -45,6 +45,7 @@ exclude: - Gemfile.lock - node_modules - vendor + - markdown-content-negotiation.worker.js # Footer last edited timestamp last_edit_timestamp: true diff --git a/_includes/code_tabs.html b/_includes/code_tabs.html index 5ee02ef6..1b669319 100644 --- a/_includes/code_tabs.html +++ b/_includes/code_tabs.html @@ -1,3 +1,17 @@ +{%- comment -%} + When rendered for the AI-readable markdown export + (`_plugins/markdown_pages.rb` sets the `markdown_format` variable), + emit each code variant as a fenced code block instead of the tabbed + HTML widget so the markdown output is clean and self-contained. +{%- endcomment -%} +{% if markdown_format -%} +{% assign codes = "python,javascript,cpp,rust,java,shell" | split: "," -%} +{% for code_name in codes -%}{% if include[code_name] %} +```{{ code_name }} +{{ include[code_name] }} +``` +{% endif -%}{% endfor -%} +{% else %}
{% assign codes = "python,javascript,cpp,rust,java,shell" | split: "," %} @@ -97,3 +111,4 @@
+{% endif %}
diff --git a/_plugins/markdown_pages.rb b/_plugins/markdown_pages.rb
new file mode 100644
index 00000000..449b1a5d
--- /dev/null
+++ b/_plugins/markdown_pages.rb
@@ -0,0 +1,147 @@
+# frozen_string_literal: true
+
+# markdown_pages.rb
+#
+# Emits a clean Markdown version of every documentation page so AI tools
+# (and humans) can fetch the raw content.
+#
+# For a page rendered at <path>.html (or a "pretty" URL like <path>/), this
+# plugin writes a sibling file at <path>.md containing only the page body
+# (front matter, layouts, navigation, footer and other site chrome are
+# excluded). Liquid tags and `{% include %}` directives are expanded so
+# the output is self-contained markdown.
+#
+# Usage notes
+# -----------
+# * A page can opt out by setting `markdown_skip: true` in its front
+#   matter.
+# * Pages with `layout: redirect` are skipped automatically.
+# * The plugin also writes `_site/.well-known/markdown-index.txt`, a
+#   newline-separated list of every emitted markdown URL, useful for
+#   crawlers and AI ingestion pipelines.
+#
+# Content negotiation
+# -------------------
+# This plugin produces static `.md` files only. Honouring an
+# `Accept: text/markdown` request header requires server/CDN logic
+# (for example, a Cloudflare Worker or Netlify `_redirects` rule that
+# rewrites the request to the `.md` URL when the header is present).
+# A reference Cloudflare Worker is shipped at
+# `markdown-content-negotiation.worker.js` in the repository root.
+
+require "fileutils"
+
+module FalkorDocs
+  module MarkdownPages
+    SKIP_LAYOUTS = %w[redirect].freeze
+
+    module_function
+
+    # Compute the on-disk path (inside `site.dest`) for the markdown
+    # version of a page given its rendered URL.
+    def target_path(site, url)
+      return File.join(site.dest, "index.md") if url.nil? || url == "/" || url.empty?
+
+      if url.end_with?("/")
+        File.join(site.dest, url, "index.md")
+      elsif url.end_with?(".html")
+        File.join(site.dest, url.sub(/\.html\z/, ".md"))
+      else
+        File.join(site.dest, "#{url}.md")
+      end
+    end
+
+    # Compute the public URL where the markdown version is served.
+    def target_url(url)
+      return "/index.md" if url.nil? || url == "/" || url.empty?
+
+      if url.end_with?("/")
+        "#{url}index.md"
+      elsif url.end_with?(".html")
+        url.sub(/\.html\z/, ".md")
+      else
+        "#{url}.md"
+      end
+    end
+
+    # True when `page` is a markdown source that should get a `.md` export.
+    def eligible?(page)
+      return false unless page.path.end_with?(".md", ".markdown")
+      return false if page.data["markdown_skip"] # explicit per-page opt-out
+      return false if SKIP_LAYOUTS.include?(page.data["layout"].to_s)
+      return false if page.url.to_s.end_with?(".md") # rendered URL is already a .md path
+
+      true
+    end
+
+    # Render the page's source content through Liquid only (no Markdown
+    # conversion) and write the result to disk. We re-read the source
+    # file from disk because by the time the `:site, :post_write` hook
+    # runs Jekyll has already mutated `page.content` in place to be the
+    # converted HTML output. Returns the public URL of the written file.
+    def write(site, page)
+      out_path = target_path(site, page.url)
+      FileUtils.mkdir_p(File.dirname(out_path))
+
+      source = read_source(page)
+
+      payload = site.site_payload.merge(
+        "page" => page.to_liquid,
+        "markdown_format" => true,
+      )
+      info = {
+        registers: { site: site, page: page.to_liquid },
+        strict_filters: false,
+        strict_variables: false,
+      }
+
+      rendered =
+        begin
+          template = site.liquid_renderer.file("#{page.path}.md").parse(source)
+          template.render!(payload, info)
+        rescue StandardError => e
+          Jekyll.logger.warn("MarkdownPages:", "Liquid render failed for #{page.path}: #{e.message}")
+          source
+        end
+
+      File.write(out_path, rendered)
+      target_url(page.url)
+    end
+
+    # Read the original markdown source from disk and strip the YAML
+    # front matter block. Anything between the leading `---` markers is
+    # site chrome metadata (title, layout, nav order, etc.) and not
+    # part of the page body.
+    def read_source(page)
+      raw = File.read(File.join(page.site.source, page.relative_path))
+      # Match with Jekyll's own front matter pattern so an empty block or a
+      # closing marker at end-of-file is stripped exactly as Jekyll parsed
+      # it; files without front matter are returned untouched.
+      front_matter = Jekyll::Document::YAML_FRONT_MATTER_REGEXP.match(raw)
+      front_matter ? front_matter.post_match.lstrip : raw
+    end
+
+    # Write a manifest of every emitted markdown URL.
+    def write_index(site, urls)
+      return if urls.empty?
+
+      index_path = File.join(site.dest, ".well-known", "markdown-index.txt")
+      FileUtils.mkdir_p(File.dirname(index_path))
+      base = (site.config["url"] || "").to_s.sub(%r{/\z}, "")
+      lines = urls.sort.uniq.map { |u| "#{base}#{u}" }
+      File.write(index_path, "#{lines.join("\n")}\n")
+    end
+  end
+end
+
+Jekyll::Hooks.register :site, :post_write do |site|
+  emitted = []
+  site.pages.each do |page|
+    next unless FalkorDocs::MarkdownPages.eligible?(page)
+
+    emitted << FalkorDocs::MarkdownPages.write(site, page)
+  end
+  FalkorDocs::MarkdownPages.write_index(site, emitted)
+  Jekyll.logger.info("MarkdownPages:",
+                     "wrote #{emitted.size} markdown page(s)")
+end
diff --git a/markdown-content-negotiation.worker.js b/markdown-content-negotiation.worker.js
new file mode 100644
index 00000000..428bee35
--- /dev/null
+++ b/markdown-content-negotiation.worker.js
@@ -0,0 +1,79 @@
+/**
+ * Cloudflare Worker / Pages Function: Markdown content negotiation for
+ * docs.falkordb.com.
+ *
+ * The Jekyll build emits a clean `.md` version of every documentation
+ * page next to its `.html` counterpart (see `_plugins/markdown_pages.rb`).
+ * This worker honours the `Accept` header so AI tools that prefer
+ * markdown can request the same URL and get markdown back without
+ * having to know the `.md` URL pattern.
+ *
+ * Behaviour
+ * ---------
+ * - If the request `Accept` header is `text/markdown` (or
+ *   `text/x-markdown` / `text/plain`) and the request is not already
+ *   for a `.md` URL, the worker rewrites the request internally to the
+ *   corresponding `.md` URL on the same origin and returns its body
+ *   with `Content-Type: text/markdown; charset=utf-8`.
+ * - All other requests, including non-GET/HEAD ones, are passed through unchanged.
+ *
+ * Mapping rules (mirroring the Jekyll plugin):
+ *   /                 -> /index.md
+ *   /foo/             -> /foo/index.md
+ *   /foo/bar.html     -> /foo/bar.md
+ *   /foo/bar          -> /foo/bar.md
+ *
+ * Deployment
+ * ----------
+ * Bind this worker to the `docs.falkordb.com/*` route in Cloudflare,
+ * or use it as a Pages Function placed at `functions/_middleware.js`.
+ */
+
+const MARKDOWN_ACCEPT = /(?:^|,\s*)(text\/markdown|text\/x-markdown)\b/i;
+
+function markdownPathFor(pathname) {
+  if (pathname === "" || pathname === "/") return "/index.md";
+  if (pathname.endsWith("/")) return pathname + "index.md";
+  if (pathname.endsWith(".md")) return pathname; // already markdown: caller passes through
+  if (pathname.endsWith(".html")) return pathname.slice(0, -5) + ".md";
+  // Skip URLs that already point at a static asset with an extension.
+  const lastSegment = pathname.slice(pathname.lastIndexOf("/") + 1);
+  if (lastSegment.includes(".")) return null; // null => no markdown sibling to serve
+  return pathname + ".md";
+}
+
+export default {
+  async fetch(request) {
+    const accept = request.headers.get("accept") || "";
+    if (!["GET", "HEAD"].includes(request.method) || !MARKDOWN_ACCEPT.test(accept)) {
+      return fetch(request);
+    }
+
+    const url = new URL(request.url);
+    const mdPath = markdownPathFor(url.pathname);
+    if (!mdPath || mdPath === url.pathname) {
+      return fetch(request);
+    }
+
+    const mdUrl = new URL(url.toString());
+    mdUrl.pathname = mdPath;
+
+    const upstream = await fetch(mdUrl.toString(), {
+      method: request.method,
+      headers: { accept: "text/markdown" },
+    });
+    if (!upstream.ok) {
+      // No usable .md sibling: fall back to the original response.
+      return fetch(request);
+    }
+
+    const headers = new Headers(upstream.headers);
+    headers.set("Content-Type", "text/markdown; charset=utf-8");
+    headers.append("Vary", "Accept"); // append so upstream Vary values are preserved
+    return new Response(upstream.body, {
+      status: upstream.status,
+      statusText: upstream.statusText,
+      headers,
+    });
+  },
+};

From c7942877e0a7cdbbac8826a3f9b0e2531d7f5400 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Wed, 29 Apr 2026 18:44:00 +0000
Subject: [PATCH 3/3] Address review feedback: include text/plain in Accept regex; remove redundant to_s

Agent-Logs-Url: https://github.com/FalkorDB/docs/sessions/a22cc9fa-6b51-4c1d-821b-103e4833d324
Co-authored-by: gkorland <753206+gkorland@users.noreply.github.com>
---
 _plugins/markdown_pages.rb             | 2 +-
 markdown-content-negotiation.worker.js | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/_plugins/markdown_pages.rb b/_plugins/markdown_pages.rb
index 449b1a5d..e41f8b39 100644
--- a/_plugins/markdown_pages.rb
+++ b/_plugins/markdown_pages.rb
@@ -127,7 +127,7 @@ def write_index(site, urls)
 
       index_path = File.join(site.dest, ".well-known", "markdown-index.txt")
       FileUtils.mkdir_p(File.dirname(index_path))
-      base = (site.config["url"] || "").to_s.sub(%r{/\z}, "")
+      base = (site.config["url"] || "").sub(%r{/\z}, "")
       lines = urls.sort.uniq.map { |u| "#{base}#{u}" }
       File.write(index_path, "#{lines.join("\n")}\n")
     end
diff --git a/markdown-content-negotiation.worker.js b/markdown-content-negotiation.worker.js
index 428bee35..8d98a24e 100644
--- a/markdown-content-negotiation.worker.js
+++ b/markdown-content-negotiation.worker.js
@@ -29,7 +29,7 @@
  * or use it as a Pages Function placed at `functions/_middleware.js`.
  */
 
-const MARKDOWN_ACCEPT = /(?:^|,\s*)(text\/markdown|text\/x-markdown)\b/i;
+const MARKDOWN_ACCEPT = /(?:^|,\s*)(text\/markdown|text\/x-markdown|text\/plain)\b/i;
 
 function markdownPathFor(pathname) {
   if (pathname === "" || pathname === "/") return "/index.md";