From 42a453c2114e74f6ff32752b530bce720811f2b7 Mon Sep 17 00:00:00 2001 From: Laszlo Takacs Date: Mon, 20 Apr 2026 15:58:52 +0200 Subject: [PATCH 1/5] Add Django middleware --- .env.example | 2 + .gitignore | 7 +++ README.md | 57 ++++++++++++++++++++++ prerender_django/__init__.py | 0 prerender_django/middleware.py | 89 ++++++++++++++++++++++++++++++++++ pyproject.toml | 31 ++++++++++++ tests/__init__.py | 0 tests/settings.py | 3 ++ tests/test_middleware.py | 69 ++++++++++++++++++++++++++ 9 files changed, 258 insertions(+) create mode 100644 .env.example create mode 100644 .gitignore create mode 100644 README.md create mode 100644 prerender_django/__init__.py create mode 100644 prerender_django/middleware.py create mode 100644 pyproject.toml create mode 100644 tests/__init__.py create mode 100644 tests/settings.py create mode 100644 tests/test_middleware.py diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..07bced2 --- /dev/null +++ b/.env.example @@ -0,0 +1,2 @@ +PRERENDER_TOKEN= +PRERENDER_SERVICE_URL= diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3c9f148 --- /dev/null +++ b/.gitignore @@ -0,0 +1,7 @@ +.venv/ +__pycache__/ +*.pyc +*.egg-info/ +dist/ +build/ +.env diff --git a/README.md b/README.md new file mode 100644 index 0000000..9d4d1ea --- /dev/null +++ b/README.md @@ -0,0 +1,57 @@ +# prerender-django + +Django middleware for [Prerender.io](https://prerender.io). Intercepts requests from bots and crawlers and serves prerendered HTML, so your JavaScript-rendered app is fully indexable by search engines and social media scrapers. + +Compatible with **Django 5+** and **Python 3.10+**. + +## Installation + +```bash +pip install prerender-django +``` + +## Setup + +Add the middleware to your `settings.py`: + +```python +MIDDLEWARE = [ + 'prerender_django.middleware.PrerenderMiddleware', + # ... your other middleware +] + +PRERENDER_TOKEN = 'YOUR_PRERENDER_TOKEN' +``` + +The middleware must be placed **before** any session or authentication middleware to intercept bot requests early. + +## Settings + +| Setting | Default | Description | +|---------|---------|-------------| +| `PRERENDER_TOKEN` | `None` | Your Prerender.io token | +| `PRERENDER_SERVICE_URL` | `https://service.prerender.io/` | Prerender service URL (use this for self-hosted Prerender) | + +## Self-hosted Prerender + +```python +PRERENDER_SERVICE_URL = 'http://your-prerender-server:3000' +``` + +## How it works + +Requests are prerendered when **all** of the following are true: + +- The HTTP method is `GET` +- The `User-Agent` matches a known bot/crawler (Googlebot, Bingbot, Twitterbot, GPTBot, ClaudeBot, etc.) + — OR the URL contains `_escaped_fragment_` + — OR the `X-Bufferbot` header is present +- The URL does not end with a static asset extension (`.js`, `.css`, `.png`, etc.) + +Everything else passes through to your normal Django views. + +If the Prerender service is unreachable, the middleware falls back gracefully and serves the normal response. + +## License + +MIT diff --git a/prerender_django/__init__.py b/prerender_django/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/prerender_django/middleware.py b/prerender_django/middleware.py new file mode 100644 index 0000000..a982f1c --- /dev/null +++ b/prerender_django/middleware.py @@ -0,0 +1,89 @@ +import logging +import urllib.error +import urllib.request + +from django.conf import settings +from django.http import HttpResponse + +logger = logging.getLogger(__name__) + +CRAWLER_USER_AGENTS = [ + 'googlebot', 'yahoo', 'bingbot', 'baiduspider', + 'facebookexternalhit', 'twitterbot', 'rogerbot', 'linkedinbot', + 'embedly', 'quora link preview', 'showyoubot', 'outbrain', + 'pinterest', 'slackbot', 'developers.google.com/+/web/snippet', + 'w3c_validator', 'perplexity', 'oai-searchbot', 'chatgpt-user', + 'gptbot', 'claudebot', 'amazonbot', +] + +EXTENSIONS_TO_IGNORE = frozenset([ + '.js', '.css', '.xml', '.less', '.png', '.jpg', '.jpeg', '.gif', + '.pdf', '.doc', '.txt', '.ico', '.rss', '.zip', '.mp3', '.rar', + '.exe', '.wmv', '.avi', '.ppt', '.mpg', '.mpeg', '.tif', '.wav', + '.mov', '.psd', '.ai', '.xls', '.mp4', '.m4a', '.swf', '.dat', + '.dmg', '.iso', '.flv', '.m4v', '.torrent', '.ttf', '.woff', '.svg', +]) + + +def _setting(name, default=None): + return getattr(settings, f'PRERENDER_{name}', default) + + +def _is_bot(user_agent): + ua = user_agent.lower() + return any(bot in ua for bot in CRAWLER_USER_AGENTS) + + +def _is_static_asset(path): + return any(path.endswith(ext) for ext in EXTENSIONS_TO_IGNORE) + + +def _should_prerender(request): + user_agent = request.META.get('HTTP_USER_AGENT', '') + if not user_agent or request.method != 'GET': + return False + if _is_static_asset(request.path): + return False + if '_escaped_fragment_' in request.GET: + return True + if request.META.get('HTTP_X_BUFFERBOT'): + return True + return _is_bot(user_agent) + + +def _build_api_url(request): + service_url = _setting('SERVICE_URL', 'https://service.prerender.io/') + if not service_url.endswith('/'): + service_url += '/' + return f'{service_url}{request.build_absolute_uri()}' + + +def _fetch_prerendered(api_url, user_agent): + token = _setting('TOKEN') + req = urllib.request.Request(api_url) + req.add_header('User-Agent', user_agent) + if token: + req.add_header('X-Prerender-Token', token) + try: + with urllib.request.urlopen(req) as resp: + return resp.status, resp.read().decode('utf-8') + except urllib.error.HTTPError as e: + return e.code, e.read().decode('utf-8') + + +class PrerenderMiddleware: + def __init__(self, get_response): + self.get_response = get_response + + def __call__(self, request): + if not _should_prerender(request): + return self.get_response(request) + + try: + api_url = _build_api_url(request) + user_agent = request.META.get('HTTP_USER_AGENT', '') + status, body = _fetch_prerendered(api_url, user_agent) + return HttpResponse(body, status=status, content_type='text/html') + except urllib.error.URLError as e: + logger.error('Prerender error, falling back: %s', e) + return self.get_response(request) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..30a979d --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,31 @@ +[build-system] +requires = ["setuptools>=68"] +build-backend = "setuptools.build_meta" + +[project] +name = "prerender-django" +version = "1.0.1" +description = "Django middleware for prerendering JavaScript-rendered pages for SEO via Prerender.io" +authors = [{ name = "Prerender.io" }] +license = "MIT" +readme = "README.md" +requires-python = ">=3.10" +keywords = ["django", "prerender", "prerender.io", "seo", "middleware"] +dependencies = [] + +[project.urls] +Repository = "https://github.com/prerender/integrations" + +[project.optional-dependencies] +dev = [ + "django>=5.0", + "pytest>=8.0", + "pytest-django>=4.8", +] + +[tool.pytest.ini_options] +DJANGO_SETTINGS_MODULE = "tests.settings" +pythonpath = ["."] + +[tool.setuptools.packages.find] +include = ["prerender_django*"] diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/settings.py b/tests/settings.py new file mode 100644 index 0000000..07608ba --- /dev/null +++ b/tests/settings.py @@ -0,0 +1,3 @@ +SECRET_KEY = 'test-secret-key' +DATABASES = {} +INSTALLED_APPS = [] diff --git a/tests/test_middleware.py b/tests/test_middleware.py new file mode 100644 index 0000000..d78b9f5 --- /dev/null +++ b/tests/test_middleware.py @@ -0,0 +1,69 @@ +import urllib.error +from unittest.mock import MagicMock, patch + +from django.http import HttpResponse +from django.test import RequestFactory + +from prerender_django.middleware import PrerenderMiddleware + +BOT_UA = 'Mozilla/5.0 (compatible; Googlebot/2.1)' +BROWSER_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36' +PRERENDERED_HTML = 'prerendered' + +factory = RequestFactory() + + +def normal_response(_request): + return HttpResponse('original') + + +def mock_urlopen(status=200, body=PRERENDERED_HTML): + cm = MagicMock() + cm.__enter__ = MagicMock(return_value=cm) + cm.__exit__ = MagicMock(return_value=False) + cm.status = status + cm.read.return_value = body.encode('utf-8') + return cm + + +def test_browser_passes_through(): + middleware = PrerenderMiddleware(normal_response) + request = factory.get('/', HTTP_USER_AGENT=BROWSER_UA) + response = middleware(request) + assert response.status_code == 200 + assert response.content == b'original' + + +def test_bot_receives_prerendered_response(): + middleware = PrerenderMiddleware(normal_response) + request = factory.get('/', HTTP_USER_AGENT=BOT_UA) + with patch('urllib.request.urlopen', return_value=mock_urlopen()): + response = middleware(request) + assert response.status_code == 200 + assert PRERENDERED_HTML in response.content.decode() + + +def test_static_asset_with_bot_ua_passes_through(): + middleware = PrerenderMiddleware(normal_response) + request = factory.get('/style.css', HTTP_USER_AGENT=BOT_UA) + response = middleware(request) + assert response.status_code == 200 + assert response.content == b'original' + + +def test_escaped_fragment_triggers_prerender(): + middleware = PrerenderMiddleware(normal_response) + request = factory.get('/', {'_escaped_fragment_': ''}, HTTP_USER_AGENT=BROWSER_UA) + with patch('urllib.request.urlopen', return_value=mock_urlopen()): + response = middleware(request) + assert response.status_code == 200 + assert PRERENDERED_HTML in response.content.decode() + + +def test_network_error_falls_back_to_normal_response(): + middleware = PrerenderMiddleware(normal_response) + request = factory.get('/', HTTP_USER_AGENT=BOT_UA) + with patch('urllib.request.urlopen', side_effect=urllib.error.URLError('network error')): + response = middleware(request) + assert response.status_code == 200 + assert response.content == b'original' From ed0f17371620abdfdbdcc9620a763718f187e1eb Mon Sep 17 00:00:00 2001 From: Laszlo Takacs Date: Tue, 21 Apr 2026 09:18:23 +0200 Subject: [PATCH 2/5] Add X-Prerender-Int-Type header --- prerender_django/middleware.py | 1 + 1 file changed, 1 insertion(+) diff --git a/prerender_django/middleware.py b/prerender_django/middleware.py index a982f1c..9475407 100644 --- a/prerender_django/middleware.py +++ b/prerender_django/middleware.py @@ -64,6 +64,7 @@ def _fetch_prerendered(api_url, user_agent): req.add_header('User-Agent', user_agent) if token: req.add_header('X-Prerender-Token', token) + req.add_header('X-Prerender-Int-Type', 'Django') try: with urllib.request.urlopen(req) as resp: return resp.status, resp.read().decode('utf-8') From 18e529175511f05cb6823368f29becdacb9d57a5 Mon Sep 17 00:00:00 2001 From: Marcin Date: Wed, 13 May 2026 12:02:08 +0100 Subject: [PATCH 3/5] ci: add pull-request test workflow Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/pull-request.yml | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 .github/workflows/pull-request.yml diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml new file mode 100644 index 0000000..0ffee23 --- /dev/null +++ b/.github/workflows/pull-request.yml @@ -0,0 +1,30 @@ +name: Test + +on: + pull_request: + push: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + python-version: ['3.10', '3.11', '3.12'] + steps: + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: pip + + - name: Install package with dev extras + run: | + python -m pip install --upgrade pip + pip install -e ".[dev]" + + - name: Run tests + run: pytest From 60d21e89e0631c6309d9695fdff58f3b15592929 Mon Sep 17 00:00:00 2001 From: Marcin Date: Thu, 14 May 2026 09:35:09 +0100 Subject: [PATCH 4/5] feat: send X-Prerender-Int-Version and X-Prerender-Request-Id Lets the backend correlate requests with a specific integration version and uniquely identify each request for support triage. Co-Authored-By: Claude Opus 4.7 (1M context) --- prerender_django/__init__.py | 1 + prerender_django/middleware.py | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/prerender_django/__init__.py b/prerender_django/__init__.py index e69de29..cd7ca49 100644 --- a/prerender_django/__init__.py +++ b/prerender_django/__init__.py @@ -0,0 +1 @@ +__version__ = '1.0.1' diff --git a/prerender_django/middleware.py b/prerender_django/middleware.py index 9475407..18f7dea 100644 --- a/prerender_django/middleware.py +++ b/prerender_django/middleware.py @@ -1,10 +1,13 @@ import logging import urllib.error import urllib.request +import uuid from django.conf import settings from django.http import HttpResponse +from . import __version__ + logger = logging.getLogger(__name__) CRAWLER_USER_AGENTS = [ @@ -65,6 +68,8 @@ def _fetch_prerendered(api_url, user_agent): if token: req.add_header('X-Prerender-Token', token) req.add_header('X-Prerender-Int-Type', 'Django') + req.add_header('X-Prerender-Int-Version', __version__) + req.add_header('X-Prerender-Request-Id', str(uuid.uuid4())) try: with urllib.request.urlopen(req) as resp: return resp.status, resp.read().decode('utf-8') From cfd83ec15204be345e2660242746639b48f286ed Mon Sep 17 00:00:00 2001 From: Marcin Date: Thu, 14 May 2026 16:18:34 +0100 Subject: [PATCH 5/5] test: add contract tests against shared mock server Asserts the outgoing wire-protocol shape (URL, required and optional headers, Int-Type, Int-Version semver, Request-Id UUID format and per-request uniqueness, token omission when unconfigured). CI installs Node alongside Python and fetches mock-server.mjs from prerender/integration-contract before running pytest. Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/pull-request.yml | 8 +++ .gitignore | 1 + tests/conftest.py | 81 ++++++++++++++++++++++++ tests/test_contract.py | 98 ++++++++++++++++++++++++++++++ 4 files changed, 188 insertions(+) create mode 100644 tests/conftest.py create mode 100644 tests/test_contract.py diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml index 0ffee23..be8ce08 100644 --- a/.github/workflows/pull-request.yml +++ b/.github/workflows/pull-request.yml @@ -26,5 +26,13 @@ jobs: python -m pip install --upgrade pip pip install -e ".[dev]" + - name: Setup Node (for contract mock server) + uses: actions/setup-node@v4 + with: + node-version: 20.x + + - name: Fetch contract mock server + run: curl -fsSL -o mock-server.mjs https://raw.githubusercontent.com/prerender/integration-contract/main/mock-server.mjs + - name: Run tests run: pytest diff --git a/.gitignore b/.gitignore index 3c9f148..2974607 100644 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ __pycache__/ dist/ build/ .env +mock-server.mjs diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..7e388da --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,81 @@ +"""Shared pytest fixtures for django integration tests. + +The `mock_server` fixture spawns the prerender integration-contract mock +server (a Node script) for the duration of the test session. CI fetches +mock-server.mjs into the repo root before running tests; locally: + + curl -fsSL -o mock-server.mjs https://raw.githubusercontent.com/prerender/integration-contract/main/mock-server.mjs +""" + +import os +import socket +import subprocess +import time +import urllib.request +from pathlib import Path + +import pytest + +MOCK_SERVER_PATH = Path(os.environ.get( + 'MOCK_SERVER_PATH', + Path(__file__).parent.parent / 'mock-server.mjs', +)) + + +def _free_port(): + with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s: + s.bind(('127.0.0.1', 0)) + return s.getsockname()[1] + + +def _wait_for_health(url, attempts=50, delay=0.1): + for _ in range(attempts): + try: + with urllib.request.urlopen(url, timeout=1) as resp: + if resp.status == 200: + return + except Exception: + pass + time.sleep(delay) + raise RuntimeError(f'mock server at {url} did not become ready') + + +@pytest.fixture(scope='session') +def mock_server(): + if not MOCK_SERVER_PATH.exists(): + pytest.skip( + f'mock-server.mjs not found at {MOCK_SERVER_PATH}; fetch it via ' + 'curl -fsSL -o mock-server.mjs ' + 'https://raw.githubusercontent.com/prerender/integration-contract/main/mock-server.mjs' + ) + + port = _free_port() + proc = subprocess.Popen( + ['node', str(MOCK_SERVER_PATH)], + env={**os.environ, 'PORT': str(port)}, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + url = f'http://127.0.0.1:{port}' + try: + _wait_for_health(f'{url}/__health') + yield url + finally: + proc.terminate() + try: + proc.wait(timeout=5) + except subprocess.TimeoutExpired: + proc.kill() + + +@pytest.fixture(autouse=True) +def reset_mock(mock_server): + req = urllib.request.Request(f'{mock_server}/__reset', method='POST') + urllib.request.urlopen(req).read() + yield + + +def get_recorded(mock_server): + with urllib.request.urlopen(f'{mock_server}/__requests') as resp: + import json + return json.loads(resp.read()) diff --git a/tests/test_contract.py b/tests/test_contract.py new file mode 100644 index 0000000..509ba1d --- /dev/null +++ b/tests/test_contract.py @@ -0,0 +1,98 @@ +"""Contract tests for django integration against the shared mock server. + +Spec: https://github.com/prerender/integration-contract +""" + +import re + +from django.http import HttpResponse +from django.test import RequestFactory, override_settings + +from prerender_django.middleware import PrerenderMiddleware +from .conftest import get_recorded + +BOT_UA = 'Mozilla/5.0 (compatible; Googlebot/2.1)' +BROWSER_UA = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36' +TOKEN = 'test-token-abc123' +UUID_V4 = re.compile( + r'^[0-9a-f]{8}-[0-9a-f]{4}-4[0-9a-f]{3}-[89ab][0-9a-f]{3}-[0-9a-f]{12}$', + re.IGNORECASE, +) + +factory = RequestFactory() + + +def _normal_response(_request): + return HttpResponse('original') + + +def _call(middleware, path, headers=None, query=None): + request = factory.get(path, query or {}, **(headers or {})) + return middleware(request) + + +def test_bot_request_emits_outgoing_request_with_required_headers(mock_server): + with override_settings(PRERENDER_SERVICE_URL=f'{mock_server}/', PRERENDER_TOKEN=TOKEN): + middleware = PrerenderMiddleware(_normal_response) + _call(middleware, '/blog/post-1', headers={'HTTP_USER_AGENT': BOT_UA}) + + recorded = get_recorded(mock_server) + assert len(recorded) == 1 + r = recorded[0] + assert r['method'] == 'GET' + assert r['url'].endswith('/blog/post-1') + assert r['headers']['user-agent'] == BOT_UA + assert r['headers']['x-prerender-token'] == TOKEN + assert r['headers']['x-prerender-int-type'] == 'Django' + assert re.match(r'^\d+\.\d+\.\d+', r['headers']['x-prerender-int-version']) + assert UUID_V4.match(r['headers']['x-prerender-request-id']) + + +def test_browser_request_emits_no_outgoing_request(mock_server): + with override_settings(PRERENDER_SERVICE_URL=f'{mock_server}/', PRERENDER_TOKEN=TOKEN): + middleware = PrerenderMiddleware(_normal_response) + _call(middleware, '/', headers={'HTTP_USER_AGENT': BROWSER_UA}) + + assert get_recorded(mock_server) == [] + + +def test_static_asset_with_bot_ua_emits_no_outgoing_request(mock_server): + with override_settings(PRERENDER_SERVICE_URL=f'{mock_server}/', PRERENDER_TOKEN=TOKEN): + middleware = PrerenderMiddleware(_normal_response) + _call(middleware, '/style.css', headers={'HTTP_USER_AGENT': BOT_UA}) + + assert get_recorded(mock_server) == [] + + +def test_token_omitted_when_unconfigured(mock_server): + with override_settings(PRERENDER_SERVICE_URL=f'{mock_server}/', PRERENDER_TOKEN=None): + middleware = PrerenderMiddleware(_normal_response) + _call(middleware, '/', headers={'HTTP_USER_AGENT': BOT_UA}) + + recorded = get_recorded(mock_server) + assert len(recorded) == 1 + assert 'x-prerender-token' not in recorded[0]['headers'] + + +def test_escaped_fragment_triggers_prerender_for_browser_ua(mock_server): + with override_settings(PRERENDER_SERVICE_URL=f'{mock_server}/', PRERENDER_TOKEN=TOKEN): + middleware = PrerenderMiddleware(_normal_response) + _call(middleware, '/', query={'_escaped_fragment_': ''}, headers={'HTTP_USER_AGENT': BROWSER_UA}) + + recorded = get_recorded(mock_server) + assert len(recorded) == 1 + assert '_escaped_fragment_' in recorded[0]['url'] + + +def test_request_id_is_unique_per_outgoing_request(mock_server): + with override_settings(PRERENDER_SERVICE_URL=f'{mock_server}/', PRERENDER_TOKEN=TOKEN): + middleware = PrerenderMiddleware(_normal_response) + _call(middleware, '/', headers={'HTTP_USER_AGENT': BOT_UA}) + _call(middleware, '/', headers={'HTTP_USER_AGENT': BOT_UA}) + + recorded = get_recorded(mock_server) + assert len(recorded) == 2 + assert ( + recorded[0]['headers']['x-prerender-request-id'] + != recorded[1]['headers']['x-prerender-request-id'] + )