diff --git a/.github/workflows/pull-request.yml b/.github/workflows/pull-request.yml new file mode 100644 index 0000000..1ea2861 --- /dev/null +++ b/.github/workflows/pull-request.yml @@ -0,0 +1,48 @@ +name: Test + +on: + pull_request: + push: + branches: [main] + +jobs: + test: + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + php-version: ['8.2', '8.3', '8.4'] + steps: + - uses: actions/checkout@v4 + + - name: Setup PHP + uses: shivammathur/setup-php@v2 + with: + php-version: ${{ matrix.php-version }} + coverage: none + tools: composer:v2 + + - name: Get composer cache directory + id: composer-cache + run: echo "dir=$(composer config cache-files-dir)" >> "$GITHUB_OUTPUT" + + - name: Cache composer dependencies + uses: actions/cache@v4 + with: + path: ${{ steps.composer-cache.outputs.dir }} + key: ${{ runner.os }}-php-${{ matrix.php-version }}-${{ hashFiles('**/composer.json') }} + restore-keys: ${{ runner.os }}-php-${{ matrix.php-version }}- + + - name: Install dependencies + run: composer install --prefer-dist --no-progress --no-interaction + + - name: Setup Node (for contract mock server) + uses: actions/setup-node@v4 + with: + node-version: 20.x + + - name: Fetch contract mock server + run: curl -fsSL -o mock-server.mjs https://raw.githubusercontent.com/prerender/integration-contract/main/mock-server.mjs + + - name: Run tests + run: ./vendor/bin/pest diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..8f1efee --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +/vendor/ +.phpunit.cache/ +.phpunit.result.cache +composer.lock +mock-server.mjs diff --git a/README.md b/README.md new file mode 100644 index 0000000..c12500f --- /dev/null +++ b/README.md @@ -0,0 +1,71 @@ +# prerender-laravel + +Laravel middleware for [Prerender.io](https://prerender.io). Intercepts requests from bots and crawlers and serves prerendered HTML, so your JavaScript-rendered app is fully indexable by search engines and social media scrapers. + +Compatible with **Laravel 11+** and **PHP 8.2+**. + +## Installation + +```bash +composer require prerender/laravel-prerender +``` + +Publish the config file: + +```bash +php artisan vendor:publish --tag=prerender-config +``` + +## Setup + +Add your token to `.env`: + +```env +PRERENDER_TOKEN=your-token +``` + +The middleware registers itself automatically via the service provider. + +## Configuration + +| Key | Env var | Default | Description | +|-----|---------|---------|-------------| +| `enable` | `PRERENDER_ENABLE` | `true` | Disable entirely (e.g. local dev) | +| `prerender_url` | `PRERENDER_SERVICE_URL` | `https://service.prerender.io` | Service URL (override for self-hosted) | +| `prerender_token` | `PRERENDER_TOKEN` | `null` | Your Prerender.io token | +| `prerender_soft_http_codes` | `PRERENDER_SOFT_HTTP_STATUS_CODES` | `true` | Pass 3xx/404 codes through as-is | +| `full_url` | `PRERENDER_FULL_URL` | `false` | Send full URL including query string | +| `timeout` | `PRERENDER_TIMEOUT` | `0` | Guzzle timeout in seconds (0 = none) | + +### Whitelist / Blacklist + +Only prerender URLs matching the whitelist (empty = all URLs pass): + +```php +'whitelist' => ['/blog/*', '/product/*'], +``` + +Never prerender URLs matching the blacklist (static assets are blacklisted by default): + +```php +'blacklist' => ['*.js', '*.css', '/admin/*'], +``` + +Patterns support `*` wildcards. + +## How it works + +Requests are prerendered when **all** of the following are true: + +- The HTTP method is `GET` +- The `User-Agent` matches a known bot/crawler (Googlebot, Bingbot, Twitterbot, GPTBot, ClaudeBot, etc.) + — OR the URL contains `_escaped_fragment_` + — OR the `X-BUFFERBOT` header is present +- The URI is not blacklisted (static assets are excluded by default) +- The URI matches the whitelist (if configured) + +If the Prerender service is unreachable, the middleware falls back gracefully. + +## License + +MIT diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..67fc50f --- /dev/null +++ b/composer.json @@ -0,0 +1,49 @@ +{ + "name": "prerender/laravel-prerender", + "description": "Laravel middleware for prerendering JavaScript-rendered pages via Prerender.io", + "keywords": ["laravel", "prerender", "prerender.io", "seo", "middleware"], + "homepage": "https://github.com/prerender/integrations", + "license": "MIT", + "authors": [ + { + "name": "Prerender.io", + "homepage": "https://prerender.io" + } + ], + "require": { + "php": "^8.2", + "guzzlehttp/guzzle": "^7.8", + "illuminate/contracts": "^11.0|^12.0", + "symfony/psr-http-message-bridge": "^7.0" + }, + "require-dev": { + "orchestra/testbench": "^9.0|^10.0", + "pestphp/pest": "^3.0", + "pestphp/pest-plugin-laravel": "^3.0" + }, + "autoload": { + "psr-4": { + "Prerender\\Laravel\\": "src/" + } + }, + "autoload-dev": { + "psr-4": { + "Prerender\\Laravel\\Tests\\": "tests/" + } + }, + "extra": { + "laravel": { + "providers": [ + "Prerender\\Laravel\\LaravelPrerenderServiceProvider" + ] + } + }, + "config": { + "sort-packages": true, + "allow-plugins": { + "pestphp/pest-plugin": true + } + }, + "minimum-stability": "dev", + "prefer-stable": true +} diff --git a/config/prerender.php b/config/prerender.php new file mode 100644 index 0000000..b303095 --- /dev/null +++ b/config/prerender.php @@ -0,0 +1,30 @@ + env('PRERENDER_ENABLE', true), + 'prerender_url' => env('PRERENDER_SERVICE_URL', 'https://service.prerender.io'), + 'prerender_token' => env('PRERENDER_TOKEN'), + 'prerender_soft_http_codes' => env('PRERENDER_SOFT_HTTP_STATUS_CODES', true), + 'full_url' => env('PRERENDER_FULL_URL', false), + 'timeout' => env('PRERENDER_TIMEOUT', 0), + + 'whitelist' => [], + + 'blacklist' => [ + '*.js', '*.css', '*.xml', '*.less', '*.png', '*.jpg', '*.jpeg', + '*.gif', '*.pdf', '*.doc', '*.txt', '*.ico', '*.rss', '*.zip', + '*.mp3', '*.rar', '*.exe', '*.wmv', '*.avi', '*.ppt', '*.mpg', + '*.mpeg', '*.tif', '*.wav', '*.mov', '*.psd', '*.ai', '*.xls', + '*.mp4', '*.m4a', '*.swf', '*.dat', '*.dmg', '*.iso', '*.flv', + '*.m4v', '*.torrent', '*.ttf', '*.woff', '*.woff2', '*.svg', + ], + + 'crawler_user_agents' => [ + 'googlebot', 'yahoo', 'bingbot', 'baiduspider', 'yandex', + 'facebookexternalhit', 'twitterbot', 'rogerbot', 'linkedinbot', + 'embedly', 'quora link preview', 'showyoubot', 'outbrain', + 'pinterest', 'slackbot', 'w3c_validator', 'redditbot', 'applebot', + 'discordbot', 'perplexity', 'oai-searchbot', 'chatgpt-user', + 'gptbot', 'claudebot', 'amazonbot', + ], +]; diff --git a/phpunit.xml.dist b/phpunit.xml.dist new file mode 100644 index 0000000..43aac90 --- /dev/null +++ b/phpunit.xml.dist @@ -0,0 +1,19 @@ + + + + + tests + + + + + src + + + diff --git a/src/LaravelPrerenderServiceProvider.php b/src/LaravelPrerenderServiceProvider.php new file mode 100644 index 0000000..bd886a7 --- /dev/null +++ b/src/LaravelPrerenderServiceProvider.php @@ -0,0 +1,38 @@ +publishes([ + __DIR__ . '/../config/prerender.php' => config_path('prerender.php'), + ], 'prerender-config'); + + if (! config('prerender.enable', true)) { + return; + } + + $this->app->make(Kernel::class)->pushMiddleware(PrerenderMiddleware::class); + } + + public function register(): void + { + $this->mergeConfigFrom(__DIR__ . '/../config/prerender.php', 'prerender'); + + $this->app->when(PrerenderMiddleware::class) + ->needs(Client::class) + ->give(function () { + $options = ['timeout' => config('prerender.timeout', 0)]; + if (! config('prerender.prerender_soft_http_codes', true)) { + $options['allow_redirects'] = false; + } + return new Client($options); + }); + } +} diff --git a/src/PrerenderMiddleware.php b/src/PrerenderMiddleware.php new file mode 100644 index 0000000..6913eb0 --- /dev/null +++ b/src/PrerenderMiddleware.php @@ -0,0 +1,129 @@ +prerenderUrl = $config['prerender_url']; + $this->prerenderToken = $config['prerender_token'] ?: null; + $this->returnSoftHttpCodes = (bool) $config['prerender_soft_http_codes']; + $this->useFullURL = (bool) $config['full_url']; + $this->crawlerUserAgents = $config['crawler_user_agents']; + $this->whitelist = $config['whitelist']; + $this->blacklist = $config['blacklist']; + } + + public function handle(Request $request, Closure $next): mixed + { + if (! $this->shouldShowPrerenderedPage($request)) { + return $next($request); + } + + $prerenderResponse = $this->getPrerenderedPageResponse($request); + + if (! $prerenderResponse) { + return $next($request); + } + + $statusCode = $prerenderResponse->getStatusCode(); + + if (! $this->returnSoftHttpCodes && $statusCode >= 300 && $statusCode < 400) { + $location = array_change_key_case($prerenderResponse->getHeaders(), CASE_LOWER)['location'][0] ?? '/'; + return redirect($location, $statusCode); + } + + return (new HttpFoundationFactory)->createResponse($prerenderResponse); + } + + private function shouldShowPrerenderedPage(Request $request): bool + { + if (! $request->isMethod('GET')) return false; + + $userAgent = strtolower($request->server->get('HTTP_USER_AGENT', '')); + if (empty($userAgent)) return false; + + if (! $this->isEligibleForPrerender($request, $userAgent)) return false; + + if ($this->whitelist && ! $this->isListed($request->getRequestUri(), $this->whitelist)) { + return false; + } + + $uris = array_values(array_filter([$request->getRequestUri(), $request->headers->get('Referer')])); + if ($this->blacklist && $this->isListed($uris, $this->blacklist)) return false; + + return true; + } + + private function isEligibleForPrerender(Request $request, string $userAgent): bool + { + if ($request->query->has('_escaped_fragment_')) return true; + if ($request->server->get('X-BUFFERBOT')) return true; + return collect($this->crawlerUserAgents) + ->contains(fn ($agent) => Str::contains($userAgent, strtolower($agent))); + } + + private function getPrerenderedPageResponse(Request $request): ?ResponseInterface + { + $headers = ['User-Agent' => $request->server->get('HTTP_USER_AGENT')]; + if ($this->prerenderToken) { + $headers['X-Prerender-Token'] = $this->prerenderToken; + } + $headers['X-Prerender-Int-Type'] = 'Laravel'; + $headers['X-Prerender-Int-Version'] = self::VERSION; + $headers['X-Prerender-Request-Id'] = (string) Str::uuid(); + + try { + return $this->client->get($this->buildApiUrl($request), compact('headers')); + } catch (RequestException $e) { + if (! $this->returnSoftHttpCodes && $e->getResponse()?->getStatusCode() === 404) { + abort(404); + } + return null; + } catch (ConnectException) { + return null; + } + } + + private function buildApiUrl(Request $request): string + { + return rtrim($this->prerenderUrl, '/') . '/' . $this->generatePrerenderUrl($request); + } + + private function generatePrerenderUrl(Request $request): string + { + if ($this->useFullURL) { + return $request->fullUrl(); + } + return $request->getScheme() . '://' . $request->getHost() . $request->getRequestUri(); + } + + private function isListed(string|array $needles, array $list): bool + { + return collect($list)->contains( + fn ($pattern) => collect((array) $needles)->contains(fn ($needle) => Str::is($pattern, $needle)) + ); + } +} diff --git a/tests/Feature/ContractTest.php b/tests/Feature/ContractTest.php new file mode 100644 index 0000000..639a0c9 --- /dev/null +++ b/tests/Feature/ContractTest.php @@ -0,0 +1,148 @@ + ['timeout' => 1, 'ignore_errors' => true]]); + $body = @file_get_contents("$url/__health", false, $ctx); + if ($body !== false) return; + usleep(100000); + } + throw new RuntimeException("mock server at $url did not become ready"); +} + +function mockRecorded(string $url): array +{ + return json_decode(file_get_contents("$url/__requests"), true); +} + +function mockReset(string $url): void +{ + $ctx = stream_context_create(['http' => ['method' => 'POST', 'ignore_errors' => true]]); + file_get_contents("$url/__reset", false, $ctx); +} + +beforeAll(function () { + $mockPath = getenv('MOCK_SERVER_PATH') ?: dirname(__DIR__, 2) . '/mock-server.mjs'; + if (!file_exists($mockPath)) { + throw new RuntimeException( + "mock-server.mjs not found at $mockPath; fetch it via curl from prerender/integration-contract" + ); + } + $port = findFreePort(); + $cmd = sprintf('PORT=%d node %s > /dev/null 2>&1 & echo $!', $port, escapeshellarg($mockPath)); + $pid = (int) trim(shell_exec($cmd)); + $url = "http://127.0.0.1:$port"; + waitForHealth($url); + $GLOBALS['__mock_pid'] = $pid; + $GLOBALS['__mock_url'] = $url; +}); + +afterAll(function () { + if (!empty($GLOBALS['__mock_pid'])) { + posix_kill($GLOBALS['__mock_pid'], SIGTERM); + } +}); + +beforeEach(function () { + mockReset($GLOBALS['__mock_url']); + config([ + 'prerender.prerender_url' => $GLOBALS['__mock_url'], + 'prerender.prerender_token' => TEST_TOKEN, + 'prerender.prerender_soft_http_codes' => true, + 'prerender.full_url' => false, + 'prerender.timeout' => 0, + 'prerender.whitelist' => [], + 'prerender.blacklist' => ['*.js', '*.css', '*.png'], + 'prerender.crawler_user_agents' => ['googlebot', 'bingbot'], + ]); +}); + +function buildMiddleware(): PrerenderMiddleware +{ + return new PrerenderMiddleware(new Client()); +} + +function botRequest(string $path = '/'): Request +{ + return Request::create($path, 'GET', [], [], [], ['HTTP_USER_AGENT' => CONTRACT_BOT_UA]); +} + +it('bot request emits exactly one outgoing request with required headers', function () { + buildMiddleware()->handle(botRequest('/blog/post-1'), fn () => response('original')); + + $recorded = mockRecorded($GLOBALS['__mock_url']); + expect($recorded)->toHaveCount(1); + $r = $recorded[0]; + expect($r['method'])->toBe('GET'); + expect($r['url'])->toEndWith('/blog/post-1'); + expect($r['headers']['user-agent'])->toBe(CONTRACT_BOT_UA); + expect($r['headers']['x-prerender-token'])->toBe(TEST_TOKEN); + expect($r['headers']['x-prerender-int-type'])->toBe('Laravel'); + expect($r['headers']['x-prerender-int-version'])->toMatch('/^\d+\.\d+\.\d+/'); + expect($r['headers']['x-prerender-request-id'])->toMatch(UUID_V4_REGEX); +}); + +it('browser request emits no outgoing request', function () { + $req = Request::create('/', 'GET', [], [], [], ['HTTP_USER_AGENT' => CONTRACT_BROWSER_UA]); + buildMiddleware()->handle($req, fn () => response('original')); + + expect(mockRecorded($GLOBALS['__mock_url']))->toBeEmpty(); +}); + +it('static asset with bot UA emits no outgoing request', function () { + buildMiddleware()->handle(botRequest('/styles.css'), fn () => response('original')); + + expect(mockRecorded($GLOBALS['__mock_url']))->toBeEmpty(); +}); + +it('token is omitted when unconfigured', function () { + config(['prerender.prerender_token' => null]); + buildMiddleware()->handle(botRequest('/'), fn () => response('original')); + + $recorded = mockRecorded($GLOBALS['__mock_url']); + expect($recorded)->toHaveCount(1); + expect($recorded[0]['headers'])->not->toHaveKey('x-prerender-token'); +}); + +it('escaped_fragment query triggers prerender for browser UA', function () { + $req = Request::create('/?_escaped_fragment_=', 'GET', [], [], [], ['HTTP_USER_AGENT' => CONTRACT_BROWSER_UA]); + buildMiddleware()->handle($req, fn () => response('original')); + + $recorded = mockRecorded($GLOBALS['__mock_url']); + expect($recorded)->toHaveCount(1); + expect($recorded[0]['url'])->toContain('_escaped_fragment_'); +}); + +it('request id is unique per outgoing request', function () { + buildMiddleware()->handle(botRequest('/'), fn () => response('original')); + buildMiddleware()->handle(botRequest('/'), fn () => response('original')); + + $recorded = mockRecorded($GLOBALS['__mock_url']); + expect($recorded)->toHaveCount(2); + expect($recorded[0]['headers']['x-prerender-request-id']) + ->not->toBe($recorded[1]['headers']['x-prerender-request-id']); +}); diff --git a/tests/Feature/PrerenderMiddlewareTest.php b/tests/Feature/PrerenderMiddlewareTest.php new file mode 100644 index 0000000..fa89c64 --- /dev/null +++ b/tests/Feature/PrerenderMiddlewareTest.php @@ -0,0 +1,99 @@ +prerendered'; + +function makeMiddleware(array $guzzleResponses = []): PrerenderMiddleware +{ + config([ + 'prerender.prerender_url' => 'https://service.prerender.io', + 'prerender.prerender_token' => null, + 'prerender.prerender_soft_http_codes' => true, + 'prerender.full_url' => false, + 'prerender.timeout' => 0, + 'prerender.whitelist' => [], + 'prerender.blacklist' => ['*.js', '*.css', '*.png'], + 'prerender.crawler_user_agents' => ['googlebot', 'bingbot', 'twitterbot'], + ]); + + $client = new Client(['handler' => HandlerStack::create(new MockHandler($guzzleResponses))]); + return new PrerenderMiddleware($client); +} + +it('passes browser requests through', function () { + $middleware = makeMiddleware(); + $request = Request::create('/', 'GET', [], [], [], ['HTTP_USER_AGENT' => BROWSER_UA]); + + $response = $middleware->handle($request, fn () => response('normal response')); + + expect($response->getContent())->toBe('normal response'); +}); + +it('returns prerendered response for bot UA', function () { + $middleware = makeMiddleware([new Response(200, [], PRERENDERED_HTML)]); + $request = Request::create('/about', 'GET', [], [], [], ['HTTP_USER_AGENT' => BOT_UA]); + + $response = $middleware->handle($request, fn () => response('normal response')); + + expect($response->getContent())->toBe(PRERENDERED_HTML); + expect($response->getStatusCode())->toBe(200); +}); + +it('passes static assets through even with bot UA', function () { + $middleware = makeMiddleware(); + $request = Request::create('/app.js', 'GET', [], [], [], ['HTTP_USER_AGENT' => BOT_UA]); + + $response = $middleware->handle($request, fn () => response('normal response')); + + expect($response->getContent())->toBe('normal response'); +}); + +it('prerenders when _escaped_fragment_ is present', function () { + $middleware = makeMiddleware([new Response(200, [], PRERENDERED_HTML)]); + $request = Request::create('/?_escaped_fragment_=', 'GET', [], [], [], ['HTTP_USER_AGENT' => BROWSER_UA]); + + $response = $middleware->handle($request, fn () => response('normal response')); + + expect($response->getContent())->toBe(PRERENDERED_HTML); +}); + +it('prerenders when X-BUFFERBOT header is present', function () { + $middleware = makeMiddleware([new Response(200, [], PRERENDERED_HTML)]); + $request = Request::create('/', 'GET', [], [], [], [ + 'HTTP_USER_AGENT' => BROWSER_UA, + 'X-BUFFERBOT' => 'true', + ]); + + $response = $middleware->handle($request, fn () => response('normal response')); + + expect($response->getContent())->toBe(PRERENDERED_HTML); +}); + +it('passes POST requests through', function () { + $middleware = makeMiddleware(); + $request = Request::create('/', 'POST', [], [], [], ['HTTP_USER_AGENT' => BOT_UA]); + + $response = $middleware->handle($request, fn () => response('normal response')); + + expect($response->getContent())->toBe('normal response'); +}); + +it('falls back gracefully on connection error', function () { + $error = new ConnectException('connection refused', new GuzzleRequest('GET', '/')); + $middleware = makeMiddleware([$error]); + $request = Request::create('/', 'GET', [], [], [], ['HTTP_USER_AGENT' => BOT_UA]); + + $response = $middleware->handle($request, fn () => response('normal response')); + + expect($response->getContent())->toBe('normal response'); +}); diff --git a/tests/Pest.php b/tests/Pest.php new file mode 100644 index 0000000..102f0d1 --- /dev/null +++ b/tests/Pest.php @@ -0,0 +1,3 @@ +in('Feature'); diff --git a/tests/TestCase.php b/tests/TestCase.php new file mode 100644 index 0000000..5625d32 --- /dev/null +++ b/tests/TestCase.php @@ -0,0 +1,14 @@ +