diff --git a/backend/src/Extraction/TextStack.Extraction/Extractors/Pdf/PdfPageTextExtractor.cs b/backend/src/Extraction/TextStack.Extraction/Extractors/Pdf/PdfPageTextExtractor.cs
index 5c87ddba..9ccb4de5 100644
--- a/backend/src/Extraction/TextStack.Extraction/Extractors/Pdf/PdfPageTextExtractor.cs
+++ b/backend/src/Extraction/TextStack.Extraction/Extractors/Pdf/PdfPageTextExtractor.cs
@@ -28,6 +28,18 @@ public static class PdfPageTextExtractor
 
     private static readonly Regex PageNumberPattern = new(@"^\d{1,4}$", RegexOptions.Compiled);
 
+    // Running headers from O'Reilly-style tech books take the shape
+    //   "4 | Chapter 1: Introduction to Building AI Applications…"
+    //   "The Rise of AI Engineering | 3"
+    // The page number varies per page, so the cross-page (identical-text)
+    // filter in PdfTextExtractor can't catch them. The pattern keys on shape
+    // instead: a 1–4 digit number and text on either side of a "|", with the
+    // surrounding whitespace optional. Encoded from a Claude cleanup pair (slice 5 r1).
+    private const int RunningHeaderMaxLength = 200; // longer text is never treated as a running header
+    private static readonly Regex RunningHeaderLike = new(
+        @"^(?:\d{1,4}\s*\|\s*\S.+|\S.+\s*\|\s*\d{1,4})$",
+        RegexOptions.Compiled);
+
     public static List<PdfTextElement> ExtractPage(Page page)
     {
         var words = page.GetWords(NearestNeighbourWordExtractor.Instance).ToList();
@@ -138,15 +150,17 @@ private static bool EndsWithSoftHyphen(string text)
     }
 
     /// <summary>
-    /// True for short fragments that are page numbers, single dividers, or pure
-    /// punctuation noise that belong to header/footer chrome, not body.
+    /// True for margin chrome that must not reach the body: empty/whitespace-only
+    /// text, bare 1–4 digit page numbers, fragments of at most two characters found in
+    /// NoisePunctuation, and O'Reilly-style "N | Title" / "Title | N" running headers (≤ 200 chars).
     /// </summary>
-    private static bool IsArtifactNoise(string text)
+    internal static bool IsArtifactNoise(string text)
     {
         var trimmed = text.Trim();
         if (trimmed.Length == 0) return true;
         if (trimmed.Length <= 2 && NoisePunctuation.Contains(trimmed)) return true;
         if (PageNumberPattern.IsMatch(trimmed)) return true;
+        if (trimmed.Length <= RunningHeaderMaxLength && RunningHeaderLike.IsMatch(trimmed)) return true; // length cap is checked first, so longer text never reaches the regex
         return false;
     }
 
diff --git a/backend/src/Extraction/TextStack.Extraction/TextStack.Extraction.csproj b/backend/src/Extraction/TextStack.Extraction/TextStack.Extraction.csproj
index 351c58c6..41db529f 100644
--- a/backend/src/Extraction/TextStack.Extraction/TextStack.Extraction.csproj
+++ b/backend/src/Extraction/TextStack.Extraction/TextStack.Extraction.csproj
@@ -10,6 +10,10 @@
     <PackageReference Include="PDFtoImage" />
   </ItemGroup>
 
+  <ItemGroup>
+    <InternalsVisibleTo Include="TextStack.Extraction.Tests" /> <!-- lets the unit tests call internal members such as PdfPageTextExtractor.IsArtifactNoise -->
+  </ItemGroup>
+
   <ItemGroup>
     <EmbeddedResource Include="TextProcessing\Data\words.txt" />
     <EmbeddedResource Include="TextProcessing\Data\spellings.json" />
diff --git a/tests/TextStack.Extraction.Tests/RunningHeaderFilterTests.cs b/tests/TextStack.Extraction.Tests/RunningHeaderFilterTests.cs
new file mode 100644
index 00000000..b9b7209c
--- /dev/null
+++ b/tests/TextStack.Extraction.Tests/RunningHeaderFilterTests.cs
@@ -0,0 +1,51 @@
+using TextStack.Extraction.Extractors.Pdf;
+
+namespace TextStack.Extraction.Tests;
+
+/// <summary>
+/// Ratchet round 1 (feat-0007 slice 5). Pins the running-header rule that
+/// <see cref="PdfPageTextExtractor.IsArtifactNoise"/> encodes from Claude cleanup pairs.
+/// </summary>
+public class RunningHeaderFilterTests
+{
+    [Theory]
+    // O'Reilly running headers — page number on either side of " | ".
+    [InlineData("4 | Chapter 1: Introduction to Building AI Applications with Foundation Models")]
+    [InlineData("2 | Chapter 1: Introduction to Building AI Applications")]
+    [InlineData("The Rise of AI Engineering | 3")]
+    [InlineData("The Rise of AI Engineering | 5")]
+    [InlineData("Foundation Model Use Cases | 17")]
+    public void IsArtifactNoise_RunningHeaderWithPipeAndPageNumber_Filtered(string text)
+        => Assert.True(PdfPageTextExtractor.IsArtifactNoise(text));
+
+    [Theory]
+    // Artifacts the filter already caught before this rule — confirm regression-free.
+    [InlineData("4")]            // bare page number
+    [InlineData("|")]            // divider glyph
+    [InlineData("")]             // empty
+    public void IsArtifactNoise_LegacyArtifacts_StillFiltered(string text)
+        => Assert.True(PdfPageTextExtractor.IsArtifactNoise(text));
+
+    [Theory]
+    // Real body content that happens to contain digits or pipes — must NOT match.
+    [InlineData("Foundation models emerged from large language models, which in turn originated as language models.")]
+    [InlineData("The Mixtral 8x7B model has a vocabulary size of 32,000.")]
+    [InlineData("Section 1.1 covers the basics — see also chapter 4 for details.")]
+    [InlineData("GPT-4 was released in March 2023.")]
+    [InlineData("Shell pipelines such as ls | wc -l feed one command into the next.")]      // pipe mid-sentence
+    [InlineData("A confusion matrix cell such as 12 | 3 is read as predicted | actual.")]   // digits beside a pipe, mid-sentence
+    public void IsArtifactNoise_BodyProse_NotFiltered(string text)
+        => Assert.False(PdfPageTextExtractor.IsArtifactNoise(text));
+
+    [Theory]
+    // The length cap is inclusive: a 200-char header is still chrome, 201 chars is not.
+    [InlineData(200, true)]
+    [InlineData(201, false)]
+    public void IsArtifactNoise_RunningHeaderLikeAtLengthCap_RespectsBoundary(int length, bool expected)
+        => Assert.Equal(expected, PdfPageTextExtractor.IsArtifactNoise("9 | " + new string('a', length - 4)));
+
+    [Fact]
+    // > 200 chars — even with the running-header signature, too long to be chrome.
+    public void IsArtifactNoise_LongRunningHeaderLike_NotFiltered()
+        => Assert.False(PdfPageTextExtractor.IsArtifactNoise("9 | " + new string('a', 250)));
+}