diff --git a/src/main/java/org/openrewrite/prethink/ExportContext.java b/src/main/java/org/openrewrite/prethink/ExportContext.java index cf500fc..7c59ea2 100644 --- a/src/main/java/org/openrewrite/prethink/ExportContext.java +++ b/src/main/java/org/openrewrite/prethink/ExportContext.java @@ -79,14 +79,66 @@ public boolean causesAnotherCycle() { return true; } - @Value public static class Accumulator { - Set existingContextPaths; + private final Set existingContextPaths = new HashSet<>(); + + public Set getExistingContextPaths() { + return existingContextPaths; + } + + // Aggregated + rendered exactly once in the export phase (cycle >= 2), then + // reused for every file visit and any later cycle. Producers only write data + // tables in cycle <= 1, so the tables are frozen by then and caching is safe. + // `csvByFilename != null` is the "already rendered" sentinel. + @Nullable + volatile Map csvByFilename; + @Nullable + volatile String markdown; } @Override public Accumulator getInitialValue(ExecutionContext ctx) { - return new Accumulator(new HashSet<>()); + return new Accumulator(); + } + + /** + * Aggregate the referenced data tables and render their CSV + markdown output + * exactly once per run; subsequent calls are no-ops. This replaces re-reading + * every table from disk in generate(), again in getVisitor() once per visited + * context file, and again in the forced extra cycle. + */ + private void renderOnce(Accumulator acc, ExecutionContext ctx) { + if (acc.csvByFilename != null) { + return; + } + synchronized (acc) { + if (acc.csvByFilename != null) { + return; + } + DataTableStore store = DataTableExecutionContextView.view(ctx).getDataTableStore(); + Map> tablesByFqn = new LinkedHashMap<>(); + Map> rowsByFqn = new LinkedHashMap<>(); + aggregateMatchingTables(store, tablesByFqn, rowsByFqn); + + Map rendered = new LinkedHashMap<>(); + List exportedTables = new ArrayList<>(); + for (Map.Entry> entry : tablesByFqn.entrySet()) { + String tableFqn = entry.getKey(); + DataTable table = entry.getValue(); + List rows = rowsByFqn.getOrDefault(tableFqn, emptyList()); + rendered.put(tableToFilename(tableFqn), exportToCsv(table, rows)); + exportedTables.add(new DataTableInfo( + table.getDisplayName(), + table.getDescription(), + tableToFilename(tableFqn), + getColumnInfo(table, rows) + )); + } + acc.markdown = exportedTables.isEmpty() ? null : generateMarkdown(exportedTables); + // Publish last: readers that observe a non-null map also observe `markdown` + // and the fully-built map contents (volatile happens-before). + acc.csvByFilename = rendered; + } } @Override @@ -114,63 +166,35 @@ public Collection generate(Accumulator acc, ExecutionContext ctx) { return emptyList(); } - List contextFiles = new ArrayList<>(); - - // Access DataTableStore here - after preceding recipes have populated it - DataTableStore store = DataTableExecutionContextView.view(ctx).getDataTableStore(); - - // Aggregate rows from all instances of the same DataTable class, - // since multiple recipes may populate the same data table type - Map> tablesByFqn = new LinkedHashMap<>(); - Map> rowsByFqn = new LinkedHashMap<>(); - aggregateMatchingTables(store, tablesByFqn, rowsByFqn); + // Aggregate + render exactly once; reused by getVisitor() and any later cycle. + renderOnce(acc, ctx); - if (tablesByFqn.isEmpty()) { + List contextFiles = new ArrayList<>(); + Map csvByFilename = acc.csvByFilename; + if (csvByFilename == null || csvByFilename.isEmpty()) { return contextFiles; } - // Collect the data tables we're exporting for the markdown file - List exportedTables = new ArrayList<>(); - - for (Map.Entry> tableEntry : tablesByFqn.entrySet()) { - String tableFqn = tableEntry.getKey(); - DataTable table = tableEntry.getValue(); - List rows = rowsByFqn.getOrDefault(tableFqn, emptyList()); - - String filename = tableToFilename(tableFqn); - String csvContent = exportToCsv(table, rows); - Path filePath = CONTEXT_DIR.resolve(filename); - - // Collect table info for markdown - exportedTables.add(new DataTableInfo( - table.getDisplayName(), - table.getDescription(), - filename, - getColumnInfo(table, rows) - )); - + for (Map.Entry entry : csvByFilename.entrySet()) { + Path filePath = CONTEXT_DIR.resolve(entry.getKey()); // Only generate if file doesn't already exist if (!acc.getExistingContextPaths().contains(filePath)) { - PlainText csvFile = PlainText.builder() - .text(csvContent) + contextFiles.add(PlainText.builder() + .text(entry.getValue()) .sourcePath(filePath) - .build(); - contextFiles.add(csvFile); + .build()); } } // Generate the markdown description file - if (!exportedTables.isEmpty()) { - String mdFilename = toKebabCase(displayName) + ".md"; - Path mdPath = CONTEXT_DIR.resolve(mdFilename); - + String markdown = acc.markdown; + if (markdown != null) { + Path mdPath = CONTEXT_DIR.resolve(toKebabCase(displayName) + ".md"); if (!acc.getExistingContextPaths().contains(mdPath)) { - String mdContent = generateMarkdown(exportedTables); - PlainText mdFile = PlainText.builder() - .text(mdContent) + contextFiles.add(PlainText.builder() + .text(markdown) .sourcePath(mdPath) - .build(); - contextFiles.add(mdFile); + .build()); } } @@ -194,42 +218,22 @@ public TreeVisitor getVisitor(Accumulator acc) { Path path = pt.getSourcePath(); if (path.startsWith(CONTEXT_DIR)) { + // Reuse the once-rendered output instead of re-aggregating per file. + renderOnce(acc, ctx); String filename = path.getFileName().toString(); // Update CSV files if (filename.endsWith(".csv")) { - String newContent = getCsvContentForFile(filename, ctx); + Map csvByFilename = acc.csvByFilename; + String newContent = csvByFilename == null ? null : csvByFilename.get(filename); if (newContent != null && !newContent.equals(pt.getText())) { return pt.withText(newContent); } - } - - // Update markdown file - String expectedMdFilename = toKebabCase(displayName) + ".md"; - if (filename.equals(expectedMdFilename)) { - DataTableStore store2 = DataTableExecutionContextView.view(ctx).getDataTableStore(); - { - Map> tablesByFqn = new LinkedHashMap<>(); - Map> rowsByFqn = new LinkedHashMap<>(); - aggregateMatchingTables(store2, tablesByFqn, rowsByFqn); - List exportedTables = new ArrayList<>(); - for (Map.Entry> tableEntry : tablesByFqn.entrySet()) { - String tableFqn = tableEntry.getKey(); - DataTable table = tableEntry.getValue(); - List rows = rowsByFqn.getOrDefault(tableFqn, emptyList()); - exportedTables.add(new DataTableInfo( - table.getDisplayName(), - table.getDescription(), - tableToFilename(tableFqn), - getColumnInfo(table, rows) - )); - } - if (!exportedTables.isEmpty()) { - String newContent = generateMarkdown(exportedTables); - if (!newContent.equals(pt.getText())) { - return pt.withText(newContent); - } - } + } else if (filename.equals(toKebabCase(displayName) + ".md")) { + // Update markdown file + String markdown = acc.markdown; + if (markdown != null && !markdown.equals(pt.getText())) { + return pt.withText(markdown); } } } @@ -304,23 +308,6 @@ private List getColumnInfo(DataTable table, List rows) { return columns; } - private @Nullable String getCsvContentForFile(String filename, ExecutionContext ctx) { - DataTableStore store = DataTableExecutionContextView.view(ctx).getDataTableStore(); - - Map> tablesByFqn = new LinkedHashMap<>(); - Map> rowsByFqn = new LinkedHashMap<>(); - aggregateMatchingTables(store, tablesByFqn, rowsByFqn); - - for (Map.Entry> entry : tablesByFqn.entrySet()) { - String expectedFilename = tableToFilename(entry.getKey()); - if (expectedFilename.equals(filename)) { - return exportToCsv(entry.getValue(), - rowsByFqn.getOrDefault(entry.getKey(), emptyList())); - } - } - return null; - } - /** * Aggregate rows from all DataTable instances of the same class into a single list per class. * When multiple recipes produce the same DataTable type (e.g., FindNodeTestCoverage and diff --git a/src/test/java/org/openrewrite/prethink/ExportContextTest.java b/src/test/java/org/openrewrite/prethink/ExportContextTest.java index e8b5de7..41a94cd 100644 --- a/src/test/java/org/openrewrite/prethink/ExportContextTest.java +++ b/src/test/java/org/openrewrite/prethink/ExportContextTest.java @@ -24,7 +24,10 @@ import org.openrewrite.test.RewriteTest; import org.openrewrite.text.PlainText; +import java.util.Collection; import java.util.List; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Stream; import static org.assertj.core.api.Assertions.assertThat; import static org.openrewrite.test.SourceSpecs.text; @@ -330,4 +333,78 @@ void aggregatesRowsFromMultipleInstancesOfSameDataTable() { ) ); } + + @Test + void aggregatesEachReferencedTableExactlyOncePerRun() { + // Regression for the per-context re-read blow-up: ExportContext used to call + // the DataTableStore for every referenced table once in generate(), once per + // visited context CSV in getVisitor(), and again in the forced extra cycle -- + // 2 * (F + 2) reads per table (6 here, F = 1 context CSV). It now aggregates + + // renders once per cycle and reuses that within the cycle. The ScanningRecipe + // accumulator is per-cycle (stored on the per-cycle root cursor), so the export + // runs in cycle 2 and the forced cycle 3 -> exactly 2 reads of the one + // referenced table, independent of how many context files are visited. + AtomicInteger getRowsCalls = new AtomicInteger(); + DataTableStore countingStore = new DataTableStore() { + final InMemoryDataTableStore delegate = new InMemoryDataTableStore(); + + @Override + public void insertRow(DataTable dataTable, ExecutionContext ctx, Row row) { + delegate.insertRow(dataTable, ctx, row); + } + + @Override + public Stream getRows(String dataTableName, String group) { + getRowsCalls.incrementAndGet(); + return delegate.getRows(dataTableName, group); + } + + @Override + public Collection> getDataTables() { + return delegate.getDataTables(); + } + }; + + Recipe pipeline = new Recipe() { + @Override + public String getDisplayName() { + return "Pipeline: producer + ExportContext"; + } + + @Override + public String getDescription() { + return "Test pipeline."; + } + + @Override + public List getRecipeList() { + return List.of( + new PopulateTestMappingA(), + new ExportContext( + "Test Coverage", + "Maps tests to implementations", + "Detailed description of test coverage context", + List.of("org.openrewrite.prethink.table.TestMapping") + ) + ); + } + }; + + SourceFile fooTest = PlainText.builder() + .text("package com.example;\npublic class FooTest {}") + .sourcePath(java.nio.file.Paths.get("src/test/java/FooTest.java")) + .build(); + InMemoryLargeSourceSet lss = new InMemoryLargeSourceSet(List.of(fooTest)); + + ExecutionContext ctx = new InMemoryExecutionContext(); + DataTableExecutionContextView.view(ctx).setDataTableStore(countingStore); + + // Match production: maxCycles=3, minCycles=1. + pipeline.run(lss, ctx, 3, 1); + + assertThat(getRowsCalls.get()) + .as("ExportContext must read each referenced table once per cycle (cycles 2 and 3), " + + "not once per visited context file (which was 2*(F+2) = 6 before memoization)") + .isEqualTo(2); + } }