diff --git a/.gitignore b/.gitignore index 0a784701375d9..83eff29224279 100644 --- a/.gitignore +++ b/.gitignore @@ -68,3 +68,4 @@ testfixtures_shared/ # build files generated doc-tools/missing-doclet/bin/ +/sandbox/plugins/engine-datafusion/target/ diff --git a/libs/common/src/main/java/org/opensearch/common/CheckedTriFunction.java b/libs/common/src/main/java/org/opensearch/common/CheckedTriFunction.java index 7898226b751f7..33141b65fc2b0 100644 --- a/libs/common/src/main/java/org/opensearch/common/CheckedTriFunction.java +++ b/libs/common/src/main/java/org/opensearch/common/CheckedTriFunction.java @@ -8,11 +8,14 @@ package org.opensearch.common; +import org.opensearch.common.annotation.PublicApi; + /** * A {@link TriFunction}-like interface which allows throwing checked exceptions. * * @opensearch.internal */ +@PublicApi(since = "2.9.0") @FunctionalInterface public interface CheckedTriFunction { R apply(S s, T t, U u) throws E; diff --git a/sandbox/libs/analytics-framework/build.gradle b/sandbox/libs/analytics-framework/build.gradle index 13e3d008f0a16..ad9f9c77ce36a 100644 --- a/sandbox/libs/analytics-framework/build.gradle +++ b/sandbox/libs/analytics-framework/build.gradle @@ -15,6 +15,7 @@ def calciteVersion = '1.41.0' dependencies { + compileOnly project(':server') api "org.apache.calcite:calcite-core:${calciteVersion}" // Calcite's expression tree and Enumerable runtime — required by calcite-core API api "org.apache.calcite:calcite-linq4j:${calciteVersion}" @@ -35,7 +36,7 @@ dependencies { testingConventions.enabled = false -// analytics-framework does not depend on server +// analytics-framework depends on server for the AnalyticsSearchBackendPlugin SPI tasks.named('forbiddenApisMain').configure { replaceSignatureFiles 'jdk-signatures' failOnMissingClasses = false @@ -47,60 +48,11 @@ tasks.named('forbiddenApisMain').configure { // Split into multiple calls to stay under the JVM method parameter limit. 
tasks.named('thirdPartyAudit').configure { ignoreMissingClasses( - // Jackson (optional JSON serialization in Calcite) - 'com.fasterxml.jackson.core.JsonParser$Feature', - 'com.fasterxml.jackson.core.PrettyPrinter', - 'com.fasterxml.jackson.core.type.TypeReference', - 'com.fasterxml.jackson.core.util.DefaultIndenter', - 'com.fasterxml.jackson.core.util.DefaultPrettyPrinter', - 'com.fasterxml.jackson.core.util.Separators', - 'com.fasterxml.jackson.core.util.Separators$Spacing', + // Jackson databind (optional JSON serialization in Calcite — only databind is missing, core is on classpath) 'com.fasterxml.jackson.databind.DeserializationFeature', 'com.fasterxml.jackson.databind.ObjectMapper', 'com.fasterxml.jackson.databind.ObjectWriter', - // Protobuf (Avatica RPC serialization, not used) - 'com.google.protobuf.AbstractMessageLite$Builder', - 'com.google.protobuf.AbstractParser', - 'com.google.protobuf.ByteString', - 'com.google.protobuf.CodedInputStream', - 'com.google.protobuf.CodedOutputStream', - 'com.google.protobuf.Descriptors$Descriptor', - 'com.google.protobuf.Descriptors$EnumDescriptor', - 'com.google.protobuf.Descriptors$EnumValueDescriptor', - 'com.google.protobuf.Descriptors$FieldDescriptor', - 'com.google.protobuf.Descriptors$FileDescriptor', - 'com.google.protobuf.Descriptors$OneofDescriptor', - 'com.google.protobuf.ExtensionRegistry', - 'com.google.protobuf.ExtensionRegistryLite', - 'com.google.protobuf.GeneratedMessageV3', - 'com.google.protobuf.GeneratedMessageV3$Builder', - 'com.google.protobuf.GeneratedMessageV3$BuilderParent', - 'com.google.protobuf.GeneratedMessageV3$FieldAccessorTable', - 'com.google.protobuf.GeneratedMessageV3$UnusedPrivateParameter', - 'com.google.protobuf.Internal', - 'com.google.protobuf.Internal$EnumLiteMap', - 'com.google.protobuf.Internal$IntList', - 'com.google.protobuf.Internal$LongList', - 'com.google.protobuf.InvalidProtocolBufferException', - 'com.google.protobuf.LazyStringArrayList', - 'com.google.protobuf.MapEntry', 
- 'com.google.protobuf.MapEntry$Builder', - 'com.google.protobuf.MapField', - 'com.google.protobuf.MapFieldReflectionAccessor', - 'com.google.protobuf.Message', - 'com.google.protobuf.MessageOrBuilder', - 'com.google.protobuf.Parser', - 'com.google.protobuf.ProtocolMessageEnum', - 'com.google.protobuf.ProtocolStringList', - 'com.google.protobuf.RepeatedFieldBuilderV3', - 'com.google.protobuf.SingleFieldBuilderV3', - 'com.google.protobuf.TextFormat', - 'com.google.protobuf.UninitializedMessageException', - 'com.google.protobuf.UnknownFieldSet', - 'com.google.protobuf.UnsafeByteOperations', - 'com.google.protobuf.WireFormat$FieldType', - // Uzaygezen (optional Hilbert curve spatial indexing) 'com.google.uzaygezen.core.BacktrackingQueryBuilder', 'com.google.uzaygezen.core.BitVector', @@ -200,61 +152,14 @@ tasks.named('thirdPartyAudit').configure { 'org.joou.UShort', 'org.joou.Unsigned', - // JTS / Proj4j (optional spatial/geometry support) + // JTS / Proj4j (optional spatial/geometry support — classes still missing) 'org.locationtech.jts.algorithm.InteriorPoint', - 'org.locationtech.jts.algorithm.LineIntersector', - 'org.locationtech.jts.algorithm.MinimumBoundingCircle', - 'org.locationtech.jts.algorithm.MinimumDiameter', - 'org.locationtech.jts.densify.Densifier', - 'org.locationtech.jts.geom.Coordinate', - 'org.locationtech.jts.geom.CoordinateSequence', - 'org.locationtech.jts.geom.CoordinateSequenceFactory', - 'org.locationtech.jts.geom.Envelope', - 'org.locationtech.jts.geom.Geometry', - 'org.locationtech.jts.geom.GeometryCollection', - 'org.locationtech.jts.geom.GeometryFactory', - 'org.locationtech.jts.geom.GeometryFilter', - 'org.locationtech.jts.geom.IntersectionMatrix', - 'org.locationtech.jts.geom.LineSegment', - 'org.locationtech.jts.geom.LineString', - 'org.locationtech.jts.geom.LinearRing', - 'org.locationtech.jts.geom.MultiLineString', - 'org.locationtech.jts.geom.MultiPoint', - 'org.locationtech.jts.geom.MultiPolygon', - 
'org.locationtech.jts.geom.OctagonalEnvelope', - 'org.locationtech.jts.geom.Point', - 'org.locationtech.jts.geom.Polygon', - 'org.locationtech.jts.geom.util.AffineTransformation', - 'org.locationtech.jts.geom.util.GeometryEditor', - 'org.locationtech.jts.geom.util.GeometryEditor$CoordinateOperation', 'org.locationtech.jts.geom.util.GeometryFixer', - 'org.locationtech.jts.geom.util.GeometryTransformer', - 'org.locationtech.jts.geom.util.LineStringExtracter', - 'org.locationtech.jts.io.WKBReader', - 'org.locationtech.jts.io.WKBWriter', - 'org.locationtech.jts.io.WKTReader', - 'org.locationtech.jts.io.WKTWriter', 'org.locationtech.jts.io.geojson.GeoJsonReader', 'org.locationtech.jts.io.geojson.GeoJsonWriter', - 'org.locationtech.jts.io.gml2.GMLReader', - 'org.locationtech.jts.io.gml2.GMLWriter', - 'org.locationtech.jts.linearref.LengthIndexedLine', - 'org.locationtech.jts.operation.buffer.BufferOp', - 'org.locationtech.jts.operation.buffer.BufferParameters', 'org.locationtech.jts.operation.buffer.OffsetCurve', - 'org.locationtech.jts.operation.distance.DistanceOp', - 'org.locationtech.jts.operation.linemerge.LineMerger', - 'org.locationtech.jts.operation.overlay.snap.GeometrySnapper', - 'org.locationtech.jts.operation.polygonize.Polygonizer', - 'org.locationtech.jts.operation.union.UnaryUnionOp', - 'org.locationtech.jts.precision.GeometryPrecisionReducer', - 'org.locationtech.jts.simplify.DouglasPeuckerSimplifier', - 'org.locationtech.jts.simplify.TopologyPreservingSimplifier', - 'org.locationtech.jts.triangulate.DelaunayTriangulationBuilder', 'org.locationtech.jts.triangulate.polygon.ConstrainedDelaunayTriangulator', - 'org.locationtech.jts.triangulate.quadedge.QuadEdgeSubdivision', 'org.locationtech.jts.triangulate.tri.Tri', - 'org.locationtech.jts.util.GeometricShapeFactory', 'org.locationtech.proj4j.CRSFactory', 'org.locationtech.proj4j.CoordinateReferenceSystem', 'org.locationtech.proj4j.CoordinateTransform', diff --git 
a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineBridge.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineBridge.java deleted file mode 100644 index f0cd602312379..0000000000000 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineBridge.java +++ /dev/null @@ -1,57 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.analytics.backend; - -/** - * JNI boundary interface between the query planner (Java) and a native - * execution engine (e.g., DataFusion/Rust). - * - *

The bridge has two responsibilities: - *

    - *
  1. {@link #convertFragment} — serialise a logical plan fragment into - * the engine's wire format (e.g., Substrait bytes).
  2. - *
  3. {@link #execute} — hand the serialised plan to the native engine - * and obtain an opaque handle to the result stream that lives - * entirely in native memory.
  4. - *
- * - *

Arrow data never crosses the JNI boundary into the JVM heap. - * Consumers read from the native stream via Arrow Flight or - * direct native-memory access using the returned handle. - * - * @param serialised plan type (e.g., {@code byte[]} for Substrait) - * @param result stream handle - * @param > logical plan type (e.g., Calcite {@code RelNode}) - * @opensearch.internal - */ -public interface EngineBridge { - - /** - * Converts a logical plan fragment into the native engine's serialised - * format. - * - * @param fragment the logical plan subtree to serialise - * @return the serialised plan in the engine's wire format - */ - Fragment convertFragment(LogicalPlan fragment); - - /** - * Submits the serialised plan to the native engine for execution and - * returns an opaque handle to the result stream. - * - *

The returned handle is a pointer into native memory (e.g., a - * {@code long} address of a Rust {@code RecordBatchStream}). The - * caller must eventually close the stream through a corresponding - * native call to avoid leaking resources. - * - * @param fragment the serialised plan produced by {@link #convertFragment} - * @return an opaque handle to the native result stream - */ - Stream execute(Fragment fragment); -} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultBatch.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultBatch.java new file mode 100644 index 0000000000000..d062bcfe079af --- /dev/null +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultBatch.java @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.analytics.backend; + +import java.util.List; + +/** + * Read-only view of a single record batch. Provides field names, row count, + * and positional access to field values. + * + * @opensearch.internal + */ +public interface EngineResultBatch { + + /** + * Ordered list of field (column) names in this batch. + */ + List getFieldNames(); + + /** + * Number of rows in this batch. + */ + int getRowCount(); + + /** + * Returns the value at the given row index for the named field. 
+ * + * @param fieldName column name + * @param rowIndex zero-based row index + * @return the value (may be null) + */ + Object getFieldValue(String fieldName, int rowIndex); +} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultStream.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultStream.java new file mode 100644 index 0000000000000..df8860911b83a --- /dev/null +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultStream.java @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.analytics.backend; + +import java.util.Iterator; + +/** + * A closeable stream of record batches returned by engine execution. + * Callers iterate batches via the returned iterator and MUST close the stream + * when done to release native resources. + * + * @opensearch.internal + */ +public interface EngineResultStream extends AutoCloseable { + + /** + * Returns an iterator over the record batches in this stream. + * Each call returns the same iterator instance — the stream is single-pass. 
+ */ + Iterator iterator(); + + @Override + void close(); +} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/ExecutionContext.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/ExecutionContext.java new file mode 100644 index 0000000000000..c703a3bab0d92 --- /dev/null +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/ExecutionContext.java @@ -0,0 +1,52 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.analytics.backend; + +import org.opensearch.action.search.SearchShardTask; +import org.opensearch.index.engine.DataFormatAwareEngine; + +/** + * Execution context carrying reader and delegation state through + * the query execution lifecycle. + * + * @opensearch.internal + */ +public class ExecutionContext { + + private final String tableName; + private final DataFormatAwareEngine.DataFormatAwareReader reader; + private final SearchShardTask task; + + /** + * Constructs an execution context. + * @param tableName the target table name + * @param task the search shard task + * @param reader the data-format aware reader + */ + public ExecutionContext(String tableName, SearchShardTask task, DataFormatAwareEngine.DataFormatAwareReader reader) { + this.tableName = tableName; + this.task = task; + this.reader = reader; + } + + /** Returns the search shard task. */ + public SearchShardTask getTask() { + return task; + } + + /** Returns the target table name. */ + public String getTableName() { + return tableName; + } + + /** Returns the data-format aware reader. 
*/ + public DataFormatAwareEngine.DataFormatAwareReader getReader() { + return reader; + } +} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/SearchExecEngine.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/SearchExecEngine.java new file mode 100644 index 0000000000000..2defa07ad5ebf --- /dev/null +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/SearchExecEngine.java @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.analytics.backend; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; +import java.io.IOException; + +/** + * Shard-level search execution engine interface. + * @opensearch.experimental + */ +@ExperimentalApi +public interface SearchExecEngine extends Closeable { + /** + * Prepares this engine for execution using the given context. + * + * @param context the execution context + */ + void prepare(T context); + + /** + * Executes the context and returns a result stream. 
+ * @param context the execution context + */ + V execute(T context) throws IOException; + + @Override + default void close() throws IOException {} +} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/NativeHandle.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/NativeHandle.java new file mode 100644 index 0000000000000..a785484da5604 --- /dev/null +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/NativeHandle.java @@ -0,0 +1,96 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.analytics.backend.jni; + +import java.lang.ref.Cleaner; +import java.util.concurrent.atomic.AtomicBoolean; + +/** + * Base class for type-safe native pointer wrappers. + * Provides automatic resource management and prevents use-after-close errors. + * Subclasses must implement {@link #doClose()} to release native resources. + * Cleaner is used to ensure resources are cleaned up even if the object is not explicitly closed. + */ +public abstract class NativeHandle implements AutoCloseable { + + /** Pointer to the native resource. */ + protected final long ptr; + private final AtomicBoolean closed = new AtomicBoolean(false); + /** Sentinel value representing a null native pointer. */ + protected static final long NULL_POINTER = 0L; + private final Cleaner.Cleanable cleanable; + + private static final Cleaner CLEANER = Cleaner.create(); + + /** + * Creates a new native handle. 
+ * @param ptr the native pointer (must not be 0) + * @throws IllegalArgumentException if ptr is 0 + */ + protected NativeHandle(long ptr) { + if (ptr == NULL_POINTER) { + throw new IllegalArgumentException("Null native pointer"); + } + this.ptr = ptr; + this.cleanable = CLEANER.register(this, new CleanupAction(ptr, this::doClose)); + } + + /** + * Ensures the handle is still open. + * @throws IllegalStateException if the handle has been closed + */ + public void ensureOpen() { + if (closed.get()) { + throw new IllegalStateException("Handle already closed"); + } + } + + /** + * Gets the native pointer value. + * @return the native pointer + * @throws IllegalStateException if the handle has been closed + */ + public long getPointer() { + ensureOpen(); + return ptr; + } + + @Override + public void close() { + if (closed.compareAndSet(false, true)) { + cleanable.clean(); + } + } + + /** + * Releases the native resource. + * Called once when the handle is closed. + * Subclasses must implement this to free native memory. + */ + protected abstract void doClose(); + + /** + * Cleanup action that runs the supplied close callback; invoked via {@link #close()} or by the cleaner. + * NOTE(review): the callback is {@code this::doClose}, which strongly references the handle, so the cleaner may never run it on GC — confirm. 
+ */ + private static final class CleanupAction implements Runnable { + private final long ptr; + private final Runnable doClose; + + CleanupAction(long ptr, Runnable doClose) { + this.ptr = ptr; + this.doClose = doClose; + } + + @Override + public void run() { + doClose.run(); + } + } +} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/package-info.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/package-info.java new file mode 100644 index 0000000000000..fe8d1f0e98a19 --- /dev/null +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * JNI handles for native engine resources. + */ +package org.opensearch.analytics.backend.jni; diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsBackEndPlugin.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsBackEndPlugin.java deleted file mode 100644 index 454c6c17bd7f0..0000000000000 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsBackEndPlugin.java +++ /dev/null @@ -1,27 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.analytics.spi; - -import org.apache.calcite.sql.SqlOperatorTable; -import org.opensearch.analytics.backend.EngineBridge; - -/** - * SPI extension point for back-end query engines (DataFusion, Lucene, etc.). 
- * @opensearch.internal - */ -public interface AnalyticsBackEndPlugin { - /** Unique engine name (e.g., "lucene", "datafusion"). */ - String name(); - - /** JNI boundary for executing serialized plans, or null for engines without native execution. */ - EngineBridge bridge(); - - /** Supported functions as a Calcite operator table, or null if the back-end adds no functions. */ - SqlOperatorTable operatorTable(); -} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java new file mode 100644 index 0000000000000..2c0a329464580 --- /dev/null +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.analytics.spi; + +import org.opensearch.analytics.backend.EngineResultStream; +import org.opensearch.analytics.backend.ExecutionContext; +import org.opensearch.analytics.backend.SearchExecEngine; +import org.opensearch.index.engine.dataformat.DataFormat; + +import java.util.List; + +/** + * SPI extension point for back-end query engines (DataFusion, Lucene, etc.). + * @opensearch.internal + */ +public interface AnalyticsSearchBackendPlugin { + + /** Unique engine name (e.g., "lucene", "datafusion"). */ + String name(); + + /** + * Creates a searcher bound to the given reader snapshot. + * @param ctx the execution context + */ + SearchExecEngine searcher(ExecutionContext ctx); + + /** Returns the data formats supported by this backend. 
*/ + List getSupportedFormats(); +} diff --git a/sandbox/plugins/analytics-backend-datafusion/README.md b/sandbox/plugins/analytics-backend-datafusion/README.md index ae0e9b9b7a37e..80f731378de85 100644 --- a/sandbox/plugins/analytics-backend-datafusion/README.md +++ b/sandbox/plugins/analytics-backend-datafusion/README.md @@ -1,16 +1,52 @@ # analytics-backend-datafusion -DataFusion native execution engine plugin. Implements `AnalyticsBackEndPlugin` to provide a back-end that can execute query plan fragments via JNI. +DataFusion native execution engine plugin for the OpenSearch analytics framework. Implements `SearchBackEndPlugin` (server SPI for shard-level reader management) and `AnalyticsSearchBackendPlugin` (analytics-framework SPI for query execution) to execute query plan fragments via a Rust/DataFusion runtime over JNI. -## What it does +## Architecture -Exposes a `DataFusionBridge` (`EngineBridge`) that converts Calcite `RelNode` fragments into a serialized plan format and executes them through a native Rust/DataFusion library. Currently a stub. - -## How it fits in - -Declares `extendedPlugins = ['analytics-engine']` so the hub discovers it as an `AnalyticsBackEndPlugin`. The hub passes all discovered back-ends to the `QueryPlanExecutorPlugin` during executor creation. The executor will eventually use the bridge and capabilities to route plan fragments to the appropriate engine. 
+``` +┌─────────────────────────────────────────────────────────────────┐ +│ analytics-engine (hub) │ +│ ExtensiblePlugin — discovers AnalyticsSearchBackendPlugin SPIs │ +│ Routes query plan fragments to back-ends via DefaultPlanExecutor│ +└──────────────┬──────────────────────────────────────────────────┘ + │ SPI (extendedPlugins = ['analytics-engine']) + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ analytics-backend-datafusion │ +│ │ +│ DataFusionPlugin │ +│ ├── createComponents() → DataFusionService (node-level) │ +│ ├── searcher(ExecutionContext) → DatafusionSearchExecEngine │ +│ └── createReaderManager(format, shardPath) │ +│ → DatafusionReaderManager │ +│ │ +│ Execution flow: │ +│ ExecutionContext │ +│ → DatafusionSearchExecEngine.prepare() │ +│ (RelNode → Substrait bytes → DatafusionQuery) │ +│ → DatafusionSearchExecEngine.execute() │ +│ → DatafusionSearcher.search(DatafusionContext) │ +│ → NativeBridge.executeQuery() [JNI] │ +│ → DatafusionResultStream (Arrow record batches) │ +│ │ +│ Native layer (JNI): │ +│ NativeBridge ──→ rust │ +│ createDatafusionReader / closeDatafusionReader │ +│ createGlobalRuntime / closeGlobalRuntime │ +│ executeQuery / streamNext / streamClose │ +└─────────────────────────────────────────────────────────────────┘ +``` ## Key classes -- **`DataFusionPlugin`** — The `AnalyticsBackEndPlugin` SPI implementation. Reports `name() = "datafusion"`. -- **`DataFusionBridge`** — The `EngineBridge` implementation for native execution. +| Class | Role | +|---|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `DataFusionPlugin` | Plugin entry point. 
Implements `SearchBackEndPlugin` (server SPI — provides `createReaderManager` for shard-level data access) and `AnalyticsSearchBackendPlugin` (analytics-framework SPI — provides `searcher` for query execution). | +| `DataFusionService` | Node-level lifecycle service. Loads the native JNI library, creates the Tokio runtime, the global runtime environment, and the memory pool. Shared by all per-shard engines. | +| `DatafusionSearchExecEngine` | Per-query engine. `prepare()` converts the Calcite `RelNode` to a Substrait plan; `execute()` delegates to `DatafusionSearcher` and returns a `DatafusionResultStream`. | +| `DatafusionContext` | Execution context carrying the query plan, `DatafusionSearcher`, optional `IndexFilterTree`, native runtime pointer, result `StreamHandle`, etc. Implements `SearchExecutionContext`. | +| `DatafusionSearcher` | Executes the Substrait plan against a native reader via `NativeBridge.executeQuery()`. Owns no resources — reader lifecycle is managed by `DatafusionReaderManager`. | +| `DatafusionReader` | Per-shard point-in-time snapshot of data files. Wraps a `ReaderHandle`. | +| `DatafusionReaderManager` | Manages `DatafusionReader` lifecycle per `CatalogSnapshot`. Handles refresh (swap in new reader) and deletion (close old reader). | +| `NativeRuntimeHandle` | Thread-safe wrapper around the native runtime pointer with liveness checks. | diff --git a/sandbox/plugins/analytics-backend-datafusion/build.gradle b/sandbox/plugins/analytics-backend-datafusion/build.gradle index 61fec92b7219d..0159c92127bc8 100644 --- a/sandbox/plugins/analytics-backend-datafusion/build.gradle +++ b/sandbox/plugins/analytics-backend-datafusion/build.gradle @@ -13,9 +13,11 @@ opensearchplugin { } dependencies { - // Shared types and SPI interfaces (EngineBridge, AnalyticsBackEndPlugin, etc.) - // Also provides calcite-core transitively via api. 
- api project(':sandbox:libs:analytics-framework') + // Provided at runtime by the parent analytics-engine plugin; compile-only to avoid jar hell. + compileOnly project(':sandbox:libs:analytics-framework') + + compileOnly "org.apache.logging.log4j:log4j-api:${versions.log4j}" + compileOnly "org.apache.logging.log4j:log4j-core:${versions.log4j}" } // TODO: Remove once back-end is built out with test suite diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionBridge.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionBridge.java deleted file mode 100644 index 97b4326361a0c..0000000000000 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionBridge.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.be.datafusion; - -import org.apache.calcite.rel.RelNode; -import org.opensearch.analytics.backend.EngineBridge; - -/** - * DataFusion EngineBridge implementation. - * Uses a byte[] representing serialized plan to execute. - */ -public class DataFusionBridge implements EngineBridge { - // S=byte[] (Substrait), H=Long (stream pointer), L=RelNode (logical plan) - - /** Creates a new DataFusion bridge. */ - public DataFusionBridge() {} - - /** - * Convert calcite fragment to an executable native fragment. 
- * Ex - substrait for Datafusion - * - * @param fragment the logical plan subtree to serialise - * @return substrait bytes - */ - @Override - public byte[] convertFragment(RelNode fragment) { - return new byte[0]; - } - - /** - * Execute query fragment - * - * @param fragment the serialised plan produced by {@link #convertFragment} - * @return RecordBatchStream pointer - */ - @Override - public Long execute(byte[] fragment) { - return 0L; - } -} diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java index 79f4f834bfdb4..8e98add4fa1e8 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java @@ -8,20 +8,96 @@ package org.opensearch.be.datafusion; -import org.apache.calcite.sql.SqlOperatorTable; -import org.opensearch.analytics.backend.EngineBridge; -import org.opensearch.analytics.spi.AnalyticsBackEndPlugin; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.analytics.backend.EngineResultStream; +import org.opensearch.analytics.backend.ExecutionContext; +import org.opensearch.analytics.backend.SearchExecEngine; +import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.common.io.stream.NamedWriteableRegistry; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.env.Environment; +import org.opensearch.env.NodeEnvironment; +import 
org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.exec.EngineReaderManager; +import org.opensearch.index.shard.ShardPath; import org.opensearch.plugins.Plugin; +import org.opensearch.plugins.SearchBackEndPlugin; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.script.ScriptService; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.client.Client; +import org.opensearch.watcher.ResourceWatcherService; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.List; +import java.util.function.Supplier; /** - * DataFusion native execution engine plugin. + * Main plugin class for the DataFusion native engine integration. + *

+ * Initializes the {@link DataFusionService} at node startup and creates
+ * per-shard {@link DatafusionSearchExecEngine} instances via the
+ * {@link AnalyticsSearchBackendPlugin} SPI.
  */
-public class DataFusionPlugin extends Plugin implements AnalyticsBackEndPlugin {
+public class DataFusionPlugin extends Plugin implements SearchBackEndPlugin, AnalyticsSearchBackendPlugin {
+
+    private static final Logger logger = LogManager.getLogger(DataFusionPlugin.class);
+
+    /** Memory pool limit for the DataFusion runtime, in bytes (default: a quarter of {@code Runtime.maxMemory()}). */
+    public static final Setting<Long> DATAFUSION_MEMORY_POOL_LIMIT = Setting.longSetting(
+        "datafusion.memory_pool_limit_bytes",
+        Runtime.getRuntime().maxMemory() / 4,
+        0L,
+        Setting.Property.NodeScope
+    );
+
+    /** Spill memory limit in bytes — when exceeded, DataFusion spills to disk (default: an eighth of {@code Runtime.maxMemory()}). */
+    public static final Setting<Long> DATAFUSION_SPILL_MEMORY_LIMIT = Setting.longSetting(
+        "datafusion.spill_memory_limit_bytes",
+        Runtime.getRuntime().maxMemory() / 8,
+        0L,
+        Setting.Property.NodeScope
+    );
 
-    /** Creates a new DataFusion plugin. */
+    /** Node-level service; assigned in {@link #createComponents} and {@code null} before that. */
+    private volatile DataFusionService dataFusionService;
+
+    /**
+     * Creates the DataFusion plugin.
+     */
     public DataFusionPlugin() {}
 
-    private final DataFusionBridge bridge = new DataFusionBridge();
+    /**
+     * Reads the memory/spill limits from the node settings, creates and starts the
+     * {@link DataFusionService}, and returns it as the plugin's single component.
+     * Spill files go to a {@code tmp} directory next to the first data path.
+     */
+    @Override
+    public Collection<Object> createComponents(
+        Client client,
+        ClusterService clusterService,
+        ThreadPool threadPool,
+        ResourceWatcherService resourceWatcherService,
+        ScriptService scriptService,
+        NamedXContentRegistry xContentRegistry,
+        Environment environment,
+        NodeEnvironment nodeEnvironment,
+        NamedWriteableRegistry namedWriteableRegistry,
+        IndexNameExpressionResolver indexNameExpressionResolver,
+        Supplier<RepositoriesService> repositoriesServiceSupplier
+    ) {
+        Settings settings = environment.settings();
+        long memoryPoolLimit = DATAFUSION_MEMORY_POOL_LIMIT.get(settings);
+        long spillMemoryLimit = DATAFUSION_SPILL_MEMORY_LIMIT.get(settings);
+        String spillDir = environment.dataFiles()[0].getParent().resolve("tmp").toAbsolutePath().toString();
+
+        dataFusionService = new DataFusionService(memoryPoolLimit, spillDir, spillMemoryLimit);
+        dataFusionService.start();
+        logger.info("DataFusion plugin initialized — memory pool {}B, spill limit {}B", memoryPoolLimit, spillMemoryLimit);
+
+        return Collections.singletonList(dataFusionService);
+    }
+
+    /**
+     * Registers the plugin's node settings. Without this the node does not know the
+     * {@code datafusion.*} keys and rejects them when they are set explicitly.
+     */
+    @Override
+    public List<Setting<?>> getSettings() {
+        return List.of(DATAFUSION_MEMORY_POOL_LIMIT, DATAFUSION_SPILL_MEMORY_LIMIT);
+    }
 
     @Override
     public String name() {
@@ -29,12 +105,31 @@ public String name() {
     }
 
     @Override
-    public EngineBridge bridge() {
-        return bridge;
+    public SearchExecEngine searcher(ExecutionContext ctx) {
+        // Fail with a clear message rather than a NullPointerException when the
+        // node-level service has not been created yet.
+        DataFusionService service = dataFusionService;
+        if (service == null) {
+            throw new IllegalStateException("DataFusion plugin has not been initialized");
+        }
+        // TODO: resolve DataFormat properly instead of passing null
+        DatafusionReader dfReader = (DatafusionReader) ctx.getReader().getReader(null);
+        DatafusionContext context = new DatafusionContext(ctx.getTask(), dfReader, service.getNativeRuntime());
+        DatafusionSearchExecEngine datafusionSearchExecEngine = new DatafusionSearchExecEngine(context);
+        datafusionSearchExecEngine.prepare(ctx);
+        return datafusionSearchExecEngine;
+    }
+
+    @Override
+    public EngineReaderManager createReaderManager(DataFormat format, ShardPath shardPath) throws IOException {
+        return new DatafusionReaderManager(format, shardPath);
+    }
+
+    /**
+     * Data formats this plugin can handle. Used by CompositeEngine to route queries.
+     * Returns an empty list (never {@code null}) until format support is wired.
+     */
+    // NOTE(review): element type looks like String per the TODO below — confirm and parameterize.
+    public List getSupportedFormats() {
+        return Collections.emptyList(); // TODO : List.of("parquet");
     }
 
     @Override
-    public SqlOperatorTable operatorTable() {
-        return null;
+    public void close() throws IOException {
+        if (dataFusionService != null) {
+            dataFusionService.close();
+        }
     }
 }
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java
new file mode 100644
index 0000000000000..62271d88d1c57
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java
@@ -0,0 +1,107 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.be.datafusion;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.common.lifecycle.AbstractLifecycleComponent;
+
+import java.io.IOException;
+
+/**
+ * Node-level service managing the DataFusion native runtime lifecycle.
+ *

+ * All per-shard {@link DatafusionSearchExecEngine} instances share the single
+ * Tokio runtime and memory pool owned by this service. The service loads the
+ * native JNI library on start and tears down the runtime on stop/close.
+ */
+public class DataFusionService extends AbstractLifecycleComponent {
+
+    private static final Logger logger = LogManager.getLogger(DataFusionService.class);
+    private static final String NATIVE_LIBRARY_NAME = "opensearch_datafusion_jni";
+
+    private final long memoryPoolLimit;
+    private final String spillDirectory;
+    private final long spillMemoryLimit;
+
+    /** Handle to the native DataFusion global runtime (Tokio + memory pool). */
+    private volatile NativeRuntimeHandle runtimeHandle;
+
+    /**
+     * Creates a new DataFusionService.
+     *
+     * @param memoryPoolLimit maximum bytes for the DataFusion memory pool
+     * @param spillDirectory directory for spill files when memory is exceeded
+     * @param spillMemoryLimit maximum bytes before spilling to disk
+     */
+    public DataFusionService(long memoryPoolLimit, String spillDirectory, long spillMemoryLimit) {
+        this.memoryPoolLimit = memoryPoolLimit;
+        this.spillDirectory = spillDirectory;
+        this.spillMemoryLimit = spillMemoryLimit;
+    }
+
+    /**
+     * Loads the native library and creates the runtime handle. If the library cannot be
+     * loaded the service still starts, but no runtime handle is created and
+     * {@link #getNativeRuntime()} will throw.
+     */
+    @Override
+    protected void doStart() {
+        logger.info("Starting DataFusion service — loading native library [{}]", NATIVE_LIBRARY_NAME);
+        try {
+            System.loadLibrary(NATIVE_LIBRARY_NAME);
+        } catch (UnsatisfiedLinkError e) {
+            // Keep the cause in the log: "not found" and "found but failed to link" need different fixes.
+            logger.warn("Native library [{}] not found — DataFusion backend will be unavailable", NATIVE_LIBRARY_NAME, e);
+            return;
+        }
+
+        // TODO: initialize Tokio runtime and memory pool via NativeBridge
+        // long ptr = NativeBridge.createGlobalRuntime(memoryPoolLimit, spillDirectory, spillMemoryLimit);
+        // NOTE(review): NativeBridge.createGlobalRuntime currently declares four parameters
+        // (including a cache manager pointer) — align the call above when wiring it.
+        long ptr = 1L; // placeholder until NativeBridge is wired
+        this.runtimeHandle = new NativeRuntimeHandle(ptr);
+        logger.info("DataFusion service started");
+    }
+
+    @Override
+    protected void doStop() {
+        logger.info("Stopping DataFusion service");
+        releaseRuntime();
+    }
+
+    @Override
+    protected void doClose() throws IOException {
+        // No-op when doStop() already released the runtime.
+        releaseRuntime();
+    }
+
+    /**
+     * Returns the handle to the native DataFusion global runtime.
+     * All consumers should hold this reference and call {@link NativeRuntimeHandle#get()}
+     * at JNI invocation time to obtain the current live pointer.
+     *
+     * @throws IllegalStateException if no runtime is available: the service has not been
+     *         started, has already been stopped, or the native library failed to load
+     */
+    public NativeRuntimeHandle getNativeRuntime() {
+        NativeRuntimeHandle handle = runtimeHandle;
+        if (handle == null) {
+            throw new IllegalStateException(
+                "DataFusion native runtime is not available (service not started, already stopped, or native library missing)"
+            );
+        }
+        return handle;
+    }
+
+    /**
+     * Returns the cache manager for per-shard cache management.
+     * Used by DatafusionReaderManager to evict stale entries on file deletion.
+     */
+    // TODO: uncomment when CacheManager class is available
+    // public CacheManager getCacheManager() { return cacheManager; }
+
+    /** Closes and clears the runtime handle if one is present; safe to call repeatedly. */
+    private void releaseRuntime() {
+        NativeRuntimeHandle handle = runtimeHandle;
+        if (handle != null) {
+            handle.close();
+            runtimeHandle = null;
+            logger.info("DataFusion native runtime released");
+        }
+    }
+}
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java
new file mode 100644
index 0000000000000..59f576d8f5cc4
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java
@@ -0,0 +1,125 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */ + +package org.opensearch.be.datafusion; + +import org.opensearch.action.search.SearchShardTask; +import org.opensearch.be.datafusion.jni.StreamHandle; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.IndexFilterTree; +import org.opensearch.search.SearchExecutionContext; + +import java.io.IOException; + +/** + * DataFusion-specific search execution context. + *

+ * Carries the DataFusion query plan, engine searcher, optional {@link IndexFilterTree},
+ * and the native result stream handle after execution.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public class DatafusionContext implements SearchExecutionContext {
+
+    private final DatafusionSearcher engineSearcher;
+    private final NativeRuntimeHandle nativeRuntime;
+    private final SearchShardTask task;
+    private DatafusionQuery datafusionQuery;
+    private IndexFilterTree filterTree;
+    private StreamHandle streamHandle;
+
+    /**
+     * Creates a DataFusion execution context
+     * @param task the search shard task
+     * @param reader the DataFusion reader providing index data
+     * @param nativeRuntime handle to the native DataFusion runtime
+     */
+    public DatafusionContext(SearchShardTask task, DatafusionReader reader, NativeRuntimeHandle nativeRuntime) {
+        this.task = task;
+        this.engineSearcher = new DatafusionSearcher(reader.getReaderHandle());
+        this.nativeRuntime = nativeRuntime;
+    }
+
+    /**
+     * Closes the result stream handle, then the filter tree, then the searcher. Each step
+     * runs even if an earlier one throws. Both owned references are cleared before they
+     * are closed so a repeated {@code close()} does not close them a second time.
+     */
+    @Override
+    public void close() throws IOException {
+        StreamHandle stream = streamHandle;
+        streamHandle = null;
+        IndexFilterTree tree = filterTree;
+        filterTree = null;
+        try {
+            if (stream != null) {
+                stream.close();
+            }
+        } finally {
+            try {
+                if (tree != null) {
+                    tree.close();
+                }
+            } finally {
+                engineSearcher.close();
+            }
+        }
+    }
+
+    /**
+     * Returns the live native runtime pointer for JNI calls.
+     */
+    public long getRuntimePtr() {
+        return nativeRuntime.get();
+    }
+
+    /** Returns the DataFusion query plan. */
+    public DatafusionQuery getDatafusionQuery() {
+        return datafusionQuery;
+    }
+
+    /**
+     * Sets the DataFusion query plan.
+     * @param query the DataFusion query to set
+     */
+    public void setDatafusionQuery(DatafusionQuery query) {
+        this.datafusionQuery = query;
+    }
+
+    /** Returns the index filter tree, or {@code null} if not set. */
+    public IndexFilterTree getFilterTree() {
+        return filterTree;
+    }
+
+    /**
+     * Sets the index filter tree for indexed query execution.
+     * @param filterTree the index filter tree
+     */
+    public void setFilterTree(IndexFilterTree filterTree) {
+        this.filterTree = filterTree;
+    }
+
+    /**
+     * Returns the native result stream handle, or {@code null} if execution has not completed.
+     */
+    public StreamHandle getStreamHandle() {
+        return streamHandle;
+    }
+
+    /**
+     * Sets the native result stream handle after query execution.
+     *
+     * @param streamHandle the native result stream handle
+     */
+    public void setStreamHandle(StreamHandle streamHandle) {
+        this.streamHandle = streamHandle;
+    }
+
+    @Override
+    public SearchShardTask task() {
+        return task;
+    }
+
+    @Override
+    public DatafusionSearcher getSearcher() {
+        return engineSearcher;
+    }
+}
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionQuery.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionQuery.java
new file mode 100644
index 0000000000000..c1d420dd8fdb3
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionQuery.java
@@ -0,0 +1,52 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.be.datafusion;
+
+/**
+ * Represents a DataFusion query — wraps substrait plan bytes and execution metadata.
+ */
+public class DatafusionQuery {
+
+    private final String indexName;
+    private final byte[] substraitBytes;
+    private boolean fetchPhase;
+
+    /**
+     * Creates a query with the given index name and serialized substrait plan.
+     * @param indexName the target index name
+     * @param substraitBytes the serialized substrait plan bytes
+     */
+    public DatafusionQuery(String indexName, byte[] substraitBytes) {
+        this.indexName = indexName;
+        this.substraitBytes = substraitBytes;
+    }
+
+    /** Returns the target index name. */
+    public String getIndexName() {
+        return indexName;
+    }
+
+    /** Returns the serialized substrait plan bytes. */
+    public byte[] getSubstraitBytes() {
+        return substraitBytes;
+    }
+
+    /** Returns whether this query is in the fetch phase. */
+    public boolean isFetchPhase() {
+        return fetchPhase;
+    }
+
+    /**
+     * Sets whether this query is in the fetch phase.
+     * @param fetchPhase true if this query is in the fetch phase
+     */
+    public void setFetchPhase(boolean fetchPhase) {
+        this.fetchPhase = fetchPhase;
+    }
+}
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReader.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReader.java
new file mode 100644
index 0000000000000..356e486467e1e
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReader.java
@@ -0,0 +1,65 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.be.datafusion;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.be.datafusion.jni.ReaderHandle;
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.index.engine.exec.WriterFileSet;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.util.Collection;
+
+/**
+ * DataFusion reader for JNI operations.
+ *

+ * Each reader represents a point-in-time snapshot of parquet/arrow files for a shard.
+ * Created from a catalog snapshot during refresh; closed when the associated catalog snapshot is removed.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public class DatafusionReader implements Closeable {
+
+    private static final Logger logger = LogManager.getLogger(DatafusionReader.class);
+    private final String directoryPath;
+    private final ReaderHandle readerHandle;
+
+    /**
+     * Creates a DatafusionReader for the given shard directory and files.
+     * The file names of all given file sets are flattened into one array and handed,
+     * together with the directory, to a new native {@link ReaderHandle}.
+     *
+     * @param directoryPath shard data directory
+     * @param files The file metadata collection; {@code null} is treated as "no files"
+     */
+    public DatafusionReader(String directoryPath, Collection<WriterFileSet> files) {
+        this.directoryPath = directoryPath;
+        String[] fileNames = new String[0];
+        if (files != null) {
+            fileNames = files.stream().flatMap(writerFileSet -> writerFileSet.files().stream()).toArray(String[]::new);
+        }
+        readerHandle = new ReaderHandle(directoryPath, fileNames);
+    }
+
+    /** Closes the underlying native reader handle. */
+    @Override
+    public void close() throws IOException {
+        readerHandle.close();
+        logger.debug("DatafusionReader closed for [{}]", directoryPath);
+    }
+
+    /**
+     * Returns the type-safe handle to the native reader.
+     * Callers should hold this reference and call
+     * {@link ReaderHandle#getPointer()} only at JNI invocation time.
+     */
+    public ReaderHandle getReaderHandle() {
+        return readerHandle;
+    }
+}
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReaderManager.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReaderManager.java
new file mode 100644
index 0000000000000..f97f11f78b743
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReaderManager.java
@@ -0,0 +1,84 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.be.datafusion;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.index.engine.dataformat.DataFormat;
+import org.opensearch.index.engine.exec.CatalogSnapshot;
+import org.opensearch.index.engine.exec.EngineReaderManager;
+import org.opensearch.index.shard.ShardPath;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Manages {@link DatafusionReader} instances (native memory).
+ *

+ * Keeps one {@link DatafusionReader} per {@link CatalogSnapshot}. A reader is created in
+ * {@link #afterRefresh} for a snapshot that is not yet known, looked up by
+ * {@link #getReader}, and closed in {@link #onDeleted}.
+ * NOTE(review): no reference counting or atomic swap is implemented here yet, and the
+ * map is a plain {@link HashMap} — confirm callers serialize access.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public class DatafusionReaderManager implements EngineReaderManager {
+
+    // HashMap (not a concurrent map) is kept on purpose: the plugin currently looks up
+    // readers with a null snapshot key, which a ConcurrentHashMap would reject.
+    final Map<CatalogSnapshot, DatafusionReader> readers = new HashMap<>();
+    private final DataFormat dataFormat;
+    private final String directoryPath;
+
+    /**
+     * Creates a reader manager
+     * @param dataFormat the data format for this reader
+     * @param shardPath the shard path to read data from
+     */
+    public DatafusionReaderManager(DataFormat dataFormat, ShardPath shardPath) {
+        this.dataFormat = dataFormat;
+        directoryPath = shardPath.getDataPath().resolve(dataFormat.name()).toString();
+    }
+
+    /**
+     * Returns the reader registered for the given snapshot.
+     *
+     * @throws IOException if no reader is registered for the snapshot
+     */
+    @Override
+    public DatafusionReader getReader(CatalogSnapshot catalogSnapshot) throws IOException {
+        // Null values are never stored, so a single lookup distinguishes "absent" from "present".
+        DatafusionReader reader = readers.get(catalogSnapshot);
+        if (reader == null) {
+            throw new IOException("No DataFusion reader available");
+        }
+        return reader;
+    }
+
+    /**
+     * Removes and closes the reader of a deleted snapshot. A snapshot that never had a
+     * reader (for example one deleted before any refresh saw it) is ignored.
+     */
+    @Override
+    public void onDeleted(CatalogSnapshot catalogSnapshot) throws IOException {
+        DatafusionReader reader = readers.remove(catalogSnapshot);
+        if (reader != null) {
+            reader.close();
+        }
+    }
+
+    @Override
+    public void onFilesDeleted(Collection files) throws IOException {
+        // TODO: evict deleted files from cache manager
+    }
+
+    @Override
+    public void onFilesAdded(Collection files) throws IOException {
+        // TODO: Add new files to cache manager
+    }
+
+    @Override
+    public void beforeRefresh() throws IOException {}
+
+    /**
+     * Registers a reader for the refreshed snapshot unless nothing changed or the
+     * snapshot already has one.
+     */
+    @Override
+    public void afterRefresh(boolean didRefresh, CatalogSnapshot catalogSnapshot) throws IOException {
+        if (!didRefresh) return;
+        // This catalog snapshot is already present in the reader manager
+        if (readers.containsKey(catalogSnapshot)) {
+            return;
+        }
+        DatafusionReader reader = new DatafusionReader(directoryPath, catalogSnapshot.getSearchableFiles(dataFormat.name()));
+        readers.put(catalogSnapshot, reader);
+    }
+}
diff --git
a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionResultStream.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionResultStream.java new file mode 100644 index 0000000000000..7daad7fc6e29a --- /dev/null +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionResultStream.java @@ -0,0 +1,89 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.datafusion; + +import org.opensearch.analytics.backend.EngineResultBatch; +import org.opensearch.analytics.backend.EngineResultStream; +import org.opensearch.be.datafusion.jni.NativeBridge; +import org.opensearch.be.datafusion.jni.StreamHandle; +import org.opensearch.common.annotation.ExperimentalApi; + +import java.util.Iterator; +import java.util.NoSuchElementException; + +/** + * {@link EngineResultStream} backed by a native DataFusion record batch stream. + *

+ * Reads Arrow record batches from the native stream via JNI and exposes them
+ * as {@link EngineResultBatch} instances. The stream is single-pass; calling
+ * {@link #iterator()} multiple times returns the same iterator.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public class DatafusionResultStream implements EngineResultStream {
+
+    private final StreamHandle streamHandle;
+    private volatile BatchIterator iteratorInstance;
+
+    /**
+     * Creates a result stream
+     * @param streamHandle the native stream handle
+     */
+    public DatafusionResultStream(StreamHandle streamHandle) {
+        this.streamHandle = streamHandle;
+    }
+
+    /**
+     * Returns the single iterator over this stream, creating it on first use.
+     * Creation is guarded so that concurrent first calls cannot produce two iterators
+     * over the same native stream.
+     */
+    @Override
+    public Iterator<EngineResultBatch> iterator() {
+        BatchIterator existing = iteratorInstance;
+        if (existing == null) {
+            synchronized (this) {
+                existing = iteratorInstance;
+                if (existing == null) {
+                    existing = new BatchIterator(streamHandle);
+                    iteratorInstance = existing;
+                }
+            }
+        }
+        return existing;
+    }
+
+    @Override
+    public void close() {
+        streamHandle.close();
+    }
+
+    /**
+     * Iterator that pulls Arrow record batches from the native stream via JNI.
+     * Each call to {@link #next()} returns a batch wrapping the current Arrow data.
+     */
+    static class BatchIterator implements Iterator<EngineResultBatch> {
+
+        private final StreamHandle streamHandle;
+        /** {@code null} until the stream has been advanced; then whether a batch is pending. */
+        private Boolean hasNext;
+
+        BatchIterator(StreamHandle streamHandle) {
+            this.streamHandle = streamHandle;
+        }
+
+        @Override
+        public boolean hasNext() {
+            if (hasNext == null) {
+                // streamNext takes (runtimePtr, streamPtr). The handle is constructed with the
+                // stream pointer as its primary pointer and the runtime pointer as its second
+                // value, so getStreamPtr() supplies the runtime pointer here.
+                long arrowArrayAddr = NativeBridge.streamNext(streamHandle.getStreamPtr(), streamHandle.getPointer());
+                hasNext = arrowArrayAddr != 0;
+                // TODO: if hasNext, import ArrowArray into VectorSchemaRoot and cache for next()
+            }
+            return hasNext;
+        }
+
+        @Override
+        public EngineResultBatch next() {
+            if (hasNext() == false) {
+                throw new NoSuchElementException();
+            }
+            // TODO: return batch wrapping the imported VectorSchemaRoot, and reset hasNext to null
+            // only once a batch is actually handed out. Until then the cached state is kept so
+            // a failed next() does not advance the native stream and silently drop a batch.
+            throw new UnsupportedOperationException("Arrow C Data import not yet wired");
+        }
+    }
+}
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java
new file mode 100644
index 0000000000000..e100d2f2e0a57
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java
@@ -0,0 +1,56 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.be.datafusion;
+
+import org.opensearch.analytics.backend.EngineResultStream;
+import org.opensearch.analytics.backend.ExecutionContext;
+import org.opensearch.analytics.backend.SearchExecEngine;
+import org.opensearch.common.annotation.ExperimentalApi;
+
+import java.io.IOException;
+
+/**
+ * DataFusion-backed search execution engine.
+ *

+ * Delegates execution to the native DataFusion runtime via {@link DatafusionSearcher}.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public class DatafusionSearchExecEngine implements SearchExecEngine {
+
+    private final DatafusionContext datafusionContext;
+
+    /**
+     * Builds an engine that runs every request against the supplied context.
+     * @param datafusionContext the DataFusion execution context
+     */
+    public DatafusionSearchExecEngine(DatafusionContext datafusionContext) {
+        this.datafusionContext = datafusionContext;
+    }
+
+    /**
+     * Stores a {@link DatafusionQuery} for the request's table on the context.
+     * The plan bytes are still {@code null} because Substrait conversion is not wired.
+     */
+    @Override
+    public void prepare(ExecutionContext requestContext) {
+        // TODO: wire Substrait conversion (RelNode → Substrait bytes)
+        final byte[] planBytes = null;
+        final DatafusionQuery query = new DatafusionQuery(requestContext.getTableName(), planBytes);
+        datafusionContext.setDatafusionQuery(query);
+    }
+
+    /**
+     * Runs the context's searcher and wraps the stream handle it leaves on the context.
+     */
+    @Override
+    public EngineResultStream execute(ExecutionContext requestContext) throws IOException {
+        datafusionContext.getSearcher().search(datafusionContext);
+        return new DatafusionResultStream(datafusionContext.getStreamHandle());
+    }
+
+    /** Closes the underlying context. */
+    @Override
+    public void close() throws IOException {
+        datafusionContext.close();
+    }
+}
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java
new file mode 100644
index 0000000000000..1f86a5f6af180
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java
@@ -0,0 +1,81 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */ + +package org.opensearch.be.datafusion; + +import org.opensearch.be.datafusion.jni.NativeBridge; +import org.opensearch.be.datafusion.jni.ReaderHandle; +import org.opensearch.be.datafusion.jni.StreamHandle; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.EngineSearcher; + +import java.io.IOException; + +/** + * DataFusion searcher — executes substrait query plans against a native DataFusion reader. + *

+ * After {@link #search}, the result stream handle is available on the context
+ * via {@link DatafusionContext#getStreamHandle()}.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public class DatafusionSearcher implements EngineSearcher {
+
+    private final ReaderHandle readerHandle;
+
+    /**
+     * Builds a searcher bound to one native reader.
+     * @param readerHandle the native reader handle
+     */
+    public DatafusionSearcher(ReaderHandle readerHandle) {
+        this.readerHandle = readerHandle;
+    }
+
+    /**
+     * Dispatches to the indexed path when the context carries a filter tree,
+     * otherwise to the plain substrait path.
+     */
+    @Override
+    public void search(DatafusionContext context) throws IOException {
+        if (context.getFilterTree() != null) {
+            searchWithFilterTree(context);
+            return;
+        }
+        searchVanilla(context);
+    }
+
+    /** Indexed execution path; not implemented yet. */
+    private void searchWithFilterTree(DatafusionContext context) {
+        throw new UnsupportedOperationException("Indexed query path not yet wired");
+    }
+
+    /**
+     * Executes the context's query through JNI and stores the resulting stream handle
+     * (stream pointer plus runtime pointer) on the context.
+     */
+    private void searchVanilla(DatafusionContext context) throws IOException {
+        final DatafusionQuery plan = context.getDatafusionQuery();
+        if (plan == null) {
+            throw new IllegalStateException("DatafusionQuery must be set before search");
+        }
+        // Arguments are read in the same order as the JNI signature expects them.
+        final long readerPtr = readerHandle.getPointer();
+        final String table = plan.getIndexName();
+        final byte[] substrait = plan.getSubstraitBytes();
+        final long nativeStream = NativeBridge.executeQuery(readerPtr, table, substrait, context.getRuntimePtr());
+        final StreamHandle handle = new StreamHandle(nativeStream, context.getRuntimePtr());
+        context.setStreamHandle(handle);
+    }
+
+    /**
+     * Returns the type-safe handle to the native reader.
+     * Call {@link ReaderHandle#getPointer()} only at JNI invocation time
+     * to get the raw pointer with a liveness check.
+     */
+    public ReaderHandle getReaderHandle() {
+        return readerHandle;
+    }
+
+    @Override
+    public void close() {
+        // ReaderHandle lifecycle is owned by DatafusionReader / EngineReaderManager,
+        // not by the searcher. Do not close it here.
+    }
+}
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/NativeRuntimeHandle.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/NativeRuntimeHandle.java
new file mode 100644
index 0000000000000..77af5ff83e1d9
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/NativeRuntimeHandle.java
@@ -0,0 +1,77 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.be.datafusion;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+
+import java.io.Closeable;
+
+/**
+ * Thread-safe wrapper around a native runtime pointer.
+ *

+ * Encapsulates the raw {@code long} so that consumers hold this handle rather than
+ * a cached pointer that could outlive the runtime. The pointer is obtained via
+ * {@link #get()}, which performs a liveness check on every call.
+ *

+ * Implements {@link Closeable} so it integrates with try-with-resources,
+ * {@code IOUtils.close()}, and leak detection infrastructure.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public class NativeRuntimeHandle implements Closeable {
+
+    /** Raw native pointer; {@code 0} once the handle has been closed. */
+    private volatile long pointer;
+
+    /**
+     * Wraps a non-zero native runtime pointer.
+     *
+     * @param pointer the native runtime pointer (must be non-zero)
+     * @throws IllegalArgumentException if pointer is zero
+     */
+    public NativeRuntimeHandle(long pointer) {
+        if (pointer == 0L) {
+            throw new IllegalArgumentException("Cannot create NativeRuntimeHandle with null pointer");
+        }
+        this.pointer = pointer;
+    }
+
+    /**
+     * Returns the wrapped pointer after verifying the handle is still open.
+     *
+     * @throws IllegalStateException if the handle has been closed
+     */
+    public long get() {
+        final long live = pointer;
+        if (live != 0L) {
+            return live;
+        }
+        throw new IllegalStateException("Native runtime handle has been closed");
+    }
+
+    /**
+     * Reports whether {@link #close()} has not yet been called.
+     */
+    public boolean isOpen() {
+        return pointer != 0L;
+    }
+
+    /**
+     * Marks the handle closed. Repeated calls are no-ops; afterwards
+     * {@link #get()} throws.
+     */
+    @Override
+    public synchronized void close() {
+        final long live = pointer;
+        if (live == 0L) {
+            return;
+        }
+        // TODO: NativeBridge.closeGlobalRuntime(live);
+        pointer = 0L;
+    }
+}
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java
new file mode 100644
index 0000000000000..eae3f8e4d1286
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java
@@ -0,0 +1,85 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.be.datafusion.jni;
+
+/**
+ * Core JNI bridge to native DataFusion library.
+ * All native method declarations are centralized here.
+ */
+public final class NativeBridge {
+
+    static {
+        // TODO : NativeLibraryLoader.load("opensearch_datafusion_jni");
+    }
+
+    private NativeBridge() {}
+
+    /**
+     * Creates a native DataFusion reader
+     * @param path the directory path containing data files
+     * @param files the array of file names to read
+     * @return the native reader pointer
+     */
+    public static native long createDatafusionReader(String path, String[] files);
+
+    /**
+     * Closes the native DataFusion reader.
+     * @param ptr the native reader pointer
+     */
+    public static native void closeDatafusionReader(long ptr);
+
+    /**
+     * Creates a global DataFusion runtime with the given resource limits.
+     * @param memoryLimit the maximum memory in bytes
+     * @param cacheManagerPtr the native cache manager pointer
+     * @param spillDir the directory path for spill files
+     * @param spillLimit the maximum spill size in bytes
+     * @return the native runtime pointer (presumably released via {@link #closeGlobalRuntime} — TODO confirm)
+     */
+    public static native long createGlobalRuntime(long memoryLimit, long cacheManagerPtr, String spillDir, long spillLimit);
+
+    /**
+     * Closes the global DataFusion runtime.
+     * @param ptr the native runtime pointer
+     */
+    public static native void closeGlobalRuntime(long ptr);
+
+    /**
+     * Executes a substrait plan against the given reader and returns a stream pointer.
+     *
+     * @param readerPtr the native reader pointer
+     * @param tableName the target table name
+     * @param substraitPlan the serialized substrait plan bytes
+     * @param runtimePtr the native runtime pointer
+     * @return native stream pointer (caller must close via {@link #streamClose})
+     */
+    public static native long executeQuery(long readerPtr, String tableName, byte[] substraitPlan, long runtimePtr);
+
+    /**
+     * Returns the Arrow schema address for the given stream.
+     *
+     * @param streamPtr the native stream pointer
+     * @return ArrowSchema C Data Interface address
+     */
+    public static native long streamGetSchema(long streamPtr);
+
+    /**
+     * Loads the next record batch from the stream.
+     * Note that the runtime pointer is the first argument here, whereas
+     * {@link #executeQuery} takes it last.
+     *
+     * @param runtimePtr the native runtime pointer
+     * @param streamPtr the native stream pointer
+     * @return ArrowArray C Data Interface address, or 0 if end-of-stream
+     */
+    public static native long streamNext(long runtimePtr, long streamPtr);
+
+    /**
+     * Closes the native stream and releases associated resources.
+     *
+     * @param streamPtr the native stream pointer to close
+     */
+    public static native void streamClose(long streamPtr);
+}
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/ReaderHandle.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/ReaderHandle.java
new file mode 100644
index 0000000000000..bb00f2540e347
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/ReaderHandle.java
@@ -0,0 +1,34 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.be.datafusion.jni;
+
+import org.opensearch.analytics.backend.jni.NativeHandle;
+
+/**
+ * Type-safe handle for native reader.
+ */
+public final class ReaderHandle extends NativeHandle {
+
+    /**
+     * Creates a reader handle by allocating a native DataFusion reader for the given path and files.
+     * @param path the directory path containing data files
+     * @param files the array of file names to read
+     */
+    public ReaderHandle(String path, String[] files) {
+        super(NativeBridge.createDatafusionReader(path, files));
+    }
+
+    /**
+     * Closes the datafusion reader and releases any associated resources.
+     */
+    @Override
+    protected void doClose() {
+        NativeBridge.closeDatafusionReader(ptr);
+    }
+}
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/StreamHandle.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/StreamHandle.java
new file mode 100644
index 0000000000000..b2a3c3f29a9f6
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/StreamHandle.java
@@ -0,0 +1,40 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.be.datafusion.jni;
+
+import org.opensearch.analytics.backend.jni.NativeHandle;
+
+/**
+ * Type-safe handle for a native DataFusion result stream.
+ * Wraps the stream pointer returned by {@link NativeBridge#executeQuery}, which is the
+ * pointer released by {@link #doClose()}, together with the pointer of the runtime the
+ * stream was created on.
+ */
+public final class StreamHandle extends NativeHandle {
+
+    /** Pointer of the native runtime; passed as {@code runtimePtr} to {@link NativeBridge#streamNext}. */
+    private final long runtimePtr;
+
+    /**
+     * Creates a stream handle wrapping the native pointers.
+     * @param ptr the native stream pointer (the handle's primary pointer, closed by {@link #doClose()})
+     * @param runtimePtr the native runtime pointer the stream belongs to
+     */
+    public StreamHandle(long ptr, long runtimePtr) {
+        super(ptr);
+        this.runtimePtr = runtimePtr;
+    }
+
+    /** Returns the native runtime pointer this stream was created on. */
+    public long getRuntimePtr() {
+        return runtimePtr;
+    }
+
+    /**
+     * Returns the second constructor argument, which existing callers supply as the
+     * runtime pointer; kept under its original name for compatibility.
+     * Prefer {@link #getRuntimePtr()}; the stream pointer itself is the handle's primary pointer.
+     */
+    public long getStreamPtr() {
+        return runtimePtr;
+    }
+
+    @Override
+    protected void doClose() {
+        NativeBridge.streamClose(ptr);
+    }
+}
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/package-info.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/package-info.java
new file mode 100644
index 0000000000000..6a8481365c71c
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/package-info.java
@@ -0,0 +1,19 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * JNI bridge layer for DataFusion native library integration.
+ *
+ *

This package provides: + *

    + *
  • Type-safe native handle wrappers ({@link org.opensearch.be.datafusion.jni.ReaderHandle}, {@link org.opensearch.be.datafusion.jni.StreamHandle})
  • + *
  • Centralized native method declarations ({@link org.opensearch.be.datafusion.jni.NativeBridge})
  • + *
+ * + */ +package org.opensearch.be.datafusion.jni; diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/package-info.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/package-info.java index dccab0e7fb8a7..d69838a7fbd0d 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/package-info.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/package-info.java @@ -6,7 +6,5 @@ * compatible open source license. */ -/** - * DataFusion native execution engine back-end plugin. - */ +/** DataFusion native engine backend for OpenSearch analytics. */ package org.opensearch.be.datafusion; diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/resources/META-INF/services/org.opensearch.analytics.spi.AnalyticsBackEndPlugin b/sandbox/plugins/analytics-backend-datafusion/src/main/resources/META-INF/services/org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin similarity index 100% rename from sandbox/plugins/analytics-backend-datafusion/src/main/resources/META-INF/services/org.opensearch.analytics.spi.AnalyticsBackEndPlugin rename to sandbox/plugins/analytics-backend-datafusion/src/main/resources/META-INF/services/org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin diff --git a/sandbox/plugins/analytics-backend-lucene/build.gradle b/sandbox/plugins/analytics-backend-lucene/build.gradle new file mode 100644 index 0000000000000..42426fb8888e7 --- /dev/null +++ b/sandbox/plugins/analytics-backend-lucene/build.gradle @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +apply plugin: 'opensearch.internal-cluster-test' + +opensearchplugin { + description = 'OpenSearch plugin providing Lucene-based search execution engine' + classname = 'org.opensearch.lucene.LuceneSearchEnginePlugin' +} + +dependencies { + // Shared types and SPI interfaces (EngineBridge, AnalyticsBackEndPlugin, etc.) + // Also provides calcite-core transitively via api. + api project(':sandbox:libs:analytics-framework') + + implementation "org.apache.logging.log4j:log4j-api:${versions.log4j}" + implementation "org.apache.logging.log4j:log4j-core:${versions.log4j}" +} + +test { + systemProperty 'tests.security.manager', 'false' +} + +// TODO: Remove once back-end is built out with test suite +testingConventions.enabled = false diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java new file mode 100644 index 0000000000000..ed3c792be16af --- /dev/null +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java @@ -0,0 +1,79 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.lucene; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Weight; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.EngineSearcher; + +import java.io.IOException; +import java.util.List; + +/** + * Lucene-backed engine searcher. + *

+ * This class is stateless with respect to active queries + * + * @opensearch.experimental + */ +@ExperimentalApi +public class LuceneEngineSearcher implements EngineSearcher { + + private final IndexSearcher indexSearcher; + private final DirectoryReader directoryReader; + + /** + * Creates a new LuceneEngineSearcher. + * + * @param indexSearcher the Lucene index searcher + * @param directoryReader the Lucene directory reader + */ + public LuceneEngineSearcher(IndexSearcher indexSearcher, DirectoryReader directoryReader) { + this.indexSearcher = indexSearcher; + this.directoryReader = directoryReader; + } + + /** + * Execute: create a Weight from the query, register it on the + * context's lifecycle manager, and store the key + segment metadata + * on the context for JNI callbacks. + * + * @param context the search context containing the query to execute + */ + @Override + public void search(LuceneSearchContext context) throws IOException { + Query query = context.getQuery(); + if (query == null) { + throw new IllegalStateException("No query set on LuceneSearchContext"); + } + Query rewritten = indexSearcher.rewrite(query); + Weight weight = indexSearcher.createWeight(rewritten, ScoreMode.COMPLETE_NO_SCORES, 1.0f); + List leaves = directoryReader.leaves(); + // TODO : Complete the wiring for search execution + + } + + /** Returns the underlying IndexSearcher. */ + public IndexSearcher getIndexSearcher() { + return indexSearcher; + } + + /** Returns the underlying DirectoryReader. 
*/ + public DirectoryReader getDirectoryReader() { + return directoryReader; + } + + @Override + public void close() {} +} diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java new file mode 100644 index 0000000000000..a14ec1645276a --- /dev/null +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java @@ -0,0 +1,81 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.lucene; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Weight; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.CollectorQueryLifecycleManager; +import org.opensearch.index.engine.exec.IndexFilterContext; + +import java.io.IOException; +import java.util.List; + +/** + * Lucene-specific index filter context. + *

+ * Holds the Weight (per-query), and manages per-segment scorers/collectors. + * One context per (query, reader) pair. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class LuceneIndexFilterContext implements IndexFilterContext { + + private final Weight weight; + private final List leaves; + private final CollectorQueryLifecycleManager collectorManager = new CollectorQueryLifecycleManager(); + + /** + * Creates a new LuceneIndexFilterContext for the given query and reader. + * + * @param query the Lucene query to filter by + * @param reader the directory reader over the index + */ + public LuceneIndexFilterContext(Query query, DirectoryReader reader) throws IOException { + IndexSearcher searcher = new IndexSearcher(reader); + Query rewritten = searcher.rewrite(query); + this.weight = searcher.createWeight(rewritten, ScoreMode.COMPLETE_NO_SCORES, 1.0f); + this.leaves = reader.leaves(); + } + + @Override + public int segmentCount() { + return leaves.size(); + } + + @Override + public int segmentMaxDoc(int segmentOrd) { + return leaves.get(segmentOrd).reader().maxDoc(); + } + + Weight getWeight() { + return weight; + } + + List getLeaves() { + return leaves; + } + + /** + * Returns the collector lifecycle manager + */ + public CollectorQueryLifecycleManager getCollectorManager() { + return collectorManager; + } + + @Override + public void close() { + collectorManager.close(); + } +} diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java new file mode 100644 index 0000000000000..9851e07d33bbc --- /dev/null +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java @@ -0,0 +1,134 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed 
under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.lucene; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.CollectorQueryLifecycleManager; +import org.opensearch.index.engine.exec.IndexFilterProvider; +import org.opensearch.index.engine.exec.SegmentCollector; + +import java.io.IOException; +import java.util.BitSet; + +/** + * Lucene-backed {@link IndexFilterProvider}. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class LuceneIndexFilterProvider implements IndexFilterProvider { + + /** Creates a new LuceneIndexFilterProvider. */ + public LuceneIndexFilterProvider() {} + + @Override + public LuceneIndexFilterContext createContext(Query query, DirectoryReader reader) throws IOException { + return new LuceneIndexFilterContext(query, reader); + } + + /** + * Creates a collector for the given segment and registers it in the + * context's {@link CollectorQueryLifecycleManager}. + * + * @param context the index filter context + * @param segmentOrd the segment ordinal + * @param minDoc the minimum document ID + * @param maxDoc the maximum document ID + * @return an int key that identifies this collector across JNI + */ + @Override + public int createCollector(LuceneIndexFilterContext context, int segmentOrd, int minDoc, int maxDoc) { + SegmentCollector collector = createCollectorInternal(context, segmentOrd, minDoc, maxDoc); + return context.getCollectorManager().registerCollector(collector); + } + + /** + * Collects matching doc IDs for the collector identified by {@code key}. 
+ * + * @param context the index filter context + * @param key the collector key + * @param minDoc the minimum document ID + * @param maxDoc the maximum document ID + */ + public long[] collectDocs(LuceneIndexFilterContext context, int key, int minDoc, int maxDoc) { + return context.getCollectorManager().collectDocs(key, minDoc, maxDoc); + } + + /** + * Releases the collector identified by {@code key}. + * + * @param context the index filter context + * @param key the collector key + */ + public void releaseCollector(LuceneIndexFilterContext context, int key) { + context.getCollectorManager().releaseCollector(key); + } + + @Override + public void close() {} + + private SegmentCollector createCollectorInternal(LuceneIndexFilterContext context, int segmentOrd, int minDoc, int maxDoc) { + try { + Scorer scorer = context.getWeight().scorer(context.getLeaves().get(segmentOrd)); + if (scorer == null) { + return EMPTY_COLLECTOR; + } + return new LuceneSegmentCollector(scorer.iterator(), minDoc, maxDoc); + } catch (IOException e) { + return EMPTY_COLLECTOR; + } + } + + private static final SegmentCollector EMPTY_COLLECTOR = (min, max) -> new long[0]; + + private static class LuceneSegmentCollector implements SegmentCollector { + private final DocIdSetIterator iterator; + private final int collectorMinDoc; + private final int collectorMaxDoc; + private int currentDoc = -1; + + LuceneSegmentCollector(DocIdSetIterator iterator, int minDoc, int maxDoc) { + this.iterator = iterator; + this.collectorMinDoc = minDoc; + this.collectorMaxDoc = maxDoc; + } + + @Override + public long[] collectDocs(int minDoc, int maxDoc) { + int effectiveMin = Math.max(minDoc, collectorMinDoc); + int effectiveMax = Math.min(maxDoc, collectorMaxDoc); + if (effectiveMin >= effectiveMax) { + return new long[0]; + } + + BitSet bitset = new BitSet(effectiveMax - effectiveMin); + try { + int docId = currentDoc; + if (docId == DocIdSetIterator.NO_MORE_DOCS || docId >= collectorMaxDoc) { + return new 
long[0]; + } + if (docId < effectiveMin) { + docId = iterator.advance(effectiveMin); + } + while (docId != DocIdSetIterator.NO_MORE_DOCS && docId < effectiveMax) { + bitset.set(docId - effectiveMin); + docId = iterator.nextDoc(); + } + currentDoc = docId; + } catch (IOException e) { + return new long[0]; + } + return bitset.toLongArray(); + } + } +} diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneReaderManager.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneReaderManager.java new file mode 100644 index 0000000000000..c46d480bccfb3 --- /dev/null +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneReaderManager.java @@ -0,0 +1,84 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.lucene; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.search.ReferenceManager; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.exec.CatalogSnapshot; +import org.opensearch.index.engine.exec.EngineReaderManager; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +/** + * Lucene implementation of {@link EngineReaderManager}. + *

+ * Wraps Lucene's {@link ReferenceManager} for {@link DirectoryReader}. + * Acquire increments the ref count on the current reader; + * release decrements it — same pattern as {@code DatafusionReaderManager}. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class LuceneReaderManager implements EngineReaderManager { + + Map readers = new HashMap<>(); + DataFormat dataFormat; + + /** + * Creates a new LuceneReaderManager for the given data format. + * + * @param dataFormat the data format for this reader manager + */ + @SuppressWarnings("unchecked") + public LuceneReaderManager(DataFormat dataFormat) { + this.dataFormat = dataFormat; + } + + /** + * Called when files are deleted after merges. + * + * @param files the collection of deleted file paths + */ + public void onFilesDeleted(Collection files) throws IOException { + // no-op + } + + @Override + public void onFilesAdded(Collection files) throws IOException { + // no-op + } + + @Override + public DirectoryReader getReader(CatalogSnapshot catalogSnapshot) throws IOException { + return readers.get(catalogSnapshot); + } + + @Override + public void beforeRefresh() throws IOException { + + } + + @Override + public void afterRefresh(boolean didRefresh, CatalogSnapshot catalogSnapshot) throws IOException { + if (readers.containsKey(catalogSnapshot)) { + return; + } + readers.put(catalogSnapshot, (DirectoryReader) catalogSnapshot.getReader(dataFormat)); + } + + @Override + public void onDeleted(CatalogSnapshot catalogSnapshot) throws IOException { + readers.remove(catalogSnapshot).close(); + } +} diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java new file mode 100644 index 0000000000000..2dee8508d3ee5 --- /dev/null +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java @@ -0,0 
+1,76 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.lucene; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.opensearch.action.search.SearchShardTask; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.search.SearchExecutionContext; + +import java.io.IOException; + +/** + * Lucene-specific search execution context. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class LuceneSearchContext implements SearchExecutionContext { + + private final SearchShardTask task; + private final DirectoryReader reader; + private final LuceneEngineSearcher searcher; + private Query query; + + /** + * Creates a new LuceneSearchContext. + * + * @param task the search shard task + * @param reader the directory reader over the index + * @param query the Lucene query to execute + */ + public LuceneSearchContext(SearchShardTask task, DirectoryReader reader, Query query) throws IOException { + this.reader = reader; + IndexSearcher indexSearcher = new IndexSearcher(reader); + this.searcher = new LuceneEngineSearcher(indexSearcher, reader); + this.task = task; + this.query = query; + } + + /** Returns the current query. */ + public Query getQuery() { + return query; + } + + @Override + public SearchShardTask task() { + return task; + } + + @Override + public LuceneEngineSearcher getSearcher() { + return searcher; + } + + /** + * Sets the query for this context. 
+ * + * @param query the Lucene query to set + */ + public void setQuery(Query query) { + this.query = query; + } + + @Override + public void close() throws IOException { + searcher.close(); + } +} diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java new file mode 100644 index 0000000000000..747f476ed059b --- /dev/null +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java @@ -0,0 +1,45 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.lucene; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.exec.EngineReaderManager; +import org.opensearch.index.shard.ShardPath; +import org.opensearch.plugins.SearchBackEndPlugin; + +import java.io.IOException; +import java.util.List; + +/** + * Plugin providing Lucene as an index filter or source provider. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class LuceneSearchEnginePlugin implements SearchBackEndPlugin { + + /** Creates a new LuceneSearchEnginePlugin. 
*/ + public LuceneSearchEnginePlugin() {} + + @Override + public String name() { + return "lucene-analytics-backend"; + } + + @Override + public EngineReaderManager createReaderManager(DataFormat format, ShardPath shardPath) throws IOException { + return new LuceneReaderManager(format); + } + + @Override + public List getSupportedFormats() { + return List.of(); + } +} diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/package-info.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/package-info.java new file mode 100644 index 0000000000000..dd817c2a618f7 --- /dev/null +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/package-info.java @@ -0,0 +1,10 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** Lucene-backed analytics backend plugin. 
*/ +package org.opensearch.be.lucene; diff --git a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/AnalyticsPlugin.java b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/AnalyticsPlugin.java index 1191e4215afb2..afe06cd1e413c 100644 --- a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/AnalyticsPlugin.java +++ b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/AnalyticsPlugin.java @@ -17,7 +17,7 @@ import org.opensearch.analytics.exec.DefaultPlanExecutor; import org.opensearch.analytics.exec.QueryPlanExecutor; import org.opensearch.analytics.schema.OpenSearchSchemaBuilder; -import org.opensearch.analytics.spi.AnalyticsBackEndPlugin; +import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.inject.Module; @@ -54,12 +54,12 @@ public class AnalyticsPlugin extends Plugin implements ExtensiblePlugin { */ public AnalyticsPlugin() {} - private final List backEnds = new ArrayList<>(); + private final List backEnds = new ArrayList<>(); private SqlOperatorTable operatorTable; @Override public void loadExtensions(ExtensionLoader loader) { - backEnds.addAll(loader.loadExtensions(AnalyticsBackEndPlugin.class)); + backEnds.addAll(loader.loadExtensions(AnalyticsSearchBackendPlugin.class)); operatorTable = aggregateOperatorTables(); } @@ -77,7 +77,10 @@ public Collection createComponents( IndexNameExpressionResolver indexNameExpressionResolver, Supplier repositoriesServiceSupplier ) { - return List.of(new DefaultPlanExecutor(backEnds), new DefaultEngineContext(clusterService, operatorTable)); + return List.of( + new DefaultPlanExecutor(backEnds, null/* TODO: pass indices service */, clusterService), + new DefaultEngineContext(clusterService, operatorTable) + ); } @Override @@ -91,17 +94,8 @@ public Collection createGuiceModules() 
{ } private SqlOperatorTable aggregateOperatorTables() { - List tables = new ArrayList<>(); - for (AnalyticsBackEndPlugin backEnd : backEnds) { - SqlOperatorTable table = backEnd.operatorTable(); - if (table != null) { - tables.add(table); - } - } - if (tables.isEmpty()) { - return SqlOperatorTables.of(); - } - return SqlOperatorTables.chain(tables.toArray(new SqlOperatorTable[0])); + // TODO: re-wire once operatorTable() is added back to AnalyticsSearchBackendPlugin + return SqlOperatorTables.of(); } /** diff --git a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java index a766466fc7b47..e45636150896a 100644 --- a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java +++ b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java @@ -9,37 +9,126 @@ package org.opensearch.analytics.exec; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.TableScan; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.analytics.spi.AnalyticsBackEndPlugin; +import org.opensearch.action.search.SearchShardTask; +import org.opensearch.analytics.backend.EngineResultBatch; +import org.opensearch.analytics.backend.EngineResultStream; +import org.opensearch.analytics.backend.ExecutionContext; +import org.opensearch.analytics.backend.SearchExecEngine; +import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.index.IndexService; +import org.opensearch.index.engine.DataFormatAwareEngine; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.indices.IndicesService; import java.util.ArrayList; +import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; 
+import java.util.Map; +import java.util.Set; /** * {@link QueryPlanExecutor} default implementation. + *

+ * Acquires a composite reader, creates a per-query {@link SearchExecEngine} + * bound to the reader, and delegates convert + execute to it. + * No backend-specific context is exposed to this class. */ public class DefaultPlanExecutor implements QueryPlanExecutor> { private static final Logger logger = LogManager.getLogger(DefaultPlanExecutor.class); + private final Map backEnds; + private final IndicesService indicesService; + private final ClusterService clusterService; /** - * Creates a plan executor with the given back-end plugins. + * Constructs a DefaultPlanExecutor with the given plugins and services. * - * @param backEnds registered back-end engine plugins + * @param plugins list of analytics search backend plugins + * @param indicesService service for accessing index shards + * @param clusterService service for accessing cluster state */ - public DefaultPlanExecutor(List backEnds) { - // TODO: use back-ends + public DefaultPlanExecutor(List plugins, IndicesService indicesService, ClusterService clusterService) { + this.backEnds = new LinkedHashMap<>(); + for (AnalyticsSearchBackendPlugin plugin : plugins) { + this.backEnds.put(plugin.name(), plugin); + } + this.indicesService = indicesService; + this.clusterService = clusterService; } + @SuppressWarnings("unchecked") @Override public Iterable execute(RelNode logicalFragment, Object context) { - RelNode fragment = logicalFragment; - int fieldCount = fragment.getRowType().getFieldCount(); + String tableName = extractTableName(logicalFragment); + AnalyticsSearchBackendPlugin plugin = selectBackEnd(); + if (plugin == null) { + return new ArrayList<>(); + } - logger.debug("[DefaultPlanExecutor] Executing fragment with {} fields: {}", fieldCount, fragment.explain()); + IndexShard shard = resolveShard(tableName); + DataFormatAwareEngine dataFormatAwareEngine = shard.getCompositeEngine(); + if (dataFormatAwareEngine == null) { + throw new IllegalStateException("No CompositeEngine on shard [" + shard.shardId() + 
"]"); + } - // Stub: return empty result set. - return new ArrayList<>(); + SearchShardTask task = null; // TODO : init task + List rows = new ArrayList<>(); + try (DataFormatAwareEngine.DataFormatAwareReader dataFormatAwareReader = dataFormatAwareEngine.acquireReader()) { + ExecutionContext ctx = new ExecutionContext(tableName, task, dataFormatAwareReader); + try (SearchExecEngine engine = plugin.searcher(ctx)) { + logger.info("[DefaultPlanExecutor] Executing via [{}]", plugin.name()); + try (EngineResultStream resultStream = engine.execute(ctx)) { + Iterator batchIterator = resultStream.iterator(); + while (batchIterator.hasNext()) { + EngineResultBatch batch = batchIterator.next(); + List fieldNames = batch.getFieldNames(); + for (int row = 0; row < batch.getRowCount(); row++) { + Object[] rowValues = new Object[fieldNames.size()]; + for (int col = 0; col < fieldNames.size(); col++) { + rowValues[col] = batch.getFieldValue(fieldNames.get(col), row); + } + rows.add(rowValues); + } + } + } + } + } catch (Exception e) { + throw new RuntimeException("Execution failed for [" + plugin.name() + "]", e); + } + return rows; + } + + static String extractTableName(RelNode node) { + if (node instanceof TableScan) { + List qn = node.getTable().getQualifiedName(); + return qn.get(qn.size() - 1); + } + for (RelNode input : node.getInputs()) { + String name = extractTableName(input); + if (name != null) return name; + } + throw new IllegalArgumentException("No TableScan found in plan fragment"); + } + + private IndexShard resolveShard(String indexName) { + IndexService indexService = indicesService.indexService(clusterService.state().metadata().index(indexName).getIndex()); + if (indexService == null) throw new IllegalStateException("Index [" + indexName + "] not on this node"); + Set shardIds = indexService.shardIds(); + if (shardIds.isEmpty()) throw new IllegalStateException("No shards for [" + indexName + "]"); + return 
indexService.getShardOrNull(shardIds.iterator().next()); + } + + private AnalyticsSearchBackendPlugin selectBackEnd() { + if (backEnds.isEmpty()) { + logger.warn("No back-end plugins registered — queries will return empty results"); + return null; + } + // TODO : This is placeholder - select based on data format + return backEnds.values().iterator().next(); } } diff --git a/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/DefaultPlanExecutorTests.java b/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/DefaultPlanExecutorTests.java deleted file mode 100644 index a61246f3dfc41..0000000000000 --- a/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/DefaultPlanExecutorTests.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.analytics.engine; - -import org.apache.calcite.jdbc.JavaTypeFactoryImpl; -import org.apache.calcite.plan.RelOptCluster; -import org.apache.calcite.plan.RelTraitSet; -import org.apache.calcite.plan.hep.HepPlanner; -import org.apache.calcite.plan.hep.HepProgramBuilder; -import org.apache.calcite.rel.AbstractRelNode; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.sql.type.SqlTypeName; -import org.opensearch.analytics.exec.DefaultPlanExecutor; -import org.opensearch.test.OpenSearchTestCase; - -import java.util.List; - -/** - * Tests for {@link DefaultPlanExecutor}. 
- */ -public class DefaultPlanExecutorTests extends OpenSearchTestCase { - - private RelDataTypeFactory typeFactory; - private RelOptCluster cluster; - - @Override - public void setUp() throws Exception { - super.setUp(); - typeFactory = new JavaTypeFactoryImpl(); - RexBuilder rexBuilder = new RexBuilder(typeFactory); - HepPlanner planner = new HepPlanner(new HepProgramBuilder().build()); - cluster = RelOptCluster.create(planner, rexBuilder); - } - - /** - * Test that execute() does not throw for a valid fragment. - */ - public void testExecuteDoesNotThrowForValidFragment() { - DefaultPlanExecutor service = new DefaultPlanExecutor(List.of()); - - RelNode fragment = createRelNodeWithFieldCount(3); - Object context = new Object(); - - Object result = service.execute(fragment, context); - assertNotNull("execute() stub should return non-null", result); - } - - /** - * Test that execute() works with a multi-field fragment. - */ - public void testExecuteWithMultiFieldFragment() { - DefaultPlanExecutor service = new DefaultPlanExecutor(List.of()); - - int fieldCount = 5; - RelNode fragment = createRelNodeWithFieldCount(fieldCount); - Object context = new Object(); - - Object result = service.execute(fragment, context); - assertNotNull("execute() stub should return non-null", result); - } - - /** - * Test that execute() works with a single-field fragment. 
- */ - public void testExecuteWithSingleFieldFragment() { - DefaultPlanExecutor service = new DefaultPlanExecutor(List.of()); - - RelNode fragment = createRelNodeWithFieldCount(1); - Object context = new Object(); - - Object result = service.execute(fragment, context); - assertNotNull("execute() stub should return non-null", result); - } - - private RelNode createRelNodeWithFieldCount(int fieldCount) { - RelDataType rowType = buildRowType(fieldCount); - return new StubRelNode(cluster, cluster.traitSet(), rowType); - } - - private RelDataType buildRowType(int fieldCount) { - RelDataTypeFactory.Builder builder = typeFactory.builder(); - for (int i = 0; i < fieldCount; i++) { - builder.add("field_" + i, SqlTypeName.VARCHAR); - } - return builder.build(); - } - - /** - * Minimal concrete RelNode for testing. Extends AbstractRelNode - * which provides default implementations for all RelNode methods. - */ - private static class StubRelNode extends AbstractRelNode { - StubRelNode(RelOptCluster cluster, RelTraitSet traitSet, RelDataType rowType) { - super(cluster, traitSet); - this.rowType = rowType; - } - } -} diff --git a/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/exec/DefaultPlanExecutorTests.java b/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/exec/DefaultPlanExecutorTests.java new file mode 100644 index 0000000000000..0b34e3ea6495b --- /dev/null +++ b/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/exec/DefaultPlanExecutorTests.java @@ -0,0 +1,407 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.analytics.exec; + +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgramBuilder; +import org.apache.calcite.rel.AbstractRelNode; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.analytics.backend.EngineResultBatch; +import org.opensearch.analytics.backend.EngineResultStream; +import org.opensearch.analytics.backend.ExecutionContext; +import org.opensearch.analytics.backend.SearchExecEngine; +import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.core.index.Index; +import org.opensearch.index.IndexService; +import org.opensearch.index.engine.DataFormatAwareEngine; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.dataformat.FieldTypeCapabilities; +import org.opensearch.index.engine.exec.CatalogSnapshot; +import org.opensearch.index.engine.exec.EngineReaderManager; +import org.opensearch.index.engine.exec.Segment; +import org.opensearch.index.engine.exec.WriterFileSet; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.indices.IndicesService; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; 
+import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * Tests for {@link DefaultPlanExecutor}. + */ +public class DefaultPlanExecutorTests extends OpenSearchTestCase { + + private RelDataTypeFactory typeFactory; + private RelOptCluster cluster; + + @Override + public void setUp() throws Exception { + super.setUp(); + typeFactory = new JavaTypeFactoryImpl(); + RexBuilder rexBuilder = new RexBuilder(typeFactory); + HepPlanner planner = new HepPlanner(new HepProgramBuilder().build()); + cluster = RelOptCluster.create(planner, rexBuilder); + } + + /** + * extractTableName returns the table name from a TableScan node. + */ + public void testExtractTableNameFromTableScan() { + RelOptTable table = mockTable("schema", "my_index"); + TableScan scan = new StubTableScan(cluster, cluster.traitSet(), table); + assertEquals("my_index", DefaultPlanExecutor.extractTableName(scan)); + } + + /** + * extractTableName throws when no TableScan is found. + */ + public void testExtractTableNameThrowsForNonTableScan() { + RelNode stub = new StubRelNode(cluster, cluster.traitSet(), buildRowType(1)); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> DefaultPlanExecutor.extractTableName(stub)); + assertTrue(ex.getMessage().contains("No TableScan found")); + } + + /** + * End-to-end: write file sets → catalog snapshot → DataFormatAwareEngine → + * DefaultPlanExecutor.execute() with mock backend returns rows via EngineResultStream. 
+ */ + public void testEndToEndExecuteWithMockBackend() throws IOException { + MockDataFormat format = new MockDataFormat(); + Path dir = createTempDir(); + + WriterFileSet fs1 = WriterFileSet.builder().directory(dir).writerGeneration(1L).addFile("gen1.parquet").addNumRows(2).build(); + WriterFileSet fs2 = WriterFileSet.builder().directory(dir).writerGeneration(2L).addFile("gen2.parquet").addNumRows(1).build(); + + Segment seg1 = Segment.builder(0L).addSearchableFiles(format, fs1).build(); + Segment seg2 = Segment.builder(1L).addSearchableFiles(format, fs2).build(); + MockCatalogSnapshot snapshot = new MockCatalogSnapshot(1L, List.of(seg1, seg2), format); + + MockReaderManager readerManager = new MockReaderManager(format.name()); + readerManager.afterRefresh(true, snapshot); + + DataFormatAwareEngine engine = new DataFormatAwareEngine(Map.of(format, readerManager)); + engine.setLatestSnapshot(snapshot); + + // Mock shard + cluster wiring + IndexShard shard = mock(IndexShard.class); + when(shard.getCompositeEngine()).thenReturn(engine); + + Index index = new Index("my_index", "uuid"); + IndexMetadata indexMetadata = mock(IndexMetadata.class); + when(indexMetadata.getIndex()).thenReturn(index); + + Metadata metadata = mock(Metadata.class); + when(metadata.index("my_index")).thenReturn(indexMetadata); + + ClusterState clusterState = mock(ClusterState.class); + when(clusterState.metadata()).thenReturn(metadata); + + ClusterService clusterService = mock(ClusterService.class); + when(clusterService.state()).thenReturn(clusterState); + + IndexService indexService = mock(IndexService.class); + when(indexService.shardIds()).thenReturn(Set.of(0)); + when(indexService.getShardOrNull(0)).thenReturn(shard); + + IndicesService indicesService = mock(IndicesService.class); + when(indicesService.indexService(index)).thenReturn(indexService); + + MockBackendPlugin backendPlugin = new MockBackendPlugin(format); + DefaultPlanExecutor executor = new 
DefaultPlanExecutor(List.of(backendPlugin), indicesService, clusterService); + + RelOptTable table = mockTable("my_index"); + TableScan scan = new StubTableScan(cluster, cluster.traitSet(), table); + + Iterable results = executor.execute(scan, new Object()); + List rows = new ArrayList<>(); + results.forEach(rows::add); + + assertEquals(3, rows.size()); + } + + private RelOptTable mockTable(String... qualifiedName) { + RelOptTable table = mock(RelOptTable.class); + when(table.getQualifiedName()).thenReturn(List.of(qualifiedName)); + when(table.getRowType()).thenReturn(buildRowType(1)); + return table; + } + + private RelDataType buildRowType(int fieldCount) { + RelDataTypeFactory.Builder builder = typeFactory.builder(); + for (int i = 0; i < fieldCount; i++) { + builder.add("field_" + i, SqlTypeName.VARCHAR); + } + return builder.build(); + } + + private static class StubRelNode extends AbstractRelNode { + StubRelNode(RelOptCluster cluster, RelTraitSet traitSet, RelDataType rowType) { + super(cluster, traitSet); + this.rowType = rowType; + } + } + + private static class StubTableScan extends TableScan { + StubTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptTable table) { + super(cluster, traitSet, List.of(), table); + } + } + + static class MockDataFormat implements DataFormat { + @Override + public String name() { + return "mock-columnar"; + } + + @Override + public long priority() { + return 100L; + } + + @Override + public Set supportedFields() { + return Set.of( + new FieldTypeCapabilities( + "integer", + Set.of(FieldTypeCapabilities.Capability.COLUMNAR_STORAGE, FieldTypeCapabilities.Capability.STORED_FIELDS) + ) + ); + } + } + + static class MockReaderManager implements EngineReaderManager { + private final String formatName; + private final Map readers = new HashMap<>(); + + MockReaderManager(String formatName) { + this.formatName = formatName; + } + + @Override + public Object getReader(CatalogSnapshot snapshot) { + return readers.get(snapshot); 
+ } + + @Override + public void beforeRefresh() {} + + @Override + public void afterRefresh(boolean didRefresh, CatalogSnapshot snapshot) { + if (didRefresh == false || readers.containsKey(snapshot)) return; + long totalRows = 0; + for (WriterFileSet wfs : snapshot.getSearchableFiles(formatName)) { + totalRows += wfs.numRows(); + } + readers.put(snapshot, totalRows); + } + + @Override + public void onDeleted(CatalogSnapshot snapshot) { + readers.remove(snapshot); + } + + @Override + public void onFilesDeleted(Collection files) {} + + @Override + public void onFilesAdded(Collection files) {} + } + + static class MockCatalogSnapshot extends CatalogSnapshot { + private final List segments; + private final DataFormat format; + + MockCatalogSnapshot(long generation, List segments, DataFormat format) { + super("mock-snapshot", generation, 1L); + this.segments = segments; + this.format = format; + } + + @Override + public Map getUserData() { + return Map.of(); + } + + @Override + public long getId() { + return generation; + } + + @Override + public List getSegments() { + return segments; + } + + @Override + public Collection getSearchableFiles(String dataFormat) { + List result = new ArrayList<>(); + for (Segment seg : segments) { + WriterFileSet wfs = seg.dfGroupedSearchableFiles().get(dataFormat); + if (wfs != null) result.add(wfs); + } + return result; + } + + @Override + public Set getDataFormats() { + return Set.of(format.name()); + } + + @Override + public long getLastWriterGeneration() { + return generation; + } + + @Override + public String serializeToString() { + return "mock-snapshot-" + generation; + } + + @Override + public void setCatalogSnapshotMap(Map map) {} + + @Override + public void setUserData(Map userData, boolean b) {} + + @Override + public Object getReader(DataFormat dataFormat) { + return null; + } + + @Override + protected void closeInternal() {} + } + + static class MockSearchExecEngine implements SearchExecEngine { + private final long 
totalRows; + + MockSearchExecEngine(long totalRows) { + this.totalRows = totalRows; + } + + @Override + public void prepare(ExecutionContext context) {} + + @Override + public EngineResultStream execute(ExecutionContext context) { + return new MockResultStream(totalRows); + } + + @Override + public void close() {} + } + + static class MockResultStream implements EngineResultStream { + private final long rowCount; + + MockResultStream(long rowCount) { + this.rowCount = rowCount; + } + + @Override + public Iterator iterator() { + return new MockBatchIterator(rowCount); + } + + @Override + public void close() {} + } + + static class MockBatchIterator implements Iterator { + private final long rowCount; + private boolean consumed; + + MockBatchIterator(long rowCount) { + this.rowCount = rowCount; + } + + @Override + public boolean hasNext() { + return consumed == false; + } + + @Override + public EngineResultBatch next() { + consumed = true; + return new MockResultBatch((int) rowCount); + } + } + + static class MockResultBatch implements EngineResultBatch { + private final int rowCount; + + MockResultBatch(int rowCount) { + this.rowCount = rowCount; + } + + @Override + public List getFieldNames() { + return List.of("value"); + } + + @Override + public int getRowCount() { + return rowCount; + } + + @Override + public Object getFieldValue(String fieldName, int rowIndex) { + return "row_" + rowIndex; + } + } + + static class MockBackendPlugin implements AnalyticsSearchBackendPlugin { + private final DataFormat format; + + MockBackendPlugin(DataFormat format) { + this.format = format; + } + + @Override + public String name() { + return "mock-backend"; + } + + @Override + public SearchExecEngine searcher(ExecutionContext ctx) { + Object reader = ctx.getReader().getReader(format); + long rows = reader instanceof Long ? 
(Long) reader : 0L; + return new MockSearchExecEngine(rows); + } + + @Override + public List getSupportedFormats() { + return List.of(format); + } + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java b/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java index c59d2bdbbaf89..0aa358fc71f89 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java @@ -733,7 +733,8 @@ public static final IndexShard newIndexShard( indexService.getRefreshMutex(), clusterService.getClusterApplierService(), MergedSegmentPublisher.EMPTY, - ReferencedSegmentsPublisher.EMPTY + ReferencedSegmentsPublisher.EMPTY, + null // TODO ); } diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java index 1a4b14ddef9ba..c6da5c1b00c8f 100644 --- a/server/src/main/java/org/opensearch/index/IndexModule.java +++ b/server/src/main/java/org/opensearch/index/IndexModule.java @@ -46,6 +46,7 @@ import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.CheckedFunction; +import org.opensearch.common.CheckedTriFunction; import org.opensearch.common.SetOnce; import org.opensearch.common.TriFunction; import org.opensearch.common.annotation.ExperimentalApi; @@ -74,6 +75,7 @@ import org.opensearch.index.engine.Engine; import org.opensearch.index.engine.EngineConfigFactory; import org.opensearch.index.engine.EngineFactory; +import org.opensearch.index.engine.exec.DataFormatAwareEngineFactory; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.IndexEventListener; import org.opensearch.index.shard.IndexShard; @@ -714,6 +716,10 @@ public IndexService newIndexService( ); } + /** + * @deprecated Use the overload that accepts a {@code 
compositeEngineFactorySupplier} parameter. + */ + @Deprecated public IndexService newIndexService( IndexService.IndexCreationContext indexCreationContext, NodeEnvironment environment, @@ -742,6 +748,73 @@ public IndexService newIndexService( Function segmentReplicationStatsProvider, Supplier clusterDefaultMaxMergeAtOnceSupplier, ClusterMergeSchedulerConfig clusterMergeSchedulerConfig + ) throws IOException { + return newIndexService( + indexCreationContext, + environment, + xContentRegistry, + shardStoreDeleter, + circuitBreakerService, + bigArrays, + threadPool, + scriptService, + clusterService, + client, + indicesQueryCache, + mapperRegistry, + indicesFieldDataCache, + namedWriteableRegistry, + idFieldDataEnabled, + valuesSourceRegistry, + remoteDirectoryFactory, + translogFactorySupplier, + clusterDefaultRefreshIntervalSupplier, + fixedRefreshIntervalSchedulingEnabled, + shardLevelRefreshEnabled, + recoverySettings, + remoteStoreSettings, + replicator, + segmentReplicationStatsProvider, + clusterDefaultMaxMergeAtOnceSupplier, + clusterMergeSchedulerConfig, + null + ); + } + + public IndexService newIndexService( + IndexService.IndexCreationContext indexCreationContext, + NodeEnvironment environment, + NamedXContentRegistry xContentRegistry, + IndexService.ShardStoreDeleter shardStoreDeleter, + CircuitBreakerService circuitBreakerService, + BigArrays bigArrays, + ThreadPool threadPool, + ScriptService scriptService, + ClusterService clusterService, + Client client, + IndicesQueryCache indicesQueryCache, + MapperRegistry mapperRegistry, + IndicesFieldDataCache indicesFieldDataCache, + NamedWriteableRegistry namedWriteableRegistry, + BooleanSupplier idFieldDataEnabled, + ValuesSourceRegistry valuesSourceRegistry, + IndexStorePlugin.DirectoryFactory remoteDirectoryFactory, + BiFunction translogFactorySupplier, + Supplier clusterDefaultRefreshIntervalSupplier, + Supplier fixedRefreshIntervalSchedulingEnabled, + Supplier shardLevelRefreshEnabled, + RecoverySettings 
recoverySettings, + RemoteStoreSettings remoteStoreSettings, + Consumer replicator, + Function segmentReplicationStatsProvider, + Supplier clusterDefaultMaxMergeAtOnceSupplier, + ClusterMergeSchedulerConfig clusterMergeSchedulerConfig, + CheckedTriFunction< + ShardPath, + MapperService, + IndexSettings, + DataFormatAwareEngineFactory, + IOException> dataFormatAwareEngineFactorySupplier ) throws IOException { final IndexEventListener eventListener = freeze(); Function> readerWrapperFactory = indexReaderWrapper @@ -814,7 +887,8 @@ public IndexService newIndexService( replicator, segmentReplicationStatsProvider, clusterDefaultMaxMergeAtOnceSupplier, - clusterMergeSchedulerConfig + clusterMergeSchedulerConfig, + dataFormatAwareEngineFactorySupplier ); success = true; return indexService; diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index 2a862dd94b43e..79ccb429f9b7b 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -47,6 +47,7 @@ import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.CheckedFunction; +import org.opensearch.common.CheckedTriFunction; import org.opensearch.common.Nullable; import org.opensearch.common.annotation.InternalApi; import org.opensearch.common.annotation.PublicApi; @@ -78,6 +79,7 @@ import org.opensearch.index.engine.EngineConfigFactory; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.engine.MergedSegmentWarmerFactory; +import org.opensearch.index.engine.exec.DataFormatAwareEngineFactory; import org.opensearch.index.fielddata.IndexFieldDataCache; import org.opensearch.index.fielddata.IndexFieldDataService; import org.opensearch.index.mapper.MapperService; @@ -209,6 +211,12 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust 
private volatile TimeValue refreshInterval; private volatile boolean shardLevelRefreshEnabled; private final IndexStorePlugin.StoreFactory storeFactory; + private final CheckedTriFunction< + ShardPath, + MapperService, + IndexSettings, + DataFormatAwareEngineFactory, + IOException> dataFormatAwareEngineFactorySupplier; @InternalApi public IndexService( @@ -255,7 +263,13 @@ public IndexService( Consumer replicator, Function segmentReplicationStatsProvider, Supplier clusterDefaultMaxMergeAtOnceSupplier, - ClusterMergeSchedulerConfig clusterMergeSchedulerConfig + ClusterMergeSchedulerConfig clusterMergeSchedulerConfig, + CheckedTriFunction< + ShardPath, + MapperService, + IndexSettings, + DataFormatAwareEngineFactory, + IOException> dataFormatAwareEngineFactorySupplier ) { super(indexSettings); this.storeFactory = storeFactory; @@ -366,6 +380,7 @@ public IndexService( startIndexLevelRefreshTask(); } } + this.dataFormatAwareEngineFactorySupplier = dataFormatAwareEngineFactorySupplier; } @InternalApi @@ -454,7 +469,8 @@ public IndexService( s -> {}, (shardId) -> ReplicationStats.empty(), clusterDefaultMaxMergeAtOnce, - clusterMergeSchedulerConfig + clusterMergeSchedulerConfig, + null ); } @@ -775,6 +791,9 @@ protected void closeInternal() { directoryFactory ); eventListener.onStoreCreated(shardId); + DataFormatAwareEngineFactory dataFormatAwareEngineFactory = dataFormatAwareEngineFactorySupplier != null + ? dataFormatAwareEngineFactorySupplier.apply(path, mapperService, this.indexSettings) + : null; indexShard = new IndexShard( routing, this.indexSettings, @@ -813,7 +832,8 @@ protected void closeInternal() { refreshMutex, clusterService.getClusterApplierService(), this.indexSettings.isSegRepEnabledOrRemoteNode() ? mergedSegmentPublisher : null, - this.indexSettings.isSegRepEnabledOrRemoteNode() ? referencedSegmentsPublisher : null + this.indexSettings.isSegRepEnabledOrRemoteNode() ? 
referencedSegmentsPublisher : null, + dataFormatAwareEngineFactory ); eventListener.indexShardStateChanged(indexShard, null, indexShard.state(), "shard created"); eventListener.afterIndexShardCreated(indexShard); diff --git a/server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java b/server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java new file mode 100644 index 0000000000000..58cdc4c8d3d2e --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java @@ -0,0 +1,143 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.exec.CatalogSnapshot; +import org.opensearch.index.engine.exec.DataFormatAwareEngineFactory; +import org.opensearch.index.engine.exec.EngineReaderManager; + +import java.io.Closeable; +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Owns all reader managers, lazily creates search engines, index filter providers + * and source providers per data format. + *

+ * Instances are created by {@link DataFormatAwareEngineFactory}. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class DataFormatAwareEngine implements Closeable { + + private final Map> readerManagers; + private volatile CatalogSnapshot latestSnapshot; + + /** + * Constructs a new DataFormatAwareEngine with pre-built maps. + * Prefer using {@link DataFormatAwareEngineFactory#create()}. + */ + public DataFormatAwareEngine(Map> readerManagers) { + this.readerManagers = readerManagers; + } + + public EngineReaderManager getReaderManager(DataFormat format) { + return readerManagers.get(format); + } + + /** + * Called by the catalog snapshot lifecycle listener after a refresh + * to update the latest searchable snapshot. + */ + public void setLatestSnapshot(CatalogSnapshot snapshot) { + CatalogSnapshot prev = this.latestSnapshot; + this.latestSnapshot = snapshot; + if (prev != null) { + prev.decRef(); + } + } + + /** + * Acquires a DataFormatAwareReader on the latest catalog snapshot. + * The snapshot is incRef'd; the caller MUST close the returned + * {@link DataFormatAwareReader} when done, which decRef's the snapshot. + */ + public DataFormatAwareReader acquireReader() throws IOException { + CatalogSnapshot snapshot = latestSnapshot; + if (snapshot == null) { + throw new IllegalStateException("No catalog snapshot available"); + } + return acquireReader(snapshot); + } + + /** + * Acquires a dataFormatAwareReader on a specific catalog snapshot. 
+ */ + public DataFormatAwareReader acquireReader(CatalogSnapshot catalogSnapshot) throws IOException { + catalogSnapshot.incRef(); + try { + Map readers = new HashMap<>(); + for (Map.Entry> entry : readerManagers.entrySet()) { + Object reader = entry.getValue().getReader(catalogSnapshot); + if (reader != null) { + readers.put(entry.getKey(), reader); + } + } + return new DataFormatAwareReader(catalogSnapshot, readers); + } catch (Exception e) { + catalogSnapshot.decRef(); + throw e; + } + } + + /** + * A catalog-snapshot-backed data-format aware reader providing per-format reader access. + * Closing this reader releases the catalog snapshot reference. + */ + @ExperimentalApi + public static class DataFormatAwareReader implements Closeable { + private final CatalogSnapshot catalogSnapshot; + private final Map readers; + + DataFormatAwareReader(CatalogSnapshot catalogSnapshot, Map readers) { + this.catalogSnapshot = catalogSnapshot; + this.readers = readers; + } + + public Object getReader(DataFormat format) { + return readers.get(format); + } + + public CatalogSnapshot getCatalogSnapshot() { + return catalogSnapshot; + } + + @Override + public void close() { + catalogSnapshot.decRef(); + } + } + + @Override + public void close() throws IOException { + List exceptions = new ArrayList<>(); + for (EngineReaderManager rm : readerManagers.values()) { + if (rm instanceof Closeable) { + try { + ((Closeable) rm).close(); + } catch (Exception e) { + exceptions.add(e); + } + } + } + if (exceptions.isEmpty() == false) { + IOException ioException = new IOException("Failed to close DataFormatAwareEngine resources"); + for (Exception e : exceptions) { + ioException.addSuppressed(e); + } + throw ioException; + } + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/IndexFilterTree.java b/server/src/main/java/org/opensearch/index/engine/IndexFilterTree.java new file mode 100644 index 0000000000000..c918aeaa5c704 --- /dev/null +++ 
b/server/src/main/java/org/opensearch/index/engine/IndexFilterTree.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; +import java.io.IOException; + +/** + * Boolean tree structure for multi-engine query decomposition. + *

+ * Wraps the root node and provides compact array + * serialization for JNI transport to the Rust layer. + *

+ * + * @opensearch.experimental + */ +@ExperimentalApi +public class IndexFilterTree implements Closeable { + + // TODO + @Override + public void close() throws IOException {} +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshot.java b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshot.java index 90207e58cd1f5..80abcb59eccbe 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshot.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshot.java @@ -10,6 +10,7 @@ import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.util.concurrent.AbstractRefCounted; +import org.opensearch.index.engine.dataformat.DataFormat; import java.io.IOException; import java.util.Collection; @@ -133,4 +134,6 @@ public CatalogSnapshot cloneNoAcquire() { * @param b additional boolean parameter for implementation-specific behavior */ public abstract void setUserData(Map userData, boolean b); + + public abstract Object getReader(DataFormat dataFormat); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotLifecycleListener.java b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotLifecycleListener.java new file mode 100644 index 0000000000000..e0a40709acf33 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotLifecycleListener.java @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.IOException; + +/** + * Unified lifecycle listener for catalog snapshots. + *

+ * Combines refresh notifications (create/update) and delete notifications + * into a single interface so plugins only need to wire one listener. + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface CatalogSnapshotLifecycleListener { + + /** Singleton that silently ignores every callback. */ + CatalogSnapshotLifecycleListener NOOP = new CatalogSnapshotLifecycleListener() { + @Override + public void beforeRefresh() {} + + @Override + public void afterRefresh(boolean didRefresh, CatalogSnapshot catalogSnapshot) {} + + @Override + public void onDeleted(CatalogSnapshot catalogSnapshot) {} + }; + + /** + * Called before a refresh operation. + */ + void beforeRefresh() throws IOException; + + /** + * Called after a refresh operation with the resulting catalog snapshot. + * @param didRefresh whether the refresh actually occurred + * @param catalogSnapshot the current catalog snapshot with file information + */ + void afterRefresh(boolean didRefresh, CatalogSnapshot catalogSnapshot) throws IOException; + + /** + * Called when a catalog snapshot is deleted. + * @param catalogSnapshot the snapshot being deleted + */ + void onDeleted(CatalogSnapshot catalogSnapshot) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CollectorQueryLifecycleManager.java b/server/src/main/java/org/opensearch/index/engine/exec/CollectorQueryLifecycleManager.java new file mode 100644 index 0000000000000..da24f5d7757e5 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/CollectorQueryLifecycleManager.java @@ -0,0 +1,90 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Manages the lifecycle of {@link SegmentCollector} instances for a single query. + *

+ * Provides a JNI-friendly primitives-only API: callers receive an {@code int} key + * from {@link #registerCollector} and use it to invoke {@link #collectDocs} and + * {@link #releaseCollector}. Java owns the collector state; the native (Rust) side + * only holds lightweight int keys. + *

+ * One manager is created per query and closed when the query finishes. + * {@link #close()} acts as a safety net, releasing any collectors that were not + * explicitly released by the caller. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class CollectorQueryLifecycleManager implements Closeable { + + private final AtomicInteger nextKey = new AtomicInteger(1); + private final Map collectors = new ConcurrentHashMap<>(); + + /** + * Registers a collector and returns its int key. + * + * @param collector the segment collector to manage + * @return a unique key that identifies this collector + */ + public int registerCollector(SegmentCollector collector) { + int key = nextKey.getAndIncrement(); + collectors.put(key, collector); + return key; + } + + /** + * Collects matching document IDs for the collector identified by {@code key}. + * + * @param key the collector key returned by {@link #registerCollector} + * @param minDoc inclusive lower bound + * @param maxDoc exclusive upper bound + * @return packed {@code long[]} bitset of matching doc IDs, or empty array if key is invalid + */ + public long[] collectDocs(int key, int minDoc, int maxDoc) { + SegmentCollector collector = collectors.get(key); + if (collector == null) { + return new long[0]; + } + return collector.collectDocs(minDoc, maxDoc); + } + + /** + * Releases the collector identified by {@code key}, closing it and + * removing it from the registry. + * + * @param key the collector key returned by {@link #registerCollector} + */ + public void releaseCollector(int key) { + SegmentCollector collector = collectors.remove(key); + if (collector != null) { + collector.close(); + } + } + + /** + * Closes all remaining collectors. Acts as a safety net for any + * collectors that were not explicitly released. 
+     */
+    @Override
+    public void close() {
+        // Remove-then-close via releaseCollector: a collector racing with an explicit release is closed only once.
+        for (Integer key : collectors.keySet()) {
+            releaseCollector(key);
+        }
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DataFormatAwareEngineFactory.java b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatAwareEngineFactory.java
new file mode 100644
index 0000000000000..d4c69da30a652
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatAwareEngineFactory.java
@@ -0,0 +1,70 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.index.IndexSettings;
+import org.opensearch.index.engine.DataFormatAwareEngine;
+import org.opensearch.index.engine.dataformat.DataFormat;
+import org.opensearch.index.mapper.MapperService;
+import org.opensearch.index.shard.ShardPath;
+import org.opensearch.plugins.PluginsService;
+import org.opensearch.plugins.SearchBackEndPlugin;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Factory that discovers {@link SearchBackEndPlugin}s via
+ * {@link PluginsService} and builds the per-format reader managers consumed by {@link DataFormatAwareEngine}.
+ *

+ * This keeps {@link DataFormatAwareEngine} decoupled from the plugin layer. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class DataFormatAwareEngineFactory { + + private final Map> readerManagers = new HashMap<>(); + private final IndexFileDeleter indexFileDeleter; + + public DataFormatAwareEngineFactory( + PluginsService pluginsService, + ShardPath shardPath, + MapperService mapperService, + IndexSettings indexSettings + ) throws IOException { + for (SearchBackEndPlugin plugin : pluginsService.filterPlugins(SearchBackEndPlugin.class)) { + for (DataFormat format : plugin.getSupportedFormats()) { + // TODO: use mapperService and indexSettings to filter formats relevant to this index + readerManagers.put(format, plugin.createReaderManager(format, shardPath)); + } + } + this.indexFileDeleter = new IndexFileDeleter(null, shardPath); + } + + /** + * Creates a new {@link DataFormatAwareEngine} over the reader managers that were + * discovered when this factory was constructed. + */ + public DataFormatAwareEngine create() { + return new DataFormatAwareEngine(readerManagers); + } + + /** + * Creates a {@link CatalogSnapshotLifecycleListener} that routes events + * through the {@link IndexFileDeleter} and fans out to the given reader managers. 
+ * + * @param readerManagers the per-format reader managers that receive notifications + */ + public CatalogSnapshotLifecycleListener createCatalogSnapshotListener(Map> readerManagers) { + return new DataFormatEngineCatalogSnapshotListener(readerManagers, indexFileDeleter); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DataFormatEngineCatalogSnapshotListener.java b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatEngineCatalogSnapshotListener.java new file mode 100644 index 0000000000000..85e247bd29fd1 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatEngineCatalogSnapshotListener.java @@ -0,0 +1,88 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.dataformat.DataFormat; + +import java.io.IOException; +import java.util.Collection; +import java.util.Map; + +/** + * Routes {@link CatalogSnapshotLifecycleListener} events through the + * {@link IndexFileDeleter} and then fans out to the per-format + * {@link EngineReaderManager}s. + *

+ * Keeps lifecycle orchestration separate from the engine's component + * registry responsibilities. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class DataFormatEngineCatalogSnapshotListener implements CatalogSnapshotLifecycleListener { + + private final Map> readerManagers; + private final IndexFileDeleter indexFileDeleter; + + public DataFormatEngineCatalogSnapshotListener( + Map> readerManagers, + IndexFileDeleter indexFileDeleter + ) { + this.readerManagers = readerManagers; + this.indexFileDeleter = indexFileDeleter; + } + + @Override + public void beforeRefresh() throws IOException { + for (CatalogSnapshotLifecycleListener listener : readerManagers.values()) { + listener.beforeRefresh(); + } + } + + @Override + public void afterRefresh(boolean didRefresh, CatalogSnapshot catalogSnapshot) throws IOException { + Map> newFiles = indexFileDeleter.addFileReferences(catalogSnapshot); + if (newFiles.isEmpty() == false) { + notifyFilesAdded(newFiles); + } + for (CatalogSnapshotLifecycleListener listener : readerManagers.values()) { + listener.afterRefresh(didRefresh, catalogSnapshot); + } + } + + @Override + public void onDeleted(CatalogSnapshot catalogSnapshot) throws IOException { + Map> deletedFiles = indexFileDeleter.removeFileReferences(catalogSnapshot); + if (deletedFiles.isEmpty() == false) { + notifyFilesDeleted(deletedFiles); + } + for (CatalogSnapshotLifecycleListener listener : readerManagers.values()) { + listener.onDeleted(catalogSnapshot); + } + } + + private void notifyFilesAdded(Map> filesByFormat) throws IOException { + for (Map.Entry> entry : filesByFormat.entrySet()) { + EngineReaderManager rm = readerManagers.get(entry.getKey()); + if (rm != null) { + rm.onFilesAdded(entry.getValue()); + } + } + } + + private void notifyFilesDeleted(Map> filesByFormat) throws IOException { + for (Map.Entry> entry : filesByFormat.entrySet()) { + EngineReaderManager rm = readerManagers.get(entry.getKey()); + if (rm != null) { + 
rm.onFilesDeleted(entry.getValue()); + } + } + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/EngineReaderManager.java b/server/src/main/java/org/opensearch/index/engine/exec/EngineReaderManager.java new file mode 100644 index 0000000000000..b420dd6299471 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/EngineReaderManager.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.IOException; + +/** + * Engine-agnostic reader manager. + *

+ * For Lucene, wraps {@code ReferenceManager}. + * For pluggable engines, wraps the engine-specific reader lifecycle. + * + * @param the reader type managed by this instance + * @opensearch.experimental + */ +@ExperimentalApi +public interface EngineReaderManager extends CatalogSnapshotLifecycleListener, FilesListener { + T getReader(CatalogSnapshot catalogSnapshot) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/EngineSearcher.java b/server/src/main/java/org/opensearch/index/engine/exec/EngineSearcher.java new file mode 100644 index 0000000000000..4df7df733ea6d --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/EngineSearcher.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.lease.Releasable; +import org.opensearch.search.SearchExecutionContext; + +import java.io.IOException; + +/** + * Engine-agnostic searcher interface. + *

+ * Each engine implementation provides its own searcher that knows how to + * execute queries against its reader. The searcher is acquired from + * the search execution engine and used to execute searches against a + * point-in-time snapshot. + * + * @param the context type this searcher operates on + * @opensearch.experimental + */ +@ExperimentalApi +public interface EngineSearcher extends Releasable { + + /** + * Execute a search using this searcher, populating results on the context. + */ + void search(C context) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FileMetadata.java b/server/src/main/java/org/opensearch/index/engine/exec/FileMetadata.java new file mode 100644 index 0000000000000..71b85e0c2a4c6 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/FileMetadata.java @@ -0,0 +1,106 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.util.Objects; + +/** + * Represents metadata for a file in the index, including its data format and filename. + * Files can be in different formats (e.g., "lucene", "metadata") and this class provides + * a unified way to represent and serialize file information across the system. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class FileMetadata { + + /** + * Delimiter used to separate filename and data format in serialized form. + */ + public static final String DELIMITER = ":::"; + private static final String METADATA_KEY = "metadata"; + + private final String file; + private final String dataFormat; + + /** + * Constructs a FileMetadata with explicit data format and filename. 
+     *
+     * @param dataFormat the data format identifier (e.g., "lucene", "metadata")
+     * @param file the filename
+     */
+    public FileMetadata(String dataFormat, String file) {
+        this.file = file;
+        this.dataFormat = dataFormat;
+    }
+
+    /**
+     * Constructs a FileMetadata by parsing a serialized data-format-aware filename.
+     * The format is "filename:::dataFormat". If no delimiter is present and the filename starts with
+     * "metadata", it's treated as a metadata file. A missing format defaults to "lucene", a missing filename to "".
+     *
+     * @param dataFormatAwareFile the serialized filename with optional data format
+     */
+    public FileMetadata(String dataFormatAwareFile) {
+        if (!dataFormatAwareFile.contains(DELIMITER) && dataFormatAwareFile.startsWith(METADATA_KEY)) {
+            this.dataFormat = METADATA_KEY;
+            this.file = dataFormatAwareFile;
+            return;
+        }
+        String[] parts = dataFormatAwareFile.split(DELIMITER); // drops trailing empties: ":::" yields zero parts
+        this.dataFormat = (parts.length > 1) ? parts[1] : "lucene";
+        this.file = (parts.length > 0) ? parts[0] : "";
+    }
+
+    /**
+     * Serializes this FileMetadata to a string in the format "filename:::dataFormat".
+     *
+     * @return the serialized representation
+     */
+    public String serialize() {
+        return file + DELIMITER + dataFormat;
+    }
+
+    @Override
+    public String toString() {
+        return serialize();
+    }
+
+    /**
+     * Returns the filename.
+     *
+     * @return the filename
+     */
+    public String file() {
+        return file;
+    }
+
+    /**
+     * Returns the data format identifier.
+ * + * @return the data format (e.g., "lucene", "metadata") + */ + public String dataFormat() { + return dataFormat; + } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) return false; + FileMetadata that = (FileMetadata) o; + return Objects.equals(file, that.file) && Objects.equals(dataFormat, that.dataFormat); + } + + @Override + public int hashCode() { + return Objects.hash(file, dataFormat); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FilesListener.java b/server/src/main/java/org/opensearch/index/engine/exec/FilesListener.java new file mode 100644 index 0000000000000..9a8ccbe35e082 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/FilesListener.java @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.IOException; +import java.util.Collection; + +/** + * Listener for lifecycle of files + */ +@ExperimentalApi +public interface FilesListener { + void onFilesDeleted(Collection files) throws IOException; + + void onFilesAdded(Collection files) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexFileDeleter.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexFileDeleter.java new file mode 100644 index 0000000000000..61507b7ffe9d7 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexFileDeleter.java @@ -0,0 +1,122 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.DataFormatAwareEngine; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.shard.ShardPath; + +import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Tracks per-format file reference counts and computes which files are newly + * added or fully dereferenced after catalog snapshot changes. + *

+ * This class does not notify reader managers itself — it returns the + * computed change sets so the caller ({@link DataFormatAwareEngine}) + * can route notifications to the appropriate reader managers. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class IndexFileDeleter { + + private final Map> fileRefCounts = new ConcurrentHashMap<>(); + + public IndexFileDeleter(CatalogSnapshot initialCatalogSnapshot, ShardPath shardPath) throws IOException { + if (initialCatalogSnapshot != null) { + addFileReferences(initialCatalogSnapshot); + deleteUnreferencedFiles(shardPath); + } + } + + /** + * Increments reference counts for all files in the snapshot. + * + * @return files whose reference count went from 0 → 1 (newly added), grouped by format. + * Returns an empty map when there are no new files. + */ + public synchronized Map> addFileReferences(CatalogSnapshot snapshot) { + Map> dfSegregatedFiles = segregateFilesByFormat(snapshot); + Map> dfNewFiles = new HashMap<>(); + + for (Map.Entry> entry : dfSegregatedFiles.entrySet()) { + DataFormat dataFormat = entry.getKey(); + Collection newFiles = new HashSet<>(); + Map dfFileRefCounts = fileRefCounts.computeIfAbsent(dataFormat, k -> new HashMap<>()); + Collection files = entry.getValue(); + for (String file : files) { + AtomicInteger refCount = dfFileRefCounts.computeIfAbsent(file, k -> new AtomicInteger(0)); + if (refCount.incrementAndGet() == 1) { + newFiles.add(file); + } + } + if (newFiles.isEmpty() == false) { + dfNewFiles.put(dataFormat, newFiles); + } + } + + return dfNewFiles.isEmpty() ? Collections.emptyMap() : dfNewFiles; + } + + /** + * Decrements reference counts for all files in the snapshot. + * + * @return files whose reference count reached 0 (ready for deletion), grouped by format. + * Returns an empty map when there are no files to delete. 
+ */ + public synchronized Map> removeFileReferences(CatalogSnapshot snapshot) { + Map> dfSegregatedFiles = segregateFilesByFormat(snapshot); + Map> dfFilesToDelete = new HashMap<>(); + + for (Map.Entry> entry : dfSegregatedFiles.entrySet()) { + DataFormat dataFormat = entry.getKey(); + Collection filesToDelete = new HashSet<>(); + Map dfFileRefCounts = fileRefCounts.get(dataFormat); + if (dfFileRefCounts != null) { + Collection files = entry.getValue(); + for (String file : files) { + AtomicInteger refCount = dfFileRefCounts.get(file); + if (refCount != null && refCount.decrementAndGet() == 0) { + dfFileRefCounts.remove(file); + filesToDelete.add(file); + } + } + } + if (filesToDelete.isEmpty() == false) { + dfFilesToDelete.put(dataFormat, filesToDelete); + } + } + + return dfFilesToDelete.isEmpty() ? Collections.emptyMap() : dfFilesToDelete; + } + + private Map> segregateFilesByFormat(CatalogSnapshot snapshot) { + Map> dfSegregatedFiles = new HashMap<>(); + // TODO + return dfSegregatedFiles; + } + + private void deleteUnreferencedFiles(ShardPath shardPath) throws IOException { + // TODO + } + + @Override + public String toString() { + return "IndexFileDeleter{fileRefCounts=" + fileRefCounts + "}"; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterContext.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterContext.java new file mode 100644 index 0000000000000..36df32ca57ec1 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterContext.java @@ -0,0 +1,26 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; + +/** + * Context to maintain state for index filters + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface IndexFilterContext extends Closeable { + + int segmentCount(); + + int segmentMaxDoc(int segmentOrd); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterProvider.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterProvider.java new file mode 100644 index 0000000000000..2d5224c48d162 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterProvider.java @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; +import java.io.IOException; + +/** + * Provides index-level filtering (partition pruning, segment filtering) for a given data format. + * + * @param the query type (e.g. 
Lucene Query) + * @param the context type + * @param the engine-specific reader type + * @opensearch.experimental + */ +@ExperimentalApi +public interface IndexFilterProvider extends Closeable { + + C createContext(Q query, ReaderT reader) throws IOException; + + int createCollector(C context, int segmentOrd, int minDoc, int maxDoc); + + long[] collectDocs(C context, int collectorKey, int minDoc, int maxDoc); + + void releaseCollector(C context, int collectorKey); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/SegmentCollector.java b/server/src/main/java/org/opensearch/index/engine/exec/SegmentCollector.java new file mode 100644 index 0000000000000..772244d88436f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/SegmentCollector.java @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; + +/** + * A per-segment document collector returned by + * {@link IndexFilterProvider#createCollector}. + *

+ * Callers should use try-with-resources to ensure cleanup. + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface SegmentCollector extends Closeable { + + /** + * Collect matching document IDs in the given range. + * + * @param minDoc inclusive lower bound + * @param maxDoc exclusive upper bound + * @return packed {@code long[]} bitset of matching doc IDs + */ + long[] collectDocs(int minDoc, int maxDoc); + + @Override + default void close() {} +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/SourceContext.java b/server/src/main/java/org/opensearch/index/engine/exec/SourceContext.java new file mode 100644 index 0000000000000..7bbfaadec8957 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/SourceContext.java @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; + +/** + * Context for a source provider execution. + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface SourceContext extends Closeable { + + Object query(); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/SourceProvider.java b/server/src/main/java/org/opensearch/index/engine/exec/SourceProvider.java new file mode 100644 index 0000000000000..ddddcd4157940 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/SourceProvider.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Iterator; + +/** + * Provides source-field data for a given data format. + * + * @param the context type + * @param the result batch type + * @param the engine-specific reader type + * @opensearch.experimental + */ +@ExperimentalApi +public interface SourceProvider extends Closeable { + + C createContext(Object query, ReaderT reader) throws IOException; + + Iterator execute(C context) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 1c155c897acba..44d99b06b8bf0 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -129,6 +129,7 @@ import org.opensearch.index.cache.request.ShardRequestCache; import org.opensearch.index.codec.CodecService; import org.opensearch.index.engine.CommitStats; +import org.opensearch.index.engine.DataFormatAwareEngine; import org.opensearch.index.engine.Engine; import org.opensearch.index.engine.Engine.GetResult; import org.opensearch.index.engine.EngineBackedIndexer; @@ -144,6 +145,7 @@ import org.opensearch.index.engine.SafeCommitInfo; import org.opensearch.index.engine.Segment; import org.opensearch.index.engine.SegmentsStats; +import org.opensearch.index.engine.exec.DataFormatAwareEngineFactory; import org.opensearch.index.engine.exec.Indexer; import org.opensearch.index.fielddata.FieldDataStats; import org.opensearch.index.fielddata.ShardFieldData; @@ -316,6 +318,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl private volatile long pendingPrimaryTerm; // see JavaDocs for getPendingPrimaryTerm private final Object engineMutex = new Object(); // lock ordering: engineMutex -> mutex private final 
AtomicReference currentEngineReference = new AtomicReference<>(); + private final AtomicReference currentCompositeEngineReference = new AtomicReference<>(); final EngineFactory engineFactory; final EngineConfigFactory engineConfigFactory; @@ -404,6 +407,8 @@ Runnable getGlobalCheckpointSyncer() { // Used to limit the number of concurrent translog tasks. When the semaphore is exhausted, serial recovery is used. private static final Semaphore translogConcurrentRecoverySemaphore = new Semaphore(1000); + private final DataFormatAwareEngineFactory dataFormatAwareEngineFactory; + @InternalApi public IndexShard( final ShardRouting shardRouting, @@ -443,7 +448,8 @@ public IndexShard( final Object refreshMutex, final ClusterApplierService clusterApplierService, @Nullable final MergedSegmentPublisher mergedSegmentPublisher, - @Nullable final ReferencedSegmentsPublisher referencedSegmentsPublisher + @Nullable final ReferencedSegmentsPublisher referencedSegmentsPublisher, + @Nullable final DataFormatAwareEngineFactory dataFormatAwareEngineFactory ) throws IOException { super(shardRouting.shardId(), indexSettings); assert shardRouting.initializing(); @@ -569,6 +575,10 @@ public boolean shouldCache(Query query) { startRefreshTask(); } } + this.dataFormatAwareEngineFactory = dataFormatAwareEngineFactory; + if (dataFormatAwareEngineFactory != null) { + this.currentCompositeEngineReference.set(dataFormatAwareEngineFactory.create()); + } } /** @@ -2204,6 +2214,20 @@ public Engine.Searcher acquireSearcher(String source) { return acquireSearcher(source, Engine.SearcherScope.EXTERNAL); } + /** + * Returns the current CompositeEngine, or null if no optimized index is active. + */ + public DataFormatAwareEngine getCompositeEngine() { + return currentCompositeEngineReference.get(); + } + + /** + * Sets the CompositeEngine for this shard (called during shard initialization for optimized indexes). 
+ */ + public void setCompositeEngine(DataFormatAwareEngine dataFormatAwareEngine) { + currentCompositeEngineReference.set(dataFormatAwareEngine); + } + private void markSearcherAccessed() { lastSearcherAccess.lazySet(threadPool.relativeTimeInMillis()); } diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index 16229f12c60a8..ad33e3811f273 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -63,6 +63,7 @@ import org.opensearch.common.CheckedConsumer; import org.opensearch.common.CheckedFunction; import org.opensearch.common.CheckedSupplier; +import org.opensearch.common.CheckedTriFunction; import org.opensearch.common.Nullable; import org.opensearch.common.annotation.InternalApi; import org.opensearch.common.annotation.PublicApi; @@ -123,6 +124,7 @@ import org.opensearch.index.engine.NRTReplicationEngineFactory; import org.opensearch.index.engine.NoOpEngine; import org.opensearch.index.engine.ReadOnlyEngine; +import org.opensearch.index.engine.exec.DataFormatAwareEngineFactory; import org.opensearch.index.fielddata.IndexFieldDataCache; import org.opensearch.index.flush.FlushStats; import org.opensearch.index.get.GetStats; @@ -146,6 +148,7 @@ import org.opensearch.index.shard.IndexShardState; import org.opensearch.index.shard.IndexingOperationListener; import org.opensearch.index.shard.IndexingStats; +import org.opensearch.index.shard.ShardPath; import org.opensearch.index.store.remote.filecache.FileCache; import org.opensearch.index.translog.InternalTranslogFactory; import org.opensearch.index.translog.RemoteBlobStoreInternalTranslogFactory; @@ -424,6 +427,12 @@ public class IndicesService extends AbstractLifecycleComponent private volatile int defaultMaxMergeAtOnce; private final StatusCounterStats statusCounterStats; private final ClusterMergeSchedulerConfig 
clusterMergeSchedulerConfig; + private final CheckedTriFunction< + ShardPath, + MapperService, + IndexSettings, + DataFormatAwareEngineFactory, + IOException> dataFormatEngineFactorySupplier; @Override protected void doStart() { @@ -609,6 +618,12 @@ protected void closeInternal() { MergeSchedulerConfig.CLUSTER_MAX_FORCE_MERGE_MB_PER_SEC_SETTING, this::onClusterLevelForceMergeMBPerSecUpdate ); + this.dataFormatEngineFactorySupplier = (shardPath, mapperService, indexSettings) -> new DataFormatAwareEngineFactory( + pluginsService, + shardPath, + mapperService, + indexSettings + ); } @InternalApi @@ -1109,6 +1124,7 @@ private synchronized IndexService createIndexService( for (IndexEventListener listener : builtInListeners) { indexModule.addIndexEventListener(listener); } + return indexModule.newIndexService( indexCreationContext, nodeEnv, @@ -1136,7 +1152,8 @@ private synchronized IndexService createIndexService( replicator, segmentReplicationStatsProvider, this::getClusterDefaultMaxMergeAtOnce, - clusterMergeSchedulerConfig + clusterMergeSchedulerConfig, + dataFormatEngineFactorySupplier ); } diff --git a/server/src/main/java/org/opensearch/plugins/SearchBackEndPlugin.java b/server/src/main/java/org/opensearch/plugins/SearchBackEndPlugin.java new file mode 100644 index 0000000000000..65fa1c99e917c --- /dev/null +++ b/server/src/main/java/org/opensearch/plugins/SearchBackEndPlugin.java @@ -0,0 +1,30 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.plugins; + +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.exec.EngineReaderManager; +import org.opensearch.index.shard.ShardPath; + +import java.io.IOException; +import java.util.List; + +/** + * Interface for back-end query engines. 
+ * + * @opensearch.internal + */ +public interface SearchBackEndPlugin { + + String name(); + + List getSupportedFormats(); + + EngineReaderManager createReaderManager(DataFormat format, ShardPath shardPath) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/search/SearchExecutionContext.java b/server/src/main/java/org/opensearch/search/SearchExecutionContext.java new file mode 100644 index 0000000000000..025effc3833cb --- /dev/null +++ b/server/src/main/java/org/opensearch/search/SearchExecutionContext.java @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search; + +import org.opensearch.action.search.SearchShardTask; +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; + +/** + * Engine-agnostic search execution context. + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface SearchExecutionContext extends Closeable { + + SearchShardTask task(); + + S getSearcher(); + +} diff --git a/server/src/test/java/org/opensearch/index/IndexModuleTests.java b/server/src/test/java/org/opensearch/index/IndexModuleTests.java index d3637aac98ae6..57ba262b790ea 100644 --- a/server/src/test/java/org/opensearch/index/IndexModuleTests.java +++ b/server/src/test/java/org/opensearch/index/IndexModuleTests.java @@ -281,7 +281,8 @@ private IndexService newIndexService(IndexModule module) throws IOException { s -> {}, null, () -> TieredMergePolicyProvider.DEFAULT_MAX_MERGE_AT_ONCE, - mockClusterMergeSchedulerConfig + mockClusterMergeSchedulerConfig, + null ); } diff --git a/server/src/test/java/org/opensearch/index/engine/dataformat/DataFormatPluginTests.java b/server/src/test/java/org/opensearch/index/engine/dataformat/DataFormatPluginTests.java index 117ce798494f2..0b967f718da97 100644 --- 
a/server/src/test/java/org/opensearch/index/engine/dataformat/DataFormatPluginTests.java +++ b/server/src/test/java/org/opensearch/index/engine/dataformat/DataFormatPluginTests.java @@ -13,6 +13,9 @@ import org.opensearch.common.settings.Settings; import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.DataFormatAwareEngine; +import org.opensearch.index.engine.exec.CatalogSnapshot; +import org.opensearch.index.engine.exec.EngineReaderManager; import org.opensearch.index.engine.exec.Segment; import org.opensearch.index.engine.exec.WriterFileSet; import org.opensearch.index.mapper.MappedFieldType; @@ -409,4 +412,316 @@ public > IndexingExecutionEngin return (IndexingExecutionEngine) new MockIndexingExecutionEngine(dataFormat); } } + + /** + * Search holds snapshot alive while refresh replaces it. + *

+ * Timeline: + * 1. new s1 → refcount = 1 (construction) + * 2. setLatestSnapshot(s1) → refcount = 1 (engine takes over construction ref) + * 3. acquireReader() → refcount = 2 (search adds ref) + * 4. setLatestSnapshot(s2) → s1 refcount = 1 (engine releases s1) + * 5. readerManager.onDeleted(s1) → reader closed, but s1 alive (search ref) + * 6. compositeReader.close() → s1 refcount = 0 → dead + */ + public void testSearchHoldsSnapshotAliveWhileRefreshDeletesFiles() throws IOException { + MockDataFormat format = new MockDataFormat(); + MockIndexingExecutionEngine indexEngine = new MockIndexingExecutionEngine(format); + + // Batch 1 + Writer w1 = indexEngine.createWriter(1L); + MockDocumentInput d1 = indexEngine.newDocumentInput(); + d1.addField(mock(MappedFieldType.class), "Alice"); + d1.setRowId("_row_id", 0); + w1.addDoc(d1); + WriterFileSet fs1 = w1.flush().getWriterFileSet(format).get(); + w1.close(); + + RefreshResult rr1 = indexEngine.refresh(RefreshInput.builder().addWriterFileSet(fs1).build()); + MockCatalogSnapshot snapshot1 = new MockCatalogSnapshot(1L, rr1.refreshedSegments(), format); + + MockReaderManager readerManager = new MockReaderManager(format.name()); + readerManager.afterRefresh(true, snapshot1); + + DataFormatAwareEngine dataFormatAwareEngine = new DataFormatAwareEngine(Map.of(format, readerManager)); + dataFormatAwareEngine.setLatestSnapshot(snapshot1); // takes over construction ref, refcount: 1 + + // Search acquires reader — refcount: 2 + DataFormatAwareEngine.DataFormatAwareReader dataFormatAwareReader = dataFormatAwareEngine.acquireReader(); + MockReader searchReader = (MockReader) dataFormatAwareReader.getReader(format); + assertEquals(1, searchReader.totalRows); + + // New refresh arrives — setLatestSnapshot(s2) decRefs s1 → refcount: 1 + Writer w2 = indexEngine.createWriter(2L); + MockDocumentInput d2 = indexEngine.newDocumentInput(); + d2.addField(mock(MappedFieldType.class), "Bob"); + d2.setRowId("_row_id", 1); + w2.addDoc(d2); + 
WriterFileSet fs2 = w2.flush().getWriterFileSet(format).get(); + w2.close(); + + RefreshResult rr2 = indexEngine.refresh(RefreshInput.builder().addWriterFileSet(fs1).addWriterFileSet(fs2).build()); + MockCatalogSnapshot snapshot2 = new MockCatalogSnapshot(2L, rr2.refreshedSegments(), format); + readerManager.afterRefresh(true, snapshot2); + dataFormatAwareEngine.setLatestSnapshot(snapshot2); // s1 refcount: 1 (only search ref) + + // Old snapshot deleted from reader manager — reader closes + readerManager.onDeleted(snapshot1); + assertTrue("Reader for snapshot1 closed in reader manager", searchReader.closed); + + // But snapshot1 still alive — search holds the last ref + assertTrue("Snapshot1 alive while search holds ref", snapshot1.tryIncRef()); + snapshot1.decRef(); // undo probe + + // Search completes — s1 refcount: 0 → dead + dataFormatAwareReader.close(); + assertFalse("Snapshot1 dead after search releases", snapshot1.tryIncRef()); + + // Snapshot 2 still works + try (DataFormatAwareEngine.DataFormatAwareReader cr2 = dataFormatAwareEngine.acquireReader()) { + MockReader r2 = (MockReader) cr2.getReader(format); + assertEquals(2, r2.totalRows); + } + } + + /** + * CompositeReader provides per-format reader access from a single catalog snapshot. 
+ */ + public void testCompositeReaderMultiFormat() throws IOException { + MockDataFormat format1 = new MockDataFormat(); + DataFormat format2 = new DataFormat() { + @Override + public String name() { + return "mock-lucene"; + } + + @Override + public long priority() { + return 50L; + } + + @Override + public Set supportedFields() { + return Set.of(); + } + }; + + MockReaderManager rm1 = new MockReaderManager(format1.name()); + MockReaderManager rm2 = new MockReaderManager(format2.name()); + + Path dir = createTempDir(); + WriterFileSet wfs1 = WriterFileSet.builder().directory(dir).writerGeneration(1L).addFile("data.parquet").addNumRows(10).build(); + WriterFileSet wfs2 = WriterFileSet.builder().directory(dir).writerGeneration(1L).addFile("data.lucene").addNumRows(10).build(); + Segment seg = Segment.builder(0L).addSearchableFiles(format1, wfs1).addSearchableFiles(format2, wfs2).build(); + MockCatalogSnapshot snapshot = new MockCatalogSnapshot(1L, List.of(seg), format1) { + @Override + public Collection getSearchableFiles(String dataFormat) { + if ("mock-lucene".equals(dataFormat)) return List.of(wfs2); + return super.getSearchableFiles(dataFormat); + } + + @Override + public Set getDataFormats() { + return Set.of(format1.name(), format2.name()); + } + }; + + rm1.afterRefresh(true, snapshot); + rm2.afterRefresh(true, snapshot); + + DataFormatAwareEngine dataFormatAwareEngine = new DataFormatAwareEngine(Map.of(format1, rm1, format2, rm2)); + dataFormatAwareEngine.setLatestSnapshot(snapshot); + + try (DataFormatAwareEngine.DataFormatAwareReader cr = dataFormatAwareEngine.acquireReader()) { + MockReader r1 = (MockReader) cr.getReader(format1); + MockReader r2 = (MockReader) cr.getReader(format2); + assertNotNull(r1); + assertNotNull(r2); + assertEquals(10, r1.totalRows); + assertEquals(10, r2.totalRows); + assertTrue(r1.fileNames.contains("data.parquet")); + assertTrue(r2.fileNames.contains("data.lucene")); + } + } + + /** + * afterRefresh(false) is a no-op; duplicate 
afterRefresh for same snapshot reuses reader. + */ + public void testRefreshEdgeCases() throws IOException { + MockDataFormat format = new MockDataFormat(); + MockIndexingExecutionEngine indexEngine = new MockIndexingExecutionEngine(format); + + Writer w = indexEngine.createWriter(1L); + MockDocumentInput d = indexEngine.newDocumentInput(); + d.addField(mock(MappedFieldType.class), "x"); + d.setRowId("_row_id", 0); + w.addDoc(d); + WriterFileSet fs = w.flush().getWriterFileSet(format).get(); + w.close(); + + RefreshResult rr = indexEngine.refresh(RefreshInput.builder().addWriterFileSet(fs).build()); + MockCatalogSnapshot snapshot = new MockCatalogSnapshot(1L, rr.refreshedSegments(), format); + + MockReaderManager rm = new MockReaderManager(format.name()); + + rm.afterRefresh(false, snapshot); + assertNull(rm.getReader(snapshot)); + assertEquals(0, rm.readerCount()); + + rm.afterRefresh(true, snapshot); + assertNotNull(rm.getReader(snapshot)); + assertEquals(1, rm.readerCount()); + + MockReader first = rm.getReader(snapshot); + rm.afterRefresh(true, snapshot); + assertSame(first, rm.getReader(snapshot)); + assertEquals(1, rm.readerCount()); + } + + /** + * File add/delete notifications propagate through reader manager. 
+ */ + public void testFileLifecycleNotifications() throws IOException { + MockReaderManager rm = new MockReaderManager("mock-columnar"); + + rm.onFilesAdded(List.of("a.parquet", "b.parquet")); + assertEquals(2, rm.addedFiles.size()); + assertTrue(rm.addedFiles.contains("a.parquet")); + + rm.onFilesDeleted(List.of("a.parquet")); + assertEquals(1, rm.deletedFiles.size()); + assertTrue(rm.deletedFiles.contains("a.parquet")); + } + + static class MockReader { + final List fileNames; + final long totalRows; + boolean closed; + + MockReader(List fileNames, long totalRows) { + this.fileNames = fileNames; + this.totalRows = totalRows; + } + + void close() { + closed = true; + } + } + + static class MockReaderManager implements EngineReaderManager { + private final String formatName; + private final Map readers = new HashMap<>(); + final List addedFiles = new ArrayList<>(); + final List deletedFiles = new ArrayList<>(); + + MockReaderManager(String formatName) { + this.formatName = formatName; + } + + @Override + public MockReader getReader(CatalogSnapshot snapshot) { + return readers.get(snapshot); + } + + int readerCount() { + return readers.size(); + } + + @Override + public void beforeRefresh() {} + + @Override + public void afterRefresh(boolean didRefresh, CatalogSnapshot snapshot) { + if (didRefresh == false || readers.containsKey(snapshot)) return; + Collection files = snapshot.getSearchableFiles(formatName); + List allFiles = new ArrayList<>(); + long totalRows = 0; + for (WriterFileSet wfs : files) { + allFiles.addAll(wfs.files()); + totalRows += wfs.numRows(); + } + readers.put(snapshot, new MockReader(allFiles, totalRows)); + } + + @Override + public void onDeleted(CatalogSnapshot snapshot) { + MockReader reader = readers.remove(snapshot); + if (reader != null) reader.close(); + } + + @Override + public void onFilesDeleted(Collection files) { + deletedFiles.addAll(files); + } + + @Override + public void onFilesAdded(Collection files) { + 
addedFiles.addAll(files); + } + } + + static class MockCatalogSnapshot extends CatalogSnapshot { + private final List segments; + private final MockDataFormat format; + + MockCatalogSnapshot(long generation, List segments, MockDataFormat format) { + super("mock-snapshot", generation, 1L); + this.segments = segments; + this.format = format; + } + + @Override + public Map getUserData() { + return Map.of(); + } + + @Override + public long getId() { + return generation; + } + + @Override + public List getSegments() { + return segments; + } + + @Override + public Collection getSearchableFiles(String dataFormat) { + List result = new ArrayList<>(); + for (Segment seg : segments) { + WriterFileSet wfs = seg.dfGroupedSearchableFiles().get(dataFormat); + if (wfs != null) result.add(wfs); + } + return result; + } + + @Override + public Set getDataFormats() { + return Set.of(format.name()); + } + + @Override + public long getLastWriterGeneration() { + return generation; + } + + @Override + public String serializeToString() { + return "mock-snapshot-" + generation; + } + + @Override + public void setCatalogSnapshotMap(Map map) {} + + @Override + public void setUserData(Map userData, boolean b) {} + + @Override + public Object getReader(DataFormat dataFormat) { + return null; + } + + @Override + protected void closeInternal() {} + } } diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java index 7e236cf911060..5c85762448adb 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java @@ -805,7 +805,8 @@ protected IndexShard newShard( new Object(), clusterService.getClusterApplierService(), mergedSegmentPublisher, - ReferencedSegmentsPublisher.EMPTY + ReferencedSegmentsPublisher.EMPTY, + null // TODO ); 
indexShard.addShardFailureCallback(DEFAULT_SHARD_FAILURE_HANDLER); if (remoteStoreStatsTrackerFactory != null) {