From 8ab51e6e73ad534ac0a174d2242b62af90648744 Mon Sep 17 00:00:00 2001 From: bharath-techie Date: Wed, 4 Mar 2026 19:14:53 +0530 Subject: [PATCH 01/11] Native engine abstractions / skeleton flow Signed-off-by: bharath-techie --- .gitignore | 1 + sandbox/libs/analytics-framework/build.gradle | 3 +- .../analytics/backend/jni/NativeHandle.java | 94 ++++++++++ .../analytics/spi/AnalyticsBackEndPlugin.java | 9 +- .../analytics-backend-datafusion/build.gradle | 3 + .../be/datafusion/DataFusionBridge.java | 2 + .../be/datafusion/DataFusionPlugin.java | 105 ++++++++++- .../be/datafusion/DataFusionService.java | 105 +++++++++++ .../be/datafusion/DatafusionContext.java | 105 +++++++++++ .../be/datafusion/DatafusionQuery.java | 40 +++++ .../be/datafusion/DatafusionReader.java | 58 ++++++ .../datafusion/DatafusionReaderManager.java | 79 ++++++++ .../DatafusionSearchExecEngine.java | 90 ++++++++++ .../be/datafusion/DatafusionSearcher.java | 58 ++++++ .../be/datafusion/jni/NativeBridge.java | 27 +++ .../be/datafusion/jni/ReaderHandle.java | 29 +++ .../be/datafusion/jni/package-info.java | 19 ++ .../be/datafusion/package-info.java | 12 -- .../analytics-backend-lucene/build.gradle | 27 +++ .../be/lucene/LuceneEngineSearcher.java | 168 ++++++++++++++++++ .../be/lucene/LuceneReaderManager.java | 75 ++++++++ .../be/lucene/LuceneSearchContext.java | 116 ++++++++++++ .../be/lucene/LuceneSearchEnginePlugin.java | 56 ++++++ .../be/lucene/LuceneSearchExecEngine.java | 97 ++++++++++ .../opensearch/index/shard/IndexShardIT.java | 3 +- .../org/opensearch/index/IndexModule.java | 7 +- .../org/opensearch/index/IndexService.java | 13 +- .../index/engine/CompositeEngine.java | 118 ++++++++++++ .../index/engine/IndexFilterTree.java | 32 ++++ .../index/engine/exec/CatalogSnapshot.java | 3 + .../CatalogSnapshotAwareRefreshListener.java | 28 +++ .../exec/CatalogSnapshotDeleteListener.java | 18 ++ .../index/engine/exec/DataFormatRegistry.java | 62 +++++++ .../exec/DataFormatRegistryFactory.java | 36 
++++ .../engine/exec/EngineReaderManager.java | 27 +++ .../index/engine/exec/EngineSearcher.java | 35 ++++ .../index/engine/exec/FileMetadata.java | 106 +++++++++++ .../index/engine/exec/FilesListener.java | 21 +++ .../index/engine/exec/IndexFileDeleter.java | 123 +++++++++++++ .../index/engine/exec/SearchExecEngine.java | 58 ++++++ .../opensearch/index/shard/IndexShard.java | 24 ++- .../opensearch/indices/IndicesService.java | 12 +- .../plugins/SearchAnalyticsBackEndPlugin.java | 29 +++ .../search/SearchExecutionContext.java | 47 +++++ .../index/shard/IndexShardTestCase.java | 3 +- 45 files changed, 2154 insertions(+), 29 deletions(-) create mode 100644 sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/NativeHandle.java create mode 100644 sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java create mode 100644 sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java create mode 100644 sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionQuery.java create mode 100644 sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReader.java create mode 100644 sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReaderManager.java create mode 100644 sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java create mode 100644 sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java create mode 100644 sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java create mode 100644 sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/ReaderHandle.java create mode 100644 
sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/package-info.java delete mode 100644 sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/package-info.java create mode 100644 sandbox/plugins/analytics-backend-lucene/build.gradle create mode 100644 sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java create mode 100644 sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneReaderManager.java create mode 100644 sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java create mode 100644 sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java create mode 100644 sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchExecEngine.java create mode 100644 server/src/main/java/org/opensearch/index/engine/CompositeEngine.java create mode 100644 server/src/main/java/org/opensearch/index/engine/IndexFilterTree.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotAwareRefreshListener.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotDeleteListener.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistry.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistryFactory.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/EngineReaderManager.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/EngineSearcher.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/FileMetadata.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/FilesListener.java create mode 100644 
server/src/main/java/org/opensearch/index/engine/exec/IndexFileDeleter.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/SearchExecEngine.java create mode 100644 server/src/main/java/org/opensearch/plugins/SearchAnalyticsBackEndPlugin.java create mode 100644 server/src/main/java/org/opensearch/search/SearchExecutionContext.java diff --git a/.gitignore b/.gitignore index 0a784701375d9..83eff29224279 100644 --- a/.gitignore +++ b/.gitignore @@ -68,3 +68,4 @@ testfixtures_shared/ # build files generated doc-tools/missing-doclet/bin/ +/sandbox/plugins/engine-datafusion/target/ diff --git a/sandbox/libs/analytics-framework/build.gradle b/sandbox/libs/analytics-framework/build.gradle index 13e3d008f0a16..8748528a48dce 100644 --- a/sandbox/libs/analytics-framework/build.gradle +++ b/sandbox/libs/analytics-framework/build.gradle @@ -15,6 +15,7 @@ def calciteVersion = '1.41.0' dependencies { + compileOnly project(':server') api "org.apache.calcite:calcite-core:${calciteVersion}" // Calcite's expression tree and Enumerable runtime — required by calcite-core API api "org.apache.calcite:calcite-linq4j:${calciteVersion}" @@ -35,7 +36,7 @@ dependencies { testingConventions.enabled = false -// analytics-framework does not depend on server +// analytics-framework depends on server for SearchAnalyticsBackEndPlugin SPI tasks.named('forbiddenApisMain').configure { replaceSignatureFiles 'jdk-signatures' failOnMissingClasses = false diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/NativeHandle.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/NativeHandle.java new file mode 100644 index 0000000000000..f1131432a2950 --- /dev/null +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/NativeHandle.java @@ -0,0 +1,94 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this 
file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.analytics.backend.jni;
+
+import java.lang.ref.Cleaner;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * Base class for type-safe native pointer wrappers.
+ * Provides automatic resource management and prevents use-after-close errors.
+ * Subclasses must implement {@link #doClose()} to release native resources.
+ * Cleaner is used to ensure resources are cleaned up even if the object is not explicitly closed.
+ */
+public abstract class NativeHandle implements AutoCloseable {
+
+    protected final long ptr;
+    private final AtomicBoolean closed = new AtomicBoolean(false);
+    protected static final long NULL_POINTER = 0L;
+    private final Cleaner.Cleanable cleanable;
+
+    private static final Cleaner CLEANER = Cleaner.create();
+
+    /**
+     * Creates a new native handle.
+     * Registers this handle with the shared {@link Cleaner}; the registered action runs
+     * {@link #doClose()}.
+     * NOTE(review): the action is built from {@code this::doClose}, a bound method reference
+     * that keeps a strong reference to this handle. {@link Cleaner} requires that the cleaning
+     * action not refer to the registered object, so the garbage-collection fallback presumably
+     * never fires for a handle that is not closed explicitly — confirm, and consider a cleanup
+     * action that captures only the pointer.
+     * @param ptr the native pointer (must not be 0)
+     * @throws IllegalArgumentException if ptr is 0
+     */
+    protected NativeHandle(long ptr) {
+        if (ptr == NULL_POINTER) {
+            throw new IllegalArgumentException("Null native pointer");
+        }
+        this.ptr = ptr;
+        this.cleanable = CLEANER.register(this, new CleanupAction(ptr, this::doClose));
+    }
+
+    /**
+     * Ensures the handle is still open.
+     * @throws IllegalStateException if the handle has been closed
+     */
+    public void ensureOpen() {
+        if (closed.get()) {
+            throw new IllegalStateException("Handle already closed");
+        }
+    }
+
+    /**
+     * Gets the native pointer value.
+     * @return the native pointer
+     * @throws IllegalStateException if the handle has been closed
+     */
+    public long getPointer() {
+        ensureOpen();
+        return ptr;
+    }
+
+    @Override
+    public void close() {
+        if (closed.compareAndSet(false, true)) { // only the first close() call gets past this check
+            cleanable.clean();
+        }
+    }
+
+    /**
+     * Releases the native resource.
+     * Called once when the handle is closed.
+     * Subclasses must implement this to free native memory.
+ */ + protected abstract void doClose(); + + /** + * Cleans up the native resource. + * Called by the cleaner when the handle is garbage collected. + */ + private static final class CleanupAction implements Runnable { + private final long ptr; + private final Runnable doClose; + + CleanupAction(long ptr, Runnable doClose) { + this.ptr = ptr; + this.doClose = doClose; + } + + @Override + public void run() { + doClose.run(); + } + } +} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsBackEndPlugin.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsBackEndPlugin.java index 454c6c17bd7f0..3a508e7f52345 100644 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsBackEndPlugin.java +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsBackEndPlugin.java @@ -10,18 +10,23 @@ import org.apache.calcite.sql.SqlOperatorTable; import org.opensearch.analytics.backend.EngineBridge; +import org.opensearch.plugins.SearchAnalyticsBackEndPlugin; + +import java.io.IOException; +import java.util.List; /** * SPI extension point for back-end query engines (DataFusion, Lucene, etc.). * @opensearch.internal */ -public interface AnalyticsBackEndPlugin { +public interface AnalyticsBackEndPlugin extends SearchAnalyticsBackEndPlugin { /** Unique engine name (e.g., "lucene", "datafusion"). */ String name(); /** JNI boundary for executing serialized plans, or null for engines without native execution. */ - EngineBridge bridge(); + EngineBridge bridge(); // TODO this doesn't have context / index shard init /** Supported functions as a Calcite operator table, or null if the back-end adds no functions. 
*/ SqlOperatorTable operatorTable(); + } diff --git a/sandbox/plugins/analytics-backend-datafusion/build.gradle b/sandbox/plugins/analytics-backend-datafusion/build.gradle index 61fec92b7219d..89929e691d7c9 100644 --- a/sandbox/plugins/analytics-backend-datafusion/build.gradle +++ b/sandbox/plugins/analytics-backend-datafusion/build.gradle @@ -16,6 +16,9 @@ dependencies { // Shared types and SPI interfaces (EngineBridge, AnalyticsBackEndPlugin, etc.) // Also provides calcite-core transitively via api. api project(':sandbox:libs:analytics-framework') + + implementation "org.apache.logging.log4j:log4j-api:${versions.log4j}" + implementation "org.apache.logging.log4j:log4j-core:${versions.log4j}" } // TODO: Remove once back-end is built out with test suite diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionBridge.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionBridge.java index 97b4326361a0c..a61afaeea8fcb 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionBridge.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionBridge.java @@ -14,6 +14,8 @@ /** * DataFusion EngineBridge implementation. * Uses a byte[] representing serialized plan to execute. 
+ * // TODO : we need a stateful engine, not just a bridge, evaluate + * // switch to SearchExecEngine */ public class DataFusionBridge implements EngineBridge { // S=byte[] (Substrait), H=Long (stream pointer), L=RelNode (logical plan) diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java index 79f4f834bfdb4..557a76cfa37e2 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java @@ -9,19 +9,92 @@ package org.opensearch.be.datafusion; import org.apache.calcite.sql.SqlOperatorTable; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.opensearch.analytics.backend.EngineBridge; import org.opensearch.analytics.spi.AnalyticsBackEndPlugin; +import org.opensearch.cluster.metadata.IndexNameExpressionResolver; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.common.settings.Setting; +import org.opensearch.common.settings.Settings; +import org.opensearch.core.common.io.stream.NamedWriteableRegistry; +import org.opensearch.core.xcontent.NamedXContentRegistry; +import org.opensearch.env.Environment; +import org.opensearch.env.NodeEnvironment; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.exec.SearchExecEngine; +import org.opensearch.index.shard.ShardPath; import org.opensearch.plugins.Plugin; +import org.opensearch.repositories.RepositoriesService; +import org.opensearch.script.ScriptService; +import org.opensearch.threadpool.ThreadPool; +import org.opensearch.transport.client.Client; +import org.opensearch.watcher.ResourceWatcherService; + +import java.io.IOException; +import java.util.Collection; 
+import java.util.Collections; +import java.util.List; +import java.util.function.Supplier; /** - * DataFusion native execution engine plugin. + * Main plugin class for the DataFusion native engine integration. + *

+ * Initializes the {@link DataFusionService} at node startup and creates + * per-shard {@link DatafusionSearchExecEngine} instances via the + * {@link AnalyticsBackEndPlugin} SPI. */ public class DataFusionPlugin extends Plugin implements AnalyticsBackEndPlugin { - /** Creates a new DataFusion plugin. */ - public DataFusionPlugin() {} + private static final Logger logger = LogManager.getLogger(DataFusionPlugin.class); + + /** Memory pool limit for the DataFusion runtime. */ + public static final Setting DATAFUSION_MEMORY_POOL_LIMIT = Setting.longSetting( + "datafusion.memory_pool_limit_bytes", + Runtime.getRuntime().maxMemory() / 4, + 0L, + Setting.Property.NodeScope + ); + + /** Spill memory limit — when exceeded, DataFusion spills to disk. */ + public static final Setting DATAFUSION_SPILL_MEMORY_LIMIT = Setting.longSetting( + "datafusion.spill_memory_limit_bytes", + Runtime.getRuntime().maxMemory() / 8, + 0L, + Setting.Property.NodeScope + ); + + private final Settings settings; + private volatile DataFusionService dataFusionService; + + public DataFusionPlugin(Settings settings) { + this.settings = settings; + } + + @Override + public Collection createComponents( + Client client, + ClusterService clusterService, + ThreadPool threadPool, + ResourceWatcherService resourceWatcherService, + ScriptService scriptService, + NamedXContentRegistry xContentRegistry, + Environment environment, + NodeEnvironment nodeEnvironment, + NamedWriteableRegistry namedWriteableRegistry, + IndexNameExpressionResolver indexNameExpressionResolver, + Supplier repositoriesServiceSupplier + ) { + long memoryPoolLimit = DATAFUSION_MEMORY_POOL_LIMIT.get(settings); + long spillMemoryLimit = DATAFUSION_SPILL_MEMORY_LIMIT.get(settings); + String spillDir = environment.dataFiles()[0].getParent().resolve("tmp").toAbsolutePath().toString(); + + dataFusionService = new DataFusionService(memoryPoolLimit, spillDir, spillMemoryLimit); + dataFusionService.start(); + logger.info("DataFusion plugin 
initialized — memory pool {}B, spill limit {}B", memoryPoolLimit, spillMemoryLimit);
 
-    private final DataFusionBridge bridge = new DataFusionBridge();
+        return Collections.singletonList(dataFusionService);
+    }
 
     @Override
     public String name() {
@@ -30,11 +103,60 @@ public String name() {
 
     @Override
     public EngineBridge bridge() {
-        return bridge;
+        return null; // TODO decide between bridge and SearchExecEngine
     }
 
     @Override
     public SqlOperatorTable operatorTable() {
         return null;
     }
+
+    /**
+     * Registers the DataFusion node settings so that values configured for them are
+     * accepted by node settings validation rather than rejected as unknown settings.
+     *
+     * @return the settings declared by this plugin
+     */
+    @Override
+    public List<Setting<?>> getSettings() {
+        return List.of(DATAFUSION_MEMORY_POOL_LIMIT, DATAFUSION_SPILL_MEMORY_LIMIT);
+    }
+
+    /**
+     * Creates the per-shard DataFusion search engine.
+     *
+     * @param shardPath  shard path handed to the engine
+     * @param dataFormat data format handed to the engine
+     * @return a new engine that uses the node-level DataFusion runtime pointer
+     * @throws IllegalStateException if {@code createComponents} has not been called yet
+     */
+    @Override
+    public SearchExecEngine create(ShardPath shardPath, DataFormat dataFormat) throws IOException {
+        // Read the volatile field once so the null check and the use see the same instance.
+        DataFusionService service = dataFusionService;
+        if (service == null) {
+            throw new IllegalStateException("DataFusionPlugin.createComponents() has not been called yet");
+        }
+        return new DatafusionSearchExecEngine(service.getRuntimePointer(), dataFormat, shardPath);
+    }
+
+    /**
+     * Data formats this plugin can handle. Used by CompositeEngine to route queries.
+     *
+     * @return the supported formats; currently an empty list, never {@code null}
+     */
+    public List getSupportedFormats() {
+        return Collections.emptyList(); // TODO : List.of("parquet");
+    }
+
+    /**
+     * Closes the DataFusion service if {@code createComponents} created one.
+     */
+    @Override
+    public void close() throws IOException {
+        DataFusionService service = dataFusionService;
+        if (service != null) {
+            service.close();
+        }
+    }
 }
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java
new file mode 100644
index 0000000000000..695ec743ae66e
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java
@@ -0,0 +1,105 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */ + +package org.opensearch.be.datafusion; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.common.lifecycle.AbstractLifecycleComponent; + +import java.io.IOException; + +/** + * Node-level service managing the DataFusion native runtime lifecycle. + *

+ * All per-shard {@link DatafusionSearchExecEngine} instances share the single + * Tokio runtime and memory pool owned by this service. The service loads the + * native JNI library on start and tears down the runtime on stop/close. + */ +public class DataFusionService extends AbstractLifecycleComponent { + + private static final Logger logger = LogManager.getLogger(DataFusionService.class); + private static final String NATIVE_LIBRARY_NAME = "opensearch_datafusion_jni"; + + private final long memoryPoolLimit; + private final String spillDirectory; + private final long spillMemoryLimit; + + /** Pointer to the native DataFusion global runtime (Tokio + memory pool). */ + private volatile long runtimePointer; + + /** + * Creates a new DataFusionService. + * + * @param memoryPoolLimit maximum bytes for the DataFusion memory pool + * @param spillDirectory directory for spill files when memory is exceeded + * @param spillMemoryLimit maximum bytes before spilling to disk + */ + public DataFusionService(long memoryPoolLimit, String spillDirectory, long spillMemoryLimit) { + this.memoryPoolLimit = memoryPoolLimit; + this.spillDirectory = spillDirectory; + this.spillMemoryLimit = spillMemoryLimit; + } + + @Override + protected void doStart() { + logger.info("Starting DataFusion service — loading native library [{}]", NATIVE_LIBRARY_NAME); + try { + System.loadLibrary(NATIVE_LIBRARY_NAME); + } catch (UnsatisfiedLinkError e) { + throw new IllegalStateException("Failed to load native library: " + NATIVE_LIBRARY_NAME, e); + } + + // TODO: initialize Tokio runtime and memory pool via NativeBridge + // runtimePointer = NativeBridge.createGlobalRuntime(memoryPoolLimit, spillDirectory, spillMemoryLimit); + this.runtimePointer = 0L; // placeholder until NativeBridge is wired + logger.info("DataFusion service started"); + } + + @Override + protected void doStop() { + logger.info("Stopping DataFusion service"); + releaseRuntime(); + } + + @Override + protected void doClose() throws 
IOException { + releaseRuntime(); + } + + /** + * Returns the pointer to the native DataFusion global runtime. + * All JNI calls that need the Tokio runtime pass this pointer. + * + * @throws IllegalStateException if the service has not been started + */ + public long getRuntimePointer() { + long ptr = runtimePointer; + if (ptr == 0L && lifecycle.started() == false) { + throw new IllegalStateException("DataFusionService has not been started"); + } + return ptr; + } + + /** + * Returns the cache manager for per-shard cache management. + * Used by DatafusionReaderManager to evict stale entries on file deletion. + */ + // TODO: uncomment when CacheManager class is available + // public CacheManager getCacheManager() { return cacheManager; } + + private void releaseRuntime() { + long ptr = runtimePointer; + if (ptr != 0L) { + // TODO: NativeBridge.closeGlobalRuntime(ptr); + // TODO: NativeBridge.shutdownTokioRuntimeManager(); + runtimePointer = 0L; + logger.info("DataFusion native runtime released"); + } + } +} diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java new file mode 100644 index 0000000000000..1d165a394e3eb --- /dev/null +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java @@ -0,0 +1,105 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.be.datafusion; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.IndexFilterTree; +import org.opensearch.index.engine.exec.CatalogSnapshot; +import org.opensearch.search.SearchExecutionContext; +import org.opensearch.search.SearchShardTarget; +import org.opensearch.search.internal.ShardSearchRequest; + +import java.io.IOException; + +/** + * DataFusion-specific search execution context. + *

+ * Carries the DataFusion query plan, engine searcher, optional {@link IndexFilterTree}, + * and columnar results. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class DatafusionContext implements SearchExecutionContext { + + private final ShardSearchRequest request; + private final SearchShardTarget shardTarget; + private final DatafusionSearcher engineSearcher; + private final CatalogSnapshot catalogSnapshot; + private DatafusionQuery datafusionQuery; + private IndexFilterTree filterTree; + + public DatafusionContext( + CatalogSnapshot catalogSnapshot, + ShardSearchRequest request, + SearchShardTarget shardTarget, + DatafusionReaderManager readerManager + ) throws IOException { + this.catalogSnapshot = catalogSnapshot; + this.request = request; + this.shardTarget = shardTarget; + this.engineSearcher = new DatafusionSearcher(readerManager.getReader(catalogSnapshot).getReaderPtr()); + } + + @Override + public CatalogSnapshot catalogSnapshot() { + return catalogSnapshot; + } + + @Override + public ShardSearchRequest request() { + return request; + } + + @Override + public SearchShardTarget shardTarget() { + return shardTarget; + } + + @Override + public void close() throws IOException { + try { + if (filterTree != null) { + filterTree.close(); + } + } finally { + engineSearcher.close(); + } + } + + // DataFusion-specific + + public DatafusionSearcher getEngineSearcher() { + return engineSearcher; + } + + public DatafusionQuery getDatafusionQuery() { + return datafusionQuery; + } + + public void setDatafusionQuery(DatafusionQuery query) { + this.datafusionQuery = query; + } + + /** + * Returns the optional filter tree for indexed parquet queries. + * {@code null} indicates a pure parquet query with no external index involvement. + */ + public IndexFilterTree getFilterTree() { + return filterTree; + } + + /** + * Sets the filter tree for indexed parquet queries. 
+     */
+    public void setFilterTree(IndexFilterTree filterTree) {
+        this.filterTree = filterTree;
+    }
+
+}
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionQuery.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionQuery.java
new file mode 100644
index 0000000000000..4d7fde7c6c503
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionQuery.java
@@ -0,0 +1,51 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.be.datafusion;
+
+/**
+ * A DataFusion query: the substrait plan bytes to run plus execution metadata
+ * (an index name and a fetch-phase flag).
+ */
+public class DatafusionQuery {
+
+    private final String indexName;
+    private final byte[] substraitBytes;
+    private boolean fetchPhase;
+
+    /**
+     * Creates a query from an index name and a serialized plan.
+     *
+     * @param indexName      index name carried by this query
+     * @param substraitBytes substrait plan bytes; stored by reference, not copied
+     */
+    public DatafusionQuery(String indexName, byte[] substraitBytes) {
+        this.substraitBytes = substraitBytes;
+        this.indexName = indexName;
+    }
+
+    /** Returns the index name given at construction. */
+    public String getIndexName() {
+        return this.indexName;
+    }
+
+    /** Returns the plan bytes given at construction (the same array instance, not a copy). */
+    public byte[] getSubstraitBytes() {
+        return this.substraitBytes;
+    }
+
+    /** Returns the fetch-phase flag; {@code false} until {@link #setFetchPhase} sets it. */
+    public boolean isFetchPhase() {
+        return this.fetchPhase;
+    }
+
+    /** Sets the fetch-phase flag. */
+    public void setFetchPhase(boolean fetchPhase) {
+        this.fetchPhase = fetchPhase;
+    }
+}
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReader.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReader.java
new file mode 100644
index 0000000000000..e27b57c3e2b53
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReader.java
@@ -0,0 +1,58 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.datafusion; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.opensearch.be.datafusion.jni.ReaderHandle; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.WriterFileSet; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Collection; + +/** + * DataFusion reader for JNI operations. + *

+ * Each reader represents a point-in-time snapshot of parquet/arrow files for a shard.
+ * Created from a catalog snapshot during refresh; closed when the associated catalog snapshot is removed.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public class DatafusionReader implements Closeable {
+
+    private static final Logger logger = LogManager.getLogger(DatafusionReader.class);
+    private final String directoryPath;
+    private final ReaderHandle readerHandle;
+
+    /**
+     * Creates the {@link ReaderHandle} for the given directory and files.
+     *
+     * @param directoryPath shard data directory
+     * @param files         file sets whose file names are flattened into one array and passed
+     *                      to the handle; {@code null} is treated as no files
+     */
+    public DatafusionReader(String directoryPath, Collection<WriterFileSet> files) {
+        this.directoryPath = directoryPath;
+        // The element type is needed for the lambda below to resolve files() on each set.
+        String[] fileNames = files == null
+            ? new String[0]
+            : files.stream().flatMap(fileSet -> fileSet.files().stream()).toArray(String[]::new);
+        this.readerHandle = new ReaderHandle(directoryPath, fileNames);
+    }
+
+    /**
+     * Closes the underlying reader handle and logs the directory at debug level.
+     */
+    @Override
+    public void close() throws IOException {
+        readerHandle.close();
+        logger.debug("DatafusionReader closed for [{}]", directoryPath);
+    }
+
+    /**
+     * @return the pointer reported by the underlying {@link ReaderHandle}
+     */
+    public long getReaderPtr() {
+        return readerHandle.getPointer();
+    }
+}
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReaderManager.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReaderManager.java
new file mode 100644
index 0000000000000..04160413e26bb
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReaderManager.java
@@ -0,0 +1,79 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */ + +package org.opensearch.be.datafusion; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.exec.CatalogSnapshot; +import org.opensearch.index.engine.exec.EngineReaderManager; +import org.opensearch.index.shard.ShardPath; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +/** + * Manages {@link DatafusionReader} instances (native memory). + *

+ * One reader is kept per {@link CatalogSnapshot}: {@link #afterRefresh} creates the reader
+ * for a snapshot that is not tracked yet, {@link #getReader} looks it up, and
+ * {@link #onDeleted} closes and forgets it. No reference counting is done in this class.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public class DatafusionReaderManager implements EngineReaderManager {
+
+    // NOTE(review): plain HashMap with no locking; presumably refresh and search threads can
+    // both reach it — confirm that callers serialize access.
+    Map<CatalogSnapshot, DatafusionReader> readers = new HashMap<>();
+    private final DataFormat dataFormat;
+    private final String directoryPath;
+
+    /**
+     * @param dataFormat data format whose name selects the sub-directory and the searchable files
+     * @param shardPath  shard path; readers use its data path resolved against the format name
+     */
+    public DatafusionReaderManager(DataFormat dataFormat, ShardPath shardPath) {
+        this.dataFormat = dataFormat;
+        directoryPath = shardPath.getDataPath().resolve(dataFormat.name()).toString();
+    }
+
+    /**
+     * Returns the reader registered for the given snapshot.
+     *
+     * @throws IOException if no reader is registered for {@code catalogSnapshot}
+     */
+    @Override
+    public DatafusionReader getReader(CatalogSnapshot catalogSnapshot) throws IOException {
+        DatafusionReader reader = readers.get(catalogSnapshot);
+        if (reader == null) {
+            throw new IOException("No DataFusion reader available");
+        }
+        return reader;
+    }
+
+    /**
+     * Closes and removes the reader of a deleted snapshot. A snapshot without a registered
+     * reader is ignored ({@link #afterRefresh} only registers one when {@code didRefresh} is true).
+     */
+    @Override
+    public void onDeleted(CatalogSnapshot catalogSnapshot) throws IOException {
+        DatafusionReader reader = readers.remove(catalogSnapshot);
+        if (reader != null) {
+            reader.close();
+        }
+    }
+
+    @Override
+    public void onFilesDeleted(Collection files) throws IOException {
+        // TODO: evict deleted files from cache manager
+    }
+
+    @Override
+    public void onFilesAdded(Collection files) throws IOException {
+        // TODO: Add new files to cache manager
+    }
+
+    @Override
+    public void beforeRefresh() throws IOException {}
+
+    /**
+     * Registers a reader for {@code catalogSnapshot} after a refresh that changed something,
+     * unless one is already registered for it.
+     */
+    @Override
+    public void afterRefresh(boolean didRefresh, CatalogSnapshot catalogSnapshot) throws IOException {
+        if (didRefresh == false) {
+            return;
+        }
+        // This catalog snapshot is already present in the reader manager
+        if (readers.containsKey(catalogSnapshot)) {
+            return;
+        }
+        DatafusionReader reader = new DatafusionReader(directoryPath, catalogSnapshot.getSearchableFiles(dataFormat.name()));
+        readers.put(catalogSnapshot, reader);
+    }
+}
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java
b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java new file mode 100644 index 0000000000000..b7bcb71937da8 --- /dev/null +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java @@ -0,0 +1,90 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.datafusion; + +import org.opensearch.action.search.SearchShardTask; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.IndexFilterTree; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.exec.CatalogSnapshot; +import org.opensearch.index.engine.exec.EngineReaderManager; +import org.opensearch.index.engine.exec.SearchExecEngine; +import org.opensearch.index.shard.ShardPath; +import org.opensearch.search.SearchShardTarget; +import org.opensearch.search.internal.ShardSearchRequest; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Collections; +import java.util.Iterator; + +/** + * DataFusion-backed {@link SearchExecEngine}. + * Plan type is {@code byte[]} (substrait bytes). 
+ * + * @opensearch.experimental + */ +@ExperimentalApi +public class DatafusionSearchExecEngine implements SearchExecEngine { + + private final DatafusionReaderManager readerManager; + private final long runtimePtr; + private long nextContextId; + + public DatafusionSearchExecEngine(long runtimePtr, DataFormat dataFormat, ShardPath shardPath) { + readerManager = new DatafusionReaderManager(dataFormat, shardPath); + this.runtimePtr = runtimePtr; + } + + // TODO : figure out stream return type similar to engine bridge + @Override + public void execute(DatafusionContext context) throws IOException { + DatafusionSearcher searcher = context.getEngineSearcher(); + IndexFilterTree filterTree = context.getFilterTree(); + if (filterTree != null) { + throw new UnsupportedOperationException("Indexed query path not yet wired"); + } else { + searcher.search(context); + } + } + + @Override + public DatafusionContext createContext( + CatalogSnapshot snapshot, + ShardSearchRequest request, + SearchShardTarget shardTarget, + SearchShardTask task + ) throws IOException { + return new DatafusionContext(snapshot, request, shardTarget, readerManager); + } + + @Override + public byte[] convertFragment(Object fragment) { + // TODO: SubstraitConverter.toBytes((RelNode) fragment) + throw new UnsupportedOperationException("Substrait conversion not yet wired"); + } + + @Override + public Iterator executePlan(byte[] plan, DatafusionContext context) { + try { + context.setDatafusionQuery(new DatafusionQuery("", plan)); + execute(context); + // TODO results + return Collections.emptyIterator(); + // return results == null ? 
Collections.emptyIterator() : Collections.singleton(results).iterator(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + + @Override + public EngineReaderManager getReaderManager() { + return readerManager; + } +} diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java new file mode 100644 index 0000000000000..8db660c50cee6 --- /dev/null +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java @@ -0,0 +1,58 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.datafusion; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.EngineSearcher; + +import java.io.IOException; + +/** + * DataFusion searcher — executes substrait query plans against a native DataFusion reader. 
+ * + * @opensearch.experimental + */ +@ExperimentalApi +public class DatafusionSearcher implements EngineSearcher { + + private final long readerPtr; + + public DatafusionSearcher(long readerPtr) { + // TODO: initialize reader handle + this.readerPtr = readerPtr; + } + + @Override + public void search(DatafusionContext context) throws IOException { + if (context.getFilterTree() == null) { + searchVanilla(context); + } else { + searchWithFilterTree(context); + } + } + + private void searchWithFilterTree(DatafusionContext context) { + // TODO: wire NativeBridge — execute substrait plan, consume stream, populate context + throw new UnsupportedOperationException("DataFusion native bridge not yet wired"); + } + + private void searchVanilla(DatafusionContext context) throws IOException { + // TODO: wire NativeBridge — execute substrait plan, consume stream, populate context + throw new UnsupportedOperationException("DataFusion native bridge not yet wired"); + } + + public long getReaderPtr() { + return readerPtr; + } + + @Override + public void close() { + // TODO : reader handle close + } +} diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java new file mode 100644 index 0000000000000..a5b1b29274ba2 --- /dev/null +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java @@ -0,0 +1,27 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.datafusion.jni; + +/** + * Core JNI bridge to native DataFusion library. + * All native method declarations are centralized here. 
+ */
+public final class NativeBridge {
+
+    // NOTE(review): the native library is not loaded yet (see TODO), so the native
+    // methods below presumably cannot be invoked successfully until this is wired.
+    static {
+        // TODO : NativeLibraryLoader.load("opensearch_datafusion_jni");
+    }
+
+    // Static-only holder; not instantiable.
+    private NativeBridge() {}
+
+    // Reader management
+    /**
+     * Creates a native reader and returns its handle value.
+     * Presumably {@code path} is the directory containing {@code files} — TODO confirm
+     * against the native implementation.
+     */
+    public static native long createDatafusionReader(String path, String[] files);
+
+    /** Closes the native reader identified by {@code ptr}. */
+    public static native void closeDatafusionReader(long ptr);
+}
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/ReaderHandle.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/ReaderHandle.java
new file mode 100644
index 0000000000000..fed2b8601b845
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/ReaderHandle.java
@@ -0,0 +1,29 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.be.datafusion.jni;
+
+import org.opensearch.analytics.backend.jni.NativeHandle;
+
+/**
+ * Type-safe handle for native reader.
+ * <p>
+ * The native reader is created in the constructor via
+ * {@link NativeBridge#createDatafusionReader(String, String[])} and released in
+ * {@link #doClose()}.
+ */
+public final class ReaderHandle extends NativeHandle {
+
+    /**
+     * Creates the native reader and passes the returned handle value to {@link NativeHandle}.
+     *
+     * @param path  forwarded unchanged to the native create call
+     * @param files forwarded unchanged to the native create call
+     */
+    public ReaderHandle(String path, String[] files) {
+        super(NativeBridge.createDatafusionReader(path, files));
+    }
+
+    /**
+     * Closes the datafusion reader and releases any associated resources.
+     */
+    @Override
+    protected void doClose() {
+        NativeBridge.closeDatafusionReader(ptr);
+    }
+}
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/package-info.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/package-info.java
new file mode 100644
index 0000000000000..6a8481365c71c
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/package-info.java
@@ -0,0 +1,19 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+/**
+ * JNI bridge layer for DataFusion native library integration.
+ *
+ *

<p>This package provides:
+ * <ul>
+ *   <li>Type-safe native handle wrappers ({@link org.opensearch.be.datafusion.jni.ReaderHandle})</li>
+ *   <li>Centralized native method declarations ({@link org.opensearch.be.datafusion.jni.NativeBridge})</li>
+ * </ul>
+ *
+ */
+package org.opensearch.be.datafusion.jni;
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/package-info.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/package-info.java
deleted file mode 100644
index dccab0e7fb8a7..0000000000000
--- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/package-info.java
+++ /dev/null
@@ -1,12 +0,0 @@
-/*
- * SPDX-License-Identifier: Apache-2.0
- *
- * The OpenSearch Contributors require contributions made to
- * this file be licensed under the Apache-2.0 license or a
- * compatible open source license.
- */
-
-/**
- * DataFusion native execution engine back-end plugin.
- */
-package org.opensearch.be.datafusion;
diff --git a/sandbox/plugins/analytics-backend-lucene/build.gradle b/sandbox/plugins/analytics-backend-lucene/build.gradle
new file mode 100644
index 0000000000000..c0413a6c6d41a
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-lucene/build.gradle
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+apply plugin: 'opensearch.internal-cluster-test'
+
+opensearchplugin {
+  description = 'OpenSearch plugin providing Lucene-based search execution engine'
+  classname = 'org.opensearch.be.lucene.LuceneSearchEnginePlugin'
+}
+
+dependencies {
+  // Shared types and SPI interfaces (EngineBridge, AnalyticsBackEndPlugin, etc.)
+  // Also provides calcite-core transitively via api.
+  api project(':sandbox:libs:analytics-framework')
+
+  implementation "org.apache.logging.log4j:log4j-api:${versions.log4j}"
+  implementation "org.apache.logging.log4j:log4j-core:${versions.log4j}"
+}
+
+test {
+  systemProperty 'tests.security.manager', 'false'
+}
diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java
new file mode 100644
index 0000000000000..ba523f42a78eb
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java
@@ -0,0 +1,168 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.be.lucene;
+
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.search.DocIdSetIterator;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.ScoreMode;
+import org.apache.lucene.search.Scorer;
+import org.apache.lucene.search.Weight;
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.index.engine.exec.EngineSearcher;
+
+import java.io.IOException;
+import java.util.BitSet;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Lucene searcher that turns a query into a {@link Weight}, registers it in a
+ * static registry under a numeric id, and exposes static helpers that look the
+ * weight (and per-segment scorers) up again by id.
+ * <p>
+ * NOTE(review): both registries are static and shared by every instance;
+ * entries are only removed by {@link #releaseWeight(long)} and
+ * {@link #releaseCollector(long)}. {@link #close()} removes nothing.
+ * <p>
+ * TODO : need to rethink this
+ */
+@ExperimentalApi
+public class LuceneEngineSearcher implements EngineSearcher {
+
+    private final IndexSearcher indexSearcher;
+    private final DirectoryReader directoryReader;
+
+    /** Active Weight contexts keyed by opaque pointer. */
+    private static final Map activeWeights = new ConcurrentHashMap<>();
+    /** Active partition scorer contexts keyed by opaque pointer. */
+    private static final Map activeScorers = new ConcurrentHashMap<>();
+    // Single id sequence shared by weights and scorers; starts at 1, so 0 is never issued.
+    private static final AtomicLong nextId = new AtomicLong(1);
+
+    public LuceneEngineSearcher(IndexSearcher indexSearcher, DirectoryReader directoryReader) {
+        this.indexSearcher = indexSearcher;
+        this.directoryReader = directoryReader;
+    }
+
+    /**
+     * Execute: create a Weight from the query, register it, and store the
+     * pointer on the context so the indexed query path can use it.
+     * Also records the segment count and each segment's {@code maxDoc} on the context.
+     *
+     * @throws IllegalStateException if the context has no query
+     */
+    @Override
+    public void search(LuceneSearchContext context) throws IOException {
+        Query query = context.getQuery();
+        if (query == null) {
+            throw new IllegalStateException("No query set on LuceneSearchContext");
+        }
+        Query rewritten = indexSearcher.rewrite(query);
+        Weight weight = indexSearcher.createWeight(rewritten, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
+        List leaves = directoryReader.leaves();
+
+        // TODO : need to redo this - this is specific to indexed table flow
+        long ptr = nextId.getAndIncrement();
+        activeWeights.put(ptr, new WeightContext(weight, leaves));
+        context.setWeightPointer(ptr);
+        context.setSegmentCount(leaves.size());
+        context.setSegmentMaxDocs(leaves.stream().mapToInt(l -> l.reader().maxDoc()).toArray());
+    }
+
+    /**
+     * Create a partition scorer for a segment + doc range. Returns -1 if no matches.
+     * -1 is also returned for an unknown weight id, an out-of-range segment ordinal,
+     * and when creating the scorer throws {@link IOException}.
+     */
+    public static long createCollector(long weightPtr, int segmentOrd, int minDoc, int maxDoc) {
+        WeightContext ctx = activeWeights.get(weightPtr);
+        if (ctx == null || segmentOrd < 0 || segmentOrd >= ctx.leaves.size()) {
+            return -1;
+        }
+        try {
+            Scorer scorer = ctx.weight.scorer(ctx.leaves.get(segmentOrd));
+            if (scorer == null) return -1;
+            long id = nextId.getAndIncrement();
+            activeScorers.put(id, new PartitionScorerContext(scorer.iterator(), minDoc, maxDoc));
+            return id;
+        } catch (IOException e) {
+            // NOTE(review): the I/O failure is indistinguishable from "no matches" for the caller.
+            return -1;
+        }
+    }
+
+    /**
+     * Collect matching doc IDs in [rowGroupMin, rowGroupMax) as a bitset (long[]).
+     * The requested range is intersected with the scorer's own [minDoc, maxDoc);
+     * bit {@code i} of the result stands for doc {@code effectiveMin + i}, where
+     * {@code effectiveMin = max(rowGroupMin, ctx.minDoc)}.
+     * Returns an empty array for an unknown scorer id, an empty intersection, an
+     * exhausted iterator, or an {@link IOException}.
+     */
+    public static long[] collectDocs(long scorerPtr, int rowGroupMin, int rowGroupMax) {
+        PartitionScorerContext ctx = activeScorers.get(scorerPtr);
+        if (ctx == null) return new long[0];
+
+        int effectiveMin = Math.max(rowGroupMin, ctx.minDoc);
+        int effectiveMax = Math.min(rowGroupMax, ctx.maxDoc);
+        if (effectiveMin >= effectiveMax) return new long[0];
+
+        BitSet bitset = new BitSet(effectiveMax - effectiveMin);
+        try {
+            DocIdSetIterator iter = ctx.iterator;
+            // Resume from where the previous call on this scorer stopped (-1 on first use).
+            int docId = ctx.currentDoc;
+            if (docId == DocIdSetIterator.NO_MORE_DOCS || docId >= ctx.maxDoc) return new long[0];
+            if (docId < effectiveMin) docId = iter.advance(effectiveMin);
+            while (docId != DocIdSetIterator.NO_MORE_DOCS && docId < effectiveMax) {
+                bitset.set(docId - effectiveMin);
+                docId = iter.nextDoc();
+            }
+            // Remember the first doc at or past effectiveMax so the next range can pick it up.
+            ctx.currentDoc = docId;
+        } catch (IOException e) {
+            // NOTE(review): ctx.currentDoc is not updated on this path although the
+            // iterator may already have moved; the failure is reported as "no docs".
+            return new long[0];
+        }
+        return bitset.toLongArray();
+    }
+
+    /** Release a partition scorer. */
+    public static void releaseCollector(long scorerPtr) {
+        activeScorers.remove(scorerPtr);
+    }
+
+    /** Release a Weight context. Scorers created from it are not removed here. */
+    public static void releaseWeight(long weightPtr) {
+        activeWeights.remove(weightPtr);
+    }
+
+    /** Returns the number of segments for a registered weight, or -1 for an unknown id. */
+    public static int getSegmentCount(long weightPtr) {
+        WeightContext ctx = activeWeights.get(weightPtr);
+        return ctx != null ? ctx.leaves.size() : -1;
+    }
+
+    /** Returns {@code maxDoc} of the given segment, or -1 for an unknown id or ordinal. */
+    public static int getSegmentMaxDoc(long weightPtr, int segmentOrd) {
+        WeightContext ctx = activeWeights.get(weightPtr);
+        if (ctx == null || segmentOrd < 0 || segmentOrd >= ctx.leaves.size()) return -1;
+        return ctx.leaves.get(segmentOrd).reader().maxDoc();
+    }
+
+    public IndexSearcher getIndexSearcher() {
+        return indexSearcher;
+    }
+
+    public DirectoryReader getDirectoryReader() {
+        return directoryReader;
+    }
+
+    /** No-op: neither the reader nor any registry entry is released here. */
+    @Override
+    public void close() {}
+
+    /** A registered {@link Weight} together with the reader leaves it was created for. */
+    static class WeightContext {
+        final Weight weight;
+        final List leaves;
+
+        WeightContext(Weight weight, List leaves) {
+            this.weight = weight;
+            this.leaves = leaves;
+        }
+    }
+
+    /** Iterator over one segment's matches plus the doc range it is limited to. */
+    static class PartitionScorerContext {
+        final DocIdSetIterator iterator;
+        final int minDoc;
+        final int maxDoc;
+        // Last doc id returned by the iterator; -1 until collectDocs first advances it.
+        int currentDoc = -1;
+
+        PartitionScorerContext(DocIdSetIterator iterator, int minDoc, int maxDoc) {
+            this.iterator = iterator;
+            this.minDoc = minDoc;
+            this.maxDoc = maxDoc;
+        }
+    }
+}
diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneReaderManager.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneReaderManager.java
new file mode 100644
index 0000000000000..46ea0dc1c2359
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneReaderManager.java
@@ -0,0 +1,75 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */ + +package org.opensearch.be.lucene; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.search.ReferenceManager; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.exec.CatalogSnapshot; +import org.opensearch.index.engine.exec.EngineReaderManager; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.Map; + +/** + * Lucene implementation of {@link EngineReaderManager}. + *

+ * Wraps Lucene's {@link ReferenceManager} for {@link DirectoryReader}. + * Acquire increments the ref count on the current reader; + * release decrements it — same pattern as {@code DatafusionReaderManager}. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class LuceneReaderManager implements EngineReaderManager { + + Map readers = new HashMap<>(); + DataFormat dataFormat; + + @SuppressWarnings("unchecked") + public LuceneReaderManager(DataFormat dataFormat) { + this.dataFormat = dataFormat; + } + + /** Called when files are deleted after merges. */ + public void onFilesDeleted(Collection files) throws IOException { + // no-op + } + + @Override + public void onFilesAdded(Collection files) throws IOException { + // no-op + } + + @Override + public DirectoryReader getReader(CatalogSnapshot catalogSnapshot) throws IOException { + return readers.get(catalogSnapshot); + } + + @Override + public void beforeRefresh() throws IOException { + + } + + @Override + public void afterRefresh(boolean didRefresh, CatalogSnapshot catalogSnapshot) throws IOException { + if (readers.containsKey(catalogSnapshot)) { + return; + } + readers.put(catalogSnapshot, (DirectoryReader) catalogSnapshot.getReader(dataFormat)); + } + + @Override + public void onDeleted(CatalogSnapshot catalogSnapshot) throws IOException { + readers.remove(catalogSnapshot).close(); + } +} diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java new file mode 100644 index 0000000000000..c9ee6b9c30b56 --- /dev/null +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java @@ -0,0 +1,116 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source 
license. + */ + +package org.opensearch.be.lucene; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.CatalogSnapshot; +import org.opensearch.search.SearchExecutionContext; +import org.opensearch.search.SearchShardTarget; +import org.opensearch.search.internal.ShardSearchRequest; + +import java.io.IOException; + +/** + * Lucene-specific search execution context. + *

+ * Input: a Lucene {@link Query}. + * Output: a registered Weight pointer + segment metadata that Rust + * uses for JNI callbacks to stream bitsets per partition range. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class LuceneSearchContext implements SearchExecutionContext { + + CatalogSnapshot catalogSnapshot; + private final ShardSearchRequest request; + private final SearchShardTarget shardTarget; + + private final DirectoryReader reader; + private final LuceneEngineSearcher searcher; + private Query query; + + private long weightPointer; + private int segmentCount; + private int[] segmentMaxDocs; + + public LuceneSearchContext( + CatalogSnapshot catalogSnapshot, + ShardSearchRequest request, + SearchShardTarget shardTarget, + LuceneReaderManager readerManager + ) throws IOException { + this.catalogSnapshot = catalogSnapshot; + reader = readerManager.getReader(catalogSnapshot); + IndexSearcher indexSearcher = new IndexSearcher(reader);// TODO : check if this is right + searcher = new LuceneEngineSearcher(indexSearcher, reader); + this.request = request; + this.shardTarget = shardTarget; + } + + public Query getQuery() { + return query; + } + + public void setQuery(Query query) { + this.query = query; + } + + public long getWeightPointer() { + return weightPointer; + } + + public void setWeightPointer(long weightPointer) { + this.weightPointer = weightPointer; + } + + public int getSegmentCount() { + return segmentCount; + } + + public void setSegmentCount(int segmentCount) { + this.segmentCount = segmentCount; + } + + public int[] getSegmentMaxDocs() { + return segmentMaxDocs; + } + + public void setSegmentMaxDocs(int[] segmentMaxDocs) { + this.segmentMaxDocs = segmentMaxDocs; + } + + @Override + public CatalogSnapshot catalogSnapshot() { + return null; + } + + @Override + public ShardSearchRequest request() { + return request; + } + + @Override + public SearchShardTarget shardTarget() { + return shardTarget; + } + + @Override + public void 
close() throws IOException { + // Release the registered Weight when context is closed + if (weightPointer != 0) { + LuceneEngineSearcher.releaseWeight(weightPointer); + weightPointer = 0; + } + searcher.close(); + } +} diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java new file mode 100644 index 0000000000000..b2e9ebe25c39c --- /dev/null +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java @@ -0,0 +1,56 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.lucene; + +import org.apache.calcite.sql.SqlOperatorTable; +import org.opensearch.analytics.backend.EngineBridge; +import org.opensearch.analytics.spi.AnalyticsBackEndPlugin; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.exec.SearchExecEngine; +import org.opensearch.index.shard.ShardPath; +import org.opensearch.plugins.Plugin; + +import java.io.IOException; +import java.util.List; + +/** + * Plugin providing the Lucene-based search execution engine. 
+ * <p>
+ * All hooks are placeholders for now: {@link #bridge()} and
+ * {@link #operatorTable()} return {@code null}, {@link #create} always throws,
+ * and no data formats are advertised.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public class LuceneSearchEnginePlugin implements AnalyticsBackEndPlugin {
+
+    /** Returns the fixed back-end name {@code "lucene-analytics-backend"}. */
+    @Override
+    public String name() {
+        return "lucene-analytics-backend";
+    }
+
+    /** Not provided yet: returns {@code null}. */
+    @Override
+    public EngineBridge bridge() {
+        return null;
+    }
+
+    /** Not provided yet: returns {@code null}. */
+    @Override
+    public SqlOperatorTable operatorTable() {
+        return null;
+    }
+
+    /**
+     * Not wired yet.
+     *
+     * @throws UnsupportedOperationException always
+     */
+    @Override
+    public SearchExecEngine create(ShardPath shardPath, DataFormat dataFormat) throws IOException {
+        // TODO: obtain ReferenceManager from the shard's InternalEngine
+        throw new UnsupportedOperationException("Lucene engine creation not yet wired to shard lifecycle");
+    }
+
+    /** Returns an empty, immutable list. */
+    @Override
+    public List getSupportedFormats() {
+        return List.of();
+    }
+}
diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchExecEngine.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchExecEngine.java
new file mode 100644
index 0000000000000..55ab88663d5a2
--- /dev/null
+++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchExecEngine.java
@@ -0,0 +1,97 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */ + +package org.opensearch.be.lucene; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.opensearch.action.search.SearchShardTask; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.dataformat.FieldTypeCapabilities; +import org.opensearch.index.engine.exec.CatalogSnapshot; +import org.opensearch.index.engine.exec.EngineReaderManager; +import org.opensearch.index.engine.exec.SearchExecEngine; +import org.opensearch.search.SearchShardTarget; +import org.opensearch.search.internal.ShardSearchRequest; + +import java.io.IOException; +import java.util.Set; + +/** + * Lucene-backed {@link SearchExecEngine}. + * Plan type is {@link Query}. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class LuceneSearchExecEngine implements SearchExecEngine { + + private final LuceneReaderManager readerManager; + private long nextContextId; + + public LuceneSearchExecEngine() { + this.readerManager = new LuceneReaderManager(getLuceneDataFormat()); + } + + private static DataFormat getLuceneDataFormat() { + return new DataFormat() { + @Override + public String name() { + return "Lucene"; + } + + @Override + public long priority() { + return 0; + } + + @Override + public Set supportedFields() { + return Set.of(); + } + }; + } + + // TODO : replace this with filter provider/delegate methods + @Override + public void execute(LuceneSearchContext context) throws IOException { + DirectoryReader reader = readerManager.getReader(context.catalogSnapshot()); + LuceneEngineSearcher searcher = new LuceneEngineSearcher(new IndexSearcher(reader), reader); + try { + searcher.search(context); + } finally { + searcher.close(); + } + } + + @Override + public LuceneSearchContext createContext( + CatalogSnapshot snapshot, + ShardSearchRequest request, + SearchShardTarget shardTarget, + 
SearchShardTask task + ) throws IOException { + return new LuceneSearchContext(snapshot, request, shardTarget, readerManager); + } + + @Override + public Query convertFragment(Object fragment) { + // DQE passes a Lucene Query directly + if (fragment instanceof Query) { + return (Query) fragment; + } + throw new UnsupportedOperationException("Expected Lucene Query, got " + fragment.getClass().getSimpleName()); + } + + @Override + public EngineReaderManager getReaderManager() { + return null; + } +} diff --git a/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java b/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java index c59d2bdbbaf89..0aa358fc71f89 100644 --- a/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/index/shard/IndexShardIT.java @@ -733,7 +733,8 @@ public static final IndexShard newIndexShard( indexService.getRefreshMutex(), clusterService.getClusterApplierService(), MergedSegmentPublisher.EMPTY, - ReferencedSegmentsPublisher.EMPTY + ReferencedSegmentsPublisher.EMPTY, + null // TODO ); } diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java index 1a4b14ddef9ba..0c149dbb195a6 100644 --- a/server/src/main/java/org/opensearch/index/IndexModule.java +++ b/server/src/main/java/org/opensearch/index/IndexModule.java @@ -74,6 +74,7 @@ import org.opensearch.index.engine.Engine; import org.opensearch.index.engine.EngineConfigFactory; import org.opensearch.index.engine.EngineFactory; +import org.opensearch.index.engine.exec.DataFormatRegistry; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.IndexEventListener; import org.opensearch.index.shard.IndexShard; @@ -741,7 +742,8 @@ public IndexService newIndexService( Consumer replicator, Function segmentReplicationStatsProvider, Supplier 
clusterDefaultMaxMergeAtOnceSupplier, - ClusterMergeSchedulerConfig clusterMergeSchedulerConfig + ClusterMergeSchedulerConfig clusterMergeSchedulerConfig, + CheckedFunction dataFormatRegistrySupplier ) throws IOException { final IndexEventListener eventListener = freeze(); Function> readerWrapperFactory = indexReaderWrapper @@ -814,7 +816,8 @@ public IndexService newIndexService( replicator, segmentReplicationStatsProvider, clusterDefaultMaxMergeAtOnceSupplier, - clusterMergeSchedulerConfig + clusterMergeSchedulerConfig, + dataFormatRegistrySupplier ); success = true; return indexService; diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index 2a862dd94b43e..f8c05e4999fa3 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -78,6 +78,7 @@ import org.opensearch.index.engine.EngineConfigFactory; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.engine.MergedSegmentWarmerFactory; +import org.opensearch.index.engine.exec.DataFormatRegistry; import org.opensearch.index.fielddata.IndexFieldDataCache; import org.opensearch.index.fielddata.IndexFieldDataService; import org.opensearch.index.mapper.MapperService; @@ -209,6 +210,7 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust private volatile TimeValue refreshInterval; private volatile boolean shardLevelRefreshEnabled; private final IndexStorePlugin.StoreFactory storeFactory; + private final CheckedFunction dataFormatRegistrySupplier; @InternalApi public IndexService( @@ -255,7 +257,8 @@ public IndexService( Consumer replicator, Function segmentReplicationStatsProvider, Supplier clusterDefaultMaxMergeAtOnceSupplier, - ClusterMergeSchedulerConfig clusterMergeSchedulerConfig + ClusterMergeSchedulerConfig clusterMergeSchedulerConfig, + CheckedFunction dataFormatRegistrySupplier ) { 
super(indexSettings); this.storeFactory = storeFactory; @@ -366,6 +369,7 @@ public IndexService( startIndexLevelRefreshTask(); } } + this.dataFormatRegistrySupplier = dataFormatRegistrySupplier; } @InternalApi @@ -454,7 +458,8 @@ public IndexService( s -> {}, (shardId) -> ReplicationStats.empty(), clusterDefaultMaxMergeAtOnce, - clusterMergeSchedulerConfig + clusterMergeSchedulerConfig, + null ); } @@ -775,6 +780,7 @@ protected void closeInternal() { directoryFactory ); eventListener.onStoreCreated(shardId); + DataFormatRegistry dataFormatRegistry = dataFormatRegistrySupplier.apply(path); indexShard = new IndexShard( routing, this.indexSettings, @@ -813,7 +819,8 @@ protected void closeInternal() { refreshMutex, clusterService.getClusterApplierService(), this.indexSettings.isSegRepEnabledOrRemoteNode() ? mergedSegmentPublisher : null, - this.indexSettings.isSegRepEnabledOrRemoteNode() ? referencedSegmentsPublisher : null + this.indexSettings.isSegRepEnabledOrRemoteNode() ? referencedSegmentsPublisher : null, + dataFormatRegistry ); eventListener.indexShardStateChanged(indexShard, null, indexShard.state(), "shard created"); eventListener.afterIndexShardCreated(indexShard); diff --git a/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java b/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java new file mode 100644 index 0000000000000..aae3bd5d1e9ea --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java @@ -0,0 +1,118 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.lucene.search.ReferenceManager; +import org.opensearch.common.TriConsumer; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.exec.CatalogSnapshot; +import org.opensearch.index.engine.exec.CatalogSnapshotAwareRefreshListener; +import org.opensearch.index.engine.exec.CatalogSnapshotDeleteListener; +import org.opensearch.index.engine.exec.DataFormatRegistry; +import org.opensearch.index.engine.exec.FilesListener; +import org.opensearch.index.shard.ShardPath; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Collection; +import java.util.List; +import java.util.Map; +import java.util.function.Consumer; +import java.util.function.Supplier; + +@ExperimentalApi +public class CompositeEngine implements Closeable { + + private static final Logger logger = LogManager.getLogger(CompositeEngine.class); + private final Map fileListeners; + private final List catalogSnapshotAwareRefreshListeners; + private final List deleteSnapshotListeners; + private static final TriConsumer< + Supplier>, + CatalogSnapshotAwareRefreshListener, + Boolean> POST_REFRESH_CATALOG_SNAPSHOT_AWARE_LISTENER_CONSUMER = ( + catalogSnapshot, + catalogSnapshotAwareRefreshListener, + didRefresh) -> { + try { + // Wrap in Supplier as required by CatalogSnapshotAwareRefreshListener interface + catalogSnapshotAwareRefreshListener.afterRefresh(didRefresh, catalogSnapshot.get().getRef()); + } catch (IOException e) { + throw new RuntimeException(e); + } + }; + private static final Consumer POST_REFRESH_LISTENER_CONSUMER = refreshListener -> { + try { + refreshListener.afterRefresh(true); + } catch (IOException e) { + throw new RuntimeException(e); + } + }; + + public CompositeEngine(DataFormatRegistry dataFormatRegistry, ShardPath 
shardPath) throws IOException { + fileListeners = dataFormatRegistry.getFilesListenerMap(); + deleteSnapshotListeners = dataFormatRegistry.getCatalogSnapshotDeleteListeners(); + catalogSnapshotAwareRefreshListeners = dataFormatRegistry.getCatalogSnapshotAwareRefreshListeners(); + } + + @Override + public void close() throws IOException { + + } + + public void notifyDelete(Map> dfFilesToDelete) throws IOException { + for (DataFormat format : fileListeners.keySet()) { + fileListeners.get(format).onFilesDeleted(dfFilesToDelete.get(format)); + } + } + + public void notifyFilesAdded(Map> dfNewFiles) throws IOException { + for (DataFormat format : fileListeners.keySet()) { + fileListeners.get(format).onFilesAdded(dfNewFiles.get(format)); + } + } + + public void notifyCatalogSnapshotDelete(CatalogSnapshot catalogSnapshot) throws IOException { + for (CatalogSnapshotDeleteListener listener : deleteSnapshotListeners) { + listener.onDeleted(catalogSnapshot); + } + } + + private void invokeRefreshListeners(boolean didRefresh) { + catalogSnapshotAwareRefreshListeners.forEach( + refreshListener -> POST_REFRESH_CATALOG_SNAPSHOT_AWARE_LISTENER_CONSUMER.apply( + this::acquireSnapshot, + refreshListener, + didRefresh + ) + ); + + } + + public ReleasableRef acquireSnapshot() { + return null;// TODO : return this.catalogSnapshotManager.acquireSnapshot(); + } + + @ExperimentalApi + public static abstract class ReleasableRef implements AutoCloseable { + + private final T t; + + public ReleasableRef(T t) { + this.t = t; + } + + public T getRef() { + return t; + } + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/IndexFilterTree.java b/server/src/main/java/org/opensearch/index/engine/IndexFilterTree.java new file mode 100644 index 0000000000000..af83a9ceb7233 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/IndexFilterTree.java @@ -0,0 +1,32 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made 
to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; +import java.io.IOException; + +/** + * Boolean tree structure for multi-engine query decomposition. + *

+ * Wraps the root node and provides compact array + * serialization for JNI transport to the Rust layer. + *

+ * + * @opensearch.experimental + */ +@ExperimentalApi +public class IndexFilterTree implements Closeable { + + // TODO + @Override + public void close() throws IOException { + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshot.java b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshot.java index 90207e58cd1f5..80abcb59eccbe 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshot.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshot.java @@ -10,6 +10,7 @@ import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.common.util.concurrent.AbstractRefCounted; +import org.opensearch.index.engine.dataformat.DataFormat; import java.io.IOException; import java.util.Collection; @@ -133,4 +134,6 @@ public CatalogSnapshot cloneNoAcquire() { * @param b additional boolean parameter for implementation-specific behavior */ public abstract void setUserData(Map userData, boolean b); + + public abstract Object getReader(DataFormat dataFormat); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotAwareRefreshListener.java b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotAwareRefreshListener.java new file mode 100644 index 0000000000000..f1e491d19534e --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotAwareRefreshListener.java @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.IOException; + +@ExperimentalApi +public interface CatalogSnapshotAwareRefreshListener { + /** + * Called before refresh operation. 
+     */
+    void beforeRefresh() throws IOException;
+
+    /**
+     * Called after refresh operation with catalog snapshot.
+     * @param didRefresh whether refresh actually occurred
+     * @param catalogSnapshot the current catalog snapshot with file information
+     */
+    void afterRefresh(boolean didRefresh, CatalogSnapshot catalogSnapshot) throws IOException;
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotDeleteListener.java b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotDeleteListener.java
new file mode 100644
index 0000000000000..e0df0ae6cefce
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotDeleteListener.java
@@ -0,0 +1,18 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+
+import java.io.IOException;
+
+/**
+ * Callback for components that need to react when a {@link CatalogSnapshot} is deleted.
+ * NOTE(review): what exactly counts as "deleted" is decided by the caller and is not visible
+ * here - confirm against the catalog snapshot manager once it exists.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public interface CatalogSnapshotDeleteListener {
+    /**
+     * Invoked with the snapshot that was deleted.
+     *
+     * @param catalogSnapshot the deleted snapshot
+     * @throws IOException if the listener fails while handling the deletion
+     */
+    void onDeleted(CatalogSnapshot catalogSnapshot) throws IOException;
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistry.java b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistry.java
new file mode 100644
index 0000000000000..bead8e2036a1b
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistry.java
@@ -0,0 +1,62 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.index.engine.dataformat.DataFormat;
+import org.opensearch.index.shard.ShardPath;
+import org.opensearch.plugins.SearchAnalyticsBackEndPlugin;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Registry of data format SPIs from associated plugins.
+ * <p>
+ * Built once per shard path: for every data format a plugin supports, it creates that plugin's
+ * search execution engine and records the engine's reader manager under each listener role.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public class DataFormatRegistry {
+    // One reader manager per (plugin, format) pair, in creation order.
+    private final List catalogSnapshotAwareRefreshListeners = new ArrayList<>();
+    // Keyed by data format; a later plugin supporting the same format replaces the earlier entry.
+    private final Map filesListenerMap = new HashMap<>();
+    // One reader manager per (plugin, format) pair, in creation order.
+    private final List catalogSnapshotDeleteListeners = new ArrayList<>();
+    // Keyed by data format; a later plugin supporting the same format replaces the earlier entry.
+    private final Map> searchExecEngineMap = new HashMap<>();
+
+    /**
+     * Creates one search execution engine per plugin and supported data format, and registers
+     * the engine's reader manager as refresh listener, files listener and snapshot-delete listener.
+     *
+     * @param searchPlugins plugins to query for supported formats and engines
+     * @param shardPath     shard location handed to each plugin when creating its engine
+     * @throws IOException declared by this constructor; presumably raised by engine creation - TODO confirm
+     */
+    public DataFormatRegistry(List searchPlugins, ShardPath shardPath) throws IOException {
+        for (SearchAnalyticsBackEndPlugin plugin : searchPlugins) {
+            for (DataFormat dataFormat : plugin.getSupportedFormats()) {
+                SearchExecEngine engine = plugin.create(shardPath, dataFormat);
+                // The same reader manager instance serves all three listener roles.
+                EngineReaderManager readerManager = engine.getReaderManager();
+                catalogSnapshotAwareRefreshListeners.add(readerManager);
+                filesListenerMap.put(dataFormat, readerManager);
+                catalogSnapshotDeleteListeners.add(readerManager);
+                searchExecEngineMap.put(dataFormat, engine);
+            }
+        }
+    }
+
+    /**
+     * Returns the refresh listeners. This is the registry's own list, not a copy.
+     */
+    public List getCatalogSnapshotAwareRefreshListeners() {
+        return catalogSnapshotAwareRefreshListeners;
+    }
+
+    /**
+     * Returns the snapshot-delete listeners. This is the registry's own list, not a copy.
+     */
+    public List getCatalogSnapshotDeleteListeners() {
+        return catalogSnapshotDeleteListeners;
+    }
+
+    /**
+     * Returns the files listeners keyed by data format. This is the registry's own map, not a copy.
+     */
+    public Map getFilesListenerMap() {
+        return filesListenerMap;
+    }
+
+    /**
+     * Looks up the search execution engine registered for a data format.
+     *
+     * @param dataFormat the format to look up
+     * @return the engine, or {@code null} when no plugin registered one for this format
+     */
+    public SearchExecEngine getSearchExecEngine(DataFormat dataFormat) {
+        return searchExecEngineMap.get(dataFormat);
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistryFactory.java b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistryFactory.java
new
file mode 100644
index 0000000000000..4dd601f6fa40b
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistryFactory.java
@@ -0,0 +1,36 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.index.shard.ShardPath;
+import org.opensearch.plugins.SearchAnalyticsBackEndPlugin;
+
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Holds the search plugins so that a {@link DataFormatRegistry} can be built later, once a
+ * {@link ShardPath} is known.
+ *
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public class DataFormatRegistryFactory {
+    // Kept by reference; the list passed to the constructor is not copied.
+    private final List searchPlugins;
+
+    /**
+     * @param searchPlugins plugins passed on to every registry this factory creates
+     */
+    public DataFormatRegistryFactory(List searchPlugins) {
+        this.searchPlugins = searchPlugins;
+    }
+
+    /**
+     * Called at shard creation time when ShardPath is available.
+     *
+     * @param shardPath shard location handed to the new registry
+     * @return a new registry built from this factory's plugins
+     * @throws IOException if the registry constructor fails
+     */
+    public DataFormatRegistry create(ShardPath shardPath) throws IOException {
+        return new DataFormatRegistry(searchPlugins, shardPath);
+    }
+
+    /**
+     * @return {@code true} when at least one search plugin was supplied
+     */
+    public boolean hasPlugins() {
+        return !searchPlugins.isEmpty();
+    }
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/EngineReaderManager.java b/server/src/main/java/org/opensearch/index/engine/exec/EngineReaderManager.java
new file mode 100644
index 0000000000000..c3a6d94c29faf
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/EngineReaderManager.java
@@ -0,0 +1,27 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+
+import java.io.IOException;
+
+/**
+ * Engine-agnostic reader manager.
+ * <p>
+ * For Lucene, wraps {@code ReferenceManager}.
+ * For pluggable engines, wraps the engine-specific reader lifecycle.
+ *
+ * @param <T> the reader type managed by this instance
+ * @opensearch.experimental
+ */
+@ExperimentalApi
+public interface EngineReaderManager extends CatalogSnapshotAwareRefreshListener, FilesListener, CatalogSnapshotDeleteListener {
+    /**
+     * Returns the reader associated with the given catalog snapshot.
+     *
+     * @param catalogSnapshot the snapshot whose reader is requested
+     * @return the engine-specific reader
+     * @throws IOException if the reader cannot be provided
+     */
+    T getReader(CatalogSnapshot catalogSnapshot) throws IOException;
+}
diff --git a/server/src/main/java/org/opensearch/index/engine/exec/EngineSearcher.java b/server/src/main/java/org/opensearch/index/engine/exec/EngineSearcher.java
new file mode 100644
index 0000000000000..bc5385d180bbb
--- /dev/null
+++ b/server/src/main/java/org/opensearch/index/engine/exec/EngineSearcher.java
@@ -0,0 +1,35 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.index.engine.exec;
+
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.common.lease.Releasable;
+import org.opensearch.search.SearchExecutionContext;
+
+import java.io.IOException;
+
+/**
+ * Engine-agnostic searcher interface.
+ *

+ * Each engine implementation provides its own searcher that knows how to + * execute queries against its reader. The searcher is acquired from + * {@link SearchExecEngine} and used to execute searches against a + * point-in-time snapshot. + * + * @param the context type this searcher operates on + * @opensearch.experimental + */ +@ExperimentalApi +public interface EngineSearcher extends Releasable { + + /** + * Execute a search using this searcher, populating results on the context. + */ + void search(C context) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FileMetadata.java b/server/src/main/java/org/opensearch/index/engine/exec/FileMetadata.java new file mode 100644 index 0000000000000..71b85e0c2a4c6 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/FileMetadata.java @@ -0,0 +1,106 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.util.Objects; + +/** + * Represents metadata for a file in the index, including its data format and filename. + * Files can be in different formats (e.g., "lucene", "metadata") and this class provides + * a unified way to represent and serialize file information across the system. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class FileMetadata { + + /** + * Delimiter used to separate filename and data format in serialized form. + */ + public static final String DELIMITER = ":::"; + private static final String METADATA_KEY = "metadata"; + + private final String file; + private final String dataFormat; + + /** + * Constructs a FileMetadata with explicit data format and filename. 
+ * + * @param dataFormat the data format identifier (e.g., "lucene", "metadata") + * @param file the filename + */ + public FileMetadata(String dataFormat, String file) { + this.file = file; + this.dataFormat = dataFormat; + } + + /** + * Constructs a FileMetadata by parsing a serialized data-format-aware filename. + * The format is "filename:::dataFormat". If no delimiter is present and the filename + * starts with "metadata", it's treated as a metadata file. Otherwise, defaults to "lucene". + * + * @param dataFormatAwareFile the serialized filename with optional data format + */ + public FileMetadata(String dataFormatAwareFile) { + if (!dataFormatAwareFile.contains(DELIMITER) && dataFormatAwareFile.startsWith(METADATA_KEY)) { + this.dataFormat = "metadata"; + this.file = dataFormatAwareFile; + return; + } + String[] parts = dataFormatAwareFile.split(DELIMITER); + this.dataFormat = (parts.length == 1) ? "lucene" : parts[1]; + this.file = parts[0]; + } + + /** + * Serializes this FileMetadata to a string in the format "filename:::dataFormat". + * + * @return the serialized representation + */ + public String serialize() { + return file + DELIMITER + dataFormat; + } + + @Override + public String toString() { + return serialize(); + } + + /** + * Returns the filename. + * + * @return the filename + */ + public String file() { + return file; + } + + /** + * Returns the data format identifier. 
+ * + * @return the data format (e.g., "lucene", "metadata") + */ + public String dataFormat() { + return dataFormat; + } + + @Override + public boolean equals(Object o) { + if (o == null || getClass() != o.getClass()) return false; + FileMetadata that = (FileMetadata) o; + return Objects.equals(file, that.file) && Objects.equals(dataFormat, that.dataFormat); + } + + @Override + public int hashCode() { + return Objects.hash(file, dataFormat); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FilesListener.java b/server/src/main/java/org/opensearch/index/engine/exec/FilesListener.java new file mode 100644 index 0000000000000..7c6b69acbe9cf --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/FilesListener.java @@ -0,0 +1,21 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.IOException; +import java.util.Collection; + +@ExperimentalApi +public interface FilesListener { + void onFilesDeleted(Collection files) throws IOException; + + void onFilesAdded(Collection files) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexFileDeleter.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexFileDeleter.java new file mode 100644 index 0000000000000..f26b5da62799f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexFileDeleter.java @@ -0,0 +1,123 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.CompositeEngine; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.shard.ShardPath; + +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +@ExperimentalApi +public class IndexFileDeleter { + + private final Map> fileRefCounts = new ConcurrentHashMap<>(); + private final CompositeEngine compositeEngine; + + public IndexFileDeleter(CompositeEngine compositeEngine, CatalogSnapshot initialCatalogSnapshot, ShardPath shardPath) + throws IOException { + this.compositeEngine = compositeEngine; + if (initialCatalogSnapshot != null) { + addFileReferences(initialCatalogSnapshot); + deleteUnreferencedFiles(shardPath); + } + } + + public synchronized void addFileReferences(CatalogSnapshot snapshot) { + Map> dfSegregatedFiles = segregateFilesByFormat(snapshot); + Map> dfNewFiles = new HashMap<>(); + + for (Map.Entry> entry : dfSegregatedFiles.entrySet()) { + DataFormat dataFormat = entry.getKey(); + Collection newFiles = new HashSet<>(); + Map dfFileRefCounts = fileRefCounts.computeIfAbsent(dataFormat, k -> new HashMap<>()); + Collection files = entry.getValue(); + for (String file : files) { + AtomicInteger refCount = dfFileRefCounts.computeIfAbsent(file, k -> new AtomicInteger(0)); + if (refCount.incrementAndGet() == 1) { + // First reference — this file is new + newFiles.add(file); + } + } + if (!newFiles.isEmpty()) { + dfNewFiles.put(dataFormat, newFiles); + } + } + + if (!dfNewFiles.isEmpty()) { + notifyFilesAdded(dfNewFiles); + } + } + + public synchronized void removeFileReferences(CatalogSnapshot snapshot) { + Map> dfSegregatedFiles = segregateFilesByFormat(snapshot); + Map> dfFilesToDelete = new HashMap<>(); 
+ + for (Map.Entry> entry : dfSegregatedFiles.entrySet()) { + DataFormat dataFormat = entry.getKey(); + Collection filesToDelete = new HashSet<>(); + Map dfFileRefCounts = fileRefCounts.get(dataFormat); + if (dfFileRefCounts != null) { + Collection files = entry.getValue(); + for (String file : files) { + AtomicInteger refCount = dfFileRefCounts.get(file); + if (refCount != null && refCount.decrementAndGet() == 0) { + dfFileRefCounts.remove(file); + filesToDelete.add(file); + } + } + } + if (!filesToDelete.isEmpty()) { + dfFilesToDelete.put(dataFormat, filesToDelete); + } + } + + if (!dfFilesToDelete.isEmpty()) { + notifyFilesDeleted(dfFilesToDelete); + } + } + + private void notifyFilesAdded(Map> dfNewFiles) { + try { + compositeEngine.notifyFilesAdded(dfNewFiles); + } catch (Exception e) { + System.err.println("Failed to notify new files: " + dfNewFiles + ", error: " + e.getMessage()); + } + } + + private void notifyFilesDeleted(Map> dfFilesToDelete) { + try { + compositeEngine.notifyDelete(dfFilesToDelete); + } catch (Exception e) { + System.err.println("Failed to delete unreferenced files: " + dfFilesToDelete + ", error: " + e.getMessage()); + } + } + + private Map> segregateFilesByFormat(CatalogSnapshot snapshot) { + Map> dfSegregatedFiles = new HashMap<>(); + // TODO + return dfSegregatedFiles; + } + + private void deleteUnreferencedFiles(ShardPath shardPath) throws IOException { + // TODO + } + + @Override + public String toString() { + return "IndexFileDeleter{fileRefCounts=" + fileRefCounts + "}"; + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/SearchExecEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/SearchExecEngine.java new file mode 100644 index 0000000000000..d45f2f0d67b2f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/SearchExecEngine.java @@ -0,0 +1,58 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file 
be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.action.search.SearchShardTask; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.core.action.ActionListener; +import org.opensearch.search.SearchExecutionContext; +import org.opensearch.search.SearchShardTarget; +import org.opensearch.search.internal.ShardSearchRequest; + +import java.io.Closeable; +import java.io.IOException; +import java.util.Iterator; + +/** + * Shard-level search execution engine interface. + * + * @param the engine-specific context type + * @param the engine-native plan type (e.g. byte[] for substrait) + * @opensearch.experimental + */ +@ExperimentalApi +public interface SearchExecEngine extends Closeable { + + void execute(C context) throws IOException; + + default void execute(C context, ActionListener listener) { + try { + execute(context); + listener.onResponse(context); + } catch (Exception e) { + listener.onFailure(e); + } + } + + C createContext(CatalogSnapshot snapshot, ShardSearchRequest request, SearchShardTarget shardTarget, SearchShardTask task) + throws IOException; + + default T convertFragment(Object fragment) { + throw new UnsupportedOperationException("convertFragment not supported by " + getClass().getSimpleName()); + } + + default Iterator executePlan(T plan, C context) { + throw new UnsupportedOperationException("executePlan not supported by " + getClass().getSimpleName()); + } + + @Override + default void close() throws IOException {} + + EngineReaderManager getReaderManager(); +} diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 1c155c897acba..fa8fb18939e7b 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -129,6 +129,7 @@ import 
org.opensearch.index.cache.request.ShardRequestCache; import org.opensearch.index.codec.CodecService; import org.opensearch.index.engine.CommitStats; +import org.opensearch.index.engine.CompositeEngine; import org.opensearch.index.engine.Engine; import org.opensearch.index.engine.Engine.GetResult; import org.opensearch.index.engine.EngineBackedIndexer; @@ -144,6 +145,7 @@ import org.opensearch.index.engine.SafeCommitInfo; import org.opensearch.index.engine.Segment; import org.opensearch.index.engine.SegmentsStats; +import org.opensearch.index.engine.exec.DataFormatRegistry; import org.opensearch.index.engine.exec.Indexer; import org.opensearch.index.fielddata.FieldDataStats; import org.opensearch.index.fielddata.ShardFieldData; @@ -316,6 +318,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl private volatile long pendingPrimaryTerm; // see JavaDocs for getPendingPrimaryTerm private final Object engineMutex = new Object(); // lock ordering: engineMutex -> mutex private final AtomicReference currentEngineReference = new AtomicReference<>(); + private final AtomicReference currentCompositeEngineReference = new AtomicReference<>(); final EngineFactory engineFactory; final EngineConfigFactory engineConfigFactory; @@ -404,6 +407,8 @@ Runnable getGlobalCheckpointSyncer() { // Used to limit the number of concurrent translog tasks. When the semaphore is exhausted, serial recovery is used. 
private static final Semaphore translogConcurrentRecoverySemaphore = new Semaphore(1000); + private final DataFormatRegistry dataFormatRegistry; + @InternalApi public IndexShard( final ShardRouting shardRouting, @@ -443,7 +448,8 @@ public IndexShard( final Object refreshMutex, final ClusterApplierService clusterApplierService, @Nullable final MergedSegmentPublisher mergedSegmentPublisher, - @Nullable final ReferencedSegmentsPublisher referencedSegmentsPublisher + @Nullable final ReferencedSegmentsPublisher referencedSegmentsPublisher, + @Nullable final DataFormatRegistry dataFormatRegistry ) throws IOException { super(shardRouting.shardId(), indexSettings); assert shardRouting.initializing(); @@ -569,6 +575,8 @@ public boolean shouldCache(Query query) { startRefreshTask(); } } + this.dataFormatRegistry = dataFormatRegistry; + CompositeEngine engine = new CompositeEngine(dataFormatRegistry, path); // TODO : just a placeholder } /** @@ -2204,6 +2212,20 @@ public Engine.Searcher acquireSearcher(String source) { return acquireSearcher(source, Engine.SearcherScope.EXTERNAL); } + /** + * Returns the current CompositeEngine, or null if no optimized index is active. + */ + public CompositeEngine getCompositeEngine() { + return currentCompositeEngineReference.get(); + } + + /** + * Sets the CompositeEngine for this shard (called during shard initialization for optimized indexes). 
+ */ + public void setCompositeEngine(CompositeEngine compositeEngine) { + currentCompositeEngineReference.set(compositeEngine); + } + private void markSearcherAccessed() { lastSearcherAccess.lazySet(threadPool.relativeTimeInMillis()); } diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index 16229f12c60a8..d623e88bb9b55 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -123,6 +123,7 @@ import org.opensearch.index.engine.NRTReplicationEngineFactory; import org.opensearch.index.engine.NoOpEngine; import org.opensearch.index.engine.ReadOnlyEngine; +import org.opensearch.index.engine.exec.DataFormatRegistry; import org.opensearch.index.fielddata.IndexFieldDataCache; import org.opensearch.index.flush.FlushStats; import org.opensearch.index.get.GetStats; @@ -146,6 +147,7 @@ import org.opensearch.index.shard.IndexShardState; import org.opensearch.index.shard.IndexingOperationListener; import org.opensearch.index.shard.IndexingStats; +import org.opensearch.index.shard.ShardPath; import org.opensearch.index.store.remote.filecache.FileCache; import org.opensearch.index.translog.InternalTranslogFactory; import org.opensearch.index.translog.RemoteBlobStoreInternalTranslogFactory; @@ -169,6 +171,7 @@ import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; import org.opensearch.plugins.IndexStorePlugin; import org.opensearch.plugins.PluginsService; +import org.opensearch.plugins.SearchAnalyticsBackEndPlugin; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; import org.opensearch.search.aggregations.support.ValuesSourceRegistry; @@ -424,6 +427,7 @@ public class IndicesService extends AbstractLifecycleComponent private volatile int defaultMaxMergeAtOnce; private final StatusCounterStats statusCounterStats; private final 
ClusterMergeSchedulerConfig clusterMergeSchedulerConfig; + private final CheckedFunction dataFormatRegistrySupplier; @Override protected void doStart() { @@ -609,6 +613,10 @@ protected void closeInternal() { MergeSchedulerConfig.CLUSTER_MAX_FORCE_MERGE_MB_PER_SEC_SETTING, this::onClusterLevelForceMergeMBPerSecUpdate ); + this.dataFormatRegistrySupplier = (shardPath) -> new DataFormatRegistry( + pluginsService.filterPlugins(SearchAnalyticsBackEndPlugin.class), + shardPath + ); } @InternalApi @@ -1109,6 +1117,7 @@ private synchronized IndexService createIndexService( for (IndexEventListener listener : builtInListeners) { indexModule.addIndexEventListener(listener); } + return indexModule.newIndexService( indexCreationContext, nodeEnv, @@ -1136,7 +1145,8 @@ private synchronized IndexService createIndexService( replicator, segmentReplicationStatsProvider, this::getClusterDefaultMaxMergeAtOnce, - clusterMergeSchedulerConfig + clusterMergeSchedulerConfig, + dataFormatRegistrySupplier ); } diff --git a/server/src/main/java/org/opensearch/plugins/SearchAnalyticsBackEndPlugin.java b/server/src/main/java/org/opensearch/plugins/SearchAnalyticsBackEndPlugin.java new file mode 100644 index 0000000000000..f71d65f68a1c0 --- /dev/null +++ b/server/src/main/java/org/opensearch/plugins/SearchAnalyticsBackEndPlugin.java @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.plugins; + +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.exec.SearchExecEngine; +import org.opensearch.index.shard.ShardPath; + +import java.io.IOException; +import java.util.List; + +/** + * SPI extension point for back-end query engines (DataFusion, Lucene, etc.). 
+ * @opensearch.internal + */ +public interface SearchAnalyticsBackEndPlugin { + String name(); + + SearchExecEngine create(ShardPath shardPath, DataFormat dataFormat) throws IOException;; + + List getSupportedFormats(); +} + diff --git a/server/src/main/java/org/opensearch/search/SearchExecutionContext.java b/server/src/main/java/org/opensearch/search/SearchExecutionContext.java new file mode 100644 index 0000000000000..bd9d3859bb254 --- /dev/null +++ b/server/src/main/java/org/opensearch/search/SearchExecutionContext.java @@ -0,0 +1,47 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.search; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.CatalogSnapshot; +import org.opensearch.index.engine.exec.SearchExecEngine; +import org.opensearch.search.internal.ShardSearchRequest; + +import java.io.Closeable; + +/** + * Engine-agnostic search execution context. + *

+ * This is the minimal contract between {@link SearchExecEngine} + * and the transport/coordination layer ({@code SearchService}). + *

+ * Contains only what callers actually need: the catalog snapshot, the shard-level request, the shard target, and lifecycle. + * Engine-specific state (Lucene query, DataFusion Substrait plan, searcher, etc.) lives in + * the engine's own context subtype. + *

+ * {@link org.opensearch.search.internal.SearchContext} extends this to add Lucene-specific + * methods for backward compatibility. + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface SearchExecutionContext extends Closeable { + + CatalogSnapshot catalogSnapshot(); + + /** + * The shard-level search request. + */ + ShardSearchRequest request(); + + /** + * The shard this search targets. + */ + SearchShardTarget shardTarget(); +} diff --git a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java index 7e236cf911060..5c85762448adb 100644 --- a/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java +++ b/test/framework/src/main/java/org/opensearch/index/shard/IndexShardTestCase.java @@ -805,7 +805,8 @@ protected IndexShard newShard( new Object(), clusterService.getClusterApplierService(), mergedSegmentPublisher, - ReferencedSegmentsPublisher.EMPTY + ReferencedSegmentsPublisher.EMPTY, + null // TODO ); indexShard.addShardFailureCallback(DEFAULT_SHARD_FAILURE_HANDLER); if (remoteStoreStatsTrackerFactory != null) { From 6575f27a2618df3e1e6c426b1e28fc19b2755c4a Mon Sep 17 00:00:00 2001 From: bharath-techie Date: Tue, 17 Mar 2026 18:22:58 +0530 Subject: [PATCH 02/11] Moving reader manager out of engine and adding contexts + providers Signed-off-by: bharath-techie --- .../opensearch/common/CheckedTriFunction.java | 4 + .../be/datafusion/DataFusionPlugin.java | 10 +- .../be/datafusion/DatafusionContext.java | 13 +- .../DatafusionSearchExecEngine.java | 25 +-- .../be/lucene/LuceneIndexFilterContext.java | 99 ++++++++++++ .../be/lucene/LuceneIndexFilterProvider.java | 78 +++++++++ .../be/lucene/LuceneSearchContext.java | 19 +-- .../be/lucene/LuceneSearchEnginePlugin.java | 21 ++- .../be/lucene/LuceneSearchExecEngine.java | 48 +----- .../be/lucene/LuceneSourceContext.java | 49 ++++++ .../be/lucene/LuceneSourceProvider.java | 
50 ++++++ .../org/opensearch/index/IndexModule.java | 4 +- .../org/opensearch/index/IndexService.java | 10 +- .../index/engine/CompositeEngine.java | 153 ++++++++++++------ .../index/engine/exec/DataFormatRegistry.java | 69 +++++--- .../exec/DataFormatRegistryFactory.java | 36 ----- .../index/engine/exec/IndexFilterContext.java | 25 +++ .../engine/exec/IndexFilterProvider.java | 31 ++++ .../index/engine/exec/SearchExecEngine.java | 14 +- .../index/engine/exec/SourceContext.java | 24 +++ .../index/engine/exec/SourceProvider.java | 29 ++++ .../opensearch/index/shard/IndexShard.java | 4 +- .../opensearch/indices/IndicesService.java | 10 +- .../plugins/SearchAnalyticsBackEndPlugin.java | 35 +++- .../search/SearchExecutionContext.java | 20 --- 25 files changed, 641 insertions(+), 239 deletions(-) create mode 100644 sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java create mode 100644 sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java create mode 100644 sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceContext.java create mode 100644 sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceProvider.java delete mode 100644 server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistryFactory.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/IndexFilterContext.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/IndexFilterProvider.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/SourceContext.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/SourceProvider.java diff --git a/libs/common/src/main/java/org/opensearch/common/CheckedTriFunction.java b/libs/common/src/main/java/org/opensearch/common/CheckedTriFunction.java index 7898226b751f7..13fec0b45425f 100644 --- 
a/libs/common/src/main/java/org/opensearch/common/CheckedTriFunction.java +++ b/libs/common/src/main/java/org/opensearch/common/CheckedTriFunction.java @@ -8,11 +8,15 @@ package org.opensearch.common; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.annotation.InternalApi; + /** * A {@link TriFunction}-like interface which allows throwing checked exceptions. * * @opensearch.internal */ +@ExperimentalApi @FunctionalInterface public interface CheckedTriFunction { R apply(S s, T t, U u) throws E; diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java index 557a76cfa37e2..b5d7c57c4ab48 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java @@ -22,6 +22,7 @@ import org.opensearch.env.Environment; import org.opensearch.env.NodeEnvironment; import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.exec.EngineReaderManager; import org.opensearch.index.engine.exec.SearchExecEngine; import org.opensearch.index.shard.ShardPath; import org.opensearch.plugins.Plugin; @@ -112,11 +113,16 @@ public SqlOperatorTable operatorTable() { } @Override - public SearchExecEngine create(ShardPath shardPath, DataFormat dataFormat) throws IOException { + public EngineReaderManager createReaderManager(DataFormat format, ShardPath shardPath) throws IOException { + return new DatafusionReaderManager(format, shardPath); + } + + @Override + public SearchExecEngine createSearchExecEngine(DataFormat format, ShardPath shardPath) throws IOException { if (dataFusionService == null) { throw new IllegalStateException("DataFusionPlugin.createComponents() has not been called 
yet"); } - return new DatafusionSearchExecEngine(dataFusionService.getRuntimePointer(), dataFormat, shardPath); + return new DatafusionSearchExecEngine(dataFusionService.getRuntimePointer(), format); } /** diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java index 1d165a394e3eb..04c2c849931ff 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java @@ -10,7 +10,6 @@ import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.IndexFilterTree; -import org.opensearch.index.engine.exec.CatalogSnapshot; import org.opensearch.search.SearchExecutionContext; import org.opensearch.search.SearchShardTarget; import org.opensearch.search.internal.ShardSearchRequest; @@ -31,25 +30,17 @@ public class DatafusionContext implements SearchExecutionContext { private final ShardSearchRequest request; private final SearchShardTarget shardTarget; private final DatafusionSearcher engineSearcher; - private final CatalogSnapshot catalogSnapshot; private DatafusionQuery datafusionQuery; private IndexFilterTree filterTree; public DatafusionContext( - CatalogSnapshot catalogSnapshot, ShardSearchRequest request, SearchShardTarget shardTarget, - DatafusionReaderManager readerManager + DatafusionReader reader ) throws IOException { - this.catalogSnapshot = catalogSnapshot; this.request = request; this.shardTarget = shardTarget; - this.engineSearcher = new DatafusionSearcher(readerManager.getReader(catalogSnapshot).getReaderPtr()); - } - - @Override - public CatalogSnapshot catalogSnapshot() { - return catalogSnapshot; + this.engineSearcher = new DatafusionSearcher(reader.getReaderPtr()); } @Override 
diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java index b7bcb71937da8..b3d3b759f5069 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java @@ -12,10 +12,7 @@ import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.IndexFilterTree; import org.opensearch.index.engine.dataformat.DataFormat; -import org.opensearch.index.engine.exec.CatalogSnapshot; -import org.opensearch.index.engine.exec.EngineReaderManager; import org.opensearch.index.engine.exec.SearchExecEngine; -import org.opensearch.index.shard.ShardPath; import org.opensearch.search.SearchShardTarget; import org.opensearch.search.internal.ShardSearchRequest; @@ -25,24 +22,19 @@ import java.util.Iterator; /** - * DataFusion-backed {@link SearchExecEngine}. - * Plan type is {@code byte[]} (substrait bytes). + * DataFusion-backed search execution engine. 
* * @opensearch.experimental */ @ExperimentalApi public class DatafusionSearchExecEngine implements SearchExecEngine { - private final DatafusionReaderManager readerManager; private final long runtimePtr; - private long nextContextId; - public DatafusionSearchExecEngine(long runtimePtr, DataFormat dataFormat, ShardPath shardPath) { - readerManager = new DatafusionReaderManager(dataFormat, shardPath); + public DatafusionSearchExecEngine(long runtimePtr, DataFormat dataFormat) { this.runtimePtr = runtimePtr; } - // TODO : figure out stream return type similar to engine bridge @Override public void execute(DatafusionContext context) throws IOException { DatafusionSearcher searcher = context.getEngineSearcher(); @@ -56,17 +48,17 @@ public void execute(DatafusionContext context) throws IOException { @Override public DatafusionContext createContext( - CatalogSnapshot snapshot, + Object reader, ShardSearchRequest request, SearchShardTarget shardTarget, SearchShardTask task ) throws IOException { - return new DatafusionContext(snapshot, request, shardTarget, readerManager); + DatafusionReader dfReader = (DatafusionReader) reader; + return new DatafusionContext(request, shardTarget, dfReader); } @Override public byte[] convertFragment(Object fragment) { - // TODO: SubstraitConverter.toBytes((RelNode) fragment) throw new UnsupportedOperationException("Substrait conversion not yet wired"); } @@ -75,16 +67,9 @@ public Iterator executePlan(byte[] plan, DatafusionContext context) { try { context.setDatafusionQuery(new DatafusionQuery("", plan)); execute(context); - // TODO results return Collections.emptyIterator(); - // return results == null ? 
Collections.emptyIterator() : Collections.singleton(results).iterator(); } catch (IOException e) { throw new UncheckedIOException(e); } } - - @Override - public EngineReaderManager getReaderManager() { - return readerManager; - } } diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java new file mode 100644 index 0000000000000..921b85c189048 --- /dev/null +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java @@ -0,0 +1,99 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.lucene; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.ScoreMode; +import org.apache.lucene.search.Weight; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.IndexFilterContext; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Lucene-specific index filter context. + *

+ * Holds the Weight (per-query), and manages per-segment scorers/collectors. + * One context per (query, reader) pair. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class LuceneIndexFilterContext implements IndexFilterContext { + + private final Weight weight; + private final List leaves; + private final AtomicInteger nextCollectorId = new AtomicInteger(1); + private final Map collectors = new ConcurrentHashMap<>(); + + public LuceneIndexFilterContext(Query query, DirectoryReader reader) throws IOException { + IndexSearcher searcher = new IndexSearcher(reader); + Query rewritten = searcher.rewrite(query); + this.weight = searcher.createWeight(rewritten, ScoreMode.COMPLETE_NO_SCORES, 1.0f); + this.leaves = reader.leaves(); + } + + @Override + public int segmentCount() { + return leaves.size(); + } + + @Override + public int segmentMaxDoc(int segmentOrd) { + return leaves.get(segmentOrd).reader().maxDoc(); + } + + Weight getWeight() { + return weight; + } + + List getLeaves() { + return leaves; + } + + int registerCollector(DocIdSetIterator iterator, int minDoc, int maxDoc) { + int key = nextCollectorId.getAndIncrement(); + collectors.put(key, new CollectorState(iterator, minDoc, maxDoc)); + return key; + } + + CollectorState getCollector(int key) { + return collectors.get(key); + } + + void removeCollector(int key) { + collectors.remove(key); + } + + @Override + public void close() { + collectors.clear(); + } + + static class CollectorState { + final DocIdSetIterator iterator; + final int minDoc; + final int maxDoc; + int currentDoc = -1; + + CollectorState(DocIdSetIterator iterator, int minDoc, int maxDoc) { + this.iterator = iterator; + this.minDoc = minDoc; + this.maxDoc = maxDoc; + } + } +} diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java new file mode 100644 
index 0000000000000..ef70dc8306edd --- /dev/null +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java @@ -0,0 +1,78 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.lucene; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.Scorer; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.IndexFilterProvider; + +import java.io.IOException; +import java.util.BitSet; + +/** + * Lucene-backed {@link IndexFilterProvider}. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class LuceneIndexFilterProvider implements IndexFilterProvider { + + @Override + public LuceneIndexFilterContext createContext(Query query, Object reader) throws IOException { + return new LuceneIndexFilterContext(query, (DirectoryReader) reader); + } + + @Override + public int createCollector(LuceneIndexFilterContext context, int segmentOrd, int minDoc, int maxDoc) { + try { + Scorer scorer = context.getWeight().scorer(context.getLeaves().get(segmentOrd)); + if (scorer == null) return -1; + return context.registerCollector(scorer.iterator(), minDoc, maxDoc); + } catch (IOException e) { + return -1; + } + } + + @Override + public long[] collectDocs(LuceneIndexFilterContext context, int collectorKey, int minDoc, int maxDoc) { + LuceneIndexFilterContext.CollectorState state = context.getCollector(collectorKey); + if (state == null) return new long[0]; + + int effectiveMin = Math.max(minDoc, state.minDoc); + int effectiveMax = Math.min(maxDoc, state.maxDoc); + if (effectiveMin >= effectiveMax) return new long[0]; + + BitSet bitset = new BitSet(effectiveMax - effectiveMin); + 
try { + DocIdSetIterator iter = state.iterator; + int docId = state.currentDoc; + if (docId == DocIdSetIterator.NO_MORE_DOCS || docId >= state.maxDoc) return new long[0]; + if (docId < effectiveMin) docId = iter.advance(effectiveMin); + while (docId != DocIdSetIterator.NO_MORE_DOCS && docId < effectiveMax) { + bitset.set(docId - effectiveMin); + docId = iter.nextDoc(); + } + state.currentDoc = docId; + } catch (IOException e) { + return new long[0]; + } + return bitset.toLongArray(); + } + + @Override + public void releaseCollector(LuceneIndexFilterContext context, int collectorKey) { + context.removeCollector(collectorKey); + } + + @Override + public void close() {} +} diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java index c9ee6b9c30b56..f2eb7ac98e0a4 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java @@ -12,7 +12,6 @@ import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.engine.exec.CatalogSnapshot; import org.opensearch.search.SearchExecutionContext; import org.opensearch.search.SearchShardTarget; import org.opensearch.search.internal.ShardSearchRequest; @@ -31,7 +30,6 @@ @ExperimentalApi public class LuceneSearchContext implements SearchExecutionContext { - CatalogSnapshot catalogSnapshot; private final ShardSearchRequest request; private final SearchShardTarget shardTarget; @@ -44,14 +42,12 @@ public class LuceneSearchContext implements SearchExecutionContext { private int[] segmentMaxDocs; public LuceneSearchContext( - CatalogSnapshot catalogSnapshot, ShardSearchRequest request, SearchShardTarget 
shardTarget, - LuceneReaderManager readerManager + DirectoryReader reader ) throws IOException { - this.catalogSnapshot = catalogSnapshot; - reader = readerManager.getReader(catalogSnapshot); - IndexSearcher indexSearcher = new IndexSearcher(reader);// TODO : check if this is right + this.reader = reader; + IndexSearcher indexSearcher = new IndexSearcher(reader); searcher = new LuceneEngineSearcher(indexSearcher, reader); this.request = request; this.shardTarget = shardTarget; @@ -61,6 +57,10 @@ public Query getQuery() { return query; } + public DirectoryReader getReader() { + return reader; + } + public void setQuery(Query query) { this.query = query; } @@ -89,11 +89,6 @@ public void setSegmentMaxDocs(int[] segmentMaxDocs) { this.segmentMaxDocs = segmentMaxDocs; } - @Override - public CatalogSnapshot catalogSnapshot() { - return null; - } - @Override public ShardSearchRequest request() { return request; diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java index b2e9ebe25c39c..19013b10cb3f1 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java @@ -13,7 +13,9 @@ import org.opensearch.analytics.spi.AnalyticsBackEndPlugin; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.dataformat.DataFormat; -import org.opensearch.index.engine.exec.SearchExecEngine; +import org.opensearch.index.engine.exec.EngineReaderManager; +import org.opensearch.index.engine.exec.IndexFilterProvider; +import org.opensearch.index.engine.exec.SourceProvider; import org.opensearch.index.shard.ShardPath; import org.opensearch.plugins.Plugin; @@ -21,7 +23,7 @@ import java.util.List; /** - * Plugin 
providing the Lucene-based search execution engine. + * Plugin providing Lucene as an index filter or source provider. * * @opensearch.experimental */ @@ -44,9 +46,18 @@ public SqlOperatorTable operatorTable() { } @Override - public SearchExecEngine create(ShardPath shardPath, DataFormat dataFormat) throws IOException { - // TODO: obtain ReferenceManager from the shard's InternalEngine - throw new UnsupportedOperationException("Lucene engine creation not yet wired to shard lifecycle"); + public EngineReaderManager createReaderManager(DataFormat format, ShardPath shardPath) throws IOException { + return new LuceneReaderManager(format); + } + + @Override + public IndexFilterProvider createIndexFilterProvider(DataFormat format, ShardPath shardPath) throws IOException { + return new LuceneIndexFilterProvider(); + } + + @Override + public SourceProvider createSourceProvider(DataFormat format, ShardPath shardPath) throws IOException { + return new LuceneSourceProvider(); } @Override diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchExecEngine.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchExecEngine.java index 55ab88663d5a2..a0b8e37656170 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchExecEngine.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchExecEngine.java @@ -13,56 +13,23 @@ import org.apache.lucene.search.Query; import org.opensearch.action.search.SearchShardTask; import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.engine.dataformat.DataFormat; -import org.opensearch.index.engine.dataformat.FieldTypeCapabilities; -import org.opensearch.index.engine.exec.CatalogSnapshot; -import org.opensearch.index.engine.exec.EngineReaderManager; import org.opensearch.index.engine.exec.SearchExecEngine; import 
org.opensearch.search.SearchShardTarget; import org.opensearch.search.internal.ShardSearchRequest; import java.io.IOException; -import java.util.Set; /** - * Lucene-backed {@link SearchExecEngine}. - * Plan type is {@link Query}. + * Lucene-backed search execution engine. * * @opensearch.experimental */ @ExperimentalApi public class LuceneSearchExecEngine implements SearchExecEngine { - private final LuceneReaderManager readerManager; - private long nextContextId; - - public LuceneSearchExecEngine() { - this.readerManager = new LuceneReaderManager(getLuceneDataFormat()); - } - - private static DataFormat getLuceneDataFormat() { - return new DataFormat() { - @Override - public String name() { - return "Lucene"; - } - - @Override - public long priority() { - return 0; - } - - @Override - public Set supportedFields() { - return Set.of(); - } - }; - } - - // TODO : replace this with filter provider/delegate methods @Override public void execute(LuceneSearchContext context) throws IOException { - DirectoryReader reader = readerManager.getReader(context.catalogSnapshot()); + DirectoryReader reader = context.getReader(); LuceneEngineSearcher searcher = new LuceneEngineSearcher(new IndexSearcher(reader), reader); try { searcher.search(context); @@ -73,25 +40,20 @@ public void execute(LuceneSearchContext context) throws IOException { @Override public LuceneSearchContext createContext( - CatalogSnapshot snapshot, + Object reader, ShardSearchRequest request, SearchShardTarget shardTarget, SearchShardTask task ) throws IOException { - return new LuceneSearchContext(snapshot, request, shardTarget, readerManager); + DirectoryReader directoryReader = (DirectoryReader) reader; + return new LuceneSearchContext(request, shardTarget, directoryReader); } @Override public Query convertFragment(Object fragment) { - // DQE passes a Lucene Query directly if (fragment instanceof Query) { return (Query) fragment; } throw new UnsupportedOperationException("Expected Lucene Query, got " + 
fragment.getClass().getSimpleName()); } - - @Override - public EngineReaderManager getReaderManager() { - return null; - } } diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceContext.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceContext.java new file mode 100644 index 0000000000000..bf495f4220fb5 --- /dev/null +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceContext.java @@ -0,0 +1,49 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.lucene; + +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.search.IndexSearcher; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.SourceContext; + +import java.io.IOException; + +/** + * @opensearch.experimental + */ +@ExperimentalApi +public class LuceneSourceContext implements SourceContext { + + private final Object query; + private final DirectoryReader reader; + private final IndexSearcher searcher; + + public LuceneSourceContext(Object query, DirectoryReader reader) { + this.query = query; + this.reader = reader; + this.searcher = new IndexSearcher(reader); + } + + @Override + public Object query() { + return query; + } + + public DirectoryReader getReader() { + return reader; + } + + public IndexSearcher getSearcher() { + return searcher; + } + + @Override + public void close() throws IOException {} +} diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceProvider.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceProvider.java new file mode 100644 index 0000000000000..72a4a95083548 --- /dev/null +++ 
b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceProvider.java @@ -0,0 +1,50 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.lucene; + +import org.apache.lucene.index.DirectoryReader; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.SourceProvider; + +import java.io.IOException; + +/** + * Lucene-backed {@link SourceProvider}. + *

+ * Executes the full query+scan+filter in Lucene and streams back + * projections/aggregation results to the primary engine (DataFusion). + *

+ * Used when all queried fields are Lucene-indexed and Lucene can + * fully resolve the query more efficiently than scanning parquet. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class LuceneSourceProvider implements SourceProvider { + + @Override + public LuceneSourceContext createContext(Object query, Object reader) throws IOException { + return new LuceneSourceContext(query, (DirectoryReader) reader); + } + + @Override + public Object execute(LuceneSourceContext context) throws IOException { + // TODO: execute query via context.getSearcher(), collect results, return stream handle + throw new UnsupportedOperationException("Lucene source execution not yet implemented"); + } + + @Override + public Object next(LuceneSourceContext context, Object stream) throws IOException { + // TODO: pull next batch (Arrow VectorSchemaRoot) from stream + throw new UnsupportedOperationException("Lucene source streaming not yet implemented"); + } + + @Override + public void close() {} +} diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java index 0c149dbb195a6..affda10e91ff4 100644 --- a/server/src/main/java/org/opensearch/index/IndexModule.java +++ b/server/src/main/java/org/opensearch/index/IndexModule.java @@ -46,6 +46,7 @@ import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.CheckedFunction; +import org.opensearch.common.CheckedTriFunction; import org.opensearch.common.SetOnce; import org.opensearch.common.TriFunction; import org.opensearch.common.annotation.ExperimentalApi; @@ -75,6 +76,7 @@ import org.opensearch.index.engine.EngineConfigFactory; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.engine.exec.DataFormatRegistry; +import org.opensearch.index.IndexSettings; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.IndexEventListener; 
import org.opensearch.index.shard.IndexShard; @@ -743,7 +745,7 @@ public IndexService newIndexService( Function segmentReplicationStatsProvider, Supplier clusterDefaultMaxMergeAtOnceSupplier, ClusterMergeSchedulerConfig clusterMergeSchedulerConfig, - CheckedFunction dataFormatRegistrySupplier + CheckedTriFunction dataFormatRegistrySupplier ) throws IOException { final IndexEventListener eventListener = freeze(); Function> readerWrapperFactory = indexReaderWrapper diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index f8c05e4999fa3..77d1f4adeb39a 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -47,6 +47,7 @@ import org.opensearch.cluster.routing.ShardRouting; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.CheckedFunction; +import org.opensearch.common.CheckedTriFunction; import org.opensearch.common.Nullable; import org.opensearch.common.annotation.InternalApi; import org.opensearch.common.annotation.PublicApi; @@ -210,7 +211,8 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust private volatile TimeValue refreshInterval; private volatile boolean shardLevelRefreshEnabled; private final IndexStorePlugin.StoreFactory storeFactory; - private final CheckedFunction dataFormatRegistrySupplier; + private final CheckedTriFunction + dataFormatRegistrySupplier; @InternalApi public IndexService( @@ -258,7 +260,7 @@ public IndexService( Function segmentReplicationStatsProvider, Supplier clusterDefaultMaxMergeAtOnceSupplier, ClusterMergeSchedulerConfig clusterMergeSchedulerConfig, - CheckedFunction dataFormatRegistrySupplier + CheckedTriFunction dataFormatRegistrySupplier ) { super(indexSettings); this.storeFactory = storeFactory; @@ -780,7 +782,9 @@ protected void closeInternal() { directoryFactory ); 
eventListener.onStoreCreated(shardId); - DataFormatRegistry dataFormatRegistry = dataFormatRegistrySupplier.apply(path); + DataFormatRegistry dataFormatRegistry = dataFormatRegistrySupplier != null + ? dataFormatRegistrySupplier.apply(path, mapperService, this.indexSettings) + : null; indexShard = new IndexShard( routing, this.indexSettings, diff --git a/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java b/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java index aae3bd5d1e9ea..7a6bc3cfccdb0 100644 --- a/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java @@ -10,96 +10,145 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.apache.lucene.search.ReferenceManager; -import org.opensearch.common.TriConsumer; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.exec.CatalogSnapshot; import org.opensearch.index.engine.exec.CatalogSnapshotAwareRefreshListener; import org.opensearch.index.engine.exec.CatalogSnapshotDeleteListener; import org.opensearch.index.engine.exec.DataFormatRegistry; +import org.opensearch.index.engine.exec.EngineReaderManager; import org.opensearch.index.engine.exec.FilesListener; -import org.opensearch.index.shard.ShardPath; +import org.opensearch.index.engine.exec.IndexFilterProvider; +import org.opensearch.index.engine.exec.SearchExecEngine; +import org.opensearch.index.engine.exec.SourceProvider; import java.io.Closeable; import java.io.IOException; +import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.Map; -import java.util.function.Consumer; -import java.util.function.Supplier; +import java.util.concurrent.ConcurrentHashMap; +/** + * Owns all reader managers, lazily creates search engines per each shard and index filter providers. 
+ * This stands as a bridge for reads/writes. Initializes engines and providers only relevant to the + * index settings and mappings. + * + * @opensearch.experimental + */ @ExperimentalApi public class CompositeEngine implements Closeable { private static final Logger logger = LogManager.getLogger(CompositeEngine.class); - private final Map fileListeners; - private final List catalogSnapshotAwareRefreshListeners; - private final List deleteSnapshotListeners; - private static final TriConsumer< - Supplier>, - CatalogSnapshotAwareRefreshListener, - Boolean> POST_REFRESH_CATALOG_SNAPSHOT_AWARE_LISTENER_CONSUMER = ( - catalogSnapshot, - catalogSnapshotAwareRefreshListener, - didRefresh) -> { - try { - // Wrap in Supplier as required by CatalogSnapshotAwareRefreshListener interface - catalogSnapshotAwareRefreshListener.afterRefresh(didRefresh, catalogSnapshot.get().getRef()); - } catch (IOException e) { - throw new RuntimeException(e); + + private final Map> readerManagers; + private final DataFormatRegistry dataFormatRegistry; + private final Map> searchEngines = new ConcurrentHashMap<>(); + private final Map> indexFilterProviders = new ConcurrentHashMap<>(); + private final Map> sourceProviders = new ConcurrentHashMap<>(); + + public CompositeEngine(DataFormatRegistry dataFormatRegistry) { + this.dataFormatRegistry = dataFormatRegistry; + this.readerManagers = dataFormatRegistry.getReaderManagers(); + } + + public EngineReaderManager getReaderManager(DataFormat format) { + return readerManagers.get(format); + } + + public SearchExecEngine getSearchExecEngine(DataFormat format) throws IOException { + SearchExecEngine engine = searchEngines.get(format); + if (engine != null) { + return engine; + } + synchronized (searchEngines) { + engine = searchEngines.get(format); + if (engine == null) { + engine = dataFormatRegistry.createSearchExecEngine(format); + searchEngines.put(format, engine); } - }; - private static final Consumer POST_REFRESH_LISTENER_CONSUMER = 
refreshListener -> { - try { - refreshListener.afterRefresh(true); - } catch (IOException e) { - throw new RuntimeException(e); + return engine; } - }; + } - public CompositeEngine(DataFormatRegistry dataFormatRegistry, ShardPath shardPath) throws IOException { - fileListeners = dataFormatRegistry.getFilesListenerMap(); - deleteSnapshotListeners = dataFormatRegistry.getCatalogSnapshotDeleteListeners(); - catalogSnapshotAwareRefreshListeners = dataFormatRegistry.getCatalogSnapshotAwareRefreshListeners(); + public IndexFilterProvider getIndexFilterProvider(DataFormat format) throws IOException { + IndexFilterProvider provider = indexFilterProviders.get(format); + if (provider != null) { + return provider; + } + synchronized (indexFilterProviders) { + provider = indexFilterProviders.get(format); + if (provider == null) { + provider = dataFormatRegistry.createIndexFilterProvider(format); + indexFilterProviders.put(format, provider); + } + return provider; + } } - @Override - public void close() throws IOException { + public SourceProvider getSourceProvider(DataFormat format) throws IOException { + SourceProvider sp = sourceProviders.get(format); + if (sp != null) { + return sp; + } + synchronized (sourceProviders) { + sp = sourceProviders.get(format); + if (sp == null) { + sp = dataFormatRegistry.createSourceProvider(format); + sourceProviders.put(format, sp); + } + return sp; + } + } + public List getCatalogSnapshotAwareRefreshListeners() { + return new ArrayList<>(readerManagers.values()); } - public void notifyDelete(Map> dfFilesToDelete) throws IOException { - for (DataFormat format : fileListeners.keySet()) { - fileListeners.get(format).onFilesDeleted(dfFilesToDelete.get(format)); - } + public List getCatalogSnapshotDeleteListeners() { + return new ArrayList<>(readerManagers.values()); } public void notifyFilesAdded(Map> dfNewFiles) throws IOException { - for (DataFormat format : fileListeners.keySet()) { - 
fileListeners.get(format).onFilesAdded(dfNewFiles.get(format)); + for (Map.Entry> entry : dfNewFiles.entrySet()) { + FilesListener listener = readerManagers.get(entry.getKey()); + if (listener != null) { + listener.onFilesAdded(entry.getValue()); + } } } - public void notifyCatalogSnapshotDelete(CatalogSnapshot catalogSnapshot) throws IOException { - for (CatalogSnapshotDeleteListener listener : deleteSnapshotListeners) { - listener.onDeleted(catalogSnapshot); + public void notifyDelete(Map> dfFilesToDelete) throws IOException { + for (Map.Entry> entry : dfFilesToDelete.entrySet()) { + FilesListener listener = readerManagers.get(entry.getKey()); + if (listener != null) { + listener.onFilesDeleted(entry.getValue()); + } } } - private void invokeRefreshListeners(boolean didRefresh) { - catalogSnapshotAwareRefreshListeners.forEach( - refreshListener -> POST_REFRESH_CATALOG_SNAPSHOT_AWARE_LISTENER_CONSUMER.apply( - this::acquireSnapshot, - refreshListener, - didRefresh - ) - ); - + public void notifyCatalogSnapshotDelete(CatalogSnapshot catalogSnapshot) throws IOException { + for (EngineReaderManager rm : readerManagers.values()) { + rm.onDeleted(catalogSnapshot); + } } public ReleasableRef acquireSnapshot() { - return null;// TODO : return this.catalogSnapshotManager.acquireSnapshot(); + return null; // TODO : return this.catalogSnapshotManager.acquireSnapshot(); + } + + @Override + public void close() throws IOException { + for (SearchExecEngine engine : searchEngines.values()) { + engine.close(); + } + for (IndexFilterProvider provider : indexFilterProviders.values()) { + provider.close(); + } + for (SourceProvider sp : sourceProviders.values()) { + sp.close(); + } } @ExperimentalApi diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistry.java b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistry.java index bead8e2036a1b..c09735083f29c 100644 --- 
a/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistry.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistry.java @@ -8,55 +8,80 @@ package org.opensearch.index.engine.exec; +import org.opensearch.common.CheckedFunction; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.IndexSettings; import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.engine.exec.SourceProvider; import org.opensearch.index.shard.ShardPath; import org.opensearch.plugins.SearchAnalyticsBackEndPlugin; import java.io.IOException; -import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; /** - * Registry of data format SPIs from associated plugins + * Registry of reader managers, search engine factories, and index filter provider factories per data format. + *

+ * Accepts {@link MapperService} and {@link IndexSettings} to determine which + * formats are relevant for the index. * * @opensearch.experimental */ @ExperimentalApi public class DataFormatRegistry { - private final List catalogSnapshotAwareRefreshListeners = new ArrayList<>(); - private final Map filesListenerMap = new HashMap<>(); - private final List catalogSnapshotDeleteListeners = new ArrayList<>(); - private final Map> searchExecEngineMap = new HashMap<>(); - public DataFormatRegistry(List searchPlugins, ShardPath shardPath) throws IOException { + private final Map> readerManagers = new HashMap<>(); + private final Map, IOException>> engineFactories = new HashMap<>(); + private final Map, IOException>> indexFilterProviderFactories = + new HashMap<>(); + private final Map, IOException>> sourceProviderFactories = + new HashMap<>(); + + public DataFormatRegistry( + List searchPlugins, + ShardPath shardPath, + MapperService mapperService, + IndexSettings indexSettings + ) throws IOException { for (SearchAnalyticsBackEndPlugin plugin : searchPlugins) { - for (DataFormat dataFormat : plugin.getSupportedFormats()) { - SearchExecEngine engine = plugin.create(shardPath, dataFormat); - EngineReaderManager readerManager = engine.getReaderManager(); - catalogSnapshotAwareRefreshListeners.add(readerManager); - filesListenerMap.put(dataFormat, readerManager); - catalogSnapshotDeleteListeners.add(readerManager); - searchExecEngineMap.put(dataFormat, engine); + for (DataFormat format : plugin.getSupportedFormats()) { + // TODO: use mapperService and indexSettings to filter formats relevant to this index + readerManagers.put(format, plugin.createReaderManager(format, shardPath)); + engineFactories.put(format, f -> plugin.createSearchExecEngine(f, shardPath)); + indexFilterProviderFactories.put(format, f -> plugin.createIndexFilterProvider(f, shardPath)); + sourceProviderFactories.put(format, f -> plugin.createSourceProvider(f, shardPath)); } } } - public List 
getCatalogSnapshotAwareRefreshListeners() { - return catalogSnapshotAwareRefreshListeners; + public Map> getReaderManagers() { + return readerManagers; } - public List getCatalogSnapshotDeleteListeners() { - return catalogSnapshotDeleteListeners; + public SearchExecEngine createSearchExecEngine(DataFormat format) throws IOException { + CheckedFunction, IOException> factory = engineFactories.get(format); + if (factory == null) { + throw new IllegalArgumentException("No plugin registered for format: " + format.name()); + } + return factory.apply(format); } - public Map getFilesListenerMap() { - return filesListenerMap; + public IndexFilterProvider createIndexFilterProvider(DataFormat format) throws IOException { + CheckedFunction, IOException> factory = indexFilterProviderFactories.get(format); + if (factory == null) { + throw new IllegalArgumentException("No index filter provider for format: " + format.name()); + } + return factory.apply(format); } - public SearchExecEngine getSearchExecEngine(DataFormat dataFormat) { - return searchExecEngineMap.get(dataFormat); + public SourceProvider createSourceProvider(DataFormat format) throws IOException { + CheckedFunction, IOException> factory = sourceProviderFactories.get(format); + if (factory == null) { + throw new IllegalArgumentException("No source provider for format: " + format.name()); + } + return factory.apply(format); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistryFactory.java b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistryFactory.java deleted file mode 100644 index 4dd601f6fa40b..0000000000000 --- a/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistryFactory.java +++ /dev/null @@ -1,36 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.index.engine.exec; - -import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.shard.ShardPath; -import org.opensearch.plugins.SearchAnalyticsBackEndPlugin; - -import java.io.IOException; -import java.util.List; - -@ExperimentalApi -public class DataFormatRegistryFactory { - private final List searchPlugins; - - public DataFormatRegistryFactory(List searchPlugins) { - this.searchPlugins = searchPlugins; - } - - /** - * Called at shard creation time when ShardPath is available. - */ - public DataFormatRegistry create(ShardPath shardPath) throws IOException { - return new DataFormatRegistry(searchPlugins, shardPath); - } - - public boolean hasPlugins() { - return !searchPlugins.isEmpty(); - } -} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterContext.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterContext.java new file mode 100644 index 0000000000000..f27a27192be9d --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterContext.java @@ -0,0 +1,25 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; +import java.io.IOException; + +/** + * @opensearch.experimental + */ +@ExperimentalApi +public interface IndexFilterContext extends Closeable { + + int segmentCount(); + + int segmentMaxDoc(int segmentOrd); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterProvider.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterProvider.java new file mode 100644 index 0000000000000..3e95b7fa7dd16 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterProvider.java @@ -0,0 +1,31 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; +import java.io.IOException; + +/** + * @param the query type (e.g. 
Lucene Query) + * @param the context type + * @opensearch.experimental + */ +@ExperimentalApi +public interface IndexFilterProvider extends Closeable { + + C createContext(Q query, Object reader) throws IOException; + + int createCollector(C context, int segmentOrd, int minDoc, int maxDoc); + + long[] collectDocs(C context, int collectorKey, int minDoc, int maxDoc); + + void releaseCollector(C context, int collectorKey); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/SearchExecEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/SearchExecEngine.java index d45f2f0d67b2f..2e9284f209ed4 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/SearchExecEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/SearchExecEngine.java @@ -40,8 +40,16 @@ default void execute(C context, ActionListener listener) { } } - C createContext(CatalogSnapshot snapshot, ShardSearchRequest request, SearchShardTarget shardTarget, SearchShardTask task) - throws IOException; + /** + * Create a search context. The reader is provided by {@link org.opensearch.index.engine.CompositeEngine} + * which owns all reader managers. 
+ */ + C createContext( + Object reader, + ShardSearchRequest request, + SearchShardTarget shardTarget, + SearchShardTask task + ) throws IOException; default T convertFragment(Object fragment) { throw new UnsupportedOperationException("convertFragment not supported by " + getClass().getSimpleName()); @@ -53,6 +61,4 @@ default Iterator executePlan(T plan, C context) { @Override default void close() throws IOException {} - - EngineReaderManager getReaderManager(); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/SourceContext.java b/server/src/main/java/org/opensearch/index/engine/exec/SourceContext.java new file mode 100644 index 0000000000000..7bbfaadec8957 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/SourceContext.java @@ -0,0 +1,24 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; + +/** + * Context for a source provider execution. + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface SourceContext extends Closeable { + + Object query(); +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/SourceProvider.java b/server/src/main/java/org/opensearch/index/engine/exec/SourceProvider.java new file mode 100644 index 0000000000000..5ba8efef51643 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/SourceProvider.java @@ -0,0 +1,29 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; +import java.io.IOException; + +/** + * @param the context type + * @param the result batch type + * @opensearch.experimental + */ +@ExperimentalApi +public interface SourceProvider extends Closeable { + + C createContext(Object query, Object reader) throws IOException; + + Object execute(C context) throws IOException; + + R next(C context, Object stream) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index fa8fb18939e7b..6210b36bb79a8 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -576,7 +576,9 @@ public boolean shouldCache(Query query) { } } this.dataFormatRegistry = dataFormatRegistry; - CompositeEngine engine = new CompositeEngine(dataFormatRegistry, path); // TODO : just a placeholder + if (dataFormatRegistry != null) { + this.currentCompositeEngineReference.set(new CompositeEngine(dataFormatRegistry)); + } } /** diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index d623e88bb9b55..b0fc8f0e35d0e 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -62,6 +62,7 @@ import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.CheckedConsumer; import org.opensearch.common.CheckedFunction; +import org.opensearch.common.CheckedTriFunction; import org.opensearch.common.CheckedSupplier; import org.opensearch.common.Nullable; import org.opensearch.common.annotation.InternalApi; @@ -427,7 +428,8 @@ public class IndicesService extends AbstractLifecycleComponent private volatile int defaultMaxMergeAtOnce; 
private final StatusCounterStats statusCounterStats; private final ClusterMergeSchedulerConfig clusterMergeSchedulerConfig; - private final CheckedFunction dataFormatRegistrySupplier; + private final CheckedTriFunction + dataFormatRegistrySupplier; @Override protected void doStart() { @@ -613,9 +615,11 @@ protected void closeInternal() { MergeSchedulerConfig.CLUSTER_MAX_FORCE_MERGE_MB_PER_SEC_SETTING, this::onClusterLevelForceMergeMBPerSecUpdate ); - this.dataFormatRegistrySupplier = (shardPath) -> new DataFormatRegistry( + this.dataFormatRegistrySupplier = (shardPath, mapperService, indexSettings) -> new DataFormatRegistry( pluginsService.filterPlugins(SearchAnalyticsBackEndPlugin.class), - shardPath + shardPath, + mapperService, + indexSettings ); } diff --git a/server/src/main/java/org/opensearch/plugins/SearchAnalyticsBackEndPlugin.java b/server/src/main/java/org/opensearch/plugins/SearchAnalyticsBackEndPlugin.java index f71d65f68a1c0..69eb2863d9463 100644 --- a/server/src/main/java/org/opensearch/plugins/SearchAnalyticsBackEndPlugin.java +++ b/server/src/main/java/org/opensearch/plugins/SearchAnalyticsBackEndPlugin.java @@ -9,21 +9,48 @@ package org.opensearch.plugins; import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.exec.EngineReaderManager; +import org.opensearch.index.engine.exec.IndexFilterProvider; import org.opensearch.index.engine.exec.SearchExecEngine; +import org.opensearch.index.engine.exec.SourceProvider; import org.opensearch.index.shard.ShardPath; import java.io.IOException; import java.util.List; /** - * SPI extension point for back-end query engines (DataFusion, Lucene, etc.). + * Interface for back-end query engines. 
+ * * @opensearch.internal */ public interface SearchAnalyticsBackEndPlugin { String name(); - SearchExecEngine create(ShardPath shardPath, DataFormat dataFormat) throws IOException;; - List getSupportedFormats(); -} + EngineReaderManager createReaderManager(DataFormat format, ShardPath shardPath) throws IOException; + + /** + * Create a search execution engine. Return null if this plugin is an index provider only. + */ + default SearchExecEngine createSearchExecEngine(DataFormat format, ShardPath shardPath) throws IOException { + return null; + } + + /** + * Create an index filter provider. Return null if this plugin is a search engine only. + */ + default IndexFilterProvider createIndexFilterProvider(DataFormat format, ShardPath shardPath) throws IOException { + return null; + } + + /** + * Create a source provider. Return null if this plugin does not provide source data. + *

+ * A source provider executes the full query+scan+filter and streams back + * result batches (projections, aggregations) to the primary engine. + */ + default SourceProvider createSourceProvider(DataFormat format, ShardPath shardPath) throws IOException { + return null; + } +} diff --git a/server/src/main/java/org/opensearch/search/SearchExecutionContext.java b/server/src/main/java/org/opensearch/search/SearchExecutionContext.java index bd9d3859bb254..2368d7992b7b5 100644 --- a/server/src/main/java/org/opensearch/search/SearchExecutionContext.java +++ b/server/src/main/java/org/opensearch/search/SearchExecutionContext.java @@ -9,39 +9,19 @@ package org.opensearch.search; import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.engine.exec.CatalogSnapshot; -import org.opensearch.index.engine.exec.SearchExecEngine; import org.opensearch.search.internal.ShardSearchRequest; import java.io.Closeable; /** * Engine-agnostic search execution context. - *

- * This is the minimal contract between {@link SearchExecEngine} - * and the transport/coordination layer ({@code SearchService}). - *

- * Contains only what callers actually need: request, results, pagination, and lifecycle. - * Engine-specific state (Lucene query, DF substrait plan, searcher, etc.) lives in - * the engine's own context subtype. - *

- * {@link org.opensearch.search.internal.SearchContext} extends this to add Lucene-specific - * methods for backward compatibility. * * @opensearch.experimental */ @ExperimentalApi public interface SearchExecutionContext extends Closeable { - CatalogSnapshot catalogSnapshot(); - - /** - * The shard-level search request. - */ ShardSearchRequest request(); - /** - * The shard this search targets. - */ SearchShardTarget shardTarget(); } From 13221aacea0efd7a92e5e098ab320928ad6a16e9 Mon Sep 17 00:00:00 2001 From: Bukhtawar Khan Date: Wed, 18 Mar 2026 18:59:14 +0530 Subject: [PATCH 03/11] Refactor CompositeEngine to use factory (#50) Signed-off-by: Bukhtawar Khan --- .../org/opensearch/index/IndexModule.java | 6 +- .../org/opensearch/index/IndexService.java | 16 +- .../index/engine/CompositeEngine.java | 211 ++++++++++-------- .../CatalogSnapshotAwareRefreshListener.java | 28 --- .../exec/CatalogSnapshotDeleteListener.java | 18 -- .../CatalogSnapshotLifecycleListener.java | 55 +++++ .../engine/exec/CompositeEngineFactory.java | 94 ++++++++ .../index/engine/exec/DataFormatRegistry.java | 87 -------- .../engine/exec/EngineReaderManager.java | 2 +- .../opensearch/index/shard/IndexShard.java | 12 +- .../opensearch/indices/IndicesService.java | 13 +- 11 files changed, 295 insertions(+), 247 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotLifecycleListener.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineFactory.java diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java index affda10e91ff4..c12c1ed21a2e1 100644 --- a/server/src/main/java/org/opensearch/index/IndexModule.java +++ b/server/src/main/java/org/opensearch/index/IndexModule.java @@ -75,7 +75,7 @@ import org.opensearch.index.engine.Engine; import org.opensearch.index.engine.EngineConfigFactory; import org.opensearch.index.engine.EngineFactory; 
-import org.opensearch.index.engine.exec.DataFormatRegistry; +import org.opensearch.index.engine.exec.CompositeEngineFactory; import org.opensearch.index.IndexSettings; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.IndexEventListener; @@ -745,7 +745,7 @@ public IndexService newIndexService( Function segmentReplicationStatsProvider, Supplier clusterDefaultMaxMergeAtOnceSupplier, ClusterMergeSchedulerConfig clusterMergeSchedulerConfig, - CheckedTriFunction dataFormatRegistrySupplier + CheckedTriFunction compositeEngineFactorySupplier ) throws IOException { final IndexEventListener eventListener = freeze(); Function> readerWrapperFactory = indexReaderWrapper @@ -819,7 +819,7 @@ public IndexService newIndexService( segmentReplicationStatsProvider, clusterDefaultMaxMergeAtOnceSupplier, clusterMergeSchedulerConfig, - dataFormatRegistrySupplier + compositeEngineFactorySupplier ); success = true; return indexService; diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index 77d1f4adeb39a..940a3968dc0af 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -79,7 +79,7 @@ import org.opensearch.index.engine.EngineConfigFactory; import org.opensearch.index.engine.EngineFactory; import org.opensearch.index.engine.MergedSegmentWarmerFactory; -import org.opensearch.index.engine.exec.DataFormatRegistry; +import org.opensearch.index.engine.exec.CompositeEngineFactory; import org.opensearch.index.fielddata.IndexFieldDataCache; import org.opensearch.index.fielddata.IndexFieldDataService; import org.opensearch.index.mapper.MapperService; @@ -211,8 +211,8 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust private volatile TimeValue refreshInterval; private volatile boolean shardLevelRefreshEnabled; private final IndexStorePlugin.StoreFactory 
storeFactory; - private final CheckedTriFunction - dataFormatRegistrySupplier; + private final CheckedTriFunction + compositeEngineFactorySupplier; @InternalApi public IndexService( @@ -260,7 +260,7 @@ public IndexService( Function segmentReplicationStatsProvider, Supplier clusterDefaultMaxMergeAtOnceSupplier, ClusterMergeSchedulerConfig clusterMergeSchedulerConfig, - CheckedTriFunction dataFormatRegistrySupplier + CheckedTriFunction compositeEngineFactorySupplier ) { super(indexSettings); this.storeFactory = storeFactory; @@ -371,7 +371,7 @@ public IndexService( startIndexLevelRefreshTask(); } } - this.dataFormatRegistrySupplier = dataFormatRegistrySupplier; + this.compositeEngineFactorySupplier = compositeEngineFactorySupplier; } @InternalApi @@ -782,8 +782,8 @@ protected void closeInternal() { directoryFactory ); eventListener.onStoreCreated(shardId); - DataFormatRegistry dataFormatRegistry = dataFormatRegistrySupplier != null - ? dataFormatRegistrySupplier.apply(path, mapperService, this.indexSettings) + CompositeEngineFactory compositeEngineFactory = compositeEngineFactorySupplier != null + ? compositeEngineFactorySupplier.apply(path, mapperService, this.indexSettings) : null; indexShard = new IndexShard( routing, @@ -824,7 +824,7 @@ protected void closeInternal() { clusterService.getClusterApplierService(), this.indexSettings.isSegRepEnabledOrRemoteNode() ? mergedSegmentPublisher : null, this.indexSettings.isSegRepEnabledOrRemoteNode() ? 
referencedSegmentsPublisher : null, - dataFormatRegistry + compositeEngineFactory ); eventListener.indexShardStateChanged(indexShard, null, indexShard.state(), "shard created"); eventListener.afterIndexShardCreated(indexShard); diff --git a/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java b/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java index 7a6bc3cfccdb0..bf6cc026777bb 100644 --- a/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java @@ -8,16 +8,12 @@ package org.opensearch.index.engine; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; +import org.opensearch.common.CheckedSupplier; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.exec.CatalogSnapshot; -import org.opensearch.index.engine.exec.CatalogSnapshotAwareRefreshListener; -import org.opensearch.index.engine.exec.CatalogSnapshotDeleteListener; -import org.opensearch.index.engine.exec.DataFormatRegistry; +import org.opensearch.index.engine.exec.CatalogSnapshotLifecycleListener; import org.opensearch.index.engine.exec.EngineReaderManager; -import org.opensearch.index.engine.exec.FilesListener; import org.opensearch.index.engine.exec.IndexFilterProvider; import org.opensearch.index.engine.exec.SearchExecEngine; import org.opensearch.index.engine.exec.SourceProvider; @@ -28,140 +24,177 @@ import java.util.Collection; import java.util.List; import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; /** - * Owns all reader managers, lazily creates search engines per each shard and index filter providers. - * This stands as a bridge for reads/writes. Initializes engines and providers only relevant to the - * index settings and mappings. 
+ * Owns all reader managers, lazily creates search engines, index filter providers + * and source providers per data format. + *
<p>
+ * Instances are created by {@link org.opensearch.index.engine.exec.CompositeEngineFactory}. * * @opensearch.experimental */ @ExperimentalApi public class CompositeEngine implements Closeable { - private static final Logger logger = LogManager.getLogger(CompositeEngine.class); - private final Map> readerManagers; - private final DataFormatRegistry dataFormatRegistry; - private final Map> searchEngines = new ConcurrentHashMap<>(); - private final Map> indexFilterProviders = new ConcurrentHashMap<>(); - private final Map> sourceProviders = new ConcurrentHashMap<>(); - - public CompositeEngine(DataFormatRegistry dataFormatRegistry) { - this.dataFormatRegistry = dataFormatRegistry; - this.readerManagers = dataFormatRegistry.getReaderManagers(); + private final Map, IOException>> engineSuppliers; + private final Map, IOException>> indexFilterProviderSuppliers; + private final Map, IOException>> sourceProviderSuppliers; + + /** + * Constructs a new CompositeEngine with pre-built maps. + * Prefer using {@link org.opensearch.index.engine.exec.CompositeEngineFactory#create()}. 
+ */ + public CompositeEngine( + Map> readerManagers, + Map, IOException>> engineSuppliers, + Map, IOException>> indexFilterProviderSuppliers, + Map, IOException>> sourceProviderSuppliers + ) { + this.readerManagers = readerManagers; + this.engineSuppliers = engineSuppliers; + this.indexFilterProviderSuppliers = indexFilterProviderSuppliers; + this.sourceProviderSuppliers = sourceProviderSuppliers; } + // ---- Public getters ---- + public EngineReaderManager getReaderManager(DataFormat format) { return readerManagers.get(format); } public SearchExecEngine getSearchExecEngine(DataFormat format) throws IOException { - SearchExecEngine engine = searchEngines.get(format); - if (engine != null) { - return engine; - } - synchronized (searchEngines) { - engine = searchEngines.get(format); - if (engine == null) { - engine = dataFormatRegistry.createSearchExecEngine(format); - searchEngines.put(format, engine); - } - return engine; - } + return getFromSupplier(engineSuppliers, format, "search exec engine"); } public IndexFilterProvider getIndexFilterProvider(DataFormat format) throws IOException { - IndexFilterProvider provider = indexFilterProviders.get(format); - if (provider != null) { - return provider; - } - synchronized (indexFilterProviders) { - provider = indexFilterProviders.get(format); - if (provider == null) { - provider = dataFormatRegistry.createIndexFilterProvider(format); - indexFilterProviders.put(format, provider); - } - return provider; - } + return getFromSupplier(indexFilterProviderSuppliers, format, "index filter provider"); } public SourceProvider getSourceProvider(DataFormat format) throws IOException { - SourceProvider sp = sourceProviders.get(format); - if (sp != null) { - return sp; - } - synchronized (sourceProviders) { - sp = sourceProviders.get(format); - if (sp == null) { - sp = dataFormatRegistry.createSourceProvider(format); - sourceProviders.put(format, sp); - } - return sp; - } + return getFromSupplier(sourceProviderSuppliers, format, 
"source provider"); } - public List getCatalogSnapshotAwareRefreshListeners() { - return new ArrayList<>(readerManagers.values()); + private T getFromSupplier( + Map> suppliers, + DataFormat format, + String label + ) throws IOException { + CheckedSupplier supplier = suppliers.get(format); + if (supplier == null) { + throw new IllegalArgumentException("No " + label + " registered for format: " + format.name()); + } + return supplier.get(); } - public List getCatalogSnapshotDeleteListeners() { + // ---- Lifecycle listener helpers ---- + + public List getCatalogSnapshotLifecycleListeners() { return new ArrayList<>(readerManagers.values()); } - public void notifyFilesAdded(Map> dfNewFiles) throws IOException { - for (Map.Entry> entry : dfNewFiles.entrySet()) { - FilesListener listener = readerManagers.get(entry.getKey()); - if (listener != null) { - listener.onFilesAdded(entry.getValue()); + public void notifyFilesAdded(Map> filesByFormat) throws IOException { + for (Map.Entry> entry : filesByFormat.entrySet()) { + EngineReaderManager rm = readerManagers.get(entry.getKey()); + if (rm != null) { + rm.onFilesAdded(entry.getValue()); } } } - public void notifyDelete(Map> dfFilesToDelete) throws IOException { - for (Map.Entry> entry : dfFilesToDelete.entrySet()) { - FilesListener listener = readerManagers.get(entry.getKey()); - if (listener != null) { - listener.onFilesDeleted(entry.getValue()); + public void notifyDelete(Map> filesByFormat) throws IOException { + for (Map.Entry> entry : filesByFormat.entrySet()) { + EngineReaderManager rm = readerManagers.get(entry.getKey()); + if (rm != null) { + rm.onFilesDeleted(entry.getValue()); } } } public void notifyCatalogSnapshotDelete(CatalogSnapshot catalogSnapshot) throws IOException { - for (EngineReaderManager rm : readerManagers.values()) { - rm.onDeleted(catalogSnapshot); + for (CatalogSnapshotLifecycleListener listener : getCatalogSnapshotLifecycleListeners()) { + listener.onDeleted(catalogSnapshot); } } - public 
ReleasableRef acquireSnapshot() { - return null; // TODO : return this.catalogSnapshotManager.acquireSnapshot(); + // ---- Snapshot acquisition ---- + + /** + * Acquires a snapshot across all reader managers, returning a releasable reference. + */ + public ReleasableRef acquireSnapshot(CatalogSnapshot catalogSnapshot) throws IOException { + List readers = new ArrayList<>(); + for (EngineReaderManager rm : readerManagers.values()) { + readers.add(rm.getReader(catalogSnapshot)); + } + return new ReleasableRef(readers); } - @Override - public void close() throws IOException { - for (SearchExecEngine engine : searchEngines.values()) { - engine.close(); + /** + * A releasable reference to a set of readers acquired from reader managers. + */ + @ExperimentalApi + public static class ReleasableRef implements Closeable { + private final List readers; + + ReleasableRef(List readers) { + this.readers = readers; } - for (IndexFilterProvider provider : indexFilterProviders.values()) { - provider.close(); + + public List getReaders() { + return readers; } - for (SourceProvider sp : sourceProviders.values()) { - sp.close(); + + @Override + public void close() throws IOException { + // Reader managers handle their own reference counting; + // this is a placeholder for future release logic. 
+ } } - @ExperimentalApi - public static abstract class ReleasableRef implements AutoCloseable { - - private final T t; + // ---- Closeable ---- - public ReleasableRef(T t) { - this.t = t; + @Override + public void close() throws IOException { + List exceptions = new ArrayList<>(); + closeSupplierInstances(engineSuppliers.values(), exceptions); + closeSupplierInstances(indexFilterProviderSuppliers.values(), exceptions); + closeSupplierInstances(sourceProviderSuppliers.values(), exceptions); + for (EngineReaderManager rm : readerManagers.values()) { + if (rm instanceof Closeable) { + try { + ((Closeable) rm).close(); + } catch (Exception e) { + exceptions.add(e); + } + } } + if (exceptions.isEmpty() == false) { + IOException ioException = new IOException("Failed to close CompositeEngine resources"); + for (Exception e : exceptions) { + ioException.addSuppressed(e); + } + throw ioException; + } + } - public T getRef() { - return t; + /** + * Attempts to retrieve each memoized instance and close it if it implements {@link Closeable}. + * A memoized supplier that was never invoked runs its factory on this get() call, so the instance is created only to be closed. 
+ */ + private static void closeSupplierInstances( + Collection> suppliers, + List exceptions + ) { + for (CheckedSupplier supplier : suppliers) { + try { + T instance = supplier.get(); + if (instance instanceof Closeable) { + ((Closeable) instance).close(); + } + } catch (Exception e) { + exceptions.add(e); + } } } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotAwareRefreshListener.java b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotAwareRefreshListener.java index f1e491d19534e..e69de29bb2d1d 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotAwareRefreshListener.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotAwareRefreshListener.java @@ -1,28 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.index.engine.exec; - -import org.opensearch.common.annotation.ExperimentalApi; - -import java.io.IOException; - -@ExperimentalApi -public interface CatalogSnapshotAwareRefreshListener { - /** - * Called before refresh operation. - */ - void beforeRefresh() throws IOException; - - /** - * Called after refresh operation with catalog snapshot. 
- * @param didRefresh whether refresh actually occurred - * @param catalogSnapshot the current catalog snapshot with file information - */ - void afterRefresh(boolean didRefresh, CatalogSnapshot catalogSnapshot) throws IOException; -} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotDeleteListener.java b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotDeleteListener.java index e0df0ae6cefce..e69de29bb2d1d 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotDeleteListener.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotDeleteListener.java @@ -1,18 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.index.engine.exec; - -import org.opensearch.common.annotation.ExperimentalApi; - -import java.io.IOException; - -@ExperimentalApi -public interface CatalogSnapshotDeleteListener { - void onDeleted(CatalogSnapshot catalogSnapshot) throws IOException; -} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotLifecycleListener.java b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotLifecycleListener.java new file mode 100644 index 0000000000000..e0a40709acf33 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotLifecycleListener.java @@ -0,0 +1,55 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.IOException; + +/** + * Unified lifecycle listener for catalog snapshots. + *
<p>
+ * Combines refresh notifications (create/update) and delete notifications + * into a single interface so plugins only need to wire one listener. + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface CatalogSnapshotLifecycleListener { + + /** Singleton that silently ignores every callback. */ + CatalogSnapshotLifecycleListener NOOP = new CatalogSnapshotLifecycleListener() { + @Override + public void beforeRefresh() {} + + @Override + public void afterRefresh(boolean didRefresh, CatalogSnapshot catalogSnapshot) {} + + @Override + public void onDeleted(CatalogSnapshot catalogSnapshot) {} + }; + + /** + * Called before a refresh operation. + */ + void beforeRefresh() throws IOException; + + /** + * Called after a refresh operation with the resulting catalog snapshot. + * @param didRefresh whether the refresh actually occurred + * @param catalogSnapshot the current catalog snapshot with file information + */ + void afterRefresh(boolean didRefresh, CatalogSnapshot catalogSnapshot) throws IOException; + + /** + * Called when a catalog snapshot is deleted. + * @param catalogSnapshot the snapshot being deleted + */ + void onDeleted(CatalogSnapshot catalogSnapshot) throws IOException; +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineFactory.java b/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineFactory.java new file mode 100644 index 0000000000000..5ef2e4e44fb24 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineFactory.java @@ -0,0 +1,94 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.CheckedFunction; +import org.opensearch.common.CheckedSupplier; +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.CompositeEngine; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.mapper.MapperService; +import org.opensearch.index.shard.ShardPath; +import org.opensearch.plugins.PluginsService; +import org.opensearch.plugins.SearchAnalyticsBackEndPlugin; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +/** + * Factory that discovers {@link SearchAnalyticsBackEndPlugin}s via + * {@link PluginsService} and builds the per-format reader managers and + * memoizing suppliers consumed by {@link CompositeEngine}. + *
<p>
+ * This keeps CompositeEngine decoupled from the plugin layer. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class CompositeEngineFactory { + + private final Map> readerManagers = new HashMap<>(); + private final Map, IOException>> engineSuppliers = new HashMap<>(); + private final Map, IOException>> indexFilterProviderSuppliers = new HashMap<>(); + private final Map, IOException>> sourceProviderSuppliers = new HashMap<>(); + + public CompositeEngineFactory( + PluginsService pluginsService, + ShardPath shardPath, + MapperService mapperService, + IndexSettings indexSettings + ) throws IOException { + for (SearchAnalyticsBackEndPlugin plugin : pluginsService.filterPlugins(SearchAnalyticsBackEndPlugin.class)) { + for (DataFormat format : plugin.getSupportedFormats()) { + // TODO: use mapperService and indexSettings to filter formats relevant to this index + readerManagers.put(format, plugin.createReaderManager(format, shardPath)); + engineSuppliers.put(format, memoize(format, f -> plugin.createSearchExecEngine(f, shardPath))); + indexFilterProviderSuppliers.put(format, memoize(format, f -> plugin.createIndexFilterProvider(f, shardPath))); + sourceProviderSuppliers.put(format, memoize(format, f -> plugin.createSourceProvider(f, shardPath))); + } + } + } + + /** + * Wraps a {@link CheckedFunction} factory into a thread-safe memoizing supplier + * using double-checked locking. The factory is invoked at most once. 
+ */ + private static CheckedSupplier memoize(DataFormat format, CheckedFunction factory) { + return new CheckedSupplier<>() { + private volatile T instance; + + @Override + public T get() throws IOException { + T result = instance; + if (result != null) { + return result; + } + synchronized (this) { + result = instance; + if (result != null) { + return result; + } + result = factory.apply(format); + instance = result; + return result; + } + } + }; + } + + /** + * Creates a new {@link CompositeEngine} populated with the discovered + * reader managers and memoizing suppliers. + */ + public CompositeEngine create() { + return new CompositeEngine(readerManagers, engineSuppliers, indexFilterProviderSuppliers, sourceProviderSuppliers); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistry.java b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistry.java index c09735083f29c..e69de29bb2d1d 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistry.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistry.java @@ -1,87 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.index.engine.exec; - -import org.opensearch.common.CheckedFunction; -import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.IndexSettings; -import org.opensearch.index.engine.dataformat.DataFormat; -import org.opensearch.index.mapper.MapperService; -import org.opensearch.index.engine.exec.SourceProvider; -import org.opensearch.index.shard.ShardPath; -import org.opensearch.plugins.SearchAnalyticsBackEndPlugin; - -import java.io.IOException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -/** - * Registry of reader managers, search engine factories, and index filter provider factories per data format. - *
<p>
- * Accepts {@link MapperService} and {@link IndexSettings} to determine which - * formats are relevant for the index. - * - * @opensearch.experimental - */ -@ExperimentalApi -public class DataFormatRegistry { - - private final Map> readerManagers = new HashMap<>(); - private final Map, IOException>> engineFactories = new HashMap<>(); - private final Map, IOException>> indexFilterProviderFactories = - new HashMap<>(); - private final Map, IOException>> sourceProviderFactories = - new HashMap<>(); - - public DataFormatRegistry( - List searchPlugins, - ShardPath shardPath, - MapperService mapperService, - IndexSettings indexSettings - ) throws IOException { - for (SearchAnalyticsBackEndPlugin plugin : searchPlugins) { - for (DataFormat format : plugin.getSupportedFormats()) { - // TODO: use mapperService and indexSettings to filter formats relevant to this index - readerManagers.put(format, plugin.createReaderManager(format, shardPath)); - engineFactories.put(format, f -> plugin.createSearchExecEngine(f, shardPath)); - indexFilterProviderFactories.put(format, f -> plugin.createIndexFilterProvider(f, shardPath)); - sourceProviderFactories.put(format, f -> plugin.createSourceProvider(f, shardPath)); - } - } - } - - public Map> getReaderManagers() { - return readerManagers; - } - - public SearchExecEngine createSearchExecEngine(DataFormat format) throws IOException { - CheckedFunction, IOException> factory = engineFactories.get(format); - if (factory == null) { - throw new IllegalArgumentException("No plugin registered for format: " + format.name()); - } - return factory.apply(format); - } - - public IndexFilterProvider createIndexFilterProvider(DataFormat format) throws IOException { - CheckedFunction, IOException> factory = indexFilterProviderFactories.get(format); - if (factory == null) { - throw new IllegalArgumentException("No index filter provider for format: " + format.name()); - } - return factory.apply(format); - } - - public SourceProvider 
createSourceProvider(DataFormat format) throws IOException { - CheckedFunction, IOException> factory = sourceProviderFactories.get(format); - if (factory == null) { - throw new IllegalArgumentException("No source provider for format: " + format.name()); - } - return factory.apply(format); - } -} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/EngineReaderManager.java b/server/src/main/java/org/opensearch/index/engine/exec/EngineReaderManager.java index c3a6d94c29faf..b420dd6299471 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/EngineReaderManager.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/EngineReaderManager.java @@ -22,6 +22,6 @@ * @opensearch.experimental */ @ExperimentalApi -public interface EngineReaderManager extends CatalogSnapshotAwareRefreshListener, FilesListener, CatalogSnapshotDeleteListener { +public interface EngineReaderManager extends CatalogSnapshotLifecycleListener, FilesListener { T getReader(CatalogSnapshot catalogSnapshot) throws IOException; } diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 6210b36bb79a8..9c0e4a567e06f 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -145,7 +145,7 @@ import org.opensearch.index.engine.SafeCommitInfo; import org.opensearch.index.engine.Segment; import org.opensearch.index.engine.SegmentsStats; -import org.opensearch.index.engine.exec.DataFormatRegistry; +import org.opensearch.index.engine.exec.CompositeEngineFactory; import org.opensearch.index.engine.exec.Indexer; import org.opensearch.index.fielddata.FieldDataStats; import org.opensearch.index.fielddata.ShardFieldData; @@ -407,7 +407,7 @@ Runnable getGlobalCheckpointSyncer() { // Used to limit the number of concurrent translog tasks. When the semaphore is exhausted, serial recovery is used. 
private static final Semaphore translogConcurrentRecoverySemaphore = new Semaphore(1000); - private final DataFormatRegistry dataFormatRegistry; + private final CompositeEngineFactory compositeEngineFactory; @InternalApi public IndexShard( @@ -449,7 +449,7 @@ public IndexShard( final ClusterApplierService clusterApplierService, @Nullable final MergedSegmentPublisher mergedSegmentPublisher, @Nullable final ReferencedSegmentsPublisher referencedSegmentsPublisher, - @Nullable final DataFormatRegistry dataFormatRegistry + @Nullable final CompositeEngineFactory compositeEngineFactory ) throws IOException { super(shardRouting.shardId(), indexSettings); assert shardRouting.initializing(); @@ -575,9 +575,9 @@ public boolean shouldCache(Query query) { startRefreshTask(); } } - this.dataFormatRegistry = dataFormatRegistry; - if (dataFormatRegistry != null) { - this.currentCompositeEngineReference.set(new CompositeEngine(dataFormatRegistry)); + this.compositeEngineFactory = compositeEngineFactory; + if (compositeEngineFactory != null) { + this.currentCompositeEngineReference.set(compositeEngineFactory.create()); } } diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index b0fc8f0e35d0e..3a5797a130511 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -124,7 +124,7 @@ import org.opensearch.index.engine.NRTReplicationEngineFactory; import org.opensearch.index.engine.NoOpEngine; import org.opensearch.index.engine.ReadOnlyEngine; -import org.opensearch.index.engine.exec.DataFormatRegistry; +import org.opensearch.index.engine.exec.CompositeEngineFactory; import org.opensearch.index.fielddata.IndexFieldDataCache; import org.opensearch.index.flush.FlushStats; import org.opensearch.index.get.GetStats; @@ -172,7 +172,6 @@ import org.opensearch.node.remotestore.RemoteStoreNodeAttribute; import 
org.opensearch.plugins.IndexStorePlugin; import org.opensearch.plugins.PluginsService; -import org.opensearch.plugins.SearchAnalyticsBackEndPlugin; import org.opensearch.repositories.RepositoriesService; import org.opensearch.script.ScriptService; import org.opensearch.search.aggregations.support.ValuesSourceRegistry; @@ -428,8 +427,8 @@ public class IndicesService extends AbstractLifecycleComponent private volatile int defaultMaxMergeAtOnce; private final StatusCounterStats statusCounterStats; private final ClusterMergeSchedulerConfig clusterMergeSchedulerConfig; - private final CheckedTriFunction - dataFormatRegistrySupplier; + private final CheckedTriFunction + compositeEngineFactorySupplier; @Override protected void doStart() { @@ -615,8 +614,8 @@ protected void closeInternal() { MergeSchedulerConfig.CLUSTER_MAX_FORCE_MERGE_MB_PER_SEC_SETTING, this::onClusterLevelForceMergeMBPerSecUpdate ); - this.dataFormatRegistrySupplier = (shardPath, mapperService, indexSettings) -> new DataFormatRegistry( - pluginsService.filterPlugins(SearchAnalyticsBackEndPlugin.class), + this.compositeEngineFactorySupplier = (shardPath, mapperService, indexSettings) -> new CompositeEngineFactory( + pluginsService, shardPath, mapperService, indexSettings @@ -1150,7 +1149,7 @@ private synchronized IndexService createIndexService( segmentReplicationStatsProvider, this::getClusterDefaultMaxMergeAtOnce, clusterMergeSchedulerConfig, - dataFormatRegistrySupplier + compositeEngineFactorySupplier ); } From c010bb47b37d84ca3dbd37e7ba08f58b4e510d2c Mon Sep 17 00:00:00 2001 From: Bukhtawar Khan Date: Thu, 19 Mar 2026 12:05:49 +0530 Subject: [PATCH 04/11] Introduce segment collector interface and simplify Providers (#51) * Refactor CompositeEngine to use factory Signed-off-by: Bukhtawar Khan * Introduce SegmentCollector Signed-off-by: Bukhtawar Khan * Introduce SegmentCollector Signed-off-by: Bukhtawar Khan * Introduce SegmentCollector Signed-off-by: Bukhtawar Khan --------- Signed-off-by: Bukhtawar 
Khan --- .../be/lucene/LuceneIndexFilterContext.java | 40 ++----- .../be/lucene/LuceneIndexFilterProvider.java | 110 ++++++++++++------ .../be/lucene/LuceneSearchEnginePlugin.java | 4 +- .../be/lucene/LuceneSourceProvider.java | 20 ++-- .../index/engine/CompositeEngine.java | 12 +- .../exec/CollectorLifecycleManager.java | 90 ++++++++++++++ .../engine/exec/CompositeEngineFactory.java | 4 +- .../index/engine/exec/IndexFilterContext.java | 1 - .../engine/exec/IndexFilterProvider.java | 11 +- .../index/engine/exec/SegmentCollector.java | 37 ++++++ .../index/engine/exec/SourceProvider.java | 16 +-- .../plugins/SearchAnalyticsBackEndPlugin.java | 11 +- 12 files changed, 251 insertions(+), 105 deletions(-) create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/CollectorLifecycleManager.java create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/SegmentCollector.java diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java index 921b85c189048..f5d9afe5d228e 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java @@ -10,19 +10,16 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Weight; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.CollectorLifecycleManager; import org.opensearch.index.engine.exec.IndexFilterContext; import java.io.IOException; import java.util.List; -import 
java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicInteger; /** * Lucene-specific index filter context. @@ -37,8 +34,7 @@ public class LuceneIndexFilterContext implements IndexFilterContext { private final Weight weight; private final List leaves; - private final AtomicInteger nextCollectorId = new AtomicInteger(1); - private final Map collectors = new ConcurrentHashMap<>(); + private final CollectorLifecycleManager collectorManager = new CollectorLifecycleManager(); public LuceneIndexFilterContext(Query query, DirectoryReader reader) throws IOException { IndexSearcher searcher = new IndexSearcher(reader); @@ -65,35 +61,15 @@ List getLeaves() { return leaves; } - int registerCollector(DocIdSetIterator iterator, int minDoc, int maxDoc) { - int key = nextCollectorId.getAndIncrement(); - collectors.put(key, new CollectorState(iterator, minDoc, maxDoc)); - return key; - } - - CollectorState getCollector(int key) { - return collectors.get(key); - } - - void removeCollector(int key) { - collectors.remove(key); + /** + * Returns the collector lifecycle manager + */ + public CollectorLifecycleManager getCollectorManager() { + return collectorManager; } @Override public void close() { - collectors.clear(); - } - - static class CollectorState { - final DocIdSetIterator iterator; - final int minDoc; - final int maxDoc; - int currentDoc = -1; - - CollectorState(DocIdSetIterator iterator, int minDoc, int maxDoc) { - this.iterator = iterator; - this.minDoc = minDoc; - this.maxDoc = maxDoc; - } + collectorManager.close(); } } diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java index ef70dc8306edd..207c7e45efe8c 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java +++ 
b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java @@ -14,6 +14,7 @@ import org.apache.lucene.search.Scorer; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.exec.IndexFilterProvider; +import org.opensearch.index.engine.exec.SegmentCollector; import java.io.IOException; import java.util.BitSet; @@ -24,55 +25,96 @@ * @opensearch.experimental */ @ExperimentalApi -public class LuceneIndexFilterProvider implements IndexFilterProvider { +public class LuceneIndexFilterProvider implements IndexFilterProvider { @Override - public LuceneIndexFilterContext createContext(Query query, Object reader) throws IOException { - return new LuceneIndexFilterContext(query, (DirectoryReader) reader); + public LuceneIndexFilterContext createContext(Query query, DirectoryReader reader) throws IOException { + return new LuceneIndexFilterContext(query, reader); } + + /** + * Creates a collector for the given segment and registers it in the + * context's {@link org.opensearch.index.engine.exec.CollectorLifecycleManager}. 
+ * + * @return an int key that identifies this collector across JNI + */ @Override public int createCollector(LuceneIndexFilterContext context, int segmentOrd, int minDoc, int maxDoc) { - try { - Scorer scorer = context.getWeight().scorer(context.getLeaves().get(segmentOrd)); - if (scorer == null) return -1; - return context.registerCollector(scorer.iterator(), minDoc, maxDoc); - } catch (IOException e) { - return -1; - } + SegmentCollector collector = createCollectorInternal(context, segmentOrd, minDoc, maxDoc); + return context.getCollectorManager().registerCollector(collector); } - @Override - public long[] collectDocs(LuceneIndexFilterContext context, int collectorKey, int minDoc, int maxDoc) { - LuceneIndexFilterContext.CollectorState state = context.getCollector(collectorKey); - if (state == null) return new long[0]; + /** + * Collects matching doc IDs for the collector identified by {@code key}. + */ + public long[] collectDocs(LuceneIndexFilterContext context, int key, int minDoc, int maxDoc) { + return context.getCollectorManager().collectDocs(key, minDoc, maxDoc); + } - int effectiveMin = Math.max(minDoc, state.minDoc); - int effectiveMax = Math.min(maxDoc, state.maxDoc); - if (effectiveMin >= effectiveMax) return new long[0]; + /** + * Releases the collector identified by {@code key}. 
+ */ + public void releaseCollector(LuceneIndexFilterContext context, int key) { + context.getCollectorManager().releaseCollector(key); + } + + @Override + public void close() {} - BitSet bitset = new BitSet(effectiveMax - effectiveMin); + private SegmentCollector createCollectorInternal(LuceneIndexFilterContext context, int segmentOrd, int minDoc, int maxDoc) { try { - DocIdSetIterator iter = state.iterator; - int docId = state.currentDoc; - if (docId == DocIdSetIterator.NO_MORE_DOCS || docId >= state.maxDoc) return new long[0]; - if (docId < effectiveMin) docId = iter.advance(effectiveMin); - while (docId != DocIdSetIterator.NO_MORE_DOCS && docId < effectiveMax) { - bitset.set(docId - effectiveMin); - docId = iter.nextDoc(); + Scorer scorer = context.getWeight().scorer(context.getLeaves().get(segmentOrd)); + if (scorer == null) { + return EMPTY_COLLECTOR; } - state.currentDoc = docId; + return new LuceneSegmentCollector(scorer.iterator(), minDoc, maxDoc); } catch (IOException e) { - return new long[0]; + return EMPTY_COLLECTOR; } - return bitset.toLongArray(); } - @Override - public void releaseCollector(LuceneIndexFilterContext context, int collectorKey) { - context.removeCollector(collectorKey); - } - @Override - public void close() {} + private static final SegmentCollector EMPTY_COLLECTOR = (min, max) -> new long[0]; + + private static class LuceneSegmentCollector implements SegmentCollector { + private final DocIdSetIterator iterator; + private final int collectorMinDoc; + private final int collectorMaxDoc; + private int currentDoc = -1; + + LuceneSegmentCollector(DocIdSetIterator iterator, int minDoc, int maxDoc) { + this.iterator = iterator; + this.collectorMinDoc = minDoc; + this.collectorMaxDoc = maxDoc; + } + + @Override + public long[] collectDocs(int minDoc, int maxDoc) { + int effectiveMin = Math.max(minDoc, collectorMinDoc); + int effectiveMax = Math.min(maxDoc, collectorMaxDoc); + if (effectiveMin >= effectiveMax) { + return new long[0]; + } + + 
BitSet bitset = new BitSet(effectiveMax - effectiveMin); + try { + int docId = currentDoc; + if (docId == DocIdSetIterator.NO_MORE_DOCS || docId >= collectorMaxDoc) { + return new long[0]; + } + if (docId < effectiveMin) { + docId = iterator.advance(effectiveMin); + } + while (docId != DocIdSetIterator.NO_MORE_DOCS && docId < effectiveMax) { + bitset.set(docId - effectiveMin); + docId = iterator.nextDoc(); + } + currentDoc = docId; + } catch (IOException e) { + return new long[0]; + } + return bitset.toLongArray(); + } + } } diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java index 19013b10cb3f1..2a747086679f0 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java @@ -51,12 +51,12 @@ public EngineReaderManager createReaderManager(DataFormat format, ShardPath s } @Override - public IndexFilterProvider createIndexFilterProvider(DataFormat format, ShardPath shardPath) throws IOException { + public IndexFilterProvider createIndexFilterProvider(DataFormat format, ShardPath shardPath) throws IOException { return new LuceneIndexFilterProvider(); } @Override - public SourceProvider createSourceProvider(DataFormat format, ShardPath shardPath) throws IOException { + public SourceProvider createSourceProvider(DataFormat format, ShardPath shardPath) throws IOException { return new LuceneSourceProvider(); } diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceProvider.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceProvider.java index 72a4a95083548..d2de84add4880 100644 --- 
a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceProvider.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceProvider.java @@ -13,6 +13,8 @@ import org.opensearch.index.engine.exec.SourceProvider; import java.io.IOException; +import java.util.Collections; +import java.util.Iterator; /** * Lucene-backed {@link SourceProvider}. @@ -26,23 +28,17 @@ * @opensearch.experimental */ @ExperimentalApi -public class LuceneSourceProvider implements SourceProvider { +public class LuceneSourceProvider implements SourceProvider { @Override - public LuceneSourceContext createContext(Object query, Object reader) throws IOException { - return new LuceneSourceContext(query, (DirectoryReader) reader); + public LuceneSourceContext createContext(Object query, DirectoryReader reader) throws IOException { + return new LuceneSourceContext(query, reader); } @Override - public Object execute(LuceneSourceContext context) throws IOException { - // TODO: execute query via context.getSearcher(), collect results, return stream handle - throw new UnsupportedOperationException("Lucene source execution not yet implemented"); - } - - @Override - public Object next(LuceneSourceContext context, Object stream) throws IOException { - // TODO: pull next batch (Arrow VectorSchemaRoot) from stream - throw new UnsupportedOperationException("Lucene source streaming not yet implemented"); + public Iterator execute(LuceneSourceContext context) throws IOException { + // TODO: execute query via context.getSearcher(), collect results, return iterator + return Collections.emptyIterator(); } @Override diff --git a/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java b/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java index bf6cc026777bb..48c2c8232b96a 100644 --- a/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java +++ 
b/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java @@ -38,8 +38,8 @@ public class CompositeEngine implements Closeable { private final Map> readerManagers; private final Map, IOException>> engineSuppliers; - private final Map, IOException>> indexFilterProviderSuppliers; - private final Map, IOException>> sourceProviderSuppliers; + private final Map, IOException>> indexFilterProviderSuppliers; + private final Map, IOException>> sourceProviderSuppliers; /** * Constructs a new CompositeEngine with pre-built maps. @@ -48,8 +48,8 @@ public class CompositeEngine implements Closeable { public CompositeEngine( Map> readerManagers, Map, IOException>> engineSuppliers, - Map, IOException>> indexFilterProviderSuppliers, - Map, IOException>> sourceProviderSuppliers + Map, IOException>> indexFilterProviderSuppliers, + Map, IOException>> sourceProviderSuppliers ) { this.readerManagers = readerManagers; this.engineSuppliers = engineSuppliers; @@ -67,11 +67,11 @@ public EngineReaderManager getReaderManager(DataFormat format) { return getFromSupplier(engineSuppliers, format, "search exec engine"); } - public IndexFilterProvider getIndexFilterProvider(DataFormat format) throws IOException { + public IndexFilterProvider getIndexFilterProvider(DataFormat format) throws IOException { return getFromSupplier(indexFilterProviderSuppliers, format, "index filter provider"); } - public SourceProvider getSourceProvider(DataFormat format) throws IOException { + public SourceProvider getSourceProvider(DataFormat format) throws IOException { return getFromSupplier(sourceProviderSuppliers, format, "source provider"); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CollectorLifecycleManager.java b/server/src/main/java/org/opensearch/index/engine/exec/CollectorLifecycleManager.java new file mode 100644 index 0000000000000..5764f6c754231 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/CollectorLifecycleManager.java @@ -0,0 +1,90 @@ 
+/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Manages the lifecycle of {@link SegmentCollector} instances for a single query. + *

+ * Provides a JNI-friendly primitives-only API: callers receive an {@code int} key + * from {@link #registerCollector} and use it to invoke {@link #collectDocs} and + * {@link #releaseCollector}. Java owns the collector state; the native (Rust) side + * only holds lightweight int keys. + *

+ * One manager is created per query and closed when the query finishes. + * {@link #close()} acts as a safety net, releasing any collectors that were not + * explicitly released by the caller. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class CollectorLifecycleManager implements Closeable { + + private final AtomicInteger nextKey = new AtomicInteger(1); + private final Map collectors = new ConcurrentHashMap<>(); + + /** + * Registers a collector and returns its int key. + * + * @param collector the segment collector to manage + * @return a unique key that identifies this collector + */ + public int registerCollector(SegmentCollector collector) { + int key = nextKey.getAndIncrement(); + collectors.put(key, collector); + return key; + } + + /** + * Collects matching document IDs for the collector identified by {@code key}. + * + * @param key the collector key returned by {@link #registerCollector} + * @param minDoc inclusive lower bound + * @param maxDoc exclusive upper bound + * @return packed {@code long[]} bitset of matching doc IDs, or empty array if key is invalid + */ + public long[] collectDocs(int key, int minDoc, int maxDoc) { + SegmentCollector collector = collectors.get(key); + if (collector == null) { + return new long[0]; + } + return collector.collectDocs(minDoc, maxDoc); + } + + /** + * Releases the collector identified by {@code key}, closing it and + * removing it from the registry. + * + * @param key the collector key returned by {@link #registerCollector} + */ + public void releaseCollector(int key) { + SegmentCollector collector = collectors.remove(key); + if (collector != null) { + collector.close(); + } + } + + /** + * Closes all remaining collectors. Acts as a safety net for any + * collectors that were not explicitly released. 
+ */ + @Override + public void close() { + for (SegmentCollector collector : collectors.values()) { + collector.close(); + } + collectors.clear(); + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineFactory.java b/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineFactory.java index 5ef2e4e44fb24..33828ee97793e 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineFactory.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineFactory.java @@ -37,8 +37,8 @@ public class CompositeEngineFactory { private final Map> readerManagers = new HashMap<>(); private final Map, IOException>> engineSuppliers = new HashMap<>(); - private final Map, IOException>> indexFilterProviderSuppliers = new HashMap<>(); - private final Map, IOException>> sourceProviderSuppliers = new HashMap<>(); + private final Map, IOException>> indexFilterProviderSuppliers = new HashMap<>(); + private final Map, IOException>> sourceProviderSuppliers = new HashMap<>(); public CompositeEngineFactory( PluginsService pluginsService, diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterContext.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterContext.java index f27a27192be9d..415cecec55129 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterContext.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterContext.java @@ -11,7 +11,6 @@ import org.opensearch.common.annotation.ExperimentalApi; import java.io.Closeable; -import java.io.IOException; /** * @opensearch.experimental diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterProvider.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterProvider.java index 3e95b7fa7dd16..2d5224c48d162 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterProvider.java +++ 
b/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterProvider.java @@ -14,14 +14,17 @@ import java.io.IOException; /** - * @param the query type (e.g. Lucene Query) - * @param the context type + * Provides index-level filtering (partition pruning, segment filtering) for a given data format. + * + * @param the query type (e.g. Lucene Query) + * @param the context type + * @param the engine-specific reader type * @opensearch.experimental */ @ExperimentalApi -public interface IndexFilterProvider extends Closeable { +public interface IndexFilterProvider extends Closeable { - C createContext(Q query, Object reader) throws IOException; + C createContext(Q query, ReaderT reader) throws IOException; int createCollector(C context, int segmentOrd, int minDoc, int maxDoc); diff --git a/server/src/main/java/org/opensearch/index/engine/exec/SegmentCollector.java b/server/src/main/java/org/opensearch/index/engine/exec/SegmentCollector.java new file mode 100644 index 0000000000000..772244d88436f --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/SegmentCollector.java @@ -0,0 +1,37 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; + +/** + * A per-segment document collector returned by + * {@link IndexFilterProvider#createCollector}. + *

+ * Callers should use try-with-resources to ensure cleanup. + * + * @opensearch.experimental + */ +@ExperimentalApi +public interface SegmentCollector extends Closeable { + + /** + * Collect matching document IDs in the given range. + * + * @param minDoc inclusive lower bound + * @param maxDoc exclusive upper bound + * @return packed {@code long[]} bitset of matching doc IDs + */ + long[] collectDocs(int minDoc, int maxDoc); + + @Override + default void close() {} +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/SourceProvider.java b/server/src/main/java/org/opensearch/index/engine/exec/SourceProvider.java index 5ba8efef51643..ddddcd4157940 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/SourceProvider.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/SourceProvider.java @@ -12,18 +12,20 @@ import java.io.Closeable; import java.io.IOException; +import java.util.Iterator; /** - * @param the context type - * @param the result batch type + * Provides source-field data for a given data format. 
+ * + * @param the context type + * @param the result batch type + * @param the engine-specific reader type * @opensearch.experimental */ @ExperimentalApi -public interface SourceProvider extends Closeable { - - C createContext(Object query, Object reader) throws IOException; +public interface SourceProvider extends Closeable { - Object execute(C context) throws IOException; + C createContext(Object query, ReaderT reader) throws IOException; - R next(C context, Object stream) throws IOException; + Iterator execute(C context) throws IOException; } diff --git a/server/src/main/java/org/opensearch/plugins/SearchAnalyticsBackEndPlugin.java b/server/src/main/java/org/opensearch/plugins/SearchAnalyticsBackEndPlugin.java index 69eb2863d9463..e113272744283 100644 --- a/server/src/main/java/org/opensearch/plugins/SearchAnalyticsBackEndPlugin.java +++ b/server/src/main/java/org/opensearch/plugins/SearchAnalyticsBackEndPlugin.java @@ -8,6 +8,9 @@ package org.opensearch.plugins; +import java.io.IOException; +import java.util.List; + import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.exec.EngineReaderManager; import org.opensearch.index.engine.exec.IndexFilterProvider; @@ -15,15 +18,13 @@ import org.opensearch.index.engine.exec.SourceProvider; import org.opensearch.index.shard.ShardPath; -import java.io.IOException; -import java.util.List; - /** * Interface for back-end query engines. * * @opensearch.internal */ public interface SearchAnalyticsBackEndPlugin { + String name(); List getSupportedFormats(); @@ -40,7 +41,7 @@ public interface SearchAnalyticsBackEndPlugin { /** * Create an index filter provider. Return null if this plugin is a search engine only. 
*/ - default IndexFilterProvider createIndexFilterProvider(DataFormat format, ShardPath shardPath) throws IOException { + default IndexFilterProvider createIndexFilterProvider(DataFormat format, ShardPath shardPath) throws IOException { return null; } @@ -50,7 +51,7 @@ public interface SearchAnalyticsBackEndPlugin { * A source provider executes the full query+scan+filter and streams back * result batches (projections, aggregations) to the primary engine. */ - default SourceProvider createSourceProvider(DataFormat format, ShardPath shardPath) throws IOException { + default SourceProvider createSourceProvider(DataFormat format, ShardPath shardPath) throws IOException { return null; } } From ce7225b2d04de284e3dc346aa2dbfbbdf8a22bac Mon Sep 17 00:00:00 2001 From: Bukhtawar Khan Date: Thu, 19 Mar 2026 18:54:35 +0530 Subject: [PATCH 05/11] Decouple IndexFileDeleter (#52) * Refactor CompositeEngine to use factory Signed-off-by: Bukhtawar Khan * Introduce SegmentCollector Signed-off-by: Bukhtawar Khan * Introduce SegmentCollector Signed-off-by: Bukhtawar Khan * Introduce SegmentCollector Signed-off-by: Bukhtawar Khan * De-couple and simplify index file deleter Signed-off-by: Bukhtawar Khan * De-couple and simplify index file deleter Signed-off-by: Bukhtawar Khan * De-couple and simplify index file deleter, handle scorer and weight query lifecycle Signed-off-by: Bukhtawar Khan * De-couple and simplify index file deleter, handle scorer and weight query lifecycle Signed-off-by: Bukhtawar Khan * De-couple and simplify index file deleter, handle scorer and weight query lifecycle Signed-off-by: Bukhtawar Khan --------- Signed-off-by: Bukhtawar Khan --- .../be/datafusion/DataFusionPlugin.java | 2 +- .../be/datafusion/DataFusionService.java | 31 ++--- .../be/datafusion/DatafusionContext.java | 2 +- .../be/datafusion/DatafusionReader.java | 9 +- .../DatafusionSearchExecEngine.java | 6 +- .../be/datafusion/DatafusionSearcher.java | 20 +-- .../be/datafusion/NativeRuntimeHandle.java | 
77 ++++++++++++ .../be/lucene/LuceneEngineSearcher.java | 117 ++---------------- .../be/lucene/LuceneIndexFilterContext.java | 6 +- .../be/lucene/LuceneIndexFilterProvider.java | 3 +- .../be/lucene/LuceneSearchContext.java | 42 ++----- .../index/engine/CompositeEngine.java | 31 ----- ...va => CollectorQueryLifecycleManager.java} | 2 +- ...ompositeEngineCatalogSnapshotListener.java | 88 +++++++++++++ .../engine/exec/CompositeEngineFactory.java | 15 +++ .../index/engine/exec/IndexFileDeleter.java | 62 +++++----- 16 files changed, 275 insertions(+), 238 deletions(-) create mode 100644 sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/NativeRuntimeHandle.java rename server/src/main/java/org/opensearch/index/engine/exec/{CollectorLifecycleManager.java => CollectorQueryLifecycleManager.java} (97%) create mode 100644 server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineCatalogSnapshotListener.java diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java index b5d7c57c4ab48..461a30b0ff5bf 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java @@ -122,7 +122,7 @@ public EngineReaderManager createReaderManager(DataFormat format, ShardPath s if (dataFusionService == null) { throw new IllegalStateException("DataFusionPlugin.createComponents() has not been called yet"); } - return new DatafusionSearchExecEngine(dataFusionService.getRuntimePointer(), format); + return new DatafusionSearchExecEngine(dataFusionService.getNativeRuntime(), format); } /** diff --git 
a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java index 695ec743ae66e..2cf1811a8b436 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java @@ -30,8 +30,8 @@ public class DataFusionService extends AbstractLifecycleComponent { private final String spillDirectory; private final long spillMemoryLimit; - /** Pointer to the native DataFusion global runtime (Tokio + memory pool). */ - private volatile long runtimePointer; + /** Handle to the native DataFusion global runtime (Tokio + memory pool). */ + private volatile NativeRuntimeHandle runtimeHandle; /** * Creates a new DataFusionService. @@ -56,8 +56,9 @@ protected void doStart() { } // TODO: initialize Tokio runtime and memory pool via NativeBridge - // runtimePointer = NativeBridge.createGlobalRuntime(memoryPoolLimit, spillDirectory, spillMemoryLimit); - this.runtimePointer = 0L; // placeholder until NativeBridge is wired + // long ptr = NativeBridge.createGlobalRuntime(memoryPoolLimit, spillDirectory, spillMemoryLimit); + long ptr = 1L; // placeholder until NativeBridge is wired + this.runtimeHandle = new NativeRuntimeHandle(ptr); logger.info("DataFusion service started"); } @@ -73,17 +74,18 @@ protected void doClose() throws IOException { } /** - * Returns the pointer to the native DataFusion global runtime. - * All JNI calls that need the Tokio runtime pass this pointer. + * Returns the handle to the native DataFusion global runtime. + * All consumers should hold this reference and call {@link NativeRuntimeHandle#get()} + * at JNI invocation time to obtain the current live pointer. 
* * @throws IllegalStateException if the service has not been started */ - public long getRuntimePointer() { - long ptr = runtimePointer; - if (ptr == 0L && lifecycle.started() == false) { + public NativeRuntimeHandle getNativeRuntime() { + NativeRuntimeHandle handle = runtimeHandle; + if (handle == null) { throw new IllegalStateException("DataFusionService has not been started"); } - return ptr; + return handle; } /** @@ -94,11 +96,10 @@ public long getRuntimePointer() { // public CacheManager getCacheManager() { return cacheManager; } private void releaseRuntime() { - long ptr = runtimePointer; - if (ptr != 0L) { - // TODO: NativeBridge.closeGlobalRuntime(ptr); - // TODO: NativeBridge.shutdownTokioRuntimeManager(); - runtimePointer = 0L; + NativeRuntimeHandle handle = runtimeHandle; + if (handle != null) { + handle.close(); + runtimeHandle = null; logger.info("DataFusion native runtime released"); } } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java index 04c2c849931ff..2baab4bde6da1 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java @@ -40,7 +40,7 @@ public DatafusionContext( ) throws IOException { this.request = request; this.shardTarget = shardTarget; - this.engineSearcher = new DatafusionSearcher(reader.getReaderPtr()); + this.engineSearcher = new DatafusionSearcher(reader.getReaderHandle()); } @Override diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReader.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReader.java index e27b57c3e2b53..25e5185a731ba 100644 --- 
a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReader.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionReader.java @@ -52,7 +52,12 @@ public void close() throws IOException { logger.debug("DatafusionReader closed for [{}]", directoryPath); } - public long getReaderPtr() { - return readerHandle.getPointer(); + /** + * Returns the type-safe handle to the native reader. + * Callers should hold this reference and call + * {@link ReaderHandle#getPointer()} only at JNI invocation time. + */ + public ReaderHandle getReaderHandle() { + return readerHandle; } } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java index b3d3b759f5069..87492d0926c38 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java @@ -29,10 +29,10 @@ @ExperimentalApi public class DatafusionSearchExecEngine implements SearchExecEngine { - private final long runtimePtr; + private final NativeRuntimeHandle nativeRuntime; - public DatafusionSearchExecEngine(long runtimePtr, DataFormat dataFormat) { - this.runtimePtr = runtimePtr; + public DatafusionSearchExecEngine(NativeRuntimeHandle nativeRuntime, DataFormat dataFormat) { + this.nativeRuntime = nativeRuntime; } @Override diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java index 8db660c50cee6..a2de225434920 100644 --- 
a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java @@ -8,6 +8,7 @@ package org.opensearch.be.datafusion; +import org.opensearch.be.datafusion.jni.ReaderHandle; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.exec.EngineSearcher; @@ -21,11 +22,10 @@ @ExperimentalApi public class DatafusionSearcher implements EngineSearcher { - private final long readerPtr; + private final ReaderHandle readerHandle; - public DatafusionSearcher(long readerPtr) { - // TODO: initialize reader handle - this.readerPtr = readerPtr; + public DatafusionSearcher(ReaderHandle readerHandle) { + this.readerHandle = readerHandle; } @Override @@ -47,12 +47,18 @@ private void searchVanilla(DatafusionContext context) throws IOException { throw new UnsupportedOperationException("DataFusion native bridge not yet wired"); } - public long getReaderPtr() { - return readerPtr; + /** + * Returns the type-safe handle to the native reader. + * Call {@link ReaderHandle#getPointer()} only at JNI invocation time + * to get the raw pointer with a liveness check. + */ + public ReaderHandle getReaderHandle() { + return readerHandle; } @Override public void close() { - // TODO : reader handle close + // ReaderHandle lifecycle is owned by DatafusionReader / EngineReaderManager, + // not by the searcher. Do not close it here. 
} } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/NativeRuntimeHandle.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/NativeRuntimeHandle.java new file mode 100644 index 0000000000000..77af5ff83e1d9 --- /dev/null +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/NativeRuntimeHandle.java @@ -0,0 +1,77 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.datafusion; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; + +/** + * Thread-safe wrapper around a native runtime pointer. + *

+ * Encapsulates the raw {@code long} so it cannot be copied or used after + * the runtime is destroyed. All consumers obtain the pointer via {@link #get()} + * which performs a liveness check on every call. + *

+ * Implements {@link Closeable} so it integrates with try-with-resources, + * {@code IOUtils.close()}, and leak detection infrastructure. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class NativeRuntimeHandle implements Closeable { + + private volatile long pointer; + + /** + * Creates a handle wrapping the given native pointer. + * + * @param pointer the native runtime pointer (must be non-zero) + * @throws IllegalArgumentException if pointer is zero + */ + public NativeRuntimeHandle(long pointer) { + if (pointer == 0L) { + throw new IllegalArgumentException("Cannot create NativeRuntimeHandle with null pointer"); + } + this.pointer = pointer; + } + + /** + * Returns the native runtime pointer, checking that it is still live. + * + * @throws IllegalStateException if the handle has been closed + */ + public long get() { + long ptr = pointer; + if (ptr == 0L) { + throw new IllegalStateException("Native runtime handle has been closed"); + } + return ptr; + } + + /** + * Returns true if the handle has not been closed. + */ + public boolean isOpen() { + return pointer != 0L; + } + + /** + * Releases the native runtime. Idempotent and thread-safe. + * After this call, {@link #get()} will throw. 
+ */ + @Override + public synchronized void close() { + long ptr = pointer; + if (ptr != 0L) { + // TODO: NativeBridge.closeGlobalRuntime(ptr); + pointer = 0L; + } + } +} diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java index ba523f42a78eb..4554882cba0d3 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java @@ -10,24 +10,22 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; -import org.apache.lucene.search.Scorer; import org.apache.lucene.search.Weight; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.exec.EngineSearcher; import java.io.IOException; -import java.util.BitSet; import java.util.List; -import java.util.Map; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicLong; /** - * TODO : need to rethink this + * Lucene-backed engine searcher. + *

+ * This class is stateless with respect to active queries + * + * @opensearch.experimental */ @ExperimentalApi public class LuceneEngineSearcher implements EngineSearcher { @@ -35,20 +33,15 @@ public class LuceneEngineSearcher implements EngineSearcher private final IndexSearcher indexSearcher; private final DirectoryReader directoryReader; - /** Active Weight contexts keyed by opaque pointer. */ - private static final Map activeWeights = new ConcurrentHashMap<>(); - /** Active partition scorer contexts keyed by opaque pointer. */ - private static final Map activeScorers = new ConcurrentHashMap<>(); - private static final AtomicLong nextId = new AtomicLong(1); - public LuceneEngineSearcher(IndexSearcher indexSearcher, DirectoryReader directoryReader) { this.indexSearcher = indexSearcher; this.directoryReader = directoryReader; } /** - * Execute: create a Weight from the query, register it, and store the - * pointer on the context so the indexed query path can use it. + * Execute: create a Weight from the query, register it on the + * context's lifecycle manager, and store the key + segment metadata + * on the context for JNI callbacks. */ @Override public void search(LuceneSearchContext context) throws IOException { @@ -59,77 +52,8 @@ public void search(LuceneSearchContext context) throws IOException { Query rewritten = indexSearcher.rewrite(query); Weight weight = indexSearcher.createWeight(rewritten, ScoreMode.COMPLETE_NO_SCORES, 1.0f); List leaves = directoryReader.leaves(); + //TODO : Complete the wiring for search execution - // TODO : need to redo this - this is specific to indexed table flow - long ptr = nextId.getAndIncrement(); - activeWeights.put(ptr, new WeightContext(weight, leaves)); - context.setWeightPointer(ptr); - context.setSegmentCount(leaves.size()); - context.setSegmentMaxDocs(leaves.stream().mapToInt(l -> l.reader().maxDoc()).toArray()); - } - - /** Create a partition scorer for a segment + doc range. Returns -1 if no matches. 
*/ - public static long createCollector(long weightPtr, int segmentOrd, int minDoc, int maxDoc) { - WeightContext ctx = activeWeights.get(weightPtr); - if (ctx == null || segmentOrd < 0 || segmentOrd >= ctx.leaves.size()) { - return -1; - } - try { - Scorer scorer = ctx.weight.scorer(ctx.leaves.get(segmentOrd)); - if (scorer == null) return -1; - long id = nextId.getAndIncrement(); - activeScorers.put(id, new PartitionScorerContext(scorer.iterator(), minDoc, maxDoc)); - return id; - } catch (IOException e) { - return -1; - } - } - - /** Collect matching doc IDs in [rowGroupMin, rowGroupMax) as a bitset (long[]). */ - public static long[] collectDocs(long scorerPtr, int rowGroupMin, int rowGroupMax) { - PartitionScorerContext ctx = activeScorers.get(scorerPtr); - if (ctx == null) return new long[0]; - - int effectiveMin = Math.max(rowGroupMin, ctx.minDoc); - int effectiveMax = Math.min(rowGroupMax, ctx.maxDoc); - if (effectiveMin >= effectiveMax) return new long[0]; - - BitSet bitset = new BitSet(effectiveMax - effectiveMin); - try { - DocIdSetIterator iter = ctx.iterator; - int docId = ctx.currentDoc; - if (docId == DocIdSetIterator.NO_MORE_DOCS || docId >= ctx.maxDoc) return new long[0]; - if (docId < effectiveMin) docId = iter.advance(effectiveMin); - while (docId != DocIdSetIterator.NO_MORE_DOCS && docId < effectiveMax) { - bitset.set(docId - effectiveMin); - docId = iter.nextDoc(); - } - ctx.currentDoc = docId; - } catch (IOException e) { - return new long[0]; - } - return bitset.toLongArray(); - } - - /** Release a partition scorer. */ - public static void releaseCollector(long scorerPtr) { - activeScorers.remove(scorerPtr); - } - - /** Release a Weight context. */ - public static void releaseWeight(long weightPtr) { - activeWeights.remove(weightPtr); - } - - public static int getSegmentCount(long weightPtr) { - WeightContext ctx = activeWeights.get(weightPtr); - return ctx != null ? 
ctx.leaves.size() : -1; - } - - public static int getSegmentMaxDoc(long weightPtr, int segmentOrd) { - WeightContext ctx = activeWeights.get(weightPtr); - if (ctx == null || segmentOrd < 0 || segmentOrd >= ctx.leaves.size()) return -1; - return ctx.leaves.get(segmentOrd).reader().maxDoc(); } public IndexSearcher getIndexSearcher() { @@ -142,27 +66,4 @@ public DirectoryReader getDirectoryReader() { @Override public void close() {} - - static class WeightContext { - final Weight weight; - final List leaves; - - WeightContext(Weight weight, List leaves) { - this.weight = weight; - this.leaves = leaves; - } - } - - static class PartitionScorerContext { - final DocIdSetIterator iterator; - final int minDoc; - final int maxDoc; - int currentDoc = -1; - - PartitionScorerContext(DocIdSetIterator iterator, int minDoc, int maxDoc) { - this.iterator = iterator; - this.minDoc = minDoc; - this.maxDoc = maxDoc; - } - } } diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java index f5d9afe5d228e..796a1c3cdcf17 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java @@ -15,7 +15,7 @@ import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Weight; import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.engine.exec.CollectorLifecycleManager; +import org.opensearch.index.engine.exec.CollectorQueryLifecycleManager; import org.opensearch.index.engine.exec.IndexFilterContext; import java.io.IOException; @@ -34,7 +34,7 @@ public class LuceneIndexFilterContext implements IndexFilterContext { private final Weight weight; private final List leaves; - private final 
CollectorLifecycleManager collectorManager = new CollectorLifecycleManager(); + private final CollectorQueryLifecycleManager collectorManager = new CollectorQueryLifecycleManager(); public LuceneIndexFilterContext(Query query, DirectoryReader reader) throws IOException { IndexSearcher searcher = new IndexSearcher(reader); @@ -64,7 +64,7 @@ List getLeaves() { /** * Returns the collector lifecycle manager */ - public CollectorLifecycleManager getCollectorManager() { + public CollectorQueryLifecycleManager getCollectorManager() { return collectorManager; } diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java index 207c7e45efe8c..5a45f916e5762 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java @@ -13,6 +13,7 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.Scorer; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.exec.CollectorQueryLifecycleManager; import org.opensearch.index.engine.exec.IndexFilterProvider; import org.opensearch.index.engine.exec.SegmentCollector; @@ -35,7 +36,7 @@ public LuceneIndexFilterContext createContext(Query query, DirectoryReader reade /** * Creates a collector for the given segment and registers it in the - * context's {@link org.opensearch.index.engine.exec.CollectorLifecycleManager}. + * context's {@link CollectorQueryLifecycleManager}. 
* * @return an int key that identifies this collector across JNI */ diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java index f2eb7ac98e0a4..7509f6ec1cd0a 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java @@ -20,10 +20,6 @@ /** * Lucene-specific search execution context. - *

- * Input: a Lucene {@link Query}. - * Output: a registered Weight pointer + segment metadata that Rust - * uses for JNI callbacks to stream bitsets per partition range. * * @opensearch.experimental */ @@ -37,10 +33,6 @@ public class LuceneSearchContext implements SearchExecutionContext { private final LuceneEngineSearcher searcher; private Query query; - private long weightPointer; - private int segmentCount; - private int[] segmentMaxDocs; - public LuceneSearchContext( ShardSearchRequest request, SearchShardTarget shardTarget, @@ -48,7 +40,7 @@ public LuceneSearchContext( ) throws IOException { this.reader = reader; IndexSearcher indexSearcher = new IndexSearcher(reader); - searcher = new LuceneEngineSearcher(indexSearcher, reader); + this.searcher = new LuceneEngineSearcher(indexSearcher, reader); this.request = request; this.shardTarget = shardTarget; } @@ -64,29 +56,18 @@ public DirectoryReader getReader() { public void setQuery(Query query) { this.query = query; } - - public long getWeightPointer() { - return weightPointer; - } - - public void setWeightPointer(long weightPointer) { - this.weightPointer = weightPointer; - } - + /** + * Returns the number of segments for the registered weight. + */ public int getSegmentCount() { - return segmentCount; - } - - public void setSegmentCount(int segmentCount) { - this.segmentCount = segmentCount; + return -1; } + /** + * Returns the max doc array for all segments of the registered weight. 
+ */ public int[] getSegmentMaxDocs() { - return segmentMaxDocs; - } - - public void setSegmentMaxDocs(int[] segmentMaxDocs) { - this.segmentMaxDocs = segmentMaxDocs; + return null; } @Override @@ -101,11 +82,6 @@ public SearchShardTarget shardTarget() { @Override public void close() throws IOException { - // Release the registered Weight when context is closed - if (weightPointer != 0) { - LuceneEngineSearcher.releaseWeight(weightPointer); - weightPointer = 0; - } searcher.close(); } } diff --git a/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java b/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java index 48c2c8232b96a..f24f9a723bcaf 100644 --- a/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java @@ -12,7 +12,6 @@ import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.exec.CatalogSnapshot; -import org.opensearch.index.engine.exec.CatalogSnapshotLifecycleListener; import org.opensearch.index.engine.exec.EngineReaderManager; import org.opensearch.index.engine.exec.IndexFilterProvider; import org.opensearch.index.engine.exec.SearchExecEngine; @@ -87,36 +86,6 @@ private T getFromSupplier( return supplier.get(); } - // ---- Lifecycle listener helpers ---- - - public List getCatalogSnapshotLifecycleListeners() { - return new ArrayList<>(readerManagers.values()); - } - - public void notifyFilesAdded(Map> filesByFormat) throws IOException { - for (Map.Entry> entry : filesByFormat.entrySet()) { - EngineReaderManager rm = readerManagers.get(entry.getKey()); - if (rm != null) { - rm.onFilesAdded(entry.getValue()); - } - } - } - - public void notifyDelete(Map> filesByFormat) throws IOException { - for (Map.Entry> entry : filesByFormat.entrySet()) { - EngineReaderManager rm = readerManagers.get(entry.getKey()); - if (rm != null) { - 
rm.onFilesDeleted(entry.getValue()); - } - } - } - - public void notifyCatalogSnapshotDelete(CatalogSnapshot catalogSnapshot) throws IOException { - for (CatalogSnapshotLifecycleListener listener : getCatalogSnapshotLifecycleListeners()) { - listener.onDeleted(catalogSnapshot); - } - } - // ---- Snapshot acquisition ---- /** diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CollectorLifecycleManager.java b/server/src/main/java/org/opensearch/index/engine/exec/CollectorQueryLifecycleManager.java similarity index 97% rename from server/src/main/java/org/opensearch/index/engine/exec/CollectorLifecycleManager.java rename to server/src/main/java/org/opensearch/index/engine/exec/CollectorQueryLifecycleManager.java index 5764f6c754231..da24f5d7757e5 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/CollectorLifecycleManager.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/CollectorQueryLifecycleManager.java @@ -30,7 +30,7 @@ * @opensearch.experimental */ @ExperimentalApi -public class CollectorLifecycleManager implements Closeable { +public class CollectorQueryLifecycleManager implements Closeable { private final AtomicInteger nextKey = new AtomicInteger(1); private final Map collectors = new ConcurrentHashMap<>(); diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineCatalogSnapshotListener.java b/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineCatalogSnapshotListener.java new file mode 100644 index 0000000000000..320068bd4b565 --- /dev/null +++ b/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineCatalogSnapshotListener.java @@ -0,0 +1,88 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.index.engine.exec; + +import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.dataformat.DataFormat; + +import java.io.IOException; +import java.util.Collection; +import java.util.Map; + +/** + * Routes {@link CatalogSnapshotLifecycleListener} events through the + * {@link IndexFileDeleter} and then fans out to the per-format + * {@link EngineReaderManager}s. + *

+ * Keeps lifecycle orchestration separate from the engine's component + * registry responsibilities. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class CompositeEngineCatalogSnapshotListener implements CatalogSnapshotLifecycleListener { + + private final Map> readerManagers; + private final IndexFileDeleter indexFileDeleter; + + public CompositeEngineCatalogSnapshotListener( + Map> readerManagers, + IndexFileDeleter indexFileDeleter + ) { + this.readerManagers = readerManagers; + this.indexFileDeleter = indexFileDeleter; + } + + @Override + public void beforeRefresh() throws IOException { + for (CatalogSnapshotLifecycleListener listener : readerManagers.values()) { + listener.beforeRefresh(); + } + } + + @Override + public void afterRefresh(boolean didRefresh, CatalogSnapshot catalogSnapshot) throws IOException { + Map> newFiles = indexFileDeleter.addFileReferences(catalogSnapshot); + if (newFiles.isEmpty() == false) { + notifyFilesAdded(newFiles); + } + for (CatalogSnapshotLifecycleListener listener : readerManagers.values()) { + listener.afterRefresh(didRefresh, catalogSnapshot); + } + } + + @Override + public void onDeleted(CatalogSnapshot catalogSnapshot) throws IOException { + Map> deletedFiles = indexFileDeleter.removeFileReferences(catalogSnapshot); + if (deletedFiles.isEmpty() == false) { + notifyFilesDeleted(deletedFiles); + } + for (CatalogSnapshotLifecycleListener listener : readerManagers.values()) { + listener.onDeleted(catalogSnapshot); + } + } + + private void notifyFilesAdded(Map> filesByFormat) throws IOException { + for (Map.Entry> entry : filesByFormat.entrySet()) { + EngineReaderManager rm = readerManagers.get(entry.getKey()); + if (rm != null) { + rm.onFilesAdded(entry.getValue()); + } + } + } + + private void notifyFilesDeleted(Map> filesByFormat) throws IOException { + for (Map.Entry> entry : filesByFormat.entrySet()) { + EngineReaderManager rm = readerManagers.get(entry.getKey()); + if (rm != null) { + 
rm.onFilesDeleted(entry.getValue()); + } + } + } +} diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineFactory.java b/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineFactory.java index 33828ee97793e..d40d875e3cb2b 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineFactory.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineFactory.java @@ -40,6 +40,8 @@ public class CompositeEngineFactory { private final Map, IOException>> indexFilterProviderSuppliers = new HashMap<>(); private final Map, IOException>> sourceProviderSuppliers = new HashMap<>(); + private final IndexFileDeleter indexFileDeleter; + public CompositeEngineFactory( PluginsService pluginsService, ShardPath shardPath, @@ -55,6 +57,7 @@ public CompositeEngineFactory( sourceProviderSuppliers.put(format, memoize(format, f -> plugin.createSourceProvider(f, shardPath))); } } + this.indexFileDeleter = new IndexFileDeleter(null, shardPath); } /** @@ -91,4 +94,16 @@ public T get() throws IOException { public CompositeEngine create() { return new CompositeEngine(readerManagers, engineSuppliers, indexFilterProviderSuppliers, sourceProviderSuppliers); } + + /** + * Creates a {@link CatalogSnapshotLifecycleListener} that routes events + * through the {@link IndexFileDeleter} and fans out to the given reader managers. 
+ * + * @param readerManagers the per-format reader managers that receive notifications + */ + public CatalogSnapshotLifecycleListener createCatalogSnapshotListener( + Map> readerManagers + ) { + return new CompositeEngineCatalogSnapshotListener(readerManagers, indexFileDeleter); + } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexFileDeleter.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexFileDeleter.java index f26b5da62799f..12121f01dac5f 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/IndexFileDeleter.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexFileDeleter.java @@ -9,34 +9,47 @@ package org.opensearch.index.engine.exec; import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.engine.CompositeEngine; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.shard.ShardPath; import java.io.IOException; import java.util.Collection; +import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; +/** + * Tracks per-format file reference counts and computes which files are newly + * added or fully dereferenced after catalog snapshot changes. + *

+ * This class does not notify reader managers itself — it returns the + * computed change sets so the caller ({@link org.opensearch.index.engine.CompositeEngine}) + * can route notifications to the appropriate reader managers. + * + * @opensearch.experimental + */ @ExperimentalApi public class IndexFileDeleter { private final Map> fileRefCounts = new ConcurrentHashMap<>(); - private final CompositeEngine compositeEngine; - public IndexFileDeleter(CompositeEngine compositeEngine, CatalogSnapshot initialCatalogSnapshot, ShardPath shardPath) - throws IOException { - this.compositeEngine = compositeEngine; + public IndexFileDeleter(CatalogSnapshot initialCatalogSnapshot, ShardPath shardPath) throws IOException { if (initialCatalogSnapshot != null) { addFileReferences(initialCatalogSnapshot); deleteUnreferencedFiles(shardPath); } } - public synchronized void addFileReferences(CatalogSnapshot snapshot) { + /** + * Increments reference counts for all files in the snapshot. + * + * @return files whose reference count went from 0 → 1 (newly added), grouped by format. + * Returns an empty map when there are no new files. + */ + public synchronized Map> addFileReferences(CatalogSnapshot snapshot) { Map> dfSegregatedFiles = segregateFilesByFormat(snapshot); Map> dfNewFiles = new HashMap<>(); @@ -48,21 +61,24 @@ public synchronized void addFileReferences(CatalogSnapshot snapshot) { for (String file : files) { AtomicInteger refCount = dfFileRefCounts.computeIfAbsent(file, k -> new AtomicInteger(0)); if (refCount.incrementAndGet() == 1) { - // First reference — this file is new newFiles.add(file); } } - if (!newFiles.isEmpty()) { + if (newFiles.isEmpty() == false) { dfNewFiles.put(dataFormat, newFiles); } } - if (!dfNewFiles.isEmpty()) { - notifyFilesAdded(dfNewFiles); - } + return dfNewFiles.isEmpty() ? 
Collections.emptyMap() : dfNewFiles; } - public synchronized void removeFileReferences(CatalogSnapshot snapshot) { + /** + * Decrements reference counts for all files in the snapshot. + * + * @return files whose reference count reached 0 (ready for deletion), grouped by format. + * Returns an empty map when there are no files to delete. + */ + public synchronized Map> removeFileReferences(CatalogSnapshot snapshot) { Map> dfSegregatedFiles = segregateFilesByFormat(snapshot); Map> dfFilesToDelete = new HashMap<>(); @@ -80,30 +96,12 @@ public synchronized void removeFileReferences(CatalogSnapshot snapshot) { } } } - if (!filesToDelete.isEmpty()) { + if (filesToDelete.isEmpty() == false) { dfFilesToDelete.put(dataFormat, filesToDelete); } } - if (!dfFilesToDelete.isEmpty()) { - notifyFilesDeleted(dfFilesToDelete); - } - } - - private void notifyFilesAdded(Map> dfNewFiles) { - try { - compositeEngine.notifyFilesAdded(dfNewFiles); - } catch (Exception e) { - System.err.println("Failed to notify new files: " + dfNewFiles + ", error: " + e.getMessage()); - } - } - - private void notifyFilesDeleted(Map> dfFilesToDelete) { - try { - compositeEngine.notifyDelete(dfFilesToDelete); - } catch (Exception e) { - System.err.println("Failed to delete unreferenced files: " + dfFilesToDelete + ", error: " + e.getMessage()); - } + return dfFilesToDelete.isEmpty() ? 
Collections.emptyMap() : dfFilesToDelete; } private Map> segregateFilesByFormat(CatalogSnapshot snapshot) { From b80dc730d12184a93c6ae3306389948564e0ec36 Mon Sep 17 00:00:00 2001 From: bharath-techie Date: Fri, 20 Mar 2026 18:38:10 +0530 Subject: [PATCH 06/11] wiring result stream and adding tests Signed-off-by: bharath-techie --- .../opensearch/common/CheckedTriFunction.java | 1 - .../analytics/backend/EngineResultBatch.java | 39 ++ .../backend/EngineResultBatchIterator.java | 18 + .../analytics/backend/EngineResultStream.java | 28 ++ ...java => AnalyticsSearchBackendPlugin.java} | 7 +- .../be/datafusion/DataFusionPlugin.java | 8 +- .../be/datafusion/DatafusionContext.java | 49 +- .../be/datafusion/DatafusionResultStream.java | 85 ++++ .../DatafusionSearchExecEngine.java | 42 +- .../be/datafusion/DatafusionSearcher.java | 21 +- .../be/datafusion/jni/NativeBridge.java | 40 +- .../be/datafusion/jni/StreamHandle.java | 34 ++ .../be/datafusion/package-info.java | 9 + ...nalytics.spi.AnalyticsSearchBackendPlugin} | 0 .../be/lucene/LuceneEngineSearcher.java | 2 +- .../be/lucene/LuceneIndexFilterProvider.java | 2 - .../be/lucene/LuceneSearchContext.java | 7 +- .../be/lucene/LuceneSearchEnginePlugin.java | 5 +- .../be/lucene/LuceneSearchExecEngine.java | 26 +- .../opensearch/be/lucene/package-info.java | 9 + ...analytics.spi.AnalyticsSearchBackendPlugin | 1 + .../opensearch/analytics/AnalyticsPlugin.java | 13 +- .../analytics/exec/DefaultPlanExecutor.java | 96 +++- .../engine/DefaultPlanExecutorTests.java | 6 +- .../org/opensearch/index/IndexModule.java | 5 +- .../org/opensearch/index/IndexService.java | 16 +- ...Engine.java => DataFormatAwareEngine.java} | 103 +++-- .../index/engine/IndexFilterTree.java | 5 +- .../CatalogSnapshotAwareRefreshListener.java | 0 .../exec/CatalogSnapshotDeleteListener.java | 0 ...java => DataFormatAwareEngineFactory.java} | 33 +- ...aFormatEngineCatalogSnapshotListener.java} | 4 +- .../index/engine/exec/DataFormatRegistry.java | 0 
.../index/engine/exec/IndexFileDeleter.java | 3 +- .../index/engine/exec/SearchExecEngine.java | 42 +- .../opensearch/index/shard/IndexShard.java | 22 +- .../opensearch/indices/IndicesService.java | 14 +- ...ndPlugin.java => SearchBackEndPlugin.java} | 10 +- .../opensearch/index/IndexModuleTests.java | 3 +- .../dataformat/DataFormatPluginTests.java | 418 ++++++++++++++++++ 40 files changed, 1012 insertions(+), 214 deletions(-) create mode 100644 sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultBatch.java create mode 100644 sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultBatchIterator.java create mode 100644 sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultStream.java rename sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/{AnalyticsBackEndPlugin.java => AnalyticsSearchBackendPlugin.java} (82%) create mode 100644 sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionResultStream.java create mode 100644 sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/StreamHandle.java create mode 100644 sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/package-info.java rename sandbox/plugins/analytics-backend-datafusion/src/main/resources/META-INF/services/{org.opensearch.analytics.spi.AnalyticsBackEndPlugin => org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin} (100%) create mode 100644 sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/package-info.java create mode 100644 sandbox/plugins/analytics-backend-lucene/src/main/resources/META-INF/services/org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin rename server/src/main/java/org/opensearch/index/engine/{CompositeEngine.java => DataFormatAwareEngine.java} (58%) delete mode 100644 
server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotAwareRefreshListener.java delete mode 100644 server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotDeleteListener.java rename server/src/main/java/org/opensearch/index/engine/exec/{CompositeEngineFactory.java => DataFormatAwareEngineFactory.java} (76%) rename server/src/main/java/org/opensearch/index/engine/exec/{CompositeEngineCatalogSnapshotListener.java => DataFormatEngineCatalogSnapshotListener.java} (95%) delete mode 100644 server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistry.java rename server/src/main/java/org/opensearch/plugins/{SearchAnalyticsBackEndPlugin.java => SearchBackEndPlugin.java} (91%) diff --git a/libs/common/src/main/java/org/opensearch/common/CheckedTriFunction.java b/libs/common/src/main/java/org/opensearch/common/CheckedTriFunction.java index 13fec0b45425f..53d2adb3951b8 100644 --- a/libs/common/src/main/java/org/opensearch/common/CheckedTriFunction.java +++ b/libs/common/src/main/java/org/opensearch/common/CheckedTriFunction.java @@ -9,7 +9,6 @@ package org.opensearch.common; import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.common.annotation.InternalApi; /** * A {@link TriFunction}-like interface which allows throwing checked exceptions. diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultBatch.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultBatch.java new file mode 100644 index 0000000000000..d062bcfe079af --- /dev/null +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultBatch.java @@ -0,0 +1,39 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.analytics.backend; + +import java.util.List; + +/** + * Read-only view of a single record batch. Provides field names, row count, + * and positional access to field values. + * + * @opensearch.internal + */ +public interface EngineResultBatch { + + /** + * Ordered list of field (column) names in this batch. + */ + List getFieldNames(); + + /** + * Number of rows in this batch. + */ + int getRowCount(); + + /** + * Returns the value at the given row index for the named field. + * + * @param fieldName column name + * @param rowIndex zero-based row index + * @return the value (may be null) + */ + Object getFieldValue(String fieldName, int rowIndex); +} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultBatchIterator.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultBatchIterator.java new file mode 100644 index 0000000000000..1de5bbd5b64c5 --- /dev/null +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultBatchIterator.java @@ -0,0 +1,18 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.analytics.backend; + +import java.util.Iterator; + +/** + * Single-pass iterator over record batches from an {@link EngineResultStream}. 
+ * + * @opensearch.internal + */ +public interface EngineResultBatchIterator extends Iterator {} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultStream.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultStream.java new file mode 100644 index 0000000000000..7c189b4079889 --- /dev/null +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultStream.java @@ -0,0 +1,28 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.analytics.backend; + +/** + * A closeable stream of record batches returned by engine execution. + * Callers iterate batches via the returned iterator and MUST close the stream + * when done to release native resources. + * + * @opensearch.internal + */ +public interface EngineResultStream extends AutoCloseable { + + /** + * Returns an iterator over the record batches in this stream. + * Each call returns the same iterator instance — the stream is single-pass. 
+ */ + EngineResultBatchIterator iterator(); + + @Override + void close(); +} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsBackEndPlugin.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java similarity index 82% rename from sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsBackEndPlugin.java rename to sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java index 3a508e7f52345..a942c70f0328d 100644 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsBackEndPlugin.java +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java @@ -10,16 +10,13 @@ import org.apache.calcite.sql.SqlOperatorTable; import org.opensearch.analytics.backend.EngineBridge; -import org.opensearch.plugins.SearchAnalyticsBackEndPlugin; - -import java.io.IOException; -import java.util.List; +import org.opensearch.plugins.SearchBackEndPlugin; /** * SPI extension point for back-end query engines (DataFusion, Lucene, etc.). * @opensearch.internal */ -public interface AnalyticsBackEndPlugin extends SearchAnalyticsBackEndPlugin { +public interface AnalyticsSearchBackendPlugin extends SearchBackEndPlugin { /** Unique engine name (e.g., "lucene", "datafusion"). 
*/ String name(); diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java index 461a30b0ff5bf..7987b2d16d0c0 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java @@ -12,7 +12,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.analytics.backend.EngineBridge; -import org.opensearch.analytics.spi.AnalyticsBackEndPlugin; +import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.Setting; @@ -43,9 +43,9 @@ *

* Initializes the {@link DataFusionService} at node startup and creates * per-shard {@link DatafusionSearchExecEngine} instances via the - * {@link AnalyticsBackEndPlugin} SPI. + * {@link AnalyticsSearchBackendPlugin} SPI. */ -public class DataFusionPlugin extends Plugin implements AnalyticsBackEndPlugin { +public class DataFusionPlugin extends Plugin implements AnalyticsSearchBackendPlugin { private static final Logger logger = LogManager.getLogger(DataFusionPlugin.class); @@ -118,7 +118,7 @@ public EngineReaderManager createReaderManager(DataFormat format, ShardPath s } @Override - public SearchExecEngine createSearchExecEngine(DataFormat format, ShardPath shardPath) throws IOException { + public SearchExecEngine createSearchExecEngine(DataFormat format, ShardPath shardPath) throws IOException { if (dataFusionService == null) { throw new IllegalStateException("DataFusionPlugin.createComponents() has not been called yet"); } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java index 2baab4bde6da1..d9a85ef04edb0 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java @@ -8,6 +8,7 @@ package org.opensearch.be.datafusion; +import org.opensearch.be.datafusion.jni.StreamHandle; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.IndexFilterTree; import org.opensearch.search.SearchExecutionContext; @@ -20,7 +21,7 @@ * DataFusion-specific search execution context. *

* Carries the DataFusion query plan, engine searcher, optional {@link IndexFilterTree}, - * and columnar results. + * and the native result stream handle after execution. * * @opensearch.experimental */ @@ -30,17 +31,21 @@ public class DatafusionContext implements SearchExecutionContext { private final ShardSearchRequest request; private final SearchShardTarget shardTarget; private final DatafusionSearcher engineSearcher; + private final NativeRuntimeHandle nativeRuntime; private DatafusionQuery datafusionQuery; private IndexFilterTree filterTree; + private StreamHandle streamHandle; public DatafusionContext( ShardSearchRequest request, SearchShardTarget shardTarget, - DatafusionReader reader + DatafusionReader reader, + NativeRuntimeHandle nativeRuntime ) throws IOException { this.request = request; this.shardTarget = shardTarget; this.engineSearcher = new DatafusionSearcher(reader.getReaderHandle()); + this.nativeRuntime = nativeRuntime; } @Override @@ -56,11 +61,18 @@ public SearchShardTarget shardTarget() { @Override public void close() throws IOException { try { - if (filterTree != null) { - filterTree.close(); + if (streamHandle != null) { + streamHandle.close(); + streamHandle = null; } } finally { - engineSearcher.close(); + try { + if (filterTree != null) { + filterTree.close(); + } + } finally { + engineSearcher.close(); + } } } @@ -70,6 +82,13 @@ public DatafusionSearcher getEngineSearcher() { return engineSearcher; } + /** + * Returns the live native runtime pointer for JNI calls. + */ + public long getRuntimePtr() { + return nativeRuntime.get(); + } + public DatafusionQuery getDatafusionQuery() { return datafusionQuery; } @@ -78,19 +97,25 @@ public void setDatafusionQuery(DatafusionQuery query) { this.datafusionQuery = query; } - /** - * Returns the optional filter tree for indexed parquet queries. - * {@code null} indicates a pure parquet query with no external index involvement. 
- */ public IndexFilterTree getFilterTree() { return filterTree; } - /** - * Sets the filter tree for indexed parquet queries. - */ public void setFilterTree(IndexFilterTree filterTree) { this.filterTree = filterTree; } + /** + * Returns the native result stream handle, or {@code null} if execution has not completed. + */ + public StreamHandle getStreamHandle() { + return streamHandle; + } + + /** + * Sets the native result stream handle after query execution. + */ + public void setStreamHandle(StreamHandle streamHandle) { + this.streamHandle = streamHandle; + } } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionResultStream.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionResultStream.java new file mode 100644 index 0000000000000..a5e1dc79786e0 --- /dev/null +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionResultStream.java @@ -0,0 +1,85 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.datafusion; + +import org.opensearch.analytics.backend.EngineResultBatch; +import org.opensearch.analytics.backend.EngineResultBatchIterator; +import org.opensearch.analytics.backend.EngineResultStream; +import org.opensearch.be.datafusion.jni.NativeBridge; +import org.opensearch.be.datafusion.jni.StreamHandle; +import org.opensearch.common.annotation.ExperimentalApi; + +import java.util.NoSuchElementException; + +/** + * {@link EngineResultStream} backed by a native DataFusion record batch stream. + *

+ * Reads Arrow record batches from the native stream via JNI and exposes them + * as {@link EngineResultBatch} instances. The stream is single-pass; calling + * {@link #iterator()} multiple times returns the same iterator. + * + * @opensearch.experimental + */ +@ExperimentalApi +public class DatafusionResultStream implements EngineResultStream { + + private final StreamHandle streamHandle; + private volatile BatchIterator iteratorInstance; + + public DatafusionResultStream(StreamHandle streamHandle) { + this.streamHandle = streamHandle; + } + + @Override + public EngineResultBatchIterator iterator() { + if (iteratorInstance == null) { + iteratorInstance = new BatchIterator(streamHandle); + } + return iteratorInstance; + } + + @Override + public void close() { + streamHandle.close(); + } + + /** + * Iterator that pulls Arrow record batches from the native stream via JNI. + * Each call to {@link #next()} returns a batch wrapping the current Arrow data. + */ + static class BatchIterator implements EngineResultBatchIterator { + + private final StreamHandle streamHandle; + private Boolean hasNext; + + BatchIterator(StreamHandle streamHandle) { + this.streamHandle = streamHandle; + } + + @Override + public boolean hasNext() { + if (hasNext == null) { + long arrowArrayAddr = NativeBridge.streamNext(streamHandle.getStreamPtr(), streamHandle.getPointer()); + hasNext = arrowArrayAddr != 0; + // TODO: if hasNext, import ArrowArray into VectorSchemaRoot and cache for next() + } + return hasNext; + } + + @Override + public EngineResultBatch next() { + if (hasNext() == false) { + throw new NoSuchElementException(); + } + hasNext = null; + // TODO: return batch wrapping the imported VectorSchemaRoot + throw new UnsupportedOperationException("Arrow C Data import not yet wired"); + } + } +} diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java 
b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java index 87492d0926c38..ea5deba39de0f 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java @@ -10,24 +10,23 @@ import org.opensearch.action.search.SearchShardTask; import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.engine.IndexFilterTree; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.exec.SearchExecEngine; import org.opensearch.search.SearchShardTarget; import org.opensearch.search.internal.ShardSearchRequest; import java.io.IOException; -import java.io.UncheckedIOException; -import java.util.Collections; -import java.util.Iterator; /** * DataFusion-backed search execution engine. + *

+ * Converts logical plan fragments to Substrait, executes them via the native + * DataFusion runtime, and returns results as a {@link DatafusionResultStream}. * * @opensearch.experimental */ @ExperimentalApi -public class DatafusionSearchExecEngine implements SearchExecEngine { +public class DatafusionSearchExecEngine implements SearchExecEngine { private final NativeRuntimeHandle nativeRuntime; @@ -36,40 +35,29 @@ public DatafusionSearchExecEngine(NativeRuntimeHandle nativeRuntime, DataFormat } @Override - public void execute(DatafusionContext context) throws IOException { - DatafusionSearcher searcher = context.getEngineSearcher(); - IndexFilterTree filterTree = context.getFilterTree(); - if (filterTree != null) { - throw new UnsupportedOperationException("Indexed query path not yet wired"); - } else { - searcher.search(context); - } + public byte[] convertFragment(Object fragment) { + // TODO: wire Substrait conversion (RelNode → Substrait bytes) + throw new UnsupportedOperationException("Substrait conversion not yet wired"); } @Override public DatafusionContext createContext( Object reader, + byte[] plan, ShardSearchRequest request, SearchShardTarget shardTarget, SearchShardTask task ) throws IOException { DatafusionReader dfReader = (DatafusionReader) reader; - return new DatafusionContext(request, shardTarget, dfReader); + DatafusionContext context = new DatafusionContext(request, shardTarget, dfReader, nativeRuntime); + context.setDatafusionQuery(new DatafusionQuery("", plan)); + return context; } @Override - public byte[] convertFragment(Object fragment) { - throw new UnsupportedOperationException("Substrait conversion not yet wired"); - } - - @Override - public Iterator executePlan(byte[] plan, DatafusionContext context) { - try { - context.setDatafusionQuery(new DatafusionQuery("", plan)); - execute(context); - return Collections.emptyIterator(); - } catch (IOException e) { - throw new UncheckedIOException(e); - } + public DatafusionResultStream 
execute(DatafusionContext context) throws IOException { + DatafusionSearcher searcher = context.getEngineSearcher(); + searcher.search(context); + return new DatafusionResultStream(context.getStreamHandle()); } } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java index a2de225434920..b9f9d61e76aa1 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearcher.java @@ -8,7 +8,9 @@ package org.opensearch.be.datafusion; +import org.opensearch.be.datafusion.jni.NativeBridge; import org.opensearch.be.datafusion.jni.ReaderHandle; +import org.opensearch.be.datafusion.jni.StreamHandle; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.exec.EngineSearcher; @@ -16,6 +18,9 @@ /** * DataFusion searcher — executes substrait query plans against a native DataFusion reader. + *

+ * After {@link #search}, the result stream handle is available on the context + * via {@link DatafusionContext#getStreamHandle()}. * * @opensearch.experimental */ @@ -38,13 +43,21 @@ public void search(DatafusionContext context) throws IOException { } private void searchWithFilterTree(DatafusionContext context) { - // TODO: wire NativeBridge — execute substrait plan, consume stream, populate context - throw new UnsupportedOperationException("DataFusion native bridge not yet wired"); + throw new UnsupportedOperationException("Indexed query path not yet wired"); } private void searchVanilla(DatafusionContext context) throws IOException { - // TODO: wire NativeBridge — execute substrait plan, consume stream, populate context - throw new UnsupportedOperationException("DataFusion native bridge not yet wired"); + DatafusionQuery query = context.getDatafusionQuery(); + if (query == null) { + throw new IllegalStateException("DatafusionQuery must be set before search"); + } + long streamPtr = NativeBridge.executeQuery( + readerHandle.getPointer(), + query.getIndexName(), + query.getSubstraitBytes(), + context.getRuntimePtr() + ); + context.setStreamHandle(new StreamHandle(streamPtr, context.getRuntimePtr())); } /** diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java index a5b1b29274ba2..20caa6cbd3251 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java @@ -20,8 +20,46 @@ public final class NativeBridge { private NativeBridge() {} - // Reader management public static native long createDatafusionReader(String path, String[] files); public static native void closeDatafusionReader(long ptr); + + public static 
native long createGlobalRuntime(long memoryLimit, long cacheManagerPtr, String spillDir, long spillLimit); + + public static native void closeGlobalRuntime(long ptr); + + /** + * Executes a substrait plan against the given reader and returns a stream pointer. + * + * @param readerPtr native reader pointer + * @param tableName table name for registration with DataFusion + * @param substraitPlan serialized substrait plan bytes + * @param runtimePtr native runtime pointer + * @return native stream pointer (caller must close via {@link #streamClose}) + */ + public static native long executeQuery(long readerPtr, String tableName, byte[] substraitPlan, long runtimePtr); + + /** + * Returns the Arrow schema address for the given stream. + * + * @param streamPtr native stream pointer + * @return ArrowSchema C Data Interface address + */ + public static native long streamGetSchema(long streamPtr); + + /** + * Loads the next record batch from the stream. + * + * @param runtimePtr native runtime pointer + * @param streamPtr native stream pointer + * @return ArrowArray C Data Interface address, or 0 if end-of-stream + */ + public static native long streamNext(long runtimePtr, long streamPtr); + + /** + * Closes the native stream and releases associated resources. 
+ * + * @param streamPtr native stream pointer + */ + public static native void streamClose(long streamPtr); } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/StreamHandle.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/StreamHandle.java new file mode 100644 index 0000000000000..53b380867e90b --- /dev/null +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/StreamHandle.java @@ -0,0 +1,34 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.datafusion.jni; + +import org.opensearch.analytics.backend.jni.NativeHandle; + +/** + * Type-safe handle for a native DataFusion result stream. + * Wraps the stream pointer returned by {@link NativeBridge#executeQuery}. + */ +public final class StreamHandle extends NativeHandle { + + private final long streamPtr; + + public StreamHandle(long ptr, long streamPtr) { + super(ptr); + this.streamPtr = streamPtr; + } + + public long getStreamPtr() { + return streamPtr; + } + + @Override + protected void doClose() { + NativeBridge.streamClose(ptr); + } +} diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/package-info.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/package-info.java new file mode 100644 index 0000000000000..07ffaf562b3f0 --- /dev/null +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/package-info.java @@ -0,0 +1,9 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.be.datafusion; diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/resources/META-INF/services/org.opensearch.analytics.spi.AnalyticsBackEndPlugin b/sandbox/plugins/analytics-backend-datafusion/src/main/resources/META-INF/services/org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin similarity index 100% rename from sandbox/plugins/analytics-backend-datafusion/src/main/resources/META-INF/services/org.opensearch.analytics.spi.AnalyticsBackEndPlugin rename to sandbox/plugins/analytics-backend-datafusion/src/main/resources/META-INF/services/org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java index 4554882cba0d3..6cd3605499c07 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java @@ -52,7 +52,7 @@ public void search(LuceneSearchContext context) throws IOException { Query rewritten = indexSearcher.rewrite(query); Weight weight = indexSearcher.createWeight(rewritten, ScoreMode.COMPLETE_NO_SCORES, 1.0f); List leaves = directoryReader.leaves(); - //TODO : Complete the wiring for search execution + // TODO : Complete the wiring for search execution } diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java index 5a45f916e5762..9aae1e997b2b2 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java +++ 
b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java @@ -33,7 +33,6 @@ public LuceneIndexFilterContext createContext(Query query, DirectoryReader reade return new LuceneIndexFilterContext(query, reader); } - /** * Creates a collector for the given segment and registers it in the * context's {@link CollectorQueryLifecycleManager}. @@ -75,7 +74,6 @@ private SegmentCollector createCollectorInternal(LuceneIndexFilterContext contex } } - private static final SegmentCollector EMPTY_COLLECTOR = (min, max) -> new long[0]; private static class LuceneSegmentCollector implements SegmentCollector { diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java index 7509f6ec1cd0a..2851d2759b180 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java @@ -33,11 +33,7 @@ public class LuceneSearchContext implements SearchExecutionContext { private final LuceneEngineSearcher searcher; private Query query; - public LuceneSearchContext( - ShardSearchRequest request, - SearchShardTarget shardTarget, - DirectoryReader reader - ) throws IOException { + public LuceneSearchContext(ShardSearchRequest request, SearchShardTarget shardTarget, DirectoryReader reader) throws IOException { this.reader = reader; IndexSearcher indexSearcher = new IndexSearcher(reader); this.searcher = new LuceneEngineSearcher(indexSearcher, reader); @@ -56,6 +52,7 @@ public DirectoryReader getReader() { public void setQuery(Query query) { this.query = query; } + /** * Returns the number of segments for the registered weight. 
*/ diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java index 2a747086679f0..9de3cf5d53cfe 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java @@ -10,14 +10,13 @@ import org.apache.calcite.sql.SqlOperatorTable; import org.opensearch.analytics.backend.EngineBridge; -import org.opensearch.analytics.spi.AnalyticsBackEndPlugin; +import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.exec.EngineReaderManager; import org.opensearch.index.engine.exec.IndexFilterProvider; import org.opensearch.index.engine.exec.SourceProvider; import org.opensearch.index.shard.ShardPath; -import org.opensearch.plugins.Plugin; import java.io.IOException; import java.util.List; @@ -28,7 +27,7 @@ * @opensearch.experimental */ @ExperimentalApi -public class LuceneSearchEnginePlugin implements AnalyticsBackEndPlugin { +public class LuceneSearchEnginePlugin implements AnalyticsSearchBackendPlugin { @Override public String name() { diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchExecEngine.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchExecEngine.java index a0b8e37656170..c899fdbe9263c 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchExecEngine.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchExecEngine.java @@ -25,22 +25,20 @@ * @opensearch.experimental */ 
@ExperimentalApi -public class LuceneSearchExecEngine implements SearchExecEngine { +public class LuceneSearchExecEngine implements SearchExecEngine { @Override - public void execute(LuceneSearchContext context) throws IOException { - DirectoryReader reader = context.getReader(); - LuceneEngineSearcher searcher = new LuceneEngineSearcher(new IndexSearcher(reader), reader); - try { - searcher.search(context); - } finally { - searcher.close(); + public Query convertFragment(Object fragment) { + if (fragment instanceof Query) { + return (Query) fragment; } + throw new UnsupportedOperationException("Expected Lucene Query, got " + fragment.getClass().getSimpleName()); } @Override public LuceneSearchContext createContext( Object reader, + Query plan, ShardSearchRequest request, SearchShardTarget shardTarget, SearchShardTask task @@ -50,10 +48,14 @@ public LuceneSearchContext createContext( } @Override - public Query convertFragment(Object fragment) { - if (fragment instanceof Query) { - return (Query) fragment; + public Void execute(LuceneSearchContext context) throws IOException { + DirectoryReader reader = context.getReader(); + LuceneEngineSearcher searcher = new LuceneEngineSearcher(new IndexSearcher(reader), reader); + try { + searcher.search(context); + } finally { + searcher.close(); } - throw new UnsupportedOperationException("Expected Lucene Query, got " + fragment.getClass().getSimpleName()); + return null; // TODO : figure out this path or remove this class for now } } diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/package-info.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/package-info.java new file mode 100644 index 0000000000000..f34e1c6276645 --- /dev/null +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/package-info.java @@ -0,0 +1,9 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require 
contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.be.lucene; diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/resources/META-INF/services/org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin b/sandbox/plugins/analytics-backend-lucene/src/main/resources/META-INF/services/org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin new file mode 100644 index 0000000000000..53330f0ac02ef --- /dev/null +++ b/sandbox/plugins/analytics-backend-lucene/src/main/resources/META-INF/services/org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin @@ -0,0 +1 @@ +org.opensearch.be.lucene.LuceneSearchEnginePlugin diff --git a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/AnalyticsPlugin.java b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/AnalyticsPlugin.java index 1191e4215afb2..9d4132031aab6 100644 --- a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/AnalyticsPlugin.java +++ b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/AnalyticsPlugin.java @@ -17,7 +17,7 @@ import org.opensearch.analytics.exec.DefaultPlanExecutor; import org.opensearch.analytics.exec.QueryPlanExecutor; import org.opensearch.analytics.schema.OpenSearchSchemaBuilder; -import org.opensearch.analytics.spi.AnalyticsBackEndPlugin; +import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.inject.Module; @@ -54,12 +54,12 @@ public class AnalyticsPlugin extends Plugin implements ExtensiblePlugin { */ public AnalyticsPlugin() {} - private final List backEnds = new ArrayList<>(); + private final List backEnds = new ArrayList<>(); private SqlOperatorTable operatorTable; @Override public void loadExtensions(ExtensionLoader loader) { - 
backEnds.addAll(loader.loadExtensions(AnalyticsBackEndPlugin.class)); + backEnds.addAll(loader.loadExtensions(AnalyticsSearchBackendPlugin.class)); operatorTable = aggregateOperatorTables(); } @@ -77,7 +77,10 @@ public Collection createComponents( IndexNameExpressionResolver indexNameExpressionResolver, Supplier repositoriesServiceSupplier ) { - return List.of(new DefaultPlanExecutor(backEnds), new DefaultEngineContext(clusterService, operatorTable)); + return List.of( + new DefaultPlanExecutor(backEnds, null/* TODO: pass indices service */, clusterService), + new DefaultEngineContext(clusterService, operatorTable) + ); } @Override @@ -92,7 +95,7 @@ public Collection createGuiceModules() { private SqlOperatorTable aggregateOperatorTables() { List tables = new ArrayList<>(); - for (AnalyticsBackEndPlugin backEnd : backEnds) { + for (AnalyticsSearchBackendPlugin backEnd : backEnds) { SqlOperatorTable table = backEnd.operatorTable(); if (table != null) { tables.add(table); diff --git a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java index a766466fc7b47..1c3b904faeca4 100644 --- a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java +++ b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java @@ -9,37 +9,107 @@ package org.opensearch.analytics.exec; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.TableScan; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.analytics.spi.AnalyticsBackEndPlugin; +import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.index.IndexService; +import 
org.opensearch.index.engine.DataFormatAwareEngine; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.exec.SearchExecEngine; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.indices.IndicesService; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; +import java.util.Map; +import java.util.Set; /** * {@link QueryPlanExecutor} default implementation. + *

+ * Acquires a {@link DataFormatAwareEngine.DataFormatAwareReader} on the latest catalog snapshot, + * then routes plan fragments to the appropriate {@link SearchExecEngine} per data format. + * The composite reader holds the snapshot reference alive for the duration of the search. */ public class DefaultPlanExecutor implements QueryPlanExecutor> { private static final Logger logger = LogManager.getLogger(DefaultPlanExecutor.class); + private final Map backEnds; + private final IndicesService indicesService; + private final ClusterService clusterService; - /** - * Creates a plan executor with the given back-end plugins. - * - * @param backEnds registered back-end engine plugins - */ - public DefaultPlanExecutor(List backEnds) { - // TODO: use back-ends + public DefaultPlanExecutor(List plugins, IndicesService indicesService, ClusterService clusterService) { + this.backEnds = new LinkedHashMap<>(); + for (AnalyticsSearchBackendPlugin plugin : plugins) { + this.backEnds.put(plugin.name(), plugin); + } + this.indicesService = indicesService; + this.clusterService = clusterService; } + @SuppressWarnings("unchecked") @Override public Iterable execute(RelNode logicalFragment, Object context) { - RelNode fragment = logicalFragment; - int fieldCount = fragment.getRowType().getFieldCount(); + // TODO : wire this properly , this is just to give an idea of flow + AnalyticsSearchBackendPlugin plugin = selectBackEnd(); + String tableName = extractTableName(logicalFragment); + DataFormatAwareEngine dataFormatAwareEngine = resolveCompositeEngine(tableName); - logger.debug("[DefaultPlanExecutor] Executing fragment with {} fields: {}", fieldCount, fragment.explain()); + List formats = plugin.getSupportedFormats(); + DataFormat format = formats.get(0); - // Stub: return empty result set. - return new ArrayList<>(); + // Acquire composite reader — incRefs the latest catalog snapshot. + // Closing the reader decRefs the snapshot, allowing file cleanup. 
+ try (DataFormatAwareEngine.DataFormatAwareReader dataFormatAwareReader = dataFormatAwareEngine.acquireReader()) { + Object reader = dataFormatAwareReader.getReader(format); + SearchExecEngine searchEngine = dataFormatAwareEngine.getSearchExecEngine(format); + Object plan = searchEngine.convertFragment(logicalFragment); + var engineContext = searchEngine.createContext(reader, plan, null, null, null); + Object result = searchEngine.execute(engineContext); + + // TODO: consume result stream into rows + logger.info("[DefaultPlanExecutor] Executed via [{}]", plugin.name()); + return new ArrayList<>(); + } catch (Exception e) { + throw new RuntimeException("Execution failed for [" + plugin.name() + "]", e); + } + } + + // TODO: Placeholder logic + static String extractTableName(RelNode node) { + if (node instanceof TableScan) { + List qn = node.getTable().getQualifiedName(); + return qn.get(qn.size() - 1); + } + for (RelNode input : node.getInputs()) { + String name = extractTableName(input); + if (name != null) return name; + } + throw new IllegalArgumentException("No TableScan found in plan fragment"); + } + + // TODO: Placeholder logic + private DataFormatAwareEngine resolveCompositeEngine(String indexName) { + IndexMetadata meta = clusterService.state().metadata().index(indexName); + if (meta == null) throw new IllegalArgumentException("Index [" + indexName + "] not found"); + IndexService indexService = indicesService.indexService(meta.getIndex()); + if (indexService == null) throw new IllegalStateException("Index [" + indexName + "] not on this node"); + Set shardIds = indexService.shardIds(); + if (shardIds.isEmpty()) throw new IllegalStateException("No shards for [" + indexName + "]"); + IndexShard shard = indexService.getShardOrNull(shardIds.iterator().next()); + if (shard == null) throw new IllegalStateException("Shard not found"); + DataFormatAwareEngine ce = shard.getCompositeEngine(); + if (ce == null) throw new IllegalStateException("No CompositeEngine on 
shard"); + return ce; + } + + // TODO: Placeholder logic + private AnalyticsSearchBackendPlugin selectBackEnd() { + if (backEnds.isEmpty()) throw new IllegalStateException("No back-end plugins registered"); + return backEnds.values().iterator().next(); } } diff --git a/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/DefaultPlanExecutorTests.java b/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/DefaultPlanExecutorTests.java index a61246f3dfc41..51a9b39c8dab4 100644 --- a/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/DefaultPlanExecutorTests.java +++ b/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/DefaultPlanExecutorTests.java @@ -45,7 +45,7 @@ public void setUp() throws Exception { * Test that execute() does not throw for a valid fragment. */ public void testExecuteDoesNotThrowForValidFragment() { - DefaultPlanExecutor service = new DefaultPlanExecutor(List.of()); + DefaultPlanExecutor service = new DefaultPlanExecutor(List.of(), null, null); RelNode fragment = createRelNodeWithFieldCount(3); Object context = new Object(); @@ -58,7 +58,7 @@ public void testExecuteDoesNotThrowForValidFragment() { * Test that execute() works with a multi-field fragment. */ public void testExecuteWithMultiFieldFragment() { - DefaultPlanExecutor service = new DefaultPlanExecutor(List.of()); + DefaultPlanExecutor service = new DefaultPlanExecutor(List.of(), null, null); int fieldCount = 5; RelNode fragment = createRelNodeWithFieldCount(fieldCount); @@ -72,7 +72,7 @@ public void testExecuteWithMultiFieldFragment() { * Test that execute() works with a single-field fragment. 
*/ public void testExecuteWithSingleFieldFragment() { - DefaultPlanExecutor service = new DefaultPlanExecutor(List.of()); + DefaultPlanExecutor service = new DefaultPlanExecutor(List.of(), null, null); RelNode fragment = createRelNodeWithFieldCount(1); Object context = new Object(); diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java index c12c1ed21a2e1..2dc861b54f94a 100644 --- a/server/src/main/java/org/opensearch/index/IndexModule.java +++ b/server/src/main/java/org/opensearch/index/IndexModule.java @@ -75,8 +75,7 @@ import org.opensearch.index.engine.Engine; import org.opensearch.index.engine.EngineConfigFactory; import org.opensearch.index.engine.EngineFactory; -import org.opensearch.index.engine.exec.CompositeEngineFactory; -import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.exec.DataFormatAwareEngineFactory; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.IndexEventListener; import org.opensearch.index.shard.IndexShard; @@ -745,7 +744,7 @@ public IndexService newIndexService( Function segmentReplicationStatsProvider, Supplier clusterDefaultMaxMergeAtOnceSupplier, ClusterMergeSchedulerConfig clusterMergeSchedulerConfig, - CheckedTriFunction compositeEngineFactorySupplier + CheckedTriFunction compositeEngineFactorySupplier ) throws IOException { final IndexEventListener eventListener = freeze(); Function> readerWrapperFactory = indexReaderWrapper diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index 940a3968dc0af..174168057b985 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -79,7 +79,7 @@ import org.opensearch.index.engine.EngineConfigFactory; import org.opensearch.index.engine.EngineFactory; import 
org.opensearch.index.engine.MergedSegmentWarmerFactory; -import org.opensearch.index.engine.exec.CompositeEngineFactory; +import org.opensearch.index.engine.exec.DataFormatAwareEngineFactory; import org.opensearch.index.fielddata.IndexFieldDataCache; import org.opensearch.index.fielddata.IndexFieldDataService; import org.opensearch.index.mapper.MapperService; @@ -211,8 +211,12 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust private volatile TimeValue refreshInterval; private volatile boolean shardLevelRefreshEnabled; private final IndexStorePlugin.StoreFactory storeFactory; - private final CheckedTriFunction - compositeEngineFactorySupplier; + private final CheckedTriFunction< + ShardPath, + MapperService, + IndexSettings, + DataFormatAwareEngineFactory, + IOException> compositeEngineFactorySupplier; @InternalApi public IndexService( @@ -260,7 +264,7 @@ public IndexService( Function segmentReplicationStatsProvider, Supplier clusterDefaultMaxMergeAtOnceSupplier, ClusterMergeSchedulerConfig clusterMergeSchedulerConfig, - CheckedTriFunction compositeEngineFactorySupplier + CheckedTriFunction compositeEngineFactorySupplier ) { super(indexSettings); this.storeFactory = storeFactory; @@ -782,7 +786,7 @@ protected void closeInternal() { directoryFactory ); eventListener.onStoreCreated(shardId); - CompositeEngineFactory compositeEngineFactory = compositeEngineFactorySupplier != null + DataFormatAwareEngineFactory dataFormatAwareEngineFactory = compositeEngineFactorySupplier != null ? compositeEngineFactorySupplier.apply(path, mapperService, this.indexSettings) : null; indexShard = new IndexShard( @@ -824,7 +828,7 @@ protected void closeInternal() { clusterService.getClusterApplierService(), this.indexSettings.isSegRepEnabledOrRemoteNode() ? mergedSegmentPublisher : null, this.indexSettings.isSegRepEnabledOrRemoteNode() ? 
referencedSegmentsPublisher : null, - compositeEngineFactory + dataFormatAwareEngineFactory ); eventListener.indexShardStateChanged(indexShard, null, indexShard.state(), "shard created"); eventListener.afterIndexShardCreated(indexShard); diff --git a/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java b/server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java similarity index 58% rename from server/src/main/java/org/opensearch/index/engine/CompositeEngine.java rename to server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java index f24f9a723bcaf..9fc7905487e55 100644 --- a/server/src/main/java/org/opensearch/index/engine/CompositeEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java @@ -12,6 +12,7 @@ import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.exec.CatalogSnapshot; +import org.opensearch.index.engine.exec.DataFormatAwareEngineFactory; import org.opensearch.index.engine.exec.EngineReaderManager; import org.opensearch.index.engine.exec.IndexFilterProvider; import org.opensearch.index.engine.exec.SearchExecEngine; @@ -21,6 +22,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.Map; @@ -28,25 +30,26 @@ * Owns all reader managers, lazily creates search engines, index filter providers * and source providers per data format. *

- * Instances are created by {@link org.opensearch.index.engine.exec.CompositeEngineFactory}. + * Instances are created by {@link DataFormatAwareEngineFactory}. * * @opensearch.experimental */ @ExperimentalApi -public class CompositeEngine implements Closeable { +public class DataFormatAwareEngine implements Closeable { private final Map> readerManagers; - private final Map, IOException>> engineSuppliers; + private final Map, IOException>> engineSuppliers; private final Map, IOException>> indexFilterProviderSuppliers; private final Map, IOException>> sourceProviderSuppliers; + private volatile CatalogSnapshot latestSnapshot; /** * Constructs a new CompositeEngine with pre-built maps. - * Prefer using {@link org.opensearch.index.engine.exec.CompositeEngineFactory#create()}. + * Prefer using {@link DataFormatAwareEngineFactory#create()}. */ - public CompositeEngine( + public DataFormatAwareEngine( Map> readerManagers, - Map, IOException>> engineSuppliers, + Map, IOException>> engineSuppliers, Map, IOException>> indexFilterProviderSuppliers, Map, IOException>> sourceProviderSuppliers ) { @@ -56,13 +59,11 @@ public CompositeEngine( this.sourceProviderSuppliers = sourceProviderSuppliers; } - // ---- Public getters ---- - public EngineReaderManager getReaderManager(DataFormat format) { return readerManagers.get(format); } - public SearchExecEngine getSearchExecEngine(DataFormat format) throws IOException { + public SearchExecEngine getSearchExecEngine(DataFormat format) throws IOException { return getFromSupplier(engineSuppliers, format, "search exec engine"); } @@ -74,11 +75,8 @@ public EngineReaderManager getReaderManager(DataFormat format) { return getFromSupplier(sourceProviderSuppliers, format, "source provider"); } - private T getFromSupplier( - Map> suppliers, - DataFormat format, - String label - ) throws IOException { + private T getFromSupplier(Map> suppliers, DataFormat format, String label) + throws IOException { CheckedSupplier supplier = 
suppliers.get(format); if (supplier == null) { throw new IllegalArgumentException("No " + label + " registered for format: " + format.name()); @@ -86,43 +84,79 @@ private T getFromSupplier( return supplier.get(); } - // ---- Snapshot acquisition ---- + /** + * Called by the catalog snapshot lifecycle listener after a refresh + * to update the latest searchable snapshot. + */ + public void setLatestSnapshot(CatalogSnapshot snapshot) { + CatalogSnapshot prev = this.latestSnapshot; + this.latestSnapshot = snapshot; + if (prev != null) { + prev.decRef(); + } + } /** - * Acquires a snapshot across all reader managers, returning a releasable reference. + * Acquires a DataFormatAwareReader on the latest catalog snapshot. + * The snapshot is incRef'd; the caller MUST close the returned + * {@link DataFormatAwareReader} when done, which decRef's the snapshot. */ - public ReleasableRef acquireSnapshot(CatalogSnapshot catalogSnapshot) throws IOException { - List readers = new ArrayList<>(); - for (EngineReaderManager rm : readerManagers.values()) { - readers.add(rm.getReader(catalogSnapshot)); + public DataFormatAwareReader acquireReader() throws IOException { + CatalogSnapshot snapshot = latestSnapshot; + if (snapshot == null) { + throw new IllegalStateException("No catalog snapshot available"); } - return new ReleasableRef(readers); + return acquireReader(snapshot); } /** - * A releasable reference to a set of readers acquired from reader managers. + * Acquires a composite reader on a specific catalog snapshot. 
+ */ + public DataFormatAwareReader acquireReader(CatalogSnapshot catalogSnapshot) throws IOException { + catalogSnapshot.incRef(); + try { + Map readers = new HashMap<>(); + for (Map.Entry> entry : readerManagers.entrySet()) { + Object reader = entry.getValue().getReader(catalogSnapshot); + if (reader != null) { + readers.put(entry.getKey(), reader); + } + } + return new DataFormatAwareReader(catalogSnapshot, readers); + } catch (Exception e) { + catalogSnapshot.decRef(); + throw e; + } + } + + /** + * A catalog-snapshot-backed data-format aware reader providing per-format reader access. + * Closing this reader releases the catalog snapshot reference. */ @ExperimentalApi - public static class ReleasableRef implements Closeable { - private final List readers; + public static class DataFormatAwareReader implements Closeable { + private final CatalogSnapshot catalogSnapshot; + private final Map readers; - ReleasableRef(List readers) { + DataFormatAwareReader(CatalogSnapshot catalogSnapshot, Map readers) { + this.catalogSnapshot = catalogSnapshot; this.readers = readers; } - public List getReaders() { - return readers; + public Object getReader(DataFormat format) { + return readers.get(format); + } + + public CatalogSnapshot getCatalogSnapshot() { + return catalogSnapshot; } @Override - public void close() throws IOException { - // Reader managers handle their own reference counting; - // this is a placeholder for future release logic. + public void close() { + catalogSnapshot.decRef(); } } - // ---- Closeable ---- - @Override public void close() throws IOException { List exceptions = new ArrayList<>(); @@ -151,10 +185,7 @@ public void close() throws IOException { * Attempts to retrieve each memoized instance and close it if it implements {@link Closeable}. * Suppliers that were never invoked will return quickly from the memoize wrapper. 
*/ - private static void closeSupplierInstances( - Collection> suppliers, - List exceptions - ) { + private static void closeSupplierInstances(Collection> suppliers, List exceptions) { for (CheckedSupplier supplier : suppliers) { try { T instance = supplier.get(); diff --git a/server/src/main/java/org/opensearch/index/engine/IndexFilterTree.java b/server/src/main/java/org/opensearch/index/engine/IndexFilterTree.java index af83a9ceb7233..c918aeaa5c704 100644 --- a/server/src/main/java/org/opensearch/index/engine/IndexFilterTree.java +++ b/server/src/main/java/org/opensearch/index/engine/IndexFilterTree.java @@ -25,8 +25,7 @@ @ExperimentalApi public class IndexFilterTree implements Closeable { - // TODO + // TODO @Override - public void close() throws IOException { - } + public void close() throws IOException {} } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotAwareRefreshListener.java b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotAwareRefreshListener.java deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotDeleteListener.java b/server/src/main/java/org/opensearch/index/engine/exec/CatalogSnapshotDeleteListener.java deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineFactory.java b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatAwareEngineFactory.java similarity index 76% rename from server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineFactory.java rename to server/src/main/java/org/opensearch/index/engine/exec/DataFormatAwareEngineFactory.java index d40d875e3cb2b..b05fc42d65f84 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineFactory.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatAwareEngineFactory.java @@ -12,43 +12,44 @@ import 
org.opensearch.common.CheckedSupplier; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.IndexSettings; -import org.opensearch.index.engine.CompositeEngine; +import org.opensearch.index.engine.DataFormatAwareEngine; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.ShardPath; import org.opensearch.plugins.PluginsService; -import org.opensearch.plugins.SearchAnalyticsBackEndPlugin; +import org.opensearch.plugins.SearchBackEndPlugin; import java.io.IOException; import java.util.HashMap; import java.util.Map; /** - * Factory that discovers {@link SearchAnalyticsBackEndPlugin}s via + * Factory that discovers {@link SearchBackEndPlugin}s via * {@link PluginsService} and builds the per-format reader managers and - * memoizing suppliers consumed by {@link CompositeEngine}. + * memoizing suppliers consumed by {@link DataFormatAwareEngine}. *

- * This keeps CompositeEngine decoupled from the plugin layer. + * This keeps DataformatAwareEngine decoupled from the plugin layer. * * @opensearch.experimental */ @ExperimentalApi -public class CompositeEngineFactory { +public class DataFormatAwareEngineFactory { private final Map> readerManagers = new HashMap<>(); - private final Map, IOException>> engineSuppliers = new HashMap<>(); - private final Map, IOException>> indexFilterProviderSuppliers = new HashMap<>(); + private final Map, IOException>> engineSuppliers = new HashMap<>(); + private final Map, IOException>> indexFilterProviderSuppliers = + new HashMap<>(); private final Map, IOException>> sourceProviderSuppliers = new HashMap<>(); private final IndexFileDeleter indexFileDeleter; - public CompositeEngineFactory( + public DataFormatAwareEngineFactory( PluginsService pluginsService, ShardPath shardPath, MapperService mapperService, IndexSettings indexSettings ) throws IOException { - for (SearchAnalyticsBackEndPlugin plugin : pluginsService.filterPlugins(SearchAnalyticsBackEndPlugin.class)) { + for (SearchBackEndPlugin plugin : pluginsService.filterPlugins(SearchBackEndPlugin.class)) { for (DataFormat format : plugin.getSupportedFormats()) { // TODO: use mapperService and indexSettings to filter formats relevant to this index readerManagers.put(format, plugin.createReaderManager(format, shardPath)); @@ -88,11 +89,11 @@ public T get() throws IOException { } /** - * Creates a new {@link CompositeEngine} populated with the discovered + * Creates a new {@link DataFormatAwareEngine} populated with the discovered * reader managers and memoizing suppliers. 
*/ - public CompositeEngine create() { - return new CompositeEngine(readerManagers, engineSuppliers, indexFilterProviderSuppliers, sourceProviderSuppliers); + public DataFormatAwareEngine create() { + return new DataFormatAwareEngine(readerManagers, engineSuppliers, indexFilterProviderSuppliers, sourceProviderSuppliers); } /** @@ -101,9 +102,7 @@ public CompositeEngine create() { * * @param readerManagers the per-format reader managers that receive notifications */ - public CatalogSnapshotLifecycleListener createCatalogSnapshotListener( - Map> readerManagers - ) { - return new CompositeEngineCatalogSnapshotListener(readerManagers, indexFileDeleter); + public CatalogSnapshotLifecycleListener createCatalogSnapshotListener(Map> readerManagers) { + return new DataFormatEngineCatalogSnapshotListener(readerManagers, indexFileDeleter); } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineCatalogSnapshotListener.java b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatEngineCatalogSnapshotListener.java similarity index 95% rename from server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineCatalogSnapshotListener.java rename to server/src/main/java/org/opensearch/index/engine/exec/DataFormatEngineCatalogSnapshotListener.java index 320068bd4b565..85e247bd29fd1 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/CompositeEngineCatalogSnapshotListener.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatEngineCatalogSnapshotListener.java @@ -26,12 +26,12 @@ * @opensearch.experimental */ @ExperimentalApi -public class CompositeEngineCatalogSnapshotListener implements CatalogSnapshotLifecycleListener { +public class DataFormatEngineCatalogSnapshotListener implements CatalogSnapshotLifecycleListener { private final Map> readerManagers; private final IndexFileDeleter indexFileDeleter; - public CompositeEngineCatalogSnapshotListener( + public DataFormatEngineCatalogSnapshotListener( 
Map> readerManagers, IndexFileDeleter indexFileDeleter ) { diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistry.java b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatRegistry.java deleted file mode 100644 index e69de29bb2d1d..0000000000000 diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexFileDeleter.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexFileDeleter.java index 12121f01dac5f..61507b7ffe9d7 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/IndexFileDeleter.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexFileDeleter.java @@ -9,6 +9,7 @@ package org.opensearch.index.engine.exec; import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.index.engine.DataFormatAwareEngine; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.shard.ShardPath; @@ -26,7 +27,7 @@ * added or fully dereferenced after catalog snapshot changes. *

* This class does not notify reader managers itself — it returns the - * computed change sets so the caller ({@link org.opensearch.index.engine.CompositeEngine}) + * computed change sets so the caller ({@link DataFormatAwareEngine}) * can route notifications to the appropriate reader managers. * * @opensearch.experimental diff --git a/server/src/main/java/org/opensearch/index/engine/exec/SearchExecEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/SearchExecEngine.java index 2e9284f209ed4..a78645054b5b7 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/SearchExecEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/SearchExecEngine.java @@ -10,54 +10,44 @@ import org.opensearch.action.search.SearchShardTask; import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.core.action.ActionListener; +import org.opensearch.index.engine.DataFormatAwareEngine; import org.opensearch.search.SearchExecutionContext; import org.opensearch.search.SearchShardTarget; import org.opensearch.search.internal.ShardSearchRequest; import java.io.Closeable; import java.io.IOException; -import java.util.Iterator; /** * Shard-level search execution engine interface. * * @param the engine-specific context type * @param the engine-native plan type (e.g. byte[] for substrait) + * @param the result stream type returned by {@link #execute} * @opensearch.experimental */ @ExperimentalApi -public interface SearchExecEngine extends Closeable { - - void execute(C context) throws IOException; - - default void execute(C context, ActionListener listener) { - try { - execute(context); - listener.onResponse(context); - } catch (Exception e) { - listener.onFailure(e); - } - } +public interface SearchExecEngine extends Closeable { /** - * Create a search context. The reader is provided by {@link org.opensearch.index.engine.CompositeEngine} - * which owns all reader managers. 
+ * Converts a logical plan fragment into the engine's native plan format. */ - C createContext( - Object reader, - ShardSearchRequest request, - SearchShardTarget shardTarget, - SearchShardTask task - ) throws IOException; - default T convertFragment(Object fragment) { throw new UnsupportedOperationException("convertFragment not supported by " + getClass().getSimpleName()); } - default Iterator executePlan(T plan, C context) { - throw new UnsupportedOperationException("executePlan not supported by " + getClass().getSimpleName()); - } + /** + * Creates a search context bound to the given reader and plan. + * The reader is provided by {@link DataFormatAwareEngine} + * which owns all reader managers. + */ + C createContext(Object reader, T plan, ShardSearchRequest request, SearchShardTarget shardTarget, SearchShardTask task) + throws IOException; + + /** + * Executes the plan held by the context and returns the result stream. + */ + S execute(C context) throws IOException; @Override default void close() throws IOException {} diff --git a/server/src/main/java/org/opensearch/index/shard/IndexShard.java b/server/src/main/java/org/opensearch/index/shard/IndexShard.java index 9c0e4a567e06f..44d99b06b8bf0 100644 --- a/server/src/main/java/org/opensearch/index/shard/IndexShard.java +++ b/server/src/main/java/org/opensearch/index/shard/IndexShard.java @@ -129,7 +129,7 @@ import org.opensearch.index.cache.request.ShardRequestCache; import org.opensearch.index.codec.CodecService; import org.opensearch.index.engine.CommitStats; -import org.opensearch.index.engine.CompositeEngine; +import org.opensearch.index.engine.DataFormatAwareEngine; import org.opensearch.index.engine.Engine; import org.opensearch.index.engine.Engine.GetResult; import org.opensearch.index.engine.EngineBackedIndexer; @@ -145,7 +145,7 @@ import org.opensearch.index.engine.SafeCommitInfo; import org.opensearch.index.engine.Segment; import org.opensearch.index.engine.SegmentsStats; -import 
org.opensearch.index.engine.exec.CompositeEngineFactory; +import org.opensearch.index.engine.exec.DataFormatAwareEngineFactory; import org.opensearch.index.engine.exec.Indexer; import org.opensearch.index.fielddata.FieldDataStats; import org.opensearch.index.fielddata.ShardFieldData; @@ -318,7 +318,7 @@ public class IndexShard extends AbstractIndexShardComponent implements IndicesCl private volatile long pendingPrimaryTerm; // see JavaDocs for getPendingPrimaryTerm private final Object engineMutex = new Object(); // lock ordering: engineMutex -> mutex private final AtomicReference currentEngineReference = new AtomicReference<>(); - private final AtomicReference currentCompositeEngineReference = new AtomicReference<>(); + private final AtomicReference currentCompositeEngineReference = new AtomicReference<>(); final EngineFactory engineFactory; final EngineConfigFactory engineConfigFactory; @@ -407,7 +407,7 @@ Runnable getGlobalCheckpointSyncer() { // Used to limit the number of concurrent translog tasks. When the semaphore is exhausted, serial recovery is used. 
private static final Semaphore translogConcurrentRecoverySemaphore = new Semaphore(1000); - private final CompositeEngineFactory compositeEngineFactory; + private final DataFormatAwareEngineFactory dataFormatAwareEngineFactory; @InternalApi public IndexShard( @@ -449,7 +449,7 @@ public IndexShard( final ClusterApplierService clusterApplierService, @Nullable final MergedSegmentPublisher mergedSegmentPublisher, @Nullable final ReferencedSegmentsPublisher referencedSegmentsPublisher, - @Nullable final CompositeEngineFactory compositeEngineFactory + @Nullable final DataFormatAwareEngineFactory dataFormatAwareEngineFactory ) throws IOException { super(shardRouting.shardId(), indexSettings); assert shardRouting.initializing(); @@ -575,9 +575,9 @@ public boolean shouldCache(Query query) { startRefreshTask(); } } - this.compositeEngineFactory = compositeEngineFactory; - if (compositeEngineFactory != null) { - this.currentCompositeEngineReference.set(compositeEngineFactory.create()); + this.dataFormatAwareEngineFactory = dataFormatAwareEngineFactory; + if (dataFormatAwareEngineFactory != null) { + this.currentCompositeEngineReference.set(dataFormatAwareEngineFactory.create()); } } @@ -2217,15 +2217,15 @@ public Engine.Searcher acquireSearcher(String source) { /** * Returns the current CompositeEngine, or null if no optimized index is active. */ - public CompositeEngine getCompositeEngine() { + public DataFormatAwareEngine getCompositeEngine() { return currentCompositeEngineReference.get(); } /** * Sets the CompositeEngine for this shard (called during shard initialization for optimized indexes). 
*/ - public void setCompositeEngine(CompositeEngine compositeEngine) { - currentCompositeEngineReference.set(compositeEngine); + public void setCompositeEngine(DataFormatAwareEngine dataFormatAwareEngine) { + currentCompositeEngineReference.set(dataFormatAwareEngine); } private void markSearcherAccessed() { diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index 3a5797a130511..5bd14d499dc6d 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -62,8 +62,8 @@ import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.CheckedConsumer; import org.opensearch.common.CheckedFunction; -import org.opensearch.common.CheckedTriFunction; import org.opensearch.common.CheckedSupplier; +import org.opensearch.common.CheckedTriFunction; import org.opensearch.common.Nullable; import org.opensearch.common.annotation.InternalApi; import org.opensearch.common.annotation.PublicApi; @@ -124,7 +124,7 @@ import org.opensearch.index.engine.NRTReplicationEngineFactory; import org.opensearch.index.engine.NoOpEngine; import org.opensearch.index.engine.ReadOnlyEngine; -import org.opensearch.index.engine.exec.CompositeEngineFactory; +import org.opensearch.index.engine.exec.DataFormatAwareEngineFactory; import org.opensearch.index.fielddata.IndexFieldDataCache; import org.opensearch.index.flush.FlushStats; import org.opensearch.index.get.GetStats; @@ -427,8 +427,12 @@ public class IndicesService extends AbstractLifecycleComponent private volatile int defaultMaxMergeAtOnce; private final StatusCounterStats statusCounterStats; private final ClusterMergeSchedulerConfig clusterMergeSchedulerConfig; - private final CheckedTriFunction - compositeEngineFactorySupplier; + private final CheckedTriFunction< + ShardPath, + MapperService, + IndexSettings, + DataFormatAwareEngineFactory, + 
IOException> compositeEngineFactorySupplier; @Override protected void doStart() { @@ -614,7 +618,7 @@ protected void closeInternal() { MergeSchedulerConfig.CLUSTER_MAX_FORCE_MERGE_MB_PER_SEC_SETTING, this::onClusterLevelForceMergeMBPerSecUpdate ); - this.compositeEngineFactorySupplier = (shardPath, mapperService, indexSettings) -> new CompositeEngineFactory( + this.compositeEngineFactorySupplier = (shardPath, mapperService, indexSettings) -> new DataFormatAwareEngineFactory( pluginsService, shardPath, mapperService, diff --git a/server/src/main/java/org/opensearch/plugins/SearchAnalyticsBackEndPlugin.java b/server/src/main/java/org/opensearch/plugins/SearchBackEndPlugin.java similarity index 91% rename from server/src/main/java/org/opensearch/plugins/SearchAnalyticsBackEndPlugin.java rename to server/src/main/java/org/opensearch/plugins/SearchBackEndPlugin.java index e113272744283..a0b1dfb10e0fe 100644 --- a/server/src/main/java/org/opensearch/plugins/SearchAnalyticsBackEndPlugin.java +++ b/server/src/main/java/org/opensearch/plugins/SearchBackEndPlugin.java @@ -8,9 +8,6 @@ package org.opensearch.plugins; -import java.io.IOException; -import java.util.List; - import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.exec.EngineReaderManager; import org.opensearch.index.engine.exec.IndexFilterProvider; @@ -18,12 +15,15 @@ import org.opensearch.index.engine.exec.SourceProvider; import org.opensearch.index.shard.ShardPath; +import java.io.IOException; +import java.util.List; + /** * Interface for back-end query engines. * * @opensearch.internal */ -public interface SearchAnalyticsBackEndPlugin { +public interface SearchBackEndPlugin { String name(); @@ -34,7 +34,7 @@ public interface SearchAnalyticsBackEndPlugin { /** * Create a search execution engine. Return null if this plugin is an index provider only. 
*/ - default SearchExecEngine createSearchExecEngine(DataFormat format, ShardPath shardPath) throws IOException { + default SearchExecEngine createSearchExecEngine(DataFormat format, ShardPath shardPath) throws IOException { return null; } diff --git a/server/src/test/java/org/opensearch/index/IndexModuleTests.java b/server/src/test/java/org/opensearch/index/IndexModuleTests.java index d3637aac98ae6..57ba262b790ea 100644 --- a/server/src/test/java/org/opensearch/index/IndexModuleTests.java +++ b/server/src/test/java/org/opensearch/index/IndexModuleTests.java @@ -281,7 +281,8 @@ private IndexService newIndexService(IndexModule module) throws IOException { s -> {}, null, () -> TieredMergePolicyProvider.DEFAULT_MAX_MERGE_AT_ONCE, - mockClusterMergeSchedulerConfig + mockClusterMergeSchedulerConfig, + null ); } diff --git a/server/src/test/java/org/opensearch/index/engine/dataformat/DataFormatPluginTests.java b/server/src/test/java/org/opensearch/index/engine/dataformat/DataFormatPluginTests.java index 117ce798494f2..f076442ececd3 100644 --- a/server/src/test/java/org/opensearch/index/engine/dataformat/DataFormatPluginTests.java +++ b/server/src/test/java/org/opensearch/index/engine/dataformat/DataFormatPluginTests.java @@ -9,15 +9,23 @@ package org.opensearch.index.engine.dataformat; import org.opensearch.Version; +import org.opensearch.action.search.SearchShardTask; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.settings.Settings; import org.opensearch.core.index.shard.ShardId; import org.opensearch.index.IndexSettings; +import org.opensearch.index.engine.DataFormatAwareEngine; +import org.opensearch.index.engine.exec.CatalogSnapshot; +import org.opensearch.index.engine.exec.EngineReaderManager; +import org.opensearch.index.engine.exec.SearchExecEngine; import org.opensearch.index.engine.exec.Segment; import org.opensearch.index.engine.exec.WriterFileSet; import org.opensearch.index.mapper.MappedFieldType; import 
org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.ShardPath; +import org.opensearch.search.SearchExecutionContext; +import org.opensearch.search.SearchShardTarget; +import org.opensearch.search.internal.ShardSearchRequest; import org.opensearch.test.OpenSearchTestCase; import java.io.IOException; @@ -409,4 +417,414 @@ public > IndexingExecutionEngin return (IndexingExecutionEngine) new MockIndexingExecutionEngine(dataFormat); } } + + /** + * write → refresh → catalog snapshot → DataFormatAwareEngine → acquireReader → search. + */ + public void testWritePathToSearchExecEngine() throws IOException { + MockDataFormat format = new MockDataFormat(); + MockIndexingExecutionEngine indexEngine = new MockIndexingExecutionEngine(format); + + Writer w = indexEngine.createWriter(1L); + MockDocumentInput d1 = indexEngine.newDocumentInput(); + d1.addField(mock(MappedFieldType.class), "Alice"); + d1.setRowId("_row_id", 0); + w.addDoc(d1); + MockDocumentInput d2 = indexEngine.newDocumentInput(); + d2.addField(mock(MappedFieldType.class), "Bob"); + d2.setRowId("_row_id", 1); + w.addDoc(d2); + WriterFileSet fs = w.flush().getWriterFileSet(format).get(); + w.close(); + + RefreshResult refreshResult = indexEngine.refresh(RefreshInput.builder().addWriterFileSet(fs).build()); + MockCatalogSnapshot snapshot = new MockCatalogSnapshot(1L, refreshResult.refreshedSegments(), format); + + MockReaderManager readerManager = new MockReaderManager(format.name()); + readerManager.afterRefresh(true, snapshot); + + DataFormatAwareEngine dataFormatAwareEngine = new DataFormatAwareEngine(Map.of(format, readerManager), Map.of(), Map.of(), Map.of()); + // setLatestSnapshot takes over the construction ref (refcount stays 1) — NOTE(review): inferred from the assertions in testSearchHoldsSnapshotAliveWhileRefreshDeletesFiles; confirm against DataFormatAwareEngine + dataFormatAwareEngine.setLatestSnapshot(snapshot); + + // acquireReader incRefs for the duration of the search (refcount: 2) + try (DataFormatAwareEngine.DataFormatAwareReader cr = dataFormatAwareEngine.acquireReader()) { + MockReader reader = (MockReader) cr.getReader(format);
+ assertNotNull(reader); + assertEquals(2, reader.totalRows); + + MockSearchExecEngine searchEngine = new MockSearchExecEngine(); + String plan = searchEngine.convertFragment("SELECT * FROM hits"); + MockSearchContext ctx = searchEngine.createContext(reader, plan, null, null, null); + List results = searchEngine.execute(ctx); + assertEquals(2, results.size()); + ctx.close(); + } + // cr.close() decRefs. Snapshot still alive — engine owns the construction ref. + assertTrue(snapshot.tryIncRef()); + snapshot.decRef(); // undo probe + } + + /** + * Search holds snapshot alive while refresh replaces it. + *

+ * Timeline: + * 1. new s1 → refcount = 1 (construction) + * 2. setLatestSnapshot(s1) → refcount = 1 (engine takes over construction ref) + * 3. acquireReader() → refcount = 2 (search adds ref) + * 4. setLatestSnapshot(s2) → s1 refcount = 1 (engine releases s1) + * 5. readerManager.onDeleted(s1) → reader closed, but s1 alive (search ref) + * 6. compositeReader.close() → s1 refcount = 0 → dead + */ + public void testSearchHoldsSnapshotAliveWhileRefreshDeletesFiles() throws IOException { + MockDataFormat format = new MockDataFormat(); + MockIndexingExecutionEngine indexEngine = new MockIndexingExecutionEngine(format); + + // Batch 1 + Writer w1 = indexEngine.createWriter(1L); + MockDocumentInput d1 = indexEngine.newDocumentInput(); + d1.addField(mock(MappedFieldType.class), "Alice"); + d1.setRowId("_row_id", 0); + w1.addDoc(d1); + WriterFileSet fs1 = w1.flush().getWriterFileSet(format).get(); + w1.close(); + + RefreshResult rr1 = indexEngine.refresh(RefreshInput.builder().addWriterFileSet(fs1).build()); + MockCatalogSnapshot snapshot1 = new MockCatalogSnapshot(1L, rr1.refreshedSegments(), format); + + MockReaderManager readerManager = new MockReaderManager(format.name()); + readerManager.afterRefresh(true, snapshot1); + + DataFormatAwareEngine dataFormatAwareEngine = new DataFormatAwareEngine(Map.of(format, readerManager), Map.of(), Map.of(), Map.of()); + dataFormatAwareEngine.setLatestSnapshot(snapshot1); // takes over construction ref, refcount: 1 + + // Search acquires reader — refcount: 2 + DataFormatAwareEngine.DataFormatAwareReader dataFormatAwareReader = dataFormatAwareEngine.acquireReader(); + MockReader searchReader = (MockReader) dataFormatAwareReader.getReader(format); + assertEquals(1, searchReader.totalRows); + + // New refresh arrives — setLatestSnapshot(s2) decRefs s1 → refcount: 1 + Writer w2 = indexEngine.createWriter(2L); + MockDocumentInput d2 = indexEngine.newDocumentInput(); + d2.addField(mock(MappedFieldType.class), "Bob"); + 
d2.setRowId("_row_id", 1); + w2.addDoc(d2); + WriterFileSet fs2 = w2.flush().getWriterFileSet(format).get(); + w2.close(); + + RefreshResult rr2 = indexEngine.refresh(RefreshInput.builder().addWriterFileSet(fs1).addWriterFileSet(fs2).build()); + MockCatalogSnapshot snapshot2 = new MockCatalogSnapshot(2L, rr2.refreshedSegments(), format); + readerManager.afterRefresh(true, snapshot2); + dataFormatAwareEngine.setLatestSnapshot(snapshot2); // s1 refcount: 1 (only search ref) + + // Old snapshot deleted from reader manager — reader closes + readerManager.onDeleted(snapshot1); + assertTrue("Reader for snapshot1 closed in reader manager", searchReader.closed); + + // But snapshot1 still alive — search holds the last ref + assertTrue("Snapshot1 alive while search holds ref", snapshot1.tryIncRef()); + snapshot1.decRef(); // undo probe + + // Search completes — s1 refcount: 0 → dead + dataFormatAwareReader.close(); + assertFalse("Snapshot1 dead after search releases", snapshot1.tryIncRef()); + + // Snapshot 2 still works + try (DataFormatAwareEngine.DataFormatAwareReader cr2 = dataFormatAwareEngine.acquireReader()) { + MockReader r2 = (MockReader) cr2.getReader(format); + assertEquals(2, r2.totalRows); + } + } + + /** + * CompositeReader provides per-format reader access from a single catalog snapshot. 
+ */ + public void testCompositeReaderMultiFormat() throws IOException { + MockDataFormat format1 = new MockDataFormat(); + DataFormat format2 = new DataFormat() { + @Override + public String name() { + return "mock-lucene"; + } + + @Override + public long priority() { + return 50L; + } + + @Override + public Set supportedFields() { + return Set.of(); + } + }; + + MockReaderManager rm1 = new MockReaderManager(format1.name()); + MockReaderManager rm2 = new MockReaderManager(format2.name()); + + Path dir = createTempDir(); + WriterFileSet wfs1 = WriterFileSet.builder().directory(dir).writerGeneration(1L).addFile("data.parquet").addNumRows(10).build(); + WriterFileSet wfs2 = WriterFileSet.builder().directory(dir).writerGeneration(1L).addFile("data.lucene").addNumRows(10).build(); + Segment seg = Segment.builder(0L).addSearchableFiles(format1, wfs1).addSearchableFiles(format2, wfs2).build(); + MockCatalogSnapshot snapshot = new MockCatalogSnapshot(1L, List.of(seg), format1) { + @Override + public Collection getSearchableFiles(String dataFormat) { + if ("mock-lucene".equals(dataFormat)) return List.of(wfs2); + return super.getSearchableFiles(dataFormat); + } + + @Override + public Set getDataFormats() { + return Set.of(format1.name(), format2.name()); + } + }; + + rm1.afterRefresh(true, snapshot); + rm2.afterRefresh(true, snapshot); + + DataFormatAwareEngine dataFormatAwareEngine = new DataFormatAwareEngine(Map.of(format1, rm1, format2, rm2), Map.of(), Map.of(), Map.of()); + dataFormatAwareEngine.setLatestSnapshot(snapshot); + + try (DataFormatAwareEngine.DataFormatAwareReader cr = dataFormatAwareEngine.acquireReader()) { + MockReader r1 = (MockReader) cr.getReader(format1); + MockReader r2 = (MockReader) cr.getReader(format2); + assertNotNull(r1); + assertNotNull(r2); + assertEquals(10, r1.totalRows); + assertEquals(10, r2.totalRows); + assertTrue(r1.fileNames.contains("data.parquet")); + assertTrue(r2.fileNames.contains("data.lucene")); + } + } + + /** + * 
afterRefresh(false) is a no-op; duplicate afterRefresh for same snapshot reuses reader. + */ + public void testRefreshEdgeCases() throws IOException { + MockDataFormat format = new MockDataFormat(); + MockIndexingExecutionEngine indexEngine = new MockIndexingExecutionEngine(format); + + Writer w = indexEngine.createWriter(1L); + MockDocumentInput d = indexEngine.newDocumentInput(); + d.addField(mock(MappedFieldType.class), "x"); + d.setRowId("_row_id", 0); + w.addDoc(d); + WriterFileSet fs = w.flush().getWriterFileSet(format).get(); + w.close(); + + RefreshResult rr = indexEngine.refresh(RefreshInput.builder().addWriterFileSet(fs).build()); + MockCatalogSnapshot snapshot = new MockCatalogSnapshot(1L, rr.refreshedSegments(), format); + + MockReaderManager rm = new MockReaderManager(format.name()); + + rm.afterRefresh(false, snapshot); + assertNull(rm.getReader(snapshot)); + assertEquals(0, rm.readerCount()); + + rm.afterRefresh(true, snapshot); + assertNotNull(rm.getReader(snapshot)); + assertEquals(1, rm.readerCount()); + + MockReader first = rm.getReader(snapshot); + rm.afterRefresh(true, snapshot); + assertSame(first, rm.getReader(snapshot)); + assertEquals(1, rm.readerCount()); + } + + /** + * File add/delete notifications propagate through reader manager. 
+ */ + public void testFileLifecycleNotifications() throws IOException { + MockReaderManager rm = new MockReaderManager("mock-columnar"); + + rm.onFilesAdded(List.of("a.parquet", "b.parquet")); + assertEquals(2, rm.addedFiles.size()); + assertTrue(rm.addedFiles.contains("a.parquet")); + + rm.onFilesDeleted(List.of("a.parquet")); + assertEquals(1, rm.deletedFiles.size()); + assertTrue(rm.deletedFiles.contains("a.parquet")); + } + + static class MockReader { + final List fileNames; + final long totalRows; + boolean closed; + + MockReader(List fileNames, long totalRows) { + this.fileNames = fileNames; + this.totalRows = totalRows; + } + + void close() { + closed = true; + } + } + + static class MockSearchContext implements SearchExecutionContext { + final String plan; + final long totalRows; + + MockSearchContext(String plan, long totalRows) { + this.plan = plan; + this.totalRows = totalRows; + } + + @Override + public ShardSearchRequest request() { + return null; + } + + @Override + public SearchShardTarget shardTarget() { + return null; + } + + @Override + public void close() {} + } + + static class MockSearchExecEngine implements SearchExecEngine> { + @Override + public String convertFragment(Object fragment) { + return "PLAN:" + fragment; + } + + @Override + public MockSearchContext createContext( + Object reader, + String plan, + ShardSearchRequest request, + SearchShardTarget shardTarget, + SearchShardTask task + ) { + MockReader r = (MockReader) reader; + return new MockSearchContext(plan, r.totalRows); + } + + @Override + public List execute(MockSearchContext context) { + List rows = new ArrayList<>(); + for (int i = 0; i < context.totalRows; i++) { + rows.add(new Object[] { "row_" + i }); + } + return rows; + } + } + + static class MockReaderManager implements EngineReaderManager { + private final String formatName; + private final Map readers = new HashMap<>(); + final List addedFiles = new ArrayList<>(); + final List deletedFiles = new ArrayList<>(); + + 
MockReaderManager(String formatName) { + this.formatName = formatName; + } + + @Override + public MockReader getReader(CatalogSnapshot snapshot) { + return readers.get(snapshot); + } + + int readerCount() { + return readers.size(); + } + + @Override + public void beforeRefresh() {} + + @Override + public void afterRefresh(boolean didRefresh, CatalogSnapshot snapshot) { + if (didRefresh == false || readers.containsKey(snapshot)) return; + Collection files = snapshot.getSearchableFiles(formatName); + List allFiles = new ArrayList<>(); + long totalRows = 0; + for (WriterFileSet wfs : files) { + allFiles.addAll(wfs.files()); + totalRows += wfs.numRows(); + } + readers.put(snapshot, new MockReader(allFiles, totalRows)); + } + + @Override + public void onDeleted(CatalogSnapshot snapshot) { + MockReader reader = readers.remove(snapshot); + if (reader != null) reader.close(); + } + + @Override + public void onFilesDeleted(Collection files) { + deletedFiles.addAll(files); + } + + @Override + public void onFilesAdded(Collection files) { + addedFiles.addAll(files); + } + } + + static class MockCatalogSnapshot extends CatalogSnapshot { + private final List segments; + private final MockDataFormat format; + + MockCatalogSnapshot(long generation, List segments, MockDataFormat format) { + super("mock-snapshot", generation, 1L); + this.segments = segments; + this.format = format; + } + + @Override + public Map getUserData() { + return Map.of(); + } + + @Override + public long getId() { + return generation; + } + + @Override + public List getSegments() { + return segments; + } + + @Override + public Collection getSearchableFiles(String dataFormat) { + List result = new ArrayList<>(); + for (Segment seg : segments) { + WriterFileSet wfs = seg.dfGroupedSearchableFiles().get(dataFormat); + if (wfs != null) result.add(wfs); + } + return result; + } + + @Override + public Set getDataFormats() { + return Set.of(format.name()); + } + + @Override + public long getLastWriterGeneration() { + 
return generation; + } + + @Override + public String serializeToString() { + return "mock-snapshot-" + generation; + } + + @Override + public void setCatalogSnapshotMap(Map map) {} + + @Override + public void setUserData(Map userData, boolean b) {} + + @Override + public Object getReader(DataFormat dataFormat) { + return null; + } + + @Override + protected void closeInternal() {} + } } From 9ba9266216d654a414f4f58db425c5dca3affded Mon Sep 17 00:00:00 2001 From: bharath-techie Date: Mon, 23 Mar 2026 23:17:12 +0530 Subject: [PATCH 07/11] separating out bridge/engine and indexing integration interfaces / analytics interfaces Signed-off-by: bharath-techie --- .../analytics/backend/EngineBridge.java | 30 ++++------ .../spi/AnalyticsSearchBackendPlugin.java | 20 +++++-- .../be/datafusion/DataFusionBridge.java | 58 ++++++++++--------- .../be/datafusion/DataFusionPlugin.java | 7 ++- .../be/lucene/LuceneSearchEnginePlugin.java | 8 ++- .../analytics/exec/DefaultPlanExecutor.java | 37 +++++------- .../org/opensearch/index/IndexModule.java | 7 ++- .../org/opensearch/index/IndexService.java | 7 ++- .../dataformat/DataFormatPluginTests.java | 21 ++++++- 9 files changed, 114 insertions(+), 81 deletions(-) diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineBridge.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineBridge.java index f0cd602312379..2931e6749dfeb 100644 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineBridge.java +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineBridge.java @@ -8,29 +8,21 @@ package org.opensearch.analytics.backend; +import java.io.Closeable; + /** - * JNI boundary interface between the query planner (Java) and a native - * execution engine (e.g., DataFusion/Rust). - * - *

<p>The bridge has two responsibilities: - * <ol> - * <li>{@link #convertFragment} — serialise a logical plan fragment into - * the engine's wire format (e.g., Substrait bytes).</li> - * <li>{@link #execute} — hand the serialised plan to the native engine - * and obtain an opaque handle to the result stream that lives - * entirely in native memory.</li> - * </ol> - * - * <p>

Arrow data never crosses the JNI boundary into the JVM heap. - * Consumers read from the native stream via Arrow Flight or - * direct native-memory access using the returned handle. + * Per-query facade that bridges the backend-specific engines with the analytics engine. + *

+ * A bridge is created per query with a reader bound to a catalog snapshot. + * It converts a logical plan to the engine's native format and executes it, + * managing the backend context internally. * - * @param serialised plan type (e.g., {@code byte[]} for Substrait) - * @param result stream handle - * @param > logical plan type (e.g., Calcite {@code RelNode}) + * @param serialised plan type (e.g., {@code byte[]} for Substrait) + * @param result stream type + * @param logical plan type (e.g., Calcite {@code RelNode}) * @opensearch.internal */ -public interface EngineBridge { +public interface EngineBridge extends Closeable { /** * Converts a logical plan fragment into the native engine's serialised diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java index a942c70f0328d..e0693cfa04d5a 100644 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java @@ -10,20 +10,32 @@ import org.apache.calcite.sql.SqlOperatorTable; import org.opensearch.analytics.backend.EngineBridge; -import org.opensearch.plugins.SearchBackEndPlugin; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.exec.SearchExecEngine; + +import java.io.IOException; +import java.util.List; /** * SPI extension point for back-end query engines (DataFusion, Lucene, etc.). * @opensearch.internal */ -public interface AnalyticsSearchBackendPlugin extends SearchBackEndPlugin { +public interface AnalyticsSearchBackendPlugin { + /** Unique engine name (e.g., "lucene", "datafusion"). */ String name(); - /** JNI boundary for executing serialized plans, or null for engines without native execution. 
*/ - EngineBridge bridge(); // TODO this doesn't have context / index shard init + /** + * Creates a per-query bridge bound to the given reader. + * + * @param reader the format-specific reader from the composite reader + * @return a bridge for this query, caller must close when done + */ + EngineBridge bridge(DataFormat format, Object reader, SearchExecEngine engine) throws IOException; /** Supported functions as a Calcite operator table, or null if the back-end adds no functions. */ SqlOperatorTable operatorTable(); + // TODO : remove this ? + List getSupportedFormats(); } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionBridge.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionBridge.java index a61afaeea8fcb..bd466a8dd2228 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionBridge.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionBridge.java @@ -11,38 +11,44 @@ import org.apache.calcite.rel.RelNode; import org.opensearch.analytics.backend.EngineBridge; +import java.io.IOException; + /** - * DataFusion EngineBridge implementation. - * Uses a byte[] representing serialized plan to execute. - * // TODO : we need a stateful engine, not just a bridge, evaluate - * // switch to SearchExecEngine + * Per-query bridge that wraps {@link DatafusionSearchExecEngine}. + * Bound to a reader at construction — hides the {@link DatafusionContext} + * from the analytics engine layer. */ -public class DataFusionBridge implements EngineBridge { - // S=byte[] (Substrait), H=Long (stream pointer), L=RelNode (logical plan) - - /** Creates a new DataFusion bridge. */ - public DataFusionBridge() {} - - /** - * Convert calcite fragment to an executable native fragment. 
- * Ex - substrait for Datafusion - * - * @param fragment the logical plan subtree to serialise - * @return substrait bytes - */ +public class DataFusionBridge implements EngineBridge { + + private final DatafusionSearchExecEngine engine; + private final DatafusionReader reader; + private DatafusionContext context; + + public DataFusionBridge(DatafusionSearchExecEngine engine, DatafusionReader reader) { + this.engine = engine; + this.reader = reader; + } + @Override public byte[] convertFragment(RelNode fragment) { - return new byte[0]; + return engine.convertFragment(fragment); + } + + @Override + public DatafusionResultStream execute(byte[] plan) { + try { + context = engine.createContext(reader, plan, null, null, null); + return engine.execute(context); + } catch (IOException e) { + throw new RuntimeException("DataFusion execution failed", e); + } } - /** - * Execute query fragment - * - * @param fragment the serialised plan produced by {@link #convertFragment} - * @return RecordBatchStream pointer - */ @Override - public Long execute(byte[] fragment) { - return 0L; + public void close() throws IOException { + if (context != null) { + context.close(); + context = null; + } } } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java index 7987b2d16d0c0..54610daa0558c 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java @@ -26,6 +26,7 @@ import org.opensearch.index.engine.exec.SearchExecEngine; import org.opensearch.index.shard.ShardPath; import org.opensearch.plugins.Plugin; +import org.opensearch.plugins.SearchBackEndPlugin; import org.opensearch.repositories.RepositoriesService; import 
org.opensearch.script.ScriptService; import org.opensearch.threadpool.ThreadPool; @@ -45,7 +46,7 @@ * per-shard {@link DatafusionSearchExecEngine} instances via the * {@link AnalyticsSearchBackendPlugin} SPI. */ -public class DataFusionPlugin extends Plugin implements AnalyticsSearchBackendPlugin { +public class DataFusionPlugin extends Plugin implements AnalyticsSearchBackendPlugin, SearchBackEndPlugin { private static final Logger logger = LogManager.getLogger(DataFusionPlugin.class); @@ -103,8 +104,8 @@ public String name() { } @Override - public EngineBridge bridge() { - return null; // TODO decide between bridge and SearchExecEngine + public EngineBridge bridge(DataFormat format, Object reader, SearchExecEngine engine) throws IOException { + return new DataFusionBridge((DatafusionSearchExecEngine) engine, (DatafusionReader) reader); } @Override diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java index 9de3cf5d53cfe..210916fad5b6d 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java @@ -15,8 +15,10 @@ import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.exec.EngineReaderManager; import org.opensearch.index.engine.exec.IndexFilterProvider; +import org.opensearch.index.engine.exec.SearchExecEngine; import org.opensearch.index.engine.exec.SourceProvider; import org.opensearch.index.shard.ShardPath; +import org.opensearch.plugins.SearchBackEndPlugin; import java.io.IOException; import java.util.List; @@ -27,7 +29,7 @@ * @opensearch.experimental */ @ExperimentalApi -public class LuceneSearchEnginePlugin implements AnalyticsSearchBackendPlugin { +public class 
LuceneSearchEnginePlugin implements AnalyticsSearchBackendPlugin, SearchBackEndPlugin { @Override public String name() { @@ -35,8 +37,8 @@ public String name() { } @Override - public EngineBridge bridge() { - return null; + public EngineBridge bridge(DataFormat dataFormat, Object reader, SearchExecEngine engine) { + return null; // TODO : Lucene backend is index filter / source provider only , need to think about bridge } @Override diff --git a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java index 1c3b904faeca4..49afa8c3886d9 100644 --- a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java +++ b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java @@ -12,13 +12,13 @@ import org.apache.calcite.rel.core.TableScan; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.opensearch.analytics.backend.EngineBridge; import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.service.ClusterService; import org.opensearch.index.IndexService; import org.opensearch.index.engine.DataFormatAwareEngine; import org.opensearch.index.engine.dataformat.DataFormat; -import org.opensearch.index.engine.exec.SearchExecEngine; import org.opensearch.index.shard.IndexShard; import org.opensearch.indices.IndicesService; @@ -31,9 +31,9 @@ /** * {@link QueryPlanExecutor} default implementation. *

- * Acquires a {@link DataFormatAwareEngine.DataFormatAwareReader} on the latest catalog snapshot, - * then routes plan fragments to the appropriate {@link SearchExecEngine} per data format. - * The composite reader holds the snapshot reference alive for the duration of the search. + * Acquires a composite reader, creates a per-query {@link EngineBridge} + * bound to the reader, and delegates convert + execute to it. + * No backend-specific context is exposed to this class. */ public class DefaultPlanExecutor implements QueryPlanExecutor> { @@ -54,32 +54,29 @@ public DefaultPlanExecutor(List plugins, IndicesSe @SuppressWarnings("unchecked") @Override public Iterable execute(RelNode logicalFragment, Object context) { - // TODO : wire this properly , this is just to give an idea of flow AnalyticsSearchBackendPlugin plugin = selectBackEnd(); String tableName = extractTableName(logicalFragment); - DataFormatAwareEngine dataFormatAwareEngine = resolveCompositeEngine(tableName); + DataFormatAwareEngine engine = resolveCompositeEngine(tableName); List formats = plugin.getSupportedFormats(); DataFormat format = formats.get(0); - // Acquire composite reader — incRefs the latest catalog snapshot. - // Closing the reader decRefs the snapshot, allowing file cleanup. 
- try (DataFormatAwareEngine.DataFormatAwareReader dataFormatAwareReader = dataFormatAwareEngine.acquireReader()) { - Object reader = dataFormatAwareReader.getReader(format); - SearchExecEngine searchEngine = dataFormatAwareEngine.getSearchExecEngine(format); - Object plan = searchEngine.convertFragment(logicalFragment); - var engineContext = searchEngine.createContext(reader, plan, null, null, null); - Object result = searchEngine.execute(engineContext); - - // TODO: consume result stream into rows - logger.info("[DefaultPlanExecutor] Executed via [{}]", plugin.name()); - return new ArrayList<>(); + try (DataFormatAwareEngine.DataFormatAwareReader reader = engine.acquireReader()) { + EngineBridge bridge = plugin.bridge(format, reader.getReader(format), engine.getSearchExecEngine(format)); + try { + Object plan = bridge.convertFragment(logicalFragment); + Object result = bridge.execute(plan); + // TODO: consume result stream into rows + logger.info("[DefaultPlanExecutor] Executed via [{}]", plugin.name()); + return new ArrayList<>(); + } finally { + bridge.close(); + } } catch (Exception e) { throw new RuntimeException("Execution failed for [" + plugin.name() + "]", e); } } - // TODO: Placeholder logic static String extractTableName(RelNode node) { if (node instanceof TableScan) { List qn = node.getTable().getQualifiedName(); @@ -92,7 +89,6 @@ static String extractTableName(RelNode node) { throw new IllegalArgumentException("No TableScan found in plan fragment"); } - // TODO: Placeholder logic private DataFormatAwareEngine resolveCompositeEngine(String indexName) { IndexMetadata meta = clusterService.state().metadata().index(indexName); if (meta == null) throw new IllegalArgumentException("Index [" + indexName + "] not found"); @@ -107,7 +103,6 @@ private DataFormatAwareEngine resolveCompositeEngine(String indexName) { return ce; } - // TODO: Placeholder logic private AnalyticsSearchBackendPlugin selectBackEnd() { if (backEnds.isEmpty()) throw new 
IllegalStateException("No back-end plugins registered"); return backEnds.values().iterator().next(); diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java index 2dc861b54f94a..6b3371e12c9c6 100644 --- a/server/src/main/java/org/opensearch/index/IndexModule.java +++ b/server/src/main/java/org/opensearch/index/IndexModule.java @@ -744,7 +744,12 @@ public IndexService newIndexService( Function segmentReplicationStatsProvider, Supplier clusterDefaultMaxMergeAtOnceSupplier, ClusterMergeSchedulerConfig clusterMergeSchedulerConfig, - CheckedTriFunction compositeEngineFactorySupplier + CheckedTriFunction< + ShardPath, + MapperService, + IndexSettings, + DataFormatAwareEngineFactory, + IOException> compositeEngineFactorySupplier ) throws IOException { final IndexEventListener eventListener = freeze(); Function> readerWrapperFactory = indexReaderWrapper diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index 174168057b985..258d330acfef7 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -264,7 +264,12 @@ public IndexService( Function segmentReplicationStatsProvider, Supplier clusterDefaultMaxMergeAtOnceSupplier, ClusterMergeSchedulerConfig clusterMergeSchedulerConfig, - CheckedTriFunction compositeEngineFactorySupplier + CheckedTriFunction< + ShardPath, + MapperService, + IndexSettings, + DataFormatAwareEngineFactory, + IOException> compositeEngineFactorySupplier ) { super(indexSettings); this.storeFactory = storeFactory; diff --git a/server/src/test/java/org/opensearch/index/engine/dataformat/DataFormatPluginTests.java b/server/src/test/java/org/opensearch/index/engine/dataformat/DataFormatPluginTests.java index f076442ececd3..581a877d2a9e9 100644 --- 
a/server/src/test/java/org/opensearch/index/engine/dataformat/DataFormatPluginTests.java +++ b/server/src/test/java/org/opensearch/index/engine/dataformat/DataFormatPluginTests.java @@ -443,7 +443,12 @@ public void testWritePathToSearchExecEngine() throws IOException { MockReaderManager readerManager = new MockReaderManager(format.name()); readerManager.afterRefresh(true, snapshot); - DataFormatAwareEngine dataFormatAwareEngine = new DataFormatAwareEngine(Map.of(format, readerManager), Map.of(), Map.of(), Map.of()); + DataFormatAwareEngine dataFormatAwareEngine = new DataFormatAwareEngine( + Map.of(format, readerManager), + Map.of(), + Map.of(), + Map.of() + ); // setLatestSnapshot incRefs snapshot (refcount: 1 initial + 1 engine = 2) dataFormatAwareEngine.setLatestSnapshot(snapshot); @@ -495,7 +500,12 @@ public void testSearchHoldsSnapshotAliveWhileRefreshDeletesFiles() throws IOExce MockReaderManager readerManager = new MockReaderManager(format.name()); readerManager.afterRefresh(true, snapshot1); - DataFormatAwareEngine dataFormatAwareEngine = new DataFormatAwareEngine(Map.of(format, readerManager), Map.of(), Map.of(), Map.of()); + DataFormatAwareEngine dataFormatAwareEngine = new DataFormatAwareEngine( + Map.of(format, readerManager), + Map.of(), + Map.of(), + Map.of() + ); dataFormatAwareEngine.setLatestSnapshot(snapshot1); // takes over construction ref, refcount: 1 // Search acquires reader — refcount: 2 @@ -581,7 +591,12 @@ public Set getDataFormats() { rm1.afterRefresh(true, snapshot); rm2.afterRefresh(true, snapshot); - DataFormatAwareEngine dataFormatAwareEngine = new DataFormatAwareEngine(Map.of(format1, rm1, format2, rm2), Map.of(), Map.of(), Map.of()); + DataFormatAwareEngine dataFormatAwareEngine = new DataFormatAwareEngine( + Map.of(format1, rm1, format2, rm2), + Map.of(), + Map.of(), + Map.of() + ); dataFormatAwareEngine.setLatestSnapshot(snapshot); try (DataFormatAwareEngine.DataFormatAwareReader cr = dataFormatAwareEngine.acquireReader()) { From 
db1c08beb81d39672b36cf84d1f313859976907b Mon Sep 17 00:00:00 2001 From: bharath-techie Date: Tue, 24 Mar 2026 20:59:45 +0530 Subject: [PATCH 08/11] switching search exec engine per query Signed-off-by: bharath-techie --- .../analytics/backend/EngineBridge.java | 49 --- .../analytics/backend/ExecutionContext.java | 50 +++ .../analytics/backend/SearchExecEngine.java | 35 ++ .../analytics/plan/ResolvedPlan.java | 46 +++ .../spi/AnalyticsSearchBackendPlugin.java | 26 +- .../be/datafusion/DataFusionBridge.java | 54 --- .../be/datafusion/DataFusionPlugin.java | 29 +- .../be/datafusion/DatafusionContext.java | 44 +- .../DatafusionSearchExecEngine.java | 45 +- .../be/lucene/LuceneSearchContext.java | 47 +-- .../be/lucene/LuceneSearchEnginePlugin.java | 28 +- .../be/lucene/LuceneSearchExecEngine.java | 61 --- .../be/lucene/LuceneSourceContext.java | 49 --- .../be/lucene/LuceneSourceProvider.java | 46 --- .../opensearch/analytics/AnalyticsPlugin.java | 13 +- .../analytics/exec/AnalyticsQueryService.java | 144 +++++++ .../analytics/exec/DefaultPlanExecutor.java | 50 ++- .../engine/AnalyticsQueryServiceTests.java | 391 ++++++++++++++++++ .../engine/DefaultPlanExecutorTests.java | 107 ----- .../exec/DefaultPlanExecutorTests.java | 105 +++++ .../org/opensearch/index/IndexModule.java | 65 +++ .../index/engine/DataFormatAwareEngine.java | 59 +-- .../exec/DataFormatAwareEngineFactory.java | 10 +- .../index/engine/exec/EngineSearcher.java | 2 +- .../index/engine/exec/SearchExecEngine.java | 54 --- .../plugins/SearchBackEndPlugin.java | 27 -- .../search/SearchExecutionContext.java | 9 +- .../dataformat/DataFormatPluginTests.java | 122 +----- 28 files changed, 941 insertions(+), 826 deletions(-) delete mode 100644 sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineBridge.java create mode 100644 sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/ExecutionContext.java create mode 100644 
sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/SearchExecEngine.java create mode 100644 sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/plan/ResolvedPlan.java delete mode 100644 sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionBridge.java delete mode 100644 sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchExecEngine.java delete mode 100644 sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceContext.java delete mode 100644 sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceProvider.java create mode 100644 sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/AnalyticsQueryService.java create mode 100644 sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/AnalyticsQueryServiceTests.java delete mode 100644 sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/DefaultPlanExecutorTests.java create mode 100644 sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/exec/DefaultPlanExecutorTests.java delete mode 100644 server/src/main/java/org/opensearch/index/engine/exec/SearchExecEngine.java diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineBridge.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineBridge.java deleted file mode 100644 index 2931e6749dfeb..0000000000000 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineBridge.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.analytics.backend; - -import java.io.Closeable; - -/** - * Per-query facade that bridges the backend-specific engines with the analytics engine. - *

- * A bridge is created per query with a reader bound to a catalog snapshot. - * It converts a logical plan to the engine's native format and executes it, - * managing the backend context internally. - * - * @param serialised plan type (e.g., {@code byte[]} for Substrait) - * @param result stream type - * @param logical plan type (e.g., Calcite {@code RelNode}) - * @opensearch.internal - */ -public interface EngineBridge extends Closeable { - - /** - * Converts a logical plan fragment into the native engine's serialised - * format. - * - * @param fragment the logical plan subtree to serialise - * @return the serialised plan in the engine's wire format - */ - Fragment convertFragment(LogicalPlan fragment); - - /** - * Submits the serialised plan to the native engine for execution and - * returns an opaque handle to the result stream. - * - *

The returned handle is a pointer into native memory (e.g., a - * {@code long} address of a Rust {@code RecordBatchStream}). The - * caller must eventually close the stream through a corresponding - * native call to avoid leaking resources. - * - * @param fragment the serialised plan produced by {@link #convertFragment} - * @return an opaque handle to the native result stream - */ - Stream execute(Fragment fragment); -} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/ExecutionContext.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/ExecutionContext.java new file mode 100644 index 0000000000000..1d0c77473b401 --- /dev/null +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/ExecutionContext.java @@ -0,0 +1,50 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.analytics.backend; + +import org.opensearch.action.search.SearchShardTask; +import org.opensearch.analytics.plan.ResolvedPlan; +import org.opensearch.index.engine.DataFormatAwareEngine; + +/** + * Execution context carrying plan, reader, and delegation state through + * the query execution lifecycle. 
+ * + * @opensearch.internal + */ +public class ExecutionContext { + + private final ResolvedPlan plan; + private final String tableName; + private final DataFormatAwareEngine.DataFormatAwareReader reader; + SearchShardTask task; + + public ExecutionContext(ResolvedPlan plan, String tableName, SearchShardTask task, DataFormatAwareEngine.DataFormatAwareReader reader) { + this.plan = plan; + this.tableName = tableName; + this.task = task; + this.reader = reader; + } + + public SearchShardTask getTask() { + return task; + } + + public ResolvedPlan plan() { + return plan; + } + + public String getTableName() { + return tableName; + } + + public DataFormatAwareEngine.DataFormatAwareReader getReader() { + return reader; + } +} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/SearchExecEngine.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/SearchExecEngine.java new file mode 100644 index 0000000000000..9c29ee0faf8f4 --- /dev/null +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/SearchExecEngine.java @@ -0,0 +1,35 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.analytics.backend; + +import org.opensearch.common.annotation.ExperimentalApi; + +import java.io.Closeable; +import java.io.IOException; + +/** + * Shard-level search execution engine interface. + * @opensearch.experimental + */ +@ExperimentalApi +public interface SearchExecEngine extends Closeable { + + /** + * Prepares this engine for execution using the given context. + * + * @param context the execution context carrying the resolved plan, table name, task, and reader + */ + void prepare(ExecutionContext context); + + /** Executes the context and returns a result stream. 
*/ + EngineResultStream execute(ExecutionContext context) throws IOException; + + @Override + default void close() throws IOException {} +} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/plan/ResolvedPlan.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/plan/ResolvedPlan.java new file mode 100644 index 0000000000000..6d644018bedcd --- /dev/null +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/plan/ResolvedPlan.java @@ -0,0 +1,46 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.analytics.plan; + +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rex.RexNode; + +import java.util.Map; + +/** + * An immutable value type representing a fully resolved query plan, + * consisting of the optimized and backend-tagged {@link RelNode} tree, + * the name of the backend that will execute it, and any delegation + * predicates that secondary backends must evaluate. + */ +public final class ResolvedPlan { + + private final RelNode root; + private final String primaryBackend; + private final Map delegationPredicates; + + public ResolvedPlan(RelNode root, String primaryBackend, Map delegationPredicates) { + this.root = root; + this.primaryBackend = primaryBackend; + this.delegationPredicates = Map.copyOf(delegationPredicates); + } + + public RelNode getRoot() { + return root; + } + + public String getPrimaryBackend() { + return primaryBackend; + } + + /** Predicates delegated to secondary backends (backend name → predicate). Empty if no delegation. 
*/ + public Map getDelegationPredicates() { + return delegationPredicates; + } +} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java index e0693cfa04d5a..6b7057bc65988 100644 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java @@ -8,13 +8,17 @@ package org.opensearch.analytics.spi; -import org.apache.calcite.sql.SqlOperatorTable; -import org.opensearch.analytics.backend.EngineBridge; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.logical.LogicalAggregate; +import org.apache.calcite.rel.logical.LogicalFilter; +import org.apache.calcite.rel.logical.LogicalProject; +import org.apache.calcite.rel.logical.LogicalTableScan; +import org.opensearch.analytics.backend.ExecutionContext; +import org.opensearch.analytics.backend.SearchExecEngine; import org.opensearch.index.engine.dataformat.DataFormat; -import org.opensearch.index.engine.exec.SearchExecEngine; -import java.io.IOException; import java.util.List; +import java.util.Set; /** * SPI extension point for back-end query engines (DataFusion, Lucene, etc.). @@ -25,16 +29,14 @@ public interface AnalyticsSearchBackendPlugin { /** Unique engine name (e.g., "lucene", "datafusion"). */ String name(); - /** - * Creates a per-query bridge bound to the given reader. - * - * @param reader the format-specific reader from the composite reader - * @return a bridge for this query, caller must close when done - */ - EngineBridge bridge(DataFormat format, Object reader, SearchExecEngine engine) throws IOException; + /** Creates a searcher bound to the given reader snapshot. 
*/ + SearchExecEngine searcher(ExecutionContext ctx); /** Supported functions as a Calcite operator table, or null if the back-end adds no functions. */ - SqlOperatorTable operatorTable(); + /** Returns the set of RelNode operator classes this backend supports. */ + default Set> supportedOperators() { + return Set.of(LogicalTableScan.class, LogicalFilter.class, LogicalAggregate.class, LogicalProject.class); + } // TODO : remove this ? List getSupportedFormats(); diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionBridge.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionBridge.java deleted file mode 100644 index bd466a8dd2228..0000000000000 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionBridge.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.be.datafusion; - -import org.apache.calcite.rel.RelNode; -import org.opensearch.analytics.backend.EngineBridge; - -import java.io.IOException; - -/** - * Per-query bridge that wraps {@link DatafusionSearchExecEngine}. - * Bound to a reader at construction — hides the {@link DatafusionContext} - * from the analytics engine layer. 
- */ -public class DataFusionBridge implements EngineBridge { - - private final DatafusionSearchExecEngine engine; - private final DatafusionReader reader; - private DatafusionContext context; - - public DataFusionBridge(DatafusionSearchExecEngine engine, DatafusionReader reader) { - this.engine = engine; - this.reader = reader; - } - - @Override - public byte[] convertFragment(RelNode fragment) { - return engine.convertFragment(fragment); - } - - @Override - public DatafusionResultStream execute(byte[] plan) { - try { - context = engine.createContext(reader, plan, null, null, null); - return engine.execute(context); - } catch (IOException e) { - throw new RuntimeException("DataFusion execution failed", e); - } - } - - @Override - public void close() throws IOException { - if (context != null) { - context.close(); - context = null; - } - } -} diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java index 54610daa0558c..624947f1311c5 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java @@ -8,10 +8,10 @@ package org.opensearch.be.datafusion; -import org.apache.calcite.sql.SqlOperatorTable; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.opensearch.analytics.backend.EngineBridge; +import org.opensearch.analytics.backend.ExecutionContext; +import org.opensearch.analytics.backend.SearchExecEngine; import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin; import org.opensearch.cluster.metadata.IndexNameExpressionResolver; import org.opensearch.cluster.service.ClusterService; @@ -23,7 +23,6 @@ import org.opensearch.env.NodeEnvironment; import 
org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.exec.EngineReaderManager; -import org.opensearch.index.engine.exec.SearchExecEngine; import org.opensearch.index.shard.ShardPath; import org.opensearch.plugins.Plugin; import org.opensearch.plugins.SearchBackEndPlugin; @@ -46,7 +45,7 @@ * per-shard {@link DatafusionSearchExecEngine} instances via the * {@link AnalyticsSearchBackendPlugin} SPI. */ -public class DataFusionPlugin extends Plugin implements AnalyticsSearchBackendPlugin, SearchBackEndPlugin { +public class DataFusionPlugin extends Plugin implements SearchBackEndPlugin, AnalyticsSearchBackendPlugin { private static final Logger logger = LogManager.getLogger(DataFusionPlugin.class); @@ -104,13 +103,13 @@ public String name() { } @Override - public EngineBridge bridge(DataFormat format, Object reader, SearchExecEngine engine) throws IOException { - return new DataFusionBridge((DatafusionSearchExecEngine) engine, (DatafusionReader) reader); - } - - @Override - public SqlOperatorTable operatorTable() { - return null; + public SearchExecEngine searcher(ExecutionContext ctx) { + // TODO: resolve DataFormat properly instead of passing null + DatafusionReader dfReader = (DatafusionReader) ctx.getReader().getReader(null); + DatafusionContext context = new DatafusionContext(ctx.getTask(), dfReader, dataFusionService.getNativeRuntime()); + DatafusionSearchExecEngine datafusionSearchExecEngine = new DatafusionSearchExecEngine(context); + datafusionSearchExecEngine.prepare(ctx); + return datafusionSearchExecEngine; } @Override @@ -118,14 +117,6 @@ public EngineReaderManager createReaderManager(DataFormat format, ShardPath s return new DatafusionReaderManager(format, shardPath); } - @Override - public SearchExecEngine createSearchExecEngine(DataFormat format, ShardPath shardPath) throws IOException { - if (dataFusionService == null) { - throw new IllegalStateException("DataFusionPlugin.createComponents() has not been called yet"); - } 
- return new DatafusionSearchExecEngine(dataFusionService.getNativeRuntime(), format); - } - /** * Data formats this plugin can handle. Used by CompositeEngine to route queries. */ diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java index d9a85ef04edb0..6ec31b550164b 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java @@ -8,12 +8,11 @@ package org.opensearch.be.datafusion; +import org.opensearch.action.search.SearchShardTask; import org.opensearch.be.datafusion.jni.StreamHandle; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.IndexFilterTree; import org.opensearch.search.SearchExecutionContext; -import org.opensearch.search.SearchShardTarget; -import org.opensearch.search.internal.ShardSearchRequest; import java.io.IOException; @@ -26,38 +25,21 @@ * @opensearch.experimental */ @ExperimentalApi -public class DatafusionContext implements SearchExecutionContext { +public class DatafusionContext implements SearchExecutionContext { - private final ShardSearchRequest request; - private final SearchShardTarget shardTarget; private final DatafusionSearcher engineSearcher; private final NativeRuntimeHandle nativeRuntime; private DatafusionQuery datafusionQuery; private IndexFilterTree filterTree; private StreamHandle streamHandle; + private SearchShardTask task; - public DatafusionContext( - ShardSearchRequest request, - SearchShardTarget shardTarget, - DatafusionReader reader, - NativeRuntimeHandle nativeRuntime - ) throws IOException { - this.request = request; - this.shardTarget = shardTarget; + public DatafusionContext(SearchShardTask task, DatafusionReader 
reader, NativeRuntimeHandle nativeRuntime) { + this.task = task; this.engineSearcher = new DatafusionSearcher(reader.getReaderHandle()); this.nativeRuntime = nativeRuntime; } - @Override - public ShardSearchRequest request() { - return request; - } - - @Override - public SearchShardTarget shardTarget() { - return shardTarget; - } - @Override public void close() throws IOException { try { @@ -76,12 +58,6 @@ public void close() throws IOException { } } - // DataFusion-specific - - public DatafusionSearcher getEngineSearcher() { - return engineSearcher; - } - /** * Returns the live native runtime pointer for JNI calls. */ @@ -118,4 +94,14 @@ public StreamHandle getStreamHandle() { public void setStreamHandle(StreamHandle streamHandle) { this.streamHandle = streamHandle; } + + @Override + public SearchShardTask task() { + return task; + } + + @Override + public DatafusionSearcher getSearcher() { + return engineSearcher; + } } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java index ea5deba39de0f..589075dfe5e0a 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java @@ -8,56 +8,45 @@ package org.opensearch.be.datafusion; -import org.opensearch.action.search.SearchShardTask; +import org.opensearch.analytics.backend.EngineResultStream; +import org.opensearch.analytics.backend.ExecutionContext; +import org.opensearch.analytics.backend.SearchExecEngine; import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.engine.dataformat.DataFormat; -import org.opensearch.index.engine.exec.SearchExecEngine; -import org.opensearch.search.SearchShardTarget; 
-import org.opensearch.search.internal.ShardSearchRequest; import java.io.IOException; /** * DataFusion-backed search execution engine. *

- * Converts logical plan fragments to Substrait, executes them via the native - * DataFusion runtime, and returns results as a {@link DatafusionResultStream}. + * Delegates execution to the native DataFusion runtime via {@link DatafusionSearcher}. * * @opensearch.experimental */ @ExperimentalApi -public class DatafusionSearchExecEngine implements SearchExecEngine { +public class DatafusionSearchExecEngine implements SearchExecEngine { - private final NativeRuntimeHandle nativeRuntime; + private final DatafusionContext datafusionContext; - public DatafusionSearchExecEngine(NativeRuntimeHandle nativeRuntime, DataFormat dataFormat) { - this.nativeRuntime = nativeRuntime; + public DatafusionSearchExecEngine(DatafusionContext datafusionContext) { + this.datafusionContext = datafusionContext; } @Override - public byte[] convertFragment(Object fragment) { + public void prepare(ExecutionContext requestContext) { // TODO: wire Substrait conversion (RelNode → Substrait bytes) - throw new UnsupportedOperationException("Substrait conversion not yet wired"); + byte[] substraitBytes = null; + datafusionContext.setDatafusionQuery(new DatafusionQuery(requestContext.getTableName(), substraitBytes)); } @Override - public DatafusionContext createContext( - Object reader, - byte[] plan, - ShardSearchRequest request, - SearchShardTarget shardTarget, - SearchShardTask task - ) throws IOException { - DatafusionReader dfReader = (DatafusionReader) reader; - DatafusionContext context = new DatafusionContext(request, shardTarget, dfReader, nativeRuntime); - context.setDatafusionQuery(new DatafusionQuery("", plan)); - return context; + public EngineResultStream execute(ExecutionContext requestContext) throws IOException { + DatafusionSearcher searcher = datafusionContext.getSearcher(); + searcher.search(datafusionContext); + return new DatafusionResultStream(datafusionContext.getStreamHandle()); } @Override - public DatafusionResultStream execute(DatafusionContext context) throws 
IOException { - DatafusionSearcher searcher = context.getEngineSearcher(); - searcher.search(context); - return new DatafusionResultStream(context.getStreamHandle()); + public void close() throws IOException { + datafusionContext.close(); } } diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java index 2851d2759b180..4865beff04065 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java @@ -11,10 +11,9 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; +import org.opensearch.action.search.SearchShardTask; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.search.SearchExecutionContext; -import org.opensearch.search.SearchShardTarget; -import org.opensearch.search.internal.ShardSearchRequest; import java.io.IOException; @@ -24,57 +23,37 @@ * @opensearch.experimental */ @ExperimentalApi -public class LuceneSearchContext implements SearchExecutionContext { - - private final ShardSearchRequest request; - private final SearchShardTarget shardTarget; +public class LuceneSearchContext implements SearchExecutionContext { + private final SearchShardTask task; private final DirectoryReader reader; private final LuceneEngineSearcher searcher; private Query query; - public LuceneSearchContext(ShardSearchRequest request, SearchShardTarget shardTarget, DirectoryReader reader) throws IOException { + public LuceneSearchContext(SearchShardTask task, DirectoryReader reader, Query query) throws IOException { this.reader = reader; IndexSearcher indexSearcher = new IndexSearcher(reader); this.searcher = new 
LuceneEngineSearcher(indexSearcher, reader); - this.request = request; - this.shardTarget = shardTarget; + this.task = task; + this.query = query; } public Query getQuery() { return query; } - public DirectoryReader getReader() { - return reader; - } - - public void setQuery(Query query) { - this.query = query; - } - - /** - * Returns the number of segments for the registered weight. - */ - public int getSegmentCount() { - return -1; - } - - /** - * Returns the max doc array for all segments of the registered weight. - */ - public int[] getSegmentMaxDocs() { - return null; + @Override + public SearchShardTask task() { + return task; } @Override - public ShardSearchRequest request() { - return request; + public LuceneEngineSearcher getSearcher() { + return searcher; } - @Override - public SearchShardTarget shardTarget() { - return shardTarget; + public void setQuery(Query query) { + this.query = query; } @Override diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java index 210916fad5b6d..efbff088fb733 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchEnginePlugin.java @@ -8,15 +8,9 @@ package org.opensearch.be.lucene; -import org.apache.calcite.sql.SqlOperatorTable; -import org.opensearch.analytics.backend.EngineBridge; -import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.exec.EngineReaderManager; -import org.opensearch.index.engine.exec.IndexFilterProvider; -import org.opensearch.index.engine.exec.SearchExecEngine; -import 
org.opensearch.index.engine.exec.SourceProvider; import org.opensearch.index.shard.ShardPath; import org.opensearch.plugins.SearchBackEndPlugin; @@ -29,38 +23,18 @@ * @opensearch.experimental */ @ExperimentalApi -public class LuceneSearchEnginePlugin implements AnalyticsSearchBackendPlugin, SearchBackEndPlugin { +public class LuceneSearchEnginePlugin implements SearchBackEndPlugin { @Override public String name() { return "lucene-analytics-backend"; } - @Override - public EngineBridge bridge(DataFormat dataFormat, Object reader, SearchExecEngine engine) { - return null; // TODO : Lucene backend is index filter / source provider only , need to think about bridge - } - - @Override - public SqlOperatorTable operatorTable() { - return null; - } - @Override public EngineReaderManager createReaderManager(DataFormat format, ShardPath shardPath) throws IOException { return new LuceneReaderManager(format); } - @Override - public IndexFilterProvider createIndexFilterProvider(DataFormat format, ShardPath shardPath) throws IOException { - return new LuceneIndexFilterProvider(); - } - - @Override - public SourceProvider createSourceProvider(DataFormat format, ShardPath shardPath) throws IOException { - return new LuceneSourceProvider(); - } - @Override public List getSupportedFormats() { return List.of(); diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchExecEngine.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchExecEngine.java deleted file mode 100644 index c899fdbe9263c..0000000000000 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchExecEngine.java +++ /dev/null @@ -1,61 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.be.lucene; - -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.opensearch.action.search.SearchShardTask; -import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.engine.exec.SearchExecEngine; -import org.opensearch.search.SearchShardTarget; -import org.opensearch.search.internal.ShardSearchRequest; - -import java.io.IOException; - -/** - * Lucene-backed search execution engine. - * - * @opensearch.experimental - */ -@ExperimentalApi -public class LuceneSearchExecEngine implements SearchExecEngine { - - @Override - public Query convertFragment(Object fragment) { - if (fragment instanceof Query) { - return (Query) fragment; - } - throw new UnsupportedOperationException("Expected Lucene Query, got " + fragment.getClass().getSimpleName()); - } - - @Override - public LuceneSearchContext createContext( - Object reader, - Query plan, - ShardSearchRequest request, - SearchShardTarget shardTarget, - SearchShardTask task - ) throws IOException { - DirectoryReader directoryReader = (DirectoryReader) reader; - return new LuceneSearchContext(request, shardTarget, directoryReader); - } - - @Override - public Void execute(LuceneSearchContext context) throws IOException { - DirectoryReader reader = context.getReader(); - LuceneEngineSearcher searcher = new LuceneEngineSearcher(new IndexSearcher(reader), reader); - try { - searcher.search(context); - } finally { - searcher.close(); - } - return null; // TODO : figure out this path or remove this class for now - } -} diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceContext.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceContext.java deleted file mode 100644 index bf495f4220fb5..0000000000000 --- 
a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceContext.java +++ /dev/null @@ -1,49 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.be.lucene; - -import org.apache.lucene.index.DirectoryReader; -import org.apache.lucene.search.IndexSearcher; -import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.engine.exec.SourceContext; - -import java.io.IOException; - -/** - * @opensearch.experimental - */ -@ExperimentalApi -public class LuceneSourceContext implements SourceContext { - - private final Object query; - private final DirectoryReader reader; - private final IndexSearcher searcher; - - public LuceneSourceContext(Object query, DirectoryReader reader) { - this.query = query; - this.reader = reader; - this.searcher = new IndexSearcher(reader); - } - - @Override - public Object query() { - return query; - } - - public DirectoryReader getReader() { - return reader; - } - - public IndexSearcher getSearcher() { - return searcher; - } - - @Override - public void close() throws IOException {} -} diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceProvider.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceProvider.java deleted file mode 100644 index d2de84add4880..0000000000000 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSourceProvider.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.be.lucene; - -import org.apache.lucene.index.DirectoryReader; -import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.engine.exec.SourceProvider; - -import java.io.IOException; -import java.util.Collections; -import java.util.Iterator; - -/** - * Lucene-backed {@link SourceProvider}. - *

- * Executes the full query+scan+filter in Lucene and streams back - * projections/aggregation results to the primary engine (DataFusion). - *

- * Used when all queried fields are Lucene-indexed and Lucene can - * fully resolve the query more efficiently than scanning parquet. - * - * @opensearch.experimental - */ -@ExperimentalApi -public class LuceneSourceProvider implements SourceProvider { - - @Override - public LuceneSourceContext createContext(Object query, DirectoryReader reader) throws IOException { - return new LuceneSourceContext(query, reader); - } - - @Override - public Iterator execute(LuceneSourceContext context) throws IOException { - // TODO: execute query via context.getSearcher(), collect results, return iterator - return Collections.emptyIterator(); - } - - @Override - public void close() {} -} diff --git a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/AnalyticsPlugin.java b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/AnalyticsPlugin.java index 9d4132031aab6..afe06cd1e413c 100644 --- a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/AnalyticsPlugin.java +++ b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/AnalyticsPlugin.java @@ -94,17 +94,8 @@ public Collection createGuiceModules() { } private SqlOperatorTable aggregateOperatorTables() { - List tables = new ArrayList<>(); - for (AnalyticsSearchBackendPlugin backEnd : backEnds) { - SqlOperatorTable table = backEnd.operatorTable(); - if (table != null) { - tables.add(table); - } - } - if (tables.isEmpty()) { - return SqlOperatorTables.of(); - } - return SqlOperatorTables.chain(tables.toArray(new SqlOperatorTable[0])); + // TODO: re-wire once operatorTable() is added back to AnalyticsSearchBackendPlugin + return SqlOperatorTables.of(); } /** diff --git a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/AnalyticsQueryService.java b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/AnalyticsQueryService.java new file mode 100644 index 0000000000000..644c0a950973a --- /dev/null +++ 
b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/AnalyticsQueryService.java
@@ -0,0 +1,192 @@
+/*
+ * SPDX-License-Identifier: Apache-2.0
+ *
+ * The OpenSearch Contributors require contributions made to
+ * this file be licensed under the Apache-2.0 license or a
+ * compatible open source license.
+ */
+
+package org.opensearch.analytics.exec;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.opensearch.action.search.SearchShardTask;
+import org.opensearch.analytics.backend.EngineResultBatch;
+import org.opensearch.analytics.backend.EngineResultBatchIterator;
+import org.opensearch.analytics.backend.EngineResultStream;
+import org.opensearch.analytics.backend.ExecutionContext;
+import org.opensearch.analytics.backend.SearchExecEngine;
+import org.opensearch.analytics.plan.ResolvedPlan;
+import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin;
+import org.opensearch.common.annotation.ExperimentalApi;
+import org.opensearch.common.lifecycle.AbstractLifecycleComponent;
+import org.opensearch.common.util.concurrent.ConcurrentCollections;
+import org.opensearch.common.util.concurrent.ConcurrentMapLong;
+import org.opensearch.index.engine.DataFormatAwareEngine;
+import org.opensearch.index.shard.IndexShard;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
+import java.util.concurrent.atomic.AtomicLong;
+
+/**
+ * Data-node service for analytics query execution. Manages the lifecycle of
+ * query execution contexts and dispatches resolved plans to the appropriate
+ * backend engines.
+ *
+ * <p>Handles: shard engine resolution, reader snapshot acquisition, delegation
+ * setup, engine execution, result collection, and context tracking.
+ */
+@ExperimentalApi
+public class AnalyticsQueryService extends AbstractLifecycleComponent {
+
+    private static final Logger logger = LogManager.getLogger(AnalyticsQueryService.class);
+
+    /** Table name recorded on the context when the plan root is not bound to a table. */
+    private static final String UNKNOWN_TABLE = "unknown";
+
+    private final AtomicLong nextContextId = new AtomicLong(1);
+    private final ConcurrentMapLong<ExecutionContext> activeContexts = ConcurrentCollections
+        .newConcurrentMapLongWithAggressiveConcurrency();
+
+    private final Map<String, AnalyticsSearchBackendPlugin> backEnds;
+
+    /**
+     * Creates the service.
+     *
+     * @param backEnds backend plugins keyed by the value a plan reports from
+     *                 {@link ResolvedPlan#getPrimaryBackend()}
+     */
+    public AnalyticsQueryService(Map<String, AnalyticsSearchBackendPlugin> backEnds) {
+        this.backEnds = Objects.requireNonNull(backEnds, "backEnds must not be null");
+    }
+
+    /**
+     * Executes a resolved plan against a local shard.
+     *
+     * <p>Acquires a reader from the shard's composite engine, registers an
+     * {@link ExecutionContext} for the duration of the call, executes the plan on
+     * its primary backend and copies every result batch into rows. The context is
+     * deregistered before this method returns, whether or not execution succeeds.
+     *
+     * @param plan  the resolved plan with backend assignments and delegation predicates
+     * @param shard the local index shard
+     * @param task  the shard task passed through to the execution context
+     * @return rows as list of Object arrays
+     * @throws NullPointerException  if {@code plan} or {@code shard} is {@code null}
+     * @throws IllegalStateException if the shard has no composite engine, or no plugin
+     *                               is registered for the plan's primary backend
+     * @throws RuntimeException      wrapping any failure raised while acquiring the
+     *                               reader or executing the plan
+     */
+    public Iterable<Object[]> execute(ResolvedPlan plan, IndexShard shard, SearchShardTask task) {
+        // Fail with a named argument instead of an anonymous NPE a few lines further down.
+        Objects.requireNonNull(plan, "plan must not be null");
+        Objects.requireNonNull(shard, "shard must not be null");
+
+        DataFormatAwareEngine dataFormatAwareEngine = shard.getCompositeEngine();
+        if (dataFormatAwareEngine == null) {
+            throw new IllegalStateException("No CompositeEngine on shard [" + shard.shardId() + "]");
+        }
+
+        AnalyticsSearchBackendPlugin plugin = backEnds.get(plan.getPrimaryBackend());
+        if (plugin == null) {
+            throw new IllegalStateException("No plugin registered for backend [" + plan.getPrimaryBackend() + "]");
+        }
+
+        String tableName = resolveTableName(plan);
+        long ctxId = -1;
+
+        try (DataFormatAwareEngine.DataFormatAwareReader dataFormatAwareReader = dataFormatAwareEngine.acquireReader()) {
+            ExecutionContext ctx = new ExecutionContext(plan, tableName, task, dataFormatAwareReader);
+            ctxId = putContext(ctx);
+            List<Object[]> rows = new ArrayList<>();
+            // NOTE(review): prepare(ctx) is not called here; confirm searcher(ctx) returns a ready engine.
+            try (SearchExecEngine engine = plugin.searcher(ctx)) {
+                logger.debug("[AnalyticsQueryService] Executing via [{}], ctxId={}", plugin.name(), ctxId);
+                try (EngineResultStream resultStream = engine.execute(ctx)) {
+                    EngineResultBatchIterator batchIterator = resultStream.iterator();
+                    while (batchIterator.hasNext()) {
+                        EngineResultBatch batch = batchIterator.next();
+                        List<String> fieldNames = batch.getFieldNames();
+                        int columnCount = fieldNames.size();
+                        int rowCount = batch.getRowCount();
+                        for (int row = 0; row < rowCount; row++) {
+                            Object[] rowValues = new Object[columnCount];
+                            for (int col = 0; col < columnCount; col++) {
+                                rowValues[col] = batch.getFieldValue(fieldNames.get(col), row);
+                            }
+                            rows.add(rowValues);
+                        }
+                    }
+                }
+            }
+            logger.debug("[AnalyticsQueryService] Completed via [{}], {} rows, ctxId={}", plugin.name(), rows.size(), ctxId);
+            return rows;
+        } catch (Exception e) {
+            throw new RuntimeException("Execution failed for [" + plugin.name() + "]", e);
+        } finally {
+            // ctxId stays -1 when the reader could not be acquired; removing it is then a no-op.
+            removeContext(ctxId);
+        }
+    }
+
+    /**
+     * Returns the last segment of the plan root's qualified table name, or
+     * {@value #UNKNOWN_TABLE} when the root is absent, is not bound to a table, or
+     * the qualified name is empty.
+     */
+    private static String resolveTableName(ResolvedPlan plan) {
+        if (plan.getRoot() == null || plan.getRoot().getTable() == null) {
+            return UNKNOWN_TABLE;
+        }
+        List<String> qualifiedName = plan.getRoot().getTable().getQualifiedName();
+        return qualifiedName.isEmpty() ? UNKNOWN_TABLE : qualifiedName.get(qualifiedName.size() - 1);
+    }
+
+    /**
+     * Registers a context and returns the id it is tracked under.
+     */
+    public long putContext(ExecutionContext context) {
+        long id = nextContextId.getAndIncrement();
+        activeContexts.put(id, context);
+        return id;
+    }
+
+    /**
+     * Returns the context tracked under {@code id}, or {@code null} if there is none.
+     */
+    public ExecutionContext getContext(long id) {
+        return activeContexts.get(id);
+    }
+
+    /**
+     * Stops tracking {@code id} and returns the context that was registered, if any.
+     */
+    public ExecutionContext removeContext(long id) {
+        return activeContexts.remove(id);
+    }
+
+    /**
+     * Returns the number of contexts currently tracked.
+     */
+    public int getActiveContextCount() {
+        return activeContexts.size();
+    }
+
+    @Override
+    protected void doStart() {
+        logger.info("[AnalyticsQueryService] Started");
+    }
+
+    @Override
+    protected void doStop() {
+        logger.info("[AnalyticsQueryService] Stopping, clearing {} active contexts", activeContexts.size());
+        activeContexts.clear();
+    }
+
+    @Override
+    protected void doClose() {}
+}
diff --git a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java
index 49afa8c3886d9..e9220b7af8432 100644
--- a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java
+++ b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java
@@ -12,17 +12,15 @@
 import org.apache.calcite.rel.core.TableScan;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
-import org.opensearch.analytics.backend.EngineBridge;
+import org.opensearch.action.search.SearchShardTask;
+import org.opensearch.analytics.plan.ResolvedPlan;
 import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin;
 import org.opensearch.cluster.metadata.IndexMetadata;
 import org.opensearch.cluster.service.ClusterService;
 import org.opensearch.index.IndexService;
-import org.opensearch.index.engine.DataFormatAwareEngine;
-import org.opensearch.index.engine.dataformat.DataFormat;
 import org.opensearch.index.shard.IndexShard;
 import org.opensearch.indices.IndicesService;
 
-import java.util.ArrayList;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -31,7 +29,7 @@
 /**
  * {@link QueryPlanExecutor} default implementation.
  *

- * Acquires a composite reader, creates a per-query {@link EngineBridge}
+ * Acquires a composite reader, creates a per-query {@link org.opensearch.analytics.backend.SearchExecEngine}
  * bound to the reader, and delegates convert + execute to it.
  * No backend-specific context is exposed to this class.
  */
@@ -41,6 +39,8 @@ public class DefaultPlanExecutor implements QueryPlanExecutor<RelNode, Iterable<
     private final Map<String, AnalyticsSearchBackendPlugin> backEnds;
     private final IndicesService indicesService;
     private final ClusterService clusterService;
+    // TODO: - move out as data node side service
+    private final AnalyticsQueryService queryService;
 
     public DefaultPlanExecutor(List<AnalyticsSearchBackendPlugin> plugins, IndicesService indicesService, ClusterService clusterService) {
         this.backEnds = new LinkedHashMap<>();
@@ -49,32 +49,35 @@ public DefaultPlanExecutor(List<AnalyticsSearchBackendPlugin> plugins, IndicesSe
         }
         this.indicesService = indicesService;
         this.clusterService = clusterService;
+        this.queryService = new AnalyticsQueryService(backEnds);
+        // TODO : init planning components
     }
 
     @SuppressWarnings("unchecked")
     @Override
     public Iterable<Object[]> execute(RelNode logicalFragment, Object context) {
-        AnalyticsSearchBackendPlugin plugin = selectBackEnd();
         String tableName = extractTableName(logicalFragment);
-        DataFormatAwareEngine engine = resolveCompositeEngine(tableName);
+        IndexMetadata indexMetadata = clusterService.state().metadata().index(tableName);
+        if (indexMetadata == null) {
+            throw new IllegalArgumentException("Index [" + tableName + "] not found in cluster state");
+        }
+        int shardCount = indexMetadata.getNumberOfShards();
 
-        List<DataFormat> formats = plugin.getSupportedFormats();
-        DataFormat format = formats.get(0);
+        ResolvedPlan plan = null; // TODO : queryPlanner.plan(logicalFragment, shardCount);
 
-        try (DataFormatAwareEngine.DataFormatAwareReader reader = engine.acquireReader()) {
-            EngineBridge bridge = plugin.bridge(format, reader.getReader(format), engine.getSearchExecEngine(format));
-            try {
-                Object plan = bridge.convertFragment(logicalFragment);
-                Object result = bridge.execute(plan);
-                // TODO: consume result stream into rows
-                logger.info("[DefaultPlanExecutor] Executed via [{}]", plugin.name());
-                return new ArrayList<>();
-            } finally {
-                bridge.close();
-            }
-        } catch (Exception e) {
-            throw new RuntimeException("Execution failed for [" + plugin.name() + "]", e);
+        if (plan == null) {
+            // The planner is not wired yet; fail with a clear message instead of an NPE on the next line.
+            throw new UnsupportedOperationException("Query planning is not wired yet, cannot execute against [" + tableName + "]");
+        }
+        if ("unresolved".equals(plan.getPrimaryBackend())) {
+            throw new IllegalStateException("Planning did not resolve backend assignment for plan root");
         }
+
+        logger.info("[DefaultPlanExecutor] Plan resolved to backend [{}]", plan.getPrimaryBackend());
+
+        IndexShard shard = resolveShard(tableName);
+        SearchShardTask task = null; // TODO : init task
+        return queryService.execute(plan, shard, task);
     }
 
     static String extractTableName(RelNode node) {
@@ -89,7 +92,7 @@ static String extractTableName(RelNode node) {
         throw new IllegalArgumentException("No TableScan found in plan fragment");
     }
 
-    private DataFormatAwareEngine resolveCompositeEngine(String indexName) {
+    private IndexShard resolveShard(String indexName) {
         IndexMetadata meta = clusterService.state().metadata().index(indexName);
         if (meta == null) throw new IllegalArgumentException("Index [" + indexName + "] not found");
         IndexService indexService = indicesService.indexService(meta.getIndex());
@@ -97,10 +100,8 @@ private DataFormatAwareEngine resolveCompositeEngine(String indexName) {
         Set<Integer> shardIds = indexService.shardIds();
         if (shardIds.isEmpty()) throw new IllegalStateException("No shards for [" + indexName + "]");
         IndexShard shard = indexService.getShardOrNull(shardIds.iterator().next());
         if (shard == null) throw new IllegalStateException("Shard not found");
-        DataFormatAwareEngine ce = shard.getCompositeEngine();
-        if (ce == null) throw new IllegalStateException("No CompositeEngine on shard");
-        return ce;
+        return shard;
     }
 
     private AnalyticsSearchBackendPlugin selectBackEnd() {
diff --git a/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/AnalyticsQueryServiceTests.java
b/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/AnalyticsQueryServiceTests.java new file mode 100644 index 0000000000000..518bdfdc89a0f --- /dev/null +++ b/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/AnalyticsQueryServiceTests.java @@ -0,0 +1,391 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +package org.opensearch.analytics.engine; + +import org.apache.calcite.rel.RelNode; +import org.opensearch.action.search.SearchShardTask; +import org.opensearch.analytics.backend.EngineResultBatch; +import org.opensearch.analytics.backend.EngineResultBatchIterator; +import org.opensearch.analytics.backend.EngineResultStream; +import org.opensearch.analytics.backend.ExecutionContext; +import org.opensearch.analytics.backend.SearchExecEngine; +import org.opensearch.analytics.exec.AnalyticsQueryService; +import org.opensearch.analytics.plan.ResolvedPlan; +import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin; +import org.opensearch.index.engine.DataFormatAwareEngine; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.dataformat.FieldTypeCapabilities; +import org.opensearch.index.engine.exec.CatalogSnapshot; +import org.opensearch.index.engine.exec.EngineReaderManager; +import org.opensearch.index.engine.exec.Segment; +import org.opensearch.index.engine.exec.WriterFileSet; +import org.opensearch.index.shard.IndexShard; +import org.opensearch.test.OpenSearchTestCase; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * End-to-end tests for 
{@link AnalyticsQueryService} with mock + * {@link SearchExecEngine} and {@link AnalyticsSearchBackendPlugin}. + * Validates the write → refresh → catalog snapshot → acquire reader → execute path. + */ +public class AnalyticsQueryServiceTests extends OpenSearchTestCase { + + /** + * Full lifecycle: build catalog snapshot from writer file sets, wire up + * DataFormatAwareEngine, and execute via AnalyticsQueryService with a + * mock backend that returns rows from the reader. + */ + public void testEndToEndExecuteViaAnalyticsQueryService() throws IOException { + MockDataFormat format = new MockDataFormat(); + Path dir = createTempDir(); + + // Simulate two writer generations producing file sets + WriterFileSet fs1 = WriterFileSet.builder().directory(dir).writerGeneration(1L).addFile("data_gen1.parquet").addNumRows(2).build(); + WriterFileSet fs2 = WriterFileSet.builder().directory(dir).writerGeneration(2L).addFile("data_gen2.parquet").addNumRows(1).build(); + + // Build segments and catalog snapshot + Segment seg1 = Segment.builder(0L).addSearchableFiles(format, fs1).build(); + Segment seg2 = Segment.builder(1L).addSearchableFiles(format, fs2).build(); + MockCatalogSnapshot snapshot = new MockCatalogSnapshot(1L, List.of(seg1, seg2), format); + + // Wire reader manager and DataFormatAwareEngine + MockReaderManager readerManager = new MockReaderManager(format.name()); + readerManager.afterRefresh(true, snapshot); + + DataFormatAwareEngine engine = new DataFormatAwareEngine(Map.of(format, readerManager)); + engine.setLatestSnapshot(snapshot); + + // Mock IndexShard to return our engine + IndexShard shard = mock(IndexShard.class); + when(shard.getCompositeEngine()).thenReturn(engine); + + // Create mock backend plugin that returns rows based on reader content + MockBackendPlugin backendPlugin = new MockBackendPlugin(format); + AnalyticsQueryService service = new AnalyticsQueryService(Map.of("mock-backend", backendPlugin)); + + // Build a resolved plan targeting our mock 
backend + RelNode mockRoot = mock(RelNode.class); + when(mockRoot.getTable()).thenReturn(null); + ResolvedPlan plan = new ResolvedPlan(mockRoot, "mock-backend", Map.of()); + + Iterable results = service.execute(plan, shard, mock(SearchShardTask.class)); + List rows = new ArrayList<>(); + results.forEach(rows::add); + + // Mock engine returns 3 total rows (2 from gen1 + 1 from gen2) + assertEquals(3, rows.size()); + assertEquals(0, service.getActiveContextCount()); + } + + /** + * Verifies context tracking: contexts are registered during execution + * and cleaned up after completion. + */ + public void testContextTrackingLifecycle() { + AnalyticsQueryService service = new AnalyticsQueryService(Map.of()); + ExecutionContext ctx = new ExecutionContext(null, "test-table", null, null); + + long id = service.putContext(ctx); + assertEquals(1, service.getActiveContextCount()); + assertSame(ctx, service.getContext(id)); + + ExecutionContext removed = service.removeContext(id); + assertSame(ctx, removed); + assertEquals(0, service.getActiveContextCount()); + assertNull(service.getContext(id)); + } + + /** + * Verifies that execute throws when no backend plugin is registered + * for the plan's primary backend. 
+ */ + public void testExecuteThrowsForUnknownBackend() { + MockDataFormat format = new MockDataFormat(); + Path dir = createTempDir(); + WriterFileSet fs = WriterFileSet.builder().directory(dir).writerGeneration(1L).addFile("f.parquet").addNumRows(1).build(); + Segment seg = Segment.builder(0L).addSearchableFiles(format, fs).build(); + MockCatalogSnapshot snapshot = new MockCatalogSnapshot(1L, List.of(seg), format); + + MockReaderManager rm = new MockReaderManager(format.name()); + rm.afterRefresh(true, snapshot); + DataFormatAwareEngine engine = new DataFormatAwareEngine(Map.of(format, rm)); + engine.setLatestSnapshot(snapshot); + + IndexShard shard = mock(IndexShard.class); + when(shard.getCompositeEngine()).thenReturn(engine); + + AnalyticsQueryService service = new AnalyticsQueryService(Map.of()); + ResolvedPlan plan = new ResolvedPlan(null, "nonexistent", Map.of()); + + IllegalStateException ex = expectThrows(IllegalStateException.class, () -> service.execute(plan, shard, null)); + assertTrue(ex.getMessage().contains("No plugin registered for backend")); + } + + /** + * Verifies that execute throws when shard has no composite engine. 
+ */ + public void testExecuteThrowsWhenNoCompositeEngine() { + IndexShard shard = mock(IndexShard.class); + when(shard.getCompositeEngine()).thenReturn(null); + when(shard.shardId()).thenReturn(new org.opensearch.core.index.shard.ShardId("idx", "uuid", 0)); + + AnalyticsQueryService service = new AnalyticsQueryService(Map.of("be", mock(AnalyticsSearchBackendPlugin.class))); + ResolvedPlan plan = new ResolvedPlan(null, "be", Map.of()); + + IllegalStateException ex = expectThrows(IllegalStateException.class, () -> service.execute(plan, shard, null)); + assertTrue(ex.getMessage().contains("No CompositeEngine on shard")); + } + + // --- Mock implementations --- + + static class MockDataFormat implements DataFormat { + @Override + public String name() { + return "mock-columnar"; + } + + @Override + public long priority() { + return 100L; + } + + @Override + public Set supportedFields() { + return Set.of( + new FieldTypeCapabilities( + "integer", + Set.of(FieldTypeCapabilities.Capability.COLUMNAR_STORAGE, FieldTypeCapabilities.Capability.STORED_FIELDS) + ) + ); + } + } + + static class MockReaderManager implements EngineReaderManager { + private final String formatName; + private final Map readers = new HashMap<>(); + + MockReaderManager(String formatName) { + this.formatName = formatName; + } + + @Override + public Object getReader(CatalogSnapshot snapshot) { + return readers.get(snapshot); + } + + @Override + public void beforeRefresh() {} + + @Override + public void afterRefresh(boolean didRefresh, CatalogSnapshot snapshot) { + if (didRefresh == false || readers.containsKey(snapshot)) return; + Collection files = snapshot.getSearchableFiles(formatName); + long totalRows = 0; + for (WriterFileSet wfs : files) { + totalRows += wfs.numRows(); + } + readers.put(snapshot, totalRows); + } + + @Override + public void onDeleted(CatalogSnapshot snapshot) { + readers.remove(snapshot); + } + + @Override + public void onFilesDeleted(Collection files) {} + + @Override + public 
void onFilesAdded(Collection files) {} + } + + static class MockCatalogSnapshot extends CatalogSnapshot { + private final List segments; + private final MockDataFormat format; + + MockCatalogSnapshot(long generation, List segments, MockDataFormat format) { + super("mock-snapshot", generation, 1L); + this.segments = segments; + this.format = format; + } + + @Override + public Map getUserData() { + return Map.of(); + } + + @Override + public long getId() { + return generation; + } + + @Override + public List getSegments() { + return segments; + } + + @Override + public Collection getSearchableFiles(String dataFormat) { + List result = new ArrayList<>(); + for (Segment seg : segments) { + WriterFileSet wfs = seg.dfGroupedSearchableFiles().get(dataFormat); + if (wfs != null) result.add(wfs); + } + return result; + } + + @Override + public Set getDataFormats() { + return Set.of(format.name()); + } + + @Override + public long getLastWriterGeneration() { + return generation; + } + + @Override + public String serializeToString() { + return "mock-snapshot-" + generation; + } + + @Override + public void setCatalogSnapshotMap(Map map) {} + + @Override + public void setUserData(Map userData, boolean b) {} + + @Override + public Object getReader(DataFormat dataFormat) { + return null; + } + + @Override + protected void closeInternal() {} + } + + /** + * Mock SearchExecEngine that produces rows based on a fixed count + * provided at construction time. 
+ */ + static class MockSearchExecEngine implements SearchExecEngine { + private final long totalRows; + + MockSearchExecEngine(long totalRows) { + this.totalRows = totalRows; + } + + @Override + public void prepare(ExecutionContext context) {} + + @Override + public EngineResultStream execute(ExecutionContext context) { + return new MockResultStream(totalRows); + } + + @Override + public void close() {} + } + + static class MockResultStream implements EngineResultStream { + private final long rowCount; + + MockResultStream(long rowCount) { + this.rowCount = rowCount; + } + + @Override + public EngineResultBatchIterator iterator() { + return new MockBatchIterator(rowCount); + } + + @Override + public void close() {} + } + + static class MockBatchIterator implements EngineResultBatchIterator { + private final long rowCount; + private boolean consumed; + + MockBatchIterator(long rowCount) { + this.rowCount = rowCount; + } + + @Override + public boolean hasNext() { + return consumed == false; + } + + @Override + public EngineResultBatch next() { + consumed = true; + return new MockResultBatch((int) rowCount); + } + } + + static class MockResultBatch implements EngineResultBatch { + private final int rowCount; + + MockResultBatch(int rowCount) { + this.rowCount = rowCount; + } + + @Override + public List getFieldNames() { + return List.of("value"); + } + + @Override + public int getRowCount() { + return rowCount; + } + + @Override + public Object getFieldValue(String fieldName, int rowIndex) { + return "row_" + rowIndex; + } + } + + static class MockBackendPlugin implements AnalyticsSearchBackendPlugin { + private final DataFormat format; + + MockBackendPlugin(DataFormat format) { + this.format = format; + } + + @Override + public String name() { + return "mock-backend"; + } + + @Override + public SearchExecEngine searcher(ExecutionContext ctx) { + // Reader manager stores totalRows (Long) as the reader object + Object reader = ctx.getReader().getReader(format); + long 
rows = reader instanceof Long ? (Long) reader : 0L; + return new MockSearchExecEngine(rows); + } + + @Override + public List getSupportedFormats() { + return List.of(format); + } + } +} diff --git a/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/DefaultPlanExecutorTests.java b/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/DefaultPlanExecutorTests.java deleted file mode 100644 index 51a9b39c8dab4..0000000000000 --- a/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/DefaultPlanExecutorTests.java +++ /dev/null @@ -1,107 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.analytics.engine; - -import org.apache.calcite.jdbc.JavaTypeFactoryImpl; -import org.apache.calcite.plan.RelOptCluster; -import org.apache.calcite.plan.RelTraitSet; -import org.apache.calcite.plan.hep.HepPlanner; -import org.apache.calcite.plan.hep.HepProgramBuilder; -import org.apache.calcite.rel.AbstractRelNode; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.type.RelDataType; -import org.apache.calcite.rel.type.RelDataTypeFactory; -import org.apache.calcite.rex.RexBuilder; -import org.apache.calcite.sql.type.SqlTypeName; -import org.opensearch.analytics.exec.DefaultPlanExecutor; -import org.opensearch.test.OpenSearchTestCase; - -import java.util.List; - -/** - * Tests for {@link DefaultPlanExecutor}. 
- */ -public class DefaultPlanExecutorTests extends OpenSearchTestCase { - - private RelDataTypeFactory typeFactory; - private RelOptCluster cluster; - - @Override - public void setUp() throws Exception { - super.setUp(); - typeFactory = new JavaTypeFactoryImpl(); - RexBuilder rexBuilder = new RexBuilder(typeFactory); - HepPlanner planner = new HepPlanner(new HepProgramBuilder().build()); - cluster = RelOptCluster.create(planner, rexBuilder); - } - - /** - * Test that execute() does not throw for a valid fragment. - */ - public void testExecuteDoesNotThrowForValidFragment() { - DefaultPlanExecutor service = new DefaultPlanExecutor(List.of(), null, null); - - RelNode fragment = createRelNodeWithFieldCount(3); - Object context = new Object(); - - Object result = service.execute(fragment, context); - assertNotNull("execute() stub should return non-null", result); - } - - /** - * Test that execute() works with a multi-field fragment. - */ - public void testExecuteWithMultiFieldFragment() { - DefaultPlanExecutor service = new DefaultPlanExecutor(List.of(), null, null); - - int fieldCount = 5; - RelNode fragment = createRelNodeWithFieldCount(fieldCount); - Object context = new Object(); - - Object result = service.execute(fragment, context); - assertNotNull("execute() stub should return non-null", result); - } - - /** - * Test that execute() works with a single-field fragment. 
- */ - public void testExecuteWithSingleFieldFragment() { - DefaultPlanExecutor service = new DefaultPlanExecutor(List.of(), null, null); - - RelNode fragment = createRelNodeWithFieldCount(1); - Object context = new Object(); - - Object result = service.execute(fragment, context); - assertNotNull("execute() stub should return non-null", result); - } - - private RelNode createRelNodeWithFieldCount(int fieldCount) { - RelDataType rowType = buildRowType(fieldCount); - return new StubRelNode(cluster, cluster.traitSet(), rowType); - } - - private RelDataType buildRowType(int fieldCount) { - RelDataTypeFactory.Builder builder = typeFactory.builder(); - for (int i = 0; i < fieldCount; i++) { - builder.add("field_" + i, SqlTypeName.VARCHAR); - } - return builder.build(); - } - - /** - * Minimal concrete RelNode for testing. Extends AbstractRelNode - * which provides default implementations for all RelNode methods. - */ - private static class StubRelNode extends AbstractRelNode { - StubRelNode(RelOptCluster cluster, RelTraitSet traitSet, RelDataType rowType) { - super(cluster, traitSet); - this.rowType = rowType; - } - } -} diff --git a/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/exec/DefaultPlanExecutorTests.java b/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/exec/DefaultPlanExecutorTests.java new file mode 100644 index 0000000000000..3fe866a55dac0 --- /dev/null +++ b/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/exec/DefaultPlanExecutorTests.java @@ -0,0 +1,105 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. 
+ */ + +package org.opensearch.analytics.exec; + +import org.apache.calcite.jdbc.JavaTypeFactoryImpl; +import org.apache.calcite.plan.RelOptCluster; +import org.apache.calcite.plan.RelOptTable; +import org.apache.calcite.plan.RelTraitSet; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgramBuilder; +import org.apache.calcite.rel.AbstractRelNode; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.test.OpenSearchTestCase; + +import java.util.List; + +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.when; + +/** + * Tests for {@link DefaultPlanExecutor}. + */ +public class DefaultPlanExecutorTests extends OpenSearchTestCase { + + private RelDataTypeFactory typeFactory; + private RelOptCluster cluster; + + @Override + public void setUp() throws Exception { + super.setUp(); + typeFactory = new JavaTypeFactoryImpl(); + RexBuilder rexBuilder = new RexBuilder(typeFactory); + HepPlanner planner = new HepPlanner(new HepProgramBuilder().build()); + cluster = RelOptCluster.create(planner, rexBuilder); + } + + /** + * extractTableName returns the table name from a TableScan node. + */ + public void testExtractTableNameFromTableScan() { + RelDataType rowType = buildRowType(3); + RelOptTable table = mock(RelOptTable.class); + when(table.getQualifiedName()).thenReturn(List.of("schema", "my_index")); + when(table.getRowType()).thenReturn(rowType); + + TableScan scan = new StubTableScan(cluster, cluster.traitSet(), table); + assertEquals("my_index", DefaultPlanExecutor.extractTableName(scan)); + } + + /** + * extractTableName throws when no TableScan is found. 
+ */ + public void testExtractTableNameThrowsForNonTableScan() { + RelNode stub = new StubRelNode(cluster, cluster.traitSet(), buildRowType(1)); + IllegalArgumentException ex = expectThrows(IllegalArgumentException.class, () -> DefaultPlanExecutor.extractTableName(stub)); + assertTrue(ex.getMessage().contains("No TableScan found")); + } + + /** + * execute() throws NPE when clusterService is null (current TODO state). + */ + public void testExecuteThrowsWhenClusterServiceNull() { + DefaultPlanExecutor executor = new DefaultPlanExecutor(List.of(), null, null); + + RelDataType rowType = buildRowType(1); + RelOptTable table = mock(RelOptTable.class); + when(table.getQualifiedName()).thenReturn(List.of("my_index")); + when(table.getRowType()).thenReturn(rowType); + TableScan scan = new StubTableScan(cluster, cluster.traitSet(), table); + + expectThrows(NullPointerException.class, () -> executor.execute(scan, new Object())); + } + + private RelDataType buildRowType(int fieldCount) { + RelDataTypeFactory.Builder builder = typeFactory.builder(); + for (int i = 0; i < fieldCount; i++) { + builder.add("field_" + i, SqlTypeName.VARCHAR); + } + return builder.build(); + } + + private static class StubRelNode extends AbstractRelNode { + StubRelNode(RelOptCluster cluster, RelTraitSet traitSet, RelDataType rowType) { + super(cluster, traitSet); + this.rowType = rowType; + } + } + + private static class StubTableScan extends TableScan { + StubTableScan(RelOptCluster cluster, RelTraitSet traitSet, RelOptTable table) { + super(cluster, traitSet, List.of(), table); + } + } +} diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java index 6b3371e12c9c6..ce5c697ea05bb 100644 --- a/server/src/main/java/org/opensearch/index/IndexModule.java +++ b/server/src/main/java/org/opensearch/index/IndexModule.java @@ -716,6 +716,71 @@ public IndexService newIndexService( ); } + /** + * @deprecated Use the overload that 
accepts a {@code compositeEngineFactorySupplier} parameter. + */ + @Deprecated + public IndexService newIndexService( + IndexService.IndexCreationContext indexCreationContext, + NodeEnvironment environment, + NamedXContentRegistry xContentRegistry, + IndexService.ShardStoreDeleter shardStoreDeleter, + CircuitBreakerService circuitBreakerService, + BigArrays bigArrays, + ThreadPool threadPool, + ScriptService scriptService, + ClusterService clusterService, + Client client, + IndicesQueryCache indicesQueryCache, + MapperRegistry mapperRegistry, + IndicesFieldDataCache indicesFieldDataCache, + NamedWriteableRegistry namedWriteableRegistry, + BooleanSupplier idFieldDataEnabled, + ValuesSourceRegistry valuesSourceRegistry, + IndexStorePlugin.DirectoryFactory remoteDirectoryFactory, + BiFunction translogFactorySupplier, + Supplier clusterDefaultRefreshIntervalSupplier, + Supplier fixedRefreshIntervalSchedulingEnabled, + Supplier shardLevelRefreshEnabled, + RecoverySettings recoverySettings, + RemoteStoreSettings remoteStoreSettings, + Consumer replicator, + Function segmentReplicationStatsProvider, + Supplier clusterDefaultMaxMergeAtOnceSupplier, + ClusterMergeSchedulerConfig clusterMergeSchedulerConfig + ) throws IOException { + return newIndexService( + indexCreationContext, + environment, + xContentRegistry, + shardStoreDeleter, + circuitBreakerService, + bigArrays, + threadPool, + scriptService, + clusterService, + client, + indicesQueryCache, + mapperRegistry, + indicesFieldDataCache, + namedWriteableRegistry, + idFieldDataEnabled, + valuesSourceRegistry, + remoteDirectoryFactory, + translogFactorySupplier, + clusterDefaultRefreshIntervalSupplier, + fixedRefreshIntervalSchedulingEnabled, + shardLevelRefreshEnabled, + recoverySettings, + remoteStoreSettings, + replicator, + segmentReplicationStatsProvider, + clusterDefaultMaxMergeAtOnceSupplier, + clusterMergeSchedulerConfig, + null + ); + } + public IndexService newIndexService( IndexService.IndexCreationContext 
indexCreationContext, NodeEnvironment environment, diff --git a/server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java b/server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java index 9fc7905487e55..1a2ccd39f1540 100644 --- a/server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java @@ -8,20 +8,15 @@ package org.opensearch.index.engine; -import org.opensearch.common.CheckedSupplier; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.exec.CatalogSnapshot; import org.opensearch.index.engine.exec.DataFormatAwareEngineFactory; import org.opensearch.index.engine.exec.EngineReaderManager; -import org.opensearch.index.engine.exec.IndexFilterProvider; -import org.opensearch.index.engine.exec.SearchExecEngine; -import org.opensearch.index.engine.exec.SourceProvider; import java.io.Closeable; import java.io.IOException; import java.util.ArrayList; -import java.util.Collection; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -38,52 +33,20 @@ public class DataFormatAwareEngine implements Closeable { private final Map> readerManagers; - private final Map, IOException>> engineSuppliers; - private final Map, IOException>> indexFilterProviderSuppliers; - private final Map, IOException>> sourceProviderSuppliers; private volatile CatalogSnapshot latestSnapshot; /** * Constructs a new CompositeEngine with pre-built maps. * Prefer using {@link DataFormatAwareEngineFactory#create()}. 
*/ - public DataFormatAwareEngine( - Map> readerManagers, - Map, IOException>> engineSuppliers, - Map, IOException>> indexFilterProviderSuppliers, - Map, IOException>> sourceProviderSuppliers - ) { + public DataFormatAwareEngine(Map> readerManagers) { this.readerManagers = readerManagers; - this.engineSuppliers = engineSuppliers; - this.indexFilterProviderSuppliers = indexFilterProviderSuppliers; - this.sourceProviderSuppliers = sourceProviderSuppliers; } public EngineReaderManager getReaderManager(DataFormat format) { return readerManagers.get(format); } - public SearchExecEngine getSearchExecEngine(DataFormat format) throws IOException { - return getFromSupplier(engineSuppliers, format, "search exec engine"); - } - - public IndexFilterProvider getIndexFilterProvider(DataFormat format) throws IOException { - return getFromSupplier(indexFilterProviderSuppliers, format, "index filter provider"); - } - - public SourceProvider getSourceProvider(DataFormat format) throws IOException { - return getFromSupplier(sourceProviderSuppliers, format, "source provider"); - } - - private T getFromSupplier(Map> suppliers, DataFormat format, String label) - throws IOException { - CheckedSupplier supplier = suppliers.get(format); - if (supplier == null) { - throw new IllegalArgumentException("No " + label + " registered for format: " + format.name()); - } - return supplier.get(); - } - /** * Called by the catalog snapshot lifecycle listener after a refresh * to update the latest searchable snapshot. 
@@ -160,9 +123,6 @@ public void close() { @Override public void close() throws IOException { List exceptions = new ArrayList<>(); - closeSupplierInstances(engineSuppliers.values(), exceptions); - closeSupplierInstances(indexFilterProviderSuppliers.values(), exceptions); - closeSupplierInstances(sourceProviderSuppliers.values(), exceptions); for (EngineReaderManager rm : readerManagers.values()) { if (rm instanceof Closeable) { try { @@ -180,21 +140,4 @@ public void close() throws IOException { throw ioException; } } - - /** - * Attempts to retrieve each memoized instance and close it if it implements {@link Closeable}. - * Suppliers that were never invoked will return quickly from the memoize wrapper. - */ - private static void closeSupplierInstances(Collection> suppliers, List exceptions) { - for (CheckedSupplier supplier : suppliers) { - try { - T instance = supplier.get(); - if (instance instanceof Closeable) { - ((Closeable) instance).close(); - } - } catch (Exception e) { - exceptions.add(e); - } - } - } } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DataFormatAwareEngineFactory.java b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatAwareEngineFactory.java index b05fc42d65f84..85837f3f54fb4 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/DataFormatAwareEngineFactory.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatAwareEngineFactory.java @@ -36,11 +36,6 @@ public class DataFormatAwareEngineFactory { private final Map> readerManagers = new HashMap<>(); - private final Map, IOException>> engineSuppliers = new HashMap<>(); - private final Map, IOException>> indexFilterProviderSuppliers = - new HashMap<>(); - private final Map, IOException>> sourceProviderSuppliers = new HashMap<>(); - private final IndexFileDeleter indexFileDeleter; public DataFormatAwareEngineFactory( @@ -53,9 +48,6 @@ public DataFormatAwareEngineFactory( for (DataFormat format : plugin.getSupportedFormats()) { // 
TODO: use mapperService and indexSettings to filter formats relevant to this index readerManagers.put(format, plugin.createReaderManager(format, shardPath)); - engineSuppliers.put(format, memoize(format, f -> plugin.createSearchExecEngine(f, shardPath))); - indexFilterProviderSuppliers.put(format, memoize(format, f -> plugin.createIndexFilterProvider(f, shardPath))); - sourceProviderSuppliers.put(format, memoize(format, f -> plugin.createSourceProvider(f, shardPath))); } } this.indexFileDeleter = new IndexFileDeleter(null, shardPath); @@ -93,7 +85,7 @@ public T get() throws IOException { * reader managers and memoizing suppliers. */ public DataFormatAwareEngine create() { - return new DataFormatAwareEngine(readerManagers, engineSuppliers, indexFilterProviderSuppliers, sourceProviderSuppliers); + return new DataFormatAwareEngine(readerManagers); } /** diff --git a/server/src/main/java/org/opensearch/index/engine/exec/EngineSearcher.java b/server/src/main/java/org/opensearch/index/engine/exec/EngineSearcher.java index bc5385d180bbb..4df7df733ea6d 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/EngineSearcher.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/EngineSearcher.java @@ -19,7 +19,7 @@ *

* Each engine implementation provides its own searcher that knows how to * execute queries against its reader. The searcher is acquired from - * {@link SearchExecEngine} and used to execute searches against a + * the search execution engine and used to execute searches against a * point-in-time snapshot. * * @param the context type this searcher operates on diff --git a/server/src/main/java/org/opensearch/index/engine/exec/SearchExecEngine.java b/server/src/main/java/org/opensearch/index/engine/exec/SearchExecEngine.java deleted file mode 100644 index a78645054b5b7..0000000000000 --- a/server/src/main/java/org/opensearch/index/engine/exec/SearchExecEngine.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.index.engine.exec; - -import org.opensearch.action.search.SearchShardTask; -import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.index.engine.DataFormatAwareEngine; -import org.opensearch.search.SearchExecutionContext; -import org.opensearch.search.SearchShardTarget; -import org.opensearch.search.internal.ShardSearchRequest; - -import java.io.Closeable; -import java.io.IOException; - -/** - * Shard-level search execution engine interface. - * - * @param the engine-specific context type - * @param the engine-native plan type (e.g. byte[] for substrait) - * @param the result stream type returned by {@link #execute} - * @opensearch.experimental - */ -@ExperimentalApi -public interface SearchExecEngine extends Closeable { - - /** - * Converts a logical plan fragment into the engine's native plan format. 
- */ - default T convertFragment(Object fragment) { - throw new UnsupportedOperationException("convertFragment not supported by " + getClass().getSimpleName()); - } - - /** - * Creates a search context bound to the given reader and plan. - * The reader is provided by {@link DataFormatAwareEngine} - * which owns all reader managers. - */ - C createContext(Object reader, T plan, ShardSearchRequest request, SearchShardTarget shardTarget, SearchShardTask task) - throws IOException; - - /** - * Executes the plan held by the context and returns the result stream. - */ - S execute(C context) throws IOException; - - @Override - default void close() throws IOException {} -} diff --git a/server/src/main/java/org/opensearch/plugins/SearchBackEndPlugin.java b/server/src/main/java/org/opensearch/plugins/SearchBackEndPlugin.java index a0b1dfb10e0fe..65fa1c99e917c 100644 --- a/server/src/main/java/org/opensearch/plugins/SearchBackEndPlugin.java +++ b/server/src/main/java/org/opensearch/plugins/SearchBackEndPlugin.java @@ -10,9 +10,6 @@ import org.opensearch.index.engine.dataformat.DataFormat; import org.opensearch.index.engine.exec.EngineReaderManager; -import org.opensearch.index.engine.exec.IndexFilterProvider; -import org.opensearch.index.engine.exec.SearchExecEngine; -import org.opensearch.index.engine.exec.SourceProvider; import org.opensearch.index.shard.ShardPath; import java.io.IOException; @@ -30,28 +27,4 @@ public interface SearchBackEndPlugin { List getSupportedFormats(); EngineReaderManager createReaderManager(DataFormat format, ShardPath shardPath) throws IOException; - - /** - * Create a search execution engine. Return null if this plugin is an index provider only. - */ - default SearchExecEngine createSearchExecEngine(DataFormat format, ShardPath shardPath) throws IOException { - return null; - } - - /** - * Create an index filter provider. Return null if this plugin is a search engine only. 
- */ - default IndexFilterProvider createIndexFilterProvider(DataFormat format, ShardPath shardPath) throws IOException { - return null; - } - - /** - * Create a source provider. Return null if this plugin does not provide source data. - *

- * A source provider executes the full query+scan+filter and streams back - * result batches (projections, aggregations) to the primary engine. - */ - default SourceProvider createSourceProvider(DataFormat format, ShardPath shardPath) throws IOException { - return null; - } } diff --git a/server/src/main/java/org/opensearch/search/SearchExecutionContext.java b/server/src/main/java/org/opensearch/search/SearchExecutionContext.java index 2368d7992b7b5..025effc3833cb 100644 --- a/server/src/main/java/org/opensearch/search/SearchExecutionContext.java +++ b/server/src/main/java/org/opensearch/search/SearchExecutionContext.java @@ -8,8 +8,8 @@ package org.opensearch.search; +import org.opensearch.action.search.SearchShardTask; import org.opensearch.common.annotation.ExperimentalApi; -import org.opensearch.search.internal.ShardSearchRequest; import java.io.Closeable; @@ -19,9 +19,10 @@ * @opensearch.experimental */ @ExperimentalApi -public interface SearchExecutionContext extends Closeable { +public interface SearchExecutionContext extends Closeable { - ShardSearchRequest request(); + SearchShardTask task(); + + S getSearcher(); - SearchShardTarget shardTarget(); } diff --git a/server/src/test/java/org/opensearch/index/engine/dataformat/DataFormatPluginTests.java b/server/src/test/java/org/opensearch/index/engine/dataformat/DataFormatPluginTests.java index 581a877d2a9e9..0b967f718da97 100644 --- a/server/src/test/java/org/opensearch/index/engine/dataformat/DataFormatPluginTests.java +++ b/server/src/test/java/org/opensearch/index/engine/dataformat/DataFormatPluginTests.java @@ -9,7 +9,6 @@ package org.opensearch.index.engine.dataformat; import org.opensearch.Version; -import org.opensearch.action.search.SearchShardTask; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.common.settings.Settings; import org.opensearch.core.index.shard.ShardId; @@ -17,15 +16,11 @@ import org.opensearch.index.engine.DataFormatAwareEngine; import 
org.opensearch.index.engine.exec.CatalogSnapshot; import org.opensearch.index.engine.exec.EngineReaderManager; -import org.opensearch.index.engine.exec.SearchExecEngine; import org.opensearch.index.engine.exec.Segment; import org.opensearch.index.engine.exec.WriterFileSet; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.MapperService; import org.opensearch.index.shard.ShardPath; -import org.opensearch.search.SearchExecutionContext; -import org.opensearch.search.SearchShardTarget; -import org.opensearch.search.internal.ShardSearchRequest; import org.opensearch.test.OpenSearchTestCase; import java.io.IOException; @@ -418,58 +413,6 @@ public > IndexingExecutionEngin } } - /** - * write → refresh → catalog snapshot → DataFormatAwareEngine → acquireReader → search. - */ - public void testWritePathToSearchExecEngine() throws IOException { - MockDataFormat format = new MockDataFormat(); - MockIndexingExecutionEngine indexEngine = new MockIndexingExecutionEngine(format); - - Writer w = indexEngine.createWriter(1L); - MockDocumentInput d1 = indexEngine.newDocumentInput(); - d1.addField(mock(MappedFieldType.class), "Alice"); - d1.setRowId("_row_id", 0); - w.addDoc(d1); - MockDocumentInput d2 = indexEngine.newDocumentInput(); - d2.addField(mock(MappedFieldType.class), "Bob"); - d2.setRowId("_row_id", 1); - w.addDoc(d2); - WriterFileSet fs = w.flush().getWriterFileSet(format).get(); - w.close(); - - RefreshResult refreshResult = indexEngine.refresh(RefreshInput.builder().addWriterFileSet(fs).build()); - MockCatalogSnapshot snapshot = new MockCatalogSnapshot(1L, refreshResult.refreshedSegments(), format); - - MockReaderManager readerManager = new MockReaderManager(format.name()); - readerManager.afterRefresh(true, snapshot); - - DataFormatAwareEngine dataFormatAwareEngine = new DataFormatAwareEngine( - Map.of(format, readerManager), - Map.of(), - Map.of(), - Map.of() - ); - // setLatestSnapshot incRefs snapshot (refcount: 1 initial + 1 
engine = 2) - dataFormatAwareEngine.setLatestSnapshot(snapshot); - - // acquireReader incRefs again (refcount: 3) - try (DataFormatAwareEngine.DataFormatAwareReader cr = dataFormatAwareEngine.acquireReader()) { - MockReader reader = (MockReader) cr.getReader(format); - assertNotNull(reader); - assertEquals(2, reader.totalRows); - - MockSearchExecEngine searchEngine = new MockSearchExecEngine(); - String plan = searchEngine.convertFragment("SELECT * FROM hits"); - MockSearchContext ctx = searchEngine.createContext(reader, plan, null, null, null); - List results = searchEngine.execute(ctx); - assertEquals(2, results.size()); - ctx.close(); - } - // cr.close() decRefs. Snapshot still alive — engine owns the construction ref. - assertTrue(snapshot.tryIncRef()); - snapshot.decRef(); // undo probe - } - /** * Search holds snapshot alive while refresh replaces it. *

@@ -500,12 +443,7 @@ public void testSearchHoldsSnapshotAliveWhileRefreshDeletesFiles() throws IOExce MockReaderManager readerManager = new MockReaderManager(format.name()); readerManager.afterRefresh(true, snapshot1); - DataFormatAwareEngine dataFormatAwareEngine = new DataFormatAwareEngine( - Map.of(format, readerManager), - Map.of(), - Map.of(), - Map.of() - ); + DataFormatAwareEngine dataFormatAwareEngine = new DataFormatAwareEngine(Map.of(format, readerManager)); dataFormatAwareEngine.setLatestSnapshot(snapshot1); // takes over construction ref, refcount: 1 // Search acquires reader — refcount: 2 @@ -591,12 +529,7 @@ public Set getDataFormats() { rm1.afterRefresh(true, snapshot); rm2.afterRefresh(true, snapshot); - DataFormatAwareEngine dataFormatAwareEngine = new DataFormatAwareEngine( - Map.of(format1, rm1, format2, rm2), - Map.of(), - Map.of(), - Map.of() - ); + DataFormatAwareEngine dataFormatAwareEngine = new DataFormatAwareEngine(Map.of(format1, rm1, format2, rm2)); dataFormatAwareEngine.setLatestSnapshot(snapshot); try (DataFormatAwareEngine.DataFormatAwareReader cr = dataFormatAwareEngine.acquireReader()) { @@ -675,57 +608,6 @@ void close() { } } - static class MockSearchContext implements SearchExecutionContext { - final String plan; - final long totalRows; - - MockSearchContext(String plan, long totalRows) { - this.plan = plan; - this.totalRows = totalRows; - } - - @Override - public ShardSearchRequest request() { - return null; - } - - @Override - public SearchShardTarget shardTarget() { - return null; - } - - @Override - public void close() {} - } - - static class MockSearchExecEngine implements SearchExecEngine> { - @Override - public String convertFragment(Object fragment) { - return "PLAN:" + fragment; - } - - @Override - public MockSearchContext createContext( - Object reader, - String plan, - ShardSearchRequest request, - SearchShardTarget shardTarget, - SearchShardTask task - ) { - MockReader r = (MockReader) reader; - return new 
MockSearchContext(plan, r.totalRows); - } - - @Override - public List execute(MockSearchContext context) { - List rows = new ArrayList<>(); - for (int i = 0; i < context.totalRows; i++) { - rows.add(new Object[] { "row_" + i }); - } - return rows; - } - } - static class MockReaderManager implements EngineReaderManager { private final String formatName; private final Map readers = new HashMap<>(); From c098bc0f5bfa911e4e4f9dbf4f7a31502083ed54 Mon Sep 17 00:00:00 2001 From: bharath-techie Date: Wed, 25 Mar 2026 00:20:42 +0530 Subject: [PATCH 09/11] adding java docs Signed-off-by: bharath-techie --- .../analytics/backend/ExecutionContext.java | 20 +- .../analytics/backend/SearchExecEngine.java | 5 +- .../analytics/backend/jni/NativeHandle.java | 2 + .../analytics/backend/jni/package-info.java | 12 + .../analytics/plan/ResolvedPlan.java | 46 --- .../spi/AnalyticsSearchBackendPlugin.java | 8 +- .../be/datafusion/DataFusionPlugin.java | 4 + .../be/datafusion/DatafusionContext.java | 18 + .../be/datafusion/DatafusionQuery.java | 12 + .../be/datafusion/DatafusionReader.java | 2 + .../datafusion/DatafusionReaderManager.java | 5 + .../be/datafusion/DatafusionResultStream.java | 4 + .../DatafusionSearchExecEngine.java | 4 + .../be/datafusion/DatafusionSearcher.java | 4 + .../be/datafusion/jni/NativeBridge.java | 36 +- .../be/datafusion/jni/ReaderHandle.java | 5 + .../be/datafusion/jni/StreamHandle.java | 6 + .../be/datafusion/package-info.java | 1 + .../be/lucene/LuceneEngineSearcher.java | 10 + .../be/lucene/LuceneIndexFilterContext.java | 6 + .../be/lucene/LuceneIndexFilterProvider.java | 15 + .../be/lucene/LuceneReaderManager.java | 11 +- .../be/lucene/LuceneSearchContext.java | 13 + .../be/lucene/LuceneSearchEnginePlugin.java | 3 + .../opensearch/be/lucene/package-info.java | 1 + .../analytics/exec/AnalyticsQueryService.java | 144 ------- .../analytics/exec/DefaultPlanExecutor.java | 72 ++-- .../engine/AnalyticsQueryServiceTests.java | 391 ------------------ 
.../exec/DefaultPlanExecutorTests.java | 328 ++++++++++++++- .../org/opensearch/index/IndexModule.java | 4 +- .../org/opensearch/index/IndexService.java | 10 +- .../index/engine/DataFormatAwareEngine.java | 6 +- .../exec/DataFormatAwareEngineFactory.java | 32 +- .../index/engine/exec/FilesListener.java | 3 + .../index/engine/exec/IndexFilterContext.java | 2 + .../opensearch/indices/IndicesService.java | 6 +- 36 files changed, 566 insertions(+), 685 deletions(-) create mode 100644 sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/package-info.java delete mode 100644 sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/plan/ResolvedPlan.java delete mode 100644 sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/AnalyticsQueryService.java delete mode 100644 sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/AnalyticsQueryServiceTests.java diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/ExecutionContext.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/ExecutionContext.java index 1d0c77473b401..d93e6f7b797d2 100644 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/ExecutionContext.java +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/ExecutionContext.java @@ -9,41 +9,43 @@ package org.opensearch.analytics.backend; import org.opensearch.action.search.SearchShardTask; -import org.opensearch.analytics.plan.ResolvedPlan; import org.opensearch.index.engine.DataFormatAwareEngine; /** - * Execution context carrying plan, reader, and delegation state through + * Execution context carrying reader and delegation state through * the query execution lifecycle. 
* * @opensearch.internal */ public class ExecutionContext { - private final ResolvedPlan plan; private final String tableName; private final DataFormatAwareEngine.DataFormatAwareReader reader; SearchShardTask task; - public ExecutionContext(ResolvedPlan plan, String tableName, SearchShardTask task, DataFormatAwareEngine.DataFormatAwareReader reader) { - this.plan = plan; + /** + * Constructs an execution context. + * @param tableName the target table name + * @param task the search shard task + * @param reader the data-format aware reader + */ + public ExecutionContext(String tableName, SearchShardTask task, DataFormatAwareEngine.DataFormatAwareReader reader) { this.tableName = tableName; this.task = task; this.reader = reader; } + /** Returns the search shard task. */ public SearchShardTask getTask() { return task; } - public ResolvedPlan plan() { - return plan; - } - + /** Returns the target table name. */ public String getTableName() { return tableName; } + /** Returns the data-format aware reader. */ public DataFormatAwareEngine.DataFormatAwareReader getReader() { return reader; } diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/SearchExecEngine.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/SearchExecEngine.java index 9c29ee0faf8f4..b8a1ec4c943a8 100644 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/SearchExecEngine.java +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/SearchExecEngine.java @@ -27,7 +27,10 @@ public interface SearchExecEngine extends Closeable { */ void prepare(ExecutionContext context); - /** Executes the context and returns a result stream. */ + /** + * Executes the context and returns a result stream. 
+ * @param context the execution context + */ EngineResultStream execute(ExecutionContext context) throws IOException; @Override diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/NativeHandle.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/NativeHandle.java index f1131432a2950..a785484da5604 100644 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/NativeHandle.java +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/NativeHandle.java @@ -19,8 +19,10 @@ */ public abstract class NativeHandle implements AutoCloseable { + /** Pointer to the native resource. */ protected final long ptr; private final AtomicBoolean closed = new AtomicBoolean(false); + /** Sentinel value representing a null native pointer. */ protected static final long NULL_POINTER = 0L; private final Cleaner.Cleanable cleanable; diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/package-info.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/package-info.java new file mode 100644 index 0000000000000..fe8d1f0e98a19 --- /dev/null +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/jni/package-info.java @@ -0,0 +1,12 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + */ + +/** + * JNI handles for native engine resources. 
+ */ +package org.opensearch.analytics.backend.jni; diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/plan/ResolvedPlan.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/plan/ResolvedPlan.java deleted file mode 100644 index 6d644018bedcd..0000000000000 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/plan/ResolvedPlan.java +++ /dev/null @@ -1,46 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.analytics.plan; - -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rex.RexNode; - -import java.util.Map; - -/** - * An immutable value type representing a fully resolved query plan, - * consisting of the optimized and backend-tagged {@link RelNode} tree, - * the name of the backend that will execute it, and any delegation - * predicates that secondary backends must evaluate. - */ -public final class ResolvedPlan { - - private final RelNode root; - private final String primaryBackend; - private final Map delegationPredicates; - - public ResolvedPlan(RelNode root, String primaryBackend, Map delegationPredicates) { - this.root = root; - this.primaryBackend = primaryBackend; - this.delegationPredicates = Map.copyOf(delegationPredicates); - } - - public RelNode getRoot() { - return root; - } - - public String getPrimaryBackend() { - return primaryBackend; - } - - /** Predicates delegated to secondary backends (backend name → predicate). Empty if no delegation. 
*/ - public Map getDelegationPredicates() { - return delegationPredicates; - } -} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java index 6b7057bc65988..b48023973c6db 100644 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java @@ -29,15 +29,17 @@ public interface AnalyticsSearchBackendPlugin { /** Unique engine name (e.g., "lucene", "datafusion"). */ String name(); - /** Creates a searcher bound to the given reader snapshot. */ + /** + * Creates a searcher bound to the given reader snapshot. + * @param ctx the execution context + */ SearchExecEngine searcher(ExecutionContext ctx); - /** Supported functions as a Calcite operator table, or null if the back-end adds no functions. */ /** Returns the set of RelNode operator classes this backend supports. */ default Set> supportedOperators() { return Set.of(LogicalTableScan.class, LogicalFilter.class, LogicalAggregate.class, LogicalProject.class); } - // TODO : remove this ? + /** Returns the data formats supported by this backend. 
*/ List getSupportedFormats(); } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java index 624947f1311c5..f1b379b7c8433 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java @@ -68,6 +68,10 @@ public class DataFusionPlugin extends Plugin implements SearchBackEndPlugin, Ana private final Settings settings; private volatile DataFusionService dataFusionService; + /** + * Creates the DataFusion plugin with the given node settings. + * @param settings the node-level settings + */ public DataFusionPlugin(Settings settings) { this.settings = settings; } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java index 6ec31b550164b..59f576d8f5cc4 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionContext.java @@ -34,6 +34,12 @@ public class DatafusionContext implements SearchExecutionContext { private final ReaderHandle readerHandle; + /** + * Creates a searcher + * @param readerHandle the native reader handle + */ public DatafusionSearcher(ReaderHandle readerHandle) { this.readerHandle = readerHandle; } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java index 
20caa6cbd3251..eae3f8e4d1286 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/NativeBridge.java @@ -20,21 +20,41 @@ public final class NativeBridge { private NativeBridge() {} + /** + * Creates a native DataFusion reader + * @param path the directory path containing data files + * @param files the array of file names to read + */ public static native long createDatafusionReader(String path, String[] files); + /** + * Closes the native DataFusion reader. + * @param ptr the native reader pointer + */ public static native void closeDatafusionReader(long ptr); + /** + * Creates a global DataFusion runtime with the given resource limits. + * @param memoryLimit the maximum memory in bytes + * @param cacheManagerPtr the native cache manager pointer + * @param spillDir the directory path for spill files + * @param spillLimit the maximum spill size in bytes + */ public static native long createGlobalRuntime(long memoryLimit, long cacheManagerPtr, String spillDir, long spillLimit); + /** + * Closes the global DataFusion runtime. + * @param ptr the native runtime pointer + */ public static native void closeGlobalRuntime(long ptr); /** * Executes a substrait plan against the given reader and returns a stream pointer. 
* - * @param readerPtr native reader pointer - * @param tableName table name for registration with DataFusion - * @param substraitPlan serialized substrait plan bytes - * @param runtimePtr native runtime pointer + * @param readerPtr the native reader pointer + * @param tableName the target table name + * @param substraitPlan the serialized substrait plan bytes + * @param runtimePtr the native runtime pointer * @return native stream pointer (caller must close via {@link #streamClose}) */ public static native long executeQuery(long readerPtr, String tableName, byte[] substraitPlan, long runtimePtr); @@ -42,7 +62,7 @@ private NativeBridge() {} /** * Returns the Arrow schema address for the given stream. * - * @param streamPtr native stream pointer + * @param streamPtr the native stream pointer * @return ArrowSchema C Data Interface address */ public static native long streamGetSchema(long streamPtr); @@ -50,8 +70,8 @@ private NativeBridge() {} /** * Loads the next record batch from the stream. * - * @param runtimePtr native runtime pointer - * @param streamPtr native stream pointer + * @param runtimePtr the native runtime pointer + * @param streamPtr the native stream pointer * @return ArrowArray C Data Interface address, or 0 if end-of-stream */ public static native long streamNext(long runtimePtr, long streamPtr); @@ -59,7 +79,7 @@ private NativeBridge() {} /** * Closes the native stream and releases associated resources. 
* - * @param streamPtr native stream pointer + * @param streamPtr the native stream pointer to close */ public static native void streamClose(long streamPtr); } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/ReaderHandle.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/ReaderHandle.java index fed2b8601b845..bb00f2540e347 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/ReaderHandle.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/ReaderHandle.java @@ -15,6 +15,11 @@ */ public final class ReaderHandle extends NativeHandle { + /** + * Creates a reader handle by allocating a native DataFusion reader for the given path and files. + * @param path the directory path containing data files + * @param files the array of file names to read + */ public ReaderHandle(String path, String[] files) { super(NativeBridge.createDatafusionReader(path, files)); } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/StreamHandle.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/StreamHandle.java index 53b380867e90b..b2a3c3f29a9f6 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/StreamHandle.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/jni/StreamHandle.java @@ -18,11 +18,17 @@ public final class StreamHandle extends NativeHandle { private final long streamPtr; + /** + * Creates a stream handle wrapping the native pointers. + * @param ptr the native handle pointer + * @param streamPtr the native stream pointer + */ public StreamHandle(long ptr, long streamPtr) { super(ptr); this.streamPtr = streamPtr; } + /** Returns the native stream pointer. 
*/ public long getStreamPtr() { return streamPtr; } diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/package-info.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/package-info.java index 07ffaf562b3f0..d69838a7fbd0d 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/package-info.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/package-info.java @@ -6,4 +6,5 @@ * compatible open source license. */ +/** DataFusion native engine backend for OpenSearch analytics. */ package org.opensearch.be.datafusion; diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java index 6cd3605499c07..ed3c792be16af 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneEngineSearcher.java @@ -33,6 +33,12 @@ public class LuceneEngineSearcher implements EngineSearcher private final IndexSearcher indexSearcher; private final DirectoryReader directoryReader; + /** + * Creates a new LuceneEngineSearcher. + * + * @param indexSearcher the Lucene index searcher + * @param directoryReader the Lucene directory reader + */ public LuceneEngineSearcher(IndexSearcher indexSearcher, DirectoryReader directoryReader) { this.indexSearcher = indexSearcher; this.directoryReader = directoryReader; @@ -42,6 +48,8 @@ public LuceneEngineSearcher(IndexSearcher indexSearcher, DirectoryReader directo * Execute: create a Weight from the query, register it on the * context's lifecycle manager, and store the key + segment metadata * on the context for JNI callbacks. 
+ * + * @param context the search context containing the query to execute */ @Override public void search(LuceneSearchContext context) throws IOException { @@ -56,10 +64,12 @@ public void search(LuceneSearchContext context) throws IOException { } + /** Returns the underlying IndexSearcher. */ public IndexSearcher getIndexSearcher() { return indexSearcher; } + /** Returns the underlying DirectoryReader. */ public DirectoryReader getDirectoryReader() { return directoryReader; } diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java index 796a1c3cdcf17..a14ec1645276a 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterContext.java @@ -36,6 +36,12 @@ public class LuceneIndexFilterContext implements IndexFilterContext { private final List leaves; private final CollectorQueryLifecycleManager collectorManager = new CollectorQueryLifecycleManager(); + /** + * Creates a new LuceneIndexFilterContext for the given query and reader. 
+ * + * @param query the Lucene query to filter by + * @param reader the directory reader over the index + */ public LuceneIndexFilterContext(Query query, DirectoryReader reader) throws IOException { IndexSearcher searcher = new IndexSearcher(reader); Query rewritten = searcher.rewrite(query); diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java index 9aae1e997b2b2..9851e07d33bbc 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneIndexFilterProvider.java @@ -28,6 +28,9 @@ @ExperimentalApi public class LuceneIndexFilterProvider implements IndexFilterProvider { + /** Creates a new LuceneIndexFilterProvider. */ + public LuceneIndexFilterProvider() {} + @Override public LuceneIndexFilterContext createContext(Query query, DirectoryReader reader) throws IOException { return new LuceneIndexFilterContext(query, reader); @@ -37,6 +40,10 @@ public LuceneIndexFilterContext createContext(Query query, DirectoryReader reade * Creates a collector for the given segment and registers it in the * context's {@link CollectorQueryLifecycleManager}. * + * @param context the index filter context + * @param segmentOrd the segment ordinal + * @param minDoc the minimum document ID + * @param maxDoc the maximum document ID * @return an int key that identifies this collector across JNI */ @Override @@ -47,6 +54,11 @@ public int createCollector(LuceneIndexFilterContext context, int segmentOrd, int /** * Collects matching doc IDs for the collector identified by {@code key}. 
+ * + * @param context the index filter context + * @param key the collector key + * @param minDoc the minimum document ID + * @param maxDoc the maximum document ID */ public long[] collectDocs(LuceneIndexFilterContext context, int key, int minDoc, int maxDoc) { return context.getCollectorManager().collectDocs(key, minDoc, maxDoc); @@ -54,6 +66,9 @@ public long[] collectDocs(LuceneIndexFilterContext context, int key, int minDoc, /** * Releases the collector identified by {@code key}. + * + * @param context the index filter context + * @param key the collector key */ public void releaseCollector(LuceneIndexFilterContext context, int key) { context.getCollectorManager().releaseCollector(key); diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneReaderManager.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneReaderManager.java index 46ea0dc1c2359..c46d480bccfb3 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneReaderManager.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneReaderManager.java @@ -35,12 +35,21 @@ public class LuceneReaderManager implements EngineReaderManager Map readers = new HashMap<>(); DataFormat dataFormat; + /** + * Creates a new LuceneReaderManager for the given data format. + * + * @param dataFormat the data format for this reader manager + */ @SuppressWarnings("unchecked") public LuceneReaderManager(DataFormat dataFormat) { this.dataFormat = dataFormat; } - /** Called when files are deleted after merges. */ + /** + * Called when files are deleted after merges. 
+ * + * @param files the collection of deleted file paths + */ public void onFilesDeleted(Collection files) throws IOException { // no-op } diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java index 4865beff04065..2dee8508d3ee5 100644 --- a/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java +++ b/sandbox/plugins/analytics-backend-lucene/src/main/java/org/opensearch/be/lucene/LuceneSearchContext.java @@ -30,6 +30,13 @@ public class LuceneSearchContext implements SearchExecutionContextHandles: shard engine resolution, reader snapshot acquisition, delegation - * setup, engine execution, result collection, and context tracking. - */ -@ExperimentalApi -public class AnalyticsQueryService extends AbstractLifecycleComponent { - - private static final Logger logger = LogManager.getLogger(AnalyticsQueryService.class); - - private final AtomicLong nextContextId = new AtomicLong(1); - private final ConcurrentMapLong activeContexts = ConcurrentCollections - .newConcurrentMapLongWithAggressiveConcurrency(); - - private final Map backEnds; - - public AnalyticsQueryService(Map backEnds) { - this.backEnds = backEnds; - } - - /** - * Executes a resolved plan against a local shard. 
- * - * @param plan the resolved plan with backend assignments and delegation predicates - * @param shard the local index shard - * @return rows as list of Object arrays - */ - public Iterable execute(ResolvedPlan plan, IndexShard shard, SearchShardTask task) { - DataFormatAwareEngine dataFormatAwareEngine = shard.getCompositeEngine(); - if (dataFormatAwareEngine == null) { - throw new IllegalStateException("No CompositeEngine on shard [" + shard.shardId() + "]"); - } - - AnalyticsSearchBackendPlugin plugin = backEnds.get(plan.getPrimaryBackend()); - if (plugin == null) { - throw new IllegalStateException("No plugin registered for backend [" + plan.getPrimaryBackend() + "]"); - } - - String tableName = plan.getRoot().getTable() != null - ? plan.getRoot().getTable().getQualifiedName().get(plan.getRoot().getTable().getQualifiedName().size() - 1) - : "unknown"; - - long ctxId = -1; - - try (DataFormatAwareEngine.DataFormatAwareReader dataFormatAwareReader = dataFormatAwareEngine.acquireReader()) { - - ExecutionContext ctx = new ExecutionContext(plan, tableName, task, dataFormatAwareReader); - ctxId = putContext(ctx); - List rows = new ArrayList<>(); - // Create primary engine and execute - try (SearchExecEngine engine = plugin.searcher(ctx)) { - logger.info("[AnalyticsQueryService] Executing via [{}], ctxId={}", plugin.name(), ctxId); - try (EngineResultStream resultStream = engine.execute(ctx)) { - EngineResultBatchIterator batchIterator = resultStream.iterator(); - while (batchIterator.hasNext()) { - EngineResultBatch batch = batchIterator.next(); - List fieldNames = batch.getFieldNames(); - for (int row = 0; row < batch.getRowCount(); row++) { - Object[] rowValues = new Object[fieldNames.size()]; - for (int col = 0; col < fieldNames.size(); col++) { - rowValues[col] = batch.getFieldValue(fieldNames.get(col), row); - } - rows.add(rowValues); - } - } - - } - } - logger.info("[AnalyticsQueryService] Completed via [{}], {} rows, ctxId={}", plugin.name(), rows.size(), 
ctxId); - return rows; - } catch (Exception e) { - throw new RuntimeException("Execution failed for [" + plugin.name() + "]", e); - } finally { - removeContext(ctxId); - } - } - - public long putContext(ExecutionContext context) { - long id = nextContextId.getAndIncrement(); - activeContexts.put(id, context); - return id; - } - - public ExecutionContext getContext(long id) { - return activeContexts.get(id); - } - - public ExecutionContext removeContext(long id) { - return activeContexts.remove(id); - } - - public int getActiveContextCount() { - return activeContexts.size(); - } - - @Override - protected void doStart() { - logger.info("[AnalyticsQueryService] Started"); - } - - @Override - protected void doStop() { - logger.info("[AnalyticsQueryService] Stopping, clearing {} active contexts", activeContexts.size()); - activeContexts.clear(); - } - - @Override - protected void doClose() {} -} diff --git a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java index e9220b7af8432..825bac9ff3b98 100644 --- a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java +++ b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java @@ -13,14 +13,19 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.action.search.SearchShardTask; -import org.opensearch.analytics.plan.ResolvedPlan; +import org.opensearch.analytics.backend.EngineResultBatch; +import org.opensearch.analytics.backend.EngineResultBatchIterator; +import org.opensearch.analytics.backend.EngineResultStream; +import org.opensearch.analytics.backend.ExecutionContext; +import org.opensearch.analytics.backend.SearchExecEngine; import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin; -import 
org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.service.ClusterService; import org.opensearch.index.IndexService; +import org.opensearch.index.engine.DataFormatAwareEngine; import org.opensearch.index.shard.IndexShard; import org.opensearch.indices.IndicesService; +import java.util.ArrayList; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -29,7 +34,7 @@ /** * {@link QueryPlanExecutor} default implementation. *

- * Acquires a composite reader, creates a per-query {@link org.opensearch.analytics.backend.SearchExecEngine} + * Acquires a composite reader, creates a per-query {@link SearchExecEngine} * bound to the reader, and delegates convert + execute to it. * No backend-specific context is exposed to this class. */ @@ -39,9 +44,14 @@ public class DefaultPlanExecutor implements QueryPlanExecutor backEnds; private final IndicesService indicesService; private final ClusterService clusterService; - // TODO: - move out as data node side service - private final AnalyticsQueryService queryService; + /** + * Constructs a DefaultPlanExecutor with the given plugins and services. + * + * @param plugins list of analytics search backend plugins + * @param indicesService service for accessing index shards + * @param clusterService service for accessing cluster state + */ public DefaultPlanExecutor(List plugins, IndicesService indicesService, ClusterService clusterService) { this.backEnds = new LinkedHashMap<>(); for (AnalyticsSearchBackendPlugin plugin : plugins) { @@ -49,31 +59,46 @@ public DefaultPlanExecutor(List plugins, IndicesSe } this.indicesService = indicesService; this.clusterService = clusterService; - this.queryService = new AnalyticsQueryService(backEnds); - // TODO : init planning components } @SuppressWarnings("unchecked") @Override public Iterable execute(RelNode logicalFragment, Object context) { String tableName = extractTableName(logicalFragment); - IndexMetadata indexMetadata = clusterService.state().metadata().index(tableName); - if (indexMetadata == null) { - throw new IllegalArgumentException("Index [" + tableName + "] not found in cluster state"); - } - int shardCount = indexMetadata.getNumberOfShards(); - - ResolvedPlan plan = null; // TODO : queryPlanner.plan(logicalFragment, shardCount); + String backendName = selectBackEnd().name(); - if ("unresolved".equals(plan.getPrimaryBackend())) { - throw new IllegalStateException("Planning did not resolve backend 
assignment for plan root"); + IndexShard shard = resolveShard(tableName); + DataFormatAwareEngine dataFormatAwareEngine = shard.getCompositeEngine(); + if (dataFormatAwareEngine == null) { + throw new IllegalStateException("No CompositeEngine on shard [" + shard.shardId() + "]"); } - logger.info("[DefaultPlanExecutor] Plan resolved to backend [{}]", plan.getPrimaryBackend()); - - IndexShard shard = resolveShard(tableName); + AnalyticsSearchBackendPlugin plugin = backEnds.get(backendName); SearchShardTask task = null; // TODO : init task - return queryService.execute(plan, shard, task); + List rows = new ArrayList<>(); + try (DataFormatAwareEngine.DataFormatAwareReader dataFormatAwareReader = dataFormatAwareEngine.acquireReader()) { + ExecutionContext ctx = new ExecutionContext(tableName, task, dataFormatAwareReader); + try (SearchExecEngine engine = plugin.searcher(ctx)) { + logger.info("[DefaultPlanExecutor] Executing via [{}]", plugin.name()); + try (EngineResultStream resultStream = engine.execute(ctx)) { + EngineResultBatchIterator batchIterator = resultStream.iterator(); + while (batchIterator.hasNext()) { + EngineResultBatch batch = batchIterator.next(); + List fieldNames = batch.getFieldNames(); + for (int row = 0; row < batch.getRowCount(); row++) { + Object[] rowValues = new Object[fieldNames.size()]; + for (int col = 0; col < fieldNames.size(); col++) { + rowValues[col] = batch.getFieldValue(fieldNames.get(col), row); + } + rows.add(rowValues); + } + } + } + } + } catch (Exception e) { + throw new RuntimeException("Execution failed for [" + plugin.name() + "]", e); + } + return rows; } static String extractTableName(RelNode node) { @@ -89,14 +114,11 @@ static String extractTableName(RelNode node) { } private IndexShard resolveShard(String indexName) { - IndexMetadata meta = clusterService.state().metadata().index(indexName); - if (meta == null) throw new IllegalArgumentException("Index [" + indexName + "] not found"); - IndexService indexService = 
indicesService.indexService(meta.getIndex()); + IndexService indexService = indicesService.indexService(clusterService.state().metadata().index(indexName).getIndex()); if (indexService == null) throw new IllegalStateException("Index [" + indexName + "] not on this node"); Set shardIds = indexService.shardIds(); if (shardIds.isEmpty()) throw new IllegalStateException("No shards for [" + indexName + "]"); - IndexShard shard = indexService.getShardOrNull(shardIds.iterator().next()); - return shard; + return indexService.getShardOrNull(shardIds.iterator().next()); } private AnalyticsSearchBackendPlugin selectBackEnd() { diff --git a/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/AnalyticsQueryServiceTests.java b/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/AnalyticsQueryServiceTests.java deleted file mode 100644 index 518bdfdc89a0f..0000000000000 --- a/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/engine/AnalyticsQueryServiceTests.java +++ /dev/null @@ -1,391 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. 
- */ - -package org.opensearch.analytics.engine; - -import org.apache.calcite.rel.RelNode; -import org.opensearch.action.search.SearchShardTask; -import org.opensearch.analytics.backend.EngineResultBatch; -import org.opensearch.analytics.backend.EngineResultBatchIterator; -import org.opensearch.analytics.backend.EngineResultStream; -import org.opensearch.analytics.backend.ExecutionContext; -import org.opensearch.analytics.backend.SearchExecEngine; -import org.opensearch.analytics.exec.AnalyticsQueryService; -import org.opensearch.analytics.plan.ResolvedPlan; -import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin; -import org.opensearch.index.engine.DataFormatAwareEngine; -import org.opensearch.index.engine.dataformat.DataFormat; -import org.opensearch.index.engine.dataformat.FieldTypeCapabilities; -import org.opensearch.index.engine.exec.CatalogSnapshot; -import org.opensearch.index.engine.exec.EngineReaderManager; -import org.opensearch.index.engine.exec.Segment; -import org.opensearch.index.engine.exec.WriterFileSet; -import org.opensearch.index.shard.IndexShard; -import org.opensearch.test.OpenSearchTestCase; - -import java.io.IOException; -import java.nio.file.Path; -import java.util.ArrayList; -import java.util.Collection; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Set; - -import static org.mockito.Mockito.mock; -import static org.mockito.Mockito.when; - -/** - * End-to-end tests for {@link AnalyticsQueryService} with mock - * {@link SearchExecEngine} and {@link AnalyticsSearchBackendPlugin}. - * Validates the write → refresh → catalog snapshot → acquire reader → execute path. - */ -public class AnalyticsQueryServiceTests extends OpenSearchTestCase { - - /** - * Full lifecycle: build catalog snapshot from writer file sets, wire up - * DataFormatAwareEngine, and execute via AnalyticsQueryService with a - * mock backend that returns rows from the reader. 
- */ - public void testEndToEndExecuteViaAnalyticsQueryService() throws IOException { - MockDataFormat format = new MockDataFormat(); - Path dir = createTempDir(); - - // Simulate two writer generations producing file sets - WriterFileSet fs1 = WriterFileSet.builder().directory(dir).writerGeneration(1L).addFile("data_gen1.parquet").addNumRows(2).build(); - WriterFileSet fs2 = WriterFileSet.builder().directory(dir).writerGeneration(2L).addFile("data_gen2.parquet").addNumRows(1).build(); - - // Build segments and catalog snapshot - Segment seg1 = Segment.builder(0L).addSearchableFiles(format, fs1).build(); - Segment seg2 = Segment.builder(1L).addSearchableFiles(format, fs2).build(); - MockCatalogSnapshot snapshot = new MockCatalogSnapshot(1L, List.of(seg1, seg2), format); - - // Wire reader manager and DataFormatAwareEngine - MockReaderManager readerManager = new MockReaderManager(format.name()); - readerManager.afterRefresh(true, snapshot); - - DataFormatAwareEngine engine = new DataFormatAwareEngine(Map.of(format, readerManager)); - engine.setLatestSnapshot(snapshot); - - // Mock IndexShard to return our engine - IndexShard shard = mock(IndexShard.class); - when(shard.getCompositeEngine()).thenReturn(engine); - - // Create mock backend plugin that returns rows based on reader content - MockBackendPlugin backendPlugin = new MockBackendPlugin(format); - AnalyticsQueryService service = new AnalyticsQueryService(Map.of("mock-backend", backendPlugin)); - - // Build a resolved plan targeting our mock backend - RelNode mockRoot = mock(RelNode.class); - when(mockRoot.getTable()).thenReturn(null); - ResolvedPlan plan = new ResolvedPlan(mockRoot, "mock-backend", Map.of()); - - Iterable results = service.execute(plan, shard, mock(SearchShardTask.class)); - List rows = new ArrayList<>(); - results.forEach(rows::add); - - // Mock engine returns 3 total rows (2 from gen1 + 1 from gen2) - assertEquals(3, rows.size()); - assertEquals(0, service.getActiveContextCount()); - } - - 
/** - * Verifies context tracking: contexts are registered during execution - * and cleaned up after completion. - */ - public void testContextTrackingLifecycle() { - AnalyticsQueryService service = new AnalyticsQueryService(Map.of()); - ExecutionContext ctx = new ExecutionContext(null, "test-table", null, null); - - long id = service.putContext(ctx); - assertEquals(1, service.getActiveContextCount()); - assertSame(ctx, service.getContext(id)); - - ExecutionContext removed = service.removeContext(id); - assertSame(ctx, removed); - assertEquals(0, service.getActiveContextCount()); - assertNull(service.getContext(id)); - } - - /** - * Verifies that execute throws when no backend plugin is registered - * for the plan's primary backend. - */ - public void testExecuteThrowsForUnknownBackend() { - MockDataFormat format = new MockDataFormat(); - Path dir = createTempDir(); - WriterFileSet fs = WriterFileSet.builder().directory(dir).writerGeneration(1L).addFile("f.parquet").addNumRows(1).build(); - Segment seg = Segment.builder(0L).addSearchableFiles(format, fs).build(); - MockCatalogSnapshot snapshot = new MockCatalogSnapshot(1L, List.of(seg), format); - - MockReaderManager rm = new MockReaderManager(format.name()); - rm.afterRefresh(true, snapshot); - DataFormatAwareEngine engine = new DataFormatAwareEngine(Map.of(format, rm)); - engine.setLatestSnapshot(snapshot); - - IndexShard shard = mock(IndexShard.class); - when(shard.getCompositeEngine()).thenReturn(engine); - - AnalyticsQueryService service = new AnalyticsQueryService(Map.of()); - ResolvedPlan plan = new ResolvedPlan(null, "nonexistent", Map.of()); - - IllegalStateException ex = expectThrows(IllegalStateException.class, () -> service.execute(plan, shard, null)); - assertTrue(ex.getMessage().contains("No plugin registered for backend")); - } - - /** - * Verifies that execute throws when shard has no composite engine. 
- */ - public void testExecuteThrowsWhenNoCompositeEngine() { - IndexShard shard = mock(IndexShard.class); - when(shard.getCompositeEngine()).thenReturn(null); - when(shard.shardId()).thenReturn(new org.opensearch.core.index.shard.ShardId("idx", "uuid", 0)); - - AnalyticsQueryService service = new AnalyticsQueryService(Map.of("be", mock(AnalyticsSearchBackendPlugin.class))); - ResolvedPlan plan = new ResolvedPlan(null, "be", Map.of()); - - IllegalStateException ex = expectThrows(IllegalStateException.class, () -> service.execute(plan, shard, null)); - assertTrue(ex.getMessage().contains("No CompositeEngine on shard")); - } - - // --- Mock implementations --- - - static class MockDataFormat implements DataFormat { - @Override - public String name() { - return "mock-columnar"; - } - - @Override - public long priority() { - return 100L; - } - - @Override - public Set supportedFields() { - return Set.of( - new FieldTypeCapabilities( - "integer", - Set.of(FieldTypeCapabilities.Capability.COLUMNAR_STORAGE, FieldTypeCapabilities.Capability.STORED_FIELDS) - ) - ); - } - } - - static class MockReaderManager implements EngineReaderManager { - private final String formatName; - private final Map readers = new HashMap<>(); - - MockReaderManager(String formatName) { - this.formatName = formatName; - } - - @Override - public Object getReader(CatalogSnapshot snapshot) { - return readers.get(snapshot); - } - - @Override - public void beforeRefresh() {} - - @Override - public void afterRefresh(boolean didRefresh, CatalogSnapshot snapshot) { - if (didRefresh == false || readers.containsKey(snapshot)) return; - Collection files = snapshot.getSearchableFiles(formatName); - long totalRows = 0; - for (WriterFileSet wfs : files) { - totalRows += wfs.numRows(); - } - readers.put(snapshot, totalRows); - } - - @Override - public void onDeleted(CatalogSnapshot snapshot) { - readers.remove(snapshot); - } - - @Override - public void onFilesDeleted(Collection files) {} - - @Override - public 
void onFilesAdded(Collection files) {} - } - - static class MockCatalogSnapshot extends CatalogSnapshot { - private final List segments; - private final MockDataFormat format; - - MockCatalogSnapshot(long generation, List segments, MockDataFormat format) { - super("mock-snapshot", generation, 1L); - this.segments = segments; - this.format = format; - } - - @Override - public Map getUserData() { - return Map.of(); - } - - @Override - public long getId() { - return generation; - } - - @Override - public List getSegments() { - return segments; - } - - @Override - public Collection getSearchableFiles(String dataFormat) { - List result = new ArrayList<>(); - for (Segment seg : segments) { - WriterFileSet wfs = seg.dfGroupedSearchableFiles().get(dataFormat); - if (wfs != null) result.add(wfs); - } - return result; - } - - @Override - public Set getDataFormats() { - return Set.of(format.name()); - } - - @Override - public long getLastWriterGeneration() { - return generation; - } - - @Override - public String serializeToString() { - return "mock-snapshot-" + generation; - } - - @Override - public void setCatalogSnapshotMap(Map map) {} - - @Override - public void setUserData(Map userData, boolean b) {} - - @Override - public Object getReader(DataFormat dataFormat) { - return null; - } - - @Override - protected void closeInternal() {} - } - - /** - * Mock SearchExecEngine that produces rows based on a fixed count - * provided at construction time. 
- */ - static class MockSearchExecEngine implements SearchExecEngine { - private final long totalRows; - - MockSearchExecEngine(long totalRows) { - this.totalRows = totalRows; - } - - @Override - public void prepare(ExecutionContext context) {} - - @Override - public EngineResultStream execute(ExecutionContext context) { - return new MockResultStream(totalRows); - } - - @Override - public void close() {} - } - - static class MockResultStream implements EngineResultStream { - private final long rowCount; - - MockResultStream(long rowCount) { - this.rowCount = rowCount; - } - - @Override - public EngineResultBatchIterator iterator() { - return new MockBatchIterator(rowCount); - } - - @Override - public void close() {} - } - - static class MockBatchIterator implements EngineResultBatchIterator { - private final long rowCount; - private boolean consumed; - - MockBatchIterator(long rowCount) { - this.rowCount = rowCount; - } - - @Override - public boolean hasNext() { - return consumed == false; - } - - @Override - public EngineResultBatch next() { - consumed = true; - return new MockResultBatch((int) rowCount); - } - } - - static class MockResultBatch implements EngineResultBatch { - private final int rowCount; - - MockResultBatch(int rowCount) { - this.rowCount = rowCount; - } - - @Override - public List getFieldNames() { - return List.of("value"); - } - - @Override - public int getRowCount() { - return rowCount; - } - - @Override - public Object getFieldValue(String fieldName, int rowIndex) { - return "row_" + rowIndex; - } - } - - static class MockBackendPlugin implements AnalyticsSearchBackendPlugin { - private final DataFormat format; - - MockBackendPlugin(DataFormat format) { - this.format = format; - } - - @Override - public String name() { - return "mock-backend"; - } - - @Override - public SearchExecEngine searcher(ExecutionContext ctx) { - // Reader manager stores totalRows (Long) as the reader object - Object reader = ctx.getReader().getReader(format); - long 
rows = reader instanceof Long ? (Long) reader : 0L; - return new MockSearchExecEngine(rows); - } - - @Override - public List getSupportedFormats() { - return List.of(format); - } - } -} diff --git a/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/exec/DefaultPlanExecutorTests.java b/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/exec/DefaultPlanExecutorTests.java index 3fe866a55dac0..e8ce3f23b6ca1 100644 --- a/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/exec/DefaultPlanExecutorTests.java +++ b/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/exec/DefaultPlanExecutorTests.java @@ -21,9 +21,37 @@ import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.sql.type.SqlTypeName; +import org.opensearch.analytics.backend.EngineResultBatch; +import org.opensearch.analytics.backend.EngineResultBatchIterator; +import org.opensearch.analytics.backend.EngineResultStream; +import org.opensearch.analytics.backend.ExecutionContext; +import org.opensearch.analytics.backend.SearchExecEngine; +import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin; +import org.opensearch.cluster.ClusterState; +import org.opensearch.cluster.metadata.IndexMetadata; +import org.opensearch.cluster.metadata.Metadata; +import org.opensearch.cluster.service.ClusterService; +import org.opensearch.core.index.Index; +import org.opensearch.index.IndexService; +import org.opensearch.index.engine.DataFormatAwareEngine; +import org.opensearch.index.engine.dataformat.DataFormat; +import org.opensearch.index.engine.dataformat.FieldTypeCapabilities; +import org.opensearch.index.engine.exec.CatalogSnapshot; +import org.opensearch.index.engine.exec.EngineReaderManager; +import org.opensearch.index.engine.exec.Segment; +import org.opensearch.index.engine.exec.WriterFileSet; +import org.opensearch.index.shard.IndexShard; +import 
org.opensearch.indices.IndicesService; import org.opensearch.test.OpenSearchTestCase; +import java.io.IOException; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; import java.util.List; +import java.util.Map; +import java.util.Set; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.when; @@ -49,11 +77,7 @@ public void setUp() throws Exception { * extractTableName returns the table name from a TableScan node. */ public void testExtractTableNameFromTableScan() { - RelDataType rowType = buildRowType(3); - RelOptTable table = mock(RelOptTable.class); - when(table.getQualifiedName()).thenReturn(List.of("schema", "my_index")); - when(table.getRowType()).thenReturn(rowType); - + RelOptTable table = mockTable("schema", "my_index"); TableScan scan = new StubTableScan(cluster, cluster.traitSet(), table); assertEquals("my_index", DefaultPlanExecutor.extractTableName(scan)); } @@ -68,18 +92,68 @@ public void testExtractTableNameThrowsForNonTableScan() { } /** - * execute() throws NPE when clusterService is null (current TODO state). + * End-to-end: write file sets → catalog snapshot → DataFormatAwareEngine → + * DefaultPlanExecutor.execute() with mock backend returns rows via EngineResultStream. 
*/ - public void testExecuteThrowsWhenClusterServiceNull() { - DefaultPlanExecutor executor = new DefaultPlanExecutor(List.of(), null, null); + public void testEndToEndExecuteWithMockBackend() throws IOException { + MockDataFormat format = new MockDataFormat(); + Path dir = createTempDir(); - RelDataType rowType = buildRowType(1); - RelOptTable table = mock(RelOptTable.class); - when(table.getQualifiedName()).thenReturn(List.of("my_index")); - when(table.getRowType()).thenReturn(rowType); + WriterFileSet fs1 = WriterFileSet.builder().directory(dir).writerGeneration(1L).addFile("gen1.parquet").addNumRows(2).build(); + WriterFileSet fs2 = WriterFileSet.builder().directory(dir).writerGeneration(2L).addFile("gen2.parquet").addNumRows(1).build(); + + Segment seg1 = Segment.builder(0L).addSearchableFiles(format, fs1).build(); + Segment seg2 = Segment.builder(1L).addSearchableFiles(format, fs2).build(); + MockCatalogSnapshot snapshot = new MockCatalogSnapshot(1L, List.of(seg1, seg2), format); + + MockReaderManager readerManager = new MockReaderManager(format.name()); + readerManager.afterRefresh(true, snapshot); + + DataFormatAwareEngine engine = new DataFormatAwareEngine(Map.of(format, readerManager)); + engine.setLatestSnapshot(snapshot); + + // Mock shard + cluster wiring + IndexShard shard = mock(IndexShard.class); + when(shard.getCompositeEngine()).thenReturn(engine); + + Index index = new Index("my_index", "uuid"); + IndexMetadata indexMetadata = mock(IndexMetadata.class); + when(indexMetadata.getIndex()).thenReturn(index); + + Metadata metadata = mock(Metadata.class); + when(metadata.index("my_index")).thenReturn(indexMetadata); + + ClusterState clusterState = mock(ClusterState.class); + when(clusterState.metadata()).thenReturn(metadata); + + ClusterService clusterService = mock(ClusterService.class); + when(clusterService.state()).thenReturn(clusterState); + + IndexService indexService = mock(IndexService.class); + 
when(indexService.shardIds()).thenReturn(Set.of(0)); + when(indexService.getShardOrNull(0)).thenReturn(shard); + + IndicesService indicesService = mock(IndicesService.class); + when(indicesService.indexService(index)).thenReturn(indexService); + + MockBackendPlugin backendPlugin = new MockBackendPlugin(format); + DefaultPlanExecutor executor = new DefaultPlanExecutor(List.of(backendPlugin), indicesService, clusterService); + + RelOptTable table = mockTable("my_index"); TableScan scan = new StubTableScan(cluster, cluster.traitSet(), table); - expectThrows(NullPointerException.class, () -> executor.execute(scan, new Object())); + Iterable results = executor.execute(scan, new Object()); + List rows = new ArrayList<>(); + results.forEach(rows::add); + + assertEquals(3, rows.size()); + } + + private RelOptTable mockTable(String... qualifiedName) { + RelOptTable table = mock(RelOptTable.class); + when(table.getQualifiedName()).thenReturn(List.of(qualifiedName)); + when(table.getRowType()).thenReturn(buildRowType(1)); + return table; } private RelDataType buildRowType(int fieldCount) { @@ -102,4 +176,232 @@ private static class StubTableScan extends TableScan { super(cluster, traitSet, List.of(), table); } } + + static class MockDataFormat implements DataFormat { + @Override + public String name() { + return "mock-columnar"; + } + + @Override + public long priority() { + return 100L; + } + + @Override + public Set supportedFields() { + return Set.of( + new FieldTypeCapabilities( + "integer", + Set.of(FieldTypeCapabilities.Capability.COLUMNAR_STORAGE, FieldTypeCapabilities.Capability.STORED_FIELDS) + ) + ); + } + } + + static class MockReaderManager implements EngineReaderManager { + private final String formatName; + private final Map readers = new HashMap<>(); + + MockReaderManager(String formatName) { + this.formatName = formatName; + } + + @Override + public Object getReader(CatalogSnapshot snapshot) { + return readers.get(snapshot); + } + + @Override + public void 
beforeRefresh() {} + + @Override + public void afterRefresh(boolean didRefresh, CatalogSnapshot snapshot) { + if (didRefresh == false || readers.containsKey(snapshot)) return; + long totalRows = 0; + for (WriterFileSet wfs : snapshot.getSearchableFiles(formatName)) { + totalRows += wfs.numRows(); + } + readers.put(snapshot, totalRows); + } + + @Override + public void onDeleted(CatalogSnapshot snapshot) { + readers.remove(snapshot); + } + + @Override + public void onFilesDeleted(Collection files) {} + + @Override + public void onFilesAdded(Collection files) {} + } + + static class MockCatalogSnapshot extends CatalogSnapshot { + private final List segments; + private final DataFormat format; + + MockCatalogSnapshot(long generation, List segments, DataFormat format) { + super("mock-snapshot", generation, 1L); + this.segments = segments; + this.format = format; + } + + @Override + public Map getUserData() { + return Map.of(); + } + + @Override + public long getId() { + return generation; + } + + @Override + public List getSegments() { + return segments; + } + + @Override + public Collection getSearchableFiles(String dataFormat) { + List result = new ArrayList<>(); + for (Segment seg : segments) { + WriterFileSet wfs = seg.dfGroupedSearchableFiles().get(dataFormat); + if (wfs != null) result.add(wfs); + } + return result; + } + + @Override + public Set getDataFormats() { + return Set.of(format.name()); + } + + @Override + public long getLastWriterGeneration() { + return generation; + } + + @Override + public String serializeToString() { + return "mock-snapshot-" + generation; + } + + @Override + public void setCatalogSnapshotMap(Map map) {} + + @Override + public void setUserData(Map userData, boolean b) {} + + @Override + public Object getReader(DataFormat dataFormat) { + return null; + } + + @Override + protected void closeInternal() {} + } + + static class MockSearchExecEngine implements SearchExecEngine { + private final long totalRows; + + MockSearchExecEngine(long 
totalRows) { + this.totalRows = totalRows; + } + + @Override + public void prepare(ExecutionContext context) {} + + @Override + public EngineResultStream execute(ExecutionContext context) { + return new MockResultStream(totalRows); + } + + @Override + public void close() {} + } + + static class MockResultStream implements EngineResultStream { + private final long rowCount; + + MockResultStream(long rowCount) { + this.rowCount = rowCount; + } + + @Override + public EngineResultBatchIterator iterator() { + return new MockBatchIterator(rowCount); + } + + @Override + public void close() {} + } + + static class MockBatchIterator implements EngineResultBatchIterator { + private final long rowCount; + private boolean consumed; + + MockBatchIterator(long rowCount) { + this.rowCount = rowCount; + } + + @Override + public boolean hasNext() { + return consumed == false; + } + + @Override + public EngineResultBatch next() { + consumed = true; + return new MockResultBatch((int) rowCount); + } + } + + static class MockResultBatch implements EngineResultBatch { + private final int rowCount; + + MockResultBatch(int rowCount) { + this.rowCount = rowCount; + } + + @Override + public List getFieldNames() { + return List.of("value"); + } + + @Override + public int getRowCount() { + return rowCount; + } + + @Override + public Object getFieldValue(String fieldName, int rowIndex) { + return "row_" + rowIndex; + } + } + + static class MockBackendPlugin implements AnalyticsSearchBackendPlugin { + private final DataFormat format; + + MockBackendPlugin(DataFormat format) { + this.format = format; + } + + @Override + public String name() { + return "mock-backend"; + } + + @Override + public SearchExecEngine searcher(ExecutionContext ctx) { + Object reader = ctx.getReader().getReader(format); + long rows = reader instanceof Long ? 
(Long) reader : 0L; + return new MockSearchExecEngine(rows); + } + + @Override + public List getSupportedFormats() { + return List.of(format); + } + } } diff --git a/server/src/main/java/org/opensearch/index/IndexModule.java b/server/src/main/java/org/opensearch/index/IndexModule.java index ce5c697ea05bb..c6da5c1b00c8f 100644 --- a/server/src/main/java/org/opensearch/index/IndexModule.java +++ b/server/src/main/java/org/opensearch/index/IndexModule.java @@ -814,7 +814,7 @@ public IndexService newIndexService( MapperService, IndexSettings, DataFormatAwareEngineFactory, - IOException> compositeEngineFactorySupplier + IOException> dataFormatAwareEngineFactorySupplier ) throws IOException { final IndexEventListener eventListener = freeze(); Function> readerWrapperFactory = indexReaderWrapper @@ -888,7 +888,7 @@ public IndexService newIndexService( segmentReplicationStatsProvider, clusterDefaultMaxMergeAtOnceSupplier, clusterMergeSchedulerConfig, - compositeEngineFactorySupplier + dataFormatAwareEngineFactorySupplier ); success = true; return indexService; diff --git a/server/src/main/java/org/opensearch/index/IndexService.java b/server/src/main/java/org/opensearch/index/IndexService.java index 258d330acfef7..79ccb429f9b7b 100644 --- a/server/src/main/java/org/opensearch/index/IndexService.java +++ b/server/src/main/java/org/opensearch/index/IndexService.java @@ -216,7 +216,7 @@ public class IndexService extends AbstractIndexComponent implements IndicesClust MapperService, IndexSettings, DataFormatAwareEngineFactory, - IOException> compositeEngineFactorySupplier; + IOException> dataFormatAwareEngineFactorySupplier; @InternalApi public IndexService( @@ -269,7 +269,7 @@ public IndexService( MapperService, IndexSettings, DataFormatAwareEngineFactory, - IOException> compositeEngineFactorySupplier + IOException> dataFormatAwareEngineFactorySupplier ) { super(indexSettings); this.storeFactory = storeFactory; @@ -380,7 +380,7 @@ public IndexService( 
startIndexLevelRefreshTask(); } } - this.compositeEngineFactorySupplier = compositeEngineFactorySupplier; + this.dataFormatAwareEngineFactorySupplier = dataFormatAwareEngineFactorySupplier; } @InternalApi @@ -791,8 +791,8 @@ protected void closeInternal() { directoryFactory ); eventListener.onStoreCreated(shardId); - DataFormatAwareEngineFactory dataFormatAwareEngineFactory = compositeEngineFactorySupplier != null - ? compositeEngineFactorySupplier.apply(path, mapperService, this.indexSettings) + DataFormatAwareEngineFactory dataFormatAwareEngineFactory = dataFormatAwareEngineFactorySupplier != null + ? dataFormatAwareEngineFactorySupplier.apply(path, mapperService, this.indexSettings) : null; indexShard = new IndexShard( routing, diff --git a/server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java b/server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java index 1a2ccd39f1540..58cdc4c8d3d2e 100644 --- a/server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java +++ b/server/src/main/java/org/opensearch/index/engine/DataFormatAwareEngine.java @@ -36,7 +36,7 @@ public class DataFormatAwareEngine implements Closeable { private volatile CatalogSnapshot latestSnapshot; /** - * Constructs a new CompositeEngine with pre-built maps. + * Constructs a new DataFormatAwareEngine with pre-built maps. * Prefer using {@link DataFormatAwareEngineFactory#create()}. */ public DataFormatAwareEngine(Map> readerManagers) { @@ -73,7 +73,7 @@ public DataFormatAwareReader acquireReader() throws IOException { } /** - * Acquires a composite reader on a specific catalog snapshot. + * Acquires a dataFormatAwareReader on a specific catalog snapshot. 
*/ public DataFormatAwareReader acquireReader(CatalogSnapshot catalogSnapshot) throws IOException { catalogSnapshot.incRef(); @@ -133,7 +133,7 @@ public void close() throws IOException { } } if (exceptions.isEmpty() == false) { - IOException ioException = new IOException("Failed to close CompositeEngine resources"); + IOException ioException = new IOException("Failed to close DataFormatAwareEngine resources"); for (Exception e : exceptions) { ioException.addSuppressed(e); } diff --git a/server/src/main/java/org/opensearch/index/engine/exec/DataFormatAwareEngineFactory.java b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatAwareEngineFactory.java index 85837f3f54fb4..d4c69da30a652 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/DataFormatAwareEngineFactory.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/DataFormatAwareEngineFactory.java @@ -8,8 +8,6 @@ package org.opensearch.index.engine.exec; -import org.opensearch.common.CheckedFunction; -import org.opensearch.common.CheckedSupplier; import org.opensearch.common.annotation.ExperimentalApi; import org.opensearch.index.IndexSettings; import org.opensearch.index.engine.DataFormatAwareEngine; @@ -25,8 +23,7 @@ /** * Factory that discovers {@link SearchBackEndPlugin}s via - * {@link PluginsService} and builds the per-format reader managers and - * memoizing suppliers consumed by {@link DataFormatAwareEngine}. + * {@link PluginsService} and builds the per-format reader managers consumed by {@link DataFormatAwareEngine}. *

* This keeps DataformatAwareEngine decoupled from the plugin layer. * @@ -53,33 +50,6 @@ public DataFormatAwareEngineFactory( this.indexFileDeleter = new IndexFileDeleter(null, shardPath); } - /** - * Wraps a {@link CheckedFunction} factory into a thread-safe memoizing supplier - * using double-checked locking. The factory is invoked at most once. - */ - private static CheckedSupplier memoize(DataFormat format, CheckedFunction factory) { - return new CheckedSupplier<>() { - private volatile T instance; - - @Override - public T get() throws IOException { - T result = instance; - if (result != null) { - return result; - } - synchronized (this) { - result = instance; - if (result != null) { - return result; - } - result = factory.apply(format); - instance = result; - return result; - } - } - }; - } - /** * Creates a new {@link DataFormatAwareEngine} populated with the discovered * reader managers and memoizing suppliers. diff --git a/server/src/main/java/org/opensearch/index/engine/exec/FilesListener.java b/server/src/main/java/org/opensearch/index/engine/exec/FilesListener.java index 7c6b69acbe9cf..9a8ccbe35e082 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/FilesListener.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/FilesListener.java @@ -13,6 +13,9 @@ import java.io.IOException; import java.util.Collection; +/** + * Listener for lifecycle of files + */ @ExperimentalApi public interface FilesListener { void onFilesDeleted(Collection files) throws IOException; diff --git a/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterContext.java b/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterContext.java index 415cecec55129..36df32ca57ec1 100644 --- a/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterContext.java +++ b/server/src/main/java/org/opensearch/index/engine/exec/IndexFilterContext.java @@ -13,6 +13,8 @@ import java.io.Closeable; /** + * Context to maintain state for index filters 
+ * * @opensearch.experimental */ @ExperimentalApi diff --git a/server/src/main/java/org/opensearch/indices/IndicesService.java b/server/src/main/java/org/opensearch/indices/IndicesService.java index 5bd14d499dc6d..ad33e3811f273 100644 --- a/server/src/main/java/org/opensearch/indices/IndicesService.java +++ b/server/src/main/java/org/opensearch/indices/IndicesService.java @@ -432,7 +432,7 @@ public class IndicesService extends AbstractLifecycleComponent MapperService, IndexSettings, DataFormatAwareEngineFactory, - IOException> compositeEngineFactorySupplier; + IOException> dataFormatEngineFactorySupplier; @Override protected void doStart() { @@ -618,7 +618,7 @@ protected void closeInternal() { MergeSchedulerConfig.CLUSTER_MAX_FORCE_MERGE_MB_PER_SEC_SETTING, this::onClusterLevelForceMergeMBPerSecUpdate ); - this.compositeEngineFactorySupplier = (shardPath, mapperService, indexSettings) -> new DataFormatAwareEngineFactory( + this.dataFormatEngineFactorySupplier = (shardPath, mapperService, indexSettings) -> new DataFormatAwareEngineFactory( pluginsService, shardPath, mapperService, @@ -1153,7 +1153,7 @@ private synchronized IndexService createIndexService( segmentReplicationStatsProvider, this::getClusterDefaultMaxMergeAtOnce, clusterMergeSchedulerConfig, - compositeEngineFactorySupplier + dataFormatEngineFactorySupplier ); } From bfa6e1878e191e167b66efff8d6e4f921851dc35 Mon Sep 17 00:00:00 2001 From: bharath-techie Date: Wed, 25 Mar 2026 00:59:43 +0530 Subject: [PATCH 10/11] fixing gradle run Signed-off-by: bharath-techie --- .../plugins/analytics-backend-datafusion/build.gradle | 9 ++++----- .../org/opensearch/be/datafusion/DataFusionPlugin.java | 9 +++------ .../org/opensearch/be/datafusion/DataFusionService.java | 3 ++- 3 files changed, 9 insertions(+), 12 deletions(-) diff --git a/sandbox/plugins/analytics-backend-datafusion/build.gradle b/sandbox/plugins/analytics-backend-datafusion/build.gradle index 89929e691d7c9..0159c92127bc8 100644 --- 
a/sandbox/plugins/analytics-backend-datafusion/build.gradle +++ b/sandbox/plugins/analytics-backend-datafusion/build.gradle @@ -13,12 +13,11 @@ opensearchplugin { } dependencies { - // Shared types and SPI interfaces (EngineBridge, AnalyticsBackEndPlugin, etc.) - // Also provides calcite-core transitively via api. - api project(':sandbox:libs:analytics-framework') + // Provided at runtime by the parent analytics-engine plugin; compile-only to avoid jar hell. + compileOnly project(':sandbox:libs:analytics-framework') - implementation "org.apache.logging.log4j:log4j-api:${versions.log4j}" - implementation "org.apache.logging.log4j:log4j-core:${versions.log4j}" + compileOnly "org.apache.logging.log4j:log4j-api:${versions.log4j}" + compileOnly "org.apache.logging.log4j:log4j-core:${versions.log4j}" } // TODO: Remove once back-end is built out with test suite diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java index f1b379b7c8433..678cd23813052 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java @@ -65,16 +65,12 @@ public class DataFusionPlugin extends Plugin implements SearchBackEndPlugin, Ana Setting.Property.NodeScope ); - private final Settings settings; private volatile DataFusionService dataFusionService; /** - * Creates the DataFusion plugin with the given node settings. - * @param settings the node-level settings + * Creates the DataFusion plugin. 
*/ - public DataFusionPlugin(Settings settings) { - this.settings = settings; - } + public DataFusionPlugin() {} @Override public Collection createComponents( @@ -90,6 +86,7 @@ public Collection createComponents( IndexNameExpressionResolver indexNameExpressionResolver, Supplier repositoriesServiceSupplier ) { + Settings settings = environment.settings(); long memoryPoolLimit = DATAFUSION_MEMORY_POOL_LIMIT.get(settings); long spillMemoryLimit = DATAFUSION_SPILL_MEMORY_LIMIT.get(settings); String spillDir = environment.dataFiles()[0].getParent().resolve("tmp").toAbsolutePath().toString(); diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java index 2cf1811a8b436..62271d88d1c57 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionService.java @@ -52,7 +52,8 @@ protected void doStart() { try { System.loadLibrary(NATIVE_LIBRARY_NAME); } catch (UnsatisfiedLinkError e) { - throw new IllegalStateException("Failed to load native library: " + NATIVE_LIBRARY_NAME, e); + logger.warn("Native library [{}] not found — DataFusion backend will be unavailable", NATIVE_LIBRARY_NAME); + return; } // TODO: initialize Tokio runtime and memory pool via NativeBridge From 17d75465c34b6cadef997311a9906da003f0b92c Mon Sep 17 00:00:00 2001 From: bharath-techie Date: Wed, 25 Mar 2026 19:09:22 +0530 Subject: [PATCH 11/11] addressing comments and fixing build failures Signed-off-by: bharath-techie --- .../opensearch/common/CheckedTriFunction.java | 4 +- sandbox/libs/analytics-framework/build.gradle | 100 +----------------- .../backend/EngineResultBatchIterator.java | 18 ---- .../analytics/backend/EngineResultStream.java | 4 +- 
.../analytics/backend/ExecutionContext.java | 2 +- .../analytics/backend/SearchExecEngine.java | 7 +- .../spi/AnalyticsSearchBackendPlugin.java | 14 +-- .../analytics-backend-datafusion/README.md | 54 ++++++++-- .../be/datafusion/DataFusionPlugin.java | 3 +- .../be/datafusion/DatafusionResultStream.java | 6 +- .../DatafusionSearchExecEngine.java | 2 +- .../analytics-backend-lucene/build.gradle | 3 + ...analytics.spi.AnalyticsSearchBackendPlugin | 1 - .../analytics/exec/DefaultPlanExecutor.java | 18 ++-- .../exec/DefaultPlanExecutorTests.java | 10 +- 15 files changed, 84 insertions(+), 162 deletions(-) delete mode 100644 sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultBatchIterator.java delete mode 100644 sandbox/plugins/analytics-backend-lucene/src/main/resources/META-INF/services/org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin diff --git a/libs/common/src/main/java/org/opensearch/common/CheckedTriFunction.java b/libs/common/src/main/java/org/opensearch/common/CheckedTriFunction.java index 53d2adb3951b8..33141b65fc2b0 100644 --- a/libs/common/src/main/java/org/opensearch/common/CheckedTriFunction.java +++ b/libs/common/src/main/java/org/opensearch/common/CheckedTriFunction.java @@ -8,14 +8,14 @@ package org.opensearch.common; -import org.opensearch.common.annotation.ExperimentalApi; +import org.opensearch.common.annotation.PublicApi; /** * A {@link TriFunction}-like interface which allows throwing checked exceptions. 
* * @opensearch.internal */ -@ExperimentalApi +@PublicApi(since = "2.9.0") @FunctionalInterface public interface CheckedTriFunction { R apply(S s, T t, U u) throws E; diff --git a/sandbox/libs/analytics-framework/build.gradle b/sandbox/libs/analytics-framework/build.gradle index 8748528a48dce..ad9f9c77ce36a 100644 --- a/sandbox/libs/analytics-framework/build.gradle +++ b/sandbox/libs/analytics-framework/build.gradle @@ -48,60 +48,11 @@ tasks.named('forbiddenApisMain').configure { // Split into multiple calls to stay under the JVM method parameter limit. tasks.named('thirdPartyAudit').configure { ignoreMissingClasses( - // Jackson (optional JSON serialization in Calcite) - 'com.fasterxml.jackson.core.JsonParser$Feature', - 'com.fasterxml.jackson.core.PrettyPrinter', - 'com.fasterxml.jackson.core.type.TypeReference', - 'com.fasterxml.jackson.core.util.DefaultIndenter', - 'com.fasterxml.jackson.core.util.DefaultPrettyPrinter', - 'com.fasterxml.jackson.core.util.Separators', - 'com.fasterxml.jackson.core.util.Separators$Spacing', + // Jackson databind (optional JSON serialization in Calcite — only databind is missing, core is on classpath) 'com.fasterxml.jackson.databind.DeserializationFeature', 'com.fasterxml.jackson.databind.ObjectMapper', 'com.fasterxml.jackson.databind.ObjectWriter', - // Protobuf (Avatica RPC serialization, not used) - 'com.google.protobuf.AbstractMessageLite$Builder', - 'com.google.protobuf.AbstractParser', - 'com.google.protobuf.ByteString', - 'com.google.protobuf.CodedInputStream', - 'com.google.protobuf.CodedOutputStream', - 'com.google.protobuf.Descriptors$Descriptor', - 'com.google.protobuf.Descriptors$EnumDescriptor', - 'com.google.protobuf.Descriptors$EnumValueDescriptor', - 'com.google.protobuf.Descriptors$FieldDescriptor', - 'com.google.protobuf.Descriptors$FileDescriptor', - 'com.google.protobuf.Descriptors$OneofDescriptor', - 'com.google.protobuf.ExtensionRegistry', - 'com.google.protobuf.ExtensionRegistryLite', - 
'com.google.protobuf.GeneratedMessageV3', - 'com.google.protobuf.GeneratedMessageV3$Builder', - 'com.google.protobuf.GeneratedMessageV3$BuilderParent', - 'com.google.protobuf.GeneratedMessageV3$FieldAccessorTable', - 'com.google.protobuf.GeneratedMessageV3$UnusedPrivateParameter', - 'com.google.protobuf.Internal', - 'com.google.protobuf.Internal$EnumLiteMap', - 'com.google.protobuf.Internal$IntList', - 'com.google.protobuf.Internal$LongList', - 'com.google.protobuf.InvalidProtocolBufferException', - 'com.google.protobuf.LazyStringArrayList', - 'com.google.protobuf.MapEntry', - 'com.google.protobuf.MapEntry$Builder', - 'com.google.protobuf.MapField', - 'com.google.protobuf.MapFieldReflectionAccessor', - 'com.google.protobuf.Message', - 'com.google.protobuf.MessageOrBuilder', - 'com.google.protobuf.Parser', - 'com.google.protobuf.ProtocolMessageEnum', - 'com.google.protobuf.ProtocolStringList', - 'com.google.protobuf.RepeatedFieldBuilderV3', - 'com.google.protobuf.SingleFieldBuilderV3', - 'com.google.protobuf.TextFormat', - 'com.google.protobuf.UninitializedMessageException', - 'com.google.protobuf.UnknownFieldSet', - 'com.google.protobuf.UnsafeByteOperations', - 'com.google.protobuf.WireFormat$FieldType', - // Uzaygezen (optional Hilbert curve spatial indexing) 'com.google.uzaygezen.core.BacktrackingQueryBuilder', 'com.google.uzaygezen.core.BitVector', @@ -201,61 +152,14 @@ tasks.named('thirdPartyAudit').configure { 'org.joou.UShort', 'org.joou.Unsigned', - // JTS / Proj4j (optional spatial/geometry support) + // JTS / Proj4j (optional spatial/geometry support — classes still missing) 'org.locationtech.jts.algorithm.InteriorPoint', - 'org.locationtech.jts.algorithm.LineIntersector', - 'org.locationtech.jts.algorithm.MinimumBoundingCircle', - 'org.locationtech.jts.algorithm.MinimumDiameter', - 'org.locationtech.jts.densify.Densifier', - 'org.locationtech.jts.geom.Coordinate', - 'org.locationtech.jts.geom.CoordinateSequence', - 
'org.locationtech.jts.geom.CoordinateSequenceFactory', - 'org.locationtech.jts.geom.Envelope', - 'org.locationtech.jts.geom.Geometry', - 'org.locationtech.jts.geom.GeometryCollection', - 'org.locationtech.jts.geom.GeometryFactory', - 'org.locationtech.jts.geom.GeometryFilter', - 'org.locationtech.jts.geom.IntersectionMatrix', - 'org.locationtech.jts.geom.LineSegment', - 'org.locationtech.jts.geom.LineString', - 'org.locationtech.jts.geom.LinearRing', - 'org.locationtech.jts.geom.MultiLineString', - 'org.locationtech.jts.geom.MultiPoint', - 'org.locationtech.jts.geom.MultiPolygon', - 'org.locationtech.jts.geom.OctagonalEnvelope', - 'org.locationtech.jts.geom.Point', - 'org.locationtech.jts.geom.Polygon', - 'org.locationtech.jts.geom.util.AffineTransformation', - 'org.locationtech.jts.geom.util.GeometryEditor', - 'org.locationtech.jts.geom.util.GeometryEditor$CoordinateOperation', 'org.locationtech.jts.geom.util.GeometryFixer', - 'org.locationtech.jts.geom.util.GeometryTransformer', - 'org.locationtech.jts.geom.util.LineStringExtracter', - 'org.locationtech.jts.io.WKBReader', - 'org.locationtech.jts.io.WKBWriter', - 'org.locationtech.jts.io.WKTReader', - 'org.locationtech.jts.io.WKTWriter', 'org.locationtech.jts.io.geojson.GeoJsonReader', 'org.locationtech.jts.io.geojson.GeoJsonWriter', - 'org.locationtech.jts.io.gml2.GMLReader', - 'org.locationtech.jts.io.gml2.GMLWriter', - 'org.locationtech.jts.linearref.LengthIndexedLine', - 'org.locationtech.jts.operation.buffer.BufferOp', - 'org.locationtech.jts.operation.buffer.BufferParameters', 'org.locationtech.jts.operation.buffer.OffsetCurve', - 'org.locationtech.jts.operation.distance.DistanceOp', - 'org.locationtech.jts.operation.linemerge.LineMerger', - 'org.locationtech.jts.operation.overlay.snap.GeometrySnapper', - 'org.locationtech.jts.operation.polygonize.Polygonizer', - 'org.locationtech.jts.operation.union.UnaryUnionOp', - 'org.locationtech.jts.precision.GeometryPrecisionReducer', - 
'org.locationtech.jts.simplify.DouglasPeuckerSimplifier', - 'org.locationtech.jts.simplify.TopologyPreservingSimplifier', - 'org.locationtech.jts.triangulate.DelaunayTriangulationBuilder', 'org.locationtech.jts.triangulate.polygon.ConstrainedDelaunayTriangulator', - 'org.locationtech.jts.triangulate.quadedge.QuadEdgeSubdivision', 'org.locationtech.jts.triangulate.tri.Tri', - 'org.locationtech.jts.util.GeometricShapeFactory', 'org.locationtech.proj4j.CRSFactory', 'org.locationtech.proj4j.CoordinateReferenceSystem', 'org.locationtech.proj4j.CoordinateTransform', diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultBatchIterator.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultBatchIterator.java deleted file mode 100644 index 1de5bbd5b64c5..0000000000000 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultBatchIterator.java +++ /dev/null @@ -1,18 +0,0 @@ -/* - * SPDX-License-Identifier: Apache-2.0 - * - * The OpenSearch Contributors require contributions made to - * this file be licensed under the Apache-2.0 license or a - * compatible open source license. - */ - -package org.opensearch.analytics.backend; - -import java.util.Iterator; - -/** - * Single-pass iterator over record batches from an {@link EngineResultStream}. 
- * - * @opensearch.internal - */ -public interface EngineResultBatchIterator extends Iterator {} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultStream.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultStream.java index 7c189b4079889..df8860911b83a 100644 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultStream.java +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/EngineResultStream.java @@ -8,6 +8,8 @@ package org.opensearch.analytics.backend; +import java.util.Iterator; + /** * A closeable stream of record batches returned by engine execution. * Callers iterate batches via the returned iterator and MUST close the stream @@ -21,7 +23,7 @@ public interface EngineResultStream extends AutoCloseable { * Returns an iterator over the record batches in this stream. * Each call returns the same iterator instance — the stream is single-pass. */ - EngineResultBatchIterator iterator(); + Iterator iterator(); @Override void close(); diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/ExecutionContext.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/ExecutionContext.java index d93e6f7b797d2..c703a3bab0d92 100644 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/ExecutionContext.java +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/ExecutionContext.java @@ -21,7 +21,7 @@ public class ExecutionContext { private final String tableName; private final DataFormatAwareEngine.DataFormatAwareReader reader; - SearchShardTask task; + private final SearchShardTask task; /** * Constructs an execution context. 
diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/SearchExecEngine.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/SearchExecEngine.java index b8a1ec4c943a8..2defa07ad5ebf 100644 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/SearchExecEngine.java +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/backend/SearchExecEngine.java @@ -18,20 +18,19 @@ * @opensearch.experimental */ @ExperimentalApi -public interface SearchExecEngine extends Closeable { - +public interface SearchExecEngine extends Closeable { /** * Creates an execution context from a resolved plan. * * @param context ExecutionContext */ - void prepare(ExecutionContext context); + void prepare(T context); /** * Executes the context and returns a result stream. * @param context the execution context */ - EngineResultStream execute(ExecutionContext context) throws IOException; + V execute(T context) throws IOException; @Override default void close() throws IOException {} diff --git a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java index b48023973c6db..2c0a329464580 100644 --- a/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java +++ b/sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AnalyticsSearchBackendPlugin.java @@ -8,17 +8,12 @@ package org.opensearch.analytics.spi; -import org.apache.calcite.rel.RelNode; -import org.apache.calcite.rel.logical.LogicalAggregate; -import org.apache.calcite.rel.logical.LogicalFilter; -import org.apache.calcite.rel.logical.LogicalProject; -import org.apache.calcite.rel.logical.LogicalTableScan; +import org.opensearch.analytics.backend.EngineResultStream; import 
org.opensearch.analytics.backend.ExecutionContext; import org.opensearch.analytics.backend.SearchExecEngine; import org.opensearch.index.engine.dataformat.DataFormat; import java.util.List; -import java.util.Set; /** * SPI extension point for back-end query engines (DataFusion, Lucene, etc.). @@ -33,12 +28,7 @@ public interface AnalyticsSearchBackendPlugin { * Creates a searcher bound to the given reader snapshot. * @param ctx the execution context */ - SearchExecEngine searcher(ExecutionContext ctx); - - /** Returns the set of RelNode operator classes this backend supports. */ - default Set> supportedOperators() { - return Set.of(LogicalTableScan.class, LogicalFilter.class, LogicalAggregate.class, LogicalProject.class); - } + SearchExecEngine searcher(ExecutionContext ctx); /** Returns the data formats supported by this backend. */ List getSupportedFormats(); diff --git a/sandbox/plugins/analytics-backend-datafusion/README.md b/sandbox/plugins/analytics-backend-datafusion/README.md index ae0e9b9b7a37e..80f731378de85 100644 --- a/sandbox/plugins/analytics-backend-datafusion/README.md +++ b/sandbox/plugins/analytics-backend-datafusion/README.md @@ -1,16 +1,52 @@ # analytics-backend-datafusion -DataFusion native execution engine plugin. Implements `AnalyticsBackEndPlugin` to provide a back-end that can execute query plan fragments via JNI. +DataFusion native execution engine plugin for the OpenSearch analytics framework. Implements `SearchBackEndPlugin` (server SPI for shard-level reader management) and `AnalyticsSearchBackendPlugin` (analytics-framework SPI for query execution) to execute query plan fragments via a Rust/DataFusion runtime over JNI. -## What it does +## Architecture -Exposes a `DataFusionBridge` (`EngineBridge`) that converts Calcite `RelNode` fragments into a serialized plan format and executes them through a native Rust/DataFusion library. Currently a stub. 
- -## How it fits in - -Declares `extendedPlugins = ['analytics-engine']` so the hub discovers it as an `AnalyticsBackEndPlugin`. The hub passes all discovered back-ends to the `QueryPlanExecutorPlugin` during executor creation. The executor will eventually use the bridge and capabilities to route plan fragments to the appropriate engine. +``` +┌─────────────────────────────────────────────────────────────────┐ +│ analytics-engine (hub) │ +│ ExtensiblePlugin — discovers AnalyticsSearchBackendPlugin SPIs │ +│ Routes query plan fragments to back-ends via DefaultPlanExecutor│ +└──────────────┬──────────────────────────────────────────────────┘ + │ SPI (extendedPlugins = ['analytics-engine']) + ▼ +┌─────────────────────────────────────────────────────────────────┐ +│ analytics-backend-datafusion │ +│ │ +│ DataFusionPlugin │ +│ ├── createComponents() → DataFusionService (node-level) │ +│ ├── searcher(ExecutionContext) → DatafusionSearchExecEngine │ +│ └── createReaderManager(format, shardPath) │ +│ → DatafusionReaderManager │ +│ │ +│ Execution flow: │ +│ ExecutionContext │ +│ → DatafusionSearchExecEngine.prepare() │ +│ (RelNode → Substrait bytes → DatafusionQuery) │ +│ → DatafusionSearchExecEngine.execute() │ +│ → DatafusionSearcher.search(DatafusionContext) │ +│ → NativeBridge.executeQuery() [JNI] │ +│ → DatafusionResultStream (Arrow record batches) │ +│ │ +│ Native layer (JNI): │ +│ NativeBridge ──→ rust │ +│ createDatafusionReader / closeDatafusionReader │ +│ createGlobalRuntime / closeGlobalRuntime │ +│ executeQuery / streamNext / streamClose │ +└─────────────────────────────────────────────────────────────────┘ +``` ## Key classes -- **`DataFusionPlugin`** — The `AnalyticsBackEndPlugin` SPI implementation. Reports `name() = "datafusion"`. -- **`DataFusionBridge`** — The `EngineBridge` implementation for native execution. 
+| Class | Role | +|---|----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| `DataFusionPlugin` | Plugin entry point. Implements `SearchBackEndPlugin` (server SPI — provides `createReaderManager` for shard-level data access) and `AnalyticsSearchBackendPlugin` (analytics-framework SPI — provides `searcher` for query execution). | +| `DataFusionService` | Node-level lifecycle service. Loads the native JNI library, creates the Tokio runtime, global runtime environment, and memory pool. Shared by all per-shard engines. | +| `DatafusionSearchExecEngine` | Per-query engine. `prepare()` converts the Calcite `RelNode` to a Substrait plan; `execute()` delegates to `DatafusionSearcher` and returns a `DatafusionResultStream`. | +| `DatafusionContext` | Execution context carrying the query plan, `DatafusionSearcher`, optional `IndexFilterTree`, native runtime pointer, result `StreamHandle`, etc. Implements `SearchExecutionContext`. | +| `DatafusionSearcher` | Executes the Substrait plan against a native reader via `NativeBridge.executeQuery()`. Owns no resources — reader lifecycle is managed by `DatafusionReaderManager`. | +| `DatafusionReader` | Per-shard point-in-time snapshot of data files. Wraps a `ReaderHandle`. | +| `DatafusionReaderManager` | Manages `DatafusionReader` lifecycle per `CatalogSnapshot`. Handles refresh (swap in new reader) and deletion (close old reader). | +| `NativeRuntimeHandle` | Thread-safe wrapper around the native runtime pointer with liveness checks. 
| diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java index 678cd23813052..8e98add4fa1e8 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DataFusionPlugin.java @@ -10,6 +10,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.opensearch.analytics.backend.EngineResultStream; import org.opensearch.analytics.backend.ExecutionContext; import org.opensearch.analytics.backend.SearchExecEngine; import org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin; @@ -104,7 +105,7 @@ public String name() { } @Override - public SearchExecEngine searcher(ExecutionContext ctx) { + public SearchExecEngine searcher(ExecutionContext ctx) { // TODO: resolve DataFormat properly instead of passing null DatafusionReader dfReader = (DatafusionReader) ctx.getReader().getReader(null); DatafusionContext context = new DatafusionContext(ctx.getTask(), dfReader, dataFusionService.getNativeRuntime()); diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionResultStream.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionResultStream.java index 069deb7274046..7daad7fc6e29a 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionResultStream.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionResultStream.java @@ -9,12 +9,12 @@ package org.opensearch.be.datafusion; import org.opensearch.analytics.backend.EngineResultBatch; -import org.opensearch.analytics.backend.EngineResultBatchIterator; 
import org.opensearch.analytics.backend.EngineResultStream; import org.opensearch.be.datafusion.jni.NativeBridge; import org.opensearch.be.datafusion.jni.StreamHandle; import org.opensearch.common.annotation.ExperimentalApi; +import java.util.Iterator; import java.util.NoSuchElementException; /** @@ -41,7 +41,7 @@ public DatafusionResultStream(StreamHandle streamHandle) { } @Override - public EngineResultBatchIterator iterator() { + public Iterator iterator() { if (iteratorInstance == null) { iteratorInstance = new BatchIterator(streamHandle); } @@ -57,7 +57,7 @@ public void close() { * Iterator that pulls Arrow record batches from the native stream via JNI. * Each call to {@link #next()} returns a batch wrapping the current Arrow data. */ - static class BatchIterator implements EngineResultBatchIterator { + static class BatchIterator implements Iterator { private final StreamHandle streamHandle; private Boolean hasNext; diff --git a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java index f80cf84dddd1b..e100d2f2e0a57 100644 --- a/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java +++ b/sandbox/plugins/analytics-backend-datafusion/src/main/java/org/opensearch/be/datafusion/DatafusionSearchExecEngine.java @@ -23,7 +23,7 @@ * @opensearch.experimental */ @ExperimentalApi -public class DatafusionSearchExecEngine implements SearchExecEngine { +public class DatafusionSearchExecEngine implements SearchExecEngine { private final DatafusionContext datafusionContext; diff --git a/sandbox/plugins/analytics-backend-lucene/build.gradle b/sandbox/plugins/analytics-backend-lucene/build.gradle index c0413a6c6d41a..42426fb8888e7 100644 --- a/sandbox/plugins/analytics-backend-lucene/build.gradle +++ 
b/sandbox/plugins/analytics-backend-lucene/build.gradle @@ -25,3 +25,6 @@ dependencies { test { systemProperty 'tests.security.manager', 'false' } + +// TODO: Remove once back-end is built out with test suite +testingConventions.enabled = false diff --git a/sandbox/plugins/analytics-backend-lucene/src/main/resources/META-INF/services/org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin b/sandbox/plugins/analytics-backend-lucene/src/main/resources/META-INF/services/org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin deleted file mode 100644 index 53330f0ac02ef..0000000000000 --- a/sandbox/plugins/analytics-backend-lucene/src/main/resources/META-INF/services/org.opensearch.analytics.spi.AnalyticsSearchBackendPlugin +++ /dev/null @@ -1 +0,0 @@ -org.opensearch.be.lucene.LuceneSearchEnginePlugin diff --git a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java index 825bac9ff3b98..e45636150896a 100644 --- a/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java +++ b/sandbox/plugins/analytics-engine/src/main/java/org/opensearch/analytics/exec/DefaultPlanExecutor.java @@ -14,7 +14,6 @@ import org.apache.logging.log4j.Logger; import org.opensearch.action.search.SearchShardTask; import org.opensearch.analytics.backend.EngineResultBatch; -import org.opensearch.analytics.backend.EngineResultBatchIterator; import org.opensearch.analytics.backend.EngineResultStream; import org.opensearch.analytics.backend.ExecutionContext; import org.opensearch.analytics.backend.SearchExecEngine; @@ -26,6 +25,7 @@ import org.opensearch.indices.IndicesService; import java.util.ArrayList; +import java.util.Iterator; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; @@ -65,7 +65,10 @@ public DefaultPlanExecutor(List plugins, IndicesSe @Override public Iterable 
execute(RelNode logicalFragment, Object context) { String tableName = extractTableName(logicalFragment); - String backendName = selectBackEnd().name(); + AnalyticsSearchBackendPlugin plugin = selectBackEnd(); + if (plugin == null) { + return new ArrayList<>(); + } IndexShard shard = resolveShard(tableName); DataFormatAwareEngine dataFormatAwareEngine = shard.getCompositeEngine(); @@ -73,15 +76,14 @@ public Iterable execute(RelNode logicalFragment, Object context) { throw new IllegalStateException("No CompositeEngine on shard [" + shard.shardId() + "]"); } - AnalyticsSearchBackendPlugin plugin = backEnds.get(backendName); SearchShardTask task = null; // TODO : init task List rows = new ArrayList<>(); try (DataFormatAwareEngine.DataFormatAwareReader dataFormatAwareReader = dataFormatAwareEngine.acquireReader()) { ExecutionContext ctx = new ExecutionContext(tableName, task, dataFormatAwareReader); - try (SearchExecEngine engine = plugin.searcher(ctx)) { + try (SearchExecEngine engine = plugin.searcher(ctx)) { logger.info("[DefaultPlanExecutor] Executing via [{}]", plugin.name()); try (EngineResultStream resultStream = engine.execute(ctx)) { - EngineResultBatchIterator batchIterator = resultStream.iterator(); + Iterator batchIterator = resultStream.iterator(); while (batchIterator.hasNext()) { EngineResultBatch batch = batchIterator.next(); List fieldNames = batch.getFieldNames(); @@ -122,7 +124,11 @@ private IndexShard resolveShard(String indexName) { } private AnalyticsSearchBackendPlugin selectBackEnd() { - if (backEnds.isEmpty()) throw new IllegalStateException("No back-end plugins registered"); + if (backEnds.isEmpty()) { + logger.warn("No back-end plugins registered — queries will return empty results"); + return null; + } + // TODO : This is placeholder - select based on data format return backEnds.values().iterator().next(); } } diff --git a/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/exec/DefaultPlanExecutorTests.java 
b/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/exec/DefaultPlanExecutorTests.java index e8ce3f23b6ca1..0b34e3ea6495b 100644 --- a/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/exec/DefaultPlanExecutorTests.java +++ b/sandbox/plugins/analytics-engine/src/test/java/org/opensearch/analytics/exec/DefaultPlanExecutorTests.java @@ -22,7 +22,6 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.sql.type.SqlTypeName; import org.opensearch.analytics.backend.EngineResultBatch; -import org.opensearch.analytics.backend.EngineResultBatchIterator; import org.opensearch.analytics.backend.EngineResultStream; import org.opensearch.analytics.backend.ExecutionContext; import org.opensearch.analytics.backend.SearchExecEngine; @@ -49,6 +48,7 @@ import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; +import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; @@ -302,7 +302,7 @@ public Object getReader(DataFormat dataFormat) { protected void closeInternal() {} } - static class MockSearchExecEngine implements SearchExecEngine { + static class MockSearchExecEngine implements SearchExecEngine { private final long totalRows; MockSearchExecEngine(long totalRows) { @@ -329,7 +329,7 @@ static class MockResultStream implements EngineResultStream { } @Override - public EngineResultBatchIterator iterator() { + public Iterator iterator() { return new MockBatchIterator(rowCount); } @@ -337,7 +337,7 @@ public EngineResultBatchIterator iterator() { public void close() {} } - static class MockBatchIterator implements EngineResultBatchIterator { + static class MockBatchIterator implements Iterator { private final long rowCount; private boolean consumed; @@ -393,7 +393,7 @@ public String name() { } @Override - public SearchExecEngine searcher(ExecutionContext ctx) { + public SearchExecEngine searcher(ExecutionContext ctx) { Object reader = 
ctx.getReader().getReader(format); long rows = reader instanceof Long ? (Long) reader : 0L; return new MockSearchExecEngine(rows);