diff --git a/org.knime.python3.arrow/src/main/java/org/knime/python3/arrow/PythonArrowDataSource.java b/org.knime.python3.arrow/src/main/java/org/knime/python3/arrow/PythonArrowDataSource.java index 8dcdf3e49..24a8ddfa6 100644 --- a/org.knime.python3.arrow/src/main/java/org/knime/python3/arrow/PythonArrowDataSource.java +++ b/org.knime.python3.arrow/src/main/java/org/knime/python3/arrow/PythonArrowDataSource.java @@ -48,8 +48,6 @@ */ package org.knime.python3.arrow; -import java.util.List; - import org.knime.python3.PythonTableDataSource; /** @@ -86,16 +84,6 @@ default String getIdentifier() { */ long getRecordBatchOffset(int index); - /** - * Get the offsets of the dictionary batches relating to the record batch at the given index. This method can lock - * if the dictionary batches are not yet written to the file. When it returns it guarantees that the dictionary - * batches can be read at the returned offsets from the file. - * - * @param index the index of the dictionary batches - * @return the offsets of all dictionary batches for the given index - */ - List getDictionaryBatchOffsets(int index); - /** * @return the total number of batches */ diff --git a/org.knime.python3.arrow/src/main/java/org/knime/python3/arrow/PythonArrowDataUtils.java b/org.knime.python3.arrow/src/main/java/org/knime/python3/arrow/PythonArrowDataUtils.java index 9dc3a8145..20aa4c5e5 100644 --- a/org.knime.python3.arrow/src/main/java/org/knime/python3/arrow/PythonArrowDataUtils.java +++ b/org.knime.python3.arrow/src/main/java/org/knime/python3/arrow/PythonArrowDataUtils.java @@ -52,10 +52,8 @@ import java.nio.file.Files; import java.nio.file.Path; import java.util.ArrayList; -import java.util.Arrays; import java.util.List; import java.util.function.Supplier; -import java.util.stream.Collectors; import org.knime.core.columnar.arrow.ArrowBatchReadStore; import org.knime.core.columnar.arrow.ArrowBatchStore; @@ -484,12 +482,6 @@ private static OffsetProvider getOffsetProvider(final DefaultPythonArrowDataSink public long getRecordBatchOffset(final int index) { return dataSink.getRecordBatchOffsets().get(index); } - - @Override - public long[] getDictionaryBatchOffsets(final int index) { - // TODO support dictionary batches - return new long[0]; - } }; } @@ -549,12 +541,6 @@ public long getRecordBatchOffset(final int index) { return m_offsetProvider.getRecordBatchOffset(index); } - @Override - public List getDictionaryBatchOffsets(final int index) { - return Arrays.stream(m_offsetProvider.getDictionaryBatchOffsets(index)).boxed() - .collect(Collectors.toList()); - } - @Override public String[] getColumnNames() { return m_columnNames; diff --git a/org.knime.python3.arrow/src/main/python/knime/_arrow/_backend.py b/org.knime.python3.arrow/src/main/python/knime/_arrow/_backend.py index ed28d521d..51cea3455 100644 --- a/org.knime.python3.arrow/src/main/python/knime/_arrow/_backend.py +++ b/org.knime.python3.arrow/src/main/python/knime/_arrow/_backend.py @@ -194,7 +194,6 @@ def num_record_batches(self): return self._java_data_source.numBatches() def get_batch(self, index: int) -> pa.RecordBatch: - # TODO(dictionary) handle dictionaries offset = self._java_data_source.getRecordBatchOffset(index) self._source_file.seek(offset) # TODO do we need to map columns somehow (in Java we have the factory versions)