knime · tpietzsch · Feb 24, 2026
@@ -48,8 +48,6 @@
  */
 package org.knime.python3.arrow;
 
-import java.util.List;
-
 import org.knime.python3.PythonTableDataSource;
 
 /**
@@ -86,16 +84,6 @@ default String getIdentifier() {
      */
     long getRecordBatchOffset(int index);
 
-    /**
-     * Get the offsets of the dictionary batches relating to the record batch at the given index. This method can lock
-     * if the dictionary batches are not yet written to the file. When it returns it guarantees that the dictionary
-     * batches can be read at the returned offsets from the file.
-     *
-     * @param index the index of the dictionary batches
-     * @return the offsets of all dictionary batches for the given index
-     */
-    List<Long> getDictionaryBatchOffsets(int index);
-
     /**
      * @return the total number of batches
      */

@@ -52,10 +52,8 @@
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.ArrayList;
-import java.util.Arrays;
 import java.util.List;
 import java.util.function.Supplier;
-import java.util.stream.Collectors;
 
 import org.knime.core.columnar.arrow.ArrowBatchReadStore;
 import org.knime.core.columnar.arrow.ArrowBatchStore;
@@ -484,12 +482,6 @@ private static OffsetProvider getOffsetProvider(final DefaultPythonArrowDataSink
             public long getRecordBatchOffset(final int index) {
                 return dataSink.getRecordBatchOffsets().get(index);
             }
-
-            @Override
-            public long[] getDictionaryBatchOffsets(final int index) {
-                // TODO support dictionary batches
-                return new long[0];
-            }
         };
     }
 
@@ -549,12 +541,6 @@ public long getRecordBatchOffset(final int index) {
             return m_offsetProvider.getRecordBatchOffset(index);
         }
 
-        @Override
-        public List<Long> getDictionaryBatchOffsets(final int index) {
-            return Arrays.stream(m_offsetProvider.getDictionaryBatchOffsets(index)).boxed()
-                .collect(Collectors.toList());
-        }
-
         @Override
         public String[] getColumnNames() {
             return m_columnNames;

@@ -194,7 +194,6 @@ def num_record_batches(self):
         return self._java_data_source.numBatches()
 
     def get_batch(self, index: int) -> pa.RecordBatch:
-        # TODO(dictionary) handle dictionaries
         offset = self._java_data_source.getRecordBatchOffset(index)
         self._source_file.seek(offset)
         # TODO do we need to map columns somehow (in Java we have the factory versions)