diff --git a/src/test/java/org/apache/datasketches/tuple/strings/AosSketchCrossLanguageTest.java b/src/test/java/org/apache/datasketches/tuple/strings/AosSketchCrossLanguageTest.java
index dd425abfa..d4c779d6c 100644
--- a/src/test/java/org/apache/datasketches/tuple/strings/AosSketchCrossLanguageTest.java
+++ b/src/test/java/org/apache/datasketches/tuple/strings/AosSketchCrossLanguageTest.java
@@ -19,15 +19,25 @@
 
 package org.apache.datasketches.tuple.strings;
 
+import static org.apache.datasketches.common.TestUtil.CHECK_CPP_FILES;
 import static org.apache.datasketches.common.TestUtil.GENERATE_JAVA_FILES;
+import static org.apache.datasketches.common.TestUtil.cppPath;
 import static org.apache.datasketches.common.TestUtil.javaPath;
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertTrue;
 
+import java.lang.foreign.MemorySegment;
 import java.io.IOException;
 import java.nio.file.Files;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
 
 import org.apache.datasketches.common.ResizeFactor;
+import org.apache.datasketches.tuple.TupleSketch;
+import org.apache.datasketches.tuple.TupleSketchIterator;
 import org.testng.annotations.Test;
 
 /**
@@ -109,4 +119,115 @@ public void generateBinariesForCompatibilityTestingEmptyStrings() throws IOExcep
 
     Files.newOutputStream(javaPath.resolve("aos_empty_strings_java.sk")).write(sk.compact().toByteArray());
   }
+
+  /** Heapifies each "aos_1_n*_cpp.sk" file and checks emptiness, estimate, mode and one-string summaries. */
+  @Test(groups = {CHECK_CPP_FILES})
+  public void deserializeFromCppOneString() throws IOException {
+    final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000};
+    for (int n : nArr) {
+      final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_1_n" + n + "_cpp.sk"));
+      final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
+      assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty());
+      assertEquals(sketch.getEstimate(), n, n * 0.03);
+      assertTrue(n > 1000? sketch.isEstimationMode() : !sketch.isEstimationMode());
+
+      final TupleSketchIterator<ArrayOfStringsSummary> it = sketch.iterator();
+      while (it.next()) {
+        assertTrue(it.getHash() < sketch.getThetaLong());
+        final String[] summary = it.getSummary().getValue();
+        assertEquals(summary.length, 1);
+      }
+    }
+  }
+
+  /** Heapifies each "aos_3_n*_cpp.sk" file and checks emptiness, estimate, mode and three-string summaries. */
+  @Test(groups = {CHECK_CPP_FILES})
+  public void deserializeFromCppThreeStrings() throws IOException {
+    final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000};
+    for (int n : nArr) {
+      final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_3_n" + n + "_cpp.sk"));
+      final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
+      assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty());
+      assertEquals(sketch.getEstimate(), n, n * 0.03);
+      assertTrue(n > 1000? sketch.isEstimationMode() : !sketch.isEstimationMode());
+
+      final TupleSketchIterator<ArrayOfStringsSummary> it = sketch.iterator();
+      while (it.next()) {
+        assertTrue(it.getHash() < sketch.getThetaLong());
+        final String[] summary = it.getSummary().getValue();
+        assertEquals(summary.length, 3);
+      }
+    }
+  }
+
+  /** Heapifies "aos_1_non_empty_no_entries_cpp.sk" and checks it is non-empty yet retains zero entries. */
+  @Test(groups = {CHECK_CPP_FILES})
+  public void deserializeFromCppOneStringNonEmptyNoEntries() throws IOException {
+    final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_1_non_empty_no_entries_cpp.sk"));
+    final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
+
+    assertFalse(sketch.isEmpty());
+    assertEquals(sketch.getRetainedEntries(), 0);
+  }
+
+  /** Heapifies each "aos_multikey_n*_cpp.sk" file and checks emptiness, estimate, mode and one-string summaries. */
+  @Test(groups = {CHECK_CPP_FILES})
+  public void deserializeFromCppMultiKeyStrings() throws IOException {
+    final int[] nArr = {0, 1, 10, 100, 1000, 10_000, 100_000, 1_000_000};
+    for (int n : nArr) {
+      final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_multikey_n" + n + "_cpp.sk"));
+      final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
+      assertTrue(n == 0 ? sketch.isEmpty() : !sketch.isEmpty());
+      assertEquals(sketch.getEstimate(), n, n * 0.03);
+      assertTrue(n > 1000? sketch.isEstimationMode() : !sketch.isEstimationMode());
+
+      final TupleSketchIterator<ArrayOfStringsSummary> it = sketch.iterator();
+      while (it.next()) {
+        assertTrue(it.getHash() < sketch.getThetaLong());
+        final String[] summary = it.getSummary().getValue();
+        assertEquals(summary.length, 1);
+      }
+    }
+  }
+
+  /** Heapifies "aos_unicode_cpp.sk" and checks the three expected non-ASCII summaries are present. */
+  @Test(groups = {CHECK_CPP_FILES})
+  public void deserializeFromCppUnicodeStrings() throws IOException {
+    final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_unicode_cpp.sk"));
+    final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
+    assertFalse(sketch.isEmpty());
+    assertFalse(sketch.isEstimationMode());
+    assertEquals(sketch.getEstimate(), 3.0);
+
+    final Set<List<String>> summaries = getSummaries(sketch);
+    assertTrue(summaries.contains(Arrays.asList("밸류", "값")));
+    assertTrue(summaries.contains(Arrays.asList("📦", "🎁")));
+    assertTrue(summaries.contains(Arrays.asList("ценить1", "ценить2")));
+  }
+
+  /** Heapifies "aos_empty_strings_cpp.sk" and checks the three expected summaries, including empty strings. */
+  @Test(groups = {CHECK_CPP_FILES})
+  public void deserializeFromCppEmptyStrings() throws IOException {
+    final byte[] bytes = Files.readAllBytes(cppPath.resolve("aos_empty_strings_cpp.sk"));
+    final TupleSketch<ArrayOfStringsSummary> sketch = ArrayOfStringsTupleSketch.heapifySketch(MemorySegment.ofArray(bytes), new ArrayOfStringsSummaryDeserializer());
+    assertFalse(sketch.isEmpty());
+    assertFalse(sketch.isEstimationMode());
+    assertEquals(sketch.getEstimate(), 3.0);
+
+    final Set<List<String>> summaries = getSummaries(sketch);
+    assertTrue(summaries.contains(Arrays.asList("empty_key_value")));
+    assertTrue(summaries.contains(Arrays.asList("")));
+    assertTrue(summaries.contains(Arrays.asList("", "")));
+  }
+
+  /** Collects every retained summary as a list of strings, asserting each entry's hash is below theta. */
+  private static Set<List<String>> getSummaries(final TupleSketch<ArrayOfStringsSummary> sketch) {
+    final Set<List<String>> summaries = new HashSet<>();
+    final TupleSketchIterator<ArrayOfStringsSummary> it = sketch.iterator();
+    while (it.next()) {
+      assertTrue(it.getHash() < sketch.getThetaLong());
+      summaries.add(Arrays.asList(it.getSummary().getValue()));
+    }
+    return summaries;
+  }
 }