From b7eb6ac9324eb6333672380a401950491d45662f Mon Sep 17 00:00:00 2001 From: Karolina Bogacka Date: Fri, 16 May 2025 15:43:13 +0200 Subject: [PATCH 1/2] Add function that skips over empty frames --- .../jelly/cli/command/rdf/RdfValidate.scala | 23 ++++++++++++++---- src/test/resources/firstEmptyFrame.jelly | Bin 0 -> 535 bytes .../command/helpers/TestFixtureHelper.scala | 13 +++++++++- .../cli/command/rdf/RdfValidateSpec.scala | 10 +++++++- 4 files changed, 39 insertions(+), 7 deletions(-) create mode 100644 src/test/resources/firstEmptyFrame.jelly diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala index f46e64e..9b736b2 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala @@ -91,7 +91,7 @@ object RdfValidate extends JellyCommand[RdfValidateOptions]: // Step 1: Validate delimiting validateDelimiting(delimiting, delimited) // Step 2: Validate basic stream structure & the stream options - val framesSeq = frameIterator.toSeq + val framesSeq = skipEmptyFrames(frameIterator.toSeq) validateOptions(framesSeq) // Step 3: Validate the content validateContent(framesSeq, frameIndices, rdfComparison) @@ -109,10 +109,6 @@ object RdfValidate extends JellyCommand[RdfValidateOptions]: throw CriticalException("Expected undelimited input, but the file was delimited") private def validateOptions(frames: Seq[RdfStreamFrame]): Unit = - // Validate basic stream structure - if frames.isEmpty then throw CriticalException("Empty input stream") - if frames.head.rows.isEmpty then - throw CriticalException("First frame in the input stream is empty") if !frames.head.rows.head.row.isOptions then throw CriticalException("First row in the input stream does not contain stream options") val streamOptions = frames.head.rows.head.row.options @@ -192,6 +188,23 @@ object RdfValidate extends 
JellyCommand[RdfValidateOptions]: comparator.compare(rdfComparison, actual) } + /** Skip empty frames in the stream. If the first frame is empty, we skip it and continue with the + * next one. If the first row is empty, we throw an exception + * @param frames + * frames to check + * @return + * frames after empty frames + */ + private def skipEmptyFrames( + frames: Seq[RdfStreamFrame], + ): Seq[RdfStreamFrame] = + if frames.isEmpty then throw CriticalException("Empty input stream") + if frames.head.rows.isEmpty then + // We want to accept empty frames in the stream, but not empty streams + if frames.tail.isEmpty then throw CriticalException("All frames are empty") + return skipEmptyFrames(frames.tail) + return frames + /** Reads the RDF file for comparison and returns a StreamRdfCollector * @param fileName * filename to read diff --git a/src/test/resources/firstEmptyFrame.jelly b/src/test/resources/firstEmptyFrame.jelly new file mode 100644 index 0000000000000000000000000000000000000000..af40929efc507acf1376081de2809b6f1213c69f GIT binary patch literal 535 zcma)(%TB^T6oxys)aHc5It#D;5 zj7c}tuse${|LquG1iAn&d2tq&AbSFXq))HcN{MI2aL;#Qi^ED5OyYa>A#-FhXk~<~ z1WI*aqMd1ULU5=JhG!(k35p?H(hD%%GtWrRp)=zdZof~78v#&9+nBETk5LnA1W(BW zy+1NRQ!iZB%QvpLItYZv)T4KQcwN`SI{FUT5=sjXQp`DHRqdfl^nuuyqRu zEY0{m^4VUav5Y{w&^{jrL6F7c756cVrhXKx;(QXuQ=Y~8nCLZ4w*$Ayz_E`0u~8gB Wk6!64L{DR-U9jg$Zpwn1SpEP~ke4q2 literal 0 HcmV?d00001 diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/helpers/TestFixtureHelper.scala b/src/test/scala/eu/neverblink/jelly/cli/command/helpers/TestFixtureHelper.scala index dea7633..47158d6 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/helpers/TestFixtureHelper.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/helpers/TestFixtureHelper.scala @@ -10,7 +10,7 @@ import org.scalatest.BeforeAndAfterAll import org.scalatest.wordspec.AnyWordSpec import java.io.FileOutputStream -import java.nio.file.{Files, Path} +import java.nio.file.{Files, Path, Paths} 
import java.util.UUID.randomUUID import scala.util.Using @@ -24,6 +24,7 @@ trait TestFixtureHelper extends BeforeAndAfterAll: CliRiot.initialize() } + private val specificTestDir: Path = Paths.get("src", "test", "resources") private val tmpDir: Path = Files.createTempDirectory("jelly-cli") /** The number of triples to generate for the tests @@ -85,6 +86,16 @@ trait TestFixtureHelper extends BeforeAndAfterAll: testCode(tempFile.toString) } finally { tempFile.toFile.delete() } + def withSpecificJellyFile( + testCode: (String) => Any, + fileName: String, + ): Unit = { + val filePath = specificTestDir.resolve(fileName) + if !Files.exists(filePath) then + throw new IllegalArgumentException(s"File $fileName does not exist in $specificTestDir") + else testCode(filePath.toString) + } + def withFullJellyFile(testCode: (String) => Any, frameSize: Int = 256): Unit = val extension = getFileExtension(JellyLanguage.JELLY) val tempFile = Files.createTempFile(tmpDir, randomUUID.toString, f".${extension}") diff --git a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidateSpec.scala b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidateSpec.scala index da10075..3633890 100644 --- a/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidateSpec.scala +++ b/src/test/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidateSpec.scala @@ -23,9 +23,17 @@ class RdfValidateSpec extends AnyWordSpec, Matchers, TestFixtureHelper: RdfValidate.runTestCommand(List("rdf", "validate")) } e.cause.get shouldBe a[CriticalException] - e.cause.get.getMessage should include("First frame in the input stream is empty") + e.cause.get.getMessage should include("All frames are empty") } + "accept empty frame before stream options" in withSpecificJellyFile( + testCode = { jellyF => + val (out, err) = RdfValidate.runTestCommand(List("rdf", "validate", jellyF)) + out shouldBe empty + }, + fileName = "firstEmptyFrame.jelly", + ) + "validate delimiting" when { val frame = RdfStreamFrame( Seq( 
From 556a6328237aeace0e2a08830fe4d5d17b8cb9aa Mon Sep 17 00:00:00 2001 From: Karolina Bogacka Date: Fri, 16 May 2025 16:32:33 +0200 Subject: [PATCH 2/2] Address review comments --- .../jelly/cli/command/rdf/RdfValidate.scala | 8 +++++--- src/test/resources/threeFirstEmptyFrames.jelly | Bin 0 -> 530 bytes .../jelly/cli/command/rdf/RdfValidateSpec.scala | 8 ++++++++ 3 files changed, 13 insertions(+), 3 deletions(-) create mode 100644 src/test/resources/threeFirstEmptyFrames.jelly diff --git a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala index 9b736b2..cabfdc7 100644 --- a/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala +++ b/src/main/scala/eu/neverblink/jelly/cli/command/rdf/RdfValidate.scala @@ -14,6 +14,7 @@ import org.apache.jena.riot.RDFParser import org.apache.jena.riot.system.StreamRDFLib import org.apache.jena.sparql.core.Quad +import scala.annotation.tailrec import scala.util.Using object RdfValidatePrint extends RdfCommandPrintUtil[RdfFormat.Jena]: @@ -195,15 +196,16 @@ object RdfValidate extends JellyCommand[RdfValidateOptions]: * @return * frames after empty frames */ + @tailrec private def skipEmptyFrames( frames: Seq[RdfStreamFrame], ): Seq[RdfStreamFrame] = if frames.isEmpty then throw CriticalException("Empty input stream") - if frames.head.rows.isEmpty then + else if frames.head.rows.isEmpty then // We want to accept empty frames in the stream, but not empty streams if frames.tail.isEmpty then throw CriticalException("All frames are empty") - return skipEmptyFrames(frames.tail) - return frames + skipEmptyFrames(frames.tail) + else frames /** Reads the RDF file for comparison and returns a StreamRdfCollector * @param fileName * filename to read diff --git a/src/test/resources/threeFirstEmptyFrames.jelly b/src/test/resources/threeFirstEmptyFrames.jelly new file mode 100644 index
0000000000000000000000000000000000000000..0baf43666a681bfe83d79b4764246e02c7a5b8b4 GIT binary patch literal 530 zcma)(K~KUk7>2hw=JJKa*@NL^2zt<|blo5@e}P%>#D(lay7@6kU^Jy>@GtzyHpZkE zX1w+A^nLq2{c@oT5ZG`2!WH(>hC$XRw`--uE2VhkJF&&#URG4%VgD&hq>{BVLhc1h zb)ur3F*z~#!~-j|GV33SFa`m;g$d{m(&uBwSdorbJi;QGMhRP` + val (out, err) = RdfValidate.runTestCommand(List("rdf", "validate", jellyF)) + out shouldBe empty + }, + fileName = "threeFirstEmptyFrames.jelly", + ) + "validate delimiting" when { val frame = RdfStreamFrame( Seq(