From ed0f5a7c554051c97bf593d40cde187e0a1697f3 Mon Sep 17 00:00:00 2001 From: ntkrgk Date: Tue, 21 Apr 2026 11:21:20 +0900 Subject: [PATCH] =?UTF-8?q?=E6=94=B9=E8=A1=8C=E3=82=84=E7=A9=BA=E7=99=BD?= =?UTF-8?q?=E3=81=A0=E3=81=91=E3=81=AE=E8=A1=8C=E3=81=8C=E3=81=82=E3=82=8B?= =?UTF-8?q?=E6=99=82=E3=81=AB=E3=82=A8=E3=83=A9=E3=83=BC=E3=81=AB=E3=81=AA?= =?UTF-8?q?=E3=82=8B=E5=95=8F=E9=A1=8C=E3=82=92=E4=BF=AE=E6=AD=A3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../parser/jsonl/JsonlParserPlugin.java | 5 + .../parser/jsonl/TestJsonlParserPlugin.java | 123 ++++++++++++++++++ 2 files changed, 128 insertions(+) diff --git a/src/main/java/org/embulk/parser/jsonl/JsonlParserPlugin.java b/src/main/java/org/embulk/parser/jsonl/JsonlParserPlugin.java index 8472bd1..2107aff 100644 --- a/src/main/java/org/embulk/parser/jsonl/JsonlParserPlugin.java +++ b/src/main/java/org/embulk/parser/jsonl/JsonlParserPlugin.java @@ -175,6 +175,11 @@ public void run(TaskSource taskSource, Schema schema, FileInput input, PageOutpu } lineNumber++; + // Skip empty lines + if (line.trim().isEmpty()) { + continue; + } + try { Value value = jsonParser.parse(line); diff --git a/src/test/java/org/embulk/parser/jsonl/TestJsonlParserPlugin.java b/src/test/java/org/embulk/parser/jsonl/TestJsonlParserPlugin.java index 8e8188f..2538b4f 100644 --- a/src/test/java/org/embulk/parser/jsonl/TestJsonlParserPlugin.java +++ b/src/test/java/org/embulk/parser/jsonl/TestJsonlParserPlugin.java @@ -241,6 +241,129 @@ record = records.get(1); } } + @Test + public void testTrailingEmptyLine() throws Exception { + SchemaConfig schema = + schema(column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", STRING)); + ConfigSource config = config().set("columns", schema); + + // Simulates a file with a trailing newline: the last element "" represents the + // empty line + List records = + runParser( + config, + Arrays.asList( + "{\"_c0\":true,\"_c1\":10,\"_c2\":\"first\"}", + "{\"_c0\":false,\"_c1\":20,\"_c2\":\"second\"}", + "")); // Empty line at the end + + assertEquals(2, records.size()); + assertEquals(true, records.get(0)[0]); + assertEquals(10L, records.get(0)[1]); + assertEquals("first", records.get(0)[2]); + } + + @Test + public void testLeadingEmptyLine() throws Exception { + SchemaConfig schema = + schema(column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", STRING)); + ConfigSource config = config().set("columns", schema); + + List records = + runParser( + config, + Arrays.asList( + "", // Empty line at the beginning + "{\"_c0\":true,\"_c1\":10,\"_c2\":\"first\"}", + "{\"_c0\":false,\"_c1\":20,\"_c2\":\"second\"}")); + + assertEquals(2, records.size()); + } + + @Test + public void testMiddleEmptyLine() throws Exception { + SchemaConfig schema = + schema(column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", STRING)); + ConfigSource config = config().set("columns", schema); + + List records = + runParser( + config, + Arrays.asList( + "{\"_c0\":true,\"_c1\":10,\"_c2\":\"first\"}", + "", // Empty line in the middle + "{\"_c0\":false,\"_c1\":20,\"_c2\":\"second\"}")); + + assertEquals(2, records.size()); + } + + @Test + public void testMultipleConsecutiveEmptyLines() throws Exception { + SchemaConfig schema = + schema(column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", STRING)); + ConfigSource config = config().set("columns", schema); + + List records = + runParser( + config, + Arrays.asList( + "{\"_c0\":true,\"_c1\":10,\"_c2\":\"first\"}", + "", // Empty line + "", // Empty line + "", // Empty line + "{\"_c0\":false,\"_c1\":20,\"_c2\":\"second\"}")); + + assertEquals(2, records.size()); + } + + @Test + public void testWhitespaceOnlyLines() throws Exception { + SchemaConfig schema = + schema(column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", STRING)); + ConfigSource config = config().set("columns", schema); + + List records = + runParser( + config, + Arrays.asList( + "{\"_c0\":true,\"_c1\":10,\"_c2\":\"first\"}", + " ", // Spaces only + "\t", // Tab only + " \t ", // Mixed whitespace + "{\"_c0\":false,\"_c1\":20,\"_c2\":\"second\"}")); + + assertEquals(2, records.size()); + } + + @Test + public void testOnlyEmptyLines() throws Exception { + SchemaConfig schema = + schema(column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", STRING)); + ConfigSource config = config().set("columns", schema); + + List records = runParser(config, Arrays.asList("", " ", "\t", "")); + + assertEquals(0, records.size()); + } + + @Test + public void testEmptyLinesWithStopOnInvalidRecord() throws Exception { + SchemaConfig schema = + schema(column("_c0", BOOLEAN), column("_c1", LONG), column("_c2", STRING)); + ConfigSource config = config().set("columns", schema).set("stop_on_invalid_record", true); + + // Empty lines should be skipped even when stop_on_invalid_record is true + List records = + runParser( + config, + Arrays.asList( + "{\"_c0\":true,\"_c1\":10,\"_c2\":\"first\"}", + "", + "{\"_c0\":false,\"_c1\":20,\"_c2\":\"second\"}")); + + assertEquals(2, records.size()); + } + private ConfigSource config() { return CONFIG_MAPPER_FACTORY.newConfigSource(); }