From 8ed09915c512dce1b7db56e9e9bff91e4d76d3b2 Mon Sep 17 00:00:00 2001 From: LiNk-NY Date: Mon, 3 Nov 2025 15:54:01 -0500 Subject: [PATCH 1/4] support local and remote compressed *json files --- R/j_data_type.R | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/R/j_data_type.R b/R/j_data_type.R index e0d5832..9a9b7cd 100644 --- a/R/j_data_type.R +++ b/R/j_data_type.R @@ -91,8 +91,20 @@ j_data_type <- ) if (length(data) == 1L) { - ## url or file path or json - if (length(grep("^https?://", data))) { + ## compressed and uncompressed url or file path + if (file_ext(data) %in% c("gz", "bz2", "xz")) { + format <- + if (grepl("\\.ndjson\\.(gz|bz2|xz)$", data, TRUE)) + "ndjson" + else if (grepl("\\.json\\.(gz|bz2|xz)$", data, TRUE)) + "json" + c( + format, + if (grepl("^https?://", data, TRUE)) "url" else "file" + ) + } else if (identical(file_ext(data), "zip")) { + stop("`j_data_type()` of type `.zip` is not supported") + } else if (grepl("^https?://", data, TRUE)) { c(j_data_type(readLines(data, 2L, warn = FALSE)), "url") } else if (file.exists(data)) { c(j_data_type(readLines(data, 2L, warn = FALSE)), "file") From 14de248fefc51e28e2efb2e4025b2bd65b39d8fe Mon Sep 17 00:00:00 2001 From: LiNk-NY Date: Mon, 3 Nov 2025 16:04:47 -0500 Subject: [PATCH 2/4] re-use checks for url and file --- R/j_data_type.R | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/R/j_data_type.R b/R/j_data_type.R index 9a9b7cd..19ad312 100644 --- a/R/j_data_type.R +++ b/R/j_data_type.R @@ -91,6 +91,8 @@ j_data_type <- ) if (length(data) == 1L) { + is_url <- grepl("^https?://", data, TRUE) + is_file <- file.exists(data) ## compressed and uncompressed url or file path if (file_ext(data) %in% c("gz", "bz2", "xz")) { format <- @@ -100,13 +102,13 @@ j_data_type <- "json" c( format, - if (grepl("^https?://", data, TRUE)) "url" else "file" + if (is_url) "url" else if (is_file) "file" ) } else if (identical(file_ext(data), "zip")) { 
stop("`j_data_type()` of type `.zip` is not supported") - } else if (grepl("^https?://", data, TRUE)) { + } else if (is_url) { c(j_data_type(readLines(data, 2L, warn = FALSE)), "url") - } else if (file.exists(data)) { + } else if (is_file) { c(j_data_type(readLines(data, 2L, warn = FALSE)), "file") } else if (.is_scalar_character(data) && !inherits(data, "AsIs")) { "json" From 71e943b10a1736cc3e5615c1cbc777f86e7592eb Mon Sep 17 00:00:00 2001 From: LiNk-NY Date: Mon, 3 Nov 2025 16:05:50 -0500 Subject: [PATCH 3/4] update documentation --- R/j_data_type.R | 11 +++++++---- man/j_data_type.Rd | 11 +++++++---- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/R/j_data_type.R b/R/j_data_type.R index 19ad312..a105c30 100644 --- a/R/j_data_type.R +++ b/R/j_data_type.R @@ -49,10 +49,13 @@ #' #' - For a scalar (length 1) character `data`, either `"url"` #' (matching regular expression `"^https?://"`, `"file"` -#' (`file.exists(data)` returns `TRUE`), or `"json"`. When `"file"` -#' or `"url"` is inferred, the return value is a length 2 vector, -#' with the first element the inferred type of data (`"json"` or -#' `"ndjson"`) obtained from the first 2 lines of the file. +#' (`file.exists(data)` returns `TRUE`), or `"json"`. Compression +#' type is accounted for; supported compression extensions are +#' `".gz"`, `".bz2"`, and `".xz"`. `".zip"` is not supported and +#' signals an error. When `"file"` or `"url"` is inferred, the +#' return value is a length 2 vector, with the first element the +#' inferred type of data (`"json"` or `"ndjson"`), obtained from the +#' first 2 lines of the file or, if compressed, from its file name. 
#' - For character data with `length(data) > 1`, `"ndjson"` if all #' elements start a square bracket or curly brace, consistently #' (i.e., agreeing with the start of the first record), otherwise diff --git a/man/j_data_type.Rd b/man/j_data_type.Rd index 6dd3b15..087ddaf 100644 --- a/man/j_data_type.Rd +++ b/man/j_data_type.Rd @@ -33,10 +33,13 @@ validate) the type of \code{data} based on the following rules: \itemize{ \item For a scalar (length 1) character \code{data}, either \code{"url"} (matching regular expression \code{"^https?://"}, \code{"file"} -(\code{file.exists(data)} returns \code{TRUE}), or \code{"json"}. When \code{"file"} -or \code{"url"} is inferred, the return value is a length 2 vector, -with the first element the inferred type of data (\code{"json"} or -\code{"ndjson"}) obtained from the first 2 lines of the file. +(\code{file.exists(data)} returns \code{TRUE}), or \code{"json"}. Compression +type is accounted for; supported compression extensions are +\code{".gz"}, \code{".bz2"}, and \code{".xz"}. \code{".zip"} is not supported and +signals an error. When \code{"file"} or \code{"url"} is inferred, the +return value is a length 2 vector, with the first element the +inferred type of data (\code{"json"} or \code{"ndjson"}), obtained from the +first 2 lines of the file or, if compressed, from its file name. 
\item For character data with \code{length(data) > 1}, \code{"ndjson"} if all elements start a square bracket or curly brace, consistently (i.e., agreeing with the start of the first record), otherwise From 184e2b8419c6f317225acab39ab58f9beba78da9 Mon Sep 17 00:00:00 2001 From: LiNk-NY Date: Mon, 3 Nov 2025 16:06:04 -0500 Subject: [PATCH 4/4] import file_ext() from tools --- NAMESPACE | 1 + R/j_data_type.R | 2 ++ 2 files changed, 3 insertions(+) diff --git a/NAMESPACE b/NAMESPACE index 36d9b92..b6fb459 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,6 +25,7 @@ export(jsonpointer) export(version) importFrom(cli,cli_progress_bar) importFrom(cli,cli_progress_done) +importFrom(tools,file_ext) importFrom(utils,head) importFrom(utils,tail) useDynLib(rjsoncons, .registration = TRUE) diff --git a/R/j_data_type.R b/R/j_data_type.R index a105c30..f5f54df 100644 --- a/R/j_data_type.R +++ b/R/j_data_type.R @@ -40,6 +40,8 @@ #' #' @inheritParams j_query #' +#' @importFrom tools file_ext +#' #' @details #' #' `j_data_type()` without any arguments reports possible return