yugokato · yugokato · May 14, 2026 · May 14, 2026
diff --git a/README.md b/README.md
@@ -488,6 +488,9 @@ By default, the plugin reads and parses file content when loading as follows:
 - `.jsonl` — Each line is parsed as a JSON object
 - All other file types — Loads as raw text or binary content
 
+Compressed files with a `.gz`, `.bz2`, or `.xz` extension are decompressed transparently. The inner file type 
+determines which reader and read mode are used (e.g. `data.json.gz` uses `json.load`, `data.txt.bz2` loads as plain text).
+
 ### Customizing defaults
 
 You can customize this behavior by specifying a file reader that accepts a file-like object returned by `open()`. 

diff --git a/src/pytest_data_loader/loaders/impl.py b/src/pytest_data_loader/loaders/impl.py
@@ -19,7 +19,10 @@
 from pytest_data_loader.paths import (
     check_and_track_dir,
     check_circular_symlink,
+    compression_aware_open,
+    get_effective_suffix,
     get_matching_paths,
+    is_compressed_path,
     resolve_relative_path,
     split_glob_path,
 )
@@ -191,20 +194,24 @@ def __init__(self, *args: Any, gidx: int | None = None, **kwargs: Any):
         self.file_reader = self.load_attrs.reader
         self.read_options = self.load_attrs.read_options
         if not self.file_reader:
-            if registered_reader := FileReader.get_registered_reader(self.load_attrs.search_from, self.path.suffix):
+            if registered_reader := FileReader.get_registered_reader(
+                self.load_attrs.search_from, get_effective_suffix(self.path)
+            ):
                 self.file_reader = registered_reader.reader
                 if not self.read_options:
                     self.read_options = registered_reader.read_options
         assert isinstance(self.read_options, HashableDict)
         self._effective_read_mode: str | None = None
-        self._is_streamable = self.file_reader is not None or all(
-            # non-structured text data can be read line by line
-            [
-                self.path.suffix in FileLoader.STREAMABLE_FILE_TYPES,
-                self.read_mode != "rb",
-                self.load_attrs.onload_func is None,
-                self.load_attrs.parametrizer_func is None,
-            ]
+        self._is_streamable = not is_compressed_path(self.path) and (
+            self.file_reader is not None
+            or all(
+                [
+                    get_effective_suffix(self.path) in FileLoader.STREAMABLE_FILE_TYPES,
+                    self.read_mode != "rb",
+                    self.load_attrs.onload_func is None,
+                    self.load_attrs.parametrizer_func is None,
+                ]
+            )
         )
 
         # Caches used by data loaders.
@@ -462,7 +469,7 @@ def _get_file_obj(self) -> IO[Any]:
         """Get file object from cache or open a new one and cache it"""
         f = self._cached_file_objects.get((self.path, self.read_options))
         if not f or f.closed:
-            f = open(self.path, **self.read_options)
+            f = compression_aware_open(self.path, **self.read_options)
             self._cached_file_objects[(self.path, self.read_options)] = f
         f.seek(0)
         return f
@@ -506,7 +513,7 @@ def inspect_part_data(pos: int, part: Any) -> None:
                 else:
                     commit(pos, part)
 
-        with open(self.path, **self.read_options) as f:
+        with compression_aware_open(self.path, **self.read_options) as f:
             if self.file_reader:
                 # NOTE: Do NOT use _read_reader_and_split here to get the split data. Closing the file will invalidate
                 #       the cached part data generated by the file reader and cause issues when loading part data later.
@@ -534,7 +541,7 @@ def _read_file(self) -> str | bytes:
         if self.read_mode == "auto":
             # Detect read mode based on sampled data
             is_binary = False
-            with open(self.path, "rb") as f:
+            with compression_aware_open(self.path, mode="rb") as f:
                 chunk = f.read(4096)
 
             if chunk:
@@ -553,7 +560,7 @@ def _read_file(self) -> str | bytes:
         if self.read_mode == "r" and "encoding" not in read_options:
             read_options["encoding"] = "utf-8"
 
-        with open(self.path, **read_options) as f:
+        with compression_aware_open(self.path, **read_options) as f:
             return f.read()
 
     @requires_loader(DataLoaderType.PARAMETRIZE)

diff --git a/src/pytest_data_loader/paths.py b/src/pytest_data_loader/paths.py
@@ -1,15 +1,26 @@
 from __future__ import annotations
 
+import bz2
 import errno
 import glob
+import gzip
+import lzma
 import os
 import re
+from collections.abc import Callable
 from functools import lru_cache
 from pathlib import Path
-from typing import Literal
+from typing import IO, Any, Literal
 
 from pytest_data_loader.exceptions import DataNotFound
 
+_COMPRESSION_OPENERS: dict[str, Callable[..., IO[Any]]] = {  # lowercase compression suffix -> stdlib opener
+    ".gz": gzip.open,
+    ".bz2": bz2.open,
+    ".xz": lzma.open,
+}
+SUPPORTED_COMPRESSION_EXTENSIONS: tuple[str, ...] = tuple(_COMPRESSION_OPENERS)  # the mapping's keys, in order
+
 
 @lru_cache
 def resolve_relative_path(
@@ -188,3 +199,46 @@ def split_glob_path(path: Path) -> tuple[Path, str]:
     base = Path(*parts[:split])
     pattern = str(Path(*parts[split:]))
     return base, pattern
+
+
+def is_compressed_path(path: Path) -> bool:
+    """Return whether the path's last suffix is a supported compression extension (.gz/.bz2/.xz).
+
+    :param path: File path to inspect; only its last suffix is checked, case-insensitively (the file is not opened)
+    """
+    return path.suffix.lower() in SUPPORTED_COMPRESSION_EXTENSIONS
+
+
+def get_effective_suffix(path: Path) -> str:
+    """Return the format-bearing suffix of path, skipping a trailing compression suffix when present.
+
+    :param path: File path to inspect (the returned suffix keeps the letter case used in the path)
+
+    Examples:
+        Path("data.json.gz") -> ".json"
+        Path("data.csv.bz2") -> ".csv"
+        Path("data.json")    -> ".json"
+        Path("data.gz")      -> ".gz"   (no inner suffix to expose)
+    """
+    suffixes = path.suffixes
+    if len(suffixes) >= 2 and is_compressed_path(path):
+        return suffixes[-2]  # the suffix immediately before the compression extension
+    return path.suffix
+
+
+def compression_aware_open(path: Path, **open_kwargs: Any) -> IO[Any]:
+    """Open a file, routing through gzip.open()/bz2.open()/lzma.open() when the suffix matches.
+
+    For compression openers "r" means binary (unlike builtin open() where "r" means text). This function normalizes
+    the mode so that "r", "rt", and an omitted mode all produce a text-mode stream, matching builtin open().
+
+    :param path: File path to open; the opener is selected by its last suffix, compared case-insensitively
+    :param open_kwargs: Keyword arguments forwarded to the opener (mode, encoding, errors, newline)
+    """
+    opener = _COMPRESSION_OPENERS.get(path.suffix.lower())
+    if opener is None:
+        return open(path, **open_kwargs)  # not a supported compression suffix: plain builtin open()
+    mode = open_kwargs.get("mode") or "r"
+    # Compression openers treat "r" as binary. Map to "rt" so callers get text mode, matching builtin open.
+    open_kwargs["mode"] = "rt" if mode in ("r", "rt") else mode
+    return opener(path, **open_kwargs)
diff --git a/tests/.gitattributes b/tests/.gitattributes
@@ -0,0 +1,2 @@
+# For tests on Windows: force LF line endings for test data text files so their bytes match the decompressed content of their compressed counterparts (e.g. text.txt vs text.txt.gz)
+**/data/**  text=auto eol=lf
diff --git a/tests/data/files/compressed/comma.csv.xz b/tests/data/files/compressed/comma.csv.xz
diff --git a/tests/data/files/compressed/data.jsonl.bz2 b/tests/data/files/compressed/data.jsonl.bz2
diff --git a/tests/data/files/compressed/image.jpg.gz b/tests/data/files/compressed/image.jpg.gz
diff --git a/tests/data/files/compressed/object.json.gz b/tests/data/files/compressed/object.json.gz
diff --git a/tests/data/files/compressed/text.txt.gz b/tests/data/files/compressed/text.txt.gz
diff --git a/tests/data/files/compressed/yaml.yml.gz b/tests/data/files/compressed/yaml.yml.gz
diff --git a/tests/data/files/jsonl/data.jsonl b/tests/data/files/jsonl/data.jsonl
@@ -1,3 +1,3 @@
 {"name": "Alice", "age": 30}
 {"name": "Bob", "age": 25}
-{"name": "Charlie", "age": 35}
+{"name": "Charlie", "age": 35}
diff --git a/tests/data/files/jsonl/data2.jsonl b/tests/data/files/jsonl/data2.jsonl
@@ -1,2 +1,2 @@
 {"city": "Tokyo", "country": "Japan"}
-{"city": "Paris", "country": "France"}
+{"city": "Paris", "country": "France"}
diff --git a/tests/data/files/yaml/yaml.yml b/tests/data/files/yaml/yaml.yml
@@ -27,4 +27,4 @@ services:
     depends_on:
       - db
 volumes:
-  pgdata:
+  pgdata:
diff --git a/tests/data/files/yaml/yaml_documents.yml b/tests/data/files/yaml/yaml_documents.yml
@@ -30,4 +30,4 @@ spec:
         - configMapRef:
             name: app-config
         - secretRef:
-            name: app-secret
+            name: app-secret
diff --git a/tests/paths.py b/tests/paths.py
@@ -35,6 +35,15 @@
 PATH_JPEG_FILE = Path(IMAGE_DIR, "image.jpg")
 PATH_HIDDEN_FILE = Path(SOME_DIR, ".hidden_file")
 PATH_HIDDEN_DIR = Path(SOME_DIR, ".hidden_dir")
+PATH_COMPRESSED_FILE_DIR = Path(FILES_DIR, "compressed")
+PATH_JSON_FILE_GZ = PATH_COMPRESSED_FILE_DIR / f"{PATH_JSON_FILE_OBJECT.name}.gz"
+PATH_JSONL_FILE_BZ2 = PATH_COMPRESSED_FILE_DIR / f"{PATH_JSONL_FILE.name}.bz2"
+PATH_CSV_FILE_XZ = PATH_COMPRESSED_FILE_DIR / f"{PATH_CSV_FILE.name}.xz"
+PATH_TEXT_FILE_GZ = PATH_COMPRESSED_FILE_DIR / f"{PATH_TEXT_FILE.name}.gz"
+PATH_JPEG_FILE_GZ = PATH_COMPRESSED_FILE_DIR / f"{PATH_JPEG_FILE.name}.gz"
+PATH_YAML_FILE_GZ = PATH_COMPRESSED_FILE_DIR / f"{PATH_YAML_FILE.name}.gz"
+PATHS_COMPRESSED_TEXT_FILES = [PATH_JSON_FILE_GZ, PATH_JSONL_FILE_BZ2, PATH_CSV_FILE_XZ, PATH_TEXT_FILE_GZ]
+PATHS_COMPRESSED_BINARY_FILES = [PATH_JPEG_FILE_GZ]
 PATHS_TEXT_FILES = [
     PATH_TEXT_FILE,
     PATH_JSON_FILE_SCALAR,

diff --git a/tests/tests_loader/test_load_file.py b/tests/tests_loader/test_load_file.py
@@ -8,19 +8,23 @@
 from tests.paths import (
     ABS_PATH_LOADER_DIR,
     PATH_JPEG_FILE,
+    PATH_JPEG_FILE_GZ,
+    PATH_JSON_FILE_GZ,
     PATH_JSON_FILE_NESTED_OBJECT,
     PATH_JSON_FILE_OBJECT,
     PATH_TEXT_FILE,
+    PATH_TEXT_FILE_GZ,
 )
 
 pytestmark = pytest.mark.loaders
 
 # NOTE:
 # - lazy_loading option is separately tested in another test using pytester
-# - This file covers 3 types of data types the plugin handles differently:
+# - This file covers 4 kinds of data the plugin handles differently:
 #   - text file (no file reader)
 #   - json file (with default file reader)
 #   - binary file
+#   - compressed files (.gz, .bz2, .xz) for the above
 
 
 # Text file
@@ -132,3 +136,32 @@ def test_load_binary_file_with_id(request: FixtureRequest, data: bytes) -> None:
 def test_load_binary_file_with_marks(request: FixtureRequest, data: bytes) -> None:
     """Test @load loader with the marks option using binary file"""
     assert "foo" in {m.name for m in request.node.own_markers}
+
+
+# Compressed files
+@load("data", PATH_TEXT_FILE_GZ)
+def test_load_compressed_text_file(data: str) -> None:
+    """Test that @load with a .txt.gz file returns decompressed file data"""
+    assert isinstance(data, str)
+    assert data == (ABS_PATH_LOADER_DIR / PATH_TEXT_FILE).read_text()  # text of the uncompressed counterpart
+
+
+@load("data", PATH_JSON_FILE_GZ)
+def test_load_compressed_json_file(data: dict[str, Any]) -> None:
+    """Test that @load with a .json.gz file resolves to the default json.load reader transparently"""
+    assert isinstance(data, dict)
+    assert data == json.loads((ABS_PATH_LOADER_DIR / PATH_JSON_FILE_OBJECT).read_text())  # uncompressed file
+
+
+@load("data", PATH_JPEG_FILE_GZ)
+def test_load_compressed_autodetects_binary_mode(data: bytes) -> None:
+    """Test that @load with a .jpg.gz file auto-detects binary mode from decompressed content"""
+    assert isinstance(data, bytes)
+    assert data == (ABS_PATH_LOADER_DIR / PATH_JPEG_FILE).read_bytes()  # bytes of the uncompressed image
+
+
+@load("data", PATH_TEXT_FILE_GZ, read_options={"mode": "rb"})
+def test_load_compressed_text_with_force_binary(data: bytes) -> None:
+    """Test that @load with a .txt.gz file in binary mode returns decompressed bytes"""
+    assert isinstance(data, bytes)
+    assert data == (ABS_PATH_LOADER_DIR / PATH_TEXT_FILE).read_bytes()  # raw bytes of the uncompressed text file
diff --git a/tests/tests_loader/test_parametrize_dir.py b/tests/tests_loader/test_parametrize_dir.py
@@ -1,11 +1,25 @@
+import json
 from pathlib import Path
+from typing import Any
 
 import pytest
 from pytest import FixtureRequest
 
 from pytest_data_loader import parametrize_dir
+from pytest_data_loader.paths import get_effective_suffix
 from pytest_data_loader.types import LoadedDataType
-from tests.paths import ABS_PATH_LOADER_DIR, IMAGE_DIR, PATH_TEXT_FILE_DIR, SOME_DIR, SOME_DIR_INNER
+from tests.paths import (
+    ABS_PATH_LOADER_DIR,
+    IMAGE_DIR,
+    PATH_COMPRESSED_FILE_DIR,
+    PATH_JPEG_FILE,
+    PATH_JSON_FILE_OBJECT,
+    PATH_TEXT_FILE,
+    PATH_TEXT_FILE_DIR,
+    PATH_YAML_FILE,
+    SOME_DIR,
+    SOME_DIR_INNER,
+)
 
 from .helper import get_parametrized_test_idx
 
@@ -118,3 +132,23 @@ def test_parametrize_dir_multi_dirs_recursive(request: FixtureRequest, data: str
     idx = get_parametrized_test_idx(request, "data")
     all_expected = ["data0", "data1", "data2", "data3", "data4", "data5", "line0\nline1\nline2"]
     assert data == all_expected[idx]
+
+
+@parametrize_dir(
+    ("file_path", "data"),
+    PATH_COMPRESSED_FILE_DIR,
+    filter=lambda p: get_effective_suffix(p) in (".txt", ".json", ".yml", ".jpg"),
+)
+def test_parametrize_dir_with_compressed_files(file_path: Path, data: Any) -> None:
+    """Test @parametrize_dir with compressed files: loaded data must match each file's uncompressed counterpart"""
+    effective_suffix = get_effective_suffix(file_path)
+    if effective_suffix == ".txt":
+        assert data == (ABS_PATH_LOADER_DIR / PATH_TEXT_FILE).read_text()
+    elif effective_suffix == ".json":
+        assert data == json.loads((ABS_PATH_LOADER_DIR / PATH_JSON_FILE_OBJECT).read_text())
+    elif effective_suffix == ".yml":
+        assert data == (ABS_PATH_LOADER_DIR / PATH_YAML_FILE).read_text()
+    elif effective_suffix == ".jpg":
+        assert data == (ABS_PATH_LOADER_DIR / PATH_JPEG_FILE).read_bytes()
+    else:
+        raise NotImplementedError("Add test")  # the filter admitted a suffix with no assertion branch above
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# For tests in Windows - Force LF for test data text files so their bytes match compressed counterparts (e.g. text.txt vs text.txt.gz)
		/data/ text=auto eol=lf
-Original file line number
+Diff line change
@@ Expand Up / @@ -27,4 +27,4 @@ services: @@
         depends_on:
           - db
     volumes:
-      pgdata:
+      pgdata: