Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 13 additions & 1 deletion lightx2v/models/networks/base_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,11 +143,23 @@ def _init_infer_class(self):

@staticmethod
def _read_safetensors_metadata(file_path):
"""Read tensor metadata (names, shapes, dtypes) from safetensors file header without loading data."""
"""Read tensor metadata (names, shapes, dtypes) from safetensors file header without loading data.

Supports two kinds of safetensors files:
1. Full model files: metadata is extracted from normal tensor header entries.
2. Lightweight dummy-meta files (exported by tools/convert/export_dummy_meta.py):
all tensor metadata is stored in ``__metadata__._tensor_meta`` as a JSON string,
with ``__metadata__._is_dummy_meta == "true"`` as the marker.
"""
with open(file_path, "rb") as f:
header_size = struct.unpack("<Q", f.read(8))[0]
header_json = f.read(header_size).decode("utf-8")
header = json.loads(header_json)

metadata = header.get("__metadata__", {})
if metadata.get("_is_dummy_meta") == "true" and "_tensor_meta" in metadata:
return json.loads(metadata["_tensor_meta"])
Comment on lines +160 to +161
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

如果 `_is_dummy_meta` 为 `"true"` 但 `_tensor_meta` 缺失,当前逻辑会跳过处理并返回空字典。这可能导致后续模型初始化时因缺少权重而报错,且错误信息难以定位。建议在此处增加明确的错误检查。

Suggested change
if metadata.get("_is_dummy_meta") == "true" and "_tensor_meta" in metadata:
return json.loads(metadata["_tensor_meta"])
if metadata.get("_is_dummy_meta") == "true":
if "_tensor_meta" not in metadata:
raise ValueError(f"Dummy meta file {file_path} is missing '_tensor_meta' in __metadata__")
return json.loads(metadata["_tensor_meta"])


tensors = {}
for key, info in header.items():
if key == "__metadata__":
Expand Down
2 changes: 1 addition & 1 deletion scripts/seko_talk/run_seko_talk_01_base_dummy.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#!/bin/bash

lightx2v_path=
model_path=/data/temp/SekoTalk-v2.5-bf16-step4
model_path=/data/temp/SekoTalk-v2.5-bf16-step4-dummy

export CUDA_VISIBLE_DEVICES=0

Expand Down
114 changes: 114 additions & 0 deletions tools/convert/export_dummy_meta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
"""Export lightweight dummy-meta safetensors files from full model safetensors.

The output files contain ONLY tensor metadata (key, shape, dtype) in the
``__metadata__`` JSON header, with zero tensor data. They are typically a few
KB regardless of the original model size, and can be used as a drop-in
replacement when ``dummy_model: true`` is set in the config.

Usage examples
--------------
# Export a single file (output next to input with _dummy_meta suffix):
python tools/convert/export_dummy_meta.py /data/model/model.safetensors

# Export a single file to a specific output path:
python tools/convert/export_dummy_meta.py /data/model/model.safetensors -o /data/dummy/model.safetensors

# Export all *.safetensors in a directory (output to a separate directory):
python tools/convert/export_dummy_meta.py /data/model/ -o /data/model_dummy_meta/
"""

import argparse
import glob
import json
import os
import struct
import sys


def read_tensor_metadata(file_path: str) -> dict:
"""Read tensor metadata from a full safetensors file header."""
with open(file_path, "rb") as f:
header_size = struct.unpack("<Q", f.read(8))[0]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

在读取 8 字节的文件头大小之前,建议检查读取到的字节长度。如果文件损坏或过小,直接解包会抛出 struct.error。增加长度检查可以提供更友好的错误提示。

Suggested change
header_size = struct.unpack("<Q", f.read(8))[0]
header_bytes_8 = f.read(8)
if len(header_bytes_8) < 8:
raise ValueError(f"Invalid safetensors file (too small): {file_path}")
header_size = struct.unpack("<Q", header_bytes_8)[0]

header_json = f.read(header_size).decode("utf-8")
header = json.loads(header_json)

tensor_meta = {}
for key, info in header.items():
if key == "__metadata__":
continue
tensor_meta[key] = {"shape": info["shape"], "dtype": info["dtype"]}
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

直接访问 `info["shape"]` 和 `info["dtype"]` 在处理非标准或已损坏的 safetensors 文件时可能会抛出 KeyError。建议增加键值存在性检查。

Suggested change
tensor_meta[key] = {"shape": info["shape"], "dtype": info["dtype"]}
if "shape" in info and "dtype" in info:
tensor_meta[key] = {"shape": info["shape"], "dtype": info["dtype"]}

return tensor_meta


def write_dummy_meta_safetensors(tensor_meta: dict, output_path: str, source_filename: str = ""):
    """Serialize *tensor_meta* into a data-free safetensors file at *output_path*.

    The file is an 8-byte little-endian header length followed by a compact
    JSON header whose only entry is ``__metadata__``. That entry holds the
    ``_is_dummy_meta`` marker, the tensor metadata re-encoded as a JSON string
    under ``_tensor_meta``, and ``_source_file``. No tensor data follows.

    Args:
        tensor_meta: Mapping to embed; it must be JSON-serializable.
        output_path: Destination file. Its parent directory is created if missing.
        source_filename: Stored verbatim under ``_source_file``.
    """
    # Metadata values are stored as strings, so the tensor table is embedded
    # as a nested JSON document rather than as a JSON object.
    serialized_meta = json.dumps(tensor_meta, separators=(",", ":"))
    payload = json.dumps(
        {
            "__metadata__": {
                "_is_dummy_meta": "true",
                "_tensor_meta": serialized_meta,
                "_source_file": source_filename,
            }
        },
        separators=(",", ":"),
    ).encode("utf-8")

    # A bare file name has an empty dirname; fall back to the current directory.
    parent_dir = os.path.dirname(output_path)
    os.makedirs(parent_dir if parent_dir else ".", exist_ok=True)

    length_prefix = struct.pack("<Q", len(payload))
    with open(output_path, "wb") as sink:
        for piece in (length_prefix, payload):
            sink.write(piece)


def export_single_file(input_path: str, output_path: str):
    """Export one safetensors file to its dummy-meta counterpart.

    Reads the tensor metadata of *input_path* with ``read_tensor_metadata``,
    writes it to *output_path* with ``write_dummy_meta_safetensors``, and
    prints a one-line summary of the input size, output size and tensor count.

    Args:
        input_path: Path to the full safetensors file.
        output_path: Path of the dummy-meta file to create.

    Raises:
        ValueError: If *output_path* refers to the same file as *input_path*.
    """
    # Writing the stub over its own source would replace the full weights with
    # a header-only file, so refuse before anything is read or written.
    # samefile() also catches symlinks and differently spelled paths.
    if os.path.exists(output_path) and os.path.samefile(input_path, output_path):
        raise ValueError(f"Output path must differ from input path (refusing to overwrite source weights): {input_path}")

    tensor_meta = read_tensor_metadata(input_path)
    write_dummy_meta_safetensors(tensor_meta, output_path, source_filename=os.path.basename(input_path))

    input_size = os.path.getsize(input_path)
    output_size = os.path.getsize(output_path)
    print(f" {os.path.basename(input_path)}: {input_size / 1024 / 1024:.1f} MB -> {output_size / 1024:.1f} KB ({len(tensor_meta)} tensors)")


def main():
    """Command-line entry point for exporting dummy-meta safetensors files.

    Accepts one positional ``input`` (a ``.safetensors`` file or a directory)
    and an optional ``-o/--output``. For a directory, every ``*.safetensors``
    file directly inside it is exported under the same base name into the
    output directory. For a single file, the output path defaults to the input
    name with ``_dummy_meta`` inserted before the extension.

    Exits with status 1 when a directory contains no ``*.safetensors`` files
    or when a non-directory input does not end in ``.safetensors``.
    """
    cli = argparse.ArgumentParser(description="Export lightweight dummy-meta safetensors files from full model safetensors.")
    cli.add_argument("input", help="Path to a safetensors file or a directory containing *.safetensors files.")
    cli.add_argument(
        "-o",
        "--output",
        default=None,
        help="Output path. For single-file input: output file path (default: input_dummy_meta.safetensors next to input). For directory input: output directory (default: {input}_dummy_meta/).",
    )
    args = cli.parse_args()

    # Single-file mode is handled first so the directory logic below stays flat.
    if not os.path.isdir(args.input):
        if not args.input.endswith(".safetensors"):
            print(f"Input file must be a .safetensors file: {args.input}", file=sys.stderr)
            sys.exit(1)

        output_path = args.output
        if not output_path:
            # Drop the extension and append the dummy-meta suffix.
            base_name = args.input.rsplit(".safetensors", 1)[0]
            output_path = base_name + "_dummy_meta.safetensors"

        print(f"Exporting {args.input} -> {output_path}")
        export_single_file(args.input, output_path)
        print("Done.")
        return

    # Directory mode: only files directly inside the directory are matched.
    pattern = os.path.join(args.input, "*.safetensors")
    safetensors_files = sorted(glob.glob(pattern))
    if len(safetensors_files) == 0:
        print(f"No *.safetensors files found in {args.input}", file=sys.stderr)
        sys.exit(1)

    output_dir = args.output if args.output else args.input.rstrip("/") + "_dummy_meta"
    os.makedirs(output_dir, exist_ok=True)
    print(f"Exporting {len(safetensors_files)} files from {args.input} -> {output_dir}")

    for source_file in safetensors_files:
        export_single_file(source_file, os.path.join(output_dir, os.path.basename(source_file)))

    print("Done.")


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
9 changes: 9 additions & 0 deletions tools/convert/readme_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -385,3 +385,12 @@ python converter.py \
--output_name merged_model \
--single_file
```


### 5. dit权重头导出
#### 5.1 safetensors meta → dummy safetensors
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

标题中的 dump_\txt 描述不准确,且包含转义字符。该工具实际生成的是轻量级的 .safetensors 权重头文件,而非文本文件。建议修改为更清晰的描述。

Suggested change
#### 5.1 safetensors meta → dump_\txt
#### 5.1 safetensors meta → dummy safetensors

```bash
python tools/convert/export_dummy_meta.py \
/data/temp/SekoTalk-v2.5-bf16-step4/ \
-o /data/temp/SekoTalk-v2.5-bf16-step4-dummy
```
Loading