Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 33 additions & 0 deletions Dockerfile-dpv-branch
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# Image for working on the llama-modeling-dpv branch of llm-foundry inside a
# micromamba-managed environment named "llm-foundry".
# NOTE(review): ":latest" is unpinned, so a rebuild may pick up a different
# base image — consider pinning a specific tag.
FROM mambaorg/micromamba:latest

# Run the remaining build steps as root (apt-get below needs it; presumably
# the base image defaults to a non-root user — TODO confirm).
USER root

# Install git (required by the clone below) plus basic editing/download tools,
# then clear the apt cache and package lists.
RUN apt-get update && apt-get install -y git nano curl wget && apt-get clean && rm -rf /var/lib/apt/lists/*

# Clone the llama-modeling-dpv branch into /llm-foundry, create the
# "llm-foundry" env (Python 3.12, uv, cuda; channels nvidia/label/12.4.1 and
# conda-forge), and point uv at that env via UV_PROJECT_ENVIRONMENT so that
# `uv sync` installs into it.
# The sync runs twice: first with the "gpu" extra only, then again adding the
# "flash" extra with --no-cache (presumably because the flash extra needs the
# packages from the first pass at build time — TODO confirm).
RUN git clone -b llama-modeling-dpv https://github.com/LocalResearchGroup/llm-foundry.git /llm-foundry && \
cd /llm-foundry && \
micromamba create -n llm-foundry python=3.12 uv cuda -c nvidia/label/12.4.1 -c conda-forge && \
export UV_PROJECT_ENVIRONMENT=/opt/conda/envs/llm-foundry && \
micromamba run -n llm-foundry uv python pin 3.12 && \
micromamba run -n llm-foundry uv sync --dev --extra gpu && \
micromamba run -n llm-foundry uv sync --dev --extra gpu --extra flash --no-cache

# Persist the settings for containers started from this image: the `export`
# inside the RUN above only applies to that single build step.
ENV UV_PROJECT_ENVIRONMENT=/opt/conda/envs/llm-foundry
ENV CONDA_DEFAULT_ENV=llm-foundry
# Put the env's bin directory first on PATH so its executables are found
# before any others.
ENV PATH=/opt/conda/envs/llm-foundry/bin:$PATH

WORKDIR /llm-foundry

# Append the micromamba shell hook and an "activate llm-foundry" line to
# ~/.bashrc (root's, since USER root is in effect) so bash sessions that read
# it start with the environment activated.
RUN echo "eval \"\$(micromamba shell hook --shell bash)\"" >> ~/.bashrc && \
echo "micromamba activate llm-foundry" >> ~/.bashrc

# Open port to view the Aim dashboard live from the container (optional).
# Not related to the Aim remote upload server.
EXPOSE 43800

# Default command: a bash shell (environment activation comes from the
# ~/.bashrc lines added above).
CMD ["/bin/bash"]

# Build: 2025-04-06-123410  <-- change this stamp on each build.
# NOTE(review): presumably intended as a manual rebuild marker; a comment at
# the end of the file does not affect earlier cached layers — TODO confirm intent.
138 changes: 138 additions & 0 deletions llmfoundry/command_utils/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -566,3 +566,141 @@ def eval_from_yaml(
yaml_cfg = om.merge(yaml_cfg, cli_cfg)
assert isinstance(yaml_cfg, DictConfig)
return evaluate(yaml_cfg)


def convert_peft_adapter_format(model_dir: str) -> None:
    """Convert a PEFT adapter from safetensors to .bin to avoid device metadata issues.

    The function performs up to three steps, each only when applicable:

    1. If ``adapter_model.safetensors`` exists and ``adapter_model.bin`` does
       not, load the weights on CPU and save them as ``adapter_model.bin``.
       The file is written to a temporary path first and then moved into
       place, so an interrupted save never leaves a partial ``.bin`` that a
       later call would mistake for a finished conversion.
    2. If ``adapter_model.safetensors`` exists, rename it to
       ``adapter_model.safetensors.bak`` to force use of the ``.bin`` file.
    3. If ``adapter_config.json`` exists, rewrite ``.safetensors`` file
       extensions in its ``weight_map`` values to ``.bin`` (only the
       extension component, not arbitrary occurrences of the word), and
       replace "safetensors" in ``model_type`` if present. The file is only
       rewritten when something actually changed.

    This is a best-effort operation: any exception is caught and reported
    with ``print`` rather than propagated.

    Args:
        model_dir: Full path to the model directory containing the PEFT
            adapter files (``adapter_config.json`` and
            ``adapter_model.safetensors``).
            Example: '/model-checkpoints/llama3-1b-lora-20250420_180800'

    Returns:
        None

    Side Effects:
        - May create adapter_model.bin in model_dir
        - May rename adapter_model.safetensors to adapter_model.safetensors.bak
        - May modify adapter_config.json to reference .bin files
    """
    import json
    import os
    import re

    # Paths for the adapter files
    adapter_path = os.path.join(model_dir, "adapter_model.safetensors")
    bin_adapter_path = os.path.join(model_dir, "adapter_model.bin")
    config_path = os.path.join(model_dir, "adapter_config.json")

    try:
        # Load and convert if needed
        if os.path.exists(adapter_path) and not os.path.exists(bin_adapter_path):
            # Heavy imports are deferred to the only branch that needs them.
            import torch
            from safetensors.torch import load_file

            # Load safetensors adapter with explicit CPU device
            weights = load_file(adapter_path, device="cpu")

            # Save to a temporary file and move it into place atomically so a
            # failed save cannot leave a truncated adapter_model.bin behind.
            tmp_path = bin_adapter_path + ".tmp"
            try:
                torch.save(weights, tmp_path)
                os.replace(tmp_path, bin_adapter_path)
            finally:
                if os.path.exists(tmp_path):
                    os.remove(tmp_path)
            print(f"Converted adapter to .bin format: {bin_adapter_path}")

        # Rename/move safetensors file to force bin usage
        if os.path.exists(adapter_path):
            backup_path = os.path.join(model_dir, "adapter_model.safetensors.bak")
            os.rename(adapter_path, backup_path)
            print(f"Moved safetensors file to {backup_path} to force bin usage")

        # Update config to reference .bin file
        if os.path.exists(config_path):
            with open(config_path, 'r', encoding="utf-8") as f:
                config = json.load(f)

            changed = False

            # Swap only the ".safetensors" extension component (at the end of
            # the name or before another extension such as ".index.json").
            extension = re.compile(r"\.safetensors(?=\.|$)")
            weight_map = config.get("weight_map") or {}
            if isinstance(weight_map, dict):
                for key, value in weight_map.items():
                    if not isinstance(value, str):
                        continue
                    updated = extension.sub(".bin", value)
                    if updated != value:
                        weight_map[key] = updated
                        changed = True

            # Also update model_type if needed.
            # NOTE(review): plain substring replacement is kept here so the
            # restore step can reverse it — confirm model_type ever contains
            # "safetensors" in practice.
            model_type = config.get("model_type", "")
            if isinstance(model_type, str) and "safetensors" in model_type:
                config["model_type"] = model_type.replace("safetensors", "bin")
                changed = True

            if changed:
                with open(config_path, 'w', encoding="utf-8") as f:
                    json.dump(config, f, indent=2)
                print("Updated adapter config to use .bin format")
    except Exception as e:
        # Deliberately best-effort: report the failure and let the caller proceed.
        print(f"Failed to convert adapter format: {e}")


def restore_safetensors_after_eval(model_dir: str) -> None:
    """Restore safetensors files to their original state after evaluation.

    Reverses the changes made by convert_peft_adapter_format():

    1. If ``adapter_model.safetensors.bak`` exists and
       ``adapter_model.safetensors`` does not, rename the backup back to
       ``adapter_model.safetensors``. If the safetensors file is already
       present, the rename is skipped.
    2. If ``adapter_config.json`` exists, rewrite ``.bin`` file extensions in
       its ``weight_map`` values back to ``.safetensors`` (only the extension
       component, so names that merely contain the letters "bin" are left
       intact), and replace "bin" in ``model_type`` if present. The file is
       only rewritten when something changed.
    3. ``adapter_model.bin`` is left in place for potential future use.

    Nothing is done (apart from printing a message) when no backup exists.

    Args:
        model_dir: Full path to the model directory containing the PEFT
            adapter files (``adapter_config.json``, ``adapter_model.bin`` and
            ``adapter_model.safetensors.bak`` created by
            convert_peft_adapter_format).
            Example: '/model-checkpoints/llama3-1b-lora-20250420_180800'

    Returns:
        None

    Side Effects:
        - May restore adapter_model.safetensors from the .bak file
        - May modify adapter_config.json to reference .safetensors files
        - Keeps adapter_model.bin for potential future use
    """
    import json
    import os
    import re

    # Paths for the adapter files
    backup_path = os.path.join(model_dir, "adapter_model.safetensors.bak")
    adapter_path = os.path.join(model_dir, "adapter_model.safetensors")
    config_path = os.path.join(model_dir, "adapter_config.json")

    # Only restore if backup exists
    if not os.path.exists(backup_path):
        print(f"No backup found at {backup_path}, nothing to restore")
        return

    if os.path.exists(adapter_path):
        print(f"Safetensors file already exists at {adapter_path}, skipping restore")
    else:
        os.rename(backup_path, adapter_path)
        print("Restored safetensors file from backup")

    # Update config only if needed
    if not os.path.exists(config_path):
        return

    with open(config_path, 'r', encoding="utf-8") as f:
        config = json.load(f)

    needs_update = False

    # Swap only the ".bin" extension component (at the end of the name or
    # before another extension such as ".index.json"); a blanket
    # replace("bin", ...) would mangle names like "combined_model.bin".
    extension = re.compile(r"\.bin(?=\.|$)")
    weight_map = config.get("weight_map") or {}
    if isinstance(weight_map, dict):
        for key, value in weight_map.items():
            if not isinstance(value, str):
                continue
            updated = extension.sub(".safetensors", value)
            if updated != value:
                weight_map[key] = updated
                needs_update = True

    # NOTE(review): plain substring replacement is kept for model_type so it
    # mirrors what convert_peft_adapter_format() does — confirm model_type
    # can legitimately contain "bin" only as a result of that conversion.
    model_type = config.get("model_type", "")
    if isinstance(model_type, str) and "bin" in model_type:
        config["model_type"] = model_type.replace("bin", "safetensors")
        needs_update = True

    if needs_update:
        with open(config_path, 'w', encoding="utf-8") as f:
            json.dump(config, f, indent=2)
        print("Updated adapter config to use safetensors format")
83 changes: 83 additions & 0 deletions llmfoundry/models/llama/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
# Training Custom Llama Models

## Customizing Training

### YAML file
To customize the training process, modify the YAML configuration file specified by `TRAIN_YAML`. The default is `scripts/train/yamls/llama/llama3-1b-lora2.yaml`.

### train_with_custom_llama.py

`train_with_custom_llama.py` serves as the entry point for training with our custom LLaMA implementation. It loads the configuration from YAML files, registers our `CustomLlamaModel` with the model registry, and orchestrates the training process. The script manages critical setup tasks, including HuggingFace authentication, dataset path configuration, and model parameter preparation, before delegating to the training framework. It can be customized through command-line arguments or environment variables, making it flexible for different training scenarios.

### Weight Loading in CustomLlamaModel

The `_copy_weights_from_hf_llama` method handles weight transfer from standard Hugging Face models to our custom implementation. It first loads a Hugging Face model via `from_pretrained()` to serve as a source, then systematically copies weights component by component: embeddings, transformer layers, normalization layers, and the output head. The method explicitly tracks copy progress, reporting both successful transfers and any uninitialized weights to ensure model integrity. This direct weight mapping approach enables our custom implementation to precisely match pretrained model behavior while gaining the performance benefits of our optimized architecture.


### CustomLlamaModel Initialization and Adapter Pattern

CustomLlamaModel follows a two-layer architecture that separates model implementation from framework integration. The outer class inherits from HuggingFaceModel, managing compatibility with the training framework, while the inner model (created via _initialize_model_from_config) implements the actual transformer architecture with optimized components. During initialization, the class loads a pretrained model, creates a corresponding optimized implementation, then systematically transfers weights via _copy_weights_from_hf_llama. This adapter pattern allows for performance optimizations in the inner model while maintaining full compatibility with HuggingFace's ecosystem, and includes built-in support for PEFT adapters that can be attached to the initialized model.


### Dual Forward Methods in the Adapter Pattern

`CustomLlamaModel` implements two distinct forward methods that operate in tandem. The inner model's forward method (bound to the model instance using `forward.__get__`) contains the raw computational logic for the transformer architecture, handling token embeddings, attention operations, and feed-forward networks. The outer `CustomLlamaModel`'s forward method serves as an adapter interface, filtering input arguments to match inner model requirements, managing state tracking, and implementing training-specific logic like loss calculation via the fused loss function. This separation allows the inner model to remain focused on efficient computation while the outer wrapper handles framework integration, creating a clean division of responsibilities that simplifies maintenance and optimization.

### Model Registration and Framework Integration

The register_custom_llama_model() function in register.py integrates our custom model implementation with the training framework. It adds the CustomLlamaModel class to the framework's model registry under the key "hf_causal_lm", allowing our model to be used wherever HuggingFace causal language models are supported. This registration happens explicitly in both train_with_custom_llama.py before starting training and in local_llama_training.py's evaluate_model function before evaluation begins. Without this registration step, the framework would use a standard implementation instead of our optimized version with custom components.

### local_llama_training.py

The local script adapts the Modal cloud deployment approach for single-machine environments while preserving the core workflow. Key differences include file path handling (local directories vs Modal Volumes), environment setup (local Python interpreter vs containerized environment), and execution model (synchronous function calls vs Modal's distributed functions). The local script adds more comprehensive logging, path validation, and error handling to manage filesystem interactions that Modal handles automatically. While Modal's script leverages cloud-specific features like network tunneling for Aim visualization and GPU provisioning via decorators, the local version provides equivalent functionality through direct subprocess calls and environment variable configuration. Custom model integration is intended to work the same way in both versions.

This is a local version of the LLM training script that runs directly on your GPUs without using Modal. It's designed to work with the LLM Foundry framework for training and fine-tuning language models.

## Prerequisites

**Follow the steps to install llmfoundry**

## Setup

1. **Clone the LLM Foundry repository**:
```bash
git clone https://github.com/mosaicml/llm-foundry.git
cd llm-foundry
```

2. **Install dependencies**:
```bash
pip install -e .
```

3. **Set up your HuggingFace token**:
```bash
export HF_TOKEN=your_token_here
```

## Usage

**Run the full training pipeline**:
```bash
python local_llama_training.py
```

For multi-GPU training, run the following, setting `--nproc_per_node` to the number of GPUs to use:

```bash
NODE_RANK=0 python -m torch.distributed.run --nproc_per_node=2 local_llama_training.py
```

## Directory Structure

The script creates the following directory structure:

```
./
├── datasets/ # Dataset storage
│ └── c4_small/ # C4 dataset
├── model-checkpoints/ # Model checkpoints
├── runs/ # Training run outputs
│ └── model-name-timestamp/ # Individual run
└── local_llama_training.py # This script
```

37 changes: 37 additions & 0 deletions llmfoundry/models/llama/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
"""Llama model package.

Re-exports the names listed in ``__all__`` from the package's submodules so
they can be imported directly from this package.
"""

# NOTE(review): an earlier, commented-out import/export list (which also named
# LlamaForCausalLM from .model) was dead code here and has been dropped in
# favour of the active imports below.

# Import core components
from .config import LlamaConfig
from .attention import LlamaAttention
from .mlp import LlamaMLP
from .decoder import LlamaDecoderLayer
from .rms_norm import LlamaRMSNorm
# Registration helpers and the model class.
# NOTE(review): import order is left untouched in case these modules rely on
# the components above already being loaded — TODO confirm.
from .register import get_custom_llama_model, register_custom_llama_model
from .model import CustomLlamaModel

# Public API of the package; one entry per name imported above.
__all__ = [
'LlamaConfig',
'LlamaAttention',
'LlamaMLP',
'LlamaDecoderLayer',
'LlamaRMSNorm',
'get_custom_llama_model',
'register_custom_llama_model',
'CustomLlamaModel',
]
Loading