This is the official implementation of the paper Rethinking Direct Preference Optimization in Diffusion Models. This repository is adapted from the Diffusion-DPO official implementation.
pip install -r requirements.txt
bash launchers/run_sd.sh
- `--beta_dpo`: (implicit) KL-divergence parameter beta for DPO
- `REF_UPDATE_STEP`: Update period for the reference model
- `MONITOR_THRESHOLD`: Monitoring threshold for the KL divergence between the reference model and the pre-trained model
- `--timestep_gamma`: The timestep sampling distribution parameter $\gamma$
- `--reward_scale_scheduling`: Use reward scale scheduling
- `--alpha`: The hyperparameter for the reward scale scheduling
@misc{kang2025rethinkingdirectpreferenceoptimization,
title={Rethinking Direct Preference Optimization in Diffusion Models},
author={Junyong Kang and Seohyun Lim and Kyungjune Baek and Hyunjung Shim},
year={2025},
eprint={2505.18736},
archivePrefix={arXiv},
primaryClass={cs.CV},
url={https://arxiv.org/abs/2505.18736},
}
