Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file added sync/SyncTalk/.DS_Store
Binary file not shown.
61 changes: 61 additions & 0 deletions sync/SyncTalk/.ipynb_checkpoints/Dockerfile-checkpoint
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Base image: Ubuntu 18.04.
FROM ubuntu:18.04

# Suppress interactive apt prompts during the build only (ARG is not baked
# into the runtime environment the way ENV is).
ARG DEBIAN_FRONTEND=noninteractive

# Put conda on PATH and point the toolchain at the CUDA 11.3 install below.
# LD_LIBRARY_PATH is set outright: the base image does not define it, so
# appending "$LD_LIBRARY_PATH" only produced a dangling ":" entry.
ENV PATH="/opt/conda/bin:$PATH"
ENV CUDA_HOME="/usr/local/cuda-11.3"
ENV LD_LIBRARY_PATH="/usr/local/cuda-11.3/lib64"

# Install the basic build tooling. The apt package lists are removed in the
# same layer so they do not persist in the image.
RUN apt-get update && apt-get install -y \
        build-essential \
        curl \
        git \
        portaudio19-dev \
        software-properties-common \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Install CUDA 11.3 and cuDNN 8.2 from NVIDIA's apt repository.
# The keyring .deb is deleted in the same layer once it has been installed.
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb -O /tmp/cuda-keyring.deb && \
    dpkg -i /tmp/cuda-keyring.deb && \
    rm /tmp/cuda-keyring.deb && \
    apt-get update && apt-get install -y \
        cuda-11-3 \
        "libcudnn8=8.2.1.*-1+cuda11.3" \
        "libcudnn8-dev=8.2.1.*-1+cuda11.3" \
    && rm -rf /var/lib/apt/lists/*

# Install Miniconda into /opt/conda.
# `conda clean -afy` replaces the old `-tipsy` flag set, which current conda
# releases (as shipped by the "latest" installer) no longer accept.
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \
    bash /tmp/miniconda.sh -b -p /opt/conda && \
    rm /tmp/miniconda.sh && \
    conda clean -afy

# Create the synctalk conda environment with Python 3.8.8.
RUN conda create -n synctalk python=3.8.8 -y && \
    conda clean -afy

# Make the synctalk environment the default python/pip for every later RUN
# step and for the running container, so no explicit activation is needed.
ENV PATH="/opt/conda/envs/synctalk/bin:$PATH"

WORKDIR /app

# Copy only the dependency manifest first: `pip install -r` needs the file to
# exist at this point, and the dependency layer stays cached until it changes.
COPY requirements.txt /app/requirements.txt

# Install PyTorch (CUDA 11.3 wheels) and the Python dependencies.
RUN pip install --no-cache-dir torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113 && \
    pip install --no-cache-dir -r /app/requirements.txt && \
    pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py38_cu113_pyt1121/download.html && \
    pip install --no-cache-dir tensorflow-gpu==2.8.1

# Copy the project files into the container.
COPY . /app

# Build and install the local extension packages.
# NOTE(review): no GPU is visible during `docker build`; if these packages
# compile CUDA code, setting TORCH_CUDA_ARCH_LIST may be needed - confirm.
RUN pip install --no-cache-dir ./freqencoder && \
    pip install --no-cache-dir ./shencoder && \
    pip install --no-cache-dir ./gridencoder && \
    pip install --no-cache-dir ./raymarching

# Make the helper scripts executable.
# NOTE(review): this step fails if any of these files is missing from the
# build context - confirm all three exist.
RUN chmod +x install.sh inference.sh main.sh

# Start an interactive shell by default; run the scripts manually from /app.
CMD ["/bin/bash"]
20 changes: 20 additions & 0 deletions sync/SyncTalk/.ipynb_checkpoints/inference-checkpoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#!/bin/bash
# Runs main.py in test mode on the "May" subject with a fixed audio file.

# Stop immediately if any command returns a non-zero status.
set -e

# Inputs for this run.
DATA_PATH="data/May"
# NOTE(review): the README examples use "model/trial_may" (lowercase "may");
# confirm this directory name matches what exists on disk.
WORKSPACE="model/trial_May"
ASR_MODEL="ave"
AUDIO_PATH="data/May/aud.wav"

# Command-line arguments for main.py, one option per line.
main_args=(
    "$DATA_PATH"
    --workspace "$WORKSPACE"
    -O
    --test
    --test_train
    --asr_model "$ASR_MODEL"
    --portrait
    --aud "$AUDIO_PATH"
)

python main.py "${main_args[@]}"
31 changes: 31 additions & 0 deletions sync/SyncTalk/.ipynb_checkpoints/requirements-checkpoint.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Python dependencies installed with `pip install -r requirements.txt`.
# torch / torchvision / torchaudio, pytorch3d and the local extension packages
# are not listed here; the install instructions add them with separate pip commands.
# Only a few packages are pinned to exact versions; the rest resolve to
# whatever pip selects at install time.
torch-ema
ninja
trimesh
opencv-python
tensorboardX
numpy==1.24.4
pandas==2.0.3
tqdm
matplotlib
PyMCubes==0.1.4
rich
dearpygui
packaging
scipy
scikit-learn
transformers==4.36.0
face_alignment==1.4.1
python_speech_features
numba
resampy
pyaudio
soundfile
einops
configargparse
mediapipe
lpips
imageio-ffmpeg
onnxruntime-gpu
librosa
fvcore
iopath
61 changes: 61 additions & 0 deletions sync/SyncTalk/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Base image: Ubuntu 18.04.
FROM ubuntu:18.04

# Suppress interactive apt prompts during the build only (ARG is not baked
# into the runtime environment the way ENV is).
ARG DEBIAN_FRONTEND=noninteractive

# Put conda on PATH and point the toolchain at the CUDA 11.3 install below.
# LD_LIBRARY_PATH is set outright: the base image does not define it, so
# appending "$LD_LIBRARY_PATH" only produced a dangling ":" entry.
ENV PATH="/opt/conda/bin:$PATH"
ENV CUDA_HOME="/usr/local/cuda-11.3"
ENV LD_LIBRARY_PATH="/usr/local/cuda-11.3/lib64"

# Install the basic build tooling. The apt package lists are removed in the
# same layer so they do not persist in the image.
RUN apt-get update && apt-get install -y \
        build-essential \
        curl \
        git \
        portaudio19-dev \
        software-properties-common \
        wget \
    && rm -rf /var/lib/apt/lists/*

# Install CUDA 11.3 and cuDNN 8.2 from NVIDIA's apt repository.
# The keyring .deb is deleted in the same layer once it has been installed.
RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb -O /tmp/cuda-keyring.deb && \
    dpkg -i /tmp/cuda-keyring.deb && \
    rm /tmp/cuda-keyring.deb && \
    apt-get update && apt-get install -y \
        cuda-11-3 \
        "libcudnn8=8.2.1.*-1+cuda11.3" \
        "libcudnn8-dev=8.2.1.*-1+cuda11.3" \
    && rm -rf /var/lib/apt/lists/*

# Install Miniconda into /opt/conda.
# `conda clean -afy` replaces the old `-tipsy` flag set, which current conda
# releases (as shipped by the "latest" installer) no longer accept.
RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \
    bash /tmp/miniconda.sh -b -p /opt/conda && \
    rm /tmp/miniconda.sh && \
    conda clean -afy

# Create the synctalk conda environment with Python 3.8.8.
RUN conda create -n synctalk python=3.8.8 -y && \
    conda clean -afy

# Make the synctalk environment the default python/pip for every later RUN
# step and for the running container, so no explicit activation is needed.
ENV PATH="/opt/conda/envs/synctalk/bin:$PATH"

WORKDIR /app

# Copy only the dependency manifest first: `pip install -r` needs the file to
# exist at this point, and the dependency layer stays cached until it changes.
COPY requirements.txt /app/requirements.txt

# Install PyTorch (CUDA 11.3 wheels) and the Python dependencies.
RUN pip install --no-cache-dir torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113 && \
    pip install --no-cache-dir -r /app/requirements.txt && \
    pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py38_cu113_pyt1121/download.html && \
    pip install --no-cache-dir tensorflow-gpu==2.8.1

# Copy the project files into the container.
COPY . /app

# Build and install the local extension packages.
# NOTE(review): no GPU is visible during `docker build`; if these packages
# compile CUDA code, setting TORCH_CUDA_ARCH_LIST may be needed - confirm.
RUN pip install --no-cache-dir ./freqencoder && \
    pip install --no-cache-dir ./shencoder && \
    pip install --no-cache-dir ./gridencoder && \
    pip install --no-cache-dir ./raymarching

# Make the helper scripts executable.
# NOTE(review): this step fails if any of these files is missing from the
# build context - confirm all three exist.
RUN chmod +x install.sh inference.sh main.sh

# Start an interactive shell by default; run the scripts manually from /app.
CMD ["/bin/bash"]
13 changes: 13 additions & 0 deletions sync/SyncTalk/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
Copyright (c) 2024 Peng Ziqiao

This work is licensed under the Creative Commons Attribution-NonCommercial 4.0 International License (CC BY-NC 4.0). To view a copy of this license, visit http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, and distribute the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

1. Attribution — You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.

2. NonCommercial — You may not use the material for commercial purposes.

3. No additional restrictions — You may not apply legal terms or technological measures that legally restrict others from doing anything the license permits.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
115 changes: 115 additions & 0 deletions sync/SyncTalk/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# SyncTalk: The Devil😈 is in the Synchronization for Talking Head Synthesis [CVPR 2024]
The official repository of the paper [SyncTalk: The Devil is in the Synchronization for Talking Head Synthesis](https://arxiv.org/abs/2311.17590)

<p align='center'>
<b>
<a href="https://arxiv.org/abs/2311.17590">Paper</a>
|
<a href="https://ziqiaopeng.github.io/synctalk/">Project Page</a>
|
<a href="https://github.com/ZiqiaoPeng/SyncTalk">Code</a>
</b>
</p>

Colab notebook demonstration: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1Egq0_ZK5sJAAawShxC0y4JRZQuVS2X-Z?usp=sharing)

<p align='center'>
<img src='assets/image/synctalk.png' width='1000'/>
</p>

The proposed **SyncTalk** synthesizes synchronized talking head videos, employing tri-plane hash representations to maintain subject identity. It can generate synchronized lip movements, facial expressions, and stable head poses, and restores hair details to create high-resolution videos.

## Installation

Tested on Ubuntu 18.04, Pytorch 1.12.1 and CUDA 11.3.
```bash
git clone https://github.com/ZiqiaoPeng/SyncTalk.git
cd SyncTalk
```
### Install dependencies

```bash
conda create -n synctalk python==3.8.8
conda activate synctalk
pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113
pip install -r requirements.txt
pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py38_cu113_pyt1121/download.html
pip install ./freqencoder
pip install ./shencoder
pip install ./gridencoder
pip install ./raymarching
```
If you encounter problems installing PyTorch3D, you can use the following command to install it:
```bash
python ./scripts/install_pytorch3d.py
```

## Data Preparation
Please place the [May.zip](https://drive.google.com/file/d/18Q2H612CAReFxBd9kxr-i1dD8U1AUfsV/view?usp=sharing) in the **data** folder, the [trial_may.zip](https://drive.google.com/file/d/1C2639qi9jvhRygYHwPZDGs8pun3po3W7/view?usp=sharing) in the **model** folder, and then unzip them.

## Quick Start
### Run the evaluation code
```bash
python main.py data/May --workspace model/trial_may -O --test --asr_model ave

python main.py data/May --workspace model/trial_may -O --test --asr_model ave --portrait
```
“ave” refers to our Audio Visual Encoder; “portrait” pastes the generated face back onto the original image, which gives higher-quality results.
If it runs correctly, you will get the following results.

| Setting | PSNR | LPIPS | LMD |
|--------------------------|--------|--------|-------|
| SyncTalk (w/o Portrait) | 32.201 | 0.0394 | 2.822 |
| SyncTalk (Portrait) | 37.644 | 0.0117 | 2.825 |

This is for a single subject; the paper reports the average results for multiple subjects.

### Inference with target audio
```bash
python main.py data/May --workspace model/trial_may -O --test --test_train --asr_model ave --portrait --aud ./demo/test.wav
```
Please use files with the “.wav” extension for inference, and the inference results will be saved in “model/trial_may/results/”.
## Train
```bash
# by default, we load data from disk on the fly.
# we can also preload all data to CPU/GPU for faster training, but this is very memory-hungry for large datasets.
# `--preload 0`: load from disk (default, slower).
# `--preload 1`: load to CPU (slightly slower)
# `--preload 2`: load to GPU (fast)
python main.py data/May --workspace model/trial_may -O --iters 60000 --asr_model ave
python main.py data/May --workspace model/trial_may -O --iters 100000 --finetune_lips --patch_size 64 --asr_model ave

# or you can use the script to train
sh ./scripts/train_may.sh
```

## Test
```bash
python main.py data/May --workspace model/trial_may -O --test --asr_model ave --portrait
```


## TODO
- [x] **Release Training Code.**
- [x] **Release Pre-trained Model.**
- [x] **Release Google Colab.**
- [ ] Release Preprocessing Code.



## Citation

```
@InProceedings{peng2023synctalk,
title = {SyncTalk: The Devil is in the Synchronization for Talking Head Synthesis},
author = {Ziqiao Peng and Wentao Hu and Yue Shi and Xiangyu Zhu and Xiaomei Zhang and Jun He and Hongyan Liu and Zhaoxin Fan},
booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
month = {June},
year = {2024},
}
```

## Acknowledgement
This code relies heavily on [ER-NeRF](https://github.com/Fictionarry/ER-NeRF), and also builds on [RAD-NeRF](https://github.com/ashawkey/RAD-NeRF), [GeneFace](https://github.com/yerfor/GeneFace), [DFRF](https://github.com/sstzal/DFRF), [AD-NeRF](https://github.com/YudongGuo/AD-NeRF), and [Deep3DFaceRecon_pytorch](https://github.com/sicxu/Deep3DFaceRecon_pytorch).

Thanks to the authors of these great projects.
Binary file added sync/SyncTalk/assets/image/synctalk.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file added sync/SyncTalk/demo/.DS_Store
Binary file not shown.
Binary file added sync/SyncTalk/demo/test.wav
Binary file not shown.
1 change: 1 addition & 0 deletions sync/SyncTalk/freqencoder/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .freq import FreqEncoder
41 changes: 41 additions & 0 deletions sync/SyncTalk/freqencoder/backend.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import os
from torch.utils.cpp_extension import load

# Directory that holds this module; the extension sources sit in its src/ folder.
_src_path = os.path.dirname(os.path.abspath(__file__))

# Extra flags passed to nvcc when building the CUDA sources.
nvcc_flags = [
    '-O3',
    '-std=c++14',
    '-U__CUDA_NO_HALF_OPERATORS__',
    '-U__CUDA_NO_HALF_CONVERSIONS__',
    '-U__CUDA_NO_HALF2_OPERATORS__',
    '-use_fast_math',
]

# Host-compiler flags depend on the platform.
if os.name == "posix":
    c_flags = ['-O3', '-std=c++14']
elif os.name == "nt":
    c_flags = ['/O2', '/std:c++17']

    def find_cl_path():
        """Return the newest matching MSVC x64 bin directory, or None if no edition matches."""
        import glob
        for edition in ["Enterprise", "Professional", "BuildTools", "Community"]:
            pattern = r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition
            # Reverse-sorted so the lexicographically greatest path comes first.
            matches = sorted(glob.glob(pattern), reverse=True)
            if matches:
                return matches[0]

    # When `where cl.exe` fails, look the compiler up and append its folder to PATH.
    if os.system("where cl.exe >nul 2>nul") != 0:
        cl_path = find_cl_path()
        if cl_path is None:
            raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation")
        os.environ["PATH"] += ";" + cl_path

# Build and load the extension module from the CUDA and binding sources.
_backend = load(
    name='_freqencoder',
    extra_cflags=c_flags,
    extra_cuda_cflags=nvcc_flags,
    sources=[
        os.path.join(_src_path, 'src', fname)
        for fname in ['freqencoder.cu', 'bindings.cpp']
    ],
)

__all__ = ['_backend']
Binary file not shown.
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# ninja log v5
0 26782 1734802515297543445 /home/pod/shared-nvme/SyncTalk/freqencoder/build/temp.linux-x86_64-cpython-38/home/pod/shared-nvme/SyncTalk/freqencoder/src/bindings.o 44f86bd32c0d33f9
1 50546 1734802539069452910 /home/pod/shared-nvme/SyncTalk/freqencoder/build/temp.linux-x86_64-cpython-38/home/pod/shared-nvme/SyncTalk/freqencoder/src/freqencoder.o 5409e7e776086ceb
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
ninja_required_version = 1.3
cxx = c++
nvcc = /usr/local/cuda/bin/nvcc

cflags = -pthread -B /home/pod/shared-nvme/conda/envs/synctalk/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/pod/shared-nvme/conda/envs/synctalk/lib/python3.8/site-packages/torch/include -I/home/pod/shared-nvme/conda/envs/synctalk/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -I/home/pod/shared-nvme/conda/envs/synctalk/lib/python3.8/site-packages/torch/include/TH -I/home/pod/shared-nvme/conda/envs/synctalk/lib/python3.8/site-packages/torch/include/THC -I/usr/local/cuda/include -I/home/pod/shared-nvme/conda/envs/synctalk/include/python3.8 -c
post_cflags = -O3 -std=c++14 -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=_freqencoder -D_GLIBCXX_USE_CXX11_ABI=0
cuda_cflags = -I/home/pod/shared-nvme/conda/envs/synctalk/lib/python3.8/site-packages/torch/include -I/home/pod/shared-nvme/conda/envs/synctalk/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -I/home/pod/shared-nvme/conda/envs/synctalk/lib/python3.8/site-packages/torch/include/TH -I/home/pod/shared-nvme/conda/envs/synctalk/lib/python3.8/site-packages/torch/include/THC -I/usr/local/cuda/include -I/home/pod/shared-nvme/conda/envs/synctalk/include/python3.8 -c
cuda_post_cflags = -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options ''"'"'-fPIC'"'"'' -O3 -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -use_fast_math -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=_freqencoder -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86
ldflags =

# Rule for C++ sources: runs $cxx on $in, writing the object to $out and a
# gcc-style dependency file to $out.d (via -MMD -MF).
rule compile
command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags
depfile = $out.d
deps = gcc

# Rule for CUDA sources: runs $nvcc on $in with the CUDA flag sets, writing
# the object to $out; $out.d is declared as the dependency file.
rule cuda_compile
depfile = $out.d
deps = gcc
command = $nvcc $cuda_cflags -c $in -o $out $cuda_post_cflags



build /home/pod/shared-nvme/SyncTalk/freqencoder/build/temp.linux-x86_64-cpython-38/home/pod/shared-nvme/SyncTalk/freqencoder/src/bindings.o: compile /home/pod/shared-nvme/SyncTalk/freqencoder/src/bindings.cpp
build /home/pod/shared-nvme/SyncTalk/freqencoder/build/temp.linux-x86_64-cpython-38/home/pod/shared-nvme/SyncTalk/freqencoder/src/freqencoder.o: cuda_compile /home/pod/shared-nvme/SyncTalk/freqencoder/src/freqencoder.cu





Binary file not shown.
Binary file not shown.
Loading