Academic-Hammer · Eckesaibo · Dec 21, 2024 · Dec 21, 2024 · Dec 22, 2024 · Dec 22, 2024
diff --git a/sync/SyncTalk/.DS_Store b/sync/SyncTalk/.DS_Store
diff --git a/sync/SyncTalk/.ipynb_checkpoints/Dockerfile-checkpoint b/sync/SyncTalk/.ipynb_checkpoints/Dockerfile-checkpoint
@@ -0,0 +1,61 @@
+# Base image: Ubuntu 18.04
+FROM ubuntu:18.04
+
+# Suppress interactive apt prompts; put conda and CUDA 11.3 on the search paths
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PATH="/opt/conda/bin:$PATH"
+ENV CUDA_HOME="/usr/local/cuda-11.3"
+ENV LD_LIBRARY_PATH="/usr/local/cuda-11.3/lib64:$LD_LIBRARY_PATH"
+
+# Install the base tooling; drop the apt lists in the same layer to keep it small
+RUN apt-get update && apt-get install -y \
+    wget \
+    curl \
+    git \
+    build-essential \
+    software-properties-common \
+    portaudio19-dev \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Install CUDA 11.3 and cuDNN 8.2 from NVIDIA's apt repository
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb && \
+    dpkg -i cuda-keyring_1.0-1_all.deb && rm cuda-keyring_1.0-1_all.deb && \
+    apt-get update && apt-get install -y \
+    cuda-11-3 \
+    libcudnn8=8.2.1.*-1+cuda11.3 \
+    libcudnn8-dev=8.2.1.*-1+cuda11.3 \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Install Miniconda ("conda clean -tipsy" is rejected by current conda, so use -afy)
+RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \
+    bash /tmp/miniconda.sh -b -p /opt/conda && \
+    rm /tmp/miniconda.sh && \
+    /opt/conda/bin/conda clean -afy
+
+# Create the conda environment with Python 3.8.8
+RUN conda create -n synctalk python=3.8.8 -y && \
+    conda clean -a -y
+
+# Stage requirements.txt before installing: pip needs /app/requirements.txt to exist,
+# and copying only this file keeps the dependency layer cached across source edits
+WORKDIR /app
+COPY requirements.txt /app/requirements.txt
+RUN /bin/bash -c "source activate synctalk && \
+    pip install --no-cache-dir torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113 && \
+    pip install --no-cache-dir -r /app/requirements.txt && \
+    pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py38_cu113_pyt1121/download.html && \
+    pip install --no-cache-dir tensorflow-gpu==2.8.1"
+
+# Copy the project, then install the local packages (NOTE(review): no GPU is visible during docker build; confirm whether TORCH_CUDA_ARCH_LIST must be set)
+COPY . /app
+RUN /bin/bash -c "source activate synctalk && \
+    pip install ./freqencoder && \
+    pip install ./shencoder && \
+    pip install ./gridencoder && \
+    pip install ./raymarching"
+
+# Make sure the helper scripts are executable
+RUN chmod +x install.sh inference.sh main.sh
+
+# Start an interactive bash shell by default
+CMD ["/bin/bash"]
diff --git a/sync/SyncTalk/.ipynb_checkpoints/inference-checkpoint.sh b/sync/SyncTalk/.ipynb_checkpoints/inference-checkpoint.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+
+# Ensure the script exits on any error
+set -e
+
+# Define variables for the arguments
+DATA_PATH="data/May"
+WORKSPACE="model/trial_May"
+ASR_MODEL="ave"
+AUDIO_PATH="data/May/aud.wav"
+
+# Run the Python script with the specified arguments
+python main.py "$DATA_PATH" \
+  --workspace "$WORKSPACE" \
+  -O \
+  --test \
+  --test_train \
+  --asr_model "$ASR_MODEL" \
+  --portrait \
+  --aud "$AUDIO_PATH"
diff --git a/sync/SyncTalk/.ipynb_checkpoints/requirements-checkpoint.txt b/sync/SyncTalk/.ipynb_checkpoints/requirements-checkpoint.txt
@@ -0,0 +1,31 @@
+torch-ema
+ninja
+trimesh
+opencv-python
+tensorboardX
+numpy==1.24.4
+pandas==2.0.3
+tqdm
+matplotlib
+PyMCubes==0.1.4
+rich
+dearpygui
+packaging
+scipy
+scikit-learn
+transformers==4.36.0
+face_alignment==1.4.1
+python_speech_features
+numba
+resampy
+pyaudio  # NOTE(review): presumably the reason the Dockerfile installs portaudio19-dev; confirm
+soundfile
+einops
+configargparse
+mediapipe
+lpips
+imageio-ffmpeg
+onnxruntime-gpu  # NOTE(review): unpinned; confirm the resolved build works with CUDA 11.3
+librosa
+fvcore
+iopath
diff --git a/sync/SyncTalk/Dockerfile b/sync/SyncTalk/Dockerfile
@@ -0,0 +1,61 @@
+# Base image: Ubuntu 18.04
+FROM ubuntu:18.04
+
+# Suppress interactive apt prompts; put conda and CUDA 11.3 on the search paths
+ENV DEBIAN_FRONTEND=noninteractive
+ENV PATH="/opt/conda/bin:$PATH"
+ENV CUDA_HOME="/usr/local/cuda-11.3"
+ENV LD_LIBRARY_PATH="/usr/local/cuda-11.3/lib64:$LD_LIBRARY_PATH"
+
+# Install the base tooling; drop the apt lists in the same layer to keep it small
+RUN apt-get update && apt-get install -y \
+    wget \
+    curl \
+    git \
+    build-essential \
+    software-properties-common \
+    portaudio19-dev \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Install CUDA 11.3 and cuDNN 8.2 from NVIDIA's apt repository
+RUN wget https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-keyring_1.0-1_all.deb && \
+    dpkg -i cuda-keyring_1.0-1_all.deb && rm cuda-keyring_1.0-1_all.deb && \
+    apt-get update && apt-get install -y \
+    cuda-11-3 \
+    libcudnn8=8.2.1.*-1+cuda11.3 \
+    libcudnn8-dev=8.2.1.*-1+cuda11.3 \
+    && apt-get clean && rm -rf /var/lib/apt/lists/*
+
+# Install Miniconda ("conda clean -tipsy" is rejected by current conda, so use -afy)
+RUN wget https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O /tmp/miniconda.sh && \
+    bash /tmp/miniconda.sh -b -p /opt/conda && \
+    rm /tmp/miniconda.sh && \
+    /opt/conda/bin/conda clean -afy
+
+# Create the conda environment with Python 3.8.8
+RUN conda create -n synctalk python=3.8.8 -y && \
+    conda clean -a -y
+
+# Stage requirements.txt before installing: pip needs /app/requirements.txt to exist,
+# and copying only this file keeps the dependency layer cached across source edits
+WORKDIR /app
+COPY requirements.txt /app/requirements.txt
+RUN /bin/bash -c "source activate synctalk && \
+    pip install --no-cache-dir torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113 && \
+    pip install --no-cache-dir -r /app/requirements.txt && \
+    pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py38_cu113_pyt1121/download.html && \
+    pip install --no-cache-dir tensorflow-gpu==2.8.1"
+
+# Copy the project, then install the local packages (NOTE(review): no GPU is visible during docker build; confirm whether TORCH_CUDA_ARCH_LIST must be set)
+COPY . /app
+RUN /bin/bash -c "source activate synctalk && \
+    pip install ./freqencoder && \
+    pip install ./shencoder && \
+    pip install ./gridencoder && \
+    pip install ./raymarching"
+
+# Make sure the helper scripts are executable
+RUN chmod +x install.sh inference.sh main.sh
+
+# Start an interactive bash shell by default
+CMD ["/bin/bash"]
diff --git a/sync/SyncTalk/LICENSE b/sync/SyncTalk/LICENSE
@@ -0,0 +1,13 @@
+Copyright (c) 2024 Peng Ziqiao
+
+This work is licensed under the Creative Commons Attribution-NonCommercial 4.0 International License (CC BY-NC 4.0). To view a copy of this license, visit http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, and distribute the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+1. Attribution — You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.
+
+2. NonCommercial — You may not use the material for commercial purposes.
+
+3. No additional restrictions — You may not apply legal terms or technological measures that legally restrict others from doing anything the license permits.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
diff --git a/sync/SyncTalk/README.md b/sync/SyncTalk/README.md
@@ -0,0 +1,115 @@
+# SyncTalk: The Devil😈 is in the Synchronization for Talking Head Synthesis [CVPR 2024]
+The official repository of the paper [SyncTalk: The Devil is in the Synchronization for Talking Head Synthesis](https://arxiv.org/abs/2311.17590)
+
+<p align='center'>
+  <b>
+    <a href="https://arxiv.org/abs/2311.17590">Paper</a>
+    | 
+    <a href="https://ziqiaopeng.github.io/synctalk/">Project Page</a>
+    |
+    <a href="https://github.com/ZiqiaoPeng/SyncTalk">Code</a> 
+  </b>
+</p> 
+
+Colab notebook demonstration: [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1Egq0_ZK5sJAAawShxC0y4JRZQuVS2X-Z?usp=sharing)
+
+  <p align='center'>  
+    <img src='assets/image/synctalk.png' width='1000'/>
+  </p>
+
+  The proposed **SyncTalk** synthesizes synchronized talking head videos, employing tri-plane hash representations to maintain subject identity. It can generate synchronized lip movements, facial expressions, and stable head poses, and restores hair details to create high-resolution videos.
+
+## Installation
+
+Tested on Ubuntu 18.04, PyTorch 1.12.1, and CUDA 11.3.
+```bash
+git clone https://github.com/ZiqiaoPeng/SyncTalk.git
+cd SyncTalk
+```
+### Install dependency
+
+```bash
+conda create -n synctalk python==3.8.8
+conda activate synctalk
+pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113
+pip install -r requirements.txt
+pip install --no-index --no-cache-dir pytorch3d -f https://dl.fbaipublicfiles.com/pytorch3d/packaging/wheels/py38_cu113_pyt1121/download.html
+pip install ./freqencoder
+pip install ./shencoder
+pip install ./gridencoder
+pip install ./raymarching
+```
+If you encounter problems installing PyTorch3D, you can use the following command to install it:
+```bash
+python ./scripts/install_pytorch3d.py
+```
+
+## Data Preparation
+Please place the [May.zip](https://drive.google.com/file/d/18Q2H612CAReFxBd9kxr-i1dD8U1AUfsV/view?usp=sharing) in the **data** folder, the [trial_may.zip](https://drive.google.com/file/d/1C2639qi9jvhRygYHwPZDGs8pun3po3W7/view?usp=sharing) in the **model** folder, and then unzip them.
+
+## Quick Start
+### Run the evaluation code
+```bash
+python main.py data/May --workspace model/trial_may -O --test --asr_model ave
+
+python main.py data/May --workspace model/trial_may -O --test --asr_model ave --portrait
+```
+“ave” refers to our Audio Visual Encoder, “portrait” signifies pasting the generated face back onto the original image, representing higher quality.
+If it runs correctly, you will get the following results.
+
+| Setting                  | PSNR   | LPIPS  | LMD   |
+|--------------------------|--------|--------|-------|
+| SyncTalk (w/o Portrait)  | 32.201 | 0.0394 | 2.822 |
+| SyncTalk (Portrait)      | 37.644 | 0.0117 | 2.825 |
+
+This is for a single subject; the paper reports the average results for multiple subjects.
+
+### Inference with target audio
+```bash
+python main.py data/May --workspace model/trial_may -O --test --test_train --asr_model ave --portrait --aud ./demo/test.wav
+```
+Please use “.wav” audio files for inference; the results will be saved in “model/trial_may/results/”.
+## Train
+```bash
+# by default, we load data from disk on the fly.
+# we can also preload all data to CPU/GPU for faster training, but this is very memory-hungry for large datasets.
+# `--preload 0`: load from disk (default, slower).
+# `--preload 1`: load to CPU (slightly slower)
+# `--preload 2`: load to GPU (fast)
+python main.py data/May --workspace model/trial_may -O --iters 60000 --asr_model ave
+python main.py data/May --workspace model/trial_may -O --iters 100000 --finetune_lips --patch_size 64 --asr_model ave
+
+# or you can use the script to train
+sh ./scripts/train_may.sh
+```
+
+## Test
+```bash
+python main.py data/May --workspace model/trial_may -O --test --asr_model ave --portrait
+```
+
+
+## TODO
+- [x] **Release Training Code.**
+- [x] **Release Pre-trained Model.**
+- [x] **Release Google Colab.**
+- [ ] Release Preprocessing Code.
+
+
+
+## Citation
+
+```
+@InProceedings{peng2023synctalk,
+  title     = {SyncTalk: The Devil is in the Synchronization for Talking Head Synthesis}, 
+  author    = {Ziqiao Peng and Wentao Hu and Yue Shi and Xiangyu Zhu and Xiaomei Zhang and Jun He and Hongyan Liu and Zhaoxin Fan},
+  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (CVPR)},
+  month     = {June},
+  year      = {2024},
+}
+```
+
+## Acknowledgement
+This code builds heavily on [ER-NeRF](https://github.com/Fictionarry/ER-NeRF), and also draws on [RAD-NeRF](https://github.com/ashawkey/RAD-NeRF), [GeneFace](https://github.com/yerfor/GeneFace), [DFRF](https://github.com/sstzal/DFRF), [AD-NeRF](https://github.com/YudongGuo/AD-NeRF), and [Deep3DFaceRecon_pytorch](https://github.com/sicxu/Deep3DFaceRecon_pytorch).
+
+Thanks for these great projects.
diff --git a/sync/SyncTalk/assets/image/synctalk.png b/sync/SyncTalk/assets/image/synctalk.png
diff --git a/sync/SyncTalk/demo/.DS_Store b/sync/SyncTalk/demo/.DS_Store
diff --git a/sync/SyncTalk/demo/test.wav b/sync/SyncTalk/demo/test.wav
diff --git a/sync/SyncTalk/freqencoder/__init__.py b/sync/SyncTalk/freqencoder/__init__.py
@@ -0,0 +1 @@
+from .freq import FreqEncoder
diff --git a/sync/SyncTalk/freqencoder/backend.py b/sync/SyncTalk/freqencoder/backend.py
@@ -0,0 +1,41 @@
+import os
+from torch.utils.cpp_extension import load
+
+_src_path = os.path.dirname(os.path.abspath(__file__))
+
+nvcc_flags = [
+    '-O3', '-std=c++14',
+    '-U__CUDA_NO_HALF_OPERATORS__', '-U__CUDA_NO_HALF_CONVERSIONS__', '-U__CUDA_NO_HALF2_OPERATORS__',
+    '-use_fast_math'
+]
+
+if os.name == "posix":
+    c_flags = ['-O3', '-std=c++14']
+elif os.name == "nt":
+    c_flags = ['/O2', '/std:c++17']
+
+    # find cl.exe
+    def find_cl_path():
+        import glob
+        for edition in ["Enterprise", "Professional", "BuildTools", "Community"]:
+            paths = sorted(glob.glob(r"C:\\Program Files (x86)\\Microsoft Visual Studio\\*\\%s\\VC\\Tools\\MSVC\\*\\bin\\Hostx64\\x64" % edition), reverse=True)
+            if paths:
+                return paths[0]
+
+    # If cl.exe is not on path, try to find it.
+    if os.system("where cl.exe >nul 2>nul") != 0:
+        cl_path = find_cl_path()
+        if cl_path is None:
+            raise RuntimeError("Could not locate a supported Microsoft Visual C++ installation")
+        os.environ["PATH"] += ";" + cl_path
+
+_backend = load(name='_freqencoder',
+                extra_cflags=c_flags,
+                extra_cuda_cflags=nvcc_flags,
+                sources=[os.path.join(_src_path, 'src', f) for f in [
+                    'freqencoder.cu',
+                    'bindings.cpp',
+                ]],
+                )
+
+__all__ = ['_backend']
diff --git a/...freqencoder/build/lib.linux-x86_64-cpython-38/_freqencoder.cpython-38-x86_64-linux-gnu.so b/...freqencoder/build/lib.linux-x86_64-cpython-38/_freqencoder.cpython-38-x86_64-linux-gnu.so
diff --git a/sync/SyncTalk/freqencoder/build/temp.linux-x86_64-cpython-38/.ninja_deps b/sync/SyncTalk/freqencoder/build/temp.linux-x86_64-cpython-38/.ninja_deps
diff --git a/sync/SyncTalk/freqencoder/build/temp.linux-x86_64-cpython-38/.ninja_log b/sync/SyncTalk/freqencoder/build/temp.linux-x86_64-cpython-38/.ninja_log
@@ -0,0 +1,3 @@
+# ninja log v5
+0	26782	1734802515297543445	/home/pod/shared-nvme/SyncTalk/freqencoder/build/temp.linux-x86_64-cpython-38/home/pod/shared-nvme/SyncTalk/freqencoder/src/bindings.o	44f86bd32c0d33f9
+1	50546	1734802539069452910	/home/pod/shared-nvme/SyncTalk/freqencoder/build/temp.linux-x86_64-cpython-38/home/pod/shared-nvme/SyncTalk/freqencoder/src/freqencoder.o	5409e7e776086ceb
diff --git a/sync/SyncTalk/freqencoder/build/temp.linux-x86_64-cpython-38/build.ninja b/sync/SyncTalk/freqencoder/build/temp.linux-x86_64-cpython-38/build.ninja
@@ -0,0 +1,29 @@
+ninja_required_version = 1.3
+cxx = c++
+nvcc = /usr/local/cuda/bin/nvcc
+
+cflags = -pthread -B /home/pod/shared-nvme/conda/envs/synctalk/compiler_compat -Wl,--sysroot=/ -Wsign-compare -DNDEBUG -g -fwrapv -O3 -Wall -Wstrict-prototypes -fPIC -I/home/pod/shared-nvme/conda/envs/synctalk/lib/python3.8/site-packages/torch/include -I/home/pod/shared-nvme/conda/envs/synctalk/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -I/home/pod/shared-nvme/conda/envs/synctalk/lib/python3.8/site-packages/torch/include/TH -I/home/pod/shared-nvme/conda/envs/synctalk/lib/python3.8/site-packages/torch/include/THC -I/usr/local/cuda/include -I/home/pod/shared-nvme/conda/envs/synctalk/include/python3.8 -c
+post_cflags = -O3 -std=c++14 -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=_freqencoder -D_GLIBCXX_USE_CXX11_ABI=0
+cuda_cflags = -I/home/pod/shared-nvme/conda/envs/synctalk/lib/python3.8/site-packages/torch/include -I/home/pod/shared-nvme/conda/envs/synctalk/lib/python3.8/site-packages/torch/include/torch/csrc/api/include -I/home/pod/shared-nvme/conda/envs/synctalk/lib/python3.8/site-packages/torch/include/TH -I/home/pod/shared-nvme/conda/envs/synctalk/lib/python3.8/site-packages/torch/include/THC -I/usr/local/cuda/include -I/home/pod/shared-nvme/conda/envs/synctalk/include/python3.8 -c
+cuda_post_cflags = -D__CUDA_NO_HALF_OPERATORS__ -D__CUDA_NO_HALF_CONVERSIONS__ -D__CUDA_NO_BFLOAT16_CONVERSIONS__ -D__CUDA_NO_HALF2_OPERATORS__ --expt-relaxed-constexpr --compiler-options ''"'"'-fPIC'"'"'' -O3 -std=c++14 -U__CUDA_NO_HALF_OPERATORS__ -U__CUDA_NO_HALF_CONVERSIONS__ -U__CUDA_NO_HALF2_OPERATORS__ -use_fast_math -DTORCH_API_INCLUDE_EXTENSION_H '-DPYBIND11_COMPILER_TYPE="_gcc"' '-DPYBIND11_STDLIB="_libstdcpp"' '-DPYBIND11_BUILD_ABI="_cxxabi1011"' -DTORCH_EXTENSION_NAME=_freqencoder -D_GLIBCXX_USE_CXX11_ABI=0 -gencode=arch=compute_86,code=compute_86 -gencode=arch=compute_86,code=sm_86
+ldflags = 
+# Host C++ compile: runs $cxx and writes a gcc-format depfile to $out.d (-MMD -MF).
+rule compile
+  command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags
+  depfile = $out.d
+  deps = gcc
+# CUDA compile: runs $nvcc with the CUDA flag sets; also declares $out.d as its depfile.
+rule cuda_compile
+  depfile = $out.d
+  deps = gcc
+  command = $nvcc  $cuda_cflags -c $in -o $out $cuda_post_cflags
+
+
+
+build /home/pod/shared-nvme/SyncTalk/freqencoder/build/temp.linux-x86_64-cpython-38/home/pod/shared-nvme/SyncTalk/freqencoder/src/bindings.o: compile /home/pod/shared-nvme/SyncTalk/freqencoder/src/bindings.cpp
+build /home/pod/shared-nvme/SyncTalk/freqencoder/build/temp.linux-x86_64-cpython-38/home/pod/shared-nvme/SyncTalk/freqencoder/src/freqencoder.o: cuda_compile /home/pod/shared-nvme/SyncTalk/freqencoder/src/freqencoder.cu
+
+
+
+
+
diff --git a/...ild/temp.linux-x86_64-cpython-38/home/pod/shared-nvme/SyncTalk/freqencoder/src/bindings.o b/...ild/temp.linux-x86_64-cpython-38/home/pod/shared-nvme/SyncTalk/freqencoder/src/bindings.o
diff --git a/.../temp.linux-x86_64-cpython-38/home/pod/shared-nvme/SyncTalk/freqencoder/src/freqencoder.o b/.../temp.linux-x86_64-cpython-38/home/pod/shared-nvme/SyncTalk/freqencoder/src/freqencoder.o