diff --git a/.ci/README.md b/.ci/README.md
index 057d45127..42b6b59fd 100644
--- a/.ci/README.md
+++ b/.ci/README.md
@@ -5,24 +5,39 @@ This document describes the external pipeline executed through CSCS.
 The pipeline can be triggered by commenting on a pull request with
 
 ```
-cscs-ci run default  # runs the default pipeline
+cscs-ci run default  # runs the default pipeline (on GH200 nodes @ CSCS)
+cscs-ci run beverin  # runs the beverin pipeline (on MI300A nodes @ CSCS)
 ```
 
 An automatic trigger on all merge-requests is currently disabled.
 
-This pipeline has 2 stages: `build` and `test`.
+This pipeline has 3 stages: `prepare`, `build` and `test`.
 
-The `build` stage builds a uenv image that includes all necessary compilers, MPI libraries and other dependecies to build QUDA and tmLQCD against QUDA. In this stage, QUDA is built correctly for the GH200 machine at CSCS with all required build flags for production runs. The uenv recipe can be found [here](uenv-recipes/tmlqcd/daint-gh200).
+## `prepare` stage
 
-In the `test` stage, the aforementioned uenv image is loaded, tmLQCD is built and linked against the QUDA library that is inside the image. Finally a minimal HMC is executed and checked against some reference data.
+The `prepare` stage builds a uenv image that includes all necessary compilers, MPI libraries and other dependencies to build QUDA and tmLQCD against QUDA. The uenv recipe can be found [here for GH200](uenv-recipes/tmlqcd/daint-gh200) and [here for MI300A](uenv-recipes/tmlqcd/beverin-mi300).
 
-## Force recompilation of quda
+## `build` stage
+
+In the `build` stage, the aforementioned uenv image is loaded and tmLQCD and QUDA are built from their spack packages against the dependencies provided by the base image. This stage exposes an artifact with the tmLQCD/QUDA binaries. For tmLQCD, the current branch is compiled. For QUDA, the following environment variables are respected:
+
+  * `QUDA_GIT_REPO`: the git repository URL to use as source (defaults to `https://github.com/lattice/quda.git`)
+  * `QUDA_GIT_BRANCH`: the git branch to compile (defaults to `develop`)
+  * `QUDA_GIT_COMMIT`: the git commit to compile (defaults to the current head commit of `QUDA_GIT_BRANCH`)
+
+Then QUDA is cloned and compiled, completely bypassing the spack compile cache.
+
+## `test` stage
+
+In the `test` stage, the aforementioned uenv image is loaded and tmLQCD and QUDA are unpacked from the artifact. Finally, a minimal HMC is executed and checked against some reference data.
+
+## Force recompilation of base image in `prepare` stage
 
 Remove the build cache:
 
 ```bash
-/capstor/scratch/cscs/${USER}/uenv-cache/user-environment/build_cache/linux-sles15-neoverse_v2/gcc-13.2.0/quda-*
-/capstor/scratch/cscs/${USER}/uenv-cache/user-environment/build_cache/linux-sles15-neoverse_v2-gcc-13.2.0-quda*
+/capstor/scratch/cscs/${USER}/uenv-cache/user-environment/build_cache/linux-sles15-neoverse_v2/gcc-13.2.0/tmlqcd-*
+/capstor/scratch/cscs/${USER}/uenv-cache/user-environment/build_cache/linux-sles15-neoverse_v2-gcc-13.2.0-tmlqcd*
 ```
 
 Or increment the the version counter tag in [.ci/include/cscs/00-variables.yml](include/cscs/00-variables.yml):
@@ -46,3 +61,4 @@ and commit.
 * [CSCS Uenv Writing Documentation](https://eth-cscs.github.io/alps-uenv/)
 * [CSCS Status Page](https://status.cscs.ch/)
 * [CSCS Spack Base Containers](https://github.com/orgs/eth-cscs/packages/container/package/docker-ci-ext%2Fspack-base-containers%2Fspack-build)
+* [SIRIUS CI/CD](https://github.com/electronic-structure/SIRIUS/tree/develop/ci), on which this pipeline is based
diff --git a/.ci/build.sh b/.ci/build.sh
new file mode 100755
index 000000000..d9dc14ba2
--- /dev/null
+++ b/.ci/build.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+
+set -xeuo pipefail
+
+export SPACK_SYSTEM_CONFIG_PATH="/user-environment/config"
+export SPACK_PYTHON="$(which python3.6)" # must be <=3.12, system python is 3.6
+export CICD_SRC_DIR="$PWD"
+export QUDA_SRC_DIR="$PWD/deps/src/quda"
+
+# QUDA git, branch and commit (the commit defaults to the current head of the branch)
+export QUDA_GIT_REPO="${QUDA_GIT_REPO:=https://github.com/lattice/quda.git}"
+export QUDA_GIT_BRANCH="${QUDA_GIT_BRANCH:=develop}"
+export QUDA_GIT_COMMIT="${QUDA_GIT_COMMIT:=$(git ls-remote "${QUDA_GIT_REPO}" "refs/heads/${QUDA_GIT_BRANCH}" | awk '{print $1}')}"
+
+# obtain QUDA; abort with a clear message if no commit could be resolved
+git clone -b "${QUDA_GIT_BRANCH}" "${QUDA_GIT_REPO}" "${QUDA_SRC_DIR}"
+git -C "${QUDA_SRC_DIR}" checkout "${QUDA_GIT_COMMIT:?no commit found for ${QUDA_GIT_BRANCH} in ${QUDA_GIT_REPO}}"
+
+# make sure we keep the stage directory
+spack config --scope=spack add config:build_stage:/dev/shm/spack-stage
+# we might need to install dependencies too, e.g. nlcglib in case of API changes
+spack config --scope=spack add config:install_tree:root:/dev/shm/spack-stage
+
+spack env create -d ./spack-env
+
+# add local repository with current tmlqcd recipe
+spack -e ./spack-env repo add "${REPO}"
+
+spack -e ./spack-env config add "packages:all:variants:[${VARIANTS}]"
+
+spack -e ./spack-env add "${SPEC}"
+
+# for tmlqcd use local src instead of fetch git
+spack -e ./spack-env develop -p "${CICD_SRC_DIR}" tmlqcd@cicd
+
+# for quda use local src instead of fetch git, to be able to test against a
+# different repo, branch or commit, and also to cope with the quda branch
+# develop being a moving target
+spack -e ./spack-env develop -p "${QUDA_SRC_DIR}" quda@cicd
+
+# display spack.yaml
+cat ./spack-env/spack.yaml
+
+spack -e ./spack-env concretize
+spack -e ./spack-env install
+
+# locate the spack build directories of tmlqcd and quda
+builddir_tmlqcd=$(spack -e ./spack-env location -b tmlqcd)
+builddir_quda=$(spack -e ./spack-env location -b quda)
+
+# pack both build directories into builddir.tar (kept as the job artifact)
+tar -cf builddir.tar "${builddir_tmlqcd}" "${builddir_quda}"
diff --git a/.ci/cscs_beverin_pipeline.yml b/.ci/cscs_beverin_pipeline.yml
new file mode 100644
index 000000000..aa980c2c8
--- /dev/null
+++ b/.ci/cscs_beverin_pipeline.yml
@@ -0,0 +1,52 @@
+include:
+  - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml'
+  - local: '/.ci/include/cscs/00-variables.yml'
+  - local: '/.ci/include/cscs/01-build-templates.yml'
+  - local: '/.ci/include/cscs/02-test-templates.yml'
+
+
+stages:
+  - prepare
+  - build
+  - test
+
+
+build-base/uenv/beverin-mi300:
+  stage: prepare
+  extends: [.uenv-builder-beverin-mi300, .beverin-mi300-secrets]
+  variables:
+    UENV_RECIPE: .ci/uenv-recipes/tmlqcd/beverin-mi300
+    SLURM_TIMELIMIT: "08:00:00"
+
+
+build-tmlqcd/uenv/beverin-mi300:
+  extends: [.uenv-runner-beverin-mi300, .build/base, .beverin-mi300-secrets]
+  needs: [build-base/uenv/beverin-mi300]
+  variables:
+    SPEC: "tmlqcd@cicd +lemon +quda ^quda@cicd +qdp +multigrid +twisted_clover +twisted_mass"
+    REPO: "./.ci/uenv-recipes/tmlqcd/beverin-mi300/repo/"
+    VARIANTS: "amdgpu_target=gfx942,amdgpu_target_sram_ecc=gfx942,+rocm,+mpi"
+    SLURM_TIMELIMIT: "01:00:00"
+
+
+test/beverin-mi300:
+  extends: [.uenv-runner-beverin-mi300, .test/base, .beverin-mi300-secrets]
+  needs: [build-tmlqcd/uenv/beverin-mi300]
+  variables:
+    REFPATH: "doc/sample-output/hmc-quda-cscs"
+    QUDA_ENABLE_TUNING: 0 # disable tuning
+    QUDA_ENABLE_P2P: 0 # disable P2P
+    SLURM_JOB_NUM_NODES: 2
+    SLURM_NTASKS: 8
+    SLURM_TIMELIMIT: "01:00:00"
+  script:
+    - hmc_tm -f doc/sample-input/sample-hmc-quda-cscs-beverin.input
+    - |
+      if test "${SLURM_PROCID}" -eq "0"; then
+        echo "Check the results on SLURM_PROCID=${SLURM_PROCID} ..."
+        numdiff -r 1.2e-6 -X 1:22 -X 1:5-21 -X 2:22 -X 2:5-21 output.data ${REFPATH}/output.data
+        for i in $(seq 0 2 18); do
+          f=onlinemeas.$(printf %06d $i);
+          numdiff -r 5e-4 ${f} ${REFPATH}/${f};
+        done
+      fi
diff --git a/.ci/cscs_default_pipeline.yml b/.ci/cscs_default_pipeline.yml
index 0a059aa40..c5a336df6 100644
--- a/.ci/cscs_default_pipeline.yml
+++ b/.ci/cscs_default_pipeline.yml
@@ -1,25 +1,52 @@
 include:
   - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml'
   - local: '/.ci/include/cscs/00-variables.yml'
-  - local: '/.ci/include/cscs/01-test-templates.yml'
+  - local: '/.ci/include/cscs/01-build-templates.yml'
+  - local: '/.ci/include/cscs/02-test-templates.yml'
+
 
 stages:
+  - prepare
   - build
   - test
 
-build-quda/uenv/daint-gh200:
-  stage: build
+
+build-base/uenv/daint-gh200:
+  stage: prepare
   extends: .uenv-builder-daint-gh200
   variables:
     UENV_RECIPE: .ci/uenv-recipes/tmlqcd/daint-gh200
+    SLURM_TIMELIMIT: "04:00:00"
+
+
+build-tmlqcd/uenv/daint-gh200:
+  extends: [.uenv-runner-daint-gh200, .build/base]
+  needs: [build-base/uenv/daint-gh200]
+  variables:
+    SPEC: "tmlqcd@cicd +lemon +quda ^quda@cicd +qdp +multigrid +twisted_clover +twisted_mass"
+    REPO: "./.ci/uenv-recipes/tmlqcd/daint-gh200/repo/"
+    VARIANTS: "cuda_arch=90,+cuda,+mpi"
+    SLURM_TIMELIMIT: "01:00:00"
+
 
 test/daint-gh200:
-  extends: .test/hmc
+  extends: [.uenv-runner-daint-gh200, .test/base]
+  needs: [build-tmlqcd/uenv/daint-gh200]
   variables:
-    INPUT_FILE: "doc/sample-input/sample-hmc-quda-cscs.input"
     REFPATH: "doc/sample-output/hmc-quda-cscs"
     QUDA_ENABLE_TUNING: 0 # disable tuning
-    QUDA_ENABLE_GDR: 1 # enable GPU-Direct RDMA
+    QUDA_ENABLE_GDR: 0 # disable GPU-Direct RDMA
     SLURM_JOB_NUM_NODES: 2
     SLURM_NTASKS: 8
-    SLURM_TIMELIMIT: "00:30:00"
+    SLURM_TIMELIMIT: "01:00:00"
+  script:
+    - hmc_tm -f doc/sample-input/sample-hmc-quda-cscs.input
+    - |
+      if test "${SLURM_PROCID}" -eq "0"; then
+        echo "Check the results on SLURM_PROCID=${SLURM_PROCID} ..."
+        numdiff -r 1.2e-6 -X 1:22 -X 1:5-21 -X 2:22 -X 2:5-21 output.data ${REFPATH}/output.data
+        for i in $(seq 0 2 18); do
+          f=onlinemeas.$(printf %06d $i);
+          numdiff -r 5e-4 ${f} ${REFPATH}/${f};
+        done
+      fi
diff --git a/.ci/include/cscs/00-variables.yml b/.ci/include/cscs/00-variables.yml
index 27bb44047..bdafc1ae0 100644
--- a/.ci/include/cscs/00-variables.yml
+++ b/.ci/include/cscs/00-variables.yml
@@ -9,5 +9,10 @@
 variables:
   UENV_NAME: tmlqcd
   UENV_VERSION: experimental
-  UENV_TAG: v0.0.6
+  UENV_TAG: v0.0.8
 
+# These are the firecrest id and secret for the beverin pipeline
+.beverin-mi300-secrets:
+  variables:
+    F7T_CLIENT_ID: $F7T_TDS_CONSUMER_KEY
+    F7T_CLIENT_SECRET: $F7T_TDS_CONSUMER_SECRET
diff --git a/.ci/include/cscs/01-build-templates.yml b/.ci/include/cscs/01-build-templates.yml
new file mode 100644
index 000000000..6c4a87174
--- /dev/null
+++ b/.ci/include/cscs/01-build-templates.yml
@@ -0,0 +1,17 @@
+include:
+  - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml'
+
+
+.build/base:
+  stage: build
+  image: ${UENV_NAME}/${UENV_VERSION}:${UENV_TAG}
+  artifacts:
+    paths:
+      - builddir.tar
+  variables:
+    SLURM_TIMELIMIT: "01:00:00"
+  script:
+    - git clone --filter=tree:0 $(jq -r .spack.repo /user-environment/meta/configure.json) /dev/shm/spack-clone
+    - git -C /dev/shm/spack-clone checkout $(jq -r .spack.commit /user-environment/meta/configure.json)
+    - source /dev/shm/spack-clone/share/spack/setup-env.sh
+    - bwrap --dev-bind / / --tmpfs ~ -- ./.ci/build.sh
diff --git a/.ci/include/cscs/01-test-templates.yml b/.ci/include/cscs/01-test-templates.yml
deleted file mode 100644
index 9a4a8da45..000000000
--- a/.ci/include/cscs/01-test-templates.yml
+++ /dev/null
@@ -1,54 +0,0 @@
-include:
-  - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml'
-
-
-.test/base:
-  stage: test
-  extends: .uenv-runner-daint-gh200
-  image: ${UENV_NAME}/${UENV_VERSION}:${UENV_TAG}
-  variables:
-    WITH_UENV_VIEW: "default"
-    CFLAGS: "-O3 -fopenmp -mtune=neoverse-v2 -mcpu=neoverse-v2"
-    CXXFLAGS: "-O3 -fopenmp -mtune=neoverse-v2 -mcpu=neoverse-v2"
-    LDFLAGS: "-fopenmp"
-  before_script:
-    - |
-      if test "${SLURM_PROCID}" -eq "0"; then
-        export CC="$(which mpicc)"
-        export CXX="$(which mpicxx)"
-        mkdir -p install_dir
-        autoconf
-        ./configure \
-          --enable-quda_experimental \
-          --enable-mpi \
-          --enable-omp \
-          --with-mpidimension=4 \
-          --disable-sse2 \
-          --disable-sse3 \
-          --enable-alignment=32 \
-          --with-qudadir="/user-environment/env/default" \
-          --with-limedir="/user-environment/env/default" \
-          --with-lemondir="/user-environment/env/default" \
-          --with-lapack="-lopenblas -L/user-environment/env/default/lib" \
-          --with-cudadir="/user-environment/env/default/lib64" \
-          --prefix="$(pwd)/install_dir"
-        make
-        make install
-        touch preparation-done-${CI_JOB_ID}
-      fi
-    - while test ! -f preparation-done-${CI_JOB_ID}; do sleep 5; done
-
-
-.test/hmc:
-  extends: .test/base
-  script:
-    - ./install_dir/bin/hmc_tm -f "${INPUT_FILE}"
-    - |
-      if test "${SLURM_PROCID}" -eq "0"; then
-        echo "Check the results on SLURM_PROCID=${SLURM_PROCID} ..."
-        numdiff -r 1.2e-6 -X 1:22 -X 1:5-21 -X 2:22 -X 2:5-21 output.data ${REFPATH}/output.data
-        for i in $(seq 0 2 18); do
-          f=onlinemeas.$(printf %06d $i);
-          numdiff -r 5e-4 ${f} ${REFPATH}/${f};
-        done
-      fi
diff --git a/.ci/include/cscs/02-test-templates.yml b/.ci/include/cscs/02-test-templates.yml
new file mode 100644
index 000000000..d70a8a92c
--- /dev/null
+++ b/.ci/include/cscs/02-test-templates.yml
@@ -0,0 +1,20 @@
+include:
+  - remote: 'https://gitlab.com/cscs-ci/recipes/-/raw/master/templates/v2/.ci-ext.yml'
+
+
+.test/base:
+  stage: test
+  image: ${UENV_NAME}/${UENV_VERSION}:${UENV_TAG}
+  variables:
+    WITH_UENV_VIEW: "default"
+  before_script:
+    - |
+      if test "${SLURM_LOCALID}" -eq "0"; then
+        tar xf ./builddir.tar -C /
+        touch preparation-done-${CI_JOB_ID}-${SLURM_NODEID} # one marker per node: /dev/shm is node-local
+      fi
+    - while test ! -f preparation-done-${CI_JOB_ID}-${SLURM_NODEID}; do sleep 5; done
+    - bindir=$(echo /dev/shm/spack-stage/*/spack-stage-tmlqcd-cicd-*/spack-build-*/src/bin)
+    - libdir=$(dirname $(echo /dev/shm/spack-stage/*/spack-stage-quda-cicd-*/spack-build-*/lib/libquda.so))
+    - export PATH=${bindir}:$PATH
+    - export LD_LIBRARY_PATH=${libdir}${LD_LIBRARY_PATH:+:${LD_LIBRARY_PATH}}
diff --git a/.ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages/lemonio/package.py b/.ci/spack_packages/lemonio/package.py
old mode 100755
new mode 100644
similarity index 54%
rename from .ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages/lemonio/package.py
rename to .ci/spack_packages/lemonio/package.py
index d70cac492..a3e66f64e
--- a/.ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages/lemonio/package.py
+++ b/.ci/spack_packages/lemonio/package.py
@@ -2,12 +2,13 @@
 #
 # SPDX-License-Identifier: (Apache-2.0 OR MIT)
 
-from spack_repo.builtin.build_systems.autotools import AutotoolsPackage
+from spack_repo.builtin.build_systems import cmake
+from spack_repo.builtin.build_systems.cmake import CMakePackage, generator
 
 
 from spack.package import *
 
-class Lemonio(AutotoolsPackage):
+class Lemonio(CMakePackage):
     """LEMON: Lightweight Parallel I/O library for Lattice QCD."""
 
     homepage = "https://github.com/etmc/lemon"
@@ -16,13 +17,13 @@ class Lemonio(AutotoolsPackage):
 
     version('master', branch='master')
 
-    depends_on("autoconf", type="build", when="@master build_system=autotools")
-    depends_on("automake", type="build", when="@master build_system=autotools")
-    depends_on("libtool", type="build", when="@master build_system=autotools")
+    depends_on("c", type="build")
+    depends_on("cxx", type="build")
+    depends_on("fortran", type="build")
 
     depends_on('mpi')
+    generator("ninja")
 
     def configure_args(self):
         args = []
-        args.append('CC={0}'.format(self.spec['mpi'].mpicc))
         return args
diff --git a/.ci/spack_packages/quda/package.py b/.ci/spack_packages/quda/package.py
new file mode 100644
index 000000000..ad34b3fdb
--- /dev/null
+++ b/.ci/spack_packages/quda/package.py
@@ -0,0 +1,203 @@
+# Copyright Spack Project Developers. See COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+from spack_repo.builtin.build_systems.cmake import CMakePackage, generator
+from spack_repo.builtin.build_systems.cuda import CudaPackage
+from spack_repo.builtin.build_systems.rocm import ROCmPackage
+
+from spack.package import *
+
+
+class Quda(CMakePackage, CudaPackage, ROCmPackage):
+    """QUDA is a library for performing calculations in lattice QCD on GPUs."""
+
+    homepage = "https://lattice.github.io/quda/"
+    url = "https://github.com/lattice/quda/archive/refs/tags/v1.1.0.tar.gz"
+    git = "https://github.com/lattice/quda.git"
+
+    tags = ["hep", "lattice"]
+
+    maintainers("chaoos", "mtaillefumier")
+
+    license("MIT OR BSD-3-Clause", checked_by="chaoos")
+
+    version("develop", branch="develop")
+
+    # git describe --tags --match 'v*' 18bf43ed40c75ae276e55bb8ddf2f64aa5510c37
+    version(
+        "1.1.0-4597-g18bf43ed4", preferred=True, commit="18bf43ed40c75ae276e55bb8ddf2f64aa5510c37"
+    )
+
+    version("1.1.0", sha256="b4f635c993275010780ea09d8e593e0713a6ca1af1db6cc86c64518714fcc745")
+
+    # don't expose ${ROCM_PATH}/include/hipfft explicitly, spack finds it alone
+    patch("quda-hipfft-public-to-private.patch", when="@:")
+
+    # build dependencies
+    generator("ninja")
+    depends_on("cmake@3.18:", type="build")
+    depends_on("ninja", type="build")
+    depends_on("c", type="build")
+    depends_on("cxx", type="build")
+    depends_on("fortran", type="build", when="+tifr")
+    depends_on("fortran", type="build", when="+bqcd")
+
+    variant("shared", default=True, description="Build shared libraries")
+    variant(
+        "backwards", default=False, description="Enable stacktrace generation using backwards-cpp"
+    )
+    variant("mpi", default=False, description="Enable MPI support")
+    variant("qmp", default=False, description="Enable QMP")
+    variant("qio", default=False, description="Enable QIO", when="+qmp")
+    variant("openqcd", default=False, description="Enable openQCD interface")
+    variant("milc", default=False, description="Enable MILC interface")
+    variant("qdp", default=False, description="Enable QDP interface")
+    variant("bqcd", default=False, description="Enable BQCD interface")
+    variant("cps", default=False, description="Enable CPS interface")
+    variant("qdpjit", default=False, description="Enable QDPJIT interface")
+    variant("tifr", default=False, description="Enable TIFR interface")
+    variant("multigrid", default=False, description="Enable multigrid")
+    variant("nvshmem", default=False, description="Enable NVSHMEM", when="+cuda")
+    variant("openmp", default=False, description="Enable openmp support")
+    variant("clover", default=False, description="Build clover Dirac operators")
+    variant(
+        "clover_hasenbusch", default=False, description="Build clover Hasenbusch twist operators"
+    )
+    variant("domain_wall", default=False, description="Build domain wall Dirac operators")
+    variant("laplace", default=False, description="Build laplace operator")
+    variant(
+        "ndeg_twisted_clover",
+        default=False,
+        description="Build non-degenerate twisted clover Dirac operators",
+    )
+    variant(
+        "ndeg_twisted_mass",
+        default=False,
+        description="Build non-degenerate twisted mass Dirac operators",
+    )
+    variant("staggered", default=False, description="Build staggered Dirac operators")
+    variant("twisted_clover", default=False, description="Build twisted clover Dirac operators")
+    variant("twisted_mass", default=False, description="Build twisted mass Dirac operators")
+    variant("wilson", default=True, description="Build Wilson Dirac operators")
+    variant("usqcd", default=False, description="Download and build usqcd", when="+qmp")
+    variant("eigen", default=True, description="Enable eigen support")
+
+    with when("+multigrid"):
+        variant(
+            "mg_mrhs_list",
+            default="16",
+            multi=True,
+            description="The list of multi-rhs sizes that get compiled",
+        )
+        variant(
+            "mg_nvec_list",
+            default="6,24,32",
+            multi=True,
+            description="The list of null space vector sizes that get compiled",
+        )
+
+    # dependencies
+    depends_on("mpi", when="+mpi")
+    depends_on("cuda", when="+cuda")
+    depends_on("nvshmem", when="+nvshmem")
+    depends_on("gdrcopy", when="+nvshmem")
+
+    with when("+rocm"):
+        depends_on("hip")
+        depends_on("hipblas")
+        depends_on("hipfft")
+        depends_on("hiprand")
+        depends_on("hipcub")
+
+    conflicts("+qmp +mpi", msg="Specifying both QMP and MPI might result in undefined behavior")
+    conflicts("+cuda +rocm", msg="CUDA and ROCm support are mutually exclusive")
+    conflicts("~cuda ~rocm", msg="Either CUDA or ROCm support is required")
+    conflicts("cuda_arch=none", when="+cuda", msg="Please indicate a cuda_arch value")
+    conflicts("amdgpu_target=none", when="+rocm", msg="Please indicate a amdgpu_target value")
+    conflicts(
+        "+nvshmem", when="~mpi ~qmp", msg="NVSHMEM requires either +mpi or +qmp to be enabled"
+    )
+
+    # CMAKE_BUILD_TYPE
+    variant(
+        "build_type",
+        default="STRICT",
+        description="The build type to build",
+        values=("STRICT", "RELEASE", "DEVEL", "DEBUG", "HOSTDEBUG", "SANITIZE"),
+    )
+
+    def cmake_args(self):
+        """Assemble the CMake definitions for QUDA from the concretized spec.
+
+        Returns the list built with ``define`` / ``define_from_variant``.
+        """
+        # the class-level conflicts guarantee that exactly one GPU backend is enabled
+        if self.spec.satisfies("+cuda"):
+            target = "CUDA"
+            cuda_archs = self.spec.variants["cuda_arch"].value
+            arch = " ".join(f"sm_{i}" for i in cuda_archs)
+        elif self.spec.satisfies("+rocm"):
+            target = "HIP"
+            arch = self.spec.variants["amdgpu_target"].value
+
+        args = [
+            self.define("QUDA_BUILD_ALL_TESTS", False),
+            self.define("QUDA_TARGET_TYPE", target),
+            self.define("QUDA_GPU_ARCH", arch),
+            self.define("QUDA_PRECISION", 14),
+            self.define("QUDA_RECONSTRUCT", 7),
+            # follow the usqcd variant instead of passing the literal string "usqcd"
+            self.define_from_variant("QUDA_DOWNLOAD_USQCD", "usqcd"),
+            self.define("QUDA_DIRAC_DEFAULT_OFF", True),
+            self.define_from_variant("QUDA_DIRAC_CLOVER", "clover"),
+            self.define_from_variant("QUDA_DIRAC_CLOVER_HASENBUSCH", "clover_hasenbusch"),
+            self.define_from_variant("QUDA_DIRAC_DOMAIN_WALL", "domain_wall"),
+            self.define_from_variant("QUDA_DIRAC_LAPLACE", "laplace"),
+            self.define_from_variant("QUDA_DIRAC_NDEG_TWISTED_CLOVER", "ndeg_twisted_clover"),
+            self.define_from_variant("QUDA_DIRAC_NDEG_TWISTED_MASS", "ndeg_twisted_mass"),
+            self.define_from_variant("QUDA_DIRAC_STAGGERED", "staggered"),
+            self.define_from_variant("QUDA_DIRAC_TWISTED_CLOVER", "twisted_clover"),
+            self.define_from_variant("QUDA_DIRAC_TWISTED_MASS", "twisted_mass"),
+            self.define_from_variant("QUDA_DIRAC_WILSON", "wilson"),
+            self.define_from_variant("QUDA_MPI", "mpi"),
+            self.define_from_variant("QUDA_QMP", "qmp"),
+            self.define_from_variant("QUDA_QIO", "qio"),
+            self.define_from_variant("QUDA_INTERFACE_OPENQCD", "openqcd"),
+            self.define_from_variant("QUDA_INTERFACE_MILC", "milc"),
+            self.define_from_variant("QUDA_INTERFACE_QDP", "qdp"),
+            self.define_from_variant("QUDA_INTERFACE_BQCD", "bqcd"),
+            self.define_from_variant("QUDA_INTERFACE_CPS", "cps"),
+            self.define_from_variant("QUDA_INTERFACE_QDPJIT", "qdpjit"),
+            self.define_from_variant("QUDA_INTERFACE_TIFR", "tifr"),
+            self.define_from_variant("QUDA_MULTIGRID", "multigrid"),
+            self.define_from_variant("QUDA_NVSHMEM", "nvshmem"),
+            self.define_from_variant("QUDA_OPENMP", "openmp"),
+            self.define_from_variant("QUDA_BACKWARDS", "backwards"),
+            self.define_from_variant("QUDA_USE_EIGEN", "eigen"),
+            self.define_from_variant("QUDA_BUILD_SHAREDLIB", "shared"),
+            self.define_from_variant("BUILD_SHARED_LIBS", "shared"),
+        ]
+        if self.spec.satisfies("+multigrid"):
+            nvec_list = ",".join(self.spec.variants["mg_nvec_list"].value)
+            mrhs_list = ",".join(self.spec.variants["mg_mrhs_list"].value)
+            args.append(self.define("QUDA_MULTIGRID_NVEC_LIST", nvec_list))
+            args.append(self.define("QUDA_MULTIGRID_MRHS_LIST", mrhs_list))
+
+        if self.spec.satisfies("+nvshmem"):
+            args.append(self.define("QUDA_NVSHMEM_HOME", self.spec["nvshmem"].prefix))
+            args.append(self.define("QUDA_GDRCOPY_HOME", self.spec["gdrcopy"].prefix))
+
+        if self.spec.satisfies("+cuda"):
+            args.append(self.define("QUDA_GPU_ARCH_SUFFIX", "real"))  # real or virtual
+        elif self.spec.satisfies("+rocm"):
+            args.append(self.define("CMAKE_C_COMPILER", self.spec["hip"].hipcc))
+            args.append(self.define("CMAKE_CXX_COMPILER", self.spec["hip"].hipcc))
+            # args.append(self.define("ROCM_PATH", self.spec["hip"].prefix))
+
+            # required when building on a machine with no AMD GPU present
+            args.append(self.define("AMDGPU_TARGETS", arch))
+
+            # suppress _GLIBCXX17_DEPRECATED warnings when compiling c++17
+            args.append(self.define("CMAKE_CXX_FLAGS", "-Wno-deprecated-declarations"))
+        return args
diff --git a/.ci/spack_packages/quda/quda-hipfft-public-to-private.patch b/.ci/spack_packages/quda/quda-hipfft-public-to-private.patch
new file mode 100644
index 000000000..127944195
--- /dev/null
+++ b/.ci/spack_packages/quda/quda-hipfft-public-to-private.patch
@@ -0,0 +1,13 @@
+diff --git a/lib/targets/hip/target_hip.cmake b/lib/targets/hip/target_hip.cmake
+index 6bdca1127..b22a469a1 100644
+--- a/lib/targets/hip/target_hip.cmake
++++ b/lib/targets/hip/target_hip.cmake
+@@ -132,7 +132,7 @@ set_source_files_properties( ${QUDA_CU_OBJS} PROPERTIES LANGUAGE HIP)
+ target_link_libraries(quda PUBLIC hip::hiprand roc::rocrand hip::hipcub roc::rocprim_hip)
+ target_link_libraries(quda PUBLIC roc::hipblas roc::rocblas)
+ 
+-target_include_directories(quda PUBLIC ${ROCM_PATH}/hipfft/include)
++target_include_directories(quda PRIVATE ${ROCM_PATH}/hipfft/include)
+ target_link_libraries(quda PUBLIC hip::hipfft)
+ 
+ add_subdirectory(targets/hip)
diff --git a/.ci/spack_packages/tmlqcd/package.py b/.ci/spack_packages/tmlqcd/package.py
new file mode 100755
index 000000000..5ed85c341
--- /dev/null
+++ b/.ci/spack_packages/tmlqcd/package.py
@@ -0,0 +1,139 @@
+# Copyright Spack Project Developers. See COPYRIGHT file for details.
+#
+# SPDX-License-Identifier: (Apache-2.0 OR MIT)
+
+from spack_repo.builtin.build_systems import cmake
+from spack_repo.builtin.build_systems.cmake import CMakePackage, generator
+from spack_repo.builtin.build_systems.rocm import ROCmPackage
+from spack_repo.builtin.build_systems.cuda import CudaPackage
+
+from spack.package import *
+
+
+class Tmlqcd(CMakePackage, CudaPackage, ROCmPackage):
+    """Base class for building tmlQCD."""
+
+    homepage = "https://www.itkp.uni-bonn.de/~urbach/software.html"
+    url = "https://github.com/etmc/tmLQCD/archive/refs/tags/rel-5-1-6.tar.gz"
+
+    # todo: change this back to etmc as soon as cmake PR is merged
+    git = "https://github.com/mtaillefumier/tmLQCD.git"
+    license("GPL-3.0-or-later")
+
+    maintainers("mtaillefumier")
+    version("master", branch="master")
+
+    # todo: remove this version as soon as
+    # https://github.com/etmc/tmLQCD/pull/664 is merged
+    version("cmake_support", branch="cmake_support")
+
+    variant("lemon", default=False, description="Enable the lemon backend")
+    variant("mpi", default=True, description="Enable mpi support")
+    variant("DDalphaAMG", default=False, description="Enable DAlphaAMG support")
+    variant("openmp", default=True, description="Enable OpenMP")
+    variant("fftw", default=True, description="Enable FFTW interface")
+    variant(
+        "persistent_mpi",
+        default=True,
+        description="Enable persistent mpi calls for spinor and gauge fields",
+        when="+mpi",
+    )
+    variant(
+        "nonblocking_mpi",
+        default=True,
+        description="Enable non-blocking mpi calls for spinor and gauge fields",
+        when="+mpi",
+    )
+    variant("fixedvolume", default=True, description="Enable fixed volume at compile time")
+    variant(
+        "alignment",
+        default="auto",
+        values=("none", "auto", "16", "32", "64"),
+        description="Automatically or expliclty align arrays",
+    )
+    variant("gauge_copy", default=True, description="Enable gauge field copy")
+    variant("half_spinor", default=True, description="Use a Dirac operator with half-spinor")
+    variant("shared", default=False, description="Enable shared library")
+    variant("shmem", default=False, description="Use shmem API")
+    variant("quda", default=True, description="Enable the QUDA library", when="+cuda")
+    variant("quda", default=True, description="Enable the QUDA library", when="+rocm")
+    variant(
+        "QPhiX", default=False, description="Enable the QPhiX library for Intel Xeon and Xeon Phis"
+    )
+    variant(
+        "mpi_dimensions",
+        default="4",
+        values=("1", "2", "3", "4", "x", "xy", "xyz"),
+        description="number of dimensions the mpi processes are distributed. the default is parallelization over all four dimensions txyz",
+        when="+mpi",
+    )
+
+    generator("ninja")
+
+    # language dependencies
+    depends_on("c", type="build")
+    depends_on("cxx", type="build")
+    depends_on("fortran", type="build")
+
+    # conflicts
+    conflicts("+cuda", when="cuda_arch=none")
+    conflicts("+rocm", when="amdgpu_target=none")
+    conflicts("+cuda +rocm", msg="CUDA and ROCm support are mutually exclusive")
+
+    # hard dependencies
+    depends_on("c-lime")
+    depends_on("blas")
+    depends_on("lapack")
+    depends_on("pkgconfig", type="build")
+
+    # dependencies
+    depends_on("mpi", when="+mpi")
+    depends_on("lemonio", when="+lemon")
+
+    depends_on("llvm-openmp", when="+rocm+openmp")
+
+    with when("+quda"):
+        depends_on(
+            "quda+shared+twisted_mass+twisted_clover+clover+ndeg_twisted_clover+ndeg_twisted_mass+wilson+qdp+multigrid"
+        )
+
+        depends_on("quda+mpi", when="+mpi")
+        depends_on("quda+cuda", when="+cuda")
+        depends_on("quda+rocm", when="+rocm")
+
+    depends_on("fftw-api@3", when="+fftw")
+
+
+class CMakeBuilder(cmake.CMakeBuilder):
+    def cmake_args(self):
+        """Return the CMake definitions derived from the variants of the spec."""
+        args = [
+            self.define_from_variant("BUILD_SHARED_LIBS", "shared"),
+            self.define_from_variant("TM_USE_LEMON", "lemon"),
+            self.define_from_variant("TM_USE_MPI", "mpi"),
+            self.define_from_variant("TM_USE_QUDA", "quda"),
+            self.define_from_variant("TM_USE_CUDA", "cuda"),
+            self.define_from_variant("TM_USE_HIP", "rocm"),
+            self.define_from_variant("TM_USE_FFTW", "fftw"),
+            self.define_from_variant("TM_USE_OMP", "openmp"),
+            self.define_from_variant("TM_USE_SHMEM", "shmem"),
+            self.define_from_variant("TM_USE_GAUGE_COPY", "gauge_copy"),
+            self.define_from_variant("TM_USE_HALFSPINOR", "half_spinor"),
+        ]
+
+        # Use hipcc in case of a ROCm build (satisfies() checks this spec only, not its deps)
+        if self.spec.satisfies("+rocm"):
+            hip = self.spec["hip"]
+            args.append(self.define("CMAKE_C_COMPILER", hip.hipcc))
+            args.append(self.define("CMAKE_CXX_COMPILER", hip.hipcc))
+            # help hipcc find openmp
+            if self.spec.satisfies("+openmp"):
+                omp = self.spec["llvm-openmp"]
+                args.append(self.define("OpenMP_C_FLAGS", "-fopenmp"))
+                args.append(self.define("OpenMP_CXX_FLAGS", "-fopenmp"))
+                args.append(self.define("OpenMP_C_LIB_NAMES", "omp"))
+                args.append(self.define("OpenMP_CXX_LIB_NAMES", "omp"))
+                args.append(self.define("OpenMP_omp_LIBRARY", "{0}/libomp.so".format(omp.prefix.lib)))
+                args.append(self.define("OpenMP_CXX_INCLUDE_DIR", omp.prefix.include))
+
+        return args
diff --git a/.ci/uenv-recipes/tmlqcd/beverin-mi300/compilers.yaml b/.ci/uenv-recipes/tmlqcd/beverin-mi300/compilers.yaml
new file mode 100644
index 000000000..38a8faa0b
--- /dev/null
+++ b/.ci/uenv-recipes/tmlqcd/beverin-mi300/compilers.yaml
@@ -0,0 +1,4 @@
+gcc:
+  version: "13"
+llvm-amdgpu:
+  version: "6.3.3"
\ No newline at end of file
diff --git a/.ci/uenv-recipes/tmlqcd/beverin-mi300/config.yaml b/.ci/uenv-recipes/tmlqcd/beverin-mi300/config.yaml
new file mode 100644
index 000000000..b4b3c6495
--- /dev/null
+++ b/.ci/uenv-recipes/tmlqcd/beverin-mi300/config.yaml
@@ -0,0 +1,10 @@
+name: tmlqcd
+store: /user-environment
+spack:
+  repo: https://github.com/spack/spack.git
+  commit: releases/v1.1
+  packages:
+    repo: https://github.com/spack/spack-packages.git
+    commit: 5f20b9190596e0b875141e8cee03f0d3847ad65c
+description: "tmLQCD dependencies for CSCS CI on MI300A."
+version: 2
diff --git a/.ci/uenv-recipes/tmlqcd/beverin-mi300/environments.yaml b/.ci/uenv-recipes/tmlqcd/beverin-mi300/environments.yaml
new file mode 100755
index 000000000..1b861a3cb
--- /dev/null
+++ b/.ci/uenv-recipes/tmlqcd/beverin-mi300/environments.yaml
@@ -0,0 +1,29 @@
+gcc-env:
+  compiler: [gcc, llvm-amdgpu]
+  network:
+      mpi: cray-mpich@8.1.32+rocm
+      specs: [ 'libfabric@2.3 +rocm' ]
+  unify: when_possible
+  duplicates:
+    strategy: full
+  specs:
+  # add GPU-specific packages here
+  - hip@6.3.3 ^mesa@23.3.6
+  - llvm-amdgpu
+  # tmlqcd and quda are not required, since we build their newest commits in the
+  # build stage. Although, we want all their dependencies in the base uenv.
+  - tmlqcd@cmake_support +lemon +quda ^quda@develop +qdp +multigrid +twisted_clover +twisted_mass
+  - numdiff
+  variants:
+  - +mpi
+  - +rocm
+  - amdgpu_target=gfx942
+  - amdgpu_target_sram_ecc=gfx942
+  views:
+    default:
+      link: roots
+      exclude: ["llvm"]
+      uenv:
+        add_compilers: true
+        prefix_paths:
+          LD_LIBRARY_PATH: [lib, lib64]
diff --git a/.ci/uenv-recipes/tmlqcd/beverin-mi300/modules.yaml b/.ci/uenv-recipes/tmlqcd/beverin-mi300/modules.yaml
new file mode 100644
index 000000000..623307b09
--- /dev/null
+++ b/.ci/uenv-recipes/tmlqcd/beverin-mi300/modules.yaml
@@ -0,0 +1,23 @@
+modules:
+  # Paths to check when creating modules for all module sets
+  prefix_inspections:
+    bin:
+      - PATH
+    lib:
+      - LD_LIBRARY_PATH
+    lib64:
+      - LD_LIBRARY_PATH
+
+  default:
+    arch_folder: false
+    # Where to install modules
+    roots:
+      tcl: /user-environment/modules
+    tcl:
+      all:
+        autoload: none
+      hash_length: 0
+      exclude_implicits: true
+      exclude: ['%gcc@7.5.0', 'gcc %gcc@7.5.0']
+      projections:
+        all: '{name}/{version}'
diff --git a/.ci/uenv-recipes/tmlqcd/beverin-mi300/post-install b/.ci/uenv-recipes/tmlqcd/beverin-mi300/post-install
new file mode 100755
index 000000000..a5e932cc5
--- /dev/null
+++ b/.ci/uenv-recipes/tmlqcd/beverin-mi300/post-install
@@ -0,0 +1,8 @@
+#!/bin/bash
+
+set -e
+
+# remove offending environment variables
+jq '.views["default"].env.values.scalar["HIPCC_LINK_FLAGS_APPEND"]|="" | .views["default"].env.values.scalar["HIPCC_COMPILE_FLAGS_APPEND"]|="" ' /user-environment/meta/env.json > /tmp/env.json
+# copy file back to destination
+cp /tmp/env.json /user-environment/meta/env.json
\ No newline at end of file
diff --git a/.ci/uenv-recipes/tmlqcd/beverin-mi300/repo/packages b/.ci/uenv-recipes/tmlqcd/beverin-mi300/repo/packages
new file mode 120000
index 000000000..1229fc196
--- /dev/null
+++ b/.ci/uenv-recipes/tmlqcd/beverin-mi300/repo/packages
@@ -0,0 +1 @@
+../../../../spack_packages
\ No newline at end of file
diff --git a/.ci/uenv-recipes/tmlqcd/beverin-mi300/repo/repo.yaml b/.ci/uenv-recipes/tmlqcd/beverin-mi300/repo/repo.yaml
new file mode 100644
index 000000000..f08fa46a4
--- /dev/null
+++ b/.ci/uenv-recipes/tmlqcd/beverin-mi300/repo/repo.yaml
@@ -0,0 +1,2 @@
+repo:
+  namespace: apps
diff --git a/.ci/uenv-recipes/tmlqcd/daint-gh200/compilers.yaml b/.ci/uenv-recipes/tmlqcd/daint-gh200/compilers.yaml
index 840d9974d..8bd185e43 100644
--- a/.ci/uenv-recipes/tmlqcd/daint-gh200/compilers.yaml
+++ b/.ci/uenv-recipes/tmlqcd/daint-gh200/compilers.yaml
@@ -1,2 +1,2 @@
 gcc:
-  version: "14.2"
+  version: "14.3"
diff --git a/.ci/uenv-recipes/tmlqcd/daint-gh200/config.yaml b/.ci/uenv-recipes/tmlqcd/daint-gh200/config.yaml
index 3ec694351..f6b76048e 100644
--- a/.ci/uenv-recipes/tmlqcd/daint-gh200/config.yaml
+++ b/.ci/uenv-recipes/tmlqcd/daint-gh200/config.yaml
@@ -2,10 +2,10 @@ name: tmlqcd
 store: /user-environment
 spack:
   repo: https://github.com/spack/spack.git
-  commit: releases/v1.0
+  commit: releases/v1.1
   packages:
     repo: https://github.com/spack/spack-packages.git
-    commit: releases/v2025.11
+    #commit: 
 modules: true
-description: "tmLQCD is a freely available software suite providing a set of tools to be used in lattice QCD simulations."
+description: "tmLQCD dependencies for CSCS CI on GH200."
 version: 2
diff --git a/.ci/uenv-recipes/tmlqcd/daint-gh200/environments.yaml b/.ci/uenv-recipes/tmlqcd/daint-gh200/environments.yaml
index 41be6341d..cafba6582 100644
--- a/.ci/uenv-recipes/tmlqcd/daint-gh200/environments.yaml
+++ b/.ci/uenv-recipes/tmlqcd/daint-gh200/environments.yaml
@@ -2,15 +2,11 @@ gcc-env:
   compiler: [gcc]
   network:
       mpi: cray-mpich@8.1.32 +cuda
+      specs: ['libfabric@2.4.0+cuda']
   unify: true
   specs:
-  - python@3.12
+  - tmlqcd@cmake_support +lemon +quda ^quda@develop +qdp +multigrid +twisted_clover +twisted_mass
   - numdiff
-  - quda@develop +qdp +multigrid +twisted_clover +twisted_mass
-  - lemonio
-  - c-lime
-  - openblas
-  - cuda
   variants:
   - +mpi
   - +cuda
diff --git a/.ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages b/.ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages
new file mode 120000
index 000000000..39e779607
--- /dev/null
+++ b/.ci/uenv-recipes/tmlqcd/daint-gh200/repo/packages
@@ -0,0 +1 @@
+../../../../spack_packages/
\ No newline at end of file
diff --git a/.github/workflows/basic-build.yaml b/.github/workflows/basic-build.yaml
index afe18e145..d46b67830 100644
--- a/.github/workflows/basic-build.yaml
+++ b/.github/workflows/basic-build.yaml
@@ -35,16 +35,16 @@ jobs:
           repository: usqcd-software/c-lime
           path: lime
 
-      - name: autogen_lime
+      - name: create_builddir_lime
         working-directory: ${{github.workspace}}/lime
-        run: ./autogen.sh && mkdir build
+        run: mkdir build
 
       - name: build_lime
         working-directory: ${{github.workspace}}/lime/build
         run: |
           CC=gcc \
             CFLAGS="-march=haswell -mtune=haswell -O2" \
-            ../configure --prefix=$(pwd)/install_dir
+            cmake -DCMAKE_INSTALL_PREFIX=$(pwd)/install_dir .. >> config.log
           make -j
           make install
 
@@ -61,10 +61,9 @@ jobs:
           repository: etmc/lemon
           path: lemon
 
-      - name: autogen_lemon
+      - name: create_builddir_lemon
         working-directory: ${{github.workspace}}/lemon
         run: |
-          autoreconf -i -f
           mkdir build
 
       - name: build_lemon
@@ -72,9 +71,9 @@ jobs:
         run: |
           CC=mpicc \
             CFLAGS="-march=haswell -mtune=haswell -O2" \
-            ../configure --prefix=$(pwd)/install_dir
+            cmake -DCMAKE_INSTALL_PREFIX=$(pwd)/install_dir .. > config.log
           make -j
-          make install
+          make install >> config.log
       
       - name: Archive lemon config.log
         if: ${{ always() }}
@@ -92,28 +91,19 @@ jobs:
         shell: bash
         run: mkdir ${{github.workspace}}/main/build
 
-      - name: autogen_tmlqcd
-        working-directory: ${{github.workspace}}/main
-        run: autoconf
-
       - name: configure_and_build
         shell: bash
         working-directory: ${{github.workspace}}/main/build
         run: |
-          CC=mpicc CXX=mpicxx \
-            LDFLAGS="-fopenmp" \
             CFLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \
             CXXFLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \
-            ../configure \
-            --enable-mpi \
-            --with-mpidimension=4 \
-            --enable-omp \
-            --disable-sse2 \
-            --disable-sse3 \
-            --with-limedir=${{github.workspace}}/lime/build/install_dir \
-            --with-lemondir=${{github.workspace}}/lemon/build/install_dir \
-            --with-lapack="-lblas -llapack" || cat config.log
-          make -j
+            cmake -DCMAKE_PREFIX_PATH="${{github.workspace}}/lime/build/install_dir;${{github.workspace}}/lemon/build/install_dir" \
+            -DTM_USE_MPI=ON \
+            -DTM_USE_OMP=ON \
+            -DTM_USE_LEMON=ON \
+            .. > config.log
+            cat config.log
+            make -j
 
       - name: Archive tmLQCD config.log
         if: ${{ always() }}
@@ -125,7 +115,7 @@ jobs:
       - name: nf2_rgmixedcg_hmc_tmcloverdetratio
         working-directory: ${{github.workspace}}/main/build
         run: |
-          mpirun -np 2 ./hmc_tm \
+          mpirun -np 2 src/bin/hmc_tm \
             -f ../doc/sample-input/sample-hmc-rgmixedcg-tmcloverdetratio.input
       
       - name: Archive nf2_rgmixedcg_hmc_tmcloverdetratio output
diff --git a/.github/workflows/ddalphaamg-build.yaml b/.github/workflows/ddalphaamg-build.yaml
index f50ffcae9..a2fef7241 100644
--- a/.github/workflows/ddalphaamg-build.yaml
+++ b/.github/workflows/ddalphaamg-build.yaml
@@ -40,19 +40,16 @@ jobs:
           repository: usqcd-software/c-lime
           path: lime
 
-      - name: autogen_lime
+      - name: create_builddir_lime
         working-directory: ${{github.workspace}}/lime
-        run: ./autogen.sh
-      
-      - name: create_lime_builddir
-        run: mkdir ${{github.workspace}}/lime/build
+        run: mkdir build
 
       - name: build_lime
         working-directory: ${{github.workspace}}/lime/build
         run: |
           CC=gcc \
             CFLAGS="-march=haswell -mtune=haswell -O2" \
-            ../configure --prefix=$(pwd)/install_dir
+            cmake -DCMAKE_INSTALL_PREFIX=$(pwd)/install_dir .. >> config.log
           make -j
           make install
 
@@ -69,23 +66,20 @@ jobs:
           repository: etmc/lemon
           path: lemon
 
-      - name: create_lemon_builddir
-        run: mkdir ${{github.workspace}}/lemon/build
-
-      - name: autogen_lemon
+      - name: create_builddir_lemon
         working-directory: ${{github.workspace}}/lemon
-        run: autoreconf -i -f
+        run: |
+          mkdir build
 
       - name: build_lemon
         working-directory: ${{github.workspace}}/lemon/build
         run: |
           CC=mpicc \
             CFLAGS="-march=haswell -mtune=haswell -O2" \
-            ../configure \
-            --prefix=$(pwd)/install_dir
+            cmake -DCMAKE_INSTALL_PREFIX=$(pwd)/install_dir .. > config.log
           make -j
-          make install
-
+          make install >> config.log
+      
       - name: Archive lemon config.log
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
@@ -111,34 +105,27 @@ jobs:
         shell: bash
         run: mkdir ${{github.workspace}}/main/build
 
-      - name: autogen_tmlqcd
-        working-directory: ${{github.workspace}}/main
-        run: autoconf
-
       - name: configure_and_build
         shell: bash
         working-directory: ${{github.workspace}}/main/build
         run: |
           CC=mpicc CXX=mpicxx \
             LDFLAGS="-fopenmp" \
-            CFLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \
-            CXXFLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \
-            ../configure \
-            --enable-mpi \
-            --with-mpidimension=4 \
-            --enable-omp \
-            --disable-sse2 \
-            --disable-sse3 \
-            --with-limedir=${{github.workspace}}/lime/build/install_dir \
-            --with-lemondir=${{github.workspace}}/lemon/build/install_dir \
-            --with-DDalphaAMG=${{github.workspace}}/ddalphaamg \
-            --with-lapack="-lblas -llapack" || cat config.log
+            CFLAGS="-O3 -ffast-math -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \
+            CXXFLAGS="-O3 -ffast-math -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \
+            cmake -DCMAKE_PREFIX_PATH="${{github.workspace}}/lime/build/install_dir;${{github.workspace}}/lemon/build/install_dir;${{github.workspace}}/ddalphaamg" \
+            -DTM_USE_MPI=ON \
+            -DTM_USE_OMP=ON \
+            -DTM_USE_LEMON=ON \
+            -DTM_USE_DDalphaAMG=ON \
+            .. > config.log
+            cat config.log
           make -j
 
       - name: nf2_ddalphaamg_hmc_tmcloverdetratio
         working-directory: ${{github.workspace}}/main/build
         run: |
-          mpirun -np 2 ./hmc_tm \
+          mpirun -np 2 src/bin/hmc_tm \
             -f ../doc/sample-input/sample-hmc-ddalphaamg-tmcloverdetratio.input
 
       - name: Archive nf2_ddalphaamg_hmc_tmcloverdetratio output
@@ -152,10 +139,10 @@ jobs:
         working-directory: ${{github.workspace}}/main/build
         run: |
           refpath=${{github.workspace}}/main/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio
-          numdiff -r 1e-4 -X 1:10 -X 1:5-8 -X 2:10 -X 2:5-8 output.data ${refpath}/output.data
+          numdiff -r 1e-6 -X 1:10 -X 1:5-8 -X 2:10 -X 2:5-8 output.data ${refpath}/output.data
           for i in $(seq 0 2 18); do \
             f=onlinemeas.$(printf %06d $i); \
-            numdiff -r 1e-5 ${f} ${refpath}/${f}; \
+            numdiff -r 1e-6 ${f} ${refpath}/${f}; \
           done
 
 
diff --git a/.github/workflows/qphix-build.yaml b/.github/workflows/qphix-build.yaml
index 1b39cdf34..ec4ec5394 100644
--- a/.github/workflows/qphix-build.yaml
+++ b/.github/workflows/qphix-build.yaml
@@ -35,16 +35,16 @@ jobs:
           repository: usqcd-software/c-lime
           path: lime
 
-      - name: autogen_lime
+      - name: create_builddir_lime
         working-directory: ${{github.workspace}}/lime
-        run: ./autogen.sh && mkdir build
+        run: mkdir build
 
       - name: build_lime
         working-directory: ${{github.workspace}}/lime/build
         run: |
           CC=gcc \
             CFLAGS="-march=haswell -mtune=haswell -O2" \
-            ../configure --prefix=$(pwd)/install_dir
+            cmake -DCMAKE_INSTALL_PREFIX=$(pwd)/install_dir .. >> config.log
           make -j
           make install
 
@@ -61,10 +61,9 @@ jobs:
           repository: etmc/lemon
           path: lemon
 
-      - name: autogen_lemon
+      - name: create_builddir_lemon
         working-directory: ${{github.workspace}}/lemon
         run: |
-          autoreconf -i -f
           mkdir build
 
       - name: build_lemon
@@ -72,11 +71,10 @@ jobs:
         run: |
           CC=mpicc \
             CFLAGS="-march=haswell -mtune=haswell -O2" \
-            ../configure \
-            --prefix=$(pwd)/install_dir
+            cmake -DCMAKE_INSTALL_PREFIX=$(pwd)/install_dir .. > config.log
           make -j
-          make install
-
+          make install >> config.log
+      
       - name: Archive lemon config.log
         if: ${{ always() }}
         uses: actions/upload-artifact@v4
@@ -84,7 +82,6 @@ jobs:
           name: lemon_config_output
           path: ${{github.workspace}}/lemon/build/config.log 
 
-
       - name: get_qmp
         uses: actions/checkout@v4
         with:
@@ -151,9 +148,10 @@ jobs:
             -DCMAKE_C_COMPILER=mpicc \
             -DCMAKE_C_FLAGS="-std=c99 -O2 -mavx2 -mfma -mtune=haswell -march=haswell -fopenmp" \
             -DCMAKE_INSTALL_PREFIX=$(pwd)/install_dir \
-            ..
+            .. >> config.log
           VERBOSE=1 make -j $(( ${nb_cores} + 3 ))
-          make install
+          make install >> config.log
+          cat config.log
 
       - name: get_tmlqcd
         uses: actions/checkout@v4
@@ -164,31 +162,22 @@ jobs:
         shell: bash
         run: mkdir ${{github.workspace}}/main/build
 
-      - name: autogen_tmlqcd
-        working-directory: ${{github.workspace}}/main
-        run: autoconf
-
       - name: configure_and_build
         shell: bash
         working-directory: ${{github.workspace}}/main/build
         run: |
           CC=mpicc CXX=mpicxx \
-            LDFLAGS="-fopenmp" \
-            CFLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \
-            CXXFLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \
-            ../configure \
-            --enable-mpi \
-            --with-mpidimension=4 \
-            --enable-omp \
-            --disable-sse2 \
-            --disable-sse3 \
-            --with-limedir=${{github.workspace}}/lime/build/install_dir \
-            --with-lemondir=${{github.workspace}}/lemon/build/install_dir \
-            --with-lapack="-lblas -llapack" \
-            --with-qmpdir=${{github.workspace}}/qmp/build/install_dir \
-            --with-qphixdir=${{github.workspace}}/qphix/build/install_dir \
-            --enable-qphix-soalen=4 || cat config.log
-          make -j
+          cmake -DCMAKE_PREFIX_PATH="${{github.workspace}}/lime/build/install_dir;${{github.workspace}}/lemon/build/install_dir;${{github.workspace}}/qmp/build/install_dir;${{github.workspace}}/qphix/build/install_dir" \
+           -DTM_USE_MPI=ON \
+           -DTM_USE_OMP=ON \
+           -DTM_USE_LEMON=ON \
+           -DTM_USE_QPHIX=ON \
+           -DCMAKE_CXX_FLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \
+           -DCMAKE_C_FLAGS="-O2 -mtune=haswell -march=haswell -mavx2 -mfma -DOMPI_SKIP_MPICXX -fopenmp" \
+           -DQPHIX_DIR="${{github.workspace}}/qphix/build/install_dir" \
+           -DQMP_DIR="${{github.workspace}}/qmp/build/install_dir" \
+            .. > config.log
+          make -j >> config.log
 
       - name: Archive tmLQCD config.log
         if: ${{ always() }}
@@ -200,7 +189,7 @@ jobs:
       - name: nf2_qphix_hmc_tmcloverdetratio
         working-directory: ${{github.workspace}}/main/build
         run: |
-          mpirun -np 2 ./hmc_tm \
+          mpirun -np 2 src/bin/hmc_tm \
             -f ../doc/sample-input/sample-hmc-qphix-tmcloverdetratio.input
 
       - name: Archive nf2_qphix_hmc_tmcloverdetratio output
diff --git a/.gitignore b/.gitignore
index 79e2bc1b2..0a2e35fba 100644
--- a/.gitignore
+++ b/.gitignore
@@ -18,7 +18,6 @@ tags*
 hmc_tm
 invert
 offline_measurement
-lib/
 benchmark
 *.data
 *.para
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 000000000..46e37f0d9
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,405 @@
+cmake_minimum_required(VERSION 3.26)
+
+project(
+  tmlqcd
+  DESCRIPTION "tmlQCD"
+  HOMEPAGE_URL "https://github.com/etmc/tmLQCD"
+  VERSION "6.0.2"
+  LANGUAGES C CXX)
+
+# include our cmake snippets
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} ${CMAKE_CURRENT_SOURCE_DIR}/cmake)
+
+# =================================================================================================
+# REQUIRE OUT-OF-SOURCE BUILDS
+file(TO_CMAKE_PATH "${PROJECT_BINARY_DIR}/CMakeLists.txt" LOC_PATH)
+if(EXISTS "${LOC_PATH}")
+  message(
+    FATAL_ERROR
+      "You cannot build in a source directory (or any directory with a CMakeLists.txt file). Please make a build subdirectory."
+  )
+endif()
+
+# =================================================================================================
+# PROJECT AND VERSION
+include(CMakeDependentOption)
+include(GNUInstallDirs)
+include(FetchContent)
+include(CMakePackageConfigHelpers)
+
+cmake_policy(SET CMP0048 NEW)
+
+if(POLICY CMP0144)
+  cmake_policy(SET CMP0144 NEW)
+endif()
+
+if(NOT DEFINED CMAKE_CUDA_STANDARD)
+  set(CMAKE_CUDA_STANDARD 14)
+  set(CMAKE_CUDA_STANDARD_REQUIRED ON)
+endif()
+
+if(NOT DEFINED CMAKE_CXX_STANDARD)
+  set(CMAKE_CXX_STANDARD 17)
+  set(CMAKE_CXX_STANDARD_REQUIRED ON)
+endif()
+
+if(NOT DEFINED CMAKE_C_STANDARD)
+  set(CMAKE_C_STANDARD 11)
+  set(CMAKE_C_STANDARD_REQUIRED ON)
+endif()
+
+if(NOT DEFINED CMAKE_HIP_STANDARD)
+  set(CMAKE_HIP_STANDARD 14)
+  set(CMAKE_HIP_STANDARD_REQUIRED ON)
+endif()
+
+find_package(PkgConfig)
+
+# Search for ROCm through the environment variables listed below
+foreach(
+  __var
+  ROCM_ROOT
+  HIP_ROOT
+  HIP_PATH
+  CRAY_ROCM_ROOT
+  ORNL_ROCM_ROOT
+  CRAY_ROCM_PREFIX
+  ROCM_PREFIX
+  CRAY_ROCM_DIR
+  ROCM_PATH)
+  # __var holds the *name* of the environment variable: test that it is set and
+  # read it via $ENV{${__var}}. The first hit wins for the ROCM_PATH cache entry.
+  if(DEFINED ENV{${__var}})
+    list(APPEND CMAKE_PREFIX_PATH "$ENV{${__var}}")
+    set(ROCM_PATH "$ENV{${__var}}" CACHE PATH "Path to ROCm installation")
+  endif()
+endforeach()
+
+# user-facing feature switches (the quoted text is the help string of each one)
+option(CMAKE_POSITION_INDEPENDENT_CODE "Enable position independent code" ON)
+option(BUILD_SHARED_LIBS "Enable shared library" OFF)
+option(TM_USE_FFTW "Enable fftw support" OFF)
+option(TM_USE_MPI "Enable MPI support" OFF)
+option(TM_USE_CUDA "Enable CUDA support" OFF)
+option(TM_USE_HIP "Enable HIP support" OFF)
+option(TM_USE_DDalphaAMG "Enable DDalphaAMG support" OFF)
+option(TM_USE_OMP "Enable OpenMP" ON)
+option(TM_FIXEDVOLUME "Fix volume at compile time" OFF)
+set(TM_ENABLE_ALIGNMENT
+    "auto"
+    CACHE STRING
+    "Automatically or explicitly align arrays to byte number. auto, none, 16, 32, 64")
+
+# list of accepted values offered for TM_ENABLE_ALIGNMENT
+set_property(CACHE TM_ENABLE_ALIGNMENT PROPERTY STRINGS "auto" "none" "16" "32"
+                                                "64")
+
+option(TM_USE_OPTIMIZATION "enable optimisation" ON)
+option(TM_USE_GAUGE_COPY "Enable use of a copy of the gauge field" ON)
+option(TM_USE_HALFSPINOR "Use a Dirac Op. with halfspinor exchange" ON)
+option(TM_USE_QPHIX "Enable QPhiX" OFF)
+option(TM_USE_SHMEM "Use shmem API" OFF)
+option(TM_USE_QUDA "Enable QUDA support" OFF)
+option(TM_ENABLE_WARNINGS "Enable all warnings" ON)
+option(TM_ENABLE_TESTS "Enable tests" OFF)
+set(TM_QPHIX_SOALEN
+    "4"
+    CACHE STRING "QPhiX specific parameter")
+# MPI dependent options
+cmake_dependent_option(
+  TM_PERSISTENT_MPI "Use persistent MPI calls for halfspinor [default=no]" OFF
+  "TM_USE_MPI" OFF)
+cmake_dependent_option(
+  TM_NONBLOCKING_MPI "Use non-blocking MPI calls for spinor and gauge" ON
+  "TM_USE_MPI" OFF)
+
+# need to do it properly. Just a place holder
+cmake_dependent_option(
+  TM_MPI_DIMENSION "Use n dimensional parallelisation [default=4]" 4
+  "TM_USE_MPI" OFF)
+
+# HIP dependent options
+cmake_dependent_option(TM_USE_CUDA_HIP "Enable CUDA support in HIP" OFF
+                       "TM_USE_HIP" OFF)
+
+# lemon requires MPI, so it is forced OFF whenever TM_USE_MPI is disabled
+cmake_dependent_option(TM_USE_LEMON "Use the lemon io library" OFF "TM_USE_MPI"
+                       OFF)
+
+# GPU dependent options
+cmake_dependent_option(TM_USE_NVHPC "Enable Nvidia HPC toolkit" OFF
+                       "TM_USE_CUDA" OFF)
+
+# DDalphaAMG specific options, only selectable when TM_USE_DDalphaAMG is ON
+# cmake_dependent_option(<option> "<help>" <default> "<depends>" <forced value>)
+cmake_dependent_option(
+  DDalphaAMG_ENABLE_PARAMOUNT_OUTPUT "Enable paramount output support" ON
+  "TM_USE_DDalphaAMG" OFF)
+
+cmake_dependent_option(DDalphaAMG_ENABLE_FGMRES_RESTEST "Enable GMRES test" OFF
+                       "TM_USE_DDalphaAMG" OFF)
+
+cmake_dependent_option(
+  DDalphaAMG_ENABLE_PROFILING "Enable profiling support" OFF
+  "TM_USE_DDalphaAMG" OFF)
+
+cmake_dependent_option(DDalphaAMG_ENABLE_TRACK_RES "Enable track res support"
+                       ON "TM_USE_DDalphaAMG" OFF)
+
+cmake_dependent_option(
+  DDalphaAMG_ENABLE_SINGLE_ALLREDUCE_ARNOLDI
+  "Enable single allreduce Arnoldi support" OFF "TM_USE_DDalphaAMG" OFF)
+
+cmake_dependent_option(
+  DDalphaAMG_ENABLE_COARSE_RES "Enable coarse res support" OFF
+  "TM_USE_DDalphaAMG" OFF)
+
+cmake_dependent_option(
+  DDalphaAMG_ENABLE_SCHWARZ_RES "Enable Schwarz res support" OFF
+  "TM_USE_DDalphaAMG" OFF)
+
+cmake_dependent_option(DDalphaAMG_ENABLE_OMP "Enable OpenMP support" ${TM_USE_OMP}
+                       "TM_USE_DDalphaAMG" OFF)
+
+cmake_dependent_option(
+  DDalphaAMG_ENABLE_TESTVECTOR_ANALYSIS "Enable vector analysis support" OFF
+  "TM_USE_DDalphaAMG" OFF)
+
+cmake_dependent_option(DDalphaAMG_ENABLE_HDF5 "Enable HDF5 support" OFF
+                       "TM_USE_DDalphaAMG" OFF)
+
+# search for blas and lapack
+find_package(BLAS REQUIRED)
+#
+find_package(LAPACK REQUIRED)
+set(TM_LAPACK ON)
+find_package(FLEX REQUIRED)
+
+set(PACKAGE_NAME ${PROJECT_DESCRIPTION})
+set(PACKAGE_VERSION ${PROJECT_VERSION})
+set(PACKAGE_TARNAME "tmlqcd")
+set(PACKAGE_BUGREPORT "https://github.com/etmc/tmLQCD")
+set(PACKAGE_STRING "${PROJECT_DESCRIPTION} ${PROJECT_VERSION}")
+
+set(ALIGN " ")
+set(ALIGN_BASE "0")
+set(ALIGN_BASE32 "0")
+set(ALIGN32 " ")
+
+# DO NOT MERGE the two if statements as otherwise the automatic alignment will
+# not be taken into account. Variables are named (not ${}-expanded) inside if()
+# so that an unset SIMD_ALIGNMENT compares false instead of breaking the syntax.
+if(TM_ENABLE_ALIGNMENT STREQUAL "auto")
+  include(cmake/DetectSimdAndAlignment.cmake)
+  message(
+    STATUS "SIMD: ${SIMD_LEVEL} (${SIMD_ARCH_FAMILY}), align=${SIMD_ALIGNMENT}")
+endif()
+
+if(TM_ENABLE_ALIGNMENT STREQUAL "none")
+  set(ALIGN_BASE "0x00")
+  set(ALIGN " ")
+  set(ALIGN_BASE32 "0x00")
+  set(ALIGN32 " ")
+elseif((TM_ENABLE_ALIGNMENT STREQUAL "16") OR (SIMD_ALIGNMENT EQUAL 16))
+  set(ALIGN_BASE "0x0F")
+  set(ALIGN "__attribute__ ((aligned (16)))")
+  set(ALIGN_BASE32 "0x0F")
+  set(ALIGN32 "__attribute__ ((aligned (16)))")
+elseif((TM_ENABLE_ALIGNMENT STREQUAL "32") OR (SIMD_ALIGNMENT EQUAL 32))
+  set(ALIGN_BASE "0x1F") # 32 - 1, same pattern as 0x0F (16) and 0x3F (64)
+  set(ALIGN "__attribute__ ((aligned (32)))")
+  set(ALIGN_BASE32 "0x1F")
+  set(ALIGN32 "__attribute__ ((aligned (32)))")
+elseif((TM_ENABLE_ALIGNMENT STREQUAL "64") OR (SIMD_ALIGNMENT EQUAL 64))
+  set(ALIGN_BASE "0x3F")
+  set(ALIGN "__attribute__ ((aligned (64)))")
+  set(ALIGN_BASE32 "0x3F")
+  set(ALIGN32 "__attribute__ ((aligned (64)))")
+else()
+  message(
+    FATAL_ERROR
+      "Unusable value for array alignment. Allowed values are: auto, none, 16, 32, 64"
+  )
+endif()
+
+if(TM_USE_MPI)
+  find_package(MPI REQUIRED)
+  if(TM_NONBLOCKING_MPI)
+    set(TM_NONBLOCKING ON)
+  endif()
+  if(TM_PERSISTENT_MPI)
+    set(TM_PERSISTENT ON)
+  endif()
+endif()
+
+if(TM_USE_OMP)
+  find_package(OpenMP REQUIRED COMPONENTS C CXX)
+endif()
+
+if(TM_USE_HDF5)
+  find_package(HDF5 REQUIRED COMPONENTS C)
+endif()
+
+if(TM_USE_LEMON)
+  FetchContent_Declare(
+    lemon
+    GIT_REPOSITORY https://github.com/etmc/lemon
+    GIT_TAG 42c2d99dae9c04ca1f09c532a8f9bcb1bb667528
+    FIND_PACKAGE_ARGS NAMES lemon)
+  FetchContent_MakeAvailable(lemon)
+endif()
+
+find_package(CLime REQUIRED)
+set(TM_USE_LIME ON)
+
+if(TM_USE_QUDA)
+  find_package(QUDA REQUIRED CONFIG) # keyword is case-sensitive; "config" would be a component
+endif()
+
+if(TM_USE_SHMEM)
+  message(STATUS "SHMEM needs to be included")
+endif()
+
+if(TM_USE_CUDA AND TM_USE_HIP)
+  message(
+    FATAL_ERROR # abort the configuration instead of only printing the text
+    "HIP and CUDA are mutually exclusive. Please choose one GPU support only")
+endif()
+
+if(TM_USE_CUDA OR QUDA_TARGET_CUDA)
+  enable_language(CUDA)
+
+  # placeholder for nvhpc for future use
+  if(TM_USE_NVHPC)
+    find_package(NVHPC REQUIRED COMPONENTS CUDA MATH HOSTUTILS NCCL)
+  else()
+    find_package(CUDAToolkit REQUIRED)
+  endif()
+endif()
+
+# We may want to use hip-cuda for development or debugging purposes especially
+# if AMD GPU access is not possible. So allow it
+
+if(TM_USE_HIP OR QUDA_TARGET_HIP)
+  enable_language(HIP)
+  find_package(hipblas)
+  find_package(hipfft)
+  if(TM_USE_CUDA_HIP)
+    find_package(CUDA)
+  endif()
+
+  if(CMAKE_HIP_PLATFORM MATCHES "amd")
+    set(TM_GPU_PLATFORM_DFLAGS "__HIP_PLATFORM_AMD__")
+  else()
+    set(TM_GPU_PLATFORM_DFLAGS "__HIP_PLATFORM_NVIDIA__")
+  endif()
+endif()
+
+if(TM_USE_QPHIX)
+  find_package(Qphix REQUIRED)
+endif()
+
+# check for fftw3 (rely on pkgconfig).
+if(TM_USE_FFTW)
+  pkg_search_module(tmlqcd_fftw3 IMPORTED_TARGET GLOBAL fftw3)
+  if(tmlqcd_fftw3_FOUND)
+    add_library(tmlqcd::fftw3 ALIAS PkgConfig::tmlqcd_fftw3)
+  endif()
+endif()
+
+if(TM_USE_DDalphaAMG)
+  add_subdirectory(DDalphaAMG)
+endif()
+
+if(TM_ENABLE_WARNINGS)
+  add_compile_options($<$<COMPILE_LANG_AND_ID:C,GNU>:-Wall>
+                      $<$<COMPILE_LANG_AND_ID:CXX,GNU>:-Wall>)
+endif()
+
+# set the parallelisation: exactly one TM_PARALLEL* variable is switched on
+# according to TM_MPI_DIMENSION; unrecognised values fall back to XYZT
+if(TM_USE_MPI)
+  if(TM_MPI_DIMENSION STREQUAL "1")
+    # T parallelisation
+    set(TM_PARALLELT ON)
+  elseif(TM_MPI_DIMENSION STREQUAL "2")
+    # XT parallelisation
+    set(TM_PARALLELXT ON)
+  elseif(TM_MPI_DIMENSION STREQUAL "3")
+    # XYT parallelisation
+    set(TM_PARALLELXYT ON)
+  elseif(TM_MPI_DIMENSION STREQUAL "4")
+    # timeslice-splitted communications
+    set(TM_PARALLELXYZT ON)
+  elseif(TM_MPI_DIMENSION STREQUAL "X")
+    set(TM_PARALLELX ON)
+  elseif(TM_MPI_DIMENSION STREQUAL "XY")
+    set(TM_PARALLELXY ON)
+  elseif(TM_MPI_DIMENSION STREQUAL "XYZ")
+    set(TM_PARALLELXYZ ON)
+  else()
+    set(TM_PARALLELXYZT ON)
+  endif()
+endif()
+
+if(TM_USE_HALFSPINOR AND NOT TM_USE_GAUGE_COPY)
+  message(
+    FATAL_ERROR
+      "The TM_USE_GAUGE_COPY option should also be set to ON when TM_USE_HALFSPINOR is ON"
+  )
+endif()
+# keep the autotool config.h header.
+configure_file("${PROJECT_SOURCE_DIR}/cmake/tmlqcd_config_internal.h.in"
+               "${PROJECT_BINARY_DIR}/tmlqcd_config_internal.h" @ONLY)
+configure_file("${PROJECT_SOURCE_DIR}/src/lib/fixed_volume.h.in"
+               "${PROJECT_BINARY_DIR}/fixed_volume.h" @ONLY)
+# locate git; GIT_EXE is set to GIT_EXE-NOTFOUND (false in if()) when missing
+find_program(GIT_EXE NAMES git)
+
+# generate version header
+string(TIMESTAMP TM_TIMESTAMP "%Y-%m-%d %H:%M:%S")
+if(GIT_EXE AND EXISTS "${PROJECT_SOURCE_DIR}/.git")
+  execute_process(
+    COMMAND "${GIT_EXE}" rev-parse HEAD
+    OUTPUT_VARIABLE TM_SHA
+    WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
+    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+  execute_process(
+    COMMAND "${GIT_EXE}" describe --all
+    OUTPUT_VARIABLE TM_GIT_BRANCH
+    WORKING_DIRECTORY "${PROJECT_SOURCE_DIR}"
+    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+  message(STATUS "git hash ${TM_SHA}")
+else()
+  set(TM_SHA
+      "https://github.com/etmc/tmLQCD/releases/tag/rel-${PROJECT_VERSION_MAJOR}-${PROJECT_VERSION_MINOR}"
+  )
+endif()
+
+configure_file(cmake/git_hash.c.in git_hash.c @ONLY)
+add_subdirectory(src/lib)
+add_subdirectory(src/bin)
+
+write_basic_package_version_file(
+  "${PROJECT_BINARY_DIR}/tmlQCDConfigVersion.cmake"
+  VERSION "${PROJECT_VERSION}"
+  COMPATIBILITY SameMajorVersion)
+# find_package(tmlQCD) searches for tmlQCDConfig.cmake (+ ...ConfigVersion.cmake)
+configure_file("${PROJECT_SOURCE_DIR}/cmake/tmlQCD-config.cmake.in"
+               "${PROJECT_BINARY_DIR}/tmlQCDConfig.cmake" @ONLY)
+
+configure_file("${PROJECT_SOURCE_DIR}/cmake/tmlQCD.pc.in"
+               "${PROJECT_BINARY_DIR}/tmlQCD.pc" @ONLY)
+
+install(FILES "${PROJECT_BINARY_DIR}/tmlQCDConfig.cmake"
+              "${PROJECT_BINARY_DIR}/tmlQCDConfigVersion.cmake"
+        DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/tmlQCD")
+
+install(FILES "${PROJECT_BINARY_DIR}/tmlQCD.pc"
+        DESTINATION "${CMAKE_INSTALL_LIBDIR}/pkgconfig")
+
+install(
+  DIRECTORY "${PROJECT_SOURCE_DIR}/cmake"
+  DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/tmlQCD"
+  FILES_MATCHING
+  PATTERN "*.cmake")
diff --git a/DDalphaAMG/CMakeLists.txt b/DDalphaAMG/CMakeLists.txt
new file mode 100644
index 000000000..2a80851a5
--- /dev/null
+++ b/DDalphaAMG/CMakeLists.txt
@@ -0,0 +1,216 @@
+# Several custom directories are used below to circumvent the deletion of the
+# CMakeLists.txt contained in the DDalphaAMG directory. CMake will clone the
+# source code and build it with the default options used in the CI/CD. More
+# options are available in the main CMakeLists.txt.
+
+include(GNUInstallDirs)
+# Checkout location for the fetched DDalphaAMG sources (inside the source tree).
+set(DDalphaAMG_SRC_DIR ${CMAKE_SOURCE_DIR}/DDalphaAMG/deps)
+# NOTE(review): no GIT_TAG is given, so the fetch is not pinned to a commit.
+FetchContent_Declare(
+  DDalphaAMG
+  GIT_REPOSITORY https://github.com/etmc/DDalphaAMG.git
+  SOURCE_DIR ${DDalphaAMG_SRC_DIR})
+# Populate the declared content at configure time.
+FetchContent_MakeAvailable(DDalphaAMG)
+# Precision-templated sources (relative to <src dir>/src); sed-instantiated below.
+list(
+  APPEND
+  DDalphaAMG_SRC_GENERIC
+  interpolation_generic.c
+  gathering_generic.c
+  sse_interpolation_generic.c
+  coarse_oddeven_generic.c
+  operator_generic.c
+  oddeven_generic.c
+  linalg_generic.c
+  init_generic.c
+  vcycle_generic.c
+  dirac_generic.c
+  coarse_operator_generic.c
+  coarsening_generic.c
+  schwarz_generic.c
+  ghost_generic.c
+  vectorization_dirac_generic.c
+  linsolve_generic.c
+  sse_coarse_operator_generic.c
+  data_generic.c
+  setup_generic.c
+  sse_linalg_generic.c)
+# Precision-templated headers (relative to <src dir>/src); sed-instantiated below.
+list(
+  APPEND
+  DDalphaAMG_HEADER_GENERIC
+  interpolation_generic.h
+  gathering_generic.h
+  sse_interpolation_generic.h
+  coarse_oddeven_generic.h
+  operator_generic.h
+  oddeven_generic.h
+  linalg_generic.h
+  init_generic.h
+  vcycle_generic.h
+  dirac_generic.h
+  coarse_operator_generic.h
+  coarsening_generic.h
+  schwarz_generic.h
+  ghost_generic.h
+  vectorization_dirac_generic.h
+  linsolve_generic.h
+  sse_coarse_operator_generic.h
+  data_generic.h
+  setup_generic.h
+  sse_linalg_generic.h
+  main_pre_def_generic.h
+  main_post_def_generic.h)
+# Sources passed to add_library() as-is (no sed step), given as absolute paths.
+list(
+  APPEND
+  DDalphaAMG_SRC_GENERAL
+  ${DDalphaAMG_SRC_DIR}/src/preconditioner.c
+  ${DDalphaAMG_SRC_DIR}/src/threading.c
+  ${DDalphaAMG_SRC_DIR}/src/main.c
+  ${DDalphaAMG_SRC_DIR}/src/sse_dirac.c
+  ${DDalphaAMG_SRC_DIR}/src/var_table.c
+  ${DDalphaAMG_SRC_DIR}/src/data_layout.c
+  ${DDalphaAMG_SRC_DIR}/src/linsolve.c
+  ${DDalphaAMG_SRC_DIR}/src/ghost.c
+  ${DDalphaAMG_SRC_DIR}/src/top_level.c
+  ${DDalphaAMG_SRC_DIR}/src/dirac.c
+  ${DDalphaAMG_SRC_DIR}/src/linalg.c
+  ${DDalphaAMG_SRC_DIR}/src/init.c
+  ${DDalphaAMG_SRC_DIR}/src/DDalphaAMG_interface.c
+  ${DDalphaAMG_SRC_DIR}/src/lime_io.c
+  ${DDalphaAMG_SRC_DIR}/src/sse_linalg.c
+  ${DDalphaAMG_SRC_DIR}/src/solver_analysis.c
+  ${DDalphaAMG_SRC_DIR}/src/io.c)
+
+# Instantiate every precision-templated source twice: float.sed / double.sed
+# rewrite <name>_generic.c into <name>_float.c / <name>_double.c inside
+# ${CMAKE_BINARY_DIR}/DDalphaAMG. A "<file>.sed-done" stamp is the tracked
+# output of each command; the stamps are collected in SED_MARKERS.
+foreach(f IN LISTS DDalphaAMG_SRC_GENERIC)
+  foreach(prec IN ITEMS float double)
+    string(REPLACE "_generic" "_${prec}" f_prec "${f}")
+    set(gen_file "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_prec}")
+
+    add_custom_command(
+      OUTPUT "${gen_file}.sed-done"
+      # Declare the file sed actually writes. Otherwise generators such as
+      # Ninja have no rule for it (policy CMP0058) and can stop a clean build
+      # with "missing and no known rule to make it".
+      BYPRODUCTS "${gen_file}"
+      COMMAND
+        sed -f "${DDalphaAMG_SRC_DIR}/${prec}.sed"
+        "${DDalphaAMG_SRC_DIR}/src/${f}" > "${gen_file}"
+      COMMAND ${CMAKE_COMMAND} -E touch "${gen_file}.sed-done"
+      DEPENDS "${DDalphaAMG_SRC_DIR}/src/${f}"
+              "${DDalphaAMG_SRC_DIR}/${prec}.sed"
+      VERBATIM)
+
+    # List order: float entry first, then double, for each input file.
+    list(APPEND SED_MARKERS "${gen_file}.sed-done")
+    list(APPEND DDalphaAMG_SRC_SINGLE_DOUBLE ${f_prec})
+  endforeach()
+endforeach()
+
+# Instantiate every precision-templated header twice: float.sed / double.sed
+# rewrite <name>_generic.h into <name>_float.h / <name>_double.h inside
+# ${CMAKE_BINARY_DIR}/DDalphaAMG. A "<file>.sed-done" stamp is the tracked
+# output of each command; the stamps are collected in SED_MARKERS and the
+# generated header names in DDalphaAMG_HEADER_SINGLE_DOUBLE.
+foreach(f IN LISTS DDalphaAMG_HEADER_GENERIC)
+  foreach(prec IN ITEMS float double)
+    string(REPLACE "_generic" "_${prec}" f_prec "${f}")
+    set(gen_file "${CMAKE_BINARY_DIR}/DDalphaAMG/${f_prec}")
+
+    add_custom_command(
+      OUTPUT "${gen_file}.sed-done"
+      # Declare the file sed actually writes so the build tool knows which
+      # rule produces it (Ninja requires explicit byproducts, policy
+      # CMP0058); only the stamp was declared before.
+      BYPRODUCTS "${gen_file}"
+      COMMAND
+        sed -f "${DDalphaAMG_SRC_DIR}/${prec}.sed"
+        "${DDalphaAMG_SRC_DIR}/src/${f}" > "${gen_file}"
+      COMMAND ${CMAKE_COMMAND} -E touch "${gen_file}.sed-done"
+      DEPENDS "${DDalphaAMG_SRC_DIR}/src/${f}"
+              "${DDalphaAMG_SRC_DIR}/${prec}.sed"
+      VERBATIM)
+
+    # List order: float entry first, then double, for each input file.
+    list(APPEND SED_MARKERS "${gen_file}.sed-done")
+    list(APPEND DDalphaAMG_HEADER_SINGLE_DOUBLE ${f_prec})
+  endforeach()
+endforeach()
+
+# The sed outputs do not exist at configure time: flag them as GENERATED.
+foreach(gen IN LISTS DDalphaAMG_SRC_SINGLE_DOUBLE DDalphaAMG_HEADER_SINGLE_DOUBLE)
+  set_source_files_properties(
+    "${CMAKE_BINARY_DIR}/DDalphaAMG/${gen}" PROPERTIES GENERATED TRUE)
+endforeach()
+# Umbrella target driving every sed command (part of the default build).
+add_custom_target(run_sed ALL DEPENDS ${SED_MARKERS})
+# Library = untouched sources + the generated float/double sources.
+add_library(
+  DDalphaAMG ${DDalphaAMG_SRC_GENERAL} ${DDalphaAMG_SRC_SINGLE_DOUBLE})
+# Every sed output (headers included) must exist before the library builds.
+add_dependencies(DDalphaAMG run_sed)
+# NOTE(review): -mavx2/-mfma are x86 flags, added for any GNU C build; confirm.
+target_compile_options(
+  DDalphaAMG PRIVATE "$<$<COMPILE_LANG_AND_ID:C,GNU>:-O3;-ffast-math;-mavx2;-mfma>")
+# Link interface; HDF5 and OpenMP are linked only when their ENABLE option is on.
+target_link_libraries(
+  DDalphaAMG
+  PUBLIC MPI::MPI_C $<$<BOOL:${DDalphaAMG_ENABLE_HDF5}>:hdf5::hdf5>
+  $<$<BOOL:${DDalphaAMG_ENABLE_OMP}>:OpenMP::OpenMP_C> tmlqcd::clime)
+# Build tree: fetched src/ and include/ plus the dir holding the sed outputs.
+target_include_directories(
+  DDalphaAMG
+  PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/DDalphaAMG>
+  $<BUILD_INTERFACE:${DDalphaAMG_SRC_DIR}/src>
+  $<BUILD_INTERFACE:${DDalphaAMG_SRC_DIR}/include>
+  $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}/DDalphaAMG>)
+# Macros toggled by the DDalphaAMG_ENABLE_* options; DEBUG only in Debug config.
+target_compile_definitions(
+  DDalphaAMG
+  PUBLIC
+  $<$<BOOL:${DDalphaAMG_ENABLE_PARAMOUNT_OUTPUT}>:PARAMOUNTOUTPUT>
+  $<$<BOOL:${DDalphaAMG_ENABLE_FGMRES_RESTEST}>:FGMRES_RESTEST>
+  $<$<BOOL:${DDalphaAMG_ENABLE_PROFILING}>:PROFILING>
+  $<$<BOOL:${DDalphaAMG_ENABLE_SINGLE_ALLREDUCE_ARNOLDI}>:SINGLE_ALLREDUCE_ARNOLDI>
+  $<$<BOOL:${DDalphaAMG_ENABLE_COARSE_RES}>:COARSE_RES>
+  $<$<BOOL:${DDalphaAMG_ENABLE_SCHWARZ_RES}>:SCHWARZ_RES>
+  $<$<BOOL:${DDalphaAMG_ENABLE_OMP}>:OPENMP>
+  $<$<BOOL:${DDalphaAMG_ENABLE_TRACK_RES}>:TRACK_RES>
+  $<$<BOOL:${DDalphaAMG_ENABLE_TESTVECTOR_ANALYSIS}>:TESTVECTOR_ANALYSIS>
+  $<$<BOOL:${DDalphaAMG_ENABLE_HDF5}>:HAVE_HDF5>
+  $<$<CONFIG:Debug>:DEBUG>
+  SSE) # NOTE(review): SSE is defined unconditionally; confirm for non-x86 targets
+# Public API header; assumed to live in src/ of the fetched tree (TODO confirm).
+install(FILES "${DDalphaAMG_SRC_DIR}/src/DDalphaAMG.h"
+  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/DDalphaAMG")
+# Version file; the name must match the one listed in install(FILES ...) below.
+write_basic_package_version_file(
+  "${PROJECT_BINARY_DIR}/DDalphaAMG-ConfigVersion.cmake"
+  VERSION "0.0.0"
+  COMPATIBILITY SameMajorVersion)
+# Install the library and record it in the DDalphaAMG_targets export set.
+install(TARGETS DDalphaAMG EXPORT DDalphaAMG_targets
+        LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}")
+# Write the export set to DDalphaAMG-Targets.cmake under the DDalphaAMG:: prefix.
+install(
+  EXPORT DDalphaAMG_targets
+  FILE DDalphaAMG-Targets.cmake
+  NAMESPACE DDalphaAMG::
+  DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/DDalphaAMG")
+# Package config file from its template, installed next to the version file.
+configure_file(
+  "${PROJECT_SOURCE_DIR}/cmake/DDalphaAMG-Config.cmake.in"
+  "${PROJECT_BINARY_DIR}/DDalphaAMG-Config.cmake" @ONLY)
+install(FILES "${PROJECT_BINARY_DIR}/DDalphaAMG-Config.cmake"
+              "${PROJECT_BINARY_DIR}/DDalphaAMG-ConfigVersion.cmake"
+        DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/DDalphaAMG")
+
+
+# add_library(tmlqcd::DDalphaAMG alias DDalphaAMG)
diff --git a/LapH_ev.c b/LapH_ev.c
deleted file mode 100644
index dd96133fb..000000000
--- a/LapH_ev.c
+++ /dev/null
@@ -1,180 +0,0 @@
-/***********************************************************************
- * Copyright (C) 2002,2003,2004,2005,2006,2007,2008 Carsten Urbach
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- ***********************************************************************/
-/*
- *  Program for computing the eigensystem of the Laplacian operator
- * Authors Luigi Scorzato, Marco Cristoforetti
- *
- *
- *******************************************************************************/
-
-#ifdef HAVE_CONFIG_H
-#include "tmlqcd_config.h"
-#else
-#error "no tmlqcd_config.h"
-#endif
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#ifdef TM_USE_MPI
-#include <mpi.h>
-#endif
-#include <io/gauge.h>
-#include <io/params.h>
-#include "geometry_eo.h"
-#include "global.h"
-#include "init/init.h"
-#include "mpi_init.h"
-#include "ranlxd.h"
-#include "read_input.h"
-#include "solver/eigenvalues_Jacobi.h"
-#include "start.h"
-#include "su3.h"
-#include "xchange/xchange.h"
-
-int main(int argc, char *argv[]) {
-  int tslice, j, k;
-  char conf_filename[50];
-
-#ifdef TM_USE_MPI
-  MPI_Init(&argc, &argv);
-#endif
-
-  /* Read the input file */
-  read_input("LapH.input");
-
-  tmlqcd_mpi_init(argc, argv);
-
-  if (g_proc_id == 0) {
-#ifdef _GAUGE_COPY
-    printf("# The code was compiled with -D_GAUGE_COPY\n");
-#endif
-#ifdef _USE_HALFSPINOR
-    printf("# The code was compiled with -D_USE_HALFSPINOR\n");
-#endif
-#ifdef _USE_SHMEM
-    printf("# the code was compiled with -D_USE_SHMEM\n");
-#ifdef _PERSISTENT
-    printf("# the code was compiled for persistent MPI calls (halfspinor only)\n");
-#endif
-#endif
-#ifdef TM_USE_MPI
-#ifdef _NON_BLOCKING
-    printf("# the code was compiled for non-blocking MPI calls (spinor and gauge)\n");
-#endif
-#endif
-    printf("\n");
-    fflush(stdout);
-  }
-
-#ifndef WITHLAPH
-  printf(" Error: WITHLAPH not defined");
-  exit(0);
-#endif
-#ifdef TM_USE_MPI
-#ifndef _INDEX_INDEP_GEOM
-  printf(" Error: _INDEX_INDEP_GEOM not defined");
-  exit(0);
-#endif
-#ifndef _USE_TSPLITPAR
-  printf(" Error: _USE_TSPLITPAR not defined");
-  exit(0);
-#endif
-#endif
-#ifdef FIXEDVOLUME
-  printf(" Error: FIXEDVOLUME not allowed");
-  exit(0);
-#endif
-
-  init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0);
-  init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand);
-
-  if (g_proc_id == 0) {
-    fprintf(stdout, "The number of processes is %d \n", g_nproc);
-    printf("# The lattice size is %d x %d x %d x %d\n", (int)(T * g_nproc_t), (int)(LX * g_nproc_x),
-           (int)(LY * g_nproc_y), (int)(g_nproc_z * LZ));
-    printf("# The local lattice size is %d x %d x %d x %d\n", (int)(T), (int)(LX), (int)(LY),
-           (int)LZ);
-    printf("# Computing LapH eigensystem \n");
-
-    fflush(stdout);
-  }
-
-  /* define the geometry */
-  geometry();
-
-  start_ranlux(1, 123456);
-
-  /* Read Gauge field */
-  sprintf(conf_filename, "%s.%.4d", gauge_input_filename, nstore);
-  if (g_cart_id == 0) {
-    printf("#\n# Trying to read gauge field from file %s in %s precision.\n", conf_filename,
-           (gauge_precision_read_flag == 32 ? "single" : "double"));
-    fflush(stdout);
-  }
-  if ((j = read_gauge_field(conf_filename, g_gauge_field)) != 0) {
-    fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", j, conf_filename);
-    exit(-2);
-  }
-
-  if (g_cart_id == 0) {
-    printf("# Finished reading gauge field.\n");
-    fflush(stdout);
-  }
-
-#ifdef TM_USE_MPI
-  /*For parallelization: exchange the gaugefield */
-  xchange_gauge(g_gauge_field);
-#endif
-
-  /* Init Jacobi field */
-  init_jacobi_field(SPACEVOLUME + SPACERAND, 3);
-
-#ifdef TM_USE_MPI
-  {
-    /* for debugging in parallel set i_gdb = 0 */
-    volatile int i_gdb = 8;
-    char hostname[256];
-    gethostname(hostname, sizeof(hostname));
-    printf("PID %d on %s ready for attach\n", getpid(), hostname);
-    fflush(stdout);
-    if (g_cart_id == 0) {
-      while (0 == i_gdb) {
-        sleep(5);
-      }
-    }
-  }
-
-  MPI_Barrier(MPI_COMM_WORLD);
-#endif
-
-  for (k = 0; k < 3; k++) random_jacobi_field(g_jacobi_field[k], SPACEVOLUME);
-
-  /* Compute LapH Eigensystem */
-
-  for (tslice = 0; tslice < T; tslice++) {
-    eigenvalues_Jacobi(&no_eigenvalues, 5000, eigenvalue_precision, 0, tslice, nstore);
-  }
-
-#ifdef TM_USE_MPI
-  MPI_Finalize();
-#endif
-  return (0);
-}
diff --git a/Makefile.global b/Makefile.global
deleted file mode 100644
index dc1eefcf1..000000000
--- a/Makefile.global
+++ /dev/null
@@ -1,64 +0,0 @@
-# This Makefile is included from the other Makefiles
-# It contains some overall targets...
-
-# refresh Makefile and other stuff
-
-
-
-PROGRAMS_WITH_GIT_HASH := hmc_tm invert offline_measurement test_Dslash deriv_mg_tune
-
-.SUFFIXES:
-
-Makefile: ${top_srcdir}/Makefile.global $(srcdir)/Makefile.in $(abs_top_builddir)/config.status 
-	cd $(abs_top_builddir) \
-	  && CONFIG_FILES=$(subdir)/$@ CONFIG_HEADERS= $(SHELL) ./config.status
-
-$(abs_top_builddir)/config.status: $(top_srcdir)/configure
-	( cd ${abs_top_builddir} && $(SHELL) ./config.status --recheck ) 
-
-$(abs_top_builddir)/include/tmlqcd_config_internal.h: $(top_srcdir)/include/tmlqcd_config_internal.h.in $(abs_top_builddir)/config.status $(top_srcdir)/configure
-	( cd ${abs_top_builddir} && $(SHELL) ./config.status --header=include/tmlqcd_config_internal.h )
-
-# rebuild configure if configure.in changes but ignore errors
-# on many machines some of the macros fail to be recognized
-# but the resulting configure still works
-$(top_srcdir)/configure: $(top_srcdir)/configure.in 
-	-( cd $(top_srcdir) && $(AUTOCONF) )
-
-#dep rules
-
-# PROGRAMS_WITH_GIT_HASH require git_hash.h which is dynamically built by a phony make target
-# to prevent too frequent building of git_hash (slowing down the build)
-# we filter the list of all objects and treat these separately
-$(addsuffix .d, $(filter-out ${PROGRAMS_WITH_GIT_HASH},${ALLOBJ})): %.d: ${srcdir}/%.c Makefile
-	@ $(CCDEP) ${DEPFLAGS} ${DEFS} ${INCLUDES} $< > $@
-$(addsuffix .d, $(filter-out ${PROGRAMS_WITH_GIT_HASH},${CXXMODULES})): %.d: ${srcdir}/%.cpp Makefile
-	@ $(CXXDEP) ${CXXDEPFLAGS} ${DEFS} ${INCLUDES} $< > $@
-	
-# dirty hack to prevent make from entering an infinite loop because a phony target is given as a real
-# dependency (make will build invert.d and hmc_tm.d indefinitely)
-# when git_hash.h does not exist (as checked using wildcard) it is given as a dependency of invert.d and hmc_tm.d
-# once it exists, this is no longer the case
-# while this does break updating of git_hash.h while the dependencies are built, this is quite
-# irrelevant because it will be rebuilt during the compilation of either invert or hmc_tm
-ifneq (git_hash.h, $(findstring git_hash.h,$(wildcard $(top_srcdir)/git_has*.h)))
-$(addsuffix .d, $(filter ${PROGRAMS_WITH_GIT_HASH},${ALLOBJ})): %.d: ${srcdir}/%.c ${top_srcdir}/git_hash.h Makefile
-	@ $(CCDEP) ${DEPFLAGS} ${DEFS} ${INCLUDES} $< > $@
-else
-$(addsuffix .d, $(filter ${PROGRAMS_WITH_GIT_HASH},${ALLOBJ})): %.d: ${srcdir}/%.c Makefile
-	@ $(CCDEP) ${DEPFLAGS} ${DEFS} ${INCLUDES} $< > $@
-endif
-
-${top_builddir}/fixed_volume.h: ${top_srcdir}/fixed_volume.h.in ${top_builddir}/config.status
-	cd ${abs_top_builddir} && CONFIG_FILES=fixed_volume.h CONFIG_HEADERS= $(SHELL) ${top_builddir}/config.status
-
-all-recursive all-debug-recursive all-profile-recursive clean-recursive distclean-recursive compile-clean-recursive: Makefile
-	@set fnord ${MAKEFLAGS}; amf=$$2; \
-	dot_seen=no; \
-	target=`echo $@ | sed s/-recursive//`; \
-	list='$(SUBDIRS)'; for subdir in $$list; do \
-	  echo "Making $$target in $$subdir"; \
-	  local_target="$$target"; \
-	  ( cd $$subdir && $(MAKE) $$local_target ) \
-	    || case "$$amf" in *=*) exit 1;; *k*) fail=yes;; *) exit 1;; esac; \
-	done; test -z "$$fail";
diff --git a/Makefile.in b/Makefile.in
deleted file mode 100644
index 51437ff05..000000000
--- a/Makefile.in
+++ /dev/null
@@ -1,167 +0,0 @@
-srcdir = @srcdir@
-top_srcdir = @top_srcdir@
-abs_top_srcdir = @abs_top_srcdir@
-top_builddir =  .
-abs_top_builddir = @abs_top_builddir@
-builddir = @builddir@
-prefix = @prefix@
-exec_prefix = @exec_prefix@
-bindir = @bindir@
-program_transform_name = @program_transform_name@
-subdir = .
-
-AR = @AR@
-RANLIB = @RANLIB@
-CC = @CC@
-CXX = @CXX@
-CCDEP = @CCDEP@
-CXXDEP = @CXXDEP@
-CFLAGS = @CFLAGS@
-CXXFLAGS = @CXXFLAGS@
-LDFLAGS = @LDFLAGS@
-DEPFLAGS = @DEPFLAGS@
-CXXDEPFLAGS = @CXXDEPFLAGS@
-CPPFLAGS = @CPPFLAGS@
-CCLD = @CCLD@
-LEX = @LEX@
-AUTOCONF = @AUTOCONF@
-LIBS = @LIBS@
-SHELL = @SHELL@
-OPTARGS = @OPTARGS@
-SOPTARGS = @SOPTARGS@
-DEFS = @DEFS@
-USESUBDIRS = @USESUBDIRS@
-NVCC = @NVCC@
-GPUMPICOMPILER = @GPUMPICOMPILER@
-
-INCLUDES = @INCLUDES@
-LINK = $(CCLD) -o $@ ${LDFLAGS}
-
-COMPILE = ${CC} ${DEFS} ${INCLUDES} -o $@ ${CFLAGS}
-CXXCOMPILE = ${CXX} ${DEFS} ${INCLUDES} -o $@ ${CXXFLAGS} ${LDFLAGS}
-
-SMODULES = 
-
-MODULES = read_input gamma measure_gauge_action start \
-	expo matrix_utils get_staples update_backward_gauge \
-	measure_rectangles get_rectangle_staples  \
-	test/check_geometry test/check_xchange \
-	test/overlaptests \
-	invert_eo invert_doublet_eo update_gauge \
-	getopt sighandler reweighting_factor \
-	source_generation boundary update_tm ranlxd  \
-	mpi_init deriv_Sb deriv_Sb_D_psi ranlxs \
-	geometry_eo invert_overlap aligned_malloc \
-	prepare_source chebyshev_polynomial_nd Ptilde_nd  \
-	reweighting_factor_nd rnd_gauge_trafo \
-        update_momenta update_momenta_fg integrator  phmc \
-	little_D block operator \
-	spinor_fft \
-	fatal_error invert_clover_eo gettime \
-	tm_debug_printf compare_derivative \
-        @QUDA_INTERFACE@ @DDalphaAMG_INTERFACE@
-
-CXXMODULES = @QPHIX_INTERFACE@
-
-NOOPTMOD = test/check_xchange test/check_geometry
-
-PROGRAMS = hmc_tm benchmark invert gen_sources  \
-	check_locallity test_lemon hopping_test \
-	offline_measurement deriv_mg_tune @QPHIX_PROGRAMS@
-
-ALLOBJ = ${MODULES} ${PROGRAMS} ${SMODULES}
-SUBDIRS = ${USESUBDIRS}
-
-# delete the default suffix rules
-.SUFFIXES:
-
-# need to build modules before subdirs!
-all: Makefile dep $(SUBDIRS) hmc_tm invert benchmark offline_measurement deriv_mg_tune @QPHIX_PROGRAMS@
-
-$(SUBDIRS):
-	$(MAKE) --directory=$@
-
-# run the GIT-VERSION-GEN script to generate version information in git_hash.h
-# making sure that we run in the correct directory
-${top_srcdir}/git_hash.h:
-	@ ( cd @srcdir@ && sh GIT-VERSION-GEN )
-
--include $(addsuffix .d,$(ALLOBJ))
--include $(addsuffix .d,$(CXXMODULES))
-
-include ${top_srcdir}/Makefile.global
-
-# follow https://www.owlfolio.org/possibly-useful/flex-input-scanner-rules-are-too-complicated/
-# and pass the -Ca option such that more than 32k "NFA" states are allowed
-# our ruleset is so complicated that this has become necessary!
-${top_srcdir}/read_input.c: ${top_srcdir}/read_input.l
-ifneq (,$(findstring lex,${LEX}))
-	${LEX} -Ca -Ptmlqcd -i -t ${top_srcdir}/read_input.l > ${top_srcdir}/read_input.c
-else
-	$(error Unable to find (f)lex, read_input.c not built. Please install (f)lex!)
-endif
-
-libhmc.a: ${addsuffix .o, ${MODULES} ${SMODULES}} Makefile
-	@rm -f libhmc.a
-	@${AR} cru libhmc.a ${addsuffix .o, ${MODULES} ${SMODULES}}
-	@$(RANLIB) libhmc.a
-	@cp libhmc.a ${top_builddir}/lib/libhmc.a
-
-$(addsuffix .o,$(filter-out ${NOOPTMOD},${MODULES})): %.o: ${srcdir}/%.c %.d Makefile $(abs_top_builddir)/include/tmlqcd_config_internal.h
-	${COMPILE} ${OPTARGS} -c $<
-
-#here we don't need optimisation
-$(addsuffix .o,$(filter ${NOOPTMOD},${MODULES})): %.o: ${srcdir}/%.c %.d Makefile $(abs_top_builddir)/include/tmlqcd_config_internal.h
-	${COMPILE} -c $<
-
-${addsuffix .o, ${SMODULES}}: %.o: ${srcdir}/%.c %.d Makefile $(abs_top_builddir)/include/tmlqcd_config_internal.h
-	${COMPILE} ${SOPTARGS} -c $<
-
-# C++ modules
-$(addsuffix .o,${CXXMODULES}): %.o: ${srcdir}/%.cpp %.d Makefile $(abs_top_builddir)/include/tmlqcd_config_internal.h
-	${CXXCOMPILE} -c $<
-	
-${addsuffix .o, ${PROGRAMS}}: %.o: ${srcdir}/%.c %.d Makefile $(abs_top_builddir)/include/tmlqcd_config_internal.h ${top_srcdir}/git_hash.h
-	${COMPILE} ${OPTARGS} -c $<
-
-${PROGRAMS}: %: %.o libhmc.a $(SUBDIRS) $(addsuffix .o,${CXXMODULES})
-	 ${LINK} $@.o $(addsuffix .o,${CXXMODULES}) $(GPUOBJECTS) $(GPUOBJECTS_C) $(LIBS) ${LDFLAGS}
-
-
-# The rules for unit tests are kept in a separate file for tidyness
-include ${top_srcdir}/Makefile.tests
-
-dep: $(addsuffix .d,$(ALLOBJ)) $(addsuffix .d,$(CXXMODULES))
-	@ echo "...dependency files built"
-
-install: Makefile
-	@mkdir -p $(bindir); \
-	for p in hmc_tm invert benchmark offline_measurement deriv_mg_tune; do \
-	  progname=`echo $$p | sed '$(program_transform_name)'`; \
-	  echo "Installing $$p as $$progname in $(bindir)..."; \
-	  cp $$p $(bindir)/$$progname; \
-	done; \
-	echo "done";
-
-uninstall: Makefile
-	for p in hmc_tm invert benchmark offline_measurement deriv_mg_tune; do \
-	  progname=`echo $$p | sed '$(program_transform_name)'`; \
-	  echo "Un-Installing $$progname in $(bindir)..."; \
-	  rm $(bindir)/$$progname; \
-	done; \
-	echo "done";
-
-compile-clean: compile-clean-recursive Makefile
-	rm -f *.o *.d test/*.o test/*.d tests/*.o tests/*.d
-
-clean: clean-recursive Makefile
-	rm -f benchmark hmc_tm invert offline_measurement test_Dslash deriv_mg_tune @QPHIX_PROGRAMS@ *.o *.d test/*.o test/*.d tests/*.o tests/*.d
-
-distclean: distclean-recursive Makefile
-	rm -f benchmark hmc_tm invert offline_measurement *.o *.d *~ Makefile config.log config.status fixed_volume.h
-	rm -f include/tmlqcd_config_internal.h
-
-.PHONY: all ${SUBDIRS} ${top_srcdir}/git_hash.h clean compile-clean distclean dep install \
-	all-recursive all-debug-recursive all-profile-recursive \
-	clean-recursive distclean-recursive \
-	compile-clean-recursive
diff --git a/Makefile.tests b/Makefile.tests
deleted file mode 100644
index a9a393ac6..000000000
--- a/Makefile.tests
+++ /dev/null
@@ -1,64 +0,0 @@
-TESTS = tests/test_sample tests/test_su3 tests/test_buffers tests/test_qpx tests/test_linalg tests/test_clover tests/test_rat
-
-TEMP = $(patsubst %.c,%,$(wildcard $(top_srcdir)/tests/*.c))
-TESTMODULES = $(patsubst $(top_srcdir)/%,%,$(TEMP))
-
-TESTFLAGS = -L$(top_builddir)/cu/ -lcu
-
-$(addsuffix .o,$(TESTMODULES)): %.o : $(top_srcdir)/%.c
-	${COMPILE} -c $(OPTARGS) ${DEFS} $<
-
-# The linking stage needs to be differentiated because different tests rely on
-# different modules from the codebase
-# Each test itself consists of a number of modules that need to be linked.
-
-# when used as a prerequisite, the wildcard with "tests/test_sample*.c" replaced by "$@*.c" is not evaluated
-# correctly, even though it works perfectly in an echo statement, it results in make
-# trying to compile all objects in top_srcdir
-# we therefore evaluate the wildcard into a variable
-
-TEST_SAMPLE_OBJECTS:=$(patsubst $(top_srcdir)/%.c,%.o,$(wildcard $(top_srcdir)/tests/test_sample*.c))
-TEST_SAMPLE_FLAGS:=
-TEST_SAMPLE_LIBS:=$(top_builddir)/cu/libcu.a
-tests/test_sample: $(TEST_SAMPLE_OBJECTS) $(TEST_SAMPLE_LIBS)
-	${LINK} $(TEST_SAMPLE_OBJECTS) $(TESTFLAGS) $(TEST_SAMPLE_FLAGS)
-
-TEST_SU3_OBJECTS:=$(patsubst $(top_srcdir)/%.c,%.o,$(wildcard $(top_srcdir)/tests/test_su3*.c)) expo.o
-TEST_SU3_FLAGS:=-lm
-TEST_SU3_LIBS:=$(top_builddir)/cu/libcu.a
-tests/test_su3: $(TEST_SU3_OBJECTS) $(TEST_SU3_LIBS)
-	${LINK} $(TEST_SU3_OBJECTS) $(TESTFLAGS) $(TEST_SU3_FLAGS)
-
-TEST_QPX_OBJECTS:=$(patsubst $(top_srcdir)/%.c,%.o,$(wildcard $(top_srcdir)/tests/test_qpx*.c)) 
-TEST_QPX_FLAGS:=-lm
-TEST_QPX_LIBS:=$(top_builddir)/cu/libcu.a
-tests/test_qpx: $(TEST_QPX_OBJECTS) $(TEST_QPX_LIBS)
-	${LINK} $(TEST_QPX_OBJECTS) $(TESTFLAGS) $(TEST_QPX_FLAGS)
-
-TEST_LINALG_OBJECTS:=$(patsubst $(top_srcdir)/%.c,%.o,$(wildcard $(top_srcdir)/tests/test_linalg*.c)) 
-TEST_LINALG_FLAGS:=-lm
-TEST_LINALG_LIBS:=$(top_builddir)/cu/libcu.a $(top_builddir)/linalg/liblinalg.a
-tests/test_linalg: $(TEST_LINALG_OBJECTS) $(TEST_LINALG_LIBS)
-	${LINK} $(TEST_LINALG_OBJECTS) $(TEST_LINALG_LIBS) $(TESTFLAGS) $(TEST_LINALG_FLAGS)
-
-TEST_BUFFERS_OBJECTS:=$(patsubst $(top_srcdir)/%.c,%.o,$(wildcard $(top_srcdir)/tests/test_buffers*.c)) fatal_error.o
-TEST_BUFFERS_FLAGS:=-lbuffers -L$(top_builddir)/buffers/
-TEST_BUFFERS_LIBS:=$(top_builddir)/cu/libcu.a $(top_builddir)/buffers/libbuffers.a
-tests/test_buffers: $(TEST_BUFFERS_OBJECTS) $(TEST_BUFFERS_LIBS)
-	${LINK} $(TEST_BUFFERS_OBJECTS) $(TESTFLAGS) $(TEST_BUFFERS_FLAGS)
-
-TEST_CLOVER_OBJECTS:=$(patsubst $(top_srcdir)/%.c,%.o,$(wildcard $(top_srcdir)/tests/test_clover*.c)) operator/clover_leaf.o
-TEST_CLOVER_FLAGS:=-lm -lhmc -llinalg
-TEST_CLOVER_LIBS:=$(top_builddir)/cu/libcu.a
-tests/test_clover: $(TEST_CLOVER_OBJECTS) $(TEST_CLOVER_LIBS)
-	${LINK} $(TEST_CLOVER_OBJECTS) $(TESTFLAGS) $(TEST_CLOVER_FLAGS)
-
-TEST_RAT_OBJECTS:=$(patsubst $(top_srcdir)/%.c,%.o,$(wildcard $(top_srcdir)/tests/test_rat*.c)) 
-TEST_RAT_FLAGS:=-lm -lrational
-TEST_RAT_LIBS:=$(top_builddir)/cu/libcu.a
-tests/test_rat: $(TEST_RAT_OBJECTS) $(TEST_RAT_LIBS)
-	${LINK} $(TEST_RAT_OBJECTS) $(TESTFLAGS) $(TEST_RAT_FLAGS)
-
-
-tests: ${TESTS}
-
diff --git a/README b/README
deleted file mode 100644
index f7bf3a896..000000000
--- a/README
+++ /dev/null
@@ -1,364 +0,0 @@
-Here are some remarks collected in order to configure, compile and
-install the tmLQCD programme suit. For more information, also about running
-the code please read the documentation in the doc sub-directory. 
-
-CONFIGURE and COMPILE
-
-It is recommended to build the code not in the source directory but in
-a separate directory.
-
-The lime library (tested with version 1.2.3) is needed to compile the
-program. Please download it at
-
-http://usqcd.jlab.org/usqcd-software/c-lime/
-
-Configure and compile lime (for documentation see
-http://usqcd.jlab.org/usqcd-docs/c-lime/) first.
-Then you should use the configure option --with-lime=dir for the
-tmLQCD to set the correct directory where to find lime (see below). 
-
-For more documentation please change into the doc directory and type
-latex main.tex
-and see the sections for configuring, installing and testing the code.
-
-Here we have gathered some examples for some standard architectures.
-Building the tmLQCD executables is a three step procedure:
-
-****************************************************************************
-
-1) configure:
-
-In your build directory type
-
-path-to-the-sources/configure --help
-
-to get an overview of the available options and switches. In
-particular check out the prefix option for your installation path. 
-What follows now are some examples for a few standard architectures.
-
-- a scalar build on a P4 machine would look like:
-
-path-to-the-sources/configure --disable-mpi --enable-sse2 --enable-p4 \
-  --enable-gaugecopy --disable-newdiracop --with-limedir=<path-to-lime> \
-  --with-lapack="<linker options needed for lapack>" \
-  CC=<cc>
-
-- Opteron with SSE2:
-
-path-to-the-sources/configure --disable-mpi --enable-sse2 --enable-opteron \
-  --enable-gaugecopy --disable-newdiracop --with-limedir=<path-to-lime> \
-  --with-lapack="<linker options needed for lapack>" \
-  CC=<cc>
-
-- A MPI parallel (4dims) build on a P4 cluster:
-
-path-to-the-sources/configure --enable-mpi --enable-sse2 --enable-p4 \
-  --with-mpidimension=4 --enable-gaugecopy --disable-newdiracop \
-  --with-limedir=<path-to-lime> --with-lapack="<linker options needed for lapack>" \
-  CC=<mpicc>
-
-- on the Munich Altix machine:
-
-path-to-the-sources/configure --enable-mpi --with-mpidimension=4 \
-  --with-limedir=<path-to-lime> --enable-newdiracop \
-  --disable-shmem --with-lapack="<linker options needed for lapack>" \
-  CC=mpicc CFLAGS="-mcpu=itanium2 -O3 -g -c99 -mtune=itanium2" 
-
-for lapack on this machine please type
-module load mkl
-
-
-- on the HLRB ice installation use
-
-path-to-the-sources/configure --enable-mpi --with-mpidimension=4 \
-   --disable-sse2 --disable-p4  --with-limedir=<path-to-lime> \
-   --enable-newdiracop --with-lapack="<linker options needed for lapack>" \
-   CC="mpicc -std=c99" CFLAGS="-g" \
-
-where it is again important to use the Intel C compiler! 
-
-for lapack first load the module mkl and then use
-
---with-lapack="-L$LIBRARY_PATH -llapack -lblas"
-
-You may enable or disable other configure options as needed. See the
-documentation for more details.
-
-****************************************************************************
-
-2) make
-
-type `make` in your build directory.
-
-If there appears no error message during compilation you should end up
-with a few executable in the build directory, namely `hmc_tm`,
-`invert` and `invert_doublet`.
-
-****************************************************************************
-
-3) make install
-
-type `make install`
-
-to get the executables installed.
-
-
-
-****************************************************************************
-****************************************************************************
-
-in the following we provide a "codemap", giving a short explanation
-for the contents of each c-file:
-
-****************************************************************************
-top directory: apart from the main routines all routines are compiled into
-	       the run-time library libhmc.
-
-DML_crc32.c: invert, invert_doublet, hmc_tm
-	     some helper functions to compute the SCIDAC 
-	     checksum
-D_psi.c:     invert, invert_doublet, hmc_tm
-	     Wilson twisted mass Dirac operator, not even/odd 
-	     preconditioned 
-Hopping_Matrix.c: invert, invert_doublet, hmc_tm
-	     Hopping matrix for the even/odd preconditioned 
-	     Dirac operator
-Hopping_Matrix_nocom.c: benchmark
-	     Hopping matrix for the even/odd preconditioned 
-	     Dirac operator, communication switched off
-Nondegenerate_Matrix.c: invert_doublet, hmc_tm
-	     operators needed for even/odd preconditioning 
-	     the non-degenerate flavour doublet Dirac operator
-Ptilde_nd.c: hmc_tm
-	     the more precise polynomial $\tilde P$ needed for 
-	     the PHMC for the non-degenerate flavour doublet
-benchmark.c: main routine
-	     benchmark code for D_psi and Hopping_Matrix
-block.c:     experimental
-boundary.c:  invert, invert_doublet, hmc_tm
-	     implements the twisted boundary conditions for the
-	     spinor fields
-chebyshev_polynomial.c: experimental
-chebyshev_polynomial_nd.c: hmc_tm
-	     implements the generation of coefficients for the 
-	     chebyshev polynomial using the clenshaw recursion 
-	     relation
-deriv_Sb.c:  hmc_tm
-	     the variation of Q=gamma_5 D with respect to the 
-	     gauge fields in the even/odd case 
-deriv_Sb_D_psi.c: hmc_tm
-	     the variation of Q=gamma_5 D with respect to the 
-	     gauge fields in the non even/odd case 
-det_monomial.c: hmc_tm
-	     implements the functions needed for a det monomial
-detratio_monomial.c: hmc_tm
-	     implements the functions needed for a detratio monomial
-poly_monomial.c: hmc_tm
-             implements function needed for a POLY monomial 
-             (PHMC for light degenerate quarks)
-dml.c:       invert, invert_doublet, hmc_tm
-	     some helper functions to compute the SCIDAC 
-	     checksum
-double2single.c: main routine
-	     can convert a gauge field from double to single precision
-single2double.c: main routine
-	     can convert a gauge field from single to double precision
-eigenvalues_bi.c: hmc_tm
-	     computes eigenvalues of the mass non-degenerate two flavour 
-	     Dirac operatoe
-expo.c:      hmc_tm
-	     implements the exponetial function of an su(3) element
-gamma.c:     invert, invert_doublet, hmc_tm
-	     implements multiplication of gamma matrices and some useful
-	     combination of those with a spinor field
-gauge_io.c:  invert, invert_doublet, hmc_tm
-	     IO routines for gauge fields 
-gauge_monomial.c: hmc_tm
-	     implements the functions needed for a gauge monomial
-gen_sources.c: invert, invert_doublet, hmc_tm
-	     implements the generation of source spinor fields
-geometry_eo.c: invert, invert_doublet, hmc_tm
-	     anything related to gauge and spinor field geometry
-get_rectangle_staples.c: hmc_tm
-             computes rectangular staples of gauge links as needed for
-	     e.g. the Iwasaki gauge action and its derivative
-get_staples.c: hmc_tm
-             computes plaquette staples of gauge links as needed for
-	     for all gauge actions and their derivatives
-getopt.c:    invert, invert_doublet, hmc_tm
-	     needed for command line options
-hmc_tm.c:    main routine
-	     hmc_tm executable
-hybrid_update.c: hmc_tm
-	     implements the functions for the gauge field update and
-	     the momenta update
-init_bispinor_field.c 
-init_chi_copy.c
-init_chi_spinor_field.c
-init_dirac_halfspinor.c
-init_gauge_field.c
-init_gauge_tmp.c
-init_geometry_indices.c
-init_moment_field.c
-init_spinor_field.c
-init_stout_smear_vars.c: invert, invert_doublet, hmc_tm
-	     provide routines to allocate memory for the corresponding
-	     objects
-integrator.c: hmc_tm
-	     implements the routines needed for the integrator in the
-	     MD udpate
-invert.c:    main routine
-	     invert executable
-invert_doublet.c: main routine
-	     invert_doublet executable
-invert_doublet_eo.c: invert_doublet
-	     performs an inversion of the flavour doublet operator using
-	     even/odd preconditioning and the CG solver
-invert_eo.c: invert
-	     performs an inversion of the Wilson twisted mass Dirac operator
-	     using a solver as specified in the input file. Depending on the 
-	     input file even/odd preconditioning is used or not
-io.c:        invert, invert_doublet, hmc_tm
-	     helper routines: some deprecated IO routines for gauge and spinor 
-	     spinor fields, and the routine writing the initial stdout message
-	     of the executables
-io_utils.c:  invert, invert_doublet, hmc_tm
-	     IO helper routines related to swap endian and checksums
-linsolve.c:  hmc_tm
-	     CG and bicgstab solvers as used only in the HMC
-little_D.c:  experimental
-measure_rectangles.c: hmc_tm
-	     computes the gauge action related to the rectangular part
-monomial.c:  hmc_tm
-             provides the definition for monomials and initialisation functions
-mpi_init.c:  invert, invert_doublet, hmc_tm, benchmark
-	     MPI initialisation routine
-ndpoly_monomial.c: hmc_tm
-	     implements the functions needed for a ndpoly monomial
-observables.c: hmc_tm, invert, invert_doublet
-	     computes the gauge action related to the Wilson plaquette part
-online_measurement.c: hmc_tm
-	     anything related to online measurements
-phmc.c       hmc_tm
-	     functions and variables as needed for the PHC
-polyakov_loop.c: hmc_tm
-	     measures the polyakov loop
-propagator_io.c: invert, invert_doublet, hmc_tm
-	     functions related to spinor field IO
-ranlxd.c:    invert, invert_doublet, hmc_tm
-	     RANLUX random number generator (64 Bit)
-ranlxs.c:    invert, invert_doublet, hmc_tm
-	     RANLUX random number generator (32 Bit)
-read_input.l: invert, invert_doublet, hmc_tm
-             definition of the input file parser (flex)
-reweighting_factor.c: experimental
-reweighting_factor_nd.c: experimental
-sighandler.c: invert, invert_doublet, hmc_tm
-	     handles signal related to illegal instructions
-start.c:     invert, invert_doublet, hmc_tm
-	     functions needed to give initial values to gauge and spinor fields
-stout_smear.c: invert, invert_doublet
-	     functions to stout smear a given gauge configuration
-stout_smear_force.c: experimental
-tm_operators.c: invert, invert_doublet, hmc_tm
-	     operators needed for even/odd preconditioning the Wilson
-	     twisted mass Dirac operator
-update_backward_gauge.c: invert, invert_doublet, hmc_tm
-	     functions to update the gauge copy
-update_momenta.c: hmc_tm
-	     function to update the momenta in the HMC MD part
-update_tm.c: hmc_tm
-	     the HMC MD part
-xchange_2fields.c: invert, invert_doublet, hmc_tm
-	     implements the MPI communication of two even/odd spinor fields
-	     at once
-xchange_deri.c: hmc_tm
-	     implements the MPI communication of derivatives
-xchange_field.c: invert, invert_doublet, hmc_tm
-	     implements the MPI communication of a single even/odd spinor
-	     field
-xchange_gauge.c: invert, invert_doublet, hmc_tm
-	     implements the MPI communication of the gauge field
-xchange_halffield.c: invert, invert_doublet, hmc_tm
-	     implements the MPI communication of a half spinor field
-xchange_lexicfield.c: invert, invert_doublet, hmc_tm
-	     implements the MPI communication of a single (full) spinor
-	     field
-
-****************************************************************************
-the linalg directory: all routines here are compiled into the liblinalg
-                      runtime library
-                      capital letters are spinor fields, others scalars
-add.c:                Q = R + S
-assign.c:             R = S
-assign_add_mul.c:     P = P + c Q with c complex
-assign_add_mul_r.c:   P = P + c Q with c real
-assign_add_mul_add_mul.c:   R = R + c1*S + c2*U with c1 and c2 complex variables
-assign_add_mul_add_mul_r.c: R = R + c1*S + c2*U with c1 and c2 real variables
-assign_diff_mul.c:    S=S-c*Q
-assign_mul_add_mul_add_mul_add_mul_r.c: R = c1*R + c2*S + c3*U + c4*V
-			 		with c1, c2, c3, c4 real variables
-assign_mul_add_mul_add_mul_r.c:         R = c1*R + c2*S + c3*U 
-					with c1, c2 and c3 real variables
-assign_mul_add_mul_r.c:     R = c1*R + c2*S , c1 and c2 are real constants 
-assign_mul_add_r.c:         R = c*R + S  c is a real constant
-assign_mul_bra_add_mul_ket_add.c:       R = c2*(R + c1*S) + (*U)
-					with c1 and c2 complex variables
-assign_mul_bra_add_mul_ket_add_r.c:     R = c2*(R + c1*S) + (*U)
-					with c1 and c2 complex variables
-assign_mul_bra_add_mul_r.c:             R = c1*(R + c2*S)
-					with c1 and c2 complex variables
-comp_decomp.c:                          Splits the Bi-spinor R in the spinors S and T 
-convert_eo_to_lexic.c:                  convert to even odd spinors to one full spinor
-diff.c:                 Q = R - S
-diff_and_square_norm.c: Q = R - S and ||Q||^2
-mattimesvec.c:          w = M*v for complex vectors w,v and and complex square matrix M
-mul.c:                  R = c*S, for complex c
-mul_r.c:                R = c*S, for real c
-mul_add_mul.c:          R = c1*S + c2*U , c1 and c2 are complex constants
-mul_add_mul_r.c         R = c1*S + c2*U , c1 and c2 are real constants
-mul_diff_mul.c:         R = c1*S - c2*U , c1 and c2 are complex constants
-mul_diff_mul_r.c        R = c1*S - c2*U , c1 and c2 are real constants
-mul_diff_r.c            R = c1*S - U , c1 is a real constant 
-scalar_prod.c:          c = (R, S)
-scalar_prod_i.c:        c = Im(R, S)
-scalar_prod_r.c:        c = Re(R, S)
-square_and_prod_r.c:    Returns Re(R,S) and the square norm of S
-square_norm.c:          c = ||Q||^2
-
-****************************************************************************
-solver directory: all routines here are compiled into the libsolver
-                  runtime library
-		  the solvers are for spinor fields, if not indicated
-		  otherwise.
-
-Msap.c:                 experimental SAP preconditioner
-bicgstab_complex.c:     BiCGstab for complex fields
-bicgstabell.c:          experimental
-cg_her.c :              CG solver for hermitian operators
-cg_her_nd.c:            CG solver for hermitian heavy doublet operators
-cgs_real.c:             CGS solver
-chrono_guess.c:         routines for the chronological solver
-dfl_projector.c:        experimental
-diagonalise_general_matrix.c:  subroutine to diagonalise a complex n times n
-                               matrix. Input is a complex matrix in _C_ like
-                               order. Output is again _C_ like. Uses lapack
-eigenvalues.c           compute the nr_of_eigenvalues lowest eigenvalues
-                        of (gamma5*D)^2
-fgmres.c:               FGMRES (flexible GMRES) solver
-gcr.c:                  GCR solver
-gcr4complex.c:          GCR solver for complex fields
-generate_dfl_subspace.c: experimental
-gmres.c:                GMRES solver
-gmres_dr.c:             GMRES-DR solver
-gmres_precon.c:         GMRES usable for preconditioning other solvers (experimental)
-gram-schmidt.c:         Gram-Schmidt orthonormalisation routines
-jdher.c:                Jacobi Davidson for hermitian matrices (to compute EVs)
-lu_solve.c:             compute the inverse of a matrix with LU decomposition
-mr.c:                   MR solver
-pcg_her.c:              PCG solver
-poly_precon.c:          polynomial preconditioner using Chebysheff polynomials
-			with complex argument
-quicksort.c:            a quicksort routine
-sub_low_ev.c:           routines to subtract exactly computed eigenvectors from
-			a given spinor field
diff --git a/README.md b/README.md
new file mode 100644
index 000000000..f01ab8284
--- /dev/null
+++ b/README.md
@@ -0,0 +1,143 @@
+The software ships with a CMake environment, which will configure and build the
+programmes. It is recommended to configure and build the executables in a
+separate build directory. This also allows to have several builds with different
+options from the same source code directory.
+
+## Prerequisites
+
+In order to compile the programmes the `LAPACK` library (fortran version) needs to be installed. CMake will search for the
+library in all default directories. Also the latest version (tested is version
+1.2.3) of `C-LIME` must be available, which is used as
+a packaging scheme to read and write gauge configurations and propagators to
+files.
+
+## Configuring the hmc package
+:label{sec:config}
+
+The build system uses CMake to configure and build the hmc package. The
+following list gives all options (OFF by default unless specified):
+- `CMAKE_POSITION_INDEPENDENT_CODE`: Build a position independent
+  code. **ON** by default.
+- `BUILD_SHARED_LIBS`: Build the shared version of the hmc library.
+- `TM_USE_FFTW`: Enable fftw support. 
+- `TM_USE_CUDA`: Enable CUDA support.
+- `TM_USE_HIP`: Enable HIP support (AMD or NVidia GPUs)
+- `TM_USE_DDalphaAMG`: Enable DDalphaAMG support.
+- `TM_USE_LEMON`: Use the lemon io library.
+- `TM_USE_OMP`: Enable OpenMP (**ON** by default)
+- `TM_FIXEDVOLUME`: Fix volume at compile time.
+- `TM_ENABLE_ALIGNMENT`: Automatically or explicitly align arrays to a
+  given number of bytes. Accepted values: auto, none, 16, 32, 64.
+- `TM_USE_GAUGE_COPY`: Enable use of a copy of the gauge field (**ON**
+  by default). See the Dirac operator section (`sec:dirac`) of the LaTeX
+  documentation for details. It will increase the memory requirement of the code.
+- `TM_USE_HALFSPINOR`: Use a Dirac operator with halfspinor exchange (**ON**
+  by default). See the same section (`sec:dirac`) for details.
+- `TM_USE_QUDA`: Enable QUDA support.
+- `TM_USE_SHMEM`: Use shmem API.
+- `TM_ENABLE_WARNINGS`: Enable all warnings (**ON** by default).
+- `TM_ENABLE_TESTS`: Enable tests.
+- `TM_USE_QPHIX`: Enable QPhiX.
+  - `TM_QPHIX_SOALEN`: QPhiX specific parameter (default is 4)
+  - `QPHIX_DIR`: Directory where QPhiX is installed.
+    The current QPhiX CMake build system does not export all the information
+    (include and lib directories) needed to compile hmc.
+  - `QMP_DIR`: Directory where QMP (a QPhiX dependency) is
+    installed.
+    The current QPhiX CMake build system exports neither its include and lib
+    directories nor its dependencies (QMP in this case).
+- `TM_USE_MPI`: Enable MPI support.
+  - `TM_PERSISTENT_MPI`: Use persistent MPI calls for halfspinor.
+  - `TM_NONBLOCKING_MPI`: Use non-blocking MPI calls for spinor and
+    gauge.
+  - `TM_MPI_DIMENSION`: Use $n$ dimensional parallelisation ($XYZT$)
+    [default=4]. The number of parallel directions can be specified. $1, 2, 3$ and $4$
+    dimensional parallelisation is supported.
+  - `TM_USE_LEMON`: Use the lemon io library.
+
+The following minimal list of commands will configure and build the hmc package with
+minimal dependencies
+
+```bash
+mkdir build
+cd build
+cmake -DCMAKE_INSTALL_PREFIX=/my_path -DCMAKE_PREFIX_PATH=/my_c_line_path ..
+make -j
+make install
+```
+
+These instructions assume that the `c-lime` package is installed in `/my_c_line_path`. The `CMAKE_PREFIX_PATH` variable is a
+semicolon-separated list of paths containing the installation prefixes of all
+dependencies.
+
+Adding `-DTM_USE_MPI=ON` will enable MPI support with parallelization
+over spatial and temporal dimensions. The command line is then
+
+```bash
+cmake -DCMAKE_INSTALL_PREFIX=/my_path -DCMAKE_PREFIX_PATH=/my_c_line_path -DTM_USE_MPI=ON ..
+```
+
+We can combine it with the lemon-io library (installed in `/my_lemon_path`)
+
+```bash
+cmake -DCMAKE_INSTALL_PREFIX=/my_path \
+      -DCMAKE_PREFIX_PATH="/my_c_line_path;/my_lemon_path" \
+      -DTM_USE_MPI=ON \
+      -DTM_USE_LEMON=ON ..
+```
+
+`QUDA` support (installed in `/my_quda_path`) can be added with
+
+```bash
+cmake -DCMAKE_INSTALL_PREFIX=/my_path \
+      -DCMAKE_PREFIX_PATH="/my_c_line_path;/my_lemon_path;/my_quda_path" \
+      -DTM_USE_MPI=ON \
+      -DTM_USE_LEMON=ON \
+      -DTM_USE_QUDA=ON \
+      -DTM_USE_CUDA=ON \
+      -DCMAKE_CUDA_ARCHITECTURES=90 ..
+```
+
+Note that the command assumes that QUDA is compiled with `CUDA` support. AMD GPUs
+are also supported after replacing `-DTM_USE_CUDA=ON` with
+`-DTM_USE_HIP=ON` and compiling `QUDA` with `HIP` support. The ROCm architecture is set with the variable
+`CMAKE_HIP_ARCHITECTURES` (e.g. `gfx90a`). An extra parameter `-DCMAKE_CXX_COMPILER=clang++` is needed because `QUDA` uses the ROCm `clang++`
+compiler internally and the build will fail if `gcc` or any other compiler is used at
+link time. This option only affects the linking behavior, not the compilation. The cmake command line for HIP/ROCm support is then
+```bash
+cmake -DCMAKE_INSTALL_PREFIX=/my_path \
+    -DCMAKE_PREFIX_PATH="/my_c_line_path;/my_lemon_path;/my_quda_path" \
+    -DTM_USE_MPI=ON \
+    -DTM_USE_LEMON=ON \
+    -DTM_USE_QUDA=ON \
+    -DTM_USE_HIP=ON \
+    -DCMAKE_HIP_ARCHITECTURES=gfx90a \
+    -DCMAKE_CXX_COMPILER=/opt/rocm/bin/clang++ ..
+```
+
+`QPhiX` and/or `DDalphaAMG` support can be added with
+
+```bash
+cmake -DCMAKE_INSTALL_PREFIX=/my_path \
+      -DCMAKE_PREFIX_PATH="/my_c_line_path;/my_lemon_path;/my_quda_path;/my_path_ddalphaamg" \
+      -DTM_USE_MPI=ON \
+      -DTM_USE_LEMON=ON \
+      -DTM_USE_QUDA=ON \
+      -DTM_USE_CUDA=ON \
+      -DCMAKE_CUDA_ARCHITECTURES=90 \
+      -DTM_USE_QPHIX=ON \
+      -DQPHIX_DIR=/my_qphix_dir \
+      -DTM_USE_DDalphaAMG=ON \
+      -DQMP_DIR=/my_qmp_dir \
+      -DTM_USE_OMP=ON ..
+```
+
+`QPhiX` cmake config support is incomplete and requires both the `QPhiX`
+and `QMP` installation directories to work properly.
+
+`CMake` has several relevant specific options that control the build. Compiler
+options are defined by the variable `CMAKE_C_FLAGS` and `CMAKE_CXX_FLAGS`. CUDA and HIP compilations options are controlled by their
+equivalent `CMAKE_{CUDA/HIP}_FLAGS`.
+
+Adding for instance `-GNinja` to the `CMake` command line will use
+ninja instead of make.
diff --git a/cmake/DDalphaAMG-Config.cmake.in b/cmake/DDalphaAMG-Config.cmake.in
new file mode 100644
index 000000000..abb7d45d3
--- /dev/null
+++ b/cmake/DDalphaAMG-Config.cmake.in
@@ -0,0 +1,50 @@
+cmake_minimum_required(VERSION 3.23)
+
+if (NOT TARGET DDalphaAMG::DDalphaAMG)
+  # find_dependency() is defined by CMakeFindDependencyMacro, not by CMake core.
+  include(CMakeFindDependencyMacro)
+  find_dependency(MPI REQUIRED)
+
+  # Re-export the build-time feature switches so consumers can query them.
+  if (@DDalphaAMG_ENABLE_PARAMOUNT_OUTPUT@)
+    set(DDalphaAMG_ENABLE_PARAMOUNT_OUTPUT @DDalphaAMG_ENABLE_PARAMOUNT_OUTPUT@)
+  endif()
+
+  if (@DDalphaAMG_ENABLE_FGMRES_RESTEST@)
+    set(DDalphaAMG_ENABLE_FGMRES_RESTEST @DDalphaAMG_ENABLE_FGMRES_RESTEST@)
+  endif()
+
+  if (@DDalphaAMG_ENABLE_PROFILING@)
+    set(DDalphaAMG_ENABLE_PROFILING @DDalphaAMG_ENABLE_PROFILING@)
+  endif()
+
+  if (@DDalphaAMG_ENABLE_TRACK_RES@)
+    set(DDalphaAMG_ENABLE_TRACK_RES @DDalphaAMG_ENABLE_TRACK_RES@)
+  endif()
+
+  if (@DDalphaAMG_ENABLE_TESTVECTOR_ANALYSIS@)
+    set(DDalphaAMG_ENABLE_TESTVECTOR_ANALYSIS @DDalphaAMG_ENABLE_TESTVECTOR_ANALYSIS@)
+  endif()
+
+  if (@DDalphaAMG_ENABLE_SCHWARZ_RES@)
+    set(DDalphaAMG_ENABLE_SCHWARZ_RES @DDalphaAMG_ENABLE_SCHWARZ_RES@)
+  endif()
+
+  if (@DDalphaAMG_ENABLE_COARSE_RES@)
+    set(DDalphaAMG_ENABLE_COARSE_RES @DDalphaAMG_ENABLE_COARSE_RES@)
+  endif()
+  if (@DDalphaAMG_ENABLE_SINGLE_ALLREDUCE_ARNOLDI@)
+    set(DDalphaAMG_ENABLE_SINGLE_ALLREDUCE_ARNOLDI @DDalphaAMG_ENABLE_SINGLE_ALLREDUCE_ARNOLDI@)
+  endif()
+  if (@DDalphaAMG_ENABLE_OMP@)
+    set(DDalphaAMG_ENABLE_OMP @DDalphaAMG_ENABLE_OMP@)
+    find_dependency(OpenMP REQUIRED)
+  endif()
+
+  if (@DDalphaAMG_ENABLE_HDF5@)
+    set(DDalphaAMG_ENABLE_HDF5 @DDalphaAMG_ENABLE_HDF5@)
+  endif()
+
+
+  include("${CMAKE_CURRENT_LIST_DIR}/DDalphaAMG-Targets.cmake")
+endif()
diff --git a/cmake/DetectSimdAndAlignment.cmake b/cmake/DetectSimdAndAlignment.cmake
new file mode 100644
index 000000000..707b9b65b
--- /dev/null
+++ b/cmake/DetectSimdAndAlignment.cmake
@@ -0,0 +1,288 @@
+# DetectSimdAndAlignment.cmake
+#
+# Detect SIMD architecture family, SIMD level and a reasonable alignment value.
+#
+# Exposed cache variables:
+#   SIMD_ARCH_FAMILY : x86 / ARM / PPC / NVIDIA / UNKNOWN
+#   SIMD_LEVEL       : AVX512 / AVX2 / SSE2 / SVE / NEON_AARCH64 / NEON_ARMv8 /
+#                      NEON_ARMv7 / POWER10 / VSX_POWER9 / VSX_POWER7 / ALTIVEC /
+#                      CUDA_SM89 / SCALAR
+#   SIMD_ALIGNMENT   : integer, in bytes (16, 32, 64, ...)
+#   SIMD_HAS_FLOAT, SIMD_HAS_DOUBLE : ON/OFF, float / double SIMD support
+#
+# Usage:
+#   include(cmake/DetectSimdAndAlignment.cmake)
+#   message(STATUS "SIMD: ${SIMD_ARCH_FAMILY} ${SIMD_LEVEL}, alignment=${SIMD_ALIGNMENT}")
+#
+#   # Example: propagate as defines
+#   target_compile_definitions(my_target PRIVATE
+#       SIMD_ALIGNMENT=${SIMD_ALIGNMENT}
+#       SIMD_LEVEL_${SIMD_LEVEL}
+#   )
+# DetectSimdAndAlignment.cmake - COMPLETE: x86 + ARM NEON + NVIDIA + PowerPC
+
+
+include_guard(GLOBAL) #
+
+include(CheckCXXSourceCompiles)
+include(CheckCXXSourceRuns) # For runtime CPU detection fallback
+
+# ------------------------------
+# 1. Detect architecture family
+# ------------------------------
+if(NOT DEFINED SIMD_ARCH_FAMILY)
+    # Map the lower-cased target processor name onto a SIMD family label.
+    string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" _simd_proc)
+    set(_detected_arch "UNKNOWN")
+
+    if(_simd_proc MATCHES "x86_64|amd64|i[3-6]86")
+        set(_detected_arch "x86")
+    elseif(_simd_proc MATCHES "armv[0-9]+|aarch64|arm64")
+        set(_detected_arch "ARM")
+    elseif(_simd_proc MATCHES "ppc64(le|el)?|powerpc|ppc")
+        set(_detected_arch "PPC")
+    elseif(_simd_proc MATCHES "nvcl|sm_89|sm_90")
+        set(_detected_arch "NVIDIA")
+    endif()
+
+    set(SIMD_ARCH_FAMILY "${_detected_arch}" CACHE STRING "SIMD architecture family")
+endif()
+
+# Defaults (non-FORCE cache entries: values already in the cache are kept)
+set(SIMD_LEVEL "SCALAR" CACHE STRING "Detected SIMD level")
+set(SIMD_ALIGNMENT 16 CACHE STRING "Alignment in bytes")
+set(SIMD_HAS_FLOAT ON CACHE BOOL "Float SIMD support")
+set(SIMD_HAS_DOUBLE ON CACHE BOOL "Double SIMD support")
+
+# Snapshot the caller's CMAKE_REQUIRED_FLAGS; _simd_restore_flags() reinstates it.
+set(_SIMD_SAVED_REQUIRED_FLAGS "${CMAKE_REQUIRED_FLAGS}")
+macro(_simd_restore_flags)
+    if(DEFINED _SIMD_SAVED_REQUIRED_FLAGS)
+        set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS}")
+    endif()
+endmacro()
+
+# ------------------------------------------------
+# 2. x86: SSE2 → AVX2 → AVX512
+# ------------------------------------------------
+if(SIMD_ARCH_FAMILY STREQUAL "x86")
+    # Widest first: AVX-512 (F+DQ) double probe; success selects 64-byte alignment.
+    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mavx512f -mavx512dq")
+    check_cxx_source_compiles("
+        #include <immintrin.h>
+        int main() { __m512d v = _mm512_set1_pd(1.0); (void)v; return 0; }
+    " _HAVE_AVX512_DOUBLE)
+
+    if(_HAVE_AVX512_DOUBLE)
+        set(SIMD_LEVEL "AVX512" CACHE STRING "" FORCE)
+        set(SIMD_ALIGNMENT 64 CACHE STRING "" FORCE)
+        _simd_restore_flags()
+        return()
+    endif()
+
+    # Next: AVX2 double probe; success selects 32-byte alignment.
+    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mavx2")
+    check_cxx_source_compiles("
+        #include <immintrin.h>
+        int main() { __m256d v = _mm256_set1_pd(1.0); (void)v; return 0; }
+    " _HAVE_AVX2_DOUBLE)
+
+    if(_HAVE_AVX2_DOUBLE)
+        set(SIMD_LEVEL "AVX2" CACHE STRING "" FORCE)
+        set(SIMD_ALIGNMENT 32 CACHE STRING "" FORCE)
+        _simd_restore_flags()
+        return()
+    endif()
+
+    # Last: SSE2 double probe; success selects 16-byte alignment.
+    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -msse2")
+    check_cxx_source_compiles("
+        #include <emmintrin.h>
+        int main() { __m128d v = _mm_set1_pd(1.0); (void)v; return 0; }
+    " _HAVE_SSE2_DOUBLE)
+
+    if(_HAVE_SSE2_DOUBLE)
+        set(SIMD_LEVEL "SSE2" CACHE STRING "" FORCE)
+        set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
+        _simd_restore_flags()
+        return()
+    endif()
+
+# --------------------------------------
+# 3. ARM NEON - ALL FAMILIES
+# --------------------------------------
+elseif(SIMD_ARCH_FAMILY STREQUAL "ARM")
+    string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" _arm_proc)
+
+    # AArch64: probe SVE first, then fall back to NEON.
+    if(_arm_proc MATCHES "aarch64|arm64")
+        set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -march=armv8-a+sve")
+        check_cxx_source_compiles("
+            #include <arm_sve.h>
+            int main() { svfloat32_t v = svdup_f32(1.0f); (void)v; return 0; }
+        " _HAVE_SVE)
+
+        if(_HAVE_SVE)
+            set(SIMD_LEVEL "SVE" CACHE STRING "" FORCE)
+            set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
+            _simd_restore_flags()
+            return()
+        endif()
+        # AArch64 NEON (double safe); probe with the caller's flags, not +sve.
+        set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS}")
+        check_cxx_source_compiles("
+            #include <arm_neon.h>
+            int main() {
+                float64x2_t vd = vdupq_n_f64(1.0);
+                float32x4_t vf = vdupq_n_f32(1.0f);
+                (void)vd; (void)vf; return 0;
+            }" _HAVE_NEON_AARCH64)
+
+        if(_HAVE_NEON_AARCH64)
+            set(SIMD_LEVEL "NEON_AARCH64" CACHE STRING "" FORCE)
+            set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
+            _simd_restore_flags()
+            return()
+        endif()
+
+    # ARMv8 32-bit: float-only NEON probe, so double SIMD is reported OFF.
+    elseif(_arm_proc MATCHES "armv8")
+        set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -march=armv8-a+simd")
+        check_cxx_source_compiles("
+            #include <arm_neon.h>
+            int main() { float32x4_t v = vdupq_n_f32(1.0f); (void)v; return 0; }
+        " _HAVE_ARMv8_NEON)
+
+        if(_HAVE_ARMv8_NEON)
+            set(SIMD_LEVEL "NEON_ARMv8" CACHE STRING "" FORCE)
+            set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
+            set(SIMD_HAS_DOUBLE OFF CACHE BOOL "" FORCE)
+            _simd_restore_flags()
+            return()
+        endif()
+
+    # ARMv7: float-only NEON probe, so double SIMD is reported OFF.
+    elseif(_arm_proc MATCHES "armv7")
+        set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mfpu=neon -march=armv7-a")
+        check_cxx_source_compiles("
+            #include <arm_neon.h>
+            int main() { float32x4_t v = vdupq_n_f32(1.0f); (void)v; return 0; }
+        " _HAVE_ARMv7_NEON)
+
+        if(_HAVE_ARMv7_NEON)
+            set(SIMD_LEVEL "NEON_ARMv7" CACHE STRING "" FORCE)
+            set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
+            set(SIMD_HAS_DOUBLE OFF CACHE BOOL "" FORCE)
+            _simd_restore_flags()
+            return()
+        endif()
+    endif()
+
+# --------------------------------------
+# 4. POWERPC - COMPLETE COVERAGE (NEW!)
+# --------------------------------------
+elseif(SIMD_ARCH_FAMILY STREQUAL "PPC")
+
+    string(TOLOWER "${CMAKE_SYSTEM_PROCESSOR}" _ppc_proc)
+
+    # === Power10 probe, compiled with -mcpu=power10; success selects 64-byte alignment.
+    # NOTE(review): `vector double` is normally a 128-bit (2-element) type; these wider initializers may never compile - confirm.
+    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mcpu=power10")
+    check_cxx_source_compiles("
+        #include <altivec.h>
+        int main() {
+            vector double vd = {1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0}; // 512-bit
+            vector float vf = {1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f};
+            (void)vd; (void)vf; return 0;
+        }" _HAVE_POWER10)
+
+    if(_HAVE_POWER10)
+        set(SIMD_LEVEL "POWER10" CACHE STRING "" FORCE)
+        set(SIMD_ALIGNMENT 64 CACHE STRING "" FORCE)  # 64 bytes; NOTE(review): presumes 512-bit vectors - confirm
+        set(SIMD_HAS_FLOAT ON CACHE BOOL "" FORCE)
+        set(SIMD_HAS_DOUBLE ON CACHE BOOL "" FORCE)
+        _simd_restore_flags()
+        return()
+    endif()
+
+    # === Power9 VSX probe (-mcpu=power9 -mvsx). NOTE(review): 4 doubles / 7 floats look wider than a 128-bit vector - confirm this can compile.
+    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mcpu=power9 -mvsx")
+    check_cxx_source_compiles("
+        #include <altivec.h>
+        int main() {
+            vector double vd = {1.0,1.0,1.0,1.0};  // 256-bit VSX double
+            vector float vf = {1.0f,1.0f,1.0f,1.0f,1.0f,1.0f,1.0f}; // 256-bit
+            (void)vd; (void)vf; return 0;
+        }" _HAVE_VSX_POWER9)
+
+    if(_HAVE_VSX_POWER9)
+        set(SIMD_LEVEL "VSX_POWER9" CACHE STRING "" FORCE)
+        set(SIMD_ALIGNMENT 32 CACHE STRING "" FORCE)  # 32 bytes; NOTE(review): presumes 256-bit vectors - confirm
+        set(SIMD_HAS_FLOAT ON CACHE BOOL "" FORCE)
+        set(SIMD_HAS_DOUBLE ON CACHE BOOL "" FORCE)
+        _simd_restore_flags()
+        return()
+    endif()
+
+    # === Power7+ VSX (128-bit double, POWER7+)
+    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -mcpu=power7 -mvsx")
+    check_cxx_source_compiles("
+        #include <altivec.h>
+        int main() {
+            vector double vd = {1.0,1.0};  // VSX 128-bit double
+            (void)vd; return 0;
+        }" _HAVE_VSX_POWER7)
+
+    if(_HAVE_VSX_POWER7)
+        set(SIMD_LEVEL "VSX_POWER7" CACHE STRING "" FORCE)
+        set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
+        set(SIMD_HAS_FLOAT ON CACHE BOOL "" FORCE)
+        set(SIMD_HAS_DOUBLE ON CACHE BOOL "" FORCE)
+        _simd_restore_flags()
+        return()
+    endif()
+
+    # === Classic AltiVec/VMX (PowerPC baseline, 128-bit)
+    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} -maltivec -mabi=altivec")
+    check_cxx_source_compiles("
+        #include <altivec.h>
+        int main() {
+            vector float vf = (vector float){1.0f,1.0f,1.0f,1.0f};
+            (void)vf; return 0;
+        }" _HAVE_ALTIVEC)
+
+    if(_HAVE_ALTIVEC)
+        set(SIMD_LEVEL "ALTIVEC" CACHE STRING "" FORCE)
+        set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
+        set(SIMD_HAS_FLOAT ON CACHE BOOL "" FORCE)
+        set(SIMD_HAS_DOUBLE OFF CACHE BOOL "" FORCE)  # AltiVec: float primary
+        _simd_restore_flags()
+        return()
+    endif()
+
+# --------------------------------------
+# 5. NVIDIA (probes sm_89; NOTE: GH200 is Hopper, i.e. sm_90)
+# --------------------------------------
+elseif(SIMD_ARCH_FAMILY STREQUAL "NVIDIA")
+    set(CMAKE_REQUIRED_FLAGS "${_SIMD_SAVED_REQUIRED_FLAGS} --gpu-arch=sm_89")
+    check_cxx_source_compiles("
+        #include <cuda_runtime.h>
+        int main() { double d = 1.0; (void)d; return 0; }
+    " _HAVE_CUDA_SM89)
+
+    if(_HAVE_CUDA_SM89)
+        set(SIMD_LEVEL "CUDA_SM89" CACHE STRING "" FORCE)
+        set(SIMD_ALIGNMENT 16 CACHE STRING "" FORCE)
+        _simd_restore_flags()
+        return()
+    endif()
+
+# --------------------------------------
+# 6. Fallback
+# --------------------------------------
+else()
+    _simd_restore_flags()
+    return()
+endif()
+
+_simd_restore_flags()
diff --git a/cmake/FindCLime.cmake b/cmake/FindCLime.cmake
new file mode 100644
index 000000000..c9d94ea95
--- /dev/null
+++ b/cmake/FindCLime.cmake
@@ -0,0 +1,41 @@
+# FindCLime
+# ---------
+# Locate the c-lime library (`lime`) and its header `lime.h`.
+#
+# Result variables:
+#   CLime_FOUND            - set by find_package_handle_standard_args()
+#   TM_CLIME_FOUND         - mirrors CLime_FOUND
+#   TM_CLIME_LIBRARIES     - full path of the lime library
+#   TM_CLIME_INCLUDE_DIRS  - directory that contains lime.h
+# Imported target (created only when the package was found):
+#   tmlqcd::clime
+include(FindPackageHandleStandardArgs)
+
+find_library(
+  TM_CLIME_LIBRARIES
+  NAMES lime
+  PATH_SUFFIXES "lib" "lib64")
+
+# NOTE(review): _package_name is not set anywhere in this module, so the last
+# two suffixes expand to "include/" and "" - confirm whether "lime" was meant.
+find_path(
+  TM_CLIME_INCLUDE_DIRS
+  NAMES lime.h
+  PATH_SUFFIXES "include" "include/${_package_name}" "${_package_name}")
+
+find_package_handle_standard_args(CLime DEFAULT_MSG TM_CLIME_LIBRARIES
+                                  TM_CLIME_INCLUDE_DIRS)
+
+# Create the imported target only on success; otherwise "<var>-NOTFOUND"
+# values would leak into the link line of every consumer.
+if(CLime_FOUND AND NOT TARGET tmlqcd::clime)
+  add_library(tmlqcd::clime INTERFACE IMPORTED)
+  set_target_properties(
+    tmlqcd::clime
+    PROPERTIES INTERFACE_LINK_LIBRARIES "${TM_CLIME_LIBRARIES}"
+               INTERFACE_INCLUDE_DIRECTORIES "${TM_CLIME_INCLUDE_DIRS}")
+endif()
+
+# Report the actual search result instead of unconditionally claiming success.
+set(TM_CLIME_FOUND "${CLime_FOUND}")
+mark_as_advanced(TM_CLIME_LIBRARIES TM_CLIME_INCLUDE_DIRS)
diff --git a/cmake/FindDDalphaAMG.cmake b/cmake/FindDDalphaAMG.cmake
new file mode 100644
index 000000000..5f0d2450f
--- /dev/null
+++ b/cmake/FindDDalphaAMG.cmake
@@ -0,0 +1,36 @@
+# FindDDalphaAMG
+# --------------
+# Locate the DDalphaAMG library and DDalphaAMG.h. When both are found the
+# imported target tmlqcd::DDalphaAMG is defined.
+include(FindPackageHandleStandardArgs)
+
+find_library(
+  TM_DDALPHAAMG_LIBRARIES
+  NAMES DDalphaAMG DDalphaAMG_devel
+  PATH_SUFFIXES "lib" "lib64")
+
+find_path(
+  TM_DDALPHAAMG_INCLUDE_DIRS
+  NAMES DDalphaAMG.h
+  PATH_SUFFIXES "include")
+
+find_package_handle_standard_args(
+  DDalphaAMG DEFAULT_MSG TM_DDALPHAAMG_LIBRARIES TM_DDALPHAAMG_INCLUDE_DIRS)
+
+if(TM_DDALPHAAMG_LIBRARIES
+   AND TM_DDALPHAAMG_INCLUDE_DIRS
+   AND NOT TARGET tmlqcd::DDalphaAMG)
+  # Use STATUS so the diagnostic goes to the regular configure log rather than
+  # stderr, and stay silent when the caller asked for QUIET.
+  if(NOT DDalphaAMG_FIND_QUIETLY)
+    message(STATUS "INCLUDE: ${TM_DDALPHAAMG_INCLUDE_DIRS}")
+  endif()
+  add_library(tmlqcd::DDalphaAMG INTERFACE IMPORTED)
+  set_property(TARGET tmlqcd::DDalphaAMG PROPERTY INTERFACE_LINK_LIBRARIES
+                                                  "${TM_DDALPHAAMG_LIBRARIES}")
+  set_property(
+    TARGET tmlqcd::DDalphaAMG PROPERTY INTERFACE_INCLUDE_DIRECTORIES
+                                       "${TM_DDALPHAAMG_INCLUDE_DIRS}")
+endif()
+
+mark_as_advanced(TM_DDALPHAAMG_LIBRARIES TM_DDALPHAAMG_INCLUDE_DIRS)
diff --git a/cmake/FindQphix.cmake b/cmake/FindQphix.cmake
new file mode 100644
index 000000000..15ab2d47a
--- /dev/null
+++ b/cmake/FindQphix.cmake
@@ -0,0 +1,62 @@
+# FindQphix
+# ---------
+# Locate QPhiX (solver and codegen libraries plus headers) together with the
+# QMP library, and define the imported target tmlqcd::qphix once everything
+# was found. QPHIX_DIR and QMP_DIR are used as additional search locations.
+include(FindPackageHandleStandardArgs)
+
+# The libraries are searched in QMP_DIR/QPHIX_DIR as well, matching the header
+# searches below.
+find_library(
+  TM_QMP_LIBS
+  NAMES qmp
+  PATH_SUFFIXES "lib" "lib64"
+  PATHS "${QMP_DIR}")
+find_library(
+  TM_QPHIX_LIBS_CODEGEN
+  NAMES "qphix_codegen"
+  PATH_SUFFIXES "lib" "lib64"
+  PATHS "${QPHIX_DIR}")
+find_library(
+  TM_QPHIX_LIBS_SOLVER
+  NAMES "qphix_solver"
+  PATH_SUFFIXES "lib" "lib64"
+  PATHS "${QPHIX_DIR}")
+
+find_path(
+  TM_QMP_INCLUDE_DIRS
+  NAMES qmp.h
+  PATH_SUFFIXES "include"
+  PATHS "${QMP_DIR}")
+
+find_path(
+  TM_QPHIX_INCLUDE_DIRS
+  NAMES qphix_config.h
+  PATH_SUFFIXES "qphix"
+  PATHS "${QPHIX_DIR}")
+find_path(
+  TM_QPHIX_CODEGEN_INCLUDE_DIRS
+  NAMES qpx_utils.h
+  PATH_SUFFIXES "qphix_codegen"
+  PATHS "${QPHIX_DIR}")
+
+find_package_handle_standard_args(
+  Qphix DEFAULT_MSG TM_QPHIX_LIBS_CODEGEN TM_QPHIX_LIBS_SOLVER TM_QPHIX_INCLUDE_DIRS TM_QMP_LIBS TM_QMP_INCLUDE_DIRS TM_QPHIX_CODEGEN_INCLUDE_DIRS)
+
+# Create the target only when every required piece was located, so that no
+# "<var>-NOTFOUND" value ends up in the target properties.
+if(Qphix_FOUND AND NOT TARGET tmlqcd::qphix)
+  add_library(tmlqcd::qphix INTERFACE IMPORTED)
+  set_target_properties(tmlqcd::qphix PROPERTIES INTERFACE_LINK_LIBRARIES
+    "${TM_QPHIX_LIBS_CODEGEN};${TM_QPHIX_LIBS_SOLVER};${TM_QMP_LIBS}")
+  set_target_properties(tmlqcd::qphix PROPERTIES INTERFACE_INCLUDE_DIRECTORIES
+    "${TM_QMP_INCLUDE_DIRS};${TM_QPHIX_INCLUDE_DIRS}/..;${TM_QPHIX_INCLUDE_DIRS};${TM_QPHIX_CODEGEN_INCLUDE_DIRS}")
+endif()
+
+mark_as_advanced(
+  TM_QPHIX_LIBS_CODEGEN
+  TM_QPHIX_LIBS_SOLVER
+  TM_QPHIX_INCLUDE_DIRS
+  TM_QPHIX_CODEGEN_INCLUDE_DIRS
+  TM_QMP_LIBS
+  TM_QMP_INCLUDE_DIRS)
diff --git a/cmake/git_hash.c.in b/cmake/git_hash.c.in
new file mode 100644
index 000000000..b73d81cd4
--- /dev/null
+++ b/cmake/git_hash.c.in
@@ -0,0 +1 @@
+const char git_hash[] = "@TM_SHA@"; /* @TM_SHA@ is a template placeholder; presumably replaced by CMake's configure_file() - confirm */
diff --git a/cmake/tmlQCD-config.cmake.in b/cmake/tmlQCD-config.cmake.in
new file mode 100644
index 000000000..91aaa837a
--- /dev/null
+++ b/cmake/tmlQCD-config.cmake.in
@@ -0,0 +1,101 @@
+# Package configuration file for tmlQCD. The @VAR@ placeholders are replaced
+# with the values that were in effect when tmlQCD itself was configured.
+# The file re-finds the dependencies selected at build time and then loads
+# the exported targets from tmlQCDTargets.cmake.
+cmake_minimum_required(VERSION 3.23)
+
+# Provides find_dependency(), which is used below.
+include(CMakeFindDependencyMacro)
+
+if(NOT TARGET tmlqcd::tmlqcd)
+
+  # Record the C compiler id tmlQCD was built with; it is used as a fallback
+  # when the consuming project has no C compiler id of its own.
+  set(TM_C_COMPILER_ID @CMAKE_C_COMPILER_ID@)
+  # Test the variable by name. Expanding it here would make if() treat the
+  # value (e.g. GNU) as another variable name, and fail when it is empty.
+  if(NOT CMAKE_C_COMPILER_ID)
+    set(CMAKE_C_COMPILER_ID ${TM_C_COMPILER_ID})
+  endif()
+
+  # Pass REQUIRED or QUIET on to the dependencies, depending on how
+  # find_package(tmlQCD ...) was called.
+  if(tmlQCD_FIND_REQUIRED)
+    set(mode REQUIRED)
+  else()
+    set(mode QUIET)
+  endif()
+
+  # The option placeholders are quoted so that an option that was undefined at
+  # configure time yields if("") (false) instead of a syntax error.
+  if("@TM_USE_MPI@")
+    set(TM_USE_MPI @TM_USE_MPI@)
+    find_dependency(MPI ${mode} COMPONENTS C)
+  endif()
+
+  if("@TM_USE_OMP@")
+    set(TM_USE_OMP @TM_USE_OMP@)
+    find_dependency(OpenMP ${mode})
+  endif()
+
+  if("@TM_USE_LEMON@")
+    set(TM_USE_LEMON @TM_USE_LEMON@)
+    find_dependency(Lemon ${mode})
+  endif()
+
+  find_dependency(BLAS ${mode})
+  find_dependency(LAPACK ${mode})
+
+  # NOTE(review): CLime and Qphix are Find modules; presumably they must be on
+  # the consumer's CMAKE_MODULE_PATH - confirm how they are installed.
+  find_package(CLime REQUIRED)
+  set(TM_USE_LIME ON)
+
+  if("@TM_USE_QUDA@")
+    set(TM_USE_QUDA @TM_USE_QUDA@)
+    # Keywords are case sensitive: lower-case "config" is not the CONFIG keyword.
+    find_package(QUDA REQUIRED CONFIG)
+  endif()
+
+  if("@TM_USE_HIP@")
+    set(TM_USE_HIP @TM_USE_HIP@)
+    # Language names are case sensitive; the HIP language is spelled "HIP".
+    enable_language(HIP)
+  endif()
+
+  if("@TM_USE_QPHIX@")
+    set(TM_USE_QPHIX @TM_USE_QPHIX@)
+    set(QPHIX_DIR @QPHIX_DIR@)
+    set(QMP_DIR @QMP_DIR@)
+    find_package(Qphix REQUIRED)
+  endif()
+
+  if("@TM_USE_FFTW@")
+    set(TM_USE_FFTW @TM_USE_FFTW@)
+    # pkg_search_module() is defined by the PkgConfig module.
+    find_dependency(PkgConfig)
+    pkg_search_module(tmlqcd_fftw3 IMPORTED_TARGET GLOBAL fftw3)
+    if(tmlqcd_fftw3_FOUND AND NOT TARGET tmlqcd::fftw3)
+      add_library(tmlqcd::fftw3 ALIAS PkgConfig::tmlqcd_fftw3)
+    endif()
+  endif()
+
+  # Test the value configured at build time, not a variable of the consumer.
+  if("@TM_USE_DDalphaAMG@")
+    set(TM_USE_DDalphaAMG @TM_USE_DDalphaAMG@)
+  endif()
+
+  # NOTE(review): the placeholder is TM_USE_DIMENSION while the variable is
+  # TM_MPI_DIMENSION - confirm that this is the intended configure-time name.
+  set(TM_MPI_DIMENSION @TM_USE_DIMENSION@)
+
+  if("@TM_USE_HALFSPINOR@")
+    set(TM_USE_HALFSPINOR @TM_USE_HALFSPINOR@)
+  endif()
+
+  if("@TM_USE_GAUGE_COPY@")
+    set(TM_USE_GAUGE_COPY @TM_USE_GAUGE_COPY@)
+  endif()
+
+  include("${CMAKE_CURRENT_LIST_DIR}/tmlQCDTargets.cmake")
+endif()
diff --git a/cmake/tmlQCD.pc.in b/cmake/tmlQCD.pc.in
new file mode 100644
index 000000000..1660120d3
--- /dev/null
+++ b/cmake/tmlQCD.pc.in
@@ -0,0 +1,14 @@
+# pkg-config metadata for tmlQCD; @VAR@ placeholders are replaced by CMake.
+# Variable values are unquoted because pkg-config keeps quotes literally;
+# quoting is applied where the paths are used (Cflags/Libs).
+prefix=@CMAKE_INSTALL_PREFIX@
+exec_prefix=${prefix}
+libdir=${prefix}/@CMAKE_INSTALL_LIBDIR@
+includedir=${prefix}/include
+
+Name: tmlQCD
+Description: tmlQCD
+Version: @PROJECT_VERSION@
+# NOTE(review): assumes the headers are installed in <includedir>/tmlqcd - confirm.
+Cflags: -I"${includedir}/tmlqcd"
+Libs: -L"${libdir}" -lhmc
diff --git a/cmake/tmlqcd_config_internal.h.in b/cmake/tmlqcd_config_internal.h.in
new file mode 100644
index 000000000..7c11d0446
--- /dev/null
+++ b/cmake/tmlqcd_config_internal.h.in
@@ -0,0 +1,124 @@
+/* Hand-written "config.h.in". Note that it's not this file which should be
+ * included, but rather the "tmlqcd_config.h" which renames a few of the defines
+ * into static const variables, following the convention used by the USQCD build
+ * systems, for example. */
+
+/* Defined when LAPACK is available */
+#cmakedefine TM_LAPACK 
+
+/* Defined (without a value) when the `lime' library (-llime) is used. */
+#cmakedefine TM_USE_LIME 
+
+/* Defined (without a value) when the `lemon' library (-llemon) is used. */
+#cmakedefine TM_USE_LEMON 
+
+/* Compile with MPI support */
+#cmakedefine TM_USE_MPI
+
+/* Compile with OpenMP support */
+#cmakedefine TM_USE_OMP
+
+/* Compile with FFTW support */
+#cmakedefine TM_USE_FFTW 
+
+/* Fortran symbol names have no extra underscore */
+#cmakedefine NOF77_
+
+/* Define to the address where bug reports for this package should be sent. */
+#define PACKAGE_BUGREPORT "@PACKAGE_BUGREPORT@"
+
+/* Define to the full name of this package. */
+#define PACKAGE_NAME "@PROJECT_DESCRIPTION@"
+/* Define to the full name and version of this package. */
+#define PACKAGE_STRING "@PROJECT_DESCRIPTION@ @PROJECT_VERSION@"
+
+/* Define to the one symbol short name of this package. */
+#define PACKAGE_TARNAME "@PACKAGE_TARNAME@"
+
+/* Define to the version of this package (version number only). */
+#define PACKAGE_VERSION "@PROJECT_VERSION@"
+
+/* Parallelisation in the X direction */
+#cmakedefine TM_PARALLELX 
+
+/* Parallelisation in the X and Y directions */
+#cmakedefine TM_PARALLELXY 
+
+/* Parallelisation in the X, Y and Z directions */
+#cmakedefine TM_PARALLELXYZ
+
+/* One dimensional parallelisation (T) */
+#cmakedefine TM_PARALLELT
+
+/* Two dimensional parallelisation (X, T) */
+#cmakedefine TM_PARALLELXT
+
+/* Three dimensional parallelisation (X, Y, T) */
+#cmakedefine TM_PARALLELXYT
+
+/* Four dimensional parallelisation (X, Y, Z, T) */
+#cmakedefine TM_PARALLELXYZT
+
+/* Lattice volume is fixed at compile time */
+#cmakedefine TM_FIXEDVOLUME
+
+/* Alignment for arrays -- necessary for SSE and automated vectorization */
+#define ALIGN_BASE @ALIGN_BASE@
+
+/* Alignment compiler hint macro */
+#define ALIGN @ALIGN@
+
+/* Alignment for 32bit arrays -- necessary for SSE and automated vectorization */
+#define ALIGN_BASE32 @ALIGN_BASE32@
+
+/* Alignment of 32bit fields, compiler hint macro */
+#define ALIGN32 @ALIGN32@
+
+/* Would be defined if `lex' declares `yytext' as a `char *' by default, not a
+   `char[]'; the definition below is currently commented out. */
+//#cmakedefine YYTEXT_POINTER
+
+/* Number of bits in a file offset, on hosts where this is settable. */
+#define TM_FILE_OFFSET_BITS @TM_FILE_OFFSET_BITS@
+
+/* Construct an extra copy of the gauge fields */
+#cmakedefine TM_USE_GAUGE_COPY
+
+/* Defined (without a value) to make fseeko visible on some hosts (e.g. glibc 2.2). */
+#cmakedefine TM_LARGEFILE_SOURCE
+
+/* Use even/odd geometry in the gauge fields */
+#cmakedefine TM_NEW_GEOMETRY
+
+/* x86 64 Bit architecture */
+#cmakedefine TM_x86_64
+
+/* Defined (without a value) if the Dirac operator with halfspinor should be used */
+#cmakedefine TM_USE_HALFSPINOR 
+
+/* Defined (without a value) if the shmem API should be used */
+#cmakedefine TM_USE_SHMEM
+
+/* Defined (without a value) if KOJAK instrumentation should be done */
+#cmakedefine TM_KOJAK_INST
+
+/* Defined (without a value) if persistent MPI calls for halfspinor should be used */
+#cmakedefine TM_PERSISTENT
+
+/* Defined (without a value) if non-blocking MPI calls for spinor and gauge should be used */
+#cmakedefine TM_NONBLOCKING
+
+/* Using QUDA GPU */
+#cmakedefine TM_USE_QUDA 
+
+/* Using DDalphaAMG */
+#cmakedefine TM_USE_DDalphaAMG
+
+/* Using QPHIX */
+#cmakedefine TM_USE_QPHIX 
+
+#ifdef TM_USE_QPHIX
+/* Structure-of-Arrays length to use with QPhiX; only defined when QPhiX is enabled */
+#define QPHIX_SOALEN @TM_QPHIX_SOALEN@
+#endif
+
diff --git a/config.guess b/config.guess
deleted file mode 100644
index f7727026b..000000000
--- a/config.guess
+++ /dev/null
@@ -1,1701 +0,0 @@
-#! /bin/sh
-# Attempt to guess a canonical system name.
-#   Copyright 1992-2021 Free Software Foundation, Inc.
-
-timestamp='2021-01-01'
-
-# This file is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <https://www.gnu.org/licenses/>.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that
-# program.  This Exception is an additional permission under section 7
-# of the GNU General Public License, version 3 ("GPLv3").
-#
-# Originally written by Per Bothner; maintained since 2000 by Ben Elliston.
-#
-# You can get the latest version of this script from:
-# https://git.savannah.gnu.org/cgit/config.git/plain/config.guess
-#
-# Please send patches to <config-patches@gnu.org>.
-
-
-me=$(echo "$0" | sed -e 's,.*/,,')
-
-usage="\
-Usage: $0 [OPTION]
-
-Output the configuration name of the system \`$me' is run on.
-
-Options:
-  -h, --help         print this help, then exit
-  -t, --time-stamp   print date of last modification, then exit
-  -v, --version      print version number, then exit
-
-Report bugs and patches to <config-patches@gnu.org>."
-
-version="\
-GNU config.guess ($timestamp)
-
-Originally written by Per Bothner.
-Copyright 1992-2021 Free Software Foundation, Inc.
-
-This is free software; see the source for copying conditions.  There is NO
-warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
-
-help="
-Try \`$me --help' for more information."
-
-# Parse command line
-while test $# -gt 0 ; do
-  case $1 in
-    --time-stamp | --time* | -t )
-       echo "$timestamp" ; exit ;;
-    --version | -v )
-       echo "$version" ; exit ;;
-    --help | --h* | -h )
-       echo "$usage"; exit ;;
-    -- )     # Stop option processing
-       shift; break ;;
-    - )	# Use stdin as input.
-       break ;;
-    -* )
-       echo "$me: invalid option $1$help" >&2
-       exit 1 ;;
-    * )
-       break ;;
-  esac
-done
-
-if test $# != 0; then
-  echo "$me: too many arguments$help" >&2
-  exit 1
-fi
-
-# CC_FOR_BUILD -- compiler used by this script. Note that the use of a
-# compiler to aid in system detection is discouraged as it requires
-# temporary files to be created and, as you can see below, it is a
-# headache to deal with in a portable fashion.
-
-# Historically, `CC_FOR_BUILD' used to be named `HOST_CC'. We still
-# use `HOST_CC' if defined, but it is deprecated.
-
-# Portable tmp directory creation inspired by the Autoconf team.
-
-tmp=
-# shellcheck disable=SC2172
-trap 'test -z "$tmp" || rm -fr "$tmp"' 0 1 2 13 15
-
-set_cc_for_build() {
-    # prevent multiple calls if $tmp is already set
-    test "$tmp" && return 0
-    : "${TMPDIR=/tmp}"
-    # shellcheck disable=SC2039
-    { tmp=$( (umask 077 && mktemp -d "$TMPDIR/cgXXXXXX") 2>/dev/null) && test -n "$tmp" && test -d "$tmp" ; } ||
-	{ test -n "$RANDOM" && tmp=$TMPDIR/cg$$-$RANDOM && (umask 077 && mkdir "$tmp" 2>/dev/null) ; } ||
-	{ tmp=$TMPDIR/cg-$$ && (umask 077 && mkdir "$tmp" 2>/dev/null) && echo "Warning: creating insecure temp directory" >&2 ; } ||
-	{ echo "$me: cannot create a temporary directory in $TMPDIR" >&2 ; exit 1 ; }
-    dummy=$tmp/dummy
-    case ${CC_FOR_BUILD-},${HOST_CC-},${CC-} in
-	,,)    echo "int x;" > "$dummy.c"
-	       for driver in cc gcc c89 c99 ; do
-		   if ($driver -c -o "$dummy.o" "$dummy.c") >/dev/null 2>&1 ; then
-		       CC_FOR_BUILD="$driver"
-		       break
-		   fi
-	       done
-	       if test x"$CC_FOR_BUILD" = x ; then
-		   CC_FOR_BUILD=no_compiler_found
-	       fi
-	       ;;
-	,,*)   CC_FOR_BUILD=$CC ;;
-	,*,*)  CC_FOR_BUILD=$HOST_CC ;;
-    esac
-}
-
-# This is needed to find uname on a Pyramid OSx when run in the BSD universe.
-# (ghazi@noc.rutgers.edu 1994-08-24)
-if test -f /.attbin/uname ; then
-	PATH=$PATH:/.attbin ; export PATH
-fi
-
-UNAME_MACHINE=$( (uname -m) 2>/dev/null) || UNAME_MACHINE=unknown
-UNAME_RELEASE=$( (uname -r) 2>/dev/null) || UNAME_RELEASE=unknown
-UNAME_SYSTEM=$( (uname -s) 2>/dev/null) || UNAME_SYSTEM=unknown
-UNAME_VERSION=$( (uname -v) 2>/dev/null) || UNAME_VERSION=unknown
-
-case "$UNAME_SYSTEM" in
-Linux|GNU|GNU/*)
-	LIBC=unknown
-
-	set_cc_for_build
-	cat <<-EOF > "$dummy.c"
-	#include <features.h>
-	#if defined(__UCLIBC__)
-	LIBC=uclibc
-	#elif defined(__dietlibc__)
-	LIBC=dietlibc
-	#elif defined(__GLIBC__)
-	LIBC=gnu
-	#else
-	#include <stdarg.h>
-	/* First heuristic to detect musl libc.  */
-	#ifdef __DEFINED_va_list
-	LIBC=musl
-	#endif
-	#endif
-	EOF
-	eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^LIBC' | sed 's, ,,g')"
-
-	# Second heuristic to detect musl libc.
-	if [ "$LIBC" = unknown ] &&
-	   command -v ldd >/dev/null &&
-	   ldd --version 2>&1 | grep -q ^musl; then
-		LIBC=musl
-	fi
-
-	# If the system lacks a compiler, then just pick glibc.
-	# We could probably try harder.
-	if [ "$LIBC" = unknown ]; then
-		LIBC=gnu
-	fi
-	;;
-esac
-
-# Note: order is significant - the case branches are not exclusive.
-
-case "$UNAME_MACHINE:$UNAME_SYSTEM:$UNAME_RELEASE:$UNAME_VERSION" in
-    *:NetBSD:*:*)
-	# NetBSD (nbsd) targets should (where applicable) match one or
-	# more of the tuples: *-*-netbsdelf*, *-*-netbsdaout*,
-	# *-*-netbsdecoff* and *-*-netbsd*.  For targets that recently
-	# switched to ELF, *-*-netbsd* would select the old
-	# object file format.  This provides both forward
-	# compatibility and a consistent mechanism for selecting the
-	# object file format.
-	#
-	# Note: NetBSD doesn't particularly care about the vendor
-	# portion of the name.  We always set it to "unknown".
-	sysctl="sysctl -n hw.machine_arch"
-	UNAME_MACHINE_ARCH=$( (uname -p 2>/dev/null || \
-	    "/sbin/$sysctl" 2>/dev/null || \
-	    "/usr/sbin/$sysctl" 2>/dev/null || \
-	    echo unknown))
-	case "$UNAME_MACHINE_ARCH" in
-	    aarch64eb) machine=aarch64_be-unknown ;;
-	    armeb) machine=armeb-unknown ;;
-	    arm*) machine=arm-unknown ;;
-	    sh3el) machine=shl-unknown ;;
-	    sh3eb) machine=sh-unknown ;;
-	    sh5el) machine=sh5le-unknown ;;
-	    earmv*)
-		arch=$(echo "$UNAME_MACHINE_ARCH" | sed -e 's,^e\(armv[0-9]\).*$,\1,')
-		endian=$(echo "$UNAME_MACHINE_ARCH" | sed -ne 's,^.*\(eb\)$,\1,p')
-		machine="${arch}${endian}"-unknown
-		;;
-	    *) machine="$UNAME_MACHINE_ARCH"-unknown ;;
-	esac
-	# The Operating System including object format, if it has switched
-	# to ELF recently (or will in the future) and ABI.
-	case "$UNAME_MACHINE_ARCH" in
-	    earm*)
-		os=netbsdelf
-		;;
-	    arm*|i386|m68k|ns32k|sh3*|sparc|vax)
-		set_cc_for_build
-		if echo __ELF__ | $CC_FOR_BUILD -E - 2>/dev/null \
-			| grep -q __ELF__
-		then
-		    # Once all utilities can be ECOFF (netbsdecoff) or a.out (netbsdaout).
-		    # Return netbsd for either.  FIX?
-		    os=netbsd
-		else
-		    os=netbsdelf
-		fi
-		;;
-	    *)
-		os=netbsd
-		;;
-	esac
-	# Determine ABI tags.
-	case "$UNAME_MACHINE_ARCH" in
-	    earm*)
-		expr='s/^earmv[0-9]/-eabi/;s/eb$//'
-		abi=$(echo "$UNAME_MACHINE_ARCH" | sed -e "$expr")
-		;;
-	esac
-	# The OS release
-	# Debian GNU/NetBSD machines have a different userland, and
-	# thus, need a distinct triplet. However, they do not need
-	# kernel version information, so it can be replaced with a
-	# suitable tag, in the style of linux-gnu.
-	case "$UNAME_VERSION" in
-	    Debian*)
-		release='-gnu'
-		;;
-	    *)
-		release=$(echo "$UNAME_RELEASE" | sed -e 's/[-_].*//' | cut -d. -f1,2)
-		;;
-	esac
-	# Since CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM:
-	# contains redundant information, the shorter form:
-	# CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM is used.
-	echo "$machine-${os}${release}${abi-}"
-	exit ;;
-    *:Bitrig:*:*)
-	UNAME_MACHINE_ARCH=$(arch | sed 's/Bitrig.//')
-	echo "$UNAME_MACHINE_ARCH"-unknown-bitrig"$UNAME_RELEASE"
-	exit ;;
-    *:OpenBSD:*:*)
-	UNAME_MACHINE_ARCH=$(arch | sed 's/OpenBSD.//')
-	echo "$UNAME_MACHINE_ARCH"-unknown-openbsd"$UNAME_RELEASE"
-	exit ;;
-    *:LibertyBSD:*:*)
-	UNAME_MACHINE_ARCH=$(arch | sed 's/^.*BSD\.//')
-	echo "$UNAME_MACHINE_ARCH"-unknown-libertybsd"$UNAME_RELEASE"
-	exit ;;
-    *:MidnightBSD:*:*)
-	echo "$UNAME_MACHINE"-unknown-midnightbsd"$UNAME_RELEASE"
-	exit ;;
-    *:ekkoBSD:*:*)
-	echo "$UNAME_MACHINE"-unknown-ekkobsd"$UNAME_RELEASE"
-	exit ;;
-    *:SolidBSD:*:*)
-	echo "$UNAME_MACHINE"-unknown-solidbsd"$UNAME_RELEASE"
-	exit ;;
-    *:OS108:*:*)
-	echo "$UNAME_MACHINE"-unknown-os108_"$UNAME_RELEASE"
-	exit ;;
-    macppc:MirBSD:*:*)
-	echo powerpc-unknown-mirbsd"$UNAME_RELEASE"
-	exit ;;
-    *:MirBSD:*:*)
-	echo "$UNAME_MACHINE"-unknown-mirbsd"$UNAME_RELEASE"
-	exit ;;
-    *:Sortix:*:*)
-	echo "$UNAME_MACHINE"-unknown-sortix
-	exit ;;
-    *:Twizzler:*:*)
-	echo "$UNAME_MACHINE"-unknown-twizzler
-	exit ;;
-    *:Redox:*:*)
-	echo "$UNAME_MACHINE"-unknown-redox
-	exit ;;
-    mips:OSF1:*.*)
-	echo mips-dec-osf1
-	exit ;;
-    alpha:OSF1:*:*)
-	case $UNAME_RELEASE in
-	*4.0)
-		UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $3}')
-		;;
-	*5.*)
-		UNAME_RELEASE=$(/usr/sbin/sizer -v | awk '{print $4}')
-		;;
-	esac
-	# According to Compaq, /usr/sbin/psrinfo has been available on
-	# OSF/1 and Tru64 systems produced since 1995.  I hope that
-	# covers most systems running today.  This code pipes the CPU
-	# types through head -n 1, so we only detect the type of CPU 0.
-	ALPHA_CPU_TYPE=$(/usr/sbin/psrinfo -v | sed -n -e 's/^  The alpha \(.*\) processor.*$/\1/p' | head -n 1)
-	case "$ALPHA_CPU_TYPE" in
-	    "EV4 (21064)")
-		UNAME_MACHINE=alpha ;;
-	    "EV4.5 (21064)")
-		UNAME_MACHINE=alpha ;;
-	    "LCA4 (21066/21068)")
-		UNAME_MACHINE=alpha ;;
-	    "EV5 (21164)")
-		UNAME_MACHINE=alphaev5 ;;
-	    "EV5.6 (21164A)")
-		UNAME_MACHINE=alphaev56 ;;
-	    "EV5.6 (21164PC)")
-		UNAME_MACHINE=alphapca56 ;;
-	    "EV5.7 (21164PC)")
-		UNAME_MACHINE=alphapca57 ;;
-	    "EV6 (21264)")
-		UNAME_MACHINE=alphaev6 ;;
-	    "EV6.7 (21264A)")
-		UNAME_MACHINE=alphaev67 ;;
-	    "EV6.8CB (21264C)")
-		UNAME_MACHINE=alphaev68 ;;
-	    "EV6.8AL (21264B)")
-		UNAME_MACHINE=alphaev68 ;;
-	    "EV6.8CX (21264D)")
-		UNAME_MACHINE=alphaev68 ;;
-	    "EV6.9A (21264/EV69A)")
-		UNAME_MACHINE=alphaev69 ;;
-	    "EV7 (21364)")
-		UNAME_MACHINE=alphaev7 ;;
-	    "EV7.9 (21364A)")
-		UNAME_MACHINE=alphaev79 ;;
-	esac
-	# A Pn.n version is a patched version.
-	# A Vn.n version is a released version.
-	# A Tn.n version is a released field test version.
-	# A Xn.n version is an unreleased experimental baselevel.
-	# 1.2 uses "1.2" for uname -r.
-	echo "$UNAME_MACHINE"-dec-osf"$(echo "$UNAME_RELEASE" | sed -e 's/^[PVTX]//' | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz)"
-	# Reset EXIT trap before exiting to avoid spurious non-zero exit code.
-	exitcode=$?
-	trap '' 0
-	exit $exitcode ;;
-    Amiga*:UNIX_System_V:4.0:*)
-	echo m68k-unknown-sysv4
-	exit ;;
-    *:[Aa]miga[Oo][Ss]:*:*)
-	echo "$UNAME_MACHINE"-unknown-amigaos
-	exit ;;
-    *:[Mm]orph[Oo][Ss]:*:*)
-	echo "$UNAME_MACHINE"-unknown-morphos
-	exit ;;
-    *:OS/390:*:*)
-	echo i370-ibm-openedition
-	exit ;;
-    *:z/VM:*:*)
-	echo s390-ibm-zvmoe
-	exit ;;
-    *:OS400:*:*)
-	echo powerpc-ibm-os400
-	exit ;;
-    arm:RISC*:1.[012]*:*|arm:riscix:1.[012]*:*)
-	echo arm-acorn-riscix"$UNAME_RELEASE"
-	exit ;;
-    arm*:riscos:*:*|arm*:RISCOS:*:*)
-	echo arm-unknown-riscos
-	exit ;;
-    SR2?01:HI-UX/MPP:*:* | SR8000:HI-UX/MPP:*:*)
-	echo hppa1.1-hitachi-hiuxmpp
-	exit ;;
-    Pyramid*:OSx*:*:* | MIS*:OSx*:*:* | MIS*:SMP_DC-OSx*:*:*)
-	# akee@wpdis03.wpafb.af.mil (Earle F. Ake) contributed MIS and NILE.
-	if test "$( (/bin/universe) 2>/dev/null)" = att ; then
-		echo pyramid-pyramid-sysv3
-	else
-		echo pyramid-pyramid-bsd
-	fi
-	exit ;;
-    NILE*:*:*:dcosx)
-	echo pyramid-pyramid-svr4
-	exit ;;
-    DRS?6000:unix:4.0:6*)
-	echo sparc-icl-nx6
-	exit ;;
-    DRS?6000:UNIX_SV:4.2*:7* | DRS?6000:isis:4.2*:7*)
-	case $(/usr/bin/uname -p) in
-	    sparc) echo sparc-icl-nx7; exit ;;
-	esac ;;
-    s390x:SunOS:*:*)
-	echo "$UNAME_MACHINE"-ibm-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')"
-	exit ;;
-    sun4H:SunOS:5.*:*)
-	echo sparc-hal-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')"
-	exit ;;
-    sun4*:SunOS:5.*:* | tadpole*:SunOS:5.*:*)
-	echo sparc-sun-solaris2"$(echo "$UNAME_RELEASE" | sed -e 's/[^.]*//')"
-	exit ;;
-    i86pc:AuroraUX:5.*:* | i86xen:AuroraUX:5.*:*)
-	echo i386-pc-auroraux"$UNAME_RELEASE"
-	exit ;;
-    i86pc:SunOS:5.*:* | i86xen:SunOS:5.*:*)
-	set_cc_for_build
-	SUN_ARCH=i386
-	# If there is a compiler, see if it is configured for 64-bit objects.
-	# Note that the Sun cc does not turn __LP64__ into 1 like gcc does.
-	# This test works for both compilers.
-	if test "$CC_FOR_BUILD" != no_compiler_found; then
-	    if (echo '#ifdef __amd64'; echo IS_64BIT_ARCH; echo '#endif') | \
-		(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
-		grep IS_64BIT_ARCH >/dev/null
-	    then
-		SUN_ARCH=x86_64
-	    fi
-	fi
-	echo "$SUN_ARCH"-pc-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')"
-	exit ;;
-    sun4*:SunOS:6*:*)
-	# According to config.sub, this is the proper way to canonicalize
-	# SunOS6.  Hard to guess exactly what SunOS6 will be like, but
-	# it's likely to be more like Solaris than SunOS4.
-	echo sparc-sun-solaris3"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')"
-	exit ;;
-    sun4*:SunOS:*:*)
-	case "$(/usr/bin/arch -k)" in
-	    Series*|S4*)
-		UNAME_RELEASE=$(uname -v)
-		;;
-	esac
-	# Japanese Language versions have a version number like `4.1.3-JL'.
-	echo sparc-sun-sunos"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/')"
-	exit ;;
-    sun3*:SunOS:*:*)
-	echo m68k-sun-sunos"$UNAME_RELEASE"
-	exit ;;
-    sun*:*:4.2BSD:*)
-	UNAME_RELEASE=$( (sed 1q /etc/motd | awk '{print substr($5,1,3)}') 2>/dev/null)
-	test "x$UNAME_RELEASE" = x && UNAME_RELEASE=3
-	case "$(/bin/arch)" in
-	    sun3)
-		echo m68k-sun-sunos"$UNAME_RELEASE"
-		;;
-	    sun4)
-		echo sparc-sun-sunos"$UNAME_RELEASE"
-		;;
-	esac
-	exit ;;
-    aushp:SunOS:*:*)
-	echo sparc-auspex-sunos"$UNAME_RELEASE"
-	exit ;;
-    # The situation for MiNT is a little confusing.  The machine name
-    # can be virtually everything (everything which is not
-    # "atarist" or "atariste" at least should have a processor
-    # > m68000).  The system name ranges from "MiNT" over "FreeMiNT"
-    # to the lowercase version "mint" (or "freemint").  Finally
-    # the system name "TOS" denotes a system which is actually not
-    # MiNT.  But MiNT is downward compatible to TOS, so this should
-    # be no problem.
-    atarist[e]:*MiNT:*:* | atarist[e]:*mint:*:* | atarist[e]:*TOS:*:*)
-	echo m68k-atari-mint"$UNAME_RELEASE"
-	exit ;;
-    atari*:*MiNT:*:* | atari*:*mint:*:* | atarist[e]:*TOS:*:*)
-	echo m68k-atari-mint"$UNAME_RELEASE"
-	exit ;;
-    *falcon*:*MiNT:*:* | *falcon*:*mint:*:* | *falcon*:*TOS:*:*)
-	echo m68k-atari-mint"$UNAME_RELEASE"
-	exit ;;
-    milan*:*MiNT:*:* | milan*:*mint:*:* | *milan*:*TOS:*:*)
-	echo m68k-milan-mint"$UNAME_RELEASE"
-	exit ;;
-    hades*:*MiNT:*:* | hades*:*mint:*:* | *hades*:*TOS:*:*)
-	echo m68k-hades-mint"$UNAME_RELEASE"
-	exit ;;
-    *:*MiNT:*:* | *:*mint:*:* | *:*TOS:*:*)
-	echo m68k-unknown-mint"$UNAME_RELEASE"
-	exit ;;
-    m68k:machten:*:*)
-	echo m68k-apple-machten"$UNAME_RELEASE"
-	exit ;;
-    powerpc:machten:*:*)
-	echo powerpc-apple-machten"$UNAME_RELEASE"
-	exit ;;
-    RISC*:Mach:*:*)
-	echo mips-dec-mach_bsd4.3
-	exit ;;
-    RISC*:ULTRIX:*:*)
-	echo mips-dec-ultrix"$UNAME_RELEASE"
-	exit ;;
-    VAX*:ULTRIX*:*:*)
-	echo vax-dec-ultrix"$UNAME_RELEASE"
-	exit ;;
-    2020:CLIX:*:* | 2430:CLIX:*:*)
-	echo clipper-intergraph-clix"$UNAME_RELEASE"
-	exit ;;
-    mips:*:*:UMIPS | mips:*:*:RISCos)
-	set_cc_for_build
-	sed 's/^	//' << EOF > "$dummy.c"
-#ifdef __cplusplus
-#include <stdio.h>  /* for printf() prototype */
-	int main (int argc, char *argv[]) {
-#else
-	int main (argc, argv) int argc; char *argv[]; {
-#endif
-	#if defined (host_mips) && defined (MIPSEB)
-	#if defined (SYSTYPE_SYSV)
-	  printf ("mips-mips-riscos%ssysv\\n", argv[1]); exit (0);
-	#endif
-	#if defined (SYSTYPE_SVR4)
-	  printf ("mips-mips-riscos%ssvr4\\n", argv[1]); exit (0);
-	#endif
-	#if defined (SYSTYPE_BSD43) || defined(SYSTYPE_BSD)
-	  printf ("mips-mips-riscos%sbsd\\n", argv[1]); exit (0);
-	#endif
-	#endif
-	  exit (-1);
-	}
-EOF
-	$CC_FOR_BUILD -o "$dummy" "$dummy.c" &&
-	  dummyarg=$(echo "$UNAME_RELEASE" | sed -n 's/\([0-9]*\).*/\1/p') &&
-	  SYSTEM_NAME=$("$dummy" "$dummyarg") &&
-	    { echo "$SYSTEM_NAME"; exit; }
-	echo mips-mips-riscos"$UNAME_RELEASE"
-	exit ;;
-    Motorola:PowerMAX_OS:*:*)
-	echo powerpc-motorola-powermax
-	exit ;;
-    Motorola:*:4.3:PL8-*)
-	echo powerpc-harris-powermax
-	exit ;;
-    Night_Hawk:*:*:PowerMAX_OS | Synergy:PowerMAX_OS:*:*)
-	echo powerpc-harris-powermax
-	exit ;;
-    Night_Hawk:Power_UNIX:*:*)
-	echo powerpc-harris-powerunix
-	exit ;;
-    m88k:CX/UX:7*:*)
-	echo m88k-harris-cxux7
-	exit ;;
-    m88k:*:4*:R4*)
-	echo m88k-motorola-sysv4
-	exit ;;
-    m88k:*:3*:R3*)
-	echo m88k-motorola-sysv3
-	exit ;;
-    AViiON:dgux:*:*)
-	# DG/UX returns AViiON for all architectures
-	UNAME_PROCESSOR=$(/usr/bin/uname -p)
-	if test "$UNAME_PROCESSOR" = mc88100 || test "$UNAME_PROCESSOR" = mc88110
-	then
-	    if test "$TARGET_BINARY_INTERFACE"x = m88kdguxelfx || \
-	       test "$TARGET_BINARY_INTERFACE"x = x
-	    then
-		echo m88k-dg-dgux"$UNAME_RELEASE"
-	    else
-		echo m88k-dg-dguxbcs"$UNAME_RELEASE"
-	    fi
-	else
-	    echo i586-dg-dgux"$UNAME_RELEASE"
-	fi
-	exit ;;
-    M88*:DolphinOS:*:*)	# DolphinOS (SVR3)
-	echo m88k-dolphin-sysv3
-	exit ;;
-    M88*:*:R3*:*)
-	# Delta 88k system running SVR3
-	echo m88k-motorola-sysv3
-	exit ;;
-    XD88*:*:*:*) # Tektronix XD88 system running UTekV (SVR3)
-	echo m88k-tektronix-sysv3
-	exit ;;
-    Tek43[0-9][0-9]:UTek:*:*) # Tektronix 4300 system running UTek (BSD)
-	echo m68k-tektronix-bsd
-	exit ;;
-    *:IRIX*:*:*)
-	echo mips-sgi-irix"$(echo "$UNAME_RELEASE"|sed -e 's/-/_/g')"
-	exit ;;
-    ????????:AIX?:[12].1:2)   # AIX 2.2.1 or AIX 2.1.1 is RT/PC AIX.
-	echo romp-ibm-aix     # uname -m gives an 8 hex-code CPU id
-	exit ;;               # Note that: echo "'$(uname -s)'" gives 'AIX '
-    i*86:AIX:*:*)
-	echo i386-ibm-aix
-	exit ;;
-    ia64:AIX:*:*)
-	if test -x /usr/bin/oslevel ; then
-		IBM_REV=$(/usr/bin/oslevel)
-	else
-		IBM_REV="$UNAME_VERSION.$UNAME_RELEASE"
-	fi
-	echo "$UNAME_MACHINE"-ibm-aix"$IBM_REV"
-	exit ;;
-    *:AIX:2:3)
-	if grep bos325 /usr/include/stdio.h >/dev/null 2>&1; then
-		set_cc_for_build
-		sed 's/^		//' << EOF > "$dummy.c"
-		#include <sys/systemcfg.h>
-
-		main()
-			{
-			if (!__power_pc())
-				exit(1);
-			puts("powerpc-ibm-aix3.2.5");
-			exit(0);
-			}
-EOF
-		if $CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy")
-		then
-			echo "$SYSTEM_NAME"
-		else
-			echo rs6000-ibm-aix3.2.5
-		fi
-	elif grep bos324 /usr/include/stdio.h >/dev/null 2>&1; then
-		echo rs6000-ibm-aix3.2.4
-	else
-		echo rs6000-ibm-aix3.2
-	fi
-	exit ;;
-    *:AIX:*:[4567])
-	IBM_CPU_ID=$(/usr/sbin/lsdev -C -c processor -S available | sed 1q | awk '{ print $1 }')
-	if /usr/sbin/lsattr -El "$IBM_CPU_ID" | grep ' POWER' >/dev/null 2>&1; then
-		IBM_ARCH=rs6000
-	else
-		IBM_ARCH=powerpc
-	fi
-	if test -x /usr/bin/lslpp ; then
-		IBM_REV=$(/usr/bin/lslpp -Lqc bos.rte.libc |
-			   awk -F: '{ print $3 }' | sed s/[0-9]*$/0/)
-	else
-		IBM_REV="$UNAME_VERSION.$UNAME_RELEASE"
-	fi
-	echo "$IBM_ARCH"-ibm-aix"$IBM_REV"
-	exit ;;
-    *:AIX:*:*)
-	echo rs6000-ibm-aix
-	exit ;;
-    ibmrt:4.4BSD:*|romp-ibm:4.4BSD:*)
-	echo romp-ibm-bsd4.4
-	exit ;;
-    ibmrt:*BSD:*|romp-ibm:BSD:*)            # covers RT/PC BSD and
-	echo romp-ibm-bsd"$UNAME_RELEASE"   # 4.3 with uname added to
-	exit ;;                             # report: romp-ibm BSD 4.3
-    *:BOSX:*:*)
-	echo rs6000-bull-bosx
-	exit ;;
-    DPX/2?00:B.O.S.:*:*)
-	echo m68k-bull-sysv3
-	exit ;;
-    9000/[34]??:4.3bsd:1.*:*)
-	echo m68k-hp-bsd
-	exit ;;
-    hp300:4.4BSD:*:* | 9000/[34]??:4.3bsd:2.*:*)
-	echo m68k-hp-bsd4.4
-	exit ;;
-    9000/[34678]??:HP-UX:*:*)
-	HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//')
-	case "$UNAME_MACHINE" in
-	    9000/31?)            HP_ARCH=m68000 ;;
-	    9000/[34]??)         HP_ARCH=m68k ;;
-	    9000/[678][0-9][0-9])
-		if test -x /usr/bin/getconf; then
-		    sc_cpu_version=$(/usr/bin/getconf SC_CPU_VERSION 2>/dev/null)
-		    sc_kernel_bits=$(/usr/bin/getconf SC_KERNEL_BITS 2>/dev/null)
-		    case "$sc_cpu_version" in
-		      523) HP_ARCH=hppa1.0 ;; # CPU_PA_RISC1_0
-		      528) HP_ARCH=hppa1.1 ;; # CPU_PA_RISC1_1
-		      532)                      # CPU_PA_RISC2_0
-			case "$sc_kernel_bits" in
-			  32) HP_ARCH=hppa2.0n ;;
-			  64) HP_ARCH=hppa2.0w ;;
-			  '') HP_ARCH=hppa2.0 ;;   # HP-UX 10.20
-			esac ;;
-		    esac
-		fi
-		if test "$HP_ARCH" = ""; then
-		    set_cc_for_build
-		    sed 's/^		//' << EOF > "$dummy.c"
-
-		#define _HPUX_SOURCE
-		#include <stdlib.h>
-		#include <unistd.h>
-
-		int main ()
-		{
-		#if defined(_SC_KERNEL_BITS)
-		    long bits = sysconf(_SC_KERNEL_BITS);
-		#endif
-		    long cpu  = sysconf (_SC_CPU_VERSION);
-
-		    switch (cpu)
-			{
-			case CPU_PA_RISC1_0: puts ("hppa1.0"); break;
-			case CPU_PA_RISC1_1: puts ("hppa1.1"); break;
-			case CPU_PA_RISC2_0:
-		#if defined(_SC_KERNEL_BITS)
-			    switch (bits)
-				{
-				case 64: puts ("hppa2.0w"); break;
-				case 32: puts ("hppa2.0n"); break;
-				default: puts ("hppa2.0"); break;
-				} break;
-		#else  /* !defined(_SC_KERNEL_BITS) */
-			    puts ("hppa2.0"); break;
-		#endif
-			default: puts ("hppa1.0"); break;
-			}
-		    exit (0);
-		}
-EOF
-		    (CCOPTS="" $CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null) && HP_ARCH=$("$dummy")
-		    test -z "$HP_ARCH" && HP_ARCH=hppa
-		fi ;;
-	esac
-	if test "$HP_ARCH" = hppa2.0w
-	then
-	    set_cc_for_build
-
-	    # hppa2.0w-hp-hpux* has a 64-bit kernel and a compiler generating
-	    # 32-bit code.  hppa64-hp-hpux* has the same kernel and a compiler
-	    # generating 64-bit code.  GNU and HP use different nomenclature:
-	    #
-	    # $ CC_FOR_BUILD=cc ./config.guess
-	    # => hppa2.0w-hp-hpux11.23
-	    # $ CC_FOR_BUILD="cc +DA2.0w" ./config.guess
-	    # => hppa64-hp-hpux11.23
-
-	    if echo __LP64__ | (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) |
-		grep -q __LP64__
-	    then
-		HP_ARCH=hppa2.0w
-	    else
-		HP_ARCH=hppa64
-	    fi
-	fi
-	echo "$HP_ARCH"-hp-hpux"$HPUX_REV"
-	exit ;;
-    ia64:HP-UX:*:*)
-	HPUX_REV=$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*.[0B]*//')
-	echo ia64-hp-hpux"$HPUX_REV"
-	exit ;;
-    3050*:HI-UX:*:*)
-	set_cc_for_build
-	sed 's/^	//' << EOF > "$dummy.c"
-	#include <unistd.h>
-	int
-	main ()
-	{
-	  long cpu = sysconf (_SC_CPU_VERSION);
-	  /* The order matters, because CPU_IS_HP_MC68K erroneously returns
-	     true for CPU_PA_RISC1_0.  CPU_IS_PA_RISC returns correct
-	     results, however.  */
-	  if (CPU_IS_PA_RISC (cpu))
-	    {
-	      switch (cpu)
-		{
-		  case CPU_PA_RISC1_0: puts ("hppa1.0-hitachi-hiuxwe2"); break;
-		  case CPU_PA_RISC1_1: puts ("hppa1.1-hitachi-hiuxwe2"); break;
-		  case CPU_PA_RISC2_0: puts ("hppa2.0-hitachi-hiuxwe2"); break;
-		  default: puts ("hppa-hitachi-hiuxwe2"); break;
-		}
-	    }
-	  else if (CPU_IS_HP_MC68K (cpu))
-	    puts ("m68k-hitachi-hiuxwe2");
-	  else puts ("unknown-hitachi-hiuxwe2");
-	  exit (0);
-	}
-EOF
-	$CC_FOR_BUILD -o "$dummy" "$dummy.c" && SYSTEM_NAME=$("$dummy") &&
-		{ echo "$SYSTEM_NAME"; exit; }
-	echo unknown-hitachi-hiuxwe2
-	exit ;;
-    9000/7??:4.3bsd:*:* | 9000/8?[79]:4.3bsd:*:*)
-	echo hppa1.1-hp-bsd
-	exit ;;
-    9000/8??:4.3bsd:*:*)
-	echo hppa1.0-hp-bsd
-	exit ;;
-    *9??*:MPE/iX:*:* | *3000*:MPE/iX:*:*)
-	echo hppa1.0-hp-mpeix
-	exit ;;
-    hp7??:OSF1:*:* | hp8?[79]:OSF1:*:*)
-	echo hppa1.1-hp-osf
-	exit ;;
-    hp8??:OSF1:*:*)
-	echo hppa1.0-hp-osf
-	exit ;;
-    i*86:OSF1:*:*)
-	if test -x /usr/sbin/sysversion ; then
-	    echo "$UNAME_MACHINE"-unknown-osf1mk
-	else
-	    echo "$UNAME_MACHINE"-unknown-osf1
-	fi
-	exit ;;
-    parisc*:Lites*:*:*)
-	echo hppa1.1-hp-lites
-	exit ;;
-    C1*:ConvexOS:*:* | convex:ConvexOS:C1*:*)
-	echo c1-convex-bsd
-	exit ;;
-    C2*:ConvexOS:*:* | convex:ConvexOS:C2*:*)
-	if getsysinfo -f scalar_acc
-	then echo c32-convex-bsd
-	else echo c2-convex-bsd
-	fi
-	exit ;;
-    C34*:ConvexOS:*:* | convex:ConvexOS:C34*:*)
-	echo c34-convex-bsd
-	exit ;;
-    C38*:ConvexOS:*:* | convex:ConvexOS:C38*:*)
-	echo c38-convex-bsd
-	exit ;;
-    C4*:ConvexOS:*:* | convex:ConvexOS:C4*:*)
-	echo c4-convex-bsd
-	exit ;;
-    CRAY*Y-MP:*:*:*)
-	echo ymp-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
-	exit ;;
-    CRAY*[A-Z]90:*:*:*)
-	echo "$UNAME_MACHINE"-cray-unicos"$UNAME_RELEASE" \
-	| sed -e 's/CRAY.*\([A-Z]90\)/\1/' \
-	      -e y/ABCDEFGHIJKLMNOPQRSTUVWXYZ/abcdefghijklmnopqrstuvwxyz/ \
-	      -e 's/\.[^.]*$/.X/'
-	exit ;;
-    CRAY*TS:*:*:*)
-	echo t90-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
-	exit ;;
-    CRAY*T3E:*:*:*)
-	echo alphaev5-cray-unicosmk"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
-	exit ;;
-    CRAY*SV1:*:*:*)
-	echo sv1-cray-unicos"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
-	exit ;;
-    *:UNICOS/mp:*:*)
-	echo craynv-cray-unicosmp"$UNAME_RELEASE" | sed -e 's/\.[^.]*$/.X/'
-	exit ;;
-    F30[01]:UNIX_System_V:*:* | F700:UNIX_System_V:*:*)
-	FUJITSU_PROC=$(uname -m | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz)
-	FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///')
-	FUJITSU_REL=$(echo "$UNAME_RELEASE" | sed -e 's/ /_/')
-	echo "${FUJITSU_PROC}-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
-	exit ;;
-    5000:UNIX_System_V:4.*:*)
-	FUJITSU_SYS=$(uname -p | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/\///')
-	FUJITSU_REL=$(echo "$UNAME_RELEASE" | tr ABCDEFGHIJKLMNOPQRSTUVWXYZ abcdefghijklmnopqrstuvwxyz | sed -e 's/ /_/')
-	echo "sparc-fujitsu-${FUJITSU_SYS}${FUJITSU_REL}"
-	exit ;;
-    i*86:BSD/386:*:* | i*86:BSD/OS:*:* | *:Ascend\ Embedded/OS:*:*)
-	echo "$UNAME_MACHINE"-pc-bsdi"$UNAME_RELEASE"
-	exit ;;
-    sparc*:BSD/OS:*:*)
-	echo sparc-unknown-bsdi"$UNAME_RELEASE"
-	exit ;;
-    *:BSD/OS:*:*)
-	echo "$UNAME_MACHINE"-unknown-bsdi"$UNAME_RELEASE"
-	exit ;;
-    arm:FreeBSD:*:*)
-	UNAME_PROCESSOR=$(uname -p)
-	set_cc_for_build
-	if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
-	    | grep -q __ARM_PCS_VFP
-	then
-	    echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabi
-	else
-	    echo "${UNAME_PROCESSOR}"-unknown-freebsd"$(echo ${UNAME_RELEASE}|sed -e 's/[-(].*//')"-gnueabihf
-	fi
-	exit ;;
-    *:FreeBSD:*:*)
-	UNAME_PROCESSOR=$(/usr/bin/uname -p)
-	case "$UNAME_PROCESSOR" in
-	    amd64)
-		UNAME_PROCESSOR=x86_64 ;;
-	    i386)
-		UNAME_PROCESSOR=i586 ;;
-	esac
-	echo "$UNAME_PROCESSOR"-unknown-freebsd"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')"
-	exit ;;
-    i*:CYGWIN*:*)
-	echo "$UNAME_MACHINE"-pc-cygwin
-	exit ;;
-    *:MINGW64*:*)
-	echo "$UNAME_MACHINE"-pc-mingw64
-	exit ;;
-    *:MINGW*:*)
-	echo "$UNAME_MACHINE"-pc-mingw32
-	exit ;;
-    *:MSYS*:*)
-	echo "$UNAME_MACHINE"-pc-msys
-	exit ;;
-    i*:PW*:*)
-	echo "$UNAME_MACHINE"-pc-pw32
-	exit ;;
-    *:Interix*:*)
-	case "$UNAME_MACHINE" in
-	    x86)
-		echo i586-pc-interix"$UNAME_RELEASE"
-		exit ;;
-	    authenticamd | genuineintel | EM64T)
-		echo x86_64-unknown-interix"$UNAME_RELEASE"
-		exit ;;
-	    IA64)
-		echo ia64-unknown-interix"$UNAME_RELEASE"
-		exit ;;
-	esac ;;
-    i*:UWIN*:*)
-	echo "$UNAME_MACHINE"-pc-uwin
-	exit ;;
-    amd64:CYGWIN*:*:* | x86_64:CYGWIN*:*:*)
-	echo x86_64-pc-cygwin
-	exit ;;
-    prep*:SunOS:5.*:*)
-	echo powerpcle-unknown-solaris2"$(echo "$UNAME_RELEASE"|sed -e 's/[^.]*//')"
-	exit ;;
-    *:GNU:*:*)
-	# the GNU system
-	echo "$(echo "$UNAME_MACHINE"|sed -e 's,[-/].*$,,')-unknown-$LIBC$(echo "$UNAME_RELEASE"|sed -e 's,/.*$,,')"
-	exit ;;
-    *:GNU/*:*:*)
-	# other systems with GNU libc and userland
-	echo "$UNAME_MACHINE-unknown-$(echo "$UNAME_SYSTEM" | sed 's,^[^/]*/,,' | tr "[:upper:]" "[:lower:]")$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')-$LIBC"
-	exit ;;
-    *:Minix:*:*)
-	echo "$UNAME_MACHINE"-unknown-minix
-	exit ;;
-    aarch64:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    aarch64_be:Linux:*:*)
-	UNAME_MACHINE=aarch64_be
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    alpha:Linux:*:*)
-	case $(sed -n '/^cpu model/s/^.*: \(.*\)/\1/p' /proc/cpuinfo 2>/dev/null) in
-	  EV5)   UNAME_MACHINE=alphaev5 ;;
-	  EV56)  UNAME_MACHINE=alphaev56 ;;
-	  PCA56) UNAME_MACHINE=alphapca56 ;;
-	  PCA57) UNAME_MACHINE=alphapca56 ;;
-	  EV6)   UNAME_MACHINE=alphaev6 ;;
-	  EV67)  UNAME_MACHINE=alphaev67 ;;
-	  EV68*) UNAME_MACHINE=alphaev68 ;;
-	esac
-	objdump --private-headers /bin/sh | grep -q ld.so.1
-	if test "$?" = 0 ; then LIBC=gnulibc1 ; fi
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    arc:Linux:*:* | arceb:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    arm*:Linux:*:*)
-	set_cc_for_build
-	if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \
-	    | grep -q __ARM_EABI__
-	then
-	    echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	else
-	    if echo __ARM_PCS_VFP | $CC_FOR_BUILD -E - 2>/dev/null \
-		| grep -q __ARM_PCS_VFP
-	    then
-		echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabi
-	    else
-		echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"eabihf
-	    fi
-	fi
-	exit ;;
-    avr32*:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    cris:Linux:*:*)
-	echo "$UNAME_MACHINE"-axis-linux-"$LIBC"
-	exit ;;
-    crisv32:Linux:*:*)
-	echo "$UNAME_MACHINE"-axis-linux-"$LIBC"
-	exit ;;
-    e2k:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    frv:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    hexagon:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    i*86:Linux:*:*)
-	echo "$UNAME_MACHINE"-pc-linux-"$LIBC"
-	exit ;;
-    ia64:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    k1om:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    loongarch32:Linux:*:* | loongarch64:Linux:*:* | loongarchx32:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    m32r*:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    m68*:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    mips:Linux:*:* | mips64:Linux:*:*)
-	set_cc_for_build
-	IS_GLIBC=0
-	test x"${LIBC}" = xgnu && IS_GLIBC=1
-	sed 's/^	//' << EOF > "$dummy.c"
-	#undef CPU
-	#undef mips
-	#undef mipsel
-	#undef mips64
-	#undef mips64el
-	#if ${IS_GLIBC} && defined(_ABI64)
-	LIBCABI=gnuabi64
-	#else
-	#if ${IS_GLIBC} && defined(_ABIN32)
-	LIBCABI=gnuabin32
-	#else
-	LIBCABI=${LIBC}
-	#endif
-	#endif
-
-	#if ${IS_GLIBC} && defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6
-	CPU=mipsisa64r6
-	#else
-	#if ${IS_GLIBC} && !defined(__mips64) && defined(__mips_isa_rev) && __mips_isa_rev>=6
-	CPU=mipsisa32r6
-	#else
-	#if defined(__mips64)
-	CPU=mips64
-	#else
-	CPU=mips
-	#endif
-	#endif
-	#endif
-
-	#if defined(__MIPSEL__) || defined(__MIPSEL) || defined(_MIPSEL) || defined(MIPSEL)
-	MIPS_ENDIAN=el
-	#else
-	#if defined(__MIPSEB__) || defined(__MIPSEB) || defined(_MIPSEB) || defined(MIPSEB)
-	MIPS_ENDIAN=
-	#else
-	MIPS_ENDIAN=
-	#endif
-	#endif
-EOF
-	eval "$($CC_FOR_BUILD -E "$dummy.c" 2>/dev/null | grep '^CPU\|^MIPS_ENDIAN\|^LIBCABI')"
-	test "x$CPU" != x && { echo "$CPU${MIPS_ENDIAN}-unknown-linux-$LIBCABI"; exit; }
-	;;
-    mips64el:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    openrisc*:Linux:*:*)
-	echo or1k-unknown-linux-"$LIBC"
-	exit ;;
-    or32:Linux:*:* | or1k*:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    padre:Linux:*:*)
-	echo sparc-unknown-linux-"$LIBC"
-	exit ;;
-    parisc64:Linux:*:* | hppa64:Linux:*:*)
-	echo hppa64-unknown-linux-"$LIBC"
-	exit ;;
-    parisc:Linux:*:* | hppa:Linux:*:*)
-	# Look for CPU level
-	case $(grep '^cpu[^a-z]*:' /proc/cpuinfo 2>/dev/null | cut -d' ' -f2) in
-	  PA7*) echo hppa1.1-unknown-linux-"$LIBC" ;;
-	  PA8*) echo hppa2.0-unknown-linux-"$LIBC" ;;
-	  *)    echo hppa-unknown-linux-"$LIBC" ;;
-	esac
-	exit ;;
-    ppc64:Linux:*:*)
-	echo powerpc64-unknown-linux-"$LIBC"
-	exit ;;
-    ppc:Linux:*:*)
-	echo powerpc-unknown-linux-"$LIBC"
-	exit ;;
-    ppc64le:Linux:*:*)
-	echo powerpc64le-unknown-linux-"$LIBC"
-	exit ;;
-    ppcle:Linux:*:*)
-	echo powerpcle-unknown-linux-"$LIBC"
-	exit ;;
-    riscv32:Linux:*:* | riscv32be:Linux:*:* | riscv64:Linux:*:* | riscv64be:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    s390:Linux:*:* | s390x:Linux:*:*)
-	echo "$UNAME_MACHINE"-ibm-linux-"$LIBC"
-	exit ;;
-    sh64*:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    sh*:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    sparc:Linux:*:* | sparc64:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    tile*:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    vax:Linux:*:*)
-	echo "$UNAME_MACHINE"-dec-linux-"$LIBC"
-	exit ;;
-    x86_64:Linux:*:*)
-	set_cc_for_build
-	LIBCABI=$LIBC
-	if test "$CC_FOR_BUILD" != no_compiler_found; then
-	    if (echo '#ifdef __ILP32__'; echo IS_X32; echo '#endif') | \
-		(CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
-		grep IS_X32 >/dev/null
-	    then
-		LIBCABI="$LIBC"x32
-	    fi
-	fi
-	echo "$UNAME_MACHINE"-pc-linux-"$LIBCABI"
-	exit ;;
-    xtensa*:Linux:*:*)
-	echo "$UNAME_MACHINE"-unknown-linux-"$LIBC"
-	exit ;;
-    i*86:DYNIX/ptx:4*:*)
-	# ptx 4.0 does uname -s correctly, with DYNIX/ptx in there.
-	# earlier versions are messed up and put the nodename in both
-	# sysname and nodename.
-	echo i386-sequent-sysv4
-	exit ;;
-    i*86:UNIX_SV:4.2MP:2.*)
-	# Unixware is an offshoot of SVR4, but it has its own version
-	# number series starting with 2...
-	# I am not positive that other SVR4 systems won't match this,
-	# I just have to hope.  -- rms.
-	# Use sysv4.2uw... so that sysv4* matches it.
-	echo "$UNAME_MACHINE"-pc-sysv4.2uw"$UNAME_VERSION"
-	exit ;;
-    i*86:OS/2:*:*)
-	# If we were able to find `uname', then EMX Unix compatibility
-	# is probably installed.
-	echo "$UNAME_MACHINE"-pc-os2-emx
-	exit ;;
-    i*86:XTS-300:*:STOP)
-	echo "$UNAME_MACHINE"-unknown-stop
-	exit ;;
-    i*86:atheos:*:*)
-	echo "$UNAME_MACHINE"-unknown-atheos
-	exit ;;
-    i*86:syllable:*:*)
-	echo "$UNAME_MACHINE"-pc-syllable
-	exit ;;
-    i*86:LynxOS:2.*:* | i*86:LynxOS:3.[01]*:* | i*86:LynxOS:4.[02]*:*)
-	echo i386-unknown-lynxos"$UNAME_RELEASE"
-	exit ;;
-    i*86:*DOS:*:*)
-	echo "$UNAME_MACHINE"-pc-msdosdjgpp
-	exit ;;
-    i*86:*:4.*:*)
-	UNAME_REL=$(echo "$UNAME_RELEASE" | sed 's/\/MP$//')
-	if grep Novell /usr/include/link.h >/dev/null 2>/dev/null; then
-		echo "$UNAME_MACHINE"-univel-sysv"$UNAME_REL"
-	else
-		echo "$UNAME_MACHINE"-pc-sysv"$UNAME_REL"
-	fi
-	exit ;;
-    i*86:*:5:[678]*)
-	# UnixWare 7.x, OpenUNIX and OpenServer 6.
-	case $(/bin/uname -X | grep "^Machine") in
-	    *486*)	     UNAME_MACHINE=i486 ;;
-	    *Pentium)	     UNAME_MACHINE=i586 ;;
-	    *Pent*|*Celeron) UNAME_MACHINE=i686 ;;
-	esac
-	echo "$UNAME_MACHINE-unknown-sysv${UNAME_RELEASE}${UNAME_SYSTEM}${UNAME_VERSION}"
-	exit ;;
-    i*86:*:3.2:*)
-	if test -f /usr/options/cb.name; then
-		UNAME_REL=$(sed -n 's/.*Version //p' </usr/options/cb.name)
-		echo "$UNAME_MACHINE"-pc-isc"$UNAME_REL"
-	elif /bin/uname -X 2>/dev/null >/dev/null ; then
-		UNAME_REL=$( (/bin/uname -X|grep Release|sed -e 's/.*= //'))
-		(/bin/uname -X|grep i80486 >/dev/null) && UNAME_MACHINE=i486
-		(/bin/uname -X|grep '^Machine.*Pentium' >/dev/null) \
-			&& UNAME_MACHINE=i586
-		(/bin/uname -X|grep '^Machine.*Pent *II' >/dev/null) \
-			&& UNAME_MACHINE=i686
-		(/bin/uname -X|grep '^Machine.*Pentium Pro' >/dev/null) \
-			&& UNAME_MACHINE=i686
-		echo "$UNAME_MACHINE"-pc-sco"$UNAME_REL"
-	else
-		echo "$UNAME_MACHINE"-pc-sysv32
-	fi
-	exit ;;
-    pc:*:*:*)
-	# Left here for compatibility:
-	# uname -m prints for DJGPP always 'pc', but it prints nothing about
-	# the processor, so we play safe by assuming i586.
-	# Note: whatever this is, it MUST be the same as what config.sub
-	# prints for the "djgpp" host, or else GDB configure will decide that
-	# this is a cross-build.
-	echo i586-pc-msdosdjgpp
-	exit ;;
-    Intel:Mach:3*:*)
-	echo i386-pc-mach3
-	exit ;;
-    paragon:*:*:*)
-	echo i860-intel-osf1
-	exit ;;
-    i860:*:4.*:*) # i860-SVR4
-	if grep Stardent /usr/include/sys/uadmin.h >/dev/null 2>&1 ; then
-	  echo i860-stardent-sysv"$UNAME_RELEASE" # Stardent Vistra i860-SVR4
-	else # Add other i860-SVR4 vendors below as they are discovered.
-	  echo i860-unknown-sysv"$UNAME_RELEASE"  # Unknown i860-SVR4
-	fi
-	exit ;;
-    mini*:CTIX:SYS*5:*)
-	# "miniframe"
-	echo m68010-convergent-sysv
-	exit ;;
-    mc68k:UNIX:SYSTEM5:3.51m)
-	echo m68k-convergent-sysv
-	exit ;;
-    M680?0:D-NIX:5.3:*)
-	echo m68k-diab-dnix
-	exit ;;
-    M68*:*:R3V[5678]*:*)
-	test -r /sysV68 && { echo 'm68k-motorola-sysv'; exit; } ;;
-    3[345]??:*:4.0:3.0 | 3[34]??A:*:4.0:3.0 | 3[34]??,*:*:4.0:3.0 | 3[34]??/*:*:4.0:3.0 | 4400:*:4.0:3.0 | 4850:*:4.0:3.0 | SKA40:*:4.0:3.0 | SDS2:*:4.0:3.0 | SHG2:*:4.0:3.0 | S7501*:*:4.0:3.0)
-	OS_REL=''
-	test -r /etc/.relid \
-	&& OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid)
-	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-	  && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
-	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
-	  && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;;
-    3[34]??:*:4.0:* | 3[34]??,*:*:4.0:*)
-	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-	  && { echo i486-ncr-sysv4; exit; } ;;
-    NCR*:*:4.2:* | MPRAS*:*:4.2:*)
-	OS_REL='.3'
-	test -r /etc/.relid \
-	    && OS_REL=.$(sed -n 's/[^ ]* [^ ]* \([0-9][0-9]\).*/\1/p' < /etc/.relid)
-	/bin/uname -p 2>/dev/null | grep 86 >/dev/null \
-	    && { echo i486-ncr-sysv4.3"$OS_REL"; exit; }
-	/bin/uname -p 2>/dev/null | /bin/grep entium >/dev/null \
-	    && { echo i586-ncr-sysv4.3"$OS_REL"; exit; }
-	/bin/uname -p 2>/dev/null | /bin/grep pteron >/dev/null \
-	    && { echo i586-ncr-sysv4.3"$OS_REL"; exit; } ;;
-    m68*:LynxOS:2.*:* | m68*:LynxOS:3.0*:*)
-	echo m68k-unknown-lynxos"$UNAME_RELEASE"
-	exit ;;
-    mc68030:UNIX_System_V:4.*:*)
-	echo m68k-atari-sysv4
-	exit ;;
-    TSUNAMI:LynxOS:2.*:*)
-	echo sparc-unknown-lynxos"$UNAME_RELEASE"
-	exit ;;
-    rs6000:LynxOS:2.*:*)
-	echo rs6000-unknown-lynxos"$UNAME_RELEASE"
-	exit ;;
-    PowerPC:LynxOS:2.*:* | PowerPC:LynxOS:3.[01]*:* | PowerPC:LynxOS:4.[02]*:*)
-	echo powerpc-unknown-lynxos"$UNAME_RELEASE"
-	exit ;;
-    SM[BE]S:UNIX_SV:*:*)
-	echo mips-dde-sysv"$UNAME_RELEASE"
-	exit ;;
-    RM*:ReliantUNIX-*:*:*)
-	echo mips-sni-sysv4
-	exit ;;
-    RM*:SINIX-*:*:*)
-	echo mips-sni-sysv4
-	exit ;;
-    *:SINIX-*:*:*)
-	if uname -p 2>/dev/null >/dev/null ; then
-		UNAME_MACHINE=$( (uname -p) 2>/dev/null)
-		echo "$UNAME_MACHINE"-sni-sysv4
-	else
-		echo ns32k-sni-sysv
-	fi
-	exit ;;
-    PENTIUM:*:4.0*:*)	# Unisys `ClearPath HMP IX 4000' SVR4/MP effort
-			# says <Richard.M.Bartel@ccMail.Census.GOV>
-	echo i586-unisys-sysv4
-	exit ;;
-    *:UNIX_System_V:4*:FTX*)
-	# From Gerald Hewes <hewes@openmarket.com>.
-	# How about differentiating between stratus architectures? -djm
-	echo hppa1.1-stratus-sysv4
-	exit ;;
-    *:*:*:FTX*)
-	# From seanf@swdc.stratus.com.
-	echo i860-stratus-sysv4
-	exit ;;
-    i*86:VOS:*:*)
-	# From Paul.Green@stratus.com.
-	echo "$UNAME_MACHINE"-stratus-vos
-	exit ;;
-    *:VOS:*:*)
-	# From Paul.Green@stratus.com.
-	echo hppa1.1-stratus-vos
-	exit ;;
-    mc68*:A/UX:*:*)
-	echo m68k-apple-aux"$UNAME_RELEASE"
-	exit ;;
-    news*:NEWS-OS:6*:*)
-	echo mips-sony-newsos6
-	exit ;;
-    R[34]000:*System_V*:*:* | R4000:UNIX_SYSV:*:* | R*000:UNIX_SV:*:*)
-	if test -d /usr/nec; then
-		echo mips-nec-sysv"$UNAME_RELEASE"
-	else
-		echo mips-unknown-sysv"$UNAME_RELEASE"
-	fi
-	exit ;;
-    BeBox:BeOS:*:*)	# BeOS running on hardware made by Be, PPC only.
-	echo powerpc-be-beos
-	exit ;;
-    BeMac:BeOS:*:*)	# BeOS running on Mac or Mac clone, PPC only.
-	echo powerpc-apple-beos
-	exit ;;
-    BePC:BeOS:*:*)	# BeOS running on Intel PC compatible.
-	echo i586-pc-beos
-	exit ;;
-    BePC:Haiku:*:*)	# Haiku running on Intel PC compatible.
-	echo i586-pc-haiku
-	exit ;;
-    x86_64:Haiku:*:*)
-	echo x86_64-unknown-haiku
-	exit ;;
-    SX-4:SUPER-UX:*:*)
-	echo sx4-nec-superux"$UNAME_RELEASE"
-	exit ;;
-    SX-5:SUPER-UX:*:*)
-	echo sx5-nec-superux"$UNAME_RELEASE"
-	exit ;;
-    SX-6:SUPER-UX:*:*)
-	echo sx6-nec-superux"$UNAME_RELEASE"
-	exit ;;
-    SX-7:SUPER-UX:*:*)
-	echo sx7-nec-superux"$UNAME_RELEASE"
-	exit ;;
-    SX-8:SUPER-UX:*:*)
-	echo sx8-nec-superux"$UNAME_RELEASE"
-	exit ;;
-    SX-8R:SUPER-UX:*:*)
-	echo sx8r-nec-superux"$UNAME_RELEASE"
-	exit ;;
-    SX-ACE:SUPER-UX:*:*)
-	echo sxace-nec-superux"$UNAME_RELEASE"
-	exit ;;
-    Power*:Rhapsody:*:*)
-	echo powerpc-apple-rhapsody"$UNAME_RELEASE"
-	exit ;;
-    *:Rhapsody:*:*)
-	echo "$UNAME_MACHINE"-apple-rhapsody"$UNAME_RELEASE"
-	exit ;;
-    arm64:Darwin:*:*)
-	echo aarch64-apple-darwin"$UNAME_RELEASE"
-	exit ;;
-    *:Darwin:*:*)
-	UNAME_PROCESSOR=$(uname -p)
-	case $UNAME_PROCESSOR in
-	    unknown) UNAME_PROCESSOR=powerpc ;;
-	esac
-	if command -v xcode-select > /dev/null 2> /dev/null && \
-		! xcode-select --print-path > /dev/null 2> /dev/null ; then
-	    # Avoid executing cc if there is no toolchain installed as
-	    # cc will be a stub that puts up a graphical alert
-	    # prompting the user to install developer tools.
-	    CC_FOR_BUILD=no_compiler_found
-	else
-	    set_cc_for_build
-	fi
-	if test "$CC_FOR_BUILD" != no_compiler_found; then
-	    if (echo '#ifdef __LP64__'; echo IS_64BIT_ARCH; echo '#endif') | \
-		   (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
-		   grep IS_64BIT_ARCH >/dev/null
-	    then
-		case $UNAME_PROCESSOR in
-		    i386) UNAME_PROCESSOR=x86_64 ;;
-		    powerpc) UNAME_PROCESSOR=powerpc64 ;;
-		esac
-	    fi
-	    # On 10.4-10.6 one might compile for PowerPC via gcc -arch ppc
-	    if (echo '#ifdef __POWERPC__'; echo IS_PPC; echo '#endif') | \
-		   (CCOPTS="" $CC_FOR_BUILD -E - 2>/dev/null) | \
-		   grep IS_PPC >/dev/null
-	    then
-		UNAME_PROCESSOR=powerpc
-	    fi
-	elif test "$UNAME_PROCESSOR" = i386 ; then
-	    # uname -m returns i386 or x86_64
-	    UNAME_PROCESSOR=$UNAME_MACHINE
-	fi
-	echo "$UNAME_PROCESSOR"-apple-darwin"$UNAME_RELEASE"
-	exit ;;
-    *:procnto*:*:* | *:QNX:[0123456789]*:*)
-	UNAME_PROCESSOR=$(uname -p)
-	if test "$UNAME_PROCESSOR" = x86; then
-		UNAME_PROCESSOR=i386
-		UNAME_MACHINE=pc
-	fi
-	echo "$UNAME_PROCESSOR"-"$UNAME_MACHINE"-nto-qnx"$UNAME_RELEASE"
-	exit ;;
-    *:QNX:*:4*)
-	echo i386-pc-qnx
-	exit ;;
-    NEO-*:NONSTOP_KERNEL:*:*)
-	echo neo-tandem-nsk"$UNAME_RELEASE"
-	exit ;;
-    NSE-*:NONSTOP_KERNEL:*:*)
-	echo nse-tandem-nsk"$UNAME_RELEASE"
-	exit ;;
-    NSR-*:NONSTOP_KERNEL:*:*)
-	echo nsr-tandem-nsk"$UNAME_RELEASE"
-	exit ;;
-    NSV-*:NONSTOP_KERNEL:*:*)
-	echo nsv-tandem-nsk"$UNAME_RELEASE"
-	exit ;;
-    NSX-*:NONSTOP_KERNEL:*:*)
-	echo nsx-tandem-nsk"$UNAME_RELEASE"
-	exit ;;
-    *:NonStop-UX:*:*)
-	echo mips-compaq-nonstopux
-	exit ;;
-    BS2000:POSIX*:*:*)
-	echo bs2000-siemens-sysv
-	exit ;;
-    DS/*:UNIX_System_V:*:*)
-	echo "$UNAME_MACHINE"-"$UNAME_SYSTEM"-"$UNAME_RELEASE"
-	exit ;;
-    *:Plan9:*:*)
-	# "uname -m" is not consistent, so use $cputype instead. 386
-	# is converted to i386 for consistency with other x86
-	# operating systems.
-	# shellcheck disable=SC2154
-	if test "$cputype" = 386; then
-	    UNAME_MACHINE=i386
-	else
-	    UNAME_MACHINE="$cputype"
-	fi
-	echo "$UNAME_MACHINE"-unknown-plan9
-	exit ;;
-    *:TOPS-10:*:*)
-	echo pdp10-unknown-tops10
-	exit ;;
-    *:TENEX:*:*)
-	echo pdp10-unknown-tenex
-	exit ;;
-    KS10:TOPS-20:*:* | KL10:TOPS-20:*:* | TYPE4:TOPS-20:*:*)
-	echo pdp10-dec-tops20
-	exit ;;
-    XKL-1:TOPS-20:*:* | TYPE5:TOPS-20:*:*)
-	echo pdp10-xkl-tops20
-	exit ;;
-    *:TOPS-20:*:*)
-	echo pdp10-unknown-tops20
-	exit ;;
-    *:ITS:*:*)
-	echo pdp10-unknown-its
-	exit ;;
-    SEI:*:*:SEIUX)
-	echo mips-sei-seiux"$UNAME_RELEASE"
-	exit ;;
-    *:DragonFly:*:*)
-	echo "$UNAME_MACHINE"-unknown-dragonfly"$(echo "$UNAME_RELEASE"|sed -e 's/[-(].*//')"
-	exit ;;
-    *:*VMS:*:*)
-	UNAME_MACHINE=$( (uname -p) 2>/dev/null)
-	case "$UNAME_MACHINE" in
-	    A*) echo alpha-dec-vms ; exit ;;
-	    I*) echo ia64-dec-vms ; exit ;;
-	    V*) echo vax-dec-vms ; exit ;;
-	esac ;;
-    *:XENIX:*:SysV)
-	echo i386-pc-xenix
-	exit ;;
-    i*86:skyos:*:*)
-	echo "$UNAME_MACHINE"-pc-skyos"$(echo "$UNAME_RELEASE" | sed -e 's/ .*$//')"
-	exit ;;
-    i*86:rdos:*:*)
-	echo "$UNAME_MACHINE"-pc-rdos
-	exit ;;
-    i*86:AROS:*:*)
-	echo "$UNAME_MACHINE"-pc-aros
-	exit ;;
-    x86_64:VMkernel:*:*)
-	echo "$UNAME_MACHINE"-unknown-esx
-	exit ;;
-    amd64:Isilon\ OneFS:*:*)
-	echo x86_64-unknown-onefs
-	exit ;;
-    *:Unleashed:*:*)
-	echo "$UNAME_MACHINE"-unknown-unleashed"$UNAME_RELEASE"
-	exit ;;
-esac
-
-# No uname command or uname output not recognized.
-set_cc_for_build
-cat > "$dummy.c" <<EOF
-#ifdef _SEQUENT_
-#include <sys/types.h>
-#include <sys/utsname.h>
-#endif
-#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__)
-#if defined (vax) || defined (__vax) || defined (__vax__) || defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__)
-#include <signal.h>
-#if defined(_SIZE_T_) || defined(SIGLOST)
-#include <sys/utsname.h>
-#endif
-#endif
-#endif
-main ()
-{
-#if defined (sony)
-#if defined (MIPSEB)
-  /* BFD wants "bsd" instead of "newsos".  Perhaps BFD should be changed,
-     I don't know....  */
-  printf ("mips-sony-bsd\n"); exit (0);
-#else
-#include <sys/param.h>
-  printf ("m68k-sony-newsos%s\n",
-#ifdef NEWSOS4
-  "4"
-#else
-  ""
-#endif
-  ); exit (0);
-#endif
-#endif
-
-#if defined (NeXT)
-#if !defined (__ARCHITECTURE__)
-#define __ARCHITECTURE__ "m68k"
-#endif
-  int version;
-  version=$( (hostinfo | sed -n 's/.*NeXT Mach \([0-9]*\).*/\1/p') 2>/dev/null);
-  if (version < 4)
-    printf ("%s-next-nextstep%d\n", __ARCHITECTURE__, version);
-  else
-    printf ("%s-next-openstep%d\n", __ARCHITECTURE__, version);
-  exit (0);
-#endif
-
-#if defined (MULTIMAX) || defined (n16)
-#if defined (UMAXV)
-  printf ("ns32k-encore-sysv\n"); exit (0);
-#else
-#if defined (CMU)
-  printf ("ns32k-encore-mach\n"); exit (0);
-#else
-  printf ("ns32k-encore-bsd\n"); exit (0);
-#endif
-#endif
-#endif
-
-#if defined (__386BSD__)
-  printf ("i386-pc-bsd\n"); exit (0);
-#endif
-
-#if defined (sequent)
-#if defined (i386)
-  printf ("i386-sequent-dynix\n"); exit (0);
-#endif
-#if defined (ns32000)
-  printf ("ns32k-sequent-dynix\n"); exit (0);
-#endif
-#endif
-
-#if defined (_SEQUENT_)
-  struct utsname un;
-
-  uname(&un);
-  if (strncmp(un.version, "V2", 2) == 0) {
-    printf ("i386-sequent-ptx2\n"); exit (0);
-  }
-  if (strncmp(un.version, "V1", 2) == 0) { /* XXX is V1 correct? */
-    printf ("i386-sequent-ptx1\n"); exit (0);
-  }
-  printf ("i386-sequent-ptx\n"); exit (0);
-#endif
-
-#if defined (vax)
-#if !defined (ultrix)
-#include <sys/param.h>
-#if defined (BSD)
-#if BSD == 43
-  printf ("vax-dec-bsd4.3\n"); exit (0);
-#else
-#if BSD == 199006
-  printf ("vax-dec-bsd4.3reno\n"); exit (0);
-#else
-  printf ("vax-dec-bsd\n"); exit (0);
-#endif
-#endif
-#else
-  printf ("vax-dec-bsd\n"); exit (0);
-#endif
-#else
-#if defined(_SIZE_T_) || defined(SIGLOST)
-  struct utsname un;
-  uname (&un);
-  printf ("vax-dec-ultrix%s\n", un.release); exit (0);
-#else
-  printf ("vax-dec-ultrix\n"); exit (0);
-#endif
-#endif
-#endif
-#if defined(ultrix) || defined(_ultrix) || defined(__ultrix) || defined(__ultrix__)
-#if defined(mips) || defined(__mips) || defined(__mips__) || defined(MIPS) || defined(__MIPS__)
-#if defined(_SIZE_T_) || defined(SIGLOST)
-  struct utsname *un;
-  uname (&un);
-  printf ("mips-dec-ultrix%s\n", un.release); exit (0);
-#else
-  printf ("mips-dec-ultrix\n"); exit (0);
-#endif
-#endif
-#endif
-
-#if defined (alliant) && defined (i860)
-  printf ("i860-alliant-bsd\n"); exit (0);
-#endif
-
-  exit (1);
-}
-EOF
-
-$CC_FOR_BUILD -o "$dummy" "$dummy.c" 2>/dev/null && SYSTEM_NAME=$($dummy) &&
-	{ echo "$SYSTEM_NAME"; exit; }
-
-# Apollos put the system type in the environment.
-test -d /usr/apollo && { echo "$ISP-apollo-$SYSTYPE"; exit; }
-
-echo "$0: unable to guess system type" >&2
-
-case "$UNAME_MACHINE:$UNAME_SYSTEM" in
-    mips:Linux | mips64:Linux)
-	# If we got here on MIPS GNU/Linux, output extra information.
-	cat >&2 <<EOF
-
-NOTE: MIPS GNU/Linux systems require a C compiler to fully recognize
-the system type. Please install a C compiler and try again.
-EOF
-	;;
-esac
-
-cat >&2 <<EOF
-
-This script (version $timestamp), has failed to recognize the
-operating system you are using. If your script is old, overwrite *all*
-copies of config.guess and config.sub with the latest versions from:
-
-  https://git.savannah.gnu.org/cgit/config.git/plain/config.guess
-and
-  https://git.savannah.gnu.org/cgit/config.git/plain/config.sub
-EOF
-
-year=$(echo $timestamp | sed 's,-.*,,')
-# shellcheck disable=SC2003
-if test "$(expr "$(date +%Y)" - "$year")" -lt 3 ; then
-   cat >&2 <<EOF
-
-If $0 has already been updated, send the following data and any
-information you think might be pertinent to config-patches@gnu.org to
-provide the necessary information to handle your system.
-
-config.guess timestamp = $timestamp
-
-uname -m = $( (uname -m) 2>/dev/null || echo unknown)
-uname -r = $( (uname -r) 2>/dev/null || echo unknown)
-uname -s = $( (uname -s) 2>/dev/null || echo unknown)
-uname -v = $( (uname -v) 2>/dev/null || echo unknown)
-
-/usr/bin/uname -p = $( (/usr/bin/uname -p) 2>/dev/null)
-/bin/uname -X     = $( (/bin/uname -X) 2>/dev/null)
-
-hostinfo               = $( (hostinfo) 2>/dev/null)
-/bin/universe          = $( (/bin/universe) 2>/dev/null)
-/usr/bin/arch -k       = $( (/usr/bin/arch -k) 2>/dev/null)
-/bin/arch              = $( (/bin/arch) 2>/dev/null)
-/usr/bin/oslevel       = $( (/usr/bin/oslevel) 2>/dev/null)
-/usr/convex/getsysinfo = $( (/usr/convex/getsysinfo) 2>/dev/null)
-
-UNAME_MACHINE = "$UNAME_MACHINE"
-UNAME_RELEASE = "$UNAME_RELEASE"
-UNAME_SYSTEM  = "$UNAME_SYSTEM"
-UNAME_VERSION = "$UNAME_VERSION"
-EOF
-fi
-
-exit 1
-
-# Local variables:
-# eval: (add-hook 'before-save-hook 'time-stamp)
-# time-stamp-start: "timestamp='"
-# time-stamp-format: "%:y-%02m-%02d"
-# time-stamp-end: "'"
-# End:
diff --git a/config.sub b/config.sub
deleted file mode 100644
index 0cbdae682..000000000
--- a/config.sub
+++ /dev/null
@@ -1,1855 +0,0 @@
-#! /bin/sh
-# Configuration validation subroutine script.
-#   Copyright 1992-2021 Free Software Foundation, Inc.
-
-timestamp='2021-01-01'
-
-# This file is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 3 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful, but
-# WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-# General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, see <https://www.gnu.org/licenses/>.
-#
-# As a special exception to the GNU General Public License, if you
-# distribute this file as part of a program that contains a
-# configuration script generated by Autoconf, you may include it under
-# the same distribution terms that you use for the rest of that
-# program.  This Exception is an additional permission under section 7
-# of the GNU General Public License, version 3 ("GPLv3").
-
-
-# Please send patches to <config-patches@gnu.org>.
-#
-# Configuration subroutine to validate and canonicalize a configuration type.
-# Supply the specified configuration type as an argument.
-# If it is invalid, we print an error message on stderr and exit with code 1.
-# Otherwise, we print the canonical config type on stdout and succeed.
-
-# You can get the latest version of this script from:
-# https://git.savannah.gnu.org/cgit/config.git/plain/config.sub
-
-# This file is supposed to be the same for all GNU packages
-# and recognize all the CPU types, system types and aliases
-# that are meaningful with *any* GNU software.
-# Each package is responsible for reporting which valid configurations
-# it does not support.  The user should be able to distinguish
-# a failure to support a valid configuration from a meaningless
-# configuration.
-
-# The goal of this file is to map all the various variations of a given
-# machine specification into a single specification in the form:
-#	CPU_TYPE-MANUFACTURER-OPERATING_SYSTEM
-# or in some cases, the newer four-part form:
-#	CPU_TYPE-MANUFACTURER-KERNEL-OPERATING_SYSTEM
-# It is wrong to echo any other type of specification.
-
-me=$(echo "$0" | sed -e 's,.*/,,')
-
-usage="\
-Usage: $0 [OPTION] CPU-MFR-OPSYS or ALIAS
-
-Canonicalize a configuration name.
-
-Options:
-  -h, --help         print this help, then exit
-  -t, --time-stamp   print date of last modification, then exit
-  -v, --version      print version number, then exit
-
-Report bugs and patches to <config-patches@gnu.org>."
-
-version="\
-GNU config.sub ($timestamp)
-
-Copyright 1992-2021 Free Software Foundation, Inc.
-
-This is free software; see the source for copying conditions.  There is NO
-warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE."
-
-help="
-Try \`$me --help' for more information."
-
-# Parse command line
-while test $# -gt 0 ; do
-  case $1 in
-    --time-stamp | --time* | -t )
-       echo "$timestamp" ; exit ;;
-    --version | -v )
-       echo "$version" ; exit ;;
-    --help | --h* | -h )
-       echo "$usage"; exit ;;
-    -- )     # Stop option processing
-       shift; break ;;
-    - )	# Use stdin as input.
-       break ;;
-    -* )
-       echo "$me: invalid option $1$help" >&2
-       exit 1 ;;
-
-    *local*)
-       # First pass through any local machine types.
-       echo "$1"
-       exit ;;
-
-    * )
-       break ;;
-  esac
-done
-
-case $# in
- 0) echo "$me: missing argument$help" >&2
-    exit 1;;
- 1) ;;
- *) echo "$me: too many arguments$help" >&2
-    exit 1;;
-esac
-
-# Split fields of configuration type
-# shellcheck disable=SC2162
-IFS="-" read field1 field2 field3 field4 <<EOF
-$1
-EOF
-
-# Separate into logical components for further validation
-case $1 in
-	*-*-*-*-*)
-		echo Invalid configuration \`"$1"\': more than four components >&2
-		exit 1
-		;;
-	*-*-*-*)
-		basic_machine=$field1-$field2
-		basic_os=$field3-$field4
-		;;
-	*-*-*)
-		# Ambiguous whether COMPANY is present, or skipped and KERNEL-OS is two
-		# parts
-		maybe_os=$field2-$field3
-		case $maybe_os in
-			nto-qnx* | linux-* | uclinux-uclibc* \
-			| uclinux-gnu* | kfreebsd*-gnu* | knetbsd*-gnu* | netbsd*-gnu* \
-			| netbsd*-eabi* | kopensolaris*-gnu* | cloudabi*-eabi* \
-			| storm-chaos* | os2-emx* | rtmk-nova*)
-				basic_machine=$field1
-				basic_os=$maybe_os
-				;;
-			android-linux)
-				basic_machine=$field1-unknown
-				basic_os=linux-android
-				;;
-			*)
-				basic_machine=$field1-$field2
-				basic_os=$field3
-				;;
-		esac
-		;;
-	*-*)
-		# A lone config we happen to match not fitting any pattern
-		case $field1-$field2 in
-			decstation-3100)
-				basic_machine=mips-dec
-				basic_os=
-				;;
-			*-*)
-				# Second component is usually, but not always the OS
-				case $field2 in
-					# Prevent following clause from handling this valid os
-					sun*os*)
-						basic_machine=$field1
-						basic_os=$field2
-						;;
-					# Manufacturers
-					dec* | mips* | sequent* | encore* | pc533* | sgi* | sony* \
-					| att* | 7300* | 3300* | delta* | motorola* | sun[234]* \
-					| unicom* | ibm* | next | hp | isi* | apollo | altos* \
-					| convergent* | ncr* | news | 32* | 3600* | 3100* \
-					| hitachi* | c[123]* | convex* | sun | crds | omron* | dg \
-					| ultra | tti* | harris | dolphin | highlevel | gould \
-					| cbm | ns | masscomp | apple | axis | knuth | cray \
-					| microblaze* | sim | cisco \
-					| oki | wec | wrs | winbond)
-						basic_machine=$field1-$field2
-						basic_os=
-						;;
-					*)
-						basic_machine=$field1
-						basic_os=$field2
-						;;
-				esac
-			;;
-		esac
-		;;
-	*)
-		# Convert single-component short-hands not valid as part of
-		# multi-component configurations.
-		case $field1 in
-			386bsd)
-				basic_machine=i386-pc
-				basic_os=bsd
-				;;
-			a29khif)
-				basic_machine=a29k-amd
-				basic_os=udi
-				;;
-			adobe68k)
-				basic_machine=m68010-adobe
-				basic_os=scout
-				;;
-			alliant)
-				basic_machine=fx80-alliant
-				basic_os=
-				;;
-			altos | altos3068)
-				basic_machine=m68k-altos
-				basic_os=
-				;;
-			am29k)
-				basic_machine=a29k-none
-				basic_os=bsd
-				;;
-			amdahl)
-				basic_machine=580-amdahl
-				basic_os=sysv
-				;;
-			amiga)
-				basic_machine=m68k-unknown
-				basic_os=
-				;;
-			amigaos | amigados)
-				basic_machine=m68k-unknown
-				basic_os=amigaos
-				;;
-			amigaunix | amix)
-				basic_machine=m68k-unknown
-				basic_os=sysv4
-				;;
-			apollo68)
-				basic_machine=m68k-apollo
-				basic_os=sysv
-				;;
-			apollo68bsd)
-				basic_machine=m68k-apollo
-				basic_os=bsd
-				;;
-			aros)
-				basic_machine=i386-pc
-				basic_os=aros
-				;;
-			aux)
-				basic_machine=m68k-apple
-				basic_os=aux
-				;;
-			balance)
-				basic_machine=ns32k-sequent
-				basic_os=dynix
-				;;
-			blackfin)
-				basic_machine=bfin-unknown
-				basic_os=linux
-				;;
-			cegcc)
-				basic_machine=arm-unknown
-				basic_os=cegcc
-				;;
-			convex-c1)
-				basic_machine=c1-convex
-				basic_os=bsd
-				;;
-			convex-c2)
-				basic_machine=c2-convex
-				basic_os=bsd
-				;;
-			convex-c32)
-				basic_machine=c32-convex
-				basic_os=bsd
-				;;
-			convex-c34)
-				basic_machine=c34-convex
-				basic_os=bsd
-				;;
-			convex-c38)
-				basic_machine=c38-convex
-				basic_os=bsd
-				;;
-			cray)
-				basic_machine=j90-cray
-				basic_os=unicos
-				;;
-			crds | unos)
-				basic_machine=m68k-crds
-				basic_os=
-				;;
-			da30)
-				basic_machine=m68k-da30
-				basic_os=
-				;;
-			decstation | pmax | pmin | dec3100 | decstatn)
-				basic_machine=mips-dec
-				basic_os=
-				;;
-			delta88)
-				basic_machine=m88k-motorola
-				basic_os=sysv3
-				;;
-			dicos)
-				basic_machine=i686-pc
-				basic_os=dicos
-				;;
-			djgpp)
-				basic_machine=i586-pc
-				basic_os=msdosdjgpp
-				;;
-			ebmon29k)
-				basic_machine=a29k-amd
-				basic_os=ebmon
-				;;
-			es1800 | OSE68k | ose68k | ose | OSE)
-				basic_machine=m68k-ericsson
-				basic_os=ose
-				;;
-			gmicro)
-				basic_machine=tron-gmicro
-				basic_os=sysv
-				;;
-			go32)
-				basic_machine=i386-pc
-				basic_os=go32
-				;;
-			h8300hms)
-				basic_machine=h8300-hitachi
-				basic_os=hms
-				;;
-			h8300xray)
-				basic_machine=h8300-hitachi
-				basic_os=xray
-				;;
-			h8500hms)
-				basic_machine=h8500-hitachi
-				basic_os=hms
-				;;
-			harris)
-				basic_machine=m88k-harris
-				basic_os=sysv3
-				;;
-			hp300 | hp300hpux)
-				basic_machine=m68k-hp
-				basic_os=hpux
-				;;
-			hp300bsd)
-				basic_machine=m68k-hp
-				basic_os=bsd
-				;;
-			hppaosf)
-				basic_machine=hppa1.1-hp
-				basic_os=osf
-				;;
-			hppro)
-				basic_machine=hppa1.1-hp
-				basic_os=proelf
-				;;
-			i386mach)
-				basic_machine=i386-mach
-				basic_os=mach
-				;;
-			isi68 | isi)
-				basic_machine=m68k-isi
-				basic_os=sysv
-				;;
-			m68knommu)
-				basic_machine=m68k-unknown
-				basic_os=linux
-				;;
-			magnum | m3230)
-				basic_machine=mips-mips
-				basic_os=sysv
-				;;
-			merlin)
-				basic_machine=ns32k-utek
-				basic_os=sysv
-				;;
-			mingw64)
-				basic_machine=x86_64-pc
-				basic_os=mingw64
-				;;
-			mingw32)
-				basic_machine=i686-pc
-				basic_os=mingw32
-				;;
-			mingw32ce)
-				basic_machine=arm-unknown
-				basic_os=mingw32ce
-				;;
-			monitor)
-				basic_machine=m68k-rom68k
-				basic_os=coff
-				;;
-			morphos)
-				basic_machine=powerpc-unknown
-				basic_os=morphos
-				;;
-			moxiebox)
-				basic_machine=moxie-unknown
-				basic_os=moxiebox
-				;;
-			msdos)
-				basic_machine=i386-pc
-				basic_os=msdos
-				;;
-			msys)
-				basic_machine=i686-pc
-				basic_os=msys
-				;;
-			mvs)
-				basic_machine=i370-ibm
-				basic_os=mvs
-				;;
-			nacl)
-				basic_machine=le32-unknown
-				basic_os=nacl
-				;;
-			ncr3000)
-				basic_machine=i486-ncr
-				basic_os=sysv4
-				;;
-			netbsd386)
-				basic_machine=i386-pc
-				basic_os=netbsd
-				;;
-			netwinder)
-				basic_machine=armv4l-rebel
-				basic_os=linux
-				;;
-			news | news700 | news800 | news900)
-				basic_machine=m68k-sony
-				basic_os=newsos
-				;;
-			news1000)
-				basic_machine=m68030-sony
-				basic_os=newsos
-				;;
-			necv70)
-				basic_machine=v70-nec
-				basic_os=sysv
-				;;
-			nh3000)
-				basic_machine=m68k-harris
-				basic_os=cxux
-				;;
-			nh[45]000)
-				basic_machine=m88k-harris
-				basic_os=cxux
-				;;
-			nindy960)
-				basic_machine=i960-intel
-				basic_os=nindy
-				;;
-			mon960)
-				basic_machine=i960-intel
-				basic_os=mon960
-				;;
-			nonstopux)
-				basic_machine=mips-compaq
-				basic_os=nonstopux
-				;;
-			os400)
-				basic_machine=powerpc-ibm
-				basic_os=os400
-				;;
-			OSE68000 | ose68000)
-				basic_machine=m68000-ericsson
-				basic_os=ose
-				;;
-			os68k)
-				basic_machine=m68k-none
-				basic_os=os68k
-				;;
-			paragon)
-				basic_machine=i860-intel
-				basic_os=osf
-				;;
-			parisc)
-				basic_machine=hppa-unknown
-				basic_os=linux
-				;;
-			psp)
-				basic_machine=mipsallegrexel-sony
-				basic_os=psp
-				;;
-			pw32)
-				basic_machine=i586-unknown
-				basic_os=pw32
-				;;
-			rdos | rdos64)
-				basic_machine=x86_64-pc
-				basic_os=rdos
-				;;
-			rdos32)
-				basic_machine=i386-pc
-				basic_os=rdos
-				;;
-			rom68k)
-				basic_machine=m68k-rom68k
-				basic_os=coff
-				;;
-			sa29200)
-				basic_machine=a29k-amd
-				basic_os=udi
-				;;
-			sei)
-				basic_machine=mips-sei
-				basic_os=seiux
-				;;
-			sequent)
-				basic_machine=i386-sequent
-				basic_os=
-				;;
-			sps7)
-				basic_machine=m68k-bull
-				basic_os=sysv2
-				;;
-			st2000)
-				basic_machine=m68k-tandem
-				basic_os=
-				;;
-			stratus)
-				basic_machine=i860-stratus
-				basic_os=sysv4
-				;;
-			sun2)
-				basic_machine=m68000-sun
-				basic_os=
-				;;
-			sun2os3)
-				basic_machine=m68000-sun
-				basic_os=sunos3
-				;;
-			sun2os4)
-				basic_machine=m68000-sun
-				basic_os=sunos4
-				;;
-			sun3)
-				basic_machine=m68k-sun
-				basic_os=
-				;;
-			sun3os3)
-				basic_machine=m68k-sun
-				basic_os=sunos3
-				;;
-			sun3os4)
-				basic_machine=m68k-sun
-				basic_os=sunos4
-				;;
-			sun4)
-				basic_machine=sparc-sun
-				basic_os=
-				;;
-			sun4os3)
-				basic_machine=sparc-sun
-				basic_os=sunos3
-				;;
-			sun4os4)
-				basic_machine=sparc-sun
-				basic_os=sunos4
-				;;
-			sun4sol2)
-				basic_machine=sparc-sun
-				basic_os=solaris2
-				;;
-			sun386 | sun386i | roadrunner)
-				basic_machine=i386-sun
-				basic_os=
-				;;
-			sv1)
-				basic_machine=sv1-cray
-				basic_os=unicos
-				;;
-			symmetry)
-				basic_machine=i386-sequent
-				basic_os=dynix
-				;;
-			t3e)
-				basic_machine=alphaev5-cray
-				basic_os=unicos
-				;;
-			t90)
-				basic_machine=t90-cray
-				basic_os=unicos
-				;;
-			toad1)
-				basic_machine=pdp10-xkl
-				basic_os=tops20
-				;;
-			tpf)
-				basic_machine=s390x-ibm
-				basic_os=tpf
-				;;
-			udi29k)
-				basic_machine=a29k-amd
-				basic_os=udi
-				;;
-			ultra3)
-				basic_machine=a29k-nyu
-				basic_os=sym1
-				;;
-			v810 | necv810)
-				basic_machine=v810-nec
-				basic_os=none
-				;;
-			vaxv)
-				basic_machine=vax-dec
-				basic_os=sysv
-				;;
-			vms)
-				basic_machine=vax-dec
-				basic_os=vms
-				;;
-			vsta)
-				basic_machine=i386-pc
-				basic_os=vsta
-				;;
-			vxworks960)
-				basic_machine=i960-wrs
-				basic_os=vxworks
-				;;
-			vxworks68)
-				basic_machine=m68k-wrs
-				basic_os=vxworks
-				;;
-			vxworks29k)
-				basic_machine=a29k-wrs
-				basic_os=vxworks
-				;;
-			xbox)
-				basic_machine=i686-pc
-				basic_os=mingw32
-				;;
-			ymp)
-				basic_machine=ymp-cray
-				basic_os=unicos
-				;;
-			*)
-				basic_machine=$1
-				basic_os=
-				;;
-		esac
-		;;
-esac
-
-# Decode 1-component or ad-hoc basic machines
-case $basic_machine in
-	# Here we handle the default manufacturer of certain CPU types.  It is in
-	# some cases the only manufacturer, in others, it is the most popular.
-	w89k)
-		cpu=hppa1.1
-		vendor=winbond
-		;;
-	op50n)
-		cpu=hppa1.1
-		vendor=oki
-		;;
-	op60c)
-		cpu=hppa1.1
-		vendor=oki
-		;;
-	ibm*)
-		cpu=i370
-		vendor=ibm
-		;;
-	orion105)
-		cpu=clipper
-		vendor=highlevel
-		;;
-	mac | mpw | mac-mpw)
-		cpu=m68k
-		vendor=apple
-		;;
-	pmac | pmac-mpw)
-		cpu=powerpc
-		vendor=apple
-		;;
-
-	# Recognize the various machine names and aliases which stand
-	# for a CPU type and a company and sometimes even an OS.
-	3b1 | 7300 | 7300-att | att-7300 | pc7300 | safari | unixpc)
-		cpu=m68000
-		vendor=att
-		;;
-	3b*)
-		cpu=we32k
-		vendor=att
-		;;
-	bluegene*)
-		cpu=powerpc
-		vendor=ibm
-		basic_os=cnk
-		;;
-	decsystem10* | dec10*)
-		cpu=pdp10
-		vendor=dec
-		basic_os=tops10
-		;;
-	decsystem20* | dec20*)
-		cpu=pdp10
-		vendor=dec
-		basic_os=tops20
-		;;
-	delta | 3300 | motorola-3300 | motorola-delta \
-	      | 3300-motorola | delta-motorola)
-		cpu=m68k
-		vendor=motorola
-		;;
-	dpx2*)
-		cpu=m68k
-		vendor=bull
-		basic_os=sysv3
-		;;
-	encore | umax | mmax)
-		cpu=ns32k
-		vendor=encore
-		;;
-	elxsi)
-		cpu=elxsi
-		vendor=elxsi
-		basic_os=${basic_os:-bsd}
-		;;
-	fx2800)
-		cpu=i860
-		vendor=alliant
-		;;
-	genix)
-		cpu=ns32k
-		vendor=ns
-		;;
-	h3050r* | hiux*)
-		cpu=hppa1.1
-		vendor=hitachi
-		basic_os=hiuxwe2
-		;;
-	hp3k9[0-9][0-9] | hp9[0-9][0-9])
-		cpu=hppa1.0
-		vendor=hp
-		;;
-	hp9k2[0-9][0-9] | hp9k31[0-9])
-		cpu=m68000
-		vendor=hp
-		;;
-	hp9k3[2-9][0-9])
-		cpu=m68k
-		vendor=hp
-		;;
-	hp9k6[0-9][0-9] | hp6[0-9][0-9])
-		cpu=hppa1.0
-		vendor=hp
-		;;
-	hp9k7[0-79][0-9] | hp7[0-79][0-9])
-		cpu=hppa1.1
-		vendor=hp
-		;;
-	hp9k78[0-9] | hp78[0-9])
-		# FIXME: really hppa2.0-hp
-		cpu=hppa1.1
-		vendor=hp
-		;;
-	hp9k8[67]1 | hp8[67]1 | hp9k80[24] | hp80[24] | hp9k8[78]9 | hp8[78]9 | hp9k893 | hp893)
-		# FIXME: really hppa2.0-hp
-		cpu=hppa1.1
-		vendor=hp
-		;;
-	hp9k8[0-9][13679] | hp8[0-9][13679])
-		cpu=hppa1.1
-		vendor=hp
-		;;
-	hp9k8[0-9][0-9] | hp8[0-9][0-9])
-		cpu=hppa1.0
-		vendor=hp
-		;;
-	i*86v32)
-		cpu=$(echo "$1" | sed -e 's/86.*/86/')
-		vendor=pc
-		basic_os=sysv32
-		;;
-	i*86v4*)
-		cpu=$(echo "$1" | sed -e 's/86.*/86/')
-		vendor=pc
-		basic_os=sysv4
-		;;
-	i*86v)
-		cpu=$(echo "$1" | sed -e 's/86.*/86/')
-		vendor=pc
-		basic_os=sysv
-		;;
-	i*86sol2)
-		cpu=$(echo "$1" | sed -e 's/86.*/86/')
-		vendor=pc
-		basic_os=solaris2
-		;;
-	j90 | j90-cray)
-		cpu=j90
-		vendor=cray
-		basic_os=${basic_os:-unicos}
-		;;
-	iris | iris4d)
-		cpu=mips
-		vendor=sgi
-		case $basic_os in
-		    irix*)
-			;;
-		    *)
-			basic_os=irix4
-			;;
-		esac
-		;;
-	miniframe)
-		cpu=m68000
-		vendor=convergent
-		;;
-	*mint | mint[0-9]* | *MiNT | *MiNT[0-9]*)
-		cpu=m68k
-		vendor=atari
-		basic_os=mint
-		;;
-	news-3600 | risc-news)
-		cpu=mips
-		vendor=sony
-		basic_os=newsos
-		;;
-	next | m*-next)
-		cpu=m68k
-		vendor=next
-		case $basic_os in
-		    openstep*)
-		        ;;
-		    nextstep*)
-			;;
-		    ns2*)
-		      basic_os=nextstep2
-			;;
-		    *)
-		      basic_os=nextstep3
-			;;
-		esac
-		;;
-	np1)
-		cpu=np1
-		vendor=gould
-		;;
-	op50n-* | op60c-*)
-		cpu=hppa1.1
-		vendor=oki
-		basic_os=proelf
-		;;
-	pa-hitachi)
-		cpu=hppa1.1
-		vendor=hitachi
-		basic_os=hiuxwe2
-		;;
-	pbd)
-		cpu=sparc
-		vendor=tti
-		;;
-	pbb)
-		cpu=m68k
-		vendor=tti
-		;;
-	pc532)
-		cpu=ns32k
-		vendor=pc532
-		;;
-	pn)
-		cpu=pn
-		vendor=gould
-		;;
-	power)
-		cpu=power
-		vendor=ibm
-		;;
-	ps2)
-		cpu=i386
-		vendor=ibm
-		;;
-	rm[46]00)
-		cpu=mips
-		vendor=siemens
-		;;
-	rtpc | rtpc-*)
-		cpu=romp
-		vendor=ibm
-		;;
-	sde)
-		cpu=mipsisa32
-		vendor=sde
-		basic_os=${basic_os:-elf}
-		;;
-	simso-wrs)
-		cpu=sparclite
-		vendor=wrs
-		basic_os=vxworks
-		;;
-	tower | tower-32)
-		cpu=m68k
-		vendor=ncr
-		;;
-	vpp*|vx|vx-*)
-		cpu=f301
-		vendor=fujitsu
-		;;
-	w65)
-		cpu=w65
-		vendor=wdc
-		;;
-	w89k-*)
-		cpu=hppa1.1
-		vendor=winbond
-		basic_os=proelf
-		;;
-	none)
-		cpu=none
-		vendor=none
-		;;
-	leon|leon[3-9])
-		cpu=sparc
-		vendor=$basic_machine
-		;;
-	leon-*|leon[3-9]-*)
-		cpu=sparc
-		vendor=$(echo "$basic_machine" | sed 's/-.*//')
-		;;
-
-	*-*)
-		# shellcheck disable=SC2162
-		IFS="-" read cpu vendor <<EOF
-$basic_machine
-EOF
-		;;
-	# We use `pc' rather than `unknown'
-	# because (1) that's what they normally are, and
-	# (2) the word "unknown" tends to confuse beginning users.
-	i*86 | x86_64)
-		cpu=$basic_machine
-		vendor=pc
-		;;
-	# These rules are duplicated from below for sake of the special case above;
-	# i.e. things that normalized to x86 arches should also default to "pc"
-	pc98)
-		cpu=i386
-		vendor=pc
-		;;
-	x64 | amd64)
-		cpu=x86_64
-		vendor=pc
-		;;
-	# Recognize the basic CPU types without company name.
-	*)
-		cpu=$basic_machine
-		vendor=unknown
-		;;
-esac
-
-unset -v basic_machine
-
-# Decode basic machines in the full and proper CPU-Company form.
-case $cpu-$vendor in
-	# Here we handle the default manufacturer of certain CPU types in canonical form. It is in
-	# some cases the only manufacturer, in others, it is the most popular.
-	craynv-unknown)
-		vendor=cray
-		basic_os=${basic_os:-unicosmp}
-		;;
-	c90-unknown | c90-cray)
-		vendor=cray
-		basic_os=${Basic_os:-unicos}
-		;;
-	fx80-unknown)
-		vendor=alliant
-		;;
-	romp-unknown)
-		vendor=ibm
-		;;
-	mmix-unknown)
-		vendor=knuth
-		;;
-	microblaze-unknown | microblazeel-unknown)
-		vendor=xilinx
-		;;
-	rs6000-unknown)
-		vendor=ibm
-		;;
-	vax-unknown)
-		vendor=dec
-		;;
-	pdp11-unknown)
-		vendor=dec
-		;;
-	we32k-unknown)
-		vendor=att
-		;;
-	cydra-unknown)
-		vendor=cydrome
-		;;
-	i370-ibm*)
-		vendor=ibm
-		;;
-	orion-unknown)
-		vendor=highlevel
-		;;
-	xps-unknown | xps100-unknown)
-		cpu=xps100
-		vendor=honeywell
-		;;
-
-	# Here we normalize CPU types with a missing or matching vendor
-	dpx20-unknown | dpx20-bull)
-		cpu=rs6000
-		vendor=bull
-		basic_os=${basic_os:-bosx}
-		;;
-
-	# Here we normalize CPU types irrespective of the vendor
-	amd64-*)
-		cpu=x86_64
-		;;
-	blackfin-*)
-		cpu=bfin
-		basic_os=linux
-		;;
-	c54x-*)
-		cpu=tic54x
-		;;
-	c55x-*)
-		cpu=tic55x
-		;;
-	c6x-*)
-		cpu=tic6x
-		;;
-	e500v[12]-*)
-		cpu=powerpc
-		basic_os=${basic_os}"spe"
-		;;
-	mips3*-*)
-		cpu=mips64
-		;;
-	ms1-*)
-		cpu=mt
-		;;
-	m68knommu-*)
-		cpu=m68k
-		basic_os=linux
-		;;
-	m9s12z-* | m68hcs12z-* | hcs12z-* | s12z-*)
-		cpu=s12z
-		;;
-	openrisc-*)
-		cpu=or32
-		;;
-	parisc-*)
-		cpu=hppa
-		basic_os=linux
-		;;
-	pentium-* | p5-* | k5-* | k6-* | nexgen-* | viac3-*)
-		cpu=i586
-		;;
-	pentiumpro-* | p6-* | 6x86-* | athlon-* | athalon_*-*)
-		cpu=i686
-		;;
-	pentiumii-* | pentium2-* | pentiumiii-* | pentium3-*)
-		cpu=i686
-		;;
-	pentium4-*)
-		cpu=i786
-		;;
-	pc98-*)
-		cpu=i386
-		;;
-	ppc-* | ppcbe-*)
-		cpu=powerpc
-		;;
-	ppcle-* | powerpclittle-*)
-		cpu=powerpcle
-		;;
-	ppc64-*)
-		cpu=powerpc64
-		;;
-	ppc64le-* | powerpc64little-*)
-		cpu=powerpc64le
-		;;
-	sb1-*)
-		cpu=mipsisa64sb1
-		;;
-	sb1el-*)
-		cpu=mipsisa64sb1el
-		;;
-	sh5e[lb]-*)
-		cpu=$(echo "$cpu" | sed 's/^\(sh.\)e\(.\)$/\1\2e/')
-		;;
-	spur-*)
-		cpu=spur
-		;;
-	strongarm-* | thumb-*)
-		cpu=arm
-		;;
-	tx39-*)
-		cpu=mipstx39
-		;;
-	tx39el-*)
-		cpu=mipstx39el
-		;;
-	x64-*)
-		cpu=x86_64
-		;;
-	xscale-* | xscalee[bl]-*)
-		cpu=$(echo "$cpu" | sed 's/^xscale/arm/')
-		;;
-	arm64-*)
-		cpu=aarch64
-		;;
-
-	# Recognize the canonical CPU Types that limit and/or modify the
-	# company names they are paired with.
-	cr16-*)
-		basic_os=${basic_os:-elf}
-		;;
-	crisv32-* | etraxfs*-*)
-		cpu=crisv32
-		vendor=axis
-		;;
-	cris-* | etrax*-*)
-		cpu=cris
-		vendor=axis
-		;;
-	crx-*)
-		basic_os=${basic_os:-elf}
-		;;
-	neo-tandem)
-		cpu=neo
-		vendor=tandem
-		;;
-	nse-tandem)
-		cpu=nse
-		vendor=tandem
-		;;
-	nsr-tandem)
-		cpu=nsr
-		vendor=tandem
-		;;
-	nsv-tandem)
-		cpu=nsv
-		vendor=tandem
-		;;
-	nsx-tandem)
-		cpu=nsx
-		vendor=tandem
-		;;
-	mipsallegrexel-sony)
-		cpu=mipsallegrexel
-		vendor=sony
-		;;
-	tile*-*)
-		basic_os=${basic_os:-linux-gnu}
-		;;
-
-	*)
-		# Recognize the canonical CPU types that are allowed with any
-		# company name.
-		case $cpu in
-			1750a | 580 \
-			| a29k \
-			| aarch64 | aarch64_be \
-			| abacus \
-			| alpha | alphaev[4-8] | alphaev56 | alphaev6[78] \
-			| alpha64 | alpha64ev[4-8] | alpha64ev56 | alpha64ev6[78] \
-			| alphapca5[67] | alpha64pca5[67] \
-			| am33_2.0 \
-			| amdgcn \
-			| arc | arceb \
-			| arm | arm[lb]e | arme[lb] | armv* \
-			| avr | avr32 \
-			| asmjs \
-			| ba \
-			| be32 | be64 \
-			| bfin | bpf | bs2000 \
-			| c[123]* | c30 | [cjt]90 | c4x \
-			| c8051 | clipper | craynv | csky | cydra \
-			| d10v | d30v | dlx | dsp16xx \
-			| e2k | elxsi | epiphany \
-			| f30[01] | f700 | fido | fr30 | frv | ft32 | fx80 \
-			| h8300 | h8500 \
-			| hppa | hppa1.[01] | hppa2.0 | hppa2.0[nw] | hppa64 \
-			| hexagon \
-			| i370 | i*86 | i860 | i960 | ia16 | ia64 \
-			| ip2k | iq2000 \
-			| k1om \
-			| le32 | le64 \
-			| lm32 \
-			| loongarch32 | loongarch64 | loongarchx32 \
-			| m32c | m32r | m32rle \
-			| m5200 | m68000 | m680[012346]0 | m68360 | m683?2 | m68k \
-			| m6811 | m68hc11 | m6812 | m68hc12 | m68hcs12x \
-			| m88110 | m88k | maxq | mb | mcore | mep | metag \
-			| microblaze | microblazeel \
-			| mips | mipsbe | mipseb | mipsel | mipsle \
-			| mips16 \
-			| mips64 | mips64eb | mips64el \
-			| mips64octeon | mips64octeonel \
-			| mips64orion | mips64orionel \
-			| mips64r5900 | mips64r5900el \
-			| mips64vr | mips64vrel \
-			| mips64vr4100 | mips64vr4100el \
-			| mips64vr4300 | mips64vr4300el \
-			| mips64vr5000 | mips64vr5000el \
-			| mips64vr5900 | mips64vr5900el \
-			| mipsisa32 | mipsisa32el \
-			| mipsisa32r2 | mipsisa32r2el \
-			| mipsisa32r6 | mipsisa32r6el \
-			| mipsisa64 | mipsisa64el \
-			| mipsisa64r2 | mipsisa64r2el \
-			| mipsisa64r6 | mipsisa64r6el \
-			| mipsisa64sb1 | mipsisa64sb1el \
-			| mipsisa64sr71k | mipsisa64sr71kel \
-			| mipsr5900 | mipsr5900el \
-			| mipstx39 | mipstx39el \
-			| mmix \
-			| mn10200 | mn10300 \
-			| moxie \
-			| mt \
-			| msp430 \
-			| nds32 | nds32le | nds32be \
-			| nfp \
-			| nios | nios2 | nios2eb | nios2el \
-			| none | np1 | ns16k | ns32k | nvptx \
-			| open8 \
-			| or1k* \
-			| or32 \
-			| orion \
-			| picochip \
-			| pdp10 | pdp11 | pj | pjl | pn | power \
-			| powerpc | powerpc64 | powerpc64le | powerpcle | powerpcspe \
-			| pru \
-			| pyramid \
-			| riscv | riscv32 | riscv32be | riscv64 | riscv64be \
-			| rl78 | romp | rs6000 | rx \
-			| s390 | s390x \
-			| score \
-			| sh | shl \
-			| sh[1234] | sh[24]a | sh[24]ae[lb] | sh[23]e | she[lb] | sh[lb]e \
-			| sh[1234]e[lb] |  sh[12345][lb]e | sh[23]ele | sh64 | sh64le \
-			| sparc | sparc64 | sparc64b | sparc64v | sparc86x | sparclet \
-			| sparclite \
-			| sparcv8 | sparcv9 | sparcv9b | sparcv9v | sv1 | sx* \
-			| spu \
-			| tahoe \
-			| thumbv7* \
-			| tic30 | tic4x | tic54x | tic55x | tic6x | tic80 \
-			| tron \
-			| ubicom32 \
-			| v70 | v850 | v850e | v850e1 | v850es | v850e2 | v850e2v3 \
-			| vax \
-			| visium \
-			| w65 \
-			| wasm32 | wasm64 \
-			| we32k \
-			| x86 | x86_64 | xc16x | xgate | xps100 \
-			| xstormy16 | xtensa* \
-			| ymp \
-			| z8k | z80)
-				;;
-
-			*)
-				echo Invalid configuration \`"$1"\': machine \`"$cpu-$vendor"\' not recognized 1>&2
-				exit 1
-				;;
-		esac
-		;;
-esac
-
-# Here we canonicalize certain aliases for manufacturers.
-case $vendor in
-	digital*)
-		vendor=dec
-		;;
-	commodore*)
-		vendor=cbm
-		;;
-	*)
-		;;
-esac
-
-# Decode manufacturer-specific aliases for certain operating systems.
-
-if test x$basic_os != x
-then
-
-# First recognize some ad-hoc caes, or perhaps split kernel-os, or else just
-# set os.
-case $basic_os in
-	gnu/linux*)
-		kernel=linux
-		os=$(echo $basic_os | sed -e 's|gnu/linux|gnu|')
-		;;
-	os2-emx)
-		kernel=os2
-		os=$(echo $basic_os | sed -e 's|os2-emx|emx|')
-		;;
-	nto-qnx*)
-		kernel=nto
-		os=$(echo $basic_os | sed -e 's|nto-qnx|qnx|')
-		;;
-	*-*)
-		# shellcheck disable=SC2162
-		IFS="-" read kernel os <<EOF
-$basic_os
-EOF
-		;;
-	# Default OS when just kernel was specified
-	nto*)
-		kernel=nto
-		os=$(echo $basic_os | sed -e 's|nto|qnx|')
-		;;
-	linux*)
-		kernel=linux
-		os=$(echo $basic_os | sed -e 's|linux|gnu|')
-		;;
-	*)
-		kernel=
-		os=$basic_os
-		;;
-esac
-
-# Now, normalize the OS (knowing we just have one component, it's not a kernel,
-# etc.)
-case $os in
-	# First match some system type aliases that might get confused
-	# with valid system types.
-	# solaris* is a basic system type, with this one exception.
-	auroraux)
-		os=auroraux
-		;;
-	bluegene*)
-		os=cnk
-		;;
-	solaris1 | solaris1.*)
-		os=$(echo $os | sed -e 's|solaris1|sunos4|')
-		;;
-	solaris)
-		os=solaris2
-		;;
-	unixware*)
-		os=sysv4.2uw
-		;;
-	# es1800 is here to avoid being matched by es* (a different OS)
-	es1800*)
-		os=ose
-		;;
-	# Some version numbers need modification
-	chorusos*)
-		os=chorusos
-		;;
-	isc)
-		os=isc2.2
-		;;
-	sco6)
-		os=sco5v6
-		;;
-	sco5)
-		os=sco3.2v5
-		;;
-	sco4)
-		os=sco3.2v4
-		;;
-	sco3.2.[4-9]*)
-		os=$(echo $os | sed -e 's/sco3.2./sco3.2v/')
-		;;
-	sco*v* | scout)
-		# Don't match below
-		;;
-	sco*)
-		os=sco3.2v2
-		;;
-	psos*)
-		os=psos
-		;;
-	qnx*)
-		os=qnx
-		;;
-	hiux*)
-		os=hiuxwe2
-		;;
-	lynx*178)
-		os=lynxos178
-		;;
-	lynx*5)
-		os=lynxos5
-		;;
-	lynxos*)
-		# don't get caught up in next wildcard
-		;;
-	lynx*)
-		os=lynxos
-		;;
-	mac[0-9]*)
-		os=$(echo "$os" | sed -e 's|mac|macos|')
-		;;
-	opened*)
-		os=openedition
-		;;
-	os400*)
-		os=os400
-		;;
-	sunos5*)
-		os=$(echo "$os" | sed -e 's|sunos5|solaris2|')
-		;;
-	sunos6*)
-		os=$(echo "$os" | sed -e 's|sunos6|solaris3|')
-		;;
-	wince*)
-		os=wince
-		;;
-	utek*)
-		os=bsd
-		;;
-	dynix*)
-		os=bsd
-		;;
-	acis*)
-		os=aos
-		;;
-	atheos*)
-		os=atheos
-		;;
-	syllable*)
-		os=syllable
-		;;
-	386bsd)
-		os=bsd
-		;;
-	ctix* | uts*)
-		os=sysv
-		;;
-	nova*)
-		os=rtmk-nova
-		;;
-	ns2)
-		os=nextstep2
-		;;
-	# Preserve the version number of sinix5.
-	sinix5.*)
-		os=$(echo $os | sed -e 's|sinix|sysv|')
-		;;
-	sinix*)
-		os=sysv4
-		;;
-	tpf*)
-		os=tpf
-		;;
-	triton*)
-		os=sysv3
-		;;
-	oss*)
-		os=sysv3
-		;;
-	svr4*)
-		os=sysv4
-		;;
-	svr3)
-		os=sysv3
-		;;
-	sysvr4)
-		os=sysv4
-		;;
-	ose*)
-		os=ose
-		;;
-	*mint | mint[0-9]* | *MiNT | MiNT[0-9]*)
-		os=mint
-		;;
-	dicos*)
-		os=dicos
-		;;
-	pikeos*)
-		# Until real need of OS specific support for
-		# particular features comes up, bare metal
-		# configurations are quite functional.
-		case $cpu in
-		    arm*)
-			os=eabi
-			;;
-		    *)
-			os=elf
-			;;
-		esac
-		;;
-	*)
-		# No normalization, but not necessarily accepted, that comes below.
-		;;
-esac
-
-else
-
-# Here we handle the default operating systems that come with various machines.
-# The value should be what the vendor currently ships out the door with their
-# machine or put another way, the most popular os provided with the machine.
-
-# Note that if you're going to try to match "-MANUFACTURER" here (say,
-# "-sun"), then you have to tell the case statement up towards the top
-# that MANUFACTURER isn't an operating system.  Otherwise, code above
-# will signal an error saying that MANUFACTURER isn't an operating
-# system, and we'll never get to this point.
-
-kernel=
-case $cpu-$vendor in
-	score-*)
-		os=elf
-		;;
-	spu-*)
-		os=elf
-		;;
-	*-acorn)
-		os=riscix1.2
-		;;
-	arm*-rebel)
-		kernel=linux
-		os=gnu
-		;;
-	arm*-semi)
-		os=aout
-		;;
-	c4x-* | tic4x-*)
-		os=coff
-		;;
-	c8051-*)
-		os=elf
-		;;
-	clipper-intergraph)
-		os=clix
-		;;
-	hexagon-*)
-		os=elf
-		;;
-	tic54x-*)
-		os=coff
-		;;
-	tic55x-*)
-		os=coff
-		;;
-	tic6x-*)
-		os=coff
-		;;
-	# This must come before the *-dec entry.
-	pdp10-*)
-		os=tops20
-		;;
-	pdp11-*)
-		os=none
-		;;
-	*-dec | vax-*)
-		os=ultrix4.2
-		;;
-	m68*-apollo)
-		os=domain
-		;;
-	i386-sun)
-		os=sunos4.0.2
-		;;
-	m68000-sun)
-		os=sunos3
-		;;
-	m68*-cisco)
-		os=aout
-		;;
-	mep-*)
-		os=elf
-		;;
-	mips*-cisco)
-		os=elf
-		;;
-	mips*-*)
-		os=elf
-		;;
-	or32-*)
-		os=coff
-		;;
-	*-tti)	# must be before sparc entry or we get the wrong os.
-		os=sysv3
-		;;
-	sparc-* | *-sun)
-		os=sunos4.1.1
-		;;
-	pru-*)
-		os=elf
-		;;
-	*-be)
-		os=beos
-		;;
-	*-ibm)
-		os=aix
-		;;
-	*-knuth)
-		os=mmixware
-		;;
-	*-wec)
-		os=proelf
-		;;
-	*-winbond)
-		os=proelf
-		;;
-	*-oki)
-		os=proelf
-		;;
-	*-hp)
-		os=hpux
-		;;
-	*-hitachi)
-		os=hiux
-		;;
-	i860-* | *-att | *-ncr | *-altos | *-motorola | *-convergent)
-		os=sysv
-		;;
-	*-cbm)
-		os=amigaos
-		;;
-	*-dg)
-		os=dgux
-		;;
-	*-dolphin)
-		os=sysv3
-		;;
-	m68k-ccur)
-		os=rtu
-		;;
-	m88k-omron*)
-		os=luna
-		;;
-	*-next)
-		os=nextstep
-		;;
-	*-sequent)
-		os=ptx
-		;;
-	*-crds)
-		os=unos
-		;;
-	*-ns)
-		os=genix
-		;;
-	i370-*)
-		os=mvs
-		;;
-	*-gould)
-		os=sysv
-		;;
-	*-highlevel)
-		os=bsd
-		;;
-	*-encore)
-		os=bsd
-		;;
-	*-sgi)
-		os=irix
-		;;
-	*-siemens)
-		os=sysv4
-		;;
-	*-masscomp)
-		os=rtu
-		;;
-	f30[01]-fujitsu | f700-fujitsu)
-		os=uxpv
-		;;
-	*-rom68k)
-		os=coff
-		;;
-	*-*bug)
-		os=coff
-		;;
-	*-apple)
-		os=macos
-		;;
-	*-atari*)
-		os=mint
-		;;
-	*-wrs)
-		os=vxworks
-		;;
-	*)
-		os=none
-		;;
-esac
-
-fi
-
-# Now, validate our (potentially fixed-up) OS.
-case $os in
-	# Sometimes we do "kernel-abi", so those need to count as OSes.
-	musl* | newlib* | uclibc*)
-		;;
-	# Likewise for "kernel-libc"
-	eabi | eabihf | gnueabi | gnueabihf)
-		;;
-	# Now accept the basic system types.
-	# The portable systems comes first.
-	# Each alternative MUST end in a * to match a version number.
-	gnu* | android* | bsd* | mach* | minix* | genix* | ultrix* | irix* \
-	     | *vms* | esix* | aix* | cnk* | sunos | sunos[34]* \
-	     | hpux* | unos* | osf* | luna* | dgux* | auroraux* | solaris* \
-	     | sym* |  plan9* | psp* | sim* | xray* | os68k* | v88r* \
-	     | hiux* | abug | nacl* | netware* | windows* \
-	     | os9* | macos* | osx* | ios* \
-	     | mpw* | magic* | mmixware* | mon960* | lnews* \
-	     | amigaos* | amigados* | msdos* | newsos* | unicos* | aof* \
-	     | aos* | aros* | cloudabi* | sortix* | twizzler* \
-	     | nindy* | vxsim* | vxworks* | ebmon* | hms* | mvs* \
-	     | clix* | riscos* | uniplus* | iris* | isc* | rtu* | xenix* \
-	     | mirbsd* | netbsd* | dicos* | openedition* | ose* \
-	     | bitrig* | openbsd* | solidbsd* | libertybsd* | os108* \
-	     | ekkobsd* | freebsd* | riscix* | lynxos* | os400* \
-	     | bosx* | nextstep* | cxux* | aout* | elf* | oabi* \
-	     | ptx* | coff* | ecoff* | winnt* | domain* | vsta* \
-	     | udi* | lites* | ieee* | go32* | aux* | hcos* \
-	     | chorusrdb* | cegcc* | glidix* \
-	     | cygwin* | msys* | pe* | moss* | proelf* | rtems* \
-	     | midipix* | mingw32* | mingw64* | mint* \
-	     | uxpv* | beos* | mpeix* | udk* | moxiebox* \
-	     | interix* | uwin* | mks* | rhapsody* | darwin* \
-	     | openstep* | oskit* | conix* | pw32* | nonstopux* \
-	     | storm-chaos* | tops10* | tenex* | tops20* | its* \
-	     | os2* | vos* | palmos* | uclinux* | nucleus* | morphos* \
-	     | scout* | superux* | sysv* | rtmk* | tpf* | windiss* \
-	     | powermax* | dnix* | nx6 | nx7 | sei* | dragonfly* \
-	     | skyos* | haiku* | rdos* | toppers* | drops* | es* \
-	     | onefs* | tirtos* | phoenix* | fuchsia* | redox* | bme* \
-	     | midnightbsd* | amdhsa* | unleashed* | emscripten* | wasi* \
-	     | nsk* | powerunix* | genode* | zvmoe* | qnx* | emx*)
-		;;
-	# This one is extra strict with allowed versions
-	sco3.2v2 | sco3.2v[4-9]* | sco5v6*)
-		# Don't forget version if it is 3.2v4 or newer.
-		;;
-	none)
-		;;
-	*)
-		echo Invalid configuration \`"$1"\': OS \`"$os"\' not recognized 1>&2
-		exit 1
-		;;
-esac
-
-# As a final step for OS-related things, validate the OS-kernel combination
-# (given a valid OS), if there is a kernel.
-case $kernel-$os in
-	linux-gnu* | linux-dietlibc* | linux-android* | linux-newlib* | linux-musl* | linux-uclibc* )
-		;;
-	uclinux-uclibc* )
-		;;
-	-dietlibc* | -newlib* | -musl* | -uclibc* )
-		# These are just libc implementations, not actual OSes, and thus
-		# require a kernel.
-		echo "Invalid configuration \`$1': libc \`$os' needs explicit kernel." 1>&2
-		exit 1
-		;;
-	kfreebsd*-gnu* | kopensolaris*-gnu*)
-		;;
-	nto-qnx*)
-		;;
-	os2-emx)
-		;;
-	*-eabi* | *-gnueabi*)
-		;;
-	-*)
-		# Blank kernel with real OS is always fine.
-		;;
-	*-*)
-		echo "Invalid configuration \`$1': Kernel \`$kernel' not known to work with OS \`$os'." 1>&2
-		exit 1
-		;;
-esac
-
-# Here we handle the case where we know the os, and the CPU type, but not the
-# manufacturer.  We pick the logical manufacturer.
-case $vendor in
-	unknown)
-		case $cpu-$os in
-			*-riscix*)
-				vendor=acorn
-				;;
-			*-sunos*)
-				vendor=sun
-				;;
-			*-cnk* | *-aix*)
-				vendor=ibm
-				;;
-			*-beos*)
-				vendor=be
-				;;
-			*-hpux*)
-				vendor=hp
-				;;
-			*-mpeix*)
-				vendor=hp
-				;;
-			*-hiux*)
-				vendor=hitachi
-				;;
-			*-unos*)
-				vendor=crds
-				;;
-			*-dgux*)
-				vendor=dg
-				;;
-			*-luna*)
-				vendor=omron
-				;;
-			*-genix*)
-				vendor=ns
-				;;
-			*-clix*)
-				vendor=intergraph
-				;;
-			*-mvs* | *-opened*)
-				vendor=ibm
-				;;
-			*-os400*)
-				vendor=ibm
-				;;
-			s390-* | s390x-*)
-				vendor=ibm
-				;;
-			*-ptx*)
-				vendor=sequent
-				;;
-			*-tpf*)
-				vendor=ibm
-				;;
-			*-vxsim* | *-vxworks* | *-windiss*)
-				vendor=wrs
-				;;
-			*-aux*)
-				vendor=apple
-				;;
-			*-hms*)
-				vendor=hitachi
-				;;
-			*-mpw* | *-macos*)
-				vendor=apple
-				;;
-			*-*mint | *-mint[0-9]* | *-*MiNT | *-MiNT[0-9]*)
-				vendor=atari
-				;;
-			*-vos*)
-				vendor=stratus
-				;;
-		esac
-		;;
-esac
-
-echo "$cpu-$vendor-${kernel:+$kernel-}$os"
-exit
-
-# Local variables:
-# eval: (add-hook 'before-save-hook 'time-stamp)
-# time-stamp-start: "timestamp='"
-# time-stamp-format: "%:y-%02m-%02d"
-# time-stamp-end: "'"
-# End:
diff --git a/configure.in b/configure.in
deleted file mode 100644
index 01f51feb4..000000000
--- a/configure.in
+++ /dev/null
@@ -1,737 +0,0 @@
-#
-# Process this file with autoconf to produce a configure script
-#
-AC_PREREQ(2.59)
-AC_INIT(tmLQCD, 6.0.2, curbach@gmx.de)
-AC_CONFIG_HEADER(include/tmlqcd_config_internal.h)
-AC_CONFIG_SRCDIR([hmc_tm.c])
-AC_CANONICAL_HOST()
-AC_PREFIX_DEFAULT($HOME)
-AC_ARG_PROGRAM
-
-if test "$host_vendor" = "cray"; then
-  ac_cv_c_bigendian=yes
-fi
-
-AC_PROG_CC
-AC_PROG_CC_C99
-dnl AC_PROG_CC_STDC
-AC_C_CONST
-AC_C_INLINE
-AC_C_RESTRICT
-AC_F77_LIBRARY_LDFLAGS
-AC_CHECK_TOOL(AR, ar, [ar])
-LIBS="$LIBS $FLIBS -lm"
-
-AC_PROG_LEX
-dnl AC_PROG_LEX sets $LEX to ":" if neither lex nor flex are found! 
-if test "$LEX" = ":"; then
-  AC_MSG_ERROR([(F)LEX is required for building read_input.c. Please install it and run configure again.])
-fi
-
-AC_PROG_MAKE_SET
-AC_PROG_RANLIB
-AC_CHECK_PROG(CCDEP, gcc, "gcc", "$CC")
-AC_CHECK_PROG(CXXDEP, g++, "g++", "$CXX")
-#(endian="", AC_DEFINE(LITTLE_ENDIAN,1,The endian of the architechture))
-
-# AC_PROG_FC([ifort gfortran])
-# AC_FC_FUNC(testfunc, )
-
-LDFLAGS="$LDFLAGS -L\${HOME}/lib -L\${top_builddir}/lib"
-CCLD=${CC}
-
-# compilation in operator is slowest so we do it first, saves time in parallel compiles
-USESUBDIRS="operator linalg solver monomial buffers cu io meas xchange init rational smearing wrapper"
-
-AC_CHECK_HEADERS([stdint.h],
-[ dnl for inttypes.h and stdint.h for uint_xxx types
-  dnl if successful check for the actual types too
-  AC_CHECK_TYPES([uint16_t, uint32_t, uint64_t],
-                 [],
-                 [AC_MSG_ERROR([stdint.h found but either uint16_t, uint32_t or uint64_t not found]) ]
-                )
-],
-[
-  dnl no inttypes.h or stdint.h found check common unsigned types
-  dnl for sizes and make appropriate decisions in the lime_fixed_types.h file
-  AC_CHECK_SIZEOF(unsigned char)
-  AC_CHECK_SIZEOF(unsigned short)
-  AC_CHECK_SIZEOF(unsigned int)
-  AC_CHECK_SIZEOF(unsigned long)
-  AC_CHECK_SIZEOF(unsigned long long)
-]
-)
-
-AC_MSG_CHECKING(where to find lime)
-AC_ARG_WITH(limedir,
-  AS_HELP_STRING([--with-limedir[=dir]], [search lime in dir [default=./lime]]),
-  lime_dir=$withval, lime_dir="./lime")
-AC_MSG_RESULT($lime_dir)
-LDFLAGS="$LDFLAGS -L${lime_dir}/lib/"
-AC_CHECK_LIB([lime], [limeReaderNextRecord],[],
-              [AC_MSG_ERROR([library liblime is missing or needed function is not available])])
-
-#LIBS="$LIBS $FLIBS -lm"
-
-AC_MSG_CHECKING(whether we want to use lemon)
-AC_ARG_WITH(lemondir,
-            AS_HELP_STRING([--with-lemondir[=dir]], [use lemon, to be found in dir]),
-             [echo $withval
-              LEMON_AVAILABLE=1
-              lemon_dir=$withval
-              LDFLAGS="$LDFLAGS -L${lemon_dir}/lib"
-              AC_CHECK_LIB([lemon],
-                           [lemonReaderNextRecord],
-                           [],
-                           [AC_MSG_ERROR([library liblemon was not found])])],
-             [echo no
-              LEMON_AVAILABLE=0])
-
-AC_MSG_CHECKING(whether we want to use MPI)
-AC_ARG_ENABLE(mpi,
-  AS_HELP_STRING([--enable-mpi], [enable use of mpi [default=yes]]),
-  enable_mpi=$enableval, enable_mpi=yes)
-if test $enable_mpi = yes; then
-  AC_MSG_RESULT(yes)
-  AC_DEFINE(TM_USE_MPI,1,Compile with MPI support)
-else
-  AC_MSG_RESULT(no)
-fi
-
-AC_MSG_CHECKING(whether we want to use DDalphaAMG)
-AC_ARG_WITH(DDalphaAMG,
-            AS_HELP_STRING([--with-DDalphaAMG[=dir]], [use DDalphaAMG, to be found in dir]),
-             [echo $withval
-              DDalphaAMG_AVAILABLE=1
-              DDalphaAMG_INTERFACE="DDalphaAMG_interface"
-              AC_DEFINE(DDalphaAMG,1,Using DDalphaAMG)
-              DDalphaAMG_dir=$withval
-              LDFLAGS="$LDFLAGS -L${DDalphaAMG_dir}/lib"
-              INCLUDES="$INCLUDES -I${DDalphaAMG_dir}/include/"
-              AC_CHECK_LIB([DDalphaAMG],
-                           [DDalphaAMG_finalize],
-                           [],
-                           [AC_MSG_ERROR([library DDalphaAMG was not found])])],
-             [echo no
-              DDalphaAMG_AVAILABLE=0
-              DDalphaAMG_INTERFACE="DDalphaAMG_interface"
-              ])
-
-AC_MSG_CHECKING(whether we want to use OpenMP)
-AC_ARG_ENABLE(omp,
-  AS_HELP_STRING([--enable-omp], [enable use of OpenMP [default=yes]]),
-  enable_omp=$enableval, enable_omp=yes)
-if test $enable_omp = yes; then
-  AC_MSG_RESULT(yes)
-  AC_DEFINE(TM_USE_OMP,1,Compile with OpenMP support)
-  AC_CHECK_HEADERS([omp.h],,[AC_MSG_ERROR([Cannot find OpenMP headers!])])
-  AC_OPENMP
-# -- AC_OPENMP provides a compiler-dependent OPENMP_CFLAGS so we can set it here
-    CFLAGS="$CFLAGS $OPENMP_CFLAGS"
-    CPPFLAGS="$CPPFLAGS $OPENMP_CFLAGS"
-    LDFLAGS="$LDFLAGS $OPENMP_CFLAGS"
-else
-  AC_MSG_RESULT(no)
-fi
-
-fftw_lib=/usr
-AC_MSG_CHECKING(whether we want to use FFTW)
-AC_ARG_ENABLE(fftw,
-  AS_HELP_STRING([--enable-fftw], [enable use of fftw [default=no]]),
-  enable_fftw=$enableval, enable_fftw=no)
-if test $enable_fftw = yes; then
-  AC_MSG_RESULT(yes)
-  AC_DEFINE(HAVE_FFTW,1,Compile with FFTW support)
-  LIBS="-lfftw3 ${LIBS}"
-elif test $enable_fftw = no; then
-  AC_MSG_RESULT(no)
-else
-  AC_MSG_RESULT(yes)
-  AC_DEFINE(HAVE_FFTW,1,Compile with FFTW support)
-  fftw_lib=${enable_fftw}
-  LDFLAGS="$LDFLAGS -L${fftw_lib}/lib64"
-  LIBS="-lfftw3 ${LIBS}"
-  INCLUDES="-I${fftw_lib}/include ${INCLUDES}"
-fi
-
-if test $enable_mpi = yes; then
-  AC_MSG_CHECKING(which parallelisation to use for MPI)
-  AC_ARG_WITH(mpidimension,
-    AS_HELP_STRING([--with-mpidimension[=n]], [use n dimensional parallelisation [default=1]]),
-    withmpidimension=$withval, withmpidimension=1)
-  if test $withmpidimension = 1; then
-    AC_MSG_RESULT(n=1 [t])
-    AC_DEFINE(PARALLELT,1,One dimensional parallelisation)
-  elif test $withmpidimension = 2; then
-    AC_MSG_RESULT(n=2 [xt])
-    AC_DEFINE(PARALLELXT,1,Two dimensional parallelisation)
-  elif test $withmpidimension = 3; then
-    AC_MSG_RESULT(n=3 [xyt])
-    AC_DEFINE(PARALLELXYT,1,Three dimensional parallelisation)
-  elif test $withmpidimension = 4; then
-    AC_MSG_RESULT(n=4 [xyzt])
-    AC_DEFINE(PARALLELXYZT,1,Four dimensional parallelisation)
-  elif test $withmpidimension = X; then
-    AC_MSG_RESULT(n=1 [x])
-    AC_DEFINE(PARALLELX,1, X parallelisation)
-  elif test $withmpidimension = XY; then
-    AC_MSG_RESULT(n=2 [xy])
-    AC_DEFINE(PARALLELXY,1, XY parallelisation)
-  elif test $withmpidimension = XYZ; then
-    AC_MSG_RESULT(n=3 [xyz])
-    AC_DEFINE(PARALLELXYZ,1, XYZ parallelisation)
-  elif test $withmpidimension = T; then
-    AC_MSG_RESULT(n=1 [t])
-    AC_DEFINE(PARALLELT,1, T parallelisation)
-  elif test $withmpidimension = XT; then
-    AC_MSG_RESULT(n=2 [xt])
-    AC_DEFINE(PARALLELXT,1, XT parallelisation)
-  elif test $withmpidimension = XYT; then
-    AC_MSG_RESULT(n=3 [xyt])
-    AC_DEFINE(PARALLELXYT,1, XYT parallelisation)
-  elif test $withmpidimension = XYZT; then
-    AC_MSG_RESULT(n=4 [xyzt])
-    AC_DEFINE(PARALLELXYZT,1, XYZT parallelisation)
-  else
-    AC_MSG_RESULT(unknown)
-    AC_MSG_ERROR([Only t, xt, xyt, xyzt, x, xy, xyz parallelisation available])
-  fi
-
-  AC_MSG_CHECKING(whether we shall use persistent MPI calls for halfspinor)
-  AC_ARG_WITH([persistentmpi],
-    AS_HELP_STRING([--with-persistentmpi], [use persistent MPI calls for halfspinor [default=no]]),
-    withpersistent=$withval, withpersistent=no)
-  if test $withpersistent = yes; then
-    AC_MSG_RESULT(yes)
-    AC_DEFINE(_PERSISTENT,1,use persistent MPI calls for halfspinor)
-  else
-    AC_MSG_RESULT(no)
-  fi
-
-  AC_MSG_CHECKING(whether we shall use non-blocking MPI calls)
-  AC_ARG_WITH([nonblockingmpi],
-    AS_HELP_STRING([--with-nonblockingmpi], [use non-blocking MPI calls for spinor and gauge [default=yes]]),
-    withnonblock=$withval, withnonblock=yes)
-  if test $withnonblock = yes; then
-    AC_MSG_RESULT(yes)
-    AC_DEFINE(_NON_BLOCKING,1,use non-blocking MPI calls for spinor ang gauge)
-  else
-    AC_MSG_RESULT(no)
-  fi
-fi
-
-AC_MSG_CHECKING([whether we want to fix volume at compiletime])
-AC_ARG_WITH([fixedvolume],
-  AS_HELP_STRING([--with-fixedvolume], [fix volume at compiletime [default=no]]),
-  with_fixvol=$withval, with_fixvol=no)
-if test $with_fixvol = yes; then
-  AC_MSG_RESULT(yes)
-  AC_DEFINE(FIXEDVOLUME,1,Fixed volume at compiletime)
-  AC_CONFIG_FILES([fixed_volume.h])
-else
-  AC_MSG_RESULT(no)
-fi
-
-AC_MSG_CHECKING([whether we want to use KOJAK instrumentalisation])
-AC_ARG_WITH([kojakinst],
-  AS_HELP_STRING([--with-kojakinst], [instrumentalise for KOJAK [default=no]]),
-  with_kojakinst=$withval, with_kojakinst=no)
-if test $with_kojakinst = yes; then
-  AC_MSG_RESULT(yes)
-  CC="kinst-pomp ${CC}"
-else
-  AC_MSG_RESULT(no)
-fi
-
-AC_MSG_CHECKING(whether we want to use lapack and blas)
-AC_ARG_WITH(lapack,
-  AS_HELP_STRING([--with-lapack], [enable use of lapack [default=yes]]),
-  with_lapack=$withval, with_lapack=yes)
-if test "$with_lapack" = yes; then
-  AC_MSG_RESULT(yes)
-  LAPACKLIB=
-  AC_DEFINE(HAVE_LAPACK,1,lapack available)
-elif test "$with_lapack" != no; then
-  AC_MSG_RESULT(yes)
-  LIBS="$withval $LIBS"
-  with_lapack=yes
-  AC_DEFINE(HAVE_LAPACK,1,lapack available)
-else
-  AC_MSG_RESULT(no)
-  AC_MSG_ERROR([lapack is needed! Will stop here.])
-fi
-
-if test $enable_mpi = yes; then
-  dnl In general one cannot run mpi programs directly
-  dnl thats why we need here cross_compiling=yes
-  dnl for non CRAY
-  if test "$host_vendor" != "cray"; then
-    cross_compiling=yes
-  fi
-fi
-
-dnl for the case of other configure scripts
-dnl AC_CONFIG_SUBDIRS( rng )
-
-dnl check for clock_gettime and set correct library flag if one is required
-dnl (this is done by AC_CHECK_LIB)
-AC_CHECK_FUNCS(clock_gettime, [], [AC_CHECK_LIB(rt, clock_gettime)])
-
-dnl in principle clock_gettime and CLOCK_MONOTONIC/CLOCK_REALTIME should be available
-dnl only when using POSIX 199309, we set this explicitly here
-dnl this should not cause problems on any relatively modern (post y2k) machine!
-if ( test "$ac_cv_lib_rt_clock_gettime" = "yes" || test "$ac_cv_func_clock_gettime" = "yes" ); then
-  AC_DEFINE(HAVE_CLOCK_GETTIME,1)
-dnl  we set this in gettime.c explicitly for the time being 
-dnl  due to endian problem on BG/Q
-dnl  CFLAGS="$CFLAGS -D_POSIX_C_SOURCE=199309L"
-  AC_MSG_NOTICE([Instructing the compiler to use POSIX 199309L])
-fi
-
-dnl Checks for lapack and defines proper name mangling scheme for
-dnl linking with f77 code
-AC_F77_FUNC(zheev)
-if test "$zheev" = "zheev"; then
-  AC_DEFINE(NOF77_,1,Fortran has no extra _)
-fi
-AC_SEARCH_LIBS([$zheev],[lapack], [], [AC_MSG_ERROR([Cannot find lapack])])
-
-dnl Checks for header files.
-AC_HEADER_STDC
-AC_CHECK_HEADERS([float.h libintl.h limits.h stdint.h stdlib.h string.h strings.h sys/time.h unistd.h endian.h])
-AC_CHECK_HEADER( getopt.h, [])
-
-dnl Checks for typedefs, structures, and compiler characteristics.
-AC_C_CONST
-AC_TYPE_OFF_T
-AC_TYPE_SIZE_T
-AC_HEADER_TIME
-
-dnl Checks for library functions.
-AC_SYS_LARGEFILE
-AC_FUNC_FSEEKO
-AC_FUNC_MALLOC
-AC_TYPE_SIGNAL
-AC_CHECK_FUNCS([gettimeofday pow sqrt])
-
-dnl We now define some replacement variables
-AC_SUBST(OPTARGS)
-AC_SUBST(SOPTARGS)
-AC_SUBST(INCLUDES)
-AC_SUBST(AUTOCONF)
-AC_SUBST(SOLVEROUT)
-AC_SUBST(CCDEP)
-AC_SUBST(CXXDEP)
-AC_SUBST(CCLD)
-AC_SUBST(DEPFLAGS)
-AC_SUBST(CXXDEPFLAGS)
-AC_SUBST(DEBUG_FLAG)
-AC_SUBST(PROFILE_FLAG)
-AC_SUBST(XCHANGELIB)
-AC_SUBST(XCHANGEDIR)
-AC_SUBST(MEASDIR)
-AC_SUBST(XLIB)
-AC_SUBST([LEMON_AVAILABLE])
-AC_SUBST(QUDA_INTERFACE)
-AC_SUBST(QPHIX_INTERFACE)
-AC_SUBST(QPHIX_PROGRAMS)
-AC_SUBST(DDalphaAMG_INTERFACE)
-
-INCLUDES="$INCLUDES -I\$(HOME)/include/ -I. -I\${abs_top_builddir}/  -I\${abs_top_builddir}/include/ -I\${abs_top_srcdir}/ -I\${abs_top_srcdir}/include/ -I${lime_dir}/include/ -I${lemon_dir}/include/"
-DEPFLAGS="$DEPFLAGS"
-
-AC_MSG_CHECKING(what alignment we want for arrays)
-AC_ARG_ENABLE(alignment,
-  [AS_HELP_STRING([--enable-alignment[=n]], [Automatically or expliclty align arrays to byte number: auto, none, 16, 32, 64 [default=auto]])],
-  withalign=$enableval, withalign=auto)
-if test "$withalign" = "none"; then
-  AC_MSG_RESULT(none)
-  withalign=1
-  AC_DEFINE(ALIGN_BASE, 0x00, [Align base])
-  AC_DEFINE(ALIGN, [])
-  AC_DEFINE(ALIGN_BASE32, 0x00, [Align base32])
-  AC_DEFINE(ALIGN32, [], [])
-elif test $withalign = 16; then
-  AC_MSG_RESULT(16 bytes)
-  AC_DEFINE(ALIGN_BASE, 0x0F, [Align base])
-  AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))])
-  AC_DEFINE(ALIGN_BASE32, 0x0F, [Align base32])
-  AC_DEFINE(ALIGN32, [__attribute__ ((aligned (16)))], [])
-elif test $withalign = 32; then
-  AC_MSG_RESULT(32 bytes)
-  AC_DEFINE(ALIGN_BASE, 0x1F, [Align base])
-  AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))])
-  AC_DEFINE(ALIGN_BASE32, 0x1F, [Align base32])
-  AC_DEFINE(ALIGN32, [__attribute__ ((aligned (32)))], [])
-elif test $withalign = 64; then
-  AC_MSG_RESULT(64 bytes)
-  AC_DEFINE(ALIGN_BASE, 0x3F, [Align base])
-  AC_DEFINE(ALIGN, [__attribute__ ((aligned (64)))])
-  AC_DEFINE(ALIGN_BASE32, 0x3F, [Align base32])
-  AC_DEFINE(ALIGN32, [__attribute__ ((aligned (64)))], [])
-elif test $withalign = auto; then
-  withautoalign=1
-  AC_MSG_RESULT(auto)
-  AC_DEFINE(ALIGN_BASE, 0x00, [Align base])
-  AC_DEFINE(ALIGN, [], [])
-  AC_DEFINE(ALIGN_BASE32, 0x00, [Align base32])
-  AC_DEFINE(ALIGN32, [], [])
-else
-  AC_MSG_RESULT(Unusable value for array alignment)
-  AC_MSG_ERROR([Allowed values are: auto, none, 16, 32, 64])
-fi
-
-dnl We here check for alignment issues with QPX instructions -- this flag has been set earlier
-if test $enable_qpx = yes; then
-  if test $withalign = auto; then
-    if test $withautoalign -lt 32; then
-      AC_MSG_RESULT(increasing array alignment to 32 bytes for use of QPX instructions on BG/Q)
-      AC_DEFINE(ALIGN_BASE, 0x1F, [Align base])
-      AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))])
-      AC_MSG_RESULT(increasing 32bit array alignment to 16 bytes for use of QPX instructions on BG/Q)
-      AC_DEFINE(ALIGN_BASE32, 0x0F, [Align base32])
-      AC_DEFINE(ALIGN32, [__attribute__ ((aligned (16)))])
-      withautoalign=32
-    fi
-  elif test $withalign -lt 32; then
-    AC_MSG_ERROR([alignment incompatible with QPX instructions (32 bytes required)])
-  fi
-fi
-
-dnl Check for alignment associated with (non-QPX) BG optimization.
-dnl This will also result in using 32 byte alignment on MareNostrum, but that should be fairly innocuous.
-if test "$host_cpu" = "powerpc" && test "$host_vendor" = "ibm" && test "$host_os" = "blrts"; then
-  if test $withalign = auto; then
-    if test $withautoalign -lt 16; then
-      AC_MSG_RESULT(increasing array alignment to 16 bytes for BG/L optimization)
-      AC_DEFINE(ALIGN_BASE, 0x0F, [Align base])
-      AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))], [Align base])
-      withautoalign=16
-    fi
-  fi
-elif test "$host_cpu" = "powerpc" && test "$host_vendor" = "ibm" && test "$host_os" = "bprts"; then
-  if test $withalign = auto; then
-    if test $withautoalign -lt 16; then
-      AC_MSG_RESULT(increasing array alignment to 16 bytes for BG/P optimization)
-      AC_DEFINE(ALIGN_BASE, 0x0F, [Align base])
-      AC_DEFINE(ALIGN, [__attribute__ ((aligned (16)))], [Align base])
-      withautoalign=16
-    fi
-  fi
-elif test "$host_cpu" = "powerpc64" && test "$host_vendor" = "unknown" && test "$host_os" = "linux-gnu"; then
-  if test $withalign = auto; then
-    if test $withautoalign -lt 32; then
-      AC_MSG_RESULT(increasing array alignment to 32 bytes for BG/Q and generic POWER optimization)
-      AC_DEFINE(ALIGN_BASE, 0x1F, [Align base])
-      AC_DEFINE(ALIGN, [__attribute__ ((aligned (32)))])
-      AC_MSG_RESULT(increasing array 32 bit alignment to 16 bytes for BG/Q and generic POWER optimization)
-      AC_DEFINE(ALIGN_BASE32, 0x0F, [Align base])
-      AC_DEFINE(ALIGN32, [__attribute__ ((aligned (16)))])
-      withautoalign=32
-    fi
-  fi
-fi
-
-AC_MSG_CHECKING(whether we want to use gprof as profiler)
-AC_ARG_WITH(gprof,
-  AS_HELP_STRING([--with-gprof], [use of gprof profiler [default=no]]),
-  enable_gprof=$withval, enable_gprof=no)
-if test $enable_gprof = yes; then
-  AC_MSG_RESULT(yes)
-    if test "$host_cpu" = "powerpc" && test "$host_vendor" = "ibm"; then
-      PROFILE_FLAG="-pg -qfullpath -g"
-    else
-      PROFILE_FLAG="-pg -g"
-    fi
-else
-  AC_MSG_RESULT(no)
-  PROFILE_FLAG=
-fi
-
-dnl Now we have to set all Flags and compiler properly
-PGCC=`$CC -V 2>&1 | grep pgcc`
-ICC=`$CC -V 2>&1 | grep -i intel`
-
-dnl first for PC's
-if test "$host_cpu" = "i686" || test "$host_cpu" = "x86_64"; then
-dnl the GNU compiler
-  if test "$GCC" = yes && test "$ICC" = ""; then
-    DEPFLAGS="-MM"
-    CFLAGS="$CFLAGS -pedantic -Wall"
-    OPTARGS='-O'
-    SOPTARGS='-O'
-
-    if test "$host_cpu" = "x86_64"; then
-      AC_DEFINE(_x86_64,1,x86 64 Bit architecture)
-    fi
-    CCDEP="$CC"
-    if test $enable_mpi = yes; then
-      CCDEP="gcc"
-    fi
-    CXXDEP="$CXX"
-    if test $enable_mpi = yes; then
-      CXXDEP="g++"
-    fi
-    DEBUG_FLAG="-g"
-dnl other compilers
-  else
-dnl check for pgcc
-    if test "$PGCC" != ""; then
-      DEPFLAGS="-M"
-      echo "We are using the Portland Group C compiler!"
-      OPTARGS="-O2"
-      SOPTARGS="-O2"
-      DEBUG_FLAG="-g"
-      PROFILE_FLAG="-p -g"
-      CCDEP="$CC"
-
-dnl check for icc
-    elif test "$ICC" != ""; then
-      echo "We are using the Intel C compiler!"
-      DEPFLAGS="-M"
-      OPTARGS="-O3"
-      SOPTARGS="-O3"
-      DEBUG_FLAG="-g"
-      PROFILE_FLAG="-p -g"
-      CCDEP="$CC"
-      CXXDEP="$CXX"
-    else
-      DEPFLAGS="-M"
-      CFLAGS="$CFLAGS -O"
-      DEBUG_FLAG="-g"
-      CCDEP="$CC"
-      CXXDEP="$CXX"
-    fi
-  fi
-# The CRAY
-elif test "$host_vendor" = "cray"; then
-  echo
-  echo "Hey, we are on a cray, you should take some time for this..."
-  echo "get yourself a coffee or so!"
-  echo
-  CFLAGS="$CFLAGS -dp"
-  AC_DEFINE(CRAY,1,We are on a CRAY)
-  OPTARGS="-O3"
-  SOPTARGS="-O3"
-  DEBUG_FLAG="-g"
-  CCDEP="$CC"
-  DEPFLAGS="-M"
-else
-  AC_CHECK_PROG(CCDEP, gcc, "gcc", "$CC")
-  if test "$CCDEP" = "gcc"; then
-    DEPFLAGS="-MM"
-  else
-    DEPFLAGS="-M"
-  fi
-  OPTARGS=
-  SOPTARGS=
-fi
-
-CXXDEPFLAGS="$DEPFLAGS --std=c++11"
-
-AC_MSG_CHECKING(whether we want to switch on optimisation)
-AC_ARG_ENABLE(optimize,
-  AS_HELP_STRING([--enable-optimize], [enable optimisation [default=yes]]),
-  enable_optimize=$enableval, enable_optimize=yes)
-if test $enable_optimize = no; then
-  AC_MSG_RESULT(no)
-  OPTARGS=
-  SOPTARGS=
-else
-  AC_MSG_RESULT(yes)
-fi
-
-AC_MSG_CHECKING(whether we want to use a copy of the gauge field)
-AC_ARG_ENABLE(gaugecopy,
-  AS_HELP_STRING([--enable-gaugecopy], [enable use of a copy of the gauge field [default=yes]]),
-  enable_gaugecopy=$enableval, enable_gaugecopy=yes)
-if test $enable_gaugecopy = yes; then
-  AC_MSG_RESULT(yes)
-  AC_DEFINE(_GAUGE_COPY,1,Construct an extra copy of the gauge fields)
-else
-  AC_MSG_RESULT(no)
-fi
-
-AC_MSG_CHECKING(whether we want to use a Dirac Op. with halfspinor exchange)
-AC_ARG_ENABLE(halfspinor,
-  AS_HELP_STRING([--enable-halfspinor], [use a Dirac Op. with halfspinor exchange [default=yes]]),
-  enable_halfspinor=$enableval, enable_halfspinor=yes)
-if test $enable_halfspinor = yes; then
-  AC_MSG_RESULT(yes)
-  AC_DEFINE(_USE_HALFSPINOR,1,Exchange only a halfspinor in the Dirac Operator)
-  if test $enable_gaugecopy = no; then
-    AC_MSG_WARN([switching on gaugecopy for Dirac operator with halfspinor!])
-    AC_DEFINE(_GAUGE_COPY,1,Construct an extra copy of the gauge fields)
-  fi
-else
-  AC_MSG_RESULT(no)
-fi
-
-AC_MSG_CHECKING(whether we want to use shmem API)
-AC_ARG_ENABLE(shmem,
-  AS_HELP_STRING([--enable-shmem],[use shmem API [default=no]]),
-  enable_shmem=$enableval, enable_shmem=no)
-if test $enable_shmem = yes; then
-  AC_MSG_RESULT(yes)
-  AC_DEFINE(_USE_SHMEM,1,Use shmem API)
-  LIBS="$LIBS -lsma"
-else
-  AC_MSG_RESULT(no)
-fi
-
-
-AC_SUBST(USESUBDIRS)
-
-AC_MSG_CHECKING(whether we want to use CUDA)
-AC_ARG_WITH(cudadir,
-            AS_HELP_STRING([--with-cudadir[=dir]], [use CUDA library (specify 'lib' directory)]),
-             [AC_MSG_RESULT($withval)
-              CUDA_AVAILABLE=1
-              cuda_dir=$withval
-              LDFLAGS="$LDFLAGS -L${cuda_dir} -lcuda"
-              AC_CHECK_LIB([cudart],
-                           [cudaMalloc],
-                           [],
-                           [AC_MSG_ERROR([Can't link a simple program against library cudart.])])],
-             [AC_MSG_RESULT(no)
-              CUDA_AVAILABLE=0])
-
-AC_MSG_CHECKING(whether we want to use HIP)
-AC_ARG_WITH(hipdir,
-            AS_HELP_STRING([--with-hipdir[=dir]], [use HIP library (specify 'lib' directory)]),
-             [AC_MSG_RESULT($withval)
-              HIP_AVAILABLE=1
-              hip_dir=$withval
-              LDFLAGS="$LDFLAGS -L${hip_dir} -lamdhip64"
-              AC_CHECK_LIB([amdhip64],
-                           [hipMalloc],
-                           [],
-                           [AC_MSG_ERROR([Can't link a simple program against library amdhip64.])])],
-             [AC_MSG_RESULT(no)
-              HIP_AVAILABLE=0])
-
-
-# QUDA library for GPUs
-AC_MSG_CHECKING(whether we want to use QUDA)
-AC_ARG_WITH(qudadir,
-            AS_HELP_STRING([--with-qudadir[=dir]], [use QUDA library (specify directory which contains 'include' and 'lib' subdirs)]),
-             [AC_MSG_RESULT($withval)
-              if test $CUDA_AVAILABLE -ne 1 && test $HIP_AVAILABLE -ne 1; then
-                AC_MSG_ERROR([Need either CUDA or HIP to link against QUDA!])
-              fi
-              QUDA_AVAILABLE=1
-              AC_DEFINE(TM_USE_QUDA,1,Using QUDA GPU)
-              quda_dir=$withval
-              LDFLAGS="$LDFLAGS -L${quda_dir}/lib"
-              INCLUDES="$INCLUDES -I${quda_dir}/include/"
-              QUDA_INTERFACE="quda_interface"
-              AC_CHECK_LIB([quda],
-                           [freeGaugeQuda],
-                           [],
-                           [AC_MSG_ERROR([Can't link a simple program against library libquda. (Did you set CXX properly?)])]
-                           )
-              #QUDA needs to be linked with C++ linker
-              CCLD=${CXX}
-             ],
-             [AC_MSG_RESULT(no)
-              QUDA_AVAILABLE=0
-              QUDA_INTERFACE=""
-              ]
-            )
-AC_SUBST([QUDA_AVAILABLE])
-
-AC_MSG_CHECKING(whether the QUDA version is experimental)
-AC_ARG_ENABLE(quda_experimental,
-  AS_HELP_STRING([--enable-quda_experimental], [enable support for experimental QUDA versions [default=no]]),
-  enable_quda_experimental=$enableval, enable_quda_experimental=no)
-if test $enable_quda_experimental = yes; then
-  AC_MSG_RESULT(yes)
-  AC_DEFINE(TM_QUDA_EXPERIMENTAL,1,Experimental QUDA version in use)
-else
-  AC_MSG_RESULT(no)
-fi
-AC_MSG_CHECKING(whether the QUDA force is enabled)
-AC_ARG_ENABLE(quda_fermionic_forces,
-  AS_HELP_STRING([--enable-quda_fermionic_forces], [enable support for fermionic forces using QUDA [default=yes]]),
-  enable_quda_fermionic_forces=$enableval, enable_quda_fermionic_forces=yes)
-if test $enable_quda_fermionic_forces = no; then
-  AC_MSG_RESULT(no)
-else
-  AC_MSG_RESULT(yes)
-  AC_DEFINE(TM_QUDA_FERMIONIC_FORCES,1, fermionic forces with QUDA are enabled)
-fi
-
-# QPhiX library for Intel Xeon and Xeon Phis
-AC_MSG_CHECKING(whether we want to use QPhiX)
-AC_ARG_WITH(qphixdir,
-            AS_HELP_STRING([--with-qphixdir[=dir]], [use QPhiX, to be found in dir]),
-             [echo yes
-              QPHIX_AVAILABLE=1
-              AC_DEFINE(TM_USE_QPHIX,1,Using QPhiX)
-              qphix_dir=$withval
-              LDFLAGS="$LDFLAGS -L${qphix_dir}/lib -lqphix_solver -lqphix_codegen"
-              INCLUDES="$INCLUDES -I${qphix_dir}/include/" 
-              QPHIX_INTERFACE="qphix_interface"
-              QPHIX_PROGRAMS=""
-              # Due to github issue #404, the qphix test_Dslash code has been disabled by BaKo
-              # for the time being
-              # it should be updated to make use of the QPhiX internal interfaces
-              # for passing full lattice spinors
-              # "qphix_test_Dslash"
-
-              # QMP: TODO AC_CHECK_LIB
-              AC_MSG_CHECKING([where to search for QMP libs])
-              AC_ARG_WITH(qmpdir,
-                          AS_HELP_STRING([--with-qmpdir[=dir]], [if using QPhiX, then set QMP lib dir]),
-                          qmp_dir=$withval
-                          LDFLAGS="$LDFLAGS -L${qmp_dir}/lib -lqmp"
-                          INCLUDES="$INCLUDES -I${qmp_dir}/include/"
-                          )
-              AC_MSG_RESULT($qmp_dir)
-
-              AC_MSG_CHECKING([Setting QPhiX SOALEN])
-              AC_ARG_ENABLE(qphix-soalen,
-                            AS_HELP_STRING([--enable-qphix-soalen], [if using QPhiX, set SOALEN [default=4]]),
-                            enable_qphix_soalen=$enableval, enable_qphix_soalen=4)
-              AC_MSG_RESULT($enable_qphix_soalen)
-              AC_DEFINE_UNQUOTED(QPHIX_SOALEN, ${enable_qphix_soalen}, Structure of Array length to use with QPhiX)
-
-              AC_PROG_CXX
-              #QPhiX needs to be linked with C++ linker
-              CCLD=${CXX}
-             ],
-             [echo no
-              QPHIX_AVAILABLE=0
-              QPHIX_INTERFACE=""])
-AC_SUBST([QPHIX_AVAILABLE])
-
-if test ! -e lib; then
-  mkdir lib
-fi
-
-dnl create the test and tests directory here
-if test ! -e test; then
-  mkdir test
-fi
-
-if test ! -e tests; then
-  mkdir tests
-fi
-
-if test ! -e tests/regressions; then
-  mkdir tests/regressions
-fi
-
-
-LIBS="-lhmc -lmonomial -loperator -lsolver -linit -lmeas -llinalg -lhmc -lxchange -lrational -lio $LIBS"
-AUTOCONF=autoconf
-
-for i in $USESUBDIRS
-do
-  make_files="$make_files $i/Makefile"
-done
-
-AC_CONFIG_FILES([Makefile $make_files])
-
-AC_OUTPUT
diff --git a/doc/install.tex b/doc/install.tex
index e4d86c2da..9d5e6f887 100644
--- a/doc/install.tex
+++ b/doc/install.tex
@@ -1,103 +1,136 @@
-The software ships with a GNU autoconf environment and a configure
-script, which will generate GNU Makefiles to build the programmes. It
-is supported and recommended to configure and build the executables in
-a separate build directory. This also allows to have several builds with
-different options from the same source code directory. 
+The software ships with a CMake environment, which will configure and build the
+programmes. It is recommended to configure and build the executables in a
+separate build directory. This also allows to have several builds with different
+options from the same source code directory.
 
 \subsection{Prerequisites}
 
-In order to compile the programmes the {\ttfamily
-  LAPACK}~\cite{lapack:web} library (fortran version) needs to be
-installed. In addition it must be known which linker options are
-needed to link against {\ttfamily LAPACK}, e.g. {\ttfamily
-  -Lpath-to-lapack -llapack  -lblas}. Also a the latest
-version (tested is version 1.2.3) of {\ttfamily
-  C-LIME}~\cite{lime:web} must be available, which is used as a
-packaging scheme to read and write gauge configurations and
-propagators to files.
+In order to compile the programmes the {\ttfamily LAPACK}~\cite{lapack:web}
+library (fortran version) needs to be installed. CMake will search for the
+library in all default directories. Also the latest version (tested is version
+1.2.3) of {\ttfamily C-LIME}~\cite{lime:web} must be available, which is used as
+a packaging scheme to read and write gauge configurations and propagators to
+files.
 
 \subsection{Configuring the hmc package}
 \label{sec:config}
 
-In order to get a simple configuration of the hmc package it is enough
-to just type 
-\begin{verbatim}
-path-to-src-code/configure   --with-lime=<path-to-lime> \
-     --with-lapack=<linker-flags> CC=<mycc> \
-     F77=<myf77> CFLAGS=<c-compiler flags>
-\end{verbatim}
-in the build directory. If 
-{\ttfamily CC, F77} and {\ttfamily CFLGAS} are not specified,
-{\ttfamily configure} will guess them.
-
-The code was successfully compiled and run at least on the following
-platforms: i686 and compatible, x64 and compatible, IBM Regatta
-systems, IBM Blue Gene/L, IBM Blue Gene/P, SGI Altix and SGI PC
-clusters, powerpc clusters.
-
-The configure script accepts certain options to influence the building
-procedure. One can get an overview over all supported options with
-{\ttfamily configure --help}. There are {\ttfamily enable|disable}
-options switching on and off optional features and {\ttfamily
-  with|without} switches usually related to optional packages. In the
-following we describe the most important of them (check {\ttfamily
-  configure --help} for the defaults and more options):
-
+The build system uses CMake to configure and build the hmc package. The
+following list gives all options (OFF by default unless specified):
 \begin{itemize}
-\item {\ttfamily --enable-mpi}:\\
-  This option switches on the support for MPI. On certain platforms it
-  automatically chooses the correct parallel compiler or searches for
-  a command {\ttfamily mpicc} in the search path.
-
-\item {\ttfamily --enable-gaugecopy}:\\
-  See section \ref{sec:dirac} for details on this option. It will
+\item {\ttfamily CMAKE\_POSITION\_INDEPENDENT\_CODE}: Build position-independent
+  code. ON by default.
+\item {\ttfamily BUILD\_SHARED\_LIBS}: Build the shared version of the hmc library.
+\item {\ttfamily TM\_USE\_FFTW}: Enable fftw support. 
+\item {\ttfamily TM\_USE\_CUDA}: Enable CUDA support.
+\item {\ttfamily TM\_USE\_HIP}: Enable HIP support (AMD or NVidia GPUs)
+\item {\ttfamily TM\_USE\_DDalphaAMG}: Enable DDalphaAMG support.
+\item {\ttfamily TM\_USE\_LEMON}: Use the lemon io library.
+\item {\ttfamily TM\_USE\_OMP}: Enable OpenMP ({\bf ON} by default)
+\item {\ttfamily TM\_FIXEDVOLUME}: Fix volume at compile time.
+\item {\ttfamily TM\_ENABLE\_ALIGNMENT}: Automatically or explicitly align arrays to
+  a byte boundary. Accepted values: auto, none, 16, 32, 64.
+\item {\ttfamily TM\_USE\_GAUGE\_COPY}: Enable use of a copy of the gauge field (ON
+  by default). See section \ref{sec:dirac} for details on this option. It will
   increase the memory requirement of the code.
+\item {\ttfamily TM\_USE\_HALFSPINOR}: Use a Dirac Op. with halfspinor exchange (ON
+  by default). See sub-section \ref{sec:dirac} for details. 
+\item {\ttfamily TM\_USE\_QUDA}: Enable QUDA support.
+\item {\ttfamily TM\_USE\_SHMEM}: Use shmem API.
+\item {\ttfamily TM\_ENABLE\_WARNINGS}: Enable all warnings (ON by default).
+\item {\ttfamily TM\_ENABLE\_TESTS}: Enable tests.
+\item {\ttfamily TM\_USE\_QPHIX}: Enable QPhiX.
+  \begin{itemize}
+  \item {\ttfamily TM\_QPHIX\_SOALEN}: QPhiX specific parameter (default is 4)
+  \item \textcolor{red}{{\ttfamily QPHIX\_DIR}}: Directory where QPhiX is installed.
+    The current QPhiX CMake build system does not export all the information
+    (include and lib directories) needed to compile hmc.
+  \item \textcolor{red}{\ttfamily QMP\_DIR}: Directory where QMP is installed (a
+    QPhiX dependency).
+    The current QPhiX CMake build system exports neither all information about its
+    include and lib directories nor its dependencies (QMP in this case).
+  \end{itemize}
+\item {\ttfamily TM\_USE\_MPI}: Enable MPI support.
+  \begin{itemize}
+  \item {\ttfamily TM\_PERSISTENT\_MPI}: Use persistent MPI calls for halfspinor.
+  \item {\ttfamily TM\_NONBLOCKING\_MPI}: Use non-blocking MPI calls for spinor and
+    gauge.
+  \item {\ttfamily TM\_MPI\_DIMENSION}: Use $n$ dimensional parallelisation ($XYZT$)
+    [default=4]. The number of parallel directions can be specified. $1, 2, 3$ and $4$
+    dimensional parallelisation is supported.
+  \item {\ttfamily TM\_USE\_LEMON}: Use the lemon io library.
+  \end{itemize}
+\end{itemize}
 
-\item {\ttfamily --enable-halfspinor}:\\
-  If this option is enabled the Dirac operator using half spinor
-  fields is used. See sub-section \ref{sec:dirac} for details. If this
-  feature is switched on, also the gauge copy feature is switched
-  on automatically. 
-
-%\item {\ttfamily --enable-shmem}:\\
-%  Use shared memory API instead of MPI for the communication of spinor
-%  fields. This is currently only usable on the Munich Altix machine.
-
-\item {\ttfamily --with-mpidimension=n}:\\
-  This option has only effect if the preceding one is switched
-  on. The number of parallel directions can be specified. 1,2,3 and 4
-  dimensional parallelisation is supported.
-
-\item {\ttfamily --with-lapack="<linker flags>"}:\\
-  the code requires lapack to be linked. All linker flags necessary
-  to do so must be specified here. Note, that {\ttfamily LIBS="..."}
-  works similar.
+The following minimal list of commands will configure and build the hmc package with
+minimal dependencies
+\begin{verbatim}
+mkdir build
+cd build
+cmake -DCMAKE_INSTALL_PREFIX=/my_path -DCMAKE_PREFIX_PATH=/my_c_line_path ..
+make -j
+make install
+\end{verbatim}
 
-\item {\ttfamily --with-limedir=<dir>}:\\
-  Tells configure where to find the lime package, which is required for
-  the build of the HMC. It is used for the ILDG file format.
- 
-\end{itemize}
+These instructions assume that the {\ttfamily c-lime} package is installed in {\ttfamily
+  /my\_c\_line\_path}. The {\ttfamily CMAKE\_PREFIX\_PATH} variable is a
+semicolon-separated list containing the installation paths of all
+dependencies.
 
-The configure script will guess at the very beginning on which
-platform the build is done. In case this fails or a cross compilation
-must be performed please use the option {\ttfamily --host=HOST}. For
-instance in order to compile for the BG/P one needs to specify
-{\ttfamily --host=ppc-ibm-bprts --build=ppc64-ibm-linux}. 
+Adding {\ttfamily -DTM\_USE\_MPI=ON} will enable MPI support with parallelization
+over spatial and temporal dimensions. The command line is then
+\begin{verbatim}
+cmake -DCMAKE_INSTALL_PREFIX=/my_path -DCMAKE_PREFIX_PATH=/my_c_line_path -DTM_USE_MPI=ON ..
+\end{verbatim}
+We can combine it with the lemon-io library (installed in {\ttfamily /my\_lemon\_path})
+\begin{verbatim}
+cmake -DCMAKE_INSTALL_PREFIX=/my_path \
+      -DCMAKE_PREFIX_PATH="/my_c_line_path;/my_lemon_path" \
+      -DTM_USE_MPI=ON \
+      -DTM_USE_LEMON=ON ..
+\end{verbatim}
 
-For certain architectures like the Blue Gene systems there are
-{\ttfamily README.arch} files in the top source directory with
-example configure calls.
+{\ttfamily QUDA} support (installed in {\ttfamily /my\_quda\_path}) can be added with
+\begin{verbatim}
+cmake -DCMAKE_INSTALL_PREFIX=/my_path \
+      -DCMAKE_PREFIX_PATH="/my_c_line_path;/my_lemon_path;/my_quda_path" \
+      -DTM_USE_MPI=ON \
+      -DTM_USE_LEMON=ON \
+      -DTM_USE_QUDA=ON \
+      -DTM_USE_CUDA=ON \
+      -DCMAKE_CUDA_ARCHITECTURES=90 ..
+\end{verbatim}
+Note that the command assumes that QUDA is compiled with CUDA support. AMD GPUs
+are also supported after replacing {\ttfamily -DTM\_USE\_CUDA=ON} with
+{\ttfamily -DTM\_USE\_HIP=ON} and compiling {\ttfamily QUDA} with {\ttfamily
+  HIP} support. The {\ttfamily ROCM} architecture is defined by the variable
+{\ttfamily CMAKE\_HIP\_ARCHITECTURES=gfxxxx}.
 
-\subsection{Building and Installing}
+{\ttfamily QPhiX} and/or {\ttfamily DDalphaAMG} support can be added with
+\begin{verbatim}
+cmake -DCMAKE_INSTALL_PREFIX=/my_path \
+      -DCMAKE_PREFIX_PATH="/my_c_line_path;/my_lemon_path;/my_quda_path;/my_path_ddalphaamg" \
+      -DTM_USE_MPI=ON \
+      -DTM_USE_LEMON=ON \
+      -DTM_USE_QUDA=ON \
+      -DTM_USE_CUDA=ON \
+      -DCMAKE_CUDA_ARCHITECTURES=90 \
+      -DTM_USE_QPHIX=ON \
+      -DQPHIX_DIR=/my_qphix_dir \
+      -DTM_USE_DDalphaAMG=ON \
+      -DQMP_DIR=/my_qmp_dir \
+      -DTM_USE_OMP=ON ..
+\end{verbatim}
+{\ttfamily QPhiX} cmake config support is incomplete and requires both the {\ttfamily QPhiX}
+and {\ttfamily QMP} installation directories to work properly.
 
-After successfully configuring the package the code can be build by
-simply typing {\ttfamily make} in the build directory. This will
-compile the standard executables. Typing {\ttfamily make install} will
-copy these executables into the install directory. The default install
-directory is {\ttfamily \$HOME/bin}, which can be influenced e.g. with
-the {\ttfamily --prefix} option to {\ttfamily configure}. 
+CMake has several relevant options that control the build. Compiler
+options are defined by the variables {\ttfamily CMAKE\_C\_FLAGS} and {\ttfamily
+  CMAKE\_CXX\_FLAGS}. CUDA and HIP compilation options are controlled by their
+equivalent {\ttfamily CMAKE\_\{CUDA/HIP\}\_FLAGS}.
 
+Adding for instance {\ttfamily -GNinja} to the {\ttfamily CMake} command line will use
+{\ttfamily ninja} instead of {\ttfamily make}.
 
 %%% Local Variables: 
 %%% mode: latex
diff --git a/doc/sample-input/sample-hmc-ddalphaamg-tmcloverdetratio.input b/doc/sample-input/sample-hmc-ddalphaamg-tmcloverdetratio.input
index 083932f19..4f1ac0af4 100644
--- a/doc/sample-input/sample-hmc-ddalphaamg-tmcloverdetratio.input
+++ b/doc/sample-input/sample-hmc-ddalphaamg-tmcloverdetratio.input
@@ -52,8 +52,8 @@ BeginMonomial CLOVERDET
   # nominator shift
   rho = 0.1
   kappa = 0.138
-  AcceptancePrecision =  1.e-20
-  ForcePrecision = 1.e-14
+  AcceptancePrecision =  1.e-24
+  ForcePrecision = 1.e-22
   Name = cloverdet
   solver = ddalphaamg
 EndMonomial
@@ -67,8 +67,8 @@ BeginMonomial CLOVERDETRATIO
   rho2 = 0.1
   CSW = 1.00
   kappa = 0.138
-  AcceptancePrecision =  1.e-20
-  ForcePrecision = 1.e-16
+  AcceptancePrecision =  1.e-24
+  ForcePrecision = 1.e-22
   Name = cloverdetratio
   solver = ddalphaamg
 EndMonomial
@@ -91,7 +91,7 @@ BeginOperator CLOVER
   2KappaMu = 0.01
   CSW = 1.00
   kappa = 0.138
-  SolverPrecision = 1e-16
+  SolverPrecision = 1e-24
   MaxSolverIterations = 1000
   useevenodd = no
   solver = ddalphaamg
diff --git a/doc/sample-input/sample-hmc-quda-cscs-beverin.input b/doc/sample-input/sample-hmc-quda-cscs-beverin.input
new file mode 100644
index 000000000..f332083f5
--- /dev/null
+++ b/doc/sample-input/sample-hmc-quda-cscs-beverin.input
@@ -0,0 +1,274 @@
+L=24
+T=48
+
+NrXProcs = 2
+NrYProcs = 2
+NrZprocs = 1
+
+ompnumthreads=32
+
+BarrierMonomialsConverge = yes
+
+Measurements = 20
+
+thermalisationsweeps = 0
+
+seed=146555
+
+Startcondition = hot
+InitialStoreCounter = 0
+
+2KappaMu = 0.0023801411000
+2KappaMuBar = 0.03875149727400
+2KappaEpsBar = 0.04103923289600
+CSW = 1.7112
+kappa =    0.1400083
+NSave = 10
+ThetaT = 1.0
+UseEvenOdd = yes
+userelativeprecision=yes
+
+ReversibilityCheck = no
+ReversibilityCheckIntervall = 10
+
+DebugLevel = 2
+
+ReproduceRandomNumbers = yes
+RanluxdLevel = 2
+
+BeginMeasurement CORRELATORS
+  Frequency = 2
+EndMeasurement
+
+BeginExternalInverter QUDA
+  Pipeline = 0
+  gcrNkrylov = 24
+  MGNumberOfLevels = 3
+  MGNumberOfVectors = 24, 32
+  MGSetupSolver = cg
+  MGSetup2KappaMu = 0.0023801411000
+  MGVerbosity = silent, silent, silent
+  MGSetupSolverTolerance = 5e-7, 5e-7
+  MGSetupMaxSolverIterations = 1500, 1500
+  MGCoarseSolverType = gcr, gcr, cagcr
+  MGSmootherType = cagcr, cagcr, cagcr
+  MGBlockSizesX = 3,2
+  MGBlockSizesY = 3,2
+  MGBlockSizesZ = 4,3
+  MGBlockSizesT = 3,2
+  MGResetSetupMDUThreshold = 1.0
+  MGRefreshSetupMDUThreshold = 0.0149
+  MGRefreshSetupMaxSolverIterations = 40, 40
+ 
+  MGCoarseMuFactor = 1.4, 2.4, 110.0
+  MGCoarseMaxSolverIterations = 45, 40, 10
+  MGCoarseSolverTolerance = 0.1, 0.35, 0.45
+  MGSmootherPostIterations = 2, 3, 6
+  MGSmootherPreIterations = 2, 0, 1
+  MGSmootherTolerance = 0.1, 0.1, 0.2
+  MGOverUnderRelaxationFactor = 0.90, 0.85, 1.00  
+EndExternalInverter
+
+BeginMonomial GAUGE
+  Type = Iwasaki
+  beta = 1.745
+  Timescale = 0
+  UseExternalLibrary = quda
+EndMonomial
+
+BeginMonomial CLOVERDET
+  Timescale = 1
+  kappa =    0.1400083
+  2KappaMu = 0.0023801411000
+  CSW = 1.7112
+  rho = 0.45
+  MaxSolverIterations = 5000
+  AcceptancePrecision =  1.e-23
+  ForcePrecision = 1.e-19
+  Name = cloverdetlight
+  solver= cg
+  UseExternalInverter = quda
+  UseSloppyPrecision = half
+  UseExternalLibrary = quda
+EndMonomial
+
+BeginMonomial CLOVERDETRATIO
+  Timescale = 1
+  kappa =    0.1400083
+  2KappaMu = 0.0023801411000
+  rho = 0.030
+  rho2 = 0.45
+  CSW = 1.7112
+  MaxSolverIterations = 1000
+  AcceptancePrecision =  1.e-23
+  ForcePrecision = 1.e-19
+  Name = cloverdetratio1light
+
+  solver= cg
+  UseExternalInverter = quda
+  UseSloppyPrecision = half
+  UseExternalLibrary = quda
+EndMonomial
+
+BeginMonomial CLOVERDETRATIO
+  Timescale = 2
+  kappa =    0.1400083
+  2KappaMu = 0.0023801411000
+  rho = 0.0030
+  # rho2 = 0.045
+  rho2 = 0.030
+  CSW = 1.7112
+  MaxSolverIterations = 1000
+  AcceptancePrecision =  1.e-23
+  ForcePrecision = 1.e-20
+  Name = cloverdetratio2light
+  solver = mg
+  UseExternalInverter = quda
+  UseSloppyPrecision = single
+
+  HB_solver = cg
+  HB_usesloppyprecision = half
+  HB_UseExternalInverter = quda
+  HB_MaxSolverIterations = 3000
+  UseExternalLibrary = quda
+EndMonomial
+
+
+BeginMonomial CLOVERDETRATIO
+  Timescale = 3
+  kappa =    0.1400083
+  2KappaMu = 0.0023801411000
+  rho = 0.0
+  rho2 = 0.0030
+  CSW = 1.7112
+  MaxSolverIterations = 1000
+  AcceptancePrecision =  1.e-23
+  ForcePrecision = 1.e-20
+  Name = cloverdetratio3light
+  solver = mg
+  UseExternalInverter = quda
+  UseSloppyPrecision = single
+  UseExternalLibrary = quda
+EndMonomial
+
+
+BeginMonomial NDCLOVERRAT
+  Timescale = 1
+  kappa =    0.1400083
+  CSW = 1.7112
+  AcceptancePrecision =  1e-23
+  ForcePrecision = 1e-19
+  StildeMin = 0.0000376
+  StildeMax = 4.7
+  Name = ndcloverrat1
+  DegreeOfRational = 10
+  Cmin = 0
+  Cmax = 3
+  ComputeEVFreq = 0
+  2KappaMuBar = 0.03875149727400
+  2KappaEpsBar = 0.04103923289600
+  AddTrLog = yes
+  solver= cgmmsnd
+  UseExternalInverter = quda
+  UseSloppyPrecision = single
+  RefinementPrecision = half
+  MaxSolverIterations = 5000
+EndMonomial
+
+BeginMonomial NDCLOVERRAT
+  Timescale = 2
+  kappa =    0.1400083
+  CSW = 1.7112
+  AcceptancePrecision =  1e-23
+  ForcePrecision = 1e-19
+  StildeMin = 0.0000376
+  StildeMax = 4.7
+  Name = ndcloverrat2
+  DegreeOfRational = 10
+  Cmin = 4
+  Cmax = 6
+  ComputeEVFreq = 0
+  2KappaMuBar = 0.03875149727400
+  2KappaEpsBar = 0.04103923289600
+  AddTrLog = no
+  solver= cgmmsnd
+  UseExternalInverter = quda
+  UseSloppyPrecision = single
+  RefinementPrecision = half
+  MaxSolverIterations = 5000
+EndMonomial
+
+BeginMonomial NDCLOVERRAT
+  Timescale = 3
+  kappa =    0.1400083
+  CSW = 1.7112
+  AcceptancePrecision =  1e-23
+  ForcePrecision = 1e-19
+  StildeMin = 0.0000376
+  StildeMax = 4.7
+  Name = ndcloverrat3
+  DegreeOfRational = 10
+  Cmin = 7
+  Cmax = 9
+  ComputeEVFreq = 0
+  2KappaMuBar = 0.03875149727400
+  2KappaEpsBar = 0.04103923289600
+  AddTrLog = no
+  solver= cgmmsnd
+  UseExternalInverter = quda
+  UseSloppyPrecision = single
+  RefinementPrecision = half
+  MaxSolverIterations = 5000
+EndMonomial
+
+BeginMonomial NDCLOVERRATCOR
+  Timescale = 1
+  kappa =    0.1400083
+  CSW = 1.7112
+  AcceptancePrecision =  1e-23
+  ForcePrecision = 1e-19
+  StildeMin = 0.0000376
+  StildeMax = 4.7
+  Name = ndcloverratcor
+  DegreeOfRational = 10
+  ComputeEVFreq = 0
+  #UseExternalEigsolver = quda
+  2KappaMuBar = 0.03875149727400
+  2KappaEpsBar = 0.04103923289600
+  solver= cgmmsnd
+  UseExternalInverter = quda
+  UseSloppyPrecision = single
+  RefinementPrecision = half
+  MaxSolverIterations = 15000
+EndMonomial
+
+BeginIntegrator
+  Type0 = 2MN
+  Type1 = 2MN
+  Type2 = 2MN
+  Type3 = 2MN
+  IntegrationSteps0 = 1
+  IntegrationSteps1 = 1
+  IntegrationSteps2 = 1
+  IntegrationSteps3 = 1
+  tau = 0.03
+  Lambda0 =  0.19318332750
+  Lambda1 = 0.194
+  Lambda2 = 0.196
+  Lambda3 = 0.198
+  NumberOfTimescales =  4
+  MonitorForces = no
+EndIntegrator
+
+BeginOperator CLOVER
+  CSW = 1.7112
+  kappa =    0.1400083
+  2KappaMu = 0.0023801411000
+  SolverPrecision = 1e-20
+  useevenodd = yes
+  useexternalinverter = quda
+  usesloppyprecision = single
+  solver = mg
+  MaxSolverIterations = 500
+EndOperator
+
diff --git a/doc/sample-input/sample-hmc_nf211_tmclover_quda.input b/doc/sample-input/sample-hmc_nf211_tmclover_quda.input
deleted file mode 120000
index 9587e4499..000000000
--- a/doc/sample-input/sample-hmc_nf211_tmclover_quda.input
+++ /dev/null
@@ -1 +0,0 @@
-../.ci/sample-input/quda.in
\ No newline at end of file
diff --git a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000006 b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000006
index 4734cef39..e39a749b5 100644
--- a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000006
+++ b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000006
@@ -3,12 +3,12 @@
 1  1  2  4.420688e-01  4.450500e-01
 1  1  3  7.635246e-02  7.292410e-02
 1  1  4  2.547268e-02  0.000000e+00
-2  1  0  -6.505429e-01  0.000000e+00
+2  1  0  -6.505430e-01  0.000000e+00
 2  1  1  1.641175e+00  -1.692241e+00
 2  1  2  2.578280e-01  -2.469258e-01
 2  1  3  3.891519e-02  -3.979994e-02
 2  1  4  -1.066065e-03  0.000000e+00
-6  1  0  -8.320553e-02  0.000000e+00
+6  1  0  -8.320554e-02  0.000000e+00
 6  1  1  1.107951e-01  -1.188322e-01
 6  1  2  1.658398e-02  -2.028692e-02
 6  1  3  3.560260e-03  -3.057380e-03
diff --git a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000008 b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000008
index 5ab1cdbb4..b7d02ad69 100644
--- a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000008
+++ b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000008
@@ -2,14 +2,14 @@
 1  1  1  3.340338e+00  2.839302e+00
 1  1  2  6.626364e-01  4.646722e-01
 1  1  3  1.371573e-01  8.789097e-02
-1  1  4  4.330242e-02  0.000000e+00
+1  1  4  4.330241e-02  0.000000e+00
 2  1  0  1.081758e+00  0.000000e+00
 2  1  1  1.893688e+00  -1.631633e+00
 2  1  2  3.752751e-01  -2.454020e-01
-2  1  3  7.408655e-02  -4.233174e-02
-2  1  4  7.188709e-03  0.000000e+00
-6  1  0  9.491140e-01  0.000000e+00
+2  1  3  7.408654e-02  -4.233174e-02
+2  1  4  7.188707e-03  0.000000e+00
+6  1  0  9.491139e-01  0.000000e+00
 6  1  1  1.554260e-01  -1.443985e-01
-6  1  2  3.971019e-02  -2.313646e-02
-6  1  3  7.194545e-03  -3.899612e-03
-6  1  4  9.475720e-04  0.000000e+00
+6  1  2  3.971018e-02  -2.313646e-02
+6  1  3  7.194543e-03  -3.899613e-03
+6  1  4  9.475708e-04  0.000000e+00
diff --git a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000010 b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000010
index 228672c7b..7a5a54ece 100644
--- a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000010
+++ b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000010
@@ -1,15 +1,15 @@
 1  1  0  3.387886e+01  0.000000e+00
 1  1  1  2.788626e+00  2.999193e+00
-1  1  2  4.787638e-01  5.208716e-01
+1  1  2  4.787637e-01  5.208716e-01
 1  1  3  1.020219e-01  1.109699e-01
 1  1  4  4.286604e-02  0.000000e+00
-2  1  0  -9.232348e-01  0.000000e+00
+2  1  0  -9.232351e-01  0.000000e+00
 2  1  1  1.498371e+00  -1.651027e+00
 2  1  2  2.458386e-01  -2.892127e-01
-2  1  3  5.026310e-02  -5.299146e-02
-2  1  4  9.781858e-05  0.000000e+00
-6  1  0  7.982990e-01  0.000000e+00
-6  1  1  1.062982e-01  -1.420299e-01
-6  1  2  2.513887e-02  -2.502436e-02
-6  1  3  6.373549e-03  -6.618316e-03
-6  1  4  -3.151778e-04  0.000000e+00
+2  1  3  5.026309e-02  -5.299147e-02
+2  1  4  9.781639e-05  0.000000e+00
+6  1  0  7.982995e-01  0.000000e+00
+6  1  1  1.062981e-01  -1.420299e-01
+6  1  2  2.513885e-02  -2.502437e-02
+6  1  3  6.373541e-03  -6.618322e-03
+6  1  4  -3.151829e-04  0.000000e+00
diff --git a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000012 b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000012
index f2523546f..10e85c917 100644
--- a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000012
+++ b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000012
@@ -1,15 +1,15 @@
 1  1  0  3.382234e+01  0.000000e+00
 1  1  1  2.995261e+00  2.875812e+00
-1  1  2  5.063795e-01  5.100208e-01
-1  1  3  1.006529e-01  1.174076e-01
-1  1  4  4.782682e-02  0.000000e+00
-2  1  0  -2.748838e-01  0.000000e+00
+1  1  2  5.063796e-01  5.100208e-01
+1  1  3  1.006530e-01  1.174076e-01
+1  1  4  4.782684e-02  0.000000e+00
+2  1  0  -2.748842e-01  0.000000e+00
 2  1  1  1.546980e+00  -1.573491e+00
-2  1  2  2.491686e-01  -2.772142e-01
-2  1  3  4.337945e-02  -6.241266e-02
-2  1  4  -6.321273e-03  0.000000e+00
+2  1  2  2.491687e-01  -2.772142e-01
+2  1  3  4.337949e-02  -6.241267e-02
+2  1  4  -6.321260e-03  0.000000e+00
 6  1  0  5.387689e-01  0.000000e+00
 6  1  1  1.311453e-01  -1.202091e-01
 6  1  2  2.469061e-02  -3.317881e-02
-6  1  3  1.976094e-03  -8.455000e-03
-6  1  4  -1.927750e-03  0.000000e+00
+6  1  3  1.976102e-03  -8.454999e-03
+6  1  4  -1.927752e-03  0.000000e+00
diff --git a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000014 b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000014
index b19dceb50..cd7513818 100644
--- a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000014
+++ b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000014
@@ -1,15 +1,15 @@
-1  1  0  3.384332e+01  0.000000e+00
-1  1  1  3.058971e+00  2.867482e+00
-1  1  2  5.927879e-01  5.535211e-01
-1  1  3  1.463143e-01  1.372395e-01
-1  1  4  6.632476e-02  0.000000e+00
-2  1  0  6.828284e-01  0.000000e+00
-2  1  1  1.675028e+00  -1.532287e+00
-2  1  2  3.123937e-01  -3.063247e-01
-2  1  3  6.949756e-02  -6.162975e-02
-2  1  4  7.048824e-03  0.000000e+00
-6  1  0  -2.533447e-01  0.000000e+00
-6  1  1  1.515393e-01  -1.275537e-01
-6  1  2  2.346966e-02  -4.019053e-02
-6  1  3  1.433944e-03  -1.463813e-02
-6  1  4  -7.198556e-03  0.000000e+00
+1  1  0  3.384331e+01  0.000000e+00
+1  1  1  3.058970e+00  2.867480e+00
+1  1  2  5.927875e-01  5.535203e-01
+1  1  3  1.463141e-01  1.372392e-01
+1  1  4  6.632463e-02  0.000000e+00
+2  1  0  6.828323e-01  0.000000e+00
+2  1  1  1.675028e+00  -1.532286e+00
+2  1  2  3.123934e-01  -3.063241e-01
+2  1  3  6.949739e-02  -6.162966e-02
+2  1  4  7.048780e-03  0.000000e+00
+6  1  0  -2.533367e-01  0.000000e+00
+6  1  1  1.515395e-01  -1.275537e-01
+6  1  2  2.346976e-02  -4.019037e-02
+6  1  3  1.433995e-03  -1.463802e-02
+6  1  4  -7.198496e-03  0.000000e+00
diff --git a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000016 b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000016
index 3c9999abe..c51999157 100644
--- a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000016
+++ b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000016
@@ -1,15 +1,15 @@
-1  1  0  3.350499e+01  0.000000e+00
-1  1  1  2.932334e+00  2.982039e+00
-1  1  2  5.434805e-01  5.351988e-01
-1  1  3  1.330346e-01  1.288597e-01
-1  1  4  6.230389e-02  0.000000e+00
-2  1  0  -5.539881e-01  0.000000e+00
-2  1  1  1.574741e+00  -1.569952e+00
-2  1  2  2.688100e-01  -2.687716e-01
-2  1  3  5.300017e-02  -5.329829e-02
-2  1  4  -3.085424e-03  0.000000e+00
-6  1  0  -7.089285e-01  0.000000e+00
-6  1  1  1.479719e-01  -1.378143e-01
-6  1  2  3.012575e-02  -2.458313e-02
-6  1  3  1.285082e-02  -2.411722e-03
-6  1  4  4.275350e-03  0.000000e+00
+1  1  0  3.350500e+01  0.000000e+00
+1  1  1  2.932339e+00  2.982047e+00
+1  1  2  5.434813e-01  5.352003e-01
+1  1  3  1.330350e-01  1.288600e-01
+1  1  4  6.230422e-02  0.000000e+00
+2  1  0  -5.539986e-01  0.000000e+00
+2  1  1  1.574744e+00  -1.569958e+00
+2  1  2  2.688097e-01  -2.687722e-01
+2  1  3  5.299989e-02  -5.329831e-02
+2  1  4  -3.085467e-03  0.000000e+00
+6  1  0  -7.089245e-01  0.000000e+00
+6  1  1  1.479736e-01  -1.378157e-01
+6  1  2  3.012610e-02  -2.458331e-02
+6  1  3  1.285099e-02  -2.411671e-03
+6  1  4  4.275483e-03  0.000000e+00
diff --git a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000018 b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000018
index 82a17be37..296f3a58a 100644
--- a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000018
+++ b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/onlinemeas.000018
@@ -1,15 +1,15 @@
-1  1  0  3.327833e+01  0.000000e+00
-1  1  1  2.914884e+00  2.887613e+00
-1  1  2  5.422550e-01  5.327642e-01
-1  1  3  1.342023e-01  1.336877e-01
-1  1  4  6.497096e-02  0.000000e+00
-2  1  0  2.431595e-01  0.000000e+00
-2  1  1  1.491172e+00  -1.524832e+00
-2  1  2  2.682825e-01  -2.606945e-01
-2  1  3  5.792593e-02  -5.123251e-02
-2  1  4  4.446912e-03  0.000000e+00
-6  1  0  -6.232563e-01  0.000000e+00
-6  1  1  1.495484e-01  -1.463705e-01
-6  1  2  3.212356e-02  -2.883108e-02
-6  1  3  1.268960e-02  -2.096584e-03
-6  1  4  5.938212e-03  0.000000e+00
+1  1  0  3.327825e+01  0.000000e+00
+1  1  1  2.914904e+00  2.887607e+00
+1  1  2  5.422698e-01  5.327645e-01
+1  1  3  1.342080e-01  1.336886e-01
+1  1  4  6.497243e-02  0.000000e+00
+2  1  0  2.431385e-01  0.000000e+00
+2  1  1  1.491198e+00  -1.524830e+00
+2  1  2  2.682909e-01  -2.606948e-01
+2  1  3  5.792842e-02  -5.123145e-02
+2  1  4  4.448118e-03  0.000000e+00
+6  1  0  -6.233064e-01  0.000000e+00
+6  1  1  1.495564e-01  -1.463691e-01
+6  1  2  3.212387e-02  -2.883197e-02
+6  1  3  1.268921e-02  -2.097509e-03
+6  1  4  5.937257e-03  0.000000e+00
diff --git a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/output.data b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/output.data
index 420c5bd08..82ded2a49 100644
--- a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/output.data
+++ b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/output.data
@@ -1,20 +1,20 @@
-00000000 0.291802998166 0.150813159478 8.600084e-01 11 216 17 116 1 1.695511e+01
-00000001 0.367103026382 0.123429505793 8.838839e-01 6 100 10 61 1 8.344895e+00
-00000002 0.411116783387 0.045573902534 9.554490e-01 6 121 11 65 1 9.167332e+00
-00000003 0.437627142550 0.260037508415 7.710227e-01 6 125 12 65 1 9.248657e+00
-00000004 0.461210881231 -0.087496153400 1.091438e+00 12 250 18 130 1 1.827640e+01
-00000005 0.477370074683 0.331285967965 7.179998e-01 7 125 12 69 1 9.230436e+00
-00000006 0.490500781082 0.211017029075 8.097603e-01 7 137 13 78 1 9.551435e+00
-00000007 0.499497899365 -0.271498625284 1.311929e+00 7 145 13 78 1 9.687941e+00
-00000008 0.507695934800 -0.453122057246 1.573216e+00 14 300 21 156 1 1.978807e+01
-00000009 0.516327346026 -0.140664545065 1.151038e+00 7 150 14 78 1 9.840636e+00
-00000010 0.521231381097 0.051098995079 9.501846e-01 8 150 14 78 1 9.848677e+00
-00000011 0.529386494343 -0.099410380527 1.104519e+00 8 150 15 78 1 9.740137e+00
-00000012 0.529386494343 0.376606119881 6.861863e-01 16 300 22 176 0 2.002176e+01
-00000013 0.531891500121 -0.145672763511 1.156818e+00 8 150 15 85 1 1.024416e+01
-00000014 0.536421758394 -0.180197251221 1.197454e+00 8 150 15 91 1 1.036149e+01
-00000015 0.540314511899 -0.014164886897 1.014266e+00 8 151 16 91 1 1.031493e+01
-00000016 0.545384396635 0.456731404050 6.333504e-01 16 346 24 182 1 2.143623e+01
-00000017 0.549339344577 -0.381480879502 1.464452e+00 8 175 16 91 1 1.008381e+01
-00000018 0.548855930680 0.308666362994 7.344258e-01 8 175 16 91 1 1.027304e+01
-00000019 0.551922552298 -0.257831496596 1.294121e+00 9 175 16 91 1 1.029375e+01
+00000000 0.291802998166 0.150813205437 8.600083e-01 13 310 20 148 1 1.796150e+01
+00000001 0.367103026382 0.123429431545 8.838840e-01 6 150 12 77 1 9.083342e+00
+00000002 0.411116783383 0.045573922136 9.554490e-01 7 162 12 78 1 9.169988e+00
+00000003 0.437627142549 0.260037513702 7.710227e-01 7 175 14 78 1 9.476499e+00
+00000004 0.461210881240 -0.087496158702 1.091438e+00 14 350 21 176 1 2.006502e+01
+00000005 0.477370074736 0.331285968350 7.179998e-01 8 175 14 91 1 9.868406e+00
+00000006 0.490500781150 0.211016940603 8.097603e-01 8 197 15 91 1 1.009657e+01
+00000007 0.499497899357 -0.271498597692 1.311929e+00 8 200 16 91 1 1.018464e+01
+00000008 0.507695934126 -0.453122351633 1.573217e+00 16 400 24 200 1 2.089432e+01
+00000009 0.516327346842 -0.140664533460 1.151038e+00 9 200 16 104 1 1.039694e+01
+00000010 0.521231381453 0.051098763556 9.501848e-01 9 219 17 104 1 1.061809e+01
+00000011 0.529386496220 -0.099409891257 1.104519e+00 9 225 17 104 1 1.063195e+01
+00000012 0.529386496220 0.376609569126 6.861839e-01 18 450 26 208 0 2.172998e+01
+00000013 0.531891491082 -0.145673731306 1.156819e+00 9 225 18 104 1 1.213700e+01
+00000014 0.536421760877 -0.180205391949 1.197463e+00 9 225 18 104 1 1.241186e+01
+00000015 0.540314479338 -0.014163550569 1.014264e+00 9 225 18 113 1 1.196440e+01
+00000016 0.545384434240 0.456736123271 6.333474e-01 19 450 27 234 1 2.358034e+01
+00000017 0.549339288645 -0.381464882069 1.464428e+00 10 228 19 117 1 1.220460e+01
+00000018 0.548856488255 0.308746327179 7.343670e-01 10 247 19 117 1 1.213587e+01
+00000019 0.551923185293 -0.257738529083 1.294000e+00 10 244 19 117 1 1.161498e+01
diff --git a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/return_check.data b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/return_check.data
index 9baf9a7c7..8bb8c4b85 100644
--- a/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/return_check.data
+++ b/doc/sample-output/hmc-ddalphaamg-tmcloverdetratio/return_check.data
@@ -1,5 +1,5 @@
-00000000 ddh = -2.6085e-08 ddh/dh = -1.7296e-07 ddh/H = -1.9734e-13 ddU= 2.6765e-11
-00000004 ddh = 7.9704e-09 ddh/dh = -9.1094e-08 ddh/H = 4.5374e-14 ddU= 1.0448e-11
-00000008 ddh = 6.6077e-08 ddh/dh = -1.4583e-07 ddh/H = 3.5941e-13 ddU= 3.3593e-11
-00000012 ddh = 2.1891e-08 ddh/dh = 5.8126e-08 ddh/H = 1.1640e-13 ddU= 4.4380e-11
-00000016 ddh = 1.7062e-07 ddh/dh = 3.7356e-07 ddh/H = 9.0165e-13 ddU= 1.1339e-10
+00000000 ddh = 2.5750e-11 ddh/dh = 1.7074e-10 ddh/H = 1.9480e-16 ddU= 3.9489e-15
+00000004 ddh = 1.4438e-11 ddh/dh = -1.6502e-10 ddh/H = 8.2195e-17 ddU= 4.0239e-15
+00000008 ddh = -1.1642e-10 ddh/dh = 2.5692e-10 ddh/H = -6.3322e-16 ddU= 6.5245e-15
+00000012 ddh = 1.2739e-10 ddh/dh = 3.3824e-10 ddh/H = 6.7734e-16 ddU= 5.1587e-15
+00000016 ddh = 3.2742e-11 ddh/dh = 7.1686e-11 ddh/H = 1.7303e-16 ddU= 9.5886e-15
diff --git a/doc/sample-output/hmc_nf211_tmclover_quda b/doc/sample-output/hmc_nf211_tmclover_quda
deleted file mode 120000
index c4ac0d4ff..000000000
--- a/doc/sample-output/hmc_nf211_tmclover_quda
+++ /dev/null
@@ -1 +0,0 @@
-../.ci/sample-output/cscs-test
\ No newline at end of file
diff --git a/include/tmlqcd_config_internal.h.in b/include/tmlqcd_config_internal.h.in
deleted file mode 100644
index 037ad84a5..000000000
--- a/include/tmlqcd_config_internal.h.in
+++ /dev/null
@@ -1,180 +0,0 @@
-/* Hand-written "config.h.in". Note that it's not this file which should be
- * included, but rather the "tmlqcd_config.h" which renames a few of the defines
- * into static const variables, following the convention used by the USQCD build
- * systems, for example. */
-
-/* We are on a CRAY */
-#undef CRAY
-
-/* lapack available */
-#undef HAVE_LAPACK
-
-/* Define to 1 if you have the `lime' library (-llime). */
-#undef HAVE_LIBLIME
-
-/* Define to 1 if you have the `lemon' library (-llemon). */
-#undef HAVE_LIBLEMON
-
-/* 1 if clock_gettime is available for use in benchmark */
-#undef HAVE_CLOCK_GETTIME
-
-/* Compile with MPI support */
-#undef TM_USE_MPI
-
-/* Compile with OpenMP support */
-#undef TM_USE_OMP
-
-/* Compile with FFTW support */
-#undef HAVE_FFTW
-
-/* Fortran has not extra _ */
-#undef NOF77_
-
-/* Define to the address where bug reports for this package should be sent. */
-#undef PACKAGE_BUGREPORT
-
-/* Define to the full name of this package. */
-#undef PACKAGE_NAME
-
-/* Define to the full name and version of this package. */
-#undef PACKAGE_STRING
-
-/* Define to the one symbol short name of this package. */
-#undef PACKAGE_TARNAME
-
-/* Define to the version of this package. */
-#undef PACKAGE_VERSION
-
-/* X parallelisation */
-#undef PARALLELX
-
-/* XY parallelisation */
-#undef PARALLELXY
-
-/* XYZ parallelisation */
-#undef PARALLELXYZ
-
-/* One dimensional parallelisation */
-#undef PARALLELT
-
-/* Two dimensional parallelisation */
-#undef PARALLELXT
-
-/* Three dimensional parallelisation */
-#undef PARALLELXYT
-
-/* Four dimensional parallelisation */
-#undef PARALLELXYZT
-
-/* Fixed volume at compiletime */
-#undef FIXEDVOLUME
-
-/* Define to 1 if fseeko (and presumably ftello) exists and is declared. */
-#undef HAVE_FSEEKO
-
-/* Alignment for arrays -- necessary for SSE and automated vectorization */
-#undef ALIGN_BASE
-
-/* Alignment compiler hint macro */
-#undef ALIGN
-
-/* Alignment for 32bit arrays -- necessary for SSE and automated vectorization */
-#undef ALIGN_BASE32
-
-/* Alignment of 32bit fields, compiler hint macro */
-#undef ALIGN32
-
-/* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a
-   `char[]'. */
-#undef YYTEXT_POINTER
-
-/* Number of bits in a file offset, on hosts where this is settable. */
-#undef _FILE_OFFSET_BITS
-
-/* Construct an extra copy of the gauge fields */
-#undef _GAUGE_COPY
-
-/* Define to 1 to make fseeko visible on some hosts (e.g. glibc 2.2). */
-#undef _LARGEFILE_SOURCE
-
-/* Define for large files, on AIX-style hosts. */
-#undef _LARGE_FILES
-
-/* Use even/odd geometry in the gauge fields */
-#undef _NEW_GEOMETRY
-
-/* x86 64 Bit architecture */
-#undef _x86_64
-
-/* Define to empty if `const' does not conform to ANSI C. */
-#undef const
-
-/* Define to `__inline__' or `__inline' if that's what the C compiler
-   calls it, or to nothing if 'inline' is not supported under any name.  */
-#ifndef __cplusplus
-#undef inline
-#endif
-
-/* Define to `long' if <sys/types.h> does not define. */
-#undef off_t
-
-/* Define to `unsigned' if <sys/types.h> does not define. */
-#undef size_t
-
-/* Define to 1 if you have the <stdint.h> header file. */
-#undef HAVE_STDINT_H
-
-/* Define to 1 if you have the <sys/types.h> header file. */
-#undef HAVE_SYS_TYPES_H
-
-/* Define to 1 if the system has the type `uint16_t'. */
-#undef HAVE_UINT16_T
-
-/* Define to 1 if the system has the type `uint32_t'. */
-#undef HAVE_UINT32_T
-
-/* Define to 1 if the system has the type `uint64_t'. */
-#undef HAVE_UINT64_T
-
-/* Define to 1 if you have the <unistd.h> header file. */
-#undef HAVE_UNISTD_H
-
-/* Define to 1 if Dirac operator with halfspinor should be used */
-#undef _USE_HALFSPINOR
-
-/* Define to 1 if shmem API should be used */
-#undef _USE_SHMEM
-
-/* Define to 1 if KOJAK instrumentalisation should be done*/
-#undef _KOJAK_INST
-
-/* Define to equivalent of C99 restrict keyword, or to nothing if this is not
-   supported. Do not define if restrict is supported directly. */
-#undef restrict
-
-/* Define to 1 if persistent MPI calls for halfspinor should be used */
-#undef _PERSISTENT
-
-/* Define to 1 if non-blocking MPI calls for spinor and gauge should be used */
-#undef _NON_BLOCKING
-
-/* Define to 1 if you have the `quda' library (-lquda). */
-#undef HAVE_LIBQUDA
-
-/* Using QUDA GPU */
-#undef TM_USE_QUDA
-
-/* Using experimental QUDA version */
-#undef TM_QUDA_EXPERIMENTAL
-
-/* Using QUDA fermionic forces */
-#undef TM_QUDA_FERMIONIC_FORCES
-
-/* Using DDalphaAMG */
-#undef DDalphaAMG
-
-/* Using QPHIX */
-#undef TM_USE_QPHIX
-
-/* Structure of Array length to use with QPhiX */
-#undef QPHIX_SOALEN
diff --git a/install-sh b/install-sh
deleted file mode 100644
index e69de29bb..000000000
diff --git a/io/Makefile.in b/io/Makefile.in
deleted file mode 100644
index 41b5b78ce..000000000
--- a/io/Makefile.in
+++ /dev/null
@@ -1,135 +0,0 @@
-
-srcdir = @srcdir@
-top_builddir =  @top_builddir@
-abs_top_builddir = @abs_top_builddir@
-top_srcdir = @top_srcdir@
-abs_top_srcdir = @abs_top_srcdir@
-subdir = io
-builddir = @builddir@
-
-CFLAGS = @CFLAGS@
-DEPFLAGS = @DEPFLAGS@
-LDFLAGS = @LDFLAGS@
-DEFS = @DEFS@
-OPTARGS = @OPTARGS@
-
-AR = @AR@
-RANLIB = @RANLIB@
-CC = @CC@
-CCDEP = @CCDEP@
-CCLD = $(CC)
-LINK = $(CCLD) $(CFLAGS) $(LDFLAGS) ${OPTARGS} -o $@
-LEX = @LEX@
-AUTOCONF = @AUTOCONF@
-DEFS = @DEFS@
-
-LEMON_AVAILABLE = @LEMON_AVAILABLE@
-
-INCLUDES = @INCLUDES@
-LDADD =
-COMPILE = ${CC} ${DEFS} ${INCLUDES} ${CFLAGS} ${OPTARGS}
-
-LIBRARIES = libio
-
-libio_TARGETS = utils_engineering \
-		utils_parse_checksum_xml \
-		utils_write_message \
-		utils_read_message \
-		gauge_write_binary \
-		gauge_read_binary \
-		gauge_read \
-		gauge_write \
-		utils_write_xlf \
-		utils_write_xlf_xml \
-		utils_write_ildg_format \
-		utils_write_header \
-		utils_write_checksum \
-		utils_write_inverter_info \
-		utils_kill_with_error \
-		utils_construct_reader \
-		utils_destruct_reader \
-		utils_construct_writer \
-		utils_destruct_writer \
-		utils_close_writer_record \
-		utils_close_reader_record \
-		utils_write_first_message \
-		utils_parse_propagator_type \
-		utils_parse_ildgformat_xml \
-		params_construct_ildgFormat \
-		params_construct_propagatorFormat \
-		params_construct_sourceFormat \
-		params_construct_xlfInfo \
-		params_construct_InverterInfo \
-		spinor_write \
-		spinor_read \
-		spinor_write_binary \
-		spinor_read_binary \
-		spinor_write_info \
-		spinor_write_source_format \
-		spinor_write_propagator_format \
-		spinor_write_propagator_type \
-		utils DML_crc32 dml \
-		eospinor_write \
-		eospinor_read \
-		io_cm \
-		deri_write_stdout spinor_write_stdout sw_write_stdout \
-		gauge_write_luscher_binary
-
-libio_OBJECTS = $(addsuffix .o, ${libio_TARGETS})
-
-# default rule
-
-all: Makefile dep libio.a
-
-# rules for debugging
-debug all-debug: CFLAGS := $(CFLAGS) @DEBUG_FLAG@
-debug all-debug: all
-
-# rules for profiling information
-profile all-profile: CFLAGS := $(filter-out -fomit-frame-pointer,${CFLAGS}) @PROFILE_FLAG@
-profile all-profile: all
-
-
-#include dep rules
-
-
--include $(addsuffix .d,${libio_TARGETS})
-
-include ${top_srcdir}/Makefile.global
-
-# rule to compile objects
-
-%.o: ${srcdir}/%.c %.d Makefile ${abs_top_builddir}/include/tmlqcd_config_internal.h
-	$(COMPILE) -c $<
-
-
-# rule to make libio
-libio.a: ${libio_OBJECTS} Makefile
-	@rm -f libio.a
-	@${AR} cru libio.a $(libio_OBJECTS)
-	@$(RANLIB) libio.a
-	@cp libio.a ${top_builddir}/lib/libio.a
-
-# rule to generate .d files
-
-$(addsuffix .d,$(libio_TARGETS)): %.d: ${srcdir}/%.c Makefile
-	@$(CCDEP) ${DEFS} ${DEPFLAGS} ${INCLUDES} $< > $@
-
-# rule to make dependencies
-
-dep: ${addsuffix .d, ${libio_TARGETS}}
-
-# rules to clean
-
-compile-clean: Makefile
-	rm -f ${$(addsuffix _OBJECTS, ${LIBRARIES})} *.d
-
-clean: compile-clean
-	rm -f $(addsuffix .a, ${LIBRARIES})
-	rm -f ../lib/libio.a
-
-distclean: clean
-	rm -f Makefile
-
-
-.PHONY: all dep clean compile-clean distclean debug all-debug profile all-profile
diff --git a/operator/hopping.h b/operator/hopping.h
deleted file mode 100644
index f3b948c49..000000000
--- a/operator/hopping.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/**********************************************************************
- *
- * Copyright (C) 2012 Carsten Urbach
- *
- * BG and halfspinor versions (C) 2007, 2008 Carsten Urbach
- *
- * This file is based on an implementation of the Dirac operator
- * written by Martin Luescher, modified by Martin Hasenbusch in 2002
- * and modified and extended by Carsten Urbach from 2003-2008
- *
- * This file is part of tmLQCD.
- *
- * tmLQCD is free software: you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation, either version 3 of the License, or
- * (at your option) any later version.
- *
- * tmLQCD is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
- *
- **********************************************************************/
-
-#ifndef _HOPPING_H
-#define _HOPPING_H
-
-#define _declare_regs()      \
-  su3_vector ALIGN psi, chi; \
-  spinor ALIGN temp;
-
-#define _hop_t_p()                      \
-  _vector_add(psi, sp->s0, sp->s2);     \
-  _su3_multiply(chi, (*up), psi);       \
-  _complex_times_vector(psi, ka0, chi); \
-  _vector_assign(temp.s0, psi);         \
-  _vector_assign(temp.s2, psi);         \
-  _vector_add(psi, sp->s1, sp->s3);     \
-  _su3_multiply(chi, (*up), psi);       \
-  _complex_times_vector(psi, ka0, chi); \
-  _vector_assign(temp.s1, psi);         \
-  _vector_assign(temp.s3, psi);
-
-#define _hop_t_m()                         \
-  _vector_sub(psi, sm->s0, sm->s2);        \
-  _su3_inverse_multiply(chi, (*um), psi);  \
-  _complexcjg_times_vector(psi, ka0, chi); \
-  _vector_add_assign(temp.s0, psi);        \
-  _vector_sub_assign(temp.s2, psi);        \
-  _vector_sub(psi, sm->s1, sm->s3);        \
-  _su3_inverse_multiply(chi, (*um), psi);  \
-  _complexcjg_times_vector(psi, ka0, chi); \
-  _vector_add_assign(temp.s1, psi);        \
-  _vector_sub_assign(temp.s3, psi);
-
-#define _hop_x_p()                      \
-  _vector_i_add(psi, sp->s0, sp->s3);   \
-  _su3_multiply(chi, (*up), psi);       \
-  _complex_times_vector(psi, ka1, chi); \
-  _vector_add_assign(temp.s0, psi);     \
-  _vector_i_sub_assign(temp.s3, psi);   \
-  _vector_i_add(psi, sp->s1, sp->s2);   \
-  _su3_multiply(chi, (*up), psi);       \
-  _complex_times_vector(psi, ka1, chi); \
-  _vector_add_assign(temp.s1, psi);     \
-  _vector_i_sub_assign(temp.s2, psi);
-
-#define _hop_x_m()                         \
-  _vector_i_sub(psi, sm->s0, sm->s3);      \
-  _su3_inverse_multiply(chi, (*um), psi);  \
-  _complexcjg_times_vector(psi, ka1, chi); \
-  _vector_add_assign(temp.s0, psi);        \
-  _vector_i_add_assign(temp.s3, psi);      \
-  _vector_i_sub(psi, sm->s1, sm->s2);      \
-  _su3_inverse_multiply(chi, (*um), psi);  \
-  _complexcjg_times_vector(psi, ka1, chi); \
-  _vector_add_assign(temp.s1, psi);        \
-  _vector_i_add_assign(temp.s2, psi);
-
-#define _hop_y_p()                      \
-  _vector_add(psi, sp->s0, sp->s3);     \
-  _su3_multiply(chi, (*up), psi);       \
-  _complex_times_vector(psi, ka2, chi); \
-  _vector_add_assign(temp.s0, psi);     \
-  _vector_add_assign(temp.s3, psi);     \
-  _vector_sub(psi, sp->s1, sp->s2);     \
-  _su3_multiply(chi, (*up), psi);       \
-  _complex_times_vector(psi, ka2, chi); \
-  _vector_add_assign(temp.s1, psi);     \
-  _vector_sub_assign(temp.s2, psi);
-
-#define _hop_y_m()                         \
-  _vector_sub(psi, sm->s0, sm->s3);        \
-  _su3_inverse_multiply(chi, (*um), psi);  \
-  _complexcjg_times_vector(psi, ka2, chi); \
-  _vector_add_assign(temp.s0, psi);        \
-  _vector_sub_assign(temp.s3, psi);        \
-  _vector_add(psi, sm->s1, sm->s2);        \
-  _su3_inverse_multiply(chi, (*um), psi);  \
-  _complexcjg_times_vector(psi, ka2, chi); \
-  _vector_add_assign(temp.s1, psi);        \
-  _vector_add_assign(temp.s2, psi);
-
-#define _hop_z_p()                      \
-  _vector_i_add(psi, sp->s0, sp->s2);   \
-  _su3_multiply(chi, (*up), psi);       \
-  _complex_times_vector(psi, ka3, chi); \
-  _vector_add_assign(temp.s0, psi);     \
-  _vector_i_sub_assign(temp.s2, psi);   \
-  _vector_i_sub(psi, sp->s1, sp->s3);   \
-  _su3_multiply(chi, (*up), psi);       \
-  _complex_times_vector(psi, ka3, chi); \
-  _vector_add_assign(temp.s1, psi);     \
-  _vector_i_add_assign(temp.s3, psi);
-
-#define _hop_z_m()                         \
-  _vector_i_sub(psi, sm->s0, sm->s2);      \
-  _su3_inverse_multiply(chi, (*um), psi);  \
-  _complexcjg_times_vector(psi, ka3, chi); \
-  _vector_add_assign(temp.s0, psi);        \
-  _vector_i_add_assign(temp.s2, psi);      \
-  _vector_i_add(psi, sm->s1, sm->s3);      \
-  _su3_inverse_multiply(chi, (*um), psi);  \
-  _complexcjg_times_vector(psi, ka3, chi); \
-  _vector_add_assign(temp.s1, psi);        \
-  _vector_i_sub_assign(temp.s3, psi);
-
-#define _hop_mul_g5_cmplx_and_store()                 \
-  _complex_times_vector(rn->s0, cfactor, temp.s0);    \
-  _complex_times_vector(rn->s1, cfactor, temp.s1);    \
-  _complexcjg_times_vector(rn->s2, cfactor, temp.s2); \
-  _complexcjg_times_vector(rn->s3, cfactor, temp.s3);
-
-#define _g5_cmplx_sub_hop_and_g5store()           \
-  _complex_times_vector(psi, cfactor, pn->s0);    \
-  _vector_sub(rn->s0, psi, temp.s0);              \
-  _complex_times_vector(chi, cfactor, pn->s1);    \
-  _vector_sub(rn->s1, chi, temp.s1);              \
-  _complexcjg_times_vector(psi, cfactor, pn->s2); \
-  _vector_sub(rn->s2, temp.s2, psi);              \
-  _complexcjg_times_vector(chi, cfactor, pn->s3); \
-  _vector_sub(rn->s3, temp.s3, chi);
-
-#define _store_res()               \
-  _vector_assign(rn->s0, temp.s0); \
-  _vector_assign(rn->s1, temp.s1); \
-  _vector_assign(rn->s2, temp.s2); \
-  _vector_assign(rn->s3, temp.s3);
-#endif
diff --git a/profiling/hmc_mk2/logs/example_log.out b/profiling/hmc_mk2/logs/example_log.out
index faf4874bf..642963b16 100644
--- a/profiling/hmc_mk2/logs/example_log.out
+++ b/profiling/hmc_mk2/logs/example_log.out
@@ -270,8 +270,8 @@ operator 0 parsed line 229
 This is the hmc code for twisted mass Wilson QCD
 
 Version 5.2.0, commit 51cf008a89944ecdd9345cdb62aaf0a203a7f306
-# The code is compiled with -D_GAUGE_COPY
-# The code is compiled with -D_USE_HALFSPINOR
+# The code is compiled with -DTM_USE_GAUGE_COPY
+# The code is compiled with -DTM_USE_HALFSPINOR
 # the code is compiled for non-blocking MPI calls (spinor and gauge)
 # the code is compiled with openMP support
 # Non-Schroedinger (anti-periodic, periodic or twisted) boundary conditions are used
diff --git a/solver/Makefile.in b/solver/Makefile.in
deleted file mode 100644
index 584428871..000000000
--- a/solver/Makefile.in
+++ /dev/null
@@ -1,106 +0,0 @@
-
-srcdir = @srcdir@
-top_builddir =  @top_builddir@
-abs_top_builddir = @abs_top_builddir@
-top_srcdir = @top_srcdir@
-abs_top_srcdir = @abs_top_srcdir@
-subdir = solver
-builddir = @builddir@
-
-CFLAGS = @CFLAGS@ @SOLVEROUT@
-DEPFLAGS = @DEPFLAGS@
-LDFLAGS = @LDFLAGS@
-DEFS = @DEFS@
-OPTARGS = @OPTARGS@
-
-AR = @AR@
-RANLIB = @RANLIB@
-CC = @CC@
-CCDEP = @CCDEP@
-CCLD = $(CC)
-LINK = $(CCLD) $(CFLAGS) $(LDFLAGS) ${OPTARGS} -o $@
-LEX = @LEX@
-AUTOCONF = @AUTOCONF@
-DEFS = @DEFS@
-
-INCLUDES = @INCLUDES@
-LDADD =
-#COMPILE = ${CC} ${DEFS} $(INCLUDES) ${CFLAGS}
-COMPILE = ${CC} ${DEFS} ${INCLUDES} ${CFLAGS} ${OPTARGS}
-
-LIBRARIES = libsolver
-libsolver_TARGETS = bicgstab_complex gmres incr_eigcg eigcg restart_X ortho \
-	            cgs_real cg_her mr chrono_guess \
-	            bicgstabell bicgstab2 eigenvalues fgmres \
-	            gcr gcr4complex diagonalise_general_matrix \
-	            cgne4complex mr4complex fgmres4complex \
-	            quicksort gmres_dr lu_solve jdher Msap \
-                    jdher_bi gram-schmidt eigenvalues_bi \
-                    bicgstab_complex_bi cg_her_bi pcg_her \
-                    sub_low_ev cg_her_nd poly_precon \
-                    generate_dfl_subspace dfl_projector \
-                    cg_mms_tm cg_mms_tm_nd mixed_cg_mms_tm_nd \
-                    solver_field sumr mixed_cg_her index_jd \
-		    rg_mixed_cg_her rg_mixed_cg_her_nd \
-                    dirac_operator_eigenvectors \
-		    mcr cr mcr4complex bicg_complex monomial_solve \
-		    solver_types init_guess
-
-libsolver_OBJECTS = $(addsuffix .o, ${libsolver_TARGETS})
-
-# default rule
-
-all: Makefile dep libsolver.a
-
-# rules for debugging
-debug all-debug: CFLAGS := $(CFLAGS) @DEBUG_FLAG@
-debug all-debug: all
-
-# rules for profiling information
-profile all-profile: CFLAGS := $(filter-out -fomit-frame-pointer,${CFLAGS}) @PROFILE_FLAG@
-profile all-profile: all
-
-
-#include dep rules
-
--include $(addsuffix .d,${libsolver_TARGETS})
-
-include ${top_srcdir}/Makefile.global
-
-# rule to compile objects
-
-%.o: ${srcdir}/%.c %.d Makefile ${abs_top_builddir}/include/tmlqcd_config_internal.h
-	$(COMPILE) -c $<
-
-
-# rule to make liblinalg
-
-libsolver.a: ${libsolver_OBJECTS} Makefile
-	@rm -f libsolver.a
-	@${AR} cru libsolver.a $(libsolver_OBJECTS)
-	@$(RANLIB) libsolver.a
-	@cp libsolver.a ${top_builddir}/lib/libsolver.a
-
-# rule to generate .d files
-
-$(addsuffix .d,$(libsolver_TARGETS)): %.d: ${srcdir}/%.c Makefile
-	@$(CCDEP) ${DEFS} ${DEPFLAGS} ${INCLUDES} $< > $@
-
-# rule to make dependencies
-
-dep: ${addsuffix .d, ${libsolver_TARGETS}}
-
-# rules to clean
-
-compile-clean: Makefile
-	rm -f ${$(addsuffix _OBJECTS, ${LIBRARIES})} *.d
-
-clean: compile-clean 
-	rm -f $(addsuffix .a, ${LIBRARIES})
-	rm -f ../lib/libsolver.a
-
-distclean: clean
-	rm -f Makefile
-
-
-.PHONY: all dep clean compile-clean distclean debug all-debug profile all-profile
diff --git a/src/bin/CMakeLists.txt b/src/bin/CMakeLists.txt
new file mode 100644
index 000000000..f641f8b18
--- /dev/null
+++ b/src/bin/CMakeLists.txt
@@ -0,0 +1,46 @@
+list(APPEND tmlqcd_prog "invert;benchmark;deriv_mg_tune;hmc_tm;offline_measurement")
+# Include paths applied to every target created in this directory.
+include_directories(
+  $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
+  $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/src/lib/include>
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+  $<$<BOOL:${TM_USE_LEMON}>:${TM_LEMON_INCLUDE_DIRS}>
+  ${TM_CLIME_INCLUDE_DIRS})
+# Build each listed program from its same-named .c file and link it against tmlqcd.
+foreach(_prog ${tmlqcd_prog})
+  add_executable(${_prog} "${_prog}.c")
+
+  target_link_libraries(${_prog} PUBLIC tmlqcd)
+  set_target_properties(
+    ${_prog}
+    PROPERTIES IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
+               POSITION_INDEPENDENT_CODE ON
+               LINKER_LANGUAGE "CXX")
+endforeach()
+
+if(TM_ENABLE_TESTS)
+  # Test names carry no ".c" suffix (added in add_executable below); one name per entry.
+  list(
+    APPEND
+    tmlqcd_test_prog
+    "check_locallity;hopping_test;scalar_prod_r_test;test_eigenvalues")
+  if(TM_USE_LEMON)
+    list(APPEND tmlqcd_test_prog test_lemon)
+  endif()
+  if(TM_USE_QPHIX)
+    list(APPEND tmlqcd_test_prog qphix_test_Dslash)
+  endif()
+
+  foreach(_prog ${tmlqcd_test_prog})
+    add_executable(${_prog} "${CMAKE_SOURCE_DIR}/src/bin/tests/${_prog}.c")
+    # NOTE(review): the main programs link "tmlqcd"; confirm a target named "hmc" exists.
+    target_link_libraries(${_prog} PUBLIC hmc)
+    set_target_properties(
+      ${_prog}
+      PROPERTIES IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
+                 POSITION_INDEPENDENT_CODE ON
+                 LINKER_LANGUAGE "CXX")
+  endforeach()
+endif()
+
+install(TARGETS ${tmlqcd_prog} RUNTIME DESTINATION "${CMAKE_INSTALL_BINDIR}")
diff --git a/benchmark.c b/src/bin/benchmark.c
similarity index 94%
rename from benchmark.c
rename to src/bin/benchmark.c
index 3dd70a86b..b2f4ee68c 100644
--- a/benchmark.c
+++ b/src/bin/benchmark.c
@@ -33,7 +33,7 @@
 #include <time.h>
 #ifdef TM_USE_MPI
 #include <mpi.h>
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
 #include <io/gauge.h>
 #include <io/params.h>
 #endif
@@ -61,19 +61,19 @@
 #include "test/check_geometry.h"
 #include "xchange/xchange.h"
 
-#ifdef PARALLELT
+#ifdef TM_PARALLELT
 #define SLICE (LX * LY * LZ / 2)
-#elif defined PARALLELXT
+#elif defined TM_PARALLELXT
 #define SLICE ((LX * LY * LZ / 2) + (T * LY * LZ / 2))
-#elif defined PARALLELXYT
+#elif defined TM_PARALLELXYT
 #define SLICE ((LX * LY * LZ / 2) + (T * LY * LZ / 2) + (T * LX * LZ / 2))
-#elif defined PARALLELXYZT
+#elif defined TM_PARALLELXYZT
 #define SLICE ((LX * LY * LZ / 2) + (T * LY * LZ / 2) + (T * LX * LZ / 2) + (T * LX * LY / 2))
-#elif defined PARALLELX
+#elif defined TM_PARALLELX
 #define SLICE ((LY * LZ * T / 2))
-#elif defined PARALLELXY
+#elif defined TM_PARALLELXY
 #define SLICE ((LY * LZ * T / 2) + (LX * LZ * T / 2))
-#elif defined PARALLELXYZ
+#elif defined TM_PARALLELXYZ
 #define SLICE ((LY * LZ * T / 2) + (LX * LZ * T / 2) + (LX * LY * T / 2))
 #endif
 
@@ -81,7 +81,7 @@ int check_xchange();
 
 int main(int argc, char *argv[]) {
   int j, j_max, k, k_max = 1;
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
   paramsXlfInfo *xlfInfo;
 #endif
   int status = 0;
@@ -123,20 +123,20 @@ int main(int argc, char *argv[]) {
   tmlqcd_mpi_init(argc, argv);
 
   if (g_proc_id == 0) {
-#ifdef _GAUGE_COPY
-    printf("# The code was compiled with -D_GAUGE_COPY\n");
+#ifdef TM_USE_GAUGE_COPY
+    printf("# The code was compiled with -DTM_USE_GAUGE_COPY\n");
 #endif
-#ifdef _USE_HALFSPINOR
-    printf("# The code was compiled with -D_USE_HALFSPINOR\n");
+#ifdef TM_USE_HALFSPINOR
+    printf("# The code was compiled with -DTM_USE_HALFSPINOR\n");
 #endif
-#ifdef _USE_SHMEM
-    printf("# The code was compiled with -D_USE_SHMEM\n");
-#ifdef _PERSISTENT
+#ifdef TM_USE_SHMEM
+    printf("# The code was compiled with -DTM_USE_SHMEM\n");
+#ifdef TM_PERSISTENT
     printf("# The code was compiled for persistent MPI calls (halfspinor only)\n");
 #endif
 #endif
 #ifdef TM_USE_MPI
-#ifdef _NON_BLOCKING
+#ifdef TM_NON_BLOCKING
     printf("# The code was compiled for non-blocking MPI calls (spinor and gauge)\n");
 #endif
 #endif
@@ -144,7 +144,7 @@ int main(int argc, char *argv[]) {
     fflush(stdout);
   }
 
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1);
 #else
   init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0);
@@ -186,7 +186,7 @@ int main(int argc, char *argv[]) {
   /* define the boundary conditions for the fermion fields */
   boundary(g_kappa);
 
-#ifdef _USE_HALFSPINOR
+#ifdef TM_USE_HALFSPINOR
   j = init_dirac_halfspinor();
   if (j != 0) {
     fprintf(stderr, "Not enough memory for halfspinor fields! Aborting...\n");
@@ -200,7 +200,7 @@ int main(int argc, char *argv[]) {
       exit(0);
     }
   }
-#if (defined _PERSISTENT)
+#if (defined TM_PERSISTENT)
   init_xchange_halffield();
 #endif
 #endif
@@ -210,7 +210,7 @@ int main(int argc, char *argv[]) {
     fprintf(stderr, "Checking of geometry failed. Unable to proceed.\nAborting....\n");
     exit(1);
   }
-#if (defined TM_USE_MPI && !(defined _USE_SHMEM))
+#if (defined TM_USE_MPI && !(defined TM_USE_SHMEM))
   check_xchange();
 #endif
 
@@ -344,7 +344,7 @@ int main(int argc, char *argv[]) {
     sdt = sdt / ((double)(2 * SLICE));
     if (g_proc_id == 0) {
       printf("# The size of the package is %d bytes.\n", (SLICE) * 192);
-#ifdef _USE_HALFSPINOR
+#ifdef TM_USE_HALFSPINOR
       printf("# The bandwidth is %5.2f + %5.2f MB/sec\n", 192. / sdt / 1024 / 1024,
              192. / sdt / 1024. / 1024);
 #else
@@ -431,7 +431,7 @@ int main(int argc, char *argv[]) {
     }
   }
 
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
   if (g_proc_id == 0) {
     printf("# Performing parallel IO test ...\n");
   }
diff --git a/deriv_mg_tune.c b/src/bin/deriv_mg_tune.c
similarity index 97%
rename from deriv_mg_tune.c
rename to src/bin/deriv_mg_tune.c
index d3abb66ee..0fd6ec529 100644
--- a/deriv_mg_tune.c
+++ b/src/bin/deriv_mg_tune.c
@@ -64,7 +64,7 @@
 #include "solver/solver.h"
 #include "test/check_geometry.h"
 #include "update_tm.h"
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
 #include "DDalphaAMG_interface.h"
 #endif
 #ifdef TM_USE_QUDA
@@ -98,11 +98,6 @@ int main(int argc, char *argv[]) {
 
   init_critical_globals(TM_PROGRAM_DERIV_MG_TUNE);
 
-#ifdef _KOJAK_INST
-#pragma pomp inst init
-#pragma pomp inst begin(main)
-#endif
-
   verbose = 1;
   g_use_clover_flag = 0;
 
@@ -136,7 +131,7 @@ int main(int argc, char *argv[]) {
 
   g_mu = g_mu1;
 
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   status = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1);
   status += init_gauge_field_32(VOLUMEPLUSRAND + g_dbw2rand, 1);
 #else
@@ -204,7 +199,7 @@ int main(int argc, char *argv[]) {
     exit(1);
   }
 
-#ifdef _USE_HALFSPINOR
+#ifdef TM_USE_HALFSPINOR
   j = init_dirac_halfspinor();
   if (j != 0) {
     fprintf(stderr, "Not enough memory for halffield! Aborting...\n");
@@ -217,7 +212,7 @@ int main(int argc, char *argv[]) {
     exit(-1);
   }
 
-#if (defined _PERSISTENT)
+#if (defined TM_PERSISTENT)
   init_xchange_halffield();
 #endif
 #endif
@@ -367,9 +362,6 @@ int main(int argc, char *argv[]) {
 #endif
 
   return (0);
-#ifdef _KOJAK_INST
-#pragma pomp inst end(main)
-#endif
 }
 
 static void usage(const tm_ExitCode_t exit_code) {
@@ -395,11 +387,11 @@ static void process_args(int argc, char *argv[], char **input_filename, char **f
   while ((c = getopt(argc, argv, "h?vVf:o:m:")) != -1) {
     switch (c) {
       case 'f':
-        *input_filename = calloc(200, sizeof(char));
+        *input_filename = calloc(256, sizeof(char));
         strncpy(*input_filename, optarg, 200);
         break;
       case 'o':
-        *filename = calloc(200, sizeof(char));
+        *filename = calloc(256, sizeof(char));
         strncpy(*filename, optarg, 200);
         break;
       case 'v':
@@ -433,12 +425,14 @@ static void process_args(int argc, char *argv[], char **input_filename, char **f
 
 static void set_default_filenames(char **input_filename, char **filename) {
   if (*input_filename == NULL) {
-    *input_filename = calloc(13, sizeof(char));
+    *input_filename = calloc(16, sizeof(char));
     strcpy(*input_filename, "hmc.input");
+    (*input_filename)[9] = '\0';
   }
 
   if (*filename == NULL) {
     *filename = calloc(7, sizeof(char));
     strcpy(*filename, "output");
+    (*filename)[6] = '\0';
   }
 }
diff --git a/gen_sources.c b/src/bin/gen_sources.c
similarity index 100%
rename from gen_sources.c
rename to src/bin/gen_sources.c
diff --git a/hmc_tm.c b/src/bin/hmc_tm.c
similarity index 97%
rename from hmc_tm.c
rename to src/bin/hmc_tm.c
index 2db6f8c1b..017ab9cb3 100644
--- a/hmc_tm.c
+++ b/src/bin/hmc_tm.c
@@ -67,7 +67,7 @@
 #include "solver/solver.h"
 #include "test/check_geometry.h"
 #include "update_tm.h"
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
 #include "DDalphaAMG_interface.h"
 #endif
 #ifdef TM_USE_QUDA
@@ -85,11 +85,11 @@ static void set_default_filenames(char **input_filename, char **filename);
 int main(int argc, char *argv[]) {
   FILE *parameterfile = NULL, *countfile = NULL;
   char *filename = NULL;
-  char datafilename[206];
-  char parameterfilename[206];
-  char gauge_filename[50];
-  char nstore_filename[50];
-  char tmp_filename[50];
+  char datafilename[256];
+  char parameterfilename[256];
+  char gauge_filename[64];
+  char nstore_filename[64];
+  char tmp_filename[64];
   char *input_filename = NULL;
   int status = 0, accept = 0;
   int j, ix, mu, trajectory_counter = 0;
@@ -113,11 +113,6 @@ int main(int argc, char *argv[]) {
 
   init_critical_globals(TM_PROGRAM_HMC_TM);
 
-#ifdef _KOJAK_INST
-#pragma pomp inst init
-#pragma pomp inst begin(main)
-#endif
-
   strcpy(gauge_filename, "conf.save");
   strcpy(nstore_filename, "nstore_counter");
   strcpy(tmp_filename, ".conf.tmp");
@@ -168,7 +163,7 @@ int main(int argc, char *argv[]) {
 
   g_mu = g_mu1;
 
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   status = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1);
   status += init_gauge_field_32(VOLUMEPLUSRAND + g_dbw2rand, 1);
 #else
@@ -257,7 +252,7 @@ int main(int argc, char *argv[]) {
     exit(1);
   }
 
-#ifdef _USE_HALFSPINOR
+#ifdef TM_USE_HALFSPINOR
   j = init_dirac_halfspinor();
   if (j != 0) {
     fprintf(stderr, "Not enough memory for halffield! Aborting...\n");
@@ -270,7 +265,7 @@ int main(int argc, char *argv[]) {
     exit(-1);
   }
 
-#if (defined _PERSISTENT)
+#if (defined TM_PERSISTENT)
   init_xchange_halffield();
 #endif
 #endif
@@ -504,7 +499,7 @@ int main(int argc, char *argv[]) {
     }
 
     /* online measurements */
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
     // When the configuration is rejected, we have to update it in the MG and redo the setup.
     int mg_update = accept ? 0 : 1;
 #endif
@@ -514,7 +509,7 @@ int main(int argc, char *argv[]) {
         if (g_proc_id == 0) {
           fprintf(stdout, "#\n# Beginning online measurement.\n");
         }
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
         if (mg_update) {
           mg_update = 0;
           MG_reset();
@@ -591,9 +586,6 @@ int main(int argc, char *argv[]) {
 #endif
 
   return (0);
-#ifdef _KOJAK_INST
-#pragma pomp inst end(main)
-#endif
 }
 
 static void usage(const tm_ExitCode_t exit_code) {
@@ -619,11 +611,11 @@ static void process_args(int argc, char *argv[], char **input_filename, char **f
   while ((c = getopt(argc, argv, "h?vVf:o:m:")) != -1) {
     switch (c) {
       case 'f':
-        *input_filename = calloc(200, sizeof(char));
+        *input_filename = calloc(256, sizeof(char));
         strncpy(*input_filename, optarg, 200);
         break;
       case 'o':
-        *filename = calloc(200, sizeof(char));
+        *filename = calloc(256, sizeof(char));
         strncpy(*filename, optarg, 200);
         break;
       case 'v':
diff --git a/invert.c b/src/bin/invert.c
similarity index 98%
rename from invert.c
rename to src/bin/invert.c
index 007e0ea41..b5040ba88 100644
--- a/invert.c
+++ b/src/bin/invert.c
@@ -84,7 +84,7 @@
 #ifdef TM_USE_QPHIX
 #include "qphix_interface.h"
 #endif
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
 #include "DDalphaAMG_interface.h"
 #endif
 #include "expo.h"
@@ -114,11 +114,6 @@ int main(int argc, char *argv[]) {
 
   init_critical_globals(TM_PROGRAM_INVERT);
 
-#ifdef _KOJAK_INST
-#pragma pomp inst init
-#pragma pomp inst begin(main)
-#endif
-
   DUM_DERI = 8;
   DUM_MATRIX = DUM_DERI + 5;
   NO_OF_SPINORFIELDS = DUM_MATRIX + 4;
@@ -165,7 +160,7 @@ int main(int argc, char *argv[]) {
   g_dbw2rand = 0;
 #endif
 
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   j = init_gauge_field(VOLUMEPLUSRAND, 1);
   j += init_gauge_field_32(VOLUMEPLUSRAND, 1);
 #else
@@ -246,7 +241,7 @@ int main(int argc, char *argv[]) {
   init_measurements();
 
   /* this could be maybe moved to init_operators */
-#ifdef _USE_HALFSPINOR
+#ifdef TM_USE_HALFSPINOR
   j = init_dirac_halfspinor();
   if (j != 0) {
     fprintf(stderr, "Not enough memory for halffield! Aborting...\n");
@@ -258,7 +253,7 @@ int main(int argc, char *argv[]) {
     fprintf(stderr, "Not enough memory for 32-bit halffield! Aborting...\n");
     exit(-1);
   }
-#if (defined _PERSISTENT)
+#if (defined TM_PERSISTENT)
   if (even_odd_flag) init_xchange_halffield();
 #endif
 #endif
@@ -362,7 +357,7 @@ int main(int argc, char *argv[]) {
     g_precWS = NULL;
     if (use_preconditioning == 1) {
       /* todo load fftw wisdom */
-#if (defined HAVE_FFTW) && !(defined TM_USE_MPI)
+#if (defined TM_USE_FFTW) && !(defined TM_USE_MPI)
       loadFFTWWisdom(g_spinor_field[0], g_spinor_field[1], T, LX);
 #else
       use_preconditioning = 0;
@@ -457,9 +452,6 @@ int main(int argc, char *argv[]) {
   MPI_Finalize();
 #endif
   return (0);
-#ifdef _KOJAK_INST
-#pragma pomp inst end(main)
-#endif
 }
 
 static void usage(tm_ExitCode_t exit_code) {
diff --git a/util/main_ildg2uk.c b/src/bin/main_ildg2uk.c
similarity index 100%
rename from util/main_ildg2uk.c
rename to src/bin/main_ildg2uk.c
diff --git a/offline_measurement.c b/src/bin/offline_measurement.c
similarity index 88%
rename from offline_measurement.c
rename to src/bin/offline_measurement.c
index c2ae72f9c..667dce3f9 100644
--- a/offline_measurement.c
+++ b/src/bin/offline_measurement.c
@@ -73,9 +73,9 @@ static void set_default_filenames(char **input_filename, char **filename);
 
 int main(int argc, char *argv[]) {
   FILE *parameterfile = NULL;
-  int j, i;
-  char datafilename[206];
-  char parameterfilename[206];
+  int err;
+  char datafilename[256];
+  char parameterfilename[256];
   char conf_filename[CONF_FILENAME_LENGTH];
   char *input_filename = NULL;
   char *filename = NULL;
@@ -83,11 +83,6 @@ int main(int argc, char *argv[]) {
 
   init_critical_globals(TM_PROGRAM_OFFLINE_MEASUREMENT);
 
-#ifdef _KOJAK_INST
-#pragma pomp inst init
-#pragma pomp inst begin(main)
-#endif
-
   DUM_DERI = 8;
   DUM_MATRIX = DUM_DERI + 5;
   NO_OF_SPINORFIELDS = DUM_MATRIX + 3;
@@ -120,51 +115,50 @@ int main(int argc, char *argv[]) {
   /* we need to make sure that we don't have even_odd_flag = 1 */
   /* if any of the operators doesn't use it                    */
   /* in this way even/odd can still be used by other operators */
-  for (j = 0; j < no_operators; j++)
+  for (int j = 0; j < no_operators; j++)
     if (!operator_list[j].even_odd_flag) even_odd_flag = 0;
 
 #ifndef TM_USE_MPI
   g_dbw2rand = 0;
 #endif
 
-#ifdef _GAUGE_COPY
-  j = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1);
+#ifdef TM_USE_GAUGE_COPY
+  err = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1);
 #else
-  j = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0);
+  err = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0);
 #endif
-  if (j != 0) {
+  if (err != 0) {
     fprintf(stderr, "Not enough memory for gauge_fields! Aborting...\n");
     exit(-1);
   }
-  j = init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand);
-  if (j != 0) {
+  if (init_geometry_indices(VOLUMEPLUSRAND + g_dbw2rand) != 0) {
     fprintf(stderr, "Not enough memory for geometry indices! Aborting...\n");
     exit(-1);
   }
   if (no_monomials > 0) {
     if (even_odd_flag) {
-      j = init_monomials(VOLUMEPLUSRAND / 2, even_odd_flag);
+      err = init_monomials(VOLUMEPLUSRAND / 2, even_odd_flag);
     } else {
-      j = init_monomials(VOLUMEPLUSRAND, even_odd_flag);
+      err = init_monomials(VOLUMEPLUSRAND, even_odd_flag);
     }
-    if (j != 0) {
+    if (err != 0) {
       fprintf(stderr, "Not enough memory for monomial pseudo fermion fields! Aborting...\n");
       exit(-1);
     }
   }
   if (even_odd_flag) {
-    j = init_spinor_field(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS);
+    err = init_spinor_field(VOLUMEPLUSRAND / 2, NO_OF_SPINORFIELDS);
   } else {
-    j = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS);
+    err = init_spinor_field(VOLUMEPLUSRAND, NO_OF_SPINORFIELDS);
   }
-  if (j != 0) {
+  if (err != 0) {
     fprintf(stderr, "Not enough memory for spinor fields! Aborting...\n");
     exit(-1);
   }
 
   if (g_running_phmc) {
-    j = init_chi_spinor_field(VOLUMEPLUSRAND / 2, 20);
-    if (j != 0) {
+    err = init_chi_spinor_field(VOLUMEPLUSRAND / 2, 20);
+    if (err != 0) {
       fprintf(stderr, "Not enough memory for PHMC Chi fields! Aborting...\n");
       exit(-1);
     }
@@ -210,30 +204,28 @@ int main(int argc, char *argv[]) {
   init_measurements();
 
   /* this could be maybe moved to init_operators */
-#ifdef _USE_HALFSPINOR
-  j = init_dirac_halfspinor();
-  if (j != 0) {
+#ifdef TM_USE_HALFSPINOR
+  if (init_dirac_halfspinor() != 0) {
     fprintf(stderr, "Not enough memory for halffield! Aborting...\n");
     exit(-1);
   }
   if (g_sloppy_precision_flag == 1) {
-    j = init_dirac_halfspinor32();
-    if (j != 0) {
+    if (init_dirac_halfspinor32() != 0) {
       fprintf(stderr, "Not enough memory for 32-bit halffield! Aborting...\n");
       exit(-1);
     }
   }
-#if (defined _PERSISTENT)
+#if (defined TM_PERSISTENT)
   if (even_odd_flag) init_xchange_halffield();
 #endif
 #endif
 
-  for (j = 0; j < Nmeas; j++) {
+  for (int j = 0; j < Nmeas; j++) {
     int n_written =
         snprintf(conf_filename, CONF_FILENAME_LENGTH, "%s.%.4d", gauge_input_filename, nstore);
     if (n_written < 0 || n_written > CONF_FILENAME_LENGTH) {
-      char error_message[500];
-      snprintf(error_message, 500,
+      char error_message[512];
+      snprintf(error_message, 512,
                "Encoding error or gauge configuration filename "
                "longer than %d characters! See offline_measurement.c CONF_FILENAME_LENGTH\n",
                CONF_FILENAME_LENGTH);
@@ -244,8 +236,8 @@ int main(int argc, char *argv[]) {
              (gauge_precision_read_flag == 32 ? "single" : "double"));
       fflush(stdout);
     }
-    if ((i = read_gauge_field(conf_filename, g_gauge_field)) != 0) {
-      fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", i,
+    if ((err = read_gauge_field(conf_filename, g_gauge_field)) != 0) {
+      fprintf(stderr, "Error %d while reading gauge field from %s\n Aborting...\n", err,
               conf_filename);
       exit(-2);
     }
@@ -306,10 +298,6 @@ int main(int argc, char *argv[]) {
   MPI_Finalize();
 #endif
   return (0);
-
-#ifdef _KOJAK_INST
-#pragma pomp inst end(main)
-#endif
 }
 
 static void usage(const tm_ExitCode_t exit_code) {
@@ -330,11 +318,11 @@ static void usage(const tm_ExitCode_t exit_code) {
 }
 
 static void process_args(int argc, char *argv[], char **input_filename, char **filename) {
-  int c;
+  int c = 0;
   while ((c = getopt(argc, argv, "h?vVf:o:")) != -1) {
     switch (c) {
       case 'f':
-        *input_filename = calloc(200, sizeof(char));
+        *input_filename = calloc(256, sizeof(char));
         strncpy(*input_filename, optarg, 200);
         break;
       case 'v':
diff --git a/check_locallity.c b/src/bin/tests/check_locallity.c
similarity index 98%
rename from check_locallity.c
rename to src/bin/tests/check_locallity.c
index 9ed46daee..01d12826b 100644
--- a/check_locallity.c
+++ b/src/bin/tests/check_locallity.c
@@ -17,16 +17,14 @@
  * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
  ***********************************************************************/
 
-#include "lime.h"
-#ifdef HAVE_CONFIG_H
-#include <tmlqcd_config.h>
-#endif
+#include <lime.h>
 #include <math.h>
 #include <signal.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
+#include <tmlqcd_config.h>
 #ifdef TM_USE_MPI
 #include <mpi.h>
 #endif
@@ -79,13 +77,13 @@ int main(int argc, char *argv[]) {
   double *norm;
   struct stout_parameters params_smear;
 
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   int kb = 0;
 #endif
 #ifdef TM_USE_MPI
   double atime = 0., etime = 0.;
 #endif
-#ifdef _KOJAK_INST
+#ifdef TM_KOJAK_INST
 #pragma pomp inst init
 #pragma pomp inst begin(main)
 #endif
@@ -146,7 +144,7 @@ int main(int argc, char *argv[]) {
   g_dbw2rand = 0;
 #endif
 
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   j = init_gauge_field(VOLUMEPLUSRAND, 1);
 #else
   j = init_gauge_field(VOLUMEPLUSRAND, 0);
@@ -188,7 +186,7 @@ int main(int argc, char *argv[]) {
   /* define the boundary conditions for the fermion fields */
   boundary();
 
-#ifdef _USE_HALFSPINOR
+#ifdef TM_USE_HALFSPINOR
   j = init_dirac_halfspinor();
   if (j != 0) {
     fprintf(stderr, "Not enough memory for halffield! Aborting...\n");
@@ -201,7 +199,7 @@ int main(int argc, char *argv[]) {
       exit(-1);
     }
   }
-#if (defined _PERSISTENT)
+#if (defined TM_PERSISTENT)
   init_xchange_halffield();
 #endif
 #endif
@@ -314,7 +312,7 @@ int main(int argc, char *argv[]) {
   free_spinor_field();
   free_moment_field();
   return (0);
-#ifdef _KOJAK_INST
+#ifdef TM_KOJAK_INST
 #pragma pomp inst end(main)
 #endif
 }
diff --git a/hopping_test.c b/src/bin/tests/hopping_test.c
similarity index 94%
rename from hopping_test.c
rename to src/bin/tests/hopping_test.c
index 04df878e5..0e5ff03e7 100644
--- a/hopping_test.c
+++ b/src/bin/tests/hopping_test.c
@@ -34,7 +34,7 @@
 #include <time.h>
 #ifdef TM_USE_MPI
 #include <mpi.h>
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
 #include <io/gauge.h>
 #include <io/params.h>
 #endif
@@ -59,19 +59,19 @@
 #include "test/check_geometry.h"
 #include "xchange/xchange.h"
 
-#ifdef PARALLELT
+#ifdef TM_PARALLELT
 #define SLICE (LX * LY * LZ / 2)
-#elif defined PARALLELXT
+#elif defined TM_PARALLELXT
 #define SLICE ((LX * LY * LZ / 2) + (T * LY * LZ / 2))
-#elif defined PARALLELXYT
+#elif defined TM_PARALLELXYT
 #define SLICE ((LX * LY * LZ / 2) + (T * LY * LZ / 2) + (T * LX * LZ / 2))
-#elif defined PARALLELXYZT
+#elif defined TM_PARALLELXYZT
 #define SLICE ((LX * LY * LZ / 2) + (T * LY * LZ / 2) + (T * LX * LZ / 2) + (T * LX * LY / 2))
-#elif defined PARALLELX
+#elif defined TM_PARALLELX
 #define SLICE ((LY * LZ * T / 2))
-#elif defined PARALLELXY
+#elif defined TM_PARALLELXY
 #define SLICE ((LY * LZ * T / 2) + (LX * LZ * T / 2))
-#elif defined PARALLELXYZ
+#elif defined TM_PARALLELXYZ
 #define SLICE ((LY * LZ * T / 2) + (LX * LZ * T / 2) + (LX * LY * T / 2))
 #endif
 
@@ -102,20 +102,20 @@ int main(int argc, char *argv[]) {
   tmlqcd_mpi_init(argc, argv);
 
   if (g_proc_id == 0) {
-#ifdef _GAUGE_COPY
-    printf("# The code was compiled with -D_GAUGE_COPY\n");
+#ifdef TM_USE_GAUGE_COPY
+    printf("# The code was compiled with -DTM_USE_GAUGE_COPY\n");
 #endif
-#ifdef _USE_HALFSPINOR
-    printf("# The code was compiled with -D_USE_HALFSPINOR\n");
+#ifdef TM_USE_HALFSPINOR
+    printf("# The code was compiled with -DTM_USE_HALFSPINOR\n");
 #endif
-#ifdef _USE_SHMEM
-    printf("# the code was compiled with -D_USE_SHMEM\n");
-#ifdef _PERSISTENT
+#ifdef TM_USE_SHMEM
+    printf("# the code was compiled with -DTM_USE_SHMEM\n");
+#ifdef TM_PERSISTENT
     printf("# the code was compiled for persistent MPI calls (halfspinor only)\n");
 #endif
 #endif
 #ifdef TM_USE_MPI
-#ifdef _NON_BLOCKING
+#ifdef TM_NON_BLOCKING
     printf("# the code was compiled for non-blocking MPI calls (spinor and gauge)\n");
 #endif
 #endif
@@ -123,7 +123,7 @@ int main(int argc, char *argv[]) {
     fflush(stdout);
   }
 
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1);
 #else
   init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0);
@@ -165,7 +165,7 @@ int main(int argc, char *argv[]) {
   /* define the boundary conditions for the fermion fields */
   boundary(g_kappa);
 
-#ifdef _USE_HALFSPINOR
+#ifdef TM_USE_HALFSPINOR
   j = init_dirac_halfspinor();
   if (j != 0) {
     fprintf(stderr, "Not enough memory for halfspinor fields! Aborting...\n");
@@ -179,7 +179,7 @@ int main(int argc, char *argv[]) {
       exit(0);
     }
   }
-#if (defined _PERSISTENT)
+#if (defined TM_PERSISTENT)
   init_xchange_halffield();
 #endif
 #endif
@@ -190,7 +190,7 @@ int main(int argc, char *argv[]) {
     exit(1);
   }
 
-#if (defined TM_USE_MPI && !(defined _USE_SHMEM))
+#if (defined TM_USE_MPI && !(defined TM_USE_SHMEM))
   check_xchange();
 #endif
 
diff --git a/qphix_test_Dslash.c b/src/bin/tests/qphix_test_Dslash.c
similarity index 98%
rename from qphix_test_Dslash.c
rename to src/bin/tests/qphix_test_Dslash.c
index 56250bc5a..41e2602a4 100644
--- a/qphix_test_Dslash.c
+++ b/src/bin/tests/qphix_test_Dslash.c
@@ -35,7 +35,7 @@
 #include <time.h>
 #ifdef TM_USE_MPI
 #include <mpi.h>
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
 #include <io/gauge.h>
 #include <io/params.h>
 #endif
@@ -86,7 +86,7 @@ double compare_spinors(spinor* s1, spinor* s2);
 
 int main(int argc, char* argv[]) {
   int j;
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
   paramsXlfInfo* xlfInfo;
 #endif
   int status = 0;
@@ -105,7 +105,7 @@ int main(int argc, char* argv[]) {
   tmlqcd_mpi_init(argc, argv);
   g_dbw2rand = 0;
 
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   init_gauge_field(VOLUMEPLUSRAND, 1);
 #else
   init_gauge_field(VOLUMEPLUSRAND, 0);
@@ -135,7 +135,7 @@ int main(int argc, char* argv[]) {
   /* define the geometry */
   geometry();
 
-#ifdef _USE_HALFSPINOR
+#ifdef TM_USE_HALFSPINOR
   j = init_dirac_halfspinor();
   if (j != 0) {
     fprintf(stderr, "Not enough memory for halfspinor fields! Aborting...\n");
@@ -146,7 +146,7 @@ int main(int argc, char* argv[]) {
     fprintf(stderr, "Not enough memory for 32-Bit halfspinor fields! Aborting...\n");
     exit(0);
   }
-#if (defined _PERSISTENT)
+#if (defined TM_PERSISTENT)
   init_xchange_halffield();
 #endif
 #endif
@@ -180,7 +180,7 @@ int main(int argc, char* argv[]) {
 #endif
 
   g_update_gauge_copy = 1;
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   update_backward_gauge(g_gauge_field);
 #endif
 
diff --git a/test/scalar_prod_r_test.c b/src/bin/tests/scalar_prod_r_test.c
similarity index 100%
rename from test/scalar_prod_r_test.c
rename to src/bin/tests/scalar_prod_r_test.c
diff --git a/test/test_eigenvalues.c b/src/bin/tests/test_eigenvalues.c
similarity index 98%
rename from test/test_eigenvalues.c
rename to src/bin/tests/test_eigenvalues.c
index 053944698..c52d29cf8 100644
--- a/test/test_eigenvalues.c
+++ b/src/bin/tests/test_eigenvalues.c
@@ -227,7 +227,7 @@ int main(int argc, char *argv[]) {
   g_eps_sq_acc = g_eps_sq_acc1;
   g_eps_sq_force = g_eps_sq_force1;
 
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   j = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1);
 #else
   j = init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0);
@@ -274,11 +274,11 @@ int main(int argc, char *argv[]) {
 
     parameterfile = fopen(parameterfilename, "w");
     printf("# This is the hmc code for twisted Mass Wilson QCD\n\nVersion %s\n", Version);
-#ifdef _NEW_GEOMETRY
-    printf("# The code was compiled with -D_NEW_GEOMETRY\n");
+#ifdef TM_NEW_GEOMETRY
+    printf("# The code was compiled with -DTM_NEW_GEOMETRY\n");
 #endif
-#ifdef _GAUGE_COPY
-    printf("# The code was compiled with -D_GAUGE_COPY\n");
+#ifdef TM_USE_GAUGE_COPY
+    printf("# The code was compiled with -DTM_USE_GAUGE_COPY\n");
 #endif
     printf("# The lattice size is %d x %d x %d x %d\n", (int)(T * g_nproc_t), (int)(LX * g_nproc_x),
            (int)(LY), (int)(LZ));
@@ -430,7 +430,7 @@ int main(int argc, char *argv[]) {
 #ifdef TM_USE_MPI
   xchange_gauge(g_gauge_field);
 #endif
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   update_backward_gauge();
 #endif
 
diff --git a/test_lemon.c b/src/bin/tests/test_lemon.c
similarity index 99%
rename from test_lemon.c
rename to src/bin/tests/test_lemon.c
index f2147ad3f..9ef46be7b 100644
--- a/test_lemon.c
+++ b/src/bin/tests/test_lemon.c
@@ -66,7 +66,7 @@ int main(int argc, char *argv[]) {
 
   tmlqcd_mpi_init(argc, argv);
 
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 1);
 #else
   init_gauge_field(VOLUMEPLUSRAND + g_dbw2rand, 0);
@@ -108,7 +108,7 @@ int main(int argc, char *argv[]) {
   xlfInfo = construct_paramsXlfInfo(plaquette_energy, 0);
   write_lime_gauge_field("conf.lime", 64, xlfInfo);
 
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
   if (g_proc_id == 0) {
     printf("Now we do write with lemon to conf.lemon...\n");
   }
diff --git a/src/lib/CMakeLists.txt b/src/lib/CMakeLists.txt
new file mode 100644
index 000000000..937836ec1
--- /dev/null
+++ b/src/lib/CMakeLists.txt
@@ -0,0 +1,456 @@
+list(
+  APPEND
+  IO_SRC_C
+  io/utils_write_inverter_info.c
+  io/gauge_read.c
+  io/utils_write_xlf.c
+  io/utils_construct_reader.c
+  io/params_construct_xlfInfo.c
+  io/utils_kill_with_error.c
+  io/DML_crc32.c
+  io/spinor_write_source_format.c
+  io/deri_write_stdout.c
+  io/spinor_write_propagator_format.c
+  io/utils_engineering.c
+  io/utils_parse_propagator_type.c
+  io/io_cm.c
+  io/utils_parse_ildgformat_xml.c
+  io/utils_read_message.c
+  io/utils_write_ildg_format.c
+  io/utils_destruct_writer.c
+  io/gauge_write.c
+  io/utils_write_message.c
+  io/params_construct_ildgFormat.c
+  io/spinor_read.c
+  io/utils_close_reader_record.c
+  io/spinor_read_binary.c
+  io/utils.c
+  io/spinor_write_stdout.c
+  io/spinor_write_info.c
+  io/utils_write_checksum.c
+  io/utils_write_header.c
+  io/eospinor_read.c
+  io/utils_write_first_message.c
+  io/params_construct_InverterInfo.c
+  io/utils_parse_checksum_xml.c
+  io/utils_construct_writer.c
+  io/sw_write_stdout.c
+  io/spinor_write_propagator_type.c
+  io/gauge_write_binary.c
+  io/spinor_write.c
+  io/utils_write_xlf_xml.c
+  io/params_construct_propagatorFormat.c
+  io/gauge_read_binary.c
+  io/dml.c
+  io/spinor_write_binary.c
+  io/utils_destruct_reader.c
+  io/utils_close_writer_record.c
+  io/eospinor_write.c
+  io/gauge_write_luscher_binary.c
+  io/params_construct_sourceFormat.c)
+
+list(
+  APPEND
+  INIT_SRC_C
+  init/init_dirac_halfspinor.c
+  init/init_geometry_indices.c
+  init/init_openmp.c
+  init/init_gauge_field.c
+  init/init_parallel.c
+  init/init_chi_spinor_field.c
+  init/init_gauge_fg.c
+  init/init_spinor_field.c
+  init/init_global_states.c
+  init/init_bispinor_field.c
+  init/init_gauge_tmp.c
+  init/init_critical_globals.c
+  init/init_omp_accumulators.c
+  init/init_stout_smear_vars.c
+  init/init_moment_field.c)
+
+list(
+  APPEND
+  SOLVER_SRC_C
+  solver/bicg_complex.c
+  solver/dfl_projector.c
+  solver/gcr.c
+  # this file is not used anywhere at the moment
+  #  solver/gmres_precon.c
+  solver/chrono_guess.c
+  solver/gcr4complex.c
+  solver/jdher.c
+  solver/gmres_dr.c
+  solver/cg_her_bi.c
+  solver/solver_field.c
+  solver/quicksort.c
+  solver/bicgstab2.c
+  solver/cgs_real.c
+  solver/monomial_solve.c
+  solver/cr.c
+  solver/gram-schmidt.c
+  solver/solver_types.c
+  solver/cg_her.c
+  solver/jdher_bi.c
+  solver/eigcg.c
+  solver/poly_precon.c
+  solver/Msap.c
+  solver/fgmres.c
+  solver/dirac_operator_eigenvectors.c
+  solver/incr_eigcg.c
+  solver/index_jd.c
+  solver/sumr.c
+  solver/cgne4complex.c
+  solver/eigenvalues_bi.c
+  solver/gmres.c
+  solver/lu_solve.c
+  solver/diagonalise_general_matrix.c
+  solver/mcr.c
+  solver/bicgstabell.c
+  solver/rg_mixed_cg_her.c
+  solver/mixed_cg_her.c
+  solver/mixed_cg_mms_tm_nd.c
+  solver/rg_mixed_cg_her_nd.c
+  solver/restart_X.c
+  solver/generate_dfl_subspace.c
+  solver/eigenvalues.c
+  solver/mcr4complex.c
+  solver/mr4complex.c
+  solver/bicgstab_complex.c
+  solver/cg_mms_tm_nd.c
+  solver/mr.c
+  solver/cg_her_nd.c
+  solver/bicgstab_complex_bi.c
+  solver/sub_low_ev.c
+  solver/ortho.c
+  solver/pcg_her.c
+  solver/fgmres4complex.c
+  solver/cg_mms_tm.c
+  solver/init_guess.c)
+
+# sources from linalg/ compiled into the tmlqcd library (each file listed exactly once)
+list(
+  APPEND
+  LINALG_SRC_C
+  linalg/assign_mul_bra_add_mul_r.c
+  linalg/mul_r_gamma5.c
+  linalg/convert_eo_to_lexic.c
+  linalg/print_spinor.c
+  linalg/mul_diff_mul_r.c
+  linalg/square_norm_32.c
+  linalg/mul.c
+  linalg/mul_r.c
+  linalg/mul_gamma5.c
+  linalg/ratio.c
+  linalg/square_norm.c
+  linalg/mul_diff_mul.c
+  linalg/square_and_minmax.c
+  linalg/add.c
+  linalg/assign_add_mul_add_mul_r.c
+  linalg/comp_decomp.c
+  linalg/mul_add_mul.c
+  linalg/diff_32.c
+  linalg/assign_add_mul.c
+  linalg/addto_32.c
+  linalg/assign_mul_add_mul_add_mul_add_mul_r.c
+  linalg/assign_add_mul_r.c
+  linalg/diff.c
+  linalg/assign_mul_add_mul_r.c
+  linalg/scalar_prod_r.c
+  linalg/assign_to_32.c
+  linalg/assign_add_mul_add_mul.c
+  linalg/mul_diff_r.c
+  linalg/assign_mul_add_r_and_square.c
+  linalg/assign_mul_add_mul_r_32.c
+  linalg/assign_mul_add_mul.c
+  linalg/assign_mul_add_mul_add_mul_r.c
+  linalg/scalar_prod_r_32.c
+  linalg/assign_mul_add_r.c
+  linalg/assign_mul_add_r_32.c
+  linalg/assign_add_mul_r_32.c
+  linalg/convert_even_to_lexic.c
+  linalg/mul_r_32.c
+  linalg/assign_add_mul_r_add_mul.c
+  linalg/convert_odd_to_lexic.c
+  linalg/diff_and_square_norm.c
+  linalg/scalar_prod_i.c
+  linalg/mul_add_mul_r.c
+  linalg/assign_diff_mul.c
+  linalg/assign_mul_bra_add_mul_ket_add_r.c
+  linalg/set_even_to_zero.c
+  linalg/assign_mul_add.c
+  linalg/square_and_prod_r.c
+  linalg/assign_mul_bra_add_mul_ket_add.c
+  linalg/scalar_prod.c
+  linalg/mattimesvec.c
+  linalg/assign.c
+  linalg/print_spinor_similar_components.c)
+
+list(APPEND RATIONAL_SRC_C rational/zolotarev.c rational/rational.c
+     rational/elliptic.c)
+
+list(
+  APPEND
+  OPERATOR_SRC_C
+  operator/clover_invert.c
+  operator/tm_operators_nd_32.c
+  operator/clover_accumulate_deriv.c
+  operator/Hopping_Matrix.c
+  operator/tm_operators.c
+  operator/tm_times_Hopping_Matrix.c
+  operator/clovertm_operators_32.c
+  operator/Dov_proj.c
+  operator/clover_deriv.c
+  operator/clover_det.c
+  operator/clover_leaf.c
+  operator/clovertm_operators.c
+  operator/Dov_psi.c
+  operator/tm_operators_nd.c
+  operator/tm_sub_Hopping_Matrix.c
+  operator/Hopping_Matrix_nocom.c
+  operator/clover_term.c
+  operator/Hopping_Matrix_32_nocom.c
+  operator/D_psi.c
+  operator/tm_operators_32.c
+  operator/Hopping_Matrix_32.c)
+
+list(
+  APPEND
+  SMEARING_SRC_C
+  smearing/hex_stout_exclude_two.c
+  smearing/hex_hex_smear.c
+  smearing/utils_print_su3.c
+  smearing/hyp_APE_project_exclude_none.c
+  smearing/hyp_hyp_staples_exclude_one.c
+  smearing/hyp_APE_project_exclude_one.c
+  smearing/hex_stout_exclude_one.c
+  smearing/hyp_hyp_staples_exclude_two.c
+  smearing/hex_stout_exclude_none.c
+  smearing/stout_stout_smear.c
+  smearing/hyp_hyp_smear.c
+  smearing/hyp_APE_project_exclude_two.c
+  smearing/utils_project_herm.c
+  smearing/utils_reunitarize.c
+  smearing/utils_generic_staples.c
+  smearing/hyp_hyp_staples_exclude_none.c
+  smearing/ape_ape_smear.c
+  smearing/uils_print_config_to_screen.c
+  smearing/utils_project_antiherm.c)
+
+list(
+  APPEND
+  BUFFER_SRC_C
+  buffers/gauge_return_gauge_field.c
+  buffers/gauge_get_gauge_field.c
+  buffers/gauge_finalize_gauge_buffers.c
+  buffers/gauge_initialize_gauge_buffers.c
+  buffers/gauge.c
+  buffers/gauge_free_unused_gauge_buffers.c
+  buffers/gauge_get_gauge_field_array.c
+  buffers/utils_generic_exchange.c
+  buffers/gauge_allocate_gauge_buffers.c
+  buffers/gauge_return_gauge_field_array.c)
+
+list(
+  APPEND
+  MONOMIAL_SRC_C
+  monomial/detratio_monomial.c
+  monomial/poly_monomial.c
+  monomial/cloverdetratio_monomial.c
+  monomial/ndrat_monomial.c
+  monomial/cloverdet_monomial.c
+  monomial/clover_trlog_monomial.c
+  monomial/cloverndpoly_monomial.c
+  monomial/monitor_forces.c
+  monomial/ndpoly_monomial.c
+  monomial/det_monomial.c
+  monomial/monomial.c
+  monomial/cloverdetratio_rwmonomial.c
+  monomial/gauge_monomial.c
+  monomial/clovernd_trlog_monomial.c
+  monomial/ratcor_monomial.c
+  monomial/nddetratio_monomial.c
+  monomial/rat_monomial.c
+  monomial/ndratcor_monomial.c
+  monomial/moment_energy.c)
+
+list(
+  APPEND
+  EXCHANGE_SRC_C
+  xchange/xchange_lexicfield.c
+  xchange/xchange_2fields.c
+  xchange/xchange_gauge.c
+  xchange/xchange_halffield.c
+  xchange/little_field_gather.c
+  xchange/xchange_deri.c
+  xchange/xchange_field.c)
+
+list(
+  APPEND
+  MEAS_SRC_C
+  meas/pion_norm.c
+  meas/correlators.c
+  meas/polyakov_loop.c
+  meas/measurements.c
+  meas/oriented_plaquettes.c
+  meas/gradient_flow.c
+  meas/measure_clover_field_strength_observables.c)
+
+list(
+  APPEND
+  MAIN_SRC_C
+  measure_gauge_action.c
+  start.c
+  deriv_Sb.c
+  reweighting_factor_nd.c
+  ranlxs.c
+  source_generation.c
+  invert_doublet_eo.c
+  geometry_eo.c
+  getopt.c
+  tm_debug_printf.c
+  chebyshev_polynomial_nd.c
+  invert_eo.c
+  little_D.c
+  get_rectangle_staples.c
+  rnd_gauge_trafo.c
+  measure_rectangles.c
+  deriv_Sb_D_psi.c
+  mpi_init.c
+  update_momenta_fg.c
+  gamma.c
+  matrix_utils.c
+  reweighting_factor.c
+  update_tm.c
+  invert_overlap.c
+  phmc.c
+  get_staples.c
+  clenshaw_coef.c
+  block.c
+  spinor_fft.c
+  boundary.c
+  prepare_source.c
+  DDalphaAMG_interface.c
+  update_backward_gauge.c
+  invert_clover_eo.c
+  gettime.c
+  update_momenta.c
+  sighandler.c
+  compare_derivative.c
+  ranlxd.c
+  aligned_malloc.c
+  fatal_error.c
+  operator.c
+  expo.c
+  overrelaxation.c
+  Ptilde_nd.c
+  update_gauge.c
+  integrator.c)
+
+list(APPEND TEST_SRC_C test/check_xchange.c test/check_geometry.c
+     test/overlaptests.c)
+if(TM_USE_QPHIX)
+  list(APPEND MAIN_SRC_C qphix/qphix_interface.cpp)
+endif()
+
+if(TM_USE_QUDA)
+  list(APPEND MAIN_SRC_C quda_interface.c)
+endif()
+
+list(
+  APPEND
+  ALL_SRC
+  ${MAIN_SRC_C}
+  ${EXCHANGE_SRC_C}
+  ${MONOMIAL_SRC_C}
+  ${BUFFER_SRC_C}
+  ${SMEARING_SRC_C}
+  ${OPERATOR_SRC_C}
+  ${RATIONAL_SRC_C}
+  ${LINALG_SRC_C}
+  ${IO_SRC_C}
+  ${INIT_SRC_C}
+  ${SOLVER_SRC_C}
+  ${TEST_SRC_C}
+  ${MEAS_SRC_C}
+  ${PROJECT_BINARY_DIR}/git_hash.c)
+
+include_directories(
+  $<BUILD_INTERFACE:${CMAKE_BINARY_DIR}>
+  $<BUILD_INTERFACE:${CMAKE_SOURCE_DIR}/src/lib/include>
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+  $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
+  ${TMLQCD_CLIME_INCLUDE_DIRS})
+
+# CMake >= 4.0: flex flags go in OPTIONS as a list; older: COMPILE_FLAGS takes one string
+if(CMAKE_MAJOR_VERSION LESS 4)
+  flex_target(tmlqcd_input_read read_input.l ${CMAKE_BINARY_DIR}/read_input.c
+              COMPILE_FLAGS "-Ca -Ptmlqcd -i")
+else()
+  flex_target(tmlqcd_input_read read_input.l ${CMAKE_BINARY_DIR}/read_input.c
+              OPTIONS -Ca -Ptmlqcd -i)
+endif()
+
+# create a target library with namespacing because cmake does not know name
+# space at all
+
+if(BUILD_SHARED_LIBS)
+  add_library(tmlqcd SHARED "${ALL_SRC};${FLEX_tmlqcd_input_read_OUTPUTS}")
+else()
+  add_library(tmlqcd STATIC "${ALL_SRC};${FLEX_tmlqcd_input_read_OUTPUTS}")
+endif()
+
+set_target_properties(tmlqcd PROPERTIES VERSION ${PROJECT_VERSION} SOVERSION 1)
+
+# link the dependencies; one item per generator expression (an unquoted genex must not contain whitespace)
+target_link_libraries(
+  tmlqcd
+  PUBLIC $<$<BOOL:${TM_USE_DDalphaAMG}>:DDalphaAMG>
+         $<$<BOOL:${TM_USE_QPHIX}>:tmlqcd::qphix>
+         $<$<BOOL:${TM_USE_FFTW}>:tmlqcd::fftw3>
+         $<$<BOOL:${TM_USE_QUDA}>:QUDA::quda>
+         $<$<BOOL:${TM_USE_CUDA}>:CUDA::cufft>
+         $<$<BOOL:${TM_USE_CUDA}>:CUDA::cufftw>
+         $<$<BOOL:${TM_USE_CUDA}>:CUDA::cublas>
+         $<$<BOOL:${TM_USE_CUDA}>:CUDA::cudart>
+         $<$<BOOL:${TM_USE_CUDA}>:CUDA::cuda_driver>
+         $<$<BOOL:${TM_USE_HIP}>:hip::hipfft>
+         $<$<BOOL:${TM_USE_HIP}>:roc::hipblas>
+         $<$<BOOL:${TM_USE_HIP}>:hip::host>
+         tmlqcd::clime
+         $<$<BOOL:${TM_USE_LEMON}>:lemon::lemon>
+         ${LAPACK_LIBRARIES}
+         ${BLAS_LIBRARIES}
+         $<$<BOOL:${TM_USE_MPI}>:MPI::MPI_C>
+         $<$<BOOL:${TM_USE_MPI}>:MPI::MPI_CXX>
+         $<$<BOOL:${TM_USE_OMP}>:OpenMP::OpenMP_C>
+         $<$<BOOL:${TM_USE_OMP}>:OpenMP::OpenMP_CXX>
+         m)
+
+target_compile_definitions(
+  tmlqcd PUBLIC HAVE_CONFIG_H $<$<BOOL:${TM_USE_HIP}>:${TM_GPU_PLATFORM_DFLAGS}>)
+
+target_include_directories(
+  tmlqcd
+  PUBLIC $<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}>
+         $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>
+         $<BUILD_INTERFACE:${CMAKE_CURRENT_BINARY_DIR}>)
+
+install( # LIBRARY serves shared builds, ARCHIVE serves the default static build
+  TARGETS tmlqcd EXPORT tmlqcd_targets
+  LIBRARY DESTINATION "${CMAKE_INSTALL_LIBDIR}"
+  ARCHIVE DESTINATION "${CMAKE_INSTALL_LIBDIR}")
+
+install(
+  EXPORT tmlqcd_targets
+  FILE tmlQCDTargets.cmake
+  NAMESPACE tmlQCD::
+  DESTINATION "${CMAKE_INSTALL_LIBDIR}/cmake/${PROJECT_NAME}")
+
+install(
+  DIRECTORY ${CMAKE_SOURCE_DIR}/src/lib/include
+  DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}"
+  FILES_MATCHING
+  PATTERN "*.h")
+
+install(FILES "${CMAKE_BINARY_DIR}/tmlqcd_config_internal.h"
+        DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}")
diff --git a/DDalphaAMG_interface.c b/src/lib/DDalphaAMG_interface.c
similarity index 99%
rename from DDalphaAMG_interface.c
rename to src/lib/DDalphaAMG_interface.c
index 029d2f76f..a4b1d61cf 100644
--- a/DDalphaAMG_interface.c
+++ b/src/lib/DDalphaAMG_interface.c
@@ -23,7 +23,7 @@
 
 #include "DDalphaAMG_interface.h"
 
-#ifndef DDalphaAMG
+#ifndef TM_USE_DDalphaAMG
 
 int mg_setup_iter;
 int mg_coarse_setup_iter;
@@ -207,7 +207,8 @@ static inline int MG_check(spinor *const phi_new, spinor *const phi_old, const i
           "ERROR: something bad happened... MG converged giving the wrong solution!! Trying to "
           "restart... \n");
       printf(
-          "ERROR contd: || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e > %e "
+          "ERROR contd: || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = "
+          "%e > %e "
           "\n",
           differ[0], differ[1], differ[0] / differ[1], precision);
     }
@@ -257,7 +258,8 @@ static inline int MG_check_nd(spinor *const up_new, spinor *const dn_new, spinor
           "ERROR: something bad happened... MG converged giving the wrong solution!! Trying to "
           "restart... \n");
       printf(
-          "ERROR contd: || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e > %e "
+          "ERROR contd: || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = "
+          "%e > %e "
           "\n",
           differ[0], differ[1], differ[0] / differ[1], precision);
     }
@@ -304,7 +306,8 @@ static inline int MG_mms_check_nd(spinor **const up_new, spinor **const dn_new,
             "ERROR: something bad happened... MG converged giving the wrong solution!! Trying to "
             "restart... \n");
         printf(
-            "ERROR contd: || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = %e > "
+            "ERROR contd: || s - f_{tmLQC} * f_{DDalphaAMG}^{-1} * s || / ||s|| = %e / %e = "
+            "%e > "
             "%e \n",
             differ[0], differ[1], differ[0] / differ[1], precision[i]);
       }
@@ -529,15 +532,15 @@ static int MG_solve(spinor *const phi_new, spinor *const phi_old, const double p
 
 static int MG_solve_nd(spinor *up_new, spinor *dn_new, spinor *const up_old, spinor *const dn_old,
                        const double precision, const int N, matrix_mult_nd f) {
-  // for rescaling  convention in DDalphaAMG: (4+m)*\delta_{x,y} in tmLQCD: 1*\delta_{x,y} ->
+  // for rescaling  convention in DDalphaAMG: (4+m)*\delta_{x,y} in tmLQCD: 1*\delta_{x,y} ->
   // rescale by 1/4+m moreover in the nd case, the tmLQCD is multiplied by phmc_invmaxev
   double mg_scale = 0.5 / g_kappa / phmc_invmaxev;
   double sqnorm;
   int init_guess = 0;
   spinor *old1 = up_old;
   spinor *old2 = dn_old;
-  spinor *new1 = up_new, *new1tmp;
-  spinor *new2 = dn_new, *new2tmp;
+  spinor *new1 = up_new, *new1tmp = NULL;
+  spinor *new2 = dn_new, *new2tmp = NULL;
   spinor **solver_field = NULL, **oe_solver_field = NULL;
   int no_solver_field = 0;
 
diff --git a/DDalphaAMG_interface.h b/src/lib/DDalphaAMG_interface.h
similarity index 98%
rename from DDalphaAMG_interface.h
rename to src/lib/DDalphaAMG_interface.h
index 96f59c31e..cc7ae1678 100644
--- a/DDalphaAMG_interface.h
+++ b/src/lib/DDalphaAMG_interface.h
@@ -17,7 +17,7 @@
  * You should have received a copy of the GNU General Public License
  * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
  *
- * Interface for DDalphaAMG
+ * Interface for DDalphaAMG
  *
  *******************************************************************************/
 
diff --git a/Ptilde_nd.c b/src/lib/Ptilde_nd.c
similarity index 100%
rename from Ptilde_nd.c
rename to src/lib/Ptilde_nd.c
diff --git a/Ptilde_nd.h b/src/lib/Ptilde_nd.h
similarity index 100%
rename from Ptilde_nd.h
rename to src/lib/Ptilde_nd.h
diff --git a/aligned_malloc.c b/src/lib/aligned_malloc.c
similarity index 100%
rename from aligned_malloc.c
rename to src/lib/aligned_malloc.c
diff --git a/aligned_malloc.h b/src/lib/aligned_malloc.h
similarity index 100%
rename from aligned_malloc.h
rename to src/lib/aligned_malloc.h
diff --git a/block.c b/src/lib/block.c
similarity index 100%
rename from block.c
rename to src/lib/block.c
diff --git a/block.h b/src/lib/block.h
similarity index 100%
rename from block.h
rename to src/lib/block.h
diff --git a/boundary.c b/src/lib/boundary.c
similarity index 100%
rename from boundary.c
rename to src/lib/boundary.c
diff --git a/boundary.h b/src/lib/boundary.h
similarity index 100%
rename from boundary.h
rename to src/lib/boundary.h
diff --git a/buffers/Makefile.in b/src/lib/buffers/Makefile.in
similarity index 100%
rename from buffers/Makefile.in
rename to src/lib/buffers/Makefile.in
diff --git a/buffers/gauge.c b/src/lib/buffers/gauge.c
similarity index 100%
rename from buffers/gauge.c
rename to src/lib/buffers/gauge.c
diff --git a/buffers/gauge.h b/src/lib/buffers/gauge.h
similarity index 100%
rename from buffers/gauge.h
rename to src/lib/buffers/gauge.h
diff --git a/buffers/gauge.ih b/src/lib/buffers/gauge.ih
similarity index 100%
rename from buffers/gauge.ih
rename to src/lib/buffers/gauge.ih
diff --git a/buffers/gauge_allocate_gauge_buffers.c b/src/lib/buffers/gauge_allocate_gauge_buffers.c
similarity index 100%
rename from buffers/gauge_allocate_gauge_buffers.c
rename to src/lib/buffers/gauge_allocate_gauge_buffers.c
diff --git a/buffers/gauge_finalize_gauge_buffers.c b/src/lib/buffers/gauge_finalize_gauge_buffers.c
similarity index 100%
rename from buffers/gauge_finalize_gauge_buffers.c
rename to src/lib/buffers/gauge_finalize_gauge_buffers.c
diff --git a/buffers/gauge_free_unused_gauge_buffers.c b/src/lib/buffers/gauge_free_unused_gauge_buffers.c
similarity index 100%
rename from buffers/gauge_free_unused_gauge_buffers.c
rename to src/lib/buffers/gauge_free_unused_gauge_buffers.c
diff --git a/buffers/gauge_get_gauge_field.c b/src/lib/buffers/gauge_get_gauge_field.c
similarity index 100%
rename from buffers/gauge_get_gauge_field.c
rename to src/lib/buffers/gauge_get_gauge_field.c
diff --git a/buffers/gauge_get_gauge_field_array.c b/src/lib/buffers/gauge_get_gauge_field_array.c
similarity index 100%
rename from buffers/gauge_get_gauge_field_array.c
rename to src/lib/buffers/gauge_get_gauge_field_array.c
diff --git a/buffers/gauge_initialize_gauge_buffers.c b/src/lib/buffers/gauge_initialize_gauge_buffers.c
similarity index 100%
rename from buffers/gauge_initialize_gauge_buffers.c
rename to src/lib/buffers/gauge_initialize_gauge_buffers.c
diff --git a/buffers/gauge_return_gauge_field.c b/src/lib/buffers/gauge_return_gauge_field.c
similarity index 100%
rename from buffers/gauge_return_gauge_field.c
rename to src/lib/buffers/gauge_return_gauge_field.c
diff --git a/buffers/gauge_return_gauge_field_array.c b/src/lib/buffers/gauge_return_gauge_field_array.c
similarity index 100%
rename from buffers/gauge_return_gauge_field_array.c
rename to src/lib/buffers/gauge_return_gauge_field_array.c
diff --git a/buffers/utils.h b/src/lib/buffers/utils.h
similarity index 100%
rename from buffers/utils.h
rename to src/lib/buffers/utils.h
diff --git a/buffers/utils.ih b/src/lib/buffers/utils.ih
similarity index 100%
rename from buffers/utils.ih
rename to src/lib/buffers/utils.ih
diff --git a/buffers/utils_generic_exchange.blocking.inc b/src/lib/buffers/utils_generic_exchange.blocking.inc
similarity index 98%
rename from buffers/utils_generic_exchange.blocking.inc
rename to src/lib/buffers/utils_generic_exchange.blocking.inc
index e6e5f975c..71b44900c 100644
--- a/buffers/utils_generic_exchange.blocking.inc
+++ b/src/lib/buffers/utils_generic_exchange.blocking.inc
@@ -26,7 +26,7 @@
 		 g_cart_grid, &status);
   }
   
-#  if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#  if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in x direction */
   /* recieve the data from the neighbour on the right in x direction */
   MPI_Sendrecv(buffer[0],              1, slice_X_gath_type, g_nb_x_dn, 93,
@@ -108,10 +108,10 @@
 		 1, edge_XT_cont_type, g_nb_t_dn, 98,
 		 g_cart_grid, &status);
   }
-  /* end of if defined PARALLELXT || PARALLELXYT || PARALLELXYZT*/
+  /* end of if defined TM_PARALLELXT || TM_PARALLELXYT || TM_PARALLELXYZT*/
 #  endif
 
-#  if (defined PARALLELXYT || defined PARALLELXYZT)
+#  if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in y direction */
   /* recieve the data from the neighbour on the right in y direction */
   MPI_Sendrecv(buffer[0],                            1, slice_Y_gath_type, g_nb_y_dn, 103,
@@ -247,9 +247,9 @@
 		 g_cart_grid, &status);
   }
 
-  /* end of if defined PARALLELXYT || PARALLELXYZT */
+  /* end of if defined TM_PARALLELXYT || TM_PARALLELXYZT */
 #  endif
-#  if defined PARALLELXYZT
+#  if defined TM_PARALLELXYZT
   /* z-Rand */
   /* send the data to the neighbour on the left in z direction */
   /* recieve the data from the neighbour on the right in z direction */
@@ -454,4 +454,4 @@
 
   }
 
-#endif /* PARALLELXYZT */
+#endif /* TM_PARALLELXYZT */
diff --git a/buffers/utils_generic_exchange.c b/src/lib/buffers/utils_generic_exchange.c
similarity index 96%
rename from buffers/utils_generic_exchange.c
rename to src/lib/buffers/utils_generic_exchange.c
index c1c3c844a..d1a68a351 100644
--- a/buffers/utils_generic_exchange.c
+++ b/src/lib/buffers/utils_generic_exchange.c
@@ -4,13 +4,13 @@
 void generic_exchange(void *field_in, int bytes_per_site) {}
 #else /* MPI */
 void generic_exchange(void *field_in, int bytes_per_site) {
-#if defined _NON_BLOCKING
+#if defined TM_NON_BLOCKING
   int cntr = 0;
   MPI_Request request[108];
   MPI_Status status[108];
-#else  /* _NON_BLOCKING */
+#else  /* TM_NON_BLOCKING */
   MPI_Status status;
-#endif /* _NON_BLOCKING */
+#endif /* TM_NON_BLOCKING */
   static int initialized = 0;
 
   /* We start by defining all the MPI datatypes required */
@@ -125,11 +125,11 @@ void generic_exchange(void *field_in, int bytes_per_site) {
   }
 
   /* Following are implementations using different compile time flags */
-#if defined _NON_BLOCKING
+#if defined TM_NON_BLOCKING
 #include "utils_generic_exchange.nonblocking.inc"
-#else  /* _NON_BLOCKING */
+#else /* TM_NON_BLOCKING */
 #include "utils_generic_exchange.blocking.inc"
-#endif /* _NON_BLOCKING */
+#endif /* TM_NON_BLOCKING */
 }
 
 #endif /* MPI */
diff --git a/buffers/utils_generic_exchange.nonblocking.inc b/src/lib/buffers/utils_generic_exchange.nonblocking.inc
similarity index 98%
rename from buffers/utils_generic_exchange.nonblocking.inc
rename to src/lib/buffers/utils_generic_exchange.nonblocking.inc
index 0789a490f..71409008f 100644
--- a/buffers/utils_generic_exchange.nonblocking.inc
+++ b/src/lib/buffers/utils_generic_exchange.nonblocking.inc
@@ -32,7 +32,7 @@
     cntr=cntr+2;
   }
   
-#    if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#    if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in x direction */
   /* recieve the data from the neighbour on the right in x direction */
   MPI_Isend(buffer[0],              1, slice_X_gath_type, g_nb_x_dn, 87,
@@ -71,7 +71,7 @@
 #    endif
   MPI_Waitall(cntr, request, status);
   cntr=0;
-#    if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#    if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* The edges */
 
   /* send the data to the neighbour on the left in t direction */
@@ -137,10 +137,10 @@
 	      g_cart_grid, &request[cntr+1]);
     cntr=cntr+2;
   }
-  /* end of if defined PARALLELXT || PARALLELXYT || PARALLELXYZT*/
+  /* end of if defined TM_PARALLELXT || TM_PARALLELXYT || TM_PARALLELXYZT*/
 #    endif
 
-#    if (defined PARALLELXYT || defined PARALLELXYZT)
+#    if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in y direction */
   /* recieve the data from the neighbour on the right in y direction */
   MPI_Isend(buffer[0],                            1, slice_Y_gath_type, g_nb_y_dn, 106,
@@ -177,7 +177,7 @@
 #    endif
   MPI_Waitall(cntr, request, status);
   cntr=0;
-#    if (defined PARALLELXYT || defined PARALLELXYZT)
+#    if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
 
   /* jetzt wirds richtig eklig ... */
 
@@ -312,9 +312,9 @@
     cntr=cntr+2;
   }
 
-  /* end of if defined PARALLELXYT || PARALLELXYZT */
+  /* end of if defined TM_PARALLELXYT || TM_PARALLELXYZT */
 #    endif
-#    if defined PARALLELXYZT
+#    if defined TM_PARALLELXYZT
   /* z-Rand */
   /* send the data to the neighbour on the left in z direction */
   /* recieve the data from the neighbour on the right in z direction */
@@ -359,7 +359,7 @@
   }
 #    endif
   MPI_Waitall(cntr, request, status);
-#    if defined PARALLELXYZT
+#    if defined TM_PARALLELXYZT
   cntr=0;
   /* edges */
 
diff --git a/chebyshev_polynomial.c b/src/lib/chebyshev_polynomial.c
similarity index 98%
rename from chebyshev_polynomial.c
rename to src/lib/chebyshev_polynomial.c
index 501937b80..f67055cda 100644
--- a/chebyshev_polynomial.c
+++ b/src/lib/chebyshev_polynomial.c
@@ -280,11 +280,11 @@ void degree_of_polynomial(const int repro) {
           printf("||auxc_3||=%e\n",temp); */
 
     diff(&auxs[0], &auxs[0], &aux3s[0], VOLUME / 2);
-    temp = square_norm(&auxs[0], VOLUME / 2) / square_norm(&aux3s[0], VOLUME / 2, 1) / 4.0;
+    temp = square_norm(&auxs[0], VOLUME / 2, 1) / square_norm(&aux3s[0], VOLUME / 2, 1) / 4.0;
     if (g_proc_id == g_stdio_proc) {
       printf("difference=%e\n", temp);
       diff(&auxc[0], &auxc[0], &aux3c[0], VOLUME / 2);
-      temp = square_norm(&auxc[0], VOLUME / 2) / square_norm(&aux3c[0], VOLUME / 2, 1) / 4.0;
+      temp = square_norm(&auxc[0], VOLUME / 2, 1) / square_norm(&aux3c[0], VOLUME / 2, 1) / 4.0;
       printf("difference=%e\n", temp);
     }
     if (temp < stopeps) break;
diff --git a/chebyshev_polynomial.h b/src/lib/chebyshev_polynomial.h
similarity index 100%
rename from chebyshev_polynomial.h
rename to src/lib/chebyshev_polynomial.h
diff --git a/chebyshev_polynomial_nd.c b/src/lib/chebyshev_polynomial_nd.c
similarity index 100%
rename from chebyshev_polynomial_nd.c
rename to src/lib/chebyshev_polynomial_nd.c
diff --git a/chebyshev_polynomial_nd.h b/src/lib/chebyshev_polynomial_nd.h
similarity index 100%
rename from chebyshev_polynomial_nd.h
rename to src/lib/chebyshev_polynomial_nd.h
diff --git a/clenshaw_coef.c b/src/lib/clenshaw_coef.c
similarity index 100%
rename from clenshaw_coef.c
rename to src/lib/clenshaw_coef.c
diff --git a/clenshaw_coef.h b/src/lib/clenshaw_coef.h
similarity index 100%
rename from clenshaw_coef.h
rename to src/lib/clenshaw_coef.h
diff --git a/compare_derivative.c b/src/lib/compare_derivative.c
similarity index 100%
rename from compare_derivative.c
rename to src/lib/compare_derivative.c
diff --git a/compare_derivative.h b/src/lib/compare_derivative.h
similarity index 100%
rename from compare_derivative.h
rename to src/lib/compare_derivative.h
diff --git a/cu/COPYING b/src/lib/cu/COPYING
similarity index 100%
rename from cu/COPYING
rename to src/lib/cu/COPYING
diff --git a/cu/COPYING.LESSER b/src/lib/cu/COPYING.LESSER
similarity index 100%
rename from cu/COPYING.LESSER
rename to src/lib/cu/COPYING.LESSER
diff --git a/cu/Makefile.in b/src/lib/cu/Makefile.in
similarity index 100%
rename from cu/Makefile.in
rename to src/lib/cu/Makefile.in
diff --git a/cu/check-regressions b/src/lib/cu/check-regressions
similarity index 100%
rename from cu/check-regressions
rename to src/lib/cu/check-regressions
diff --git a/cu/cu.c b/src/lib/cu/cu.c
similarity index 100%
rename from cu/cu.c
rename to src/lib/cu/cu.c
diff --git a/cu/cu.h b/src/lib/cu/cu.h
similarity index 100%
rename from cu/cu.h
rename to src/lib/cu/cu.h
diff --git a/default_input_values.h b/src/lib/default_input_values.h
similarity index 100%
rename from default_input_values.h
rename to src/lib/default_input_values.h
diff --git a/deriv_Sb.c b/src/lib/deriv_Sb.c
similarity index 93%
rename from deriv_Sb.c
rename to src/lib/deriv_Sb.c
index 4303c80d5..c48c8db38 100644
--- a/deriv_Sb.c
+++ b/src/lib/deriv_Sb.c
@@ -56,7 +56,7 @@
 void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field_t* const hf,
               const double factor) {
   tm_stopwatch_push(&g_timers, __func__, "");
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   if (g_update_gauge_copy) {
     update_backward_gauge(hf->gaugefield);
   }
@@ -83,10 +83,6 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field
 
 #ifdef TM_USE_OMP
 #undef static
-#endif
-
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(derivSb)
 #endif
 
     if (ieo == 0) {
@@ -114,7 +110,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field
       icy = g_lexic2eosub[iy];
 
       sp = k + icy;
-#if (defined _GAUGE_COPY && !defined _USE_HALFSPINOR)
+#if (defined TM_USE_GAUGE_COPY && !defined TM_USE_HALFSPINOR)
       up = &g_gauge_field_copy[icx][0];
 #else
     up = &hf->gaugefield[ix][0];
@@ -136,7 +132,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field
       icy = g_lexic2eosub[iy];
 
       sm = k + icy;
-#if (defined _GAUGE_COPY && !defined _USE_HALFSPINOR)
+#if (defined TM_USE_GAUGE_COPY && !defined TM_USE_HALFSPINOR)
       um = up + 1;
 #else
     um = &hf->gaugefield[iy][0];
@@ -159,7 +155,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field
       icy = g_lexic2eosub[iy];
 
       sp = k + icy;
-#if (defined _GAUGE_COPY && !defined _USE_HALFSPINOR)
+#if (defined TM_USE_GAUGE_COPY && !defined TM_USE_HALFSPINOR)
       up = um + 1;
 #else
     up = &hf->gaugefield[ix][1];
@@ -181,7 +177,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field
       icy = g_lexic2eosub[iy];
 
       sm = k + icy;
-#if (defined _GAUGE_COPY && !defined _USE_HALFSPINOR)
+#if (defined TM_USE_GAUGE_COPY && !defined TM_USE_HALFSPINOR)
       um = up + 1;
 #else
     um = &hf->gaugefield[iy][1];
@@ -203,7 +199,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field
       icy = g_lexic2eosub[iy];
 
       sp = k + icy;
-#if (defined _GAUGE_COPY && !defined _USE_HALFSPINOR)
+#if (defined TM_USE_GAUGE_COPY && !defined TM_USE_HALFSPINOR)
       up = um + 1;
 #else
     up = &hf->gaugefield[ix][2];
@@ -225,7 +221,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field
       icy = g_lexic2eosub[iy];
 
       sm = k + icy;
-#if (defined _GAUGE_COPY && !defined _USE_HALFSPINOR)
+#if (defined TM_USE_GAUGE_COPY && !defined TM_USE_HALFSPINOR)
       um = up + 1;
 #else
     um = &hf->gaugefield[iy][2];
@@ -247,7 +243,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field
       icy = g_lexic2eosub[iy];
 
       sp = k + icy;
-#if (defined _GAUGE_COPY && !defined _USE_HALFSPINOR)
+#if (defined TM_USE_GAUGE_COPY && !defined TM_USE_HALFSPINOR)
       up = um + 1;
 #else
     up = &hf->gaugefield[ix][3];
@@ -269,7 +265,7 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field
       icy = g_lexic2eosub[iy];
 
       sm = k + icy;
-#if (defined _GAUGE_COPY && !defined _USE_HALFSPINOR)
+#if (defined TM_USE_GAUGE_COPY && !defined TM_USE_HALFSPINOR)
       um = up + 1;
 #else
     um = &hf->gaugefield[iy][3];
@@ -292,7 +288,4 @@ void deriv_Sb(const int ieo, spinor* const l, spinor* const k, hamiltonian_field
   } /* OpenMP closing brace */
 #endif
   tm_stopwatch_pop(&g_timers, 0, 1, "");
-#ifdef _KOJAK_INST
-#pragma pomp inst end(derivSb)
-#endif
 }
diff --git a/deriv_Sb.h b/src/lib/deriv_Sb.h
similarity index 100%
rename from deriv_Sb.h
rename to src/lib/deriv_Sb.h
diff --git a/deriv_Sb_D_psi.c b/src/lib/deriv_Sb_D_psi.c
similarity index 98%
rename from deriv_Sb_D_psi.c
rename to src/lib/deriv_Sb_D_psi.c
index 6ba15d490..3f3319efc 100644
--- a/deriv_Sb_D_psi.c
+++ b/src/lib/deriv_Sb_D_psi.c
@@ -61,10 +61,6 @@ void deriv_Sb_D_psi(spinor* const l, spinor* const k, hamiltonian_field_t* const
 
 #ifdef TM_USE_OMP
 #undef static
-#endif
-
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(derivSb)
 #endif
 
     /************** loop over all lattice sites ****************/
@@ -225,9 +221,6 @@ void deriv_Sb_D_psi(spinor* const l, spinor* const k, hamiltonian_field_t* const
 
       /****************** end of loop ************************/
     }
-#ifdef _KOJAK_INST
-#pragma pomp inst end(derivSb)
-#endif
 
 #ifdef TM_USE_OMP
   } /*OpenMP closing brace */
diff --git a/deriv_Sb_D_psi.h b/src/lib/deriv_Sb_D_psi.h
similarity index 100%
rename from deriv_Sb_D_psi.h
rename to src/lib/deriv_Sb_D_psi.h
diff --git a/expo.c b/src/lib/expo.c
similarity index 100%
rename from expo.c
rename to src/lib/expo.c
diff --git a/expo.h b/src/lib/expo.h
similarity index 100%
rename from expo.h
rename to src/lib/expo.h
diff --git a/fatal_error.c b/src/lib/fatal_error.c
similarity index 100%
rename from fatal_error.c
rename to src/lib/fatal_error.c
diff --git a/fatal_error.h b/src/lib/fatal_error.h
similarity index 100%
rename from fatal_error.h
rename to src/lib/fatal_error.h
diff --git a/fixed_volume.h.in b/src/lib/fixed_volume.h.in
similarity index 100%
rename from fixed_volume.h.in
rename to src/lib/fixed_volume.h.in
diff --git a/gamma.c b/src/lib/gamma.c
similarity index 100%
rename from gamma.c
rename to src/lib/gamma.c
diff --git a/gamma.h b/src/lib/gamma.h
similarity index 100%
rename from gamma.h
rename to src/lib/gamma.h
diff --git a/geometry_eo.c b/src/lib/geometry_eo.c
similarity index 89%
rename from geometry_eo.c
rename to src/lib/geometry_eo.c
index 8622131e9..f89189357 100644
--- a/geometry_eo.c
+++ b/src/lib/geometry_eo.c
@@ -45,7 +45,7 @@
 
 void Hopping_Matrix_Indices(void);
 
-#if ((defined PARALLELX) || (defined PARALLELXY) || (defined PARALLELXYZ))
+#if ((defined TM_PARALLELX) || (defined TM_PARALLELXY) || (defined TM_PARALLELXYZ))
 
 /* This is the version of the function Index  introduced for Aurora-like parallelizations (mainly
  * xyz)  */
@@ -72,7 +72,7 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
     ix = VOLUME + T * LY * LZ + y0 * LY * LZ + y2 * LZ + y3;
   }
 
-#if (defined PARALLELXY || defined PARALLELXYZ)
+#if (defined TM_PARALLELXY || defined TM_PARALLELXYZ)
   /* y-Rand */
   if (x2 == LY) {
     ix = VOLUME + 2 * T * LY * LZ + y0 * LX * LZ + y1 * LZ + y3;
@@ -97,9 +97,9 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
       ix = VOLUME + RAND + 3 * T * LZ + y0 * LZ + y3;
     }
   }
-#endif /* endif of PARALLELXY  || PARALLELXYZ */
+#endif /* endif of TM_PARALLELXY  || TM_PARALLELXYZ */
 
-#if defined PARALLELXYZ
+#if defined TM_PARALLELXYZ
   /* z-Rand */
   if (x3 == LZ) {
     ix = VOLUME + 2 * T * LY * LZ + 2 * T * LX * LZ + y0 * LX * LY + y1 * LY + y2;
@@ -142,7 +142,7 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
     }
   }
 
-#endif /* endif of PARALLELXYZ */
+#endif /* endif of TM_PARALLELXYZ */
 
   /* The DBW2 stuff --> second boundary slice */
   /* This we put a the very end.              */
@@ -150,44 +150,44 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
   /* x2-rand+ */
   if (x1 == LX + 1) {
     ix = VOLUMEPLUSRAND + y0 * LY * LZ + y2 * LZ + y3;
-#if (defined PARALLELXY || defined PARALLELXYZ)
+#if (defined TM_PARALLELXY || defined TM_PARALLELXYZ)
     /* x2y */
     if (x2 == LY) {
       ix = VOLUMEPLUSRAND + RAND + y0 * LZ + y3;
     } else if (x2 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 1 * T * LZ + y0 * LZ + y3;
     }
-#endif /* endif of PARALLELXY || PARALLELXYZ  */
-#if defined PARALLELXYZ
+#endif /* endif of TM_PARALLELXY || TM_PARALLELXYZ  */
+#if defined TM_PARALLELXYZ
     /* x2z */
     else if (x3 == LZ) {
       ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 4 * T * LY + y0 * LY + y2;
     } else if (x3 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 5 * T * LY + y0 * LY + y2;
     }
-#endif /* endif of PARALLELXYZ  */
+#endif /* endif of TM_PARALLELXYZ  */
   }
   /* x2-rand- */
   if (x1 == -2) {
     ix = VOLUMEPLUSRAND + T * LY * LZ + y0 * LY * LZ + y2 * LZ + y3;
-#if (defined PARALLELXY || defined PARALLELXYZ)
+#if (defined TM_PARALLELXY || defined TM_PARALLELXYZ)
     /* x2y */
     if (x2 == LY) {
       ix = VOLUMEPLUSRAND + RAND + 2 * T * LZ + y0 * LZ + y3;
     } else if (x2 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 3 * T * LZ + y0 * LZ + y3;
     }
-#endif /* endif of PARALLELXY || PARALLELXYZ  */
-#if defined PARALLELXYZ
+#endif /* endif of TM_PARALLELXY || TM_PARALLELXYZ  */
+#if defined TM_PARALLELXYZ
     /* x2z */
     else if (x3 == LZ) {
       ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 6 * T * LY + y0 * LY + y2;
     } else if (x3 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 7 * T * LY + y0 * LY + y2;
     }
-#endif /* endif of  PARALLELXYZ  */
+#endif /* endif of  TM_PARALLELXYZ  */
   }
-#if (defined PARALLELXY || defined PARALLELXYZ)
+#if (defined TM_PARALLELXY || defined TM_PARALLELXYZ)
   /* y2-rand+ */
   if (x2 == LY + 1) {
     ix = VOLUMEPLUSRAND + 2 * T * LY * LZ + y0 * LX * LZ + y1 * LZ + y3;
@@ -197,14 +197,14 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
     } else if (x1 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 6 * T * LZ + y0 * LZ + y3;
     }
-#if defined PARALLELXYZ
+#if defined TM_PARALLELXYZ
     /* y2z */
     else if (x3 == LZ) {
       ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 8 * T * LY + 4 * T * LX + y0 * LX + y1;
     } else if (x3 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 8 * T * LY + 5 * T * LX + y0 * LX + y1;
     }
-#endif /* endif of PARALLELXYZ  */
+#endif /* endif of TM_PARALLELXYZ  */
   }
   /* y2-rand- */
   if (x2 == -2) {
@@ -215,17 +215,17 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
     } else if (x1 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 7 * T * LZ + y0 * LZ + y3;
     }
-#if defined PARALLELXYZ
+#if defined TM_PARALLELXYZ
     /* y2z */
     else if (x3 == LZ) {
       ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 8 * T * LY + 6 * T * LX + y0 * LX + y1;
     } else if (x3 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 8 * T * LY + 7 * T * LX + y0 * LX + y1;
     }
-#endif /* endif of PARALLELXYZ  */
+#endif /* endif of TM_PARALLELXYZ  */
   }
-#endif /* endif of PARALLELXY || PARALLELXYZ  */
-#if defined PARALLELXYZ
+#endif /* endif of TM_PARALLELXY || TM_PARALLELXYZ  */
+#if defined TM_PARALLELXYZ
   /* z2-rand+ */
   if (x3 == LZ + 1) {
     ix = VOLUMEPLUSRAND + 2 * T * LY * LZ + 2 * T * LX * LZ + y0 * LX * LY + y1 * LY + y2;
@@ -259,12 +259,12 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
       ix = VOLUMEPLUSRAND + RAND + 8 * T * LZ + 8 * T * LY + 3 * T * LX + y0 * LX + y1;
     }
   }
-#endif /* endif of PARALLELXYZ  */
+#endif /* endif of TM_PARALLELXYZ  */
 
   return (ix);
 }
 
-#else /* original version of Index(): used for no parallelization  or PARALLEL*T */
+#else /* original version of Index(): used for no parallelization  or TM_PARALLEL*T */
 
 int Index(const int x0, const int x1, const int x2, const int x3) {
   int y0, y1, y2, y3, ix;
@@ -274,7 +274,8 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
   y3 = (x3 + LZ) % LZ;
   ix = ((y0 * LX + y1) * LY + y2) * LZ + y3;
 
-#if ((defined PARALLELT) || (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || \
+     (defined TM_PARALLELXYZT))
   if (x0 == T) {
     ix = VOLUME + y3 + LZ * y2 + LZ * LY * y1;
   }
@@ -283,7 +284,7 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
     ix = VOLUME + LX * LY * LZ + y3 + LZ * y2 + LZ * LY * y1;
   }
 #endif
-#if ((defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT))
   if (x1 == LX) {
     ix = VOLUME + 2 * LX * LY * LZ + y0 * LY * LZ + y2 * LZ + y3;
   }
@@ -309,9 +310,9 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
     }
   }
 
-#endif /* endif of PARALLELXT || PARALLELXYT || PARALLELXYZT */
+#endif /* endif of TM_PARALLELXT || TM_PARALLELXYT || TM_PARALLELXYZT */
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* y-Rand */
   if (x2 == LY) {
     ix = VOLUME + 2 * LX * LY * LZ + 2 * T * LY * LZ + y0 * LX * LZ + y1 * LZ + y3;
@@ -358,8 +359,8 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
     }
   }
 
-#endif /* endif of PARALLELXYT  || PARALLELXYZT */
-#if defined PARALLELXYZT
+#endif /* endif of TM_PARALLELXYT  || TM_PARALLELXYZT */
+#if defined TM_PARALLELXYZT
   /* z-Rand */
   if (x3 == LZ) {
     ix =
@@ -429,30 +430,31 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
     }
   }
 
-#endif /* endif of PARALLELXYZT */
+#endif /* endif of TM_PARALLELXYZT */
 
   /* The DBW2 stuff --> second boundary slice */
   /* This we put a the very end.              */
-#if ((defined PARALLELT) || (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || \
+     (defined TM_PARALLELXYZT))
   if (x0 == T + 1) {
     ix = VOLUMEPLUSRAND + y3 + LZ * y2 + LZ * LY * y1;
-#if ((defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT))
     /* t2x */
     if (x1 == LX) {
       ix = VOLUMEPLUSRAND + RAND + y2 * LZ + y3;
     } else if (x1 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 1 * LY * LZ + y2 * LZ + y3;
     }
-#endif /* endif of PARALLELXT || PARALLELXYT || PARALLELXYZT  */
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#endif /* endif of TM_PARALLELXT || TM_PARALLELXYT || TM_PARALLELXYZT  */
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     /* t2y */
     else if (x2 == LY) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + y1 * LZ + y3;
     } else if (x2 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 2 * LX * LZ + y1 * LZ + y3;
     }
-#endif /* endif of PARALLELXYT || PARALLELXYZT  */
-#if defined PARALLELXYZT
+#endif /* endif of TM_PARALLELXYT || TM_PARALLELXYZT  */
+#if defined TM_PARALLELXYZT
     /* t2z */
     else if (x3 == LZ) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + y1 * LY + y2;
@@ -460,28 +462,28 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 2 * LX * LY + y1 * LY +
            y2;
     }
-#endif /* endif of PARALLELXYZT  */
+#endif /* endif of TM_PARALLELXYZT  */
   }
   /* the slice at time -2 is put behind the one at time T+1 */
   else if (x0 == -2) {
     ix = VOLUMEPLUSRAND + LX * LY * LZ + y3 + LZ * y2 + LZ * LY * y1;
-#if ((defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT))
     /* t2x */
     if (x1 == LX) {
       ix = VOLUMEPLUSRAND + RAND + 2 * LY * LZ + y2 * LZ + y3;
     } else if (x1 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 3 * LY * LZ + y2 * LZ + y3;
     }
-#endif /* endif of PARALLELXT || PARALLELXYT || PARALLELXYZT  */
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#endif /* endif of TM_PARALLELXT || TM_PARALLELXYT || TM_PARALLELXYZT  */
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     /* t2y */
     else if (x2 == LY) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + LX * LZ + y1 * LZ + y3;
     } else if (x2 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 3 * LX * LZ + y1 * LZ + y3;
     }
-#endif /* endif of PARALLELXYT || PARALLELXYZT  */
-#if defined PARALLELXYZT
+#endif /* endif of TM_PARALLELXYT || TM_PARALLELXYZT  */
+#if defined TM_PARALLELXYZT
     /* t2z */
     else if (x3 == LZ) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + LX * LY + y1 * LY + y2;
@@ -489,10 +491,10 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 3 * LX * LY + y1 * LY +
            y2;
     }
-#endif /* endif of PARALLELXYZT  */
+#endif /* endif of TM_PARALLELXYZT  */
   }
-#endif /* endif of PARALLELT || PARALLELXT || PARALLELXYT || PARALLELXYZT  */
-#if ((defined PARALLELXT) || (defined PARALLELXYT) || defined PARALLELXYZT)
+#endif /* endif of TM_PARALLELT || TM_PARALLELXT || TM_PARALLELXYT || TM_PARALLELXYZT  */
+#if ((defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || defined TM_PARALLELXYZT)
   if (x1 == LX + 1) {
     ix = VOLUMEPLUSRAND + 2 * LX * LY * LZ + y0 * LY * LZ + y2 * LZ + y3;
     /* x2t */
@@ -501,15 +503,15 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
     } else if (x0 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 6 * LY * LZ + y2 * LZ + y3;
     }
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     /* x2y */
     else if (x2 == LY) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + y0 * LZ + y3;
     } else if (x2 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 1 * T * LZ + y0 * LZ + y3;
     }
-#endif /* endif of PARALLELXYT || PARALLELXYZT  */
-#if defined PARALLELXYZT
+#endif /* endif of TM_PARALLELXYT || TM_PARALLELXYZT  */
+#if defined TM_PARALLELXYZT
     /* x2z */
     else if (x3 == LZ) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 8 * LX * LY +
@@ -518,7 +520,7 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 8 * LX * LY +
            5 * T * LY + y0 * LY + y2;
     }
-#endif /* endif of PARALLELXYZT  */
+#endif /* endif of TM_PARALLELXYZT  */
   }
   if (x1 == -2) {
     ix = VOLUMEPLUSRAND + 2 * LX * LY * LZ + T * LY * LZ + y0 * LY * LZ + y2 * LZ + y3;
@@ -528,15 +530,15 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
     } else if (x0 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 7 * LY * LZ + y2 * LZ + y3;
     }
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     /* x2y */
     else if (x2 == LY) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 2 * T * LZ + y0 * LZ + y3;
     } else if (x2 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 3 * T * LZ + y0 * LZ + y3;
     }
-#endif /* endif of PARALLELXYT || PARALLELXYZT  */
-#if defined PARALLELXYZT
+#endif /* endif of TM_PARALLELXYT || TM_PARALLELXYZT  */
+#if defined TM_PARALLELXYZT
     /* x2z */
     else if (x3 == LZ) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 8 * LX * LY +
@@ -545,10 +547,10 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 8 * LX * LY +
            7 * T * LY + y0 * LY + y2;
     }
-#endif /* endif of  PARALLELXYZT  */
+#endif /* endif of  TM_PARALLELXYZT  */
   }
-#endif /* endif of PARALLELXT || PARALLELXYT || PARALLELXYZT  */
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#endif /* endif of TM_PARALLELXT || TM_PARALLELXYT || TM_PARALLELXYZT  */
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   if (x2 == LY + 1) {
     ix = VOLUMEPLUSRAND + 2 * LX * LY * LZ + 2 * T * LY * LZ + y0 * LX * LZ + y1 * LZ + y3;
     /* y2x */
@@ -563,7 +565,7 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
     } else if (x0 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 5 * LX * LZ + y1 * LZ + y3;
     }
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
     /* y2z */
     else if (x3 == LZ) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 8 * LX * LY +
@@ -572,7 +574,7 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 8 * LX * LY +
            8 * T * LY + 5 * T * LX + y0 * LX + y1;
     }
-#endif /* endif of PARALLELXYZT  */
+#endif /* endif of TM_PARALLELXYZT  */
   }
   if (x2 == -2) {
     ix = VOLUMEPLUSRAND + 2 * LX * LY * LZ + 2 * T * LY * LZ + T * LX * LZ + y0 * LX * LZ +
@@ -589,7 +591,7 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
     } else if (x0 == -1) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 7 * LX * LZ + y1 * LZ + y3;
     }
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
     /* y2z */
     else if (x3 == LZ) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 8 * LX * LY +
@@ -598,10 +600,10 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
       ix = VOLUMEPLUSRAND + RAND + 8 * LY * LZ + 8 * T * LZ + 8 * LX * LZ + 8 * LX * LY +
            8 * T * LY + 7 * T * LX + y0 * LX + y1;
     }
-#endif /* endif of PARALLELXYZT  */
+#endif /* endif of TM_PARALLELXYZT  */
   }
-#endif /* endif of PARALLELXYT || PARALLELXYZT  */
-#if defined PARALLELXYZT
+#endif /* endif of TM_PARALLELXYT || TM_PARALLELXYZT  */
+#if defined TM_PARALLELXYZT
   /* z2-Rand */
   if (x3 == LZ + 1) {
     if ((x0 < T) && (x0 > -1) && (x1 < LX) && (x1 > -1) && (x2 > -1) && (x2 < LY)) {
@@ -663,14 +665,14 @@ int Index(const int x0, const int x1, const int x2, const int x3) {
            8 * T * LY + 3 * T * LX + y0 * LX + y1;
     }
   }
-#endif /* endif of PARALLELXYZT  */
+#endif /* endif of TM_PARALLELXYZT  */
   /*   if(ix == 372) { */
   /*     printf("## %d %d %d %d ix = %d, %d %d %d %d\n", x0, x1, x2, x3, ix, T, LX, LY, LZ); */
   /*   } */
   return (ix);
 }
 
-#endif /* PARALLEL???  */
+#endif /* TM_PARALLEL???  */
 
 void geometry() {
   int x0, x1, x2, x3, ix;
@@ -685,17 +687,19 @@ void geometry() {
 
   xeven = malloc(VOLUMEPLUSRAND * sizeof(int));
 
-#if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || \
+     defined TM_PARALLELXYZT)
   startvaluet = 1;
 #endif
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELX || \
-     defined PARALLELXY || defined PARALLELXYZ)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT || \
+     defined TM_PARALLELX || defined TM_PARALLELXY || defined TM_PARALLELXYZ)
   startvaluex = 1;
 #endif
-#if (defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELXY || defined PARALLELXYZ)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELXY || \
+     defined TM_PARALLELXYZ)
   startvaluey = 1;
 #endif
-#if (defined PARALLELXYZT || defined PARALLELXYZ)
+#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ)
   startvaluez = 1;
 #endif
 
@@ -795,7 +799,7 @@ void geometry() {
     }
   }
 
-#if (defined PARALLELXYZT || defined PARALLELXYZ)
+#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ)
   ix = 0;
   for (x0 = 0; x0 < T; x0++) {
     for (x1 = 0; x1 < LX; x1++) {
@@ -851,8 +855,7 @@ void geometry() {
     }
   }
 
-
-#endif /* PARALLELXYZ || PARALLELXYZT*/
+#endif /* TM_PARALLELXYZ || TM_PARALLELXYZT*/
 
   /* The rectangular gauge action part */
   /* Everything is stored behind VOLUMEPLUSRAND-1 !*/
@@ -861,7 +864,8 @@ void geometry() {
       printf("# Initialising rectangular gauge action stuff\n");
       fflush(stdout);
     }
-#if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || \
+     defined TM_PARALLELXYZT)
     for (x1 = -startvaluex; x1 < (LX + startvaluex); x1++) {
       for (x2 = -startvaluey; x2 < (LY + startvaluey); x2++) {
         for (x3 = -startvaluez; x3 < (LZ + startvaluez); x3++) {
@@ -910,8 +914,8 @@ void geometry() {
       }
     }
 #endif
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELX || \
-     defined PARALLELXY || defined PARALLELXYZ)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT || \
+     defined TM_PARALLELX || defined TM_PARALLELXY || defined TM_PARALLELXYZ)
     for (x0 = -startvaluet; x0 < (T + startvaluet); x0++) {
       for (x2 = -startvaluey; x2 < (LY + startvaluey); x2++) {
         for (x3 = -startvaluez; x3 < (LZ + startvaluez); x3++) {
@@ -959,7 +963,8 @@ void geometry() {
       }
     }
 #endif
-#if (defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELXY || defined PARALLELXYZ)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELXY || \
+     defined TM_PARALLELXYZ)
     for (x0 = -startvaluet; x0 < (T + startvaluet); x0++) {
       for (x1 = -startvaluex; x1 < (LX + startvaluex); x1++) {
         for (x3 = -startvaluez; x3 < (LZ + startvaluez); x3++) {
@@ -1007,7 +1012,7 @@ void geometry() {
       }
     }
 #endif
-#if (defined PARALLELXYZT || defined PARALLELXYZ)
+#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ)
     for (x0 = -startvaluet; x0 < (T + startvaluet); x0++) {
       for (x1 = -startvaluex; x1 < (LX + startvaluex); x1++) {
         for (x2 = -startvaluey; x2 < (LY + startvaluey); x2++) {
diff --git a/geometry_eo.h b/src/lib/geometry_eo.h
similarity index 100%
rename from geometry_eo.h
rename to src/lib/geometry_eo.h
diff --git a/get_rectangle_staples.c b/src/lib/get_rectangle_staples.c
similarity index 97%
rename from get_rectangle_staples.c
rename to src/lib/get_rectangle_staples.c
index eb2a7db9f..c8f69596b 100644
--- a/get_rectangle_staples.c
+++ b/src/lib/get_rectangle_staples.c
@@ -34,9 +34,6 @@ void get_rectangle_staples_general(su3 *const v, const int x, const int mu,
                                    const su3 *const *const gf) {
   su3 ALIGN tmp1, tmp2;
   const su3 *a, *b, *c, *d, *e;
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(rectstaples)
-#endif
   _su3_zero((*v));
   for (int nu = 0; nu < 4; nu++) {
     if (mu != nu) {
@@ -178,7 +175,4 @@ void get_rectangle_staples_general(su3 *const v, const int x, const int mu,
       _su3_times_su3_acc((*v), tmp2, tmp1);
     }
   }
-#ifdef _KOJAK_INST
-#pragma pomp inst end(rectstaples)
-#endif
 }
diff --git a/get_rectangle_staples.h b/src/lib/get_rectangle_staples.h
similarity index 100%
rename from get_rectangle_staples.h
rename to src/lib/get_rectangle_staples.h
diff --git a/get_staples.c b/src/lib/get_staples.c
similarity index 90%
rename from get_staples.c
rename to src/lib/get_staples.c
index e80648382..ae7f19d09 100644
--- a/get_staples.c
+++ b/src/lib/get_staples.c
@@ -35,10 +35,6 @@ void get_staples(su3* const staple, const int x, const int mu, const su3** in_ga
   su3 ALIGN st;
   const su3 *w1, *w2, *w3;
 
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(staples)
-#endif
-
   _su3_zero(*staple);
   for (int k = 0; k < 4; k++) {
     if (k != mu) {
@@ -61,9 +57,6 @@ void get_staples(su3* const staple, const int x, const int mu, const su3** in_ga
       _su3d_times_su3_acc(*staple, *w1, st);
     }
   }
-#ifdef _KOJAK_INST
-#pragma pomp inst end(staples)
-#endif
 }
 
 void get_spacelike_staples(su3* const staple, const int x, const int mu,
@@ -72,10 +65,6 @@ void get_spacelike_staples(su3* const staple, const int x, const int mu,
   su3 ALIGN st;
   const su3 *w1, *w2, *w3;
 
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(staples)
-#endif
-
   _su3_zero(*staple);
   for (int k = 1; k < 4; k++) {
     if (k != mu) {
@@ -98,9 +87,6 @@ void get_spacelike_staples(su3* const staple, const int x, const int mu,
       _su3d_times_su3_acc(*staple, *w1, st);
     }
   }
-#ifdef _KOJAK_INST
-#pragma pomp inst end(staples)
-#endif
 }
 
 void get_timelike_staples(su3* const staple, const int x, const int mu,
@@ -109,10 +95,6 @@ void get_timelike_staples(su3* const staple, const int x, const int mu,
   su3 ALIGN st;
   const su3 *w1, *w2, *w3;
 
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(staples)
-#endif
-
   _su3_zero(*staple);
   int k = 0;
   if (k != mu) {
@@ -134,7 +116,4 @@ void get_timelike_staples(su3* const staple, const int x, const int mu,
     /* v = v + w1^d * st */
     _su3d_times_su3_acc(*staple, *w1, st);
   }
-#ifdef _KOJAK_INST
-#pragma pomp inst end(staples)
-#endif
 }
diff --git a/get_staples.h b/src/lib/get_staples.h
similarity index 100%
rename from get_staples.h
rename to src/lib/get_staples.h
diff --git a/getopt.c b/src/lib/getopt.c
similarity index 100%
rename from getopt.c
rename to src/lib/getopt.c
diff --git a/getopt.h b/src/lib/getopt.h
similarity index 100%
rename from getopt.h
rename to src/lib/getopt.h
diff --git a/gettime.c b/src/lib/gettime.c
similarity index 98%
rename from gettime.c
rename to src/lib/gettime.c
index 68c123ae4..adae6dcb3 100644
--- a/gettime.c
+++ b/src/lib/gettime.c
@@ -21,7 +21,7 @@
 #ifdef HAVE_CONFIG_H
 #include <tmlqcd_config.h>
 #endif
-#ifdef HAVE_CLOCK_GETTIME
+#ifdef TM_CLOCK_GETTIME
 #ifndef _POSIX_C_SOURCE
 #define _POSIX_C_SOURCE 199309L
 #endif
@@ -45,7 +45,7 @@ double gettime(void) {
 
   t = MPI_Wtime();
 
-#elif (defined HAVE_CLOCK_GETTIME)
+#elif (defined TM_CLOCK_GETTIME)
 
   struct timespec ts;
 
diff --git a/gettime.h b/src/lib/gettime.h
similarity index 100%
rename from gettime.h
rename to src/lib/gettime.h
diff --git a/src/lib/git_hash.h b/src/lib/git_hash.h
new file mode 100644
index 000000000..a3a22b48d
--- /dev/null
+++ b/src/lib/git_hash.h
@@ -0,0 +1,6 @@
+#ifndef TM_GIT_HASH_H
+#define TM_GIT_HASH_H
+/* Declared here, defined in another translation unit; presumably the git revision (confirm). */
+extern const char git_hash[];
+
+#endif /* TM_GIT_HASH_H */
diff --git a/global.h b/src/lib/global.h
similarity index 98%
rename from global.h
rename to src/lib/global.h
index 1fc644d3e..31d6dc0d4 100644
--- a/global.h
+++ b/src/lib/global.h
@@ -38,7 +38,7 @@
 #ifdef TM_USE_MPI
 #include <mpi.h>
 #endif
-#ifdef FIXEDVOLUME
+#ifdef TM_FIXEDVOLUME
 #include "fixed_volume.h"
 #endif
 #include "su3.h"
@@ -79,7 +79,7 @@ EXTERN tm_mpi_thread_level_t g_mpi_thread_level;
 EXTERN tm_timers_t g_timers;
 
 EXTERN int T_global;
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
 EXTERN int T, L, LX, LY, LZ, VOLUME;
 EXTERN int N_PROC_T, N_PROC_X, N_PROC_Y, N_PROC_Z;
 EXTERN int RAND, EDGES, VOLUMEPLUSRAND;
@@ -121,7 +121,6 @@ EXTERN int *g_field_z_disp_even_up;
 EXTERN int *g_field_z_disp_odd_dn;
 EXTERN int *g_field_z_disp_odd_up;
 
-
 /* IF PHMC  */
 EXTERN spinor **g_chi_up_spinor_field;
 EXTERN spinor **g_chi_dn_spinor_field;
@@ -130,7 +129,7 @@ EXTERN int g_running_phmc;
 
 EXTERN su3 **g_gauge_field;
 EXTERN su3_32 **g_gauge_field_32;
-#ifdef _USE_HALFSPINOR
+#ifdef TM_USE_HALFSPINOR
 EXTERN su3 ***g_gauge_field_copy;
 EXTERN su3_32 ***g_gauge_field_copy_32;
 #else
diff --git a/hamiltonian_field.h b/src/lib/hamiltonian_field.h
similarity index 100%
rename from hamiltonian_field.h
rename to src/lib/hamiltonian_field.h
diff --git a/include/tmLQCD.h b/src/lib/include/tmLQCD.h
similarity index 100%
rename from include/tmLQCD.h
rename to src/lib/include/tmLQCD.h
diff --git a/include/tmlqcd_config.h b/src/lib/include/tmlqcd_config.h
similarity index 100%
rename from include/tmlqcd_config.h
rename to src/lib/include/tmlqcd_config.h
diff --git a/init/Makefile.in b/src/lib/init/Makefile.in
similarity index 100%
rename from init/Makefile.in
rename to src/lib/init/Makefile.in
diff --git a/init/init.h b/src/lib/init/init.h
similarity index 100%
rename from init/init.h
rename to src/lib/init/init.h
index 0fe9ae51b..127622a8b 100644
--- a/init/init.h
+++ b/src/lib/init/init.h
@@ -33,8 +33,8 @@
 #include "init/init_gauge_tmp.h"
 #include "init/init_geometry_indices.h"
 #include "init/init_global_states.h"
-#include "init/init_parallel.h"
 #include "init/init_moment_field.h"
+#include "init/init_parallel.h"
 #include "init/init_spinor_field.h"
 #include "init/init_stout_smear_vars.h"
 #ifdef TM_USE_OMP
diff --git a/init/init_bispinor_field.c b/src/lib/init/init_bispinor_field.c
similarity index 100%
rename from init/init_bispinor_field.c
rename to src/lib/init/init_bispinor_field.c
diff --git a/init/init_bispinor_field.h b/src/lib/init/init_bispinor_field.h
similarity index 100%
rename from init/init_bispinor_field.h
rename to src/lib/init/init_bispinor_field.h
diff --git a/init/init_chi_spinor_field.c b/src/lib/init/init_chi_spinor_field.c
similarity index 100%
rename from init/init_chi_spinor_field.c
rename to src/lib/init/init_chi_spinor_field.c
diff --git a/init/init_chi_spinor_field.h b/src/lib/init/init_chi_spinor_field.h
similarity index 100%
rename from init/init_chi_spinor_field.h
rename to src/lib/init/init_chi_spinor_field.h
diff --git a/init/init_critical_globals.c b/src/lib/init/init_critical_globals.c
similarity index 100%
rename from init/init_critical_globals.c
rename to src/lib/init/init_critical_globals.c
diff --git a/init/init_critical_globals.h b/src/lib/init/init_critical_globals.h
similarity index 100%
rename from init/init_critical_globals.h
rename to src/lib/init/init_critical_globals.h
diff --git a/init/init_dirac_halfspinor.c b/src/lib/init/init_dirac_halfspinor.c
similarity index 83%
rename from init/init_dirac_halfspinor.c
rename to src/lib/init/init_dirac_halfspinor.c
index f5939d9cc..6b4fba174 100644
--- a/init/init_dirac_halfspinor.c
+++ b/src/lib/init/init_dirac_halfspinor.c
@@ -69,15 +69,13 @@ int init_dirac_halfspinor() {
     errno = 0;
     return (1);
   }
-  sendBuffer =
-      (halfspinor *)(((unsigned long int)(sendBuffer_) + ALIGN_BASE + 1) & ~ALIGN_BASE);
+  sendBuffer = (halfspinor *)(((unsigned long int)(sendBuffer_) + ALIGN_BASE + 1) & ~ALIGN_BASE);
   if ((void *)(recvBuffer_ = (halfspinor *)calloc(RAND / 2 + 8, sizeof(halfspinor))) == NULL) {
     printf("malloc errno : %d\n", errno);
     errno = 0;
     return (1);
   }
-  recvBuffer =
-      (halfspinor *)(((unsigned long int)(recvBuffer_) + ALIGN_BASE + 1) & ~ALIGN_BASE);
+  recvBuffer = (halfspinor *)(((unsigned long int)(recvBuffer_) + ALIGN_BASE + 1) & ~ALIGN_BASE);
 #endif
 
   for (int ieo = 0; ieo < 2; ieo++) {
@@ -94,7 +92,8 @@ int init_dirac_halfspinor() {
         NBPointer[ieo][8 * i + 2 * mu + 1] =
             &HalfSpinor[8 * g_lexic2eosub[g_iup[j][mu]] + 2 * mu + 1];
       }
-#if ((defined PARALLELT) || (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || \
+     (defined TM_PARALLELXYZT))
       if (t == 0) {
         k = (g_lexic2eosub[g_idn[j][0]] - VOLUME / 2);
         NBPointer[ieo][8 * i] = &sendBuffer[k];
@@ -104,8 +103,8 @@ int init_dirac_halfspinor() {
         NBPointer[ieo][8 * i + 1] = &sendBuffer[k];
       }
 #endif
-#if ((defined PARALLELX) || (defined PARALLELXY) || (defined PARALLELXYZ) || \
-     (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELX) || (defined TM_PARALLELXY) || (defined TM_PARALLELXYZ) || \
+     (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT))
       if (x == 0) {
         k = (g_lexic2eosub[g_idn[j][1]] - VOLUME / 2);
         NBPointer[ieo][8 * i + 2] = &sendBuffer[k];
@@ -115,8 +114,8 @@ int init_dirac_halfspinor() {
         NBPointer[ieo][8 * i + 3] = &sendBuffer[k];
       }
 #endif
-#if ((defined PARALLELXY) || (defined PARALLELXYZ) || (defined PARALLELXYT) || \
-     (defined PARALLELXYZT))
+#if ((defined TM_PARALLELXY) || (defined TM_PARALLELXYZ) || (defined TM_PARALLELXYT) || \
+     (defined TM_PARALLELXYZT))
       if (y == 0) {
         k = (g_lexic2eosub[g_idn[j][2]] - VOLUME / 2);
         NBPointer[ieo][8 * i + 4] = &sendBuffer[k];
@@ -126,7 +125,7 @@ int init_dirac_halfspinor() {
         NBPointer[ieo][8 * i + 5] = &sendBuffer[k];
       }
 #endif
-#if ((defined PARALLELXYZ) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELXYZ) || (defined TM_PARALLELXYZT))
       if (z == 0) {
         k = (g_lexic2eosub[g_idn[j][3]] - VOLUME / 2);
         NBPointer[ieo][8 * i + 6] = &sendBuffer[k];
@@ -154,7 +153,8 @@ int init_dirac_halfspinor() {
       for (int mu = 0; mu < 8; mu++) {
         NBPointer[ieo][8 * i + mu] = &HalfSpinor[8 * i + mu];
       }
-#if ((defined PARALLELT) || (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || \
+     (defined TM_PARALLELXYZT))
       if (t == T - 1) {
         NBPointer[ieo][8 * i] = &recvBuffer[(g_lexic2eosub[g_iup[j][0]] - VOLUME / 2)];
       }
@@ -162,8 +162,8 @@ int init_dirac_halfspinor() {
         NBPointer[ieo][8 * i + 1] = &recvBuffer[(g_lexic2eosub[g_idn[j][0]] - VOLUME / 2)];
       }
 #endif
-#if ((defined PARALLELX) || (defined PARALLELXY) || (defined PARALLELXYZ) || \
-     (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELX) || (defined TM_PARALLELXY) || (defined TM_PARALLELXYZ) || \
+     (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT))
       if (x == LX - 1) {
         NBPointer[ieo][8 * i + 2] = &recvBuffer[(g_lexic2eosub[g_iup[j][1]] - VOLUME / 2)];
       }
@@ -171,8 +171,8 @@ int init_dirac_halfspinor() {
         NBPointer[ieo][8 * i + 3] = &recvBuffer[(g_lexic2eosub[g_idn[j][1]] - VOLUME / 2)];
       }
 #endif
-#if ((defined PARALLELXY) || (defined PARALLELXYZ) || (defined PARALLELXYT) || \
-     (defined PARALLELXYZT))
+#if ((defined TM_PARALLELXY) || (defined TM_PARALLELXYZ) || (defined TM_PARALLELXYT) || \
+     (defined TM_PARALLELXYZT))
       if (y == LY - 1) {
         NBPointer[ieo][8 * i + 4] = &recvBuffer[(g_lexic2eosub[g_iup[j][2]] - VOLUME / 2)];
       }
@@ -180,7 +180,7 @@ int init_dirac_halfspinor() {
         NBPointer[ieo][8 * i + 5] = &recvBuffer[(g_lexic2eosub[g_idn[j][2]] - VOLUME / 2)];
       }
 #endif
-#if ((defined PARALLELXYZ) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELXYZ) || (defined TM_PARALLELXYZT))
       if (z == LZ - 1) {
         NBPointer[ieo][8 * i + 6] = &recvBuffer[(g_lexic2eosub[g_iup[j][3]] - VOLUME / 2)];
       }
@@ -240,7 +240,8 @@ int init_dirac_halfspinor32() {
         NBPointer32[ieo][8 * i + 2 * mu + 1] =
             &HalfSpinor32[8 * g_lexic2eosub[g_iup[j][mu]] + 2 * mu + 1];
       }
-#if ((defined PARALLELT) || (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || \
+     (defined TM_PARALLELXYZT))
       if (t == 0) {
         k = (g_lexic2eosub[g_idn[j][0]] - VOLUME / 2);
         NBPointer32[ieo][8 * i] = &sendBuffer32[k];
@@ -250,8 +251,8 @@ int init_dirac_halfspinor32() {
         NBPointer32[ieo][8 * i + 1] = &sendBuffer32[k];
       }
 #endif
-#if ((defined PARALLELX) || (defined PARALLELXY) || (defined PARALLELXYZ) || \
-     (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELX) || (defined TM_PARALLELXY) || (defined TM_PARALLELXYZ) || \
+     (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT))
       if (x == 0) {
         k = (g_lexic2eosub[g_idn[j][1]] - VOLUME / 2);
         NBPointer32[ieo][8 * i + 2] = &sendBuffer32[k];
@@ -261,8 +262,8 @@ int init_dirac_halfspinor32() {
         NBPointer32[ieo][8 * i + 3] = &sendBuffer32[k];
       }
 #endif
-#if ((defined PARALLELXY) || (defined PARALLELXYZ) || (defined PARALLELXYT) || \
-     (defined PARALLELXYZT))
+#if ((defined TM_PARALLELXY) || (defined TM_PARALLELXYZ) || (defined TM_PARALLELXYT) || \
+     (defined TM_PARALLELXYZT))
       if (y == 0) {
         k = (g_lexic2eosub[g_idn[j][2]] - VOLUME / 2);
         NBPointer32[ieo][8 * i + 4] = &sendBuffer32[k];
@@ -272,7 +273,7 @@ int init_dirac_halfspinor32() {
         NBPointer32[ieo][8 * i + 5] = &sendBuffer32[k];
       }
 #endif
-#if ((defined PARALLELXYZ) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELXYZ) || (defined TM_PARALLELXYZT))
       if (z == 0) {
         k = (g_lexic2eosub[g_idn[j][3]] - VOLUME / 2);
         NBPointer32[ieo][8 * i + 6] = &sendBuffer32[k];
@@ -300,7 +301,8 @@ int init_dirac_halfspinor32() {
       for (mu = 0; mu < 8; mu++) {
         NBPointer32[ieo][8 * i + mu] = &HalfSpinor32[8 * i + mu];
       }
-#if ((defined PARALLELT) || (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELT) || (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || \
+     (defined TM_PARALLELXYZT))
       if (t == T - 1) {
         NBPointer32[ieo][8 * i] = &recvBuffer32[(g_lexic2eosub[g_iup[j][0]] - VOLUME / 2)];
       }
@@ -308,8 +310,8 @@ int init_dirac_halfspinor32() {
         NBPointer32[ieo][8 * i + 1] = &recvBuffer32[(g_lexic2eosub[g_idn[j][0]] - VOLUME / 2)];
       }
 #endif
-#if ((defined PARALLELX) || (defined PARALLELXY) || (defined PARALLELXYZ) || \
-     (defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELX) || (defined TM_PARALLELXY) || (defined TM_PARALLELXYZ) || \
+     (defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT))
       if (x == LX - 1) {
         NBPointer32[ieo][8 * i + 2] = &recvBuffer32[(g_lexic2eosub[g_iup[j][1]] - VOLUME / 2)];
       }
@@ -317,8 +319,8 @@ int init_dirac_halfspinor32() {
         NBPointer32[ieo][8 * i + 3] = &recvBuffer32[(g_lexic2eosub[g_idn[j][1]] - VOLUME / 2)];
       }
 #endif
-#if ((defined PARALLELXY) || (defined PARALLELXYZ) || (defined PARALLELXYT) || \
-     (defined PARALLELXYZT))
+#if ((defined TM_PARALLELXY) || (defined TM_PARALLELXYZ) || (defined TM_PARALLELXYT) || \
+     (defined TM_PARALLELXYZT))
       if (y == LY - 1) {
         NBPointer32[ieo][8 * i + 4] = &recvBuffer32[(g_lexic2eosub[g_iup[j][2]] - VOLUME / 2)];
       }
@@ -326,7 +328,7 @@ int init_dirac_halfspinor32() {
         NBPointer32[ieo][8 * i + 5] = &recvBuffer32[(g_lexic2eosub[g_idn[j][2]] - VOLUME / 2)];
       }
 #endif
-#if ((defined PARALLELXYZ) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELXYZ) || (defined TM_PARALLELXYZT))
       if (z == LZ - 1) {
         NBPointer32[ieo][8 * i + 6] = &recvBuffer32[(g_lexic2eosub[g_iup[j][3]] - VOLUME / 2)];
       }
diff --git a/init/init_dirac_halfspinor.h b/src/lib/init/init_dirac_halfspinor.h
similarity index 100%
rename from init/init_dirac_halfspinor.h
rename to src/lib/init/init_dirac_halfspinor.h
diff --git a/init/init_gauge_fg.c b/src/lib/init/init_gauge_fg.c
similarity index 100%
rename from init/init_gauge_fg.c
rename to src/lib/init/init_gauge_fg.c
diff --git a/init/init_gauge_fg.h b/src/lib/init/init_gauge_fg.h
similarity index 100%
rename from init/init_gauge_fg.h
rename to src/lib/init/init_gauge_fg.h
diff --git a/init/init_gauge_field.c b/src/lib/init/init_gauge_field.c
similarity index 98%
rename from init/init_gauge_field.c
rename to src/lib/init/init_gauge_field.c
index e30e040bf..a83e66dfd 100644
--- a/init/init_gauge_field.c
+++ b/src/lib/init/init_gauge_field.c
@@ -34,7 +34,6 @@ su3* gauge_field_copy = NULL;
 su3_32* gauge_field_copy_32 = NULL;
 
 int init_gauge_field(const int V, const int back) {
-  int i = 0;
   g_gauge_field_copy = NULL;
 
   if (g_exposu3_no_c == 0) init_exposu3();
@@ -54,7 +53,7 @@ int init_gauge_field(const int V, const int back) {
     g_gauge_field[i] = g_gauge_field[i - 1] + 4;
   }
 
-#if defined _USE_HALFSPINOR
+#if defined TM_USE_HALFSPINOR
   if (back == 1) {
     /*
       g_gauge_field_copy[ieo][PM][sites/2][mu]
@@ -134,7 +133,7 @@ int init_gauge_field_32(const int V, const int back) {
     g_gauge_field_32[i] = g_gauge_field_32[i - 1] + 4;
   }
 
-#if defined _USE_HALFSPINOR
+#if defined TM_USE_HALFSPINOR
   if (back == 1) {
     /*
       g_gauge_field_copy[ieo][PM][sites/2][mu]
@@ -167,7 +166,7 @@ int init_gauge_field_32(const int V, const int back) {
       g_gauge_field_copy_32[1][i] = g_gauge_field_copy_32[1][i - 1] + 4;
     }
   }
-#else /* than _USE_HALFSPINOR  */
+#else /* than TM_USE_HALFSPINOR  */
   if (back == 1) {
     if ((void*)(g_gauge_field_copy_32 = (su3_32**)calloc((VOLUME + RAND), sizeof(su3_32*))) ==
         NULL) {
@@ -217,7 +216,7 @@ void convert_32_gauge_field(su3_32** gf32, su3** gf, int V) {
       gf32[i][mu].c22 = (_Complex float)gf[i][mu].c22;
     }
   }
-#if defined _USE_HALFSPINOR
+#if defined TM_USE_HALFSPINOR
 
 #endif
 }
diff --git a/init/init_gauge_field.h b/src/lib/init/init_gauge_field.h
similarity index 100%
rename from init/init_gauge_field.h
rename to src/lib/init/init_gauge_field.h
diff --git a/init/init_gauge_tmp.c b/src/lib/init/init_gauge_tmp.c
similarity index 100%
rename from init/init_gauge_tmp.c
rename to src/lib/init/init_gauge_tmp.c
diff --git a/init/init_gauge_tmp.h b/src/lib/init/init_gauge_tmp.h
similarity index 100%
rename from init/init_gauge_tmp.h
rename to src/lib/init/init_gauge_tmp.h
diff --git a/init/init_geometry_indices.c b/src/lib/init/init_geometry_indices.c
similarity index 97%
rename from init/init_geometry_indices.c
rename to src/lib/init/init_geometry_indices.c
index ef54c45de..edd568d93 100644
--- a/init/init_geometry_indices.c
+++ b/src/lib/init/init_geometry_indices.c
@@ -58,7 +58,7 @@ int init_geometry_indices(const int V) {
   g_eo2lexic = (int *)calloc(V, sizeof(int));
   if ((void *)g_eo2lexic == NULL) return (11);
 
-#if (defined PARALLELXYZT || defined PARALLELXYZ)
+#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ)
   g_field_z_ipt_even = (int *)calloc(T * LX * LY, sizeof(int));
   if ((void *)g_field_z_ipt_even == NULL) return (12);
   g_field_z_ipt_odd = (int *)calloc(T * LX * LY, sizeof(int));
@@ -74,7 +74,6 @@ int init_geometry_indices(const int V) {
   if ((void *)g_field_z_disp_odd_up == NULL) return (17);
 #endif
 
-
   g_coord = (int **)calloc(VOLUME, sizeof(int *));
   if ((void *)g_coord == NULL) return (19);
   for (i = 0; i < VOLUME; i++) {
@@ -136,7 +135,7 @@ void free_geometry_indices() {
   free(g_eo2lexic);
   free(g_lexic2eosub);
   free(g_lexic2eo);
-#if (defined PARALLELXYZT || defined PARALLELXYZ)
+#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ)
   free(g_field_z_ipt_odd);
   free(g_field_z_ipt_even);
 #endif
diff --git a/init/init_geometry_indices.h b/src/lib/init/init_geometry_indices.h
similarity index 100%
rename from init/init_geometry_indices.h
rename to src/lib/init/init_geometry_indices.h
diff --git a/init/init_global_states.c b/src/lib/init/init_global_states.c
similarity index 100%
rename from init/init_global_states.c
rename to src/lib/init/init_global_states.c
diff --git a/init/init_global_states.h b/src/lib/init/init_global_states.h
similarity index 100%
rename from init/init_global_states.h
rename to src/lib/init/init_global_states.h
diff --git a/init/init_moment_field.c b/src/lib/init/init_moment_field.c
similarity index 100%
rename from init/init_moment_field.c
rename to src/lib/init/init_moment_field.c
diff --git a/init/init_moment_field.h b/src/lib/init/init_moment_field.h
similarity index 100%
rename from init/init_moment_field.h
rename to src/lib/init/init_moment_field.h
diff --git a/init/init_omp_accumulators.c b/src/lib/init/init_omp_accumulators.c
similarity index 100%
rename from init/init_omp_accumulators.c
rename to src/lib/init/init_omp_accumulators.c
diff --git a/init/init_omp_accumulators.h b/src/lib/init/init_omp_accumulators.h
similarity index 100%
rename from init/init_omp_accumulators.h
rename to src/lib/init/init_omp_accumulators.h
diff --git a/init/init_openmp.c b/src/lib/init/init_openmp.c
similarity index 100%
rename from init/init_openmp.c
rename to src/lib/init/init_openmp.c
diff --git a/init/init_openmp.h b/src/lib/init/init_openmp.h
similarity index 100%
rename from init/init_openmp.h
rename to src/lib/init/init_openmp.h
diff --git a/init/init_parallel.c b/src/lib/init/init_parallel.c
similarity index 100%
rename from init/init_parallel.c
rename to src/lib/init/init_parallel.c
diff --git a/init/init_parallel.h b/src/lib/init/init_parallel.h
similarity index 100%
rename from init/init_parallel.h
rename to src/lib/init/init_parallel.h
diff --git a/init/init_spinor_field.c b/src/lib/init/init_spinor_field.c
similarity index 93%
rename from init/init_spinor_field.c
rename to src/lib/init/init_spinor_field.c
index c70945634..6fea95cd8 100644
--- a/init/init_spinor_field.c
+++ b/src/lib/init/init_spinor_field.c
@@ -23,7 +23,7 @@
 #include <errno.h>
 #include <stdio.h>
 #include <stdlib.h>
-#ifdef _USE_SHMEM
+#ifdef TM_USE_SHMEM
 #include <mpp/shmem.h>
 #endif
 #include "global.h"
@@ -37,7 +37,7 @@ spinor *sp_tbuff = NULL;
 int init_spinor_field(const int V, const int nr) {
   int i = 0;
 
-#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR))
+#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR))
   if ((void *)(sp = (spinor *)shmalloc((nr * V + 1) * sizeof(spinor))) == NULL) {
     printf("malloc errno : %d\n", errno);
     errno = 0;
@@ -65,7 +65,7 @@ int init_spinor_field(const int V, const int nr) {
 }
 
 void free_spinor_field() {
-#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR))
+#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR))
   shfree(sp);
   shfree(sp_csg);
 #else
@@ -78,7 +78,7 @@ spinor32 *sp32 = NULL;
 int init_spinor_field_32(const int V, const int nr) {
   int i = 0;
 
-#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR))
+#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR))
   if ((void *)(sp32 = (spinor32 *)shmalloc((nr * V + 1) * sizeof(spinor32))) == NULL) {
     printf("malloc errno : %d\n", errno);
     errno = 0;
@@ -106,7 +106,7 @@ int init_spinor_field_32(const int V, const int nr) {
 }
 
 void free_spinor_field_32() {
-#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR))
+#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR))
   shfree(sp32);
 #else
   free(sp32);
@@ -119,7 +119,7 @@ void free_spinor_field_32() {
 int allocate_spinor_field_array(spinor ***spinors, spinor **sp, const int V, const int nr) {
   int i = 0;
 
-#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR))
+#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR))
   if ((void *)((*sp) = (spinor *)shmalloc((nr * V + 1) * sizeof(spinor))) == NULL) {
     printf("malloc errno : %d\n", errno);
     errno = 0;
@@ -147,7 +147,7 @@ int allocate_spinor_field_array(spinor ***spinors, spinor **sp, const int V, con
 }
 
 void free_spinor_field_array(spinor **sp) {
-#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR))
+#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR))
   shfree(*sp);
 #else
   free(*sp);
@@ -165,7 +165,7 @@ int init_csg_field(const int V) {
 
   /* if all histories are zero, we do not need initialisation */
   if (sum != 0) {
-#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR))
+#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR))
     sp_csg = (spinor *)shmalloc((sum * V + 1) * sizeof(spinor));
 #else
     sp_csg = (spinor *)calloc(sum * V + 1, sizeof(spinor));
diff --git a/init/init_spinor_field.h b/src/lib/init/init_spinor_field.h
similarity index 100%
rename from init/init_spinor_field.h
rename to src/lib/init/init_spinor_field.h
diff --git a/init/init_stout_smear_vars.c b/src/lib/init/init_stout_smear_vars.c
similarity index 96%
rename from init/init_stout_smear_vars.c
rename to src/lib/init/init_stout_smear_vars.c
index e1c64f75a..5b69005b2 100644
--- a/init/init_stout_smear_vars.c
+++ b/src/lib/init/init_stout_smear_vars.c
@@ -27,7 +27,6 @@
 #include "expo.h"
 #include "global.h"
 #include "init_stout_smear_vars.h"
-#include "sse.h"
 #include "su3.h"
 
 su3* gauge_field_saved;
@@ -83,11 +82,8 @@ int init_stout_smear_vars(const int V, const int stout_no_iter) {
   printf("Running init_stout_smear_vars\n");
   const int dim = 4;
 
-  int i, k, x, mu;
+  /* loop counters are declared at their point of use in the for-loops below */
 
-  i = 0;
-  k = 0;
-  mu = 0;
 
   if (g_exposu3_no_c == 0) init_exposu3();
 
@@ -107,7 +103,7 @@ int init_stout_smear_vars(const int V, const int stout_no_iter) {
 
   g_gauge_field_smeared[0] = gauge_field_smeared;
 
-  for (x = 1; x < V; x++) {
+  for (int x = 1; x < V; x++) {
     g_gauge_field_smeared[x] = g_gauge_field_smeared[x - 1] + 4;
   }
 
@@ -126,7 +122,7 @@ int init_stout_smear_vars(const int V, const int stout_no_iter) {
 
   g_gauge_field_saved[0] = gauge_field_saved;
 
-  for (x = 1; x < V; x++) {
+  for (int x = 1; x < V; x++) {
     g_gauge_field_saved[x] = g_gauge_field_saved[x - 1] + 4;
   }
 
@@ -145,7 +141,7 @@ int init_stout_smear_vars(const int V, const int stout_no_iter) {
 
   g_C_smearing[0] = C_smearing;
 
-  for (x = 1; x < V; x++) {
+  for (int x = 1; x < V; x++) {
     g_C_smearing[x] = g_C_smearing[x - 1] + 4;
   }
 
@@ -164,7 +160,7 @@ int init_stout_smear_vars(const int V, const int stout_no_iter) {
 
   g_Q_smearing[0] = Q_smearing;
 
-  for (x = 1; x < V; x++) {
+  for (int x = 1; x < V; x++) {
     g_Q_smearing[x] = g_Q_smearing[x - 1] + 4;
   }
 
@@ -183,7 +179,7 @@ int init_stout_smear_vars(const int V, const int stout_no_iter) {
 
   g_Q_squared_smearing[0] = Q_squared_smearing;
 
-  for (x = 1; x < V; x++) {
+  for (int x = 1; x < V; x++) {
     g_Q_squared_smearing[x] = g_Q_squared_smearing[x - 1] + 4;
   }
 
@@ -211,7 +207,7 @@ int init_stout_smear_vars(const int V, const int stout_no_iter) {
   g_B1_smearing[0] = B1_smearing;
   g_B2_smearing[0] = B2_smearing;
 
-  for (x = 1; x < V; x++) {
+  for (int x = 1; x < V; x++) {
     g_B1_smearing[x] = g_B1_smearing[x - 1] + 4;
     g_B2_smearing[x] = g_B2_smearing[x - 1] + 4;
   }
@@ -231,7 +227,7 @@ int init_stout_smear_vars(const int V, const int stout_no_iter) {
 
   g_Gamma_smearing[0] = Gamma_smearing;
 
-  for (x = 1; x < V; x++) {
+  for (int x = 1; x < V; x++) {
     g_Gamma_smearing[x] = g_Gamma_smearing[x - 1] + 4;
   }
 
@@ -250,7 +246,7 @@ int init_stout_smear_vars(const int V, const int stout_no_iter) {
 
   g_Lambda_smearing[0] = Lambda_smearing;
 
-  for (x = 1; x < V; x++) {
+  for (int x = 1; x < V; x++) {
     g_Lambda_smearing[x] = g_Lambda_smearing[x - 1] + 4;
   }
 
@@ -366,7 +362,7 @@ int init_stout_smear_vars(const int V, const int stout_no_iter) {
 
   g_stout_force_field[0] = stout_force_field;
 
-  for (x = 1; x < V; x++) {
+  for (int x = 1; x < V; x++) {
     g_stout_force_field[x] = g_stout_force_field[x - 1] + 4;
   }
 
@@ -386,7 +382,7 @@ int init_stout_smear_vars(const int V, const int stout_no_iter) {
 
   g_previous_stout_force_field[0] = previous_stout_force_field;
 
-  for (x = 1; x < V; x++) {
+  for (int x = 1; x < V; x++) {
     g_previous_stout_force_field[x] = g_previous_stout_force_field[x - 1] + 4;
   }
 
diff --git a/init/init_stout_smear_vars.h b/src/lib/init/init_stout_smear_vars.h
similarity index 100%
rename from init/init_stout_smear_vars.h
rename to src/lib/init/init_stout_smear_vars.h
diff --git a/integrator.c b/src/lib/integrator.c
similarity index 100%
rename from integrator.c
rename to src/lib/integrator.c
diff --git a/integrator.h b/src/lib/integrator.h
similarity index 100%
rename from integrator.h
rename to src/lib/integrator.h
diff --git a/invert_clover_eo.c b/src/lib/invert_clover_eo.c
similarity index 99%
rename from invert_clover_eo.c
rename to src/lib/invert_clover_eo.c
index e3b6cad31..63e512819 100644
--- a/invert_clover_eo.c
+++ b/src/lib/invert_clover_eo.c
@@ -53,7 +53,7 @@
 #ifdef TM_USE_QUDA
 #include "quda_interface.h"
 #endif
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
 #include "DDalphaAMG_interface.h"
 #endif
 #ifdef TM_USE_QPHIX
@@ -81,7 +81,7 @@ int invert_clover_eo(spinor* const Even_new, spinor* const Odd_new, spinor* cons
     }
 #endif
 
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
     if (solver_flag == MG) {
       return MG_solver_eo(Even_new, Odd_new, Even, Odd, precision, max_iter, rel_prec, VOLUME / 2,
                           gf[0], &Msw_full);
@@ -197,7 +197,7 @@ int invert_clover_eo(spinor* const Even_new, spinor* const Odd_new, spinor* cons
                     rel_prec, VOLUME, Qsq);
       Qm(g_spinor_field[DUM_DERI + 1], g_spinor_field[DUM_DERI]);
     }
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
     else if (solver_flag == MG) {
       return MG_solver_eo(Even_new, Odd_new, Even, Odd, precision, max_iter, rel_prec, VOLUME / 2,
                           gf[0], &Msw_full);
diff --git a/invert_clover_eo.h b/src/lib/invert_clover_eo.h
similarity index 100%
rename from invert_clover_eo.h
rename to src/lib/invert_clover_eo.h
diff --git a/invert_doublet_eo.c b/src/lib/invert_doublet_eo.c
similarity index 99%
rename from invert_doublet_eo.c
rename to src/lib/invert_doublet_eo.c
index 5be48415e..8d5a7dd82 100644
--- a/invert_doublet_eo.c
+++ b/src/lib/invert_doublet_eo.c
@@ -50,7 +50,7 @@
 #ifdef TM_USE_QUDA
 #include "quda_interface.h"
 #endif
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
 #include "DDalphaAMG_interface.h"
 #endif
 #ifdef TM_USE_QPHIX
@@ -75,7 +75,7 @@ int invert_doublet_eo(spinor* const Even_new_s, spinor* const Odd_new_s, spinor*
   }
 #endif
 
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
   if (solver_flag == MG) {
     return MG_solver_nd_eo(Even_new_s, Odd_new_s, Even_new_c, Odd_new_c, Even_s, Odd_s, Even_c,
                            Odd_c, precision, max_iter, rel_prec, VOLUME / 2, g_gauge_field,
@@ -162,7 +162,7 @@ int invert_cloverdoublet_eo(spinor* const Even_new_s, spinor* const Odd_new_s,
   }
 #endif
 
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
   if (solver_flag == MG) {
     return MG_solver_nd_eo(Even_new_s, Odd_new_s, Even_new_c, Odd_new_c, Even_s, Odd_s, Even_c,
                            Odd_c, precision, max_iter, rel_prec, VOLUME / 2, g_gauge_field,
diff --git a/invert_doublet_eo.h b/src/lib/invert_doublet_eo.h
similarity index 100%
rename from invert_doublet_eo.h
rename to src/lib/invert_doublet_eo.h
diff --git a/invert_eo.c b/src/lib/invert_eo.c
similarity index 99%
rename from invert_eo.c
rename to src/lib/invert_eo.c
index 25ee4a297..3b7625d48 100644
--- a/invert_eo.c
+++ b/src/lib/invert_eo.c
@@ -34,6 +34,7 @@
 #ifdef HAVE_CONFIG_H
 #include <tmlqcd_config.h>
 #endif
+
 #include <stdlib.h>
 #include "gamma.h"
 #include "global.h"
@@ -60,7 +61,7 @@
 #ifdef TM_USE_QPHIX
 #include "qphix_interface.h"
 #endif
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
 #include "DDalphaAMG_interface.h"
 #endif
 
@@ -83,7 +84,7 @@ int invert_eo(spinor *const Even_new, spinor *const Odd_new, spinor *const Even,
   }
 #endif
 
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
   if (solver_flag == MG)
     return MG_solver_eo(Even_new, Odd_new, Even, Odd, precision, max_iter, rel_prec, VOLUME / 2,
                         g_gauge_field, &M_full);
diff --git a/invert_eo.h b/src/lib/invert_eo.h
similarity index 100%
rename from invert_eo.h
rename to src/lib/invert_eo.h
diff --git a/invert_overlap.c b/src/lib/invert_overlap.c
similarity index 100%
rename from invert_overlap.c
rename to src/lib/invert_overlap.c
diff --git a/invert_overlap.h b/src/lib/invert_overlap.h
similarity index 100%
rename from invert_overlap.h
rename to src/lib/invert_overlap.h
diff --git a/io/DML_crc32.c b/src/lib/io/DML_crc32.c
similarity index 100%
rename from io/DML_crc32.c
rename to src/lib/io/DML_crc32.c
diff --git a/io/deri_write_stdout.c b/src/lib/io/deri_write_stdout.c
similarity index 100%
rename from io/deri_write_stdout.c
rename to src/lib/io/deri_write_stdout.c
diff --git a/io/deri_write_stdout.h b/src/lib/io/deri_write_stdout.h
similarity index 100%
rename from io/deri_write_stdout.h
rename to src/lib/io/deri_write_stdout.h
diff --git a/io/dml.c b/src/lib/io/dml.c
similarity index 100%
rename from io/dml.c
rename to src/lib/io/dml.c
diff --git a/io/dml.h b/src/lib/io/dml.h
similarity index 100%
rename from io/dml.h
rename to src/lib/io/dml.h
diff --git a/io/eospinor.h b/src/lib/io/eospinor.h
similarity index 100%
rename from io/eospinor.h
rename to src/lib/io/eospinor.h
diff --git a/io/eospinor.ih b/src/lib/io/eospinor.ih
similarity index 100%
rename from io/eospinor.ih
rename to src/lib/io/eospinor.ih
diff --git a/io/eospinor_read.c b/src/lib/io/eospinor_read.c
similarity index 100%
rename from io/eospinor_read.c
rename to src/lib/io/eospinor_read.c
diff --git a/io/eospinor_write.c b/src/lib/io/eospinor_write.c
similarity index 100%
rename from io/eospinor_write.c
rename to src/lib/io/eospinor_write.c
diff --git a/io/gauge.h b/src/lib/io/gauge.h
similarity index 100%
rename from io/gauge.h
rename to src/lib/io/gauge.h
diff --git a/io/gauge.ih b/src/lib/io/gauge.ih
similarity index 100%
rename from io/gauge.ih
rename to src/lib/io/gauge.ih
diff --git a/io/gauge_read.c b/src/lib/io/gauge_read.c
similarity index 99%
rename from io/gauge_read.c
rename to src/lib/io/gauge_read.c
index b7be10928..de53d9c28 100644
--- a/io/gauge_read.c
+++ b/src/lib/io/gauge_read.c
@@ -19,7 +19,7 @@
  ***********************************************************************/
 
 #include "gauge.ih"
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
 #include "DDalphaAMG_interface.h"
 #endif
 
@@ -209,7 +209,7 @@ int read_gauge_field(char *filename, su3 **const gf) {
     // reading a new gauge configuration moves the gauge_id a long way
     // to guarantee that the change is propagated
     update_tm_gauge_id(&g_gauge_state, TM_GAUGE_PROPAGATE_THRESHOLD);
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
     MG_reset();
 #endif
   }
diff --git a/io/gauge_read_binary.c b/src/lib/io/gauge_read_binary.c
similarity index 99%
rename from io/gauge_read_binary.c
rename to src/lib/io/gauge_read_binary.c
index b61284cab..473e4d9c7 100644
--- a/io/gauge_read_binary.c
+++ b/src/lib/io/gauge_read_binary.c
@@ -22,7 +22,7 @@
 /* FIXME I will first fix this function by using referral.
          Probably should be done better in the future. AD. */
 
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
 int read_binary_gauge_data(LemonReader* lemonreader, DML_Checksum* checksum,
                            paramsIldgFormat* input, su3** const gf) {
   int t, x, y, z, status = 0;
@@ -144,7 +144,7 @@ int read_binary_gauge_data(LemonReader* lemonreader, DML_Checksum* checksum,
   free(filebuffer);
   return (0);
 }
-#else /* HAVE_LIBLEMON */
+#else /* TM_USE_LEMON */
 int read_binary_gauge_data(LimeReader *limereader, DML_Checksum *checksum, paramsIldgFormat *input,
                            su3 **const gf) {
   int t, x, y, z, status = 0;
@@ -273,4 +273,4 @@ int read_binary_gauge_data(LimeReader *limereader, DML_Checksum *checksum, param
 #endif
   return (0);
 }
-#endif /* HAVE_LIBLEMON */
+#endif /* TM_USE_LEMON */
diff --git a/io/gauge_write.c b/src/lib/io/gauge_write.c
similarity index 100%
rename from io/gauge_write.c
rename to src/lib/io/gauge_write.c
diff --git a/io/gauge_write_binary.c b/src/lib/io/gauge_write_binary.c
similarity index 99%
rename from io/gauge_write_binary.c
rename to src/lib/io/gauge_write_binary.c
index 668b53a17..ad3c7882e 100644
--- a/io/gauge_write_binary.c
+++ b/src/lib/io/gauge_write_binary.c
@@ -22,7 +22,7 @@
 /* FIXME I will first fix this function by using referral.
          Probably should be done better in the future. AD. */
 
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
 int write_binary_gauge_data(LemonWriter* lemonwriter, const int prec, DML_Checksum* checksum) {
   int x, xG, y, yG, z, zG, t, tG, status = 0;
   su3 tmp3[4];
@@ -133,7 +133,7 @@ int write_binary_gauge_data(LemonWriter* lemonwriter, const int prec, DML_Checks
   return 0;
 }
 
-#else /* HAVE_LIBLEMON */
+#else /* TM_USE_LEMON */
 
 int write_binary_gauge_data(LimeWriter* limewriter, const int prec, DML_Checksum* checksum) {
   int x, X, y, Y, z, Z, tt, t0, tag = 0, id = 0, status = 0;
@@ -281,4 +281,4 @@ int write_binary_gauge_data(LimeWriter* limewriter, const int prec, DML_Checksum
 
   return (0);
 }
-#endif /* HAVE_LIBLEMON */
+#endif /* TM_USE_LEMON */
diff --git a/io/gauge_write_luscher_binary.c b/src/lib/io/gauge_write_luscher_binary.c
similarity index 100%
rename from io/gauge_write_luscher_binary.c
rename to src/lib/io/gauge_write_luscher_binary.c
diff --git a/io/gauge_write_luscher_binary.h b/src/lib/io/gauge_write_luscher_binary.h
similarity index 100%
rename from io/gauge_write_luscher_binary.h
rename to src/lib/io/gauge_write_luscher_binary.h
diff --git a/io/io_cm.c b/src/lib/io/io_cm.c
similarity index 100%
rename from io/io_cm.c
rename to src/lib/io/io_cm.c
diff --git a/io/io_cm.h b/src/lib/io/io_cm.h
similarity index 100%
rename from io/io_cm.h
rename to src/lib/io/io_cm.h
diff --git a/io/params.h b/src/lib/io/params.h
similarity index 100%
rename from io/params.h
rename to src/lib/io/params.h
diff --git a/io/params.ih b/src/lib/io/params.ih
similarity index 100%
rename from io/params.ih
rename to src/lib/io/params.ih
diff --git a/io/params_construct_InverterInfo.c b/src/lib/io/params_construct_InverterInfo.c
similarity index 100%
rename from io/params_construct_InverterInfo.c
rename to src/lib/io/params_construct_InverterInfo.c
diff --git a/io/params_construct_ildgFormat.c b/src/lib/io/params_construct_ildgFormat.c
similarity index 100%
rename from io/params_construct_ildgFormat.c
rename to src/lib/io/params_construct_ildgFormat.c
diff --git a/io/params_construct_propagatorFormat.c b/src/lib/io/params_construct_propagatorFormat.c
similarity index 100%
rename from io/params_construct_propagatorFormat.c
rename to src/lib/io/params_construct_propagatorFormat.c
diff --git a/io/params_construct_sourceFormat.c b/src/lib/io/params_construct_sourceFormat.c
similarity index 100%
rename from io/params_construct_sourceFormat.c
rename to src/lib/io/params_construct_sourceFormat.c
diff --git a/io/params_construct_xlfInfo.c b/src/lib/io/params_construct_xlfInfo.c
similarity index 100%
rename from io/params_construct_xlfInfo.c
rename to src/lib/io/params_construct_xlfInfo.c
diff --git a/io/selector.h b/src/lib/io/selector.h
similarity index 95%
rename from io/selector.h
rename to src/lib/io/selector.h
index 806178bff..236be8d32 100644
--- a/io/selector.h
+++ b/src/lib/io/selector.h
@@ -21,11 +21,11 @@
 #define _IO_SELECTOR_H
 
 #include <lime.h>
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
 #include <lemon.h>
-#endif /* HAVE_LIBLEMON */
+#endif /* TM_USE_LEMON */
 
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
 #define LIME_FILE MPI_File
 #define WRITER LemonWriter
 #define READER LemonReader
@@ -42,7 +42,7 @@
 #define WriterCloseRecord lemonWriterCloseRecord
 #define DestroyReader lemonDestroyReader
 #define DestroyHeader lemonDestroyHeader
-#else /* HAVE_LIBLEMON */
+#else /* TM_USE_LEMON */
 #define LIME_FILE FILE
 #define WRITER LimeWriter
 #define READER LimeReader
diff --git a/io/spinor.h b/src/lib/io/spinor.h
similarity index 100%
rename from io/spinor.h
rename to src/lib/io/spinor.h
diff --git a/io/spinor.ih b/src/lib/io/spinor.ih
similarity index 100%
rename from io/spinor.ih
rename to src/lib/io/spinor.ih
diff --git a/io/spinor_read.c b/src/lib/io/spinor_read.c
similarity index 100%
rename from io/spinor_read.c
rename to src/lib/io/spinor_read.c
diff --git a/io/spinor_read_binary.c b/src/lib/io/spinor_read_binary.c
similarity index 98%
rename from io/spinor_read_binary.c
rename to src/lib/io/spinor_read_binary.c
index 6d459fd2c..81607a700 100644
--- a/io/spinor_read_binary.c
+++ b/src/lib/io/spinor_read_binary.c
@@ -19,7 +19,7 @@
 
 #include "spinor.ih"
 
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
 int read_binary_spinor_data(spinor *const s, spinor *const r, LemonReader *lemonreader,
                             DML_Checksum *checksum) {
   int t, x, y, z, i = 0, status = 0;
@@ -126,7 +126,7 @@ int read_binary_spinor_data(spinor *const s, spinor *const r, LemonReader *lemon
   free(filebuffer);
   return 0;
 }
-#else /* HAVE_LIBLEMON */
+#else /* TM_USE_LEMON */
 int read_binary_spinor_data(spinor *const s, spinor *const r, LimeReader *limereader,
                             DML_Checksum *checksum) {
   int t, x, y, z, i = 0, status = 0;
@@ -212,9 +212,9 @@ int read_binary_spinor_data(spinor *const s, spinor *const r, LimeReader *limere
 #endif
   return (0);
 }
-#endif /* HAVE_LIBLEMON */
+#endif /* TM_USE_LEMON */
 
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
 int read_binary_spinor_data_l(spinor *const s, LemonReader *lemonreader, DML_Checksum *checksum) {
   int t, x, y, z, i = 0, status = 0;
   int latticeSize[] = {T_global, g_nproc_x * LX, g_nproc_y * LY, g_nproc_z * LZ};
@@ -314,7 +314,7 @@ int read_binary_spinor_data_l(spinor *const s, LemonReader *lemonreader, DML_Che
   free(filebuffer);
   return 0;
 }
-#else /* HAVE_LIBLEMON */
+#else /* TM_USE_LEMON */
 int read_binary_spinor_data_l(spinor *const s, LimeReader *limereader, DML_Checksum *checksum) {
   int t, x, y, z, i = 0, status = 0;
   n_uint64_t bytes;
@@ -390,4 +390,4 @@ int read_binary_spinor_data_l(spinor *const s, LimeReader *limereader, DML_Check
 #endif
   return (0);
 }
-#endif /* HAVE_LIBLEMON */
+#endif /* TM_USE_LEMON */
diff --git a/io/spinor_write.c b/src/lib/io/spinor_write.c
similarity index 100%
rename from io/spinor_write.c
rename to src/lib/io/spinor_write.c
diff --git a/io/spinor_write_binary.c b/src/lib/io/spinor_write_binary.c
similarity index 99%
rename from io/spinor_write_binary.c
rename to src/lib/io/spinor_write_binary.c
index a2bc0cd68..560b5ce65 100644
--- a/io/spinor_write_binary.c
+++ b/src/lib/io/spinor_write_binary.c
@@ -19,7 +19,7 @@
 
 #include "spinor.ih"
 
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
 int write_binary_spinor_data(spinor *const s, spinor *const r, LemonWriter *lemonwriter,
                              DML_Checksum *checksum, int const prec) {
   int x, y, z, t, i = 0, xG, yG, zG, tG, status = 0;
@@ -124,7 +124,7 @@ int write_binary_spinor_data(spinor *const s, spinor *const r, LemonWriter *lemo
   return 0;
 }
 
-#else /* HAVE_LIBLEMON */
+#else /* TM_USE_LEMON */
 int write_binary_spinor_data(spinor *const s, spinor *const r, LimeWriter *limewriter,
                              DML_Checksum *checksum, const int prec) {
   int x, X, y, Y, z, Z, t, t0, tag = 0, id = 0, i = 0, status = 0;
@@ -272,9 +272,9 @@ int write_binary_spinor_data(spinor *const s, spinor *const r, LimeWriter *limew
   }
   return (0);
 }
-#endif /* HAVE_LIBLEMON */
+#endif /* TM_USE_LEMON */
 
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
 int write_binary_spinor_data_l(spinor *const s, LemonWriter *lemonwriter, DML_Checksum *checksum,
                                int const prec) {
   int x, y, z, t, i = 0, xG, yG, zG, tG, status = 0;
@@ -374,7 +374,7 @@ int write_binary_spinor_data_l(spinor *const s, LemonWriter *lemonwriter, DML_Ch
   return 0;
 }
 
-#else /* HAVE_LIBLEMON */
+#else /* TM_USE_LEMON */
 int write_binary_spinor_data_l(spinor *const s, LimeWriter *limewriter, DML_Checksum *checksum,
                                const int prec) {
   int x, X, y, Y, z, Z, t, t0, tag = 0, id = 0, i = 0, status = 0;
@@ -514,4 +514,4 @@ int write_binary_spinor_data_l(spinor *const s, LimeWriter *limewriter, DML_Chec
   }
   return (0);
 }
-#endif /* HAVE_LIBLEMON */
+#endif /* TM_USE_LEMON */
diff --git a/io/spinor_write_info.c b/src/lib/io/spinor_write_info.c
similarity index 100%
rename from io/spinor_write_info.c
rename to src/lib/io/spinor_write_info.c
diff --git a/io/spinor_write_propagator_format.c b/src/lib/io/spinor_write_propagator_format.c
similarity index 100%
rename from io/spinor_write_propagator_format.c
rename to src/lib/io/spinor_write_propagator_format.c
diff --git a/io/spinor_write_propagator_type.c b/src/lib/io/spinor_write_propagator_type.c
similarity index 89%
rename from io/spinor_write_propagator_type.c
rename to src/lib/io/spinor_write_propagator_type.c
index 67356b8f1..77eb17728 100644
--- a/io/spinor_write_propagator_type.c
+++ b/src/lib/io/spinor_write_propagator_type.c
@@ -4,9 +4,9 @@ void write_propagator_type(WRITER *writer, const int type) {
   uint64_t bytes;
   char *message;
 
-#ifndef HAVE_LIBLEMON
+#ifndef TM_USE_LEMON
   if (g_cart_id == 0) {
-#endif /* ! HAVE_LIBLEMON */
+#endif /* ! TM_USE_LEMON */
 
     message = (char *)malloc(128);
 
@@ -34,7 +34,7 @@ void write_propagator_type(WRITER *writer, const int type) {
 
     close_writer_record(writer);
     free(message);
-#ifndef HAVE_LIBLEMON
+#ifndef TM_USE_LEMON
   }
-#endif /* ! HAVE_LIBLEMON */
+#endif /* ! TM_USE_LEMON */
 }
diff --git a/io/spinor_write_source_format.c b/src/lib/io/spinor_write_source_format.c
similarity index 95%
rename from io/spinor_write_source_format.c
rename to src/lib/io/spinor_write_source_format.c
index a501ae5d3..e6cf0e782 100644
--- a/io/spinor_write_source_format.c
+++ b/src/lib/io/spinor_write_source_format.c
@@ -22,9 +22,9 @@
 void write_source_format(WRITER *writer, paramsSourceFormat const *format) {
   uint64_t bytes;
   char *buf = NULL;
-#ifndef HAVE_LIBLEMON
+#ifndef TM_USE_LEMON
   if (g_cart_id == 0) {
-#endif /* ! HAVE_LIBLEMON */
+#endif /* ! TM_USE_LEMON */
     buf = (char *)malloc(512);
     sprintf(buf,
             "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
@@ -49,7 +49,7 @@ void write_source_format(WRITER *writer, paramsSourceFormat const *format) {
     close_writer_record(writer);
 
     free(buf);
-#ifndef HAVE_LIBLEMON
+#ifndef TM_USE_LEMON
   }
-#endif /* ! HAVE_LIBLEMON */
+#endif /* ! TM_USE_LEMON */
 }
diff --git a/io/spinor_write_stdout.c b/src/lib/io/spinor_write_stdout.c
similarity index 100%
rename from io/spinor_write_stdout.c
rename to src/lib/io/spinor_write_stdout.c
diff --git a/io/spinor_write_stdout.h b/src/lib/io/spinor_write_stdout.h
similarity index 100%
rename from io/spinor_write_stdout.h
rename to src/lib/io/spinor_write_stdout.h
diff --git a/io/sw_write_stdout.c b/src/lib/io/sw_write_stdout.c
similarity index 100%
rename from io/sw_write_stdout.c
rename to src/lib/io/sw_write_stdout.c
diff --git a/io/sw_write_stdout.h b/src/lib/io/sw_write_stdout.h
similarity index 100%
rename from io/sw_write_stdout.h
rename to src/lib/io/sw_write_stdout.h
diff --git a/io/utils.c b/src/lib/io/utils.c
similarity index 100%
rename from io/utils.c
rename to src/lib/io/utils.c
diff --git a/io/utils.h b/src/lib/io/utils.h
similarity index 99%
rename from io/utils.h
rename to src/lib/io/utils.h
index afcca1553..85e98a5e2 100644
--- a/io/utils.h
+++ b/src/lib/io/utils.h
@@ -20,9 +20,7 @@
 #ifndef _UTILS_H
 #define _UTILS_H
 
-#ifdef HAVE_CONFIG_H
 #include <tmlqcd_config.h>
-#endif
 
 #include "io/dml.h"
 #include "io/params.h"
diff --git a/io/utils.ih b/src/lib/io/utils.ih
similarity index 96%
rename from io/utils.ih
rename to src/lib/io/utils.ih
index 073bd64b5..dd963b5b9 100644
--- a/io/utils.ih
+++ b/src/lib/io/utils.ih
@@ -18,9 +18,7 @@
 ***********************************************************************/
 
 #include <lime.h>
-#ifdef HAVE_CONFIG_H
 #include "tmlqcd_config.h"
-#endif
 
 #include <stdlib.h>
 #include <stdio.h>
@@ -29,7 +27,7 @@
 #include <endian.h>
 #include <sys/time.h>
 #include <sys/types.h>
-#ifdef MPI
+#ifdef TM_USE_MPI
 #include <mpi.h>
 #endif
 #include <unistd.h>
diff --git a/io/utils_close_reader_record.c b/src/lib/io/utils_close_reader_record.c
similarity index 100%
rename from io/utils_close_reader_record.c
rename to src/lib/io/utils_close_reader_record.c
diff --git a/io/utils_close_writer_record.c b/src/lib/io/utils_close_writer_record.c
similarity index 100%
rename from io/utils_close_writer_record.c
rename to src/lib/io/utils_close_writer_record.c
diff --git a/io/utils_construct_reader.c b/src/lib/io/utils_construct_reader.c
similarity index 83%
rename from io/utils_construct_reader.c
rename to src/lib/io/utils_construct_reader.c
index 085206786..832ede73d 100644
--- a/io/utils_construct_reader.c
+++ b/src/lib/io/utils_construct_reader.c
@@ -1,26 +1,28 @@
 #include "utils.ih"
 
+extern MPI_Comm g_cart_grid;
+
 void construct_reader(READER **reader, char *filename) {
   LIME_FILE *fh = NULL;
   int status = 0;
 
   if (g_debug_level > 0 && g_cart_id == 0) {
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
     printf("# Constructing LEMON reader for file %s ...\n", filename);
 #else
     printf("# Constructing LIME reader for file %s ...\n", filename);
 #endif
   }
 
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
   fh = (MPI_File *)malloc(sizeof(MPI_File));
   status = MPI_File_open(g_cart_grid, filename, MPI_MODE_RDONLY, MPI_INFO_NULL, fh);
   status = (status == MPI_SUCCESS) ? 0 : 1;
-#else  /* HAVE_LIBLEMON */
+#else  /* TM_USE_LEMON */
   fh = fopen(filename, "r");
   status = (fh == NULL) ? 1 : 0;
   fflush(stderr);
-#endif /* HAVE_LIBLEMON */
+#endif /* TM_USE_LEMON */
 
   if (status) {
     kill_with_error(fh, g_cart_id,
@@ -28,11 +30,11 @@ void construct_reader(READER **reader, char *filename) {
                     "rights.\nUnable to continue.\n");
   }
 
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
   *reader = lemonCreateReader(fh, g_cart_grid);
-#else  /* HAVE_LIBLEMON */
+#else  /* TM_USE_LEMON */
   *reader = limeCreateReader(fh);
-#endif /* HAVE_LIBLEMON */
+#endif /* TM_USE_LEMON */
 
   if (*reader == (READER *)NULL) {
     kill_with_error(fh, g_cart_id, "\nCould not create reader, unable to continue.\n");
diff --git a/io/utils_construct_writer.c b/src/lib/io/utils_construct_writer.c
similarity index 92%
rename from io/utils_construct_writer.c
rename to src/lib/io/utils_construct_writer.c
index 4f13900fe..f2fe58bb7 100644
--- a/io/utils_construct_writer.c
+++ b/src/lib/io/utils_construct_writer.c
@@ -4,14 +4,14 @@ void construct_writer(WRITER **writer, char *filename, const int append) {
   LIME_FILE *fh = NULL;
   int status = 0;
   if (g_debug_level > 0 && g_cart_id == 0) {
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
     printf("# Constructing LEMON writer for file %s for append = %d\n", filename, append);
 #else
     printf("# Constructing LIME writer for file %s for append = %d\n", filename, append);
 #endif
   }
 
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
   fh = (MPI_File *)malloc(sizeof(MPI_File));
   if (append) {
     status = MPI_File_open(g_cart_grid, filename,
@@ -24,7 +24,7 @@ void construct_writer(WRITER **writer, char *filename, const int append) {
   status = (status == MPI_SUCCESS) ? 0 : 1;
   *writer = lemonCreateWriter(fh, g_cart_grid);
   status = status || (writer == NULL);
-#else  /* HAVE_LIBLEMON */
+#else  /* TM_USE_LEMON */
   if (g_cart_id == 0) {
     if (append) {
       fh = fopen(filename, "a");
@@ -35,7 +35,7 @@ void construct_writer(WRITER **writer, char *filename, const int append) {
     *writer = limeCreateWriter(fh);
     status = status || (writer == NULL);
   }
-#endif /* HAVE_LIBLEMON */
+#endif /* TM_USE_LEMON */
 
   if (status) kill_with_error(fh, g_cart_id, "Failed to create writer. Aborting...\n");
 }
diff --git a/io/utils_destruct_reader.c b/src/lib/io/utils_destruct_reader.c
similarity index 74%
rename from io/utils_destruct_reader.c
rename to src/lib/io/utils_destruct_reader.c
index 4ee23d595..2ed391c47 100644
--- a/io/utils_destruct_reader.c
+++ b/src/lib/io/utils_destruct_reader.c
@@ -5,10 +5,10 @@ void destruct_reader(READER *reader) {
 
   fh = reader->fp;
   DestroyReader(reader);
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
   MPI_File_close(fh);
   free(fh); /* NB This assumes construct_writer was used to malloc memory! */
-#else       /* HAVE_LIBLEMON */
+#else       /* TM_USE_LEMON */
   fclose(fh);
-#endif      /* HAVE_LIBLEMON */
+#endif      /* TM_USE_LEMON */
 }
diff --git a/io/utils_destruct_writer.c b/src/lib/io/utils_destruct_writer.c
similarity index 79%
rename from io/utils_destruct_writer.c
rename to src/lib/io/utils_destruct_writer.c
index 840c06b4e..1f6216167 100644
--- a/io/utils_destruct_writer.c
+++ b/src/lib/io/utils_destruct_writer.c
@@ -3,16 +3,16 @@
 void destruct_writer(WRITER *writer) {
   LIME_FILE *fh = NULL;
 
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
   fh = writer->fp;
   lemonDestroyWriter(writer);
   MPI_File_close(fh);
   free(fh); /* NB This assumes construct_writer was used to malloc memory! */
-#else       /* HAVE_LIBLEMON */
+#else       /* TM_USE_LEMON */
   if (g_cart_id == 0) {
     fh = writer->fp;
     limeDestroyWriter(writer);
     fclose(fh);
   }
-#endif      /* HAVE_LIBLEMON */
+#endif      /* TM_USE_LEMON */
 }
diff --git a/io/utils_engineering.c b/src/lib/io/utils_engineering.c
similarity index 100%
rename from io/utils_engineering.c
rename to src/lib/io/utils_engineering.c
diff --git a/io/utils_kill_with_error.c b/src/lib/io/utils_kill_with_error.c
similarity index 88%
rename from io/utils_kill_with_error.c
rename to src/lib/io/utils_kill_with_error.c
index bd697220d..322536bd7 100644
--- a/io/utils_kill_with_error.c
+++ b/src/lib/io/utils_kill_with_error.c
@@ -7,11 +7,11 @@ void kill_with_error(LIME_FILE *fh, int const rank, char const *error) {
   }
 
   if (fh != NULL)
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
     MPI_File_close(fh);
 #else
     fclose(fh);
-#endif /* HAVE_LIBLEMON */
+#endif /* TM_USE_LEMON */
 
 #ifdef TM_USE_MPI
   MPI_Abort(MPI_COMM_WORLD, 1);
diff --git a/io/utils_parse_checksum_xml.c b/src/lib/io/utils_parse_checksum_xml.c
similarity index 100%
rename from io/utils_parse_checksum_xml.c
rename to src/lib/io/utils_parse_checksum_xml.c
diff --git a/io/utils_parse_ildgformat_xml.c b/src/lib/io/utils_parse_ildgformat_xml.c
similarity index 100%
rename from io/utils_parse_ildgformat_xml.c
rename to src/lib/io/utils_parse_ildgformat_xml.c
diff --git a/io/utils_parse_propagator_type.c b/src/lib/io/utils_parse_propagator_type.c
similarity index 100%
rename from io/utils_parse_propagator_type.c
rename to src/lib/io/utils_parse_propagator_type.c
diff --git a/io/utils_read_message.c b/src/lib/io/utils_read_message.c
similarity index 100%
rename from io/utils_read_message.c
rename to src/lib/io/utils_read_message.c
diff --git a/io/utils_write_checksum.c b/src/lib/io/utils_write_checksum.c
similarity index 100%
rename from io/utils_write_checksum.c
rename to src/lib/io/utils_write_checksum.c
diff --git a/io/utils_write_first_message.c b/src/lib/io/utils_write_first_message.c
similarity index 89%
rename from io/utils_write_first_message.c
rename to src/lib/io/utils_write_first_message.c
index 983b92b0a..4233789cc 100644
--- a/io/utils_write_first_message.c
+++ b/src/lib/io/utils_write_first_message.c
@@ -30,28 +30,28 @@ int write_first_messages(FILE* parameterfile, char const* const executable,
            TMLQCD_PACKAGE_VERSION, git_hash);
   printf("%s", message);
   fprintf(parameterfile, "%s", message);
-#ifdef _GAUGE_COPY
-  printf("# The code is compiled with -D_GAUGE_COPY\n");
-  fprintf(parameterfile, "# The code is compiled with -D_GAUGE_COPY\n");
+#ifdef TM_USE_GAUGE_COPY
+  printf("# The code is compiled with -DTM_USE_GAUGE_COPY\n");
+  fprintf(parameterfile, "# The code is compiled with -DTM_USE_GAUGE_COPY\n");
 #endif
-#ifdef _USE_HALFSPINOR
-  printf("# The code is compiled with -D_USE_HALFSPINOR\n");
-  fprintf(parameterfile, "# The code is compiled with -D_USE_HALFSPINOR\n");
+#ifdef TM_USE_HALFSPINOR
+  printf("# The code is compiled with -DTM_USE_HALFSPINOR\n");
+  fprintf(parameterfile, "# The code is compiled with -DTM_USE_HALFSPINOR\n");
 #endif
-#ifdef _USE_SHMEM
-  printf("# the code is compiled with -D_USE_SHMEM\n");
-  fprintf(parameterfile, "# the code is compiled with -D_USE_SHMEM\n");
-#ifdef _PERSISTENT
+#ifdef TM_USE_SHMEM
+  printf("# the code is compiled with -DTM_USE_SHMEM\n");
+  fprintf(parameterfile, "# the code is compiled with -DTM_USE_SHMEM\n");
+#ifdef TM_PERSISTENT
   printf("# the code is compiled for persistent MPI calls (halfspinor only)\n");
   fprintf(parameterfile, "# the code is compiled for persistent MPI calls (halfspinor only)\n");
 #endif
 #endif
 #ifdef TM_USE_MPI
-#ifdef _NON_BLOCKING
+#ifdef TM_NON_BLOCKING
   printf("# the code is compiled for non-blocking MPI calls (spinor and gauge)\n");
   fprintf(parameterfile, "# the code is compiled for non-blocking MPI calls (spinor and gauge)\n");
 #endif
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
   printf("# the code is compiled with MPI IO / Lemon\n");
   fprintf(parameterfile, "# the code is compiled with MPI IO / Lemon\n");
 #endif
diff --git a/io/utils_write_header.c b/src/lib/io/utils_write_header.c
similarity index 93%
rename from io/utils_write_header.c
rename to src/lib/io/utils_write_header.c
index 7f5f85c83..be8ae4ade 100644
--- a/io/utils_write_header.c
+++ b/src/lib/io/utils_write_header.c
@@ -23,9 +23,9 @@ void write_header(WRITER *writer, int MB, int ME, char const *type, uint64_t byt
   int status;
   RECORD_HEADER *header;
 
-#ifndef HAVE_LIBLEMON
+#ifndef TM_USE_LEMON
   if (g_cart_id == 0) {
-#endif /* ! HAVE_LIBLEMON */
+#endif /* ! TM_USE_LEMON */
     /* Nasty (but probably harmless) hack to get rid of const qualifier - the original c-lime was
      * sloppy here. */
     header = CreateHeader(MB, ME, (char *)type, bytes);
@@ -35,8 +35,8 @@ void write_header(WRITER *writer, int MB, int ME, char const *type, uint64_t byt
     if (status != LIME_SUCCESS) {
       kill_with_error(writer->fp, g_cart_id, "Header writing error. Aborting\n");
     }
-#ifndef HAVE_LIBLEMON
+#ifndef TM_USE_LEMON
   }
-#endif /* ! HAVE_LIBLEMON */
+#endif /* ! TM_USE_LEMON */
   return;
 }
diff --git a/io/utils_write_ildg_format.c b/src/lib/io/utils_write_ildg_format.c
similarity index 100%
rename from io/utils_write_ildg_format.c
rename to src/lib/io/utils_write_ildg_format.c
diff --git a/io/utils_write_inverter_info.c b/src/lib/io/utils_write_inverter_info.c
similarity index 100%
rename from io/utils_write_inverter_info.c
rename to src/lib/io/utils_write_inverter_info.c
diff --git a/io/utils_write_message.c b/src/lib/io/utils_write_message.c
similarity index 93%
rename from io/utils_write_message.c
rename to src/lib/io/utils_write_message.c
index b71cdbbce..d346c9a9c 100644
--- a/io/utils_write_message.c
+++ b/src/lib/io/utils_write_message.c
@@ -23,9 +23,9 @@ int write_message(WRITER *writer, char const *buffer, uint64_t bytes) {
   int status;
   n_uint64_t bytesWritten = bytes;
 
-#ifndef HAVE_LIBLEMON
+#ifndef TM_USE_LEMON
   if (g_cart_id == 0) {
-#endif /* ! HAVE_LIBLEMON */
+#endif /* ! TM_USE_LEMON */
     if (buffer == (char *)NULL) return (0);
 
 #ifdef TM_USE_MPI
@@ -35,8 +35,8 @@ int write_message(WRITER *writer, char const *buffer, uint64_t bytes) {
 #endif
     if (status != LIME_SUCCESS || bytes != bytesWritten)
       kill_with_error(writer->fp, g_cart_id, "I/O error on writing message. Aborting...\n");
-#ifndef HAVE_LIBLEMON
+#ifndef TM_USE_LEMON
   }
-#endif /* ! HAVE_LIBLEMON */
+#endif /* ! TM_USE_LEMON */
   return (0);
 }
diff --git a/io/utils_write_xlf.c b/src/lib/io/utils_write_xlf.c
similarity index 100%
rename from io/utils_write_xlf.c
rename to src/lib/io/utils_write_xlf.c
diff --git a/io/utils_write_xlf_xml.c b/src/lib/io/utils_write_xlf_xml.c
similarity index 100%
rename from io/utils_write_xlf_xml.c
rename to src/lib/io/utils_write_xlf_xml.c
diff --git a/kahan_summation.h b/src/lib/kahan_summation.h
similarity index 100%
rename from kahan_summation.h
rename to src/lib/kahan_summation.h
diff --git a/linalg/Makefile.in b/src/lib/linalg/Makefile.in
similarity index 100%
rename from linalg/Makefile.in
rename to src/lib/linalg/Makefile.in
diff --git a/linalg/add.c b/src/lib/linalg/add.c
similarity index 100%
rename from linalg/add.c
rename to src/lib/linalg/add.c
diff --git a/linalg/add.h b/src/lib/linalg/add.h
similarity index 100%
rename from linalg/add.h
rename to src/lib/linalg/add.h
diff --git a/linalg/addto_32.c b/src/lib/linalg/addto_32.c
similarity index 100%
rename from linalg/addto_32.c
rename to src/lib/linalg/addto_32.c
diff --git a/linalg/addto_32.h b/src/lib/linalg/addto_32.h
similarity index 100%
rename from linalg/addto_32.h
rename to src/lib/linalg/addto_32.h
diff --git a/linalg/assign.c b/src/lib/linalg/assign.c
similarity index 99%
rename from linalg/assign.c
rename to src/lib/linalg/assign.c
index fd04de1e4..19fcda44b 100644
--- a/linalg/assign.c
+++ b/src/lib/linalg/assign.c
@@ -47,4 +47,3 @@ void assign_32(spinor32 *const R, spinor32 *const S, const int N) {
   memcpy(R, S, N * sizeof(spinor32));
   return;
 }
-
diff --git a/linalg/assign.h b/src/lib/linalg/assign.h
similarity index 100%
rename from linalg/assign.h
rename to src/lib/linalg/assign.h
diff --git a/linalg/assign_add_mul.c b/src/lib/linalg/assign_add_mul.c
similarity index 95%
rename from linalg/assign_add_mul.c
rename to src/lib/linalg/assign_add_mul.c
index cdc9f4931..eae85f685 100644
--- a/linalg/assign_add_mul.c
+++ b/src/lib/linalg/assign_add_mul.c
@@ -41,7 +41,7 @@
 #define _PSWITCH(s) s
 #define _PTSWITCH(s) s
 
-#include "assign_add_mul_body.c"
+#include "assign_add_mul_body.inc"
 
 #undef _C_TYPE
 #undef _PSWITCH
@@ -51,7 +51,7 @@
 #define _PSWITCH(s) s##_32
 #define _PTSWITCH(s) s##32
 
-#include "assign_add_mul_body.c"
+#include "assign_add_mul_body.inc"
 
 #undef _C_TYPE
 #undef _PSWITCH
diff --git a/linalg/assign_add_mul.h b/src/lib/linalg/assign_add_mul.h
similarity index 100%
rename from linalg/assign_add_mul.h
rename to src/lib/linalg/assign_add_mul.h
diff --git a/linalg/assign_add_mul_add_mul.c b/src/lib/linalg/assign_add_mul_add_mul.c
similarity index 100%
rename from linalg/assign_add_mul_add_mul.c
rename to src/lib/linalg/assign_add_mul_add_mul.c
diff --git a/linalg/assign_add_mul_add_mul.h b/src/lib/linalg/assign_add_mul_add_mul.h
similarity index 100%
rename from linalg/assign_add_mul_add_mul.h
rename to src/lib/linalg/assign_add_mul_add_mul.h
diff --git a/linalg/assign_add_mul_add_mul_r.c b/src/lib/linalg/assign_add_mul_add_mul_r.c
similarity index 100%
rename from linalg/assign_add_mul_add_mul_r.c
rename to src/lib/linalg/assign_add_mul_add_mul_r.c
diff --git a/linalg/assign_add_mul_add_mul_r.h b/src/lib/linalg/assign_add_mul_add_mul_r.h
similarity index 100%
rename from linalg/assign_add_mul_add_mul_r.h
rename to src/lib/linalg/assign_add_mul_add_mul_r.h
diff --git a/linalg/assign_add_mul_body.c b/src/lib/linalg/assign_add_mul_body.inc
similarity index 100%
rename from linalg/assign_add_mul_body.c
rename to src/lib/linalg/assign_add_mul_body.inc
diff --git a/linalg/assign_add_mul_r.c b/src/lib/linalg/assign_add_mul_r.c
similarity index 100%
rename from linalg/assign_add_mul_r.c
rename to src/lib/linalg/assign_add_mul_r.c
diff --git a/linalg/assign_add_mul_r.h b/src/lib/linalg/assign_add_mul_r.h
similarity index 100%
rename from linalg/assign_add_mul_r.h
rename to src/lib/linalg/assign_add_mul_r.h
diff --git a/linalg/assign_add_mul_r_32.c b/src/lib/linalg/assign_add_mul_r_32.c
similarity index 91%
rename from linalg/assign_add_mul_r_32.c
rename to src/lib/linalg/assign_add_mul_r_32.c
index 8df54858b..5ab9366ac 100644
--- a/linalg/assign_add_mul_r_32.c
+++ b/src/lib/linalg/assign_add_mul_r_32.c
@@ -28,17 +28,14 @@
 #ifdef HAVE_CONFIG_H
 #include <tmlqcd_config.h>
 #endif
-#ifdef TM_USE_OMP
-#include <omp.h>
-#endif
 #include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include "assign_add_mul_r_32.h"
 #include "su3.h"
 
-inline void assign_add_mul_r_32_orphaned(spinor32 *const R, spinor32 *const S, const float c,
-                                         const int N) {
+void assign_add_mul_r_32_orphaned(spinor32 *const R, spinor32 *const S, const float c,
+                                  const int N) {
 #ifdef TM_USE_OMP
 #pragma omp parallel for
 #endif
diff --git a/linalg/assign_add_mul_r_32.h b/src/lib/linalg/assign_add_mul_r_32.h
similarity index 100%
rename from linalg/assign_add_mul_r_32.h
rename to src/lib/linalg/assign_add_mul_r_32.h
diff --git a/linalg/assign_add_mul_r_add_mul.c b/src/lib/linalg/assign_add_mul_r_add_mul.c
similarity index 100%
rename from linalg/assign_add_mul_r_add_mul.c
rename to src/lib/linalg/assign_add_mul_r_add_mul.c
diff --git a/linalg/assign_add_mul_r_add_mul.h b/src/lib/linalg/assign_add_mul_r_add_mul.h
similarity index 100%
rename from linalg/assign_add_mul_r_add_mul.h
rename to src/lib/linalg/assign_add_mul_r_add_mul.h
diff --git a/linalg/assign_diff_mul.c b/src/lib/linalg/assign_diff_mul.c
similarity index 100%
rename from linalg/assign_diff_mul.c
rename to src/lib/linalg/assign_diff_mul.c
diff --git a/linalg/assign_diff_mul.h b/src/lib/linalg/assign_diff_mul.h
similarity index 100%
rename from linalg/assign_diff_mul.h
rename to src/lib/linalg/assign_diff_mul.h
diff --git a/linalg/assign_mul_add.c b/src/lib/linalg/assign_mul_add.c
similarity index 100%
rename from linalg/assign_mul_add.c
rename to src/lib/linalg/assign_mul_add.c
diff --git a/linalg/assign_mul_add.h b/src/lib/linalg/assign_mul_add.h
similarity index 100%
rename from linalg/assign_mul_add.h
rename to src/lib/linalg/assign_mul_add.h
diff --git a/linalg/assign_mul_add_mul.c b/src/lib/linalg/assign_mul_add_mul.c
similarity index 100%
rename from linalg/assign_mul_add_mul.c
rename to src/lib/linalg/assign_mul_add_mul.c
diff --git a/linalg/assign_mul_add_mul.h b/src/lib/linalg/assign_mul_add_mul.h
similarity index 100%
rename from linalg/assign_mul_add_mul.h
rename to src/lib/linalg/assign_mul_add_mul.h
diff --git a/linalg/assign_mul_add_mul_add_mul_add_mul_r.c b/src/lib/linalg/assign_mul_add_mul_add_mul_add_mul_r.c
similarity index 100%
rename from linalg/assign_mul_add_mul_add_mul_add_mul_r.c
rename to src/lib/linalg/assign_mul_add_mul_add_mul_add_mul_r.c
diff --git a/linalg/assign_mul_add_mul_add_mul_add_mul_r.h b/src/lib/linalg/assign_mul_add_mul_add_mul_add_mul_r.h
similarity index 100%
rename from linalg/assign_mul_add_mul_add_mul_add_mul_r.h
rename to src/lib/linalg/assign_mul_add_mul_add_mul_add_mul_r.h
diff --git a/linalg/assign_mul_add_mul_add_mul_r.c b/src/lib/linalg/assign_mul_add_mul_add_mul_r.c
similarity index 100%
rename from linalg/assign_mul_add_mul_add_mul_r.c
rename to src/lib/linalg/assign_mul_add_mul_add_mul_r.c
diff --git a/linalg/assign_mul_add_mul_add_mul_r.h b/src/lib/linalg/assign_mul_add_mul_add_mul_r.h
similarity index 100%
rename from linalg/assign_mul_add_mul_add_mul_r.h
rename to src/lib/linalg/assign_mul_add_mul_add_mul_r.h
diff --git a/linalg/assign_mul_add_mul_r.c b/src/lib/linalg/assign_mul_add_mul_r.c
similarity index 100%
rename from linalg/assign_mul_add_mul_r.c
rename to src/lib/linalg/assign_mul_add_mul_r.c
diff --git a/linalg/assign_mul_add_mul_r.h b/src/lib/linalg/assign_mul_add_mul_r.h
similarity index 100%
rename from linalg/assign_mul_add_mul_r.h
rename to src/lib/linalg/assign_mul_add_mul_r.h
diff --git a/linalg/assign_mul_add_mul_r_32.c b/src/lib/linalg/assign_mul_add_mul_r_32.c
similarity index 100%
rename from linalg/assign_mul_add_mul_r_32.c
rename to src/lib/linalg/assign_mul_add_mul_r_32.c
diff --git a/linalg/assign_mul_add_mul_r_32.h b/src/lib/linalg/assign_mul_add_mul_r_32.h
similarity index 100%
rename from linalg/assign_mul_add_mul_r_32.h
rename to src/lib/linalg/assign_mul_add_mul_r_32.h
diff --git a/linalg/assign_mul_add_r.c b/src/lib/linalg/assign_mul_add_r.c
similarity index 100%
rename from linalg/assign_mul_add_r.c
rename to src/lib/linalg/assign_mul_add_r.c
diff --git a/linalg/assign_mul_add_r.h b/src/lib/linalg/assign_mul_add_r.h
similarity index 100%
rename from linalg/assign_mul_add_r.h
rename to src/lib/linalg/assign_mul_add_r.h
diff --git a/linalg/assign_mul_add_r_32.c b/src/lib/linalg/assign_mul_add_r_32.c
similarity index 100%
rename from linalg/assign_mul_add_r_32.c
rename to src/lib/linalg/assign_mul_add_r_32.c
diff --git a/linalg/assign_mul_add_r_32.h b/src/lib/linalg/assign_mul_add_r_32.h
similarity index 100%
rename from linalg/assign_mul_add_r_32.h
rename to src/lib/linalg/assign_mul_add_r_32.h
diff --git a/linalg/assign_mul_add_r_and_square.c b/src/lib/linalg/assign_mul_add_r_and_square.c
similarity index 100%
rename from linalg/assign_mul_add_r_and_square.c
rename to src/lib/linalg/assign_mul_add_r_and_square.c
diff --git a/linalg/assign_mul_add_r_and_square.h b/src/lib/linalg/assign_mul_add_r_and_square.h
similarity index 100%
rename from linalg/assign_mul_add_r_and_square.h
rename to src/lib/linalg/assign_mul_add_r_and_square.h
diff --git a/linalg/assign_mul_bra_add_mul_ket_add.c b/src/lib/linalg/assign_mul_bra_add_mul_ket_add.c
similarity index 100%
rename from linalg/assign_mul_bra_add_mul_ket_add.c
rename to src/lib/linalg/assign_mul_bra_add_mul_ket_add.c
diff --git a/linalg/assign_mul_bra_add_mul_ket_add.h b/src/lib/linalg/assign_mul_bra_add_mul_ket_add.h
similarity index 100%
rename from linalg/assign_mul_bra_add_mul_ket_add.h
rename to src/lib/linalg/assign_mul_bra_add_mul_ket_add.h
diff --git a/linalg/assign_mul_bra_add_mul_ket_add_r.c b/src/lib/linalg/assign_mul_bra_add_mul_ket_add_r.c
similarity index 100%
rename from linalg/assign_mul_bra_add_mul_ket_add_r.c
rename to src/lib/linalg/assign_mul_bra_add_mul_ket_add_r.c
diff --git a/linalg/assign_mul_bra_add_mul_ket_add_r.h b/src/lib/linalg/assign_mul_bra_add_mul_ket_add_r.h
similarity index 100%
rename from linalg/assign_mul_bra_add_mul_ket_add_r.h
rename to src/lib/linalg/assign_mul_bra_add_mul_ket_add_r.h
diff --git a/linalg/assign_mul_bra_add_mul_r.c b/src/lib/linalg/assign_mul_bra_add_mul_r.c
similarity index 100%
rename from linalg/assign_mul_bra_add_mul_r.c
rename to src/lib/linalg/assign_mul_bra_add_mul_r.c
diff --git a/linalg/assign_mul_bra_add_mul_r.h b/src/lib/linalg/assign_mul_bra_add_mul_r.h
similarity index 100%
rename from linalg/assign_mul_bra_add_mul_r.h
rename to src/lib/linalg/assign_mul_bra_add_mul_r.h
diff --git a/linalg/assign_to_32.c b/src/lib/linalg/assign_to_32.c
similarity index 100%
rename from linalg/assign_to_32.c
rename to src/lib/linalg/assign_to_32.c
diff --git a/linalg/assign_to_32.h b/src/lib/linalg/assign_to_32.h
similarity index 100%
rename from linalg/assign_to_32.h
rename to src/lib/linalg/assign_to_32.h
diff --git a/linalg/blas.h b/src/lib/linalg/blas.h
similarity index 97%
rename from linalg/blas.h
rename to src/lib/linalg/blas.h
index a972e5029..110afb01f 100644
--- a/linalg/blas.h
+++ b/src/lib/linalg/blas.h
@@ -23,8 +23,8 @@
 #include <complex.h>
 #include "linalg/fortran.h"
 
-#if defined CRAY || defined HITACHI
-/* On the CRAY is all different, of course... */
+#if defined TM_CRAY || defined HITACHI
+/* On the CRAY everything is different, of course... */
 #include "fortran.h"
 #define zgemm ZGEMM
 #define zgemv ZGEMV
diff --git a/linalg/comp_decomp.c b/src/lib/linalg/comp_decomp.c
similarity index 100%
rename from linalg/comp_decomp.c
rename to src/lib/linalg/comp_decomp.c
diff --git a/linalg/comp_decomp.h b/src/lib/linalg/comp_decomp.h
similarity index 100%
rename from linalg/comp_decomp.h
rename to src/lib/linalg/comp_decomp.h
diff --git a/linalg/convert_eo_to_lexic.c b/src/lib/linalg/convert_eo_to_lexic.c
similarity index 100%
rename from linalg/convert_eo_to_lexic.c
rename to src/lib/linalg/convert_eo_to_lexic.c
diff --git a/linalg/convert_eo_to_lexic.h b/src/lib/linalg/convert_eo_to_lexic.h
similarity index 100%
rename from linalg/convert_eo_to_lexic.h
rename to src/lib/linalg/convert_eo_to_lexic.h
diff --git a/linalg/convert_even_to_lexic.c b/src/lib/linalg/convert_even_to_lexic.c
similarity index 100%
rename from linalg/convert_even_to_lexic.c
rename to src/lib/linalg/convert_even_to_lexic.c
diff --git a/linalg/convert_even_to_lexic.h b/src/lib/linalg/convert_even_to_lexic.h
similarity index 100%
rename from linalg/convert_even_to_lexic.h
rename to src/lib/linalg/convert_even_to_lexic.h
diff --git a/linalg/convert_odd_to_lexic.c b/src/lib/linalg/convert_odd_to_lexic.c
similarity index 100%
rename from linalg/convert_odd_to_lexic.c
rename to src/lib/linalg/convert_odd_to_lexic.c
diff --git a/linalg/convert_odd_to_lexic.h b/src/lib/linalg/convert_odd_to_lexic.h
similarity index 100%
rename from linalg/convert_odd_to_lexic.h
rename to src/lib/linalg/convert_odd_to_lexic.h
diff --git a/linalg/diff.c b/src/lib/linalg/diff.c
similarity index 100%
rename from linalg/diff.c
rename to src/lib/linalg/diff.c
diff --git a/linalg/diff.h b/src/lib/linalg/diff.h
similarity index 100%
rename from linalg/diff.h
rename to src/lib/linalg/diff.h
diff --git a/linalg/diff_32.c b/src/lib/linalg/diff_32.c
similarity index 100%
rename from linalg/diff_32.c
rename to src/lib/linalg/diff_32.c
diff --git a/linalg/diff_32.h b/src/lib/linalg/diff_32.h
similarity index 100%
rename from linalg/diff_32.h
rename to src/lib/linalg/diff_32.h
diff --git a/linalg/diff_and_square_norm.c b/src/lib/linalg/diff_and_square_norm.c
similarity index 100%
rename from linalg/diff_and_square_norm.c
rename to src/lib/linalg/diff_and_square_norm.c
diff --git a/linalg/diff_and_square_norm.h b/src/lib/linalg/diff_and_square_norm.h
similarity index 100%
rename from linalg/diff_and_square_norm.h
rename to src/lib/linalg/diff_and_square_norm.h
diff --git a/linalg/fortran.h b/src/lib/linalg/fortran.h
similarity index 100%
rename from linalg/fortran.h
rename to src/lib/linalg/fortran.h
diff --git a/linalg/lapack.h b/src/lib/linalg/lapack.h
similarity index 99%
rename from linalg/lapack.h
rename to src/lib/linalg/lapack.h
index 1c7f4ce7a..a651b07ae 100644
--- a/linalg/lapack.h
+++ b/src/lib/linalg/lapack.h
@@ -23,7 +23,7 @@
 #include <complex.h>
 #include "linalg/fortran.h"
 
-#if defined CRAY || defined HITACHI
+#if defined TM_CRAY || defined HITACHI
 #define zgels CGELS
 #define zgesv CGESV
 #define zgeevx CGEEVX
diff --git a/linalg/map_to_blas.h b/src/lib/linalg/map_to_blas.h
similarity index 100%
rename from linalg/map_to_blas.h
rename to src/lib/linalg/map_to_blas.h
diff --git a/linalg/mattimesvec.c b/src/lib/linalg/mattimesvec.c
similarity index 100%
rename from linalg/mattimesvec.c
rename to src/lib/linalg/mattimesvec.c
diff --git a/linalg/mattimesvec.h b/src/lib/linalg/mattimesvec.h
similarity index 100%
rename from linalg/mattimesvec.h
rename to src/lib/linalg/mattimesvec.h
diff --git a/linalg/mul.c b/src/lib/linalg/mul.c
similarity index 100%
rename from linalg/mul.c
rename to src/lib/linalg/mul.c
diff --git a/linalg/mul.h b/src/lib/linalg/mul.h
similarity index 100%
rename from linalg/mul.h
rename to src/lib/linalg/mul.h
diff --git a/linalg/mul_add_mul.c b/src/lib/linalg/mul_add_mul.c
similarity index 100%
rename from linalg/mul_add_mul.c
rename to src/lib/linalg/mul_add_mul.c
diff --git a/linalg/mul_add_mul.h b/src/lib/linalg/mul_add_mul.h
similarity index 100%
rename from linalg/mul_add_mul.h
rename to src/lib/linalg/mul_add_mul.h
diff --git a/linalg/mul_add_mul_r.c b/src/lib/linalg/mul_add_mul_r.c
similarity index 100%
rename from linalg/mul_add_mul_r.c
rename to src/lib/linalg/mul_add_mul_r.c
diff --git a/linalg/mul_add_mul_r.h b/src/lib/linalg/mul_add_mul_r.h
similarity index 100%
rename from linalg/mul_add_mul_r.h
rename to src/lib/linalg/mul_add_mul_r.h
diff --git a/linalg/mul_diff_mul.c b/src/lib/linalg/mul_diff_mul.c
similarity index 100%
rename from linalg/mul_diff_mul.c
rename to src/lib/linalg/mul_diff_mul.c
diff --git a/linalg/mul_diff_mul.h b/src/lib/linalg/mul_diff_mul.h
similarity index 100%
rename from linalg/mul_diff_mul.h
rename to src/lib/linalg/mul_diff_mul.h
diff --git a/linalg/mul_diff_mul_r.c b/src/lib/linalg/mul_diff_mul_r.c
similarity index 100%
rename from linalg/mul_diff_mul_r.c
rename to src/lib/linalg/mul_diff_mul_r.c
diff --git a/linalg/mul_diff_mul_r.h b/src/lib/linalg/mul_diff_mul_r.h
similarity index 100%
rename from linalg/mul_diff_mul_r.h
rename to src/lib/linalg/mul_diff_mul_r.h
diff --git a/linalg/mul_diff_r.c b/src/lib/linalg/mul_diff_r.c
similarity index 100%
rename from linalg/mul_diff_r.c
rename to src/lib/linalg/mul_diff_r.c
diff --git a/linalg/mul_diff_r.h b/src/lib/linalg/mul_diff_r.h
similarity index 100%
rename from linalg/mul_diff_r.h
rename to src/lib/linalg/mul_diff_r.h
diff --git a/linalg/mul_gamma5.c b/src/lib/linalg/mul_gamma5.c
similarity index 100%
rename from linalg/mul_gamma5.c
rename to src/lib/linalg/mul_gamma5.c
diff --git a/linalg/mul_gamma5.h b/src/lib/linalg/mul_gamma5.h
similarity index 100%
rename from linalg/mul_gamma5.h
rename to src/lib/linalg/mul_gamma5.h
diff --git a/linalg/mul_r.c b/src/lib/linalg/mul_r.c
similarity index 100%
rename from linalg/mul_r.c
rename to src/lib/linalg/mul_r.c
diff --git a/linalg/mul_r.h b/src/lib/linalg/mul_r.h
similarity index 100%
rename from linalg/mul_r.h
rename to src/lib/linalg/mul_r.h
diff --git a/linalg/mul_r_32.c b/src/lib/linalg/mul_r_32.c
similarity index 100%
rename from linalg/mul_r_32.c
rename to src/lib/linalg/mul_r_32.c
diff --git a/linalg/mul_r_32.h b/src/lib/linalg/mul_r_32.h
similarity index 100%
rename from linalg/mul_r_32.h
rename to src/lib/linalg/mul_r_32.h
diff --git a/linalg/mul_r_gamma5.c b/src/lib/linalg/mul_r_gamma5.c
similarity index 100%
rename from linalg/mul_r_gamma5.c
rename to src/lib/linalg/mul_r_gamma5.c
diff --git a/linalg/mul_r_gamma5.h b/src/lib/linalg/mul_r_gamma5.h
similarity index 100%
rename from linalg/mul_r_gamma5.h
rename to src/lib/linalg/mul_r_gamma5.h
diff --git a/linalg/print_spinor.c b/src/lib/linalg/print_spinor.c
similarity index 100%
rename from linalg/print_spinor.c
rename to src/lib/linalg/print_spinor.c
diff --git a/linalg/print_spinor.h b/src/lib/linalg/print_spinor.h
similarity index 100%
rename from linalg/print_spinor.h
rename to src/lib/linalg/print_spinor.h
diff --git a/linalg/print_spinor_similar_components.c b/src/lib/linalg/print_spinor_similar_components.c
similarity index 100%
rename from linalg/print_spinor_similar_components.c
rename to src/lib/linalg/print_spinor_similar_components.c
diff --git a/linalg/print_spinor_similar_components.h b/src/lib/linalg/print_spinor_similar_components.h
similarity index 100%
rename from linalg/print_spinor_similar_components.h
rename to src/lib/linalg/print_spinor_similar_components.h
diff --git a/linalg/ratio.c b/src/lib/linalg/ratio.c
similarity index 100%
rename from linalg/ratio.c
rename to src/lib/linalg/ratio.c
diff --git a/linalg/ratio.h b/src/lib/linalg/ratio.h
similarity index 100%
rename from linalg/ratio.h
rename to src/lib/linalg/ratio.h
diff --git a/linalg/scalar_prod.c b/src/lib/linalg/scalar_prod.c
similarity index 95%
rename from linalg/scalar_prod.c
rename to src/lib/linalg/scalar_prod.c
index 365b966c1..2e2a631c7 100644
--- a/linalg/scalar_prod.c
+++ b/src/lib/linalg/scalar_prod.c
@@ -36,7 +36,7 @@
 #define _PSWITCH(s) s
 #define _PTSWITCH(s) s
 
-#include "scalar_prod_body.c"
+#include "scalar_prod_body.inc"
 
 #undef _C_TYPE
 #undef _PSWITCH
@@ -46,7 +46,7 @@
 #define _PSWITCH(s) s##_32
 #define _PTSWITCH(s) s##32
 
-#include "scalar_prod_body.c"
+#include "scalar_prod_body.inc"
 
 #undef _C_TYPE
 #undef _PSWITCH
diff --git a/linalg/scalar_prod.h b/src/lib/linalg/scalar_prod.h
similarity index 100%
rename from linalg/scalar_prod.h
rename to src/lib/linalg/scalar_prod.h
diff --git a/linalg/scalar_prod_body.c b/src/lib/linalg/scalar_prod_body.inc
similarity index 100%
rename from linalg/scalar_prod_body.c
rename to src/lib/linalg/scalar_prod_body.inc
diff --git a/linalg/scalar_prod_i.c b/src/lib/linalg/scalar_prod_i.c
similarity index 100%
rename from linalg/scalar_prod_i.c
rename to src/lib/linalg/scalar_prod_i.c
diff --git a/linalg/scalar_prod_i.h b/src/lib/linalg/scalar_prod_i.h
similarity index 100%
rename from linalg/scalar_prod_i.h
rename to src/lib/linalg/scalar_prod_i.h
diff --git a/linalg/scalar_prod_r.c b/src/lib/linalg/scalar_prod_r.c
similarity index 99%
rename from linalg/scalar_prod_r.c
rename to src/lib/linalg/scalar_prod_r.c
index f4fd9293b..c5288aa34 100644
--- a/linalg/scalar_prod_r.c
+++ b/src/lib/linalg/scalar_prod_r.c
@@ -97,4 +97,3 @@ double scalar_prod_r(const spinor *const S, const spinor *const R, const int N,
 #endif
   return res;
 }
-
diff --git a/linalg/scalar_prod_r.h b/src/lib/linalg/scalar_prod_r.h
similarity index 100%
rename from linalg/scalar_prod_r.h
rename to src/lib/linalg/scalar_prod_r.h
diff --git a/linalg/scalar_prod_r_32.c b/src/lib/linalg/scalar_prod_r_32.c
similarity index 100%
rename from linalg/scalar_prod_r_32.c
rename to src/lib/linalg/scalar_prod_r_32.c
diff --git a/linalg/scalar_prod_r_32.h b/src/lib/linalg/scalar_prod_r_32.h
similarity index 100%
rename from linalg/scalar_prod_r_32.h
rename to src/lib/linalg/scalar_prod_r_32.h
diff --git a/linalg/set_even_to_zero.c b/src/lib/linalg/set_even_to_zero.c
similarity index 100%
rename from linalg/set_even_to_zero.c
rename to src/lib/linalg/set_even_to_zero.c
diff --git a/linalg/set_even_to_zero.h b/src/lib/linalg/set_even_to_zero.h
similarity index 100%
rename from linalg/set_even_to_zero.h
rename to src/lib/linalg/set_even_to_zero.h
diff --git a/linalg/square_and_minmax.c b/src/lib/linalg/square_and_minmax.c
similarity index 100%
rename from linalg/square_and_minmax.c
rename to src/lib/linalg/square_and_minmax.c
diff --git a/linalg/square_and_minmax.h b/src/lib/linalg/square_and_minmax.h
similarity index 100%
rename from linalg/square_and_minmax.h
rename to src/lib/linalg/square_and_minmax.h
diff --git a/linalg/square_and_prod_r.c b/src/lib/linalg/square_and_prod_r.c
similarity index 100%
rename from linalg/square_and_prod_r.c
rename to src/lib/linalg/square_and_prod_r.c
diff --git a/linalg/square_and_prod_r.h b/src/lib/linalg/square_and_prod_r.h
similarity index 100%
rename from linalg/square_and_prod_r.h
rename to src/lib/linalg/square_and_prod_r.h
diff --git a/linalg/square_norm.c b/src/lib/linalg/square_norm.c
similarity index 100%
rename from linalg/square_norm.c
rename to src/lib/linalg/square_norm.c
diff --git a/linalg/square_norm.h b/src/lib/linalg/square_norm.h
similarity index 100%
rename from linalg/square_norm.h
rename to src/lib/linalg/square_norm.h
diff --git a/linalg/square_norm_32.c b/src/lib/linalg/square_norm_32.c
similarity index 100%
rename from linalg/square_norm_32.c
rename to src/lib/linalg/square_norm_32.c
diff --git a/linalg/square_norm_32.h b/src/lib/linalg/square_norm_32.h
similarity index 100%
rename from linalg/square_norm_32.h
rename to src/lib/linalg/square_norm_32.h
diff --git a/linalg_eo.h b/src/lib/linalg_eo.h
similarity index 100%
rename from linalg_eo.h
rename to src/lib/linalg_eo.h
diff --git a/little_D.c b/src/lib/little_D.c
similarity index 97%
rename from little_D.c
rename to src/lib/little_D.c
index 370e7583a..e923f462e 100644
--- a/little_D.c
+++ b/src/lib/little_D.c
@@ -243,7 +243,7 @@ void apply_little_D_spinor(spinor *r, spinor *s) {
 #endif
 #define _C_TYPE _Complex double
 
-#include "little_D_body.c"
+#include "little_D_body.inc"
 
 #undef _C_TYPE
 #undef _PSWITCH
@@ -259,7 +259,7 @@ void apply_little_D_spinor(spinor *r, spinor *s) {
 #endif
 #define _C_TYPE _Complex float
 
-#include "little_D_body.c"
+#include "little_D_body.inc"
 
 #undef _C_TYPE
 #undef _PSWITCH
@@ -276,11 +276,11 @@ extern int waitcount;
 void init_little_field_exchange(_Complex double *w) {
 #ifdef TM_USE_MPI
   int i = 0;
-#if (defined PARALLELT || defined PARALLELX)
+#if (defined TM_PARALLELT || defined TM_PARALLELX)
   int no_dirs = 2;
-#elif (defined PARALLELXT || defined PARALLELXY || defined PARALLELXYZ)
+#elif (defined TM_PARALLELXT || defined TM_PARALLELXY || defined TM_PARALLELXYZ)
   int no_dirs = 4;
-#elif (defined PARALLELXYT || defined PARALLELXYZT)
+#elif (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   int no_dirs = 6;
 #endif
   if (waitcount != 0) {
@@ -304,7 +304,7 @@ void init_little_field_exchange(_Complex double *w) {
               g_nb_list[i], i + 1, g_cart_grid, &lrequests[2 * i + 3]);
     waitcount += 4;
   }
-#ifdef PARALLELXYZT
+#ifdef TM_PARALLELXYZT
   /* send to the right, receive from the left */
   i = 6;
   MPI_Isend((void *)(w + g_N_s), g_N_s, MPI_DOUBLE_COMPLEX, g_nb_list[i], i, g_cart_grid,
diff --git a/little_D.h b/src/lib/little_D.h
similarity index 100%
rename from little_D.h
rename to src/lib/little_D.h
diff --git a/little_D_body.c b/src/lib/little_D_body.inc
similarity index 100%
rename from little_D_body.c
rename to src/lib/little_D_body.inc
diff --git a/matrix_utils.c b/src/lib/matrix_utils.c
similarity index 96%
rename from matrix_utils.c
rename to src/lib/matrix_utils.c
index d5c4198ea..63c98657b 100644
--- a/matrix_utils.c
+++ b/src/lib/matrix_utils.c
@@ -30,9 +30,8 @@
 #ifndef TM_USE_OMP
 static
 #endif
-    void
-    exponent_from_coefficients(su3 *out, _Complex double f0, _Complex double f1, _Complex double f2,
-                               su3 const *in) {
+    void exponent_from_coefficients(su3 *out, _Complex double f0, _Complex double f1,
+                                    _Complex double f2, su3 const *in) {
   su3 ALIGN tmp;
   _complex_times_su3(tmp, f2, *in);
   _su3_add_equals_complex_identity(tmp, f1);
diff --git a/matrix_utils.h b/src/lib/matrix_utils.h
similarity index 100%
rename from matrix_utils.h
rename to src/lib/matrix_utils.h
diff --git a/meas/Makefile.in b/src/lib/meas/Makefile.in
similarity index 100%
rename from meas/Makefile.in
rename to src/lib/meas/Makefile.in
diff --git a/meas/correlators.c b/src/lib/meas/correlators.c
similarity index 100%
rename from meas/correlators.c
rename to src/lib/meas/correlators.c
diff --git a/meas/correlators.h b/src/lib/meas/correlators.h
similarity index 100%
rename from meas/correlators.h
rename to src/lib/meas/correlators.h
diff --git a/meas/field_strength_types.h b/src/lib/meas/field_strength_types.h
similarity index 100%
rename from meas/field_strength_types.h
rename to src/lib/meas/field_strength_types.h
diff --git a/meas/gradient_flow.c b/src/lib/meas/gradient_flow.c
similarity index 100%
rename from meas/gradient_flow.c
rename to src/lib/meas/gradient_flow.c
diff --git a/meas/gradient_flow.h b/src/lib/meas/gradient_flow.h
similarity index 100%
rename from meas/gradient_flow.h
rename to src/lib/meas/gradient_flow.h
diff --git a/meas/measure_clover_field_strength_observables.c b/src/lib/meas/measure_clover_field_strength_observables.c
similarity index 100%
rename from meas/measure_clover_field_strength_observables.c
rename to src/lib/meas/measure_clover_field_strength_observables.c
diff --git a/meas/measure_clover_field_strength_observables.h b/src/lib/meas/measure_clover_field_strength_observables.h
similarity index 100%
rename from meas/measure_clover_field_strength_observables.h
rename to src/lib/meas/measure_clover_field_strength_observables.h
diff --git a/meas/measurements.c b/src/lib/meas/measurements.c
similarity index 100%
rename from meas/measurements.c
rename to src/lib/meas/measurements.c
diff --git a/meas/measurements.h b/src/lib/meas/measurements.h
similarity index 100%
rename from meas/measurements.h
rename to src/lib/meas/measurements.h
diff --git a/meas/oriented_plaquettes.c b/src/lib/meas/oriented_plaquettes.c
similarity index 100%
rename from meas/oriented_plaquettes.c
rename to src/lib/meas/oriented_plaquettes.c
diff --git a/meas/oriented_plaquettes.h b/src/lib/meas/oriented_plaquettes.h
similarity index 100%
rename from meas/oriented_plaquettes.h
rename to src/lib/meas/oriented_plaquettes.h
diff --git a/meas/pion_norm.c b/src/lib/meas/pion_norm.c
similarity index 100%
rename from meas/pion_norm.c
rename to src/lib/meas/pion_norm.c
diff --git a/meas/pion_norm.h b/src/lib/meas/pion_norm.h
similarity index 100%
rename from meas/pion_norm.h
rename to src/lib/meas/pion_norm.h
diff --git a/meas/polyakov_loop.c b/src/lib/meas/polyakov_loop.c
similarity index 99%
rename from meas/polyakov_loop.c
rename to src/lib/meas/polyakov_loop.c
index 9108bcb99..25deea402 100644
--- a/meas/polyakov_loop.c
+++ b/src/lib/meas/polyakov_loop.c
@@ -446,7 +446,7 @@ int polyakov_loop_dir(const int nstore /* in  */, const int dir /* in  */) {
 
   /* (1) collect contributions from different time/z slices to nodes with rank=0
      in spatial volume/space-time slices */
-#ifndef PARALLELXYZT
+#ifndef TM_PARALLELXYZT
   if (dir == 0) {
 #endif
     tmp_ray = (su3 *)calloc(VOL3, sizeof(su3)); /* */
@@ -456,7 +456,7 @@ int polyakov_loop_dir(const int nstore /* in  */, const int dir /* in  */) {
     }
 
     MPI_Reduce(tmp_loc, tmp_ray, VOL3, mpi_su3, mpi_reduce_su3_ray, 0, ray);
-#ifndef PARALLELXYZT
+#ifndef TM_PARALLELXYZT
   }
 #endif
 
@@ -475,7 +475,7 @@ int polyakov_loop_dir(const int nstore /* in  */, const int dir /* in  */) {
     ks = 0.0;
 
 #ifdef TM_USE_MPI
-#ifdef PARALLELXYZT
+#ifdef TM_PARALLELXYZT
     u = tmp_ray;
 #else
     if (dir == 0) {
@@ -502,11 +502,11 @@ int polyakov_loop_dir(const int nstore /* in  */, const int dir /* in  */) {
 #ifdef TM_USE_MPI
     MPI_Reduce(&pl_tmp, &pl, 1, MPI_DOUBLE_COMPLEX, MPI_SUM, 0, slice);
   }
-#ifndef PARALLELXYZT
+#ifndef TM_PARALLELXYZT
   if (dir == 0) {
 #endif
     free(tmp_ray);
-#ifndef PARALLELXYZT
+#ifndef TM_PARALLELXYZT
   }
 #endif
 
diff --git a/meas/polyakov_loop.h b/src/lib/meas/polyakov_loop.h
similarity index 100%
rename from meas/polyakov_loop.h
rename to src/lib/meas/polyakov_loop.h
diff --git a/measure_gauge_action.c b/src/lib/measure_gauge_action.c
similarity index 99%
rename from measure_gauge_action.c
rename to src/lib/measure_gauge_action.c
index 6a558a51b..ecbe7a888 100644
--- a/measure_gauge_action.c
+++ b/src/lib/measure_gauge_action.c
@@ -26,12 +26,10 @@
  *     Returns the value of the action
  ************************************************************************/
 
-#ifdef HAVE_CONFIG_H
-#include <tmlqcd_config.h>
-#endif
 #include <math.h>
 #include <stdio.h>
 #include <stdlib.h>
+#include <tmlqcd_config.h>
 #ifdef TM_USE_OMP
 #include <omp.h>
 #endif
diff --git a/measure_gauge_action.h b/src/lib/measure_gauge_action.h
similarity index 100%
rename from measure_gauge_action.h
rename to src/lib/measure_gauge_action.h
diff --git a/measure_rectangles.c b/src/lib/measure_rectangles.c
similarity index 100%
rename from measure_rectangles.c
rename to src/lib/measure_rectangles.c
diff --git a/measure_rectangles.h b/src/lib/measure_rectangles.h
similarity index 100%
rename from measure_rectangles.h
rename to src/lib/measure_rectangles.h
diff --git a/misc_types.h b/src/lib/misc_types.h
similarity index 99%
rename from misc_types.h
rename to src/lib/misc_types.h
index 91ceda0a8..412719dce 100644
--- a/misc_types.h
+++ b/src/lib/misc_types.h
@@ -101,7 +101,7 @@ typedef enum tm_mpi_thread_level_t {
   TM_MPI_THREAD_SINGLE = QMP_THREAD_SINGLE,
   TM_MPI_THREAD_MULTIPLE = QMP_THREAD_MULTIPLE
 } tm_mpi_thread_level_t;
-#elif TM_USE_MPI
+#elif defined(TM_USE_MPI)
 typedef enum tm_mpi_thread_level_t {
   TM_MPI_THREAD_SINGLE = MPI_THREAD_SERIALIZED,
   TM_MPI_THREAD_MULTIPLE = MPI_THREAD_MULTIPLE
diff --git a/monomial/Makefile.in b/src/lib/monomial/Makefile.in
similarity index 100%
rename from monomial/Makefile.in
rename to src/lib/monomial/Makefile.in
diff --git a/monomial/clover_trlog_monomial.c b/src/lib/monomial/clover_trlog_monomial.c
similarity index 100%
rename from monomial/clover_trlog_monomial.c
rename to src/lib/monomial/clover_trlog_monomial.c
diff --git a/monomial/clover_trlog_monomial.h b/src/lib/monomial/clover_trlog_monomial.h
similarity index 100%
rename from monomial/clover_trlog_monomial.h
rename to src/lib/monomial/clover_trlog_monomial.h
diff --git a/monomial/cloverdet_monomial.c b/src/lib/monomial/cloverdet_monomial.c
similarity index 100%
rename from monomial/cloverdet_monomial.c
rename to src/lib/monomial/cloverdet_monomial.c
diff --git a/monomial/cloverdet_monomial.h b/src/lib/monomial/cloverdet_monomial.h
similarity index 100%
rename from monomial/cloverdet_monomial.h
rename to src/lib/monomial/cloverdet_monomial.h
diff --git a/monomial/cloverdetratio_monomial.c b/src/lib/monomial/cloverdetratio_monomial.c
similarity index 100%
rename from monomial/cloverdetratio_monomial.c
rename to src/lib/monomial/cloverdetratio_monomial.c
diff --git a/monomial/cloverdetratio_monomial.h b/src/lib/monomial/cloverdetratio_monomial.h
similarity index 100%
rename from monomial/cloverdetratio_monomial.h
rename to src/lib/monomial/cloverdetratio_monomial.h
diff --git a/monomial/cloverdetratio_rwmonomial.c b/src/lib/monomial/cloverdetratio_rwmonomial.c
similarity index 100%
rename from monomial/cloverdetratio_rwmonomial.c
rename to src/lib/monomial/cloverdetratio_rwmonomial.c
diff --git a/monomial/cloverdetratio_rwmonomial.h b/src/lib/monomial/cloverdetratio_rwmonomial.h
similarity index 100%
rename from monomial/cloverdetratio_rwmonomial.h
rename to src/lib/monomial/cloverdetratio_rwmonomial.h
diff --git a/monomial/clovernd_trlog_monomial.c b/src/lib/monomial/clovernd_trlog_monomial.c
similarity index 100%
rename from monomial/clovernd_trlog_monomial.c
rename to src/lib/monomial/clovernd_trlog_monomial.c
diff --git a/monomial/clovernd_trlog_monomial.h b/src/lib/monomial/clovernd_trlog_monomial.h
similarity index 100%
rename from monomial/clovernd_trlog_monomial.h
rename to src/lib/monomial/clovernd_trlog_monomial.h
diff --git a/monomial/cloverndpoly_monomial.c b/src/lib/monomial/cloverndpoly_monomial.c
similarity index 100%
rename from monomial/cloverndpoly_monomial.c
rename to src/lib/monomial/cloverndpoly_monomial.c
diff --git a/monomial/cloverndpoly_monomial.h b/src/lib/monomial/cloverndpoly_monomial.h
similarity index 100%
rename from monomial/cloverndpoly_monomial.h
rename to src/lib/monomial/cloverndpoly_monomial.h
diff --git a/monomial/det_monomial.c b/src/lib/monomial/det_monomial.c
similarity index 100%
rename from monomial/det_monomial.c
rename to src/lib/monomial/det_monomial.c
diff --git a/monomial/det_monomial.h b/src/lib/monomial/det_monomial.h
similarity index 100%
rename from monomial/det_monomial.h
rename to src/lib/monomial/det_monomial.h
diff --git a/monomial/detratio_monomial.c b/src/lib/monomial/detratio_monomial.c
similarity index 100%
rename from monomial/detratio_monomial.c
rename to src/lib/monomial/detratio_monomial.c
diff --git a/monomial/detratio_monomial.h b/src/lib/monomial/detratio_monomial.h
similarity index 100%
rename from monomial/detratio_monomial.h
rename to src/lib/monomial/detratio_monomial.h
diff --git a/monomial/gauge_monomial.c b/src/lib/monomial/gauge_monomial.c
similarity index 100%
rename from monomial/gauge_monomial.c
rename to src/lib/monomial/gauge_monomial.c
diff --git a/monomial/gauge_monomial.h b/src/lib/monomial/gauge_monomial.h
similarity index 100%
rename from monomial/gauge_monomial.h
rename to src/lib/monomial/gauge_monomial.h
diff --git a/monomial/moment_energy.c b/src/lib/monomial/moment_energy.c
similarity index 100%
rename from monomial/moment_energy.c
rename to src/lib/monomial/moment_energy.c
diff --git a/monomial/moment_energy.h b/src/lib/monomial/moment_energy.h
similarity index 100%
rename from monomial/moment_energy.h
rename to src/lib/monomial/moment_energy.h
diff --git a/monomial/monitor_forces.c b/src/lib/monomial/monitor_forces.c
similarity index 100%
rename from monomial/monitor_forces.c
rename to src/lib/monomial/monitor_forces.c
diff --git a/monomial/monitor_forces.h b/src/lib/monomial/monitor_forces.h
similarity index 100%
rename from monomial/monitor_forces.h
rename to src/lib/monomial/monitor_forces.h
diff --git a/monomial/monomial.c b/src/lib/monomial/monomial.c
similarity index 100%
rename from monomial/monomial.c
rename to src/lib/monomial/monomial.c
diff --git a/monomial/monomial.h b/src/lib/monomial/monomial.h
similarity index 100%
rename from monomial/monomial.h
rename to src/lib/monomial/monomial.h
diff --git a/monomial/nddetratio_monomial.c b/src/lib/monomial/nddetratio_monomial.c
similarity index 100%
rename from monomial/nddetratio_monomial.c
rename to src/lib/monomial/nddetratio_monomial.c
diff --git a/monomial/nddetratio_monomial.h b/src/lib/monomial/nddetratio_monomial.h
similarity index 100%
rename from monomial/nddetratio_monomial.h
rename to src/lib/monomial/nddetratio_monomial.h
diff --git a/monomial/ndpoly_monomial.c b/src/lib/monomial/ndpoly_monomial.c
similarity index 100%
rename from monomial/ndpoly_monomial.c
rename to src/lib/monomial/ndpoly_monomial.c
diff --git a/monomial/ndpoly_monomial.h b/src/lib/monomial/ndpoly_monomial.h
similarity index 100%
rename from monomial/ndpoly_monomial.h
rename to src/lib/monomial/ndpoly_monomial.h
diff --git a/monomial/ndrat_monomial.c b/src/lib/monomial/ndrat_monomial.c
similarity index 100%
rename from monomial/ndrat_monomial.c
rename to src/lib/monomial/ndrat_monomial.c
diff --git a/monomial/ndrat_monomial.h b/src/lib/monomial/ndrat_monomial.h
similarity index 100%
rename from monomial/ndrat_monomial.h
rename to src/lib/monomial/ndrat_monomial.h
diff --git a/monomial/ndratcor_monomial.c b/src/lib/monomial/ndratcor_monomial.c
similarity index 100%
rename from monomial/ndratcor_monomial.c
rename to src/lib/monomial/ndratcor_monomial.c
diff --git a/monomial/ndratcor_monomial.h b/src/lib/monomial/ndratcor_monomial.h
similarity index 100%
rename from monomial/ndratcor_monomial.h
rename to src/lib/monomial/ndratcor_monomial.h
diff --git a/monomial/poly_monomial.c b/src/lib/monomial/poly_monomial.c
similarity index 100%
rename from monomial/poly_monomial.c
rename to src/lib/monomial/poly_monomial.c
diff --git a/monomial/poly_monomial.h b/src/lib/monomial/poly_monomial.h
similarity index 100%
rename from monomial/poly_monomial.h
rename to src/lib/monomial/poly_monomial.h
diff --git a/monomial/rat_monomial.c b/src/lib/monomial/rat_monomial.c
similarity index 100%
rename from monomial/rat_monomial.c
rename to src/lib/monomial/rat_monomial.c
diff --git a/monomial/rat_monomial.h b/src/lib/monomial/rat_monomial.h
similarity index 100%
rename from monomial/rat_monomial.h
rename to src/lib/monomial/rat_monomial.h
diff --git a/monomial/ratcor_monomial.c b/src/lib/monomial/ratcor_monomial.c
similarity index 100%
rename from monomial/ratcor_monomial.c
rename to src/lib/monomial/ratcor_monomial.c
diff --git a/monomial/ratcor_monomial.h b/src/lib/monomial/ratcor_monomial.h
similarity index 100%
rename from monomial/ratcor_monomial.h
rename to src/lib/monomial/ratcor_monomial.h
diff --git a/mpi_init.c b/src/lib/mpi_init.c
similarity index 94%
rename from mpi_init.c
rename to src/lib/mpi_init.c
index 2bbbde315..f245f0556 100644
--- a/mpi_init.c
+++ b/src/lib/mpi_init.c
@@ -25,7 +25,7 @@
 #ifdef TM_USE_MPI
 #include <mpi.h>
 #endif
-#ifdef _USE_SHMEM
+#ifdef TM_USE_SHMEM
 #include <mpp/shmem.h>
 #endif
 #include "global.h"
@@ -134,7 +134,7 @@ MPI_Datatype halffield_y_slice_gath;
 
 MPI_Datatype halffield_z_slice_cont;
 
-#if (defined PARALLELXYZT || defined PARALLELXYZ)
+#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ)
 MPI_Datatype field_z_slice_even_dn;
 MPI_Datatype field_z_slice_even_up;
 MPI_Datatype field_z_slice_odd_dn;
@@ -188,60 +188,60 @@ void tmlqcd_mpi_init(int argc, char *argv[]) {
   }
 
 #ifdef TM_USE_MPI
-#ifdef _USE_SHMEM
+#ifdef TM_USE_SHMEM
   /* we need that the PE number in MPI_COMM_WORL  */
   /* exactly correspond to the one in g_cart_grid */
   reorder = 0;
 #endif
 
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   N_PROC_T = 0; /* the other N_PROC_? are read from input, if not constraint below */
                 /* N_PROC_T will be set by MPI_Dims_create, if not constraint below */
 #endif
 
-#if defined PARALLELT
+#if defined TM_PARALLELT
   ndims = 1;
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   N_PROC_X = 1;
   N_PROC_Y = 1;
   N_PROC_Z = 1;
 #endif
 #endif
-#if defined PARALLELX
+#if defined TM_PARALLELX
   ndims = 1;
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   N_PROC_T = 1;
   N_PROC_Y = 1;
   N_PROC_Z = 1;
 #endif
 #endif
-#if defined PARALLELXT
+#if defined TM_PARALLELXT
   ndims = 2;
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   N_PROC_Y = 1;
   N_PROC_Z = 1;
 #endif
 #endif
-#if defined PARALLELXY
+#if defined TM_PARALLELXY
   ndims = 2;
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   N_PROC_T = 1;
   N_PROC_Z = 1;
 #endif
 #endif
-#if defined PARALLELXYT
+#if defined TM_PARALLELXYT
   ndims = 3;
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   N_PROC_Z = 1;
 #endif
 #endif
-#if defined PARALLELXYZ
+#if defined TM_PARALLELXYZ
   ndims = 3;
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   N_PROC_T = 1;
 #endif
 #endif
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
   ndims = 4;
 #endif
   dims[0] = N_PROC_T;
@@ -278,7 +278,7 @@ void tmlqcd_mpi_init(int argc, char *argv[]) {
     exit(-1);
   }
 
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   N_PROC_T = g_nproc_t;
   N_PROC_X = g_nproc_x;
   N_PROC_Y = g_nproc_y;
@@ -289,42 +289,42 @@ void tmlqcd_mpi_init(int argc, char *argv[]) {
   LZ = LZ / g_nproc_z;
   VOLUME = (T * LX * LY * LZ);
   SPACEVOLUME = VOLUME / T;
-#ifdef PARALLELT
+#ifdef TM_PARALLELT
   RAND = (2 * LX * LY * LZ);
   EDGES = 0;
-#elif defined PARALLELX
+#elif defined TM_PARALLELX
   RAND = (2 * T * LY * LZ);
   EDGES = 0;
-#elif defined PARALLELXT
+#elif defined TM_PARALLELXT
   RAND = 2 * LZ * (LY * LX + T * LY);
   EDGES = 4 * LZ * LY;
-#elif defined PARALLELXY
+#elif defined TM_PARALLELXY
   RAND = 2 * LZ * T * (LX + LY);
   EDGES = 4 * LZ * T;
-#elif defined PARALLELXYT
+#elif defined TM_PARALLELXYT
   RAND = 2 * LZ * (LY * LX + T * LY + T * LX);
   EDGES = 4 * LZ * (LY + T + LX);
-#elif defined PARALLELXYZ
+#elif defined TM_PARALLELXYZ
   RAND = 2 * T * (LY * LZ + LX * LZ + LX * LY);
   EDGES = 4 * T * (LX + LY + LZ);
-#elif defined PARALLELXYZT
+#elif defined TM_PARALLELXYZT
   RAND = 2 * LZ * LY * LX + 2 * LZ * T * LY + 2 * LZ * T * LX + 2 * T * LX * LY;
   EDGES = 4 * LZ * LY + 4 * LZ * T + 4 * LZ * LX + 4 * LY * T + 4 * LY * LX + 4 * T * LX;
-#else  /* ifdef PARALLELT */
+#else  /* ifdef TM_PARALLELT */
   RAND = 0;
   EDGES = 0;
-#endif /* ifdef PARALLELT */
+#endif /* ifdef TM_PARALLELT */
   /* Note that VOLUMEPLUSRAND is not always equal to VOLUME+RAND */
   /* VOLUMEPLUSRAND rather includes the edges */
   VOLUMEPLUSRAND = VOLUME + RAND + EDGES;
   SPACERAND = RAND / T;
-#endif /* ifndef FIXEDVOLUME */
+#endif /* ifndef TM_FIXEDVOLUME */
   g_dbw2rand = (RAND + 2 * EDGES);
 
-#if (defined PARALLELXYZT || defined PARALLELXYZ)
+#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ)
   field_buffer_z = (spinor *)malloc(T * LX * LY / 2 * sizeof(spinor));
   field_buffer_z2 = (spinor *)malloc(T * LX * LY / 2 * sizeof(spinor));
-#ifdef _NON_BLOCKING
+#ifdef TM_NON_BLOCKING
   field_buffer_z3 = (spinor *)malloc(T * LX * LY / 2 * sizeof(spinor));
   field_buffer_z4 = (spinor *)malloc(T * LX * LY / 2 * sizeof(spinor));
 #endif
@@ -347,23 +347,25 @@ void tmlqcd_mpi_init(int argc, char *argv[]) {
   for (i = 0; i < 8; i++) {
     g_nb_list[i] = g_cart_id;
   }
-#if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || \
+     defined TM_PARALLELXYZT)
   MPI_Cart_shift(g_cart_grid, 0, 1, &g_nb_t_dn, &g_nb_t_up);
   g_nb_list[0] = g_nb_t_up;
   g_nb_list[1] = g_nb_t_dn;
 #endif
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELX || \
-     defined PARALLELXY || defined PARALLELXYZ)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT || \
+     defined TM_PARALLELX || defined TM_PARALLELXY || defined TM_PARALLELXYZ)
   MPI_Cart_shift(g_cart_grid, 1, 1, &g_nb_x_dn, &g_nb_x_up);
   g_nb_list[2] = g_nb_x_up;
   g_nb_list[3] = g_nb_x_dn;
 #endif
-#if (defined PARALLELXYT || defined PARALLELXYZT || defined PARALLELXY || defined PARALLELXYZ)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT || defined TM_PARALLELXY || \
+     defined TM_PARALLELXYZ)
   MPI_Cart_shift(g_cart_grid, 2, 1, &g_nb_y_dn, &g_nb_y_up);
   g_nb_list[4] = g_nb_y_up;
   g_nb_list[5] = g_nb_y_dn;
 #endif
-#if (defined PARALLELXYZT || defined PARALLELXYZ)
+#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ)
   MPI_Cart_shift(g_cart_grid, 3, 1, &g_nb_z_dn, &g_nb_z_up);
   g_nb_list[6] = g_nb_z_up;
   g_nb_list[7] = g_nb_z_dn;
@@ -552,7 +554,6 @@ void tmlqcd_mpi_init(int argc, char *argv[]) {
   MPI_Type_commit(&lfield_z_slice_cont32);
   MPI_Type_commit(&lfield_z_slice_gath32);
 
-
   /* The internal z_ and zt_ slices are constructed in geometry() with MPI_Type_indexed() */
 
   /* Now the derivative fields */
@@ -669,7 +670,7 @@ void tmlqcd_mpi_init(int argc, char *argv[]) {
   g_mpi_ST_rank = 0;
   g_stdio_proc = 0;
 
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   T = T_global;
   VOLUME = (T * LX * LY * LZ);
   SPACEVOLUME = VOLUME / T;
@@ -687,7 +688,7 @@ void tmlqcd_mpi_init(int argc, char *argv[]) {
 
   /* Here we perform some checks in order not to */
   /* run into trouble later                      */
-#if (defined PARALLELXYZT || defined PARALLELXYZ)
+#if (defined TM_PARALLELXYZT || defined TM_PARALLELXYZ)
   if ((T * LX * LY) % 2 != 0 && even_odd_flag == 1) {
     fprintf(stderr, "T*LX*LY must be even!\nAborting prgram...\n");
 #ifdef TM_USE_MPI
diff --git a/mpi_init.h b/src/lib/mpi_init.h
similarity index 97%
rename from mpi_init.h
rename to src/lib/mpi_init.h
index dce6dfad7..c6e816946 100644
--- a/mpi_init.h
+++ b/src/lib/mpi_init.h
@@ -107,9 +107,8 @@ extern MPI_Datatype halffield_y_slice_cont;
 extern MPI_Datatype halffield_y_slice_gath;
 extern MPI_Datatype halffield_z_slice_cont;
 
-
-#if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT || \
-     defined PARALLELXYZ)
+#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || \
+     defined TM_PARALLELXYZT || defined TM_PARALLELXYZ)
 extern MPI_Datatype field_z_slice_even_dn;
 extern MPI_Datatype field_z_slice_even_up;
 extern MPI_Datatype field_z_slice_odd_dn;
diff --git a/omp_accumulator.h b/src/lib/omp_accumulator.h
similarity index 100%
rename from omp_accumulator.h
rename to src/lib/omp_accumulator.h
diff --git a/operator.c b/src/lib/operator.c
similarity index 99%
rename from operator.c
rename to src/lib/operator.c
index 6b6a94df2..e15a97701 100644
--- a/operator.c
+++ b/src/lib/operator.c
@@ -63,7 +63,7 @@
 #ifdef TM_USE_QUDA
 #include "quda_interface.h"
 #endif
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
 #include "DDalphaAMG_interface.h"
 #endif
 
diff --git a/operator.h b/src/lib/operator.h
similarity index 100%
rename from operator.h
rename to src/lib/operator.h
diff --git a/operator/Block_D_psi_body.c b/src/lib/operator/Block_D_psi_body.inc
similarity index 100%
rename from operator/Block_D_psi_body.c
rename to src/lib/operator/Block_D_psi_body.inc
diff --git a/operator/D_psi.c b/src/lib/operator/D_psi.c
similarity index 98%
rename from operator/D_psi.c
rename to src/lib/operator/D_psi.c
index 750fe67e8..ab93cb8fd 100644
--- a/operator/D_psi.c
+++ b/src/lib/operator/D_psi.c
@@ -54,7 +54,7 @@
 #define _PSWITCH(s) s##_32
 #define _PTSWITCH(s) s##32
 
-#include "D_psi_body.c"
+#include "D_psi_body.inc"
 
 #undef _C_TYPE
 #undef _F_TYPE
@@ -66,7 +66,7 @@
 #define _PSWITCH(s) s
 #define _PTSWITCH(s) s
 
-#include "D_psi_body.c"
+#include "D_psi_body.inc"
 
 #undef _C_TYPE
 #undef _F_TYPE
@@ -92,7 +92,7 @@ void D_psi_prec(spinor *const P, spinor *const Q) {
 #define _PSWITCH(s) s##_32
 #define _PTSWITCH(s) s##32
 
-#include "Block_D_psi_body.c"
+#include "Block_D_psi_body.inc"
 
 #undef _F_TYPE
 #undef _C_TYPE
@@ -104,7 +104,7 @@ void D_psi_prec(spinor *const P, spinor *const Q) {
 #define _PSWITCH(s) s
 #define _PTSWITCH(s) s
 
-#include "Block_D_psi_body.c"
+#include "Block_D_psi_body.inc"
 
 #undef _F_TYPE
 #undef _C_TYPE
diff --git a/operator/D_psi.h b/src/lib/operator/D_psi.h
similarity index 100%
rename from operator/D_psi.h
rename to src/lib/operator/D_psi.h
diff --git a/operator/D_psi_body.c b/src/lib/operator/D_psi_body.inc
similarity index 99%
rename from operator/D_psi_body.c
rename to src/lib/operator/D_psi_body.inc
index 06bde0fc5..f73822776 100644
--- a/operator/D_psi_body.c
+++ b/src/lib/operator/D_psi_body.inc
@@ -283,7 +283,7 @@ void _PSWITCH(D_psi)(_PTSWITCH(spinor) *const P, _PTSWITCH(spinor) *const Q) {
   _C_TYPE ALIGN32 phase_2l = (_C_TYPE)phase_2;
   _C_TYPE ALIGN32 phase_3l = (_C_TYPE)phase_3;
 
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   if (_PSWITCH(g_update_gauge_copy)) {
     _PSWITCH(update_backward_gauge)(_PSWITCH(g_gauge_field));
   }
diff --git a/operator/Dov_proj.c b/src/lib/operator/Dov_proj.c
similarity index 100%
rename from operator/Dov_proj.c
rename to src/lib/operator/Dov_proj.c
diff --git a/operator/Dov_proj.h b/src/lib/operator/Dov_proj.h
similarity index 100%
rename from operator/Dov_proj.h
rename to src/lib/operator/Dov_proj.h
diff --git a/operator/Dov_psi.c b/src/lib/operator/Dov_psi.c
similarity index 100%
rename from operator/Dov_psi.c
rename to src/lib/operator/Dov_psi.c
diff --git a/operator/Dov_psi.h b/src/lib/operator/Dov_psi.h
similarity index 100%
rename from operator/Dov_psi.h
rename to src/lib/operator/Dov_psi.h
diff --git a/operator/Hopping_Matrix.c b/src/lib/operator/Hopping_Matrix.c
similarity index 87%
rename from operator/Hopping_Matrix.c
rename to src/lib/operator/Hopping_Matrix.c
index a8da9e810..d25c3d961 100644
--- a/operator/Hopping_Matrix.c
+++ b/src/lib/operator/Hopping_Matrix.c
@@ -38,11 +38,11 @@
  *
  *  Structure of top level precompiler directives
  *
- * - defining _USE_HALFSPINOR implies that we also use
+ * - defining TM_USE_HALFSPINOR implies that we also use
  *   a "gauge copy"
  *
- * - such that we are checking for the _USE_GAUGECOPY feature seperatly in the
- *   ELSE branch of the "if defined _USE_HALFSPINOR" statement
+ * - such that we are checking for the TM_USE_GAUGE_COPY feature separately in the
+ *   ELSE branch of the "if defined TM_USE_HALFSPINOR" statement
  *
  ****************************************************************/
 
@@ -64,11 +64,11 @@
 #include "operator/Hopping_Matrix.h"
 #include "update_backward_gauge.h"
 
-#if defined _USE_HALFSPINOR
+#if defined TM_USE_HALFSPINOR
 #include "operator/halfspinor_hopping.h"
 
 void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) {
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   if (g_update_gauge_copy) {
     update_backward_gauge(g_gauge_field);
   }
@@ -80,7 +80,7 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) {
     su3* restrict u0 ALIGN;
 #endif
 
-#include "operator/halfspinor_body.c"
+#include "operator/halfspinor_body.inc"
 
 #ifdef TM_USE_OMP
   } /* OpenMP closing brace */
@@ -88,10 +88,10 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) {
   return;
 }
 
-#else /* thats _USE_HALFSPINOR */
+#else /* that's TM_USE_HALFSPINOR */
 void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) {
 
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   if (g_update_gauge_copy) {
     update_backward_gauge(g_gauge_field);
   }
@@ -106,7 +106,8 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) {
   {
 #endif
 
-#include "operator/hopping_body_dbl.c"
+#include "hopping.h"
+#include "operator/hopping_body_dbl.inc"
 
 #ifdef TM_USE_OMP
   } /* OpenMP closing brace */
@@ -114,4 +115,4 @@ void Hopping_Matrix(const int ieo, spinor* const l, spinor* const k) {
   return;
 }
 
-#endif /* thats _USE_HALFSPINOR */
+#endif /* that's TM_USE_HALFSPINOR */
diff --git a/operator/Hopping_Matrix.h b/src/lib/operator/Hopping_Matrix.h
similarity index 100%
rename from operator/Hopping_Matrix.h
rename to src/lib/operator/Hopping_Matrix.h
diff --git a/operator/Hopping_Matrix_32.c b/src/lib/operator/Hopping_Matrix_32.c
similarity index 91%
rename from operator/Hopping_Matrix_32.c
rename to src/lib/operator/Hopping_Matrix_32.c
index d1fbe78c7..06fd33efb 100644
--- a/operator/Hopping_Matrix_32.c
+++ b/src/lib/operator/Hopping_Matrix_32.c
@@ -39,11 +39,11 @@
  *
  *  Structure of top level precompiler directives
  *
- * - defining _USE_HALFSPINOR implies that we also use
+ * - defining TM_USE_HALFSPINOR implies that we also use
  *   a "gauge copy"
  *
- * - such that we are checking for the _USE_GAUGECOPY feature seperatly in the
- *   ELSE branch of the "if defined _USE_HALFSPINOR" statement
+ * - such that we are checking for the TM_USE_GAUGE_COPY feature separately in the
+ *   ELSE branch of the "if defined TM_USE_HALFSPINOR" statement
  *
  ****************************************************************/
 
@@ -63,16 +63,16 @@
 #endif
 #include "boundary.h"
 #include "init/init_dirac_halfspinor.h"
-#include "update_backward_gauge.h"
 #include "operator/Hopping_Matrix_32.h"
+#include "update_backward_gauge.h"
 
-#if defined _USE_HALFSPINOR
+#if defined TM_USE_HALFSPINOR
 #include "operator/halfspinor_hopping_32.h"
 #endif
 
 void Hopping_Matrix_32_orphaned(const int ieo, spinor32* const l, spinor32* const k) {
-#if defined _USE_HALFSPINOR
-#ifdef _GAUGE_COPY
+#if defined TM_USE_HALFSPINOR
+#ifdef TM_USE_GAUGE_COPY
   if (g_update_gauge_copy_32) {
     update_backward_gauge_32_orphaned(g_gauge_field_32);
   }
@@ -82,7 +82,7 @@ void Hopping_Matrix_32_orphaned(const int ieo, spinor32* const l, spinor32* cons
   su3_32* restrict u0 ALIGN32;
 #endif
 
-#include "operator/halfspinor_body_32.c"
+#include "operator/halfspinor_body_32.inc"
 #else
   printf("Error: Single precision Matrix only implemented with HALFSPINOR\n");
   exit(200);
diff --git a/operator/Hopping_Matrix_32.h b/src/lib/operator/Hopping_Matrix_32.h
similarity index 100%
rename from operator/Hopping_Matrix_32.h
rename to src/lib/operator/Hopping_Matrix_32.h
diff --git a/operator/Hopping_Matrix_32_nocom.c b/src/lib/operator/Hopping_Matrix_32_nocom.c
similarity index 100%
rename from operator/Hopping_Matrix_32_nocom.c
rename to src/lib/operator/Hopping_Matrix_32_nocom.c
diff --git a/operator/Hopping_Matrix_nocom.c b/src/lib/operator/Hopping_Matrix_nocom.c
similarity index 97%
rename from operator/Hopping_Matrix_nocom.c
rename to src/lib/operator/Hopping_Matrix_nocom.c
index dce8ad591..00c34c38b 100644
--- a/operator/Hopping_Matrix_nocom.c
+++ b/src/lib/operator/Hopping_Matrix_nocom.c
@@ -48,8 +48,5 @@
 
 #define Hopping_Matrix Hopping_Matrix_nocom
 #define _NO_COMM 1
-#ifdef _KOJAK_INST
-#undef _KOJAK_INST
-#endif
 
 #include "Hopping_Matrix.c"
diff --git a/operator/Hopping_Matrix_nocom.h b/src/lib/operator/Hopping_Matrix_nocom.h
similarity index 100%
rename from operator/Hopping_Matrix_nocom.h
rename to src/lib/operator/Hopping_Matrix_nocom.h
diff --git a/operator/Makefile.in b/src/lib/operator/Makefile.in
similarity index 100%
rename from operator/Makefile.in
rename to src/lib/operator/Makefile.in
diff --git a/operator/assign_mul_one_sw_pm_imu_inv_block_body.c b/src/lib/operator/assign_mul_one_sw_pm_imu_inv_block_body.inc
similarity index 100%
rename from operator/assign_mul_one_sw_pm_imu_inv_block_body.c
rename to src/lib/operator/assign_mul_one_sw_pm_imu_inv_block_body.inc
diff --git a/operator/assign_mul_one_sw_pm_imu_site_lexic_body.c b/src/lib/operator/assign_mul_one_sw_pm_imu_site_lexic_body.inc
similarity index 100%
rename from operator/assign_mul_one_sw_pm_imu_site_lexic_body.c
rename to src/lib/operator/assign_mul_one_sw_pm_imu_site_lexic_body.inc
diff --git a/operator/clover_accumulate_deriv.c b/src/lib/operator/clover_accumulate_deriv.c
similarity index 100%
rename from operator/clover_accumulate_deriv.c
rename to src/lib/operator/clover_accumulate_deriv.c
diff --git a/operator/clover_deriv.c b/src/lib/operator/clover_deriv.c
similarity index 100%
rename from operator/clover_deriv.c
rename to src/lib/operator/clover_deriv.c
diff --git a/operator/clover_det.c b/src/lib/operator/clover_det.c
similarity index 100%
rename from operator/clover_det.c
rename to src/lib/operator/clover_det.c
diff --git a/operator/clover_inline.h b/src/lib/operator/clover_inline.h
similarity index 100%
rename from operator/clover_inline.h
rename to src/lib/operator/clover_inline.h
diff --git a/operator/clover_invert.c b/src/lib/operator/clover_invert.c
similarity index 100%
rename from operator/clover_invert.c
rename to src/lib/operator/clover_invert.c
diff --git a/operator/clover_leaf.c b/src/lib/operator/clover_leaf.c
similarity index 100%
rename from operator/clover_leaf.c
rename to src/lib/operator/clover_leaf.c
diff --git a/operator/clover_leaf.h b/src/lib/operator/clover_leaf.h
similarity index 100%
rename from operator/clover_leaf.h
rename to src/lib/operator/clover_leaf.h
diff --git a/operator/clover_term.c b/src/lib/operator/clover_term.c
similarity index 100%
rename from operator/clover_term.c
rename to src/lib/operator/clover_term.c
diff --git a/operator/clovertm_operators.c b/src/lib/operator/clovertm_operators.c
similarity index 99%
rename from operator/clovertm_operators.c
rename to src/lib/operator/clovertm_operators.c
index fe328ac00..b6221b78b 100644
--- a/operator/clovertm_operators.c
+++ b/src/lib/operator/clovertm_operators.c
@@ -64,7 +64,7 @@ su3 ***sw_inv;
 #define _PTSWITCH(s) s
 #define _PSWITCH(s) s
 
-#include "assign_mul_one_sw_pm_imu_site_lexic_body.c"
+#include "assign_mul_one_sw_pm_imu_site_lexic_body.inc"
 
 #undef _F_TYPE
 #undef _PSWITCH
@@ -74,7 +74,7 @@ su3 ***sw_inv;
 #define _PTSWITCH(s) s##32
 #define _PSWITCH(s) s##_32
 
-#include "assign_mul_one_sw_pm_imu_site_lexic_body.c"
+#include "assign_mul_one_sw_pm_imu_site_lexic_body.inc"
 
 #undef _F_TYPE
 #undef _PSWITCH
@@ -1039,7 +1039,7 @@ void assign_mul_one_sw_pm_imu_eps(const int ieo, spinor *const k_s, spinor *cons
 #define _PSWITCH(s) s
 #define _PTSWITCH(s) s
 
-#include "assign_mul_one_sw_pm_imu_inv_block_body.c"
+#include "assign_mul_one_sw_pm_imu_inv_block_body.inc"
 
 #undef _F_TYPE
 #undef _PSWITCH
@@ -1049,7 +1049,7 @@ void assign_mul_one_sw_pm_imu_eps(const int ieo, spinor *const k_s, spinor *cons
 #define _PSWITCH(s) s##_32
 #define _PTSWITCH(s) s##32
 
-#include "assign_mul_one_sw_pm_imu_inv_block_body.c"
+#include "assign_mul_one_sw_pm_imu_inv_block_body.inc"
 
 #undef _F_TYPE
 #undef _PSWITCH
diff --git a/operator/clovertm_operators.h b/src/lib/operator/clovertm_operators.h
similarity index 100%
rename from operator/clovertm_operators.h
rename to src/lib/operator/clovertm_operators.h
diff --git a/operator/clovertm_operators_32.c b/src/lib/operator/clovertm_operators_32.c
similarity index 100%
rename from operator/clovertm_operators_32.c
rename to src/lib/operator/clovertm_operators_32.c
diff --git a/operator/clovertm_operators_32.h b/src/lib/operator/clovertm_operators_32.h
similarity index 100%
rename from operator/clovertm_operators_32.h
rename to src/lib/operator/clovertm_operators_32.h
diff --git a/operator/halfspinor_body.c b/src/lib/operator/halfspinor_body.inc
similarity index 96%
rename from operator/halfspinor_body.c
rename to src/lib/operator/halfspinor_body.inc
index 542292b1d..8286c89f3 100644
--- a/operator/halfspinor_body.c
+++ b/src/lib/operator/halfspinor_body.inc
@@ -30,10 +30,6 @@ halfspinor* restrict* phi ALIGN;
 halfspinor32* restrict* phi32 ALIGN;
 _declare_hregs();
 
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(hoppingmatrix)
-#endif
-
 #ifndef TM_USE_OMP
 s = k;
 _prefetch_spinor(s);
@@ -103,7 +99,7 @@ if (g_sloppy_precision == 1 && g_sloppy_precision_flag == 1) {
 #endif
 
 #if (defined TM_USE_MPI && !defined _NO_COMM)
-  xchange_halffield32();
+    xchange_halffield32();
 #endif
 
 #ifdef TM_USE_OMP
@@ -237,7 +233,7 @@ if (g_sloppy_precision == 1 && g_sloppy_precision_flag == 1) {
 #endif
 
 #if (defined TM_USE_MPI && !defined _NO_COMM)
-  xchange_halffield();
+    xchange_halffield();
 #endif
 
 #ifdef TM_USE_OMP
@@ -320,6 +316,3 @@ if (g_sloppy_precision == 1 && g_sloppy_precision_flag == 1) {
 #endif
   }
 }
-#ifdef _KOJAK_INST
-#pragma pomp inst end(hoppingmatrix)
-#endif
diff --git a/operator/halfspinor_body_32.c b/src/lib/operator/halfspinor_body_32.inc
similarity index 98%
rename from operator/halfspinor_body_32.c
rename to src/lib/operator/halfspinor_body_32.inc
index a8022382c..c1f9a25a1 100644
--- a/operator/halfspinor_body_32.c
+++ b/src/lib/operator/halfspinor_body_32.inc
@@ -1,6 +1,6 @@
 /**********************************************************************
  * single precision version Copyright (C) 2013 Florian Burger
- * based on halfspinor_body.c by Carsten Urbach
+ * based on halfspinor_body.inc by Carsten Urbach
  *
  * This file is based on an implementation of the Dirac operator
  * written by Martin Luescher, modified by Martin Hasenbusch in 2002
diff --git a/operator/halfspinor_hopping.h b/src/lib/operator/halfspinor_hopping.h
similarity index 100%
rename from operator/halfspinor_hopping.h
rename to src/lib/operator/halfspinor_hopping.h
diff --git a/operator/halfspinor_hopping_32.h b/src/lib/operator/halfspinor_hopping_32.h
similarity index 100%
rename from operator/halfspinor_hopping_32.h
rename to src/lib/operator/halfspinor_hopping_32.h
diff --git a/src/lib/operator/hopping.h b/src/lib/operator/hopping.h
new file mode 100644
index 000000000..5027f1976
--- /dev/null
+++ b/src/lib/operator/hopping.h
@@ -0,0 +1,153 @@
+/**********************************************************************
+ *
+ * Copyright (C) 2012 Carsten Urbach
+ *
+ * BG and halfspinor versions (C) 2007, 2008 Carsten Urbach
+ *
+ * This file is based on an implementation of the Dirac operator
+ * written by Martin Luescher, modified by Martin Hasenbusch in 2002
+ * and modified and extended by Carsten Urbach from 2003-2008
+ *
+ * This file is part of tmLQCD.
+ *
+ * tmLQCD is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * tmLQCD is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
+ *
+ **********************************************************************/
+/* Per-direction building blocks for the non-halfspinor hopping loop body (hopping_body_dbl.inc). */
+#ifndef _HOPPING_H
+#define _HOPPING_H
+/* Scratch state used by the macros below; sp/sm, up/um, ka0..ka3, rn, pn, cfactor come from the caller. */
+#define _declare_regs()	     \
+  su3_vector ALIGN psi, chi; \
+  spinor ALIGN temp;
+/* +t hop: overwrites temp via _vector_assign, so it has to run before the accumulating hops below. */
+#define _hop_t_p()				\
+  _vector_add(psi,sp->s0,sp->s2);		\
+  _su3_multiply(chi,(*up),psi);			\
+  _complex_times_vector(psi,ka0,chi);		\
+  _vector_assign(temp.s0,psi);			\
+  _vector_assign(temp.s2,psi);			\
+  _vector_add(psi,sp->s1,sp->s3);		\
+  _su3_multiply(chi,(*up),psi);			\
+  _complex_times_vector(psi,ka0,chi);		\
+  _vector_assign(temp.s1,psi);			\
+  _vector_assign(temp.s3,psi);
+/* -t hop: neighbour sm, link *um via _su3_inverse_multiply, cjg form of ka0; accumulates into temp. */
+#define _hop_t_m()				\
+  _vector_sub(psi,sm->s0,sm->s2);		\
+  _su3_inverse_multiply(chi,(*um),psi);		\
+  _complexcjg_times_vector(psi,ka0,chi);	\
+  _vector_add_assign(temp.s0,psi);		\
+  _vector_sub_assign(temp.s2,psi);		\
+  _vector_sub(psi,sm->s1,sm->s3);		\
+  _su3_inverse_multiply(chi,(*um),psi);		\
+  _complexcjg_times_vector(psi,ka0,chi);	\
+  _vector_add_assign(temp.s1,psi);		\
+  _vector_sub_assign(temp.s3,psi);
+/* +x hop (direction +1): neighbour sp, link *up, coefficient ka1; accumulates into temp. */
+#define _hop_x_p()				\
+  _vector_i_add(psi,sp->s0,sp->s3);		\
+  _su3_multiply(chi,(*up),psi);			\
+  _complex_times_vector(psi,ka1,chi);		\
+  _vector_add_assign(temp.s0,psi);		\
+  _vector_i_sub_assign(temp.s3,psi);		\
+  _vector_i_add(psi,sp->s1,sp->s2);		\
+  _su3_multiply(chi,(*up),psi);			\
+  _complex_times_vector(psi,ka1,chi);		\
+  _vector_add_assign(temp.s1,psi);		\
+  _vector_i_sub_assign(temp.s2,psi);
+/* -x hop (direction -1): neighbour sm, link *um via inverse multiply, cjg form of ka1; accumulates. */
+#define _hop_x_m()				\
+  _vector_i_sub(psi,sm->s0,sm->s3);		\
+  _su3_inverse_multiply(chi,(*um),psi);		\
+  _complexcjg_times_vector(psi,ka1,chi);	\
+  _vector_add_assign(temp.s0,psi);		\
+  _vector_i_add_assign(temp.s3,psi);		\
+  _vector_i_sub(psi,sm->s1,sm->s2);		\
+  _su3_inverse_multiply(chi,(*um),psi);		\
+  _complexcjg_times_vector(psi,ka1,chi);	\
+  _vector_add_assign(temp.s1,psi);		\
+  _vector_i_add_assign(temp.s2,psi);
+/* +y hop (direction +2): neighbour sp, link *up, coefficient ka2; accumulates into temp. */
+#define _hop_y_p()				\
+  _vector_add(psi,sp->s0,sp->s3);		\
+  _su3_multiply(chi,(*up),psi);			\
+  _complex_times_vector(psi,ka2,chi);		\
+  _vector_add_assign(temp.s0,psi);		\
+  _vector_add_assign(temp.s3,psi);		\
+  _vector_sub(psi,sp->s1,sp->s2);		\
+  _su3_multiply(chi,(*up),psi);			\
+  _complex_times_vector(psi,ka2,chi);		\
+  _vector_add_assign(temp.s1,psi);		\
+  _vector_sub_assign(temp.s2,psi);
+/* -y hop (direction -2): neighbour sm, link *um via inverse multiply, cjg form of ka2; accumulates. */
+#define _hop_y_m()				\
+  _vector_sub(psi,sm->s0,sm->s3);		\
+  _su3_inverse_multiply(chi,(*um),psi);		\
+  _complexcjg_times_vector(psi,ka2,chi);	\
+  _vector_add_assign(temp.s0,psi);		\
+  _vector_sub_assign(temp.s3,psi);		\
+  _vector_add(psi,sm->s1,sm->s2);		\
+  _su3_inverse_multiply(chi,(*um),psi);		\
+  _complexcjg_times_vector(psi,ka2,chi);	\
+  _vector_add_assign(temp.s1,psi);		\
+  _vector_add_assign(temp.s2,psi);
+/* +z hop (presumably direction +3 -- confirm at call site): sp, *up, ka3; accumulates into temp. */
+#define _hop_z_p()				\
+  _vector_i_add(psi,sp->s0,sp->s2);		\
+  _su3_multiply(chi,(*up),psi);			\
+  _complex_times_vector(psi,ka3,chi);		\
+  _vector_add_assign(temp.s0,psi);		\
+  _vector_i_sub_assign(temp.s2,psi);		\
+  _vector_i_sub(psi,sp->s1,sp->s3);		\
+  _su3_multiply(chi,(*up),psi);			\
+  _complex_times_vector(psi,ka3,chi);		\
+  _vector_add_assign(temp.s1,psi);		\
+  _vector_i_add_assign(temp.s3,psi);
+/* -z hop (presumably direction -3 -- confirm at call site): sm, *um inverse, cjg ka3; accumulates. */
+#define _hop_z_m()				\
+  _vector_i_sub(psi,sm->s0,sm->s2);		\
+  _su3_inverse_multiply(chi,(*um),psi);		\
+  _complexcjg_times_vector(psi,ka3,chi);	\
+  _vector_add_assign(temp.s0, psi);		\
+  _vector_i_add_assign(temp.s2, psi);		\
+  _vector_i_add(psi,sm->s1,sm->s3);		\
+  _su3_inverse_multiply(chi,(*um),psi);		\
+  _complexcjg_times_vector(psi,ka3,chi);	\
+  _vector_add_assign(temp.s1, psi);		\
+  _vector_i_sub_assign(temp.s3, psi);
+/* Writes rn from temp: _complex_times_vector with cfactor for s0/s1, the cjg variant for s2/s3. */
+#define _hop_mul_g5_cmplx_and_store()			\
+  _complex_times_vector(rn->s0, cfactor, temp.s0);	\
+  _complex_times_vector(rn->s1, cfactor, temp.s1);	\
+  _complexcjg_times_vector(rn->s2, cfactor, temp.s2);	\
+  _complexcjg_times_vector(rn->s3, cfactor, temp.s3);
+/* Writes rn from cfactor-scaled pn and temp; _vector_sub operand order flips between s0/s1 and s2/s3. */
+#define _g5_cmplx_sub_hop_and_g5store()			\
+  _complex_times_vector(psi, cfactor, pn->s0);		\
+  _vector_sub(rn->s0, psi, temp.s0);			\
+  _complex_times_vector(chi, cfactor, pn->s1);		\
+  _vector_sub(rn->s1, chi, temp.s1);			\
+  _complexcjg_times_vector(psi, cfactor, pn->s2);	\
+  _vector_sub(rn->s2, temp.s2, psi);			\
+  _complexcjg_times_vector(chi, cfactor, pn->s3);	\
+  _vector_sub(rn->s3, temp.s3, chi);
+/* Copies the four components of temp to rn unchanged. */
+#define _store_res()				\
+  _vector_assign(rn->s0, temp.s0);		\
+  _vector_assign(rn->s1, temp.s1);		\
+  _vector_assign(rn->s2, temp.s2);		\
+  _vector_assign(rn->s3, temp.s3);
+
+#endif /* _HOPPING_H */
diff --git a/operator/hopping_body_dbl.c b/src/lib/operator/hopping_body_dbl.inc
similarity index 91%
rename from operator/hopping_body_dbl.c
rename to src/lib/operator/hopping_body_dbl.inc
index 02edbaec7..41682fd31 100644
--- a/operator/hopping_body_dbl.c
+++ b/src/lib/operator/hopping_body_dbl.inc
@@ -45,7 +45,7 @@ if (ieo == 0) {
 #ifndef TM_USE_OMP
 hi = &g_hi[16 * ioff];
 
-#if ((defined _GAUGE_COPY))
+#if ((defined TM_USE_GAUGE_COPY))
 up = &g_gauge_field_copy[ioff][0];
 #else
 up = &g_gauge_field[(*hi)][0];
@@ -62,7 +62,7 @@ hi++;
 for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) {
 #ifdef TM_USE_OMP
   hi = &g_hi[16 * icx];
-#if ((defined _GAUGE_COPY))
+#if ((defined TM_USE_GAUGE_COPY))
   up = &g_gauge_field_copy[icx][0];
 #else
   up = &g_gauge_field[(*hi)][0];
@@ -76,7 +76,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) {
   pn = p + (icx - ioff);
 #endif
   /*********************** direction +t ************************/
-#if (!defined _GAUGE_COPY)
+#if (!defined TM_USE_GAUGE_COPY)
   um = &g_gauge_field[(*hi)][0];
 #else
   um = up + 1;
@@ -88,7 +88,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) {
   _hop_t_p();
 
   /*********************** direction -t ************************/
-#if ((defined _GAUGE_COPY))
+#if ((defined TM_USE_GAUGE_COPY))
   up = um + 1;
 #else
   up += 1;
@@ -99,7 +99,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) {
   _hop_t_m();
 
   /*********************** direction +1 ************************/
-#ifndef _GAUGE_COPY
+#ifndef TM_USE_GAUGE_COPY
   um = &g_gauge_field[(*hi)][1];
 #else
   um = up + 1;
@@ -111,7 +111,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) {
   _hop_x_p();
 
   /*********************** direction -1 ************************/
-#if ((defined _GAUGE_COPY))
+#if ((defined TM_USE_GAUGE_COPY))
   up = um + 1;
 #else
   up += 1;
@@ -122,7 +122,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) {
   _hop_x_m();
 
   /*********************** direction +2 ************************/
-#ifndef _GAUGE_COPY
+#ifndef TM_USE_GAUGE_COPY
   um = &g_gauge_field[(*hi)][2];
 #else
   um = up + 1;
@@ -134,7 +134,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) {
   _hop_y_p();
 
   /*********************** direction -2 ************************/
-#if ((defined _GAUGE_COPY))
+#if ((defined TM_USE_GAUGE_COPY))
   up = um + 1;
 #else
   up += 1;
@@ -145,7 +145,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) {
   _hop_y_m();
 
   /*********************** direction +3 ************************/
-#ifndef _GAUGE_COPY
+#ifndef TM_USE_GAUGE_COPY
   um = &g_gauge_field[(*hi)][3];
 #else
   um = up + 1;
@@ -158,7 +158,7 @@ for (int icx = ioff; icx < (VOLUME / 2 + ioff); icx++) {
 
   /*********************** direction -3 ************************/
 #ifndef TM_USE_OMP
-#if ((defined _GAUGE_COPY))
+#if ((defined TM_USE_GAUGE_COPY))
   up = um + 1;
 #else
   up = &g_gauge_field[(*hi)][0];
diff --git a/operator/hopping_sgl.c b/src/lib/operator/hopping_sgl.c
similarity index 96%
rename from operator/hopping_sgl.c
rename to src/lib/operator/hopping_sgl.c
index 5067ab13d..487bfc47f 100644
--- a/operator/hopping_sgl.c
+++ b/src/lib/operator/hopping_sgl.c
@@ -37,7 +37,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) {
   spinor32* restrict r, * restrict sp, * restrict sm;
   spinor32 temp;
 
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   if (g_update_gauge_copy) {
     update_backward_gauge();
   }
@@ -72,7 +72,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) {
     icy = g_lexic2eosub[iy];
 
     sp = k + icy;
-#if ((defined _GAUGE_COPY))
+#if ((defined TM_USE_GAUGE_COPY))
     up = &g_gauge_field_copy[icx][0];
 #else
     up = &g_gauge_field[ix][0];
@@ -100,7 +100,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) {
     icy = g_lexic2eosub[iy];
 
     sm = k + icy;
-#if ((defined _GAUGE_COPY))
+#if ((defined TM_USE_GAUGE_COPY))
     um = up + 1;
 #else
     um = &g_gauge_field[iy][0];
@@ -129,7 +129,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) {
 
     sp = k + icy;
 
-#if ((defined _GAUGE_COPY))
+#if ((defined TM_USE_GAUGE_COPY))
     up = um + 1;
 #else
     up += 1;
@@ -157,7 +157,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) {
     icy = g_lexic2eosub[iy];
 
     sm = k + icy;
-#ifndef _GAUGE_COPY
+#ifndef TM_USE_GAUGE_COPY
     um = &g_gauge_field[iy][1];
 #else
     um = up + 1;
@@ -185,7 +185,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) {
     icy = g_lexic2eosub[iy];
 
     sp = k + icy;
-#if ((defined _GAUGE_COPY))
+#if ((defined TM_USE_GAUGE_COPY))
     up = um + 1;
 #else
     up += 1;
@@ -212,7 +212,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) {
     icy = g_lexic2eosub[iy];
 
     sm = k + icy;
-#ifndef _GAUGE_COPY
+#ifndef TM_USE_GAUGE_COPY
     um = &g_gauge_field[iy][2];
 #else
     um = up + 1;
@@ -240,7 +240,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) {
     icy = g_lexic2eosub[iy];
 
     sp = k + icy;
-#if ((defined _GAUGE_COPY))
+#if ((defined TM_USE_GAUGE_COPY))
     up = um + 1;
 #else
     up += 1;
@@ -267,7 +267,7 @@ void Hopping_Matrix(int ieo, spinor32* const l, spinor32* const k) {
     icy = g_lexic2eosub[iy];
 
     sm = k + icy;
-#ifndef _GAUGE_COPY
+#ifndef TM_USE_GAUGE_COPY
     um = &g_gauge_field[iy][3];
 #else
     um = up + 1;
diff --git a/operator/mul_one_pm_imu_inv_body.c b/src/lib/operator/mul_one_pm_imu_inv_body.inc
similarity index 100%
rename from operator/mul_one_pm_imu_inv_body.c
rename to src/lib/operator/mul_one_pm_imu_inv_body.inc
diff --git a/operator/mul_one_pm_imu_sub_mul_body.c b/src/lib/operator/mul_one_pm_imu_sub_mul_body.inc
similarity index 100%
rename from operator/mul_one_pm_imu_sub_mul_body.c
rename to src/lib/operator/mul_one_pm_imu_sub_mul_body.inc
diff --git a/operator/tm_operators.c b/src/lib/operator/tm_operators.c
similarity index 99%
rename from operator/tm_operators.c
rename to src/lib/operator/tm_operators.c
index 934db2068..0d40b6097 100644
--- a/operator/tm_operators.c
+++ b/src/lib/operator/tm_operators.c
@@ -514,7 +514,7 @@ void tm_sub_H_eo_gamma5(spinor *const l, spinor *const p, spinor *const k, const
 #define _PSWITCH(s) s
 #define _PTSWITCH(s) s
 
-#include "mul_one_pm_imu_inv_body.c"
+#include "mul_one_pm_imu_inv_body.inc"
 
 #undef _F_TYPE
 #undef _C_TYPE
@@ -526,7 +526,7 @@ void tm_sub_H_eo_gamma5(spinor *const l, spinor *const p, spinor *const k, const
 #define _PSWITCH(s) s##_32
 #define _PTSWITCH(s) s##32
 
-#include "mul_one_pm_imu_inv_body.c"
+#include "mul_one_pm_imu_inv_body.inc"
 
 #undef _F_TYPE
 #undef _C_TYPE
@@ -768,7 +768,7 @@ void mul_one_pm_imu_sub_mul_gamma5(spinor *const l, spinor *const k, spinor *con
 #define _PSWITCH(s) s
 #define _PTSWITCH(s) s
 
-#include "mul_one_pm_imu_sub_mul_body.c"
+#include "mul_one_pm_imu_sub_mul_body.inc"
 
 #undef _C_TYPE
 #undef _F_TYPE
@@ -780,7 +780,7 @@ void mul_one_pm_imu_sub_mul_gamma5(spinor *const l, spinor *const k, spinor *con
 #define _PSWITCH(s) s##_32
 #define _PTSWITCH(s) s##32
 
-#include "mul_one_pm_imu_sub_mul_body.c"
+#include "mul_one_pm_imu_sub_mul_body.inc"
 
 #undef _C_TYPE
 #undef _F_TYPE
diff --git a/operator/tm_operators.h b/src/lib/operator/tm_operators.h
similarity index 100%
rename from operator/tm_operators.h
rename to src/lib/operator/tm_operators.h
diff --git a/operator/tm_operators_32.c b/src/lib/operator/tm_operators_32.c
similarity index 100%
rename from operator/tm_operators_32.c
rename to src/lib/operator/tm_operators_32.c
diff --git a/operator/tm_operators_32.h b/src/lib/operator/tm_operators_32.h
similarity index 100%
rename from operator/tm_operators_32.h
rename to src/lib/operator/tm_operators_32.h
diff --git a/operator/tm_operators_nd.c b/src/lib/operator/tm_operators_nd.c
similarity index 100%
rename from operator/tm_operators_nd.c
rename to src/lib/operator/tm_operators_nd.c
diff --git a/operator/tm_operators_nd.h b/src/lib/operator/tm_operators_nd.h
similarity index 100%
rename from operator/tm_operators_nd.h
rename to src/lib/operator/tm_operators_nd.h
diff --git a/operator/tm_operators_nd_32.c b/src/lib/operator/tm_operators_nd_32.c
similarity index 100%
rename from operator/tm_operators_nd_32.c
rename to src/lib/operator/tm_operators_nd_32.c
diff --git a/operator/tm_operators_nd_32.h b/src/lib/operator/tm_operators_nd_32.h
similarity index 100%
rename from operator/tm_operators_nd_32.h
rename to src/lib/operator/tm_operators_nd_32.h
diff --git a/operator/tm_sub_Hopping_Matrix.c b/src/lib/operator/tm_sub_Hopping_Matrix.c
similarity index 91%
rename from operator/tm_sub_Hopping_Matrix.c
rename to src/lib/operator/tm_sub_Hopping_Matrix.c
index fd2aef9db..ef64be0c1 100644
--- a/operator/tm_sub_Hopping_Matrix.c
+++ b/src/lib/operator/tm_sub_Hopping_Matrix.c
@@ -51,12 +51,12 @@
 // where cfactor = a + i b
 //
 
-#if (defined _USE_HALFSPINOR)
+#if (defined TM_USE_HALFSPINOR)
 #include "operator/halfspinor_hopping.h"
 
 void tm_sub_Hopping_Matrix(const int ieo, spinor* const l, spinor* const p, spinor* const k,
                            complex double const cfactor) {
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   if (g_update_gauge_copy) {
     update_backward_gauge(g_gauge_field);
   }
@@ -70,7 +70,7 @@ void tm_sub_Hopping_Matrix(const int ieo, spinor* const l, spinor* const p, spin
 
 #define _TM_SUB_HOP
     spinor* pn;
-#include "operator/halfspinor_body.c"
+#include "operator/halfspinor_body.inc"
 #undef _TM_SUB_HOP
 #ifdef TM_USE_OMP
   } /* OpenMP closing brace */
@@ -78,10 +78,12 @@ void tm_sub_Hopping_Matrix(const int ieo, spinor* const l, spinor* const p, spin
   return;
 }
 
-#elif (!defined _NO_COMM && !defined _USE_HALFSPINOR)
+#elif (!defined _NO_COMM && !defined TM_USE_HALFSPINOR)
+#include "hopping.h"
+
 void tm_sub_Hopping_Matrix(const int ieo, spinor* const l, spinor* p, spinor* const k,
                            complex double const cfactor) {
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   if (g_update_gauge_copy) {
     update_backward_gauge(g_gauge_field);
   }
@@ -98,7 +100,7 @@ void tm_sub_Hopping_Matrix(const int ieo, spinor* const l, spinor* p, spinor* co
 #define _TM_SUB_HOP
     spinor* pn;
 
-#include "operator/hopping_body_dbl.c"
+#include "operator/hopping_body_dbl.inc"
 
 #undef _TM_SUB_HOP
 #ifdef TM_USE_OMP
diff --git a/operator/tm_sub_Hopping_Matrix.h b/src/lib/operator/tm_sub_Hopping_Matrix.h
similarity index 100%
rename from operator/tm_sub_Hopping_Matrix.h
rename to src/lib/operator/tm_sub_Hopping_Matrix.h
diff --git a/operator/tm_times_Hopping_Matrix.c b/src/lib/operator/tm_times_Hopping_Matrix.c
similarity index 88%
rename from operator/tm_times_Hopping_Matrix.c
rename to src/lib/operator/tm_times_Hopping_Matrix.c
index 3b336d2a9..1de2a872c 100644
--- a/operator/tm_times_Hopping_Matrix.c
+++ b/src/lib/operator/tm_times_Hopping_Matrix.c
@@ -51,12 +51,12 @@
 // where cfactor = a + i b
 //
 
-#if (defined _USE_HALFSPINOR && !defined _NO_COMM)
-#include "operator/halfspinor_hopping.h"
+#if (defined TM_USE_HALFSPINOR && !defined _NO_COMM)
+#include "halfspinor_hopping.h"
 
 void tm_times_Hopping_Matrix(const int ieo, spinor* const l, spinor* const k,
                              complex double const cfactor) {
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   if (g_update_gauge_copy) {
     update_backward_gauge(g_gauge_field);
   }
@@ -69,7 +69,7 @@ void tm_times_Hopping_Matrix(const int ieo, spinor* const l, spinor* const k,
 #endif
 
 #define _MUL_G5_CMPLX
-#include "operator/halfspinor_body.c"
+#include "halfspinor_body.inc"
 #undef _MUL_G5_CMPLX
 
 #ifdef TM_USE_OMP
@@ -78,10 +78,12 @@ void tm_times_Hopping_Matrix(const int ieo, spinor* const l, spinor* const k,
   return;
 }
 
-#elif (!defined _NO_COMM && !defined _USE_HALFSPINOR)
+#elif (!defined _NO_COMM && !defined TM_USE_HALFSPINOR)
+#include "hopping.h"
+
 void tm_times_Hopping_Matrix(const int ieo, spinor* const l, spinor* const k,
                              double complex const cfactor) {
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   if (g_update_gauge_copy) {
     update_backward_gauge(g_gauge_field);
   }
@@ -96,11 +98,11 @@ void tm_times_Hopping_Matrix(const int ieo, spinor* const l, spinor* const k,
   {
 #endif
 #define _MUL_G5_CMPLX
-#include "operator/hopping_body_dbl.c"
+#include "hopping_body_dbl.inc"
 #undef _MUL_G5_CMPLX
 #ifdef TM_USE_OMP
   } /* OpenMP closing brace */
 #endif
   return;
 }
-#endif  //_USE_HALFSPINOR && !defined _NO_COMM
+#endif  // TM_USE_HALFSPINOR && !defined _NO_COMM
diff --git a/operator/tm_times_Hopping_Matrix.h b/src/lib/operator/tm_times_Hopping_Matrix.h
similarity index 100%
rename from operator/tm_times_Hopping_Matrix.h
rename to src/lib/operator/tm_times_Hopping_Matrix.h
diff --git a/operator_types.h b/src/lib/operator_types.h
similarity index 100%
rename from operator_types.h
rename to src/lib/operator_types.h
diff --git a/overrelaxation.c b/src/lib/overrelaxation.c
similarity index 98%
rename from overrelaxation.c
rename to src/lib/overrelaxation.c
index 2c2e486f7..2a1329bba 100644
--- a/overrelaxation.c
+++ b/src/lib/overrelaxation.c
@@ -153,7 +153,7 @@ void flip_subgroup(int ix, int mu, su3 vv, int i) {
   *z = w;
 }
 
-#if defined PARALLEL1
+#if defined TM_PARALLEL1
 void overrel_sweep() {
   int x0, x1, x2, x3;
   int mu, ix;
@@ -205,7 +205,7 @@ void overrel_sweep() {
   static su3 v;
   for (mu = 0; mu < 4; mu++) {
     for (ix = 0; ix < VOLUME; ix++) {
-      get_staples(&v, ix, mu, g_gauge_field);
+      get_staples(&v, ix, mu, (const su3 **)g_gauge_field);
       flip_subgroup(ix, mu, v, 1);
       flip_subgroup(ix, mu, v, 2);
       flip_subgroup(ix, mu, v, 3);
diff --git a/overrelaxation.h b/src/lib/overrelaxation.h
similarity index 100%
rename from overrelaxation.h
rename to src/lib/overrelaxation.h
diff --git a/parallel_io.h b/src/lib/parallel_io.h
similarity index 97%
rename from parallel_io.h
rename to src/lib/parallel_io.h
index 50e03fd59..98df3fb8c 100644
--- a/parallel_io.h
+++ b/src/lib/parallel_io.h
@@ -17,8 +17,8 @@
  * along with tmLQCD.  If not, see <http://www.gnu.org/licenses/>.
  ***********************************************************************/
 
-#ifndef _PARALLEL_IO_H
-#define _PARALLEL_IO_H
+#ifndef _TM_PARALLEL_IO_H
+#define _TM_PARALLEL_IO_H
 
 #include <lemon.h>
 #include "dml.h"
diff --git a/phmc.c b/src/lib/phmc.c
similarity index 100%
rename from phmc.c
rename to src/lib/phmc.c
diff --git a/phmc.h b/src/lib/phmc.h
similarity index 100%
rename from phmc.h
rename to src/lib/phmc.h
diff --git a/prepare_source.c b/src/lib/prepare_source.c
similarity index 100%
rename from prepare_source.c
rename to src/lib/prepare_source.c
diff --git a/prepare_source.h b/src/lib/prepare_source.h
similarity index 100%
rename from prepare_source.h
rename to src/lib/prepare_source.h
diff --git a/qphix_base_classes.hpp b/src/lib/qphix/qphix_base_classes.hpp
similarity index 100%
rename from qphix_base_classes.hpp
rename to src/lib/qphix/qphix_base_classes.hpp
diff --git a/qphix_interface.cpp b/src/lib/qphix/qphix_interface.cpp
similarity index 100%
rename from qphix_interface.cpp
rename to src/lib/qphix/qphix_interface.cpp
diff --git a/qphix_interface.hpp b/src/lib/qphix/qphix_interface.hpp
similarity index 100%
rename from qphix_interface.hpp
rename to src/lib/qphix/qphix_interface.hpp
diff --git a/qphix_interface_utils.hpp b/src/lib/qphix/qphix_interface_utils.hpp
similarity index 100%
rename from qphix_interface_utils.hpp
rename to src/lib/qphix/qphix_interface_utils.hpp
diff --git a/qphix_interface.h b/src/lib/qphix_interface.h
similarity index 100%
rename from qphix_interface.h
rename to src/lib/qphix_interface.h
diff --git a/qphix_types.h b/src/lib/qphix_types.h
similarity index 100%
rename from qphix_types.h
rename to src/lib/qphix_types.h
diff --git a/qphix_veclen.h b/src/lib/qphix_veclen.h
similarity index 100%
rename from qphix_veclen.h
rename to src/lib/qphix_veclen.h
diff --git a/quda_dummy_types.h b/src/lib/quda_dummy_types.h
similarity index 100%
rename from quda_dummy_types.h
rename to src/lib/quda_dummy_types.h
diff --git a/quda_gauge_paths.inc b/src/lib/quda_gauge_paths.inc
similarity index 100%
rename from quda_gauge_paths.inc
rename to src/lib/quda_gauge_paths.inc
diff --git a/quda_interface.c b/src/lib/quda_interface.c
similarity index 99%
rename from quda_interface.c
rename to src/lib/quda_interface.c
index b9a4e62a7..17898ea8a 100644
--- a/quda_interface.c
+++ b/src/lib/quda_interface.c
@@ -2059,9 +2059,7 @@ void _setQudaMultigridParam(QudaMultigridParam *mg_param) {
 
     // this is needed after QUDA commit
     // https://github.com/lattice/quda/commit/7903288629f0fcc474989fec5a1393ecc17a4b42
-#ifdef TM_QUDA_EXPERIMENTAL
     mg_param->n_vec_batch[level] = 1;
-#endif
 
     // set the MG EigSolver parameters, almost equivalent to
     // setEigParam from QUDA's multigrid_invert_test, except
@@ -2513,7 +2511,7 @@ void compute_gauge_derivative_quda(monomial *const mnl, hamiltonian_field_t *con
 
   const int rect = mnl->use_rectangles;
 
-  const int *path_length = rect ? plaq_rect_length : plaq_length;
+  const int *path_length = ((rect) ? (plaq_rect_length) : (plaq_length));
 
   const int num_paths = rect ? 24 : 6;
   const int max_length = rect ? 5 : 3;
@@ -2548,8 +2546,8 @@ void compute_gauge_derivative_quda(monomial *const mnl, hamiltonian_field_t *con
   reset_quda_gauge_state(&quda_gauge_state);
 
   tm_stopwatch_push(&g_timers, "computeGaugeForceQuda", "");
-  computeGaugeForceQuda((void *)mom_quda, (void *)gauge_quda, path_buf, path_length, loop_coeff,
-                        num_paths, max_length, 1.0, &f_gauge_param);
+  computeGaugeForceQuda((void *)mom_quda, (void *)gauge_quda, path_buf, (int *)path_length,
+                        loop_coeff, num_paths, max_length, 1.0, &f_gauge_param);
   tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA");
 
   free(path_buf);
@@ -3031,7 +3029,6 @@ void quda_mg_tune_params(void *spinorOut, void *spinorIn, const int max_iter) {
   free(tunable_params);
 }
 
-#ifdef TM_QUDA_FERMIONIC_FORCES
 void compute_cloverdet_derivative_quda(monomial *const mnl, hamiltonian_field_t *const hf,
                                        spinor *const X_o, spinor *const phi, int detratio) {
   tm_stopwatch_push(&g_timers, __func__, "");
@@ -3131,23 +3128,6 @@ void compute_ndcloverrat_derivative_quda(monomial *const mnl, hamiltonian_field_
 
   tm_stopwatch_pop(&g_timers, 0, 1, "TM_QUDA");
 }
-#else
-void compute_cloverdet_derivative_quda(monomial *const mnl, hamiltonian_field_t *const hf,
-                                       spinor *const X_o, spinor *const phi, int detratio) {
-  tm_debug_printf(0, 0,
-                  "Error:   UseExternalLibrary = quda requires that tmLQCD is compiled with "
-                  "--enable-quda_fermionic=yes\n");
-  exit(1);
-}
-void compute_ndcloverrat_derivative_quda(monomial *const mnl, hamiltonian_field_t *const hf,
-                                         spinor **const Qup, spinor **const Qdn,
-                                         solver_params_t *solver_params, int detratio) {
-  tm_debug_printf(0, 0,
-                  "Error:   UseExternalLibrary = quda requires that tmLQCD is compiled with "
-                  "--enable-quda_fermionic=yes\n");
-  exit(1);
-}
-#endif
 
 void compute_WFlow_quda(const double eps, const double tmax, const int traj, FILE *outfile) {
   tm_stopwatch_push(&g_timers, __func__, "");
diff --git a/quda_interface.h b/src/lib/quda_interface.h
similarity index 100%
rename from quda_interface.h
rename to src/lib/quda_interface.h
diff --git a/quda_types.h b/src/lib/quda_types.h
similarity index 100%
rename from quda_types.h
rename to src/lib/quda_types.h
diff --git a/ranlxd.c b/src/lib/ranlxd.c
similarity index 100%
rename from ranlxd.c
rename to src/lib/ranlxd.c
diff --git a/ranlxd.h b/src/lib/ranlxd.h
similarity index 100%
rename from ranlxd.h
rename to src/lib/ranlxd.h
diff --git a/ranlxs.c b/src/lib/ranlxs.c
similarity index 100%
rename from ranlxs.c
rename to src/lib/ranlxs.c
diff --git a/ranlxs.h b/src/lib/ranlxs.h
similarity index 100%
rename from ranlxs.h
rename to src/lib/ranlxs.h
diff --git a/rational/Makefile.in b/src/lib/rational/Makefile.in
similarity index 100%
rename from rational/Makefile.in
rename to src/lib/rational/Makefile.in
diff --git a/rational/elliptic.c b/src/lib/rational/elliptic.c
similarity index 100%
rename from rational/elliptic.c
rename to src/lib/rational/elliptic.c
diff --git a/rational/elliptic.h b/src/lib/rational/elliptic.h
similarity index 100%
rename from rational/elliptic.h
rename to src/lib/rational/elliptic.h
diff --git a/rational/rational.c b/src/lib/rational/rational.c
similarity index 100%
rename from rational/rational.c
rename to src/lib/rational/rational.c
diff --git a/rational/rational.h b/src/lib/rational/rational.h
similarity index 100%
rename from rational/rational.h
rename to src/lib/rational/rational.h
diff --git a/rational/zolotarev.c b/src/lib/rational/zolotarev.c
similarity index 100%
rename from rational/zolotarev.c
rename to src/lib/rational/zolotarev.c
diff --git a/rational/zolotarev.h b/src/lib/rational/zolotarev.h
similarity index 100%
rename from rational/zolotarev.h
rename to src/lib/rational/zolotarev.h
diff --git a/read_input.h b/src/lib/read_input.h
similarity index 100%
rename from read_input.h
rename to src/lib/read_input.h
diff --git a/read_input.l b/src/lib/read_input.l
similarity index 99%
rename from read_input.l
rename to src/lib/read_input.l
index 6af756c7e..5eb542f87 100644
--- a/read_input.l
+++ b/src/lib/read_input.l
@@ -849,8 +849,8 @@ static inline double fltlist_next_token(int * const list_end){
 }
 
 <INITMULTIGRID>AMG{SPC}* {
-#ifdef DDalphaAMG
- if(myverbose) printf("Initialising DDalphaAMG line %d\n", line_of_file); 
+#ifdef TM_USE_DDalphaAMG
+ if(myverbose) printf("Initialising DDalphaAMG line %d\n", line_of_file);
  BEGIN(MULTIGRID);
 #else
  printf("ERROR line %d: DDalphaAMG library not included\n", line_of_file);
@@ -1385,7 +1385,7 @@ static inline double fltlist_next_token(int * const list_end){
     BEGIN(name_caller);
   }
   DDalphaAMG {
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
     optr->solver = MG;
     if(myverbose) printf("  Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator);
     BEGIN(name_caller);
@@ -1490,7 +1490,7 @@ static inline double fltlist_next_token(int * const list_end){
     BEGIN(name_caller);
   }
   DDalphaAMG {
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
     optr->solver = MG;
     if(myverbose) printf("  Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator);
     BEGIN(name_caller);
@@ -1543,7 +1543,7 @@ static inline double fltlist_next_token(int * const list_end){
     BEGIN(name_caller);
   }
   DDalphaAMG {
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
     optr->solver = MG;
     if(myverbose) printf("  Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator);
     BEGIN(name_caller);
@@ -2834,7 +2834,7 @@ static inline double fltlist_next_token(int * const list_end){
     BEGIN(name_caller);
   }
   DDalphaAMG {
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
     if(myverbose) printf("  Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator);
     mnl->solver = MG;
     BEGIN(solver_caller);
@@ -2877,7 +2877,7 @@ static inline double fltlist_next_token(int * const list_end){
     BEGIN(solver_caller);
   }
   DDalphaAMG {
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
     if(myverbose) printf("  HB Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator);
     mnl->HB_solver = MG;
     BEGIN(solver_caller);
@@ -2902,7 +2902,7 @@ static inline double fltlist_next_token(int * const list_end){
     BEGIN(solver_caller);
   }
   DDalphaAMG {
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
     if(myverbose) printf("  Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator);
     mnl->solver = MG;
     BEGIN(solver_caller);
@@ -2935,7 +2935,7 @@ static inline double fltlist_next_token(int * const list_end){
     BEGIN(solver_caller);
   }
   DDalphaAMG {
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
     if(myverbose) printf("  Solver set to DDalphaAMG line %d operator %d\n", line_of_file, current_operator);
     mnl->solver = MG;
     BEGIN(solver_caller);
@@ -3229,49 +3229,49 @@ static inline double fltlist_next_token(int * const list_end){
 }
 
 <TT>{DIGIT}+                  {
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   T_global = atoi(yytext);
   if(myverbose!=0) printf("T =%s\n", yytext);
 #endif
 }
 <LL>{DIGIT}+                  {
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   L = atoi(yytext);
   if(myverbose!=0) printf("L =%s\n", yytext);
 #endif
 }
 <LLX>{DIGIT}+                  {
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   LX = atoi(yytext);
   if(myverbose!=0) printf("LX =%s\n", yytext);
 #endif
 }
 <LLY>{DIGIT}+                  {
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   LY = atoi(yytext);
   if(myverbose!=0) printf("LY =%s\n", yytext);
 #endif
 }
 <LLZ>{DIGIT}+                  {
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   LZ = atoi(yytext);
   if(myverbose!=0) printf("LZ =%s\n", yytext);
 #endif
 }
 <NPROCX>{DIGIT}+              {
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   N_PROC_X = atoi(yytext);
   if(myverbose!=0) printf("Nr of processors in x direction = %s\n", yytext);
 #endif
 }
 <NPROCY>{DIGIT}+              {
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   N_PROC_Y = atoi(yytext);
   if(myverbose!=0) printf("Nr of processors in y direction = %s\n", yytext);
 #endif
 }
 <NPROCZ>{DIGIT}+              {
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   N_PROC_Z = atoi(yytext);
   if(myverbose!=0) printf("Nr of processors in z direction = %s\n", yytext);
 #endif
@@ -3776,7 +3776,7 @@ int read_input(const char * conf_file){
    * Setting default values!
    ********************************************/
   reread = 0;
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   T_global = _default_T_global;
   L = _default_L;
   LX = _default_LX;
@@ -3994,7 +3994,7 @@ int read_input(const char * conf_file){
   yyout = fopen("/dev/null", "w");
 
   parse_config();  
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   if(LX == 0) {
     LX = L;
   }
@@ -4029,7 +4029,7 @@ int read_input(const char * conf_file){
  */
 
 int reread_input(const char * conf_file){
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   int tt=T, ll=L, lx = LX, ly = LY, lz = LZ, 
       np=N_PROC_X, npy = N_PROC_Y;
 #endif
@@ -4054,7 +4054,7 @@ int reread_input(const char * conf_file){
 
   parse_config();  
 
-#ifndef FIXEDVOLUME
+#ifndef TM_FIXEDVOLUME
   T = tt;
   L = ll;
   LX = lx;
diff --git a/reweighting_factor.c b/src/lib/reweighting_factor.c
similarity index 100%
rename from reweighting_factor.c
rename to src/lib/reweighting_factor.c
diff --git a/reweighting_factor.h b/src/lib/reweighting_factor.h
similarity index 100%
rename from reweighting_factor.h
rename to src/lib/reweighting_factor.h
diff --git a/reweighting_factor_nd.c b/src/lib/reweighting_factor_nd.c
similarity index 100%
rename from reweighting_factor_nd.c
rename to src/lib/reweighting_factor_nd.c
diff --git a/reweighting_factor_nd.h b/src/lib/reweighting_factor_nd.h
similarity index 100%
rename from reweighting_factor_nd.h
rename to src/lib/reweighting_factor_nd.h
diff --git a/rnd_gauge_trafo.c b/src/lib/rnd_gauge_trafo.c
similarity index 100%
rename from rnd_gauge_trafo.c
rename to src/lib/rnd_gauge_trafo.c
diff --git a/rnd_gauge_trafo.h b/src/lib/rnd_gauge_trafo.h
similarity index 100%
rename from rnd_gauge_trafo.h
rename to src/lib/rnd_gauge_trafo.h
diff --git a/sighandler.c b/src/lib/sighandler.c
similarity index 100%
rename from sighandler.c
rename to src/lib/sighandler.c
diff --git a/sighandler.h b/src/lib/sighandler.h
similarity index 100%
rename from sighandler.h
rename to src/lib/sighandler.h
diff --git a/smearing/Makefile.in b/src/lib/smearing/Makefile.in
similarity index 100%
rename from smearing/Makefile.in
rename to src/lib/smearing/Makefile.in
diff --git a/smearing/ape.h b/src/lib/smearing/ape.h
similarity index 100%
rename from smearing/ape.h
rename to src/lib/smearing/ape.h
diff --git a/smearing/ape.ih b/src/lib/smearing/ape.ih
similarity index 100%
rename from smearing/ape.ih
rename to src/lib/smearing/ape.ih
diff --git a/smearing/ape_ape_smear.c b/src/lib/smearing/ape_ape_smear.c
similarity index 100%
rename from smearing/ape_ape_smear.c
rename to src/lib/smearing/ape_ape_smear.c
diff --git a/smearing/hex.h b/src/lib/smearing/hex.h
similarity index 100%
rename from smearing/hex.h
rename to src/lib/smearing/hex.h
diff --git a/smearing/hex.ih b/src/lib/smearing/hex.ih
similarity index 100%
rename from smearing/hex.ih
rename to src/lib/smearing/hex.ih
diff --git a/smearing/hex_hex_smear.c b/src/lib/smearing/hex_hex_smear.c
similarity index 100%
rename from smearing/hex_hex_smear.c
rename to src/lib/smearing/hex_hex_smear.c
diff --git a/smearing/hex_stout_exclude_none.c b/src/lib/smearing/hex_stout_exclude_none.c
similarity index 100%
rename from smearing/hex_stout_exclude_none.c
rename to src/lib/smearing/hex_stout_exclude_none.c
diff --git a/smearing/hex_stout_exclude_one.c b/src/lib/smearing/hex_stout_exclude_one.c
similarity index 100%
rename from smearing/hex_stout_exclude_one.c
rename to src/lib/smearing/hex_stout_exclude_one.c
diff --git a/smearing/hex_stout_exclude_two.c b/src/lib/smearing/hex_stout_exclude_two.c
similarity index 100%
rename from smearing/hex_stout_exclude_two.c
rename to src/lib/smearing/hex_stout_exclude_two.c
diff --git a/smearing/hyp.h b/src/lib/smearing/hyp.h
similarity index 100%
rename from smearing/hyp.h
rename to src/lib/smearing/hyp.h
diff --git a/smearing/hyp.ih b/src/lib/smearing/hyp.ih
similarity index 100%
rename from smearing/hyp.ih
rename to src/lib/smearing/hyp.ih
diff --git a/smearing/hyp_APE_project_exclude_none.c b/src/lib/smearing/hyp_APE_project_exclude_none.c
similarity index 100%
rename from smearing/hyp_APE_project_exclude_none.c
rename to src/lib/smearing/hyp_APE_project_exclude_none.c
diff --git a/smearing/hyp_APE_project_exclude_one.c b/src/lib/smearing/hyp_APE_project_exclude_one.c
similarity index 100%
rename from smearing/hyp_APE_project_exclude_one.c
rename to src/lib/smearing/hyp_APE_project_exclude_one.c
diff --git a/smearing/hyp_APE_project_exclude_two.c b/src/lib/smearing/hyp_APE_project_exclude_two.c
similarity index 100%
rename from smearing/hyp_APE_project_exclude_two.c
rename to src/lib/smearing/hyp_APE_project_exclude_two.c
diff --git a/smearing/hyp_hyp_smear.c b/src/lib/smearing/hyp_hyp_smear.c
similarity index 100%
rename from smearing/hyp_hyp_smear.c
rename to src/lib/smearing/hyp_hyp_smear.c
diff --git a/smearing/hyp_hyp_staples_exclude_none.c b/src/lib/smearing/hyp_hyp_staples_exclude_none.c
similarity index 100%
rename from smearing/hyp_hyp_staples_exclude_none.c
rename to src/lib/smearing/hyp_hyp_staples_exclude_none.c
diff --git a/smearing/hyp_hyp_staples_exclude_one.c b/src/lib/smearing/hyp_hyp_staples_exclude_one.c
similarity index 100%
rename from smearing/hyp_hyp_staples_exclude_one.c
rename to src/lib/smearing/hyp_hyp_staples_exclude_one.c
diff --git a/smearing/hyp_hyp_staples_exclude_two.c b/src/lib/smearing/hyp_hyp_staples_exclude_two.c
similarity index 100%
rename from smearing/hyp_hyp_staples_exclude_two.c
rename to src/lib/smearing/hyp_hyp_staples_exclude_two.c
diff --git a/smearing/stout.h b/src/lib/smearing/stout.h
similarity index 100%
rename from smearing/stout.h
rename to src/lib/smearing/stout.h
diff --git a/smearing/stout.ih b/src/lib/smearing/stout.ih
similarity index 100%
rename from smearing/stout.ih
rename to src/lib/smearing/stout.ih
diff --git a/smearing/stout_stout_smear.c b/src/lib/smearing/stout_stout_smear.c
similarity index 100%
rename from smearing/stout_stout_smear.c
rename to src/lib/smearing/stout_stout_smear.c
diff --git a/smearing/uils_print_config_to_screen.c b/src/lib/smearing/uils_print_config_to_screen.c
similarity index 100%
rename from smearing/uils_print_config_to_screen.c
rename to src/lib/smearing/uils_print_config_to_screen.c
diff --git a/smearing/utils.h b/src/lib/smearing/utils.h
similarity index 100%
rename from smearing/utils.h
rename to src/lib/smearing/utils.h
diff --git a/smearing/utils.ih b/src/lib/smearing/utils.ih
similarity index 100%
rename from smearing/utils.ih
rename to src/lib/smearing/utils.ih
diff --git a/smearing/utils_generic_staples.c b/src/lib/smearing/utils_generic_staples.c
similarity index 100%
rename from smearing/utils_generic_staples.c
rename to src/lib/smearing/utils_generic_staples.c
diff --git a/smearing/utils_print_config_to_screen.c b/src/lib/smearing/utils_print_config_to_screen.c
similarity index 100%
rename from smearing/utils_print_config_to_screen.c
rename to src/lib/smearing/utils_print_config_to_screen.c
diff --git a/smearing/utils_print_su3.c b/src/lib/smearing/utils_print_su3.c
similarity index 100%
rename from smearing/utils_print_su3.c
rename to src/lib/smearing/utils_print_su3.c
diff --git a/smearing/utils_project_antiherm.c b/src/lib/smearing/utils_project_antiherm.c
similarity index 100%
rename from smearing/utils_project_antiherm.c
rename to src/lib/smearing/utils_project_antiherm.c
diff --git a/smearing/utils_project_herm.c b/src/lib/smearing/utils_project_herm.c
similarity index 100%
rename from smearing/utils_project_herm.c
rename to src/lib/smearing/utils_project_herm.c
diff --git a/smearing/utils_reunitarize.c b/src/lib/smearing/utils_reunitarize.c
similarity index 100%
rename from smearing/utils_reunitarize.c
rename to src/lib/smearing/utils_reunitarize.c
diff --git a/smearing/utils_reunitarize_MILC.c b/src/lib/smearing/utils_reunitarize_MILC.c
similarity index 88%
rename from smearing/utils_reunitarize_MILC.c
rename to src/lib/smearing/utils_reunitarize_MILC.c
index 757a797df..fec177a42 100644
--- a/smearing/utils_reunitarize_MILC.c
+++ b/src/lib/smearing/utils_reunitarize_MILC.c
@@ -1,3 +1,4 @@
+#include <complex.h>
 #include "utils.ih"
 
 /* No reunitarization code seems to be available, so I've adapted (stolen) this routine from the
@@ -35,12 +36,11 @@ void reunitarize(su3 *omega) {
   bj2 = omega->c02;
 
   omega->c20 = bj1 * omega->c12;
-  omega->c20 -= bj2 *omega
-                    ->c11
+  omega->c20 -= bj2 * omega->c11;
 
-                        omega->c21 = bj2 * omega->c10;
+  omega->c21 = bj2 * omega->c10;
   omega->c21 -= bj0 * omega->c12;
 
   omega->c22 = bj0 * omega->c11;
-  omega->c22 -= bj1r * omega->c10;
+  omega->c22 -= bj1 * omega->c10;
 }
diff --git a/solver/M_plus_block_psi_body.c b/src/lib/solver/M_plus_block_psi_body.inc
similarity index 100%
rename from solver/M_plus_block_psi_body.c
rename to src/lib/solver/M_plus_block_psi_body.inc
diff --git a/solver/Msap.c b/src/lib/solver/Msap.c
similarity index 99%
rename from solver/Msap.c
rename to src/lib/solver/Msap.c
index 79e99489f..0a228eebe 100644
--- a/solver/Msap.c
+++ b/src/lib/solver/Msap.c
@@ -48,7 +48,7 @@ void dummy_Di(spinor* const P, spinor* const Q, const int i) {
 #define _PTSWITCH(s) s
 #define _PSWITCH(s) s
 
-#include "M_plus_block_psi_body.c"
+#include "M_plus_block_psi_body.inc"
 
 #undef _PTSWITCH
 #undef _PSWITCH
@@ -58,7 +58,7 @@ void dummy_Di(spinor* const P, spinor* const Q, const int i) {
 // this is ugly!
 #define DUM_MATRIX 0
 
-#include "M_plus_block_psi_body.c"
+#include "M_plus_block_psi_body.inc"
 
 #undef _PTSWITCH
 #undef _PSWITCH
diff --git a/solver/Msap.h b/src/lib/solver/Msap.h
similarity index 100%
rename from solver/Msap.h
rename to src/lib/solver/Msap.h
diff --git a/solver/bicg_complex.c b/src/lib/solver/bicg_complex.c
similarity index 100%
rename from solver/bicg_complex.c
rename to src/lib/solver/bicg_complex.c
diff --git a/solver/bicg_complex.h b/src/lib/solver/bicg_complex.h
similarity index 100%
rename from solver/bicg_complex.h
rename to src/lib/solver/bicg_complex.h
diff --git a/solver/bicgstab2.c b/src/lib/solver/bicgstab2.c
similarity index 100%
rename from solver/bicgstab2.c
rename to src/lib/solver/bicgstab2.c
diff --git a/solver/bicgstab2.h b/src/lib/solver/bicgstab2.h
similarity index 100%
rename from solver/bicgstab2.h
rename to src/lib/solver/bicgstab2.h
diff --git a/solver/bicgstab_complex.c b/src/lib/solver/bicgstab_complex.c
similarity index 100%
rename from solver/bicgstab_complex.c
rename to src/lib/solver/bicgstab_complex.c
diff --git a/solver/bicgstab_complex.h b/src/lib/solver/bicgstab_complex.h
similarity index 100%
rename from solver/bicgstab_complex.h
rename to src/lib/solver/bicgstab_complex.h
diff --git a/solver/bicgstab_complex_bi.c b/src/lib/solver/bicgstab_complex_bi.c
similarity index 100%
rename from solver/bicgstab_complex_bi.c
rename to src/lib/solver/bicgstab_complex_bi.c
diff --git a/solver/bicgstab_complex_bi.h b/src/lib/solver/bicgstab_complex_bi.h
similarity index 100%
rename from solver/bicgstab_complex_bi.h
rename to src/lib/solver/bicgstab_complex_bi.h
diff --git a/solver/bicgstabell.c b/src/lib/solver/bicgstabell.c
similarity index 100%
rename from solver/bicgstabell.c
rename to src/lib/solver/bicgstabell.c
diff --git a/solver/bicgstabell.h b/src/lib/solver/bicgstabell.h
similarity index 100%
rename from solver/bicgstabell.h
rename to src/lib/solver/bicgstabell.h
diff --git a/solver/cg_her.c b/src/lib/solver/cg_her.c
similarity index 99%
rename from solver/cg_her.c
rename to src/lib/solver/cg_her.c
index bf6981c4b..b556acb25 100644
--- a/solver/cg_her.c
+++ b/src/lib/solver/cg_her.c
@@ -102,7 +102,7 @@ int cg_her(spinor* const P, spinor* const Q, const int max_iter, double eps_sq,
     if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq * squarenorm) && (rel_prec == 1))) {
       break;
     }
-#ifdef _USE_HALFSPINOR
+#ifdef TM_USE_HALFSPINOR
     if (((err * err <= eps_sq) && (rel_prec == 0)) ||
         ((err * err <= eps_sq * squarenorm) && (rel_prec == 1))) {
       g_sloppy_precision = 1;
diff --git a/solver/cg_her.h b/src/lib/solver/cg_her.h
similarity index 100%
rename from solver/cg_her.h
rename to src/lib/solver/cg_her.h
diff --git a/solver/cg_her_bi.c b/src/lib/solver/cg_her_bi.c
similarity index 100%
rename from solver/cg_her_bi.c
rename to src/lib/solver/cg_her_bi.c
diff --git a/solver/cg_her_bi.h b/src/lib/solver/cg_her_bi.h
similarity index 100%
rename from solver/cg_her_bi.h
rename to src/lib/solver/cg_her_bi.h
diff --git a/solver/cg_her_nd.c b/src/lib/solver/cg_her_nd.c
similarity index 99%
rename from solver/cg_her_nd.c
rename to src/lib/solver/cg_her_nd.c
index 03a85a713..746c21718 100644
--- a/solver/cg_her_nd.c
+++ b/src/lib/solver/cg_her_nd.c
@@ -133,7 +133,7 @@ int cg_her_nd(spinor* const P_up, spinor* P_dn, spinor* const Q_up, spinor* cons
     if (((err <= eps_sq) && (rel_prec == 0)) || ((err <= eps_sq * squarenorm) && (rel_prec == 1))) {
       break;
     }
-#ifdef _USE_HALFSPINOR
+#ifdef TM_USE_HALFSPINOR
     if (((err * err <= eps_sq) && (rel_prec == 0)) ||
         ((err * err <= eps_sq * squarenorm) && (rel_prec == 1))) {
       g_sloppy_precision = 1;
diff --git a/solver/cg_her_nd.h b/src/lib/solver/cg_her_nd.h
similarity index 100%
rename from solver/cg_her_nd.h
rename to src/lib/solver/cg_her_nd.h
diff --git a/solver/cg_mms_tm.c b/src/lib/solver/cg_mms_tm.c
similarity index 100%
rename from solver/cg_mms_tm.c
rename to src/lib/solver/cg_mms_tm.c
diff --git a/solver/cg_mms_tm.h b/src/lib/solver/cg_mms_tm.h
similarity index 100%
rename from solver/cg_mms_tm.h
rename to src/lib/solver/cg_mms_tm.h
diff --git a/solver/cg_mms_tm_nd.c b/src/lib/solver/cg_mms_tm_nd.c
similarity index 100%
rename from solver/cg_mms_tm_nd.c
rename to src/lib/solver/cg_mms_tm_nd.c
diff --git a/solver/cg_mms_tm_nd.h b/src/lib/solver/cg_mms_tm_nd.h
similarity index 100%
rename from solver/cg_mms_tm_nd.h
rename to src/lib/solver/cg_mms_tm_nd.h
diff --git a/solver/cgne4complex.c b/src/lib/solver/cgne4complex.c
similarity index 100%
rename from solver/cgne4complex.c
rename to src/lib/solver/cgne4complex.c
diff --git a/solver/cgne4complex.h b/src/lib/solver/cgne4complex.h
similarity index 100%
rename from solver/cgne4complex.h
rename to src/lib/solver/cgne4complex.h
diff --git a/solver/cgs_real.c b/src/lib/solver/cgs_real.c
similarity index 100%
rename from solver/cgs_real.c
rename to src/lib/solver/cgs_real.c
diff --git a/solver/cgs_real.h b/src/lib/solver/cgs_real.h
similarity index 100%
rename from solver/cgs_real.h
rename to src/lib/solver/cgs_real.h
diff --git a/solver/chrono_guess.c b/src/lib/solver/chrono_guess.c
similarity index 100%
rename from solver/chrono_guess.c
rename to src/lib/solver/chrono_guess.c
diff --git a/solver/chrono_guess.h b/src/lib/solver/chrono_guess.h
similarity index 100%
rename from solver/chrono_guess.h
rename to src/lib/solver/chrono_guess.h
diff --git a/solver/cr.c b/src/lib/solver/cr.c
similarity index 99%
rename from solver/cr.c
rename to src/lib/solver/cr.c
index 58022ac28..f6a1bd348 100644
--- a/solver/cr.c
+++ b/src/lib/solver/cr.c
@@ -106,7 +106,7 @@ int cr(spinor* const P, spinor* const Q, const int m, const int max_restarts, co
       break;
     }
 
-#ifdef _USE_HALFSPINOR
+#ifdef TM_USE_HALFSPINOR
     if (((err * err <= eps_sq) && (rel_prec == 0)) ||
         ((err * err <= eps_sq * norm_sq) && (rel_prec == 1))) {
       if (g_sloppy_precision_flag == 1) {
diff --git a/solver/cr.h b/src/lib/solver/cr.h
similarity index 100%
rename from solver/cr.h
rename to src/lib/solver/cr.h
diff --git a/solver/dfl_projector.c b/src/lib/solver/dfl_projector.c
similarity index 99%
rename from solver/dfl_projector.c
rename to src/lib/solver/dfl_projector.c
index b840aabfa..5f0ce2026 100644
--- a/solver/dfl_projector.c
+++ b/src/lib/solver/dfl_projector.c
@@ -453,7 +453,7 @@ void little_project(_Complex double *const out, _Complex double *const in, const
 #define _MPI_C_TYPE MPI_DOUBLE_COMPLEX
 #define _F_TYPE double
 
-#include "little_project_eo_body.c"
+#include "little_project_eo_body.inc"
 
 #undef _PSWITCH
 #undef _F_TYPE
@@ -465,7 +465,7 @@ void little_project(_Complex double *const out, _Complex double *const in, const
 #define _MPI_C_TYPE MPI_COMPLEX
 #define _F_TYPE float
 
-#include "little_project_eo_body.c"
+#include "little_project_eo_body.inc"
 
 #undef _PSWITCH
 #undef _F_TYPE
@@ -552,7 +552,7 @@ void little_P_L_D(_Complex double *const out, _Complex double *const in) {
 #define _PSWITCH(s) s
 #define _F_TYPE double
 
-#include "little_mg_precon_body.c"
+#include "little_mg_precon_body.inc"
 
 #undef _PSWITCH
 #undef _F_TYPE
@@ -560,7 +560,7 @@ void little_P_L_D(_Complex double *const out, _Complex double *const in) {
 #define _PSWITCH(s) s##_32
 #define _F_TYPE float
 
-#include "little_mg_precon_body.c"
+#include "little_mg_precon_body.inc"
 
 #undef _PSWITCH
 #undef _F_TYPE
diff --git a/solver/dfl_projector.h b/src/lib/solver/dfl_projector.h
similarity index 100%
rename from solver/dfl_projector.h
rename to src/lib/solver/dfl_projector.h
diff --git a/solver/diagonalise_general_matrix.c b/src/lib/solver/diagonalise_general_matrix.c
similarity index 99%
rename from solver/diagonalise_general_matrix.c
rename to src/lib/solver/diagonalise_general_matrix.c
index 0667da9aa..9fb989da6 100644
--- a/solver/diagonalise_general_matrix.c
+++ b/src/lib/solver/diagonalise_general_matrix.c
@@ -70,7 +70,7 @@ void diagonalise_general_matrix(int n, _Complex double *A, int lda, _Complex dou
 
   /* Query call to get the optimal lwork */
   lwork = -1;
-#ifdef HAVE_LAPACK
+#ifdef TM_LAPACK
   _FT(zgeevx)("N", "N", "V", "N", &n, A, &lda, evalues, vl, &n, vr, &n, &ilo, &ihi, scale, &abnrm,
               rcone, rconv, &dummy, &lwork, rwork, &info, 1, 1, 1, 1);
   lwork = (int)(creal(dummy));
diff --git a/solver/diagonalise_general_matrix.h b/src/lib/solver/diagonalise_general_matrix.h
similarity index 100%
rename from solver/diagonalise_general_matrix.h
rename to src/lib/solver/diagonalise_general_matrix.h
diff --git a/solver/dirac_operator_eigenvectors.c b/src/lib/solver/dirac_operator_eigenvectors.c
similarity index 99%
rename from solver/dirac_operator_eigenvectors.c
rename to src/lib/solver/dirac_operator_eigenvectors.c
index 42e85d198..845d5aedc 100644
--- a/solver/dirac_operator_eigenvectors.c
+++ b/src/lib/solver/dirac_operator_eigenvectors.c
@@ -28,7 +28,7 @@
 #ifdef FFTW
 #include <fftw3.h>
 #endif
-#ifdef _USE_SHMEM
+#ifdef TM_USE_SHMEM
 #include <mpp/shmem.h>
 #endif
 #include <stdlib.h>
@@ -330,7 +330,7 @@ _Complex double calcDDaggerDovEvalue(const int *praw, double kappa, double rho,
 }
 
 void spinor_fft(spinor *spinor_in, spinor *spinor_out, int tt, int ll, unsigned int forward) {
-#ifdef HAVE_FFTW
+#ifdef TM_USE_FFTW
   fftw_plan plan = spinor_fftw_plan(spinor_in, spinor_out, tt, ll, forward, FFTW_WISDOM_ONLY);
   fftw_execute(plan);
 #else
@@ -555,7 +555,7 @@ void spinorPrecWS_Free(spinorPrecWS *ws) {
  */
 
 void eigenvector_Dtm(spinor *spin, double mu, int epsilon, int k, int color, int rawp[4]) {
-#ifdef HAVE_FFTW
+#ifdef TM_USE_FFTW
   fftw_plan p1bw;
 #endif
   int i = 0;
@@ -630,7 +630,7 @@ void eigenvector_Dtm(spinor *spin, double mu, int epsilon, int k, int color, int
 
   _spinor_muleq_real(*phi, 1.0 / sqrt((double)(VOLUME)));
 
-#ifdef HAVE_FFTW
+#ifdef TM_USE_FFTW
   p1bw = spinor_fftw_plan(spin, spin, T, L, 0, FFTW_WISDOM_ONLY);
   fftw_execute(p1bw);
 #endif
@@ -638,7 +638,7 @@ void eigenvector_Dtm(spinor *spin, double mu, int epsilon, int k, int color, int
   /* spinor mulp half phase */
 }
 
-#ifdef HAVE_FFTW
+#ifdef TM_USE_FFTW
 fftw_plan spinor_fftw_plan(const spinor *spinor_in, spinor *spinor_out, int T, int ll,
                            unsigned int forward, int fftw_flags) {
   /*    int index_s = gsi(get_index(it, ix, iy, iz, tt, ll)); */
@@ -760,13 +760,13 @@ void spinorPrecondition(spinor *spinor_out, const spinor *spinor_in, spinorPrecW
   spinor phi_plus;
   double OOVOL = 1. / (double)(VOLUME);
 
-#ifdef HAVE_FFTW
+#ifdef TM_USE_FFTW
   fftw_plan plan_fw;
   fftw_plan plan_bw;
 #endif
 
   if (autofft == 1) {
-#ifdef HAVE_FFTW
+#ifdef TM_USE_FFTW
     /*     spinor_mulp_half_phase(spinor_out,spinor_in,ws->c_table, ws->s_table,1,1.); */
     plan_fw = spinor_fftw_plan(spinor_in, spinor_out, tt, ll, 1 /* = true */, FFTW_WISDOM_ONLY);
     fftw_execute(plan_fw);
@@ -889,7 +889,7 @@ void spinorPrecondition(spinor *spinor_out, const spinor *spinor_in, spinorPrecW
   }
 
   if (autofft == 1) {
-#ifdef HAVE_FFTW
+#ifdef TM_USE_FFTW
     plan_bw = spinor_fftw_plan(spinor_out, spinor_out, tt, LX, 0, FFTW_WISDOM_ONLY);
     fftw_execute(plan_bw);
 #endif
@@ -1292,7 +1292,7 @@ void spinor_mulp_half_phase(spinor *spinor_out, const spinor *spinor_in, double
  * loading and storing of fftw wisdoms
  */
 
-#ifdef HAVE_FFTW
+#ifdef TM_USE_FFTW
 void loadFFTWWisdom(spinor *spinor_in, spinor *spinor_out, int tt, int ll) {
   /*   ostringstream filename_fftw_wisdom; */
   /*   filename_fftw_wisdom << "fftw_wisdom_" << setw(2) << setfill('0') << T << "x"<< setw(2) <<
@@ -2050,7 +2050,7 @@ void calculateDiagFalloffElements(const int op_id) {
   if (g_precWS == NULL) {
     /* we are going to need fft*/
 
-#ifdef HAVE_FFTW
+#ifdef TM_USE_FFTW
     loadFFTWWisdom(g_spinor_field[0], g_spinor_field[1], T, LX);
 #endif
   }
diff --git a/solver/dirac_operator_eigenvectors.h b/src/lib/solver/dirac_operator_eigenvectors.h
similarity index 99%
rename from solver/dirac_operator_eigenvectors.h
rename to src/lib/solver/dirac_operator_eigenvectors.h
index 1ebe2ce71..b10a86312 100644
--- a/solver/dirac_operator_eigenvectors.h
+++ b/src/lib/solver/dirac_operator_eigenvectors.h
@@ -24,7 +24,7 @@
 #ifdef HAVE_CONFIG_H
 #include "tmlqcd_config.h"
 #endif
-#ifdef HAVE_FFTW
+#ifdef TM_USE_FFTW
 #include <fftw3.h>
 #endif
 
@@ -68,7 +68,7 @@ extern tm_operator PRECWSOPERATORSELECT[14];
 /* */
 extern double g_prec_sequence_d_dagger_d[3];
 
-#ifdef HAVE_FFTW
+#ifdef TM_USE_FFTW
 fftw_plan spinor_fftw_plan(const spinor *spinor_in, spinor *spinor_out, int tt, int ll,
                            unsigned int forward, int fftw_flags);
 #endif
@@ -170,7 +170,7 @@ void spinor_mulp_half_phase(spinor *spinor_out, const spinor *spinor_in, double
  * read and write fftw wisdoms
  * this is supposed to speed up things
  */
-#ifdef HAVE_FFTW
+#ifdef TM_USE_FFTW
 void writeFFTWWisdom(int tt, int ll);
 void loadFFTWWisdom(spinor *spinor_in, spinor *spinor_out, int tt, int ll);
 #endif
diff --git a/solver/eigcg.c b/src/lib/solver/eigcg.c
similarity index 100%
rename from solver/eigcg.c
rename to src/lib/solver/eigcg.c
diff --git a/solver/eigcg.h b/src/lib/solver/eigcg.h
similarity index 100%
rename from solver/eigcg.h
rename to src/lib/solver/eigcg.h
diff --git a/solver/eigenvalues.c b/src/lib/solver/eigenvalues.c
similarity index 99%
rename from solver/eigenvalues.c
rename to src/lib/solver/eigenvalues.c
index 1725387d0..4d8d08887 100644
--- a/solver/eigenvalues.c
+++ b/src/lib/solver/eigenvalues.c
@@ -68,7 +68,7 @@ double eigenvalues(int *nr_of_eigenvalues, const int max_iterations, const doubl
                    const int even_odd_flag) {
   double returnvalue;
   _Complex double norm2;
-#ifdef HAVE_LAPACK
+#ifdef TM_LAPACK
   static int allocated = 0;
   char filename[200];
   FILE *ofs;
diff --git a/solver/eigenvalues.h b/src/lib/solver/eigenvalues.h
similarity index 100%
rename from solver/eigenvalues.h
rename to src/lib/solver/eigenvalues.h
diff --git a/solver/eigenvalues_bi.c b/src/lib/solver/eigenvalues_bi.c
similarity index 100%
rename from solver/eigenvalues_bi.c
rename to src/lib/solver/eigenvalues_bi.c
diff --git a/solver/eigenvalues_bi.h b/src/lib/solver/eigenvalues_bi.h
similarity index 100%
rename from solver/eigenvalues_bi.h
rename to src/lib/solver/eigenvalues_bi.h
diff --git a/solver/fgmres.c b/src/lib/solver/fgmres.c
similarity index 99%
rename from solver/fgmres.c
rename to src/lib/solver/fgmres.c
index 60d10fa72..154428124 100644
--- a/solver/fgmres.c
+++ b/src/lib/solver/fgmres.c
@@ -85,7 +85,7 @@ int fgmres(spinor *const P, spinor *const Q, const int m, const int max_restarts
   atime = gettime();
   cumiter_lgcr = 0;
   if (N == VOLUME) {
-    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf); /* #ifdef HAVE_LAPACK */
+    init_solver_field(&solver_field, VOLUMEPLUSRAND, nr_sf); /* #ifdef TM_LAPACK */
   } else {
     init_solver_field(&solver_field, VOLUMEPLUSRAND / 2, nr_sf);
   }
diff --git a/solver/fgmres.h b/src/lib/solver/fgmres.h
similarity index 100%
rename from solver/fgmres.h
rename to src/lib/solver/fgmres.h
diff --git a/solver/fgmres4complex.c b/src/lib/solver/fgmres4complex.c
similarity index 96%
rename from solver/fgmres4complex.c
rename to src/lib/solver/fgmres4complex.c
index 5d77f3ab0..83b8a72c1 100644
--- a/solver/fgmres4complex.c
+++ b/src/lib/solver/fgmres4complex.c
@@ -53,7 +53,7 @@
 #define _PSWITCH(s) s
 #define _F_TYPE double
 
-#include "fgmres4complex_body.c"
+#include "fgmres4complex_body.inc"
 
 #undef _PSWITCH
 #undef _F_TYPE
@@ -61,7 +61,7 @@
 #define _PSWITCH(s) s##_32
 #define _F_TYPE float
 
-#include "fgmres4complex_body.c"
+#include "fgmres4complex_body.inc"
 
 #undef _PSWITCH
 #undef _F_TYPE
diff --git a/solver/fgmres4complex.h b/src/lib/solver/fgmres4complex.h
similarity index 100%
rename from solver/fgmres4complex.h
rename to src/lib/solver/fgmres4complex.h
diff --git a/solver/fgmres4complex_body.c b/src/lib/solver/fgmres4complex_body.inc
similarity index 99%
rename from solver/fgmres4complex_body.c
rename to src/lib/solver/fgmres4complex_body.inc
index b11528c58..1f6fa9c89 100644
--- a/solver/fgmres4complex_body.c
+++ b/src/lib/solver/fgmres4complex_body.inc
@@ -57,7 +57,7 @@ int _PSWITCH(fgmres4complex)(_Complex _F_TYPE *const P, _Complex _F_TYPE *const
   int fltcntr = 0;
   double alphasave = 0;
 
-  _PSWITCH(init_lsolver_field)(&solver_field, /*why not N?*/ lda, nr_sf); /* #ifdef HAVE_LAPACK */
+  _PSWITCH(init_lsolver_field)(&solver_field, /*why not N?*/ lda, nr_sf); /* #ifdef TM_LAPACK */
 
   eps = sqrt(eps_sq);
   _PSWITCH(init_lgmres)(m, lda);
diff --git a/solver/gcr.c b/src/lib/solver/gcr.c
similarity index 100%
rename from solver/gcr.c
rename to src/lib/solver/gcr.c
diff --git a/solver/gcr.h b/src/lib/solver/gcr.h
similarity index 100%
rename from solver/gcr.h
rename to src/lib/solver/gcr.h
diff --git a/solver/gcr4complex.c b/src/lib/solver/gcr4complex.c
similarity index 96%
rename from solver/gcr4complex.c
rename to src/lib/solver/gcr4complex.c
index e6019f404..4d394cd7f 100644
--- a/solver/gcr4complex.c
+++ b/src/lib/solver/gcr4complex.c
@@ -41,7 +41,7 @@
 #define _C_TYPE _Complex double
 #define _F_TYPE double
 
-#include "gcr4complex_body.c"
+#include "gcr4complex_body.inc"
 
 #undef _PSWITCH
 #undef _PTSWITCH
@@ -53,7 +53,7 @@
 #define _C_TYPE _Complex float
 #define _F_TYPE float
 
-#include "gcr4complex_body.c"
+#include "gcr4complex_body.inc"
 
 #undef _PSWITCH
 #undef _PTSWITCH
diff --git a/solver/gcr4complex.h b/src/lib/solver/gcr4complex.h
similarity index 100%
rename from solver/gcr4complex.h
rename to src/lib/solver/gcr4complex.h
diff --git a/solver/gcr4complex_body.h b/src/lib/solver/gcr4complex_body.h
similarity index 100%
rename from solver/gcr4complex_body.h
rename to src/lib/solver/gcr4complex_body.h
diff --git a/solver/gcr4complex_body.c b/src/lib/solver/gcr4complex_body.inc
similarity index 100%
rename from solver/gcr4complex_body.c
rename to src/lib/solver/gcr4complex_body.inc
diff --git a/solver/generate_dfl_subspace.c b/src/lib/solver/generate_dfl_subspace.c
similarity index 100%
rename from solver/generate_dfl_subspace.c
rename to src/lib/solver/generate_dfl_subspace.c
diff --git a/solver/generate_dfl_subspace.h b/src/lib/solver/generate_dfl_subspace.h
similarity index 100%
rename from solver/generate_dfl_subspace.h
rename to src/lib/solver/generate_dfl_subspace.h
diff --git a/solver/gmres.c b/src/lib/solver/gmres.c
similarity index 100%
rename from solver/gmres.c
rename to src/lib/solver/gmres.c
diff --git a/solver/gmres.h b/src/lib/solver/gmres.h
similarity index 100%
rename from solver/gmres.h
rename to src/lib/solver/gmres.h
diff --git a/solver/gmres_dr.c b/src/lib/solver/gmres_dr.c
similarity index 99%
rename from solver/gmres_dr.c
rename to src/lib/solver/gmres_dr.c
index 4b9f429e0..781b32d86 100644
--- a/solver/gmres_dr.c
+++ b/src/lib/solver/gmres_dr.c
@@ -54,7 +54,7 @@
 #include "solver/solver_field.h"
 #include "su3.h"
 
-#ifndef HAVE_LAPACK
+#ifndef TM_LAPACK
 /* In case there is no lapack use normal gmres */
 int gmres_dr(spinor* const P, spinor* const Q, const int m, const int nr_ev, const int max_restarts,
              const double eps_sq, const int rel_prec, const int N, matrix_mult f) {
diff --git a/solver/gmres_dr.h b/src/lib/solver/gmres_dr.h
similarity index 100%
rename from solver/gmres_dr.h
rename to src/lib/solver/gmres_dr.h
diff --git a/solver/gmres_precon.c b/src/lib/solver/gmres_precon.c
similarity index 100%
rename from solver/gmres_precon.c
rename to src/lib/solver/gmres_precon.c
diff --git a/solver/gmres_precon.h b/src/lib/solver/gmres_precon.h
similarity index 100%
rename from solver/gmres_precon.h
rename to src/lib/solver/gmres_precon.h
diff --git a/solver/gram-schmidt.c b/src/lib/solver/gram-schmidt.c
similarity index 98%
rename from solver/gram-schmidt.c
rename to src/lib/solver/gram-schmidt.c
index 1e8da1d24..4c2ee4310 100644
--- a/solver/gram-schmidt.c
+++ b/src/lib/solver/gram-schmidt.c
@@ -26,7 +26,7 @@
 #include "linalg/blas.h"
 #include "linalg_eo.h"
 #include "su3spinor.h"
-#ifdef CRAY
+#ifdef TM_CRAY
 #include <fortran.h>
 #endif
 #include "gram-schmidt.h"
@@ -62,7 +62,7 @@ void IteratedClassicalGS(_Complex double v[], double *vnrm, int n, int m, _Compl
       work1[j] = scalar_prod((spinor *)(A + j * lda), (spinor *)v,
                              n * sizeof(_Complex double) / sizeof(spinor), 1);
     }
-#ifdef HAVE_LAPACK
+#ifdef TM_LAPACK
     _FT(zgemv)(fupl_n, &n, &m, &CMONE, A, &lda, work1, &ONE, &CONE, v, &ONE, 1);
 #endif
     (*vnrm) = sqrt(square_norm((spinor *)v, n * sizeof(_Complex double) / sizeof(spinor), 1));
@@ -75,7 +75,6 @@ void IteratedClassicalGS(_Complex double v[], double *vnrm, int n, int m, _Compl
   }
 }
 
-
 /*
  *  ModifiedGramSchmidt
  *
@@ -90,7 +89,7 @@ void ModifiedGS(_Complex double v[], int n, int m, _Complex double A[], int lda)
     s = scalar_prod((spinor *)(A + i * lda), (spinor *)v,
                     n * sizeof(_Complex double) / sizeof(spinor), 1);
     s = -s;
-#ifdef HAVE_LAPACK
+#ifdef TM_LAPACK
     _FT(zaxpy)(&n, &s, A + i * lda, &ONE, v, &ONE);
 #endif
   }
diff --git a/solver/gram-schmidt.h b/src/lib/solver/gram-schmidt.h
similarity index 100%
rename from solver/gram-schmidt.h
rename to src/lib/solver/gram-schmidt.h
diff --git a/solver/incr_eigcg.c b/src/lib/solver/incr_eigcg.c
similarity index 100%
rename from solver/incr_eigcg.c
rename to src/lib/solver/incr_eigcg.c
diff --git a/solver/incr_eigcg.h b/src/lib/solver/incr_eigcg.h
similarity index 100%
rename from solver/incr_eigcg.h
rename to src/lib/solver/incr_eigcg.h
diff --git a/solver/index_jd.c b/src/lib/solver/index_jd.c
similarity index 100%
rename from solver/index_jd.c
rename to src/lib/solver/index_jd.c
diff --git a/solver/index_jd.h b/src/lib/solver/index_jd.h
similarity index 100%
rename from solver/index_jd.h
rename to src/lib/solver/index_jd.h
diff --git a/solver/init_guess.c b/src/lib/solver/init_guess.c
similarity index 100%
rename from solver/init_guess.c
rename to src/lib/solver/init_guess.c
diff --git a/solver/init_guess.h b/src/lib/solver/init_guess.h
similarity index 100%
rename from solver/init_guess.h
rename to src/lib/solver/init_guess.h
diff --git a/solver/jdher.c b/src/lib/solver/jdher.c
similarity index 99%
rename from solver/jdher.c
rename to src/lib/solver/jdher.c
index c3d874781..2ed9b2246 100644
--- a/solver/jdher.c
+++ b/src/lib/solver/jdher.c
@@ -127,8 +127,8 @@ void jdher(int n, int lda, double tau, double tol, int kmax, int jmax, int jmin,
    * initialize with NULL, so we can free even unallocated ptrs */
   double *s = NULL, *resnrm = NULL, *resnrm_old = NULL, *dtemp = NULL, *rwork = NULL;
 
-  _Complex double *V_ = NULL, *V, *Vtmp = NULL, *U = NULL, *M = NULL, *Z = NULL, *Res_ = NULL, *Res,
-                  *eigwork = NULL, *temp1_ = NULL, *temp1;
+  _Complex double *V = NULL, *Vtmp = NULL, *U = NULL, *M = NULL, *Z = NULL, *Res, *eigwork = NULL,
+                  *temp1_ = NULL, *temp1;
 
   int *idx1 = NULL, *idx2 = NULL, *convind = NULL, *keepind = NULL, *solvestep = NULL,
       *actcorrits = NULL;
diff --git a/solver/jdher.h b/src/lib/solver/jdher.h
similarity index 100%
rename from solver/jdher.h
rename to src/lib/solver/jdher.h
diff --git a/solver/jdher_bi.c b/src/lib/solver/jdher_bi.c
similarity index 100%
rename from solver/jdher_bi.c
rename to src/lib/solver/jdher_bi.c
diff --git a/solver/jdher_bi.h b/src/lib/solver/jdher_bi.h
similarity index 100%
rename from solver/jdher_bi.h
rename to src/lib/solver/jdher_bi.h
diff --git a/solver/little_mg_precon_body.c b/src/lib/solver/little_mg_precon_body.inc
similarity index 100%
rename from solver/little_mg_precon_body.c
rename to src/lib/solver/little_mg_precon_body.inc
diff --git a/solver/little_project_eo_body.c b/src/lib/solver/little_project_eo_body.inc
similarity index 100%
rename from solver/little_project_eo_body.c
rename to src/lib/solver/little_project_eo_body.inc
diff --git a/solver/lu_solve.c b/src/lib/solver/lu_solve.c
similarity index 100%
rename from solver/lu_solve.c
rename to src/lib/solver/lu_solve.c
diff --git a/solver/lu_solve.h b/src/lib/solver/lu_solve.h
similarity index 100%
rename from solver/lu_solve.h
rename to src/lib/solver/lu_solve.h
diff --git a/solver/matrix_mult_typedef.h b/src/lib/solver/matrix_mult_typedef.h
similarity index 100%
rename from solver/matrix_mult_typedef.h
rename to src/lib/solver/matrix_mult_typedef.h
diff --git a/solver/matrix_mult_typedef_bi.h b/src/lib/solver/matrix_mult_typedef_bi.h
similarity index 100%
rename from solver/matrix_mult_typedef_bi.h
rename to src/lib/solver/matrix_mult_typedef_bi.h
diff --git a/solver/matrix_mult_typedef_nd.h b/src/lib/solver/matrix_mult_typedef_nd.h
similarity index 100%
rename from solver/matrix_mult_typedef_nd.h
rename to src/lib/solver/matrix_mult_typedef_nd.h
diff --git a/solver/mcr.c b/src/lib/solver/mcr.c
similarity index 99%
rename from solver/mcr.c
rename to src/lib/solver/mcr.c
index 707181cc2..184fa567f 100644
--- a/solver/mcr.c
+++ b/src/lib/solver/mcr.c
@@ -127,7 +127,7 @@ int mcr(spinor* const P, spinor* const Q, const int m, const int max_restarts, c
         break;
       }
 
-#ifdef _USE_HALFSPINOR
+#ifdef TM_USE_HALFSPINOR
       if (((err * err <= eps_sq) && (rel_prec == 0)) ||
           ((err * err <= eps_sq * norm_sq) && (rel_prec == 1))) {
         if (g_sloppy_precision_flag == 1) {
diff --git a/solver/mcr.h b/src/lib/solver/mcr.h
similarity index 100%
rename from solver/mcr.h
rename to src/lib/solver/mcr.h
diff --git a/solver/mcr4complex.c b/src/lib/solver/mcr4complex.c
similarity index 100%
rename from solver/mcr4complex.c
rename to src/lib/solver/mcr4complex.c
diff --git a/solver/mcr4complex.h b/src/lib/solver/mcr4complex.h
similarity index 100%
rename from solver/mcr4complex.h
rename to src/lib/solver/mcr4complex.h
diff --git a/solver/mixed_cg_her.c b/src/lib/solver/mixed_cg_her.c
similarity index 100%
rename from solver/mixed_cg_her.c
rename to src/lib/solver/mixed_cg_her.c
diff --git a/solver/mixed_cg_her.h b/src/lib/solver/mixed_cg_her.h
similarity index 100%
rename from solver/mixed_cg_her.h
rename to src/lib/solver/mixed_cg_her.h
diff --git a/solver/mixed_cg_mms_tm_nd.c b/src/lib/solver/mixed_cg_mms_tm_nd.c
similarity index 100%
rename from solver/mixed_cg_mms_tm_nd.c
rename to src/lib/solver/mixed_cg_mms_tm_nd.c
diff --git a/solver/mixed_cg_mms_tm_nd.h b/src/lib/solver/mixed_cg_mms_tm_nd.h
similarity index 100%
rename from solver/mixed_cg_mms_tm_nd.h
rename to src/lib/solver/mixed_cg_mms_tm_nd.h
diff --git a/solver/monomial_solve.c b/src/lib/solver/monomial_solve.c
similarity index 99%
rename from solver/monomial_solve.c
rename to src/lib/solver/monomial_solve.c
index 94873079f..0e73e9b0d 100644
--- a/solver/monomial_solve.c
+++ b/src/lib/solver/monomial_solve.c
@@ -77,7 +77,7 @@
 #include "solver/solver_params.h"
 #include "solver/solver_types.h"
 
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
 #include "DDalphaAMG_interface.h"
 #endif
 #ifdef TM_USE_QPHIX
@@ -184,7 +184,7 @@ int solve_degenerate(spinor* const P, spinor* const Q, solver_params_t solver_pa
   } else if (solver_type == BICGSTAB) {
     iteration_count = bicgstab_complex(P, Q, max_iter, eps_sq, rel_prec, N, f);
   }
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
   else if (solver_type == MG)
     iteration_count = MG_solver(P, Q, eps_sq, max_iter, rel_prec, N, g_gauge_field, f);
 #endif
@@ -283,7 +283,7 @@ int solve_mms_tm(spinor** const P, spinor* const Q, solver_params_t* solver_para
       if (solver_params->type == CGMMS) {
     iteration_count = cg_mms_tm(P, Q, solver_params);
   }
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
   else if (solver_params->type == MG) {
     // if the mg_mms_mass is larger than the smallest shift we use MG
     if (mg_no_shifts > 0 || mg_mms_mass >= solver_params->shifts[0]) {
@@ -507,7 +507,7 @@ int solve_mms_nd(spinor** const Pup, spinor** const Pdn, spinor* const Qup, spin
     } else if (solver_params->type == CGMMSND) {
       iteration_count = cg_mms_tm_nd(Pup, Pdn, Qup, Qdn, solver_params);
     }
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
     else if (solver_params->type == MG) {
       // if the mg_mms_mass is larger than the smallest shift we use MG
       if (mg_no_shifts > 0 || mg_mms_mass >= solver_params->shifts[0]) {
@@ -691,7 +691,7 @@ int solve_mms_nd_plus(spinor** const Pup, spinor** const Pdn, spinor* const Qup,
 
   int iteration_count = 0;
 
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
   // With MG we can solve directly the unsquared operator
   if (solver_params->type == MG) {
     matrix_mult_nd f = Qtm_tau1_ndpsi_add_Ishift;
diff --git a/solver/monomial_solve.h b/src/lib/solver/monomial_solve.h
similarity index 100%
rename from solver/monomial_solve.h
rename to src/lib/solver/monomial_solve.h
diff --git a/solver/mr.c b/src/lib/solver/mr.c
similarity index 98%
rename from solver/mr.c
rename to src/lib/solver/mr.c
index db6a60eb1..fed20f40b 100644
--- a/solver/mr.c
+++ b/src/lib/solver/mr.c
@@ -111,7 +111,7 @@ int mr(spinor* const P, spinor* const Q, const int max_iter, const double eps_sq
 #define _PSWITCH(s) s
 #define _PTSWITCH(s) s
 
-#include "mrblk_body.c"
+#include "mrblk_body.inc"
 
 #undef _F_TYPE
 #undef _C_TYPE
@@ -123,7 +123,7 @@ int mr(spinor* const P, spinor* const Q, const int max_iter, const double eps_sq
 #define _PSWITCH(s) s##_32
 #define _PTSWITCH(s) s##32
 
-#include "mrblk_body.c"
+#include "mrblk_body.inc"
 
 #undef _F_TYPE
 #undef _C_TYPE
diff --git a/solver/mr.h b/src/lib/solver/mr.h
similarity index 100%
rename from solver/mr.h
rename to src/lib/solver/mr.h
diff --git a/solver/mr4complex.c b/src/lib/solver/mr4complex.c
similarity index 100%
rename from solver/mr4complex.c
rename to src/lib/solver/mr4complex.c
diff --git a/solver/mr4complex.h b/src/lib/solver/mr4complex.h
similarity index 100%
rename from solver/mr4complex.h
rename to src/lib/solver/mr4complex.h
diff --git a/solver/mrblk_body.c b/src/lib/solver/mrblk_body.inc
similarity index 100%
rename from solver/mrblk_body.c
rename to src/lib/solver/mrblk_body.inc
diff --git a/solver/ortho.c b/src/lib/solver/ortho.c
similarity index 100%
rename from solver/ortho.c
rename to src/lib/solver/ortho.c
diff --git a/solver/ortho.h b/src/lib/solver/ortho.h
similarity index 100%
rename from solver/ortho.h
rename to src/lib/solver/ortho.h
diff --git a/solver/pcg_her.c b/src/lib/solver/pcg_her.c
similarity index 100%
rename from solver/pcg_her.c
rename to src/lib/solver/pcg_her.c
diff --git a/solver/pcg_her.h b/src/lib/solver/pcg_her.h
similarity index 100%
rename from solver/pcg_her.h
rename to src/lib/solver/pcg_her.h
diff --git a/solver/poly_precon.c b/src/lib/solver/poly_precon.c
similarity index 100%
rename from solver/poly_precon.c
rename to src/lib/solver/poly_precon.c
diff --git a/solver/poly_precon.h b/src/lib/solver/poly_precon.h
similarity index 100%
rename from solver/poly_precon.h
rename to src/lib/solver/poly_precon.h
diff --git a/solver/quicksort.c b/src/lib/solver/quicksort.c
similarity index 100%
rename from solver/quicksort.c
rename to src/lib/solver/quicksort.c
diff --git a/solver/quicksort.h b/src/lib/solver/quicksort.h
similarity index 100%
rename from solver/quicksort.h
rename to src/lib/solver/quicksort.h
diff --git a/solver/restart_X.c b/src/lib/solver/restart_X.c
similarity index 100%
rename from solver/restart_X.c
rename to src/lib/solver/restart_X.c
diff --git a/solver/restart_X.h b/src/lib/solver/restart_X.h
similarity index 100%
rename from solver/restart_X.h
rename to src/lib/solver/restart_X.h
diff --git a/solver/rg_mixed_cg_her.c b/src/lib/solver/rg_mixed_cg_her.c
similarity index 100%
rename from solver/rg_mixed_cg_her.c
rename to src/lib/solver/rg_mixed_cg_her.c
diff --git a/solver/rg_mixed_cg_her.h b/src/lib/solver/rg_mixed_cg_her.h
similarity index 100%
rename from solver/rg_mixed_cg_her.h
rename to src/lib/solver/rg_mixed_cg_her.h
diff --git a/solver/rg_mixed_cg_her_nd.c b/src/lib/solver/rg_mixed_cg_her_nd.c
similarity index 100%
rename from solver/rg_mixed_cg_her_nd.c
rename to src/lib/solver/rg_mixed_cg_her_nd.c
diff --git a/solver/rg_mixed_cg_her_nd.h b/src/lib/solver/rg_mixed_cg_her_nd.h
similarity index 100%
rename from solver/rg_mixed_cg_her_nd.h
rename to src/lib/solver/rg_mixed_cg_her_nd.h
diff --git a/solver/rg_mixed_cg_typedef.h b/src/lib/solver/rg_mixed_cg_typedef.h
similarity index 100%
rename from solver/rg_mixed_cg_typedef.h
rename to src/lib/solver/rg_mixed_cg_typedef.h
diff --git a/solver/solver.h b/src/lib/solver/solver.h
similarity index 100%
rename from solver/solver.h
rename to src/lib/solver/solver.h
diff --git a/solver/solver_field.c b/src/lib/solver/solver_field.c
similarity index 96%
rename from solver/solver_field.c
rename to src/lib/solver/solver_field.c
index 1cfd06515..5644a4cae 100644
--- a/solver/solver_field.c
+++ b/src/lib/solver/solver_field.c
@@ -37,7 +37,7 @@ int init_solver_field(spinor*** const solver_field, const int V, const int nr) {
   }
 
   /* allocate the full chunk of memory to solver_field[nr] */
-#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR))
+#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR))
   if ((void*)((*solver_field)[nr] = (spinor*)shmalloc((nr * V + 1) * sizeof(spinor))) == NULL) {
     fprintf(stderr, "malloc errno in init_solver_field: %d\n", errno);
     errno = 0;
@@ -74,7 +74,7 @@ int init_solver_field_32(spinor32*** const solver_field, const int V, const int
   }
 
   /* allocate the full chunk of memory to solver_field[nr] */
-#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR))
+#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR))
   if ((void*)((*solver_field)[nr] = (spinor32*)shmalloc((nr * V + 1) * sizeof(spinor32))) == NULL) {
     fprintf(stderr, "malloc errno in init_solver_field: %d\n", errno);
     errno = 0;
@@ -143,7 +143,7 @@ int init_lsolver_field(_Complex double*** const solver_field, const int V, const
   }
 
   /* allocate the full chunk of memory to solver_field[nr] */
-#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR))
+#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR))
   if ((void*)((*solver_field)[nr] =
                   (_Complex double*)shmalloc((nr * V + 1) * sizeof(_Complex double))) == NULL) {
     fprintf(stderr, "malloc errno in init_solver_field: %d\n", errno);
@@ -184,7 +184,7 @@ int init_lsolver_field_32(_Complex float*** const solver_field, const int V, con
   }
 
   /* allocate the full chunk of memory to solver_field[nr] */
-#if (defined _USE_SHMEM && !(defined _USE_HALFSPINOR))
+#if (defined TM_USE_SHMEM && !(defined TM_USE_HALFSPINOR))
   if ((void*)((*solver_field)[nr] =
                   (_Complex float*)shmalloc((nr * V + 1) * sizeof(_Complex float))) == NULL) {
     fprintf(stderr, "malloc errno in init_solver_field: %d\n", errno);
diff --git a/solver/solver_field.h b/src/lib/solver/solver_field.h
similarity index 100%
rename from solver/solver_field.h
rename to src/lib/solver/solver_field.h
diff --git a/solver/solver_params.h b/src/lib/solver/solver_params.h
similarity index 100%
rename from solver/solver_params.h
rename to src/lib/solver/solver_params.h
diff --git a/solver/solver_types.c b/src/lib/solver/solver_types.c
similarity index 100%
rename from solver/solver_types.c
rename to src/lib/solver/solver_types.c
diff --git a/solver/solver_types.h b/src/lib/solver/solver_types.h
similarity index 100%
rename from solver/solver_types.h
rename to src/lib/solver/solver_types.h
diff --git a/solver/sub_low_ev.c b/src/lib/solver/sub_low_ev.c
similarity index 100%
rename from solver/sub_low_ev.c
rename to src/lib/solver/sub_low_ev.c
diff --git a/solver/sub_low_ev.h b/src/lib/solver/sub_low_ev.h
similarity index 100%
rename from solver/sub_low_ev.h
rename to src/lib/solver/sub_low_ev.h
diff --git a/solver/sumr.c b/src/lib/solver/sumr.c
similarity index 100%
rename from solver/sumr.c
rename to src/lib/solver/sumr.c
diff --git a/solver/sumr.h b/src/lib/solver/sumr.h
similarity index 100%
rename from solver/sumr.h
rename to src/lib/solver/sumr.h
diff --git a/source_generation.c b/src/lib/source_generation.c
similarity index 100%
rename from source_generation.c
rename to src/lib/source_generation.c
diff --git a/source_generation.h b/src/lib/source_generation.h
similarity index 100%
rename from source_generation.h
rename to src/lib/source_generation.h
diff --git a/spinor_fft.c b/src/lib/spinor_fft.c
similarity index 98%
rename from spinor_fft.c
rename to src/lib/spinor_fft.c
index fb101d269..54ece4bda 100644
--- a/spinor_fft.c
+++ b/src/lib/spinor_fft.c
@@ -22,7 +22,7 @@
 #include "mpi_init.h"
 #include "spinor_fft.h"
 
-#ifdef HAVE_FFTW
+#ifdef TM_USE_FFTW
 #include <fftw3.h>
 #endif
 
@@ -35,7 +35,7 @@ void check_mpi_comm_membership(MPI_Comm commself, MPI_Comm commcheck, const char
                                const char *name_b, FILE *logFile);
 #endif
 
-#ifdef HAVE_FFTW
+#ifdef TM_USE_FFTW
 fftw_plan spinor_fftw_plan2d(spinor *spinor_in, spinor *spinor_out, int dim0, int dim1, int howmany,
                              unsigned int forward, int fftw_flags);
 #endif
@@ -50,7 +50,7 @@ void spinor_fft_transpose_xp_t(spinor *fieldout, spinor *fieldin, int dim0, int
 void spinor_fft_reduce_2d(spinor *localSpinorField, int *collectionRank, spinor ***field_collection,
                           spinor **membuff) {
   /* this implementation is intended for four dimensional parallelisation */
-#if (defined PARALLELXYZT && defined TM_USE_MPI && defined HAVE_FFTW)
+#if (defined TM_PARALLELXYZT && defined TM_USE_MPI && defined TM_USE_FFTW)
 
   int sendRecvCoord[4];
   int i;
@@ -195,7 +195,7 @@ void spinor_fft_reduce_2d(spinor *localSpinorField, int *collectionRank, spinor
 void spinor_fft_redist_2d(spinor *localSpinorField, int collectionRank, spinor **field_collection,
                           spinor *membuff) {
   /* this implementation is intended for four dimensional parallelisation */
-#if (defined PARALLELXYZT && defined TM_USE_MPI && defined HAVE_FFTW)
+#if (defined TM_PARALLELXYZT && defined TM_USE_MPI && defined TM_USE_FFTW)
 
   int sendRecvCoord[4];
   int dims[] = {g_nproc_t, g_nproc_x, g_nproc_y, g_nproc_z};
@@ -326,7 +326,7 @@ void spinor_fft_redist_2d(spinor *localSpinorField, int collectionRank, spinor *
 #endif
 }
 
-#ifdef HAVE_FFTW
+#ifdef TM_USE_FFTW
 fftw_plan spinor_fftw_plan2d(spinor *spinor_in, spinor *spinor_out, int dim0, int dim1,
                              int howmany_wospin, unsigned int forward, int fftw_flags) {
   /*    int index_s = gsi(get_index(it, ix, iy, iz, T, L)); */
diff --git a/spinor_fft.h b/src/lib/spinor_fft.h
similarity index 100%
rename from spinor_fft.h
rename to src/lib/spinor_fft.h
diff --git a/start.c b/src/lib/start.c
similarity index 100%
rename from start.c
rename to src/lib/start.c
diff --git a/start.h b/src/lib/start.h
similarity index 100%
rename from start.h
rename to src/lib/start.h
diff --git a/struct_accessors.h b/src/lib/struct_accessors.h
similarity index 100%
rename from struct_accessors.h
rename to src/lib/struct_accessors.h
diff --git a/su3.h b/src/lib/su3.h
similarity index 100%
rename from su3.h
rename to src/lib/su3.h
diff --git a/su3adj.h b/src/lib/su3adj.h
similarity index 100%
rename from su3adj.h
rename to src/lib/su3adj.h
diff --git a/su3spinor.h b/src/lib/su3spinor.h
similarity index 100%
rename from su3spinor.h
rename to src/lib/su3spinor.h
diff --git a/tensors.h b/src/lib/tensors.h
similarity index 100%
rename from tensors.h
rename to src/lib/tensors.h
diff --git a/test/check_geometry.c b/src/lib/test/check_geometry.c
similarity index 98%
rename from test/check_geometry.c
rename to src/lib/test/check_geometry.c
index 74589a739..20f7acc96 100644
--- a/test/check_geometry.c
+++ b/src/lib/test/check_geometry.c
@@ -90,7 +90,8 @@ int check_geometry() {
           ix = g_ipt[x0][x1][x2][x3];
 
           iy0 = g_iup[ix][0];
-#if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || \
+     defined TM_PARALLELXYZT)
           if (x0 != T - 1) {
             iz0 = g_ipt[(x0 + 1) % T][x1][x2][x3];
           } else {
@@ -107,7 +108,7 @@ int check_geometry() {
 #endif
 
           iy1 = g_iup[ix][1];
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
           if (x1 != LX - 1) {
             iz1 = g_ipt[x0][(x1 + 1) % LX][x2][x3];
           } else {
@@ -125,7 +126,7 @@ int check_geometry() {
 #endif
 
           iy2 = g_iup[ix][2];
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
           if (x2 != LY - 1) {
             iz2 = g_ipt[x0][x1][(x2 + 1) % LY][x3];
           } else {
@@ -145,7 +146,7 @@ int check_geometry() {
 #endif
 
           iy3 = g_iup[ix][3];
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
           if (x3 != LZ - 1) {
             iz3 = g_ipt[x0][x1][x2][(x3 + 1) % LZ];
           } else {
@@ -176,7 +177,8 @@ int check_geometry() {
           }
 
           iy0 = g_idn[ix][0];
-#if (defined PARALLELT || defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELT || defined TM_PARALLELXT || defined TM_PARALLELXYT || \
+     defined TM_PARALLELXYZT)
           if (x0 != 0) {
             iz0 = g_ipt[(x0 + T - 1) % T][x1][x2][x3];
           } else {
@@ -194,7 +196,7 @@ int check_geometry() {
 #endif
 
           iy1 = g_idn[ix][1];
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
           if (x1 != 0) {
             iz1 = g_ipt[x0][(x1 + LX - 1) % LX][x2][x3];
           } else {
@@ -212,7 +214,7 @@ int check_geometry() {
           iz1 = g_ipt[x0][(x1 + LX - 1) % LX][x2][x3];
 #endif
           iy2 = g_idn[ix][2];
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
           if (x2 != 0) {
             iz2 = g_ipt[x0][x1][(x2 + LY - 1) % LY][x3];
           } else {
@@ -231,7 +233,7 @@ int check_geometry() {
 #endif
 
           iy3 = g_idn[ix][3];
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
           if (x3 != 0) {
             iz3 = g_ipt[x0][x1][x2][(x3 + LZ - 1) % LZ];
           } else {
@@ -262,8 +264,8 @@ int check_geometry() {
           }
 
           /* The edges */
-          /* In case of PARALLELT there is actually no edge to take care of */
-#if ((defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT))
+          /* In case of TM_PARALLELT there is actually no edge to take care of */
+#if ((defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT))
           if (x0 == 0) {
             iy0 = g_idn[g_idn[ix][1]][0];
             if (x1 != 0) {
@@ -318,7 +320,7 @@ int check_geometry() {
 
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
           if (x0 == 0) {
             iy0 = g_idn[g_idn[ix][2]][0];
             if (x2 != 0) {
@@ -421,7 +423,7 @@ int check_geometry() {
             }
           }
 #endif
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
           if (x0 == 0) {
             iy0 = g_idn[g_idn[ix][3]][0];
             if (x3 != 0) {
@@ -700,7 +702,7 @@ int check_geometry() {
       }
     }
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     for (x0 = 0; x0 < T + 2; x0++) {
       for (x2 = 0; x2 < LY; x2++) {
         for (x3 = 0; x3 < LZ; x3++) {
@@ -827,7 +829,7 @@ int check_geometry() {
     }
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
 
     for (x0 = 0; x0 < T + 2; x0++) {
       for (x1 = 0; x1 < LX + 2; x1++) {
@@ -1027,7 +1029,7 @@ int check_geometry() {
       }
     }
 #endif
-#ifdef PARALLELXYZT
+#ifdef TM_PARALLELXYZT
     for (x0 = 0; x0 < T + 2; x0++) {
       for (x1 = 0; x1 < LX + 2; x1++) {
         for (x2 = 0; x2 < LY + 2; x2++) {
@@ -1554,4 +1556,3 @@ int check_geometry() {
 
   return (0);
 }
-
diff --git a/test/check_geometry.h b/src/lib/test/check_geometry.h
similarity index 100%
rename from test/check_geometry.h
rename to src/lib/test/check_geometry.h
diff --git a/test/check_nan.c b/src/lib/test/check_nan.c
similarity index 100%
rename from test/check_nan.c
rename to src/lib/test/check_nan.c
diff --git a/test/check_nan.h b/src/lib/test/check_nan.h
similarity index 100%
rename from test/check_nan.h
rename to src/lib/test/check_nan.h
diff --git a/test/check_overlap.c b/src/lib/test/check_overlap.c
similarity index 97%
rename from test/check_overlap.c
rename to src/lib/test/check_overlap.c
index 43742a21b..d34e2ae5b 100644
--- a/test/check_overlap.c
+++ b/src/lib/test/check_overlap.c
@@ -105,12 +105,7 @@ int main(int argc, char *argv[]) {
   char *gaugecksum = NULL;
   double plaquette_energy;
 
-#ifdef _KOJAK_INST
-#pragma pomp inst init
-#pragma pomp inst begin(main)
-#endif
-
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
   MPI_File fh;
   LemonWriter *lemonWriter;
   paramsXlfInfo *xlfInfo;
@@ -188,7 +183,7 @@ int main(int argc, char *argv[]) {
   g_dbw2rand = 0;
 #endif
 
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   j = init_gauge_field(VOLUMEPLUSRAND, 1);
 #else
   j = init_gauge_field(VOLUMEPLUSRAND, 0);
@@ -273,7 +268,7 @@ int main(int argc, char *argv[]) {
 
   phmc_invmaxev = 1.;
 
-#ifdef _USE_HALFSPINOR
+#ifdef TM_USE_HALFSPINOR
   j = init_dirac_halfspinor();
   if (j != 0) {
     fprintf(stderr, "Not enough memory for halffield! Aborting...\n");
@@ -286,7 +281,7 @@ int main(int argc, char *argv[]) {
       exit(-1);
     }
   }
-#if (defined _PERSISTENT)
+#if (defined TM_PERSISTENT)
   if (even_odd_flag) {
     init_xchange_halffield();
   }
@@ -299,9 +294,9 @@ int main(int argc, char *argv[]) {
       printf("Reading Gauge field from file %s\n", conf_filename);
       fflush(stdout);
     }
-#ifdef HAVE_LIBLEMON
+#ifdef TM_USE_LEMON
     read_lemon_gauge_field_parallel(conf_filename, &gaugecksum, &xlfmessage, &gaugelfn);
-#else  /* HAVE_LIBLEMON */
+#else  /* TM_USE_LEMON */
     if (xlfmessage != (char *)NULL) free(xlfmessage);
     if (gaugelfn != (char *)NULL) free(gaugelfn);
     if (gaugecksum != (char *)NULL) free(gaugecksum);
@@ -310,7 +305,7 @@ int main(int argc, char *argv[]) {
     gaugelfn = read_message(conf_filename, "ildg-data-lfn");
     gaugecksum = read_message(conf_filename, "scidac-checksum");
     printf("%s \n", gaugecksum);
-#endif /* HAVE_LIBLEMON */
+#endif /* TM_USE_LEMON */
     if (g_proc_id == 0) {
       printf("done!\n");
       fflush(stdout);
@@ -389,7 +384,4 @@ int main(int argc, char *argv[]) {
     free_chi_dn_spinor_field();
   }
   return (0);
-#ifdef _KOJAK_INST
-#pragma pomp inst end(main)
-#endif
 }
diff --git a/test/check_xchange.c b/src/lib/test/check_xchange.c
similarity index 98%
rename from test/check_xchange.c
rename to src/lib/test/check_xchange.c
index db5d97cb3..a20f86df4 100644
--- a/test/check_xchange.c
+++ b/src/lib/test/check_xchange.c
@@ -63,7 +63,7 @@ int check_xchange() {
       }
     }
 
-#if ((defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT))
     for (x0 = 0; x0 < T; x0++) {
       for (x2 = 0; x2 < LY; x2++) {
         for (x3 = 0; x3 < LZ; x3++) {
@@ -74,7 +74,7 @@ int check_xchange() {
     }
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     for (x0 = 0; x0 < T; x0++) {
       for (x1 = 0; x1 < LX; x1++) {
         for (x3 = 0; x3 < LZ; x3++) {
@@ -113,7 +113,7 @@ int check_xchange() {
       }
     }
 
-#if ((defined PARALLELXT) || (defined PARALLELXYT) || (defined PARALLELXYZT))
+#if ((defined TM_PARALLELXT) || (defined TM_PARALLELXYT) || (defined TM_PARALLELXYZT))
     x = (double*)&g_spinor_field[0][(VOLUME + 2 * LX * LY * LZ) / 2];
     for (i = 0; i < T * LY * LZ / 2 * 24; i++, x++) {
       if ((int)(*x) != g_nb_x_up) {
@@ -139,7 +139,7 @@ int check_xchange() {
     }
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     x = (double*)&g_spinor_field[0][(VOLUME + 2 * LX * LY * LZ) / 2 + 2 * T * LY * LZ / 2];
     for (i = 0; i < T * LX * LZ / 2 * 24; i++, x++) {
       if ((int)(*x) != g_nb_y_up) {
@@ -166,7 +166,7 @@ int check_xchange() {
     }
 #endif
 
-#if (defined PARALLELXYZT)
+#if (defined TM_PARALLELXYZT)
     set_spinor_field(0, -1.);
 
     for (x0 = 0; x0 < T; x0++) {
@@ -270,7 +270,7 @@ int check_xchange() {
       }
     }
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     /* Set the x boundary */
     for (x0 = 0; x0 < T; x0++) {
       for (x2 = 0; x2 < LY; x2++) {
@@ -284,7 +284,7 @@ int check_xchange() {
     }
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     /* Set the y boundary */
     for (x0 = 0; x0 < T; x0++) {
       for (x1 = 0; x1 < LX; x1++) {
@@ -298,7 +298,7 @@ int check_xchange() {
     }
 #endif
 
-#if (defined PARALLELXYZT)
+#if (defined TM_PARALLELXYZT)
     /* Set the z boundary */
     for (x0 = 0; x0 < T; x0++) {
       for (x1 = 0; x1 < LX; x1++) {
@@ -340,7 +340,7 @@ int check_xchange() {
       }
     }
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     x = (double*)&g_gauge_field[(T + 2) * LX * LY * LZ][0];
     for (i = 0; i < T * LY * LZ * 72; i++, x++) {
       if ((int)(*x) != g_nb_x_up) {
@@ -368,7 +368,7 @@ int check_xchange() {
     }
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     x = (double*)&g_gauge_field[(T + 2) * LX * LY * LZ + 2 * T * LZ * LY][0];
     for (i = 0; i < T * LX * LZ * 72; i++, x++) {
       if ((int)(*x) != g_nb_y_up) {
@@ -396,7 +396,7 @@ int check_xchange() {
     }
 #endif
 
-#if (defined PARALLELXYZT)
+#if (defined TM_PARALLELXYZT)
     x = (double*)g_gauge_field[VOLUME + 2 * LX * LY * LZ + 2 * T * LZ * LY + 2 * T * LX * LZ];
     for (i = 0; i < T * LX * LY * 72; i++, x++) {
       if ((int)(*x) != g_nb_z_up) {
@@ -504,7 +504,7 @@ int check_xchange() {
     MPI_Barrier(MPI_COMM_WORLD);
 
     /* The edges */
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     fprintf(stdout, "# Rank: %d, (c0, c1, c2, c3) = (%d, %d, %d, %d)\n", g_proc_id,
             g_proc_coords[0], g_proc_coords[1], g_proc_coords[2], g_proc_coords[3]);
     fflush(stdout);
@@ -577,7 +577,7 @@ int check_xchange() {
     }
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     di[1] = (g_proc_coords[1] - 1) % g_nproc_x;
     di[2] = (g_proc_coords[2] - 1) % g_nproc_y;
     di[0] = g_proc_coords[0];
@@ -712,7 +712,7 @@ int check_xchange() {
       }
     }
 #endif
-#ifdef PARALLELXYZT
+#ifdef TM_PARALLELXYZT
     di[1] = (g_proc_coords[1] - 1) % g_nproc_x;
     di[3] = (g_proc_coords[3] - 1) % g_nproc_z;
     di[0] = g_proc_coords[0];
@@ -1001,7 +1001,7 @@ int check_xchange() {
         }
       }
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
       x = (double*)&g_gauge_field[VOLUMEPLUSRAND + 2 * LX * LY * LZ][0];
       for (i = 0; i < T * LY * LZ * 72; i++, x++) {
         if ((int)(*x) != g_nb_x_up) {
@@ -1029,7 +1029,7 @@ int check_xchange() {
       }
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
       x = (double*)&g_gauge_field[VOLUMEPLUSRAND + 2 * LX * LY * LZ + 2 * T * LZ * LY][0];
       for (i = 0; i < T * LX * LZ * 72; i++, x++) {
         if ((int)(*x) != g_nb_y_up) {
@@ -1058,7 +1058,7 @@ int check_xchange() {
       }
 #endif
 
-#if (defined PARALLELXYZT)
+#if (defined TM_PARALLELXYZT)
       x = (double*)&g_gauge_field[VOLUMEPLUSRAND + 2 * LX * LY * LZ + 2 * T * LZ * LY +
                                   2 * T * LX * LZ][0];
       for (i = 0; i < T * LX * LY * 72; i++, x++) {
@@ -1088,7 +1088,7 @@ int check_xchange() {
       }
 #endif
 
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
 
       set_gauge_field(-1.);
 
@@ -1279,7 +1279,7 @@ int check_xchange() {
           }
         }
       }
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
       /* Set the tz boundary */
       for (x1 = 0; x1 < LX; x1++) {
         for (x2 = 0; x2 < LY; x2++) {
@@ -1332,7 +1332,7 @@ int check_xchange() {
       xchange_gauge(g_gauge_field);
       MPI_Barrier(MPI_COMM_WORLD);
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
       di[0] = (g_proc_coords[0] - 1) % g_nproc_t;
       di[1] = (g_proc_coords[1] - 1) % g_nproc_x;
       di[2] = g_proc_coords[2];
@@ -1453,7 +1453,7 @@ int check_xchange() {
       }
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
 
       di[1] = (g_proc_coords[1] - 1) % g_nproc_x;
       di[2] = (g_proc_coords[2] - 1) % g_nproc_y;
@@ -1693,7 +1693,7 @@ int check_xchange() {
         }
       }
 #endif
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
 
       di[0] = (g_proc_coords[0] - 1) % g_nproc_t;
       di[3] = (g_proc_coords[3] - 1) % g_nproc_z;
@@ -2123,7 +2123,7 @@ int check_xchange() {
         }
       }
     }
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     for (x0 = 0; x0 < T; x0++) {
       for (x2 = 0; x2 < LY; x2++) {
         for (x3 = 0; x3 < LZ; x3++) {
@@ -2145,7 +2145,7 @@ int check_xchange() {
       }
     }
 #endif
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     for (x0 = 0; x0 < T; x0++) {
       for (x1 = 0; x1 < LX; x1++) {
         for (x3 = 0; x3 < LZ; x3++) {
@@ -2167,7 +2167,7 @@ int check_xchange() {
       }
     }
 #endif
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
     for (x0 = 0; x0 < T; x0++) {
       for (x1 = 0; x1 < LX; x1++) {
         for (x2 = 0; x2 < LY; x2++) {
@@ -2194,7 +2194,7 @@ int check_xchange() {
     xchange_deri(df0);
     MPI_Barrier(MPI_COMM_WORLD);
 
-#if defined PARALLELT
+#if defined TM_PARALLELT
     for (x1 = 0; x1 < LX; x1++) {
       for (x2 = 0; x2 < LY; x2++) {
         for (x3 = 0; x3 < LZ; x3++) {
@@ -2228,7 +2228,7 @@ int check_xchange() {
       }
     }
 #endif
-#if defined PARALLELXT
+#if defined TM_PARALLELXT
     for (x1 = 1; x1 < LX - 1; x1++) {
       for (x2 = 0; x2 < LY; x2++) {
         for (x3 = 0; x3 < LZ; x3++) {
@@ -2351,7 +2351,7 @@ int check_xchange() {
       }
     }
 #endif
-#if defined PARALLELXYT
+#if defined TM_PARALLELXYT
     for (x1 = 1; x1 < LX - 1; x1++) {
       for (x2 = 1; x2 < LY - 1; x2++) {
         for (x3 = 0; x3 < LZ; x3++) {
@@ -2748,7 +2748,7 @@ int check_xchange() {
 
 #endif
 
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
     for (x1 = 1; x1 < LX - 1; x1++) {
       for (x2 = 1; x2 < LY - 1; x2++) {
         for (x3 = 1; x3 < LZ - 1; x3++) {
@@ -3026,7 +3026,7 @@ int check_xchange() {
       }
     }
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
 
     // xt edge
     for (x2 = 0; x2 < LY; x2++) {
@@ -3063,7 +3063,7 @@ int check_xchange() {
     }
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
 
     // ty edge
     for (x1 = 0; x1 < LX; x1++) {
@@ -3139,7 +3139,7 @@ int check_xchange() {
     xchange_deri(df0);
     MPI_Barrier(MPI_COMM_WORLD);
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
 
     di[0] = (g_proc_coords[0] - 1) % g_nproc_t;
     di[1] = (g_proc_coords[1] - 1) % g_nproc_x;
@@ -3156,7 +3156,7 @@ int check_xchange() {
     di[1] = (g_proc_coords[1] + 1) % g_nproc_x;
     MPI_Cart_rank(g_cart_grid, di, &pp);
 
-#ifdef PARALLELXT
+#ifdef TM_PARALLELXT
     for (x2 = 0; x2 < LY; x2++) {
       for (x3 = 0; x3 < LZ; x3++) {
 #else
@@ -3224,7 +3224,7 @@ int check_xchange() {
 
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
 
     // xy-edge
     di[1] = (g_proc_coords[1] - 1) % g_nproc_x;
diff --git a/test/hopping_test.README b/src/lib/test/hopping_test.README
similarity index 100%
rename from test/hopping_test.README
rename to src/lib/test/hopping_test.README
diff --git a/test/hopping_test.input.compare b/src/lib/test/hopping_test.input.compare
similarity index 100%
rename from test/hopping_test.input.compare
rename to src/lib/test/hopping_test.input.compare
diff --git a/test/hopping_test.input.new b/src/lib/test/hopping_test.input.new
similarity index 100%
rename from test/hopping_test.input.new
rename to src/lib/test/hopping_test.input.new
diff --git a/test/hopping_test.input.start b/src/lib/test/hopping_test.input.start
similarity index 100%
rename from test/hopping_test.input.start
rename to src/lib/test/hopping_test.input.start
diff --git a/test/hopping_test_generate_script b/src/lib/test/hopping_test_generate_script
similarity index 100%
rename from test/hopping_test_generate_script
rename to src/lib/test/hopping_test_generate_script
diff --git a/test/hopping_test_qscript b/src/lib/test/hopping_test_qscript
similarity index 100%
rename from test/hopping_test_qscript
rename to src/lib/test/hopping_test_qscript
diff --git a/test/measure_rectangles.debug.c b/src/lib/test/measure_rectangles.debug.c
similarity index 98%
rename from test/measure_rectangles.debug.c
rename to src/lib/test/measure_rectangles.debug.c
index 75a71d2b2..422f681b2 100644
--- a/test/measure_rectangles.debug.c
+++ b/src/lib/test/measure_rectangles.debug.c
@@ -61,10 +61,10 @@ double measure_rectangles() {
   char filename[100];
 
   sprintf(filename, "debug_mr.s");
-#ifdef PARALLELT
+#ifdef TM_PARALLELT
   sprintf(filename, "debug_mr.pt.%d", g_proc_id);
 #endif
-#ifdef PARALLELXT
+#ifdef TM_PARALLELXT
   sprintf(filename, "debug_mr.pxt.%d", g_proc_id);
 #endif
   debugfile = fopen(filename, "w");
diff --git a/test/overlaptests.c b/src/lib/test/overlaptests.c
similarity index 100%
rename from test/overlaptests.c
rename to src/lib/test/overlaptests.c
diff --git a/test/overlaptests.h b/src/lib/test/overlaptests.h
similarity index 100%
rename from test/overlaptests.h
rename to src/lib/test/overlaptests.h
diff --git a/test/qdran64.h b/src/lib/test/qdran64.h
similarity index 100%
rename from test/qdran64.h
rename to src/lib/test/qdran64.h
diff --git a/tm_debug_printf.c b/src/lib/tm_debug_printf.c
similarity index 100%
rename from tm_debug_printf.c
rename to src/lib/tm_debug_printf.c
diff --git a/tm_debug_printf.h b/src/lib/tm_debug_printf.h
similarity index 100%
rename from tm_debug_printf.h
rename to src/lib/tm_debug_printf.h
diff --git a/update_backward_gauge.c b/src/lib/update_backward_gauge.c
similarity index 99%
rename from update_backward_gauge.c
rename to src/lib/update_backward_gauge.c
index a041e577c..b28ab6acf 100644
--- a/update_backward_gauge.c
+++ b/src/lib/update_backward_gauge.c
@@ -25,7 +25,7 @@
 #include "su3.h"
 #include "update_backward_gauge.h"
 
-#if defined _USE_HALFSPINOR
+#if defined TM_USE_HALFSPINOR
 void update_backward_gauge(su3** const gf) {
 #ifdef TM_USE_OMP
 #pragma omp parallel
diff --git a/update_backward_gauge.h b/src/lib/update_backward_gauge.h
similarity index 100%
rename from update_backward_gauge.h
rename to src/lib/update_backward_gauge.h
diff --git a/update_gauge.c b/src/lib/update_gauge.c
similarity index 73%
rename from update_gauge.c
rename to src/lib/update_gauge.c
index dde4cbf31..7a7dd34a1 100644
--- a/update_gauge.c
+++ b/src/lib/update_gauge.c
@@ -39,7 +39,7 @@
 #include "su3spinor.h"
 #include "update_gauge.h"
 #include "xchange/xchange.h"
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
 #include "DDalphaAMG_interface.h"
 #endif
 /*******************************************************
@@ -51,48 +51,28 @@
 void update_gauge(const double step, hamiltonian_field_t *const hf) {
   tm_stopwatch_push(&g_timers, __func__, "");
   update_tm_gauge_id(&g_gauge_state, step);
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
   MG_update_gauge(step);
 #endif
 
 #ifdef TM_USE_OMP
-#define static
-#pragma omp parallel
-  {
+#pragma omp parallel for
 #endif
-    int i, mu;
-    static su3 v, w;
-    su3 *z;
-    static su3adj deriv;
-    su3adj *xm;
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(updategauge)
-#endif
-
-#ifdef TM_USE_OMP
-#undef static
-#endif
-
-#ifdef TM_USE_OMP
-#pragma omp for
-#endif
-    for (i = 0; i < VOLUME; i++) {
-      for (mu = 0; mu < 4; mu++) {
-        /* moment[i][mu] = h_{i,mu}^{alpha} */
-        xm = &hf->momenta[i][mu];
-        z = &hf->gaugefield[i][mu];
-        _su3adj_assign_const_times_su3adj(deriv, step, *xm);
-        exposu3(&w, &deriv);
-        restoresu3(&v, &w);
-        _su3_times_su3(w, v, *z);
-        restoresu3(&v, &w);
-        _su3_assign(*z, v);
-      }
+  for (int i = 0; i < VOLUME; i++) {
+    for (int mu = 0; mu < 4; mu++) {
+      /* moment[i][mu] = h_{i,mu}^{alpha} */
+      su3 v, w;
+      su3adj *xm = &hf->momenta[i][mu];
+      su3 *z = &hf->gaugefield[i][mu];
+      su3adj deriv;
+      _su3adj_assign_const_times_su3adj(deriv, step, *xm);
+      exposu3(&w, &deriv);
+      restoresu3(&v, &w);
+      _su3_times_su3(w, v, *z);
+      restoresu3(&v, &w);
+      _su3_assign(*z, v);
     }
-
-#ifdef TM_USE_OMP
-  } /* OpenMP parallel closing brace */
-#endif
+  }
 
 #ifdef TM_USE_MPI
   /* for parallelization */
@@ -115,7 +95,4 @@ void update_gauge(const double step, hamiltonian_field_t *const hf) {
 
   tm_stopwatch_pop(&g_timers, 0, 1, "");
   return;
-#ifdef _KOJAK_INST
-#pragma pomp inst end(updategauge)
-#endif
 }
diff --git a/update_gauge.h b/src/lib/update_gauge.h
similarity index 100%
rename from update_gauge.h
rename to src/lib/update_gauge.h
diff --git a/update_momenta.c b/src/lib/update_momenta.c
similarity index 100%
rename from update_momenta.c
rename to src/lib/update_momenta.c
diff --git a/update_momenta.h b/src/lib/update_momenta.h
similarity index 100%
rename from update_momenta.h
rename to src/lib/update_momenta.h
diff --git a/update_momenta_fg.c b/src/lib/update_momenta_fg.c
similarity index 98%
rename from update_momenta_fg.c
rename to src/lib/update_momenta_fg.c
index 0aab582cd..cf1e9e4fb 100644
--- a/update_momenta_fg.c
+++ b/src/lib/update_momenta_fg.c
@@ -44,7 +44,7 @@
 #include "su3adj.h"
 #include "su3spinor.h"
 #include "xchange/xchange.h"
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
 #include "DDalphaAMG_interface.h"
 #endif
 
@@ -123,7 +123,7 @@ void fg_update_momenta_reset_gaugefield(const double step, hamiltonian_field_t *
  *******************************************************/
 void update_momenta_fg(int *mnllist, double step, const int no, hamiltonian_field_t *const hf,
                        double step0) {
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
   MG_update_gauge(0.0);
 #endif
   if (g_exposu3_no_c == 0) init_exposu3();
@@ -156,7 +156,7 @@ void update_momenta_fg(int *mnllist, double step, const int no, hamiltonian_fiel
   /* for parallelization */
   xchange_gauge(hf->gaugefield);
 #endif
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
   MG_update_gauge(0.0);
 #endif
 
@@ -201,7 +201,7 @@ void update_momenta_fg(int *mnllist, double step, const int no, hamiltonian_fiel
   /* for parallelization */
   xchange_gauge(hf->gaugefield);
 #endif
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
   MG_update_gauge(0.0);
 #endif
 
diff --git a/update_momenta_fg.h b/src/lib/update_momenta_fg.h
similarity index 100%
rename from update_momenta_fg.h
rename to src/lib/update_momenta_fg.h
diff --git a/update_tm.c b/src/lib/update_tm.c
similarity index 99%
rename from update_tm.c
rename to src/lib/update_tm.c
index 72a6194e7..3f1cdc5d5 100644
--- a/update_tm.c
+++ b/src/lib/update_tm.c
@@ -64,7 +64,7 @@
 #include "su3.h"
 #include "update_tm.h"
 #include "xchange/xchange.h"
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
 #include "DDalphaAMG_interface.h"
 #endif
 
@@ -120,7 +120,7 @@ int update_tm(double *plaquette_energy, double *rectangle_energy, char *filename
     }
   }
 
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
   MG_reset();
 #endif
 
@@ -211,7 +211,7 @@ int update_tm(double *plaquette_energy, double *rectangle_energy, char *filename
       free(xlfInfo);
     }
 
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
     MG_reset();
 #endif
 
@@ -354,7 +354,7 @@ int update_tm(double *plaquette_energy, double *rectangle_energy, char *filename
     // will result in the updated gauge field to be propagated
     update_tm_gauge_id(&g_gauge_state, TM_GAUGE_PROPAGATE_THRESHOLD);
     update_tm_gauge_id(&g_gauge_state_32, TM_GAUGE_PROPAGATE_THRESHOLD);
-#ifdef DDalphaAMG
+#ifdef TM_USE_DDalphaAMG
     MG_reset();
 #endif
   }
diff --git a/update_tm.h b/src/lib/update_tm.h
similarity index 100%
rename from update_tm.h
rename to src/lib/update_tm.h
diff --git a/util/io.c b/src/lib/util/io.c
similarity index 100%
rename from util/io.c
rename to src/lib/util/io.c
diff --git a/util/io.h b/src/lib/util/io.h
similarity index 100%
rename from util/io.h
rename to src/lib/util/io.h
diff --git a/util/laguer/chebyRoot.C b/src/lib/util/laguer/chebyRoot.C
similarity index 100%
rename from util/laguer/chebyRoot.C
rename to src/lib/util/laguer/chebyRoot.C
diff --git a/util/laguer/chebyRoot.H b/src/lib/util/laguer/chebyRoot.H
similarity index 100%
rename from util/laguer/chebyRoot.H
rename to src/lib/util/laguer/chebyRoot.H
diff --git a/util/laguer/laguer.c b/src/lib/util/laguer/laguer.c
similarity index 100%
rename from util/laguer/laguer.c
rename to src/lib/util/laguer/laguer.c
diff --git a/util/laguer/quadroptRoot.C b/src/lib/util/laguer/quadroptRoot.C
similarity index 100%
rename from util/laguer/quadroptRoot.C
rename to src/lib/util/laguer/quadroptRoot.C
diff --git a/util/oox/oox.c b/src/lib/util/oox/oox.c
similarity index 100%
rename from util/oox/oox.c
rename to src/lib/util/oox/oox.c
diff --git a/util/oox/oox_gawrapper.cxx b/src/lib/util/oox/oox_gawrapper.cxx
similarity index 100%
rename from util/oox/oox_gawrapper.cxx
rename to src/lib/util/oox/oox_gawrapper.cxx
diff --git a/util/oox/oox_gawrapper.h b/src/lib/util/oox/oox_gawrapper.h
similarity index 100%
rename from util/oox/oox_gawrapper.h
rename to src/lib/util/oox/oox_gawrapper.h
diff --git a/util/swapendian.c b/src/lib/util/swapendian.c
similarity index 100%
rename from util/swapendian.c
rename to src/lib/util/swapendian.c
diff --git a/util/tmlqcd-indent b/src/lib/util/tmlqcd-indent
similarity index 100%
rename from util/tmlqcd-indent
rename to src/lib/util/tmlqcd-indent
diff --git a/wrapper/Makefile.in b/src/lib/wrapper/Makefile.in
similarity index 100%
rename from wrapper/Makefile.in
rename to src/lib/wrapper/Makefile.in
diff --git a/wrapper/lib_wrapper.c b/src/lib/wrapper/lib_wrapper.c
similarity index 99%
rename from wrapper/lib_wrapper.c
rename to src/lib/wrapper/lib_wrapper.c
index 6c95a27d5..19d36ddc6 100644
--- a/wrapper/lib_wrapper.c
+++ b/src/lib/wrapper/lib_wrapper.c
@@ -60,11 +60,11 @@
 #include "misc_types.h"
 #include "mpi_init.h"
 #include "operator.h"
+#include "operator/clover_leaf.h"
+#include "qphix_interface.h"
 #include "read_input.h"
 #include "sighandler.h"
 #include "start.h"
-#include "operator/clover_leaf.h"
-#include "qphix_interface.h"
 
 #define CONF_FILENAME_LENGTH 500
 
@@ -121,7 +121,7 @@ int tmLQCD_invert_init(int argc, char* argv[], const int _verbose, const int ext
   for (int j = 0; j < no_operators; j++)
     if (!operator_list[j].even_odd_flag) even_odd_flag = 0;
 
-#ifdef _GAUGE_COPY
+#ifdef TM_USE_GAUGE_COPY
   int j = init_gauge_field(VOLUMEPLUSRAND, 1);
   j += init_gauge_field_32(VOLUMEPLUSRAND, 1);
 #else
@@ -161,7 +161,7 @@ int tmLQCD_invert_init(int argc, char* argv[], const int _verbose, const int ext
   // initialise the operators
   init_operators();
 
-#ifdef _USE_HALFSPINOR
+#ifdef TM_USE_HALFSPINOR
   j = init_dirac_halfspinor();
   if (j != 0) {
     fprintf(stderr, "tmLQCD_init_invert: Not enough memory for halffield! Aborting...\n");
@@ -172,7 +172,7 @@ int tmLQCD_invert_init(int argc, char* argv[], const int _verbose, const int ext
     fprintf(stderr, "tmLQCD_init_invert: Not enough memory for 32-bit halffield! Aborting...\n");
     return (-1);
   }
-#if (defined _PERSISTENT)
+#if (defined TM_PERSISTENT)
   if (even_odd_flag) init_xchange_halffield();
 #endif
 #endif
diff --git a/xchange/Makefile.in b/src/lib/xchange/Makefile.in
similarity index 100%
rename from xchange/Makefile.in
rename to src/lib/xchange/Makefile.in
diff --git a/xchange/little_field_gather.c b/src/lib/xchange/little_field_gather.c
similarity index 95%
rename from xchange/little_field_gather.c
rename to src/lib/xchange/little_field_gather.c
index 2821ddcd0..ae1a53abc 100644
--- a/xchange/little_field_gather.c
+++ b/src/lib/xchange/little_field_gather.c
@@ -47,7 +47,7 @@ int waitcount = 0;
 #define _C_TYPE _Complex double
 #define _MPI_C_TYPE MPI_DOUBLE_COMPLEX
 
-#include "little_field_gather_body.c"
+#include "little_field_gather_body.inc"
 
 #undef _PSWITCH
 #undef _PTSWITCH
@@ -59,7 +59,7 @@ int waitcount = 0;
 #define _C_TYPE _Complex float
 #define _MPI_C_TYPE MPI_COMPLEX
 
-#include "little_field_gather_body.c"
+#include "little_field_gather_body.inc"
 
 #undef _PSWITCH
 #undef _PTSWITCH
diff --git a/xchange/little_field_gather.h b/src/lib/xchange/little_field_gather.h
similarity index 100%
rename from xchange/little_field_gather.h
rename to src/lib/xchange/little_field_gather.h
diff --git a/xchange/little_field_gather_body.c b/src/lib/xchange/little_field_gather_body.inc
similarity index 100%
rename from xchange/little_field_gather_body.c
rename to src/lib/xchange/little_field_gather_body.inc
diff --git a/xchange/xchange.h b/src/lib/xchange/xchange.h
similarity index 100%
rename from xchange/xchange.h
rename to src/lib/xchange/xchange.h
diff --git a/xchange/xchange_2fields.c b/src/lib/xchange/xchange_2fields.c
similarity index 96%
rename from xchange/xchange_2fields.c
rename to src/lib/xchange/xchange_2fields.c
index c5dfa86a8..46496a0ba 100644
--- a/xchange/xchange_2fields.c
+++ b/src/lib/xchange/xchange_2fields.c
@@ -41,21 +41,17 @@
 #include "su3.h"
 #include "xchange_2fields.h"
 
-#if (defined _NON_BLOCKING)
+#if (defined TM_NON_BLOCKING)
 
 /* this version uses non-blocking MPI calls */
 void xchange_2fields(spinor* const l, spinor* const k, const int ieo) {
   MPI_Request requests[32];
   MPI_Status status[32];
   int reqcount = 0;
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
   int ix = 0;
 #endif
 
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(xchange2fields)
-#endif
-
 #ifdef TM_USE_MPI
 
   /* send the data to the neighbour on the left */
@@ -88,7 +84,7 @@ void xchange_2fields(spinor* const l, spinor* const k, const int ieo) {
             g_cart_grid, &requests[reqcount + 1]);
   reqcount = reqcount + 2;
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in x direction */
   /* recieve the data from the neighbour on the right in x direction */
   MPI_Isend((void*)l, 1, field_x_slice_gath, g_nb_x_dn, 91, g_cart_grid, &requests[reqcount]);
@@ -120,7 +116,7 @@ void xchange_2fields(spinor* const l, spinor* const k, const int ieo) {
   reqcount = reqcount + 2;
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in y direction */
   /* recieve the data from the neighbour on the right in y direction */
   MPI_Isend((void*)l, 1, field_y_slice_gath, g_nb_y_dn, 101, g_cart_grid, &requests[reqcount]);
@@ -153,7 +149,7 @@ void xchange_2fields(spinor* const l, spinor* const k, const int ieo) {
 
 #endif
 
-#if (defined PARALLELXYZT)
+#if (defined TM_PARALLELXYZT)
   /* fill buffer ! */
   /* This is now depending on whether the field is */
   /* even or odd */
@@ -237,8 +233,5 @@ void xchange_2fields(spinor* const l, spinor* const k, const int ieo) {
   MPI_Waitall(reqcount, requests, status);
 #endif
   return;
-#ifdef _KOJAK_INST
-#pragma pomp inst end(xchange2fields)
-#endif
 }
-#endif /*  _NON_BLOCKING */
+#endif /*  TM_NON_BLOCKING */
diff --git a/xchange/xchange_2fields.h b/src/lib/xchange/xchange_2fields.h
similarity index 98%
rename from xchange/xchange_2fields.h
rename to src/lib/xchange/xchange_2fields.h
index 35dc7f6c5..6a83085f0 100644
--- a/xchange/xchange_2fields.h
+++ b/src/lib/xchange/xchange_2fields.h
@@ -31,7 +31,7 @@
 #define EVEN 1
 #define ODD 0
 
-#ifdef _NON_BLOCKING
+#ifdef TM_NON_BLOCKING
 void xchange_2fields(spinor* const k, spinor* const l, const int ieo);
 #else
 #define xchange_2fields(k, l, ieo) \
diff --git a/xchange/xchange_deri.c b/src/lib/xchange/xchange_deri.c
similarity index 95%
rename from xchange/xchange_deri.c
rename to src/lib/xchange/xchange_deri.c
index a260ed8b6..7defa1e7c 100644
--- a/xchange/xchange_deri.c
+++ b/src/lib/xchange/xchange_deri.c
@@ -55,7 +55,7 @@ void xchange_deri(su3adj** const df) {
 #ifdef TM_USE_MPI
   int ix, iy, t, y, z, x;
   MPI_Status status;
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* The edges need to come first */
 
   /* send the data to the neighbour on the left in t direction */
@@ -96,9 +96,9 @@ void xchange_deri(su3adj** const df) {
     }
   }
 
-#endif /* (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) */
+#endif /* (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) */
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* edges */
 
   /* send the data to the neighbour on the left in x direction */
@@ -178,9 +178,9 @@ void xchange_deri(su3adj** const df) {
     }
   }
 
-#endif /* (defined PARALLELXYT || defined PARALLELXYZT) */
+#endif /* (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) */
 
-#ifdef PARALLELXYZT
+#ifdef TM_PARALLELXYZT
 
   /* send the data to the neighbour on the left in x direction */
   /* recieve the data from the neighbour on the right in x direction */
@@ -305,7 +305,7 @@ void xchange_deri(su3adj** const df) {
     }
   }
 
-#endif /* PARALLELXYZT */
+#endif /* TM_PARALLELXYZT */
 
   // now the normal boundaries
 
@@ -341,7 +341,7 @@ void xchange_deri(su3adj** const df) {
     }
   }
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in x direction */
   /* recieve the data from the neighbour on the right in x direction */
   MPI_Sendrecv((void*)df[(T + 2) * LX * LY * LZ + T * LY * LZ], 1, deri_x_slice_cont, g_nb_x_dn, 42,
@@ -372,9 +372,9 @@ void xchange_deri(su3adj** const df) {
     }
   }
 
-#endif /* (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT) */
+#endif /* (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT) */
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
 
   /* send the data to the neighbour on the left in y direction */
   /* recieve the data from the neighbour on the right in y direction */
@@ -406,9 +406,9 @@ void xchange_deri(su3adj** const df) {
     }
   }
 
-#endif /* (defined PARALLELXYT || defined PARALLELXYZT) */
+#endif /* (defined TM_PARALLELXYT || defined TM_PARALLELXYZT) */
 
-#ifdef PARALLELXYZT
+#ifdef TM_PARALLELXYZT
   /* send the data to the neighbour on the left in y direction */
   /* recieve the data from the neighbour on the right in y direction */
   MPI_Sendrecv(
@@ -441,7 +441,7 @@ void xchange_deri(su3adj** const df) {
     }
   }
 
-#endif /* PARALLELXYZT */
+#endif /* TM_PARALLELXYZT */
 #endif /* MPI */
   return;
 }
diff --git a/xchange/xchange_deri.h b/src/lib/xchange/xchange_deri.h
similarity index 100%
rename from xchange/xchange_deri.h
rename to src/lib/xchange/xchange_deri.h
diff --git a/xchange/xchange_field.c b/src/lib/xchange/xchange_field.c
similarity index 87%
rename from xchange/xchange_field.c
rename to src/lib/xchange/xchange_field.c
index 576574789..217631f8e 100644
--- a/xchange/xchange_field.c
+++ b/src/lib/xchange/xchange_field.c
@@ -35,7 +35,7 @@
 #ifdef TM_USE_MPI
 #include <mpi.h>
 #endif
-#ifdef _USE_SHMEM
+#ifdef TM_USE_SHMEM
 #include <mpp/shmem.h>
 #endif
 
@@ -44,33 +44,25 @@
 #include "su3.h"
 #include "xchange_field.h"
 
-#if (defined PARALLELXYZT)
-#pragma disjoint(*field_buffer_z2, *field_buffer_z)
-#endif
-
 /* this version uses non-blocking MPI calls */
-#if (defined _NON_BLOCKING)
+#if (defined TM_NON_BLOCKING)
 
 void xchange_field(spinor* const l, const int ieo) {
 #ifdef TM_USE_MPI
   MPI_Request requests[16];
   MPI_Status status[16];
 #endif
-#ifdef PARALLELT
+#ifdef TM_PARALLELT
   int reqcount = 4;
-#elif defined PARALLELXT
+#elif defined TM_PARALLELXT
   int reqcount = 8;
-#elif defined PARALLELXYT
+#elif defined TM_PARALLELXYT
   int reqcount = 12;
-#elif defined PARALLELXYZT
+#elif defined TM_PARALLELXYZT
   int ix = 0;
   int reqcount = 16;
 #endif
 
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(xchangefield)
-#endif
-
 #ifdef TM_USE_MPI
 
   /* In 4 dimensions there are two processors sharing the   */
@@ -84,7 +76,7 @@ void xchange_field(spinor* const l, const int ieo) {
     MPI_Isend((void*)l, 1, field_time_slice_cont, g_nb_t_dn, 81, g_cart_grid, &requests[0]);
     MPI_Irecv((void*)(l + T * LX * LY * LZ / 2), 1, field_time_slice_cont, g_nb_t_up, 81,
               g_cart_grid, &requests[1]);
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     /* send the data to the neighbour on the left in x direction */
     /* recieve the data from the neighbour on the right in x direction */
     MPI_Isend((void*)l, 1, field_x_slice_gath, g_nb_x_dn, 91, g_cart_grid, &requests[4]);
@@ -92,7 +84,7 @@ void xchange_field(spinor* const l, const int ieo) {
               g_cart_grid, &requests[5]);
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     /* send the data to the neighbour on the left in y direction */
     /* recieve the data from the neighbour on the right in y direction */
     MPI_Isend((void*)l, 1, field_y_slice_gath, g_nb_y_dn, 101, g_cart_grid, &requests[8]);
@@ -100,16 +92,16 @@ void xchange_field(spinor* const l, const int ieo) {
               g_nb_y_up, 101, g_cart_grid, &requests[9]);
 #endif
 
-#if (defined PARALLELXYZT)
+#if (defined TM_PARALLELXYZT)
     /* fill buffer ! */
     /* This is now depending on whether the field is */
     /* even or odd */
     if (ieo == 1) {
-      for (ix = 0; ix < T * LX * LY / 2; ix++) {
+      for (int ix = 0; ix < T * LX * LY / 2; ix++) {
         field_buffer_z[ix] = l[g_field_z_ipt_even[ix]];
       }
     } else {
-      for (ix = 0; ix < T * LX * LY / 2; ix++) {
+      for (int ix = 0; ix < T * LX * LY / 2; ix++) {
         field_buffer_z[ix] = l[g_field_z_ipt_odd[ix]];
       }
     }
@@ -129,7 +121,7 @@ void xchange_field(spinor* const l, const int ieo) {
     MPI_Irecv((void*)(l + (T + 1) * LX * LY * LZ / 2), 1, field_time_slice_cont, g_nb_t_dn, 82,
               g_cart_grid, &requests[3]);
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     /* send the data to the neighbour on the right in x direction */
     /* recieve the data from the neighbour on the left in x direction */
     MPI_Isend((void*)(l + (LX - 1) * LY * LZ / 2), 1, field_x_slice_gath, g_nb_x_up, 92,
@@ -138,7 +130,7 @@ void xchange_field(spinor* const l, const int ieo) {
               g_nb_x_dn, 92, g_cart_grid, &requests[7]);
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     /* send the data to the neighbour on the right in y direction */
     /* recieve the data from the neighbour on the left in y direction */
     MPI_Isend((void*)(l + (LY - 1) * LZ / 2), 1, field_y_slice_gath, g_nb_y_up, 102, g_cart_grid,
@@ -147,7 +139,7 @@ void xchange_field(spinor* const l, const int ieo) {
               field_y_slice_cont, g_nb_y_dn, 102, g_cart_grid, &requests[11]);
 #endif
 
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
     if (ieo == 1) {
       for (ix = T * LX * LY / 2; ix < T * LX * LY; ix++) {
         field_buffer_z2[ix - T * LX * LY / 2] = l[g_field_z_ipt_even[ix]];
@@ -174,7 +166,7 @@ void xchange_field(spinor* const l, const int ieo) {
               g_cart_grid, &requests[0]);
     MPI_Irecv((void*)(l + (T + 1) * LX * LY * LZ / 2), 1, field_time_slice_cont, g_nb_t_dn, 82,
               g_cart_grid, &requests[1]);
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     /* send the data to the neighbour on the right in x direction */
     /* recieve the data from the neighbour on the left in x direction */
     MPI_Isend((void*)(l + (LX - 1) * LY * LZ / 2), 1, field_x_slice_gath, g_nb_x_up, 92,
@@ -183,7 +175,7 @@ void xchange_field(spinor* const l, const int ieo) {
               g_nb_x_dn, 92, g_cart_grid, &requests[5]);
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     /* send the data to the neighbour on the right in y direction */
     /* recieve the data from the neighbour on the left in y direction */
     MPI_Isend((void*)(l + (LY - 1) * LZ / 2), 1, field_y_slice_gath, g_nb_y_up, 102, g_cart_grid,
@@ -192,7 +184,7 @@ void xchange_field(spinor* const l, const int ieo) {
               field_y_slice_cont, g_nb_y_dn, 102, g_cart_grid, &requests[9]);
 #endif
 
-#if (defined PARALLELXYZT)
+#if (defined TM_PARALLELXYZT)
     /* fill buffer ! */
     /* This is now depending on whether the field is */
     /* even or odd */
@@ -218,7 +210,7 @@ void xchange_field(spinor* const l, const int ieo) {
     MPI_Isend((void*)l, 1, field_time_slice_cont, g_nb_t_dn, 81, g_cart_grid, &requests[2]);
     MPI_Irecv((void*)(l + T * LX * LY * LZ / 2), 1, field_time_slice_cont, g_nb_t_up, 81,
               g_cart_grid, &requests[3]);
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     /* send the data to the neighbour on the left in x direction */
     /* recieve the data from the neighbour on the right in x direction */
     MPI_Isend((void*)l, 1, field_x_slice_gath, g_nb_x_dn, 91, g_cart_grid, &requests[6]);
@@ -226,7 +218,7 @@ void xchange_field(spinor* const l, const int ieo) {
               g_cart_grid, &requests[7]);
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
     /* send the data to the neighbour on the left in y direction */
     /* recieve the data from the neighbour on the right in y direction */
     MPI_Isend((void*)l, 1, field_y_slice_gath, g_nb_y_dn, 101, g_cart_grid, &requests[10]);
@@ -234,7 +226,7 @@ void xchange_field(spinor* const l, const int ieo) {
               g_nb_y_up, 101, g_cart_grid, &requests[11]);
 #endif
 
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
     if (ieo == 1) {
       for (ix = T * LX * LY / 2; ix < T * LX * LY; ix++) {
         field_buffer_z2[ix - T * LX * LY / 2] = l[g_field_z_ipt_even[ix]];
@@ -259,23 +251,16 @@ void xchange_field(spinor* const l, const int ieo) {
 #endif
 
   return;
-#ifdef _KOJAK_INST
-#pragma pomp inst end(xchangefield)
-#endif
 }
 
-#elif (defined _USE_SHMEM) /* _NON_BLOCKING */
+#elif (defined TM_USE_SHMEM) /* TM_NON_BLOCKING */
 
 /* Here comes the version with shared memory */
 /* exchanges the field  l */
 void xchange_field(spinor* const l, const int ieo) {
 
 #ifdef TM_USE_MPI
-  int i, ix, mu, x0, x1, x2, x3, k;
-
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(xchangefield)
-#endif
+  int k;
 
   shmem_barrier_all();
 
@@ -283,33 +268,33 @@ void xchange_field(spinor* const l, const int ieo) {
   shmem_double_put((double*)(l + (T + 1) * LX * LY * LZ / 2),
                    (double*)(l + (T - 1) * LX * LY * LZ / 2), (LX * LY * LZ * 12), g_nb_t_up);
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   k = (T + 2) * LX * LY * LZ / 2;
-  for (x0 = 0; x0 < T; x0++) {
+  for (int x0 = 0; x0 < T; x0++) {
     shmem_double_put((double*)(l + k), (double*)(l + g_lexic2eo[g_ipt[x0][0][0][0]]), 12 * LZ * LY,
                      g_nb_x_dn);
     k += LZ * LY;
   }
   k = ((T + 2) * LX * LY * LZ + T * LY * LZ) / 2;
-  for (x0 = 0; x0 < T; x0++) {
+  for (int x0 = 0; x0 < T; x0++) {
     shmem_double_put((double*)(l + k), (double*)(l + g_lexic2eo[g_ipt[x0][LX - 1][0][0]]),
                      12 * LZ * LY, g_nb_x_up);
     k += LZ * LY;
   }
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   k = ((T + 2) * LX * LY * LZ + 2 * T * LY * LZ) / 2;
-  for (x0 = 0; x0 < T; x0++) {
-    for (x1 = 0; x1 < LX; x1++) {
+  for (int x0 = 0; x0 < T; x0++) {
+    for (int x1 = 0; x1 < LX; x1++) {
       shmem_double_put((double*)(l + k), (double*)(l + g_lexic2eo[g_ipt[x0][x1][0][0]]), 12 * LZ,
                        g_nb_y_dn);
       k += LZ;
     }
   }
   k = ((T + 2) * LX * LY * LZ + 2 * T * LY * LZ + T * LX * LZ) / 2;
-  for (x0 = 0; x0 < T; x0++) {
-    for (x1 = 0; x1 < LX; x1++) {
+  for (int x0 = 0; x0 < T; x0++) {
+    for (int x1 = 0; x1 < LX; x1++) {
       shmem_double_put((double*)(l + k), (double*)(l + g_lexic2eo[g_ipt[x0][x1][LY - 1][0]]),
                        12 * LZ, g_nb_y_up);
       k += LZ;
@@ -317,8 +302,8 @@ void xchange_field(spinor* const l, const int ieo) {
   }
 #endif
 
-#if (defined PARALLELXYZT)
-  x0 = (VOLUME / 2 + LX * LY * LZ + T * LY * LZ + T * LX * LZ);
+#if (defined TM_PARALLELXYZT)
+  int x0 = (VOLUME / 2 + LX * LY * LZ + T * LY * LZ + T * LX * LZ);
   if (ieo == 1) {
     for (k = 0; k < T * LX * LY / 2; k++) {
       shmem_double_put((double*)(l + x0), (double*)(l + g_field_z_ipt_even[k]), 24, g_nb_z_dn);
@@ -347,24 +332,14 @@ void xchange_field(spinor* const l, const int ieo) {
   shmem_barrier_all();
 #endif  // MPI
   return;
-#ifdef _KOJAK_INST
-#pragma pomp inst end(xchangefield)
-#endif
 }
 
 /* Here comes the naive version */
 /* Using MPI_Sendrecv */
-#else /* _NON_BLOCKING _USE_SHMEM */
+#else   /* TM_NON_BLOCKING TM_USE_SHMEM */
 /* exchanges the field  l */
 void xchange_field(spinor* const l, const int ieo) {
 
-#ifdef PARALLELXYZT
-  int x0 = 0, x1 = 0, x2 = 0, ix = 0;
-#endif
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(xchangefield)
-#endif
-
 #ifdef TM_USE_MPI
 
   MPI_Status status;
@@ -379,7 +354,7 @@ void xchange_field(spinor* const l, const int ieo) {
                (void*)(l + (T + 1) * LX * LY * LZ / 2), 1, field_time_slice_cont, g_nb_t_dn, 82,
                g_cart_grid, &status);
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in x direction */
   /* recieve the data from the neighbour on the right in x direction */
   MPI_Sendrecv((void*)l, 1, field_x_slice_gath, g_nb_x_dn, 91,
@@ -394,7 +369,7 @@ void xchange_field(spinor* const l, const int ieo) {
 
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in y direction */
   /* recieve the data from the neighbour on the right in y direction */
   MPI_Sendrecv((void*)l, 1, field_y_slice_gath, g_nb_y_dn, 101,
@@ -409,16 +384,16 @@ void xchange_field(spinor* const l, const int ieo) {
 
 #endif
 
-#if (defined PARALLELXYZT)
+#if (defined TM_PARALLELXYZT)
   /* fill buffer ! */
   /* This is now depending on whether the field is */
   /* even or odd */
   if (ieo == 1) {
-    for (ix = 0; ix < T * LX * LY / 2; ix++) {
+    for (int ix = 0; ix < T * LX * LY / 2; ix++) {
       field_buffer_z[ix] = l[g_field_z_ipt_even[ix]];
     }
   } else {
-    for (ix = 0; ix < T * LX * LY / 2; ix++) {
+    for (int ix = 0; ix < T * LX * LY / 2; ix++) {
       field_buffer_z[ix] = l[g_field_z_ipt_odd[ix]];
     }
   }
@@ -429,11 +404,11 @@ void xchange_field(spinor* const l, const int ieo) {
                12 * T * LX * LY, MPI_DOUBLE, g_nb_z_up, 503, g_cart_grid, &status);
 
   if (ieo == 1) {
-    for (ix = T * LX * LY / 2; ix < T * LX * LY; ix++) {
+    for (int ix = T * LX * LY / 2; ix < T * LX * LY; ix++) {
       field_buffer_z[ix - T * LX * LY / 2] = l[g_field_z_ipt_even[ix]];
     }
   } else {
-    for (ix = T * LX * LY / 2; ix < T * LX * LY; ix++) {
+    for (int ix = T * LX * LY / 2; ix < T * LX * LY; ix++) {
       field_buffer_z[ix - T * LX * LY / 2] = l[g_field_z_ipt_odd[ix]];
     }
   }
@@ -448,9 +423,6 @@ void xchange_field(spinor* const l, const int ieo) {
 #endif
 #endif  // MPI
   return;
-#ifdef _KOJAK_INST
-#pragma pomp inst end(xchangefield)
-#endif
 }
 
-#endif /* _NON_BLOCKING */
+#endif /* TM_NON_BLOCKING */
diff --git a/xchange/xchange_field.h b/src/lib/xchange/xchange_field.h
similarity index 100%
rename from xchange/xchange_field.h
rename to src/lib/xchange/xchange_field.h
diff --git a/xchange/xchange_gauge.c b/src/lib/xchange/xchange_gauge.c
similarity index 98%
rename from xchange/xchange_gauge.c
rename to src/lib/xchange/xchange_gauge.c
index 3465d970f..254702822 100644
--- a/xchange/xchange_gauge.c
+++ b/src/lib/xchange/xchange_gauge.c
@@ -38,7 +38,7 @@
 #include "su3adj.h"
 #include "xchange_gauge.h"
 
-#if defined _NON_BLOCKING
+#if defined TM_NON_BLOCKING
 void xchange_gauge(su3** const gf) {
   int cntr = 0;
 #ifdef TM_USE_MPI
@@ -80,7 +80,7 @@ void xchange_gauge(su3** const gf) {
     cntr = cntr + 2;
   }
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in x direction */
   /* recieve the data from the neighbour on the right in x direction */
   MPI_Isend(gf[0], 1, gauge_x_slice_gath, g_nb_x_dn, 87, g_cart_grid, &request[cntr]);
@@ -117,7 +117,7 @@ void xchange_gauge(su3** const gf) {
 #endif
   MPI_Waitall(cntr, request, status);
   cntr = 0;
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* The edges */
 
   /* send the data to the neighbour on the left in t direction */
@@ -175,10 +175,10 @@ void xchange_gauge(su3** const gf) {
               g_cart_grid, &request[cntr + 1]);
     cntr = cntr + 2;
   }
-  /* end of if defined PARALLELXT || PARALLELXYT || PARALLELXYZT*/
+  /* end of if defined TM_PARALLELXT || TM_PARALLELXYT || TM_PARALLELXYZT*/
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in y direction */
   /* recieve the data from the neighbour on the right in y direction */
   MPI_Isend(gf[0], 1, gauge_y_slice_gath, g_nb_y_dn, 106, g_cart_grid, &request[cntr]);
@@ -212,7 +212,7 @@ void xchange_gauge(su3** const gf) {
 #endif
   MPI_Waitall(cntr, request, status);
   cntr = 0;
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
 
   /* jetzt wirds richtig eklig ... */
 
@@ -326,9 +326,9 @@ void xchange_gauge(su3** const gf) {
     cntr = cntr + 2;
   }
 
-  /* end of if defined PARALLELXYT || PARALLELXYZT */
+  /* end of if defined TM_PARALLELXYT || TM_PARALLELXYZT */
 #endif
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
   /* z-Rand */
   /* send the data to the neighbour on the left in z direction */
   /* recieve the data from the neighbour on the right in z direction */
@@ -361,7 +361,7 @@ void xchange_gauge(su3** const gf) {
   }
 #endif
   MPI_Waitall(cntr, request, status);
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
   cntr = 0;
   /* edges */
 
@@ -538,13 +538,13 @@ void xchange_gauge(su3** const gf) {
   }
   MPI_Waitall(cntr, request, status);
 
-  /* end of if defined PARALLELXYZT */
+  /* end of if defined TM_PARALLELXYZT */
 #endif
 #endif
   return;
 }
 
-#else /* _NON_BLOCKING */
+#else /* TM_NON_BLOCKING */
 void xchange_gauge(su3** const gf) {
 
 #ifdef TM_USE_MPI
@@ -576,7 +576,7 @@ void xchange_gauge(su3** const gf) {
                  g_cart_grid, &status);
   }
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in x direction */
   /* recieve the data from the neighbour on the right in x direction */
   MPI_Sendrecv(gf[0], 1, gauge_x_slice_gath, g_nb_x_dn, 93, gf[(T + 2) * LX * LY * LZ], 1,
@@ -648,10 +648,10 @@ void xchange_gauge(su3** const gf) {
                  g_nb_t_up, 98, gf[VOLUMEPLUSRAND + RAND + 6 * LY * LZ], 1, gauge_xt_edge_cont,
                  g_nb_t_dn, 98, g_cart_grid, &status);
   }
-  /* end of if defined PARALLELXT || PARALLELXYT || PARALLELXYZT*/
+  /* end of if defined TM_PARALLELXT || TM_PARALLELXYT || TM_PARALLELXYZT*/
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in y direction */
   /* recieve the data from the neighbour on the right in y direction */
   MPI_Sendrecv(gf[0], 1, gauge_y_slice_gath, g_nb_y_dn, 103,
@@ -770,9 +770,9 @@ void xchange_gauge(su3** const gf) {
                  gauge_ty_edge_cont, g_nb_y_dn, 298, g_cart_grid, &status);
   }
 
-  /* end of if defined PARALLELXYT || PARALLELXYZT */
+  /* end of if defined TM_PARALLELXYT || TM_PARALLELXYZT */
 #endif
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
   /* z-Rand */
   /* send the data to the neighbour on the left in z direction */
   /* recieve the data from the neighbour on the right in z direction */
@@ -954,11 +954,10 @@ void xchange_gauge(su3** const gf) {
                  1, gauge_zy_edge_cont, g_nb_y_dn, 510, g_cart_grid, &status);
   }
 
-  /* end of if defined PARALLELXYZT */
+  /* end of if defined TM_PARALLELXYZT */
 #endif
 #endif
   return;
 }
 
-
-#endif /* _NON_BLOCKING */
+#endif /* TM_NON_BLOCKING */
diff --git a/xchange/xchange_gauge.h b/src/lib/xchange/xchange_gauge.h
similarity index 100%
rename from xchange/xchange_gauge.h
rename to src/lib/xchange/xchange_gauge.h
diff --git a/xchange/xchange_halffield.c b/src/lib/xchange/xchange_halffield.c
similarity index 90%
rename from xchange/xchange_halffield.c
rename to src/lib/xchange/xchange_halffield.c
index d1eae8a04..8b56d6593 100644
--- a/xchange/xchange_halffield.c
+++ b/src/lib/xchange/xchange_halffield.c
@@ -41,27 +41,15 @@
 #include "su3.h"
 #include "xchange_halffield.h"
 
-#if (defined _USE_HALFSPINOR)
+#if (defined TM_USE_HALFSPINOR)
 
-#if (defined _PERSISTENT)
+#if (defined TM_PERSISTENT)
 
 MPI_Request prequests[16];
 
 /* 2. */
 void init_xchange_halffield() {
 #ifdef TM_USE_MPI
-
-#ifdef PARALLELT
-  int reqcount = 4;
-#elif defined PARALLELXT
-  int reqcount = 8;
-#elif defined PARALLELXYT
-  int reqcount = 12;
-#elif defined PARALLELXYZT
-  int x0 = 0, x1 = 0, x2 = 0, ix = 0;
-  int reqcount = 16;
-#endif
-
   /* send the data to the neighbour on the right in t direction */
   /* recieve the data from the neighbour on the left in t direction */
   MPI_Send_init((void*)(sendBuffer), LX * LY * LZ * 12 / 2, MPI_DOUBLE, g_nb_t_up, 81, g_cart_grid,
@@ -78,7 +66,7 @@ void init_xchange_halffield() {
   MPI_Recv_init((void*)(recvBuffer), LX * LY * LZ * 12 / 2, MPI_DOUBLE, g_nb_t_up, 82, g_cart_grid,
                 &prequests[3]);
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
 
   /* send the data to the neighbour on the right in x direction */
   /* recieve the data from the neighbour on the left in x direction */
@@ -97,7 +85,7 @@ void init_xchange_halffield() {
                 g_cart_grid, &prequests[7]);
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the right in y direction */
   /* recieve the data from the neighbour on the left in y direction */
   MPI_Send_init((void*)(sendBuffer + LX * LY * LZ + T * LY * LZ), T * LX * LZ * 12 / 2, MPI_DOUBLE,
@@ -115,7 +103,7 @@ void init_xchange_halffield() {
                 g_nb_y_up, 102, g_cart_grid, &prequests[11]);
 #endif
 
-#if (defined PARALLELXYZT)
+#if (defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the right in z direction */
   /* recieve the data from the neighbour on the left in z direction */
   MPI_Send_init((void*)(sendBuffer + LX * LY * LZ + T * LY * LZ + T * LX * LZ),
@@ -141,14 +129,13 @@ void xchange_halffield() {
 #ifdef TM_USE_MPI
 
   MPI_Status status[16];
-#ifdef PARALLELT
+#ifdef TM_PARALLELT
   int reqcount = 4;
-#elif defined PARALLELXT
+#elif defined TM_PARALLELXT
   int reqcount = 8;
-#elif defined PARALLELXYT
+#elif defined TM_PARALLELXYT
   int reqcount = 12;
-#elif defined PARALLELXYZT
-  int x0 = 0, x1 = 0, x2 = 0, ix = 0;
+#elif defined TM_PARALLELXYZT
   int reqcount = 16;
 #endif
   MPI_Startall(reqcount, prequests);
@@ -158,7 +145,7 @@ void xchange_halffield() {
   return;
 }
 
-#else /* def (_USE_SHMEM || _PERSISTENT) */
+#else /* def (TM_USE_SHMEM || TM_PERSISTENT) */
 /* 4. */
 void xchange_halffield() {
 
@@ -166,19 +153,16 @@ void xchange_halffield() {
 
   MPI_Request requests[16];
   MPI_Status status[16];
-#ifdef PARALLELT
+#ifdef TM_PARALLELT
   int reqcount = 4;
-#elif defined PARALLELXT
+#elif defined TM_PARALLELXT
   int reqcount = 8;
-#elif defined PARALLELXYT
+#elif defined TM_PARALLELXYT
   int reqcount = 12;
-#elif defined PARALLELXYZT
+#elif defined TM_PARALLELXYZT
   int reqcount = 16;
 #endif
 
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(xchangehalf)
-#endif
   /* send the data to the neighbour on the right in t direction */
   /* recieve the data from the neighbour on the left in t direction */
   MPI_Isend((void*)(sendBuffer), LX * LY * LZ * 12 / 2, MPI_DOUBLE, g_nb_t_up, 81, g_cart_grid,
@@ -193,7 +177,7 @@ void xchange_halffield() {
   MPI_Irecv((void*)(recvBuffer), LX * LY * LZ * 12 / 2, MPI_DOUBLE, g_nb_t_up, 82, g_cart_grid,
             &requests[3]);
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
 
   /* send the data to the neighbour on the right in x direction */
   /* recieve the data from the neighbour on the left in x direction */
@@ -210,7 +194,7 @@ void xchange_halffield() {
             g_cart_grid, &requests[7]);
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the right in y direction */
   /* recieve the data from the neighbour on the left in y direction */
   MPI_Isend((void*)(sendBuffer + LX * LY * LZ + T * LY * LZ), T * LX * LZ * 12 / 2, MPI_DOUBLE,
@@ -226,7 +210,7 @@ void xchange_halffield() {
             g_nb_y_up, 102, g_cart_grid, &requests[11]);
 #endif
 
-#if (defined PARALLELXYZT)
+#if (defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the right in z direction */
   /* recieve the data from the neighbour on the left in z direction */
   MPI_Isend((void*)(sendBuffer + LX * LY * LZ + T * LY * LZ + T * LX * LZ), T * LX * LY * 12 / 2,
@@ -245,29 +229,22 @@ void xchange_halffield() {
   MPI_Waitall(reqcount, requests, status);
 #endif /* MPI */
   return;
-
-#ifdef _KOJAK_INST
-#pragma pomp inst end(xchangehalf)
-#endif
 }
 
-#endif /* def (_USE_SHMEM || _PERSISTENT) */
+#endif /* def (TM_USE_SHMEM || TM_PERSISTENT) */
 void xchange_halffield32() {
 #ifdef TM_USE_MPI
 
   MPI_Request requests[16];
   MPI_Status status[16];
-#ifdef PARALLELT
+#ifdef TM_PARALLELT
   int reqcount = 4;
-#elif defined PARALLELXT
+#elif defined TM_PARALLELXT
   int reqcount = 8;
-#elif defined PARALLELXYT
+#elif defined TM_PARALLELXYT
   int reqcount = 12;
-#elif defined PARALLELXYZT
+#elif defined TM_PARALLELXYZT
   int reqcount = 16;
-#endif
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(xchangehalf32)
 #endif
 
   /* send the data to the neighbour on the right in t direction */
@@ -284,7 +261,7 @@ void xchange_halffield32() {
   MPI_Irecv((void*)(recvBuffer32), LX * LY * LZ * 12 / 2, MPI_FLOAT, g_nb_t_up, 82, g_cart_grid,
             &requests[3]);
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
 
   /* send the data to the neighbour on the right in x direction */
   /* recieve the data from the neighbour on the left in x direction */
@@ -301,7 +278,7 @@ void xchange_halffield32() {
             g_cart_grid, &requests[7]);
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the right in y direction */
   /* recieve the data from the neighbour on the left in y direction */
   MPI_Isend((void*)(sendBuffer32 + LX * LY * LZ + T * LY * LZ), T * LX * LZ * 12 / 2, MPI_FLOAT,
@@ -317,7 +294,7 @@ void xchange_halffield32() {
             g_nb_y_up, 102, g_cart_grid, &requests[11]);
 #endif
 
-#if (defined PARALLELXYZT)
+#if (defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the right in z direction */
   /* recieve the data from the neighbour on the left in z direction */
   MPI_Isend((void*)(sendBuffer32 + LX * LY * LZ + T * LY * LZ + T * LX * LZ), T * LX * LY * 12 / 2,
@@ -336,8 +313,5 @@ void xchange_halffield32() {
   MPI_Waitall(reqcount, requests, status);
 #endif /* MPI */
   return;
-#ifdef _KOJAK_INST
-#pragma pomp inst end(xchangehalf32)
-#endif
 }
-#endif /* defined _USE_HALFSPINOR */
+#endif /* defined TM_USE_HALFSPINOR */
diff --git a/xchange/xchange_halffield.h b/src/lib/xchange/xchange_halffield.h
similarity index 100%
rename from xchange/xchange_halffield.h
rename to src/lib/xchange/xchange_halffield.h
diff --git a/xchange/xchange_lexicfield.c b/src/lib/xchange/xchange_lexicfield.c
similarity index 88%
rename from xchange/xchange_lexicfield.c
rename to src/lib/xchange/xchange_lexicfield.c
index 9def17fc6..857bb4b98 100644
--- a/xchange/xchange_lexicfield.c
+++ b/src/lib/xchange/xchange_lexicfield.c
@@ -43,7 +43,7 @@
 #include "xchange_lexicfield.h"
 
 /* this version uses non-blocking MPI calls */
-#if (defined _NON_BLOCKING)
+#if (defined TM_NON_BLOCKING)
 
 /* this is the version independent of the content of the function Index (only available with
  * non-blocking)) */
@@ -51,27 +51,23 @@
 void xchange_lexicfield(spinor* const l) {
   MPI_Request requests[16];
   MPI_Status status[16];
-#ifdef PARALLELT
+#ifdef TM_PARALLELT
   int reqcount = 4;
-#elif defined PARALLELXT
+#elif defined TM_PARALLELXT
   int reqcount = 8;
-#elif defined PARALLELXYT
+#elif defined TM_PARALLELXYT
   int reqcount = 12;
-#elif defined PARALLELXYZT
+#elif defined TM_PARALLELXYZT
   int reqcount = 16;
 #endif
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(xchange_lexicfield)
-#endif
 
 #ifdef TM_USE_MPI
-
   /* send the data to the neighbour on the left */
   /* recieve the data from the neighbour on the right */
   MPI_Isend((void*)l, 1, lfield_time_slice_cont, g_nb_t_dn, 5081, g_cart_grid, &requests[0]);
   MPI_Irecv((void*)(l + VOLUME), 1, lfield_time_slice_cont, g_nb_t_up, 5081, g_cart_grid,
             &requests[1]);
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in x direction */
   /* recieve the data from the neighbour on the right in x direction */
   MPI_Isend((void*)l, 1, lfield_x_slice_gath, g_nb_x_dn, 5091, g_cart_grid, &requests[4]);
@@ -80,7 +76,7 @@ void xchange_lexicfield(spinor* const l) {
 
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in y direction */
   /* recieve the data from the neighbour on the right in y direction */
   MPI_Isend((void*)l, 1, lfield_y_slice_gath, g_nb_y_dn, 5101, g_cart_grid, &requests[8]);
@@ -88,7 +84,7 @@ void xchange_lexicfield(spinor* const l) {
             5101, g_cart_grid, &requests[9]);
 #endif
 
-#if (defined PARALLELXYZT)
+#if (defined TM_PARALLELXYZT)
 
   /* send the data to the neighbour on the left in z direction */
   /* recieve the data from the neighbour on the right in z direction */
@@ -103,7 +99,7 @@ void xchange_lexicfield(spinor* const l) {
   MPI_Irecv((void*)(l + (T + 1) * LX * LY * LZ), 1, lfield_time_slice_cont, g_nb_t_dn, 5082,
             g_cart_grid, &requests[3]);
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the right in x direction */
   /* recieve the data from the neighbour on the left in x direction */
   MPI_Isend((void*)(l + (LX - 1) * LY * LZ), 1, lfield_x_slice_gath, g_nb_x_up, 5092, g_cart_grid,
@@ -112,7 +108,7 @@ void xchange_lexicfield(spinor* const l) {
             5092, g_cart_grid, &requests[7]);
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the right in y direction */
   /* recieve the data from the neighbour on the left in y direction */
   MPI_Isend((void*)(l + (LY - 1) * LZ), 1, lfield_y_slice_gath, g_nb_y_up, 5102, g_cart_grid,
@@ -121,7 +117,7 @@ void xchange_lexicfield(spinor* const l) {
             g_nb_y_dn, 5102, g_cart_grid, &requests[11]);
 #endif
 
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
 
   /* send the data to the neighbour on the right in y direction */
   /* recieve the data from the neighbour on the left in y direction */
@@ -135,26 +131,15 @@ void xchange_lexicfield(spinor* const l) {
 
 #endif
   return;
-#ifdef _KOJAK_INST
-#pragma pomp inst end(xchange_lexicfield)
-#endif
 }
 
 /* Here comes the naive version */
 /* Using MPI_Sendrecv */
-#else /* _NON_BLOCKING */
+#else /* TM_NON_BLOCKING */
 /* exchanges the field  l */
 void xchange_lexicfield(spinor* const l) {
 
-#ifdef PARALLELXYZT
-  int x0 = 0, x1 = 0, x2 = 0, ix = 0;
-#endif
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(xchange_lexicfield)
-#endif
-
 #ifdef TM_USE_MPI
-
   MPI_Status status;
   /* send the data to the neighbour on the left */
   /* recieve the data from the neighbour on the right */
@@ -167,7 +152,7 @@ void xchange_lexicfield(spinor* const l) {
                (void*)(l + (T + 1) * LX * LY * LZ), 1, lfield_time_slice_cont, g_nb_t_dn, 5082,
                g_cart_grid, &status);
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in x direction */
   /* recieve the data from the neighbour on the right in x direction */
   MPI_Sendrecv((void*)l, 1, lfield_x_slice_gath, g_nb_x_dn, 5091,
@@ -182,7 +167,7 @@ void xchange_lexicfield(spinor* const l) {
 
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in y direction */
   /* recieve the data from the neighbour on the right in y direction */
   MPI_Sendrecv((void*)l, 1, lfield_y_slice_gath, g_nb_y_dn, 5101,
@@ -197,7 +182,7 @@ void xchange_lexicfield(spinor* const l) {
 
 #endif
 
-#if (defined PARALLELXYZT)
+#if (defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in z direction */
   /* recieve the data from the neighbour on the right in z direction */
   MPI_Sendrecv((void*)l, 1, lfield_z_slice_gath, g_nb_z_dn, 5503,
@@ -214,9 +199,6 @@ void xchange_lexicfield(spinor* const l) {
 #endif
 #endif
   return;
-#ifdef _KOJAK_INST
-#pragma pomp inst end(xchange_lexicfield)
-#endif
 }
 
 #endif
@@ -226,31 +208,27 @@ void xchange_lexicfield(spinor* const l) {
  ***********************************************************************/
 
 /* this version uses non-blocking MPI calls */
-#if (defined _NON_BLOCKING)
+#if (defined TM_NON_BLOCKING)
 void xchange_lexicfield32(spinor32* const l) {
   MPI_Request requests[16];
   MPI_Status status[16];
-#ifdef PARALLELT
+#ifdef TM_PARALLELT
   int reqcount = 4;
-#elif defined PARALLELXT
+#elif defined TM_PARALLELXT
   int reqcount = 8;
-#elif defined PARALLELXYT
+#elif defined TM_PARALLELXYT
   int reqcount = 12;
-#elif defined PARALLELXYZT
+#elif defined TM_PARALLELXYZT
   int reqcount = 16;
 #endif
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(xchange_lexicfield32)
-#endif
 
 #ifdef TM_USE_MPI
-
   /* send the data to the neighbour on the left */
   /* recieve the data from the neighbour on the right */
   MPI_Isend((void*)l, 1, lfield_time_slice_cont32, g_nb_t_dn, 5081, g_cart_grid, &requests[0]);
   MPI_Irecv((void*)(l + VOLUME), 1, lfield_time_slice_cont32, g_nb_t_up, 5081, g_cart_grid,
             &requests[1]);
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in x direction */
   /* recieve the data from the neighbour on the right in x direction */
   MPI_Isend((void*)l, 1, lfield_x_slice_gath32, g_nb_x_dn, 5091, g_cart_grid, &requests[4]);
@@ -259,7 +237,7 @@ void xchange_lexicfield32(spinor32* const l) {
 
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in y direction */
   /* recieve the data from the neighbour on the right in y direction */
   MPI_Isend((void*)l, 1, lfield_y_slice_gath32, g_nb_y_dn, 5101, g_cart_grid, &requests[8]);
@@ -267,7 +245,7 @@ void xchange_lexicfield32(spinor32* const l) {
             5101, g_cart_grid, &requests[9]);
 #endif
 
-#if (defined PARALLELXYZT)
+#if (defined TM_PARALLELXYZT)
 
   /* send the data to the neighbour on the left in z direction */
   /* recieve the data from the neighbour on the right in z direction */
@@ -282,7 +260,7 @@ void xchange_lexicfield32(spinor32* const l) {
   MPI_Irecv((void*)(l + (T + 1) * LX * LY * LZ), 1, lfield_time_slice_cont32, g_nb_t_dn, 5082,
             g_cart_grid, &requests[3]);
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the right in x direction */
   /* recieve the data from the neighbour on the left in x direction */
   MPI_Isend((void*)(l + (LX - 1) * LY * LZ), 1, lfield_x_slice_gath32, g_nb_x_up, 5092, g_cart_grid,
@@ -291,7 +269,7 @@ void xchange_lexicfield32(spinor32* const l) {
             g_nb_x_dn, 5092, g_cart_grid, &requests[7]);
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the right in y direction */
   /* recieve the data from the neighbour on the left in y direction */
   MPI_Isend((void*)(l + (LY - 1) * LZ), 1, lfield_y_slice_gath32, g_nb_y_up, 5102, g_cart_grid,
@@ -300,7 +278,7 @@ void xchange_lexicfield32(spinor32* const l) {
             lfield_y_slice_cont32, g_nb_y_dn, 5102, g_cart_grid, &requests[11]);
 #endif
 
-#if defined PARALLELXYZT
+#if defined TM_PARALLELXYZT
 
   /* send the data to the neighbour on the right in y direction */
   /* recieve the data from the neighbour on the left in y direction */
@@ -314,26 +292,14 @@ void xchange_lexicfield32(spinor32* const l) {
 
 #endif
   return;
-#ifdef _KOJAK_INST
-#pragma pomp inst end(xchange_lexicfield32)
-#endif
 }
 
 /* Here comes the naive version */
 /* Using MPI_Sendrecv */
-#else /* _NON_BLOCKING */
+#else /* TM_NON_BLOCKING */
 /* exchanges the field  l */
 void xchange_lexicfield32(spinor32* const l) {
-
-#ifdef PARALLELXYZT
-  int x0 = 0, x1 = 0, x2 = 0, ix = 0;
-#endif
-#ifdef _KOJAK_INST
-#pragma pomp inst begin(xchange_lexicfield32)
-#endif
-
 #ifdef TM_USE_MPI
-
   MPI_Status status;
   /* send the data to the neighbour on the left */
   /* recieve the data from the neighbour on the right */
@@ -347,7 +313,7 @@ void xchange_lexicfield32(spinor32* const l) {
                (void*)(l + (T + 1) * LX * LY * LZ), 1, lfield_time_slice_cont32, g_nb_t_dn, 5082,
                g_cart_grid, &status);
 
-#if (defined PARALLELXT || defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXT || defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in x direction */
   /* recieve the data from the neighbour on the right in x direction */
   MPI_Sendrecv((void*)l, 1, lfield_x_slice_gath32, g_nb_x_dn, 5091,
@@ -362,7 +328,7 @@ void xchange_lexicfield32(spinor32* const l) {
 
 #endif
 
-#if (defined PARALLELXYT || defined PARALLELXYZT)
+#if (defined TM_PARALLELXYT || defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in y direction */
   /* recieve the data from the neighbour on the right in y direction */
   MPI_Sendrecv((void*)l, 1, lfield_y_slice_gath32, g_nb_y_dn, 5101,
@@ -377,7 +343,7 @@ void xchange_lexicfield32(spinor32* const l) {
 
 #endif
 
-#if (defined PARALLELXYZT)
+#if (defined TM_PARALLELXYZT)
   /* send the data to the neighbour on the left in z direction */
   /* recieve the data from the neighbour on the right in z direction */
   MPI_Sendrecv((void*)l, 1, lfield_z_slice_gath32, g_nb_z_dn, 5503,
@@ -394,9 +360,6 @@ void xchange_lexicfield32(spinor32* const l) {
 #endif
 #endif
   return;
-#ifdef _KOJAK_INST
-#pragma pomp inst end(xchange_lexicfield32)
-#endif
 }
 
 #endif
diff --git a/xchange/xchange_lexicfield.h b/src/lib/xchange/xchange_lexicfield.h
similarity index 100%
rename from xchange/xchange_lexicfield.h
rename to src/lib/xchange/xchange_lexicfield.h
diff --git a/test/Makefile b/test/Makefile
deleted file mode 100644
index 8efc8b569..000000000
--- a/test/Makefile
+++ /dev/null
@@ -1,88 +0,0 @@
-TARGETS = scalar_prod_r_test
-
-USESF = yes
-
-OS = -os3
-
-# gcc shouldn't see this options, that's why we don't use CGLAGS here
-NLCCFLAGS = -D_STD_C99_COMPLEX_CHECKED -D_STD_C99_COMPLEX -Dapenext
-INCLUDES = -I../
-# workaround to let nlcc not see the non-standard complex.h
-NLCCINCLUDES = -I${NROOT}/include/nlibc/ ${INCLUDES}
-
-NLCCOPTS = -gp ${NLCCFLAGS} ${NLCCINCLUDES}
-ifdef USESF
-  MPPOPTS = -sf -v
-  SHAKEROPTS = -n -z 
-else 
-  MPPOPTS = -v
-  SHAKEROPTS = +a -z 
-endif
-SOFANOPTS = --rr
-
-# needed due to a bug in nlcc
-NLCCOS = -OS3
-
-NLCC = nlcc-0.5.2
-MPP = mpp
-SOFAN = sofan
-SHAKER = shaker
-M4 = m4
-CCDEP = gcc
-DEPFLAGS = -MM -MQ $*.sasm ${CFLAGS} ${INCLUDES}
-
-DEPFILES = $(addsuffix .d, ${TARGETS})
-MEMFILES = $(addsuffix .mem, ${TARGETS}) $(addsuffix -sofan.mem, ${TARGETS}) \
-	   $(addsuffix .no, ${TARGETS}) $(addsuffix -sofan.no, ${TARGETS})
-ASMFILES = $(addsuffix .sasm, ${TARGETS}) $(addsuffix .masm, ${TARGETS}) $(addsuffix -sofan.masm, ${TARGETS})
-NCDFILES = $(addsuffix .ncd, ${TARGETS}) $(addsuffix -sofan.ncd, ${TARGETS})
-SFOUTFILES = $(addsuffix .svn-out, ${TARGETS}) $(addsuffix .svn-out%, ${TARGETS}) \
-             $(addsuffix .sf_log, ${TARGETS}) $(addsuffix .sf_log%, ${TARGETS}) \
-             $(addsuffix .sf_log0, ${TARGETS}) $(addsuffix .sf_log0%, ${TARGETS}) \
-             $(addsuffix .err-sf, ${TARGETS}) $(addsuffix .svn-out, ${TARGETS}) \
-             $(addsuffix .dmo, ${TARGETS}) \
-	     $(addsuffix -sofan.svn-out, ${TARGETS}) $(addsuffix -sofan.svn-out%, ${TARGETS}) \
-             $(addsuffix -sofan.sf_log, ${TARGETS}) $(addsuffix -sofan.sf_log%, ${TARGETS}) \
-             $(addsuffix -sofan.sf_log0, ${TARGETS}) $(addsuffix -sofan.sf_log0%, ${TARGETS}) \
-             $(addsuffix -sofan.err-sf, ${TARGETS}) $(addsuffix -sofan.svn-out, ${TARGETS}) \
-             $(addsuffix -sofan.dmo, ${TARGETS})
-GCCBINARIES = $(addsuffix .gccbin, ${TARGETS})
-
-all: $(addsuffix -sofan.mem, ${TARGETS})
-allgcc:  $(addsuffix .gccbin, ${TARGETS})
-
--include $(DEPFILES)
-
-%.mem: %.masm
-	${SHAKER} ${SHAKEROPTS} $<
-
-%.masm: %.sasm
-	${MPP} ${OS} ${MPPOPTS} $<
-
-%-sofan.masm: %.masm
-	${SOFAN} ${SOFANOPTS} $< $@
-
-%.sasm: %.c Makefile
-	${NLCC} ${NLCCOPTS} ${NLCCOS} -S $<
-
-%.ncd: %.mem
-	dispminit $< > $@
-
-%-sofan.perf: %-sofan.ncd
-	nperf -asm=$*.sasm -c -l -a $< > $@ || (rm -f $@; exit 1)
-
-# beware, this is not very general
-%.gccbin: %.c
-	gcc -I../ $< -o $@
-
-$(DEPFILES): %.d: %.c Makefile
-	$(CCDEP) ${DEPFLAGS} ${INCLUDES} $< > $@
-
-clean:
-	rm -f ${ASMFILES} ${MEMFILES} ${NCDFILES} ${GCCBINARIES}
-
-distclean: clean
-	rm -f ${DEPFILES} ${SFOUTFILES}
-
-.SECONDARY:
-.DELETE_ON_ERROR:
diff --git a/util/laguer/Makefile b/util/laguer/Makefile
deleted file mode 100644
index f9bce70e3..000000000
--- a/util/laguer/Makefile
+++ /dev/null
@@ -1,9 +0,0 @@
-CXX=g++
-CXXFLAGS=-g -O2
-CLNDIR=${HOME}/daten/workdir/cln/
-
-chebyRoot: chebyRoot.C Makefile chebyRoot.H
-	${CXX} $< -g -o $@ -I${CLNDIR}/include/ -L${CLNDIR}/lib -lcln -lm
-
-clean:
-	rm -f *.o chebyRoot *.dat *.log *~
diff --git a/util/oox/Makefile b/util/oox/Makefile
deleted file mode 100644
index 88de5bdd5..000000000
--- a/util/oox/Makefile
+++ /dev/null
@@ -1,46 +0,0 @@
-CC=gcc
-CXX=g++
-CFLAGS=-O2 -fexpensive-optimizations -fomit-frame-pointer # -mfpmath=sse -msse2 
-LIBS=-lm
-OBJECTS_OOX=oox.o
-INCLUDE=-I./
-
-
-# variables for oox_ga executable
-# if you want to compile with ga lib support
-# please adjust the GALIBPATH variable
-# to the toplevel dir of galib
-# it is assumed that you compiled the library
-# such that a libga.a file is present in the 
-# ./ga subdir of galib
-GALIBPATH=/usr1/scratch/annube/galib247
-LIBS_GA=${LIBS} -L${GALIBPATH}/ga -lga
-CFLAGS_GA=${CFLAGS} -DWITHGALIB
-INCLUDE_GA=${INCLUDE} -I${GALIBPATH}
-OBJECTS_OOX_GA=oox_ga.o oox_gawrapper.o
-
-
-all: oox oox_ga
-
-oox: ${OBJECTS_OOX} Makefile
-	${CXX} ${OBJECTS_OOX} -o $@ ${CFLAGS} ${LIBS}
-
-oox_ga: ${OBJECTS_OOX_GA} Makefile
-	${CXX} ${OBJECTS_OOX_GA} -o $@ ${CFLAGS_GA} ${LIBS_GA}
-
-oox_gawrapper.o: oox_gawrapper.cxx
-	${CXX} ${CFLAGS_GA} -o $@ -c $< ${INCLUDE_GA}
-
-oox_ga.o: oox.c
-	${CC} ${CFLAGS_GA} -o $@ -c $< ${INCLUDE_GA}
-
-clean:
-	rm oox oox_ga *.o
-
-.SUFFIXES:
-
-%.o: %.c
-	${CC} ${CFLAGS}	-o $@ -c $< ${INCLUDE}
-
-%.o: %.cxx
-	${CXX} ${CFLAGS} -o $@ -c $< ${INCLUDE}