Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 35 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,18 +1,40 @@
cmake_minimum_required(VERSION 2.8.12)
project(ProPCA)
cmake_minimum_required(VERSION 3.15)
project(ProPCA VERSION 1.0.0 LANGUAGES CXX)
include(GNUInstallDirs)
set(default_build_type "Release")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DDEBUG=1")

include_directories(include/)
cmake_policy(SET CMP0069 NEW)
include(CheckIPOSupported)
check_ipo_supported(RESULT iporesult)

set(CMAKE_CXX_FLAGS "-std=c++0x -O3 -lrt")
set(CMAKE_CXX_STANDARD 11)
find_package(Threads REQUIRED)
set(ARCH "haswell" CACHE STRING "Architecture to tell gcc to optimize for (-march)")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -march=${ARCH} -mtune=${ARCH}")
option(PROPCA_ASAN "Enable (debug) address sanitizer build" OFF)
option(PROPCA_TSAN "Enable (debug) Thread sanitizer build" OFF)
add_executable(propca src/propca.cpp src/genotype.cpp src/storage.cpp)
target_include_directories(propca PRIVATE "${CMAKE_CURRENT_SOURCE_DIR}/include")
target_link_libraries(propca Threads::Threads)
if(iporesult)
message (STATUS "IPO supported for project ${PROJECT_NAME}")
set_property(TARGET propca PROPERTY CMAKE_INTERPROCEDURAL_OPTIMIZATION TRUE)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -flto")
endif()

IF (NOT DEFINED SSE_SUPPORT)
SET(SSE_SUPPORT 0)
ENDIF()
ADD_DEFINITIONS(-DSSE_SUPPORT=${SSE_SUPPORT})

IF (NOT DEFINED DEBUG)
SET(DEBUG 0)
ENDIF()
ADD_DEFINITIONS(-DDEBUG=${DEBUG})

add_executable(propca src/propca.cpp src/genotype.cpp src/storage.cpp)
if(PROPCA_ASAN)
set(CMAKE_BUILD_TYPE "Debug")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS_DEBUG} -fno-omit-frame-pointer -fsanitize=address")
endif()


if(PROPCA_TSAN)
set(CMAKE_BUILD_TYPE "RelWithDebInfo")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_LINKER_FLAGS_DEBUG} -fno-omit-frame-pointer -fsanitize=thread")
endif()


install(TARGETS propca)
25 changes: 9 additions & 16 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,32 +26,25 @@ make

### Installing

Installing ProPCA is fairly simple. Just issue the following commands on a linux machine
Installing ProPCA is fairly simple. Just issue the following commands on a Unix-alike machine

```
git clone https://github.com/sriramlab/ProPCA.git
cd ProPCA
mkdir build
cd build
cmake -S . -B build
cmake --build build
```
By default, the release version is built, if you wish to build the debug version, build with cmake flag `-D DEBUG=1` as shown below.

ProPCA supports, [SSE](https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions) instructions.

If your architecure is Intel and supports SSE instructions, build with the cmake flag `-D SSE_SUPPORT=1` for an faster improved version as shown below.

By default, the release version is built, if you wish to build the debug version, build with cmake flag `-DCMAKE_BUILD_TYPE=Debug` as shown below.

```
cmake -D SSE_SUPPORT=1 -D DEBUG=1 ..
make
cmake -S . -B Debug -DCMAKE_BUILD_TYPE=Debug
cmake --build Debug
```

Else just issue these commands below:
ProPCA supports, [SSE](https://en.wikipedia.org/wiki/Streaming_SIMD_Extensions) instructions.

If your architecure is Intel and supports SSE instructions, the C++ compiler will automatically detect this for a faster version of the algorithm.

```
cmake ..
make
```



Expand Down
1 change: 1 addition & 0 deletions examples/par.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ output_path = example_
accelerated_em = 1
var_normalize = true
memory_efficient = false
text_version = true
fast_mode = false
6 changes: 3 additions & 3 deletions include/arguments.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#include <sstream>
#include <map>
#include <fstream>
#include <bits/stdc++.h>


using namespace std;

Expand Down Expand Up @@ -47,7 +47,7 @@ struct is_same<T,T> {
extern options command_line_opts;


void exitWithError(const std::string &error) {
inline void exitWithError(const std::string &error) {
std::cout << error;
std::cin.ignore();
std::cin.get();
Expand Down Expand Up @@ -191,7 +191,7 @@ class ConfigFile{
}
};

void parse_args(int argc, char const *argv[]){
inline void parse_args(int argc, char const *argv[]){

// Setting Default Values
command_line_opts.num_of_evec=5;
Expand Down
83 changes: 42 additions & 41 deletions include/genotype.h
Original file line number Diff line number Diff line change
@@ -1,64 +1,65 @@
#ifndef GENOTYPE_H
#define GENOTYPE_H
#include <bits/stdc++.h>

#include "storage.h"
#include <Eigen/Dense>
#include <Eigen/Core>
#include <Eigen/LU>
#include <Eigen/SVD>
#include <iostream>
#include "Eigen/Dense"
#include "Eigen/Core"
#include "Eigen/LU"
#include "Eigen/SVD"


class genotype {
std::vector< std::vector <bool> > msb;
std::vector< std::vector <bool> > lsb;
std::vector<int> columnsum;
std::vector<double> columnmeans;
std::vector< std::vector <bool> > msb;
std::vector< std::vector <bool> > lsb;
std::vector<int> columnsum;
std::vector<double> columnmeans;

public:
public:

unsigned char mask;
int wordsize;
unsigned int unitsperword;
int unitsize;
int nrow, ncol;
unsigned char *gtype;
unsigned char mask;
int wordsize;
unsigned int unitsperword;
int unitsize;
int nrow, ncol;
unsigned char *gtype;

int Nsnp, Nindv, Nsegments_hori, segment_size_hori, segment_size_ver, Nsegments_ver;
int Nbits_hori, Nbits_ver;
int Nelements_hori, Nelements_ver;
std::vector< std::vector<int> > p;
int Nsnp, Nindv, Nsegments_hori, segment_size_hori, segment_size_ver, Nsegments_ver;
int Nbits_hori, Nbits_ver;
int Nelements_hori, Nelements_ver;
std::vector< std::vector<int> > p;

// std::vector< std::vector<unsigned> > p_eff;
// std::vector< std::vector<unsigned> > q_eff;
// std::vector< std::vector<unsigned> > p_eff;
// std::vector< std::vector<unsigned> > q_eff;

std::vector< std::vector<int> > not_O_j;
std::vector< std::vector<int> > not_O_i;
std::vector< std::vector<int> > not_O_j;
std::vector< std::vector<int> > not_O_i;

void init_means(bool is_missing);
void init_means(bool is_missing);

float get_observed_pj(const std::string &line);
float get_observed_pj(const unsigned char* line);
float get_observed_pj(const std::string &line);
float get_observed_pj(const unsigned char* line);

void read_txt_naive(std::string filename,bool allow_missing);
void read_txt_naive(std::string filename,bool allow_missing);

void read_txt_mailman (std::string filename,bool allow_missing);
void read_txt_mailman (std::string filename,bool allow_missing);

void read_plink (std::string filenameprefix, bool allow_missing,bool mailman_mode);
void read_bed (std::string filename, bool allow_missing, bool mailman_mode ) ;
void read_bed_mailman (std::string filename,bool allow_missing ) ;
void read_bed_naive (std::string filename ,bool allow_missing) ;
void read_plink (std::string filenameprefix, bool allow_missing,bool mailman_mode);
void read_bed (std::string filename, bool allow_missing, bool mailman_mode ) ;
void read_bed_mailman (std::string filename,bool allow_missing ) ;
void read_bed_naive (std::string filename ,bool allow_missing) ;

int countlines(std::string filename);
void set_metadata () ;
int countlines(std::string filename);
void set_metadata () ;


double get_geno(int snpindex,int indvindex,bool var_normalize);
double get_col_mean(int snpindex);
double get_col_sum(int snpindex);
double get_col_std(int snpindex);
void update_col_mean(int snpindex,double value);
double get_geno(int snpindex,int indvindex,bool var_normalize);
double get_col_mean(int snpindex);
double get_col_sum(int snpindex);
double get_col_std(int snpindex);
void update_col_mean(int snpindex,double value);

void generate_eigen_geno(Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> &geno_matrix,bool var_normalize);
void generate_eigen_geno(Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> &geno_matrix,bool var_normalize);


};
Expand Down
16 changes: 9 additions & 7 deletions include/helper.h
Original file line number Diff line number Diff line change
@@ -1,15 +1,17 @@
#ifndef HELPER_H
#define HELPER_H

#include <bits/stdc++.h>

#include "time.h"
#include <cstdio>
#include <iostream>


using namespace std;


extern struct timespec t0;
struct timespec elapsed (){
inline struct timespec elapsed (){
struct timespec ts;
clock_gettime (CLOCK_REALTIME, &ts);
if (ts.tv_nsec < t0.tv_nsec){
Expand All @@ -20,12 +22,12 @@ struct timespec elapsed (){
return (ts);
}

int timelog (const char* message){
inline int timelog (const char* message){
struct timespec ts = elapsed ();
return (printf ("[%06ld.%09ld] %s\n", ts.tv_sec, ts.tv_nsec, message));
}

void * malloc_double_align(size_t n, unsigned int a /*alignment*/, double *& output){
inline void * malloc_double_align(size_t n, unsigned int a /*alignment*/, double *& output){
void *adres=NULL;
void *adres2=NULL;
adres=malloc(n*sizeof(double)+a);
Expand All @@ -36,17 +38,17 @@ void * malloc_double_align(size_t n, unsigned int a /*alignment*/, double *& out
return adres; // pointer to be used in free()
}

void print_timenl () {
inline void print_timenl () {
clock_t c = clock();
double t = double(c) / CLOCKS_PER_SEC;
cout << "Time = " << t << endl ;
}

void print_time () {
inline void print_time () {
clock_t c = clock();
double t = double(c) / CLOCKS_PER_SEC;
cout << "Time = " << t << " : ";
}


#endif
#endif
17 changes: 8 additions & 9 deletions include/mailman.h
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
#ifndef MAILMAN_H
#define MAILMAN_H

#include <Eigen/Dense>
#include <Eigen/Core>
#include <Eigen/LU>
#include <Eigen/SVD>
#include <bits/stdc++.h>
#include "Eigen/Dense"
#include "Eigen/Core"
#include "Eigen/LU"
#include "Eigen/SVD"
#include "storage.h"
#include <assert.h>
#include <emmintrin.h>

#include <vector>

namespace mailman {

Expand All @@ -23,7 +22,7 @@ namespace mailman {
* c : intermediate computation
* y : result
*/
void fastmultiply_normal(int m, int n , int k, std::vector<int> &p, Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> &x, double *yint, double *c, double **y){
inline void fastmultiply_normal(int m, int n , int k, std::vector<int> &p, Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> &x, double *yint, double *c, double **y){
for (int i = 0 ; i < n; i++) {
int l = p[i] ;
for (int j = 0 ; j < k ; j ++)
Expand Down Expand Up @@ -68,7 +67,7 @@ namespace mailman {
* c : intermediate computation
* y : result. also contains Y_0 that is updated.
*/
void fastmultiply_pre_normal(int m, int n , int k, int start, std::vector<int> &p, Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> &x, double *yint, double *c, double **y){
inline void fastmultiply_pre_normal(int m, int n , int k, int start, std::vector<int> &p, Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> &x, double *yint, double *c, double **y){
int size1 = pow(3.,m);
memset (yint, 0, size1* sizeof(double));

Expand Down Expand Up @@ -106,7 +105,7 @@ namespace mailman {
* c : intermediate computation
* y : result
*/
#if SSE_SUPPORT==1
#ifdef __SSE__
// k must be a multiple of 10
void fastmultiply_sse (int m, int n , int k, std::vector<int> &p, Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor> &x, double *yint, double *c, double **y){
__m128d x0, x2, x4, x6, x8;
Expand Down
4 changes: 2 additions & 2 deletions include/storage.h
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#ifndef STORAGE_H
#define STORAGE_H

#include <bits/stdc++.h>
#include <vector>

void add_to_arr(int x, int j, int beta,std::vector<unsigned> &arr);
int extract_from_arr(int j,int beta,std::vector<unsigned> &arr);
std::vector<int> get_orig_arr(int beta,std::vector<unsigned> &arr,int Nelements);

#endif
#endif
Loading