FFHT/_ffht_2.c at master · FALCONN-LIB/FFHT · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#include <Python.h>
#include <numpy/arrayobject.h>
#include "fht.h"

#define UNUSED(x) (void)(x)

static char module_docstring[] =
    "A C extension that computes the Fast Hadamard Transform";
static char fht_docstring[] =
    "Compute the Fast Hadamard Transform (FHT) for a given "
    "one-dimensional NumPy array.\n\n"
    "The Hadamard Transform is a linear orthogonal map defined on real vectors "
    "whose length is a _power of two_. For the precise definition, see the "
    "[Wikipedia entry](https://en.wikipedia.org/wiki/Hadamard_transform). The "
    "Hadamard Transform has been recently used a lot in various machine "
    "learning "
    "and numerical algorithms.\n\n"
    "The implementation uses "
    "[AVX](https://en.wikipedia.org/wiki/Advanced_Vector_Extensions) "
    "to speed up the computation. If AVX is not supported on your machine, "
    "a simpler implementation without (explicit) vectorization is used.\n\n"
    "The function takes two parameters:\n\n"
    "* `buffer` is a NumPy array which is being transformed. It must be a "
    "one-dimensional array with `dtype` equal to `float32` or `float64` (the "
    "former is recommended unless you need high accuracy) and of size being a "
    "power "
    "of two. If your CPU supports AVX, then `buffer` must be aligned to 32 "
    "bytes. "
    "To allocate such an aligned buffer, use the function `created_aligned` "
    "from this "
    "module.\n"
    "* `chunk` is a positive integer that controls when the implementation "
    "switches "
    "from recursive to iterative algorithm. The overall algorithm is "
    "recursive, but as "
    "soon as the vector becomes no longer than `chunk`, the iterative "
    "algorithm is "
    "invoked. For technical reasons, `chunk` must be at least 8. A good choice "
    "is to "
    "set `chunk` to 1024. But to fine-tune the performance one should use a "
    "program "
    "`best_chunk` supplied with the library.\n";

static PyObject *ffht_fht(PyObject *self, PyObject *args);

static PyMethodDef module_methods[] = {
    {"fht", ffht_fht, METH_VARARGS, fht_docstring}, {NULL, NULL, 0, NULL}};

PyMODINIT_FUNC initffht(void);

PyMODINIT_FUNC initffht(void) {
  PyObject *m = Py_InitModule3("ffht", module_methods, module_docstring);
  if (!m) return;

  import_array();
}

static PyObject *ffht_fht(PyObject *self, PyObject *args) {
  UNUSED(self);

  PyObject *buffer_obj;

  if (!PyArg_ParseTuple(args, "O", &buffer_obj)) {
    return NULL;
  }

  PyArray_Descr *dtype;
  int ndim;
  npy_intp dims[NPY_MAXDIMS];
  PyArrayObject *arr = NULL;

  if (PyArray_GetArrayParamsFromObject(buffer_obj, NULL, 1, &dtype, &ndim, dims,
                                       &arr, NULL) < 0) {
    return NULL;
  }

  if (arr == NULL) {
    PyErr_SetString(PyExc_TypeError, "not a numpy array");
    return NULL;
  }

  dtype = PyArray_DESCR(arr);

  if (dtype->type_num != NPY_FLOAT && dtype->type_num != NPY_DOUBLE) {
    PyErr_SetString(PyExc_TypeError, "array must consist of floats or doubles");
    Py_DECREF(arr);
    return NULL;
  }

  if (PyArray_NDIM(arr) != 1) {
    PyErr_SetString(PyExc_TypeError, "array must be one-dimensional");
    Py_DECREF(arr);
    return NULL;
  }

  int n = PyArray_DIM(arr, 0);

  if (n == 0 || (n & (n - 1))) {
    PyErr_SetString(PyExc_ValueError, "array's length must be a power of two");
    Py_DECREF(arr);
    return NULL;
  }

  int log_n = 0;
  while ((1 << log_n) < n) {
    ++log_n;
  }

  void *raw_buffer = PyArray_DATA(arr);
  int res;
  if (dtype->type_num == NPY_FLOAT) {
    float *buffer = (float *)raw_buffer;
    res = fht_float(buffer, log_n);
  } else {
    double *buffer = (double *)raw_buffer;
    res = fht_double(buffer, log_n);
  }

  if (res) {
    PyErr_SetString(PyExc_RuntimeError, "FHT did not work properly");
    Py_DECREF(arr);
    return NULL;
  }

  Py_DECREF(arr);

  return Py_BuildValue("");
}