Face-alignment-Trees/feature2.cpp at master · goodluckcwl/Face-alignment-Trees · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
#include <opencv2/opencv.hpp>
#include "featpyramid.h"

using cv::Mat;
using cv::Vec;
using cv::Rect;
using cv::Scalar;

// takes a float color image and a bin size
// returns HOG features
//Mat		features( const Mat &image, const int sbin );

//--------------------------------------------------------
// small value, used to avoid division by zero
#ifndef eps
#define eps	0.0001f
#endif

#define	round(x)	((x-floor(x))>0.5 ? ceil(x) : floor(x))

// unit vectors used to compute gradient orientation
const float uu[9] = { 1.0000f, 0.9397f, 0.7660f, 0.500f, 0.1736f, -0.1736f, -0.5000f, -0.7660f, -0.9397f };
const float vv[9] = { 0.0000f, 0.3420f, 0.6428f, 0.8660f, 0.9848f, 0.9848f, 0.8660f, 0.6428f, 0.3420f };

#ifndef	MIN
#define MIN(x,y)	 (x<y?x:y)
#endif
#ifndef MAX
#define MAX(x,y)	 (x>y?x:y)
#endif


// 因为featpyramid()中对各个pyra.feat[i]进行0/1扩展耗费了大量时间，为了减小这个开支，这里直接对计算的feature扩展
Mat		features2(const Mat &image, const int sbin, const int padx,const int pady)
{
	const int	dims[3] = { image.rows, image.cols, image.channels() };
	const int	row_step = image.cols * image.channels();
	const float * const imgpt = image.ptr<float>(0, 0);
	if (image.type() != CV_32FC3)
		fprintf(stderr,"Invalid input!");

	// memory for caching orientation histograms & their norms
	const int	blocks[2] = { (int)round((float)dims[0] / (float)sbin), (int)round((float)dims[1] / (float)sbin) };
	const int	block_sz = blocks[0] * blocks[1];
	const int	visible[2] = { blocks[0] * sbin, blocks[1] * sbin };

	Mat		histmat[18];
	float	*hist[18];
	int			hist_step[18];
	Mat		hist_tmp(blocks[0], blocks[1] * 18, CV_32FC1, Scalar(0));
	for (int i = 0; i<18; i++){
		//histmat[i] = Mat::zeros( blocks[0], blocks[1], CV_32FC1 );
		histmat[i] = hist_tmp(Rect(i*blocks[1], 0, blocks[1], blocks[0]));
		hist[i] = histmat[i].ptr<float>(0, 0);
		hist_step[i] = histmat[i].step1();
	}

	// 1. Calculate the original HOG
	for (int yy = 1; yy < visible[0] - 1; yy++) { // each row
		int		y = MIN(yy, dims[0] - 2);
		const float	*imgpt1 = imgpt + y*row_step;
		for (int xx = 1; xx < visible[1] - 1; xx++) { // each col
			int		x = MIN(xx, dims[1] - 2);

			// b-g-r color at (y,x) is { *s, *(s+1), *(s+2) }
			const float	*s = imgpt1 + x*dims[2];
			float Right[3] = { *(s + 3), *(s + 4), *(s + 5) };
			float Left[3] = { *(s - 3), *(s - 2), *(s - 1) };
			float Up[3] = { *(s - row_step), *(s + 1 - row_step), *(s + 2 - row_step) };
			float Down[3] = { *(s + row_step), *(s + 1 + row_step), *(s + 2 + row_step) };

			// gradients and amplitude of orientations of each channel
			float	dxs, dys, vs;
			float	dx, dy, v = -1;
			for (int i = 0; i != 3; i++){
				dxs = Right[i] - Left[i];
				dys = Down[i] - Up[i];
				vs = dxs*dxs + dys*dys;
				if (vs >= v){
					v = vs;
					dx = dxs;
					dy = dys;
				}
			}

			// snap to one of 18 orientations
			float best_dot = 0;
			int best_o = 0;
			for (int o = 0; o < 9; o++) {
				float dot = uu[o] * dx + vv[o] * dy;
				if (dot > best_dot) {
					best_dot = dot;
					best_o = o;
				}
				else if (-dot > best_dot) {
					best_dot = -dot;
					best_o = o + 9;
				}
			}

			// add to 4 histograms around pixel using linear interpolation
			float xp = (xx + 0.5f) / (float)sbin - 0.5f;
			float yp = (yy + 0.5f) / (float)sbin - 0.5f;
			//int ixp = (int)floorf(xp),  iyp = (int)floorf(yp);
			int ixp = (int)xp, iyp = (int)yp;
			if (xp<0 && xp != ixp)
				ixp--;
			if (yp<0 && yp != iyp)
				iyp--;
			int ixp_p = ixp + 1, iyp_p = iyp + 1;
			float vx0 = xp - ixp, vy0 = yp - iyp;
			float vx1 = 1.f - vx0, vy1 = 1.f - vy0;
			v = sqrtf(v);

			if (ixp<0){
				ixp = 0;
				vx1 = 0;
			}
			if (ixp_p >= blocks[1]){
				ixp_p = blocks[1] - 1;
				vx0 = 0;
			}
			if (iyp<0){
				iyp = 0;
				vy1 = 0;
			}
			if (iyp_p >= blocks[0]){
				iyp_p = blocks[0] - 1;
				vy0 = 0;
			}

			float	*hist_pt = hist[best_o] + iyp*hist_step[best_o] + ixp;
			ixp_p = ixp_p - ixp;
			iyp_p = iyp_p - iyp;
			*(hist_pt) += vx1*vy1*v;
			*(hist_pt + ixp_p) += vx0*vy1*v;
			*(hist_pt + iyp_p*hist_step[best_o]) += vx1*vy0*v;
			*(hist_pt + iyp_p*hist_step[best_o] + ixp_p) += vx0*vy0*v;
		}
	}

	// 2. Compute energy in each block by summing over orientations
	Mat		normmat(blocks[0], blocks[1], CV_32FC1, Scalar(0));
	float * const norm = normmat.ptr<float>(0, 0);
	const int	norm_step = normmat.step1();

	Mat	histmat_insens[9]; // insensitive features
	float	*histinsens[9];
	int		histinsens_step[9];
	for (int i = 0; i<9; i++){
		histmat_insens[i] = histmat[i] + histmat[i + 9];
		accumulateSquare(histmat_insens[i], normmat);
		histinsens[i] = histmat_insens[i].ptr<float>(0, 0);
		histinsens_step[i] = histmat_insens[i].step1();
	}

	// normalization coefficients
	Mat	Normmss(blocks[0] - 1, blocks[1] - 1, CV_32FC1);
	float * const Norms = Normmss.ptr<float>(0, 0);
	const int	norms_step = Normmss.step1();
	for (int i = 0; i<Normmss.rows; i++){
		float	*src = norm + i*norm_step;
		float	*dst = Norms + i*norms_step;
		for (int j = 0; j<Normmss.cols; j++){
			float	tmp = *src + *(src + 1) + *(src + norm_step) + *(src + norm_step + 1) + eps;
			//*(dst++) = 1.f / sqrtf(tmp);
			*(dst++) = tmp;
			src++;
		}
	}
	cv::sqrt(Normmss, Normmss);
	Normmss = 1.f / Normmss;

	// 3. Reduced HOG features.
	int out[3];
	out[0] = MAX(blocks[0] - 2, 0);
	out[1] = MAX(blocks[1] - 2, 0);
	out[2] = 27 + 4 + 1; // 32 dimensional features
	Vec<float, 32>	init_val;
	assert(out[2] == 32);
	init_val = 0;
	init_val[out[2] - 1] = 1;
	Mat	feat_mat(out[0] + 2 * pady, out[1] + 2 * padx, CV_32FC(out[2]));
	feat_mat.setTo(init_val);
	float * const feat_data = feat_mat.ptr<float>(pady, padx);
	const int feat_step = feat_mat.step1();

	for (int y = 0; y<out[0]; y++){
		float	*dst = feat_data + y*feat_step;
		float	*Normpt = Norms + y*norms_step;
		float	*histinsens0[9];
		for (int i = 0; i<9; i++)
			histinsens0[i] = histinsens[i] + (y + 1)*histinsens_step[i];
		for (int x = 0; x<out[1]; x++){
			//
			float	n4 = *Normpt;
			float	n2 = *(Normpt + 1);
			float	n3 = *(Normpt + norms_step);
			float	n1 = *(Normpt + norms_step + 1);
			Normpt++;

			// 18 contrast-sensitive features
			float	t1 = 0, t2 = 0, t3 = 0, t4 = 0;
			for (int i = 0; i<18; i++){
				float	src = *(hist[i] + (y + 1)*hist_step[i] + x + 1);
				float h1 = MIN(src * n1, 0.2f);
				float h2 = MIN(src * n2, 0.2f);
				float h3 = MIN(src * n3, 0.2f);
				float h4 = MIN(src * n4, 0.2f);
				*(dst++) = 0.5f * (h1 + h2 + h3 + h4);
				t1 += h1;
				t2 += h2;
				t3 += h3;
				t4 += h4;
			}

			// 9 contrast-insensitive features
			for (int i = 0; i<9; i++){
				float	*src = ++histinsens0[i];
				//float	src = histmat_insens[i].at<float>(y+1,x+1);
				float h1 = MIN(*src * n1, 0.2f);
				float h2 = MIN(*src * n2, 0.2f);
				float h3 = MIN(*src * n3, 0.2f);
				float h4 = MIN(*src * n4, 0.2f);
				*(dst++) = 0.5f * (h1 + h2 + h3 + h4);
			}

			// 4 texture features
			*(dst++) = 0.2357f * t1;
			*(dst++) = 0.2357f * t2;
			*(dst++) = 0.2357f * t3;
			*(dst++) = 0.2357f * t4;

			// 1 truncation feature
			*(dst++) = 0;

		}
	}

	return	feat_mat;
}


/* 如何加速

1. 使用宏（MIN,MAX）比使用内联函数要快
2. 一次分配数据空间比多次分配数据空间要快：
a. 多个同样大小的矩阵--一次分配一个大的空间，每个矩阵分别索引该空间的一个子块
b. 函数内部使用的中间矩阵，如果大小不变，可以声明静态变量；如果大小会变，但是有上线，可以使用静态vector
3. 标准库模板stl中提供了一些常用函数，如 floorf(),sqrtf()，但是这些函数的效率不高，经常使用会导致函数性能下降，
如果可以，尽量使用整型截取(int())来代替floorf，使用openCV提供的cv::sqrt函数来一次处理大量数据的sqrtf()操作。
另外，std::sqrtf()函数要比std::sqrt()函数要快（在大量重复试验下，可以看出明显差异）。
4. 尽可能使用指针操作--指针操作是最快速内存访问方法，要避免使用openCV提供的矩阵元素获取的函数。

*/