減色プログラム
Revisão | 4b1196c90fb583a3002d91b6997db06f780726fe (tree) |
---|---|
Hora | 2011-05-22 13:48:48 |
Autor | beru <berupon@gmai...> |
Committer | beru |
Used AVX intrinsics, but they don't improve processing speed. Actually, the AVX version is slower than the SSE version. Why is that?
@@ -12,7 +12,7 @@ struct Array2D | ||
12 | 12 | |
13 | 13 | void allocate() { |
14 | 14 | // pBuff_ = new T[width_ * height_]; |
15 | - pBuff_ = (T*) _aligned_malloc(sizeof(T) * width_ * height_, 16); | |
15 | + pBuff_ = (T*) _aligned_malloc(sizeof(T) * width_ * height_, 32); | |
16 | 16 | } |
17 | 17 | |
18 | 18 | Array2D(const Array2D& arr) |
@@ -52,102 +52,102 @@ struct Array2D | ||
52 | 52 | } |
53 | 53 | |
54 | 54 | __forceinline |
55 | - T* operator[] (int row) { | |
56 | - return &pBuff_[row * width_]; | |
57 | - } | |
58 | - | |
59 | - __forceinline | |
60 | - const T* operator[] (int row) const { | |
61 | - return &pBuff_[row * width_]; | |
62 | - } | |
55 | + T* operator[] (int row) { | |
56 | + return &pBuff_[row * width_]; | |
57 | + } | |
58 | + | |
59 | + __forceinline | |
60 | + const T* operator[] (int row) const { | |
61 | + return &pBuff_[row * width_]; | |
62 | + } | |
63 | 63 | |
64 | - Array2D<T>& operator *= (const T& scalar) { | |
65 | -#if 1 | |
66 | - for (size_t i=0; i<width_*height_; ++i) { | |
67 | - pBuff_[i] *= scalar; | |
68 | - } | |
69 | -#else | |
70 | - for (int i=0; i<width_; i++) { | |
71 | - for (int j=0; j<height_; j++) { | |
72 | - (*this)[j][i] *= scalar; | |
73 | - } | |
74 | - } | |
75 | -#endif | |
76 | - return *this; | |
77 | - } | |
78 | - | |
79 | - template <typename T2> | |
80 | - Array2D<T> operator * (const T2& scalar) { | |
81 | - Array2D<T> result(*this); | |
82 | - result *= scalar; | |
83 | - return result; | |
84 | - } | |
85 | - | |
86 | - std::vector<T> operator * (const std::vector<T>& vec) { | |
87 | - std::vector<T> result(height_); | |
88 | - T sum; | |
89 | - for (int row=0; row<height_; row++) { | |
90 | - sum = 0; | |
91 | - for (int col=0; col<width_; col++) { | |
92 | - sum += (*this)[row][col] * vec[col]; | |
93 | - } | |
94 | - result[row] = sum; | |
95 | - } | |
96 | - return result; | |
64 | + Array2D<T>& operator *= (const T& scalar) { | |
65 | +#if 1 | |
66 | + for (size_t i=0; i<width_*height_; ++i) { | |
67 | + pBuff_[i] *= scalar; | |
68 | + } | |
69 | +#else | |
70 | + for (int i=0; i<width_; i++) { | |
71 | + for (int j=0; j<height_; j++) { | |
72 | + (*this)[j][i] *= scalar; | |
73 | + } | |
74 | + } | |
75 | +#endif | |
76 | + return *this; | |
97 | 77 | } |
98 | 78 | |
99 | - Array2D<T>& multiply_row_scalar(int row, double mult) { | |
100 | - for (int i=0; i<width_; i++) { | |
101 | - (*this)[row][i] *= mult; | |
102 | - } | |
103 | - return *this; | |
104 | - } | |
105 | - | |
106 | - Array2D<T>& add_row_multiple(int from_row, int to_row, double mult) { | |
107 | - for (int i=0; i<width_; ++i) { | |
108 | - (*this)[to_row][i] += mult*(*this)[from_row][i]; | |
109 | - } | |
110 | - return *this; | |
111 | - } | |
112 | - | |
113 | - // We use simple Gaussian elimination - perf doesn't matter since | |
114 | - // the matrices will be K x K, where K = number of palette entries. | |
115 | - Array2D<T> matrix_inverse() { | |
116 | - Array2D<T> result(width_, height_); | |
117 | - Array2D<T>& a = *this; | |
118 | - | |
119 | - // Set result to identity matrix | |
120 | - result *= 0; | |
121 | - for (int i=0; i<width_; i++) { | |
122 | - result[i][i] = 1; | |
123 | - } | |
124 | - // Reduce to echelon form, mirroring in result | |
125 | - for (int i=0; i<width_; i++) { | |
126 | - result.multiply_row_scalar(i, 1/a[i][i]); | |
127 | - multiply_row_scalar(i, 1/a[i][i]); | |
128 | - for (int j=i+1; j<height_; j++) { | |
129 | - result.add_row_multiple(i, j, -a[j][i]); | |
130 | - add_row_multiple(i, j, -a[j][i]); | |
131 | - } | |
132 | - } | |
133 | - // Back substitute, mirroring in result | |
134 | - for (int i=width_-1; i>=0; i--) { | |
135 | - for (int j=i-1; j>=0; j--) { | |
136 | - result.add_row_multiple(i, j, -a[j][i]); | |
137 | - add_row_multiple(i, j, -a[j][i]); | |
138 | - } | |
139 | - } | |
140 | - // result is now the inverse | |
141 | - return result; | |
142 | - } | |
79 | + template <typename T2> | |
80 | + Array2D<T> operator * (const T2& scalar) { | |
81 | + Array2D<T> result(*this); | |
82 | + result *= scalar; | |
83 | + return result; | |
84 | + } | |
85 | + | |
86 | + std::vector<T> operator * (const std::vector<T>& vec) { | |
87 | + std::vector<T> result(height_); | |
88 | + T sum; | |
89 | + for (int row=0; row<height_; row++) { | |
90 | + sum = 0; | |
91 | + for (int col=0; col<width_; col++) { | |
92 | + sum += (*this)[row][col] * vec[col]; | |
93 | + } | |
94 | + result[row] = sum; | |
95 | + } | |
96 | + return result; | |
97 | + } | |
98 | + | |
99 | + Array2D<T>& multiply_row_scalar(int row, double mult) { | |
100 | + for (int i=0; i<width_; i++) { | |
101 | + (*this)[row][i] *= mult; | |
102 | + } | |
103 | + return *this; | |
104 | + } | |
105 | + | |
106 | + Array2D<T>& add_row_multiple(int from_row, int to_row, double mult) { | |
107 | + for (int i=0; i<width_; ++i) { | |
108 | + (*this)[to_row][i] += mult*(*this)[from_row][i]; | |
109 | + } | |
110 | + return *this; | |
111 | + } | |
112 | + | |
113 | + // We use simple Gaussian elimination - perf doesn't matter since | |
114 | + // the matrices will be K x K, where K = number of palette entries. | |
115 | + Array2D<T> matrix_inverse() { | |
116 | + Array2D<T> result(width_, height_); | |
117 | + Array2D<T>& a = *this; | |
118 | + | |
119 | + // Set result to identity matrix | |
120 | + result *= 0; | |
121 | + for (int i=0; i<width_; i++) { | |
122 | + result[i][i] = 1; | |
123 | + } | |
124 | + // Reduce to echelon form, mirroring in result | |
125 | + for (int i=0; i<width_; i++) { | |
126 | + result.multiply_row_scalar(i, 1/a[i][i]); | |
127 | + multiply_row_scalar(i, 1/a[i][i]); | |
128 | + for (int j=i+1; j<height_; j++) { | |
129 | + result.add_row_multiple(i, j, -a[j][i]); | |
130 | + add_row_multiple(i, j, -a[j][i]); | |
131 | + } | |
132 | + } | |
133 | + // Back substitute, mirroring in result | |
134 | + for (int i=width_-1; i>=0; i--) { | |
135 | + for (int j=i-1; j>=0; j--) { | |
136 | + result.add_row_multiple(i, j, -a[j][i]); | |
137 | + add_row_multiple(i, j, -a[j][i]); | |
138 | + } | |
139 | + } | |
140 | + // result is now the inverse | |
141 | + return result; | |
142 | + } | |
143 | 143 | |
144 | 144 | }; |
145 | 145 | |
146 | -template <typename T> | |
147 | -Array2D<T> operator * (T scalar, const Array2D<T>& a) { | |
148 | - Array2D<T> tmp = a; | |
149 | - return tmp * scalar; | |
150 | -} | |
146 | +template <typename T> | |
147 | +Array2D<T> operator * (T scalar, const Array2D<T>& a) { | |
148 | + Array2D<T> tmp = a; | |
149 | + return tmp * scalar; | |
150 | +} | |
151 | 151 | |
152 | 152 | template <typename T> |
153 | 153 | struct Array3D |
@@ -186,11 +186,11 @@ public: | ||
186 | 186 | } |
187 | 187 | |
188 | 188 | /* |
189 | - Array2D<T> operator[] (int depth) { | |
190 | - return Array2D<T>(width_, height_, &pBuff_[depth * width_ * height_]); | |
189 | + Array2D<T> operator[] (int depth) { | |
190 | + return Array2D<T>(width_, height_, &pBuff_[depth * width_ * height_]); | |
191 | 191 | } |
192 | - Array2D<T> operator[] (int depth) const { | |
193 | - return Array2D<T>(width_, height_, &pBuff_[depth * width_ * height_]); | |
192 | + Array2D<T> operator[] (int depth) const { | |
193 | + return Array2D<T>(width_, height_, &pBuff_[depth * width_ * height_]); | |
194 | 194 | } |
195 | 195 | */ |
196 | 196 | __forceinline |
@@ -0,0 +1,115 @@ | ||
1 | +#pragma once | |
2 | + | |
3 | +#include <immintrin.h> | |
4 | + | |
5 | +struct Color4d | |
6 | +{ | |
7 | + __m256d v; | |
8 | + | |
9 | + Color4d() { | |
10 | + ; | |
11 | + } | |
12 | + | |
13 | + Color4d(const Color4d& c) { | |
14 | + *this = c; | |
15 | + } | |
16 | + | |
17 | + Color4d(double r, double g, double b, double a) { | |
18 | + v = _mm256_setr_pd(r,g,b,a); | |
19 | + } | |
20 | + | |
21 | + Color4d& operator = (const Color4d& rhs) { | |
22 | + v = rhs.v; | |
23 | + return *this; | |
24 | + } | |
25 | + | |
26 | + Color4d direct_product(const Color4d& rhs) const { | |
27 | + Color4d result; | |
28 | + result.v = _mm256_mul_pd(v, rhs.v); | |
29 | + return result; | |
30 | + } | |
31 | + | |
32 | + double dot_product(const Color4d& rhs) { | |
33 | +// http://www.icnet.ne.jp/~nsystem/simd_tobira/dpps.html | |
34 | + __m256d s = _mm256_mul_pd(this->v, rhs.v); | |
35 | + __m128d s1 = _mm256_extractf128_pd(s, 0); | |
36 | + __m128d s2 = _mm256_extractf128_pd(s, 1); | |
37 | + __m128d as = _mm_add_pd(s1, s2); | |
38 | + as = _mm_hadd_pd(as, as); | |
39 | + return as.m128d_f64[0]; | |
40 | + } | |
41 | + | |
42 | + Color4d& operator += (const Color4d& rhs) { | |
43 | + v = _mm256_add_pd(v, rhs.v); | |
44 | + return *this; | |
45 | + } | |
46 | + | |
47 | + Color4d operator + (const Color4d& rhs) { | |
48 | + return Color4d(*this) += rhs; | |
49 | + } | |
50 | + | |
51 | + Color4d& operator -= (const Color4d& rhs) { | |
52 | + v = _mm256_sub_pd(v, rhs.v); | |
53 | + return *this; | |
54 | + } | |
55 | + | |
56 | + Color4d operator - (const Color4d& rhs) { | |
57 | + return Color4d(*this) -= rhs; | |
58 | + } | |
59 | + | |
60 | + Color4d& operator *= (const Color4d& rhs) { | |
61 | + v = _mm256_mul_pd(v, rhs.v); | |
62 | + return *this; | |
63 | + } | |
64 | + | |
65 | + Color4d operator * (const Color4d& rhs) { | |
66 | + return Color4d(*this) *= rhs; | |
67 | + } | |
68 | + | |
69 | + Color4d& operator *= (double scalar) { | |
70 | + __m256d s = _mm256_set1_pd(scalar); | |
71 | + v = _mm256_mul_pd(v, s); | |
72 | + return *this; | |
73 | + } | |
74 | + | |
75 | + Color4d operator * (double scalar) { | |
76 | + return Color4d(*this) *= scalar; | |
77 | + } | |
78 | + | |
79 | + double& operator[] (int idx) { | |
80 | + return ((double*)&v)[idx]; | |
81 | + } | |
82 | + const double& operator[] (int idx) const { | |
83 | + return ((double*)&v)[idx]; | |
84 | + } | |
85 | + | |
86 | + double norm_squared() { | |
87 | +#if 1 | |
88 | + __m256d s = _mm256_mul_pd(v, v); | |
89 | + __m128d s1 = _mm256_extractf128_pd(s, 0); | |
90 | + __m128d s2 = _mm256_extractf128_pd(s, 1); | |
91 | + __m128d as = _mm_add_pd(s1, s2); | |
92 | + as = _mm_hadd_pd(as, as); | |
93 | + return as.m128d_f64[0]; | |
94 | +#else | |
95 | + double result = 0; | |
96 | + for (int i=0; i<3; i++) { | |
97 | + result += (*this)[i] * (*this)[i]; | |
98 | + } | |
99 | + return result; | |
100 | +#endif | |
101 | + } | |
102 | + | |
103 | + void zero() { | |
104 | + v = _mm256_setzero_pd(); | |
105 | + } | |
106 | +}; | |
107 | + | |
108 | +inline Color4d operator * (double scalar, const Color4d& c) { | |
109 | + return Color4d(c) *= scalar; | |
110 | +} | |
111 | + | |
112 | +inline Color4d operator * (const Color4d& c, double scalar) { | |
113 | + return Color4d(c) *= scalar; | |
114 | +} | |
115 | + |
@@ -1,5 +1,7 @@ | ||
1 | 1 | #pragma once |
2 | 2 | |
3 | +#include <intrin.h> | |
4 | + | |
3 | 5 | struct Color4d |
4 | 6 | { |
5 | 7 | __m128d v[2]; |
@@ -24,16 +26,16 @@ struct Color4d | ||
24 | 26 | } |
25 | 27 | |
26 | 28 | Color4d direct_product(const Color4d& rhs) const { |
27 | - Color4d result; | |
28 | -#if 1 | |
29 | - result.v[0] = _mm_mul_pd(v[0], rhs.v[0]); | |
30 | - result.v[1] = _mm_mul_pd(v[1], rhs.v[1]); | |
31 | -#else | |
32 | - for (int i=0; i<3; i++) { | |
33 | - result[i] = (*this)[i] * rhs[i]; | |
34 | - } | |
35 | -#endif | |
36 | - return result; | |
29 | + Color4d result; | |
30 | +#if 1 | |
31 | + result.v[0] = _mm_mul_pd(v[0], rhs.v[0]); | |
32 | + result.v[1] = _mm_mul_pd(v[1], rhs.v[1]); | |
33 | +#else | |
34 | + for (int i=0; i<3; i++) { | |
35 | + result[i] = (*this)[i] * rhs[i]; | |
36 | + } | |
37 | +#endif | |
38 | + return result; | |
37 | 39 | } |
38 | 40 | |
39 | 41 | double dot_product(const Color4d& rhs) { |
@@ -44,10 +46,10 @@ struct Color4d | ||
44 | 46 | v = _mm_hadd_pd(v, v); |
45 | 47 | return v.m128d_f64[0]; |
46 | 48 | #else |
47 | - double result = 0; | |
48 | - for (int i=0; i<3; i++) { | |
49 | - result += (*this)[i] * rhs[i]; | |
50 | - } | |
49 | + double result = 0; | |
50 | + for (int i=0; i<3; i++) { | |
51 | + result += (*this)[i] * rhs[i]; | |
52 | + } | |
51 | 53 | return result; |
52 | 54 | #endif |
53 | 55 | } |
@@ -101,14 +103,14 @@ struct Color4d | ||
101 | 103 | return Color4d(*this) *= scalar; |
102 | 104 | } |
103 | 105 | |
104 | - double& operator[] (int idx) { | |
105 | - return ((double*)&v)[idx]; | |
106 | + double& operator[] (int idx) { | |
107 | + return ((double*)&v)[idx]; | |
106 | 108 | } |
107 | - const double& operator[] (int idx) const { | |
108 | - return ((double*)&v)[idx]; | |
109 | + const double& operator[] (int idx) const { | |
110 | + return ((double*)&v)[idx]; | |
109 | 111 | } |
110 | 112 | |
111 | - double norm_squared() { | |
113 | + double norm_squared() { | |
112 | 114 | #if 1 |
113 | 115 | __m128d t = _mm_add_pd( |
114 | 116 | _mm_mul_pd(v[0], v[0]), |
@@ -117,13 +119,13 @@ struct Color4d | ||
117 | 119 | t = _mm_hadd_pd(t, t); |
118 | 120 | return t.m128d_f64[0]; |
119 | 121 | #else |
120 | - double result = 0; | |
121 | - for (int i=0; i<3; i++) { | |
122 | - result += (*this)[i] * (*this)[i]; | |
123 | - } | |
124 | - return result; | |
125 | -#endif | |
126 | - } | |
122 | + double result = 0; | |
123 | + for (int i=0; i<3; i++) { | |
124 | + result += (*this)[i] * (*this)[i]; | |
125 | + } | |
126 | + return result; | |
127 | +#endif | |
128 | + } | |
127 | 129 | |
128 | 130 | void zero() { |
129 | 131 | v[0] = _mm_setzero_pd(); |
@@ -131,11 +133,11 @@ struct Color4d | ||
131 | 133 | } |
132 | 134 | }; |
133 | 135 | |
134 | -inline Color4d operator * (double scalar, const Color4d& c) { | |
135 | - return Color4d(c) *= scalar; | |
136 | -} | |
136 | +inline Color4d operator * (double scalar, const Color4d& c) { | |
137 | + return Color4d(c) *= scalar; | |
138 | +} | |
137 | 139 | |
138 | -inline Color4d operator * (const Color4d& c, double scalar) { | |
139 | - return Color4d(c) *= scalar; | |
140 | -} | |
140 | +inline Color4d operator * (const Color4d& c, double scalar) { | |
141 | + return Color4d(c) *= scalar; | |
142 | +} | |
141 | 143 |
@@ -1,7 +1,6 @@ | ||
1 | 1 | #include "stdafx.h" |
2 | -#include "Color4f.h" | |
3 | - | |
4 | -#include "Color4d.h" | |
2 | +#include "Color4f_sse.h" | |
3 | +#include "Color4d_sse.h" | |
5 | 4 | |
6 | 5 | Color4f& Color4f::operator = (const Color4d& rhs) |
7 | 6 | { |
@@ -30,23 +30,23 @@ struct Color4f | ||
30 | 30 | Color4f& operator = (const Color4d& rhs); |
31 | 31 | |
32 | 32 | Color4f direct_product(const Color4f& rhs) const { |
33 | - Color4f result; | |
34 | -#if 1 | |
35 | - result.v = _mm_mul_ps(v, rhs.v); | |
36 | -#else | |
37 | - for (int i=0; i<3; i++) { | |
38 | - result[i] = (*this)[i] * rhs[i]; | |
39 | - } | |
40 | -#endif | |
41 | - return result; | |
33 | + Color4f result; | |
34 | +#if 1 | |
35 | + result.v = _mm_mul_ps(v, rhs.v); | |
36 | +#else | |
37 | + for (int i=0; i<3; i++) { | |
38 | + result[i] = (*this)[i] * rhs[i]; | |
39 | + } | |
40 | +#endif | |
41 | + return result; | |
42 | 42 | } |
43 | 43 | |
44 | 44 | float dot_product(const Color4f& rhs) { |
45 | 45 | // http://www.icnet.ne.jp/~nsystem/simd_tobira/dpps.html |
46 | - float result = 0; | |
47 | - for (int i=0; i<3; i++) { | |
48 | - result += (*this)[i] * rhs[i]; | |
49 | - } | |
46 | + float result = 0; | |
47 | + for (int i=0; i<3; i++) { | |
48 | + result += (*this)[i] * rhs[i]; | |
49 | + } | |
50 | 50 | return result; |
51 | 51 | } |
52 | 52 |
@@ -95,28 +95,28 @@ struct Color4f | ||
95 | 95 | return result; |
96 | 96 | } |
97 | 97 | |
98 | - float& operator[] (int idx) { | |
99 | - return v.m128_f32[3-idx]; | |
98 | + float& operator[] (int idx) { | |
99 | + return v.m128_f32[3-idx]; | |
100 | 100 | } |
101 | - const float& operator[] (int idx) const { | |
102 | - return v.m128_f32[3-idx]; | |
101 | + const float& operator[] (int idx) const { | |
102 | + return v.m128_f32[3-idx]; | |
103 | 103 | } |
104 | 104 | |
105 | - float norm_squared() { | |
106 | - float result = 0; | |
107 | - for (int i=0; i<3; i++) { | |
108 | - result += (*this)[i] * (*this)[i]; | |
109 | - } | |
110 | - return result; | |
111 | - } | |
105 | + float norm_squared() { | |
106 | + float result = 0; | |
107 | + for (int i=0; i<3; i++) { | |
108 | + result += (*this)[i] * (*this)[i]; | |
109 | + } | |
110 | + return result; | |
111 | + } | |
112 | 112 | |
113 | 113 | void zero() { |
114 | 114 | v = _mm_setzero_ps(); |
115 | 115 | } |
116 | 116 | }; |
117 | 117 | |
118 | -inline Color4f operator * (float scalar, const Color4f& c) { | |
119 | - Color4f tmp = c; | |
120 | - return tmp * scalar; | |
121 | -} | |
118 | +inline Color4f operator * (float scalar, const Color4f& c) { | |
119 | + Color4f tmp = c; | |
120 | + return tmp * scalar; | |
121 | +} | |
122 | 122 |
@@ -1,8 +1,8 @@ | ||
1 | 1 | #pragma once |
2 | 2 | |
3 | 3 | #include "Array.h" |
4 | -#include "Color4f.h" | |
5 | -#include "Color4d.h" | |
4 | +#include "Color4d_sse.h" | |
5 | +//#include "Color4d_avx.h" | |
6 | 6 | |
7 | 7 | typedef Color4d Color; |
8 | 8 | typedef Array2D<Color> Image; |
@@ -339,7 +339,7 @@ | ||
339 | 339 | UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}" |
340 | 340 | > |
341 | 341 | <File |
342 | - RelativePath="..\Color4f.cpp" | |
342 | + RelativePath="..\Color4f_sse.cpp" | |
343 | 343 | > |
344 | 344 | </File> |
345 | 345 | <File |
@@ -385,11 +385,11 @@ | ||
385 | 385 | > |
386 | 386 | </File> |
387 | 387 | <File |
388 | - RelativePath="..\Color4d.h" | |
388 | + RelativePath="..\Color4d_sse.h" | |
389 | 389 | > |
390 | 390 | </File> |
391 | 391 | <File |
392 | - RelativePath="..\Color4f.h" | |
392 | + RelativePath="..\Color4f_sse.h" | |
393 | 393 | > |
394 | 394 | </File> |
395 | 395 | <File |
@@ -50,11 +50,12 @@ | ||
50 | 50 | </PrecompiledHeader> |
51 | 51 | <WarningLevel>Level3</WarningLevel> |
52 | 52 | <Optimization>Disabled</Optimization> |
53 | - <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |
53 | + <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions)</PreprocessorDefinitions> | |
54 | 54 | <AdditionalIncludeDirectories>../</AdditionalIncludeDirectories> |
55 | 55 | <BrowseInformation>true</BrowseInformation> |
56 | 56 | <MultiProcessorCompilation>true</MultiProcessorCompilation> |
57 | 57 | <ForcedIncludeFiles>common.h</ForcedIncludeFiles> |
58 | + <AdditionalOptions>/arch:AVX %(AdditionalOptions)</AdditionalOptions> | |
58 | 59 | </ClCompile> |
59 | 60 | <Link> |
60 | 61 | <SubSystem>Console</SubSystem> |
@@ -75,6 +76,9 @@ | ||
75 | 76 | <FloatingPointModel>Fast</FloatingPointModel> |
76 | 77 | <MultiProcessorCompilation>true</MultiProcessorCompilation> |
77 | 78 | <ForcedIncludeFiles>common.h</ForcedIncludeFiles> |
79 | + <EnableEnhancedInstructionSet>StreamingSIMDExtensions2</EnableEnhancedInstructionSet> | |
80 | + <RuntimeLibrary>MultiThreaded</RuntimeLibrary> | |
81 | + <AdditionalOptions>/arch:__AVX %(AdditionalOptions)</AdditionalOptions> | |
78 | 82 | </ClCompile> |
79 | 83 | <Link> |
80 | 84 | <SubSystem>Console</SubSystem> |
@@ -93,7 +97,8 @@ | ||
93 | 97 | </ItemGroup> |
94 | 98 | <ItemGroup> |
95 | 99 | <ClInclude Include="..\Array.h" /> |
96 | - <ClInclude Include="..\Color4d.h" /> | |
100 | + <ClInclude Include="..\Color4d_avx.h" /> | |
101 | + <ClInclude Include="..\Color4d_sse.h" /> | |
97 | 102 | <ClInclude Include="..\common.h" /> |
98 | 103 | <ClInclude Include="..\dxor.h" /> |
99 | 104 | <ClInclude Include="..\quantize.h" /> |