• R/O
  • HTTP
  • SSH
  • HTTPS

コミット

よく使われているワード(クリックで追加)

java, c++, android, linux, c#, windows, objective-c, cocoa, 誰得, qt, python, php, ruby, game, gui, bathyscaphe, c, 計画中(planning stage), 翻訳, omegat, framework, twitter, dom, test, vb.net, directx, ゲームエンジン, btron, arduino, previewer

減色プログラム


コミットメタ情報

リビジョン: ec393489c021af1d5498d68597e98300079ebdcb (tree)
日時: 2011-05-19 03:43:15
作者: berupon <berupon@gmai...>
コミッター: berupon

ログメッセージ

optimized

変更サマリ

差分

--- a/Array.h
+++ b/Array.h
@@ -51,19 +51,28 @@ struct Array2D
5151 }
5252 }
5353
54+ __forceinline
5455 T* operator[] (int row) {
5556 return &pBuff_[row * width_];
5657 }
58+
59+ __forceinline
5760 const T* operator[] (int row) const {
5861 return &pBuff_[row * width_];
5962 }
6063
6164 Array2D<T>& operator *= (const T& scalar) {
65+#if 1
66+ for (size_t i=0; i<width_*height_; ++i) {
67+ pBuff_[i] *= scalar;
68+ }
69+#else
6270 for (int i=0; i<width_; i++) {
6371 for (int j=0; j<height_; j++) {
6472 (*this)[j][i] *= scalar;
6573 }
6674 }
75+#endif
6776 return *this;
6877 }
6978
@@ -174,12 +183,30 @@ struct Array3D
174183 delete pBuff_;
175184 }
176185
186+/*
177187 Array2D<T> operator[] (int depth) {
178188 return Array2D<T>(width_, height_, &pBuff_[depth * width_ * height_]);
179189 }
180190 Array2D<T> operator[] (int depth) const {
181191 return Array2D<T>(width_, height_, &pBuff_[depth * width_ * height_]);
182192 }
193+*/
194+ __forceinline
195+ T& operator() (size_t z, size_t y, size_t x) {
196+ return pBuff_[
197+ z * width_ * height_
198+ + y * width_
199+ + x
200+ ];
201+ }
202+ __forceinline
203+ const T& operator() (size_t z, size_t y, size_t x) const {
204+ return pBuff_[
205+ z * width_ * height_
206+ + y * width_
207+ + x
208+ ];
209+ }
183210 };
184211
185212
--- a/color_quantizer.vcproj
+++ b/color_quantizer.vcproj
@@ -119,7 +119,10 @@
119119 AdditionalIncludeDirectories="./"
120120 PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE;_CRT_SECURE_NO_WARNINGS"
121121 RuntimeLibrary="2"
122+ BufferSecurityCheck="false"
122123 EnableFunctionLevelLinking="true"
124+ EnableEnhancedInstructionSet="2"
125+ FloatingPointModel="2"
123126 UsePrecompiledHeader="2"
124127 WarningLevel="3"
125128 DebugInformationFormat="3"
@@ -175,6 +178,10 @@
175178 UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
176179 >
177180 <File
181+ RelativePath=".\lib.cpp"
182+ >
183+ </File>
184+ <File
178185 RelativePath=".\main.cpp"
179186 >
180187 </File>
--- /dev/null
+++ b/lib.cpp
@@ -0,0 +1,3 @@
1+#include "stdafx.h"
2+
3+#pragma comment(lib, "winmm.lib")
--- a/main.cpp
+++ b/main.cpp
@@ -48,6 +48,8 @@ typedef Array2D<Color4f> Image4f;
4848
4949 // http://proglab.aki.gs/mediaproc/colors.html
5050
51+size_t call_count = 0;
52+
5153 int _tmain(int argc, _TCHAR* argv[])
5254 {
5355 if (argc < 1+3 || argc > 1+5) {
@@ -183,8 +185,14 @@ int _tmain(int argc, _TCHAR* argv[])
183185 NULL,
184186 &filter5_weights
185187 };
188+
189+ DWORD started = ::timeGetTime();
190+
186191 spatial_color_quant(image, *filters[filter_size], quantized_image, palette, num_colors, coarse_variables, 1.0, 0.001, 3, 1);
187192
193+ DWORD timeTaken = ::timeGetTime() - started;
194+ printf("time taken : %d ms, call count : %d\n", timeTaken, call_count);
195+
188196 {
189197 FILE* out = _tfopen(argv[2], _T("wb"));
190198 if (out == NULL) {
--- a/quantize.cpp
+++ b/quantize.cpp
@@ -34,7 +34,7 @@ void fill_random(Array3D<float>& a)
3434 for (int i=0; i<a.width_; i++) {
3535 for (int j=0; j<a.height_; j++) {
3636 for (int k=0; k<a.depth_; k++) {
37- a[k][j][i] = ((float)rand())/RAND_MAX;
37+ a(k,j,i) = ((float)rand())/RAND_MAX;
3838 }
3939 }
4040 }
@@ -118,6 +118,7 @@ void compute_b_array(
118118 }
119119 }
120120
121+__forceinline
121122 Color4f b_value(const Image4f& b, int i_x, int i_y, int j_x, int j_y)
122123 {
123124 int radius_width = (b.width_ - 1)/2,
@@ -210,11 +211,11 @@ int best_match_color(
210211 )
211212 {
212213 int max_v = 0;
213- float max_weight = vars[0][i_y][i_x];
214+ float max_weight = vars(0,i_y,i_x);
214215 for (size_t v=1; v<num_colors; ++v) {
215- if (vars[v][i_y][i_x] > max_weight) {
216+ if (vars(v,i_y,i_x) > max_weight) {
216217 max_v = v;
217- max_weight = vars[v][i_y][i_x];
218+ max_weight = vars(v,i_y,i_x);
218219 }
219220 }
220221 return max_v;
@@ -228,34 +229,43 @@ void zoom(const Array3D<float>& small, Array3D<float>& big)
228229 // is 1.2 fine pixels wide and high.
229230 for (int y=0; y<big.height_/2*2; y++) {
230231 for (int x=0; x<big.width_/2*2; x++) {
231- float left = max(0.0, (x-0.1)/2.0), right = min(small.width_-0.001, (x+1.1)/2.0);
232- float top = max(0.0, (y-0.1)/2.0), bottom = min(small.height_-0.001, (y+1.1)/2.0);
232+ float left = max(0.0, (x-0.1)/2.0);
233+ float right = min(small.width_-0.001, (x+1.1)/2.0);
234+ float top = max(0.0, (y-0.1)/2.0);
235+ float bottom = min(small.height_-0.001, (y+1.1)/2.0);
233236 int x_left = (int)floor(left), x_right = (int)floor(right);
234237 int y_top = (int)floor(top), y_bottom = (int)floor(bottom);
235238 float area = (right-left)*(bottom-top);
236- float top_left_weight = (ceil(left) - left)*(ceil(top) - top)/area;
237- float top_right_weight = (right - floor(right))*(ceil(top) - top)/area;
238- float bottom_left_weight = (ceil(left) - left)*(bottom - floor(bottom))/area;
239- float bottom_right_weight = (right - floor(right))*(bottom - floor(bottom))/area;
240- float top_weight = (right-left)*(ceil(top) - top)/area;
241- float bottom_weight = (right-left)*(bottom - floor(bottom))/area;
242- float left_weight = (bottom-top)*(ceil(left) - left)/area;
243- float right_weight = (bottom-top)*(right - floor(right))/area;
239+ float inv_area = 1.0 / area;
240+ float left2 = (ceil(left) - left);
241+ float top2 = (ceil(top) - top);
242+ float bottom2 = (bottom - floor(bottom));
243+ float right2 = (right - floor(right));
244+ float top_left_weight = left2 * top2 * inv_area;
245+ float top_right_weight = right2 * top2 * inv_area;
246+ float bottom_left_weight = left2 * bottom2 * inv_area;
247+ float bottom_right_weight = right2 * bottom2 * inv_area;
248+ float top_weight = (right-left) * top2 * inv_area;
249+ float bottom_weight = (right-left) * bottom2 * inv_area;
250+ float left_weight = (bottom-top) * left2 * inv_area;
251+ float right_weight = (bottom-top) * right2 * inv_area;
244252 for (int z=0; z<big.depth_; z++) {
253+ float val;
245254 if (x_left == x_right && y_top == y_bottom) {
246- big[z][y][x] = small[z][y_top][x_left];
255+ val = small(z,y_top,x_left);
247256 } else if (x_left == x_right) {
248- big[z][y][x] = top_weight*small[z][y_top][x_left] +
249- bottom_weight*small[z][y_bottom][x_left];
257+ val = top_weight * small(z,y_top,x_left) +
258+ bottom_weight * small(z,y_bottom,x_left);
250259 } else if (y_top == y_bottom) {
251- big[z][y][x] = left_weight*small[z][y_top][x_left] +
252- right_weight*small[z][y_top][x_right];
260+ val = left_weight * small(z,y_top,x_left) +
261+ right_weight * small(z,y_top,x_right);
253262 } else {
254- big[z][y][x] = top_left_weight*small[z][y_top][x_left] +
255- top_right_weight*small[z][y_top][x_right] +
256- bottom_left_weight*small[z][y_bottom][x_left] +
257- bottom_right_weight*small[z][y_bottom][x_right];
263+ val = top_left_weight * small(z,y_top,x_left) +
264+ top_right_weight * small(z,y_top,x_right) +
265+ bottom_left_weight * small(z,y_bottom,x_left) +
266+ bottom_right_weight * small(z,y_bottom,x_right);
258267 }
268+ big(z,y,x) = val;
259269 }
260270 }
261271 }
@@ -289,16 +299,16 @@ void compute_initial_s(
289299 if (i_x == j_x && i_y == j_y) continue;
290300 Color4f b_ij = b_value(b,i_x,i_y,j_x,j_y);
291301 for (int v=0; v<palette_size; v++) {
292- float vcv = coarse_variables[v][i_y][i_x];
302+ float vcv = coarse_variables(v,i_y,i_x);
293303 for (int alpha=v; alpha<palette_size; alpha++) {
294- float mult = vcv * coarse_variables[alpha][j_y][j_x];
304+ float mult = vcv * coarse_variables(alpha,j_y,j_x);
295305 s[alpha][v] += mult * b_ij;
296306 }
297307 }
298308 }
299309 }
300310 for (int v=0; v<palette_size; v++) {
301- s[v][v] += coarse_variables[v][i_y][i_x]*center_b;
311+ s[v][v] += coarse_variables(v,i_y,i_x)*center_b;
302312 }
303313 }
304314 }
@@ -326,11 +336,11 @@ void update_s(
326336 if (i_x == j_x && i_y == j_y) continue;
327337 Color4f* ps = s[alpha];
328338 for (int v=0; v <= alpha; v++) {
329- float mult = coarse_variables[v][i_y][i_x];
339+ float mult = coarse_variables(v,i_y,i_x);
330340 ps[v] += mult * delta_b_ij;
331341 }
332342 for (int v=alpha; v<palette_size; v++) {
333- float mult = coarse_variables[v][i_y][i_x];
343+ float mult = coarse_variables(v,i_y,i_x);
334344 s[v][alpha] += mult * delta_b_ij;
335345 }
336346 }
@@ -359,7 +369,7 @@ void refine_palette(
359369 sum.zero();
360370 for (int i_y=0; i_y<coarse_variables.height_; i_y++) {
361371 for (int i_x=0; i_x<coarse_variables.width_; i_x++) {
362- float cv = coarse_variables[v][i_y][i_x];
372+ float cv = coarse_variables(v,i_y,i_x);
363373 Color4f av = a[i_y][i_x];
364374 Color4f result = cv * av;
365375 sum += result;
@@ -371,7 +381,7 @@ void refine_palette(
371381 for (unsigned int k=0; k<3; k++) {
372382 Array2D<float> S_k = extract_vector_layer_2d(s, k);
373383 vector<float> R_k = extract_vector_layer_1d(&r[0], num_colors, k);
374- vector<float> palette_channel = -1.0f*((2.0f*S_k).matrix_inverse())*R_k;
384+ vector<float> palette_channel = -1.0f * ((2.0f*S_k).matrix_inverse()) * R_k;
375385 for (unsigned int v=0; v<num_colors; v++) {
376386 float val = palette_channel[v];
377387 if (val < 0) val = 0;
@@ -399,7 +409,7 @@ void compute_initial_j_palette_sum(
399409 Color4f palette_sum;
400410 palette_sum.zero();
401411 for (size_t alpha=0; alpha<num_colors; ++alpha) {
402- palette_sum += coarse_variables[alpha][j_y][j_x]*palette[alpha];
412+ palette_sum += coarse_variables(alpha,j_y,j_x)*palette[alpha];
403413 }
404414 j_palette_sum[j_y][j_x] = palette_sum;
405415 }
@@ -559,8 +569,8 @@ void spatial_color_quant(
559569 // Prevent the matrix S from becoming singular
560570 if (new_val <= 0) new_val = 1e-10;
561571 if (new_val >= 1) new_val = 1 - 1e-10;
562- float delta_m_iv = new_val - coarse_variables[v][i_y][i_x];
563- coarse_variables[v][i_y][i_x] = new_val;
572+ float delta_m_iv = new_val - coarse_variables(v,i_y,i_x);
573+ coarse_variables(v,i_y,i_x) = new_val;
564574 j_pal += delta_m_iv * palette[v];
565575 if (abs(delta_m_iv) > 0.001 && !skip_palette_maintenance) {
566576 update_s(s, coarse_variables, b, i_x, i_y, v, delta_m_iv);
@@ -636,7 +646,7 @@ void spatial_color_quant(
636646 temperature *= temperature_multiplier;
637647 }
638648 }
639-
649+
640650 // This is normally not used, but is handy sometimes for debugging
641651 while (coarse_level > 0) {
642652 coarse_level--;