libeblearn
|
/***************************************************************************
 *   Copyright (C) 2010 by Pierre Sermanet *
 *   pierre.sermanet@gmail.com *
 *   All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Redistribution under a license not approved by the Open Source
 *       Initiative (http://www.opensource.org) must display the
 *       following acknowledgement in all advertising material:
 *        This product includes software developed at the Courant
 *        Institute of Mathematical Sciences (http://cims.nyu.edu).
 *     * The names of the authors may not be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. 
IN NO EVENT SHALL ThE AUTHORS BE LIABLE FOR ANY 00025 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 00026 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00027 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 00028 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00029 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00030 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00031 ***************************************************************************/ 00032 00033 namespace ebl { 00034 00036 // channels_module 00037 00038 template <typename T, class Tstate> 00039 channels_module<T,Tstate>:: 00040 channels_module(bool globnorm_, const char *name_) 00041 : module_1_1<T,Tstate>(name_), globnorm(globnorm_) { 00042 } 00043 00044 template <typename T, class Tstate> 00045 channels_module<T,Tstate>::~channels_module() { 00046 } 00047 00048 template <typename T, class Tstate> 00049 void channels_module<T,Tstate>::resize_output(Tstate &in, Tstate &out, 00050 int dim0) { 00051 if (!this->bresize) return ; 00052 idxdim d(in.x); 00053 if (dim0 > 0) 00054 d.setdim(0, dim0); 00055 module_1_1<T,Tstate>::resize_output(in, out, &d); 00056 } 00057 00059 // channorm_module 00060 00061 template <typename T, class Tstate> 00062 channorm_module<T,Tstate>:: 00063 channorm_module(idxdim &kerdim_, bool mirror_, t_norm norm_mode_, 00064 const char *name_, int nf, bool globnorm) 00065 : channels_module<T,Tstate>(globnorm, name_), normker(kerdim_), 00066 tmp(1,1,1), norm(NULL), mirror(mirror_), norm_mode(norm_mode_) { 00067 norm = new_norm(normker, mirror, norm_mode, nf); 00068 EDEBUG(this->describe()); 00069 } 00070 00071 template <typename T, class Tstate> 00072 channorm_module<T,Tstate>::~channorm_module() { 00073 if (norm) 00074 delete norm; 00075 } 00076 00077 template <typename T, class Tstate> 00078 void 
channorm_module<T,Tstate>::resize_output(Tstate &in, Tstate &out, 00079 int dim0) { 00080 if (!this->bresize) return ; 00081 idxdim d(in.x); 00082 if (dim0 > 0) 00083 d.setdim(0, dim0); 00084 module_1_1<T,Tstate>::resize_output(in, out, &d); 00085 module_1_1<T,Tstate>::resize_output(in, tmp, &d); 00086 } 00087 00088 template <typename T, class Tstate> 00089 std::string channorm_module<T,Tstate>::describe() { 00090 std::string s; 00091 s << this->name() << "'s normalization module: " << norm->describe(); 00092 return s; 00093 } 00094 00095 template <typename T, class Tstate> 00096 module_1_1<T,Tstate>* channorm_module<T,Tstate>:: 00097 new_norm(idxdim &normker, bool mirror, t_norm norm_mode, int nf) { 00098 switch (norm_mode) { 00099 case WSTD_NORM: 00100 return new contrast_norm_module<T,Tstate>(normker, nf, mirror, 00101 false, true); 00102 break ; 00103 case LAPLACIAN_NORM: 00104 return new laplacian_module<T,Tstate>(nf, mirror, true); 00105 break ; 00106 default: eblerror("unknown normalization mode " << norm_mode); 00107 } 00108 return NULL; 00109 } 00110 00112 // rgb_to_ynuv_module 00113 00114 template <typename T, class Tstate> 00115 rgb_to_ynuv_module<T,Tstate>:: 00116 rgb_to_ynuv_module(idxdim &normker_, bool mirror, t_norm norm_mode, 00117 bool globnorm) 00118 : channorm_module<T,Tstate>(normker_, mirror, norm_mode, "rgb_to_ynuv", 00119 1, globnorm) { 00120 } 00121 00122 template <typename T, class Tstate> 00123 rgb_to_ynuv_module<T,Tstate>::~rgb_to_ynuv_module() { 00124 } 00125 00126 template <typename T, class Tstate> 00127 void rgb_to_ynuv_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00128 this->resize_output(in, out); // resize (iff necessary) 00129 if (in.x.dim(0) != 3) { 00130 // cerr << "warning: in rgb_to_ynuv, input is not 3-channel, " 00131 // << "ignoring color." 
<< endl; 00132 } else { 00133 idx<T> uv, yuv; 00134 // RGB to YUV 00135 idx_eloop2(inx, in.x, T, outx, out.x, T) { 00136 idx_eloop2(inxx, inx, T, outxx, outx, T) { 00137 rgb_to_yuv_1D(inxx, outxx); }} 00138 } 00139 // normalize Y 00140 this->tmp.x = out.x.narrow(0, 1, 0); 00141 this->norm->fprop(this->tmp, this->tmp); // local 00142 // remove global mean and divide by stddev 00143 if (this->globnorm) image_global_normalization(this->tmp.x); 00144 // remove global mean and divide by stddev of UV 00145 idx<T> uv = out.x.narrow(0, 2, 1); 00146 if (this->globnorm) image_global_normalization(uv); 00147 else { 00148 idx_addc(uv, (T)-128, uv); 00149 idx_dotc(uv, (T).01, uv); 00150 } 00151 } 00152 00153 template <typename T, class Tstate> 00154 rgb_to_ynuv_module<T,Tstate>* rgb_to_ynuv_module<T,Tstate>::copy() { 00155 return new rgb_to_ynuv_module<T,Tstate>(this->normker, this->mirror, 00156 this->norm_mode, this->globnorm); 00157 } 00158 00160 // rgb_to_ynuvn_module 00161 00162 template <typename T, class Tstate> 00163 rgb_to_ynuvn_module<T,Tstate>:: 00164 rgb_to_ynuvn_module(idxdim &normker_, bool mirror, t_norm norm_mode, 00165 bool globnorm) 00166 : channorm_module<T,Tstate>(normker_, mirror, norm_mode, "rgb_to_ynuvn", 00167 1, globnorm) { 00168 norm2 = this->new_norm(normker_, mirror, norm_mode, 2); 00169 } 00170 00171 template <typename T, class Tstate> 00172 rgb_to_ynuvn_module<T,Tstate>::~rgb_to_ynuvn_module() { 00173 delete norm2; 00174 } 00175 00176 template <typename T, class Tstate> 00177 void rgb_to_ynuvn_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00178 this->resize_output(in, out); // resize (iff necessary) 00179 if (in.x.dim(0) != 3) { 00180 // cerr << "warning: in rgb_to_ynuvn, input is not 3-channel, " 00181 // << "ignoring color." 
<< endl; 00182 } else { 00183 idx<T> uv, yuv; 00184 // RGB to YUV 00185 idx_eloop2(inx, in.x, T, outx, out.x, T) { 00186 idx_eloop2(inxx, inx, T, outxx, outx, T) { 00187 rgb_to_yuv_1D(inxx, outxx); }} 00188 } 00189 // normalize Y 00190 this->tmp.x = out.x.narrow(0, 1, 0); 00191 this->norm->fprop(this->tmp, this->tmp); // local 00192 // remove global mean and divide by stddev 00193 if (this->globnorm) image_global_normalization(this->tmp.x); 00194 // normalize UV 00195 this->tmp.x = out.x.narrow(0, 2, 1); 00196 norm2->fprop(this->tmp, this->tmp); // local 00197 // remove global mean and divide by stddev 00198 if (this->globnorm) image_global_normalization(this->tmp.x); 00199 } 00200 00201 template <typename T, class Tstate> 00202 rgb_to_ynuvn_module<T,Tstate>* rgb_to_ynuvn_module<T,Tstate>::copy() { 00203 return new rgb_to_ynuvn_module<T,Tstate>(this->normker, this->mirror, 00204 this->norm_mode, this->globnorm); 00205 } 00206 00208 // rgb_to_ynunvn_module 00209 00210 template <typename T, class Tstate> 00211 rgb_to_ynunvn_module<T,Tstate>:: 00212 rgb_to_ynunvn_module(idxdim &normker_, bool mirror, t_norm norm_mode, 00213 bool globnorm) 00214 : channorm_module<T,Tstate>(normker_, mirror, norm_mode, "rgb_to_ynunvn", 00215 1, globnorm) { 00216 } 00217 00218 template <typename T, class Tstate> 00219 rgb_to_ynunvn_module<T,Tstate>::~rgb_to_ynunvn_module() { 00220 } 00221 00222 template <typename T, class Tstate> 00223 void rgb_to_ynunvn_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00224 this->resize_output(in, out); // resize (iff necessary) 00225 if (in.x.dim(0) != 3) { 00226 // cerr << "warning: in rgb_to_ynunvn, input is not 3-channel, " 00227 // << "ignoring color." 
<< endl; 00228 } else { 00229 idx<T> uv, yuv; 00230 // RGB to YUV 00231 idx_eloop2(inx, in.x, T, outx, out.x, T) { 00232 idx_eloop2(inxx, inx, T, outxx, outx, T) { 00233 rgb_to_yuv_1D(inxx, outxx); }} 00234 } 00235 // normalize Y 00236 this->tmp.x = out.x.narrow(0, 1, 0); 00237 this->norm->fprop(this->tmp, this->tmp); // local 00238 // remove global mean and divide by stddev 00239 if (this->globnorm) image_global_normalization(this->tmp.x); 00240 // normalize U 00241 this->tmp.x = out.x.narrow(0, 1, 1); 00242 this->norm->fprop(this->tmp, this->tmp); // local 00243 // remove global mean and divide by stddev 00244 if (this->globnorm) image_global_normalization(this->tmp.x); 00245 // normalize V 00246 this->tmp.x = out.x.narrow(0, 1, 2); 00247 this->norm->fprop(this->tmp, this->tmp); // local 00248 // remove global mean and divide by stddev 00249 if (this->globnorm) image_global_normalization(this->tmp.x); 00250 } 00251 00252 template <typename T, class Tstate> 00253 rgb_to_ynunvn_module<T,Tstate>* rgb_to_ynunvn_module<T,Tstate>::copy() { 00254 return new rgb_to_ynunvn_module<T,Tstate>(this->normker, this->mirror, 00255 this->norm_mode, this->globnorm); 00256 } 00257 00259 // rgb_to_yuv_module 00260 00261 template <typename T, class Tstate> 00262 rgb_to_yuv_module<T,Tstate>::rgb_to_yuv_module(bool globnorm) 00263 : channels_module<T,Tstate>(globnorm, "rgb_to_yuv") { 00264 } 00265 00266 template <typename T, class Tstate> 00267 rgb_to_yuv_module<T,Tstate>::~rgb_to_yuv_module() { 00268 } 00269 00270 template <typename T, class Tstate> 00271 void rgb_to_yuv_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00272 this->resize_output(in, out); // resize (iff necessary) 00273 if (in.x.dim(0) != 3) { 00274 eblerror("expected 3 channels in dim 0 but found: " << in.x); 00275 } else { 00276 // RGB to YUV 00277 idx_eloop2(inx, in.x, T, outx, out.x, T) { 00278 idx_eloop2(inxx, inx, T, outxx, outx, T) { 00279 rgb_to_yuv_1D(inxx, outxx); }} 00280 // remove global mean and divide 
by stddev 00281 if (this->globnorm) { // normalize Y 00282 idx<T> y = out.x.narrow(0, 1, 0); 00283 image_global_normalization(y); 00284 } 00285 // normalize UV 00286 idx<T> uv = out.x.narrow(0, 2, 1); 00287 if (this->globnorm) image_global_normalization(uv); 00288 else { // fixed normalization around -1,1 00289 idx<T> uv = out.x.narrow(0, 2, 1); 00290 idx_addc(uv, (T)-128, uv); 00291 idx_dotc(uv, (T).01, uv); 00292 } 00293 } 00294 } 00295 00296 template <typename T, class Tstate> 00297 rgb_to_yuv_module<T,Tstate>* rgb_to_yuv_module<T,Tstate>::copy() { 00298 return new rgb_to_yuv_module<T,Tstate>(this->globnorm); 00299 } 00300 00302 // rgb_to_yuvn_module 00303 00304 template <typename T, class Tstate> 00305 rgb_to_yuvn_module<T,Tstate>:: 00306 rgb_to_yuvn_module(idxdim &normker_, bool mirror, t_norm norm_mode, 00307 bool globnorm) 00308 : channorm_module<T,Tstate>(normker_, mirror, norm_mode, "rgb_to_yuvn", 3, 00309 globnorm) { 00310 } 00311 00312 template <typename T, class Tstate> 00313 rgb_to_yuvn_module<T,Tstate>::~rgb_to_yuvn_module() { 00314 } 00315 00316 template <typename T, class Tstate> 00317 void rgb_to_yuvn_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00318 this->resize_output(in, out); // resize (iff necessary) 00319 if (in.x.dim(0) != 3) { 00320 eblerror("expected 3 channels in dim 0 but found: " << in.x); 00321 } else { 00322 // RGB to YUV 00323 idx_eloop2(inx, in.x, T, outx, out.x, T) { 00324 idx_eloop2(inxx, inx, T, outxx, outx, T) { 00325 rgb_to_yuv_1D(inxx, outxx); }} 00326 } 00327 // first normalize globally Y and UV separately 00328 idx<T> y = out.x.narrow(0, 1, 0); 00329 image_global_normalization(y); 00330 idx<T> uv = out.x.narrow(0, 2, 1); 00331 image_global_normalization(uv); 00332 // normalize YUV 00333 this->tmp.x = out.x; 00334 this->norm->fprop(this->tmp, this->tmp); // local 00335 // remove global mean and divide by stddev 00336 if (this->globnorm) image_global_normalization(out.x); 00337 } 00338 00339 template <typename T, class 
Tstate> 00340 rgb_to_yuvn_module<T,Tstate>* rgb_to_yuvn_module<T,Tstate>::copy() { 00341 return new rgb_to_yuvn_module<T,Tstate>(this->normker, this->mirror, 00342 this->norm_mode, this->globnorm); 00343 } 00344 00346 // rgb_to_rgbn_module 00347 00348 template <typename T, class Tstate> 00349 rgb_to_rgbn_module<T,Tstate>:: 00350 rgb_to_rgbn_module(idxdim &normker_, bool mirror, t_norm norm_mode, 00351 bool globnorm) 00352 : channorm_module<T,Tstate>(normker_, mirror, norm_mode, "rgb_to_rgbn", 3, 00353 globnorm) { 00354 } 00355 00356 template <typename T, class Tstate> 00357 rgb_to_rgbn_module<T,Tstate>::~rgb_to_rgbn_module() { 00358 } 00359 00360 template <typename T, class Tstate> 00361 void rgb_to_rgbn_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00362 this->resize_output(in, out); // resize (iff necessary) 00363 // normalize RGB 00364 this->norm->fprop(in, out); // local 00365 // remove global mean and divide by stddev 00366 if (this->globnorm) image_global_normalization(out.x); 00367 } 00368 00369 template <typename T, class Tstate> 00370 rgb_to_rgbn_module<T,Tstate>* rgb_to_rgbn_module<T,Tstate>::copy() { 00371 return new rgb_to_rgbn_module<T,Tstate>(this->normker, this->mirror, 00372 this->norm_mode, this->globnorm); 00373 } 00374 00376 // rgb_to_y_module 00377 00378 template <typename T, class Tstate> 00379 rgb_to_y_module<T,Tstate>::rgb_to_y_module(bool globnorm) 00380 : channels_module<T,Tstate>(globnorm, "rgb_to_y") { 00381 } 00382 00383 template <typename T, class Tstate> 00384 rgb_to_y_module<T,Tstate>::~rgb_to_y_module() { 00385 } 00386 00387 template <typename T, class Tstate> 00388 void rgb_to_y_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00389 this->resize_output(in, out, 1); // resize (iff necessary) 00390 if (in.x.dim(0) != 3) { 00391 eblerror("expected 3 channels in dim 0 but found: " << in.x); 00392 } else { 00393 // RGB to YUV 00394 idx_eloop2(inx, in.x, T, outx, out.x, T) { 00395 idx_eloop2(inxx, inx, T, outxx, outx, T) { 00396 
rgb_to_y_1D(inxx, outxx); }} 00397 // remove global mean and divide by stddev 00398 if (this->globnorm) image_global_normalization(out.x); 00399 } 00400 } 00401 00402 template <typename T, class Tstate> 00403 rgb_to_y_module<T,Tstate>* rgb_to_y_module<T,Tstate>::copy() { 00404 return new rgb_to_y_module<T,Tstate>(this->globnorm); 00405 } 00406 00408 // rgb_to_yn_module 00409 00410 template <typename T, class Tstate> 00411 rgb_to_yn_module<T,Tstate>::rgb_to_yn_module(idxdim &normker, bool mirror, 00412 t_norm norm_mode, bool globnorm) 00413 : channorm_module<T,Tstate>(normker, mirror, norm_mode, "rgb_to_yn", 1, 00414 globnorm) { 00415 } 00416 00417 template <typename T, class Tstate> 00418 rgb_to_yn_module<T,Tstate>::~rgb_to_yn_module() { 00419 } 00420 00421 template <typename T, class Tstate> 00422 void rgb_to_yn_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00423 this->resize_output(in, out, 1); // resize (iff necessary) 00424 if (in.x.dim(0) != 3) { 00425 // cerr << "warning: in rgb_to_yn, input is not 3-channel, " 00426 // << "ignoring color." 
<< endl; 00427 // convert Y to Yp 00428 this->norm->fprop(in, out); // local 00429 } else { 00430 // RGB to Y 00431 idx_eloop2(inx, in.x, T, tmpx, this->tmp.x, T) { 00432 idx_eloop2(inxx, inx, T, tmpxx, tmpx, T) { 00433 rgb_to_y_1D(inxx, tmpxx); }} 00434 // convert Y to Yp 00435 this->norm->fprop(this->tmp, out); // local 00436 // remove global mean and divide by stddev 00437 if (this->globnorm) image_global_normalization(out.x); 00438 } 00439 } 00440 00441 template <typename T, class Tstate> 00442 rgb_to_yn_module<T,Tstate>* rgb_to_yn_module<T,Tstate>::copy() { 00443 return new rgb_to_yn_module<T,Tstate>(this->normker, this->mirror, 00444 this->norm_mode, this->globnorm); 00445 } 00446 00448 // y_to_yp_module 00449 00450 template <typename T, class Tstate> 00451 y_to_yp_module<T,Tstate>::y_to_yp_module(idxdim &normker, bool mirror, 00452 t_norm norm_mode, bool globnorm) 00453 : channorm_module<T,Tstate>(normker, mirror, norm_mode, "y_to_yp", 1, 00454 globnorm) { 00455 } 00456 00457 template <typename T, class Tstate> 00458 y_to_yp_module<T,Tstate>::~y_to_yp_module() { 00459 } 00460 00461 template <typename T, class Tstate> 00462 void y_to_yp_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00463 this->norm->fprop(in, out); // local 00464 // remove global mean and divide by stddev 00465 if (this->globnorm) image_global_normalization(out.x); 00466 } 00467 00468 template <typename T, class Tstate> 00469 y_to_yp_module<T,Tstate>* y_to_yp_module<T,Tstate>::copy() { 00470 return new y_to_yp_module<T,Tstate>(this->normker, this->mirror, 00471 this->norm_mode, this->globnorm); 00472 } 00473 00475 // bgr_to_ypuv_module 00476 00477 template <typename T, class Tstate> 00478 bgr_to_ypuv_module<T,Tstate>:: 00479 bgr_to_ypuv_module(idxdim &normker, bool mirror, t_norm norm_mode, 00480 bool globnorm) 00481 : channorm_module<T,Tstate>(normker, mirror, norm_mode, "bgr_to_ypuv", 1, 00482 globnorm) { 00483 } 00484 00485 template <typename T, class Tstate> 00486 
bgr_to_ypuv_module<T,Tstate>::~bgr_to_ypuv_module() { 00487 } 00488 00489 template <typename T, class Tstate> 00490 void bgr_to_ypuv_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00491 this->resize_output(in, out); // resize (iff necessary) 00492 idx<T> uv, yp, yuv; 00493 00494 // BGR to YUV 00495 idx_eloop2(inx, in.x, T, outx, out.x, T) { 00496 idx_eloop2(inxx, inx, T, outxx, outx, T) { 00497 bgr_to_yuv_1D(inxx, outxx); 00498 } 00499 } 00500 // remove global mean and divide by stddev 00501 uv = out.x.narrow(0, 2, 1); 00502 if (this->globnorm) image_global_normalization(uv); 00503 // convert Y to Yp 00504 this->tmp.x = out.x.narrow(0, 1, 0); 00505 this->norm->fprop(this->tmp, this->tmp); // local 00506 // remove global mean and divide by stddev 00507 if (this->globnorm) image_global_normalization(this->tmp.x); 00508 } 00509 00510 template <typename T, class Tstate> 00511 bgr_to_ypuv_module<T,Tstate>* bgr_to_ypuv_module<T,Tstate>::copy() { 00512 return new bgr_to_ypuv_module<T,Tstate>(this->normker, this->mirror, 00513 this->norm_mode, this->globnorm); 00514 } 00515 00517 // bgr_to_yp_module 00518 00519 template <typename T, class Tstate> 00520 bgr_to_yp_module<T,Tstate>::bgr_to_yp_module(idxdim &normker, bool mirror, 00521 t_norm norm_mode, bool globnorm) 00522 : channorm_module<T,Tstate>(normker, mirror, norm_mode, "bgr_to_yp", 1, 00523 globnorm) { 00524 } 00525 00526 template <typename T, class Tstate> 00527 bgr_to_yp_module<T,Tstate>::~bgr_to_yp_module() { 00528 } 00529 00530 template <typename T, class Tstate> 00531 void bgr_to_yp_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00532 this->resize_output(in, out, 1); // resize (iff necessary) 00533 // BGR to YUV 00534 idx_eloop2(inx, in.x, T, tmpx, this->tmp.x, T) { 00535 idx_eloop2(inxx, inx, T, tmpxx, tmpx, T) { 00536 bgr_to_y_1D(inxx, tmpxx); }} 00537 // convert Y to Yp 00538 this->norm->fprop(this->tmp, out); // local 00539 // remove global mean and divide by stddev 00540 if (this->globnorm) 
image_global_normalization(out.x); 00541 } 00542 00543 template <typename T, class Tstate> 00544 bgr_to_yp_module<T,Tstate>* bgr_to_yp_module<T,Tstate>::copy() { 00545 return new bgr_to_yp_module<T,Tstate>(this->normker, this->mirror, 00546 this->norm_mode, this->globnorm); 00547 } 00548 00550 // rgb_to_hp_module 00551 00552 template <typename T, class Tstate> 00553 rgb_to_hp_module<T,Tstate>::rgb_to_hp_module(idxdim &normker, bool mirror, 00554 t_norm norm_mode, bool globnorm) 00555 : channorm_module<T,Tstate>(normker, mirror, norm_mode, "rgb_to_hp", 1, 00556 globnorm) { 00557 } 00558 00559 template <typename T, class Tstate> 00560 rgb_to_hp_module<T,Tstate>::~rgb_to_hp_module() { 00561 } 00562 00563 template <typename T, class Tstate> 00564 void rgb_to_hp_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00565 this->resize_output(in, out, 1); // resize (iff necessary) 00566 // RGB to YUV 00567 idx_eloop2(inx, in.x, T, tmpx, this->tmp.x, T) { 00568 idx_eloop2(inxx, inx, T, tmpxx, tmpx, T) { 00569 rgb_to_h_1D(inxx, tmpxx); }} 00570 // convert H to Hp 00571 this->norm->fprop(this->tmp, out); // local 00572 // remove global mean and divide by stddev 00573 if (this->globnorm) image_global_normalization(out.x); 00574 } 00575 00576 template <typename T, class Tstate> 00577 rgb_to_hp_module<T,Tstate>* rgb_to_hp_module<T,Tstate>::copy() { 00578 return new rgb_to_hp_module<T,Tstate>(this->normker, this->mirror, 00579 this->norm_mode, this->globnorm); 00580 } 00581 00583 // resizepp_module 00584 00585 template <typename T, class Tstate> 00586 resizepp_module<T,Tstate>:: 00587 resizepp_module(idxdim &size_, uint mode_, module_1_1<T,Tstate> *pp_, 00588 bool own_pp_, idxdim *dzpad_, bool pratio) 00589 : module_1_1<T,Tstate>("resizepp"), 00590 pp(pp_), own_pp(own_pp_), size(size_), inpp(1,1,1), outpp(1,1,1), 00591 tmp3(1,1,1), mode(mode_), input_mode(0), inrect(0, 0, 0, 0), 00592 inrect_set(false), outrect_set(false), dzpad(NULL), zpad(NULL), 00593 hjitter(0), wjitter(0), 
sjitter(1.0), rjitter(0.0), 00594 scale_hfactor(1.0), scale_wfactor(1.0), 00595 preserve_ratio(pratio), hratio(0), wratio(0), lastout(NULL), 00596 out_copy(NULL), display_min(-1), display_max(1), original_bboxes(1) { 00597 set_dimensions(size_.dim(0), size_.dim(1)); 00598 if (dzpad_) { 00599 dzpad = new idxdim(*dzpad_); 00600 set_zpads(dzpad->dim(0), dzpad->dim(1)); 00601 } 00602 if (!preserve_ratio) input_mode = 1; // do not preserve aspect ratio 00603 } 00604 00605 template <typename T, class Tstate> 00606 resizepp_module<T,Tstate>:: 00607 resizepp_module(uint mode_, module_1_1<T,Tstate> *pp_, 00608 bool own_pp_, idxdim *dzpad_, bool pratio) 00609 : pp(pp_), own_pp(own_pp_), size(1,1), height(0), width(0), 00610 inpp(1,1,1), outpp(1,1,1), tmp3(1,1,1), mode(mode_), input_mode(0), 00611 inrect(0, 0, 0, 0), inrect_set(false), outrect_set(false), dzpad(NULL), 00612 zpad(NULL), hjitter(0), wjitter(0), sjitter(1.0), rjitter(0.0), 00613 scale_hfactor(1.0), scale_wfactor(1.0), preserve_ratio(pratio), hratio(0), 00614 wratio(0), lastout(NULL), out_copy(NULL), display_min(-1), display_max(1), 00615 original_bboxes(1) { 00616 if (dzpad_ && dzpad_->order() > 0) { 00617 dzpad = new idxdim(*dzpad_); 00618 set_zpads(dzpad->dim(0), dzpad->dim(1)); 00619 } 00620 if (!preserve_ratio) input_mode = 1; // do not preserve aspect ratio 00621 } 00622 00623 template <typename T, class Tstate> 00624 resizepp_module<T,Tstate>:: 00625 resizepp_module(double hratio_, double wratio_, uint mode_, 00626 module_1_1<T,Tstate> *pp_, 00627 bool own_pp_, idxdim *dzpad_, bool pratio) 00628 : pp(pp_), own_pp(own_pp_), size(1,1), height(0), width(0), 00629 inpp(1,1,1), outpp(1,1,1), tmp3(1,1,1), mode(mode_), input_mode(0), 00630 inrect(0, 0, 0, 0), inrect_set(false), outrect_set(false), dzpad(NULL), 00631 zpad(NULL), hjitter(0), wjitter(0), sjitter(1.0), rjitter(0.0), 00632 scale_hfactor(1.0), scale_wfactor(1.0), preserve_ratio(pratio), hratio(0), 00633 wratio(0), lastout(NULL), out_copy(NULL), 
display_min(-1), display_max(1), 00634 original_bboxes(1) { 00635 if (dzpad_) { 00636 dzpad = new idxdim(*dzpad_); 00637 set_zpads(dzpad->dim(0), dzpad->dim(1)); 00638 } 00639 if (preserve_ratio) 00640 input_mode = 2; 00641 else 00642 input_mode = 1; 00643 } 00644 00645 template <typename T, class Tstate> 00646 resizepp_module<T,Tstate>::~resizepp_module() { 00647 if (pp && own_pp) delete pp; 00648 if (zpad) delete zpad; 00649 if (dzpad) delete dzpad; 00650 } 00651 00652 template <typename T, class Tstate> 00653 void resizepp_module<T,Tstate>::set_dimensions(intg height_, intg width_) { 00654 height = height_; 00655 width = width_; 00656 // if (dzpad) { 00657 // height -= dzpad->dim(0) * 2; 00658 // width -= dzpad->dim(1) * 2; 00659 // } 00660 size.setdim(0, height); 00661 size.setdim(1, width); 00662 } 00663 00664 template <typename T, class Tstate> 00665 void resizepp_module<T,Tstate>::set_zpads(intg hpad, intg wpad) { 00666 // // reset height/width without current zpad 00667 // if (dzpad) { 00668 // height += dzpad->dim(0) * 2; 00669 // width += dzpad->dim(1) * 2; 00670 // } 00671 // update zpads and height/width 00672 // if (!dzpad) 00673 // dzpad = new idxdim(hpad, wpad); 00674 // else { 00675 // dzpad->setdim(0, hpad); 00676 // dzpad->setdim(1, wpad); 00677 // } 00678 // height -= dzpad->dim(0) * 2; 00679 // width -= dzpad->dim(1) * 2; 00680 // size.setdim(0, height); 00681 // size.setdim(1, width); 00682 // update zpad module 00683 if (zpad) { 00684 delete zpad; 00685 zpad = NULL; 00686 } 00687 if (dzpad) { 00688 delete dzpad; 00689 dzpad = NULL; 00690 } 00691 dzpad = new idxdim(hpad, wpad); 00692 if (dzpad && (dzpad->dim(0) > 0 || dzpad->dim(1) > 0)) 00693 zpad = new zpad_module<T,Tstate>(dzpad->dim(0), dzpad->dim(1)); 00694 } 00695 00696 template <typename T, class Tstate> 00697 void resizepp_module<T,Tstate>::set_zpad(idxdim &kernel) { 00698 if (kernel.empty()) return ; 00699 // update zpad module 00700 if (zpad) { 00701 delete zpad; 00702 zpad = NULL; 
00703 } 00704 if (!dzpad) dzpad = new idxdim(kernel); 00705 else *dzpad = kernel; 00706 zpad = new zpad_module<T,Tstate>(kernel); 00707 } 00708 00709 template <typename T, class Tstate> 00710 void resizepp_module<T,Tstate>::set_zpad(midxdim &kernels) { 00711 if (kernels.empty()) return ; 00712 // update zpad module 00713 if (zpad) { 00714 delete zpad; 00715 zpad = NULL; 00716 } 00717 // if (!dzpad) dzpad = new idxdim(kernels); 00718 // else *dzpad = kernels; 00719 zpad = new zpad_module<T,Tstate>(kernels); 00720 } 00721 00722 template <typename T, class Tstate> 00723 void resizepp_module<T,Tstate>::set_jitter(int h, int w, float s, float r) { 00724 hjitter = h; 00725 wjitter = w; 00726 sjitter = s; 00727 rjitter = r; 00728 } 00729 00730 template <typename T, class Tstate> 00731 void resizepp_module<T,Tstate>::set_scale_factor(double s) { 00732 scale_hfactor = s; 00733 scale_wfactor = s; 00734 } 00735 00736 template <typename T, class Tstate> 00737 void resizepp_module<T,Tstate>::set_scale_factor(double sh, double sw) { 00738 scale_hfactor = sh; 00739 scale_wfactor = sw; 00740 } 00741 00742 template <typename T, class Tstate> 00743 void resizepp_module<T,Tstate>::set_input_region(const rect<int> &inr) { 00744 inrect = inr; 00745 inrect_set = true; 00746 } 00747 00748 template <typename T, class Tstate> 00749 void resizepp_module<T,Tstate>::set_output_region(const rect<int> &outr) { 00750 outrect = outr; 00751 // if (dzpad) { 00752 // outrect.height -= dzpad->dim(0) * 2; 00753 // outrect.width -= dzpad->dim(1) * 2; 00754 // } 00755 outrect_set = true; 00756 } 00757 00758 template <typename T, class Tstate> 00759 rect<int> resizepp_module<T,Tstate>::get_original_bbox() { 00760 if (original_bboxes.size() == 0) eblerror("expected at least 1 box"); 00761 return original_bboxes[0]; 00762 } 00763 00764 template <typename T, class Tstate> 00765 rect<int> resizepp_module<T,Tstate>::get_input_bbox() { 00766 return input_bbox; 00767 } 00768 00769 template <typename T, class 
Tstate> 00770 const std::vector<rect<int> >& resizepp_module<T,Tstate>::get_input_bboxes() { 00771 input_bboxes.clear(); 00772 input_bboxes.push_back(this->get_input_bbox()); 00773 return input_bboxes; 00774 } 00775 00776 template <typename T, class Tstate> 00777 const std::vector<rect<int> >& resizepp_module<T,Tstate>:: 00778 get_original_bboxes() { 00779 return original_bboxes; 00780 } 00781 00782 template <typename T, class Tstate> 00783 rect<int> resizepp_module<T,Tstate>::compute_regions(Tstate &in) { 00784 // set input region to entire image if no input region is given 00785 rect<int> r = rect<int>(0, 0, in.x.dim(1), in.x.dim(2)); 00786 if (inrect_set) // set input region 00787 r = inrect; 00788 // set output region 00789 if (!outrect_set) 00790 outrect = rect<int>(0, 0, height, width); 00791 // find ratio between input box and output box 00792 float ratio = std::max(r.height / (float) outrect.height, 00793 r.width / (float) outrect.width); 00794 // apply scale jitter (keeping same center) 00795 if (sjitter != 1.0 || scale_hfactor != 1.0 || scale_wfactor != 1.0) 00796 r.scale_centered(sjitter * scale_hfactor, sjitter * scale_wfactor); 00797 // apply spatial jitter 00798 r.h0 -= (int) (hjitter * ratio); 00799 r.w0 -= (int) (wjitter * ratio); 00800 return r; 00801 } 00802 00803 template <typename T, class Tstate> 00804 void resizepp_module<T,Tstate>::remember_regions(intg outh, intg outw, 00805 rect<int> &r) { 00806 // remember input box 00807 input_bbox = r; 00808 if (preserve_ratio) { // fit input ratio into target ratio 00809 double iratio = r.height / (double) r.width; // input ratio 00810 double tratio = outh / (double) outw; // target ratio 00811 if (tratio > iratio) input_bbox.scale_height(1 / tratio); 00812 else if (tratio < iratio) input_bbox.scale_width(1 / tratio); 00813 } 00814 // double rh = outh / (double) original_bbox.height; 00815 // double rw = outw / (double) (std::max)((int) 1, original_bbox.width); 00816 // input_bbox.scale_centered(rh, 
rw); 00817 } 00818 00819 template <typename T, class Tstate> 00820 void resizepp_module<T,Tstate>::set_display_range(T dmin, T dmax) { 00821 display_min = dmin; 00822 display_max = dmax; 00823 } 00824 00825 template <typename T, class Tstate> 00826 void resizepp_module<T,Tstate>::get_display_range(T &dmin, T &dmax) { 00827 dmin = display_min; 00828 dmax = display_max; 00829 } 00830 00831 // fprop methods ///////////////////////////////////////////////////////////// 00832 00833 template <typename T, class Tstate> 00834 void resizepp_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00835 fprop(in, out.x); 00836 // remember last output 00837 lout.clear(); 00838 lout.push_back(new Tstate(out)); 00839 lastout = &lout; 00840 copy_outputs(lout); 00841 } 00842 00843 template <typename T, class Tstate> 00844 void resizepp_module<T,Tstate>::fprop(Tstate &in, idx<T> &out) { 00845 // compute input/output regions 00846 rect<int> r = compute_regions(in); 00847 EDEBUG("resizing " << in.x << " to " << outrect << " with ROI " << r); 00848 rect<int> outr; 00849 // resize input while preserving aspect ratio 00850 tmp = in.x.shift_dim(0, 2); // resize functions expect channels in 3rd dim 00851 idx<T> resized; 00852 switch (mode) { 00853 case MEAN_RESIZE: 00854 resized = image_mean_resize(tmp, outrect.height, 00855 outrect.width, input_mode, &r, &outr); 00856 break ; 00857 case GAUSSIAN_RESIZE: 00858 resized = image_gaussian_resize(tmp, outrect.height, 00859 outrect.width, input_mode, &r,&outr); 00860 break ; 00861 case BILINEAR_RESIZE: 00862 if (input_mode == 1 || input_mode == 2) { // use ratios 00863 resized = image_resize(tmp, hratio, wratio, input_mode, &r, &outr); 00864 EDEBUG(this->name() << ": resizing with ratios " << hratio 00865 << " and " << wratio); 00866 } 00867 else // use pixels 00868 resized = image_resize(tmp, (double) outrect.height, 00869 (double) outrect.width, input_mode, &r, &outr); 00870 break ; 00871 default: 00872 eblerror("unknown resizing mode"); 00873 } 
00874 resized = resized.shift_dim(2, 0); 00875 // call preprocessing 00876 if (pp) { // no preprocessing if NULL module 00877 inpp.x = resized; 00878 pp->fprop(inpp, outpp); 00879 resized = outpp.x; 00880 } 00881 // resize out to target dimensions if necessary 00882 if (((out.dim(1) != height) || (out.dim(2) != width)) && !pp) 00883 out.resize(in.x.dim(0), height, width); 00884 else if (((out.dim(1) != height) || (out.dim(2) != width) 00885 || (out.dim(0) != outpp.x.dim(0))) && pp) 00886 out.resize(outpp.x.dim(0), height, width); 00887 idx_clear(out); 00888 resized = resized.shift_dim(0, 2); 00889 // apply rotation (around center of roi) 00890 if (rjitter != 0.0) { 00891 idx<T> r2 = idx_copy(resized); // make a contiguous copy 00892 resized = image_rotate(r2, rjitter, (int) outr.hcenter(), 00893 (int) outr.wcenter()); 00894 } 00895 // copy out region to output 00896 original_bboxes[0] = outr; 00897 tmp2 = image_region_to_rect(resized, outr, out.dim(1), 00898 out.dim(2), &original_bboxes[0]); 00899 tmp2 = tmp2.shift_dim(2, 0); 00900 remember_regions(out.dim(1), out.dim(2), r); 00901 //idx_copy(tmp2, tmp); 00902 if (!zpad) 00903 idx_copy(tmp2, out); 00904 else { // zero padding 00905 original_bboxes[0].shift(dzpad->dim(0), dzpad->dim(1)); 00906 tmp3.resize(tmp2.get_idxdim()); 00907 idx_copy(tmp2, tmp3.x); 00908 zpad->fprop(tmp3, out); 00909 EDEBUG("padded " << tmp3.x << " with " << zpad->get_paddings() << " -> " 00910 << out); 00911 } 00912 EDEBUG("resized " << in.x << " to " << out); 00913 } 00914 00915 template <typename T, class Tstate> 00916 void resizepp_module<T,Tstate>::fprop(Tstate &in, midx<T> &out) { 00917 // expect 1D midx 00918 if (out.order() != 1) 00919 eblerror("expected a 1-dimensional midx but got order " << out.order()); 00920 // out.clear(); 00921 // out.resize(1); 00922 // if (out.dim(0) != 1) 00923 // eblerror("expected a 1-element midx but got dimension " << out.dim(0)); 00924 idxdim d(in.x.get_idxdim()); 00925 d.setdims(1); 00926 idx<T> tmp(d); 
// run the regular fprop into the temporary buffer, then store that buffer
// as element 0 of the output midx
  fprop(in, tmp);
  out.set(tmp, 0);
}

// Returns a newly allocated resizepp_module constructed from this module's
// size, mode and dzpad. The preprocessing module, if any, is duplicated
// through its own copy() and `true` is passed as the 4th constructor
// argument (presumably the "own pp" flag -- confirm against the header).
template <typename T, class Tstate>
resizepp_module<T,Tstate>* resizepp_module<T,Tstate>::copy() {
  module_1_1<T,Tstate> *newpp = NULL;
  if (pp)
    newpp = (module_1_1<T,Tstate>*) pp->copy();
  return new resizepp_module(size, mode, newpp, true, dzpad);
}

// Builds a human-readable description of this module: resizing method,
// ratios or target size (depending on input_mode), aspect-ratio flag,
// zero-padding, preprocessing module and input box scaling factors.
template <typename T, class Tstate>
std::string resizepp_module<T,Tstate>::describe() {
  std::string desc;
  desc << "resizepp module " << this->name() << ", resizing with method "
       << mode;
  if (input_mode == 1 || input_mode == 2) // using ratios
    desc << " with height ratio " << hratio << " and width ratio " << wratio;
  else
    desc << " to " << height << "x" << width;
  desc << " while "
       << (preserve_ratio ? "" : "not ") << "preserving aspect ratio";
  // padding is only reported when both the pad module and its dims exist
  if (zpad && dzpad)
    desc << ", with zpad " << *dzpad;
  desc << ", pp: ";
  if (pp)
    desc << pp->describe();
  else
    desc << "none";
  // box scaling is only reported when it differs from identity
  if (scale_hfactor != 1.0 || scale_wfactor != 1.0)
    desc << ", scaling input box by " << scale_hfactor << " x "
         << scale_wfactor;
  return desc;
}

// Returns the stored 'lastout' pointer.
template <typename T, class Tstate>
mstate<Tstate>* resizepp_module<T,Tstate>::last_output() {
  return lastout;
}

// Remembers the address of 'out' in out_copy; 'out' is not copied here, so
// it must outlive any later use of out_copy.
template <typename T, class Tstate>
void resizepp_module<T,Tstate>::set_output_copy(mstate<Tstate> &out) {
  out_copy = &out;
}

// Records 'osize' as the single element of msize and returns it unchanged.
template <typename T, class Tstate>
fidxdim resizepp_module<T,Tstate>::bprop_size(const fidxdim &osize) {
  msize.clear();
  msize.push_back(osize);
  return osize;
}

// Records 'osize' in msize and returns it unchanged.
template <typename T, class Tstate>
mfidxdim resizepp_module<T,Tstate>::bprop_size(mfidxdim &osize) {
  msize = osize;
  return osize;
}

// Returns the sizes last recorded by bprop_size().
template <typename T, class Tstate>
mfidxdim resizepp_module<T,Tstate>::get_msize() {
return msize;
}

// Number of layers reported by this module: always 1.
template <typename T, class Tstate>
uint resizepp_module<T,Tstate>::nlayers() {
  return 1;
}

// If an output copy target was registered (out_copy is non-null), resizes
// it to match 'out' and copies 'out' into it; otherwise does nothing.
template <typename T, class Tstate>
void resizepp_module<T,Tstate>::copy_outputs(mstate<Tstate> &out) {
  // copy output to another copy
  if (out_copy) {
    out_copy->resize(out);
    out_copy->copy(out);
  }
}

// fovea_module

// Constructor taking an explicit target size and one size per fovea scale.
// Arguments are forwarded to the resizepp_module and s2m_module bases;
// the number of s2m states is the number of fovea factors.
template <typename T, class Tstate>
fovea_module<T,Tstate>::
fovea_module(std::vector<double> &fovea_, midxdim &fovea_scales_size_,
             idxdim &size_, bool boxscale_, uint mode_,
             module_1_1<T,Tstate> *pp_, bool own_pp_,
             idxdim *dzpad_, const char *name_)
  : resizepp_module<T,Tstate>(size_, mode_, pp_, own_pp_, dzpad_),
    s2m_module<T,Tstate>(fovea_.size(), name_), fovea(fovea_),
    fovea_scales_size(fovea_scales_size_), boxscale(boxscale_) {
}

// Constructor without target size; fovea_scales_size is left
// default-constructed here.
template <typename T, class Tstate>
fovea_module<T,Tstate>::
fovea_module(std::vector<double> &fovea_, bool boxscale_, uint mode_,
             module_1_1<T,Tstate> *pp_, bool own_pp_, idxdim *dzpad_,
             const char *name_)
  : resizepp_module<T,Tstate>(mode_, pp_, own_pp_, dzpad_),
    s2m_module<T,Tstate>(fovea_.size(), name_), fovea(fovea_),
    boxscale(boxscale_) {
}

template <typename T, class Tstate>
fovea_module<T,Tstate>::~fovea_module() {}

// Single-state output is not supported: always raises an error.
template <typename T, class Tstate>
void fovea_module<T,Tstate>::fprop(Tstate &in, Tstate &out) {
  eblerror("not implemented");
}

// Forward pass into a 1-dimensional midx: 'out' is resized to one element
// per fovea factor, the multi-state fprop is run into a local mstate, and
// each resulting state is then stored into 'out'.
template <typename T, class Tstate>
void fovea_module<T,Tstate>::fprop(Tstate &in, midx<T> &out) {
  // expecting a 1D midx
  if (out.order() != 1)
    eblerror("expected a 1-dimensional midx but got order " << out.order());
  if ((uint) out.dim(0) != fovea.size())
    out.resize(fovea.size());
  // NOTE(review): the mstate overload called below stores &mout in
  // this->lastout, which dangles once this function returns -- confirm
  // that last_output() is never used after this overload.
  mstate<Tstate> mout;
  this->fprop(in, mout);
  //
// convert mout (which is a vector) into out (midx)
  for (uint i = 0; i < mout.size(); ++i)
    out.set(mout[i].x, i);
}

// Forward pass producing one output state per fovea factor.
// For each factor the resize parameters are updated, the inherited
// resizepp_module::fprop is run into the matching state of 'out', and the
// resulting original/input bounding boxes are appended to obboxes/ibboxes.
// After the loop the boxes of the first scale are restored as the module's
// reference boxes and 'out' is remembered in lastout.
template <typename T, class Tstate>
void fovea_module<T,Tstate>::fprop(Tstate &in, mstate<Tstate> &out) {
  obboxes.clear();
  ibboxes.clear();
  // check that fovea is defined
  if (fovea.size() == 0)
    eblerror("cannot process a fovea with empty scales");
  // remember target size
  idxdim s = this->size;
  // resize if necessary and set appropriate number of scales
  s2m_module<T,Tstate>::resize_output(in, out);
  rect<int> obbox, ibbox;
  // fprop all scales
  for (uint f = 0; f < fovea.size(); ++f) {
    Tstate &o = out[f];
    if (boxscale) // box scaling mode
      this->set_scale_factor(fovea[f]);
    else { // image scaling mode
      idxdim p = s * (1 / (float) fovea[f]);
      this->set_dimensions(p.dim(0), p.dim(1));
      EDEBUG("applying fovea factor " << 1/fovea[f] << " to " << in.x);
    }
    // NOTE(review): this unconditionally overrides the dimensions set in
    // the image scaling branch above, and indexes fovea_scales_size[f],
    // which the constructor without scale sizes leaves default-constructed
    // -- confirm this is intended.
    this->set_dimensions(fovea_scales_size[f].dim(0),
                         fovea_scales_size[f].dim(1));
    resizepp_module<T,Tstate>::fprop(in, o);
    if (f == 0) { // remember first original bbox
      obbox = this->get_original_bbox();
      ibbox = this->get_input_bbox();
    }
    // remember all boxes in original input
    obboxes.push_back(this->get_original_bbox());
    ibboxes.push_back(this->get_input_bbox());
  }
  this->original_bboxes[0] = obbox; // use 1st scale as reference
  this->input_bbox = ibbox; // use 1st scale as reference
  this->lastout = &out;
}

// Backward pass: intentionally empty.
template <typename T, class Tstate>
void fovea_module<T,Tstate>::bprop(Tstate &in, mstate<Tstate> &out) {
}

// Second-order backward pass: intentionally empty.
template <typename T, class Tstate>
void fovea_module<T,Tstate>::bbprop(Tstate &in, mstate<Tstate> &out) {
}

template <typename T, class Tstate>
mfidxdim fovea_module<T,Tstate>::bprop_size(mfidxdim
&osize) { 01100 this->msize = osize; 01101 if (osize.size() <= 0) 01102 eblerror("expected at least 1 element but found " << osize.size()); 01103 return osize; 01104 } 01105 01106 template <typename T, class Tstate> 01107 std::string fovea_module<T,Tstate>::describe() { 01108 std::string desc = "fovea "; 01109 desc << resizepp_module<T,Tstate>::describe() 01110 << ", fovea: " << fovea << ", resizing "; 01111 if (boxscale) 01112 desc << "box with fovea factors."; 01113 else 01114 desc << "image with inverse fovea factors."; 01115 return desc; 01116 } 01117 01118 template <typename T, class Tstate> 01119 const std::vector<rect<int> >& fovea_module<T,Tstate>::get_original_bboxes() { 01120 return obboxes; 01121 } 01122 01123 template <typename T, class Tstate> 01124 const std::vector<rect<int> >& fovea_module<T,Tstate>::get_input_bboxes() { 01125 return ibboxes; 01126 } 01127 01128 template <typename T, class Tstate> 01129 uint fovea_module<T,Tstate>::nlayers() { 01130 return fovea.size(); 01131 } 01132 01134 // mschan_module 01135 01136 template <typename T, class Tstate> 01137 mschan_module<T,Tstate>::mschan_module(uint nstates, const char *name) 01138 : s2m_module<T,Tstate>(nstates, name) { 01139 } 01140 01141 template <typename T, class Tstate> 01142 mschan_module<T,Tstate>::~mschan_module() { 01143 } 01144 01145 template <typename T, class Tstate> 01146 void mschan_module<T,Tstate>::fprop(Tstate &in, mstate<Tstate> &out) { 01147 uint nchans = in.x.dim(0) / this->nstates(); 01148 idxdim d = in.x.get_idxdim(); 01149 d.setdim(0, nchans); 01150 // resize out if necessary 01151 s2m_module<T,Tstate>::resize_output(in, out, &d); 01152 // copy each channel into its state 01153 // TODO: handle multiple channels per state, using fovea size 01154 for (uint f = 0; f < in.x.dim(0); ++f) { 01155 Tstate &o = out[f]; 01156 idx<T> inx = in.x.narrow(0, nchans, f * nchans); 01157 idx_copy(inx, o.x); 01158 } 01159 } 01160 01161 template <typename T, class Tstate> 01162 void 
mschan_module<T,Tstate>::bprop(Tstate &in, mstate<Tstate> &out) { 01163 } 01164 01165 template <typename T, class Tstate> 01166 void mschan_module<T,Tstate>::bbprop(Tstate &in, mstate<Tstate> &out) { 01167 } 01168 01170 // resize_module 01171 01172 template <typename T, class Tstate> 01173 resize_module<T,Tstate>:: 01174 resize_module(double hratio_, double wratio_, uint mode_, 01175 uint hzpad_, uint wzpad_, bool pratio) 01176 : module_1_1<T,Tstate>("resize"), 01177 tmp3(1,1,1), mode(mode_), input_mode(0), inrect(0, 0, 0, 0), 01178 inrect_set(false), 01179 outrect_set(false), hzpad(hzpad_), wzpad(wzpad_), zpad(NULL), 01180 hjitter(0), wjitter(0), sjitter(1.0), preserve_ratio(pratio), 01181 hratio(hratio_), wratio(wratio_) { 01182 set_zpads(hzpad_, wzpad_); 01183 if (preserve_ratio) 01184 input_mode = 2; 01185 else 01186 input_mode = 1; 01187 } 01188 01189 template <typename T, class Tstate> 01190 resize_module<T,Tstate>:: 01191 resize_module(intg height_, intg width_, uint mode_, uint hzpad_, uint wzpad_, 01192 bool pratio) 01193 : module_1_1<T,Tstate>("resize"), 01194 tmp3(1,1,1), mode(mode_), input_mode(0), inrect(0, 0, 0, 0), 01195 inrect_set(false), 01196 outrect_set(false), hzpad(hzpad_), wzpad(wzpad_), zpad(NULL), 01197 hjitter(0), wjitter(0), sjitter(1.0), preserve_ratio(pratio) { 01198 set_dimensions(height_, width_); 01199 set_zpads(hzpad_, wzpad_); 01200 } 01201 01202 template <typename T, class Tstate> 01203 resize_module<T,Tstate>:: 01204 resize_module(uint mode_, uint hzpad_, uint wzpad_, bool pratio) 01205 : tmp3(1,1,1), mode(mode_), input_mode(0), inrect(0, 0, 0, 0), 01206 inrect_set(false), outrect_set(false), hzpad(hzpad_), wzpad(wzpad_), 01207 zpad(NULL), hjitter(0), wjitter(0), sjitter(1.0), preserve_ratio(pratio) { 01208 set_zpads(hzpad_, wzpad_); 01209 } 01210 01211 template <typename T, class Tstate> 01212 resize_module<T,Tstate>::~resize_module() { 01213 if (zpad) 01214 delete zpad; 01215 } 01216 01217 template <typename T, class Tstate> 
void resize_module<T,Tstate>::set_dimensions(intg height_, intg width_) {
  // the stored height/width exclude the zero-padding added on both sides
  height = height_ - hzpad * 2;
  width = width_ - wzpad * 2;
}

// Changes the zero-padding amounts, keeping the overall (padded) target
// size constant, and re-creates the padding module accordingly (no module
// is kept when both paddings are 0).
template <typename T, class Tstate>
void resize_module<T,Tstate>::set_zpads(intg hpad, intg wpad) {
  // reset height/width without current zpad
  height += hzpad * 2;
  width += wzpad * 2;
  // update zpads and height/width
  hzpad = hpad;
  wzpad = wpad;
  height -= hzpad * 2;
  width -= wzpad * 2;
  // update zpad module
  if (zpad) {
    delete zpad;
    zpad = NULL;
  }
  if (hzpad > 0 || wzpad > 0)
    zpad = new zpad_module<T,Tstate>(hzpad, wzpad);
}

// Sets the jitter applied by fprop: vertical/horizontal offsets (h, w),
// scale factor (s) and rotation (r).
template <typename T, class Tstate>
void resize_module<T,Tstate>::set_jitter(int h, int w, float s, float r) {
  hjitter = h;
  wjitter = w;
  sjitter = s;
  rjitter = r;
}

// Sets the input region used by fprop instead of the entire image.
template <typename T, class Tstate>
void resize_module<T,Tstate>::set_input_region(const rect<int> &inr) {
  inrect = inr;
  inrect_set = true;
}

// Sets the output region used by fprop; the stored region excludes the
// zero-padding on both sides.
template <typename T, class Tstate>
void resize_module<T,Tstate>::set_output_region(const rect<int> &outr) {
  outrect = outr;
  outrect.height -= hzpad * 2;
  outrect.width -= wzpad * 2;
  outrect_set = true;
}

// Forward pass: resizes the input region of 'in' with the configured
// method, optionally rotates and translates it (jitter), pastes it into
// 'out' and zero-pads the result when a padding module is set.
template <typename T, class Tstate>
void resize_module<T,Tstate>::fprop(Tstate &in, Tstate &out) {
  // TODO: TMP FIX
  // NOTE(review): this unconditionally overwrites the hratio/wratio given
  // to the constructor. If dim() returns an integer type, the divisions by
  // 3 below truncate before the conversion to float -- confirm intent.
  float th = (in.x.dim(1) - 6) / 3 + 4;
  float tw = (in.x.dim(2) - 6) / 3 + 4;
  // float th = (in.x.dim(1) - 6) / 2 + 4;
  // float tw = (in.x.dim(2) - 6) / 2 + 4;
  hratio = th / (float) in.x.dim(1);
  wratio = tw / (float) in.x.dim(2);

  // set input region to entire image if no input region is given
  if (!inrect_set)
    inrect = rect<int>(0, 0, in.x.dim(1), in.x.dim(2));
  // apply scale jitter (keeping same center)
  // NOTE(review): when an input region was set explicitly, inrect is a
  // member and is rescaled again on every call -- confirm this is intended.
  if (sjitter != 1.0)
inrect.scale_centered(sjitter, sjitter);

  // default output region: the full target size
  if (!outrect_set)
    outrect = rect<int>(0, 0, height, width);
  rect<int> outr;
  // resize input while preserving aspect ratio
  // (dimension 0 is moved to the last position before calling the image
  // routines, and moved back afterwards)
  tmp = in.x.shift_dim(0, 2);
  idx<T> resized;
  switch (mode) {
  case MEAN_RESIZE:
    resized = image_mean_resize(tmp, outrect.height,
                                outrect.width, input_mode, &inrect, &outr);
    break ;
  case GAUSSIAN_RESIZE:
    resized = image_gaussian_resize(tmp, outrect.height,
                                    outrect.width, input_mode, &inrect,&outr);
    break ;
  case BILINEAR_RESIZE:
    if (input_mode == 1 || input_mode == 2) { // use ratios
      resized = image_resize(tmp, hratio, wratio, input_mode,
                             &inrect, &outr);
      EDEBUG(this->name() << ": resizing with ratios " << hratio
             << " and " << wratio);
    }
    else // use pixels
      resized = image_resize(tmp, (double) outrect.height,
                             (double) outrect.width, input_mode,
                             &inrect, &outr);
    break ;
  default:
    eblerror("unknown resizing mode");
  }
  resized = resized.shift_dim(2, 0);
  // resize out to target dimensions if necessary
  if (out.x.dim(0) != in.x.dim(0) || out.x.dim(1) != resized.dim(1)
      || out.x.dim(2) != resized.dim(2))
    out.resize(in.x.dim(0), resized.dim(1), resized.dim(2));
  idx_clear(out.x);
  resized = resized.shift_dim(0, 2);
  // apply rotation (around center of roi)
  if (rjitter != 0.0) {
    idx<T> r2 = idx_copy(resized); // make a contiguous copy
    resized = image_rotate(r2, rjitter, (int) outr.hcenter(),
                           (int) outr.wcenter());
  }
  // apply spatial jitter
  outr.h0 += hjitter;
  outr.w0 += wjitter;
  // copy out region to output
  // (original_bbox is passed by address and may be updated by the call)
  original_bbox = outr;
  tmp2 = image_region_to_rect(resized, outr, out.x.dim(1),
                              out.x.dim(2), &original_bbox);
  tmp2 = tmp2.shift_dim(2, 0);
  //idx_copy(tmp2, tmp);
  if (!zpad)
    idx_copy(tmp2, out.x);
else { // zero padding
    // account for the padding offset in the remembered box, then pad a
    // copy of the pasted region into the output
    original_bbox.shift(hzpad, wzpad);
    tmp3.resize(tmp2.get_idxdim());
    idx_copy(tmp2, tmp3.x);
    zpad->fprop(tmp3, out);
  }
}

template <typename T, class Tstate>
void resize_module<T,Tstate>::bprop(Tstate &in, Tstate &out) {
  // do nothing
  // unlikely to be needed by anyone
}

template <typename T, class Tstate>
void resize_module<T,Tstate>::bbprop(Tstate &in, Tstate &out) {
  // do nothing
  // unlikely to be needed by anyone
}

// Returns the bounding box recorded by the last fprop.
template <typename T, class Tstate>
rect<int> resize_module<T,Tstate>::get_original_bbox() {
  return original_bbox;
}

// Returns a newly allocated copy made with the copy constructor.
// NOTE(review): no user-defined copy constructor is visible in this chunk;
// if none exists, the raw zpad pointer is shared with the copy while the
// destructor deletes it -- confirm against the class declaration.
template <typename T, class Tstate>
resize_module<T,Tstate>* resize_module<T,Tstate>::copy() {
  return new resize_module(*this);
}

// Builds a human-readable description: resizing method, ratios or target
// size (depending on input_mode) and aspect-ratio flag.
template <typename T, class Tstate>
std::string resize_module<T,Tstate>::describe() {
  std::string desc;
  desc << "resize module " << this->name() << ", resizing with method "
       << mode;
  if (input_mode == 1 || input_mode == 2) // using ratios
    desc << " with height ratio " << hratio << " and width ratio " << wratio;
  else
    desc << " to " << height << "x" << width;
  desc << " while "
       << (preserve_ratio ? "" : "not ") << "preserving aspect ratio";
  return desc;
}

// laplacian_pyramid_module

// Constructor with explicit target sizes and a default 5x5 filter size.
// The first element of sizes_ is forwarded to the resizepp_module base.
template <typename T, class Tstate>
laplacian_pyramid_module<T,Tstate>::
laplacian_pyramid_module(uint nscales_, midxdim &sizes_, uint mode_,
                         module_1_1<T,Tstate> *pp_, bool own_pp_,
                         idxdim *dzpad_, bool gnorm_, bool lnorm_,
                         bool lnorm2_, bool cnorm_, bool cacross, bool keep,
                         const char *name_)
  : resizepp_module<T,Tstate>(sizes_[0], mode_, pp_, own_pp_, dzpad_),
    s2m_module<T,Tstate>(nscales_, name_), nscales(nscales_),
    sizes(sizes_), global_norm(gnorm_),
    local_norm(lnorm_), local_norm2(lnorm2_), color_lnorm(cnorm_),
    cnorm_across(cacross), keep_aspect_ratio(keep),
    tmp(1), outs(nscales) {
  kerdims.push_back(idxdim(5, 5));
  init();
}

// Constructor with explicit target sizes and a single filter size 'kerd'
// (replicated for every scale by init()).
template <typename T, class Tstate>
laplacian_pyramid_module<T,Tstate>::
laplacian_pyramid_module(uint nscales_, idxdim &kerd, midxdim &sizes_,
                         uint mode_, module_1_1<T,Tstate> *pp_, bool own_pp_,
                         idxdim *dzpad_, bool gnorm_, bool lnorm_,
                         bool lnorm2_, bool cnorm_, bool cacross, bool keep,
                         const char *name_)
  : resizepp_module<T,Tstate>(sizes_[0], mode_, pp_, own_pp_, dzpad_),
    s2m_module<T,Tstate>(nscales_, name_), nscales(nscales_),
    sizes(sizes_), global_norm(gnorm_),
    local_norm(lnorm_), local_norm2(lnorm2_), color_lnorm(cnorm_),
    cnorm_across(cacross), keep_aspect_ratio(keep), tmp(1),
    outs(nscales) {
  kerdims.push_back(kerd);
  init();
}

// Constructor with explicit target sizes and one filter size per scale.
template <typename T, class Tstate>
laplacian_pyramid_module<T,Tstate>::
laplacian_pyramid_module(uint nscales_, midxdim &kerdims_,
                         midxdim &sizes_,
                         uint mode_, module_1_1<T,Tstate> *pp_, bool own_pp_,
                         idxdim *dzpad_, bool gnorm_, bool lnorm_,
                         bool lnorm2_, bool cnorm_, bool cacross, bool keep,
                         const char *name_)
  :
resizepp_module<T,Tstate>(sizes_[0], mode_, pp_, own_pp_, dzpad_), 01425 s2m_module<T,Tstate>(nscales_, name_), nscales(nscales_), sizes(sizes_), 01426 kerdims(kerdims_), global_norm(gnorm_), 01427 local_norm(lnorm_), local_norm2(lnorm2_), color_lnorm(cnorm_), 01428 cnorm_across(cacross), keep_aspect_ratio(keep), tmp(1), 01429 outs(nscales) { 01430 if (kerdims.size() < nscales) 01431 eblerror("expected at least same number of scales and filter dims but " 01432 << "got " << nscales << " and " << kerdims); 01433 init(); 01434 } 01435 01436 template <typename T, class Tstate> 01437 laplacian_pyramid_module<T,Tstate>:: 01438 laplacian_pyramid_module(uint nscales_, uint mode_, module_1_1<T,Tstate> *pp_, 01439 bool own_pp_, idxdim *dzpad_, bool gnorm_, 01440 bool lnorm_, bool lnorm2_, bool cnorm_, bool cacross, 01441 bool keep, const char *name_) 01442 : resizepp_module<T,Tstate>(mode_, pp_, own_pp_, dzpad_), 01443 s2m_module<T,Tstate>(nscales, name_), 01444 nscales(nscales_), global_norm(gnorm_), 01445 local_norm(lnorm_), local_norm2(lnorm2_), color_lnorm(cnorm_), 01446 cnorm_across(cacross), keep_aspect_ratio(keep), tmp(1), 01447 outs(nscales) { 01448 kerdims.push_back(idxdim(5, 5)); 01449 init(); 01450 } 01451 01452 template <typename T, class Tstate> 01453 laplacian_pyramid_module<T,Tstate>::~laplacian_pyramid_module() { 01454 for (uint i = 0; i < norms.size(); ++i) 01455 delete norms[i]; 01456 for (uint i = 0; i < cnorms.size(); ++i) 01457 delete cnorms[i]; 01458 for (uint i = 0; i < pads.size(); ++i) 01459 delete pads[i]; 01460 } 01461 01462 template <typename T, class Tstate> 01463 void laplacian_pyramid_module<T,Tstate>::init() { 01464 burt_filtering_only = false; 01465 use_pad = true; 01466 mirror = true; 01467 scalings.push_back(2.0); // default scaling 01468 // limit sizes to order 2 01469 for (uint i = 0; i < sizes.size(); ++i) { 01470 while (sizes[i].order() > 2) 01471 sizes[i].remove_dim(sizes[i].order() - 1); 01472 } 01473 this->size = sizes[0]; 01474 // 
// replicate filter sizes if just 1
  if (kerdims.size() == 1 && nscales > 1)
    for (uint i = 1; i < nscales; ++i) kerdims.push_back(kerdims[0]);
  // remove extra dimensions
  kerdims.erase(kerdims.begin() + nscales, kerdims.end());
  // 5x5 gaussian kernel for subsampling
  burt = create_burt_adelson_kernel<T>();
  // loop on all scale kernel dimensions
  bool threshold = false;
  for (uint i = 0; i < kerdims.size(); ++i) {
    idxdim d = kerdims[i];
    // high frequency kernels
    filters.push_back(create_gaussian_kernel<T>(d));
    // normalizations
    if (local_norm || local_norm2) {
      // grayscale normalizations
      layers<T,Tstate> *norm = new layers<T,Tstate>();
      norms.push_back(norm);
      if (local_norm)
        norm->add_module(new divisive_norm_module<T,Tstate>
                         (d, 1, mirror, threshold, NULL, "", true));
      if (local_norm2)
        norm->add_module(new contrast_norm_module<T,Tstate>
                         (d, 1, mirror, threshold, true, NULL, "", true));
      // color normalizations
      // NOTE(review): both branches below add an identical
      // contrast_norm_module, unlike the grayscale case above which uses a
      // divisive_norm_module for local_norm -- confirm this is intended.
      if (color_lnorm) {
        layers<T,Tstate> *cnorm = new layers<T,Tstate>();
        cnorms.push_back(cnorm);
        if (local_norm)
          cnorm->add_module(new contrast_norm_module<T,Tstate>
                            (d, 2, mirror, threshold, true, NULL, "",
                             cnorm_across));
        if (local_norm2)
          cnorm->add_module(new contrast_norm_module<T,Tstate>
                            (d, 2, mirror, threshold, true, NULL, "",
                             cnorm_across));
      }
    }
    // padding
    if (mirror) // switch between zero and mirror padding
      pads.push_back(new mirrorpad_module<T,Tstate>
                     ((d.dim(0) - 1)/2, (d.dim(1) - 1)/2));
    else
      pads.push_back(new zpad_module<T,Tstate>(d));
  }
  // resize boxes
  original_bboxes.resize(1);
  input_bboxes.resize(nlayers());
}

// Single-state output is not supported: always raises an error.
template <typename T, class Tstate>
void laplacian_pyramid_module<T,Tstate>::fprop(Tstate &in, Tstate &out) {
  eblerror("not implemented");
}

// Forward pass into a multi-state: runs the midx fprop into the 'outs'
// member, wraps each element of 'outs' in a new state appended to 'out',
// applies zero-padding if a padding module is defined, and finally
// forwards 'out' to copy_outputs().
template <typename T, class Tstate>
void laplacian_pyramid_module<T,Tstate>::
fprop(Tstate &in, mstate<Tstate> &out) {
  fprop(in, outs);
  out.clear();
  for (uint i = 0; i < outs.dim(0); ++i)
    out.push_back(new Tstate(outs.get(i)));
  // adding zero borders if defined
  if (zpad) {
    zpad->fprop(out, zpad_out);
    out.clear();
    out.push_back_new(zpad_out);
  }
  this->copy_outputs(out);
}

// Backward pass: intentionally empty.
template <typename T, class Tstate>
void laplacian_pyramid_module<T,Tstate>::
bprop(Tstate &in, mstate<Tstate> &out) {
}

// Second-order backward pass: intentionally empty.
template <typename T, class Tstate>
void laplacian_pyramid_module<T,Tstate>::
bbprop(Tstate &in, mstate<Tstate> &out) {
}

// Returns the output sizes for input sizes 'isize': the first input size
// is replicated once per element of 'outs', then passed through the
// padding module's fprop_size if one is defined. Also records the number
// of inputs and outputs.
template <typename T, class Tstate>
mfidxdim laplacian_pyramid_module<T,Tstate>::fprop_size(mfidxdim &isize) {
  EDEBUG(this->name() << ": " << isize << " f-> ...");
  mfidxdim osize;
  // only the first input size is used (isize is assumed non-empty)
  fidxdim i0 = isize[0];
  for (uint i = 0; i < outs.dim(0); ++i)
    osize.push_back_new(i0);
  this->ninputs = isize.size();
  this->noutputs = osize.size();
  if (zpad) osize = zpad->fprop_size(osize);
  EDEBUG(this->name() << ": " << isize << " f-> " << osize);
  return osize;
}

// Returns the input sizes corresponding to output sizes 'osize_': each
// existing output size is multiplied by the ratio between the first
// original bounding box and the input bounding box of its layer, as
// recorded by the last fprop. Missing entries stay empty.
template <typename T, class Tstate>
mfidxdim laplacian_pyramid_module<T,Tstate>::bprop_size(mfidxdim &osize_) {
  mfidxdim osize = osize_;
  this->msize = osize;
  if (zpad) osize = zpad->bprop_size(osize);
  // return resizepp_module<T,Tstate>::bprop_size(osize);
  mfidxdim isize;
  if (osize.size() != input_bboxes.size())
    eblerror("expected same size in osize (" << osize
             << ") and input_bboxes (" << input_bboxes << ")");
  // scale output given the in/out original ratios
  // uint fact = 1;
  for (uint i = 0; i < osize.size(); ++i) {
    if (osize.exists(i)) {
      fidxdim d(1, original_bboxes[0].height /
(float) input_bboxes[i].height,
                original_bboxes[0].width / (float) input_bboxes[i].width);
      // fidxdim d(1, fact, fact);
      fidxdim o = osize[i] * d;
      isize.push_back_new(o);
    }
    else isize.push_back_empty();
    //fact *= 2;
  }
  EDEBUG(this->name() << ": " << osize << " b-> " << isize);
  return isize;
}

// Describes this module: the inherited resizepp description plus the
// number of scales and of pyramids (one pyramid per scaling).
template <typename T, class Tstate>
std::string laplacian_pyramid_module<T,Tstate>::describe() {
  std::string desc = "Laplacian pyramid ";
  desc << resizepp_module<T,Tstate>::describe()
       << ", with " << nscales << " scales and " << (int) scalings.size()
       << " pyramids";
  return desc;
}

// Number of layers: number of scales times number of pyramids.
template <typename T, class Tstate>
uint laplacian_pyramid_module<T,Tstate>::nlayers() {
  return nscales * scalings.size();
}

// Replaces the pyramid scalings and resizes the per-layer input boxes to
// the new number of layers.
template <typename T, class Tstate>
void laplacian_pyramid_module<T,Tstate>::set_scalings(vector<float> &s) {
  scalings = s;
  input_bboxes.resize(nlayers());
}

// protected methods /////////////////////////////////////////////////////////

// Forward pass into a 1-dimensional midx holding one element per layer
// (scale x pyramid). 'out' and the 'tmp' member are resized to nlayers()
// if needed.
template <typename T, class Tstate>
void laplacian_pyramid_module<T,Tstate>::fprop(Tstate &in, midx<T> &out) {
  // expecting a 1D midx
  if (out.order() != 1)
    eblerror("expected a 1-dimensional midx but got order " << out.order());
  if (out.dim(0) != nlayers()) out.resize(nlayers());
  if (tmp.dim(0) != nlayers()) tmp.resize(nlayers());
  // only accept 2D images or 3D with channel dim to 0.
if ((in.x.order() != 2) && (in.x.order() != 3))
    eblerror("unexpected image format" << in);
  // allocate temporary buffers if not allocated yet
  // (re-created whenever their order differs from the input's order)
  if (blurred.order() != in.x.order()) {
    idxdim d(in.x);
    d.setdims(1);
    blurred = idx<T>(d);
    blurred_high = idx<T>(d);
    padded = Tstate(d);
    outpp = Tstate(d);
  }

  // loop on pyramids
  for (uint pyr = 0; pyr < scalings.size(); ++pyr) {
    LOCAL_TIMING_START(); // profiling
    // reset scale counter
    iscale = 0;
    // compute input/output regions
    //out.clear();
    rect<int> inr = this->compute_regions(in);
    rect<int> rr = inr;
    rect<int> outr; //, &oregion = this->original_bbox;
    // remember image region of the first scale
    if (pyr == 0) original_bboxes[0] = inr;
    // start from twice the target size
    idxdim tgt = this->size * (scalings[pyr] / 2.0);
    idxdim tgt0 = tgt * 2;
    idxdim ind(inr.height, inr.width);
    // shift dim 0 to 2 and copy to have continuous data
    idx<T> im = in.x.shift_dim(0, 2);
    // if input is big enough start with tgt0
    // NOTE(review): the condition is hard-coded to false, so the tgt0
    // branch below is currently dead code and tgt0/ind are unused.
    if (false) {//ind >= tgt0) { // input region is big enough to start at tgt0
      // resize bilinearly to tgt0
      resize(im, tgt0, inr, outr);
      // blur im into blurred
      blur(burt, im, blurred, inr);
      // subsample
      subsample(blurred, im, inr);
    } else { // start from tgt directly (even if upsampling)
      resize(im, tgt, inr, outr);
      // no blur or subsampling since we are already at tgt
    }

    // // TMP
    // zpad_module<T,Tstate> zp(20, 20, 20, 20);
    // idxdim di = im.get_idxdim(), dp(di.dim(0), di.dim(1) + 40, di.dim(2) + 40);
    // Tstate zpi(di), zpo(dp);
    // zpi.x = im;
    // zp.fprop(zpi, zpo);
    // im = zpo.x;
    // // inr = this->compute_regions(zpo);
    // // rr = inr;

    LOCAL_TIMING_REPORT("laplacian_pyramid, initial resizing");
    // loop
// and produce each scale
    for (uint scale = 0; scale < nscales; ++scale, ++iscale) {
      // remember input size
      input_bboxes[scale + nscales * pyr] = inr;
      // blur im into blurred
      LOCAL_TIMING2_START();
      blur(burt, im, blurred, inr);
      // use burt filtered to obtain high frequencies
      if (burt_filtering_only)
        blurred_high = blurred;
      else // or use another filter (more computation)
        blur(filters[iscale], im, blurred_high, inr);
      LOCAL_TIMING2_REPORT("blurring");
      // compute high passed target
      idx<T> high = highpass(im, blurred_high, tgt, inr, scale == 0);
      LOCAL_TIMING2_REPORT("highpass");
      // assign high pass to output
      out.set(high, scale + nscales * pyr);
      // subsample only if more scales to come
      if (scale + 1 != nscales)
        subsample(blurred, im, inr);
      LOCAL_TIMING2_REPORT("subsampling");
      // subsample target dimensions
      if (scale + 1 < sizes.size()) // set target manually
        tgt = sizes[scale + 1];
      else // simply halve the target at each new scale
        tgt = tgt * .5;
      LOCAL_TIMING_REPORT("laplacian_pyramid, scale " << scale);
    }
    // remember regions using the region computed before any resizing (rr)
    this->remember_regions(this->height, this->width, rr);
  }
  EDEBUG("laplacian outputs: " << out.str());
  if (!this->silent) cout << "laplacian outputs: " << out.str() << endl;
  TIMING2(this->name());
}

// Resizes region 'inr' of 'im' bilinearly to target size 'tgt', then
// applies the preprocessing module (if any) and the rotation jitter (if
// non-zero). On return 'im' holds a freshly allocated copy of the result
// and 'inr' the region of the image within it.
template <typename T, class Tstate>
void laplacian_pyramid_module<T,Tstate>::
resize(idx<T> &im, idxdim &tgt, rect<int> &inr, rect<int> &outr) {
  // resize bilinearly to tgt
  im = image_resize(im, (double) tgt.dim(0), (double) tgt.dim(1),
                    keep_aspect_ratio ?
0 : 1, &inr, &outr);
  // from here on the input region is the region of the resized image
  inr = outr;
  im = im.shift_dim(2, 0);
  // call preprocessing
  // NOTE(review): 'inn' is assigned but not used in this function.
  idx<T> inn = im;
  if (this->pp) { // no preprocessing if NULL module
    // disable the channel pp's global normalization (we'll do our own)
    // ((channels_module<T,Tstate>*)this->pp)->globnorm = false;
    inpp.x = im;
    this->pp->fprop(inpp, outpp);
    im = outpp.x;
  }
  // apply rotation (around center of roi)
  if (this->rjitter != 0.0) {
    im = im.shift_dim(0, 2);
    idx<T> r2 = idx_copy(im); // make a contiguous copy
    im = image_rotate(r2, this->rjitter, (int) inr.hcenter(),
                      (int) inr.wcenter());
    im = im.shift_dim(2, 0);
    inr.rotate(this->rjitter);
  }
  // make a continuous version of im
  idx<T> cont(im.get_idxdim());
  idx_copy(im, cont);
  im = cont;
}

// Convolves 'in' with 'filter' into 'out' ('out' is resized to the
// dimensions of 'in' if they differ). When use_pad is set the input is
// first padded by the padding module of the current scale; otherwise the
// output is narrowed and 'roi' is shrunk to account for filter borders.
// Note that the first channel of 'in' is normalized in place beforehand.
template <typename T, class Tstate>
void laplacian_pyramid_module<T,Tstate>::
blur(idx<T> &filter, idx<T> &in, idx<T> &out, rect<int> &roi) {
  if (in.get_idxdim() != out.get_idxdim())
    out.resize(in.get_idxdim());
  idx<T> i = in, o = out;
  Tstate ts;
  ts.x = i;
  // normalize globally
  normalize_intensity_globally(in);
  // pad input for convolution
  if (use_pad) {
    pads[iscale]->set_kernel(filter.get_idxdim());
    pads[iscale]->fprop(ts, padded);
    i = padded.x;
  } else { // narrow out to receive non-padded filtering
    o = o.narrow(1, o.dim(1) - filter.dim(0) + 1, (filter.dim(0) - 1) / 2);
    o = o.narrow(2, o.dim(2) - filter.dim(1) + 1, (filter.dim(1) - 1) / 2);
  }
  // convolve with filter for high frequency
  if (in.order() == 3) { // loop over each channel
    idx_bloop2(tin, i, T, tout, o, T) {
      // unfold both spatial dimensions into filter-sized windows
      idx<T> uin = tin.unfold(0, filter.dim(0), 1);
      uin = uin.unfold(1, filter.dim(1), 1);
      idx_m4dotm2(uin, filter, tout);
    }
  } else {
    idx<T> uin = i.unfold(0, filter.dim(0), 1);
    uin
= uin.unfold(1, filter.dim(1), 1); 01777 idx_m4dotm2(i, filter, o); // just one channel 01778 } 01779 // reduce ROI if not padded to account for filter borders 01780 if (!use_pad) { 01781 roi.h0 += (filter.dim(0) - 1) / 2; 01782 roi.w0 += (filter.dim(1) - 1) / 2; 01783 roi.height -= filter.dim(0) - 1; 01784 roi.width -= filter.dim(1) - 1; 01785 } 01786 } 01787 01788 template <typename T, class Tstate> 01789 idx<T> laplacian_pyramid_module<T,Tstate>:: 01790 highpass(idx<T> &in, idx<T> &blurred, idxdim &tgt, rect<int> &inr, 01791 bool first) { 01792 // TODO: this assumes brightness in channel 1 and color in remaining 01793 idx<T> dtgt(in.dim(0), tgt.dim(0), tgt.dim(1)); 01794 idx<T> high(in.get_idxdim()); 01795 idx<T> tmpin = in.narrow(0, 1, 0); 01796 idx<T> tmpbl = blurred.narrow(0, 1, 0); 01797 idx<T> tmphi = high.narrow(0, 1, 0); 01798 Tstate ti, to; 01799 01800 // grayscale /////////////////////////////////////////////////////////////// 01801 // remove low-frequencies and normalize locally (1st layer only) 01802 if (norms.size() > 0) { 01803 // resize temporary buffer if necessary 01804 if (tmpin.get_idxdim() != high0.get_idxdim()) { 01805 if (tmpin.order() != high0.order()) high0 = idx<T>(tmpin.get_idxdim()); 01806 else high0.resize(tmpin.get_idxdim()); 01807 } 01808 // just subtract blurred version in the 1st layer 01809 idx_sub(tmpin, tmpbl, high0); 01810 // apply grayscale normalizations 01811 ti.x = high0; 01812 to.x = tmphi; 01813 norms[iscale]->fprop(ti, to); 01814 } else // just subtract blurred version in the 1st layer 01815 idx_sub(tmpin, tmpbl, tmphi); 01816 01817 // color /////////////////////////////////////////////////////////////////// 01818 if (in.dim(0) > 1) { 01819 tmpin = in.narrow(0, in.dim(0) - 1, 1); 01820 tmphi = high.narrow(0, high.dim(0) - 1, 1); 01821 if (cnorms.size() > 0) { // color local normalization 01822 ti.x = tmpin; 01823 to.x = tmphi; 01824 cnorms[iscale]->fprop(ti, to); 01825 } else // simply copy remaining layers (color) 01826 
idx_copy(tmpin, tmphi); 01827 } 01828 // normalize globally 01829 if (global_norm) normalize_globally(high);//, inr); 01830 rect<int> outr; 01831 // paste at center 01832 return image_region_to_rect(high, inr, tgt.dim(0), tgt.dim(1), &outr, 1, 2); 01833 } 01834 01835 template <typename T, class Tstate> 01836 void laplacian_pyramid_module<T,Tstate>::subsample(idx<T> &in, idx<T> &out, 01837 rect<int> &inr) { 01838 idxdim dsub(in); 01839 dsub.setdim(1, dsub.dim(1) / 2); 01840 dsub.setdim(2, dsub.dim(2) / 2); 01841 out.resize(dsub); 01842 idxlooper<T> tmpin(in, 2); 01843 idx_eloop1(sub, out, T) { 01844 idxlooper<T> tmpi(tmpin, 1); 01845 idx_eloop1(su, sub, T) { 01846 idx_copy(tmpi, su); 01847 tmpi.next(); 01848 tmpi.next(); 01849 } 01850 tmpin.next(); 01851 tmpin.next(); 01852 } 01853 // rescale input box 01854 inr = inr / 2; 01855 } 01856 01857 template <typename T, class Tstate> 01858 void laplacian_pyramid_module<T,Tstate>:: 01859 normalize_globally(idx<T> &in) { //, rect<int> &roi) { 01860 normalize_intensity_globally(in); //, roi); 01861 normalize_color_globally(in); //, roi); 01862 } 01863 01864 template <typename T, class Tstate> 01865 void laplacian_pyramid_module<T,Tstate>:: 01866 normalize_intensity_globally(idx<T> &in) {//, rect<int> &roi) { 01867 idx<T> intensity = in.narrow(0, 1, 0); 01868 normalize_globally2(intensity); //, roi); 01869 } 01870 01871 template <typename T, class Tstate> 01872 void laplacian_pyramid_module<T,Tstate>:: 01873 normalize_color_globally(idx<T> &in) {//, rect<int> &roi) { 01874 idx<T> color = in.narrow(0, 2, 1); 01875 normalize_globally2(color); //, roi); 01876 } 01877 01878 template <typename T, class Tstate> 01879 void laplacian_pyramid_module<T,Tstate>:: 01880 normalize_globally2(idx<T> &in) { //, rect<int> &roi) { 01881 // if (!global_norm) return ; 01882 image_global_normalization(in); 01883 01884 // idx<T> r = in.narrow(1, roi.height, roi.h0); 01885 // r = r.narrow(2, roi.width, roi.w0); 01886 // idx<T> r2(r.get_idxdim()); 
01887 // // get mean of roi 01888 // T mean = idx_mean(r); 01889 // idx_addc(r, (T)-mean, r2); // remove mean 01890 // // get std deviation of roi 01891 // #ifdef __WINDOWS__ 01892 // T coeff = (T) sqrt((double) (idx_sumsqr(r2) / r2.nelements())); 01893 // #else 01894 // T coeff = (T) sqrt((T) (idx_sumsqr(r2) / r2.nelements())); 01895 // #endif 01896 // // apply to entire image 01897 // idx_addc(in, (T)-mean, in); // remove mean 01898 // if (coeff != 0) idx_dotc(in, 1 / coeff, in); 01899 } 01900 01902 01903 template <typename T, class Tstate> 01904 jitter_module<T,Tstate>::jitter_module(const char *name_) 01905 : module_1_1<T,Tstate>(name_), zp(NULL) { 01906 // no deformation defaults 01907 th0 = 0; th1 = 0; tw0 = 0; tw1 = 0; // translation ranges 01908 deg0 = 0; deg1 = 0; // rotation range 01909 sh0 = 1; sh1 = 1; sw0 = 1; sw1 = 1; // scaling range 01910 shh0 = 0; shh1 = 0; shw0 = 0; shw1 = 0; // shear range 01911 elsz0 = 0; elsz1 = 0; elcoeff0 = 0; elcoeff1 = 0; // elastic ranges 01912 } 01913 01914 template <typename T, class Tstate> 01915 jitter_module<T,Tstate>::~jitter_module() { 01916 if (zp) delete zp; 01917 } 01918 01919 template <typename T, class Tstate> 01920 void jitter_module<T,Tstate>::set_translations(vector<int> &v) { 01921 if (v.size() != 4) eblerror("expected 4 elements in " << v); 01922 th0 = v[0]; th1 = v[1]; tw0 = v[2]; tw1 = v[3]; 01923 } 01924 01925 template <typename T, class Tstate> 01926 void jitter_module<T,Tstate>::set_rotations(vector<float> &v) { 01927 if (v.size() != 2) eblerror("expected 2 elements in " << v); 01928 deg0 = v[0]; deg1 = v[1]; 01929 } 01930 01931 template <typename T, class Tstate> 01932 void jitter_module<T,Tstate>::set_scalings(vector<float> &v) { 01933 if (v.size() != 4) eblerror("expected 4 elements in " << v); 01934 sh0 = v[0]; sh1 = v[1]; sw0 = v[2]; sw1 = v[3]; 01935 } 01936 01937 template <typename T, class Tstate> 01938 void jitter_module<T,Tstate>::set_shears(vector<float> &v) { 01939 if (v.size() != 4) 
eblerror("expected 4 elements in " << v); 01940 shh0 = v[0]; shh1 = v[1]; shw0 = v[2]; shw1 = v[3]; 01941 } 01942 01943 template <typename T, class Tstate> 01944 void jitter_module<T,Tstate>::set_elastics(vector<float> &v) { 01945 if (v.size() != 4) eblerror("expected 4 elements in " << v); 01946 elsz0 = (uint) v[0]; elsz1 = (uint) v[1]; 01947 elcoeff0 = v[2]; elcoeff1 = v[3]; 01948 } 01949 01950 template <typename T, class Tstate> 01951 void jitter_module<T,Tstate>::set_padding(vector<uint> &p) { 01952 if (p.size() != 4) eblerror("expected 4 elements in " << p); 01953 zp = new zpad_module<T,Tstate>(p[0], p[1], p[2], p[3]); 01954 } 01955 01956 template <typename T, class Tstate> 01957 void jitter_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 01958 Tstate *i = ∈ 01959 if (zp) { 01960 zp->fprop(in, tmp); 01961 i = &tmp; 01962 } 01963 this->resize_output(*i, out); 01964 if (this->ignored(in, out)) return ; 01965 // random deformations 01966 int th = (int) drand(th0, th1), tw = (int) drand(tw0, tw1); // translation 01967 float deg = drand(deg0, deg1); // rotation 01968 float sh = drand(sh0, sh1), sw = drand(sw0, sw1); // scale 01969 float shh = drand(shh0, shh1), shw = drand(shw0, shw1); // shear 01970 uint elsize = (uint) drand(elsz0, elsz1); 01971 float elc = elsize; // elastic 01972 // expect channels to be in dim 2, 01973 // TODO: allow specifying planar or interleaved in image_deformation 01974 // function to avoid this inefficient and dirty code 01975 idx<T> i2 = i->x.shift_dim(0, 2); 01976 idx<T> a(i2.get_idxdim()), b(i2.get_idxdim()); 01977 idx_copy(i2, a); 01978 // deform 01979 image_deformation(a, b, th, tw, sh, sw, deg, shh, shw, elsize, elc); 01980 // TODO: same as above 01981 b = b.shift_dim(2, 0); 01982 idx_copy(b, out.x); 01983 } 01984 01985 template <typename T, class Tstate> 01986 void jitter_module<T,Tstate>::bprop(Tstate &in, Tstate &out) { 01987 } 01988 01989 template <typename T, class Tstate> 01990 void jitter_module<T,Tstate>::bbprop(Tstate 
&in, Tstate &out){ 01991 } 01992 01993 template <typename T, class Tstate> 01994 std::string jitter_module<T,Tstate>::describe() { 01995 std::string s; 01996 s << "jitter_module " << this->name() 01997 << " with translation height range [" << th0 << " .. " << th1 << "]" 01998 << "and width range [" << tw0 << " .. " << tw1 << "]" 01999 << ", rotation range [" << deg0 << " .. " << deg1 << "]" 02000 << ", scaling height range [" << sh0 << " .. " << sh1 << "]" 02001 << " and width range [" << sw0 << " .. " << sw1 << "]" 02002 << ", shear height range [" << shh0 << " .. " << shh1 << "]" 02003 << " and width range [" << shw0 << " .. " << shw1 << "]" 02004 << ", elastic smoothing size range [" << elsz0 << " .. " << elsz1 << "]" 02005 << " and coeff range [" << elcoeff0 << " .. " << elcoeff1 << "]"; 02006 if (zp) s << ", padding: " << zp->describe(); 02007 return s; 02008 } 02009 02010 template <typename T, class Tstate> 02011 jitter_module<T,Tstate>* jitter_module<T,Tstate>::copy() { 02012 jitter_module<T,Tstate> *l2 = 02013 new jitter_module<T,Tstate>(this->name()); 02014 l2->th0 = th0; l2->th1 = th1; l2->tw0 = tw0; l2->tw1 = tw1; 02015 l2->deg0 = deg0; l2->deg1 = deg1; 02016 l2->sh0 = sh0; l2->sh1 = sh1; l2->sw0 = sw0; l2->sw1 = sw1; 02017 l2->shh0 = shh0; l2->shh1 = shh1; l2->shw0 = shw0; l2->shw1 = shw1; 02018 l2->elsz0 = elsz0; l2->elsz1 = elsz1; 02019 l2->elcoeff0 = elcoeff0; l2->elcoeff1 = elcoeff1; 02020 return l2; 02021 } 02022 02023 } // end namespace ebl