libeblearn
|
00001 /*************************************************************************** 00002 * Copyright (C) 2011 by Yann LeCun, Pierre Sermanet and Soumith Chintala* 00003 * yann@cs.nyu.edu, pierre.sermanet@gmail.com, soumith@gmail.com * 00004 * All rights reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions are met: 00008 * * Redistributions of source code must retain the above copyright 00009 * notice, this list of conditions and the following disclaimer. 00010 * * Redistributions in binary form must reproduce the above copyright 00011 * notice, this list of conditions and the following disclaimer in the 00012 * documentation and/or other materials provided with the distribution. 00013 * * Redistribution under a license not approved by the Open Source 00014 * Initiative (http://www.opensource.org) must display the 00015 * following acknowledgement in all advertising material: 00016 * This product includes software developed at the Courant 00017 * Institute of Mathematical Sciences (http://cims.nyu.edu). 00018 * * The names of the authors may not be used to endorse or promote products 00019 * derived from this software without specific prior written permission. 00020 * 00021 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED 00022 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 00023 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 00024 * DISCLAIMED. IN NO EVENT SHALL ThE AUTHORS BE LIABLE FOR ANY 00025 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 00026 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00027 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 00028 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00029 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00030 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00031 ***************************************************************************/ 00032 00033 namespace ebl { 00034 00036 // subsampling_module 00037 00038 template <typename T, class Tstate> 00039 subsampling_module<T,Tstate>:: 00040 subsampling_module(parameter<T,Tstate> *p, uint thick, idxdim &kernel_, 00041 idxdim &stride_, const char *name_, bool crop_, bool pad_) 00042 : module_1_1<T,Tstate>(name_), coeff(p, thick), thickness(thick), 00043 kernel(kernel_), stride(stride_), crop(crop_), pad(pad_) { 00044 // insert thickness dimension 00045 idxdim d = kernel; 00046 d.insert_dim(0, thickness); 00047 // allocate buffer 00048 sub = Tstate(d); 00049 // default coefficients are: 1/(stridei*stridej)^.5 00050 forget_param_linear fgp(1, .5); 00051 forget(fgp); 00052 } 00053 00054 template <typename T, class Tstate> 00055 subsampling_module<T,Tstate>::~subsampling_module() { 00056 } 00057 00058 template <typename T, class Tstate> 00059 void subsampling_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00060 this->resize_output(in, out); // resize (iff necessary) 00061 // subsampling ( coeff * average ) 00062 idx_clear(sub.x); 00063 idx<T> inx = in.x, outx = out.x; 00064 bool clear = false; 00065 intg in1 = inx.dim(1) - (inx.dim(1) % stride.dim(0)); 00066 intg in2 = inx.dim(2) - (inx.dim(2) % stride.dim(1)); 00067 // input too small, just clear output 00068 if (in1 == 0 || in2 == 0) { 00069 idx_clear(out.x); 00070 return ; 00071 } 00072 // temporarly crop input if mismatch in size 00073 if (crop && (inx.dim(1) % stride.dim(0)) != 0) { 00074 inx = inx.narrow(1, in1, 0); 00075 outx = outx.narrow(1, (intg) floor((float)(inx.dim(1) / stride.dim(0))), 0); 00076 clear = true; // clear outx for untouched padded area 00077 } 00078 if (crop && (inx.dim(2) % stride.dim(1)) != 0) { 00079 inx = inx.narrow(2, in2, 0); 00080 outx = outx.narrow(2, (intg) floor((float)(inx.dim(2) / stride.dim(1))), 0); 00081 clear = true; // clear outx for untouched padded area 00082 } 00083 if (clear) idx_clear(out.x); 00084 // subsample 00085 { idx_bloop4(lix, inx, T, lsx, sub.x, T, lcx, coeff.x, T, ltx, outx, T) { 00086 idx<T> uuin(lix.unfold(1, stride.dim(1), stride.dim(1))); 00087 uuin = uuin.unfold(0, stride.dim(0), stride.dim(0)); 00088 // loop on each pixel of subs kernel, assuming that idx_add is faster 00089 // than looping on each kernel 00090 idx_eloop1(z1, uuin, T) { 00091 idx_eloop1(z2, z1, T) { 00092 idx_add(z2, lsx); // average 00093 } 00094 } 00095 idx_dotc(lsx, lcx.get(), ltx); // coeff 00096 }} 00097 } 00098 00099 template <typename T, class Tstate> 00100 void subsampling_module<T,Tstate>::bprop(Tstate &in, Tstate &out) { 00101 idx<T> indx = in.dx, outdx = out.dx; 00102 intg in1 = indx.dim(1) - (indx.dim(1) % stride.dim(0)); 00103 intg in2 = indx.dim(2) - (indx.dim(2) % stride.dim(1)); 00104 // input too small, do nothing 00105 if (in1 == 0 || in2 == 0) return ; 00106 // temporarly crop input if mismatch in size 00107 if (crop && (indx.dim(1) % stride.dim(0)) != 0) { 00108 indx = indx.narrow(1, in1, 0); 00109 outdx = outdx.narrow(1, (int) floor((float)(indx.dim(1) / stride.dim(0))), 0); 00110 } 00111 if (crop && (indx.dim(2) % stride.dim(1)) != 0) { 00112 indx = indx.narrow(2, in2, 0); 00113 outdx = outdx.narrow(2, (intg) floor((float)(indx.dim(2) / stride.dim(1))), 0); 00114 } 00115 // update internal coefficient's dx 00116 idx_bloop3(lcdx, coeff.dx, T, ltdx, outdx, T, lsx, sub.x, T) { 00117 idx_dotacc(lsx, ltdx, lcdx); 00118 } 00119 // oversampling and accumulate to input's dx 00120 idx_bloop4(lidx, indx, T, lsdx, sub.dx, T, 00121 lcx, coeff.x, T, ltdx2, outdx, T) { 00122 idx_dotc(ltdx2, lcx.get(), lsdx); 00123 idx_m2oversampleacc(lsdx, stride.dim(0), stride.dim(1), lidx); 00124 } 00125 } 00126 00127 template <typename T, class Tstate> 00128 void subsampling_module<T,Tstate>::bbprop(Tstate &in, Tstate &out) { 00129 idx<T> inddx = in.ddx, outddx = out.ddx; 00130 intg in1 = inddx.dim(1) - (inddx.dim(1) % stride.dim(0)); 00131 intg in2 = inddx.dim(2) - (inddx.dim(2) % stride.dim(1)); 00132 // input too small, do nothing 00133 if (in1 == 0 || in2 == 0) return ; 00134 // temporarly crop input if mismatch in size 00135 if (crop && (inddx.dim(1) % stride.dim(0)) != 0) { 00136 inddx = inddx.narrow(1, in1,0); 00137 outddx = outddx.narrow(1, (intg) floor((float)(inddx.dim(1) / stride.dim(0))), 0); 00138 } 00139 if (crop && (inddx.dim(2) % stride.dim(1)) != 0) { 00140 inddx = inddx.narrow(2, in2,0); 00141 outddx = outddx.narrow(2, (intg) floor((float)(inddx.dim(2) / stride.dim(1))), 0); 00142 } 00143 // update internal coefficient's ddx 00144 idx_bloop3(lcdx, coeff.ddx, T, ltdx, outddx, T, lsx, sub.x, T) { 00145 idx_m2squdotm2acc(lsx, ltdx, lcdx); 00146 } 00147 // oversampling and accumulte to input's ddx 00148 idx_bloop4(lidx, inddx, T, lsdx, sub.ddx, T, 00149 lcx, coeff.x, T, ltdx2, outddx, T) { 00150 T cf = lcx.get(); 00151 idx_dotc(ltdx2, cf * cf, lsdx); 00152 idx_m2oversampleacc(lsdx, stride.dim(0), stride.dim(1), lidx); 00153 } 00154 } 00155 00156 template <typename T, class Tstate> 00157 void subsampling_module<T,Tstate>::forget(forget_param_linear &fp) { 00158 double c = fp.value / pow(stride.dim(0) * stride.dim(1), fp.exponent); 00159 idx_fill(coeff.x, (T) c); 00160 } 00161 00162 template <typename T, class Tstate> 00163 bool subsampling_module<T,Tstate>::resize_output(Tstate &in, Tstate &out) { 00164 //if (this->bresize) return false; 00165 TIMING_RESIZING_ACCSTART(); // start accumulating resizing time 00166 intg sin_i = in.x.dim(1); 00167 intg sin_j = in.x.dim(2); 00168 intg si = (intg) floor(sin_i / (float) stride.dim(0)); 00169 intg sj = (intg) floor(sin_j / (float) stride.dim(1)); 00170 // check sizes 00171 if (!crop && !pad && 00172 ((sin_i % stride.dim(0)) != 0 || (sin_j % stride.dim(2)) != 0)) { 00173 cerr << "subsampling " << sin_i << "x" << sin_j << " with stride " 00174 << stride << endl; 00175 eblerror("inconsistent input size and subsampling ratio"); 00176 } 00177 // resize output and sub based in input dimensions 00178 idxdim d(in.x.spec); // use same dimensions as in 00179 d.setdim(1, si); // new size after subsampling 00180 d.setdim(2, sj); // new size after subsampling 00181 if (out.x.get_idxdim() != d) { // resize only if necessary 00182 // resize internal buffer 00183 if (sub.x.order() != d.order()) 00184 sub = Tstate(d); 00185 else 00186 sub.resize(d); 00187 // pad output if not matching strides 00188 if (pad && sin_i % stride.dim(0) != 0) d.setdim(1, d.dim(1) + 1); 00189 if (pad && sin_j % stride.dim(1) != 0) d.setdim(2, d.dim(2) + 1); 00190 // resize out 00191 EDEBUG(this->name() << ": resizing output from " << out.x << " to " << d); 00192 if (out.x.order() != d.order()) out = Tstate(d); 00193 else out.resize(d); 00194 TIMING_RESIZING_ACCSTOP(); // stop accumulating resizing time 00195 return true; 00196 } 00197 TIMING_RESIZING_ACCSTOP(); // stop accumulating resizing time 00198 return false; 00199 } 00200 00201 template <typename T, class Tstate> 00202 fidxdim subsampling_module<T,Tstate>::fprop_size(fidxdim &isize) { 00203 fidxdim osize = isize; 00204 // update spatial dimensions 00205 for (uint i = 1; i < isize.order(); ++i) 00206 osize.setdim(i, std::max((intg) 1, 00207 (intg) floor(isize.dim(i) / 00208 (float) stride.dim(i - 1)))); 00210 isize = bprop_size(osize); 00211 return osize; 00212 } 00213 00214 template <typename T, class Tstate> 00215 fidxdim subsampling_module<T,Tstate>::bprop_size(const fidxdim &osize) { 00217 fidxdim d = stride; 00218 d.insert_dim(0, 1); 00219 fidxdim isize = osize * d; 00220 return isize; 00221 } 00222 00223 template <typename T, class Tstate> 00224 subsampling_module<T,Tstate>* subsampling_module<T,Tstate>::copy() { 00225 // new module 00226 subsampling_module<T,Tstate> *l2 = 00227 new subsampling_module<T, Tstate>(NULL, thickness, kernel, stride, 00228 this->name()); 00229 // assign same parameter state 00230 l2->coeff = coeff; 00231 l2->sub = sub; 00232 return l2; 00233 } 00234 00235 template <typename T, class Tstate> 00236 std::string subsampling_module<T, Tstate>::describe() { 00237 std::string desc; 00238 desc << "subsampling module " << this->name() << " with thickness " 00239 << thickness << ", kernel " << kernel << " and stride " << stride; 00240 return desc; 00241 } 00242 00243 template <typename T, class Tstate> 00244 void subsampling_module<T,Tstate>::dump_fprop(Tstate &in, Tstate &out) { 00245 fprop(in, out); 00246 DUMP(in.x, this->name() << "_subsampling_module_in.x"); 00247 DUMP(coeff.x, this->name() << "_subsampling_module_coeff"); 00248 DUMP(out.x, this->name() << "_subsampling_module_out.x"); 00249 } 00250 00252 // lppooling_module 00253 00254 template <typename T, class Tstate> 00255 lppooling_module<T,Tstate>:: 00256 lppooling_module(uint thick, idxdim &kernel_, 00257 idxdim &stride_, uint lppower_, const char *name_, bool crop_) 00258 : module_1_1<T,Tstate>(name_), thickness(thick), 00259 kernel(kernel_), stride(stride_), crop(crop_), 00260 lp_pow(lppower_), 00261 sqmod((T)lppower_), sqrtmod((T)(1.0/(T)lppower_)) { 00262 // insert thickness dimension 00263 idxdim d = kernel; 00264 d.insert_dim(0, thick); 00265 // allocate buffers 00266 squared = Tstate(d); 00267 convolved = Tstate(d); 00268 // prepare convolution 00269 idx<intg> table = one2one_table(thick); 00270 conv = new convolution_module<T,Tstate> 00271 (¶m, kernel, stride, table, "lppooling_convolution", crop); 00272 // gaussian kernel 00273 idx<T> filter = create_gaussian_kernel<T>(kernel); 00274 idx_bloop1(k, conv->kernel.x, T) { 00275 idx_copy(filter, k); } 00276 } 00277 00278 template <typename T, class Tstate> 00279 lppooling_module<T,Tstate>::~lppooling_module() { 00280 delete conv; 00281 } 00282 00283 template <typename T, class Tstate> 00284 void lppooling_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00285 // avoid div by 0 when bproping 00286 idx_addc(in.x, 1e-10, in.x); // TODO: temporary 00287 // special case for l1pool 00288 if (lp_pow == 1) { 00289 // gaussian-weighted neighborhood of in^2 00290 conv->fprop(in, out); 00291 return; 00292 } 00293 // if it is an odd lp, example l3, l5 take an abs of the input 00294 if ((lp_pow % 2) == 1) idx_abs(in.x, in.x); 00295 // in^p 00296 sqmod.fprop(in, squared); 00297 // gaussian-weighted neighborhood of in^p 00298 conv->fprop(squared, convolved); 00299 // in^1/p(gaussian-weighted neighborhood of in^p) 00300 sqrtmod.fprop(convolved, out); 00301 } 00302 00303 template <typename T, class Tstate> 00304 void lppooling_module<T,Tstate>::bprop(Tstate &in, Tstate &out) { 00305 // special case for l1pool 00306 if (lp_pow == 1) { 00307 conv->bprop(in, out); 00308 return; 00309 } 00310 squared.clear_dx(); 00311 convolved.clear_dx(); 00312 if ((lp_pow % 2) == 1) idx_abs(convolved.dx, convolved.dx); 00313 sqrtmod.bprop(convolved, out); 00314 conv->bprop(squared, convolved); 00315 sqmod.bprop(in, squared); 00316 } 00317 00318 template <typename T, class Tstate> 00319 void lppooling_module<T,Tstate>::bbprop(Tstate &in, Tstate &out) { 00320 // special case for l1pool 00321 if (lp_pow == 1) { 00322 conv->bbprop(in, out); 00323 return; 00324 } 00325 squared.clear_ddx(); 00326 convolved.clear_ddx(); 00327 if ((lp_pow % 2) == 1) idx_abs(convolved.ddx, convolved.ddx); 00328 sqrtmod.bbprop(convolved, out); 00329 conv->bbprop(squared, convolved); 00330 sqmod.bbprop(in, squared); 00331 } 00332 00333 template <typename T, class Tstate> 00334 fidxdim lppooling_module<T,Tstate>::fprop_size(fidxdim &isize) { 00335 fidxdim osize = isize; 00336 // update spatial dimensions 00337 for (uint i = 1; i < isize.order(); ++i) { 00338 uint diff = kernel.dim(i - 1) - stride.dim(i - 1); 00339 osize.setdim(i, std::max((intg) 1, (intg) (floor(isize.dim(i) - diff) 00340 / (float) stride.dim(i - 1)))); 00341 } 00342 // recompute the input size to be compliant with the output 00343 isize = bprop_size(osize); 00344 return osize; 00345 } 00346 00347 template <typename T, class Tstate> 00348 fidxdim lppooling_module<T,Tstate>::bprop_size(const fidxdim &osize) { 00349 fidxdim isize; 00350 if (osize.empty()) return isize; 00351 // multiply by stride 00352 fidxdim d = stride; 00353 d.insert_dim(0, 1); 00354 isize = osize * d; 00355 // add borders 00356 for (uint i = 1; i < isize.order(); ++i) 00357 isize.setdim(i, isize.dim(i) + kernel.dim(i - 1) - stride.dim(i - 1)); 00358 return isize; 00359 } 00360 00361 template <typename T, class Tstate> 00362 lppooling_module<T,Tstate>* lppooling_module<T,Tstate>::copy() { 00363 lppooling_module<T,Tstate> *l2 = 00364 new lppooling_module<T, Tstate>(thickness, kernel, stride, lp_pow, this->name()); 00365 return l2; 00366 } 00367 00368 template <typename T, class Tstate> 00369 std::string lppooling_module<T, Tstate>::describe() { 00370 std::string desc; 00371 desc << "lppooling module " << this->name() << " with thickness " 00372 << thickness << ", kernel " << kernel << " ,stride " << stride 00373 << " and Power "<< lp_pow; 00374 return desc; 00375 } 00376 00377 template <typename T, class Tstate> 00378 void lppooling_module<T,Tstate>::dump_fprop(Tstate &in, Tstate &out) { 00379 fprop(in, out); 00380 conv->dump_fprop(in, out); 00381 } 00382 00384 // wavg_pooling_module 00385 00386 template <typename T, class Tstate> 00387 wavg_pooling_module<T,Tstate>:: 00388 wavg_pooling_module(uint thick, idxdim &kernel_, 00389 idxdim &stride_, const char *name_, bool crop_) 00390 : module_1_1<T,Tstate>(name_), thickness(thick), 00391 kernel(kernel_), stride(stride_), crop(crop_) { 00392 // prepare convolution 00393 idx<intg> table = one2one_table(thick); 00394 conv = new convolution_module<T,Tstate> 00395 (¶m, kernel, stride, table, "wavg_pooling_convolution", crop); 00396 // gaussian kernel 00397 idx<T> filter = create_gaussian_kernel<T>(kernel); 00398 idx_bloop1(k, conv->kernel.x, T) { 00399 idx_copy(filter, k); } 00400 } 00401 00402 template <typename T, class Tstate> 00403 wavg_pooling_module<T,Tstate>::~wavg_pooling_module() { 00404 delete conv; 00405 } 00406 00407 template <typename T, class Tstate> 00408 void wavg_pooling_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00409 conv->fprop(in, out); 00410 } 00411 00412 template <typename T, class Tstate> 00413 void wavg_pooling_module<T,Tstate>::bprop(Tstate &in, Tstate &out) { 00414 conv->bprop(in, out); 00415 } 00416 00417 template <typename T, class Tstate> 00418 void wavg_pooling_module<T,Tstate>::bbprop(Tstate &in, Tstate &out) { 00419 conv->bbprop(in, out); 00420 } 00421 00422 template <typename T, class Tstate> 00423 fidxdim wavg_pooling_module<T,Tstate>::fprop_size(fidxdim &isize) { 00424 fidxdim osize = isize; 00425 // update spatial dimensions 00426 for (uint i = 1; i < isize.order(); ++i) { 00427 uint diff = kernel.dim(i - 1) - stride.dim(i - 1); 00428 osize.setdim(i, std::max((intg) 1, (intg) (floor(isize.dim(i) - diff) 00429 / (float) stride.dim(i - 1)))); 00430 } 00431 // recompute the input size to be compliant with the output 00432 isize = bprop_size(osize); 00433 return osize; 00434 } 00435 00436 template <typename T, class Tstate> 00437 fidxdim wavg_pooling_module<T,Tstate>::bprop_size(const fidxdim &osize) { 00438 // just multiply each dimension by its stride 00439 fidxdim d = stride; 00440 d.insert_dim(0, 1); 00441 fidxdim isize = osize * d; 00442 // add borders 00443 for (uint i = 1; i < isize.order(); ++i) 00444 isize.setdim(i, isize.dim(i) + kernel.dim(i - 1) - stride.dim(i - 1)); 00445 return isize; 00446 } 00447 00448 template <typename T, class Tstate> 00449 wavg_pooling_module<T,Tstate>* wavg_pooling_module<T,Tstate>::copy() { 00450 wavg_pooling_module<T,Tstate> *l2 = 00451 new wavg_pooling_module<T, Tstate> 00452 (thickness, kernel, stride, this->name()); 00453 return l2; 00454 } 00455 00456 template <typename T, class Tstate> 00457 std::string wavg_pooling_module<T, Tstate>::describe() { 00458 std::string desc; 00459 desc << "wavg_pooling module " << this->name() << " with thickness " 00460 << thickness << ", kernel " << kernel << " and stride " << stride; 00461 return desc; 00462 } 00463 00464 template <typename T, class Tstate> 00465 void wavg_pooling_module<T,Tstate>::dump_fprop(Tstate &in, Tstate &out) { 00466 fprop(in, out); 00467 conv->dump_fprop(in, out); 00468 } 00469 00471 // pyramid_module 00472 00473 template <typename T, class Tstate> 00474 pyramid_module<T,Tstate>:: 00475 pyramid_module(uint nscales_, float scaling_ratio_, idxdim &size_, 00476 uint mode_, module_1_1<T,Tstate> *pp_, 00477 bool own_pp_, idxdim *dzpad_, const char *name_) 00478 : resizepp_module<T,Tstate>(size_, mode_, pp_, own_pp_, dzpad_), 00479 s2m_module<T,Tstate>(nscales_, name_), 00480 nscales(nscales_), scaling_ratio(scaling_ratio_) { 00481 } 00482 00483 template <typename T, class Tstate> 00484 pyramid_module<T,Tstate>:: 00485 pyramid_module(uint nscales_, float scaling_ratio_, uint mode_, 00486 module_1_1<T,Tstate> *pp_, 00487 bool own_pp_, idxdim *dzpad_, const char *name_) 00488 : resizepp_module<T,Tstate>(mode_, pp_, own_pp_, dzpad_), 00489 s2m_module<T,Tstate>(nscales, name_), 00490 nscales(nscales_), scaling_ratio(scaling_ratio_) { 00491 } 00492 00493 template <typename T, class Tstate> 00494 pyramid_module<T,Tstate>::~pyramid_module() {} 00495 00496 template <typename T, class Tstate> 00497 void pyramid_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00498 eblerror("not implemented"); 00499 } 00500 00501 template <typename T, class Tstate> 00502 void pyramid_module<T,Tstate>:: 00503 fprop(Tstate &in, mstate<Tstate> &out) { 00504 eblerror("not implemented"); 00505 } 00506 00507 template <typename T, class Tstate> 00508 void pyramid_module<T,Tstate>:: 00509 fprop(Tstate &in, midx<T> &out) { 00510 // expecting a 1D midx 00511 if (out.order() != 1) 00512 eblerror("expected a 1-dimensional midx but got order " << out.order()); 00513 if (out.dim(0) != nscales) 00514 out.resize(nscales); 00515 idxdim d(in.x); 00516 d.setdims(1); 00517 idxdim tgt = this->size; 00518 out.clear(); 00519 for (uint i = 0; i < nscales; ++i) { 00520 idx<T> tmp(d); 00521 this->set_dimensions(tgt.dim(0), tgt.dim(1)); 00522 resizepp_module<T,Tstate>::fprop(in, tmp); 00523 out.set(tmp, i); 00524 tgt.setdim(0, (intg) (tgt.dim(0) * scaling_ratio)); 00525 tgt.setdim(1, (intg) (tgt.dim(1) * scaling_ratio)); 00526 } 00527 } 00528 00529 template <typename T, class Tstate> 00530 void pyramid_module<T,Tstate>:: 00531 bprop(Tstate &in, mstate<Tstate> &out) { 00532 } 00533 00534 template <typename T, class Tstate> 00535 void pyramid_module<T,Tstate>:: 00536 bbprop(Tstate &in, mstate<Tstate> &out) { 00537 } 00538 00539 template <typename T, class Tstate> 00540 mfidxdim pyramid_module<T,Tstate>::bprop_size(mfidxdim &osize) { 00541 this->msize = osize; 00542 if (osize.size() <= 0) 00543 eblerror("expected at least 1 element but found " << osize.size()); 00544 return osize[0]; 00545 } 00546 00547 template <typename T, class Tstate> 00548 std::string pyramid_module<T,Tstate>::describe() { 00549 std::string desc = "Laplacian pyramid "; 00550 desc << resizepp_module<T,Tstate>::describe() 00551 << ", with " << nscales << " scales"; 00552 return desc; 00553 } 00554 00555 // template <typename T, class Tstate> 00556 // const std::vector<rect<int> >& pyramid_module<T,Tstate>:: 00557 // get_original_bboxes() { 00558 // return obboxes; 00559 // } 00560 00561 // template <typename T, class Tstate> 00562 // const std::vector<rect<int> >& pyramid_module<T,Tstate>:: 00563 // get_input_bboxes() { 00564 // return ibboxes; 00565 // } 00566 00568 // average_pyramid_module 00569 00570 template <typename T, class Tstate> 00571 average_pyramid_module<T,Tstate>:: 00572 average_pyramid_module(parameter<T,Tstate> *p, uint thickness, 00573 midxdim &strides_, const char *name_) 00574 : s2m_module<T,Tstate>(strides_.size(), name_), strides(strides_) { 00575 // check that we can reuse results of previous subsamplings, 00576 // i.e. when all strides are a multiple of the previous scale's stride 00577 well_behaved = true; 00578 for (uint i = 1; i < strides.size(); ++i) { 00579 idxdim d0 = strides[i-1]; 00580 idxdim d1 = strides[i]; 00581 idx_checkorder2(d0, d0.order(), d1, d0.order()); 00582 // check that each dimension of d0 is a multiple of d1 00583 for (uint j = 0; j < d0.order(); ++j) { 00584 if (d1.dim(j) % d0.dim(j) != 0) 00585 well_behaved = false; 00586 } 00587 } 00588 // allocate subsampling modules 00589 for (uint i = 0; i < strides.size(); ++i) { 00590 idxdim d = strides[i]; 00591 string name; 00592 name << "average_pyramid" << d; 00593 mods.push_back(new subsampling_module<T,Tstate> 00594 (p, thickness, d, d, name.c_str(), true, true)); 00595 } 00596 } 00597 00598 template <typename T, class Tstate> 00599 average_pyramid_module<T,Tstate>::~average_pyramid_module() { 00600 for (uint i = 0; i < mods.size(); ++i) 00601 delete mods[i]; 00602 } 00603 00604 template <typename T, class Tstate> 00605 void average_pyramid_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00606 eblerror("not implemented"); 00607 } 00608 00609 template <typename T, class Tstate> 00610 void average_pyramid_module<T,Tstate>::fprop(Tstate &in, mstate<Tstate> &out){ 00611 this->resize_output(in, out); 00612 if (well_behaved) { // we can reuse results of previous scale 00613 for (uint i = 0; i < mods.size(); ++i) { 00614 Tstate &o = out[i]; 00615 if (i == 0) // first time, start from in 00616 mods[i]->fprop(in, o); 00617 else { // start from previous scale 00618 Tstate i0 = out[i - 1]; 00619 mods[i]->fprop(i0, o); 00620 } 00621 } 00622 } else { // generate each scale from scratch (less efficient) 00623 for (uint i = 0; i < mods.size(); ++i) { 00624 Tstate &o = out[i]; 00625 mods[i]->fprop(in, o); 00626 } 00627 } 00628 } 00629 00630 template <typename T, class Tstate> 00631 void average_pyramid_module<T,Tstate>:: 00632 bprop(Tstate &in, mstate<Tstate> &out) { 00633 if (well_behaved) { // we can reuse results of previous scale 00634 for (int i = (int) mods.size() - 1; i >= 0; --i) { 00635 Tstate &o = out[i]; 00636 if (i == 0) // first time, start from in 00637 mods[i]->bprop(in, o); 00638 else { // start from previous scale 00639 Tstate i0 = out[i - 1]; 00640 mods[i]->bprop(i0, o); 00641 } 00642 } 00643 } else { // generate each scale from scratch (less efficient) 00644 for (int i = (int) mods.size() - 1; i >= 0; --i) { 00645 Tstate &o = out[i]; 00646 mods[i]->bprop(in, o); 00647 } 00648 } 00649 } 00650 00651 template <typename T, class Tstate> 00652 void average_pyramid_module<T,Tstate>:: 00653 bbprop(Tstate &in, mstate<Tstate> &out) { 00654 if (well_behaved) { // we can reuse results of previous scale 00655 for (int i = (int) mods.size() - 1; i >= 0; --i) { 00656 Tstate &o = out[i]; 00657 if (i == 0) // first time, start from in 00658 mods[i]->bbprop(in, o); 00659 else { // start from previous scale 00660 Tstate i0 = out[i - 1]; 00661 mods[i]->bbprop(i0, o); 00662 } 00663 } 00664 } else { // generate each scale from scratch (less efficient) 00665 for (int i = (int) mods.size() - 1; i >= 0; --i) { 00666 Tstate &o = out[i]; 00667 mods[i]->bbprop(in, o); 00668 } 00669 } 00670 } 00671 00672 template <typename T, class Tstate> 00673 std::string average_pyramid_module<T,Tstate>::describe() { 00674 std::string desc; 00675 desc << "Average pyramid with " << (int) strides.size() 00676 << " scales with strides " << strides 00677 << " (" << (well_behaved ? "" : "not ") << "well-behaved)"; 00678 return desc; 00679 } 00680 00681 template <typename T, class Tstate> 00682 mfidxdim average_pyramid_module<T,Tstate>::bprop_size(mfidxdim &osize) { 00683 if (osize.size() != strides.size()) 00684 eblerror("expected same number of outputs and strides but got " 00685 << osize << " and " << strides); 00686 // multiply all output by strides, they should all match 00687 fidxdim s = strides[0]; 00688 s.insert_dim(0, 1); 00689 fidxdim d1 = osize[0] * s; 00690 for (uint i = 1; i < osize.size(); ++i) { 00691 s = strides[i]; 00692 s.insert_dim(0, 1); 00693 fidxdim d2 = osize[i] * s; 00694 // TEMPORARY CHECK REMOVE 00695 // if (d1 != d2) 00696 // eblerror("all inputs should match but got " << d1 << " and " << d2); 00697 } 00698 mfidxdim m(d1); 00699 return m; 00700 } 00701 00703 // maxss_module 00704 00705 template <typename T, class Tstate> 00706 maxss_module<T,Tstate>:: 00707 maxss_module(uint thick, idxdim &kernel_, idxdim &stride_, const char *name_) 00708 : module_1_1<T,Tstate>(name_), thickness(thick), kernel(kernel_), 00709 stride(stride_), switches(thickness, 1, 1, 2), 00710 float_precision(false), double_precision(false), 00711 indices(thickness, 1, 1, 2) { 00712 #ifdef __TH__ 00713 // check precision to decide if we use TH or not 00714 Tstate *temp = new Tstate(1,1); 00715 fstate_idx<float> *cont = dynamic_cast<fstate_idx<float>*>(temp); 00716 if (cont) { 00717 float_precision = true; 00718 } 00719 else { 00720 fstate_idx<double> *cont_d = dynamic_cast<fstate_idx<double>*>(temp); 00721 if(cont_d) { 00722 double_precision = true; 00723 } 00724 } 00725 delete temp; 00726 #else 00727 eblerror("Maxss is implemented only using the TH library. "); 00728 #endif 00729 } 00730 00731 template <typename T, class Tstate> 00732 maxss_module<T,Tstate>::~maxss_module() { 00733 } 00734 00735 template <typename T, class Tstate> 00736 void maxss_module<T,Tstate>::fprop(Tstate &in, Tstate &out) { 00737 this->resize_output(in, out); // resize (iff necessary) 00738 #ifdef __TH__ 00739 if((float_precision || double_precision) && in.x.order()==3) { 00740 th_maxpool_3d(in.x, kernel.dim(0), kernel.dim(1), out.x, stride.dim(0), 00741 stride.dim(1), indices); 00742 return; 00743 } 00744 #endif 00745 { idx_bloop3(lix, in.x, T, sw, switches, int, ltx, out.x, T) { 00746 int i = 0, j = 0; 00747 idx<T> uuin(lix.unfold(1, kernel.dim(2), stride.dim(1))); 00748 uuin = uuin.unfold(0, kernel.dim(1), stride.dim(0)); 00749 idx_bloop3(z1, uuin, T, sw1, sw, int, o1, ltx, T) { 00750 idx_bloop3(z2, z1, T, sw2, sw1, int, o2, o1, T) { 00751 intg indx = idx_indexmax(z2); // find index of max 00752 // height in input 00753 sw2.set((int) (indx / kernel.dim(2) + stride.dim(0) * i), 0); 00754 // width in input 00755 sw2.set((int) (indx / kernel.dim(2) + stride.dim(1) * j), 1); 00756 o2.set(z2.get(indx)); // copy max to output 00757 } 00758 j++; 00759 } 00760 i++; 00761 }} 00762 } 00763 00764 template <typename T, class Tstate> 00765 void maxss_module<T,Tstate>::bprop(Tstate &in, Tstate &out) { 00766 #ifdef __TH__ 00767 if((float_precision || double_precision) && in.x.order()==3) { 00768 th_maxpool_3d_bprop(in.x, kernel.dim(0), kernel.dim(1), out.dx, in.dx, 00769 stride.dim(0), stride.dim(1), indices); 00770 return; 00771 } 00772 #endif 00773 // copy derivatives in the position given by the switches 00774 int i = 0, j = 0; 00775 idx_bloop3(di1, in.dx, T, s1, switches, int, do1, out.dx, T) { 00776 idx_bloop2(s2, s1, int, do2, do1, T) { 00777 idx_bloop2(s3, s2, int, do3, do2, T) { 00778 i = s3.get(0); j = s3.get(1); 00779 di1.set(di1.get(i, j) + do3.get(), i, j); 00780 }}} 00781 } 00782 00783 template <typename T, class Tstate> 00784 void maxss_module<T,Tstate>::bbprop(Tstate &in, Tstate &out) { 00785 #ifdef __TH__ 00786 if((float_precision || double_precision) && in.x.order()==3) { 00787 th_maxpool_3d_bprop(in.x, kernel.dim(0), kernel.dim(1), out.ddx, in.ddx, 00788 stride.dim(0), stride.dim(1), indices); 00789 return; 00790 } 00791 #endif 00792 // copy derivatives in the position given by the switches 00793 int i = 0, j = 0; 00794 idx_bloop3(di1, in.ddx, T, s1, switches, int, do1, out.ddx, T) { 00795 idx_bloop2(s2, s1, int, do2, do1, T) { 00796 idx_bloop2(s3, s2, int, do3, do2, T) { 00797 i = s3.get(0); j = s3.get(1); 00798 di1.set(di1.get(i, j) + do3.get(), i, j); 00799 }}} 00800 } 00801 00802 template <typename T, class Tstate> 00803 bool maxss_module<T,Tstate>::resize_output(Tstate &in, Tstate &out) { 00804 if (!this->bresize) return false; 00805 TIMING_RESIZING_ACCSTART(); // start accumulating resizing time 00806 intg sin_i = in.x.dim(1); 00807 intg sin_j = in.x.dim(2); 00808 intg si = sin_i / stride.dim(0); 00809 intg sj = sin_j / stride.dim(1); 00810 // TH Handles inconsistent sizes by adding a slight overlap 00811 // #ifndef __TH__ 00812 // check sizes 00813 if ((sin_i % stride.dim(0)) != 0 || (sin_j % stride.dim(1)) != 0) { 00814 cerr << "maxss " << sin_i << "x" << sin_j << " with stride " 00815 << stride << endl; 00816 eblerror("inconsistent input size and maxss ratio"); 00817 } 00818 // #endif 00819 // resize output and sub based in input dimensions 00820 idxdim d(in.x.spec); // use same dimensions as in 00821 d.setdim(1, si); // new size after maxss 00822 d.setdim(2, sj); // new size after maxss 00823 if (out.x.get_idxdim() != d) { // resize only if necessary 00824 EDEBUG(this->name() << ": resizing output from " << out.x << " to " << d); 00825 out.resize(d); 00826 d.insert_dim(2, d.order()); 00827 switches.resize(d); 00828 indices.resize(d); 00829 } 00830 TIMING_RESIZING_ACCSTOP(); // stop accumulating resizing time 00831 return true; 00832 } 00833 00834 template <typename T, class Tstate> 00835 fidxdim maxss_module<T,Tstate>::fprop_size(fidxdim &isize) { 00836 fidxdim osize = isize; 00837 // update spatial dimensions 00838 for (uint i = 1; i < isize.order(); ++i) 00839 osize.setdim(i, std::max((intg) 1, 00840 (intg) floor((float) (isize.dim(i) / stride.dim(i - 1))))); 00842 isize = bprop_size(osize); 00843 return osize; 00844 } 00845 00846 template <typename T, class Tstate> 00847 fidxdim maxss_module<T,Tstate>::bprop_size(const fidxdim &osize) { 00849 fidxdim d = stride; 00850 d.insert_dim(0, 1); 00851 fidxdim isize = osize * d; 00852 return isize; 00853 } 00854 00855 template <typename T, class Tstate> 00856 maxss_module<T,Tstate>* maxss_module<T,Tstate>::copy(parameter<T,Tstate> *p) { 00857 // new module (with its own local parameter buffers) 00858 maxss_module<T,Tstate> *l2 = 00859 new maxss_module<T, Tstate>(thickness, kernel, stride, this->name()); 00860 return l2; 00861 } 00862 00863 template <typename T, class Tstate> 00864 std::string maxss_module<T, Tstate>::describe() { 00865 std::string desc; 00866 desc << "maxss module " << this->name() << " with thickness " << thickness 00867 << ", kernel " << kernel << " and stride " << stride; 00868 return desc; 00869 } 00870 00871 } // end namespace ebl