libeblearn
/home/rex/ebltrunk/core/libeblearn/include/ebl_basic.hpp
00001 /***************************************************************************
00002  *   Copyright (C) 2011 by Yann LeCun, Pierre Sermanet and Soumith Chintala*
00003  *   yann@cs.nyu.edu, pierre.sermanet@gmail.com, soumith@gmail.com  *
00004  *   All rights reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions are met:
00008  *     * Redistributions of source code must retain the above copyright
00009  *       notice, this list of conditions and the following disclaimer.
00010  *     * Redistributions in binary form must reproduce the above copyright
00011  *       notice, this list of conditions and the following disclaimer in the
00012  *       documentation and/or other materials provided with the distribution.
00013  *     * Redistribution under a license not approved by the Open Source
00014  *       Initiative (http://www.opensource.org) must display the
00015  *       following acknowledgement in all advertising material:
00016  *        This product includes software developed at the Courant
00017  *        Institute of Mathematical Sciences (http://cims.nyu.edu).
00018  *     * The names of the authors may not be used to endorse or promote products
00019  *       derived from this software without specific prior written permission.
00020  *
00021  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
00022  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00023  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00024  * DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
00025  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
00026  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00027  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00028  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00030  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031  ***************************************************************************/
00032 
00033 namespace ebl {
00034 
00036   // linear_module
00037 
00038   template <typename T, class Tstate>
00039   linear_module<T, Tstate>::linear_module(parameter<T,Tstate> *p,
00040                                           intg in, intg out, const char *name_)
00041     : module_1_1<T,Tstate>(name_), w(p, out, in) {
00042   }
00043 
00044   template <typename T, class Tstate>
00045   linear_module<T, Tstate>::~linear_module() {
00046   }
00047 
00048   template <typename T, class Tstate>
00049   void linear_module<T, Tstate>::fprop(Tstate &in, Tstate &out) {
00050     if (!in.x.contiguousp() || !out.x.contiguousp())
00051       eblerror("input should be contiguous");
00052     // flatten dimensions starting from second one
00053     idxdim d(in.x);
00054     d.remove_dim(0);
00055     idx<T> inx(in.x.getstorage(), 0, in.x.dim(0), d.nelements());
00056     d.insert_dim(0, w.x.dim(0));
00057     this->resize_output(in, out, &d); // resize (iff necessary)
00058     idx<T> outx(out.x.getstorage(), 0, out.x.dim(0), inx.dim(1));
00059     // linear combination
00060     idx_m2dotm2(w.x, inx, outx);
00061   }
00062 
00063   template <typename T, class Tstate>
00064   void linear_module<T, Tstate>::bprop(Tstate &in, Tstate &out) {
00065     idx<T> inx = in.x.view_as_order(1); // view as 1D idx
00066     idx<T> indx = in.dx.view_as_order(1); // view as 1D idx
00067     idx<T> outdx = out.dx.view_as_order(1); // view as 1D idx
00068     idx<T> twx(w.x.transpose(0, 1)); // switch dimensions 0 and 1
00069     if (outdx.nelements() != w.dx.dim(0))
00070       eblerror("output should have " << w.dx.dim(0) << " elements "
00071                << "but has " << outdx.nelements() << " (" << outdx << ")");
00072     // bprop
00073     idx_m1extm1acc(outdx, inx, w.dx); // backprop to weights
00074     idx_m2dotm1acc(twx, outdx, indx); // backprop to input
00075   }
00076 
00077   template <typename T, class Tstate>
00078   void linear_module<T, Tstate>::bbprop(Tstate &in, Tstate &out) {
00079     idx<T> inx = in.x.view_as_order(1); // view as 1D idx
00080     idx<T> inddx = in.ddx.view_as_order(1); // view as 1D idx
00081     idx<T> outddx = out.ddx.view_as_order(1); // view as 1D idx
00082     idx<T> twx = w.x.transpose(0, 1); // switch dimensions 0 and 1
00083     if (outddx.nelements() != w.ddx.dim(0))
00084       eblerror("output should have " << w.ddx.dim(0) << " elements "
00085                << "but has " << outddx.nelements() << " (" << outddx << ")");
00086     // bbprop
00087     idx_m1squextm1acc(outddx, inx, w.ddx); // backprop to weights
00088     idx_m2squdotm1acc(twx, outddx, inddx); // backprop to input
00089   }
00090 
00091   template <typename T, class Tstate>
00092   void linear_module<T, Tstate>::forget(forget_param_linear &fp) {
00093     double fanin = w.x.dim(1);
00094     double z = fp.value / pow(fanin, fp.exponent);
00095     idx_aloop1(lx, w.x, T) {
00096       *lx = (T) fp.generator.drand(-z, z);
00097     }
00098   }
00099 
00100   template <typename T, class Tstate>
00101   void linear_module<T, Tstate>::normalize() {
00102 #ifdef __CBLAS__
00103     norm_columns(w.x);
00104 #else
00105     eblerror("norm_columns not implemented without cblas");
00106 #endif
00107   }
00108 
00109   template <typename T, class Tstate>
00110   fidxdim linear_module<T, Tstate>::fprop_size(fidxdim &isize) {
00112     fidxdim osize = isize;
00113     osize.setdim(0, w.x.dim(0));
00114     isize = bprop_size(osize);
00115     return osize;
00116   }
00117 
00118   template <typename T, class Tstate>
00119   fidxdim linear_module<T, Tstate>::bprop_size(const fidxdim &osize) {
00120     fidxdim isize = osize;
00121     isize.setdim(0, w.x.dim(1));
00122     return isize;
00123   }
00124 
00125   template <typename T, class Tstate>
00126   linear_module<T, Tstate>* linear_module<T, Tstate>::
00127   copy(parameter<T,Tstate> *p) {
00128     // new module
00129     linear_module<T, Tstate> *l2 =
00130       new linear_module<T, Tstate>(p, w.x.dim(1), w.x.dim(0), this->name());
00131     // assign same parameter state if no parameters were specified
00132     if (!p) l2->w = w;
00133     return l2;
00134   }
00135 
00136   template <typename T, class Tstate>
00137   void linear_module<T, Tstate>::load_x(idx<T> &weights) {
00138     if (!w.x.same_dim(weights)) {
00139       // if sizes are the same except for the feature size, load
00140       // into the corresponding slices with a warning
00141       // this allows to load grayscale pretrained weights only
00142       // in a grayscale + color net for example.
00143       idxdim d(w.x);
00144       d.setdim(0, weights.dim(0));
00145       if (d == weights.get_idxdim()) {
00146         cerr << "Warning: loading weights partly (the first " << d.dim(0)
00147              << " features) from " << weights << " instead of entire weights ("
00148              << w.x << ")." << endl;
00149         idx<T> slices = w.x.narrow(0, weights.dim(0), 0);
00150         idx_copy(weights, slices);
00151       } else
00152         eblthrow("expected same dimension weights but got " << w.x << " and "
00153                  << weights << " instead in " << this->name());
00154     } else
00155       idx_copy(weights, w.x);
00156   }
00157 
00158   template <typename T, class Tstate>
00159   std::string linear_module<T, Tstate>::describe() {
00160     std::string desc;
00161     desc << "linear module " << this->name() << " "
00162          << w.x.dim(1) << " -> " << w.x.dim(0);
00163     return desc;
00164   }
00165 
00166   template <typename T, class Tstate>
00167   void linear_module<T, Tstate>::dump_fprop(Tstate &in, Tstate &out) {
00168     fprop(in, out);
00169     DUMP(in.x, this->name() << "_linear_module_in.x");
00170     DUMP(w.x, this->name() << "_linear_module_weights");
00171   }
00172 
00174   // convolution_module
00175 
00176   template <typename T, class Tstate>
00177   convolution_module<T, Tstate>::
00178   convolution_module(parameter<T,Tstate> *p, idxdim &ker_, idxdim &stride_,
00179                      idx<intg> &tbl, const char *name_, bool crop_)
00180     : module_1_1<T,Tstate>(name_), ker(ker_), stride(stride_), table(tbl),
00181       warnings_shown(false), float_precision(false), double_precision(false),
00182       crop(crop_), use_ipp(false) {
00183     idxdim d(ker);
00184     d.insert_dim(0, tbl.dim(0));
00185     kernel = Tstate(p, d);
00186     // check sanity of connection table
00187     if (table.dim(1) != 2) { // check table order
00188       cerr << "error: expecting a table with dim 1 equal to 2 but found: ";
00189       cerr << table << endl;
00190       eblerror("connection table error");
00191     }
00192     check_table_duplicates(table);
00193     idx<intg> tbl0 = table.select(1, 0);
00194     tablemax = idx_max(tbl0);
00195     idx<intg> tbl1 = table.select(1, 1);
00196     thickness = idx_max(tbl1) + 1;
00197     // check table uses all inputs
00198     idx<bool> tblcount(tablemax + 1);
00199     idx_bloop1(tb, table, intg) {
00200       tblcount.set(true, tb.get(0));
00201     }
00202     bool not_using_all_inputs = false;
00203     for (int i = 0; i <= tablemax; ++i) {
00204       if (tblcount.get(i) == false) {
00205         cerr << "warning: input " << i;
00206         cerr << " not used by connection table in convolution module." << endl;
00207         not_using_all_inputs = true;
00208       }
00209     }
00210     // check if its a full-table
00211     if (((tablemax + 1) * thickness) == table.dim(0) && !not_using_all_inputs)
00212       fulltable = true;
00213 #ifdef __TH__
00214     // check precision to decide if we use TH or not
00215     fstate_idx<float> *cont = dynamic_cast<fstate_idx<float>*>(&kernel);
00216     if (cont) {
00217       float_precision = true;
00218       outtmp = idx<T>(1, 1);
00219     }
00220     else {
00221       fstate_idx<double> *cont_d = dynamic_cast<fstate_idx<double>*>(&kernel);
00222       if(cont_d) {
00223         double_precision = true;
00224         outtmp = idx<T>(1, 1);
00225       }
00226     }
00227 #else
00228   #ifdef __IPP__
00229     // check precision to decide if we use IPP or not
00230     fstate_idx<float> *cont = dynamic_cast<fstate_idx<float>*>(&kernel);
00231     if (cont) {
00232       float_precision = true;
00233       // allocate reversed kernel
00234       revkernel = idx<T>(kernel.x.dim(1), kernel.x.dim(2));
00235       outtmp = idx<T>(1, 1);
00236     }
00237     ipp_err_printed = false;
00238     use_ipp = true;
00239   #endif
00240 #endif
00241     // for external display classes, declare which internal buffers to display
00242     this->internals.push_back(kernel.x);
00243     this->internals_str.push_back("kernels");
00244   }
00245 
00246   template <typename T, class Tstate>
00247   convolution_module<T, Tstate>::~convolution_module() {
00248   }
00249 
00250   template <typename T, class Tstate>
00251   void convolution_module<T, Tstate>::fprop(Tstate &in, Tstate &out) {
00252     if (!convolution_module<T, Tstate>::resize_output(in, out))
00253       return ; // do nothing if resizing failed
00254     // temporarly crop input if mismatch in size
00255     // inx = input tensor over which the kernel has to be applied
00256     idx<T> inx = in.x;
00257     intg ki = kernel.x.dim(1), kj = kernel.x.dim(2);
00258     intg si = stride.dim(0), sj = stride.dim(1);
00259     intg oi = inx.dim(1) - (ki - si), oj = inx.dim(2) - (kj - sj);
00260     if (crop && oi % stride.dim(0) != 0)
00261       inx = inx.narrow(1, inx.dim(1) - oi % si, 0);
00262     if (crop && oj % stride.dim(1) != 0)
00263       inx = inx.narrow(2, inx.dim(2) - oj % sj, 0);
00264     idx_clear(out.x);
00265 #ifdef __TH__
00266     // a direct 3D-map optimization
00267     if((float_precision || double_precision) && in.x.order()==3) {
00268 #ifdef __OPENMP__
00269       svector< idx<T> > lk_list, suin_list, sout_list;
00270       svector<idx<T> > outtmp_list;
00271       vector< idx<intg> > lt_list;
00272       idx_bloop2(lk, kernel.x, T, lt, table, intg) {
00273         lk_list.push_back(new idx<T>(lk));
00274         lt_list.push_back(lt);
00275         suin_list.push_back(new idx<T>(inx.select(0, lt.get(0))));
00276         sout_list.push_back(new idx<T>((out.x).select(0, lt.get(1))));
00277         // need own outtmp variable for parallelization
00278         outtmp_list.push_back(new idx<T>(outtmp.get_idxdim()));
00279       }
00280       intg i;
00281       intg num_outputs = lk_list.size();
00282 #pragma omp parallel for private(i)
00283       for ( i = 0; i < num_outputs; ++i) {
00284         // 2D convolution
00285         th_convolution(suin_list[i], lk_list[i], outtmp_list[i], 
00286                        stride.dim(0), stride.dim(1)); 
00287       } // end of for loop
00288 #pragma omp parallel for private(i)
00289       for ( i = 0; i < out.x.dim(0); ++i) {
00290         for(int j=0; j < lt_list.size(); j++) {
00291           if(lt_list[j].get(1) == i)
00292             th_add(outtmp_list[i],sout_list[i]);
00293         }
00294       }
00295 #else
00296       //if (fulltable)
00297       // th_convolution_3d(inx, kernel.x, out.x, stride.dim(0), stride.dim(1));
00298       //else
00299         th_convolution_3dmap(inx, kernel.x, out.x, table, stride.dim(0), stride.dim(1));
00300 #endif // endif __OPENMP__
00301       return;
00302     }
00303     else {
00304       // unfolding input for a faster convolution operation
00305       idx<T> uuin(inx.unfold(1, kernel.x.dim(1), stride.dim(0)));
00306       uuin = uuin.unfold(2, kernel.x.dim(2), stride.dim(1));
00307       idx_bloop2(lk, kernel.x, T, lt, table, intg) {
00308         idx<T> sout((out.x).select(0, lt.get(1)));
00309         idx<T> suin(uuin.select(0, lt.get(0)));
00310         idx_m4dotm2acc(suin, lk, sout); // 2D convolution
00311       }
00312       return;
00313     }
00314 #endif // endif __TH__
00315     
00316     // unfolding input for a faster convolution operation
00317     idx<T> uuin(inx.unfold(1, kernel.x.dim(1), stride.dim(0)));
00318     uuin = uuin.unfold(2, kernel.x.dim(2), stride.dim(1));
00319     idx_clear(out.x);
00320     // convolve 2D slice for each convolution kernel
00321     { idx_bloop2(lk, kernel.x, T, lt, table, intg) {
00322         idx<T> sout((out.x).select(0, lt.get(1)));
00323   #ifdef __IPP__
00324         if (float_precision && use_ipp) {
00325           rev_idx2_tr(lk, revkernel);
00326           //      idx_clear(outtmp);
00327           idx<T> suin(inx.select(0, lt.get(0)));
00328           ipp_convolution(suin, revkernel, outtmp);
00329           ipp_add(outtmp, sout);
00330         } else { // not using IPP
00331           idx<T> suin(uuin.select(0, lt.get(0)));
00332           idx_m4dotm2acc(suin, lk, sout); // 2D convolution
00333         }
00334   #else
00335         idx<T> suin(uuin.select(0, lt.get(0)));
00336         idx_m4dotm2acc(suin, lk, sout); // 2D convolution
00337   #endif //endif __IPP__
00338       }
00339     }
00340   }
00341 
00342   template <typename T, class Tstate>
00343   void convolution_module<T, Tstate>::bprop(Tstate &in, Tstate &out) {
00344     // temporarly crop input if mismatch in size
00345     idx<T> inx = in.x, indx = in.dx;
00346     intg ki = kernel.x.dim(1), kj = kernel.x.dim(2);
00347     intg si = stride.dim(0), sj = stride.dim(1);
00348     intg oi = inx.dim(1) - (ki - si), oj = inx.dim(2) - (kj - sj);
00349     if (crop && oi % stride.dim(0) != 0) {
00350       inx = inx.narrow(1, inx.dim(1) - oi % si, 0);
00351       indx = indx.narrow(1, inx.dim(1) - oi % si, 0);
00352     }
00353     if (crop && oj % stride.dim(1) != 0) {
00354       inx = inx.narrow(2, inx.dim(2) - oj % sj, 0);
00355       indx = indx.narrow(2, inx.dim(2) - oj % sj, 0);
00356     }
00357  #ifdef __TH__
00358      if ((float_precision || double_precision) && in.x.order() == 3) {
00359        idx_clear(indx);
00360        th_convolution_3dmap_bprop(inx, kernel.x, out.dx, indx, 
00361                                   kernel.dx, table, 
00362                                   stride.dim(0), stride.dim(1));
00363        return;
00364      }
00365      else {
00366        // use the regular method
00367        // backprop through convolution
00368        idx<T> uuin(indx.unfold(1, kernel.dx.dim(1), stride.dim(0)));
00369        uuin = uuin.unfold(2, kernel.dx.dim(2), stride.dim(1));
00370        idx<T> uuinf(inx.unfold(1, kernel.dx.dim(1), stride.dim(0)));
00371        uuinf = uuinf.unfold(2, kernel.dx.dim(2), stride.dim(1));
00372        int transp[5] = { 0, 3, 4, 1, 2 };
00373        idx<T> borp(uuinf.transpose(transp));
00374        { idx_bloop3 (lk, kernel.dx, T, lkf, kernel.x, T,
00375                      lt, table, intg) {
00376            intg islice = lt.get(0);
00377            idx<T> suin(uuin.select(0, islice));
00378            idx<T> sborp(borp.select(0, islice));
00379            idx<T> sout(out.dx.select(0, lt.get(1)));
00380            idx_m2extm2acc(sout, lkf, suin); // backward convolution
00381            idx_m4dotm2acc(sborp, sout, lk); // compute gradient for kernel
00382          }
00383        }
00384        return;
00385      }
00386  #else
00387     // backprop through convolution
00388     idx<T> uuin(indx.unfold(1, kernel.dx.dim(1), stride.dim(0)));
00389     uuin = uuin.unfold(2, kernel.dx.dim(2), stride.dim(1));
00390     idx<T> uuinf(inx.unfold(1, kernel.dx.dim(1), stride.dim(0)));
00391     uuinf = uuinf.unfold(2, kernel.dx.dim(2), stride.dim(1));
00392     int transp[5] = { 0, 3, 4, 1, 2 };
00393     idx<T> borp(uuinf.transpose(transp));
00394     { idx_bloop3 (lk, kernel.dx, T, lkf, kernel.x, T,
00395                   lt, table, intg) {
00396         intg islice = lt.get(0);
00397         idx<T> suin(uuin.select(0, islice));
00398         idx<T> sborp(borp.select(0, islice));
00399         idx<T> sout(out.dx.select(0, lt.get(1)));
00400 #ifdef __IPP__
00401         if (float_precision && use_ipp)
00402           idx_m2extm2acc(sout, revkernel, suin); // backward convolution
00403         else
00404           idx_m2extm2acc(sout, lkf, suin); // backward convolution
00405         idx_m4dotm2acc(sborp, sout, lk); // compute gradient for kernel
00406 #else
00407         idx_m2extm2acc(sout, lkf, suin); // backward convolution
00408         idx_m4dotm2acc(sborp, sout, lk); // compute gradient for kernel
00409 #endif //IPP
00410       }}
00411 #endif //TH
00412   }
00413 
00414   template <typename T, class Tstate>
00415   void convolution_module<T,Tstate>::bbprop(Tstate &in, Tstate &out) {
00416     // temporarly crop input if mismatch in size
00417     idx<T> inx = in.x, inddx = in.ddx;
00418     intg ki = kernel.x.dim(1), kj = kernel.x.dim(2);
00419     intg si = stride.dim(0), sj = stride.dim(1);
00420     intg oi = inx.dim(1) - (ki - si), oj = inx.dim(2) - (kj - sj);
00421     if (crop && oi % stride.dim(0) != 0) {
00422       inx = inx.narrow(1, inx.dim(1) - oi % si, 0);
00423       inddx = inddx.narrow(1, inx.dim(1) - oi % sj, 0);
00424     }
00425     if (crop && oj % stride.dim(1) != 0) {
00426       inx = inx.narrow(2, inx.dim(2) - oj % si, 0);
00427       inddx = inddx.narrow(2, inx.dim(2) - oj % sj, 0);
00428     }
00429     // backprop through convolution
00430     idx<T> uuin(inddx.unfold(1, kernel.ddx.dim(1), stride.dim(0)));
00431     uuin = uuin.unfold(2, kernel.ddx.dim(2), stride.dim(1));
00432     idx<T> uuinf(inx.unfold(1, kernel.ddx.dim(1), stride.dim(0)));
00433     uuinf = uuinf.unfold(2, kernel.ddx.dim(2), stride.dim(1));
00434     int transp[5] = { 0, 3, 4, 1, 2 };
00435     idx<T> borp(uuinf.transpose(transp));
00436     { idx_bloop3 (lk, kernel.ddx, T, lkf, kernel.x, T,
00437                   lt, table, intg) {
00438         intg islice = lt.get(0);
00439         idx<T> suin(uuin.select(0, islice));
00440         idx<T> sborp(borp.select(0, islice));
00441         idx<T> sout((out.ddx).select(0, lt.get(1)));
00442 
00443 #ifdef __IPP__
00444         if (float_precision && use_ipp)
00445           idx_m2squextm2acc(sout, revkernel, suin); // backward convolution
00446         else
00447           idx_m2squextm2acc(sout, lkf, suin); // backward convolution
00448         idx_m4squdotm2acc(sborp, sout, lk); // compute gradient for kernel
00449 #else
00450         idx_m2squextm2acc(sout, lkf, suin); // backward convolution
00451         idx_m4squdotm2acc(sborp, sout, lk); // compute gradient for kernel
00452 #endif
00453       }}
00454   }
00455 
00456   template <typename T, class Tstate>
00457   void convolution_module<T,Tstate>::forget(forget_param_linear &fp) {
00458     idx<T> kx(kernel.x);
00459     intg vsize = kx.dim(1);
00460     intg hsize = kx.dim(2);
00461     idx<intg> ts(table.select(1, 1));
00462     idx<int> fanin(1 + idx_max(ts));
00463     idx_clear(fanin);
00464     { idx_bloop1(tab, table, intg)      {
00465         fanin.set(1 + fanin.get(tab.get(1)), tab.get(1)); }}
00466     { idx_bloop2(tab, table, intg, x, kx, T) {
00467         double s = fp.value / pow((vsize * hsize * fanin.get(tab.get(1))),
00468                                   fp.exponent);
00469         { idx_bloop1(lx, x, T) {
00470             { idx_bloop1(llx, lx, T) {
00471                 T n = (T) fp.generator.drand(-s, s);
00472                 llx.set(n);
00473               }}
00474           }}
00475       }}
00476   }
00477 
00478   template <typename T, class Tstate>
00479   bool convolution_module<T,Tstate>::resize_output(Tstate &in, Tstate &out) {
00480     if (!this->bresize) return false;
00481     TIMING_RESIZING_ACCSTART(); // start accumulating resizing time
00482     intg ki = kernel.x.dim(1), kj = kernel.x.dim(2);
00483     intg si = stride.dim(0), sj = stride.dim(1);
00484     // check input size for table
00485     if (in.x.dim(0) < tablemax + 1)
00486       eblerror("error: expecting input with size " << tablemax + 1
00487                << " in dimension 0 but found: " << in.x);
00488     if (!warnings_shown && (in.x.dim(0) > tablemax + 1)) {
00489       warnings_shown = true;
00490       cerr << "warning: convolution connection table is not using all inputs "
00491            << "in layer " << this->name() << " the maximum input index used by "
00492            << "the table is " << tablemax << " but the input is "
00493            << in.x << endl;
00494     }
00495     // check sizes
00496     if (!crop && (((in.x.dim(1) - (ki - si)) % si != 0) ||
00497                   ((in.x.dim(2) - (kj - sj)) % sj != 0)))
00498       eblerror("inconsistent input size, kernel size, and subsampling ratio");
00499 #ifdef __IPP__
00500     if ((stride.dim(0) != 1) || (stride.dim(1) != 1)) {
00501       use_ipp = false;
00502       if (!ipp_err_printed) {
00503         // cerr << "Warning: not using IPP in " << this->name()
00504         //      << " because stride > 1 not implemented for IPP" << endl;
00505         ipp_err_printed = true;
00506       }
00507     } else use_ipp = true;
00508 #endif
00509     idx<T> inx = in.x;
00510     idxdim d(in.x.spec); // use same dimensions as in
00511     intg oi = inx.dim(1) - (ki - si), oj = inx.dim(2) - (kj - sj);
00512     intg ii = inx.dim(1) - oi % si, jj = inx.dim(2) - oj % sj;
00513     // if kernel is smaller than input, just resize output to 1 blank pixel
00514     if (ii == 0 || jj == 0 || ki > inx.dim(1) || kj > inx.dim(2)) {
00515       d.setdims(1);
00516       d.setdim(0, thickness);
00517       out = Tstate(d);
00518       out.clear();
00519       return false;
00520     }
00521     // crop input if mismatch in size
00522     if (crop && oi % stride.dim(0) != 0)
00523       inx = inx.narrow(1, ii, 0);
00524     if (crop && oj % stride.dim(1) != 0)
00525       inx = inx.narrow(2, jj, 0);
00526     // unfolding input for a faster convolution operation
00527     idx<T> uuin(inx.unfold(1, ki, si));
00528     uuin = uuin.unfold(2, kj, sj);
00529     // resize output based in input dimensions
00530     d.setdim(0, thickness); // except for the first one
00531     d.setdim(1, uuin.dim(1)); // convolution trims dimensions a bit
00532     d.setdim(2, uuin.dim(2)); // convolution trims dimensions a bit
00533     if (out.x.get_idxdim() != d) { // resize only if necessary
00534       EDEBUG(this->name() << ": resizing output from " << out.x << " to " << d);
00535 #ifdef __TH__
00536       if (float_precision || double_precision) {
00537         outtmp.resize(d.dim(1), d.dim(2));
00538       }
00539 #else
00540   #ifdef __IPP__
00541       if (float_precision && use_ipp) {
00542         outtmp.resize(d.dim(1), d.dim(2));
00543       }
00544   #endif
00545 #endif
00546       if (out.x.order() != d.order())
00547         out = Tstate(d);
00548       else
00549         out.resize(d);
00550     }
00551     TIMING_RESIZING_ACCSTOP(); // stop accumulating resizing time
00552     return true;
00553   }
00554 
00555   template <typename T, class Tstate>
00556   fidxdim convolution_module<T,Tstate>::fprop_size(fidxdim &isize) {
00557     fidxdim osize = isize;
00558     if (osize.empty()) return osize;
00559     // features dimension
00560     osize.setdim(0, thickness);
00561     // update spatial dimensions
00562     for (uint i = 1; i < isize.order(); ++i)
00563       osize.setdim(i, std::max((float) 1, isize.dim(i) - kernel.x.dim(i) + 1));
00565     isize = bprop_size(osize);
00566     return osize;
00567   }
00568 
00569   template <typename T, class Tstate>
00570   fidxdim convolution_module<T,Tstate>::bprop_size(const fidxdim &osize) {
00571     fidxdim isize = osize;
00572     // features dimension
00573     isize.setdim(0, thickness);
00574     // spatial dimensions
00575     for (uint i = 1; i < osize.order(); ++i) {
00576       isize.setdim(i, osize.dim(i) + kernel.x.dim(i) - 1);
00577       //      isize.setoffset(i, osize.offset(i) - kernel.x.dim(i) / 2);
00578     }
00579     return isize;
00580   }
00581 
00582   template <typename T, class Tstate>
00583   convolution_module<T,Tstate>* convolution_module<T,Tstate>::
00584   copy(parameter<T,Tstate> *p) {
00585     convolution_module<T,Tstate> *l2 =
00586       new convolution_module<T,Tstate>(p, ker, stride, table, this->name());
00587     if (!p) // assign same parameter state if no parameters were specified
00588       l2->kernel = kernel;
00589     return l2;
00590   }
00591 
00592   template <typename T, class Tstate>
00593   void convolution_module<T, Tstate>::load_x(idx<T> &weights) {
00594     if (!kernel.x.same_dim(weights)) {
00595       // if sizes are the same except for the feature size, load
00596       // into the corresponding slices with a warning
00597       // this allows to load grayscale pretrained weights only
00598       // in a grayscale + color net for example.
00599       idxdim d(kernel.x);
00600       d.setdim(0, weights.dim(0));
00601       if (d == weights.get_idxdim()) {
00602         cerr << "Warning: loading weights partly (the first " << d.dim(0)
00603              << " features) from " << weights << " instead of entire weights ("
00604              << kernel.x << ")." << endl;
00605         idx<T> slices = kernel.x.narrow(0, weights.dim(0), 0);
00606         idx_copy(weights, slices);
00607       } else
00608         eblthrow("expected same dimension weights but got " << kernel.x
00609                  << " and " << weights << " instead in " << this->name());
00610     } else
00611       idx_copy(weights, kernel.x);
00612   }
00613 
00614   template <typename T, class Tstate>
00615   std::string convolution_module<T, Tstate>::describe() {
00616     std::string desc;
00617     desc << "convolution module " << this->name() << " with " << kernel.x.dim(0)
00618          << " kernels with size " << ker << ", stride " << stride
00619          << " and table " << table << " (" << tablemax+1 << "->" << thickness
00620          << ")";
00621     return desc;
00622   }
00623 
00624   template <typename T, class Tstate>
00625   void convolution_module<T, Tstate>::dump_fprop(Tstate &in, Tstate &out) {
00626     fprop(in, out);
00627     DUMP(in.x, this->name() << "_convolution_module_in.x");
00628     DUMP(kernel.x, this->name() << "_convolution_module_ker.x");
00629     DUMP(table, this->name() << "_convolution_module_table");
00630     DUMP(out.x, this->name() << "_convolution_module_out.x");
00631   }
00632 
00634   // addc_module
00635 
00636   template <typename T, class Tstate>
00637   addc_module<T,Tstate>::addc_module(parameter<T,Tstate> *p, intg size,
00638                                      const char *name_)
00639     : module_1_1<T,Tstate>(name_), bias(p, size) {
00640   }
00641 
00642   template <typename T, class Tstate>
00643   addc_module<T,Tstate>::~addc_module() {
00644   }
00645 
00646   template <typename T, class Tstate>
00647   void addc_module<T,Tstate>::fprop(Tstate& in, Tstate& out) {
00648     if (&in != &out) { // resize only when input and output are different
00649       idxdim d(in.x.spec); // use same dimensions as in
00650       d.setdim(0, bias.x.dim(0)); // except for the first one
00651       this->resize_output(in, out, &d); // resize iff necessary
00652     }
00653     // add each bias to entire slices cut from the first dimension
00654     idx_bloop3(inx, in.x, T, biasx, bias.x, T, outx, out.x, T) {
00655       idx_addc(inx, biasx.get(), outx);
00656     }
00657   }
00658 
00659   template <typename T, class Tstate>
00660   void addc_module<T,Tstate>::bprop(Tstate& in, Tstate& out) {
00661     if (&in != &out) idx_checknelems2_all(in.dx, out.dx);
00662 
00663     idx_bloop3(indx, in.dx, T, biasdx, bias.dx, T,
00664                outdx, out.dx, T) {
00665       if (&in != &out) // only pass on info if necessary
00666         idx_add(outdx, indx, indx); // accumulate gradients to input
00667       idx_sumacc(outdx, biasdx); // accumulate gradients to weights
00668     }
00669   }
00670 
00671   template <typename T, class Tstate>
00672   void addc_module<T,Tstate>::bbprop(Tstate& in, Tstate& out) {
00673     if (&in != &out) idx_checknelems2_all(in.ddx, out.ddx);
00674 
00675     idx_bloop3(inddx, in.ddx, T, biasddx, bias.ddx, T,
00676                outddx, out.ddx, T) {
00677       if (&in != &out) // only pass on info if necessary
00678         idx_add(outddx, inddx, inddx); // accumulate 2nd gradients to input
00679       idx_sumacc(outddx, biasddx); // accumulate 2nd gradients to weights
00680     }
00681   }
00682 
00683   template <typename T, class Tstate>
00684   void addc_module<T,Tstate>::forget(forget_param_linear& fp) {
00685     idx_clear(bias.x);
00686   }
00687 
00688   template <typename T, class Tstate>
00689   addc_module<T,Tstate>* addc_module<T,Tstate>::copy(parameter<T,Tstate> *p) {
00690     // new module (with its own local parameter buffers)
00691     addc_module<T,Tstate> *l2 =
00692       new addc_module<T, Tstate>(p, bias.x.dim(0), this->name());
00693     // assign same parameter state if no parameters were specified
00694     if (!p) l2->bias = bias;
00695     return l2;
00696   }
00697 
00698   template <typename T, class Tstate>
00699   void addc_module<T, Tstate>::load_x(idx<T> &weights) {
00700     if (!bias.x.same_dim(weights)) {
00701       // if sizes are the same except for the feature size, load
00702       // into the corresponding slices with a warning
00703       // this allows to load grayscale pretrained weights only
00704       // in a grayscale + color net for example.
00705       idxdim d(bias.x);
00706       d.setdim(0, weights.dim(0));
00707       if (d == weights.get_idxdim()) {
00708         cerr << "Warning: loading weights partly (the first " << d.dim(0)
00709              << " features) from " << weights << " instead of entire weights ("
00710              << bias.x << ")." << endl;
00711         idx<T> slices = bias.x.narrow(0, weights.dim(0), 0);
00712         idx_copy(weights, slices);
00713       } else
00714       eblthrow("expected same dimension weights but got " << bias.x << " and "
00715                << weights << " instead in " << this->name());
00716     } else
00717       idx_copy(weights, bias.x);
00718   }
00719 
00720   template <typename T, class Tstate>
00721   std::string addc_module<T, Tstate>::describe() {
00722     std::string desc;
00723     desc << "bias module " << this->name() << " with "
00724          << bias.x.dim(0) << " biases";
00725     return desc;
00726   }
00727 
00728   template <typename T, class Tstate>
00729   void addc_module<T,Tstate>::dump_fprop(Tstate& in, Tstate& out) {
00730     fprop(in, out);
00731     DUMP(in.x, this->name() << "_addc_module_in.x");
00732     DUMP(bias.x, this->name() << "_addc_module_weights");
00733     DUMP(out.x, this->name() << "_addc_module_out.x");
00734   }
00735 
00737   // power_module
00738 
00739   template <typename T, class Tstate>
00740   power_module<T,Tstate>::power_module(T p_)
00741     : module_1_1<T,Tstate>("power"), p(p_) {
00742   }
00743 
00744   template <typename T, class Tstate>
00745   power_module<T,Tstate>::~power_module() {
00746   }
00747 
00748   template <typename T, class Tstate>
00749   void power_module<T,Tstate>::fprop(Tstate &in, Tstate &out) {
00750     this->resize_output(in, out); // resize iff necessary
00751 // #ifdef __TH__
00752 //     th_pow(in.x, out.x, p);
00753 // #else
00754     idx_power(in.x, p, out.x);
00755 //#endif
00756   }
00757 
00758   template <typename T, class Tstate>
00759   void power_module<T,Tstate>::bprop(Tstate &in, Tstate &out) {
00760     state_idx_check_different(in, out); // forbid same in and out
00761     idx_checknelems2_all(in.dx, out.dx); // must have same dimensions
00762 
00763     if (!tt.same_dim(in.x.get_idxdim())) { // resize temp buffer
00764       idxdim d(in.x);
00765       tt = idx<T>(d);
00766     }
00767     // compute derivative
00768 // #ifdef __TH__
00769 //     th_pow(in.x, tt, p - 1);
00770 // #else
00771     idx_power(in.x, p - 1, tt);
00772 //#endif
00773     idx_mul(out.dx, tt, tt); 
00774     idx_dotcacc(tt, p, in.dx);
00775   }
00776 
00777   template <typename T, class Tstate>
00778   void power_module<T,Tstate>::bbprop(Tstate &in, Tstate &out) {
00779     state_idx_check_different(in, out); // forbid same in and out
00780     idx_checknelems2_all(in.ddx, out.ddx); // must have same dimensions
00781 
00782     if (!tt.same_dim(in.x.get_idxdim())) { // resize temp buffer
00783       idxdim d(in.x);
00784       tt = idx<T>(d);
00785     }
00786     // compute 2nd derivative
00787 // #ifdef __TH__
00788 //     th_pow(in.x, tt, p - 1);
00789 // #else
00790     idx_power(in.x, p - 1, tt);
00791 //#endif
00792     idx_mul(tt, tt, tt); 
00793     idx_mul(out.ddx, tt, tt); 
00794     idx_dotcacc(tt, (p * p), in.ddx);
00795   }
00796 
00798   // diff_module
00799 
00800   template <typename T, class Tstate>
00801   diff_module<T,Tstate>::diff_module() : module_2_1<T,Tstate>("diff") {
00802   }
00803 
00804   template <typename T, class Tstate>
00805   diff_module<T,Tstate>::~diff_module() {
00806   }
00807 
00808   template <typename T, class Tstate>
00809   void diff_module<T,Tstate>::fprop(Tstate &in1, Tstate &in2, Tstate &out) {
00810     this->resize_output(in1, in2, out); // resize iff necessary
00811     idx_sub(in1.x, in2.x, out.x);
00812   }
00813 
00814   template <typename T, class Tstate>
00815   void diff_module<T,Tstate>::bprop(Tstate &in1, Tstate &in2, Tstate &out) {
00816     state_idx_check_different3(in1, in2, out); // forbid same in and out
00817     idx_checknelems3_all(in1.dx, in2.dx, out.dx);// must have same dimensions
00818 
00819     idx_add(out.dx, in1.dx); // derivative wrt in1
00820     idx_minus_acc(out.dx, in2.dx); // derivative wrt in2
00821   }
00822 
00823   template <typename T, class Tstate>
00824   void diff_module<T,Tstate>::bbprop(Tstate &in1, Tstate &in2, Tstate &out) {
00825     state_idx_check_different3(in1, in2, out); // forbid same in and out
00826     idx_checknelems3_all(in1.ddx, in2.ddx, out.ddx);// must have same dimensions
00827 
00828     idx_add(out.ddx, in1.ddx); // derivative wrt in1
00829     idx_add(out.ddx, in2.ddx); // derivative wrt in2
00830   }
00831 
00833   // mul_module
00834 
00835   template <typename T, class Tstate>
00836   mul_module<T,Tstate>::mul_module() : module_2_1<T,Tstate>("mul") {
00837   }
00838 
00839   template <typename T, class Tstate>
00840   mul_module<T,Tstate>::~mul_module() {
00841   }
00842 
00843   template <typename T, class Tstate>
00844   void mul_module<T,Tstate>::fprop(Tstate &in1, Tstate &in2, Tstate &out) {
00845     this->resize_output(in1, in2, out); // resize iff necessary
00846     idx_mul(in1.x, in2.x, out.x);
00847   }
00848 
00849   template <typename T, class Tstate>
00850   void mul_module<T,Tstate>::bprop(Tstate &in1, Tstate &in2, Tstate &out) {
00851     state_idx_check_different3(in1, in2, out); // forbid same in and out
00852     idx_checknelems3_all(in1.dx, in2.dx, out.dx);// must have same dimensions
00853 
00854     if (!tmp.same_dim(in1.x.get_idxdim())) { // resize temp buffer
00855       idxdim d(in1.x);
00856       tmp = idx<T>(d);
00857     }
00858     idx_mul(out.dx, in2.x, tmp);
00859     idx_add(tmp, in1.dx);
00860     idx_mul(out.dx, in1.x, tmp);
00861     idx_add(tmp, in2.dx);
00862   }
00863 
00864   template <typename T, class Tstate>
00865   void mul_module<T,Tstate>::bbprop(Tstate &in1, Tstate &in2, Tstate &out) {
00866     state_idx_check_different3(in1, in2, out); // forbid same in and out
00867     idx_checknelems3_all(in1.ddx, in2.ddx, out.ddx);// must have same dimensions
00868 
00869     if (!tmp.same_dim(in1.x.get_idxdim())) { // resize temp buffer
00870       idxdim d(in1.x);
00871       tmp = idx<T>(d);
00872     }
00873     idx_mul(in2.x, in2.x, tmp);
00874     idx_mul(out.ddx, tmp, tmp);
00875     idx_add(tmp, in1.ddx);
00876     idx_mul(in1.x, in1.x, tmp);
00877     idx_mul(out.ddx, tmp, tmp);
00878     idx_add(tmp, in2.ddx);
00879   }
00880 
00882   // thres_module
00883 
00884   template <typename T, class Tstate>
00885   thres_module<T,Tstate>::thres_module(T thres_, T val_)
00886     : module_1_1<T,Tstate>("thres"), thres(thres_), val(val_) {
00887   }
00888 
00889   template <typename T, class Tstate>
00890   thres_module<T,Tstate>::~thres_module() {
00891   }
00892 
00893   template <typename T, class Tstate>
00894   void thres_module<T,Tstate>::fprop(Tstate &in, Tstate &out) {
00895     this->resize_output(in, out); // resize iff necessary
00896     idx_aloop2(inx, in.x, T, outx, out.x, T) {
00897       if (*inx > thres)
00898         *outx = *inx;
00899       else
00900         *outx = val;
00901     }
00902   }
00903 
00904   template <typename T, class Tstate>
00905   void thres_module<T,Tstate>::bprop(Tstate &in, Tstate &out) {
00906     state_idx_check_different(in, out); // forbid same in and out
00907     idx_checknelems2_all(in.dx, out.dx); // must have same dimensions
00908 
00909     idx_aloop3(inx, in.x, T, indx, in.dx, T, outdx, out.dx, T) {
00910       if (*inx > thres)
00911         *indx += *outdx;
00912     }
00913   }
00914 
00915   template <typename T, class Tstate>
00916   void thres_module<T,Tstate>::bbprop(Tstate &in, Tstate &out) {
00917     state_idx_check_different(in, out); // forbid same in and out
00918     idx_checknelems2_all(in.ddx, out.ddx); // must have same dimensions
00919 
00920     idx_add(out.ddx, in.ddx);
00921   }
00922 
00924   // cutborder_module
00925 
00926   template <typename T, class Tstate>
00927   cutborder_module<T,Tstate>::cutborder_module(int nr_, int nc_)
00928     : module_1_1<T,Tstate>("cutborder"), nrow(nr_), ncol(nc_) {
00929   }
00930 
00931   template <typename T, class Tstate>
00932   cutborder_module<T,Tstate>::~cutborder_module() {
00933   }
00934 
00935   template <typename T, class Tstate>
00936   void cutborder_module<T,Tstate>::fprop(Tstate &in, Tstate &out) {
00937     intg inr = in.x.dim(1);
00938     intg inc = in.x.dim(2);
00939     intg outr = inr - 2 * nrow;
00940     intg outc = inc - 2 * ncol;
00941     idxdim d(in.x.dim(0), outr, outc);
00942     this->resize_output(in, out, &d); // resize iff necessary
00943     out.clear();
00944     idx<T> tmp = in.x.narrow(1, outr, nrow);
00945     tmp = tmp.narrow(2, outc, ncol);
00946     idx_copy(tmp, out.x);
00947   }
00948 
00949   template <typename T, class Tstate>
00950   void cutborder_module<T,Tstate>::bprop(Tstate &in,
00951                                          Tstate &out) {
00952     state_idx_check_different(in, out); // forbid same in and out
00953 
00954     intg inr = out.x.dim(1);
00955     intg inc = out.x.dim(2);
00956     idx<T> tmp = in.dx.narrow(1, inr, nrow);
00957     tmp = tmp.narrow(2, inc, ncol);
00958     idx_add(out.dx, tmp);
00959   }
00960 
00961   template <typename T, class Tstate>
00962   void cutborder_module<T,Tstate>::bbprop(Tstate &in, Tstate &out) {
00963     state_idx_check_different(in, out); // forbid same in and out
00964 
00965     intg inr = out.x.dim(1);
00966     intg inc = out.x.dim(2);
00967     idx<T> tmp = in.ddx.narrow(1, inr, nrow);
00968     tmp = tmp.narrow(2, inc, ncol);
00969     idx_add(out.ddx, tmp);
00970   }
00971 
00973   // zpad_module
00974 
00975   template <typename T, class Tstate>
00976   zpad_module<T,Tstate>::zpad_module(const char *name_)
00977     : module_1_1<T,Tstate>(name_) {
00978     set_paddings(0, 0, 0, 0);
00979   }
00980 
00981   template <typename T, class Tstate>
00982   zpad_module<T,Tstate>::zpad_module(int nr, int nc)
00983     : module_1_1<T,Tstate>("zpad") {
00984     set_paddings(nr, nc, nr, nc);
00985   }
00986 
00987   template <typename T, class Tstate>
00988   zpad_module<T,Tstate>::zpad_module(int top, int left, int bottom, int right)
00989     : module_1_1<T,Tstate>("zpad") {
00990     set_paddings(top, left, bottom, right);
00991   }
00992 
00993   template <typename T, class Tstate>
00994   zpad_module<T,Tstate>::zpad_module(idxdim &kerdims, const char *name_)
00995     : module_1_1<T,Tstate>(name_) {
00996     set_kernel(kerdims);
00997   }
00998 
00999   template <typename T, class Tstate>
01000   zpad_module<T,Tstate>::zpad_module(midxdim &kerdims, const char *name_)
01001     : module_1_1<T,Tstate>(name_) {
01002     set_kernels(kerdims);
01003   }
01004 
01005   template <typename T, class Tstate>
01006   zpad_module<T,Tstate>::~zpad_module() {
01007   }
01008 
01009   template <typename T, class Tstate>
01010   void zpad_module<T,Tstate>::fprop(mstate<Tstate> &in, mstate<Tstate> &out) {
01011     // check that in/out have at least 1 state and the same number of them.
01012     if (in.size() == 0) eblerror("input should have at least 1");
01013     out.resize(in);
01014     // run regular fprop on each states
01015     for (uint i = 0; i < in.size(); ++i) {
01016       Tstate &fin = in[i], &fout = out[i];
01017       if (i < pads.size()) pad = pads[i];
01018       fprop(fin, fout);
01019     }
01020     // remember number of input/outputs
01021     this->ninputs = in.size();
01022     this->noutputs = out.size();
01023   }
01024   
01025   template <typename T, class Tstate>
01026   void zpad_module<T,Tstate>::fprop(Tstate &in, Tstate &out) {
01027     int top = pad.dim(0), left = pad.dim(1), bottom = pad.dim(2),
01028       right = pad.dim(3);
01029     idx<T> input = in.x;
01030     idxdim d(input.dim(0),
01031              input.dim(1) + top + bottom,
01032              input.dim(2) + left + right);
01033 
01034     if (&in == &out) { // same buffers, use a temporary copy
01035       if (input.get_idxdim() != in.x.get_idxdim())
01036         input = idx<T>(in.x.get_idxdim());
01037       idx_copy(in.x, input); // only copy forward
01038     }
01039     this->resize_output(in, out, &d); // resize iff necessary
01040     out.clear();
01041     idx<T> tmp = out.x.narrow(1, input.dim(1), top);
01042     tmp = tmp.narrow(2, input.dim(2), left);
01043     idx_copy(input, tmp);
01044   }
01045 
01046   template <typename T, class Tstate>
01047   void zpad_module<T,Tstate>::fprop(Tstate &in, idx<T> &out) {
01048     int top = pad.dim(0), left = pad.dim(1), bottom = pad.dim(2),
01049       right = pad.dim(3);
01050     idx<T> input = in.x;
01051     idxdim d(input.dim(0),
01052              input.dim(1) + top + bottom,
01053              input.dim(2) + left + right);
01054     if (&in.x == &out) { // same buffers, use a temporary copy
01055       input = idx<T>(in.x.get_idxdim());
01056       idx_copy(in.x, input); // only copy forward
01057     }
01058     if (!out.same_dim(d)) // resize only when necessary
01059       out.resize(d);
01060     idx_clear(out);
01061     idx<T> tmp = out.narrow(1, input.dim(1), top);
01062     tmp = tmp.narrow(2, input.dim(2), left);
01063     idx_copy(input, tmp);
01064   }
01065 
01066   template <typename T, class Tstate>
01067   void zpad_module<T,Tstate>::fprop(idx<T> &in, idx<T> &out) {
01068     int top = pad.dim(0), left = pad.dim(1), bottom = pad.dim(2),
01069       right = pad.dim(3);
01070     idx<T> input = in;
01071     idxdim d(input.dim(0),
01072              input.dim(1) + top + bottom,
01073              input.dim(2) + left + right);
01074     if (&in == &out) { // same buffers, use a temporary copy
01075       input = idx<T>(in.get_idxdim());
01076       idx_copy(in, input); // only copy forward
01077     }
01078     if (!out.same_dim(d)) // resize only when necessary
01079       out.resize(d);
01080     idx_clear(out);
01081     idx<T> tmp = out.narrow(1, input.dim(1), top);
01082     tmp = tmp.narrow(2, input.dim(2), left);
01083     idx_copy(input, tmp);
01084   }
01085 
01086   template <typename T, class Tstate>
01087   void zpad_module<T,Tstate>::bprop(Tstate &in, Tstate &out) {
01088     int top = pad.dim(0), left = pad.dim(1), bottom = pad.dim(2),
01089       right = pad.dim(3);
01090     // if in and out are the same, we just want to crop the buffers
01091     // by the extra padding that was added by the fprop
01092     if (&in == &out) {
01093       // crop state
01094       Tstate tmp = in.narrow(1, out.x.dim(1) - top - bottom, top);
01095       tmp = tmp.narrow(2, out.x.dim(2) - left - right, left);
01096       in = tmp;
01097     } else { // different buffers, accumulate gradients to input
01098       idx<T> tmp = out.dx.narrow(1, in.x.dim(1), top);
01099       tmp = tmp.narrow(2, in.x.dim(2), left);
01100       idx_add(tmp, in.dx);
01101     }
01102   }
01103 
01104   template <typename T, class Tstate>
01105   void zpad_module<T,Tstate>::bbprop(Tstate &in, Tstate &out) {
01106     int top = pad.dim(0), left = pad.dim(1), bottom = pad.dim(2),
01107       right = pad.dim(3);
01108     // if in and out are the same, we just want to crop the buffers
01109     // by the extra padding that was added by the fprop
01110     if (&in == &out) {
01111       // crop state
01112       Tstate tmp = in.narrow(1, out.x.dim(1) - top - bottom, top);
01113       tmp = tmp.narrow(2, out.x.dim(2) - left - right, left);
01114       in = tmp;
01115     } else { // different buffers, accumulate gradients to input
01116       idx<T> tmp = out.ddx.narrow(1, in.x.dim(1), top);
01117       tmp = tmp.narrow(2, in.x.dim(2), left);
01118       idx_add(tmp, in.ddx);
01119     }
01120   }
01121 
01122   template <typename T, class Tstate>
01123   idxdim zpad_module<T,Tstate>::get_paddings() {
01124     return pad;
01125   }
01126 
01127   template <typename T, class Tstate>
01128   idxdim zpad_module<T,Tstate>::get_paddings(idxdim &ker) {
01129     int top = (int) (floor(ker.dim(0) / (float) 2.0));
01130     int left = (int) (floor(ker.dim(1) / (float) 2.0));
01131     int bottom = top;
01132     int right = left;
01133     // remove 1 pixel on right and bottom borders if even.
01134     if (ker.dim(0) % 2 == 0) bottom -= 1;
01135     if (ker.dim(1) % 2 == 0) right -= 1;
01136     return idxdim(top, left, bottom, right);
01137   }
01138 
01139   template <typename T, class Tstate>
01140   midxdim zpad_module<T,Tstate>::get_paddings(midxdim &kers) {
01141     midxdim p;
01142     for (uint i = 0; i < kers.size(); ++i) {
01143       idxdim &ker = kers[i];
01144       ker = get_paddings(ker);
01145       p.push_back_new(ker);
01146     }
01147     return p;
01148   }
01149 
01150   template <typename T, class Tstate>
01151   void zpad_module<T,Tstate>::set_paddings(int top, int left, int bottom,
01152                                            int right) {
01153     pads.clear();
01154     pad = idxdim(top, left, bottom, right);
01155     pads.push_back_new(pad);
01156   }
01157 
01158   template <typename T, class Tstate>
01159   void zpad_module<T,Tstate>::set_paddings(idxdim &pads_) {
01160     if (pads_.order() != 4)
01161       eblerror("expected a 4-dim idxdim but got: " << pads_);
01162     pads.clear();
01163     pads.push_back(pads_);
01164     pad = pads_;
01165   }
01166 
01167   template <typename T, class Tstate>
01168   void zpad_module<T,Tstate>::set_kernel(idxdim &ker) {
01169     if (ker.maxdim() == 0) {
01170       eblwarn("no padding for kernel " << ker);
01171       return ;
01172     }
01173     idxdim p = get_paddings(ker);
01174     set_paddings(p);
01175   }
01176 
01177   template <typename T, class Tstate>
01178   void zpad_module<T,Tstate>::set_kernels(midxdim &kers) {
01179     intg mx = 0;
01180     for (uint i = 0; i < kers.size(); ++i) {
01181       idxdim &k = kers[i];
01182       if (k.maxdim() > mx) mx = k.maxdim();
01183     }
01184     if (mx == 0) {
01185       eblwarn("no padding for kernels " << kers);
01186       return ;
01187     }
01188     pads = get_paddings(kers);
01189     pad = pads[0];
01190   }
01191 
01192   template <typename T, class Tstate>
01193   fidxdim zpad_module<T,Tstate>::fprop_size(fidxdim &isize) {
01194     int top = pad.dim(0), left = pad.dim(1), bottom = pad.dim(2),
01195       right = pad.dim(3);
01196     fidxdim osize = isize;
01197     osize.setdim(1, isize.dim(1) + top + bottom);
01198     osize.setdim(2, isize.dim(2) + left + right);
01200     isize = bprop_size(osize);
01201     return osize;
01202   }
01203 
01204   template <typename T, class Tstate>
01205   fidxdim zpad_module<T, Tstate>::bprop_size(const fidxdim &osize) {
01206     int top = pad.dim(0), left = pad.dim(1);
01207     fidxdim isize = osize;
01208     isize.setoffset(1, osize.offset(1) - top);
01209     isize.setoffset(2, osize.offset(2) - left);
01210     return isize;
01211   }
01212 
01213   template <typename T, class Tstate>
01214   mfidxdim zpad_module<T, Tstate>::fprop_size(mfidxdim &isize) {
01215     mfidxdim osize;
01216     for (uint i = 0; i < isize.size(); ++i) {
01217       if (i < pads.size()) pad = pads[i];
01218       if (isize.exists(i)) {
01219         fidxdim s = fprop_size(isize[i]);
01220         osize.push_back(s);
01221       } else osize.push_back_empty();
01222     }
01223     //EDEBUG(this->name() << ": " << isize << " f-> " << osize);
01224     return osize;
01225   }
01226 
01227   template <typename T, class Tstate>
01228   mfidxdim zpad_module<T, Tstate>::bprop_size(mfidxdim &osize) {
01229     mfidxdim isize;
01230     for (uint i = 0; i < osize.size(); ++i) {
01231       if (i < pads.size()) pad = pads[i];
01232       if (osize.exists(i)) {
01233         fidxdim s = bprop_size(osize[i]);
01234         isize.push_back(s);
01235       } else isize.push_back_empty();
01236     }
01237     //EDEBUG(this->name() << ": " << osize << " -> " << isize);
01238     return isize;
01239   }
01240 
01241   template <typename T, class Tstate>
01242   std::string zpad_module<T, Tstate>::describe() {
01243     std::string desc;
01244     desc << "zpad module " << this->name() << " is padding with: "
01245          << pads;
01246     return desc;
01247   }
01248 
01249   template <typename T, class Tstate>
01250   zpad_module<T,Tstate>* zpad_module<T,Tstate>::
01251   copy(parameter<T,Tstate> *p) {
01252     zpad_module<T,Tstate> *z = new zpad_module<T,Tstate>(this->name());
01253     z->pads = pads;
01254     return z;
01255   }
01256 
01258   // mirrorpad_module
01259 
01260   template <typename T, class Tstate>
01261   mirrorpad_module<T,Tstate>::mirrorpad_module(int nr, int nc)
01262     : zpad_module<T,Tstate>(nr, nc) {
01263     this->_name = "mirrorpad";
01264   }
01265 
01266   template <typename T, class Tstate>
01267   mirrorpad_module<T,Tstate>::mirrorpad_module(idxdim &kernel)
01268     : zpad_module<T,Tstate>(kernel) {
01269     this->_name = "mirrorpad";
01270   }
01271 
01272   template <typename T, class Tstate>
01273   mirrorpad_module<T,Tstate>::~mirrorpad_module() {
01274   }
01275 
01276   template <typename T, class Tstate>
01277   void mirrorpad_module<T,Tstate>::fprop(Tstate &in, Tstate &out) {
01278     int top = pad.dim(0), left = pad.dim(1), bottom = pad.dim(2),
01279       right = pad.dim(3);
01280     idx<T> input = in.x;
01281     idxdim d(input.dim(0), input.dim(1) + top + bottom,
01282              input.dim(2) + left + right);
01283     if (&in == &out) { // same buffers, use a temporary copy
01284       cout << "TEMPORARY COPY!!!!!!!!!!!!!" << endl;
01285       // FIXME
01286       input = idx<T>(input.get_idxdim());
01287       idx_copy(input, input); // only copy forward
01288     }
01289     this->resize_output(in, out, &d); // resize iff necessary
01290     idx<T> tmp, tmp2;
01291     int i;
01292     tmp = out.x.narrow(1, input.dim(1), top);
01293     tmp = tmp.narrow(2, input.dim(2), left);
01294     idx_copy(input, tmp);
01295     // mirror border left
01296     for (i = std::max(0, (int) (left - input.dim(1) / 2)); i < left; ++i) {
01297       tmp2 = input.narrow(1, 1, left - i - 1);
01298       tmp = out.x.narrow(1, 1, i);
01299       tmp = tmp.narrow(2, input.dim(2), left);
01300       idx_copy(tmp2, tmp);
01301     }
01302     // mirror border right
01303     for (i = std::max(0, (int) (right - input.dim(1) / 2)); i < right; ++i) {
01304       tmp2 = input.narrow(1, 1, input.dim(1) - right - 1 + i);
01305       tmp = out.x.narrow(1, 1, out.x.dim(1) - 1 - i);
01306       tmp = tmp.narrow(2, input.dim(2), right);
01307       idx_copy(tmp2, tmp);
01308     }
01309     // mirror border top using out as input
01310     for (i = std::max(0, (int) (top - input.dim(2) / 2)); i < top; ++i) {
01311       tmp2 = out.x.narrow(2, 1, top + top - i - 1);
01312       tmp = out.x.narrow(2, 1, i);
01313       idx_copy(tmp2, tmp);
01314     }
01315     // mirror border bottom using out as input
01316     for (i = std::max(0, (int) (bottom - input.dim(2) / 2)); i < bottom; ++i) {
01317       tmp2 = out.x.narrow(2, 1, out.x.dim(2) - bottom * 2 - 1 + i);
01318       tmp = out.x.narrow(2, 1, out.x.dim(2) - 1 - i);
01319       idx_copy(tmp2, tmp);
01320     }
01321   }
01322 
01323   template <typename T, class Tstate>
01324   void mirrorpad_module<T,Tstate>::fprop(Tstate &in, idx<T> &out) {
01325     eblerror("not implemented");
01326   }
01327 
01328   template <typename T, class Tstate>
01329   mirrorpad_module<T,Tstate>* mirrorpad_module<T,Tstate>::
01330   copy(parameter<T,Tstate> *p) {
01331     int top = pad.dim(0), left = pad.dim(1);
01332     return new mirrorpad_module<T,Tstate>(top, left);
01333   }
01334 
01335   // fsum_module ///////////////////////////////////////////////////////////////
01336 
01337   template <typename T, class Tstate>
01338   fsum_module<T,Tstate>::fsum_module(bool div_, float split_)
01339     : module_1_1<T,Tstate>("fsum"), div(div_), split(split_) {
01340   }
01341 
01342   template <typename T, class Tstate>
01343   fsum_module<T,Tstate>::~fsum_module() {
01344   }
01345 
01346   template <typename T, class Tstate>
01347   void fsum_module<T,Tstate>::fprop(Tstate &in, Tstate &out) {
01348     this->resize_output(in, out); // resize iff necessary
01349     T sum;
01350     uint i, ngroup, size = in.x.dim(0);
01351     uint nsplit = std::max((uint) 2, (uint) (in.x.dim(0) * split));
01352     idx<T> igroup, ogroup;
01353     idx_eloop2(inx2, in.x, T, outx2, out.x, T) {
01354       idx_eloop2(inx1, inx2, T, outx1, outx2, T) {
01355         if (split != 1.0) { // sum in groups
01356           for (i = 0; i < size; ) {
01357             ngroup = std::min(nsplit, size - i);
01358             igroup = inx1.narrow(0, ngroup, i);
01359             ogroup = outx1.narrow(0, ngroup, i);
01360             sum = idx_sum(igroup);
01361             if (div) sum = sum / igroup.nelements();
01362             idx_fill(ogroup, sum);
01363             i += ngroup;
01364           }
01365         } else { // no splitting
01366           sum = idx_sum(inx1);
01367           if (div) sum = sum / inx1.nelements();
01368           idx_fill(outx1, sum);
01369         }
01370       }
01371     }
01372   }
01373 
01374   template <typename T, class Tstate>
01375   void fsum_module<T,Tstate>::bprop(Tstate &in, Tstate &out) {
01376     state_idx_check_different(in, out); // forbid same in and out
01377     idx_checknelems2_all(in.dx, out.dx); // must have same dimensions
01378 
01379     T sum;
01380     uint i, ngroup, size = in.x.dim(0);
01381     uint nsplit = std::max((uint) 2, (uint) (in.x.dim(0) * split));
01382     idx<T> igroup, ogroup;
01383     idx_eloop2(indx2, in.dx, T, outdx2, out.dx, T) {
01384       idx_eloop2(indx1, indx2, T, outdx1, outdx2, T) {
01385         if (split != 1.0) { // sum in groups
01386           for (i = 0; i < size; ) {
01387             ngroup = std::min(nsplit, size - i);
01388             igroup = indx1.narrow(0, ngroup, i);
01389             ogroup = outdx1.narrow(0, ngroup, i);
01390             sum = idx_sum(ogroup);
01391             if (div) sum = sum / igroup.nelements();
01392             idx_addc(igroup, sum, igroup);
01393             i += ngroup;
01394           }
01395         } else { // no splitting
01396           sum = idx_sum(outdx1);
01397           if (div) sum = sum / indx1.nelements();
01398           idx_addc(indx1, sum, indx1);
01399         }
01400       }
01401     }
01402   }
01403 
01404   template <typename T, class Tstate>
01405   void fsum_module<T,Tstate>::bbprop(Tstate &in, Tstate &out) {
01406     state_idx_check_different(in, out); // forbid same in and out
01407     idx_checknelems2_all(in.ddx, out.ddx); // must have same dimensions
01408 
01409     T sum;
01410     uint i, ngroup, size = in.x.dim(0);
01411     uint nsplit = std::max((uint) 2, (uint) (in.x.dim(0) * split));
01412     idx<T> igroup, ogroup;
01413     idx_eloop2(inddx2, in.ddx, T, outddx2, out.ddx, T) {
01414       idx_eloop2(inddx1, inddx2, T, outddx1, outddx2, T) {
01415         if (split != 1) { // sum in groups
01416           for (i = 0; i < size; ) {
01417             ngroup = std::min(nsplit, size - i);
01418             igroup = inddx1.narrow(0, ngroup, i);
01419             ogroup = outddx1.narrow(0, ngroup, i);
01420             sum = idx_sum(ogroup);
01421             if (div) sum = sum / igroup.nelements();
01422             idx_addc(igroup, sum, igroup);
01423             i += ngroup;
01424           }
01425         } else { // no splitting
01426           sum = idx_sum(outddx1);
01427           if (div) sum = sum / inddx1.nelements();
01428           idx_addc(inddx1, sum, inddx1);
01429         }
01430       }
01431     }
01432   }
01433 
01435   // binarize_module
01436 
01437   template <typename T, class Tstate>
01438   binarize_module<T,Tstate>::binarize_module(T threshold_, T false_value_,
01439                                       T true_value_)
01440     : threshold(threshold_), false_value(false_value_), true_value(true_value_){
01441   }
01442 
01443   template <typename T, class Tstate>
01444   binarize_module<T,Tstate>::~binarize_module() {
01445   }
01446 
01447   template <typename T, class Tstate>
01448   void binarize_module<T,Tstate>::fprop(Tstate &in, Tstate &out) {
01449     this->resize_output(in, out); // resize out iff necessary
01450     idx_aloop2(inx, in.x, T, outx, out.x, T) {
01451       if (*inx > threshold)
01452         *outx = true_value;
01453       else
01454         *outx = false_value;
01455     }
01456   }
01457 
01459   // range_lut_module
01460 
01461   template <typename T, class Tstate>
01462   range_lut_module<T,Tstate>::range_lut_module(idx<T> *value_range_) {
01463     if (value_range_ == NULL)
01464       eblerror("expected non null range matrix");
01465     value_range = idx<T>(value_range_->get_idxdim());
01466     idx_copy(*value_range_, value_range);
01467   }
01468 
01469   template <typename T, class Tstate>
01470   range_lut_module<T,Tstate>::~range_lut_module() {
01471   }
01472 
01473   template <typename T, class Tstate>
01474   void range_lut_module<T,Tstate>::fprop(Tstate &in, Tstate &out) {
01475     this->resize_output(in, out); // resize out iff necessary
01476     idx_aloop2(inx, in.x, T, outx, out.x, T) {
01477       //      cout << "v0: " << *inx;
01478       idx_bloop1(vr, value_range, T) {
01479         if (*inx < vr.get(1)) {
01480           *outx = vr.get(0);
01481           break ;
01482         }
01483       }
01484       //      cout << " v1: " << *outx << endl;
01485     }
01486   }
01487 
01489   // diag_module
01490 
01491   template <typename T, class Tstate>
01492   diag_module<T, Tstate>::diag_module(parameter<T,Tstate> *p, intg thick,
01493                                       const char *name_)
01494     : module_1_1<T,Tstate>(name_), coeff(p, thick) {
01495     // initialize coeffs to 1
01496     idx_fill(coeff.x, (T)1.0);
01497   }
01498 
01499   template <typename T, class Tstate>
01500   diag_module<T, Tstate>::~diag_module() {
01501   }
01502 
01503   template <typename T, class Tstate>
01504   void diag_module<T, Tstate>::fprop(Tstate &in, Tstate &out) {
01505     this->resize_output(in, out); // resize out iff necessary
01506     idx_bloop3(c, coeff.x, T, i, in.x, T, o, out.x, T) {
01507       idx_dotc(i, c.get(), o);
01508     }
01509   }
01510 
01511   template <typename T, class Tstate>
01512   void diag_module<T, Tstate>::bprop(Tstate &in, Tstate &out) {
01513     idx_bloop5(c, coeff.x, T, cd, coeff.dx, T, i, in.x, T, id, in.dx, T,
01514                od, out.dx, T) {
01515       idx_dotcacc(od, c.get(), id); // bprop to input
01516       idx_dotacc(i, od, cd); // bprop to weights
01517     }
01518   }
01519 
01520   template <typename T, class Tstate>
01521   void diag_module<T, Tstate>::bbprop(Tstate &in, Tstate &out) {
01522     idx_bloop5(c, coeff.x, T, cdd, coeff.ddx, T, i, in.x, T, idd, in.ddx, T,
01523                odd, out.ddx, T) {
01524       idx_dotcacc(odd, c.get() * c.get(), idd); // bprop to input
01525       idx_m2squdotm2acc(i, odd, cdd); // bprop to weights
01526     }
01527   }
01528 
01529   template <typename T, class Tstate>
01530   bool diag_module<T, Tstate>::resize_output(Tstate &in, Tstate &out) {
01531     // resize output based on input dimensions
01532     idxdim d(in.x); // use same dimensions as in
01533     d.setdim(0, coeff.x.dim(0)); // except for the first one
01534     return module_1_1<T,Tstate>::resize_output(in, out, &d);
01535   }
01536 
01537   template <typename T, class Tstate>
01538   void diag_module<T, Tstate>::load_x(idx<T> &weights) {
01539     if (!coeff.x.same_dim(weights)) {
01540       // if sizes are the same except for the feature size, load
01541       // into the corresponding slices with a warning
01542       // this allows to load grayscale pretrained weights only
01543       // in a grayscale + color net for example.
01544       idxdim d(coeff.x);
01545       d.setdim(0, weights.dim(0));
01546       if (d == weights.get_idxdim()) {
01547         cerr << "Warning: loading weights partly (the first " << d.dim(0)
01548              << " features) from " << weights << " instead of entire weights ("
01549              << coeff.x << ")." << endl;
01550         idx<T> slices = coeff.x.narrow(0, weights.dim(0), 0);
01551         idx_copy(weights, slices);
01552       } else
01553         eblthrow("expected same dimension weights but got " << coeff.x <<
01554                  " and " << weights << " instead in " << this->name());
01555     } else
01556     idx_copy(weights, coeff.x);
01557   }
01558 
01559   template <typename T, class Tstate>
01560   std::string diag_module<T, Tstate>::describe() {
01561     std::string desc;
01562     desc << "diag module " << this->name() << " with "
01563          << coeff.x << " coefficients";
01564     return desc;
01565   }
01566 
01567   template <typename T, class Tstate>
01568   diag_module<T,Tstate>* diag_module<T,Tstate>::copy(parameter<T,Tstate> *p) {
01569     diag_module<T,Tstate>* d = new diag_module<T,Tstate>(p, coeff.x.dim(0));
01570     // assign same parameter state if no parameters were specified
01571     if (!p) d->coeff = coeff;
01572     return d;
01573   }
01574 
01576   // copy_module
01577 
01578   template <typename T, class Tstate>
01579   copy_module<T, Tstate>::copy_module(const char *name_)
01580     : module_1_1<T,Tstate>(name_) {
01581   }
01582 
01583   template <typename T, class Tstate>
01584   copy_module<T, Tstate>::~copy_module() {
01585   }
01586 
01587   template <typename T, class Tstate>
01588   void copy_module<T, Tstate>::fprop(Tstate &in, Tstate &out) {
01589     this->resize_output(in, out); // resize (iff necessary)
01590     idx_copy(in.x, out.x);
01591   }
01592 
01593   template <typename T, class Tstate>
01594   void copy_module<T, Tstate>::bprop(Tstate &in, Tstate &out) {
01595     idx_copy(out.dx, in.dx);
01596   }
01597 
01598   template <typename T, class Tstate>
01599   void copy_module<T, Tstate>::bbprop(Tstate &in, Tstate &out) {
01600     idx_copy(out.ddx, in.ddx);
01601   }
01602 
01603   template <typename T, class Tstate>
01604   std::string copy_module<T, Tstate>::describe() {
01605     std::string desc;
01606     desc << "copy module " << this->name();
01607     return desc;
01608   }
01609 
01611   // back_module
01612 
// Value used by back_module to fill/reset its internal buffers.
// Parenthesized so the negative literal expands safely inside any
// surrounding expression.
#define BACK_MIN (-10.0)
01614 
01615   template <typename T, class Tstate>
01616   back_module<T, Tstate>::back_module(const char *name_)
01617     : module_1_1<T,Tstate>(name_), s0(NULL), s1(NULL), s2(NULL) {
01618   }
01619 
01620   template <typename T, class Tstate>
01621   back_module<T, Tstate>::~back_module() {
01622   }
01623 
01624   template <typename T, class Tstate>
01625   void back_module<T, Tstate>::fprop(Tstate &in, Tstate &out) {
01626     this->resize_output(in, out); // resize (iff necessary)
01627     // copy input to s0
01628     idx_copy(in.x, *s0);
01629     cout << "back: mins: so: " << idx_min(*s0) << " s1: " << idx_min(*s1) << " s2: "
01630          << idx_min(*s2) << endl;
01631     cout << "back: maxs: so: " << idx_max(*s0) << " s1: " << idx_max(*s1) << " s2: "
01632          << idx_max(*s2) << endl;
01633     // put max of all buffers in output
01634 //     idx_aloop3(x0, *s0, T, x1, *s1, T, o, out.x, T) {
01635 //       *o = std::max(*x0, *x1);
01636 //     }
01637     // put max of all buffers in output
01638     idx_aloop4(x0, *s0, T, x1, *s1, T, x2, *s2, T, o, out.x, T) {
01639       *o = std::max(*x0, std::max(*x1, *x2));
01640     }
01641   }
01642 
01643   template <typename T, class Tstate>
01644   void back_module<T, Tstate>::bb(std::vector<bbox*> &boxes) {
01645     cout << "back: " << boxes.size() << " boxes" << endl;
01646     // shift internal buffers and clear first one
01647     idx_copy(*s1, *s2);
01648     idx_fill(*s1, (T)BACK_MIN);
01649     // copy all boxes features to s1
01650     int height = s0->dim(1);
01651     int width = s0->dim(2);
01652     for (uint i = 0; i < boxes.size(); ++i) {
01653       bbox &b = *(boxes[i]);
01654       // find box's location at this stage
01655       float rho = b.o.h0 / (float) b.oheight;
01656       float rwo = b.o.w0 / (float) b.owidth;
01657       int h0 = (int) (height * rho);
01658       int w0 = (int) (width * rwo);
01659       int h = pixel_size.dim(1);
01660       int w = pixel_size.dim(2);
01661       // cut bbox if outside of buffers
01662       if (h0 < 0) { h -= h0; h0 = 0; }
01663       if (w0 < 0) { w -= w0; w0 = 0; }
01664       if (h0 + h > height) h -= h0 + h - height;
01665       if (w0 + w > width) w -= w0 + w - width;
01666       // max-copy box features from s0 to s1
01667       idx<T> b1 = s1->narrow(1, h, h0);
01668       b1 = b1.narrow(2, w, w0);
01669       idx<T> b0 = s0->narrow(1, h, h0);
01670       b0 = b0.narrow(2, w, w0);
01671       idx_max(b0, b1);
01672     }
01673     // shift buffers for horizontal motion
01674     int wshift = (int) (.02 * width);
01675     cout << "back: shift by " << wshift << " pixels (width: "
01676          << width << ")" << endl;
01677     idx<T> tmp(s1->get_idxdim());
01678     idx_fill(tmp, (T)BACK_MIN);
01679     idx<T> shifted = tmp.narrow(2, width - wshift, wshift);
01680     idx<T> original = s1->narrow(2, width - wshift, 0);
01681     idx_copy(original, shifted);
01682     idx_copy(tmp, *s1);
01683     // shift s2
01684     idx_fill(tmp, (T)BACK_MIN);
01685     shifted = tmp.narrow(2, width - wshift, wshift);
01686     original = s2->narrow(2, width - wshift, 0);
01687     idx_copy(original, shifted);
01688     idx_copy(tmp, *s2);
01689     // decay buffers
01690     //    idx_addc(*s1, (T) -0.2, *s1);
01691     //    idx_addc(*s2, (T) -0.2, *s2);
01692   }
01693 
01694   template <typename T, class Tstate>
01695   bool back_module<T, Tstate>::resize_output(Tstate &in, Tstate &out) {
01696     TIMING_RESIZING_ACCSTART(); // start accumulating resizing time
01697     // resize output based on input dimensions
01698     idxdim d(in.x); // use same dimensions as in
01699     if (out.x.get_idxdim() != d) { // resize only if necessary
01700       EDEBUG(this->name() << ": resizing output from " << out.x << " to " << d);
01701       out.resize(d);
01702     }
01703     if (!s0 || s0->get_idxdim() != d) {
01704       cout << "back: resizing internal buffers to " << d << endl;
01705       if (s0) s0->resize(d); else s0 = new idx<T>(d);
01706       if (s1) s1->resize(d); else s1 = new idx<T>(d);
01707       if (s2) s2->resize(d); else s2 = new idx<T>(d);
01708       idx_fill(*s0, (T)BACK_MIN);
01709       idx_fill(*s1, (T)BACK_MIN);
01710       idx_fill(*s2, (T)BACK_MIN);
01711     }
01712     TIMING_RESIZING_ACCSTOP(); // stop accumulating resizing time
01713     return true;
01714   }
01715 
01716   template <typename T, class Tstate>
01717   fidxdim back_module<T, Tstate>::bprop_size(const fidxdim &osize) {
01718     pixel_size = osize;
01719     cout << "back_module: 1 output pixel corresponds here to " << pixel_size
01720          << endl;
01721     return osize;
01722   }
01723 
01724   template <typename T, class Tstate>
01725   std::string back_module<T, Tstate>::describe() {
01726     std::string desc;
01727     desc << "back module " << this->name();
01728     return desc;
01729   }
01730 
01732   // printer_module
01733 
01734   template <typename T, class Tstate>
01735   printer_module<T,Tstate>::printer_module(const char* name_)
01736     : module_1_1<T,Tstate>(name_) {
01737   }
01738 
01739   template <typename T, class Tstate>
01740   printer_module<T,Tstate>::~printer_module() {
01741   }
01742 
01743   template <typename T, class Tstate>
01744   void printer_module<T,Tstate>::fprop(Tstate &in, Tstate &out) {
01745     T max = idx_max(in.x);
01746     T min = idx_min(in.x);
01747     cout << "min: "<< min << "\tmax: "<< max <<endl;
01748     idx_copy(in.x, out.x);
01749   }
01750 
01751   template <typename T, class Tstate>
01752   void printer_module<T,Tstate>::bprop(Tstate &in,
01753                                          Tstate &out) {
01754     T max = idx_max(out.dx);
01755     T min = idx_min(out.dx);
01756     cout << "min: "<< min << "\tmax: "<< max <<endl;
01757     idx_copy(out.dx, in.dx);
01758   }
01759 
01760   template <typename T, class Tstate>
01761   void printer_module<T,Tstate>::bbprop(Tstate &in, Tstate &out) {
01762     T max = idx_max(out.ddx);
01763     T min = idx_min(out.ddx);
01764     cout << "min: "<< min << "\tmax: "<< max <<endl;
01765     idx_copy(out.ddx, in.ddx);
01766   }
01767 
01768 
01769 } // end namespace ebl