libeblearn
/home/rex/ebltrunk/core/libeblearn/include/ebl_layers.hpp
/***************************************************************************
 *   Copyright (C) 2008 by Yann LeCun and Pierre Sermanet *
 *   yann@cs.nyu.edu, pierre.sermanet@gmail.com *
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Redistribution under a license not approved by the Open Source
 *       Initiative (http://www.opensource.org) must display the
 *       following acknowledgement in all advertising material:
 *        This product includes software developed at the Courant
 *        Institute of Mathematical Sciences (http://cims.nyu.edu).
 *     * The names of the authors may not be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ***************************************************************************/

namespace ebl {

  ////////////////////////////////////////////////////////////////
  // full_layer

  template <typename T, class Tstate>
  full_layer<T,Tstate>::full_layer(parameter<T,Tstate> *p, intg indim0,
                                   intg noutputs, bool btanh_,
                                   const char *name_)
    : module_1_1<T,Tstate>(name_), btanh(btanh_),
      linear(p, indim0, noutputs, name_),
      adder(p, noutputs, name_),
      sigmoid(btanh_ ? (module_1_1<T,Tstate>*) new tanh_module<T,Tstate>()
              : (module_1_1<T,Tstate>*) new stdsigmoid_module<T,Tstate>()) {
    // The order (number of dimensions) of sum is not yet known, and it is
    // just an internal buffer that does not need to be saved in the
    // parameter, so we allocate it later.
    sum = NULL;
    this->_name = name_;
  }

  template <typename T, class Tstate>
  full_layer<T,Tstate>::~full_layer() {
    if (sum) delete sum;
    if (sigmoid) delete sigmoid;
  }

  template <typename T, class Tstate>
  void full_layer<T,Tstate>::fprop(Tstate &in, Tstate &out) {
    // resize output and sum
    idxdim d(in.x.spec); // use same dimensions as in
    d.setdim(0, adder.bias.x.dim(0)); // except for the first one
    if (!sum) sum = new Tstate(d); // we now know the order of sum

    // fprop
    linear.fprop(in, *sum);
    adder.fprop(*sum, *sum);
    sigmoid->fprop(*sum, out);
  }

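  // Backward pass: the modules run in reverse order of fprop. sum's
  // gradient buffer dx is cleared first, since each bprop call accumulates
  // into its input's dx rather than overwriting it.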
  template <typename T, class Tstate>
  void full_layer<T,Tstate>::bprop(Tstate &in, Tstate &out) {
    idx_clear(sum->dx);
    sigmoid->bprop(*sum, out);
    adder.bprop(*sum, *sum);
    linear.bprop(in, *sum);
  }

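  // Second-order backward pass: the same reverse traversal applied to the
  // ddx buffers (second derivatives), which eblearn uses to estimate the
  // diagonal Hessian approximation for per-parameter learning rates.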
  template <typename T, class Tstate>
  void full_layer<T,Tstate>::bbprop(Tstate &in, Tstate &out) {
    idx_clear(sum->ddx);
    sigmoid->bbprop(*sum, out);
    adder.bbprop(*sum, *sum);
    linear.bbprop(in, *sum);
  }

  template <typename T, class Tstate>
  void full_layer<T,Tstate>::forget(forget_param_linear &fp) {
    linear.forget(fp);
    adder.forget(fp);
  }

  template <typename T, class Tstate>
  fidxdim full_layer<T,Tstate>::fprop_size(fidxdim &isize) {
    return linear.fprop_size(isize);
  }

  template <typename T, class Tstate>
  fidxdim full_layer<T,Tstate>::bprop_size(const fidxdim &osize) {
    return linear.bprop_size(osize);
  }

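  // Note: copy() passes a NULL parameter, so the clone allocates its own
  // weight storage instead of registering it with a trainable parameter;
  // the weight values are then duplicated explicitly with idx_copy.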
  template <typename T, class Tstate>
  full_layer<T,Tstate>* full_layer<T,Tstate>::copy() {
    // allocate
    full_layer<T,Tstate>* l2 =
      new full_layer<T,Tstate>(NULL, linear.w.x.dim(1), linear.w.x.dim(0),
                               btanh);
    // copy data
    idx_copy(linear.w.x, l2->linear.w.x);
    idx_copy(adder.bias.x, l2->adder.bias.x);
    return l2;
  }

  template <typename T, class Tstate>
  std::string full_layer<T,Tstate>::describe() {
    std::string s;
    s << "fully connected layer " << this->name() << " composed of a linear "
      << "module: " << linear.describe() << ", a bias module: "
      << adder.describe();
    if (sigmoid) s << ", and a sigmoid module: " << sigmoid->describe();
    return s;
  }

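  // Usage sketch (illustrative, not part of the original file; all sizes
  // and the parameter capacity are made-up values): a full_layer is
  // typically allocated against a parameter that owns the trainable
  // weights, then run with state buffers of matching dimensions:
  //
  //   parameter<double, bbstate_idx<double> > theparam(60000);
  //   full_layer<double, bbstate_idx<double> > fl(&theparam, 100, 10, true);
  //   bbstate_idx<double> in(100), out(10);
  //   fl.fprop(in, out);  // linear -> bias -> tanh
  //   fl.bprop(in, out);  // gradients, reverse order
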
  ////////////////////////////////////////////////////////////////
  // convolution_layer

  template <typename T, class Tstate>
  convolution_layer<T,Tstate>::
  convolution_layer(parameter<T,Tstate> *p, idxdim &ker, idxdim &stride,
                    idx<intg> &tbl, bool btanh_, const char *name_)
    : module_1_1<T,Tstate>(name_), btanh(btanh_),
      convol(p, ker, stride, tbl, name_),
      adder(p, convol.thickness, name_),
      sigmoid(btanh_ ? (module_1_1<T,Tstate>*) new tanh_module<T,Tstate>()
              : (module_1_1<T,Tstate>*) new stdsigmoid_module<T,Tstate>()) {
    sum = NULL;
    this->_name = name_;
  }

  template <typename T, class Tstate>
  convolution_layer<T,Tstate>::~convolution_layer() {
    if (sum) delete sum;
    if (sigmoid) delete sigmoid;
  }

  template <typename T, class Tstate>
  void convolution_layer<T,Tstate>::fprop(Tstate &in, Tstate &out) {
    // 1. allocate sum
    idxdim d(in.x.spec); // use same dimensions as in
    if (!sum) sum = new Tstate(d);

    // 2. fprop
    //    sum->clear();
    convol.fprop(in, *sum);
    adder.fprop(*sum, *sum);
    sigmoid->fprop(*sum, out);
  }

  template <typename T, class Tstate>
  void convolution_layer<T,Tstate>::bprop(Tstate &in, Tstate &out) {
    idx_clear(sum->dx);
    sigmoid->bprop(*sum, out);
    adder.bprop(*sum, *sum);
    convol.bprop(in, *sum);
  }

  template <typename T, class Tstate>
  void convolution_layer<T,Tstate>::bbprop(Tstate &in, Tstate &out) {
    idx_clear(sum->ddx);
    sigmoid->bbprop(*sum, out);
    adder.bbprop(*sum, *sum);
    convol.bbprop(in, *sum);
  }

  template <typename T, class Tstate>
  void convolution_layer<T,Tstate>::forget(forget_param_linear &fp) {
    convol.forget(fp);
    adder.forget(fp);
  }

  template <typename T, class Tstate>
  fidxdim convolution_layer<T,Tstate>::fprop_size(fidxdim &isize) {
    return convol.fprop_size(isize);
  }

  template <typename T, class Tstate>
  fidxdim convolution_layer<T,Tstate>::bprop_size(const fidxdim &osize) {
    return convol.bprop_size(osize);
  }

  template <typename T, class Tstate>
  convolution_layer<T,Tstate>* convolution_layer<T,Tstate>::copy() {
    // allocate
    convolution_layer<T,Tstate> *l2 = new convolution_layer<T,Tstate>
      (NULL, convol.ker, convol.stride, convol.table, btanh);
    // copy data
    idx_copy(convol.kernel.x, l2->convol.kernel.x);
    idx_copy(adder.bias.x, l2->adder.bias.x);
    return l2;
  }

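  // Usage sketch (illustrative, sizes made up, reusing the hypothetical
  // theparam and in/out buffers from the sketch above): a convolution_layer
  // takes kernel and stride dimensions plus a connection table mapping
  // input feature maps to output feature maps; full_table() is eblearn's
  // helper for a fully connected table:
  //
  //   idxdim ker(5, 5), stride(1, 1);
  //   idx<intg> table = full_table(1, 6);  // 1 input map -> 6 output maps
  //   convolution_layer<double, bbstate_idx<double> >
  //     cl(&theparam, ker, stride, table, true);
  //   cl.fprop(in, out);  // convolution -> bias -> tanh
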
  ////////////////////////////////////////////////////////////////
  // convabsnorm_layer

  template <typename T, class Tstate>
  convabsnorm_layer<T,Tstate>::convabsnorm_layer(parameter<T,Tstate> *p,
                                                 intg kerneli, intg kernelj,
                                                 intg stridei_, intg stridej_,
                                                 idx<intg> &tbl, bool mirror,
                                                 bool btanh_,
                                                 const char *name_)
    : module_1_1<T,Tstate>(name_), btanh(btanh_),
      lconv(p, kerneli, kernelj, stridei_, stridej_, tbl, btanh_, name_),
      abs(), norm(kerneli, kernelj, lconv.convol.thickness, mirror),
      tmp(NULL), tmp2(NULL) {
    this->_name = name_;
  }

  template <typename T, class Tstate>
  convabsnorm_layer<T,Tstate>::~convabsnorm_layer() {
    if (tmp) delete tmp;
    if (tmp2) delete tmp2;
  }

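  // Forward pass of the cascade: the convolution layer writes into tmp,
  // the absolute-value rectification into tmp2, and the normalization
  // module produces the final output.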
  template <typename T, class Tstate>
  void convabsnorm_layer<T,Tstate>::fprop(Tstate &in, Tstate &out) {
    // 1. resize tmp
    idxdim d(in.x.spec); // use same dimensions as in
    d.setdim(0, lconv.convol.thickness); // except for the first one
    if (!tmp) tmp = new Tstate(d);
    if (!tmp2) tmp2 = new Tstate(d);

    // 2. fprop
    // tmp->clear();
    // tmp2->clear();
    lconv.fprop(in, *tmp);
    abs.fprop(*tmp, *tmp2);
    norm.fprop(*tmp2, out);
  }

  template <typename T, class Tstate>
  void convabsnorm_layer<T,Tstate>::bprop(Tstate &in, Tstate &out) {
    idx_clear(tmp->dx);
    idx_clear(tmp2->dx);
    norm.bprop(*tmp2, out);
    abs.bprop(*tmp, *tmp2);
    lconv.bprop(in, *tmp);
  }

  template <typename T, class Tstate>
  void convabsnorm_layer<T,Tstate>::bbprop(Tstate &in, Tstate &out) {
    idx_clear(tmp->ddx);
    idx_clear(tmp2->ddx);
    norm.bbprop(*tmp2, out);
    abs.bbprop(*tmp, *tmp2);
    lconv.bbprop(in, *tmp);
  }

  template <typename T, class Tstate>
  void convabsnorm_layer<T,Tstate>::forget(forget_param_linear &fp) {
    lconv.forget(fp);
  }

  template <typename T, class Tstate>
  fidxdim convabsnorm_layer<T,Tstate>::fprop_size(fidxdim &isize) {
    return lconv.fprop_size(isize);
  }

  template <typename T, class Tstate>
  fidxdim convabsnorm_layer<T,Tstate>::bprop_size(const fidxdim &osize) {
    return lconv.bprop_size(osize);
  }

  template <typename T, class Tstate>
  convabsnorm_layer<T,Tstate>* convabsnorm_layer<T,Tstate>::copy() {
    // allocate
    convabsnorm_layer<T,Tstate> *l2 = new convabsnorm_layer<T,Tstate>
      (NULL, lconv.convol.ker, lconv.convol.stride, lconv.convol.table,
       norm.mirror, btanh);
    // copy data
    idx_copy(lconv.convol.kernel.x, l2->lconv.convol.kernel.x);
    idx_copy(lconv.adder.bias.x, l2->lconv.adder.bias.x);
    return l2;
  }

  ////////////////////////////////////////////////////////////////
  // subsampling_layer

  template <typename T, class Tstate>
  subsampling_layer<T,Tstate>::
  subsampling_layer(parameter<T,Tstate> *p, uint thickness, idxdim &kernel,
                    idxdim &stride, bool btanh_, const char *name_)
    : module_1_1<T,Tstate>(name_), btanh(btanh_),
      subsampler(p, thickness, kernel, stride, name_),
      adder(p, thickness, name_),
      sigmoid(btanh_ ? (module_1_1<T,Tstate>*) new tanh_module<T,Tstate>()
              : (module_1_1<T,Tstate>*) new stdsigmoid_module<T,Tstate>()) {
    sum = NULL;
    this->_name = name_;
  }

  template <typename T, class Tstate>
  subsampling_layer<T,Tstate>::~subsampling_layer() {
    if (sum) delete sum;
    if (sigmoid) delete sigmoid;
  }

  template <typename T, class Tstate>
  void subsampling_layer<T,Tstate>::fprop(Tstate &in, Tstate &out) {
    // 1. resize sum
    idxdim d(in.x.spec); // use same dimensions as in
    d.setdim(0, subsampler.thickness); // except for the first one
    if (!sum) sum = new Tstate(d);

    // 2. fprop
    subsampler.fprop(in, *sum);
    adder.fprop(*sum, *sum);
    sigmoid->fprop(*sum, out);
  }

  template <typename T, class Tstate>
  void subsampling_layer<T,Tstate>::bprop(Tstate &in, Tstate &out) {
    idx_clear(sum->dx);
    sigmoid->bprop(*sum, out);
    adder.bprop(*sum, *sum);
    subsampler.bprop(in, *sum);
  }

  template <typename T, class Tstate>
  void subsampling_layer<T,Tstate>::bbprop(Tstate &in, Tstate &out) {
    idx_clear(sum->ddx);
    sigmoid->bbprop(*sum, out);
    adder.bbprop(*sum, *sum);
    subsampler.bbprop(in, *sum);
  }

  template <typename T, class Tstate>
  void subsampling_layer<T,Tstate>::forget(forget_param_linear &fp) {
    subsampler.forget(fp);
    adder.forget(fp);
  }

  template <typename T, class Tstate>
  fidxdim subsampling_layer<T,Tstate>::fprop_size(fidxdim &isize) {
    return subsampler.fprop_size(isize);
  }

  template <typename T, class Tstate>
  fidxdim subsampling_layer<T,Tstate>::bprop_size(const fidxdim &osize) {
    return subsampler.bprop_size(osize);
  }

  template <typename T, class Tstate>
  subsampling_layer<T,Tstate>* subsampling_layer<T,Tstate>::copy() {
    // allocate
    subsampling_layer<T,Tstate> *l2 =
      new subsampling_layer<T,Tstate>(NULL, subsampler.thickness,
                                      subsampler.kernel, subsampler.stride,
                                      btanh);
    // copy data
    idx_copy(subsampler.coeff.x, l2->subsampler.coeff.x);
    idx_copy(adder.bias.x, l2->adder.bias.x);
    return l2;
  }

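  // Usage sketch (illustrative, sizes made up, reusing the hypothetical
  // theparam and in/out buffers from the earlier sketches): each of the
  // `thickness` feature maps is pooled with its own trainable coefficient:
  //
  //   idxdim sker(2, 2), sstride(2, 2);
  //   subsampling_layer<double, bbstate_idx<double> >
  //     sl(&theparam, 6, sker, sstride, true);  // 6 maps, 2x2 pooling
  //   sl.fprop(in, out);  // subsample -> bias -> tanh
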
  template <typename T, class Tstate>
  std::string subsampling_layer<T, Tstate>::describe() {
    std::string desc;
    desc << "subsampling layer " << this->name() << " with thickness "
         << subsampler.thickness << ", kernel "
         << subsampler.kernel << ", stride " << subsampler.stride
         << ", bias " << adder.bias.x
         << " and non-linearity " << sigmoid->name();
    return desc;
  }

} // end namespace ebl