libeblearn
/***************************************************************************
 *   Copyright (C) 2008 by Yann LeCun and Pierre Sermanet                  *
 *   yann@cs.nyu.edu, pierre.sermanet@gmail.com                            *
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Redistribution under a license not approved by the Open Source
 *       Initiative (http://www.opensource.org) must display the
 *       following acknowledgement in all advertising material:
 *        This product includes software developed at the Courant
 *        Institute of Mathematical Sciences (http://cims.nyu.edu).
 *     * The names of the authors may not be used to endorse or promote
 *       products derived from this software without specific prior written
 *       permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ***************************************************************************/

namespace ebl {

  ////////////////////////////////////////////////////////////////
  // full_layer

  template <typename T, class Tstate>
  full_layer<T,Tstate>::full_layer(parameter<T,Tstate> *p, intg indim0,
                                   intg noutputs, bool btanh_,
                                   const char *name_)
    : module_1_1<T,Tstate>(name_), btanh(btanh_),
      linear(p, indim0, noutputs, name_),
      adder(p, noutputs, name_),
      sigmoid(btanh_ ? (module_1_1<T,Tstate>*) new tanh_module<T,Tstate>()
              : (module_1_1<T,Tstate>*) new stdsigmoid_module<T,Tstate>()) {
    // the order of sum is not yet known and this is just an internal buffer
    // that does not need to be saved in the parameter, so we allocate it later
    sum = NULL;
    this->_name = name_;
  }

  template <typename T, class Tstate>
  full_layer<T,Tstate>::~full_layer() {
    if (sum) delete sum;
    if (sigmoid) delete sigmoid;
  }

  template <typename T, class Tstate>
  void full_layer<T,Tstate>::fprop(Tstate &in, Tstate &out) {
    // resize output and sum
    idxdim d(in.x.spec); // use same dimensions as in
    d.setdim(0, adder.bias.x.dim(0)); // except for the first one
    if (!sum) sum = new Tstate(d); // we now know the order of sum

    // fprop
    linear.fprop(in, *sum);
    adder.fprop(*sum, *sum);
    sigmoid->fprop(*sum, out);
  }

  template <typename T, class Tstate>
  void full_layer<T,Tstate>::bprop(Tstate &in, Tstate &out) {
    idx_clear(sum->dx);
    sigmoid->bprop(*sum, out);
    adder.bprop(*sum, *sum);
    linear.bprop(in, *sum);
  }

  template <typename T, class Tstate>
  void full_layer<T,Tstate>::bbprop(Tstate &in, Tstate &out) {
    idx_clear(sum->ddx);
    sigmoid->bbprop(*sum, out);
    adder.bbprop(*sum, *sum);
    linear.bbprop(in, *sum);
  }

  template <typename T, class Tstate>
  void full_layer<T,Tstate>::forget(forget_param_linear &fp) {
    linear.forget(fp);
    adder.forget(fp);
  }

  template <typename T, class Tstate>
  fidxdim full_layer<T,Tstate>::fprop_size(fidxdim &isize) {
    return linear.fprop_size(isize);
  }

  template <typename T, class Tstate>
  idxdim full_layer<T,Tstate>::bprop_size(const idxdim &osize) {
    return linear.bprop_size(osize);
  }

  template <typename T, class Tstate>
  full_layer<T,Tstate>* full_layer<T,Tstate>::copy() {
    // allocate
    full_layer<T,Tstate> *l2 =
      new full_layer<T,Tstate>(NULL, linear.w.x.dim(1), linear.w.x.dim(0),
                               btanh);
    // copy data
    idx_copy(linear.w.x, l2->linear.w.x);
    idx_copy(adder.bias.x, l2->adder.bias.x);
    return l2;
  }

  template <typename T, class Tstate>
  std::string full_layer<T,Tstate>::describe() {
    std::string s;
    s << "fully connected layer " << this->name() << " composed of a linear "
      << "module: " << linear.describe() << ", a bias module: "
      << adder.describe();
    if (sigmoid) s << ", and a sigmoid module: " << sigmoid->describe();
    return s;
  }
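  // Usage sketch (illustrative, not part of the original file): building a
  // fully connected tanh layer and running the three passes implemented
  // above. It assumes Tstate = bbstate_idx<float> (a state type carrying the
  // x, dx and ddx buffers that bprop/bbprop expect) and that the parameter
  // and state constructors accept sizes as shown; the dimensions (84 inputs,
  // 10 outputs) and the name "f7" are arbitrary.
  //
  //   parameter<float, bbstate_idx<float> > theparam(60000);
  //   full_layer<float, bbstate_idx<float> > f7(&theparam, 84, 10,
  //                                             true, "f7");
  //   bbstate_idx<float> in(84), out(10);
  //   f7.fprop(in, out);  // out = tanh(W * in + bias)
  //   f7.bprop(in, out);  // gradients: out.dx -> in.dx
  //   f7.bbprop(in, out); // diagonal 2nd derivatives: out.ddx -> in.ddx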
  ////////////////////////////////////////////////////////////////
  // convolution_layer

  template <typename T, class Tstate>
  convolution_layer<T,Tstate>::
  convolution_layer(parameter<T,Tstate> *p, idxdim &ker, idxdim &stride,
                    idx<intg> &tbl, bool btanh_, const char *name_)
    : module_1_1<T,Tstate>(name_), btanh(btanh_),
      convol(p, ker, stride, tbl, name_),
      adder(p, convol.thickness, name_),
      sigmoid(btanh_ ? (module_1_1<T,Tstate>*) new tanh_module<T,Tstate>()
              : (module_1_1<T,Tstate>*) new stdsigmoid_module<T,Tstate>()) {
    sum = NULL;
    this->_name = name_;
  }

  template <typename T, class Tstate>
  convolution_layer<T,Tstate>::~convolution_layer() {
    if (sum) delete sum;
    if (sigmoid) delete sigmoid;
  }

  template <typename T, class Tstate>
  void convolution_layer<T,Tstate>::fprop(Tstate &in, Tstate &out) {
    // 1. allocate sum
    idxdim d(in.x.spec); // use same dimensions as in
    if (!sum) sum = new Tstate(d);

    // 2. fprop
    // sum->clear();
    convol.fprop(in, *sum);
    adder.fprop(*sum, *sum);
    sigmoid->fprop(*sum, out);
  }

  template <typename T, class Tstate>
  void convolution_layer<T,Tstate>::bprop(Tstate &in, Tstate &out) {
    idx_clear(sum->dx);
    sigmoid->bprop(*sum, out);
    adder.bprop(*sum, *sum);
    convol.bprop(in, *sum);
  }

  template <typename T, class Tstate>
  void convolution_layer<T,Tstate>::bbprop(Tstate &in, Tstate &out) {
    idx_clear(sum->ddx);
    sigmoid->bbprop(*sum, out);
    adder.bbprop(*sum, *sum);
    convol.bbprop(in, *sum);
  }

  template <typename T, class Tstate>
  void convolution_layer<T,Tstate>::forget(forget_param_linear &fp) {
    convol.forget(fp);
    adder.forget(fp);
  }

  template <typename T, class Tstate>
  fidxdim convolution_layer<T,Tstate>::fprop_size(fidxdim &isize) {
    return convol.fprop_size(isize);
  }

  template <typename T, class Tstate>
  fidxdim convolution_layer<T,Tstate>::bprop_size(const fidxdim &osize) {
    return convol.bprop_size(osize);
  }

  template <typename T, class Tstate>
  convolution_layer<T,Tstate>* convolution_layer<T,Tstate>::copy() {
    // allocate
    convolution_layer<T,Tstate> *l2 = new convolution_layer<T,Tstate>
      (NULL, convol.ker, convol.stride, convol.table, btanh);
    // copy data
    idx_copy(convol.kernel.x, l2->convol.kernel.x);
    idx_copy(adder.bias.x, l2->adder.bias.x);
    return l2;
  }
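  // Usage sketch (illustrative): a convolution layer needs a connection
  // table mapping input feature maps to output feature maps. The helper
  // full_table(a, b) is assumed here to build a fully connected a-to-b
  // table, as in eblearn's table utilities; sizes follow the usual
  // out = in - kernel + 1 rule for a stride of 1, and fprop resizes the
  // output state as needed.
  //
  //   idxdim ker(5, 5), stride(1, 1);
  //   idx<intg> table = full_table(1, 6); // 1 input map -> 6 output maps
  //   parameter<float, bbstate_idx<float> > theparam(60000);
  //   convolution_layer<float, bbstate_idx<float> >
  //     c1(&theparam, ker, stride, table, true, "c1");
  //   bbstate_idx<float> in(1, 32, 32), out(6, 28, 28);
  //   c1.fprop(in, out); // convolve, add per-map bias, apply tanh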
  ////////////////////////////////////////////////////////////////
  // convabsnorm_layer

  template <typename T, class Tstate>
  convabsnorm_layer<T,Tstate>::convabsnorm_layer(parameter<T,Tstate> *p,
                                                 intg kerneli, intg kernelj,
                                                 intg stridei_, intg stridej_,
                                                 idx<intg> &tbl, bool mirror,
                                                 bool btanh_,
                                                 const char *name_)
    : module_1_1<T,Tstate>(name_), btanh(btanh_),
      lconv(p, kerneli, kernelj, stridei_, stridej_, tbl, btanh_, name_),
      abs(), norm(kerneli, kernelj, lconv.convol.thickness, mirror),
      tmp(NULL), tmp2(NULL) {
    this->_name = name_;
  }

  template <typename T, class Tstate>
  convabsnorm_layer<T,Tstate>::~convabsnorm_layer() {
    if (tmp) delete tmp;
    if (tmp2) delete tmp2;
  }

  template <typename T, class Tstate>
  void convabsnorm_layer<T,Tstate>::fprop(Tstate &in, Tstate &out) {
    // 1. resize tmp
    idxdim d(in.x.spec); // use same dimensions as in
    d.setdim(0, lconv.convol.thickness); // except for the first one
    if (!tmp) tmp = new Tstate(d);
    if (!tmp2) tmp2 = new Tstate(d);

    // 2. fprop
    // tmp->clear();
    // tmp2->clear();
    lconv.fprop(in, *tmp);
    abs.fprop(*tmp, *tmp2);
    norm.fprop(*tmp2, out);
  }

  template <typename T, class Tstate>
  void convabsnorm_layer<T,Tstate>::bprop(Tstate &in, Tstate &out) {
    idx_clear(tmp->dx);
    idx_clear(tmp2->dx);
    norm.bprop(*tmp2, out);
    abs.bprop(*tmp, *tmp2);
    lconv.bprop(in, *tmp);
  }

  template <typename T, class Tstate>
  void convabsnorm_layer<T,Tstate>::bbprop(Tstate &in, Tstate &out) {
    idx_clear(tmp->ddx);
    idx_clear(tmp2->ddx);
    norm.bbprop(*tmp2, out);
    abs.bbprop(*tmp, *tmp2);
    lconv.bbprop(in, *tmp);
  }

  template <typename T, class Tstate>
  void convabsnorm_layer<T,Tstate>::forget(forget_param_linear &fp) {
    lconv.forget(fp);
  }

  template <typename T, class Tstate>
  fidxdim convabsnorm_layer<T,Tstate>::fprop_size(fidxdim &isize) {
    return lconv.fprop_size(isize);
  }

  template <typename T, class Tstate>
  fidxdim convabsnorm_layer<T,Tstate>::bprop_size(const fidxdim &osize) {
    return lconv.bprop_size(osize);
  }

  template <typename T, class Tstate>
  convabsnorm_layer<T,Tstate>* convabsnorm_layer<T,Tstate>::copy() {
    // allocate
    convabsnorm_layer<T,Tstate> *l2 = new convabsnorm_layer<T,Tstate>
      (NULL, lconv.convol.ker, lconv.convol.stride, lconv.convol.table,
       norm.mirror, btanh);
    // copy data
    idx_copy(lconv.convol.kernel.x, l2->lconv.convol.kernel.x);
    idx_copy(lconv.adder.bias.x, l2->lconv.adder.bias.x);
    return l2;
  }
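  // Usage sketch (illustrative): a convolution layer followed by
  // absolute-value rectification and contrast normalization, chained exactly
  // as in fprop above (lconv -> abs -> norm). Arguments follow the
  // constructor's signature; full_table is assumed as before, and the output
  // sizes are only indicative since fprop resizes its states.
  //
  //   idx<intg> table = full_table(1, 6);
  //   parameter<float, bbstate_idx<float> > theparam(60000);
  //   convabsnorm_layer<float, bbstate_idx<float> >
  //     can1(&theparam, 7, 7, 1, 1, table, true /* mirror */, true, "can1");
  //   bbstate_idx<float> in(1, 64, 64), out(6, 58, 58);
  //   can1.fprop(in, out);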
  ////////////////////////////////////////////////////////////////
  // subsampling_layer

  template <typename T, class Tstate>
  subsampling_layer<T,Tstate>::
  subsampling_layer(parameter<T,Tstate> *p, uint thickness, idxdim &kernel,
                    idxdim &stride, bool btanh_, const char *name_)
    : module_1_1<T,Tstate>(name_), btanh(btanh_),
      subsampler(p, thickness, kernel, stride, name_),
      adder(p, thickness, name_),
      sigmoid(btanh_ ? (module_1_1<T,Tstate>*) new tanh_module<T,Tstate>()
              : (module_1_1<T,Tstate>*) new stdsigmoid_module<T,Tstate>()) {
    sum = NULL;
    this->_name = name_;
  }

  template <typename T, class Tstate>
  subsampling_layer<T,Tstate>::~subsampling_layer() {
    if (sum) delete sum;
    if (sigmoid) delete sigmoid;
  }

  template <typename T, class Tstate>
  void subsampling_layer<T,Tstate>::fprop(Tstate &in, Tstate &out) {
    // 1. resize sum
    idxdim d(in.x.spec); // use same dimensions as in
    d.setdim(0, subsampler.thickness); // except for the first one
    if (!sum) sum = new Tstate(d);

    // 2. fprop
    subsampler.fprop(in, *sum);
    adder.fprop(*sum, *sum);
    sigmoid->fprop(*sum, out);
  }

  template <typename T, class Tstate>
  void subsampling_layer<T,Tstate>::bprop(Tstate &in, Tstate &out) {
    idx_clear(sum->dx);
    sigmoid->bprop(*sum, out);
    adder.bprop(*sum, *sum);
    subsampler.bprop(in, *sum);
  }

  template <typename T, class Tstate>
  void subsampling_layer<T,Tstate>::bbprop(Tstate &in, Tstate &out) {
    idx_clear(sum->ddx);
    sigmoid->bbprop(*sum, out);
    adder.bbprop(*sum, *sum);
    subsampler.bbprop(in, *sum);
  }

  template <typename T, class Tstate>
  void subsampling_layer<T,Tstate>::forget(forget_param_linear &fp) {
    subsampler.forget(fp);
    adder.forget(fp);
  }

  template <typename T, class Tstate>
  fidxdim subsampling_layer<T,Tstate>::fprop_size(fidxdim &isize) {
    return subsampler.fprop_size(isize);
  }

  template <typename T, class Tstate>
  fidxdim subsampling_layer<T,Tstate>::bprop_size(const fidxdim &osize) {
    return subsampler.bprop_size(osize);
  }

  template <typename T, class Tstate>
  subsampling_layer<T,Tstate>* subsampling_layer<T,Tstate>::copy() {
    // allocate
    subsampling_layer<T,Tstate> *l2 =
      new subsampling_layer<T,Tstate>(NULL, subsampler.thickness,
                                      subsampler.kernel, subsampler.stride,
                                      btanh);
    // copy data
    idx_copy(subsampler.coeff.x, l2->subsampler.coeff.x);
    idx_copy(adder.bias.x, l2->adder.bias.x);
    return l2;
  }

  template <typename T, class Tstate>
  std::string subsampling_layer<T,Tstate>::describe() {
    std::string desc;
    desc << "subsampling layer " << this->name() << " with thickness "
         << subsampler.thickness << ", kernel "
         << subsampler.kernel << ", stride " << subsampler.stride
         << ", bias " << adder.bias.x
         << " and non-linearity " << sigmoid->name();
    return desc;
  }
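  // Usage sketch (illustrative): subsampling each of 6 feature maps with a
  // 2x2 kernel and stride, then adding a per-map bias and applying tanh, as
  // in fprop above. A 28x28 input map thus shrinks to 14x14.
  //
  //   idxdim sker(2, 2), sstride(2, 2);
  //   parameter<float, bbstate_idx<float> > theparam(60000);
  //   subsampling_layer<float, bbstate_idx<float> >
  //     s2(&theparam, 6, sker, sstride, true, "s2");
  //   bbstate_idx<float> in(6, 28, 28), out(6, 14, 14);
  //   s2.fprop(in, out);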
} // end namespace ebl