// libeblearn
// /home/rex/ebltrunk/core/libeblearn/include/ebl_answer.hpp
/***************************************************************************
 *   Copyright (C) 2011 by Pierre Sermanet   *
 *   pierre.sermanet@gmail.com   *
 *   All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Redistribution under a license not approved by the Open Source
 *       Initiative (http://www.opensource.org) must display the
 *       following acknowledgement in all advertising material:
 *        This product includes software developed at the Courant
 *        Institute of Mathematical Sciences (http://cims.nyu.edu).
 *     * The names of the authors may not be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ***************************************************************************/

#ifndef EBL_ANSWER_HPP_
#define EBL_ANSWER_HPP_

using namespace std;

namespace ebl {

  // answer_module /////////////////////////////////////////////////////////////

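  // answer_module is the common base class for the "answer" modules below:
  // subclasses decode raw network outputs into task answers (e.g. a class id
  // plus a confidence) and, given a labeled_datasource, produce the targets
  // fed to the energy module. Most base methods are stubs that either raise
  // "not implemented" or do nothing, so subclasses override what they need.
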
  template <typename T, typename Tds1, typename Tds2, class Tstate>
  answer_module<T,Tds1,Tds2,Tstate>::answer_module(uint nfeatures_,
                                                   const char *name_)
    : module_1_1<T,Tstate>(name_), nfeatures(nfeatures_) {
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  answer_module<T,Tds1,Tds2,Tstate>::~answer_module() {
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void answer_module<T,Tds1,Tds2,Tstate>::fprop(Tstate &in, Tstate &out) {
    eblerror("not implemented");
  }

  // single-state propagation //////////////////////////////////////////////////

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void answer_module<T,Tds1,Tds2,Tstate>::
  fprop(labeled_datasource<T,Tds1,Tds2> &ds, Tstate &out) {
    eblerror("not implemented");
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void answer_module<T,Tds1,Tds2,Tstate>::
  bprop(labeled_datasource<T,Tds1,Tds2> &ds, Tstate &out) {
    // empty bprop by default
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void answer_module<T,Tds1,Tds2,Tstate>::
  bbprop(labeled_datasource<T,Tds1,Tds2> &ds, Tstate &out) {
    // empty bbprop by default
  }

  // multi-state propagation ///////////////////////////////////////////////////

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void answer_module<T,Tds1,Tds2,Tstate>::
  fprop(labeled_datasource<T,Tds1,Tds2> &ds, mstate<Tstate> &out) {
    eblerror("not implemented");
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void answer_module<T,Tds1,Tds2,Tstate>::
  bprop(labeled_datasource<T,Tds1,Tds2> &ds, mstate<Tstate> &out) {
    // empty bprop by default
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void answer_module<T,Tds1,Tds2,Tstate>::
  bbprop(labeled_datasource<T,Tds1,Tds2> &ds, mstate<Tstate> &out) {
    // empty bbprop by default
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  bool answer_module<T,Tds1,Tds2,Tstate>::
  correct(Tstate &answer, Tstate &label) {
    eblerror("not implemented");
    return false;
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void answer_module<T,Tds1,Tds2,Tstate>::
  update_log(classifier_meter &log, intg age, idx<T> &energy, idx<T> &answer,
             idx<T> &label, idx<T> &target, idx<T> &rawout) {
    eblerror("not implemented");
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void answer_module<T,Tds1,Tds2,Tstate>::forget(forget_param_linear &fp) {
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  std::string answer_module<T,Tds1,Tds2,Tstate>::describe() {
    eblerror("not implemented");
    std::string s;
    return s;
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  uint answer_module<T,Tds1,Tds2,Tstate>::get_nfeatures() {
    return nfeatures;
  }

  // class_answer //////////////////////////////////////////////////////////////

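  // class_answer decodes 1-of-n (or binary) classification outputs: it keeps
  // one target vector per class, and its fprop(in, out) writes 2 values per
  // spatial location, the winning class id and a confidence in [0,1] computed
  // according to conf_type (sqrdist, single or max).
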
  template <typename T, typename Tds1, typename Tds2, class Tstate>
  class_answer<T,Tds1,Tds2,Tstate>::
  class_answer(uint nclasses, double target_factor, bool binary_target_,
               t_confidence conf, bool apply_tanh_, const char *name_,
               int force)
    : answer_module<T,Tds1,Tds2,Tstate>(binary_target_ ? 1 : nclasses, name_),
      conf_type(conf), binary_target(binary_target_), resize_output(true),
      apply_tanh(apply_tanh_), tmp(1,1,1), force_class(force) {
    // create 1-of-n targets with target 1.0 for shown class, -1.0 for the rest
    targets = create_target_matrix<T>(nclasses, (T)1.0);
    // binary target
    if (binary_target) {
      if (nclasses != 2)
        eblerror("expecting 2 classes only when binary_target is on");
      targets = idx<T>(2, 1, 1);
      // int neg_id = ds.get_class_id("bg"); // negative class
      // if (neg_id == 0) {
      //   targets.set(-1.0, 0, 0); // negative: -1.0
      //   targets.set( 1.0, 1, 0); // positive:  1.0
      // } else {
      targets.sset((T) 1.0, 0); // positive:  1.0
      targets.sset((T)-1.0, 1); // negative: -1.0
      // }
    }
    // target factor
    idx_dotc(targets, target_factor, targets);
    print_targets(targets);
    // set min/max of target
    target_min = idx_min(targets);
    target_max = idx_max(targets);
    // set confidence parameters
    T max_dist;
    switch (conf_type) {
    case confidence_sqrdist:
      max_dist = target_max - target_min;
      conf_ratio = targets.dim(0) * max_dist * max_dist;
      // shift value to be subtracted before dividing by conf_ratio
      conf_shift = target_min;
      cout << "Using sqrdist confidence formula with normalization ratio "
           << conf_ratio << " and shift value " << conf_shift << endl;
      break ;
    case confidence_single:
      conf_ratio = target_max - target_min;
      // shift value to be subtracted before dividing by conf_ratio
      conf_shift = target_min;
      cout << "Using single output confidence with normalization ratio "
           << conf_ratio << " and shift value " << conf_shift << endl;
      break ;
    case confidence_max:
      conf_ratio = target_max - target_min;
      conf_shift = 0; // no shift needed, the difference min is 0.
      cout << "Using max confidence formula with normalization ratio "
           << conf_ratio << endl;
      break ;
    default:
      eblerror("confidence type " << conf_type << " undefined");
    }
  }

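  // A worked example of the normalization above (assuming 2 classes and
  // target_factor 1, i.e. targets in {-1,+1}): for confidence_sqrdist,
  // max_dist = 2, conf_ratio = 2 * 2 * 2 = 8 and conf_shift = -1, so an
  // output that matches its target exactly (squared distance 0) yields
  // confidence 1 - (0 - (-1)) / 8 = 0.875, and larger distances yield less.
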
  template <typename T, typename Tds1, typename Tds2, class Tstate>
  class_answer<T,Tds1,Tds2,Tstate>::~class_answer() {
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void class_answer<T,Tds1,Tds2,Tstate>::fprop(Tstate &in, Tstate &out) {
    // resize out if necessary
    idxdim d(in.x);
    d.setdim(0, 2); // 2 outputs per pixel: class,confidence
    idx<T> outx = out.x;
    idx<T> inx = in.x;
    if (resize_output) {
      if (d != out.x.get_idxdim()) {
        out.resize(d);
        outx = out.x;
      }
    } else { // if not resizing, narrow to the number of targets
      if (outx.dim(0) != targets.dim(0))
        outx = outx.narrow(0, targets.dim(0), 0);
    }
    // apply tanh if required
    if (apply_tanh) {
      mtanh.fprop(in, tmp);
      inx = tmp.x;
    }
    // loop on features (dimension 0) to set class and confidence
    int classid;
    T conf, max2;
    idx_1loop2(ii, inx, T, oo, outx, T, {
        if (binary_target) {
          T t0 = targets.gget(0);
          T t1 = targets.gget(1);
          T a = ii.gget();
          if (fabs((double) a - t0) < fabs((double) a - t1)) {
            oo.set((T) 0, 0); // class 0
            oo.set((T) (2 - fabs((double) a - t0)) / 2, 1); // conf
          } else {
            oo.set((T) 1, 0); // class 1
            oo.set((T) (2 - fabs((double) a - t1)) / 2, 1); // conf
          }
        }
        else { // 1-of-n target
          // set class answer
          if (force_class >= 0) classid = force_class;
          else classid = idx_indexmax(ii);
          oo.set((T) classid, 0);
          // set confidence
          intg p;
          bool ini = false;
          switch (conf_type) {
          case confidence_sqrdist: // squared distance to target
            target = targets.select(0, classid);
            conf = (T) (1.0 - ((idx_sqrdist(target, ii) - conf_shift)
                               / conf_ratio));
            oo.set(conf, 1);
            break ;
          case confidence_single: // simply return class' out (normalized)
            conf = (T) ((ii.get(classid) - conf_shift) / conf_ratio);
            oo.set(conf, 1);
            break ;
          case confidence_max: // distance with 2nd max answer
            conf = std::max(target_min, std::min(target_max, ii.get(classid)));
            for (p = 0; p < ii.dim(0); ++p) {
              if (p != classid) {
                if (!ini) {
                  max2 = ii.get(p);
                  ini = true;
                } else {
                  if (ii.get(p) > max2)
                    max2 = ii.get(p);
                }
              }
            }
            max2 = std::max(target_min, std::min(target_max, max2));
            oo.set((T) ((conf - max2) / conf_ratio), 1);
            break ;
          default:
            eblerror("confidence type " << conf_type << " undefined");
          }
        }
      });
    EDEBUG(this->name() << ": in " << in << " (in.x min " << idx_min(in.x)
           << " max " << idx_max(in.x) << ") out " << out << " (out.x min "
           << idx_min(out.x) << " max " << idx_max(out.x) << ")");
#ifdef __DEBUG__
    idx<T> ldec = out.x.select(0, 0);
    EDEBUG(this->name() << ": class min " << idx_min(ldec)
           << " max " << idx_max(ldec));
    idx<T> lconf = out.x.select(0, 1);
    EDEBUG(this->name() << ": confidence min " << idx_min(lconf)
           << " max " << idx_max(lconf));
#endif
  }

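  // Note on the binary branch above: for outputs within the default target
  // range [-1,1], |a - t| lies in [0,2], so (2 - |a - t|) / 2 maps an output
  // equal to its target to confidence 1 and to the opposite target to 0.
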
  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void class_answer<T,Tds1,Tds2,Tstate>::
  fprop(labeled_datasource<T,Tds1,Tds2> &ds, Tstate &out) {
    // get label, i.e. input 2
    ds.fprop_label(last_label);
    // select the target given the class id
    idx<T> target = targets.select(0, (int) last_label.x.get());
    // resize out if necessary
    idx<T> outx = out.x;
    if (resize_output) {
      idxdim d(ds.sample_dims()), dt(target.get_idxdim());
      d.setdims(1);
      for (uint i = 0; i < dt.order(); ++i)
        d.setdim(i, dt.dim(i));
      if (out.x.get_idxdim() != d) {
        if (out.x.order() != d.order())
          out = Tstate(d); // re-allocating
        else
          out.resize(d); // just resizing
        outx = out.x;
      }
    } else { // if not resizing, narrow to the number of targets
      if (outx.dim(0) != target.dim(0))
        outx = outx.narrow(0, target.dim(0), 0);
    }
    // copy target to output
    idx_copy(target, outx);
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  bool class_answer<T,Tds1,Tds2,Tstate>::
  correct(Tstate &answer, Tstate &label) {
    return (answer.x.gget(0) == label.x.gget());
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void class_answer<T,Tds1,Tds2,Tstate>::
  update_log(classifier_meter &log, intg age, idx<T> &energy, idx<T> &answer,
             idx<T> &label, idx<T> &target, idx<T> &rawout) {
    log.update(age, (uint) label.gget(0), (uint) answer.gget(0),
               (double) energy.gget());
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  std::string class_answer<T,Tds1,Tds2,Tstate>::describe() {
    std::string s;
    s << "class_answer module " << this->name() << " with " << targets.dim(0)
      << " classes, confidence type " << (int) conf_type << " and targets "
      << targets;
    if (apply_tanh)
      s << ", a tanh is applied to inputs";
    s << ". ";
    print_targets(targets);
    return s;
  }

  // scalerclass_answer ////////////////////////////////////////////////////////

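  // scalerclass_answer extends class_answer with scale/position regression:
  // on top of the class and confidence outputs it decodes jsize jitter
  // outputs (scale, and optionally h,w offsets), optionally un-normalized by
  // provided biases/coeffs, and can predict confidence as an extra feature.
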
  template <typename T, typename Tds1, typename Tds2, class Tstate>
  scalerclass_answer<T,Tds1,Tds2,Tstate>::
  scalerclass_answer(uint nclasses, double target_factor, bool binary_target,
                     t_confidence conf, bool apply_tanh_, uint jsize_,
                     uint joffset_, float mgauss, bool predict_conf_,
                     bool predict_bconf_, idx<T> *biases_,
                     idx<T> *coeffs_, const char *name_)
    : class_answer<T,Tds1,Tds2,Tstate>(nclasses, target_factor, binary_target,
                                       conf, apply_tanh_, name_),
      jitter(1, 1), out_class(1), jsize(jsize_), joffset(joffset_),
      scale_mgauss(mgauss), predict_conf(predict_conf_),
      predict_bconf(predict_bconf_), pconf_offset(0), biases(NULL),
      coeffs(NULL) {
    resize_output = false;
    this->nfeatures += jsize;
    if (predict_conf) {
      pconf_offset = this->nfeatures;
      this->nfeatures++;
    }
    // initialize variables to log names
    log_fields.push_back("spatial");
    log_fields.push_back("scale");
    log_fields.push_back("localization");
    log_fields.push_back("confidence");
    log_fields.push_back("localization_total");
    log_fields.push_back("confidence_total");
    // coeffs & biases
    if (biases_) biases = new idx<T>(*biases_);
    if (coeffs_) coeffs = new idx<T>(*coeffs_);
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  scalerclass_answer<T,Tds1,Tds2,Tstate>::~scalerclass_answer() {
    if (biases) delete biases;
    if (coeffs) delete coeffs;
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void scalerclass_answer<T,Tds1,Tds2,Tstate>::fprop(Tstate &in, Tstate &out) {
    // only works for 3d bufs, TODO: make this generic
    idx_checkorder2(in.x, 3, out.x, 3);
    // resize out if necessary
    idxdim d(in.x);
    // when jsize = 3, 5 outputs per pixel: class,confidence,scale,h,w
    d.setdim(0, 2 + jsize);
    if (d != out.x.get_idxdim())
      out.resize(d);
    // narrow for regular class extraction
    tmp1 = in.narrow(0, targets.dim(1), 0);
    tmp2 = out.narrow(0, 2, 0); // class,confidence
    // fprop class,confidence
    class_answer<T,Tds1,Tds2,Tstate>::fprop(tmp1, tmp2);
    // copy jitter outputs
    idx<T> i = in.x.narrow(0, jsize, targets.dim(1));
    idx<T> o = out.x.narrow(0, jsize, 2);
    idx_copy(i, o);
    // un-normalize jitter outputs
    if (coeffs) {
      idx<T> tmpcoeff = coeffs->narrow(0, jsize, 0);
      idx_bloop2(tc, tmpcoeff, T, oo, o, T) {
        idx_dotc(oo, 1 / tc.get(), oo); }
    }
    if (biases) {
      idx<T> tmpbias = biases->narrow(0, jsize, 0);
      idx_bloop2(tb, tmpbias, T, oo, o, T) {
        idx_addc(oo, - tb.get(), oo); }
    }
    // if conf is predicted, replace class conf by prediction
    if (predict_conf) {
      i = in.x.narrow(0, 1, pconf_offset);
      o = out.x.narrow(0, 1, 1);
      idx_copy(i, o);
      // un-normalize conf outputs
      if (coeffs) {
        idx<T> tmpcoeff = coeffs->narrow(0, 1, jsize);
        idx_bloop2(tc, tmpcoeff, T, oo, o, T) {
          idx_dotc(oo, 1 / tc.get(), oo); }
      }
      if (biases) {
        idx<T> tmpbias = biases->narrow(0, 1, jsize);
        idx_bloop2(tb, tmpbias, T, oo, o, T) {
          idx_addc(oo, - tb.get(), oo); }
      }
      // cap conf prediction by 0 and 1
      idx_threshold(o, (T)0); // cap below by 0
      idx_threshold2(o, (T)1); // cap above by 1
    }
    // // modulate confidence with scale and spatial jitter
    // idx_eloop1(outx, out.x, T) {
    //   idx_eloop1(o, outx, T) {
    //     T c = o.gget(1);
    //     T s = o.gget(2);
    //     T h = o.gget(3);
    //     T w = o.gget(4);
    //     c *= 1 - (.5 * (fabs(s - 1) + sqrt(h * h + w * w)));
    //     o.sset(c, 1);
    //   }
    // }
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void scalerclass_answer<T,Tds1,Tds2,Tstate>::
  fprop(labeled_datasource<T,Tds1,Tds2> &ds, Tstate &out) {
    // if out has the wrong order, allocate.
    if (out.x.order() != ds.sample_dims().order()) {
      idxdim d = ds.sample_dims();
      d.setdims(1);
      out = Tstate(d);
    }
    // resize out if necessary
    idx<T> target = targets.select(0, 0);
    if (out_class.x.get_idxdim() != target.get_idxdim())
      out_class.resize(target.get_idxdim());
    // fprop regular target
    class_answer<T,Tds1,Tds2,Tstate>::fprop(ds, out_class);
    uint jitt_offset = out_class.x.dim(0);
    // get jitter info
    ds.fprop_jitter(jitter);
    idx<T> jitt = jitter.x.narrow(1, jsize, joffset);
    // resize out if necessary
    idxdim d(out.x);
    d.setdim(0, jitt.dim(0));
    d.setdim(1, this->nfeatures);
    if (out.x.get_idxdim() != d)
      out.resize(d);
    // replicate class targets
    idx<T> outc = out.x.narrow(1, out_class.x.dim(0), 0);
    idx_bloop1(tgt, outc, T) {
      idx_copy(out_class.x, tgt);
    }
    outc = out.x.narrow(1, jsize, jitt_offset);
    idx_copy(jitt, outc);
    // when predicting confidence, default (negative) target confidence is 0.0
    if (predict_conf) {
      outc = out.x.narrow(1, 1, pconf_offset);
      idx_fill(outc, (T) 0);
    }
    // modulate confidence by scale and visibility
    idxdim sd = ds.sample_dims();
    rect<float> netrec(0, 0, sd.dim(1), sd.dim(2));
    bbstate_idx<Tds2> label;
    if (out.x.dim(1) != 5 && out.x.dim(1) != 6)
      eblerror("expected 5 or 6 elts in dim 1 of " << out.x);
    { idx_bloop1(tgt, out.x, T) {
      T s = tgt.gget(jitt_offset); // scale
      if (s != 0) {
        // compute target box
        T h = tgt.gget(jitt_offset + 1); // height offset
        T w = tgt.gget(jitt_offset + 2); // width offset
        rect<float> r(netrec);
        r.h0 += h * netrec.height;
        r.w0 += w * netrec.height;
        r.scale_centered(1 / s, 1 / s);
        // compute visibility ratio
        float vis = r.overlap_ratio(netrec);
        // compute confidence given visibility (output is [0,1])
        // gnuplot: set yrange[0:1];set xrange[0:1]; plot tanh(x*20 - 18)/1.4+.33
        T visconf = (T) (tanh(vis * 20 - 18) / 1.4 + .33);
        // compute confidence given scale (output is [0,1])
        // gnuplot: set yrange[0:1];set xrange[0:3];plot (exp(-(x-1.5)*(x-1.5)/(2 * .2)) * 4 - 1)/2+.5
        T sconf = std::min((T) 1.0, (T)
                           ((exp(-(s - scale_mgauss) * (s - scale_mgauss)
                                 / (2 * .2)) * 4 - 1)/2+.5));
        // compute distance to center (the closer the higher the conf)
        // set xrange[-1:1];set yrange[-1:1];plot exp(-2*sqrt(x*x))
        //T dconf = (T) (exp(-2 * sqrt((double) h*h + w*w)));
        // take minimum of all confs for final confidence
        //T final_conf = std::min(visconf, std::min(dconf, sconf));
        T final_conf = std::min(visconf, sconf);
        EDEBUG("s: " << s << " h: " << h << " w: " << w << " sconf: " << sconf
               << " visconf: " << visconf
               //<< " dconf: " << dconf
               << " final: " << final_conf);
        // update confidence target
        if (predict_conf) { // fill additional confidence feature
          if (predict_bconf) { // target conf is binary
            if (final_conf < .5)
              tgt.sset((T)0, pconf_offset);
            else
              tgt.sset((T)1, pconf_offset);
          } else // target conf is continuous
            tgt.sset(final_conf, pconf_offset);
        } else { // modulate positive object's target directly
          ds.fprop_label(label); // get positive offset
          tgt.sset(final_conf, (int)label.x.get());
        }
      }
    }}
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void scalerclass_answer<T,Tds1,Tds2,Tstate>::
  update_log(classifier_meter &log, intg age, idx<T> &energy, idx<T> &answer,
             idx<T> &label, idx<T> &target, idx<T> &rawout) {
    // regular classification logging
    class_answer<T,Tds1,Tds2,Tstate>::update_log(log, age, energy, answer,
                                                 label, target, rawout);
    // additional logging
    if (log.log_fields.size() == 0) { // initialize variable strings
      log.log_values.clear();
      for (uint i = 0; i < log_fields.size(); ++i) {
        log.log_fields.push_back(log_fields[i]);
        log.log_values.push_back(0.0);
        log.total_values.push_back(0.0);
      }
    }
    // determine confidence location
    uint jittoff = target.dim(0) - jsize;
    if (predict_conf) jittoff--;
    uint conf_off;
    if (predict_conf)
      conf_off = target.dim(0) - 1;
    else {
      idx<T> ctgt = target.narrow(0, jittoff, 0);
      conf_off = idx_indexmax(ctgt);
    }
    T conf_target = target.gget(conf_off);
    T s = target.gget(jittoff);
    // update localization values only if target conf is > .5 and positive
    if (conf_target > .5 && s > 0) {
      T herr = target.gget(jittoff + 1) - answer.gget(3);
      T werr = target.gget(jittoff + 2) - answer.gget(4);
      T spatial_err = (T) sqrt((double) herr * herr + werr * werr); // spatial
      T scale_err = (T) fabs((double) target.gget(jittoff) - answer.gget(2)); // scale
      log.log_values[0] += spatial_err; // spatial
      log.log_values[1] += scale_err; // scale
      log.log_values[2] += spatial_err + scale_err / 2; // localization
      log.log_values[4] += 1; // count samples
      // normalization
      log.total_values[0] = log.log_values[4]; // normalization total
      log.total_values[1] = log.log_values[4]; // normalization total
      log.total_values[2] = log.log_values[4]; // normalization total
      log.total_values[4] = 1; // normalization total
    }
    // update confidence value if positive example
    if (s > 0) {
      log.log_values[3] +=
        fabs((double) rawout.gget(conf_off) - target.gget(conf_off));
      log.log_values[5] += 1; // count samples
      // normalization
      log.total_values[3] = log.log_values[5]; // normalization total
      log.total_values[5] = 1; // normalization total
    }
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  std::string scalerclass_answer<T,Tds1,Tds2,Tstate>::describe() {
    std::string s;
    s << "scalerclass_answer module " << this->name() << " with "
      << targets.dim(0)
      << " classes, confidence type " << (uint) conf_type << " and targets "
      << targets << ", jitter size " << jsize << " with offset " << joffset;
    if (apply_tanh)
      s << ", a tanh is applied to inputs";
    if (predict_conf)
      s << ", predicting confidence";
    s << ", target confidence is " << (predict_bconf ? "binary" : "continuous");
    s << ", biases: ";
    if (biases)
      biases->printElems(s);
    else
      s << "none";
    s << ", coeffs: ";
    if (coeffs)
      coeffs->printElems(s);
    else
      s << "none";
    s << ". ";
    print_targets(targets);
    return s;
  }

  // scaler_answer /////////////////////////////////////////////////////////////

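  // scaler_answer decodes a binary detector with scale regression: output 0
  // of the network is thresholded into the negative/positive class ids and
  // also interpreted as the scale answer, with optional spatial (h,w)
  // outputs when spatial is on.
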
  template <typename T, typename Tds1, typename Tds2, class Tstate>
  scaler_answer<T,Tds1,Tds2,Tstate>::
  scaler_answer(uint negative_id_, uint positive_id_, bool raw_confidence_,
                float threshold_, bool spatial_, const char *name_)
    : answer_module<T,Tds1,Tds2,Tstate>(spatial_ ? 3 : 1, name_),
      negative_id(negative_id_), positive_id(positive_id_),
      raw_confidence(raw_confidence_), jitter(1), threshold((T) threshold_),
      spatial(spatial_), jsize(answer_module<T,Tds1,Tds2,Tstate>::nfeatures) {
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  scaler_answer<T,Tds1,Tds2,Tstate>::~scaler_answer() {
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void scaler_answer<T,Tds1,Tds2,Tstate>::fprop(Tstate &in, Tstate &out) {
    // only works for 3d bufs, TODO: make this generic
    idx_checkorder2(in.x, 3, out.x, 3);
    idx_checkdim1(in.x, 0, jsize);
    // resize out if necessary
    idxdim d(in.x);
    // 2 + jsize outputs per pixel: class,confidence,scale(,h,w when spatial)
    d.setdim(0, 2 + jsize);
    if (d != out.x.get_idxdim())
      out.resize(d);
    // loop on features (dimension 0) to set answers
    uint classid; T conf;
    idx_eloop2(ii, in.x, T, oo, out.x, T) {
      idx_eloop2(iii, ii, T, ooo, oo, T) {
        // set class answer
        T i = iii.get(0);
        classid = ((i <= threshold) ? negative_id : positive_id);
        ooo.set((T) classid, 0); // set classid answer
        if (raw_confidence) {
          ooo.set(i, 1); // conf is simply the output
          ooo.set(i, 2);
        } else { // confidence is the position in the margin area
          conf = std::min((T)1, std::max((T)0, (T) ((i + 1) / 2)));
          if (classid == negative_id) {
            ooo.set((T) 1 - conf, 1); // conf
            ooo.set((T) 0, 2);
            if (spatial) {
              ooo.set((T) 0, 3);
              ooo.set((T) 0, 4);
            }
            //ooo.set((T) std::min((T) 1, std::max(0, -i - 1)), 1); // conf
            //ooo.set(std::max((T)0, -i), 1); // conf
          } else {
            ooo.set(conf, 1); // conf
            ooo.set(i, 2); // scale answer
            if (spatial) {
              ooo.set(iii.get(1), 3); // h answer
              ooo.set(iii.get(2), 4); // w answer
            }
            //ooo.set((T) std::min((T) 1, std::max((T) 0, i / 2 + 1)), 1); // conf
            //ooo.set(std::max((T) 0, i + 1), 1); // conf
          }
        }
      }
    }
  }

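  // Note on the margin mapping above: conf = clamp((i + 1) / 2, 0, 1) maps a
  // raw output of -1 (or below) to confidence 0 and +1 (or above) to 1, i.e.
  // the position inside the [-1,1] margin; the negative class reports
  // 1 - conf so that confident negatives also receive a high confidence.
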
  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void scaler_answer<T,Tds1,Tds2,Tstate>::
  fprop(labeled_datasource<T,Tds1,Tds2> &ds, Tstate &out) {
    // check output size
    idxdim d(out.x);
    d.setdim(0, jsize);
    if (out.x.get_idxdim() != d)
      out.resize(d);
    // set scale jitter as single output
    ds.fprop_jitter(jitter);
    idx_copy(jitter.x, out.x); // scale,h,w jitter
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  std::string scaler_answer<T,Tds1,Tds2,Tstate>::describe() {
    std::string s;
    s << "scaler_answer module " << this->name() << " with negative id "
      << negative_id << " and positive id " << positive_id;
    if (spatial)
      s << ", using spatial jitter";
    s << ", confidence is the ";
    if (raw_confidence)
      s << "raw output.";
    else
      s << "position in the margin area.";
    return s;
  }

  // regression_answer /////////////////////////////////////////////////////////

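  // regression_answer passes network outputs through unchanged; an answer is
  // counted as correct when its L1 distance to the label is at most
  // `threshold`.
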
  template <typename T, typename Tds1, typename Tds2, class Tstate>
  regression_answer<T,Tds1,Tds2,Tstate>::
  regression_answer(uint nfeatures_, float64 threshold_, const char *name_)
    : answer_module<T,Tds1,Tds2,Tstate>(nfeatures_, name_),
      threshold(threshold_) {
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  regression_answer<T,Tds1,Tds2,Tstate>::~regression_answer() {
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void regression_answer<T,Tds1,Tds2,Tstate>::fprop(Tstate &in, Tstate &out) {
    // resize out if necessary
    idxdim d(in.x);
    if (d != out.x.get_idxdim())
      out.resize(d);
    idx_copy(in.x, out.x);
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void regression_answer<T,Tds1,Tds2,Tstate>::
  fprop(labeled_datasource<T,Tds1,Tds2> &ds, Tstate &out) {
    ds.fprop_label_net(out);
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  bool regression_answer<T,Tds1,Tds2,Tstate>::
  correct(Tstate &answer, Tstate &label) {
    return idx_l1(answer.x, label.x) <= threshold;
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void regression_answer<T,Tds1,Tds2,Tstate>::
  update_log(classifier_meter &log, intg age, idx<T> &energy, idx<T> &answer,
             idx<T> &label, idx<T> &target, idx<T> &rawout) {
    log.update(age, (bool)(idx_l1(answer, label) <= threshold),
               (double) energy.gget());
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  std::string regression_answer<T,Tds1,Tds2,Tstate>::describe() {
    std::string s;
    s << "regression_answer module " << this->name() << " with threshold "
      << threshold << " for correctness classification";
    return s;
  }

  // vote_answer ///////////////////////////////////////////////////////////////

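  // vote_answer accumulates class confidences over several concatenated
  // copies of the class outputs (e.g. multiple voters) and answers with the
  // class of highest accumulated confidence, averaged over the voters.
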
  template <typename T, typename Tds1, typename Tds2, class Tstate>
  vote_answer<T,Tds1,Tds2,Tstate>::
  vote_answer(uint nclasses, double target_factor, bool binary_target_,
              t_confidence conf, bool apply_tanh_, const char *name_)
    : class_answer<T,Tds1,Tds2,Tstate>(nclasses, target_factor, binary_target_,
                                       conf, apply_tanh_, name_) {
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  vote_answer<T,Tds1,Tds2,Tstate>::~vote_answer() {
  }

  template <typename T, typename Tds1, typename Tds2, class Tstate>
  void vote_answer<T,Tds1,Tds2,Tstate>::fprop(Tstate &in, Tstate &out) {
    // check that in's dim 0 is a multiple of nfeatures
    if (in.x.dim(0) % this->nfeatures != 0)
      eblerror("expected number of features to be multiple of "
               << this->nfeatures);
    Tstate i;
    idx<T> confidences(this->nfeatures);
    idx_clear(confidences);
    // loop on all concatenated outputs
    uint off;
    //uint bestid = 0;
    T bestconf = 0;
    for (off = 0; off < in.x.dim(0); off += this->nfeatures) {
      i = in.narrow(0, this->nfeatures, off);
      class_answer<T,Tds1,Tds2,Tstate>::fprop(i, out);
      // accumulate confidence
      uint index = (uint) out.x.get(0);
      confidences.set(out.x.get(1) + confidences.get(index), index);
      cout << "id: " << index << " conf: " << out.x.get(1) << endl;
      if (bestconf < out.x.get(1)) {
        bestconf = out.x.get(1);
        //bestid = index;
      }
    }
    // take strongest confidence as vote
    intg id = idx_indexmax(confidences);
    T conf = confidences.get(id) / (T) (off / this->nfeatures);
    //out.x.sset(bestid, 0); // class id
    //out.x.sset(bestconf, 1); // confidence
    out.x.sset((T) id, 0); // class id
    out.x.sset(conf, 1); // confidence
    cout << "vote: id: " << id << " conf: " << conf << endl;
  }

  // trainable_module //////////////////////////////////////////////////////////

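  // trainable_module glues everything together for training: flow 1 runs the
  // network mod1 on datasource inputs (optionally produced by dsmod1), flow 2
  // produces targets (via dsmod2 or the datasource labels, optionally through
  // mod2), and the energy module compares both flows to yield the energy to
  // minimize.
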
  template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2,
            class Ten>
  trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::
  trainable_module(ebm_2<Tin1,Tin2,Ten> &energy_, module_1_1<T,Tin1> &mod1_,
                   module_1_1<T,Tin2> *mod2_,
                   answer_module<T,Tds1,Tds2,Tin1> *dsmod1_,
                   answer_module<T,Tds1,Tds2,Tin2> *dsmod2_, const char *name_,
                   const char *switcher)
    : energy_mod(energy_), mod1(mod1_), mod2(mod2_), dsmod1(dsmod1_),
      dsmod2(dsmod2_), ms_switch(NULL)
      // TODO: fix hardcoded order
      // in1(1,1,1), out1(1,1,1), in2(1,1,1), out2(1,1,1), answers(1,1,1),
      // targets(1,1,1), mod_name(name_), tmp_energy(1,1,1)
  {
    // try to find switcher module in mod1
    if (switcher) {
      std::vector<ms_module<T,Tin1>*> all = arch_find_all(&mod1, ms_switch);
      for (uint i = 0; i < all.size(); ++i)
        if (!strcmp(all[i]->name(), switcher)) {
          ms_switch = all[i];
          break ;
        }
    }
  }

  template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2,
            class Ten>
  trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::~trainable_module() {
  }

  template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2,
            class Ten>
  void trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::update_scale
  (labeled_datasource<T,Tds1,Tds2> &ds) {
    // update switch data
    if (ms_switch && ds.has_scales()) ms_switch->set_switch(ds.fprop_scale());
  }

  template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2,
            class Ten>
  void trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::
  fprop(labeled_datasource<T,Tds1,Tds2> &ds, Ten &energy) {
    // flow 1 //////////////////////////////////////////////////////////////////
    TIMING2("between end of fprop/bprop and sample retrieval");
    if (ds.mstate_samples() || mod1.mstate_input()) { // input must be multi-state
      // produce state 1
      if (dsmod1) // specific data production
        dsmod1->fprop(ds, msin1);
      else // generic, simply take ds' input 1
        ds.fprop_data(msin1);
      TIMING2("sample retrieval");
      // fprop flow 1
      update_scale(ds);
      mod1.fprop(msin1, out1);
      TIMING2("entire fprop");
    } else {
      // produce state 1
      if (dsmod1) // specific data production
        dsmod1->fprop(ds, in1);
      else // generic, simply take ds' input 1
        ds.fprop_data(in1);
      TIMING2("sample retrieval");
      // fprop flow 1
      mod1.fprop(in1, out1);
      TIMING2("entire fprop");
    }

    // flow 2 //////////////////////////////////////////////////////////////////
    Tin2 *i2 = &in2;
    if (!mod2) // no main module in flow 2, put input directly into out2
      i2 = &out2;
    // produce state 2
    if (dsmod2) // specific data production
      dsmod2->fprop(ds, *i2);
    else // generic, simply take ds' input 2
      ds.fprop_label_net(*i2);
    // fprop flow 2
    if (mod2)
      mod2->fprop(*i2, out2);

    // energy //////////////////////////////////////////////////////////////////
    energy_mod.fprop(out1, out2, energy);
  }

  template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2,
            class Ten>
  void trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::
  bprop(labeled_datasource<T,Tds1,Tds2> &ds, Ten &energy) {
    TIMING2("until beginning of bprop");
    // clear buffers
    out1.clear_dx();
    out2.clear_dx();
    // bprop
    energy_mod.bprop(out1, out2, energy);
    if (ds.mstate_samples() || mod1.mstate_input())
      mod1.bprop(msin1, out1);
    else
      mod1.bprop(in1, out1);
    if (dsmod2)
      dsmod2->bprop(ds, out2);
    TIMING2("entire bprop");
  }

  template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2,
            class Ten>
  void trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::
  bbprop(labeled_datasource<T,Tds1,Tds2> &ds, Ten &energy) {
    TIMING2("until beginning of bbprop");
    // clear buffers
    out1.clear_ddx();
    out2.clear_ddx();
    // bbprop
    energy_mod.bbprop(out1, out2, energy);
    if (ds.mstate_samples() || mod1.mstate_input())
      mod1.bbprop(msin1, out1);
    else
      mod1.bbprop(in1, out1);
    if (dsmod2)
      dsmod2->bbprop(ds, out2);
    TIMING2("entire bbprop");
  }

  template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2,
            class Ten>
  int trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::
  infer2(labeled_datasource<T,Tds1,Tds2> &ds, Ten &energy) {
    eblerror("not implemented");
    return 0;
  }

  template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2,
            class Ten>
  void trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::
  forget(forget_param_linear &fp) {
    mod1.forget(fp);
    if (mod2)
      mod2->forget(fp);
    if (dsmod2)
      dsmod2->forget(fp);
  }

  template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2,
            class Ten>
  const Tin1& trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::compute_answers() {
    if (!dsmod2)
      eblerror("dsmod2 must be defined to compute answers");
    dsmod2->fprop(out1, answers);
    return answers;
  }

  template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2,
            class Ten>
  bool trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::correct(Tin1 &answer,
                                                            Tin1 &label) {
    if (!dsmod2)
      eblerror("dsmod2 must be defined to compute correctness");
    return dsmod2->correct(answer, label);
  }

  template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2,
            class Ten>
  void trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::
  update_log(classifier_meter &log, intg age, idx<T> &energy, idx<T> &answer,
             idx<T> &label, idx<T> &target, idx<T> &rawout) {
    if (!dsmod2)
      eblerror("dsmod2 must be defined to update log");
    dsmod2->update_log(log, age, energy, answer, label, target, rawout);
  }

  template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2,
            class Ten>
  void trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::compute_answers(Tin1 &ans) {
    if (!dsmod2)
      eblerror("dsmod2 must be defined to compute answers");
    dsmod2->fprop(out1, ans);
  }

  template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2,
            class Ten>
  idx<T> trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::
  compute_targets(labeled_datasource<T,Tds1,Tds2> &ds) {
    if (!dsmod2)
      eblerror("dsmod2 must be defined to compute targets");
    scalerclass_energy<T,Tin1> *sce =
      dynamic_cast<scalerclass_energy<T,Tin1>*>(&energy_mod);
    if (sce)
      targets.x = sce->last_target_raw;
    else
      dsmod2->fprop(ds, targets);
    return targets.x;
  }

  template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2,
            class Ten>
  const char *trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::name() {
    return mod_name.c_str();
  }

  template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2,
            class Ten>
  std::string trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::describe() {
    std::string s;
    s << "trainer module " << this->name() << ": " << energy_mod.describe();
    if (dsmod2)
      s << ", " << dsmod2->describe();
    return s;
  }

  // utility functions /////////////////////////////////////////////////////////

  template <typename T>
  void print_targets(idx<T> &targets) {
    cout << "Targets: " << targets << endl;
    if (targets.nelements() < 500)
      cout << targets.str() << endl;
  }

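  // A hedged usage sketch (illustrative only: `net`, `energy`, `ds`, `E` and
  // the exact template arguments are assumptions, not defined in this file,
  // and the trailing name/switcher constructor arguments are assumed to be
  // defaulted in the declaration): an answer module typically plugs into a
  // trainable_module as dsmod2, e.g.
  //   class_answer<T,Tds1,Tds2,Tstate> answer(nclasses);
  //   trainable_module<T,Tds1,Tds2,Tstate,Tstate,Tstate>
  //     trainer(energy, net, NULL, NULL, &answer);
  //   trainer.fprop(ds, E); // forward: network flow, target flow, energy
  //   trainer.bprop(ds, E); // backward: gradients for one training step
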
} // end namespace ebl

#endif /*EBL_ANSWER_HPP_*/