libeblearn
|
00001 /*************************************************************************** 00002 * Copyright (C) 2011 by Pierre Sermanet * 00003 * pierre.sermanet@gmail.com * 00004 * All rights reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions are met: 00008 * * Redistributions of source code must retain the above copyright 00009 * notice, this list of conditions and the following disclaimer. 00010 * * Redistributions in binary form must reproduce the above copyright 00011 * notice, this list of conditions and the following disclaimer in the 00012 * documentation and/or other materials provided with the distribution. 00013 * * Redistribution under a license not approved by the Open Source 00014 * Initiative (http://www.opensource.org) must display the 00015 * following acknowledgement in all advertising material: 00016 * This product includes software developed at the Courant 00017 * Institute of Mathematical Sciences (http://cims.nyu.edu). 00018 * * The names of the authors may not be used to endorse or promote products 00019 * derived from this software without specific prior written permission. 00020 * 00021 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED 00022 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 00023 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 00024 * DISCLAIMED. IN NO EVENT SHALL ThE AUTHORS BE LIABLE FOR ANY 00025 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 00026 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00027 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 00028 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00029 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00030 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00031 ***************************************************************************/ 00032 00033 #ifndef EBL_ANSWER_HPP_ 00034 #define EBL_ANSWER_HPP_ 00035 00036 using namespace std; 00037 00038 namespace ebl { 00039 00041 // answer_module 00042 00043 template <typename T, typename Tds1, typename Tds2, class Tstate> 00044 answer_module<T,Tds1,Tds2,Tstate>::answer_module(uint nfeatures_, 00045 const char *name_) 00046 : module_1_1<T,Tstate>(name_), nfeatures(nfeatures_) { 00047 } 00048 00049 template <typename T, typename Tds1, typename Tds2, class Tstate> 00050 answer_module<T,Tds1,Tds2,Tstate>::~answer_module() { 00051 } 00052 00053 template <typename T, typename Tds1, typename Tds2, class Tstate> 00054 void answer_module<T,Tds1,Tds2,Tstate>::fprop(Tstate &in, Tstate &out) { 00055 eblerror("not implemented"); 00056 } 00057 00058 // single-state propagation ////////////////////////////////////////////////// 00059 00060 template <typename T, typename Tds1, typename Tds2, class Tstate> 00061 void answer_module<T,Tds1,Tds2,Tstate>:: 00062 fprop(labeled_datasource<T,Tds1,Tds2> &ds, Tstate &out) { 00063 eblerror("not implemented"); 00064 } 00065 00066 template <typename T, typename Tds1, typename Tds2, class Tstate> 00067 void answer_module<T,Tds1,Tds2,Tstate>:: 00068 bprop(labeled_datasource<T,Tds1,Tds2> &ds, Tstate &out) { 00069 // empty bprop by default 00070 } 00071 00072 template <typename T, typename Tds1, typename Tds2, class Tstate> 00073 void answer_module<T,Tds1,Tds2,Tstate>:: 00074 bbprop(labeled_datasource<T,Tds1,Tds2> &ds, Tstate &out) { 00075 // empty bbprop by default 00076 } 00077 00078 // multi-state propagation /////////////////////////////////////////////////// 00079 00080 template <typename T, typename Tds1, typename Tds2, class Tstate> 00081 void answer_module<T,Tds1,Tds2,Tstate>:: 00082 fprop(labeled_datasource<T,Tds1,Tds2> &ds, mstate<Tstate> &out) { 00083 eblerror("not implemented"); 00084 } 00085 00086 template <typename T, typename Tds1, typename Tds2, class Tstate> 00087 void answer_module<T,Tds1,Tds2,Tstate>:: 00088 bprop(labeled_datasource<T,Tds1,Tds2> &ds, mstate<Tstate> &out) { 00089 // empty bprop by default 00090 } 00091 00092 template <typename T, typename Tds1, typename Tds2, class Tstate> 00093 void answer_module<T,Tds1,Tds2,Tstate>:: 00094 bbprop(labeled_datasource<T,Tds1,Tds2> &ds, mstate<Tstate> &out) { 00095 // empty bbprop by default 00096 } 00097 00099 00100 template <typename T, typename Tds1, typename Tds2, class Tstate> 00101 bool answer_module<T,Tds1,Tds2,Tstate>:: 00102 correct(Tstate &answer, Tstate &label) { 00103 eblerror("not implemented"); 00104 return false; 00105 } 00106 00107 template <typename T, typename Tds1, typename Tds2, class Tstate> 00108 void answer_module<T,Tds1,Tds2,Tstate>:: 00109 update_log(classifier_meter &log, intg age, idx<T> &energy, idx<T> &answer, 00110 idx<T> &label, idx<T> &target, idx<T> &rawout) { 00111 eblerror("not implemented"); 00112 } 00113 00114 template <typename T, typename Tds1, typename Tds2, class Tstate> 00115 void answer_module<T,Tds1,Tds2,Tstate>::forget(forget_param_linear &fp) { 00116 } 00117 00118 template <typename T, typename Tds1, typename Tds2, class Tstate> 00119 std::string answer_module<T,Tds1,Tds2,Tstate>::describe() { 00120 eblerror("not implemented"); 00121 string s; 00122 return s; 00123 } 00124 00125 template <typename T, typename Tds1, typename Tds2, class Tstate> 00126 uint answer_module<T,Tds1,Tds2,Tstate>::get_nfeatures() { 00127 return nfeatures; 00128 } 00129 00131 // class_answer 00132 00133 template <typename T, typename Tds1, typename Tds2, class Tstate> 00134 class_answer<T,Tds1,Tds2,Tstate>:: 00135 class_answer(uint nclasses, double target_factor, bool binary_target_, 00136 t_confidence conf, bool apply_tanh_, const char *name_, 00137 int force) 00138 : answer_module<T,Tds1,Tds2,Tstate>(binary_target_?1:nclasses, name_), 00139 conf_type(conf), binary_target(binary_target_), resize_output(true), 00140 apply_tanh(apply_tanh_), tmp(1,1,1), force_class(force) { 00141 // create 1-of-n targets with target 1.0 for shown class, -1.0 for the rest 00142 targets = create_target_matrix<T>(nclasses, (T)1.0); 00143 // binary target 00144 if (binary_target) { 00145 if (nclasses != 2) 00146 eblerror("expecting 2 classes only when binary_target is on"); 00147 targets = idx<T>(2, 1, 1); 00148 // int neg_id = ds.get_class_id("bg"); // negative class 00149 // if (neg_id == 0) { 00150 // targets.set(-1.0, 0, 0); // negative: -1.0 00151 // targets.set( 1.0, 1, 0); // positive: 1.0 00152 // } else { 00153 targets.sset((T) 1.0, 0); // positive: 1.0 00154 targets.sset((T)-1.0, 1); // negative: -1.0 00155 // } 00156 } 00157 // target factor 00158 idx_dotc(targets, target_factor, targets); 00159 print_targets(targets); 00160 // set min/max of target 00161 target_min = idx_min(targets); 00162 target_max = idx_max(targets); 00163 // set confidence parameters 00164 T max_dist; 00165 switch (conf_type) { 00166 case confidence_sqrdist: 00167 max_dist = target_max - target_min; 00168 conf_ratio = targets.dim(0) * max_dist * max_dist; 00169 // shift value to be subtracted before dividing by conf_ratio 00170 conf_shift = target_min; 00171 cout << "Using sqrdist confidence formula with normalization ratio " 00172 << conf_ratio << " and shift value " << conf_shift << endl; 00173 break ; 00174 case confidence_single: 00175 conf_ratio = target_max - target_min; 00176 // shift value to be subtracted before dividing by conf_ratio 00177 conf_shift = target_min; 00178 cout << "Using single output confidence with normalization ratio " 00179 << conf_ratio << " and shift value " << conf_shift << endl; 00180 break ; 00181 case confidence_max: 00182 conf_ratio = target_max - target_min; 00183 conf_shift = 0; // no shift needed, the difference min is 0. 00184 cout << "Using max confidence formula with normalization ratio " 00185 << conf_ratio << endl; 00186 break ; 00187 default: 00188 eblerror("confidence type " << conf_type << " undefined"); 00189 } 00190 } 00191 00192 template <typename T, typename Tds1, typename Tds2, class Tstate> 00193 class_answer<T,Tds1,Tds2,Tstate>::~class_answer() { 00194 } 00195 00196 template <typename T, typename Tds1, typename Tds2, class Tstate> 00197 void class_answer<T,Tds1,Tds2,Tstate>::fprop(Tstate &in, Tstate &out) { 00198 // resize out if necessary 00199 idxdim d(in.x); 00200 d.setdim(0, 2); // 2 outputs per pixel: class,confidence 00201 idx<T> outx = out.x; 00202 idx<T> inx = in.x; 00203 if (resize_output) { 00204 if (d != out.x.get_idxdim()) { 00205 out.resize(d); 00206 outx = out.x; 00207 } 00208 } else { // if not resizing, narrow to the number of targets 00209 if (outx.dim(0) != targets.dim(0)) 00210 outx = outx.narrow(0, targets.dim(0), 0); 00211 } 00212 // apply tanh if required 00213 if (apply_tanh) { 00214 mtanh.fprop(in, tmp); 00215 inx = tmp.x; 00216 } 00217 // loop on features (dimension 0) to set class and confidence 00218 int classid; 00219 T conf, max2; 00220 idx_1loop2(ii, inx, T, oo, outx, T, { 00221 if (binary_target) { 00222 T t0 = targets.gget(0); 00223 T t1 = targets.gget(1); 00224 T a = ii.gget(); 00225 if (fabs((double) a - t0) < fabs((double) a - t1)) { 00226 oo.set((T) 0, 0); // class 0 00227 oo.set((T) (2 - fabs((double) a - t0)) / 2, 1); // conf 00228 } else { 00229 oo.set((T) 1, 0); // class 1 00230 oo.set((T) (2 - fabs((double) a - t1)) / 2, 1); // conf 00231 } 00232 } 00233 else { // 1-of-n target 00234 // set class answer 00235 if (force_class >= 0) classid = force_class; 00236 else classid = idx_indexmax(ii); 00237 oo.set((T) classid, 0); 00238 // set confidence 00239 intg p; 00240 bool ini = false; 00241 switch (conf_type) { 00242 case confidence_sqrdist: // squared distance to target 00243 target = targets.select(0, classid); 00244 conf = (T) (1.0 - ((idx_sqrdist(target, ii) - conf_shift) 00245 / conf_ratio)); 00246 oo.set(conf, 1); 00247 break ; 00248 case confidence_single: // simply return class' out (normalized) 00249 conf = (T) ((ii.get(classid) - conf_shift) / conf_ratio); 00250 oo.set(conf, 1); 00251 break ; 00252 case confidence_max: // distance with 2nd max answer 00253 conf = std::max(target_min, std::min(target_max, ii.get(classid))); 00254 for (p = 0; p < ii.dim(0); ++p) { 00255 if (p != classid) { 00256 if (!ini) { 00257 max2 = ii.get(p); 00258 ini = true; 00259 } else { 00260 if (ii.get(p) > max2) 00261 max2 = ii.get(p); 00262 } 00263 } 00264 } 00265 max2 = std::max(target_min, std::min(target_max, max2)); 00266 oo.set((T) ((conf - max2) / conf_ratio), 1); 00267 break ; 00268 default: 00269 eblerror("confidence type " << conf_type << " undefined"); 00270 } 00271 } 00272 }); 00273 EDEBUG(this->name() << ": in " << in << " (in.x min " << idx_min(in.x) 00274 << " max " << idx_max(in.x) << ") out " << out << " (out.x min " 00275 << idx_min(out.x) << " max " << idx_max(out.x) << ")"); 00276 #ifdef __DEBUG__ 00277 idx<T> ldec = out.x.select(0, 0); 00278 EDEBUG(this->name() << ": class min " << idx_min(ldec) 00279 << " max " << idx_max(ldec)); 00280 idx<T> lconf = out.x.select(0, 1); 00281 EDEBUG(this->name() << ": confidence min " << idx_min(lconf) 00282 << " max " << idx_max(lconf)); 00283 #endif 00284 } 00285 00286 template <typename T, typename Tds1, typename Tds2, class Tstate> 00287 void class_answer<T,Tds1,Tds2,Tstate>:: 00288 fprop(labeled_datasource<T,Tds1,Tds2> &ds, Tstate &out) { 00289 // get label, i.e. input 2 00290 ds.fprop_label(last_label); 00291 // select the target given the class id 00292 idx<T> target = targets.select(0, (int) last_label.x.get()); 00293 // resize out if necessary 00294 idx<T> outx = out.x; 00295 if (resize_output) { 00296 idxdim d(ds.sample_dims()), dt(target.get_idxdim()); 00297 d.setdims(1); 00298 for (uint i = 0; i < dt.order(); ++i) 00299 d.setdim(i, dt.dim(i)); 00300 if (out.x.get_idxdim() != d) { 00301 if (out.x.order() != d.order()) 00302 out = Tstate(d); // re-allocating 00303 else 00304 out.resize(d); // just resizing 00305 outx = out.x; 00306 } 00307 } else { // if not resizing, narrow to the number of targets 00308 if (outx.dim(0) != target.dim(0)) 00309 outx = outx.narrow(0, target.dim(0), 0); 00310 } 00311 // copy target to output 00312 idx_copy(target, outx); 00313 } 00314 00315 template <typename T, typename Tds1, typename Tds2, class Tstate> 00316 bool class_answer<T,Tds1,Tds2,Tstate>:: 00317 correct(Tstate &answer, Tstate &label) { 00318 return (answer.x.gget(0) == label.x.gget()); 00319 } 00320 00321 template <typename T, typename Tds1, typename Tds2, class Tstate> 00322 void class_answer<T,Tds1,Tds2,Tstate>:: 00323 update_log(classifier_meter &log, intg age, idx<T> &energy, idx<T> &answer, 00324 idx<T> &label, idx<T> &target, idx<T> &rawout) { 00325 log.update(age, (uint) label.gget(0), (uint) answer.gget(0), 00326 (double) energy.gget()); 00327 } 00328 00329 template <typename T, typename Tds1, typename Tds2, class Tstate> 00330 std::string class_answer<T,Tds1,Tds2,Tstate>::describe() { 00331 std::string s; 00332 s << "class_answer module " << this->name() << " with " << targets.dim(0) 00333 << " classes, confidence type " << (int) conf_type << " and targets " 00334 << targets; 00335 if (apply_tanh) 00336 s << ", a tanh is applied to inputs"; 00337 s << ". "; 00338 print_targets(targets); 00339 return s; 00340 } 00341 00343 // scalerclass_answer 00344 00345 template <typename T, typename Tds1, typename Tds2, class Tstate> 00346 scalerclass_answer<T,Tds1,Tds2,Tstate>:: 00347 scalerclass_answer(uint nclasses, double target_factor, bool binary_target, 00348 t_confidence conf, bool apply_tanh_, uint jsize_, 00349 uint joffset_, float mgauss, bool predict_conf_, 00350 bool predict_bconf_, idx<T> *biases_, 00351 idx<T> *coeffs_, const char *name_) 00352 : class_answer<T,Tds1,Tds2,Tstate>(nclasses, target_factor, binary_target, 00353 conf, apply_tanh_, name_), 00354 jitter(1, 1), out_class(1), jsize(jsize_), joffset(joffset_), 00355 scale_mgauss(mgauss), predict_conf(predict_conf_), 00356 predict_bconf(predict_bconf_), pconf_offset(0), biases(NULL), 00357 coeffs(NULL) { 00358 resize_output = false; 00359 this->nfeatures += jsize; 00360 if (predict_conf) { 00361 pconf_offset = this->nfeatures; 00362 this->nfeatures++; 00363 } 00364 // initialize variables to log names 00365 log_fields.push_back("spatial"); 00366 log_fields.push_back("scale"); 00367 log_fields.push_back("localization"); 00368 log_fields.push_back("confidence"); 00369 log_fields.push_back("localization_total"); 00370 log_fields.push_back("confidence_total"); 00371 // coeffs & biases 00372 if (biases_) biases = new idx<T>(*biases_); 00373 if (coeffs_) coeffs = new idx<T>(*coeffs_); 00374 } 00375 00376 template <typename T, typename Tds1, typename Tds2, class Tstate> 00377 scalerclass_answer<T,Tds1,Tds2,Tstate>::~scalerclass_answer() { 00378 if (biases) delete biases; 00379 if (coeffs) delete coeffs; 00380 } 00381 00382 template <typename T, typename Tds1, typename Tds2, class Tstate> 00383 void scalerclass_answer<T,Tds1,Tds2,Tstate>::fprop(Tstate &in, Tstate &out) { 00384 // only works for 3d bufs, TODO: make this generic 00385 idx_checkorder2(in.x, 3, out.x, 3); 00386 // resize out if necessary 00387 idxdim d(in.x); 00388 // when jsize = 3, 5 outputs per pixel: class,confidence,scale,h,w 00389 d.setdim(0, 2 + jsize); 00390 if (d != out.x.get_idxdim()) 00391 out.resize(d); 00392 // narrow for regular class extraction 00393 tmp1 = in.narrow(0, targets.dim(1), 0); 00394 tmp2 = out.narrow(0, 2, 0); // class,confidence 00395 // fprop class,confidence 00396 class_answer<T,Tds1,Tds2,Tstate>::fprop(tmp1, tmp2); 00397 // copy jitter outputs 00398 idx<T> i = in.x.narrow(0, jsize, targets.dim(1)); 00399 idx<T> o = out.x.narrow(0, jsize, 2); 00400 idx_copy(i, o); 00401 // un-normalize jitter outputs 00402 if (coeffs) { 00403 idx<T> tmpcoeff = coeffs->narrow(0, jsize, 0); 00404 idx_bloop2(tc, tmpcoeff, T, oo, o, T) { 00405 idx_dotc(oo, 1 / tc.get(), oo); } 00406 } 00407 if (biases) { 00408 idx<T> tmpbias = biases->narrow(0, jsize, 0); 00409 idx_bloop2(tb, tmpbias, T, oo, o, T) { 00410 idx_addc(oo, - tb.get(), oo); } 00411 } 00412 // if conf is predicted, replace class conf by prediction 00413 if (predict_conf) { 00414 i = in.x.narrow(0, 1, pconf_offset); 00415 o = out.x.narrow(0, 1, 1); 00416 idx_copy(i, o); 00417 // un-normalize conf outputs 00418 if (coeffs) { 00419 idx<T> tmpcoeff = coeffs->narrow(0, 1, jsize); 00420 idx_bloop2(tc, tmpcoeff, T, oo, o, T) { 00421 idx_dotc(oo, 1 / tc.get(), oo); } 00422 } 00423 if (biases) { 00424 idx<T> tmpbias = biases->narrow(0, 1, jsize); 00425 idx_bloop2(tb, tmpbias, T, oo, o, T) { 00426 idx_addc(oo, - tb.get(), oo); } 00427 } 00428 // cap conf prediction by 0 and 1 00429 idx_threshold(o, (T)0); // cap below by 0 00430 idx_threshold2(o, (T)1); // cap above by 1 00431 } 00432 // // modulate confidence with scale and spatial jitter 00433 // idx_eloop1(outx, out.x, T) { 00434 // idx_eloop1(o, outx, T) { 00435 // T c = o.gget(1); 00436 // T s = o.gget(2); 00437 // T h = o.gget(3); 00438 // T w = o.gget(4); 00439 // c *= 1 - (.5 * (fabs(s - 1) + sqrt(h * h + w * w))); 00440 // o.sset(c, 1); 00441 // } 00442 // } 00443 } 00444 00445 template <typename T, typename Tds1, typename Tds2, class Tstate> 00446 void scalerclass_answer<T,Tds1,Tds2,Tstate>:: 00447 fprop(labeled_datasource<T,Tds1,Tds2> &ds, Tstate &out) { 00448 // if out has the wrong order, allocate. 00449 if (out.x.order() != ds.sample_dims().order()) { 00450 idxdim d = ds.sample_dims(); 00451 d.setdims(1); 00452 out = Tstate(d); 00453 } 00454 // resize out if necessary 00455 idx<T> target = targets.select(0, 0); 00456 if (out_class.x.get_idxdim() != target.get_idxdim()) 00457 out_class.resize(target.get_idxdim()); 00458 // fprop regular target 00459 class_answer<T,Tds1,Tds2,Tstate>::fprop(ds, out_class); 00460 uint jitt_offset = out_class.x.dim(0); 00461 // get jitter info 00462 ds.fprop_jitter(jitter); 00463 idx<T> jitt = jitter.x.narrow(1, jsize, joffset); 00464 // resize out if necessary 00465 idxdim d(out.x); 00466 d.setdim(0, jitt.dim(0)); 00467 d.setdim(1, this->nfeatures); 00468 if (out.x.get_idxdim() != d) 00469 out.resize(d); 00470 // replicate class targets 00471 idx<T> outc = out.x.narrow(1, out_class.x.dim(0), 0); 00472 idx_bloop1(tgt, outc, T) { 00473 idx_copy(out_class.x, tgt); 00474 } 00475 outc = out.x.narrow(1, jsize, jitt_offset); 00476 idx_copy(jitt, outc); 00477 // when predicting confidence, default (negative) target confidence is 0.0 00478 if (predict_conf) { 00479 outc = out.x.narrow(1, 1, pconf_offset); 00480 idx_fill(outc, (T) 0); 00481 } 00482 // modulate confidence by scale and visibility 00483 idxdim sd = ds.sample_dims(); 00484 rect<float> netrec(0, 0, sd.dim(1), sd.dim(2)); 00485 bbstate_idx<Tds2> label; 00486 if (out.x.dim(1) != 5 && out.x.dim(1) != 6) 00487 eblerror("expected 5 or 6 elts in dim 1 of " << out.x); 00488 { idx_bloop1(tgt, out.x, T) { 00489 T s = tgt.gget(jitt_offset); // scale 00490 if (s != 0) { 00491 // compute target box 00492 T h = tgt.gget(jitt_offset + 1); // height offset 00493 T w = tgt.gget(jitt_offset + 2); // width offset 00494 rect<float> r(netrec); 00495 r.h0 += h * netrec.height; 00496 r.w0 += w * netrec.height; 00497 r.scale_centered(1 / s, 1 / s); 00498 // compute visibility ratio 00499 float vis = r.overlap_ratio(netrec); 00500 // compute confidence given visibility (output is [0,1]) 00501 // gnuplot: set yrange[0:1];set xrange[0:1]; plot tanh(x*20 - 18)/1.4+.33 00502 T visconf = (T) (tanh(vis * 20 - 18) / 1.4 + .33); 00503 // compute confidence given scale (output is [0,1]) 00504 // gnuplot: set yrange[0:1];set xrange[0:3];plot (exp(-(x-1.5)*(x-1.5)/(2 * .2)) * 4 - 1)/2+.5 00505 T sconf = std::min((T) 1.0, (T) 00506 ((exp(-(s - scale_mgauss) * (s - scale_mgauss) 00507 / (2 * .2)) * 4 - 1)/2+.5)); 00508 // compute distance to center (the closer the higher the conf) 00509 // set xrange[-1:1];set yrange[-1:1];plot exp(-2*sqrt(x*x)) 00510 //T dconf = (T) (exp(-2 * sqrt((double) h*h + w*w))); 00511 // take minimum of all confs for final confidence 00512 // T final_conf = std::min(visconf, std::min(dconf, sconf)); 00513 T final_conf = std::min(visconf, sconf); 00514 EDEBUG("s: " << s << " h: " << h << " w: " << w << " sconf: " << sconf 00515 << " visconf: " << visconf 00516 // << " dconf: " << dconf 00517 << " final: " << final_conf); 00518 // update confidence target 00519 if (predict_conf) { // fill additional confidence feature 00520 if (predict_bconf) { // target conf is binary 00521 if (final_conf < .5) 00522 tgt.sset((T)0, pconf_offset); 00523 else 00524 tgt.sset((T)1, pconf_offset); 00525 } else // target conf is continuous 00526 tgt.sset(final_conf, pconf_offset); 00527 } else { // modulate positive object's target directly 00528 ds.fprop_label(label); // get positive offset 00529 tgt.sset(final_conf, (int)label.x.get()); 00530 } 00531 } 00532 }} 00533 } 00534 00535 template <typename T, typename Tds1, typename Tds2, class Tstate> 00536 void scalerclass_answer<T,Tds1,Tds2,Tstate>:: 00537 update_log(classifier_meter &log, intg age, idx<T> &energy, idx<T> &answer, 00538 idx<T> &label, idx<T> &target, idx<T> &rawout) { 00539 // regular classification logging 00540 class_answer<T,Tds1,Tds2,Tstate>::update_log(log, age, energy, answer, 00541 label, target, rawout); 00542 // additional logging 00543 if (log.log_fields.size() == 0) { // initialize variable strings 00544 log.log_values.clear(); 00545 for (uint i = 0; i < log_fields.size(); ++i) { 00546 log.log_fields.push_back(log_fields[i]); 00547 log.log_values.push_back(0.0); 00548 log.total_values.push_back(0.0); 00549 } 00550 } 00551 // determine confidence location 00552 uint jittoff = target.dim(0) - jsize; 00553 if (predict_conf) jittoff--; 00554 uint conf_off; 00555 if (predict_conf) 00556 conf_off = target.dim(0) - 1; 00557 else { 00558 idx<T> ctgt = target.narrow(0, jittoff, 0); 00559 conf_off = idx_indexmax(ctgt); 00560 } 00561 T conf_target = target.gget(conf_off); 00562 T s = target.gget(jittoff); 00563 // update localization values only if target conf is > .5 and positive 00564 if (conf_target > .5 && s > 0) { 00565 T herr = target.gget(jittoff + 1) - answer.gget(3); 00566 T werr = target.gget(jittoff + 2) - answer.gget(4); 00567 T spatial_err = (T) sqrt((double) herr * herr + werr * werr); // spatial 00568 T scale_err = (T) fabs((double) target.gget(jittoff) - answer.gget(2));; // scale 00569 log.log_values[0] += spatial_err; // spatial 00570 log.log_values[1] += scale_err; // scale 00571 log.log_values[2] += spatial_err + scale_err / 2; // localization 00572 log.log_values[4] += 1; // count samples 00573 // normalization 00574 log.total_values[0] = log.log_values[4]; // normalization total 00575 log.total_values[1] = log.log_values[4]; // normalization total 00576 log.total_values[2] = log.log_values[4]; // normalization total 00577 log.total_values[4] = 1; // normalization total 00578 } 00579 // update confidence value if positive example 00580 if (s > 0) { 00581 log.log_values[3] += fabs((double) rawout.gget(conf_off) - target.gget(conf_off)); 00582 log.log_values[5] += 1; // count samples 00583 // normalization 00584 log.total_values[3] = log.log_values[5]; // normalization total 00585 log.total_values[5] = 1; // normalization total 00586 } 00587 } 00588 00589 template <typename T, typename Tds1, typename Tds2, class Tstate> 00590 std::string scalerclass_answer<T,Tds1,Tds2,Tstate>::describe() { 00591 std::string s; 00592 s << "scalerclass_answer module " << this->name() << " with " 00593 << targets.dim(0) 00594 << " classes, confidence type " << (uint) conf_type << " and targets " 00595 << targets << ", jitter size " << jsize << " with offset " << joffset; 00596 if (apply_tanh) 00597 s << ", a tanh is applied to inputs"; 00598 if (predict_conf) 00599 s << ", predicting confidence"; 00600 s << ", target confidence is " << (predict_bconf?"binary":"continuous"); 00601 s << ", biases: "; 00602 if (biases) 00603 biases->printElems(s); 00604 else 00605 s <<"none"; 00606 s << ", coeffs: "; 00607 if (coeffs) 00608 coeffs->printElems(s); 00609 else 00610 s <<"none"; 00611 s << ". "; 00612 print_targets(targets); 00613 return s; 00614 } 00615 00617 // scaler_answer 00618 00619 template <typename T, typename Tds1, typename Tds2, class Tstate> 00620 scaler_answer<T,Tds1,Tds2,Tstate>:: 00621 scaler_answer(uint negative_id_, uint positive_id_, bool raw_confidence_, 00622 float threshold_, bool spatial_, const char *name_) 00623 : answer_module<T,Tds1,Tds2,Tstate>(spatial_ ? 3 : 1, name_), 00624 negative_id(negative_id_), positive_id(positive_id_), 00625 raw_confidence(raw_confidence_), jitter(1), threshold((T) threshold_), 00626 spatial(spatial_), jsize(answer_module<T,Tds1,Tds2,Tstate>::nfeatures) { 00627 } 00628 00629 template <typename T, typename Tds1, typename Tds2, class Tstate> 00630 scaler_answer<T,Tds1,Tds2,Tstate>::~scaler_answer() { 00631 } 00632 00633 template <typename T, typename Tds1, typename Tds2, class Tstate> 00634 void scaler_answer<T,Tds1,Tds2,Tstate>::fprop(Tstate &in, Tstate &out) { 00635 // only works for 3d bufs, TODO: make this generic 00636 idx_checkorder2(in.x, 3, out.x, 3); 00637 idx_checkdim1(in.x, 0, jsize); 00638 // resize out if necessary 00639 idxdim d(in.x); 00640 // 5 outputs per pixel: class,confidence,scale,h,w 00641 d.setdim(0, 2 + jsize); 00642 if (d != out.x.get_idxdim()) 00643 out.resize(d); 00644 // loop on features (dimension 0) to set answers 00645 uint classid; T conf; 00646 idx_eloop2(ii, in.x, T, oo, out.x, T) { 00647 idx_eloop2(iii, ii, T, ooo, oo, T) { 00648 // set class answer 00649 T i = iii.get(0); 00650 classid = ((i <= threshold) ? negative_id : positive_id); 00651 ooo.set((T) classid, 0); // set classid answer 00652 if (raw_confidence) { 00653 ooo.set(i, 1); // conf is simply the output 00654 ooo.set(i, 2); 00655 } else { // confidence is the position in the margin area 00656 conf = std::min((T)1, std::max((T)0, (T) ((i + 1) / 2))); 00657 if (classid == negative_id) { 00658 ooo.set((T) 1 - conf, 1); // conf 00659 ooo.set((T) 0, 2); 00660 if (spatial) { 00661 ooo.set((T) 0, 3); 00662 ooo.set((T) 0, 4); 00663 } 00664 //ooo.set((T) std::min((T) 1, std::max(0, -i - 1)), 1); // conf 00665 // ooo.set(std::max((T)0, -i), 1); // conf 00666 } else { 00667 ooo.set(conf, 1); // conf 00668 ooo.set(i, 2); // scale answer 00669 if (spatial) { 00670 ooo.set(iii.get(1), 3); // h answer 00671 ooo.set(iii.get(2), 4); // w answer 00672 } 00673 //ooo.set((T) std::min((T) 1, std::max((T) 0, i / 2 + 1)), 1); // conf 00674 // ooo.set(std::max((T) 0, i + 1), 1); // conf 00675 } 00676 } 00677 } 00678 } 00679 } 00680 00681 template <typename T, typename Tds1, typename Tds2, class Tstate> 00682 void scaler_answer<T,Tds1,Tds2,Tstate>:: 00683 fprop(labeled_datasource<T,Tds1,Tds2> &ds, Tstate &out){ 00684 // check output size 00685 idxdim d(out.x); 00686 d.setdim(0, jsize); 00687 if (out.x.get_idxdim() != d) 00688 out.resize(d); 00689 // set scale jitter as single output 00690 ds.fprop_jitter(jitter); 00691 idx_copy(jitter.x, out.x); // scale,h,w jitter 00692 } 00693 00694 template <typename T, typename Tds1, typename Tds2, class Tstate> 00695 std::string scaler_answer<T,Tds1,Tds2,Tstate>::describe() { 00696 std::string s; 00697 s << "scaler_answer module " << this->name() << " with negative id " 00698 << negative_id << " and positive id " << positive_id 00699 << ", confidence is the "; 00700 if (spatial) 00701 s << ", using spatial jitter"; 00702 if (raw_confidence) 00703 s << "raw output."; 00704 else 00705 s << "position in the margin area."; 00706 return s; 00707 } 00708 00710 // regression_answer 00711 00712 template <typename T, typename Tds1, typename Tds2, class Tstate> 00713 regression_answer<T,Tds1,Tds2,Tstate>:: 00714 regression_answer(uint nfeatures_, float64 threshold_, const char *name_) 00715 : answer_module<T,Tds1,Tds2,Tstate>(nfeatures_, name_), 00716 threshold(threshold_) { 00717 } 00718 00719 template <typename T, typename Tds1, typename Tds2, class Tstate> 00720 regression_answer<T,Tds1,Tds2,Tstate>::~regression_answer() { 00721 } 00722 00723 template <typename T, typename Tds1, typename Tds2, class Tstate> 00724 void regression_answer<T,Tds1,Tds2,Tstate>::fprop(Tstate &in, Tstate &out) { 00725 // resize out if necessary 00726 idxdim d(in.x); 00727 if (d != out.x.get_idxdim()) 00728 out.resize(d); 00729 idx_copy(in.x, out.x); 00730 } 00731 00732 template <typename T, typename Tds1, typename Tds2, class Tstate> 00733 void regression_answer<T,Tds1,Tds2,Tstate>:: 00734 fprop(labeled_datasource<T,Tds1,Tds2> &ds, Tstate &out) { 00735 ds.fprop_label_net(out); 00736 } 00737 00738 template <typename T, typename Tds1, typename Tds2, class Tstate> 00739 bool regression_answer<T,Tds1,Tds2,Tstate>:: 00740 correct(Tstate &answer, Tstate &label) { 00741 if (idx_l1(answer.x, label.x) <= threshold) 00742 return true; 00743 return false; 00744 } 00745 00746 template <typename T, typename Tds1, typename Tds2, class Tstate> 00747 void regression_answer<T,Tds1,Tds2,Tstate>:: 00748 update_log(classifier_meter &log, intg age, idx<T> &energy, idx<T> &answer, 00749 idx<T> &label, idx<T> &target, idx<T> &rawout) { 00750 log.update(age, (bool)(idx_l1(answer, label) <= threshold), 00751 (double) energy.gget()); 00752 } 00753 00754 template <typename T, typename Tds1, typename Tds2, class Tstate> 00755 std::string regression_answer<T,Tds1,Tds2,Tstate>::describe() { 00756 std::string s; 00757 s << "regression_answer module " << this->name() << " with threshold " 00758 << threshold << " for correctness classification"; 00759 return s; 00760 } 00761 00763 // vote_answer 00764 00765 template <typename T, typename Tds1, typename Tds2, class Tstate> 00766 vote_answer<T,Tds1,Tds2,Tstate>:: 00767 vote_answer(uint nclasses, double target_factor, bool binary_target_, 00768 t_confidence conf, bool apply_tanh_, const char *name_) 00769 : class_answer<T,Tds1,Tds2,Tstate>(nclasses, target_factor, binary_target_, 00770 conf, apply_tanh_, name_) { 00771 } 00772 00773 template <typename T, typename Tds1, typename Tds2, class Tstate> 00774 vote_answer<T,Tds1,Tds2,Tstate>::~vote_answer() { 00775 } 00776 00777 template <typename T, typename Tds1, typename Tds2, class Tstate> 00778 void vote_answer<T,Tds1,Tds2,Tstate>::fprop(Tstate &in, Tstate &out) { 00779 // check that in's dim 0 is a multiple of nfeatures 00780 if (in.x.dim(0) % this->nfeatures != 0) 00781 eblerror("expected number of features to be multiple of " 00782 << this->nfeatures); 00783 Tstate i; 00784 idx<T> confidences(this->nfeatures); 00785 idx_clear(confidences); 00786 // loop on all concatenated outputs 00787 uint off; 00788 // uint bestid = 0; 00789 T bestconf = 0; 00790 for (off = 0; off < in.x.dim(0); off += this->nfeatures) { 00791 i = in.narrow(0, this->nfeatures, off); 00792 class_answer<T,Tds1,Tds2,Tstate>::fprop(i, out); 00793 // accumulate confidence 00794 uint index = (uint) out.x.get(0); 00795 confidences.set(out.x.get(1) + confidences.get(index), index); 00796 cout << "id: " << index << " conf: " << out.x.get(1) << endl; 00797 if (bestconf < out.x.get(1)) { 00798 bestconf = out.x.get(1); 00799 //bestid = index; 00800 } 00801 } 00802 // take strongest confidence as vote 00803 intg id = idx_indexmax(confidences); 00804 T conf = confidences.get(id) / (T) (off / this->nfeatures); 00805 // out.x.sset(bestid, 0); // class id 00806 // out.x.sset(bestconf, 1); // confidence 00807 out.x.sset((T) id, 0); // class id 00808 out.x.sset(conf, 1); // confidence 00809 cout << "vote: id: " << id << " conf: " << conf << endl; 00810 } 00811 00813 // trainable_module 00814 00815 template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2, 00816 class Ten> 00817 trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>:: 00818 trainable_module(ebm_2<Tin1,Tin2,Ten> &energy_, module_1_1<T,Tin1> &mod1_, 00819 module_1_1<T,Tin2> *mod2_, 00820 answer_module<T,Tds1,Tds2,Tin1> *dsmod1_, 00821 answer_module<T,Tds1,Tds2,Tin2> *dsmod2_, const char *name_, 00822 const char *switcher) 00823 : energy_mod(energy_), mod1(mod1_), mod2(mod2_), dsmod1(dsmod1_), 00824 dsmod2(dsmod2_), ms_switch(NULL) 00825 // TODO: fix hardcorded order 00826 // in1(1,1,1), out1(1,1,1), in2(1,1,1), out2(1,1,1), answers(1,1,1), 00827 // targets(1,1,1), mod_name(name_), tmp_energy(1,1,1) 00828 { 00829 // try to find switcher module in mod1 00830 if (switcher) { 00831 std::vector<ms_module<T,Tin1>*> all = arch_find_all(&mod1, ms_switch); 00832 for (uint i = 0; i < all.size(); ++i) 00833 if (!strcmp(all[i]->name(), switcher)) { 00834 ms_switch = all[i]; 00835 break ; 00836 } 00837 } 00838 } 00839 00840 template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2, 00841 class Ten> 00842 trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::~trainable_module() { 00843 } 00844 00845 template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2, 00846 class Ten> 00847 void trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::update_scale 00848 (labeled_datasource<T,Tds1,Tds2> &ds) { 00849 // update switch data 00850 if (ms_switch && ds.has_scales()) ms_switch->set_switch(ds.fprop_scale()); 00851 } 00852 00853 template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2, 00854 class Ten> 00855 void trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>:: 00856 fprop(labeled_datasource<T,Tds1,Tds2> &ds, Ten &energy) { 00857 // flow 1 ////////////////////////////////////////////////////////////////// 00858 TIMING2("between end of fprop/bprop and sample retrieval"); 00859 if (ds.mstate_samples() || mod1.mstate_input()) {//input must be multi-state 00860 // produce state 1 00861 if (dsmod1) // specific data production 00862 dsmod1->fprop(ds, msin1); 00863 else // generic, simply take ds' input 1 00864 ds.fprop_data(msin1); 00865 TIMING2("sample retrieval"); 00866 // fprop flow 1 00867 update_scale(ds); 00868 mod1.fprop(msin1, out1); 00869 TIMING2("entire fprop"); 00870 } else { 00871 // produce state 1 00872 if (dsmod1) // specific data production 00873 dsmod1->fprop(ds, in1); 00874 else // generic, simply take ds' input 1 00875 ds.fprop_data(in1); 00876 TIMING2("sample retrieval"); 00877 // fprop flow 1 00878 mod1.fprop(in1, out1); 00879 TIMING2("entire fprop"); 00880 } 00881 00882 // flow 2 ////////////////////////////////////////////////////////////////// 00883 Tin2 *i2 = &in2; 00884 if (!mod2) // no main module in flow 2, put input directly into out2 00885 i2 = &out2; 00886 // produce state 2 00887 if (dsmod2) // specific data production 00888 dsmod2->fprop(ds, *i2); 00889 else // generic, simply take ds' input 2 00890 ds.fprop_label_net(*i2); 00891 // fprop flow 2 00892 if (mod2) 00893 mod2->fprop(*i2, out2); 00894 00895 // energy ////////////////////////////////////////////////////////////////// 00896 energy_mod.fprop(out1, out2, energy); 00897 } 00898 00899 template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2, 00900 class Ten> 00901 void trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>:: 00902 bprop(labeled_datasource<T,Tds1,Tds2> &ds, Ten &energy) { 00903 TIMING2("until beginning of bprop"); 00904 // clear buffers 00905 out1.clear_dx(); 00906 out2.clear_dx(); 00907 // bprop 00908 energy_mod.bprop(out1, out2, energy); 00909 if (ds.mstate_samples() || mod1.mstate_input()) 00910 mod1.bprop(msin1, out1); 00911 else 00912 mod1.bprop(in1, out1); 00913 if (dsmod2) 00914 dsmod2->bprop(ds, out2); 00915 TIMING2("entire bprop"); 00916 } 00917 00918 template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2, 00919 class Ten> 00920 void trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>:: 00921 bbprop(labeled_datasource<T,Tds1,Tds2> &ds, Ten &energy) { 00922 TIMING2("until beginning of bbprop"); 00923 // clear buffers 00924 out1.clear_ddx(); 00925 out2.clear_ddx(); 00926 // bbprop 00927 energy_mod.bbprop(out1, out2, energy); 00928 if (ds.mstate_samples() || mod1.mstate_input()) 00929 mod1.bbprop(msin1, out1); 00930 else 00931 mod1.bbprop(in1, out1); 00932 if (dsmod2) 00933 dsmod2->bbprop(ds, out2); 00934 TIMING2("entire bbprop"); 00935 } 00936 00937 template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2, 00938 class Ten> 00939 int trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>:: 00940 infer2(labeled_datasource<T,Tds1,Tds2> &ds, Ten &energy) { 00941 eblerror("not implemented"); 00942 return 0; 00943 } 00944 00945 template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2, 00946 class Ten> 00947 void trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>:: 00948 forget(forget_param_linear &fp) { 00949 mod1.forget(fp); 00950 if (mod2) 00951 mod2->forget(fp); 00952 if (dsmod2) 00953 dsmod2->forget(fp); 00954 } 00955 00956 template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2, 00957 class Ten> 00958 const Tin1& trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::compute_answers() { 00959 if (!dsmod2) 00960 eblerror("dsmod2 must be defined to compute answers"); 00961 dsmod2->fprop(out1, answers); 00962 return answers; 00963 } 00964 00965 template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2, 00966 class Ten> 00967 bool trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::correct(Tin1 &answer, 00968 Tin1 &label) { 00969 if (!dsmod2) 00970 eblerror("dsmod2 must be defined to compute correctness"); 00971 return dsmod2->correct(answer, label); 00972 } 00973 00974 template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2, 00975 class Ten> 00976 void trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>:: 00977 update_log(classifier_meter &log, intg age, idx<T> &energy, idx<T> &answer, 00978 idx<T> &label, idx<T> &target, idx<T> &rawout) { 00979 if (!dsmod2) 00980 eblerror("dsmod2 must be defined to update log"); 00981 dsmod2->update_log(log, age, energy, answer, label, target, rawout); 00982 } 00983 00984 template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2, 00985 class Ten> 00986 void trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::compute_answers(Tin1 &ans) { 00987 if (!dsmod2) 00988 eblerror("dsmod2 must be defined to compute answers"); 00989 dsmod2->fprop(out1, ans); 00990 } 00991 00992 template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2, 00993 class Ten> 00994 idx<T> trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>:: 00995 compute_targets(labeled_datasource<T,Tds1,Tds2> &ds) { 00996 if (!dsmod2) 00997 eblerror("dsmod2 must be defined to compute targets"); 00998 scalerclass_energy<T,Tin1> *sce = 00999 dynamic_cast<scalerclass_energy<T,Tin1>*>(&energy_mod); 01000 if (sce) 01001 targets.x = sce->last_target_raw; 01002 else 01003 dsmod2->fprop(ds, targets); 01004 return targets.x; 01005 } 01006 01007 template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2, 01008 class Ten> 01009 const char *trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::name() { 01010 return mod_name.c_str(); 01011 } 01012 01013 template <typename T, typename Tds1, typename Tds2, class Tin1, class Tin2, 01014 class Ten> 01015 std::string trainable_module<T,Tds1,Tds2,Tin1,Tin2,Ten>::describe() { 01016 std::string s; 01017 s << "trainer module " << this->name() << ": " << energy_mod.describe(); 01018 if (dsmod2) 01019 s << ", " << dsmod2->describe(); 01020 return s; 01021 } 01022 01023 // utility functions ///////////////////////////////////////////////////////// 01024 01025 template <typename T> 01026 void print_targets(idx<T> &targets) { 01027 cout << "Targets: " << targets << endl; 01028 if (targets.nelements() < 500) 01029 cout << targets.str() << endl; 01030 } 01031 01032 } // end namespace ebl 01033 01034 #endif /*EBL_ANSWER_HPP_*/