// libidx
00001 /*************************************************************************** 00002 * Copyright (C) 2012 by Pierre Sermanet * 00003 * pierre.sermanet@gmail.com * 00004 * All rights reserved. 00005 * 00006 * Redistribution and use in source and binary forms, with or without 00007 * modification, are permitted provided that the following conditions are met: 00008 * * Redistributions of source code must retain the above copyright 00009 * notice, this list of conditions and the following disclaimer. 00010 * * Redistributions in binary form must reproduce the above copyright 00011 * notice, this list of conditions and the following disclaimer in the 00012 * documentation and/or other materials provided with the distribution. 00013 * * Redistribution under a license not approved by the Open Source 00014 * Initiative (http://www.opensource.org) must display the 00015 * following acknowledgement in all advertising material: 00016 * This product includes software developed at the Courant 00017 * Institute of Mathematical Sciences (http://cims.nyu.edu). 00018 * * The names of the authors may not be used to endorse or promote products 00019 * derived from this software without specific prior written permission. 00020 * 00021 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED 00022 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 00023 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 00024 * DISCLAIMED. 
IN NO EVENT SHALL ThE AUTHORS BE LIABLE FOR ANY 00025 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 00026 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 00027 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 00028 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00029 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 00030 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00031 ***************************************************************************/ 00032 00033 #ifndef IPP_H 00034 #define IPP_H 00035 00036 #include "config.h" 00037 #include "numerics.h" 00038 #include "idx.h" 00039 #include "ippops.h" 00040 00041 #ifdef __IPP__ 00042 00043 namespace ebl { 00044 00045 // idx_copy ////////////////////////////////////////////////////////////////// 00046 00048 template <> void idx_copy(const idx<ubyte> &src, idx<ubyte> &dst); 00050 template <> void idx_copy(const idx<uint16> &src, idx<uint16> &dst); 00052 template <> void idx_copy(const idx<int16> &src, idx<int16> &dst); 00054 template <> void idx_copy(const idx<int32> &src, idx<int32> &dst); 00055 00056 // idx_clear ///////////////////////////////////////////////////////////////// 00057 00059 template<> void idx_clear(idx<ubyte> &inp); 00061 template<> void idx_clear(idx<uint16> &inp); 00063 template<> void idx_clear(idx<int16> &inp); 00065 template<> void idx_clear(idx<float32> &inp); 00066 00067 // idx_fill ////////////////////////////////////////////////////////////////// 00068 00070 template<> void idx_fill(idx<ubyte> &inp, ubyte v); 00072 template<> void idx_fill(idx<uint16> &inp, uint16 v); 00074 template<> void idx_fill(idx<int16> &inp, int16 v); 00076 template<> void idx_fill(idx<float32> &inp, float32 v); 00077 00078 // idx_add (not-in-place) //////////////////////////////////////////////////// 00079 00081 template<> void idx_add(idx<ubyte> &i1, 
idx<ubyte> &i2, idx<ubyte> &out); 00083 template<> void idx_add(idx<uint16> &i1, idx<uint16> &i2, idx<uint16> &out); 00085 template<> void idx_add(idx<int16> &i1, idx<int16> &i2, idx<int16> &out); 00087 template<> void idx_add(idx<float32> &i1, idx<float32> &i2, 00088 idx<float32> &out); 00089 00090 // idx_add (in-place) //////////////////////////////////////////////////////// 00091 00093 template<> void idx_add(idx<ubyte> &in, idx<ubyte> &out); 00095 template<> void idx_add(idx<uint16> &in, idx<uint16> &out); 00097 template<> void idx_add(idx<int16> &in, idx<int16> &out); 00098 00099 // idx_addc ////////////////////////////////////////////////////////////////// 00100 // TODO: add inplace addc, which can be used for speed up in 00101 // bias modules for example where output is the input 00102 00104 template<> void idx_addc(idx<ubyte> &inp, ubyte c, idx<ubyte> &out); 00106 template<> void idx_addc(idx<uint16> &inp, uint16 c, idx<uint16> &out); 00108 template<> void idx_addc(idx<int16> &inp, int16 c, idx<int16> &out); 00110 template<> void idx_addc(idx<float32> &inp, float32 c, idx<float32> &out); 00111 00112 // idx_addc_bounded ////////////////////////////////////////////////////////// 00113 00115 // specialized ubyte version 00116 template<> void idx_addc_bounded(idx<ubyte> &inp, ubyte c, idx<ubyte> &out); 00118 // specialized uint16 version 00119 template<> void idx_addc_bounded(idx<uint16> &inp, uint16 c, 00120 idx<uint16> &out); 00122 // specialized int16 version 00123 template<> void idx_addc_bounded(idx<int16> &inp, int16 c, idx<int16> &out); 00125 // specialized float32 version 00126 template<> void idx_addc_bounded(idx<float32> &inp, float32 c, 00127 idx<float32> &out); 00128 00129 // idx_sub (not-in-place) //////////////////////////////////////////////////// 00130 00132 template<> void idx_sub(idx<ubyte> &i1, idx<ubyte> &i2, idx<ubyte> &out); 00134 template<> void idx_sub(idx<uint16> &i1, idx<uint16> &i2, idx<uint16> &out); 00136 template<> void 
idx_sub(idx<int16> &i1, idx<int16> &i2, idx<int16> &out); 00138 template<> void idx_sub(idx<float32> &i1, idx<float32> &i2, 00139 idx<float32> &out); 00140 00141 // idx_subc_bounded ////////////////////////////////////////////////////////// 00142 00144 // specialized ubyte version 00145 template<> void idx_subc_bounded(idx<ubyte> &inp, ubyte c, idx<ubyte> &out); 00147 // specialized uint16 version 00148 template<> 00149 void idx_subc_bounded(idx<uint16> &inp, uint16 c, idx<uint16> &out); 00151 // specialized int16 version 00152 template<> void idx_subc_bounded(idx<int16> &inp, int16 c, idx<int16> &out); 00154 // specialized float32 version 00155 template<> 00156 void idx_subc_bounded(idx<float32> &inp, float32 c, idx<float32> &out); 00157 00158 // idx_minus ///////////////////////////////////////////////////////////////// 00159 00161 template<> void idx_minus(idx<int16> &in, idx<int16> &out); 00163 template<> void idx_minus(idx<float32> &in, idx<float32> &out); 00164 00165 // idx_minus_acc ///////////////////////////////////////////////////////////// 00166 00168 template<> void idx_minus_acc(idx<int16> &in, idx<int16> &out); 00170 template<> void idx_minus_acc(idx<float32> &in, idx<float32> &out); 00171 00172 // idx_mul (not-in-place) //////////////////////////////////////////////////// 00173 00175 template<> void idx_mul(idx<ubyte> &i1, idx<ubyte> &i2, idx<ubyte> &out); 00177 template<> void idx_mul(idx<uint16> &i1, idx<uint16> &i2, idx<uint16> &out); 00179 template<> void idx_mul(idx<int16> &i1, idx<int16> &i2, idx<int16> &out); 00181 template<> void idx_mul(idx<float32> &i1, idx<float32> &i2, 00182 idx<float32> &out); 00183 00184 // idx_dotc ////////////////////////////////////////////////////////////////// 00185 00187 template<> void idx_dotc(idx<ubyte> &inp, ubyte c, idx<ubyte> &out); 00189 template<> void idx_dotc(idx<uint16> &inp, uint16 c, idx<uint16> &out); 00191 template<> void idx_dotc(idx<int16> &inp, int16 c, idx<int16> &out); 00193 template<> void 
idx_dotc(idx<float32> &inp, float32 c, idx<float32> &out); 00194 00195 // idx_dotc_bounded ////////////////////////////////////////////////////////// 00196 00198 // specialized ubyte version 00199 template<> void idx_dotc_bounded(idx<ubyte> &inp, ubyte c, idx<ubyte> &out); 00201 // specialized uint16 version 00202 template<> 00203 void idx_dotc_bounded(idx<uint16> &inp, uint16 c, idx<uint16> &out); 00205 // specialized int16 version 00206 template<> void idx_dotc_bounded(idx<int16> &inp, int16 c, idx<int16> &out); 00208 // specialized float32 version 00209 template<> 00210 void idx_dotc_bounded(idx<float32> &inp, float32 c, idx<float32> &out); 00211 00212 // idx_div /////////////////////////////////////////////////////////////////// 00213 00215 template<> void idx_div(idx<ubyte> &i1, idx<ubyte> &i2, idx<ubyte> &out); 00217 template<> void idx_div(idx<uint16> &i1, idx<uint16> &i2, idx<uint16> &out); 00219 template<> void idx_div(idx<int16> &i1, idx<int16> &i2, idx<int16> &out); 00221 template<> void idx_div(idx<float32> &i1, idx<float32> &i2, 00222 idx<float32> &out); 00223 00224 // idx_inv /////////////////////////////////////////////////////////////////// 00225 00228 template<> void idx_inv(idx<float32> &inp, idx<float32> &out); 00229 00230 // idx_abs /////////////////////////////////////////////////////////////////// 00231 00233 template<> void idx_abs(idx<int16>& inp, idx<int16>& out); 00235 template<> void idx_abs(idx<float32>& inp, idx<float32>& out); 00236 00237 // idx_threshold (in-place) ////////////////////////////////////////////////// 00238 00240 template<> void idx_threshold(idx<ubyte>& in, ubyte th); 00242 template<> void idx_threshold(idx<uint16>& in, uint16 th); 00244 template<> void idx_threshold(idx<int16>& in, int16 th); 00246 template<> void idx_threshold(idx<float32>& in, float32 th); 00247 00248 // idx_threshold (not-in-place) ////////////////////////////////////////////// 00249 00252 template<> void idx_threshold(idx<ubyte>& in, ubyte th, 
idx<ubyte>& out); 00255 template<> void idx_threshold(idx<uint16>& in, uint16 th, idx<uint16>& out); 00258 template<> void idx_threshold(idx<int16>& in, int16 th, idx<int16>& out); 00261 template<> void idx_threshold(idx<float32>& in, float32 th,idx<float32>& out); 00262 00263 // idx_threshold (with value, in-place) ////////////////////////////////////// 00264 00266 template<> void idx_threshold(idx<ubyte>& in, ubyte th, ubyte value); 00268 template<> void idx_threshold(idx<uint16>& in, uint16 th, uint16 value); 00270 template<> void idx_threshold(idx<int16>& in, int16 th, int16 value); 00272 template<> void idx_threshold(idx<float32>& in, float32 th, float32 value); 00273 00274 // idx_threshold (with value, not-in-place) ////////////////////////////////// 00275 00278 template<> 00279 void idx_threshold(idx<ubyte>& in, ubyte th, ubyte value, idx<ubyte>& out); 00282 template<> 00283 void idx_threshold(idx<uint16>& in, uint16 th, uint16 value,idx<uint16>& out); 00286 template<> 00287 void idx_threshold(idx<int16>& in, int16 th, int16 value, idx<int16>& out); 00290 template<> 00291 void idx_threshold(idx<float32>& in, float32 th, 00292 float32 value, idx<float32>& out); 00293 00294 // idx_sqrt ////////////////////////////////////////////////////////////////// 00295 00297 template<> void idx_sqrt(idx<ubyte>& inp, idx<ubyte>& out); 00299 template<> void idx_sqrt(idx<uint16>& inp, idx<uint16>& out); 00301 template<> void idx_sqrt(idx<int16>& inp, idx<int16>& out); 00303 template<> void idx_sqrt(idx<float32>& inp, idx<float32>& out); 00304 00305 // idx_exp /////////////////////////////////////////////////////////////////// 00306 00308 template<> EXPORT void idx_exp(idx<ubyte>& inp); 00310 template<> EXPORT void idx_exp(idx<uint16>& inp); 00312 template<> void idx_exp(idx<int16>& inp); 00314 //template<> void idx_exp(idx<float32>& inp); 00315 00316 // idx_sum /////////////////////////////////////////////////////////////////// 00317 00318 #if 0 //TODO 00319 00321 template<> 
ubyte idx_sum(idx<ubyte> &inp, ubyte *out); 00323 template<> uint16 idx_sum(idx<uint16> &inp, uint16 *out); 00325 template<> int16 idx_sum(idx<int16> &inp, int16 *out); 00326 00327 #if defined(__OPENMP__) and defined(__USE_SSE__) 00328 00329 template<> float32 idx_sum(idx<float32> &inp, float32 *out); 00330 #endif 00331 00332 #endif 00333 00334 // idx_sumabs //////////////////////////////////////////////////////////////// 00335 00337 template<> float64 idx_sumabs(idx<ubyte> &inp, ubyte *out); 00339 template<> float64 idx_sumabs(idx<uint16> &inp, uint16 *out); 00341 template<> float64 idx_sumabs(idx<int16> &inp, int16 *out); 00343 template<> float64 idx_sumabs(idx<float32> &inp, float32 *out); 00344 00345 // idx_l2norm //////////////////////////////////////////////////////////////// 00346 00348 template<> float64 idx_l2norm(idx<ubyte> &in); 00350 template<> float64 idx_l2norm(idx<uint16> &in); 00352 template<> float64 idx_l2norm(idx<int16> &in); 00354 template<> float64 idx_l2norm(idx<float32> &in); 00355 00356 // idx_mean ////////////////////////////////////////////////////////////////// 00357 00359 template<> ubyte idx_mean(idx<ubyte> &in, ubyte* out); 00361 template<> uint16 idx_mean(idx<uint16> &in, uint16* out); 00363 template<> int16 idx_mean(idx<int16> &in, int16* out); 00365 template<> float32 idx_mean(idx<float32> &in, float32* out); 00366 00367 // idx_std_normalize ///////////////////////////////////////////////////////// 00368 00372 template<> 00373 EXPORT void idx_std_normalize(idx<float32> &in, idx<float32> &out, 00374 float32 *mean); 00375 00376 // idx_dot /////////////////////////////////////////////////////////////////// 00377 00378 #if defined(__CBLAS__) || (defined(__IPP__) && defined(__IPP_DOT__)) 00379 00380 EXPORT float64 idx_dot(idx<float32> &i1, idx<float32> &i2); 00381 #endif 00382 00383 #if defined(__IPP__) && defined(__IPP_DOT__) 00384 00385 template<> float64 idx_dot(idx<ubyte> &i1, idx<ubyte> &i2); 00387 template<> float64 
idx_dot(idx<byte> &i1, idx<byte> &i2); 00389 template<> float64 idx_dot(idx<uint16> &i1, idx<uint16> &i2); 00391 template<> float64 idx_dot(idx<int16> &i1, idx<int16> &i2); 00393 template<> float64 idx_dot(idx<uint32> &i1, idx<uint32> &i2); 00395 template<> float64 idx_dot(idx<int32> &i1, idx<int32> &i2); 00396 #endif 00397 00398 // m2dotm1 /////////////////////////////////////////////////////////////////// 00399 00401 template <> 00402 void idx_m2dotm1(idx<float32> &a, idx<float32> &x, idx<float32> &y); 00403 00404 // idx_max /////////////////////////////////////////////////////////////////// 00405 00407 template<> ubyte idx_max(idx<ubyte> &m); 00409 template<> uint16 idx_max(idx<uint16> &m); 00411 template<> int16 idx_max(idx<int16> &m); 00413 template<> float32 idx_max(idx<float32> &m); 00414 00415 // idx_max between two idx's (in-place) ////////////////////////////////////// 00416 00419 template<> void idx_max(idx<ubyte> &in1, idx<ubyte> &in2); 00422 template<> void idx_max(idx<uint16> &in1, idx<uint16> &in2); 00425 template<> void idx_max(idx<int16> &in1, idx<int16> &in2); 00428 template<> void idx_max(idx<float32> &in1, idx<float32> &in2); 00429 00430 // idx_max between two idx's (not-in-place) ////////////////////////////////// 00431 00434 template<> void idx_max(idx<ubyte> &in1, idx<ubyte> &in2, idx<ubyte> &out); 00437 template<> void idx_max(idx<uint16> &in1, idx<uint16> &in2, idx<uint16> &out); 00440 template<> void idx_max(idx<int16> &in1, idx<int16> &in2, idx<int16> &out); 00443 template<> 00444 void idx_max(idx<float32> &in1, idx<float32> &in2, idx<float32> &out); 00445 00446 // idx_min /////////////////////////////////////////////////////////////////// 00447 00449 template<> ubyte idx_min(idx<ubyte> &m); 00451 template<> uint16 idx_min(idx<uint16> &m); 00453 template<> int16 idx_min(idx<int16> &m); 00455 template<> float32 idx_min(idx<float32> &m); 00456 00457 // idx_indexmax ////////////////////////////////////////////////////////////// 00458 00460 
template<> intg idx_indexmax(idx<ubyte> &m); 00462 template<> intg idx_indexmax(idx<uint16> &m); 00464 template<> intg idx_indexmax(idx<int16> &m); 00466 template<> intg idx_indexmax(idx<float32> &m); 00467 00468 // idx_indexmin ////////////////////////////////////////////////////////////// 00469 00471 template<> intg idx_indexmin(idx<ubyte> &m); 00473 template<> intg idx_indexmin(idx<uint16> &m); 00475 template<> intg idx_indexmin(idx<int16> &m); 00477 template<> intg idx_indexmin(idx<float32> &m); 00478 00479 // idx_sqrdist /////////////////////////////////////////////////////////////// 00480 00482 template<> float64 idx_sqrdist(idx<ubyte> &i1, idx<ubyte> &i2); 00484 template<> float64 idx_sqrdist(idx<uint16> &i1, idx<uint16> &i2); 00486 template<> float64 idx_sqrdist(idx<int16> &i1, idx<int16> &i2); 00488 template<> float64 idx_sqrdist(idx<float32> &i1, idx<float32> &i2); 00489 00490 // idx_sqrdist (with idx out) //////////////////////////////////////////////// 00491 00493 template<> 00494 void idx_sqrdist(idx<ubyte> &i1, idx<ubyte> &i2, idx<ubyte> &out); 00496 template<> 00497 void idx_sqrdist(idx<uint16> &i1, idx<uint16> &i2, idx<uint16> &out); 00499 template<> 00500 void idx_sqrdist(idx<int16> &i1, idx<int16> &i2, idx<int16> &out); 00502 template<> 00503 void idx_sqrdist(idx<float32> &i1, idx<float32> &i2, idx<float32> &out); 00504 00505 } // end namespace ebl 00506 00507 #include "idxops_ipp.hpp" 00508 00509 #endif /* ifdef __IPP__ */ 00510 00511 #endif /* define IPP_H */