libidx
/home/rex/ebltrunk/core/libidx/include/ipp.h
00001 /***************************************************************************
00002  *   Copyright (C) 2012 by Pierre Sermanet *
00003  *   pierre.sermanet@gmail.com *
00004  *   All rights reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions are met:
00008  *     * Redistributions of source code must retain the above copyright
00009  *       notice, this list of conditions and the following disclaimer.
00010  *     * Redistributions in binary form must reproduce the above copyright
00011  *       notice, this list of conditions and the following disclaimer in the
00012  *       documentation and/or other materials provided with the distribution.
00013  *     * Redistribution under a license not approved by the Open Source
00014  *       Initiative (http://www.opensource.org) must display the
00015  *       following acknowledgement in all advertising material:
00016  *        This product includes software developed at the Courant
00017  *        Institute of Mathematical Sciences (http://cims.nyu.edu).
00018  *     * The names of the authors may not be used to endorse or promote products
00019  *       derived from this software without specific prior written permission.
00020  *
00021  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
00022  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00023  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00024  * DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
00025  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
00026  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00027  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00028  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00030  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031  ***************************************************************************/
00032 
00033 #ifndef IPP_H
00034 #define IPP_H
00035 
00036 #include "config.h"
00037 #include "numerics.h"
00038 #include "idx.h"
00039 #include "ippops.h"
00040 
00041 #ifdef __IPP__
00042 
00043 namespace ebl {
00044 
00045   // idx_copy //////////////////////////////////////////////////////////////////
00046 
        // Explicit specializations of the idx_copy(src, dst) template for the
        // ubyte, uint16, int16 and int32 element types. They are declared only
        // when __IPP__ is defined (see the enclosing #ifdef). src is taken by
        // const reference, dst by non-const reference.
        // NOTE(review): no float32 specialization is declared in this section.
        // NOTE(review): definitions are presumably provided by "idxops_ipp.hpp",
        // included at the end of this header - confirm.
00048   template <> void idx_copy(const idx<ubyte> &src, idx<ubyte> &dst);
00050   template <> void idx_copy(const idx<uint16> &src, idx<uint16> &dst);
00052   template <> void idx_copy(const idx<int16> &src, idx<int16> &dst);
00054   template <> void idx_copy(const idx<int32> &src, idx<int32> &dst);
00055 
00056   // idx_clear /////////////////////////////////////////////////////////////////
00057 
        // Explicit specializations of the idx_clear(inp) template for the
        // ubyte, uint16, int16 and float32 element types, declared only when
        // __IPP__ is defined. inp is taken by non-const reference.
        // NOTE(review): by name this resets the contents of inp - confirm the
        // exact semantics against the generic idx_clear.
00059   template<> void idx_clear(idx<ubyte> &inp);
00061   template<> void idx_clear(idx<uint16> &inp);
00063   template<> void idx_clear(idx<int16> &inp);
00065   template<> void idx_clear(idx<float32> &inp);
00066 
00067   // idx_fill //////////////////////////////////////////////////////////////////
00068 
        // Explicit specializations of the idx_fill(inp, v) template for the
        // ubyte, uint16, int16 and float32 element types, declared only when
        // __IPP__ is defined. The value v has the same type as the elements
        // of inp and is passed by value.
        // NOTE(review): by name this sets the elements of inp to v - confirm.
00070   template<> void idx_fill(idx<ubyte> &inp, ubyte v);
00072   template<> void idx_fill(idx<uint16> &inp, uint16 v);
00074   template<> void idx_fill(idx<int16> &inp, int16 v);
00076   template<> void idx_fill(idx<float32> &inp, float32 v);
00077 
00078   // idx_add (not-in-place) ////////////////////////////////////////////////////
00079 
        // Explicit specializations of the three-argument idx_add(i1, i2, out)
        // template for the ubyte, uint16, int16 and float32 element types,
        // declared only when __IPP__ is defined. All three idx arguments share
        // the same element type and are taken by non-const reference.
        // NOTE(review): presumably out receives the element-wise sum of i1 and
        // i2; overflow behaviour for the integer types is not visible here.
00081   template<> void idx_add(idx<ubyte> &i1, idx<ubyte> &i2, idx<ubyte> &out);
00083   template<> void idx_add(idx<uint16> &i1, idx<uint16> &i2, idx<uint16> &out);
00085   template<> void idx_add(idx<int16> &i1, idx<int16> &i2, idx<int16> &out);
00087   template<> void idx_add(idx<float32> &i1, idx<float32> &i2,
00088                           idx<float32> &out);
00089 
00090   // idx_add (in-place) ////////////////////////////////////////////////////////
00091 
        // Explicit specializations of the two-argument idx_add(in, out)
        // template for the ubyte, uint16 and int16 element types, declared
        // only when __IPP__ is defined.
        // NOTE(review): unlike the three-argument form, no float32
        // specialization is declared in this section.
        // NOTE(review): presumably in is accumulated into out - confirm.
00093   template<> void idx_add(idx<ubyte> &in, idx<ubyte> &out);
00095   template<> void idx_add(idx<uint16> &in, idx<uint16> &out);
00097   template<> void idx_add(idx<int16> &in, idx<int16> &out);
00098 
00099   // idx_addc //////////////////////////////////////////////////////////////////
00100   // TODO: add inplace addc, which can be used for speed up in
00101   // bias modules for example where output is the input
00102 
        // Explicit specializations of the idx_addc(inp, c, out) template for
        // the ubyte, uint16, int16 and float32 element types, declared only
        // when __IPP__ is defined. The constant c has the element type of inp
        // and out and is passed by value.
        // NOTE(review): presumably out receives inp with c added to each
        // element - confirm against the generic idx_addc.
00104   template<> void idx_addc(idx<ubyte> &inp, ubyte c, idx<ubyte> &out);
00106   template<> void idx_addc(idx<uint16> &inp, uint16 c, idx<uint16> &out);
00108   template<> void idx_addc(idx<int16> &inp, int16 c, idx<int16> &out);
00110   template<> void idx_addc(idx<float32> &inp, float32 c, idx<float32> &out);
00111 
00112   // idx_addc_bounded //////////////////////////////////////////////////////////
00113 
        // Explicit specializations of the idx_addc_bounded(inp, c, out)
        // template, one per element type (ubyte, uint16, int16, float32),
        // declared only when __IPP__ is defined. Signatures mirror idx_addc.
        // NOTE(review): "bounded" presumably means the result is saturated to
        // the range of the element type - confirm in the implementation.
00115   // specialized ubyte version
00116   template<> void idx_addc_bounded(idx<ubyte> &inp, ubyte c, idx<ubyte> &out);
00118   // specialized uint16 version
00119   template<> void idx_addc_bounded(idx<uint16> &inp, uint16 c,
00120                                    idx<uint16> &out);
00122   // specialized int16 version
00123   template<> void idx_addc_bounded(idx<int16> &inp, int16 c, idx<int16> &out);
00125   // specialized float32 version
00126   template<> void idx_addc_bounded(idx<float32> &inp, float32 c,
00127                                    idx<float32> &out);
00128 
00129   // idx_sub (not-in-place) ////////////////////////////////////////////////////
00130 
        // Explicit specializations of the idx_sub(i1, i2, out) template for
        // the ubyte, uint16, int16 and float32 element types, declared only
        // when __IPP__ is defined. All arguments share one element type.
        // NOTE(review): operand order (i1 - i2 vs. i2 - i1) and underflow
        // handling for unsigned types are not visible here - confirm.
00132   template<> void idx_sub(idx<ubyte> &i1, idx<ubyte> &i2, idx<ubyte> &out);
00134   template<> void idx_sub(idx<uint16> &i1, idx<uint16> &i2, idx<uint16> &out);
00136   template<> void idx_sub(idx<int16> &i1, idx<int16> &i2, idx<int16> &out);
00138   template<> void idx_sub(idx<float32> &i1, idx<float32> &i2,
00139                           idx<float32> &out);
00140 
00141   // idx_subc_bounded //////////////////////////////////////////////////////////
00142 
        // Explicit specializations of the idx_subc_bounded(inp, c, out)
        // template, one per element type (ubyte, uint16, int16, float32),
        // declared only when __IPP__ is defined. The constant c is passed by
        // value and has the element type of inp and out.
        // NOTE(review): no unbounded idx_subc specialization is declared in
        // this header; "bounded" presumably means saturating - confirm.
00144   // specialized ubyte version
00145   template<> void idx_subc_bounded(idx<ubyte> &inp, ubyte c, idx<ubyte> &out);
00147   // specialized uint16 version
00148   template<>
00149   void idx_subc_bounded(idx<uint16> &inp, uint16 c, idx<uint16> &out);
00151   // specialized int16 version
00152   template<> void idx_subc_bounded(idx<int16> &inp, int16 c, idx<int16> &out);
00154   // specialized float32 version
00155   template<>
00156   void idx_subc_bounded(idx<float32> &inp, float32 c, idx<float32> &out);
00157 
00158   // idx_minus /////////////////////////////////////////////////////////////////
00159 
        // Explicit specializations of the idx_minus(in, out) template for the
        // signed element types int16 and float32 only, declared when __IPP__
        // is defined.
        // NOTE(review): presumably out receives the negation of in - confirm.
00161   template<> void idx_minus(idx<int16> &in, idx<int16> &out);
00163   template<> void idx_minus(idx<float32> &in, idx<float32> &out);
00164 
00165   // idx_minus_acc /////////////////////////////////////////////////////////////
00166 
        // Explicit specializations of the idx_minus_acc(in, out) template for
        // the int16 and float32 element types, declared only when __IPP__ is
        // defined.
        // NOTE(review): the "_acc" suffix suggests the negated input is
        // accumulated into out rather than overwriting it - confirm.
00168   template<> void idx_minus_acc(idx<int16> &in, idx<int16> &out);
00170   template<> void idx_minus_acc(idx<float32> &in, idx<float32> &out);
00171 
00172   // idx_mul (not-in-place) ////////////////////////////////////////////////////
00173 
        // Explicit specializations of the idx_mul(i1, i2, out) template for
        // the ubyte, uint16, int16 and float32 element types, declared only
        // when __IPP__ is defined. All arguments share one element type.
        // NOTE(review): presumably an element-wise product written to out;
        // integer overflow handling is not visible here - confirm.
00175   template<> void idx_mul(idx<ubyte> &i1, idx<ubyte> &i2, idx<ubyte> &out);
00177   template<> void idx_mul(idx<uint16> &i1, idx<uint16> &i2, idx<uint16> &out);
00179   template<> void idx_mul(idx<int16> &i1, idx<int16> &i2, idx<int16> &out);
00181   template<> void idx_mul(idx<float32> &i1, idx<float32> &i2,
00182                           idx<float32> &out);
00183 
00184   // idx_dotc //////////////////////////////////////////////////////////////////
00185 
        // Explicit specializations of the idx_dotc(inp, c, out) template for
        // the ubyte, uint16, int16 and float32 element types, declared only
        // when __IPP__ is defined. The constant c is passed by value and has
        // the element type of inp and out.
        // NOTE(review): presumably out receives inp scaled by c - confirm.
00187   template<> void idx_dotc(idx<ubyte> &inp, ubyte c, idx<ubyte> &out);
00189   template<> void idx_dotc(idx<uint16> &inp, uint16 c, idx<uint16> &out);
00191   template<> void idx_dotc(idx<int16> &inp, int16 c, idx<int16> &out);
00193   template<> void idx_dotc(idx<float32> &inp, float32 c, idx<float32> &out);
00194 
00195   // idx_dotc_bounded //////////////////////////////////////////////////////////
00196 
        // Explicit specializations of the idx_dotc_bounded(inp, c, out)
        // template, one per element type (ubyte, uint16, int16, float32),
        // declared only when __IPP__ is defined. Signatures mirror idx_dotc.
        // NOTE(review): "bounded" presumably means the scaled result is
        // saturated to the element type's range - confirm.
00198   // specialized ubyte version
00199   template<> void idx_dotc_bounded(idx<ubyte> &inp, ubyte c, idx<ubyte> &out);
00201   // specialized uint16 version
00202   template<>
00203   void idx_dotc_bounded(idx<uint16> &inp, uint16 c, idx<uint16> &out);
00205   // specialized int16 version
00206   template<> void idx_dotc_bounded(idx<int16> &inp, int16 c, idx<int16> &out);
00208   // specialized float32 version
00209   template<>
00210   void idx_dotc_bounded(idx<float32> &inp, float32 c, idx<float32> &out);
00211 
00212   // idx_div ///////////////////////////////////////////////////////////////////
00213 
        // Explicit specializations of the idx_div(i1, i2, out) template for
        // the ubyte, uint16, int16 and float32 element types, declared only
        // when __IPP__ is defined. All arguments share one element type.
        // NOTE(review): operand order and division-by-zero behaviour are not
        // visible from these declarations - confirm in the implementation.
00215   template<> void idx_div(idx<ubyte> &i1, idx<ubyte> &i2, idx<ubyte> &out);
00217   template<> void idx_div(idx<uint16> &i1, idx<uint16> &i2, idx<uint16> &out);
00219   template<> void idx_div(idx<int16> &i1, idx<int16> &i2, idx<int16> &out);
00221   template<> void idx_div(idx<float32> &i1, idx<float32> &i2,
00222                           idx<float32> &out);
00223   
00224   // idx_inv ///////////////////////////////////////////////////////////////////
00225 
        // Explicit specialization of the idx_inv(inp, out) template for the
        // float32 element type only, declared when __IPP__ is defined.
        // NOTE(review): presumably an element-wise reciprocal; behaviour for
        // zero elements is not visible here - confirm.
00228   template<> void idx_inv(idx<float32> &inp, idx<float32> &out);
00229 
00230   // idx_abs ///////////////////////////////////////////////////////////////////
00231 
        // Explicit specializations of the idx_abs(inp, out) template for the
        // signed element types int16 and float32 only, declared when __IPP__
        // is defined.
        // NOTE(review): presumably out receives the element-wise absolute
        // value of inp - confirm.
00233   template<> void idx_abs(idx<int16>& inp, idx<int16>& out);
00235   template<> void idx_abs(idx<float32>& inp, idx<float32>& out);
00236 
00237   // idx_threshold (in-place) //////////////////////////////////////////////////
00238 
        // Explicit specializations of the two-argument idx_threshold(in, th)
        // template for the ubyte, uint16, int16 and float32 element types,
        // declared only when __IPP__ is defined. The threshold th is passed by
        // value; in is the only idx argument and is taken by non-const
        // reference.
        // NOTE(review): the comparison direction and replacement value are
        // not visible from the declaration - confirm in the implementation.
00240   template<> void idx_threshold(idx<ubyte>& in, ubyte th);
00242   template<> void idx_threshold(idx<uint16>& in, uint16 th);
00244   template<> void idx_threshold(idx<int16>& in, int16 th);
00246   template<> void idx_threshold(idx<float32>& in, float32 th);
00247 
00248   // idx_threshold (not-in-place) //////////////////////////////////////////////
00249   
        // Explicit specializations of the idx_threshold(in, th, out) template
        // for the ubyte, uint16, int16 and float32 element types, declared
        // only when __IPP__ is defined. Same as the in-place form but with a
        // separate out idx of the same element type.
        // NOTE(review): thresholding rule is not visible here - confirm.
00252   template<> void idx_threshold(idx<ubyte>& in, ubyte th, idx<ubyte>& out);
00255   template<> void idx_threshold(idx<uint16>& in, uint16 th, idx<uint16>& out);
00258   template<> void idx_threshold(idx<int16>& in, int16 th, idx<int16>& out);
00261   template<> void idx_threshold(idx<float32>& in, float32 th,idx<float32>& out);
00262 
00263   // idx_threshold (with value, in-place) //////////////////////////////////////
00264   
        // Explicit specializations of the idx_threshold(in, th, value)
        // template for the ubyte, uint16, int16 and float32 element types,
        // declared only when __IPP__ is defined. Both th and value are scalars
        // of the element type, passed by value.
        // NOTE(review): presumably elements failing the threshold test are
        // replaced with value; the test direction is not visible - confirm.
00266   template<> void idx_threshold(idx<ubyte>& in, ubyte th, ubyte value);
00268   template<> void idx_threshold(idx<uint16>& in, uint16 th, uint16 value);
00270   template<> void idx_threshold(idx<int16>& in, int16 th, int16 value);
00272   template<> void idx_threshold(idx<float32>& in, float32 th, float32 value);
00273 
00274   // idx_threshold (with value, not-in-place) //////////////////////////////////
00275 
        // Explicit specializations of the idx_threshold(in, th, value, out)
        // template for the ubyte, uint16, int16 and float32 element types,
        // declared only when __IPP__ is defined. th and value are scalars of
        // the element type; out is a separate idx of the same element type.
        // NOTE(review): thresholding rule is not visible here - confirm.
00278   template<>
00279   void idx_threshold(idx<ubyte>& in, ubyte th, ubyte value, idx<ubyte>& out);
00282   template<>
00283   void idx_threshold(idx<uint16>& in, uint16 th, uint16 value,idx<uint16>& out);
00286   template<>
00287   void idx_threshold(idx<int16>& in, int16 th, int16 value, idx<int16>& out);  
00290   template<>
00291   void idx_threshold(idx<float32>& in, float32 th,
00292                      float32 value, idx<float32>& out);
00293  
00294   // idx_sqrt //////////////////////////////////////////////////////////////////
00295   
        // Explicit specializations of the idx_sqrt(inp, out) template for the
        // ubyte, uint16, int16 and float32 element types, declared only when
        // __IPP__ is defined. inp and out share one element type.
        // NOTE(review): rounding for integer types and handling of negative
        // int16 / float32 inputs are not visible here - confirm.
00297   template<> void idx_sqrt(idx<ubyte>& inp, idx<ubyte>& out);
00299   template<> void idx_sqrt(idx<uint16>& inp, idx<uint16>& out);
00301   template<> void idx_sqrt(idx<int16>& inp, idx<int16>& out);
00303   template<> void idx_sqrt(idx<float32>& inp, idx<float32>& out);
00304 
00305   // idx_exp ///////////////////////////////////////////////////////////////////
00306 
        // Explicit specializations of the single-argument idx_exp(inp)
        // template for the ubyte, uint16 and int16 element types, declared
        // only when __IPP__ is defined. inp is the only argument and is taken
        // by non-const reference.
        // The float32 specialization is commented out, so none is declared
        // in this header.
        // NOTE(review): the ubyte and uint16 declarations carry EXPORT while
        // the int16 one does not - confirm whether that is intentional.
00308   template<> EXPORT void idx_exp(idx<ubyte>& inp);
00310   template<> EXPORT void idx_exp(idx<uint16>& inp);
00312   template<> void idx_exp(idx<int16>& inp);
00314   //template<> void idx_exp(idx<float32>& inp);
00315 
00316   // idx_sum ///////////////////////////////////////////////////////////////////
00317 
        // Explicit specializations of the idx_sum(inp, out) template for the
        // ubyte, uint16, int16 and (conditionally) float32 element types.
        // Each returns a value of the element type and also takes a pointer
        // `out` of that type.
        // The whole section is currently disabled with "#if 0" (marked TODO),
        // so none of these specializations is declared by this header.
        // The float32 declaration is additionally guarded by __OPENMP__ and
        // __USE_SSE__. That guard uses "&&" like every other conditional in
        // this file; the alternative token "and" is not accepted by all
        // preprocessors, which would break the build once "#if 0" is lifted.
        // NOTE(review): the role of `out` is not visible here - confirm.
00318 #if 0 //TODO
00319 
00321   template<> ubyte idx_sum(idx<ubyte> &inp, ubyte *out);
00323   template<> uint16 idx_sum(idx<uint16> &inp, uint16 *out);
00325   template<> int16 idx_sum(idx<int16> &inp, int16 *out);  
00326 
00327 #if defined(__OPENMP__) && defined(__USE_SSE__)
00328 
00329   template<> float32 idx_sum(idx<float32> &inp, float32 *out);
00330 #endif
00331 
00332 #endif
00333 
00334   // idx_sumabs ////////////////////////////////////////////////////////////////
00335 
        // Explicit specializations of the idx_sumabs(inp, out) template for
        // the ubyte, uint16, int16 and float32 element types, declared only
        // when __IPP__ is defined. Every specialization returns float64
        // regardless of element type and takes a pointer `out` of the element
        // type.
        // NOTE(review): presumably the sum of absolute values of inp; the
        // role of `out` (and whether it may be null) is not visible - confirm.
00337   template<> float64 idx_sumabs(idx<ubyte> &inp, ubyte *out);
00339   template<> float64 idx_sumabs(idx<uint16> &inp, uint16 *out);
00341   template<> float64 idx_sumabs(idx<int16> &inp, int16 *out);
00343   template<> float64 idx_sumabs(idx<float32> &inp, float32 *out);
00344 
00345   // idx_l2norm ////////////////////////////////////////////////////////////////
00346 
        // Explicit specializations of the idx_l2norm(in) template for the
        // ubyte, uint16, int16 and float32 element types, declared only when
        // __IPP__ is defined. Every specialization returns float64.
        // NOTE(review): presumably the Euclidean (L2) norm of all elements of
        // in - confirm.
00348   template<> float64 idx_l2norm(idx<ubyte> &in);
00350   template<> float64 idx_l2norm(idx<uint16> &in);
00352   template<> float64 idx_l2norm(idx<int16> &in);
00354   template<> float64 idx_l2norm(idx<float32> &in);
00355 
00356   // idx_mean //////////////////////////////////////////////////////////////////
00357 
        // Explicit specializations of the idx_mean(in, out) template for the
        // ubyte, uint16, int16 and float32 element types, declared only when
        // __IPP__ is defined. Each returns a value of the element type (not
        // float64) and takes a pointer `out` of the element type.
        // NOTE(review): for the integer types the result is therefore an
        // integer; rounding and the role of `out` are not visible - confirm.
00359   template<> ubyte idx_mean(idx<ubyte> &in, ubyte* out);
00361   template<> uint16 idx_mean(idx<uint16> &in, uint16* out);
00363   template<> int16 idx_mean(idx<int16> &in, int16* out);
00365   template<> float32 idx_mean(idx<float32> &in, float32* out);
00366 
00367   // idx_std_normalize /////////////////////////////////////////////////////////
00368 
        // Explicit specialization of the idx_std_normalize(in, out, mean)
        // template for the float32 element type only, declared with EXPORT
        // and only when __IPP__ is defined. `mean` is a float32 pointer.
        // NOTE(review): presumably normalizes in to out using its mean and
        // standard deviation; whether `mean` is an optional precomputed input
        // is not visible from this declaration - confirm.
00372   template<>
00373   EXPORT void idx_std_normalize(idx<float32> &in, idx<float32> &out,
00374                                 float32 *mean);
00375   
00376   // idx_dot ///////////////////////////////////////////////////////////////////
00377 
        // Declarations of idx_dot(i1, i2), all returning float64.
        // The float32 version is declared as a plain (non-template) function
        // with EXPORT, available when __CBLAS__ is defined or when both
        // __IPP__ and __IPP_DOT__ are defined.
        // The ubyte, byte, uint16, int16, uint32 and int32 versions are
        // explicit template specializations, declared only when both __IPP__
        // and __IPP_DOT__ are defined.
        // Because this section already sits inside the header-wide
        // "#ifdef __IPP__", the defined(__IPP__) tests below are always true
        // here; __IPP_DOT__ and __CBLAS__ are the effective switches.
        // NOTE(review): presumably the dot product of i1 and i2 - confirm.
00378 #if defined(__CBLAS__) || (defined(__IPP__) && defined(__IPP_DOT__))
00379 
00380   EXPORT float64 idx_dot(idx<float32> &i1, idx<float32> &i2);
00381 #endif
00382 
00383 #if defined(__IPP__) && defined(__IPP_DOT__)
00384 
00385   template<> float64 idx_dot(idx<ubyte> &i1, idx<ubyte> &i2);
00387   template<> float64 idx_dot(idx<byte> &i1, idx<byte> &i2);
00389   template<> float64 idx_dot(idx<uint16> &i1, idx<uint16> &i2);
00391   template<> float64 idx_dot(idx<int16> &i1, idx<int16> &i2);
00393   template<> float64 idx_dot(idx<uint32> &i1, idx<uint32> &i2);
00395   template<> float64 idx_dot(idx<int32> &i1, idx<int32> &i2);
00396 #endif
00397 
00398   // m2dotm1 ///////////////////////////////////////////////////////////////////
00399 
        // Explicit specialization of the idx_m2dotm1(a, x, y) template for the
        // float32 element type only, declared when __IPP__ is defined.
        // NOTE(review): by name a is presumably a 2-D matrix, x a 1-D vector
        // and y the 1-D matrix-vector product; the declaration itself does
        // not fix the ranks - confirm.
00401   template <>
00402     void idx_m2dotm1(idx<float32> &a, idx<float32> &x, idx<float32> &y);
00403 
00404   // idx_max ///////////////////////////////////////////////////////////////////
00405 
        // Explicit specializations of the single-argument idx_max(m) template
        // for the ubyte, uint16, int16 and float32 element types, declared
        // only when __IPP__ is defined. Each returns a value of the element
        // type.
        // NOTE(review): presumably the largest element of m; behaviour for an
        // empty idx is not visible here - confirm.
00407   template<> ubyte idx_max(idx<ubyte> &m);
00409   template<> uint16 idx_max(idx<uint16> &m);
00411   template<> int16 idx_max(idx<int16> &m);
00413   template<> float32 idx_max(idx<float32> &m);
00414 
00415   // idx_max between two idx's (in-place) //////////////////////////////////////
00416 
        // Explicit specializations of the two-argument idx_max(in1, in2)
        // template for the ubyte, uint16, int16 and float32 element types,
        // declared only when __IPP__ is defined. Returns void; both arguments
        // are non-const references of the same element type.
        // NOTE(review): presumably an element-wise maximum; which of in1/in2
        // receives the result is not visible from the declaration - confirm.
00419   template<> void idx_max(idx<ubyte> &in1, idx<ubyte> &in2);
00422   template<> void idx_max(idx<uint16> &in1, idx<uint16> &in2);
00425   template<> void idx_max(idx<int16> &in1, idx<int16> &in2);
00428   template<> void idx_max(idx<float32> &in1, idx<float32> &in2);
00429 
00430   // idx_max between two idx's (not-in-place) //////////////////////////////////
00431 
        // Explicit specializations of the three-argument
        // idx_max(in1, in2, out) template for the ubyte, uint16, int16 and
        // float32 element types, declared only when __IPP__ is defined. All
        // three idx arguments share one element type.
        // NOTE(review): presumably out receives the element-wise maximum of
        // in1 and in2 - confirm.
00434   template<> void idx_max(idx<ubyte> &in1, idx<ubyte> &in2, idx<ubyte> &out);
00437   template<> void idx_max(idx<uint16> &in1, idx<uint16> &in2, idx<uint16> &out);
00440   template<> void idx_max(idx<int16> &in1, idx<int16> &in2, idx<int16> &out);
00443   template<>
00444   void idx_max(idx<float32> &in1, idx<float32> &in2, idx<float32> &out);
00445 
00446   // idx_min ///////////////////////////////////////////////////////////////////
00447 
        // Explicit specializations of the single-argument idx_min(m) template
        // for the ubyte, uint16, int16 and float32 element types, declared
        // only when __IPP__ is defined. Each returns a value of the element
        // type.
        // Unlike idx_max, no two- or three-idx forms of idx_min are declared
        // in this header.
        // NOTE(review): presumably the smallest element of m - confirm.
00449   template<> ubyte idx_min(idx<ubyte> &m);
00451   template<> uint16 idx_min(idx<uint16> &m);
00453   template<> int16 idx_min(idx<int16> &m);
00455   template<> float32 idx_min(idx<float32> &m);
00456 
00457   // idx_indexmax //////////////////////////////////////////////////////////////
00458 
        // Explicit specializations of the idx_indexmax(m) template for the
        // ubyte, uint16, int16 and float32 element types, declared only when
        // __IPP__ is defined. Every specialization returns intg.
        // NOTE(review): presumably the index of the largest element of m; how
        // the index is computed for multi-dimensional idx and how ties are
        // resolved are not visible here - confirm.
00460   template<> intg idx_indexmax(idx<ubyte> &m);
00462   template<> intg idx_indexmax(idx<uint16> &m);
00464   template<> intg idx_indexmax(idx<int16> &m);
00466   template<> intg idx_indexmax(idx<float32> &m);
00467 
00468   // idx_indexmin //////////////////////////////////////////////////////////////
00469 
        // Explicit specializations of the idx_indexmin(m) template for the
        // ubyte, uint16, int16 and float32 element types, declared only when
        // __IPP__ is defined. Every specialization returns intg.
        // NOTE(review): presumably the index of the smallest element of m;
        // index layout and tie-breaking are not visible here - confirm.
00471   template<> intg idx_indexmin(idx<ubyte> &m);
00473   template<> intg idx_indexmin(idx<uint16> &m);
00475   template<> intg idx_indexmin(idx<int16> &m);
00477   template<> intg idx_indexmin(idx<float32> &m);
00478 
00479   // idx_sqrdist ///////////////////////////////////////////////////////////////
00480 
        // Explicit specializations of the two-argument idx_sqrdist(i1, i2)
        // template for the ubyte, uint16, int16 and float32 element types,
        // declared only when __IPP__ is defined. Every specialization returns
        // float64 regardless of element type.
        // NOTE(review): presumably the sum of squared element-wise
        // differences between i1 and i2 - confirm.
00482   template<> float64 idx_sqrdist(idx<ubyte> &i1, idx<ubyte> &i2);
00484   template<> float64 idx_sqrdist(idx<uint16> &i1, idx<uint16> &i2);
00486   template<> float64 idx_sqrdist(idx<int16> &i1, idx<int16> &i2);
00488   template<> float64 idx_sqrdist(idx<float32> &i1, idx<float32> &i2);
00489 
00490   // idx_sqrdist (with idx out) ////////////////////////////////////////////////
00491 
        // Explicit specializations of the three-argument
        // idx_sqrdist(i1, i2, out) template for the ubyte, uint16, int16 and
        // float32 element types, declared only when __IPP__ is defined. These
        // return void and deliver the result through `out`, an idx of the
        // same element type as the inputs (not float64).
        // NOTE(review): the expected shape of out (e.g. a scalar idx) is not
        // visible from the declaration - confirm.
00493   template<>
00494   void idx_sqrdist(idx<ubyte> &i1, idx<ubyte> &i2, idx<ubyte> &out);
00496   template<>
00497   void idx_sqrdist(idx<uint16> &i1, idx<uint16> &i2, idx<uint16> &out);
00499   template<>
00500   void idx_sqrdist(idx<int16> &i1, idx<int16> &i2, idx<int16> &out);
00502   template<>
00503   void idx_sqrdist(idx<float32> &i1, idx<float32> &i2, idx<float32> &out);
00504   
00505 } // end namespace ebl
00506 
00507 #include "idxops_ipp.hpp"
00508 
00509 #endif /* ifdef __IPP__ */
00510 
00511 #endif /* define IPP_H */