libidx
/home/rex/ebltrunk/core/libidx/include/ippops.hpp
00001 /***************************************************************************
00002  *   Copyright (C) 2010 by Pierre Sermanet *
00003  *   pierre.sermanet@gmail.com *
00004  *   All rights reserved.
00005  *
00006  * Redistribution and use in source and binary forms, with or without
00007  * modification, are permitted provided that the following conditions are met:
00008  *     * Redistributions of source code must retain the above copyright
00009  *       notice, this list of conditions and the following disclaimer.
00010  *     * Redistributions in binary form must reproduce the above copyright
00011  *       notice, this list of conditions and the following disclaimer in the
00012  *       documentation and/or other materials provided with the distribution.
00013  *     * Redistribution under a license not approved by the Open Source
00014  *       Initiative (http://www.opensource.org) must display the
00015  *       following acknowledgement in all advertising material:
00016  *        This product includes software developed at the Courant
00017  *        Institute of Mathematical Sciences (http://cims.nyu.edu).
00018  *     * The names of the authors may not be used to endorse or promote products
00019  *       derived from this software without specific prior written permission.
00020  *
00021  * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
00022  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
00023  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
00024  * DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY
00025  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
00026  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
00027  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
00028  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00029  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
00030  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00031  ***************************************************************************/
00032 
00033 #ifndef IPPOPS_HPP_
00034 #define IPPOPS_HPP_
00035 
00036 #include "numerics.h"
00037 #include "stl.h"
00038 
00039 namespace ebl {
00040 
00042   // init
00043 
00044   void ipp_init(int ncores) {
00045 #ifdef __IPP__
00046     if (ncores > 0)
00047       ippSetNumThreads(ncores);
00048     ippGetNumThreads(&ncores);
00049     std::cout << "Using Intel IPP with " << ncores << " core(s)." << std::endl;
00050 #else
00051     std::cout << "Not using Intel IPP." << std::endl;
00052 #endif
00053   }
00054 
00055 #ifdef __IPP__
00056 
00058   // ipp checks
00059 
00060 #ifdef __IPP_CHECKS__
00061   template <typename T>
00062   inline void ipp_checks1(const idx<T> &in) {
00063     idx_check_contiguous1(in);
00064     if (in.nelements() > INT_MAX)
00065       eblerror("TODO: Cannot use long with IPP.");
00066   }
00067 
00068   template <typename T>
00069   inline void ipp_checks2(const idx<T> &in1, const idx<T> &in2) {
00070     idx_checknelems2_all(in1, in2);
00071     idx_check_contiguous2(in1, in2);
00072     if (in1.nelements() > INT_MAX)
00073       eblerror("TODO: Cannot use long with IPP.");
00074   }
00075   
00076   template <typename T>
00077   inline void ipp_checks3(const idx<T> &in1, const idx<T> &in2,
00078                           const idx<T> &in3) {
00079     idx_checknelems3_all(in1, in2, in3);
00080     idx_check_contiguous3(in1, in2, in3);
00081     if (in1.nelements() > INT_MAX)
00082       eblerror("TODO: Cannot use long with IPP.");
00083   }
00084 #else
  // __IPP_CHECKS__ is not defined: the three checks expand to nothing, so
  // every `ipp_checksN(...);` call site is left as an empty statement.
  #define ipp_checks1(a)
  #define ipp_checks2(a, b)
  #define ipp_checks3(a, b, c)
00088 #endif
00089 
00091   // ipp functions templates without specialization
00092 
00093   template <typename T>
00094   void ipp_convolution(const idx<T> &in, const idx<T> &ker, idx<T> &out) {
00095     eblerror("ipp_convolution : type not available for IPP. Available types are ubyte, int16, float32");
00096   }
00097 
00098   template <typename T>
00099   void ipp_copy(const idx<T> &in, idx<T> &out) {
00100     eblerror("ipp_copy : type not available for IPP. Available types are ubyte, uint16, int16, int32, float32");
00101   }
00102 
00103   template <typename T>
00104   void ipp_clear(idx<T> &inp) {
00105     eblerror("ipp_clear : type not available for IPP. Available types are ubyte, uint16, int16, int32, float32");
00106   }
00107 
00108   template <typename T>
00109   void ipp_fill(idx<T> &inp) {
00110     eblerror("ipp_fill : type not available for IPP. Available types are ubyte, uint16, int16, int32, float32");
00111   }
00112 
00113   template <typename T>
00114   void ipp_minus(const idx<T> &inp, idx<T> &out) {
00115     ipp_checks2(inp, out);
00116     ipp_clear(out);
00117     ipp_sub(out, inp);
00118   }
00119 
00120   template <typename T>
00121   void ipp_add(const idx<T> &in, idx<T> &out) {
00122     eblerror("ipp_add in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00123   }
00124 
00125   template <typename T>
00126   void ipp_add(const idx<T> &in, const idx<T> &in2, idx<T> &out) {
00127     eblerror("ipp_add not-in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00128   }
00129 
00130   template <typename T>
00131   void ipp_addc(idx<T> &in, T c) {
00132     eblerror("ipp_addc in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00133   }
00134 
00135   template <typename T>
00136   void ipp_addc(const idx<T> &in, T c, idx<T> &out) {
00137     eblerror("ipp_addc not-in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00138   }
00139 
00140   template <typename T>
00141   void ipp_subc(idx<T> &in, T c) {
00142     eblerror("ipp_subc in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00143   }
00144 
00145   template <typename T>
00146   void ipp_subc(const idx<T> &in, T c, idx<T> &out) {
00147     eblerror("ipp_subc not-in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00148   }
00149 
00150   template <typename T>
00151   void ipp_sub(idx<T> &in1, const idx<T> &in2) {
00152     eblerror("ipp_sub in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00153   }
00154 
00155   template <typename T>
00156   void ipp_sub(const idx<T> &in1, const idx<T> &in2, idx<T> &out) {
00157     eblerror("ipp_sub not-in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00158   }
00159 
00160   template <typename T>
00161   void ipp_mul(const idx<T> &in1, idx<T> &in2) {
00162     eblerror("ipp_mul in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00163   }
00164 
00165   template <typename T>
00166   void ipp_mul(const idx<T> &in1, const idx<T> &in2, idx<T> &out) {
00167     eblerror("ipp_mul not-in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00168   }
00169 
00170   template <typename T>
00171   void ipp_m2dotm1(const idx<T> &in1, const idx<T> &in2, idx<T> &out) {
00172     eblerror("ipp_mul not-in-place : type not available for IPP. Available types are float32 and float64");
00173   }
00174 
00175   template <typename T>
00176   void ipp_m2dotm2(const idx<T> &in1, const idx<T> &in2, idx<T> &out) {
00177     eblerror("ipp_mul not-in-place : type not available for IPP. Available types are float32 and float 64");
00178   }
00179 
00180   template <typename T>
00181   void ipp_dotc(idx<T> &in1, T c) {
00182     eblerror("ipp_dotc in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00183   }
00184 
00185   template <typename T>
00186   void ipp_dotc(const idx<T> &in1, T c, idx<T> &out) {
00187     eblerror("ipp_mul not-in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00188   }
00189 
00190   template <typename T>
00191   void ipp_div(idx<T> &in1, const idx<T> &in2) {
00192     eblerror("ipp_div in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00193   }
00194 
00195   template <typename T>
00196   void ipp_div(const idx<T> &in1, const idx<T> &in2, idx<T> &out) {
00197     eblerror("ipp_div not-in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00198   }
00199 
00200   template <typename T>
00201   void ipp_abs(idx<T> &inp) {
00202     eblerror("ipp_abs in-place : type not available for IPP. Available types are int16, float32");
00203   }
00204 
00205   template <typename T>
00206   void ipp_abs(const idx<T> &in, idx<T> &out) {
00207     eblerror("ipp_abs not-in-place : type not available for IPP. Available types are int16, float32");
00208   }
00209 
00210   template <typename T>
00211   void ipp_sqrt(idx<T> &inp) {
00212     eblerror("ipp_sqrt in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00213   }
00214 
00215   template <typename T>
00216   void ipp_sqrt(const idx<T> &in, idx<T> &out) {
00217     eblerror("ipp_sqrt not-in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00218   }
00219 
00220   template <typename T>
00221   void ipp_exp(idx<T> &inp) {
00222     eblerror("ipp_exp in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00223   }
00224 
00225   template <typename T>
00226   void ipp_exp(const idx<T> &in, idx<T> &out) {
00227     eblerror("ipp_exp not-in-place : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00228   }
00229 
00230   template <typename T>
00231   float64 ipp_sum(const idx<T> &inp) {
00232     eblerror("ipp_sum : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00233   }
00234 
00235   template <typename T>
00236   float64 ipp_sumacc(const idx<T> &in, idx<T> &acc) {
00237     if (acc.order() != 0)
00238       eblerror("ipp_sumacc : acc must have order 0");
00239     float64 sum = ipp_sum(in) + (float64)acc.get();
00240     acc.set(saturate(sum, T));
00241     return sum;
00242   }
00243 
00244   template <typename T>
00245   float64 ipp_l2norm(const idx<T> &in) {
00246     eblerror("ipp_l3norm : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00247   }
00248 
00249   template <typename T>
00250   float64 ipp_mean(const idx<T> &in) {
00251     eblerror("ipp_mean : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00252   }
00253 
00254   template <typename T>
00255   void ipp_std_normalize(const idx<T> &in, idx<T> &out, T* mean) {
00256     eblerror("ipp_std_normalize : type not available for IPP. Available types are ubyte, float32");
00257   }
00258 
00259 #ifdef __IPP_DOT__
00260   template <typename T>
00261   float64 ipp_dot(const idx<T> &in1, const idx<T> &in2) {
00262     eblerror("ipp_dot : type not available for IPP. Available types are ubyte, byte, uint16, int16, uint32, int32, float32");
00263   }
00264 
00265   template <typename T>
00266   float64 ipp_dotacc(const idx<T> &in1, const idx<T> &in2,
00267                      idx<T> &acc) {
00268     if (acc.order() != 0)
00269       eblerror("ipp_sumacc : acc must have order 0");
00270     float64 sum = ipp_dot(in1, in2) + (float64)acc.get();
00271     acc.set(saturate(sum, T));
00272     return sum;
00273   }
00274 #endif
00275 
00276   template <typename T>
00277   T ipp_max(const idx<T> &inp) {
00278     eblerror("ipp_max : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00279   }
00280 
00281   template <typename T>
00282   intg ipp_indexmax(const idx<T> &inp) {
00283     eblerror("ipp_indexmax : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00284   }
00285 
00286   template <typename T>
00287   T ipp_min(const idx<T> &inp) {
00288     eblerror("ipp_min : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00289   }
00290 
00291   template <typename T>
00292   intg ipp_indexmin(const idx<T> &inp) {
00293     eblerror("ipp_indexmin : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00294   }
00295 
00296   template <typename T>
00297   void ipp_maxevery(const idx<T> &in1, idx<T> &in2) {
00298     eblerror("ipp_maxevery : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00299   }
00300 
00301   template <typename T>
00302   void ipp_maxevery(const idx<T> &in1, const idx<T> &in2, idx<T> &out) {
00303     ipp_copy(in2, out);
00304     ipp_maxevery(in1, out);
00305   }
00306 
00307   template <typename T>
00308   float64 ipp_sqrdist(const idx<T> &i1, const idx<T> &i2) {
00309     eblerror("ipp_sqrdist : type not available for IPP. Available types are ubyte, uint16, int16, float32");
00310   }
00311 
00312   template <typename T>
00313   float64 ipp_sqrdist(const idx<T> &in1, const idx<T> &in2, idx<T> &out) {
00314     if (out.order() != 0)
00315       eblerror("ipp_sqrdist : out must have order 0");
00316     float64 ret = ipp_sqrdist(in1, in2);
00317     out.set(saturate(ret, T));
00318     return ret;
00319   }
00320 
00321   template <typename T>
00322   void ipp_threshold_lt(idx<T> &in, T th) {
00323     eblerror("ipp_threshold_lt (in-place): type not available for IPP. Available types are ubyte, uint16, int16, float32");
00324   }
00325 
00326   template <typename T>
00327   void ipp_threshold_lt(const idx<T> &in, T th, idx<T> &out) {
00328     eblerror("ipp_threshold_lt (not-in-place): type not available for IPP. Available types are ubyte, uint16, int16, float32");
00329   }
00330 
00331   template <typename T>
00332   void ipp_threshold_gt(idx<T> &in, T th) {
00333     eblerror("ipp_threshold_gt (in-place): type not available for IPP. Available types are ubyte, uint16, int16, float32");
00334   }
00335 
00336   template <typename T>
00337   void ipp_threshold_gt(const idx<T> &in, T th, idx<T> &out) {
00338     eblerror("ipp_threshold_gt (not-in-place): type not available for IPP. Available types are ubyte, uint16, int16, float32");
00339   }
00340 
00342   // ipp implementations
00343 
00344   // templates for ipp_convolution
00345   // TODO: handle non contiguous?
00346   
00347   //TODO : remove in release ?
00348   template <typename T>
00349   inline void ipp_convolution_check_size (const idx<T> &in, const idx<T> &ker,
00350                                           idx<T> &out) {
00351     ipp_checks1(in);
00352     ipp_checks1(ker);
00353     ipp_checks1(out);
00354     if ((in.dim(0) > INT_MAX) || (in.dim(1) > INT_MAX) ||
00355         (ker.dim(0) > INT_MAX) || (ker.dim(1) > INT_MAX) ||
00356         (in.mod(0) > INT_MAX) || (ker.mod(0) > INT_MAX) ||
00357         (out.mod(0) > INT_MAX))
00358       eblerror("TODO: Cannot use long with IPP.");
00359   }
00360     
00361   //   templates
00362   template <>
00363   void ipp_convolution(const idx<ubyte> &in, const idx<ubyte> &ker,
00364                       idx<ubyte> &out) {
00365     ipp_convolution_check_size(in, ker, out);
00366     IppiSize insize, kersize;
00367     insize.height = in.dim(0);
00368     insize.width = in.dim(1);
00369     kersize.height = ker.dim(0);
00370     kersize.width = ker.dim(1);
00371     ippiConvValid_8u_C1R(in.idx_ptr(),
00372                          sizeof(ubyte) * (int)in.mod(0), //instep
00373                          insize, ker.idx_ptr(),
00374                          sizeof(ubyte) * (int)ker.mod(0), //kerstep
00375                                 kersize, out.idx_ptr(),
00376                          sizeof(ubyte) * (int)out.mod(0), //outstep
00377                          1);
00378   }
00379 
00380   template <>
00381   void ipp_convolution(const idx<int16> &in, const idx<int16> &ker,
00382                       idx<int16> &out) {
00383     ipp_convolution_check_size(in, ker, out);
00384     IppiSize insize, kersize;
00385     insize.height = in.dim(0);
00386     insize.width = in.dim(1);
00387     kersize.height = ker.dim(0);
00388     kersize.width = ker.dim(1);
00389     ippiConvValid_16s_C1R(in.idx_ptr(),
00390                           sizeof(int16) * (int)in.mod(0), //instep
00391                           insize, ker.idx_ptr(),
00392                           sizeof(int16) * (int)ker.mod(0), //kerstep
00393                           kersize, out.idx_ptr(),
00394                           sizeof(int16) * (int)out.mod(0), //outstep
00395                           1);
00396   }
00397 
00398   template <>
00399   void ipp_convolution(const idx<float32> &in, const idx<float32> &ker,
00400                       idx<float32> &out) {
00401     ipp_convolution_check_size(in, ker, out);
00402     IppiSize insize, kersize;
00403     insize.height = in.dim(0);
00404     insize.width = in.dim(1);
00405     kersize.height = ker.dim(0);
00406     kersize.width = ker.dim(1);
00407     ippiConvValid_32f_C1R(in.idx_ptr(),
00408                           sizeof(float32) * (int)in.mod(0), //instep
00409                           insize, ker.idx_ptr(),
00410                           sizeof(float32) * (int)ker.mod(0), //kerstep
00411                           kersize, out.idx_ptr(),
00412                           sizeof(float32) * (int)out.mod(0)); //outstep
00413   }
00414 
00415   // templates for ipp_copy
00416 
00417   template <>
00418   void ipp_copy(const idx<ubyte> &in, idx<ubyte> &out) {
00419     ipp_checks2(in, out);
00420     IppiSize    insize;
00421     insize.height = in.nelements();
00422     insize.width = 1; 
00423     ippiCopy_8u_C1R(in.idx_ptr(),  sizeof(ubyte),
00424                     out.idx_ptr(), sizeof(ubyte),
00425                     insize);
00426   }
00427 
00428   template <>
00429   void ipp_copy(const idx<uint16> &in, idx<uint16> &out) {
00430     ipp_checks2(in, out);
00431     IppiSize    insize;
00432     insize.height = in.nelements();
00433     insize.width = 1; 
00434     ippiCopy_16u_C1R(in.idx_ptr(),  sizeof(uint16),
00435                      out.idx_ptr(), sizeof(uint16),
00436                      insize);
00437   }
00438 
00439   template <>
00440   void ipp_copy(const idx<int16> &in, idx<int16> &out) {
00441     ipp_checks2(in, out);
00442     IppiSize    insize;
00443     insize.height = in.nelements();
00444     insize.width = 1; 
00445     ippiCopy_16s_C1R(in.idx_ptr(),  sizeof(int16),
00446                      out.idx_ptr(), sizeof(int16),
00447                      insize);
00448   }
00449 
00450   template <>
00451   void ipp_copy(const idx<int32> &in, idx<int32> &out) {
00452     ipp_checks2(in, out);
00453     IppiSize    insize;
00454     insize.height = in.nelements();
00455     insize.width = 1; 
00456     ippiCopy_32s_C1R(in.idx_ptr(),  sizeof(int32),
00457                      out.idx_ptr(), sizeof(int32),
00458                      insize);
00459   }
00460 
00461 
00462   template <>
00463   void ipp_copy(const idx<float32> &in, idx<float32> &out) {
00464     ipp_checks2(in, out);
00465     IppiSize    insize;
00466     insize.height = in.nelements();
00467     insize.width = 1; 
00468     ippiCopy_32f_C1R(in.idx_ptr(),  sizeof(float32),
00469                      out.idx_ptr(), sizeof(float32),
00470                      insize);
00471   }
00472 
00473   // templates for ipp_clear
00474 
00475   template <>
00476   void ipp_clear(idx<ubyte> &inp) {
00477     ipp_checks1(inp);
00478     IppiSize insize;
00479     insize.height = inp.nelements();
00480     insize.width = 1;
00481     ippiSet_8u_C1R(0, inp.idx_ptr(), sizeof(ubyte), insize);
00482   }
00483 
00484   template <>
00485   void ipp_clear(idx<uint16> &inp) {
00486     ipp_checks1(inp);
00487     IppiSize insize;
00488     insize.height = inp.nelements();
00489     insize.width = 1;
00490     ippiSet_16u_C1R(0, inp.idx_ptr(), sizeof(uint16), insize);
00491   }
00492 
00493   template <>
00494   void ipp_clear(idx<int16> &inp) {
00495     ipp_checks1(inp);
00496     IppiSize insize;
00497     insize.height = inp.nelements();
00498     insize.width = 1;
00499     ippiSet_16s_C1R(0, inp.idx_ptr(), sizeof(int16), insize);
00500   }
00501 
00502   template <>
00503   void ipp_clear(idx<int32> &inp) {
00504     ipp_checks1(inp);
00505     IppiSize insize;
00506     insize.height = inp.nelements();
00507     insize.width = 1;
00508     ippiSet_32s_C1R(0, inp.idx_ptr(), sizeof(int32), insize);
00509   }
00510 
00511   template <>
00512   void ipp_clear(idx<float32> &inp) {
00513     ipp_checks1(inp);
00514     IppiSize insize;
00515     insize.height = inp.nelements();
00516     insize.width = 1;
00517     ippiSet_32f_C1R(0, inp.idx_ptr(), sizeof(float32), insize);
00518   }
00519 
00520   // templates for ipp_fill
00521 
00522   template <>
00523   void ipp_fill(idx<ubyte> &inp, ubyte v) {
00524     ipp_checks1(inp);
00525     IppiSize insize;
00526     insize.height = inp.nelements();
00527     insize.width = 1;
00528     ippiSet_8u_C1R(v, inp.idx_ptr(), sizeof(ubyte), insize);
00529   }
00530 
00531   template <>
00532   void ipp_fill(idx<uint16> &inp, uint16 v) {
00533     ipp_checks1(inp);
00534     IppiSize insize;
00535     insize.height = inp.nelements();
00536     insize.width = 1;
00537     ippiSet_16u_C1R(v, inp.idx_ptr(), sizeof(uint16), insize);
00538   }
00539 
00540   template <>
00541   void ipp_fill(idx<int16> &inp, int16 v) {
00542     ipp_checks1(inp);
00543     IppiSize insize;
00544     insize.height = inp.nelements();
00545     insize.width = 1;
00546     ippiSet_16s_C1R(v, inp.idx_ptr(), sizeof(int16), insize);
00547   }
00548 
00549   template <>
00550   void ipp_fill(idx<int32> &inp, int32 v) {
00551     ipp_checks1(inp);
00552     IppiSize insize;
00553     insize.height = inp.nelements();
00554     insize.width = 1;
00555     ippiSet_32s_C1R(v, inp.idx_ptr(), sizeof(int32), insize);
00556   }
00557 
00558   template <>
00559   void ipp_fill(idx<float32> &inp, float32 v) {
00560     ipp_checks1(inp);
00561     IppiSize insize;
00562     insize.height = inp.nelements();
00563     insize.width = 1;
00564     ippiSet_32f_C1R(v, inp.idx_ptr(), sizeof(float32), insize);
00565   }
00566 
00567   // templates for in-place ipp_add
00568 
00569   template <>
00570   void ipp_add(const idx<float32> &in1, idx<float32> &in2) {
00571     ipp_checks2(in1, in2);
00572     IppiSize insize;
00573     insize.height = in1.nelements();
00574     insize.width = 1;
00575     ippiAdd_32f_C1IR(in1.idx_ptr(), sizeof(float32),
00576                      in2.idx_ptr(), sizeof(float32),
00577                      insize);
00578   }
00579 
00580   template <>
00581   void ipp_add(const idx<ubyte> &in1, idx<ubyte> &in2) {
00582     ipp_checks2(in1, in2);
00583     IppiSize insize;
00584     insize.height = in1.nelements();
00585     insize.width = 1;
00586     ippiAdd_8u_C1IRSfs(in1.idx_ptr(), sizeof(ubyte),
00587                        in2.idx_ptr(), sizeof(ubyte),
00588                        insize, 0);
00589   }
00590 
00591   template <>
00592   void ipp_add(const idx<uint16> &in1, idx<uint16> &in2) {
00593     ipp_checks2(in1, in2);
00594     IppiSize insize;
00595     insize.height = in1.nelements();
00596     insize.width = 1;
00597     ippiAdd_16u_C1IRSfs(in1.idx_ptr(), sizeof(uint16),
00598                         in2.idx_ptr(), sizeof(uint16),
00599                         insize, 0);
00600   }
00601 
00602   template <>
00603   void ipp_add(const idx<int16> &in1, idx<int16> &in2) {
00604     ipp_checks2(in1, in2);
00605     IppiSize insize;
00606     insize.height = in1.nelements();
00607     insize.width = 1;
00608     ippiAdd_16s_C1IRSfs(in1.idx_ptr(), sizeof(int16),
00609                         in2.idx_ptr(), sizeof(int16),
00610                         insize, 0);
00611   }
00612 
00613 
00614   // templates for not in place ipp_add
00615 
00616   template <>
00617   void ipp_add(const idx<float32> &in1, const idx<float32> &in2,
00618               idx<float32> &out) {
00619     ipp_checks3(in1, in2, out);
00620 
00621     IppiSize insize;
00622     insize.height = in1.nelements();
00623     insize.width = 1;
00624     ippiAdd_32f_C1R(in1.idx_ptr(), sizeof(float32),
00625                     in2.idx_ptr(), sizeof(float32),
00626                     out.idx_ptr(), sizeof(float32),
00627                     insize);
00628   }
00629 
00630   template <>
00631   void ipp_add(const idx<ubyte> &in1, const idx<ubyte> &in2,
00632               idx<ubyte> &out) {
00633     ipp_checks3(in1, in2, out);
00634 
00635     IppiSize insize;
00636     insize.height = in1.nelements();
00637     insize.width = 1;
00638     ippiAdd_8u_C1RSfs(in1.idx_ptr(), sizeof(ubyte),
00639                       in2.idx_ptr(), sizeof(ubyte),
00640                       out.idx_ptr(), sizeof(ubyte),
00641                       insize, 0);
00642   }
00643 
00644   template <>
00645   void ipp_add(const idx<uint16> &in1, const idx<uint16> &in2,
00646               idx<uint16> &out) {
00647     ipp_checks3(in1, in2, out);
00648 
00649     IppiSize insize;
00650     insize.height = in1.nelements();
00651     insize.width = 1;
00652     ippiAdd_16u_C1RSfs(in1.idx_ptr(), sizeof(uint16),
00653                        in2.idx_ptr(), sizeof(uint16),
00654                        out.idx_ptr(), sizeof(uint16),
00655                        insize, 0);
00656   }
00657 
00658   template <>
00659   void ipp_add(const idx<int16> &in1, const idx<int16> &in2,
00660               idx<int16> &out) {
00661     ipp_checks3(in1, in2, out);
00662 
00663     IppiSize insize;
00664     insize.height = in1.nelements();
00665     insize.width = 1;
00666     ippiAdd_16s_C1RSfs(in1.idx_ptr(), sizeof(int16),
00667                        in2.idx_ptr(), sizeof(int16),
00668                        out.idx_ptr(), sizeof(int16),
00669                        insize, 0);
00670   }
00671 
00672   // templates for in-place ipp_addc
00673 
00674   template <>
00675   void ipp_addc(idx<float32> &in, float32 c) {
00676     ipp_checks1(in);
00677     IppiSize    insize;
00678     insize.height = in.nelements();
00679     insize.width = 1; 
00680     ippiAddC_32f_C1IR(c, in.idx_ptr(), sizeof(float32), insize);
00681   }
00682 
00683   template <>
00684   void ipp_addc(idx<ubyte> &in, ubyte c) {
00685     ipp_checks1(in);
00686     IppiSize    insize;
00687     insize.height = in.nelements();
00688     insize.width = 1; 
00689     ippiAddC_8u_C1IRSfs(c, in.idx_ptr(), sizeof(ubyte), insize, 0);
00690   }
00691 
00692   template <>
00693   void ipp_addc(idx<int16> &in, int16 c) {
00694     ipp_checks1(in);
00695     IppiSize    insize;
00696     insize.height = in.nelements();
00697     insize.width = 1; 
00698     ippiAddC_16s_C1IRSfs(c, in.idx_ptr(), sizeof(int16), insize, 0);
00699   }
00700 
00701   template <>
00702   void ipp_addc(idx<uint16> &in, uint16 c) {
00703     ipp_checks1(in);
00704     IppiSize    insize;
00705     insize.height = in.nelements();
00706     insize.width = 1; 
00707     ippiAddC_16u_C1IRSfs(c, in.idx_ptr(), sizeof(uint16), insize, 0);
00708   }
00709 
00710   // templates for not-in-place addc
00711 
00712   template <>
00713   void ipp_addc(const idx<float32> &in, float32 c, idx<float32> &out) {
00714     ipp_checks2(in, out);
00715     IppiSize    insize;
00716     insize.height = in.nelements();
00717     insize.width = 1; 
00718     ippiAddC_32f_C1R(in.idx_ptr(), sizeof(float32), c,
00719                      out.idx_ptr(), sizeof(float32), insize);
00720   }
00721 
00722   template <>
00723   void ipp_addc(const idx<ubyte> &in, ubyte c, idx<ubyte> &out) {
00724     ipp_checks2(in, out);
00725     IppiSize    insize;
00726     insize.height = in.nelements();
00727     insize.width = 1; 
00728     ippiAddC_8u_C1RSfs(in.idx_ptr(), sizeof(ubyte), c,
00729                        out.idx_ptr(), sizeof(ubyte), insize, 0);
00730   }
00731 
00732   template <>
00733   void ipp_addc(const idx<uint16> &in, uint16 c, idx<uint16> &out) {
00734     ipp_checks2(in, out);
00735     IppiSize    insize;
00736     insize.height = in.nelements();
00737     insize.width = 1; 
00738     ippiAddC_16u_C1RSfs(in.idx_ptr(), sizeof(uint16), c,
00739                         out.idx_ptr(), sizeof(uint16), insize, 0);
00740   }
00741 
00742   template <>
00743   void ipp_addc(const idx<int16> &in, int16 c, idx<int16> &out) {
00744     ipp_checks2(in, out);
00745     IppiSize    insize;
00746     insize.height = in.nelements();
00747     insize.width = 1; 
00748     ippiAddC_16s_C1RSfs(in.idx_ptr(), sizeof(int16), c,
00749                         out.idx_ptr(), sizeof(int16), insize, 0);
00750   }
00751 
00752   // templates for in-place ipp_subc
00753 
00754   template <>
00755   void ipp_subc(idx<ubyte> &in, ubyte c) {
00756     ipp_checks1(in);
00757     IppiSize insize;
00758     insize.height = in.nelements();
00759     insize.width = 1; 
00760     ippiSubC_8u_C1IRSfs(c, in.idx_ptr(), sizeof(ubyte), insize, 0);
00761   }
00762 
00763   template <>
00764   void ipp_subc(idx<uint16> &in, uint16 c) {
00765     ipp_checks1(in);
00766     IppiSize insize;
00767     insize.height = in.nelements();
00768     insize.width = 1; 
00769     ippiSubC_16u_C1IRSfs(c, in.idx_ptr(), sizeof(uint16), insize, 0);
00770   }
00771 
00772   template <>
00773   void ipp_subc(idx<int16> &in, int16 c) {
00774     ipp_checks1(in);
00775     IppiSize insize;
00776     insize.height = in.nelements();
00777     insize.width = 1; 
00778     ippiSubC_16s_C1IRSfs(c, in.idx_ptr(), sizeof(int16), insize, 0);
00779   }
00780 
00781   template <>
00782   void ipp_subc(idx<float32> &in, float32 c) {
00783     ipp_checks1(in);
00784     IppiSize insize;
00785     insize.height = in.nelements();
00786     insize.width = 1; 
00787     ippiSubC_32f_C1IR(c, in.idx_ptr(), sizeof(float32), insize);
00788   }
00789 
00790   // templates for not-in-place ipp_subc
00791 
00792   template <>
00793   void ipp_subc(const idx<ubyte> &in, ubyte c, idx<ubyte> &out) {
00794     ipp_checks2(in, out);
00795     IppiSize insize;
00796     insize.height = in.nelements();
00797     insize.width = 1; 
00798     ippiSubC_8u_C1RSfs(in.idx_ptr(), sizeof(ubyte),
00799                        c, out.idx_ptr(), sizeof(ubyte),
00800                        insize, 0);
00801   }
00802 
00803   template <>
00804   void ipp_subc(const idx<uint16> &in, uint16 c, idx<uint16> &out) {
00805     ipp_checks2(in, out);
00806     IppiSize insize;
00807     insize.height = in.nelements();
00808     insize.width = 1; 
00809     ippiSubC_16u_C1RSfs(in.idx_ptr(), sizeof(uint16),
00810                         c, out.idx_ptr(), sizeof(uint16),
00811                         insize, 0);
00812   }
00813 
00814   template <>
00815   void ipp_subc(const idx<int16> &in, int16 c, idx<int16> &out) {
00816     ipp_checks2(in, out);
00817     IppiSize insize;
00818     insize.height = in.nelements();
00819     insize.width = 1; 
00820     ippiSubC_16s_C1RSfs(in.idx_ptr(), sizeof(int16),
00821                         c, out.idx_ptr(), sizeof(int16),
00822                         insize, 0);
00823   }
00824 
00825   template <>
00826   void ipp_subc(const idx<float32> &in, float32 c, idx<float32> &out) {
00827     ipp_checks2(in, out);
00828     IppiSize insize;
00829     insize.height = in.nelements();
00830     insize.width = 1; 
00831     ippiSubC_32f_C1R(in.idx_ptr(), sizeof(float32),
00832                      c, out.idx_ptr(), sizeof(float32),
00833                      insize);
00834   }
00835 
00836   // templates for ipp_sub
00837 
00838   template <>
00839   void ipp_sub(idx<float32> &in1, const idx<float32> &in2) {
00840     ipp_checks2(in1, in2);
00841     IppiSize insize;
00842     insize.height = in1.nelements();
00843     insize.width = 1;
00844     ippiSub_32f_C1IR(in2.idx_ptr(), sizeof(float32),
00845                      in1.idx_ptr(), sizeof(float32),
00846                      insize);
00847   }
00848 
00849   template <>
00850   void ipp_sub(idx<ubyte> &in1, const idx<ubyte> &in2) {
00851     ipp_checks2(in1, in2);
00852     IppiSize insize;
00853     insize.height = in1.nelements();
00854     insize.width = 1;
00855     ippiSub_8u_C1IRSfs(in2.idx_ptr(), sizeof(ubyte),
00856                        in1.idx_ptr(), sizeof(ubyte),
00857                        insize, 0);
00858   }
00859 
00860   template <>
00861   void ipp_sub(idx<uint16> &in1, const idx<uint16> &in2) {
00862     ipp_checks2(in1, in2);
00863     IppiSize insize;
00864     insize.height = in1.nelements();
00865     insize.width = 1;
00866     ippiSub_16u_C1IRSfs(in2.idx_ptr(), sizeof(uint16),
00867                         in1.idx_ptr(), sizeof(uint16),
00868                         insize, 0);
00869   }
00870 
00871   template <>
00872   void ipp_sub(idx<int16> &in1, const idx<int16> &in2) {
00873     ipp_checks2(in1, in2);
00874     IppiSize insize;
00875     insize.height = in1.nelements();
00876     insize.width = 1;
00877     ippiSub_16s_C1IRSfs(in2.idx_ptr(), sizeof(int16),
00878                         in1.idx_ptr(), sizeof(int16),
00879                         insize, 0);
00880   }
00881 
00882 
00883   // templates for not-in-place ipp_sub
00884 
00885   template <>
00886   void ipp_sub(const idx<float32> &in1, const idx<float32> &in2,
00887               idx<float32> &out) {
00888     ipp_checks3(in1, in2, out);
00889     IppiSize insize;
00890     insize.height = in1.nelements();
00891     insize.width = 1;
00892     ippiSub_32f_C1R(in2.idx_ptr(), sizeof(float32),
00893                     in1.idx_ptr(), sizeof(float32),
00894                     out.idx_ptr(), sizeof(float32),
00895                     insize);
00896   }
00897 
00898   template <>
00899   void ipp_sub(const idx<ubyte> &in1, const idx<ubyte> &in2,
00900               idx<ubyte> &out) {
00901     ipp_checks3(in1, in2, out);
00902     IppiSize insize;
00903     insize.height = in1.nelements();
00904     insize.width = 1;
00905     ippiSub_8u_C1RSfs(in2.idx_ptr(), sizeof(ubyte),
00906                       in1.idx_ptr(), sizeof(ubyte),
00907                       out.idx_ptr(), sizeof(ubyte),
00908                       insize, 0);
00909   }
00910 
00911   template <>
00912   void ipp_sub(const idx<uint16> &in1, const idx<uint16> &in2,
00913               idx<uint16> &out) {
00914     ipp_checks3(in1, in2, out);
00915     IppiSize insize;
00916     insize.height = in1.nelements();
00917     insize.width = 1;
00918     ippiSub_16u_C1RSfs(in2.idx_ptr(), sizeof(uint16),
00919                        in1.idx_ptr(), sizeof(uint16),
00920                        out.idx_ptr(), sizeof(uint16),
00921                        insize, 0);
00922   }
00923 
00924   template <>
00925   void ipp_sub(const idx<int16> &in1, const idx<int16> &in2,
00926               idx<int16> &out) {
00927     ipp_checks3(in1, in2, out);
00928     IppiSize insize;
00929     insize.height = in1.nelements();
00930     insize.width = 1;
00931     ippiSub_16s_C1RSfs(in2.idx_ptr(), sizeof(int16),
00932                        in1.idx_ptr(), sizeof(int16),
00933                        out.idx_ptr(), sizeof(int16),
00934                        insize, 0);
00935   }
00936 
00937   // templates for in-place ipp_mul
00938 
00939   template <>
00940   void ipp_mul(const idx<float32> &in1, idx<float32> &in2) {
00941     ipp_checks2(in1, in2);
00942     IppiSize insize;
00943     insize.height = in1.nelements();
00944     insize.width = 1;
00945     ippiMul_32f_C1IR(in1.idx_ptr(), sizeof(float32),
00946                      in2.idx_ptr(), sizeof(float32),
00947                      insize);
00948   }
00949 
00950   template <>
00951   void ipp_mul(const idx<ubyte> &in1, idx<ubyte> &in2) {
00952     ipp_checks2(in1, in2);
00953     IppiSize insize;
00954     insize.height = in1.nelements();
00955     insize.width = 1;
00956     ippiMul_8u_C1IRSfs(in1.idx_ptr(), sizeof(ubyte),
00957                        in2.idx_ptr(), sizeof(ubyte),
00958                        insize, 0);
00959   }
00960 
00961   template <>
00962   void ipp_mul(const idx<uint16> &in1, idx<uint16> &in2) {
00963     ipp_checks2(in1, in2);
00964     IppiSize insize;
00965     insize.height = in1.nelements();
00966     insize.width = 1;
00967     ippiMul_16u_C1IRSfs(in1.idx_ptr(), sizeof(uint16),
00968                         in2.idx_ptr(), sizeof(uint16),
00969                         insize, 0);
00970   }
00971 
00972   template <>
00973   void ipp_mul(const idx<int16> &in1, idx<int16> &in2) {
00974     ipp_checks2(in1, in2);
00975     IppiSize insize;
00976     insize.height = in1.nelements();
00977     insize.width = 1;
00978     ippiMul_16s_C1IRSfs(in1.idx_ptr(), sizeof(int16),
00979                         in2.idx_ptr(), sizeof(int16),
00980                         insize, 0);
00981   }
00982 
00983   // templates for not-in-place ipp_mul
00984 
00985   template <>
00986   void ipp_mul(const idx<float32> &in1, const idx<float32> &in2,
00987               idx<float32> &out) {
00988     ipp_checks3(in1, in2, out);
00989     IppiSize insize;
00990     insize.height = in1.nelements();
00991     insize.width = 1;
00992     ippiMul_32f_C1R(in1.idx_ptr(), sizeof(float32),
00993                     in2.idx_ptr(), sizeof(float32),
00994                     out.idx_ptr(), sizeof(float32),
00995                     insize);
00996   }
00997 
00998   template <>
00999   void ipp_mul(const idx<ubyte> &in1, const idx<ubyte> &in2,
01000               idx<ubyte> &out) {
01001     ipp_checks3(in1, in2, out);
01002     IppiSize insize;
01003     insize.height = in1.nelements();
01004     insize.width = 1;
01005     ippiMul_8u_C1RSfs(in1.idx_ptr(), sizeof(ubyte),
01006                       in2.idx_ptr(), sizeof(ubyte),
01007                       out.idx_ptr(), sizeof(ubyte),
01008                       insize, 0);
01009   }
01010 
01011 
01012   template <>
01013   void ipp_mul(const idx<uint16> &in1, const idx<uint16> &in2,
01014               idx<uint16> &out) {
01015     ipp_checks3(in1, in2, out);
01016     IppiSize insize;
01017     insize.height = in1.nelements();
01018     insize.width = 1;
01019     ippiMul_16u_C1RSfs(in1.idx_ptr(), sizeof(uint16),
01020                        in2.idx_ptr(), sizeof(uint16),
01021                        out.idx_ptr(), sizeof(uint16),
01022                        insize, 0);
01023   }
01024 
01025 
01026   template <>
01027   void ipp_mul(const idx<int16> &in1, const idx<int16> &in2,
01028               idx<int16> &out) {
01029     ipp_checks3(in1, in2, out);
01030     IppiSize insize;
01031     insize.height = in1.nelements();
01032     insize.width = 1;
01033     ippiMul_16s_C1RSfs(in1.idx_ptr(), sizeof(int16),
01034                        in2.idx_ptr(), sizeof(int16),
01035                        out.idx_ptr(), sizeof(int16),
01036                        insize, 0);
01037   }
01038 
01039   // templates for not-in-place ipp_m2dotm1 ////////////////////////////////////
01040 
01041   template <>
01042   void ipp_m2dotm1(const idx<float32> &in1, const idx<float32> &in2,
01043                    idx<float32> &y){
01044     idx_check_contiguous3(in1, in2, y);
01045     int so = sizeof (float32);
01046     ippmMul_mv_32f(in1.idx_ptr(), (int) in1.mod(0) * so, (int) in1.mod(1) * so,
01047                    (int) in1.dim(1), (int) in1.dim(0),
01048                    in2.idx_ptr(), (int) in2.mod(0) * so, (int) in2.dim(0),
01049                    y.idx_ptr(), (int) y.mod(0) * so);
01050   }
01051 
01052   template <>
01053   void ipp_m2dotm1(const idx<float64> &in1, const idx<float64> &in2,
01054                    idx<float64> &y){
01055     idx_check_contiguous3(in1, in2, y);
01056     int so = sizeof (float64);
01057     ippmMul_mv_64f(in1.idx_ptr(), (int) in1.mod(0) * so, (int) in1.mod(1) * so,
01058                    (int) in1.dim(1), (int) in1.dim(0),
01059                    in2.idx_ptr(), (int) in2.mod(0) * so, (int) in2.dim(0),
01060                    y.idx_ptr(), (int) y.mod(0) * so);
01061   }
01062 
01063   // templates for not-in-place ipp_m2dotm1 ////////////////////////////////////
01064 
01065   template <>
01066   void ipp_m2dotm2(const idx<float32> &in1, const idx<float32> &in2,
01067                    idx<float32> &y){
01068     std::cout << "using ipp m2m2" << std::endl;
01069     idx_check_contiguous3(in1, in2, y);
01070     int so = sizeof (float32);
01071     ippmMul_mm_32f(in1.idx_ptr(), (int) in1.mod(0) * so, (int) in1.mod(1) * so,
01072                    (int) in1.dim(1), (int) in1.dim(0),
01073                    in2.idx_ptr(), (int) in2.mod(0) * so, (int) in1.mod(1) * so,
01074                    (int) in2.dim(1), (int) in2.dim(0),
01075                    y.idx_ptr(), (int) y.mod(0) * so, (int) y.mod(1) * so);
01076   }
01077 
01078   template <>
01079   void ipp_m2dotm2(const idx<float64> &in1, const idx<float64> &in2,
01080                    idx<float64> &y){
01081     std::cout << "using ipp m2m2" << std::endl;
01082     idx_check_contiguous3(in1, in2, y);
01083     int so = sizeof (float64);
01084     ippmMul_mm_64f(in1.idx_ptr(), (int) in1.mod(0) * so, (int) in1.mod(1) * so,
01085                    (int) in1.dim(1), (int) in1.dim(0),
01086                    in2.idx_ptr(), (int) in2.mod(0) * so, (int) in1.mod(1) * so,
01087                    (int) in2.dim(1), (int) in2.dim(0),
01088                    y.idx_ptr(), (int) y.mod(0) * so, (int) y.mod(1) * so);
01089   }
01090 
01091   // templates for in-place ipp_dotc
01092 
01093   template <>
01094   void ipp_dotc(idx<float32> &inp, float32 c) {
01095     ipp_checks1(inp);
01096     IppiSize insize;
01097     insize.height = inp.nelements();
01098     insize.width = 1;
01099     ippiMulC_32f_C1IR(c, inp.idx_ptr(), sizeof(float32), insize);
01100   }
01101 
01102   template <>
01103   void ipp_dotc(idx<ubyte> &inp, ubyte c) {
01104     ipp_checks1(inp);
01105     IppiSize insize;
01106     insize.height = inp.nelements();
01107     insize.width = 1;
01108     ippiMulC_8u_C1IRSfs(c, inp.idx_ptr(), sizeof(ubyte), insize, 0);
01109   }
01110 
01111   template <>
01112   void ipp_dotc(idx<uint16> &inp, uint16 c) {
01113     ipp_checks1(inp);
01114     IppiSize insize;
01115     insize.height = inp.nelements();
01116     insize.width = 1;
01117     ippiMulC_16u_C1IRSfs(c, inp.idx_ptr(), sizeof(uint16), insize, 0);
01118   }
01119 
01120   template <>
01121   void ipp_dotc(idx<int16> &inp, int16 c) {
01122     ipp_checks1(inp);
01123     IppiSize insize;
01124     insize.height = inp.nelements();
01125     insize.width = 1;
01126     ippiMulC_16s_C1IRSfs(c, inp.idx_ptr(), sizeof(int16), insize, 0);
01127   }
01128 
01129   // templates for not-in-place ipp_dotc
01130 
01131   template <>
01132   void ipp_dotc(const idx<float32> &in, float32 c, idx<float32> &out) {
01133     ipp_checks2(in, out);
01134     IppiSize    insize;
01135     insize.height = in.nelements();
01136     insize.width = 1; 
01137     ippiMulC_32f_C1R(in.idx_ptr(), sizeof(float32), c,
01138                      out.idx_ptr(), sizeof(float32), insize);
01139   }
01140 
01141   template <>
01142   void ipp_dotc(const idx<ubyte> &in, ubyte c, idx<ubyte> &out) {
01143     ipp_checks2(in, out);
01144     IppiSize    insize;
01145     insize.height = in.nelements();
01146     insize.width = 1; 
01147     ippiMulC_8u_C1RSfs(in.idx_ptr(), sizeof(ubyte), c,
01148                        out.idx_ptr(), sizeof(ubyte), insize, 0);
01149   }
01150 
01151   template <>
01152   void ipp_dotc(const idx<uint16> &in, uint16 c, idx<uint16> &out) {
01153     ipp_checks2(in, out);
01154     IppiSize    insize;
01155     insize.height = in.nelements();
01156     insize.width = 1; 
01157     ippiMulC_16u_C1RSfs(in.idx_ptr(), sizeof(uint16), c,
01158                         out.idx_ptr(), sizeof(uint16), insize, 0);
01159   }
01160 
01161   template <>
01162   void ipp_dotc(const idx<int16> &in, int16 c, idx<int16> &out) {
01163     ipp_checks2(in, out);
01164     IppiSize    insize;
01165     insize.height = in.nelements();
01166     insize.width = 1; 
01167     ippiMulC_16s_C1RSfs(in.idx_ptr(), sizeof(int16), c,
01168                         out.idx_ptr(), sizeof(int16), insize, 0);
01169   }
01170 
01171 
01172   // templates for in-place ipp_div
01173 
01174   template <>
01175   void ipp_div(idx<float32> &in1, const idx<float32> &in2) {
01176     ipp_checks2(in1, in2);
01177     IppiSize insize;
01178     insize.height = in1.nelements();
01179     insize.width = 1;
01180     ippiDiv_32f_C1IR(in2.idx_ptr(), sizeof(float32),
01181                      in1.idx_ptr(), sizeof(float32),
01182                      insize);
01183   }
01184 
01185   template <>
01186   void ipp_div(idx<ubyte> &in1, const idx<ubyte> &in2) {
01187     ipp_checks2(in1, in2);
01188     IppiSize insize;
01189     insize.height = in1.nelements();
01190     insize.width = 1;
01191     ippiDiv_8u_C1IRSfs(in2.idx_ptr(), sizeof(ubyte),
01192                        in1.idx_ptr(), sizeof(ubyte),
01193                        insize, 0);
01194   }
01195 
01196   template <>
01197   void ipp_div(idx<uint16> &in1, const idx<uint16> &in2) {
01198     ipp_checks2(in1, in2);
01199     IppiSize insize;
01200     insize.height = in1.nelements();
01201     insize.width = 1;
01202     ippiDiv_16u_C1IRSfs(in2.idx_ptr(), sizeof(uint16),
01203                         in1.idx_ptr(), sizeof(uint16),
01204                         insize, 0);
01205   }
01206 
01207   template <>
01208   void ipp_div(idx<int16> &in1, const idx<int16> &in2) {
01209     ipp_checks2(in1, in2);
01210     IppiSize insize;
01211     insize.height = in1.nelements();
01212     insize.width = 1;
01213     ippiDiv_16s_C1IRSfs(in2.idx_ptr(), sizeof(int16),
01214                         in1.idx_ptr(), sizeof(int16),
01215                         insize, 0);
01216   }
01217 
01218   // templates for not-in-place ipp_div
01219 
01220   template <>
01221   void ipp_div(const idx<float32> &in1, const idx<float32> &in2,
01222               idx<float32> &out) {
01223     ipp_checks3(in1, in2, out);
01224     IppiSize insize;
01225     insize.height = in1.nelements();
01226     insize.width = 1;
01227     ippiDiv_32f_C1R(in2.idx_ptr(), sizeof(float32),
01228                     in1.idx_ptr(), sizeof(float32),
01229                     out.idx_ptr(), sizeof(float32),
01230                     insize);
01231   }
01232 
01233   template <>
01234   void ipp_div(const idx<ubyte> &in1, const idx<ubyte> &in2,
01235               idx<ubyte> &out) {
01236     ipp_checks3(in1, in2, out);
01237     IppiSize insize;
01238     insize.height = in1.nelements();
01239     insize.width = 1;
01240     ippiDiv_8u_C1RSfs(in2.idx_ptr(), sizeof(ubyte),
01241                       in1.idx_ptr(), sizeof(ubyte),
01242                       out.idx_ptr(), sizeof(ubyte),
01243                       insize, 0);
01244   }
01245 
01246   template <>
01247   void ipp_div(const idx<uint16> &in1, const idx<uint16> &in2,
01248               idx<uint16> &out) {
01249     ipp_checks3(in1, in2, out);
01250     IppiSize insize;
01251     insize.height = in1.nelements();
01252     insize.width = 1;
01253     ippiDiv_16u_C1RSfs(in2.idx_ptr(), sizeof(uint16),
01254                        in1.idx_ptr(), sizeof(uint16),
01255                        out.idx_ptr(), sizeof(uint16),
01256                        insize, 0);
01257   }
01258 
01259   template <>
01260   void ipp_div(const idx<int16> &in1, const idx<int16> &in2,
01261               idx<int16> &out) {
01262     ipp_checks3(in1, in2, out);
01263     IppiSize insize;
01264     insize.height = in1.nelements();
01265     insize.width = 1;
01266     ippiDiv_16s_C1RSfs(in2.idx_ptr(), sizeof(int16),
01267                        in1.idx_ptr(), sizeof(int16),
01268                        out.idx_ptr(), sizeof(int16),
01269                        insize, 0);
01270   }
01271 
01272   // ipp_inv
01273 
01274   void ipp_inv(const idx<float32> &in, idx<float32> &out) {
01275     ipp_fill(out, 1.0f);
01276     ipp_div(out, in);
01277   }
01278 
01279   // templates for in-place ipp_abs
01280 
01281   template <>
01282   void ipp_abs(idx<int16> &inp) {
01283     ipp_checks1(inp);
01284     IppiSize insize;
01285     insize.height = inp.nelements();
01286     insize.width = 1;
01287     ippiAbs_16s_C1IR(inp.idx_ptr(), sizeof(int16), insize);
01288   }
01289 
01290   template <>
01291   void ipp_abs(idx<float32> &inp) {
01292     ipp_checks1(inp);
01293     IppiSize insize;
01294     insize.height = inp.nelements();
01295     insize.width = 1;
01296     ippiAbs_32f_C1IR(inp.idx_ptr(), sizeof(float32), insize);
01297   }
01298 
01299   // templates for not-in-place ipp_abs
01300 
01301   template <>
01302   void ipp_abs(const idx<int16> &inp, idx<int16> &out) {
01303     ipp_checks2(inp, out);
01304     IppiSize insize;
01305     insize.height = inp.nelements();
01306     insize.width = 1;
01307     ippiAbs_16s_C1R(inp.idx_ptr(), sizeof(int16),
01308                     out.idx_ptr(), sizeof(int16),
01309                     insize);
01310   }
01311 
01312   template <>
01313   void ipp_abs(const idx<float32> &inp, idx<float32> &out) {
01314     ipp_checks2(inp, out);
01315     IppiSize insize;
01316     insize.height = inp.nelements();
01317     insize.width = 1;
01318     ippiAbs_32f_C1R(inp.idx_ptr(), sizeof(float32),
01319                     out.idx_ptr(), sizeof(float32),
01320                     insize);
01321   }
01322   
01323   // templates for in-place ipp_sqrt
01324 
01325   template <>
01326   void ipp_sqrt(idx<ubyte> &inp) {
01327     ipp_checks1(inp);
01328     IppiSize insize;
01329     insize.height = inp.nelements();
01330     insize.width = 1;
01331     ippiSqrt_8u_C1IRSfs(inp.idx_ptr(), sizeof(ubyte), insize, 0);
01332   }
01333 
01334   template <>
01335   void ipp_sqrt(idx<uint16> &inp) {
01336     ipp_checks1(inp);
01337     IppiSize insize;
01338     insize.height = inp.nelements();
01339     insize.width = 1;
01340     ippiSqrt_16u_C1IRSfs(inp.idx_ptr(), sizeof(uint16), insize, 0);
01341   } 
01342 
01343   template <>
01344   void ipp_sqrt(idx<int16> &inp) {
01345     ipp_checks1(inp);
01346     IppiSize insize;
01347     insize.height = inp.nelements();
01348     insize.width = 1;
01349     ippiSqrt_16s_C1IRSfs(inp.idx_ptr(), sizeof(int16), insize, 0);
01350   }
01351 
01352   template <>
01353   void ipp_sqrt(idx<float32> &inp) {
01354     ipp_checks1(inp);
01355     IppiSize insize;
01356     insize.height = inp.nelements();
01357     insize.width = 1;
01358     ippiSqrt_32f_C1IR(inp.idx_ptr(), sizeof(float32), insize);
01359   }
01360 
01361   // templates for not-in-place ipp_sqrt
01362 
01363   template <>
01364   void ipp_sqrt(const idx<ubyte> &inp, idx<ubyte> &out) {
01365     ipp_checks2(inp, out);
01366     IppiSize insize;
01367     insize.height = inp.nelements();
01368     insize.width = 1;
01369     ippiSqrt_8u_C1RSfs(inp.idx_ptr(), sizeof(ubyte),
01370                        out.idx_ptr(), sizeof(ubyte),
01371                        insize, 0);
01372   }
01373   
01374   template <>
01375   void ipp_sqrt(const idx<uint16> &inp, idx<uint16> &out) {
01376     ipp_checks2(inp, out);
01377     IppiSize insize;
01378     insize.height = inp.nelements();
01379     insize.width = 1;
01380     ippiSqrt_16u_C1RSfs(inp.idx_ptr(), sizeof(uint16),
01381                         out.idx_ptr(), sizeof(uint16),
01382                         insize, 0);
01383   }
01384 
01385   template <>
01386   void ipp_sqrt(const idx<int16> &inp, idx<int16> &out) {
01387     ipp_checks2(inp, out);
01388     IppiSize insize;
01389     insize.height = inp.nelements();
01390     insize.width = 1;
01391     ippiSqrt_16s_C1RSfs(inp.idx_ptr(), sizeof(int16),
01392                         out.idx_ptr(), sizeof(int16),
01393                         insize, 0);
01394   }
01395 
01396   template <>
01397   void ipp_sqrt(const idx<float32> &inp, idx<float32> &out) {
01398     ipp_checks2(inp, out);
01399     IppiSize insize;
01400     insize.height = inp.nelements();
01401     insize.width = 1;
01402     ippiSqrt_32f_C1R(inp.idx_ptr(), sizeof(float32),
01403                      out.idx_ptr(), sizeof(float32),
01404                      insize);
01405   }
01406 
01407   // templates for in-place ipp_exp
01408 
01409   template <>
01410   void ipp_exp(idx<ubyte> &inp) {
01411     ipp_checks1(inp);
01412     IppiSize insize;
01413     insize.height = inp.nelements();
01414     insize.width = 1;
01415     ippiExp_8u_C1IRSfs(inp.idx_ptr(), sizeof(ubyte), insize, 0);
01416   }
01417 
01418   template <>
01419   void ipp_exp(idx<uint16> &inp) {
01420     ipp_checks1(inp);
01421     IppiSize insize;
01422     insize.height = inp.nelements();
01423     insize.width = 1;
01424     ippiExp_16u_C1IRSfs(inp.idx_ptr(), sizeof(uint16), insize, 0);
01425   } 
01426 
01427   template <>
01428   void ipp_exp(idx<int16> &inp) {
01429     ipp_checks1(inp);
01430     IppiSize insize;
01431     insize.height = inp.nelements();
01432     insize.width = 1;
01433     ippiExp_16s_C1IRSfs(inp.idx_ptr(), sizeof(int16), insize, 0);
01434   }
01435 
01436   template <>
01437   void ipp_exp(idx<float32> &inp) {
01438     ipp_checks1(inp);
01439     IppiSize insize;
01440     insize.height = inp.nelements();
01441     insize.width = 1;
01442     ippiExp_32f_C1IR(inp.idx_ptr(), sizeof(float32), insize);
01443   }
01444 
01445   // templates for not-in-place ipp_exp
01446 
01447   template <>
01448   void ipp_exp(const idx<ubyte> &inp, idx<ubyte> &out) {
01449     ipp_checks2(inp, out);
01450     IppiSize insize;
01451     insize.height = inp.nelements();
01452     insize.width = 1;
01453     ippiExp_8u_C1RSfs(inp.idx_ptr(), sizeof(ubyte),
01454                        out.idx_ptr(), sizeof(ubyte),
01455                        insize, 0);
01456   }
01457   
01458   template <>
01459   void ipp_exp(const idx<uint16> &inp, idx<uint16> &out) {
01460     ipp_checks2(inp, out);
01461     IppiSize insize;
01462     insize.height = inp.nelements();
01463     insize.width = 1;
01464     ippiExp_16u_C1RSfs(inp.idx_ptr(), sizeof(uint16),
01465                         out.idx_ptr(), sizeof(uint16),
01466                         insize, 0);
01467   }
01468 
01469   template <>
01470   void ipp_exp(const idx<int16> &inp, idx<int16> &out) {
01471     ipp_checks2(inp, out);
01472     IppiSize insize;
01473     insize.height = inp.nelements();
01474     insize.width = 1;
01475     ippiExp_16s_C1RSfs(inp.idx_ptr(), sizeof(int16),
01476                         out.idx_ptr(), sizeof(int16),
01477                         insize, 0);
01478   }
01479 
01480   template <>
01481   void ipp_exp(const idx<float32> &inp, idx<float32> &out) {
01482     ipp_checks2(inp, out);
01483     IppiSize insize;
01484     insize.height = inp.nelements();
01485     insize.width = 1;
01486     ippiExp_32f_C1R(inp.idx_ptr(), sizeof(float32),
01487                      out.idx_ptr(), sizeof(float32),
01488                      insize);
01489   }
01490 
01491   // templates for ipp_sum
01492 
01493   template <>
01494   float64 ipp_sum(const idx<ubyte> &inp) {
01495     ipp_checks1(inp);
01496     IppiSize insize;
01497     insize.height = inp.nelements();
01498     insize.width = 1;
01499     float64 ret;
01500     ippiSum_8u_C1R(inp.idx_ptr(), sizeof(ubyte),
01501                    insize, &ret);
01502     return ret;
01503   }
01504 
01505   template <>
01506   float64 ipp_sum(const idx<uint16> &inp) {
01507     ipp_checks1(inp);
01508     IppiSize insize;
01509     insize.height = inp.nelements();
01510     insize.width = 1;
01511     float64 ret;
01512     ippiSum_16u_C1R(inp.idx_ptr(), sizeof(uint16),
01513                     insize, &ret);
01514     return ret;
01515   }
01516 
01517   template <>
01518   float64 ipp_sum(const idx<int16> &inp) {
01519     ipp_checks1(inp);
01520     IppiSize insize;
01521     insize.height = inp.nelements();
01522     insize.width = 1;
01523     float64 ret;
01524     ippiSum_16s_C1R(inp.idx_ptr(), sizeof(int16),
01525                     insize, &ret);
01526     return ret;
01527   }
01528 
01529   template <>
01530   float64 ipp_sum(const idx<float32> &inp) {
01531     ipp_checks1(inp);
01532     IppiSize insize;
01533     insize.height = inp.nelements();
01534     insize.width = 1;
01535     float64 ret;
01536 #ifdef __IPPFAST__
01537     ippiSum_32f_C1R(inp.idx_ptr(), sizeof(float32),
01538                     insize, &ret, ippAlgHintFast);
01539 #else
01540 #ifndef __IPPACC__
01541 #error either __IPPFAST__ or __IPPACC__ must be defined
01542 #endif
01543     ippiSum_32f_C1R(inp.idx_ptr(), sizeof(float32),
01544                     insize, &ret, ippAlgHintAccurate);
01545 #endif
01546     return ret;
01547   }
01548 
01549   // templates for ipp_numabs
01550 
01551   template <>
01552   float64 ipp_sumabs(const idx<ubyte> &inp) {
01553     ipp_checks1(inp);
01554     IppiSize insize;
01555     insize.height = inp.nelements();
01556     insize.width = 1;
01557     float64 ret;
01558     ippiNorm_L1_8u_C1R(inp.idx_ptr(), sizeof(ubyte),
01559                        insize, &ret);
01560     return ret;
01561   }
01562 
01563   template <>
01564   float64 ipp_sumabs(const idx<uint16> &inp) {
01565     ipp_checks1(inp);
01566     IppiSize insize;
01567     insize.height = inp.nelements();
01568     insize.width = 1;
01569     float64 ret;
01570     ippiNorm_L1_16u_C1R(inp.idx_ptr(), sizeof(uint16),
01571                         insize, &ret);
01572     return ret;
01573   }
01574 
01575   template <>
01576   float64 ipp_sumabs(const idx<int16> &inp) {
01577     ipp_checks1(inp);
01578     IppiSize insize;
01579     insize.height = inp.nelements();
01580     insize.width = 1;
01581     float64 ret;
01582     ippiNorm_L1_16s_C1R(inp.idx_ptr(), sizeof(int16),
01583                         insize, &ret);
01584     return ret;
01585   }
01586 
01587   template <>
01588   float64 ipp_sumabs(const idx<float32> &inp) {
01589     ipp_checks1(inp);
01590     IppiSize insize;
01591     insize.height = inp.nelements();
01592     insize.width = 1;
01593     float64 ret;
01594 #ifdef __IPPFAST__
01595     ippiNorm_L1_32f_C1R(inp.idx_ptr(), sizeof(float32),
01596                         insize, &ret, ippAlgHintFast);
01597 #else
01598 #ifndef __IPPACC__
01599 #error either __IPPFAST__ or __IPPACC__ must be defined
01600 #endif
01601     ippiNorm_L1_32f_C1R(inp.idx_ptr(), sizeof(float32),
01602                         insize, &ret, ippAlgHintAccurate);
01603 #endif
01604     return ret;
01605   }
01606 
01607   // templates for ipp_l2norm
01608 
01609   template <>
01610   float64 ipp_l2norm(const idx<ubyte> &inp) {
01611     ipp_checks1(inp);
01612     IppiSize insize;
01613     insize.height = inp.nelements();
01614     insize.width = 1;
01615     float64 ret;
01616     ippiNorm_L2_8u_C1R(inp.idx_ptr(), sizeof(ubyte),
01617                        insize, &ret);
01618     return ret;
01619   }
01620 
01621   template <>
01622   float64 ipp_l2norm(const idx<uint16> &inp) {
01623     ipp_checks1(inp);
01624     IppiSize insize;
01625     insize.height = inp.nelements();
01626     insize.width = 1;
01627     float64 ret;
01628     ippiNorm_L2_16u_C1R(inp.idx_ptr(), sizeof(uint16),
01629                         insize, &ret);
01630     return ret;
01631   }
01632 
01633   template <>
01634   float64 ipp_l2norm(const idx<int16> &inp) {
01635     ipp_checks1(inp);
01636     IppiSize insize;
01637     insize.height = inp.nelements();
01638     insize.width = 1;
01639     float64 ret;
01640     ippiNorm_L2_16s_C1R(inp.idx_ptr(), sizeof(int16),
01641                         insize, &ret);
01642     return ret;
01643   }
01644 
01645   template <>
01646   float64 ipp_l2norm(const idx<float32> &inp) {
01647     ipp_checks1(inp);
01648     IppiSize insize;
01649     insize.height = inp.nelements();
01650     insize.width = 1;
01651     float64 ret;
01652 #ifdef __IPPFAST__
01653     ippiNorm_L2_32f_C1R(inp.idx_ptr(), sizeof(float32),
01654                         insize, &ret, ippAlgHintFast);
01655 #else
01656 #ifndef __IPPACC__
01657 #error either __IPPFAST__ or __IPPACC__ must be defined
01658 #endif
01659     ippiNorm_L2_32f_C1R(inp.idx_ptr(), sizeof(float32),
01660                         insize, &ret, ippAlgHintAccurate);
01661 #endif
01662     return ret;
01663   }
01664 
01665   // templates for ipp_mean
01666 
01667   template <>
01668   float64 ipp_mean(const idx<ubyte> &inp) {
01669     ipp_checks1(inp);
01670     IppiSize insize;
01671     insize.height = inp.nelements();
01672     insize.width = 1;
01673     float64 ret;
01674     ippiMean_8u_C1R(inp.idx_ptr(), sizeof(ubyte),
01675                    insize, &ret);
01676     return ret;
01677   }
01678 
01679   template <>
01680   float64 ipp_mean(const idx<uint16> &inp) {
01681     ipp_checks1(inp);
01682     IppiSize insize;
01683     insize.height = inp.nelements();
01684     insize.width = 1;
01685     float64 ret;
01686     ippiMean_16u_C1R(inp.idx_ptr(), sizeof(uint16),
01687                     insize, &ret);
01688     return ret;
01689   }
01690 
01691   template <>
01692   float64 ipp_mean(const idx<int16> &inp) {
01693     ipp_checks1(inp);
01694     IppiSize insize;
01695     insize.height = inp.nelements();
01696     insize.width = 1;
01697     float64 ret;
01698     ippiMean_16s_C1R(inp.idx_ptr(), sizeof(int16),
01699                     insize, &ret);
01700     return ret;
01701   }
01702 
01703   template <>
01704   float64 ipp_mean(const idx<float32> &inp) {
01705     ipp_checks1(inp);
01706     IppiSize insize;
01707     insize.height = inp.nelements();
01708     insize.width = 1;
01709     float64 ret;
01710 #ifdef __IPPFAST__
01711     ippiMean_32f_C1R(inp.idx_ptr(), sizeof(float32),
01712                         insize, &ret, ippAlgHintFast);
01713 #else
01714 #ifndef __IPPACC__
01715 #error either __IPPFAST__ or __IPPACC__ must be defined
01716 #endif
01717     ippiMean_32f_C1R(inp.idx_ptr(), sizeof(float32),
01718                         insize, &ret, ippAlgHintAccurate);
01719 #endif
01720     return ret;
01721   }
01722 
01723   // templates for ipp_std_normalize
01724 
01725   template <>
01726   void ipp_std_normalize(const idx<ubyte> &inp, idx<ubyte> &out,
01727                          ubyte* mean_p) {
01728     ipp_checks2(inp, out);
01729     IppiSize insize;
01730     insize.height = inp.nelements();
01731     insize.width = 1;
01732     float64 mean, std_dev;
01733     if (mean_p == NULL) {
01734       ippiMean_StdDev_8u_C1R(inp.idx_ptr(), sizeof(ubyte),
01735                              insize, &mean, &std_dev);
01736     } else {
01737       ippiMean_StdDev_8u_C1R(inp.idx_ptr(), sizeof(ubyte),
01738                              insize, NULL, &std_dev);
01739       mean = *mean_p;
01740     }
01741     ipp_subc(inp, (ubyte)mean, out);
01742     idxdim size = idxdim(inp);
01743     idx<ubyte> temp(size);
01744     ipp_fill(temp, (ubyte)std_dev);
01745     ipp_div(out, temp);
01746   }
01747 
01748   // float32 specialization: adds -mean to inp into out (ipp_addc), then
01749   // multiplies out by 1 / std_dev (ipp_dotc). When mean_p is non-NULL,
01750   // *mean_p replaces the mean measured on inp.
01751   template <> void ipp_std_normalize(const idx<float32> &inp,
01752                                      idx<float32> &out, float32* mean_p) {
01753     ipp_checks2(inp, out);
01754     IppiSize roi;
01755     roi.width = 1;
01756     roi.height = inp.nelements();
01757     float64 mean, std_dev;
01758     // IPP is given a NULL mean pointer when the caller supplies the mean
01759     float64 *mean_dst = (mean_p == NULL) ? &mean : NULL;
01760     ippiMean_StdDev_32f_C1R(inp.idx_ptr(), sizeof(float32), roi,
01761                             mean_dst, &std_dev);
01762     if (mean_p != NULL) mean = *mean_p;
01763     // both factors are narrowed to float32 before being applied
01764     ipp_addc(inp, - (float32)mean, out);
01765     ipp_dotc(out, (float32)(1.0 / std_dev));
01766   }
01767 
01768 #ifdef __IPP_DOT__
01769   // templates for ipp_dot
01770 
01771   // ubyte specialization: dot product of in1 and in2 via IPP, as float64.
01772   template <>
01773   float64 ipp_dot(const idx<ubyte> &in1, const idx<ubyte> &in2) {
01774     ipp_checks2(in1, in2);
01775     IppiSize roi;
01776     roi.width = 1;
01777     roi.height = in1.nelements();
01778     float64 prod;
01779     ippiDotProd_8u64f_C1R(in1.idx_ptr(), sizeof(ubyte), in2.idx_ptr(),
01780                           sizeof(ubyte), roi, &prod);
01781     return prod;
01782   }
01783 
01784   // byte specialization: dot product of in1 and in2 via IPP, as float64.
01785   template <>
01786   float64 ipp_dot(const idx<byte> &in1, const idx<byte> &in2) {
01787     ipp_checks2(in1, in2);
01788     IppiSize roi;
01789     roi.width = 1;
01790     roi.height = in1.nelements();
01791     float64 prod;
01792     ippiDotProd_8s64f_C1R(in1.idx_ptr(), sizeof(byte), in2.idx_ptr(),
01793                           sizeof(byte), roi, &prod);
01794     return prod;
01795   }
01796 
01797   // uint16 specialization: dot product of in1 and in2 via IPP, as float64.
01798   template <>
01799   float64 ipp_dot(const idx<uint16> &in1, const idx<uint16> &in2) {
01800     ipp_checks2(in1, in2);
01801     IppiSize roi;
01802     roi.width = 1;
01803     roi.height = in1.nelements();
01804     float64 prod;
01805     ippiDotProd_16u64f_C1R(in1.idx_ptr(), sizeof(uint16), in2.idx_ptr(),
01806                            sizeof(uint16), roi, &prod);
01807     return prod;
01808   }
01809 
01810   // int16 specialization: dot product of in1 and in2 via IPP, as float64.
01811   template <>
01812   float64 ipp_dot(const idx<int16> &in1, const idx<int16> &in2) {
01813     ipp_checks2(in1, in2);
01814     IppiSize roi;
01815     roi.width = 1;
01816     roi.height = in1.nelements();
01817     float64 prod;
01818     ippiDotProd_16s64f_C1R(in1.idx_ptr(), sizeof(int16), in2.idx_ptr(),
01819                            sizeof(int16), roi, &prod);
01820     return prod;
01821   }
01822 
01823   // uint32 specialization: dot product of in1 and in2 via IPP, as float64.
01824   template <>
01825   float64 ipp_dot(const idx<uint32> &in1, const idx<uint32> &in2) {
01826     ipp_checks2(in1, in2);
01827     IppiSize roi;
01828     roi.width = 1;
01829     roi.height = in1.nelements();
01830     float64 prod;
01831     ippiDotProd_32u64f_C1R(in1.idx_ptr(), sizeof(uint32), in2.idx_ptr(),
01832                            sizeof(uint32), roi, &prod);
01833     return prod;
01834   }
01835 
01836   // int32 specialization: dot product of in1 and in2 via IPP, as float64.
01837   template <>
01838   float64 ipp_dot(const idx<int32> &in1, const idx<int32> &in2) {
01839     ipp_checks2(in1, in2);
01840     IppiSize roi;
01841     roi.width = 1;
01842     roi.height = in1.nelements();
01843     float64 prod;
01844     ippiDotProd_32s64f_C1R(in1.idx_ptr(), sizeof(int32), in2.idx_ptr(),
01845                            sizeof(int32), roi, &prod);
01846     return prod;
01847   }
01848 
01849   // float32 specialization: dot product of in1 and in2 via IPP, returned
01850   // as float64. The IPP algorithm hint is chosen at compile time:
01851   // __IPPFAST__ selects the fast one, __IPPACC__ the accurate one.
01852   template <>
01853   float64 ipp_dot(const idx<float32> &in1, const idx<float32> &in2) {
01854     ipp_checks2(in1, in2);
01855     IppiSize roi;
01856     roi.width = 1;
01857     roi.height = in1.nelements();
01858     float64 prod;
01859 #if defined(__IPPFAST__)
01860     ippiDotProd_32f64f_C1R(in1.idx_ptr(), sizeof(float32), in2.idx_ptr(),
01861                            sizeof(float32), roi, &prod, ippAlgHintFast);
01862 #elif defined(__IPPACC__)
01863     ippiDotProd_32f64f_C1R(in1.idx_ptr(), sizeof(float32), in2.idx_ptr(),
01864                            sizeof(float32), roi, &prod, ippAlgHintAccurate);
01865 #else
01866 #error either __IPPFAST__ or __IPPACC__ must be defined
01867 #endif
01868     return prod;
01869   }
01870 #endif
01871 
01872   // templates for ipp_max
01873 
01874   // ubyte specialization: largest element of inp, as reported by IPP.
01875   template <> ubyte ipp_max(const idx<ubyte> &inp) {
01876     ipp_checks1(inp);
01877     // inp is scanned as one column: one element per row
01878     IppiSize roi;
01879     roi.width = 1;
01880     roi.height = inp.nelements();
01881     ubyte maxval;
01882     ippiMax_8u_C1R(inp.idx_ptr(), sizeof(ubyte), roi, &maxval);
01883     return maxval;
01884   }
01885 
01886   // uint16 specialization: largest element of inp, as reported by IPP.
01887   template <> uint16 ipp_max(const idx<uint16> &inp) {
01888     ipp_checks1(inp);
01889     // inp is scanned as one column: one element per row
01890     IppiSize roi;
01891     roi.width = 1;
01892     roi.height = inp.nelements();
01893     uint16 maxval;
01894     ippiMax_16u_C1R(inp.idx_ptr(), sizeof(uint16), roi, &maxval);
01895     return maxval;
01896   }
01897 
01898   // int16 specialization: largest element of inp, as reported by IPP.
01899   template <> int16 ipp_max(const idx<int16> &inp) {
01900     ipp_checks1(inp);
01901     // inp is scanned as one column: one element per row
01902     IppiSize roi;
01903     roi.width = 1;
01904     roi.height = inp.nelements();
01905     int16 maxval;
01906     ippiMax_16s_C1R(inp.idx_ptr(), sizeof(int16), roi, &maxval);
01907     return maxval;
01908   }
01909 
01910   // float32 specialization: largest element of inp, as reported by IPP.
01911   template <> float32 ipp_max(const idx<float32> &inp) {
01912     ipp_checks1(inp);
01913     // inp is scanned as one column: one element per row
01914     IppiSize roi;
01915     roi.width = 1;
01916     roi.height = inp.nelements();
01917     float32 maxval;
01918     ippiMax_32f_C1R(inp.idx_ptr(), sizeof(float32), roi, &maxval);
01919     return maxval;
01920   }
01921 
01922   // templates for ipp_indexmax
01923   
01924   // ubyte specialization: position of the largest element of inp.
01925   // inp is handed to IPP as a single row, so the x index is the answer.
01926   template <> intg ipp_indexmax(const idx<ubyte> &inp) {
01927     ipp_checks1(inp);
01928     IppiSize roi;
01929     roi.height = 1;
01930     roi.width = inp.nelements();
01931     ubyte peak;  // filled in by IPP but not returned
01932     int x, y;
01933     ippiMaxIndx_8u_C1R(inp.idx_ptr(), sizeof(ubyte), roi, &peak, &x, &y);
01934     return (intg)x;
01935   }
01936 
01937   // uint16 specialization: position of the largest element of inp.
01938   // inp is handed to IPP as a single row, so the x index is the answer.
01939   template <> intg ipp_indexmax(const idx<uint16> &inp) {
01940     ipp_checks1(inp);
01941     IppiSize roi;
01942     roi.height = 1;
01943     roi.width = inp.nelements();
01944     uint16 peak;  // filled in by IPP but not returned
01945     int x, y;
01946     ippiMaxIndx_16u_C1R(inp.idx_ptr(), sizeof(uint16), roi, &peak, &x, &y);
01947     return (intg)x;
01948   }
01949 
01950   // int16 specialization: position of the largest element of inp.
01951   // inp is handed to IPP as a single row, so the x index is the answer.
01952   template <> intg ipp_indexmax(const idx<int16> &inp) {
01953     ipp_checks1(inp);
01954     IppiSize roi;
01955     roi.height = 1;
01956     roi.width = inp.nelements();
01957     int16 peak;  // filled in by IPP but not returned
01958     int x, y;
01959     ippiMaxIndx_16s_C1R(inp.idx_ptr(), sizeof(int16), roi, &peak, &x, &y);
01960     return (intg)x;
01961   }
01962 
01963   // float32 specialization: position of the largest element of inp.
01964   // inp is handed to IPP as a single row, so the x index is the answer.
01965   template <> intg ipp_indexmax(const idx<float32> &inp) {
01966     ipp_checks1(inp);
01967     IppiSize roi;
01968     roi.height = 1;
01969     roi.width = inp.nelements();
01970     float32 peak;  // filled in by IPP but not returned
01971     int x, y;
01972     ippiMaxIndx_32f_C1R(inp.idx_ptr(), sizeof(float32), roi, &peak, &x, &y);
01973     return (intg)x;
01974   }
01975 
01976   // templates for ipp_min
01977 
01978   // ubyte specialization: smallest element of inp, as reported by IPP.
01979   template <> ubyte ipp_min(const idx<ubyte> &inp) {
01980     ipp_checks1(inp);
01981     // inp is scanned as one column: one element per row
01982     IppiSize roi;
01983     roi.width = 1;
01984     roi.height = inp.nelements();
01985     ubyte minval;
01986     ippiMin_8u_C1R(inp.idx_ptr(), sizeof(ubyte), roi, &minval);
01987     return minval;
01988   }
01989 
01990   // uint16 specialization: smallest element of inp, as reported by IPP.
01991   template <> uint16 ipp_min(const idx<uint16> &inp) {
01992     ipp_checks1(inp);
01993     // inp is scanned as one column: one element per row
01994     IppiSize roi;
01995     roi.width = 1;
01996     roi.height = inp.nelements();
01997     uint16 minval;
01998     ippiMin_16u_C1R(inp.idx_ptr(), sizeof(uint16), roi, &minval);
01999     return minval;
02000   }
02001 
02002   // int16 specialization: smallest element of inp, as reported by IPP.
02003   template <> int16 ipp_min(const idx<int16> &inp) {
02004     ipp_checks1(inp);
02005     // inp is scanned as one column: one element per row
02006     IppiSize roi;
02007     roi.width = 1;
02008     roi.height = inp.nelements();
02009     int16 minval;
02010     ippiMin_16s_C1R(inp.idx_ptr(), sizeof(int16), roi, &minval);
02011     return minval;
02012   }
02013 
02014   // float32 specialization: smallest element of inp, as reported by IPP.
02015   template <> float32 ipp_min(const idx<float32> &inp) {
02016     ipp_checks1(inp);
02017     // inp is scanned as one column: one element per row
02018     IppiSize roi;
02019     roi.width = 1;
02020     roi.height = inp.nelements();
02021     float32 minval;
02022     ippiMin_32f_C1R(inp.idx_ptr(), sizeof(float32), roi, &minval);
02023     return minval;
02024   }
02025 
02026   // templates for ipp_indexmin
02027   
02028   // ubyte specialization: position of the smallest element of inp.
02029   // inp is handed to IPP as a single row, so the x index is the answer.
02030   template <> intg ipp_indexmin(const idx<ubyte> &inp) {
02031     ipp_checks1(inp);
02032     IppiSize roi;
02033     roi.height = 1;
02034     roi.width = inp.nelements();
02035     ubyte low;  // filled in by IPP but not returned
02036     int x, y;
02037     ippiMinIndx_8u_C1R(inp.idx_ptr(), sizeof(ubyte), roi, &low, &x, &y);
02038     return (intg)x;
02039   }
02040 
02041   // uint16 specialization: position of the smallest element of inp.
02042   // inp is handed to IPP as a single row, so the x index is the answer.
02043   template <> intg ipp_indexmin(const idx<uint16> &inp) {
02044     ipp_checks1(inp);
02045     IppiSize roi;
02046     roi.height = 1;
02047     roi.width = inp.nelements();
02048     uint16 low;  // filled in by IPP but not returned
02049     int x, y;
02050     ippiMinIndx_16u_C1R(inp.idx_ptr(), sizeof(uint16), roi, &low, &x, &y);
02051     return (intg)x;
02052   }
02053 
02054   // int16 specialization: position of the smallest element of inp.
02055   // inp is handed to IPP as a single row, so the x index is the answer.
02056   template <> intg ipp_indexmin(const idx<int16> &inp) {
02057     ipp_checks1(inp);
02058     IppiSize roi;
02059     roi.height = 1;
02060     roi.width = inp.nelements();
02061     int16 low;  // filled in by IPP but not returned
02062     int x, y;
02063     ippiMinIndx_16s_C1R(inp.idx_ptr(), sizeof(int16), roi, &low, &x, &y);
02064     return (intg)x;
02065   }
02066 
02067   // float32 specialization: position of the smallest element of inp.
02068   // inp is handed to IPP as a single row, so the x index is the answer.
02069   template <> intg ipp_indexmin(const idx<float32> &inp) {
02070     ipp_checks1(inp);
02071     IppiSize roi;
02072     roi.height = 1;
02073     roi.width = inp.nelements();
02074     float32 low;  // filled in by IPP but not returned
02075     int x, y;
02076     ippiMinIndx_32f_C1R(inp.idx_ptr(), sizeof(float32), roi, &low, &x, &y);
02077     return (intg)x;
02078   }
02079 
02080   // templates for ipp_maxevery
02081 
02082   // ubyte specialization: in-place element-wise maximum. Each element of
02083   // in2 becomes the larger of itself and the matching element of in1.
02084   template <> void ipp_maxevery(const idx<ubyte> &in1, idx<ubyte> &in2) {
02085     ipp_checks2(in1, in2);
02086     IppiSize roi;
02087     roi.width = 1;
02088     roi.height = in1.nelements();
02089     ippiMaxEvery_8u_C1IR(in1.idx_ptr(), sizeof(ubyte),
02090                          in2.idx_ptr(), sizeof(ubyte), roi);
02091   }
02092 
02093   // uint16 specialization: in-place element-wise maximum. Each element of
02094   // in2 becomes the larger of itself and the matching element of in1.
02095   template <> void ipp_maxevery(const idx<uint16> &in1, idx<uint16> &in2) {
02096     ipp_checks2(in1, in2);
02097     IppiSize roi;
02098     roi.width = 1;
02099     roi.height = in1.nelements();
02100     ippiMaxEvery_16u_C1IR(in1.idx_ptr(), sizeof(uint16),
02101                           in2.idx_ptr(), sizeof(uint16), roi);
02102   }
02103 
02104   // int16 specialization: in-place element-wise maximum. Each element of
02105   // in2 becomes the larger of itself and the matching element of in1.
02106   template <> void ipp_maxevery(const idx<int16> &in1, idx<int16> &in2) {
02107     ipp_checks2(in1, in2);
02108     IppiSize roi;
02109     roi.width = 1;
02110     roi.height = in1.nelements();
02111     ippiMaxEvery_16s_C1IR(in1.idx_ptr(), sizeof(int16),
02112                           in2.idx_ptr(), sizeof(int16), roi);
02113   }
02114 
02115   // float32 specialization: in-place element-wise maximum. Each element of
02116   // in2 becomes the larger of itself and the matching element of in1.
02117   template <> void ipp_maxevery(const idx<float32> &in1, idx<float32> &in2) {
02118     ipp_checks2(in1, in2);
02119     IppiSize roi;
02120     roi.width = 1;
02121     roi.height = in1.nelements();
02122     ippiMaxEvery_32f_C1IR(in1.idx_ptr(), sizeof(float32),
02123                           in2.idx_ptr(), sizeof(float32), roi);
02124   }
02125 
02126   // templates for ipp_sqrdist
02127 
02128   // ubyte specialization: sum of squared differences between in1 and in2.
02129   template <>
02130   float64 ipp_sqrdist(const idx<ubyte> &in1, const idx<ubyte> &in2) {
02131     ipp_checks2(in1, in2);
02132     IppiSize roi;
02133     roi.height = 1;
02134     roi.width = in1.nelements();
02135     float64 norm; // L2 norm of (in1 - in2): IPP returns the square root
02136     ippiNormDiff_L2_8u_C1R(in1.idx_ptr(), sizeof(ubyte), in2.idx_ptr(),
02137                            sizeof(ubyte), roi, &norm);
02138     return norm * norm; // undo the square root: a squared distance is due
02139   }
02140 
02141   // uint16 specialization: sum of squared differences between in1 and in2.
02142   template <>
02143   float64 ipp_sqrdist(const idx<uint16> &in1, const idx<uint16> &in2) {
02144     ipp_checks2(in1, in2);
02145     IppiSize roi;
02146     roi.height = 1;
02147     roi.width = in1.nelements();
02148     float64 norm; // L2 norm of (in1 - in2): IPP returns the square root
02149     ippiNormDiff_L2_16u_C1R(in1.idx_ptr(), sizeof(uint16), in2.idx_ptr(),
02150                             sizeof(uint16), roi, &norm);
02151     return norm * norm; // undo the square root: a squared distance is due
02152   }
02153 
02154   // int16 specialization: sum of squared differences between in1 and in2.
02155   template <>
02156   float64 ipp_sqrdist(const idx<int16> &in1, const idx<int16> &in2) {
02157     ipp_checks2(in1, in2);
02158     IppiSize roi;
02159     roi.height = 1;
02160     roi.width = in1.nelements();
02161     float64 norm; // L2 norm of (in1 - in2): IPP returns the square root
02162     ippiNormDiff_L2_16s_C1R(in1.idx_ptr(), sizeof(int16), in2.idx_ptr(),
02163                             sizeof(int16), roi, &norm);
02164     return norm * norm; // undo the square root: a squared distance is due
02165   }
02166 
02167   // float32 specialization: sum of squared differences between in1 and
02168   // in2. The IPP algorithm hint is chosen at compile time: __IPPFAST__
02169   // selects the fast one, __IPPACC__ the accurate one.
02170   template <>
02171   float64 ipp_sqrdist(const idx<float32> &in1, const idx<float32> &in2) {
02172     ipp_checks2(in1, in2);
02173     IppiSize roi;
02174     roi.height = 1;
02175     roi.width = in1.nelements();
02176     float64 norm; // L2 norm of (in1 - in2): IPP returns the square root
02177 #if defined(__IPPFAST__)
02178     ippiNormDiff_L2_32f_C1R(in1.idx_ptr(), sizeof(float32), in2.idx_ptr(),
02179                             sizeof(float32), roi, &norm, ippAlgHintFast);
02180 #elif defined(__IPPACC__)
02181     ippiNormDiff_L2_32f_C1R(in1.idx_ptr(), sizeof(float32), in2.idx_ptr(),
02182                             sizeof(float32), roi, &norm, ippAlgHintAccurate);
02183 #else
02184 #error either __IPPFAST__ or __IPPACC__ must be defined
02185 #endif
02186     return norm * norm; // undo the square root: a squared distance is due
02187   }
02188 
02189   // templates for ipp_threshold_lt (in-place)
02190 
02191   // ubyte specialization (in-place): elements of in below th become th.
02192   template <>
02193   void ipp_threshold_lt(idx<ubyte> &in, ubyte th) {
02194     ipp_checks1(in);
02195     IppiSize roi;
02196     roi.height = 1;
02197     roi.width = in.nelements();
02198     ippiThreshold_LT_8u_C1IR(in.idx_ptr(), sizeof(ubyte), roi, th);
02199   }
02200 
02201   // uint16 specialization (in-place): elements of in below th become th.
02202   template <>
02203   void ipp_threshold_lt(idx<uint16> &in, uint16 th) {
02204     ipp_checks1(in);
02205     IppiSize roi;
02206     roi.height = 1;
02207     roi.width = in.nelements();
02208     ippiThreshold_LT_16u_C1IR(in.idx_ptr(), sizeof(uint16), roi, th);
02209   }
02210 
02211   // int16 specialization (in-place): elements of in below th become th.
02212   template <>
02213   void ipp_threshold_lt(idx<int16> &in, int16 th) {
02214     ipp_checks1(in);
02215     IppiSize roi;
02216     roi.height = 1;
02217     roi.width = in.nelements();
02218     ippiThreshold_LT_16s_C1IR(in.idx_ptr(), sizeof(int16), roi, th);
02219   }
02220 
02221   // float32 specialization (in-place): elements of in below th become th.
02222   template <>
02223   void ipp_threshold_lt(idx<float32> &in, float32 th) {
02224     ipp_checks1(in);
02225     IppiSize roi;
02226     roi.height = 1;
02227     roi.width = in.nelements();
02228     ippiThreshold_LT_32f_C1IR(in.idx_ptr(), sizeof(float32), roi, th);
02229   }
02230 
02231   // templates for ipp_threshold_lt (not-in-place)
02232 
02233   // ubyte specialization: out gets in with elements below th raised to th.
02234   template <>
02235   void ipp_threshold_lt(const idx<ubyte> &in, ubyte th, idx<ubyte> &out) {
02236     ipp_checks2(in, out);
02237     IppiSize roi;
02238     roi.height = 1;
02239     roi.width = in.nelements();
02240     ippiThreshold_LT_8u_C1R(in.idx_ptr(), sizeof(ubyte),
02241                             out.idx_ptr(), sizeof(ubyte), roi, th);
02242   }
02243 
02244   // uint16 specialization: out gets in with elements below th raised to th.
02245   template <>
02246   void ipp_threshold_lt(const idx<uint16> &in, uint16 th, idx<uint16> &out) {
02247     ipp_checks2(in, out);
02248     IppiSize roi;
02249     roi.height = 1;
02250     roi.width = in.nelements();
02251     ippiThreshold_LT_16u_C1R(in.idx_ptr(), sizeof(uint16),
02252                              out.idx_ptr(), sizeof(uint16), roi, th);
02253   }
02254 
02255   // int16 specialization: out gets in with elements below th raised to th.
02256   template <>
02257   void ipp_threshold_lt(const idx<int16> &in, int16 th, idx<int16> &out) {
02258     ipp_checks2(in, out);
02259     IppiSize roi;
02260     roi.height = 1;
02261     roi.width = in.nelements();
02262     ippiThreshold_LT_16s_C1R(in.idx_ptr(), sizeof(int16),
02263                              out.idx_ptr(), sizeof(int16), roi, th);
02264   }
02265 
02266   // float32 specialization: out gets in with elements below th raised to th.
02267   template <>
02268   void ipp_threshold_lt(const idx<float32> &in, float32 th,
02269                         idx<float32> &out) {
02270     ipp_checks2(in, out);
02271     IppiSize roi;
02272     roi.height = 1;
02273     roi.width = in.nelements();
02274     ippiThreshold_LT_32f_C1R(in.idx_ptr(), sizeof(float32),
02275                              out.idx_ptr(), sizeof(float32), roi, th);
02276   }
02277 
02278   // templates for ipp_threshold_gt (in-place)
02279 
02280   // ubyte specialization (in-place): elements of in above th become th.
02281   template <>
02282   void ipp_threshold_gt(idx<ubyte> &in, ubyte th) {
02283     ipp_checks1(in);
02284     IppiSize roi;
02285     roi.height = 1;
02286     roi.width = in.nelements();
02287     ippiThreshold_GT_8u_C1IR(in.idx_ptr(), sizeof(ubyte), roi, th);
02288   }
02289 
02290   // uint16 specialization (in-place): elements of in above th become th.
02291   template <>
02292   void ipp_threshold_gt(idx<uint16> &in, uint16 th) {
02293     ipp_checks1(in);
02294     IppiSize roi;
02295     roi.height = 1;
02296     roi.width = in.nelements();
02297     ippiThreshold_GT_16u_C1IR(in.idx_ptr(), sizeof(uint16), roi, th);
02298   }
02299 
02300   // int16 specialization (in-place): elements of in above th become th.
02301   template <>
02302   void ipp_threshold_gt(idx<int16> &in, int16 th) {
02303     ipp_checks1(in);
02304     IppiSize roi;
02305     roi.height = 1;
02306     roi.width = in.nelements();
02307     ippiThreshold_GT_16s_C1IR(in.idx_ptr(), sizeof(int16), roi, th);
02308   }
02309 
02310   // float32 specialization (in-place): elements of in above th become th.
02311   template <>
02312   void ipp_threshold_gt(idx<float32> &in, float32 th) {
02313     ipp_checks1(in);
02314     IppiSize roi;
02315     roi.height = 1;
02316     roi.width = in.nelements();
02317     ippiThreshold_GT_32f_C1IR(in.idx_ptr(), sizeof(float32), roi, th);
02318   }
02319 
02320   // templates for ipp_threshold_gt (not-in-place)
02321 
02322   // ubyte specialization: out gets in with elements above th lowered to th.
02323   template <>
02324   void ipp_threshold_gt(const idx<ubyte> &in, ubyte th, idx<ubyte> &out) {
02325     ipp_checks2(in, out);
02326     IppiSize roi;
02327     roi.height = 1;
02328     roi.width = in.nelements();
02329     ippiThreshold_GT_8u_C1R(in.idx_ptr(), sizeof(ubyte),
02330                             out.idx_ptr(), sizeof(ubyte), roi, th);
02331   }
02332 
02333   // uint16 specialization: out gets in with elements above th lowered to th.
02334   template <>
02335   void ipp_threshold_gt(const idx<uint16> &in, uint16 th, idx<uint16> &out) {
02336     ipp_checks2(in, out);
02337     IppiSize roi;
02338     roi.height = 1;
02339     roi.width = in.nelements();
02340     ippiThreshold_GT_16u_C1R(in.idx_ptr(), sizeof(uint16),
02341                              out.idx_ptr(), sizeof(uint16), roi, th);
02342   }
02343 
02344   // int16 specialization: out gets in with elements above th lowered to th.
02345   template <>
02346   void ipp_threshold_gt(const idx<int16> &in, int16 th, idx<int16> &out) {
02347     ipp_checks2(in, out);
02348     IppiSize roi;
02349     roi.height = 1;
02350     roi.width = in.nelements();
02351     ippiThreshold_GT_16s_C1R(in.idx_ptr(), sizeof(int16),
02352                              out.idx_ptr(), sizeof(int16), roi, th);
02353   }
02354 
02355   // float32 specialization: out gets in with elements above th lowered to th.
02356   template <>
02357   void ipp_threshold_gt(const idx<float32> &in, float32 th,
02358                         idx<float32> &out) {
02359     ipp_checks2(in, out);
02360     IppiSize roi;
02361     roi.height = 1;
02362     roi.width = in.nelements();
02363     ippiThreshold_GT_32f_C1R(in.idx_ptr(), sizeof(float32),
02364                              out.idx_ptr(), sizeof(float32), roi, th);
02365   }
02366 
02367   // templates for ipp_threshold_lt (with value, in-place)
02368 
02369   // ubyte specialization (in-place): elements of in below th become value.
02370   template <>
02371   void ipp_threshold_lt(idx<ubyte> &in, ubyte th, ubyte value) {
02372     ipp_checks1(in);
02373     IppiSize roi;
02374     roi.height = 1;
02375     roi.width = in.nelements();
02376     ippiThreshold_LTVal_8u_C1IR(in.idx_ptr(), sizeof(ubyte), roi, th, value);
02377   }
02378 
02379   // uint16 specialization (in-place): elements of in below th become value.
02380   template <>
02381   void ipp_threshold_lt(idx<uint16> &in, uint16 th, uint16 value) {
02382     ipp_checks1(in);
02383     IppiSize roi;
02384     roi.height = 1;
02385     roi.width = in.nelements();
02386     ippiThreshold_LTVal_16u_C1IR(in.idx_ptr(), sizeof(uint16), roi, th, value);
02387   }
02388 
02389   // int16 specialization (in-place): elements of in below th become value.
02390   template <>
02391   void ipp_threshold_lt(idx<int16> &in, int16 th, int16 value) {
02392     ipp_checks1(in);
02393     IppiSize roi;
02394     roi.height = 1;
02395     roi.width = in.nelements();
02396     ippiThreshold_LTVal_16s_C1IR(in.idx_ptr(), sizeof(int16), roi, th, value);
02397   }
02398 
02399   // float32 specialization (in-place): elements of in below th become value.
02400   template <>
02401   void ipp_threshold_lt(idx<float32> &in, float32 th, float32 value) {
02402     ipp_checks1(in);
02403     IppiSize roi;
02404     roi.height = 1;
02405     roi.width = in.nelements();
02406     ippiThreshold_LTVal_32f_C1IR(in.idx_ptr(), sizeof(float32), roi, th, value);
02407   }
02408 
02409   // templates for ipp_threshold_lt (with value, not-in-place)
02410 
02411   // ubyte specialization: out gets in with elements below th set to value.
02412   template <>
02413   void ipp_threshold_lt(const idx<ubyte> &in, ubyte th, ubyte value,
02414                         idx<ubyte> &out) {
02415     ipp_checks2(in, out);
02416     IppiSize roi;
02417     roi.height = 1;
02418     roi.width = in.nelements();
02419     ippiThreshold_LTVal_8u_C1R(in.idx_ptr(), sizeof(ubyte), out.idx_ptr(),
02420                                sizeof(ubyte), roi, th, value);
02421   }
02422 
02423   // uint16 specialization: out gets in with elements below th set to value.
02424   template <>
02425   void ipp_threshold_lt(const idx<uint16> &in, uint16 th, uint16 value,
02426                         idx<uint16> &out) {
02427     ipp_checks2(in, out);
02428     IppiSize roi;
02429     roi.height = 1;
02430     roi.width = in.nelements();
02431     ippiThreshold_LTVal_16u_C1R(in.idx_ptr(), sizeof(uint16), out.idx_ptr(),
02432                                 sizeof(uint16), roi, th, value);
02433   }
02434 
02435   // int16 specialization: out gets in with elements below th set to value.
02436   template <>
02437   void ipp_threshold_lt(const idx<int16> &in, int16 th, int16 value,
02438                         idx<int16> &out) {
02439     ipp_checks2(in, out);
02440     IppiSize roi;
02441     roi.height = 1;
02442     roi.width = in.nelements();
02443     ippiThreshold_LTVal_16s_C1R(in.idx_ptr(), sizeof(int16), out.idx_ptr(),
02444                                 sizeof(int16), roi, th, value);
02445   }
02446 
02447   // float32 specialization: out gets in with elements below th set to value.
02448   template <>
02449   void ipp_threshold_lt(const idx<float32> &in, float32 th,
02450                         float32 value, idx<float32> &out) {
02451     ipp_checks2(in, out);
02452     IppiSize roi;
02453     roi.height = 1;
02454     roi.width = in.nelements();
02455     ippiThreshold_LTVal_32f_C1R(in.idx_ptr(), sizeof(float32), out.idx_ptr(),
02456                                 sizeof(float32), roi, th, value);
02457   }
02458 
02459   // templates for ipp_threshold_gt (with value, in-place)
02460 
02461   // ubyte specialization (in-place): elements of in above th become value.
02462   template <>
02463   void ipp_threshold_gt(idx<ubyte> &in, ubyte th, ubyte value) {
02464     ipp_checks1(in);
02465     IppiSize roi;
02466     roi.height = 1;
02467     roi.width = in.nelements();
02468     ippiThreshold_GTVal_8u_C1IR(in.idx_ptr(), sizeof(ubyte), roi, th, value);
02469   }
02470 
02471   // uint16 specialization (in-place): elements of in above th become value.
02472   template <>
02473   void ipp_threshold_gt(idx<uint16> &in, uint16 th, uint16 value) {
02474     ipp_checks1(in);
02475     IppiSize roi;
02476     roi.height = 1;
02477     roi.width = in.nelements();
02478     ippiThreshold_GTVal_16u_C1IR(in.idx_ptr(), sizeof(uint16), roi, th, value);
02479   }
02480 
02481   // int16 specialization (in-place): elements of in above th become value.
02482   template <>
02483   void ipp_threshold_gt(idx<int16> &in, int16 th, int16 value) {
02484     ipp_checks1(in);
02485     IppiSize roi;
02486     roi.height = 1;
02487     roi.width = in.nelements();
02488     ippiThreshold_GTVal_16s_C1IR(in.idx_ptr(), sizeof(int16), roi, th, value);
02489   }
02490 
02491   // float32 specialization (in-place): elements of in above th become value.
02492   template <>
02493   void ipp_threshold_gt(idx<float32> &in, float32 th, float32 value) {
02494     ipp_checks1(in);
02495     IppiSize roi;
02496     roi.height = 1;
02497     roi.width = in.nelements();
02498     ippiThreshold_GTVal_32f_C1IR(in.idx_ptr(), sizeof(float32), roi, th, value);
02499   }
02500 
02501   // templates for ipp_threshold_gt (with value, not-in-place)
02502 
02503   // ubyte specialization: out gets in with elements above th set to value.
02504   template <>
02505   void ipp_threshold_gt(const idx<ubyte> &in, ubyte th, ubyte value,
02506                         idx<ubyte> &out) {
02507     ipp_checks2(in, out);
02508     IppiSize roi;
02509     roi.height = 1;
02510     roi.width = in.nelements();
02511     ippiThreshold_GTVal_8u_C1R(in.idx_ptr(), sizeof(ubyte), out.idx_ptr(),
02512                                sizeof(ubyte), roi, th, value);
02513   }
02514 
02515   // uint16 specialization: out gets in with elements above th set to value.
02516   template <>
02517   void ipp_threshold_gt(const idx<uint16> &in, uint16 th, uint16 value,
02518                         idx<uint16> &out) {
02519     ipp_checks2(in, out);
02520     IppiSize roi;
02521     roi.height = 1;
02522     roi.width = in.nelements();
02523     ippiThreshold_GTVal_16u_C1R(in.idx_ptr(), sizeof(uint16), out.idx_ptr(),
02524                                 sizeof(uint16), roi, th, value);
02525   }
02526 
02527   // int16 specialization: out gets in with elements above th set to value.
02528   template <>
02529   void ipp_threshold_gt(const idx<int16> &in, int16 th, int16 value,
02530                         idx<int16> &out) {
02531     ipp_checks2(in, out);
02532     IppiSize roi;
02533     roi.height = 1;
02534     roi.width = in.nelements();
02535     ippiThreshold_GTVal_16s_C1R(in.idx_ptr(), sizeof(int16), out.idx_ptr(),
02536                                 sizeof(int16), roi, th, value);
02537   }
02538 
02539   // float32 specialization: out gets in with elements above th set to value.
02540   template <>
02541   void ipp_threshold_gt(const idx<float32> &in, float32 th,
02542                         float32 value, idx<float32> &out) {
02543     ipp_checks2(in, out);
02544     IppiSize roi;
02545     roi.height = 1;
02546     roi.width = in.nelements();
02547     ippiThreshold_GTVal_32f_C1R(in.idx_ptr(), sizeof(float32), out.idx_ptr(),
02548                                 sizeof(float32), roi, th, value);
02549   }
02550 
02551 #endif /* __IPP__ */
02552 
02553 } /* ebl namespace */
02554 
02555 #endif /* IPPOPS_HPP_ */