/* owndefs.h */
  1. /*
  2. * Copyright (C) 2016 Intel Corporation. All rights reserved.
  3. *
  4. * Redistribution and use in source and binary forms, with or without
  5. * modification, are permitted provided that the following conditions
  6. * are met:
  7. *
  8. * * Redistributions of source code must retain the above copyright
  9. * notice, this list of conditions and the following disclaimer.
  10. * * Redistributions in binary form must reproduce the above copyright
  11. * notice, this list of conditions and the following disclaimer in
  12. * the documentation and/or other materials provided with the
  13. * distribution.
  14. * * Neither the name of Intel Corporation nor the names of its
  15. * contributors may be used to endorse or promote products derived
  16. * from this software without specific prior written permission.
  17. *
  18. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  19. * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  20. * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  21. * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  22. * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  23. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24. * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25. * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27. * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. *
  30. */
  31. #ifndef __OWNDEFS_H__
  32. #define __OWNDEFS_H__
  33. #if defined( _VXWORKS )
  34. #include <vxWorks.h>
  35. #undef NONE
  36. #endif
  37. #include "ippdefs.h"
  38. #if defined(__INTEL_COMPILER) || defined(_MSC_VER)
  39. #define __INLINE static __inline
  40. #elif defined( __GNUC__ )
  41. #define __INLINE static __inline__
  42. #else
  43. #define __INLINE static
  44. #endif
  45. #if defined(__INTEL_COMPILER)
  46. #define __RESTRICT restrict
  47. #elif !defined( __RESTRICT )
  48. #define __RESTRICT
  49. #endif
  50. #if defined( IPP_W32DLL )
  51. #if defined( _MSC_VER ) || defined( __INTEL_COMPILER )
  52. #define IPPFUN(type,name,arg) __declspec(dllexport) type __STDCALL name arg
  53. #else
  54. #define IPPFUN(type,name,arg) extern type __STDCALL name arg
  55. #endif
  56. #else
  57. #define IPPFUN(type,name,arg) extern type __STDCALL name arg
  58. #endif
  59. /* structure represeting 128 bit unsigned integer type */
  60. typedef struct{
  61. Ipp64u low;
  62. Ipp64u high;
  63. }Ipp128u;
  64. #define _IPP_PX 0 /* pure C-code ia32 */
  65. #define _IPP_M5 1 /* Quark (Pentium) - x86+x87 ia32 */
  66. #define _IPP_M6 2 /* Pentium MMX - MMX ia32 */
  67. #define _IPP_A6 4 /* Pentium III - SSE ia32 */
  68. #define _IPP_W7 8 /* Pentium 4 - SSE2 ia32 */
  69. #define _IPP_T7 16 /* Pentium with x64 support (Nocona) - SSE3 ia32 */
  70. #define _IPP_V8 32 /* Merom - SSSE3 ia32 */
  71. #define _IPP_P8 64 /* Penryn - SSE4.1 + tick for SSE4.2 ia32 */
  72. #define _IPP_G9 128 /* SandyBridge (GSSE) - AVX ia32 */
  73. #define _IPP_H9 256 /* Haswell (AVX2) ia32 */
  74. #define _IPP_I0 512 /* KNL (AVX-512) ia32 */
  75. #define _IPP_S0 1024 /* SkyLake Xeon (AVX-512) ia32 */
  76. #define _IPPXSC_PX 0
  77. #define _IPPXSC_S1 1
  78. #define _IPPXSC_S2 2
  79. #define _IPPXSC_C2 4
  80. #define _IPPLRB_PX 0
  81. #define _IPPLRB_B1 1
  82. #define _IPPLRB_B2 2
  83. #define _IPP64_PX _IPP_PX
  84. #define _IPP64_I7 64
  85. #define _IPP32E_PX _IPP_PX /* pure C-code x64 */
  86. #define _IPP32E_M7 32 /* Pentium with x64 support (Nocona) - SSE3 x64 */
  87. #define _IPP32E_U8 64 /* Merom - SSSE3 x64 */
  88. #define _IPP32E_Y8 128 /* Penryn - SSE4.1 + tick for SSE4.2 x64 */
  89. #define _IPP32E_E9 256 /* SandyBridge (GSSE) - AVX x64 */
  90. #define _IPP32E_L9 512 /* Haswell (AVX2) x64 */
  91. #define _IPP32E_N0 1024 /* KNL (AVX-512) x64 */
  92. #define _IPP32E_K0 2048 /* SkyLake Xeon (AVX-512) x64 */
  93. #define _IPPLP32_PX _IPP_PX
  94. #define _IPPLP32_S8 1 /* old Atom (SSSE3+movbe) (Silverthorne) ia32 */
  95. #define _IPPLP64_PX _IPP_PX
  96. #define _IPPLP64_N8 1 /* old Atom (SSSE3+movbe) (Silverthorne) x64 */
  97. #if defined(__INTEL_COMPILER) || (_MSC_VER >= 1300)
  98. #define __ALIGN8 __declspec (align(8))
  99. #define __ALIGN16 __declspec (align(16))
  100. #if !defined( OSX32 )
  101. #define __ALIGN32 __declspec (align(32))
  102. #else
  103. #define __ALIGN32 __declspec (align(16))
  104. #endif
  105. #define __ALIGN64 __declspec (align(64))
  106. #elif defined (__GNUC__)
  107. #define __ALIGN8 __attribute((aligned(8)))
  108. #define __ALIGN16 __attribute((aligned(16)))
  109. #define __ALIGN32 __attribute((aligned(32)))
  110. #define __ALIGN64 __attribute((aligned(64)))
  111. #else
  112. #define __ALIGN8
  113. #define __ALIGN16
  114. #define __ALIGN32
  115. #define __ALIGN64
  116. #endif
  117. #if defined ( _M5 ) /* Quark (Pentium) - x86+x87 ia32 */
  118. #define _IPP _IPP_M5
  119. #define _IPP32E _IPP32E_PX
  120. #define _IPPLRB _IPPLRB_PX
  121. #define _IPPLP32 _IPPLP32_PX
  122. #define _IPPLP64 _IPPLP64_PX
  123. #elif defined ( _M6 ) /* Pentium MMX - MMX ia32 */
  124. #define _IPP _IPP_M6
  125. #define _IPP32E _IPP32E_PX
  126. #define _IPPLRB _IPPLRB_PX
  127. #define _IPPLP32 _IPPLP32_PX
  128. #define _IPPLP64 _IPPLP64_PX
  129. #elif defined( _A6 ) /* Pentium III - SSE ia32 */
  130. #define _IPP _IPP_A6
  131. #define _IPP32E _IPP32E_PX
  132. #define _IPPLRB _IPPLRB_PX
  133. #define _IPPLP32 _IPPLP32_PX
  134. #define _IPPLP64 _IPPLP64_PX
  135. #elif defined( _W7 ) /* Pentium 4 - SSE2 ia32 */
  136. #define _IPP _IPP_W7
  137. #define _IPP32E _IPP32E_PX
  138. #define _IPPLRB _IPPLRB_PX
  139. #define _IPPLP32 _IPPLP32_PX
  140. #define _IPPLP64 _IPPLP64_PX
  141. #elif defined( _T7 ) /* Pentium with x64 support (Nocona) - SSE3 ia32 */
  142. #define _IPP _IPP_T7
  143. #define _IPP32E _IPP32E_PX
  144. #define _IPPLRB _IPPLRB_PX
  145. #define _IPPLP32 _IPPLP32_PX
  146. #define _IPPLP64 _IPPLP64_PX
  147. #elif defined( _V8 ) /* Merom - SSSE3 ia32 */
  148. #define _IPP _IPP_V8
  149. #define _IPP32E _IPP32E_PX
  150. #define _IPPLRB _IPPLRB_PX
  151. #define _IPPLP32 _IPPLP32_PX
  152. #define _IPPLP64 _IPPLP64_PX
  153. #elif defined( _P8 ) /* Penryn - SSE4.1 + tick for SSE4.2 ia32 */
  154. #define _IPP _IPP_P8
  155. #define _IPP32E _IPP32E_PX
  156. #define _IPPLRB _IPPLRB_PX
  157. #define _IPPLP32 _IPPLP32_PX
  158. #define _IPPLP64 _IPPLP64_PX
  159. #elif defined( _G9 ) /* SandyBridge (GSSE) - AVX ia32 */
  160. #define _IPP _IPP_G9
  161. #define _IPP32E _IPP32E_PX
  162. #define _IPPLRB _IPPLRB_PX
  163. #define _IPPLP32 _IPPLP32_PX
  164. #define _IPPLP64 _IPPLP64_PX
  165. #elif defined( _H9 ) /* Haswell (AVX2) ia32 */
  166. #define _IPP _IPP_H9
  167. #define _IPP32E _IPP32E_PX
  168. #define _IPPLRB _IPPLRB_PX
  169. #define _IPPLP32 _IPPLP32_PX
  170. #define _IPPLP64 _IPPLP64_PX
  171. #elif defined( _M7 ) /* Pentium with x64 support (Nocona) - SSE3 x64 */
  172. #define _IPP _IPP_PX
  173. #define _IPP32E _IPP32E_M7
  174. #define _IPPLRB _IPPLRB_PX
  175. #define _IPPLP32 _IPPLP32_PX
  176. #define _IPPLP64 _IPPLP64_PX
  177. #elif defined( _U8 ) /* Merom - SSSE3 x64 */
  178. #define _IPP _IPP_PX
  179. #define _IPP32E _IPP32E_U8
  180. #define _IPPLRB _IPPLRB_PX
  181. #define _IPPLP32 _IPPLP32_PX
  182. #define _IPPLP64 _IPPLP64_PX
  183. #elif defined( _Y8 ) /* Penryn - SSE4.1 + tick for SSE4.2 x64 */
  184. #define _IPP _IPP_PX
  185. #define _IPP32E _IPP32E_Y8
  186. #define _IPPLRB _IPPLRB_PX
  187. #define _IPPLP32 _IPPLP32_PX
  188. #define _IPPLP64 _IPPLP64_PX
  189. #elif defined( _E9 ) /* SandyBridge (GSSE) - AVX x64 */
  190. #define _IPP _IPP_PX
  191. #define _IPP32E _IPP32E_E9
  192. #define _IPPLRB _IPPLRB_PX
  193. #define _IPPLP32 _IPPLP32_PX
  194. #define _IPPLP64 _IPPLP64_PX
  195. #elif defined( _L9 ) /* Haswell (AVX2) x64 */
  196. #define _IPP _IPP_PX
  197. #define _IPP32E _IPP32E_L9
  198. #define _IPPLRB _IPPLRB_PX
  199. #define _IPPLP32 _IPPLP32_PX
  200. #define _IPPLP64 _IPPLP64_PX
  201. #elif defined( _N0 ) /* KNL (AVX-512) x64 */
  202. #define _IPP _IPP_PX
  203. #define _IPP32E _IPP32E_N0
  204. #define _IPPLRB _IPPLRB_PX
  205. #define _IPPLP32 _IPPLP32_PX
  206. #define _IPPLP64 _IPPLP64_PX
  207. #elif defined( _K0 ) /* SkyLake Xeon (AVX-512) x64 */
  208. #define _IPP _IPP_PX
  209. #define _IPP32E _IPP32E_K0
  210. #define _IPPLRB _IPPLRB_PX
  211. #define _IPPLP32 _IPPLP32_PX
  212. #define _IPPLP64 _IPPLP64_PX
  213. #elif defined( _B2 ) /* KNC (MIC) */
  214. #define _IPP _IPP_PX
  215. #define _IPP32E _IPP32E_PX
  216. #define _IPPLRB _IPPLRB_B2
  217. #define _IPPLP32 _IPPLP32_PX
  218. #define _IPPLP64 _IPPLP64_PX
  219. #elif defined( _S8 ) /* old Atom (SSSE3+movbe) (Silverthorne) ia32 */
  220. #define _IPP _IPP_V8
  221. #define _IPP32E _IPP32E_PX
  222. #define _IPPLRB _IPPLRB_PX
  223. #define _IPPLP32 _IPPLP32_S8
  224. #define _IPPLP64 _IPPLP64_PX
  225. #elif defined( _N8 ) /* old Atom (SSSE3+movbe) (Silverthorne) x64 */
  226. #define _IPP _IPP_PX
  227. #define _IPP32E _IPP32E_U8
  228. #define _IPPLRB _IPPLRB_PX
  229. #define _IPPLP32 _IPPLP32_PX
  230. #define _IPPLP64 _IPPLP64_N8
  231. #else
  232. #define _IPP _IPP_PX
  233. #define _IPP32E _IPP32E_PX
  234. #define _IPPLRB _IPPLRB_PX
  235. #define _IPPLP32 _IPPLP32_PX
  236. #define _IPPLP64 _IPPLP64_PX
  237. #endif
  238. #define _IPP_ARCH_IA32 1
  239. #define _IPP_ARCH_IA64 2
  240. #define _IPP_ARCH_EM64T 4
  241. #define _IPP_ARCH_XSC 8
  242. #define _IPP_ARCH_LRB 16
  243. #define _IPP_ARCH_LP32 32
  244. #define _IPP_ARCH_LP64 64
  245. #define _IPP_ARCH_LRB2 128
  246. #if defined ( _ARCH_IA32 )
  247. #define _IPP_ARCH _IPP_ARCH_IA32
  248. #elif defined( _ARCH_EM64T )
  249. #define _IPP_ARCH _IPP_ARCH_EM64T
  250. #elif defined( _ARCH_LRB2 )
  251. #define _IPP_ARCH _IPP_ARCH_LRB2
  252. #elif defined( _ARCH_LP32 )
  253. #define _IPP_ARCH _IPP_ARCH_LP32
  254. #elif defined( _ARCH_LP64 )
  255. #define _IPP_ARCH _IPP_ARCH_LP64
  256. #else
  257. #if defined(_M_AMD64) || defined(__x86_64) || defined(__x86_64__)
  258. #define _IPP_ARCH _IPP_ARCH_EM64T
  259. #else
  260. #define _IPP_ARCH _IPP_ARCH_IA32
  261. #endif
  262. #endif
  263. #if ((_IPP_ARCH == _IPP_ARCH_IA32) || (_IPP_ARCH == _IPP_ARCH_LP32))
  264. __INLINE
  265. Ipp32s IPP_INT_PTR( const void* ptr ) {
  266. union {
  267. void* Ptr;
  268. Ipp32s Int;
  269. } dd;
  270. dd.Ptr = (void*)ptr;
  271. return dd.Int;
  272. }
  273. __INLINE
  274. Ipp32u IPP_UINT_PTR( const void* ptr ) {
  275. union {
  276. void* Ptr;
  277. Ipp32u Int;
  278. } dd;
  279. dd.Ptr = (void*)ptr;
  280. return dd.Int;
  281. }
  282. #elif ((_IPP_ARCH == _IPP_ARCH_EM64T) || (_IPP_ARCH == _IPP_ARCH_LRB2) || (_IPP_ARCH == _IPP_ARCH_LP64))
  283. __INLINE
  284. Ipp64s IPP_INT_PTR( const void* ptr ) {
  285. union {
  286. void* Ptr;
  287. Ipp64s Int;
  288. } dd;
  289. dd.Ptr = (void*)ptr;
  290. return dd.Int;
  291. }
  292. __INLINE
  293. Ipp64u IPP_UINT_PTR( const void* ptr ) {
  294. union {
  295. void* Ptr;
  296. Ipp64u Int;
  297. } dd;
  298. dd.Ptr = (void*)ptr;
  299. return dd.Int;
  300. }
  301. #else
  302. #define IPP_INT_PTR( ptr ) ( (long)(ptr) )
  303. #define IPP_UINT_PTR( ptr ) ( (unsigned long)(ptr) )
  304. #endif
  305. #define IPP_ALIGN_TYPE(type, align) ((align)/sizeof(type)-1)
  306. #define IPP_BYTES_TO_ALIGN(ptr, align) ((-(IPP_INT_PTR(ptr)&((align)-1)))&((align)-1))
  307. #define IPP_ALIGNED_PTR(ptr, align) (void*)( (unsigned char*)(ptr) + (IPP_BYTES_TO_ALIGN( ptr, align )) )
  308. #define IPP_ALIGNED_SIZE(size, align) (((size)+(align)-1)&~((align)-1))
  309. #define IPP_MALLOC_ALIGNED_BYTES 64
  310. #define IPP_MALLOC_ALIGNED_8BYTES 8
  311. #define IPP_MALLOC_ALIGNED_16BYTES 16
  312. #define IPP_MALLOC_ALIGNED_32BYTES 32
  313. #define IPP_ALIGNED_ARRAY(align,arrtype,arrname,arrlength)\
  314. char arrname##AlignedArrBuff[sizeof(arrtype)*(arrlength)+IPP_ALIGN_TYPE(char, align)];\
  315. arrtype *arrname = (arrtype*)IPP_ALIGNED_PTR(arrname##AlignedArrBuff,align)
  316. #if defined( __cplusplus )
  317. extern "C" {
  318. #endif
  319. /* /////////////////////////////////////////////////////////////////////////////
  320. IPP Context Identification
  321. /////////////////////////////////////////////////////////////////////////// */
  322. #define IPP_CONTEXT( a, b, c, d) \
  323. (int)(((unsigned)(a) << 24) | ((unsigned)(b) << 16) | \
  324. ((unsigned)(c) << 8) | (unsigned)(d))
  325. typedef enum {
  326. idCtxUnknown = 0,
  327. idCtxFFT_C_16sc,
  328. idCtxFFT_C_16s,
  329. idCtxFFT_R_16s,
  330. idCtxFFT_C_32fc,
  331. idCtxFFT_C_32f,
  332. idCtxFFT_R_32f,
  333. idCtxFFT_C_64fc,
  334. idCtxFFT_C_64f,
  335. idCtxFFT_R_64f,
  336. idCtxDFT_C_16sc,
  337. idCtxDFT_C_16s,
  338. idCtxDFT_R_16s,
  339. idCtxDFT_C_32fc,
  340. idCtxDFT_C_32f,
  341. idCtxDFT_R_32f,
  342. idCtxDFT_C_64fc,
  343. idCtxDFT_C_64f,
  344. idCtxDFT_R_64f,
  345. idCtxDCTFwd_16s,
  346. idCtxDCTInv_16s,
  347. idCtxDCTFwd_32f,
  348. idCtxDCTInv_32f,
  349. idCtxDCTFwd_64f,
  350. idCtxDCTInv_64f,
  351. idCtxFFT2D_C_32fc,
  352. idCtxFFT2D_R_32f,
  353. idCtxDFT2D_C_32fc,
  354. idCtxDFT2D_R_32f,
  355. idCtxFFT2D_R_32s,
  356. idCtxDFT2D_R_32s,
  357. idCtxDCT2DFwd_32f,
  358. idCtxDCT2DInv_32f,
  359. idCtxMoment64f,
  360. idCtxMoment64s,
  361. idCtxRandUni_8u,
  362. idCtxRandUni_16s,
  363. idCtxRandUni_32f,
  364. idCtxRandUni_64f,
  365. idCtxRandGauss_8u,
  366. idCtxRandGauss_16s,
  367. idCtxRandGauss_32f,
  368. idCtxRandGauss_64f,
  369. idCtxWTFwd_32f,
  370. idCtxWTFwd_8u32f,
  371. idCtxWTFwd_8s32f,
  372. idCtxWTFwd_16u32f,
  373. idCtxWTFwd_16s32f,
  374. idCtxWTFwd2D_32f_C1R,
  375. idCtxWTInv2D_32f_C1R,
  376. idCtxWTFwd2D_32f_C3R,
  377. idCtxWTInv2D_32f_C3R,
  378. idCtxWTInv_32f,
  379. idCtxWTInv_32f8u,
  380. idCtxWTInv_32f8s,
  381. idCtxWTInv_32f16u,
  382. idCtxWTInv_32f16s,
  383. idCtxMDCTFwd_32f,
  384. idCtxMDCTInv_32f,
  385. idCtxMDCTFwd_16s,
  386. idCtxFIRBlock_32f,
  387. idCtxFDP_32f,
  388. idCtxRLMS_32f = IPP_CONTEXT( 'L', 'M', 'S', '1'),
  389. idCtxRLMS32f_16s = IPP_CONTEXT( 'L', 'M', 'S', 0 ),
  390. idCtxIIRAR_32f = IPP_CONTEXT( 'I', 'I', '0', '1'),
  391. idCtxIIRBQ_32f = IPP_CONTEXT( 'I', 'I', '0', '2'),
  392. idCtxIIRAR_32fc = IPP_CONTEXT( 'I', 'I', '0', '3'),
  393. idCtxIIRBQ_32fc = IPP_CONTEXT( 'I', 'I', '0', '4'),
  394. idCtxIIRAR32f_16s = IPP_CONTEXT( 'I', 'I', '0', '5'),
  395. idCtxIIRBQ32f_16s = IPP_CONTEXT( 'I', 'I', '0', '6'),
  396. idCtxIIRAR32fc_16sc = IPP_CONTEXT( 'I', 'I', '0', '7'),
  397. idCtxIIRBQ32fc_16sc = IPP_CONTEXT( 'I', 'I', '0', '8'),
  398. idCtxIIRAR32s_16s = IPP_CONTEXT( 'I', 'I', '0', '9'),
  399. idCtxIIRBQ32s_16s = IPP_CONTEXT( 'I', 'I', '1', '0'),
  400. idCtxIIRAR32sc_16sc = IPP_CONTEXT( 'I', 'I', '1', '1'),
  401. idCtxIIRBQ32sc_16sc = IPP_CONTEXT( 'I', 'I', '1', '2'),
  402. idCtxIIRAR_64f = IPP_CONTEXT( 'I', 'I', '1', '3'),
  403. idCtxIIRBQ_64f = IPP_CONTEXT( 'I', 'I', '1', '4'),
  404. idCtxIIRAR_64fc = IPP_CONTEXT( 'I', 'I', '1', '5'),
  405. idCtxIIRBQ_64fc = IPP_CONTEXT( 'I', 'I', '1', '6'),
  406. idCtxIIRAR64f_32f = IPP_CONTEXT( 'I', 'I', '1', '7'),
  407. idCtxIIRBQ64f_32f = IPP_CONTEXT( 'I', 'I', '1', '8'),
  408. idCtxIIRAR64fc_32fc = IPP_CONTEXT( 'I', 'I', '1', '9'),
  409. idCtxIIRBQ64fc_32fc = IPP_CONTEXT( 'I', 'I', '2', '0'),
  410. idCtxIIRAR64f_32s = IPP_CONTEXT( 'I', 'I', '2', '1'),
  411. idCtxIIRBQ64f_32s = IPP_CONTEXT( 'I', 'I', '2', '2'),
  412. idCtxIIRAR64fc_32sc = IPP_CONTEXT( 'I', 'I', '2', '3'),
  413. idCtxIIRBQ64fc_32sc = IPP_CONTEXT( 'I', 'I', '2', '4'),
  414. idCtxIIRAR64f_16s = IPP_CONTEXT( 'I', 'I', '2', '5'),
  415. idCtxIIRBQ64f_16s = IPP_CONTEXT( 'I', 'I', '2', '6'),
  416. idCtxIIRAR64fc_16sc = IPP_CONTEXT( 'I', 'I', '2', '7'),
  417. idCtxIIRBQ64fc_16sc = IPP_CONTEXT( 'I', 'I', '2', '8'),
  418. idCtxIIRBQDF1_32f = IPP_CONTEXT( 'I', 'I', '2', '9'),
  419. idCtxIIRBQDF164f_32s= IPP_CONTEXT( 'I', 'I', '3', '0'),
  420. idCtxFIRSR_32f = IPP_CONTEXT( 'F', 'I', '0', '1'),
  421. idCtxFIRSR_32fc = IPP_CONTEXT( 'F', 'I', '0', '2'),
  422. idCtxFIRMR_32f = IPP_CONTEXT( 'F', 'I', '0', '3'),
  423. idCtxFIRMR_32fc = IPP_CONTEXT( 'F', 'I', '0', '4'),
  424. idCtxFIRSR32f_16s = IPP_CONTEXT( 'F', 'I', '0', '5'),
  425. idCtxFIRSR32fc_16sc = IPP_CONTEXT( 'F', 'I', '0', '6'),
  426. idCtxFIRMR32f_16s = IPP_CONTEXT( 'F', 'I', '0', '7'),
  427. idCtxFIRMR32fc_16sc = IPP_CONTEXT( 'F', 'I', '0', '8'),
  428. idCtxFIRSR32s_16s = IPP_CONTEXT( 'F', 'I', '0', '9'),
  429. idCtxFIRSR32sc_16sc = IPP_CONTEXT( 'F', 'I', '1', '0'),
  430. idCtxFIRMR32s_16s = IPP_CONTEXT( 'F', 'I', '1', '1'),
  431. idCtxFIRMR32sc_16sc = IPP_CONTEXT( 'F', 'I', '1', '2'),
  432. idCtxFIRSR_64f = IPP_CONTEXT( 'F', 'I', '1', '3'),
  433. idCtxFIRSR_64fc = IPP_CONTEXT( 'F', 'I', '1', '4'),
  434. idCtxFIRMR_64f = IPP_CONTEXT( 'F', 'I', '1', '5'),
  435. idCtxFIRMR_64fc = IPP_CONTEXT( 'F', 'I', '1', '6'),
  436. idCtxFIRSR64f_32f = IPP_CONTEXT( 'F', 'I', '1', '7'),
  437. idCtxFIRSR64fc_32fc = IPP_CONTEXT( 'F', 'I', '1', '8'),
  438. idCtxFIRMR64f_32f = IPP_CONTEXT( 'F', 'I', '1', '9'),
  439. idCtxFIRMR64fc_32fc = IPP_CONTEXT( 'F', 'I', '2', '0'),
  440. idCtxFIRSR64f_32s = IPP_CONTEXT( 'F', 'I', '2', '1'),
  441. idCtxFIRSR64fc_32sc = IPP_CONTEXT( 'F', 'I', '2', '2'),
  442. idCtxFIRMR64f_32s = IPP_CONTEXT( 'F', 'I', '2', '3'),
  443. idCtxFIRMR64fc_32sc = IPP_CONTEXT( 'F', 'I', '2', '4'),
  444. idCtxFIRSR64f_16s = IPP_CONTEXT( 'F', 'I', '2', '5'),
  445. idCtxFIRSR64fc_16sc = IPP_CONTEXT( 'F', 'I', '2', '6'),
  446. idCtxFIRMR64f_16s = IPP_CONTEXT( 'F', 'I', '2', '7'),
  447. idCtxFIRMR64fc_16sc = IPP_CONTEXT( 'F', 'I', '2', '8'),
  448. idCtxFIRSR_16s = IPP_CONTEXT( 'F', 'I', '2', '9'),
  449. idCtxFIRMR_16s = IPP_CONTEXT( 'F', 'I', '3', '0'),
  450. idCtxFIRSRStream_16s= IPP_CONTEXT( 'F', 'I', '3', '1'),
  451. idCtxFIRMRStream_16s= IPP_CONTEXT( 'F', 'I', '3', '2'),
  452. idCtxFIRSRStream_32f= IPP_CONTEXT( 'F', 'I', '3', '3'),
  453. idCtxFIRMRStream_32f= IPP_CONTEXT( 'F', 'I', '3', '4'),
  454. idCtxRLMS32s_16s = IPP_CONTEXT( 'L', 'M', 'S', 'R'),
  455. idCtxCLMS32s_16s = IPP_CONTEXT( 'L', 'M', 'S', 'C'),
  456. idCtxEncode_JPEG2K,
  457. idCtxDES = IPP_CONTEXT( ' ', 'D', 'E', 'S'),
  458. idCtxBlowfish = IPP_CONTEXT( ' ', ' ', 'B', 'F'),
  459. idCtxRijndael = IPP_CONTEXT( ' ', 'R', 'I', 'J'),
  460. idCtxSMS4 = IPP_CONTEXT( 'S', 'M', 'S', '4'),
  461. idCtxTwofish = IPP_CONTEXT( ' ', ' ', 'T', 'F'),
  462. idCtxARCFOUR = IPP_CONTEXT( ' ', 'R', 'C', '4'),
  463. idCtxRC564 = IPP_CONTEXT( 'R', 'C', '5', '1'),
  464. idCtxRC5128 = IPP_CONTEXT( 'R', 'C', '5', '2'),
  465. idCtxSHA1 = IPP_CONTEXT( 'S', 'H', 'S', '1'),
  466. idCtxSHA224 = IPP_CONTEXT( 'S', 'H', 'S', '3'),
  467. idCtxSHA256 = IPP_CONTEXT( 'S', 'H', 'S', '2'),
  468. idCtxSHA384 = IPP_CONTEXT( 'S', 'H', 'S', '4'),
  469. idCtxSHA512 = IPP_CONTEXT( 'S', 'H', 'S', '5'),
  470. idCtxMD5 = IPP_CONTEXT( ' ', 'M', 'D', '5'),
  471. idCtxHMAC = IPP_CONTEXT( 'H', 'M', 'A', 'C'),
  472. idCtxDAA = IPP_CONTEXT( ' ', 'D', 'A', 'A'),
  473. idCtxBigNum = IPP_CONTEXT( 'B', 'I', 'G', 'N'),
  474. idCtxMontgomery = IPP_CONTEXT( 'M', 'O', 'N', 'T'),
  475. idCtxPrimeNumber = IPP_CONTEXT( 'P', 'R', 'I', 'M'),
  476. idCtxPRNG = IPP_CONTEXT( 'P', 'R', 'N', 'G'),
  477. idCtxRSA = IPP_CONTEXT( ' ', 'R', 'S', 'A'),
  478. idCtxRSA_PubKey = IPP_CONTEXT( 'R', 'S', 'A', '0'),
  479. idCtxRSA_PrvKey1 = IPP_CONTEXT( 'R', 'S', 'A', '1'),
  480. idCtxRSA_PrvKey2 = IPP_CONTEXT( 'R', 'S', 'A', '2'),
  481. idCtxDSA = IPP_CONTEXT( ' ', 'D', 'S', 'A'),
  482. idCtxECCP = IPP_CONTEXT( ' ', 'E', 'C', 'P'),
  483. idCtxECCB = IPP_CONTEXT( ' ', 'E', 'C', 'B'),
  484. idCtxECCPPoint = IPP_CONTEXT( 'P', 'E', 'C', 'P'),
  485. idCtxECCBPoint = IPP_CONTEXT( 'P', 'E', 'C', 'B'),
  486. idCtxDH = IPP_CONTEXT( ' ', ' ', 'D', 'H'),
  487. idCtxDLP = IPP_CONTEXT( ' ', 'D', 'L', 'P'),
  488. idCtxCMAC = IPP_CONTEXT( 'C', 'M', 'A', 'C'),
  489. idCtxRFFT2_8u,
  490. idCtxHilbert_32f32fc,
  491. idCtxHilbert_16s32fc,
  492. idCtxHilbert_16s16sc,
  493. idCtxTone_16s,
  494. idCtxTriangle_16s,
  495. idCtxDFTOutOrd_C_32fc,
  496. idCtxDFTOutOrd_C_64fc,
  497. idCtxFFT_C_32sc,
  498. idCtxFFT_C_32s,
  499. idCtxFFT_R_32s,
  500. idCtxFFT_R_16s32s,
  501. idCtxDecodeProgr_JPEG2K,
  502. idCtxWarp_MPEG4,
  503. idCtxQuantInvIntra_MPEG4,
  504. idCtxQuantInvInter_MPEG4,
  505. idCtxQuantIntra_MPEG4,
  506. idCtxQuantInter_MPEG4,
  507. idCtxAnalysisFilter_SBR_C_32f32fc,
  508. idCtxAnalysisFilter_SBR_C_32f,
  509. idCtxAnalysisFilter_SBR_R_32f,
  510. idCtxSynthesisFilter_SBR_C_32fc32f,
  511. idCtxSynthesisFilter_SBR_C_32f,
  512. idCtxSynthesisFilter_SBR_R_32f,
  513. idCtxSynthesisDownFilter_SBR_C_32fc32f,
  514. idCtxSynthesisDownFilter_SBR_C_32f,
  515. idCtxSynthesisDownFilter_SBR_R_32f,
  516. idCtxVLCEncode,
  517. idCtxVLCDecode,
  518. idCtxAnalysisFilter_SBR_C_32s32sc,
  519. idCtxAnalysisFilter_SBR_R_32s,
  520. idCtxSynthesisFilter_SBR_C_32sc32s,
  521. idCtxSynthesisFilter_SBR_R_32s,
  522. idCtxSynthesisDownFilter_SBR_C_32sc32s,
  523. idCtxSynthesisDownFilter_SBR_R_32s,
  524. idCtxSynthesisFilter_PQMF_MP3_32f,
  525. idCtxAnalysisFilter_PQMF_MP3_32f,
  526. idCtxResampleRow,
  527. idCtxAnalysisFilter_SBR_Enc_C_32f32fc,
  528. idCtxSynthesisFilter_DTS_32f,
  529. idCtxFilterBilateralGauss_8u,
  530. idCtxFilterBilateralGaussFast_8u,
  531. idCtxBGF,
  532. idCtxPolyGF,
  533. idCtxRSenc,
  534. idCtxRSdec,
  535. idCtxSnow3g = IPP_CONTEXT( 'S', 'n', 'o', 'w'),
  536. idCtxSnow3gF8,
  537. idCtxSnow3gF9,
  538. idCtxKasumi = IPP_CONTEXT( 'K', 'a', 's', 'u'),
  539. idCtxKasumiF8,
  540. idCtxKasumiF9,
  541. idCtxResizeHannFilter_8u,
  542. idCtxResizeLanczosFilter_8u,
  543. idCtxAESXCBC,
  544. idCtxAESCCM,
  545. idCtxAESGCM,
  546. idCtxMsgCatalog,
  547. idCtxGFP,
  548. idCtxGFPE,
  549. idCtxGFPX,
  550. idCtxGFPXE,
  551. idCtxGFPXQX,
  552. idCtxGFPXQXE,
  553. idCtxGFPEC,
  554. idCtxGFPPoint,
  555. idCtxGFPXEC,
  556. idCtxGFPXECPoint,
  557. idCtxPairing,
  558. idCtxResize_32f,
  559. idCtxResizeYUV420,
  560. idCtxResizeYUV422,
  561. idCtxResize_64f,
  562. idCtxFilterBilateralBorder,
  563. idCtxThresholdAdaptiveGauss,
  564. idCtxHOG,
  565. idCtxFastN,
  566. idCtxHash,
  567. idCtxSM3
  568. } IppCtxId;
  569. /* /////////////////////////////////////////////////////////////////////////////
  570. Helpers
  571. /////////////////////////////////////////////////////////////////////////// */
  572. #define IPP_NOERROR_RET() return ippStsNoErr
  573. #define IPP_ERROR_RET( ErrCode ) return (ErrCode)
  574. #ifdef _IPP_DEBUG
  575. #define IPP_BADARG_RET( expr, ErrCode )\
  576. {if (expr) { IPP_ERROR_RET( ErrCode ); }}
  577. #else
  578. #define IPP_BADARG_RET( expr, ErrCode )
  579. #endif
  580. #define IPP_BAD_SIZE_RET( n )\
  581. IPP_BADARG_RET( (n)<=0, ippStsSizeErr )
  582. #define IPP_BAD_STEP_RET( n )\
  583. IPP_BADARG_RET( (n)<=0, ippStsStepErr )
  584. #define IPP_BAD_PTR1_RET( ptr )\
  585. IPP_BADARG_RET( NULL==(ptr), ippStsNullPtrErr )
  586. #define IPP_BAD_PTR2_RET( ptr1, ptr2 )\
  587. {IPP_BAD_PTR1_RET( ptr1 ); IPP_BAD_PTR1_RET( ptr2 )}
  588. #define IPP_BAD_PTR3_RET( ptr1, ptr2, ptr3 )\
  589. {IPP_BAD_PTR2_RET( ptr1, ptr2 ); IPP_BAD_PTR1_RET( ptr3 )}
  590. #define IPP_BAD_PTR4_RET( ptr1, ptr2, ptr3, ptr4 )\
  591. {IPP_BAD_PTR2_RET( ptr1, ptr2 ); IPP_BAD_PTR2_RET( ptr3, ptr4 )}
  592. #define IPP_BAD_ISIZE_RET(roi) \
  593. IPP_BADARG_RET( ((roi).width<=0 || (roi).height<=0), ippStsSizeErr)
  594. /* ////////////////////////////////////////////////////////////////////////// */
  595. /* internal messages */
  596. #define MSG_LOAD_DLL_ERR (-9700) /* Error at loading of %s library */
  597. #define MSG_NO_DLL (-9701) /* No DLLs were found in the Waterfall procedure */
  598. #define MSG_NO_SHARED (-9702) /* No shared libraries were found in the Waterfall procedure */
  599. /* ////////////////////////////////////////////////////////////////////////// */
  600. typedef union { /* double precision */
  601. Ipp64s hex;
  602. Ipp64f fp;
  603. } IppFP_64f;
  604. typedef union { /* single precision */
  605. Ipp32s hex;
  606. Ipp32f fp;
  607. } IppFP_32f;
  608. extern const IppFP_32f ippConstantOfNAN_32f;
  609. extern const IppFP_64f ippConstantOfNAN_64f;
  610. extern const IppFP_32f ippConstantOfINF_32f;
  611. extern const IppFP_64f ippConstantOfINF_64f;
  612. extern const IppFP_32f ippConstantOfINF_NEG_32f;
  613. extern const IppFP_64f ippConstantOfINF_NEG_64f;
  614. #define NAN_32F (ippConstantOfNAN_32f.fp)
  615. #define NAN_64F (ippConstantOfNAN_64f.fp)
  616. #define INF_32F (ippConstantOfINF_32f.fp)
  617. #define INF_64F (ippConstantOfINF_64f.fp)
  618. #define INF_NEG_32F (ippConstantOfINF_NEG_32f.fp)
  619. #define INF_NEG_64F (ippConstantOfINF_NEG_64f.fp)
  620. /* ////////////////////////////////////////////////////////////////////////// */
  621. typedef enum {
  622. ippunreg=-1,
  623. ippac = 0,
  624. ippcc = 1,
  625. ippch = 2,
  626. ippcp = 3,
  627. ippcv = 4,
  628. ippdc = 5,
  629. ippdi = 6,
  630. ippgen = 7,
  631. ippi = 8,
  632. ippj = 9,
  633. ippm = 10,
  634. ippr = 11,
  635. ipps = 12,
  636. ippsc = 13,
  637. ippsr = 14,
  638. ippvc = 15,
  639. ippvm = 16,
  640. ippmsdk = 17,
  641. ippcpepid = 18,
  642. ippe = 19,
  643. ipprs = 20,
  644. ippsq = 21,
  645. ippnomore
  646. } IppDomain;
  647. int __CDECL ownGetNumThreads( void );
  648. int __CDECL ownGetFeature( Ipp64u MaskOfFeature ); /* the main function of tick-tock dispatcher */
  649. #ifdef _IPP_DYNAMIC
  650. typedef IppStatus (__STDCALL *DYN_RELOAD)( int );
  651. void __CDECL ownRegisterLib( IppDomain, DYN_RELOAD );
  652. void __CDECL ownUnregisterLib( IppDomain );
  653. #endif
  654. /* the number of threads available for any ipp function that uses OMP; */
  655. /* at the ippxx.dll loading time is equal to the number of logical processors, */
  656. /* and can be changed ONLY externally by library user to any desired number */
  657. /* by means of ippSetNumThreads() function */
  658. #define IPP_GET_NUM_THREADS() ( ownGetNumThreads() )
  659. #define IPP_OMP_NUM_THREADS() num_threads( IPP_GET_NUM_THREADS() )
  660. #define IPP_OMP_LIMIT_MAX_NUM_THREADS(n) num_threads( IPP_MIN(IPP_GET_NUM_THREADS(),(n)))
  661. /* ////////////////////////////////////////////////////////////////////////// */
  662. /* Define NULL pointer value */
  663. #ifndef NULL
  664. #ifdef __cplusplus
  665. #define NULL 0
  666. #else
  667. #define NULL ((void *)0)
  668. #endif
  669. #endif
  670. #define UNREFERENCED_PARAMETER(p) (p)=(p)
  671. #if defined( _IPP_MARK_LIBRARY )
  672. static char G[] = {73, 80, 80, 71, 101, 110, 117, 105, 110, 101, 243, 193, 210, 207, 215};
  673. #endif
  674. #define STR2(x) #x
  675. #define STR(x) STR2(x)
  676. #define MESSAGE( desc )\
  677. message(__FILE__ "(" STR(__LINE__) "):" #desc)
  678. /*
  679. // endian definition
  680. */
  681. #define IPP_LITTLE_ENDIAN (0)
  682. #define IPP_BIG_ENDIAN (1)
  683. #if defined( _IPP_LE )
  684. #define IPP_ENDIAN IPP_LITTLE_ENDIAN
  685. #elif defined( _IPP_BE )
  686. #define IPP_ENDIAN IPP_BIG_ENDIAN
  687. #else
  688. #if defined( __ARMEB__ )
  689. #define IPP_ENDIAN IPP_BIG_ENDIAN
  690. #else
  691. #define IPP_ENDIAN IPP_LITTLE_ENDIAN
  692. #endif
  693. #endif
  694. /* ////////////////////////////////////////////////////////////////////////// */
  695. /* intrinsics */
  696. #if (_IPP >= _IPP_A6) || (_IPP32E >= _IPP32E_M7)
  697. #if defined(__INTEL_COMPILER) || (_MSC_VER >= 1300)
  698. #if (_IPP == _IPP_A6)
  699. #include "xmmintrin.h"
  700. #elif (_IPP == _IPP_W7)
  701. #if defined(__INTEL_COMPILER)
  702. #include "emmintrin.h"
  703. #else
  704. #undef _W7
  705. #include "emmintrin.h"
  706. #define _W7
  707. #endif
  708. #define _mm_loadu _mm_loadu_si128
  709. #elif (_IPP == _IPP_T7) || (_IPP32E == _IPP32E_M7)
  710. #if defined(__INTEL_COMPILER)
  711. #include "pmmintrin.h"
  712. #define _mm_loadu _mm_lddqu_si128
  713. #elif (_MSC_FULL_VER >= 140050110)
  714. #include "intrin.h"
  715. #define _mm_loadu _mm_lddqu_si128
  716. #elif (_MSC_FULL_VER < 140050110)
  717. #include "emmintrin.h"
  718. #define _mm_loadu _mm_loadu_si128
  719. #endif
  720. #elif (_IPP == _IPP_V8) || (_IPP32E == _IPP32E_U8)
  721. #if defined(__INTEL_COMPILER)
  722. #include "tmmintrin.h"
  723. #define _mm_loadu _mm_lddqu_si128
  724. #elif (_MSC_FULL_VER >= 140050110)
  725. #include "intrin.h"
  726. #define _mm_loadu _mm_lddqu_si128
  727. #elif (_MSC_FULL_VER < 140050110)
  728. #include "emmintrin.h"
  729. #define _mm_loadu _mm_loadu_si128
  730. #endif
  731. #elif (_IPP == _IPP_P8) || (_IPP32E == _IPP32E_Y8)
  732. #if defined(__INTEL_COMPILER)
  733. #include "smmintrin.h"
  734. #define _mm_loadu _mm_lddqu_si128
  735. #elif (_MSC_FULL_VER >= 140050110)
  736. #include "intrin.h"
  737. #define _mm_loadu _mm_lddqu_si128
  738. #elif (_MSC_FULL_VER < 140050110)
  739. #include "emmintrin.h"
  740. #define _mm_loadu _mm_loadu_si128
  741. #endif
  742. #elif (_IPP >= _IPP_G9) || (_IPP32E >= _IPP32E_E9)
  743. #if defined(__INTEL_COMPILER)
  744. #include "immintrin.h"
  745. #define _mm_loadu _mm_lddqu_si128
  746. #elif (_MSC_FULL_VER >= 160021003)
  747. #include "immintrin.h"
  748. #define _mm_loadu _mm_lddqu_si128
  749. #endif
  750. #endif
  751. #endif
  752. #elif (_IPPLP32 >= _IPPLP32_S8) || (_IPPLP64 >= _IPPLP64_N8)
  753. #if defined(__INTEL_COMPILER)
  754. #include "tmmintrin.h"
  755. #define _mm_loadu _mm_lddqu_si128
  756. #elif (_MSC_FULL_VER >= 140050110)
  757. #include "intrin.h"
  758. #define _mm_loadu _mm_lddqu_si128
  759. #elif (_MSC_FULL_VER < 140050110)
  760. #include "emmintrin.h"
  761. #define _mm_loadu _mm_loadu_si128
  762. #endif
  763. #elif (_IPPLRB >= _IPPLRB_B2)
  764. #if defined(__INTEL_COMPILER) || defined(_REF_LIB)
  765. #include "immintrin.h"
  766. #endif
  767. #endif
  768. // **** intrinsics for bit casting ****
  769. #if defined(__INTEL_COMPILER)
  770. extern unsigned int __intel_castf32_u32(float val);
  771. extern float __intel_castu32_f32(unsigned int val);
  772. extern unsigned __int64 __intel_castf64_u64(double val);
  773. extern double __intel_castu64_f64(unsigned __int64 val);
  774. #define __CAST_32f32u(val) __intel_castf32_u32((Ipp32f)val)
  775. #define __CAST_32u32f(val) __intel_castu32_f32((Ipp32u)val)
  776. #define __CAST_64f64u(val) __intel_castf64_u64((Ipp64f)val)
  777. #define __CAST_64u64f(val) __intel_castu64_f64((Ipp64u)val)
  778. #else
  779. #define __CAST_32f32u(val) ( *((Ipp32u*)&val) )
  780. #define __CAST_32u32f(val) ( *((Ipp32f*)&val) )
  781. #define __CAST_64f64u(val) ( *((Ipp64u*)&val) )
  782. #define __CAST_64u64f(val) ( *((Ipp64f*)&val) )
  783. #endif
  784. // short names for vector registers casting
  785. #define _pd2ps _mm_castpd_ps
  786. #define _ps2pd _mm_castps_pd
  787. #define _pd2pi _mm_castpd_si128
  788. #define _pi2pd _mm_castsi128_pd
  789. #define _ps2pi _mm_castps_si128
  790. #define _pi2ps _mm_castsi128_ps
  791. #define _ypd2ypi _mm256_castpd_si256
  792. #define _ypi2ypd _mm256_castsi256_pd
  793. #define _yps2ypi _mm256_castps_si256
  794. #define _ypi2yps _mm256_castsi256_ps
  795. #define _ypd2yps _mm256_castpd_ps
  796. #define _yps2ypd _mm256_castps_pd
  797. #define _yps2ps _mm256_castps256_ps128
  798. #define _ypi2pi _mm256_castsi256_si128
  799. #define _ypd2pd _mm256_castpd256_pd128
  800. #define _ps2yps _mm256_castps128_ps256
  801. #define _pi2ypi _mm256_castsi128_si256
  802. #define _pd2ypd _mm256_castpd128_pd256
  803. #if defined(__INTEL_COMPILER)
  804. #define __IVDEP ivdep
  805. #else
  806. #define __IVDEP message("message :: 'ivdep' is not defined")
  807. #endif
  808. //usage: #pragma __IVDEP
  809. /* //////////////////////////////////////////////////////////////////////////
  810. _IPP_DATA shoul be defined only:
  811. - if compile not merged library
  812. - only for 1 CPU for merged library to avoid data duplication
  813. */
  814. #if defined( _MERGED_BLD ) && ( defined(_G9) || defined(_E9) ) /* compile data only for g9 and e9 CPU */
  815. #define _IPP_DATA 1
  816. #elif !defined( _MERGED_BLD ) /* compile data if it isn't merged library */
  817. #define _IPP_DATA 1
  818. #endif
  819. #if defined( __cplusplus )
  820. }
  821. #endif
  822. #endif /* __OWNDEFS_H__ */