00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023 #if (defined(DSP_X86_64) && (!defined(DSP_X86)))
00024
00025 #include <stdio.h>
00026 #include <string.h>
00027 #include <limits.h>
00028 #include <math.h>
00029 #include <float.h>
00030
00031 #include "dsp/X86-64.h"
00032
00033
00034 #ifdef __cplusplus
00035 extern "C"
00036 {
00037 #endif
00038
00039
00040 void dsp_x86_64_minmaxf (float *fpMin, float *fpMax, const float *fpSrc,
00041 long lDataLength)
00042 {
00043 long lDataCntr;
00044
00045 *fpMin = FLT_MAX;
00046 *fpMax = -FLT_MAX;
00047 X86_ASM (
00048 "movss %0, %%xmm0\n\t" \
00049 "movss %1, %%xmm1\n\t"
00050 :
00051 : "m" (*fpMin),
00052 "m" (*fpMax)
00053 : "xmm0", "xmm1", "memory");
00054 for (lDataCntr = 0; lDataCntr < lDataLength; lDataCntr++)
00055 {
00056 X86_ASM (
00057 "movss %0, %%xmm2\n\t" \
00058 "minss %%xmm2, %%xmm0\n\t" \
00059 "maxss %%xmm2, %%xmm1\n\t"
00060 :
00061 : "m" (fpSrc[lDataCntr])
00062 : "xmm0", "xmm1", "xmm2", "memory");
00063 }
00064 X86_ASM (
00065 "movss %%xmm0, %0\n\t" \
00066 "movss %%xmm1, %1\n\t"
00067 : "=m" (*fpMin),
00068 "=m" (*fpMax)
00069 :
00070 : "xmm0", "xmm1", "memory");
00071 }
00072
00073
00074 void dsp_x86_64_minmax (double *dpMin, double *dpMax, const double *dpSrc,
00075 long lDataLength)
00076 {
00077 long lDataCntr;
00078
00079 *dpMin = FLT_MAX;
00080 *dpMax = -FLT_MAX;
00081 X86_ASM (
00082 "movsd %0, %%xmm0\n\t" \
00083 "movsd %1, %%xmm1\n\t"
00084 :
00085 : "m" (*dpMin),
00086 "m" (*dpMax)
00087 : "xmm0", "xmm1", "memory");
00088 for (lDataCntr = 0; lDataCntr < lDataLength; lDataCntr++)
00089 {
00090 X86_ASM (
00091 "movsd %0, %%xmm2\n\t" \
00092 "minsd %%xmm2, %%xmm0\n\t" \
00093 "maxsd %%xmm2, %%xmm1\n\t"
00094 :
00095 : "m" (dpSrc[lDataCntr])
00096 : "xmm0", "xmm1", "xmm2", "memory");
00097 }
00098 X86_ASM (
00099 "movss %%xmm0, %0\n\t" \
00100 "movss %%xmm1, %1\n\t"
00101 : "=m" (*dpMin),
00102 "=m" (*dpMax)
00103 :
00104 : "xmm0", "xmm1", "memory");
00105 }
00106
00107
00108 float dsp_x86_64_crosscorrf (const float *fpSrc1, const float *fpSrc2,
00109 long lDataLength)
00110 {
00111 long lDataCntr;
00112 float fScale;
00113 float fNormFact;
00114 float fProdSum;
00115 float fSqSum1;
00116 float fSqSum2;
00117 float fRes;
00118
00119 X86_ASM (
00120 "xorps %%xmm0, %%xmm0\n\t" \
00121 "xorps %%xmm1, %%xmm1\n\t" \
00122 "xorps %%xmm2, %%xmm2\n\t"
00123 :
00124 :
00125 : "xmm0", "xmm1", "xmm2");
00126 for (lDataCntr = 0; lDataCntr < lDataLength; lDataCntr++)
00127 {
00128 X86_ASM (
00129 "movss %3, %%xmm3\n\t" \
00130 "movss %4, %%xmm4\n\t" \
00131 \
00132 "movss %%xmm4, %%xmm5\n\t" \
00133 "mulss %%xmm3, %%xmm5\n\t" \
00134 "addss %%xmm5, %%xmm0\n\t" \
00135 \
00136 "movss %%xmm3, %%xmm5\n\t" \
00137 "mulss %%xmm3, %%xmm5\n\t" \
00138 "addss %%xmm5, %%xmm1\n\t" \
00139 \
00140 "movss %%xmm4, %%xmm5\n\t" \
00141 "mulss %%xmm4, %%xmm5\n\t" \
00142 "addss %%xmm5, %%xmm2\n\t" \
00143 \
00144 "movss %%xmm0, %0\n\t" \
00145 "movss %%xmm1, %1\n\t" \
00146 "movss %%xmm2, %2\n\t"
00147 : "=m" (fProdSum),
00148 "=m" (fSqSum1),
00149 "=m" (fSqSum2)
00150 : "m" (fpSrc1[lDataCntr]),
00151 "m" (fpSrc2[lDataCntr])
00152 : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
00153 }
00154 fScale = 1.0F / iDataLength;
00155 fNormFact = sqrtf(fSqSum1 * fSqSum2) * fScale;
00156 fRes = (fProdSum * fScale) / fNormFact;
00157
00158 return fRes;
00159 }
00160
00161
00162 double dsp_x86_64_crosscorr (const double *dpSrc1, const double *dpSrc2,
00163 long lDataLength)
00164 {
00165 long lDataCntr;
00166 double dScale;
00167 double dNormFact;
00168 double dProdSum;
00169 double dSqSum1;
00170 double dSqSum2;
00171 double dRes;
00172
00173 X86_ASM (
00174 "xorpd %%xmm0, %%xmm0\n\t" \
00175 "xorpd %%xmm1, %%xmm1\n\t" \
00176 "xorpd %%xmm2, %%xmm2\n\t"
00177 :
00178 :
00179 : "xmm0", "xmm1", "xmm2");
00180 for (lDataCntr = 0; lDataCntr < lDataLength; lDataCntr++)
00181 {
00182 X86_ASM (
00183 "movsd %3, %%xmm3\n\t" \
00184 "movsd %4, %%xmm4\n\t" \
00185 \
00186 "movsd %%xmm4, %%xmm5\n\t" \
00187 "mulsd %%xmm3, %%xmm5\n\t" \
00188 "addsd %%xmm5, %%xmm0\n\t" \
00189 \
00190 "movsd %%xmm3, %%xmm5\n\t" \
00191 "mulsd %%xmm3, %%xmm5\n\t" \
00192 "addsd %%xmm5, %%xmm1\n\t" \
00193 \
00194 "movsd %%xmm4, %%xmm5\n\t" \
00195 "mulsd %%xmm4, %%xmm5\n\t" \
00196 "addsd %%xmm5, %%xmm2\n\t" \
00197 \
00198 "movsd %%xmm0, %0\n\t" \
00199 "movsd %%xmm1, %1\n\t" \
00200 "movsd %%xmm2, %2\n\t"
00201 : "=m" (dProdSum),
00202 "=m" (dSqSum1),
00203 "=m" (dSqSum2)
00204 : "m" (dpSrc1[lDataCntr]),
00205 "m" (dpSrc2[lDataCntr])
00206 : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5");
00207 }
00208 dScale = 1.0 / iDataLength;
00209 dNormFact = sqrt(dSqSum1 * dSqSum2) * dScale;
00210 dRes = (dProdSum * dScale) / dNormFact;
00211
00212 return dRes;
00213 }
00214
00215
00216 void dsp_x86_64_firf (float *fpDest, const float *fpSrc, long lDataLength,
00217 const float *fpCoeff, long lCoeffLength)
00218 {
00219 long lDestCntr;
00220 long lSrcCntr;
00221 long lCoeffCntr;
00222 long lSrcCount;
00223
00224 lDestCntr = 0;
00225 lSrcCount = lDataLength + lCoeffLength;
00226 for (lSrcCntr = lCoeffLength;
00227 lSrcCntr < lSrcCount;
00228 lSrcCntr++)
00229 {
00230 X86_ASM (
00231 "xorps %%xmm0, %%xmm0\n\t"
00232 :
00233 :
00234 : "xmm0");
00235 for (lCoeffCntr = 0;
00236 lCoeffCntr < lCoeffLength;
00237 lCoeffCntr++)
00238 {
00239 X86_ASM (
00240 "movss %0, %%xmm1\n\t"
00241 "mulss %1, %%xmm1\n\t"
00242 "addss %%xmm1, %%xmm0\n\t"
00243 :
00244 : "m" (fpSrc[lSrcCntr - lCoeffCntr]),
00245 "m" (fpCoeff[lCoeffCntr])
00246 : "xmm0", "xmm1", "memory");
00247 }
00248 X86_ASM (
00249 "movss %%xmm0, %0\n\t"
00250 : "=m" (fpDest[lDestCntr++])
00251 :
00252 : "xmm0", "memory");
00253 }
00254 }
00255
00256
00257 void dsp_x86_64_fir (double *dpDest, const double *dpSrc, long lDataLength,
00258 const double *dpCoeff, long lCoeffLength)
00259 {
00260 long lDestCntr;
00261 long lSrcCntr;
00262 long lCoeffCntr;
00263 long lSrcCount;
00264
00265 lDestCntr = 0;
00266 lSrcCount = lDataLength + lCoeffLength;
00267 for (lSrcCntr = lCoeffLength;
00268 lSrcCntr < lSrcCount;
00269 lSrcCntr++)
00270 {
00271 X86_ASM (
00272 "xorpd %%xmm0, %%xmm0\n\t"
00273 :
00274 :
00275 : "xmm0");
00276 for (lCoeffCntr = 0;
00277 lCoeffCntr < lCoeffLength;
00278 lCoeffCntr++)
00279 {
00280 X86_ASM (
00281 "movsd %0, %%xmm1\n\t"
00282 "mulsd %1, %%xmm1\n\t"
00283 "addsd %%xmm1, %%xmm0\n\t"
00284 :
00285 : "m" (dpSrc[lSrcCntr - lCoeffCntr]),
00286 "m" (dpCoeff[lCoeffCntr])
00287 : "xmm0", "xmm1", "memory");
00288 }
00289 X86_ASM (
00290 "movsd %%xmm0, %0\n\t"
00291 : "=m" (dpDest[lDestCntr++])
00292 :
00293 : "xmm0", "memory");
00294 }
00295 }
00296
00297
00298 void dsp_x86_64_iirf (float *fpVect, long lDataLength, const float *fpCoeff,
00299 float *fpX, float *fpY)
00300 {
00301 long lDataCntr;
00302
00303 X86_ASM (
00304 "movss %0, %%xmm1\n\t" \
00305 "movss %1, %%xmm2\n\t" \
00306 "movss %2, %%xmm3\n\t" \
00307 "movss %3, %%xmm4\n\t" \
00308 "prefetchnta %4\n\t"
00309 :
00310 : "m" (fpX[1]),
00311 "m" (fpX[2]),
00312 "m" (fpY[0]),
00313 "m" (fpY[1]),
00314 "m" (fpCoeff[0])
00315 : "xmm1", "xmm2", "xmm3", "xmm4", "memory");
00316 for (lDataCntr = 0; lDataCntr < lDataLength; lDataCntr++)
00317 {
00318 X86_ASM (
00319 "movss %%xmm1, %%xmm0\n\t" \
00320 "movss %%xmm2, %%xmm1\n\t" \
00321 "movss %1, %%xmm2\n\t" \
00322 \
00323 "movss %2, %%xmm5\n\t" \
00324 "mulss %%xmm2, %%xmm5\n\t" \
00325 "movss %3, %%xmm6\n\t" \
00326 "mulss %%xmm1, %%xmm6\n\t" \
00327 "addss %%xmm6, %%xmm5\n\t" \
00328 "movss %4, %%xmm6\n\t" \
00329 "mulss %%xmm0, %%xmm6\n\t" \
00330 "addss %%xmm6, %%xmm5\n\t" \
00331 \
00332 "movss %5, %%xmm6\n\t" \
00333 "mulss %%xmm4, %%xmm6\n\t" \
00334 "movss %6, %%xmm7\n\t" \
00335 "mulss %%xmm3, %%xmm7\n\t" \
00336 "addss %%xmm7, %%xmm6\n\t" \
00337 \
00338 "addss %%xmm5, %%xmm6\n\t" \
00339 "movss %%xmm4, %%xmm3\n\t" \
00340 "movss %%xmm6, %%xmm4\n\t" \
00341 \
00342 "movss %%xmm6, %0\n\t"
00343 : "=m" (fpVect[lDataCntr])
00344 : "0" (fpVect[lDataCntr]),
00345 "m" (fpCoeff[0]),
00346 "m" (fpCoeff[1]),
00347 "m" (fpCoeff[2]),
00348 "m" (fpCoeff[3]),
00349 "m" (fpCoeff[4])
00350 : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
00351 "memory");
00352 }
00353 X86_ASM (
00354 "movss %%xmm0, %0\n\t" \
00355 "movss %%xmm1, %1\n\t" \
00356 "movss %%xmm2, %2\n\t" \
00357 "movss %%xmm3, %3\n\t" \
00358 "movss %%xmm4, %4\n\t"
00359 : "=m" (fpX[0]),
00360 "=m" (fpX[1]),
00361 "=m" (fpX[2]),
00362 "=m" (fpY[0]),
00363 "=m" (fpY[1])
00364 :
00365 : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "memory");
00366 }
00367
00368
00369 void dsp_x86_64_iir (double *dpVect, long lDataLength, const double *dpCoeff,
00370 double *dpX, double *dpY)
00371 {
00372 long lDataCntr;
00373
00374 X86_ASM (
00375 "movsd %0, %%xmm1\n\t" \
00376 "movsd %1, %%xmm2\n\t" \
00377 "movsd %2, %%xmm3\n\t" \
00378 "movsd %3, %%xmm4\n\t" \
00379 "prefetchnta %4\n\t" \
00380 "prefetchnta %5\n\t"
00381 :
00382 : "m" (dpX[1]),
00383 "m" (dpX[2]),
00384 "m" (dpY[0]),
00385 "m" (dpY[1]),
00386 "m" (dpCoeff[0]),
00387 "m" (dpCoeff[3])
00388 : "xmm1", "xmm2", "xmm3", "xmm4", "memory");
00389 for (lDataCntr = 0; lDataCntr < lDataLength; lDataCntr++)
00390 {
00391 X86_ASM (
00392 "movsd %%xmm1, %%xmm0\n\t" \
00393 "movsd %%xmm2, %%xmm1\n\t" \
00394 "movsd %1, %%xmm2\n\t" \
00395 \
00396 "movsd %2, %%xmm5\n\t" \
00397 "mulsd %%xmm2, %%xmm5\n\t" \
00398 "movsd %3, %%xmm6\n\t" \
00399 "mulsd %%xmm1, %%xmm6\n\t" \
00400 "addsd %%xmm6, %%xmm5\n\t" \
00401 "movsd %4, %%xmm6\n\t" \
00402 "mulsd %%xmm0, %%xmm6\n\t" \
00403 "addsd %%xmm6, %%xmm5\n\t" \
00404 \
00405 "movsd %5, %%xmm6\n\t" \
00406 "mulsd %%xmm4, %%xmm6\n\t" \
00407 "movsd %6, %%xmm7\n\t" \
00408 "mulsd %%xmm3, %%xmm7\n\t" \
00409 "addsd %%xmm7, %%xmm6\n\t" \
00410 \
00411 "addsd %%xmm5, %%xmm6\n\t" \
00412 "movsd %%xmm4, %%xmm3\n\t" \
00413 "movsd %%xmm6, %%xmm4\n\t" \
00414 \
00415 "movsd %%xmm6, %0\n\t"
00416 : "=m" (dpVect[lDataCntr])
00417 : "0" (dpVect[lDataCntr]),
00418 "m" (dpCoeff[0]),
00419 "m" (dpCoeff[1]),
00420 "m" (dpCoeff[2]),
00421 "m" (dpCoeff[3]),
00422 "m" (dpCoeff[4])
00423 : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
00424 "memory");
00425 }
00426 X86_ASM (
00427 "movsd %%xmm0, %0\n\t" \
00428 "movsd %%xmm1, %1\n\t" \
00429 "movsd %%xmm2, %2\n\t" \
00430 "movsd %%xmm3, %3\n\t" \
00431 "movsd %%xmm4, %4\n\t"
00432 : "=m" (dpX[0]),
00433 "=m" (dpX[1]),
00434 "=m" (dpX[2]),
00435 "=m" (dpY[0]),
00436 "=m" (dpY[1])
00437 :
00438 : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "memory");
00439 }
00440
00441
00442 void dsp_x86_64_iirf_nip (float *fpDest, const float *fpSrc, long lDataLength,
00443 const float *fpCoeff, float *fpX, float *fpY)
00444 {
00445 long lDataCntr;
00446
00447 X86_ASM (
00448 "movss %0, %%xmm1\n\t" \
00449 "movss %1, %%xmm2\n\t" \
00450 "movss %2, %%xmm3\n\t" \
00451 "movss %3, %%xmm4\n\t" \
00452 "prefetchnta %4\n\t"
00453 :
00454 : "m" (fpX[1]),
00455 "m" (fpX[2]),
00456 "m" (fpY[0]),
00457 "m" (fpY[1]),
00458 "m" (fpCoeff[0])
00459 : "xmm1", "xmm2", "xmm3", "xmm4", "memory");
00460 for (lDataCntr = 0; lDataCntr < lDataLength; lDataCntr++)
00461 {
00462 X86_ASM (
00463 "movss %%xmm1, %%xmm0\n\t" \
00464 "movss %%xmm2, %%xmm1\n\t" \
00465 "movss %1, %%xmm2\n\t" \
00466 \
00467 "movss %2, %%xmm5\n\t" \
00468 "mulss %%xmm2, %%xmm5\n\t" \
00469 "movss %3, %%xmm6\n\t" \
00470 "mulss %%xmm1, %%xmm6\n\t" \
00471 "addss %%xmm6, %%xmm5\n\t" \
00472 "movss %4, %%xmm6\n\t" \
00473 "mulss %%xmm0, %%xmm6\n\t" \
00474 "addss %%xmm6, %%xmm5\n\t" \
00475 \
00476 "movss %5, %%xmm6\n\t" \
00477 "mulss %%xmm4, %%xmm6\n\t" \
00478 "movss %6, %%xmm7\n\t" \
00479 "mulss %%xmm3, %%xmm7\n\t" \
00480 "addss %%xmm7, %%xmm6\n\t" \
00481 \
00482 "addss %%xmm5, %%xmm6\n\t" \
00483 "movss %%xmm4, %%xmm3\n\t" \
00484 "movss %%xmm6, %%xmm4\n\t" \
00485 \
00486 "movss %%xmm6, %0\n\t"
00487 : "=m" (fpDest[lDataCntr])
00488 : "m" (fpSrc[lDataCntr]),
00489 "m" (fpCoeff[0]),
00490 "m" (fpCoeff[1]),
00491 "m" (fpCoeff[2]),
00492 "m" (fpCoeff[3]),
00493 "m" (fpCoeff[4])
00494 : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
00495 "memory");
00496 }
00497 X86_ASM (
00498 "movss %%xmm0, %0\n\t" \
00499 "movss %%xmm1, %1\n\t" \
00500 "movss %%xmm2, %2\n\t" \
00501 "movss %%xmm3, %3\n\t" \
00502 "movss %%xmm4, %4\n\t"
00503 : "=m" (fpX[0]),
00504 "=m" (fpX[1]),
00505 "=m" (fpX[2]),
00506 "=m" (fpY[0]),
00507 "=m" (fpY[1])
00508 :
00509 : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "memory");
00510 }
00511
00512
00513 void dsp_x86_64_iir_nip (double *dpDest, const double *dpSrc, long lDataLength,
00514 const double *dpCoeff, double *dpX, double *dpY)
00515 {
00516 long lDataCntr;
00517
00518 X86_ASM (
00519 "movsd %0, %%xmm1\n\t" \
00520 "movsd %1, %%xmm2\n\t" \
00521 "movsd %2, %%xmm3\n\t" \
00522 "movsd %3, %%xmm4\n\t" \
00523 "prefetchnta %4\n\t" \
00524 "prefetchnta %5\n\t"
00525 :
00526 : "m" (dpX[1]),
00527 "m" (dpX[2]),
00528 "m" (dpY[0]),
00529 "m" (dpY[1]),
00530 "m" (dpCoeff[0]),
00531 "m" (dpCoeff[3])
00532 : "xmm1", "xmm2", "xmm3", "xmm4", "memory");
00533 for (lDataCntr = 0; lDataCntr < lDataLength; lDataCntr++)
00534 {
00535 X86_ASM (
00536 "movsd %%xmm1, %%xmm0\n\t" \
00537 "movsd %%xmm2, %%xmm1\n\t" \
00538 "movsd %1, %%xmm2\n\t" \
00539 \
00540 "movsd %2, %%xmm5\n\t" \
00541 "mulsd %%xmm2, %%xmm5\n\t" \
00542 "movsd %3, %%xmm6\n\t" \
00543 "mulsd %%xmm1, %%xmm6\n\t" \
00544 "addsd %%xmm6, %%xmm5\n\t" \
00545 "movsd %4, %%xmm6\n\t" \
00546 "mulsd %%xmm0, %%xmm6\n\t" \
00547 "addsd %%xmm6, %%xmm5\n\t" \
00548 \
00549 "movsd %5, %%xmm6\n\t" \
00550 "mulsd %%xmm4, %%xmm6\n\t" \
00551 "movsd %6, %%xmm7\n\t" \
00552 "mulsd %%xmm3, %%xmm7\n\t" \
00553 "addsd %%xmm7, %%xmm6\n\t" \
00554 \
00555 "addsd %%xmm5, %%xmm6\n\t" \
00556 "movsd %%xmm4, %%xmm3\n\t" \
00557 "movsd %%xmm6, %%xmm4\n\t" \
00558 \
00559 "movsd %%xmm6, %0\n\t"
00560 : "=m" (dpDest[lDataCntr])
00561 : "m" (dpSrc[lDataCntr]),
00562 "m" (dpCoeff[0]),
00563 "m" (dpCoeff[1]),
00564 "m" (dpCoeff[2]),
00565 "m" (dpCoeff[3]),
00566 "m" (dpCoeff[4])
00567 : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7",
00568 "memory");
00569 }
00570 X86_ASM (
00571 "movsd %%xmm0, %0\n\t" \
00572 "movsd %%xmm1, %1\n\t" \
00573 "movsd %%xmm2, %2\n\t" \
00574 "movsd %%xmm3, %3\n\t" \
00575 "movsd %%xmm4, %4\n\t"
00576 : "=m" (dpX[0]),
00577 "=m" (dpX[1]),
00578 "=m" (dpX[2]),
00579 "=m" (dpY[0]),
00580 "=m" (dpY[1])
00581 :
00582 : "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "memory");
00583 }
00584
00585
00586 #ifdef __cplusplus
00587 }
00588 #endif
00589
00590 #endif