00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #include <math.h>
00033 #include "ls_fft.h"
00034 #include "sharp_ylmgen_c.h"
00035 #include "sharp_internal.h"
00036 #include "c_utils.h"
00037 #include "sharp_core.h"
00038 #include "sharp_vecutil.h"
00039 #include "walltime_c.h"
00040 #include "sharp_almhelpers.h"
00041 #include "sharp_geomhelpers.h"
00042
/* Short names for the C99 double- and single-precision complex types. */
typedef double complex dcmplx;
typedef float complex fcmplx;

/* sqrt(1/2) and sqrt(2), spelled out beyond double precision. */
static const double sqrt_one_half = 0.707106781186547572737310929369;
static const double sqrt_two = 1.414213562373095145474621858739;
00048
/* Tuning knobs for get_chunk_info(): the smallest chunk size that is still
   considered worthwhile and the preferred upper bound on the chunk count.
   Both can be changed at run time through sharp_set_chunksize_min() and
   sharp_set_nchunks_max(). */
static int chunksize_min=500, nchunks_max=10;

/* Splits ndata work items into chunks.
 *
 * ndata     : number of items to distribute
 * nmult     : the chunk size is rounded up to a multiple of this value
 *             whenever more than one chunk is produced (values < 1 are
 *             treated as 1)
 * nchunks   : out, number of chunks
 * chunksize : out, number of items per chunk (the last chunk may be shorter)
 *
 * For ndata <= 0 both outputs are set to 0, so a caller looping over the
 * chunks does no work. The unguarded arithmetic would divide by zero in that
 * case, and likewise for nmult == 0 or nchunks_max == 0.
 */
static void get_chunk_info (int ndata, int nmult, int *nchunks, int *chunksize)
{
  if (ndata <= 0) {
    *nchunks = 0;
    *chunksize = 0;
    return;
  }
  if (nmult < 1)
    nmult = 1;
  /* protect against a non-positive value installed via the setter */
  const int max_chunks = (nchunks_max > 0) ? nchunks_max : 1;

  /* first try: use the maximum allowed number of chunks */
  *chunksize = (ndata+max_chunks-1)/max_chunks;
  if (*chunksize >= chunksize_min) {
    *chunksize = ((*chunksize+nmult-1)/nmult)*nmult;
  } else {
    /* chunks would become too small: use fewer chunks of roughly
       chunksize_min items instead (chunksize_min > *chunksize >= 1 here) */
    *nchunks = (ndata+chunksize_min-1)/chunksize_min;
    *chunksize = (ndata+(*nchunks)-1)/(*nchunks);
    if (*nchunks > 1)
      *chunksize = ((*chunksize+nmult-1)/nmult)*nmult;
  }
  /* rounding may have changed the chunk size, so recount */
  *nchunks = (ndata+(*chunksize)-1)/(*chunksize);
}
00065
00066 int sharp_get_mlim (int lmax, int spin, double sth, double cth)
00067 {
00068 double ofs=lmax*0.01;
00069 if (ofs<100.) ofs=100.;
00070 double b = -2*spin*fabs(cth);
00071 double t1 = lmax*sth+ofs;
00072 double c = (double)spin*spin-t1*t1;
00073 double discr = b*b-4*c;
00074 if (discr<=0) return lmax;
00075 double res=(-b+sqrt(discr))/2.;
00076 if (res>lmax) res=lmax;
00077 return (int)(res+0.5);
00078 }
00079
00080 typedef struct
00081 {
00082 double phi0_;
00083 dcmplx *shiftarr;
00084 int s_shift;
00085 real_plan plan;
00086 int norot;
00087 } ringhelper;
00088
00089 static void ringhelper_init (ringhelper *self)
00090 {
00091 static ringhelper rh_null = { 0, NULL, 0, NULL, 0 };
00092 *self = rh_null;
00093 }
00094
00095 static void ringhelper_destroy (ringhelper *self)
00096 {
00097 if (self->plan) kill_real_plan(self->plan);
00098 DEALLOC(self->shiftarr);
00099 ringhelper_init(self);
00100 }
00101
00102 static void ringhelper_update (ringhelper *self, int nph, int mmax, double phi0)
00103 {
00104 self->norot = (fabs(phi0)<1e-14);
00105 if (!(self->norot))
00106 if ((mmax!=self->s_shift-1) || (!FAPPROX(phi0,self->phi0_,1e-12)))
00107 {
00108 RESIZE (self->shiftarr,dcmplx,mmax+1);
00109 self->s_shift = mmax+1;
00110 self->phi0_ = phi0;
00111 for (int m=0; m<=mmax; ++m)
00112 self->shiftarr[m] = cos(m*phi0) + _Complex_I*sin(m*phi0);
00113 }
00114 if (!self->plan) self->plan=make_real_plan(nph);
00115 if (nph!=(int)self->plan->length)
00116 {
00117 kill_real_plan(self->plan);
00118 self->plan=make_real_plan(nph);
00119 }
00120 }
00121
00122 static int ringinfo_compare (const void *xa, const void *xb)
00123 {
00124 const sharp_ringinfo *a=xa, *b=xb;
00125 return (a->sth < b->sth) ? -1 : (a->sth > b->sth) ? 1 : 0;
00126 }
00127 static int ringpair_compare (const void *xa, const void *xb)
00128 {
00129 const sharp_ringpair *a=xa, *b=xb;
00130 if (a->r1.nph==b->r1.nph)
00131 return (a->r1.phi0 < b->r1.phi0) ? -1 :
00132 ((a->r1.phi0 > b->r1.phi0) ? 1 :
00133 (a->r1.cth>b->r1.cth ? -1 : 1));
00134 return (a->r1.nph<b->r1.nph) ? -1 : 1;
00135 }
00136
00137 void sharp_make_general_alm_info (int lmax, int nm, int stride, const int *mval,
00138 const ptrdiff_t *mstart, int flags, sharp_alm_info **alm_info)
00139 {
00140 sharp_alm_info *info = RALLOC(sharp_alm_info,1);
00141 info->lmax = lmax;
00142 info->nm = nm;
00143 info->mval = RALLOC(int,nm);
00144 info->mvstart = RALLOC(ptrdiff_t,nm);
00145 info->stride = stride;
00146 info->flags = flags;
00147 for (int mi=0; mi<nm; ++mi)
00148 {
00149 info->mval[mi] = mval[mi];
00150 info->mvstart[mi] = mstart[mi];
00151 }
00152 *alm_info = info;
00153 }
00154
00155 void sharp_make_alm_info (int lmax, int mmax, int stride,
00156 const ptrdiff_t *mstart, sharp_alm_info **alm_info)
00157 {
00158 int *mval=RALLOC(int,mmax+1);
00159 for (int i=0; i<=mmax; ++i)
00160 mval[i]=i;
00161 sharp_make_general_alm_info (lmax, mmax+1, stride, mval, mstart, 0, alm_info);
00162 DEALLOC(mval);
00163 }
00164
00165 ptrdiff_t sharp_alm_index (const sharp_alm_info *self, int l, int mi)
00166 {
00167 UTIL_ASSERT(!(self->flags & SHARP_PACKED),
00168 "sharp_alm_index not applicable with SHARP_PACKED alms");
00169 return self->mvstart[mi]+self->stride*l;
00170 }
00171
00172 void sharp_destroy_alm_info (sharp_alm_info *info)
00173 {
00174 DEALLOC (info->mval);
00175 DEALLOC (info->mvstart);
00176 DEALLOC (info);
00177 }
00178
00179 void sharp_make_geom_info (int nrings, const int *nph, const ptrdiff_t *ofs,
00180 const int *stride, const double *phi0, const double *theta,
00181 const double *wgt, sharp_geom_info **geom_info)
00182 {
00183 sharp_geom_info *info = RALLOC(sharp_geom_info,1);
00184 sharp_ringinfo *infos = RALLOC(sharp_ringinfo,nrings);
00185
00186 int pos=0;
00187 info->pair=RALLOC(sharp_ringpair,nrings);
00188 info->npairs=0;
00189 info->nphmax=0;
00190 *geom_info = info;
00191
00192 for (int m=0; m<nrings; ++m)
00193 {
00194 infos[m].theta = theta[m];
00195 infos[m].cth = cos(theta[m]);
00196 infos[m].sth = sin(theta[m]);
00197 infos[m].weight = (wgt != NULL) ? wgt[m] : 1.;
00198 infos[m].phi0 = phi0[m];
00199 infos[m].ofs = ofs[m];
00200 infos[m].stride = stride[m];
00201 infos[m].nph = nph[m];
00202 if (info->nphmax<nph[m]) info->nphmax=nph[m];
00203 }
00204 qsort(infos,nrings,sizeof(sharp_ringinfo),ringinfo_compare);
00205 while (pos<nrings)
00206 {
00207 info->pair[info->npairs].r1=infos[pos];
00208 if ((pos<nrings-1) && FAPPROX(infos[pos].cth,-infos[pos+1].cth,1e-12))
00209 {
00210 if (infos[pos].cth>0)
00211 info->pair[info->npairs].r2=infos[pos+1];
00212 else
00213 {
00214 info->pair[info->npairs].r1=infos[pos+1];
00215 info->pair[info->npairs].r2=infos[pos];
00216 }
00217 ++pos;
00218 }
00219 else
00220 info->pair[info->npairs].r2.nph=-1;
00221 ++pos;
00222 ++info->npairs;
00223 }
00224 DEALLOC(infos);
00225
00226 qsort(info->pair,info->npairs,sizeof(sharp_ringpair),ringpair_compare);
00227 }
00228
00229 void sharp_destroy_geom_info (sharp_geom_info *geom_info)
00230 {
00231 DEALLOC (geom_info->pair);
00232 DEALLOC (geom_info);
00233 }
00234
00235
00236
00237
00238 static int sharp_get_mmax (int *mval, int nm)
00239 {
00240 int *mcheck=RALLOC(int,nm);
00241 SET_ARRAY(mcheck,0,nm,0);
00242 for (int i=0; i<nm; ++i)
00243 {
00244 int m_cur=mval[i];
00245 UTIL_ASSERT((m_cur>=0) && (m_cur<nm), "not all m values are present");
00246 UTIL_ASSERT(mcheck[m_cur]==0, "duplicate m value");
00247 mcheck[m_cur]=1;
00248 }
00249 DEALLOC(mcheck);
00250 return nm-1;
00251 }
00252
00253 static void ringhelper_phase2ring (ringhelper *self,
00254 const sharp_ringinfo *info, double *data, int mmax, const dcmplx *phase,
00255 int pstride, int flags)
00256 {
00257 int nph = info->nph;
00258
00259 ringhelper_update (self, nph, mmax, info->phi0);
00260
00261 double wgt = (flags&SHARP_USE_WEIGHTS) ? info->weight : 1.;
00262 if (flags&SHARP_REAL_HARMONICS)
00263 wgt *= sqrt_one_half;
00264
00265 if (nph>=2*mmax+1)
00266 {
00267 for (int m=0; m<=mmax; ++m)
00268 {
00269 dcmplx tmp = phase[m*pstride]*wgt;
00270 if(!self->norot) tmp*=self->shiftarr[m];
00271 data[2*m]=creal(tmp);
00272 data[2*m+1]=cimag(tmp);
00273 }
00274 for (int m=2*(mmax+1); m<nph+2; ++m)
00275 data[m]=0.;
00276 }
00277 else
00278 {
00279 data[0]=creal(phase[0])*wgt;
00280 SET_ARRAY(data,1,nph+2,0.);
00281
00282 int idx1=1, idx2=nph-1;
00283 for (int m=1; m<=mmax; ++m)
00284 {
00285 dcmplx tmp = phase[m*pstride]*wgt;
00286 if(!self->norot) tmp*=self->shiftarr[m];
00287 if (idx1<(nph+2)/2)
00288 {
00289 data[2*idx1]+=creal(tmp);
00290 data[2*idx1+1]+=cimag(tmp);
00291 }
00292 if (idx2<(nph+2)/2)
00293 {
00294 data[2*idx2]+=creal(tmp);
00295 data[2*idx2+1]-=cimag(tmp);
00296 }
00297 if (++idx1>=nph) idx1=0;
00298 if (--idx2<0) idx2=nph-1;
00299 }
00300 }
00301 data[1]=data[0];
00302 real_plan_backward_fftpack (self->plan, &(data[1]));
00303 }
00304
00305 static void ringhelper_ring2phase (ringhelper *self,
00306 const sharp_ringinfo *info, double *data, int mmax, dcmplx *phase,
00307 int pstride, int flags)
00308 {
00309 int nph = info->nph;
00310 #if 1
00311 int maxidx = mmax;
00312 #else
00313 int maxidx = IMIN(nph-1,mmax);
00314 #endif
00315
00316 ringhelper_update (self, nph, mmax, -info->phi0);
00317 double wgt = (flags&SHARP_USE_WEIGHTS) ? info->weight : 1;
00318 if (flags&SHARP_REAL_HARMONICS)
00319 wgt *= sqrt_two;
00320
00321 real_plan_forward_fftpack (self->plan, &(data[1]));
00322 data[0]=data[1];
00323 data[1]=data[nph+1]=0.;
00324
00325 if (maxidx<=nph/2)
00326 {
00327 if (self->norot)
00328 for (int m=0; m<=maxidx; ++m)
00329 phase[m*pstride] = (data[2*m] + _Complex_I*data[2*m+1]) * wgt;
00330 else
00331 for (int m=0; m<=maxidx; ++m)
00332 phase[m*pstride] =
00333 (data[2*m] + _Complex_I*data[2*m+1]) * self->shiftarr[m] * wgt;
00334 }
00335 else
00336 {
00337 for (int m=0; m<=maxidx; ++m)
00338 {
00339 int idx=m%nph;
00340 dcmplx val;
00341 if (idx<(nph-idx))
00342 val = (data[2*idx] + _Complex_I*data[2*idx+1]) * wgt;
00343 else
00344 val = (data[2*(nph-idx)] - _Complex_I*data[2*(nph-idx)+1]) * wgt;
00345 if (!self->norot)
00346 val *= self->shiftarr[m];
00347 phase[m*pstride]=val;
00348 }
00349 }
00350
00351 for (int m=maxidx+1;m<=mmax; ++m)
00352 phase[m*pstride]=0.;
00353 }
00354
00355 static void fill_map (const sharp_geom_info *ginfo, void *map, double value,
00356 int flags)
00357 {
00358 if (flags & SHARP_NO_FFT)
00359 {
00360 for (int j=0;j<ginfo->npairs;++j)
00361 {
00362 if (flags&SHARP_DP)
00363 {
00364 for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
00365 ((dcmplx *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]
00366 =value;
00367 for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
00368 ((dcmplx *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]
00369 =value;
00370 }
00371 else
00372 {
00373 for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
00374 ((fcmplx *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]
00375 =(float)value;
00376 for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
00377 ((fcmplx *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]
00378 =(float)value;
00379 }
00380 }
00381 }
00382 else
00383 {
00384 for (int j=0;j<ginfo->npairs;++j)
00385 {
00386 if (flags&SHARP_DP)
00387 {
00388 for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
00389 ((double *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]
00390 =value;
00391 for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
00392 ((double *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]
00393 =value;
00394 }
00395 else
00396 {
00397 for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
00398 ((float *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]
00399 =(float)value;
00400 for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
00401 ((float *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]
00402 =(float)value;
00403 }
00404 }
00405 }
00406 }
00407
00408 static void clear_alm (const sharp_alm_info *ainfo, void *alm, int flags)
00409 {
00410 #define CLEARLOOP(real_t,body) \
00411 { \
00412 real_t *talm = (real_t *)alm; \
00413 for (int l=m;l<=ainfo->lmax;++l) \
00414 body \
00415 }
00416
00417 for (int mi=0;mi<ainfo->nm;++mi)
00418 {
00419 int m=ainfo->mval[mi];
00420 ptrdiff_t mvstart = ainfo->mvstart[mi];
00421 ptrdiff_t stride = ainfo->stride;
00422 if (!(ainfo->flags&SHARP_PACKED))
00423 mvstart*=2;
00424 if ((ainfo->flags&SHARP_PACKED)&&(m==0))
00425 {
00426 if (flags&SHARP_DP)
00427 CLEARLOOP(double, talm[mvstart+l*stride] = 0.;)
00428 else
00429 CLEARLOOP(float, talm[mvstart+l*stride] = 0.;)
00430 }
00431 else
00432 {
00433 stride*=2;
00434 if (flags&SHARP_DP)
00435 CLEARLOOP(double,talm[mvstart+l*stride]=talm[mvstart+l*stride+1]=0.;)
00436 else
00437 CLEARLOOP(float,talm[mvstart+l*stride]=talm[mvstart+l*stride+1]=0.;)
00438 }
00439
00440 #undef CLEARLOOP
00441 }
00442 }
00443
00444 static void init_output (sharp_job *job)
00445 {
00446 if (job->flags&SHARP_ADD) return;
00447 if (job->type == SHARP_MAP2ALM)
00448 for (int i=0; i<job->ntrans*job->nalm; ++i)
00449 clear_alm (job->ainfo,job->alm[i],job->flags);
00450 else
00451 for (int i=0; i<job->ntrans*job->nmaps; ++i)
00452 fill_map (job->ginfo,job->map[i],0.,job->flags);
00453 }
00454
00455 static void alloc_phase (sharp_job *job, int nm, int ntheta)
00456 {
00457 if (job->type==SHARP_MAP2ALM)
00458 {
00459 if ((nm&1023)==0) nm+=3;
00460 job->s_m=2*job->ntrans*job->nmaps;
00461 job->s_th=job->s_m*nm;
00462 }
00463 else
00464 {
00465 if ((ntheta&1023)==0) ntheta+=3;
00466 job->s_th=2*job->ntrans*job->nmaps;
00467 job->s_m=job->s_th*ntheta;
00468 }
00469 job->phase=RALLOC(dcmplx,2*job->ntrans*job->nmaps*nm*ntheta);
00470 }
00471
00472 static void dealloc_phase (sharp_job *job)
00473 { DEALLOC(job->phase); }
00474
00475 static void alloc_almtmp (sharp_job *job, int lmax)
00476 { job->almtmp=RALLOC(dcmplx,job->ntrans*job->nalm*(lmax+1)); }
00477
00478 static void dealloc_almtmp (sharp_job *job)
00479 { DEALLOC(job->almtmp); }
00480
00481 static void alm2almtmp (sharp_job *job, int lmax, int mi)
00482 {
00483
00484 #define COPY_LOOP(real_t, source_t, expr_of_x) \
00485 for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) \
00486 for (int i=0; i<job->ntrans*job->nalm; ++i) \
00487 { \
00488 source_t x = *(source_t *)(((real_t *)job->alm[i])+ofs+l*stride); \
00489 job->almtmp[job->ntrans*job->nalm*l+i] = expr_of_x; \
00490 }
00491
00492 if (job->type!=SHARP_MAP2ALM)
00493 {
00494 ptrdiff_t ofs=job->ainfo->mvstart[mi];
00495 int stride=job->ainfo->stride;
00496 int m=job->ainfo->mval[mi];
00497
00498
00499
00500 double norm_m0=(job->flags&SHARP_REAL_HARMONICS) ? sqrt_two : 1.;
00501 if (!(job->ainfo->flags&SHARP_PACKED))
00502 ofs *= 2;
00503 if (!((job->ainfo->flags&SHARP_PACKED)&&(m==0)))
00504 stride *= 2;
00505 if (job->spin==0)
00506 {
00507 if (m==0)
00508 {
00509 if (job->flags&SHARP_DP)
00510 COPY_LOOP(double, double, x*norm_m0)
00511 else
00512 COPY_LOOP(float, float, x*norm_m0)
00513 }
00514 else
00515 {
00516 if (job->flags&SHARP_DP)
00517 COPY_LOOP(double, dcmplx, x)
00518 else
00519 COPY_LOOP(float, fcmplx, x)
00520 }
00521 }
00522 else
00523 {
00524 if (m==0)
00525 {
00526 if (job->flags&SHARP_DP)
00527 COPY_LOOP(double, double, x*job->norm_l[l]*norm_m0)
00528 else
00529 COPY_LOOP(float, float, x*job->norm_l[l]*norm_m0)
00530 }
00531 else
00532 {
00533 if (job->flags&SHARP_DP)
00534 COPY_LOOP(double, dcmplx, x*job->norm_l[l])
00535 else
00536 COPY_LOOP(float, fcmplx, x*job->norm_l[l])
00537 }
00538 }
00539 }
00540 else
00541 SET_ARRAY(job->almtmp,job->ntrans*job->nalm*job->ainfo->mval[mi],
00542 job->ntrans*job->nalm*(lmax+1),0.);
00543
00544 #undef COPY_LOOP
00545 }
00546
00547 static void almtmp2alm (sharp_job *job, int lmax, int mi)
00548 {
00549
00550 #define COPY_LOOP(real_t, target_t, expr_of_x) \
00551 for (int l=job->ainfo->mval[mi]; l<=lmax; ++l) \
00552 for (int i=0; i<job->ntrans*job->nalm; ++i) \
00553 { \
00554 dcmplx x = job->almtmp[job->ntrans*job->nalm*l+i]; \
00555 *(target_t *)(((real_t *)job->alm[i])+ofs+l*stride) += expr_of_x; \
00556 }
00557
00558 if (job->type != SHARP_MAP2ALM) return;
00559 ptrdiff_t ofs=job->ainfo->mvstart[mi];
00560 int stride=job->ainfo->stride;
00561 int m=job->ainfo->mval[mi];
00562
00563
00564
00565 double norm_m0=(job->flags&SHARP_REAL_HARMONICS) ? sqrt_one_half : 1.;
00566 if (!(job->ainfo->flags&SHARP_PACKED))
00567 ofs *= 2;
00568 if (!((job->ainfo->flags&SHARP_PACKED)&&(m==0)))
00569 stride *= 2;
00570 if (job->spin==0)
00571 {
00572 if (m==0)
00573 {
00574 if (job->flags&SHARP_DP)
00575 COPY_LOOP(double, double, creal(x)*norm_m0)
00576 else
00577 COPY_LOOP(float, float, crealf(x)*norm_m0)
00578 }
00579 else
00580 {
00581 if (job->flags&SHARP_DP)
00582 COPY_LOOP(double, dcmplx, x)
00583 else
00584 COPY_LOOP(float, fcmplx, (fcmplx)x)
00585 }
00586 }
00587 else
00588 {
00589 if (m==0)
00590 {
00591 if (job->flags&SHARP_DP)
00592 COPY_LOOP(double, double, creal(x)*job->norm_l[l]*norm_m0)
00593 else
00594 COPY_LOOP(float, fcmplx, (float)(creal(x)*job->norm_l[l]*norm_m0))
00595 }
00596 else
00597 {
00598 if (job->flags&SHARP_DP)
00599 COPY_LOOP(double, dcmplx, x*job->norm_l[l])
00600 else
00601 COPY_LOOP(float, fcmplx, (fcmplx)(x*job->norm_l[l]))
00602 }
00603 }
00604
00605 #undef COPY_LOOP
00606 }
00607
00608 static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri, double *ringtmp,
00609 int rstride)
00610 {
00611 double **dmap = (double **)job->map;
00612 float **fmap = (float **)job->map;
00613 for (int i=0; i<job->ntrans*job->nmaps; ++i)
00614 for (int m=0; m<ri->nph; ++m)
00615 if (job->flags & SHARP_DP)
00616 dmap[i][ri->ofs+m*ri->stride] += ringtmp[i*rstride+m+1];
00617 else
00618 fmap[i][ri->ofs+m*ri->stride] += (float)ringtmp[i*rstride+m+1];
00619 }
00620
00621 static void ring2ringtmp (sharp_job *job, sharp_ringinfo *ri, double *ringtmp,
00622 int rstride)
00623 {
00624 for (int i=0; i<job->ntrans*job->nmaps; ++i)
00625 for (int m=0; m<ri->nph; ++m)
00626 ringtmp[i*rstride+m+1] = (job->flags & SHARP_DP) ?
00627 ((double *)(job->map[i]))[ri->ofs+m*ri->stride] :
00628 ((float *)(job->map[i]))[ri->ofs+m*ri->stride];
00629 }
00630
00631 static void ring2phase_direct (sharp_job *job, sharp_ringinfo *ri, int mmax,
00632 dcmplx *phase)
00633 {
00634 if (ri->nph<0)
00635 {
00636 for (int i=0; i<job->ntrans*job->nmaps; ++i)
00637 for (int m=0; m<=mmax; ++m)
00638 phase[2*i+job->s_m*m]=0.;
00639 }
00640 else
00641 {
00642 UTIL_ASSERT(ri->nph==mmax+1,"bad ring size");
00643 double wgt = (job->flags&SHARP_USE_WEIGHTS) ? (ri->nph*ri->weight) : 1.;
00644 if (job->flags&SHARP_REAL_HARMONICS)
00645 wgt *= sqrt_two;
00646 for (int i=0; i<job->ntrans*job->nmaps; ++i)
00647 for (int m=0; m<=mmax; ++m)
00648 phase[2*i+job->s_m*m]= (job->flags & SHARP_DP) ?
00649 ((dcmplx *)(job->map[i]))[ri->ofs+m*ri->stride]*wgt :
00650 ((fcmplx *)(job->map[i]))[ri->ofs+m*ri->stride]*wgt;
00651 }
00652 }
00653 static void phase2ring_direct (sharp_job *job, sharp_ringinfo *ri, int mmax,
00654 dcmplx *phase)
00655 {
00656 if (ri->nph<0) return;
00657 UTIL_ASSERT(ri->nph==mmax+1,"bad ring size");
00658 dcmplx **dmap = (dcmplx **)job->map;
00659 fcmplx **fmap = (fcmplx **)job->map;
00660 double wgt = (job->flags&SHARP_USE_WEIGHTS) ? (ri->nph*ri->weight) : 1.;
00661 if (job->flags&SHARP_REAL_HARMONICS)
00662 wgt *= sqrt_one_half;
00663 for (int i=0; i<job->ntrans*job->nmaps; ++i)
00664 for (int m=0; m<=mmax; ++m)
00665 if (job->flags & SHARP_DP)
00666 dmap[i][ri->ofs+m*ri->stride] += wgt*phase[2*i+job->s_m*m];
00667 else
00668 fmap[i][ri->ofs+m*ri->stride] += (fcmplx)(wgt*phase[2*i+job->s_m*m]);
00669 }
00670
00671
00672 static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
00673 {
00674 if (job->type != SHARP_MAP2ALM) return;
00675 int pstride = job->s_m;
00676 if (job->flags & SHARP_NO_FFT)
00677 {
00678 for (int ith=llim; ith<ulim; ++ith)
00679 {
00680 int dim2 = job->s_th*(ith-llim);
00681 ring2phase_direct(job,&(job->ginfo->pair[ith].r1),mmax,
00682 &(job->phase[dim2]));
00683 ring2phase_direct(job,&(job->ginfo->pair[ith].r2),mmax,
00684 &(job->phase[dim2+1]));
00685 }
00686 }
00687 else
00688 {
00689 #pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
00690 {
00691 ringhelper helper;
00692 ringhelper_init(&helper);
00693 int rstride=job->ginfo->nphmax+2;
00694 double *ringtmp=RALLOC(double,job->ntrans*job->nmaps*rstride);
00695 #pragma omp for schedule(dynamic,1)
00696 for (int ith=llim; ith<ulim; ++ith)
00697 {
00698 int dim2 = job->s_th*(ith-llim);
00699 ring2ringtmp(job,&(job->ginfo->pair[ith].r1),ringtmp,rstride);
00700 for (int i=0; i<job->ntrans*job->nmaps; ++i)
00701 ringhelper_ring2phase (&helper,&(job->ginfo->pair[ith].r1),
00702 &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i],pstride,job->flags);
00703 if (job->ginfo->pair[ith].r2.nph>0)
00704 {
00705 ring2ringtmp(job,&(job->ginfo->pair[ith].r2),ringtmp,rstride);
00706 for (int i=0; i<job->ntrans*job->nmaps; ++i)
00707 ringhelper_ring2phase (&helper,&(job->ginfo->pair[ith].r2),
00708 &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i+1],pstride,job->flags);
00709 }
00710 }
00711 DEALLOC(ringtmp);
00712 ringhelper_destroy(&helper);
00713 }
00714 }
00715 }
00716
00717 static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
00718 {
00719 if (job->type == SHARP_MAP2ALM) return;
00720 int pstride = job->s_m;
00721 if (job->flags & SHARP_NO_FFT)
00722 {
00723 for (int ith=llim; ith<ulim; ++ith)
00724 {
00725 int dim2 = job->s_th*(ith-llim);
00726 phase2ring_direct(job,&(job->ginfo->pair[ith].r1),mmax,
00727 &(job->phase[dim2]));
00728 phase2ring_direct(job,&(job->ginfo->pair[ith].r2),mmax,
00729 &(job->phase[dim2+1]));
00730 }
00731 }
00732 else
00733 {
00734 #pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
00735 {
00736 ringhelper helper;
00737 ringhelper_init(&helper);
00738 int rstride=job->ginfo->nphmax+2;
00739 double *ringtmp=RALLOC(double,job->ntrans*job->nmaps*rstride);
00740 #pragma omp for schedule(dynamic,1)
00741 for (int ith=llim; ith<ulim; ++ith)
00742 {
00743 int dim2 = job->s_th*(ith-llim);
00744 for (int i=0; i<job->ntrans*job->nmaps; ++i)
00745 ringhelper_phase2ring (&helper,&(job->ginfo->pair[ith].r1),
00746 &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i],pstride,job->flags);
00747 ringtmp2ring(job,&(job->ginfo->pair[ith].r1),ringtmp,rstride);
00748 if (job->ginfo->pair[ith].r2.nph>0)
00749 {
00750 for (int i=0; i<job->ntrans*job->nmaps; ++i)
00751 ringhelper_phase2ring (&helper,&(job->ginfo->pair[ith].r2),
00752 &ringtmp[i*rstride],mmax,&job->phase[dim2+2*i+1],pstride,job->flags);
00753 ringtmp2ring(job,&(job->ginfo->pair[ith].r2),ringtmp,rstride);
00754 }
00755 }
00756 DEALLOC(ringtmp);
00757 ringhelper_destroy(&helper);
00758 }
00759 }
00760 }
00761
00762 static void sharp_execute_job (sharp_job *job)
00763 {
00764 double timer=wallTime();
00765 job->opcnt=0;
00766 int lmax = job->ainfo->lmax,
00767 mmax=sharp_get_mmax(job->ainfo->mval, job->ainfo->nm);
00768
00769 job->norm_l = (job->type==SHARP_ALM2MAP_DERIV1) ?
00770 sharp_Ylmgen_get_d1norm (lmax) :
00771 sharp_Ylmgen_get_norm (lmax, job->spin);
00772
00773
00774 init_output (job);
00775
00776 int nchunks, chunksize;
00777 get_chunk_info(job->ginfo->npairs,(job->flags&SHARP_NVMAX)*VLEN,&nchunks,
00778 &chunksize);
00779 alloc_phase (job,mmax+1,chunksize);
00780
00781
00782 for (int chunk=0; chunk<nchunks; ++chunk)
00783 {
00784 int llim=chunk*chunksize, ulim=IMIN(llim+chunksize,job->ginfo->npairs);
00785 int *ispair = RALLOC(int,ulim-llim);
00786 int *mlim = RALLOC(int,ulim-llim);
00787 double *cth = RALLOC(double,ulim-llim), *sth = RALLOC(double,ulim-llim);
00788 for (int i=0; i<ulim-llim; ++i)
00789 {
00790 ispair[i] = job->ginfo->pair[i+llim].r2.nph>0;
00791 cth[i] = job->ginfo->pair[i+llim].r1.cth;
00792 sth[i] = job->ginfo->pair[i+llim].r1.sth;
00793 mlim[i] = sharp_get_mlim(lmax, job->spin, sth[i], cth[i]);
00794 }
00795
00796
00797 map2phase (job, mmax, llim, ulim);
00798
00799 #pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
00800 {
00801 sharp_job ljob = *job;
00802 ljob.opcnt=0;
00803 sharp_Ylmgen_C generator;
00804 sharp_Ylmgen_init (&generator,lmax,mmax,ljob.spin);
00805 alloc_almtmp(&ljob,lmax);
00806
00807 #pragma omp for schedule(dynamic,1)
00808 for (int mi=0; mi<job->ainfo->nm; ++mi)
00809 {
00810
00811 alm2almtmp (&ljob, lmax, mi);
00812
00813 inner_loop (&ljob, ispair, cth, sth, llim, ulim, &generator, mi, mlim);
00814
00815
00816 almtmp2alm (&ljob, lmax, mi);
00817 }
00818
00819 sharp_Ylmgen_destroy(&generator);
00820 dealloc_almtmp(&ljob);
00821
00822 #pragma omp critical
00823 job->opcnt+=ljob.opcnt;
00824 }
00825
00826
00827 phase2map (job, mmax, llim, ulim);
00828
00829 DEALLOC(ispair);
00830 DEALLOC(mlim);
00831 DEALLOC(cth);
00832 DEALLOC(sth);
00833 }
00834
00835 DEALLOC(job->norm_l);
00836 dealloc_phase (job);
00837 job->time=wallTime()-timer;
00838 }
00839
00840 static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
00841 int spin, void *alm, void *map, const sharp_geom_info *geom_info,
00842 const sharp_alm_info *alm_info, int ntrans, int flags)
00843 {
00844 UTIL_ASSERT((ntrans>0)&&(ntrans<=SHARP_MAXTRANS),
00845 "bad number of simultaneous transforms");
00846 if (type==SHARP_ALM2MAP_DERIV1) spin=1;
00847 if (type==SHARP_MAP2ALM) flags|=SHARP_USE_WEIGHTS;
00848 if (type==SHARP_Yt) type=SHARP_MAP2ALM;
00849 if (type==SHARP_WY) { type=SHARP_ALM2MAP; flags|=SHARP_USE_WEIGHTS; }
00850
00851 UTIL_ASSERT((spin>=0)&&(spin<=alm_info->lmax), "bad spin");
00852 job->type = type;
00853 job->spin = spin;
00854 job->norm_l = NULL;
00855 job->nmaps = (type==SHARP_ALM2MAP_DERIV1) ? 2 : ((spin>0) ? 2 : 1);
00856 job->nalm = (type==SHARP_ALM2MAP_DERIV1) ? 1 : ((spin>0) ? 2 : 1);
00857 job->ginfo = geom_info;
00858 job->ainfo = alm_info;
00859 job->flags = flags;
00860 if ((job->flags&SHARP_NVMAX)==0)
00861 job->flags|=sharp_nv_oracle (type, spin, ntrans);
00862 job->time = 0.;
00863 job->opcnt = 0;
00864 job->ntrans = ntrans;
00865 job->alm=alm;
00866 job->map=map;
00867 }
00868
00869 void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
00870 const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans,
00871 int flags, double *time, unsigned long long *opcnt)
00872 {
00873 sharp_job job;
00874 sharp_build_job_common (&job, type, spin, alm, map, geom_info, alm_info,
00875 ntrans, flags);
00876
00877 sharp_execute_job (&job);
00878 if (time!=NULL) *time = job.time;
00879 if (opcnt!=NULL) *opcnt = job.opcnt;
00880 }
00881
00882 void sharp_set_chunksize_min(int new_chunksize_min)
00883 { chunksize_min=new_chunksize_min; }
00884 void sharp_set_nchunks_max(int new_nchunks_max)
00885 { nchunks_max=new_nchunks_max; }
00886
/* Returns the largest nv value that sharp_oracle() tries, which is 6. */
int sharp_get_nv_max (void)
{
  return 6;
}
00889
00890 static int sharp_oracle (sharp_jobtype type, int spin, int ntrans)
00891 {
00892 int lmax=511;
00893 int mmax=(lmax+1)/2;
00894 int nrings=(lmax+1)/4;
00895 int ppring=1;
00896
00897 spin = (spin!=0) ? 2 : 0;
00898
00899 ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
00900 sharp_geom_info *tinfo;
00901 sharp_make_gauss_geom_info (nrings, ppring, 0., 1, ppring, &tinfo);
00902
00903 ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
00904 int ncomp = ntrans*((spin==0) ? 1 : 2);
00905
00906 double **map;
00907 ALLOC2D(map,double,ncomp,npix);
00908 SET_ARRAY(map[0],0,npix*ncomp,0.);
00909
00910 sharp_alm_info *alms;
00911 sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
00912
00913 dcmplx **alm;
00914 ALLOC2D(alm,dcmplx,ncomp,nalms);
00915 SET_ARRAY(alm[0],0,nalms*ncomp,0.);
00916
00917 double time=1e30;
00918 int nvbest=-1;
00919
00920 for (int nv=1; nv<=sharp_get_nv_max(); ++nv)
00921 {
00922 double time_acc=0.;
00923 double jtime;
00924 int ntries=0;
00925 do
00926 {
00927 sharp_execute(type,spin,&alm[0],&map[0],tinfo,alms,ntrans,
00928 nv|SHARP_DP|SHARP_NO_OPENMP,&jtime,NULL);
00929
00930 if (jtime<time) { time=jtime; nvbest=nv; }
00931 time_acc+=jtime;
00932 ++ntries;
00933 }
00934 while ((time_acc<0.02)&&(ntries<2));
00935 }
00936
00937 DEALLOC2D(map);
00938 DEALLOC2D(alm);
00939
00940 sharp_destroy_alm_info(alms);
00941 sharp_destroy_geom_info(tinfo);
00942 return nvbest;
00943 }
00944
00945 int sharp_nv_oracle (sharp_jobtype type, int spin, int ntrans)
00946 {
00947 static const int maxtr = 6;
00948 static int nv_opt[6][2][5] = {
00949 {{0,0,0,0,0},{0,0,0,0,0}},
00950 {{0,0,0,0,0},{0,0,0,0,0}},
00951 {{0,0,0,0,0},{0,0,0,0,0}},
00952 {{0,0,0,0,0},{0,0,0,0,0}},
00953 {{0,0,0,0,0},{0,0,0,0,0}},
00954 {{0,0,0,0,0},{0,0,0,0,0}} };
00955
00956 if (type==SHARP_ALM2MAP_DERIV1) spin=1;
00957 UTIL_ASSERT(type<5,"bad type");
00958 UTIL_ASSERT((ntrans>0),"bad number of simultaneous transforms");
00959 UTIL_ASSERT(spin>=0, "bad spin");
00960 ntrans=IMIN(ntrans,maxtr);
00961
00962 if (nv_opt[ntrans-1][spin!=0][type]==0)
00963 nv_opt[ntrans-1][spin!=0][type]=sharp_oracle(type,spin,ntrans);
00964 return nv_opt[ntrans-1][spin!=0][type];
00965 }
00966
00967 #ifdef USE_MPI
00968 #include "sharp_mpi.c"
00969 #endif