NFFT  3.3.2
sort.c
00001 /*
00002  * Copyright (c) 2002, 2016 Jens Keiner, Stefan Kunis, Daniel Potts
00003  *
00004  * This program is free software; you can redistribute it and/or modify it under
00005  * the terms of the GNU General Public License as published by the Free Software
00006  * Foundation; either version 2 of the License, or (at your option) any later
00007  * version.
00008  *
00009  * This program is distributed in the hope that it will be useful, but WITHOUT
00010  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00011  * FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
00012  * details.
00013  *
00014  * You should have received a copy of the GNU General Public License along with
00015  * this program; if not, write to the Free Software Foundation, Inc., 51
00016  * Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
00017  */
00018 
00019 
00020 #ifdef _OPENMP
00021 #include <omp.h>
00022 #endif
00023 
00024 #include "infft.h"
00025 
00026 #define z_swap(_a_, _b_, _t_) do { (_t_) = (_a_); (_a_) = (_b_); (_b_) = (_t_); } while (0)
00027 
00033 static inline void sort_node_indices_sort_bubble(INT n, INT *keys)
00034 {
00035   INT i, j, ti;
00036 
00037   for (i = 0; i < n; ++i)
00038   {
00039     j = i;
00040     while (j > 0 && keys[2 * j + 0] < keys[2 * (j - 1) + 0])
00041     {
00042       z_swap(keys[2 * j + 0], keys[2 * (j - 1) + 0], ti);
00043       z_swap(keys[2 * j + 1], keys[2 * (j - 1) + 1], ti);
00044       --j;
00045     }
00046   }
00047 }
00048 
00054 static inline void sort_node_indices_radix_count(INT n, INT *keys, INT shift, INT mask, INT *counts)
00055 {
00056   INT i, k;
00057 
00058   for (i = 0; i < n; ++i)
00059   {
00060     k = (keys[2 * i + 0] >> shift) & mask;
00061     ++counts[k];
00062   }
00063 }
00064 
00070 static inline void sort_node_indices_radix_rearrange(INT n, INT *keys_in, INT *keys_out, INT shift, INT mask, INT *displs)
00071 {
00072   INT i, k;
00073 
00074   for (i = 0; i < n; ++i)
00075   {
00076     k = (keys_in[2 * i + 0] >> shift) & mask;
00077     keys_out[2 * displs[k] + 0] = keys_in[2 * i + 0];
00078     keys_out[2 * displs[k] + 1] = keys_in[2 * i + 1];
00079     ++displs[k];
00080   }
00081 }
00082 
00083 #define rwidth 9
00084 #define radix_n (1 << rwidth)
00085 
00091 void Y(sort_node_indices_radix_lsdf)(INT n, INT *keys0, INT *keys1, INT rhigh)
00092 {
00093   const INT radix_mask = radix_n - 1;
00094   const INT rhigh_in = rhigh;
00095 
00096   const INT tmax =
00097 #ifdef _OPENMP
00098     omp_get_max_threads();
00099 #else
00100     1;
00101 #endif
00102 
00103   INT *from, *to, *tmp;
00104 
00105   INT i, k, l, h;
00106   INT *lcounts;
00107 
00108   INT tid = 0, tnum = 1;
00109 
00110   STACK_MALLOC(INT*, lcounts, (size_t)(tmax * radix_n) * sizeof(INT));
00111 
00112   from = keys0;
00113   to = keys1;
00114 
00115   while (rhigh >= 0)
00116   {
00117 #ifdef _OPENMP
00118     #pragma omp parallel private(tid, tnum, i, l, h)
00119     {
00120       tid = omp_get_thread_num();
00121       tnum = omp_get_num_threads();
00122 #endif
00123 
00124       for (i = 0; i < radix_n; ++i) lcounts[tid * radix_n + i] = 0;
00125 
00126       l = (tid * n) / tnum;
00127       h = ((tid + 1) * n) / tnum;
00128 
00129       sort_node_indices_radix_count(h - l, from + (2 * l), rhigh_in - rhigh, radix_mask, &lcounts[tid * radix_n]);
00130 #ifdef _OPENMP
00131     }
00132 #endif
00133 
00134     k = 0;
00135     for (i = 0; i < radix_n; ++i)
00136     {
00137       for (l = 0; l < tmax; ++l) lcounts[l * radix_n + i] = (k += lcounts[l * radix_n + i]) - lcounts[l * radix_n + i];
00138     }
00139 
00140 #ifdef _OPENMP
00141     #pragma omp parallel private(tid, tnum, i, l, h)
00142     {
00143       tid = omp_get_thread_num();
00144       tnum = omp_get_num_threads();
00145 #endif
00146 
00147       l = (tid * n) / tnum;
00148       h = ((tid + 1) * n) / tnum;
00149 
00150       sort_node_indices_radix_rearrange(h - l, from + (2 * l), to, rhigh_in - rhigh, radix_mask, &lcounts[tid * radix_n]);
00151 #ifdef _OPENMP
00152     }
00153 #endif
00154 
00155 /*    print_keys(n, to);*/
00156 
00157     tmp = from;
00158     from = to;
00159     to = tmp;
00160 
00161     rhigh -= rwidth;
00162   }
00163 
00164   if (to == keys0) memcpy(to, from, (size_t)(n) * 2 * sizeof(INT));
00165 
00166   STACK_FREE(lcounts);
00167 }
00168 
00174 void Y(sort_node_indices_radix_msdf)(INT n, INT *keys0, INT *keys1, INT rhigh)
00175 {
00176   const INT radix_mask = radix_n - 1;
00177 
00178   const INT tmax =
00179 #ifdef _OPENMP
00180     omp_get_max_threads();
00181 #else
00182     1;
00183 #endif
00184 
00185   INT i, k, l, h;
00186   INT *lcounts;
00187 
00188   INT counts[radix_n], displs[radix_n];
00189 
00190   INT tid = 0, tnum = 1;
00191 
00192   STACK_MALLOC(INT*, lcounts, (size_t)(tmax * radix_n) * sizeof(INT));
00193 
00194   rhigh -= rwidth;
00195 
00196 #ifdef _OPENMP
00197   #pragma omp parallel private(tid, tnum, i, l, h)
00198   {
00199     tid = omp_get_thread_num();
00200     tnum = omp_get_num_threads();
00201 #endif
00202 
00203     for (i = 0; i < radix_n; ++i) lcounts[tid * radix_n + i] = 0;
00204 
00205     l = (tid * n) / tnum;
00206     h = ((tid + 1) * n) / tnum;
00207 
00208     sort_node_indices_radix_count(h - l, keys0 + (2 * l), rhigh + 1, radix_mask, &lcounts[tid * radix_n]);
00209 #ifdef _OPENMP
00210   }
00211 #endif
00212 
00213   k = 0;
00214   for (i = 0; i < radix_n; ++i)
00215   {
00216     for (l = 0; l < tmax; ++l) lcounts[l * radix_n + i] = (k += lcounts[l * radix_n + i]) - lcounts[l * radix_n + i];
00217 
00218     displs[i] = lcounts[0 * radix_n + i];
00219     if (i > 0) counts[i - 1] = displs[i] - displs[i - 1];
00220   }
00221   counts[radix_n - 1] = n - displs[radix_n - 1];
00222 
00223 #ifdef _OPENMP
00224   #pragma omp parallel private(tid, tnum, i, l, h)
00225   {
00226     tid = omp_get_thread_num();
00227     tnum = omp_get_num_threads();
00228 #endif
00229 
00230     l = (tid * n) / tnum;
00231     h = ((tid + 1) * n) / tnum;
00232 
00233     sort_node_indices_radix_rearrange(h - l, keys0 + (2 * l), keys1, rhigh + 1, radix_mask, &lcounts[tid * radix_n]);
00234 #ifdef _OPENMP
00235   }
00236 #endif
00237 
00238   memcpy(keys0, keys1, (size_t)(n) * 2 * sizeof(INT));
00239 
00240   if (rhigh >= 0)
00241   {
00242     for (i = 0; i < radix_n; ++i)
00243     {
00244       if (counts[i] > 1)
00245       {
00246         if (counts[i] > 256)
00247           Y(sort_node_indices_radix_msdf)(counts[i], keys0 + 2 * displs[i], keys1 + 2 * displs[i], rhigh);
00248         else
00249           sort_node_indices_sort_bubble(counts[i], keys0 + 2 * displs[i]);
00250       }
00251     }
00252   }
00253 
00254   STACK_FREE(lcounts);
00255 }