tbb_machine.h

00001 /*
00002     Copyright 2005-2012 Intel Corporation.  All Rights Reserved.
00003 
00004     The source code contained or described herein and all documents related
00005     to the source code ("Material") are owned by Intel Corporation or its
00006     suppliers or licensors.  Title to the Material remains with Intel
00007     Corporation or its suppliers and licensors.  The Material is protected
00008     by worldwide copyright laws and treaty provisions.  No part of the
00009     Material may be used, copied, reproduced, modified, published, uploaded,
00010     posted, transmitted, distributed, or disclosed in any way without
00011     Intel's prior express written permission.
00012 
00013     No license under any patent, copyright, trade secret or other
00014     intellectual property right is granted to or conferred upon you by
00015     disclosure or delivery of the Materials, either expressly, by
00016     implication, inducement, estoppel or otherwise.  Any license under such
00017     intellectual property rights must be express and approved by Intel in
00018     writing.
00019 */
00020 
00021 #ifndef __TBB_machine_H
00022 #define __TBB_machine_H
00023 
00116 #include "tbb_stddef.h"
00117 
00118 namespace tbb {
00119 namespace internal {
00120 
00122 // Overridable helpers declarations
00123 //
00124 // A machine/*.h file may choose to define these templates; otherwise it must
00125 // request the default implementation by setting the appropriate __TBB_USE_GENERIC_XXX macro(s).
00126 //
00127 template <typename T, std::size_t S>
00128 struct machine_load_store;
00129 
00130 template <typename T, std::size_t S>
00131 struct machine_load_store_relaxed;
00132 
00133 template <typename T, std::size_t S>
00134 struct machine_load_store_seq_cst;
00135 //
00136 // End of overridable helpers declarations
00138 
00139 template<size_t S> struct atomic_selector;
00140 
00141 template<> struct atomic_selector<1> {
00142     typedef int8_t word;
00143     inline static word fetch_store ( volatile void* location, word value );
00144 };
00145 
00146 template<> struct atomic_selector<2> {
00147     typedef int16_t word;
00148     inline static word fetch_store ( volatile void* location, word value );
00149 };
00150 
00151 template<> struct atomic_selector<4> {
00152 #if _MSC_VER && !_WIN64
00153     // Work-around that avoids spurious /Wp64 warnings
00154     typedef intptr_t word;
00155 #else
00156     typedef int32_t word;
00157 #endif
00158     inline static word fetch_store ( volatile void* location, word value );
00159 };
00160 
00161 template<> struct atomic_selector<8> {
00162     typedef int64_t word;
00163     inline static word fetch_store ( volatile void* location, word value );
00164 };
00165 
00166 }} // namespaces internal, tbb
00167 
00168 #if _WIN32||_WIN64
00169 
00170 #ifdef _MANAGED
00171 #pragma managed(push, off)
00172 #endif
00173 
00174     #if __MINGW64__ || __MINGW32__
00175         extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
00176         #define __TBB_Yield()  SwitchToThread()
00177         #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
00178             #include "machine/gcc_generic.h"
00179         #elif __MINGW64__
00180             #include "machine/linux_intel64.h"
00181         #elif __MINGW32__
00182             #include "machine/linux_ia32.h"
00183         #endif
00184     #elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
00185         #include "machine/icc_generic.h"
00186     #elif defined(_M_IX86)
00187         #include "machine/windows_ia32.h"
00188     #elif defined(_M_X64) 
00189         #include "machine/windows_intel64.h"
00190     #elif _XBOX
00191         #include "machine/xbox360_ppc.h"
00192     #endif
00193 
00194 #ifdef _MANAGED
00195 #pragma managed(pop)
00196 #endif
00197 
00198 #elif __TBB_DEFINE_MIC
00199 
00200     #include "machine/mic_common.h"
00201     //TODO: check if ICC atomic intrinsics are available for MIC
00202     #include "machine/linux_intel64.h"
00203 
00204 #elif __linux__ || __FreeBSD__ || __NetBSD__
00205 
00206     #if (TBB_USE_GCC_BUILTINS && __TBB_GCC_BUILTIN_ATOMICS_PRESENT)
00207         #include "machine/gcc_generic.h"
00208     #elif (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
00209         #include "machine/icc_generic.h"
00210     #elif __i386__
00211         #include "machine/linux_ia32.h"
00212     #elif __x86_64__
00213         #include "machine/linux_intel64.h"
00214     #elif __ia64__
00215         #include "machine/linux_ia64.h"
00216     #elif __powerpc__
00217         #include "machine/mac_ppc.h"
00218     #elif __TBB_GCC_BUILTIN_ATOMICS_PRESENT
00219         #include "machine/gcc_generic.h"
00220     #endif
00221     #include "machine/linux_common.h"
00222 
00223 #elif __APPLE__
00224     //TODO: TBB_USE_GCC_BUILTINS is not used for Mac, Sun, AIX
00225     #if (TBB_USE_ICC_BUILTINS && __TBB_ICC_BUILTIN_ATOMICS_PRESENT)
00226         #include "machine/icc_generic.h"
00227     #elif __i386__
00228         #include "machine/linux_ia32.h"
00229     #elif __x86_64__
00230         #include "machine/linux_intel64.h"
00231     #elif __POWERPC__
00232         #include "machine/mac_ppc.h"
00233     #endif
00234     #include "machine/macos_common.h"
00235 
00236 #elif _AIX
00237 
00238     #include "machine/ibm_aix51.h"
00239 
00240 #elif __sun || __SUNPRO_CC
00241 
00242     #define __asm__ asm
00243     #define __volatile__ volatile
00244 
00245     #if __i386  || __i386__
00246         #include "machine/linux_ia32.h"
00247     #elif __x86_64__
00248         #include "machine/linux_intel64.h"
00249     #elif __sparc
00250         #include "machine/sunos_sparc.h"
00251     #endif
00252     #include <sched.h>
00253 
00254     #define __TBB_Yield() sched_yield()
00255 
00256 #endif /* OS selection */
00257 
00258 #ifndef __TBB_64BIT_ATOMICS
00259     #define __TBB_64BIT_ATOMICS 1
00260 #endif
00261 
00262 //TODO: replace usage of these functions with tbb::atomic, and then remove them
00263 //TODO: map the functions with the W suffix to a cast to tbb::atomic and the corresponding op, i.e. as_atomic().op()
00264 // Special atomic functions
00265 #if __TBB_USE_FENCED_ATOMICS
00266     #define __TBB_machine_cmpswp1   __TBB_machine_cmpswp1full_fence
00267     #define __TBB_machine_cmpswp2   __TBB_machine_cmpswp2full_fence
00268     #define __TBB_machine_cmpswp4   __TBB_machine_cmpswp4full_fence
00269     #define __TBB_machine_cmpswp8   __TBB_machine_cmpswp8full_fence
00270 
00271     #if __TBB_WORDSIZE==8
00272         #define __TBB_machine_fetchadd8             __TBB_machine_fetchadd8full_fence
00273         #define __TBB_machine_fetchstore8           __TBB_machine_fetchstore8full_fence
00274         #define __TBB_FetchAndAddWrelease(P,V)      __TBB_machine_fetchadd8release(P,V)
00275         #define __TBB_FetchAndIncrementWacquire(P)  __TBB_machine_fetchadd8acquire(P,1)
00276         #define __TBB_FetchAndDecrementWrelease(P)  __TBB_machine_fetchadd8release(P,(-1))
00277     #else
00278         #define __TBB_machine_fetchadd4             __TBB_machine_fetchadd4full_fence
00279         #define __TBB_machine_fetchstore4           __TBB_machine_fetchstore4full_fence
00280         #define __TBB_FetchAndAddWrelease(P,V)      __TBB_machine_fetchadd4release(P,V)
00281         #define __TBB_FetchAndIncrementWacquire(P)  __TBB_machine_fetchadd4acquire(P,1)
00282         #define __TBB_FetchAndDecrementWrelease(P)  __TBB_machine_fetchadd4release(P,(-1))
00283     #endif /* __TBB_WORDSIZE==4 */
00284 #else /* !__TBB_USE_FENCED_ATOMICS */
00285     #define __TBB_FetchAndAddWrelease(P,V)      __TBB_FetchAndAddW(P,V)
00286     #define __TBB_FetchAndIncrementWacquire(P)  __TBB_FetchAndAddW(P,1)
00287     #define __TBB_FetchAndDecrementWrelease(P)  __TBB_FetchAndAddW(P,(-1))
00288 #endif /* !__TBB_USE_FENCED_ATOMICS */
00289 
00290 #if __TBB_WORDSIZE==4
00291     #define __TBB_CompareAndSwapW(P,V,C)    __TBB_machine_cmpswp4(P,V,C)
00292     #define __TBB_FetchAndAddW(P,V)         __TBB_machine_fetchadd4(P,V)
00293     #define __TBB_FetchAndStoreW(P,V)       __TBB_machine_fetchstore4(P,V)
00294 #elif  __TBB_WORDSIZE==8
00295     #if __TBB_USE_GENERIC_DWORD_LOAD_STORE || __TBB_USE_GENERIC_DWORD_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_STORE
00296         #error These macros should only be used on 32-bit platforms.
00297     #endif
00298 
00299     #define __TBB_CompareAndSwapW(P,V,C)    __TBB_machine_cmpswp8(P,V,C)
00300     #define __TBB_FetchAndAddW(P,V)         __TBB_machine_fetchadd8(P,V)
00301     #define __TBB_FetchAndStoreW(P,V)       __TBB_machine_fetchstore8(P,V)
00302 #else /* __TBB_WORDSIZE != 8 */
00303     #error Unsupported machine word size.
00304 #endif /* __TBB_WORDSIZE */
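// Usage sketch (illustrative; 'ref_count' is a hypothetical word-sized counter): the
// W-suffixed macros above operate on a full machine word, which makes them suitable for
// things like reference counts and pointer-sized values.
//     __TBB_FetchAndIncrementWacquire(&ref_count);          // returns the previous value
//     if( __TBB_FetchAndDecrementWrelease(&ref_count)==1 ) {
//         /* the last reference was just released */
//     }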
00305 
00306 #ifndef __TBB_Pause
00307     inline void __TBB_Pause(int32_t) {
00308         __TBB_Yield();
00309     }
00310 #endif
00311 
00312 namespace tbb {
00313 
00315 inline void atomic_fence () { __TBB_full_memory_fence(); }
00316 
00317 namespace internal {
00318 
00320 
00321 class atomic_backoff : no_copy {
00323 
00325     static const int32_t LOOPS_BEFORE_YIELD = 16;
00326     int32_t count;
00327 public:
00328     atomic_backoff() : count(1) {}
00329 
00331     void pause() {
00332         if( count<=LOOPS_BEFORE_YIELD ) {
00333             __TBB_Pause(count);
00334             // Pause twice as long the next time.
00335             count*=2;
00336         } else {
00337             // The pause is now so long that we might as well yield the CPU to the OS scheduler.
00338             __TBB_Yield();
00339         }
00340     }
00341 
00342     // Pause a bounded number of times; once the limit is exceeded, return false immediately.
00343     bool bounded_pause() {
00344         if( count<=LOOPS_BEFORE_YIELD ) {
00345             __TBB_Pause(count);
00346             // Pause twice as long the next time.
00347             count*=2;
00348             return true;
00349         } else {
00350             return false;
00351         }
00352     }
00353 
00354     void reset() {
00355         count = 1;
00356     }
00357 };
00358 
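// A minimal usage sketch (illustrative; the helper below is hypothetical and not part of
// this header): bounded_pause() lets a caller spin only for a limited time and then fall
// back to a slower path, e.g. blocking, instead of spinning indefinitely.
template<typename Predicate>
bool example_spin_then_give_up( Predicate ready ) {
    atomic_backoff backoff;
    do {
        if( ready() ) return true;         // condition met while spinning
    } while( backoff.bounded_pause() );    // becomes false once LOOPS_BEFORE_YIELD is exceeded
    return false;                          // caller should now take a blocking path
}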
00360 
00361 template<typename T, typename U>
00362 void spin_wait_while_eq( const volatile T& location, U value ) {
00363     atomic_backoff backoff;
00364     while( location==value ) backoff.pause();
00365 }
00366 
00368 
00369 template<typename T, typename U>
00370 void spin_wait_until_eq( const volatile T& location, const U value ) {
00371     atomic_backoff backoff;
00372     while( location!=value ) backoff.pause();
00373 }
00374 
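// A usage sketch (illustrative; 'stage' is a hypothetical variable written by another
// thread): the spin_wait helpers wrap the backoff loop for the common case of waiting on
// a single memory location.
inline void example_handshake( const volatile int& stage ) {
    spin_wait_while_eq( stage, 0 );   // wait until the stage leaves its initial value
    spin_wait_until_eq( stage, 2 );   // then wait until it reaches the final value
}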
00375 //TODO: add static_assert for the requirements stated below
00376 //TODO: check if it works with signed types
00377 
00378 // The following restrictions/limitations apply to this operation:
00379 //  - T should be unsigned; otherwise sign propagation will break the correctness of the bit manipulations.
00380 //  - T should be an integer type of at most 4 bytes, for the casts and calculations to work.
00381 //      (Together, these rules limit the applicability of Masked CAS to uint8_t and uint16_t only,
00382 //      as it does nothing useful for 4 bytes.)
00383 //  - The operation assumes that the architecture consistently uses either little-endian or big-endian:
00384 //      it does not support mixed-endian or page-specific bi-endian architectures.
00385 // This function is the only use of __TBB_BIG_ENDIAN.
00386 #if (__TBB_BIG_ENDIAN!=-1)
00387     #if ( __TBB_USE_GENERIC_PART_WORD_CAS)
00388         #error generic implementation of part-word CAS was explicitly disabled for this configuration
00389     #endif
00390 template<typename T>
00391 inline T __TBB_MaskedCompareAndSwap (volatile T * const ptr, const T value, const T comparand ) {
00392     struct endianness{ static bool is_big_endian(){
00393         #ifndef __TBB_BIG_ENDIAN
00394             const uint32_t probe = 0x03020100;
00395             return (((const char*)(&probe))[0]==0x03);
00396         #elif (__TBB_BIG_ENDIAN==0) || (__TBB_BIG_ENDIAN==1)
00397             return __TBB_BIG_ENDIAN;
00398         #else
00399             #error unexpected value of __TBB_BIG_ENDIAN
00400         #endif
00401     }};
00402 
00403     const uint32_t byte_offset            = (uint32_t) ((uintptr_t)ptr & 0x3);
00404     volatile uint32_t * const aligned_ptr = (uint32_t*)((uintptr_t)ptr - byte_offset );
00405 
00406     // location of T within uint32_t for a C++ shift operation
00407     const uint32_t bits_to_shift     = 8*(endianness::is_big_endian() ? (4 - sizeof(T) - (byte_offset)) : byte_offset);
00408     const uint32_t mask              = (((uint32_t)1<<(sizeof(T)*8)) - 1 )<<bits_to_shift;
00409     const uint32_t shifted_comparand = ((uint32_t)comparand << bits_to_shift)&mask;
00410     const uint32_t shifted_value     = ((uint32_t)value     << bits_to_shift)&mask;
00411 
00412     for(atomic_backoff b;;b.pause()) {
00413         const uint32_t surroundings  = *aligned_ptr & ~mask ; // reload the aligned_ptr value which might change during the pause
00414         const uint32_t big_comparand = surroundings | shifted_comparand ;
00415         const uint32_t big_value     = surroundings | shifted_value     ;
00416         // __TBB_machine_cmpswp4 presumed to have full fence.
00417         // Cast shuts up /Wp64 warning
00418         const uint32_t big_result = (uint32_t)__TBB_machine_cmpswp4( aligned_ptr, big_value, big_comparand );
00419         if( big_result == big_comparand                    // CAS succeeded
00420           || ((big_result ^ big_comparand) & mask) != 0)   // CAS failed and the bits of interest have changed
00421         {
00422             return T((big_result & mask) >> bits_to_shift);
00423         }
00424         else continue;                                     // CAS failed, but the bits of interest were left unchanged
00425     }
00426 }
00427 #endif
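// Worked example (illustrative, assuming a little-endian target): for a uint16_t that
// lives at byte offset 2 within its aligned 4-byte word,
//     bits_to_shift = 8*2                  = 16
//     mask          = ((1u<<16)-1) << 16   = 0xFFFF0000
// so the CAS loop above only ever rewrites the upper half of the aligned word, while the
// 'surroundings' (the lower half) are re-read and preserved on every retry.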
00428 template<size_t S, typename T>
00429 inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand );
00430 
00431 template<>
00432 inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) {
00433 #if __TBB_USE_GENERIC_PART_WORD_CAS
00434     return __TBB_MaskedCompareAndSwap<uint8_t>((volatile uint8_t *)ptr,value,comparand);
00435 #else
00436     return __TBB_machine_cmpswp1(ptr,value,comparand);
00437 #endif
00438 }
00439 
00440 template<>
00441 inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) {
00442 #if __TBB_USE_GENERIC_PART_WORD_CAS
00443     return __TBB_MaskedCompareAndSwap<uint16_t>((volatile uint16_t *)ptr,value,comparand);
00444 #else
00445     return __TBB_machine_cmpswp2(ptr,value,comparand);
00446 #endif
00447 }
00448 
00449 template<>
00450 inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) {
00451     // Cast shuts up /Wp64 warning
00452     return (uint32_t)__TBB_machine_cmpswp4(ptr,value,comparand);
00453 }
00454 
00455 #if __TBB_64BIT_ATOMICS
00456 template<>
00457 inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) {
00458     return __TBB_machine_cmpswp8(ptr,value,comparand);
00459 }
00460 #endif
00461 
00462 template<size_t S, typename T>
00463 inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
00464     atomic_backoff b;
00465     T result;
00466     for(;;) {
00467         result = *reinterpret_cast<volatile T *>(ptr);
00468         // __TBB_CompareAndSwapGeneric presumed to have full fence.
00469         if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
00470             break;
00471         b.pause();
00472     }
00473     return result;
00474 }
00475 
00476 template<size_t S, typename T>
00477 inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
00478     atomic_backoff b;
00479     T result;
00480     for(;;) {
00481         result = *reinterpret_cast<volatile T *>(ptr);
00482         // __TBB_CompareAndSwapGeneric presumed to have full fence.
00483         if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
00484             break;
00485         b.pause();
00486     }
00487     return result;
00488 }
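// The same CAS-retry pattern extends to any read-modify-write operation.  A minimal
// sketch (illustrative, not part of TBB) of a generic fetch-and-max built on top of
// __TBB_CompareAndSwapGeneric:
template<size_t S, typename T>
inline T example_fetch_and_max (volatile void *ptr, T candidate) {
    atomic_backoff b;
    T snapshot;
    for(;;) {
        snapshot = *reinterpret_cast<volatile T *>(ptr);
        if( snapshot>=candidate )          // current value is already at least as large
            break;
        // __TBB_CompareAndSwapGeneric presumed to have full fence.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, candidate, snapshot )==snapshot )
            break;                         // our candidate was installed
        b.pause();                         // lost the race; back off and retry
    }
    return snapshot;                       // value observed before any update
}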
00489 
00490 #if __TBB_USE_GENERIC_PART_WORD_CAS
00491 #define __TBB_machine_cmpswp1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t>
00492 #define __TBB_machine_cmpswp2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t>
00493 #endif
00494 
00495 #if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_PART_WORD_FETCH_ADD
00496 #define __TBB_machine_fetchadd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t>
00497 #define __TBB_machine_fetchadd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t>
00498 #endif
00499 
00500 #if __TBB_USE_GENERIC_FETCH_ADD
00501 #define __TBB_machine_fetchadd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t>
00502 #endif
00503 
00504 #if __TBB_USE_GENERIC_FETCH_ADD || __TBB_USE_GENERIC_DWORD_FETCH_ADD
00505 #define __TBB_machine_fetchadd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t>
00506 #endif
00507 
00508 #if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_PART_WORD_FETCH_STORE
00509 #define __TBB_machine_fetchstore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t>
00510 #define __TBB_machine_fetchstore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t>
00511 #endif
00512 
00513 #if __TBB_USE_GENERIC_FETCH_STORE
00514 #define __TBB_machine_fetchstore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t>
00515 #endif
00516 
00517 #if __TBB_USE_GENERIC_FETCH_STORE || __TBB_USE_GENERIC_DWORD_FETCH_STORE
00518 #define __TBB_machine_fetchstore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t>
00519 #endif
00520 
00521 #if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
00522 #define __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(S)                                             \
00523     atomic_selector<S>::word atomic_selector<S>::fetch_store ( volatile void* location, word value ) {  \
00524         return __TBB_machine_fetchstore##S( location, value );                                          \
00525     }
00526 
00527 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(1)
00528 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(2)
00529 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(4)
00530 __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE(8)
00531 
00532 #undef __TBB_MACHINE_DEFINE_ATOMIC_SELECTOR_FETCH_STORE
00533 #endif /* __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
00534 
00535 #if __TBB_USE_GENERIC_DWORD_LOAD_STORE
00536 inline void __TBB_machine_store8 (volatile void *ptr, int64_t value) {
00537     for(;;) {
00538         int64_t result = *(int64_t *)ptr;
00539         if( __TBB_machine_cmpswp8(ptr,value,result)==result ) break;
00540     }
00541 }
00542 
00543 inline int64_t __TBB_machine_load8 (const volatile void *ptr) {
00544     // The comparand and the new value may be anything; they just have to be equal to each other,
00545     // and the value should be unlikely to actually occur in 'location'.
00546     const int64_t anyvalue = 2305843009213693951LL;
00547     return __TBB_machine_cmpswp8(const_cast<volatile void *>(ptr),anyvalue,anyvalue);
00548 }
00549 #endif /* __TBB_USE_GENERIC_DWORD_LOAD_STORE */
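// Why the generic 8-byte load above works: cmpswp8(ptr, anyvalue, anyvalue) always
// returns the current contents of *ptr.  If *ptr happens to equal 'anyvalue' the CAS
// "succeeds" but writes back the very same value, so the location is never modified; in
// every other case the CAS fails and simply reports what it read.  The store loop,
// conversely, keeps re-reading until a CAS from the freshly read value succeeds, which
// makes the 8-byte write atomic even on 32-bit targets.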
00550 
00551 #if __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE
00552 
00558 template <typename T, size_t S>
00559 struct machine_load_store {
00560     static T load_with_acquire ( const volatile T& location ) {
00561         T to_return = location;
00562         __TBB_acquire_consistency_helper();
00563         return to_return;
00564     }
00565     static void store_with_release ( volatile T &location, T value ) {
00566         __TBB_release_consistency_helper();
00567         location = value;
00568     }
00569 };
00570 
00571 // In general, plain loads and stores generated by a 32-bit compiler are not atomic for 64-bit types.
00572 #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
00573 template <typename T>
00574 struct machine_load_store<T,8> {
00575     static T load_with_acquire ( const volatile T& location ) {
00576         return (T)__TBB_machine_load8( (const volatile void*)&location );
00577     }
00578     static void store_with_release ( volatile T& location, T value ) {
00579         __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
00580     }
00581 };
00582 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
00583 #endif /* __TBB_USE_GENERIC_HALF_FENCED_LOAD_STORE */
00584 
00585 #if __TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE
00586 template <typename T, size_t S>
00587 struct machine_load_store_seq_cst {
00588     static T load ( const volatile T& location ) {
00589         __TBB_full_memory_fence();
00590         return machine_load_store<T,S>::load_with_acquire( location );
00591     }
00592 #if __TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE
00593     static void store ( volatile T &location, T value ) {
00594         atomic_selector<S>::fetch_store( (volatile void*)&location, (typename atomic_selector<S>::word)value );
00595     }
00596 #else /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
00597     static void store ( volatile T &location, T value ) {
00598         machine_load_store<T,S>::store_with_release( location, value );
00599         __TBB_full_memory_fence();
00600     }
00601 #endif /* !__TBB_USE_FETCHSTORE_AS_FULL_FENCED_STORE */
00602 };
00603 
00604 #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
00605 
00607 template <typename T>
00608 struct machine_load_store_seq_cst<T,8> {
00609     static T load ( const volatile T& location ) {
00610         // The comparand and the new value may be anything; they just have to be equal to each other,
00611         // and the value should be unlikely to actually occur in 'location'.
00612         const int64_t anyvalue = 2305843009213693951LL;
00613         return __TBB_machine_cmpswp8( (volatile void*)const_cast<volatile T*>(&location), anyvalue, anyvalue );
00614     }
00615     static void store ( volatile T &location, T value ) {
00616         int64_t result = (volatile int64_t&)location;
00617         while ( __TBB_machine_cmpswp8((volatile void*)&location, (int64_t)value, result) != result )
00618             result = (volatile int64_t&)location;
00619     }
00620 };
00621 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
00622 #endif /*__TBB_USE_GENERIC_SEQUENTIAL_CONSISTENCY_LOAD_STORE */
00623 
00624 #if __TBB_USE_GENERIC_RELAXED_LOAD_STORE
00625 // Relaxed operations add the volatile qualifier to prevent the compiler from optimizing them out.
00629 template <typename T, size_t S>
00630 struct machine_load_store_relaxed {
00631     static inline T load ( const volatile T& location ) {
00632         return location;
00633     }
00634     static inline void store ( volatile T& location, T value ) {
00635         location = value;
00636     }
00637 };
00638 
00639 #if __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS
00640 template <typename T>
00641 struct machine_load_store_relaxed<T,8> {
00642     static inline T load ( const volatile T& location ) {
00643         return (T)__TBB_machine_load8( (const volatile void*)&location );
00644     }
00645     static inline void store ( volatile T& location, T value ) {
00646         __TBB_machine_store8( (volatile void*)&location, (int64_t)value );
00647     }
00648 };
00649 #endif /* __TBB_WORDSIZE==4 && __TBB_64BIT_ATOMICS */
00650 #endif /* __TBB_USE_GENERIC_RELAXED_LOAD_STORE */
00651 
00652 #undef __TBB_WORDSIZE // this macro must not be used outside of the atomic machinery
00653 
00654 template<typename T>
00655 inline T __TBB_load_with_acquire(const volatile T &location) {
00656     return machine_load_store<T,sizeof(T)>::load_with_acquire( location );
00657 }
00658 template<typename T, typename V>
00659 inline void __TBB_store_with_release(volatile T& location, V value) {
00660     machine_load_store<T,sizeof(T)>::store_with_release( location, T(value) );
00661 }
00663 inline void __TBB_store_with_release(volatile size_t& location, size_t value) {
00664     machine_load_store<size_t,sizeof(size_t)>::store_with_release( location, value );
00665 }
00666 
00667 template<typename T>
00668 inline T __TBB_load_full_fence(const volatile T &location) {
00669     return machine_load_store_seq_cst<T,sizeof(T)>::load( location );
00670 }
00671 template<typename T, typename V>
00672 inline void __TBB_store_full_fence(volatile T& location, V value) {
00673     machine_load_store_seq_cst<T,sizeof(T)>::store( location, T(value) );
00674 }
00676 inline void __TBB_store_full_fence(volatile size_t& location, size_t value) {
00677     machine_load_store_seq_cst<size_t,sizeof(size_t)>::store( location, value );
00678 }
00679 
00680 template<typename T>
00681 inline T __TBB_load_relaxed (const volatile T& location) {
00682     return machine_load_store_relaxed<T,sizeof(T)>::load( const_cast<T&>(location) );
00683 }
00684 template<typename T, typename V>
00685 inline void __TBB_store_relaxed ( volatile T& location, V value ) {
00686     machine_load_store_relaxed<T,sizeof(T)>::store( const_cast<T&>(location), T(value) );
00687 }
00689 inline void __TBB_store_relaxed ( volatile size_t& location, size_t value ) {
00690     machine_load_store_relaxed<size_t,sizeof(size_t)>::store( const_cast<size_t&>(location), value );
00691 }
00692 
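// A minimal publication sketch (illustrative; the struct and field names are hypothetical):
// the release store makes the payload write visible before the flag is raised, and the
// acquire load orders the payload read after the flag has been observed.
struct example_publication {
    int payload;
    int ready;
    void produce( int v ) {
        __TBB_store_relaxed( payload, v );       // ordinary data write
        __TBB_store_with_release( ready, 1 );    // publish: payload becomes visible before ready==1
    }
    bool try_consume( int& out ) {
        if( __TBB_load_with_acquire( ready )==0 )
            return false;                        // not published yet
        out = __TBB_load_relaxed( payload );     // safe: ordered after the acquire load
        return true;
    }
};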
00693 // The macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should expand to a type with alignment at least
00694 // as strict as that of type T.  The type should have a trivial default constructor and destructor, so that
00695 // arrays of that type can be declared without initializers.
00696 // It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands
00697 // to a type bigger than T.
00698 // The default definition here works on machines where integers are naturally aligned and the
00699 // strictest alignment is 64 bytes.
00700 #ifndef __TBB_TypeWithAlignmentAtLeastAsStrict
00701 
00702 #if __TBB_ATTRIBUTE_ALIGNED_PRESENT
00703 
00704 #define __TBB_DefineTypeWithAlignment(PowerOf2)       \
00705 struct __TBB_machine_type_with_alignment_##PowerOf2 { \
00706     uint32_t member[PowerOf2/sizeof(uint32_t)];       \
00707 } __attribute__((aligned(PowerOf2)));
00708 #define __TBB_alignof(T) __alignof__(T)
00709 
00710 #elif __TBB_DECLSPEC_ALIGN_PRESENT
00711 
00712 #define __TBB_DefineTypeWithAlignment(PowerOf2)       \
00713 __declspec(align(PowerOf2))                           \
00714 struct __TBB_machine_type_with_alignment_##PowerOf2 { \
00715     uint32_t member[PowerOf2/sizeof(uint32_t)];       \
00716 };
00717 #define __TBB_alignof(T) __alignof(T)
00718 
00719 #else /* A compiler with unknown syntax for data alignment */
00720 #error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T)
00721 #endif
00722 
00723 /* Now declare types aligned to useful powers of two */
00724 // TODO: Is __TBB_DefineTypeWithAlignment(8) needed on 32 bit platforms?
00725 __TBB_DefineTypeWithAlignment(16)
00726 __TBB_DefineTypeWithAlignment(32)
00727 __TBB_DefineTypeWithAlignment(64)
00728 
00729 typedef __TBB_machine_type_with_alignment_64 __TBB_machine_type_with_strictest_alignment;
00730 
00731 // Primary template is a declaration of incomplete type so that it fails with unknown alignments
00732 template<size_t N> struct type_with_alignment;
00733 
00734 // Specializations for allowed alignments
00735 template<> struct type_with_alignment<1> { char member; };
00736 template<> struct type_with_alignment<2> { uint16_t member; };
00737 template<> struct type_with_alignment<4> { uint32_t member; };
00738 template<> struct type_with_alignment<8> { uint64_t member; };
00739 template<> struct type_with_alignment<16> {__TBB_machine_type_with_alignment_16 member; };
00740 template<> struct type_with_alignment<32> {__TBB_machine_type_with_alignment_32 member; };
00741 template<> struct type_with_alignment<64> {__TBB_machine_type_with_alignment_64 member; };
00742 
00743 #if __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN
00745 
00747 template<size_t Size, typename T>
00748 struct work_around_alignment_bug {
00749     static const size_t alignment = __TBB_alignof(T);
00750 };
00751 #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
00752 #else
00753 #define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__TBB_alignof(T)>
00754 #endif  /* __TBB_ALIGNOF_NOT_INSTANTIATED_TYPES_BROKEN */
00755 
00756 #endif  /* __TBB_TypeWithAlignmentAtLeastAsStrict */
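// A usage sketch (illustrative; 'example_aligned_storage' is a hypothetical, simplified
// analogue of tbb::aligned_space): the macro is typically used to give raw storage the
// alignment of the object that will later be constructed in it.
template<typename T>
struct example_aligned_storage {
    typedef __TBB_TypeWithAlignmentAtLeastAsStrict(T) element_type;
    // enough suitably aligned elements to cover one T
    element_type array[(sizeof(T)+sizeof(element_type)-1)/sizeof(element_type)];
    T* begin() { return reinterpret_cast<T*>(this); }
};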
00757 
00758 // A template class is used here to avoid instantiation of the static data in modules that don't use it.
00759 template<typename T>
00760 struct reverse {
00761     static const T byte_table[256];
00762 };
00763 // An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the bit-reversed
00764 // values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost.
00765 template<typename T>
00766 const T reverse<T>::byte_table[256] = {
00767     0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
00768     0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
00769     0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
00770     0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
00771     0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
00772     0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
00773     0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
00774     0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
00775     0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
00776     0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
00777     0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
00778     0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
00779     0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
00780     0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
00781     0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
00782     0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
00783 };
00784 
00785 } // namespace internal
00786 } // namespace tbb
00787 
00788 // Preserving access to legacy APIs
00789 using tbb::internal::__TBB_load_with_acquire;
00790 using tbb::internal::__TBB_store_with_release;
00791 
00792 // Mapping historically used names to the ones expected by atomic_load_store_traits
00793 #define __TBB_load_acquire  __TBB_load_with_acquire
00794 #define __TBB_store_release __TBB_store_with_release
00795 
00796 #ifndef __TBB_Log2
00797 inline intptr_t __TBB_Log2( uintptr_t x ) {
00798     if( x==0 ) return -1;
00799     intptr_t result = 0;
00800     uintptr_t tmp;
00801 
00802     if( sizeof(x)>4 && (tmp = ((uint64_t)x)>>32)) { x=tmp; result += 32; }
00803     if( (tmp = x>>16) ) { x=tmp; result += 16; }
00804     if( (tmp = x>>8) )  { x=tmp; result += 8; }
00805     if( (tmp = x>>4) )  { x=tmp; result += 4; }
00806     if( (tmp = x>>2) )  { x=tmp; result += 2; }
00807     return (x&2)? result+1: result;
00808 }
00809 #endif
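// A few sample values (illustrative) of the floor-log2 computed above:
//     __TBB_Log2(1) == 0,   __TBB_Log2(2) == 1,   __TBB_Log2(3) == 1,
//     __TBB_Log2(8) == 3,   __TBB_Log2(1000) == 9,   and __TBB_Log2(0) == -1 by convention.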
00810 
00811 #ifndef __TBB_AtomicOR
00812 inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
00813     tbb::internal::atomic_backoff b;
00814     for(;;) {
00815         uintptr_t tmp = *(volatile uintptr_t *)operand;
00816         uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
00817         if( result==tmp ) break;
00818         b.pause();
00819     }
00820 }
00821 #endif
00822 
00823 #ifndef __TBB_AtomicAND
00824 inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
00825     tbb::internal::atomic_backoff b;
00826     for(;;) {
00827         uintptr_t tmp = *(volatile uintptr_t *)operand;
00828         uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
00829         if( result==tmp ) break;
00830         b.pause();
00831     }
00832 }
00833 #endif
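// A usage sketch (illustrative; 'status_word' and the bit masks are hypothetical):
// __TBB_AtomicOR sets bits and __TBB_AtomicAND clears them, each as a single atomic
// read-modify-write of a machine word.
const uintptr_t example_ready_bit = 0x1;
const uintptr_t example_error_bit = 0x2;
inline void example_set_ready( volatile uintptr_t& status_word ) {
    __TBB_AtomicOR( &status_word, example_ready_bit );            // set the bit
}
inline void example_clear_error( volatile uintptr_t& status_word ) {
    __TBB_AtomicAND( &status_word, ~example_error_bit );          // clear the bit, keep the rest
}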
00834 
00835 #ifndef __TBB_Flag
00836 typedef unsigned char __TBB_Flag;
00837 #endif
00838 typedef __TBB_atomic __TBB_Flag __TBB_atomic_flag;
00839 
00840 #ifndef __TBB_TryLockByte
00841 inline bool __TBB_TryLockByte( __TBB_atomic_flag &flag ) {
00842     return __TBB_machine_cmpswp1(&flag,1,0)==0;
00843 }
00844 #endif
00845 
00846 #ifndef __TBB_LockByte
00847 inline __TBB_Flag __TBB_LockByte( __TBB_atomic_flag& flag ) {
00848     if ( !__TBB_TryLockByte(flag) ) {
00849         tbb::internal::atomic_backoff b;
00850         do {
00851             b.pause();
00852         } while ( !__TBB_TryLockByte(flag) );
00853     }
00854     return 0;
00855 }
00856 #endif
00857 
00858 #ifndef  __TBB_UnlockByte
00859 #define __TBB_UnlockByte __TBB_store_with_release
00860 #endif
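// A minimal sketch (illustrative, not part of TBB) of a scoped guard built on the byte
// lock primitives above: the constructor spins in __TBB_LockByte and the destructor
// releases the lock via __TBB_UnlockByte.
class example_byte_lock_guard {
    __TBB_atomic_flag& my_flag;
public:
    example_byte_lock_guard( __TBB_atomic_flag& flag ) : my_flag(flag) { __TBB_LockByte(my_flag); }
    ~example_byte_lock_guard() { __TBB_UnlockByte(my_flag, 0); }   // release store of 0
private:
    example_byte_lock_guard( const example_byte_lock_guard& );     // non-copyable
    void operator=( const example_byte_lock_guard& );
};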
00861 
00862 #ifndef __TBB_ReverseByte
00863 inline unsigned char __TBB_ReverseByte(unsigned char src) {
00864     return tbb::internal::reverse<unsigned char>::byte_table[src];
00865 }
00866 #endif
00867 
00868 template<typename T>
00869 T __TBB_ReverseBits(T src) {
00870     T dst;
00871     unsigned char *original = (unsigned char *) &src;
00872     unsigned char *reversed = (unsigned char *) &dst;
00873 
00874     for( int i = sizeof(T)-1; i >= 0; i-- )
00875         reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );
00876 
00877     return dst;
00878 }
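// Worked example (illustrative): reversing all bits of a 16-bit value.
//     __TBB_ReverseBits<uint16_t>(0x0001) == 0x8000
//     __TBB_ReverseBits<uint16_t>(0x1234) == 0x2C48
// Each byte is bit-reversed through the lookup table and the bytes themselves are stored
// in reverse order, which together reverses the whole value independently of the
// machine's endianness.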
00879 
00880 #endif /* __TBB_machine_H */
