// -*- C++ -*-

// Copyright (C) 2007, 2008, 2009 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the terms
// of the GNU General Public License as published by the Free Software
// Foundation; either version 3, or (at your option) any later version.

// This library is distributed in the hope that it will be useful, but
// WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

/** @file parallel/compatibility.h
 *  @brief Compatibility layer, mostly concerned with atomic operations.
 *
 *  This file is a GNU parallel extension to the Standard C++ Library.
 */

// Written by Felix Putze.
#ifndef _GLIBCXX_PARALLEL_COMPATIBILITY_H
#define _GLIBCXX_PARALLEL_COMPATIBILITY_H 1

#include <parallel/types.h>
#include <parallel/base.h>

#if defined(__SUNPRO_CC) && defined(__sparc)
#include <sys/atomic.h>
#endif

#if !defined(_WIN32) || defined(__CYGWIN__)
#include <sched.h>
#endif

#if defined(_MSC_VER)
#include <Windows.h>
#include <intrin.h>
#undef max
#undef min
#endif

#ifdef __MINGW32__
// Including <windows.h> would drag in the whole Windows API, which can
// clash with names in user code, so just declare the one function that
// is needed here.
extern "C"
__attribute((dllimport)) void __attribute__((stdcall)) Sleep(unsigned long);
#endif

namespace __gnu_parallel
{
#if defined(__ICC)
  // Inline-assembly fetch-and-add helpers for the Intel compiler,
  // using the x86 "lock xadd" instruction; each returns the value the
  // variable held before the addition.
  template<typename _MustBeInt = int>
    int32_t __faa32(int32_t* __x, int32_t __inc)
    {
      asm volatile("lock xadd %0,%1"
                   : "=r" (__inc), "=m" (*__x)
                   : "0" (__inc)
                   : "memory");
      return __inc;
    }
#if defined(__x86_64)
  template<typename _MustBeInt = int>
    int64_t __faa64(int64_t* __x, int64_t __inc)
    {
      asm volatile("lock xadd %0,%1"
                   : "=r" (__inc), "=m" (*__x)
                   : "0" (__inc)
                   : "memory");
      return __inc;
    }
#endif
#endif

  // The atomic functions below only work on integer types.

  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a 32-bit signed integer.
   *  @param __addend Value to add. */
  inline int32_t
  __fetch_and_add_32(volatile int32_t* __ptr, int32_t __addend)
  {
#if defined(__ICC)      //x86 version
    return _InterlockedExchangeAdd((void*)__ptr, __addend);
#elif defined(__ECC)    //IA-64 version
    return _InterlockedExchangeAdd((void*)__ptr, __addend);
#elif defined(__ICL) || defined(_MSC_VER)
    return _InterlockedExchangeAdd(reinterpret_cast<volatile long*>(__ptr),
                                   __addend);
#elif defined(__GNUC__)
    return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    volatile int32_t __before, __after;
    do
      {
        __before = *__ptr;
        __after = __before + __addend;
      }
    while (atomic_cas_32((volatile unsigned int*)__ptr, __before,
                         __after) != __before);
    return __before;
#else   //fallback, slow
#pragma message("slow __fetch_and_add_32")
    int32_t __res;
#pragma omp critical
    {
      __res = *__ptr;
      *(__ptr) += __addend;
    }
    return __res;
#endif
  }
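
  // Illustrative usage sketch (not part of this header's interface): a
  // shared counter handing out distinct work-item indices to threads.
  //
  //   volatile int32_t __next_chunk = 0;
  //   // Each caller gets the value *before* the addition, so concurrent
  //   // callers obtain disjoint indices.
  //   int32_t __mine = __fetch_and_add_32(&__next_chunk, 1);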

  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a 64-bit signed integer.
   *  @param __addend Value to add. */
  inline int64_t
  __fetch_and_add_64(volatile int64_t* __ptr, int64_t __addend)
  {
#if defined(__ICC) && defined(__x86_64) //x86 version
    return __faa64<int>((int64_t*)__ptr, __addend);
#elif defined(__ECC)    //IA-64 version
    return _InterlockedExchangeAdd64((void*)__ptr, __addend);
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
    // No 64-bit atomics on 32-bit Windows targets.
    _GLIBCXX_PARALLEL_ASSERT(false);
    return 0;
#else
    return _InterlockedExchangeAdd64(__ptr, __addend);
#endif
#elif defined(__GNUC__) && defined(__x86_64)
    return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__GNUC__) && defined(__i386) &&                   \
  (defined(__i686) || defined(__pentium4) || defined(__athlon))
    return __sync_fetch_and_add(__ptr, __addend);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    volatile int64_t __before, __after;
    do
      {
        __before = *__ptr;
        __after = __before + __addend;
      }
    while (atomic_cas_64((volatile unsigned long long*)__ptr, __before,
                         __after) != __before);
    return __before;
#else   //fallback, slow
#if defined(__GNUC__) && defined(__i386)
    // XXX doesn't work with -march=native
    //#warning "please compile with -march=i686 or better"
#endif
#pragma message("slow __fetch_and_add_64")
    int64_t __res;
#pragma omp critical
    {
      __res = *__ptr;
      *(__ptr) += __addend;
    }
    return __res;
#endif
  }

  /** @brief Add a value to a variable, atomically.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a signed integer.
   *  @param __addend Value to add. */
  template<typename _Tp>
    inline _Tp
    __fetch_and_add(volatile _Tp* __ptr, _Tp __addend)
    {
      if (sizeof(_Tp) == sizeof(int32_t))
        return (_Tp)__fetch_and_add_32((volatile int32_t*) __ptr,
                                       (int32_t)__addend);
      else if (sizeof(_Tp) == sizeof(int64_t))
        return (_Tp)__fetch_and_add_64((volatile int64_t*) __ptr,
                                       (int64_t)__addend);
      else
        _GLIBCXX_PARALLEL_ASSERT(false);
    }
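
  // Example (illustrative only): dispatch is on sizeof(_Tp), so the
  // generic form also covers other signed integer types of matching
  // width, e.g. ptrdiff_t:
  //
  //   volatile ptrdiff_t __processed = 0;
  //   __fetch_and_add(&__processed, (ptrdiff_t)1);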

#if defined(__ICC)
  // Inline-assembly compare-and-swap helpers for the Intel compiler,
  // using the x86 "lock cmpxchg" instruction; each returns the value
  // observed before the exchange attempt.
  template<typename _MustBeInt = int>
    inline int32_t
    __cas32(volatile int32_t* __ptr, int32_t __old, int32_t __nw)
    {
      int32_t __before;
      __asm__ __volatile__("lock; cmpxchgl %1,%2"
                           : "=a"(__before)
                           : "q"(__nw), "m"(*(volatile long long*)(__ptr)),
                             "0"(__old)
                           : "memory");
      return __before;
    }

#if defined(__x86_64)
  template<typename _MustBeInt = int>
    inline int64_t
    __cas64(volatile int64_t* __ptr, int64_t __old, int64_t __nw)
    {
      int64_t __before;
      __asm__ __volatile__("lock; cmpxchgq %1,%2"
                           : "=a"(__before)
                           : "q"(__nw), "m"(*(volatile long long*)(__ptr)),
                             "0"(__old)
                           : "memory");
      return __before;
    }
#endif

#endif

  /** @brief Compare @c *__ptr and @c __comparand.
   *
   *  If equal, let @c *__ptr=__replacement and return @c true,
   *  return @c false otherwise.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a 32-bit signed integer.
   *  @param __comparand Compare value.
   *  @param __replacement Replacement value. */
  inline bool
  __compare_and_swap_32(volatile int32_t* __ptr, int32_t __comparand,
                        int32_t __replacement)
  {
#if defined(__ICC)      //x86 version
    return _InterlockedCompareExchange((void*)__ptr, __replacement,
                                       __comparand) == __comparand;
#elif defined(__ECC)    //IA-64 version
    return _InterlockedCompareExchange((void*)__ptr, __replacement,
                                       __comparand) == __comparand;
#elif defined(__ICL) || defined(_MSC_VER)
    return _InterlockedCompareExchange(
                                 reinterpret_cast<volatile long*>(__ptr),
                                 __replacement, __comparand) == __comparand;
#elif defined(__GNUC__)
    return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    return atomic_cas_32((volatile unsigned int*)__ptr, __comparand,
                         __replacement) == __comparand;
#else
#pragma message("slow __compare_and_swap_32")
    bool __res = false;
#pragma omp critical
    {
      if (*__ptr == __comparand)
        {
          *__ptr = __replacement;
          __res = true;
        }
    }
    return __res;
#endif
  }
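
  // Illustrative sketch: __compare_and_swap_32 can act as a test-and-set
  // primitive, e.g. for a minimal spin lock (names are examples only; a
  // real lock would also need explicit memory-ordering guarantees):
  //
  //   volatile int32_t __lock = 0;
  //   while (!__compare_and_swap_32(&__lock, 0, 1))
  //     __yield();
  //   /* ...critical section... */
  //   __lock = 0;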

  /** @brief Compare @c *__ptr and @c __comparand.
   *
   *  If equal, let @c *__ptr=__replacement and return @c true,
   *  return @c false otherwise.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a 64-bit signed integer.
   *  @param __comparand Compare value.
   *  @param __replacement Replacement value. */
  inline bool
  __compare_and_swap_64(volatile int64_t* __ptr, int64_t __comparand,
                        int64_t __replacement)
  {
#if defined(__ICC) && defined(__x86_64) //x86 version
    return __cas64<int>(__ptr, __comparand, __replacement) == __comparand;
#elif defined(__ECC)    //IA-64 version
    return _InterlockedCompareExchange64((void*)__ptr, __replacement,
                                         __comparand) == __comparand;
#elif defined(__ICL) || defined(_MSC_VER)
#ifndef _WIN64
    // No 64-bit atomics on 32-bit Windows targets.
    _GLIBCXX_PARALLEL_ASSERT(false);
    return false;
#else
    return _InterlockedCompareExchange64(__ptr, __replacement,
                                         __comparand) == __comparand;
#endif
#elif defined(__GNUC__) && defined(__x86_64)
    return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__GNUC__) && defined(__i386) &&                   \
  (defined(__i686) || defined(__pentium4) || defined(__athlon))
    return __sync_bool_compare_and_swap(__ptr, __comparand, __replacement);
#elif defined(__SUNPRO_CC) && defined(__sparc)
    return atomic_cas_64((volatile unsigned long long*)__ptr,
                         __comparand, __replacement) == __comparand;
#else
#if defined(__GNUC__) && defined(__i386)
    // XXX doesn't work with -march=native
    //#warning "please compile with -march=i686 or better"
#endif
#pragma message("slow __compare_and_swap_64")
    bool __res = false;
#pragma omp critical
    {
      if (*__ptr == __comparand)
        {
          *__ptr = __replacement;
          __res = true;
        }
    }
    return __res;
#endif
  }

  /** @brief Compare @c *__ptr and @c __comparand.
   *
   *  If equal, let @c *__ptr=__replacement and return @c true,
   *  return @c false otherwise.
   *
   *  Implementation is heavily platform-dependent.
   *  @param __ptr Pointer to a signed integer.
   *  @param __comparand Compare value.
   *  @param __replacement Replacement value. */
  template<typename _Tp>
    inline bool
    __compare_and_swap(volatile _Tp* __ptr, _Tp __comparand,
                       _Tp __replacement)
    {
      if (sizeof(_Tp) == sizeof(int32_t))
        return __compare_and_swap_32((volatile int32_t*) __ptr,
                                     (int32_t)__comparand,
                                     (int32_t)__replacement);
      else if (sizeof(_Tp) == sizeof(int64_t))
        return __compare_and_swap_64((volatile int64_t*) __ptr,
                                     (int64_t)__comparand,
                                     (int64_t)__replacement);
      else
        _GLIBCXX_PARALLEL_ASSERT(false);
    }
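
  // Illustrative sketch: the usual retry loop built on __compare_and_swap,
  // here raising a shared maximum (example names, not part of this header):
  //
  //   volatile int64_t __global_max = 0;
  //   int64_t __local_max = /* per-thread result */;
  //   int64_t __old;
  //   do
  //     __old = __global_max;
  //   while (__old < __local_max
  //          && !__compare_and_swap(&__global_max, __old, __local_max));
  //
  // The CAS fails if another thread changed the value in the meantime;
  // the loop then re-reads the current value and retries.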

  /** @brief Yield control to another thread, without waiting for the
   *  end of the time slice. */
  inline void
  __yield()
  {
#if defined(_WIN32) && !defined(__CYGWIN__)
    Sleep(0);
#else
    sched_yield();
#endif
  }
} // namespace __gnu_parallel

#endif /* _GLIBCXX_PARALLEL_COMPATIBILITY_H */