C++ Library Extensions 2022.12.09
To help learn modern C++ programming
tpf_parallel.hpp
Go to the documentation of this file.
1
12#ifndef _TPF_PARALLEL_HPP
13#define _TPF_PARALLEL_HPP
14
15#ifndef NOMINMAX
16#define NOMINMAX
17#endif
18
19#ifdef _MSVC_LANG
20 #if _MSVC_LANG < 201703L
21 #error This libary requires C++17 Standard (Visual Studio 2017).
22 #endif
23#else
24
25 #if __cplusplus < 201703
26 #error This library requires C++17 Standard (GNU g++ version 8.0 or clang++ version 8.0 above)
27 #endif // end of __cplusplus
28
29#endif // end of _MSVC_LANG
30
31#ifndef TBB_SUPPRESS_DEPRECATED_MESSAGES
32 #define TBB_SUPPRESS_DEPRECATED_MESSAGES 1
33#endif // end of TBB_SUPPRESS_DEPRECATED_MESSAGES
34
35#if !defined(DEBUGGING_MODE)
36 #if defined(_DEBUG)
37 #define DEBUGGING_MODE 1
38 #elif defined(TBB_USE_DEBUG)
39 #if TBB_USE_DEBUG==1
40 #define DEBUGGING_MODE 1
41 #else
42 #define DEBUGGING_MODE 0
43 #endif // TBB_USE_DEBUG==1
44 #else
45 #define DEBUGGING_MODE 0
46 #endif // defined(_DEBUG)
47#endif // !defined(DEBUGGING_MODE)
48
49#ifndef LIB_INTERNAL_PutDebugMessageAndExit
50 #define LIB_INTERNAL_PutDebugMessageAndExit(error_msg) \
51 { std::cerr<<"File name ["<<__FILE__<< "]\n" \
52 << "Line " << __LINE__ << " : " << error_msg; \
53 std::exit(1); }
54#endif // end of LIB_INTERNAL_PutDebugMessageAndExit
55
56#ifndef Tpf_PutDebugMessageAndExit
57 #define Tpf_PutDebugMessageAndExit(error_msg) LIB_INTERNAL_PutDebugMessageAndExit(error_msg)
58#endif // end of Tpf_PutDebugMessageAndExit
59
60#include <tpf_types.hpp>
61#include <atomic>
62#include <algorithm>
63#include <execution>
64#include <tbb/tbb.h>
65
70namespace tpf
71{
76 namespace parallel
77 {
78 /*
79 Feature testing (C++20)
80 https://en.cppreference.com/w/cpp/feature_test
81 */
82
83 /*
84 014 - Cache-Aligned Elements of C++ STL Container with cache_wrapper class
85 https://www.youtube.com/watch?v=oNHvpH1eZiM&list=PL1_C6uWTeBDFiAKI2NWGVV8J2_U9-4GXl&index=15
86
87 013 - Cache-Aligned STL Container with Intel TBB cache_aligned_allocator, C++20 Feature testing
88 https://www.youtube.com/watch?v=__XCCzSes0k&list=PL1_C6uWTeBDFiAKI2NWGVV8J2_U9-4GXl&index=14
89
90 012 - Cache aligned pointer with operator new( std::align_val_t{64}, std::nothrow)
91 https://www.youtube.com/watch?v=hJ3di5bq3xU&list=PL1_C6uWTeBDFiAKI2NWGVV8J2_U9-4GXl&index=13
92
93 011 - std::jthread The C++20 Thread Launcher, Experiment on alignas
94 https://www.youtube.com/watch?v=KUwXn_axEME&list=PL1_C6uWTeBDFiAKI2NWGVV8J2_U9-4GXl&index=12
95
96 010 - C++ Threading Books, Parallel Algorithm, atomic, mutex, scoped_lock, ranges, anonymous union
97 https://www.youtube.com/watch?v=DLyQ_VEFguI&list=PL1_C6uWTeBDFiAKI2NWGVV8J2_U9-4GXl&index=11
98
99 009 - How to use 100% of C++ placement new, delete, nothrow new, delete operators!
100 https://www.youtube.com/watch?v=EEBuoFG3q4M&list=PL1_C6uWTeBDFiAKI2NWGVV8J2_U9-4GXl&index=10
101
102 008 - Overload Class-specific operator new / delete, Memory Leak Detection on Command Line
103 https://www.youtube.com/watch?v=19Xg55hI6wg&list=PL1_C6uWTeBDFiAKI2NWGVV8J2_U9-4GXl&index=9
104
105 007 - Experiment on Global operator new, dynamic allocation with Zero Byte
106 https://www.youtube.com/watch?v=gfZL5nDthFQ&list=PL1_C6uWTeBDFiAKI2NWGVV8J2_U9-4GXl&index=8
107
108 006 - Benchmarking std::map vs. std::unordered_map - random string generator: random_words
109 https://www.youtube.com/watch?v=eS1hwYS2FQk&list=PL1_C6uWTeBDFiAKI2NWGVV8J2_U9-4GXl&index=7
110
111 005 - Experiment on Polymorphic Memory Resource (PMR) - C++17 - The Complete Guide
112 https://www.youtube.com/watch?v=5gA3YbcLzKI&list=PL1_C6uWTeBDFiAKI2NWGVV8J2_U9-4GXl&index=5
113 */
114
115 #ifdef __cpp_lib_hardware_interference_size
116 inline constexpr auto cache_line_size = std::hardware_constructive_interference_size;
117 #else
118 inline constexpr auto cache_line_size = 64;
119 #endif // end of __cpp_lib_hardware_interference_size
120
121 inline constexpr size_t default_alignment()
122 { return __STDCPP_DEFAULT_NEW_ALIGNMENT__; }
123
124 template<typename ElementType, typename Operation>
125 inline void atomic_operation(std::atomic<ElementType>& atom, Operation&& opr)
126 {
127 ElementType old_value = atom;
128 ElementType new_value = opr(old_value);
129
130 while(!atom.compare_exchange_strong(old_value, new_value))
131 {
132 old_value = atom;
133 new_value = opr(old_value);
134 }
135 }
136
137 #ifndef _TPF_ITERATOR
138 #define _TPF_ITERATOR
139
140 #ifdef __TBB_iterators_H
141
142 template <typename IntType>
144
145 #else
146 // By courtesy of Intel's Threading Building Blocks team!!
147 // With best respect and gratitude for Intel Corporation.
148 template <typename IntType>
150 static_assert(std::numeric_limits<IntType>::is_integer, "Cannot instantiate counting_iterator with a non-integer type");
151 public:
152 typedef typename std::make_signed<IntType>::type difference_type;
153 typedef IntType value_type;
154 typedef const IntType* pointer;
155 typedef const IntType& reference;
156 typedef std::random_access_iterator_tag iterator_category;
157
158 counting_iterator() : my_counter() {}
159 explicit counting_iterator(IntType init) : my_counter(init) {}
160
161 reference operator*() const { return my_counter; }
162 value_type operator[](difference_type i) const { return *(*this + i); }
163
164 difference_type operator-(const counting_iterator& it) const { return my_counter - it.my_counter; }
165
166 counting_iterator& operator+=(difference_type forward) { my_counter += forward; return *this; }
167 counting_iterator& operator-=(difference_type backward) { return *this += -backward; }
168 counting_iterator& operator++() { return *this += 1; }
169 counting_iterator& operator--() { return *this -= 1; }
170
172 counting_iterator it(*this);
173 ++(*this);
174 return it;
175 }
177 counting_iterator it(*this);
178 --(*this);
179 return it;
180 }
181
182 counting_iterator operator-(difference_type backward) const { return counting_iterator(my_counter - backward); }
183 counting_iterator operator+(difference_type forward) const { return counting_iterator(my_counter + forward); }
184 friend counting_iterator operator+(difference_type forward, const counting_iterator it) { return it + forward; }
185
186 bool operator==(const counting_iterator& it) const { return *this - it == 0; }
187 bool operator!=(const counting_iterator& it) const { return !(*this == it); }
188 bool operator<(const counting_iterator& it) const {return *this - it < 0; }
189 bool operator>(const counting_iterator& it) const { return it < *this; }
190 bool operator<=(const counting_iterator& it) const { return !(*this > it); }
191 bool operator>=(const counting_iterator& it) const { return !(*this < it); }
192
193 private:
194 IntType my_counter;
195
196 }; // end of class counting_iterator
197 #endif // end of __TBB_iterators_H
198
199 #endif // end of _TPF_ITERATOR
200
201 // returns the least power of 2 greater than or equal to n
202 inline constexpr unsigned long long
203 least_power_of_2_ge_n(unsigned long long n)
204 {
205 if(n == 1) return 0;
206
207 size_t power_of_2 = 1;
208
209 while(power_of_2 < n)
210 {
211 power_of_2 <<= 1; // power_of_2 *= 2
212 }
213 return power_of_2;
214 }
215
216 inline constexpr unsigned long long
217 fast_integer_power(unsigned long long x, unsigned long long n)
218 {
219 if(n == 0) return 1;
220
221 unsigned long long b = 1;
222
223 while(n)
224 {
225 if( n & 1 ) b *= x;
226
227 n >>= 1; x *= x;
228 }
229
230 return b;
231 }
232
233 template<typename Type, size_t CacheSize = cache_line_size>
234 struct alignas(CacheSize) cache_wrapper
235 {
236 private:
237 Type m_value;
238
239 public:
240 cache_wrapper(Type v = Type{}): m_value{ std::move(v) } { }
241
242 cache_wrapper(const cache_wrapper&) = default;
244
247
248 const Type& get() const noexcept
249 {
250 return this->m_value;
251 }
252
253 operator const Type&() const noexcept
254 {
255 return this->m_value;
256 }
257
258 operator Type&() noexcept
259 {
260 return this->m_value;
261 }
262
263 Type* operator&() noexcept
264 {
265 return &this->m_value;
266 }
267
268 const Type* operator&() const noexcept
269 {
270 return &this->m_value;
271 }
272
273 friend bool operator < (const cache_wrapper& left, const cache_wrapper& right)
274 {
275 return left.m_value < right.m_value;
276 }
277
278 friend bool operator <= (const cache_wrapper& left, const cache_wrapper& right)
279 {
280 return left.m_value <= right.m_value;
281 }
282
283 friend bool operator > (const cache_wrapper& left, const cache_wrapper& right)
284 {
285 return left.m_value > right.m_value;
286 }
287
288 friend bool operator >= (const cache_wrapper& left, const cache_wrapper& right)
289 {
290 return left.m_value >= right.m_value;
291 }
292
293 friend bool operator == (const cache_wrapper& left, const cache_wrapper& right)
294 {
295 return left.m_value == right.m_value;
296 }
297
298 friend bool operator != (const cache_wrapper& left, const cache_wrapper& right)
299 {
300 return left.m_value == right.m_value;
301 }
302
303 }; // end of class cache_wrapper
304
305 template<typename T, bool = false>
306 struct alignas(cache_line_size) st_cache_wrapper // non-class type
307 {
309 };
310
311 template<typename T>
312 struct alignas(cache_line_size) st_cache_wrapper<T, true> // class type
313 {
314 using type = T;
315 };
316
317 template<typename T>
319
320 template<typename Type, template<typename, typename...> class Container>
321 using cache_aligned_container_t = Container< cache_wrapper_t<Type>,
322 tbb::cache_aligned_allocator<cache_wrapper_t<Type>>>;
323
324 template<typename Type>
326
327 template<typename Type, size_t Size>
328 struct alignas(cache_line_size) st_cache_aligned_array:
329 public std::array<cache_wrapper_t<Type>, Size> { };
330
331 template<typename Type, size_t Size>
333
334 template<typename Type>
335 bool is_cache_aligned(Type* ptr)
336 {
337 /*
338 012 - Cache aligned pointer with operator new( std::align_val_t{64}, std::nothrow)
339 https://www.youtube.com/watch?v=hJ3di5bq3xU&list=PL1_C6uWTeBDFiAKI2NWGVV8J2_U9-4GXl&index=13
340
341 011 - std::jthread The C++20 Thread Launcher, Experiment on alignas
342 https://www.youtube.com/watch?v=KUwXn_axEME&list=PL1_C6uWTeBDFiAKI2NWGVV8J2_U9-4GXl&index=12
343 */
344
345 return (((std::size_t)(char*)ptr) % cache_line_size) == 0;
346 }
347
348 template<typename Type1, typename Type2>
349 bool is_cache_aligned(Type1* ptr1, Type2* ptr2)
350 {
351 return (((std::size_t)((char*)ptr1 - (char*)ptr2))% cache_line_size) == 0;
352 }
353
354 template<typename Type1, typename Type2>
355 bool is_cache_aligned(Type1& value1, Type2& value2)
356 {
357 return is_cache_aligned(&value1, &value2);
358 }
359
360 template<typename Type>
361 bool is_cache_aligned(Type& value)
362 {
363 return is_cache_aligned(&value);
364 }
365 // The programmer can modify or adjust this value
366 constexpr size_t ConcurrencyLimit = 10;
367
368 // The programmer should not modify this value
369 constexpr size_t ForceSequential = ConcurrencyLimit - 1;
370 constexpr size_t ForceParallel = ConcurrencyLimit + 1;
371
372 inline bool go_parallel(size_t element_count, size_t greater_than)
373 {
374 return (element_count > std::thread::hardware_concurrency() * greater_than);
375 }
376
377 template<typename IteratorType>
378 inline bool go_parallel(IteratorType begin, IteratorType end, size_t greater_than)
379 {
380 auto distance = std::distance(begin, end);
381 if(distance < 0) distance = - distance;
382
383 size_t element_count = (size_t)distance;
384
385 // return (element_count > std::thread::hardware_concurrency() * greater_than);
386 return go_parallel(element_count, greater_than);
387 }
388
389 template<typename ContainerType>
390 inline bool go_parallel(ContainerType&& container, size_t greater_than)
391 {
392 size_t element_count = container.size();
393
394 //return (element_count > std::thread::hardware_concurrency()*greater_than);
395 return go_parallel(element_count, greater_than);
396 }
397
398 template<typename IndexType, typename CallbackType>
399 void for_index(IndexType begin_index, IndexType end_index, CallbackType&& callback,
400 size_t greater_than = ConcurrencyLimit)
401 {
402 auto counting_begin = counting_iterator<size_t>{(size_t)begin_index};
403 auto counting_end = counting_iterator<size_t>{(size_t)end_index};
404
405 size_t minimum = begin_index < end_index ? begin_index : end_index;
406 size_t maximum = begin_index < end_index ? end_index : begin_index;
407
408 if(go_parallel(maximum - minimum, greater_than))
409 std::for_each(std::execution::par_unseq,
410 counting_begin, counting_end, std::forward<CallbackType>(callback));
411 else
412 std::for_each(counting_begin, counting_end, std::forward<CallbackType>(callback));
413 }
414
415 template<typename ContainerType, typename CallbackType>
416 void for_index(ContainerType&& container, CallbackType&& callback,
417 size_t greater_than = ConcurrencyLimit)
418 {
419 for_index(size_t{}, container.size(),
420 std::forward<CallbackType>(callback), greater_than);
421 }
422
423 template<typename IndexType, typename InitType,
424 typename SumupType, typename HandleParallelType>
425 auto reduce_index(IndexType begin_index, IndexType end_index, InitType&& init_value,
426 SumupType&& sum_up, HandleParallelType&& handle_parallel,
427 size_t greater_than = ConcurrencyLimit)
428 {
429 auto counting_begin = counting_iterator<size_t>{(size_t)begin_index};
430 auto counting_end = counting_iterator<size_t>{(size_t)end_index};
431
432 size_t minimum = begin_index < end_index ? begin_index : end_index;
433 size_t maximum = begin_index < end_index ? end_index : begin_index;
434
435 if(go_parallel(maximum - minimum, greater_than))
436 return std::transform_reduce(std::execution::par_unseq,
437 counting_begin, counting_end, std::forward<InitType>(init_value),
438 std::forward<SumupType>(sum_up),
439 std::forward<HandleParallelType>(handle_parallel));
440 else
441 return std::transform_reduce(std::execution::seq,
442 counting_begin, counting_end, std::forward<InitType>(init_value),
443 std::forward<SumupType>(sum_up),
444 std::forward<HandleParallelType>(handle_parallel));
445 }
446
447 template<typename ContainerType, typename InitType,
448 typename SumupType, typename HandleParallelType>
449 auto reduce_index(ContainerType&& container, InitType&& init_value,
450 SumupType&& sum_up, HandleParallelType&& handle_parallel,
451 size_t greater_than = ConcurrencyLimit)
452 {
453 return reduce_index(size_t{}, container.size(),
454 std::forward<InitType>(init_value),
455 std::forward<SumupType>(sum_up),
456 std::forward<HandleParallelType>(handle_parallel));
457 }
458
459 // We will add more algorithms to augment C++ Standard library
460
461 } // end of namespace parallel
462
463} // end of namespace tpf
464
465#endif // end of file _TPF_PARALLEL_HPP
friend counting_iterator operator+(difference_type forward, const counting_iterator it)
bool operator==(const counting_iterator &it) const
counting_iterator & operator++()
std::random_access_iterator_tag iterator_category
counting_iterator operator++(int)
counting_iterator & operator--()
bool operator>=(const counting_iterator &it) const
counting_iterator & operator+=(difference_type forward)
counting_iterator & operator-=(difference_type backward)
bool operator>(const counting_iterator &it) const
value_type operator[](difference_type i) const
counting_iterator operator--(int)
counting_iterator operator+(difference_type forward) const
bool operator<(const counting_iterator &it) const
bool operator<=(const counting_iterator &it) const
std::make_signed< IntType >::type difference_type
counting_iterator operator-(difference_type backward) const
bool operator!=(const counting_iterator &it) const
difference_type operator-(const counting_iterator &it) const
constexpr decltype(auto) counting_iterator(IndexType idx=IndexType{})
bool operator==(rational< L > const &lhs, rational< R > const &rhs)
string_stream & operator>(string_stream &os, Type &&arg)
string_stream & operator<(string_stream &os, Type &&arg)
constexpr unsigned long long fast_integer_power(unsigned long long x, unsigned long long n)
bool is_cache_aligned(Type *ptr)
constexpr size_t default_alignment()
auto reduce_index(IndexType begin_index, IndexType end_index, InitType &&init_value, SumupType &&sum_up, HandleParallelType &&handle_parallel, size_t greater_than=ConcurrencyLimit)
constexpr unsigned long long least_power_of_2_ge_n(unsigned long long n)
constexpr size_t ForceParallel
constexpr size_t ForceSequential
Container< cache_wrapper_t< Type >, tbb::cache_aligned_allocator< cache_wrapper_t< Type > > > cache_aligned_container_t
constexpr size_t ConcurrencyLimit
void for_index(IndexType begin_index, IndexType end_index, CallbackType &&callback, size_t greater_than=ConcurrencyLimit)
void atomic_operation(std::atomic< ElementType > &atom, Operation &&opr)
cache_aligned_container_t< Type, std::vector > cache_aligned_vector_t
bool go_parallel(size_t element_count, size_t greater_than)
constexpr auto cache_line_size
typename st_cache_wrapper< T, std::is_class_v< T > >::type cache_wrapper_t
types::enable_if_container_type_t< ContainerType< EleType, Types... >, bool > operator!=(const ContainerType< EleType, Types... > &left, const ContainerType< EleType, Types... > &right)
Definition: tpf_set.hpp:197
types::enable_if_container_type_t< ContainerType< EleType, Types... >, bool > operator>=(const ContainerType< EleType, Types... > &left, const ContainerType< EleType, Types... > &right)
Definition: tpf_set.hpp:281
types::enable_if_container_type_t< ContainerType< EleType, Types... >, bool > operator<=(const ContainerType< EleType, Types... > &left, const ContainerType< EleType, Types... > &right)
Definition: tpf_set.hpp:274
Includes subnamespace conversion.
Definition: 31-visit.cpp:7
auto minimum(Type_1 a, Type_2 b)
Definition: tpf_types.hpp:196
auto maximum(Type_1 a, Type_2 b)
Definition: tpf_types.hpp:181
cache_wrapper(cache_wrapper &&)=default
cache_wrapper & operator=(cache_wrapper &&)=default
const Type * operator&() const noexcept
cache_wrapper & operator=(const cache_wrapper &)=default
cache_wrapper(const cache_wrapper &)=default
const Type & get() const noexcept
Type functions are implemented.