C++ Library Extensions 2022.12.09
To help learn modern C++ programming
cpg_parallel.hpp
Go to the documentation of this file.
1/*
2 Author: Thomas Kim 김창희
3 First Edit: Dec. 16, 2021
4*/
5
6#ifndef _CPG_PARALLEL_HPP
7#define _CPG_PARALLEL_HPP
8
9#ifndef NOMINMAX
10#define NOMINMAX
11#endif
12
13#include "cpg_iterator.hpp"
16
17namespace cpg::parallel
18{
19 namespace cgt = cpg::types;
20
21 namespace hidden
22 {
23 template<typename OperationType, typename... ContainerTypes>
24 requires ( cgt::vector_c<ContainerTypes> && ... ) || ( cgt::std_array_flat_c<ContainerTypes> && ... )
25 constexpr auto resultant_element_type(OperationType&& operation, ContainerTypes&&... containers)
26 {
27 auto Vs = std::tuple{ containers... };
28
29 constexpr auto N = sizeof...(ContainerTypes);
30
31 return cgt::for_stallion<N>([&]<auto... i>(cgt::sequence<i...>)
32 {
33 using return_t = decltype(operation( std::get<i>(Vs)[0] ...));
34
35 if constexpr(cgt::valid_type_c<return_t>)
36 return operation( std::get<i>(Vs)[0] ...);
37 else
38 return cgt::no_type{};
39 });
40 }
41
42 template<typename OperationType,
43 cgt::vector_c ContainerType, cgt::std_array_flat_c... ArrayTypes>
44 constexpr auto resultant_element_type(OperationType&& operation,
45 ContainerType&& container, ArrayTypes&&... arrays)
46 {
47 auto Vs = std::tuple{ arrays... };
48
49 constexpr auto N = sizeof...(ArrayTypes);
50
51 if constexpr( N > 0)
52 {
53 using result_t = decltype(cgt::for_stallion<N>([&]<auto... i>(cgt::sequence<i...>)
54 { return operation( container[0], std::get<i>(Vs) ... ); }));
55
56 if constexpr(cgt::valid_type_c<result_t>)
57 return cgt::for_stallion<N>([&]<auto... i>(cgt::sequence<i...>)
58 {
59 return operation( container[0], std::get<i>(Vs) ... );
60 });
61 else
62 return cgt::no_type{};
63 }
64 else
65 {
66 using result_t = decltype(operation( container[0] ));
67
68 if constexpr(cgt::valid_type_c<result_t>)
69 return operation( container[0] );
70 else
71 return cgt::no_type{};
72 }
73 }
74
75 template< typename OperationType, std::integral Dimension,
76 std::integral... Dimensions>
77 constexpr auto resultant_element_type(OperationType&& operation,
78 Dimension dim, Dimensions... dims)
79 {
80 constexpr auto N = sizeof...(Dimensions) + 1;
81 std::array<long long, N> indices{};
82
83 return cgt::for_stallion<N>( [&]<auto... i>(cgt::sequence<i...>)
84 {
85 using result_t = decltype(operation( indices[i]... ));
86
87 if constexpr( cgt::valid_type_c<result_t> )
88 return operation( indices[i]... );
89 else
90 return cgt::no_type{};
91 } );
92 }
93
94 }
95 // end of namespace hidden
96
97 /*
98 ( thread_local + atomic<bool> )
99 + ( exception_ptr + current_exception + rethrow_exception )
100 + ( mutex + scoped_lock )
101
102 Keyword thread_local,
103 Storage class specifiers - https://en.cppreference.com/w/cpp/language/storage_duration
104
105 std::scoped_lock - https://en.cppreference.com/w/cpp/thread/scoped_lock
106
107 std::exception_ptr - https://en.cppreference.com/w/cpp/error/exception_ptr
108 std::current_exception - https://en.cppreference.com/w/cpp/error/current_exception
109 std::rethrow_exception - https://en.cppreference.com/w/cpp/error/rethrow_exception
110 std::make_exception_ptr - https://en.cppreference.com/w/cpp/error/make_exception_ptr
111
112 144 - C++17 STL 패러렐 알고리즘 사용시 특히 주의해야 할 점, Execution Policy, Compound Requirements 실제 사용예
113 https://www.youtube.com/watch?v=f0G0b2nv4ME&list=PLsIvhalfft1EtaiJJRmWKUGG0W1hyUKiw&index=143
114
115 141 - C++ 함수의 void 반환값 void return 처리 방법, valid_type_c 구현
116 https://www.youtube.com/watch?v=AFIChtb4rik&list=PLsIvhalfft1EtaiJJRmWKUGG0W1hyUKiw&index=141
117
118 140 - Deserialization, 나머지 정리의 결정판, 중딩 수학이 코딩에 미치는 영향 - Aeonary 편리한 go_std_parallel()의 결정판 구현
119 https://www.youtube.com/watch?v=v3mPVspzYNk&list=PLsIvhalfft1EtaiJJRmWKUGG0W1hyUKiw&index=140
120
121 139 - C++ 다차원 동적 배열(Matrix) 만드는 방법 - 고급 Serialization에 거의 항상 사용하는 방법
122 https://www.youtube.com/watch?v=kcKJ24NhVaQ&list=PLsIvhalfft1EtaiJJRmWKUGG0W1hyUKiw&index=139
123 */
124
125 template< template<typename, typename...> class ContainerType,
126 typename ExecutionPolicy, typename OperationType,
127 std::integral RangeType, std::integral... RangeTypes>
128 constexpr decltype(auto) go_std_parallel(ExecutionPolicy&& execution_policy,
129 OperationType&& operation, RangeType head, RangeTypes... tails) noexcept
130 requires requires
131 {
132 { operation(head, tails...) } noexcept ;
133 }
134 {
135 constexpr long long rank = sizeof...(RangeTypes) + 1;
136
137 using rank_array_t = std::array<long long, rank>;
138
139 rank_array_t
140 dimensions{ static_cast<long long>(head), static_cast<long long>(tails)... };
141
142 rank_array_t multipliers{1};
143
144 for(size_t n = rank-1; n >= 1; --n)
145 {
146 multipliers[n] = multipliers[0]; // m_m[n] = m_m[0]
147 multipliers[0] *= dimensions[n]; // m_m[0] = m_d[n]
148 }
149
150 auto compute_index = [&multipliers](auto n)
151 {
152 rank_array_t indices;
153
154 for(long long r = 0; r < rank; ++r) // rank * (rank + 1) / 2
155 {
156 indices[r] = n / multipliers[r];
157 n %= multipliers[r];
158 }
159
160 return indices;
161 };
162
163 long long total = (head * ... * tails); // Binary Left Fold
164
165 using element_t =
166 decltype( hidden::resultant_element_type(operation, head, tails...) );
167
168 if constexpr(cgt::valid_type_c<element_t>)
169 {
170 using container_t = ContainerType<element_t>;
171
172 container_t Result(total);
173
174 auto parallel_task = [&Result, &compute_index, &operation](auto i)
175 {
176 auto indices = compute_index(i);
177
178 cgt::for_stallion<rank>( [&]< auto...k > // 0, 1, 2
180 {
181 Result[i] = operation( indices[k]... );
182 });
183
184 };
185
186 std::for_each(execution_policy, counting_iterator(),
187 counting_iterator(total), parallel_task);
188
189 return Result;
190 }
191 else
192 {
193 auto parallel_task = [&compute_index, &operation](auto i)
194 {
195 auto indices = compute_index(i);
196
197 cgt::for_stallion<rank>( [&]< auto...k > // 0, 1, 2
199 {
200 operation( indices[k]... );
201 });
202 };
203
204 std::for_each(execution_policy, counting_iterator(),
205 counting_iterator(total), parallel_task);
206
207 return cgt::no_type{};
208 }
209 }
210
211 template<typename OperationType, std::integral RangeType, std::integral... RangeTypes>
212 constexpr decltype(auto) go_std_parallel(OperationType&& operation,
213 RangeType head, RangeTypes... tails) noexcept requires requires
214 {
215 { operation(head, tails... ) } noexcept;
216 }
217 {
218 return go_std_parallel<std::vector>(std::execution::par_unseq,
219 std::forward<OperationType>(operation), head, tails...);
220 }
221
222 template< template<typename, typename...> class ContainerType,
223 typename ExecutionPolicy, typename OperationType,
224 std::integral RangeType, std::integral... RangeTypes>
225 decltype(auto) go_std_parallel_exception_safe(ExecutionPolicy&& execution_policy,
226 OperationType&& operation, RangeType head, RangeTypes... tails)
227 requires cgt::valid_type_c<decltype(hidden::resultant_element_type(operation, head, tails...))>
228 {
229 constexpr long long rank = sizeof...(RangeTypes) + 1;
230
231 using rank_array_t = std::array<long long, rank>;
232
233 rank_array_t
234 dimensions{ static_cast<long long>(head), static_cast<long long>(tails)... };
235
236 rank_array_t multipliers{1};
237
238 for(size_t n = rank-1; n >= 1; --n)
239 {
240 multipliers[n] = multipliers[0]; // m_m[n] = m_m[0]
241 multipliers[0] *= dimensions[n]; // m_m[0] = m_d[n]
242 }
243
244 auto compute_index = [&multipliers](auto n)
245 {
246 rank_array_t indices;
247
248 for(long long r = 0; r < rank; ++r) // rank * (rank + 1) / 2
249 {
250 indices[r] = n / multipliers[r];
251 n %= multipliers[r];
252 }
253
254 return indices;
255 };
256
257 long long total = (head * ... * tails); // Binary Left Fold
258
259 using element_t =
260 decltype( hidden::resultant_element_type(operation, head, tails...) );
261
262 std::atomic<bool> report_exception{};
264 std::exception_ptr work_exception;
265
266 using container_t = ContainerType<element_t>;
267
268 container_t Result(total);
269
270 auto parallel_task = [&report_exception, &mutex, &work_exception,
271 &Result, &compute_index, &operation](auto i)
272 {
273 static thread_local bool exception_occurred{};
274
275 if(exception_occurred) return;
276 else if(report_exception)
277 {
278 exception_occurred = true; return;
279 }
280
281 try
282 {
283 auto indices = compute_index(i);
284
285 cgt::for_stallion<rank>( [&]< auto...k > // 0, 1, 2
287 {
288 Result[i] = operation( indices[k]... );
289 });
290 }
291 catch(...)
292 {
293 if(report_exception == false)
294 {
295 std::scoped_lock lock{mutex};
296
297 if(!work_exception)
298 work_exception = std::current_exception();
299
300 report_exception = true;
301 }
302
303 exception_occurred = true;
304 }
305 };
306 // end of parallel_task
307
308 std::for_each(execution_policy, counting_iterator(),
309 counting_iterator(total), parallel_task);
310
311 if(work_exception)
312 std::rethrow_exception(work_exception);
313
314 return Result;
315 }
316
317 template< template<typename, typename...> class ContainerType,
318 typename ExecutionPolicy, typename OperationType,
319 std::integral RangeType, std::integral... RangeTypes>
320 decltype(auto) go_std_parallel_exception_safe(ExecutionPolicy&& execution_policy,
321 OperationType&& operation, RangeType head, RangeTypes... tails)
322 requires requires { requires cgt::no_type_c<decltype(hidden::resultant_element_type(operation, head, tails...))>; }
323 {
324 constexpr long long rank = sizeof...(RangeTypes) + 1;
325
326 using rank_array_t = std::array<long long, rank>;
327
328 rank_array_t
329 dimensions{ static_cast<long long>(head), static_cast<long long>(tails)... };
330
331 rank_array_t multipliers{1};
332
333 for(size_t n = rank-1; n >= 1; --n)
334 {
335 multipliers[n] = multipliers[0]; // m_m[n] = m_m[0]
336 multipliers[0] *= dimensions[n]; // m_m[0] = m_d[n]
337 }
338
339 auto compute_index = [&multipliers](auto n)
340 {
341 rank_array_t indices;
342
343 for(long long r = 0; r < rank; ++r) // rank * (rank + 1) / 2
344 {
345 indices[r] = n / multipliers[r];
346 n %= multipliers[r];
347 }
348
349 return indices;
350 };
351
352 long long total = (head * ... * tails); // Binary Left Fold
353
354 std::atomic<bool> report_exception{};
356 std::exception_ptr work_exception;
357
358 auto parallel_task =
359 [&report_exception, &mutex, &work_exception,
360 &compute_index, &operation](auto i)
361 {
362 thread_local bool exception_occurred{}; // false
363
364 if(exception_occurred)
365 return;
366 else if(report_exception)
367 {
368 exception_occurred = true; return;
369 }
370
371 try
372 {
373 auto indices = compute_index(i);
374
375 cgt::for_stallion<rank>( [&]< auto...k >
377 {
378 operation( indices[k]... );
379 });
380 }
381 catch(...)
382 {
383 if(report_exception == false)
384 {
385 std::scoped_lock lock{mutex};
386
387 if(!work_exception)
388 work_exception = std::current_exception();
389 }
390
391 exception_occurred = true;
392 report_exception = true;
393 }
394 };
395 // end of parallel_task
396
397 std::for_each(execution_policy, counting_iterator(),
398 counting_iterator(total), parallel_task);
399
400 if(work_exception)
401 std::rethrow_exception(work_exception);
402
403 return cgt::no_type{};
404 }
405
406 template<typename OperationType, std::integral RangeType, std::integral... RangeTypes>
407 decltype(auto) go_std_parallel_exception_safe(OperationType&& operation,
408 RangeType head, RangeTypes... tails)
409 {
410 return go_std_parallel_exception_safe<std::vector>(std::execution::par_unseq,
411 std::forward<OperationType>(operation), head, tails...);
412 }
413
415 template< template<typename, auto> class ContainerType = std::array,
416 typename ExecutionPolicy=cgt::no_type,
417 typename OperationType = cgt::no_type, auto head = 1, auto... tails>
418 constexpr decltype(auto) go_std_parallel(ExecutionPolicy&& execution_policy,
419 OperationType&& operation, cgt::sequence<head, tails...>)
420 {
421 constexpr long long rank = sizeof...(tails) + 1;
422
423 std::array<long long, rank>
424 dimensions{ static_cast<long long>(head),
425 static_cast<long long>(tails)... };
426
427 std::array<long long, rank> multipliers{1};
428
429 for(size_t n = rank-1; n >=1; --n)
430 {
431 multipliers[n] = multipliers[0];
432 multipliers[0] *= dimensions[n];
433 }
434
435 auto compute_index = [&multipliers](auto n)
436 {
437 std::array<long long, rank> indices{};
438
439 for(long long r = 0; r < rank; ++r)
440 {
441 indices[r] = n / multipliers[r];
442 n %= multipliers[r];
443 }
444
445 return indices;
446 };
447
448 auto total = dimensions[0] * multipliers[0];
449
450 using element_t = decltype(hidden::resultant_element_type(operation, head, tails...));
451
452 constexpr std::size_t N = (head * ... * tails);
453
454 if constexpr(cgt::valid_type_c<element_t>)
455 {
456 using container_t = ContainerType<element_t, N>;
457
458 container_t R;
459
460 auto parallel_task =[&R, &compute_index, &operation](auto i)
461 {
462 auto indices = compute_index(i);
463
464 cgt::for_stallion<rank>([&]<auto...k>(cgt::sequence<k...>)
465 {
466 R[i] = operation(indices[k]... );
467 });
468 };
469
470 std::for_each(execution_policy,
471 counting_iterator(), counting_iterator(total), parallel_task);
472
473 return R;
474 }
475 else
476 return cgt::no_type{};
477 }
478
479 // go_std_parallel(..., vector, arrays...)
480 template<typename ExecutionPolicy, typename OperationType,
481 cgt::vector_c ContainerType, cgt::std_array_flat_c... ArrayTypes>
482 constexpr decltype(auto) go_std_parallel(ExecutionPolicy&& execution_policy,
483 OperationType&& operation, ContainerType&& container, ArrayTypes&&...arrays)
485 std::remove_cvref_t<ArrayTypes>{}...))
486 {
487 auto Vs = std::forward_as_tuple(arrays...);
488
489 using element_t =
490 decltype(hidden::resultant_element_type(operation, container, arrays...));
491
492 if constexpr(cgt::valid_type_c<element_t>)
493 {
494 using vector_t = std::vector<element_t>;
495 vector_t R(container.size());
496
497 auto parallel_task = [&](auto i)
498 {
499 constexpr auto N = sizeof...(ArrayTypes);
500
501 // if constexpr( N > 0 )
502 // R[i] = cgt::for_stallion<N>([&]<auto... k>(cgt::sequence<k...>)
503 // { return operation(container[i], std::get<k>(Vs)... ); });
504 // else
505 // R[i] = operation(container[i]);
506
507 if constexpr(N == 0)
508 R[i] = operation(container[i]);
509 else if constexpr(N == 1)
510 R[i] = operation(container[i], std::get<0>(Vs) );
511 else if constexpr(N == 2)
512 R[i] = operation(container[i],
513 std::get<0>(Vs), std::get<1>(Vs));
514 else if constexpr(N == 3)
515 R[i] = operation(container[i],
516 std::get<0>(Vs), std::get<1>(Vs), std::get<2>(Vs));
517 else if constexpr(N == 4)
518 R[i] = operation(container[i],
519 std::get<0>(Vs), std::get<1>(Vs), std::get<2>(Vs), std::get<3>(Vs));
520 else
521 R[i] = cgt::for_stallion<N>([&]<auto... k>(cgt::sequence<k...>)
522 { return operation(container[i], std::get<k>(Vs)... ); });
523 };
524
525 std::for_each(execution_policy, counting_iterator(),
526 counting_iterator(container.size()), parallel_task);
527 return R;
528 }
529 else
530 return cgt::no_type{};
531 }
532 // end of go_std_parallel(..., vector, arrays...)
533
534 // go_std_parallel(..., vector, vector, vectors...)
535 template<typename ExecutionPolicy, typename OperationType,
536 cgt::vector_c... ContainerTypes> requires ( sizeof...(ContainerTypes) > 1)
537 constexpr decltype(auto) go_std_parallel(ExecutionPolicy&& execution_policy,
538 OperationType&& operation, ContainerTypes&&... containers)
539 {
540 auto Vs = std::forward_as_tuple(containers...);
541
542 auto A_size = std::get<0>(Vs).size();
543
544 auto all_element_counts_are_the_same =
546
547 assert(all_element_counts_are_the_same);
548
549 using element_t =
550 decltype(hidden::resultant_element_type(operation, containers...));
551
552 if constexpr(cgt::valid_type_c<element_t>)
553 {
554 using vector_t = std::vector<element_t>;
555
556 vector_t R(A_size);
557
558 auto parallel_task = [&](auto i)
559 {
560 constexpr auto N = sizeof...(ContainerTypes);
561
562 R[i] = cgt::for_stallion<N>([&]<auto... k>(cgt::sequence<k...>)
563 {
564 return operation( std::get<k>(Vs)[i] ... );
565 });
566 };
567
568 std::for_each(execution_policy, counting_iterator(),
569 counting_iterator(A_size), parallel_task);
570
571 return R;
572 }
573 else
574 return cgt::no_type{};
575 }
576 // end of go_std_parallel(..., vector, vector, vectors...)
577
578 // go_std_parallel(..., array, arrays...)
579 template<typename ExecutionPolicy, typename OperationType,
580 cgt::std_array_flat_c... ContainerTypes> requires (sizeof...(ContainerTypes) > 0)
581 constexpr decltype(auto) go_std_parallel(ExecutionPolicy&& execution_policy,
582 OperationType&& operation, ContainerTypes&&... containers)
583 requires (cgt::element_counts_are_the_same(std::remove_cvref_t<ContainerTypes>{}...))
584 {
585 auto Vs = std::forward_as_tuple(containers...);
586
587 using Vs_t = decltype(Vs);
588 using A_t = std::remove_cvref_t< std::tuple_element_t<0, Vs_t> >;
589 constexpr auto A_size = std::tuple_size_v< A_t >;
590
591 using element_t =
592 decltype(hidden::resultant_element_type(operation, containers...));
593
594 if constexpr(cgt::valid_type_c<element_t>)
595 {
596 using array_t = std::array<element_t, A_size>; array_t R;
597
598 auto parallel_task = [&](auto i)
599 {
600 constexpr auto N = sizeof...(ContainerTypes);
601
602 // R[i] = cgt::for_stallion<N>([&]<auto... k>(cgt::sequence<k...>)
603 // {
604 // return operation( std::get<k>(Vs)[i] ... );
605 // });
606
607 if constexpr(N == 1)
608 R[i] = operation( std::get<0>(Vs)[i] );
609 else if constexpr(N == 2)
610 R[i] = operation( std::get<0>(Vs)[i], std::get<1>(Vs)[i] );
611 else if constexpr(N == 3)
612 R[i] = operation( std::get<0>(Vs)[i], std::get<1>(Vs)[i], std::get<2>(Vs)[i] );
613 else if constexpr(N == 4)
614 R[i] = operation( std::get<0>(Vs)[i], std::get<1>(Vs)[i],
615 std::get<2>(Vs)[i], std::get<3>(Vs)[i]);
616 else if constexpr(N == 5)
617 R[i] = operation( std::get<0>(Vs)[i], std::get<1>(Vs)[i],
618 std::get<2>(Vs)[i], std::get<3>(Vs)[i], std::get<4>(Vs)[i]);
619 else
620 R[i] = cgt::for_stallion<N>([&]<auto... k>(cgt::sequence<k...>)
621 {
622 return operation( std::get<k>(Vs)[i] ... );
623 });
624 };
625
626 std::for_each(execution_policy, counting_iterator(),
627 counting_iterator(A_size), parallel_task);
628
629 return R;
630 }
631 else
632 return cgt::no_type{};
633 }
634 // end of go_std_parallel(..., array, arrays...)
635}
636// end of namespace cpg::parallel
637
638#endif // end of file
int return_t
std::mutex mutex
Definition: 022-mutex.cpp:12
std::vector< Type > vector_t
BoundType integral(FuncType &&f, std::tuple< BoundType, BoundType > bound)
constexpr auto resultant_element_type(OperationType &&operation, ContainerTypes &&... containers)
decltype(auto) go_std_parallel_exception_safe(ExecutionPolicy &&execution_policy, OperationType &&operation, RangeType head, RangeTypes... tails)
constexpr decltype(auto) counting_iterator(IndexType idx=IndexType{})
constexpr decltype(auto) go_std_parallel(ExecutionPolicy &&execution_policy, OperationType &&operation, RangeType head, RangeTypes... tails) noexcept
constexpr auto element_counts_are_the_same(ContainerTypes &&... containers)
Definition: cpg_types.hpp:3427
typename hidden::st_first_element< std::remove_cvref_t< T > >::type first_type_t
Definition: cpg_types.hpp:2643
std::integer_sequence< std::common_type_t< std::remove_cvref_t< decltype(Indices)>... >, Indices... > sequence
Definition: cpg_types.hpp:2665