LLVM OpenMP* Runtime Library
Loading...
Searching...
No Matches
kmp_collapse.h
1/*
2 * kmp_collapse.h -- header for loop collapse feature
3 */
4
5//===----------------------------------------------------------------------===//
6//
7// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8// See https://llvm.org/LICENSE.txt for license information.
9// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef KMP_COLLAPSE_H
14#define KMP_COLLAPSE_H
15
16#include <type_traits>
17
18// Type of the index into the loop nest structures
19// (with values from 0 to less than n from collapse(n))
20typedef kmp_int32 kmp_index_t;
21
22// Type for combined loop nest space IV:
23typedef kmp_uint64 kmp_loop_nest_iv_t;
24
25// Loop has <, <=, etc. as a comparison:
26enum comparison_t : kmp_int32 {
27 comp_less_or_eq = 0,
28 comp_greater_or_eq = 1,
29 comp_not_eq = 2,
30 comp_less = 3,
31 comp_greater = 4
32};
33
34// Type of loop IV.
35// Type of bounds and step, after usual promotions
36// are a subset of these types (32 & 64 only):
37enum loop_type_t : kmp_int32 {
38 loop_type_uint8 = 0,
39 loop_type_int8 = 1,
40 loop_type_uint16 = 2,
41 loop_type_int16 = 3,
42 loop_type_uint32 = 4,
43 loop_type_int32 = 5,
44 loop_type_uint64 = 6,
45 loop_type_int64 = 7
46};
47
52template <typename T> struct bounds_infoXX_template {
53
54 // typedef typename traits_t<T>::unsigned_t UT;
55 typedef typename traits_t<T>::signed_t ST;
56
57 loop_type_t loop_type; // The differentiator
58 loop_type_t loop_iv_type;
59 comparison_t comparison;
60 // outer_iv should be 0 (or any other less then number of dimentions)
61 // if loop doesn't depend on it (lb1 and ub1 will be 0).
62 // This way we can do multiplication without a check.
63 kmp_index_t outer_iv;
64
65 // unions to keep the size constant:
66 union {
67 T lb0;
68 kmp_uint64 lb0_u64; // real type can be signed
69 };
70
71 union {
72 T lb1;
73 kmp_uint64 lb1_u64; // real type can be signed
74 };
75
76 union {
77 T ub0;
78 kmp_uint64 ub0_u64; // real type can be signed
79 };
80
81 union {
82 T ub1;
83 kmp_uint64 ub1_u64; // real type can be signed
84 };
85
86 union {
87 ST step; // signed even if bounds type is unsigned
88 kmp_int64 step_64; // signed
89 };
90
91 kmp_loop_nest_iv_t trip_count;
92};
93
100
101 loop_type_t loop_type; // The differentiator
102 loop_type_t loop_iv_type;
103 comparison_t comparison;
104 // outer_iv should be 0 (or any other less then number of dimentions)
105 // if loop doesn't depend on it (lb1 and ub1 will be 0).
106 // This way we can do multiplication without a check.
107 kmp_index_t outer_iv;
108
109 kmp_uint64 lb0_u64; // real type can be signed
110 kmp_uint64 lb1_u64; // real type can be signed
111 kmp_uint64 ub0_u64; // real type can be signed
112 kmp_uint64 ub1_u64; // real type can be signed
113 kmp_int64 step_64; // signed
114
115 // This is internal, but it's the only internal thing we need
116 // in rectangular case, so let's expose it here:
117 kmp_loop_nest_iv_t trip_count;
118};
119
120//-------------------------------------------------------------------------
121// Additional types for internal representation:
122
123// Array for a point in the loop space, in the original space.
124// It's represented in kmp_uint64, but each dimention is calculated in
125// that loop IV type. Also dimentions have to be converted to those types
126// when used in generated code.
127typedef kmp_uint64* kmp_point_t;
128
129// Array: Number of loop iterations on each nesting level to achieve some point,
130// in expanded space or in original space.
131// OMPTODO: move from using iterations to using offsets (iterations multiplied
132// by steps). For those we need to be careful with the types, as step can be
133// negative, but it'll remove multiplications and divisions in several places.
134typedef kmp_loop_nest_iv_t* kmp_iterations_t;
135
136// Internal struct with additional info:
137template <typename T> struct bounds_info_internalXX_template {
138
139 // OMPTODO: should span have type T or should it better be
140 // kmp_uint64/kmp_int64 depending on T sign? (if kmp_uint64/kmp_int64 than
141 // updated bounds should probably also be kmp_uint64/kmp_int64). I'd like to
142 // use big_span_t, if it can be resolved at compile time.
143 typedef
144 typename std::conditional<std::is_signed<T>::value, kmp_int64, kmp_uint64>
145 big_span_t;
146
147 // typedef typename big_span_t span_t;
148 typedef T span_t;
149
150 bounds_infoXX_template<T> b; // possibly adjusted bounds
151
152 // Leaving this as a union in case we'll switch to span_t with different sizes
153 // (depending on T)
154 union {
155 // Smallest possible value of iv (may be smaller than actually possible)
156 span_t span_smallest;
157 kmp_uint64 span_smallest_u64;
158 };
159
160 // Leaving this as a union in case we'll switch to span_t with different sizes
161 // (depending on T)
162 union {
163 // Biggest possible value of iv (may be bigger than actually possible)
164 span_t span_biggest;
165 kmp_uint64 span_biggest_u64;
166 };
167
168 // Did we adjust loop bounds (not counting canonicalization)?
169 bool loop_bounds_adjusted;
170};
171
172// Internal struct with additional info:
173struct bounds_info_internal_t {
174
175 bounds_info_t b; // possibly adjusted bounds
176
177 // Smallest possible value of iv (may be smaller than actually possible)
178 kmp_uint64 span_smallest_u64;
179
180 // Biggest possible value of iv (may be bigger than actually possible)
181 kmp_uint64 span_biggest_u64;
182
183 // Did we adjust loop bounds (not counting canonicalization)?
184 bool loop_bounds_adjusted;
185};
186
187//----------APIs for rectangular loop nests--------------------------------
188
189// Canonicalize loop nest and calculate overall trip count.
190// "bounds_nest" has to be allocated per thread.
191// API will modify original bounds_nest array to bring it to a canonical form
192// (only <= and >=, no !=, <, >). If the original loop nest was already in a
193// canonical form there will be no changes to bounds in bounds_nest array
194// (only trip counts will be calculated).
195// Returns trip count of overall space.
196extern "C" kmp_loop_nest_iv_t
197__kmpc_process_loop_nest_rectang(ident_t *loc, kmp_int32 gtid,
198 /*in/out*/ bounds_info_t *original_bounds_nest,
199 kmp_index_t n);
200
201// Calculate old induction variables corresponding to overall new_iv.
202// Note: original IV will be returned as if it had kmp_uint64 type,
203// will have to be converted to original type in user code.
204// Note: trip counts should be already calculated by
205// __kmpc_process_loop_nest_rectang.
206// OMPTODO: special case 2, 3 nested loops - if it'll be possible to inline
207// that into user code.
208extern "C" void
209__kmpc_calc_original_ivs_rectang(ident_t *loc, kmp_loop_nest_iv_t new_iv,
210 const bounds_info_t *original_bounds_nest,
211 /*out*/ kmp_uint64 *original_ivs,
212 kmp_index_t n);
213
214//----------Init API for non-rectangular loops--------------------------------
215
216// Init API for collapsed loops (static, no chunks defined).
217// "bounds_nest" has to be allocated per thread.
218// API will modify original bounds_nest array to bring it to a canonical form
219// (only <= and >=, no !=, <, >). If the original loop nest was already in a
220// canonical form there will be no changes to bounds in bounds_nest array
221// (only trip counts will be calculated). Internally API will expand the space
222// to parallelogram/parallelepiped, calculate total, calculate bounds for the
223// chunks in terms of the new IV, re-calc them in terms of old IVs (especially
224// important on the left side, to hit the lower bounds and not step over), and
225// pick the correct chunk for this thread (so it will calculate chunks up to the
226// needed one). It could be optimized to calculate just this chunk, potentially
227// a bit less well distributed among threads. It is designed to make sure that
228// threads will receive predictable chunks, deterministically (so that next nest
229// of loops with similar characteristics will get exactly same chunks on same
230// threads).
231// Current contract: chunk_bounds_nest has only lb0 and ub0,
232// lb1 and ub1 are set to 0 and can be ignored. (This may change in the future).
233extern "C" kmp_int32
234__kmpc_for_collapsed_init(ident_t *loc, kmp_int32 gtid,
235 /*in/out*/ bounds_info_t *original_bounds_nest,
236 /*out*/ bounds_info_t *chunk_bounds_nest,
237 kmp_index_t n,
238 /*out*/ kmp_int32 *plastiter);
239
240#endif // KMP_COLLAPSE_H
Definition kmp.h:238