1 /*
2  * kmp_affinity.h -- header for affinity management
3  */
4 
5 
6 //===----------------------------------------------------------------------===//
7 //
8 // The LLVM Compiler Infrastructure
9 //
10 // This file is dual licensed under the MIT and the University of Illinois Open
11 // Source Licenses. See LICENSE.txt for details.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef KMP_AFFINITY_H
16 #define KMP_AFFINITY_H
17 
18 extern int __kmp_affinity_compact; /* Affinity 'compact' value */
19 
20 class Address {
21 public:
22  static const unsigned maxDepth = 32;
23  unsigned labels[maxDepth];
24  unsigned childNums[maxDepth];
25  unsigned depth;
26  unsigned leader;
27  Address(unsigned _depth)
28  : depth(_depth), leader(FALSE) {
29  }
30  Address &operator=(const Address &b) {
31  depth = b.depth;
32  for (unsigned i = 0; i < depth; i++) {
33  labels[i] = b.labels[i];
34  childNums[i] = b.childNums[i];
35  }
36  leader = FALSE;
37  return *this;
38  }
39  bool operator==(const Address &b) const {
40  if (depth != b.depth)
41  return false;
42  for (unsigned i = 0; i < depth; i++)
43  if(labels[i] != b.labels[i])
44  return false;
45  return true;
46  }
47  bool isClose(const Address &b, int level) const {
48  if (depth != b.depth)
49  return false;
50  if ((unsigned)level >= depth)
51  return true;
52  for (unsigned i = 0; i < (depth - level); i++)
53  if(labels[i] != b.labels[i])
54  return false;
55  return true;
56  }
57  bool operator!=(const Address &b) const {
58  return !operator==(b);
59  }
60  void print() const {
61  unsigned i;
62  printf("Depth: %u --- ", depth);
63  for(i=0;i<depth;i++) {
64  printf("%u ", labels[i]);
65  }
66  }
67 };
68 
69 class AddrUnsPair {
70 public:
71  Address first;
72  unsigned second;
73  AddrUnsPair(Address _first, unsigned _second)
74  : first(_first), second(_second) {
75  }
76  AddrUnsPair &operator=(const AddrUnsPair &b)
77  {
78  first = b.first;
79  second = b.second;
80  return *this;
81  }
82  void print() const {
83  printf("first = "); first.print();
84  printf(" --- second = %u", second);
85  }
86  bool operator==(const AddrUnsPair &b) const {
87  if(first != b.first) return false;
88  if(second != b.second) return false;
89  return true;
90  }
91  bool operator!=(const AddrUnsPair &b) const {
92  return !operator==(b);
93  }
94 };
95 
96 
97 static int
98 __kmp_affinity_cmp_Address_labels(const void *a, const void *b)
99 {
100  const Address *aa = (const Address *)&(((AddrUnsPair *)a)
101  ->first);
102  const Address *bb = (const Address *)&(((AddrUnsPair *)b)
103  ->first);
104  unsigned depth = aa->depth;
105  unsigned i;
106  KMP_DEBUG_ASSERT(depth == bb->depth);
107  for (i = 0; i < depth; i++) {
108  if (aa->labels[i] < bb->labels[i]) return -1;
109  if (aa->labels[i] > bb->labels[i]) return 1;
110  }
111  return 0;
112 }
113 
114 
// qsort comparator used for KMP_AFFINITY "compact" placement: orders
// AddrUnsPair records by their childNums.  The __kmp_affinity_compact
// innermost levels are compared first (deepest level most significant),
// then the remaining outer levels in top-down order.  Requires
// 0 <= __kmp_affinity_compact <= depth and equal depths.
static int
__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)
      ->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)
      ->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
    // Phase 1: compare the deepest __kmp_affinity_compact levels,
    // innermost-out (j runs depth-1 down to depth-compact).
    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    // Phase 2: compare the remaining outer levels top-down
    // (j runs 0 up to depth-compact-1).
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}
139 
140 
public:
    // Tuning constants for the synthesized hierarchy shape.
    static const kmp_uint32 maxLeaves=4;  // maximum fan-out allowed at level 0
    static const kmp_uint32 minBranch=4;  // minimum branching factor for upper levels
    kmp_uint32 maxLevels;  // allocated capacity of numPerLevel/skipPerLevel

    kmp_uint32 depth;             // number of hierarchy levels actually in use
    kmp_uint32 base_num_threads;  // thread count the hierarchy was last sized for
    enum init_status { initialized=0, not_initialized=1, initializing=2 };
    volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress
    volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

    // Both arrays live in one allocation owned by numPerLevel (see init()).
    kmp_uint32 *numPerLevel;   // fan-out (children per node) at each level
    kmp_uint32 *skipPerLevel;  // cumulative product of lower-level fan-outs (stride per level)
172 
173  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
174  int hier_depth = adr2os[0].first.depth;
175  int level = 0;
176  for (int i=hier_depth-1; i>=0; --i) {
177  int max = -1;
178  for (int j=0; j<num_addrs; ++j) {
179  int next = adr2os[j].first.childNums[i];
180  if (next > max) max = next;
181  }
182  numPerLevel[level] = max+1;
183  ++level;
184  }
185  }
186 
    // Default-construct with capacity for 7 levels; the level arrays are not
    // allocated until init() runs.
    hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

    // Release the level arrays; numPerLevel owns the single allocation that
    // also backs skipPerLevel.  Only frees once init() has completed.
    void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); }
190 
    // Build the hierarchy arrays from a topology table, or synthesize a
    // two-level tree when adr2os is NULL.  Exactly one thread performs the
    // initialization; any other caller spins until it completes.
    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        // Claim the initializer role via CAS; losers wait for the winner.
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing);
        if (bool_result == 0) { // Wait for initialization
            while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result==1);

        /* Added explicit initialization of the data fields here to prevent usage of dirty value
           observed when static library is re-initialized multiple times (e.g. when
           non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */
        depth = 1;
        resizing = 0;
        maxLevels = 7;
        // One allocation holds both arrays: first half numPerLevel, second
        // half skipPerLevel.
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);
        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else {
            // No topology info: assume maxLeaves threads per leaf and
            // ceil(num_addrs/maxLeaves) leaves on the next level.
            numPerLevel[0] = maxLeaves;
            numPerLevel[1] = num_addrs/maxLeaves;
            if (num_addrs%maxLeaves) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

        // Rebalance overly wide levels: halve a level's fan-out (rounding up
        // to even first) and double the level above, repeating until every
        // level fits within its branching limit.
        kmp_uint32 branch = minBranch;
        if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves;
        if (branch<minBranch) branch=minBranch;
        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if(numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch<4) branch = minBranch;
            }
        }

        // skipPerLevel[i] = product of fan-outs below level i (the stride,
        // in thread ids, between adjacent nodes at that level).
        for (kmp_uint32 i=1; i<depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
        // Fill in hierarchy in the case of oversubscription
        for (kmp_uint32 i=depth; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        uninitialized = initialized; // One writer

    }
254 
255  // Resize the hierarchy if nproc changes to something larger than before
    // Resize the hierarchy if nproc changes to something larger than before.
    // Grows by doubling the top level in place while spare maxLevels slots
    // remain, then reallocates larger arrays if that is still not enough.
    // Only one thread resizes at a time (CAS on 'resizing').
    void resize(kmp_uint32 nproc)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        while (bool_result == 0) { // someone else is trying to resize
            KMP_CPU_PAUSE();
            if (nproc <= base_num_threads) // happy with other thread's resize
                return;
            else // try to resize
                bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        }
        KMP_DEBUG_ASSERT(bool_result!=0);
        if (nproc <= base_num_threads) return; // happy with other thread's resize

        // Calculate new maxLevels
        kmp_uint32 old_sz = skipPerLevel[depth-1];  // current capacity in threads
        kmp_uint32 incs = 0, old_maxLevels = maxLevels;
        // First see if old maxLevels is enough to contain new size
        for (kmp_uint32 i=depth; i<maxLevels && nproc>old_sz; ++i) {
            skipPerLevel[i] = 2*skipPerLevel[i-1];
            numPerLevel[i-1] *= 2;
            old_sz *= 2;
            depth++;
        }
        if (nproc > old_sz) { // Not enough space, need to expand hierarchy
            // Count how many extra doubling levels are required.
            while (nproc > old_sz) {
                old_sz *=2;
                incs++;
                depth++;
            }
            maxLevels += incs;

            // Resize arrays
            kmp_uint32 *old_numPerLevel = numPerLevel;
            kmp_uint32 *old_skipPerLevel = skipPerLevel;
            numPerLevel = skipPerLevel = NULL;
            numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
            skipPerLevel = &(numPerLevel[maxLevels]);

            // Copy old elements from old arrays
            for (kmp_uint32 i=0; i<old_maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
                numPerLevel[i] = old_numPerLevel[i];
                skipPerLevel[i] = old_skipPerLevel[i];
            }

            // Init new elements in arrays to 1
            for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
                numPerLevel[i] = 1;
                skipPerLevel[i] = 1;
            }

            // Free old arrays (skipPerLevel shared the same allocation)
            __kmp_free(old_numPerLevel);
        }

        // Fill in oversubscription levels of hierarchy
        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        base_num_threads = nproc;
        resizing = 0; // One writer

    }
318 };
319 #endif // KMP_AFFINITY_H