Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_KokkosCounter.cpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10// clang-format off
12#include "TpetraCore_config.h"
13#include "Kokkos_Core.hpp"
14#include "Teuchos_TestForException.hpp"
15#include <cstring>
16#include <string>
17
18namespace Tpetra {
19namespace Details {
20
21
22 /***************************** Deep Copy *****************************/
23 namespace DeepCopyCounterDetails {
24 // Static variables
25 bool is_initialized=true;
26 size_t count_same=0;
27 size_t count_different=0;
28 bool count_active=false;
29
30 void kokkosp_begin_deep_copy(Kokkos::Tools::SpaceHandle dst_handle, const char* dst_name, const void* dst_ptr,
31 Kokkos::Tools::SpaceHandle src_handle, const char* src_name, const void* src_ptr,
32 uint64_t size) {
33
34 if(count_active) {
35 if(strcmp(dst_handle.name,src_handle.name))
36 count_different++;
37 else
38 count_same++;
39 }
40 }
41
42 }// end DeepCopyCounterDetails
43
44
46 DeepCopyCounterDetails::count_active=true;
47 Kokkos::Tools::Experimental::set_begin_deep_copy_callback(DeepCopyCounterDetails::kokkosp_begin_deep_copy);
48 }
49
51 DeepCopyCounterDetails::count_same=0;
52 DeepCopyCounterDetails::count_different=0;
53 }
54
56 DeepCopyCounterDetails::count_active=false;
57 }
58
60 return DeepCopyCounterDetails::count_same;
61 }
62
64 return DeepCopyCounterDetails::count_different;
65 }
66
67
68
69 /***************************** Fence *****************************/
70
71
72 namespace FenceCounterDetails {
73
// Shared state for the fence counter.
// Set to true by initialize() once the count vectors have been sized.
bool is_initialized{false};
// While false, kokkosp_begin_fence ignores every event.
bool count_active{false};
// Per-device-type tally of fences that are not global device synchronizations.
std::vector<size_t> count_instance;
// Per-device-type tally of global device synchronization fences.
std::vector<size_t> count_global;
// Number of bins in each vector above; assigned in initialize().
int num_devices{0};
80
81
82 void kokkosp_begin_fence(const char* name, const uint32_t deviceId,
83 uint64_t* handle) {
84
85 if(count_active) {
86 using namespace Kokkos::Tools::Experimental;
87 ExecutionSpaceIdentifier eid = identifier_from_devid(deviceId);
88
89 // Figure out what count bin to stick this in
90 int idx = (int) eid.type;
91#if KOKKOS_VERSION >= 40499
92 if(eid.instance_id == int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
93#else
94 if(eid.instance_id == Impl::int_for_synchronization_reason(SpecialSynchronizationCases::GlobalDeviceSynchronization))
95#endif
96 count_global[idx]++;
97 else
98 count_instance[idx]++;
99 }
100 }
101
102
103 std::string get_label(int i) {
104 using namespace Kokkos::Tools::Experimental;
105 DeviceType i_type = devicetype_from_uint32t(i);
106 std::string device_label;
107 if (i_type == DeviceType::Serial) device_label="Serial";
108 else if (i_type == DeviceType::OpenMP) device_label="OpenMP";
109 else if (i_type == DeviceType::Cuda) device_label="Cuda";
110 else if (i_type == DeviceType::HIP) device_label="HIP";
111 else if (i_type == DeviceType::OpenMPTarget) device_label="OpenMPTarget";
112 else if (i_type == DeviceType::HPX) device_label="HPX";
113 else if (i_type == DeviceType::Threads) device_label="Threats";
114 else if (i_type == DeviceType::SYCL) device_label="SYCL";
115 else if (i_type == DeviceType::OpenACC) device_label="OpenACC";
116 else if (i_type == DeviceType::Unknown) device_label="Unknown";
117
118 return device_label;
119 }
120
121 void initialize() {
122 using namespace Kokkos::Tools::Experimental;
123 num_devices = (int) DeviceType::Unknown;
124 count_instance.resize(num_devices);
125 count_instance.assign(num_devices,0);
126 count_global.resize(num_devices);
127 count_global.assign(num_devices,0);
128 is_initialized=true;
129 }
130
131 }// end FenceCounterDetails
132
133
134
135
137 if(!FenceCounterDetails::is_initialized)
138 FenceCounterDetails::initialize();
139 FenceCounterDetails::count_active=true;
140 Kokkos::Tools::Experimental::set_begin_fence_callback(FenceCounterDetails::kokkosp_begin_fence);
141 }
142
144 FenceCounterDetails::count_instance.assign(FenceCounterDetails::num_devices,0);
145 FenceCounterDetails::count_global.assign(FenceCounterDetails::num_devices,0);
146 }
147
149 FenceCounterDetails::count_active=false;
150 }
151
152 size_t FenceCounter::get_count_global(const std::string & device) {
153 using namespace Kokkos::Tools::Experimental;
154 for(int i=0;i<FenceCounterDetails::num_devices; i++) {
155 std::string device_label = FenceCounterDetails::get_label(i);
156
157 if(device == device_label)
158 return FenceCounterDetails::count_global[i];
159 }
160
161 // Haven't found a device by this name
162 TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,std::string("Error: ") + device + std::string(" is not a device known to Tpetra"));
163 }
164
165
166 size_t FenceCounter::get_count_instance(const std::string & device) {
167 using namespace Kokkos::Tools::Experimental;
168 for(int i=0;i<FenceCounterDetails::num_devices; i++) {
169 std::string device_label = FenceCounterDetails::get_label(i);
170
171 if(device == device_label)
172 return FenceCounterDetails::count_instance[i];
173 }
174
175 // Haven't found a device by this name
176 TEUCHOS_TEST_FOR_EXCEPTION(1,std::runtime_error,std::string("Error: ") + device + std::string(" is not a device known to Tpetra"));
177 }
178
179// clang-format on
180namespace KokkosRegionCounterDetails {
181std::vector<std::string> regions;
182
183void push_region_callback(const char *label) { regions.push_back(label); }
184static_assert(std::is_same_v<decltype(&push_region_callback),
185 Kokkos_Profiling_pushFunction>,
186 "Unexpected Kokkos profiling interface API. This is an internal "
187 "Tpetra developer error, please report this.");
188
189} // namespace KokkosRegionCounterDetails
190
192 Kokkos::Tools::Experimental::set_push_region_callback(
193 KokkosRegionCounterDetails::push_region_callback);
194}
195
197 KokkosRegionCounterDetails::regions.clear();
198}
199
201 Kokkos::Tools::Experimental::set_push_region_callback(nullptr);
202}
203
204size_t
206 size_t count = 0;
207 for (const auto &region : KokkosRegionCounterDetails::regions) {
208 count += (region.find(needle) != std::string::npos);
209 }
210 return count;
211}
212
213void KokkosRegionCounter::dump_regions(Teuchos::FancyOStream &os) {
214 for (const auto &region : KokkosRegionCounterDetails::regions) {
215 os << region << "\n";
216 }
217}
218
219void KokkosRegionCounter::dump_regions(std::ostream &os) {
220 for (const auto &region : KokkosRegionCounterDetails::regions) {
221 os << region << "\n";
222 }
223}
224
225
226// clang-format off
227
228
229} // namespace Details
230} // namespace Tpetra
231
Declaration of various tools for counting Kokkos calls of various types using the Kokkos Profiling Li...
size_t get_count_different_space()
Query the deep_copy counter for copies between different spaces.
size_t get_count_same_space()
Query the deep_copy counter for copies in the same space.
void stop()
Stop the deep_copy counter.
void start()
Start the deep_copy counter.
void reset()
Reset the deep_copy counter.
size_t get_count_instance(const std::string &device)
Query the fence counter for given device, for an exec_space_instance.fence().
size_t get_count_global(const std::string &device)
Query the fence counter for the given device, for a Kokkos::fence().
void dump_regions(std::ostream &os)
Print all observed region labels, separated by newline.
size_t get_count_region_contains(const std::string &substr)
How many regions containing substr have been seen.
Nonmember function that computes a residual. Computes R = B - A * X.
Namespace Tpetra contains the class and methods constituting the Tpetra library.