Tpetra parallel linear algebra Version of the Day
Loading...
Searching...
No Matches
Tpetra_Details_Merge.hpp
1// @HEADER
2// *****************************************************************************
3// Tpetra: Templated Linear Algebra Services Package
4//
5// Copyright 2008 NTESS and the Tpetra contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
10#ifndef TPETRA_DETAILS_MERGE_HPP
11#define TPETRA_DETAILS_MERGE_HPP
12
13#include "TpetraCore_config.h"
14#include "Teuchos_TestForException.hpp"
15#include <algorithm> // std::sort
16#include <utility> // std::pair, std::make_pair
17#include <stdexcept>
18
19namespace Tpetra {
20namespace Details {
21
40template<class OrdinalType, class IndexType>
41IndexType
42countMergeUnsortedIndices (const OrdinalType curInds[],
43 const IndexType numCurInds,
44 const OrdinalType inputInds[],
45 const IndexType numInputInds)
46{
47 IndexType mergeCount = 0;
48
49 if (numCurInds <= numInputInds) {
50 // More input than current entries, so iterate linearly over
51 // input and scan current entries repeatedly.
52 for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
53 const OrdinalType inVal = inputInds[inPos];
54 for (IndexType curPos = 0; curPos < numCurInds; ++curPos) {
55 if (curInds[curPos] == inVal) {
56 ++mergeCount;
57 }
58 }
59 }
60 }
61 else { // numCurInds > numInputInds
62 // More current entries than input, so iterate linearly over
63 // current entries and scan input repeatedly.
64 for (IndexType curPos = 0; curPos < numCurInds; ++curPos) {
65 const OrdinalType curVal = curInds[curPos];
66 for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
67 if (inputInds[inPos] == curVal) {
68 ++mergeCount;
69 }
70 }
71 }
72 }
73
74#ifdef HAVE_TPETRA_DEBUG
75 TEUCHOS_TEST_FOR_EXCEPTION
76 (mergeCount > numInputInds, std::logic_error, "mergeCount = " <<
77 mergeCount << " > numInputInds = " << numInputInds << ".");
78#endif // HAVE_TPETRA_DEBUG
79 return mergeCount;
80}
81
99template<class OrdinalType, class IndexType>
100IndexType
101countMergeSortedIndices (const OrdinalType curInds[],
102 const IndexType numCurInds,
103 const OrdinalType inputInds[],
104 const IndexType numInputInds)
105{
106 // Only count possible merges; don't merge yet. If the row
107 // doesn't have enough space, we want to return without side
108 // effects.
109 IndexType curPos = 0;
110 IndexType inPos = 0;
111 IndexType mergeCount = 0;
112 while (inPos < numInputInds && curPos < numCurInds) {
113 const OrdinalType inVal = inputInds[inPos];
114 const OrdinalType curVal = curInds[curPos];
115
116 if (curVal == inVal) { // can merge
117 ++mergeCount;
118 ++inPos; // go on to next input
119 } else if (curVal < inVal) {
120 ++curPos; // go on to next row entry
121 } else { // curVal > inVal
122 ++inPos; // can't merge it ever, since row entries sorted
123 }
124 }
125
126#ifdef HAVE_TPETRA_DEBUG
127 TEUCHOS_TEST_FOR_EXCEPTION
128 (inPos > numInputInds, std::logic_error, "inPos = " << inPos <<
129 " > numInputInds = " << numInputInds << ".");
130 TEUCHOS_TEST_FOR_EXCEPTION
131 (curPos > numCurInds, std::logic_error, "curPos = " << curPos <<
132 " > numCurInds = " << numCurInds << ".");
133 TEUCHOS_TEST_FOR_EXCEPTION
134 (mergeCount > numInputInds, std::logic_error, "mergeCount = " <<
135 mergeCount << " > numInputInds = " << numInputInds << ".");
136#endif // HAVE_TPETRA_DEBUG
137
138 // At this point, 2 situations are possible:
139 //
140 // 1. inPos == numInputInds: We looked at all inputs. Some
141 // (mergeCount of them) could have been merged.
142 // 2. inPos < numInputInds: We didn't get to look at all inputs.
143 // Since the inputs are sorted, we know that those inputs we
144 // didn't examine weren't mergeable.
145 //
146 // Either way, mergeCount gives the number of mergeable inputs.
147 return mergeCount;
148}
149
150
171template<class OrdinalType, class IndexType>
172std::pair<bool, IndexType>
173mergeSortedIndices (OrdinalType curInds[],
174 const IndexType midPos,
175 const IndexType endPos,
176 const OrdinalType inputInds[],
177 const IndexType numInputInds)
178{
179 // Optimize for the following cases, in decreasing order of
180 // optimization concern:
181 //
182 // a. Current row has allocated space but no entries
183 // b. All input indices already in the graph
184 //
185 // If the row has insufficient space for a merge, don't do
186 // anything! Just return an upper bound on the number of extra
187 // entries required to fit everything. This imposes extra cost,
188 // but correctly supports the count, allocate, fill, compute
189 // pattern. (If some entries were merged in and others weren't,
190 // how would you know which got merged in? CrsGraph insert is
191 // idempotent, but CrsMatrix insert does a += on the value and
192 // is therefore not idempotent.)
193 if (midPos == 0) {
194 // Current row has no entries, but may have preallocated space.
195 if (endPos >= numInputInds) {
196 // Sufficient space for new entries; copy directly.
197 for (IndexType k = 0; k < numInputInds; ++k) {
198 curInds[k] = inputInds[k];
199 }
200 std::sort (curInds, curInds + numInputInds);
201 return std::make_pair (true, numInputInds);
202 }
203 else { // not enough space
204 return std::make_pair (false, numInputInds);
205 }
206 }
207 else { // current row contains indices, requiring merge
208 // Only count possible merges; don't merge yet. If the row
209 // doesn't have enough space, we want to return without side
210 // effects.
211 const IndexType mergeCount =
213 inputInds,
214 numInputInds);
215 const IndexType extraSpaceNeeded = numInputInds - mergeCount;
216 const IndexType newRowLen = midPos + extraSpaceNeeded;
217 if (newRowLen > endPos) {
218 return std::make_pair (false, newRowLen);
219 }
220 else { // we have enough space; merge in
221 IndexType curPos = 0;
222 IndexType inPos = 0;
223 IndexType newPos = midPos;
224 while (inPos < numInputInds && curPos < midPos) {
225 const OrdinalType inVal = inputInds[inPos];
226 const OrdinalType curVal = curInds[curPos];
227
228 if (curVal == inVal) { // can merge
229 ++inPos; // merge and go on to next input
230 } else if (curVal < inVal) {
231 ++curPos; // go on to next row entry
232 } else { // curVal > inVal
233 // The input doesn't exist in the row.
234 // Copy it to the end; we'll sort it in later.
235 curInds[newPos] = inVal;
236 ++newPos;
237 ++inPos; // move on to next input
238 }
239 }
240
241 // If any inputs remain, and the current row has space for them,
242 // then copy them in. We'll sort them later.
243 for (; inPos < numInputInds && newPos < newRowLen; ++inPos, ++newPos) {
244 curInds[newPos] = inputInds[inPos];
245 }
246
247#ifdef HAVE_TPETRA_DEBUG
248 TEUCHOS_TEST_FOR_EXCEPTION
249 (newPos != newRowLen, std::logic_error, "mergeSortedIndices: newPos = "
250 << newPos << " != newRowLen = " << newRowLen << " = " << midPos <<
251 " + " << extraSpaceNeeded << ". Please report this bug to the Tpetra "
252 "developers.");
253#endif // HAVE_TPETRA_DEBUG
254
255 if (newPos != midPos) { // new entries at end; sort them in
256 // FIXME (mfh 03 Jan 2016) Rather than sorting, it would
257 // be faster (linear time) just to iterate backwards
258 // through the current entries, pushing them over to make
259 // room for unmerged input. However, I'm not so worried
260 // about the asymptotics here, because dense rows in a
261 // sparse matrix are ungood anyway.
262 std::sort (curInds, curInds + newPos);
263 }
264 return std::make_pair (true, newPos);
265 }
266 }
267}
268
269
291template<class OrdinalType, class IndexType>
292std::pair<bool, IndexType>
293mergeUnsortedIndices (OrdinalType curInds[],
294 const IndexType midPos,
295 const IndexType endPos,
296 const OrdinalType inputInds[],
297 const IndexType numInputInds)
298{
299 // Optimize for the following cases, in decreasing order of
300 // optimization concern:
301 //
302 // a. Current row has allocated space but no entries
303 // b. All input indices already in the graph
304 //
305 // If the row has insufficient space for a merge, don't do
306 // anything! Just return an upper bound on the number of extra
307 // entries required to fit everything. This imposes extra cost,
308 // but correctly supports the count, allocate, fill, compute
309 // pattern. (If some entries were merged in and others weren't,
310 // how would you know which got merged in? CrsGraph insert is
311 // idempotent, but CrsMatrix insert does a += on the value and
312 // is therefore not idempotent.)
313 if (midPos == 0) {
314 // Current row has no entries, but may have preallocated space.
315 if (endPos >= numInputInds) {
316 // Sufficient space for new entries; copy directly.
317 for (IndexType k = 0; k < numInputInds; ++k) {
318 curInds[k] = inputInds[k];
319 }
320 return std::make_pair (true, numInputInds);
321 }
322 else { // not enough space
323 return std::make_pair (false, numInputInds);
324 }
325 }
326 else { // current row contains indices, requiring merge
327 // Only count possible merges; don't merge yet. If the row
328 // doesn't have enough space, we want to return without side
329 // effects.
330 const IndexType mergeCount =
332 inputInds,
333 numInputInds);
334 const IndexType extraSpaceNeeded = numInputInds - mergeCount;
335 const IndexType newRowLen = midPos + extraSpaceNeeded;
336 if (newRowLen > endPos) {
337 return std::make_pair (false, newRowLen);
338 }
339 else { // we have enough space; merge in
340 // Iterate linearly over input. Scan current entries
341 // repeatedly. Add new entries at end.
342 IndexType newPos = midPos;
343 for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
344 const OrdinalType inVal = inputInds[inPos];
345 bool merged = false;
346 for (IndexType curPos = 0; curPos < midPos; ++curPos) {
347 if (curInds[curPos] == inVal) {
348 merged = true;
349 }
350 }
351 if (! merged) {
352 curInds[newPos] = inVal;
353 ++newPos;
354 }
355 }
356 return std::make_pair (true, newPos);
357 }
358 }
359}
360
385template<class OrdinalType, class ValueType, class IndexType>
386std::pair<bool, IndexType>
387mergeUnsortedIndicesAndValues (OrdinalType curInds[],
388 ValueType curVals[],
389 const IndexType midPos,
390 const IndexType endPos,
391 const OrdinalType inputInds[],
392 const ValueType inputVals[],
393 const IndexType numInputInds)
394{
395 // Optimize for the following cases, in decreasing order of
396 // optimization concern:
397 //
398 // a. Current row has allocated space but no entries
399 // b. All input indices already in the graph
400 //
401 // If the row has insufficient space for a merge, don't do
402 // anything! Just return an upper bound on the number of extra
403 // entries required to fit everything. This imposes extra cost,
404 // but correctly supports the count, allocate, fill, compute
405 // pattern. (If some entries were merged in and others weren't,
406 // how would you know which got merged in? CrsGraph insert is
407 // idempotent, but CrsMatrix insert does a += on the value and
408 // is therefore not idempotent.)
409 if (midPos == 0) {
410 // Current row has no entries, but may have preallocated space.
411 if (endPos >= numInputInds) {
412 // Sufficient space for new entries; copy directly.
413 for (IndexType k = 0; k < numInputInds; ++k) {
414 curInds[k] = inputInds[k];
415 curVals[k] = inputVals[k];
416 }
417 return std::make_pair (true, numInputInds);
418 }
419 else { // not enough space
420 return std::make_pair (false, numInputInds);
421 }
422 }
423 else { // current row contains indices, requiring merge
424 // Only count possible merges; don't merge yet. If the row
425 // doesn't have enough space, we want to return without side
426 // effects.
427 const IndexType mergeCount =
429 inputInds,
430 numInputInds);
431 const IndexType extraSpaceNeeded = numInputInds - mergeCount;
432 const IndexType newRowLen = midPos + extraSpaceNeeded;
433 if (newRowLen > endPos) {
434 return std::make_pair (false, newRowLen);
435 }
436 else { // we have enough space; merge in
437 // Iterate linearly over input. Scan current entries
438 // repeatedly. Add new entries at end.
439 IndexType newPos = midPos;
440 for (IndexType inPos = 0; inPos < numInputInds; ++inPos) {
441 const OrdinalType inInd = inputInds[inPos];
442 bool merged = false;
443 for (IndexType curPos = 0; curPos < midPos; ++curPos) {
444 if (curInds[curPos] == inInd) {
445 merged = true;
446 curVals[curPos] += inputVals[inPos];
447 }
448 }
449 if (! merged) {
450 curInds[newPos] = inInd;
451 curVals[newPos] = inputVals[inPos];
452 ++newPos;
453 }
454 }
455 return std::make_pair (true, newPos);
456 }
457 }
458}
459
460} // namespace Details
461} // namespace Tpetra
462
463#endif // TPETRA_DETAILS_MERGE_HPP
Nonmember function that computes a residual Computes R = B - A * X.
std::pair< bool, IndexType > mergeSortedIndices(OrdinalType curInds[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const IndexType numInputInds)
Attempt to merge the input indices into the current row's column indices, assuming that both the curr...
IndexType countMergeSortedIndices(const OrdinalType curInds[], const IndexType numCurInds, const OrdinalType inputInds[], const IndexType numInputInds)
Count the number of column indices that can be merged into the current row, assuming that both the cu...
std::pair< bool, IndexType > mergeUnsortedIndices(OrdinalType curInds[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const IndexType numInputInds)
Attempt to merge the input indices into the current row's column indices, assuming that both the curr...
IndexType countMergeUnsortedIndices(const OrdinalType curInds[], const IndexType numCurInds, const OrdinalType inputInds[], const IndexType numInputInds)
Count the number of column indices that can be merged into the current row, assuming that both the cu...
std::pair< bool, IndexType > mergeUnsortedIndicesAndValues(OrdinalType curInds[], ValueType curVals[], const IndexType midPos, const IndexType endPos, const OrdinalType inputInds[], const ValueType inputVals[], const IndexType numInputInds)
Attempt to merge the input indices and values into the current row's column indices and corresponding...
Namespace Tpetra contains the class and methods constituting the Tpetra library.