34 using ExecutionSpace =
typename DeviceType::execution_space;
35 using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
36 using TeamMember =
typename TeamPolicy::member_type;
38 using IntegralViewType = Kokkos::View<typename RankExpander<Scalar, integralViewRank>::value_type, DeviceType>;
39 IntegralViewType integralView_;
46 int leftComponentSpan_;
47 int rightComponentSpan_;
48 int numTensorComponents_;
49 int leftFieldOrdinalOffset_;
50 int rightFieldOrdinalOffset_;
51 bool forceNonSpecialized_;
53 size_t fad_size_output_ = 0;
55 Kokkos::Array<int, 7> offsetsForComponentOrdinal_;
59 Kokkos::Array<int,Parameters::MaxTensorComponents> leftFieldBounds_;
60 Kokkos::Array<int,Parameters::MaxTensorComponents> rightFieldBounds_;
61 Kokkos::Array<int,Parameters::MaxTensorComponents> pointBounds_;
63 Kokkos::Array<int,Parameters::MaxTensorComponents> leftFieldRelativeEnumerationSpans_;
64 Kokkos::Array<int,Parameters::MaxTensorComponents> rightFieldRelativeEnumerationSpans_;
77 int leftFieldOrdinalOffset,
78 int rightFieldOrdinalOffset,
79 bool forceNonSpecialized)
81 integralView_(integralData.template getUnderlyingView<integralViewRank>()),
82 leftComponent_(leftComponent),
83 composedTransform_(composedTransform),
84 rightComponent_(rightComponent),
85 cellMeasures_(cellMeasures),
88 leftComponentSpan_(leftComponent.
extent_int(2)),
89 rightComponentSpan_(rightComponent.
extent_int(2)),
91 leftFieldOrdinalOffset_(leftFieldOrdinalOffset),
92 rightFieldOrdinalOffset_(rightFieldOrdinalOffset),
93 forceNonSpecialized_(forceNonSpecialized)
95 INTREPID2_TEST_FOR_EXCEPTION(numTensorComponents_ != rightComponent_.numTensorComponents(), std::invalid_argument,
"Left and right components must have matching number of tensorial components");
98 const int FIELD_DIM = 0;
99 const int POINT_DIM = 1;
103 for (
int r=0; r<numTensorComponents_; r++)
105 leftFieldBounds_[r] = leftComponent_.getTensorComponent(r).extent_int(FIELD_DIM);
106 maxFieldsLeft_ = std::max(maxFieldsLeft_, leftFieldBounds_[r]);
107 rightFieldBounds_[r] = rightComponent_.getTensorComponent(r).extent_int(FIELD_DIM);
108 maxFieldsRight_ = std::max(maxFieldsRight_, rightFieldBounds_[r]);
109 pointBounds_[r] = leftComponent_.getTensorComponent(r).extent_int(POINT_DIM);
110 maxPointCount_ = std::max(maxPointCount_, pointBounds_[r]);
114 int leftRelativeEnumerationSpan = 1;
115 int rightRelativeEnumerationSpan = 1;
116 for (
int startingComponent=numTensorComponents_-2; startingComponent>=0; startingComponent--)
118 leftRelativeEnumerationSpan *= leftFieldBounds_[startingComponent];
119 rightRelativeEnumerationSpan *= rightFieldBounds_[startingComponent];
120 leftFieldRelativeEnumerationSpans_ [startingComponent] = leftRelativeEnumerationSpan;
121 rightFieldRelativeEnumerationSpans_[startingComponent] = rightRelativeEnumerationSpan;
127 const bool allocateFadStorage = !(std::is_standard_layout<Scalar>::value && std::is_trivial<Scalar>::value);
128 if (allocateFadStorage)
133 const int R = numTensorComponents_ - 1;
136 int allocationSoFar = 0;
137 offsetsForComponentOrdinal_[R] = allocationSoFar;
140 for (
int r=R-1; r>0; r--)
145 num_ij *= leftFields * rightFields;
147 offsetsForComponentOrdinal_[r] = allocationSoFar;
148 allocationSoFar += num_ij;
150 offsetsForComponentOrdinal_[0] = allocationSoFar;
153 template<
size_t maxComponents,
size_t numComponents = maxComponents>
154 KOKKOS_INLINE_FUNCTION
155 int incrementArgument( Kokkos::Array<int,maxComponents> &arguments,
156 const Kokkos::Array<int,maxComponents> &bounds)
const
158 if (numComponents == 0)
164 int r =
static_cast<int>(numComponents - 1);
165 while (arguments[r] + 1 >= bounds[r])
171 if (r >= 0) ++arguments[r];
177 KOKKOS_INLINE_FUNCTION
179 const Kokkos::Array<int,Parameters::MaxTensorComponents> &bounds,
180 const int &numComponents)
const
182 if (numComponents == 0)
return -1;
183 int r =
static_cast<int>(numComponents - 1);
184 while (arguments[r] + 1 >= bounds[r])
190 if (r >= 0) ++arguments[r];
194 template<
size_t maxComponents,
size_t numComponents = maxComponents>
195 KOKKOS_INLINE_FUNCTION
196 int nextIncrementResult(
const Kokkos::Array<int,maxComponents> &arguments,
197 const Kokkos::Array<int,maxComponents> &bounds)
const
199 if (numComponents == 0)
205 int r =
static_cast<int>(numComponents - 1);
206 while (arguments[r] + 1 >= bounds[r])
216 KOKKOS_INLINE_FUNCTION
218 const Kokkos::Array<int,Parameters::MaxTensorComponents> &bounds,
219 const int &numComponents)
const
221 if (numComponents == 0)
return -1;
222 int r = numComponents - 1;
223 while (arguments[r] + 1 >= bounds[r])
231 template<
size_t maxComponents,
size_t numComponents = maxComponents>
232 KOKKOS_INLINE_FUNCTION
233 int relativeEnumerationIndex(
const Kokkos::Array<int,maxComponents> &arguments,
234 const Kokkos::Array<int,maxComponents> &bounds,
235 const int startIndex)
const
238 if (numComponents == 0)
244 int enumerationIndex = 0;
245 for (
size_t r=numComponents-1; r>
static_cast<size_t>(startIndex); r--)
247 enumerationIndex += arguments[r];
248 enumerationIndex *= bounds[r-1];
250 enumerationIndex += arguments[startIndex];
251 return enumerationIndex;
265 KOKKOS_INLINE_FUNCTION
268 constexpr int numTensorComponents = 3;
270 Kokkos::Array<int,numTensorComponents> pointBounds;
271 Kokkos::Array<int,numTensorComponents> leftFieldBounds;
272 Kokkos::Array<int,numTensorComponents> rightFieldBounds;
273 for (
unsigned r=0; r<numTensorComponents; r++)
275 pointBounds[r] = pointBounds_[r];
276 leftFieldBounds[r] = leftFieldBounds_[r];
277 rightFieldBounds[r] = rightFieldBounds_[r];
280 const int cellDataOrdinal = teamMember.league_rank();
281 const int numThreads = teamMember.team_size();
282 const int scratchViewSize = offsetsForComponentOrdinal_[0];
284 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> scratchView;
285 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> pointWeights;
286 Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged> leftFields_x, leftFields_y, leftFields_z, rightFields_x, rightFields_y, rightFields_z;
287 if (fad_size_output_ > 0) {
288 scratchView = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), scratchViewSize * numThreads, fad_size_output_);
289 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.extent_int(1), fad_size_output_);
290 leftFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[0], pointBounds[0], fad_size_output_);
291 rightFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[0], pointBounds[0], fad_size_output_);
292 leftFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[1], pointBounds[1], fad_size_output_);
293 rightFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[1], pointBounds[1], fad_size_output_);
294 leftFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[2], pointBounds[2], fad_size_output_);
295 rightFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[2], pointBounds[2], fad_size_output_);
298 scratchView = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), scratchViewSize * numThreads);
299 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.extent_int(1));
300 leftFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[0], pointBounds[0]);
301 rightFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[0], pointBounds[0]);
302 leftFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[1], pointBounds[1]);
303 rightFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[1], pointBounds[1]);
304 leftFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds[2], pointBounds[2]);
305 rightFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds[2], pointBounds[2]);
311 constexpr int R = numTensorComponents - 1;
313 const int composedTransformRank = composedTransform_.rank();
315 for (
int a_component=0; a_component < leftComponentSpan_; a_component++)
317 const int a = a_offset_ + a_component;
318 for (
int b_component=0; b_component < rightComponentSpan_; b_component++)
320 const int b = b_offset_ + b_component;
325 const int numLeftFieldsFinal = leftFinalComponent.
extent_int(0);
326 const int numRightFieldsFinal = rightFinalComponent.
extent_int(0);
335 const int maxFields = (maxFieldsLeft_ > maxFieldsRight_) ? maxFieldsLeft_ : maxFieldsRight_;
336 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,maxFields * maxPointCount_), [&] (
const int& fieldOrdinalPointOrdinal) {
337 const int fieldOrdinal = fieldOrdinalPointOrdinal % maxFields;
338 const int pointOrdinal = fieldOrdinalPointOrdinal / maxFields;
339 if ((fieldOrdinal < leftTensorComponent_x.
extent_int(0)) && (pointOrdinal < leftTensorComponent_x.
extent_int(1)))
341 const int leftRank = leftTensorComponent_x.
rank();
342 leftFields_x(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_x(fieldOrdinal,pointOrdinal) : leftTensorComponent_x(fieldOrdinal,pointOrdinal,a_component);
344 if ((fieldOrdinal < leftTensorComponent_y.
extent_int(0)) && (pointOrdinal < leftTensorComponent_y.
extent_int(1)))
346 const int leftRank = leftTensorComponent_y.
rank();
347 leftFields_y(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_y(fieldOrdinal,pointOrdinal) : leftTensorComponent_y(fieldOrdinal,pointOrdinal,a_component);
349 if ((fieldOrdinal < leftTensorComponent_z.
extent_int(0)) && (pointOrdinal < leftTensorComponent_z.
extent_int(1)))
351 const int leftRank = leftTensorComponent_z.
rank();
352 leftFields_z(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_z(fieldOrdinal,pointOrdinal) : leftTensorComponent_z(fieldOrdinal,pointOrdinal,a_component);
354 if ((fieldOrdinal < rightTensorComponent_x.
extent_int(0)) && (pointOrdinal < rightTensorComponent_x.
extent_int(1)))
356 const int rightRank = rightTensorComponent_x.
rank();
357 rightFields_x(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_x(fieldOrdinal,pointOrdinal) : rightTensorComponent_x(fieldOrdinal,pointOrdinal,b_component);
359 if ((fieldOrdinal < rightTensorComponent_y.
extent_int(0)) && (pointOrdinal < rightTensorComponent_y.
extent_int(1)))
361 const int rightRank = rightTensorComponent_y.
rank();
362 rightFields_y(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_y(fieldOrdinal,pointOrdinal) : rightTensorComponent_y(fieldOrdinal,pointOrdinal,b_component);
364 if ((fieldOrdinal < rightTensorComponent_z.
extent_int(0)) && (pointOrdinal < rightTensorComponent_z.
extent_int(1)))
366 const int rightRank = rightTensorComponent_z.
rank();
367 rightFields_z(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_z(fieldOrdinal,pointOrdinal) : rightTensorComponent_z(fieldOrdinal,pointOrdinal,b_component);
371 if (composedTransform_.underlyingMatchesLogical())
373 if (composedTransformRank == 4)
375 const auto & composedTransformView = composedTransform_.getUnderlyingView4();
376 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransformView.extent_int(1)), [&] (
const int& pointOrdinal) {
377 pointWeights(pointOrdinal) = composedTransformView(cellDataOrdinal,pointOrdinal,a,b) * cellMeasures_(cellDataOrdinal,pointOrdinal);
382 const auto & composedTransformView = composedTransform_.getUnderlyingView2();
383 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransformView.extent_int(1)), [&] (
const int& pointOrdinal) {
384 pointWeights(pointOrdinal) = composedTransformView(cellDataOrdinal,pointOrdinal) * cellMeasures_(cellDataOrdinal,pointOrdinal);
390 if (composedTransformRank == 4)
392 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransform_.extent_int(1)), [&] (
const int& pointOrdinal) {
393 pointWeights(pointOrdinal) = composedTransform_(cellDataOrdinal,pointOrdinal,a,b) * cellMeasures_(cellDataOrdinal,pointOrdinal);
398 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransform_.extent_int(1)), [&] (
const int& pointOrdinal) {
399 pointWeights(pointOrdinal) = composedTransform_(cellDataOrdinal,pointOrdinal) * cellMeasures_(cellDataOrdinal,pointOrdinal);
407 teamMember.team_barrier();
410 const int scratchOffsetForThread = teamMember.team_rank() * scratchViewSize;
411 for (
int i=scratchOffsetForThread; i<scratchOffsetForThread+scratchViewSize; i++)
413 scratchView(i) = 0.0;
417 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numLeftFieldsFinal * numRightFieldsFinal), [&] (
const int& leftRightFieldEnumeration) {
418 const int iz = leftRightFieldEnumeration % numLeftFieldsFinal;
419 const int jz = leftRightFieldEnumeration / numLeftFieldsFinal;
423 Kokkos::Array<int,numTensorComponents> leftFieldArguments;
424 Kokkos::Array<int,numTensorComponents> rightFieldArguments;
425 rightFieldArguments[R] = jz;
426 leftFieldArguments[R] = iz;
428 Kokkos::Array<int,numTensorComponents> pointArguments;
429 for (
int i=0; i<numTensorComponents; i++)
431 pointArguments[i] = 0;
434 for (
int lx=0; lx<pointBounds[0]; lx++)
436 pointArguments[0] = lx;
440 const int Gy_start_index = scratchOffsetForThread + offsetsForComponentOrdinal_[1];
441 const int Gy_end_index = scratchOffsetForThread + offsetsForComponentOrdinal_[0];
443 for (
int Gy_index=Gy_start_index; Gy_index < Gy_end_index; Gy_index++)
445 scratchView(Gy_index) = 0;
448 for (
int ly=0; ly<pointBounds[1]; ly++)
450 pointArguments[1] = ly;
452 Scalar * Gz = &scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[R]);
455 for (
int lz=0; lz < pointBounds[2]; lz++)
457 const Scalar & leftValue = leftFields_z(iz,lz);
458 const Scalar & rightValue = rightFields_z(jz,lz);
460 pointArguments[2] = lz;
461 int pointEnumerationIndex = relativeEnumerationIndex<numTensorComponents>(pointArguments, pointBounds, 0);
463 *Gz += leftValue * pointWeights(pointEnumerationIndex) * rightValue;
468 for (
int iy=0; iy<leftFieldBounds_[1]; iy++)
470 leftFieldArguments[1] = iy;
471 const int leftEnumerationIndex_y = relativeEnumerationIndex<numTensorComponents,R>(leftFieldArguments, leftFieldBounds, 1);
473 const Scalar & leftValue = leftFields_y(iy,ly);
475 for (
int jy=0; jy<rightFieldBounds_[1]; jy++)
477 rightFieldArguments[1] = jy;
479 const int rightEnumerationIndex_y = relativeEnumerationIndex<numTensorComponents,R>(rightFieldArguments, rightFieldBounds, 1);
480 const Scalar & rightValue = rightFields_y(jy,ly);
482 const int & rightEnumerationSpan_y = rightFieldRelativeEnumerationSpans_[1];
483 const int Gy_index = leftEnumerationIndex_y * rightEnumerationSpan_y + rightEnumerationIndex_y;
485 const int Gz_index = 0;
486 const Scalar & Gz = scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[2] + Gz_index);
488 Scalar & Gy = scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[1] + Gy_index);
490 Gy += leftValue * Gz * rightValue;
495 for (
int ix=0; ix<leftFieldBounds_[0]; ix++)
497 leftFieldArguments[0] = ix;
498 const Scalar & leftValue = leftFields_x(ix,lx);
500 for (
int iy=0; iy<leftFieldBounds_[1]; iy++)
502 leftFieldArguments[1] = iy;
504 const int leftEnumerationIndex_y = relativeEnumerationIndex<numTensorComponents,R>(leftFieldArguments, leftFieldBounds, 1);
506 for (
int jx=0; jx<rightFieldBounds_[0]; jx++)
508 rightFieldArguments[0] = jx;
509 const Scalar & rightValue = rightFields_x(jx,lx);
511 for (
int jy=0; jy<rightFieldBounds_[1]; jy++)
513 rightFieldArguments[1] = jy;
514 const int rightEnumerationIndex_y = relativeEnumerationIndex<numTensorComponents,R>(rightFieldArguments, rightFieldBounds, 1);
516 const int rightEnumerationSpan_y = rightFieldRelativeEnumerationSpans_[1];
518 const int Gy_index = leftEnumerationIndex_y * rightEnumerationSpan_y + rightEnumerationIndex_y;
519 Scalar & Gy = scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[1] + Gy_index);
522 int leftEnumerationIndex = relativeEnumerationIndex<numTensorComponents>( leftFieldArguments, leftFieldBounds, 0);
523 int rightEnumerationIndex = relativeEnumerationIndex<numTensorComponents>(rightFieldArguments, rightFieldBounds, 0);
524 const int leftFieldIndex = leftEnumerationIndex + leftFieldOrdinalOffset_;
525 const int rightFieldIndex = rightEnumerationIndex + rightFieldOrdinalOffset_;
527 if (integralViewRank == 3)
549 integralView_.access(cellDataOrdinal,leftFieldIndex,rightFieldIndex) += leftValue * Gy * rightValue;
554 integralView_.access(leftFieldIndex,rightFieldIndex,0) += leftValue * Gy * rightValue;
568 template<
size_t numTensorComponents>
569 KOKKOS_INLINE_FUNCTION
570 void run(
const TeamMember & teamMember )
const
572 Kokkos::Array<int,numTensorComponents> pointBounds;
573 Kokkos::Array<int,numTensorComponents> leftFieldBounds;
574 Kokkos::Array<int,numTensorComponents> rightFieldBounds;
575 for (
unsigned r=0; r<numTensorComponents; r++)
577 pointBounds[r] = pointBounds_[r];
578 leftFieldBounds[r] = leftFieldBounds_[r];
579 rightFieldBounds[r] = rightFieldBounds_[r];
582 const int cellDataOrdinal = teamMember.league_rank();
583 const int numThreads = teamMember.team_size();
584 const int scratchViewSize = offsetsForComponentOrdinal_[0];
585 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> scratchView;
586 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> pointWeights;
587 Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged> leftFields, rightFields;
588 if (fad_size_output_ > 0) {
589 scratchView = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), scratchViewSize * numThreads, fad_size_output_);
590 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.
extent_int(1), fad_size_output_);
591 leftFields = Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numTensorComponents, maxFieldsLeft_, maxPointCount_, fad_size_output_);
592 rightFields = Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numTensorComponents, maxFieldsRight_, maxPointCount_, fad_size_output_);
595 scratchView = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), scratchViewSize*numThreads);
596 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.
extent_int(1));
597 leftFields = Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numTensorComponents, maxFieldsLeft_, maxPointCount_);
598 rightFields = Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), numTensorComponents, maxFieldsRight_, maxPointCount_);
604 constexpr int R = numTensorComponents - 1;
606 const int composedTransformRank = composedTransform_.rank();
608 for (
int a_component=0; a_component < leftComponentSpan_; a_component++)
610 const int a = a_offset_ + a_component;
611 for (
int b_component=0; b_component < rightComponentSpan_; b_component++)
613 const int b = b_offset_ + b_component;
615 const Data<Scalar,DeviceType> & leftFinalComponent = leftComponent_.getTensorComponent(R);
616 const Data<Scalar,DeviceType> & rightFinalComponent = rightComponent_.getTensorComponent(R);
618 const int numLeftFieldsFinal = leftFinalComponent.extent_int(0);
619 const int numRightFieldsFinal = rightFinalComponent.extent_int(0);
621 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numTensorComponents), [&] (
const int& r) {
622 const Data<Scalar,DeviceType> & leftTensorComponent = leftComponent_.getTensorComponent(r);
623 const Data<Scalar,DeviceType> & rightTensorComponent = rightComponent_.getTensorComponent(r);
624 const int leftFieldCount = leftTensorComponent.extent_int(0);
625 const int pointCount = leftTensorComponent.extent_int(1);
626 const int leftRank = leftTensorComponent.rank();
627 const int rightFieldCount = rightTensorComponent.extent_int(0);
628 const int rightRank = rightTensorComponent.rank();
629 for (
int fieldOrdinal=0; fieldOrdinal<maxFieldsLeft_; fieldOrdinal++)
632 const int fieldAddress = (fieldOrdinal < leftFieldCount) ? fieldOrdinal : leftFieldCount - 1;
633 for (
int pointOrdinal=0; pointOrdinal<maxPointCount_; pointOrdinal++)
635 const int pointAddress = (pointOrdinal < pointCount) ? pointOrdinal : pointCount - 1;
636 leftFields(r,fieldAddress,pointAddress) = (leftRank == 2) ? leftTensorComponent(fieldAddress,pointAddress) : leftTensorComponent(fieldAddress,pointAddress,a_component);
639 for (
int fieldOrdinal=0; fieldOrdinal<maxFieldsRight_; fieldOrdinal++)
642 const int fieldAddress = (fieldOrdinal < rightFieldCount) ? fieldOrdinal : rightFieldCount - 1;
643 for (
int pointOrdinal=0; pointOrdinal<maxPointCount_; pointOrdinal++)
645 const int pointAddress = (pointOrdinal < pointCount) ? pointOrdinal : pointCount - 1;
646 rightFields(r,fieldAddress,pointAddress) = (rightRank == 2) ? rightTensorComponent(fieldAddress,pointAddress) : rightTensorComponent(fieldAddress,pointAddress,b_component);
651 if (composedTransformRank == 4)
653 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransform_.extent_int(1)), [&] (
const int& pointOrdinal) {
654 pointWeights(pointOrdinal) = composedTransform_(cellDataOrdinal,pointOrdinal,a,b) * cellMeasures_(cellDataOrdinal,pointOrdinal);
659 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransform_.extent_int(1)), [&] (
const int& pointOrdinal) {
660 pointWeights(pointOrdinal) = composedTransform_(cellDataOrdinal,pointOrdinal) * cellMeasures_(cellDataOrdinal,pointOrdinal);
666 teamMember.team_barrier();
668 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,numLeftFieldsFinal * numRightFieldsFinal), [&] (
const int& leftRightFieldEnumeration) {
669 const int scratchOffsetForThread = teamMember.team_rank() * scratchViewSize;
670 const int i_R = leftRightFieldEnumeration % numLeftFieldsFinal;
671 const int j_R = leftRightFieldEnumeration / numLeftFieldsFinal;
675 Kokkos::Array<int,numTensorComponents> leftFieldArguments;
676 Kokkos::Array<int,numTensorComponents> rightFieldArguments;
677 rightFieldArguments[R] = j_R;
678 leftFieldArguments[R] = i_R;
681 for (
int i=scratchOffsetForThread; i<scratchOffsetForThread+scratchViewSize; i++)
683 scratchView(i) = 0.0;
685 Kokkos::Array<int,numTensorComponents> pointArguments;
686 for (
unsigned i=0; i<numTensorComponents; i++)
688 pointArguments[i] = 0;
695 const int pointBounds_R = pointBounds[R];
696 int & pointArgument_R = pointArguments[R];
697 for (pointArgument_R=0; pointArgument_R < pointBounds_R; pointArgument_R++)
702 G_R = &scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[R]);
706 const int leftFieldIndex = i_R + leftFieldOrdinalOffset_;
707 const int rightFieldIndex = j_R + rightFieldOrdinalOffset_;
709 if (integralViewRank == 3)
712 G_R = &integralView_.access(cellDataOrdinal,leftFieldIndex,rightFieldIndex);
717 G_R = &integralView_.access(leftFieldIndex,rightFieldIndex,0);
721 const int & pointIndex_R = pointArguments[R];
723 const Scalar & leftValue = leftFields(R,i_R,pointIndex_R);
724 const Scalar & rightValue = rightFields(R,j_R,pointIndex_R);
726 int pointEnumerationIndex = relativeEnumerationIndex<numTensorComponents>(pointArguments, pointBounds, 0);
728 *G_R += leftValue * pointWeights(pointEnumerationIndex) * rightValue;
733 const int r_next = nextIncrementResult<numTensorComponents,numTensorComponents-1>(pointArguments, pointBounds);
734 const int r_min = (r_next >= 0) ? r_next : 0;
736 for (
int s=R-1; s>=r_min; s--)
738 const int & pointIndex_s = pointArguments[s];
741 for (
int s1=s; s1<R; s1++)
743 leftFieldArguments[s1] = 0;
747 const int & i_s = leftFieldArguments[s];
748 const int & j_s = rightFieldArguments[s];
751 const int & rightEnumerationSpan_s = rightFieldRelativeEnumerationSpans_[s];
752 const int & rightEnumerationSpan_sp = rightFieldRelativeEnumerationSpans_[s+1];
756 const int leftEnumerationIndex_s = relativeEnumerationIndex<numTensorComponents,R>(leftFieldArguments, leftFieldBounds, s);
759 const int leftEnumerationIndex_sp = (s+1 == R) ? 0 : relativeEnumerationIndex<numTensorComponents,R>(leftFieldArguments, leftFieldBounds, s+1);
761 const Scalar & leftValue = leftFields(s,i_s,pointIndex_s);
763 for (
int s1=s; s1<R; s1++)
765 rightFieldArguments[s1] = 0;
770 const int rightEnumerationIndex_s = relativeEnumerationIndex<numTensorComponents,R>(rightFieldArguments, rightFieldBounds, s);
773 const int rightEnumerationIndex_sp = (s+1 == R) ? 0 : relativeEnumerationIndex<numTensorComponents,R>(rightFieldArguments, rightFieldBounds, s+1);
775 const Scalar & rightValue = rightFields(s,j_s,pointIndex_s);
777 const int G_s_index = leftEnumerationIndex_s * rightEnumerationSpan_s + rightEnumerationIndex_s;
783 const int G_sp_index = leftEnumerationIndex_sp * rightEnumerationSpan_sp + rightEnumerationIndex_sp;
785 const Scalar & G_sp = scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[s+1] + G_sp_index);
790 G_s = &scratchView(scratchOffsetForThread + offsetsForComponentOrdinal_[s] + G_s_index);
795 int leftEnumerationIndex = relativeEnumerationIndex<numTensorComponents>( leftFieldArguments, leftFieldBounds, 0);
796 int rightEnumerationIndex = relativeEnumerationIndex<numTensorComponents>(rightFieldArguments, rightFieldBounds, 0);
797 const int leftFieldIndex = leftEnumerationIndex + leftFieldOrdinalOffset_;
798 const int rightFieldIndex = rightEnumerationIndex + rightFieldOrdinalOffset_;
800 if (integralViewRank == 3)
803 G_s = &integralView_.access(cellDataOrdinal,leftFieldIndex,rightFieldIndex);
808 G_s = &integralView_.access(leftFieldIndex,rightFieldIndex,0);
812 *G_s += leftValue * G_sp * rightValue;
817 sRight = incrementArgument<numTensorComponents,R>(rightFieldArguments, rightFieldBounds);
821 sLeft = incrementArgument<numTensorComponents,R>(leftFieldArguments, leftFieldBounds);
828 const int endIndex = scratchOffsetForThread + offsetsForComponentOrdinal_[r_min];
829 for (
int i=scratchOffsetForThread; i<endIndex; i++)
831 scratchView(i) = 0.0;
838 r = incrementArgument<numTensorComponents,numTensorComponents-1>(pointArguments, pointBounds);
846 KOKKOS_INLINE_FUNCTION
847 void operator()(
const TeamMember & teamMember )
const
849 switch (numTensorComponents_)
851 case 1: run<1>(teamMember);
break;
852 case 2: run<2>(teamMember);
break;
854 if (forceNonSpecialized_)
859 case 4: run<4>(teamMember);
break;
860 case 5: run<5>(teamMember);
break;
861 case 6: run<6>(teamMember);
break;
862 case 7: run<7>(teamMember);
break;
863#ifdef INTREPID2_HAVE_DEBUG
876 const int R = numTensorComponents_ - 1;
880 const int flopsPerPoint_ab = cellMeasures_.numTensorComponents();
882 for (
int a_component=0; a_component < leftComponentSpan_; a_component++)
884 for (
int b_component=0; b_component < rightComponentSpan_; b_component++)
889 const int numLeftFieldsFinal = leftFinalComponent.
extent_int(0);
890 const int numRightFieldsFinal = rightFinalComponent.
extent_int(0);
892 flopCount += flopsPerPoint_ab * cellMeasures_.extent_int(1);
894 int flopsPer_i_R_j_R = 0;
898 Kokkos::Array<int,Parameters::MaxTensorComponents> leftFieldArguments;
899 leftFieldArguments[R] = 0;
901 Kokkos::Array<int,Parameters::MaxTensorComponents> pointArguments;
902 for (
int i=0; i<numTensorComponents_; i++)
904 pointArguments[i] = 0;
909 Kokkos::Array<int,Parameters::MaxTensorComponents> rightFieldArguments;
910 rightFieldArguments[R] = 0;
916 for (pointArguments[R]=0; pointArguments[R] < pointBounds_[R]; pointArguments[R]++)
918 flopsPer_i_R_j_R += 4;
926 const int r_next = nextIncrementResult(pointArguments, pointBounds_, numTensorComponents_);
927 const int r_min = (r_next >= 0) ? r_next : 0;
929 for (
int s=R-1; s>=r_min; s--)
932 int numLeftIterations = leftFieldRelativeEnumerationSpans_[s];
933 int numRightIterations = rightFieldRelativeEnumerationSpans_[s];
935 flopsPer_i_R_j_R += numLeftIterations * numRightIterations * 3;
939 r = incrementArgument(pointArguments, pointBounds_, numTensorComponents_);
942 flopCount += flopsPer_i_R_j_R * numLeftFieldsFinal * numRightFieldsFinal;
950 int teamSize(
const int &maxTeamSizeFromKokkos)
const
952 const int R = numTensorComponents_ - 1;
953 const int threadParallelismExpressed = leftFieldBounds_[R] * rightFieldBounds_[R];
954 return std::min(maxTeamSizeFromKokkos, threadParallelismExpressed);
961 size_t shmem_size = 0;
963 if (fad_size_output_ > 0)
965 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(offsetsForComponentOrdinal_[0] * team_size, fad_size_output_);
966 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(composedTransform_.extent_int(1), fad_size_output_);
967 shmem_size += Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(numTensorComponents_, maxFieldsLeft_, maxPointCount_, fad_size_output_);
968 shmem_size += Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(numTensorComponents_, maxFieldsRight_, maxPointCount_, fad_size_output_);
972 shmem_size += Kokkos::View<Scalar*, DeviceType>::shmem_size(offsetsForComponentOrdinal_[0] * team_size);
973 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(composedTransform_.extent_int(1));
974 shmem_size += Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(numTensorComponents_, maxFieldsLeft_, maxPointCount_);
975 shmem_size += Kokkos::View<Scalar***, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(numTensorComponents_, maxFieldsRight_, maxPointCount_);
992 class F_IntegratePointValueCache
994 using ExecutionSpace =
typename DeviceType::execution_space;
995 using TeamPolicy = Kokkos::TeamPolicy<DeviceType>;
996 using TeamMember =
typename TeamPolicy::member_type;
998 using IntegralViewType = Kokkos::View<typename RankExpander<Scalar, integralViewRank>::value_type, DeviceType>;
999 IntegralViewType integralView_;
1006 int leftComponentSpan_;
1007 int rightComponentSpan_;
1008 int numTensorComponents_;
1009 int leftFieldOrdinalOffset_;
1010 int rightFieldOrdinalOffset_;
1012 size_t fad_size_output_ = 0;
1016 Kokkos::Array<int,Parameters::MaxTensorComponents> leftFieldBounds_;
1017 Kokkos::Array<int,Parameters::MaxTensorComponents> rightFieldBounds_;
1018 Kokkos::Array<int,Parameters::MaxTensorComponents> pointBounds_;
1021 int maxFieldsRight_;
1031 int leftFieldOrdinalOffset,
1032 int rightFieldOrdinalOffset)
1034 integralView_(integralData.template getUnderlyingView<integralViewRank>()),
1035 leftComponent_(leftComponent),
1036 composedTransform_(composedTransform),
1037 rightComponent_(rightComponent),
1038 cellMeasures_(cellMeasures),
1039 a_offset_(a_offset),
1040 b_offset_(b_offset),
1041 leftComponentSpan_(leftComponent.
extent_int(2)),
1042 rightComponentSpan_(rightComponent.
extent_int(2)),
1044 leftFieldOrdinalOffset_(leftFieldOrdinalOffset),
1045 rightFieldOrdinalOffset_(rightFieldOrdinalOffset)
1047 INTREPID2_TEST_FOR_EXCEPTION(numTensorComponents_ != rightComponent_.numTensorComponents(), std::invalid_argument,
"Left and right components must have matching number of tensorial components");
1049 const int FIELD_DIM = 0;
1050 const int POINT_DIM = 1;
1052 maxFieldsRight_ = 0;
1054 for (
int r=0; r<numTensorComponents_; r++)
1056 leftFieldBounds_[r] = leftComponent_.getTensorComponent(r).extent_int(FIELD_DIM);
1057 maxFieldsLeft_ = std::max(maxFieldsLeft_, leftFieldBounds_[r]);
1058 rightFieldBounds_[r] = rightComponent_.getTensorComponent(r).extent_int(FIELD_DIM);
1059 maxFieldsRight_ = std::max(maxFieldsRight_, rightFieldBounds_[r]);
1060 pointBounds_[r] = leftComponent_.getTensorComponent(r).extent_int(POINT_DIM);
1061 maxPointCount_ = std::max(maxPointCount_, pointBounds_[r]);
1067 const bool allocateFadStorage = !(std::is_standard_layout<Scalar>::value && std::is_trivial<Scalar>::value);
1068 if (allocateFadStorage)
1074 template<
size_t maxComponents,
size_t numComponents = maxComponents>
1075 KOKKOS_INLINE_FUNCTION
1076 int incrementArgument( Kokkos::Array<int,maxComponents> &arguments,
1077 const Kokkos::Array<int,maxComponents> &bounds)
const
1079 if (numComponents == 0)
return -1;
1080 int r =
static_cast<int>(numComponents - 1);
1081 while (arguments[r] + 1 >= bounds[r])
1087 if (r >= 0) ++arguments[r];
1092 KOKKOS_INLINE_FUNCTION
1094 const Kokkos::Array<int,Parameters::MaxTensorComponents> &bounds,
1095 const int &numComponents)
const
1097 if (numComponents == 0)
return -1;
1098 int r =
static_cast<int>(numComponents - 1);
1099 while (arguments[r] + 1 >= bounds[r])
1105 if (r >= 0) ++arguments[r];
1109 template<
size_t maxComponents,
size_t numComponents = maxComponents>
1110 KOKKOS_INLINE_FUNCTION
1111 int nextIncrementResult(
const Kokkos::Array<int,maxComponents> &arguments,
1112 const Kokkos::Array<int,maxComponents> &bounds)
const
1114 if (numComponents == 0)
return -1;
1115 int r =
static_cast<int>(numComponents - 1);
1116 while (arguments[r] + 1 >= bounds[r])
1125 KOKKOS_INLINE_FUNCTION
1127 const Kokkos::Array<int,Parameters::MaxTensorComponents> &bounds,
1128 const int &numComponents)
const
1130 if (numComponents == 0)
return -1;
1131 int r = numComponents - 1;
1132 while (arguments[r] + 1 >= bounds[r])
1140 template<
size_t maxComponents,
size_t numComponents = maxComponents>
1141 KOKKOS_INLINE_FUNCTION
1142 int relativeEnumerationIndex(
const Kokkos::Array<int,maxComponents> &arguments,
1143 const Kokkos::Array<int,maxComponents> &bounds,
1144 const int startIndex)
const
1147 if (numComponents == 0)
1151 int enumerationIndex = 0;
1152 for (
size_t r=numComponents-1; r>
static_cast<size_t>(startIndex); r--)
1154 enumerationIndex += arguments[r];
1155 enumerationIndex *= bounds[r-1];
1157 enumerationIndex += arguments[startIndex];
1158 return enumerationIndex;
1162 KOKKOS_INLINE_FUNCTION
1163 enable_if_t<rank==3 && rank==integralViewRank, Scalar &>
1164 integralViewEntry(
const IntegralViewType& integralView,
const int &cellDataOrdinal,
const int &i,
const int &j)
const
1166 return integralView(cellDataOrdinal,i,j);
1170 KOKKOS_INLINE_FUNCTION
1171 enable_if_t<rank==2 && rank==integralViewRank, Scalar &>
1172 integralViewEntry(
const IntegralViewType& integralView,
const int &cellDataOrdinal,
const int &i,
const int &j)
const
1174 return integralView(i,j);
1178 KOKKOS_INLINE_FUNCTION
1181 constexpr int numTensorComponents = 3;
1183 const int pointBounds_x = pointBounds_[0];
1184 const int pointBounds_y = pointBounds_[1];
1185 const int pointBounds_z = pointBounds_[2];
1186 const int pointsInNonzeroComponentDimensions = pointBounds_y * pointBounds_z;
1188 const int leftFieldBounds_x = leftFieldBounds_[0];
1189 const int rightFieldBounds_x = rightFieldBounds_[0];
1190 const int leftFieldBounds_y = leftFieldBounds_[1];
1191 const int rightFieldBounds_y = rightFieldBounds_[1];
1192 const int leftFieldBounds_z = leftFieldBounds_[2];
1193 const int rightFieldBounds_z = rightFieldBounds_[2];
1195 Kokkos::Array<int,numTensorComponents> leftFieldBounds;
1196 Kokkos::Array<int,numTensorComponents> rightFieldBounds;
1197 for (
unsigned r=0; r<numTensorComponents; r++)
1199 leftFieldBounds[r] = leftFieldBounds_[r];
1200 rightFieldBounds[r] = rightFieldBounds_[r];
1203 const auto integralView = integralView_;
1204 const auto leftFieldOrdinalOffset = leftFieldOrdinalOffset_;
1205 const auto rightFieldOrdinalOffset = rightFieldOrdinalOffset_;
1207 const int cellDataOrdinal = teamMember.league_rank();
1208 const int threadNumber = teamMember.team_rank();
1210 const int numThreads = teamMember.team_size();
1211 const int GyEntryCount = pointBounds_z;
1212 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> GxIntegrals;
1213 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> GyIntegrals;
1214 Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> pointWeights;
1216 Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged> leftFields_x, rightFields_x;
1217 Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged> leftFields_y, rightFields_y;
1218 Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged> leftFields_z, rightFields_z;
1219 if (fad_size_output_ > 0) {
1220 GxIntegrals = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), pointsInNonzeroComponentDimensions, fad_size_output_);
1221 GyIntegrals = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), GyEntryCount * numThreads, fad_size_output_);
1222 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.extent_int(1), fad_size_output_);
1224 leftFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_x, pointBounds_x, fad_size_output_);
1225 rightFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_x, pointBounds_x, fad_size_output_);
1226 leftFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_y, pointBounds_y, fad_size_output_);
1227 rightFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_y, pointBounds_y, fad_size_output_);
1228 leftFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_z, pointBounds_z, fad_size_output_);
1229 rightFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_z, pointBounds_z, fad_size_output_);
1232 GxIntegrals = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), pointsInNonzeroComponentDimensions);
1233 GyIntegrals = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), GyEntryCount * numThreads);
1234 pointWeights = Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged> (teamMember.team_shmem(), composedTransform_.extent_int(1));
1236 leftFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_x, pointBounds_x);
1237 rightFields_x = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_x, pointBounds_x);
1238 leftFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_y, pointBounds_y);
1239 rightFields_y = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_y, pointBounds_y);
1240 leftFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), leftFieldBounds_z, pointBounds_z);
1241 rightFields_z = Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>(teamMember.team_shmem(), rightFieldBounds_z, pointBounds_z);
1249 const int composedTransformRank = composedTransform_.rank();
1252 teamMember.team_barrier();
1254 for (
int a_component=0; a_component < leftComponentSpan_; a_component++)
1256 const int a = a_offset_ + a_component;
1257 for (
int b_component=0; b_component < rightComponentSpan_; b_component++)
1259 const int b = b_offset_ + b_component;
1268 const int maxFields = (maxFieldsLeft_ > maxFieldsRight_) ? maxFieldsLeft_ : maxFieldsRight_;
1269 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,maxFields), [&] (
const int& fieldOrdinal) {
1270 if (fieldOrdinal < leftTensorComponent_x.
extent_int(0))
1272 const int pointCount = leftTensorComponent_x.
extent_int(1);
1273 const int leftRank = leftTensorComponent_x.
rank();
1274 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1276 leftFields_x(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_x(fieldOrdinal,pointOrdinal) : leftTensorComponent_x(fieldOrdinal,pointOrdinal,a_component);
1279 if (fieldOrdinal < leftTensorComponent_y.
extent_int(0))
1281 const int pointCount = leftTensorComponent_y.
extent_int(1);
1282 const int leftRank = leftTensorComponent_y.
rank();
1283 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1285 leftFields_y(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_y(fieldOrdinal,pointOrdinal) : leftTensorComponent_y(fieldOrdinal,pointOrdinal,a_component);
1288 if (fieldOrdinal < leftTensorComponent_z.
extent_int(0))
1290 const int pointCount = leftTensorComponent_z.
extent_int(1);
1291 const int leftRank = leftTensorComponent_z.
rank();
1292 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1294 leftFields_z(fieldOrdinal,pointOrdinal) = (leftRank == 2) ? leftTensorComponent_z(fieldOrdinal,pointOrdinal) : leftTensorComponent_z(fieldOrdinal,pointOrdinal,a_component);
1297 if (fieldOrdinal < rightTensorComponent_x.
extent_int(0))
1299 const int pointCount = rightTensorComponent_x.
extent_int(1);
1300 const int rightRank = rightTensorComponent_x.
rank();
1301 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1303 rightFields_x(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_x(fieldOrdinal,pointOrdinal) : rightTensorComponent_x(fieldOrdinal,pointOrdinal,a_component);
1306 if (fieldOrdinal < rightTensorComponent_y.
extent_int(0))
1308 const int pointCount = rightTensorComponent_y.
extent_int(1);
1309 const int rightRank = rightTensorComponent_y.
rank();
1310 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1312 rightFields_y(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_y(fieldOrdinal,pointOrdinal) : rightTensorComponent_y(fieldOrdinal,pointOrdinal,a_component);
1315 if (fieldOrdinal < rightTensorComponent_z.
extent_int(0))
1317 const int pointCount = rightTensorComponent_z.
extent_int(1);
1318 const int rightRank = rightTensorComponent_z.
rank();
1319 for (
int pointOrdinal=0; pointOrdinal<pointCount; pointOrdinal++)
1321 rightFields_z(fieldOrdinal,pointOrdinal) = (rightRank == 2) ? rightTensorComponent_z(fieldOrdinal,pointOrdinal) : rightTensorComponent_z(fieldOrdinal,pointOrdinal,a_component);
1326 if (composedTransformRank == 4)
1328 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransform_.extent_int(1)), [&] (
const int& pointOrdinal) {
1329 pointWeights(pointOrdinal) = composedTransform_(cellDataOrdinal,pointOrdinal,a,b) * cellMeasures_(cellDataOrdinal,pointOrdinal);
1334 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,composedTransform_.extent_int(1)), [&] (
const int& pointOrdinal) {
1335 pointWeights(pointOrdinal) = composedTransform_(cellDataOrdinal,pointOrdinal) * cellMeasures_(cellDataOrdinal,pointOrdinal);
1340 teamMember.team_barrier();
1342 for (
int i0=0; i0<leftFieldBounds_x; i0++)
1344 for (
int j0=0; j0<rightFieldBounds_x; j0++)
1346 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,pointsInNonzeroComponentDimensions), [&] (
const int& pointEnumerationIndexLaterDimensions) {
1348 const int tensorPointEnumerationOffset = pointBounds_x * pointEnumerationIndexLaterDimensions;
1350 Scalar & Gx = GxIntegrals(pointEnumerationIndexLaterDimensions);
1353 if (fad_size_output_ == 0)
1356 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember, pointBounds_x), [&] (
const int &x_pointOrdinal, Scalar &integralThusFar)
1358 integralThusFar += leftFields_x(i0,x_pointOrdinal) * rightFields_x(j0,x_pointOrdinal) * pointWeights(tensorPointEnumerationOffset + x_pointOrdinal);
1363 for (
int x_pointOrdinal=0; x_pointOrdinal<pointBounds_x; x_pointOrdinal++)
1365 Gx += leftFields_x(i0,x_pointOrdinal) * rightFields_x(j0,x_pointOrdinal) * pointWeights(tensorPointEnumerationOffset + x_pointOrdinal);
1371 teamMember.team_barrier();
1373 Kokkos::parallel_for(Kokkos::TeamThreadRange(teamMember,0,leftFieldBounds_y * rightFieldBounds_y), [&] (
const int& i1j1) {
1374 const int i1 = i1j1 % leftFieldBounds_y;
1375 const int j1 = i1j1 / leftFieldBounds_y;
1377 int Gy_index_offset = GyEntryCount * threadNumber;
1379 for (
int lz=0; lz<pointBounds_z; lz++)
1381 int pointEnumerationIndex = lz * pointBounds_y;
1382 if (fad_size_output_ == 0)
1384 Scalar Gy_local = 0;
1387 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember, pointBounds_y), [&] (
const int &ly, Scalar &integralThusFar)
1389 const Scalar & leftValue = leftFields_y(i1,ly);
1390 const Scalar & rightValue = rightFields_y(j1,ly);
1392 integralThusFar += leftValue * rightValue * GxIntegrals(pointEnumerationIndex + ly);
1395 GyIntegrals(Gy_index_offset + lz) = Gy_local;
1399 Scalar & Gy = GyIntegrals(Gy_index_offset + lz);
1400 for (
int ly=0; ly<pointBounds_y; ly++)
1402 const Scalar & leftValue = leftFields_y(i1,ly);
1403 const Scalar & rightValue = rightFields_y(j1,ly);
1405 Gy += leftValue * rightValue * GxIntegrals(pointEnumerationIndex + ly);
1410 for (
int i2=0; i2<leftFieldBounds_z; i2++)
1412 for (
int j2=0; j2<rightFieldBounds_z; j2++)
1416 int Gy_index_offset = GyEntryCount * threadNumber;
1418 if (fad_size_output_ == 0)
1421 Kokkos::parallel_reduce(Kokkos::ThreadVectorRange(teamMember, pointBounds_z), [&] (
const int &lz, Scalar &integralThusFar)
1423 const Scalar & leftValue = leftFields_z(i2,lz);
1424 const Scalar & rightValue = rightFields_z(j2,lz);
1426 integralThusFar += leftValue * rightValue * GyIntegrals(Gy_index_offset+lz);
1431 for (
int lz=0; lz<pointBounds_z; lz++)
1433 const Scalar & leftValue = leftFields_z(i2,lz);
1434 const Scalar & rightValue = rightFields_z(j2,lz);
1436 Gz += leftValue * rightValue * GyIntegrals(Gy_index_offset+lz);
1440 const int i = leftFieldOrdinalOffset + i0 + (i1 + i2 * leftFieldBounds_y) * leftFieldBounds_x;
1441 const int j = rightFieldOrdinalOffset + j0 + (j1 + j2 * rightFieldBounds_y) * rightFieldBounds_x;
1446 Kokkos::single (Kokkos::PerThread(teamMember), [&] () {
1447 integralViewEntry<integralViewRank>(integralView, cellDataOrdinal, i, j) += Gz;
1453 teamMember.team_barrier();
1460 template<
size_t numTensorComponents>
1461 KOKKOS_INLINE_FUNCTION
1462 void run(
const TeamMember & teamMember )
const
1600 KOKKOS_INLINE_FUNCTION
1601 void operator()(
const TeamMember & teamMember )
const
1603 switch (numTensorComponents_)
1605 case 1: run<1>(teamMember);
break;
1606 case 2: run<2>(teamMember);
break;
1608 case 4: run<4>(teamMember);
break;
1609 case 5: run<5>(teamMember);
break;
1610 case 6: run<6>(teamMember);
break;
1611 case 7: run<7>(teamMember);
break;
1612#ifdef INTREPID2_HAVE_DEBUG
1625 constexpr int numTensorComponents = 3;
1626 Kokkos::Array<int,numTensorComponents> pointBounds;
1627 Kokkos::Array<int,numTensorComponents> leftFieldBounds;
1628 Kokkos::Array<int,numTensorComponents> rightFieldBounds;
1630 int pointsInNonzeroComponentDimensions = 1;
1631 for (
unsigned r=0; r<numTensorComponents; r++)
1633 pointBounds[r] = pointBounds_[r];
1634 if (r > 0) pointsInNonzeroComponentDimensions *= pointBounds[r];
1635 leftFieldBounds[r] = leftFieldBounds_[r];
1636 rightFieldBounds[r] = rightFieldBounds_[r];
1639 for (
int a_component=0; a_component < leftComponentSpan_; a_component++)
1641 for (
int b_component=0; b_component < rightComponentSpan_; b_component++)
1646 flopCount += composedTransform_.extent_int(1) * cellMeasures_.numTensorComponents();
1648 for (
int i0=0; i0<leftFieldBounds[0]; i0++)
1650 for (
int j0=0; j0<rightFieldBounds[0]; j0++)
1652 flopCount += pointsInNonzeroComponentDimensions * pointBounds[0] * 3;
1692 flopCount += leftFieldBounds[1] * rightFieldBounds[1] * pointBounds[1] * pointBounds[2] * 3;
1693 flopCount += leftFieldBounds[1] * rightFieldBounds[1] * leftFieldBounds[2] * rightFieldBounds[2] * pointBounds[2] * 3;
1694 flopCount += leftFieldBounds[1] * rightFieldBounds[1] * leftFieldBounds[2] * rightFieldBounds[2] * 1;
1770 const int R = numTensorComponents_ - 1;
1771 const int threadParallelismExpressed = leftFieldBounds_[R] * rightFieldBounds_[R];
1772 return std::min(maxTeamSizeFromKokkos, threadParallelismExpressed);
1779 size_t shmem_size = 0;
1781 const int GyEntryCount = pointBounds_[2];
1783 int pointsInNonzeroComponentDimensions = 1;
1784 for (
int d=1; d<numTensorComponents_; d++)
1786 pointsInNonzeroComponentDimensions *= pointBounds_[d];
1789 if (fad_size_output_ > 0)
1791 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(pointsInNonzeroComponentDimensions, fad_size_output_);
1792 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(GyEntryCount * numThreads, fad_size_output_);
1793 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size (composedTransform_.extent_int(1), fad_size_output_);
1795 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[0], pointBounds_[0], fad_size_output_);
1796 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[0], pointBounds_[0], fad_size_output_);
1797 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[1], pointBounds_[1], fad_size_output_);
1798 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[1], pointBounds_[1], fad_size_output_);
1799 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[2], pointBounds_[2], fad_size_output_);
1800 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[2], pointBounds_[2], fad_size_output_);
1804 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(pointsInNonzeroComponentDimensions);
1805 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size(GyEntryCount * numThreads);
1806 shmem_size += Kokkos::View<Scalar*, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size (composedTransform_.extent_int(1));
1808 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[0], pointBounds_[0]);
1809 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[0], pointBounds_[0]);
1810 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[1], pointBounds_[1]);
1811 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[1], pointBounds_[1]);
1812 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( leftFieldBounds_[2], pointBounds_[2]);
1813 shmem_size += Kokkos::View<Scalar**, DeviceType, Kokkos::MemoryUnmanaged>::shmem_size( rightFieldBounds_[2], pointBounds_[2]);
1929 double* approximateFlops)
1931 using ExecutionSpace =
typename DeviceType::execution_space;
1935 const bool leftHasOrdinalFilter = basisValuesLeft.basisValues().ordinalFilter().extent_int(0) > 0;
1936 const bool rightHasOrdinalFilter = basisValuesRight.basisValues().ordinalFilter().extent_int(0) > 0;
1937 TEUCHOS_TEST_FOR_EXCEPTION(leftHasOrdinalFilter || rightHasOrdinalFilter, std::invalid_argument,
"Ordinal filters for BasisValues are not yet supported by IntegrationTools");
1939 if (approximateFlops != NULL)
1941 *approximateFlops = 0;
1953 const int numFieldsLeft = basisValuesLeft.
numFields();
1954 const int numFieldsRight = basisValuesRight.
numFields();
1955 const int spaceDim = basisValuesLeft.
spaceDim();
1957 INTREPID2_TEST_FOR_EXCEPTION(basisValuesLeft.
spaceDim() != basisValuesRight.
spaceDim(), std::invalid_argument,
"vectorDataLeft and vectorDataRight must agree on the space dimension");
1959 const int leftFamilyCount = basisValuesLeft.basisValues().numFamilies();
1960 const int rightFamilyCount = basisValuesRight.basisValues().numFamilies();
1964 int numTensorComponentsLeft = -1;
1965 const bool leftIsVectorValued = basisValuesLeft.
vectorData().isValid();
1967 if (leftIsVectorValued)
1969 const auto &refVectorLeft = basisValuesLeft.
vectorData();
1970 int numFamiliesLeft = refVectorLeft.numFamilies();
1971 int numVectorComponentsLeft = refVectorLeft.numComponents();
1972 Kokkos::Array<int,7> maxFieldsForComponentLeft {0,0,0,0,0,0,0};
1973 for (
int familyOrdinal=0; familyOrdinal<numFamiliesLeft; familyOrdinal++)
1975 for (
int vectorComponent=0; vectorComponent<numVectorComponentsLeft; vectorComponent++)
1980 if (numTensorComponentsLeft == -1)
1984 INTREPID2_TEST_FOR_EXCEPTION(numVectorComponentsLeft != tensorData.
numTensorComponents(), std::invalid_argument,
"Each valid entry in vectorDataLeft must have the same number of tensor components as every other");
1985 for (
int r=0; r<numTensorComponentsLeft; r++)
1995 numTensorComponentsLeft = basisValuesLeft.basisValues().tensorData(0).numTensorComponents();
1996 for (
int familyOrdinal = 0; familyOrdinal < leftFamilyCount; familyOrdinal++)
1998 INTREPID2_TEST_FOR_EXCEPTION(basisValuesLeft.basisValues().tensorData(familyOrdinal).numTensorComponents() != numTensorComponentsLeft, std::invalid_argument,
"All families must match in the number of tensor components");
2001 int numTensorComponentsRight = -1;
2002 const bool rightIsVectorValued = basisValuesRight.
vectorData().isValid();
2004 if (rightIsVectorValued)
2006 const auto &refVectorRight = basisValuesRight.
vectorData();
2007 int numFamiliesRight = refVectorRight.numFamilies();
2008 int numVectorComponentsRight = refVectorRight.numComponents();
2009 Kokkos::Array<int,7> maxFieldsForComponentRight {0,0,0,0,0,0,0};
2010 for (
int familyOrdinal=0; familyOrdinal<numFamiliesRight; familyOrdinal++)
2012 for (
int vectorComponent=0; vectorComponent<numVectorComponentsRight; vectorComponent++)
2014 const auto &tensorData = refVectorRight.getComponent(familyOrdinal,vectorComponent);
2015 if (tensorData.numTensorComponents() > 0)
2017 if (numTensorComponentsRight == -1)
2019 numTensorComponentsRight = tensorData.numTensorComponents();
2021 INTREPID2_TEST_FOR_EXCEPTION(numVectorComponentsRight != tensorData.numTensorComponents(), std::invalid_argument,
"Each valid entry in vectorDataRight must have the same number of tensor components as every other");
2022 for (
int r=0; r<numTensorComponentsRight; r++)
2024 maxFieldsForComponentRight[r] = std::max(tensorData.getTensorComponent(r).extent_int(0), maxFieldsForComponentRight[r]);
2029 INTREPID2_TEST_FOR_EXCEPTION(numTensorComponentsRight != numTensorComponentsLeft, std::invalid_argument,
"Right families must match left in the number of tensor components");
2034 for (
int familyOrdinal=0; familyOrdinal< rightFamilyCount; familyOrdinal++)
2036 INTREPID2_TEST_FOR_EXCEPTION(basisValuesRight.basisValues().tensorData(familyOrdinal).numTensorComponents() != numTensorComponentsLeft, std::invalid_argument,
"Right families must match left in the number of tensor components");
2041 if ((numPointTensorComponents == numTensorComponentsLeft) && basisValuesLeft.
axisAligned() && basisValuesRight.
axisAligned())
2050 Kokkos::Array<int,Parameters::MaxTensorComponents> pointDimensions;
2051 for (
int r=0; r<numPointTensorComponents; r++)
2058 Kokkos::View<Scalar**, DeviceType> integralView2;
2059 Kokkos::View<Scalar***, DeviceType> integralView3;
2060 if (integralViewRank == 3)
2068 for (
int leftFamilyOrdinal=0; leftFamilyOrdinal<leftFamilyCount; leftFamilyOrdinal++)
2071 int leftFieldOffset = basisValuesLeft.basisValues().familyFieldOrdinalOffset(leftFamilyOrdinal);
2073 const int leftVectorComponentCount = leftIsVectorValued ? basisValuesLeft.
vectorData().numComponents() : 1;
2074 for (
int leftVectorComponentOrdinal = 0; leftVectorComponentOrdinal < leftVectorComponentCount; leftVectorComponentOrdinal++)
2077 : basisValuesLeft.basisValues().tensorData(leftFamilyOrdinal);
2083 const int leftDimSpan = leftComponent.
extent_int(2);
2085 const int leftComponentFieldCount = leftComponent.
extent_int(0);
2087 for (
int rightFamilyOrdinal=0; rightFamilyOrdinal<rightFamilyCount; rightFamilyOrdinal++)
2090 int rightFieldOffset = basisValuesRight.
vectorData().familyFieldOrdinalOffset(rightFamilyOrdinal);
2092 const int rightVectorComponentCount = rightIsVectorValued ? basisValuesRight.
vectorData().numComponents() : 1;
2093 for (
int rightVectorComponentOrdinal = 0; rightVectorComponentOrdinal < rightVectorComponentCount; rightVectorComponentOrdinal++)
2096 : basisValuesRight.basisValues().tensorData(rightFamilyOrdinal);
2097 if (!rightComponent.
isValid())
2102 const int rightDimSpan = rightComponent.
extent_int(2);
2104 const int rightComponentFieldCount = rightComponent.
extent_int(0);
2107 if ((a_offset + leftDimSpan <= b_offset) || (b_offset + rightDimSpan <= a_offset))
2109 b_offset += rightDimSpan;
2115 INTREPID2_TEST_FOR_EXCEPTION(( a_offset != b_offset), std::logic_error,
"left and right dimension offsets should match.");
2116 INTREPID2_TEST_FOR_EXCEPTION(( leftDimSpan != rightDimSpan), std::invalid_argument,
"left and right components must span the same number of dimensions.");
2118 const int d_start = a_offset;
2119 const int d_end = d_start + leftDimSpan;
2122 ComponentIntegralsArray componentIntegrals;
2123 for (
int r=0; r<numPointTensorComponents; r++)
2140 const int numPoints = pointDimensions[r];
2147 const int leftTensorComponentDimSpan = leftTensorComponent.
extent_int(2);
2148 const int leftTensorComponentFields = leftTensorComponent.
extent_int(0);
2149 const int rightTensorComponentDimSpan = rightTensorComponent.
extent_int(2);
2150 const int rightTensorComponentFields = rightTensorComponent.
extent_int(0);
2152 INTREPID2_TEST_FOR_EXCEPTION(( leftTensorComponentDimSpan != rightTensorComponentDimSpan), std::invalid_argument,
"left and right components must span the same number of dimensions.");
2154 for (
int d=d_start; d<d_end; d++)
2156 ScalarView<Scalar,DeviceType> componentIntegralView;
2158 const bool allocateFadStorage = !(std::is_standard_layout<Scalar>::value && std::is_trivial<Scalar>::value);
2159 if (allocateFadStorage)
2162 componentIntegralView = ScalarView<Scalar,DeviceType>(
"componentIntegrals for tensor component " + std::to_string(r) +
", in dimension " + std::to_string(d), leftTensorComponentFields, rightTensorComponentFields, fad_size_output);
2166 componentIntegralView = ScalarView<Scalar,DeviceType>(
"componentIntegrals for tensor component " + std::to_string(r) +
", in dimension " + std::to_string(d), leftTensorComponentFields, rightTensorComponentFields);
2169 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<2>>({0,0},{leftTensorComponentFields,rightTensorComponentFields});
2172 leftTensorComponentDimSpan);
2173 Kokkos::parallel_for(
"compute componentIntegrals", policy, refSpaceIntegralFunctor);
2175 componentIntegrals[r][d] = componentIntegralView;
2177 if (approximateFlops != NULL)
2179 *approximateFlops += leftTensorComponentFields*rightTensorComponentFields*numPoints*(3);
2184 ExecutionSpace().fence();
2186 Kokkos::Array<int,3> upperBounds {cellDataExtent,leftComponentFieldCount,rightComponentFieldCount};
2187 Kokkos::Array<int,3> lowerBounds {0,0,0};
2188 auto policy = Kokkos::MDRangePolicy<ExecutionSpace,Kokkos::Rank<3>>(lowerBounds, upperBounds);
2190 Kokkos::parallel_for(
"compute field integrals", policy,
2191 KOKKOS_LAMBDA (
const int &cellDataOrdinal,
const int &leftFieldOrdinal,
const int &rightFieldOrdinal) {
2192 const Scalar & cellMeasureWeight = cellMeasures.
getTensorComponent(0)(cellDataOrdinal);
2200 Scalar integralSum = 0.0;
2201 for (
int d=d_start; d<d_end; d++)
2203 const Scalar & transformLeft_d = basisValuesLeft.
transformWeight(cellDataOrdinal,0,d,d);
2204 const Scalar & transformRight_d = basisValuesRight.
transformWeight(cellDataOrdinal,0,d,d);
2206 const Scalar & leftRightTransform_d = transformLeft_d * transformRight_d;
2209 Scalar integral_d = 1.0;
2211 for (
int r=0; r<numPointTensorComponents; r++)
2213 integral_d *= componentIntegrals[r][d](leftTensorIterator.
argument(r),rightTensorIterator.
argument(r));
2216 integralSum += leftRightTransform_d * integral_d;
2219 const int i = leftFieldOrdinal + leftFieldOffset;
2220 const int j = rightFieldOrdinal + rightFieldOffset;
2222 if (integralViewRank == 3)
2224 integralView3(cellDataOrdinal,i,j) += cellMeasureWeight * integralSum;
2228 integralView2(i,j) += cellMeasureWeight * integralSum;
2232 b_offset += rightDimSpan;
2235 a_offset += leftDimSpan;
2239 if (approximateFlops != NULL)
2242 *approximateFlops += (2 + spaceDim * (3 + numPointTensorComponents)) * cellDataExtent * numFieldsLeft * numFieldsRight;
2250 const bool transposeLeft =
true;
2251 const bool transposeRight =
false;
2257 const int leftRank = leftTransform.
rank();
2258 const int rightRank = rightTransform.
rank();
2262 const bool bothRank4 = (leftRank == 4) && (rightRank == 4);
2263 const bool bothRank3 = (leftRank == 3) && (rightRank == 3);
2264 const bool bothRank2 = (leftRank == 2) && (rightRank == 2);
2265 const bool ranks32 = ((leftRank == 3) && (rightRank == 2)) || ((leftRank == 2) && (rightRank == 3));
2266 const bool ranks42 = ((leftRank == 4) && (rightRank == 2)) || ((leftRank == 2) && (rightRank == 4));
2271 composedTransform.
storeMatMat(transposeLeft, leftTransform, transposeRight, rightTransform);
2274 if (approximateFlops != NULL)
2282 const int newRank = 4;
2285 extents[3] = extents[2];
2287 variationTypes[3] = variationTypes[2];
2289 auto leftTransformMatrix = leftTransform.
shallowCopy(newRank, extents, variationTypes);
2294 extents[3] = extents[2];
2296 variationTypes[3] = variationTypes[2];
2298 auto rightTransformMatrix = rightTransform.
shallowCopy(newRank, extents, variationTypes);
2301 composedTransform.
storeMatMat(transposeLeft, leftTransformMatrix, transposeRight, rightTransformMatrix);
2303 if (approximateFlops != NULL)
2314 const int newRank = 4;
2315 auto extents = composedTransform.
getExtents();
2317 composedTransform = composedTransform.
shallowCopy(newRank, extents, variationTypes);
2318 if (approximateFlops != NULL)
2325 const auto & rank3Transform = (leftRank == 3) ? leftTransform : rightTransform;
2326 const auto & rank2Transform = (leftRank == 2) ? leftTransform : rightTransform;
2334 const int newRank = 4;
2335 auto extents = composedTransform.
getExtents();
2345 extents[3] = extents[2];
2347 variationTypes[3] = variationTypes[2];
2350 composedTransform = composedTransform.
shallowCopy(newRank, extents, variationTypes);
2368 INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Unsupported transform combination");
2371 else if (leftTransform.
isValid())
2380 const int newRank = 4;
2384 composedTransform = leftTransform.
shallowCopy(newRank, extents, variationTypes);
2387 case 2: composedTransform = leftTransform;
break;
2389 INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Unsupported transform combination");
2392 else if (rightTransform.
isValid())
2395 composedTransform = rightTransform;
2398 case 4: composedTransform = rightTransform;
break;
2402 const int newRank = 4;
2405 extents[3] = extents[2];
2406 variationTypes[3] = variationTypes[2];
2410 composedTransform = rightTransform.
shallowCopy(newRank, extents, variationTypes);
2413 case 2: composedTransform = rightTransform;
break;
2415 INTREPID2_TEST_FOR_EXCEPTION(
true, std::invalid_argument,
"Unsupported transform combination");
2421 Kokkos::Array<ordinal_type,4> extents {basisValuesLeft.
numCells(),basisValuesLeft.
numPoints(),spaceDim,spaceDim};
2424 Kokkos::View<Scalar*,DeviceType> identityUnderlyingView(
"Intrepid2::FST::integrate() - identity view",spaceDim);
2425 Kokkos::deep_copy(identityUnderlyingView, 1.0);
2433 const int leftFamilyCount = basisValuesLeft. basisValues().numFamilies();
2434 const int rightFamilyCount = basisValuesRight.basisValues().numFamilies();
2435 const int leftComponentCount = leftIsVectorValued ? basisValuesLeft. vectorData().numComponents() : 1;
2436 const int rightComponentCount = rightIsVectorValued ? basisValuesRight.
vectorData().numComponents() : 1;
2438 int leftFieldOrdinalOffset = 0;
2439 for (
int leftFamilyOrdinal=0; leftFamilyOrdinal<leftFamilyCount; leftFamilyOrdinal++)
2444 bool haveLaunchedContributionToCurrentFamilyLeft =
false;
2445 for (
int leftComponentOrdinal=0; leftComponentOrdinal<leftComponentCount; leftComponentOrdinal++)
2448 : basisValuesLeft.basisValues().tensorData(leftFamilyOrdinal);
2452 a_offset += basisValuesLeft.
vectorData().numDimsForComponent(leftComponentOrdinal);
2456 int rightFieldOrdinalOffset = 0;
2457 for (
int rightFamilyOrdinal=0; rightFamilyOrdinal<rightFamilyCount; rightFamilyOrdinal++)
2461 bool haveLaunchedContributionToCurrentFamilyRight =
false;
2463 for (
int rightComponentOrdinal=0; rightComponentOrdinal<rightComponentCount; rightComponentOrdinal++)
2466 : basisValuesRight.basisValues().tensorData(rightFamilyOrdinal);
2467 if (!rightComponent.
isValid())
2470 b_offset += basisValuesRight.
vectorData().numDimsForComponent(rightComponentOrdinal);
2477 Kokkos::TeamPolicy<ExecutionSpace> policy = Kokkos::TeamPolicy<ExecutionSpace>(cellDataExtent,Kokkos::AUTO(),vectorSize);
2486 bool forceNonSpecialized =
false;
2487 bool usePointCacheForRank3Tensor =
true;
2491 if (haveLaunchedContributionToCurrentFamilyLeft && haveLaunchedContributionToCurrentFamilyRight)
2493 ExecutionSpace().fence();
2495 haveLaunchedContributionToCurrentFamilyLeft =
true;
2496 haveLaunchedContributionToCurrentFamilyRight =
true;
2498 if (integralViewRank == 2)
2502 auto functor =
Impl::F_IntegratePointValueCache<Scalar, DeviceType, 2>(integrals, leftComponent, composedTransform, rightComponent, cellMeasures, a_offset, b_offset, leftFieldOrdinalOffset, rightFieldOrdinalOffset);
2504 const int recommendedTeamSize = policy.team_size_recommended(functor,Kokkos::ParallelForTag());
2505 const int teamSize = functor.teamSize(recommendedTeamSize);
2507 policy = Kokkos::TeamPolicy<DeviceType>(cellDataExtent,teamSize,vectorSize);
2509 Kokkos::parallel_for(
"F_IntegratePointValueCache rank 2", policy, functor);
2511 if (approximateFlops != NULL)
2513 *approximateFlops += functor.approximateFlopCountPerCell() * integrals.
getDataExtent(0);
2518 auto functor =
Impl::F_Integrate<Scalar, DeviceType, 2>(integrals, leftComponent, composedTransform, rightComponent, cellMeasures, a_offset, b_offset, leftFieldOrdinalOffset, rightFieldOrdinalOffset, forceNonSpecialized);
2520 const int recommendedTeamSize = policy.team_size_recommended(functor,Kokkos::ParallelForTag());
2521 const int teamSize = functor.teamSize(recommendedTeamSize);
2523 policy = Kokkos::TeamPolicy<ExecutionSpace>(cellDataExtent,teamSize,vectorSize);
2525 Kokkos::parallel_for(
"F_Integrate rank 2", policy, functor);
2527 if (approximateFlops != NULL)
2529 *approximateFlops += functor.approximateFlopCountPerCell() * integrals.
getDataExtent(0);
2533 else if (integralViewRank == 3)
2537 auto functor =
Impl::F_IntegratePointValueCache<Scalar, DeviceType, 3>(integrals, leftComponent, composedTransform, rightComponent, cellMeasures, a_offset, b_offset, leftFieldOrdinalOffset, rightFieldOrdinalOffset);
2539 const int recommendedTeamSize = policy.team_size_recommended(functor,Kokkos::ParallelForTag());
2540 const int teamSize = functor.teamSize(recommendedTeamSize);
2542 policy = Kokkos::TeamPolicy<ExecutionSpace>(cellDataExtent,teamSize,vectorSize);
2544 Kokkos::parallel_for(
"F_IntegratePointValueCache rank 3", policy, functor);
2546 if (approximateFlops != NULL)
2548 *approximateFlops += functor.approximateFlopCountPerCell() * integrals.
getDataExtent(0);
2553 auto functor =
Impl::F_Integrate<Scalar, DeviceType, 3>(integrals, leftComponent, composedTransform, rightComponent, cellMeasures, a_offset, b_offset, leftFieldOrdinalOffset, rightFieldOrdinalOffset, forceNonSpecialized);
2555 const int recommendedTeamSize = policy.team_size_recommended(functor,Kokkos::ParallelForTag());
2556 const int teamSize = functor.teamSize(recommendedTeamSize);
2558 policy = Kokkos::TeamPolicy<DeviceType>(cellDataExtent,teamSize,vectorSize);
2560 Kokkos::parallel_for(
"F_Integrate rank 3", policy, functor);
2562 if (approximateFlops != NULL)
2564 *approximateFlops += functor.approximateFlopCountPerCell() * integrals.
getDataExtent(0);
2568 b_offset += rightIsVectorValued ? basisValuesRight.
vectorData().numDimsForComponent(rightComponentOrdinal) : 1;
2570 rightFieldOrdinalOffset += rightIsVectorValued ? basisValuesRight.
vectorData().numFieldsInFamily(rightFamilyOrdinal) : basisValuesRight.basisValues().numFieldsInFamily(rightFamilyOrdinal);
2572 a_offset += leftIsVectorValued ? basisValuesLeft.
vectorData().numDimsForComponent(leftComponentOrdinal) : 1;
2574 leftFieldOrdinalOffset += leftIsVectorValued ? basisValuesLeft.
vectorData().numFieldsInFamily(leftFamilyOrdinal) : basisValuesLeft.basisValues().numFieldsInFamily(leftFamilyOrdinal);
2581 ExecutionSpace().fence();