Intrepid2
Intrepid2_HierarchicalBasis_HDIV_PYR.hpp
Go to the documentation of this file.
1// @HEADER
2// *****************************************************************************
3// Intrepid2 Package
4//
5// Copyright 2007 NTESS and the Intrepid2 contributors.
6// SPDX-License-Identifier: BSD-3-Clause
7// *****************************************************************************
8// @HEADER
9
18
19#ifndef Intrepid2_HierarchicalBasis_HDIV_PYR_h
20#define Intrepid2_HierarchicalBasis_HDIV_PYR_h
21
22#include <Kokkos_DynRankView.hpp>
23
24#include <Intrepid2_config.h>
25
26#include "Intrepid2_Basis.hpp"
32#include "Intrepid2_Utils.hpp"
33
34#include <math.h>
35
36#include "Teuchos_RCP.hpp"
37
38namespace Intrepid2
39{
45 template<class DeviceType, class OutputScalar, class PointScalar,
46 class OutputFieldType, class InputPointsType>
47 struct Hierarchical_HDIV_PYR_Functor
48 {
49 using ExecutionSpace = typename DeviceType::execution_space;
50 using ScratchSpace = typename ExecutionSpace::scratch_memory_space;
51 using OutputScratchView = Kokkos::View<OutputScalar*,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
52 using OutputScratchView2D = Kokkos::View<OutputScalar**,ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
53 using PointScratchView = Kokkos::View<PointScalar*, ScratchSpace,Kokkos::MemoryTraits<Kokkos::Unmanaged>>;
54
55 using TeamPolicy = Kokkos::TeamPolicy<ExecutionSpace>;
56 using TeamMember = typename TeamPolicy::member_type;
57
58 EOperator opType_;
59
60 OutputFieldType output_; // F,P
61 InputPointsType inputPoints_; // P,D
62
63 int polyOrder_;
64 bool useCGBasis_;
65 int numFields_, numPoints_;
66
67 size_t fad_size_output_;
68
69 static const int numVertices = 5;
70 static const int numMixedEdges = 4;
71 static const int numTriEdges = 4;
72 static const int numEdges = 8;
73 // the following ordering of the edges matches that used by ESEAS
74 // (it *looks* like this is what ESEAS uses; the basis comparison tests will clarify whether I've read their code correctly)
75 // see also PyramidEdgeNodeMap in Shards_BasicTopologies.hpp
76 const int edge_start_[numEdges] = {0,1,2,3,0,1,2,3}; // edge i is from edge_start_[i] to edge_end_[i]
77 const int edge_end_[numEdges] = {1,2,3,0,4,4,4,4}; // edge i is from edge_start_[i] to edge_end_[i]
78
79 // quadrilateral face comes first
80 static const int numQuadFaces = 1;
81 static const int numTriFaces = 4;
82
83 // face ordering matches ESEAS. (See BlendProjectPyraTF in ESEAS.)
84 const int tri_face_vertex_0[numTriFaces] = {0,1,3,0}; // faces are abc where 0 ≤ a < b < c ≤ 3
85 const int tri_face_vertex_1[numTriFaces] = {1,2,2,3};
86 const int tri_face_vertex_2[numTriFaces] = {4,4,4,4};
87
88 Hierarchical_HDIV_PYR_Functor(EOperator opType, OutputFieldType output, InputPointsType inputPoints,
89 int polyOrder)
90 : opType_(opType), output_(output), inputPoints_(inputPoints),
91 polyOrder_(polyOrder),
92 fad_size_output_(getScalarDimensionForView(output))
93 {
94 numFields_ = output.extent_int(0);
95 numPoints_ = output.extent_int(1);
96 const auto & p = polyOrder;
97 const auto basisCardinality = p * p + 2 * p * (p+1) + 3 * p * p * (p-1);
98
99 INTREPID2_TEST_FOR_EXCEPTION(numPoints_ != inputPoints.extent_int(0), std::invalid_argument, "point counts need to match!");
100 INTREPID2_TEST_FOR_EXCEPTION(numFields_ != basisCardinality, std::invalid_argument, "output field size does not match basis cardinality");
101 }
102
104 KOKKOS_INLINE_FUNCTION
105 void cross(Kokkos::Array<OutputScalar,3> &c,
106 const Kokkos::Array<OutputScalar,3> &a,
107 const Kokkos::Array<OutputScalar,3> &b) const
108 {
109 c[0] = a[1] * b[2] - a[2] * b[1];
110 c[1] = a[2] * b[0] - a[0] * b[2];
111 c[2] = a[0] * b[1] - a[1] * b[0];
112 }
113
115 KOKKOS_INLINE_FUNCTION
116 void dot(OutputScalar &c,
117 const Kokkos::Array<OutputScalar,3> &a,
118 const Kokkos::Array<OutputScalar,3> &b) const
119 {
120 c = 0;
121 for (ordinal_type d=0; d<3; d++)
122 {
123 c += a[d] * b[d];
124 }
125 }
126
127 KOKKOS_INLINE_FUNCTION
128 OutputScalar dot(const Kokkos::Array<OutputScalar,3> &a,
129 const Kokkos::Array<OutputScalar,3> &b) const
130 {
131 OutputScalar c = 0;
132 for (ordinal_type d=0; d<3; d++)
133 {
134 c += a[d] * b[d];
135 }
136 return c;
137 }
138
139 KOKKOS_INLINE_FUNCTION
140 void E_E(Kokkos::Array<OutputScalar,3> &EE,
141 const ordinal_type &i,
142 const OutputScratchView &PHom,
143 const PointScalar &s0, const PointScalar &s1,
144 const Kokkos::Array<PointScalar,3> &s0_grad,
145 const Kokkos::Array<PointScalar,3> &s1_grad) const
146 {
147 const auto & P_i = PHom(i);
148 for (ordinal_type d=0; d<3; d++)
149 {
150 EE[d] = P_i * (s0 * s1_grad[d] - s1 * s0_grad[d]);
151 }
152 }
153
154 KOKKOS_INLINE_FUNCTION
155 void E_E_CURL(Kokkos::Array<OutputScalar,3> &curl_EE,
156 const ordinal_type &i,
157 const OutputScratchView &PHom,
158 const PointScalar &s0, const PointScalar &s1,
159 const Kokkos::Array<PointScalar,3> &s0_grad,
160 const Kokkos::Array<PointScalar,3> &s1_grad) const
161 {
162 // curl (E_i^E)(s0,s1) = (i+2) [P_i](s0,s1) (grad s0 x grad s1)
163 OutputScalar ip2_Pi = (i+2) * PHom(i);
164 cross(curl_EE, s0_grad, s1_grad);
165 for (ordinal_type d=0; d<3; d++)
166 {
167 curl_EE[d] *= ip2_Pi;
168 }
169 }
170
173 KOKKOS_INLINE_FUNCTION
174 void V_QUAD(Kokkos::Array<OutputScalar,3> &VQUAD,
175 const ordinal_type &i, const ordinal_type &j,
176 const OutputScratchView &PHom_s,
177 const PointScalar &s0, const PointScalar &s1,
178 const Kokkos::Array<PointScalar,3> &s0_grad,
179 const Kokkos::Array<PointScalar,3> &s1_grad,
180 const OutputScratchView &PHom_t,
181 const PointScalar &t0, const PointScalar &t1,
182 const Kokkos::Array<PointScalar,3> &t0_grad,
183 const Kokkos::Array<PointScalar,3> &t1_grad) const
184 {
185 Kokkos::Array<OutputScalar,3> EE_i, EE_j;
186
187 E_E(EE_i, i, PHom_s, s0, s1, s0_grad, s1_grad);
188 E_E(EE_j, j, PHom_t, t0, t1, t0_grad, t1_grad);
189
190 // VQUAD = EE_i x EE_j:
191 cross(VQUAD, EE_i, EE_j);
192 }
193
196 KOKKOS_INLINE_FUNCTION
197 void E_QUAD(Kokkos::Array<OutputScalar,3> &EQUAD,
198 const ordinal_type &i, const ordinal_type &j,
199 const OutputScratchView &HomPi_s01,
200 const PointScalar &s0, const PointScalar &s1,
201 const Kokkos::Array<PointScalar,3> &s0_grad,
202 const Kokkos::Array<PointScalar,3> &s1_grad,
203 const OutputScratchView &HomLi_t01) const
204 {
205 const OutputScalar &phiE_j = HomLi_t01(j);
206
207 Kokkos::Array<OutputScalar,3> EE_i;
208 E_E(EE_i, i, HomPi_s01, s0, s1, s0_grad, s1_grad);
209
210 for (ordinal_type d=0; d<3; d++)
211 {
212 EQUAD[d] = phiE_j * EE_i[d];
213 }
214 }
215
216 KOKKOS_INLINE_FUNCTION
217 void E_QUAD_CURL(Kokkos::Array<OutputScalar,3> &EQUAD_CURL,
218 const ordinal_type &i, const ordinal_type &j,
219 const OutputScratchView &HomPi_s01,
220 const PointScalar &s0, const PointScalar &s1,
221 const Kokkos::Array<PointScalar,3> &s0_grad,
222 const Kokkos::Array<PointScalar,3> &s1_grad,
223 const OutputScratchView &HomPj_t01,
224 const OutputScratchView &HomLj_t01,
225 const OutputScratchView &HomLj_dt_t01,
226 const Kokkos::Array<PointScalar,3> &t0_grad,
227 const Kokkos::Array<PointScalar,3> &t1_grad) const
228 {
229 const OutputScalar &phiE_j = HomLj_t01(j);
230
231 Kokkos::Array<OutputScalar,3> curl_EE_i;
232 E_E_CURL(curl_EE_i, i, HomPi_s01, s0, s1, s0_grad, s1_grad);
233
234 Kokkos::Array<OutputScalar,3> EE_i;
235 E_E(EE_i, i, HomPi_s01, s0, s1, s0_grad, s1_grad);
236
237 Kokkos::Array<OutputScalar,3> grad_phiE_j;
238 computeGradHomLi(grad_phiE_j, j, HomPj_t01, HomLj_dt_t01, t0_grad, t1_grad);
239
240 cross(EQUAD_CURL, grad_phiE_j, EE_i);
241 for (ordinal_type d=0; d<3; d++)
242 {
243 EQUAD_CURL[d] += phiE_j * curl_EE_i[d];
244 }
245 }
246
248 KOKKOS_INLINE_FUNCTION
249 void V_LEFT_TRI(Kokkos::Array<OutputScalar,3> &VLEFTTRI,
250 const OutputScalar &phi_i, const Kokkos::Array<OutputScalar,3> &phi_i_grad,
251 const OutputScalar &phi_j, const Kokkos::Array<OutputScalar,3> &phi_j_grad,
252 const OutputScalar &t0, const Kokkos::Array<OutputScalar,3> &t0_grad) const {
253 cross(VLEFTTRI, phi_i_grad, phi_j_grad);
254 const OutputScalar t0_2 = t0 * t0;
255 for (ordinal_type d=0; d<3; d++)
256 {
257 VLEFTTRI[d] *= t0_2;
258 }
259
260 Kokkos::Array<OutputScalar,3> tmp_t0_grad_t0, tmp_diff, tmp_cross;
261 for (ordinal_type d=0; d<3; d++)
262 {
263 tmp_t0_grad_t0[d] = t0 * t0_grad[d];
264 tmp_diff[d] = phi_i * phi_j_grad[d] - phi_j * phi_i_grad[d];
265 }
266 cross(tmp_cross, tmp_t0_grad_t0, tmp_diff);
267
268 for (ordinal_type d=0; d<3; d++)
269 {
270 VLEFTTRI[d] += tmp_cross[d];
271 }
272 };
273
274
276 KOKKOS_INLINE_FUNCTION
277 void V_RIGHT_TRI(Kokkos::Array<OutputScalar,3> &VRIGHTTRI,
278 const OutputScalar &mu1, const Kokkos::Array<OutputScalar,3> &mu1_grad,
279 const OutputScalar &phi_i, const Kokkos::Array<OutputScalar,3> &phi_i_grad,
280 const OutputScalar &t0, const Kokkos::Array<OutputScalar,3> &t0_grad) const {
281 Kokkos::Array<OutputScalar,3> left_vector; // left vector in the cross product we take below.
282
283 const OutputScalar t0_2 = t0 * t0;
284 for (ordinal_type d=0; d<3; d++)
285 {
286 left_vector[d] = t0_2 * phi_i_grad[d] + 2. * t0 * phi_i * t0_grad[d];
287 }
288 cross(VRIGHTTRI, left_vector, mu1_grad);
289 };
290
291 // This is the "Ancillary Operator" V^{tri}_{ij} on p. 433 of Fuentes et al.
292 KOKKOS_INLINE_FUNCTION
293 void V_TRI(Kokkos::Array<OutputScalar,3> &VTRI,
294 const ordinal_type &i, // i >= 0
295 const ordinal_type &j, // j >= 0
296 const OutputScratchView &P, // container in which shiftedScaledLegendreValues have been computed for the appropriate face
297 const OutputScratchView &P_2ip1, // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face
298 const Kokkos::Array<PointScalar,3> &vectorWeight) const // s0 (grad s1 x grad s2) + s1 (grad s2 x grad s0) + s2 (grad s0 x grad s1) -- see computeFaceVectorWeight()
299 {
300 const auto &P_i = P(i);
301 const auto &P_2ip1_j = P_2ip1(j);
302
303 for (ordinal_type d=0; d<3; d++)
304 {
305 VTRI[d] = P_i * P_2ip1_j * vectorWeight[d];
306 }
307 }
308
309 // Divergence of the "Ancillary Operator" V^{tri}_{ij} on p. 433 of Fuentes et al.
310 KOKKOS_INLINE_FUNCTION
311 void V_TRI_DIV(OutputScalar &VTRI_DIV,
312 const ordinal_type &i, // i >= 0
313 const ordinal_type &j, // j >= 0
314 const OutputScratchView &P, // container in which shiftedScaledLegendreValues have been computed for the appropriate face
315 const OutputScratchView &P_2ip1, // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face
316 const OutputScalar &divWeight) const // grad s0 \dot (grad s1 x grad s2)
317 {
318 const auto &P_i = P(i);
319 const auto &P_2ip1_j = P_2ip1(j);
320
321 VTRI_DIV = (i + j + 3.) * P_i * P_2ip1_j * divWeight;
322 }
323
325 KOKKOS_INLINE_FUNCTION
326 void V_TRI_B42(Kokkos::Array<OutputScalar,3> &VTRI_mus0_mus1_s2_over_mu,
327 const Kokkos::Array<OutputScalar,3> &VTRI_00_s0_s1_s2,
328 const Kokkos::Array<OutputScalar,3> &EE_0_s0_s1,
329 const OutputScalar &s2,
330 const OutputScalar &mu, const Kokkos::Array<OutputScalar,3> &mu_grad,
331 const ordinal_type &i, // i >= 0
332 const ordinal_type &j, // j >= 0
333 const OutputScratchView &P_mus0_mus1, // container in which shiftedScaledLegendreValues have been computed for the appropriate face, with arguments (mu s0, mu s1)
334 const OutputScratchView &P_2ip1_mus0pmus1_s2 // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face, with arguments (mu s0 + mu s1, s2)
335 ) const
336 {
337 const auto &Pi_mus0_mus1 = P_mus0_mus1(i);
338 const auto &Pj_2ip1_mus0pmus1_s2 = P_2ip1_mus0pmus1_s2(j);
339
340 // place s2 (grad mu) x E^E_0 in result container
341 cross(VTRI_mus0_mus1_s2_over_mu, mu_grad, EE_0_s0_s1);
342 for (ordinal_type d=0; d<3; d++)
343 {
344 VTRI_mus0_mus1_s2_over_mu[d] *= s2;
345 }
346
347 // add mu V^{tri}_00 to it:
348 for (ordinal_type d=0; d<3; d++)
349 {
350 VTRI_mus0_mus1_s2_over_mu[d] += mu * VTRI_00_s0_s1_s2[d];
351 }
352
353 // multiply by [P_i, P^{2i+1}_j]
354 for (ordinal_type d=0; d<3; d++)
355 {
356 VTRI_mus0_mus1_s2_over_mu[d] *= Pi_mus0_mus1 * Pj_2ip1_mus0pmus1_s2;
357 }
358 }
359
361 KOKKOS_INLINE_FUNCTION
362 void V_TRI_B42_DIV(OutputScalar &div_VTRI_mus0_mus1_s2_over_mu,
363 const Kokkos::Array<OutputScalar,3> &VTRI_00_s0_s1_s2,
364 const Kokkos::Array<OutputScalar,3> &EE_0_s0_s1,
365 const OutputScalar &s2, const Kokkos::Array<OutputScalar,3> &s2_grad,
366 const OutputScalar &mu, const Kokkos::Array<OutputScalar,3> &mu_grad,
367 const ordinal_type &i, // i >= 0
368 const ordinal_type &j, // j >= 0
369 const OutputScratchView &P_mus0_mus1, // container in which shiftedScaledLegendreValues have been computed for the appropriate face, with arguments (mu s0, mu s1)
370 const OutputScratchView &P_2ip1_mus0pmus1_s2 // container in which shiftedScaledJacobiValues have been computed for (2i+1) for the appropriate face, with arguments (mu s0 + mu s1, s2)
371 ) const
372 {
373 const auto &Pi_mus0_mus1 = P_mus0_mus1(i);
374 const auto &Pj_2ip1_mus0pmus1_s2 = P_2ip1_mus0pmus1_s2(j);
375
376 Kokkos::Array<OutputScalar,3> vector;
377
378 // place E^E_0 x grad s2 in scratch vector container
379 cross(vector, EE_0_s0_s1, s2_grad);
380 // multiply by (i+j+3)
381 for (ordinal_type d=0; d<3; d++)
382 {
383 vector[d] *= i+j+3.;
384 }
385 // subtract V_00
386 for (ordinal_type d=0; d<3; d++)
387 {
388 vector[d] -= VTRI_00_s0_s1_s2[d];
389 }
390
391 OutputScalar dot_product;
392 dot(dot_product, mu_grad, vector);
393
394 div_VTRI_mus0_mus1_s2_over_mu = Pi_mus0_mus1 * Pj_2ip1_mus0pmus1_s2 * dot_product;
395 }
396
397 KOKKOS_INLINE_FUNCTION
398 void computeFaceVectorWeight(Kokkos::Array<OutputScalar,3> &vectorWeight,
399 const PointScalar &s0, const Kokkos::Array<PointScalar,3> &s0Grad,
400 const PointScalar &s1, const Kokkos::Array<PointScalar,3> &s1Grad,
401 const PointScalar &s2, const Kokkos::Array<PointScalar,3> &s2Grad) const
402 {
403 // compute s0 (grad s1 x grad s2) + s1 (grad s2 x grad s0) + s2 (grad s0 x grad s1)
404
405 Kokkos::Array<Kokkos::Array<PointScalar,3>,3> cross_products;
406
407 cross(cross_products[0], s1Grad, s2Grad);
408 cross(cross_products[1], s2Grad, s0Grad);
409 cross(cross_products[2], s0Grad, s1Grad);
410
411 Kokkos::Array<PointScalar,3> s {s0,s1,s2};
412
413 for (ordinal_type d=0; d<3; d++)
414 {
415 OutputScalar v_d = 0;
416 for (ordinal_type i=0; i<3; i++)
417 {
418 v_d += s[i] * cross_products[i][d];
419 }
420 vectorWeight[d] = v_d;
421 }
422 }
423
424 KOKKOS_INLINE_FUNCTION
425 void computeFaceDivWeight(OutputScalar &divWeight,
426 const Kokkos::Array<PointScalar,3> &s0Grad,
427 const Kokkos::Array<PointScalar,3> &s1Grad,
428 const Kokkos::Array<PointScalar,3> &s2Grad) const
429 {
430 // grad s0 \dot (grad s1 x grad s2)
431
432 Kokkos::Array<PointScalar,3> grad_s1_cross_grad_s2;
433 cross(grad_s1_cross_grad_s2, s1Grad, s2Grad);
434
435 dot(divWeight, s0Grad, grad_s1_cross_grad_s2);
436 }
437
438 KOKKOS_INLINE_FUNCTION
439 void computeGradHomLi(Kokkos::Array<OutputScalar,3> &HomLi_grad, // grad [L_i](s0,s1)
440 const ordinal_type i,
441 const OutputScratchView &HomPi_s0s1, // [P_i](s0,s1)
442 const OutputScratchView &HomLi_dt_s0s1, // [d/dt L_i](s0,s1)
443 const Kokkos::Array<PointScalar,3> &s0Grad,
444 const Kokkos::Array<PointScalar,3> &s1Grad) const
445 {
446// grad [L_i](s0,s1) = [P_{i-1}](s0,s1) * grad s1 + [R_{i-1}](s0,s1) * grad (s0 + s1)
447 const auto & R_i_minus_1 = HomLi_dt_s0s1(i); // d/dt L_i = R_{i-1}
448 const auto & P_i_minus_1 = HomPi_s0s1(i-1);
449 for (ordinal_type d=0; d<3; d++)
450 {
451 HomLi_grad[d] = P_i_minus_1 * s1Grad[d] + R_i_minus_1 * (s0Grad[d] + s1Grad[d]);
452 }
453 }
454
455 KOKKOS_INLINE_FUNCTION
456 void operator()( const TeamMember & teamMember ) const
457 {
458 auto pointOrdinal = teamMember.league_rank();
459 OutputScratchView scratch1D_1, scratch1D_2, scratch1D_3;
460 OutputScratchView scratch1D_4, scratch1D_5, scratch1D_6;
461 OutputScratchView scratch1D_7, scratch1D_8, scratch1D_9;
462 if (fad_size_output_ > 0) {
463 scratch1D_1 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
464 scratch1D_2 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
465 scratch1D_3 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
466 scratch1D_4 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
467 scratch1D_5 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
468 scratch1D_6 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
469 scratch1D_7 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
470 scratch1D_8 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
471 scratch1D_9 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1, fad_size_output_);
472 }
473 else {
474 scratch1D_1 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
475 scratch1D_2 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
476 scratch1D_3 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
477 scratch1D_4 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
478 scratch1D_5 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
479 scratch1D_6 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
480 scratch1D_7 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
481 scratch1D_8 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
482 scratch1D_9 = OutputScratchView(teamMember.team_shmem(), polyOrder_ + 1);
483 }
484
485 const auto & x = inputPoints_(pointOrdinal,0);
486 const auto & y = inputPoints_(pointOrdinal,1);
487 const auto & z = inputPoints_(pointOrdinal,2);
488
489 // Intrepid2 uses (-1,1)^2 for x,y
490 // ESEAS uses (0,1)^2
491
492 Kokkos::Array<PointScalar,3> coords;
493 transformToESEASPyramid<>(coords[0], coords[1], coords[2], x, y, z); // map x,y coordinates from (-z,z)^2 to (0,z)^2
494
495 // pyramid "affine" coordinates and gradients get stored in lambda, lambdaGrad:
496 using Kokkos::Array;
497 Array<PointScalar,5> lambda;
498 Array<Kokkos::Array<PointScalar,3>,5> lambdaGrad;
499
500 Array<Array<PointScalar,3>,2> mu;
501 Array<Array<Array<PointScalar,3>,3>,2> muGrad;
502
503 Array<Array<PointScalar,2>,3> nu;
504 Array<Array<Array<PointScalar,3>,2>,3> nuGrad;
505
506 affinePyramid(lambda, lambdaGrad, mu, muGrad, nu, nuGrad, coords);
507
508 switch (opType_)
509 {
510 case OPERATOR_VALUE:
511 {
512 ordinal_type fieldOrdinalOffset = 0;
513 // quadrilateral face
514 {
515 // rename scratch1, scratch2
516 auto & Pi = scratch1D_1;
517 auto & Pj = scratch1D_2;
518
519 auto & s0 = mu[0][0], & s1 = mu[1][0];
520 auto & s0_grad = muGrad[0][0], & s1_grad = muGrad[1][0];
521 auto & t0 = mu[0][1], & t1 = mu[1][1];
522 auto & t0_grad = muGrad[0][1], & t1_grad = muGrad[1][1];
523
524 Polynomials::shiftedScaledLegendreValues(Pi, polyOrder_-1, s1, s0 + s1);
525 Polynomials::shiftedScaledLegendreValues(Pj, polyOrder_-1, t1, t0 + t1);
526
527 const auto & muZ_0 = mu[0][2];
528 OutputScalar mu0_cubed = muZ_0 * muZ_0 * muZ_0;
529
530 // following the ESEAS ordering: j increments first
531 for (int j=0; j<polyOrder_; j++)
532 {
533 for (int i=0; i<polyOrder_; i++)
534 {
535 Kokkos::Array<OutputScalar,3> VQUAD; // output from V_QUAD
536 V_QUAD(VQUAD, i, j,
537 Pi, s0, s1, s0_grad, s1_grad,
538 Pj, t0, t1, t0_grad, t1_grad);
539
540 for (ordinal_type d=0; d<3; d++)
541 {
542 output_(fieldOrdinalOffset,pointOrdinal,d) = mu0_cubed * VQUAD[d];
543 }
544 fieldOrdinalOffset++;
545 }
546 }
547 }
548
549 // triangle faces
550 {
551 /*
552 Face functions for triangular face (d,e,f) given by:
553 1/2 * ( mu_c^{zeta,xi_b} * V_TRI_ij(nu_012^{zeta,xi_a})
554 + 1/mu_c^{zeta,xi_b} * V_TRI_ij(nu_012^{zeta,xi_a} * mu_c^{zeta,xi_b}) )
555 but the division by mu_c^{zeta,xi_b} should ideally be avoided in computations; there is
556 an alternate expression, not implemented by ESEAS. We start with the above expression
557 so that we can get agreement with ESEAS. Hopefully after that we can do the alternative,
558 and confirm that we maintain agreement with ESEAS for points where ESEAS does not generate
559 nans.
560
561
562 where s0, s1, s2 are nu[0][a-1],nu[1][a-1],nu[2][a-1] (nu_012 above)
563 and (a,b) = (1,2) for faces 0,2; (a,b) = (2,1) for others;
564 c = 0 for faces 0,3; c = 1 for others
565 */
566 const auto & P = scratch1D_1; // for V_TRI( nu_012)
567 const auto & P_2ip1 = scratch1D_2;
568 const auto & Pmu = scratch1D_3; // for V_TRI( mu * nu_012)
569 const auto & Pmu_2ip1 = scratch1D_4;
570 for (int faceOrdinal=0; faceOrdinal<numTriFaces; faceOrdinal++)
571 {
572 // face 0,2 --> a=1, b=2
573 // face 1,3 --> a=2, b=1
574 int a = (faceOrdinal % 2 == 0) ? 1 : 2;
575 int b = 3 - a;
576 // face 0,3 --> c=0
577 // face 1,2 --> c=1
578 int c = ((faceOrdinal == 0) || (faceOrdinal == 3)) ? 0 : 1;
579
580 const auto & s0 = nu[0][a-1]; const auto & s0_grad = nuGrad[0][a-1];
581 const auto & s1 = nu[1][a-1]; const auto & s1_grad = nuGrad[1][a-1];
582 const auto & s2 = nu[2][a-1];
583 const PointScalar jacobiScaling = s0 + s1 + s2;
584
585 const PointScalar legendreScaling = s0 + s1;
586 Polynomials::shiftedScaledLegendreValues(P, polyOrder_-1, s1, legendreScaling);
587
588 const auto lambda0_index = tri_face_vertex_0[faceOrdinal];
589 const auto lambda1_index = tri_face_vertex_1[faceOrdinal];
590 const auto lambda2_index = tri_face_vertex_2[faceOrdinal];
591
592 const auto & mu_c_b = mu [c][b-1];
593 const auto & mu_c_b_grad = muGrad[c][b-1];
594 const auto & mu_s0 = lambda[lambda0_index];
595 const auto & mu_s1 = lambda[lambda1_index];
596 const auto & mu_s2 = lambda[lambda2_index];
597
598 const PointScalar muJacobiScaling = mu_s0 + mu_s1 + mu_s2;
599
600 const PointScalar muLegendreScaling = mu_s0 + mu_s1;
601 Polynomials::shiftedScaledLegendreValues(Pmu, polyOrder_-1, mu_s1, muLegendreScaling);
602
603 Kokkos::Array<PointScalar, 3> vectorWeight;
604 computeFaceVectorWeight(vectorWeight, nu[0][a-1], nuGrad[0][a-1],
605 nu[1][a-1], nuGrad[1][a-1],
606 nu[2][a-1], nuGrad[2][a-1]);
607
608 Kokkos::Array<OutputScalar,3> VTRI_00;
609 V_TRI(VTRI_00,0,0,P,P_2ip1,vectorWeight);
610
611 Kokkos::Array<OutputScalar,3> EE_0;
612 E_E(EE_0, 0, P, s0, s1, s0_grad, s1_grad);
613
614 for (int totalPolyOrder=0; totalPolyOrder<polyOrder_; totalPolyOrder++)
615 {
616 for (int i=0; i<=totalPolyOrder; i++)
617 {
618 const int j = totalPolyOrder - i;
619
620 const double alpha = i*2.0 + 1;
621 Polynomials::shiftedScaledJacobiValues( P_2ip1, alpha, polyOrder_-1, s2, jacobiScaling);
622 Polynomials::shiftedScaledJacobiValues(Pmu_2ip1, alpha, polyOrder_-1, mu_s2, muJacobiScaling);
623
624 Kokkos::Array<OutputScalar,3> VTRI; // output from V_TRI
625 V_TRI(VTRI, i, j, P, P_2ip1, vectorWeight);
626
627 Kokkos::Array<OutputScalar,3> one_over_mu_VTRI_mu; // (B.42) result
628 V_TRI_B42(one_over_mu_VTRI_mu, VTRI_00, EE_0, s2, mu_c_b, mu_c_b_grad, i, j, Pmu, Pmu_2ip1);
629
630 for (ordinal_type d=0; d<3; d++)
631 {
632 output_(fieldOrdinalOffset,pointOrdinal,d) = 0.5 * (VTRI[d] * mu_c_b + one_over_mu_VTRI_mu[d]);
633 }
634
635 fieldOrdinalOffset++;
636 }
637 }
638 }
639 } // triangle faces block
640
641 // interior functions
642 {
643 // label scratch
644 const auto & Li_muZ01 = scratch1D_1; // used for phi_k^E values in Family I, II, IV
645 const auto & Li_muX01 = scratch1D_2; // used for E_QUAD computations
646 const auto & Li_muY01 = scratch1D_3; // used for E_QUAD computations
647 const auto & Pi_muX01 = scratch1D_4; // used for E_QUAD computations where xi_1 comes first
648 const auto & Pi_muY01 = scratch1D_5; // used for E_QUAD computations where xi_2 comes first
649 const auto & Pi_muZ01 = scratch1D_6; // used for E_QUAD computations where xi_2 comes first
650 const auto & Li_dt_muX01 = scratch1D_7; // used for E_QUAD computations
651 const auto & Li_dt_muY01 = scratch1D_8; // used for E_QUAD computations
652 const auto & Li_dt_muZ01 = scratch1D_9; // used for E_QUAD computations
653
654 const auto & muX_0 = mu[0][0]; const auto & muX_0_grad = muGrad[0][0];
655 const auto & muX_1 = mu[1][0]; const auto & muX_1_grad = muGrad[1][0];
656 const auto & muY_0 = mu[0][1]; const auto & muY_0_grad = muGrad[0][1];
657 const auto & muY_1 = mu[1][1]; const auto & muY_1_grad = muGrad[1][1];
658 const auto & muZ_0 = mu[0][2]; const auto & muZ_0_grad = muGrad[0][2];
659 const auto & muZ_1 = mu[1][2]; const auto & muZ_1_grad = muGrad[1][2];
660
661 Polynomials::shiftedScaledIntegratedLegendreValues(Li_muX01, polyOrder_, muX_1, muX_0 + muX_1);
662 Polynomials::shiftedScaledIntegratedLegendreValues(Li_muY01, polyOrder_, muY_1, muY_0 + muY_1);
663 Polynomials::shiftedScaledIntegratedLegendreValues(Li_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
664
665 Polynomials::shiftedScaledLegendreValues(Pi_muX01, polyOrder_, muX_1, muX_0 + muX_1);
666 Polynomials::shiftedScaledLegendreValues(Pi_muY01, polyOrder_, muY_1, muY_0 + muY_1);
667 Polynomials::shiftedScaledLegendreValues(Pi_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
668
669 Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muX01, Pi_muX01, polyOrder_, muX_1, muX_0 + muX_1);
670 Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muY01, Pi_muY01, polyOrder_, muY_1, muY_0 + muY_1);
671 Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muZ01, Pi_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
672
673 // FAMILIES I & II -- divergence-free families
674 // following the ESEAS ordering: k increments first
675 for (int f=0; f<2; f++)
676 {
677 const auto &s0 = (f==0) ? muX_0 : muY_0; const auto & s0_grad = (f==0) ? muX_0_grad : muY_0_grad;
678 const auto &s1 = (f==0) ? muX_1 : muY_1; const auto & s1_grad = (f==0) ? muX_1_grad : muY_1_grad;
679 const auto & t0_grad = (f==0) ? muY_0_grad : muX_0_grad;
680 const auto & t1_grad = (f==0) ? muY_1_grad : muX_1_grad;
681 const auto & Pi_s01 = (f==0) ? Pi_muX01 : Pi_muY01;
682 const auto & Pi_t01 = (f==0) ? Pi_muY01 : Pi_muX01;
683 const auto & Li_t01 = (f==0) ? Li_muY01 : Li_muX01;
684 const auto & Li_dt_t01 = (f==0) ? Li_dt_muY01 : Li_dt_muX01;
685
686 for (int k=2; k<=polyOrder_; k++)
687 {
688 const auto & phi_k = Li_muZ01(k);
689 Kokkos::Array<OutputScalar,3> phi_k_grad;
690 computeGradHomLi(phi_k_grad, k, Pi_muZ01, Li_dt_muZ01, muZ_0_grad, muZ_1_grad);
691
692 Kokkos::Array<OutputScalar,3> muZ0_grad_phi_k_plus_phi_k_grad_muZ0;
693 for (ordinal_type d=0; d<3; d++)
694 {
695 muZ0_grad_phi_k_plus_phi_k_grad_muZ0[d] = muZ_0 * phi_k_grad[d] + phi_k * muZ_0_grad[d];
696 }
697
698 // for reasons that I don't entirely understand, ESEAS switches whether i or j are the fastest-moving indices depending on whether it's family I or II. Following their code, I'm calling the outer loop variable jg, inner ig.
699 // (Cross-code comparisons are considerably simpler if we number the dofs in the same way.)
700 ordinal_type jg_min = (f==0) ? 2 : 0;
701 ordinal_type jg_max = (f==0) ? polyOrder_ : polyOrder_-1;
702 ordinal_type ig_min = (f==0) ? 0 : 2;
703 ordinal_type ig_max = (f==0) ? polyOrder_ -1 : polyOrder_;
704 for (ordinal_type jg=jg_min; jg<=jg_max; jg++)
705 {
706 for (ordinal_type ig=ig_min; ig<=ig_max; ig++)
707 {
708 const ordinal_type &i = (f==0) ? ig : jg;
709 const ordinal_type &j = (f==0) ? jg : ig;
710 Kokkos::Array<OutputScalar,3> EQUAD_ij;
711 Kokkos::Array<OutputScalar,3> curl_EQUAD_ij;
712
713 E_QUAD(EQUAD_ij, i, j, Pi_s01, s0, s1, s0_grad, s1_grad, Li_t01);
714
715 E_QUAD_CURL(curl_EQUAD_ij, i, j, Pi_s01, s0, s1, s0_grad, s1_grad,
716 Pi_t01, Li_t01, Li_dt_t01, t0_grad, t1_grad);
717
718 // first term: muZ_0 phi^E_k curl EQUAD
719 // we can reuse the memory for curl_EQUAD_ij; we won't need the values there again
720 Kokkos::Array<OutputScalar,3> & firstTerm = curl_EQUAD_ij;
721 for (ordinal_type d=0; d<3; d++)
722 {
723 firstTerm[d] *= muZ_0 * phi_k;
724 }
725
726 Kokkos::Array<OutputScalar,3> secondTerm; //(muZ0 grad phi + phi grad muZ0) x EQUAD
727
728 cross(secondTerm, muZ0_grad_phi_k_plus_phi_k_grad_muZ0, EQUAD_ij);
729
730 for (ordinal_type d=0; d<3; d++)
731 {
732 output_(fieldOrdinalOffset,pointOrdinal,d) = firstTerm[d] + secondTerm[d];
733 }
734
735 fieldOrdinalOffset++;
736 }
737 }
738 }
739 } // family I, II loop
740
741 // FAMILY III -- a divergence-free family
742 for (int j=2; j<=polyOrder_; j++)
743 {
744 // phi_ij_QUAD: phi_i(mu_X01) * phi_j(mu_Y01) // (following the ESEAS *implementation*; Fuentes et al. (p. 454) actually have the arguments reversed, which leads to a different basis ordering)
745 const auto & phi_j = Li_muY01(j);
746 Kokkos::Array<OutputScalar,3> phi_j_grad;
747 computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
748 for (int i=2; i<=polyOrder_; i++)
749 {
750 const auto & phi_i = Li_muX01(i);
751 Kokkos::Array<OutputScalar,3> phi_i_grad;
752 computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
753
754 Kokkos::Array<OutputScalar,3> phi_ij_grad;
755 for (ordinal_type d=0; d<3; d++)
756 {
757 phi_ij_grad[d] = phi_i * phi_j_grad[d] + phi_j * phi_i_grad[d];
758 }
759
760 Kokkos::Array<OutputScalar,3> cross_product; // phi_ij_grad x grad_muZ0
761 cross(cross_product, phi_ij_grad, muZ_0_grad);
762
763 ordinal_type n = max(i,j);
764 OutputScalar weight = n * pow(muZ_0,n-1);
765 for (ordinal_type d=0; d<3; d++)
766 {
767 output_(fieldOrdinalOffset,pointOrdinal,d) = weight * cross_product[d];
768 }
769 fieldOrdinalOffset++;
770 }
771 }
772
773 // FAMILY IV (non-trivial divergences)
774 {
775 const auto muZ_0_squared = muZ_0 * muZ_0;
776 const auto &s0 = muX_0; const auto & s0_grad = muX_0_grad;
777 const auto &s1 = muX_1; const auto & s1_grad = muX_1_grad;
778 const auto &t0 = muY_0; const auto & t0_grad = muY_0_grad;
779 const auto &t1 = muY_1; const auto & t1_grad = muY_1_grad;
780 const auto &Pi = Pi_muX01;
781 const auto &Pj = Pi_muY01;
782 for (int k=2; k<=polyOrder_; k++)
783 {
784 const auto & phi_k = Li_muZ01(k);
785 for (int j=0; j<polyOrder_; j++)
786 {
787 for (int i=0; i<polyOrder_; i++)
788 {
789 Kokkos::Array<OutputScalar,3> VQUAD; // output from V_QUAD
790 V_QUAD(VQUAD, i, j,
791 Pi, s0, s1, s0_grad, s1_grad,
792 Pj, t0, t1, t0_grad, t1_grad);
793
794 for (int d=0; d<3; d++)
795 {
796 output_(fieldOrdinalOffset,pointOrdinal,d) = muZ_0_squared * phi_k * VQUAD[d];
797 }
798
799 fieldOrdinalOffset++;
800 }
801 }
802 }
803 }
804
805 // FAMILY V (non-trivial divergences)
806 {
807 for (int j=2; j<=polyOrder_; j++)
808 {
809 const auto & phi_j = Li_muY01(j);
810 Kokkos::Array<OutputScalar,3> phi_j_grad;
811 computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
812
813 for (int i=2; i<=polyOrder_; i++)
814 {
815 const auto & phi_i = Li_muX01(i);
816 Kokkos::Array<OutputScalar,3> phi_i_grad;
817 computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
818
819 const int n = max(i,j);
820 const OutputScalar muZ_1_nm1 = pow(muZ_1,n-1);
821
822 Kokkos::Array<OutputScalar,3> VLEFTTRI;
823 V_LEFT_TRI(VLEFTTRI, phi_i, phi_i_grad, phi_j, phi_j_grad, muZ_0, muZ_0_grad);
824
825 for (int d=0; d<3; d++)
826 {
827 output_(fieldOrdinalOffset,pointOrdinal,d) = muZ_1_nm1 * VLEFTTRI[d];
828 }
829
830 fieldOrdinalOffset++;
831 }
832 }
833 }
834
835 // FAMILY VI (non-trivial divergences)
836 for (int i=2; i<=polyOrder_; i++)
837 {
838 const auto & phi_i = Li_muX01(i);
839 Kokkos::Array<OutputScalar,3> phi_i_grad;
840 computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
841
842 Kokkos::Array<OutputScalar,3> VRIGHTTRI;
843 V_RIGHT_TRI(VRIGHTTRI, muY_1, muY_1_grad, phi_i, phi_i_grad, muZ_0, muZ_0_grad);
844
845 const OutputScalar muZ_1_im1 = pow(muZ_1,i-1);
846
847 for (int d=0; d<3; d++)
848 {
849 output_(fieldOrdinalOffset,pointOrdinal,d) = muZ_1_im1 * VRIGHTTRI[d];
850 }
851
852 fieldOrdinalOffset++;
853 }
854
855 // FAMILY VII (non-trivial divergences)
856 for (int j=2; j<=polyOrder_; j++)
857 {
858 const auto & phi_j = Li_muY01(j);
859 Kokkos::Array<OutputScalar,3> phi_j_grad;
860 computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
861
862 Kokkos::Array<OutputScalar,3> VRIGHTTRI;
863 V_RIGHT_TRI(VRIGHTTRI, muX_1, muX_1_grad, phi_j, phi_j_grad, muZ_0, muZ_0_grad);
864
865 const OutputScalar muZ_1_jm1 = pow(muZ_1,j-1);
866
867 for (int d=0; d<3; d++)
868 {
869 output_(fieldOrdinalOffset,pointOrdinal,d) = muZ_1_jm1 * VRIGHTTRI[d];
870 }
871
872 fieldOrdinalOffset++;
873 }
874 }
875 } // end OPERATOR_VALUE
876 break;
877 case OPERATOR_DIV:
878 {
879 ordinal_type fieldOrdinalOffset = 0;
880 // quadrilateral face
881 {
882 // rename scratch1, scratch2
883 auto & Pi = scratch1D_1;
884 auto & Pj = scratch1D_2;
885
886 auto & s0 = mu[0][0], s1 = mu[1][0];
887 auto & s0_grad = muGrad[0][0], s1_grad = muGrad[1][0];
888 auto & t0 = mu[0][1], t1 = mu[1][1];
889 auto & t0_grad = muGrad[0][1], t1_grad = muGrad[1][1];
890
891 Polynomials::shiftedScaledLegendreValues(Pi, polyOrder_-1, s1, s0 + s1);
892 Polynomials::shiftedScaledLegendreValues(Pj, polyOrder_-1, t1, t0 + t1);
893
894 const auto & muZ0 = mu[0][2];
895 const auto & muZ0_grad = muGrad[0][2];
896 OutputScalar three_mu0_squared = 3.0 * muZ0 * muZ0;
897
898 // following the ESEAS ordering: j increments first
899 for (int j=0; j<polyOrder_; j++)
900 {
901 for (int i=0; i<polyOrder_; i++)
902 {
903 Kokkos::Array<OutputScalar,3> VQUAD; // output from V_QUAD
904 V_QUAD(VQUAD, i, j,
905 Pi, s0, s1, s0_grad, s1_grad,
906 Pj, t0, t1, t0_grad, t1_grad);
907
908 OutputScalar grad_muZ0_dot_VQUAD;
909 dot(grad_muZ0_dot_VQUAD, muZ0_grad, VQUAD);
910
911 output_(fieldOrdinalOffset,pointOrdinal) = three_mu0_squared * grad_muZ0_dot_VQUAD;
912 fieldOrdinalOffset++;
913 }
914 }
915 } // end quad face block
916
917 // triangle faces
918 {
919 const auto & P = scratch1D_1; // for V_TRI( nu_012)
920 const auto & P_2ip1 = scratch1D_2;
921 const auto & Pmu = scratch1D_3; // for V_TRI( mu * nu_012)
922 const auto & Pmu_2ip1 = scratch1D_4;
923 for (int faceOrdinal=0; faceOrdinal<numTriFaces; faceOrdinal++)
924 {
925 // face 0,2 --> a=1, b=2
926 // face 1,3 --> a=2, b=1
927 int a = (faceOrdinal % 2 == 0) ? 1 : 2;
928 int b = 3 - a;
929 // face 0,3 --> c=0
930 // face 1,2 --> c=1
931 int c = ((faceOrdinal == 0) || (faceOrdinal == 3)) ? 0 : 1;
932
933 const auto & s0 = nu[0][a-1]; const auto & s0_grad = nuGrad[0][a-1];
934 const auto & s1 = nu[1][a-1]; const auto & s1_grad = nuGrad[1][a-1];
935 const auto & s2 = nu[2][a-1]; const auto & s2_grad = nuGrad[2][a-1];
936 const PointScalar jacobiScaling = s0 + s1 + s2; // we can actually assume that this is 1; see comment at bottom of p. 425 of Fuentes et al.
937
938 const PointScalar legendreScaling = s0 + s1;
939 Polynomials::shiftedScaledLegendreValues(P, polyOrder_-1, s1, legendreScaling);
940
941 const auto lambda0_index = tri_face_vertex_0[faceOrdinal];
942 const auto lambda1_index = tri_face_vertex_1[faceOrdinal];
943 const auto lambda2_index = tri_face_vertex_2[faceOrdinal];
944
945 const auto & mu_c_b = mu[c][b-1];
946 const auto & mu_c_b_grad = muGrad[c][b-1];
947
948 const auto & mu_s0 = lambda[lambda0_index];
949 const auto & mu_s1 = lambda[lambda1_index];
950 const auto & mu_s2 = lambda[lambda2_index]; // == s2
951
952 const PointScalar muJacobiScaling = mu_s0 + mu_s1 + mu_s2;
953
954 const PointScalar muLegendreScaling = mu_s0 + mu_s1;
955 Polynomials::shiftedScaledLegendreValues(Pmu, polyOrder_-1, mu_s1, muLegendreScaling);
956
957 Kokkos::Array<PointScalar, 3> vectorWeight;
958 computeFaceVectorWeight(vectorWeight, nu[0][a-1], nuGrad[0][a-1],
959 nu[1][a-1], nuGrad[1][a-1],
960 nu[2][a-1], nuGrad[2][a-1]);
961
962 Kokkos::Array<PointScalar,3> & mu_s0_grad = lambdaGrad[lambda0_index];
963 Kokkos::Array<PointScalar,3> & mu_s1_grad = lambdaGrad[lambda1_index];
964 Kokkos::Array<PointScalar,3> & mu_s2_grad = lambdaGrad[lambda2_index]; // == s2_grad
965
966 Kokkos::Array<PointScalar, 3> muVectorWeight;
967 computeFaceVectorWeight(muVectorWeight, mu_s0, mu_s0_grad,
968 mu_s1, mu_s1_grad,
969 mu_s2, mu_s2_grad);
970
971 OutputScalar muDivWeight;
972 computeFaceDivWeight(muDivWeight, mu_s0_grad, mu_s1_grad, mu_s2_grad);
973
974 Kokkos::Array<OutputScalar,3> VTRI_00;
975 V_TRI(VTRI_00,0,0,P,P_2ip1,vectorWeight);
976
977 Kokkos::Array<OutputScalar,3> EE_0;
978 E_E(EE_0, 0, P, s0, s1, s0_grad, s1_grad);
979
980 for (int totalPolyOrder=0; totalPolyOrder<polyOrder_; totalPolyOrder++)
981 {
982 for (int i=0; i<=totalPolyOrder; i++)
983 {
984 const int j = totalPolyOrder - i;
985
986 const double alpha = i*2.0 + 1;
987 Polynomials::shiftedScaledJacobiValues( P_2ip1, alpha, polyOrder_-1, s2, jacobiScaling);
988 Polynomials::shiftedScaledJacobiValues(Pmu_2ip1, alpha, polyOrder_-1, mu_s2, muJacobiScaling);
989
990 Kokkos::Array<OutputScalar,3> VTRI; // output from V_TRI
991
992 V_TRI(VTRI, i, j, P, P_2ip1, vectorWeight);
993
994 OutputScalar div_one_over_mu_VTRI_mu;
995 V_TRI_B42_DIV(div_one_over_mu_VTRI_mu, VTRI_00, EE_0, s2, s2_grad, mu_c_b, mu_c_b_grad, i, j, Pmu, Pmu_2ip1);
996
997 output_(fieldOrdinalOffset,pointOrdinal) = 0.5 * (dot(mu_c_b_grad, VTRI) + div_one_over_mu_VTRI_mu);
998
999 fieldOrdinalOffset++;
1000 }
1001 }
1002 }
1003 } // end triangle face block
1004
1005 {
1006 // FAMILY I -- divergence free
1007 // following the ESEAS ordering: k increments first
1008 for (int k=2; k<=polyOrder_; k++)
1009 {
1010 for (int j=2; j<=polyOrder_; j++)
1011 {
1012 for (int i=0; i<polyOrder_; i++)
1013 {
1014 output_(fieldOrdinalOffset,pointOrdinal) = 0.0;
1015 fieldOrdinalOffset++;
1016 }
1017 }
1018 }
1019
1020 // FAMILY II -- divergence free
1021 // following the ESEAS ordering: k increments first
1022 for (int k=2; k<=polyOrder_; k++)
1023 {
1024 for (int j=2; j<=polyOrder_; j++)
1025 {
1026 for (int i=0; i<polyOrder_; i++)
1027 {
1028 output_(fieldOrdinalOffset,pointOrdinal) = 0.0;
1029 fieldOrdinalOffset++;
1030 }
1031 }
1032 }
1033
1034 // FAMILY III -- divergence free
1035 for (int j=2; j<=polyOrder_; j++)
1036 {
1037 for (int i=2; i<=polyOrder_; i++)
1038 {
1039 output_(fieldOrdinalOffset,pointOrdinal) = 0.0;
1040 fieldOrdinalOffset++;
1041 }
1042 }
1043
1044 const auto & Li_muZ01 = scratch1D_1; // used in Family IV
1045 const auto & Li_muX01 = scratch1D_2; // used in Family V
1046 const auto & Li_muY01 = scratch1D_3; // used in Family V
1047 const auto & Pi_muX01 = scratch1D_4; // used in Family IV
1048 const auto & Pi_muY01 = scratch1D_5; // used in Family IV
1049 const auto & Pi_muZ01 = scratch1D_6; // used in Family IV
1050 const auto & Li_dt_muX01 = scratch1D_7; // used in Family V
1051 const auto & Li_dt_muY01 = scratch1D_8; // used in Family V
1052 const auto & Li_dt_muZ01 = scratch1D_9; // used in Family IV
1053
1054 const auto & muX_0 = mu[0][0]; const auto & muX_0_grad = muGrad[0][0];
1055 const auto & muX_1 = mu[1][0]; const auto & muX_1_grad = muGrad[1][0];
1056 const auto & muY_0 = mu[0][1]; const auto & muY_0_grad = muGrad[0][1];
1057 const auto & muY_1 = mu[1][1]; const auto & muY_1_grad = muGrad[1][1];
1058 const auto & muZ_0 = mu[0][2]; const auto & muZ_0_grad = muGrad[0][2];
1059 const auto & muZ_1 = mu[1][2]; const auto & muZ_1_grad = muGrad[1][2];
1060
1061 Polynomials::shiftedScaledIntegratedLegendreValues(Li_muX01, polyOrder_, muX_1, muX_0 + muX_1);
1062 Polynomials::shiftedScaledIntegratedLegendreValues(Li_muY01, polyOrder_, muY_1, muY_0 + muY_1);
1063 Polynomials::shiftedScaledIntegratedLegendreValues(Li_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
1064
1065 Polynomials::shiftedScaledLegendreValues(Pi_muX01, polyOrder_, muX_1, muX_0 + muX_1);
1066 Polynomials::shiftedScaledLegendreValues(Pi_muY01, polyOrder_, muY_1, muY_0 + muY_1);
1067 Polynomials::shiftedScaledLegendreValues(Pi_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
1068
1069 Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muX01, Pi_muX01, polyOrder_, muX_1, muX_0 + muX_1);
1070 Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muY01, Pi_muY01, polyOrder_, muY_1, muY_0 + muY_1);
1071 Polynomials::shiftedScaledIntegratedLegendreValues_dt(Li_dt_muZ01, Pi_muZ01, polyOrder_, muZ_1, muZ_0 + muZ_1);
1072
1073 // FAMILY IV -- non-trivial divergences
1074 {
1075 const auto muZ_0_squared = muZ_0 * muZ_0;
1076 const auto &s0 = muX_0; const auto & s0_grad = muX_0_grad;
1077 const auto &s1 = muX_1; const auto & s1_grad = muX_1_grad;
1078 const auto &t0 = muY_0; const auto & t0_grad = muY_0_grad;
1079 const auto &t1 = muY_1; const auto & t1_grad = muY_1_grad;
1080 const auto &Pi = Pi_muX01;
1081 const auto &Pj = Pi_muY01;
1082
1083 for (int k=2; k<=polyOrder_; k++)
1084 {
1085 const auto & phi_k = Li_muZ01(k);
1086 Kokkos::Array<OutputScalar,3> phi_k_grad;
1087 computeGradHomLi(phi_k_grad, k, Pi_muZ01, Li_dt_muZ01, muZ_0_grad, muZ_1_grad);
1088 for (int j=0; j<polyOrder_; j++)
1089 {
1090 for (int i=0; i<polyOrder_; i++)
1091 {
1092 Kokkos::Array<OutputScalar,3> firstTerm{0,0,0}; // muZ_0_squared * grad phi_k + 2 muZ_0 * phi_k * grad muZ_0
1093 for (int d=0; d<3; d++)
1094 {
1095 firstTerm[d] += muZ_0_squared * phi_k_grad[d] + 2. * muZ_0 * phi_k * muZ_0_grad[d];
1096 }
1097 Kokkos::Array<OutputScalar,3> VQUAD; // output from V_QUAD
1098 V_QUAD(VQUAD, i, j,
1099 Pi, s0, s1, s0_grad, s1_grad,
1100 Pj, t0, t1, t0_grad, t1_grad);
1101
1102 OutputScalar divValue;
1103 dot(divValue, firstTerm, VQUAD);
1104 output_(fieldOrdinalOffset,pointOrdinal) = divValue;
1105
1106 fieldOrdinalOffset++;
1107 }
1108 }
1109 }
1110 }
1111
1112 // FAMILY V -- non-trivial divergences
1113 {
1114 for (int j=2; j<=polyOrder_; j++)
1115 {
1116 const auto & phi_j = Li_muX01(j);
1117 Kokkos::Array<OutputScalar,3> phi_j_grad;
1118 computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
1119
1120 for (int i=2; i<=polyOrder_; i++)
1121 {
1122 const auto & phi_i = Li_muY01(i);
1123 Kokkos::Array<OutputScalar,3> phi_i_grad;
1124 computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
1125
1126 const int n = max(i,j);
1127 const OutputScalar muZ_1_nm2 = pow(muZ_1,n-2);
1128
1129 Kokkos::Array<OutputScalar,3> VLEFTTRI;
1130 V_LEFT_TRI(VLEFTTRI, phi_i, phi_i_grad, phi_j, phi_j_grad, muZ_0, muZ_0_grad);
1131
1132 OutputScalar dot_product;
1133 dot(dot_product, muZ_1_grad, VLEFTTRI);
1134 output_(fieldOrdinalOffset,pointOrdinal) = (n-1) * muZ_1_nm2 * dot_product;
1135
1136 fieldOrdinalOffset++;
1137 }
1138 }
1139 }
1140
1141 // FAMILY VI (non-trivial divergences)
1142 for (int i=2; i<=polyOrder_; i++)
1143 {
1144 const auto & phi_i = Li_muX01(i);
1145 Kokkos::Array<OutputScalar,3> phi_i_grad;
1146 computeGradHomLi(phi_i_grad, i, Pi_muX01, Li_dt_muX01, muX_0_grad, muX_1_grad);
1147
1148 Kokkos::Array<OutputScalar,3> VRIGHTTRI;
1149 V_RIGHT_TRI(VRIGHTTRI, muY_1, muY_1_grad, phi_i, phi_i_grad, muZ_0, muZ_0_grad);
1150
1151 OutputScalar dot_product;
1152 dot(dot_product, muZ_1_grad, VRIGHTTRI);
1153
1154 const OutputScalar muZ_1_im2 = pow(muZ_1,i-2);
1155 output_(fieldOrdinalOffset,pointOrdinal) = (i-1) * muZ_1_im2 * dot_product;
1156
1157 fieldOrdinalOffset++;
1158 }
1159
1160 // FAMILY VII (non-trivial divergences)
1161 for (int j=2; j<=polyOrder_; j++)
1162 {
1163 const auto & phi_j = Li_muY01(j);
1164 Kokkos::Array<OutputScalar,3> phi_j_grad;
1165 computeGradHomLi(phi_j_grad, j, Pi_muY01, Li_dt_muY01, muY_0_grad, muY_1_grad);
1166
1167 Kokkos::Array<OutputScalar,3> VRIGHTTRI;
1168 V_RIGHT_TRI(VRIGHTTRI, muX_1, muX_1_grad, phi_j, phi_j_grad, muZ_0, muZ_0_grad);
1169
1170 OutputScalar dot_product;
1171 dot(dot_product, muZ_1_grad, VRIGHTTRI);
1172
1173 const OutputScalar muZ_1_jm2 = pow(muZ_1,j-2);
1174 output_(fieldOrdinalOffset,pointOrdinal) = (j-1) * muZ_1_jm2 * dot_product;
1175
1176 fieldOrdinalOffset++;
1177 }
1178
1179 } // end interior function block
1180
1181 } // end OPERATOR_DIV block
1182 break;
1183 case OPERATOR_GRAD:
1184 case OPERATOR_D1:
1185 case OPERATOR_D2:
1186 case OPERATOR_D3:
1187 case OPERATOR_D4:
1188 case OPERATOR_D5:
1189 case OPERATOR_D6:
1190 case OPERATOR_D7:
1191 case OPERATOR_D8:
1192 case OPERATOR_D9:
1193 case OPERATOR_D10:
1194 INTREPID2_TEST_FOR_ABORT( true,
1195 ">>> ERROR: (Intrepid2::Hierarchical_HDIV_PYR_Functor) Computing of second and higher-order derivatives is not currently supported");
1196 default:
1197 // unsupported operator type
1198 device_assert(false);
1199 }
1200 }
1201
1202 // Provide the shared memory capacity.
1203 // This function takes the team_size as an argument,
1204 // which allows team_size-dependent allocations.
1205 size_t team_shmem_size (int team_size) const
1206 {
1207 // we use shared memory to create a fast buffer for basis computations
1208 size_t shmem_size = 0;
1209 if (fad_size_output_ > 0)
1210 {
1211 // 1D scratch views (we have up to 9 of them):
1212 shmem_size += 9 * OutputScratchView::shmem_size(polyOrder_ + 1, fad_size_output_);
1213 }
1214 else
1215 {
1216 // 1D scratch views (we have up to 9 of them):
1217 shmem_size += 9 * OutputScratchView::shmem_size(polyOrder_ + 1);
1218 }
1219
1220 return shmem_size;
1221 }
1222 };
1223
1235 template<typename DeviceType,
1236 typename OutputScalar = double,
1237 typename PointScalar = double,
1238 bool useCGBasis = true> // if useCGBasis is true, basis functions are associated with either faces or the interior. If false, basis functions are all associated with interior
1240 : public Basis<DeviceType,OutputScalar,PointScalar>
1241 {
1242 public:
1243 using BasisBase = Basis<DeviceType,OutputScalar,PointScalar>;
1244
1245 using typename BasisBase::OrdinalTypeArray1DHost;
1246 using typename BasisBase::OrdinalTypeArray2DHost;
1247
1248 using typename BasisBase::OutputViewType;
1249 using typename BasisBase::PointViewType;
1250 using typename BasisBase::ScalarViewType;
1251
1252 using typename BasisBase::ExecutionSpace;
1253
1254 protected:
1255 int polyOrder_; // the maximum order of the polynomial
1256 EPointType pointType_;
1257 public:
1268 HierarchicalBasis_HDIV_PYR(int polyOrder, const EPointType pointType=POINTTYPE_DEFAULT)
1269 :
1270 polyOrder_(polyOrder),
1271 pointType_(pointType)
1272 {
1273 INTREPID2_TEST_FOR_EXCEPTION(pointType!=POINTTYPE_DEFAULT,std::invalid_argument,"PointType not supported");
1274 const auto & p = polyOrder;
1275 this->basisCardinality_ = p * p + 2 * p * (p+1) + 3 * p * p * (p-1);
1276 this->basisDegree_ = p;
1277 this->basisCellTopologyKey_ = shards::Pyramid<>::key;
1278 this->basisType_ = BASIS_FEM_HIERARCHICAL;
1279 this->basisCoordinates_ = COORDINATES_CARTESIAN;
1280 this->functionSpace_ = FUNCTION_SPACE_HDIV;
1281
1282 const int degreeLength = 1;
1283 this->fieldOrdinalPolynomialDegree_ = OrdinalTypeArray2DHost("Integrated Legendre H(div) pyramid polynomial degree lookup", this->basisCardinality_, degreeLength);
1284 this->fieldOrdinalH1PolynomialDegree_ = OrdinalTypeArray2DHost("Integrated Legendre H(div) pyramid polynomial H^1 degree lookup", this->basisCardinality_, degreeLength);
1285
1286 int fieldOrdinalOffset = 0;
1287
1288 // **** face functions **** //
1289 // one quad face
1290 const int numFunctionsPerQuadFace = p*p;
1291
1292 // following the ESEAS ordering: j increments first
1293 for (int j=0; j<p; j++)
1294 {
1295 for (int i=0; i<p; i++)
1296 {
1297 this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = std::max(i,j);
1298 this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = std::max(i,j)+1;
1299 fieldOrdinalOffset++;
1300 }
1301 }
1302
1303 const int numFunctionsPerTriFace = 2 * p * (p+1) / 4;
1304 const int numTriFaces = 4;
1305 for (int faceOrdinal=0; faceOrdinal<numTriFaces; faceOrdinal++)
1306 {
1307 for (int totalPolyOrder=0; totalPolyOrder<polyOrder_; totalPolyOrder++)
1308 {
1309 const int totalFaceDofs = (totalPolyOrder+1) * (totalPolyOrder+2) / 2; // number of dofs for which i+j <= totalPolyOrder
1310 const int totalFaceDofsPrevious = totalPolyOrder * (totalPolyOrder+1) / 2; // number of dofs for which i+j <= totalPolyOrder-1
1311 const int faceDofsForPolyOrder = totalFaceDofs - totalFaceDofsPrevious; // number of dofs for which i+j == totalPolyOrder
1312 for (int i=0; i<faceDofsForPolyOrder; i++)
1313 {
1314 this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = totalPolyOrder;
1315 this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = totalPolyOrder+1;
1316 fieldOrdinalOffset++;
1317 }
1318 }
1319 }
1320
1321 // **** interior functions **** //
1322 const int numFunctionsPerVolume = 3 * p * p * (p-1);
1323
1324 // FAMILY I
1325 // following the ESEAS ordering: k increments first
1326 for (int k=2; k<=polyOrder_; k++)
1327 {
1328 for (int j=2; j<=polyOrder_; j++)
1329 {
1330 for (int i=0; i<polyOrder_; i++)
1331 {
1332 const int max_jk = std::max(j,k);
1333 const int max_ijk = std::max(max_jk,i);
1334 const int max_ip1jk = std::max(max_jk,i+1);
1335 this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ijk;
1336 this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ip1jk;
1337 fieldOrdinalOffset++;
1338 }
1339 }
1340 }
1341
1342 // FAMILY II
1343 for (int k=2; k<=polyOrder_; k++)
1344 {
1345 // ESEAS reverses i,j loop ordering for family II, relative to family I.
1346 // We follow ESEAS for convenience of cross-code comparison.
1347 for (int i=0; i<polyOrder_; i++)
1348 {
1349 for (int j=2; j<=polyOrder_; j++)
1350 {
1351 const int max_jk = std::max(j,k);
1352 const int max_ijk = std::max(max_jk,i);
1353 const int max_ip1jk = std::max(max_jk,i+1);
1354 this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ijk;
1355 this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ip1jk;
1356 fieldOrdinalOffset++;
1357 }
1358 }
1359 }
1360
1361 // FAMILY III
1362 for (int j=2; j<=polyOrder_; j++)
1363 {
1364 for (int i=2; i<=polyOrder_; i++)
1365 {
1366 const int max_ij = std::max(i,j);
1367 this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ij;
1368 this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ij;
1369 fieldOrdinalOffset++;
1370 }
1371 }
1372
1373 // FAMILY IV
1374 for (int k=2; k<=polyOrder_; k++)
1375 {
1376 for (int j=0; j<polyOrder_; j++)
1377 {
1378 for (int i=0; i<polyOrder_; i++)
1379 {
1380 const int max_jk = std::max(j,k);
1381 const int max_ijk = std::max(max_jk,i);
1382 const int max_ip1jp1k = std::max(std::max(j+1,k),i+1);
1383 this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ijk;
1384 this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ip1jp1k;
1385 fieldOrdinalOffset++;
1386 }
1387 }
1388 }
1389
1390 // FAMILY V
1391 for (int j=2; j<=polyOrder_; j++)
1392 {
1393 for (int i=2; i<=polyOrder_; i++)
1394 {
1395 const int max_ij = std::max(i,j);
1396 this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = max_ij;
1397 this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = max_ij;
1398 fieldOrdinalOffset++;
1399 }
1400 }
1401
1402 // FAMILY VI
1403 for (int i=2; i<=polyOrder_; i++)
1404 {
1405 this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = i;
1406 this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = i;
1407 fieldOrdinalOffset++;
1408 }
1409
1410 // FAMILY VII
1411 for (int j=2; j<=polyOrder_; j++)
1412 {
1413 this->fieldOrdinalPolynomialDegree_ (fieldOrdinalOffset,0) = j;
1414 this->fieldOrdinalH1PolynomialDegree_(fieldOrdinalOffset,0) = j;
1415 fieldOrdinalOffset++;
1416 }
1417
1418 if (fieldOrdinalOffset != this->basisCardinality_)
1419 {
1420 std::cout << "Internal error: basis enumeration is incorrect; fieldOrdinalOffset = " << fieldOrdinalOffset;
1421 std::cout << "; basisCardinality_ = " << this->basisCardinality_ << std::endl;
1422
1423 INTREPID2_TEST_FOR_EXCEPTION(fieldOrdinalOffset != this->basisCardinality_, std::invalid_argument, "Internal error: basis enumeration is incorrect");
1424 }
1425
1426 // initialize tags
1427 {
1428 // Intrepid2 vertices:
1429 // 0: (-1,-1, 0)
1430 // 1: ( 1,-1, 0)
1431 // 2: ( 1, 1, 0)
1432 // 3: (-1, 1, 0)
1433 // 4: ( 0, 0, 1)
1434
1435 // ESEAS vertices:
1436 // 0: ( 0, 0, 0)
1437 // 1: ( 1, 0, 0)
1438 // 2: ( 1, 1, 0)
1439 // 3: ( 0, 1, 0)
1440 // 4: ( 0, 0, 1)
1441
1442 // The edge numbering appears to match between ESEAS and Intrepid2
1443
1444 // ESEAS numbers pyramid faces differently from Intrepid2
1445 // See BlendProjectPyraTF in ESEAS.
1446 // See PyramidFaceNodeMap in Shards_BasicTopologies
1447 // ESEAS: 0123, 014, 124, 324, 034
1448 // Intrepid2: 014, 124, 234, 304, 0321
1449
1450 const int intrepid2FaceOrdinals[5] {4,0,1,2,3}; // index is the ESEAS face ordinal; value is the intrepid2 ordinal
1451
1452 const auto & cardinality = this->basisCardinality_;
1453
1454 // Basis-dependent initializations
1455 const ordinal_type tagSize = 4; // size of DoF tag, i.e., number of fields in the tag
1456 const ordinal_type posScDim = 0; // position in the tag, counting from 0, of the subcell dim
1457 const ordinal_type posScOrd = 1; // position in the tag, counting from 0, of the subcell ordinal
1458 const ordinal_type posDfOrd = 2; // position in the tag, counting from 0, of DoF ordinal relative to the subcell
1459
1460 OrdinalTypeArray1DHost tagView("tag view", cardinality*tagSize);
1461 const int faceDim = 2, volumeDim = 3;
1462
1463 if (useCGBasis) {
1464 {
1465 int tagNumber = 0;
1466 {
1467 // quad face
1468 const int faceOrdinalESEAS = 0;
1469 const int faceOrdinalIntrepid2 = intrepid2FaceOrdinals[faceOrdinalESEAS];
1470
1471 for (int functionOrdinal=0; functionOrdinal<numFunctionsPerQuadFace; functionOrdinal++)
1472 {
1473 tagView(tagNumber*tagSize+0) = faceDim; // face dimension
1474 tagView(tagNumber*tagSize+1) = faceOrdinalIntrepid2; // face id
1475 tagView(tagNumber*tagSize+2) = functionOrdinal; // local dof id
1476 tagView(tagNumber*tagSize+3) = numFunctionsPerQuadFace; // total number of dofs on this face
1477 tagNumber++;
1478 }
1479 }
1480 for (int triFaceOrdinalESEAS=0; triFaceOrdinalESEAS<numTriFaces; triFaceOrdinalESEAS++)
1481 {
1482 const int faceOrdinalESEAS = triFaceOrdinalESEAS + 1;
1483 const int faceOrdinalIntrepid2 = intrepid2FaceOrdinals[faceOrdinalESEAS];
1484 for (int functionOrdinal=0; functionOrdinal<numFunctionsPerTriFace; functionOrdinal++)
1485 {
1486 tagView(tagNumber*tagSize+0) = faceDim; // face dimension
1487 tagView(tagNumber*tagSize+1) = faceOrdinalIntrepid2; // face id
1488 tagView(tagNumber*tagSize+2) = functionOrdinal; // local dof id
1489 tagView(tagNumber*tagSize+3) = numFunctionsPerTriFace; // total number of dofs on this face
1490 tagNumber++;
1491 }
1492 }
1493 for (int functionOrdinal=0; functionOrdinal<numFunctionsPerVolume; functionOrdinal++)
1494 {
1495 tagView(tagNumber*tagSize+0) = volumeDim; // volume dimension
1496 tagView(tagNumber*tagSize+1) = 0; // volume id
1497 tagView(tagNumber*tagSize+2) = functionOrdinal; // local dof id
1498 tagView(tagNumber*tagSize+3) = numFunctionsPerVolume; // total number of dofs in this volume
1499 tagNumber++;
1500 }
1501 INTREPID2_TEST_FOR_EXCEPTION(tagNumber != this->basisCardinality_, std::invalid_argument, "Internal error: basis tag enumeration is incorrect");
1502 }
1503 } else {
1504 for (ordinal_type i=0;i<cardinality;++i) {
1505 tagView(i*tagSize+0) = volumeDim; // volume dimension
1506 tagView(i*tagSize+1) = 0; // volume ordinal
1507 tagView(i*tagSize+2) = i; // local dof id
1508 tagView(i*tagSize+3) = cardinality; // total number of dofs on this face
1509 }
1510 }
1511
1512 // Basis-independent function sets tag and enum data in tagToOrdinal_ and ordinalToTag_ arrays:
1513 // tags are constructed on host
1514 this->setOrdinalTagData(this->tagToOrdinal_,
1515 this->ordinalToTag_,
1516 tagView,
1517 this->basisCardinality_,
1518 tagSize,
1519 posScDim,
1520 posScOrd,
1521 posDfOrd);
1522 }
1523 }
1524
1529 const char* getName() const override {
1530 return "Intrepid2_HierarchicalBasis_HDIV_PYR";
1531 }
1532
1535 virtual bool requireOrientation() const override {
1536 return (this->getDegree() > 2);
1537 }
1538
1539 // since the getValues() below only overrides the FEM variant, we specify that
1540 // we use the base class's getValues(), which implements the FVD variant by throwing an exception.
1541 // (It's an error to use the FVD variant on this basis.)
1543
1562 virtual void getValues( OutputViewType outputValues, const PointViewType inputPoints,
1563 const EOperator operatorType = OPERATOR_VALUE ) const override
1564 {
1565 auto numPoints = inputPoints.extent_int(0);
1566
1568
1569 FunctorType functor(operatorType, outputValues, inputPoints, polyOrder_);
1570
1571 const int outputVectorSize = getVectorSizeForHierarchicalParallelism<OutputScalar>();
1572 const int pointVectorSize = getVectorSizeForHierarchicalParallelism<PointScalar>();
1573 const int vectorSize = std::max(outputVectorSize,pointVectorSize);
1574 const int teamSize = 1; // because of the way the basis functions are computed, we don't have a second level of parallelism...
1575
1576 auto policy = Kokkos::TeamPolicy<ExecutionSpace>(numPoints,teamSize,vectorSize);
1577 Kokkos::parallel_for("Hierarchical_HDIV_PYR_Functor", policy, functor);
1578 }
1579
1589 getSubCellRefBasis(const ordinal_type subCellDim, const ordinal_type subCellOrd) const override{
1590 const auto & p = this->basisDegree_;
1591 if (subCellDim == 2)
1592 {
1593 if (subCellOrd == 4) // quad basis
1594 {
1596 using HVOL_QUAD = Basis_Derived_HVOL_QUAD<HVOL_LINE>;
1597 return Teuchos::rcp(new HVOL_QUAD(p-1));
1598 }
1599 else // tri basis
1600 {
1602 return Teuchos::rcp(new HVOL_Tri(p-1));
1603 }
1604 }
1605 INTREPID2_TEST_FOR_EXCEPTION(true,std::invalid_argument,"Input parameters out of bounds");
1606 }
1607
1613 getHostBasis() const override {
1614 using HostDeviceType = typename Kokkos::HostSpace::device_type;
1616 return Teuchos::rcp( new HostBasisType(polyOrder_, pointType_) );
1617 }
1618 };
1619} // end namespace Intrepid2
1620
1621// do ETI with default (double) type
1623
1624#endif /* Intrepid2_HierarchicalBasis_HDIV_PYR_h */
Header file for the abstract base class Intrepid2::Basis.
Teuchos::RCP< Basis< DeviceType, OutputType, PointType > > BasisPtr
Basis Pointer.
Implementation of H(vol) basis on the quadrilateral that is templated on H(vol) on the line.
KOKKOS_INLINE_FUNCTION void device_assert(bool val)
H(vol) basis on the line based on Legendre polynomials.
H(vol) basis on the triangle based on integrated Legendre polynomials.
Free functions, callable from device code, that implement various polynomials useful in basis definit...
Defines several coordinates and their gradients on the pyramid; maps from Intrepid2 (shards) pyramid ...
KOKKOS_INLINE_FUNCTION void affinePyramid(Kokkos::Array< PointScalar, 5 > &lambda, Kokkos::Array< Kokkos::Array< PointScalar, 3 >, 5 > &lambdaGrad, Kokkos::Array< Kokkos::Array< PointScalar, 3 >, 2 > &mu, Kokkos::Array< Kokkos::Array< Kokkos::Array< PointScalar, 3 >, 3 >, 2 > &muGrad, Kokkos::Array< Kokkos::Array< PointScalar, 2 >, 3 > &nu, Kokkos::Array< Kokkos::Array< Kokkos::Array< PointScalar, 3 >, 2 >, 3 > &nuGrad, Kokkos::Array< PointScalar, 3 > &coords)
Compute various affine-like coordinates on the pyramid. See Fuentes et al, Appendix E....
Header function for Intrepid2::Util class and other utility functions.
constexpr int getVectorSizeForHierarchicalParallelism()
Returns a vector size to be used for the provided Scalar type in the context of hierarchically-parall...
KOKKOS_INLINE_FUNCTION constexpr unsigned getScalarDimensionForView(const ViewType &view)
Returns the size of the Scalar dimension for the View. This is 0 for non-AD types....
Implementation of H(vol) basis on the quadrilateral that is templated on H(vol) on the line.
Kokkos::DynRankView< PointValueType, Kokkos::LayoutStride, DeviceType > PointViewType
virtual KOKKOS_INLINE_FUNCTION void getValues(OutputViewType, const PointViewType, const EOperator, const typename Kokkos::TeamPolicy< ExecutionSpace >::member_type &teamMember, const typename ExecutionSpace::scratch_memory_space &scratchStorage, const ordinal_type subcellDim=-1, const ordinal_type subcellOrdinal=-1) const
Kokkos::DynRankView< OutputValueType, Kokkos::LayoutStride, DeviceType > OutputViewType
void setOrdinalTagData(OrdinalTypeView3D &tagToOrdinal, OrdinalTypeView2D &ordinalToTag, const OrdinalTypeView1D tags, const ordinal_type basisCard, const ordinal_type tagSize, const ordinal_type posScDim, const ordinal_type posScOrd, const ordinal_type posDfOrd)
Kokkos::DynRankView< scalarType, Kokkos::LayoutStride, DeviceType > ScalarViewType
Kokkos::View< ordinal_type **, typename ExecutionSpace::array_layout, Kokkos::HostSpace > OrdinalTypeArray2DHost
Kokkos::View< ordinal_type *, typename ExecutionSpace::array_layout, Kokkos::HostSpace > OrdinalTypeArray1DHost
Basis defining integrated Legendre basis on the line, a polynomial subspace of H(grad) on the line.
virtual BasisPtr< typename Kokkos::HostSpace::device_type, OutputScalar, PointScalar > getHostBasis() const override
Creates and returns a Basis object whose DeviceType template argument is Kokkos::HostSpace::device_ty...
const char * getName() const override
Returns basis name.
virtual void getValues(OutputViewType outputValues, const PointViewType inputPoints, const EOperator operatorType=OPERATOR_VALUE) const override
Evaluation of a FEM basis on a reference cell.
HierarchicalBasis_HDIV_PYR(int polyOrder, const EPointType pointType=POINTTYPE_DEFAULT)
Constructor.
BasisPtr< DeviceType, OutputScalar, PointScalar > getSubCellRefBasis(const ordinal_type subCellDim, const ordinal_type subCellOrd) const override
returns the basis associated to a subCell.
virtual bool requireOrientation() const override
True if orientation is required.
Basis defining Legendre basis on the line, a polynomial subspace of L^2 (a.k.a. H(vol)) on the line.
Basis defining Legendre basis on the line, a polynomial subspace of H(vol) on the line: extension to ...
Functor for computing values for the HierarchicalBasis_HDIV_PYR class.
KOKKOS_INLINE_FUNCTION void V_TRI_B42(Kokkos::Array< OutputScalar, 3 > &VTRI_mus0_mus1_s2_over_mu, const Kokkos::Array< OutputScalar, 3 > &VTRI_00_s0_s1_s2, const Kokkos::Array< OutputScalar, 3 > &EE_0_s0_s1, const OutputScalar &s2, const OutputScalar &mu, const Kokkos::Array< OutputScalar, 3 > &mu_grad, const ordinal_type &i, const ordinal_type &j, const OutputScratchView &P_mus0_mus1, const OutputScratchView &P_2ip1_mus0pmus1_s2) const
See Equation (B.42) in Fuentes et al. Computes 1/mu V^{tri}_ij(mu s0, mu s1, s2).
KOKKOS_INLINE_FUNCTION void V_QUAD(Kokkos::Array< OutputScalar, 3 > &VQUAD, const ordinal_type &i, const ordinal_type &j, const OutputScratchView &PHom_s, const PointScalar &s0, const PointScalar &s1, const Kokkos::Array< PointScalar, 3 > &s0_grad, const Kokkos::Array< PointScalar, 3 > &s1_grad, const OutputScratchView &PHom_t, const PointScalar &t0, const PointScalar &t1, const Kokkos::Array< PointScalar, 3 > &t0_grad, const Kokkos::Array< PointScalar, 3 > &t1_grad) const
KOKKOS_INLINE_FUNCTION void dot(OutputScalar &c, const Kokkos::Array< OutputScalar, 3 > &a, const Kokkos::Array< OutputScalar, 3 > &b) const
dot product: c = a \cdot b
KOKKOS_INLINE_FUNCTION void V_RIGHT_TRI(Kokkos::Array< OutputScalar, 3 > &VRIGHTTRI, const OutputScalar &mu1, const Kokkos::Array< OutputScalar, 3 > &mu1_grad, const OutputScalar &phi_i, const Kokkos::Array< OutputScalar, 3 > &phi_i_grad, const OutputScalar &t0, const Kokkos::Array< OutputScalar, 3 > &t0_grad) const
See Fuentes et al. (p. 455), definition of V_{ij}^{\trianglerighteq}.
KOKKOS_INLINE_FUNCTION void cross(Kokkos::Array< OutputScalar, 3 > &c, const Kokkos::Array< OutputScalar, 3 > &a, const Kokkos::Array< OutputScalar, 3 > &b) const
cross product: c = a x b
KOKKOS_INLINE_FUNCTION void V_TRI_B42_DIV(OutputScalar &div_VTRI_mus0_mus1_s2_over_mu, const Kokkos::Array< OutputScalar, 3 > &VTRI_00_s0_s1_s2, const Kokkos::Array< OutputScalar, 3 > &EE_0_s0_s1, const OutputScalar &s2, const Kokkos::Array< OutputScalar, 3 > &s2_grad, const OutputScalar &mu, const Kokkos::Array< OutputScalar, 3 > &mu_grad, const ordinal_type &i, const ordinal_type &j, const OutputScratchView &P_mus0_mus1, const OutputScratchView &P_2ip1_mus0pmus1_s2) const
See Equation (B.42) in Fuentes et al. Computes 1/mu V^{tri}_ij(mu s0, mu s1, s2).
KOKKOS_INLINE_FUNCTION void V_LEFT_TRI(Kokkos::Array< OutputScalar, 3 > &VLEFTTRI, const OutputScalar &phi_i, const Kokkos::Array< OutputScalar, 3 > &phi_i_grad, const OutputScalar &phi_j, const Kokkos::Array< OutputScalar, 3 > &phi_j_grad, const OutputScalar &t0, const Kokkos::Array< OutputScalar, 3 > &t0_grad) const
See Fuentes et al. (p. 455), definition of V_{ij}^{\trianglelefteq}.
KOKKOS_INLINE_FUNCTION void E_QUAD(Kokkos::Array< OutputScalar, 3 > &EQUAD, const ordinal_type &i, const ordinal_type &j, const OutputScratchView &HomPi_s01, const PointScalar &s0, const PointScalar &s1, const Kokkos::Array< PointScalar, 3 > &s0_grad, const Kokkos::Array< PointScalar, 3 > &s1_grad, const OutputScratchView &HomLi_t01) const