1003 std::ostringstream os;
1004 os << *prefix <<
"constantNumPackets=" << constantNumPackets << endl;
1005 std::cerr << os.str ();
1013 if (constantNumPackets == 0) {
1015 std::ostringstream os;
1016 os << *prefix <<
"3. (Re)allocate num{Ex,Im}portPacketsPerLID"
1018 std::cerr << os.str ();
1022 this->reallocArraysForNumPacketsPerLid (exportLIDs.extent (0),
1023 remoteLIDs.extent (0));
1027 std::ostringstream os;
1028 os << *prefix <<
"4. packAndPrepare: before, "
1031 std::cerr << os.str ();
1034 doPackAndPrepare(src, exportLIDs, constantNumPackets,
execution_space());
1036 this->exports_.sync_host();
1039 this->exports_.sync_device();
1043 std::ostringstream os;
1044 os << *prefix <<
"5.1. After packAndPrepare, "
1047 std::cerr << os.str ();
1053 if (constantNumPackets != 0) {
1058 const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1059 reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
1063 bool needCommunication =
true;
1066 const this_type* srcDistObj =
dynamic_cast<const this_type*
> (&src);
1068 if (revOp == DoReverse && ! this->isDistributed ()) {
1069 needCommunication =
false;
1078 else if (revOp == DoForward && srcDistObj != NULL &&
1079 ! srcDistObj->isDistributed ()) {
1080 needCommunication =
false;
1083 if (! needCommunication) {
1085 std::ostringstream os;
1086 os << *prefix <<
"Comm not needed; skipping" << endl;
1087 std::cerr << os.str ();
1091 ProfilingRegion region_dpw
1092 (
"Tpetra::DistObject::doTransferNew::doPostsAndWaits");
1093#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1096 Teuchos::TimeMonitor doPostsAndWaitsMon (*doPostsAndWaitsTimer_);
1100 std::ostringstream os;
1101 os << *prefix <<
"7.0. "
1102 << (revOp == DoReverse ?
"Reverse" :
"Forward")
1104 std::cerr << os.str ();
1107 doPosts(distributorPlan, constantNumPackets, commOnHost, prefix, canTryAliasing, CM);
// Completion phase of a DistObject transfer.  The declarator line is not
// visible in this excerpt; judging by the profiling-region labels below this
// is presumably DistObject::endTransfer -- TODO confirm.
//
// Visible parameters:
//   transfer       - Import/Export-style object supplying the source/target
//                    Maps and the permute/remote/export LID lists.
//   modeString     - text spliced into the exception messages below.
//   revOp          - DoForward or DoReverse; selects which Map is checked and
//                    which LID list plays which role.
//   restrictedMode - when true, Maps are checked with isLocallyFitted instead
//                    of isSameAs, and (in debug mode) permutes are rejected.
// `src`, `distor` and `CM` are used below but declared on lines not shown.
//
// Raises std::invalid_argument (through TEUCHOS_TEST_FOR_EXCEPTION) when one
// of the Map checks or the restricted-mode permute check fails.
1112 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1116 const ::Tpetra::Details::Transfer<local_ordinal_type, global_ordinal_type, node_type>& transfer,
1117 const char modeString[],
1118 const ReverseOption revOp,
1120 bool restrictedMode)
1126 using Kokkos::Compat::getArrayView;
1127 using Kokkos::Compat::getConstArrayView;
1128 using Kokkos::Compat::getKokkosViewDeepCopy;
1129 using Kokkos::Compat::create_const_view;
// Communication is done on host unless MPI is assumed to be GPU-aware; the
// profiling-region label records which of the two was chosen.
1134 const bool commOnHost = ! Behavior::assumeMpiIsGPUAware ();
1135 const char funcNameHost[] =
"Tpetra::DistObject::endTransfer[Host]";
1136 const char funcNameDevice[] =
"Tpetra::DistObject::endTransfer[Device]";
1137 const char *funcName = commOnHost ? funcNameHost : funcNameDevice;
1138 ProfilingRegion region_doTransfer(funcName);
1139 const bool verbose = Behavior::verbose(
"DistObject");
// Prefix for the diagnostic lines written to std::cerr.
// NOTE(review): the prefix is built with the method name "doTransfer" while
// the profiling labels above say "endTransfer" -- confirm which is intended.
1140 std::shared_ptr<std::string> prefix;
1142 std::ostringstream os;
1143 prefix = this->
createPrefix(
"DistObject",
"doTransfer");
1144 os << *prefix <<
"Source type: " << Teuchos::typeName(src)
1145 <<
", Target type: " << Teuchos::typeName(*
this) << endl;
1146 std::cerr << os.str();
// Debug mode enables the restricted-mode permute check further down.
1159 const bool debug = Behavior::debug(
"DistObject");
// Check this (target) object's Map against the transfer's Map, one case per
// combination of restrictedMode and revOp.
1161 if (! restrictedMode && revOp == DoForward) {
1162 const bool myMapSameAsTransferTgtMap =
1163 this->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1164 TEUCHOS_TEST_FOR_EXCEPTION
1165 (! myMapSameAsTransferTgtMap, std::invalid_argument,
1166 "Tpetra::DistObject::" << modeString <<
": For forward-mode "
1167 "communication, the target DistObject's Map must be the same "
1168 "(in the sense of Tpetra::Map::isSameAs) as the input "
1169 "Export/Import object's target Map.");
1171 else if (! restrictedMode && revOp == DoReverse) {
1172 const bool myMapSameAsTransferSrcMap =
1173 this->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1174 TEUCHOS_TEST_FOR_EXCEPTION
1175 (! myMapSameAsTransferSrcMap, std::invalid_argument,
1176 "Tpetra::DistObject::" << modeString <<
": For reverse-mode "
1177 "communication, the target DistObject's Map must be the same "
1178 "(in the sense of Tpetra::Map::isSameAs) as the input "
1179 "Export/Import object's source Map.");
1181 else if (restrictedMode && revOp == DoForward) {
1182 const bool myMapLocallyFittedTransferTgtMap =
1183 this->getMap ()->isLocallyFitted (* (transfer.getTargetMap ()));
1184 TEUCHOS_TEST_FOR_EXCEPTION
1185 (! myMapLocallyFittedTransferTgtMap , std::invalid_argument,
1186 "Tpetra::DistObject::" << modeString <<
": For forward-mode "
1187 "communication using restricted mode, Export/Import object's "
1188 "target Map must be locally fitted (in the sense of "
1189 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
// Remaining case (its branch header is on a line not shown); the message
// indicates restricted reverse mode.
1192 const bool myMapLocallyFittedTransferSrcMap =
1193 this->getMap ()->isLocallyFitted (* (transfer.getSourceMap ()));
1194 TEUCHOS_TEST_FOR_EXCEPTION
1195 (! myMapLocallyFittedTransferSrcMap, std::invalid_argument,
1196 "Tpetra::DistObject::" << modeString <<
": For reverse-mode "
1197 "communication using restricted mode, Export/Import object's "
1198 "source Map must be locally fitted (in the sense of "
1199 "Tpetra::Map::isLocallyFitted) to target DistObject's Map.");
// If the source is itself a DistObject of this type, check its Map as well.
1205 const this_type* srcDistObj =
dynamic_cast<const this_type*
> (&src);
1206 if (srcDistObj !=
nullptr) {
1207 if (revOp == DoForward) {
1208 const bool srcMapSameAsImportSrcMap =
1209 srcDistObj->getMap ()->isSameAs (* (transfer.getSourceMap ()));
1210 TEUCHOS_TEST_FOR_EXCEPTION
1211 (! srcMapSameAsImportSrcMap, std::invalid_argument,
1212 "Tpetra::DistObject::" << modeString <<
": For forward-mode "
1213 "communication, the source DistObject's Map must be the same "
1214 "as the input Export/Import object's source Map.");
// Reverse-mode counterpart (its branch header is on a line not shown).
1217 const bool srcMapSameAsImportTgtMap =
1218 srcDistObj->getMap ()->isSameAs (* (transfer.getTargetMap ()));
1219 TEUCHOS_TEST_FOR_EXCEPTION
1220 (! srcMapSameAsImportTgtMap, std::invalid_argument,
1221 "Tpetra::DistObject::" << modeString <<
": For reverse-mode "
1222 "communication, the source DistObject's Map must be the same "
1223 "as the input Export/Import object's target Map.");
// Forward mode uses the distributor's plan as is; reverse mode uses the
// plan's reverse plan.
1229 const Details::DistributorPlan& distributorPlan = (revOp == DoForward) ? distor.getPlan() : *distor.getPlan().getReversePlan();
// In debug mode, a restricted-mode transfer must not carry any permutes.
1231 TEUCHOS_TEST_FOR_EXCEPTION
1232 (debug && restrictedMode &&
1233 (transfer.getPermuteToLIDs_dv().extent(0) != 0 ||
1234 transfer.getPermuteFromLIDs_dv().extent(0) != 0),
1235 std::invalid_argument,
1236 "Tpetra::DistObject::" << modeString <<
": Transfer object "
1237 "cannot have permutes in restricted mode.");
1241 std::ostringstream os;
1242 os << *prefix <<
"doTransfer: Use new interface; "
1243 "commOnHost=" << (commOnHost ?
"true" :
"false") << endl;
1244 std::cerr << os.str ();
// In reverse mode the "to"/"from" permute lists and the remote/export lists
// swap roles.
1247 using const_lo_dv_type =
1248 Kokkos::DualView<const local_ordinal_type*, buffer_device_type>;
1249 const_lo_dv_type permuteToLIDs = (revOp == DoForward) ?
1250 transfer.getPermuteToLIDs_dv () :
1251 transfer.getPermuteFromLIDs_dv ();
1252 const_lo_dv_type permuteFromLIDs = (revOp == DoForward) ?
1253 transfer.getPermuteFromLIDs_dv () :
1254 transfer.getPermuteToLIDs_dv ();
1255 const_lo_dv_type remoteLIDs = (revOp == DoForward) ?
1256 transfer.getRemoteLIDs_dv () :
1257 transfer.getExportLIDs_dv ();
1258 const_lo_dv_type exportLIDs = (revOp == DoForward) ?
1259 transfer.getExportLIDs_dv () :
1260 transfer.getRemoteLIDs_dv ();
// canTryAliasing follows the contiguity of whichever list acts as the remote
// LIDs for this direction.
1261 const bool canTryAliasing = (revOp == DoForward) ?
1262 transfer.areRemoteLIDsContiguous() :
1263 transfer.areExportLIDsContiguous();
// NOTE(review): the packet count is re-queried from the object here rather
// than received from the caller -- confirm it always matches the value the
// packing phase ended up with.
1265 size_t constantNumPackets = this->constantNumberOfPackets ();
// With a fixed packet count the receive-buffer length is known up front.
1269 if (constantNumPackets != 0) {
1274 const size_t rbufLen = remoteLIDs.extent (0) * constantNumPackets;
1275 reallocImportsIfNeeded (rbufLen, verbose, prefix.get (), canTryAliasing, CM);
// Decide whether any communication is needed: none in reverse mode when this
// object is not distributed, none in forward mode when the source DistObject
// is not distributed.
1279 bool needCommunication =
true;
1282 const this_type* srcDistObj =
dynamic_cast<const this_type*
> (&src);
1284 if (revOp == DoReverse && ! this->isDistributed ()) {
1285 needCommunication =
false;
// NOTE(review): NULL here vs. nullptr in the earlier srcDistObj check --
// consider unifying on nullptr.
1294 else if (revOp == DoForward && srcDistObj != NULL &&
1295 ! srcDistObj->isDistributed ()) {
1296 needCommunication =
false;
1299 if (! needCommunication) {
1301 std::ostringstream os;
1302 os << *prefix <<
"Comm not needed; skipping" << endl;
1303 std::cerr << os.str ();
// Wait on the distributor plan's outstanding communication (presumably
// posted by an earlier phase -- not visible here), then unpack what arrived.
1307 distributorActor_.doWaits(distributorPlan);
1310 std::ostringstream os;
1311 os << *prefix <<
"8. unpackAndCombine - remoteLIDs " << remoteLIDs.extent(0) <<
", constantNumPackets " << constantNumPackets << endl;
1312 std::cerr << os.str ();
1314 doUnpackAndCombine(remoteLIDs, constantNumPackets, CM,
execution_space());
1319 std::ostringstream os;
1320 os << *prefix <<
"9. Done!" << endl;
1321 std::cerr << os.str ();
1325 std::ostringstream os;
1326 os << *prefix <<
"Tpetra::DistObject::doTransfer: Done!" << endl;
1327 std::cerr << os.str ();
// Posts the exchange of packed export data into the import buffer.  The
// declarator line is not visible in this excerpt; the verbose messages and
// fence labels below suggest this is DistObject::doPosts -- TODO confirm.
//
// Visible parameters:
//   constantNumPackets - 0 selects the variable-packets-per-LID path, which
//                        first exchanges the per-LID counts; any other value
//                        selects the fixed-size path.
//   prefix             - prefix for the diagnostics written to std::cerr.
//   canTryAliasing     - forwarded to reallocImportsIfNeeded.
// `distributorPlan`, `commOnHost`, `verbose` and `CM` are used below but
// declared on lines not shown.
//
// NOTE(review): `prefix` is a std::shared_ptr taken by value, so each call
// copies it; a const reference may suffice -- confirm against callers.
1331 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1335 size_t constantNumPackets,
1337 std::shared_ptr<std::string> prefix,
1338 const bool canTryAliasing,
1343 using Kokkos::Compat::create_const_view;
// Variable number of packets per LID: exchange the counts first.
1348 if (constantNumPackets == 0) {
1350 std::ostringstream os;
1351 os << *prefix <<
"7.1. Variable # packets / LID: first comm "
1352 <<
"(commOnHost = " << (commOnHost ?
"true" :
"false") <<
")"
1354 std::cerr << os.str ();
1356 size_t totalImportPackets = 0;
// Host variant of the count exchange: bring both count arrays up to date on
// host if they need it.
1358 if (this->numExportPacketsPerLID_.need_sync_host ()) {
1359 this->numExportPacketsPerLID_.sync_host ();
1361 if (this->numImportPacketsPerLID_.need_sync_host ()) {
1362 this->numImportPacketsPerLID_.sync_host ();
// The import counts are flagged as modified on host (presumably because the
// exchange below fills them there).
1364 this->numImportPacketsPerLID_.modify_host ();
// (The left-hand side of this initialization, presumably numExp_h, is on a
// line not shown.)
1366 create_const_view (this->numExportPacketsPerLID_.view_host ());
1367 auto numImp_h = this->numImportPacketsPerLID_.view_host ();
1371 std::ostringstream os;
1372 os << *prefix <<
"Call doPostsAndWaits"
1374 std::cerr << os.str ();
// Exchange the counts; the literal 1 is presumably the number of packets per
// LID for this exchange -- TODO confirm against DistributorActor.
1376 distributorActor_.doPostsAndWaits(distributorPlan, numExp_h, 1, numImp_h);
1379 std::ostringstream os;
1380 os << *prefix <<
"Count totalImportPackets" << std::endl;
1381 std::cerr << os.str ();
1383 using the_dev_type =
typename decltype (numImp_h)::device_type;
1384 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_h);
// Device variant of the count exchange (the branch header selecting it is on
// a line not shown).
1387 this->numExportPacketsPerLID_.sync_device ();
1388 this->numImportPacketsPerLID_.sync_device ();
1389 this->numImportPacketsPerLID_.modify_device ();
1390 auto numExp_d = create_const_view
1391 (this->numExportPacketsPerLID_.view_device ());
1392 auto numImp_d = this->numImportPacketsPerLID_.view_device ();
1396 std::ostringstream os;
1397 os << *prefix <<
"Call doPostsAndWaits"
1399 std::cerr << os.str ();
1402 distributorActor_.doPostsAndWaits(distributorPlan, numExp_d, 1, numImp_d);
1405 std::ostringstream os;
1406 os << *prefix <<
"Count totalImportPackets" << std::endl;
1407 std::cerr << os.str ();
1409 using the_dev_type =
typename decltype (numImp_d)::device_type;
1410 totalImportPackets = countTotalImportPackets<the_dev_type> (numImp_d);
1414 std::ostringstream os;
1415 os << *prefix <<
"totalImportPackets=" << totalImportPackets << endl;
1416 std::cerr << os.str ();
// Resize the import buffer for the total just counted.
1418 this->reallocImportsIfNeeded (totalImportPackets, verbose,
1419 prefix.get (), canTryAliasing, CM);
1421 std::ostringstream os;
1422 os << *prefix <<
"7.3. Second comm" << std::endl;
1423 std::cerr << os.str ();
// Second exchange: the packed data itself.  The per-LID counts are synced to
// host first; the initializers of the two *_av variables below are on lines
// not shown.
1429 this->numExportPacketsPerLID_.sync_host ();
1430 this->numImportPacketsPerLID_.sync_host ();
1439 auto numExportPacketsPerLID_av =
1441 auto numImportPacketsPerLID_av =
// Reset the import buffer's sync state before one side is marked modified.
1449 this->imports_.clear_sync_state ();
1452 std::ostringstream os;
1453 os << *prefix <<
"Comm on "
1454 << (commOnHost ?
"host" :
"device")
1455 <<
"; call doPosts" << endl;
1456 std::cerr << os.str ();
// Host buffers: mark imports modified on host and post from the host views.
1460 this->imports_.modify_host ();
1461 distributorActor_.doPosts
1463 create_const_view (this->exports_.view_host ()),
1464 numExportPacketsPerLID_av,
1465 this->imports_.view_host (),
1466 numImportPacketsPerLID_av);
// Device buffers: fence, mark imports modified on device, and post from the
// device views.
1469 Kokkos::fence(
"DistObject::doPosts-1");
1470 this->imports_.modify_device ();
1471 distributorActor_.doPosts
1473 create_const_view (this->exports_.view_device ()),
1474 numExportPacketsPerLID_av,
1475 this->imports_.view_device (),
1476 numImportPacketsPerLID_av);
// Constant number of packets per LID: a single exchange, with no per-LID
// count arrays passed to doPosts.
1481 std::ostringstream os;
1482 os << *prefix <<
"7.1. Const # packets per LID: " << endl
1489 std::cerr << os.str ();
1496 this->imports_.clear_sync_state ();
1499 std::ostringstream os;
1500 os << *prefix <<
"7.2. Comm on "
1501 << (commOnHost ?
"host" :
"device")
1502 <<
"; call doPosts" << endl;
1503 std::cerr << os.str ();
1506 this->imports_.modify_host ();
1507 distributorActor_.doPosts
1509 create_const_view (this->exports_.view_host ()),
1511 this->imports_.view_host ());
1514 Kokkos::fence(
"DistObject::doPosts-2");
1515 this->imports_.modify_device ();
1516 distributorActor_.doPosts
1518 create_const_view (this->exports_.view_device ()),
1520 this->imports_.view_device ());
// Wrapper around packAndPrepare that adds a profiling region and, when
// HAVE_TPETRA_TRANSFER_TIMERS is defined, a Teuchos::TimeMonitor.  The
// declarator line is not visible in this excerpt; the region label below
// names the routine Tpetra::DistObject::doPackAndPrepare.
//
// Visible parameters:
//   exportLIDs         - local IDs passed through to packAndPrepare.
//   constantNumPackets - taken by non-const reference and passed through, so
//                        packAndPrepare can presumably overwrite it.
// `src` and `space` are used below but declared on lines not shown.
//
// packAndPrepare is called in two places: once followed by catch handlers
// that record a message in lclErrStrm, after which Details::checkGlobalError
// is called with the Map's communicator; and once unguarded.  The condition
// selecting between the two is on a line not shown.
1525 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1529 const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& exportLIDs,
1530 size_t& constantNumPackets,
1537 ProfilingRegion region_pp
1538 (
"Tpetra::DistObject::doPackAndPrepare");
1539#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1542 Teuchos::TimeMonitor packAndPrepareMon (*packAndPrepareTimer_);
// Guarded call: lclSuccess starts false (the line that would set it to true
// is not shown).
1562 std::ostringstream lclErrStrm;
1563 bool lclSuccess =
false;
1565 this->packAndPrepare (src, exportLIDs, this->exports_,
1566 this->numExportPacketsPerLID_,
1567 constantNumPackets, space);
// NOTE(review): the exception is caught by non-const reference; catching by
// const reference is the usual idiom.
1570 catch (std::exception& e) {
1571 lclErrStrm <<
"packAndPrepare threw an exception: "
1572 << endl << e.what();
1575 lclErrStrm <<
"packAndPrepare threw an exception "
1576 "not a subclass of std::exception.";
// Hand the local success flag and message to checkGlobalError.
1578 const char gblErrMsgHeader[] =
"Tpetra::DistObject "
1579 "threw an exception in packAndPrepare on "
1580 "one or more processes in the DistObject's communicator.";
1581 auto comm = getMap()->getComm();
1582 Details::checkGlobalError(std::cerr, lclSuccess,
1583 lclErrStrm.str().c_str(),
1584 gblErrMsgHeader, *comm);
// Unguarded call.
1587 this->packAndPrepare (src, exportLIDs, this->exports_,
1588 this->numExportPacketsPerLID_,
1589 constantNumPackets, space);
// Wrapper that adds a profiling region and, when HAVE_TPETRA_TRANSFER_TIMERS
// is defined, a Teuchos::TimeMonitor around the unpack step.
//
// Visible parameters:
//   remoteLIDs         - local IDs of the entries being received into.
//   constantNumPackets - packet count passed through to the wrapped call.
// `CM` and `space` are used below but declared on lines not shown.
//
// The wrapped call appears twice (only the tails of its argument lists are
// visible; presumably this->unpackAndCombine): once followed by catch
// handlers that record a message in lclErrStrm, after which
// Details::checkGlobalError is called with the Map's communicator; and once
// unguarded.  The condition selecting between the two is on a line not shown.
1593 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1596 doUnpackAndCombine(
const Kokkos::DualView<const local_ordinal_type*, buffer_device_type>& remoteLIDs,
1597 size_t constantNumPackets,
1605 ProfilingRegion region_uc
1606 (
"Tpetra::DistObject::doUnpackAndCombine");
1607#ifdef HAVE_TPETRA_TRANSFER_TIMERS
1610 Teuchos::TimeMonitor unpackAndCombineMon (*unpackAndCombineTimer_);
// Guarded call: lclSuccess starts false (the line that would set it to true
// is not shown).
1614 std::ostringstream lclErrStrm;
1615 bool lclSuccess =
false;
1618 this->numImportPacketsPerLID_,
1619 constantNumPackets, CM, space);
// NOTE(review): the exception is caught by non-const reference; catching by
// const reference is the usual idiom.  Also, the local messages name
// "doUnpackAndCombine" while the global header names "unpackAndCombine".
1622 catch (std::exception& e) {
1623 lclErrStrm <<
"doUnpackAndCombine threw an exception: "
1624 << endl << e.what();
1627 lclErrStrm <<
"doUnpackAndCombine threw an exception "
1628 "not a subclass of std::exception.";
// Hand the local success flag and message to checkGlobalError.
1630 const char gblErrMsgHeader[] =
"Tpetra::DistObject "
1631 "threw an exception in unpackAndCombine on "
1632 "one or more processes in the DistObject's communicator.";
1633 auto comm = getMap()->getComm();
1634 Details::checkGlobalError(std::cerr, lclSuccess,
1635 lclErrStrm.str().c_str(),
1636 gblErrMsgHeader, *comm);
// Unguarded call.
1640 this->numImportPacketsPerLID_,
1641 constantNumPackets, CM, space);
// DistObject member definition whose name and body are on lines not shown;
// only the template header and the start of two Kokkos::DualView parameters
// are visible.  Presumably the base copyAndPermute overload -- TODO confirm.
1645 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1651 const Kokkos::DualView<
1654 const Kokkos::DualView<
// DistObject member definition taking two DualViews of const local ordinals;
// the visible part of its body forwards to copyAndPermute(source, numSameIDs,
// permuteToLIDs, permuteFromLIDs, ...).  The declarator, the remaining
// parameters and the trailing call arguments are on lines not shown.
1661template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1664 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1666 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1680 copyAndPermute(source, numSameIDs, permuteToLIDs, permuteFromLIDs,
// DistObject member definition whose name and body are on lines not shown;
// only the template header and the start of one Kokkos::DualView parameter
// are visible.  Presumably the base packAndPrepare overload -- TODO confirm.
1688 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1693 const Kokkos::DualView<
// DistObject member definition taking a DualView of const local ordinals, an
// `exports` DualView by non-const reference and a `numPacketsPerLID` DualView
// by value.  The only visible piece of the body is the tail of a call ending
// in `constantNumPackets)`; presumably it forwards to another packAndPrepare
// overload -- TODO confirm.
1706template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1709 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1711 Kokkos::DualView<packet_type *, buffer_device_type> &exports,
1712 Kokkos::DualView<size_t *, buffer_device_type> numPacketsPerLID,
1731 constantNumPackets);
// DistObject member definition whose name and body are on lines not shown;
// only the template header and the start of its parameter list are visible.
// Presumably the base unpackAndCombine overload -- TODO confirm.
1739 template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1743 (
const Kokkos::DualView<
// DistObject member definition taking a DualView of const local ordinals, the
// `imports` and `numPacketsPerLID` DualViews by value, a constant packet count
// and a combine mode.  The visible part of its body forwards to
// unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets,
// ...).  The declarator, the remaining parameters and the trailing call
// arguments are on lines not shown.
1757template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1759 const Kokkos::DualView<const local_ordinal_type *, buffer_device_type>
1761 Kokkos::DualView<packet_type *, buffer_device_type> imports,
1762 Kokkos::DualView<size_t *, buffer_device_type> numPacketsPerLID,
1763 const size_t constantNumPackets,
const CombineMode combineMode,
1768 unpackAndCombine(importLIDs, imports, numPacketsPerLID, constantNumPackets,
// Const member that writes a description of this object to `os`: the stream
// is wrapped in a Teuchos::FancyOStream (via a non-owning rcpFromRef) and
// passed to describe() with Teuchos::VERB_DEFAULT.  The function name is on a
// line not shown (presumably DistObject::print -- TODO confirm).
1777template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1779 std::ostream &os)
const {
1781 using Teuchos::FancyOStream;
1782 using Teuchos::getFancyOStream;
1784 using Teuchos::rcpFromRef;
1786 RCP<FancyOStream> out = getFancyOStream(rcpFromRef(os));
1787 this->
describe(*out, Teuchos::VERB_DEFAULT);
// Builds the prefix string used by verbose diagnostics and returns it as a
// std::unique_ptr<std::string>.
//
//   className  - class name to use for the prefix.
//   methodName - method name to use for the prefix.
//
// The visible part fetches this object's Map and, when the Map is null, uses
// a null communicator instead of the Map's.  How the prefix is assembled from
// these is on lines not shown.
1790template <
class Packet,
class LocalOrdinal,
class GlobalOrdinal,
class Node>
1791std::unique_ptr<std::string>
1792DistObject<Packet, LocalOrdinal, GlobalOrdinal, Node>::createPrefix(
1793 const char className[],
const char methodName[])
const {
1794 auto map = this->getMap();
1795 auto comm = map.is_null() ? Teuchos::null : map->getComm();
// Free function template (its name is on a line not shown) that calls
// removeEmptyProcessesInPlace(newMap) on `input` and then, if `newMap` is
// null, resets `input` to Teuchos::null.
//
//   input  - RCP to the object to update; taken by non-const reference
//            because it may be reset to null.
//   newMap - RCP to the Map handed to the object's
//            removeEmptyProcessesInPlace.
1799template <
class DistObjectType>
1801 Teuchos::RCP<DistObjectType> &input,
1802 const Teuchos::RCP<
const Map<
typename DistObjectType::local_ordinal_type,
1803 typename DistObjectType::global_ordinal_type,
1804 typename DistObjectType::node_type>> &newMap) {
1805 input->removeEmptyProcessesInPlace(newMap);
1806 if (newMap.is_null()) {
1807 input = Teuchos::null;
// Function template whose declarator and remaining body are on lines not
// shown.  The visible statement obtains a new Map by calling
// removeEmptyProcesses() on the input object's current Map; presumably that
// Map is then forwarded to the two-argument overload -- TODO confirm.
1811template <
class DistObjectType>
1813 auto newMap = input->getMap()->removeEmptyProcesses();
// Expands to an explicit instantiation of DistObject for the given packet
// type (SCALAR), local ordinal (LO), global ordinal (GO) and NODE.
1818#define TPETRA_DISTOBJECT_INSTANT(SCALAR, LO, GO, NODE) \
1819 template class DistObject<SCALAR, LO, GO, NODE>;
// Expands to an explicit instantiation of DistObject with `char` as the
// packet type, for the given local ordinal (LO), global ordinal (GO) and NODE.
1823#define TPETRA_DISTOBJECT_INSTANT_CHAR(LO, GO, NODE) \
1824 template class DistObject<char, LO, GO, NODE>;