Skip to content

Commit a315a5b

Browse files
committed
Added a missing region to the SF topology that was causing an MPI crash when running Hermes-3.
1 parent 4c09014 commit a315a5b

1 file changed

Lines changed: 128 additions & 30 deletions

File tree

src/mesh/impls/bout/boutmesh.cxx

Lines changed: 128 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1187,12 +1187,12 @@ void BoutMesh::createCommunicators() {
11871187
}
11881188
};
11891189

1190-
// PF_W Y ranges: bottom target + middle segment + top target
1190+
// PF_W Y ranges: West target + Central segment + South target in order.
11911191
// Use jyseps2_1+1 (not jyseps2_1) so the last core cell is excluded.
11921192
// Use jyseps2_2+1 (not jyseps2_2) so the last E_PFR cell is excluded.
1193-
add_pf_range(0, jyseps1_1);
1194-
add_pf_range(jyseps2_1 + 1, jyseps1_2);
1195-
add_pf_range(jyseps2_2 + 1, ny - 1);
1193+
add_pf_range(0, jyseps1_1); //West target
1194+
add_pf_range(jyseps2_1 + 1, jyseps1_2); //Central segment
1195+
add_pf_range(jyseps2_2 + 1, ny - 1); //South target
11961196

11971197
MPI_Comm_create(BoutComm::get(), pf_group, &comm_tmp);
11981198
if (comm_tmp != MPI_COMM_NULL) {
@@ -1235,19 +1235,37 @@ void BoutMesh::createCommunicators() {
12351235
}
12361236
};
12371237

1238-
// PF_E Y ranges: west half + east half of the E_PFR closed loop.
1239-
// Use jyseps1_2+1 (not jyseps1_2) so the last W_PFR cell is excluded.
1240-
add_pf_range(jyseps1_2 + 1, ny_inner - 1);
1241-
add_pf_range(ny_inner + 1, jyseps2_2);
1238+
// PF_E Y ranges: the two E_PFR segments.
1239+
// IMPORTANT: add the H1 segment (ny_inner+1..jyseps2_2) FIRST so it
1240+
// becomes rank 0 in comm_pf_e → firstY=true. Then add the G1 segment
1241+
// (jyseps1_2+1..ny_inner-1) so it becomes rank 1 → lastY=true.
1242+
//
1243+
// Region naming (INGRID convention):
1244+
// G1: y = jyseps1_2+1 .. ny_inner-1 (YPROC 6, lower Y-index)
1245+
// Physical target at UPPER face (y=ny_inner-1); branch cut at lower face.
1246+
// H1: y = ny_inner+1 .. jyseps2_2 (YPROC 7, higher Y-index)
1247+
// Physical target at LOWER face (y=ny_inner); branch cut at upper face.
1248+
// (I1 is the third segment of the W_PFR, not part of E_PFR.)
1249+
//
1250+
// This ordering is the INVERSE of the W_PFR ordering because the E_PFR
1251+
// topology is inverted: G1 (lower Y-index) has its target at the TOP,
1252+
// while H1 (higher Y-index) has its target at the BOTTOM. The branch
1253+
// cut connects G1 lower face ↔ H1 upper face.
1254+
//
1255+
// With rank 0 = H1 (firstY=true → ys=ystart, no lower extension) and
1256+
// rank 1 = G1 (lastY=true → ye=yend, no upper extension), FV parallel
1257+
// operators stop at the physical target face instead of extending into
1258+
// guard cells and double-counting the sheath flux.
1259+
add_pf_range(ny_inner + 1, jyseps2_2); // H1 first → rank 0 → firstY=true
1260+
add_pf_range(jyseps1_2 + 1, ny_inner - 1); // G1 second → rank 1 → lastY=true
12421261

12431262
MPI_Comm_create(BoutComm::get(), pf_group, &comm_tmp);
12441263
if (comm_tmp != MPI_COMM_NULL) {
12451264
comm_pf_e = comm_tmp;
1246-
// Assign E_PFR processors to comm_inner only if not already assigned
1247-
// by comm_pf_w (W_PFR and E_PFR are disjoint after the off-by-one fix).
1248-
if (comm_inner == MPI_COMM_NULL) {
1249-
comm_inner = comm_pf_e;
1250-
}
1265+
// Assign to comm_inner so firstY/lastY return the correct values:
1266+
// rank 0 = H1 → firstY=true at south target (lower face)
1267+
// rank 1 = G1 → lastY=true at east target (upper face)
1268+
comm_inner = comm_pf_e;
12511269
}
12521270

12531271
if (pf_group != MPI_GROUP_EMPTY) {
@@ -1458,12 +1476,12 @@ void BoutMesh::createCommunicators() {
14581476
if (mesh_topology == MeshTopology::SF) {
14591477
TRACE("Creating SF C_PFR comm_middle communicators");
14601478
for (int i = 0; i < NXPE; i++) {
1461-
// Lower C_PFR: y = 0..jyseps1_1 (bottom target, YPROC 0)
1479+
// Lower C_PFR: y = 0..jyseps1_1 (West target, YPROC 0)
14621480
proc[0] = PROC_NUM(i, 0);
14631481
proc[1] = PROC_NUM(i, YPROC(jyseps1_1));
14641482
MPI_Group_range_incl(group_world, 1, &proc, &group_tmp1);
14651483

1466-
// Upper C_PFR: y = jyseps2_1+1..ny_inner-1 (middle + east-target, YPROC 5..6)
1484+
// Upper C_PFR: y = jyseps2_1+1..ny_inner-1 (central middle region + east-target, YPROC 5..6)
14671485
proc[0] = PROC_NUM(i, YPROC(jyseps2_1 + 1));
14681486
proc[1] = PROC_NUM(i, YPROC(ny_inner - 1));
14691487
MPI_Group_range_incl(group_world, 1, &proc, &group_tmp2);
@@ -1484,6 +1502,49 @@ void BoutMesh::createCommunicators() {
14841502
}
14851503
}
14861504

1505+
// For SF topology, create a dedicated "South PFR" comm_middle for the
1506+
// south strip (H1 = YPROC covering ny_inner..jyseps2_2, South East target
1507+
// at its lower face; I1 = YPROC covering jyseps2_2+1..ny-1, South West
1508+
// target at its upper face).
1509+
//
1510+
// The "unbalanced lower" communicator above put both H1 and I1 in the
1511+
// middle ranks of a large group (ranks 6 and 7 out of 8), so neither gets
1512+
// firstY=true nor lastY=true — exactly the double-counting problem that
1513+
// was fixed for G1 by the C_PFR section above. This section overwrites
1514+
// comm_middle for H1 and I1 with a two-processor communicator where:
1515+
// rank 0 = H1 → firstY=true (ys=ystart, no lower extension at SE target)
1516+
// rank 1 = I1 → lastY=true (ye=yend, no upper extension at SW target)
1517+
if (mesh_topology == MeshTopology::SF) {
1518+
TRACE("Creating SF S_PFR comm_middle communicators");
1519+
for (int i = 0; i < NXPE; i++) {
1520+
// H1: y = ny_inner .. jyseps2_2 (SE target at lower face)
1521+
proc[0] = PROC_NUM(i, YPROC(ny_inner));
1522+
proc[1] = PROC_NUM(i, YPROC(jyseps2_2));
1523+
proc[2] = NXPE;
1524+
MPI_Group_range_incl(group_world, 1, &proc, &group_tmp1);
1525+
1526+
// I1: y = jyseps2_2+1 .. ny-1 (SW target at upper face)
1527+
proc[0] = PROC_NUM(i, YPROC(jyseps2_2 + 1));
1528+
proc[1] = PROC_NUM(i, NYPE - 1);
1529+
proc[2] = NXPE;
1530+
MPI_Group_range_incl(group_world, 1, &proc, &group_tmp2);
1531+
1532+
MPI_Group_union(group_tmp1, group_tmp2, &group);
1533+
MPI_Comm_create(BoutComm::get(), group, &comm_tmp);
1534+
if (comm_tmp != MPI_COMM_NULL) {
1535+
comm_middle = comm_tmp;
1536+
}
1537+
1538+
if (group_tmp1 != MPI_GROUP_EMPTY) {
1539+
MPI_Group_free(&group_tmp1);
1540+
}
1541+
if (group_tmp2 != MPI_GROUP_EMPTY) {
1542+
MPI_Group_free(&group_tmp2);
1543+
}
1544+
MPI_Group_free(&group);
1545+
}
1546+
}
1547+
14871548
MPI_Group_free(&group_world);
14881549
// Now have communicators for all regions.
14891550
}
@@ -1502,16 +1563,35 @@ void BoutMesh::createXBoundaries() {
15021563
const int yg = getGlobalYIndexNoBoundaries(MYG);
15031564

15041565
if (PE_XIND == 0) {
1505-
// Inner either core or PF
1566+
// Inner x face: either core or PF boundary.
1567+
//
1568+
// For CDN/UDN the y-range (jyseps1_2, jyseps2_2] is the outer core leg,
1569+
// so it gets a "core" boundary. For SF topology that same y-range is the
1570+
// East PFR (G1 = jyseps1_2+1..ny_inner-1, H1 = ny_inner..jyseps2_2),
1571+
// which lies south of the inner separatrix and must get a "pf" boundary
1572+
// just like the West PFR.
1573+
if ((mesh_topology == MeshTopology::CDN) or (mesh_topology == MeshTopology::UDN)
1574+
or (mesh_topology == MeshTopology::CFL)){
1575+
// CDN/UDN have two core legs; CFL is all core (no X-points).
1576+
// All three need both y-ranges checked.
1577+
const bool in_core = ((yg > jyseps1_1) and (yg <= jyseps2_1))
1578+
or ((yg > jyseps1_2) and (yg <= jyseps2_2));
1579+
1580+
if (in_core) {
1581+
boundary.push_back(new BoundaryRegionXIn("core", ystart, yend, this));
1582+
} else {
1583+
boundary.push_back(new BoundaryRegionXIn("pf", ystart, yend, this));
1584+
}
1585+
}
1586+
else{ // SN and SF have one core region at (jyseps1_1, jyseps2_1].
15061587

1507-
if (((yg > jyseps1_1) and (yg <= jyseps2_1))
1508-
or ((yg > jyseps1_2) and (yg <= jyseps2_2))) {
1509-
//Unchanged for new SF topology
1510-
// Core
1511-
boundary.push_back(new BoundaryRegionXIn("core", ystart, yend, this));
1512-
} else {
1513-
// PF region
1514-
boundary.push_back(new BoundaryRegionXIn("pf", ystart, yend, this));
1588+
const bool in_core = ((yg > jyseps1_1) and (yg <= jyseps2_1));
1589+
1590+
if (in_core) {
1591+
boundary.push_back(new BoundaryRegionXIn("core", ystart, yend, this));
1592+
} else {
1593+
boundary.push_back(new BoundaryRegionXIn("pf", ystart, yend, this));
1594+
}
15151595
}
15161596
}
15171597

@@ -2188,10 +2268,19 @@ bool BoutMesh::firstY(int xpos) const {
21882268
comm = comm_outer;
21892269
}
21902270

2191-
// Communicator may be MPI_COMM_NULL for some processors in SF topology
2192-
// where createCommunicators() does not yet assign all regions.
2193-
// Fall back to the global Y-position so we at least don't crash.
2271+
// Communicator may be MPI_COMM_NULL for some processors in SF topology.
2272+
// E_PFR processors (G1/H1) now have comm_inner = comm_pf_e so this
2273+
// fallback should not be reached in normal operation. As a defence,
2274+
// check whether a physical target exists at the lower Y face
2275+
// (DDATA_INDEST < 0 means no processor below); if so return true so that
2276+
// FV operators do not extend their loop into the lower guard cells and
2277+
// double-count the sheath flux.
21942278
if (comm == MPI_COMM_NULL) {
2279+
// const bool lower_target =
2280+
// (xpos < DDATA_XSPLIT) ? (DDATA_INDEST < 0) : (DDATA_OUTDEST < 0);
2281+
// if (lower_target) {
2282+
// return true;
2283+
// }
21952284
return PE_YIND == 0;
21962285
}
21972286

@@ -2212,10 +2301,19 @@ bool BoutMesh::lastY(int xpos) const {
22122301
comm = comm_outer;
22132302
}
22142303

2215-
// Communicator may be MPI_COMM_NULL for some processors in SF topology
2216-
// where createCommunicators() does not yet assign all regions.
2217-
// Fall back to the global Y-position so we at least don't crash.
2304+
// Communicator may be MPI_COMM_NULL for some processors in SF topology.
2305+
// E_PFR processors (G1/H1) now have comm_inner = comm_pf_e so this
2306+
// fallback should not be reached in normal operation. As a defence,
2307+
// check whether a physical target exists at the upper Y face
2308+
// (UDATA_INDEST < 0 means no processor above); if so return true so that
2309+
// FV operators do not extend their loop into the upper guard cells and
2310+
// double-count the sheath flux.
22182311
if (comm == MPI_COMM_NULL) {
2312+
// const bool upper_target =
2313+
// (xpos < UDATA_XSPLIT) ? (UDATA_INDEST < 0) : (UDATA_OUTDEST < 0);
2314+
// if (upper_target) {
2315+
// return true;
2316+
// }
22192317
return PE_YIND == NYPE - 1;
22202318
}
22212319

0 commit comments

Comments
 (0)