optimize parallel load

This commit is contained in:
Joachim Schoeberl 2021-06-08 19:08:07 +02:00
parent aceb2fac32
commit abb2e43ccb
4 changed files with 71 additions and 37 deletions

View File

@ -257,6 +257,7 @@ namespace ngcore
template <typename T, typename T2 = decltype(GetMPIType<T>())> template <typename T, typename T2 = decltype(GetMPIType<T>())>
void Bcast (T & s, int root = 0) const { void Bcast (T & s, int root = 0) const {
if (size == 1) return ; if (size == 1) return ;
static Timer t("MPI - Bcast"); RegionTimer reg(t);
MPI_Bcast (&s, 1, GetMPIType<T>(), root, comm); MPI_Bcast (&s, 1, GetMPIType<T>(), root, comm);
} }

View File

@ -208,6 +208,12 @@ namespace netgen
void Mesh :: SendMesh () const void Mesh :: SendMesh () const
{ {
static Timer tsend("SendMesh"); RegionTimer reg(tsend);
static Timer tbuildvertex("SendMesh::BuildVertex");
static Timer tbuildvertexa("SendMesh::BuildVertex a");
static Timer tbuildvertexb("SendMesh::BuildVertex b");
static Timer tbuilddistpnums("SendMesh::Build_distpnums");
static Timer tbuildelementtable("SendMesh::Build_elementtable");
NgMPI_Comm comm = GetCommunicator(); NgMPI_Comm comm = GetCommunicator();
int id = comm.Rank(); int id = comm.Rank();
@ -223,6 +229,7 @@ namespace netgen
// build edges/faces. // build edges/faces.
auto & top = const_cast<MeshTopology&>(GetTopology()); auto & top = const_cast<MeshTopology&>(GetTopology());
if(top.NeedsUpdate()) { if(top.NeedsUpdate()) {
top.SetBuildVertex2Element(false);
top.SetBuildEdges(false); top.SetBuildEdges(false);
top.SetBuildFaces(false); top.SetBuildFaces(false);
top.Update(); top.Update();
@ -357,6 +364,7 @@ namespace netgen
} }
/** Now we build the vertex-data to send to the workers. **/ /** Now we build the vertex-data to send to the workers. **/
tbuildvertex.Start();
NgArray<int, PointIndex::BASE> vert_flag (GetNV()); NgArray<int, PointIndex::BASE> vert_flag (GetNV());
NgArray<int, PointIndex::BASE> num_procs_on_vert (GetNV()); NgArray<int, PointIndex::BASE> num_procs_on_vert (GetNV());
NgArray<int> num_verts_on_proc (ntasks); NgArray<int> num_verts_on_proc (ntasks);
@ -401,6 +409,7 @@ namespace netgen
} }
}; };
/** count vertices per proc and procs per vertex **/ /** count vertices per proc and procs per vertex **/
tbuildvertexa.Start();
iterate_vertices([&](auto vertex, auto dest){ iterate_vertices([&](auto vertex, auto dest){
auto countit = [&] (auto vertex, auto dest) { auto countit = [&] (auto vertex, auto dest) {
if (vert_flag[vertex] < dest) if (vert_flag[vertex] < dest)
@ -409,14 +418,23 @@ namespace netgen
num_verts_on_proc[dest]++; num_verts_on_proc[dest]++;
num_procs_on_vert[vertex]++; num_procs_on_vert[vertex]++;
// GetParallelTopology().SetDistantPNum (dest, vertex); // GetParallelTopology().SetDistantPNum (dest, vertex);
GetParallelTopology().AddDistantProc (PointIndex(vertex), dest); // GetParallelTopology().AddDistantProc (PointIndex(vertex), dest);
} }
}; };
countit(vertex, dest); countit(vertex, dest);
/*
auto pers = per_verts_trans[vertex]; auto pers = per_verts_trans[vertex];
for(int j = 0; j < pers.Size(); j++) for(int j = 0; j < pers.Size(); j++)
countit(pers[j], dest); countit(pers[j], dest);
*/
for (auto v : per_verts_trans[vertex])
countit(v, dest);
}); });
tbuildvertexa.Stop();
tbuildvertexb.Start();
TABLE<PointIndex> verts_of_proc (num_verts_on_proc); TABLE<PointIndex> verts_of_proc (num_verts_on_proc);
TABLE<int, PointIndex::BASE> procs_of_vert (num_procs_on_vert); TABLE<int, PointIndex::BASE> procs_of_vert (num_procs_on_vert);
TABLE<int, PointIndex::BASE> loc_num_of_vert (num_procs_on_vert); TABLE<int, PointIndex::BASE> loc_num_of_vert (num_procs_on_vert);
@ -430,10 +448,16 @@ namespace netgen
} }
}; };
addit(vertex, dest); addit(vertex, dest);
/*
auto pers = per_verts_trans[vertex]; auto pers = per_verts_trans[vertex];
for(int j = 0; j < pers.Size(); j++) for(int j = 0; j < pers.Size(); j++)
addit(pers[j], dest); addit(pers[j], dest);
*/
for (auto v : per_verts_trans[vertex])
addit(v, dest);
}); });
tbuildvertexb.Stop();
/** /**
local vertex numbers on distant procs local vertex numbers on distant procs
(I think this was only used for debugging??) (I think this was only used for debugging??)
@ -449,6 +473,7 @@ namespace netgen
loc_num_of_vert.Add (vert, verts_of_proc[dest].Size()); loc_num_of_vert.Add (vert, verts_of_proc[dest].Size());
} }
} }
tbuildvertex.Stop();
PrintMessage ( 3, "Sending Vertices - vertices"); PrintMessage ( 3, "Sending Vertices - vertices");
Array<MPI_Datatype> point_types(ntasks-1); Array<MPI_Datatype> point_types(ntasks-1);
@ -540,6 +565,7 @@ namespace netgen
PrintMessage ( 3, "Sending Vertices - distprocs"); PrintMessage ( 3, "Sending Vertices - distprocs");
tbuilddistpnums.Start();
Array<int> num_distpnums(ntasks); Array<int> num_distpnums(ntasks);
num_distpnums = 0; num_distpnums = 0;
@ -565,6 +591,8 @@ namespace netgen
} }
} }
tbuilddistpnums.Stop();
for ( int dest = 1; dest < ntasks; dest ++ ) for ( int dest = 1; dest < ntasks; dest ++ )
sendrequests.Append (comm.ISend (distpnums[dest], dest, MPI_TAG_MESH+1)); sendrequests.Append (comm.ISend (distpnums[dest], dest, MPI_TAG_MESH+1));
@ -572,6 +600,7 @@ namespace netgen
PrintMessage ( 3, "Sending elements" ); PrintMessage ( 3, "Sending elements" );
tbuildelementtable.Start();
Array<int> elarraysize (ntasks); Array<int> elarraysize (ntasks);
elarraysize = 0; elarraysize = 0;
for ( int ei = 1; ei <= GetNE(); ei++) for ( int ei = 1; ei <= GetNE(); ei++)
@ -596,6 +625,7 @@ namespace netgen
for (int i = 0; i < el.GetNP(); i++) for (int i = 0; i < el.GetNP(); i++)
elementarrays.Add (dest, el[i]); elementarrays.Add (dest, el[i]);
} }
tbuildelementtable.Stop();
for (int dest = 1; dest < ntasks; dest ++ ) for (int dest = 1; dest < ntasks; dest ++ )
// sendrequests.Append (MyMPI_ISend (elementarrays[dest], dest, MPI_TAG_MESH+2, comm)); // sendrequests.Append (MyMPI_ISend (elementarrays[dest], dest, MPI_TAG_MESH+2, comm));
@ -978,11 +1008,11 @@ namespace netgen
// workers receive the mesh from the master // workers receive the mesh from the master
void Mesh :: ReceiveParallelMesh ( ) void Mesh :: ReceiveParallelMesh ( )
{ {
int timer = NgProfiler::CreateTimer ("ReceiveParallelMesh"); Timer timer("ReceiveParallelMesh");
int timer_pts = NgProfiler::CreateTimer ("Receive points"); Timer timer_pts("Receive points");
int timer_els = NgProfiler::CreateTimer ("Receive elements"); Timer timer_els("Receive elements");
int timer_sels = NgProfiler::CreateTimer ("Receive surface elements"); Timer timer_sels("Receive surface elements");
NgProfiler::RegionTimer reg(timer); RegionTimer reg(timer);
NgMPI_Comm comm = GetCommunicator(); NgMPI_Comm comm = GetCommunicator();
int id = comm.Rank(); int id = comm.Rank();
@ -998,7 +1028,7 @@ namespace netgen
paralleltop -> SetNE (nelloc); paralleltop -> SetNE (nelloc);
// receive vertices // receive vertices
NgProfiler::StartTimer (timer_pts); timer_pts.Start();
Array<int> verts; Array<int> verts;
comm.Recv (verts, 0, MPI_TAG_MESH+1); comm.Recv (verts, 0, MPI_TAG_MESH+1);
@ -1054,16 +1084,16 @@ namespace netgen
// SetDistantPNum (dist_pnums[hi+1], dist_pnums[hi]); // , dist_pnums[hi+2]); // SetDistantPNum (dist_pnums[hi+1], dist_pnums[hi]); // , dist_pnums[hi+2]);
AddDistantProc (PointIndex(dist_pnums[hi]), dist_pnums[hi+1]); AddDistantProc (PointIndex(dist_pnums[hi]), dist_pnums[hi+1]);
NgProfiler::StopTimer (timer_pts); timer_pts.Stop();
*testout << "got " << numvert << " vertices" << endl; *testout << "got " << numvert << " vertices" << endl;
{ {
RegionTimer reg(timer_els);
Array<int> elarray; Array<int> elarray;
comm.Recv (elarray, 0, MPI_TAG_MESH+2); comm.Recv (elarray, 0, MPI_TAG_MESH+2);
NgProfiler::RegionTimer reg(timer_els);
for (int ind = 0, elnum = 1; ind < elarray.Size(); elnum++) for (int ind = 0, elnum = 1; ind < elarray.Size(); elnum++)
{ {
paralleltop->SetLoc2Glob_VolEl ( elnum, elarray[ind++]); paralleltop->SetLoc2Glob_VolEl ( elnum, elarray[ind++]);
@ -1093,7 +1123,7 @@ namespace netgen
} }
{ {
NgProfiler::RegionTimer reg(timer_sels); RegionTimer reg(timer_sels);
Array<SelPackage> selbuf; Array<SelPackage> selbuf;
comm.Recv ( selbuf, 0, MPI_TAG_MESH+4); comm.Recv ( selbuf, 0, MPI_TAG_MESH+4);

View File

@ -407,6 +407,8 @@ namespace netgen
vertex to segment vertex to segment
*/ */
if (buildvertex2element)
{
timer_tables.Start(); timer_tables.Start();
vert2element = mesh->CreatePoint2ElementTable(); vert2element = mesh->CreatePoint2ElementTable();
vert2surfelement = mesh->CreatePoint2SurfaceElementTable(0); vert2surfelement = mesh->CreatePoint2SurfaceElementTable(0);
@ -426,6 +428,7 @@ namespace netgen
table.Add(pointel.pnum, pei); table.Add(pointel.pnum, pei);
}, np); }, np);
timer_tables.Stop(); timer_tables.Stop();
}
(*tracer) ("Topology::Update setup tables", true); (*tracer) ("Topology::Update setup tables", true);

View File

@ -43,6 +43,7 @@ struct T_FACE
class MeshTopology class MeshTopology
{ {
const Mesh * mesh; const Mesh * mesh;
bool buildvertex2element = true;
bool buildedges; bool buildedges;
bool buildfaces; bool buildfaces;
bool build_parent_edges = false; // may be changed to default = false bool build_parent_edges = false; // may be changed to default = false
@ -79,10 +80,9 @@ public:
~MeshTopology (); ~MeshTopology ();
MeshTopology & operator= (MeshTopology && top) = default; MeshTopology & operator= (MeshTopology && top) = default;
void SetBuildEdges (bool be) void SetBuildVertex2Element (bool bv2e) { buildvertex2element = bv2e; }
{ buildedges = be; } void SetBuildEdges (bool be) { buildedges = be; }
void SetBuildFaces (bool bf) void SetBuildFaces (bool bf) { buildfaces = bf; }
{ buildfaces = bf; }
void SetBuildParentEdges (bool bh) { build_parent_edges = bh; } void SetBuildParentEdges (bool bh) { build_parent_edges = bh; }
void SetBuildParentFaces (bool bh) { build_parent_faces = bh; } void SetBuildParentFaces (bool bh) { build_parent_faces = bh; }