[英]Primary job terminated normally, but 1 process returned a non-zero exit code. Per user-direction, the job has been aborted
#include <stdio.h>
#include <iostream>
using namespace std;
void findCoords(int elem_num, int n, int& i, int& j){
int d=(elem_num+1)/n;
int q=(elem_num+1)%n;
i=d-1+int(q!=0);
j=(q-1+n)%n;
}
int main(int argc, char const *argv[])
{
const int m=10,n=10,o=10,p=10;
double A[m][n]={
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10}
};
double B[o][p]={
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10}
};
double C[m][p];
int size=10;
for (int rank=0;rank<size;rank++){
int oneD_idx=rank;
int i, j;
int elements_number=((m*p-1)-rank)/size+1;
double values_coords[elements_number][3];
for (int a=0;a<elements_number;a++){
findCoords(oneD_idx, p, i, j);
oneD_idx+=size;
double s=0;
for (int k=0;k<n;k++){
s+=A[i][k]*B[k][j];
}
values_coords[a][0]=s;
values_coords[a][1]=i;
values_coords[a][2]=j;
}
for (int x=0;x<elements_number;x++){
i=values_coords[x][1];
j=values_coords[x][2];
double value=values_coords[x][0];
C[i][j]=value;
}
}
for (int i=0;i<m;i++){
for (int j=0;j<p;j++){
cout << C[i][j]<<" ";
}
cout<<endl;
}
return 0;
}
以上代碼通過循環模擬MPI。 它用於矩陣乘法。 想法是對於每個等級都有必須計算的坐標,它適用於 2-m*p 過程,其中 m 和 p 是輸出矩陣的維度。 該代碼工作正常。
但是,當我將下面的代碼與 MPI 一起使用時,我不斷收到在第二個代碼之后顯示的錯誤。
#include <stdio.h>
#include <mpi.h>
#include <iostream>
using namespace std;
void findCoords(int oneD_idx, int n, int& i, int& j){
int d=(oneD_idx+1)/n;
int q=(oneD_idx+1)%n;
i=d-1+int(q!=0);
j=(q-1+n)%n;
}
int main( int argc, char *argv[])
{
int rank, size;
/*const int m=10,n=10,o=10,p=10;
double A[m][n]={
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10}
};
double B[o][p]={
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10},
{1,2,3,4,5,6,7,8,9,10}
};*/
const int m=3,n=3,o=3,p=3;
double A[m][n]={
{1,2,3},
{1,2,3},
{1,2,3}
};
double B[o][p]={
{1,2,3},
{1,2,3},
{1,2,3}
};
if (n!=o)
{
printf("Can not multiply because of the wrong shape!");
return 0;
}
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
MPI_Comm_size(MPI_COMM_WORLD, &size);
int oneD_idx=rank;
int i, j;
int elements_number=((m*p-1)-rank)/size+1;
double values_coords[elements_number][3];
for (int a=0;a<elements_number;a++){
findCoords(oneD_idx, p, i, j);
oneD_idx+=size;
double s=0;
for (int k=0;k<n;k++){
s+=A[i][k]*B[k][j];
}
values_coords[a][0]=s;
values_coords[a][1]=i;
values_coords[a][2]=j;
}
MPI_Status status1, status2;
if (rank==0)
{
double C[m][p];
for (int x=0;x<elements_number;x++){
i=values_coords[x][1];
j=values_coords[x][2];
double value=values_coords[x][0];
C[i][j]=value;
}
for (int r=1;r<size;r++){
int recv_elements_number;
MPI_Recv( &recv_elements_number , 1 , MPI_INT , r , 403 , MPI_COMM_WORLD , &status1);
int recv_values_coords[recv_elements_number][3];
MPI_Recv( &recv_values_coords , recv_elements_number*3 , MPI_DOUBLE , r , 404 , MPI_COMM_WORLD , &status2);
for (int x=0;x<recv_elements_number;x++){
i=recv_values_coords[x][1];
j=recv_values_coords[x][2];
double value=recv_values_coords[x][0];
C[i][j]=value;
}
}
for (int i=0;i<m;i++){
for (int j=0;j<p;j++){
cout << C[i][j]<<" ";
}
cout<<endl;
}
}
else
{
MPI_Send( &elements_number , 1 , MPI_INT , 0 , 403 , MPI_COMM_WORLD);
MPI_Send( &values_coords , elements_number*3 , MPI_DOUBLE , 0 , 404 , MPI_COMM_WORLD);
}
MPI_Finalize();
return 0;
}
Primary job terminated normally, but 1 process returned
a non-zero exit code. Per user-direction, the job has been aborted.
我使用 mpi_send 和 mpi_recv 來完成這個任務。 溝通似乎有些問題,我很累。
正如gilles-gouaillardet在評論中所說,從 int 到 double 的轉換存在問題,我應該使用 struct
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.