C-DVM - оглавление | Часть 1 (1-4) | Часть 2 (5-11) | Часть 3 (Приложения) |
создан: апрель 2001 | - последнее обновление 03.05.01 - |
Приложение 1. Примеры программ
Шесть небольших программ из научной области приводятся для иллюстрации свойств языка C-DVM. Они предназначены для решения систем линейных уравнений:
A x = b
где
A – матрица
коэффициентов,
b – вектор
свободных членов,
x – вектор
неизвестных.
Для решения этой системы используются следующие основные методы.
Прямые методы. Хорошо известный метод исключения Гаусса является наиболее широко используемым алгоритмом этого класса. Основная идея алгоритма заключается в преобразовании матрицы А в верхнетреугольную матрицу и использовании затем обратной подстановки, чтобы привести ее к диагональной форме.
Явные итерационные методы. Наиболее известным алгоритмом этого класса является метод релаксации Якоби. Алгоритм выполняет следующие итерационные вычисления
xi,jnew = ( xi-1,jold + xi,j-1old + xi+1,jold + xi,j+1old ) / 4
Неявные итерационные методы. К этому классу относится метод последовательной верхней релаксации. Итерационное приближение вычисляется по формуле
xi,jnew = ( w / 4 ) * ( xi-1,jnew + xi,j-1new + xi+1,jold + xi,j+1old ) + (1-w) * xi,jold
При использовании "красно-черного" упорядочивания переменных каждый итерационный шаг разделяется на два полушага Якоби. На одном полушаге вычисляются значения "красных" переменных, на другом – "черных" переменных. "Красно-черное" упорядочивание позволяет совместить вычисления и обмены данными.
Пример 1. Алгоритм метода исключения Гаусса
Коэффициенты матрицы A хранятся в секции массива A[0:N-1][0:N-1], а вектор B – в секции A[0:N-1][N] того же массива.
/* GAUSS program */ #include <stdlib.h> #include <stdio.h> #include <math.h> #define DVM(dvmdir) #define DO(v,l,h,s) for(v=l; v<=h; v+=s) #define N 10 int main (int argn, char **args) { long i, j, k; /* declaration of dynamic distributed arrays */ DVM(DISTRIBUTE [BLOCK] []) float (*A)[N+1]; DVM(DISTRIBUTE [BLOCK]) float (*X); /* creation of arrays */ A = malloc( N*(N+1)*sizeof(float)); X = malloc( N*sizeof(float)); /* initialize array A*/ DVM(PARALLEL [i][j] ON A[i][j]) DO(i,0,N-1,1) DO(j,0,N,1) if (i==j || j==N) A[i][j] = 1.f; else A[i][j]=0.f; /* elimination */ for (i=0; i<N-1; i++) { DVM(PARALLEL [k][j] ON A[k][j]; REMOTE_ACCESS A[i][]) DO (k,i+1,N-1,1) DO (j,i+1,N,1) A[k][j] = A[k][j]-A[k][i]*A[i][j]/A[i][i]; } /* reverse substitution */ X[N-1] = A[N-1][N]/A[N-1][N-1]; for (j=N-2; j>=0; j-=1) { DVM(PARALLEL [k] ON A[k][]; REMOTE_ACCESS X[j+1]) DO (k,0,j,1) A[k][N] = A[k][N]-A[k][j+1]*X[j+1]; X[j]=A[j][N]/A[j][j]; DVM(REMOTE_ACCESS X[j]) printf(“j=%4i X[j]=%3.3E\n”,j,X[j]); } return 0; }
/* JACOBI program */ #include <math.h> #include <stdlib.h> #include <stdio.h> #define Max(a,b) ((a)>(b)?(a): (b)) #define DVM(dvmdir) #define DO(v,l,h,s) for(v=l; v<=h; v+=s) #define L 8 #define ITMAX 20 int i,j,it,k; double eps; double MAXEPS = 0.5; FILE *f; /* 2D arrays block distributed along 2 dimensions */ DVM(DISTRIBUTE [BLOCK][BLOCK]) double A[L][L]; DVM(ALIGN[i][j] WITH A[i][j]) double B[L][L]; int main(int argn, char **args) { /* 2D loop with base array A */ DVM(PARALLEL [i][j] ON A[i][j]) DO(i,0,L-1,1) DO(j,0,L-1,1) {A[i][j]=0.; B[i][j]=1.+i+j; } /****** iteration loop *************************/ DO(it,1,ITMAX,1) { eps= 0.; /* Parallel loop with base array A */ /* calculating maximum in variable eps */ DVM(PARALLEL [i][j] ON A[i][j]; REDUCTION MAX(eps)) DO(i,1,L-2,1) DO(j,1,L-2,1) {eps = Max(fabs(B[i][j]-A[i][j]),eps); A[i][j] = B[i][j]; } /* Parallel loop with base array B and */ /* with prior updating shadow elements of array A */ DVM(PARALLEL[i][j] ON B[i][j]; SHADOW_RENEW A) DO(i,1,L-2,1) DO(j,1,L-2,1) B[i][j] = (A[i-1][j]+A[i+1][j]+A[i][j-1]+A[i][j+1])/4.; printf(“it=%4i eps=%3.3E\n”, it,eps); if (eps < MAXEPS) break; }/*DO it*/ f=fopen("jacobi.dat","wb"); fwrite(B,sizeof(double),L*L,f); return 0; }
Пример 3. Асинхронная версия алгоритма Якоби
/* Asynchronous JACOBI */ #include <math.h> #include <stdlib.h> #include <stdio.h> #define Max(a,b) ((a)>(b)? (a): (b)) #define DVM(dvmdir) #define DO(v,l,u,s) for(v=l; v<=u; v+=s) #define L 8 #define ITMAX 20 int i,j,it,k; double eps; double MAXEPS = 0.5; FILE *f; /* declare groups for shadow and reduction operations */ DVM(SHADOW_GROUP) void *grshad; DVM(REDUCTION_GROUP) void *emax; /* 2D arrays block distributed along 2 dimensions */ DVM(DISTRIBUTE [BLOCK][BLOCK]) double A[L][L]; DVM(ALIGN [i][j] WITH A[i][j]) double B[L][L]; int main(int argn, char **args) { /* 2D parallel loop with base array A */ DVM(PARALLEL [i][j] ON A[i][j]) DO(i,0,L-1,1) DO(j,0,L-1,1) {A[i][j]=0.; B[i][j]=1.+i+j;} /* Create group for shadow operation */ DVM(CREATE_SHADOW_GROUP grshad: A); /************ iteration loop *************************/ DO(it,1,ITMAX,1) { eps= 0.; /* Parallel loop with base array A: */ /* at first elements of array A exported by the */ /* neighbor processor are calculated and sent and */ /* then internal elements of array A are calculated */ DVM(PARALLEL [i][j] ON A[i][j]; SHADOW_START grshad; REDUCTION emax : MAX(eps)) DO(i,1,L-2,1) DO(j,1,L-2,1) {eps = max(fabs(B[i][j]-A[i][j]),eps); A[i][j] = B[i][j]; } /* Start asynchronous calculation of maximum */ DVM(REDUCTION_START emax); /* Parallel loop with base array B: */ /* internal elements of array B are calculated at first */ /* then completion of array A shadow edges update is */ /* awaited and the loop iterations, requiring shadow */ /* elements of array A are calculated */ DVM(PARALLEL[i][j] ON B[i][j]; SHADOW_WAIT grshad) DO(i,1,L-2,1) DO(j,1,L-2,1) B[i][j] = (A[i-1][j]+A[i+1][j]+A[i][j-1]+A[i][j+1])/4; /* Wait for completion of reduction */ DVM(REDUCTION_WAIT emax); printf( "it=%4i eps=%3.3E\n”, it,eps); if (eps < MAXEPS) break; }/*DO it*/ f=fopen("jacobi.dat","wb"); fwrite(B,sizeof(double),L*L,f); return 0; }
Пример 4. "Красно-черная" последовательная верхняя релаксация
Точки обсчитываются в шахматном порядке: сначала “красные”, потом “черные”. Чтобы задать соответствующее (непрямоугольное) индексное пространство, допускается особая форма заголовка цикла: DO (var, a+b%2, ub, 2).
/* RED-BLACK program */ #include <stdlib.h> #include <math.h> #define DVM(dvmdir) #define DO(v,l,h,s) for(v=l; v<=h; v+=s) #define Max(a,b) ((a)>(b)?(a):(b)) #define ITMAX 10 int main(int an, char ** as) {long i, j, it, irb; float eps=0.; float MAXEPS = 0.5E-5; float w = 0.5; /* Groups of asynchronous operations */ DVM(SHADOW_GROUP) void *sha; DVM(REDUCTION_GROUP) void *rg; int N; /* redefine N as constant */ #define N 8 DVM(DISTRIBUTE [BLOCK] [BLOCK]) float (*A)[N]; #undef N /* restore N */ /* "Calculate" of array size (=8!). Create array */ N = 8; A = malloc( N*N*sizeof(float)); /* Specification of members of shadow group */ DVM(CREATE_SHADOW_GROUP sha : A); /* Initialization: parallel loop with surpassing */ /* calculation and sending of exported data */ DVM(PARALLEL [i][j] ON A[i][j]; SHADOW_START sha) DO (i,0,N-1,1) DO (j,0,N-1,1) if(i==0 || i==N-1 || j==0 || j==N-1) A[i][j]=0.; else A[i][j] = 1.+i+j; DVM(SHADOW_WAIT sha); /* iteration loop */ DO (it,0,ITMAX,1) { eps = 0.; /* Parallel loop with reduction on RED points */ DVM(PARALLEL [i][j] ON A[i][j]; SHADOW_START sha; REDUCTION rg: MAX(eps) ) DO (i,1,N-2,1) DO (j,1+i%2,N-2,2) {float s; s = A[i][j]; A[i][j] = (w/4)*(A[i-1][j]+A[i+1][j]+ A[i][j-1]+A[i][j+1])+(1-w)*A[i][j]; eps = Max (fabs(s-A[i][j]), eps); } /* Start red reduction -- as early as possible */ /* Then wait shadows */ DVM(REDUCTION_START rg); DVM(SHADOW_WAIT sha); /* Parallel loop on BLACK points (without reduction) */ DVM(PARALLEL [i][j] ON A[i][j]; SHADOW_START sha) DO (i,1,N-2,1) DO (j,1+(i+1)%2,N-2,2) {A[i][j] = (w/4)*(A[i-1][j]+A[i+1][j]+ A[i][j-1]+A[i][j+1])+(1-w)*A[i][j]; } /* Wait shadows, then */ /* wait red point's reduction -- as late as possible */ DVM(SHADOW_WAIT sha); DVM(REDUCTION_WAIT rg); printf("it=%d eps=%e\n",it,eps); if (eps < MAXEPS) break; } /* DO it */ free(A); return 0; }
Пример 5. Многосеточная задача
/* MULTIGRID program */ #include <stdio.h> #include <stdlib.h> #define DVM(dvm) #define DO(v,l,h,s) for(v=(l); v<=(h); v+=(s)) void oper( DVM(*DISTRIBUTE [*][*][*]) float *AA, int AAN, int AAM, int AAK, DVM(*DISTRIBUTE [*][*][*]) float *BB, int BBN, int BBM, int BBK) /*Parameters: two distributed 3D arrays and */ /* their actual dimensions */ { #define AA(i,j,k) AA[((i)*AAM+(j))*AAK+(k)] #define BB(i,j,k) AA[((i)*BBM+(j))*BBK+(k)] int i, j,k; /* Alignment of array BB with array AA using stretching*/ DVM(REALIGN BB[i][j][k] WITH AA[2*i][2*j][2*k] NEW); /* forming array BB from elements of array AA with even indexes */ DVM(PARALLEL [i][j][k] ON BB[i][j][k]) FOR(i,BBN) FOR(j,BBM) FOR(k,BBK) BB(i,j,k)=AA(i*2,j*2,k*2); #undef AA #undef BB } int main(int argn, char **args) { int N1=8,M1=12,K1=16; int N2,M2,K2; int Narr=5,Nit=5; int grid=0; int ACM,ACK; int i,j,k; int step_grid=1; /* Up to 20 distributed arrays */ DVM(DISTRIBUTE[BLOCK][BLOCK][]) float *A[20]; /* Pointer to current distributed array */ DVM(*DISTRIBUTE[*][*][*]) float *AC; /* creation of array A[0] */ A[0]=malloc(N1*M1*K1*sizeof(float)); AC=A[0]; ACM=M1; ACK=K1; #define AC(i,j,k) AC[((i)*ACM+(j))*ACK+(k)] /* initialization of source array */ DVM(PARALLEL [i][j][k] ON AC[i][j][k]) DO(i,0,N1-1,1) DO(j,0,M1-1,1) DO(k,0,K1-1,1) AC(i,j,k)=1.+i+j+k ; #undef AC do{ printf("N1=%d M1=%d K1=%d \n”,N1,M1,K1); N2=N1/2; M2=M1/2; K2=K1/2; grid++; /* creation of array A[grid] */ A[grid]=malloc(N2*M2*K2*sizeof(float)); oper(A[grid-1],N1,M1,K1,A[grid],N2,M2,K2); N1=N2; M1=M2; K1=K2; } while (N2>2 && grid<Narr) for(i=0;i<=grid;i++) free(A[i]); return 0; }
Пример 6. Многообластная задача
/* PROGRAM TASKS */ /* rectangular grid is distributed on two blocks */ /* */ /* <------- A2,B2 -----> */ /* 0 1 2 ... N2 */ /* <--- A1,B1 --------> */ /* 0 1 ... N1-1 N1 N1+1 ... N1+N2-1 */ /*------------------------------------------------*/ /* 0 | . . . . . ... . */ /* ... | */ /* K-1 | . . . . . ... . */ /**************************************************/ #include <stdlib.h> #include <math.h> #define DVM(dvmdir) #define DO(v,l,h,s) for(v=l; v<=h; v+=s) #define FOR(v,n) for(v=0; v<n; v++) #define Max(a,b) ((a)>(b)?(a):(b)) #define NUMBER_OF_PROCESSORS() 1 #define K 8 #define N1 4 #define N2 K-N1 #define ITMAX 10 DVM(PROCESSORS) void * P[NUMBER_OF_PROCESSORS()]; DVM(TASK) void * MB[2]; double eps0; DVM(DISTRIBUTE) float (*A1)[K], (*A2)[K]; DVM(ALIGN) float (*B1)[K], (*B2)[K]; int main(int argn, char** args) { int i,j, it; int NP; printf("---------- starting --------------\n"); DVM(DEBUG 1 -d0) { NP = NUMBER_OF_PROCESSORS() / 2; } DVM(MAP MB[0] ONTO P[0:(NP?NP-1:0)]); DVM(MAP MB[1] ONTO P[NP:(NP?2*NP-1:NP)]); A1=malloc((N1+1)*K*sizeof(float)); DVM(REDISTRIBUTE A1[][BLOCK] ONTO MB[0]); B1=malloc((N1+1)*K*sizeof(float)); DVM(REALIGN B1[i][j] WITH A1[i][j]); A2=malloc((N2+1)*K*sizeof(float)); DVM(REDISTRIBUTE A2[][BLOCK] ONTO MB[1]); B2=malloc((N2+1)*K*sizeof(float)); DVM(REALIGN B2[i][j] WITH A2[i][j]); /* Initialization */ DVM(PARALLEL [i][j] ON A1[i][j]) FOR(i,N1+1) FOR(j,K) {if (i==0 || j==0 || j==K-1) { A1[i][j] = 0.f; B1[i][j] = 0.f; } else { B1[i][j] = 1.f + i + j ; A1[i][j] = B1[i][j]; }} DVM(PARALLEL [i][j] ON A2[i][j]) FOR(i,N2+1) FOR(j,K) {if(i == N2 || j==0 || j==K-1) { A2[i][j] = 0.f; B2[i][j] = 0.f; } else { B2[i][j] = 1.f + ( i + N1 - 1 ) + j ; A2[i][j] = B2[i][j]; }} DO(it,1,ITMAX,1) { eps0 = 0.; /* exchange bounds */ DVM(PARALLEL [j] ON A1[(N1)][j]; REMOTE_ACCESS B2[1][]) FOR(j,K) A1[N1][j] = B2[1][j]; DVM(PARALLEL [j] ON A2[0][j]; REMOTE_ACCESS B1[N1-1][]) FOR(j,K) A2[0][j] = B1[N1-1][j]; /* Block task region */ DVM(TASK_REGION MB; REDUCTION MAX(eps0)) { DVM(ON MB[0]) { double eps=0.; /* private reduction variable */ DVM(PARALLEL [i][j] ON B1[i][j]; SHADOW_RENEW A1[][]) DO(i,1, N1-1, 1) DO(j,1, K-2, 1) B1[i][j]=(A1[i-1][j]+A1[i][j-1]+A1[i+1][j]+A1[i][j+1])/4.f; DVM(PARALLEL [i][j] ON A1[i][j]; REDUCTION MAX(eps)) DO(i,1,N1-1,1) DO(j,1,K-2,1) {eps = Max(fabs(B1[i][j]-A1[i][j]),eps); A1[i][j] = B1[i][j]; } eps0=Max(eps0,eps); } DVM(ON MB[1]) { double eps=0.; /* private reduction variable */ DVM(PARALLEL [i][j] ON B2[i][j]; SHADOW_RENEW A2[][]) DO(i,1,N2-1,1) DO(j,1,K-2,1) B2[i][j]=(A2[i-1][j]+A2[i][j-1]+A2[i+1][j]+A2[i][j+1])/4.f; DVM(PARALLEL [i][j] ON A2[i][j]; REDUCTION MAX(eps)) DO(i,1,N2-1,1) DO(j,1,K-2,1) {eps = Max(fabs(B2[i][j]-A2[i][j]),eps); A2[i][j] = B2[i][j]; } eps0=Max(eps0,eps); } } /* TASK_REGION */ printf("it=%d eps0=%f\n",it,eps0); }/* DO IT */ return 0; }
Приложение 2. Сводный синтаксис директив C-DVM
directive | ::= | DVM ( DVM-directive [ ; DVM-directive ] ) |
DVM-directive | ::= | specification-directive |
| | executable-directive | |
specification-directive | ::= | processors-directive |
| | align-directive | |
| | distribute-directive | |
| | template-directive | |
| | shadow-directive | |
| | shadow-group-directive | |
| | reduction-group-directive | |
| | remote-group-directive | |
| | task-directive | |
executable-directive | ::= | realign-directive |
| | redistribute-directive | |
| | create-template-directive | |
| | parallel-directive | |
| | remote-access-directive | |
| | create-shadow-group-directive | |
| | shadow-start-directive | |
| | shadow-wait-directive | |
| | reduction-start-directive | |
| | reduction-wait-directive | |
| | prefetch-directive | |
| | reset-directive | |
| | map-directive | |
| | task-region-directive | |
| | on-directive | |
processors-directive | ::= | PROCESSORS |
distribute-directive | ::= | DISTRIBUTE [ dist-directive-stuff ] |
redistribute-directive | ::= | REDISTRIBUTE distributee dist-directive-stuff [ NEW ] |
dist-directive-stuff | ::= | dist-format... [ dist-onto-clause ] |
distributee | ::= | array-name |
dist-format | ::= | [BLOCK] |
| | [GENBLOCK ( block-array-name )] | |
| | [WGTBLOCK ( block-array-name,nblock )] | |
| | [] | |
dist-onto-clause | ::= | ONTO dist-target |
dist-target | ::= | processors-name [ section-subscript ]… |
align-directive | ::= | ALIGN [ align-directive-stuff ] |
realign-directive | ::= | REALIGN alignee align-directive-stuff [ NEW ] |
align-directive-stuff | ::= | align-source... align-with-clause |
alignee | ::= | array-name |
align-source | ::= | [ ] |
| | [ align-dummy ] | |
align-dummy | ::= | scalar-int-variable |
align-with-clause | ::= | WITH align-spec |
align-spec | ::= | align-target [ align-subscript ]… |
align-target | ::= | array-name |
| | template-name | |
align-subscript | ::= | [ int-expr ] |
| | [ align-subscript-use ] | |
| | [ ] | |
align-subscript-use | ::= | [ primary-expr *
] align-dummy [ add-op primary-expr ] |
primary-expr | ::= | int-constant |
| | int-variable | |
| | ( int-expr ) | |
add-op | ::= | + |
| | - | |
create-template-directive | ::= | CREATE_TEMPLATE template_name size... |
parallel-directive | ::= | PARALLEL loop-variable... ON iteration-align-spec [ ; reduction-clause] [ ; shadow-renew-clause] [ ; remote-access-clause ] [ ; across-clause ] |
iteration-align-spec | ::= | align-target iteration-align-subscript... |
iteration-align-subscript | ::= | [ int-expr ] |
| | [ do-variable-use ] | |
| | [] | |
loop-variable | ::= | [ do-variable ] |
do-variable-use | ::= | [ primary-expr *
] do-variable [ add-op primary-expr ] |
reduction-clause | ::= | REDUCTION [ group_name : ] reduction-op... |
reduction-op | ::= | reduction-op-name ( reduction-variable ) |
| | reduction-loc-name ( reduction-variable, loc-variable ) | |
reduction-variable | ::= | array-name |
| | scalar-variable-name | |
reduction-op-name | ::= | SUM |
| | PRODUCT | |
| | MAX | |
| | MIN | |
| | AND | |
| | OR | |
reduction-loc-name | ::= | MAXLOC |
| | MINLOC | |
across-clause | ::= | ACROSS dependent-array... |
dependent-array | ::= | dist-array-name dependence... |
dependence | ::= | [ flow-dep-length : anti-dep-length ] |
flow-dep-length | ::= | int-constant |
anti-dep-length | ::= | int-constant |
shadow-directive | ::= | SHADOW shadow-array... |
shadow-array | ::= | array-name shadow-edge... |
shadow-edge | ::= | [ width ] |
| | [ low-width : high-width ] | |
width | ::= | int-expr |
low-width | ::= | int-expr |
high-width | ::= | int-expr |
shadow-renew-clause | ::= | SHADOW_RENEW renewee... |
renewee | ::= | dist-array-name [ shadow-edge ]… [CORNER] |
remote-access-directive | ::= | REMOTE_ACCESS [ remote-group-name : ] regular-reference... |
regular-reference | ::= | dist-array-name [ regular-subscript ]… |
regular-subscript | ::= | [ int-expr ] |
| | [ do-variable-use ] | |
| | [ ] | |
remote-access-clause | ::= | remote-access-directive |
prefetch-directive | ::= | PREFETCH group-name |
reset-directive | ::= | RESET group-name |
shadow-group-directive | ::= | CREATE_SHADOW_GROUP shadow-group-name : renewee... |
shadow-start-directive | ::= | SHADOW_START shadow-group-name |
shadow-wait-directive | ::= | SHADOW_WAIT shadow-group-name |
shadow-renew-clause | ::= | . . . |
| | | shadow-start-directive | |
| | | shadow-wait-directive | |
reduction-group-directive | ::= | REDUCTION_GROUP |
reduction-start-directive | ::= | REDUCTION_START reduction-group-name |
reduction-wait-directive | ::= | REDUCTION_WAIT reduction-group-name |
task-directive | ::= | TASK |
map-directive | ::= | MAP task-name [ task-index ] |
ONTO processors-name [ section-subscript ]… | ||
dist-target | ::= | . . . |
| | task-name [ task-index ] | |
block-task-region | ::= | DVM( task-region-directive ) { |
on-block... | ||
} | ||
task-region-directive | ::= | TASK_REGION task-name [ ; reduction-clause] |
on-block | ::= | DVM( on-directive ) operator |
on-directive | ::= | ON task-name [ task-index ] |
loop-task-region | ::= | DVM( task-region-directive ) { |
parallel-task-loop | ||
} | ||
parallel-task-loop | DVM( parallel-task-loop-directive ) | |
do-loop | ||
parallel-task-loop-directive | ::= | PARALLEL [ do-variable
] ON task-name [ do-variable ] |
copy-flag-directive | ::= | COPY_FLAG |
copy-start-directive | ::= | COPY_START flag_addr |
copy-wait-directive | ::= | COPY_WAIT flag_addr |
copy-directive | ::= | COPY |
C-DVM - оглавление | Часть 1 (1-4) | Часть 2 (5-11) | Часть 3 (Приложения) |