C-DVM - описание языка. Часть 3 (Приложения)

Часть 3 (Приложения)

создан: апрель 2001

- последнее обновление 03.05.01 -

Шесть небольших программ из научной области приводятся для иллюстрации свойств языка C-DVM. Они предназначены для решения систем линейных уравнений:

A x = b

где
A – матрица коэффициентов,
b – вектор свободных членов,
x – вектор неизвестных.

Для решения этой системы используются следующие основные методы.

Прямые методы. Хорошо известный метод исключения Гаусса является наиболее широко используемым алгоритмом этого класса. Основная идея алгоритма заключается в преобразовании матрицы А в верхнетреугольную матрицу и использовании затем обратной подстановки, чтобы привести ее к диагональной форме.

Явные итерационные методы. Наиболее известным алгоритмом этого класса является метод релаксации Якоби. Алгоритм выполняет следующие итерационные вычисления

x_i,j^new = ( x_i-1,j^old + x_i,j-1^old + x_i+1,j^old + x_i,j+1^old ) / 4

Неявные итерационные методы. К этому классу относится метод последовательной верхней релаксации. Итерационное приближение вычисляется по формуле

x_i,j^new = ( w / 4 ) * ( x_i-1,j^new + x_i,j-1^new + x_i+1,j^old + x_i,j+1^old ) + (1-w) * x_i,j^old

При использовании "красно-черного" упорядочивания переменных каждый итерационный шаг разделяется на два полушага Якоби. На одном полушаге вычисляются значения "красных" переменных, на другом – "черных" переменных. "Красно-черное" упорядочивание позволяет совместить вычисления и обмены данными.

Пример 1. Алгоритм метода исключения Гаусса

Коэффициенты матрицы A хранятся в секции массива A[0:N-1][0:N-1], а вектор B – в секции A[0:N-1][N] того же массива.

/* GAUSS  program */

#include <stdlib.h>
#include <stdio.h>
#include <math.h>

#define DVM(dvmdir)
#define DO(v,l,h,s)   for(v=l; v<=h; v+=s)
#define N 10

int main (int argn, char **args)
{
long  i, j, k;

/* declaration of dynamic distributed arrays */
DVM(DISTRIBUTE [BLOCK] [])  float (*A)[N+1];
DVM(DISTRIBUTE [BLOCK])  float (*X);
/* creation of arrays */
A = malloc( N*(N+1)*sizeof(float));
X = malloc( N*sizeof(float));
/* initialize array A*/
DVM(PARALLEL [i][j] ON A[i][j])
DO(i,0,N-1,1)
DO(j,0,N,1)
	if (i==j || j==N)  A[i][j] = 1.f;
	else A[i][j]=0.f;

/* elimination */
for (i=0; i<N-1; i++)
{
DVM(PARALLEL [k][j] ON A[k][j]; REMOTE_ACCESS A[i][])
DO (k,i+1,N-1,1)
DO (j,i+1,N,1)
	A[k][j] = A[k][j]-A[k][i]*A[i][j]/A[i][i];
}
/* reverse substitution */
X[N-1] = A[N-1][N]/A[N-1][N-1];
for (j=N-2; j>=0; j-=1)
{
DVM(PARALLEL [k] ON A[k][]; REMOTE_ACCESS X[j+1])
DO (k,0,j,1)
    	A[k][N] = A[k][N]-A[k][j+1]*X[j+1];
X[j]=A[j][N]/A[j][j];
DVM(REMOTE_ACCESS X[j]) 
	printf(“j=%4i  X[j]=%3.3E\n”,j,X[j]);
}
return 0;
}

Пример 2. Алгоритм Якоби

/* JACOBI program */

#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#define Max(a,b) ((a)>(b)?(a): (b))

#define DVM(dvmdir)
#define DO(v,l,h,s)   for(v=l; v<=h; v+=s)

#define  L  8
#define  ITMAX 20

int i,j,it,k;
double eps;
double MAXEPS = 0.5;
FILE *f;

/* 2D arrays block distributed along 2 dimensions */
DVM(DISTRIBUTE [BLOCK][BLOCK]) double A[L][L];
DVM(ALIGN[i][j] WITH A[i][j]) double B[L][L];

int main(int argn, char **args)
{
/* 2D loop with base array A */
DVM(PARALLEL [i][j] ON A[i][j])
DO(i,0,L-1,1)
DO(j,0,L-1,1)
      {A[i][j]=0.;
      B[i][j]=1.+i+j;
      }

/******  iteration loop  *************************/
DO(it,1,ITMAX,1)
{
  eps= 0.;
/* Parallel loop with base array A             */
/* calculating maximum in variable eps          */
DVM(PARALLEL [i][j] ON A[i][j]; REDUCTION MAX(eps))
DO(i,1,L-2,1)
DO(j,1,L-2,1)
	{eps =  Max(fabs(B[i][j]-A[i][j]),eps);
	A[i][j] = B[i][j];
	}
/* Parallel loop with base array  B  and         */
/* with prior updating shadow elements of array A  */
DVM(PARALLEL[i][j] ON B[i][j]; SHADOW_RENEW A)
DO(i,1,L-2,1)
DO(j,1,L-2,1)
    B[i][j] = (A[i-1][j]+A[i+1][j]+A[i][j-1]+A[i][j+1])/4.;

  printf(“it=%4i   eps=%3.3E\n”, it,eps);
  if (eps < MAXEPS) break;
  }/*DO it*/
f=fopen("jacobi.dat","wb");
fwrite(B,sizeof(double),L*L,f);
return 0;
}

Пример 3. Асинхронная версия алгоритма Якоби

/* Asynchronous JACOBI */ 
#include <math.h>
#include <stdlib.h>
#include <stdio.h>
#define Max(a,b) ((a)>(b)? (a): (b))

#define DVM(dvmdir)
#define DO(v,l,u,s)   for(v=l; v<=u; v+=s)

#define  L 8
#define  ITMAX 20

int i,j,it,k;
double eps;
double MAXEPS   = 0.5;
FILE *f;
/* declare groups for shadow and reduction operations */
DVM(SHADOW_GROUP) void *grshad;
DVM(REDUCTION_GROUP) void *emax;
/* 2D arrays block distributed along 2 dimensions */
DVM(DISTRIBUTE [BLOCK][BLOCK]) double A[L][L];
DVM(ALIGN [i][j] WITH A[i][j]) double B[L][L];

int main(int argn, char **args)
{
/* 2D parallel loop with base array A */
DVM(PARALLEL [i][j] ON A[i][j])
DO(i,0,L-1,1)
DO(j,0,L-1,1)
        {A[i][j]=0.;
         B[i][j]=1.+i+j;}
/* Create group for shadow operation */
DVM(CREATE_SHADOW_GROUP grshad: A);

/************  iteration loop  *************************/
DO(it,1,ITMAX,1)
{
  eps= 0.;
/* Parallel loop with base array A: */
/* at first elements of array A exported by the */
/* neighbor processor are calculated and sent and */
/* then internal elements of array A are calculated */
DVM(PARALLEL [i][j] ON A[i][j]; 
	SHADOW_START grshad; REDUCTION emax : MAX(eps))
DO(i,1,L-2,1)
DO(j,1,L-2,1)
         {eps = max(fabs(B[i][j]-A[i][j]),eps);
          A[i][j] = B[i][j];
         }
/* Start asynchronous calculation of maximum */
DVM(REDUCTION_START emax);
/* Parallel loop with base array B: */
/* internal elements of array B are calculated at first */
/* then completion of array A shadow edges update is */ 
/* awaited and the loop iterations, requiring shadow */ 
/* elements of array A are calculated */
DVM(PARALLEL[i][j] ON B[i][j]; SHADOW_WAIT grshad)
DO(i,1,L-2,1)
DO(j,1,L-2,1)
    B[i][j] = (A[i-1][j]+A[i+1][j]+A[i][j-1]+A[i][j+1])/4;
/* Wait for completion of reduction */
DVM(REDUCTION_WAIT emax);

printf( "it=%4i  eps=%3.3E\n”, it,eps);
if (eps < MAXEPS) break;
}/*DO it*/

f=fopen("jacobi.dat","wb");
fwrite(B,sizeof(double),L*L,f);
return 0;
}

Пример 4. "Красно-черная" последовательная верхняя релаксация

Точки обсчитываются в шахматном порядке: сначала “красные”, потом “черные”. Чтобы задать соответствующее (непрямоугольное) индексное пространство, допускается особая форма заголовка цикла: DO (var, a+b%2, ub, 2).

/* RED-BLACK program */

#include <stdlib.h>
#include <math.h>

#define DVM(dvmdir)
#define DO(v,l,h,s)   for(v=l; v<=h; v+=s)

#define Max(a,b) ((a)>(b)?(a):(b))
#define ITMAX 10

int main(int an, char ** as)
{long i, j, it, irb;
float eps=0.;
float MAXEPS = 0.5E-5;
float w = 0.5;

/* Groups of asynchronous operations */
DVM(SHADOW_GROUP) void *sha;
DVM(REDUCTION_GROUP) void *rg;

int  N;

/* redefine N as constant */
#define     N 8
DVM(DISTRIBUTE [BLOCK] [BLOCK])  float (*A)[N];
#undef      N /* restore N */

/* "Calculate"  of array size (=8!). Create array */
N = 8;
A = malloc( N*N*sizeof(float));

/* Specification of members of shadow group  */
DVM(CREATE_SHADOW_GROUP sha : A);

/* Initialization: parallel loop with surpassing */
/* calculation and sending of exported data */
DVM(PARALLEL [i][j] ON A[i][j]; SHADOW_START sha)
DO (i,0,N-1,1)
DO (j,0,N-1,1)
    if(i==0 || i==N-1 || j==0 || j==N-1) A[i][j]=0.;
    else A[i][j] = 1.+i+j;

DVM(SHADOW_WAIT sha);

/* iteration loop */
DO (it,0,ITMAX,1)
{
eps = 0.;

/* Parallel loop with reduction on RED points */
DVM(PARALLEL [i][j] ON A[i][j]; SHADOW_START sha;
                                REDUCTION rg: MAX(eps) )
DO (i,1,N-2,1)
DO (j,1+i%2,N-2,2)
    {float s;
    s = A[i][j];
    A[i][j] = (w/4)*(A[i-1][j]+A[i+1][j]+
             A[i][j-1]+A[i][j+1])+(1-w)*A[i][j];
    eps = Max (fabs(s-A[i][j]), eps);
    }

/* Start red reduction -- as early as possible */
/* Then wait shadows */
DVM(REDUCTION_START rg);
DVM(SHADOW_WAIT sha);

/* Parallel loop on BLACK points (without reduction) */
DVM(PARALLEL [i][j] ON A[i][j]; SHADOW_START sha)
DO (i,1,N-2,1)
DO (j,1+(i+1)%2,N-2,2)
    {A[i][j] = (w/4)*(A[i-1][j]+A[i+1][j]+
             A[i][j-1]+A[i][j+1])+(1-w)*A[i][j];
    }
/* Wait shadows, then */
/* wait red point's reduction -- as late as possible */
DVM(SHADOW_WAIT sha);
DVM(REDUCTION_WAIT rg);

printf("it=%d eps=%e\n",it,eps);
if (eps < MAXEPS) break;

} /* DO it */

free(A);
return 0;
}

Пример 5. Многосеточная задача

/* MULTIGRID  program */

#include <stdio.h>
#include <stdlib.h>

#define DVM(dvm)
#define DO(v,l,h,s) for(v=(l); v<=(h); v+=(s))

void oper(
	DVM(*DISTRIBUTE [*][*][*])  float *AA,
	int AAN, int AAM, int AAK,
	DVM(*DISTRIBUTE [*][*][*])  float *BB,
	int BBN, int BBM, int BBK)
/*Parameters: two distributed 3D arrays and */
/* their  actual dimensions */
{
#define AA(i,j,k) AA[((i)*AAM+(j))*AAK+(k)]
#define BB(i,j,k) AA[((i)*BBM+(j))*BBK+(k)]
int i, j,k;
/* Alignment of array BB with array AA using stretching*/
DVM(REALIGN BB[i][j][k] WITH AA[2*i][2*j][2*k] NEW);
/* forming array BB from elements of array AA with even indexes */
DVM(PARALLEL [i][j][k] ON BB[i][j][k])
FOR(i,BBN)
FOR(j,BBM)
FOR(k,BBK)
	BB(i,j,k)=AA(i*2,j*2,k*2);
#undef AA
#undef BB
}

int main(int argn, char **args)
{
int N1=8,M1=12,K1=16;
int N2,M2,K2;
int Narr=5,Nit=5;
int grid=0;
int ACM,ACK;
int i,j,k;
int step_grid=1;

/* Up to 20 distributed arrays */
DVM(DISTRIBUTE[BLOCK][BLOCK][]) float *A[20];
/* Pointer to current distributed array */
DVM(*DISTRIBUTE[*][*][*]) float *AC;
	/* creation of array  A[0] */
A[0]=malloc(N1*M1*K1*sizeof(float));
AC=A[0];
ACM=M1;
ACK=K1;
#define AC(i,j,k) AC[((i)*ACM+(j))*ACK+(k)]
	/* initialization of source array */
DVM(PARALLEL [i][j][k] ON AC[i][j][k])
DO(i,0,N1-1,1)
DO(j,0,M1-1,1)
DO(k,0,K1-1,1)
	AC(i,j,k)=1.+i+j+k ;
#undef AC

do{
printf("N1=%d M1=%d K1=%d \n”,N1,M1,K1);
N2=N1/2;
M2=M1/2;
K2=K1/2;
grid++;
/* creation of array A[grid] */
A[grid]=malloc(N2*M2*K2*sizeof(float));
oper(A[grid-1],N1,M1,K1,A[grid],N2,M2,K2);
N1=N2;
M1=M2;
K1=K2;
} while (N2>2 && grid<Narr)

for(i=0;i<=grid;i++)
	free(A[i]);
return 0;
}

Пример 6. Многообластная задача

/*  PROGRAM TASKS                                 */
/* rectangular grid is distributed on two blocks  */
/*                                                */
/*                     <------- A2,B2 ----->      */
/*                     0    1   2    ... N2       */
/*       <--- A1,B1 -------->                     */
/*       0   1   ...  N1-1  N1  N1+1 ... N1+N2-1  */
/*------------------------------------------------*/
/* 0   | .   .         .    .   .    ...  .       */
/* ... |                                          */
/* K-1 | .   .         .    .   .    ...  .       */
/**************************************************/
#include <stdlib.h>
#include <math.h>

#define DVM(dvmdir)
#define DO(v,l,h,s)   for(v=l; v<=h; v+=s)
#define FOR(v,n)  for(v=0; v<n; v++)
#define Max(a,b) ((a)>(b)?(a):(b))

#define NUMBER_OF_PROCESSORS() 1

#define K   8
#define N1  4
#define N2  K-N1
#define ITMAX 10

DVM(PROCESSORS)  void * P[NUMBER_OF_PROCESSORS()];
DVM(TASK) void * MB[2];
double eps0;
DVM(DISTRIBUTE) float (*A1)[K], (*A2)[K];
DVM(ALIGN)      float (*B1)[K], (*B2)[K];

int main(int argn, char** args)
{
int i,j, it;
int NP;

printf("---------- starting --------------\n");

DVM(DEBUG 1 -d0)
{
NP = NUMBER_OF_PROCESSORS() / 2;
}

DVM(MAP MB[0] ONTO P[0:(NP?NP-1:0)]);
DVM(MAP MB[1] ONTO P[NP:(NP?2*NP-1:NP)]);

A1=malloc((N1+1)*K*sizeof(float));
DVM(REDISTRIBUTE A1[][BLOCK] ONTO MB[0]);
B1=malloc((N1+1)*K*sizeof(float));
DVM(REALIGN B1[i][j] WITH A1[i][j]);

A2=malloc((N2+1)*K*sizeof(float));
DVM(REDISTRIBUTE A2[][BLOCK] ONTO MB[1]);
B2=malloc((N2+1)*K*sizeof(float));
DVM(REALIGN B2[i][j] WITH A2[i][j]);

    /* Initialization */
DVM(PARALLEL [i][j] ON A1[i][j])
FOR(i,N1+1)
FOR(j,K)
{if (i==0 || j==0 || j==K-1) {
    A1[i][j] = 0.f;
    B1[i][j] = 0.f;
} else  {
    B1[i][j] = 1.f + i + j ;
    A1[i][j] = B1[i][j];
}}
DVM(PARALLEL [i][j] ON A2[i][j])
FOR(i,N2+1)
FOR(j,K)
{if(i == N2  || j==0 || j==K-1) {
    A2[i][j] = 0.f;
    B2[i][j] = 0.f;
} else {
    B2[i][j] = 1.f + ( i + N1 - 1 ) + j ;
    A2[i][j] = B2[i][j];
}}

DO(it,1,ITMAX,1)
{
    eps0 = 0.;

/*     exchange bounds */

DVM(PARALLEL [j] ON A1[(N1)][j]; REMOTE_ACCESS B2[1][])
    FOR(j,K) A1[N1][j] = B2[1][j];

DVM(PARALLEL [j] ON A2[0][j]; REMOTE_ACCESS B1[N1-1][])
    FOR(j,K) A2[0][j] = B1[N1-1][j];


/* Block task region */
DVM(TASK_REGION MB; REDUCTION MAX(eps0))
{
DVM(ON MB[0])
{
double eps=0.; /* private reduction variable */
DVM(PARALLEL [i][j] ON B1[i][j]; SHADOW_RENEW A1[][])
DO(i,1, N1-1, 1)
DO(j,1, K-2, 1)
   B1[i][j]=(A1[i-1][j]+A1[i][j-1]+A1[i+1][j]+A1[i][j+1])/4.f;

DVM(PARALLEL [i][j] ON A1[i][j]; REDUCTION MAX(eps))
DO(i,1,N1-1,1)
DO(j,1,K-2,1)
        {eps =  Max(fabs(B1[i][j]-A1[i][j]),eps);
        A1[i][j] = B1[i][j];
        }
eps0=Max(eps0,eps);
}

DVM(ON MB[1])
{
double eps=0.; /* private reduction variable */
DVM(PARALLEL [i][j] ON B2[i][j]; SHADOW_RENEW A2[][])
DO(i,1,N2-1,1)
DO(j,1,K-2,1)
  B2[i][j]=(A2[i-1][j]+A2[i][j-1]+A2[i+1][j]+A2[i][j+1])/4.f;

DVM(PARALLEL [i][j] ON A2[i][j]; REDUCTION MAX(eps))
DO(i,1,N2-1,1)
DO(j,1,K-2,1)
        {eps =  Max(fabs(B2[i][j]-A2[i][j]),eps);
        A2[i][j] = B2[i][j];
        }
eps0=Max(eps0,eps);
}
} /* TASK_REGION */

printf("it=%d eps0=%f\n",it,eps0);
}/* DO IT */
return 0;
}

Приложение 2. Сводный синтаксис директив C-DVM

directive	::=	DVM ( DVM-directive [ ; DVM-directive ] )
DVM-directive	::=	specification-directive
	\|	executable-directive
specification-directive	::=	processors-directive
	\|	align-directive
	\|	distribute-directive
	\|	template-directive
	\|	shadow-directive
	\|	shadow-group-directive
	\|	reduction-group-directive
	\|	remote-group-directive
	\|	task-directive
executable-directive	::=	realign-directive
	\|	redistribute-directive
	\|	create-template-directive
	\|	parallel-directive
	\|	remote-access-directive
	\|	create-shadow-group-directive
	\|	shadow-start-directive
	\|	shadow-wait-directive
	\|	reduction-start-directive
	\|	reduction-wait-directive
	\|	prefetch-directive
	\|	reset-directive
	\|	map-directive
	\|	task-region-directive
	\|	on-directive
processors-directive	::=	PROCESSORS
distribute-directive	::=	DISTRIBUTE [ dist-directive-stuff ]
redistribute-directive	::=	REDISTRIBUTE distributee dist-directive-stuff [ NEW ]
dist-directive-stuff	::=	dist-format... [ dist-onto-clause ]
distributee	::=	array-name
dist-format	::=	[BLOCK]
	\|	[GENBLOCK ( block-array-name )]
	\|	[WGTBLOCK ( block-array-name,nblock )]
	\|	[]
dist-onto-clause	::=	ONTO dist-target
dist-target	::=	processors-name [ section-subscript ]…
align-directive	::=	ALIGN [ align-directive-stuff ]
realign-directive	::=	REALIGN alignee align-directive-stuff [ NEW ]
align-directive-stuff	::=	align-source... align-with-clause
alignee	::=	array-name
align-source	::=	[ ]
	\|	[ align-dummy ]
align-dummy	::=	scalar-int-variable
align-with-clause	::=	WITH align-spec
align-spec	::=	align-target [ align-subscript ]…
align-target	::=	array-name
	\|	template-name
align-subscript	::=	[ int-expr ]
	\|	[ align-subscript-use ]
	\|	[ ]
align-subscript-use	::=	[ primary-expr * ] align-dummy [ add-op primary-expr ]
primary-expr	::=	int-constant
	\|	int-variable
	\|	( int-expr )
add-op	::=	+
	\|	-
create-template-directive	::=	CREATE_TEMPLATE template_name size...
parallel-directive	::=	PARALLEL loop-variable... ON iteration-align-spec [ ; reduction-clause] [ ; shadow-renew-clause] [ ; remote-access-clause ] [ ; across-clause ]
iteration-align-spec	::=	align-target iteration-align-subscript...
iteration-align-subscript	::=	[ int-expr ]
	\|	[ do-variable-use ]
	\|	[]
loop-variable	::=	[ do-variable ]
do-variable-use	::=	[ primary-expr * ] do-variable [ add-op primary-expr ]
reduction-clause	::=	REDUCTION [ group_name : ] reduction-op...
reduction-op	::=	reduction-op-name ( reduction-variable )
	\|	reduction-loc-name ( reduction-variable, loc-variable )
reduction-variable	::=	array-name
	\|	scalar-variable-name
reduction-op-name	::=	SUM
	\|	PRODUCT
	\|	MAX
	\|	MIN
	\|	AND
	\|	OR
reduction-loc-name	::=	MAXLOC
	\|	MINLOC
across-clause	::=	ACROSS dependent-array...
dependent-array	::=	dist-array-name dependence...
dependence	::=	[ flow-dep-length : anti-dep-length ]
flow-dep-length	::=	int-constant
anti-dep-length	::=	int-constant
shadow-directive	::=	SHADOW shadow-array...
shadow-array	::=	array-name shadow-edge...
shadow-edge	::=	[ width ]
	\|	[ low-width : high-width ]
width	::=	int-expr
low-width	::=	int-expr
high-width	::=	int-expr
shadow-renew-clause	::=	SHADOW_RENEW renewee...
renewee	::=	dist-array-name [ shadow-edge ]… [CORNER]
remote-access-directive	::=	REMOTE_ACCESS [ remote-group-name : ] regular-reference...
regular-reference	::=	dist-array-name [ regular-subscript ]…
regular-subscript	::=	[ int-expr ]
	\|	[ do-variable-use ]
	\|	[ ]
remote-access-clause	::=	remote-access-directive
prefetch-directive	::=	PREFETCH group-name
reset-directive	::=	RESET group-name
shadow-group-directive	::=	CREATE_SHADOW_GROUP shadow-group-name : renewee...
shadow-start-directive	::=	SHADOW_START shadow-group-name
shadow-wait-directive	::=	SHADOW_WAIT shadow-group-name
shadow-renew-clause	::=	. . .
	\|	\| shadow-start-directive
	\|	\| shadow-wait-directive
reduction-group-directive	::=	REDUCTION_GROUP
reduction-start-directive	::=	REDUCTION_START reduction-group-name
reduction-wait-directive	::=	REDUCTION_WAIT reduction-group-name
task-directive	::=	TASK
map-directive	::=	MAP task-name [ task-index ]
		ONTO processors-name [ section-subscript ]…
dist-target	::=	. . .
	\|	task-name [ task-index ]
block-task-region	::=	DVM( task-region-directive ) {
		on-block...
		}
task-region-directive	::=	TASK_REGION task-name [ ; reduction-clause]
on-block	::=	DVM( on-directive ) operator
on-directive	::=	ON task-name [ task-index ]
loop-task-region	::=	DVM( task-region-directive ) {
		parallel-task-loop
		}
parallel-task-loop		DVM( parallel-task-loop-directive )
		do-loop
parallel-task-loop-directive	::=	PARALLEL [ do-variable ] ON task-name [ do-variable ]

copy-flag-directive	::=	COPY_FLAG
copy-start-directive	::=	COPY_START flag_addr
copy-wait-directive	::=	COPY_WAIT flag_addr
copy-directive	::=	COPY

C-DVM - оглавление

Часть 1 (1-4)

Часть 2 (5-11)

Часть 3 (Приложения)