#include<stdlib.h>
#include<stdio.h>
#include<math.h>
#include<time.h>
#include<omp.h>
#define N_MAX 1000 /* 1,000 */
#define SCHED static

/* Parallel Program to Calculate Gravitational Evolution */
/* Use Symmetry of Force Matrix */
/* Use Matrices Explicitly */
/* Store Lower Triangle in a Vector */

double elapsed_time(const timespec_t t2, const timespec_t t1);

/*
 * Gravitational N-body time stepping in 2-D, parallelised with OpenMP.
 *
 * Usage:  prog [n]
 *   n  - number of bodies; values outside 1..N_MAX (or a missing argument)
 *        select N_MAX.
 *
 * The pairwise forces for i > j are stored in the packed vectors Tx/Ty
 * (lower triangle of the force matrix, slot k = i + (2n-j-1)*j/2).  The net
 * force on each body is then assembled from that triangle: row sums are
 * added, column sums are subtracted (force symmetry).
 *
 * Prints the squared total momentum and Vx of the middle body before the
 * first step and after every step, followed by a timing summary line.
 *
 * Returns 0 on success, EXIT_FAILURE if an array cannot be allocated.
 *
 * NOTE(review): timespec_t and CLOCK_SGI_CYCLE are platform-specific
 * (presumably SGI IRIX) - confirm before building elsewhere.
 */
int main(int argc,char* argv[])
{	double dt,dtsq,rsqinv,ftmp,gamma,dx,dy,dVx,dVy;
	double *m,*x,*y,*Vx,*Vy,fx,fy,*Fx,*Fy,Px,Py,P,td;
	const double G=6.673;
	double *Tx,*Ty;
	int    n,N,t,nt,i,j,k,p,mid,R;
	long   requested;
	timespec_t t1,t2;

	/* a missing or out-of-range argument falls back to N_MAX */
	requested=(argc > 1) ? strtol(argv[1],NULL,10) : 0;
	n=( requested > 0 && requested <= N_MAX ) ? (int)requested : N_MAX;

	/* length of the packed lower triangle (diagonal included) */
	N=n+n*(n-1)/2;

	nt=3; dt=1e-1/nt; dtsq=0.5*dt*dt;
	mid=(int)ceil((double)n*0.5)-1;

	p=omp_get_max_threads();

	/* allocate arrays */
	m =malloc((size_t)n*sizeof *m);
	x =malloc((size_t)n*sizeof *x);
	y =malloc((size_t)n*sizeof *y);
	Vx=malloc((size_t)n*sizeof *Vx);
	Vy=malloc((size_t)n*sizeof *Vy);
	Fx=malloc((size_t)n*sizeof *Fx);
	Fy=malloc((size_t)n*sizeof *Fy);

	Tx=malloc((size_t)N*sizeof *Tx);
	Ty=malloc((size_t)N*sizeof *Ty);

	if (!m || !x || !y || !Vx || !Vy || !Fx || !Fy || !Tx || !Ty)
	{	fprintf(stderr,"out of memory (n = %d)\n",n);
		/* free(NULL) is a no-op, so release everything unconditionally */
		free(m); free(x); free(y); free(Vx); free(Vy);
		free(Fx); free(Fy); free(Tx); free(Ty);
		return EXIT_FAILURE;
	}

	/* initialize arrays: unit masses at rest, spread along the diagonal */
	#pragma omp parallel for shared(m,x,y,Vx,Vy,Fx,Fy) \
	  private(i) schedule(static)
	for(i=0;i<n;i++)
	{	m[i]=1.0;
		/* n == 1 would otherwise evaluate 0/0 */
		x[i]=(n > 1) ? (10.0*i)/(n-1) : 0.0; y[i]=x[i];
		Vx[i]=0.0;  Vy[i]=0.0;
		Fx[i]=0.0;  Fy[i]=0.0;
	}

	/* zero every packed slot the force loops use; the index mapping must
	   be the same one used below, otherwise j == 0 lands before Tx[0] */
#pragma omp parallel for shared(Tx,Ty) \
	  private(i,j,k) schedule(SCHED)
	for(i=0;i<n;i++) for(j=0;j<i;j++)
	  { k=i+(2*n-j-1)*j/2;
	    Tx[k]=0.0; Ty[k]=0.0;
	  }


	/* show initial momentum */
	for(j=0,Px=0,Py=0;j<n;j++) { Px+=m[j]*Vx[j]; Py+=m[j]*Vy[j];}
	P=Px*Px+Py*Py; printf("\nt = 0, total momentum = %lf \n",P);
	printf("Vx(mid) = %lf \n",Vx[mid]);


	clock_gettime(CLOCK_SGI_CYCLE,&t1);


	/* every thread runs the whole time loop (t is private); the work in
	   each phase is divided by the "omp for" constructs inside */
#pragma omp parallel shared(m,x,y,Vx,Vy,Fx,Fy,Tx,Ty,Px,Py,P) \
	  private(i,j,k,t,dx,dy,fx,fy,ftmp,rsqinv,dVx,dVy,gamma)
	for(t=1; t<=nt; t++) {

/*--------------------------------------------------------------------------------------*/
	/* pairwise forces for i > j, stored in the packed lower triangle */
#pragma omp for schedule(SCHED)
	for(i=0; i<n; i++) {
			for(j=0; j<i; j++) {
				dx=x[j]-x[i]; dy=y[j]-y[i];
				rsqinv=1/(dx*dx+dy*dy);
				ftmp=m[i]*m[j]*rsqinv*sqrt(rsqinv);
				fx=ftmp*dx; fy=ftmp*dy;

				k=i+(2*n-j-1)*j/2;
				Tx[k]=fx; Ty[k]= fy;

			}
	}

#pragma omp for schedule(static)
	for(i=0; i<n; i++) {Fx[i]=0.0; Fy[i]=0.0;}

	/* row sums: force on body i from every body j < i */
#pragma omp for schedule(SCHED)
	for(i=0; i<n; i++)
	{
	  for(j=0;j<i;j++)
	  { k=i+(2*n-j-1)*j/2;
	    Fx[i]+=Tx[k]; Fy[i]+=Ty[k];
	  }
	}

	/* column sums: the opposite force on body j from every body i > j.
	   One work-shared loop over j (a single barrier); for each j the
	   terms are still subtracted in increasing i, so the floating-point
	   result matches a serial sweep over i */
#pragma omp for schedule(SCHED)
	for(j=0; j<n; j++)
	{
	  for(i=j+1;i<n;i++)
	  { k=i+(2*n-j-1)*j/2;
	    Fx[j]-=Tx[k]; Fy[j]-=Ty[k];
	  }
	}

	/* advance positions and velocities */
#pragma omp for schedule(static)
	for(i=0; i<n; i++) {
			gamma=G*dt/m[i];
			dVx=Fx[i]*gamma; dVy=Fy[i]*gamma;
			/* NOTE(review): dVx already contains one factor dt and
			   dtsq = 0.5*dt*dt, so this term scales as dt^3 -
			   confirm whether 0.5*dVx*dt was intended */
			x[i]+=Vx[i]*dt+dVx*dtsq;
			y[i]+=Vy[i]*dt+dVy*dtsq;
			Vx[i]+=dVx; Vy[i]+=dVy;
	}

/*--------------------------------------------------------------------------------------*/

/* compute and display net momentum */
	/* "single" (not "master") because its implied barrier guarantees the
	   accumulators are zeroed before any thread adds its partial sum */
#pragma omp single
		{ Px=0; Py=0; }

#pragma omp for schedule(static) reduction(+:Px,Py)
		for(i=0;i<n;i++) { Px+=m[i]*Vx[i]; Py+=m[i]*Vy[i];}

#pragma omp master
		{ P=Px*Px+Py*Py; printf("t = %d, total momentum = %lf \n",t,P);
		printf("Vx(mid) = %lf \n",Vx[mid]);
		}


	}

	clock_gettime(CLOCK_SGI_CYCLE,&t2);
	td = elapsed_time(t2,t1);
	/* pair interactions per second; guard against a zero interval */
	R = (td > 0.0) ? (int)ceil(((double)n*(n-1)*nt)/td) : 0;
	printf("\nn = %d p = %d t = %6.1f R = %d\n",n,p,(td * 1e9) / (nt*n),R);

	free(m); free(x); free(y); free(Vx); free(Vy);
	free(Fx); free(Fy); free(Tx); free(Ty);
	return 0;
}

/*
 * elapsed time in seconds for POSIX-compliant clocks
 */
double elapsed_time(const timespec_t t2, const timespec_t t1)
{
   /*
    * Returns t2 - t1 in seconds (negative when t2 is the earlier reading).
    *
    * The two fields are subtracted separately before anything is scaled:
    * turning each full timestamp into a double first would round away the
    * low-order nanoseconds whenever tv_sec is large, and the difference of
    * the two rounded values would then be dominated by that rounding error.
    */
   const double whole_seconds = (double)(t2.tv_sec  - t1.tv_sec);
   const double nano_seconds  = (double)(t2.tv_nsec - t1.tv_nsec);

   return whole_seconds + nano_seconds / 1e9;
}

/* just a reminder :)
cc -mp -o parb parb.c -lm
*/

