#include<stdlib.h>
#include<stdio.h>
#include<math.h>
#include<time.h>
#include<omp.h>
#define N_MAX 10000 /* 10,000 */

/* Sequential Program to Calculate Gravitational Evolution */

double elapsed_time(const timespec_t t2, const timespec_t t1);

/*
 * Sequential n-body gravity benchmark.
 *
 * Usage: prog [n]
 *   n  number of particles, 1..N_MAX.  A missing, non-numeric or
 *      out-of-range value selects N_MAX.
 *
 * Places n unit-mass particles on the diagonal from (0,0) to (10,10),
 * advances them nt time steps using an all-pairs force sum, and after each
 * step prints P = Px^2 + Py^2 together with Vx of the middle particle.
 * Finally prints the time per particle per step and the number of pair
 * interactions evaluated per second.
 *
 * Returns EXIT_SUCCESS, or EXIT_FAILURE if the work arrays cannot be
 * allocated.
 */
int main(int argc,char* argv[])
{	double dt,dtsq,rsqinv,ftmp,gamma,dx,dy,dVx,dVy;
	double *m,*x,*y,*Vx,*Vy,*Fx,*Fy,Px,Py,P,td,R;
	const double G=6.673;
	int    n,t,nt,i,j,mid,status;
	long   nreq;
	timespec_t t1,t2;

	/* n must hold a defined value even when no argument is supplied */
	n=0;
	if (argc > 1) {
		/* strtol (unlike atoi) is well defined for out-of-range text */
		nreq=strtol(argv[1],NULL,10);
		if (nreq > 0 && nreq <= N_MAX) n=(int)nreq;
	}
	if (n == 0) n=N_MAX;

	nt=3; dt=1e-1/nt; dtsq=0.5*dt*dt;
	gamma=G*dt;
	mid=(int)ceil((double)n*0.5)-1;

	/*p=omp_get_max_threads();*/

	/* allocate arrays */
	m= (double*) malloc(n*sizeof(double));
	x= (double*) malloc(n*sizeof(double));
	y= (double*) malloc(n*sizeof(double));
	Vx=(double*) malloc(n*sizeof(double));
	Vy=(double*) malloc(n*sizeof(double));
	Fx=(double*) malloc(n*sizeof(double));
	Fy=(double*) malloc(n*sizeof(double));

	if (!m || !x || !y || !Vx || !Vy || !Fx || !Fy) {
		fprintf(stderr,"out of memory allocating arrays for n=%d\n",n);
		status=EXIT_FAILURE;
		goto done;
	}

	/*#pragma omp parallel for shared(m,x,y,Vx,Vy,Fx,Fy) \
	  private(i) schedule(runtime)*/
	/* initialize arrays */
	for(i=0;i<n;i++)
	{	m[i]=1.0;
		/* a single particle sits at the origin; avoids 0/0 when n == 1 */
		x[i]=(n > 1) ? (10.0*i)/(n-1) : 0.0; y[i]=x[i];
		Vx[i]=0.0;  Vy[i]=0.0;
		Fx[i]=0.0;  Fy[i]=0.0;
	}

	/* show initial momentum */
	for(j=0,Px=0,Py=0;j<n;j++) { Px+=m[j]*Vx[j]; Py+=m[j]*Vy[j];}
	P=Px*Px+Py*Py; printf("\nt = 0, total momentum = %lf \n",P);
	printf("Vx(mid) = %lf \n",Vx[mid]);

	clock_gettime(CLOCK_SGI_CYCLE,&t1);

	/*#pragma omp parallel shared(m,x,y,Vx,Vy,Fx,Fy) \
	  private(i,j,t,dx,dy,ftmp,rsqinv,dVx,dVy)*/
	for(t=1; t<=nt; t++) {

		/*#pragma omp for schedule(static)*/
		for(i=0; i<n; i++) { /* calculate forces */
			Fx[i]=0.0; Fy[i]=0.0;
			for(j=0; j<n; j++) if(j!=i) {
				dx=x[j]-x[i]; dy=y[j]-y[i];
				rsqinv=1/(dx*dx+dy*dy);
				/* m[j]/r^3; multiplied by dx,dy below */
				ftmp=m[j]*rsqinv*sqrt(rsqinv);
				Fx[i]+=ftmp*dx; Fy[i]+=ftmp*dy;
			}
		}

		/*#pragma omp for schedule(static)*/
		for(i=0; i<n; i++) { /* update position/velocity */
			dVx=Fx[i]*gamma; dVy=Fy[i]*gamma;
			/* NOTE(review): dVx already carries a factor dt, so
			 * dVx*dtsq scales as dt^3; left as is to keep the
			 * program's numerical output unchanged - confirm. */
			x[i]+=Vx[i]*dt+dVx*dtsq;
			y[i]+=Vy[i]*dt+dVy*dtsq;
			Vx[i]+=dVx; Vy[i]+=dVy;
		}

		/* compute and display net momentum */

		/*#pragma omp master*/
		{ Px=0; Py=0; }

		/*#pragma omp for schedule(static) reduction(+:Px,Py)*/
		for(i=0;i<n;i++) { Px+=m[i]*Vx[i]; Py+=m[i]*Vy[i];}

		/*#pragma omp master*/
		{ P=Px*Px+Py*Py; printf("t = %d, total momentum = %lf \n",t,P);
		  printf("Vx(mid) = %lf \n",Vx[mid]);
		}
	}

	clock_gettime(CLOCK_SGI_CYCLE,&t2);
	td = elapsed_time(t2,t1);
	/*printf("\nn=%d, p=%d, t = %6.1f ns\n",n,p,(td * 1e9) / (nt*n));*/
	printf("\nn=%d p=0 t = %6.1f ns\n",n,(td * 1e9) / (nt*n));

	/* Kept in double: the rate can exceed INT_MAX on a fast run, and a
	 * zero interval (clock too coarse) would otherwise divide by zero. */
	R = (td > 0.0) ? ceil(((double)n*(n-1)*nt)/td) : 0.0;
	printf("over k = %d iterations, reaction rate R = %.0f per sec\n\n",nt,R);

	status=EXIT_SUCCESS;

done:
	/* deallocate arrays (free(NULL) is a no-op on the failure path) */
	free(m);
	free(x);
	free(y);
	free(Vx);
	free(Vy);
	free(Fx);
	free(Fy);

	return status;
}

/*
 * Elapsed time in seconds between two POSIX-style clock readings (t2 - t1).
 *
 * The tv_sec and tv_nsec fields are differenced separately before being
 * combined.  Converting each full timestamp to a double first would round
 * away the low-order nanoseconds whenever tv_sec is large (for example an
 * epoch-based clock), so the subtraction is done on the integer fields.
 *
 * The result is negative if t2 is earlier than t1.
 */
double elapsed_time(const timespec_t t2, const timespec_t t1)
{
   const double sec  = (double)(t2.tv_sec - t1.tv_sec);
   const double nsec = (double)(t2.tv_nsec - t1.tv_nsec);

   return sec + nsec / 1e9;
}

/* just a reminder :)
cc -mp -o seqa seqa.c -lm
cc -o seqa seqa.c -lm
*/
