#include<stdlib.h>
#include<stdio.h>
#include<math.h>
#include<time.h>
#include<omp.h>

#define N_MAX 1000 /* 1,000 */

/* OpenMP-parallel program to calculate gravitational evolution (2-D N-body) */

/* Seconds elapsed from clock reading t1 to clock reading t2 (t2 - t1).
 * Takes the standard struct timespec that clock_gettime() fills in; this is
 * the same type as the timespec_t typedef on systems that provide one. */
double elapsed_time(const struct timespec t2, const struct timespec t1);

void main(int argc, char *argv[])
{   double dt,dtsq,rsqinv,ftmp,gamma,dx,dy,fx,fy,dVx,dVy;
    double *m,*x,*y,*Vx,*Vy,P,td;
    const double G=6.673;
    int    n,t,nt,i,j,p;
    timespec_t t1,t2;
    
    if (argc > 1) {
      n = atoi(argv[1]);
    }
    n = ( n > 0 && n <= N_MAX) ? n : N_MAX;

    p=omp_get_max_threads();

    nt=11; dt=1e-5; dtsq=0.5*dt*dt;

    /* allocate arrays */
    m= (double*) malloc(n*sizeof(double));
    x= (double*) malloc(n*sizeof(double));
    y= (double*) malloc(n*sizeof(double));
    Vx=(double*) malloc(n*sizeof(double));
    Vy=(double*) malloc(n*sizeof(double));
	
    /* initialize arrays */
# pragma omp parallel for shared(m,x,y,Vx,Vy) private(i) schedule(runtime)
    for(i=0;i<n;i++)
    { m[i]=1;
      x[i]=(10.0*i)/n; y[i]=x[i]; 
      Vx[i]=0;  Vy[i]=0;
    }


    /* show initial condition */
    printf("t = 0, x1 = %lf, y1 = %lf \n",x[0],y[0]);
    for(j=0,P=0;j<n;j++) P+=m[j]*(Vx[j]+Vy[j]);
    printf("total momentum = %lf \n",P);
	    

    clock_gettime(CLOCK_SGI_CYCLE,&t1);


    for(t=1; t<=nt; t++) {
# pragma omp parallel for shared(m,x,y,Vx,Vy) private(i,j,fx,fy,gamma,dx,dy,dVx,dVy,rsqinv,ftmp) schedule(runtime)	                            
    	for(i=0; i<n; i++) { 
	  fx=0; fy=0;
	  for(j=0; j<n; j++) if(j!=i) { 
	     dx=x[j]-x[i]; dy=y[j]-y[i];
	     rsqinv=1/(dx*dx+dy*dy);
	     ftmp=m[i]*m[j]*rsqinv*sqrt(rsqinv);
	     fx+=ftmp*dx; fy+=ftmp*dy;
	  }
	  gamma=G*dt/m[i];
	  dVx=fx*gamma; dVy=fy*gamma;   
	  x[i]+=Vx[i]*dt+dVx*dtsq;
	  y[i]+=Vy[i]*dt+dVy*dtsq;
	  Vx[i]+=dVx; Vy[i]+=dVy;
	} 
         
	printf("t = %d, x1 = %lf, y1 = %lf \n",t,Vx[5],Vy[5]);
	for(j=0,P=0;j<n;j++) P+=m[j]*(Vx[j]+Vy[j]);
	printf("total momentum = %lf \n",P);
	    

    }

    clock_gettime(CLOCK_SGI_CYCLE,&t2);
    td = elapsed_time(t2,t1);
    printf("\b\b\btime per iteration = %6.1f ns\n", (td * 1E9)/nt);
    printf("\b\b\btime per body = %6.1f ns\n", (td * 1E9)/(nt * n));

    printf("with %d bodies and %d processors \n",n,p);

}


/*
 * Elapsed time in seconds between two POSIX clock readings, t2 - t1.
 *
 * t2  later reading, as filled in by clock_gettime()
 * t1  earlier reading from the same clock
 *
 * The seconds and nanoseconds fields are subtracted separately before being
 * combined.  Converting each reading to a single double first would round
 * away the nanosecond part whenever tv_sec is large.
 */
double elapsed_time(const struct timespec t2, const struct timespec t1)
{
   return   difftime(t2.tv_sec, t1.tv_sec)
          + (double)(t2.tv_nsec - t1.tv_nsec) / 1e9;
}

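/*

Build and run commands (assg2_1.c is presumably the sequential version and
assg2_2.c the OpenMP one; -mp presumably turns on OpenMP support in the
SGI compiler -- confirm against your toolchain):

cc -o assg2 assg2_1.c -lm

cc -mp -o assg2 assg2_2.c -lm
env OMP_NUM_THREADS=4 assg2 100

The last line runs the program with 4 threads and 100 bodies.

*/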
