/*
**  jacobi.c - Jacobi method with over relaxation.
**
**  99/06/09 S.Satoh @ RWCP
**  99/09/17 change external pointers into firstprivate
*/

#include <assert.h>
#include <errno.h>
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>

/* Default value of N (the matrix is N x N); can be overridden with -n. */
#define DEFAULT_PROBLEM_SIZE	4096
/* Default value of lineSize, the byte alignment handed to align_malloc();
   can be overridden with -l.  Presumably a cache line size -- confirm. */
#define DEFAULT_LINE_SIZE	64
/* Tolerance; the code only ever compares squared errors against EPSILON^2. */
#define EPSILON			1.0e-5
/* Relaxation factor applied to every Jacobi update in solve(). */
#define OMEGA			0.6

/* Dimension of the linear system. */
static int N = DEFAULT_PROBLEM_SIZE;
/* Alignment, in bytes, requested for every array allocation. */
static int lineSize = DEFAULT_LINE_SIZE;
/* Coefficient matrix: N*N doubles, element (i, j) stored at A[i*N + j]. */
static double *A;
/* Current iterate; read by solve() as the starting guess and checked by verify(). */
static double *x;
/* Scratch vector holding the next iterate during a sweep. */
static double *y;
/* Right-hand side of the system. */
static double *b;
/* Squared tolerance used by both the solver's stop test and verify(). */
static double eps2 = EPSILON * EPSILON;
/* Run-time copy of the relaxation factor. */
static double omega = OMEGA;

/* Wall-clock duration, in seconds, of each phase as measured with gettimeofday(). */
static double init_time;
static double solve_time;
static double verify_time;

/* Run the relaxed Jacobi iteration on A, b, x, y and record solve_time. */
static void solve(void);
/* Allocate A, x, y, b, fill in the linear system, and record init_time. */
static void make_data(void);
/* Allocate `size` bytes starting at an address that is a multiple of `align`. */
static void *align_malloc(int size, int align);
/* Compare x with the all-ones vector, print the verdict, and record verify_time. */
static void verify(void);
/* Read the -n and -l command-line options into N and lineSize. */
static void parse_option(int argc, char **argv);
/* Print A, b, x and y to stdout; not called anywhere in this file. */
static void printout(void);

/*
** Program entry point: parse the options, build the linear system, run the
** solver, verify the result, and print the wall-clock time of each phase.
**
** Returns 0 whenever all phases run to completion; the verification verdict
** is only printed and does not affect the exit status.  parse_option()
** terminates the process itself when the arguments are invalid.
*/
int main(int argc, char **argv) {
    double total;

    parse_option(argc, argv);
    printf("Jacobi's iterative solver: n = %d, l = %d, omega = %f\n",
           N, lineSize, omega);
    fflush(stdout);

    make_data();
    printf("Initialized\n");
    fflush(stdout);

    solve();
    verify();

    total = init_time + solve_time + verify_time;
    printf("init_time:   %15.6f sec\n", init_time);
    printf("solve_time:  %15.6f sec\n", solve_time);
    printf("verify_time: %15.6f sec\n", verify_time);
    printf("total:       %15.6f sec\n", total);

    /* Returning from main is equivalent to exit(0) and needs no extra
       declaration in scope. */
    return 0;
}

static void solve(void) {
    struct timeval t;
    double s_time, e_time;
    double err;

#pragma omp flush
    gettimeofday(&t, NULL);
    s_time = (double)t.tv_sec + (1.0e-6)*t.tv_usec;
    
#pragma omp parallel firstprivate(A,b,x,y,N,omega,eps2)
    {
	int i, j;
	int iter = 0;
	do {
#pragma omp barrier
#pragma omp single
	    err = 0.0;
#pragma omp for reduction(+:err) schedule(static)
	    for (i = 0; i < N; i++) {
		y[i] = b[i];
		for (j = 0; j < N; j++) {
		    if (j != i) y[i] = y[i] - A[i*N+j] * x[j];
		}
		y[i] = x[i] + omega * (y[i]/A[i*N+i] - x[i]);
		err += (x[i]-y[i]) * (x[i]-y[i]);
	    }
#pragma omp for schedule(static) nowait
	    for (i = 0; i < N; i++) {
		x[i] = y[i];
	    }
#pragma omp master	
	    {printf("iter %d: err**2 = %e\n", iter, err);fflush(stdout);}
	    iter++;
	} while (iter < 20 && err > eps2);
    }
#pragma omp flush
    gettimeofday(&t, NULL);
    e_time = (double)t.tv_sec + (1.0e-6)*t.tv_usec;
    solve_time = e_time - s_time;
}

/*
** Allocate the arrays and build the test system.
**
** A is set to 1.0 everywhere except the diagonal, which is set to N; b is
** the row sum of A, so the exact solution of A*x = b is the all-ones vector.
** x and y are zeroed so that solve() starts from a defined initial guess
** (memory obtained from malloc() has indeterminate contents).
**
** Invalid sizes and allocation failures are reported through assert(), which
** is this file's existing failure policy; note that assert() is compiled out
** when NDEBUG is defined.  The time spent filling the arrays (allocation
** excluded) is stored in init_time.
*/
static void make_data(void) {
    struct timeval t;
    double s_time, e_time;
    size_t n;

    /* align_malloc() takes its size as an int, so make sure the byte count
       of the N*N matrix fits before computing it.  This also keeps every
       i*N + j index below within int range. */
    assert(N > 0);
    n = (size_t)N;
    assert(n <= (size_t)INT_MAX / sizeof(double) / n);

    /* allocate memory */
    A = align_malloc((int)(n * n * sizeof(double)), lineSize);
    assert(A != NULL);

    x = align_malloc((int)(n * sizeof(double)), lineSize);
    assert(x != NULL);

    y = align_malloc((int)(n * sizeof(double)), lineSize);
    assert(y != NULL);

    b = align_malloc((int)(n * sizeof(double)), lineSize);
    assert(b != NULL);

    printf("allocated\n");
    fflush(stdout);

#pragma omp flush
    gettimeofday(&t, NULL);
    s_time = (double)t.tv_sec + (1.0e-6)*t.tv_usec;

#pragma omp parallel firstprivate(A,b,x,y,N)
    {
        int i, j;    /* declared inside the region, hence private per thread */

        /* initialize */
        /* nowait relies on the next loop having the same bounds and the same
           static schedule, so each thread only sums rows it filled itself. */
#pragma omp for schedule(static) nowait
        for (i = 0; i < N; i++) {
            for (j = 0; j < N; j++) {
                A[i*N + j] = 1.0;
            }
            A[i*N + i] = (double)N;
        }

#pragma omp for schedule(static)
        for (i = 0; i < N; i++) {
            b[i] = 0.0;
            for (j = 0; j < N; j++) {
                b[i] += A[i*N + j];
            }
            /* Defined starting guess for the solver and a clean scratch vector. */
            x[i] = 0.0;
            y[i] = 0.0;
        }
    }

#pragma omp flush
    gettimeofday(&t, NULL);
    e_time = (double)t.tv_sec + (1.0e-6)*t.tv_usec;
    init_time = e_time - s_time;
}

/*
** Allocate at least `size` usable bytes starting at an address that is a
** multiple of `align`.
**
** size   number of bytes requested; must be >= 0.
** align  required alignment in bytes; must be > 0 (it does not have to be a
**        power of two).
**
** Returns the aligned pointer, or NULL when an argument is invalid or the
** underlying malloc() fails.  The result points into the interior of the
** malloc()ed region and the base pointer is not retained, so the returned
** pointer must NOT be passed to free().
*/
static void *align_malloc(int size, int align) {
    unsigned char *base;
    uintptr_t addr, step, aligned;

    if (size < 0 || align <= 0) {
        return NULL;
    }

    /* Over-allocate by `align` bytes so that rounding up stays in bounds;
       the sum is formed in size_t to avoid int overflow. */
    base = malloc((size_t)size + (size_t)align);
    if (base == NULL) {
        return NULL;
    }

    /* Do the rounding in uintptr_t: casting the pointer to int would drop
       the upper address bits on platforms with 64-bit pointers. */
    addr = (uintptr_t)base;
    step = (uintptr_t)align;
    aligned = ((addr + step - 1) / step) * step;

    /* Offset the original pointer rather than converting the integer back. */
    return base + (aligned - addr);
}

static void verify(void) {
    struct timeval t;
    double	s_time, e_time;
    int i;
    double err;

#pragma omp flush
    gettimeofday(&t, NULL);
    s_time = (double)t.tv_sec + (1.0e-6)*t.tv_usec;
    err = 0.0;

#pragma omp parallel for reduction(+:err) schedule(static) firstprivate(x,N)
    for (i = 0; i < N; i++) {
	err += (x[i] - 1.0) * (x[i] - 1.0);
    }

    printf("Error**2 = %e, Epsilon**2 = %e\n", err, eps2);
    if (err < eps2) {
	printf("Verification OK\n");
    } else {
	printf("Verification Failed\n");
    }

#pragma omp flush
    gettimeofday(&t, NULL);
    e_time = (double)t.tv_sec + (1.0e-6)*t.tv_usec;
    verify_time = e_time - s_time;
}

/*
** Convert `text`, the argument of option `opt`, to a strictly positive int.
** Prints a diagnostic and exits with status 1 when the text is not a
** complete decimal number, is out of range for int, or is not positive.
*/
static int parse_positive_int(const char *opt, const char *text) {
    char *end;
    long value;

    errno = 0;
    value = strtol(text, &end, 10);
    if (end == text || *end != '\0' || errno == ERANGE
        || value <= 0 || value > INT_MAX) {
        fprintf(stderr, "error: invalid argument for %s option: %s\n",
                opt, text);
        exit(1);
    }
    return (int)value;
}

/*
** Parse the command line.
**
**   -n <int>   problem size, stored in N
**   -l <int>   allocation alignment in bytes, stored in lineSize
**
** Both values must be positive: N sizes every array, and lineSize is used as
** a divisor when aligning allocations.  Any unknown option, missing argument
** or invalid value prints a message on stderr and exits with status 1.
*/
static void parse_option(int argc, char **argv) {
    int i;

    for (i = 1; i < argc; i++) {
        const char *opt = argv[i];
        int *target;

        if (strcmp(opt, "-n") == 0) {
            target = &N;
        } else if (strcmp(opt, "-l") == 0) {
            target = &lineSize;
        } else {
            fprintf(stderr, "error: unknown option: %s\n", opt);
            exit(1);
        }

        /* Both options take exactly one argument. */
        i++;
        if (i >= argc) {
            fprintf(stderr, "error: no argument for %s option\n", opt);
            exit(1);
        }
        *target = parse_positive_int(opt, argv[i]);
    }
}

/*
** Debugging aid: print the matrix A row by row, then the vectors b, x and y,
** each on a single line, to stdout.  Every value is printed with " %10f".
*/
static void printout(void) {
    const char *const headers[] = { "B[*] =", "X[*] =", "Y[*] =" };
    const double *const vectors[] = { b, x, y };
    int row, col, v;

    for (row = 0; row < N; row++) {
        const double *line = A + row*N;

        printf("A[%d, *] =", row);
        for (col = 0; col < N; col++) {
            printf(" %10f", line[col]);
        }
        printf("\n");
    }

    /* The three vectors share one layout: header, N values, newline. */
    for (v = 0; v < 3; v++) {
        const double *vec = vectors[v];

        printf("%s", headers[v]);
        for (col = 0; col < N; col++) {
            printf(" %10f", vec[col]);
        }
        printf("\n");
    }
}
