//-------------------------------------------------------------------------//
//                                                                         //
//        N  A  S     P A R A L L E L     B E N C H M A R K S  3.3         //
//                                                                         //
//            C U D A      M U L T I - Z O N E    V E R S I O N            //
//                                                                         //
//                              L U - M Z                                  //
//                                                                         //
//-------------------------------------------------------------------------//
//                                                                         //
//    This benchmark is a CUDA version of the NPB LU code.                 //
//    Refer to NAS Technical Reports 95-020 for details.                   //
//                                                                         //
//    Permission to use, copy, distribute and modify this software         //
//    for any purpose with or without fee is hereby granted.  We           //
//    request, however, that all derived work reference the NAS            //
//    Parallel Benchmarks 3.3. This software is provided "as is"           //
//    without express or implied warranty.                                 //
//                                                                         //
//    Information on NPB 3.3, including the technical report, the          //
//    original specifications, source code, results and information        //
//    on how to submit new results, is available at:                       //
//                                                                         //
//           http://www.nas.nasa.gov/Software/NPB/                         //
//                                                                         //
//    Send comments or suggestions to  npb@nas.nasa.gov                    //
//                                                                         //
//          NAS Parallel Benchmarks Group                                  //
//          NASA Ames Research Center                                      //
//          Mail Stop: T27A-1                                              //
//          Moffett Field, CA   94035-1000                                 //
//                                                                         //
//          E-mail:  npb@nas.nasa.gov                                      //
//          Fax:     (650) 604-3957                                        //
//                                                                         //
//-------------------------------------------------------------------------//

//-------------------------------------------------------------------------//
//                                                                         //
// Authors of original FORTRAN version: S. Weeratunga, V. Venkatakrishnan  //
//                                      E. Barszcz, M. Yarrow,             //
//                                      R.F. Van der Wijngaart             //
//                                                                         //
// CUDA implementation by: J. Duemmler                                     //
//                                                                         //
//-------------------------------------------------------------------------//

#include <stdio.h>
#include <math.h>
#include "main.h"

//---------------------------------------------------------------------
//
//   driver for the performance evaluation of the solver for
//   five coupled parabolic/elliptic partial differential equations.
//
//---------------------------------------------------------------------
int main (int argc, char **argv) {
	char benchclass = argc > 1 ? argv[1][0] : 'S';
	LUMZ *lumz = new LUMZ();

	printf("\n\n NAS Parallel Benchmarks (NPB3.3-MZ-CUDA) - LU Multi-Zone CUDA Benchmark\n\n");
	lumz->env_setup();

	//---------------------------------------------------------------------
	//   set up domain sizes
	//---------------------------------------------------------------------
	lumz->zone_setup(benchclass);

	//---------------------------------------------------------------------
	//   read input data
	//---------------------------------------------------------------------
	lumz->read_input();

	//---------------------------------------------------------------------
	//   allocate CUDA device memory
	//---------------------------------------------------------------------
	lumz->allocate_device_memory();

	//---------------------------------------------------------------------
	//   set up coefficients
	//---------------------------------------------------------------------
	lumz->setcoeff();

	//---------------------------------------------------------------------
	//   set the boundary values for dependent variables
	//---------------------------------------------------------------------
	lumz->setbv();

	//---------------------------------------------------------------------
	//   set the initial values for dependent variables
	//---------------------------------------------------------------------
	lumz->setiv();

	//---------------------------------------------------------------------
	//   compute the forcing term based on prescribed exact solution
	//---------------------------------------------------------------------
	lumz->erhs();

	//---------------------------------------------------------------------
	//   compute the steady-state residuals
	//---------------------------------------------------------------------
	lumz->rhs();

	//---------------------------------------------------------------------
	//   perform one SSOR iteration to touch all data pages
	//---------------------------------------------------------------------
	lumz->exch_qbc();
	lumz->ssor();

	//---------------------------------------------------------------------
	//   reset the boundary and initial values
	//---------------------------------------------------------------------
	lumz->setbv();
	lumz->setiv();
	//---------------------------------------------------------------------
	//   compute the steady-state residuals
	//---------------------------------------------------------------------
	lumz->rhs();

	//---------------------------------------------------------------------
	//   begin pseudo-time stepping iterations
	//---------------------------------------------------------------------
	lumz->time_steps();

	//---------------------------------------------------------------------
	//   compute the solution error and surface integral
	//---------------------------------------------------------------------
	lumz->error();
	lumz->pintgr();

	//---------------------------------------------------------------------
	//   verification test
	//---------------------------------------------------------------------
	bool verified = lumz->verify(benchclass);
	lumz->print_results(verified, benchclass);

	//---------------------------------------------------------------------
	//      More timers
	//---------------------------------------------------------------------
	lumz->print_timers();

	delete lumz;

	return EXIT_SUCCESS;
}

//---------------------------------------------------------------------
//   Constructs the benchmark object: creates the timer object and calls
//   get_cuda_info().
//
//   The per-zone arrays are allocated later by zone_setup(). They are
//   set to NULL here so that the delete[] calls in the destructor are
//   well defined even if zone_setup() has not been run.
//---------------------------------------------------------------------
LUMZ::LUMZ() {
	nx = NULL;
	ny = NULL;
	nz = NULL;
	iz_west = NULL;
	iz_east = NULL;
	iz_south = NULL;
	iz_north = NULL;

	timers = new Timers();
	get_cuda_info();
}

//---------------------------------------------------------------------
//   Releases the per-zone host arrays allocated in zone_setup() and
//   then calls free_device_memory().
//---------------------------------------------------------------------
LUMZ::~LUMZ() {
	// neighbour tables
	delete [] iz_north;
	delete [] iz_south;
	delete [] iz_east;
	delete [] iz_west;

	// zone dimensions
	delete [] nz;
	delete [] ny;
	delete [] nx;

	free_device_memory();
}


//---------------------------------------------------------------------
//   Reads one record of inputlu-mz.data. Each value line is preceded by
//   two lines that are skipped. Returns true when all three lines could
//   be read; `line` then holds the value line. Returns false on end of
//   file or read error.
//---------------------------------------------------------------------
static bool read_input_record(FILE *file, char *line, int size) {
	for (int i = 0; i < 3; i++) {
		if (fgets(line, size, file) == NULL) return false;
	}
	return true;
}

void LUMZ::read_input() {
	//---------------------------------------------------------------------
	//    Run parameters:
	//       ipr = 1 for detailed progress output
	//       inorm = how often the norm is printed (once every inorm iterations)
	//       itmax = number of pseudo time steps
	//       dt = time step
	//       omega = over-relaxation factor for SSOR
	//       tolrsd = steady state residual tolerance levels
	//
	//    The defaults are applied first. If inputlu-mz.data exists, every
	//    record that can be read and parsed overrides the corresponding
	//    default; itmax and dt keep the values chosen by zone_setup() when
	//    the file gives 0 or no value. A missing, truncated or malformed
	//    record therefore leaves the default in place.
	//---------------------------------------------------------------------
	ipr = IPR_DEFAULT;
	omega = OMEGA_DEFAULT;
	inorm = itmax;

	tolrsd[0] = TOLRSD1_DEF;
	tolrsd[1] = TOLRSD2_DEF;
	tolrsd[2] = TOLRSD3_DEF;
	tolrsd[3] = TOLRSD4_DEF;
	tolrsd[4] = TOLRSD5_DEF;

	FILE *file = fopen("inputlu-mz.data", "rt");
	if (file != NULL) {
		char line[1024];
		// 0 means "not given": the value from the defaults is kept
		int itimer = 0, itmax_in = 0;
		double dt_in = 0.0, omega_in = 0.0;
		printf("Reading from input file inputlu-mz.data\n");

		if (read_input_record(file, line, (int)sizeof(line)))
			sscanf(line, "%i %i", &ipr, &inorm);
		if (read_input_record(file, line, (int)sizeof(line)))
			sscanf(line, "%i", &itmax_in);
		if (read_input_record(file, line, (int)sizeof(line)))
			sscanf(line, "%lf", &dt_in);
		if (read_input_record(file, line, (int)sizeof(line)))
			sscanf(line, "%lf", &omega_in);
		if (read_input_record(file, line, (int)sizeof(line)))
			sscanf(line, "%lf %lf %lf %lf %lf", &tolrsd[0], &tolrsd[1], &tolrsd[2], &tolrsd[3], &tolrsd[4]);
		if (read_input_record(file, line, (int)sizeof(line)))
			sscanf(line, "%i", &itimer);
		fclose(file);

		if (itmax_in > 0) itmax = itmax_in;
		if (dt_in != 0.0) dt = dt_in;
		if (omega_in != 0.0) omega = omega_in;
		if (itimer > 0) {
			Timers::init_timer();
			timeron = true;
		}
	}

	printf(" Number of zones: %3d x %3d\n", x_zones, y_zones);
	printf(" Iterations: %3d    dt: %10.6F\n\n", itmax, dt);
}

//---------------------------------------------------------------------
//   Sets npb_verbose from the environment variable NPB_VERBOSE.
//   The level is 0 when the variable is unset, is not a number, or is
//   negative.
//---------------------------------------------------------------------
void LUMZ::env_setup() {
	// stays 0 when the variable is missing or sscanf cannot convert it
	int iverbose = 0;

	const char *envstr = getenv("NPB_VERBOSE");
	if (envstr != NULL) sscanf(envstr, "%i", &iverbose);

	npb_verbose = max(iverbose, 0);
}

//---------------------------------------------------------------------
//  verification routine                         
//---------------------------------------------------------------------
bool LUMZ::verify(const char benchclass) {
	// relative tolerance for all comparisons
	const double epsilon = 1.0e-08;

	//---------------------------------------------------------------------
	//   fetch the reference values for this class
	//---------------------------------------------------------------------
	int itmaxref;
	double dtref, xciref;
	double xcrref[5], xceref[5];
	get_ref_values(benchclass, itmaxref, dtref, xcrref, xceref, xciref);

	//---------------------------------------------------------------------
	//   relative differences between computed and reference values
	//---------------------------------------------------------------------
	double xcrdif[5], xcedif[5];
	for (int m = 0; m < 5; m++) {
		xcrdif[m] = fabs((rsdnm[m]-xcrref[m])/xcrref[m]);
		xcedif[m] = fabs((errnm[m]-xceref[m])/xceref[m]);
	}
	const double xcidif = fabs((frc-xciref)/xciref);

	//---------------------------------------------------------------------
	//   report; any mismatch clears the verification flag
	//---------------------------------------------------------------------
	bool verified = true;

	printf("\n Verification being performed for class %c\n", benchclass);
	printf(" Accuracy setting for epsilon = %20.13E\n", epsilon);

	// only the first mismatching run parameter (dt, then itmax) is reported
	if (fabs(dt-dtref) > epsilon) {
		verified = false;
		printf(" DT does not match the reference value of %15.8E\n", dtref);
	} else if (itmax != itmaxref) {
		verified = false;
		printf(" ITMAX does not match the reference value of %5d\n", itmaxref);
	}

	printf(" Comparison of RMS-norms of residual\n");
	for (int m = 0; m < 5; m++) {
		const bool pass = xcrdif[m] <= epsilon;
		if (!pass) verified = false;
		printf(pass ? "          %2d  %20.13E%20.13E%20.13E\n" : " FAILURE: %2d  %20.13E%20.13E%20.13E\n", m+1, rsdnm[m], xcrref[m], xcrdif[m]);
	}

	printf(" Comparison of RMS-norms of solution error\n");
	for (int m = 0; m < 5; m++) {
		const bool pass = xcedif[m] <= epsilon;
		if (!pass) verified = false;
		printf(pass ? "          %2d  %20.13E%20.13E%20.13E\n" : " FAILURE: %2d  %20.13E%20.13E%20.13E\n", m+1, errnm[m], xceref[m], xcedif[m]);
	}

	printf(" Comparison of surface integral\n");
	const bool integral_pass = xcidif <= epsilon;
	if (!integral_pass) verified = false;
	printf(integral_pass ? "              %20.13E%20.13E%20.13E\n" : " FAILURE:     %20.13E%20.13E%20.13E\n", frc, xciref, xcidif);

	printf(verified ? " Verification Successful\n" : " Verification failed\n");

	return verified;
}

//---------------------------------------------------------------------
//   Prints the benchmark summary (class, size, iterations, time, Mop/s,
//   verification status, version) followed by the CUDA device data.
//---------------------------------------------------------------------
void LUMZ::print_results(const bool verified, const char benchclass) {
	const double mflops = compute_mflops();

	// device figures converted to the units that are printed
	const double gpu_clock_ghz = (double)CUDAclock/1000000.;
	const double gpu_mem_mb = (double)CUDAmem/(1024.*1024.);
	const double gpu_memclock_ghz = (double)CUDAmemclock/1000000.;
	const double gpu_l2_kb = (double)CUDAl2cache/1024.;

	printf("\n\n LU-MZ Benchmark Completed.\n");
	printf(" Class           =             %12c\n", benchclass);
	printf(" Size            =           %4dx%4dx%4d\n", gx_size, gy_size, gz_size);
	printf(" Iterations      =             %12d\n", itmax);
	printf(" Time in seconds =             %12.2f\n", maxtime);
	printf(" Mop/s total     =             %12.2f\n", mflops);
	printf(" Operation type  =           floating point\n");
	printf(verified ? " Verification    =               SUCCESSFUL\n" : " Verification    =             UNSUCCESSFUL\n");
	printf(" Version         =             %12s\n", NPB_VERSION);

	printf("\n");
	printf(" CUDA device     = %24s\n", CUDAname);
	printf(" GPU multiprocs  =             %12d\n", CUDAmp);
	printf(" GPU clock rate  =             %8.3f GHz\n", gpu_clock_ghz);
	printf(" GPU memory      =             %9.2f MB\n", gpu_mem_mb);
	printf(" GPU mem clock   =             %8.3f GHz\n", gpu_memclock_ghz);
	printf(" GPU L2 cache    =             %9.2f KB\n", gpu_l2_kb);

	printf("\n\n");
}

double LUMZ::compute_mflops() const {
	double mflops = 0.0;

	if (maxtime > 0.0) {
		for (int zone = 0; zone < num_zones; zone++) {
			double n3 = (double)(nx[zone]*ny[zone]*nz[zone]);
			double navg = (nx[zone]+ny[zone]+nz[zone])/3.0;
			double nsur = (nx[zone]*ny[zone]+nx[zone]*nz[zone]+ny[zone]*nz[zone])/3.0;
			mflops += (double)itmax*1.0e-6*(1984.77*n3-10923.3*nsur+27770.9*navg-144010.0)/maxtime;
		}
	}

	return mflops;
}

//---------------------------------------------------------------------
//   Sets the global grid size, zone counts, time step and iteration
//   count for the given benchmark class (upper or lower case), then
//   allocates and fills the per-zone size and neighbour tables.
//   An unknown class prints an error and terminates the program.
//---------------------------------------------------------------------
void LUMZ::zone_setup(char benchclass) {
	struct ClassParams {
		char upper, lower;   // class letter in both cases
		int gx, gy, gz;      // global grid size
		int zones_per_dim;   // number of zones in x and in y
		double step_size;    // dt
		int steps;           // itmax
	};

	// classes K..Q are single-zone and correspond to the LU benchmark
	// classes S, W, A, B, C, D, E (in that order)
	static const ClassParams classes[] = {
		{ 'S', 's',    24,   24,    6, 4, 0.5,    50 },
		{ 'W', 'w',    64,   64,    8, 4, 1.5e-3, 300 },
		{ 'A', 'a',   128,  128,   16, 4, 2.0,    250 },
		{ 'B', 'b',   304,  208,   17, 4, 2.0,    250 },
		{ 'C', 'c',   480,  320,   28, 4, 2.0,    250 },
		{ 'D', 'd',  1632, 1216,   34, 4, 1.0,    300 },
		{ 'E', 'e',  4224, 3456,   92, 4, 0.5,    300 },
		{ 'F', 'f', 12032, 8960,  250, 4, 0.2,    300 },
		{ 'K', 'k',    12,   12,   12, 1, 0.5,    50 },
		{ 'L', 'l',    33,   33,   33, 1, 1.5e-3, 300 },
		{ 'M', 'm',    64,   64,   64, 1, 2.0,    250 },
		{ 'N', 'n',   102,  102,  102, 1, 2.0,    250 },
		{ 'O', 'o',   162,  162,  162, 1, 2.0,    250 },
		{ 'P', 'p',   408,  408,  408, 1, 1.0,    300 },
		{ 'Q', 'q',  1020, 1020, 1020, 1, 0.5,    300 }
	};
	const int num_classes = (int)(sizeof(classes)/sizeof(classes[0]));

	int found = -1;
	for (int i = 0; i < num_classes && found < 0; i++) {
		if (benchclass == classes[i].upper || benchclass == classes[i].lower) found = i;
	}
	if (found < 0) {
		printf("setparams: Internal error: invalid class %c\n", benchclass);
		exit(EXIT_FAILURE);
	}

	const ClassParams &params = classes[found];
	gx_size = params.gx;
	gy_size = params.gy;
	gz_size = params.gz;
	x_zones = y_zones = params.zones_per_dim;
	dt = params.step_size;
	itmax = params.steps;

	num_zones = x_zones * y_zones;

	nx = new int [num_zones];
	ny = new int [num_zones];
	nz = new int [num_zones];
	iz_west = new int [num_zones];
	iz_east = new int [num_zones];
	iz_south = new int [num_zones];
	iz_north = new int [num_zones];

	const bool show_sizes = npb_verbose > 1;
	if (show_sizes) printf("\n Zone sizes:\n");

	// zones are numbered row by row: zone_no = x + y*x_zones
	for (int zone_no = 0; zone_no < num_zones; zone_no++) {
		const int x = zone_no % x_zones;
		const int y = zone_no / x_zones;

		// split the global grid between the zones of a row/column
		nx[zone_no] = (x+1)*gx_size/x_zones - x*gx_size/x_zones;
		ny[zone_no] = (y+1)*gy_size/y_zones - y*gy_size/y_zones;
		nz[zone_no] = gz_size;

		// neighbour zones, wrapping around in both directions
		const int col_west = (x-1+x_zones) % x_zones;
		const int col_east = (x+1) % x_zones;
		const int row_south = (y-1+y_zones) % y_zones;
		const int row_north = (y+1) % y_zones;
		iz_west[zone_no] = col_west + y*x_zones;
		iz_east[zone_no] = col_east + y*x_zones;
		iz_south[zone_no] = x + row_south*x_zones;
		iz_north[zone_no] = x + row_north*x_zones;

		if (show_sizes) printf("%5i:  %5i  x%5i  x%5i\n", zone_no+1, nx[zone_no], ny[zone_no], nz[zone_no]);
	}
}

//---------------------------------------------------------------------
//   Returns the verification reference data for a benchmark class
//   (upper or lower case letter):
//      itmaxref, dtref = reference iteration count and time step
//      xcrref = reference RMS-norms of the residual
//      xceref = reference RMS-norms of the solution error
//      xciref = reference surface integral
//   For an unknown class itmaxref = 0, dtref = 0.0 and all reference
//   norms and the integral are 1.0.
//---------------------------------------------------------------------
void LUMZ::get_ref_values(const char benchclass, int &itmaxref, double &dtref, double (&xcrref)[5], double (&xceref)[5], double &xciref) const {
	struct RefSet {
		char upper, lower;   // class letter in both cases
		double step_size;    // reference dt
		int steps;           // reference itmax
		double residual[5];  // RMS-norms of residual
		double error[5];     // RMS-norms of solution error
		double integral;     // surface integral
	};

	// Classes K..Q are the single-zone grids (12, 33, 64, 102, 162, 408
	// and 1020 points per dimension).
	static const RefSet refs[] = {
		{ 'S', 's', 0.5, 50,
		  { 0.3778579699366e+01, 0.3120418698065e+00, 0.8386213407018e+00, 0.4452165980488e+00, 0.7808656756434e+01 },
		  { 0.2429480066305e+02, 0.9072817470024e+01, 0.1032621825644e+02, 0.9256791727838e+01, 0.1639045777714e+02 },
		  0.4964435445706e+02 },
		{ 'W', 'w', 1.5e-3, 300,
		  { 0.8285060230339e+03, 0.5753415004693e+02, 0.2023477570531e+03, 0.1586275182502e+03, 0.1733925947816e+04 },
		  { 0.7514670702651e+02, 0.9776687033238e+01, 0.2141754291209e+02, 0.1685405918675e+02, 0.1856944519722e+03 },
		  0.3781055348911e+03 },
		{ 'A', 'a', 2.0, 250,
		  { 0.1131574877175e+04, 0.7965206944742e+02, 0.2705587159526e+03, 0.2129567530746e+03, 0.2260584655432e+04 },
		  { 0.1115694885382e+03, 0.1089257673798e+02, 0.2905379922066e+02, 0.2216126755530e+02, 0.2501762341026e+03 },
		  0.5904992211511e+03 },
		{ 'B', 'b', 2.0, 250,
		  { 0.1734656959567e+05, 0.1238977748533e+04, 0.4123885357100e+04, 0.3613705834056e+04, 0.3531187871586e+05 },
		  { 0.1781612313296e+04, 0.1177971120769e+03, 0.4233792871440e+03, 0.3577260438230e+03, 0.3659958544012e+04 },
		  0.6107041476456e+04 },
		{ 'C', 'c', 2.0, 250,
		  { 0.4108743427233e+05, 0.3439004802235e+04, 0.9961331392486e+04, 0.8321426758084e+04, 0.7463792419218e+05 },
		  { 0.3429276307955e+04, 0.2336680861825e+03, 0.8216363109621e+03, 0.7143809828225e+03, 0.7057470798773e+04 },
		  0.1125826349653e+05 },
		{ 'D', 'd', 1.0, 300,
		  { 0.3282253166388e+06, 0.3490781637713e+05, 0.8610311978292e+05, 0.7004896022603e+05, 0.4546838584391e+06 },
		  { 0.6620775619126e+04, 0.5229798207352e+03, 0.1620218261697e+04, 0.1404783445006e+04, 0.1222629805121e+05 },
		  0.2059421629621e+05 },
		{ 'E', 'e', 0.5, 300,
		  { 0.1539988626779e+07, 0.1742224758490e+06, 0.4153598861059e+06, 0.3468381400447e+06, 0.2054406022038e+07 },
		  { 0.8021145134635e+04, 0.6932079823791e+03, 0.1998959591111e+04, 0.1725962639357e+04, 0.1389447024442e+05 },
		  0.2334131124791e+05 },
		{ 'F', 'f', 0.2, 300,
		  { 0.7116424271317e+07, 0.8159357680842e+06, 0.1930561069782e+07, 0.1633447037519e+07, 0.9417323380798e+07 },
		  { 0.8648720989200e+04, 0.7774221260694e+03, 0.2175462599498e+04, 0.1875280641999e+04, 0.1457903413233e+05 },
		  0.2448986519022e+05 },
		{ 'K', 'k', 5.0e-1, 50,
		  { 1.6196343210976702e-02, 2.1976745164821318e-03, 1.5179927653399185e-03, 1.5029584435994323e-03, 3.4264073155896461e-02 },
		  { 6.4223319957960924e-04, 8.4144342047347926e-05, 5.8588269616485186e-05, 5.8474222595157350e-05, 1.3103347914111294e-03 },
		  7.8418928865937083e+00 },
		{ 'L', 'l', 1.5e-3, 300,
		  { 0.1236511638192e+02, 0.1317228477799e+01, 0.2550120713095e+01, 0.2326187750252e+01, 0.2826799444189e+02 },
		  { 0.4867877144216e+00, 0.5064652880982e-01, 0.9281818101960e-01, 0.8570126542733e-01, 0.1084277417792e+01 },
		  0.1161399311023e+02 },
		{ 'M', 'm', 2.0e+0, 250,
		  { 7.7902107606689367e+02, 6.3402765259692870e+01, 1.9499249727292479e+02, 1.7845301160418537e+02, 1.8384760349464247e+03 },
		  { 2.9964085685471943e+01, 2.8194576365003349e+00, 7.3473412698774742e+00, 6.7139225687777051e+00, 7.0715315688392578e+01 },
		  2.6030925604886277e+01 },
		{ 'N', 'n', 2.0e+0, 250,
		  { 3.5532672969982736e+03, 2.6214750795310692e+02, 8.8333721850952190e+02, 7.7812774739425265e+02, 7.3087969592545314e+03 },
		  { 1.1401176380212709e+02, 8.1098963655421574e+00, 2.8480597317698308e+01, 2.5905394567832939e+01, 2.6054907504857413e+02 },
		  4.7887162703308227e+01 },
		{ 'O', 'o', 2.0e+0, 250,
		  { 1.03766980323537846e+04, 8.92212458801008552e+02, 2.56238814582660871e+03, 2.19194343857831427e+03, 1.78078057261061185e+04 },
		  { 2.15986399716949279e+02, 1.55789559239863600e+01, 5.41318863077207766e+01, 4.82262643154045421e+01, 4.55902910043250358e+02 },
		  6.66404553572181300e+01 },
		{ 'P', 'p', 1.0e+0, 300,
		  { 0.4868417937025e+05, 0.4696371050071e+04, 0.1218114549776e+05, 0.1033801493461e+05, 0.7142398413817e+05 },
		  { 0.3752393004482e+03, 0.3084128893659e+02, 0.9434276905469e+02, 0.8230686681928e+02, 0.7002620636210e+03 },
		  0.8334101392503e+02 },
		{ 'Q', 'q', 0.5e+0, 300,
		  { 0.2099641687874e+06, 0.2130403143165e+05, 0.5319228789371e+05, 0.4509761639833e+05, 0.2932360006590e+06 },
		  { 0.4800572578333e+03, 0.4221993400184e+02, 0.1210851906824e+03, 0.1047888986770e+03, 0.8363028257389e+03 },
		  0.9512163272273e+02 }
	};
	const int num_refs = (int)(sizeof(refs)/sizeof(refs[0]));

	// alternative class F references, used when the run has itmax == 30
	static const RefSet class_f_30_steps = {
		'F', 'f', 0.2, 30,
		{ 0.3814950058736e+08, 0.4280439009977e+07, 0.1016353864923e+08, 0.8627208852987e+07, 0.5024448179760e+08 },
		{ 0.8903253221139e+04, 0.8129462858441e+03, 0.2248648703838e+04, 0.1937258920446e+04, 0.1485251162647e+05 },
		0.2792087395236e+05
	};

	int found = -1;
	for (int i = 0; i < num_refs && found < 0; i++) {
		if (benchclass == refs[i].upper || benchclass == refs[i].lower) found = i;
	}

	if (found < 0) {
		// unknown class: neutral values, itmaxref/dtref of zero
		for (int m = 0; m < 5; m++) xcrref[m] = xceref[m] = 1.0;
		xciref = 1.0;
		itmaxref = 0;
		dtref = 0.0;
		return;
	}

	const RefSet *ref = &refs[found];
	if ((benchclass == 'F' || benchclass == 'f') && itmax == 30) ref = &class_f_30_steps;

	dtref = ref->step_size;
	itmaxref = ref->steps;
	for (int m = 0; m < 5; m++) {
		xcrref[m] = ref->residual[m];
		xceref[m] = ref->error[m];
	}
	xciref = ref->integral;
}

void LUMZ::ssor() {
	for (int zone = 0; zone < num_zones; zone++)
		ssor(u[zone], rsd[zone], frct[zone], rho_i[zone], qs[zone], nx[zone], ny[zone], nz[zone]);
}

void LUMZ::rhs() {
	for (int zone = 0; zone < num_zones; zone++)
		rhs(u[zone], rsd[zone], frct[zone], qs[zone], rho_i[zone], nx[zone], ny[zone], nz[zone]);
}

void LUMZ::l2norm() {
	double rsdnm_aux[5];

	for (int m = 0; m < 5; m++) rsdnm[m] = 0.0;
	for (int zone = 0; zone < num_zones; zone++) {
		l2norm(rsd[zone], rsdnm_aux, nx[zone], ny[zone], nz[zone]);
		for (int m = 0; m < 5; m++) rsdnm[m] += rsdnm_aux[m];
	}
}

void LUMZ::error() {
	double errnm_aux[5];

	for (int m = 0; m < 5; m++) errnm[m] = 0.0;
	for (int zone = 0; zone < num_zones; zone++) {
		error(u[zone], errnm_aux, nx[zone], ny[zone], nz[zone]);
		for (int m = 0; m < 5; m++) errnm[m] += errnm_aux[m];
	}
}

void LUMZ::pintgr() {
	double frc_aux;

	frc = 0.0;
	for (int zone = 0; zone < num_zones; zone++) {
		pintgr(u[zone], &frc_aux, nx[zone], ny[zone], nz[zone]);
		frc += frc_aux;
	}
}

void LUMZ::setbv() {
	for (int zone = 0; zone < num_zones; zone++)
		setbv(u[zone], nx[zone], ny[zone], nz[zone]);
}

void LUMZ::setiv() {
	for (int zone = 0; zone < num_zones; zone++)
		setiv(u[zone], nx[zone], ny[zone], nz[zone]);
}

void LUMZ::erhs() {
	for (int zone = 0; zone < num_zones; zone++)
		erhs(frct[zone], rsd[zone], nx[zone], ny[zone], nz[zone]);
}
