//-------------------------------------------------------------------------//
//                                                                         //
//        N  A  S     P A R A L L E L     B E N C H M A R K S  3.3         //
//                                                                         //
//            C U D A      M U L T I - Z O N E    V E R S I O N            //
//                                                                         //
//                              S P - M Z                                  //
//                                                                         //
//-------------------------------------------------------------------------//
//                                                                         //
//    This benchmark is a CUDA version of the NPB SP code.                 //
//    Refer to NAS Technical Reports 95-020 and 99-011 for details.        //
//                                                                         //
//    Permission to use, copy, distribute and modify this software         //
//    for any purpose with or without fee is hereby granted.  We           //
//    request, however, that all derived work reference the NAS            //
//    Parallel Benchmarks 3.3. This software is provided "as is"           //
//    without express or implied warranty.                                 //
//                                                                         //
//    Information on NPB 3.3, including the technical report, the          //
//    original specifications, source code, results and information        //
//    on how to submit new results, is available at:                       //
//                                                                         //
//           http://www.nas.nasa.gov/Software/NPB/                         //
//                                                                         //
//    Send comments or suggestions to  npb@nas.nasa.gov                    //
//                                                                         //
//          NAS Parallel Benchmarks Group                                  //
//          NASA Ames Research Center                                      //
//          Mail Stop: T27A-1                                              //
//          Moffett Field, CA   94035-1000                                 //
//                                                                         //
//          E-mail:  npb@nas.nasa.gov                                      //
//          Fax:     (650) 604-3957                                        //
//                                                                         //
//-------------------------------------------------------------------------//

//-------------------------------------------------------------------------//
//                                                                         //
// Authors of original FORTRAN version: R. Van der Wijngaart, W. Saphir,   //
//                                      H. Jin                             //
//                                                                         //
// CUDA implementation by: J. Duemmler                                     //
//                                                                         //
//-------------------------------------------------------------------------//

#include <stdio.h>
#include <math.h>
#include "main.h"

int main (int argc, char **argv) {
	char benchclass = argc > 1 ? argv[1][0] : 'S';
	SPMZ *spmz = new SPMZ();

	printf("\n\n NAS Parallel Benchmarks (NPB3.3-MZ-CUDA) - SP Multi-Zone CUDA Benchmark\n\n");
	spmz->env_setup();

	//---------------------------------------------------------------------
	//   set up domain sizes
	//---------------------------------------------------------------------
	spmz->zone_setup(benchclass);

	//---------------------------------------------------------------------
	//   read input data
	//---------------------------------------------------------------------
	spmz->read_input();

	//---------------------------------------------------------------------
	//   allocate CUDA device memory
	//---------------------------------------------------------------------
	spmz->allocate_device_memory();

	//---------------------------------------------------------------------
	//   set up coefficients
	//---------------------------------------------------------------------
	spmz->set_constants();

	spmz->exact_rhs();
	spmz->initialize();

	//---------------------------------------------------------------------
	//      do one time step to touch all code, and reinitialize
	//---------------------------------------------------------------------
	spmz->exch_qbc();
	spmz->adi();
	spmz->initialize();

	//---------------------------------------------------------------------
	//      start the benchmark time step loop
	//---------------------------------------------------------------------
	spmz->time_steps();

	//---------------------------------------------------------------------
	//      perform verification and print results
	//---------------------------------------------------------------------
	bool verified = spmz->verify(benchclass);
	spmz->print_results(verified, benchclass);

	//---------------------------------------------------------------------
	//      More timers
	//---------------------------------------------------------------------
	spmz->print_timers();

	delete spmz;

	return EXIT_SUCCESS;
}

//---------------------------------------------------------------------
//  Constructs the benchmark object: creates the timer object and
//  queries the CUDA device via get_cuda_info().
//
//  The per-zone host arrays are only allocated later by zone_setup(),
//  but the destructor unconditionally delete[]s them. They are set to
//  NULL here so that destroying an object on which zone_setup() was
//  never called is well defined (delete[] on NULL is a no-op).
//---------------------------------------------------------------------
SPMZ::SPMZ() {
	nx = NULL;
	ny = NULL;
	nz = NULL;
	iz_west = NULL;
	iz_east = NULL;
	iz_south = NULL;
	iz_north = NULL;

	// read_input() only ever switches timing on; give the flag a defined
	// "off" state for runs where no input file requests timers.
	timeron = false;

	timers = new Timers();
	get_cuda_info();
}

//---------------------------------------------------------------------
//  Releases the per-zone host arrays allocated by zone_setup() (in
//  reverse order of their allocation) and then calls
//  free_device_memory().
//
//  NOTE(review): the Timers object created in the constructor is not
//  deleted here - confirm whether it is released elsewhere.
//---------------------------------------------------------------------
SPMZ::~SPMZ() {
	// zone neighbour tables
	delete [] iz_north;
	delete [] iz_south;
	delete [] iz_east;
	delete [] iz_west;

	// zone dimensions
	delete [] nz;
	delete [] ny;
	delete [] nx;

	free_device_memory();
}


//---------------------------------------------------------------------
//  Reads the optional input file "inputsp-mz.data" and prints the
//  resulting run configuration.
//
//  The file is expected to hold three lines:
//    1. number of iterations (used when > 0)
//    2. time step dt         (used when != 0.0)
//    3. timer flag           (timers are enabled when > 0)
//  A missing file, a missing line or an unparsable value leaves the
//  corresponding default (set by zone_setup()) untouched.
//---------------------------------------------------------------------
void SPMZ::read_input() {
	FILE *file = fopen("inputsp-mz.data", "rt");
	if (file != NULL) {
		char line[1024];
		// Start from "keep the default" values so that a short or malformed
		// file can never cause an indeterminate value to be read below.
		int itimer = 0, niter_in = 0;
		double dt_in = 0.0;
		printf("Reading from input file inputsp-mz.data\n");

		// fgets() already reserves room for the terminating '\0', so the full
		// buffer size can be passed. Each value is accepted only if both the
		// line read and the conversion succeeded.
		if (fgets(line, sizeof(line), file) == NULL || sscanf(line, "%i", &niter_in) != 1) niter_in = 0;
		if (fgets(line, sizeof(line), file) == NULL || sscanf(line, "%lf", &dt_in) != 1) dt_in = 0.0;
		if (fgets(line, sizeof(line), file) == NULL || sscanf(line, "%i", &itimer) != 1) itimer = 0;
		fclose(file);

		if (niter_in > 0) niter = niter_in;
		if (dt_in != 0.0) dt = dt_in;
		if (itimer > 0) {
			timeron = true;
			Timers::init_timer();
		}
	}

	printf(" Number of zones: %3d x %3d\n", x_zones, y_zones);
	printf(" Iterations: %3d    dt: %10.6F\n\n", niter, dt);
}

//---------------------------------------------------------------------
//  Reads the verbosity level from the NPB_VERBOSE environment variable.
//
//  npb_verbose is set to the parsed value clamped to be non-negative.
//  It is 0 when the variable is unset or does not start with a number
//  (previously an unparsable value left the level indeterminate).
//---------------------------------------------------------------------
void SPMZ::env_setup() {
	npb_verbose = 0;

	const char *envstr = getenv("NPB_VERBOSE");
	if (envstr != NULL) {
		int iverbose = 0;
		// only trust the value if the conversion actually succeeded
		if (sscanf(envstr, "%i", &iverbose) == 1 && iverbose > 0) npb_verbose = iverbose;
	}
}

//---------------------------------------------------------------------
//  verification routine                         
//---------------------------------------------------------------------
bool SPMZ::verify(const char benchclass) {
	// tolerance applied to dt and to every relative difference
	const double epsilon = 1.0e-08;

	// compute the error norm (xce) and the residual norm (xcr)
	error_norm();
	rhs_norm();

	// fetch the reference values for this class
	int niterref;
	double dtref;
	double xcrref[5], xceref[5];
	get_ref_values(benchclass, niterref, dtref, xcrref, xceref);

	bool verified = true;

	printf("\n Verification being performed for class %c\n", benchclass);
	printf(" Accuracy setting for epsilon = %20.13E\n", epsilon);

	// the run parameters must match the reference run; only the first
	// mismatch (dt before niter) is reported
	if (fabs(dt-dtref) > epsilon) {
		printf(" DT does not match the reference value of %15.8E\n", dtref);
		verified = false;
	} else if (niter != niterref) {
		printf(" NITER does not match the reference value of %5d\n", niterref);
		verified = false;
	}

	// residual norms: relative difference against the reference values
	printf(" Comparison of RMS-norms of residual\n");
	for (int i = 0; i < 5; ++i) {
		const double reldiff = fabs((xcr[i]-xcrref[i])/xcrref[i]);
		// "<=" is deliberate: a NaN difference compares false and is a failure
		if (reldiff <= epsilon) {
			printf("          %2d  %20.13E%20.13E%20.13E\n", i+1, xcr[i], xcrref[i], reldiff);
			continue;
		}
		verified = false;
		printf(" FAILURE: %2d  %20.13E%20.13E%20.13E\n", i+1, xcr[i], xcrref[i], reldiff);
	}

	// solution error norms: same comparison
	printf(" Comparison of RMS-norms of solution error\n");
	for (int i = 0; i < 5; ++i) {
		const double reldiff = fabs((xce[i]-xceref[i])/xceref[i]);
		if (reldiff <= epsilon) {
			printf("          %2d  %20.13E%20.13E%20.13E\n", i+1, xce[i], xceref[i], reldiff);
			continue;
		}
		verified = false;
		printf(" FAILURE: %2d  %20.13E%20.13E%20.13E\n", i+1, xce[i], xceref[i], reldiff);
	}

	if (!verified) printf(" Verification failed\n");
	else printf(" Verification Successful\n");

	return verified;
}

//---------------------------------------------------------------------
//  Prints the benchmark summary (class, global size, iterations, time,
//  Mop/s, verification status, version) followed by the CUDA device
//  properties stored in the CUDA* members.
//---------------------------------------------------------------------
void SPMZ::print_results(const bool verified, const char benchclass) {
	const double mops = compute_mflops();

	// divisors used to scale the raw device properties for display
	const double clock_div = 1.0e6;
	const double bytes_per_kb = 1024.0;
	const double bytes_per_mb = 1024.0*1024.0;

	printf("\n\n SP-MZ Benchmark Completed.\n");
	printf(" Class           =             %12c\n", benchclass);
	printf(" Size            =           %4dx%4dx%4d\n", gx_size, gy_size, gz_size);
	printf(" Iterations      =             %12d\n", niter);
	printf(" Time in seconds =             %12.2f\n", maxtime);
	printf(" Mop/s total     =             %12.2f\n", mops);
	printf(" Operation type  =           floating point\n");

	if (!verified) printf(" Verification    =             UNSUCCESSFUL\n");
	else printf(" Verification    =               SUCCESSFUL\n");

	printf(" Version         =             %12s\n", NPB_VERSION);

	// CUDA device summary
	printf("\n");
	printf(" CUDA device     = %24s\n", CUDAname);
	printf(" GPU multiprocs  =             %12d\n", CUDAmp);
	printf(" GPU clock rate  =             %8.3f GHz\n", (double)CUDAclock/clock_div);
	printf(" GPU memory      =             %9.2f MB\n", (double)CUDAmem/bytes_per_mb);
	printf(" GPU mem clock   =             %8.3f GHz\n", (double)CUDAmemclock/clock_div);
	printf(" GPU L2 cache    =             %9.2f KB\n", (double)CUDAl2cache/bytes_per_kb);

	printf("\n\n");
}

double SPMZ::compute_mflops() const {
	double mflops = 0.0;

	if (maxtime > 0.0) {
		for (int zone = 0; zone < num_zones; zone++) {
			double n3 = (double)(nx[zone]*ny[zone]*nz[zone]);
			double navg = (nx[zone]+ny[zone]+nz[zone])/3.0;
			double nsur = (nx[zone]*ny[zone]+nx[zone]*nz[zone]+ny[zone]*nz[zone])/3.0;
			mflops += (double)niter*1.0e-6*(881.174*n3-4683.91*nsur+11484.5*navg-19272.4)/maxtime;
		}
	}

	return mflops;
}

//---------------------------------------------------------------------
//  Selects the problem parameters for the given class (upper or lower
//  case letter), allocates the per-zone host arrays and fills in the
//  size and the four neighbour indices of every zone.
//  An unknown class prints an error and terminates the program.
//---------------------------------------------------------------------
void SPMZ::zone_setup(char benchclass) {
	// one row per supported problem class
	struct ClassParams {
		char upper, lower;      // accepted class letters
		int gx, gy, gz;         // global grid size
		int zones_per_dim;      // number of zones in x and in y
		double step;            // default time step
		int steps;              // default number of iterations
	};
	static const ClassParams classes[] = {
		{ 'S', 's',    24,   24,    6,   2, 0.015,   100 },
		{ 'W', 'w',    64,   64,    8,   4, 0.0015,  400 },
		{ 'A', 'a',   128,  128,   16,   4, 0.0015,  400 },
		{ 'B', 'b',   304,  208,   17,   8, 0.001,   400 },
		{ 'C', 'c',   480,  320,   28,  16, 0.00067, 400 },
		{ 'D', 'd',  1632, 1216,   34,  32, 0.0003,  500 },
		{ 'E', 'e',  4224, 3456,   92,  64, 0.0002,  500 },
		{ 'F', 'f', 12032, 8960,  250, 128, 0.0001,  500 },
		// classes K..Q are single-zone and correspond to the
		// SP benchmark classes S, W, A, B, C, D, E respectively
		{ 'K', 'k',    12,   12,   12,   1, 0.015,   100 },
		{ 'L', 'l',    36,   36,   36,   1, 0.0015,  400 },
		{ 'M', 'm',    64,   64,   64,   1, 0.0015,  400 },
		{ 'N', 'n',   102,  102,  102,   1, 0.001,   400 },
		{ 'O', 'o',   162,  162,  162,   1, 0.00067, 400 },
		{ 'P', 'p',   408,  408,  408,   1, 0.0003,  500 },
		{ 'Q', 'q',  1020, 1020, 1020,   1, 0.0001,  500 }
	};
	const int num_classes = (int)(sizeof(classes)/sizeof(classes[0]));

	const ClassParams *sel = NULL;
	for (int c = 0; c < num_classes; ++c) {
		if (benchclass == classes[c].upper || benchclass == classes[c].lower) {
			sel = &classes[c];
			break;
		}
	}
	if (sel == NULL) {
		printf("setparams: Internal error: invalid class %c\n", benchclass);
		exit(EXIT_FAILURE);
	}

	gx_size = sel->gx;
	gy_size = sel->gy;
	gz_size = sel->gz;
	x_zones = y_zones = sel->zones_per_dim;
	dt = sel->step;
	niter = sel->steps;

	num_zones = x_zones * y_zones;

	// zone dimensions
	nx = new int [num_zones];
	ny = new int [num_zones];
	nz = new int [num_zones];
	// indices of the neighbouring zones
	iz_west = new int [num_zones];
	iz_east = new int [num_zones];
	iz_south = new int [num_zones];
	iz_north = new int [num_zones];

	const bool show_zones = npb_verbose > 1;
	if (show_zones) printf("\n Zone sizes:\n");

	for (int row = 0; row < y_zones; ++row) {
		// neighbouring rows, wrapping around at the domain boundary
		const int south_row = (row == 0) ? y_zones-1 : row-1;
		const int north_row = (row+1 == y_zones) ? 0 : row+1;

		for (int col = 0; col < x_zones; ++col) {
			const int id = col + row*x_zones;

			// split the global grid as evenly as integer division allows
			nx[id] = (col+1)*gx_size/x_zones - col*gx_size/x_zones;
			ny[id] = (row+1)*gy_size/y_zones - row*gy_size/y_zones;
			nz[id] = gz_size;

			// neighbouring columns, wrapping around at the domain boundary
			const int west_col = (col == 0) ? x_zones-1 : col-1;
			const int east_col = (col+1 == x_zones) ? 0 : col+1;

			iz_west[id] = west_col + row*x_zones;
			iz_east[id] = east_col + row*x_zones;
			iz_south[id] = col + south_row*x_zones;
			iz_north[id] = col + north_row*x_zones;

			if (show_zones) printf("%5i:  %5i  x%5i  x%5i\n", id+1, nx[id], ny[id], nz[id]);
		}
	}
}

//---------------------------------------------------------------------
//  Looks up the verification reference data for a problem class.
//
//  Outputs:
//    niterref - number of time steps of the reference run
//    dtref    - time step of the reference run
//    xcrref   - reference RMS-norms of the residual
//    xceref   - reference RMS-norms of the solution error
//
//  For an unknown class niterref and dtref are 0 and all norms are 1.0.
//  Class F has a second reference set that is used when the run
//  performs exactly 50 iterations.
//---------------------------------------------------------------------
void SPMZ::get_ref_values(const char benchclass, int &niterref, double &dtref, double (&xcrref)[5], double (&xceref)[5]) const {
	struct RefEntry {
		char upper, lower;      // accepted class letters
		int steps;              // reference number of time steps
		double step;            // reference time step
		double residual[5];     // RMS-norms of residual
		double solerr[5];       // RMS-norms of solution error
	};

	static const RefEntry table[] = {
		// ---- multi-zone classes ----
		{ 'S', 's', 100, 1.5e-2,
		  { 0.7698876173566e+01, 0.1517766790280e+01, 0.2686805141546e+01, 0.1893688083690e+01, 0.1369739859738e+02 },
		  { 0.9566808043467e+01, 0.3894109553741e+01, 0.4516022447464e+01, 0.4099103995615e+01, 0.7776038881521e+01 } },
		{ 'W', 'w', 400, 1.5e-3,
		  { 0.1887636218359e+03, 0.1489637963542e+02, 0.4851711701400e+02, 0.3384633608154e+02, 0.4036632495857e+03 },
		  { 0.2975895149929e+02, 0.1341508175806e+02, 0.1585310846491e+02, 0.1450916426713e+02, 0.5854137431023e+02 } },
		{ 'A', 'a', 400, 1.5e-3,
		  { 0.2800097900548e+03, 0.2268349014438e+02, 0.7000852739901e+02, 0.5000771004061e+02, 0.5552068537578e+03 },
		  { 0.3112046666578e+02, 0.1172197785348e+02, 0.1486616708032e+02, 0.1313680576292e+02, 0.7365834058154e+02 } },
		{ 'B', 'b', 400, 1.0e-3,
		  { 0.5190422977921e+04, 0.3655458539065e+03, 0.1261126592633e+04, 0.1002038338842e+04, 0.1075902511165e+05 },
		  { 0.5469182054223e+03, 0.4983658028989e+02, 0.1418301776602e+03, 0.1097717156175e+03, 0.1260195162174e+04 } },
		{ 'C', 'c', 400, 0.67e-3,
		  { 0.5886814493676e+05, 0.3967324375474e+04, 0.1444126529019e+05, 0.1210582211196e+05, 0.1278941567976e+06 },
		  { 0.6414069213021e+04, 0.4069468353404e+03, 0.1585311908719e+04, 0.1270243185759e+04, 0.1441398372869e+05 } },
		{ 'D', 'd', 500, 0.3e-3,
		  { 0.7650595424723e+06, 0.5111519817683e+05, 0.1857213937602e+06, 0.1624096784059e+06, 0.1642416844328e+07 },
		  { 0.8169589578340e+05, 0.5252150843148e+04, 0.1984739188642e+05, 0.1662852404547e+05, 0.1761381855235e+06 } },
		{ 'E', 'e', 500, 0.2e-3,
		  { 0.5058298119039e+07, 0.3576837494299e+06, 0.1230856227329e+07, 0.1093895671677e+07, 0.1073671658903e+08 },
		  { 0.5288293042051e+06, 0.3471875724140e+05, 0.1282998930808e+06, 0.1095483394612e+06, 0.1129716454231e+07 } },
		{ 'F', 'f', 500, 0.1e-3,
		  { 0.3974469160412e+08, 0.3260760921834e+07, 0.9756215393494e+07, 0.8278472138497e+07, 0.7547269314441e+08 },
		  { 0.3475757666334e+07, 0.2386799228183e+06, 0.8436705443034e+06, 0.7339112115118e+06, 0.7327832757877e+07 } },
		// ---- single-zone classes ----
		// 12X12X12 grid after 100 time steps, with DT = 1.50d-02
		{ 'K', 'k', 100, 1.5e-2,
		  { 2.7470315451339479e-02, 1.0360746705285417e-02, 1.6235745065095532e-02, 1.5840557224455615e-02, 3.4849040609362460e-02 },
		  { 2.7289258557377227e-05, 1.0364446640837285e-05, 1.6154798287166471e-05, 1.5750704994480102e-05, 3.4177666183390531e-05 } },
		// 36X36X36 grid after 400 time steps, with DT = 1.5d-03
		{ 'L', 'l', 400, 1.5e-3,
		  { 0.1893253733584e-02, 0.1717075447775e-03, 0.2778153350936e-03, 0.2887475409984e-03, 0.3143611161242e-02 },
		  { 0.7542088599534e-04, 0.6512852253086e-05, 0.1049092285688e-04, 0.1128838671535e-04, 0.1212845639773e-03 } },
		// 64X64X64 grid after 400 time steps, with DT = 1.5d-03
		{ 'M', 'm', 400, 1.5e-3,
		  { 2.4799822399300195e0, 1.1276337964368832e0, 1.5028977888770491e0, 1.4217816211695179e0, 2.1292113035138280e0 },
		  { 1.0900140297820550e-04, 3.7343951769282091e-05, 5.0092785406541633e-05, 4.7671093939528255e-05, 1.3621613399213001e-04 } },
		// 102X102X102 grid after 400 time steps, with DT = 1.0d-03
		{ 'N', 'n', 400, 1.0e-3,
		  { 0.6903293579998e+02, 0.3095134488084e+02, 0.4103336647017e+02, 0.3864769009604e+02, 0.5643482272596e+02 },
		  { 0.9810006190188e-02, 0.1022827905670e-02, 0.1720597911692e-02, 0.1694479428231e-02, 0.1847456263981e-01 } },
		// 162X162X162 grid after 400 time steps, with DT = 0.67d-03
		{ 'O', 'o', 400, 0.67e-3,
		  { 0.5881691581829e+03, 0.2454417603569e+03, 0.3293829191851e+03, 0.3081924971891e+03, 0.4597223799176e+03 },
		  { 0.2598120500183e+00, 0.2590888922315e-01, 0.5132886416320e-01, 0.4806073419454e-01, 0.5483377491301e+00 } },
		// 408X408X408 grid after 500 time steps, with DT = 0.3d-03
		{ 'P', 'p', 500, 0.30e-3,
		  { 0.1044696216887e+05, 0.3204427762578e+04, 0.4648680733032e+04, 0.4238923283697e+04, 0.7588412036136e+04 },
		  { 0.5089471423669e+01, 0.5323514855894e+00, 0.1187051008971e+01, 0.1083734951938e+01, 0.1164108338568e+02 } },
		// 1020X1020X1020 grid after 500 time steps, with DT = 0.1d-03
		{ 'Q', 'q', 500, 0.10e-3,
		  { 0.6255387422609e+05, 0.1495317020012e+05, 0.2347595750586e+05, 0.2091099783534e+05, 0.4770412841218e+05 },
		  { 0.6742735164909e+02, 0.5390656036938e+01, 0.1680647196477e+02, 0.1536963126457e+02, 0.1575330146156e+03 } }
	};
	const int num_entries = (int)(sizeof(table)/sizeof(table[0]));

	// alternative class F reference data for a shortened run of 50 iterations
	static const RefEntry class_f_short = {
		'F', 'f', 50, 0.1e-3,
		{ 0.3198801286787e+09, 0.3435698123358e+08, 0.8489831174901e+08, 0.6940707552477e+08, 0.4478684103255e+09 },
		{ 0.6761099692230e+07, 0.5361561494769e+06, 0.1662878706114e+07, 0.1443852092060e+07, 0.1260678700480e+08 }
	};

	// locate the entry for the requested class
	const RefEntry *ref = NULL;
	for (int e = 0; e < num_entries; ++e) {
		if (benchclass == table[e].upper || benchclass == table[e].lower) {
			ref = &table[e];
			break;
		}
	}

	// unknown class: neutral defaults
	if (ref == NULL) {
		niterref = 0;
		dtref = 0.0;
		for (int m = 0; m < 5; m++) xcrref[m] = xceref[m] = 1.0;
		return;
	}

	// class F run with exactly 50 iterations uses its own reference set
	if (ref->upper == 'F' && niter == 50) ref = &class_f_short;

	niterref = ref->steps;
	dtref = ref->step;
	for (int m = 0; m < 5; m++) {
		xcrref[m] = ref->residual[m];
		xceref[m] = ref->solerr[m];
	}
}

void SPMZ::exact_rhs () {
	for (int zone = 0; zone < num_zones; zone++)
		exact_rhs(forcing[zone], nx[zone], ny[zone], nz[zone]);
}

void SPMZ::initialize() {
	for (int zone = 0; zone < num_zones; zone++)
		initialize(u[zone], nx[zone], ny[zone], nz[zone]);
}

void SPMZ::error_norm() {
	double xce_sub[5];
	for (int m = 0; m < 5; m++) xce[m] = 0.0;
	for (int zone = 0; zone < num_zones; zone++) {
		error_norm(xce_sub, u[zone], nx[zone], ny[zone], nz[zone]);
		for (int m = 0; m < 5; m++) xce[m] += xce_sub[m];
	}
}

void SPMZ::rhs_norm() {
	double xcr_sub[5];
	for (int m = 0; m < 5; m++) xcr[m] = 0.0;
	for (int zone = 0; zone < num_zones; zone++) {
		compute_rhs(rho_i, us, vs, ws, speed, qs, square, rhs, forcing[zone], u[zone], nx[zone], ny[zone], nz[zone]);
		rhs_norm(xcr_sub, rhs, nx[zone], ny[zone], nz[zone]);
		for (int m = 0; m < 5; m++) xcr[m] += xcr_sub[m]/dt;
	}
}

void SPMZ::adi() {
	for (int zone = 0; zone < num_zones; zone++)
		adi(rho_i, us, vs, ws, speed, qs, square, rhs, forcing[zone], u[zone], nx[zone], ny[zone], nz[zone]);
}
