//-------------------------------------------------------------------------//
//                                                                         //
//        N  A  S     P A R A L L E L     B E N C H M A R K S  3.3         //
//                                                                         //
//            C U D A      M U L T I - Z O N E    V E R S I O N            //
//                                                                         //
//                              B T - M Z                                  //
//                                                                         //
//-------------------------------------------------------------------------//
//                                                                         //
//    This benchmark is a CUDA version of the NPB BT code.                 //
//    Refer to NAS Technical Reports 95-020 and 99-011 for details.        //
//                                                                         //
//    Permission to use, copy, distribute and modify this software         //
//    for any purpose with or without fee is hereby granted.  We           //
//    request, however, that all derived work reference the NAS            //
//    Parallel Benchmarks 3.3. This software is provided "as is"           //
//    without express or implied warranty.                                 //
//                                                                         //
//    Information on NPB 3.3, including the technical report, the          //
//    original specifications, source code, results and information        //
//    on how to submit new results, is available at:                       //
//                                                                         //
//           http://www.nas.nasa.gov/Software/NPB/                         //
//                                                                         //
//    Send comments or suggestions to  npb@nas.nasa.gov                    //
//                                                                         //
//          NAS Parallel Benchmarks Group                                  //
//          NASA Ames Research Center                                      //
//          Mail Stop: T27A-1                                              //
//          Moffett Field, CA   94035-1000                                 //
//                                                                         //
//          E-mail:  npb@nas.nasa.gov                                      //
//          Fax:     (650) 604-3957                                        //
//                                                                         //
//-------------------------------------------------------------------------//

//-------------------------------------------------------------------------//
//                                                                         //
// Authors of original FORTRAN version: R. Van der Wijngaart, T. Harris,   //
//                                      M. Yarrow, H. Jin                  //
//                                                                         //
// CUDA implementation by: J. Duemmler                                     //
//                                                                         //
//-------------------------------------------------------------------------//

#include <stdio.h>
#include <math.h>
#include "main.h"

int main (int argc, char **argv) {
	char benchclass = argc > 1 ? argv[1][0] : 'S';
	BTMZ *btmz = new BTMZ();

	printf("\n\n NAS Parallel Benchmarks (NPB3.3-MZ-CUDA) - BT Multi-Zone CUDA Benchmark\n\n");
	btmz->env_setup();

	//---------------------------------------------------------------------
	//   set up domain sizes
	//---------------------------------------------------------------------
	btmz->zone_setup(benchclass);

	//---------------------------------------------------------------------
	//   read input data
	//---------------------------------------------------------------------
	btmz->read_input();

	//---------------------------------------------------------------------
	//   allocate CUDA device memory
	//---------------------------------------------------------------------
	btmz->allocate_device_memory();

	//---------------------------------------------------------------------
	//   set up coefficients
	//---------------------------------------------------------------------
	btmz->set_constants();

	btmz->initialize();
	btmz->exact_rhs();

	//---------------------------------------------------------------------
	//      do one time step to touch all code, and reinitialize
	//---------------------------------------------------------------------
	btmz->exch_qbc();
	btmz->adi();
	btmz->initialize();

	//---------------------------------------------------------------------
	//      start the benchmark time step loop
	//---------------------------------------------------------------------
	btmz->time_steps();

	//---------------------------------------------------------------------
	//      perform verification and print results
	//---------------------------------------------------------------------
	bool verified = btmz->verify(benchclass);
	btmz->print_results(verified, benchclass);

	//---------------------------------------------------------------------
	//      More timers
	//---------------------------------------------------------------------
	btmz->print_timers();

	delete btmz;

	return EXIT_SUCCESS;
}

// Constructs the benchmark object: creates the timer object and queries the
// CUDA device information.
BTMZ::BTMZ() {
	// The zone tables are only allocated later by zone_setup(), but the
	// destructor delete[]s them unconditionally. Start from null / zero zones
	// so that destroying an object on which zone_setup() never ran is well
	// defined (delete[] on a null pointer is a no-op).
	nx = ny = nz = NULL;
	iz_west = iz_east = iz_south = iz_north = NULL;
	num_zones = 0;

	timers = new Timers();
	get_cuda_info();
}

// Releases the host-side zone tables created by zone_setup() and then the
// device memory.
// NOTE(review): the Timers object created in the constructor is not deleted
// here - confirm whether free_device_memory() or another routine owns it.
BTMZ::~BTMZ() {
	// neighbour zone indices
	delete [] iz_north;
	delete [] iz_south;
	delete [] iz_east;
	delete [] iz_west;

	// per-zone grid dimensions
	delete [] nz;
	delete [] ny;
	delete [] nx;

	free_device_memory();
}


//---------------------------------------------------------------------
//  Reads the optional input file "inputbt-mz.data" and overrides the
//  class defaults with its contents:
//      line 1: number of iterations (used when > 0)
//      line 2: time step dt         (used when != 0)
//      line 3: timer flag           (timers are enabled when > 0)
//  A missing file, a missing line or an unparsable value leaves the
//  corresponding default untouched. Finally prints the zone layout and
//  the iteration count / time step that will be used.
//---------------------------------------------------------------------
void BTMZ::read_input() {
	FILE *file = fopen("inputbt-mz.data", "rt");
	if (file != NULL) {
		char line[1024];
		// 0 means "keep the default" for all three values, so a short or
		// malformed file can never feed indeterminate data into the checks below
		int niter_in = 0, itimer = 0;
		double dt_in = 0.0;
		printf("Reading from input file inputbt-mz.data\n");

		// fgets() already reserves room for the terminating '\0'
		if (fgets(line, sizeof(line), file) == NULL || sscanf(line, "%i", &niter_in) != 1) niter_in = 0;
		if (fgets(line, sizeof(line), file) == NULL || sscanf(line, "%lf", &dt_in) != 1) dt_in = 0.0;
		if (fgets(line, sizeof(line), file) == NULL || sscanf(line, "%i", &itimer) != 1) itimer = 0;
		fclose(file);

		if (niter_in > 0) niter = niter_in;
		if (dt_in != 0.0) dt = dt_in;
		if (itimer > 0) {
			timeron = true;
			Timers::init_timer();
		}
	}

	printf(" Number of zones: %3d x %3d\n", x_zones, y_zones);
	printf(" Iterations: %3d    dt: %10.6F\n\n", niter, dt);
}

// Sets the verbosity level npb_verbose from the NPB_VERBOSE environment
// variable. The level is 0 when the variable is unset, not a number, or
// negative; otherwise it is the parsed integer.
void BTMZ::env_setup() {
	npb_verbose = 0;

	const char *envstr = getenv("NPB_VERBOSE");
	if (envstr != NULL) {
		// initialised so that a non-numeric value cannot leave it indeterminate
		int iverbose = 0;
		if (sscanf(envstr, "%i", &iverbose) == 1 && iverbose > 0) npb_verbose = iverbose;
	}
}

//---------------------------------------------------------------------
//  Prints one line per solution component comparing a computed norm with
//  its reference value and returns true only if every relative difference
//  is within epsilon. The "<=" test is deliberately the accepting branch,
//  so a NaN difference is reported as a failure.
//---------------------------------------------------------------------
static bool report_norm_comparison(const double *computed, const double *reference, const double epsilon) {
	bool all_within = true;
	for (int m = 0; m < 5; m++) {
		const double reldiff = fabs((computed[m]-reference[m])/reference[m]);
		if (reldiff <= epsilon) {
			printf("          %2d  %20.13E%20.13E%20.13E\n", m+1, computed[m], reference[m], reldiff);
		} else {
			all_within = false;
			printf(" FAILURE: %2d  %20.13E%20.13E%20.13E\n", m+1, computed[m], reference[m], reldiff);
		}
	}
	return all_within;
}

//---------------------------------------------------------------------
//  verification routine
//
//  Computes the solution error norms and residual norms, compares them
//  (and dt / niter) with the reference values of the given class, prints
//  the comparison and returns whether everything matched.
//---------------------------------------------------------------------
bool BTMZ::verify(const char benchclass) {
	// tolerance level
	const double epsilon = 1.0e-08;

	// compute the error norm and the residual norm (fills xce and xcr)
	error_norm();
	rhs_norm();

	// known reference values for this class
	int niterref;
	double dtref, xcrref[5], xceref[5];
	get_ref_values(benchclass, niterref, dtref, xcrref, xceref);

	// output the comparison of computed results to known cases
	printf("\n Verification being performed for class %c\n", benchclass);
	printf(" Accuracy setting for epsilon = %20.13E\n", epsilon);

	bool verified = true;
	if (fabs(dt-dtref) > epsilon) {
		verified = false;
		printf(" DT does not match the reference value of %15.8E\n", dtref);
	} else if (niter != niterref) {
		verified = false;
		printf(" NITER does not match the reference value of %5d\n", niterref);
	}

	printf(" Comparison of RMS-norms of residual\n");
	if (!report_norm_comparison(xcr, xcrref, epsilon)) verified = false;

	printf(" Comparison of RMS-norms of solution error\n");
	if (!report_norm_comparison(xce, xceref, epsilon)) verified = false;

	if (!verified) printf(" Verification failed\n");
	else printf(" Verification Successful\n");

	return verified;
}

// Prints the final benchmark summary (class, grid size, iterations, run time,
// Mop/s rate, verification status, version) followed by the properties of the
// CUDA device that were recorded in the CUDA* members.
void BTMZ::print_results(const bool verified, const char benchclass) {
	// derived quantities, computed up front; the scale factors match the
	// unit labels used in the output below
	const double mflops = compute_mflops();
	const double clock_ghz = (double)CUDAclock/1000000.;
	const double memory_mb = (double)CUDAmem/(1024.*1024.);
	const double memclock_ghz = (double)CUDAmemclock/1000000.;
	const double l2cache_kb = (double)CUDAl2cache/1024.;

	// benchmark section
	printf("\n\n BT-MZ Benchmark Completed.\n");
	printf(" Class           =             %12c\n", benchclass);
	printf(" Size            =           %4dx%4dx%4d\n", gx_size, gy_size, gz_size);
	printf(" Iterations      =             %12d\n", niter);
	printf(" Time in seconds =             %12.2f\n", maxtime);
	printf(" Mop/s total     =             %12.2f\n", mflops);
	printf(" Operation type  =           floating point\n");
	if (!verified) {
		printf(" Verification    =             UNSUCCESSFUL\n");
	} else {
		printf(" Verification    =               SUCCESSFUL\n");
	}
	printf(" Version         =             %12s\n", NPB_VERSION);

	// device section
	printf("\n");
	printf(" CUDA device     = %24s\n", CUDAname);
	printf(" GPU multiprocs  =             %12d\n", CUDAmp);
	printf(" GPU clock rate  =             %8.3f GHz\n", clock_ghz);
	printf(" GPU memory      =             %9.2f MB\n", memory_mb);
	printf(" GPU mem clock   =             %8.3f GHz\n", memclock_ghz);
	printf(" GPU L2 cache    =             %9.2f KB\n", l2cache_kb);

	printf("\n\n");
}

double BTMZ::compute_mflops() const {
	double mflops = 0.0;

	if (maxtime > 0.0) {
		for (int zone = 0; zone < num_zones; zone++) {
			double n3 = (double)(nx[zone]*ny[zone]*nz[zone]);
			double navg = (nx[zone]+ny[zone]+nz[zone])/3.0;
			double nsur = (nx[zone]*ny[zone]+nx[zone]*nz[zone]+ny[zone]*nz[zone])/3.0;
			mflops += (double)niter*1.0e-6*(3478.8*n3-17655.7*nsur+28023.7*navg)/maxtime;
		}
	}

	return mflops;
}

//---------------------------------------------------------------------
//  Splits `total` grid points into `zones` consecutive intervals and
//  stores the interval lengths in sizes[0..zones-1].
//  If `ratio` differs from one, the interval lengths follow a geometric
//  progression whose largest/smallest length ratio is `ratio`; otherwise
//  the intervals are essentially equal.
//---------------------------------------------------------------------
static void partition_axis(const int total, const int zones, const double ratio, int *sizes) {
	int prev_top = 0;	// top index of the previous interval (1-based, so 0 before the first)

	if (fabs(ratio-1.0) > 1e-10) {
		// zone stretching, only if the prescribed size ratio is substantially different from unity
		const double r = exp(log(ratio)/(zones-1));
		const double smallest = ((double)total)*(r-1.0)/(pow(r,zones)-1.0);
		for (int k = 0; k < zones; k++) {
			// top of the interval; the slightly tricked rounding (+.45, then
			// truncation) keeps the intervals increasing monotonically in size
			const int top = smallest*(pow(r,k+1)-1.0)/(r-1.0)+.45;
			sizes[k] = top-prev_top;
			prev_top = top;
		}
	} else {
		// essentially equal sized intervals
		for (int k = 0; k < zones; k++) {
			const int top = ((k+1)*total)/zones;
			sizes[k] = top-prev_top;
			prev_top = top;
		}
	}
}

//---------------------------------------------------------------------
//  Selects the problem parameters of the given class (global grid size,
//  zone counts, dt, niter), computes the size of every zone and records
//  each zone's west/east/south/north neighbour with periodic wrap-around.
//  Terminates the program for an unknown class.
//---------------------------------------------------------------------
void BTMZ::zone_setup(char benchclass) {
	struct ClassSpec {
		char upper, lower;	// class letter in both cases
		int gx, gy, gz;		// global grid size
		int zones;		// number of zones in x and in y
		double ratio;		// largest/smallest zone size ratio
		double dt;		// default time step
		int niter;		// default number of iterations
	};
	static const ClassSpec specs[] = {
		{ 'S', 's',    24,   24,    6,   2, 3.0, 0.01,      60 },
		{ 'W', 'w',    64,   64,    8,   4, 4.5, 0.0008,   200 },
		{ 'A', 'a',   128,  128,   16,   4, 4.5, 0.0008,   200 },
		{ 'B', 'b',   304,  208,   17,   8, 4.5, 0.0003,   200 },
		{ 'C', 'c',   480,  320,   28,  16, 4.5, 0.0001,   200 },
		{ 'D', 'd',  1632, 1216,   34,  32, 4.5, 0.00002,  250 },
		{ 'E', 'e',  4224, 3456,   92,  64, 4.5, 0.000004, 250 },
		{ 'F', 'f', 12032, 8960,  250, 128, 4.5, 0.000001, 250 },
		// classes K..Q correspond to the single-zone BT benchmark classes S, W, A, B, C, D, E
		{ 'K', 'k',    12,   12,   12,   1, 1.0, 0.010,     60 },
		{ 'L', 'l',    24,   24,   24,   1, 1.0, 0.0008,   200 },
		{ 'M', 'm',    64,   64,   64,   1, 1.0, 0.0008,   200 },
		{ 'N', 'n',   102,  102,  102,   1, 1.0, 0.0003,   200 },
		{ 'O', 'o',   162,  162,  162,   1, 1.0, 0.0001,   200 },
		{ 'P', 'p',   408,  408,  408,   1, 1.0, 0.00002,  250 },
		{ 'Q', 'q',  1020, 1020, 1020,   1, 1.0, 0.4e-5,   250 }
	};
	const int num_specs = sizeof(specs)/sizeof(specs[0]);

	const ClassSpec *spec = NULL;
	for (int s = 0; s < num_specs; s++) {
		if (benchclass == specs[s].upper || benchclass == specs[s].lower) {
			spec = &specs[s];
			break;
		}
	}
	if (spec == NULL) {
		printf("setparams: Internal error: invalid class %c\n", benchclass);
		exit(EXIT_FAILURE);
	}

	gx_size = spec->gx;
	gy_size = spec->gy;
	gz_size = spec->gz;
	x_zones = y_zones = spec->zones;
	dt = spec->dt;
	niter = spec->niter;

	// zone extents along x and y
	int *x_size = new int [x_zones];
	int *y_size = new int [y_zones];
	partition_axis(gx_size, x_zones, spec->ratio, x_size);
	partition_axis(gy_size, y_zones, spec->ratio, y_size);

	num_zones = x_zones * y_zones;
	nx = new int [num_zones];
	ny = new int [num_zones];
	nz = new int [num_zones];
	iz_west = new int [num_zones];
	iz_east = new int [num_zones];
	iz_south = new int [num_zones];
	iz_north = new int [num_zones];

	if (npb_verbose > 1) printf("\n Zone sizes:\n");
	for (int row = 0; row < y_zones; row++) {
		// neighbouring rows, wrapping around at the domain boundary
		const int south_row = (row == 0) ? y_zones-1 : row-1;
		const int north_row = (row+1 == y_zones) ? 0 : row+1;

		for (int col = 0; col < x_zones; col++) {
			// neighbouring columns, wrapping around at the domain boundary
			const int west_col = (col == 0) ? x_zones-1 : col-1;
			const int east_col = (col+1 == x_zones) ? 0 : col+1;

			// zones are numbered row by row
			const int zone_no = row*x_zones+col;
			nx[zone_no] = x_size[col];
			ny[zone_no] = y_size[row];
			nz[zone_no] = gz_size;

			iz_west[zone_no] = west_col + row*x_zones;
			iz_east[zone_no] = east_col + row*x_zones;
			iz_south[zone_no] = col + south_row*x_zones;
			iz_north[zone_no] = col + north_row*x_zones;

			if (npb_verbose > 1) printf("%5i:  %5i  x%5i  x%5i\n", zone_no+1, nx[zone_no], ny[zone_no], nz[zone_no]);
		}
	}

	delete [] y_size;
	delete [] x_size;
}

//---------------------------------------------------------------------
//  Looks up the verification reference data of a problem class.
//
//  benchclass : class letter (either case)
//  niterref   : out - number of iterations the reference data belongs to
//  dtref      : out - time step the reference data belongs to
//  xcrref     : out - reference RMS-norms of the residual
//  xceref     : out - reference RMS-norms of the solution error
//
//  For an unknown class niterref and dtref are 0 and all norms are 1.0.
//  Class F carries a second reference set that is selected when the run
//  uses 25 iterations.
//---------------------------------------------------------------------
void BTMZ::get_ref_values(const char benchclass, int &niterref, double &dtref, double (&xcrref)[5], double (&xceref)[5]) const {
	struct RefData {
		char upper, lower;	// class letter in both cases
		double dt;		// reference time step
		int niter;		// reference number of iterations
		double xcr[5];		// RMS-norms of residual
		double xce[5];		// RMS-norms of solution error
	};
	static const RefData table[] = {
		// class S
		{ 'S', 's', 1.0e-2, 60,
		  { 0.1047687395830e+04, 0.9419911314792e+02, 0.2124737403068e+03, 0.1422173591794e+03, 0.1135441572375e+04 },
		  { 0.1775416062982e+03, 0.1875540250835e+02, 0.3863334844506e+02, 0.2634713890362e+02, 0.1965566269675e+03 } },
		// class W
		{ 'W', 'w', 0.8e-3, 200,
		  { 0.5562611195402e+05, 0.5151404119932e+04, 0.1080453907954e+05, 0.6576058591929e+04, 0.4528609293561e+05 },
		  { 0.7185154786403e+04, 0.7040472738068e+03, 0.1437035074443e+04, 0.8570666307849e+03, 0.5991235147368e+04 } },
		// class A
		{ 'A', 'a', 0.8e-3, 200,
		  { 0.5536703889522e+05, 0.5077835038405e+04, 0.1067391361067e+05, 0.6441179694972e+04, 0.4371926324069e+05 },
		  { 0.6716797714343e+04, 0.6512687902160e+03, 0.1332930740128e+04, 0.7848302089180e+03, 0.5429053878818e+04 } },
		// class B
		{ 'B', 'b', 3.0e-4, 200,
		  { 0.4461388343844e+06, 0.3799759138035e+05, 0.8383296623970e+05, 0.5301970201273e+05, 0.3618106851311e+06 },
		  { 0.4496733567600e+05, 0.3892068540524e+04, 0.8763825844217e+04, 0.5599040091792e+04, 0.4082652045598e+05 } },
		// class C
		{ 'C', 'c', 1.0e-4, 200,
		  { 0.3457703287806e+07, 0.3213621375929e+06, 0.7002579656870e+06, 0.4517459627471e+06, 0.2818715870791e+07 },
		  { 0.2059106993570e+06, 0.1680761129461e+05, 0.4080731640795e+05, 0.2836541076778e+05, 0.2136807610771e+06 } },
		// class D
		{ 'D', 'd', 2.0e-5, 250,
		  { 0.4250417034981e+08, 0.4293882192175e+07, 0.9121841878270e+07, 0.6201357771439e+07, 0.3474801891304e+08 },
		  { 0.9462418484583e+06, 0.7884728947105e+05, 0.1902874461259e+06, 0.1361858029909e+06, 0.9816489456253e+06 } },
		// class E
		{ 'E', 'e', 4.0e-6, 250,
		  { 0.5744815962469e+09, 0.6088696479719e+08, 0.1276325224438e+09, 0.8947040105616e+08, 0.4726115284807e+09 },
		  { 0.4114447054461e+07, 0.3570776728190e+06, 0.8465106191458e+06, 0.6147182273817e+06, 0.4238908025163e+07 } },
		// class F (250 iterations; the 25-iteration set is handled below)
		{ 'F', 'f', 1.0e-6, 250,
		  { 0.6524078317845e+10, 0.7020439279514e+09, 0.1467588422194e+10, 0.1042973064137e+10, 0.5411102201141e+10 },
		  { 0.1708795375347e+08, 0.1514359936802e+07, 0.3552878359250e+07, 0.2594549582184e+07, 0.1749809607845e+08 } },
		// class K: 12X12X12 grids after 60 time steps, with DT = 1.0d-02
		{ 'K', 'k', 1.0e-2, 60,
		  { 1.7034283709541311e-01, 1.2975252070034097e-02, 3.2527926989486055e-02, 2.6436421275166801e-02, 1.9211784131744430e-01 },
		  { 4.9976913345811579e-04, 4.5195666782961927e-05, 7.3973765172921357e-05, 7.3821238632439731e-05, 8.9269630987491446e-04 } },
		// class L: 24X24X24 grids after 200 time steps, with DT = 0.8d-3
		{ 'L', 'l', 0.8e-3, 200,
		  { 0.1125590409344e+03, 0.1180007595731e+02, 0.2710329767846e+02, 0.2469174937669e+02, 0.2638427874317e+03 },
		  { 0.4419655736008e+01, 0.4638531260002e+00, 0.1011551749967e+01, 0.9235878729944e+00, 0.1018045837718e+02 } },
		// class M: 64X64X64 grids after 200 time steps, with DT = 0.8d-3
		{ 'M', 'm', 0.8e-3, 200,
		  { 1.0806346714637264e+02, 1.1319730901220813e+01, 2.5974354511582465e+01, 2.3665622544678910e+01, 2.5278963211748344e+02 },
		  { 4.2348416040525025e+00, 4.4390282496995698e-01, 9.6692480136345650e-01, 8.8302063039765474e-01, 9.7379901770829278e+00 } },
		// class N: 102X102X102 grids after 200 time steps, with DT = 3.0d-04
		{ 'N', 'n', 3.0e-4, 200,
		  { 1.4233597229287254e+03, 9.9330522590150238e+01, 3.5646025644535285e+02, 3.2485447959084092e+02, 3.2707541254659363e+03 },
		  { 5.2969847140936856e+01, 4.4632896115670668e+00, 1.3122573342210174e+01, 1.2006925323559144e+01, 1.2459576151035986e+02 } },
		// class O: 162X162X162 grids after 200 time steps, with DT = 1.0d-04
		{ 'O', 'o', 1.0e-4, 200,
		  { 0.62398116551764615e+04, 0.50793239190423964e+03, 0.15423530093013596e+04, 0.13302387929291190e+04, 0.11604087428436455e+05 },
		  { 0.16462008369091265e+03, 0.11497107903824313e+02, 0.41207446207461508e+02, 0.37087651059694167e+02, 0.36211053051841265e+03 } },
		// class P: 408x408x408 grids after 250 time steps, with DT = 0.2d-04
		{ 'P', 'p', 0.2e-4, 250,
		  { 0.2533188551738e+05, 0.2346393716980e+04, 0.6294554366904e+04, 0.5352565376030e+04, 0.3905864038618e+05 },
		  { 0.3100009377557e+03, 0.2424086324913e+02, 0.7782212022645e+02, 0.6835623860116e+02, 0.6065737200368e+03 } },
		// class Q: 1020x1020x1020 grids after 250 time steps, with DT = 0.4d-05
		{ 'Q', 'q', 0.4e-5, 250,
		  { 0.9795372484517e+05, 0.9739814511521e+04, 0.2467606342965e+05, 0.2092419572860e+05, 0.1392138856939e+06 },
		  { 0.4327562208414e+03, 0.3699051964887e+02, 0.1089845040954e+03, 0.9462517622043e+02, 0.7765512765309e+03 } }
	};
	const int num_entries = sizeof(table)/sizeof(table[0]);

	// class F reference norms for a run of 25 iterations
	static const double xcr_f25[5] = { 0.3565049484400e+11, 0.3752029586145e+10, 0.7805935552197e+10, 0.5685995438056e+10, 0.2908811276266e+11 };
	static const double xce_f25[5] = { 0.1805995755490e+08, 0.1632306899424e+07, 0.3778610439036e+07, 0.2749319818549e+07, 0.1814401049296e+08 };

	// defaults reported for an unknown class
	niterref = 0;
	dtref = 0.0;
	for (int m = 0; m < 5; m++) xcrref[m] = xceref[m] = 1.0;

	const RefData *ref = NULL;
	for (int e = 0; e < num_entries; e++) {
		if (benchclass == table[e].upper || benchclass == table[e].lower) {
			ref = &table[e];
			break;
		}
	}
	if (ref == NULL) return;

	dtref = ref->dt;
	niterref = ref->niter;
	for (int m = 0; m < 5; m++) {
		xcrref[m] = ref->xcr[m];
		xceref[m] = ref->xce[m];
	}

	// shortened class F run: switch to the 25-iteration reference set (dtref is unchanged)
	if (ref->upper == 'F' && niter == 25) {
		niterref = 25;
		for (int m = 0; m < 5; m++) {
			xcrref[m] = xcr_f25[m];
			xceref[m] = xce_f25[m];
		}
	}
}

void BTMZ::exact_rhs () {
	for (int zone = 0; zone < num_zones; zone++)
		exact_rhs(forcing[zone], nx[zone], ny[zone], nz[zone]);
}

void BTMZ::initialize() {
	for (int zone = 0; zone < num_zones; zone++)
		initialize(u[zone], nx[zone], ny[zone], nz[zone]);
}

void BTMZ::error_norm() {
	double xce_sub[5];
	for (int m = 0; m < 5; m++) xce[m] = 0.0;
	for (int zone = 0; zone < num_zones; zone++) {
		error_norm(xce_sub, u[zone], nx[zone], ny[zone], nz[zone]);
		for (int m = 0; m < 5; m++) xce[m] += xce_sub[m];
	}
}

void BTMZ::rhs_norm() {
	double xcr_sub[5];
	for (int m = 0; m < 5; m++) xcr[m] = 0.0;
	for (int zone = 0; zone < num_zones; zone++) {
		compute_rhs(rho_i, us, vs, ws, qs, square, rhs, forcing[zone], u[zone], nx[zone], ny[zone], nz[zone]);
		rhs_norm(xcr_sub, rhs, nx[zone], ny[zone], nz[zone]);
		for (int m = 0; m < 5; m++) xcr[m] += xcr_sub[m]/dt;
	}
}

void BTMZ::adi() {
	for (int zone = 0; zone < num_zones; zone++)
		adi(rho_i, us, vs, ws, qs, square, rhs, lhs, forcing[zone], u[zone], nx[zone], ny[zone], nz[zone]);
}
