#include "copy.h"
/*
 * Entry point of the benchmark driver.
 *
 * 1. Installs the default run parameters.
 * 2. Overrides them from the command line.  Each argument is split by
 *    tokenize() at ',' and '='; from the way ntoks is used below it is taken
 *    to be the total token count, tag included (assumption - confirm against
 *    tokenize()).
 * 3. Runs every sort listed in sn[] (terminated by -1) on the selected data
 *    set.  If a "vl" argument was given, each sort is run once for every
 *    value in v[] (terminated by -1), with the parameter named by VP set to
 *    that value before the run.
 *
 * A malformed or unknown argument, or a data set number outside dnam[],
 * prints the help text.  Always returns 0.
 */
int main(int argc, char *argv[])
{
	int i, j, nreps, ntoks, set, dset1, srt, v[20], sn[20] = {10, 11, 12, 20, 21, 22, 30, 31, 32, -1};
	int ndsets, maxsn, maxv;
	string VP = NULL, tok[20];
	string dnam[] = {"set1_nodup", "set2_nodup", "set3_nodup", 
		 "set4_nodup", "set5_nodup", "set6_nodup", "set1_genome", 
		 "set2_genome", "set3_genome", "set4_genome", "set5_genome", 
		 "set6_genome", "set1_random", "set2_random", "set3_random", 
		 "set4_random", "set5_random", "set6_random", "set1_url", 
		 "set2_url", "set3_url", "set4_url", "set5_url"};

	/* Capacities derived from the arrays; one slot is kept for the -1 terminator. */
	ndsets = (int) (sizeof dnam / sizeof dnam[0]);
	maxsn = (int) (sizeof sn / sizeof sn[0]) - 1;
	maxv = (int) (sizeof v / sizeof v[0]) - 1;
	v[0] = -1;

	/* Defaults; each can be overridden by a command-line tag. */
	INPUTDIR = "data/";
	OUTPUTDIR = "sort-output/";
	MAXKEYS = 100000000;
	MAXBYTES = 700000000;
	CACHESIZE = 1<<19;
	dset1 = 0;
	INPUTORDER = 0;
	NSEGS0 = 50;
	SEGSIZE0 = 1<<15;
	BINSIZE0 = 1<<9;
	SAMPLERATE0 = 4000;
	FREEBURSTS0 = 100;
	TAILSIZE0 = 12;
	TAILRATE = 80;
	nreps = 1;

	for (i = 1; i < argc; ++i)
	{
		ntoks = tokenize(argv[i], tok, ',', '=');

		/* Every tag needs at least one operand; never read tok[1] without it. */
		if (ntoks < 2) {showhelp(); return(0);}

		if		  (strcmp(tok[0], "ds") == 0) {sscanf(tok[1], "%d", &dset1);}
		else if (strcmp(tok[0], "nr") == 0) {sscanf(tok[1], "%d", &nreps);}
		else if (strcmp(tok[0], "cs") == 0) {sscanf(tok[1], "%d", &CACHESIZE);}
		else if (strcmp(tok[0], "wr") == 0) {sscanf(tok[1], "%d", &WRITEFILE);}
		else if (strcmp(tok[0], "fb") == 0) {sscanf(tok[1], "%d", &FREEBURSTS0);}
		else if (strcmp(tok[0], "mb") == 0) {sscanf(tok[1], "%d", &MAXBYTES);}
		else if (strcmp(tok[0], "mk") == 0) {sscanf(tok[1], "%d", &MAXKEYS);}
		else if (strcmp(tok[0], "g0") == 0) {sscanf(tok[1], "%d", &NSEGS0);}
		else if (strcmp(tok[0], "or") == 0) {sscanf(tok[1], "%d", &INPUTORDER);}
		else if (strcmp(tok[0], "sr") == 0) {sscanf(tok[1], "%d", &SAMPLERATE0);}
		else if (strcmp(tok[0], "s0") == 0) {sscanf(tok[1], "%d", &BINSIZE0);}
		else if (strcmp(tok[0], "tf") == 0) {sscanf(tok[1], "%d", &TAILRATE);}
		else if (strcmp(tok[0], "tl") == 0) {sscanf(tok[1], "%d", &TAILSIZE0);}
		else if (strcmp(tok[0], "sn") == 0)
		{
			/* Replace the default sort list; clamp so the terminator always fits. */
			for (srt = 0; srt < ntoks - 1 && srt < maxsn; ++srt)
				sscanf(tok[srt + 1], "%d", &sn[srt]);
			sn[srt] = -1;
		}
		else if (strcmp(tok[0], "vl") == 0)
		{
			/* "vl" needs the name of the varied tag plus at least one value. */
			if (ntoks < 3) {showhelp(); return(0);}
			VP = tok[1];
			for (j = 2; j < ntoks && j - 2 < maxv; ++j)
				sscanf(tok[j], "%d", &v[j - 2]);
			v[j - 2] = -1;
		}
		else {showhelp(); return(0);}
	}

	/* dnam[] is indexed directly by the data set number, so reject anything outside it. */
	if (dset1 < 0 || dset1 >= ndsets) {showhelp(); return(0);}

	sayln("DS SORTNAME CACHESIZE FREEBURSTS . NKEYS . Ti Tg Tb Tt . Tmed Tmin Tnorm ");

	for (set = dset1; set <= dset1; ++set)
	{
		DATANAME = dnam[set];

		for (srt = 0; sn[srt] >= 0; ++srt)
		{
			if (VP == NULL)
			{
				setsort(sn[srt]);
				doreps(nreps);
				continue;
			}

			/*
			 * Variable-parameter run: walk the value list itself.  (The
			 * previous code indexed v[] with the argv loop counter, which
			 * has nothing to do with the list.)
			 */
			for (j = 0; v[j] >= 0; ++j)
			{
				if (strcmp(VP, "cs") == 0) CACHESIZE = v[j];
				else if (strcmp(VP, "fb") == 0) FREEBURSTS0 = v[j];
				else if (strcmp(VP, "mb") == 0) MAXBYTES = v[j];
				else if (strcmp(VP, "mk") == 0) MAXKEYS = v[j];
				else if (strcmp(VP, "g0") == 0) NSEGS0 = v[j];
				else if (strcmp(VP, "or") == 0) INPUTORDER = v[j];
				else if (strcmp(VP, "sr") == 0) SAMPLERATE0 = v[j];
				else if (strcmp(VP, "s0") == 0) BINSIZE0 = v[j];
				else if (strcmp(VP, "tf") == 0) TAILRATE = v[j];
				else if (strcmp(VP, "tl") == 0) TAILSIZE0 = v[j];
				setsort(sn[srt]);
				doreps(nreps);
			}
		}
		/* Reset the byte/allocation counters once the data set is done. */
		NBYTES = MAXALLOCATED = ALLOCATED = 0;
	}
	return(0);
}
void showhelp()
{
	sayln(" ??\n");
	sayln("Available arguments (use in any order) are:\n");
	sayln("	nr		# of repeats to run			ds	  first data set to run");
	sayln("	cs		cache size(bytes)			s0	  initial container size");
	sayln("	fb		# of free bursts			g0	  # of buffer segments");
	sayln("	mk		maximum keys				mb	  maximum bytes");
	sayln("	or		order of input data			sr	  sampling rate");
	sayln("	tl		tail length for CPL-bs		tf	  tail factor for CPL-bs");
	sayln("	wr		output sorted strings		sn    sort variant(s) to run");
	sayln("Arguments are separated by white space and may not contain white space. Each argument must");
	sayln("start with a tag followed by '=' or ', ' and one or more numbers or additional tags.");
	sayln("Tag 'sn' takes a list of sort numbers.  Tag 'vl' takes a second tag, followed by a list");
	sayln("of up to 20 values for the argument specified by the second tag.\n");
	sayln("Available sorts are:\n");
	sayln("	 10	C-bs		Copying burstsort");
	sayln("	 11	Cf-bs		Same + free bursts set by arg FREEBURSTS");
	sayln("	 12	Cs-bs		Same + sampling set by arg SAMPLERATE");
	sayln("	 20	CP-bs		Record burstsort");
	sayln("	 21	CPf-bs		Same + free bursts set by arg FREEBURSTS");
	sayln("	 22	CPs-bs		Same + sampling set by arg SAMPLERATE");
	sayln("	 30	CPL-bs		Paging burstsort");
	sayln("	 31	CPLf-bs		Same + free bursts set by arg FREEBURSTS");
	sayln("	 32	CPLs-bs		Same + sampling set by arg SAMPLERATE");
	sayln("Available data sets are:\n");
	sayln("	 0	Set 1 of No Duplicates (100, 000 strings)");
	sayln("	 1	Set 2 of No Duplicates (316, 230 strings)");
	sayln("	 2	Set 3 of No Duplicates (1, 000, 000 strings)");
	sayln("	 3	Set 4 of No Duplicates (3, 162, 300 strings)");
	sayln("	 4	Set 5 of No Duplicates (10, 000, 000 strings)");
	sayln("	 5	Set 6 of No Duplicates (31, 623, 000 strings)\n");
	sayln("	 6	Set 1 of Genome (100, 000 strings)");
	sayln("	 7	Set 2 of Genome (316, 230 strings)");
	sayln("	 8	Set 3 of Genome (1, 000, 000 strings)");
	sayln("	 9	Set 4 of Genome (3, 162, 300 strings)");
	sayln("	 10 Set 5 of Genome (10, 000, 000 strings)");
	sayln("	 11 Set 6 of Genome (31, 623, 000 strings)\n");
	sayln("	 12	 Set 1 of Random (100, 000 strings)");
	sayln("	 13	 Set 2 of Random (316, 230 strings)");
	sayln("	 14	 Set 3 of Random (1, 000, 000 strings)");
	sayln("	 15	 Set 4 of Random (3, 162, 300 strings)");
	sayln("	 16	 Set 5 of Random (10, 000, 000 strings)");
	sayln("	 17	 Set 6 of Random (31, 623, 000 strings)\n");
	sayln("	 18	 Set 1 of URL (100, 000 strings)");
	sayln("	 19	 Set 2 of URL (316, 230 strings)");
	sayln("	 20	 Set 3 of URL (1, 000, 000 strings)");
	sayln("	 21	 Set 4 of URL (3, 162, 300 strings)");
	sayln("	 22	 Set 5 of URL (10, 000, 000 strings)\n");
	sayln("	 23	Set 3 of Random Strings, each of length 100 characters\n");
	sayln("Usage: sn=10, 11, 20 ds=5 nr=1 will run sorts 10, 11 and 20 once on dataset 5.\n\n");
} 
void setsort(int sn)
{
	FREEBURSTS = SAMPLERATE = 0; 
	
	switch(sn)
	{
		case 10: TEST = sbdo; SORTNAME = "C-burstsort"; 
					break;
		case 11: TEST = sbdo; SORTNAME = "fbC-burstsort"; 
					FREEBURSTS = FREEBURSTS0; break;
		case 12: TEST = sbdo; SORTNAME = "sC-burstsort"; 
					SAMPLERATE = SAMPLERATE0; break;
		case 20: TEST = rbdo; SORTNAME = "CP-burstsort"; 
					break;
		case 21: TEST = rbdo; SORTNAME = "fbCP-burstsort"; 
					FREEBURSTS = FREEBURSTS0; break;
		case 22: TEST = rbdo; SORTNAME = "sCP-burstsort"; 
					SAMPLERATE = SAMPLERATE0; break;
		case 30: TEST = pbdo; SORTNAME = "CPL-burstsort"; 
					break;
		case 31: TEST = pbdo; SORTNAME = "fbCPL-burstsort"; 
					FREEBURSTS = FREEBURSTS0; break;
		case 32: TEST = pbdo; SORTNAME = "sCPL-burstsort"; 
					SAMPLERATE = SAMPLERATE0; break;
	}
}
/*
 * Emit the leading columns of a result row (data set name, sort name,
 * cache size, free-burst count, then a dot separator).  Nothing is printed
 * when REP is non-zero.
 */
void listinputs()
{
	if (REP)
		return;

	say(DATANAME);
	say(SORTNAME);
	isay(CACHESIZE);
	isay(FREEBURSTS);
	dot();
}
void doreps(int nr)
{
	double rt, pt, st, it, gt, bt, ct, ht , kt, wt, ist, xst, mo, mbs;
	
	treset(MAXTMRS, nr);
	rt = pt = st = it = gt = bt = ct = ht = 
	kt = wt = ist = xst = mo = mbs = 0;
	
	if (INPUTORDER == 0)
		 FILETYPE = "dat";
	else if (INPUTORDER == 1)
		 FILETYPE = "srt";
	else
		 FILETYPE = "rev";
	
	
	TEST(nr, fp(INPUTDIR, DATANAME, FILETYPE));
	isay(NKEYS); dot();
	
	rt = tmed(tr); pt = tmed(tx); st = tmed(ts); it = tmed(ti); gt = tmed(tg);
	bt = tmed(tb); ct = tmed(tc); ht = tmed(th); kt = tmed(tk); wt = tmed(tw);
	ist = pt+st+it+gt+bt+ct; xst = ist+rt+wt;
	
	mo = DR(MAXALLOCATED, NBYTES); 
	
	
	mbs = DR(NBYTES, DR(ist, 1000));
	
	tmin(nr);
	
	dsay(it, 0); dsay(gt, 0); dsay(bt, 0); dsay(ct, 0); dot();
	dsay(ist, 0); tminsay(); tnormsay(); cr();
}
/*
 * Load the input file into NSEGS separately allocated segments.
 *
 * filepath  path of the file to read (opened in binary mode).
 *
 * The amount read is capped at MAXBYTES (0 means "whole file"; MAXBYTES is
 * rewritten to the effective cap).  Each segment holds only complete keys:
 * every key delimiter is overwritten with 0, control characters and DEL are
 * replaced by '.', and the next segment restarts reading at the first byte
 * after the last complete key.  On return SEGMENTS[]/SEGLIMITS[] delimit the
 * segments and NKEYS, NBYTES, MAXKEYLEN, CHARCOUNT[], LOCHAR, HICHAR and
 * LIMSIZE0 describe what was read.
 *
 * The key delimiter is whichever of '\n' / '\r' appears first in the first
 * segment ('\n' if neither appears).
 */
void getsegs(string filepath)
{
    FILE *f; string s, t; int c, delim, seg, seglen, nread, n, keylen, fbytes;

    if ((f = fopen(filepath, "rb")) == NULL)
    {
        say("couldn't open"); brp(filepath);
        return; /* never touch a NULL stream, whether or not brp() returns */
    }

    /* File size, by seeking to the end. */
    fseek(f, 0, SEEK_END); fbytes = ftell(f); rewind(f);

    /* Never try to read more than the file holds; 0 means no limit. */
    if (MAXBYTES > fbytes || MAXBYTES == 0) MAXBYTES = fbytes;

    /*
     * Segment size: an even share of the input, rounded down to a multiple
     * of BUFSIZ and then doubled until it is at least BINSIZE0.  The
     * rounding yields 0 for small inputs, which made the doubling loop spin
     * forever, so fall back to one BUFSIZ in that case.  A non-positive
     * NSEGS0 is treated as 1 to avoid dividing by zero.
     */
    SEGSIZE = MAXBYTES / (NSEGS0 > 0 ? NSEGS0 : 1);
    SEGSIZE -= (SEGSIZE % BUFSIZ);
    if (SEGSIZE < 1) SEGSIZE = BUFSIZ;
    while (SEGSIZE < BINSIZE0) SEGSIZE *= 2;

    NSEGS = MAXBYTES / SEGSIZE; if (MAXBYTES % SEGSIZE) ++NSEGS;

    SEGMENTS = (string*) CALLOC(NSEGS, sizeof(string));
    SEGLIMITS = (string*) CALLOC(NSEGS, sizeof(string));

    memset((char*) CHARCOUNT, 0, 256 * sizeof(int));

    NBYTES = NKEYS = MAXKEYLEN = 0;
    delim = '\n';

    for (seg = 0; seg < NSEGS; ++seg) {
        s = SEGMENTS[seg] = (char*) MALLOC(SEGSIZE);

        /* Read at most one segment, and never past the MAXBYTES cap. */
        nread=MIN(SEGSIZE, MAXBYTES - NBYTES);
        nread=fread(SEGMENTS[seg], 1, nread, f);

        /* Detect the line delimiter once, looking only at bytes actually read. */
        if (seg == 0)
        {
            for (t = s; t < s + nread; ++t)
                if (*t == '\n' || *t == '\r') { delim = *t; break; }
        }

        n = keylen = seglen = 0;

        while (n < nread) {
            if ((c = s[n]) == delim) {
                /* End of key: terminate it and remember where it ended. */
                s[n++] = 0; seglen = n; ++NKEYS;

                /* Key length is counted including its terminator. */
                if (++keylen > MAXKEYLEN) MAXKEYLEN = keylen;
                keylen = 0;
            } 
            else 
            {
                /* Map control characters and DEL to '.' (46). */
                if (c < 32 || c == 127) s[n] = c = 46;
                CHARCOUNT[c] = 1; ++keylen; ++n;
            }                       
        }

        /* The segment ends after its last complete key. */
        SEGLIMITS[seg] = s += seglen; NBYTES += seglen;

        /* Re-read any trailing partial key at the start of the next segment. */
        fseek(f, NBYTES, SEEK_SET);
    }

    fclose(f); /* the stream was previously leaked */

    LIMSIZE0 = BINSIZE0 - MAXKEYLEN;

    /* Lowest and highest character seen; bounded so an input with no
       key characters cannot walk off CHARCOUNT[]. */
    LOCHAR = 32; while (LOCHAR < 255 && !CHARCOUNT[LOCHAR]) ++LOCHAR;
    HICHAR = 255; while (HICHAR > 0 && !CHARCOUNT[HICHAR]) --HICHAR;

} 
/*
 * Load up to MAXBYTES bytes of the input file into one buffer (INBUF) and
 * split it into 0-terminated keys in place.
 *
 * filepath  path of the file to read (opened in binary mode).
 *
 * INBUF is allocated with one extra byte in front (set to 0, INBUF points
 * just past it) and one spare byte behind the data.  MAXBYTES is rewritten
 * to the number of bytes actually read.  Only complete keys are kept: data
 * after the last delimiter is ignored, and at most MAXKEYS keys are taken.
 * Control characters and DEL are replaced by '.'.  On return NKEYS, NBYTES,
 * MAXKEYLEN, CHARCOUNT[], LOCHAR, HICHAR and LIMSIZE0 describe the keys.
 *
 * The key delimiter is whichever of '\n' / '\r' appears first in the data;
 * if neither appears there are no complete keys and NKEYS stays 0.
 */
void getkeys(string filepath)
{
	FILE *f; string s, lim, end; int c, delim, keylen;
	if ((f = fopen(filepath, "rb")) == NULL) 
		{say("couldn't open"); brp(filepath); return;} /* never read from a NULL stream */

	/* +2: one guard byte before the data, one spare byte after it. */
	INBUF = (char *) MALLOC(MAXBYTES + 2); 
	*INBUF = 0; ++INBUF;

	MAXBYTES=fread(INBUF, 1, MAXBYTES, f);
	fclose(f);

	/* Find the delimiter, looking only at bytes that were actually read
	   (the buffer is not terminated yet, so the scan must be bounded). */
	end = INBUF + MAXBYTES;
	delim = '\n';
	for (s = INBUF; s < end; ++s)
		if (*s == '\n' || *s == '\r') {delim = *s; break;}

	/* lim = one past the last delimiter, i.e. the end of the last complete
	   key.  Without any delimiter there is no complete key at all. */
	if (s == end)
		lim = INBUF;
	else
	{
		lim = end; while (*(lim - 1) != delim) --lim;
	}

	memset((char*) CHARCOUNT, 0, 256*sizeof(int));

	NBYTES = NKEYS = MAXKEYLEN = 0;

	s = INBUF;
	while (s<lim && NKEYS<MAXKEYS)
	{
		keylen = 0;
		/* Safe without a bound: the byte before lim is a delimiter. */
		while ((c = *s) != delim)
		{
			 /* Map control characters and DEL to '.' (46). */
			 if (c < 32 || c == 127) *s = c = 46; 
			 CHARCOUNT[c] = 1; ++keylen; ++s;
		}
		*s++ = 0; ++NKEYS;

		/* Key length is counted including its terminator. */
		if (++keylen > MAXKEYLEN) MAXKEYLEN = keylen;
	}

	*s = 0; 

	NBYTES = s - INBUF; 

	LIMSIZE0 = BINSIZE0 - MAXKEYLEN; 

	/* Lowest and highest character seen; bounded so an input with no key
	   characters cannot walk off CHARCOUNT[]. */
	LOCHAR = 32; while (LOCHAR < 255 && !CHARCOUNT[LOCHAR]) ++LOCHAR; 
	HICHAR = 255; while (HICHAR > 0 && !CHARCOUNT[HICHAR]) --HICHAR;
}