http://dx.doi.org/10.1093/ietfec/e90-a.2.457
<[email protected]>
namespace ng_cradix_rantala {
typedef size_t UINT;
typedef unsigned char BYTE, *LPBYTE, **LPPBYTE;
typedef unsigned char STR, *LPSTR, **LPPSTR, **STRPARR;
static const UINT AS = 256;      
static const UINT BS = 4;        
static const UINT AL = 0;        
static const UINT AH = 255;      
static const UINT IC = 20;       
static const UINT KBC = 128;     
static const UINT SS = 4096;     
#define push(a, k, n, b) _sp->sa=a, _sp->sk=k, _sp->sn=n, (_sp++)->sb=b
#define pop(a, k, n, b) a=(--_sp)->sa, k=_sp->sk, n=_sp->sn, b=_sp->sb
#define stackempty() (_sp<=stack)
#define splittable(c) c > 0 && count[c] > IC
struct Stack {
	LPSTR* sa; LPBYTE sk;
	int sn, sb;
} stack[SS], *_sp=stack;
static void
insertion_sort(unsigned char** strings, int n, size_t depth)
{
	for (unsigned char** i = strings + 1; --n > 0; ++i) {
		unsigned char** j = i;
		unsigned char* tmp = *i;
		while (j > strings) {
			unsigned char* s = *(j-1)+depth;
			unsigned char* t = tmp+depth;
			while (*s == *t && *s) {
				++s;
				++t;
			}
			if (*s <= *t) break;
			*j = *(j-1);
			--j;
		}
		*j = tmp;
	}
}
static
void FillKeyBuffer(LPPSTR a, LPBYTE kb, UINT* count, UINT n, UINT d)
{
	for (size_t i=0; i<n; ++i) {
		const unsigned char* str = a[i];
		unsigned j=0;
		for (; j<BS; ++j) {
			unsigned char c = str[d+j];
			kb[BS*i+j] = c;
			if (c==0) break;
		}
		if (j<BS) kb[BS*i+j] = 0;
	}
	
	
	for (size_t i=0; i<n; ++i) ++count[kb[BS*i]];
}
static
void RDFK(LPPSTR* GrpKP, LPPSTR a, UINT n, LPPSTR ta,
		UINT* count, UINT d)
{ 
	LPPSTR ak, tc; UINT i, *cptr, gs; unsigned char c=0;
	for (i=0; i<n-n%32; i+=32) {
		unsigned char cache[32];
		for (unsigned j=0; j<32; ++j) cache[j] = a[i+j][d];
		for (unsigned j=0; j<32; ++j) ++count[cache[j]];
	}
	for (; i<n; i++) count[a[i][d]]++;
	cptr=&count[AL]; while (*cptr<1) cptr++;
	if (*cptr<n) gs=n;
	else { c=(cptr-&count[AL])+AL; gs=0; }
	if (!gs) {
		if (splittable(c)) push(a, 0, n, d+1);
		else if (n>1 && c>0) insertion_sort(a, n, d);
		count[c]=0; return;
	}
	GrpKP[AL]=a;
	for (ak=a, i=AL; i<AH; i++) GrpKP[i+1]=ak+=count[i];
	memcpy(ta, a, sizeof(LPSTR)*n);
	for (i=0, tc=ta; i<n; i++, tc++) {
		*GrpKP[ta[i][d]]=*tc; GrpKP[ta[i][d]]++;
	}
	for (ak=a, i=AL; i<AH; i++) {
		if (splittable(i)) push(ak, 0, count[i], d+1);
		else if (count[i]>1 && i>0) insertion_sort(ak, count[i], d);
		ak+=count[i]; count[i]=0;
	}
}
void cradix_rantala(LPPSTR a, UINT n)
{
	UINT kbsd, kbsd1, i, j, stage, d, MEMSIZE;
	UINT *cptr, gs, count[AS];
	LPSTR tj, tk, ax, tl, kb, ss, tt, GrpKB[AS];
	LPPSTR GrpKP[AS], ak, ta, tc, t;
	if (sizeof(LPPSTR)>sizeof(unsigned char)*BS)
		MEMSIZE=sizeof(LPPSTR);
	else
		MEMSIZE=sizeof(unsigned char)*BS;
	
	ta = (LPPSTR)malloc(n * MEMSIZE);
	
	tk = (LPBYTE)malloc(n * sizeof(unsigned char) * BS);
	tj=tk;
	push(a, tk, n, 0); for (i=AL; i<AH; i++) count[i]=0;
	while (!stackempty()) {
		pop(a, tk, n, stage);
		if (tk) {
			
			if ((d=stage%BS)!=0)
				for (i=0, tl=tk; i<n; i++, tl+=(BS-d))
					count[*tl]++;
			else {
				if (n>KBC)
					FillKeyBuffer(a, tk, count, n, stage);
				else {
					RDFK(GrpKP, a, n, ta, count, stage);
					continue;
				}
			}
			
			cptr=&count[AL];
			while (*cptr<1) cptr++;
			if (*cptr<n) gs=n; else gs=0;
			
			kbsd=BS-d, kbsd1=kbsd-1;
			GrpKP[AL]=a; GrpKB[AL]=tk;
			for (ak=a, ax=tk, i=AL; i<AH; i++) {
				GrpKP[i+1]=ak+=count[i];
				GrpKB[i+1]=ax+=count[i]*kbsd1;
			}
			
			memcpy(ta, a, sizeof(LPSTR)*gs);
			for (i=0, ax=tk, tc=ta; i<gs;
					i++, ax+=kbsd, tc++) {
				*GrpKP[*ax]=*tc; GrpKP[*ax]++;
			}
			
			memcpy(ta, tk, sizeof(unsigned char)*n*kbsd);
			for (i=0, kb=(LPBYTE)ta; i<n; i++, *t+=kbsd1) {
				t=&GrpKB[*kb]; ss=*t; tt=kb+1;
				for (j=0; j<kbsd1; j++)
				{ *ss=*tt; ss++; tt++; }
				kb+=kbsd;
			}
			
			for (ak=a, ax=tk, i=AL; i<AH; i++) {
				if (splittable(i))
				{ push(ak, ax, count[i], stage+1); }
				else if (count[i]>1 && i>0)
					insertion_sort(ak, count[i], stage);
				ak+=count[i]; ax+=count[i]*(kbsd1);
				count[i]=0;
			}
		}
		else RDFK(GrpKP, a, n, ta, count, stage);
	}
	free((void*)ta);
	free((void*)tj);
}
CONTESTANT_REGISTER(cradix_rantala,
                    "ng/rantala_cradix",
                    "CRadix by Waihong Ng and Katsuhiko Kakehi modified by Rantala")
#undef push
#undef pop
#undef stackempty
#undef splittable
}