/* # of solutions: 2, 368, 1010, 2339 */

#define	USE_X
#define P_ORDER
#define USE_ASM 0		// 0=no asm, 1=asm

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/timeb.h>

#define MAXX  21
#define MAXY   7
#define DX     0
#define DY     0
#define GUARD 255

typedef unsigned char byte;
typedef unsigned char field;
typedef int pDelta;

field board[MAXX*MAXY];

int sizex, sizey, step, mirror = 0, verbose;

field *x_base;						// Needed for solution regeneration!

#ifndef P_ORDER
int used[12];
#endif

pDelta *soltab[12];

// #define o(x, y) (x * step + y)

inline int o(int x, int y)
{
	return (x * step + y);
}

char basePieces[12*11] = {
	1,0, 0,1, 1,1, 2,1, 1,2, 0,  /* X*/
	0,0, 1,0, 2,0, 3,0, 4,0, 1,  /* I */
	0,0, 1,0, 2,0, 3,0, 0,1, 7,  /* L */
	0,0, 1,0, 2,0, 3,0, 1,1, 7,  /* Y */
	0,0, 1,0, 2,0, 0,1, 1,1, 7,  /* P */
	0,0, 1,0, 2,0, 0,1, 2,1, 3,  /* U */
	0,1, 1,1, 2,1, 0,2, 2,0, 5,  /* Z */
	0,1, 1,1, 2,1, 0,2, 1,0, 7,  /* F */
	0,0, 1,0, 2,0, 1,1, 1,2, 3,  /* T */
	0,0, 1,0, 2,0, 0,1, 0,2, 3,  /* V */
	0,0, 1,0, 2,0, 2,1, 3,1, 7,  /* N */
	0,0, 1,0, 1,1, 2,1, 2,2, 3}; /* W */

char pentoName[] = ".XILYPUZFTVNW-";

char *sols[3500];
int nrofsols = 0, solutions = 0, nrofmirrors = 0;

pDelta pieces[65 * 5 + 12];
pDelta *pPieces[12], *oPieces[12];

void initBoard(void)
{
	int x, y;

	memset(board, GUARD, sizeof(board));

	for (y = 0; y < sizey; y++) {
		for (x = 0; x < sizex; x++) {
			board[o(x,y)] = 0;
		}
	}
}

void dumpBoard(void)
{
	int x, y;

	for (y = 0; y < sizey; y++) {
		for (x = 0; x < sizex; x++) {
			printf("%c", pentoName[board[o(x,y)]]);
		}
		printf("\n");
	}
	printf("\n");
}

void dumpPieces(void)
{
	pDelta *perm;
	field *base;
	int p;

	for (p = 0; p < 12; p++) {
		for (perm = pPieces[p]; perm[0] == 0; perm += 5) {
			for (base = board; 
				base[0] ||
				base[perm[1]] ||
				base[perm[2]] ||
				base[perm[3]] ||
				base[perm[4]];
				base++) ;

			base[0] = 
			base[perm[1]] = 
			base[perm[2]] = 
			base[perm[3]] = 
			base[perm[4]] =  p+1;

			dumpBoard();

			base[0] = 
			base[perm[1]] = 
			base[perm[2]] = 
			base[perm[3]] = 
			base[perm[4]] = 0;
		}
	}
}

int findperm(pDelta *pd)
{
	pDelta *perm;
	int p;

	for (p = 0; p < 12; p++) {
		for (perm = oPieces[p]; perm[0] == 0; perm += 5) {
			if ((perm[4] == pd[4]) &&
				(perm[3] == pd[3]) &&
				(perm[2] == pd[2]) &&
				(perm[1] == pd[1])) return p+1;
		}
	}
	return 0;
}

int saveSol(void)
{
	char temp[61];
	int x, y, t, p, pnr;
	field *base;
	pDelta *perm;

	if (mirror || verbose) {
		
		initBoard();
		
		perm = soltab[11]; 
		base = x_base;
		
		base[0] = base[perm[1]] = base[perm[2]] =
			base[perm[3]] = base[perm[4]] = 'X';
		
		base = board;
		
		for (p = 10; p >= 0; p--) {
			char name;
			
			while ( *base ) base++;
			
			perm = soltab[p];
			pnr = findperm(perm);
			name = pentoName[pnr];
			
			base[0] = base[perm[1]] = base[perm[2]] =
				base[perm[3]] = base[perm[4]] = name;
		}
		
		for (y = t = 0; y < sizey; y++) {
			for (x = 0; x < sizex; x++) {
				temp[t++] = board[o(x,y)];
			}
		}
		temp[t] = '\0';
		
		if (mirror) {
			
			for (t = 0; t < nrofsols; t++) {
				char *curr = sols[t];
				
				// Check for Y-axis mirror:
				for (y = 0; y < sizey; y++) {
					for (x = 0; x < (sizex >> 1); x++) {
						if (curr[y*sizex + x] != temp[y*sizex + sizex - x - 1]) goto y_ok;
					}
				}
				nrofmirrors++;
				return t+1;
y_ok:
				// Check for X-axis mirror:
				for (y = 0; y < (sizey >> 1); y++) {
					for (x = 0; x < sizex; x++) {
						if (curr[y*sizex + x] != temp[(sizey-y-1)*sizex + x]) goto x_ok;
					}
				}
				nrofmirrors++;
				return t+1;
x_ok:
				continue;
			}
			
			sols[nrofsols] = (char *) malloc(61);
			memcpy(sols[nrofsols], temp, 61);
			nrofsols++;
		}
		if (verbose) {
			char line[11*6+1], *l = line, *t = temp;

			printf("Sol # %d\n",solutions++);
			
			for (y = 0; y < sizey; y++) {
				for (x = 0; x < sizex; x++) {
					*l++ = *t++;
				}
				*l++ = '\n';
			}
			*l = '\0';
			printf("%s\n",line);
		}
	}

	solutions++;
		
	return 0;
}

pDelta *newDst(pDelta *p)
{
	int minj, j, i;
	pDelta min;

	// Sort the offsets by size, with just 5 entries insertion sort is OK!
	for (i = 0; i < 4; i++) {
		min = p[i]; minj = i;
		for (j = i+1; j < 5; j++) {
			if (p[j] < min) {
				min = p[j];
				minj = j;
			}
		}
		p[minj] = p[i];
		p[i] = min;
	}

	// Start each permutation at 0,0:
	min = p[0];
	for (j = 0; j < 5; j++) {
		p[j] -= min;
	}
	return p+5;
}


pDelta *gen(char *src, pDelta *dst, int mode)
{
	int i, x, y;

	for (i = 0; i < 5; i++) {
		x = src[i*2]; y = src[i*2+1];
		dst[i] = o(x,y);
	}
	dst = newDst(dst);

	if (mode & 1) {
		for (i = 0; i < 5; i++) {
			y = src[i*2]; x = -src[i*2+1];
			dst[i] = o(x,y);
		}
		dst = newDst(dst);
	}

	if (mode & 2) {
		for (i = 0; i < 5; i++) {
			x = -src[i*2]; y = -src[i*2+1];
			dst[i] = o(x,y);
		}
		dst = newDst(dst);
		for (i = 0; i < 5; i++) {
			y = -src[i*2]; x = src[i*2+1];
			dst[i] = o(x,y);
		}
		dst = newDst(dst);
	}

	if (mode & 4) {
		for (i = 0; i < 5; i++) {
			x = -src[i*2]; y = src[i*2+1];
			dst[i] = o(x,y);
		}
		dst = newDst(dst);

		if (mode & 1) {
			for (i = 0; i < 5; i++) {
				y = src[i*2]; x = src[i*2+1];
				dst[i] = o(x,y);
			}
			dst = newDst(dst);
		}

		if (mode & 2) {
			for (i = 0; i < 5; i++) {
				x = src[i*2]; y = -src[i*2+1];
				dst[i] = o(x,y);
			}
			dst = newDst(dst);
			for (i = 0; i < 5; i++) {
				y = -src[i*2]; x = -src[i*2+1];
				dst[i] = o(x,y);
			}
			dst = newDst(dst);
		}

	}
	*dst++ = -1;
	return dst;
}


void initPieces(void)
{
	pDelta *p = pieces;
	char *i;
	int j;

	for (j = 0, i = basePieces; j < 12; j++, i += 11) {
		pPieces[j] = oPieces[j] = p;
		p = gen(i, p, i[10]);
	}

}

typedef unsigned long ul;

ul *bPieces[12], *obPieces[12];

ul bpieces[65+12];

ul bsoltab[12];

int bx_base;

int bfindperm(ul bm, pDelta **pd)
{
	ul *b, bt;
	pDelta *perm;
	int p;

	for (p = 0; p < 12; p++) {
		perm = oPieces[p];
		for (b = obPieces[p]; (bt = *b++) != 0; perm+=5 ) {
			if (bt == bm) {
				*pd = perm;
				return p+1;
			}
		}
	}
	return 0;
}

int bsaveSol(void)
{
	char temp[61];
	int x, y, t, p, pnr;
	field *base;
	pDelta *perm;
	ul bm;

	if (mirror || verbose) {
		
		initBoard();
		
		perm = pieces; 
		base = board + bx_base;
		
		base[0] = base[perm[1]] = base[perm[2]] =
			base[perm[3]] = base[perm[4]] = 'X';
		
		base = board;
		
		for (p = 10; p >= 0; p--) {
			char name;
			
			while ( *base ) base++;
			
			bm = bsoltab[p];
			pnr = bfindperm(bm, &perm);
			name = pentoName[pnr];
			
			base[0] = base[perm[1]] = base[perm[2]] =
				base[perm[3]] = base[perm[4]] = name;
		}
		
		for (y = t = 0; y < sizey; y++) {
			for (x = 0; x < sizex; x++) {
				temp[t++] = board[o(x,y)];
			}
		}
		temp[t] = '\0';
		
		if (mirror) {
			
			for (t = 0; t < nrofsols; t++) {
				char *curr = sols[t];
				
				// Check for Y-axis mirror:
				for (y = 0; y < sizey; y++) {
					for (x = 0; x < (sizex >> 1); x++) {
						if (curr[y*sizex + x] != temp[y*sizex + sizex - x - 1]) goto y_ok;
					}
				}
				return t+1;
y_ok:
				// Check for X-axis mirror:
				for (y = 0; y < (sizey >> 1); y++) {
					for (x = 0; x < sizex; x++) {
						if (curr[y*sizex + x] != temp[(sizey-y-1)*sizex + x]) goto x_ok;
					}
				}
				return t+1;
x_ok:
				continue;
			}
			
			sols[nrofsols] = (char *) malloc(61);
			memcpy(sols[nrofsols], temp, 61);
			nrofsols++;
		}
		if (verbose) {
			char line[11*6+1], *l = line, *t = temp;

			printf("Sol # %d\n",solutions+1);
			
			for (y = 0; y < sizey; y++) {
				for (x = 0; x < sizex; x++) {
					*l++ = *t++;
				}
				*l++ = '\n';
			}
			*l = '\0';
			printf("%s\n",line);
		}
	}
	solutions++;
	return 0;
}

#if !USE_ASM
void bTry(ul b0, ul b1, ul b2, int piecesleft)
{
	int p;
	ul *pbm, bm;

	while (((long) b0) < 0) {
		b0 += b0 + (b1 >> 31);
		b1 += b1 + (b2 >> 31);
		b2 += b2;
	}

	p = --piecesleft;
	do {
		pbm = bPieces[p];
		bPieces[p] = bPieces[piecesleft];
		bPieces[piecesleft] = pbm;

		bm = *pbm++;
		do {
			if ((bm & b0) == 0) {
				b0 |= bm;
				bsoltab[piecesleft] = bm;

				if (piecesleft == 0) goto haveSolution;

				bTry(b0, b1, b2, piecesleft);
				b0 ^= bm;
			}
			bm = *pbm++;
		} while (bm);

		pbm = bPieces[p];
		bPieces[p] = bPieces[piecesleft];
		bPieces[piecesleft] = pbm;

	} while (--p >= 0);

	return;

haveSolution:
	bsaveSol();
	return;
}
#endif

ul bb[4];

int getbit(ul *t, int o)
{
	return (t[o >> 5] >> (31 - (o & 31))) & 1;
}

void bdumpBoard(void)
{
	int x, y, bit;

	char buf[96], *bp = buf;
	static char bitName[2] = {'.','X'};

	printf("piecesleft = %2d\n",bb[3]);

	for (y = 0; y <= sizey; y++) {
		for (x = 0; x <= sizex; x++) {
			bit = getbit(bb, o(x,y));
			*bp++ = bitName[bit];
		}
		*bp++ = '\n';
	}
	*bp = '\0';

	printf("%s\n", buf);
}

#if USE_ASM
void asmTry(ul b0, ul b1, ul b2, int piecesleft)
{
	__asm {

		mov edi,[b0]
		mov ebx,[b1]
		mov esi,[b2]
		mov ecx,[piecesleft]

		call aTry
		 jmp done

aTry:
#ifdef ASMDEBUG
		call dump_board
#endif

		pushad

		lea edx,[ecx-1]
		dec ecx

		test edi,edi
		 jge bit_ok
next_bit:
		add esi,esi
		adc ebx,ebx
		adc edi,edi
		 js next_bit
bit_ok:

#ifdef ASMDEBUG
		call dump_board
#endif
try_p:
		mov ebp,bPieces[edx*4]
		mov eax,bPieces[ecx*4]
		mov bPieces[edx*4],eax
		mov bPieces[ecx*4],ebp

		mov eax,[ebp]
		add ebp,4

try_perm:

		test edi,eax
		 jnz next_perm

		or edi,eax
		mov bsoltab[ecx*4],eax

		test ecx,ecx
		 jz have_solution

		call aTry

		xor edi,eax

next_perm:
		mov eax,[ebp]
		add ebp,4

		test eax,eax
		 jnz try_perm

		mov ebp,bPieces[edx*4]
		mov eax,bPieces[ecx*4]

		mov bPieces[edx*4],eax
		dec edx

		mov bPieces[ecx*4],ebp
		 jns try_p

		popad
		ret

have_solution:

		call bsaveSol
		popad
		ret

#ifdef ASMDEBUG
dump_board:
		pushad
		mov bb[0],edi
		mov bb[4],ebx
		mov bb[8],esi
		mov bb[12],ecx
		call bdumpBoard
		popad
		ret
#endif

done:
	}

}
#endif

void setbit(ul *t, int o)
{
	t[o >> 5] |= 0x80000000 >> (o & 31);
}

void clrbit(ul *t, int o)
{
	t[o >> 5] ^= 0x80000000 >> (o & 31);
}

void bInitPieces(void)
{
	int p, i;
	ul *bperm = bpieces;
	pDelta *perm;

	for (p = 0; p < 12; p++) {
		bPieces[p] = obPieces[p] = bperm;

		for (perm = pPieces[p]; perm[0] == 0; perm+=5) {
			*bperm = 0;
			for (i = 0; i < 5; i++) {
				setbit(bperm, perm[i]);
			}
			bperm++;
		}
		*bperm++ = 0;
	}
}

void bSolve(void)
{
	ul *t, bm;
	ul bt[3];
	int x, y;

	bInitPieces();

	t = bPieces[0]; bPieces[0] = bPieces[11]; bPieces[11] = t;

	bt[0] = bt[1] = bt[2] = 0;

	for (x = 0; x < sizex; x++) setbit(bt, o(x, sizey));
	for (y = 0; y <= sizey; y++) setbit(bt, o(sizex, y));

	bm = *t;
	bsoltab[11] = bm;

	int maxx = (sizex - 1) >> 1;
	int maxy = (sizey - 1) >> 1;

	for (y = maxy; y > 0; y--) {
		for (x = maxx; x > 0; x--) {
			if ((x == 1) && (y == 1)) return;

			setbit(bt, o(x, y));
			setbit(bt, o(x-1, y));
			setbit(bt, o(x+1, y));
			setbit(bt, o(x, y-1));
			setbit(bt, o(x, y+1));

			bx_base = o(x-1, y);		// Remember where we placed the 'X' piece!

			// Do we need to check for mirror images?
			mirror = (((x == maxx) && (sizex & 1)) ||
					 ((y == maxy) && (sizey & 1)));

#if USE_ASM
			asmTry(bt[0], bt[1], bt[2], 11);
#else
			bTry(bt[0], bt[1], bt[2], 11);
#endif

			clrbit(bt, o(x, y));
			clrbit(bt, o(x-1, y));
			clrbit(bt, o(x+1, y));
			clrbit(bt, o(x, y-1));
			clrbit(bt, o(x, y+1));
		}
	}
}

void Try(field *base, int piecesleft)
{
	int p;
	pDelta *first, *perm, *pl;

	while ( *base ) base++;

	piecesleft--;

	pl = pPieces[piecesleft];
	for (p = piecesleft; p >= 0; p--) {

		// Swap the current piece with the one at the end of the list:
		first = perm = pPieces[p];
		pPieces[p] = pl;

		do {

			if (base[perm[1]] ||
				base[perm[2]] ||
				base[perm[3]] ||
				base[perm[4]]) goto skip_permutation;

			base[0] = base[perm[1]] = base[perm[2]] =
			base[perm[3]] =	base[perm[4]] = 1;

			soltab[piecesleft] = perm;

			if (piecesleft == 0) {
				saveSol();

				base[0] = base[perm[1]] = base[perm[2]] =
				base[perm[3]] =	base[perm[4]] = 0;

				pPieces[p] = first;
				pPieces[piecesleft] = pl;

				return;
			}

			Try(base+1,piecesleft);

			base[0] = base[perm[1]] = base[perm[2]] =
			base[perm[3]] =	base[perm[4]] = 0;

skip_permutation:
			perm += 5;
		} while (perm[0] == 0);

		pPieces[p] = first;
	}
	pPieces[piecesleft] = pl;
}

void Solve(void)
{
	pDelta *t;
	field *b;

	t = pPieces[0]; pPieces[0] = pPieces[11]; pPieces[11] = t;

	soltab[11] = pieces;

	int maxx = (sizex - 3) >> 1;
	int maxy = (sizey - 1) >> 1;

	for (int y = maxy; y > 0; y--) {
		for (int x = maxx; x >= 0; x--) {
			if ((x == 0) && (y == 1)) return;

			b = board + o(x, y);

			x_base = b;			// Remember where we placed the 'X' piece!

			b[0] = b[pieces[1]] = b[pieces[2]] =
			b[pieces[3]] = b[pieces[4]] = 1;

			// Do we need to check for mirror images?
			mirror = (((x == maxx) && (sizex & 1)) ||
					 ((y == maxy) && (sizey & 1)));

			Try(board,11);

			b[0] = b[pieces[1]] = b[pieces[2]] =
			b[pieces[3]] = b[pieces[4]] = 0;
		}
	}
}

int syntax(void)
{
	fprintf(stderr,
		"Pento V1.0 (c) Terje Mathisen 1997\n\n"\
		"Usage: pento [-option] [3|4|5|6]\n\n"\
		"Options:\n"\
		"	-v	: Verbose (print all solutions to stdout)\n"\
		"	-s	: Slow: Use byte-based code instead of bitmaps\n\n"\
		"The number is the smaller axis of the pentomino board (default = 6)\n");
	return 1;
}

int main(int argc, char *argv[])
{
	int sy = 6, a, slow = 0;
	char *arg;

	for (a = 1; a < argc; a++) {
		arg = argv[a];
		if ((arg[0] == '-') || (arg[0] == '/')) {
			if (toupper(arg[1]) == 'V') verbose = 1;
			else if (toupper(arg[1]) == 'S') slow = 1;
			else return syntax();
		}
		else {
			sy = atoi(argv[1]);
			if (sy < 3 || sy > 6) return syntax();
		}
	}

	sizex = 60 / sy; sizey = sy; step = sizey+1;

	struct _timeb tb;
	long timediff, millidiff;


	// Get an accurate timestamp before starting
	_ftime(&tb); timediff = tb.time; millidiff = tb.millitm;

	initBoard();
	initPieces();


	if (slow) {
		printf("Using slow algorithm\n");
		Solve();		// Solve using byte arrays
	}
	else {
		bSolve();
	}

	// Calculate elapsed time:
	_ftime(&tb); timediff = tb.time - timediff; millidiff = tb.millitm - millidiff;

	if (millidiff < 0) { 
		millidiff += 1000; 
		timediff--; 
	}

    if (verbose) {
		printf("%dX%d board: %d solutions found in %d.%03ds\n", 
			sizex, sizey, solutions, timediff, millidiff);
	}
	fprintf(stderr,"%dX%d board: %d solutions found in %d.%03ds\n", 
		sizex, sizey, solutions, timediff, millidiff);

	return 0;
}

