#include <stdio.h>
#include <string.h>

#ifndef DEBUG
	#define DEBUG 0
#endif

enum {
	BUF_SIZE = 0x1FFF,
	BUF_BYTES = 2,
};

static void print_char(unsigned char c) {
	if((' '<=c) && (0x7F>c))
		putchar(c);
	else
		printf("%02x",c);
}

int main(int argc,char** args) {
	if(4!=argc) {
		fprintf(stderr,"usage: bpe [c|d] src dest\n");
		return -1;
	}
	FILE* srcf = fopen(args[2],"rb");
	FILE* destf = fopen(args[3],"wb");
	unsigned char buf[BUF_SIZE], tmp[BUF_SIZE];
	// what to do?
	if('c'==*args[1]) {
		// compress; lets be greedy
		// but if this was competition, we could just do a proper search and doubtless trim a few bytes off :)
		int digraphs[256*256];
		char used[256];
		int buf_num = 0;
		size_t read = 0, written = 0;
		while(true) {
			size_t buf_len = fread(buf,1,BUF_SIZE,srcf);
			if(!buf_len)
				break;
			read += buf_len;
			buf_num++;
			size_t cycle;
			for(cycle=0; cycle<256; cycle++) {
				memset(digraphs,0,sizeof(digraphs));
				memset(used,0,sizeof(used));
				used[buf[0]] = 1;
				for(int i=1; i<buf_len; i++) {
					digraphs[buf[i-1]*256+buf[i]]++;
					used[buf[i]] = 1;
				}
				int best_count = 0, best = 0;
				for(int i=0; i<256*256; i++)
					if(digraphs[i] > best_count) {
						best_count = digraphs[i];
						best = i;
					}
				if(best_count < 4) // nothing to improve
					break;
				const unsigned char one = best/256, two = best%256;
				char replace, replace_found = 0;
				for(int i=0; i<256; i++)
					if(!used[i]) {
						replace = i;
						replace_found = true;
						break;
					}
				if(!replace_found)
					break;
#if DEBUG>=2
				printf("%3d %2zu [",buf_num,cycle);
				print_char(one);
				putchar(':');
				print_char(two);
				printf("] %d = ",digraphs[best]);
				print_char(replace);
#endif
				size_t out = 0;
				tmp[out++] = one;
				tmp[out++] = two;
				tmp[out++] = replace;
				size_t i;
				for(int i=0; i<buf_len; )
					if((i < buf_len-1) && (buf[i] == one) && (buf[i+1] == two)) {
						tmp[out++] = replace;
						i += 2;
					} else
						tmp[out++] = buf[i++];
#if DEBUG>=2
				printf(" = %zu saved, %zu left\n",buf_len-out,out);
#endif
				buf_len = out;
				memcpy(buf,tmp,buf_len);
			}
			for(int i=0; i<BUF_BYTES; i++) {
				fputc((buf_len>>((BUF_BYTES-i-1)*8))&0xff,destf);
				written++;
			}
			fputc(cycle,destf);
			written++;
			fwrite(buf,1,buf_len,destf);
			written += buf_len;
#if DEBUG>=1
			printf("%d = %zu, %zu\n",buf_num,buf_len,cycle);
#endif
		}
		printf("compressed %zu to %zu in %d blocks (%0.0f%%)\n",read,written,buf_num,((double)written/(double)read)*100.0);
	} else {
		// decompress
		size_t read = 0, written = 0;
		int buf_num;
		bool eof = false;
		for(; !eof; buf_num++) {
			size_t buf_len = 0;
			for(int i=0; i<BUF_BYTES; i++) {
				buf_len <<= 8;
				const int r = fgetc(srcf);
				if(EOF == r) {
					eof = true;
					break;
				}
				buf_len |= r;
				read++;
			}
			if(eof)
				break;
			if(buf_len > BUF_SIZE) {
				fprintf(stderr,"illegal buf_len %zu\n",buf_len);
				return 1;
			}
			const size_t cycles = fgetc(srcf);
			read ++;
#if DEBUG>=1
			printf("%d = %zu, %zu\n",buf_num,buf_len,cycles);
#endif
			read += fread(buf,1,buf_len,srcf);
			for(size_t cycle=0; cycle<cycles; cycle++) {
				size_t in = 0;
				const unsigned char one = buf[in++], two = buf[in++], replace = buf[in++];
#if DEBUG>=2
				printf("%3d %2zu [",buf_num,cycle);
				print_char(one);
				putchar(':');
				print_char(two);
				printf("] = ");
				print_char(replace);
#endif
				size_t out = 0;
				for(; in < buf_len; in++)
					if(buf[in]==replace) {
						tmp[out++] = one;
						tmp[out++] = two;
					} else
						tmp[out++] = buf[in];
#if DEBUG>=2
				printf(" = %zu saved, %zu left\n",out-buf_len,out);
#endif
				buf_len = out;
				memcpy(buf,tmp,buf_len);
			}
			written += fwrite(buf,1,buf_len,destf);
		}
		printf("decompressed %zu to %zu in %d blocks (%0.0f%%)\n",read,written,buf_num,((double)read/(double)written)*100.0);		
	}
	return 0;
}

