#include "stdafx.h"
#include "ttd.h"
/*
 * #include "strings.h" <-- This will be uncommented when we actually implement
 *                          Unicode and drop the first four functions there.
 */

/*
 * Note: Every function here returns newly allocated memory that the caller
 *       must free; the input buffer passed in is never freed. In addition,
 *       memsize is assumed to be the length of the input string not including
 *       any terminating null.
 *
 * Documentation on the OpenTTD character set (for which the first two functions
 * map from/to Unicode respectively) can be found in the file
 * openttd-unicode.txt, which, besides being found in docs/ can also be had at
 * http://www.pipian.com/stuffforchat/openttd-unicode.txt
 *
 * Docs on UTF-8 can be had in many places, including:
 *   http://www.cl.cam.ac.uk/~mgk25/unicode.html
 */

/*
 * Converts 'memsize' chars of Unicode internal representation in 'wide' to the
 * OpenTTD Character Set.
 *
 * Mapping rules:
 *  - Code points that have a dedicated OpenTTD glyph map to that glyph's byte.
 *  - Any other code point above 0xFF becomes '?'.
 *  - Code points 0x7B-0x7F, and Latin-1 code points whose byte value is
 *    occupied by one of the OpenTTD glyphs, become '?'.
 *  - Everything else is a 1-1 copy of ISO-8859-1.
 *
 * Returns a newly allocated buffer of 'memsize' + 1 bytes whose last byte is
 * zero, or NULL if the allocation fails. Free the returned pointer when
 * you're done.
 *
 * Note: Does not yet account for previous location of Euro in 0x80, or command
 *       characters 0x00-0x20 and 0x80-0xA0. This will be fixed later, with a
 *       bool switch to disable conversion of these characters.
 */
char *UnicharToOpenTTD(const unichar *wide, uint32 memsize)
{
	/* Unicode code point <-> OpenTTD byte for every remapped glyph. */
	static const struct {
		unichar unicode;
		unsigned char ottd;
	} remap[] = {
		{ 0x0178, 0x9F }, // LATIN CAPITAL LETTER Y WITH DIAERESIS
		{ 0x20AC, 0xA4 }, // EURO SIGN
		{ 0x25B2, 0xA0 }, // BLACK UP-POINTING TRIANGLE
		{ 0x25B4, 0xBC }, // BLACK UP-POINTING SMALL TRIANGLE
		{ 0x25B6, 0xAF }, // BLACK RIGHT-POINTING TRIANGLE
		{ 0x25BC, 0xAA }, // BLACK DOWN-POINTING TRIANGLE
		{ 0x25BE, 0xBD }, // BLACK DOWN-POINTING SMALL TRIANGLE
		{ 0x2713, 0xAC }, // CHECK MARK
		{ 0x2717, 0xAD }, // BALLOT X
		{ 0xF000, 0xB4 }, // TRAIN
		{ 0xF001, 0xB5 }, // LORRY
		{ 0xF002, 0xB6 }, // BUS
		{ 0xF003, 0xB7 }, // PLANE
		{ 0xF004, 0xB8 }, // SHIP
		{ 0xF005, 0xB9 }, // SUPERSCRIPT NEGATIVE ONE
	};
	const size_t remap_count = sizeof(remap) / sizeof(remap[0]);
	uint32 i;
	char *iso;

	/* Widen before adding one so the terminator slot cannot wrap to zero. */
	iso = calloc((size_t)memsize + 1, 1);
	if (iso == NULL) return NULL;

	for (i = 0; i < memsize; i++) { // Go through wide, mapping characters.
		const unichar c = wide[i];
		char out;
		size_t k;

		if (c > 0xFF) {
			out = '?'; // Non-OpenTTD character unless remapped below
		} else if (c >= 0x7B && c <= 0x7F) {
			out = '?'; // Not present in the OpenTTD charset
		} else {
			out = (char)c; // Otherwise, it's a 1-1 copy of 8859-1
		}

		for (k = 0; k < remap_count; k++) {
			if (c == remap[k].unicode) {
				out = (char)remap[k].ottd;
				break;
			}
			if (c == remap[k].ottd) {
				// The Latin-1 character at this byte value is
				// displaced by an OpenTTD glyph.
				out = '?';
				break;
			}
		}

		iso[i] = out;
	}

	return iso;
}

/*
 * Converts 'memsize' OpenTTD Character Set chars in 'iso' to Unicode internal
 * representation. Bytes that hold an OpenTTD-specific glyph are mapped to
 * their Unicode code point; every other byte is copied unchanged.
 *
 * Returns a newly allocated array of 'memsize' + 1 unichars whose last element
 * is zero, or NULL if the allocation fails. Remember to free the returned
 * pointer when you're done.
 *
 * Note: Does not yet account for previous location of Euro in 0x80, or command
 *       characters 0x00-0x20 and 0x80-0xA0. This will be fixed later, with a
 *       bool switch to disable conversion of these characters.
 */
unichar *OpenTTDToUnichar(const byte *iso, uint32 memsize)
{
	uint32 i;
	unichar *wide;

	/*
	 * calloc(count, size) lets the allocator check the multiplication, and
	 * widening first keeps 'memsize + 1' from wrapping to zero.
	 */
	wide = calloc((size_t)memsize + 1, sizeof(*wide));
	if (wide == NULL) return NULL;

	for (i = 0; i < memsize; i++) { // Go through iso mapping characters.
		switch (iso[i]) {
			case 0x9F: // LATIN CAPITAL LETTER Y WITH DIAERESIS
				wide[i] = 0x0178;
				break;
			case 0xA0: // BLACK UP-POINTING TRIANGLE
				wide[i] = 0x25B2;
				break;
			case 0xA4: // EURO SIGN
				wide[i] = 0x20AC;
				break;
			case 0xAA: // BLACK DOWN-POINTING TRIANGLE
				wide[i] = 0x25BC;
				break;
			case 0xAC: // CHECK MARK
				wide[i] = 0x2713;
				break;
			case 0xAD: // BALLOT X
				wide[i] = 0x2717;
				break;
			case 0xAF: // BLACK RIGHT-POINTING TRIANGLE
				wide[i] = 0x25B6;
				break;
			case 0xB4: // TRAIN
				wide[i] = 0xF000;
				break;
			case 0xB5: // LORRY
				wide[i] = 0xF001;
				break;
			case 0xB6: // BUS
				wide[i] = 0xF002;
				break;
			case 0xB7: // PLANE
				wide[i] = 0xF003;
				break;
			case 0xB8: // SHIP
				wide[i] = 0xF004;
				break;
			case 0xB9: // SUPERSCRIPT NEGATIVE ONE
				wide[i] = 0xF005;
				break;
			case 0xBC: // BLACK UP-POINTING SMALL TRIANGLE
				wide[i] = 0x25B4;
				break;
			case 0xBD: // BLACK DOWN-POINTING SMALL TRIANGLE
				wide[i] = 0x25BE;
				break;
			default: // Everything else is a 1-1 copy
				wide[i] = iso[i];
				break;
		}
	}

	return wide;
}

/*
 * Converts 'memsize' Unicode internal representation chars in 'wide' to UTF-8.
 *
 * Each value is written as a one- to six-byte sequence chosen by its
 * magnitude (below 0x80, 0x800, 0x10000, 0x200000, 0x4000000, 0x80000000).
 * A value that matches none of these ranges produces no output.
 *
 * Returns a newly allocated, zero-terminated byte buffer, or NULL if the
 * working buffer cannot be allocated. Remember to free the returned pointer
 * when done.
 *
 * Note: Surrogate code points are not given any special treatment; they are
 *       encoded like any other value. Five- and six-byte sequences are
 *       produced for large values although RFC 3629 limits UTF-8 to four
 *       bytes. This will be revisited upon full Unicode integration.
 */
byte *UnicharToUTF8(const unichar *wide, uint32 memsize)
{
	uint32 i;
	byte *mem, *ptr, *shrunk;

	/* Worst case is six bytes per character plus the terminator. */
	if (memsize > ((size_t)-1 - 1) / 6) return NULL;

	mem = calloc((size_t)memsize * 6 + 1, 1);
	if (mem == NULL) return NULL;
	ptr = mem;

	for (i = 0; i < memsize; i++) {
		const unichar c = wide[i];

		if (c < 0x80) {
			*ptr++ = c & 0x7F;
		} else if (c < 0x800) {
			*ptr++ = 0xC0 | ((c >> 6) & 0x1F);
			*ptr++ = 0x80 | (c & 0x3F);
		} else if (c < 0x10000) {
			*ptr++ = 0xE0 | ((c >> 12) & 0x0F);
			*ptr++ = 0x80 | ((c >> 6) & 0x3F);
			*ptr++ = 0x80 | (c & 0x3F);
		} else if (c < 0x200000) {
			*ptr++ = 0xF0 | ((c >> 18) & 0x07);
			*ptr++ = 0x80 | ((c >> 12) & 0x3F);
			*ptr++ = 0x80 | ((c >> 6) & 0x3F);
			*ptr++ = 0x80 | (c & 0x3F);
		} else if (c < 0x4000000) {
			*ptr++ = 0xF8 | ((c >> 24) & 0x03);
			*ptr++ = 0x80 | ((c >> 18) & 0x3F);
			*ptr++ = 0x80 | ((c >> 12) & 0x3F);
			*ptr++ = 0x80 | ((c >> 6) & 0x3F);
			*ptr++ = 0x80 | (c & 0x3F);
		} else if (c < 0x80000000) {
			*ptr++ = 0xFC | ((c >> 30) & 0x01);
			*ptr++ = 0x80 | ((c >> 24) & 0x3F);
			*ptr++ = 0x80 | ((c >> 18) & 0x3F);
			*ptr++ = 0x80 | ((c >> 12) & 0x3F);
			*ptr++ = 0x80 | ((c >> 6) & 0x3F);
			*ptr++ = 0x80 | (c & 0x3F);
		}
	}

	/*
	 * Trim the buffer to the bytes actually written plus the terminator.
	 * If realloc fails the original block is still valid and already
	 * zero-terminated, so hand that back instead of losing it.
	 */
	shrunk = realloc(mem, (size_t)(ptr - mem) + 1);

	return (shrunk != NULL) ? shrunk : mem;
}

/*
 * Converts 'memsize' bytes of UTF-8 in 'utf' to Unicode internal
 * representation.
 *
 * The length of each sequence (one to six bytes) is taken from its first
 * byte. A sequence that would extend past 'memsize' ends the conversion; the
 * incomplete sequence is dropped rather than read past the end of the input.
 *
 * The input is not validated: continuation bytes are not checked for the
 * 10xxxxxx pattern, and a stray byte in 0x80-0xBF is treated as the start of a
 * two-byte sequence.
 *
 * Returns a newly allocated, zero-terminated array of unichars, or NULL if the
 * working buffer cannot be allocated. Remember to free the returned pointer
 * when done.
 *
 * Note: Decoded surrogate code points are stored as-is and are not combined
 *       into pairs. This will be revisited upon full Unicode integration.
 *       See RFC 3629.
 */
unichar *UTF8ToUnichar(const byte *utf, uint32 memsize)
{
	uint32 i = 0; // Read position in utf
	uint32 j = 0; // Number of characters decoded so far
	unichar *mem, *shrunk;

	/* One character per input byte is the worst case, plus the terminator. */
	mem = calloc((size_t)memsize + 1, sizeof(*mem));
	if (mem == NULL) return NULL;

	while (i < memsize) {
		const byte lead = utf[i];
		uint32 len;

		if (lead < 0x80) {
			len = 1;
		} else if (lead < 0xE0) {
			len = 2;
		} else if (lead < 0xF0) {
			len = 3;
		} else if (lead < 0xF8) {
			len = 4;
		} else if (lead < 0xFC) {
			len = 5;
		} else {
			len = 6;
		}

		/* Truncated trailing sequence: stop instead of reading out of bounds. */
		if (len > memsize - i) break;

		switch (len) {
			case 1:
				mem[j] = lead;
				break;
			case 2:
				mem[j] = ((uint32)(lead & 0x1F) << 6) |
					(uint32)(utf[i + 1] & 0x3F);
				break;
			case 3:
				mem[j] = ((uint32)(lead & 0x0F) << 12) |
					((uint32)(utf[i + 1] & 0x3F) << 6) |
					(uint32)(utf[i + 2] & 0x3F);
				break;
			case 4:
				/* The last byte is utf[i + 3]; it used to re-read utf[i + 2]. */
				mem[j] = ((uint32)(lead & 0x07) << 18) |
					((uint32)(utf[i + 1] & 0x3F) << 12) |
					((uint32)(utf[i + 2] & 0x3F) << 6) |
					(uint32)(utf[i + 3] & 0x3F);
				break;
			case 5:
				mem[j] = ((uint32)(lead & 0x03) << 24) |
					((uint32)(utf[i + 1] & 0x3F) << 18) |
					((uint32)(utf[i + 2] & 0x3F) << 12) |
					((uint32)(utf[i + 3] & 0x3F) << 6) |
					(uint32)(utf[i + 4] & 0x3F);
				break;
			default: /* 6 */
				mem[j] = ((uint32)(lead & 0x01) << 30) |
					((uint32)(utf[i + 1] & 0x3F) << 24) |
					((uint32)(utf[i + 2] & 0x3F) << 18) |
					((uint32)(utf[i + 3] & 0x3F) << 12) |
					((uint32)(utf[i + 4] & 0x3F) << 6) |
					(uint32)(utf[i + 5] & 0x3F);
				break;
		}

		j++;
		i += len;
	}

	/*
	 * Trim to the decoded characters plus the terminator. If realloc fails
	 * the original block is still valid and zero-terminated, so return it.
	 */
	shrunk = realloc(mem, sizeof(*mem) * ((size_t)j + 1));

	return (shrunk != NULL) ? shrunk : mem;
}

/*
 * Wraps conversion of 'memsize' characters of OpenTTD encoding 'iso' to
 * Unichar, and from Unichar to UTF-8. The intermediate Unichar buffer is freed
 * here.
 *
 * Returns the buffer produced by UnicharToUTF8, or NULL if the intermediate
 * conversion returned NULL. Remember to free the returned pointer when done.
 */
byte *OpenTTDToUTF8(const char *iso, uint32 memsize)
{
	byte *utf;
	unichar *wide;

	/* OpenTTDToUnichar takes the raw bytes, so convert the pointer type explicitly. */
	wide = OpenTTDToUnichar((const byte *)iso, memsize);
	if (wide == NULL) return NULL;

	/* The first step is 1-1, so the character count is still 'memsize'. */
	utf = UnicharToUTF8(wide, memsize);
	free(wide);

	return utf;
}

/*
 * Wraps conversion of 'memsize' bytes of UTF-8 in 'utf' to Unichar, and Unichar
 * to OpenTTD encoding. The intermediate Unichar buffer is freed here.
 *
 * The number of decoded characters is usually smaller than the number of
 * UTF-8 bytes, so the second step is given the decoded length, found by
 * scanning the zero-terminated intermediate buffer. As a consequence the
 * conversion stops at the first zero character in the decoded text.
 *
 * Returns the buffer produced by UnicharToOpenTTD, or NULL if the intermediate
 * conversion returned NULL. Remember to free the returned pointer when done.
 */
char *UTF8ToOpenTTD(const byte *utf, uint32 memsize)
{
	char *iso;
	unichar *wide;
	uint32 count = 0;

	wide = UTF8ToUnichar(utf, memsize);
	if (wide == NULL) return NULL;

	/*
	 * Decoding never yields more characters than input bytes, so bounding
	 * the scan by 'memsize' keeps it inside the intermediate buffer.
	 */
	while (count < memsize && wide[count] != 0) count++;

	iso = UnicharToOpenTTD(wide, count);
	free(wide);

	return iso;
}

