/*
 *	LanguageMapping.c
 *
 *	Tables and functions for converting between Windows LANGIDs and Macintosh locale strings
 *
 *	Copyright (c) 2000-2002 Marco Piovanelli
 *	All Rights Reserved
 *
 */

/*	this file contains a list of ISO-639 language codes, sorted by Windows primary language ID */
/*	see:		<http://lcweb.loc.gov/standards/iso639-2/> */
/*	see also:	<http://www.unicode.org/unicode/onlinedat/languages.html> */
/*	and:		<http://www.unicode.org/unicode/onlinedat/countries.html> */
/*	and:		<http://www.sil.org/silewp/2000/001/SILEWP2000-001.html> */

#include <string.h>
#include <stdint.h>

/*	Converts a Windows LANGID to a Mac locale string. The result is written to outMacLocaleString */
/*	(a caller-supplied buffer that is not bounds-checked) and the same pointer is returned. */
char *		MapWindowsLangIDToMacLocaleString ( uint16_t inWindowsLangID, char * outMacLocaleString ) ;
/*	Converts a Mac locale string to a Windows LANGID. Returns 0 when the language code at the start */
/*	of the string is not found in the language table. */
uint16_t	MapMacLocaleStringToWindowsLangID ( const char * inLocaleString ) ;

typedef struct LanguageTableEntry
{
	const char *					mLanguageTag ;
	const char * const *			mSublangList ;
} LanguageTableEntry ;

/*	Arabic: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangArabic[] = {
	"_SA",	/*  1: Saudi Arabia */
	"_IQ",	/*  2: Iraq */
	"_EG",	/*  3: Egypt */
	"_LY",	/*  4: Libya */
	"_DZ",	/*  5: Algeria */
	"_MA",	/*  6: Morocco */
	"_TN",	/*  7: Tunisia */
	"_OM",	/*  8: Oman */
	"_YE",	/*  9: Yemen */
	"_SY",	/* 10: Syria */
	"_JO",	/* 11: Jordan */
	"_LB",	/* 12: Lebanon */
	"_KW",	/* 13: Kuwait */
	"_AE",	/* 14: United Arab Emirates */
	"_BH",	/* 15: Bahrain */
	"_QA",	/* 16: Qatar */
	NULL	/* list terminator */
};

/*	Chinese: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangChinese[] = {
	"_TW",	/* 1: Taiwan (Traditional Chinese script) */
	"_CN",	/* 2: China (Simplified Chinese script) */
	"_HK",	/* 3: Hong Kong */
	"_SG",	/* 4: Singapore */
	"_MO",	/* 5: Macau */
	NULL	/* list terminator */
};

/*	German: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangGerman[] = {
	"_DE",	/* 1: Germany */
	"_CH",	/* 2: Switzerland */
	"_AT",	/* 3: Austria */
	"_LU",	/* 4: Luxembourg */
	"_LI",	/* 5: Liechtenstein */
	NULL	/* list terminator */
};

/*	English: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1). */
/*	Region suffixes are ISO-3166 alpha-2 country codes; the United Kingdom's code is "GB", not "UK". */
static const char * const		kSublangEnglish [ ] =
{
	"_US",		/*	USA */
	"_GB",		/*	United Kingdom (ISO-3166 code is GB) */
	"_AU",		/*	Australia */
	"_CA",		/*	Canada */
	"_NZ",		/*	New Zealand */
	"_IE",		/*	Ireland */
	"_ZA",		/*	South Africa */
	"_JM",		/*	Jamaica */
	"",			/*	Caribbean (a region, not a country: no suffix is emitted) */
	"_BZ",		/*	Belize */
	"_TT",		/*	Trinidad and Tobago */
	"_ZW",		/*	Zimbabwe */
	"_PH",		/*	Philippines */
	"_ID",		/*	Indonesia */
	"_HK",		/*	Hong Kong */
	"_IN",		/*	India */
	"_MY",		/*	Malaysia */
	"_SG",		/*	Singapore */
	0			/*	(end of table) */
} ;

/*	Spanish: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1). */
/*	"_ES" appears twice because two sublanguage IDs differ only in sort order. */
static const char *const kSublangSpanish[] = {
	"_ES",	/*  1: Spain (traditional sort) */
	"_MX",	/*  2: Mexico */
	"_ES",	/*  3: Spain (international sort) */
	"_GT",	/*  4: Guatemala */
	"_CR",	/*  5: Costa Rica */
	"_PA",	/*  6: Panama */
	"_DO",	/*  7: Dominican Republic */
	"_VE",	/*  8: Venezuela */
	"_CO",	/*  9: Colombia */
	"_PE",	/* 10: Peru */
	"_AR",	/* 11: Argentina */
	"_EC",	/* 12: Ecuador */
	"_CL",	/* 13: Chile */
	"_UY",	/* 14: Uruguay */
	"_PY",	/* 15: Paraguay */
	"_BO",	/* 16: Bolivia */
	"_SV",	/* 17: El Salvador */
	"_HN",	/* 18: Honduras */
	"_NI",	/* 19: Nicaragua */
	"_PR",	/* 20: Puerto Rico */
	NULL	/* list terminator */
};

/*	French: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangFrench[] = {
	"_FR",	/*  1: France */
	"_BE",	/*  2: Belgium */
	"_CA",	/*  3: Canada */
	"_CH",	/*  4: Switzerland */
	"_LU",	/*  5: Luxembourg */
	"_MC",	/*  6: Monaco */
	"",		/*  7: West Indies (empty suffix) */
	"_RE",	/*  8: Reunion */
	"_CG",	/*  9: Congo */
	"_SN",	/* 10: Senegal */
	"_CM",	/* 11: Cameroon */
	"_CI",	/* 12: Côte d'Ivoire */
	"_ML",	/* 13: Mali */
	"_MA",	/* 14: Morocco */
	"_HT",	/* 15: Haiti */
	NULL	/* list terminator */
};

/*	Italian: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangItalian[] = {
	"_IT",	/* 1: Italy */
	"_CH",	/* 2: Switzerland */
	NULL	/* list terminator */
};

/*	Dutch: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangDutch[] = {
	"_NL",	/* 1: Netherlands */
	"_BE",	/* 2: Belgium */
	NULL	/* list terminator */
};

/*	Norwegian: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1). */
/*	Each suffix carries a language variant ("-bok", "-nyn") followed by the region. */
static const char *const kSublangNorwegian[] = {
	"-bok_NO",	/* 1: Bokmål */
	"-nyn_NO",	/* 2: Nynorsk */
	NULL		/* list terminator */
};

/*	Portuguese: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangPortuguese[] = {
	"_BR",	/* 1: Brazil */
	"_PT",	/* 2: Portugal */
	NULL	/* list terminator */
};

/*	Romanian: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangRomanian[] = {
	"_RO",	/* 1: Romania */
	"_MD",	/* 2: Moldova */
	NULL	/* list terminator */
};

/*	Serbo-Croatian: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1). */
/*	The Yugoslavia entries carry a script code (".La", ".Cy") ahead of the region. */
static const char *const kSublangSerboCroatian[] = {
	"_HR",		/* 1: Croatia */
	".La_YU",	/* 2: Yugoslavia (Latin script) */
	".Cy_YU",	/* 3: Yugoslavia (Cyrillic script) */
	NULL		/* list terminator */
};

/*	Swedish: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangSwedish[] = {
	"_SE",	/* 1: Sweden */
	"_FI",	/* 2: Finland */
	NULL	/* list terminator */
};

/*	Azerbaijani: script suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangAzerbaijani[] = {
	".La",	/* 1: Latin script */
	".Cy",	/* 2: Cyrillic script */
	NULL	/* list terminator */
};

/*	Malay: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangMalay[] = {
	"_MY",	/* 1: Malaysia */
	"_BN",	/* 2: Brunei Darussalam */
	NULL	/* list terminator */
};

/*	Uzbek: script suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangUzbek[] = {
	".La",	/* 1: Latin script */
	".Cy",	/* 2: Cyrillic script */
	NULL	/* list terminator */
};

/*	Bengali: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangBengali[] = {
	"_IN",	/* 1: India */
	"_BD",	/* 2: Bangladesh */
	NULL	/* list terminator */
};

/*	Punjabi: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangPunjabi[] = {
	"_IN",		/* 1: India (Gurmukhi script) */
	".Ar_PK",	/* 2: Pakistan (Arabic script) */
	NULL		/* list terminator */
};

/*	Mongolian: script suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangMongolian[] = {
	".Cy",	/* 1: Cyrillic script */
	".Mn",	/* 2: Mongolian script */
	NULL	/* list terminator */
};

/*	Sindhi: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangSindhi[] = {
	"_IN",	/* 1: India */
	"_PK",	/* 2: Pakistan */
	NULL	/* list terminator */
};

/*	Tamazight: script suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangTamazight[] = {
	".Ar",	/* 1: Arabic script */
	".La",	/* 2: Latin script */
	NULL	/* list terminator */
};

/*	Nepali: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangNepali[] = {
	"_NP",	/* 1: Nepal */
	"_IN",	/* 2: India */
	NULL	/* list terminator */
};

/*	Tigrinya: locale-string suffix for each Windows sublanguage ID (first entry = sublanguage 1) */
static const char *const kSublangTigrinya[] = {
	"_ET",	/* 1: Ethiopia */
	"_ER",	/* 2: Eritrea */
	NULL	/* list terminator */
};

/*	Master language table. The table is positional: the entry at index ( N - 1 ) describes */
/*	Windows primary language ID N (the number in the leading comment of each row). */
/*	Rows must therefore never be reordered or removed; IDs with no known language code */
/*	keep their slot with an empty tag. The second field is the optional sublanguage list. */
const LanguageTableEntry		kLanguageTable [ ] =
{
/* 1 */		{ "ar",		kSublangArabic },		/*	Arabic */
/* 2 */		{ "bg",		0 },					/*	Bulgarian */
/* 3 */		{ "ca",		0 },					/*	Catalan */
/* 4 */		{ "zh",		kSublangChinese },		/*	Chinese */
/* 5 */		{ "cs",		0 },					/*	Czech */
/* 6 */		{ "da",		0 },					/*	Danish */
/* 7 */		{ "de",		kSublangGerman },		/*	German */
/* 8 */		{ "el",		0 },					/*	Greek */
/* 9 */		{ "en",		kSublangEnglish },		/*	English */
/* 10 */	{ "es",		kSublangSpanish },		/*	Spanish */
/* 11 */	{ "fi",		0 },					/*	Finnish */
/* 12 */	{ "fr",		kSublangFrench },		/*	French */
/* 13 */	{ "he",		0 },					/*	Hebrew (ISO code used to be "iw") */
/* 14 */	{ "hu",		0 },					/*	Hungarian */
/* 15 */	{ "is",		0 },					/*	Icelandic */
/* 16 */	{ "it",		kSublangItalian },		/*	Italian */
/* 17 */	{ "ja",		0 },					/*	Japanese */
/* 18 */	{ "ko",		0 },					/*	Korean */
/* 19 */	{ "nl",		kSublangDutch },		/*	Dutch */
/* 20 */	{ "no",		kSublangNorwegian },	/*	Norwegian */
/* 21 */	{ "pl",		0 },					/*	Polish */
/* 22 */	{ "pt",		kSublangPortuguese },	/*	Portuguese */
/* 23 */	{ "rm",		0 },					/*	Rhaeto-Romance */
/* 24 */	{ "ro",		kSublangRomanian },		/*	Romanian */
/* 25 */	{ "ru",		0 },					/*	Russian */
/* 26 */	{ "sh",		kSublangSerboCroatian },/*	Serbo-Croatian ("hr" = Croatian; "sr" = Serbian) */
/* 27 */	{ "sk",		0 },					/*	Slovak */
/* 28 */	{ "sq",		0 },					/*	Albanian */
/* 29 */	{ "sv",		kSublangSwedish },		/*	Swedish */
/* 30 */	{ "th",		0 },					/*	Thai */
/* 31 */	{ "tr",		0 },					/*	Turkish */
/* 32 */	{ "ur",		0 },					/*	Urdu */
/* 33 */	{ "id",		0 },					/*	Indonesian (ISO code used to be "in") */
/* 34 */	{ "uk",		0 },					/*	Ukrainian */
/* 35 */	{ "be",		0 },					/*	Byelorussian */
/* 36 */	{ "sl",		0 },					/*	Slovenian */
/* 37 */	{ "et",		0 },					/*	Estonian */
/* 38 */	{ "lv",		0 },					/*	Latvian */
/* 39 */	{ "lt",		0 },					/*	Lithuanian */
/* 40 */	{ "tg",		0 },					/*	Tajiki */
/* 41 */	{ "fa",		0 },					/*	Farsi (Iranian) */
/* 42 */	{ "vi",		0 },					/*	Vietnamese */
/* 43 */	{ "hy",		0 },					/*	Armenian */
/* 44 */	{ "az",		kSublangAzerbaijani },	/*	Azerbaijani */
/* 45 */	{ "eu",		0 },					/*	Basque */
/* 46 */	{ "wen",	0 },					/*	Sorbian languages */
/* 47 */	{ "mk",		0 },					/*	Macedonian (Former Yugoslav Republic Of Macedonia) */
/* 48 */	{ "st",		0 },					/*	Sutu (sic!) -- this is probably Southern Sotho, aka Sesotho */
/* 49 */	{ "ts",		0 },					/*	Tsonga */
/* 50 */	{ "tn",		0 },					/*	Tswana */
/* 51 */	{ "ven",	0 },					/*	Venda */
/* 52 */	{ "xh",		0 },					/*	Xhosa */
/* 53 */	{ "zu",		0 },					/*	Zulu */
/* 54 */	{ "af",		0 },					/*	Afrikaans */
/* 55 */	{ "ka",		0 },					/*	Georgian */
/* 56 */	{ "fo",		0 },					/*	Faeroese */
/* 57 */	{ "hi",		0 },					/*	Hindi */
/* 58 */	{ "mt",		0 },					/*	Maltese */
/* 59 */	{ "se",		0 },					/*	Sami */
/* 60 */	{ "gv",		0 },					/*	Gaelic -- NOTE(review): ISO-639 "gv" is Manx; confirm which Gaelic language is intended here */
/* 61 */	{ "yi",		0 },					/*	Yiddish (ISO code used to be "ji") */
/* 62 */	{ "ms",		kSublangMalay },		/*	Malay */
/* 63 */	{ "kk",		0 },					/*	Kazakh */
/* 64 */	{ "ky",		0 },					/*	Kirghiz */
/* 65 */	{ "sw",		0 },					/*	Swahili */
/* 66 */	{ "tk",		0 },					/*	Turkmen */
/* 67 */	{ "uz",		kSublangUzbek },		/*	Uzbek */
/* 68 */	{ "tt",		0 },					/*	Tatar */
/* 69 */	{ "bn",		kSublangBengali },		/*	Bengali */
/* 70 */	{ "pa",		kSublangPunjabi },		/*	Punjabi */
/* 71 */	{ "gu",		0 },					/*	Gujarati */
/* 72 */	{ "or",		0 },					/*	Oriya */
/* 73 */	{ "ta",		0 },					/*	Tamil */
/* 74 */	{ "te",		0 },					/*	Telugu */
/* 75 */	{ "kn",		0 },					/*	Kannada */
/* 76 */	{ "ml",		0 },					/*	Malayalam */
/* 77 */	{ "as",		0 },					/*	Assamese */
/* 78 */	{ "mr",		0 },					/*	Marathi */
/* 79 */	{ "sa",		0 },					/*	Sanskrit */
/* 80 */	{ "mn",		kSublangMongolian },	/*	Mongolian */
/* 81 */	{ "bo",		0 },					/*	Tibetan */
/* 82 */	{ "cy",		0 },					/*	Welsh */
/* 83 */	{ "km",		0 },					/*	Khmer */
/* 84 */	{ "lo",		0 },					/*	Lao */
/* 85 */	{ "my",		0 },					/*	Burmese */
/* 86 */	{ "gl",		0 },					/*	Galician */
/* 87 */	{ "kok",	0 },					/*	Konkani */
/* 88 */	{ "mni",	0 },					/*	Manipuri */
/* 89 */	{ "sd",		kSublangSindhi },		/*	Sindhi */
/* 90 */	{ "syr",	0 },					/*	Syriac */
/* 91 */	{ "si",		0 },					/*	Sinhalese */
/* 92 */	{ "chr",	0 },					/*	Cherokee */
/* 93 */	{ "iu",		0 },					/*	Inuktitut */
/* 94 */	{ "am",		0 },					/*	Amharic */
/* 95 */	{ "ber",	kSublangTamazight },	/*	Tamazight (no ISO code currently assigned -- map to Berber) */
/* 96 */	{ "ks",		0 },					/*	Kashmiri */
/* 97 */	{ "ne",		kSublangNepali },		/*	Nepali */
/* 98 */	{ "fy",		0 },					/*	Frisian */
/* 99 */	{ "ps",		0 },					/*	Pashto */
/* 100 */	{ "tl",		0 },					/*	Tagalog (Filipino) */
/* 101 */	{ "div",	0 },					/*	Dhivehi */
/* 102 */	{ "",		0 },					/*	Edo (code??) */
/* 103 */	{ "",		0 },					/*	Fulfulde (code??) */
/* 104 */	{ "ha",		0 },					/*	Hausa */
/* 105 */	{ "",		0 },					/*	Ibibio (code??) */
/* 106 */	{ "yo",		0 },					/*	Yoruba */
/* 107 */	{ "",		0 },					/*	(unassigned) */
/* 108 */	{ "",		0 },					/*	(unassigned) */
/* 109 */	{ "",		0 },					/*	(unassigned) */
/* 110 */	{ "",		0 },					/*	(unassigned) */
/* 111 */	{ "",		0 },					/*	(unassigned) */
/* 112 */	{ "ibo",	0 },					/*	Igbo */
/* 113 */	{ "kau",	0 },					/*	Kanuri */
/* 114 */	{ "om",		0 },					/*	Oromo (Galla) */
/* 115 */	{ "ti",		kSublangTigrinya },		/*	Tigrinya */
/* 116 */	{ "gn",		0 },					/*	Guarani */
/* 117 */	{ "haw",	0 },					/*	Hawaiian */
/* 118 */	{ "la",		0 },					/*	Latin */
/* 119 */	{ "so",		0 },					/*	Somali */
/* 120 */	{ "",		0 },					/*	Yi (code??) */
/* 121 */	{ "pap",	0 }						/*	Papiamento */
} ;

/*	number of rows in kLanguageTable, which is also the highest primary language ID it covers */
const int	kLanguageTableSize = sizeof kLanguageTable / sizeof * kLanguageTable ;

char * MapWindowsLangIDToMacLocaleString ( uint16_t inWindowsLangID, char * outMacLocaleString )
{
	/*	This function maps a Windows language ID (LANGID) to a Mac locale string. */

	/*	A Windows language ID is a 16-bit value calculated combining a 10-bit primary language ID */
	/*	and a 6-bit "sublanguage" ID that conveys additional information about the dialect, country (region), */
	/*	script or encoding of the primary language. */
	/*	How the sublanguage ID is interpreted depends on the primary language ID. */

	/*	A Mac locale string is a string of the form "language-variant.script-variant_region" */
	/*	where language is the ISO-639 language code, region is the ISO-3166 country code, */
	/*	script is a two-letter script code loosely based on ISO-15924, and the variant parts */
	/*	are mac-specific ways to specify language and script variants. */
	/*	Any part of the locale string can be missing except for the language code. */

	unsigned int			language = ( inWindowsLangID & 0x03FF ) ;
	unsigned int			sublanguage = ( inWindowsLangID >> 10 ) ;
	const char * const *	sublangList = 0 ;

	outMacLocaleString [ 0 ] = 0 ;

	if ( ( language > 0 ) && ( language <= kLanguageTableSize ) )
	{
		/*	copy the language tag -- this is the ISO-639-1 two-letter language code, */
		/*	or, in some cases, the ISO-639-2 three-letter language code */
		strcpy ( outMacLocaleString, kLanguageTable [ language - 1 ] . mLanguageTag ) ;

		/*	some languages have an associated sublanguage list (the sublanguage usually specifies */
		/*	the country/region, but can also specify the script or other information) */
		if ( ( sublanguage > 0 ) && ( sublangList = kLanguageTable [ language - 1 ] . mSublangList ) != 0 )
		{
			for ( ; * sublangList != 0 ; sublangList ++ )
			{
				if ( -- sublanguage == 0 )
				{
					strcat ( outMacLocaleString, * sublangList ) ;
					break ;
				}
			}
		}
	}

	return outMacLocaleString ;
}

/*
 *	MapMacLocaleStringToWindowsLangID
 *
 *	Maps a Mac locale string ("language-variant.script-variant_region") to a Windows
 *	language ID (LANGID).
 *
 *	The leading run of up to three lowercase ASCII letters is taken as the language code and
 *	looked up in kLanguageTable; its 1-based position is the primary language ID. Whatever
 *	follows the language code is compared, as a whole, against the language's sublanguage
 *	list; the 1-based position of the matching suffix is the sublanguage ID.
 *	When there is no suffix, no sublanguage list, or no match, the sublanguage ID is 1
 *	(the value Windows calls SUBLANG_DEFAULT).
 *
 *	inLocaleString	NUL-terminated locale string of any length; may be NULL
 *
 *	Returns ( sublanguage << 10 ) | primary language, or 0 when the string is NULL, shorter
 *	than two characters, does not start with a lowercase letter, or starts with a language
 *	code that is not in kLanguageTable.
 */
uint16_t MapMacLocaleStringToWindowsLangID ( const char * inLocaleString )
{
	char					languageTag [ 4 ] ;
	const char *			suffix ;
	const char * const *	sublangList ;
	unsigned int			sublanguage = 1 ;
	int						tagLength = 0 ;
	int						index ;

	/*	reject null strings and strings too short to hold a language code */
	if ( ( inLocaleString == NULL ) || ( inLocaleString [ 0 ] == 0 ) || ( inLocaleString [ 1 ] == 0 ) )
	{
		return 0 ;
	}

	/*	extract ISO-639 language code (the terminating NUL stops the scan on short strings) */
	while ( tagLength < 3 )
	{
		char	c = inLocaleString [ tagLength ] ;
		if ( ( c < 'a' ) || ( c > 'z' ) )
		{
			break ;
		}
		languageTag [ tagLength ++ ] = c ;
	}
	languageTag [ tagLength ] = 0 ;

	/*	an empty tag must not be looked up: it would match the placeholder rows of */
	/*	kLanguageTable whose tag is "" and yield a bogus language ID */
	if ( tagLength == 0 )
	{
		return 0 ;
	}

	/*	the sublanguage portion is compared in place -- copying it into a fixed-size */
	/*	buffer would overflow on long input */
	suffix = inLocaleString + tagLength ;

	/*	look up the language code in the language table */
	for ( index = 0 ; index < kLanguageTableSize ; index ++ )
	{
		if ( strcmp ( languageTag, kLanguageTable [ index ] . mLanguageTag ) != 0 )
		{
			continue ;
		}

		/*	found: look up the remaining portion of the locale string in the sublanguage list. */
		/*	An empty suffix is never looked up, because some lists contain "" placeholders */
		/*	(e.g. for regions with no country code) that a bare language code would match. */
		sublangList = kLanguageTable [ index ] . mSublangList ;
		if ( ( sublangList != NULL ) && ( suffix [ 0 ] != 0 ) )
		{
			unsigned int	candidate = 1 ;

			for ( ; * sublangList != NULL ; sublangList ++, candidate ++ )
			{
				if ( strcmp ( suffix, * sublangList ) == 0 )
				{
					sublanguage = candidate ;
					break ;
				}
			}
		}

		return ( uint16_t ) ( ( unsigned int ) ( index + 1 ) | ( sublanguage << 10 ) ) ;
	}

	/*	unknown language code */
	return 0 ;
}
