/*
* LanguageMapping.c
*
* Tables and functions for converting between Windows LANGIDs and Macintosh locale strings
*
* Copyright (c) 2000-2002 Marco Piovanelli
* All Rights Reserved
*
*/
/* this file contains a list of ISO-639 language codes, sorted by Windows primary language ID */
/* see: */
/* see also: */
/* and: */
/* and: */
#include
#include
/* Converts a Windows language ID into a Mac locale string. The result is written into */
/* the caller-supplied buffer outMacLocaleString (no buffer size is passed, so the caller */
/* must provide enough room) and that same pointer is returned. */
char * MapWindowsLangIDToMacLocaleString ( uint16_t inWindowsLangID, char * outMacLocaleString ) ;
/* Converts a Mac locale string into a Windows language ID. */
/* Returns 0 when the language code is not found in the language table. */
uint16_t MapMacLocaleStringToWindowsLangID ( const char * inLocaleString ) ;
/* One row of the primary-language table: a language tag plus an optional list of */
/* sublanguage suffixes that can be appended to that tag. */
typedef struct LanguageTableEntry
{
    /* language code string; the table uses an empty string for IDs that have no code */
    const char *mLanguageTag;

    /* array of suffix strings ended by a null pointer, or 0 when the language has none */
    const char * const *mSublangList;
} LanguageTableEntry;
/* Region suffixes for Arabic. The list is positional: the entry at position N */
/* (counting from 1) belongs to sublanguage ID N. A null pointer ends the list. */
static const char * const kSublangArabic[] =
{
    "_SA",      /*  1: Saudi Arabia */
    "_IQ",      /*  2: Iraq */
    "_EG",      /*  3: Egypt */
    "_LY",      /*  4: Libya */
    "_DZ",      /*  5: Algeria */
    "_MA",      /*  6: Morocco */
    "_TN",      /*  7: Tunisia */
    "_OM",      /*  8: Oman */
    "_YE",      /*  9: Yemen */
    "_SY",      /* 10: Syria */
    "_JO",      /* 11: Jordan */
    "_LB",      /* 12: Lebanon */
    "_KW",      /* 13: Kuwait */
    "_AE",      /* 14: United Arab Emirates */
    "_BH",      /* 15: Bahrain */
    "_QA",      /* 16: Qatar */
    0           /* end of list */
};
/* Region suffixes for Chinese. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangChinese[] =
{
    "_TW",      /* 1: Taiwan (Traditional Chinese script) */
    "_CN",      /* 2: China (Simplified Chinese script) */
    "_HK",      /* 3: Hong Kong */
    "_SG",      /* 4: Singapore */
    "_MO",      /* 5: Macau */
    0           /* end of list */
};
/* Region suffixes for German. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangGerman[] =
{
    "_DE",      /* 1: Germany */
    "_CH",      /* 2: Switzerland */
    "_AT",      /* 3: Austria */
    "_LU",      /* 4: Luxembourg */
    "_LI",      /* 5: Liechtenstein */
    0           /* end of list */
};
/* Region suffixes for English. The list is positional: the entry at position N */
/* (counting from 1) belongs to sublanguage ID N. A null pointer ends the list. */
/* Regions are ISO-3166 country codes, so the United Kingdom is "GB" (not "UK"). */
/* The empty string is a placeholder for a sublanguage that has no single country */
/* code; it must stay in place so that the following entries keep their positions. */
static const char * const kSublangEnglish[] =
{
    "_US",      /*  1: USA */
    "_GB",      /*  2: United Kingdom */
    "_AU",      /*  3: Australia */
    "_CA",      /*  4: Canada */
    "_NZ",      /*  5: New Zealand */
    "_IE",      /*  6: Ireland */
    "_ZA",      /*  7: South Africa */
    "_JM",      /*  8: Jamaica */
    "",         /*  9: Caribbean (placeholder, no country code) */
    "_BZ",      /* 10: Belize */
    "_TT",      /* 11: Trinidad and Tobago */
    "_ZW",      /* 12: Zimbabwe */
    "_PH",      /* 13: Philippines */
    "_ID",      /* 14: Indonesia */
    "_HK",      /* 15: Hong Kong */
    "_IN",      /* 16: India */
    "_MY",      /* 17: Malaysia */
    "_SG",      /* 18: Singapore */
    0           /* end of list */
};
/* Region suffixes for Spanish. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
/* "_ES" is listed twice (positions 1 and 3) because two sublanguage IDs differ only */
/* in sort order; a search of this list by string reaches position 1 first. */
static const char * const kSublangSpanish[] =
{
    "_ES",      /*  1: Spain (traditional sort) */
    "_MX",      /*  2: Mexico */
    "_ES",      /*  3: Spain (international sort) */
    "_GT",      /*  4: Guatemala */
    "_CR",      /*  5: Costa Rica */
    "_PA",      /*  6: Panama */
    "_DO",      /*  7: Dominican Republic */
    "_VE",      /*  8: Venezuela */
    "_CO",      /*  9: Colombia */
    "_PE",      /* 10: Peru */
    "_AR",      /* 11: Argentina */
    "_EC",      /* 12: Ecuador */
    "_CL",      /* 13: Chile */
    "_UY",      /* 14: Uruguay */
    "_PY",      /* 15: Paraguay */
    "_BO",      /* 16: Bolivia */
    "_SV",      /* 17: El Salvador */
    "_HN",      /* 18: Honduras */
    "_NI",      /* 19: Nicaragua */
    "_PR",      /* 20: Puerto Rico */
    0           /* end of list */
};
/* Region suffixes for French. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
/* The empty string is a placeholder that keeps the later entries in position. */
static const char * const kSublangFrench[] =
{
    "_FR",      /*  1: France */
    "_BE",      /*  2: Belgium */
    "_CA",      /*  3: Canada */
    "_CH",      /*  4: Switzerland */
    "_LU",      /*  5: Luxembourg */
    "_MC",      /*  6: Monaco */
    "",         /*  7: West Indies (placeholder, no country code) */
    "_RE",      /*  8: Reunion */
    "_CG",      /*  9: Congo */
    "_SN",      /* 10: Senegal */
    "_CM",      /* 11: Cameroon */
    "_CI",      /* 12: Côte d'Ivoire */
    "_ML",      /* 13: Mali */
    "_MA",      /* 14: Morocco */
    "_HT",      /* 15: Haiti */
    0           /* end of list */
};
/* Region suffixes for Italian. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangItalian[] =
{
    "_IT",      /* 1: Italy */
    "_CH",      /* 2: Switzerland */
    0           /* end of list */
};
/* Region suffixes for Dutch. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangDutch[] =
{
    "_NL",      /* 1: Netherlands */
    "_BE",      /* 2: Belgium */
    0           /* end of list */
};
/* Variant-plus-region suffixes for Norwegian. Position N (counting from 1) */
/* belongs to sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangNorwegian[] =
{
    "-bok_NO",  /* 1: Bokmål */
    "-nyn_NO",  /* 2: Nynorsk */
    0           /* end of list */
};
/* Region suffixes for Portuguese. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangPortuguese[] =
{
    "_BR",      /* 1: Brazil */
    "_PT",      /* 2: Portugal */
    0           /* end of list */
};
/* Region suffixes for Romanian. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangRomanian[] =
{
    "_RO",      /* 1: Romania */
    "_MD",      /* 2: Moldova */
    0           /* end of list */
};
/* Script and region suffixes for Serbo-Croatian. Position N (counting from 1) */
/* belongs to sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangSerboCroatian[] =
{
    "_HR",      /* 1: Croatia */
    ".La_YU",   /* 2: Yugoslavia (Latin script) */
    ".Cy_YU",   /* 3: Yugoslavia (Cyrillic script) */
    0           /* end of list */
};
/* Region suffixes for Swedish. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangSwedish[] =
{
    "_SE",      /* 1: Sweden */
    "_FI",      /* 2: Finland */
    0           /* end of list */
};
/* Script suffixes for Azerbaijani. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangAzerbaijani[] =
{
    ".La",      /* 1: Latin script */
    ".Cy",      /* 2: Cyrillic script */
    0           /* end of list */
};
/* Region suffixes for Malay. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangMalay[] =
{
    "_MY",      /* 1: Malaysia */
    "_BN",      /* 2: Brunei Darussalam */
    0           /* end of list */
};
/* Script suffixes for Uzbek. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangUzbek[] =
{
    ".La",      /* 1: Latin script */
    ".Cy",      /* 2: Cyrillic script */
    0           /* end of list */
};
/* Region suffixes for Bengali. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangBengali[] =
{
    "_IN",      /* 1: India */
    "_BD",      /* 2: Bangladesh */
    0           /* end of list */
};
/* Script and region suffixes for Punjabi. Position N (counting from 1) belongs */
/* to sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangPunjabi[] =
{
    "_IN",      /* 1: India (Gurmukhi script) */
    ".Ar_PK",   /* 2: Pakistan (Arabic script) */
    0           /* end of list */
};
/* Script suffixes for Mongolian. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangMongolian[] =
{
    ".Cy",      /* 1: Cyrillic script */
    ".Mn",      /* 2: Mongolian script */
    0           /* end of list */
};
/* Region suffixes for Sindhi. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangSindhi[] =
{
    "_IN",      /* 1: India */
    "_PK",      /* 2: Pakistan */
    0           /* end of list */
};
/* Script suffixes for Tamazight. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangTamazight[] =
{
    ".Ar",      /* 1: Arabic script */
    ".La",      /* 2: Latin script */
    0           /* end of list */
};
/* Region suffixes for Nepali. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangNepali[] =
{
    "_NP",      /* 1: Nepal */
    "_IN",      /* 2: India */
    0           /* end of list */
};
/* Region suffixes for Tigrinya. Position N (counting from 1) belongs to */
/* sublanguage ID N; a null pointer ends the list. */
static const char * const kSublangTigrinya[] =
{
    "_ET",      /* 1: Ethiopia */
    "_ER",      /* 2: Eritrea */
    0           /* end of list */
};
/* Primary-language table. The table is positional: the entry at array index i */
/* describes Windows primary language ID i + 1 (the number shown in each trailing */
/* comment). Each entry holds the language tag and, where the language has */
/* sublanguages, a pointer to its suffix list (0 otherwise). Entries with an empty */
/* tag are placeholders for IDs that are unassigned or have no known code; they */
/* must stay in place so that later entries keep their positions. */
const LanguageTableEntry kLanguageTable[] =
{
    { "ar", kSublangArabic },           /*   1: Arabic */
    { "bg", 0 },                        /*   2: Bulgarian */
    { "ca", 0 },                        /*   3: Catalan */
    { "zh", kSublangChinese },          /*   4: Chinese */
    { "cs", 0 },                        /*   5: Czech */
    { "da", 0 },                        /*   6: Danish */
    { "de", kSublangGerman },           /*   7: German */
    { "el", 0 },                        /*   8: Greek */
    { "en", kSublangEnglish },          /*   9: English */
    { "es", kSublangSpanish },          /*  10: Spanish */
    { "fi", 0 },                        /*  11: Finnish */
    { "fr", kSublangFrench },           /*  12: French */
    { "he", 0 },                        /*  13: Hebrew (ISO code used to be "iw") */
    { "hu", 0 },                        /*  14: Hungarian */
    { "is", 0 },                        /*  15: Icelandic */
    { "it", kSublangItalian },          /*  16: Italian */
    { "ja", 0 },                        /*  17: Japanese */
    { "ko", 0 },                        /*  18: Korean */
    { "nl", kSublangDutch },            /*  19: Dutch */
    { "no", kSublangNorwegian },        /*  20: Norwegian */
    { "pl", 0 },                        /*  21: Polish */
    { "pt", kSublangPortuguese },       /*  22: Portuguese */
    { "rm", 0 },                        /*  23: Rhaeto-Romance */
    { "ro", kSublangRomanian },         /*  24: Romanian */
    { "ru", 0 },                        /*  25: Russian */
    { "sh", kSublangSerboCroatian },    /*  26: Serbo-Croatian ("hr" = Croatian; "sr" = Serbian) */
    { "sk", 0 },                        /*  27: Slovak */
    { "sq", 0 },                        /*  28: Albanian */
    { "sv", kSublangSwedish },          /*  29: Swedish */
    { "th", 0 },                        /*  30: Thai */
    { "tr", 0 },                        /*  31: Turkish */
    { "ur", 0 },                        /*  32: Urdu */
    { "id", 0 },                        /*  33: Indonesian (ISO code used to be "in") */
    { "uk", 0 },                        /*  34: Ukrainian */
    { "be", 0 },                        /*  35: Byelorussian */
    { "sl", 0 },                        /*  36: Slovenian */
    { "et", 0 },                        /*  37: Estonian */
    { "lv", 0 },                        /*  38: Latvian */
    { "lt", 0 },                        /*  39: Lithuanian */
    { "tg", 0 },                        /*  40: Tajiki */
    { "fa", 0 },                        /*  41: Farsi (Iranian) */
    { "vi", 0 },                        /*  42: Vietnamese */
    { "hy", 0 },                        /*  43: Armenian */
    { "az", kSublangAzerbaijani },      /*  44: Azerbaijani */
    { "eu", 0 },                        /*  45: Basque */
    { "wen", 0 },                       /*  46: Sorbian languages */
    { "mk", 0 },                        /*  47: Macedonian (Former Yugoslav Republic Of Macedonia) */
    { "st", 0 },                        /*  48: Sutu (sic!) -- this is probably Southern Sotho, aka Sesotho */
    { "ts", 0 },                        /*  49: Tsonga */
    { "tn", 0 },                        /*  50: Tswana */
    { "ven", 0 },                       /*  51: Venda */
    { "xh", 0 },                        /*  52: Xhosa */
    { "zu", 0 },                        /*  53: Zulu */
    { "af", 0 },                        /*  54: Afrikaans */
    { "ka", 0 },                        /*  55: Georgian */
    { "fo", 0 },                        /*  56: Faeroese */
    { "hi", 0 },                        /*  57: Hindi */
    { "mt", 0 },                        /*  58: Maltese */
    { "se", 0 },                        /*  59: Sami */
    { "gv", 0 },                        /*  60: Gaelic -- NOTE(review): "gv" looks like the code for Manx; confirm */
    { "yi", 0 },                        /*  61: Yiddish (ISO code used to be "ji") */
    { "ms", kSublangMalay },            /*  62: Malay */
    { "kk", 0 },                        /*  63: Kazakh */
    { "ky", 0 },                        /*  64: Kirghiz */
    { "sw", 0 },                        /*  65: Swahili */
    { "tk", 0 },                        /*  66: Turkmen */
    { "uz", kSublangUzbek },            /*  67: Uzbek */
    { "tt", 0 },                        /*  68: Tatar */
    { "bn", kSublangBengali },          /*  69: Bengali */
    { "pa", kSublangPunjabi },          /*  70: Punjabi */
    { "gu", 0 },                        /*  71: Gujarati */
    { "or", 0 },                        /*  72: Oriya */
    { "ta", 0 },                        /*  73: Tamil */
    { "te", 0 },                        /*  74: Telugu */
    { "kn", 0 },                        /*  75: Kannada */
    { "ml", 0 },                        /*  76: Malayalam */
    { "as", 0 },                        /*  77: Assamese */
    { "mr", 0 },                        /*  78: Marathi */
    { "sa", 0 },                        /*  79: Sanskrit */
    { "mn", kSublangMongolian },        /*  80: Mongolian */
    { "bo", 0 },                        /*  81: Tibetan */
    { "cy", 0 },                        /*  82: Welsh */
    { "km", 0 },                        /*  83: Khmer */
    { "lo", 0 },                        /*  84: Lao */
    { "my", 0 },                        /*  85: Burmese */
    { "gl", 0 },                        /*  86: Galician */
    { "kok", 0 },                       /*  87: Konkani */
    { "mni", 0 },                       /*  88: Manipuri */
    { "sd", kSublangSindhi },           /*  89: Sindhi */
    { "syr", 0 },                       /*  90: Syriac */
    { "si", 0 },                        /*  91: Sinhalese */
    { "chr", 0 },                       /*  92: Cherokee */
    { "iu", 0 },                        /*  93: Inuktitut */
    { "am", 0 },                        /*  94: Amharic */
    { "ber", kSublangTamazight },       /*  95: Tamazight (no ISO code currently assigned -- map to Berber) */
    { "ks", 0 },                        /*  96: Kashmiri */
    { "ne", kSublangNepali },           /*  97: Nepali */
    { "fy", 0 },                        /*  98: Frisian */
    { "ps", 0 },                        /*  99: Pashto */
    { "tl", 0 },                        /* 100: Tagalog (Filipino) */
    { "div", 0 },                       /* 101: Dhivehi */
    { "", 0 },                          /* 102: Edo (code??) */
    { "", 0 },                          /* 103: Fulfulde (code??) */
    { "ha", 0 },                        /* 104: Hausa */
    { "", 0 },                          /* 105: Ibibio (code??) */
    { "yo", 0 },                        /* 106: Yoruba */
    { "", 0 },                          /* 107: (unassigned) */
    { "", 0 },                          /* 108: (unassigned) */
    { "", 0 },                          /* 109: (unassigned) */
    { "", 0 },                          /* 110: (unassigned) */
    { "", 0 },                          /* 111: (unassigned) */
    { "ibo", 0 },                       /* 112: Igbo */
    { "kau", 0 },                       /* 113: Kanuri */
    { "om", 0 },                        /* 114: Oromo (Galla) */
    { "ti", kSublangTigrinya },         /* 115: Tigrinya */
    { "gn", 0 },                        /* 116: Guarani */
    { "haw", 0 },                       /* 117: Hawaiian */
    { "la", 0 },                        /* 118: Latin */
    { "so", 0 },                        /* 119: Somali */
    { "", 0 },                          /* 120: Yi (code??) */
    { "pap", 0 }                        /* 121: Papiamento */
};
/* Number of entries in kLanguageTable; because the table is indexed by */
/* (primary language ID - 1), this is also the highest primary language ID it covers. */
const int kLanguageTableSize = (int) ( sizeof kLanguageTable / sizeof kLanguageTable [ 0 ] ) ;
char * MapWindowsLangIDToMacLocaleString ( uint16_t inWindowsLangID, char * outMacLocaleString )
{
/* This function maps a Windows language ID (LANGID) to a Mac locale string. */
/* A Windows language ID is a 16-bit value calculated combining a 10-bit primary language ID */
/* and a 6-bit "sublanguage" ID that conveys additional information about the dialect, country (region), */
/* script or encoding of the primary language. */
/* How the sublanguage ID is interpreted depends on the primary language ID. */
/* A Mac locale string is a string of the form "language-variant.script-variant_region" */
/* where language is the ISO-639 language code, region is the ISO-3166 country code, */
/* script is a two-letter script code loosely based on ISO-15924, and the variant parts */
/* are mac-specific ways to specify language and script variants. */
/* Any part of the locale string can be missing except for the language code. */
unsigned int language = ( inWindowsLangID & 0x03FF ) ;
unsigned int sublanguage = ( inWindowsLangID >> 10 ) ;
const char * const * sublangList = 0 ;
outMacLocaleString [ 0 ] = 0 ;
if ( ( language > 0 ) && ( language <= kLanguageTableSize ) )
{
/* copy the language tag -- this is the ISO-639-1 two-letter language code, */
/* or, in some cases, the ISO-639-2 three-letter language code */
strcpy ( outMacLocaleString, kLanguageTable [ language - 1 ] . mLanguageTag ) ;
/* some languages have an associated sublanguage list (the sublanguage usually specifies */
/* the country/region, but can also specify the script or other information) */
if ( ( sublanguage > 0 ) && ( sublangList = kLanguageTable [ language - 1 ] . mSublangList ) != 0 )
{
for ( ; * sublangList != 0 ; sublangList ++ )
{
if ( -- sublanguage == 0 )
{
strcat ( outMacLocaleString, * sublangList ) ;
break ;
}
}
}
}
return outMacLocaleString ;
}
/*
 * MapMacLocaleStringToWindowsLangID
 *
 * Converts a Mac locale string ("language-variant.script-variant_region") into a
 * Windows language ID (LANGID): primary language ID in the low 10 bits, sublanguage
 * ID in the bits above.
 *
 * inLocaleString   NUL-terminated locale string; a null pointer is treated as
 *                  "not found"
 *
 * The language code is the leading run of lowercase ASCII letters, at most three.
 * Everything after it is the sublanguage suffix and is compared, as a whole, against
 * the language's sublanguage list.
 *
 * Returns 0 when the string does not start with at least two lowercase letters or
 * the language code is not in kLanguageTable. Otherwise returns the LANGID; the
 * sublanguage ID is 1 when the string has no suffix, the language has no sublanguage
 * list, or the suffix is not in that list.
 */
uint16_t MapMacLocaleStringToWindowsLangID(const char *inLocaleString)
{
    char languageTag[4];
    const char *sublanguageTag;
    const char * const *sublangList;
    unsigned int tagLength = 0;
    unsigned int sublanguage = 1;
    unsigned int position;
    int index;

    if (inLocaleString == 0)
    {
        return 0;
    }

    /* extract the ISO-639 language code; the scan stops at the string terminator */
    /* because NUL is not a lowercase letter, so no separate length check is needed */
    while (tagLength < 3)
    {
        const char c = inLocaleString[tagLength];

        if ((c < 'a') || (c > 'z'))
        {
            break;
        }
        languageTag[tagLength++] = c;
    }
    languageTag[tagLength] = 0;

    /* every real tag in the table has two or three letters; rejecting shorter tags */
    /* here also keeps an empty tag from matching the table's empty placeholder rows */
    if (tagLength < 2)
    {
        return 0;
    }

    /* the suffix is compared in place, so an arbitrarily long locale string */
    /* cannot overflow a fixed-size buffer */
    sublanguageTag = inLocaleString + tagLength;

    /* look up the language code in the language table */
    for (index = 0; index < kLanguageTableSize; index++)
    {
        if (strcmp(languageTag, kLanguageTable[index].mLanguageTag) != 0)
        {
            continue;
        }

        /* found: search the sublanguage list only when there is a suffix to look for, */
        /* so that a bare language code maps to the default sublanguage instead of */
        /* matching an empty placeholder entry in the list */
        sublangList = kLanguageTable[index].mSublangList;
        if ((sublangList != 0) && (sublanguageTag[0] != 0))
        {
            for (position = 1; *sublangList != 0; sublangList++, position++)
            {
                if (strcmp(sublanguageTag, *sublangList) == 0)
                {
                    sublanguage = position;
                    break;
                }
            }
        }

        /* primary language ID is the table position counted from 1 */
        return (uint16_t) ((unsigned int) (index + 1) | (sublanguage << 10));
    }

    return 0;
}