/*
 *  LanguageMapping.c
 *
 *  Tables and functions for converting between Windows LANGIDs and Macintosh locale strings
 *
 *  Copyright (c) 2000-2002 Marco Piovanelli
 *  All Rights Reserved
 *
 */

/*  this file contains a list of ISO-639 language codes, sorted by Windows primary language ID */
/*  see:       (reference link missing from this copy of the file) */
/*  see also:  (reference link missing from this copy of the file) */
/*  and:       (reference link missing from this copy of the file) */
/*  and:       (reference link missing from this copy of the file) */

#include <stdint.h>     /* uint16_t */
#include <string.h>     /* strcpy, strcat, strcmp, NULL */

char * MapWindowsLangIDToMacLocaleString ( uint16_t inWindowsLangID, char * outMacLocaleString ) ;
uint16_t MapMacLocaleStringToWindowsLangID ( const char * inLocaleString ) ;

/*
 *  One row of the language table.
 *
 *  mLanguageTag   ISO-639 language code; an empty string marks a primary language ID
 *                 that is unassigned or has no known code.
 *  mSublangList   NULL, or a NULL-terminated list of locale-string suffixes indexed by
 *                 (sublanguage ID - 1).  An empty string in the list is a placeholder
 *                 for a sublanguage that has no suffix.
 */
typedef struct LanguageTableEntry
{
    const char *            mLanguageTag ;
    const char * const *    mSublangList ;
} LanguageTableEntry ;

static const char * const kSublangArabic [ ] =
{
    "_SA",      /* Saudi Arabia */
    "_IQ",      /* Iraq */
    "_EG",      /* Egypt */
    "_LY",      /* Libya */
    "_DZ",      /* Algeria */
    "_MA",      /* Morocco */
    "_TN",      /* Tunisia */
    "_OM",      /* Oman */
    "_YE",      /* Yemen */
    "_SY",      /* Syria */
    "_JO",      /* Jordan */
    "_LB",      /* Lebanon */
    "_KW",      /* Kuwait */
    "_AE",      /* United Arab Emirates */
    "_BH",      /* Bahrain */
    "_QA",      /* Qatar */
    NULL        /* (end of table) */
} ;

static const char * const kSublangChinese [ ] =
{
    "_TW",      /* Taiwan (Traditional Chinese script) */
    "_CN",      /* China (Simplified Chinese script) */
    "_HK",      /* Hong Kong */
    "_SG",      /* Singapore */
    "_MO",      /* Macau */
    NULL        /* (end of table) */
} ;

static const char * const kSublangGerman [ ] =
{
    "_DE",      /* Germany */
    "_CH",      /* Switzerland */
    "_AT",      /* Austria */
    "_LU",      /* Luxembourg */
    "_LI",      /* Liechtenstein */
    NULL        /* (end of table) */
} ;

static const char * const kSublangEnglish [ ] =
{
    "_US",      /* USA */
    "_UK",      /* UK -- NOTE(review): ISO-3166 assigns "GB" to the United Kingdom; */
                /* "_UK" is kept unchanged for compatibility -- confirm which one callers expect */
    "_AU",      /* Australia */
    "_CA",      /* Canada */
    "_NZ",      /* New Zealand */
    "_IE",      /* Ireland */
    "_ZA",      /* South Africa */
    "_JM",      /* Jamaica */
    "",         /* Caribbean (placeholder: no suffix) */
    "_BZ",      /* Belize */
    "_TT",      /* Trinidad and Tobago */
    "_ZW",      /* Zimbabwe */
    "_PH",      /* Philippines */
    "_ID",      /* Indonesia */
    "_HK",      /* Hong Kong */
    "_IN",      /* India */
    "_MY",      /* Malaysia */
    "_SG",      /* Singapore */
    NULL        /* (end of table) */
} ;

static const char * const kSublangSpanish [ ] =
{
    "_ES",      /* Spain (traditional sort) */
    "_MX",      /* Mexico */
    "_ES",      /* Spain (international sort) */
    "_GT",      /* Guatemala */
    "_CR",      /* Costa Rica */
    "_PA",      /* Panama */
    "_DO",      /* Dominican Republic */
    "_VE",      /* Venezuela */
    "_CO",      /* Colombia */
    "_PE",      /* Peru */
    "_AR",      /* Argentina */
    "_EC",      /* Ecuador */
    "_CL",      /* Chile */
    "_UY",      /* Uruguay */
    "_PY",      /* Paraguay */
    "_BO",      /* Bolivia */
    "_SV",      /* El Salvador */
    "_HN",      /* Honduras */
    "_NI",      /* Nicaragua */
    "_PR",      /* Puerto Rico */
    NULL        /* (end of table) */
} ;

static const char * const kSublangFrench [ ] =
{
    "_FR",      /* France */
    "_BE",      /* Belgium */
    "_CA",      /* Canada */
    "_CH",      /* Switzerland */
    "_LU",      /* Luxembourg */
    "_MC",      /* Monaco */
    "",         /* West Indies (placeholder: no suffix) */
    "_RE",      /* Reunion */
    "_CG",      /* Congo */
    "_SN",      /* Senegal */
    "_CM",      /* Cameroon */
    "_CI",      /* Côte d'Ivoire */
    "_ML",      /* Mali */
    "_MA",      /* Morocco */
    "_HT",      /* Haiti */
    NULL        /* (end of table) */
} ;

static const char * const kSublangItalian [ ] =
{
    "_IT",      /* Italy */
    "_CH",      /* Switzerland */
    NULL        /* (end of table) */
} ;

static const char * const kSublangDutch [ ] =
{
    "_NL",      /* Netherlands */
    "_BE",      /* Belgium */
    NULL        /* (end of table) */
} ;

static const char * const kSublangNorwegian [ ] =
{
    "-bok_NO",  /* Bokmål */
    "-nyn_NO",  /* Nynorsk */
    NULL        /* (end of table) */
} ;

static const char * const kSublangPortuguese [ ] =
{
    "_BR",      /* Brazil */
    "_PT",      /* Portugal */
    NULL        /* (end of table) */
} ;

static const char * const kSublangRomanian [ ] =
{
    "_RO",      /* Romania */
    "_MD",      /* Moldova */
    NULL        /* (end of table) */
} ;

static const char * const kSublangSerboCroatian [ ] =
{
    "_HR",      /* Croatia */
    ".La_YU",   /* Yugoslavia (Latin script) */
    ".Cy_YU",   /* Yugoslavia (Cyrillic script) */
    NULL        /* (end of table) */
} ;

static const char * const kSublangSwedish [ ] =
{
    "_SE",      /* Sweden */
    "_FI",      /* Finland */
    NULL        /* (end of table) */
} ;

static const char * const kSublangAzerbaijani [ ] =
{
    ".La",      /* Latin script */
    ".Cy",      /* Cyrillic script */
    NULL        /* (end of table) */
} ;

static const char * const kSublangMalay [ ] =
{
    "_MY",      /* Malaysia */
    "_BN",      /* Brunei Darussalam */
    NULL        /* (end of table) */
} ;

static const char * const kSublangUzbek [ ] =
{
    ".La",      /* Latin script */
    ".Cy",      /* Cyrillic script */
    NULL        /* (end of table) */
} ;

static const char * const kSublangBengali [ ] =
{
    "_IN",      /* India */
    "_BD",      /* Bangladesh */
    NULL        /* (end of table) */
} ;

static const char * const kSublangPunjabi [ ] =
{
    "_IN",      /* India (Gurmukhi script) */
    ".Ar_PK",   /* Pakistan (Arabic script) */
    NULL        /* (end of table) */
} ;

static const char * const kSublangMongolian [ ] =
{
    ".Cy",      /* Cyrillic script */
    ".Mn",      /* Mongolian script */
    NULL        /* (end of table) */
} ;

static const char * const kSublangSindhi [ ] =
{
    "_IN",      /* India */
    "_PK",      /* Pakistan */
    NULL        /* (end of table) */
} ;

static const char * const kSublangTamazight [ ] =
{
    ".Ar",      /* Arabic script */
    ".La",      /* Latin script */
    NULL        /* (end of table) */
} ;

static const char * const kSublangNepali [ ] =
{
    "_NP",      /* Nepal */
    "_IN",      /* India */
    NULL        /* (end of table) */
} ;

static const char * const kSublangTigrinya [ ] =
{
    "_ET",      /* Ethiopia */
    "_ER",      /* Eritrea */
    NULL        /* (end of table) */
} ;

/*  Indexed by (Windows primary language ID - 1); the number in front of each row is the primary language ID. */
const LanguageTableEntry kLanguageTable [ ] =
{
    /*   1 */   { "ar",     kSublangArabic },           /* Arabic */
    /*   2 */   { "bg",     NULL },                     /* Bulgarian */
    /*   3 */   { "ca",     NULL },                     /* Catalan */
    /*   4 */   { "zh",     kSublangChinese },          /* Chinese */
    /*   5 */   { "cs",     NULL },                     /* Czech */
    /*   6 */   { "da",     NULL },                     /* Danish */
    /*   7 */   { "de",     kSublangGerman },           /* German */
    /*   8 */   { "el",     NULL },                     /* Greek */
    /*   9 */   { "en",     kSublangEnglish },          /* English */
    /*  10 */   { "es",     kSublangSpanish },          /* Spanish */
    /*  11 */   { "fi",     NULL },                     /* Finnish */
    /*  12 */   { "fr",     kSublangFrench },           /* French */
    /*  13 */   { "he",     NULL },                     /* Hebrew (ISO code used to be "iw") */
    /*  14 */   { "hu",     NULL },                     /* Hungarian */
    /*  15 */   { "is",     NULL },                     /* Icelandic */
    /*  16 */   { "it",     kSublangItalian },          /* Italian */
    /*  17 */   { "ja",     NULL },                     /* Japanese */
    /*  18 */   { "ko",     NULL },                     /* Korean */
    /*  19 */   { "nl",     kSublangDutch },            /* Dutch */
    /*  20 */   { "no",     kSublangNorwegian },        /* Norwegian */
    /*  21 */   { "pl",     NULL },                     /* Polish */
    /*  22 */   { "pt",     kSublangPortuguese },       /* Portuguese */
    /*  23 */   { "rm",     NULL },                     /* Rhaeto-Romance */
    /*  24 */   { "ro",     kSublangRomanian },         /* Romanian */
    /*  25 */   { "ru",     NULL },                     /* Russian */
    /*  26 */   { "sh",     kSublangSerboCroatian },    /* Serbo-Croatian ("hr" = Croatian; "sr" = Serbian) */
    /*  27 */   { "sk",     NULL },                     /* Slovak */
    /*  28 */   { "sq",     NULL },                     /* Albanian */
    /*  29 */   { "sv",     kSublangSwedish },          /* Swedish */
    /*  30 */   { "th",     NULL },                     /* Thai */
    /*  31 */   { "tr",     NULL },                     /* Turkish */
    /*  32 */   { "ur",     NULL },                     /* Urdu */
    /*  33 */   { "id",     NULL },                     /* Indonesian (ISO code used to be "in") */
    /*  34 */   { "uk",     NULL },                     /* Ukrainian */
    /*  35 */   { "be",     NULL },                     /* Byelorussian */
    /*  36 */   { "sl",     NULL },                     /* Slovenian */
    /*  37 */   { "et",     NULL },                     /* Estonian */
    /*  38 */   { "lv",     NULL },                     /* Latvian */
    /*  39 */   { "lt",     NULL },                     /* Lithuanian */
    /*  40 */   { "tg",     NULL },                     /* Tajiki */
    /*  41 */   { "fa",     NULL },                     /* Farsi (Iranian) */
    /*  42 */   { "vi",     NULL },                     /* Vietnamese */
    /*  43 */   { "hy",     NULL },                     /* Armenian */
    /*  44 */   { "az",     kSublangAzerbaijani },      /* Azerbaijani */
    /*  45 */   { "eu",     NULL },                     /* Basque */
    /*  46 */   { "wen",    NULL },                     /* Sorbian languages */
    /*  47 */   { "mk",     NULL },                     /* Macedonian (Former Yugoslav Republic Of Macedonia) */
    /*  48 */   { "st",     NULL },                     /* Sutu (sic!) -- this is probably Southern Sotho, aka Sesotho */
    /*  49 */   { "ts",     NULL },                     /* Tsonga */
    /*  50 */   { "tn",     NULL },                     /* Tswana */
    /*  51 */   { "ven",    NULL },                     /* Venda */
    /*  52 */   { "xh",     NULL },                     /* Xhosa */
    /*  53 */   { "zu",     NULL },                     /* Zulu */
    /*  54 */   { "af",     NULL },                     /* Afrikaans */
    /*  55 */   { "ka",     NULL },                     /* Georgian */
    /*  56 */   { "fo",     NULL },                     /* Faeroese */
    /*  57 */   { "hi",     NULL },                     /* Hindi */
    /*  58 */   { "mt",     NULL },                     /* Maltese */
    /*  59 */   { "se",     NULL },                     /* Sami */
    /*  60 */   { "gv",     NULL },                     /* Gaelic */
    /*  61 */   { "yi",     NULL },                     /* Yiddish (ISO code used to be "ji") */
    /*  62 */   { "ms",     kSublangMalay },            /* Malay */
    /*  63 */   { "kk",     NULL },                     /* Kazakh */
    /*  64 */   { "ky",     NULL },                     /* Kirghiz */
    /*  65 */   { "sw",     NULL },                     /* Swahili */
    /*  66 */   { "tk",     NULL },                     /* Turkmen */
    /*  67 */   { "uz",     kSublangUzbek },            /* Uzbek */
    /*  68 */   { "tt",     NULL },                     /* Tatar */
    /*  69 */   { "bn",     kSublangBengali },          /* Bengali */
    /*  70 */   { "pa",     kSublangPunjabi },          /* Punjabi */
    /*  71 */   { "gu",     NULL },                     /* Gujarati */
    /*  72 */   { "or",     NULL },                     /* Oriya */
    /*  73 */   { "ta",     NULL },                     /* Tamil */
    /*  74 */   { "te",     NULL },                     /* Telugu */
    /*  75 */   { "kn",     NULL },                     /* Kannada */
    /*  76 */   { "ml",     NULL },                     /* Malayalam */
    /*  77 */   { "as",     NULL },                     /* Assamese */
    /*  78 */   { "mr",     NULL },                     /* Marathi */
    /*  79 */   { "sa",     NULL },                     /* Sanskrit */
    /*  80 */   { "mn",     kSublangMongolian },        /* Mongolian */
    /*  81 */   { "bo",     NULL },                     /* Tibetan */
    /*  82 */   { "cy",     NULL },                     /* Welsh */
    /*  83 */   { "km",     NULL },                     /* Khmer */
    /*  84 */   { "lo",     NULL },                     /* Lao */
    /*  85 */   { "my",     NULL },                     /* Burmese */
    /*  86 */   { "gl",     NULL },                     /* Galician */
    /*  87 */   { "kok",    NULL },                     /* Konkani */
    /*  88 */   { "mni",    NULL },                     /* Manipuri */
    /*  89 */   { "sd",     kSublangSindhi },           /* Sindhi */
    /*  90 */   { "syr",    NULL },                     /* Syriac */
    /*  91 */   { "si",     NULL },                     /* Sinhalese */
    /*  92 */   { "chr",    NULL },                     /* Cherokee */
    /*  93 */   { "iu",     NULL },                     /* Inuktitut */
    /*  94 */   { "am",     NULL },                     /* Amharic */
    /*  95 */   { "ber",    kSublangTamazight },        /* Tamazight (no ISO code currently assigned -- map to Berber) */
    /*  96 */   { "ks",     NULL },                     /* Kashmiri */
    /*  97 */   { "ne",     kSublangNepali },           /* Nepali */
    /*  98 */   { "fy",     NULL },                     /* Frisian */
    /*  99 */   { "ps",     NULL },                     /* Pashto */
    /* 100 */   { "tl",     NULL },                     /* Tagalog (Filipino) */
    /* 101 */   { "div",    NULL },                     /* Dhivehi */
    /* 102 */   { "",       NULL },                     /* Edo (code??) */
    /* 103 */   { "",       NULL },                     /* Fulfulde (code??) */
    /* 104 */   { "ha",     NULL },                     /* Hausa */
    /* 105 */   { "",       NULL },                     /* Ibibio (code??) */
    /* 106 */   { "yo",     NULL },                     /* Yoruba */
    /* 107 */   { "",       NULL },                     /* (unassigned) */
    /* 108 */   { "",       NULL },                     /* (unassigned) */
    /* 109 */   { "",       NULL },                     /* (unassigned) */
    /* 110 */   { "",       NULL },                     /* (unassigned) */
    /* 111 */   { "",       NULL },                     /* (unassigned) */
    /* 112 */   { "ibo",    NULL },                     /* Igbo */
    /* 113 */   { "kau",    NULL },                     /* Kanuri */
    /* 114 */   { "om",     NULL },                     /* Oromo (Galla) */
    /* 115 */   { "ti",     kSublangTigrinya },         /* Tigrinya */
    /* 116 */   { "gn",     NULL },                     /* Guarani */
    /* 117 */   { "haw",    NULL },                     /* Hawaiian */
    /* 118 */   { "la",     NULL },                     /* Latin */
    /* 119 */   { "so",     NULL },                     /* Somali */
    /* 120 */   { "",       NULL },                     /* Yi (code??) */
    /* 121 */   { "pap",    NULL }                      /* Papiamento */
} ;

const int kLanguageTableSize = sizeof ( kLanguageTable ) / sizeof ( kLanguageTable [ 0 ] ) ;

/*
 *  MapWindowsLangIDToMacLocaleString
 *
 *  Maps a Windows language ID (LANGID) to a Mac locale string.
 *
 *  A Windows language ID is a 16-bit value calculated combining a 10-bit primary language ID
 *  and a 6-bit "sublanguage" ID that conveys additional information about the dialect,
 *  country (region), script or encoding of the primary language.
 *  How the sublanguage ID is interpreted depends on the primary language ID.
 *
 *  A Mac locale string is a string of the form "language-variant.script-variant_region"
 *  where language is the ISO-639 language code, region is the ISO-3166 country code,
 *  script is a two-letter script code loosely based on ISO-15924, and the variant parts
 *  are mac-specific ways to specify language and script variants.
 *  Any part of the locale string can be missing except for the language code.
 *
 *  inWindowsLangID       the LANGID to convert
 *  outMacLocaleString    caller-supplied buffer that receives the NUL-terminated result.
 *                        No length is passed in, so the caller must size it for the longest
 *                        string the tables can produce: with the tables in this file that is
 *                        "no-bok_NO", i.e. 10 bytes including the terminator.
 *
 *  Returns outMacLocaleString.  The result is an empty string when the primary language ID
 *  is zero, lies outside the table, or has no language code assigned.  A sublanguage ID that
 *  is zero or past the end of the language's list yields the bare language code.
 *  A NULL buffer is returned unchanged.
 */
char * MapWindowsLangIDToMacLocaleString ( uint16_t inWindowsLangID, char * outMacLocaleString )
{
    const unsigned int language = ( inWindowsLangID & 0x03FFu ) ;       /* low 10 bits */
    const unsigned int sublanguage = ( (unsigned int) inWindowsLangID >> 10 ) ;    /* high 6 bits */
    const LanguageTableEntry * entry ;
    const char * const * suffix ;
    unsigned int position ;

    if ( outMacLocaleString == NULL )
    {
        return outMacLocaleString ;
    }

    outMacLocaleString [ 0 ] = 0 ;

    if ( ( language == 0 ) || ( language > (unsigned int) kLanguageTableSize ) )
    {
        return outMacLocaleString ;
    }

    /* copy the language tag -- this is the ISO-639-1 two-letter language code, */
    /* or, in some cases, the ISO-639-2 three-letter language code */
    entry = & kLanguageTable [ language - 1 ] ;
    strcpy ( outMacLocaleString, entry -> mLanguageTag ) ;

    /* some languages have an associated sublanguage list (the sublanguage usually specifies */
    /* the country/region, but can also specify the script or other information) */
    suffix = entry -> mSublangList ;
    if ( ( sublanguage > 0 ) && ( suffix != NULL ) )
    {
        /* walk to the (sublanguage)th element, stopping early at the list terminator */
        for ( position = 1 ; ( * suffix != NULL ) && ( position < sublanguage ) ; position ++ )
        {
            suffix ++ ;
        }

        if ( * suffix != NULL )
        {
            strcat ( outMacLocaleString, * suffix ) ;
        }
    }

    return outMacLocaleString ;
}

/*
 *  MapMacLocaleStringToWindowsLangID
 *
 *  Maps a Mac locale string back to a Windows language ID (LANGID).
 *
 *  The language code is the leading run of up to three lowercase ASCII letters; whatever
 *  follows is looked up verbatim in the sublanguage list of the matching language.
 *
 *  inLocaleString    NUL-terminated locale string, of any length
 *
 *  Returns (primary language ID | sublanguage ID << 10).  The sublanguage ID is 1 when the
 *  language has no sublanguage list, when the string carries no suffix, or when the suffix
 *  is not in the list.  Returns 0 when the string is NULL, shorter than two characters,
 *  does not start with a language code, or names a language that is not in the table.
 */
uint16_t MapMacLocaleStringToWindowsLangID ( const char * inLocaleString )
{
    char languageTag [ 4 ] ;
    const char * sublanguageTag ;
    int tagLength ;
    int tableIndex ;

    /* reject NULL and strings shorter than two characters */
    if ( ( inLocaleString == NULL ) || ( inLocaleString [ 0 ] == 0 ) || ( inLocaleString [ 1 ] == 0 ) )
    {
        return 0 ;
    }

    /* extract ISO-639 language code (the terminating NUL also ends the scan) */
    for ( tagLength = 0 ; tagLength < 3 ; tagLength ++ )
    {
        const char c = inLocaleString [ tagLength ] ;

        if ( ( c < 'a' ) || ( c > 'z' ) )
        {
            break ;
        }
        languageTag [ tagLength ] = c ;
    }
    languageTag [ tagLength ] = 0 ;

    /* an empty tag must not be looked up: it would match the "" rows */
    /* that stand for unassigned primary language IDs */
    if ( tagLength == 0 )
    {
        return 0 ;
    }

    /* the sublanguage tag, if any, is simply the rest of the string; it is compared */
    /* in place, so its length is not limited by any local buffer */
    sublanguageTag = inLocaleString + tagLength ;

    /* look up the language code in the language table */
    for ( tableIndex = 0 ; tableIndex < kLanguageTableSize ; tableIndex ++ )
    {
        const LanguageTableEntry * entry = & kLanguageTable [ tableIndex ] ;
        const char * const * suffix = entry -> mSublangList ;
        int sublanguage = 1 ;       /* default sublanguage */
        int position ;

        if ( strcmp ( languageTag, entry -> mLanguageTag ) != 0 )
        {
            continue ;
        }

        /* look up the remaining portion of the locale string in the sublanguage list; */
        /* an empty remainder is never searched for, so that it cannot match the "" */
        /* placeholders and a bare language code keeps the default sublanguage */
        if ( ( suffix != NULL ) && ( sublanguageTag [ 0 ] != 0 ) )
        {
            for ( position = 1 ; * suffix != NULL ; suffix ++, position ++ )
            {
                if ( strcmp ( sublanguageTag, * suffix ) == 0 )
                {
                    sublanguage = position ;
                    break ;
                }
            }
        }

        return (uint16_t) ( ( tableIndex + 1 ) | ( sublanguage << 10 ) ) ;
    }

    return 0 ;
}