module glib.gunicode;

import glib.gtypes;
import glib.gerror;


/// A single code point; bound to D's 32-bit `dchar`.
alias gunichar = dchar;
/// A single UTF-16 code unit; bound to D's 16-bit `wchar`.
alias gunichar2 = wchar;


/// Character type values, as returned by `g_unichar_type`.
/// Members are numbered implicitly from 0 in declaration order.
enum GUnicodeType
{
    G_UNICODE_CONTROL,
    G_UNICODE_FORMAT,
    G_UNICODE_UNASSIGNED,
    G_UNICODE_PRIVATE_USE,
    G_UNICODE_SURROGATE,
    G_UNICODE_LOWERCASE_LETTER,
    G_UNICODE_MODIFIER_LETTER,
    G_UNICODE_OTHER_LETTER,
    G_UNICODE_TITLECASE_LETTER,
    G_UNICODE_UPPERCASE_LETTER,
    G_UNICODE_SPACING_MARK,
    G_UNICODE_ENCLOSING_MARK,
    G_UNICODE_NON_SPACING_MARK,
    G_UNICODE_DECIMAL_NUMBER,
    G_UNICODE_LETTER_NUMBER,
    G_UNICODE_OTHER_NUMBER,
    G_UNICODE_CONNECT_PUNCTUATION,
    G_UNICODE_DASH_PUNCTUATION,
    G_UNICODE_CLOSE_PUNCTUATION,
    G_UNICODE_FINAL_PUNCTUATION,
    G_UNICODE_INITIAL_PUNCTUATION,
    G_UNICODE_OTHER_PUNCTUATION,
    G_UNICODE_OPEN_PUNCTUATION,
    G_UNICODE_CURRENCY_SYMBOL,
    G_UNICODE_MODIFIER_SYMBOL,
    G_UNICODE_MATH_SYMBOL,
    G_UNICODE_OTHER_SYMBOL,
    G_UNICODE_LINE_SEPARATOR,
    G_UNICODE_PARAGRAPH_SEPARATOR,
    G_UNICODE_SPACE_SEPARATOR
}

/// Line-break type values, as returned by `g_unichar_break_type`.
/// Members are numbered implicitly from 0 in declaration order.
enum GUnicodeBreakType
{
    G_UNICODE_BREAK_MANDATORY,
    G_UNICODE_BREAK_CARRIAGE_RETURN,
    G_UNICODE_BREAK_LINE_FEED,
    G_UNICODE_BREAK_COMBINING_MARK,
    G_UNICODE_BREAK_SURROGATE,
    G_UNICODE_BREAK_ZERO_WIDTH_SPACE,
    G_UNICODE_BREAK_INSEPARABLE,
    G_UNICODE_BREAK_NON_BREAKING_GLUE,
    G_UNICODE_BREAK_CONTINGENT,
    G_UNICODE_BREAK_SPACE,
    G_UNICODE_BREAK_AFTER,
    G_UNICODE_BREAK_BEFORE,
    G_UNICODE_BREAK_BEFORE_AND_AFTER,
    G_UNICODE_BREAK_HYPHEN,
    G_UNICODE_BREAK_NON_STARTER,
    G_UNICODE_BREAK_OPEN_PUNCTUATION,
    G_UNICODE_BREAK_CLOSE_PUNCTUATION,
    G_UNICODE_BREAK_QUOTATION,
    G_UNICODE_BREAK_EXCLAMATION,
    G_UNICODE_BREAK_IDEOGRAPHIC,
    G_UNICODE_BREAK_NUMERIC,
    G_UNICODE_BREAK_INFIX_SEPARATOR,
    G_UNICODE_BREAK_SYMBOL,
    G_UNICODE_BREAK_ALPHABETIC,
    G_UNICODE_BREAK_PREFIX,
    G_UNICODE_BREAK_POSTFIX,
    G_UNICODE_BREAK_COMPLEX_CONTEXT,
    G_UNICODE_BREAK_AMBIGUOUS,
    G_UNICODE_BREAK_UNKNOWN,
    G_UNICODE_BREAK_NEXT_LINE,
    G_UNICODE_BREAK_WORD_JOINER,
    G_UNICODE_BREAK_HANGUL_L_JAMO,
    G_UNICODE_BREAK_HANGUL_V_JAMO,
    G_UNICODE_BREAK_HANGUL_T_JAMO,
    G_UNICODE_BREAK_HANGUL_LV_SYLLABLE,
    G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE,
    // The spelling "PARANTHESIS" is part of the identifier; do not "correct" it.
    G_UNICODE_BREAK_CLOSE_PARANTHESIS,
    G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER,
    G_UNICODE_BREAK_HEBREW_LETTER,
    G_UNICODE_BREAK_REGIONAL_INDICATOR
}

/// Script identifiers. The trailing comment on each member is its
/// ISO 15924 four-letter code. INVALID_CODE is -1, COMMON is 0, and every
/// later member is numbered sequentially, so the declaration order is
/// significant and must not be changed.
enum GUnicodeScript
{
    G_UNICODE_SCRIPT_INVALID_CODE = -1,
    G_UNICODE_SCRIPT_COMMON = 0,             // Zyyy
    G_UNICODE_SCRIPT_INHERITED,              // Zinh (Qaai)
    G_UNICODE_SCRIPT_ARABIC,                 // Arab
    G_UNICODE_SCRIPT_ARMENIAN,               // Armn
    G_UNICODE_SCRIPT_BENGALI,                // Beng
    G_UNICODE_SCRIPT_BOPOMOFO,               // Bopo
    G_UNICODE_SCRIPT_CHEROKEE,               // Cher
    G_UNICODE_SCRIPT_COPTIC,                 // Copt (Qaac)
    G_UNICODE_SCRIPT_CYRILLIC,               // Cyrl (Cyrs)
    G_UNICODE_SCRIPT_DESERET,                // Dsrt
    G_UNICODE_SCRIPT_DEVANAGARI,             // Deva
    G_UNICODE_SCRIPT_ETHIOPIC,               // Ethi
    G_UNICODE_SCRIPT_GEORGIAN,               // Geor (Geon, Geoa)
    G_UNICODE_SCRIPT_GOTHIC,                 // Goth
    G_UNICODE_SCRIPT_GREEK,                  // Grek
    G_UNICODE_SCRIPT_GUJARATI,               // Gujr
    G_UNICODE_SCRIPT_GURMUKHI,               // Guru
    G_UNICODE_SCRIPT_HAN,                    // Hani
    G_UNICODE_SCRIPT_HANGUL,                 // Hang
    G_UNICODE_SCRIPT_HEBREW,                 // Hebr
    G_UNICODE_SCRIPT_HIRAGANA,               // Hira
    G_UNICODE_SCRIPT_KANNADA,                // Knda
    G_UNICODE_SCRIPT_KATAKANA,               // Kana
    G_UNICODE_SCRIPT_KHMER,                  // Khmr
    G_UNICODE_SCRIPT_LAO,                    // Laoo
    G_UNICODE_SCRIPT_LATIN,                  // Latn (Latf, Latg)
    G_UNICODE_SCRIPT_MALAYALAM,              // Mlym
    G_UNICODE_SCRIPT_MONGOLIAN,              // Mong
    G_UNICODE_SCRIPT_MYANMAR,                // Mymr
    G_UNICODE_SCRIPT_OGHAM,                  // Ogam
    G_UNICODE_SCRIPT_OLD_ITALIC,             // Ital
    G_UNICODE_SCRIPT_ORIYA,                  // Orya
    G_UNICODE_SCRIPT_RUNIC,                  // Runr
    G_UNICODE_SCRIPT_SINHALA,                // Sinh
    G_UNICODE_SCRIPT_SYRIAC,                 // Syrc (Syrj, Syrn, Syre)
    G_UNICODE_SCRIPT_TAMIL,                  // Taml
    G_UNICODE_SCRIPT_TELUGU,                 // Telu
    G_UNICODE_SCRIPT_THAANA,                 // Thaa
    G_UNICODE_SCRIPT_THAI,                   // Thai
    G_UNICODE_SCRIPT_TIBETAN,                // Tibt
    G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,    // Cans
    G_UNICODE_SCRIPT_YI,                     // Yiii
    G_UNICODE_SCRIPT_TAGALOG,                // Tglg
    G_UNICODE_SCRIPT_HANUNOO,                // Hano
    G_UNICODE_SCRIPT_BUHID,                  // Buhd
    G_UNICODE_SCRIPT_TAGBANWA,               // Tagb

    // Unicode 4.0 additions
    G_UNICODE_SCRIPT_BRAILLE,                // Brai
    G_UNICODE_SCRIPT_CYPRIOT,                // Cprt
    G_UNICODE_SCRIPT_LIMBU,                  // Limb
    G_UNICODE_SCRIPT_OSMANYA,                // Osma
    G_UNICODE_SCRIPT_SHAVIAN,                // Shaw
    G_UNICODE_SCRIPT_LINEAR_B,               // Linb
    G_UNICODE_SCRIPT_TAI_LE,                 // Tale
    G_UNICODE_SCRIPT_UGARITIC,               // Ugar

    // Unicode 4.1 additions
    G_UNICODE_SCRIPT_NEW_TAI_LUE,            // Talu
    G_UNICODE_SCRIPT_BUGINESE,               // Bugi
    G_UNICODE_SCRIPT_GLAGOLITIC,             // Glag
    G_UNICODE_SCRIPT_TIFINAGH,               // Tfng
    G_UNICODE_SCRIPT_SYLOTI_NAGRI,           // Sylo
    G_UNICODE_SCRIPT_OLD_PERSIAN,            // Xpeo
    G_UNICODE_SCRIPT_KHAROSHTHI,             // Khar

    // Unicode 5.0 additions
    G_UNICODE_SCRIPT_UNKNOWN,                // Zzzz
    G_UNICODE_SCRIPT_BALINESE,               // Bali
    G_UNICODE_SCRIPT_CUNEIFORM,              // Xsux
    G_UNICODE_SCRIPT_PHOENICIAN,             // Phnx
    G_UNICODE_SCRIPT_PHAGS_PA,               // Phag
    G_UNICODE_SCRIPT_NKO,                    // Nkoo

    // Unicode 5.1 additions
    G_UNICODE_SCRIPT_KAYAH_LI,               // Kali
    G_UNICODE_SCRIPT_LEPCHA,                 // Lepc
    G_UNICODE_SCRIPT_REJANG,                 // Rjng
    G_UNICODE_SCRIPT_SUNDANESE,              // Sund
    G_UNICODE_SCRIPT_SAURASHTRA,             // Saur
    G_UNICODE_SCRIPT_CHAM,                   // Cham
    G_UNICODE_SCRIPT_OL_CHIKI,               // Olck
    G_UNICODE_SCRIPT_VAI,                    // Vaii
    G_UNICODE_SCRIPT_CARIAN,                 // Cari
    G_UNICODE_SCRIPT_LYCIAN,                 // Lyci
    G_UNICODE_SCRIPT_LYDIAN,                 // Lydi

    // Unicode 5.2 additions
    G_UNICODE_SCRIPT_AVESTAN,                // Avst
    G_UNICODE_SCRIPT_BAMUM,                  // Bamu
    G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,   // Egyp
    G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,       // Armi
    G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,  // Phli
    G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN, // Prti
    G_UNICODE_SCRIPT_JAVANESE,               // Java
    G_UNICODE_SCRIPT_KAITHI,                 // Kthi
    G_UNICODE_SCRIPT_LISU,                   // Lisu
    G_UNICODE_SCRIPT_MEETEI_MAYEK,           // Mtei
    G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,      // Sarb
    G_UNICODE_SCRIPT_OLD_TURKIC,             // Orkh
    G_UNICODE_SCRIPT_SAMARITAN,              // Samr
    G_UNICODE_SCRIPT_TAI_THAM,               // Lana
    G_UNICODE_SCRIPT_TAI_VIET,               // Tavt

    // Unicode 6.0 additions
    G_UNICODE_SCRIPT_BATAK,                  // Batk
    G_UNICODE_SCRIPT_BRAHMI,                 // Brah
    G_UNICODE_SCRIPT_MANDAIC,                // Mand

    // Unicode 6.1 additions
    G_UNICODE_SCRIPT_CHAKMA,                 // Cakm
    G_UNICODE_SCRIPT_MEROITIC_CURSIVE,       // Merc
    G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS,   // Mero
    G_UNICODE_SCRIPT_MIAO,                   // Plrd
    G_UNICODE_SCRIPT_SHARADA,                // Shrd
    G_UNICODE_SCRIPT_SORA_SOMPENG,           // Sora
    G_UNICODE_SCRIPT_TAKRI,                  // Takr

    // Unicode 7.0 additions
    G_UNICODE_SCRIPT_BASSA_VAH,              // Bass
    G_UNICODE_SCRIPT_CAUCASIAN_ALBANIAN,     // Aghb
    G_UNICODE_SCRIPT_DUPLOYAN,               // Dupl
    G_UNICODE_SCRIPT_ELBASAN,                // Elba
    G_UNICODE_SCRIPT_GRANTHA,                // Gran
    G_UNICODE_SCRIPT_KHOJKI,                 // Khoj
    G_UNICODE_SCRIPT_KHUDAWADI,              // Sind
    G_UNICODE_SCRIPT_LINEAR_A,               // Lina
    G_UNICODE_SCRIPT_MAHAJANI,               // Mahj
    G_UNICODE_SCRIPT_MANICHAEAN,             // Mani
    G_UNICODE_SCRIPT_MENDE_KIKAKUI,          // Mend
    G_UNICODE_SCRIPT_MODI,                   // Modi
    G_UNICODE_SCRIPT_MRO,                    // Mroo
    G_UNICODE_SCRIPT_NABATAEAN,              // Nbat
    G_UNICODE_SCRIPT_OLD_NORTH_ARABIAN,      // Narb
    G_UNICODE_SCRIPT_OLD_PERMIC,             // Perm
    G_UNICODE_SCRIPT_PAHAWH_HMONG,           // Hmng
    G_UNICODE_SCRIPT_PALMYRENE,              // Palm
    G_UNICODE_SCRIPT_PAU_CIN_HAU,            // Pauc
    G_UNICODE_SCRIPT_PSALTER_PAHLAVI,        // Phlp
    G_UNICODE_SCRIPT_SIDDHAM,                // Sidd
    G_UNICODE_SCRIPT_TIRHUTA,                // Tirh
    G_UNICODE_SCRIPT_WARANG_CITI             // Wara
}

/// Normalization mode passed to `g_utf8_normalize`.
/// Each NF* member is an alias with the same value as the member
/// declared immediately before it.
enum GNormalizeMode
{
    G_NORMALIZE_DEFAULT,
    G_NORMALIZE_NFD = G_NORMALIZE_DEFAULT,
    G_NORMALIZE_DEFAULT_COMPOSE,
    G_NORMALIZE_NFC = G_NORMALIZE_DEFAULT_COMPOSE,
    G_NORMALIZE_ALL,
    G_NORMALIZE_NFKD = G_NORMALIZE_ALL,
    G_NORMALIZE_ALL_COMPOSE,
    G_NORMALIZE_NFKC = G_NORMALIZE_ALL_COMPOSE
}


// Prototypes for the C library's Unicode routines (C linkage).
extern (C)
{
    // ---- Script <-> ISO 15924 code ------------------------------------

    guint32 g_unicode_script_to_iso15924(GUnicodeScript script);

    GUnicodeScript g_unicode_script_from_iso15924(guint32 iso15924);

    // ---- Per-character queries, all declared `pure` --------------------

    pure
    {
        gboolean g_unichar_isalnum(gunichar c);
        gboolean g_unichar_isalpha(gunichar c);
        gboolean g_unichar_iscntrl(gunichar c);
        gboolean g_unichar_isdigit(gunichar c);
        gboolean g_unichar_isgraph(gunichar c);
        gboolean g_unichar_islower(gunichar c);
        gboolean g_unichar_isprint(gunichar c);
        gboolean g_unichar_ispunct(gunichar c);
        gboolean g_unichar_isspace(gunichar c);
        gboolean g_unichar_isupper(gunichar c);
        gboolean g_unichar_isxdigit(gunichar c);
        gboolean g_unichar_istitle(gunichar c);
        gboolean g_unichar_isdefined(gunichar c);
        gboolean g_unichar_iswide(gunichar c);
        gboolean g_unichar_iswide_cjk(gunichar c);
        gboolean g_unichar_iszerowidth(gunichar c);
        gboolean g_unichar_ismark(gunichar c);

        gunichar g_unichar_toupper(gunichar c);
        gunichar g_unichar_tolower(gunichar c);
        gunichar g_unichar_totitle(gunichar c);

        gint g_unichar_digit_value(gunichar c);
        gint g_unichar_xdigit_value(gunichar c);

        GUnicodeType g_unichar_type(gunichar c);
        GUnicodeBreakType g_unichar_break_type(gunichar c);

        gint g_unichar_combining_class(gunichar uc);
    }

    // Not pure: writes through the `mirrored_ch` out-pointer.
    gboolean g_unichar_get_mirror_char(gunichar ch, gunichar* mirrored_ch);

    pure GUnicodeScript g_unichar_get_script(gunichar ch);

    pure gboolean g_unichar_validate(gunichar ch);

    // ---- Composition / decomposition (results via out-pointers) --------

    gboolean g_unichar_compose(gunichar a, gunichar b, gunichar* ch);

    gboolean g_unichar_decompose(gunichar ch, gunichar* a, gunichar* b);

    gsize g_unichar_fully_decompose(gunichar ch,
                                    gboolean compat,
                                    gunichar* result,
                                    gsize result_len);

    // NOTE(review): presumably the largest `result_len` ever needed by
    // g_unichar_fully_decompose -- confirm against the GLib reference.
    enum G_UNICHAR_MAX_DECOMPOSITION_LENGTH = 18;

    void g_unicode_canonical_ordering(gunichar* str, gsize len);

    deprecated
    gunichar* g_unicode_canonical_decomposition(gunichar ch, gsize* result_len);

    // ---- UTF-8 traversal -----------------------------------------------

    // Table exported by the C library; g_utf8_next_char indexes it with a
    // byte value.
    extern __gshared const(gchar*) g_utf8_skip;

    // D-linkage helper implemented here rather than in the C library:
    // reads the byte at `p`, looks it up in g_utf8_skip and returns `p`
    // advanced by that many bytes. `inout` carries the caller's
    // const/immutable/mutable qualifier through to the result.
    extern (D)
    auto g_utf8_next_char(inout(char)* p)
    {
        // Reinterpret as unsigned so the value can be used as a table index.
        auto lead = cast(inout(ubyte)*) p;
        return p + g_utf8_skip[*lead];
    }

    // NOTE(review): several functions in this group are `pure`, take only
    // const(gchar)* arguments and return a mutable gchar*. D may treat such
    // a result as unique and allow implicit conversion to immutable even
    // though it likely points into the input -- confirm callers do not rely
    // on that conversion.
    pure
    {
        gunichar g_utf8_get_char(const(gchar)* p);

        gunichar g_utf8_get_char_validated(const(gchar)* p, gssize max_len);

        gchar* g_utf8_offset_to_pointer(const(gchar)* str, glong offset);

        glong g_utf8_pointer_to_offset(const(gchar)* str, const(gchar)* pos);

        gchar* g_utf8_prev_char(const(gchar)* p);

        gchar* g_utf8_find_next_char(const(gchar)* p, const(gchar)* end);

        gchar* g_utf8_find_prev_char(const(gchar)* str, const(gchar)* p);

        glong g_utf8_strlen(const(gchar)* p, gssize max);
    }

    // ---- UTF-8 string operations ---------------------------------------

    gchar* g_utf8_substring(const(gchar)* str, glong start_pos, glong end_pos);

    gchar* g_utf8_strncpy(gchar* dest, const(gchar)* src, gsize n);

    /* Find the UTF-8 character corresponding to c, in string p. These
       functions are equivalents to strchr and strrchr */

    gchar* g_utf8_strchr(const(gchar)* p, gssize len, gunichar c);

    gchar* g_utf8_strrchr(const(gchar)* p, gssize len, gunichar c);

    gchar* g_utf8_strreverse(const(gchar)* str, gssize len);

    // ---- Conversions between UTF-8, UTF-16 and UCS-4 -------------------
    // Shared shape: input buffer + length, optional read/written counters
    // via out-pointers and, except for the *_fast variant, a GError**.

    gunichar2* g_utf8_to_utf16(const(gchar)* str,
                               glong len,
                               glong* items_read,
                               glong* items_written,
                               GError** error);

    gunichar* g_utf8_to_ucs4(const(gchar)* str,
                             glong len,
                             glong* items_read,
                             glong* items_written,
                             GError** error);

    gunichar* g_utf8_to_ucs4_fast(const(gchar)* str,
                                  glong len,
                                  glong* items_written);

    gunichar* g_utf16_to_ucs4(const(gunichar2)* str,
                              glong len,
                              glong* items_read,
                              glong* items_written,
                              GError** error);

    gchar* g_utf16_to_utf8(const(gunichar2)* str,
                           glong len,
                           glong* items_read,
                           glong* items_written,
                           GError** error);

    gunichar2* g_ucs4_to_utf16(const(gunichar)* str,
                               glong len,
                               glong* items_read,
                               glong* items_written,
                               GError** error);

    gchar* g_ucs4_to_utf8(const(gunichar)* str,
                          glong len,
                          glong* items_read,
                          glong* items_written,
                          GError** error);

    gint g_unichar_to_utf8(gunichar c, gchar* outbuf);

    // ---- Validation, case mapping, normalization -----------------------

    gboolean g_utf8_validate(const(gchar)* str,
                             gssize max_len,
                             const(gchar)** end);

    gchar* g_utf8_strup(const(gchar)* str, gssize len);

    gchar* g_utf8_strdown(const(gchar)* str, gssize len);

    gchar* g_utf8_casefold(const(gchar)* str, gssize len);

    gchar* g_utf8_normalize(const(gchar)* str, gssize len, GNormalizeMode mode);

    // ---- Collation -----------------------------------------------------

    pure gint g_utf8_collate(const(gchar)* str1, const(gchar)* str2);

    gchar* g_utf8_collate_key(const(gchar)* str, gssize len);

    gchar* g_utf8_collate_key_for_filename(const(gchar)* str, gssize len);

    /* private */
    gchar* _g_utf8_make_valid(const(gchar)* name);
}