1 module glib.gunicode;
2 
3 import glib.gtypes;
4 import glib.gerror;
5 
6 
/// A single Unicode code point; represented by D's 32-bit `dchar`.
alias gunichar  = dchar;

/// A single UTF-16 code unit; represented by D's 16-bit `wchar`.
alias gunichar2 = wchar;
9 
10 
/**
 * Classification of a character, as returned by `g_unichar_type`.
 *
 * The numeric values are the implicit ordinals of the declaration order,
 * spelled out here so the value of each member is visible at a glance.
 */
enum GUnicodeType
{
  // "Other" classes
  G_UNICODE_CONTROL             = 0,
  G_UNICODE_FORMAT              = 1,
  G_UNICODE_UNASSIGNED          = 2,
  G_UNICODE_PRIVATE_USE         = 3,
  G_UNICODE_SURROGATE           = 4,

  // Letters
  G_UNICODE_LOWERCASE_LETTER    = 5,
  G_UNICODE_MODIFIER_LETTER     = 6,
  G_UNICODE_OTHER_LETTER        = 7,
  G_UNICODE_TITLECASE_LETTER    = 8,
  G_UNICODE_UPPERCASE_LETTER    = 9,

  // Marks
  G_UNICODE_SPACING_MARK        = 10,
  G_UNICODE_ENCLOSING_MARK      = 11,
  G_UNICODE_NON_SPACING_MARK    = 12,

  // Numbers
  G_UNICODE_DECIMAL_NUMBER      = 13,
  G_UNICODE_LETTER_NUMBER       = 14,
  G_UNICODE_OTHER_NUMBER        = 15,

  // Punctuation
  G_UNICODE_CONNECT_PUNCTUATION = 16,
  G_UNICODE_DASH_PUNCTUATION    = 17,
  G_UNICODE_CLOSE_PUNCTUATION   = 18,
  G_UNICODE_FINAL_PUNCTUATION   = 19,
  G_UNICODE_INITIAL_PUNCTUATION = 20,
  G_UNICODE_OTHER_PUNCTUATION   = 21,
  G_UNICODE_OPEN_PUNCTUATION    = 22,

  // Symbols
  G_UNICODE_CURRENCY_SYMBOL     = 23,
  G_UNICODE_MODIFIER_SYMBOL     = 24,
  G_UNICODE_MATH_SYMBOL         = 25,
  G_UNICODE_OTHER_SYMBOL        = 26,

  // Separators
  G_UNICODE_LINE_SEPARATOR      = 27,
  G_UNICODE_PARAGRAPH_SEPARATOR = 28,
  G_UNICODE_SPACE_SEPARATOR     = 29
}
44 
/**
 * Line-break classification of a character, as returned by
 * `g_unichar_break_type`.
 *
 * Values are the implicit ordinals of the declaration order, written out
 * explicitly. Member names (including the spelling of
 * `G_UNICODE_BREAK_CLOSE_PARANTHESIS`) are part of the public interface and
 * must not be altered.
 */
enum GUnicodeBreakType
{
  G_UNICODE_BREAK_MANDATORY                    = 0,
  G_UNICODE_BREAK_CARRIAGE_RETURN              = 1,
  G_UNICODE_BREAK_LINE_FEED                    = 2,
  G_UNICODE_BREAK_COMBINING_MARK               = 3,
  G_UNICODE_BREAK_SURROGATE                    = 4,
  G_UNICODE_BREAK_ZERO_WIDTH_SPACE             = 5,
  G_UNICODE_BREAK_INSEPARABLE                  = 6,
  G_UNICODE_BREAK_NON_BREAKING_GLUE            = 7,
  G_UNICODE_BREAK_CONTINGENT                   = 8,
  G_UNICODE_BREAK_SPACE                        = 9,
  G_UNICODE_BREAK_AFTER                        = 10,
  G_UNICODE_BREAK_BEFORE                       = 11,
  G_UNICODE_BREAK_BEFORE_AND_AFTER             = 12,
  G_UNICODE_BREAK_HYPHEN                       = 13,
  G_UNICODE_BREAK_NON_STARTER                  = 14,
  G_UNICODE_BREAK_OPEN_PUNCTUATION             = 15,
  G_UNICODE_BREAK_CLOSE_PUNCTUATION            = 16,
  G_UNICODE_BREAK_QUOTATION                    = 17,
  G_UNICODE_BREAK_EXCLAMATION                  = 18,
  G_UNICODE_BREAK_IDEOGRAPHIC                  = 19,
  G_UNICODE_BREAK_NUMERIC                      = 20,
  G_UNICODE_BREAK_INFIX_SEPARATOR              = 21,
  G_UNICODE_BREAK_SYMBOL                       = 22,
  G_UNICODE_BREAK_ALPHABETIC                   = 23,
  G_UNICODE_BREAK_PREFIX                       = 24,
  G_UNICODE_BREAK_POSTFIX                      = 25,
  G_UNICODE_BREAK_COMPLEX_CONTEXT              = 26,
  G_UNICODE_BREAK_AMBIGUOUS                    = 27,
  G_UNICODE_BREAK_UNKNOWN                      = 28,
  G_UNICODE_BREAK_NEXT_LINE                    = 29,
  G_UNICODE_BREAK_WORD_JOINER                  = 30,
  G_UNICODE_BREAK_HANGUL_L_JAMO                = 31,
  G_UNICODE_BREAK_HANGUL_V_JAMO                = 32,
  G_UNICODE_BREAK_HANGUL_T_JAMO                = 33,
  G_UNICODE_BREAK_HANGUL_LV_SYLLABLE           = 34,
  G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE          = 35,
  G_UNICODE_BREAK_CLOSE_PARANTHESIS            = 36,
  G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER = 37,
  G_UNICODE_BREAK_HEBREW_LETTER                = 38,
  G_UNICODE_BREAK_REGIONAL_INDICATOR           = 39
}
88 
/**
 * Script identifiers, as returned by `g_unichar_get_script` and accepted by
 * `g_unicode_script_to_iso15924`.
 *
 * The trailing comment on each member is its ISO 15924 code. Members are
 * grouped by the Unicode version that introduced them; the first member of
 * each group carries its explicit value so the numbering is easy to audit.
 * All other members take the previous value plus one.
 */
enum GUnicodeScript
{
  G_UNICODE_SCRIPT_INVALID_CODE = -1,
  G_UNICODE_SCRIPT_COMMON = 0,              // Zyyy
  G_UNICODE_SCRIPT_INHERITED,               // Zinh (Qaai)
  G_UNICODE_SCRIPT_ARABIC,                  // Arab
  G_UNICODE_SCRIPT_ARMENIAN,                // Armn
  G_UNICODE_SCRIPT_BENGALI,                 // Beng
  G_UNICODE_SCRIPT_BOPOMOFO,                // Bopo
  G_UNICODE_SCRIPT_CHEROKEE,                // Cher
  G_UNICODE_SCRIPT_COPTIC,                  // Copt (Qaac)
  G_UNICODE_SCRIPT_CYRILLIC,                // Cyrl (Cyrs)
  G_UNICODE_SCRIPT_DESERET,                 // Dsrt
  G_UNICODE_SCRIPT_DEVANAGARI,              // Deva
  G_UNICODE_SCRIPT_ETHIOPIC,                // Ethi
  G_UNICODE_SCRIPT_GEORGIAN,                // Geor (Geon, Geoa)
  G_UNICODE_SCRIPT_GOTHIC,                  // Goth
  G_UNICODE_SCRIPT_GREEK,                   // Grek
  G_UNICODE_SCRIPT_GUJARATI,                // Gujr
  G_UNICODE_SCRIPT_GURMUKHI,                // Guru
  G_UNICODE_SCRIPT_HAN,                     // Hani
  G_UNICODE_SCRIPT_HANGUL,                  // Hang
  G_UNICODE_SCRIPT_HEBREW,                  // Hebr
  G_UNICODE_SCRIPT_HIRAGANA,                // Hira
  G_UNICODE_SCRIPT_KANNADA,                 // Knda
  G_UNICODE_SCRIPT_KATAKANA,                // Kana
  G_UNICODE_SCRIPT_KHMER,                   // Khmr
  G_UNICODE_SCRIPT_LAO,                     // Laoo
  G_UNICODE_SCRIPT_LATIN,                   // Latn (Latf, Latg)
  G_UNICODE_SCRIPT_MALAYALAM,               // Mlym
  G_UNICODE_SCRIPT_MONGOLIAN,               // Mong
  G_UNICODE_SCRIPT_MYANMAR,                 // Mymr
  G_UNICODE_SCRIPT_OGHAM,                   // Ogam
  G_UNICODE_SCRIPT_OLD_ITALIC,              // Ital
  G_UNICODE_SCRIPT_ORIYA,                   // Orya
  G_UNICODE_SCRIPT_RUNIC,                   // Runr
  G_UNICODE_SCRIPT_SINHALA,                 // Sinh
  G_UNICODE_SCRIPT_SYRIAC,                  // Syrc (Syrj, Syrn, Syre)
  G_UNICODE_SCRIPT_TAMIL,                   // Taml
  G_UNICODE_SCRIPT_TELUGU,                  // Telu
  G_UNICODE_SCRIPT_THAANA,                  // Thaa
  G_UNICODE_SCRIPT_THAI,                    // Thai
  G_UNICODE_SCRIPT_TIBETAN,                 // Tibt
  G_UNICODE_SCRIPT_CANADIAN_ABORIGINAL,     // Cans
  G_UNICODE_SCRIPT_YI,                      // Yiii
  G_UNICODE_SCRIPT_TAGALOG,                 // Tglg
  G_UNICODE_SCRIPT_HANUNOO,                 // Hano
  G_UNICODE_SCRIPT_BUHID,                   // Buhd
  G_UNICODE_SCRIPT_TAGBANWA,                // Tagb

  // ---- Unicode 4.0 additions ----
  G_UNICODE_SCRIPT_BRAILLE = 46,            // Brai
  G_UNICODE_SCRIPT_CYPRIOT,                 // Cprt
  G_UNICODE_SCRIPT_LIMBU,                   // Limb
  G_UNICODE_SCRIPT_OSMANYA,                 // Osma
  G_UNICODE_SCRIPT_SHAVIAN,                 // Shaw
  G_UNICODE_SCRIPT_LINEAR_B,                // Linb
  G_UNICODE_SCRIPT_TAI_LE,                  // Tale
  G_UNICODE_SCRIPT_UGARITIC,                // Ugar

  // ---- Unicode 4.1 additions ----
  G_UNICODE_SCRIPT_NEW_TAI_LUE = 54,        // Talu
  G_UNICODE_SCRIPT_BUGINESE,                // Bugi
  G_UNICODE_SCRIPT_GLAGOLITIC,              // Glag
  G_UNICODE_SCRIPT_TIFINAGH,                // Tfng
  G_UNICODE_SCRIPT_SYLOTI_NAGRI,            // Sylo
  G_UNICODE_SCRIPT_OLD_PERSIAN,             // Xpeo
  G_UNICODE_SCRIPT_KHAROSHTHI,              // Khar

  // ---- Unicode 5.0 additions ----
  G_UNICODE_SCRIPT_UNKNOWN = 61,            // Zzzz
  G_UNICODE_SCRIPT_BALINESE,                // Bali
  G_UNICODE_SCRIPT_CUNEIFORM,               // Xsux
  G_UNICODE_SCRIPT_PHOENICIAN,              // Phnx
  G_UNICODE_SCRIPT_PHAGS_PA,                // Phag
  G_UNICODE_SCRIPT_NKO,                     // Nkoo

  // ---- Unicode 5.1 additions ----
  G_UNICODE_SCRIPT_KAYAH_LI = 67,           // Kali
  G_UNICODE_SCRIPT_LEPCHA,                  // Lepc
  G_UNICODE_SCRIPT_REJANG,                  // Rjng
  G_UNICODE_SCRIPT_SUNDANESE,               // Sund
  G_UNICODE_SCRIPT_SAURASHTRA,              // Saur
  G_UNICODE_SCRIPT_CHAM,                    // Cham
  G_UNICODE_SCRIPT_OL_CHIKI,                // Olck
  G_UNICODE_SCRIPT_VAI,                     // Vaii
  G_UNICODE_SCRIPT_CARIAN,                  // Cari
  G_UNICODE_SCRIPT_LYCIAN,                  // Lyci
  G_UNICODE_SCRIPT_LYDIAN,                  // Lydi

  // ---- Unicode 5.2 additions ----
  G_UNICODE_SCRIPT_AVESTAN = 78,            // Avst
  G_UNICODE_SCRIPT_BAMUM,                   // Bamu
  G_UNICODE_SCRIPT_EGYPTIAN_HIEROGLYPHS,    // Egyp
  G_UNICODE_SCRIPT_IMPERIAL_ARAMAIC,        // Armi
  G_UNICODE_SCRIPT_INSCRIPTIONAL_PAHLAVI,   // Phli
  G_UNICODE_SCRIPT_INSCRIPTIONAL_PARTHIAN,  // Prti
  G_UNICODE_SCRIPT_JAVANESE,                // Java
  G_UNICODE_SCRIPT_KAITHI,                  // Kthi
  G_UNICODE_SCRIPT_LISU,                    // Lisu
  G_UNICODE_SCRIPT_MEETEI_MAYEK,            // Mtei
  G_UNICODE_SCRIPT_OLD_SOUTH_ARABIAN,       // Sarb
  G_UNICODE_SCRIPT_OLD_TURKIC,              // Orkh
  G_UNICODE_SCRIPT_SAMARITAN,               // Samr
  G_UNICODE_SCRIPT_TAI_THAM,                // Lana
  G_UNICODE_SCRIPT_TAI_VIET,                // Tavt

  // ---- Unicode 6.0 additions ----
  G_UNICODE_SCRIPT_BATAK = 93,              // Batk
  G_UNICODE_SCRIPT_BRAHMI,                  // Brah
  G_UNICODE_SCRIPT_MANDAIC,                 // Mand

  // ---- Unicode 6.1 additions ----
  G_UNICODE_SCRIPT_CHAKMA = 96,             // Cakm
  G_UNICODE_SCRIPT_MEROITIC_CURSIVE,        // Merc
  G_UNICODE_SCRIPT_MEROITIC_HIEROGLYPHS,    // Mero
  G_UNICODE_SCRIPT_MIAO,                    // Plrd
  G_UNICODE_SCRIPT_SHARADA,                 // Shrd
  G_UNICODE_SCRIPT_SORA_SOMPENG,            // Sora
  G_UNICODE_SCRIPT_TAKRI,                   // Takr

  // ---- Unicode 7.0 additions ----
  G_UNICODE_SCRIPT_BASSA_VAH = 103,         // Bass
  G_UNICODE_SCRIPT_CAUCASIAN_ALBANIAN,      // Aghb
  G_UNICODE_SCRIPT_DUPLOYAN,                // Dupl
  G_UNICODE_SCRIPT_ELBASAN,                 // Elba
  G_UNICODE_SCRIPT_GRANTHA,                 // Gran
  G_UNICODE_SCRIPT_KHOJKI,                  // Khoj
  G_UNICODE_SCRIPT_KHUDAWADI,               // Sind
  G_UNICODE_SCRIPT_LINEAR_A,                // Lina
  G_UNICODE_SCRIPT_MAHAJANI,                // Mahj
  G_UNICODE_SCRIPT_MANICHAEAN,              // Manu
  G_UNICODE_SCRIPT_MENDE_KIKAKUI,           // Mend
  G_UNICODE_SCRIPT_MODI,                    // Modi
  G_UNICODE_SCRIPT_MRO,                     // Mroo
  G_UNICODE_SCRIPT_NABATAEAN,               // Nbat
  G_UNICODE_SCRIPT_OLD_NORTH_ARABIAN,       // Narb
  G_UNICODE_SCRIPT_OLD_PERMIC,              // Perm
  G_UNICODE_SCRIPT_PAHAWH_HMONG,            // Hmng
  G_UNICODE_SCRIPT_PALMYRENE,               // Palm
  G_UNICODE_SCRIPT_PAU_CIN_HAU,             // Pauc
  G_UNICODE_SCRIPT_PSALTER_PAHLAVI,         // Phlp
  G_UNICODE_SCRIPT_SIDDHAM,                 // Sidd
  G_UNICODE_SCRIPT_TIRHUTA,                 // Tirh
  G_UNICODE_SCRIPT_WARANG_CITI              // Wara
}
235 
/**
 * Normalization mode selector passed to `g_utf8_normalize`.
 *
 * There are four distinct values (0..3). Each `NF*` member is a second name
 * for the member declared directly above it and shares its value.
 */
enum GNormalizeMode
{
  G_NORMALIZE_DEFAULT         = 0,
  G_NORMALIZE_NFD             = G_NORMALIZE_DEFAULT,

  G_NORMALIZE_DEFAULT_COMPOSE = 1,
  G_NORMALIZE_NFC             = G_NORMALIZE_DEFAULT_COMPOSE,

  G_NORMALIZE_ALL             = 2,
  G_NORMALIZE_NFKD            = G_NORMALIZE_ALL,

  G_NORMALIZE_ALL_COMPOSE     = 3,
  G_NORMALIZE_NFKC            = G_NORMALIZE_ALL_COMPOSE
}
246 
247 
// Prototypes with C linkage. Everything in this scope is a bodiless
// declaration except g_utf8_next_char, which is implemented here in D.
// `pure` is written as a postfix function attribute throughout.
extern (C)
{
    // ---- Script <-> ISO 15924 code ----

    guint32 g_unicode_script_to_iso15924(GUnicodeScript script);

    GUnicodeScript g_unicode_script_from_iso15924(guint32 iso15924);

    // ---- Character classification predicates ----

    gboolean g_unichar_isalnum(gunichar c) pure;
    gboolean g_unichar_isalpha(gunichar c) pure;
    gboolean g_unichar_iscntrl(gunichar c) pure;
    gboolean g_unichar_isdigit(gunichar c) pure;
    gboolean g_unichar_isgraph(gunichar c) pure;
    gboolean g_unichar_islower(gunichar c) pure;
    gboolean g_unichar_isprint(gunichar c) pure;
    gboolean g_unichar_ispunct(gunichar c) pure;
    gboolean g_unichar_isspace(gunichar c) pure;
    gboolean g_unichar_isupper(gunichar c) pure;
    gboolean g_unichar_isxdigit(gunichar c) pure;
    gboolean g_unichar_istitle(gunichar c) pure;
    gboolean g_unichar_isdefined(gunichar c) pure;
    gboolean g_unichar_iswide(gunichar c) pure;
    gboolean g_unichar_iswide_cjk(gunichar c) pure;
    gboolean g_unichar_iszerowidth(gunichar c) pure;
    gboolean g_unichar_ismark(gunichar c) pure;

    // ---- Case mapping ----

    gunichar g_unichar_toupper(gunichar c) pure;
    gunichar g_unichar_tolower(gunichar c) pure;
    gunichar g_unichar_totitle(gunichar c) pure;

    // ---- Per-character property lookup ----

    gint g_unichar_digit_value(gunichar c) pure;

    gint g_unichar_xdigit_value(gunichar c) pure;

    GUnicodeType g_unichar_type(gunichar c) pure;

    GUnicodeBreakType g_unichar_break_type(gunichar c) pure;

    gint g_unichar_combining_class(gunichar uc) pure;

    // Not pure: reports its result through the mirrored_ch out-pointer.
    gboolean g_unichar_get_mirror_char(gunichar ch, gunichar* mirrored_ch);

    GUnicodeScript g_unichar_get_script(gunichar ch) pure;

    gboolean g_unichar_validate(gunichar ch) pure;

    // ---- Composition and decomposition ----

    gboolean g_unichar_compose(gunichar a, gunichar b, gunichar* ch);

    gboolean g_unichar_decompose(gunichar ch, gunichar* a, gunichar* b);

    gsize g_unichar_fully_decompose(gunichar ch,
                                    gboolean compat,
                                    gunichar* result,
                                    gsize result_len);

    // NOTE(review): presumably the largest result_len ever needed by
    // g_unichar_fully_decompose -- confirm against the GLib reference.
    enum G_UNICHAR_MAX_DECOMPOSITION_LENGTH = 18;

    void g_unicode_canonical_ordering(gunichar* str, gsize len);

    deprecated
    gunichar* g_unicode_canonical_decomposition(gunichar ch, gsize* result_len);

    // ---- UTF-8 navigation ----

    // Table owned by the C library; indexed by a byte value below to obtain
    // the distance to step forward.
    extern __gshared const(gchar*) g_utf8_skip;

    // Implemented in D (hence D linkage instead of the enclosing C linkage).
    // Returns p advanced by the g_utf8_skip entry selected by the byte that
    // p points at; the inout qualifier of p carries over to the result.
    extern (D) auto g_utf8_next_char(inout(char)* p)
    {
        size_t leadByte = *cast(inout(ubyte)*) p;
        return p + g_utf8_skip[leadByte];
    }

    gunichar g_utf8_get_char(const(gchar)* p) pure;

    gunichar g_utf8_get_char_validated(const(gchar)* p, gssize max_len) pure;

    gchar* g_utf8_offset_to_pointer(const(gchar)* str, glong offset) pure;

    glong g_utf8_pointer_to_offset(const(gchar)* str, const(gchar)* pos) pure;

    gchar* g_utf8_prev_char(const(gchar)* p) pure;

    gchar* g_utf8_find_next_char(const(gchar)* p, const(gchar)* end) pure;

    gchar* g_utf8_find_prev_char(const(gchar)* str, const(gchar)* p) pure;

    glong g_utf8_strlen(const(gchar)* p, gssize max) pure;

    // ---- UTF-8 string operations ----

    gchar* g_utf8_substring(const(gchar)* str, glong start_pos, glong end_pos);

    gchar* g_utf8_strncpy(gchar* dest, const(gchar)* src, gsize n);

    /* Find the UTF-8 character corresponding to c in string p. These
       functions are the equivalents of strchr and strrchr. */

    gchar* g_utf8_strchr(const(gchar)* p, gssize len, gunichar c);

    gchar* g_utf8_strrchr(const(gchar)* p, gssize len, gunichar c);

    gchar* g_utf8_strreverse(const(gchar)* str, gssize len);

    // ---- Encoding conversion (UTF-8 / UTF-16 / UCS-4) ----

    gunichar2* g_utf8_to_utf16(const(gchar)* str,
                               glong len,
                               glong* items_read,
                               glong* items_written,
                               GError** error);

    gunichar* g_utf8_to_ucs4(const(gchar)* str,
                             glong len,
                             glong* items_read,
                             glong* items_written,
                             GError** error);

    // The "fast" variant takes neither items_read nor error.
    gunichar* g_utf8_to_ucs4_fast(const(gchar)* str,
                                  glong len,
                                  glong* items_written);

    gunichar* g_utf16_to_ucs4(const(gunichar2)* str,
                              glong len,
                              glong* items_read,
                              glong* items_written,
                              GError** error);

    gchar* g_utf16_to_utf8(const(gunichar2)* str,
                           glong len,
                           glong* items_read,
                           glong* items_written,
                           GError** error);

    gunichar2* g_ucs4_to_utf16(const(gunichar)* str,
                               glong len,
                               glong* items_read,
                               glong* items_written,
                               GError** error);

    gchar* g_ucs4_to_utf8(const(gunichar)* str,
                          glong len,
                          glong* items_read,
                          glong* items_written,
                          GError** error);

    gint g_unichar_to_utf8(gunichar c, gchar* outbuf);

    // ---- Validation ----

    gboolean g_utf8_validate(const(gchar)* str,
                             gssize max_len,
                             const(gchar)** end);

    // ---- Case conversion and normalization of UTF-8 strings ----

    gchar* g_utf8_strup(const(gchar)* str, gssize len);

    gchar* g_utf8_strdown(const(gchar)* str, gssize len);

    gchar* g_utf8_casefold(const(gchar)* str, gssize len);

    gchar* g_utf8_normalize(const(gchar)* str, gssize len, GNormalizeMode mode);

    // ---- Collation ----

    gint g_utf8_collate(const(gchar)* str1, const(gchar)* str2) pure;

    gchar* g_utf8_collate_key(const(gchar)* str, gssize len);

    gchar* g_utf8_collate_key_for_filename(const(gchar)* str, gssize len);

    /* private */
    gchar* _g_utf8_make_valid(const(gchar)* name);
}
453