00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00036 #ifndef _utf8_h_
00037 #define _utf8_h_
00038
00039 #include <glib.h>
00040
00041
00042 #if 0
00043 #include "unicode/uchar.h"
00044 #include "unicode/ustring.h"
00045 #include "unicode/utypes.h"
00046 #include "unicode/ustdio.h"
00047 #include "unicode/unorm.h"
00048 #endif
00049
/**
 * Unicode normalization forms that can be requested from the
 * normalizing routines declared in this header.
 *
 * The enumerator names follow the Unicode normalization form names
 * (NFC, NFKC, NFD, NFKD).  Values are consecutive starting at zero, so
 * NUM_UNI_NORM is the number of forms and has to stay the last entry.
 */
typedef enum {
	UNI_NORM_NFC  = 0,
	UNI_NORM_NFKC = 1,
	UNI_NORM_NFD  = 2,
	UNI_NORM_NFKD = 3,

	NUM_UNI_NORM		/* number of forms listed above; keep last */
} uni_norm_t;
00058
00059
00060
00061
/*
 * Default normalization forms, one per usage context.
 *
 * NOTE(review): what each context covers is inferred from the macro
 * names only -- confirm against the callers.
 */

/* Form selected for the GUI context. */
#define UNI_NORM_GUI		UNI_NORM_NFC

/* Form selected for the network context. */
#define UNI_NORM_NETWORK	UNI_NORM_NFC

/*
 * Form selected for the filesystem context: NFD when both __APPLE__ and
 * __MACH__ are defined by the compiler, NFC on every other platform.
 */
#if !defined(__APPLE__) || !defined(__MACH__)
#define UNI_NORM_FILESYSTEM	UNI_NORM_NFC
#else
#define UNI_NORM_FILESYSTEM	UNI_NORM_NFD
#endif
00077
00078 void locale_init(void);
00079 void locale_close(void);
00080 const gchar *locale_get_charset(void);
00081 const gchar * locale_get_language(void);
00082 guint utf8_char_len(const gchar *s);
00083 gboolean is_ascii_string(const gchar *str);
00084 gboolean utf8_is_valid_string(const gchar *s);
00085 gboolean utf8_is_valid_data(const gchar *s, size_t n);
00086 size_t utf8_char_count(const gchar *s);
00087 size_t utf8_strlcpy(gchar *dst, const gchar *src, size_t dst_size);
00088 guint32 utf8_decode_char_fast(const gchar *s, guint *retlen)
00089 NON_NULL_PARAM((1,2));
00090 gint utf8_to_iso8859(gchar *s, gint len, gboolean space);
00091 size_t utf8_strlower(gchar *dst, const gchar *src, size_t size);
00092 gchar *utf8_strlower_copy(const gchar *src);
00093 size_t utf8_strupper(gchar *dst, const gchar *src, size_t size);
00094 gchar *utf8_strupper_copy(const gchar *src);
00095 gchar *utf8_canonize(const gchar *src);
00096 gchar *utf8_normalize(const gchar *src, uni_norm_t norm);
00097
00098 size_t utf32_to_utf8(const guint32 *in, gchar *out, size_t size);
00099 guint32 utf32_lowercase(guint32 uc);
00100
/**
 * Extends a known-good prefix length of `s' up to at most 6 bytes,
 * stopping early at the first NUL byte.
 *
 * @param s		the buffer to inspect; s[len] must be readable and, when
 *				`len' is below 6, the bytes from s[len] up to the first NUL
 *				(or up to index 5) are read.
 * @param len	number of leading bytes of `s' already known to be usable.
 *
 * @return `len' unchanged when it is already 6 or more, or when s[len]
 * is NUL; otherwise the index of the first NUL found at or after `len',
 * capped at 6.
 *
 * NOTE(review): 6 is presumably the longest UTF-8 sequence the decoder
 * may need to look at -- confirm with the decoder's requirements.
 */
static inline size_t
utf8_decode_lookahead(const gchar *s, size_t len)
{
	enum { lookahead_max = 6 };
	size_t end;

	for (end = len; end < lookahead_max; end++) {
		if ('\0' == s[end])
			break;
	}
	return end;
}
00120
/**
 * Encodes a Unicode code point as UTF-16 and packs the result into a
 * single 32-bit value.
 *
 * @param uc	the code point to encode.
 *
 * @return
 *	- `uc' itself when it is at most 0xFFFF (one 16-bit unit; values in
 *	  the surrogate range are passed through unchecked);
 *	- for 0x10000..0x10FFFF, the surrogate pair with the first unit
 *	  (0xD800-based) in the low 16 bits and the second unit
 *	  (0xDC00-based) in the high 16 bits;
 *	- (guint32) -1 when `uc' is above 0x10FFFF.
 */
static inline guint32
utf16_encode_char_compact(guint32 uc)
{
	guint32 lead, trail;

	if (uc <= 0xFFFF)
		return uc;
	if (uc > 0x10FFFF)
		return (guint32) -1;

	uc -= 0x10000;					/* now a 20-bit value */
	lead = 0xd800 | (uc >> 10);		/* upper 10 bits */
	trail = 0xdc00 | (uc & 0x3ff);	/* lower 10 bits */

	/*
	 * Keep both units in 32-bit unsigned variables: a 16-bit operand would
	 * be promoted to (signed) int before the shift, and shifting a value
	 * >= 0xdc00 left by 16 overflows a 32-bit int, which is undefined.
	 */
	return (trail << 16) | lead;
}
00148
00155 const gchar *lazy_iso8859_1_to_utf8(const gchar *src);
00156
00157 const gchar *lazy_ui_string_to_utf8(const gchar *src);
00158 const gchar *lazy_utf8_to_ui_string(const gchar *src);
00159
00160 const gchar *lazy_utf8_to_locale(const gchar *src);
00161 const gchar *lazy_locale_to_utf8(const gchar *src);
00162
00163 const gchar *lazy_locale_to_ui_string(const gchar *src);
00164 const gchar *lazy_locale_to_ui_string2(const gchar *src);
00165
00166 const gchar *lazy_locale_to_utf8_normalized(const gchar *src, uni_norm_t norm);
00167 const gchar *lazy_unknown_to_utf8_normalized(const gchar *src, uni_norm_t norm,
00168 const gchar **charset_ptr);
00169
00170 gchar *iso8859_1_to_utf8(const gchar *str);
00171 gchar *iso8859_1_to_utf8_normalized(const gchar *str, uni_norm_t norm);
00172
00173 gchar *utf8_to_ui_string(const gchar *src);
00174 gchar *ui_string_to_utf8(const gchar *src);
00175
00176 gchar *utf8_to_locale(const gchar *s);
00177 gchar *locale_to_utf8(const gchar *str);
00178 gchar *locale_to_utf8_normalized(const gchar *str, uni_norm_t norm);
00179
00180 gchar *utf8_to_filename(const gchar *s);
00181 gchar *filename_to_utf8_normalized(const gchar *str, uni_norm_t norm);
00182
00183 gchar *unknown_to_utf8(const gchar *str, const gchar **charset_ptr);
00184 gchar *unknown_to_utf8_normalized(const gchar *src, uni_norm_t norm,
00185 const gchar **charset_ptr);
00186
00187 size_t ascii_enforce(gchar *dst, size_t size, const gchar *src);
00188 size_t utf8_enforce(gchar *dst, size_t size, const gchar *src);
00189
00190 gboolean icu_enabled(void);
00191 gboolean locale_is_latin(void);
00192 gboolean locale_is_utf8(void);
00193
00194 gboolean utf8_can_dejap(const gchar *src);
00195 size_t utf8_dejap(gchar *dst, size_t dst_size, const gchar *src);
00196
/*
 * UNICODE_CANONIZE(x) -- canonize the string `x'.
 *
 * The ICU-based branch below is compiled out by `#if 0', so the macro
 * currently always expands to a plain utf8_canonize() call.  The disabled
 * branch also needs the ICU headers (UChar), whose #include lines are
 * likewise disabled at the top of this header.
 */
#if 0	/* ICU-based implementation, disabled */

#define UNICODE_CANONIZE(x) \
	(icu_enabled() ? unicode_canonize(x) : utf8_canonize(x))

/* Conversions to and from ICU's UChar strings. */
int locale_to_icu_conv(const gchar *in, int lenin, UChar *out, int lenout);
int utf8_to_icu_conv(const gchar *in, int lenin, UChar *out, int lenout);
int icu_to_utf8_conv(const UChar *in, int lenin, gchar *out, int lenout);

/* Transformations of UChar strings. */
int unicode_NFC(const UChar *source, gint32 len, UChar *result, gint32 rlen);
int unicode_NFKD(const UChar *source, gint32 len, UChar *result, gint32 rlen);
int unicode_lower(const UChar *source, gint32 len, UChar *result, gint32 rlen);
int unicode_upper(const UChar *source, gint32 len, UChar *result, gint32 rlen);
int unicode_filters(const UChar *source, gint32 len, UChar *result);
gchar *unicode_canonize(const gchar *in);

#else	/* built-in implementation */

#define UNICODE_CANONIZE(x)	utf8_canonize(x)

#endif	/* 0 */
00218
00219 #endif
00220
00221