#include <glib.h>
Go to the source code of this file.
Defines | |
#define | UNI_NORM_GUI UNI_NORM_NFC |
#define | UNI_NORM_NETWORK UNI_NORM_NFC |
#define | UNI_NORM_FILESYSTEM UNI_NORM_NFC |
#define | UNICODE_CANONIZE(x) utf8_canonize(x) |
Enumerations | |
enum | uni_norm_t { UNI_NORM_NFC = 0, UNI_NORM_NFKC, UNI_NORM_NFD, UNI_NORM_NFKD, NUM_UNI_NORM } |
Functions | |
void | locale_init (void) |
void | locale_close (void) |
Called at shutdown time. | |
const gchar * | locale_get_charset (void) |
NOTE: The internal variable "charset" can be used to override the initially detected character set name. | |
const gchar * | locale_get_language (void) |
Determine the current language. | |
guint | utf8_char_len (const gchar *s) |
Are the first bytes of string `s' forming a valid UTF-8 character? | |
gboolean | is_ascii_string (const gchar *str) |
gboolean | utf8_is_valid_string (const gchar *s) |
gboolean | utf8_is_valid_data (const gchar *s, size_t n) |
size_t | utf8_char_count (const gchar *s) |
size_t | utf8_strlcpy (gchar *dst, const gchar *src, size_t dst_size) |
Works exactly like strlcpy() but preserves a valid UTF-8 encoding, if the string has to be truncated. | |
guint32 | utf8_decode_char_fast (const gchar *s, guint *retlen) |
This routine is the same as utf8_decode_char() but it is more specialized and is aimed at being fast. | |
gint | utf8_to_iso8859 (gchar *s, gint len, gboolean space) |
Convert UTF-8 string to ISO-8859-1 in place. | |
size_t | utf8_strlower (gchar *dst, const gchar *src, size_t size) |
Copies ``src'' to ``dst'' converting all characters to lowercase. | |
gchar * | utf8_strlower_copy (const gchar *src) |
Copies the UTF-8 string ``src'' to a newly allocated buffer converting all characters to lowercase. | |
size_t | utf8_strupper (gchar *dst, const gchar *src, size_t size) |
Copies ``src'' to ``dst'' converting all characters to uppercase. | |
gchar * | utf8_strupper_copy (const gchar *src) |
Copies the UTF-8 string ``src'' to a newly allocated buffer converting all characters to uppercase. | |
gchar * | utf8_canonize (const gchar *src) |
Apply the NFKD/NFC algo to have normalized keywords. | |
gchar * | utf8_normalize (const gchar *src, uni_norm_t norm) |
Normalizes an UTF-8 string to the requested normal form and returns it as a newly allocated string. | |
size_t | utf32_to_utf8 (const guint32 *in, gchar *out, size_t size) |
guint32 | utf32_lowercase (guint32 uc) |
Looks up the simple lowercase variant of an UTF-32 character. | |
size_t | utf8_decode_lookahead (const gchar *s, size_t len) |
This is a highly specialized function (read: don't use it if you don't understand what it does and how it's used) to be used with utf8_decode_char(). | |
guint32 | utf16_encode_char_compact (guint32 uc) |
Encodes a single UTF-32 character as UTF-16 and returns the result compacted into a 32-bit integer. | |
const gchar * | lazy_iso8859_1_to_utf8 (const gchar *src) |
Lazy converters either return a pointer to a static buffer or manage the allocated memory themselves. | |
const gchar * | lazy_ui_string_to_utf8 (const gchar *src) |
const gchar * | lazy_utf8_to_ui_string (const gchar *src) |
const gchar * | lazy_utf8_to_locale (const gchar *src) |
const gchar * | lazy_locale_to_utf8 (const gchar *src) |
const gchar * | lazy_locale_to_ui_string (const gchar *src) |
const gchar * | lazy_locale_to_ui_string2 (const gchar *src) |
const gchar * | lazy_locale_to_utf8_normalized (const gchar *src, uni_norm_t norm) |
const gchar * | lazy_unknown_to_utf8_normalized (const gchar *src, uni_norm_t norm, const gchar **charset_ptr) |
gchar * | iso8859_1_to_utf8 (const gchar *str) |
Converts a string from ISO-8859-1 to UTF-8 encoding. | |
gchar * | iso8859_1_to_utf8_normalized (const gchar *str, uni_norm_t norm) |
Converts a string from the ISO-8859-1 character set to UTF-8 encoding and the specified Unicode normalization form. | |
gchar * | utf8_to_ui_string (const gchar *src) |
gchar * | ui_string_to_utf8 (const gchar *src) |
gchar * | utf8_to_locale (const gchar *s) |
Non-convertible characters will be replaced by '_'. | |
gchar * | locale_to_utf8 (const gchar *str) |
Converts a string from the locale's character set to UTF-8 encoding. | |
gchar * | locale_to_utf8_normalized (const gchar *str, uni_norm_t norm) |
Converts a string from the locale's character set to UTF-8 encoding and the specified Unicode normalization form. | |
gchar * | utf8_to_filename (const gchar *s) |
Converts the UTF-8 encoded src string to a string encoded in the primary filename character set. | |
gchar * | filename_to_utf8_normalized (const gchar *str, uni_norm_t norm) |
Converts a string from the filename character set to UTF-8 encoding and the specified Unicode normalization form. | |
gchar * | unknown_to_utf8 (const gchar *str, const gchar **charset_ptr) |
Converts the string to UTF-8 assuming an appropriate character set. | |
gchar * | unknown_to_utf8_normalized (const gchar *src, uni_norm_t norm, const gchar **charset_ptr) |
Converts a string to UTF-8 encoding, assuming an appropriate character set, and to the specified Unicode normalization form. | |
size_t | ascii_enforce (gchar *dst, size_t size, const gchar *src) |
Copies the NUL-terminated string ``src'' to ``dst'' replacing all invalid characters (non-ASCII) with underscores. | |
size_t | utf8_enforce (gchar *dst, size_t size, const gchar *src) |
Copies the NUL-terminated string ``src'' to ``dst'' replacing all invalid characters (non-UTF-8) with underscores. | |
gboolean | icu_enabled (void) |
gboolean | locale_is_latin (void) |
gboolean | locale_is_utf8 (void) |
gboolean | utf8_can_dejap (const gchar *src) |
Checks whether the given UTF-8 string contains any convertible characters. | |
size_t | utf8_dejap (gchar *dst, size_t dst_size, const gchar *src) |
Converts hiragana and katakana characters to ASCII sequences, strips voice marks and keeps any other characters as is. |
|
|
|
|
|
|
|
|
|
|
|
Copies the NUL-terminated string ``src'' to ``dst'' replacing all invalid characters (non-ASCII) with underscores. ``src'' and ``dst'' may be identical but must not overlap otherwise. If ``dst'' is too small, the resulting string will be truncated.
TODO: Add overlap check |
|
Converts a string from the filename character set to UTF-8 encoding and the specified Unicode normalization form.
|
|
|
|
|
|
Converts a string from ISO-8859-1 to UTF-8 encoding. The returned string is in no defined Unicode normalization form.
|
|
Converts a string from the ISO-8859-1 character set to UTF-8 encoding and the specified Unicode normalization form.
|
|
Lazy converters either return a pointer to a static buffer or manage the allocated memory themselves. They may also return the original pointer. Copy the result before calling them again unless you don't need the previous result anymore. |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
Called at shutdown time.
|
|
NOTE: The internal variable "charset" can be used to override the initially detected character set name.
|
|
Determine the current language.
TRANSLATORS: Put the two-letter ISO 639 code here. |
|
|
|
|
|
|
|
Converts a string from the locale's character set to UTF-8 encoding. The returned string is in no defined Unicode normalization form.
|
|
Converts a string from the locale's character set to UTF-8 encoding and the specified Unicode normalization form.
|
|
|
|
Converts the string to UTF-8 assuming an appropriate character set. The conversion result might still be rubbish but is guaranteed to be UTF-8 encoded. The returned string is in no defined Unicode normalization form.
|
|
Converts a string to UTF-8 encoding, assuming an appropriate character set, and to the specified Unicode normalization form.
|
|
Encodes a single UTF-32 character as UTF-16 and returns the result compacted into a 32-bit integer. See also RFC 2781.
|
|
Looks up the simple lowercase variant of an UTF-32 character.
|
|
|
|
Checks whether the given UTF-8 string contains any convertible characters.
|
|
Apply the NFKD/NFC algo to have normalized keywords.
|
|
|
|
Are the first bytes of string `s' forming a valid UTF-8 character?
|
|
This routine is the same as utf8_decode_char() but it is more specialized and is aimed at being fast. Use it when you don't need warnings and you don't know the length of the string you're reading from.
|
|
This is a highly specialized function (read: don't use it if you don't understand what it does and how it's used) to be used with utf8_decode_char(). Its purpose is to determine the maximum possible length in bytes of current UTF-8 character that ``s'' points to.
|
|
Converts hiragana and katakana characters to ASCII sequences, strips voice marks and keeps any other characters as is.
|
|
Copies the NUL-terminated string ``src'' to ``dst'' replacing all invalid characters (non-UTF-8) with underscores. ``src'' and ``dst'' may be identical but must not overlap otherwise. If ``dst'' is too small, the resulting string will be truncated but the UTF-8 encoding is preserved in any case.
TODO: Add overlap check |
|
|
|
|
|
Normalizes an UTF-8 string to the requested normal form and returns it as a newly allocated string.
|
|
Works exactly like strlcpy() but preserves a valid UTF-8 encoding, if the string has to be truncated.
|
|
Copies ``src'' to ``dst'' converting all characters to lowercase. If the string is as long as ``size'' or larger, the string in ``dst'' will be truncated. ``dst'' is always NUL-terminated unless ``size'' is zero. The returned value is the length of the converted string ``src'' regardless of the ``size'' parameter. ``src'' must be validly UTF-8 encoded, otherwise the string will be truncated.
|
|
Copies the UTF-8 string ``src'' to a newly allocated buffer converting all characters to lowercase.
|
|
Copies ``src'' to ``dst'' converting all characters to uppercase. If the string is as long as ``size'' or larger, the string in ``dst'' will be truncated. ``dst'' is always NUL-terminated unless ``size'' is zero. The returned value is the length of the converted string ``src'' regardless of the ``size'' parameter. ``src'' must be validly UTF-8 encoded, otherwise the string will be truncated.
|
|
Copies the UTF-8 string ``src'' to a newly allocated buffer converting all characters to uppercase.
|
|
Converts the UTF-8 encoded src string to a string encoded in the primary filename character set.
|
|
Convert UTF-8 string to ISO-8859-1 in place. If `space' is TRUE, all characters outside the U+0000 .. U+00FF range are turned to space U+0020. Otherwise, we stop at the first out-of-range character. If `len' is 0, the length of the string is computed with strlen().
|
|
Non-convertible characters will be replaced by '_'. The returned string WILL be NUL-terminated in any case. In case of an unrecoverable error, NULL is returned.
|
|
|