libunibreak
4.0
|
Implementation of the grapheme breaking algorithm as described in Unicode Standard Annex 29. More...
#include <stdbool.h>
#include <string.h>
#include "graphemebreak.h"
#include "graphemebreakdata.c"
#include "unibreakdef.h"
Macros | |
#define | ARRAY_LEN(x) (sizeof(x) / sizeof(x[0])) |
Functions | |
void | init_graphemebreak (void) |
Initializes the grapheme break internals. More... | |
static enum GraphemeBreakClass | get_char_gb_class (utf32_t ch) |
Gets the grapheme breaking class of a character. More... | |
static void | set_graphemebreaks (const void *s, size_t len, char *brks, get_next_char_t get_next_char) |
Sets the grapheme breaking information for a generic input string. More... | |
void | set_graphemebreaks_utf8 (const utf8_t *s, size_t len, const char *lang, char *brks) |
Sets the grapheme breaking information for a UTF-8 input string. More... | |
void | set_graphemebreaks_utf16 (const utf16_t *s, size_t len, const char *lang, char *brks) |
Sets the grapheme breaking information for a UTF-16 input string. More... | |
void | set_graphemebreaks_utf32 (const utf32_t *s, size_t len, const char *lang, char *brks) |
Sets the grapheme breaking information for a UTF-32 input string. More... | |
Implementation of the grapheme breaking algorithm as described in Unicode Standard Annex 29.
#define ARRAY_LEN | ( | x | ) | (sizeof(x) / sizeof(x[0])) |
|
static |
Gets the grapheme breaking class of a character.
ch | character to check |
Returns the grapheme breaking class of the character if found; GBP_Other otherwise
void init_graphemebreak | ( | void | ) |
Initializes the grapheme break internals.
It currently does nothing, but it may in the future.
|
static |
Sets the grapheme breaking information for a generic input string.
[in] | s | input string |
[in] | len | length of the input |
[out] | brks | pointer to the output breaking data, containing GRAPHEMEBREAK_BREAK or GRAPHEMEBREAK_NOBREAK |
[in] | get_next_char | function to get the next UTF-32 character |
void set_graphemebreaks_utf16 | ( | const utf16_t * | s, |
size_t | len, | ||
const char * | lang, | ||
char * | brks | ||
) |
Sets the grapheme breaking information for a UTF-16 input string.
[in] | s | input UTF-16 string |
[in] | len | length of the input |
[in] | lang | language of the input (reserved for future use) |
[out] | brks | pointer to the output breaking data, containing GRAPHEMEBREAK_BREAK or GRAPHEMEBREAK_NOBREAK. The first element in the output array is for the break behind the first character. The pointer must point to an array with at least as many elements as there are characters in the string. |
void set_graphemebreaks_utf32 | ( | const utf32_t * | s, |
size_t | len, | ||
const char * | lang, | ||
char * | brks | ||
) |
Sets the grapheme breaking information for a UTF-32 input string.
[in] | s | input UTF-32 string |
[in] | len | length of the input |
[in] | lang | language of the input (reserved for future use) |
[out] | brks | pointer to the output breaking data, containing GRAPHEMEBREAK_BREAK or GRAPHEMEBREAK_NOBREAK. The first element in the output array is for the break behind the first character. The pointer must point to an array with at least as many elements as there are characters in the string. |
void set_graphemebreaks_utf8 | ( | const utf8_t * | s, |
size_t | len, | ||
const char * | lang, | ||
char * | brks | ||
) |
Sets the grapheme breaking information for a UTF-8 input string.
[in] | s | input UTF-8 string |
[in] | len | length of the input |
[in] | lang | language of the input (reserved for future use) |
[out] | brks | pointer to the output breaking data, containing GRAPHEMEBREAK_BREAK or GRAPHEMEBREAK_NOBREAK. The first element in the output array is for the break behind the first character. The pointer must point to an array with at least as many elements as there are characters in the string. |