libunibreak  4.0
wordbreak.h
Go to the documentation of this file.
1 /* vim: set expandtab tabstop=4 softtabstop=4 shiftwidth=4: */
2 
3 /*
4  * Word breaking in a Unicode sequence. Designed to be used in a
5  * generic text renderer.
6  *
7  * Copyright (C) 2013-2016 Tom Hacohen <tom at stosb dot com>
8  *
9  * This software is provided 'as-is', without any express or implied
10  * warranty. In no event will the author be held liable for any damages
11  * arising from the use of this software.
12  *
13  * Permission is granted to anyone to use this software for any purpose,
14  * including commercial applications, and to alter it and redistribute
15  * it freely, subject to the following restrictions:
16  *
17  * 1. The origin of this software must not be misrepresented; you must
18  * not claim that you wrote the original software. If you use this
19  * software in a product, an acknowledgement in the product
20  * documentation would be appreciated but is not required.
21  * 2. Altered source versions must be plainly marked as such, and must
22  * not be misrepresented as being the original software.
23  * 3. This notice may not be removed or altered from any source
24  * distribution.
25  *
26  * The main reference is Unicode Standard Annex 29 (UAX #29):
27  * <URL:http://unicode.org/reports/tr29>
28  *
29  * When this library was designed, this annex was at Revision 17, for
30  * Unicode 6.0.0:
31  * <URL:http://www.unicode.org/reports/tr29/tr29-17.html>
32  *
33  * This library has been updated according to Revision 29, for
34  * Unicode 9.0.0:
35  * <URL:http://www.unicode.org/reports/tr29/tr29-29.html>
36  *
37  * The Unicode Terms of Use are available at
38  * <URL:http://www.unicode.org/copyright.html>
39  */
40 
49 #ifndef WORDBREAK_H
50 #define WORDBREAK_H
51 
52 #include <stddef.h>
53 #include "unibreakbase.h"
54 
55 #ifdef __cplusplus
56 extern "C" {
57 #endif
58 
59 #define WORDBREAK_BREAK 0
60 #define WORDBREAK_NOBREAK 1
61 #define WORDBREAK_INSIDEACHAR 2
63 void init_wordbreak(void);
64 void set_wordbreaks_utf8(
65  const utf8_t *s, size_t len, const char* lang, char *brks);
66 void set_wordbreaks_utf16(
67  const utf16_t *s, size_t len, const char* lang, char *brks);
68 void set_wordbreaks_utf32(
69  const utf32_t *s, size_t len, const char* lang, char *brks);
70 
71 #ifdef __cplusplus
72 }
73 #endif
74 
75 #endif
unsigned char utf8_t
Type for UTF-8 data points.
Definition: unibreakbase.h:47
unsigned short utf16_t
Type for UTF-16 data points.
Definition: unibreakbase.h:48
void set_wordbreaks_utf32(const utf32_t *s, size_t len, const char *lang, char *brks)
Sets the word breaking information for a UTF-32 input string.
Definition: wordbreak.c:546
const char * lang
Language name.
Definition: linebreakdef.h:131
unsigned int utf32_t
Type for UTF-32 data points.
Definition: unibreakbase.h:49
void set_wordbreaks_utf8(const utf8_t *s, size_t len, const char *lang, char *brks)
Sets the word breaking information for a UTF-8 input string.
Definition: wordbreak.c:506
void set_wordbreaks_utf16(const utf16_t *s, size_t len, const char *lang, char *brks)
Sets the word breaking information for a UTF-16 input string.
Definition: wordbreak.c:526
unibreakbase.h
Header file for common definitions in the libunibreak library.