-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathsabir.h
65 lines (53 loc) · 2.19 KB
/
sabir.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
#ifndef SABIR_H
#define SABIR_H
/* Programs using this library must be linked against the utf8proc library,
 * whose source code is available at:
 *    https://github.com/JuliaLang/utf8proc
 */
/* Library version, as a string. */
#define SB_VERSION "0.3"
#include <stddef.h>
/* Error codes. SB_OK is zero; the other codes follow in increasing order. */
enum {
	SB_OK     = 0,	/* No error. */
	SB_EOPEN  = 1,	/* Cannot open model file. */
	SB_EMAGIC = 2,	/* Not a model file. */
	SB_EMODEL = 3,	/* Invalid model file. */
	SB_EIO    = 4,	/* I/O error. */
	SB_ENOMEM = 5	/* Out of memory. */
};
/* Returns a string describing the error code "err".
 * "err" is expected to be one of the SB_* codes; what happens for any other
 * value is not specified in this header.
 */
const char *sb_strerror(int err);
/* Language detection model. The structure is only forward-declared in this
 * header, so callers handle it exclusively through pointers.
 */
struct sabir;

/* Reads a language detection model from the file at "path".
 * On success, "*model" is set to the loaded model and SB_OK is returned.
 * On failure, "*model" is set to NULL and an error code is returned instead.
 */
int sb_load(struct sabir **model, const char *path);
/* Deallocates the given model (presumably one obtained from sb_load()).
 * Pointers previously returned by sb_langs() or sb_detect() for this model
 * must not be used afterwards.
 */
void sb_dealloc(struct sabir *model);
/* Lists the languages a model supports.
 * The result is a NULL-terminated array of strings in lexicographic order.
 * It refers to the model's internals, and must therefore not be used once
 * the model has been deallocated. When "nr" is not NULL, the number of
 * supported languages is stored in "*nr".
 */
const char *const *sb_langs(struct sabir *model, size_t *nr);
/* Identifies the language of the UTF-8 string "text", of length "len".
 * The result points into the model's internals, and must therefore not be
 * accessed after the model is deallocated.
 * A value is always returned, even when the text to classify is not written
 * in any of the languages the model supports.
 */
const char *sb_detect(struct sabir *model, const void *text, size_t len);
/* Streaming (low-level) classification interface, intended for input text
 * that is read from a stream. Callers must proceed as follows:
 *   1. sb_init()   - (re)initializes the classifier state.
 *   2. sb_feed()   - called one or more times, each time with a piece of the
 *                    text to classify. The chunks are assumed to be
 *                    contiguous; they need not start or end on valid UTF-8
 *                    boundaries.
 *   3. sb_finish() - called once enough data has been gathered, to obtain
 *                    the best matching language.
 */
void sb_init(struct sabir *model);
void sb_feed(struct sabir *model, const void *chunk, size_t len);
const char *sb_finish(struct sabir *model);
#endif