]>
Commit | Line | Data |
---|---|---|
51004dcb A |
1 | .\" Hey, Emacs! This is -*-nroff-*- you know... |
2 | .\" | |
3 | .\" gendict.1: manual page for the gendict utility | |
4 | .\" | |
f3c0d7a5 A |
5 | .\" Copyright (C) 2016 and later: Unicode, Inc. and others. |
6 | .\" License & terms of use: http://www.unicode.org/copyright.html | |
51004dcb A |
7 | .\" Copyright (C) 2012 International Business Machines Corporation and others |
8 | .\" | |
9 | .TH GENDICT 1 "1 June 2012" "ICU MANPAGE" "ICU @VERSION@ Manual" | |
10 | .SH NAME | |
11 | .B gendict | |
12 | \- Compiles a word list into an ICU string trie dictionary | |
13 | .SH SYNOPSIS | |
14 | .B gendict | |
15 | [ | |
16 | .BR "\fB\-\-uchars" | |
17 | | | |
18 | .BR "\fB\-\-bytes" | |
19 | .BI "\fB\-\-transform" " transform" | |
20 | ] | |
21 | [ | |
22 | .BR "\-h\fP, \fB\-?\fP, \fB\-\-help" | |
23 | ] | |
24 | [ | |
25 | .BR "\-V\fP, \fB\-\-version" | |
26 | ] | |
27 | [ | |
28 | .BR "\-c\fP, \fB\-\-copyright" | |
29 | ] | |
30 | [ | |
31 | .BR "\-v\fP, \fB\-\-verbose" | |
32 | ] | |
33 | [ | |
34 | .BI "\-i\fP, \fB\-\-icudatadir" " directory" | |
35 | ] | |
36 | .IR " input-file" | |
37 | .IR " output\-file" | |
38 | .SH DESCRIPTION | |
39 | .B gendict | |
40 | reads the word list from | |
41 | .I input-file | |
42 | and creates a string trie dictionary file. Normally this data file has the | |
43 | .B .dict | |
44 | extension. | |
45 | .PP | |
46 | Words begin at the beginning of a line and are terminated by the first whitespace. | |
47 | Lines that begin with whitespace are ignored. | |
48 | .SH OPTIONS | |
49 | .TP | |
50 | .BR "\-h\fP, \fB\-?\fP, \fB\-\-help" | |
51 | Print help about usage and exit. | |
52 | .TP | |
53 | .BR "\-V\fP, \fB\-\-version" | |
54 | Print the version of | |
55 | .B gendict | |
56 | and exit. | |
57 | .TP | |
58 | .BR "\-c\fP, \fB\-\-copyright" | |
59 | Embeds the standard ICU copyright into the | |
60 | .IR output-file . | |
61 | .TP | |
62 | .BR "\-v\fP, \fB\-\-verbose" | |
63 | Display extra informative messages during execution. | |
64 | .TP | |
65 | .BI "\-i\fP, \fB\-\-icudatadir" " directory" | |
66 | Look for any necessary ICU data files in | |
67 | .IR directory . | |
68 | For example, the file | |
69 | .B pnames.icu | |
70 | must be located when ICU's data is not built as a shared library. | |
71 | The default ICU data directory is specified by the environment variable | |
72 | .BR ICU_DATA . | |
73 | Most configurations of ICU do not require this argument. | |
74 | .TP | |
75 | .BR "\fB\-\-uchars" | |
76 | Set the output trie type to UChar. Mutually exclusive with | |
77 | .BR \-\-bytes . | |
78 | .TP | |
79 | .BR "\fB\-\-bytes" | |
80 | Set the output trie type to Bytes. Mutually exclusive with | |
81 | .BR \-\-uchars . | |
82 | .TP | |
83 | .BI "\fB\-\-transform" " transform" | |
84 | Set the transform type. Should only be specified with | |
85 | .BR \-\-bytes . | |
86 | Currently supported transforms are: | |
87 | .BR offset\-<hex\-number> , | |
88 | which specifies an offset to subtract from all input characters. | |
89 | It should be noted that the offset transform also maps U+200D | |
90 | to 0xFF and U+200C to 0xFE, in order to offer compatibility with | |
91 | languages that require these characters. | |
92 | A transform must be specified for a bytes trie, and when applied | |
93 | to the non-value characters in the | |
94 | .IR input-file | |
95 | must produce output between 0x00 and 0xFF. | |
96 | .TP | |
97 | .BI " input\-file" | |
98 | The source file to read. | |
99 | .TP | |
100 | .BI " output\-file" | |
101 | The file to write the output dictionary to. | |
102 | .SH CAVEATS | |
103 | The | |
104 | .IR input-file | |
105 | is assumed to be encoded in UTF-8. | |
106 | The integers in the | |
107 | .IR input-file | |
108 | that are used as values must be made up of ASCII digits. They | |
109 | may be specified either in hex, by using a 0x prefix, or in | |
110 | decimal. | |
111 | Either | |
112 | .B \-\-bytes | |
113 | or | |
114 | .B \-\-uchars | |
115 | must be specified. | |
116 | .SH ENVIRONMENT | |
117 | .TP 10 | |
118 | .B ICU_DATA | |
119 | Specifies the directory containing ICU data. Defaults to | |
120 | .BR @thepkgicudatadir@/@PACKAGE@/@VERSION@/ . | |
121 | Some tools in ICU depend on the presence of the trailing slash. It is thus | |
122 | important to make sure that it is present if | |
123 | .B ICU_DATA | |
124 | is set. | |
125 | .SH AUTHORS | |
126 | Maxime Serrano | |
127 | .SH VERSION | |
128 | 1.0 | |
129 | .SH COPYRIGHT | |
130 | Copyright (C) 2012 International Business Machines Corporation and others | |
131 | .SH SEE ALSO | |
132 | .BR http://www.icu-project.org/userguide/boundaryAnalysis.html | |
133 |