]>
Commit | Line | Data |
---|---|---|
729e4ab9 A |
1 | /* |
2 | *************************************************************************** | |
2ca993e8 | 3 | * Copyright (C) 2008-2015, International Business Machines Corporation |
729e4ab9 A |
4 | * and others. All Rights Reserved. |
5 | *************************************************************************** | |
6 | * file name: uspoof_build.cpp | |
7 | * encoding: US-ASCII | |
8 | * tab size: 8 (not used) | |
9 | * indentation:4 | |
10 | * | |
11 | * created on: 2008 Dec 8 | |
12 | * created by: Andy Heninger | |
13 | * | |
14 | * Unicode Spoof Detection Data Builder | |
15 | * Builder-related functions are kept in separate files so that applications not needing | |
16 | * the builder can more easily exclude them, typically by means of static linking. | |
17 | * | |
18 | * There are three relatively independent sets of Spoof data, | |
19 | * Confusables, | |
20 | * Whole Script Confusables | |
21 | * ID character extensions. | |
22 | * | |
23 | * The data tables for each are built separately, each from its own definitions | |
24 | */ | |
25 | ||
26 | #include "unicode/utypes.h" | |
27 | #include "unicode/uspoof.h" | |
28 | #include "unicode/unorm.h" | |
29 | #include "unicode/uregex.h" | |
30 | #include "unicode/ustring.h" | |
31 | #include "cmemory.h" | |
32 | #include "uspoof_impl.h" | |
33 | #include "uhash.h" | |
34 | #include "uvector.h" | |
35 | #include "uassert.h" | |
36 | #include "uarrsort.h" | |
37 | #include "uspoof_conf.h" | |
38 | #include "uspoof_wsconf.h" | |
39 | ||
40 | #if !UCONFIG_NO_NORMALIZATION | |
41 | ||
42 | U_NAMESPACE_USE | |
43 | ||
2ca993e8 A |
44 | // Defined in uspoof.cpp, initializes file-static variables. |
45 | U_CFUNC void uspoof_internalInitStatics(UErrorCode *status); | |
729e4ab9 A |
46 | |
47 | // The main data building function | |
48 | ||
49 | U_CAPI USpoofChecker * U_EXPORT2 | |
50 | uspoof_openFromSource(const char *confusables, int32_t confusablesLen, | |
51 | const char *confusablesWholeScript, int32_t confusablesWholeScriptLen, | |
52 | int32_t *errorType, UParseError *pe, UErrorCode *status) { | |
2ca993e8 | 53 | uspoof_internalInitStatics(status); |
729e4ab9 A |
54 | if (U_FAILURE(*status)) { |
55 | return NULL; | |
56 | } | |
57 | #if UCONFIG_NO_REGULAR_EXPRESSIONS | |
58 | *status = U_UNSUPPORTED_ERROR; | |
59 | return NULL; | |
60 | #else | |
61 | if (errorType!=NULL) { | |
62 | *errorType = 0; | |
63 | } | |
64 | if (pe != NULL) { | |
65 | pe->line = 0; | |
66 | pe->offset = 0; | |
67 | pe->preContext[0] = 0; | |
68 | pe->postContext[0] = 0; | |
69 | } | |
70 | ||
71 | // Set up a shell of a spoof detector, with empty data. | |
72 | SpoofData *newSpoofData = new SpoofData(*status); | |
73 | SpoofImpl *This = new SpoofImpl(newSpoofData, *status); | |
74 | ||
75 | // Compile the binary data from the source (text) format. | |
76 | ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status); | |
77 | buildWSConfusableData(This, confusablesWholeScript, confusablesWholeScriptLen, pe, *status); | |
78 | ||
79 | if (U_FAILURE(*status)) { | |
80 | delete This; | |
81 | This = NULL; | |
82 | } | |
83 | return (USpoofChecker *)This; | |
84 | #endif // UCONFIG_NO_REGULAR_EXPRESSIONS | |
85 | } | |
86 | ||
87 | #endif |