]>
Commit | Line | Data |
---|---|---|
f3c0d7a5 A |
1 | // © 2016 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
729e4ab9 A |
3 | /* |
4 | *************************************************************************** | |
2ca993e8 | 5 | * Copyright (C) 2008-2015, International Business Machines Corporation |
729e4ab9 A |
6 | * and others. All Rights Reserved. |
7 | *************************************************************************** | |
8 | * file name: uspoof_build.cpp | |
f3c0d7a5 | 9 | * encoding: UTF-8 |
729e4ab9 A |
10 | * tab size: 8 (not used) |
11 | * indentation:4 | |
12 | * | |
13 | * created on: 2008 Dec 8 | |
14 | * created by: Andy Heninger | |
15 | * | |
16 | * Unicode Spoof Detection Data Builder | |
17 | * Builder-related functions are kept in separate files so that applications not needing | |
18 | * the builder can more easily exclude them, typically by means of static linking. | |
19 | * | |
20 | * There are three relatively independent sets of Spoof data, | |
21 | * Confusables, | |
22 | * Whole Script Confusables | |
23 | * ID character extensions. | |
24 | * | |
25 | * The data tables for each are built separately, each from its own definitions | |
26 | */ | |
27 | ||
28 | #include "unicode/utypes.h" | |
29 | #include "unicode/uspoof.h" | |
30 | #include "unicode/unorm.h" | |
31 | #include "unicode/uregex.h" | |
32 | #include "unicode/ustring.h" | |
33 | #include "cmemory.h" | |
34 | #include "uspoof_impl.h" | |
35 | #include "uhash.h" | |
36 | #include "uvector.h" | |
37 | #include "uassert.h" | |
38 | #include "uarrsort.h" | |
39 | #include "uspoof_conf.h" | |
729e4ab9 A |
40 | |
41 | #if !UCONFIG_NO_NORMALIZATION | |
42 | ||
43 | U_NAMESPACE_USE | |
44 | ||
2ca993e8 A |
45 | // Defined in uspoof.cpp, initializes file-static variables. |
46 | U_CFUNC void uspoof_internalInitStatics(UErrorCode *status); | |
729e4ab9 A |
47 | |
48 | // The main data building function | |
49 | ||
50 | U_CAPI USpoofChecker * U_EXPORT2 | |
51 | uspoof_openFromSource(const char *confusables, int32_t confusablesLen, | |
f3c0d7a5 | 52 | const char* /*confusablesWholeScript*/, int32_t /*confusablesWholeScriptLen*/, |
729e4ab9 | 53 | int32_t *errorType, UParseError *pe, UErrorCode *status) { |
2ca993e8 | 54 | uspoof_internalInitStatics(status); |
729e4ab9 A |
55 | if (U_FAILURE(*status)) { |
56 | return NULL; | |
57 | } | |
58 | #if UCONFIG_NO_REGULAR_EXPRESSIONS | |
59 | *status = U_UNSUPPORTED_ERROR; | |
60 | return NULL; | |
61 | #else | |
62 | if (errorType!=NULL) { | |
63 | *errorType = 0; | |
64 | } | |
65 | if (pe != NULL) { | |
66 | pe->line = 0; | |
67 | pe->offset = 0; | |
68 | pe->preContext[0] = 0; | |
69 | pe->postContext[0] = 0; | |
70 | } | |
71 | ||
72 | // Set up a shell of a spoof detector, with empty data. | |
73 | SpoofData *newSpoofData = new SpoofData(*status); | |
0f5d89e8 A |
74 | |
75 | if (newSpoofData == NULL) { | |
76 | *status = U_MEMORY_ALLOCATION_ERROR; | |
77 | return NULL; | |
78 | } | |
79 | ||
80 | if (U_FAILURE(*status)) { | |
81 | delete newSpoofData; | |
82 | return NULL; | |
83 | } | |
729e4ab9 A |
84 | SpoofImpl *This = new SpoofImpl(newSpoofData, *status); |
85 | ||
0f5d89e8 A |
86 | if (This == NULL) { |
87 | *status = U_MEMORY_ALLOCATION_ERROR; | |
88 | delete newSpoofData; // explicit delete as the destructor for SpoofImpl won't be called. | |
89 | return NULL; | |
90 | } | |
91 | ||
92 | if (U_FAILURE(*status)) { | |
93 | delete This; // no delete for newSpoofData, as the SpoofImpl destructor will delete it. | |
94 | return NULL; | |
95 | } | |
96 | ||
729e4ab9 A |
97 | // Compile the binary data from the source (text) format. |
98 | ConfusabledataBuilder::buildConfusableData(This, confusables, confusablesLen, errorType, pe, *status); | |
729e4ab9 A |
99 | |
100 | if (U_FAILURE(*status)) { | |
101 | delete This; | |
102 | This = NULL; | |
103 | } | |
104 | return (USpoofChecker *)This; | |
105 | #endif // UCONFIG_NO_REGULAR_EXPRESSIONS | |
106 | } | |
107 | ||
108 | #endif |