]> git.saurik.com Git - apple/icu.git/blob - icuSources/tools/genrb/filterrb.cpp
ICU-64232.0.1.tar.gz
[apple/icu.git] / icuSources / tools / genrb / filterrb.cpp
1 // © 2018 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3
4 #include <iostream>
5 #include <stack>
6
7 #include "filterrb.h"
8 #include "errmsg.h"
9
10
11 const char* PathFilter::kEInclusionNames[] = {
12 "INCLUDE",
13 "PARTIAL",
14 "EXCLUDE"
15 };
16
17
18 ResKeyPath::ResKeyPath() {}
19
20 ResKeyPath::ResKeyPath(const std::string& path, UErrorCode& status) {
21 if (path.empty() || path[0] != '/') {
22 std::cerr << "genrb error: path must start with /: " << path << std::endl;
23 status = U_PARSE_ERROR;
24 return;
25 }
26 size_t i;
27 size_t j = 0;
28 while (true) {
29 i = j + 1;
30 j = path.find('/', i);
31 std::string key = path.substr(i, j - i);
32 if (key.empty()) {
33 std::cerr << "genrb error: empty subpaths and trailing slashes are not allowed: " << path << std::endl;
34 status = U_PARSE_ERROR;
35 return;
36 }
37 push(key);
38 if (j == std::string::npos) {
39 break;
40 }
41 }
42 }
43
44 void ResKeyPath::push(const std::string& key) {
45 fPath.push_back(key);
46 }
47
48 void ResKeyPath::pop() {
49 fPath.pop_back();
50 }
51
52 const std::list<std::string>& ResKeyPath::pieces() const {
53 return fPath;
54 }
55
56 std::ostream& operator<<(std::ostream& out, const ResKeyPath& value) {
57 if (value.pieces().empty()) {
58 out << "/";
59 } else for (auto& key : value.pieces()) {
60 out << "/" << key;
61 }
62 return out;
63 }
64
65
66 PathFilter::~PathFilter() = default;
67
68
69 void SimpleRuleBasedPathFilter::addRule(const std::string& ruleLine, UErrorCode& status) {
70 if (ruleLine.empty()) {
71 std::cerr << "genrb error: empty filter rules are not allowed" << std::endl;
72 status = U_PARSE_ERROR;
73 return;
74 }
75 bool inclusionRule = false;
76 if (ruleLine[0] == '+') {
77 inclusionRule = true;
78 } else if (ruleLine[0] != '-') {
79 std::cerr << "genrb error: rules must start with + or -: " << ruleLine << std::endl;
80 status = U_PARSE_ERROR;
81 return;
82 }
83 ResKeyPath path(ruleLine.substr(1), status);
84 addRule(path, inclusionRule, status);
85 }
86
87 void SimpleRuleBasedPathFilter::addRule(const ResKeyPath& path, bool inclusionRule, UErrorCode& status) {
88 if (U_FAILURE(status)) {
89 return;
90 }
91 fRoot.applyRule(path, path.pieces().begin(), inclusionRule, status);
92 }
93
94 PathFilter::EInclusion SimpleRuleBasedPathFilter::match(const ResKeyPath& path) const {
95 const Tree* node = &fRoot;
96
97 // defaultResult "bubbles up" the nearest "definite" inclusion/exclusion rule
98 EInclusion defaultResult = INCLUDE;
99 if (node->fIncluded != PARTIAL) {
100 // rules handled here: "+/" and "-/"
101 defaultResult = node->fIncluded;
102 }
103
104 // isLeaf is whether the filter tree can provide no additional information
105 // even if additional subpaths are added to the given key
106 bool isLeaf = false;
107
108 for (auto& key : path.pieces()) {
109 auto child = node->fChildren.find(key);
110 // Leaf case 1: input path descends outside the filter tree
111 if (child == node->fChildren.end()) {
112 if (node->fWildcard) {
113 // A wildcard pattern is present; continue checking
114 node = node->fWildcard.get();
115 } else {
116 isLeaf = true;
117 break;
118 }
119 } else {
120 node = &child->second;
121 }
122 if (node->fIncluded != PARTIAL) {
123 defaultResult = node->fIncluded;
124 }
125 }
126
127 // Leaf case 2: input path exactly matches a filter leaf
128 if (node->isLeaf()) {
129 isLeaf = true;
130 }
131
132 // Always return PARTIAL if we are not at a leaf
133 if (!isLeaf) {
134 return PARTIAL;
135 }
136
137 // If leaf node is PARTIAL, return the default
138 if (node->fIncluded == PARTIAL) {
139 return defaultResult;
140 }
141
142 return node->fIncluded;
143 }
144
145
146 SimpleRuleBasedPathFilter::Tree::Tree(const Tree& other)
147 : fIncluded(other.fIncluded), fChildren(other.fChildren) {
148 // Note: can't use the default copy assignment because of the std::unique_ptr
149 if (other.fWildcard) {
150 fWildcard.reset(new Tree(*other.fWildcard));
151 }
152 }
153
154 bool SimpleRuleBasedPathFilter::Tree::isLeaf() const {
155 return fChildren.empty() && !fWildcard;
156 }
157
158 void SimpleRuleBasedPathFilter::Tree::applyRule(
159 const ResKeyPath& path,
160 std::list<std::string>::const_iterator it,
161 bool inclusionRule,
162 UErrorCode& status) {
163
164 // Base Case
165 if (it == path.pieces().end()) {
166 if (isVerbose() && (fIncluded != PARTIAL || !isLeaf())) {
167 std::cout << "genrb info: rule on path " << path
168 << " overrides previous rules" << std::endl;
169 }
170 fIncluded = inclusionRule ? INCLUDE : EXCLUDE;
171 fChildren.clear();
172 fWildcard.reset();
173 return;
174 }
175
176 // Recursive Step
177 auto& key = *it;
178 if (key == "*") {
179 // Case 1: Wildcard
180 if (!fWildcard) {
181 fWildcard.reset(new Tree());
182 }
183 // Apply the rule to fWildcard and also to all existing children.
184 it++;
185 fWildcard->applyRule(path, it, inclusionRule, status);
186 for (auto& child : fChildren) {
187 child.second.applyRule(path, it, inclusionRule, status);
188 }
189 it--;
190
191 } else {
192 // Case 2: Normal Key
193 auto search = fChildren.find(key);
194 if (search == fChildren.end()) {
195 if (fWildcard) {
196 // Deep-copy the existing wildcard tree into the new key
197 search = fChildren.emplace(key, Tree(*fWildcard)).first;
198 } else {
199 search = fChildren.emplace(key, Tree()).first;
200 }
201 }
202 it++;
203 search->second.applyRule(path, it, inclusionRule, status);
204 it--;
205 }
206 }
207
208 void SimpleRuleBasedPathFilter::Tree::print(std::ostream& out, int32_t indent) const {
209 for (int32_t i=0; i<indent; i++) out << "\t";
210 out << "included: " << kEInclusionNames[fIncluded] << std::endl;
211 for (auto& child : fChildren) {
212 for (int32_t i=0; i<indent; i++) out << "\t";
213 out << child.first << ": {" << std::endl;
214 child.second.print(out, indent + 1);
215 for (int32_t i=0; i<indent; i++) out << "\t";
216 out << "}" << std::endl;
217 }
218 if (fWildcard) {
219 for (int32_t i=0; i<indent; i++) out << "\t";
220 out << "* {" << std::endl;
221 fWildcard->print(out, indent + 1);
222 for (int32_t i=0; i<indent; i++) out << "\t";
223 out << "}" << std::endl;
224 }
225 }
226
227 void SimpleRuleBasedPathFilter::print(std::ostream& out) const {
228 out << "SimpleRuleBasedPathFilter {" << std::endl;
229 fRoot.print(out, 1);
230 out << "}" << std::endl;
231 }
232
233 std::ostream& operator<<(std::ostream& out, const SimpleRuleBasedPathFilter& value) {
234 value.print(out);
235 return out;
236 }