]>
Commit | Line | Data |
---|---|---|
3d1f044b A |
1 | // © 2018 and later: Unicode, Inc. and others. |
2 | // License & terms of use: http://www.unicode.org/copyright.html | |
3 | ||
4 | #include <iostream> | |
5 | #include <stack> | |
6 | ||
7 | #include "filterrb.h" | |
8 | #include "errmsg.h" | |
9 | ||
10 | ||
11 | const char* PathFilter::kEInclusionNames[] = { | |
12 | "INCLUDE", | |
13 | "PARTIAL", | |
14 | "EXCLUDE" | |
15 | }; | |
16 | ||
17 | ||
18 | ResKeyPath::ResKeyPath() {} | |
19 | ||
20 | ResKeyPath::ResKeyPath(const std::string& path, UErrorCode& status) { | |
21 | if (path.empty() || path[0] != '/') { | |
22 | std::cerr << "genrb error: path must start with /: " << path << std::endl; | |
23 | status = U_PARSE_ERROR; | |
24 | return; | |
25 | } | |
340931cb A |
26 | if (path.length() == 1) { |
27 | return; | |
28 | } | |
3d1f044b A |
29 | size_t i; |
30 | size_t j = 0; | |
31 | while (true) { | |
32 | i = j + 1; | |
33 | j = path.find('/', i); | |
34 | std::string key = path.substr(i, j - i); | |
35 | if (key.empty()) { | |
36 | std::cerr << "genrb error: empty subpaths and trailing slashes are not allowed: " << path << std::endl; | |
37 | status = U_PARSE_ERROR; | |
38 | return; | |
39 | } | |
40 | push(key); | |
41 | if (j == std::string::npos) { | |
42 | break; | |
43 | } | |
44 | } | |
45 | } | |
46 | ||
47 | void ResKeyPath::push(const std::string& key) { | |
48 | fPath.push_back(key); | |
49 | } | |
50 | ||
51 | void ResKeyPath::pop() { | |
52 | fPath.pop_back(); | |
53 | } | |
54 | ||
55 | const std::list<std::string>& ResKeyPath::pieces() const { | |
56 | return fPath; | |
57 | } | |
58 | ||
59 | std::ostream& operator<<(std::ostream& out, const ResKeyPath& value) { | |
60 | if (value.pieces().empty()) { | |
61 | out << "/"; | |
62 | } else for (auto& key : value.pieces()) { | |
63 | out << "/" << key; | |
64 | } | |
65 | return out; | |
66 | } | |
67 | ||
68 | ||
69 | PathFilter::~PathFilter() = default; | |
70 | ||
71 | ||
72 | void SimpleRuleBasedPathFilter::addRule(const std::string& ruleLine, UErrorCode& status) { | |
73 | if (ruleLine.empty()) { | |
74 | std::cerr << "genrb error: empty filter rules are not allowed" << std::endl; | |
75 | status = U_PARSE_ERROR; | |
76 | return; | |
77 | } | |
78 | bool inclusionRule = false; | |
79 | if (ruleLine[0] == '+') { | |
80 | inclusionRule = true; | |
81 | } else if (ruleLine[0] != '-') { | |
82 | std::cerr << "genrb error: rules must start with + or -: " << ruleLine << std::endl; | |
83 | status = U_PARSE_ERROR; | |
84 | return; | |
85 | } | |
86 | ResKeyPath path(ruleLine.substr(1), status); | |
87 | addRule(path, inclusionRule, status); | |
88 | } | |
89 | ||
90 | void SimpleRuleBasedPathFilter::addRule(const ResKeyPath& path, bool inclusionRule, UErrorCode& status) { | |
91 | if (U_FAILURE(status)) { | |
92 | return; | |
93 | } | |
94 | fRoot.applyRule(path, path.pieces().begin(), inclusionRule, status); | |
95 | } | |
96 | ||
97 | PathFilter::EInclusion SimpleRuleBasedPathFilter::match(const ResKeyPath& path) const { | |
98 | const Tree* node = &fRoot; | |
99 | ||
100 | // defaultResult "bubbles up" the nearest "definite" inclusion/exclusion rule | |
101 | EInclusion defaultResult = INCLUDE; | |
102 | if (node->fIncluded != PARTIAL) { | |
103 | // rules handled here: "+/" and "-/" | |
104 | defaultResult = node->fIncluded; | |
105 | } | |
106 | ||
107 | // isLeaf is whether the filter tree can provide no additional information | |
108 | // even if additional subpaths are added to the given key | |
109 | bool isLeaf = false; | |
110 | ||
111 | for (auto& key : path.pieces()) { | |
112 | auto child = node->fChildren.find(key); | |
113 | // Leaf case 1: input path descends outside the filter tree | |
114 | if (child == node->fChildren.end()) { | |
115 | if (node->fWildcard) { | |
116 | // A wildcard pattern is present; continue checking | |
117 | node = node->fWildcard.get(); | |
118 | } else { | |
119 | isLeaf = true; | |
120 | break; | |
121 | } | |
122 | } else { | |
123 | node = &child->second; | |
124 | } | |
125 | if (node->fIncluded != PARTIAL) { | |
126 | defaultResult = node->fIncluded; | |
127 | } | |
128 | } | |
129 | ||
130 | // Leaf case 2: input path exactly matches a filter leaf | |
131 | if (node->isLeaf()) { | |
132 | isLeaf = true; | |
133 | } | |
134 | ||
135 | // Always return PARTIAL if we are not at a leaf | |
136 | if (!isLeaf) { | |
137 | return PARTIAL; | |
138 | } | |
139 | ||
140 | // If leaf node is PARTIAL, return the default | |
141 | if (node->fIncluded == PARTIAL) { | |
142 | return defaultResult; | |
143 | } | |
144 | ||
145 | return node->fIncluded; | |
146 | } | |
147 | ||
148 | ||
149 | SimpleRuleBasedPathFilter::Tree::Tree(const Tree& other) | |
150 | : fIncluded(other.fIncluded), fChildren(other.fChildren) { | |
151 | // Note: can't use the default copy assignment because of the std::unique_ptr | |
152 | if (other.fWildcard) { | |
153 | fWildcard.reset(new Tree(*other.fWildcard)); | |
154 | } | |
155 | } | |
156 | ||
157 | bool SimpleRuleBasedPathFilter::Tree::isLeaf() const { | |
158 | return fChildren.empty() && !fWildcard; | |
159 | } | |
160 | ||
161 | void SimpleRuleBasedPathFilter::Tree::applyRule( | |
162 | const ResKeyPath& path, | |
163 | std::list<std::string>::const_iterator it, | |
164 | bool inclusionRule, | |
165 | UErrorCode& status) { | |
166 | ||
167 | // Base Case | |
168 | if (it == path.pieces().end()) { | |
169 | if (isVerbose() && (fIncluded != PARTIAL || !isLeaf())) { | |
170 | std::cout << "genrb info: rule on path " << path | |
171 | << " overrides previous rules" << std::endl; | |
172 | } | |
173 | fIncluded = inclusionRule ? INCLUDE : EXCLUDE; | |
174 | fChildren.clear(); | |
175 | fWildcard.reset(); | |
176 | return; | |
177 | } | |
178 | ||
179 | // Recursive Step | |
180 | auto& key = *it; | |
181 | if (key == "*") { | |
182 | // Case 1: Wildcard | |
183 | if (!fWildcard) { | |
184 | fWildcard.reset(new Tree()); | |
185 | } | |
186 | // Apply the rule to fWildcard and also to all existing children. | |
187 | it++; | |
188 | fWildcard->applyRule(path, it, inclusionRule, status); | |
189 | for (auto& child : fChildren) { | |
190 | child.second.applyRule(path, it, inclusionRule, status); | |
191 | } | |
192 | it--; | |
193 | ||
194 | } else { | |
195 | // Case 2: Normal Key | |
196 | auto search = fChildren.find(key); | |
197 | if (search == fChildren.end()) { | |
198 | if (fWildcard) { | |
199 | // Deep-copy the existing wildcard tree into the new key | |
200 | search = fChildren.emplace(key, Tree(*fWildcard)).first; | |
201 | } else { | |
202 | search = fChildren.emplace(key, Tree()).first; | |
203 | } | |
204 | } | |
205 | it++; | |
206 | search->second.applyRule(path, it, inclusionRule, status); | |
207 | it--; | |
208 | } | |
209 | } | |
210 | ||
211 | void SimpleRuleBasedPathFilter::Tree::print(std::ostream& out, int32_t indent) const { | |
212 | for (int32_t i=0; i<indent; i++) out << "\t"; | |
213 | out << "included: " << kEInclusionNames[fIncluded] << std::endl; | |
214 | for (auto& child : fChildren) { | |
215 | for (int32_t i=0; i<indent; i++) out << "\t"; | |
216 | out << child.first << ": {" << std::endl; | |
217 | child.second.print(out, indent + 1); | |
218 | for (int32_t i=0; i<indent; i++) out << "\t"; | |
219 | out << "}" << std::endl; | |
220 | } | |
221 | if (fWildcard) { | |
222 | for (int32_t i=0; i<indent; i++) out << "\t"; | |
223 | out << "* {" << std::endl; | |
224 | fWildcard->print(out, indent + 1); | |
225 | for (int32_t i=0; i<indent; i++) out << "\t"; | |
226 | out << "}" << std::endl; | |
227 | } | |
228 | } | |
229 | ||
230 | void SimpleRuleBasedPathFilter::print(std::ostream& out) const { | |
231 | out << "SimpleRuleBasedPathFilter {" << std::endl; | |
232 | fRoot.print(out, 1); | |
233 | out << "}" << std::endl; | |
234 | } | |
235 | ||
236 | std::ostream& operator<<(std::ostream& out, const SimpleRuleBasedPathFilter& value) { | |
237 | value.print(out); | |
238 | return out; | |
239 | } |