X-Git-Url: https://git.saurik.com/apple/icu.git/blobdiff_plain/a01113dcd0f39d5da295ef82785beff9ed86fe38..340931cb2e044a2141d11567dd0f782524e32994:/icuSources/python/icutools/databuilder/comment_stripper.py

diff --git a/icuSources/python/icutools/databuilder/comment_stripper.py b/icuSources/python/icutools/databuilder/comment_stripper.py
new file mode 100644
index 00000000..4001f2f6
--- /dev/null
+++ b/icuSources/python/icutools/databuilder/comment_stripper.py
@@ -0,0 +1,51 @@
+# Copyright (C) 2018 and later: Unicode, Inc. and others.
+# License & terms of use: http://www.unicode.org/copyright.html
+
+import io
+
+class CommentStripper(object):
+    """Removes lines starting with "//" from a file stream."""
+
+    def __init__(self, f):
+        self.f = f  # wrapped stream; only its read(size) method is called here
+        self.state = 0  # state machine position (0-3); kept between read() calls
+
+    def read(self, size=-1):
+        bytes = self.f.read(size)  # presumably text (str): items are compared to "/"
+        # TODO: The result can be shorter than the data read (even empty when the
+        # whole chunk was comment text); read more to honor the size request?
+        return "".join(self._strip_comments(bytes))
+
+    def _strip_comments(self, bytes):
+        for byte in bytes:  # one item at a time; yields the items to keep
+            if self.state == 0:
+                # state 0: start of a line; a leading "/" is held back, not emitted
+                if byte == "/":
+                    self.state = 1  # NOTE(review): never emitted if input ends here
+                elif byte == "\n":
+                    self.state = 0  # empty line: still at the start of a line
+                    yield byte
+                else:
+                    self.state = 2
+                    yield byte
+            elif self.state == 1:
+                # state 1: read a single '/' at the start of a line, not yet emitted
+                if byte == "/":
+                    self.state = 3  # second '/': this line is a comment
+                elif byte == "\n":
+                    self.state = 0
+                    yield "/"  # the one that was skipped
+                    yield "\n"
+                else:
+                    self.state = 2
+                    yield "/"  # the one that was skipped
+                    yield byte
+            elif self.state == 2:
+                # state 2: middle of a line, no comment; a later "//" is kept as-is
+                if byte == "\n":
+                    self.state = 0
+                yield byte
+            elif self.state == 3:
+                # state 3: inside a comment; nothing is emitted in this state
+                if byte == "\n":
+                    self.state = 0  # the comment's newline is dropped as well