1 #ifndef INC_TokenStreamRewriteEngine_hpp__
2 #define INC_TokenStreamRewriteEngine_hpp__
4 /* ANTLR Translator Generator
5 * Project led by Terence Parr at http://www.jGuru.com
6 * Software rights: http://www.antlr.org/license.html
19 #include <antlr/config.hpp>
21 #include <antlr/TokenStream.hpp>
22 #include <antlr/TokenWithIndex.hpp>
23 #include <antlr/BitSet.hpp>
25 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE
29 /** This token stream tracks the *entire* token stream coming from
30 * a lexer, but does not pass on the whitespace (or whatever else
31 * you want to discard) to the parser.
33 * This class can then be asked for the ith token in the input stream.
34 * Useful for dumping out the input stream exactly after doing some
35 * augmentation or other manipulations. Tokens are indexed from 0..n-1
37 * You can insert stuff, replace, and delete chunks. Note that the
38 * operations are done lazily--only if you convert the buffer to a
39 * String. This is very efficient because you are not moving data around
40 * all the time. As the buffer of tokens is converted to strings, the
41 * toString() method(s) check to see if there is an operation at the
42 * current index. If so, the operation is done and then normal String
43 * rendering continues on the buffer. This is like having multiple Turing
44 * machine instruction streams (programs) operating on a single input tape. :)
46 * Since the operations are done lazily at toString-time, operations do not
47 * screw up the token index values. That is, an insert operation at token
48 * index i does not change the index values for tokens i+1..n-1.
50 * Because operations never actually alter the buffer, you may always get
51 * the original token stream back without undoing anything. Since
52 * the instructions are queued up, you can easily simulate transactions and
53 * roll back any changes if there is an error just by removing instructions.
56 * TokenStreamRewriteEngine rewriteEngine =
57 * new TokenStreamRewriteEngine(lexer);
58 * JavaRecognizer parser = new JavaRecognizer(rewriteEngine);
60 * rewriteEngine.insertAfter("pass1", t, "foobar");}
61 * rewriteEngine.insertAfter("pass2", u, "start");}
62 * System.out.println(rewriteEngine.toString("pass1"));
63 * System.out.println(rewriteEngine.toString("pass2"));
65 * You can also have multiple "instruction streams" and get multiple
66 * rewrites from a single pass over the input. Just name the instruction
67 * streams and use that name again when printing the buffer. This could be
68 * useful for generating a C file and also its header file--all from the
71 * If you don't use named rewrite streams, a "default" stream is used.
73 * Terence Parr, parrt@cs.usfca.edu
74 * University of San Francisco
77 class TokenStreamRewriteEngine : public TokenStream
/// Sequence type for the recorded tokens: a vector of RefTokenWithIndex.
80 typedef ANTLR_USE_NAMESPACE(std)vector<antlr::RefTokenWithIndex> token_list;
/// Program name used by every overload that does not take a programName argument.
81 static const char* DEFAULT_PROGRAM_NAME;
/// The two constants below are declared as static const members unless NO_STATIC_CONSTS is defined.
82 #ifndef NO_STATIC_CONSTS
83 static const size_t MIN_TOKEN_INDEX;
84 static const int PROGRAM_INIT_SIZE;
/// Alternative definition of PROGRAM_INIT_SIZE; the enclosing declaration is not visible in this excerpt.
88 PROGRAM_INIT_SIZE = 100
/// Function object bound to an output stream; originalToStream() passes it to
/// std::for_each over the token list. The body of operator() is not visible in this excerpt.
92 struct tokenToStream {
93 tokenToStream( ANTLR_USE_NAMESPACE(std)ostream& o ) : out(o) {}
94 template <typename T> void operator() ( const T& t ) {
97 ANTLR_USE_NAMESPACE(std)ostream& out;
/// Base class for one queued rewrite instruction. It is constructed from a token
/// index and a text string, and exposes virtual execute(), getIndex() and type();
/// type() returns the class name as a C string.
100 class RewriteOperation {
102 RewriteOperation( size_t idx, const ANTLR_USE_NAMESPACE(std)string& txt )
103 : index(idx), text(txt)
107 virtual ~RewriteOperation()
110 /** Execute the rewrite operation by possibly adding to the buffer.
111 * Return the index of the next token to operate on.
113 virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& /* out */ ) {
116 virtual size_t getIndex() const {
119 virtual const char* type() const {
120 return "RewriteOperation";
124 ANTLR_USE_NAMESPACE(std)string text;
/// Function object that stores an output stream reference and is callable with a
/// RewriteOperation*. The body of the call operator is not visible in this excerpt.
127 struct executeOperation {
128 ANTLR_USE_NAMESPACE(std)ostream& out;
129 executeOperation( ANTLR_USE_NAMESPACE(std)ostream& s ) : out(s) {}
130 void operator () ( RewriteOperation* t ) {
135 /// list of rewrite operations (stored as raw RewriteOperation pointers)
136 typedef ANTLR_USE_NAMESPACE(std)list<RewriteOperation*> operation_list;
137 /// map program name to that program's list of rewrite operations
138 typedef ANTLR_USE_NAMESPACE(std)map<ANTLR_USE_NAMESPACE(std)string,operation_list> program_map;
/// Rewrite operation queued by insertBefore(). Forwards its index and text to
/// RewriteOperation and overrides execute() and type(); type() returns "InsertBeforeOp".
/// The body of execute() is not visible in this excerpt.
140 class InsertBeforeOp : public RewriteOperation
143 InsertBeforeOp( size_t index, const ANTLR_USE_NAMESPACE(std)string& text )
144 : RewriteOperation(index, text)
147 virtual ~InsertBeforeOp() {}
148 virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out )
153 virtual const char* type() const {
154 return "InsertBeforeOp";
/// Rewrite operation queued by the replace() overloads for the token range from..to.
/// `from` becomes the base-class index; how `to` is stored is not visible in this excerpt.
/// NOTE(review): `text` is taken by value here while the sibling constructors take
/// const std::string& -- harmless, but it costs one extra copy.
158 class ReplaceOp : public RewriteOperation
161 ReplaceOp(size_t from, size_t to, ANTLR_USE_NAMESPACE(std)string text)
162 : RewriteOperation(from,text)
166 virtual ~ReplaceOp() {}
167 virtual size_t execute( ANTLR_USE_NAMESPACE(std)ostream& out ) {
171 virtual const char* type() const {
/// A ReplaceOp whose replacement text is the empty string.
/// NOTE(review): remove(programName, size_t, size_t) in this file goes through
/// replace(..., "") and therefore queues a plain ReplaceOp, not a DeleteOp.
178 class DeleteOp : public ReplaceOp {
180 DeleteOp(size_t from, size_t to)
181 : ReplaceOp(from,to,"")
184 virtual const char* type() const {
/// Build a rewrite engine on top of `upstream`. Declared here, defined out of line.
189 TokenStreamRewriteEngine(TokenStream& upstream);
/// Same, with an explicit initialSize (presumably the initial capacity of the
/// token buffer -- TODO confirm against the definition).
191 TokenStreamRewriteEngine(TokenStream& upstream, size_t initialSize);
/// Return the next token as a RefToken (presumably the TokenStream entry point the
/// parser pulls from -- TODO confirm). Declared here, defined out of line.
193 RefToken nextToken( void );
/// Roll back the default program: forwards to rollback(DEFAULT_PROGRAM_NAME, instructionIndex).
195 void rollback(size_t instructionIndex) {
196 rollback(DEFAULT_PROGRAM_NAME, instructionIndex);
199 /** Rollback the instruction stream for a program so that
200 * the indicated instruction (via instructionIndex) is no
201 * longer in the stream. UNTESTED!
/// programName selects the instruction stream; instructionIndex identifies the
/// instruction that must no longer be present afterwards. Defined out of line.
203 void rollback(const ANTLR_USE_NAMESPACE(std)string& programName,
204 size_t instructionIndex );
/// Reset the default program: forwards to deleteProgram(DEFAULT_PROGRAM_NAME).
206 void deleteProgram() {
207 deleteProgram(DEFAULT_PROGRAM_NAME);
210 /** Reset the program so that no instructions exist */
/// Implemented as rollback(programName, MIN_TOKEN_INDEX).
/// NOTE(review): MIN_TOKEN_INDEX is named as a token index but is used here as an
/// instruction index; its value is not visible in this excerpt -- confirm it is 0.
211 void deleteProgram(const ANTLR_USE_NAMESPACE(std)string& programName) {
212 rollback(programName, MIN_TOKEN_INDEX);
/// Queue `text` for insertion after token t in the default program.
215 void insertAfter( RefTokenWithIndex t,
216 const ANTLR_USE_NAMESPACE(std)string& text )
218 insertAfter(DEFAULT_PROGRAM_NAME, t, text);
/// Queue `text` for insertion after token index `index` in the default program.
221 void insertAfter(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) {
222 insertAfter(DEFAULT_PROGRAM_NAME, index, text);
/// Token-based insertAfter for a named program: resolves the token to
/// t->getIndex() and forwards to the index-based overload.
225 void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName,
227 const ANTLR_USE_NAMESPACE(std)string& text )
229 insertAfter(programName, t->getIndex(), text);
/// Index-based insertAfter for a named program; expressed as insertBefore at index+1.
232 void insertAfter( const ANTLR_USE_NAMESPACE(std)string& programName,
234 const ANTLR_USE_NAMESPACE(std)string& text )
236 // to insert after, just insert before next index (even if past end)
237 insertBefore(programName,index+1, text);
/// Queue `text` for insertion before token t in the default program.
240 void insertBefore( RefTokenWithIndex t,
241 const ANTLR_USE_NAMESPACE(std)string& text )
243 // std::cout << "insertBefore index " << t->getIndex() << " " << text << std::endl;
244 insertBefore(DEFAULT_PROGRAM_NAME, t, text);
/// Queue `text` for insertion before token index `index` in the default program.
247 void insertBefore(size_t index, const ANTLR_USE_NAMESPACE(std)string& text) {
248 insertBefore(DEFAULT_PROGRAM_NAME, index, text);
/// Token-based insertBefore for a named program: resolves the token to
/// t->getIndex() and forwards to the index-based overload.
251 void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName,
253 const ANTLR_USE_NAMESPACE(std)string& text )
255 insertBefore(programName, t->getIndex(), text);
/// Index-based insertBefore for a named program: allocates an InsertBeforeOp and
/// hands it to addToSortedRewriteList(). The pointer is raw; presumably the program
/// list takes ownership -- TODO confirm where these operations are deleted.
258 void insertBefore( const ANTLR_USE_NAMESPACE(std)string& programName,
260 const ANTLR_USE_NAMESPACE(std)string& text )
262 addToSortedRewriteList(programName, new InsertBeforeOp(index,text));
/// Replace the single token at `index` in the default program (range index..index).
265 void replace(size_t index, const ANTLR_USE_NAMESPACE(std)string& text)
267 replace(DEFAULT_PROGRAM_NAME, index, index, text);
/// Replace the token index range from..to in the default program with `text`.
270 void replace( size_t from, size_t to,
271 const ANTLR_USE_NAMESPACE(std)string& text)
273 replace(DEFAULT_PROGRAM_NAME, from, to, text);
/// Replace the single token indexT in the default program; its index is used as
/// both ends of the range.
276 void replace( RefTokenWithIndex indexT,
277 const ANTLR_USE_NAMESPACE(std)string& text )
279 replace(DEFAULT_PROGRAM_NAME, indexT->getIndex(), indexT->getIndex(), text);
/// Replace the token range from..to (given as tokens) in the default program.
282 void replace( RefTokenWithIndex from,
283 RefTokenWithIndex to,
284 const ANTLR_USE_NAMESPACE(std)string& text )
286 replace(DEFAULT_PROGRAM_NAME, from, to, text);
/// Index-based replace for a named program: allocates a ReplaceOp(from, to, text)
/// and hands it to addToSortedRewriteList(). The pointer is raw; presumably the
/// program list takes ownership -- TODO confirm.
289 void replace(const ANTLR_USE_NAMESPACE(std)string& programName,
290 size_t from, size_t to,
291 const ANTLR_USE_NAMESPACE(std)string& text )
293 addToSortedRewriteList(programName,new ReplaceOp(from, to, text));
/// Token-based replace for a named program. The body is missing from this excerpt;
/// presumably it resolves from/to via getIndex() like the single-token overload -- TODO confirm.
296 void replace( const ANTLR_USE_NAMESPACE(std)string& programName,
297 RefTokenWithIndex from,
298 RefTokenWithIndex to,
299 const ANTLR_USE_NAMESPACE(std)string& text )
/// Remove the single token at `index` in the default program (range index..index).
307 void remove(size_t index) {
308 remove(DEFAULT_PROGRAM_NAME, index, index);
/// Remove the token index range from..to in the default program.
311 void remove(size_t from, size_t to) {
312 remove(DEFAULT_PROGRAM_NAME, from, to);
/// Remove the single token indexT in the default program; it is passed as both ends of the range.
315 void remove(RefTokenWithIndex indexT) {
316 remove(DEFAULT_PROGRAM_NAME, indexT, indexT);
/// Remove the token range from..to (given as tokens) in the default program.
319 void remove(RefTokenWithIndex from, RefTokenWithIndex to) {
320 remove(DEFAULT_PROGRAM_NAME, from, to);
/// Index-based remove for a named program: implemented as a replace of from..to
/// with the empty string.
323 void remove( const ANTLR_USE_NAMESPACE(std)string& programName,
324 size_t from, size_t to)
326 replace(programName,from,to,"");
/// Token-based remove for a named program: implemented as the token-based replace
/// of from..to with the empty string.
329 void remove( const ANTLR_USE_NAMESPACE(std)string& programName,
330 RefTokenWithIndex from, RefTokenWithIndex to )
332 replace(programName,from,to,"");
/// Add token type `ttype` to discardMask. Per the class description, discarded
/// token types are not passed on to the parser; the filtering itself is not
/// visible in this excerpt.
335 void discard(int ttype) {
336 discardMask.add(ttype);
/// Return the recorded token at position i, converted to a RefToken.
/// Uses tokens.at(i): assuming `tokens` is the std::vector typedef'd as token_list
/// (its declaration is not visible here), an out-of-range i throws std::out_of_range.
339 RefToken getToken( size_t i )
341 return RefToken(tokens.at(i));
/// Number of tokens currently held in `tokens`.
344 size_t getTokenStreamSize() const {
345 return tokens.size();
/// Feed every recorded token, in order, to a tokenToStream functor bound to `out`.
/// No rewrite program is consulted.
348 void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
349 ANTLR_USE_NAMESPACE(std)for_each( tokens.begin(), tokens.end(), tokenToStream(out) );
/// Range-limited variant taking start and end token indices. Defined out of line;
/// whether `end` is inclusive cannot be determined from this declaration.
352 void originalToStream( ANTLR_USE_NAMESPACE(std)ostream& out,
353 size_t start, size_t end ) const;
/// Render the whole buffer: forwards to the (out, start, end) overload.
/// NOTE(review): the end argument is getTokenStreamSize(), i.e. one past the last
/// valid index; confirm the four-argument toStream treats lastToken accordingly.
355 void toStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
356 toStream( out, MIN_TOKEN_INDEX, getTokenStreamSize());
/// Render the whole buffer with the named program, from MIN_TOKEN_INDEX to
/// getTokenStreamSize().
359 void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
360 const ANTLR_USE_NAMESPACE(std)string& programName ) const
362 toStream( out, programName, MIN_TOKEN_INDEX, getTokenStreamSize());
/// Render the token range start..end with the default program.
365 void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
366 size_t start, size_t end ) const
368 toStream(out, DEFAULT_PROGRAM_NAME, start, end);
/// Overload that every other toStream() forwards to: takes the program name and
/// the firstToken/lastToken bounds. Defined out of line.
371 void toStream( ANTLR_USE_NAMESPACE(std)ostream& out,
372 const ANTLR_USE_NAMESPACE(std)string& programName,
373 size_t firstToken, size_t lastToken ) const;
/// Debug dump of the whole buffer: forwards to
/// toDebugStream(out, MIN_TOKEN_INDEX, getTokenStreamSize()).
375 void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out ) const {
376 toDebugStream( out, MIN_TOKEN_INDEX, getTokenStreamSize());
/// Range-limited debug dump taking start and end token indices. Defined out of line.
379 void toDebugStream( ANTLR_USE_NAMESPACE(std)ostream& out,
380 size_t start, size_t end ) const;
/// Last rewrite token index of the default program: forwards to
/// getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME).
382 size_t getLastRewriteTokenIndex() const {
383 return getLastRewriteTokenIndex(DEFAULT_PROGRAM_NAME);
386 /** Return the last index for the program named programName
387 * return 0 if the program does not exist or the program is empty.
388 * (Note this is different from the java implementation that returns -1)
390 size_t getLastRewriteTokenIndex(const ANTLR_USE_NAMESPACE(std)string& programName) const {
// find() is used so that looking up an unknown program does not insert it into the map.
391 program_map::const_iterator rewrites = programs.find(programName);
// Unknown program: the statement executed for this case is missing from this excerpt.
393 if( rewrites == programs.end() )
396 const operation_list& prog = rewrites->second;
// NOTE(review): `last` starts at end(), which must not be dereferenced. The line
// between this one and the return is missing from this excerpt; verify that it
// steps the iterator back and that an empty list is handled before the dereference.
399 operation_list::const_iterator last = prog.end();
401 return (*last)->getIndex();
407 /** If op.index > lastRewriteTokenIndexes, just add to the end.
408 * Otherwise, do linear */
/// Convenience overload: queues op on the default program by forwarding to
/// addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op).
409 void addToSortedRewriteList(RewriteOperation* op) {
410 addToSortedRewriteList(DEFAULT_PROGRAM_NAME, op);
/// Queue op on the program named programName. Defined out of line; op is a raw
/// pointer, so presumably the program list takes ownership -- TODO confirm.
413 void addToSortedRewriteList( const ANTLR_USE_NAMESPACE(std)string& programName,
414 RewriteOperation* op );
417 /** Who do we suck tokens from? */
419 /** track index of tokens */
422 /** Track the incoming list of tokens */
425 /** You may have multiple, named streams of rewrite operations.
426 * I'm calling these things "programs."
427 * Maps String (name) -> rewrite (List)
429 program_map programs;
431 /** Which (whitespace) token(s) to throw out */
435 #ifdef ANTLR_CXX_SUPPORTS_NAMESPACE