]>
Commit | Line | Data |
---|---|---|
1 | from tokenize import * | |
2 | from keyword import * | |
3 | from string import * | |
4 | ||
class Tokenizer:
    """Split a piece of Python source text into a flat list of token tuples.

    Every entry of the list has the form::

        (type, string, first, last)

    ``type`` is the token name taken from ``tok_name`` (or ``"KEY"`` when the
    token text is a Python keyword), ``string`` is the token text, and
    ``first`` / ``last`` are the start and end column of the token.

    Example:
        t = Tokenizer('def hallo(du): # juchee')
        print(t.tokens())
    """

    def __init__(self, text):
        """Tokenize *text* right away and keep the resulting token list.

        Tokenizing is best effort: when the tokenizer gives up with a
        ``TokenError`` (for instance on an unclosed bracket) the tokens
        collected up to that point are kept and the error is ignored.
        """
        # Imported explicitly: ``from tokenize import *`` does not export
        # ``TokenError`` on every Python version.
        from tokenize import TokenError, generate_tokens

        self.text = text
        self.toks = []
        try:
            # generate_tokens() yields 5-tuples in exactly the argument
            # order that get() expects.
            for token_info in generate_tokens(self.readline):
                self.get(*token_info)
        except TokenError:
            # Deliberate: incomplete input is expected, keep partial result.
            pass

    def tokens(self):
        """Return the list of ``(type, string, first, last)`` tuples."""
        return self.toks

    def get(self, type, string, begin, end, l):
        """Record a single token produced by the tokenizer.

        type   -- numeric token type, looked up in ``tok_name``
        string -- the token text
        begin  -- ``(row, column)`` where the token starts
        end    -- ``(row, column)`` where the token ends
        l      -- fifth tokenizer field (the source line); not used here

        Only the column parts of *begin* and *end* are stored.
        """
        _, first = begin
        _, last = end
        tname = tok_name[type]
        if iskeyword(string):
            tname = "KEY"
        # list.append() takes exactly one argument, so store one tuple.
        self.toks.append((tname, string, first, last))

    def readline(self):
        """Hand the whole text to the tokenizer once, then signal EOF.

        The first call returns the stored text and clears it; every later
        call returns the empty string.
        """
        pending, self.text = self.text, ""
        return pending

    def line(self):
        """Join the stored tokens back into one normalized string.

        Spacing rules:
          * after a NAME or KEY token a blank is inserted, unless the next
            token is ".", "," or "(";
          * the operators "," and ":" are followed by a blank;
          * everything else is emitted as is.

        Note that the first rule also applies before ")", so ``(du)`` is
        rendered as ``(du )``.
        """
        pieces = []
        previous = ""
        for kind, string, _first, _last in self.toks:
            if previous in ("NAME", "KEY") and string not in (".", ",", "("):
                pieces.append(" ")
            pieces.append(string)
            if kind == "OP" and string in (",", ":"):
                pieces.append(" ")
            previous = kind
        return "".join(pieces)
59 | ||
60 |