1 # Sources (c) 2002, 2003, 2004, 2006, 2007, 2008, 2009 |
|
2 # David Turner <david@freetype.org> |
|
3 # |
|
4 # |
|
5 # this file contains definitions of classes needed to decompose |
|
6 # C source files into a series of multi-line "blocks". There are |
|
7 # two kinds of blocks: |
|
8 # |
|
9 # - normal blocks, which contain source code or ordinary comments |
|
10 # |
|
11 # - documentation blocks, which have restricted formatting, and |
|
12 # whose text always starts with a documentation markup tag like |
|
13 # "<Function>", "<Type>", etc. |
|
14 # |
|
15 # the routines used to process the content of documentation blocks |
|
16 # are not contained here, but in "content.py" |
|
17 # |
|
18 # the classes and methods found here only deal with text parsing |
|
19 # and basic documentation block extraction |
|
20 # |
|
21 |
|
22 import fileinput, re, sys, os, string |
|
23 |
|
24 |
|
25 |
|
26 ################################################################ |
|
27 ## |
|
28 ## BLOCK FORMAT PATTERN |
|
29 ## |
|
30 ## A simple class containing compiled regular expressions used |
|
31 ## to detect potential documentation format block comments within |
|
32 ## C source code |
|
33 ## |
|
34 ## note that the 'column' pattern must contain a group that will |
|
35 ## be used to "unbox" the content of documentation comment blocks |
|
36 ## |
|
class SourceBlockFormat:
    """Compiled patterns describing one style of documentation comment block.

    Attributes:
      id     -- the identifier passed in by the caller, stored as is
      start  -- compiled pattern for the first line of a block
      column -- compiled pattern for a content line of a block
      end    -- compiled pattern for the last line of a block

    All three patterns are compiled with `re.VERBOSE', so whitespace and
    `#' comments inside the pattern strings are ignored.
    """

    def __init__( self, id, start, column, end ):
        """Store `id' and compile the `start', `column' and `end' patterns."""
        self.id = id
        self.start, self.column, self.end = [
            re.compile( pattern, re.VERBOSE )
            for pattern in ( start, column, end ) ]
|
45 |
|
46 |
|
47 |
|
#
# Format 1 documentation comment blocks are fully "boxed"; they look
# like the following:
#
#    /************************************/
#    /*                                  */
#    /*                                  */
#    /*                                  */
#    /************************************/
#
# The verbose regular expressions below detect them.  The last line of
# such a block has the same shape as its first line, so the `start'
# pattern is reused as the `end' pattern.
#

start = r'''
  \s*      # any number of whitespace
  /\*{2,}/ # followed by '/' and at least two asterisks then '/'
  \s*$     # probably followed by whitespace
'''

column = r'''
  \s*      # any number of whitespace
  /\*{1}   # followed by '/' and precisely one asterisk
  ([^*].*) # followed by anything (group 1)
  \*{1}/   # followed by one asterisk and a '/'
  \s*$     # probably followed by whitespace
'''

re_source_block_format1 = SourceBlockFormat( id     = 1,
                                             start  = start,
                                             column = column,
                                             end    = start )
|
75 |
|
76 |
|
#
# Format 2 documentation comment blocks only have a left-hand column of
# asterisks; they look like the following:
#
#    /************************************ (at least 2 asterisks)
#     *
#     *
#     *
#     *
#     **/ (1 or more asterisks at the end)
#
# The verbose regular expressions below detect them.
#
start = r'''
  \s*     # any number of whitespace
  /\*{2,} # followed by '/' and at least two asterisks
  \s*$    # probably followed by whitespace
'''

column = r'''
  \s*        # any number of whitespace
  \*{1}(?!/) # followed by precisely one asterisk not followed by `/'
  (.*)       # then anything (group1)
'''

end = r'''
  \s*  # any number of whitespace
  \*+/ # followed by at least one asterisk, then '/'
'''

re_source_block_format2 = SourceBlockFormat( id     = 2,
                                             start  = start,
                                             column = column,
                                             end    = end )
|
107 |
|
108 |
|
#
# All supported documentation block formats.  Supporting a new format
# only requires appending another SourceBlockFormat object to this list.
#
re_source_block_formats = [
    re_source_block_format1,
    re_source_block_format2,
]
|
114 |
|
115 |
|
#
# The following regular expressions correspond to markup tags within
# the documentation comment blocks.  They are equivalent despite their
# different syntax.
#
# Both patterns are anchored with `match', so a markup tag _must_ begin
# a new line (leading whitespace is allowed).  Group 1 is the tag name.
#
re_markup_tag1 = re.compile( r'\s*<(\w*)>' )  # <xxxx> format
re_markup_tag2 = re.compile( r'\s*@(\w*):' )  # @xxxx: format

#
# The list of supported markup tag syntaxes; new ones can be added by
# extending this list.
#
re_markup_tags = [
    re_markup_tag1,
    re_markup_tag2,
]
|
131 |
|
#
# Detects a cross-reference of the form `@name' once markup tags have
# been stripped.  Group 1 is the referenced name, group 2 is the rest
# of the text.
#
re_crossref = re.compile( r'@(\w*)(.*)' )

#
# Detect italic and bold styles in paragraph text.  In both patterns
# group 1 is the styled word (apostrophes are allowed inside it) and
# group 3 is whatever follows the closing delimiter.
#
re_italic = re.compile( r"_(\w(\w|')*)_(.*)" )     # _italic_
re_bold   = re.compile( r"\*(\w(\w|')*)\*(.*)" )   # *bold*
|
143 # |
|
144 # used to detect the end of commented source lines |
|
145 # |
|
146 re_source_sep = re.compile( r'\s*/\*\s*\*/' ) |
|
147 |
|
148 # |
|
149 # used to perform cross-reference within source output |
|
150 # |
|
151 re_source_crossref = re.compile( r'(\W*)(\w*)' ) |
|
152 |
|
#
# Reserved source keywords: C type-related keywords and preprocessor
# directives.  Group 1 holds the keyword that was recognized.
#
# The leading `\b' is applied to the plain-word keywords only.  A word
# boundary can never occur between the start of a line (or whitespace)
# and `#', so putting `\b' in front of the directives would prevent
# them from ever matching where they normally appear.
#
# `#' must be escaped because the pattern is compiled with re.VERBOSE,
# where a bare `#' starts a comment.
#
re_source_keywords = re.compile( r'''
    (
        \b (?: typedef  |
               struct   |
               enum     |
               union    |
               const    |
               char     |
               int      |
               short    |
               long     |
               void     |
               signed   |
               unsigned )
      |
        \# (?: include |
               define  |
               undef   |
               if      |
               ifdef   |
               ifndef  |
               else    |
               endif   )
    )
    \b
''', re.VERBOSE )
|
176 |
|
177 |
|
178 ################################################################ |
|
179 ## |
|
180 ## SOURCE BLOCK CLASS |
|
181 ## |
|
182 ## A SourceProcessor is in charge of reading a C source file |
|
183 ## and decomposing it into a series of different "SourceBlocks". |
|
184 ## each one of these blocks can be made of the following data: |
|
185 ## |
|
186 ## - A documentation comment block that starts with "/**" and |
|
187 ## whose exact format will be discussed later |
|
188 ## |
|
189 ## - normal sources lines, including comments |
|
190 ## |
|
191 ## the important fields in a text block are the following ones: |
|
192 ## |
|
193 ## self.lines : a list of text lines for the corresponding block |
|
194 ## |
|
195 ## self.content : for documentation comment blocks only, this is the |
|
196 ## block content that has been "unboxed" from its |
|
197 ## decoration. This is an empty list for all other blocks |
|
198 ## (i.e. sources or ordinary comments with no starting |
|
199 ## markup tag) |
|
200 ## |
|
class SourceBlock:
    """One block of consecutive lines taken from a source file.

    Attributes:
      processor -- the object that created the block; only its `format'
                   attribute is read here
      filename  -- name of the file the block comes from
      lineno    -- line number recorded for the block by the processor
      lines     -- private copy of the block's raw text lines
      format    -- the processor's block format at creation time, or
                   None for a normal (non-documentation) block
      content   -- for documentation blocks holding at least one markup
                   tag, the lines "unboxed" from their comment
                   decoration; an empty list for every other block
    """

    def __init__( self, processor, filename, lineno, lines ):
        """Record the block and, if possible, extract its documentation."""
        self.processor = processor
        self.filename  = filename
        self.lineno    = lineno
        self.lines     = lines[:]   # copy: the caller reuses its own list
        self.format    = processor.format
        self.content   = []

        # normal source block: there is nothing to unbox
        if self.format is None:
            return

        # extract comment lines: keep group 1 of every line that matches
        # the column pattern, i.e. the text without its decoration
        unboxed = []
        for raw_line in self.lines:
            m = self.format.column.match( raw_line )
            if m:
                unboxed.append( m.group( 1 ) )

        # now, look for a markup tag; the block only counts as a
        # documentation block if one of its lines carries one
        for text in unboxed:
            # the str method replaces string.strip(), which no longer
            # exists in Python 3
            text = text.strip()
            if text:
                for tag in re_markup_tags:
                    if tag.match( text ):
                        self.content = unboxed
                        return

    def location( self ):
        """Return the block's origin formatted as "(filename:lineno)"."""
        return "(" + self.filename + ":" + repr( self.lineno ) + ")"

    # debugging only - not used in normal operations
    def dump( self ):
        """Print the unboxed content if there is any, else the raw lines."""
        # print( x ) with a single argument gives the same output under
        # Python 2 (print statement) and Python 3 (print function)
        if self.content:
            print( "{{{content start---" )
            for l in self.content:
                print( l )
            print( "---content end}}}" )
            return

        for line in self.lines:
            print( line )
|
251 |
|
252 |
|
253 |
|
254 ################################################################ |
|
255 ## |
|
256 ## SOURCE PROCESSOR CLASS |
|
257 ## |
|
258 ## The SourceProcessor is in charge of reading a C source file |
|
259 ## and decomposing it into a series of different "SourceBlock" |
|
260 ## objects. |
|
261 ## |
|
262 ## each one of these blocks can be made of the following data: |
|
263 ## |
|
264 ## - A documentation comment block that starts with "/**" and |
|
265 ## whose exact format will be discussed later |
|
266 ## |
|
267 ## - normal sources lines, including comments |
|
268 ## |
|
269 ## |
|
class SourceProcessor:
    """Read a C source file and split it into a list of SourceBlock objects.

    Attributes:
      blocks   -- the SourceBlock objects created so far
      filename -- name of the file being parsed (None before any parse)
      format   -- format of the documentation block currently being
                  accumulated, or None while reading normal lines
      lineno   -- line number at which the most recently detected
                  documentation block started (0 if none was seen)
      lines    -- the lines accumulated for the block under construction
    """

    def __init__( self ):
        """initialize a source processor"""
        self.blocks   = []
        self.filename = None
        self.format   = None
        # set here too, so add_block_lines() can be called before
        # parse_file() without raising AttributeError
        self.lineno   = 0
        self.lines    = []

    def reset( self ):
        """reset a block processor, clean all its blocks"""
        self.blocks = []
        self.format = None

    def parse_file( self, filename ):
        """parse a C source file, and add its blocks to the processor's list

        The processor is reset first, so `self.blocks' only holds the
        blocks of `filename' afterwards.  Errors raised while opening or
        reading the file are propagated to the caller.
        """
        self.reset()

        self.filename = filename

        # `fileinput' keeps module-level state; make sure that no earlier
        # input is still active before starting a new one
        fileinput.close()
        self.format = None
        self.lineno = 0
        self.lines  = []

        try:
            for line in fileinput.input( filename ):
                # strip trailing newlines, important on Windows machines!
                if line.endswith( '\n' ):
                    line = line[:-1]

                if self.format is None:
                    self.process_normal_line( line )
                elif self.format.end.match( line ):
                    # that's a normal block end, add it to 'lines' and
                    # create a new block
                    self.lines.append( line )
                    self.add_block_lines()
                elif self.format.column.match( line ):
                    # that's a normal column line, add it to 'lines'
                    self.lines.append( line )
                else:
                    # humm.. this is an unexpected block end,
                    # create a new block, but don't process the line
                    self.add_block_lines()

                    # we need to process the line again
                    self.process_normal_line( line )
        finally:
            # release the file and the module-level state even when
            # parsing is interrupted by an exception
            fileinput.close()

        # record the last lines
        self.add_block_lines()

    def process_normal_line( self, line ):
        """process a normal line and check whether it is the start of a new block

        Must be called while a `fileinput' input is active, since the
        line number of a new block is read from `fileinput'.
        """
        for f in re_source_block_formats:
            if f.start.match( line ):
                # flush what was accumulated so far, then start a new
                # documentation block of format `f' at the current line
                self.add_block_lines()
                self.format = f
                self.lineno = fileinput.filelineno()

        self.lines.append( line )

    def add_block_lines( self ):
        """add the current accumulated lines and create a new block"""
        if not self.lines:
            return

        # the block reads `self.format', so it must be built before the
        # format is cleared below
        block = SourceBlock( self, self.filename, self.lineno, self.lines )

        self.blocks.append( block )
        self.format = None
        self.lines  = []

    # debugging only, not used in normal operations
    def dump( self ):
        """print all blocks in a processor"""
        for b in self.blocks:
            b.dump()
|
346 |
|
347 # eof |
|