misc/libfreetype/src/tools/docmaker/content.py
changeset 9372 915436ff64ab
parent 9371 f3840de881bd
child 9373 b769a8e38cbd
equal deleted inserted replaced
9371:f3840de881bd 9372:915436ff64ab
     1 #  Content (c) 2002, 2004, 2006, 2007, 2008, 2009
       
     2 #    David Turner <david@freetype.org>
       
     3 #
       
     4 #  This file contains routines used to parse the content of documentation
       
     5 #  comment blocks and build more structured objects out of them.
       
     6 #
       
     7 
       
     8 from sources import *
       
     9 from utils import *
       
    10 import string, re
       
    11 
       
    12 
       
# these regular expressions are used to detect code sequences. these
# are simply code fragments embedded in '{' and '}', each accolade
# standing alone on its own line, like in:
#
#  {
#    x = y + z;
#    if ( zookoo == 2 )
#    {
#      foobar();
#    }
#  }
#
# group 1 of both expressions captures the whitespace found before the
# accolade, i.e. its indentation.  DocField uses it to decide where a
# sequence stops: a closing accolade ends the sequence when it is not
# indented deeper than the opening one, so the code sequence can contain
# accolades at greater indentation
#
re_code_start = re.compile( r"(\s*){\s*$" )
re_code_end   = re.compile( r"(\s*)}\s*$" )
       
    30 
       
    31 
       
# this regular expression is used to isolate identifiers from
# other text: when used with match(), group 1 is the (possibly empty)
# run of word characters found at the very start of the string
#
re_identifier = re.compile( r'(\w*)' )
       
    36 
       
    37 
       
    38 # we collect macros ending in `_H'; while outputting the object data, we use
       
    39 # this info together with the object's file location to emit the appropriate
       
    40 # header file macro and name before the object itself
       
    41 #
       
    42 re_header_macro = re.compile( r'^#define\s{1,}(\w{1,}_H)\s{1,}<(.*)>' )
       
    43 
       
    44 
       
    45 #############################################################################
       
    46 #
       
    47 # The DocCode class is used to store source code lines.
       
    48 #
       
    49 #   'self.lines' contains a set of source code lines that will be dumped as
       
    50 #   HTML in a <PRE> tag.
       
    51 #
       
    52 #   The object is filled line by line by the parser; it strips the leading
       
    53 #   "margin" space from each input line before storing it in 'self.lines'.
       
    54 #
       
    55 class  DocCode:
       
    56 
       
    57     def  __init__( self, margin, lines ):
       
    58         self.lines = []
       
    59         self.words = None
       
    60 
       
    61         # remove margin spaces
       
    62         for l in lines:
       
    63             if string.strip( l[:margin] ) == "":
       
    64                 l = l[margin:]
       
    65             self.lines.append( l )
       
    66 
       
    67     def  dump( self, prefix = "", width = 60 ):
       
    68         lines = self.dump_lines( 0, width )
       
    69         for l in lines:
       
    70             print prefix + l
       
    71 
       
    72     def  dump_lines( self, margin = 0, width = 60 ):
       
    73         result = []
       
    74         for l in self.lines:
       
    75             result.append( " " * margin + l )
       
    76         return result
       
    77 
       
    78 
       
    79 
       
    80 #############################################################################
       
    81 #
       
    82 # The DocPara class is used to store "normal" text paragraph.
       
    83 #
       
    84 #   'self.words' contains the list of words that make up the paragraph
       
    85 #
       
    86 class  DocPara:
       
    87 
       
    88     def  __init__( self, lines ):
       
    89         self.lines = None
       
    90         self.words = []
       
    91         for l in lines:
       
    92             l = string.strip( l )
       
    93             self.words.extend( string.split( l ) )
       
    94 
       
    95     def  dump( self, prefix = "", width = 60 ):
       
    96         lines = self.dump_lines( 0, width )
       
    97         for l in lines:
       
    98             print prefix + l
       
    99 
       
   100     def  dump_lines( self, margin = 0, width = 60 ):
       
   101         cur    = ""  # current line
       
   102         col    = 0   # current width
       
   103         result = []
       
   104 
       
   105         for word in self.words:
       
   106             ln = len( word )
       
   107             if col > 0:
       
   108                 ln = ln + 1
       
   109 
       
   110             if col + ln > width:
       
   111                 result.append( " " * margin + cur )
       
   112                 cur = word
       
   113                 col = len( word )
       
   114             else:
       
   115                 if col > 0:
       
   116                     cur = cur + " "
       
   117                 cur = cur + word
       
   118                 col = col + ln
       
   119 
       
   120         if col > 0:
       
   121             result.append( " " * margin + cur )
       
   122 
       
   123         return result
       
   124 
       
   125 
       
   126 
       
   127 #############################################################################
       
   128 #
       
   129 #  The DocField class is used to store a list containing either DocPara or
       
   130 #  DocCode objects. Each DocField also has an optional "name" which is used
       
   131 #  when the object corresponds to a field or value definition
       
   132 #
       
   133 class  DocField:
       
   134 
       
   135     def  __init__( self, name, lines ):
       
   136         self.name  = name  # can be None for normal paragraphs/sources
       
   137         self.items = []    # list of items
       
   138 
       
   139         mode_none  = 0     # start parsing mode
       
   140         mode_code  = 1     # parsing code sequences
       
   141         mode_para  = 3     # parsing normal paragraph
       
   142 
       
   143         margin     = -1    # current code sequence indentation
       
   144         cur_lines  = []
       
   145 
       
   146         # now analyze the markup lines to see if they contain paragraphs,
       
   147         # code sequences or fields definitions
       
   148         #
       
   149         start = 0
       
   150         mode  = mode_none
       
   151 
       
   152         for l in lines:
       
   153             # are we parsing a code sequence ?
       
   154             if mode == mode_code:
       
   155                 m = re_code_end.match( l )
       
   156                 if m and len( m.group( 1 ) ) <= margin:
       
   157                     # that's it, we finished the code sequence
       
   158                     code = DocCode( 0, cur_lines )
       
   159                     self.items.append( code )
       
   160                     margin    = -1
       
   161                     cur_lines = []
       
   162                     mode      = mode_none
       
   163                 else:
       
   164                     # nope, continue the code sequence
       
   165                     cur_lines.append( l[margin:] )
       
   166             else:
       
   167                 # start of code sequence ?
       
   168                 m = re_code_start.match( l )
       
   169                 if m:
       
   170                     # save current lines
       
   171                     if cur_lines:
       
   172                         para = DocPara( cur_lines )
       
   173                         self.items.append( para )
       
   174                         cur_lines = []
       
   175 
       
   176                     # switch to code extraction mode
       
   177                     margin = len( m.group( 1 ) )
       
   178                     mode   = mode_code
       
   179                 else:
       
   180                     if not string.split( l ) and cur_lines:
       
   181                         # if the line is empty, we end the current paragraph,
       
   182                         # if any
       
   183                         para = DocPara( cur_lines )
       
   184                         self.items.append( para )
       
   185                         cur_lines = []
       
   186                     else:
       
   187                         # otherwise, simply add the line to the current
       
   188                         # paragraph
       
   189                         cur_lines.append( l )
       
   190 
       
   191         if mode == mode_code:
       
   192             # unexpected end of code sequence
       
   193             code = DocCode( margin, cur_lines )
       
   194             self.items.append( code )
       
   195         elif cur_lines:
       
   196             para = DocPara( cur_lines )
       
   197             self.items.append( para )
       
   198 
       
   199     def  dump( self, prefix = "" ):
       
   200         if self.field:
       
   201             print prefix + self.field + " ::"
       
   202             prefix = prefix + "----"
       
   203 
       
   204         first = 1
       
   205         for p in self.items:
       
   206             if not first:
       
   207                 print ""
       
   208             p.dump( prefix )
       
   209             first = 0
       
   210 
       
   211     def  dump_lines( self, margin = 0, width = 60 ):
       
   212         result = []
       
   213         nl     = None
       
   214 
       
   215         for p in self.items:
       
   216             if nl:
       
   217                 result.append( "" )
       
   218 
       
   219             result.extend( p.dump_lines( margin, width ) )
       
   220             nl = 1
       
   221 
       
   222         return result
       
   223 
       
   224 
       
   225 
       
# this regular expression is used to detect field definitions: optional
# leading whitespace, a field name (group 1) made of word characters that
# may also contain dots between its first and last character, optional
# whitespace, then `::'
#
re_field = re.compile( r"\s*(\w*|\w(\w|\.)*\w)\s*::" )
       
   229 
       
   230 
       
   231 
       
   232 class  DocMarkup:
       
   233 
       
   234     def  __init__( self, tag, lines ):
       
   235         self.tag    = string.lower( tag )
       
   236         self.fields = []
       
   237 
       
   238         cur_lines = []
       
   239         field     = None
       
   240         mode      = 0
       
   241 
       
   242         for l in lines:
       
   243             m = re_field.match( l )
       
   244             if m:
       
   245                 # we detected the start of a new field definition
       
   246 
       
   247                 # first, save the current one
       
   248                 if cur_lines:
       
   249                     f = DocField( field, cur_lines )
       
   250                     self.fields.append( f )
       
   251                     cur_lines = []
       
   252                     field     = None
       
   253 
       
   254                 field     = m.group( 1 )   # record field name
       
   255                 ln        = len( m.group( 0 ) )
       
   256                 l         = " " * ln + l[ln:]
       
   257                 cur_lines = [l]
       
   258             else:
       
   259                 cur_lines.append( l )
       
   260 
       
   261         if field or cur_lines:
       
   262             f = DocField( field, cur_lines )
       
   263             self.fields.append( f )
       
   264 
       
   265     def  get_name( self ):
       
   266         try:
       
   267             return self.fields[0].items[0].words[0]
       
   268         except:
       
   269             return None
       
   270 
       
   271     def  get_start( self ):
       
   272         try:
       
   273             result = ""
       
   274             for word in self.fields[0].items[0].words:
       
   275                 result = result + " " + word
       
   276             return result[1:]
       
   277         except:
       
   278             return "ERROR"
       
   279 
       
   280     def  dump( self, margin ):
       
   281         print " " * margin + "<" + self.tag + ">"
       
   282         for f in self.fields:
       
   283             f.dump( "  " )
       
   284         print " " * margin + "</" + self.tag + ">"
       
   285 
       
   286 
       
   287 
       
   288 class  DocChapter:
       
   289 
       
   290     def  __init__( self, block ):
       
   291         self.block    = block
       
   292         self.sections = []
       
   293         if block:
       
   294             self.name  = block.name
       
   295             self.title = block.get_markup_words( "title" )
       
   296             self.order = block.get_markup_words( "sections" )
       
   297         else:
       
   298             self.name  = "Other"
       
   299             self.title = string.split( "Miscellaneous" )
       
   300             self.order = []
       
   301 
       
   302 
       
   303 
       
   304 class  DocSection:
       
   305 
       
   306     def  __init__( self, name = "Other" ):
       
   307         self.name        = name
       
   308         self.blocks      = {}
       
   309         self.block_names = []  # ordered block names in section
       
   310         self.defs        = []
       
   311         self.abstract    = ""
       
   312         self.description = ""
       
   313         self.order       = []
       
   314         self.title       = "ERROR"
       
   315         self.chapter     = None
       
   316 
       
   317     def  add_def( self, block ):
       
   318         self.defs.append( block )
       
   319 
       
   320     def  add_block( self, block ):
       
   321         self.block_names.append( block.name )
       
   322         self.blocks[block.name] = block
       
   323 
       
   324     def  process( self ):
       
   325         # look up one block that contains a valid section description
       
   326         for block in self.defs:
       
   327             title = block.get_markup_text( "title" )
       
   328             if title:
       
   329                 self.title       = title
       
   330                 self.abstract    = block.get_markup_words( "abstract" )
       
   331                 self.description = block.get_markup_items( "description" )
       
   332                 self.order       = block.get_markup_words( "order" )
       
   333                 return
       
   334 
       
   335     def  reorder( self ):
       
   336         self.block_names = sort_order_list( self.block_names, self.order )
       
   337 
       
   338 
       
   339 
       
   340 class  ContentProcessor:
       
   341 
       
   342     def  __init__( self ):
       
   343         """initialize a block content processor"""
       
   344         self.reset()
       
   345 
       
   346         self.sections = {}    # dictionary of documentation sections
       
   347         self.section  = None  # current documentation section
       
   348 
       
   349         self.chapters = []    # list of chapters
       
   350 
       
   351         self.headers  = {}    # dictionary of header macros
       
   352 
       
   353     def  set_section( self, section_name ):
       
   354         """set current section during parsing"""
       
   355         if not self.sections.has_key( section_name ):
       
   356             section = DocSection( section_name )
       
   357             self.sections[section_name] = section
       
   358             self.section                = section
       
   359         else:
       
   360             self.section = self.sections[section_name]
       
   361 
       
   362     def  add_chapter( self, block ):
       
   363         chapter = DocChapter( block )
       
   364         self.chapters.append( chapter )
       
   365 
       
   366 
       
   367     def  reset( self ):
       
   368         """reset the content processor for a new block"""
       
   369         self.markups      = []
       
   370         self.markup       = None
       
   371         self.markup_lines = []
       
   372 
       
   373     def  add_markup( self ):
       
   374         """add a new markup section"""
       
   375         if self.markup and self.markup_lines:
       
   376 
       
   377             # get rid of last line of markup if it's empty
       
   378             marks = self.markup_lines
       
   379             if len( marks ) > 0 and not string.strip( marks[-1] ):
       
   380                 self.markup_lines = marks[:-1]
       
   381 
       
   382             m = DocMarkup( self.markup, self.markup_lines )
       
   383 
       
   384             self.markups.append( m )
       
   385 
       
   386             self.markup       = None
       
   387             self.markup_lines = []
       
   388 
       
   389     def  process_content( self, content ):
       
   390         """process a block content and return a list of DocMarkup objects
       
   391            corresponding to it"""
       
   392         markup       = None
       
   393         markup_lines = []
       
   394         first        = 1
       
   395 
       
   396         for line in content:
       
   397             found = None
       
   398             for t in re_markup_tags:
       
   399                 m = t.match( line )
       
   400                 if m:
       
   401                     found  = string.lower( m.group( 1 ) )
       
   402                     prefix = len( m.group( 0 ) )
       
   403                     line   = " " * prefix + line[prefix:]   # remove markup from line
       
   404                     break
       
   405 
       
   406             # is it the start of a new markup section ?
       
   407             if found:
       
   408                 first = 0
       
   409                 self.add_markup()  # add current markup content
       
   410                 self.markup = found
       
   411                 if len( string.strip( line ) ) > 0:
       
   412                     self.markup_lines.append( line )
       
   413             elif first == 0:
       
   414                 self.markup_lines.append( line )
       
   415 
       
   416         self.add_markup()
       
   417 
       
   418         return self.markups
       
   419 
       
   420     def  parse_sources( self, source_processor ):
       
   421         blocks = source_processor.blocks
       
   422         count  = len( blocks )
       
   423 
       
   424         for n in range( count ):
       
   425             source = blocks[n]
       
   426             if source.content:
       
   427                 # this is a documentation comment, we need to catch
       
   428                 # all following normal blocks in the "follow" list
       
   429                 #
       
   430                 follow = []
       
   431                 m = n + 1
       
   432                 while m < count and not blocks[m].content:
       
   433                     follow.append( blocks[m] )
       
   434                     m = m + 1
       
   435 
       
   436                 doc_block = DocBlock( source, follow, self )
       
   437 
       
   438     def  finish( self ):
       
   439         # process all sections to extract their abstract, description
       
   440         # and ordered list of items
       
   441         #
       
   442         for sec in self.sections.values():
       
   443             sec.process()
       
   444 
       
   445         # process chapters to check that all sections are correctly
       
   446         # listed there
       
   447         for chap in self.chapters:
       
   448             for sec in chap.order:
       
   449                 if self.sections.has_key( sec ):
       
   450                     section = self.sections[sec]
       
   451                     section.chapter = chap
       
   452                     section.reorder()
       
   453                     chap.sections.append( section )
       
   454                 else:
       
   455                     sys.stderr.write( "WARNING: chapter '" +          \
       
   456                         chap.name + "' in " + chap.block.location() + \
       
   457                         " lists unknown section '" + sec + "'\n" )
       
   458 
       
   459         # check that all sections are in a chapter
       
   460         #
       
   461         others = []
       
   462         for sec in self.sections.values():
       
   463             if not sec.chapter:
       
   464                 others.append( sec )
       
   465 
       
   466         # create a new special chapter for all remaining sections
       
   467         # when necessary
       
   468         #
       
   469         if others:
       
   470             chap = DocChapter( None )
       
   471             chap.sections = others
       
   472             self.chapters.append( chap )
       
   473 
       
   474 
       
   475 
       
   476 class  DocBlock:
       
   477 
       
   478     def  __init__( self, source, follow, processor ):
       
   479         processor.reset()
       
   480 
       
   481         self.source  = source
       
   482         self.code    = []
       
   483         self.type    = "ERRTYPE"
       
   484         self.name    = "ERRNAME"
       
   485         self.section = processor.section
       
   486         self.markups = processor.process_content( source.content )
       
   487 
       
   488         # compute block type from first markup tag
       
   489         try:
       
   490             self.type = self.markups[0].tag
       
   491         except:
       
   492             pass
       
   493 
       
   494         # compute block name from first markup paragraph
       
   495         try:
       
   496             markup = self.markups[0]
       
   497             para   = markup.fields[0].items[0]
       
   498             name   = para.words[0]
       
   499             m = re_identifier.match( name )
       
   500             if m:
       
   501                 name = m.group( 1 )
       
   502             self.name = name
       
   503         except:
       
   504             pass
       
   505 
       
   506         if self.type == "section":
       
   507             # detect new section starts
       
   508             processor.set_section( self.name )
       
   509             processor.section.add_def( self )
       
   510         elif self.type == "chapter":
       
   511             # detect new chapter
       
   512             processor.add_chapter( self )
       
   513         else:
       
   514             processor.section.add_block( self )
       
   515 
       
   516         # now, compute the source lines relevant to this documentation
       
   517         # block. We keep normal comments in for obvious reasons (??)
       
   518         source = []
       
   519         for b in follow:
       
   520             if b.format:
       
   521                 break
       
   522             for l in b.lines:
       
   523                 # collect header macro definitions
       
   524                 m = re_header_macro.match( l )
       
   525                 if m:
       
   526                     processor.headers[m.group( 2 )] = m.group( 1 );
       
   527 
       
   528                 # we use "/* */" as a separator
       
   529                 if re_source_sep.match( l ):
       
   530                     break
       
   531                 source.append( l )
       
   532 
       
   533         # now strip the leading and trailing empty lines from the sources
       
   534         start = 0
       
   535         end   = len( source ) - 1
       
   536 
       
   537         while start < end and not string.strip( source[start] ):
       
   538             start = start + 1
       
   539 
       
   540         while start < end and not string.strip( source[end] ):
       
   541             end = end - 1
       
   542 
       
   543         if start == end and not string.strip( source[start] ):
       
   544             self.code = []
       
   545         else:
       
   546             self.code = source[start:end + 1]
       
   547 
       
   548     def  location( self ):
       
   549         return self.source.location()
       
   550 
       
   551     def  get_markup( self, tag_name ):
       
   552         """return the DocMarkup corresponding to a given tag in a block"""
       
   553         for m in self.markups:
       
   554             if m.tag == string.lower( tag_name ):
       
   555                 return m
       
   556         return None
       
   557 
       
   558     def  get_markup_name( self, tag_name ):
       
   559         """return the name of a given primary markup in a block"""
       
   560         try:
       
   561             m = self.get_markup( tag_name )
       
   562             return m.get_name()
       
   563         except:
       
   564             return None
       
   565 
       
   566     def  get_markup_words( self, tag_name ):
       
   567         try:
       
   568             m = self.get_markup( tag_name )
       
   569             return m.fields[0].items[0].words
       
   570         except:
       
   571             return []
       
   572 
       
   573     def  get_markup_text( self, tag_name ):
       
   574         result = self.get_markup_words( tag_name )
       
   575         return string.join( result )
       
   576 
       
   577     def  get_markup_items( self, tag_name ):
       
   578         try:
       
   579             m = self.get_markup( tag_name )
       
   580             return m.fields[0].items
       
   581         except:
       
   582             return None
       
   583 
       
   584 # eof