blob: 09ff7f9970d768cd67aad6030844b0953e7e36ea [file] [log] [blame]
# Sources (c) 2002, 2003, 2004, 2006, 2007
# David Turner <david@freetype.org>
#
#
# this file contains definitions of classes needed to decompose
# C sources files into a series of multi-line "blocks". There are
# two kinds of blocks:
#
# - normal blocks, which contain source code or ordinary comments
#
# - documentation blocks, which have restricted formatting, and
# whose text always start with a documentation markup tag like
# "<Function>", "<Type>", etc..
#
# the routines used to process the content of documentation blocks
# are not contained here, but in "content.py"
#
# the classes and methods found here only deal with text parsing
# and basic documentation block extraction
#
import fileinput, re, sys, os, string
################################################################
##
## BLOCK FORMAT PATTERN
##
## A simple class containing compiled regular expressions used
## to detect potential documentation format block comments within
## C source code
##
## note that the 'column' pattern must contain a group that will
## be used to "unbox" the content of documentation comment blocks
##
class SourceBlockFormat:
def __init__( self, id, start, column, end ):
"""create a block pattern, used to recognize special documentation blocks"""
self.id = id
self.start = re.compile( start, re.VERBOSE )
self.column = re.compile( column, re.VERBOSE )
self.end = re.compile( end, re.VERBOSE )
#
# format 1 documentation comment blocks look like the following:
#
# /************************************/
# /* */
# /* */
# /* */
# /************************************/
#
# we define a few regular expressions here to detect them
#
start = r'''
\s* # any number of whitespace
/\*{2,}/ # followed by '/' and at least two asterisks then '/'
\s*$ # eventually followed by whitespace
'''
column = r'''
\s* # any number of whitespace
/\*{1} # followed by '/' and precisely one asterisk
([^*].*) # followed by anything (group 1)
\*{1}/ # followed by one asterisk and a '/'
\s*$ # eventually followed by whitespace
'''
re_source_block_format1 = SourceBlockFormat( 1, start, column, start )
#
# format 2 documentation comment blocks look like the following:
#
# /************************************ (at least 2 asterisks)
# *
# *
# *
# *
# **/ (1 or more asterisks at the end)
#
# we define a few regular expressions here to detect them
#
start = r'''
\s* # any number of whitespace
/\*{2,} # followed by '/' and at least two asterisks
\s*$ # eventually followed by whitespace
'''
column = r'''
\s* # any number of whitespace
\*{1}(?!/) # followed by precisely one asterisk not followed by `/'
(.*) # then anything (group1)
'''
end = r'''
\s* # any number of whitespace
\*+/ # followed by at least one asterisk, then '/'
'''
re_source_block_format2 = SourceBlockFormat( 2, start, column, end )
#
# the list of supported documentation block formats, we could add new ones
# relatively easily
#
re_source_block_formats = [ re_source_block_format1, re_source_block_format2 ]
#
# the following regular expressions corresponds to markup tags
# within the documentation comment blocks. they're equivalent
# despite their different syntax
#
# notice how each markup tag _must_ begin a new line
#
re_markup_tag1 = re.compile( r'''\s*<(\w*)>''' ) # <xxxx> format
re_markup_tag2 = re.compile( r'''\s*@(\w*):''' ) # @xxxx: format
#
# the list of supported markup tags, we could add new ones relatively
# easily
#
re_markup_tags = [ re_markup_tag1, re_markup_tag2 ]
#
# used to detect a cross-reference, after markup tags have been stripped
#
re_crossref = re.compile( r'@(\w*)(.*)' )
#
# used to detect italic and bold styles in paragraph text
#
re_italic = re.compile( r"_(\w(\w|')*)_(.*)" ) # _italic_
re_bold = re.compile( r"\*(\w(\w|')*)\*(.*)" ) # *bold*
#
# used to detect the end of commented source lines
#
re_source_sep = re.compile( r'\s*/\*\s*\*/' )
#
# used to perform cross-reference within source output
#
re_source_crossref = re.compile( r'(\W*)(\w*)' )
#
# a list of reserved source keywords
#
re_source_keywords = re.compile( '''\\b ( typedef |
struct |
enum |
union |
const |
char |
int |
short |
long |
void |
signed |
unsigned |
\#include |
\#define |
\#undef |
\#if |
\#ifdef |
\#ifndef |
\#else |
\#endif ) \\b''', re.VERBOSE )
################################################################
##
## SOURCE BLOCK CLASS
##
## A SourceProcessor is in charge or reading a C source file
## and decomposing it into a series of different "SourceBlocks".
## each one of these blocks can be made of the following data:
##
## - A documentation comment block that starts with "/**" and
## whose exact format will be discussed later
##
## - normal sources lines, include comments
##
## the important fields in a text block are the following ones:
##
## self.lines : a list of text lines for the corresponding block
##
## self.content : for documentation comment blocks only, this is the
## block content that has been "unboxed" from its
## decoration. This is None for all other blocks
## (i.e. sources or ordinary comments with no starting
## markup tag)
##
class SourceBlock:
def __init__( self, processor, filename, lineno, lines ):
self.processor = processor
self.filename = filename
self.lineno = lineno
self.lines = lines[:]
self.format = processor.format
self.content = []
if self.format == None:
return
words = []
# extract comment lines
lines = []
for line0 in self.lines:
m = self.format.column.match( line0 )
if m:
lines.append( m.group(1) )
# now, look for a markup tag
for l in lines:
l = string.strip(l)
if len(l) > 0:
for tag in re_markup_tags:
if tag.match( l ):
self.content = lines
return
def location( self ):
return "(" + self.filename + ":" + repr(self.lineno) + ")"
# debugging only - not used in normal operations
def dump( self ):
if self.content:
print "{{{content start---"
for l in self.content:
print l
print "---content end}}}"
return
fmt = ""
if self.format:
fmt = repr(self.format.id) + " "
for line in self.lines:
print line
################################################################
##
## SOURCE PROCESSOR CLASS
##
## The SourceProcessor is in charge or reading a C source file
## and decomposing it into a series of different "SourceBlock"
## objects.
##
## each one of these blocks can be made of the following data:
##
## - A documentation comment block that starts with "/**" and
## whose exact format will be discussed later
##
## - normal sources lines, include comments
##
##
class SourceProcessor:
def __init__( self ):
"""initialize a source processor"""
self.blocks = []
self.filename = None
self.format = None
self.lines = []
def reset( self ):
"""reset a block processor, clean all its blocks"""
self.blocks = []
self.format = None
def parse_file( self, filename ):
"""parse a C source file, and adds its blocks to the processor's list"""
self.reset()
self.filename = filename
fileinput.close()
self.format = None
self.lineno = 0
self.lines = []
for line in fileinput.input( filename ):
# strip trailing newlines, important on Windows machines !!
if line[-1] == '\012':
line = line[0:-1]
if self.format == None:
self.process_normal_line( line )
else:
if self.format.end.match( line ):
# that's a normal block end, add it to lines and
# create a new block
self.lines.append( line )
self.add_block_lines()
elif self.format.column.match( line ):
# that's a normal column line, add it to 'lines'
self.lines.append( line )
else:
# humm.. this is an unexpected block end,
# create a new block, but don't process the line
self.add_block_lines()
# we need to process the line again
self.process_normal_line( line )
# record the last lines
self.add_block_lines()
def process_normal_line( self, line ):
"""process a normal line and check if it's the start of a new block"""
for f in re_source_block_formats:
if f.start.match( line ):
self.add_block_lines()
self.format = f
self.lineno = fileinput.filelineno()
self.lines.append( line )
def add_block_lines( self ):
"""add the current accumulated lines, and create a new block"""
if self.lines != []:
block = SourceBlock( self, self.filename, self.lineno, self.lines )
self.blocks.append( block )
self.format = None
self.lines = []
# debugging only, not used in normal operations
def dump( self ):
"""print all blocks in a processor"""
for b in self.blocks:
b.dump()
# eof