| #!/usr/bin/env python |
| # encoding: utf-8 |
| # Baptiste Lepilleur, 2009 |
| |
| from dircache import listdir |
| import re |
| import fnmatch |
| import os.path |
| |
| |
# Default value of the prune_dirs argument of glob(): whitespace separated
# fnmatch expressions. A directory whose name matches one of them is not
# explored during the recursive traversal.
prune_dirs = (
    '.git .bzr .hg .svn '
    '_MTN _darcs CVS SCCS '
)
| |
# fnmatch expressions meant to exclude files and dirs by default during the
# recursive traversal. Kept for reference only: the assignment below is
# commented out.
##exclude_pats = prune_pats + '*~ #*# .#* %*% ._* .gitignore .cvsignore vssver.scc .DS_Store'.split()
| |
# Default value of the excludes argument of glob(): whitespace separated ant
# patterns (see ant_pattern_to_re). An entry whose path matches one of them is
# left out of the result of glob(); which directories get explored is decided
# by prune_dirs, not by these patterns.
default_excludes = (
    # temporary / backup files
    '\n**/*~'
    '\n**/#*#'
    '\n**/.#*'
    '\n**/%*%'
    '\n**/._*'
    # version-control bookkeeping
    '\n**/CVS'
    '\n**/CVS/**'
    '\n**/.cvsignore'
    '\n**/SCCS'
    '\n**/SCCS/**'
    '\n**/vssver.scc'
    '\n**/.svn'
    '\n**/.svn/**'
    '\n**/.git'
    '\n**/.git/**'
    '\n**/.gitignore'
    '\n**/.bzr'
    '\n**/.bzr/**'
    '\n**/.hg'
    '\n**/.hg/**'
    '\n**/_MTN'
    '\n**/_MTN/**'
    '\n**/_darcs'
    '\n**/_darcs/**'
    # desktop metadata
    '\n**/.DS_Store '
)
| |
# Kinds of entry glob() can report. Each kind is a distinct bit so that kinds
# can be OR-ed together to build the entry_type argument of glob().
DIR = 1 << 0
FILE = 1 << 1
DIR_LINK = 1 << 2
FILE_LINK = 1 << 3

# Ready-made combinations.
LINKS = FILE_LINK | DIR_LINK
ALL_NO_LINK = FILE | DIR
ALL = ALL_NO_LINK | LINKS
| |
# Tokenizer for ant patterns. Alternatives are tried in order, so the longest
# '**' forms win over a lone '*' or '/'. Groups:
#   1: '/**/'   2: '**/'   3: '/**'   4: '*'   5: '/'   6: literal text
_ANT_RE = re.compile( r'(/\*\*/)|(\*\*/)|(/\*\*)|(\*)|(/)|([^\*/]*)' )

def ant_pattern_to_re( ant_pattern ):
    """Generates a regular expression from the ant pattern.

    Matching convention:
    **/a: match 'a', 'dir/a', 'dir1/dir2/a'
    a/**/b: match 'a/b', 'a/c/b', 'a/d/c/b'
    *.py: match 'script.py' but not 'a/script.py'

    ant_pattern is written with '/' as separator; the generated expression
    accepts both '/' and os.path.sep as separator in the matched path.

    Returns a compiled regular expression that must match the whole path:
    it is anchored with '^' and '\\Z' (not '$', which would also accept a
    path followed by a newline) and compiled with re.DOTALL so that '**'
    can span directory names containing a newline, like '*' already does.

    Raises ValueError if part of the pattern cannot be tokenized.
    """
    escaped_sep = re.escape( os.path.sep )
    sep_rex = r'(?:/|%s)' % escaped_sep
    any_dirs_rex = '(?:.*%s)?' % sep_rex      # zero or more leading directories
    name_rex = '[^/%s]*' % escaped_sep        # anything but a separator
    rex = ['^']
    next_pos = 0
    for match in _ANT_RE.finditer( ant_pattern ):
        # Tokens must be contiguous; a gap would mean some characters of the
        # pattern were skipped by the tokenizer.
        if match.start(0) != next_pos:
            raise ValueError( "Invalid ant pattern" )
        if match.group(1):   # /**/
            rex.append( sep_rex + any_dirs_rex )
        elif match.group(2): # **/
            rex.append( any_dirs_rex )
        elif match.group(3): # /**
            rex.append( sep_rex + '.*' )
        elif match.group(4): # *
            rex.append( name_rex )
        elif match.group(5): # /
            rex.append( sep_rex )
        else:                # literal text (empty at the end of the pattern)
            rex.append( re.escape( match.group(6) ) )
        next_pos = match.end()
    rex.append( r'\Z' )
    return re.compile( ''.join( rex ), re.DOTALL )
| |
| def _as_list( l ): |
| if isinstance(l, basestring): |
| return l.split() |
| return l |
| |
def glob(dir_path,
         includes = '**/*',
         excludes = default_excludes,
         entry_type = FILE,
         prune_dirs = prune_dirs,
         max_depth = 25):
    """Returns the list of entries found below dir_path that pass the filters.

    Parameters:
    dir_path: directory where the traversal starts; '/' is converted to
        os.path.sep.
    includes: ant patterns (list, or whitespace separated string). An entry
        is kept only if its path matches at least one of them.
    excludes: ant patterns (same forms). An entry whose path matches one of
        them is dropped.
    entry_type: bit mask of DIR, FILE, DIR_LINK and FILE_LINK selecting the
        kinds of entry to return.
    prune_dirs: fnmatch patterns (same forms) tested against the name of each
        directory; a matching directory is not explored (it may still be
        returned itself if it passes the other filters).
    max_depth: maximum number of directory levels explored below dir_path;
        0 only lists the direct content of dir_path, None removes the limit.

    The ant patterns are matched against the path obtained by joining
    dir_path and the location of the entry below it, so dir_path is part of
    the matched text.

    Returns a list of paths, each starting with dir_path. Errors raised by
    os.listdir (for example on an unreadable directory) are not caught.
    """
    include_filter = [ant_pattern_to_re(p) for p in _as_list(includes)]
    exclude_filter = [ant_pattern_to_re(p) for p in _as_list(excludes)]
    prune_dirs = [p.replace('/',os.path.sep) for p in _as_list(prune_dirs)]
    dir_path = dir_path.replace('/',os.path.sep)

    def is_pruned_dir( dir_name ):
        """Return True if dir_name matches one of the prune_dirs patterns."""
        return any( fnmatch.fnmatch( dir_name, pattern ) for pattern in prune_dirs )

    def apply_filter( full_path, filter_rexs ):
        """Return True if at least one of the filter regular expression match full_path."""
        return any( rex.match( full_path ) for rex in filter_rexs )

    def glob_impl( root_dir_path ):
        # Iterative traversal: stack of (directory, depth below the root).
        pending_dirs = [(root_dir_path, 0)]
        while pending_dirs:
            current_dir, depth = pending_dirs.pop()
            may_descend = max_depth is None or depth < max_depth
            # Always re-read the directory; sorted() keeps the listing order
            # deterministic (dircache.listdir also returned a sorted list).
            for entry in sorted( os.listdir( current_dir ) ):
                full_path = os.path.join( current_dir, entry )
                is_dir = os.path.isdir( full_path )
                # Recursion is decided independently of the include/exclude
                # filters: a directory that is filtered out is still explored.
                if is_dir and may_descend and not is_pruned_dir( entry ):
                    pending_dirs.append( (full_path, depth + 1) )
                if not apply_filter( full_path, include_filter ):
                    continue
                if apply_filter( full_path, exclude_filter ):
                    continue
                is_link = os.path.islink( full_path )
                is_file = os.path.isfile( full_path )
                if not is_file and not is_dir:  # neither file nor directory
                    continue
                if is_link:
                    kind = FILE_LINK if is_file else DIR_LINK
                else:
                    kind = FILE if is_file else DIR
                if kind & entry_type:
                    yield full_path

    return list( glob_impl( dir_path ) )
| |
| |
if __name__ == "__main__":
    import unittest

    class AntPatternToRETest(unittest.TestCase):
        """Checks which paths the expressions built by ant_pattern_to_re accept."""

##        def test_conversion( self ):
##            self.assertEqual( '^somepath$', ant_pattern_to_re( 'somepath' ).pattern )

        def test_matching( self ):
            """Each case is (ant pattern, paths to accept, paths to reject)."""
            test_cases = [ ( 'path',
                             ['path'],
                             ['somepath', 'pathsuffix', '/path', 'path/'] ),
                           ( '*.py',
                             ['source.py', 'source.ext.py', '.py'],
                             ['path/source.py', '/.py', 'dir.py/z', 'z.pyc', 'z.c'] ),
                           ( '**/path',
                             ['path', '/path', '/a/path', 'c:/a/path', '/a/b/path', '//a/path', '/a/path/b/path'],
                             ['path/', 'a/path/b', 'dir.py/z', 'somepath', 'pathsuffix', 'a/somepath'] ),
                           ( 'path/**',
                             ['path/a', 'path/path/a', 'path//'],
                             ['path', 'somepath/a', 'a/path', 'a/path/a', 'pathsuffix/a'] ),
                           ( '/**/path',
                             ['/path', '/a/path', '/a/b/path/path', '/path/path'],
                             ['path', 'path/', 'a/path', '/pathsuffix', '/somepath'] ),
                           ( 'a/b',
                             ['a/b'],
                             ['somea/b', 'a/bsuffix', 'a/b/c'] ),
                           ( '**/*.py',
                             ['script.py', 'src/script.py', 'a/b/script.py', '/a/b/script.py'],
                             ['script.pyc', 'script.pyo', 'a.py/b'] ),
                           ( 'src/**/*.py',
                             ['src/a.py', 'src/dir/a.py'],
                             ['a/src/a.py', '/src/a.py'] ),
                           ]

            def local_path( paths ):
                return [ p.replace('/',os.path.sep) for p in paths ]

            # Every case is checked a second time with the paths spelled using
            # the separator of the platform (same paths where it is '/').
            native_cases = [ (ant_pattern, local_path(accepted), local_path(rejected))
                             for ant_pattern, accepted, rejected in test_cases ]

            for ant_pattern, accepted_matches, rejected_matches in test_cases + native_cases:
                rex = ant_pattern_to_re( ant_pattern )
                context = 'ant_pattern: %r => %s' % (ant_pattern, rex.pattern)
                for accepted_match in accepted_matches:
                    self.assertTrue( rex.match( accepted_match ) is not None,
                                     '%s should accept %r' % (context, accepted_match) )
                for rejected_match in rejected_matches:
                    self.assertTrue( rex.match( rejected_match ) is None,
                                     '%s should reject %r' % (context, rejected_match) )

    unittest.main()