|
| 1 | +"""Parse a Python module and describe its classes and functions. |
| 2 | +
|
| 3 | +Parse enough of a Python file to recognize imports and class and |
| 4 | +function definitions, and to find out the superclasses of a class. |
| 5 | +
|
| 6 | +The interface consists of a single function: |
| 7 | + readmodule_ex(module, path=None) |
| 8 | +where module is the name of a Python module, and path is an optional |
| 9 | +list of directories where the module is to be searched. If present, |
| 10 | +path is prepended to the system search path sys.path. The return value |
| 11 | +is a dictionary. The keys of the dictionary are the names of the |
| 12 | +classes and functions defined in the module (including classes that are |
| 13 | +defined via the from XXX import YYY construct). The values are |
| 14 | +instances of classes Class and Function. One special key/value pair is |
| 15 | +present for packages: the key '__path__' has a list as its value which |
| 16 | +contains the package search path. |
| 17 | +
|
| 18 | +Classes and Functions have a common superclass: _Object. Every instance |
| 19 | +has the following attributes: |
| 20 | + module -- name of the module; |
| 21 | + name -- name of the object; |
| 22 | + file -- file in which the object is defined; |
| 23 | + lineno -- line in the file where the object's definition starts; |
| 24 | + end_lineno -- line in the file where the object's definition ends; |
| 25 | + parent -- parent of this object, if any; |
| 26 | + children -- nested objects contained in this object. |
| 27 | +The 'children' attribute is a dictionary mapping names to objects. |
| 28 | +
|
| 29 | +Instances of Function describe functions with the attributes from _Object, |
| 30 | +plus the following: |
| 31 | + is_async -- if a function is defined with an 'async' prefix |
| 32 | +
|
| 33 | +Instances of Class describe classes with the attributes from _Object, |
| 34 | +plus the following: |
| 35 | + super -- list of super classes (Class instances if possible); |
| 36 | + methods -- mapping of method names to beginning line numbers. |
| 37 | +If the name of a super class is not recognized, the corresponding |
| 38 | +entry in the list of super classes is not a class instance but a |
| 39 | +string giving the name of the super class. Since import statements |
| 40 | +are recognized and imported modules are scanned as well, this |
| 41 | +shouldn't happen often. |
| 42 | +""" |
| 43 | + |
| 44 | +import ast |
| 45 | +import sys |
| 46 | +import importlib.util |
| 47 | + |
# Names exported by ``from pyclbr import *``.
__all__ = [
    "readmodule",
    "readmodule_ex",
    "Class",
    "Function",
]

# Cache of modules already seen, keyed by full (dotted) module name.
_modules = {}
| 51 | + |
| 52 | + |
class _Object:
    """Common base describing where a class or function is defined.

    The constructor stores module, name, file, lineno, end_lineno and
    parent on the instance, creates an empty 'children' dict, and, when a
    parent is given, registers the new object in parent.children under
    its own name.
    """

    def __init__(self, module, name, file, lineno, end_lineno, parent):
        # Identity and location of the definition.
        self.module, self.name, self.file = module, name, file
        # First and last source line of the definition.
        self.lineno, self.end_lineno = lineno, end_lineno
        # Nesting information: container above, nothing below yet.
        self.parent = parent
        self.children = {}
        if parent is None:
            return
        # Make the enclosing object aware of this nested definition.
        parent.children[name] = self
| 65 | + |
| 66 | + |
# The unusual Function and Class signatures are kept for back-compatibility.
class Function(_Object):
    """Describe a Python function; methods are described the same way.

    On top of the _Object attributes this stores 'is_async'.  When the
    parent is a Class instance, the function is additionally recorded in
    parent.methods, mapping its name to its starting line number.
    """

    def __init__(self, module, name, file, lineno,
                 parent=None, is_async=False, *, end_lineno=None):
        super().__init__(module, name, file, lineno, end_lineno, parent)
        self.is_async = is_async
        defined_in_class = isinstance(parent, Class)
        if defined_in_class:
            # Keep the class-level name -> line index in sync.
            parent.methods[name] = lineno
| 76 | + |
| 77 | + |
class Class(_Object):
    """Describe a Python class.

    On top of the _Object attributes this stores:
        super -- the list of super classes passed in as super_
                 (a new empty list when super_ is falsy);
        methods -- a dict, initially empty.
    """

    def __init__(self, module, name, super_, file, lineno,
                 parent=None, *, end_lineno=None):
        super().__init__(module, name, file, lineno, end_lineno, parent)
        self.methods = {}
        self.super = super_ if super_ else []
| 85 | + |
| 86 | + |
# The two helpers below are used in these tests:
# Lib/test/test_pyclbr, Lib/idlelib/idle_test/test_browser.py
def _nest_function(ob, func_name, lineno, end_lineno, is_async=False):
    """Create a Function nested within ob and return it.

    The new function takes its module and file from ob and uses ob as
    its parent.
    """
    nested = Function(
        ob.module,
        func_name,
        ob.file,
        lineno,
        parent=ob,
        is_async=is_async,
        end_lineno=end_lineno,
    )
    return nested
| 93 | + |
def _nest_class(ob, class_name, lineno, end_lineno, super=None):
    """Create a Class nested within ob and return it.

    The new class takes its module and file from ob and uses ob as its
    parent; 'super' is passed on as the list of super classes.
    """
    nested = Class(
        ob.module,
        class_name,
        super,
        ob.file,
        lineno,
        parent=ob,
        end_lineno=end_lineno,
    )
    return nested
| 98 | + |
| 99 | + |
def readmodule(module, path=None):
    """Return a dict of the Class objects found at the top level of module.

    This is the original interface, from before Functions were added:
    every entry that is not a Class instance is filtered out.
    """
    everything = _readmodule(module, path or [])
    return {
        name: obj
        for name, obj in everything.items()
        if isinstance(obj, Class)
    }
| 111 | + |
def readmodule_ex(module, path=None):
    """Return a dictionary with all functions and classes in module.

    The module is looked up in PATH + sys.path.  Imported superclasses
    are included when possible.  The source is read rather than imported,
    so the module is never executed.
    """
    extra_dirs = path or []
    return _readmodule(module, extra_dirs)
| 120 | + |
| 121 | + |
def _readmodule(module, path, inpackage=None):
    """Do the hard work for readmodule[_ex].

    If inpackage is given, it must be the dotted name of the package in
    which we are searching for a submodule, and then PATH must be the
    package search path; otherwise, we are searching for a top-level
    module, and path is combined with sys.path.

    Returns the dict describing the module (shared with the module cache).
    Raises ImportError when a dotted name refers to a parent that is not a
    package, and ModuleNotFoundError when no spec can be found.
    """
    # Compute the full module name (prepending inpackage if set).
    fullmodule = module if inpackage is None else f"{inpackage}.{module}"

    # Check in the cache.
    if fullmodule in _modules:
        return _modules[fullmodule]

    # Initialize the dict for this module's contents.
    tree = {}

    # Check if it is a built-in module; we don't do much for these.
    if inpackage is None and module in sys.builtin_module_names:
        _modules[module] = tree
        return tree

    # Check for a dotted module name: read the parent package first, then
    # look the last component up on the parent's package path.
    package, dot, submodule = module.rpartition('.')
    if dot:
        parent = _readmodule(package, path, inpackage)
        if inpackage is not None:
            package = f"{inpackage}.{package}"
        if '__path__' not in parent:
            raise ImportError('No package named {}'.format(package))
        return _readmodule(submodule, parent['__path__'], package)

    # Search the path for the module.
    search_path = path if inpackage is not None else path + sys.path
    spec = importlib.util._find_spec_from_path(fullmodule, search_path)
    if spec is None:
        raise ModuleNotFoundError(f"no module named {fullmodule!r}", name=fullmodule)

    # Cache the (still empty) dict before reading the source, so a later
    # lookup of the same name returns this dict instead of starting over.
    _modules[fullmodule] = tree

    # Is module a package?
    if spec.submodule_search_locations is not None:
        tree['__path__'] = spec.submodule_search_locations

    try:
        source = spec.loader.get_source(fullmodule)
    except (AttributeError, ImportError):
        # If module is not Python source, we cannot do anything.
        return tree
    if source is None:
        return tree

    fname = spec.loader.get_filename(fullmodule)
    return _create_tree(fullmodule, path, fname, source, tree, inpackage)
| 184 | + |
| 185 | + |
class _ModuleBrowser(ast.NodeVisitor):
    """AST visitor that fills 'tree' with the definitions of one module.

    Top-level classes and functions are stored in self.tree under their
    name; nested ones are attached to their parent by the Class/Function
    constructors.  Top-level imports are followed through _readmodule, and
    names brought in with 'from ... import ...' are copied into self.tree.
    """

    def __init__(self, module, path, file, tree, inpackage):
        self.path = path
        self.tree = tree
        self.file = file
        self.module = module
        self.inpackage = inpackage
        # Definitions currently being visited, innermost last.
        self.stack = []

    def _resolve_base(self, name):
        """Return the known object for base class 'name', else the name.

        A name found in self.tree is a known super class.  A dotted name
        is looked up as module.class in the module cache.  Anything that
        cannot be resolved is returned unchanged as a string, so that no
        base class is ever dropped from the list of super classes.
        """
        if name in self.tree:
            # We know this super class.
            return self.tree[name]
        names = name.split(".")
        if len(names) > 1:
            # Super class form is module.class:
            # look in module for class.
            *_, module, class_ = names
            if module in _modules:
                return _modules[module].get(class_, name)
        return name

    def visit_ClassDef(self, node):
        """Record a class definition and visit its body."""
        bases = [self._resolve_base(ast.unparse(base)) for base in node.bases]

        parent = self.stack[-1] if self.stack else None
        class_ = Class(self.module, node.name, bases, self.file, node.lineno,
                       parent=parent, end_lineno=node.end_lineno)
        if parent is None:
            self.tree[node.name] = class_
        # Definitions met while visiting the body become children of class_.
        self.stack.append(class_)
        self.generic_visit(node)
        self.stack.pop()

    def visit_FunctionDef(self, node, *, is_async=False):
        """Record a function definition and visit its body."""
        parent = self.stack[-1] if self.stack else None
        function = Function(self.module, node.name, self.file, node.lineno,
                            parent, is_async, end_lineno=node.end_lineno)
        if parent is None:
            self.tree[node.name] = function
        self.stack.append(function)
        self.generic_visit(node)
        self.stack.pop()

    def visit_AsyncFunctionDef(self, node):
        """Record an 'async def' the same way as a plain function."""
        self.visit_FunctionDef(node, is_async=True)

    def visit_Import(self, node):
        """Read the modules named by an 'import' starting in column 0."""
        if node.col_offset != 0:
            return

        for alias in node.names:
            try:
                try:
                    _readmodule(alias.name, self.path, self.inpackage)
                except ImportError:
                    # Second attempt: look it up as a top-level module.
                    _readmodule(alias.name, [])
            except (ImportError, SyntaxError):
                # If we can't find or parse the imported module,
                # too bad -- don't die here.
                continue

    def visit_ImportFrom(self, node):
        """Copy names imported by a 'from ... import' starting in column 0."""
        if node.col_offset != 0:
            return
        try:
            # One leading dot per level of relative import.
            module = "." * node.level
            if node.module:
                module += node.module
            module = _readmodule(module, self.path, self.inpackage)
        except (ImportError, SyntaxError):
            return

        for name in node.names:
            if name.name in module:
                self.tree[name.asname or name.name] = module[name.name]
            elif name.name == "*":
                # Star import: take every name not starting with "_".
                for import_name, import_value in module.items():
                    if import_name.startswith("_"):
                        continue
                    self.tree[import_name] = import_value
| 267 | + |
| 268 | + |
def _create_tree(fullmodule, path, fname, source, tree, inpackage):
    """Parse source and return the browser's tree after visiting it.

    A _ModuleBrowser is created for the module and run over the AST of
    'source'; its 'tree' attribute is the return value.
    """
    browser = _ModuleBrowser(fullmodule, path, fname, tree, inpackage)
    parsed = ast.parse(source)
    browser.visit(parsed)
    return browser.tree
| 273 | + |
| 274 | + |
def _main():
    """Print module output (default this file) for quick visual check.

    The module is taken from the first command-line argument.  If it names
    an existing path, its directory becomes the search path and a trailing
    ".py" is stripped from the base name.  Classes and functions are
    printed in line-number order, nested objects indented below their
    container.
    """
    import os
    try:
        mod = sys.argv[1]
    except IndexError:
        # No argument given: describe this very file.
        mod = __file__
    if os.path.exists(mod):
        path = [os.path.dirname(mod)]
        mod = os.path.basename(mod)
        if mod.lower().endswith(".py"):
            mod = mod[:-3]
    else:
        path = []
    tree = readmodule_ex(mod, path)

    def lineno_key(a):
        # Values without a 'lineno' attribute sort as line 0.
        return getattr(a, 'lineno', 0)

    # Sorted in reverse so that pop() yields objects in line order.
    objs = sorted(tree.values(), key=lineno_key, reverse=True)
    indent_level = 2
    while objs:
        obj = objs.pop()
        if isinstance(obj, list):
            # Value is a __path__ key.
            continue
        if not hasattr(obj, 'indent'):
            obj.indent = 0

        if isinstance(obj, _Object):
            # Push the children so they are printed right after obj.
            new_objs = sorted(obj.children.values(),
                              key=lineno_key, reverse=True)
            for ob in new_objs:
                ob.indent = obj.indent + indent_level
            objs.extend(new_objs)
        if isinstance(obj, Class):
            print("{}class {} {} {}"
                  .format(' ' * obj.indent, obj.name, obj.super, obj.lineno))
        elif isinstance(obj, Function):
            print("{}def {} {}".format(' ' * obj.indent, obj.name, obj.lineno))


if __name__ == "__main__":
    _main()
0 commit comments