pdoc.doc_ast
This module handles all interpretation of the Abstract Syntax Tree (AST) in pdoc.
Parsing the AST is done to extract docstrings, type annotations, and variable declarations from `__init__`.
1""" 2This module handles all interpretation of the *Abstract Syntax Tree (AST)* in pdoc. 3 4Parsing the AST is done to extract docstrings, type annotations, and variable declarations from `__init__`. 5""" 6from __future__ import annotations 7 8import ast 9import inspect 10import types 11import warnings 12from collections.abc import Iterable, Iterator 13from dataclasses import dataclass 14from itertools import tee, zip_longest 15from typing import Any, TypeVar, overload 16 17from ._compat import ast_unparse, cache 18 19 20def get_source(obj: Any) -> str: 21 """ 22 Returns the source code of the Python object `obj` as a str. 23 This tries to first unwrap the method if it is wrapped and then calls `inspect.getsource`. 24 25 If this fails, an empty string is returned. 26 """ 27 # Some objects may not be hashable, so we fall back to the non-cached version if that is the case. 28 try: 29 return _get_source(obj) 30 except TypeError: 31 return _get_source.__wrapped__(obj) 32 33 34@cache 35def _get_source(obj: Any) -> str: 36 try: 37 return inspect.getsource(obj) 38 except Exception: 39 return "" 40 41 42@overload 43def parse(obj: types.ModuleType) -> ast.Module: 44 ... 45 46 47@overload 48def parse(obj: types.FunctionType) -> ast.FunctionDef | ast.AsyncFunctionDef: 49 ... 50 51 52@overload 53def parse(obj: type) -> ast.ClassDef: 54 ... 55 56 57def parse(obj): 58 """ 59 Parse a module, class or function and return the (unwrapped) AST node. 60 If an object's source code cannot be found, this function returns an empty ast node stub 61 which can still be walked. 
62 """ 63 src = get_source(obj) 64 if isinstance(obj, types.ModuleType): 65 return _parse_module(src) 66 elif isinstance(obj, type): 67 return _parse_class(src) 68 else: 69 return _parse_function(src) 70 71 72@cache 73def unparse(tree: ast.AST): 74 """`ast.unparse`, but cached.""" 75 return ast_unparse(tree) 76 77 78@dataclass 79class AstInfo: 80 """The information extracted from walking the syntax tree.""" 81 82 docstrings: dict[str, str] 83 """A qualname -> docstring mapping.""" 84 annotations: dict[str, str] 85 """A qualname -> annotation mapping. 86 87 Annotations are not evaluated by this module and only returned as strings.""" 88 89 90def walk_tree(obj: types.ModuleType | type) -> AstInfo: 91 """ 92 Walks the abstract syntax tree for `obj` and returns the extracted information. 93 """ 94 return _walk_tree(parse(obj)) 95 96 97@cache 98def _walk_tree( 99 tree: ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef, 100) -> AstInfo: 101 docstrings = {} 102 annotations = {} 103 for a, b in _pairwise_longest(_nodes(tree)): 104 if isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) and a.simple: 105 name = a.target.id 106 annotations[name] = unparse(a.annotation) 107 elif ( 108 isinstance(a, ast.Assign) 109 and len(a.targets) == 1 110 and isinstance(a.targets[0], ast.Name) 111 ): 112 name = a.targets[0].id 113 else: 114 continue 115 if ( 116 isinstance(b, ast.Expr) 117 and isinstance(b.value, ast.Constant) 118 and isinstance(b.value.value, str) 119 ): 120 docstrings[name] = inspect.cleandoc(b.value.value).strip() 121 elif isinstance(b, ast.Expr) and isinstance( 122 b.value, ast.Str 123 ): # pragma: no cover 124 # Python <= 3.7 125 docstrings[name] = inspect.cleandoc(b.value.s).strip() 126 return AstInfo( 127 docstrings, 128 annotations, 129 ) 130 131 132T = TypeVar("T") 133 134 135def sort_by_source( 136 obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T] 137) -> tuple[dict[str, T], dict[str, T]]: 138 """ 139 Takes 
items from `unsorted` and inserts them into `sorted` in order of appearance in the source code of `obj`. 140 The only exception to this rule is `__init__`, which (if present) is always inserted first. 141 142 Some items may not be found, for example because they've been inherited from a superclass. They are returned as-is. 143 144 Returns a `(sorted, not found)` tuple. 145 """ 146 tree = parse(obj) 147 148 if "__init__" in unsorted: 149 sorted["__init__"] = unsorted.pop("__init__") 150 151 for a in _nodes(tree): 152 if ( 153 isinstance(a, ast.Assign) 154 and len(a.targets) == 1 155 and isinstance(a.targets[0], ast.Name) 156 ): 157 name = a.targets[0].id 158 elif ( 159 isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) and a.simple 160 ): 161 name = a.target.id 162 elif isinstance(a, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)): 163 name = a.name 164 else: 165 continue 166 167 if name in unsorted: 168 sorted[name] = unsorted.pop(name) 169 return sorted, unsorted 170 171 172def type_checking_sections(mod: types.ModuleType) -> ast.Module: 173 """ 174 Walks the abstract syntax tree for `mod` and returns all statements guarded by TYPE_CHECKING blocks. 175 """ 176 ret = ast.Module(body=[], type_ignores=[]) 177 tree = _parse_module(get_source(mod)) 178 for node in tree.body: 179 if ( 180 isinstance(node, ast.If) 181 and isinstance(node.test, ast.Name) 182 and node.test.id == "TYPE_CHECKING" 183 ): 184 ret.body.extend(node.body) 185 if ( 186 isinstance(node, ast.If) 187 and isinstance(node.test, ast.Attribute) 188 and isinstance(node.test.value, ast.Name) 189 # some folks do "import typing as t", the accuracy with just TYPE_CHECKING is good enough. 190 # and node.test.value.id == "typing" 191 and node.test.attr == "TYPE_CHECKING" 192 ): 193 ret.body.extend(node.body) 194 return ret 195 196 197@cache 198def _parse_module(source: str) -> ast.Module: 199 """ 200 Parse the AST for the source code of a module and return the ast.Module. 
201 202 Returns an empty ast.Module if source is empty. 203 """ 204 tree = _parse(source) 205 assert isinstance(tree, ast.Module) 206 return tree 207 208 209@cache 210def _parse_class(source: str) -> ast.ClassDef: 211 """ 212 Parse the AST for the source code of a class and return the ast.ClassDef. 213 214 Returns an empty ast.ClassDef if source is empty. 215 """ 216 tree = _parse(source) 217 assert len(tree.body) <= 1 218 if tree.body: 219 t = tree.body[0] 220 assert isinstance(t, ast.ClassDef) 221 return t 222 return ast.ClassDef(body=[], decorator_list=[]) 223 224 225@cache 226def _parse_function(source: str) -> ast.FunctionDef | ast.AsyncFunctionDef: 227 """ 228 Parse the AST for the source code of a (async) function and return the matching AST node. 229 230 Returns an empty ast.FunctionDef if source is empty. 231 """ 232 tree = _parse(source) 233 assert len(tree.body) <= 1 234 if tree.body: 235 t = tree.body[0] 236 if isinstance(t, (ast.FunctionDef, ast.AsyncFunctionDef)): 237 return t 238 else: 239 # we have a lambda function, 240 # to simplify the API return the ast.FunctionDef stub. 241 pass 242 return ast.FunctionDef(body=[], decorator_list=[]) 243 244 245def _parse( 246 source: str, 247) -> ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef: 248 try: 249 return ast.parse(_dedent(source)) 250 except Exception as e: 251 warnings.warn(f"Error parsing source code: {e}\n" f"===\n" f"{source}\n" f"===") 252 return ast.parse("") 253 254 255@cache 256def _dedent(source: str) -> str: 257 """ 258 Dedent the head of a function or class definition so that it can be parsed by `ast.parse`. 259 This is an alternative to `textwrap.dedent`, which does not dedent if there are docstrings 260 without indentation. 
For example, this is valid Python code but would not be dedented with `textwrap.dedent`: 261 262 class Foo: 263 def bar(self): 264 ''' 265 this is a docstring 266 ''' 267 """ 268 if not source or source[0] not in (" ", "\t"): 269 return source 270 source = source.lstrip() 271 # we may have decorators before our function definition, in which case we need to dedent a few more lines. 272 # the following heuristic should be good enough to detect if we have reached the definition. 273 # it's easy to produce examples where this fails, but this probably is not a problem in practice. 274 if not any(source.startswith(x) for x in ["async ", "def ", "class "]): 275 first_line, rest = source.split("\n", 1) 276 return first_line + "\n" + _dedent(rest) 277 else: 278 return source 279 280 281@cache 282def _nodes(tree: ast.Module | ast.ClassDef) -> list[ast.AST]: 283 """ 284 Returns the list of all nodes in tree's body, but also inlines the body of __init__. 285 286 This is useful to detect all declared variables in a class, even if they only appear in the constructor. 287 """ 288 return list(_nodes_iter(tree)) 289 290 291def _nodes_iter(tree: ast.Module | ast.ClassDef) -> Iterator[ast.AST]: 292 for a in tree.body: 293 yield a 294 if isinstance(a, ast.FunctionDef) and a.name == "__init__": 295 yield from _init_nodes(a) 296 297 298def _init_nodes(tree: ast.FunctionDef) -> Iterator[ast.AST]: 299 """ 300 Transform attribute assignments like "self.foo = 42" to name assignments like "foo = 42", 301 keep all constant expressions, and no-op everything else. 302 This essentially allows us to inline __init__ when parsing a class definition. 
303 """ 304 for a in tree.body: 305 if ( 306 isinstance(a, ast.AnnAssign) 307 and isinstance(a.target, ast.Attribute) 308 and isinstance(a.target.value, ast.Name) 309 and a.target.value.id == "self" 310 ): 311 yield ast.AnnAssign( 312 ast.Name(a.target.attr), a.annotation, a.value, simple=1 313 ) 314 elif ( 315 isinstance(a, ast.Assign) 316 and len(a.targets) == 1 317 and isinstance(a.targets[0], ast.Attribute) 318 and isinstance(a.targets[0].value, ast.Name) 319 and a.targets[0].value.id == "self" 320 ): 321 yield ast.Assign( 322 [ast.Name(a.targets[0].attr)], 323 value=a.value, 324 # not available on Python 3.7 325 type_comment=getattr(a, "type_comment", None), 326 ) 327 elif ( 328 isinstance(a, ast.Expr) 329 and isinstance(a.value, ast.Constant) 330 and isinstance(a.value.value, str) 331 ): 332 yield a 333 elif isinstance(a, ast.Expr) and isinstance( 334 a.value, ast.Str 335 ): # pragma: no cover 336 # Python <= 3.7 337 yield a 338 else: 339 yield ast.Pass() 340 341 342def _pairwise_longest(iterable: Iterable[T]) -> Iterable[tuple[T, T]]: 343 """s -> (s0,s1), (s1,s2), (s2, s3), ..., (sN, None)""" 344 a, b = tee(iterable) 345 next(b, None) 346 return zip_longest(a, b)
def get_source(obj: Any) -> str:
    """
    Return the source code of the Python object `obj` as a string.

    The actual lookup is delegated to the cached `_get_source` helper, which calls
    `inspect.getsource` and yields an empty string if that fails.
    """
    try:
        source = _get_source(obj)
    except TypeError:
        # The cache has to hash its argument; unhashable objects take the uncached route instead.
        source = _get_source.__wrapped__(obj)
    return source
Returns the source code of the Python object `obj` as a str.

This tries to first unwrap the method if it is wrapped and then calls `inspect.getsource`.

If this fails, an empty string is returned.
def parse(obj):
    """
    Parse a module, class or function and return the (unwrapped) AST node.

    The parser is chosen by the kind of `obj`: modules and classes have dedicated parsers,
    everything else is treated as a function. If the source code of `obj` cannot be found,
    an empty ast node stub is returned, which can still be walked.
    """
    source = get_source(obj)
    if isinstance(obj, types.ModuleType):
        parser = _parse_module
    elif isinstance(obj, type):
        parser = _parse_class
    else:
        parser = _parse_function
    return parser(source)
Parse a module, class or function and return the (unwrapped) AST node. If an object's source code cannot be found, this function returns an empty ast node stub which can still be walked.
@cache
def unparse(tree: ast.AST):
    """Turn `tree` back into source code. Like `ast.unparse`, but cached."""
    source = ast_unparse(tree)
    return source
`ast.unparse`, but cached.
@dataclass
class AstInfo:
    """
    The information extracted from walking the syntax tree.

    Instances are returned by `walk_tree`.
    """

    # NOTE(review): `_walk_tree` in this module keys both mappings by the bare assignment target name.
    docstrings: dict[str, str]
    """A qualname -> docstring mapping."""
    annotations: dict[str, str]
    """A qualname -> annotation mapping.

    Annotations are not evaluated by this module and only returned as strings."""
The information extracted from walking the syntax tree.
def walk_tree(obj: types.ModuleType | type) -> AstInfo:
    """
    Parse `obj` into its abstract syntax tree, walk that tree, and return the extracted information.
    """
    tree = parse(obj)
    return _walk_tree(tree)
Walks the abstract syntax tree for `obj` and returns the extracted information.
def sort_by_source(
    obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T]
) -> tuple[dict[str, T], dict[str, T]]:
    """
    Move items from `unsorted` into `sorted`, following the order in which their names
    appear in the source code of `obj`.
    `__init__` is the one exception: if present, it is always moved first.

    Names that never show up in the source (for example members inherited from a superclass)
    stay in `unsorted` untouched.

    Returns a `(sorted, not found)` tuple.
    """
    tree = parse(obj)

    if "__init__" in unsorted:
        sorted["__init__"] = unsorted.pop("__init__")

    for node in _nodes(tree):
        # Work out which name (if any) this statement introduces.
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            name = node.name
        elif isinstance(node, ast.AnnAssign):
            if not (isinstance(node.target, ast.Name) and node.simple):
                continue
            name = node.target.id
        elif isinstance(node, ast.Assign):
            if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name):
                continue
            name = node.targets[0].id
        else:
            continue

        if name in unsorted:
            sorted[name] = unsorted.pop(name)
    return sorted, unsorted
Takes items from `unsorted` and inserts them into `sorted` in order of appearance in the source code of `obj`.
The only exception to this rule is `__init__`, which (if present) is always inserted first.

Some items may not be found, for example because they've been inherited from a superclass. They are returned as-is.

Returns a `(sorted, not found)` tuple.
def type_checking_sections(mod: types.ModuleType) -> ast.Module:
    """
    Collect all statements of `mod` that are guarded by a top-level TYPE_CHECKING block
    and return them as the body of a fresh `ast.Module`.
    """
    guarded = ast.Module(body=[], type_ignores=[])
    module_tree = _parse_module(get_source(mod))
    for stmt in module_tree.body:
        if not isinstance(stmt, ast.If):
            continue
        test = stmt.test
        if isinstance(test, ast.Name):
            # if TYPE_CHECKING:
            is_guard = test.id == "TYPE_CHECKING"
        elif isinstance(test, ast.Attribute):
            # if typing.TYPE_CHECKING:
            # some folks do "import typing as t", so the name in front of the dot is deliberately not checked;
            # the accuracy with just TYPE_CHECKING is good enough.
            is_guard = isinstance(test.value, ast.Name) and test.attr == "TYPE_CHECKING"
        else:
            is_guard = False
        if is_guard:
            guarded.body.extend(stmt.body)
    return guarded
Walks the abstract syntax tree for `mod` and returns all statements guarded by TYPE_CHECKING blocks.