pdoc.doc_ast
This module handles all interpretation of the Abstract Syntax Tree (AST) in pdoc.
Parsing the AST is done to extract docstrings, type annotations, and variable declarations from `__init__`.
1""" 2This module handles all interpretation of the *Abstract Syntax Tree (AST)* in pdoc. 3 4Parsing the AST is done to extract docstrings, type annotations, and variable declarations from `__init__`. 5""" 6from __future__ import annotations 7 8import ast 9import inspect 10import types 11import warnings 12from collections.abc import Iterable 13from collections.abc import Iterator 14from dataclasses import dataclass 15from itertools import tee 16from itertools import zip_longest 17from typing import Any 18from typing import overload 19from typing import TypeVar 20 21import pdoc 22from ._compat import ast_unparse 23from ._compat import cache 24 25 26def get_source(obj: Any) -> str: 27 """ 28 Returns the source code of the Python object `obj` as a str. 29 This tries to first unwrap the method if it is wrapped and then calls `inspect.getsource`. 30 31 If this fails, an empty string is returned. 32 """ 33 # Some objects may not be hashable, so we fall back to the non-cached version if that is the case. 34 try: 35 return _get_source(obj) 36 except TypeError: 37 return _get_source.__wrapped__(obj) 38 39 40@cache 41def _get_source(obj: Any) -> str: 42 try: 43 return inspect.getsource(obj) 44 except Exception: 45 return "" 46 47 48@overload 49def parse(obj: types.ModuleType) -> ast.Module: 50 ... 51 52 53@overload 54def parse(obj: types.FunctionType) -> ast.FunctionDef | ast.AsyncFunctionDef: 55 ... 56 57 58@overload 59def parse(obj: type) -> ast.ClassDef: 60 ... 61 62 63def parse(obj): 64 """ 65 Parse a module, class or function and return the (unwrapped) AST node. 66 If an object's source code cannot be found, this function returns an empty ast node stub 67 which can still be walked. 
68 """ 69 src = get_source(obj) 70 if isinstance(obj, types.ModuleType): 71 return _parse_module(src) 72 elif isinstance(obj, type): 73 return _parse_class(src) 74 else: 75 return _parse_function(src) 76 77 78@cache 79def unparse(tree: ast.AST): 80 """`ast.unparse`, but cached.""" 81 return ast_unparse(tree) 82 83 84@dataclass 85class AstInfo: 86 """The information extracted from walking the syntax tree.""" 87 88 var_docstrings: dict[str, str] 89 """A qualname -> docstring mapping.""" 90 func_docstrings: dict[str, str] 91 """A qualname -> docstring mapping for functions.""" 92 annotations: dict[str, str] 93 """A qualname -> annotation mapping. 94 95 Annotations are not evaluated by this module and only returned as strings.""" 96 97 98def walk_tree(obj: types.ModuleType | type) -> AstInfo: 99 """ 100 Walks the abstract syntax tree for `obj` and returns the extracted information. 101 """ 102 return _walk_tree(parse(obj)) 103 104 105@cache 106def _walk_tree( 107 tree: ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef, 108) -> AstInfo: 109 var_docstrings = {} 110 func_docstrings = {} 111 annotations = {} 112 for a, b in _pairwise_longest(_nodes(tree)): 113 if isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) and a.simple: 114 name = a.target.id 115 annotations[name] = unparse(a.annotation) 116 elif ( 117 isinstance(a, ast.Assign) 118 and len(a.targets) == 1 119 and isinstance(a.targets[0], ast.Name) 120 ): 121 name = a.targets[0].id 122 # Make sure that all assignments are picked up, even is there is 123 # no annotation or docstring. 
124 annotations.setdefault(name, pdoc.doc_types.empty) 125 elif isinstance(a, ast.FunctionDef) and a.body: 126 first = a.body[0] 127 if isinstance(first, ast.Expr) and isinstance(first.value, ast.Str): 128 func_docstrings[a.name] = inspect.cleandoc(first.value.s).strip() 129 continue 130 else: 131 continue 132 if ( 133 isinstance(b, ast.Expr) 134 and isinstance(b.value, ast.Constant) 135 and isinstance(b.value.value, str) 136 ): 137 var_docstrings[name] = inspect.cleandoc(b.value.value).strip() 138 elif isinstance(b, ast.Expr) and isinstance( 139 b.value, ast.Str 140 ): # pragma: no cover 141 # Python <= 3.7 142 var_docstrings[name] = inspect.cleandoc(b.value.s).strip() 143 return AstInfo( 144 var_docstrings, 145 func_docstrings, 146 annotations, 147 ) 148 149 150T = TypeVar("T") 151 152 153def sort_by_source( 154 obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T] 155) -> tuple[dict[str, T], dict[str, T]]: 156 """ 157 Takes items from `unsorted` and inserts them into `sorted` in order of appearance in the source code of `obj`. 158 The only exception to this rule is `__init__`, which (if present) is always inserted first. 159 160 Some items may not be found, for example because they've been inherited from a superclass. They are returned as-is. 161 162 Returns a `(sorted, not found)` tuple. 
163 """ 164 tree = parse(obj) 165 166 if "__init__" in unsorted: 167 sorted["__init__"] = unsorted.pop("__init__") 168 169 for a in _nodes(tree): 170 if ( 171 isinstance(a, ast.Assign) 172 and len(a.targets) == 1 173 and isinstance(a.targets[0], ast.Name) 174 ): 175 name = a.targets[0].id 176 elif ( 177 isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) and a.simple 178 ): 179 name = a.target.id 180 elif isinstance(a, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)): 181 name = a.name 182 else: 183 continue 184 185 if name in unsorted: 186 sorted[name] = unsorted.pop(name) 187 return sorted, unsorted 188 189 190def type_checking_sections(mod: types.ModuleType) -> ast.Module: 191 """ 192 Walks the abstract syntax tree for `mod` and returns all statements guarded by TYPE_CHECKING blocks. 193 """ 194 ret = ast.Module(body=[], type_ignores=[]) 195 tree = _parse_module(get_source(mod)) 196 for node in tree.body: 197 if ( 198 isinstance(node, ast.If) 199 and isinstance(node.test, ast.Name) 200 and node.test.id == "TYPE_CHECKING" 201 ): 202 ret.body.extend(node.body) 203 if ( 204 isinstance(node, ast.If) 205 and isinstance(node.test, ast.Attribute) 206 and isinstance(node.test.value, ast.Name) 207 # some folks do "import typing as t", the accuracy with just TYPE_CHECKING is good enough. 208 # and node.test.value.id == "typing" 209 and node.test.attr == "TYPE_CHECKING" 210 ): 211 ret.body.extend(node.body) 212 return ret 213 214 215@cache 216def _parse_module(source: str) -> ast.Module: 217 """ 218 Parse the AST for the source code of a module and return the ast.Module. 219 220 Returns an empty ast.Module if source is empty. 221 """ 222 tree = _parse(source) 223 assert isinstance(tree, ast.Module) 224 return tree 225 226 227@cache 228def _parse_class(source: str) -> ast.ClassDef: 229 """ 230 Parse the AST for the source code of a class and return the ast.ClassDef. 231 232 Returns an empty ast.ClassDef if source is empty. 
233 """ 234 tree = _parse(source) 235 assert len(tree.body) <= 1 236 if tree.body: 237 t = tree.body[0] 238 assert isinstance(t, ast.ClassDef) 239 return t 240 return ast.ClassDef(body=[], decorator_list=[]) 241 242 243@cache 244def _parse_function(source: str) -> ast.FunctionDef | ast.AsyncFunctionDef: 245 """ 246 Parse the AST for the source code of a (async) function and return the matching AST node. 247 248 Returns an empty ast.FunctionDef if source is empty. 249 """ 250 tree = _parse(source) 251 assert len(tree.body) <= 1 252 if tree.body: 253 t = tree.body[0] 254 if isinstance(t, (ast.FunctionDef, ast.AsyncFunctionDef)): 255 return t 256 else: 257 # we have a lambda function, 258 # to simplify the API return the ast.FunctionDef stub. 259 pass 260 return ast.FunctionDef(body=[], decorator_list=[]) 261 262 263def _parse( 264 source: str, 265) -> ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef: 266 try: 267 return ast.parse(_dedent(source)) 268 except Exception as e: 269 warnings.warn(f"Error parsing source code: {e}\n" f"===\n" f"{source}\n" f"===") 270 return ast.parse("") 271 272 273@cache 274def _dedent(source: str) -> str: 275 """ 276 Dedent the head of a function or class definition so that it can be parsed by `ast.parse`. 277 This is an alternative to `textwrap.dedent`, which does not dedent if there are docstrings 278 without indentation. For example, this is valid Python code but would not be dedented with `textwrap.dedent`: 279 280 class Foo: 281 def bar(self): 282 ''' 283 this is a docstring 284 ''' 285 """ 286 if not source or source[0] not in (" ", "\t"): 287 return source 288 source = source.lstrip() 289 # we may have decorators before our function definition, in which case we need to dedent a few more lines. 290 # the following heuristic should be good enough to detect if we have reached the definition. 291 # it's easy to produce examples where this fails, but this probably is not a problem in practice. 
292 if not any(source.startswith(x) for x in ["async ", "def ", "class "]): 293 first_line, rest = source.split("\n", 1) 294 return first_line + "\n" + _dedent(rest) 295 else: 296 return source 297 298 299@cache 300def _nodes(tree: ast.Module | ast.ClassDef) -> list[ast.AST]: 301 """ 302 Returns the list of all nodes in tree's body, but also inlines the body of __init__. 303 304 This is useful to detect all declared variables in a class, even if they only appear in the constructor. 305 """ 306 return list(_nodes_iter(tree)) 307 308 309def _nodes_iter(tree: ast.Module | ast.ClassDef) -> Iterator[ast.AST]: 310 for a in tree.body: 311 yield a 312 if isinstance(a, ast.FunctionDef) and a.name == "__init__": 313 yield from _init_nodes(a) 314 315 316def _init_nodes(tree: ast.FunctionDef) -> Iterator[ast.AST]: 317 """ 318 Transform attribute assignments like "self.foo = 42" to name assignments like "foo = 42", 319 keep all constant expressions, and no-op everything else. 320 This essentially allows us to inline __init__ when parsing a class definition. 
321 """ 322 for a in tree.body: 323 if ( 324 isinstance(a, ast.AnnAssign) 325 and isinstance(a.target, ast.Attribute) 326 and isinstance(a.target.value, ast.Name) 327 and a.target.value.id == "self" 328 ): 329 yield ast.AnnAssign( 330 ast.Name(a.target.attr), a.annotation, a.value, simple=1 331 ) 332 elif ( 333 isinstance(a, ast.Assign) 334 and len(a.targets) == 1 335 and isinstance(a.targets[0], ast.Attribute) 336 and isinstance(a.targets[0].value, ast.Name) 337 and a.targets[0].value.id == "self" 338 ): 339 yield ast.Assign( 340 [ast.Name(a.targets[0].attr)], 341 value=a.value, 342 # not available on Python 3.7 343 type_comment=getattr(a, "type_comment", None), 344 ) 345 elif ( 346 isinstance(a, ast.Expr) 347 and isinstance(a.value, ast.Constant) 348 and isinstance(a.value.value, str) 349 ): 350 yield a 351 elif isinstance(a, ast.Expr) and isinstance( 352 a.value, ast.Str 353 ): # pragma: no cover 354 # Python <= 3.7 355 yield a 356 else: 357 yield ast.Pass() 358 359 360def _pairwise_longest(iterable: Iterable[T]) -> Iterable[tuple[T, T]]: 361 """s -> (s0,s1), (s1,s2), (s2, s3), ..., (sN, None)""" 362 a, b = tee(iterable) 363 next(b, None) 364 return zip_longest(a, b)
def get_source(obj: Any) -> str:
    """
    Return the source code of the Python object `obj` as a str.

    The lookup goes through the cached `_get_source` helper. If `obj` cannot be used
    as a cache key, the uncached implementation is called directly instead.
    """
    try:
        source = _get_source(obj)
    except TypeError:
        # Some objects may not be hashable, so the cache cannot store them:
        # bypass it and call the undecorated function.
        source = _get_source.__wrapped__(obj)
    return source
Returns the source code of the Python object `obj` as a str.
This tries to first unwrap the method if it is wrapped and then calls `inspect.getsource`.
If this fails, an empty string is returned.
def parse(obj):
    """
    Parse a module, class or function and return the (unwrapped) AST node.

    The parser is chosen by the kind of `obj`: modules, then classes, and
    everything else is treated as a function.
    If an object's source code cannot be found, this function returns an empty ast node stub
    which can still be walked.
    """
    source = get_source(obj)
    if isinstance(obj, types.ModuleType):
        parser = _parse_module
    elif isinstance(obj, type):
        parser = _parse_class
    else:
        parser = _parse_function
    return parser(source)
Parse a module, class or function and return the (unwrapped) AST node. If an object's source code cannot be found, this function returns an empty ast node stub which can still be walked.
@cache
def unparse(tree: ast.AST):
    """Cached variant of `ast.unparse`: turn the AST node `tree` back into source text."""
    unparsed = ast_unparse(tree)
    return unparsed
`ast.unparse`, but cached.
@dataclass
class AstInfo:
    """Container for everything collected while walking a syntax tree."""

    var_docstrings: dict[str, str]
    """Maps each variable name to its docstring."""
    func_docstrings: dict[str, str]
    """Maps each function name to its docstring."""
    annotations: dict[str, str]
    """Maps each variable name to its annotation.

    This module never evaluates annotations; they are returned as source strings."""
The information extracted from walking the syntax tree.
def walk_tree(obj: types.ModuleType | type) -> AstInfo:
    """
    Parse `obj` and walk its abstract syntax tree, returning the extracted information.
    """
    tree = parse(obj)
    return _walk_tree(tree)
Walks the abstract syntax tree for `obj` and returns the extracted information.
def sort_by_source(
    obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T]
) -> tuple[dict[str, T], dict[str, T]]:
    """
    Move items from `unsorted` into `sorted`, following the order in which their names
    appear in the source code of `obj`.
    `__init__` is the one exception: if present, it is always moved first.

    Items whose name is not found in the source (for example because they've been inherited
    from a superclass) stay in `unsorted`. Both dictionaries are modified in place.

    Returns a `(sorted, not found)` tuple.
    """
    tree = parse(obj)

    if "__init__" in unsorted:
        sorted["__init__"] = unsorted.pop("__init__")

    for node in _nodes(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            name = node.name
        elif isinstance(node, ast.Assign):
            # only plain single-target assignments such as `foo = 42` declare a name
            if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name):
                continue
            name = node.targets[0].id
        elif isinstance(node, ast.AnnAssign):
            # only simple annotated names such as `foo: int = 42` declare a name
            if not (isinstance(node.target, ast.Name) and node.simple):
                continue
            name = node.target.id
        else:
            continue

        if name in unsorted:
            sorted[name] = unsorted.pop(name)
    return sorted, unsorted
Takes items from `unsorted` and inserts them into `sorted` in order of appearance in the source code of `obj`.
The only exception to this rule is `__init__`, which (if present) is always inserted first.
Some items may not be found, for example because they've been inherited from a superclass. They are returned as-is.
Returns a `(sorted, not found)` tuple.
def type_checking_sections(mod: types.ModuleType) -> ast.Module:
    """
    Collect all statements of `mod` that are guarded by TYPE_CHECKING blocks.

    Only top-level `if` statements are inspected. The bodies of all matching blocks
    are concatenated into a new `ast.Module`, which is returned.
    """
    ret = ast.Module(body=[], type_ignores=[])
    tree = _parse_module(get_source(mod))
    for node in tree.body:
        if not isinstance(node, ast.If):
            continue
        test = node.test
        if isinstance(test, ast.Name):
            # if TYPE_CHECKING:
            guarded = test.id == "TYPE_CHECKING"
        elif isinstance(test, ast.Attribute):
            # if typing.TYPE_CHECKING:
            # some folks do "import typing as t", the accuracy with just TYPE_CHECKING is good enough,
            # so the module name in front of the attribute is not checked.
            guarded = isinstance(test.value, ast.Name) and test.attr == "TYPE_CHECKING"
        else:
            guarded = False
        if guarded:
            ret.body.extend(node.body)
    return ret
Walks the abstract syntax tree for `mod` and returns all statements guarded by TYPE_CHECKING blocks.