pdoc.doc_ast
This module handles all interpretation of the Abstract Syntax Tree (AST) in pdoc.
Parsing the AST is done to extract docstrings, type annotations, and variable declarations from `__init__`.
"""
This module handles all interpretation of the *Abstract Syntax Tree (AST)* in pdoc.

Parsing the AST is done to extract docstrings, type annotations, and variable declarations from `__init__`.
"""

from __future__ import annotations

import ast
from collections.abc import Iterable
from collections.abc import Iterator
from dataclasses import dataclass
from functools import cache
import inspect
from itertools import tee
from itertools import zip_longest
import types
from typing import TYPE_CHECKING
from typing import Any
from typing import TypeVar
from typing import overload
import warnings

import pdoc

from ._compat import ast_TypeAlias

if TYPE_CHECKING:
    import pdoc.doc_types


def get_source(obj: Any) -> str:
    """
    Returns the source code of the Python object `obj` as a str.

    If this fails, an empty string is returned.
    """
    # Some objects may not be hashable, so we fall back to the non-cached version if that is the case.
    try:
        return _get_source(obj)
    except TypeError:
        return _get_source.__wrapped__(obj)


@cache
def _get_source(obj: Any) -> str:
    """Cached wrapper around `inspect.getsource` that returns `""` on any failure."""
    try:
        return inspect.getsource(obj)
    except Exception:
        return ""


@overload
def parse(obj: types.ModuleType) -> ast.Module: ...


@overload
def parse(obj: types.FunctionType) -> ast.FunctionDef | ast.AsyncFunctionDef: ...


@overload
def parse(obj: type) -> ast.ClassDef: ...


def parse(obj):
    """
    Parse a module, class or function and return the (unwrapped) AST node.
    If an object's source code cannot be found, this function returns an empty ast node stub
    which can still be walked.
    """
    src = get_source(obj)
    if isinstance(obj, types.ModuleType):
        return _parse_module(src)
    elif isinstance(obj, type):
        return _parse_class(src)
    else:
        return _parse_function(src)


@cache
def unparse(tree: ast.AST) -> str:
    """`ast.unparse`, but cached."""
    return ast.unparse(tree)


@dataclass
class AstInfo:
    """The information extracted from walking the syntax tree."""

    var_docstrings: dict[str, str]
    """A qualname -> docstring mapping."""
    func_docstrings: dict[str, str]
    """A qualname -> docstring mapping for functions."""
    annotations: dict[str, str | type[pdoc.doc_types.empty]]
    """A qualname -> annotation mapping.

    Annotations are not evaluated by this module and only returned as strings."""


def walk_tree(obj: types.ModuleType | type) -> AstInfo:
    """
    Walks the abstract syntax tree for `obj` and returns the extracted information.
    """
    return _walk_tree(parse(obj))


def _str_constant(node: ast.AST | None) -> str | None:
    """
    Return the string value if `node` is an expression statement consisting solely of a
    string literal (the shape of a docstring), and `None` otherwise.
    """
    if (
        isinstance(node, ast.Expr)
        and isinstance(node.value, ast.Constant)
        and isinstance(node.value.value, str)
    ):
        return node.value.value
    return None


@cache
def _walk_tree(
    tree: ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef,
) -> AstInfo:
    """
    Collect variable docstrings, function docstrings and annotations from `tree`.

    Nodes are visited pairwise so that a string literal directly following an
    assignment can be picked up as that variable's docstring.
    """
    var_docstrings = {}
    func_docstrings = {}
    annotations = {}
    for a, b in _pairwise_longest(_nodes(tree)):
        if isinstance(a, ast_TypeAlias):
            name = a.name.id
        elif (
            isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) and a.simple
        ):
            name = a.target.id
            annotations[name] = unparse(a.annotation)
        elif (
            isinstance(a, ast.Assign)
            and len(a.targets) == 1
            and isinstance(a.targets[0], ast.Name)
        ):
            name = a.targets[0].id
            # Make sure that all assignments are picked up, even if there is
            # no annotation or docstring.
            annotations.setdefault(name, pdoc.doc_types.empty)
        elif isinstance(a, ast.FunctionDef) and a.body:
            func_doc = _str_constant(a.body[0])
            if func_doc is not None:
                func_docstrings[a.name] = inspect.cleandoc(func_doc).strip()
            # Functions never take a trailing variable docstring.
            continue
        else:
            continue
        var_doc = _str_constant(b)
        if var_doc is not None:
            var_docstrings[name] = inspect.cleandoc(var_doc).strip()
    return AstInfo(
        var_docstrings,
        func_docstrings,
        annotations,
    )


T = TypeVar("T")


def sort_by_source(
    obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T]
) -> tuple[dict[str, T], dict[str, T]]:
    """
    Takes items from `unsorted` and inserts them into `sorted` in order of appearance in the source code of `obj`.
    The only exception to this rule is `__init__`, which (if present) is always inserted first.

    Some items may not be found, for example because they've been inherited from a superclass. They are returned as-is.

    Returns a `(sorted, not found)` tuple.
    """
    tree = parse(obj)

    if "__init__" in unsorted:
        sorted["__init__"] = unsorted.pop("__init__")

    for a in _nodes(tree):
        if (
            isinstance(a, ast.Assign)
            and len(a.targets) == 1
            and isinstance(a.targets[0], ast.Name)
        ):
            name = a.targets[0].id
        elif (
            isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) and a.simple
        ):
            name = a.target.id
        elif isinstance(a, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            name = a.name
        elif isinstance(a, ast_TypeAlias):
            name = a.name.id
        else:
            continue

        if name in unsorted:
            sorted[name] = unsorted.pop(name)
    return sorted, unsorted


def type_checking_sections(mod: types.ModuleType) -> ast.Module:
    """
    Walks the abstract syntax tree for `mod` and returns all statements guarded by TYPE_CHECKING blocks.
    """
    ret = ast.Module(body=[], type_ignores=[])
    tree = _parse_module(get_source(mod))
    for node in tree.body:
        if not isinstance(node, ast.If):
            continue
        test = node.test
        if isinstance(test, ast.Name):
            # `if TYPE_CHECKING:`
            guarded = test.id == "TYPE_CHECKING"
        elif isinstance(test, ast.Attribute) and isinstance(test.value, ast.Name):
            # `if typing.TYPE_CHECKING:`
            # some folks do "import typing as t", the accuracy with just TYPE_CHECKING is good enough.
            guarded = test.attr == "TYPE_CHECKING"
        else:
            guarded = False
        if guarded:
            ret.body.extend(node.body)
    return ret


@cache
def _parse_module(source: str) -> ast.Module:
    """
    Parse the AST for the source code of a module and return the ast.Module.

    Returns an empty ast.Module if source is empty.
    """
    tree = _parse(source)
    assert isinstance(tree, ast.Module)
    return tree


@cache
def _parse_class(source: str) -> ast.ClassDef:
    """
    Parse the AST for the source code of a class and return the ast.ClassDef.

    Returns an empty ast.ClassDef if source is empty.
    """
    tree = _parse(source)
    if tree.body and len(tree.body) == 1:
        t = tree.body[0]
        if isinstance(t, ast.ClassDef):
            return t
    return ast.ClassDef(name="PdocStub", body=[], decorator_list=[])  # type: ignore


@cache
def _parse_function(source: str) -> ast.FunctionDef | ast.AsyncFunctionDef:
    """
    Parse the AST for the source code of a (async) function and return the matching AST node.

    Returns an empty ast.FunctionDef if source is empty.
    """
    tree = _parse(source)
    if tree.body and len(tree.body) == 1:
        t = tree.body[0]
        if isinstance(t, (ast.FunctionDef, ast.AsyncFunctionDef)):
            return t
        else:
            # we have a lambda function,
            # to simplify the API return the ast.FunctionDef stub.
            pass
    return ast.FunctionDef(
        name="pdoc_stub", args=ast.arguments(), body=[], decorator_list=[]
    )  # type: ignore


def _parse(
    source: str,
) -> ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef:
    """
    Dedent and parse `source`.

    If parsing fails for any reason, a warning is emitted and the AST of an
    empty module is returned instead.
    """
    try:
        return ast.parse(_dedent(source))
    except Exception as e:
        warnings.warn(f"Error parsing source code: {e}\n" f"===\n" f"{source}\n" f"===")
        return ast.parse("")


@cache
def _dedent(source: str) -> str:
    """
    Dedent the head of a function or class definition so that it can be parsed by `ast.parse`.
    This is an alternative to `textwrap.dedent`, which does not dedent if there are docstrings
    without indentation. For example, this is valid Python code but would not be dedented with `textwrap.dedent`:

    class Foo:
        def bar(self):
            '''
    this is a docstring
            '''
    """
    if not source or source[0] not in (" ", "\t"):
        return source
    source = source.lstrip()
    # we may have decorators before our function definition, in which case we need to dedent a few more lines.
    # the following heuristic should be good enough to detect if we have reached the definition.
    # it's easy to produce examples where this fails, but this probably is not a problem in practice.
    if source.startswith(("async ", "def ", "class ")):
        return source
    # `partition` (unlike unpacking `split("\n", 1)`) also copes with a remainder
    # that has no newline, in which case `rest` is simply empty.
    first_line, _, rest = source.partition("\n")
    return first_line + "\n" + _dedent(rest)


@cache
def _nodes(tree: ast.Module | ast.ClassDef) -> list[ast.AST]:
    """
    Returns the list of all nodes in tree's body, but also inlines the body of __init__.

    This is useful to detect all declared variables in a class, even if they only appear in the constructor.
    """
    return list(_nodes_iter(tree))


def _nodes_iter(tree: ast.Module | ast.ClassDef) -> Iterator[ast.AST]:
    """Yield each node in `tree.body`, followed by the inlined body of `__init__` where present."""
    for a in tree.body:
        yield a
        if isinstance(a, ast.FunctionDef) and a.name == "__init__":
            yield from _init_nodes(a)


def _init_nodes(tree: ast.FunctionDef) -> Iterator[ast.AST]:
    """
    Transform attribute assignments like "self.foo = 42" to name assignments like "foo = 42",
    keep all constant expressions, and no-op everything else.
    This essentially allows us to inline __init__ when parsing a class definition.
    """
    for a in tree.body:
        if (
            isinstance(a, ast.AnnAssign)
            and isinstance(a.target, ast.Attribute)
            and isinstance(a.target.value, ast.Name)
            and a.target.value.id == "self"
        ):
            yield ast.AnnAssign(
                ast.Name(a.target.attr), a.annotation, a.value, simple=1
            )
        elif (
            isinstance(a, ast.Assign)
            and len(a.targets) == 1
            and isinstance(a.targets[0], ast.Attribute)
            and isinstance(a.targets[0].value, ast.Name)
            and a.targets[0].value.id == "self"
        ):
            yield ast.Assign(
                [ast.Name(a.targets[0].attr)],
                value=a.value,
                type_comment=a.type_comment,
            )
        elif _str_constant(a) is not None:
            yield a
        else:
            # Placeholder so that a following string literal is not mistaken
            # for the docstring of an earlier assignment.
            yield ast.Pass()


def _pairwise_longest(iterable: Iterable[T]) -> Iterable[tuple[T, T]]:
    """s -> (s0,s1), (s1,s2), (s2, s3), ..., (sN, None)"""
    a, b = tee(iterable)
    next(b, None)
    return zip_longest(a, b)
def get_source(obj: Any) -> str:
    """
    Return the source code of the Python object `obj` as a string.

    An empty string is returned if the source cannot be retrieved.
    """
    try:
        source = _get_source(obj)
    except TypeError:
        # The cached lookup needs a hashable argument; for unhashable objects
        # we bypass the cache and call the undecorated function directly.
        source = _get_source.__wrapped__(obj)
    return source
Returns the source code of the Python object `obj` as a str.
If this fails, an empty string is returned.
def parse(obj):
    """
    Parse a module, class or function and return the (unwrapped) AST node.

    When no source code can be found for `obj`, an empty AST node stub is
    returned instead; the stub can still be walked.
    """
    source = get_source(obj)
    # Modules and classes get dedicated parsers; anything else is treated as a function.
    if isinstance(obj, types.ModuleType):
        return _parse_module(source)
    if isinstance(obj, type):
        return _parse_class(source)
    return _parse_function(source)
Parse a module, class or function and return the (unwrapped) AST node. If an object's source code cannot be found, this function returns an empty ast node stub which can still be walked.
@cache
def unparse(tree: ast.AST):
    """Cached variant of `ast.unparse`: turn `tree` back into source text."""
    source_text = ast.unparse(tree)
    return source_text
`ast.unparse`, but cached.
@dataclass
class AstInfo:
    """Container for everything gathered while walking a syntax tree."""

    var_docstrings: dict[str, str]
    """Maps a variable's qualname to its docstring."""
    func_docstrings: dict[str, str]
    """Maps a function's qualname to its docstring."""
    annotations: dict[str, str | type[pdoc.doc_types.empty]]
    """Maps a qualname to its annotation.

    This module never evaluates annotations; they are only returned as strings."""
The information extracted from walking the syntax tree.
def walk_tree(obj: types.ModuleType | type) -> AstInfo:
    """
    Parse `obj` and walk the resulting abstract syntax tree, returning the extracted information.
    """
    tree = parse(obj)
    return _walk_tree(tree)
Walks the abstract syntax tree for `obj` and returns the extracted information.
def _declared_name(node):
    """Return the name that `node` declares, or `None` if it declares none we track."""
    if (
        isinstance(node, ast.Assign)
        and len(node.targets) == 1
        and isinstance(node.targets[0], ast.Name)
    ):
        return node.targets[0].id
    if (
        isinstance(node, ast.AnnAssign)
        and isinstance(node.target, ast.Name)
        and node.simple
    ):
        return node.target.id
    if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
        return node.name
    if isinstance(node, ast_TypeAlias):
        return node.name.id
    return None


def sort_by_source(
    obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T]
) -> tuple[dict[str, T], dict[str, T]]:
    """
    Move items from `unsorted` into `sorted`, following the order in which their
    names appear in the source code of `obj`.

    `__init__` is the one exception: if present, it is always inserted first.

    Items whose names do not appear in the source (for example because they were
    inherited from a superclass) are left in `unsorted` untouched.

    Both dictionaries are modified in place. Returns a `(sorted, not found)` tuple.
    """
    source_tree = parse(obj)

    if "__init__" in unsorted:
        sorted["__init__"] = unsorted.pop("__init__")

    for node in _nodes(source_tree):
        declared = _declared_name(node)
        if declared is None:
            continue
        if declared in unsorted:
            sorted[declared] = unsorted.pop(declared)

    return sorted, unsorted
Takes items from `unsorted` and inserts them into `sorted` in order of appearance in the source code of `obj`.
The only exception to this rule is `__init__`, which (if present) is always inserted first.
Some items may not be found, for example because they've been inherited from a superclass. They are returned as-is.
Returns a `(sorted, not found)` tuple.
def type_checking_sections(mod: types.ModuleType) -> ast.Module:
    """
    Collect every top-level statement of `mod` that sits inside an `if TYPE_CHECKING:`
    (or `if <name>.TYPE_CHECKING:`) block and return them as the body of a new `ast.Module`.
    """
    guarded = ast.Module(body=[], type_ignores=[])
    module_tree = _parse_module(get_source(mod))
    for stmt in module_tree.body:
        if not isinstance(stmt, ast.If):
            continue
        condition = stmt.test
        if isinstance(condition, ast.Name):
            # Bare `if TYPE_CHECKING:`
            is_guard = condition.id == "TYPE_CHECKING"
        elif isinstance(condition, ast.Attribute) and isinstance(
            condition.value, ast.Name
        ):
            # `if typing.TYPE_CHECKING:`. The module name is deliberately not checked:
            # some folks do "import typing as t", the accuracy with just TYPE_CHECKING is good enough.
            is_guard = condition.attr == "TYPE_CHECKING"
        else:
            is_guard = False
        if is_guard:
            guarded.body.extend(stmt.body)
    return guarded
Walks the abstract syntax tree for `mod` and returns all statements guarded by TYPE_CHECKING blocks.