pdoc.doc_ast
This module handles all interpretation of the Abstract Syntax Tree (AST) in pdoc.
Parsing the AST is done to extract docstrings, type annotations, and variable declarations from `__init__`.
1""" 2This module handles all interpretation of the *Abstract Syntax Tree (AST)* in pdoc. 3 4Parsing the AST is done to extract docstrings, type annotations, and variable declarations from `__init__`. 5""" 6from __future__ import annotations 7 8import ast 9import inspect 10import types 11import warnings 12from collections.abc import Iterable 13from collections.abc import Iterator 14from dataclasses import dataclass 15from itertools import tee 16from itertools import zip_longest 17from typing import Any 18from typing import overload 19from typing import TypeVar 20 21import pdoc 22from ._compat import ast_unparse 23from ._compat import cache 24 25 26def get_source(obj: Any) -> str: 27 """ 28 Returns the source code of the Python object `obj` as a str. 29 This tries to first unwrap the method if it is wrapped and then calls `inspect.getsource`. 30 31 If this fails, an empty string is returned. 32 """ 33 # Some objects may not be hashable, so we fall back to the non-cached version if that is the case. 34 try: 35 return _get_source(obj) 36 except TypeError: 37 return _get_source.__wrapped__(obj) 38 39 40@cache 41def _get_source(obj: Any) -> str: 42 try: 43 return inspect.getsource(obj) 44 except Exception: 45 return "" 46 47 48@overload 49def parse(obj: types.ModuleType) -> ast.Module: 50 ... 51 52 53@overload 54def parse(obj: types.FunctionType) -> ast.FunctionDef | ast.AsyncFunctionDef: 55 ... 56 57 58@overload 59def parse(obj: type) -> ast.ClassDef: 60 ... 61 62 63def parse(obj): 64 """ 65 Parse a module, class or function and return the (unwrapped) AST node. 66 If an object's source code cannot be found, this function returns an empty ast node stub 67 which can still be walked. 
68 """ 69 src = get_source(obj) 70 if isinstance(obj, types.ModuleType): 71 return _parse_module(src) 72 elif isinstance(obj, type): 73 return _parse_class(src) 74 else: 75 return _parse_function(src) 76 77 78@cache 79def unparse(tree: ast.AST): 80 """`ast.unparse`, but cached.""" 81 return ast_unparse(tree) 82 83 84@dataclass 85class AstInfo: 86 """The information extracted from walking the syntax tree.""" 87 88 docstrings: dict[str, str] 89 """A qualname -> docstring mapping.""" 90 annotations: dict[str, str] 91 """A qualname -> annotation mapping. 92 93 Annotations are not evaluated by this module and only returned as strings.""" 94 95 96def walk_tree(obj: types.ModuleType | type) -> AstInfo: 97 """ 98 Walks the abstract syntax tree for `obj` and returns the extracted information. 99 """ 100 return _walk_tree(parse(obj)) 101 102 103@cache 104def _walk_tree( 105 tree: ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef, 106) -> AstInfo: 107 docstrings = {} 108 annotations = {} 109 for a, b in _pairwise_longest(_nodes(tree)): 110 if isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) and a.simple: 111 name = a.target.id 112 annotations[name] = unparse(a.annotation) 113 elif ( 114 isinstance(a, ast.Assign) 115 and len(a.targets) == 1 116 and isinstance(a.targets[0], ast.Name) 117 ): 118 name = a.targets[0].id 119 # Make sure that all assignments are picked up, even is there is 120 # no annotation or docstring. 
121 annotations.setdefault(name, pdoc.doc_types.empty) 122 elif isinstance(a, ast.FunctionDef) and a.body: 123 first = a.body[0] 124 if isinstance(first, ast.Expr) and isinstance(first.value, ast.Str): 125 docstrings[a.name] = inspect.cleandoc(first.value.s).strip() 126 continue 127 else: 128 continue 129 if ( 130 isinstance(b, ast.Expr) 131 and isinstance(b.value, ast.Constant) 132 and isinstance(b.value.value, str) 133 ): 134 docstrings[name] = inspect.cleandoc(b.value.value).strip() 135 elif isinstance(b, ast.Expr) and isinstance( 136 b.value, ast.Str 137 ): # pragma: no cover 138 # Python <= 3.7 139 docstrings[name] = inspect.cleandoc(b.value.s).strip() 140 return AstInfo( 141 docstrings, 142 annotations, 143 ) 144 145 146T = TypeVar("T") 147 148 149def sort_by_source( 150 obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T] 151) -> tuple[dict[str, T], dict[str, T]]: 152 """ 153 Takes items from `unsorted` and inserts them into `sorted` in order of appearance in the source code of `obj`. 154 The only exception to this rule is `__init__`, which (if present) is always inserted first. 155 156 Some items may not be found, for example because they've been inherited from a superclass. They are returned as-is. 157 158 Returns a `(sorted, not found)` tuple. 
159 """ 160 tree = parse(obj) 161 162 if "__init__" in unsorted: 163 sorted["__init__"] = unsorted.pop("__init__") 164 165 for a in _nodes(tree): 166 if ( 167 isinstance(a, ast.Assign) 168 and len(a.targets) == 1 169 and isinstance(a.targets[0], ast.Name) 170 ): 171 name = a.targets[0].id 172 elif ( 173 isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) and a.simple 174 ): 175 name = a.target.id 176 elif isinstance(a, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)): 177 name = a.name 178 else: 179 continue 180 181 if name in unsorted: 182 sorted[name] = unsorted.pop(name) 183 return sorted, unsorted 184 185 186def type_checking_sections(mod: types.ModuleType) -> ast.Module: 187 """ 188 Walks the abstract syntax tree for `mod` and returns all statements guarded by TYPE_CHECKING blocks. 189 """ 190 ret = ast.Module(body=[], type_ignores=[]) 191 tree = _parse_module(get_source(mod)) 192 for node in tree.body: 193 if ( 194 isinstance(node, ast.If) 195 and isinstance(node.test, ast.Name) 196 and node.test.id == "TYPE_CHECKING" 197 ): 198 ret.body.extend(node.body) 199 if ( 200 isinstance(node, ast.If) 201 and isinstance(node.test, ast.Attribute) 202 and isinstance(node.test.value, ast.Name) 203 # some folks do "import typing as t", the accuracy with just TYPE_CHECKING is good enough. 204 # and node.test.value.id == "typing" 205 and node.test.attr == "TYPE_CHECKING" 206 ): 207 ret.body.extend(node.body) 208 return ret 209 210 211@cache 212def _parse_module(source: str) -> ast.Module: 213 """ 214 Parse the AST for the source code of a module and return the ast.Module. 215 216 Returns an empty ast.Module if source is empty. 217 """ 218 tree = _parse(source) 219 assert isinstance(tree, ast.Module) 220 return tree 221 222 223@cache 224def _parse_class(source: str) -> ast.ClassDef: 225 """ 226 Parse the AST for the source code of a class and return the ast.ClassDef. 227 228 Returns an empty ast.ClassDef if source is empty. 
229 """ 230 tree = _parse(source) 231 assert len(tree.body) <= 1 232 if tree.body: 233 t = tree.body[0] 234 assert isinstance(t, ast.ClassDef) 235 return t 236 return ast.ClassDef(body=[], decorator_list=[]) 237 238 239@cache 240def _parse_function(source: str) -> ast.FunctionDef | ast.AsyncFunctionDef: 241 """ 242 Parse the AST for the source code of a (async) function and return the matching AST node. 243 244 Returns an empty ast.FunctionDef if source is empty. 245 """ 246 tree = _parse(source) 247 assert len(tree.body) <= 1 248 if tree.body: 249 t = tree.body[0] 250 if isinstance(t, (ast.FunctionDef, ast.AsyncFunctionDef)): 251 return t 252 else: 253 # we have a lambda function, 254 # to simplify the API return the ast.FunctionDef stub. 255 pass 256 return ast.FunctionDef(body=[], decorator_list=[]) 257 258 259def _parse( 260 source: str, 261) -> ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef: 262 try: 263 return ast.parse(_dedent(source)) 264 except Exception as e: 265 warnings.warn(f"Error parsing source code: {e}\n" f"===\n" f"{source}\n" f"===") 266 return ast.parse("") 267 268 269@cache 270def _dedent(source: str) -> str: 271 """ 272 Dedent the head of a function or class definition so that it can be parsed by `ast.parse`. 273 This is an alternative to `textwrap.dedent`, which does not dedent if there are docstrings 274 without indentation. For example, this is valid Python code but would not be dedented with `textwrap.dedent`: 275 276 class Foo: 277 def bar(self): 278 ''' 279 this is a docstring 280 ''' 281 """ 282 if not source or source[0] not in (" ", "\t"): 283 return source 284 source = source.lstrip() 285 # we may have decorators before our function definition, in which case we need to dedent a few more lines. 286 # the following heuristic should be good enough to detect if we have reached the definition. 287 # it's easy to produce examples where this fails, but this probably is not a problem in practice. 
288 if not any(source.startswith(x) for x in ["async ", "def ", "class "]): 289 first_line, rest = source.split("\n", 1) 290 return first_line + "\n" + _dedent(rest) 291 else: 292 return source 293 294 295@cache 296def _nodes(tree: ast.Module | ast.ClassDef) -> list[ast.AST]: 297 """ 298 Returns the list of all nodes in tree's body, but also inlines the body of __init__. 299 300 This is useful to detect all declared variables in a class, even if they only appear in the constructor. 301 """ 302 return list(_nodes_iter(tree)) 303 304 305def _nodes_iter(tree: ast.Module | ast.ClassDef) -> Iterator[ast.AST]: 306 for a in tree.body: 307 yield a 308 if isinstance(a, ast.FunctionDef) and a.name == "__init__": 309 yield from _init_nodes(a) 310 311 312def _init_nodes(tree: ast.FunctionDef) -> Iterator[ast.AST]: 313 """ 314 Transform attribute assignments like "self.foo = 42" to name assignments like "foo = 42", 315 keep all constant expressions, and no-op everything else. 316 This essentially allows us to inline __init__ when parsing a class definition. 
317 """ 318 for a in tree.body: 319 if ( 320 isinstance(a, ast.AnnAssign) 321 and isinstance(a.target, ast.Attribute) 322 and isinstance(a.target.value, ast.Name) 323 and a.target.value.id == "self" 324 ): 325 yield ast.AnnAssign( 326 ast.Name(a.target.attr), a.annotation, a.value, simple=1 327 ) 328 elif ( 329 isinstance(a, ast.Assign) 330 and len(a.targets) == 1 331 and isinstance(a.targets[0], ast.Attribute) 332 and isinstance(a.targets[0].value, ast.Name) 333 and a.targets[0].value.id == "self" 334 ): 335 yield ast.Assign( 336 [ast.Name(a.targets[0].attr)], 337 value=a.value, 338 # not available on Python 3.7 339 type_comment=getattr(a, "type_comment", None), 340 ) 341 elif ( 342 isinstance(a, ast.Expr) 343 and isinstance(a.value, ast.Constant) 344 and isinstance(a.value.value, str) 345 ): 346 yield a 347 elif isinstance(a, ast.Expr) and isinstance( 348 a.value, ast.Str 349 ): # pragma: no cover 350 # Python <= 3.7 351 yield a 352 else: 353 yield ast.Pass() 354 355 356def _pairwise_longest(iterable: Iterable[T]) -> Iterable[tuple[T, T]]: 357 """s -> (s0,s1), (s1,s2), (s2, s3), ..., (sN, None)""" 358 a, b = tee(iterable) 359 next(b, None) 360 return zip_longest(a, b)
def get_source(obj: Any) -> str:
    """
    Look up the source code of the Python object `obj` and return it as a str.

    The lookup goes through the cached `_get_source` helper. If that call raises a
    `TypeError` (some objects are not hashable and thus cannot be used as a cache key),
    the undecorated function is invoked directly via `__wrapped__`.

    An empty string is returned if the source cannot be obtained.
    """
    try:
        source = _get_source(obj)
    except TypeError:
        # Unhashable object: bypass the cache.
        source = _get_source.__wrapped__(obj)
    return source
Returns the source code of the Python object `obj` as a str.
This tries to first unwrap the method if it is wrapped and then calls `inspect.getsource`.
If this fails, an empty string is returned.
def parse(obj):
    """
    Return the (unwrapped) AST node for a module, class or function.

    The kind of `obj` selects the parser: modules yield an `ast.Module`, classes an
    `ast.ClassDef`, and everything else is treated as a function.
    If no source code can be found for `obj`, an empty AST node stub is returned,
    which can still be walked.
    """
    src = get_source(obj)
    if isinstance(obj, types.ModuleType):
        parser = _parse_module
    elif isinstance(obj, type):
        parser = _parse_class
    else:
        parser = _parse_function
    return parser(src)
Parse a module, class or function and return the (unwrapped) AST node. If an object's source code cannot be found, this function returns an empty ast node stub which can still be walked.
@cache
def unparse(tree: ast.AST):
    """Turn `tree` back into source text using `ast_unparse`; results are cached per node."""
    source_text = ast_unparse(tree)
    return source_text
`ast.unparse`, but cached.
@dataclass
class AstInfo:
    """Container for the results of walking a syntax tree."""

    docstrings: dict[str, str]
    """Maps a qualname to its docstring."""

    annotations: dict[str, str]
    """Maps a qualname to its annotation.

    This module never evaluates annotations; they are kept as strings."""
The information extracted from walking the syntax tree.
def walk_tree(obj: types.ModuleType | type) -> AstInfo:
    """
    Parse `obj` into its abstract syntax tree, walk that tree, and return the extracted information.
    """
    tree = parse(obj)
    return _walk_tree(tree)
Walks the abstract syntax tree for `obj` and returns the extracted information.
def sort_by_source(
    obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T]
) -> tuple[dict[str, T], dict[str, T]]:
    """
    Moves items from `unsorted` into `sorted`, following the order in which their names
    are declared in the source code of `obj`.
    `__init__` is special-cased: if present in `unsorted`, it is moved before anything else.

    Names that do not appear in the source (for example because they've been inherited
    from a superclass) stay in `unsorted` untouched.

    Returns a `(sorted, not found)` tuple; both are the dicts that were passed in.
    """
    tree = parse(obj)

    if "__init__" in unsorted:
        sorted["__init__"] = unsorted.pop("__init__")

    for node in _nodes(tree):
        # Determine which name (if any) this node declares.
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            name = node.name
        elif isinstance(node, ast.AnnAssign):
            if not (isinstance(node.target, ast.Name) and node.simple):
                continue
            name = node.target.id
        elif isinstance(node, ast.Assign):
            if len(node.targets) != 1:
                continue
            target = node.targets[0]
            if not isinstance(target, ast.Name):
                continue
            name = target.id
        else:
            continue

        if name not in unsorted:
            continue
        sorted[name] = unsorted.pop(name)

    return sorted, unsorted
Takes items from `unsorted` and inserts them into `sorted` in order of appearance in the source code of `obj`.
The only exception to this rule is `__init__`, which (if present) is always inserted first.
Some items may not be found, for example because they've been inherited from a superclass. They are returned as-is.
Returns a `(sorted, not found)` tuple.
def type_checking_sections(mod: types.ModuleType) -> ast.Module:
    """
    Collects the bodies of all top-level `if` statements in `mod` that are guarded by
    TYPE_CHECKING and returns them as the body of a new `ast.Module`.

    Both `if TYPE_CHECKING:` and `if <name>.TYPE_CHECKING:` are recognized.
    """
    ret = ast.Module(body=[], type_ignores=[])
    tree = _parse_module(get_source(mod))
    for node in tree.body:
        if not isinstance(node, ast.If):
            continue
        test = node.test
        if isinstance(test, ast.Name):
            guarded = test.id == "TYPE_CHECKING"
        elif isinstance(test, ast.Attribute):
            # some folks do "import typing as t", the accuracy with just TYPE_CHECKING is good enough,
            # so the name in front of the dot is deliberately not compared against "typing".
            guarded = isinstance(test.value, ast.Name) and test.attr == "TYPE_CHECKING"
        else:
            guarded = False
        if guarded:
            ret.body.extend(node.body)
    return ret
Walks the abstract syntax tree for `mod` and returns all statements guarded by TYPE_CHECKING blocks.