Edit on GitHub

pdoc.doc_ast

This module handles all interpretation of the Abstract Syntax Tree (AST) in pdoc.

Parsing the AST is done to extract docstrings, type annotations, and variable declarations from __init__.

  1"""
  2This module handles all interpretation of the *Abstract Syntax Tree (AST)* in pdoc.
  3
  4Parsing the AST is done to extract docstrings, type annotations, and variable declarations from `__init__`.
  5"""
  6
  7from __future__ import annotations
  8
  9import ast
 10from collections.abc import Iterable
 11from collections.abc import Iterator
 12from dataclasses import dataclass
 13from functools import cache
 14import inspect
 15from itertools import tee
 16from itertools import zip_longest
 17import types
 18from typing import TYPE_CHECKING
 19from typing import Any
 20from typing import TypeVar
 21from typing import overload
 22import warnings
 23
 24import pdoc
 25
 26from ._compat import ast_TypeAlias
 27
 28if TYPE_CHECKING:
 29    import pdoc.doc_types
 30
 31
 32def get_source(obj: Any) -> str:
 33    """
 34    Returns the source code of the Python object `obj` as a str.
 35
 36    If this fails, an empty string is returned.
 37    """
 38    # Some objects may not be hashable, so we fall back to the non-cached version if that is the case.
 39    try:
 40        return _get_source(obj)
 41    except TypeError:
 42        return _get_source.__wrapped__(obj)
 43
 44
 45@cache
 46def _get_source(obj: Any) -> str:
 47    try:
 48        return inspect.getsource(obj)
 49    except Exception:
 50        return ""
 51
 52
 53@overload
 54def parse(obj: types.ModuleType) -> ast.Module: ...
 55
 56
 57@overload
 58def parse(obj: types.FunctionType) -> ast.FunctionDef | ast.AsyncFunctionDef: ...
 59
 60
 61@overload
 62def parse(obj: type) -> ast.ClassDef: ...
 63
 64
 65def parse(obj):
 66    """
 67    Parse a module, class or function and return the (unwrapped) AST node.
 68    If an object's source code cannot be found, this function returns an empty ast node stub
 69    which can still be walked.
 70    """
 71    src = get_source(obj)
 72    if isinstance(obj, types.ModuleType):
 73        return _parse_module(src)
 74    elif isinstance(obj, type):
 75        return _parse_class(src)
 76    else:
 77        return _parse_function(src)
 78
 79
 80@cache
 81def unparse(tree: ast.AST):
 82    """`ast.unparse`, but cached."""
 83    return ast.unparse(tree)
 84
 85
@dataclass
class AstInfo:
    """
    The information extracted from walking the syntax tree.

    Instances are built by `_walk_tree`. Each field is a dict keyed by the name
    of the variable or function as it is written in the source.
    """

    var_docstrings: dict[str, str]
    """A qualname -> docstring mapping."""
    func_docstrings: dict[str, str]
    """A qualname -> docstring mapping for functions."""
    annotations: dict[str, str | type[pdoc.doc_types.empty]]
    """A qualname -> annotation mapping.
    
    Annotations are not evaluated by this module and only returned as strings."""
 98
 99
def walk_tree(obj: types.ModuleType | type) -> AstInfo:
    """
    Parse `obj` and return the information extracted from its abstract syntax tree.
    """
    tree = parse(obj)
    return _walk_tree(tree)
105
106
@cache
def _walk_tree(
    tree: ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef,
) -> AstInfo:
    """
    Collect variable docstrings, function docstrings and annotations from `tree`.

    The statements of `tree` (with the body of `__init__` inlined, see `_nodes`)
    are inspected pairwise: for a type alias, a simple annotated assignment or a
    single-target name assignment, a string literal in the directly following
    statement is recorded as that variable's docstring.
    Function docstrings are taken from the first statement of both `def` and
    `async def` bodies.

    The result is cached per tree object, so the returned `AstInfo` is shared
    between all callers passing the same tree.
    """
    var_docstrings = {}
    func_docstrings = {}
    annotations = {}
    for a, b in _pairwise_longest(_nodes(tree)):
        if isinstance(a, ast_TypeAlias):
            name = a.name.id
        elif (
            isinstance(a, ast.AnnAssign) and isinstance(a.target, ast.Name) and a.simple
        ):
            name = a.target.id
            annotations[name] = unparse(a.annotation)
        elif (
            isinstance(a, ast.Assign)
            and len(a.targets) == 1
            and isinstance(a.targets[0], ast.Name)
        ):
            name = a.targets[0].id
            # Make sure that all assignments are picked up, even if there is
            # no annotation or docstring.
            annotations.setdefault(name, pdoc.doc_types.empty)
        elif isinstance(a, (ast.FunctionDef, ast.AsyncFunctionDef)) and a.body:
            # Coroutine functions carry their docstring in the same place as
            # regular functions, so both node types are handled here.
            first = a.body[0]
            if (
                isinstance(first, ast.Expr)
                and isinstance(first.value, ast.Constant)
                and isinstance(first.value.value, str)
            ):
                func_docstrings[a.name] = inspect.cleandoc(first.value.value).strip()
            # Functions never take a variable docstring from the next statement.
            continue
        else:
            continue
        # `b` is the statement following `a` (or None at the end of the body).
        if (
            isinstance(b, ast.Expr)
            and isinstance(b.value, ast.Constant)
            and isinstance(b.value.value, str)
        ):
            var_docstrings[name] = inspect.cleandoc(b.value.value).strip()
    return AstInfo(
        var_docstrings,
        func_docstrings,
        annotations,
    )
153
154
155T = TypeVar("T")
156
157
def sort_by_source(
    obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T]
) -> tuple[dict[str, T], dict[str, T]]:
    """
    Move items from `unsorted` into `sorted`, following the order in which their
    names are defined in the source code of `obj`.
    `__init__` is the one exception: if present, it is always moved first.

    Names that do not appear in the source (for example because they've been
    inherited from a superclass) stay in `unsorted`.
    Both dicts are modified in place.

    Returns a `(sorted, not found)` tuple.
    """
    tree = parse(obj)

    # The constructor goes first, regardless of its position in the source.
    if "__init__" in unsorted:
        sorted["__init__"] = unsorted.pop("__init__")

    for node in _nodes(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            name = node.name
        elif isinstance(node, ast.AnnAssign):
            # Only simple annotated names (`x: int`), not attributes or subscripts.
            if not (isinstance(node.target, ast.Name) and node.simple):
                continue
            name = node.target.id
        elif isinstance(node, ast.Assign):
            # Only plain `name = value` assignments with a single target.
            if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name):
                continue
            name = node.targets[0].id
        elif isinstance(node, ast_TypeAlias):
            name = node.name.id
        else:
            continue

        if name in unsorted:
            sorted[name] = unsorted.pop(name)
    return sorted, unsorted
195
196
def type_checking_sections(mod: types.ModuleType) -> ast.Module:
    """
    Return a new `ast.Module` holding all statements of `mod` that are guarded by
    a top-level `if TYPE_CHECKING:` or `if <name>.TYPE_CHECKING:` block.
    """
    guarded = ast.Module(body=[], type_ignores=[])
    for stmt in _parse_module(get_source(mod)).body:
        if not isinstance(stmt, ast.If):
            continue
        test = stmt.test
        if isinstance(test, ast.Name):
            is_guard = test.id == "TYPE_CHECKING"
        elif isinstance(test, ast.Attribute):
            # some folks do "import typing as t", so the module name is not checked:
            # the accuracy with just TYPE_CHECKING is good enough.
            is_guard = isinstance(test.value, ast.Name) and test.attr == "TYPE_CHECKING"
        else:
            is_guard = False
        if is_guard:
            guarded.body.extend(stmt.body)
    return guarded
220
221
@cache
def _parse_module(source: str) -> ast.Module:
    """
    Parse the source code of a module and return the resulting ast.Module.

    An empty ast.Module is returned if source is empty.
    """
    module = _parse(source)
    assert isinstance(module, ast.Module)
    return module
232
233
@cache
def _parse_class(source: str) -> ast.ClassDef:
    """
    Parse the source code of a class and return its ast.ClassDef.

    If the source does not consist of exactly one class definition (for example
    because it is empty), an empty ast.ClassDef stub is returned instead.
    """
    statements = _parse(source).body
    if len(statements) == 1 and isinstance(statements[0], ast.ClassDef):
        return statements[0]
    return ast.ClassDef(name="PdocStub", body=[], decorator_list=[])  # type: ignore
247
248
@cache
def _parse_function(source: str) -> ast.FunctionDef | ast.AsyncFunctionDef:
    """
    Parse the source code of a (async) function and return the matching AST node.

    If the source does not consist of exactly one function definition (for
    example because it is empty), an empty ast.FunctionDef stub is returned.
    """
    statements = _parse(source).body
    if len(statements) == 1:
        node = statements[0]
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            return node
    # Anything else (e.g. a lambda function) gets the ast.FunctionDef stub
    # to keep the API simple.
    return ast.FunctionDef(
        name="pdoc_stub", args=ast.arguments(), body=[], decorator_list=[]
    )  # type: ignore
268
269
def _parse(
    source: str,
) -> ast.Module | ast.ClassDef | ast.FunctionDef | ast.AsyncFunctionDef:
    """
    Dedent and parse `source`.

    Any error is turned into a warning that includes the offending source,
    and the parse result of an empty string is returned instead.
    """
    try:
        tree = ast.parse(_dedent(source))
    except Exception as e:
        warnings.warn(f"Error parsing source code: {e}\n===\n{source}\n===")
        tree = ast.parse("")
    return tree
278
279
@cache
def _dedent(source: str) -> str:
    """
    Dedent the head of a function or class definition so that it can be parsed by `ast.parse`.
    This is an alternative to `textwrap.dedent`, which does not dedent if there are docstrings
    without indentation. For example, this is valid Python code but would not be dedented with `textwrap.dedent`:

    class Foo:
        def bar(self):
           '''
    this is a docstring
           '''

    Only the leading lines are changed: leading whitespace is stripped line by line
    until a line starting with `def `, `async ` or `class ` (or a line without
    indentation) is reached. Everything after that point is returned unmodified.
    Input without any newline, including whitespace-only input, is stripped and
    returned without raising.
    """
    head: list[str] = []
    rest = source
    while rest and rest[0] in (" ", "\t"):
        rest = rest.lstrip()
        # we may have decorators before our function definition, in which case we need to dedent a few more lines.
        # the following heuristic should be good enough to detect if we have reached the definition.
        # it's easy to produce examples where this fails, but this probably is not a problem in practice.
        if rest.startswith(("async ", "def ", "class ")):
            break
        line, newline, remainder = rest.partition("\n")
        if not newline:
            # Last line without a trailing newline: nothing left to dedent.
            break
        head.append(line + newline)
        rest = remainder
    return "".join(head) + rest
304
305
@cache
def _nodes(tree: ast.Module | ast.ClassDef) -> list[ast.AST]:
    """
    Return all nodes in tree's body as a list, with the body of __init__ inlined.

    Inlining makes it possible to detect all variables declared in a class,
    even those that only appear in the constructor.
    """
    return [*_nodes_iter(tree)]
314
315
316def _nodes_iter(tree: ast.Module | ast.ClassDef) -> Iterator[ast.AST]:
317    for a in tree.body:
318        yield a
319        if isinstance(a, ast.FunctionDef) and a.name == "__init__":
320            yield from _init_nodes(a)
321
322
def _init_nodes(tree: ast.FunctionDef) -> Iterator[ast.AST]:
    """
    Yield one node per statement in the body of `tree`:

    - attribute assignments like "self.foo = 42" (annotated or not) become
      name assignments like "foo = 42",
    - string constant expressions are passed through unchanged,
    - everything else is replaced by a no-op `ast.Pass`.

    This essentially allows us to inline __init__ when parsing a class definition.
    """

    def self_attr(target):
        # Name of the attribute if `target` has the form `self.<attr>`, else None.
        if (
            isinstance(target, ast.Attribute)
            and isinstance(target.value, ast.Name)
            and target.value.id == "self"
        ):
            return target.attr
        return None

    for stmt in tree.body:
        if isinstance(stmt, ast.AnnAssign):
            attr = self_attr(stmt.target)
            if attr is not None:
                yield ast.AnnAssign(
                    ast.Name(attr), stmt.annotation, stmt.value, simple=1
                )
                continue
        elif isinstance(stmt, ast.Assign):
            attr = self_attr(stmt.targets[0]) if len(stmt.targets) == 1 else None
            if attr is not None:
                yield ast.Assign(
                    [ast.Name(attr)],
                    value=stmt.value,
                    type_comment=stmt.type_comment,
                )
                continue
        elif (
            isinstance(stmt, ast.Expr)
            and isinstance(stmt.value, ast.Constant)
            and isinstance(stmt.value.value, str)
        ):
            yield stmt
            continue
        yield ast.Pass()
359
360
def _pairwise_longest(iterable: Iterable[T]) -> Iterable[tuple[T, T]]:
    """
    Pair every item with its successor; the last item is paired with None.

    s -> (s0,s1), (s1,s2), (s2, s3),  ..., (sN, None)
    """
    current, following = tee(iterable)
    # Advance the second iterator by one so it runs one item ahead.
    next(following, None)
    return zip_longest(current, following)
def get_source(obj: Any) -> str:
def get_source(obj: Any) -> str:
    """
    Return the source code of the Python object `obj` as a string.

    An empty string is returned if the source cannot be retrieved.
    """
    try:
        source = _get_source(obj)
    except TypeError:
        # The cached lookup has to hash `obj`. Unhashable objects end up here
        # and are passed to the undecorated function instead.
        source = _get_source.__wrapped__(obj)
    return source

Returns the source code of the Python object obj as a str.

If this fails, an empty string is returned.

def parse(obj):
def parse(obj):
    """
    Parse a module, class or function and return the (unwrapped) AST node.

    Modules are handed to the module parser, classes to the class parser and
    every other object is treated as a function.
    If an object's source code cannot be found, this function returns an empty ast node stub
    which can still be walked.
    """
    source = get_source(obj)
    if isinstance(obj, types.ModuleType):
        parser = _parse_module
    elif isinstance(obj, type):
        parser = _parse_class
    else:
        parser = _parse_function
    return parser(source)

Parse a module, class or function and return the (unwrapped) AST node. If an object's source code cannot be found, this function returns an empty ast node stub which can still be walked.

@cache
def unparse(tree: ast.AST):
@cache
def unparse(tree: ast.AST):
    """Return the source text for `tree`: `ast.unparse`, but cached."""
    text = ast.unparse(tree)
    return text

ast.unparse, but cached.

@dataclass
class AstInfo:
@dataclass
class AstInfo:
    """
    The information extracted from walking the syntax tree.

    Instances are built by `_walk_tree`. Each field is a dict keyed by the name
    of the variable or function as it is written in the source.
    """

    var_docstrings: dict[str, str]
    """A qualname -> docstring mapping."""
    func_docstrings: dict[str, str]
    """A qualname -> docstring mapping for functions."""
    annotations: dict[str, str | type[pdoc.doc_types.empty]]
    """A qualname -> annotation mapping.
    
    Annotations are not evaluated by this module and only returned as strings."""

The information extracted from walking the syntax tree.

AstInfo( var_docstrings: dict[str, str], func_docstrings: dict[str, str], annotations: dict[str, str | type[inspect._empty]])
var_docstrings: dict[str, str]

A qualname -> docstring mapping.

func_docstrings: dict[str, str]

A qualname -> docstring mapping for functions.

annotations: dict[str, str | type[inspect._empty]]

A qualname -> annotation mapping.

Annotations are not evaluated by this module and only returned as strings.

def walk_tree(obj: module | type) -> AstInfo:
def walk_tree(obj: types.ModuleType | type) -> AstInfo:
    """
    Parse `obj` and return the information extracted from its abstract syntax tree.
    """
    tree = parse(obj)
    return _walk_tree(tree)

Walks the abstract syntax tree for obj and returns the extracted information.

def sort_by_source( obj: module | type, sorted: dict[str, ~T], unsorted: dict[str, ~T]) -> tuple[dict[str, ~T], dict[str, ~T]]:
def sort_by_source(
    obj: types.ModuleType | type, sorted: dict[str, T], unsorted: dict[str, T]
) -> tuple[dict[str, T], dict[str, T]]:
    """
    Move items from `unsorted` into `sorted`, following the order in which their
    names are defined in the source code of `obj`.
    `__init__` is the one exception: if present, it is always moved first.

    Names that do not appear in the source (for example because they've been
    inherited from a superclass) stay in `unsorted`.
    Both dicts are modified in place.

    Returns a `(sorted, not found)` tuple.
    """
    tree = parse(obj)

    # The constructor goes first, regardless of its position in the source.
    if "__init__" in unsorted:
        sorted["__init__"] = unsorted.pop("__init__")

    for node in _nodes(tree):
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            name = node.name
        elif isinstance(node, ast.AnnAssign):
            # Only simple annotated names (`x: int`), not attributes or subscripts.
            if not (isinstance(node.target, ast.Name) and node.simple):
                continue
            name = node.target.id
        elif isinstance(node, ast.Assign):
            # Only plain `name = value` assignments with a single target.
            if len(node.targets) != 1 or not isinstance(node.targets[0], ast.Name):
                continue
            name = node.targets[0].id
        elif isinstance(node, ast_TypeAlias):
            name = node.name.id
        else:
            continue

        if name in unsorted:
            sorted[name] = unsorted.pop(name)
    return sorted, unsorted

Takes items from unsorted and inserts them into sorted in order of appearance in the source code of obj. The only exception to this rule is __init__, which (if present) is always inserted first.

Some items may not be found, for example because they've been inherited from a superclass. They are returned as-is.

Returns a (sorted, not found) tuple.

def type_checking_sections(mod: module) -> ast.Module:
def type_checking_sections(mod: types.ModuleType) -> ast.Module:
    """
    Return a new `ast.Module` holding all statements of `mod` that are guarded by
    a top-level `if TYPE_CHECKING:` or `if <name>.TYPE_CHECKING:` block.
    """
    guarded = ast.Module(body=[], type_ignores=[])
    for stmt in _parse_module(get_source(mod)).body:
        if not isinstance(stmt, ast.If):
            continue
        test = stmt.test
        if isinstance(test, ast.Name):
            is_guard = test.id == "TYPE_CHECKING"
        elif isinstance(test, ast.Attribute):
            # some folks do "import typing as t", so the module name is not checked:
            # the accuracy with just TYPE_CHECKING is good enough.
            is_guard = isinstance(test.value, ast.Name) and test.attr == "TYPE_CHECKING"
        else:
            is_guard = False
        if is_guard:
            guarded.body.extend(stmt.body)
    return guarded

Walks the abstract syntax tree for mod and returns all statements guarded by TYPE_CHECKING blocks.