Edit on GitHub

pdoc.extract

This module handles the interaction with Python's module system, that is, it loads the correct module based on whatever the user specified, and provides the rest of pdoc with some additional module metadata.

  1"""
  2This module handles the interaction with Python's module system,
  3that is it loads the correct module based on whatever the user specified,
  4and provides the rest of pdoc with some additional module metadata.
  5"""
  6from __future__ import annotations
  7
  8import importlib
  9import importlib.util
 10import io
 11import linecache
 12import os
 13import pkgutil
 14import platform
 15import re
 16import shutil
 17import subprocess
 18import sys
 19import traceback
 20import types
 21import warnings
 22from collections.abc import Iterable, Iterator, Sequence
 23from contextlib import contextmanager
 24from pathlib import Path
 25from unittest.mock import patch
 26
 27import pdoc
 28import pdoc.doc_ast
 29import pdoc.docstrings
 30
 31
 32def walk_specs(specs: Sequence[Path | str]) -> list[str]:
 33    """
 34    This function processes a list of module specifications and returns a collection of module names, including all
 35    submodules, that should be processed by pdoc.
 36
 37    A module specification can either be the name of an installed module, or the path to a specific file or package.
 38    For example, the following strings are valid module specifications:
 39
 40     - `typing`
 41     - `collections.abc`
 42     - `./test/testdata/demo_long.py`
 43     - `./test/testdata/demopackage`
 44
 45    *This function has side effects:* See `parse_spec`.
 46    """
 47    all_modules: dict[str, None] = {}
 48    for spec in specs:
 49
 50        if isinstance(spec, str) and spec.startswith("!"):
 51            ignore_pattern = re.compile(spec[1:])
 52            all_modules = {
 53                k: v for k, v in all_modules.items() if not ignore_pattern.match(k)
 54            }
 55            continue
 56
 57        modname = parse_spec(spec)
 58
 59        try:
 60            with mock_some_common_side_effects():
 61                modspec = importlib.util.find_spec(modname)
 62                if modspec is None:
 63                    raise ModuleNotFoundError(modname)
 64        except AnyException:
 65            warnings.warn(
 66                f"Cannot find spec for {modname} (from {spec}):\n{traceback.format_exc()}",
 67                stacklevel=2,
 68            )
 69        else:
 70            mod_info = pkgutil.ModuleInfo(
 71                None,  # type: ignore
 72                name=modname,
 73                ispkg=bool(modspec.submodule_search_locations),
 74            )
 75            for m in walk_packages2([mod_info]):
 76                if m.name in all_modules:
 77                    warnings.warn(
 78                        f"The module specification {spec!r} adds a module named {m.name}, but a module with this name "
 79                        f"has already been added. You may have accidentally repeated a module spec, or you are trying "
 80                        f"to document two modules with the same filename from two different directories, which does "
 81                        f"not work. Only one documentation page will be generated."
 82                    )
 83                all_modules[m.name] = None
 84
 85    if not all_modules:
 86        raise ValueError(
 87            f"No modules found matching spec: {', '.join(str(x) for x in specs)}"
 88        )
 89
 90    return list(all_modules)
 91
 92
 93def parse_spec(spec: Path | str) -> str:
 94    """
 95    This functions parses a user's module specification into a module identifier that can be imported.
 96    If both a local file/directory and an importable module with the same name exist, a warning will be printed.
 97
 98    *This function has side effects:* `sys.path` will be amended if the specification is a path.
 99    If this side effect is undesired, pass a module name instead.
100    """
101    pspec = Path(spec)
102    if isinstance(spec, str) and (os.sep in spec or (os.altsep and os.altsep in spec)):
103        # We have a path separator, so it's definitely a filepath.
104        spec = pspec
105
106    if isinstance(spec, str) and (pspec.is_file() or (pspec / "__init__.py").is_file()):
107        # We have a local file with this name, but is there also a module with the same name?
108        try:
109            with mock_some_common_side_effects():
110                modspec = importlib.util.find_spec(spec)
111                if modspec is None:
112                    raise ModuleNotFoundError
113        except AnyException:
114            # Module does not exist, use local file.
115            spec = pspec
116        else:
117            # Module does exist. We now check if the local file/directory is the same (e.g. after pip install -e),
118            # and emit a warning if that's not the case.
119            origin = (
120                Path(modspec.origin).absolute() if modspec.origin else Path("unknown")
121            )
122            local_dir = Path(spec).absolute()
123            if local_dir not in (origin, origin.parent):
124                warnings.warn(
125                    f"{spec!r} may refer to either the installed Python module or the local file/directory with the "
126                    f"same name. pdoc will document the installed module, prepend './' to force documentation of the "
127                    f"local file/directory.\n"
128                    f" - Module location: {origin}\n"
129                    f" - Local file/directory: {local_dir}",
130                    RuntimeWarning,
131                )
132
133    if isinstance(spec, Path):
134        if (spec.parent / "__init__.py").exists():
135            return parse_spec(spec.resolve().parent) + f".{spec.stem}"
136        parent_dir = str(spec.parent)
137        sys.path = [parent_dir] + [x for x in sys.path if x != parent_dir]
138        if spec.stem in sys.modules and sys.modules[spec.stem].__file__:
139            local_dir = spec.resolve()
140            file = sys.modules[spec.stem].__file__
141            assert file is not None  # make mypy happy
142            origin = Path(file).resolve()
143            if local_dir not in (origin, origin.parent, origin.with_suffix("")):
144                warnings.warn(
145                    f"pdoc cannot load {spec.stem!r} because a module with the same name is already imported in pdoc's "
146                    f"Python process. pdoc will document the loaded module from {origin} instead.",
147                    RuntimeWarning,
148                )
149        return spec.stem
150    else:
151        return spec
152
153
def _noop(*args, **kwargs):
    """Accept arbitrary arguments, do nothing, and return `None`."""
    return None
156
157
class _PdocDefusedPopen(subprocess.Popen):
    """
    A small wrapper around subprocess.Popen that converts most executions into no-ops.

    Unless the first element of the command is on a short allow-list, the `executable` keyword is
    overridden with an `echo` binary before the call is handed to `subprocess.Popen`.
    """

    # Name of the harmless program that is substituted for disallowed commands.
    _noop_exe = "echo.exe" if platform.system() == "Windows" else "echo"

    def __init__(self, *args, **kwargs):  # pragma: no cover
        if args and args[0]:
            program = args[0][0]
            command_allowed = program in (
                # these invocations may all come from https://github.com/python/cpython/blob/main/Lib/ctypes/util.py,
                # which we want to keep working.
                "/sbin/ldconfig",
                "ld",
                shutil.which("gcc") or shutil.which("cc"),
                shutil.which("objdump"),
                # https://github.com/mitmproxy/pdoc/issues/430: GitPython invokes git commands, which is also fine.
                "git",
            )
        else:
            # No positional command at all: never on the allow-list.
            command_allowed = False

        if not command_allowed:
            kwargs["executable"] = self._noop_exe
        super().__init__(*args, **kwargs)
185
186
@contextmanager
def mock_some_common_side_effects():
    """
    Context manager that suppresses some common side effects of importing a module.

    While active, `subprocess.Popen` is replaced with `_PdocDefusedPopen`, `os.startfile` with a no-op
    (created if it does not exist), and `sys.stdout`, `sys.stderr` and `sys.stdin` with fresh in-memory
    text buffers. For example, `import antigravity` normally causes a web browser to open, which we want
    to suppress.

    Note that this function must not be used for security purposes, it's easily bypassable.
    """
    with patch("subprocess.Popen", new=_PdocDefusedPopen):
        with patch("os.startfile", new=_noop, create=True):
            with patch("sys.stdout", new=io.StringIO()):
                with patch("sys.stderr", new=io.StringIO()):
                    with patch("sys.stdin", new=io.StringIO()):
                        yield
203
204
@mock_some_common_side_effects()
def load_module(module: str) -> types.ModuleType:
    """
    Import the module with the given name and return it.

    The import runs with `mock_some_common_side_effects` applied. Any exception covered by `AnyException`
    is re-raised as a `RuntimeError` with the original exception attached as its cause.
    """
    try:
        imported = importlib.import_module(module)
    except AnyException as e:
        raise RuntimeError(f"Error importing {module}") from e
    else:
        return imported
214
215
# Tuple of exception classes used by this module's `except AnyException:` clauses
# around import-machinery calls (`find_spec`, `import_module`, `reload`).
AnyException = (SystemExit, GeneratorExit, Exception)
"""BaseException, but excluding KeyboardInterrupt.

Modules may raise SystemExit on import (which we want to catch),
but we don't want to catch a user's KeyboardInterrupt.
"""
222
223
def walk_packages2(
    modules: Iterable[pkgutil.ModuleInfo],
) -> Iterator[pkgutil.ModuleInfo]:
    """
    Yield every given module, followed recursively by the submodules of each package among them.

    This function is similar to `pkgutil.walk_packages`, but respects a package's `__all__` attribute if specified.
    If `__all__` is defined, submodules not listed in `__all__` are excluded. Submodules named `__main__`
    are always skipped.

    Packages that fail to load emit a warning and are not descended into; they are still yielded themselves.
    """
    # Path entries already handled by this invocation.
    visited_paths: set = set()

    def seen(p) -> bool:  # pragma: no cover
        if p in visited_paths:
            return True
        visited_paths.add(p)
        return False

    for mod in modules:
        yield mod

        if not mod.ispkg:
            continue

        try:
            module = load_module(mod.name)
        except RuntimeError:
            warnings.warn(f"Error loading {mod.name}:\n{traceback.format_exc()}")
            continue

        mod_all = getattr(module, "__all__", None)

        # don't traverse path items we've seen before
        search_path = []
        for entry in getattr(module, "__path__", None) or []:
            if not seen(entry):
                search_path.append(entry)

        submodules = []
        for submodule in pkgutil.iter_modules(search_path, f"{mod.name}."):
            leaf = submodule.name.rpartition(".")[2]
            if leaf == "__main__":
                continue  # https://github.com/mitmproxy/pdoc/issues/438
            if mod_all is not None and leaf not in mod_all:
                continue
            submodules.append(submodule)

        yield from walk_packages2(submodules)
266
267
def module_mtime(modulename: str) -> float | None:
    """Returns the time the specified module file was last modified, or `None` if this cannot be determined.
    The primary use of this is live-reloading modules on modification.

    `None` is returned when the module spec cannot be found, when the spec has no origin,
    or when the origin cannot be stat'ed as a file."""
    try:
        with mock_some_common_side_effects():
            spec = importlib.util.find_spec(modulename)
    except AnyException:
        return None

    if spec is None or spec.origin is None:
        return None

    try:
        return Path(spec.origin).stat().st_mtime
    except OSError:
        # The origin is not guaranteed to be an existing file: built-in and frozen modules
        # report placeholder origins such as "built-in", and a file may have been removed.
        return None
280
281
def invalidate_caches(module_name: str) -> None:
    """
    Invalidate module cache to allow live-reloading of modules.

    Nothing happens if `module_name` is not currently imported, or if it is (a submodule of) one of
    pdoc's own dependencies. Otherwise the import, line and pdoc-internal caches are cleared, and
    `module_name` plus all of its loaded submodules are reloaded in sorted order. Reload failures
    are reported as warnings.
    """
    # Getting this right is tricky – reloading modules causes a bunch of surprising side effects.
    # Our current best effort is to call `importlib.reload` on all modules that start with module_name.
    if module_name not in sys.modules:
        return

    # We also exclude our own dependencies, which cause fun errors otherwise.
    own_dependencies = ("jinja2", "markupsafe", "markdown2", "pygments")
    if module_name.partition(".")[0] in own_dependencies:
        return

    # A more extreme alternative would be to skip every module whose file is located below
    # sysconfig.get_path("platstdlib") or sysconfig.get_path("stdlib").

    importlib.invalidate_caches()
    linecache.clearcache()
    pdoc.doc.Module.from_name.cache_clear()
    pdoc.doc_ast._get_source.cache_clear()
    pdoc.docstrings.convert.cache_clear()

    prefix = f"{module_name}."
    affected = sorted(
        name for name in sys.modules if name == module_name or name.startswith(prefix)
    )
    for modname in affected:
        if modname == "pdoc.render":
            # pdoc.render is stateful after configure(), so we don't want to reload it.
            continue
        try:
            loaded = sys.modules[modname]
            # some funky stuff going on otherwise - one example is typing.io, which is a class.
            if isinstance(loaded, types.ModuleType):
                with mock_some_common_side_effects():
                    importlib.reload(loaded)
        except AnyException:
            warnings.warn(
                f"Error reloading {modname}:\n{traceback.format_exc()}",
                stacklevel=2,
            )
def walk_specs(specs: collections.abc.Sequence[pathlib.Path | str]) -> list[str]:
33def walk_specs(specs: Sequence[Path | str]) -> list[str]:
34    """
35    This function processes a list of module specifications and returns a collection of module names, including all
36    submodules, that should be processed by pdoc.
37
38    A module specification can either be the name of an installed module, or the path to a specific file or package.
39    For example, the following strings are valid module specifications:
40
41     - `typing`
42     - `collections.abc`
43     - `./test/testdata/demo_long.py`
44     - `./test/testdata/demopackage`
45
46    *This function has side effects:* See `parse_spec`.
47    """
48    all_modules: dict[str, None] = {}
49    for spec in specs:
50
51        if isinstance(spec, str) and spec.startswith("!"):
52            ignore_pattern = re.compile(spec[1:])
53            all_modules = {
54                k: v for k, v in all_modules.items() if not ignore_pattern.match(k)
55            }
56            continue
57
58        modname = parse_spec(spec)
59
60        try:
61            with mock_some_common_side_effects():
62                modspec = importlib.util.find_spec(modname)
63                if modspec is None:
64                    raise ModuleNotFoundError(modname)
65        except AnyException:
66            warnings.warn(
67                f"Cannot find spec for {modname} (from {spec}):\n{traceback.format_exc()}",
68                stacklevel=2,
69            )
70        else:
71            mod_info = pkgutil.ModuleInfo(
72                None,  # type: ignore
73                name=modname,
74                ispkg=bool(modspec.submodule_search_locations),
75            )
76            for m in walk_packages2([mod_info]):
77                if m.name in all_modules:
78                    warnings.warn(
79                        f"The module specification {spec!r} adds a module named {m.name}, but a module with this name "
80                        f"has already been added. You may have accidentally repeated a module spec, or you are trying "
81                        f"to document two modules with the same filename from two different directories, which does "
82                        f"not work. Only one documentation page will be generated."
83                    )
84                all_modules[m.name] = None
85
86    if not all_modules:
87        raise ValueError(
88            f"No modules found matching spec: {', '.join(str(x) for x in specs)}"
89        )
90
91    return list(all_modules)

This function processes a list of module specifications and returns a collection of module names, including all submodules, that should be processed by pdoc.

A module specification can either be the name of an installed module, or the path to a specific file or package. For example, the following strings are valid module specifications:

  • typing
  • collections.abc
  • ./test/testdata/demo_long.py
  • ./test/testdata/demopackage

This function has side effects: See parse_spec.

def parse_spec(spec: pathlib.Path | str) -> str:
 94def parse_spec(spec: Path | str) -> str:
 95    """
 96    This functions parses a user's module specification into a module identifier that can be imported.
 97    If both a local file/directory and an importable module with the same name exist, a warning will be printed.
 98
 99    *This function has side effects:* `sys.path` will be amended if the specification is a path.
100    If this side effect is undesired, pass a module name instead.
101    """
102    pspec = Path(spec)
103    if isinstance(spec, str) and (os.sep in spec or (os.altsep and os.altsep in spec)):
104        # We have a path separator, so it's definitely a filepath.
105        spec = pspec
106
107    if isinstance(spec, str) and (pspec.is_file() or (pspec / "__init__.py").is_file()):
108        # We have a local file with this name, but is there also a module with the same name?
109        try:
110            with mock_some_common_side_effects():
111                modspec = importlib.util.find_spec(spec)
112                if modspec is None:
113                    raise ModuleNotFoundError
114        except AnyException:
115            # Module does not exist, use local file.
116            spec = pspec
117        else:
118            # Module does exist. We now check if the local file/directory is the same (e.g. after pip install -e),
119            # and emit a warning if that's not the case.
120            origin = (
121                Path(modspec.origin).absolute() if modspec.origin else Path("unknown")
122            )
123            local_dir = Path(spec).absolute()
124            if local_dir not in (origin, origin.parent):
125                warnings.warn(
126                    f"{spec!r} may refer to either the installed Python module or the local file/directory with the "
127                    f"same name. pdoc will document the installed module, prepend './' to force documentation of the "
128                    f"local file/directory.\n"
129                    f" - Module location: {origin}\n"
130                    f" - Local file/directory: {local_dir}",
131                    RuntimeWarning,
132                )
133
134    if isinstance(spec, Path):
135        if (spec.parent / "__init__.py").exists():
136            return parse_spec(spec.resolve().parent) + f".{spec.stem}"
137        parent_dir = str(spec.parent)
138        sys.path = [parent_dir] + [x for x in sys.path if x != parent_dir]
139        if spec.stem in sys.modules and sys.modules[spec.stem].__file__:
140            local_dir = spec.resolve()
141            file = sys.modules[spec.stem].__file__
142            assert file is not None  # make mypy happy
143            origin = Path(file).resolve()
144            if local_dir not in (origin, origin.parent, origin.with_suffix("")):
145                warnings.warn(
146                    f"pdoc cannot load {spec.stem!r} because a module with the same name is already imported in pdoc's "
147                    f"Python process. pdoc will document the loaded module from {origin} instead.",
148                    RuntimeWarning,
149                )
150        return spec.stem
151    else:
152        return spec

This function parses a user's module specification into a module identifier that can be imported. If both a local file/directory and an importable module with the same name exist, a warning will be printed.

This function has side effects: sys.path will be amended if the specification is a path. If this side effect is undesired, pass a module name instead.

@contextmanager
def mock_some_common_side_effects():
188@contextmanager
189def mock_some_common_side_effects():
190    """
191    This context manager is applied when importing modules. It mocks some common side effects that may happen upon
192    module import. For example, `import antigravity` normally causes a web browser to open, which we want to suppress.
193
194    Note that this function must not be used for security purposes, it's easily bypassable.
195    """
196    with patch("subprocess.Popen", new=_PdocDefusedPopen), patch(
197        "os.startfile", new=_noop, create=True
198    ), patch("sys.stdout", new=io.StringIO()), patch(
199        "sys.stderr", new=io.StringIO()
200    ), patch(
201        "sys.stdin", new=io.StringIO()
202    ):
203        yield

This context manager is applied when importing modules. It mocks some common side effects that may happen upon module import. For example, import antigravity normally causes a web browser to open, which we want to suppress.

Note that this function must not be used for security purposes, it's easily bypassable.

@mock_some_common_side_effects()
def load_module(module: str) -> module:
206@mock_some_common_side_effects()
207def load_module(module: str) -> types.ModuleType:
208    """Try to import a module. If import fails, a RuntimeError is raised.
209
210    Returns the imported module."""
211    try:
212        return importlib.import_module(module)
213    except AnyException as e:
214        raise RuntimeError(f"Error importing {module}") from e

Try to import a module. If import fails, a RuntimeError is raised.

Returns the imported module.

AnyException = (<class 'SystemExit'>, <class 'GeneratorExit'>, <class 'Exception'>)

BaseException, but excluding KeyboardInterrupt.

Modules may raise SystemExit on import (which we want to catch), but we don't want to catch a user's KeyboardInterrupt.

def walk_packages2( modules: collections.abc.Iterable[pkgutil.ModuleInfo]) -> collections.abc.Iterator[pkgutil.ModuleInfo]:
225def walk_packages2(
226    modules: Iterable[pkgutil.ModuleInfo],
227) -> Iterator[pkgutil.ModuleInfo]:
228    """
229    For a given list of modules, recursively yield their names and all their submodules' names.
230
231    This function is similar to `pkgutil.walk_packages`, but respects a package's `__all__` attribute if specified.
232    If `__all__` is defined, submodules not listed in `__all__` are excluded.
233    """
234
235    # noinspection PyDefaultArgument
236    def seen(p, m={}):  # pragma: no cover
237        if p in m:
238            return True
239        m[p] = True
240
241    for mod in modules:
242        yield mod
243
244        if mod.ispkg:
245            try:
246                module = load_module(mod.name)
247            except RuntimeError:
248                warnings.warn(f"Error loading {mod.name}:\n{traceback.format_exc()}")
249                continue
250
251            mod_all = getattr(module, "__all__", None)
252            # don't traverse path items we've seen before
253            path = [p for p in (getattr(module, "__path__", None) or []) if not seen(p)]
254
255            submodules = []
256            for submodule in pkgutil.iter_modules(path, f"{mod.name}."):
257                name = submodule.name.rpartition(".")[2]
258                if name == "__main__":
259                    continue  # https://github.com/mitmproxy/pdoc/issues/438
260                if (
261                    mod_all is None
262                    or name in mod_all
263                ):
264                    submodules.append(submodule)
265
266            yield from walk_packages2(submodules)

For a given list of modules, recursively yield their names and all their submodules' names.

This function is similar to pkgutil.walk_packages, but respects a package's __all__ attribute if specified. If __all__ is defined, submodules not listed in __all__ are excluded.

def module_mtime(modulename: str) -> float | None:
269def module_mtime(modulename: str) -> float | None:
270    """Returns the time the specified module file was last modified, or `None` if this cannot be determined.
271    The primary use of this is live-reloading modules on modification."""
272    try:
273        with mock_some_common_side_effects():
274            spec = importlib.util.find_spec(modulename)
275    except AnyException:
276        pass
277    else:
278        if spec is not None and spec.origin is not None:
279            return Path(spec.origin).stat().st_mtime
280    return None

Returns the time the specified module file was last modified, or None if this cannot be determined. The primary use of this is live-reloading modules on modification.

def invalidate_caches(module_name: str) -> None:
283def invalidate_caches(module_name: str) -> None:
284    """
285    Invalidate module cache to allow live-reloading of modules.
286    """
287    # Getting this right is tricky – reloading modules causes a bunch of surprising side effects.
288    # Our current best effort is to call `importlib.reload` on all modules that start with module_name.
289    # We also exclude our own dependencies, which cause fun errors otherwise.
290    if module_name not in sys.modules:
291        return
292    if any(
293        module_name.startswith(f"{x}.") or x == module_name
294        for x in ("jinja2", "markupsafe", "markdown2", "pygments")
295    ):
296        return
297
298    # a more extreme alternative:
299    # filename = sys.modules[module_name].__file__
300    # if (
301    #    filename.startswith(sysconfig.get_path("platstdlib"))
302    #    or filename.startswith(sysconfig.get_path("stdlib"))
303    # ):
304    #     return
305
306    importlib.invalidate_caches()
307    linecache.clearcache()
308    pdoc.doc.Module.from_name.cache_clear()
309    pdoc.doc_ast._get_source.cache_clear()
310    pdoc.docstrings.convert.cache_clear()
311
312    prefix = f"{module_name}."
313    mods = sorted(
314        mod for mod in sys.modules if module_name == mod or mod.startswith(prefix)
315    )
316    for modname in mods:
317        if modname == "pdoc.render":
318            # pdoc.render is stateful after configure(), so we don't want to reload it.
319            continue
320        try:
321            if not isinstance(sys.modules[modname], types.ModuleType):
322                continue  # some funky stuff going on - one example is typing.io, which is a class.
323            with mock_some_common_side_effects():
324                importlib.reload(sys.modules[modname])
325        except AnyException:
326            warnings.warn(
327                f"Error reloading {modname}:\n{traceback.format_exc()}",
328                stacklevel=2,
329            )

Invalidate module cache to allow live-reloading of modules.