pdoc.extract
This module handles the interaction with Python's module system, that is, it loads the correct module based on whatever the user specified, and provides the rest of pdoc with some additional module metadata.
"""
This module handles the interaction with Python's module system,
that is, it loads the correct module based on whatever the user specified,
and provides the rest of pdoc with some additional module metadata.
"""
from __future__ import annotations

from collections.abc import Iterable
from collections.abc import Iterator
from collections.abc import Sequence
from contextlib import contextmanager
import importlib.util
import io
import linecache
import os
from pathlib import Path
import pkgutil
import platform
import re
import shutil
import subprocess
import sys
import traceback
import types
from unittest.mock import patch
import warnings

import pdoc.doc_ast
import pdoc.docstrings


def walk_specs(specs: Sequence[Path | str]) -> list[str]:
    """
    This function processes a list of module specifications and returns a collection of module names, including all
    submodules, that should be processed by pdoc.

    A module specification can either be the name of an installed module, or the path to a specific file or package.
    For example, the following strings are valid module specifications:

     - `typing`
     - `collections.abc`
     - `./test/testdata/demo_long.py`
     - `./test/testdata/demopackage`

    A string specification starting with `!` is treated as a regular expression: every module collected *so far*
    whose name matches it (anchored at the start of the name) is removed again.

    Raises a `ValueError` if no modules are left at the end.

    *This function has side effects:* See `parse_spec`.
    """
    # A dict is used as an insertion-ordered set of module names.
    all_modules: dict[str, None] = {}
    for spec in specs:
        if isinstance(spec, str) and spec.startswith("!"):
            ignore_pattern = re.compile(spec[1:])
            all_modules = {
                k: v for k, v in all_modules.items() if not ignore_pattern.match(k)
            }
            continue

        modname = parse_spec(spec)

        try:
            with mock_some_common_side_effects():
                modspec = importlib.util.find_spec(modname)
                if modspec is None:
                    raise ModuleNotFoundError(modname)
        except AnyException:
            # A spec that cannot be resolved is reported, but does not abort the remaining specs.
            warnings.warn(
                f"Cannot find spec for {modname} (from {spec}):\n{traceback.format_exc()}",
                stacklevel=2,
            )
        else:
            mod_info = pkgutil.ModuleInfo(
                None,  # type: ignore
                name=modname,
                ispkg=bool(modspec.submodule_search_locations),
            )
            for m in walk_packages2([mod_info]):
                if m.name in all_modules:
                    warnings.warn(
                        f"The module specification {spec!r} adds a module named {m.name}, but a module with this name "
                        f"has already been added. You may have accidentally repeated a module spec, or you are trying "
                        f"to document two modules with the same filename from two different directories, which does "
                        f"not work. Only one documentation page will be generated."
                    )
                all_modules[m.name] = None

    if not all_modules:
        raise ValueError(
            f"No modules found matching spec: {', '.join(str(x) for x in specs)}"
        )

    return list(all_modules)


def parse_spec(spec: Path | str) -> str:
    """
    This function parses a user's module specification into a module identifier that can be imported.
    If both a local file/directory and an importable module with the same name exist, a warning will be printed.

    *This function has side effects:* `sys.path` will be amended if the specification is a path.
    If this side effect is undesired, pass a module name instead.
    """
    pspec = Path(spec)
    if isinstance(spec, str) and (os.sep in spec or (os.altsep and os.altsep in spec)):
        # We have a path separator, so it's definitely a filepath.
        spec = pspec

    if isinstance(spec, str) and (pspec.is_file() or (pspec / "__init__.py").is_file()):
        # We have a local file with this name, but is there also a module with the same name?
        try:
            with mock_some_common_side_effects():
                modspec = importlib.util.find_spec(spec)
                if modspec is None:
                    raise ModuleNotFoundError
        except AnyException:
            # Module does not exist, use local file.
            spec = pspec
        else:
            # Module does exist. We now check if the local file/directory is the same (e.g. after pip install -e),
            # and emit a warning if that's not the case.
            origin = (
                Path(modspec.origin).absolute() if modspec.origin else Path("unknown")
            )
            local_dir = Path(spec).absolute()
            if local_dir not in (origin, origin.parent):
                warnings.warn(
                    f"{spec!r} may refer to either the installed Python module or the local file/directory with the "
                    f"same name. pdoc will document the installed module, prepend './' to force documentation of the "
                    f"local file/directory.\n"
                    f" - Module location: {origin}\n"
                    f" - Local file/directory: {local_dir}",
                    RuntimeWarning,
                )

    if isinstance(spec, Path):
        if spec.name == "__init__.py":
            spec = spec.parent
        if (spec.parent / "__init__.py").exists():
            # The parent directory is itself a package: resolve its dotted name first and append our own name.
            return parse_spec(spec.resolve().parent) + f".{spec.stem}"
        # Put the containing directory at the front of sys.path (without duplicating it).
        parent_dir = str(spec.parent)
        sys.path = [parent_dir] + [x for x in sys.path if x != parent_dir]
        if spec.stem in sys.modules and sys.modules[spec.stem].__file__:
            local_dir = spec.resolve()
            file = sys.modules[spec.stem].__file__
            assert file is not None  # make mypy happy
            origin = Path(file).resolve()
            if local_dir not in (origin, origin.parent, origin.with_suffix("")):
                warnings.warn(
                    f"pdoc cannot load {spec.stem!r} because a module with the same name is already imported in pdoc's "
                    f"Python process. pdoc will document the loaded module from {origin} instead.",
                    RuntimeWarning,
                )
        return spec.stem
    else:
        return spec


def _noop(*args, **kwargs):
    """Accept any arguments and do nothing."""
    pass


class _PdocDefusedPopen(subprocess.Popen):
    """A small wrapper around subprocess.Popen that converts most executions into no-ops."""

    if platform.system() == "Windows":  # pragma: no cover
        _noop_exe = "echo.exe"
    else:  # pragma: no cover
        _noop_exe = "echo"

    def __init__(self, *args, **kwargs):  # pragma: no cover
        # The command is usually the first positional argument, but subprocess.Popen also accepts it as the
        # `args=` keyword. Look in both places so that the warning below can always name the command
        # (indexing an empty `args` tuple would raise an IndexError instead).
        command = args[0] if args else kwargs.get("args")
        command_allowed = (
            command
            and command[0]
            in (
                # these invocations may all come from https://github.com/python/cpython/blob/main/Lib/ctypes/util.py,
                # which we want to keep working.
                "/sbin/ldconfig",
                "ld",
                shutil.which("gcc") or shutil.which("cc"),
                shutil.which("objdump"),
                # https://github.com/mitmproxy/pdoc/issues/430: GitPython invokes git commands, which is also fine.
                "git",
            )
        )
        if not command_allowed and os.environ.get("PDOC_ALLOW_EXEC", "") == "":
            # sys.stderr is patched, so we need to unpatch it for printing a warning.
            with patch("sys.stderr", new=sys.__stderr__):
                warnings.warn(
                    f"Suppressed execution of {command!r} during import. "
                    f"Set PDOC_ALLOW_EXEC=1 as an environment variable to allow subprocess execution.",
                    stacklevel=2,
                )
            # Run the no-op executable instead of the requested program.
            kwargs["executable"] = self._noop_exe
        super().__init__(*args, **kwargs)


@contextmanager
def mock_some_common_side_effects():
    """
    This context manager is applied when importing modules. It mocks some common side effects that may happen upon
    module import. For example, `import antigravity` normally causes a web browser to open, which we want to suppress.

    Note that this function must not be used for security purposes, it's easily bypassable.
    """
    with patch("subprocess.Popen", new=_PdocDefusedPopen), patch(
        "os.startfile", new=_noop, create=True
    ), patch("sys.stdout", new=io.StringIO()), patch(
        "sys.stderr", new=io.StringIO()
    ), patch(
        "sys.stdin", new=io.StringIO()
    ):
        yield


@mock_some_common_side_effects()
def load_module(module: str) -> types.ModuleType:
    """Try to import a module. If import fails, a RuntimeError is raised.

    Returns the imported module."""
    try:
        return importlib.import_module(module)
    except AnyException as e:
        raise RuntimeError(f"Error importing {module}") from e


AnyException = (SystemExit, GeneratorExit, Exception)
"""BaseException, but excluding KeyboardInterrupt.

Modules may raise SystemExit on import (which we want to catch),
but we don't want to catch a user's KeyboardInterrupt.
"""


def walk_packages2(
    modules: Iterable[pkgutil.ModuleInfo],
) -> Iterator[pkgutil.ModuleInfo]:
    """
    For a given list of modules, recursively yield their names and all their submodules' names.

    This function is similar to `pkgutil.walk_packages`, but respects a package's `__all__` attribute if specified.
    If `__all__` is defined, submodules not listed in `__all__` are excluded.
    """

    # The dict default is created once per walk_packages2 call (when `seen` is defined) and serves as the
    # memo of path entries that this call has already traversed.
    # noinspection PyDefaultArgument
    def seen(p, m={}):  # pragma: no cover
        if p in m:
            return True
        m[p] = True

    for mod in modules:
        yield mod

        if mod.ispkg:
            try:
                module = load_module(mod.name)
            except RuntimeError:
                warnings.warn(f"Error loading {mod.name}:\n{traceback.format_exc()}")
                continue

            mod_all = getattr(module, "__all__", None)
            # don't traverse path items we've seen before
            path = [p for p in (getattr(module, "__path__", None) or []) if not seen(p)]

            submodules = []
            for submodule in pkgutil.iter_modules(path, f"{mod.name}."):
                name = submodule.name.rpartition(".")[2]
                if name == "__main__":
                    continue  # https://github.com/mitmproxy/pdoc/issues/438
                if mod_all is None or name in mod_all:
                    submodules.append(submodule)

            yield from walk_packages2(submodules)


def module_mtime(modulename: str) -> float | None:
    """Returns the time the specified module file was last modified, or `None` if this cannot be determined.
    The primary use of this is live-reloading modules on modification."""
    try:
        with mock_some_common_side_effects():
            spec = importlib.util.find_spec(modulename)
    except AnyException:
        return None
    if spec is None or spec.origin is None:
        return None
    try:
        return Path(spec.origin).stat().st_mtime
    except OSError:
        # `origin` is not always a path on disk (built-in modules report "built-in", for example),
        # and the file may also have disappeared in the meantime.
        return None


def invalidate_caches(module_name: str) -> None:
    """
    Invalidate module cache to allow live-reloading of modules.
    """
    # Getting this right is tricky – reloading modules causes a bunch of surprising side effects.
    # Our current best effort is to call `importlib.reload` on all modules that start with module_name.
    # We also exclude our own dependencies, which cause fun errors otherwise.
    if module_name not in sys.modules:
        return
    if any(
        module_name.startswith(f"{x}.") or x == module_name
        for x in ("jinja2", "markupsafe", "markdown2", "pygments")
    ):
        return

    # a more extreme alternative:
    # filename = sys.modules[module_name].__file__
    # if (
    #    filename.startswith(sysconfig.get_path("platstdlib"))
    #    or filename.startswith(sysconfig.get_path("stdlib"))
    # ):
    #     return

    importlib.invalidate_caches()
    linecache.clearcache()
    pdoc.doc.Module.from_name.cache_clear()
    pdoc.doc_ast._get_source.cache_clear()
    pdoc.docstrings.convert.cache_clear()

    prefix = f"{module_name}."
    mods = sorted(
        mod for mod in sys.modules if module_name == mod or mod.startswith(prefix)
    )
    for modname in mods:
        if modname == "pdoc.render":
            # pdoc.render is stateful after configure(), so we don't want to reload it.
            continue
        try:
            if not isinstance(sys.modules[modname], types.ModuleType):
                continue  # some funky stuff going on - one example is typing.io, which is a class.
            with mock_some_common_side_effects():
                importlib.reload(sys.modules[modname])
        except AnyException:
            warnings.warn(
                f"Error reloading {modname}:\n{traceback.format_exc()}",
                stacklevel=2,
            )
def walk_specs(specs: Sequence[Path | str]) -> list[str]:
    """
    Expand a list of module specifications into the names of all modules (including submodules)
    that pdoc should process, in the order in which they were discovered.

    A module specification is either the name of an installed module or the path to a file or package,
    for example:

     - `typing`
     - `collections.abc`
     - `./test/testdata/demo_long.py`
     - `./test/testdata/demopackage`

    A string specification that starts with `!` is a regular expression: all modules collected *so far*
    whose name matches it (anchored at the start of the name) are dropped again.

    Specifications that cannot be resolved only produce a warning. A `ValueError` is raised if no
    module is left at the end.

    *This function has side effects:* See `parse_spec`.
    """
    # dict keys double as an insertion-ordered set of module names
    all_modules: dict[str, None] = {}

    for spec in specs:
        if isinstance(spec, str) and spec.startswith("!"):
            exclude = re.compile(spec[1:])
            kept: dict[str, None] = {}
            for known_name, marker in all_modules.items():
                if exclude.match(known_name) is None:
                    kept[known_name] = marker
            all_modules = kept
            continue

        modname = parse_spec(spec)

        try:
            with mock_some_common_side_effects():
                modspec = importlib.util.find_spec(modname)
                if modspec is None:
                    raise ModuleNotFoundError(modname)
        except AnyException:
            warnings.warn(
                f"Cannot find spec for {modname} (from {spec}):\n{traceback.format_exc()}",
                stacklevel=2,
            )
            continue

        is_package = bool(modspec.submodule_search_locations)
        root = pkgutil.ModuleInfo(None, name=modname, ispkg=is_package)  # type: ignore
        for m in walk_packages2([root]):
            if m.name in all_modules:
                warnings.warn(
                    f"The module specification {spec!r} adds a module named {m.name}, but a module with this name "
                    f"has already been added. You may have accidentally repeated a module spec, or you are trying "
                    f"to document two modules with the same filename from two different directories, which does "
                    f"not work. Only one documentation page will be generated."
                )
            all_modules[m.name] = None

    if all_modules:
        return list(all_modules)

    raise ValueError(
        f"No modules found matching spec: {', '.join(str(x) for x in specs)}"
    )
This function processes a list of module specifications and returns a collection of module names, including all submodules, that should be processed by pdoc.
A module specification can either be the name of an installed module, or the path to a specific file or package. For example, the following strings are valid module specifications:
typing
collections.abc
./test/testdata/demo_long.py
./test/testdata/demopackage
This function has side effects: see `parse_spec`.
def parse_spec(spec: Path | str) -> str:
    """
    Turn a user's module specification into a module identifier that can be imported.

    A `Path`, or a string containing a path separator, is always treated as a file path. Any other
    string is treated as a module name, unless a local file/package of that name exists and no
    importable module does. If both exist and are not the same location, a warning is emitted and
    the module name is used.

    *This function has side effects:* `sys.path` will be amended if the specification is a path.
    If this side effect is undesired, pass a module name instead.
    """
    pspec = Path(spec)

    if isinstance(spec, str):
        if os.sep in spec or (os.altsep and os.altsep in spec):
            # A path separator rules out a module name.
            spec = pspec
        elif pspec.is_file() or (pspec / "__init__.py").is_file():
            # A local file/package has this name; check whether an importable module does, too.
            try:
                with mock_some_common_side_effects():
                    modspec = importlib.util.find_spec(spec)
                    if modspec is None:
                        raise ModuleNotFoundError
            except AnyException:
                # Nothing importable under that name: fall back to the local file.
                spec = pspec
            else:
                # Both exist. They may be the same thing (e.g. after pip install -e); warn if they are not.
                if modspec.origin:
                    origin = Path(modspec.origin).absolute()
                else:
                    origin = Path("unknown")
                local_dir = Path(spec).absolute()
                if local_dir != origin and local_dir != origin.parent:
                    warnings.warn(
                        f"{spec!r} may refer to either the installed Python module or the local file/directory with the "
                        f"same name. pdoc will document the installed module, prepend './' to force documentation of the "
                        f"local file/directory.\n"
                        f" - Module location: {origin}\n"
                        f" - Local file/directory: {local_dir}",
                        RuntimeWarning,
                    )

    if not isinstance(spec, Path):
        # Plain module name: nothing to resolve.
        return spec

    if spec.name == "__init__.py":
        spec = spec.parent

    if (spec.parent / "__init__.py").exists():
        # We are inside a package: resolve the package's dotted name first, then append our own.
        return parse_spec(spec.resolve().parent) + f".{spec.stem}"

    # Move (or add) the containing directory to the front of sys.path.
    parent_dir = str(spec.parent)
    sys.path = [parent_dir, *(entry for entry in sys.path if entry != parent_dir)]

    if spec.stem in sys.modules:
        loaded_file = sys.modules[spec.stem].__file__
        if loaded_file:
            local_dir = spec.resolve()
            origin = Path(loaded_file).resolve()
            if local_dir not in (origin, origin.parent, origin.with_suffix("")):
                warnings.warn(
                    f"pdoc cannot load {spec.stem!r} because a module with the same name is already imported in pdoc's "
                    f"Python process. pdoc will document the loaded module from {origin} instead.",
                    RuntimeWarning,
                )

    return spec.stem
This function parses a user's module specification into a module identifier that can be imported. If both a local file/directory and an importable module with the same name exist, a warning will be printed.
This function has side effects: `sys.path` will be amended if the specification is a path.
If this side effect is undesired, pass a module name instead.
@contextmanager
def mock_some_common_side_effects():
    """
    Context manager that is active while modules are imported and mutes a few common import-time side effects.

    While it is active, `subprocess.Popen` is replaced by `_PdocDefusedPopen`, `os.startfile` by a no-op,
    and `sys.stdout`, `sys.stderr` and `sys.stdin` by fresh in-memory buffers. For example, `import antigravity`
    normally causes a web browser to open, which we want to suppress.

    Note that this function must not be used for security purposes, it's easily bypassable.
    """
    with patch("subprocess.Popen", new=_PdocDefusedPopen):
        # create=True lets the patch succeed even when `os` has no `startfile` attribute.
        with patch("os.startfile", new=_noop, create=True):
            with patch("sys.stdout", new=io.StringIO()):
                with patch("sys.stderr", new=io.StringIO()):
                    with patch("sys.stdin", new=io.StringIO()):
                        yield
This context manager is applied when importing modules. It mocks some common side effects that may happen upon module import. For example, `import antigravity` normally causes a web browser to open, which we want to suppress.
Note that this function must not be used for security purposes, it's easily bypassable.
@mock_some_common_side_effects()
def load_module(module: str) -> types.ModuleType:
    """Import the module named `module` and return it.

    The import runs with `mock_some_common_side_effects` applied. Any failure covered by `AnyException`
    is re-raised as a `RuntimeError`, with the original exception attached as its cause."""
    try:
        imported = importlib.import_module(module)
    except AnyException as exc:
        raise RuntimeError(f"Error importing {module}") from exc
    return imported
Try to import a module. If import fails, a RuntimeError is raised.
Returns the imported module.
BaseException, but excluding KeyboardInterrupt.
Modules may raise SystemExit on import (which we want to catch), but we don't want to catch a user's KeyboardInterrupt.
def walk_packages2(
    modules: Iterable[pkgutil.ModuleInfo],
) -> Iterator[pkgutil.ModuleInfo]:
    """
    Yield every entry of `modules`, each one followed (recursively) by its submodules.

    This function is similar to `pkgutil.walk_packages`, but respects a package's `__all__` attribute if specified:
    if `__all__` is defined, submodules not listed in it are excluded. Submodules named `__main__` are always
    skipped. A package that cannot be loaded is still yielded itself, but a warning is emitted and its
    submodules are not visited.
    """
    # Search-path entries this call has already expanded; they are not traversed a second time.
    visited_paths = set()

    for mod in modules:
        yield mod

        if not mod.ispkg:
            continue

        try:
            package = load_module(mod.name)
        except RuntimeError:
            warnings.warn(f"Error loading {mod.name}:\n{traceback.format_exc()}")
            continue

        exported = getattr(package, "__all__", None)

        search_path = []
        for entry in getattr(package, "__path__", None) or []:
            if entry in visited_paths:
                continue
            visited_paths.add(entry)
            search_path.append(entry)

        children = []
        for candidate in pkgutil.iter_modules(search_path, f"{mod.name}."):
            short_name = candidate.name.rpartition(".")[2]
            # https://github.com/mitmproxy/pdoc/issues/438
            if short_name == "__main__":
                continue
            if exported is None or short_name in exported:
                children.append(candidate)

        yield from walk_packages2(children)
For a given list of modules, recursively yield their names and all their submodules' names.
This function is similar to `pkgutil.walk_packages`, but respects a package's `__all__` attribute if specified. If `__all__` is defined, submodules not listed in `__all__` are excluded.
def module_mtime(modulename: str) -> float | None:
    """Returns the time the specified module file was last modified, or `None` if this cannot be determined.
    The primary use of this is live-reloading modules on modification.

    `None` is returned when the module spec cannot be found, when the spec has no origin,
    or when the origin cannot be stat'ed as a file."""
    try:
        with mock_some_common_side_effects():
            spec = importlib.util.find_spec(modulename)
    except AnyException:
        return None

    if spec is None or spec.origin is None:
        return None

    try:
        return Path(spec.origin).stat().st_mtime
    except OSError:
        # `origin` is not always a path on disk (built-in modules report "built-in", for example),
        # and the file may also have disappeared in the meantime.
        return None
Returns the time the specified module file was last modified, or `None` if this cannot be determined.
The primary use of this is live-reloading modules on modification.
def invalidate_caches(module_name: str) -> None:
    """
    Invalidate module caches to allow live-reloading of `module_name` and its submodules.

    Nothing happens if the module has not been imported, or if it is (part of) one of pdoc's own
    dependencies. Otherwise the import/line caches and pdoc's internal caches are cleared and every
    matching loaded module is passed to `importlib.reload`; reload errors are reported as warnings.
    """
    # Getting this right is tricky – reloading modules causes a bunch of surprising side effects.
    # Our current best effort is to call `importlib.reload` on all modules that start with module_name.
    if module_name not in sys.modules:
        return

    # We exclude our own dependencies, which cause fun errors otherwise.
    # (A more extreme alternative would be to skip everything that lives in the stdlib directories.)
    for dependency in ("jinja2", "markupsafe", "markdown2", "pygments"):
        if dependency == module_name or module_name.startswith(f"{dependency}."):
            return

    importlib.invalidate_caches()
    linecache.clearcache()
    pdoc.doc.Module.from_name.cache_clear()
    pdoc.doc_ast._get_source.cache_clear()
    pdoc.docstrings.convert.cache_clear()

    prefix = f"{module_name}."
    affected = [
        loaded for loaded in sys.modules if loaded == module_name or loaded.startswith(prefix)
    ]
    affected.sort()

    for modname in affected:
        # pdoc.render is stateful after configure(), so we don't want to reload it.
        if modname == "pdoc.render":
            continue
        try:
            target = sys.modules[modname]
            if isinstance(target, types.ModuleType):
                with mock_some_common_side_effects():
                    importlib.reload(target)
            # else: some funky stuff going on - one example is typing.io, which is a class.
        except AnyException:
            warnings.warn(
                f"Error reloading {modname}:\n{traceback.format_exc()}",
                stacklevel=2,
            )
Invalidate module cache to allow live-reloading of modules.