Edit on GitHub

pdoc.web

This module implements pdoc's live-reloading webserver.

We want to keep the number of dependencies as small as possible, so we are content with the builtin http.server module. It is less ergonomic than, say, Flask, but good enough for our purposes.

  1"""
  2This module implements pdoc's live-reloading webserver.
  3
  4We want to keep the number of dependencies as small as possible,
  5so we are content with the builtin `http.server` module.
  6It is a bit unergonomic compared to let's say flask, but good enough for our purposes.
  7"""
  8
  9from __future__ import annotations
 10
 11from collections.abc import Iterable
 12from collections.abc import Iterator
 13from functools import cache
 14import http.server
 15import traceback
 16from typing import Mapping
 17import urllib.parse
 18import warnings
 19import webbrowser
 20
 21from pdoc import doc
 22from pdoc import extract
 23from pdoc import render
 24
 25
 class DocHandler(http.server.BaseHTTPRequestHandler):
     """Request handler that serves pdoc pages for a `DocServer`."""

     server: DocServer
     """A reference to the main web server."""

     def do_HEAD(self):
         # handle_request() emits the status line and headers itself; the body it
         # returns is handed back to the caller and never written to the socket.
         try:
             body = self.handle_request()
         except ConnectionError:  # pragma: no cover
             return None
         return body

     def do_GET(self):
         # Same as HEAD, but the rendered body is encoded and sent to the client.
         try:
             body = self.handle_request()
             self.wfile.write(body.encode())
         except ConnectionError:  # pragma: no cover
             pass

     def handle_request(self) -> str:
         """
         Route the request, send status and headers, and return the response body.

         Shared by `do_HEAD` and `do_GET`; writing the body is left to the caller.
         """

         def begin(status: int, content_type: str | None = None) -> None:
             # Emit the status line plus an optional content-type header.
             self.send_response(status)
             if content_type is not None:
                 self.send_header("content-type", content_type)
             self.end_headers()

         route = self.path.split("?", 1)[0]

         if route in ("/", "/index.html"):
             page = render.html_index(self.server.all_modules)
             begin(200, "text/html")
             return page

         if route == "/search.js":
             begin(200, "application/javascript")
             return self.server.render_search_index()

         if "." in route.removesuffix(".html"):
             # See https://github.com/mitmproxy/pdoc/issues/615: module separators must be normalized to "/".
             # A redirect would make the link look like it works, only for it to break once pdoc
             # prerenders to static HTML, so the request is rejected right away instead.
             begin(404)
             return "Not Found: Please normalize all module separators to '/'."

         dotted = route.lstrip("/").removesuffix(".html").replace("/", ".")
         module_name = urllib.parse.unquote(dotted)
         if module_name not in self.server.all_modules:
             begin(404, "text/html")
             return render.html_error(error=f"Module {module_name!r} not found")

         modified = extract.module_mtime(module_name)
         mtime = f"{modified:.1f}" if modified else ""
         if "mtime=1" in self.path:
             # Only the modification time was asked for, not the rendered page.
             begin(200, "text/plain")
             return mtime

         try:
             extract.invalidate_caches(module_name)
             module = self.server.all_modules[module_name]
             page = render.html_module(
                 module=module,
                 all_modules=self.server.all_modules,
                 mtime=mtime,
             )
         except Exception:
             # Any failure while loading or rendering becomes an error page with the traceback.
             begin(500, "text/html")
             return render.html_error(
                 error=f"Error importing {module_name!r}",
                 details=traceback.format_exc(),
             )

         begin(200, "text/html")
         return page

     def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
         """Deliberately log nothing for each request."""
         return None
105
106
class DocServer(http.server.HTTPServer):
    """pdoc's live-reloading web server"""

    all_modules: AllModules

    # Per-instance memo for `render_search_index`. Keeping it on the instance (rather than
    # decorating the method with `functools.cache`) avoids a class-level cache keyed on
    # `self` that would keep every server instance alive for the lifetime of the process.
    _search_index: str | None = None

    def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
        """
        Bind the server to `addr` and register the modules named by `specs`.

        Extra keyword arguments are forwarded to `http.server.HTTPServer`.
        """
        super().__init__(addr, DocHandler, **kwargs)  # type: ignore
        module_names = extract.walk_specs(specs)
        self.all_modules = AllModules(module_names)

    def render_search_index(self) -> str:
        """Render the search index. For performance reasons this is always cached."""
        if self._search_index is None:
            # Some modules may not be importable, which means that they would raise a RuntimeError
            # when accessed. We "fix" this by pre-loading all modules here and only passing the ones that work.
            all_modules_safe = {}
            for mod in self.all_modules:
                try:
                    all_modules_safe[mod] = doc.Module.from_name(mod)
                except RuntimeError:
                    warnings.warn(f"Error importing {mod!r}:\n{traceback.format_exc()}")
            self._search_index = render.search_index(all_modules_safe)
        return self._search_index
129
130
class AllModules(Mapping[str, doc.Module]):
    """Read-only mapping from module name to `doc.Module` that loads modules lazily.

    It can be used like an ordinary dict, but a module is only loaded when its value is looked up.
    The downside is that __getitem__ may raise a RuntimeError.
    HTML rendering is unaffected because the default templates never read all_modules values;
    the search index, however, has to take extra precautions.
    """

    def __init__(self, allowed_modules: Iterable[str]):
        # A dict with throwaway values serves as an insertion-ordered set of names.
        self.allowed_modules: dict[str, None] = {name: None for name in allowed_modules}

    def __len__(self) -> int:
        return len(self.allowed_modules)

    def __iter__(self) -> Iterator[str]:
        return iter(self.allowed_modules)

    def __contains__(self, item):
        return item in self.allowed_modules

    def __getitem__(self, item: str):
        if item not in self.allowed_modules:  # pragma: no cover
            raise KeyError(item)
        return doc.Module.from_name(item)
158
159
160# https://github.com/mitmproxy/mitmproxy/blob/af3dfac85541ce06c0e3302a4ba495fe3c77b18a/mitmproxy/tools/web/webaddons.py#L35-L61
def open_browser(url: str) -> bool:  # pragma: no cover
    """
    Try to show `url` in a graphical web browser.

    Unlike `webbrowser.open`, only a fixed list of browsers is tried, in order.
    On headless servers this degrades gracefully to a no-op, whereas
    `webbrowser.open` would otherwise open lynx.

    Returns:

    - `True`, if a browser has been opened
    - `False`, if no suitable browser has been found.
    """
    candidates = (
        "windows-default",
        "macosx",
        "wslview %s",
        "x-www-browser %s",
        "gnome-open %s",
        "google-chrome",
        "chrome",
        "chromium",
        "chromium-browser",
        "firefox",
        "opera",
        "safari",
    )
    for name in candidates:
        try:
            controller = webbrowser.get(name)
        except webbrowser.Error:
            # This browser is not available here; move on to the next one.
            continue
        if controller.open(url):
            return True
    return False
class DocHandler(http.server.BaseHTTPRequestHandler):
    """Request handler that serves pdoc pages for a `DocServer`."""

    server: DocServer
    """A reference to the main web server."""

    def do_HEAD(self):
        # handle_request() emits the status line and headers itself; the body it
        # returns is handed back to the caller and never written to the socket.
        try:
            body = self.handle_request()
        except ConnectionError:  # pragma: no cover
            return None
        return body

    def do_GET(self):
        # Same as HEAD, but the rendered body is encoded and sent to the client.
        try:
            body = self.handle_request()
            self.wfile.write(body.encode())
        except ConnectionError:  # pragma: no cover
            pass

    def handle_request(self) -> str:
        """
        Route the request, send status and headers, and return the response body.

        Shared by `do_HEAD` and `do_GET`; writing the body is left to the caller.
        """

        def begin(status: int, content_type: str | None = None) -> None:
            # Emit the status line plus an optional content-type header.
            self.send_response(status)
            if content_type is not None:
                self.send_header("content-type", content_type)
            self.end_headers()

        route = self.path.split("?", 1)[0]

        if route in ("/", "/index.html"):
            page = render.html_index(self.server.all_modules)
            begin(200, "text/html")
            return page

        if route == "/search.js":
            begin(200, "application/javascript")
            return self.server.render_search_index()

        if "." in route.removesuffix(".html"):
            # See https://github.com/mitmproxy/pdoc/issues/615: module separators must be normalized to "/".
            # A redirect would make the link look like it works, only for it to break once pdoc
            # prerenders to static HTML, so the request is rejected right away instead.
            begin(404)
            return "Not Found: Please normalize all module separators to '/'."

        dotted = route.lstrip("/").removesuffix(".html").replace("/", ".")
        module_name = urllib.parse.unquote(dotted)
        if module_name not in self.server.all_modules:
            begin(404, "text/html")
            return render.html_error(error=f"Module {module_name!r} not found")

        modified = extract.module_mtime(module_name)
        mtime = f"{modified:.1f}" if modified else ""
        if "mtime=1" in self.path:
            # Only the modification time was asked for, not the rendered page.
            begin(200, "text/plain")
            return mtime

        try:
            extract.invalidate_caches(module_name)
            module = self.server.all_modules[module_name]
            page = render.html_module(
                module=module,
                all_modules=self.server.all_modules,
                mtime=mtime,
            )
        except Exception:
            # Any failure while loading or rendering becomes an error page with the traceback.
            begin(500, "text/html")
            return render.html_error(
                error=f"Error importing {module_name!r}",
                details=traceback.format_exc(),
            )

        begin(200, "text/html")
        return page

    def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
        """Deliberately log nothing for each request."""
        return None

A handler for individual requests.

server: DocServer

A reference to the main web server.

def do_HEAD(self):
33    def do_HEAD(self):
34        try:
35            return self.handle_request()
36        except ConnectionError:  # pragma: no cover
37            pass
def do_GET(self):
39    def do_GET(self):
40        try:
41            self.wfile.write(self.handle_request().encode())
42        except ConnectionError:  # pragma: no cover
43            pass
def handle_request(self) -> str:
 45    def handle_request(self) -> str:
 46        """Actually handle a request. Called by `do_HEAD` and `do_GET`."""
 47        path = self.path.split("?", 1)[0]
 48
 49        if path == "/" or path == "/index.html":
 50            out = render.html_index(self.server.all_modules)
 51        elif path == "/search.js":
 52            self.send_response(200)
 53            self.send_header("content-type", "application/javascript")
 54            self.end_headers()
 55            return self.server.render_search_index()
 56        elif "." in path.removesuffix(".html"):
 57            # See https://github.com/mitmproxy/pdoc/issues/615: All module separators should be normalized to "/".
 58            # We could redirect here, but that would create the impression of a working link, which will fall apart
 59            # when pdoc prerenders to static HTML. So we rather fail early.
 60            self.send_response(404)
 61            self.end_headers()
 62            return "Not Found: Please normalize all module separators to '/'."
 63        else:
 64            module_name = path.lstrip("/").removesuffix(".html").replace("/", ".")
 65            module_name = urllib.parse.unquote(module_name)
 66            if module_name not in self.server.all_modules:
 67                self.send_response(404)
 68                self.send_header("content-type", "text/html")
 69                self.end_headers()
 70                return render.html_error(error=f"Module {module_name!r} not found")
 71
 72            mtime = ""
 73            t = extract.module_mtime(module_name)
 74            if t:
 75                mtime = f"{t:.1f}"
 76            if "mtime=1" in self.path:
 77                self.send_response(200)
 78                self.send_header("content-type", "text/plain")
 79                self.end_headers()
 80                return mtime
 81
 82            try:
 83                extract.invalidate_caches(module_name)
 84                mod = self.server.all_modules[module_name]
 85                out = render.html_module(
 86                    module=mod,
 87                    all_modules=self.server.all_modules,
 88                    mtime=mtime,
 89                )
 90            except Exception:
 91                self.send_response(500)
 92                self.send_header("content-type", "text/html")
 93                self.end_headers()
 94                return render.html_error(
 95                    error=f"Error importing {module_name!r}",
 96                    details=traceback.format_exc(),
 97                )
 98
 99        self.send_response(200)
100        self.send_header("content-type", "text/html")
101        self.end_headers()
102        return out

Actually handle a request. Called by do_HEAD and do_GET.

def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
    """Deliberately log nothing for each request."""
    return None

Override logging to disable it.

class DocServer(http.server.HTTPServer):
    """pdoc's live-reloading web server"""

    all_modules: AllModules

    # Per-instance memo for `render_search_index`. Keeping it on the instance (rather than
    # decorating the method with `functools.cache`) avoids a class-level cache keyed on
    # `self` that would keep every server instance alive for the lifetime of the process.
    _search_index: str | None = None

    def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
        """
        Bind the server to `addr` and register the modules named by `specs`.

        Extra keyword arguments are forwarded to `http.server.HTTPServer`.
        """
        super().__init__(addr, DocHandler, **kwargs)  # type: ignore
        module_names = extract.walk_specs(specs)
        self.all_modules = AllModules(module_names)

    def render_search_index(self) -> str:
        """Render the search index. For performance reasons this is always cached."""
        if self._search_index is None:
            # Some modules may not be importable, which means that they would raise a RuntimeError
            # when accessed. We "fix" this by pre-loading all modules here and only passing the ones that work.
            all_modules_safe = {}
            for mod in self.all_modules:
                try:
                    all_modules_safe[mod] = doc.Module.from_name(mod)
                except RuntimeError:
                    warnings.warn(f"Error importing {mod!r}:\n{traceback.format_exc()}")
            self._search_index = render.search_index(all_modules_safe)
        return self._search_index

pdoc's live-reloading web server

DocServer(addr: tuple[str, int], specs: list[str], **kwargs)
def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
    """
    Bind the server to `addr` and register the modules named by `specs`.

    Extra keyword arguments are forwarded to `http.server.HTTPServer`.
    """
    super().__init__(addr, DocHandler, **kwargs)  # type: ignore
    # Only the module names are collected here; AllModules loads each module on first access.
    self.all_modules = AllModules(extract.walk_specs(specs))

Constructor. May be extended, do not override.

all_modules: AllModules
@cache
def render_search_index(self) -> str:
118    @cache
119    def render_search_index(self) -> str:
120        """Render the search index. For performance reasons this is always cached."""
121        # Some modules may not be importable, which means that they would raise an RuntimeError
122        # when accessed. We "fix" this by pre-loading all modules here and only passing the ones that work.
123        all_modules_safe = {}
124        for mod in self.all_modules:
125            try:
126                all_modules_safe[mod] = doc.Module.from_name(mod)
127            except RuntimeError:
128                warnings.warn(f"Error importing {mod!r}:\n{traceback.format_exc()}")
129        return render.search_index(all_modules_safe)

Render the search index. For performance reasons this is always cached.

class AllModules(Mapping[str, doc.Module]):
    """Read-only mapping from module name to `doc.Module` that loads modules lazily.

    It can be used like an ordinary dict, but a module is only loaded when its value is looked up.
    The downside is that __getitem__ may raise a RuntimeError.
    HTML rendering is unaffected because the default templates never read all_modules values;
    the search index, however, has to take extra precautions.
    """

    def __init__(self, allowed_modules: Iterable[str]):
        # A dict with throwaway values serves as an insertion-ordered set of names.
        self.allowed_modules: dict[str, None] = {name: None for name in allowed_modules}

    def __len__(self) -> int:
        return len(self.allowed_modules)

    def __iter__(self) -> Iterator[str]:
        return iter(self.allowed_modules)

    def __contains__(self, item):
        return item in self.allowed_modules

    def __getitem__(self, item: str):
        if item not in self.allowed_modules:  # pragma: no cover
            raise KeyError(item)
        return doc.Module.from_name(item)

A lazy-loading implementation of all_modules.

This behaves like a regular dict, but modules are only imported on demand for performance reasons. This has the somewhat annoying side effect that __getitem__ may raise a RuntimeError. We can ignore that when rendering HTML as the default templates do not access all_modules values, but we need to perform additional steps for the search index.

AllModules(allowed_modules: Iterable[str])
def __init__(self, allowed_modules: Iterable[str]):
    # A dict with throwaway values serves as an insertion-ordered set of names.
    self.allowed_modules: dict[str, None] = {name: None for name in allowed_modules}
allowed_modules: dict[str, None]
def open_browser(url: str) -> bool:  # pragma: no cover
    """
    Try to show `url` in a graphical web browser.

    Unlike `webbrowser.open`, only a fixed list of browsers is tried, in order.
    On headless servers this degrades gracefully to a no-op, whereas
    `webbrowser.open` would otherwise open lynx.

    Returns:

    - `True`, if a browser has been opened
    - `False`, if no suitable browser has been found.
    """
    candidates = (
        "windows-default",
        "macosx",
        "wslview %s",
        "x-www-browser %s",
        "gnome-open %s",
        "google-chrome",
        "chrome",
        "chromium",
        "chromium-browser",
        "firefox",
        "opera",
        "safari",
    )
    for name in candidates:
        try:
            controller = webbrowser.get(name)
        except webbrowser.Error:
            # This browser is not available here; move on to the next one.
            continue
        if controller.open(url):
            return True
    return False

Open a URL in a browser window. In contrast to webbrowser.open, we limit the list of suitable browsers. This gracefully degrades to a no-op on headless servers, where webbrowser.open would otherwise open lynx.

Returns:

  • True, if a browser has been opened
  • False, if no suitable browser has been found.