Edit on GitHub

pdoc.web

This module implements pdoc's live-reloading webserver.

We want to keep the number of dependencies as small as possible, so we are content with the built-in http.server module. It is a bit less ergonomic than, say, Flask, but good enough for our purposes.

View Source

  1"""
  2This module implements pdoc's live-reloading webserver.
  3
  4We want to keep the number of dependencies as small as possible,
  5so we are content with the builtin `http.server` module.
  6It is a bit unergonomic compared to let's say flask, but good enough for our purposes.
  7"""
  8
  9from __future__ import annotations
 10
 11from collections.abc import Iterable
 12from collections.abc import Iterator
 13from functools import cache
 14import http.server
 15import traceback
 16from typing import Mapping
 17import warnings
 18import webbrowser
 19
 20from pdoc import doc
 21from pdoc import extract
 22from pdoc import render
 23
 24
 25class DocHandler(http.server.BaseHTTPRequestHandler):
 26    """A handler for individual requests."""
 27
 28    server: DocServer
 29    """A reference to the main web server."""
 30
 31    def do_HEAD(self):
 32        try:
 33            return self.handle_request()
 34        except ConnectionError:  # pragma: no cover
 35            pass
 36
 37    def do_GET(self):
 38        try:
 39            self.wfile.write(self.handle_request().encode())
 40        except ConnectionError:  # pragma: no cover
 41            pass
 42
 43    def handle_request(self) -> str:
 44        """Actually handle a request. Called by `do_HEAD` and `do_GET`."""
 45        path = self.path.split("?", 1)[0]
 46
 47        if path == "/" or path == "/index.html":
 48            out = render.html_index(self.server.all_modules)
 49        elif path == "/search.js":
 50            self.send_response(200)
 51            self.send_header("content-type", "application/javascript")
 52            self.end_headers()
 53            return self.server.render_search_index()
 54        elif "." in path.removesuffix(".html"):
 55            # See https://github.com/mitmproxy/pdoc/issues/615: All module separators should be normalized to "/".
 56            # We could redirect here, but that would create the impression of a working link, which will fall apart
 57            # when pdoc prerenders to static HTML. So we rather fail early.
 58            self.send_response(404)
 59            self.end_headers()
 60            return "Not Found: Please normalize all module separators to '/'."
 61        else:
 62            module_name = path.lstrip("/").removesuffix(".html").replace("/", ".")
 63            if module_name not in self.server.all_modules:
 64                self.send_response(404)
 65                self.send_header("content-type", "text/html")
 66                self.end_headers()
 67                return render.html_error(error=f"Module {module_name!r} not found")
 68
 69            mtime = ""
 70            t = extract.module_mtime(module_name)
 71            if t:
 72                mtime = f"{t:.1f}"
 73            if "mtime=1" in self.path:
 74                self.send_response(200)
 75                self.send_header("content-type", "text/plain")
 76                self.end_headers()
 77                return mtime
 78
 79            try:
 80                extract.invalidate_caches(module_name)
 81                mod = self.server.all_modules[module_name]
 82                out = render.html_module(
 83                    module=mod,
 84                    all_modules=self.server.all_modules,
 85                    mtime=mtime,
 86                )
 87            except Exception:
 88                self.send_response(500)
 89                self.send_header("content-type", "text/html")
 90                self.end_headers()
 91                return render.html_error(
 92                    error=f"Error importing {module_name!r}",
 93                    details=traceback.format_exc(),
 94                )
 95
 96        self.send_response(200)
 97        self.send_header("content-type", "text/html")
 98        self.end_headers()
 99        return out
100
101    def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
102        """Override logging to disable it."""
103
104
class DocServer(http.server.HTTPServer):
    """pdoc's live-reloading web server"""

    all_modules: AllModules

    # Per-instance memo for `render_search_index`; stays None until the index is first rendered.
    # This replaces `functools.cache` on the method, which would key a module-global cache on
    # `self` and thereby keep every server instance alive for the lifetime of the process.
    _search_index: str | None = None

    def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
        """
        Set up the HTTP server on `addr` with `DocHandler` as request handler and
        resolve `specs` into the collection of module names to serve.

        Extra keyword arguments are forwarded to `http.server.HTTPServer`.
        """
        super().__init__(addr, DocHandler, **kwargs)  # type: ignore
        module_names = extract.walk_specs(specs)
        self.all_modules = AllModules(module_names)

    def render_search_index(self) -> str:
        """Render the search index. For performance reasons this is always cached."""
        if self._search_index is None:
            # Some modules may not be importable, which means that they would raise a RuntimeError
            # when accessed. We "fix" this by pre-loading all modules here and only passing the ones that work.
            all_modules_safe = {}
            for mod in self.all_modules:
                try:
                    all_modules_safe[mod] = doc.Module.from_name(mod)
                except RuntimeError:
                    warnings.warn(f"Error importing {mod!r}:\n{traceback.format_exc()}")
            # Only stored on success: if rendering raises, the next call retries.
            self._search_index = render.search_index(all_modules_safe)
        return self._search_index
127
128
class AllModules(Mapping[str, doc.Module]):
    """A lazily populated, read-only mapping from module name to `doc.Module`.

    Keys are known up front; a value is only produced (via `doc.Module.from_name`) when it is looked up.
    As a consequence, `__getitem__` may raise a RuntimeError for modules that cannot be loaded.
    That is acceptable when rendering HTML as the default templates do not access all_modules values,
    but the search index needs to take additional steps.
    """

    def __init__(self, allowed_modules: Iterable[str]):
        # A dict (rather than a set) keeps the names in their original order.
        self.allowed_modules: dict[str, None] = {name: None for name in allowed_modules}

    def __len__(self) -> int:
        return len(self.allowed_modules)

    def __iter__(self) -> Iterator[str]:
        return iter(self.allowed_modules)

    def __contains__(self, item):
        # Membership is answered from the names alone, without importing anything.
        return item in self.allowed_modules

    def __getitem__(self, item: str):
        if item not in self.allowed_modules:  # pragma: no cover
            raise KeyError(item)
        return doc.Module.from_name(item)
156
157
158# https://github.com/mitmproxy/mitmproxy/blob/af3dfac85541ce06c0e3302a4ba495fe3c77b18a/mitmproxy/tools/web/webaddons.py#L35-L61
def open_browser(url: str) -> bool:  # pragma: no cover
    """
    Open a URL in a browser window.
    Unlike `webbrowser.open`, only an explicit list of browsers is tried, in order.
    This gracefully degrades to a no-op on headless servers, where `webbrowser.open`
    would otherwise open lynx.

    Returns:

    - `True`, if a browser has been opened
    - `False`, if no suitable browser has been found.
    """
    candidates = (
        "windows-default",
        "macosx",
        "wslview %s",
        "x-www-browser %s",
        "gnome-open %s",
        "google-chrome",
        "chrome",
        "chromium",
        "chromium-browser",
        "firefox",
        "opera",
        "safari",
    )
    for name in candidates:
        try:
            controller = webbrowser.get(name)
        except webbrowser.Error:
            # Not available on this system; try the next candidate.
            continue
        if controller.open(url):
            return True
    return False

class DocHandler(http.server.BaseHTTPRequestHandler): View Source

 26class DocHandler(http.server.BaseHTTPRequestHandler):
 27    """A handler for individual requests."""
 28
 29    server: DocServer
 30    """A reference to the main web server."""
 31
 32    def do_HEAD(self):
 33        try:
 34            return self.handle_request()
 35        except ConnectionError:  # pragma: no cover
 36            pass
 37
 38    def do_GET(self):
 39        try:
 40            self.wfile.write(self.handle_request().encode())
 41        except ConnectionError:  # pragma: no cover
 42            pass
 43
 44    def handle_request(self) -> str:
 45        """Actually handle a request. Called by `do_HEAD` and `do_GET`."""
 46        path = self.path.split("?", 1)[0]
 47
 48        if path == "/" or path == "/index.html":
 49            out = render.html_index(self.server.all_modules)
 50        elif path == "/search.js":
 51            self.send_response(200)
 52            self.send_header("content-type", "application/javascript")
 53            self.end_headers()
 54            return self.server.render_search_index()
 55        elif "." in path.removesuffix(".html"):
 56            # See https://github.com/mitmproxy/pdoc/issues/615: All module separators should be normalized to "/".
 57            # We could redirect here, but that would create the impression of a working link, which will fall apart
 58            # when pdoc prerenders to static HTML. So we rather fail early.
 59            self.send_response(404)
 60            self.end_headers()
 61            return "Not Found: Please normalize all module separators to '/'."
 62        else:
 63            module_name = path.lstrip("/").removesuffix(".html").replace("/", ".")
 64            if module_name not in self.server.all_modules:
 65                self.send_response(404)
 66                self.send_header("content-type", "text/html")
 67                self.end_headers()
 68                return render.html_error(error=f"Module {module_name!r} not found")
 69
 70            mtime = ""
 71            t = extract.module_mtime(module_name)
 72            if t:
 73                mtime = f"{t:.1f}"
 74            if "mtime=1" in self.path:
 75                self.send_response(200)
 76                self.send_header("content-type", "text/plain")
 77                self.end_headers()
 78                return mtime
 79
 80            try:
 81                extract.invalidate_caches(module_name)
 82                mod = self.server.all_modules[module_name]
 83                out = render.html_module(
 84                    module=mod,
 85                    all_modules=self.server.all_modules,
 86                    mtime=mtime,
 87                )
 88            except Exception:
 89                self.send_response(500)
 90                self.send_header("content-type", "text/html")
 91                self.end_headers()
 92                return render.html_error(
 93                    error=f"Error importing {module_name!r}",
 94                    details=traceback.format_exc(),
 95                )
 96
 97        self.send_response(200)
 98        self.send_header("content-type", "text/html")
 99        self.end_headers()
100        return out
101
102    def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
103        """Override logging to disable it."""

A handler for individual requests.

server: DocServer

A reference to the main web server.

def do_HEAD(self): View Source

32    def do_HEAD(self):
33        try:
34            return self.handle_request()
35        except ConnectionError:  # pragma: no cover
36            pass

def do_GET(self): View Source

38    def do_GET(self):
39        try:
40            self.wfile.write(self.handle_request().encode())
41        except ConnectionError:  # pragma: no cover
42            pass

def handle_request(self) -> str: View Source

 44    def handle_request(self) -> str:
 45        """Actually handle a request. Called by `do_HEAD` and `do_GET`."""
 46        path = self.path.split("?", 1)[0]
 47
 48        if path == "/" or path == "/index.html":
 49            out = render.html_index(self.server.all_modules)
 50        elif path == "/search.js":
 51            self.send_response(200)
 52            self.send_header("content-type", "application/javascript")
 53            self.end_headers()
 54            return self.server.render_search_index()
 55        elif "." in path.removesuffix(".html"):
 56            # See https://github.com/mitmproxy/pdoc/issues/615: All module separators should be normalized to "/".
 57            # We could redirect here, but that would create the impression of a working link, which will fall apart
 58            # when pdoc prerenders to static HTML. So we rather fail early.
 59            self.send_response(404)
 60            self.end_headers()
 61            return "Not Found: Please normalize all module separators to '/'."
 62        else:
 63            module_name = path.lstrip("/").removesuffix(".html").replace("/", ".")
 64            if module_name not in self.server.all_modules:
 65                self.send_response(404)
 66                self.send_header("content-type", "text/html")
 67                self.end_headers()
 68                return render.html_error(error=f"Module {module_name!r} not found")
 69
 70            mtime = ""
 71            t = extract.module_mtime(module_name)
 72            if t:
 73                mtime = f"{t:.1f}"
 74            if "mtime=1" in self.path:
 75                self.send_response(200)
 76                self.send_header("content-type", "text/plain")
 77                self.end_headers()
 78                return mtime
 79
 80            try:
 81                extract.invalidate_caches(module_name)
 82                mod = self.server.all_modules[module_name]
 83                out = render.html_module(
 84                    module=mod,
 85                    all_modules=self.server.all_modules,
 86                    mtime=mtime,
 87                )
 88            except Exception:
 89                self.send_response(500)
 90                self.send_header("content-type", "text/html")
 91                self.end_headers()
 92                return render.html_error(
 93                    error=f"Error importing {module_name!r}",
 94                    details=traceback.format_exc(),
 95                )
 96
 97        self.send_response(200)
 98        self.send_header("content-type", "text/html")
 99        self.end_headers()
100        return out

Actually handle a request. Called by do_HEAD and do_GET.

def log_request(self, code: int | str = Ellipsis, size: int | str = Ellipsis) -> None: View Source

102    def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
103        """Override logging to disable it."""

Override logging to disable it.

class DocServer(http.server.HTTPServer): View Source

class DocServer(http.server.HTTPServer):
    """pdoc's live-reloading web server"""

    all_modules: AllModules

    # Per-instance memo for `render_search_index`; stays None until the index is first rendered.
    # This replaces `functools.cache` on the method, which would key a module-global cache on
    # `self` and thereby keep every server instance alive for the lifetime of the process.
    _search_index: str | None = None

    def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
        """
        Set up the HTTP server on `addr` with `DocHandler` as request handler and
        resolve `specs` into the collection of module names to serve.

        Extra keyword arguments are forwarded to `http.server.HTTPServer`.
        """
        super().__init__(addr, DocHandler, **kwargs)  # type: ignore
        module_names = extract.walk_specs(specs)
        self.all_modules = AllModules(module_names)

    def render_search_index(self) -> str:
        """Render the search index. For performance reasons this is always cached."""
        if self._search_index is None:
            # Some modules may not be importable, which means that they would raise a RuntimeError
            # when accessed. We "fix" this by pre-loading all modules here and only passing the ones that work.
            all_modules_safe = {}
            for mod in self.all_modules:
                try:
                    all_modules_safe[mod] = doc.Module.from_name(mod)
                except RuntimeError:
                    warnings.warn(f"Error importing {mod!r}:\n{traceback.format_exc()}")
            # Only stored on success: if rendering raises, the next call retries.
            self._search_index = render.search_index(all_modules_safe)
        return self._search_index

pdoc's live-reloading web server

DocServer(addr: tuple[str, int], specs: list[str], **kwargs) View Source

111    def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
112        super().__init__(addr, DocHandler, **kwargs)  # type: ignore
113        module_names = extract.walk_specs(specs)
114        self.all_modules = AllModules(module_names)

Constructor. May be extended, do not override.

all_modules: AllModules

@cache

def render_search_index(self) -> str: View Source

116    @cache
117    def render_search_index(self) -> str:
118        """Render the search index. For performance reasons this is always cached."""
119        # Some modules may not be importable, which means that they would raise an RuntimeError
120        # when accessed. We "fix" this by pre-loading all modules here and only passing the ones that work.
121        all_modules_safe = {}
122        for mod in self.all_modules:
123            try:
124                all_modules_safe[mod] = doc.Module.from_name(mod)
125            except RuntimeError:
126                warnings.warn(f"Error importing {mod!r}:\n{traceback.format_exc()}")
127        return render.search_index(all_modules_safe)

Render the search index. For performance reasons this is always cached.

class AllModules(typing.Mapping[str, pdoc.doc.Module]): View Source

class AllModules(Mapping[str, doc.Module]):
    """A lazily populated, read-only mapping from module name to `doc.Module`.

    Keys are known up front; a value is only produced (via `doc.Module.from_name`) when it is looked up.
    As a consequence, `__getitem__` may raise a RuntimeError for modules that cannot be loaded.
    That is acceptable when rendering HTML as the default templates do not access all_modules values,
    but the search index needs to take additional steps.
    """

    def __init__(self, allowed_modules: Iterable[str]):
        # A dict (rather than a set) keeps the names in their original order.
        self.allowed_modules: dict[str, None] = {name: None for name in allowed_modules}

    def __len__(self) -> int:
        return len(self.allowed_modules)

    def __iter__(self) -> Iterator[str]:
        return iter(self.allowed_modules)

    def __contains__(self, item):
        # Membership is answered from the names alone, without importing anything.
        return item in self.allowed_modules

    def __getitem__(self, item: str):
        if item not in self.allowed_modules:  # pragma: no cover
            raise KeyError(item)
        return doc.Module.from_name(item)

A lazy-loading implementation of all_modules.

This behaves like a regular dict, but modules are only imported on demand for performance reasons. This has the somewhat annoying side effect that __getitem__ may raise a RuntimeError. We can ignore that when rendering HTML as the default templates do not access all_modules values, but we need to perform additional steps for the search index.

AllModules(allowed_modules: Iterable[str]) View Source

139    def __init__(self, allowed_modules: Iterable[str]):
140        # use a dict to preserve order
141        self.allowed_modules: dict[str, None] = dict.fromkeys(allowed_modules)

allowed_modules: dict[str, None]

def open_browser(url: str) -> bool: View Source

def open_browser(url: str) -> bool:  # pragma: no cover
    """
    Open a URL in a browser window.
    Unlike `webbrowser.open`, only an explicit list of browsers is tried, in order.
    This gracefully degrades to a no-op on headless servers, where `webbrowser.open`
    would otherwise open lynx.

    Returns:

    - `True`, if a browser has been opened
    - `False`, if no suitable browser has been found.
    """
    candidates = (
        "windows-default",
        "macosx",
        "wslview %s",
        "x-www-browser %s",
        "gnome-open %s",
        "google-chrome",
        "chrome",
        "chromium",
        "chromium-browser",
        "firefox",
        "opera",
        "safari",
    )
    for name in candidates:
        try:
            controller = webbrowser.get(name)
        except webbrowser.Error:
            # Not available on this system; try the next candidate.
            continue
        if controller.open(url):
            return True
    return False

Open a URL in a browser window. In contrast to webbrowser.open, we limit the list of suitable browsers. This gracefully degrades to a no-op on headless servers, where webbrowser.open would otherwise open lynx.

Returns:

True, if a browser has been opened
False, if no suitable browser has been found.