Edit on GitHub

pdoc.web

This module implements pdoc's live-reloading webserver.

We want to keep the number of dependencies as small as possible, so we are content with the builtin http.server module. It is a bit unergonomic compared to, say, Flask, but good enough for our purposes.

  1"""
  2This module implements pdoc's live-reloading webserver.
  3
  4We want to keep the number of dependencies as small as possible,
  5so we are content with the builtin `http.server` module.
  6It is a bit unergonomic compared to let's say flask, but good enough for our purposes.
  7"""
  8from __future__ import annotations
  9
 10from collections.abc import Iterable
 11from collections.abc import Iterator
 12import http.server
 13import traceback
 14from typing import Mapping
 15import warnings
 16import webbrowser
 17
 18from pdoc import doc
 19from pdoc import extract
 20from pdoc import render
 21from pdoc._compat import cache
 22from pdoc._compat import removesuffix
 23
 24
 25class DocHandler(http.server.BaseHTTPRequestHandler):
 26    """A handler for individual requests."""
 27
 28    server: DocServer
 29    """A reference to the main web server."""
 30
 31    def do_HEAD(self):
 32        try:
 33            return self.handle_request()
 34        except ConnectionError:  # pragma: no cover
 35            pass
 36
 37    def do_GET(self):
 38        try:
 39            self.wfile.write(self.handle_request().encode())
 40        except ConnectionError:  # pragma: no cover
 41            pass
 42
 43    def handle_request(self) -> str:
 44        """Actually handle a request. Called by `do_HEAD` and `do_GET`."""
 45        path = self.path.split("?", 1)[0]
 46
 47        if path == "/" or path == "/index.html":
 48            out = render.html_index(self.server.all_modules)
 49        elif path == "/search.js":
 50            self.send_response(200)
 51            self.send_header("content-type", "application/javascript")
 52            self.end_headers()
 53            return self.server.render_search_index()
 54        elif "." in removesuffix(path, ".html"):
 55            # See https://github.com/mitmproxy/pdoc/issues/615: All module separators should be normalized to "/".
 56            # We could redirect here, but that would create the impression of a working link, which will fall apart
 57            # when pdoc prerenders to static HTML. So we rather fail early.
 58            self.send_response(404)
 59            self.end_headers()
 60            return "Not Found: Please normalize all module separators to '/'."
 61        else:
 62            module_name = removesuffix(path.lstrip("/"), ".html").replace("/", ".")
 63            if module_name not in self.server.all_modules:
 64                self.send_response(404)
 65                self.send_header("content-type", "text/html")
 66                self.end_headers()
 67                return render.html_error(error=f"Module {module_name!r} not found")
 68
 69            mtime = ""
 70            t = extract.module_mtime(module_name)
 71            if t:
 72                mtime = f"{t:.1f}"
 73            if "mtime=1" in self.path:
 74                self.send_response(200)
 75                self.send_header("content-type", "text/plain")
 76                self.end_headers()
 77                return mtime
 78
 79            try:
 80                extract.invalidate_caches(module_name)
 81                mod = self.server.all_modules[module_name]
 82                out = render.html_module(
 83                    module=mod,
 84                    all_modules=self.server.all_modules,
 85                    mtime=mtime,
 86                )
 87            except Exception:
 88                self.send_response(500)
 89                self.send_header("content-type", "text/html")
 90                self.end_headers()
 91                return render.html_error(
 92                    error=f"Error importing {module_name!r}",
 93                    details=traceback.format_exc(),
 94                )
 95
 96        self.send_response(200)
 97        self.send_header("content-type", "text/html")
 98        self.end_headers()
 99        return out
100
101    def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
102        """Override logging to disable it."""
103
104
class DocServer(http.server.HTTPServer):
    """HTTP server that serves pdoc's live-reloading documentation."""

    all_modules: AllModules

    def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
        """Set up the server on `addr` with `DocHandler` and collect the modules named by `specs`."""
        super().__init__(addr, DocHandler, **kwargs)  # type: ignore
        self.all_modules = AllModules(extract.walk_specs(specs))

    @cache
    def render_search_index(self) -> str:
        """Return the rendered search index. The result is cached for performance reasons."""
        # Accessing a module that cannot be imported raises a RuntimeError. Every module is
        # therefore loaded up front, and only the ones that load are handed to the renderer.
        importable = {}
        for name in self.all_modules:
            try:
                module = doc.Module.from_name(name)
            except RuntimeError:
                warnings.warn(f"Error importing {name!r}:\n{traceback.format_exc()}")
            else:
                importable[name] = module
        return render.search_index(importable)
127
128
class AllModules(Mapping[str, doc.Module]):
    """A read-only mapping of module names to lazily loaded modules.

    It can be used like a regular dict, but for performance reasons a module is only
    imported when its value is requested. As a consequence `__getitem__` may raise a
    RuntimeError. HTML rendering can ignore this because the default templates never
    read the values of all_modules; the search index needs extra handling.
    """

    def __init__(self, allowed_modules: Iterable[str]):
        # dict keys keep insertion order; the values are never used.
        self.allowed_modules: dict[str, None] = {name: None for name in allowed_modules}

    def __len__(self) -> int:
        return len(self.allowed_modules)

    def __iter__(self) -> Iterator[str]:
        return iter(self.allowed_modules)

    def __contains__(self, item):
        return item in self.allowed_modules

    def __getitem__(self, item: str):
        if item not in self.allowed_modules:  # pragma: no cover
            raise KeyError(item)
        return doc.Module.from_name(item)
156
157
158# https://github.com/mitmproxy/mitmproxy/blob/af3dfac85541ce06c0e3302a4ba495fe3c77b18a/mitmproxy/tools/web/webaddons.py#L35-L61
def open_browser(url: str) -> bool:  # pragma: no cover
    """
    Try to show `url` in one of a fixed list of browsers.

    Unlike `webbrowser.open`, only the browsers listed below are considered.
    On headless servers this degrades to a no-op, where `webbrowser.open`
    would otherwise open lynx.

    Returns:

    - `True`, if a browser has been opened
    - `False`, if no suitable browser has been found.
    """
    candidates = (
        "windows-default",
        "macosx",
        "wslview %s",
        "x-www-browser %s",
        "gnome-open %s",
        "google-chrome",
        "chrome",
        "chromium",
        "chromium-browser",
        "firefox",
        "opera",
        "safari",
    )
    for name in candidates:
        try:
            controller = webbrowser.get(name)
        except webbrowser.Error:
            # No controller available under this name; try the next one.
            continue
        if controller.open(url):
            return True
    return False
class DocHandler(http.server.BaseHTTPRequestHandler):
 26class DocHandler(http.server.BaseHTTPRequestHandler):
 27    """A handler for individual requests."""
 28
 29    server: DocServer
 30    """A reference to the main web server."""
 31
 32    def do_HEAD(self):
 33        try:
 34            return self.handle_request()
 35        except ConnectionError:  # pragma: no cover
 36            pass
 37
 38    def do_GET(self):
 39        try:
 40            self.wfile.write(self.handle_request().encode())
 41        except ConnectionError:  # pragma: no cover
 42            pass
 43
 44    def handle_request(self) -> str:
 45        """Actually handle a request. Called by `do_HEAD` and `do_GET`."""
 46        path = self.path.split("?", 1)[0]
 47
 48        if path == "/" or path == "/index.html":
 49            out = render.html_index(self.server.all_modules)
 50        elif path == "/search.js":
 51            self.send_response(200)
 52            self.send_header("content-type", "application/javascript")
 53            self.end_headers()
 54            return self.server.render_search_index()
 55        elif "." in removesuffix(path, ".html"):
 56            # See https://github.com/mitmproxy/pdoc/issues/615: All module separators should be normalized to "/".
 57            # We could redirect here, but that would create the impression of a working link, which will fall apart
 58            # when pdoc prerenders to static HTML. So we rather fail early.
 59            self.send_response(404)
 60            self.end_headers()
 61            return "Not Found: Please normalize all module separators to '/'."
 62        else:
 63            module_name = removesuffix(path.lstrip("/"), ".html").replace("/", ".")
 64            if module_name not in self.server.all_modules:
 65                self.send_response(404)
 66                self.send_header("content-type", "text/html")
 67                self.end_headers()
 68                return render.html_error(error=f"Module {module_name!r} not found")
 69
 70            mtime = ""
 71            t = extract.module_mtime(module_name)
 72            if t:
 73                mtime = f"{t:.1f}"
 74            if "mtime=1" in self.path:
 75                self.send_response(200)
 76                self.send_header("content-type", "text/plain")
 77                self.end_headers()
 78                return mtime
 79
 80            try:
 81                extract.invalidate_caches(module_name)
 82                mod = self.server.all_modules[module_name]
 83                out = render.html_module(
 84                    module=mod,
 85                    all_modules=self.server.all_modules,
 86                    mtime=mtime,
 87                )
 88            except Exception:
 89                self.send_response(500)
 90                self.send_header("content-type", "text/html")
 91                self.end_headers()
 92                return render.html_error(
 93                    error=f"Error importing {module_name!r}",
 94                    details=traceback.format_exc(),
 95                )
 96
 97        self.send_response(200)
 98        self.send_header("content-type", "text/html")
 99        self.end_headers()
100        return out
101
102    def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
103        """Override logging to disable it."""

A handler for individual requests.

server: DocServer

A reference to the main web server.

def do_HEAD(self):
32    def do_HEAD(self):
33        try:
34            return self.handle_request()
35        except ConnectionError:  # pragma: no cover
36            pass
def do_GET(self):
38    def do_GET(self):
39        try:
40            self.wfile.write(self.handle_request().encode())
41        except ConnectionError:  # pragma: no cover
42            pass
def handle_request(self) -> str:
 44    def handle_request(self) -> str:
 45        """Actually handle a request. Called by `do_HEAD` and `do_GET`."""
 46        path = self.path.split("?", 1)[0]
 47
 48        if path == "/" or path == "/index.html":
 49            out = render.html_index(self.server.all_modules)
 50        elif path == "/search.js":
 51            self.send_response(200)
 52            self.send_header("content-type", "application/javascript")
 53            self.end_headers()
 54            return self.server.render_search_index()
 55        elif "." in removesuffix(path, ".html"):
 56            # See https://github.com/mitmproxy/pdoc/issues/615: All module separators should be normalized to "/".
 57            # We could redirect here, but that would create the impression of a working link, which will fall apart
 58            # when pdoc prerenders to static HTML. So we rather fail early.
 59            self.send_response(404)
 60            self.end_headers()
 61            return "Not Found: Please normalize all module separators to '/'."
 62        else:
 63            module_name = removesuffix(path.lstrip("/"), ".html").replace("/", ".")
 64            if module_name not in self.server.all_modules:
 65                self.send_response(404)
 66                self.send_header("content-type", "text/html")
 67                self.end_headers()
 68                return render.html_error(error=f"Module {module_name!r} not found")
 69
 70            mtime = ""
 71            t = extract.module_mtime(module_name)
 72            if t:
 73                mtime = f"{t:.1f}"
 74            if "mtime=1" in self.path:
 75                self.send_response(200)
 76                self.send_header("content-type", "text/plain")
 77                self.end_headers()
 78                return mtime
 79
 80            try:
 81                extract.invalidate_caches(module_name)
 82                mod = self.server.all_modules[module_name]
 83                out = render.html_module(
 84                    module=mod,
 85                    all_modules=self.server.all_modules,
 86                    mtime=mtime,
 87                )
 88            except Exception:
 89                self.send_response(500)
 90                self.send_header("content-type", "text/html")
 91                self.end_headers()
 92                return render.html_error(
 93                    error=f"Error importing {module_name!r}",
 94                    details=traceback.format_exc(),
 95                )
 96
 97        self.send_response(200)
 98        self.send_header("content-type", "text/html")
 99        self.end_headers()
100        return out

Actually handle a request. Called by do_HEAD and do_GET.

def log_request(self, code: int | str = Ellipsis, size: int | str = Ellipsis) -> None:
102    def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
103        """Override logging to disable it."""

Override logging to disable it.

Inherited Members
socketserver.BaseRequestHandler
BaseRequestHandler
request
client_address
http.server.BaseHTTPRequestHandler
sys_version
server_version
error_message_format
error_content_type
default_request_version
parse_request
handle_expect_100
handle_one_request
handle
send_error
send_response
send_response_only
send_header
end_headers
flush_headers
log_error
log_message
version_string
date_time_string
log_date_time_string
weekdayname
monthname
address_string
protocol_version
MessageClass
responses
socketserver.StreamRequestHandler
rbufsize
wbufsize
timeout
disable_nagle_algorithm
setup
finish
class DocServer(http.server.HTTPServer):
class DocServer(http.server.HTTPServer):
    """HTTP server that serves pdoc's live-reloading documentation."""

    all_modules: AllModules

    def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
        """Set up the server on `addr` with `DocHandler` and collect the modules named by `specs`."""
        super().__init__(addr, DocHandler, **kwargs)  # type: ignore
        self.all_modules = AllModules(extract.walk_specs(specs))

    @cache
    def render_search_index(self) -> str:
        """Return the rendered search index. The result is cached for performance reasons."""
        # Accessing a module that cannot be imported raises a RuntimeError. Every module is
        # therefore loaded up front, and only the ones that load are handed to the renderer.
        importable = {}
        for name in self.all_modules:
            try:
                module = doc.Module.from_name(name)
            except RuntimeError:
                warnings.warn(f"Error importing {name!r}:\n{traceback.format_exc()}")
            else:
                importable[name] = module
        return render.search_index(importable)

pdoc's live-reloading web server

DocServer(addr: tuple[str, int], specs: list[str], **kwargs)
111    def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
112        super().__init__(addr, DocHandler, **kwargs)  # type: ignore
113        module_names = extract.walk_specs(specs)
114        self.all_modules = AllModules(module_names)

Constructor. May be extended, do not override.

all_modules: AllModules
@cache
def render_search_index(self) -> str:
116    @cache
117    def render_search_index(self) -> str:
118        """Render the search index. For performance reasons this is always cached."""
119        # Some modules may not be importable, which means that they would raise an RuntimeError
120        # when accessed. We "fix" this by pre-loading all modules here and only passing the ones that work.
121        all_modules_safe = {}
122        for mod in self.all_modules:
123            try:
124                all_modules_safe[mod] = doc.Module.from_name(mod)
125            except RuntimeError:
126                warnings.warn(f"Error importing {mod!r}:\n{traceback.format_exc()}")
127        return render.search_index(all_modules_safe)

Render the search index. For performance reasons this is always cached.

Inherited Members
http.server.HTTPServer
allow_reuse_address
server_bind
socketserver.TCPServer
address_family
socket_type
request_queue_size
allow_reuse_port
socket
server_activate
server_close
fileno
get_request
shutdown_request
close_request
socketserver.BaseServer
timeout
server_address
RequestHandlerClass
serve_forever
shutdown
service_actions
handle_request
handle_timeout
verify_request
process_request
finish_request
handle_error
class AllModules(typing.Mapping[str, pdoc.doc.Module]):
class AllModules(Mapping[str, doc.Module]):
    """A read-only mapping of module names to lazily loaded modules.

    It can be used like a regular dict, but for performance reasons a module is only
    imported when its value is requested. As a consequence `__getitem__` may raise a
    RuntimeError. HTML rendering can ignore this because the default templates never
    read the values of all_modules; the search index needs extra handling.
    """

    def __init__(self, allowed_modules: Iterable[str]):
        # dict keys keep insertion order; the values are never used.
        self.allowed_modules: dict[str, None] = {name: None for name in allowed_modules}

    def __len__(self) -> int:
        return len(self.allowed_modules)

    def __iter__(self) -> Iterator[str]:
        return iter(self.allowed_modules)

    def __contains__(self, item):
        return item in self.allowed_modules

    def __getitem__(self, item: str):
        if item not in self.allowed_modules:  # pragma: no cover
            raise KeyError(item)
        return doc.Module.from_name(item)

A lazy-loading implementation of all_modules.

This behaves like a regular dict, but modules are only imported on demand for performance reasons. This has the somewhat annoying side effect that __getitem__ may raise a RuntimeError. We can ignore that when rendering HTML as the default templates do not access all_modules values, but we need to perform additional steps for the search index.

AllModules(allowed_modules: collections.abc.Iterable[str])
139    def __init__(self, allowed_modules: Iterable[str]):
140        # use a dict to preserve order
141        self.allowed_modules: dict[str, None] = dict.fromkeys(allowed_modules)
allowed_modules: dict[str, None]
Inherited Members
collections.abc.Mapping
get
keys
items
values
def open_browser(url: str) -> bool:
def open_browser(url: str) -> bool:  # pragma: no cover
    """
    Try to show `url` in one of a fixed list of browsers.

    Unlike `webbrowser.open`, only the browsers listed below are considered.
    On headless servers this degrades to a no-op, where `webbrowser.open`
    would otherwise open lynx.

    Returns:

    - `True`, if a browser has been opened
    - `False`, if no suitable browser has been found.
    """
    candidates = (
        "windows-default",
        "macosx",
        "wslview %s",
        "x-www-browser %s",
        "gnome-open %s",
        "google-chrome",
        "chrome",
        "chromium",
        "chromium-browser",
        "firefox",
        "opera",
        "safari",
    )
    for name in candidates:
        try:
            controller = webbrowser.get(name)
        except webbrowser.Error:
            # No controller available under this name; try the next one.
            continue
        if controller.open(url):
            return True
    return False

Open a URL in a browser window. In contrast to webbrowser.open, we limit the list of suitable browsers. This gracefully degrades to a no-op on headless servers, where webbrowser.open would otherwise open lynx.

Returns:

  • True, if a browser has been opened
  • False, if no suitable browser has been found.