Edit on GitHub

pdoc.web

This module implements pdoc's live-reloading webserver.

We want to keep the number of dependencies as small as possible, so we are content with the builtin http.server module. It is a bit unergonomic compared to, say, Flask, but good enough for our purposes.

  1"""
  2This module implements pdoc's live-reloading webserver.
  3
  4We want to keep the number of dependencies as small as possible,
  5so we are content with the builtin `http.server` module.
  6It is a bit unergonomic compared to let's say flask, but good enough for our purposes.
  7"""
  8
  9from __future__ import annotations
 10
 11from collections.abc import Iterable
 12from collections.abc import Iterator
 13import http.server
 14import traceback
 15from typing import Mapping
 16import warnings
 17import webbrowser
 18
 19from pdoc import doc
 20from pdoc import extract
 21from pdoc import render
 22from pdoc._compat import cache
 23from pdoc._compat import removesuffix
 24
 25
 26class DocHandler(http.server.BaseHTTPRequestHandler):
 27    """A handler for individual requests."""
 28
 29    server: DocServer
 30    """A reference to the main web server."""
 31
 32    def do_HEAD(self):
 33        try:
 34            return self.handle_request()
 35        except ConnectionError:  # pragma: no cover
 36            pass
 37
 38    def do_GET(self):
 39        try:
 40            self.wfile.write(self.handle_request().encode())
 41        except ConnectionError:  # pragma: no cover
 42            pass
 43
 44    def handle_request(self) -> str:
 45        """Actually handle a request. Called by `do_HEAD` and `do_GET`."""
 46        path = self.path.split("?", 1)[0]
 47
 48        if path == "/" or path == "/index.html":
 49            out = render.html_index(self.server.all_modules)
 50        elif path == "/search.js":
 51            self.send_response(200)
 52            self.send_header("content-type", "application/javascript")
 53            self.end_headers()
 54            return self.server.render_search_index()
 55        elif "." in removesuffix(path, ".html"):
 56            # See https://github.com/mitmproxy/pdoc/issues/615: All module separators should be normalized to "/".
 57            # We could redirect here, but that would create the impression of a working link, which will fall apart
 58            # when pdoc prerenders to static HTML. So we rather fail early.
 59            self.send_response(404)
 60            self.end_headers()
 61            return "Not Found: Please normalize all module separators to '/'."
 62        else:
 63            module_name = removesuffix(path.lstrip("/"), ".html").replace("/", ".")
 64            if module_name not in self.server.all_modules:
 65                self.send_response(404)
 66                self.send_header("content-type", "text/html")
 67                self.end_headers()
 68                return render.html_error(error=f"Module {module_name!r} not found")
 69
 70            mtime = ""
 71            t = extract.module_mtime(module_name)
 72            if t:
 73                mtime = f"{t:.1f}"
 74            if "mtime=1" in self.path:
 75                self.send_response(200)
 76                self.send_header("content-type", "text/plain")
 77                self.end_headers()
 78                return mtime
 79
 80            try:
 81                extract.invalidate_caches(module_name)
 82                mod = self.server.all_modules[module_name]
 83                out = render.html_module(
 84                    module=mod,
 85                    all_modules=self.server.all_modules,
 86                    mtime=mtime,
 87                )
 88            except Exception:
 89                self.send_response(500)
 90                self.send_header("content-type", "text/html")
 91                self.end_headers()
 92                return render.html_error(
 93                    error=f"Error importing {module_name!r}",
 94                    details=traceback.format_exc(),
 95                )
 96
 97        self.send_response(200)
 98        self.send_header("content-type", "text/html")
 99        self.end_headers()
100        return out
101
102    def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
103        """Override logging to disable it."""
104
105
class DocServer(http.server.HTTPServer):
    """pdoc's live-reloading web server"""

    # Lazy mapping from module name to documentation object, built from `specs`.
    all_modules: AllModules

    def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
        """Start the HTTP server on `addr` and collect the module names described by `specs`.

        Extra keyword arguments are forwarded to `http.server.HTTPServer`.
        """
        super().__init__(addr, DocHandler, **kwargs)  # type: ignore
        self.all_modules = AllModules(extract.walk_specs(specs))

    @cache
    def render_search_index(self) -> str:
        """Render the search index. For performance reasons this is always cached."""
        # Modules that fail to import raise a RuntimeError when loaded. Load every
        # module up front and hand only the ones that worked to the renderer.
        importable = {}
        for name in self.all_modules:
            try:
                loaded = doc.Module.from_name(name)
            except RuntimeError:
                warnings.warn(f"Error importing {name!r}:\n{traceback.format_exc()}")
            else:
                importable[name] = loaded
        return render.search_index(importable)
128
129
class AllModules(Mapping[str, doc.Module]):
    """A lazy-loading implementation of all_modules.

    Acts as a read-only mapping from module name to `doc.Module`. Only the names are
    stored; the module itself is loaded each time its key is looked up. As a consequence,
    `__getitem__` may raise a RuntimeError when a module cannot be loaded.
    We can ignore that when rendering HTML as the default templates do not access all_modules values,
    but we need to perform additional steps for the search index.
    """

    def __init__(self, allowed_modules: Iterable[str]):
        """Remember the given module names, dropping duplicates but keeping their order."""
        # a dict (rather than a set) keeps insertion order
        self.allowed_modules: dict[str, None] = {name: None for name in allowed_modules}

    def __len__(self) -> int:
        return len(self.allowed_modules)

    def __iter__(self) -> Iterator[str]:
        return iter(self.allowed_modules)

    def __contains__(self, item):
        # Membership is answered from the stored names alone; nothing is imported.
        return item in self.allowed_modules

    def __getitem__(self, item: str):
        if item not in self.allowed_modules:  # pragma: no cover
            raise KeyError(item)
        return doc.Module.from_name(item)
157
158
159# https://github.com/mitmproxy/mitmproxy/blob/af3dfac85541ce06c0e3302a4ba495fe3c77b18a/mitmproxy/tools/web/webaddons.py#L35-L61
def open_browser(url: str) -> bool:  # pragma: no cover
    """
    Open a URL in a browser window.

    Unlike `webbrowser.open`, only a fixed list of browsers is tried, in order.
    On headless servers this gracefully degrades to a no-op, where `webbrowser.open`
    would otherwise open lynx.

    Returns:

    - `True`, if a browser has been opened
    - `False`, if no suitable browser has been found.
    """
    candidates = (
        "windows-default",
        "macosx",
        "wslview %s",
        "x-www-browser %s",
        "gnome-open %s",
        "google-chrome",
        "chrome",
        "chromium",
        "chromium-browser",
        "firefox",
        "opera",
        "safari",
    )
    for name in candidates:
        try:
            controller = webbrowser.get(name)
        except webbrowser.Error:
            # This browser is not available here; try the next one.
            continue
        if controller.open(url):
            return True
    return False
class DocHandler(http.server.BaseHTTPRequestHandler):
 27class DocHandler(http.server.BaseHTTPRequestHandler):
 28    """A handler for individual requests."""
 29
 30    server: DocServer
 31    """A reference to the main web server."""
 32
 33    def do_HEAD(self):
 34        try:
 35            return self.handle_request()
 36        except ConnectionError:  # pragma: no cover
 37            pass
 38
 39    def do_GET(self):
 40        try:
 41            self.wfile.write(self.handle_request().encode())
 42        except ConnectionError:  # pragma: no cover
 43            pass
 44
 45    def handle_request(self) -> str:
 46        """Actually handle a request. Called by `do_HEAD` and `do_GET`."""
 47        path = self.path.split("?", 1)[0]
 48
 49        if path == "/" or path == "/index.html":
 50            out = render.html_index(self.server.all_modules)
 51        elif path == "/search.js":
 52            self.send_response(200)
 53            self.send_header("content-type", "application/javascript")
 54            self.end_headers()
 55            return self.server.render_search_index()
 56        elif "." in removesuffix(path, ".html"):
 57            # See https://github.com/mitmproxy/pdoc/issues/615: All module separators should be normalized to "/".
 58            # We could redirect here, but that would create the impression of a working link, which will fall apart
 59            # when pdoc prerenders to static HTML. So we rather fail early.
 60            self.send_response(404)
 61            self.end_headers()
 62            return "Not Found: Please normalize all module separators to '/'."
 63        else:
 64            module_name = removesuffix(path.lstrip("/"), ".html").replace("/", ".")
 65            if module_name not in self.server.all_modules:
 66                self.send_response(404)
 67                self.send_header("content-type", "text/html")
 68                self.end_headers()
 69                return render.html_error(error=f"Module {module_name!r} not found")
 70
 71            mtime = ""
 72            t = extract.module_mtime(module_name)
 73            if t:
 74                mtime = f"{t:.1f}"
 75            if "mtime=1" in self.path:
 76                self.send_response(200)
 77                self.send_header("content-type", "text/plain")
 78                self.end_headers()
 79                return mtime
 80
 81            try:
 82                extract.invalidate_caches(module_name)
 83                mod = self.server.all_modules[module_name]
 84                out = render.html_module(
 85                    module=mod,
 86                    all_modules=self.server.all_modules,
 87                    mtime=mtime,
 88                )
 89            except Exception:
 90                self.send_response(500)
 91                self.send_header("content-type", "text/html")
 92                self.end_headers()
 93                return render.html_error(
 94                    error=f"Error importing {module_name!r}",
 95                    details=traceback.format_exc(),
 96                )
 97
 98        self.send_response(200)
 99        self.send_header("content-type", "text/html")
100        self.end_headers()
101        return out
102
103    def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
104        """Override logging to disable it."""

A handler for individual requests.

server: DocServer

A reference to the main web server.

def do_HEAD(self):
33    def do_HEAD(self):
34        try:
35            return self.handle_request()
36        except ConnectionError:  # pragma: no cover
37            pass
def do_GET(self):
39    def do_GET(self):
40        try:
41            self.wfile.write(self.handle_request().encode())
42        except ConnectionError:  # pragma: no cover
43            pass
def handle_request(self) -> str:
 45    def handle_request(self) -> str:
 46        """Actually handle a request. Called by `do_HEAD` and `do_GET`."""
 47        path = self.path.split("?", 1)[0]
 48
 49        if path == "/" or path == "/index.html":
 50            out = render.html_index(self.server.all_modules)
 51        elif path == "/search.js":
 52            self.send_response(200)
 53            self.send_header("content-type", "application/javascript")
 54            self.end_headers()
 55            return self.server.render_search_index()
 56        elif "." in removesuffix(path, ".html"):
 57            # See https://github.com/mitmproxy/pdoc/issues/615: All module separators should be normalized to "/".
 58            # We could redirect here, but that would create the impression of a working link, which will fall apart
 59            # when pdoc prerenders to static HTML. So we rather fail early.
 60            self.send_response(404)
 61            self.end_headers()
 62            return "Not Found: Please normalize all module separators to '/'."
 63        else:
 64            module_name = removesuffix(path.lstrip("/"), ".html").replace("/", ".")
 65            if module_name not in self.server.all_modules:
 66                self.send_response(404)
 67                self.send_header("content-type", "text/html")
 68                self.end_headers()
 69                return render.html_error(error=f"Module {module_name!r} not found")
 70
 71            mtime = ""
 72            t = extract.module_mtime(module_name)
 73            if t:
 74                mtime = f"{t:.1f}"
 75            if "mtime=1" in self.path:
 76                self.send_response(200)
 77                self.send_header("content-type", "text/plain")
 78                self.end_headers()
 79                return mtime
 80
 81            try:
 82                extract.invalidate_caches(module_name)
 83                mod = self.server.all_modules[module_name]
 84                out = render.html_module(
 85                    module=mod,
 86                    all_modules=self.server.all_modules,
 87                    mtime=mtime,
 88                )
 89            except Exception:
 90                self.send_response(500)
 91                self.send_header("content-type", "text/html")
 92                self.end_headers()
 93                return render.html_error(
 94                    error=f"Error importing {module_name!r}",
 95                    details=traceback.format_exc(),
 96                )
 97
 98        self.send_response(200)
 99        self.send_header("content-type", "text/html")
100        self.end_headers()
101        return out

Actually handle a request. Called by do_HEAD and do_GET.

def log_request(self, code: int | str = Ellipsis, size: int | str = Ellipsis) -> None:
103    def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
104        """Override logging to disable it."""

Override logging to disable it.

Inherited Members
socketserver.BaseRequestHandler
BaseRequestHandler
request
client_address
http.server.BaseHTTPRequestHandler
sys_version
server_version
error_message_format
error_content_type
default_request_version
parse_request
handle_expect_100
handle_one_request
handle
send_error
send_response
send_response_only
send_header
end_headers
flush_headers
log_error
log_message
version_string
date_time_string
log_date_time_string
weekdayname
monthname
address_string
protocol_version
MessageClass
responses
socketserver.StreamRequestHandler
rbufsize
wbufsize
timeout
disable_nagle_algorithm
setup
finish
class DocServer(http.server.HTTPServer):
class DocServer(http.server.HTTPServer):
    """pdoc's live-reloading web server"""

    # Lazy mapping from module name to documentation object, built from `specs`.
    all_modules: AllModules

    def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
        """Start the HTTP server on `addr` and collect the module names described by `specs`.

        Extra keyword arguments are forwarded to `http.server.HTTPServer`.
        """
        super().__init__(addr, DocHandler, **kwargs)  # type: ignore
        self.all_modules = AllModules(extract.walk_specs(specs))

    @cache
    def render_search_index(self) -> str:
        """Render the search index. For performance reasons this is always cached."""
        # Modules that fail to import raise a RuntimeError when loaded. Load every
        # module up front and hand only the ones that worked to the renderer.
        importable = {}
        for name in self.all_modules:
            try:
                loaded = doc.Module.from_name(name)
            except RuntimeError:
                warnings.warn(f"Error importing {name!r}:\n{traceback.format_exc()}")
            else:
                importable[name] = loaded
        return render.search_index(importable)

pdoc's live-reloading web server

DocServer(addr: tuple[str, int], specs: list[str], **kwargs)
112    def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
113        super().__init__(addr, DocHandler, **kwargs)  # type: ignore
114        module_names = extract.walk_specs(specs)
115        self.all_modules = AllModules(module_names)

Constructor. May be extended, do not override.

all_modules: AllModules
@cache
def render_search_index(self) -> str:
117    @cache
118    def render_search_index(self) -> str:
119        """Render the search index. For performance reasons this is always cached."""
120        # Some modules may not be importable, which means that they would raise an RuntimeError
121        # when accessed. We "fix" this by pre-loading all modules here and only passing the ones that work.
122        all_modules_safe = {}
123        for mod in self.all_modules:
124            try:
125                all_modules_safe[mod] = doc.Module.from_name(mod)
126            except RuntimeError:
127                warnings.warn(f"Error importing {mod!r}:\n{traceback.format_exc()}")
128        return render.search_index(all_modules_safe)

Render the search index. For performance reasons this is always cached.

Inherited Members
http.server.HTTPServer
allow_reuse_address
server_bind
socketserver.TCPServer
address_family
socket_type
request_queue_size
allow_reuse_port
socket
server_activate
server_close
fileno
get_request
shutdown_request
close_request
socketserver.BaseServer
timeout
server_address
RequestHandlerClass
serve_forever
shutdown
service_actions
handle_request
handle_timeout
verify_request
process_request
finish_request
handle_error
class AllModules(typing.Mapping[str, pdoc.doc.Module]):
class AllModules(Mapping[str, doc.Module]):
    """A lazy-loading implementation of all_modules.

    Acts as a read-only mapping from module name to `doc.Module`. Only the names are
    stored; the module itself is loaded each time its key is looked up. As a consequence,
    `__getitem__` may raise a RuntimeError when a module cannot be loaded.
    We can ignore that when rendering HTML as the default templates do not access all_modules values,
    but we need to perform additional steps for the search index.
    """

    def __init__(self, allowed_modules: Iterable[str]):
        """Remember the given module names, dropping duplicates but keeping their order."""
        # a dict (rather than a set) keeps insertion order
        self.allowed_modules: dict[str, None] = {name: None for name in allowed_modules}

    def __len__(self) -> int:
        return len(self.allowed_modules)

    def __iter__(self) -> Iterator[str]:
        return iter(self.allowed_modules)

    def __contains__(self, item):
        # Membership is answered from the stored names alone; nothing is imported.
        return item in self.allowed_modules

    def __getitem__(self, item: str):
        if item not in self.allowed_modules:  # pragma: no cover
            raise KeyError(item)
        return doc.Module.from_name(item)

A lazy-loading implementation of all_modules.

This behaves like a regular dict, but modules are only imported on demand for performance reasons. This has the somewhat annoying side effect that __getitem__ may raise a RuntimeError. We can ignore that when rendering HTML as the default templates do not access all_modules values, but we need to perform additional steps for the search index.

AllModules(allowed_modules: Iterable[str])
140    def __init__(self, allowed_modules: Iterable[str]):
141        # use a dict to preserve order
142        self.allowed_modules: dict[str, None] = dict.fromkeys(allowed_modules)
allowed_modules: dict[str, None]
Inherited Members
collections.abc.Mapping
get
keys
items
values
def open_browser(url: str) -> bool:
def open_browser(url: str) -> bool:  # pragma: no cover
    """
    Open a URL in a browser window.

    Unlike `webbrowser.open`, only a fixed list of browsers is tried, in order.
    On headless servers this gracefully degrades to a no-op, where `webbrowser.open`
    would otherwise open lynx.

    Returns:

    - `True`, if a browser has been opened
    - `False`, if no suitable browser has been found.
    """
    candidates = (
        "windows-default",
        "macosx",
        "wslview %s",
        "x-www-browser %s",
        "gnome-open %s",
        "google-chrome",
        "chrome",
        "chromium",
        "chromium-browser",
        "firefox",
        "opera",
        "safari",
    )
    for name in candidates:
        try:
            controller = webbrowser.get(name)
        except webbrowser.Error:
            # This browser is not available here; try the next one.
            continue
        if controller.open(url):
            return True
    return False

Open a URL in a browser window. In contrast to webbrowser.open, we limit the list of suitable browsers. This gracefully degrades to a no-op on headless servers, where webbrowser.open would otherwise open lynx.

Returns:

  • True, if a browser has been opened
  • False, if no suitable browser has been found.