Edit on GitHub

pdoc.web

This module implements pdoc's live-reloading webserver.

We want to keep the number of dependencies as small as possible, so we are content with the builtin http.server module. It is a bit unergonomic compared to, let's say, Flask, but good enough for our purposes.

  1"""
  2This module implements pdoc's live-reloading webserver.
  3
  4We want to keep the number of dependencies as small as possible,
  5so we are content with the builtin `http.server` module.
  6It is a bit unergonomic compared to let's say flask, but good enough for our purposes.
  7"""
  8from __future__ import annotations
  9
 10import http.server
 11import traceback
 12import warnings
 13import webbrowser
 14from collections.abc import Iterable
 15from collections.abc import Iterator
 16from typing import Mapping
 17
 18from pdoc import doc
 19from pdoc import extract
 20from pdoc import render
 21from pdoc._compat import cache
 22from pdoc._compat import removesuffix
 23
 24
 25class DocHandler(http.server.BaseHTTPRequestHandler):
 26    """A handler for individual requests."""
 27
 28    server: DocServer
 29    """A reference to the main web server."""
 30
 31    def do_HEAD(self):
 32        try:
 33            return self.handle_request()
 34        except ConnectionError:  # pragma: no cover
 35            pass
 36
 37    def do_GET(self):
 38        try:
 39            self.wfile.write(self.handle_request().encode())
 40        except ConnectionError:  # pragma: no cover
 41            pass
 42
 43    def handle_request(self) -> str | None:
 44        """Actually handle a request. Called by `do_HEAD` and `do_GET`."""
 45        path = self.path.split("?", 1)[0]
 46
 47        if path == "/" or path == "/index.html":
 48            out = render.html_index(self.server.all_modules)
 49        elif path == "/search.js":
 50            self.send_response(200)
 51            self.send_header("content-type", "application/javascript")
 52            self.end_headers()
 53            return self.server.render_search_index()
 54        else:
 55            module_name = removesuffix(path.lstrip("/"), ".html").replace("/", ".")
 56            if module_name not in self.server.all_modules:
 57                self.send_response(404)
 58                self.send_header("content-type", "text/html")
 59                self.end_headers()
 60                return render.html_error(error=f"Module {module_name!r} not found")
 61
 62            mtime = ""
 63            t = extract.module_mtime(module_name)
 64            if t:
 65                mtime = f"{t:.1f}"
 66            if "mtime=1" in self.path:
 67                self.send_response(200)
 68                self.send_header("content-type", "text/plain")
 69                self.end_headers()
 70                return mtime
 71
 72            try:
 73                extract.invalidate_caches(module_name)
 74                mod = self.server.all_modules[module_name]
 75                out = render.html_module(
 76                    module=mod,
 77                    all_modules=self.server.all_modules,
 78                    mtime=mtime,
 79                )
 80            except Exception:
 81                self.send_response(500)
 82                self.send_header("content-type", "text/html")
 83                self.end_headers()
 84                return render.html_error(
 85                    error=f"Error importing {module_name!r}",
 86                    details=traceback.format_exc(),
 87                )
 88
 89        self.send_response(200)
 90        self.send_header("content-type", "text/html")
 91        self.end_headers()
 92        return out
 93
 94    def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
 95        """Override logging to disable it."""
 96
 97
 98class DocServer(http.server.HTTPServer):
 99    """pdoc's live-reloading web server"""
100
101    all_modules: AllModules
102
103    def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
104        super().__init__(addr, DocHandler, **kwargs)  # type: ignore
105        module_names = extract.walk_specs(specs)
106        self.all_modules = AllModules(module_names)
107
108    @cache
109    def render_search_index(self) -> str:
110        """Render the search index. For performance reasons this is always cached."""
111        # Some modules may not be importable, which means that they would raise an RuntimeError
112        # when accessed. We "fix" this by pre-loading all modules here and only passing the ones that work.
113        all_modules_safe = {}
114        for mod in self.all_modules:
115            try:
116                all_modules_safe[mod] = doc.Module.from_name(mod)
117            except RuntimeError:
118                warnings.warn(f"Error importing {mod!r}:\n{traceback.format_exc()}")
119        return render.search_index(all_modules_safe)
120
121
class AllModules(Mapping[str, doc.Module]):
    """A lazy-loading implementation of all_modules.

    Acts like an ordinary read-only dict keyed by module name, except that a module is only
    imported at the moment its value is looked up. As a consequence, `__getitem__` may raise
    a RuntimeError. HTML rendering can ignore this because the default templates never access
    all_modules values, but the search index needs additional handling.
    """

    def __init__(self, allowed_modules: Iterable[str]):
        # a dict (rather than a set) keeps the names in their original order
        self.allowed_modules: dict[str, None] = {
            name: None for name in allowed_modules
        }

    def __len__(self) -> int:
        return len(self.allowed_modules)

    def __iter__(self) -> Iterator[str]:
        return iter(self.allowed_modules)

    def __contains__(self, item):
        # Answered from the name table alone, so membership tests never import anything.
        return item in self.allowed_modules

    def __getitem__(self, item: str):
        if item not in self.allowed_modules:  # pragma: no cover
            raise KeyError(item)
        return doc.Module.from_name(item)
149
150
151# https://github.com/mitmproxy/mitmproxy/blob/af3dfac85541ce06c0e3302a4ba495fe3c77b18a/mitmproxy/tools/web/webaddons.py#L35-L61
def open_browser(url: str) -> bool:  # pragma: no cover
    """
    Open a URL in a browser window.

    Unlike `webbrowser.open`, only a fixed list of suitable browsers is tried, in order.
    On headless servers this gracefully degrades to a no-op, whereas `webbrowser.open`
    would otherwise open lynx.

    Returns:

    - `True`, if a browser has been opened
    - `False`, if no suitable browser has been found.
    """
    candidates = (
        "windows-default",
        "macosx",
        "wslview %s",
        "x-www-browser %s",
        "gnome-open %s",
        "google-chrome",
        "chrome",
        "chromium",
        "chromium-browser",
        "firefox",
        "opera",
        "safari",
    )
    for name in candidates:
        try:
            controller = webbrowser.get(name)
        except webbrowser.Error:
            # Not available on this system; move on to the next candidate.
            continue
        if controller.open(url):
            return True
    return False
class DocHandler(http.server.BaseHTTPRequestHandler):
26class DocHandler(http.server.BaseHTTPRequestHandler):
27    """A handler for individual requests."""
28
29    server: DocServer
30    """A reference to the main web server."""
31
32    def do_HEAD(self):
33        try:
34            return self.handle_request()
35        except ConnectionError:  # pragma: no cover
36            pass
37
38    def do_GET(self):
39        try:
40            self.wfile.write(self.handle_request().encode())
41        except ConnectionError:  # pragma: no cover
42            pass
43
44    def handle_request(self) -> str | None:
45        """Actually handle a request. Called by `do_HEAD` and `do_GET`."""
46        path = self.path.split("?", 1)[0]
47
48        if path == "/" or path == "/index.html":
49            out = render.html_index(self.server.all_modules)
50        elif path == "/search.js":
51            self.send_response(200)
52            self.send_header("content-type", "application/javascript")
53            self.end_headers()
54            return self.server.render_search_index()
55        else:
56            module_name = removesuffix(path.lstrip("/"), ".html").replace("/", ".")
57            if module_name not in self.server.all_modules:
58                self.send_response(404)
59                self.send_header("content-type", "text/html")
60                self.end_headers()
61                return render.html_error(error=f"Module {module_name!r} not found")
62
63            mtime = ""
64            t = extract.module_mtime(module_name)
65            if t:
66                mtime = f"{t:.1f}"
67            if "mtime=1" in self.path:
68                self.send_response(200)
69                self.send_header("content-type", "text/plain")
70                self.end_headers()
71                return mtime
72
73            try:
74                extract.invalidate_caches(module_name)
75                mod = self.server.all_modules[module_name]
76                out = render.html_module(
77                    module=mod,
78                    all_modules=self.server.all_modules,
79                    mtime=mtime,
80                )
81            except Exception:
82                self.send_response(500)
83                self.send_header("content-type", "text/html")
84                self.end_headers()
85                return render.html_error(
86                    error=f"Error importing {module_name!r}",
87                    details=traceback.format_exc(),
88                )
89
90        self.send_response(200)
91        self.send_header("content-type", "text/html")
92        self.end_headers()
93        return out
94
95    def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
96        """Override logging to disable it."""

A handler for individual requests.

A reference to the main web server.

def do_HEAD(self):
32    def do_HEAD(self):
33        try:
34            return self.handle_request()
35        except ConnectionError:  # pragma: no cover
36            pass
def do_GET(self):
38    def do_GET(self):
39        try:
40            self.wfile.write(self.handle_request().encode())
41        except ConnectionError:  # pragma: no cover
42            pass
def handle_request(self) -> str | None:
44    def handle_request(self) -> str | None:
45        """Actually handle a request. Called by `do_HEAD` and `do_GET`."""
46        path = self.path.split("?", 1)[0]
47
48        if path == "/" or path == "/index.html":
49            out = render.html_index(self.server.all_modules)
50        elif path == "/search.js":
51            self.send_response(200)
52            self.send_header("content-type", "application/javascript")
53            self.end_headers()
54            return self.server.render_search_index()
55        else:
56            module_name = removesuffix(path.lstrip("/"), ".html").replace("/", ".")
57            if module_name not in self.server.all_modules:
58                self.send_response(404)
59                self.send_header("content-type", "text/html")
60                self.end_headers()
61                return render.html_error(error=f"Module {module_name!r} not found")
62
63            mtime = ""
64            t = extract.module_mtime(module_name)
65            if t:
66                mtime = f"{t:.1f}"
67            if "mtime=1" in self.path:
68                self.send_response(200)
69                self.send_header("content-type", "text/plain")
70                self.end_headers()
71                return mtime
72
73            try:
74                extract.invalidate_caches(module_name)
75                mod = self.server.all_modules[module_name]
76                out = render.html_module(
77                    module=mod,
78                    all_modules=self.server.all_modules,
79                    mtime=mtime,
80                )
81            except Exception:
82                self.send_response(500)
83                self.send_header("content-type", "text/html")
84                self.end_headers()
85                return render.html_error(
86                    error=f"Error importing {module_name!r}",
87                    details=traceback.format_exc(),
88                )
89
90        self.send_response(200)
91        self.send_header("content-type", "text/html")
92        self.end_headers()
93        return out

Actually handle a request. Called by do_HEAD and do_GET.

def log_request(self, code: int | str = Ellipsis, size: int | str = Ellipsis) -> None:
95    def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
96        """Override logging to disable it."""

Override logging to disable it.

Inherited Members
socketserver.BaseRequestHandler
BaseRequestHandler
http.server.BaseHTTPRequestHandler
parse_request
handle_expect_100
handle_one_request
handle
send_error
send_response
send_response_only
send_header
end_headers
flush_headers
log_error
log_message
version_string
date_time_string
log_date_time_string
address_string
socketserver.StreamRequestHandler
setup
finish
class DocServer(http.server.HTTPServer):
 99class DocServer(http.server.HTTPServer):
100    """pdoc's live-reloading web server"""
101
102    all_modules: AllModules
103
104    def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
105        super().__init__(addr, DocHandler, **kwargs)  # type: ignore
106        module_names = extract.walk_specs(specs)
107        self.all_modules = AllModules(module_names)
108
109    @cache
110    def render_search_index(self) -> str:
111        """Render the search index. For performance reasons this is always cached."""
112        # Some modules may not be importable, which means that they would raise an RuntimeError
113        # when accessed. We "fix" this by pre-loading all modules here and only passing the ones that work.
114        all_modules_safe = {}
115        for mod in self.all_modules:
116            try:
117                all_modules_safe[mod] = doc.Module.from_name(mod)
118            except RuntimeError:
119                warnings.warn(f"Error importing {mod!r}:\n{traceback.format_exc()}")
120        return render.search_index(all_modules_safe)

pdoc's live-reloading web server

DocServer(addr: tuple[str, int], specs: list[str], **kwargs)
104    def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
105        super().__init__(addr, DocHandler, **kwargs)  # type: ignore
106        module_names = extract.walk_specs(specs)
107        self.all_modules = AllModules(module_names)

Constructor. May be extended, do not override.

@cache
def render_search_index(self) -> str:
109    @cache
110    def render_search_index(self) -> str:
111        """Render the search index. For performance reasons this is always cached."""
112        # Some modules may not be importable, which means that they would raise an RuntimeError
113        # when accessed. We "fix" this by pre-loading all modules here and only passing the ones that work.
114        all_modules_safe = {}
115        for mod in self.all_modules:
116            try:
117                all_modules_safe[mod] = doc.Module.from_name(mod)
118            except RuntimeError:
119                warnings.warn(f"Error importing {mod!r}:\n{traceback.format_exc()}")
120        return render.search_index(all_modules_safe)

Render the search index. For performance reasons this is always cached.

Inherited Members
http.server.HTTPServer
server_bind
socketserver.TCPServer
address_family
socket_type
server_activate
server_close
fileno
get_request
shutdown_request
close_request
socketserver.BaseServer
serve_forever
shutdown
service_actions
handle_request
handle_timeout
verify_request
process_request
finish_request
handle_error
class AllModules(typing.Mapping[str, pdoc.doc.Module]):
123class AllModules(Mapping[str, doc.Module]):
124    """A lazy-loading implementation of all_modules.
125
126    This behaves like a regular dict, but modules are only imported on demand for performance reasons.
127    This has the somewhat annoying side effect that __getitem__ may raise a RuntimeError.
128    We can ignore that when rendering HTML as the default templates do not access all_modules values,
129    but we need to perform additional steps for the search index.
130    """
131
132    def __init__(self, allowed_modules: Iterable[str]):
133        # use a dict to preserve order
134        self.allowed_modules: dict[str, None] = dict.fromkeys(allowed_modules)
135
136    def __len__(self) -> int:
137        return self.allowed_modules.__len__()
138
139    def __iter__(self) -> Iterator[str]:
140        return self.allowed_modules.__iter__()
141
142    def __contains__(self, item):
143        return self.allowed_modules.__contains__(item)
144
145    def __getitem__(self, item: str):
146        if item in self.allowed_modules:
147            return doc.Module.from_name(item)
148        else:  # pragma: no cover
149            raise KeyError(item)

A lazy-loading implementation of all_modules.

This behaves like a regular dict, but modules are only imported on demand for performance reasons. This has the somewhat annoying side effect that __getitem__ may raise a RuntimeError. We can ignore that when rendering HTML as the default templates do not access all_modules values, but we need to perform additional steps for the search index.

AllModules(allowed_modules: collections.abc.Iterable[str])
132    def __init__(self, allowed_modules: Iterable[str]):
133        # use a dict to preserve order
134        self.allowed_modules: dict[str, None] = dict.fromkeys(allowed_modules)
Inherited Members
collections.abc.Mapping
get
keys
items
values
def open_browser(url: str) -> bool:
153def open_browser(url: str) -> bool:  # pragma: no cover
154    """
155    Open a URL in a browser window.
156    In contrast to `webbrowser.open`, we limit the list of suitable browsers.
157    This gracefully degrades to a no-op on headless servers, where `webbrowser.open`
158    would otherwise open lynx.
159
160    Returns:
161
162    - `True`, if a browser has been opened
163    - `False`, if no suitable browser has been found.
164    """
165    browsers = (
166        "windows-default",
167        "macosx",
168        "wslview %s",
169        "x-www-browser %s",
170        "gnome-open %s",
171        "google-chrome",
172        "chrome",
173        "chromium",
174        "chromium-browser",
175        "firefox",
176        "opera",
177        "safari",
178    )
179    for browser in browsers:
180        try:
181            b = webbrowser.get(browser)
182        except webbrowser.Error:
183            pass
184        else:
185            if b.open(url):
186                return True
187    return False

Open a URL in a browser window. In contrast to webbrowser.open, we limit the list of suitable browsers. This gracefully degrades to a no-op on headless servers, where webbrowser.open would otherwise open lynx.

Returns:

  • True, if a browser has been opened
  • False, if no suitable browser has been found.