Edit on GitHub

pdoc.web

This module implements pdoc's live-reloading webserver.

We want to keep the number of dependencies as small as possible, so we are content with the builtin http.server module. It is a bit unergonomic compared to, say, Flask, but it is good enough for our purposes.

  1"""
  2This module implements pdoc's live-reloading webserver.
  3
  4We want to keep the number of dependencies as small as possible,
  5so we are content with the builtin `http.server` module.
  6It is a bit unergonomic compared to let's say flask, but good enough for our purposes.
  7"""
  8
  9from __future__ import annotations
 10
 11import http.server
 12import traceback
 13import warnings
 14import webbrowser
 15from collections.abc import Iterable, Iterator
 16from typing import Mapping
 17
 18from pdoc import doc, extract, render
 19from pdoc._compat import cache, removesuffix
 20
 21
 22class DocHandler(http.server.BaseHTTPRequestHandler):
 23    """A handler for individual requests."""
 24
 25    server: DocServer
 26    """A reference to the main web server."""
 27
 28    def do_HEAD(self):
 29        try:
 30            return self.handle_request()
 31        except ConnectionError:  # pragma: no cover
 32            pass
 33
 34    def do_GET(self):
 35        try:
 36            self.wfile.write(self.handle_request().encode())
 37        except ConnectionError:  # pragma: no cover
 38            pass
 39
 40    def handle_request(self) -> str | None:
 41        """Actually handle a request. Called by `do_HEAD` and `do_GET`."""
 42        path = self.path.split("?", 1)[0]
 43
 44        if path == "/" or path == "/index.html":
 45            out = render.html_index(self.server.all_modules)
 46        elif path == "/search.js":
 47            self.send_response(200)
 48            self.send_header("content-type", "application/javascript")
 49            self.end_headers()
 50            return self.server.render_search_index()
 51        else:
 52            module_name = removesuffix(path.lstrip("/"), ".html").replace("/", ".")
 53            if module_name not in self.server.all_modules:
 54                self.send_response(404)
 55                self.send_header("content-type", "text/html")
 56                self.end_headers()
 57                return render.html_error(error=f"Module {module_name!r} not found")
 58
 59            mtime = ""
 60            t = extract.module_mtime(module_name)
 61            if t:
 62                mtime = f"{t:.1f}"
 63            if "mtime=1" in self.path:
 64                self.send_response(200)
 65                self.send_header("content-type", "text/plain")
 66                self.end_headers()
 67                return mtime
 68
 69            try:
 70                extract.invalidate_caches(module_name)
 71                mod = self.server.all_modules[module_name]
 72                out = render.html_module(
 73                    module=mod,
 74                    all_modules=self.server.all_modules,
 75                    mtime=mtime,
 76                )
 77            except Exception:
 78                self.send_response(500)
 79                self.send_header("content-type", "text/html")
 80                self.end_headers()
 81                return render.html_error(
 82                    error=f"Error importing {module_name!r}",
 83                    details=traceback.format_exc(),
 84                )
 85
 86        self.send_response(200)
 87        self.send_header("content-type", "text/html")
 88        self.end_headers()
 89        return out
 90
 91    def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
 92        """Override logging to disable it."""
 93        pass
 94
 95
 96class DocServer(http.server.HTTPServer):
 97    """pdoc's live-reloading web server"""
 98
 99    all_modules: AllModules
100
101    def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
102        super().__init__(addr, DocHandler, **kwargs)  # type: ignore
103        module_names = extract.walk_specs(specs)
104        self.all_modules = AllModules(module_names)
105
106    @cache
107    def render_search_index(self) -> str:
108        """Render the search index. For performance reasons this is always cached."""
109        # Some modules may not be importable, which means that they would raise an RuntimeError
110        # when accessed. We "fix" this by pre-loading all modules here and only passing the ones that work.
111        all_modules_safe = {}
112        for mod in self.all_modules:
113            try:
114                all_modules_safe[mod] = doc.Module.from_name(mod)
115            except RuntimeError:
116                warnings.warn(f"Error importing {mod!r}:\n{traceback.format_exc()}")
117        return render.search_index(all_modules_safe)
118
119
class AllModules(Mapping[str, doc.Module]):
    """A lazy-loading implementation of all_modules.

    Only module *names* are stored; a lookup delegates to `doc.Module.from_name`, so modules are
    imported on demand rather than up front. Apart from that this behaves like a regular dict.
    The downside is that `__getitem__` may raise a RuntimeError.
    This does not matter when rendering HTML, as the default templates do not access all_modules values,
    but the search index needs additional steps to cope with it.
    """

    def __init__(self, allowed_modules: Iterable[str]):
        # A dict instead of a set: names stay unique and keep the order they were given in.
        self.allowed_modules: dict[str, None] = {name: None for name in allowed_modules}

    def __len__(self) -> int:
        return len(self.allowed_modules)

    def __iter__(self) -> Iterator[str]:
        return iter(self.allowed_modules)

    def __contains__(self, item):
        # Answered from the stored names alone, so a membership test never imports anything.
        return item in self.allowed_modules

    def __getitem__(self, item: str):
        if item not in self.allowed_modules:  # pragma: no cover
            raise KeyError(item)
        return doc.Module.from_name(item)
147
148
149# https://github.com/mitmproxy/mitmproxy/blob/af3dfac85541ce06c0e3302a4ba495fe3c77b18a/mitmproxy/tools/web/webaddons.py#L35-L61
def open_browser(url: str) -> bool:  # pragma: no cover
    """
    Open a URL in a browser window.

    Unlike `webbrowser.open`, only a fixed list of suitable browsers is tried, in order.
    On headless servers this gracefully degrades to a no-op, where `webbrowser.open`
    would otherwise open lynx.

    Returns:

    - `True`, if a browser has been opened
    - `False`, if no suitable browser has been found.
    """
    candidates = (
        "windows-default",
        "macosx",
        "wslview %s",
        "x-www-browser %s",
        "gnome-open %s",
        "google-chrome",
        "chrome",
        "chromium",
        "chromium-browser",
        "firefox",
        "opera",
        "safari",
    )
    for candidate in candidates:
        try:
            controller = webbrowser.get(candidate)
        except webbrowser.Error:
            # No usable controller for this entry; move on to the next one.
            continue
        if controller.open(url):
            return True
    return False
class DocHandler(http.server.BaseHTTPRequestHandler):
class DocHandler(http.server.BaseHTTPRequestHandler):
    """Answers one HTTP request on behalf of the owning `DocServer`."""

    server: DocServer
    """A reference to the main web server."""

    def do_HEAD(self):
        """Handle a HEAD request: headers are sent, the body is computed but never written."""
        try:
            result = self.handle_request()
        except ConnectionError:  # pragma: no cover
            # The client hung up while we were responding; there is nobody left to answer.
            return None
        return result

    def do_GET(self):
        """Handle a GET request: send the headers, then the encoded response body."""
        try:
            body = self.handle_request()
            self.wfile.write(body.encode())
        except ConnectionError:  # pragma: no cover
            pass

    def _begin_response(self, status: int, content_type: str) -> None:
        """Send the status line and a `content-type` header, then end the header block."""
        self.send_response(status)
        self.send_header("content-type", content_type)
        self.end_headers()

    def handle_request(self) -> str | None:
        """Send the response headers and return the response body.

        Called by `do_HEAD` and `do_GET`. Routes, keyed on the request path without its query string:

        - `/` and `/index.html`: the module index.
        - `/search.js`: the search index.
        - anything else: treated as a module page, with `/` mapped to `.` and a trailing `.html` dropped.
          Unknown modules yield a 404 page, and any exception while rendering yields a 500 page
          that includes the traceback.
        """
        route = self.path.split("?", 1)[0]

        if route == "/search.js":
            self._begin_response(200, "application/javascript")
            return self.server.render_search_index()

        if route in ("/", "/index.html"):
            page = render.html_index(self.server.all_modules)
        else:
            # "/pkg/sub.html" -> "pkg.sub"
            module_name = removesuffix(route.lstrip("/"), ".html").replace("/", ".")
            if module_name not in self.server.all_modules:
                self._begin_response(404, "text/html")
                return render.html_error(error=f"Module {module_name!r} not found")

            # Modification time with one decimal place, or "" if none is reported.
            modified = extract.module_mtime(module_name)
            mtime = f"{modified:.1f}" if modified else ""
            # The marker is looked for in the complete request target, query string included.
            # NOTE(review): presumably polled by the live-reload script in the page - confirm.
            if "mtime=1" in self.path:
                self._begin_response(200, "text/plain")
                return mtime

            try:
                extract.invalidate_caches(module_name)
                page = render.html_module(
                    module=self.server.all_modules[module_name],
                    all_modules=self.server.all_modules,
                    mtime=mtime,
                )
            except Exception:
                self._begin_response(500, "text/html")
                return render.html_error(
                    error=f"Error importing {module_name!r}",
                    details=traceback.format_exc(),
                )

        self._begin_response(200, "text/html")
        return page

    def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
        """Suppress per-request logging by overriding the base implementation with a no-op."""
        return None

A handler for individual requests.

A reference to the main web server.

def do_HEAD(self)
29    def do_HEAD(self):
30        try:
31            return self.handle_request()
32        except ConnectionError:  # pragma: no cover
33            pass
def do_GET(self)
35    def do_GET(self):
36        try:
37            self.wfile.write(self.handle_request().encode())
38        except ConnectionError:  # pragma: no cover
39            pass
def handle_request(self) -> str | None:
41    def handle_request(self) -> str | None:
42        """Actually handle a request. Called by `do_HEAD` and `do_GET`."""
43        path = self.path.split("?", 1)[0]
44
45        if path == "/" or path == "/index.html":
46            out = render.html_index(self.server.all_modules)
47        elif path == "/search.js":
48            self.send_response(200)
49            self.send_header("content-type", "application/javascript")
50            self.end_headers()
51            return self.server.render_search_index()
52        else:
53            module_name = removesuffix(path.lstrip("/"), ".html").replace("/", ".")
54            if module_name not in self.server.all_modules:
55                self.send_response(404)
56                self.send_header("content-type", "text/html")
57                self.end_headers()
58                return render.html_error(error=f"Module {module_name!r} not found")
59
60            mtime = ""
61            t = extract.module_mtime(module_name)
62            if t:
63                mtime = f"{t:.1f}"
64            if "mtime=1" in self.path:
65                self.send_response(200)
66                self.send_header("content-type", "text/plain")
67                self.end_headers()
68                return mtime
69
70            try:
71                extract.invalidate_caches(module_name)
72                mod = self.server.all_modules[module_name]
73                out = render.html_module(
74                    module=mod,
75                    all_modules=self.server.all_modules,
76                    mtime=mtime,
77                )
78            except Exception:
79                self.send_response(500)
80                self.send_header("content-type", "text/html")
81                self.end_headers()
82                return render.html_error(
83                    error=f"Error importing {module_name!r}",
84                    details=traceback.format_exc(),
85                )
86
87        self.send_response(200)
88        self.send_header("content-type", "text/html")
89        self.end_headers()
90        return out

Actually handle a request. Called by do_HEAD and do_GET.

def log_request(self, code: int | str = Ellipsis, size: int | str = Ellipsis) -> None:
92    def log_request(self, code: int | str = ..., size: int | str = ...) -> None:
93        """Override logging to disable it."""
94        pass

Override logging to disable it.

Inherited Members
socketserver.BaseRequestHandler
BaseRequestHandler
http.server.BaseHTTPRequestHandler
sys_version
server_version
error_message_format
error_content_type
default_request_version
parse_request
handle_expect_100
handle_one_request
handle
send_error
send_response
send_response_only
send_header
end_headers
flush_headers
log_error
log_message
version_string
date_time_string
log_date_time_string
weekdayname
monthname
address_string
protocol_version
MessageClass
responses
socketserver.StreamRequestHandler
rbufsize
wbufsize
timeout
disable_nagle_algorithm
setup
finish
class DocServer(http.server.HTTPServer):
 97class DocServer(http.server.HTTPServer):
 98    """pdoc's live-reloading web server"""
 99
100    all_modules: AllModules
101
102    def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
103        super().__init__(addr, DocHandler, **kwargs)  # type: ignore
104        module_names = extract.walk_specs(specs)
105        self.all_modules = AllModules(module_names)
106
107    @cache
108    def render_search_index(self) -> str:
109        """Render the search index. For performance reasons this is always cached."""
110        # Some modules may not be importable, which means that they would raise an RuntimeError
111        # when accessed. We "fix" this by pre-loading all modules here and only passing the ones that work.
112        all_modules_safe = {}
113        for mod in self.all_modules:
114            try:
115                all_modules_safe[mod] = doc.Module.from_name(mod)
116            except RuntimeError:
117                warnings.warn(f"Error importing {mod!r}:\n{traceback.format_exc()}")
118        return render.search_index(all_modules_safe)

pdoc's live-reloading web server

DocServer(addr: tuple[str, int], specs: list[str], **kwargs)
102    def __init__(self, addr: tuple[str, int], specs: list[str], **kwargs):
103        super().__init__(addr, DocHandler, **kwargs)  # type: ignore
104        module_names = extract.walk_specs(specs)
105        self.all_modules = AllModules(module_names)

Constructor. May be extended, do not override.

@cache
def render_search_index(self) -> str:
107    @cache
108    def render_search_index(self) -> str:
109        """Render the search index. For performance reasons this is always cached."""
110        # Some modules may not be importable, which means that they would raise an RuntimeError
111        # when accessed. We "fix" this by pre-loading all modules here and only passing the ones that work.
112        all_modules_safe = {}
113        for mod in self.all_modules:
114            try:
115                all_modules_safe[mod] = doc.Module.from_name(mod)
116            except RuntimeError:
117                warnings.warn(f"Error importing {mod!r}:\n{traceback.format_exc()}")
118        return render.search_index(all_modules_safe)

Render the search index. For performance reasons this is always cached.

Inherited Members
http.server.HTTPServer
allow_reuse_address
server_bind
socketserver.TCPServer
address_family
socket_type
request_queue_size
server_activate
server_close
fileno
get_request
shutdown_request
close_request
socketserver.BaseServer
timeout
serve_forever
shutdown
service_actions
handle_request
handle_timeout
verify_request
process_request
finish_request
handle_error
class AllModules(typing.Mapping[str, pdoc.doc.Module]):
class AllModules(Mapping[str, doc.Module]):
    """A lazy-loading implementation of all_modules.

    Only module *names* are stored; a lookup delegates to `doc.Module.from_name`, so modules are
    imported on demand rather than up front. Apart from that this behaves like a regular dict.
    The downside is that `__getitem__` may raise a RuntimeError.
    This does not matter when rendering HTML, as the default templates do not access all_modules values,
    but the search index needs additional steps to cope with it.
    """

    def __init__(self, allowed_modules: Iterable[str]):
        # A dict instead of a set: names stay unique and keep the order they were given in.
        self.allowed_modules: dict[str, None] = {name: None for name in allowed_modules}

    def __len__(self) -> int:
        return len(self.allowed_modules)

    def __iter__(self) -> Iterator[str]:
        return iter(self.allowed_modules)

    def __contains__(self, item):
        # Answered from the stored names alone, so a membership test never imports anything.
        return item in self.allowed_modules

    def __getitem__(self, item: str):
        if item not in self.allowed_modules:  # pragma: no cover
            raise KeyError(item)
        return doc.Module.from_name(item)

A lazy-loading implementation of all_modules.

This behaves like a regular dict, but modules are only imported on demand for performance reasons. This has the somewhat annoying side effect that __getitem__ may raise a RuntimeError. We can ignore that when rendering HTML as the default templates do not access all_modules values, but we need to perform additional steps for the search index.

AllModules(allowed_modules: collections.abc.Iterable[str])
130    def __init__(self, allowed_modules: Iterable[str]):
131        # use a dict to preserve order
132        self.allowed_modules: dict[str, None] = dict.fromkeys(allowed_modules)
Inherited Members
collections.abc.Mapping
get
keys
items
values
def open_browser(url: str) -> bool:
def open_browser(url: str) -> bool:  # pragma: no cover
    """
    Open a URL in a browser window.

    Unlike `webbrowser.open`, only a fixed list of suitable browsers is tried, in order.
    On headless servers this gracefully degrades to a no-op, where `webbrowser.open`
    would otherwise open lynx.

    Returns:

    - `True`, if a browser has been opened
    - `False`, if no suitable browser has been found.
    """
    candidates = (
        "windows-default",
        "macosx",
        "wslview %s",
        "x-www-browser %s",
        "gnome-open %s",
        "google-chrome",
        "chrome",
        "chromium",
        "chromium-browser",
        "firefox",
        "opera",
        "safari",
    )
    for candidate in candidates:
        try:
            controller = webbrowser.get(candidate)
        except webbrowser.Error:
            # No usable controller for this entry; move on to the next one.
            continue
        if controller.open(url):
            return True
    return False

Open a URL in a browser window. In contrast to webbrowser.open, we limit the list of suitable browsers. This gracefully degrades to a no-op on headless servers, where webbrowser.open would otherwise open lynx.

Returns:

  • True, if a browser has been opened
  • False, if no suitable browser has been found.