Edit on GitHub

pdoc.web

This module implements pdoc's live-reloading webserver.

We want to keep the number of dependencies as small as possible, so we are content with the builtin http.server module. It is a bit unergonomic compared to, say, Flask, but good enough for our purposes.

View Source
"""
This module implements pdoc's live-reloading webserver.

We want to keep the number of dependencies as small as possible,
so we are content with the builtin `http.server` module.
It is a bit unergonomic compared to let's say flask, but good enough for our purposes.
"""

from __future__ import annotations

import http.server
import importlib.util
import pkgutil
import sysconfig
import traceback
import webbrowser
from typing import Collection, Optional, Union

from pdoc import doc, extract, render
from pdoc._compat import cache, removesuffix


class DocHandler(http.server.BaseHTTPRequestHandler):
    """Request handler that renders documentation pages on demand."""

    server: "DocServer"
    """A reference to the main web server."""

    def do_HEAD(self):
        """Serve a HEAD request by delegating to `handle_request`."""
        try:
            result = self.handle_request()
        except ConnectionError:  # pragma: no cover
            # The client went away; there is nobody left to answer.
            return None
        return result

    def do_GET(self):
        """Serve a GET request: write the encoded body from `handle_request`."""
        try:
            sink = self.wfile
            sink.write(self.handle_request().encode())
        except ConnectionError:  # pragma: no cover
            # The client went away; there is nobody left to answer.
            return None

    def handle_request(self) -> Optional[str]:
        """
        Send status line and headers for the current request and return the body.

        Shared by `do_HEAD` and `do_GET`. Routing:

        - `/` renders the index of `self.server.all_modules`.
        - Any other path is turned into a module name by dropping leading
          slashes and a trailing `.html`, and mapping `/` to `.`.
          Names not contained in `self.server.all_modules` yield a 404 error page.
        - If the raw request path contains `mtime=1`, only the module's
          modification time is returned as `text/plain`.
        - If loading the module raises, a 500 error page with the traceback
          is returned.
        """

        def send_head(status: int, content_type: str = "text/html") -> None:
            # Emit the status line plus the single content-type header.
            self.send_response(status)
            self.send_header("content-type", content_type)
            self.end_headers()

        route = self.path.partition("?")[0]

        if route == "/":
            # Render first, so that headers are only sent once rendering succeeded.
            page = render.html_index(self.server.all_modules)
            send_head(200)
            return page

        modname = removesuffix(route.lstrip("/"), ".html").replace("/", ".")
        if modname not in self.server.all_modules:
            send_head(404)
            return render.html_error(error=f"Module {modname!r} not found")

        timestamp = extract.module_mtime(modname)
        mtime = f"{timestamp:.1f}" if timestamp else ""
        if "mtime=1" in self.path:
            send_head(200, "text/plain")
            return mtime

        try:
            extract.invalidate_caches(modname)
            documented = doc.Module(extract.load_module(modname))
        except Exception:
            send_head(500)
            return render.html_error(
                error=f"Error importing {modname!r}",
                details=traceback.format_exc(),
            )

        page = render.html_module(
            module=documented,
            all_modules=self.server.all_modules,
            mtime=mtime,
        )
        send_head(200)
        return page

    def log_request(
        self, code: Union[int, str] = ..., size: Union[int, str] = ...
    ) -> None:
        """Do nothing, so that requests are not logged."""
        return None


class DocServer(http.server.HTTPServer):
    """HTTP server that answers every request with a `DocHandler`."""

    all_modules: Collection[str]

    def __init__(self, addr: tuple[str, int], all_modules: Collection[str]):
        """
        Set up the server on `addr` and remember `all_modules`,
        the collection of module names the handler consults.
        """
        super().__init__(server_address=addr, RequestHandlerClass=DocHandler)
        self.all_modules = all_modules


# https://github.com/mitmproxy/mitmproxy/blob/af3dfac85541ce06c0e3302a4ba495fe3c77b18a/mitmproxy/tools/web/webaddons.py#L35-L61
def open_browser(url: str) -> bool:  # pragma: no cover
    """
    Try to show `url` in one of a fixed list of browsers.

    Unlike `webbrowser.open`, only the browsers listed below are considered.
    On headless servers this ends up doing nothing,
    whereas `webbrowser.open` would otherwise open lynx.

    Returns:

    - `True`, if one of the browsers reported that it opened the URL
    - `False`, if none of them did.
    """
    candidates = (
        "windows-default",
        "macosx",
        "wslview %s",
        "x-www-browser %s",
        "gnome-open %s",
        "google-chrome",
        "chrome",
        "chromium",
        "chromium-browser",
        "firefox",
        "opera",
        "safari",
    )
    for name in candidates:
        try:
            controller = webbrowser.get(name)
        except webbrowser.Error:
            # This browser is not available here, try the next one.
            continue
        if controller.open(url):
            return True
    return False


class AllModules(Collection[str]):
    """
    A pseudo-collection standing in for every module the user has installed.
    It is used when `pdoc` is invoked without any arguments,
    because using `pkgutil.walk_packages` would take multiple seconds.

    Iterating yields the top-level module names only,
    while membership tests also succeed for submodules.
    """

    def __init__(self):
        stdlib_dirs = (
            sysconfig.get_path("stdlib").lower(),
            sysconfig.get_path("platstdlib").lower(),
        )
        # Keep public top-level modules whose finder does not point at the
        # standard library directories.
        names = [
            info.name
            for info in pkgutil.iter_modules()
            if not (info.name.startswith("_") or info.name[0].isdigit())
            and getattr(info.module_finder, "path", "").lower() not in stdlib_dirs
        ]
        names.sort()
        # A dict is used as an ordered set of names.
        self._root_mods: dict[str, None] = dict.fromkeys(names)

    def __iter__(self):
        return iter(self._root_mods)

    def __len__(self):
        return len(self._root_mods)

    @cache
    def __contains__(self, modname):
        # Cheap rejection: the top-level package must be one of the known roots.
        top_level = modname.partition(".")[0]
        if top_level not in self._root_mods:
            return False
        try:
            with extract.mock_some_common_side_effects():
                spec = importlib.util.find_spec(modname)
            if spec is None:
                raise ModuleNotFoundError(modname)
        except extract.AnyException:
            return False
        return True
#   class DocHandler(http.server.BaseHTTPRequestHandler):
View Source
class DocHandler(http.server.BaseHTTPRequestHandler):
    """Request handler that renders documentation pages on demand."""

    server: "DocServer"
    """A reference to the main web server."""

    def do_HEAD(self):
        """Serve a HEAD request by delegating to `handle_request`."""
        try:
            result = self.handle_request()
        except ConnectionError:  # pragma: no cover
            # The client went away; there is nobody left to answer.
            return None
        return result

    def do_GET(self):
        """Serve a GET request: write the encoded body from `handle_request`."""
        try:
            sink = self.wfile
            sink.write(self.handle_request().encode())
        except ConnectionError:  # pragma: no cover
            # The client went away; there is nobody left to answer.
            return None

    def handle_request(self) -> Optional[str]:
        """
        Send status line and headers for the current request and return the body.

        Shared by `do_HEAD` and `do_GET`. Routing:

        - `/` renders the index of `self.server.all_modules`.
        - Any other path is turned into a module name by dropping leading
          slashes and a trailing `.html`, and mapping `/` to `.`.
          Names not contained in `self.server.all_modules` yield a 404 error page.
        - If the raw request path contains `mtime=1`, only the module's
          modification time is returned as `text/plain`.
        - If loading the module raises, a 500 error page with the traceback
          is returned.
        """

        def send_head(status: int, content_type: str = "text/html") -> None:
            # Emit the status line plus the single content-type header.
            self.send_response(status)
            self.send_header("content-type", content_type)
            self.end_headers()

        route = self.path.partition("?")[0]

        if route == "/":
            # Render first, so that headers are only sent once rendering succeeded.
            page = render.html_index(self.server.all_modules)
            send_head(200)
            return page

        modname = removesuffix(route.lstrip("/"), ".html").replace("/", ".")
        if modname not in self.server.all_modules:
            send_head(404)
            return render.html_error(error=f"Module {modname!r} not found")

        timestamp = extract.module_mtime(modname)
        mtime = f"{timestamp:.1f}" if timestamp else ""
        if "mtime=1" in self.path:
            send_head(200, "text/plain")
            return mtime

        try:
            extract.invalidate_caches(modname)
            documented = doc.Module(extract.load_module(modname))
        except Exception:
            send_head(500)
            return render.html_error(
                error=f"Error importing {modname!r}",
                details=traceback.format_exc(),
            )

        page = render.html_module(
            module=documented,
            all_modules=self.server.all_modules,
            mtime=mtime,
        )
        send_head(200)
        return page

    def log_request(
        self, code: Union[int, str] = ..., size: Union[int, str] = ...
    ) -> None:
        """Do nothing, so that requests are not logged."""
        return None

A handler for individual requests.

A reference to the main web server.

#   def do_HEAD(self):
View Source
    def do_HEAD(self):
        try:
            return self.handle_request()
        except ConnectionError:  # pragma: no cover
            pass
#   def do_GET(self):
View Source
    def do_GET(self):
        try:
            self.wfile.write(self.handle_request().encode())
        except ConnectionError:  # pragma: no cover
            pass
#   def handle_request(self) -> Optional[str]:
View Source
    def handle_request(self) -> Optional[str]:
        """
        Send status line and headers for the current request and return the body.

        Shared by `do_HEAD` and `do_GET`. Routing:

        - `/` renders the index of `self.server.all_modules`.
        - Any other path is turned into a module name by dropping leading
          slashes and a trailing `.html`, and mapping `/` to `.`.
          Names not contained in `self.server.all_modules` yield a 404 error page.
        - If the raw request path contains `mtime=1`, only the module's
          modification time is returned as `text/plain`.
        - If loading the module raises, a 500 error page with the traceback
          is returned.
        """

        def send_head(status: int, content_type: str = "text/html") -> None:
            # Emit the status line plus the single content-type header.
            self.send_response(status)
            self.send_header("content-type", content_type)
            self.end_headers()

        route = self.path.partition("?")[0]

        if route == "/":
            # Render first, so that headers are only sent once rendering succeeded.
            page = render.html_index(self.server.all_modules)
            send_head(200)
            return page

        modname = removesuffix(route.lstrip("/"), ".html").replace("/", ".")
        if modname not in self.server.all_modules:
            send_head(404)
            return render.html_error(error=f"Module {modname!r} not found")

        timestamp = extract.module_mtime(modname)
        mtime = f"{timestamp:.1f}" if timestamp else ""
        if "mtime=1" in self.path:
            send_head(200, "text/plain")
            return mtime

        try:
            extract.invalidate_caches(modname)
            documented = doc.Module(extract.load_module(modname))
        except Exception:
            send_head(500)
            return render.html_error(
                error=f"Error importing {modname!r}",
                details=traceback.format_exc(),
            )

        page = render.html_module(
            module=documented,
            all_modules=self.server.all_modules,
            mtime=mtime,
        )
        send_head(200)
        return page

Actually handle a request. Called by do_HEAD and do_GET.

#   def log_request( self, code: Union[int, str] = Ellipsis, size: Union[int, str] = Ellipsis ) -> NoneType:
View Source
    def log_request(
        self, code: Union[int, str] = ..., size: Union[int, str] = ...
    ) -> None:
        """Override logging to disable it."""
        pass

Override logging to disable it.

Inherited Members
socketserver.BaseRequestHandler
BaseRequestHandler
http.server.BaseHTTPRequestHandler
sys_version
server_version
error_message_format
error_content_type
default_request_version
parse_request
handle_expect_100
handle_one_request
handle
send_error
send_response
send_response_only
send_header
end_headers
flush_headers
log_error
log_message
version_string
date_time_string
log_date_time_string
weekdayname
monthname
address_string
protocol_version
MessageClass
responses
socketserver.StreamRequestHandler
rbufsize
wbufsize
timeout
disable_nagle_algorithm
setup
finish
#   class DocServer(http.server.HTTPServer):
View Source
class DocServer(http.server.HTTPServer):
    """HTTP server that answers every request with a `DocHandler`."""

    all_modules: Collection[str]

    def __init__(self, addr: tuple[str, int], all_modules: Collection[str]):
        """
        Set up the server on `addr` and remember `all_modules`,
        the collection of module names the handler consults.
        """
        super().__init__(server_address=addr, RequestHandlerClass=DocHandler)
        self.all_modules = all_modules

pdoc's live-reloading web server

#   DocServer(addr: tuple, all_modules: Collection[str])
View Source
    def __init__(self, addr: tuple[str, int], all_modules: Collection[str]):
        """
        Set up the server on `addr` and remember `all_modules`,
        the collection of module names the handler consults.
        """
        super().__init__(server_address=addr, RequestHandlerClass=DocHandler)
        self.all_modules = all_modules

Constructor. May be extended, do not override.

#   all_modules: Collection[str]
Inherited Members
http.server.HTTPServer
allow_reuse_address
server_bind
socketserver.TCPServer
address_family
socket_type
request_queue_size
server_activate
server_close
fileno
get_request
shutdown_request
close_request
socketserver.BaseServer
timeout
serve_forever
shutdown
service_actions
handle_request
handle_timeout
verify_request
process_request
finish_request
handle_error
#   def open_browser(url: str) -> bool:
View Source
def open_browser(url: str) -> bool:  # pragma: no cover
    """
    Try to show `url` in one of a fixed list of browsers.

    Unlike `webbrowser.open`, only the browsers listed below are considered.
    On headless servers this ends up doing nothing,
    whereas `webbrowser.open` would otherwise open lynx.

    Returns:

    - `True`, if one of the browsers reported that it opened the URL
    - `False`, if none of them did.
    """
    candidates = (
        "windows-default",
        "macosx",
        "wslview %s",
        "x-www-browser %s",
        "gnome-open %s",
        "google-chrome",
        "chrome",
        "chromium",
        "chromium-browser",
        "firefox",
        "opera",
        "safari",
    )
    for name in candidates:
        try:
            controller = webbrowser.get(name)
        except webbrowser.Error:
            # This browser is not available here, try the next one.
            continue
        if controller.open(url):
            return True
    return False

Open a URL in a browser window. In contrast to webbrowser.open, we limit the list of suitable browsers. This gracefully degrades to a no-op on headless servers, where webbrowser.open would otherwise open lynx.

Returns:

  • True, if a browser has been opened
  • False, if no suitable browser has been found.
#   class AllModules(typing.Collection[str]):
View Source
class AllModules(Collection[str]):
    """
    A pseudo-collection standing in for every module the user has installed.
    It is used when `pdoc` is invoked without any arguments,
    because using `pkgutil.walk_packages` would take multiple seconds.

    Iterating yields the top-level module names only,
    while membership tests also succeed for submodules.
    """

    def __init__(self):
        stdlib_dirs = (
            sysconfig.get_path("stdlib").lower(),
            sysconfig.get_path("platstdlib").lower(),
        )
        # Keep public top-level modules whose finder does not point at the
        # standard library directories.
        names = [
            info.name
            for info in pkgutil.iter_modules()
            if not (info.name.startswith("_") or info.name[0].isdigit())
            and getattr(info.module_finder, "path", "").lower() not in stdlib_dirs
        ]
        names.sort()
        # A dict is used as an ordered set of names.
        self._root_mods: dict[str, None] = dict.fromkeys(names)

    def __iter__(self):
        return iter(self._root_mods)

    def __len__(self):
        return len(self._root_mods)

    @cache
    def __contains__(self, modname):
        # Cheap rejection: the top-level package must be one of the known roots.
        top_level = modname.partition(".")[0]
        if top_level not in self._root_mods:
            return False
        try:
            with extract.mock_some_common_side_effects():
                spec = importlib.util.find_spec(modname)
            if spec is None:
                raise ModuleNotFoundError(modname)
        except extract.AnyException:
            return False
        return True

A fake collection that contains all modules installed by the user. This is used when pdoc is invoked without any arguments, because using pkgutil.walk_packages would take multiple seconds.

When being __iter__ated, it returns the list of all top-level modules, but it __contains__ all submodules as well.

#   AllModules()
View Source
    def __init__(self):
        root_modules = []
        stdlib = sysconfig.get_path("stdlib").lower()
        platstdlib = sysconfig.get_path("platstdlib").lower()
        for m in pkgutil.iter_modules():
            if m.name.startswith("_") or m.name[0].isdigit():
                continue
            if getattr(m.module_finder, "path", "").lower() in (stdlib, platstdlib):
                continue
            root_modules.append(m.name)
        self._root_mods: dict[str, None] = dict.fromkeys(sorted(root_modules))