Source code for sphinx.environment

"""
    sphinx.environment
    ~~~~~~~~~~~~~~~~~~

    Global creation environment.

    :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import os
import pickle
import warnings
from collections import defaultdict
from copy import copy
from datetime import datetime
from os import path
from typing import (TYPE_CHECKING, Any, Callable, Dict, Generator, Iterator, List, Optional,
                    Set, Tuple, Union)

from docutils import nodes
from docutils.nodes import Node

from sphinx import addnodes
from sphinx.config import Config
from sphinx.deprecation import RemovedInSphinx60Warning
from sphinx.domains import Domain
from sphinx.environment.adapters.toctree import TocTree
from sphinx.errors import BuildEnvironmentError, DocumentError, ExtensionError, SphinxError
from sphinx.events import EventManager
from sphinx.locale import __
from sphinx.project import Project
from sphinx.transforms import SphinxTransformer
from sphinx.util import DownloadFiles, FilenameUniqDict, logging
from sphinx.util.docutils import LoggingReporter
from sphinx.util.i18n import CatalogRepository, docname_to_domain
from sphinx.util.nodes import is_translatable
from sphinx.util.osutil import canon_path, os_path

if TYPE_CHECKING:
    from sphinx.application import Sphinx
    from sphinx.builders import Builder


logger = logging.getLogger(__name__)

default_settings: Dict[str, Any] = {
    'embed_stylesheet': False,
    'cloak_email_addresses': True,
    'pep_base_url': 'https://www.python.org/dev/peps/',
    'pep_references': None,
    'rfc_base_url': 'https://tools.ietf.org/html/',
    'rfc_references': None,
    'input_encoding': 'utf-8-sig',
    'doctitle_xform': False,
    'sectsubtitle_xform': False,
    'halt_level': 5,
    'file_insertion_enabled': True,
    'smartquotes_locales': [],
}

# This is increased every time an environment attribute is added
# or changed to properly invalidate pickle files.
ENV_VERSION = 56

# config status
CONFIG_OK = 1
CONFIG_NEW = 2
CONFIG_CHANGED = 3
CONFIG_EXTENSIONS_CHANGED = 4

CONFIG_CHANGED_REASON = {
    CONFIG_NEW: __('new config'),
    CONFIG_CHANGED: __('config changed'),
    CONFIG_EXTENSIONS_CHANGED: __('extensions changed'),
}


versioning_conditions: Dict[str, Union[bool, Callable]] = {
    'none': False,
    'text': is_translatable,
}


[docs]class BuildEnvironment:
    """
    The environment in which the ReST files are translated.
    Stores an inventory of cross-file targets and provides doctree
    transformations to resolve links to them.
    """

    domains: Dict[str, Domain]

    # --------- ENVIRONMENT INITIALIZATION -------------------------------------

    def __init__(self, app: "Sphinx" = None):
        self.app: Sphinx = None
        self.doctreedir: str = None
        self.srcdir: str = None
        self.config: Config = None
        self.config_status: int = None
        self.config_status_extra: str = None
        self.events: EventManager = None
        self.project: Project = None
        self.version: Dict[str, str] = None

        # the method of doctree versioning; see set_versioning_method
        self.versioning_condition: Union[bool, Callable] = None
        self.versioning_compare: bool = None

        # all the registered domains, set by the application
        self.domains = {}

        # the docutils settings for building
        self.settings = default_settings.copy()
        self.settings['env'] = self

        # All "docnames" here are /-separated and relative and exclude
        # the source suffix.

        # docname -> mtime at the time of reading
        # contains all read docnames
        self.all_docs: Dict[str, float] = {}
        # docname -> set of dependent file
        # names, relative to documentation root
        self.dependencies: Dict[str, Set[str]] = defaultdict(set)
        # docname -> set of included file
        # docnames included from other documents
        self.included: Dict[str, Set[str]] = defaultdict(set)
        # docnames to re-read unconditionally on next build
        self.reread_always: Set[str] = set()

        # File metadata
        # docname -> dict of metadata items
        self.metadata: Dict[str, Dict[str, Any]] = defaultdict(dict)

        # TOC inventory
        # docname -> title node
        self.titles: Dict[str, nodes.title] = {}
        # docname -> title node; only different if
        # set differently with title directive
        self.longtitles: Dict[str, nodes.title] = {}
        # docname -> table of contents nodetree
        self.tocs: Dict[str, nodes.bullet_list] = {}
        # docname -> number of real entries
        self.toc_num_entries: Dict[str, int] = {}

        # used to determine when to show the TOC
        # in a sidebar (don't show if it's only one item)
        # docname -> dict of sectionid -> number
        self.toc_secnumbers: Dict[str, Dict[str, Tuple[int, ...]]] = {}
        # docname -> dict of figtype -> dict of figureid -> number
        self.toc_fignumbers: Dict[str, Dict[str, Dict[str, Tuple[int, ...]]]] = {}

        # docname -> list of toctree includefiles
        self.toctree_includes: Dict[str, List[str]] = {}
        # docname -> set of files (containing its TOCs) to rebuild too
        self.files_to_rebuild: Dict[str, Set[str]] = {}
        # docnames that have :glob: toctrees
        self.glob_toctrees: Set[str] = set()
        # docnames that have :numbered: toctrees
        self.numbered_toctrees: Set[str] = set()

        # domain-specific inventories, here to be pickled
        # domainname -> domain-specific dict
        self.domaindata: Dict[str, Dict] = {}

        # these map absolute path -> (docnames, unique filename)
        self.images: FilenameUniqDict = FilenameUniqDict()
        # filename -> (set of docnames, destination)
        self.dlfiles: DownloadFiles = DownloadFiles()

        # the original URI for images
        self.original_image_uri: Dict[str, str] = {}

        # temporary data storage while reading a document
        self.temp_data: Dict[str, Any] = {}
        # context for cross-references (e.g. current module or class)
        # this is similar to temp_data, but will for example be copied to
        # attributes of "any" cross references
        self.ref_context: Dict[str, Any] = {}

        # set up environment
        if app:
            self.setup(app)
        else:
            warnings.warn("The 'app' argument for BuildEnvironment() becomes required now.",
                          RemovedInSphinx60Warning, stacklevel=2)

    def __getstate__(self) -> Dict:
        """Obtains serializable data for pickling."""
        __dict__ = self.__dict__.copy()
        __dict__.update(app=None, domains={}, events=None)  # clear unpickable attributes
        return __dict__

    def __setstate__(self, state: Dict) -> None:
        self.__dict__.update(state)

    def setup(self, app: "Sphinx") -> None:
        """Set up BuildEnvironment object."""
        if self.version and self.version != app.registry.get_envversion(app):
            raise BuildEnvironmentError(__('build environment version not current'))
        elif self.srcdir and self.srcdir != app.srcdir:
            raise BuildEnvironmentError(__('source directory has changed'))

        if self.project:
            app.project.restore(self.project)

        self.app = app
        self.doctreedir = app.doctreedir
        self.events = app.events
        self.srcdir = app.srcdir
        self.project = app.project
        self.version = app.registry.get_envversion(app)

        # initialize domains
        self.domains = {}
        for domain in app.registry.create_domains(self):
            self.domains[domain.name] = domain

        # setup domains (must do after all initialization)
        for domain in self.domains.values():
            domain.setup()

        # initialize config
        self._update_config(app.config)

        # initialie settings
        self._update_settings(app.config)

    def _update_config(self, config: Config) -> None:
        """Update configurations by new one."""
        self.config_status = CONFIG_OK
        self.config_status_extra = ''
        if self.config is None:
            self.config_status = CONFIG_NEW
        elif self.config.extensions != config.extensions:
            self.config_status = CONFIG_EXTENSIONS_CHANGED
            extensions = sorted(
                set(self.config.extensions) ^ set(config.extensions))
            if len(extensions) == 1:
                extension = extensions[0]
            else:
                extension = '%d' % (len(extensions),)
            self.config_status_extra = ' (%r)' % (extension,)
        else:
            # check if a config value was changed that affects how
            # doctrees are read
            for item in config.filter('env'):
                if self.config[item.name] != item.value:
                    self.config_status = CONFIG_CHANGED
                    self.config_status_extra = ' (%r)' % (item.name,)
                    break

        self.config = config

    def _update_settings(self, config: Config) -> None:
        """Update settings by new config."""
        self.settings['input_encoding'] = config.source_encoding
        self.settings['trim_footnote_reference_space'] = config.trim_footnote_reference_space
        self.settings['language_code'] = config.language or 'en'

        # Allow to disable by 3rd party extension (workaround)
        self.settings.setdefault('smart_quotes', True)

    def set_versioning_method(self, method: Union[str, Callable], compare: bool) -> None:
        """This sets the doctree versioning method for this environment.

        Versioning methods are a builder property; only builders with the same
        versioning method can share the same doctree directory.  Therefore, we
        raise an exception if the user tries to use an environment with an
        incompatible versioning method.
        """
        condition: Union[bool, Callable]
        if callable(method):
            condition = method
        else:
            if method not in versioning_conditions:
                raise ValueError('invalid versioning method: %r' % method)
            condition = versioning_conditions[method]

        if self.versioning_condition not in (None, condition):
            raise SphinxError(__('This environment is incompatible with the '
                                 'selected builder, please choose another '
                                 'doctree directory.'))
        self.versioning_condition = condition
        self.versioning_compare = compare

    def clear_doc(self, docname: str) -> None:
        """Remove all traces of a source file in the inventory."""
        if docname in self.all_docs:
            self.all_docs.pop(docname, None)
            self.included.pop(docname, None)
            self.reread_always.discard(docname)

        for domain in self.domains.values():
            domain.clear_doc(docname)

    def merge_info_from(self, docnames: List[str], other: "BuildEnvironment",
                        app: "Sphinx") -> None:
        """Merge global information gathered about *docnames* while reading them
        from the *other* environment.

        This possibly comes from a parallel build process.
        """
        docnames = set(docnames)  # type: ignore
        for docname in docnames:
            self.all_docs[docname] = other.all_docs[docname]
            self.included[docname] = other.included[docname]
            if docname in other.reread_always:
                self.reread_always.add(docname)

        for domainname, domain in self.domains.items():
            domain.merge_domaindata(docnames, other.domaindata[domainname])
        self.events.emit('env-merge-info', self, docnames, other)

    def path2doc(self, filename: str) -> Optional[str]:
        """Return the docname for the filename if the file is document.

        *filename* should be absolute or relative to the source directory.
        """
        return self.project.path2doc(filename)

[docs]    def doc2path(self, docname: str, base: bool = True) -> str:
        """Return the filename for the document name.

        If *base* is True, return absolute path under self.srcdir.
        If *base* is False, return relative path to self.srcdir.
        """
        return self.project.doc2path(docname, base)

[docs]    def relfn2path(self, filename: str, docname: str = None) -> Tuple[str, str]:
        """Return paths to a file referenced from a document, relative to
        documentation root and absolute.

        In the input "filename", absolute filenames are taken as relative to the
        source dir, while relative filenames are relative to the dir of the
        containing document.
        """
        filename = os_path(filename)
        if filename.startswith('/') or filename.startswith(os.sep):
            rel_fn = filename[1:]
        else:
            docdir = path.dirname(self.doc2path(docname or self.docname,
                                                base=None))
            rel_fn = path.join(docdir, filename)

        return (canon_path(path.normpath(rel_fn)),
                path.normpath(path.join(self.srcdir, rel_fn)))

    @property
    def found_docs(self) -> Set[str]:
        """contains all existing docnames."""
        return self.project.docnames

    def find_files(self, config: Config, builder: "Builder") -> None:
        """Find all source files in the source dir and put them in
        self.found_docs.
        """
        try:
            exclude_paths = (self.config.exclude_patterns +
                             self.config.templates_path +
                             builder.get_asset_paths())
            self.project.discover(exclude_paths)

            # Current implementation is applying translated messages in the reading
            # phase.Therefore, in order to apply the updated message catalog, it is
            # necessary to re-process from the reading phase. Here, if dependency
            # is set for the doc source and the mo file, it is processed again from
            # the reading phase when mo is updated. In the future, we would like to
            # move i18n process into the writing phase, and remove these lines.
            if builder.use_message_catalog:
                # add catalog mo file dependency
                repo = CatalogRepository(self.srcdir, self.config.locale_dirs,
                                         self.config.language, self.config.source_encoding)
                mo_paths = {c.domain: c.mo_path for c in repo.catalogs}
                for docname in self.found_docs:
                    domain = docname_to_domain(docname, self.config.gettext_compact)
                    if domain in mo_paths:
                        self.dependencies[docname].add(mo_paths[domain])
        except OSError as exc:
            raise DocumentError(__('Failed to scan documents in %s: %r') %
                                (self.srcdir, exc)) from exc

    def get_outdated_files(self, config_changed: bool) -> Tuple[Set[str], Set[str], Set[str]]:
        """Return (added, changed, removed) sets."""
        # clear all files no longer present
        removed = set(self.all_docs) - self.found_docs

        added: Set[str] = set()
        changed: Set[str] = set()

        if config_changed:
            # config values affect e.g. substitutions
            added = self.found_docs
        else:
            for docname in self.found_docs:
                if docname not in self.all_docs:
                    logger.debug('[build target] added %r', docname)
                    added.add(docname)
                    continue
                # if the doctree file is not there, rebuild
                filename = path.join(self.doctreedir, docname + '.doctree')
                if not path.isfile(filename):
                    logger.debug('[build target] changed %r', docname)
                    changed.add(docname)
                    continue
                # check the "reread always" list
                if docname in self.reread_always:
                    logger.debug('[build target] changed %r', docname)
                    changed.add(docname)
                    continue
                # check the mtime of the document
                mtime = self.all_docs[docname]
                newmtime = path.getmtime(self.doc2path(docname))
                if newmtime > mtime:
                    logger.debug('[build target] outdated %r: %s -> %s',
                                 docname,
                                 datetime.utcfromtimestamp(mtime),
                                 datetime.utcfromtimestamp(newmtime))
                    changed.add(docname)
                    continue
                # finally, check the mtime of dependencies
                for dep in self.dependencies[docname]:
                    try:
                        # this will do the right thing when dep is absolute too
                        deppath = path.join(self.srcdir, dep)
                        if not path.isfile(deppath):
                            changed.add(docname)
                            break
                        depmtime = path.getmtime(deppath)
                        if depmtime > mtime:
                            changed.add(docname)
                            break
                    except OSError:
                        # give it another chance
                        changed.add(docname)
                        break

        return added, changed, removed

    def check_dependents(self, app: "Sphinx", already: Set[str]) -> Generator[str, None, None]:
        to_rewrite: List[str] = []
        for docnames in self.events.emit('env-get-updated', self):
            to_rewrite.extend(docnames)
        for docname in set(to_rewrite):
            if docname not in already:
                yield docname

    # --------- SINGLE FILE READING --------------------------------------------

    def prepare_settings(self, docname: str) -> None:
        """Prepare to set up environment for reading."""
        self.temp_data['docname'] = docname
        # defaults to the global default, but can be re-set in a document
        self.temp_data['default_role'] = self.config.default_role
        self.temp_data['default_domain'] = \
            self.domains.get(self.config.primary_domain)

    # utilities to use while reading a document

    @property
    def docname(self) -> str:
        """Returns the docname of the document currently being parsed."""
        return self.temp_data['docname']

[docs]    def new_serialno(self, category: str = '') -> int:
        """Return a serial number, e.g. for index entry targets.

        The number is guaranteed to be unique in the current document.
        """
        key = category + 'serialno'
        cur = self.temp_data.get(key, 0)
        self.temp_data[key] = cur + 1
        return cur

[docs]    def note_dependency(self, filename: str) -> None:
        """Add *filename* as a dependency of the current document.

        This means that the document will be rebuilt if this file changes.

        *filename* should be absolute or relative to the source directory.
        """
        self.dependencies[self.docname].add(filename)

    def note_included(self, filename: str) -> None:
        """Add *filename* as a included from other document.

        This means the document is not orphaned.

        *filename* should be absolute or relative to the source directory.
        """
        self.included[self.docname].add(self.path2doc(filename))

[docs]    def note_reread(self) -> None:
        """Add the current document to the list of documents that will
        automatically be re-read at the next build.
        """
        self.reread_always.add(self.docname)

    def get_domain(self, domainname: str) -> Domain:
        """Return the domain instance with the specified name.

        Raises an ExtensionError if the domain is not registered.
        """
        try:
            return self.domains[domainname]
        except KeyError as exc:
            raise ExtensionError(__('Domain %r is not registered') % domainname) from exc

    # --------- RESOLVING REFERENCES AND TOCTREES ------------------------------

    def get_doctree(self, docname: str) -> nodes.document:
        """Read the doctree for a file from the pickle and return it."""
        filename = path.join(self.doctreedir, docname + '.doctree')
        with open(filename, 'rb') as f:
            doctree = pickle.load(f)
        doctree.settings.env = self
        doctree.reporter = LoggingReporter(self.doc2path(docname))
        return doctree

    def get_and_resolve_doctree(self, docname: str, builder: "Builder",
                                doctree: nodes.document = None, prune_toctrees: bool = True,
                                includehidden: bool = False) -> nodes.document:
        """Read the doctree from the pickle, resolve cross-references and
        toctrees and return it.
        """
        if doctree is None:
            doctree = self.get_doctree(docname)

        # resolve all pending cross-references
        self.apply_post_transforms(doctree, docname)

        # now, resolve all toctree nodes
        for toctreenode in doctree.traverse(addnodes.toctree):
            result = TocTree(self).resolve(docname, builder, toctreenode,
                                           prune=prune_toctrees,
                                           includehidden=includehidden)
            if result is None:
                toctreenode.replace_self([])
            else:
                toctreenode.replace_self(result)

        return doctree

    def resolve_toctree(self, docname: str, builder: "Builder", toctree: addnodes.toctree,
                        prune: bool = True, maxdepth: int = 0, titles_only: bool = False,
                        collapse: bool = False, includehidden: bool = False) -> Node:
        """Resolve a *toctree* node into individual bullet lists with titles
        as items, returning None (if no containing titles are found) or
        a new node.

        If *prune* is True, the tree is pruned to *maxdepth*, or if that is 0,
        to the value of the *maxdepth* option on the *toctree* node.
        If *titles_only* is True, only toplevel document titles will be in the
        resulting tree.
        If *collapse* is True, all branches not containing docname will
        be collapsed.
        """
        return TocTree(self).resolve(docname, builder, toctree, prune,
                                     maxdepth, titles_only, collapse,
                                     includehidden)

    def resolve_references(self, doctree: nodes.document, fromdocname: str,
                           builder: "Builder") -> None:
        self.apply_post_transforms(doctree, fromdocname)

    def apply_post_transforms(self, doctree: nodes.document, docname: str) -> None:
        """Apply all post-transforms."""
        try:
            # set env.docname during applying post-transforms
            backup = copy(self.temp_data)
            self.temp_data['docname'] = docname

            transformer = SphinxTransformer(doctree)
            transformer.set_environment(self)
            transformer.add_transforms(self.app.registry.get_post_transforms())
            transformer.apply_transforms()
        finally:
            self.temp_data = backup

        # allow custom references to be resolved
        self.events.emit('doctree-resolved', doctree, docname)

    def collect_relations(self) -> Dict[str, List[Optional[str]]]:
        traversed = set()

        def traverse_toctree(parent: str, docname: str) -> Iterator[Tuple[str, str]]:
            if parent == docname:
                logger.warning(__('self referenced toctree found. Ignored.'),
                               location=docname, type='toc',
                               subtype='circular')
                return

            # traverse toctree by pre-order
            yield parent, docname
            traversed.add(docname)

            for child in (self.toctree_includes.get(docname) or []):
                for subparent, subdocname in traverse_toctree(docname, child):
                    if subdocname not in traversed:
                        yield subparent, subdocname
                        traversed.add(subdocname)

        relations = {}
        docnames = traverse_toctree(None, self.config.root_doc)
        prevdoc = None
        parent, docname = next(docnames)
        for nextparent, nextdoc in docnames:
            relations[docname] = [parent, prevdoc, nextdoc]
            prevdoc = docname
            docname = nextdoc
            parent = nextparent

        relations[docname] = [parent, prevdoc, None]

        return relations

    def check_consistency(self) -> None:
        """Do consistency checks."""
        included = set().union(*self.included.values())  # type: ignore
        for docname in sorted(self.all_docs):
            if docname not in self.files_to_rebuild:
                if docname == self.config.root_doc:
                    # the master file is not included anywhere ;)
                    continue
                if docname in included:
                    # the document is included from other documents
                    continue
                if 'orphan' in self.metadata[docname]:
                    continue
                logger.warning(__('document isn\'t included in any toctree'),
                               location=docname)

        # call check-consistency for all extensions
        for domain in self.domains.values():
            domain.check_consistency()
        self.events.emit('env-check-consistency', self)