# Source code for docutils.transforms.universal

# $Id: universal.py 8144 2017-07-26 21:25:08Z milde $
# -*- coding: utf-8 -*-
# Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer; Günter Milde
# Maintainer: docutils-develop@lists.sourceforge.net
# Copyright: This module has been placed in the public domain.

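"""
Transforms needed by most or all documents:

- `Decorations`: Generate a document's header & footer.
- `ExposeInternals`: Expose internal attributes if the
  ``expose_internals`` setting is set.
- `Messages`: Placement of system messages stored in
  `nodes.document.transform_messages`.
- `FilterMessages`: Remove system messages below the verbosity threshold.
- `TestMessages`: Like `Messages`, used on test runs.
- `StripComments`: Remove comment elements from the document tree.
- `StripClassesAndElements`: Remove elements with given classes and
  strip given "classes" attribute values.
- `SmartQuotes`: Replace ASCII quotation marks with typographic forms.
- `FinalReferences`: Resolve remaining references.
"""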

__docformat__ = 'reStructuredText'

import re
import sys
import time
from docutils import nodes, utils
from docutils.transforms import TransformError, Transform
from docutils.utils import smartquotes

class Decorations(Transform):

    """
    Fill the document's decoration element with header and footer content.
    """

    default_priority = 820

    def apply(self):
        """Add generated header and footer nodes to the decoration.

        The decoration element is only requested from the document when
        there is content to store in it.
        """
        header_content = self.generate_header()
        if header_content:
            self.document.get_decoration().get_header().extend(header_content)
        footer_content = self.generate_footer()
        if footer_content:
            self.document.get_decoration().get_footer().extend(footer_content)

    def generate_header(self):
        """Return the nodes for the header; this implementation has none."""
        return None

    def generate_footer(self):
        """Return a one-paragraph footer as a list, or None.

        The paragraph is assembled from the ``source_link``/``source_url``,
        ``datestamp`` and ``generator`` settings.  None is returned when
        none of these settings is active.
        """
        # @@@ Text is hard-coded for now.
        # Should be made dynamic (language-dependent).
        # @@@ Use timestamp from the `SOURCE_DATE_EPOCH`_ environment variable
        # for the datestamp?
        # See https://sourceforge.net/p/docutils/patches/132/
        # and https://reproducible-builds.org/specs/source-date-epoch/
        settings = self.document.settings
        if not (settings.generator or settings.datestamp
                or settings.source_link or settings.source_url):
            return None

        parts = []

        # Link to the document source: an explicit URL takes precedence
        # over the path computed relative to the destination.
        if (settings.source_link and settings._source) or settings.source_url:
            target = (settings.source_url
                      or utils.relative_path(settings._destination,
                                             settings._source))
            parts.append(nodes.reference('', 'View document source',
                                         refuri=target))
            parts.append(nodes.Text('.\n'))

        # Generation date (UTC), formatted with the configured pattern.
        if settings.datestamp:
            stamp = time.strftime(settings.datestamp, time.gmtime())
            parts.append(nodes.Text('Generated on: ' + stamp + '.\n'))

        # Credit line for the generator.
        if settings.generator:
            docutils_link = nodes.reference(
                '', 'Docutils', refuri='http://docutils.sourceforge.net/')
            rst_link = nodes.reference(
                '', 'reStructuredText',
                refuri='http://docutils.sourceforge.net/rst.html')
            parts += [nodes.Text('Generated by '),
                      docutils_link,
                      nodes.Text(' from '),
                      rst_link,
                      nodes.Text(' source.\n')]

        return [nodes.paragraph('', '', *parts)]


class ExposeInternals(Transform):

    """
    Copy internal node attributes into visible ``internal:<name>``
    attributes when the ``expose_internals`` setting is set.
    """

    default_priority = 840

    def not_Text(self, node):
        """Traversal condition: true for every node that is not a Text."""
        if isinstance(node, nodes.Text):
            return False
        return True

    def apply(self):
        """Expose the configured attributes on all non-Text nodes.

        For each name in the ``expose_internals`` setting, a Python
        attribute of that name whose value is not None is stored on the
        node under the key ``'internal:' + name``.
        """
        attribute_names = self.document.settings.expose_internals
        if not attribute_names:
            return
        for node in self.document.traverse(self.not_Text):
            for name in attribute_names:
                value = getattr(node, name, None)
                if value is None:
                    continue
                node['internal:' + name] = value


class Messages(Transform):

    """
    Collect the system messages generated after parsing in a dedicated
    section appended to the document.
    """

    default_priority = 860

    def apply(self):
        """Append a "system-messages" section with the pending messages.

        Only messages at or above the reporter's report level that have
        no parent yet are moved.  When there is at least one,
        `document.transform_messages` is emptied.
        """
        report_level = self.document.reporter.report_level
        pending = [msg for msg in self.document.transform_messages
                   if msg['level'] >= report_level and not msg.parent]
        if not pending:
            return
        section = nodes.section(classes=['system-messages'])
        # @@@ get this from the language module?
        section += nodes.title('', 'Docutils System Messages')
        section += pending
        self.document.transform_messages[:] = []
        self.document += section


class FilterMessages(Transform):

    """
    Remove system messages below verbosity threshold.

    Every `nodes.system_message` in the document tree whose "level"
    attribute is lower than the reporter's ``report_level`` is detached
    from its parent.
    """

    default_priority = 870

    def apply(self):
        """Detach all system messages below the reporter's report level."""
        # The threshold does not change during the loop: read it once.
        threshold = self.document.reporter.report_level
        # Iterate over a snapshot: nodes are removed from the tree inside
        # the loop, and that must not disturb the traversal itself (e.g.
        # if `traverse()` hands back a lazy iterator rather than a list).
        for node in list(self.document.traverse(nodes.system_message)):
            if node['level'] < threshold:
                node.parent.remove(node)


class TestMessages(Transform):

    """
    Put every post-parse system message that is not yet part of the tree
    at the end of the document.

    Used for testing purposes.
    """

    default_priority = 880

    def apply(self):
        """Append all parentless entries of `document.transform_messages`."""
        document = self.document
        # The parent check is done message by message, immediately before
        # the message is added to the document.
        for message in document.transform_messages:
            if message.parent:
                continue
            document += message


class StripComments(Transform):

    """
    Remove comment elements from the document tree (only if the
    ``strip_comments`` setting is enabled).
    """

    default_priority = 740

    def apply(self):
        """Detach every `nodes.comment` element from its parent.

        Does nothing unless the ``strip_comments`` setting is true.
        """
        if not self.document.settings.strip_comments:
            return
        # Iterate over a snapshot: nodes are removed from the tree inside
        # the loop, and that must not disturb the traversal itself (e.g.
        # if `traverse()` hands back a lazy iterator rather than a list).
        for node in list(self.document.traverse(nodes.comment)):
            node.parent.remove(node)


class StripClassesAndElements(Transform):

    """
    Remove from the document tree all elements with classes in
    `self.document.settings.strip_elements_with_classes` and all "classes"
    attribute values in `self.document.settings.strip_classes`.
    """

    default_priority = 420

    def apply(self):
        """Strip the configured class values and remove matching elements.

        Does nothing when neither of the two settings is set.  Otherwise
        the settings are stored as sets in `self.strip_elements` and
        `self.strip_classes`, and every node selected by `check_classes`
        is detached from its parent.
        """
        settings = self.document.settings
        if not (settings.strip_elements_with_classes
                or settings.strip_classes):
            return
        # Sets for fast membership tests (an unset setting yields an
        # empty set).
        self.strip_elements = set(settings.strip_elements_with_classes or [])
        self.strip_classes = set(settings.strip_classes or [])
        # Iterate over a snapshot: nodes are removed from the tree inside
        # the loop, and that must not disturb the traversal itself (e.g.
        # if `traverse()` hands back a lazy iterator rather than a list).
        for node in list(self.document.traverse(self.check_classes)):
            node.parent.remove(node)

    def check_classes(self, node):
        """Traversal condition with a side effect.

        Removes from ``node['classes']`` every value found in
        `self.strip_classes`.  Returns True as soon as a class value found
        in `self.strip_elements` is met (class values after it are left
        untouched), False otherwise and for nodes that are not Elements.
        """
        if not isinstance(node, nodes.Element):
            return False
        # Loop over a copy because the list itself may be modified below.
        for class_value in node['classes'][:]:
            if class_value in self.strip_classes:
                node['classes'].remove(class_value)
            if class_value in self.strip_elements:
                return True
        return False


class SmartQuotes(Transform):

    """
    Convert ASCII quotation marks to their typographic form.

    Multiple dashes are replaced with em-dash/en-dash characters as well.
    """

    default_priority = 850

    nodes_to_skip = (nodes.FixedTextElement, nodes.Special)
    """Do not apply "smartquotes" to instances of these block-level nodes."""

    literal_nodes = (nodes.image, nodes.literal, nodes.math,
                     nodes.raw, nodes.problematic)
    """Do not change quotes in instances of these inline nodes."""

    smartquotes_action = 'qDe'
    """Setting to select smartquote transformations.

    The default 'qDe' educates normal quote characters: (", '),
    em- and en-dashes (---, --) and ellipses (...).
    """

    def __init__(self, document, startnode):
        Transform.__init__(self, document, startnode=startnode)
        # languages for which a "not supported" warning was already issued
        self.unsupported_languages = set()

    def get_tokens(self, txtnodes):
        """Yield a ``(texttype, nodetext)`` tuple for each node in `txtnodes`.

        Interface to ``smartquotes.educate_tokens()``.  The text type is
        'literal' for Text nodes whose parent is one of `literal_nodes`
        and 'plain' for all others.
        """
        for txtnode in txtnodes:
            if isinstance(txtnode.parent, self.literal_nodes):
                yield ('literal', txtnode.astext())
            else:
                yield ('plain', txtnode.astext())

    def apply(self):
        """Educate the quotes in all text blocks of the document.

        Does nothing unless the ``smart_quotes`` setting is true.
        """
        settings = self.document.settings
        smart_quotes = settings.smart_quotes
        if not smart_quotes:
            return
        # The setting may be a value without a `startswith` method;
        # only a string starting with "alt" selects the alternative quotes.
        try:
            alternative = smart_quotes.startswith('alt')
        except AttributeError:
            alternative = False

        document_language = settings.language_code
        locale_quotes = settings.smartquotes_locales
        if locale_quotes:
            smartquotes.smartchars.quotes.update(dict(locale_quotes))

        # Each block of text (TextElement node) is educated as one unit
        # to keep the context around inline nodes:
        for block in self.document.traverse(nodes.TextElement):
            # Preformatted text blocks and special elements are skipped,
            # and so are nested TextElements (not "block-level" elements):
            if (isinstance(block, self.nodes_to_skip)
                    or isinstance(block.parent, nodes.TextElement)):
                continue

            # The Text nodes of this block (option strings excluded):
            txtnodes = []
            for txtnode in block.traverse(nodes.Text):
                if not isinstance(txtnode.parent, nodes.option_string):
                    txtnodes.append(txtnode)

            # Language that selects the typographical quotes:
            lang = block.get_language_code(document_language)
            # The alternative form toggles the "-x-altquot" subtag:
            if alternative:
                if '-x-altquot' in lang:
                    lang = lang.replace('-x-altquot', '')
                else:
                    lang += '-x-altquot'
            # Use the first normalized tag that has quotes defined
            # (this drops unsupported subtags):
            for tag in utils.normalize_language_tag(lang):
                if tag in smartquotes.smartchars.quotes:
                    lang = tag
                    break
            else:
                # Language not supported (keep ASCII quotes);
                # warn only once per language.
                if lang not in self.unsupported_languages:
                    self.document.reporter.warning(
                        'No smart quotes defined for language "%s".' % lang,
                        base_node=block)
                self.unsupported_languages.add(lang)
                lang = ''

            # Iterator educating quotes in plain text
            # (see "utils/smartquotes.py" for the attribute setting):
            teacher = smartquotes.educate_tokens(
                self.get_tokens(txtnodes),
                attr=self.smartquotes_action, language=lang)

            for txtnode, newtext in zip(txtnodes, teacher):
                educated = nodes.Text(newtext, rawsource=txtnode.rawsource)
                txtnode.parent.replace(txtnode, educated)

        self.unsupported_languages = set()  # reset