Source code for docutils.transforms.universal

# $Id: universal.py 8144 2017-07-26 21:25:08Z milde $
# -*- coding: utf-8 -*-
# Authors: David Goodger <goodger@python.org>; Ueli Schlaepfer; Günter Milde
# Maintainer: docutils-develop@lists.sourceforge.net
# Copyright: This module has been placed in the public domain.

"""
Transforms needed by most or all documents:

- `Decorations`: Generate a document's header & footer.
- `Messages`: Placement of system messages stored in
  `nodes.document.transform_messages`.
- `TestMessages`: Like `Messages`, used on test runs.
- `FinalReferences`: Resolve remaining references.
"""

__docformat__ = 'reStructuredText'

import re
import sys
import time
from docutils import nodes, utils
from docutils.transforms import TransformError, Transform
from docutils.utils import smartquotes

class Decorations(Transform):

    """
    Populate a document's decoration element (header, footer).
    """

    default_priority = 820

    def apply(self):
        """
        Append the generated header and footer nodes to the decoration.

        The decoration element is requested from the document only when
        the respective generator returned something to insert.
        """
        header_content = self.generate_header()
        if header_content:
            self.document.get_decoration().get_header().extend(
                header_content)
        # NOTE(review): `generate_footer()` is called here but is not part
        # of the class body as shown -- presumably defined elsewhere; verify.
        footer_content = self.generate_footer()
        if footer_content:
            self.document.get_decoration().get_footer().extend(
                footer_content)

    def generate_header(self):
        """Return the header content; this implementation returns None."""
        return None
class ExposeInternals(Transform):

    """
    Expose internal attributes if ``expose_internals`` setting is set.
    """

    default_priority = 840

    def not_Text(self, node):
        """Traversal condition: true unless `node` is a `nodes.Text`."""
        if isinstance(node, nodes.Text):
            return False
        return True

    def apply(self):
        """
        Copy the Python attributes named in the ``expose_internals`` setting
        to node attributes with an "internal:" prefix.

        Attributes that are missing or ``None`` on a node are skipped.
        """
        attribute_names = self.document.settings.expose_internals
        if not attribute_names:
            return
        for node in self.document.traverse(self.not_Text):
            for name in attribute_names:
                internal_value = getattr(node, name, None)
                if internal_value is None:
                    continue
                node['internal:' + name] = internal_value
class Messages(Transform):

    """
    Place any system messages generated after parsing into a dedicated section
    of the document.
    """

    default_priority = 860

    def apply(self):
        """
        Move pending transform messages into a "system-messages" section.

        Only messages at or above the reporter's ``report_level`` that have
        no parent are collected.  If there are none, the document and the
        message list are left untouched.
        """
        candidates = self.document.transform_messages
        report_level = self.document.reporter.report_level
        pending = [message for message in candidates
                   if message['level'] >= report_level
                   and not message.parent]
        if not pending:
            return
        container = nodes.section(classes=['system-messages'])
        # @@@ get this from the language module?
        container += nodes.title('', 'Docutils System Messages')
        container += pending
        # Empty the list in place so other references to it see the change.
        self.document.transform_messages[:] = []
        self.document += container
class FilterMessages(Transform):

    """
    Remove system messages below verbosity threshold.
    """

    default_priority = 870

    def apply(self):
        """
        Detach every system message whose level is lower than the
        reporter's ``report_level`` from its parent.
        """
        document = self.document
        # NOTE(review): nodes are removed while looping over the result of
        # `traverse()`; this presumably relies on `traverse()` returning a
        # materialized sequence -- verify.
        for message in document.traverse(nodes.system_message):
            below_threshold = (message['level']
                               < document.reporter.report_level)
            if below_threshold:
                message.parent.remove(message)
class TestMessages(Transform):

    """
    Append all post-parse system messages to the end of the document.

    Used for testing purposes.
    """

    default_priority = 880

    def apply(self):
        """Add each transform message that has no parent to the document."""
        for message in self.document.transform_messages:
            if message.parent:
                # Already has a parent; do not add it a second time.
                continue
            self.document += message
class StripComments(Transform):

    """
    Remove comment elements from the document tree (only if the
    ``strip_comments`` setting is enabled).
    """

    default_priority = 740

    def apply(self):
        """Detach all `nodes.comment` elements from their parents."""
        if not self.document.settings.strip_comments:
            return
        # NOTE(review): nodes are removed while looping over the result of
        # `traverse()`; this presumably relies on `traverse()` returning a
        # materialized sequence -- verify.
        for comment in self.document.traverse(nodes.comment):
            comment.parent.remove(comment)
class StripClassesAndElements(Transform):

    """
    Remove from the document tree all elements with classes in
    `self.document.settings.strip_elements_with_classes` and all "classes"
    attribute values in `self.document.settings.strip_classes`.
    """

    default_priority = 420

    def apply(self):
        """
        Strip the configured class values and elements from the document.

        Does nothing when both settings are empty.  Otherwise the lookup
        sets `self.strip_elements` and `self.strip_classes` are (re)built
        from the settings, and every node accepted by `check_classes()` is
        removed from its parent.
        """
        settings = self.document.settings
        if not (settings.strip_elements_with_classes
                or settings.strip_classes):
            return
        # Sets for membership tests (`in` behaves exactly as it did with the
        # former dicts that mapped each key to None).
        self.strip_elements = set(settings.strip_elements_with_classes or ())
        self.strip_classes = set(settings.strip_classes or ())
        for node in self.document.traverse(self.check_classes):
            node.parent.remove(node)

    def check_classes(self, node):
        """
        Traversal condition: return True if `node` is to be removed.

        As a side effect, class values listed in `self.strip_classes` are
        removed from the node's "classes" attribute.  Processing of a node
        stops at the first class value found in `self.strip_elements`.
        Nodes that are not `nodes.Element` instances are never selected.
        """
        if not isinstance(node, nodes.Element):
            return False
        # Loop over a copy because the "classes" list is modified in place.
        for class_value in node['classes'][:]:
            if class_value in self.strip_classes:
                node['classes'].remove(class_value)
            if class_value in self.strip_elements:
                return True
        return False
class SmartQuotes(Transform):

    """
    Replace ASCII quotation marks with typographic form.

    Also replace multiple dashes with em-dash/en-dash characters.
    """

    default_priority = 850

    nodes_to_skip = (nodes.FixedTextElement, nodes.Special)
    """Do not apply "smartquotes" to instances of these block-level nodes."""

    literal_nodes = (nodes.image, nodes.literal, nodes.math,
                     nodes.raw, nodes.problematic)
    """Do not change quotes in instances of these inline nodes."""

    smartquotes_action = 'qDe'
    """Setting to select smartquote transformations.

    The default 'qDe' educates normal quote characters: (", '),
    em- and en-dashes (---, --) and ellipses (...).
    """

    def __init__(self, document, startnode):
        Transform.__init__(self, document, startnode=startnode)
        # Languages for which the "no smart quotes" warning was already
        # issued during the current `apply()` run.
        self.unsupported_languages = set()

    def get_tokens(self, txtnodes):
        """
        Yield a ``(texttype, nodetext)`` tuple for every node in `txtnodes`,
        a list of "Text" nodes (interface to
        ``smartquotes.educate_tokens()``).

        `texttype` is 'literal' if the parent of the text node is an
        instance of one of `self.literal_nodes`, else 'plain'.
        """
        for text_node in txtnodes:
            # "literal" text is not changed:
            if isinstance(text_node.parent, self.literal_nodes):
                kind = 'literal'
            else:
                kind = 'plain'
            yield (kind, text_node.astext())

    def apply(self):
        """
        Educate quotes in all block-level text elements of the document.

        Does nothing unless the ``smart_quotes`` setting is true.  Each
        top-level `nodes.TextElement` is processed as one unit so that the
        context around inline nodes is kept.
        """
        settings = self.document.settings
        smart_quotes = settings.smart_quotes
        if not smart_quotes:
            return
        # Setting values without a ``startswith`` method select the
        # standard (non-alternative) quotes.
        try:
            alternative = smart_quotes.startswith('alt')
        except AttributeError:
            alternative = False

        document_language = settings.language_code
        extra_locales = settings.smartquotes_locales
        if extra_locales:
            smartquotes.smartchars.quotes.update(dict(extra_locales))

        for block in self.document.traverse(nodes.TextElement):
            # Skip preformatted text blocks and special elements; nested
            # TextElements are not "block-level" elements either.
            if (isinstance(block, self.nodes_to_skip)
                    or isinstance(block.parent, nodes.TextElement)):
                continue

            # Text nodes of the "text block" (option strings are left out).
            text_nodes = [
                candidate for candidate in block.traverse(nodes.Text)
                if not isinstance(candidate.parent, nodes.option_string)]

            # Language whose typographical quotes are to be used.
            lang = block.get_language_code(document_language)
            # Toggle the alternative form if the `smart-quotes` setting
            # starts with "alt".
            if alternative:
                if '-x-altquot' in lang:
                    lang = lang.replace('-x-altquot', '')
                else:
                    lang += '-x-altquot'

            # Drop unsupported subtags: use the first normalized tag that
            # has quote definitions.
            supported_tag = next(
                (tag for tag in utils.normalize_language_tag(lang)
                 if tag in smartquotes.smartchars.quotes),
                None)
            if supported_tag is not None:
                lang = supported_tag
            else:
                # Language not supported: keep ASCII quotes and warn once
                # per language.
                if lang not in self.unsupported_languages:
                    self.document.reporter.warning(
                        'No smart quotes defined for language "%s".' % lang,
                        base_node=block)
                self.unsupported_languages.add(lang)
                lang = ''

            # Iterator educating quotes in plain text
            # (see "utils/smartquotes.py" for the attribute setting).
            educated = smartquotes.educate_tokens(
                self.get_tokens(text_nodes),
                attr=self.smartquotes_action, language=lang)

            for text_node, new_text in zip(text_nodes, educated):
                replacement = nodes.Text(new_text,
                                         rawsource=text_node.rawsource)
                text_node.parent.replace(text_node, replacement)

        self.unsupported_languages = set()  # reset