Source code for docutils.utils.math.latex2mathml

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# :Id: $Id: latex2mathml.py 7995 2016-12-10 17:50:59Z milde $
# :Copyright: © 2010 Günter Milde.
#             Based on rst2mathml.py from the latex_math sandbox project
#             © 2005 Jens Jørgen Mortensen
# :License: Released under the terms of the `2-Clause BSD license`_, in short:
# 
#    Copying and distribution of this file, with or without modification,
#    are permitted in any medium without royalty provided the copyright
#    notice and this notice are preserved.
#    This file is offered as-is, without any warranty.
# 
# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause


"""Convert LaTex math code into presentational MathML"""

# Based on the `latex_math` sandbox project by Jens Jørgen Mortensen

import docutils.utils.math.tex2unichar as tex2unichar

#        TeX      spacing    combining
over = {'acute':    '\u00B4', # u'\u0301',
        'bar':      '\u00AF', # u'\u0304',
        'breve':    '\u02D8', # u'\u0306',
        'check':    '\u02C7', # u'\u030C',
        'dot':      '\u02D9', # u'\u0307',
        'ddot':     '\u00A8', # u'\u0308',
        'dddot':               '\u20DB',
        'grave':    '`',      # u'\u0300',
        'hat':      '^',      # u'\u0302',
        'mathring': '\u02DA', # u'\u030A',
        'overleftrightarrow':  '\u20e1',
        # 'overline':        # u'\u0305',
        'tilde':    '\u02DC', # u'\u0303',
        'vec':               '\u20D7'}

Greek = { # Capital Greek letters: (upright in TeX style)
    'Phi':'\u03a6', 'Xi':'\u039e', 'Sigma':'\u03a3',
    'Psi':'\u03a8', 'Delta':'\u0394', 'Theta':'\u0398',
    'Upsilon':'\u03d2', 'Pi':'\u03a0', 'Omega':'\u03a9',
    'Gamma':'\u0393', 'Lambda':'\u039b'}

letters = tex2unichar.mathalpha

special = tex2unichar.mathbin         # Binary symbols
special.update(tex2unichar.mathrel)   # Relation symbols, arrow symbols
special.update(tex2unichar.mathord)   # Miscellaneous symbols
special.update(tex2unichar.mathop)    # Variable-sized symbols
special.update(tex2unichar.mathopen)  # Braces
special.update(tex2unichar.mathclose) # Braces
special.update(tex2unichar.mathfence)

sumintprod = ''.join([special[symbol] for symbol in
                      ['sum', 'int', 'oint', 'prod']])

functions = ['arccos', 'arcsin', 'arctan', 'arg', 'cos',  'cosh',
             'cot',    'coth',   'csc',    'deg', 'det',  'dim',
             'exp',    'gcd',    'hom',    'inf', 'ker',  'lg',
             'lim',    'liminf', 'limsup', 'ln',  'log',  'max',
             'min',    'Pr',     'sec',    'sin', 'sinh', 'sup',
             'tan',    'tanh',
             'injlim',  'varinjlim', 'varlimsup',
             'projlim', 'varliminf', 'varprojlim']


mathbb = {
          'A': '\U0001D538',
          'B': '\U0001D539',
          'C': '\u2102',
          'D': '\U0001D53B',
          'E': '\U0001D53C',
          'F': '\U0001D53D',
          'G': '\U0001D53E',
          'H': '\u210D',
          'I': '\U0001D540',
          'J': '\U0001D541',
          'K': '\U0001D542',
          'L': '\U0001D543',
          'M': '\U0001D544',
          'N': '\u2115',
          'O': '\U0001D546',
          'P': '\u2119',
          'Q': '\u211A',
          'R': '\u211D',
          'S': '\U0001D54A',
          'T': '\U0001D54B',
          'U': '\U0001D54C',
          'V': '\U0001D54D',
          'W': '\U0001D54E',
          'X': '\U0001D54F',
          'Y': '\U0001D550',
          'Z': '\u2124',
         }

mathscr = {
           'A': '\U0001D49C',
           'B': '\u212C',     # bernoulli function
           'C': '\U0001D49E',
           'D': '\U0001D49F',
           'E': '\u2130',
           'F': '\u2131',
           'G': '\U0001D4A2',
           'H': '\u210B',     # hamiltonian
           'I': '\u2110',
           'J': '\U0001D4A5',
           'K': '\U0001D4A6',
           'L': '\u2112',     # lagrangian
           'M': '\u2133',     # physics m-matrix
           'N': '\U0001D4A9',
           'O': '\U0001D4AA',
           'P': '\U0001D4AB',
           'Q': '\U0001D4AC',
           'R': '\u211B',
           'S': '\U0001D4AE',
           'T': '\U0001D4AF',
           'U': '\U0001D4B0',
           'V': '\U0001D4B1',
           'W': '\U0001D4B2',
           'X': '\U0001D4B3',
           'Y': '\U0001D4B4',
           'Z': '\U0001D4B5',
           'a': '\U0001D4B6',
           'b': '\U0001D4B7',
           'c': '\U0001D4B8',
           'd': '\U0001D4B9',
           'e': '\u212F',
           'f': '\U0001D4BB',
           'g': '\u210A',
           'h': '\U0001D4BD',
           'i': '\U0001D4BE',
           'j': '\U0001D4BF',
           'k': '\U0001D4C0',
           'l': '\U0001D4C1',
           'm': '\U0001D4C2',
           'n': '\U0001D4C3',
           'o': '\u2134',     # order of
           'p': '\U0001D4C5',
           'q': '\U0001D4C6',
           'r': '\U0001D4C7',
           's': '\U0001D4C8',
           't': '\U0001D4C9',
           'u': '\U0001D4CA',
           'v': '\U0001D4CB',
           'w': '\U0001D4CC',
           'x': '\U0001D4CD',
           'y': '\U0001D4CE',
           'z': '\U0001D4CF',
          }

negatables = {'=': '\u2260',
              r'\in': '\u2209',
              r'\equiv': '\u2262'}

# LaTeX to MathML translation stuff:
[docs]class math: """Base class for MathML elements.""" nchildren = 1000000 """Required number of children""" def __init__(self, children=None, inline=None): """math([children]) -> MathML element children can be one child or a list of children.""" self.children = [] if children is not None: if type(children) is list: for child in children: self.append(child) else: # Only one child: self.append(children) if inline is not None: self.inline = inline def __repr__(self): if hasattr(self, 'children'): return self.__class__.__name__ + '(%s)' % \ ','.join([repr(child) for child in self.children]) else: return self.__class__.__name__
[docs] def full(self): """Room for more children?""" return len(self.children) >= self.nchildren
[docs] def append(self, child): """append(child) -> element Appends child and returns self if self is not full or first non-full parent.""" assert not self.full() self.children.append(child) child.parent = self node = self while node.full(): node = node.parent return node
[docs] def delete_child(self): """delete_child() -> child Delete last child and return it.""" child = self.children[-1] del self.children[-1] return child
[docs] def close(self): """close() -> parent Close element and return first non-full element.""" parent = self.parent while parent.full(): parent = parent.parent return parent
[docs] def xml(self): """xml() -> xml-string""" return self.xml_start() + self.xml_body() + self.xml_end()
[docs] def xml_start(self): if not hasattr(self, 'inline'): return ['<%s>' % self.__class__.__name__] xmlns = 'http://www.w3.org/1998/Math/MathML' if self.inline: return ['<math xmlns="%s">' % xmlns] else: return ['<math xmlns="%s" mode="display">' % xmlns]
[docs] def xml_end(self): return ['</%s>' % self.__class__.__name__]
[docs] def xml_body(self): xml = [] for child in self.children: xml.extend(child.xml()) return xml
[docs]class mrow(math):
[docs] def xml_start(self): return ['\n<%s>' % self.__class__.__name__]
[docs]class mtable(math):
[docs] def xml_start(self): return ['\n<%s>' % self.__class__.__name__]
[docs]class mtr(mrow): pass
[docs]class mtd(mrow): pass
[docs]class mx(math): """Base class for mo, mi, and mn""" nchildren = 0 def __init__(self, data): self.data = data
[docs] def xml_body(self): return [self.data]
[docs]class mo(mx): translation = {'<': '&lt;', '>': '&gt;'}
[docs] def xml_body(self): return [self.translation.get(self.data, self.data)]
[docs]class mi(mx): pass
[docs]class mn(mx): pass
[docs]class msub(math): nchildren = 2
[docs]class msup(math): nchildren = 2
[docs]class msqrt(math): nchildren = 1
[docs]class mroot(math): nchildren = 2
[docs]class mfrac(math): nchildren = 2
[docs]class msubsup(math): nchildren = 3 def __init__(self, children=None, reversed=False): self.reversed = reversed math.__init__(self, children)
[docs] def xml(self): if self.reversed: ## self.children[1:3] = self.children[2:0:-1] self.children[1:3] = [self.children[2], self.children[1]] self.reversed = False return math.xml(self)
[docs]class mfenced(math): translation = {'\\{': '{', '\\langle': '\u2329', '\\}': '}', '\\rangle': '\u232A', '.': ''} def __init__(self, par): self.openpar = par math.__init__(self)
[docs] def xml_start(self): open = self.translation.get(self.openpar, self.openpar) close = self.translation.get(self.closepar, self.closepar) return ['<mfenced open="%s" close="%s">' % (open, close)]
[docs]class mspace(math): nchildren = 0
[docs]class mstyle(math): def __init__(self, children=None, nchildren=None, **kwargs): if nchildren is not None: self.nchildren = nchildren math.__init__(self, children) self.attrs = kwargs
[docs] def xml_start(self): return ['<mstyle '] + ['%s="%s"' % item for item in list(self.attrs.items())] + ['>']
[docs]class mover(math): nchildren = 2 def __init__(self, children=None, reversed=False): self.reversed = reversed math.__init__(self, children)
[docs] def xml(self): if self.reversed: self.children.reverse() self.reversed = False return math.xml(self)
[docs]class munder(math): nchildren = 2
[docs]class munderover(math): nchildren = 3 def __init__(self, children=None): math.__init__(self, children)
[docs]class mtext(math): nchildren = 0 def __init__(self, text): self.text = text
[docs] def xml_body(self): return [self.text]
[docs]def parse_latex_math(string, inline=True): """parse_latex_math(string [,inline]) -> MathML-tree Returns a MathML-tree parsed from string. inline=True is for inline math and inline=False is for displayed math. tree is the whole tree and node is the current element.""" # Normalize white-space: string = ' '.join(string.split()) if inline: node = mrow() tree = math(node, inline=True) else: node = mtd() tree = math(mtable(mtr(node)), inline=False) while len(string) > 0: n = len(string) c = string[0] skip = 1 # number of characters consumed if n > 1: c2 = string[1] else: c2 = '' ## print n, string, c, c2, node.__class__.__name__ if c == ' ': pass elif c == '\\': if c2 in '{}': node = node.append(mo(c2)) skip = 2 elif c2 == ' ': node = node.append(mspace()) skip = 2 elif c2 == ',': # TODO: small space node = node.append(mspace()) skip = 2 elif c2.isalpha(): # We have a LaTeX-name: i = 2 while i < n and string[i].isalpha(): i += 1 name = string[1:i] node, skip = handle_keyword(name, node, string[i:]) skip += i elif c2 == '\\': # End of a row: entry = mtd() row = mtr(entry) node.close().close().append(row) node = entry skip = 2 else: raise SyntaxError(r'Syntax error: "%s%s"' % (c, c2)) elif c.isalpha(): node = node.append(mi(c)) elif c.isdigit(): node = node.append(mn(c)) elif c in "+-*/=()[]|<>,.!?':;@": node = node.append(mo(c)) elif c == '_': child = node.delete_child() if isinstance(child, msup): sub = msubsup(child.children, reversed=True) elif isinstance(child, mo) and child.data in sumintprod: sub = munder(child) else: sub = msub(child) node.append(sub) node = sub elif c == '^': child = node.delete_child() if isinstance(child, msub): sup = msubsup(child.children) elif isinstance(child, mo) and child.data in sumintprod: sup = mover(child) elif (isinstance(child, munder) and child.children[0].data in sumintprod): sup = munderover(child.children) else: sup = msup(child) node.append(sup) node = sup elif c == '{': row = mrow() node.append(row) node = row elif c == '}': node = node.close() elif c == '&': entry = mtd() node.close().append(entry) node = entry else: raise SyntaxError(r'Illegal character: "%s"' % c) string = string[skip:] return tree
[docs]def handle_keyword(name, node, string): skip = 0 if len(string) > 0 and string[0] == ' ': string = string[1:] skip = 1 if name == 'begin': if not string.startswith('{matrix}'): raise SyntaxError('Environment not supported! ' 'Supported environment: "matrix".') skip += 8 entry = mtd() table = mtable(mtr(entry)) node.append(table) node = entry elif name == 'end': if not string.startswith('{matrix}'): raise SyntaxError(r'Expected "\end{matrix}"!') skip += 8 node = node.close().close().close() elif name in ('text', 'mathrm'): if string[0] != '{': raise SyntaxError(r'Expected "\text{...}"!') i = string.find('}') if i == -1: raise SyntaxError(r'Expected "\text{...}"!') node = node.append(mtext(string[1:i])) skip += i + 1 elif name == 'sqrt': sqrt = msqrt() node.append(sqrt) node = sqrt elif name == 'frac': frac = mfrac() node.append(frac) node = frac elif name == 'left': for par in ['(', '[', '|', '\\{', '\\langle', '.']: if string.startswith(par): break else: raise SyntaxError('Missing left-brace!') fenced = mfenced(par) node.append(fenced) row = mrow() fenced.append(row) node = row skip += len(par) elif name == 'right': for par in [')', ']', '|', '\\}', '\\rangle', '.']: if string.startswith(par): break else: raise SyntaxError('Missing right-brace!') node = node.close() node.closepar = par node = node.close() skip += len(par) elif name == 'not': for operator in negatables: if string.startswith(operator): break else: raise SyntaxError(r'Expected something to negate: "\not ..."!') node = node.append(mo(negatables[operator])) skip += len(operator) elif name == 'mathbf': style = mstyle(nchildren=1, fontweight='bold') node.append(style) node = style elif name == 'mathbb': if string[0] != '{' or not string[1].isupper() or string[2] != '}': raise SyntaxError(r'Expected something like "\mathbb{A}"!') node = node.append(mi(mathbb[string[1]])) skip += 3 elif name in ('mathscr', 'mathcal'): if string[0] != '{' or string[2] != '}': raise SyntaxError(r'Expected something like "\mathscr{A}"!') node = node.append(mi(mathscr[string[1]])) skip += 3 elif name == 'colon': # "normal" colon, not binary operator node = node.append(mo(':')) # TODO: add ``lspace="0pt"`` elif name in Greek: # Greek capitals (upright in "TeX style") node = node.append(mo(Greek[name])) # TODO: "ISO style" sets them italic. Could we use a class argument # to enable styling via CSS? elif name in letters: node = node.append(mi(letters[name])) elif name in special: node = node.append(mo(special[name])) elif name in functions: node = node.append(mo(name)) elif name in over: ovr = mover(mo(over[name]), reversed=True) node.append(ovr) node = ovr else: raise SyntaxError('Unknown LaTeX command: ' + name) return node, skip
[docs]def tex2mathml(tex_math, inline=True): """Return string with MathML code corresponding to `tex_math`. `inline`=True is for inline math and `inline`=False for displayed math. """ mathml_tree = parse_latex_math(tex_math, inline=inline) return ''.join(mathml_tree.xml())