Source code for pygments.formatters.latex

# -*- coding: utf-8 -*-
"""
    pygments.formatters.latex
    ~~~~~~~~~~~~~~~~~~~~~~~~~

    Formatter for LaTeX fancyvrb output.

    :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

from pygments.formatter import Formatter
from pygments.token import Token, STANDARD_TYPES
from pygments.util import get_bool_opt, get_int_opt, StringIO


__all__ = ['LatexFormatter']


def escape_tex(text, commandprefix):
    return text.replace('\\', '\x00'). \
                replace('{', '\x01'). \
                replace('}', '\x02'). \
                replace('\x00', r'\%sZbs{}' % commandprefix). \
                replace('\x01', r'\%sZob{}' % commandprefix). \
                replace('\x02', r'\%sZcb{}' % commandprefix). \
                replace('^', r'\%sZca{}' % commandprefix). \
                replace('_', r'\%sZus{}' % commandprefix). \
                replace('&', r'\%sZam{}' % commandprefix). \
                replace('<', r'\%sZlt{}' % commandprefix). \
                replace('>', r'\%sZgt{}' % commandprefix). \
                replace('#', r'\%sZsh{}' % commandprefix). \
                replace('%', r'\%sZpc{}' % commandprefix). \
                replace('$', r'\%sZdl{}' % commandprefix). \
                replace('-', r'\%sZhy{}' % commandprefix). \
                replace("'", r'\%sZsq{}' % commandprefix). \
                replace('"', r'\%sZdq{}' % commandprefix). \
                replace('~', r'\%sZti{}' % commandprefix)


DOC_TEMPLATE = r'''
\documentclass{%(docclass)s}
\usepackage{fancyvrb}
\usepackage{color}
\usepackage[%(encoding)s]{inputenc}
%(preamble)s

%(styledefs)s

\begin{document}

\section*{%(title)s}

%(code)s
\end{document}
'''

## Small explanation of the mess below :)
#
# The previous version of the LaTeX formatter just assigned a command to
# each token type defined in the current style.  That obviously is
# problematic if the highlighted code is produced for a different style
# than the style commands themselves.
#
# This version works much like the HTML formatter which assigns multiple
# CSS classes to each <span> tag, from the most specific to the least
# specific token type, thus falling back to the parent token type if one
# is not defined.  Here, the classes are there too and use the same short
# forms given in token.STANDARD_TYPES.
#
# Highlighted code now only uses one custom command, which by default is
# \PY and selectable by the commandprefix option (and in addition the
# escapes \PYZat, \PYZlb and \PYZrb which haven't been renamed for
# backwards compatibility purposes).
#
# \PY has two arguments: the classes, separated by +, and the text to
# render in that style.  The classes are resolved into the respective
# style commands by magic, which serves to ignore unknown classes.
#
# The magic macros are:
# * \PY@it, \PY@bf, etc. are unconditionally wrapped around the text
#   to render in \PY@do.  Their definition determines the style.
# * \PY@reset resets \PY@it etc. to do nothing.
# * \PY@toks parses the list of classes, using magic inspired by the
#   keyval package (but modified to use plusses instead of commas
#   because fancyvrb redefines commas inside its environments).
# * \PY@tok processes one class, calling the \PY@tok@classname command
#   if it exists.
# * \PY@tok@classname sets the \PY@it etc. to reflect the chosen style
#   for its class.
# * \PY resets the style, parses the classnames and then calls \PY@do.
#
# Tip: to read this code, print it out in substituted form using e.g.
# >>> print STYLE_TEMPLATE % {'cp': 'PY'}

STYLE_TEMPLATE = r'''
\makeatletter
\def\%(cp)s@reset{\let\%(cp)s@it=\relax \let\%(cp)s@bf=\relax%%
    \let\%(cp)s@ul=\relax \let\%(cp)s@tc=\relax%%
    \let\%(cp)s@bc=\relax \let\%(cp)s@ff=\relax}
\def\%(cp)s@tok#1{\csname %(cp)s@tok@#1\endcsname}
\def\%(cp)s@toks#1+{\ifx\relax#1\empty\else%%
    \%(cp)s@tok{#1}\expandafter\%(cp)s@toks\fi}
\def\%(cp)s@do#1{\%(cp)s@bc{\%(cp)s@tc{\%(cp)s@ul{%%
    \%(cp)s@it{\%(cp)s@bf{\%(cp)s@ff{#1}}}}}}}
\def\%(cp)s#1#2{\%(cp)s@reset\%(cp)s@toks#1+\relax+\%(cp)s@do{#2}}

%(styles)s

\def\%(cp)sZbs{\char`\\}
\def\%(cp)sZus{\char`\_}
\def\%(cp)sZob{\char`\{}
\def\%(cp)sZcb{\char`\}}
\def\%(cp)sZca{\char`\^}
\def\%(cp)sZam{\char`\&}
\def\%(cp)sZlt{\char`\<}
\def\%(cp)sZgt{\char`\>}
\def\%(cp)sZsh{\char`\#}
\def\%(cp)sZpc{\char`\%%}
\def\%(cp)sZdl{\char`\$}
\def\%(cp)sZhy{\char`\-}
\def\%(cp)sZsq{\char`\'}
\def\%(cp)sZdq{\char`\"}
\def\%(cp)sZti{\char`\~}
%% for compatibility with earlier versions
\def\%(cp)sZat{@}
\def\%(cp)sZlb{[}
\def\%(cp)sZrb{]}
\makeatother
'''


def _get_ttype_name(ttype):
    fname = STANDARD_TYPES.get(ttype)
    if fname:
        return fname
    aname = ''
    while fname is None:
        aname = ttype[-1] + aname
        ttype = ttype.parent
        fname = STANDARD_TYPES.get(ttype)
    return fname + aname


[docs]class LatexFormatter(Formatter): r""" Format tokens as LaTeX code. This needs the `fancyvrb` and `color` standard packages. Without the `full` option, code is formatted as one ``Verbatim`` environment, like this: .. sourcecode:: latex \begin{Verbatim}[commandchars=\\{\}] \PY{k}{def }\PY{n+nf}{foo}(\PY{n}{bar}): \PY{k}{pass} \end{Verbatim} The special command used here (``\PY``) and all the other macros it needs are output by the `get_style_defs` method. With the `full` option, a complete LaTeX document is output, including the command definitions in the preamble. The `get_style_defs()` method of a `LatexFormatter` returns a string containing ``\def`` commands defining the macros needed inside the ``Verbatim`` environments. Additional options accepted: `style` The style to use, can be a string or a Style subclass (default: ``'default'``). `full` Tells the formatter to output a "full" document, i.e. a complete self-contained document (default: ``False``). `title` If `full` is true, the title that should be used to caption the document (default: ``''``). `docclass` If the `full` option is enabled, this is the document class to use (default: ``'article'``). `preamble` If the `full` option is enabled, this can be further preamble commands, e.g. ``\usepackage`` (default: ``''``). `linenos` If set to ``True``, output line numbers (default: ``False``). `linenostart` The line number for the first line (default: ``1``). `linenostep` If set to a number n > 1, only every nth line number is printed. `verboptions` Additional options given to the Verbatim environment (see the *fancyvrb* docs for possible values) (default: ``''``). `commandprefix` The LaTeX commands used to produce colored output are constructed using this prefix and some letters (default: ``'PY'``). *New in Pygments 0.7.* *New in Pygments 0.10:* the default is now ``'PY'`` instead of ``'C'``. `texcomments` If set to ``True``, enables LaTeX comment lines. That is, LaTex markup in comment tokens is not escaped so that LaTeX can render it (default: ``False``). *New in Pygments 1.2.* `mathescape` If set to ``True``, enables LaTeX math mode escape in comments. That is, ``'$...$'`` inside a comment will trigger math mode (default: ``False``). *New in Pygments 1.2.* """ name = 'LaTeX' aliases = ['latex', 'tex'] filenames = ['*.tex'] def __init__(self, **options): Formatter.__init__(self, **options) self.docclass = options.get('docclass', 'article') self.preamble = options.get('preamble', '') self.linenos = get_bool_opt(options, 'linenos', False) self.linenostart = abs(get_int_opt(options, 'linenostart', 1)) self.linenostep = abs(get_int_opt(options, 'linenostep', 1)) self.verboptions = options.get('verboptions', '') self.nobackground = get_bool_opt(options, 'nobackground', False) self.commandprefix = options.get('commandprefix', 'PY') self.texcomments = get_bool_opt(options, 'texcomments', False) self.mathescape = get_bool_opt(options, 'mathescape', False) self._create_stylesheet() def _create_stylesheet(self): t2n = self.ttype2name = {Token: ''} c2d = self.cmd2def = {} cp = self.commandprefix def rgbcolor(col): if col: return ','.join(['%.2f' %(int(col[i] + col[i + 1], 16) / 255.0) for i in (0, 2, 4)]) else: return '1,1,1' for ttype, ndef in self.style: name = _get_ttype_name(ttype) cmndef = '' if ndef['bold']: cmndef += r'\let\$$@bf=\textbf' if ndef['italic']: cmndef += r'\let\$$@it=\textit' if ndef['underline']: cmndef += r'\let\$$@ul=\underline' if ndef['roman']: cmndef += r'\let\$$@ff=\textrm' if ndef['sans']: cmndef += r'\let\$$@ff=\textsf' if ndef['mono']: cmndef += r'\let\$$@ff=\textsf' if ndef['color']: cmndef += (r'\def\$$@tc##1{\textcolor[rgb]{%s}{##1}}' % rgbcolor(ndef['color'])) if ndef['border']: cmndef += (r'\def\$$@bc##1{\setlength{\fboxsep}{0pt}' r'\fcolorbox[rgb]{%s}{%s}{\strut ##1}}' % (rgbcolor(ndef['border']), rgbcolor(ndef['bgcolor']))) elif ndef['bgcolor']: cmndef += (r'\def\$$@bc##1{\setlength{\fboxsep}{0pt}' r'\colorbox[rgb]{%s}{\strut ##1}}' % rgbcolor(ndef['bgcolor'])) if cmndef == '': continue cmndef = cmndef.replace('$$', cp) t2n[ttype] = name c2d[name] = cmndef
[docs] def get_style_defs(self, arg=''): """ Return the command sequences needed to define the commands used to format text in the verbatim environment. ``arg`` is ignored. """ cp = self.commandprefix styles = [] for name, definition in self.cmd2def.iteritems(): styles.append(r'\expandafter\def\csname %s@tok@%s\endcsname{%s}' % (cp, name, definition)) return STYLE_TEMPLATE % {'cp': self.commandprefix, 'styles': '\n'.join(styles)}
def format_unencoded(self, tokensource, outfile): # TODO: add support for background colors t2n = self.ttype2name cp = self.commandprefix if self.full: realoutfile = outfile outfile = StringIO() outfile.write(ur'\begin{Verbatim}[commandchars=\\\{\}') if self.linenos: start, step = self.linenostart, self.linenostep outfile.write(u',numbers=left' + (start and u',firstnumber=%d' % start or u'') + (step and u',stepnumber=%d' % step or u'')) if self.mathescape or self.texcomments: outfile.write(ur',codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8}') if self.verboptions: outfile.write(u',' + self.verboptions) outfile.write(u']\n') for ttype, value in tokensource: if ttype in Token.Comment: if self.texcomments: # Try to guess comment starting lexeme and escape it ... start = value[0:1] for i in xrange(1, len(value)): if start[0] != value[i]: break start += value[i] value = value[len(start):] start = escape_tex(start, self.commandprefix) # ... but do not escape inside comment. value = start + value elif self.mathescape: # Only escape parts not inside a math environment. parts = value.split('$') in_math = False for i, part in enumerate(parts): if not in_math: parts[i] = escape_tex(part, self.commandprefix) in_math = not in_math value = '$'.join(parts) else: value = escape_tex(value, self.commandprefix) else: value = escape_tex(value, self.commandprefix) styles = [] while ttype is not Token: try: styles.append(t2n[ttype]) except KeyError: # not in current style styles.append(_get_ttype_name(ttype)) ttype = ttype.parent styleval = '+'.join(reversed(styles)) if styleval: spl = value.split('\n') for line in spl[:-1]: if line: outfile.write("\\%s{%s}{%s}" % (cp, styleval, line)) outfile.write('\n') if spl[-1]: outfile.write("\\%s{%s}{%s}" % (cp, styleval, spl[-1])) else: outfile.write(value) outfile.write(u'\\end{Verbatim}\n') if self.full: realoutfile.write(DOC_TEMPLATE % dict(docclass = self.docclass, preamble = self.preamble, title = self.title, encoding = self.encoding or 'latin1', styledefs = self.get_style_defs(), code = outfile.getvalue()))

Project Versions

This Page