374 lines
20 KiB
HTML
374 lines
20 KiB
HTML
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN"
|
|
"http://www.w3.org/TR/html4/strict.dtd">
|
|
<html>
|
|
<head>
|
|
<title>Write your own formatter — Pygments</title>
|
|
<meta http-equiv="content-type" content="text/html; charset=utf-8">
|
|
<style type="text/css">
|
|
body {
|
|
background-color: #f2f2f2;
|
|
margin: 0;
|
|
padding: 0;
|
|
font-family: 'Georgia', serif;
|
|
color: #111;
|
|
}
|
|
|
|
#content {
|
|
background-color: white;
|
|
padding: 20px;
|
|
margin: 20px auto 20px auto;
|
|
max-width: 800px;
|
|
border: 4px solid #ddd;
|
|
}
|
|
|
|
h1 {
|
|
font-weight: normal;
|
|
font-size: 40px;
|
|
color: #09839A;
|
|
}
|
|
|
|
h2 {
|
|
font-weight: normal;
|
|
font-size: 30px;
|
|
color: #C73F00;
|
|
}
|
|
|
|
h1.heading {
|
|
margin: 0 0 30px 0;
|
|
}
|
|
|
|
h2.subheading {
|
|
margin: -30px 0 0 45px;
|
|
}
|
|
|
|
h3 {
|
|
margin-top: 30px;
|
|
}
|
|
|
|
table.docutils {
|
|
border-collapse: collapse;
|
|
border: 2px solid #aaa;
|
|
margin: 0.5em 1.5em 0.5em 1.5em;
|
|
}
|
|
|
|
table.docutils td {
|
|
padding: 2px;
|
|
border: 1px solid #ddd;
|
|
}
|
|
|
|
p, li, dd, dt, blockquote {
|
|
font-size: 15px;
|
|
color: #333;
|
|
}
|
|
|
|
p {
|
|
line-height: 150%;
|
|
margin-bottom: 0;
|
|
margin-top: 10px;
|
|
}
|
|
|
|
hr {
|
|
border-top: 1px solid #ccc;
|
|
border-bottom: 0;
|
|
border-right: 0;
|
|
border-left: 0;
|
|
margin-bottom: 10px;
|
|
margin-top: 20px;
|
|
}
|
|
|
|
dl {
|
|
margin-left: 10px;
|
|
}
|
|
|
|
li, dt {
|
|
margin-top: 5px;
|
|
}
|
|
|
|
dt {
|
|
font-weight: bold;
|
|
}
|
|
|
|
th {
|
|
text-align: left;
|
|
}
|
|
|
|
a {
|
|
color: #990000;
|
|
}
|
|
|
|
a:hover {
|
|
color: #c73f00;
|
|
}
|
|
|
|
pre {
|
|
background-color: #f9f9f9;
|
|
border-top: 1px solid #ccc;
|
|
border-bottom: 1px solid #ccc;
|
|
padding: 5px;
|
|
font-size: 13px;
|
|
font-family: Bitstream Vera Sans Mono,monospace;
|
|
}
|
|
|
|
tt {
|
|
font-size: 13px;
|
|
font-family: Bitstream Vera Sans Mono,monospace;
|
|
color: black;
|
|
padding: 1px 2px 1px 2px;
|
|
background-color: #f0f0f0;
|
|
}
|
|
|
|
cite {
|
|
/* abusing <cite>, it's generated by ReST for `x` */
|
|
font-size: 13px;
|
|
font-family: Bitstream Vera Sans Mono,monospace;
|
|
font-weight: bold;
|
|
font-style: normal;
|
|
}
|
|
|
|
#backlink {
|
|
float: right;
|
|
font-size: 11px;
|
|
color: #888;
|
|
}
|
|
|
|
div.toc {
|
|
margin: 0 0 10px 0;
|
|
}
|
|
|
|
div.toc h2 {
|
|
font-size: 20px;
|
|
}
|
|
.syntax .hll { background-color: #ffffcc }
|
|
.syntax { background: #ffffff; }
|
|
.syntax .c { color: #888888 } /* Comment */
|
|
.syntax .err { color: #a61717; background-color: #e3d2d2 } /* Error */
|
|
.syntax .k { color: #008800; font-weight: bold } /* Keyword */
|
|
.syntax .cm { color: #888888 } /* Comment.Multiline */
|
|
.syntax .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
|
|
.syntax .c1 { color: #888888 } /* Comment.Single */
|
|
.syntax .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
|
|
.syntax .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
|
|
.syntax .ge { font-style: italic } /* Generic.Emph */
|
|
.syntax .gr { color: #aa0000 } /* Generic.Error */
|
|
.syntax .gh { color: #333333 } /* Generic.Heading */
|
|
.syntax .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
|
|
.syntax .go { color: #888888 } /* Generic.Output */
|
|
.syntax .gp { color: #555555 } /* Generic.Prompt */
|
|
.syntax .gs { font-weight: bold } /* Generic.Strong */
|
|
.syntax .gu { color: #666666 } /* Generic.Subheading */
|
|
.syntax .gt { color: #aa0000 } /* Generic.Traceback */
|
|
.syntax .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
|
|
.syntax .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
|
|
.syntax .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
|
|
.syntax .kp { color: #008800 } /* Keyword.Pseudo */
|
|
.syntax .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
|
|
.syntax .kt { color: #888888; font-weight: bold } /* Keyword.Type */
|
|
.syntax .m { color: #0000DD; font-weight: bold } /* Literal.Number */
|
|
.syntax .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
|
|
.syntax .na { color: #336699 } /* Name.Attribute */
|
|
.syntax .nb { color: #003388 } /* Name.Builtin */
|
|
.syntax .nc { color: #bb0066; font-weight: bold } /* Name.Class */
|
|
.syntax .no { color: #003366; font-weight: bold } /* Name.Constant */
|
|
.syntax .nd { color: #555555 } /* Name.Decorator */
|
|
.syntax .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
|
|
.syntax .nf { color: #0066bb; font-weight: bold } /* Name.Function */
|
|
.syntax .nl { color: #336699; font-style: italic } /* Name.Label */
|
|
.syntax .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
|
|
.syntax .py { color: #336699; font-weight: bold } /* Name.Property */
|
|
.syntax .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
|
|
.syntax .nv { color: #336699 } /* Name.Variable */
|
|
.syntax .ow { color: #008800 } /* Operator.Word */
|
|
.syntax .w { color: #bbbbbb } /* Text.Whitespace */
|
|
.syntax .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
|
|
.syntax .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
|
|
.syntax .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
|
|
.syntax .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
|
|
.syntax .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
|
|
.syntax .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
|
|
.syntax .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
|
|
.syntax .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
|
|
.syntax .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
|
|
.syntax .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
|
|
.syntax .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
|
|
.syntax .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
|
|
.syntax .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
|
|
.syntax .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
|
|
.syntax .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
|
|
.syntax .bp { color: #003388 } /* Name.Builtin.Pseudo */
|
|
.syntax .vc { color: #336699 } /* Name.Variable.Class */
|
|
.syntax .vg { color: #dd7700 } /* Name.Variable.Global */
|
|
.syntax .vi { color: #3333bb } /* Name.Variable.Instance */
|
|
.syntax .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
|
|
</style>
|
|
</head>
|
|
<body>
|
|
<div id="content">
|
|
<h1 class="heading">Pygments</h1>
|
|
<h2 class="subheading">Write your own formatter</h2>
|
|
|
|
<a id="backlink" href="index.html">« Back To Index</a>
|
|
|
|
|
|
<div class="toc">
|
|
<h2>Contents</h2>
|
|
<ul class="contents">
|
|
|
|
<li><a href="#quickstart">Quickstart</a></li>
|
|
|
|
<li><a href="#styles">Styles</a></li>
|
|
|
|
<li><a href="#html-3-2-formatter">HTML 3.2 Formatter</a></li>
|
|
|
|
<li><a href="#generating-style-definitions">Generating Style Definitions</a></li>
|
|
|
|
</ul>
|
|
</div>
|
|
|
|
<!-- -*- mode: rst -*- -->
|
|
<p>As well as creating <a class="reference external" href="./lexerdevelopment.html">your own lexer</a>, writing a new
|
|
formatter for Pygments is easy and straightforward.</p>
|
|
<p>A formatter is a class that is initialized with some keyword arguments (the
|
|
formatter options) and that must provide a <cite>format()</cite> method.
|
|
Additionally a formatter should provide a <cite>get_style_defs()</cite> method that
|
|
returns the style definitions from the style in a form usable for the
|
|
formatter's output format.</p>
|
|
<div class="section" id="quickstart">
|
|
<h3>Quickstart</h3>
|
|
<p>The most basic formatter shipped with Pygments is the <cite>NullFormatter</cite>. It just
|
|
sends the value of a token to the output stream:</p>
|
|
<div class="syntax"><pre><span class="kn">from</span> <span class="nn">pygments.formatter</span> <span class="kn">import</span> <span class="n">Formatter</span>
|
|
|
|
<span class="k">class</span> <span class="nc">NullFormatter</span><span class="p">(</span><span class="n">Formatter</span><span class="p">):</span>
|
|
<span class="k">def</span> <span class="nf">format</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tokensource</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
|
|
<span class="k">for</span> <span class="n">ttype</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">tokensource</span><span class="p">:</span>
|
|
<span class="n">outfile</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">value</span><span class="p">)</span>
|
|
</pre></div>
|
|
<p>As you can see, the <cite>format()</cite> method is passed two parameters: <cite>tokensource</cite>
|
|
and <cite>outfile</cite>. The first is an iterable of <tt class="docutils literal">(token_type, value)</tt> tuples,
|
|
the latter a file-like object with a <cite>write()</cite> method.</p>
|
|
<p>Because the formatter is that basic it doesn't override the <cite>get_style_defs()</cite>
|
|
method.</p>
|
|
</div>
|
|
<div class="section" id="styles">
|
|
<h3>Styles</h3>
|
|
<p>Styles aren't instantiated but their metaclass provides some class functions
|
|
so that you can access the style definitions easily.</p>
|
|
<p>Styles are iterable and yield tuples in the form <tt class="docutils literal">(ttype, d)</tt> where <cite>ttype</cite>
|
|
is a token and <cite>d</cite> is a dict with the following keys:</p>
|
|
<dl class="docutils">
|
|
<dt><tt class="docutils literal">'color'</tt></dt>
|
|
<dd>Hexadecimal color value (eg: <tt class="docutils literal">'ff0000'</tt> for red) or <cite>None</cite> if not
|
|
defined.</dd>
|
|
<dt><tt class="docutils literal">'bold'</tt></dt>
|
|
<dd><cite>True</cite> if the value should be bold</dd>
|
|
<dt><tt class="docutils literal">'italic'</tt></dt>
|
|
<dd><cite>True</cite> if the value should be italic</dd>
|
|
<dt><tt class="docutils literal">'underline'</tt></dt>
|
|
<dd><cite>True</cite> if the value should be underlined</dd>
|
|
<dt><tt class="docutils literal">'bgcolor'</tt></dt>
|
|
<dd>Hexadecimal color value for the background (eg: <tt class="docutils literal">'eeeeee'</tt> for light
|
|
gray) or <cite>None</cite> if not defined.</dd>
|
|
<dt><tt class="docutils literal">'border'</tt></dt>
|
|
<dd>Hexadecimal color value for the border (eg: <tt class="docutils literal">'0000aa'</tt> for a dark
|
|
blue) or <cite>None</cite> for no border.</dd>
|
|
</dl>
|
|
<p>Additional keys might appear in the future; formatters should ignore all keys
|
|
they don't support.</p>
|
|
</div>
|
|
<div class="section" id="html-3-2-formatter">
|
|
<h3>HTML 3.2 Formatter</h3>
|
|
<p>For a more complex example, let's implement an HTML 3.2 Formatter. We don't
|
|
use CSS but inline markup (<tt class="docutils literal">&lt;u&gt;</tt>, <tt class="docutils literal">&lt;font&gt;</tt>, etc). Because this isn't good
|
|
style this formatter isn't in the standard library ;-)</p>
|
|
<div class="syntax"><pre><span class="kn">from</span> <span class="nn">pygments.formatter</span> <span class="kn">import</span> <span class="n">Formatter</span>
|
|
|
|
<span class="k">class</span> <span class="nc">OldHtmlFormatter</span><span class="p">(</span><span class="n">Formatter</span><span class="p">):</span>
|
|
|
|
<span class="k">def</span> <span class="nf">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">options</span><span class="p">):</span>
|
|
<span class="n">Formatter</span><span class="o">.</span><span class="n">__init__</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="o">**</span><span class="n">options</span><span class="p">)</span>
|
|
|
|
<span class="c"># create a dict of (start, end) tuples that wrap the</span>
|
|
<span class="c"># value of a token so that we can use it in the format</span>
|
|
<span class="c"># method later</span>
|
|
<span class="bp">self</span><span class="o">.</span><span class="n">styles</span> <span class="o">=</span> <span class="p">{}</span>
|
|
|
|
<span class="c"># we iterate over `self.style`, the iterable style class</span>
|
|
<span class="c"># that contains the parsed style values.</span>
|
|
<span class="k">for</span> <span class="n">token</span><span class="p">,</span> <span class="n">style</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">style</span><span class="p">:</span>
|
|
<span class="n">start</span> <span class="o">=</span> <span class="n">end</span> <span class="o">=</span> <span class="s">''</span>
|
|
<span class="c"># a style item is a dict with the keys described above;</span>
|
|
<span class="c"># colors are readily specified in hex: 'RRGGBB'</span>
|
|
<span class="k">if</span> <span class="n">style</span><span class="p">[</span><span class="s">'color'</span><span class="p">]:</span>
|
|
<span class="n">start</span> <span class="o">+=</span> <span class="s">'&lt;font color="#</span><span class="si">%s</span><span class="s">"&gt;'</span> <span class="o">%</span> <span class="n">style</span><span class="p">[</span><span class="s">'color'</span><span class="p">]</span>
|
|
<span class="n">end</span> <span class="o">=</span> <span class="s">'&lt;/font&gt;'</span> <span class="o">+</span> <span class="n">end</span>
|
|
<span class="k">if</span> <span class="n">style</span><span class="p">[</span><span class="s">'bold'</span><span class="p">]:</span>
|
|
<span class="n">start</span> <span class="o">+=</span> <span class="s">'&lt;b&gt;'</span>
|
|
<span class="n">end</span> <span class="o">=</span> <span class="s">'&lt;/b&gt;'</span> <span class="o">+</span> <span class="n">end</span>
|
|
<span class="k">if</span> <span class="n">style</span><span class="p">[</span><span class="s">'italic'</span><span class="p">]:</span>
|
|
<span class="n">start</span> <span class="o">+=</span> <span class="s">'&lt;i&gt;'</span>
|
|
<span class="n">end</span> <span class="o">=</span> <span class="s">'&lt;/i&gt;'</span> <span class="o">+</span> <span class="n">end</span>
|
|
<span class="k">if</span> <span class="n">style</span><span class="p">[</span><span class="s">'underline'</span><span class="p">]:</span>
|
|
<span class="n">start</span> <span class="o">+=</span> <span class="s">'&lt;u&gt;'</span>
|
|
<span class="n">end</span> <span class="o">=</span> <span class="s">'&lt;/u&gt;'</span> <span class="o">+</span> <span class="n">end</span>
|
|
<span class="bp">self</span><span class="o">.</span><span class="n">styles</span><span class="p">[</span><span class="n">token</span><span class="p">]</span> <span class="o">=</span> <span class="p">(</span><span class="n">start</span><span class="p">,</span> <span class="n">end</span><span class="p">)</span>
|
|
|
|
<span class="k">def</span> <span class="nf">format</span><span class="p">(</span><span class="bp">self</span><span class="p">,</span> <span class="n">tokensource</span><span class="p">,</span> <span class="n">outfile</span><span class="p">):</span>
|
|
<span class="c"># lastval is a string we use for caching</span>
|
|
<span class="c"># because it's possible that a lexer yields a number</span>
|
|
<span class="c"># of consecutive tokens with the same token type.</span>
|
|
<span class="c"># to minimize the size of the generated html markup we</span>
|
|
<span class="c"># try to join the values of same-type tokens here</span>
|
|
<span class="n">lastval</span> <span class="o">=</span> <span class="s">''</span>
|
|
<span class="n">lasttype</span> <span class="o">=</span> <span class="bp">None</span>
|
|
|
|
<span class="c"># wrap the whole output with &lt;pre&gt;</span>
|
|
<span class="n">outfile</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">'&lt;pre&gt;'</span><span class="p">)</span>
|
|
|
|
<span class="k">for</span> <span class="n">ttype</span><span class="p">,</span> <span class="n">value</span> <span class="ow">in</span> <span class="n">tokensource</span><span class="p">:</span>
|
|
<span class="c"># if the token type doesn't exist in the stylemap</span>
|
|
<span class="c"># we try it with the parent of the token type</span>
|
|
<span class="c"># eg: parent of Token.Literal.String.Double is</span>
|
|
<span class="c"># Token.Literal.String</span>
|
|
<span class="k">while</span> <span class="n">ttype</span> <span class="ow">not</span> <span class="ow">in</span> <span class="bp">self</span><span class="o">.</span><span class="n">styles</span><span class="p">:</span>
|
|
<span class="n">ttype</span> <span class="o">=</span> <span class="n">ttype</span><span class="o">.</span><span class="n">parent</span>
|
|
<span class="k">if</span> <span class="n">ttype</span> <span class="o">==</span> <span class="n">lasttype</span><span class="p">:</span>
|
|
<span class="c"># the current token type is the same as in the last</span>
|
|
<span class="c"># iteration. cache it</span>
|
|
<span class="n">lastval</span> <span class="o">+=</span> <span class="n">value</span>
|
|
<span class="k">else</span><span class="p">:</span>
|
|
<span class="c"># not the same token as last iteration, but we</span>
|
|
<span class="c"># have some data in the buffer. wrap it with the</span>
|
|
<span class="c"># defined style and write it to the output file</span>
|
|
<span class="k">if</span> <span class="n">lastval</span><span class="p">:</span>
|
|
<span class="n">stylebegin</span><span class="p">,</span> <span class="n">styleend</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">styles</span><span class="p">[</span><span class="n">lasttype</span><span class="p">]</span>
|
|
<span class="n">outfile</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">stylebegin</span> <span class="o">+</span> <span class="n">lastval</span> <span class="o">+</span> <span class="n">styleend</span><span class="p">)</span>
|
|
<span class="c"># set lastval/lasttype to current values</span>
|
|
<span class="n">lastval</span> <span class="o">=</span> <span class="n">value</span>
|
|
<span class="n">lasttype</span> <span class="o">=</span> <span class="n">ttype</span>
|
|
|
|
<span class="c"># if something is left in the buffer, write it to the</span>
|
|
<span class="c"># output file, then close the opened &lt;pre&gt; tag</span>
|
|
<span class="k">if</span> <span class="n">lastval</span><span class="p">:</span>
|
|
<span class="n">stylebegin</span><span class="p">,</span> <span class="n">styleend</span> <span class="o">=</span> <span class="bp">self</span><span class="o">.</span><span class="n">styles</span><span class="p">[</span><span class="n">lasttype</span><span class="p">]</span>
|
|
<span class="n">outfile</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="n">stylebegin</span> <span class="o">+</span> <span class="n">lastval</span> <span class="o">+</span> <span class="n">styleend</span><span class="p">)</span>
|
|
<span class="n">outfile</span><span class="o">.</span><span class="n">write</span><span class="p">(</span><span class="s">'&lt;/pre&gt;</span><span class="se">\n</span><span class="s">'</span><span class="p">)</span>
|
|
</pre></div>
|
|
<p>The comments should explain it. Again, this formatter doesn't override the
|
|
<cite>get_style_defs()</cite> method. If we had used CSS classes instead of
|
|
inline HTML markup, we would need to generate the CSS first. For that
|
|
purpose the <cite>get_style_defs()</cite> method exists:</p>
|
|
</div>
|
|
<div class="section" id="generating-style-definitions">
|
|
<h3>Generating Style Definitions</h3>
|
|
<p>Some formatters like the <cite>LatexFormatter</cite> and the <cite>HtmlFormatter</cite> don't
|
|
output inline markup but reference either macros or css classes. Because
|
|
the definitions of those are not part of the output, the <cite>get_style_defs()</cite>
|
|
method exists. It is passed one parameter (if it's used and how it's used
|
|
is up to the formatter) and has to return a string or <tt class="docutils literal">None</tt>.</p>
|
|
</div>
|
|
|
|
</div>
|
|
</body>
|
|
<!-- generated on: 2013-01-09 17:48:42.995351
|
|
file id: formatterdevelopment -->
|
|
</html> |