summaryrefslogtreecommitdiff
path: root/dev-python/nbconvert/files/nbconvert-6.5.0-mistune-2.patch
blob: 4a3f4731b32d59c67535f81b6523cad37e9931cd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
From 6e5ba41803cc8c3192f001b3ede9b74454220bda Mon Sep 17 00:00:00 2001
From: Tiago de Paula <tiagodepalves@gmail.com>
Date: Mon, 9 May 2022 09:39:31 -0300
Subject: [PATCH] Update to Mistune 2.0.2 (#1764)

Co-authored-by: Steven Silvester <steven.silvester@ieee.org>
---
 nbconvert/filters/markdown_mistune.py | 212 ++++++++++++++------------
 setup.py                              |   2 +-
 2 files changed, 119 insertions(+), 95 deletions(-)

diff --git a/nbconvert/filters/markdown_mistune.py b/nbconvert/filters/markdown_mistune.py
index 382a5388..636e1e8c 100644
--- a/nbconvert/filters/markdown_mistune.py
+++ b/nbconvert/filters/markdown_mistune.py
@@ -21,7 +21,7 @@ except ImportError:
     from cgi import escape as html_escape
 
 import bs4
-import mistune
+from mistune import BlockParser, HTMLRenderer, InlineParser, Markdown
 from pygments import highlight
 from pygments.formatters import HtmlFormatter
 from pygments.lexers import get_lexer_by_name
@@ -34,158 +34,183 @@ class InvalidNotebook(Exception):
     pass
 
 
-class MathBlockGrammar(mistune.BlockGrammar):
-    """This defines a single regex comprised of the different patterns that
-    identify math content spanning multiple lines. These are used by the
-    MathBlockLexer.
+class MathBlockParser(BlockParser):
+    """This acts as a pass-through to the MathInlineParser. It is needed in
+    order to avoid other block level rules splitting math sections apart.
     """
 
-    multi_math_str = "|".join(
-        [r"^\$\$.*?\$\$", r"^\\\\\[.*?\\\\\]", r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}"]
+    MULTILINE_MATH = re.compile(
+        r"(?<!\\)[$]{2}.*?(?<!\\)[$]{2}|"
+        r"\\\\\[.*?\\\\\]|"
+        r"\\begin\{([a-z]*\*?)\}.*?\\end\{\1\}",
+        re.DOTALL,
     )
-    multiline_math = re.compile(multi_math_str, re.DOTALL)
 
+    RULE_NAMES = ("multiline_math",) + BlockParser.RULE_NAMES
 
-class MathBlockLexer(mistune.BlockLexer):
-    """This acts as a pass-through to the MathInlineLexer. It is needed in
-    order to avoid other block level rules splitting math sections apart.
-    """
+    # Regex for header that doesn't require space after '#'
+    AXT_HEADING = re.compile(r" {0,3}(#{1,6})(?!#+)\s*([^\n]*?)$")
 
-    default_rules = ["multiline_math"] + mistune.BlockLexer.default_rules
+    def parse_multiline_math(self, m, state):
+        """Pass token through mutiline math."""
+        return {"type": "multiline_math", "text": m.group(0)}
 
-    def __init__(self, rules=None, **kwargs):
-        if rules is None:
-            rules = MathBlockGrammar()
-        super().__init__(rules, **kwargs)
 
-    def parse_multiline_math(self, m):
-        """Add token to pass through mutiline math."""
-        self.tokens.append({"type": "multiline_math", "text": m.group(0)})
+def _dotall(pattern):
+    """Make the '.' special character match any character inside the pattern, including a newline.
 
-
-class MathInlineGrammar(mistune.InlineGrammar):
-    """This defines different ways of declaring math objects that should be
-    passed through to mathjax unaffected. These are used by the MathInlineLexer.
+    This is implemented with the inline flag `(?s:...)` and is equivalent to using `re.DOTALL` when
+    it is the only pattern used. It is necessary since `mistune>=2.0.0`, where the pattern is passed
+    to the undocumented `re.Scanner`.
     """
-
-    inline_math = re.compile(r"^\$(.+?)\$|^\\\\\((.+?)\\\\\)", re.DOTALL)
-    block_math = re.compile(r"^\$\$(.*?)\$\$|^\\\\\[(.*?)\\\\\]", re.DOTALL)
-    latex_environment = re.compile(r"^\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}", re.DOTALL)
-    text = re.compile(r"^[\s\S]+?(?=[\\<!\[_*`~$]|https?://| {2,}\n|$)")
+    return f"(?s:{pattern})"
 
 
-class MathInlineLexer(mistune.InlineLexer):
-    r"""This interprets the content of LaTeX style math objects using the rules
-    defined by the MathInlineGrammar.
+class MathInlineParser(InlineParser):
+    r"""This interprets the content of LaTeX style math objects.
 
     In particular this grabs ``$$...$$``, ``\\[...\\]``, ``\\(...\\)``, ``$...$``,
     and ``\begin{foo}...\end{foo}`` styles for declaring mathematics. It strips
     delimiters from all these varieties, and extracts the type of environment
     in the last case (``foo`` in this example).
     """
-    default_rules = [
-        "block_math",
-        "inline_math",
+    BLOCK_MATH_TEX = _dotall(r"(?<!\\)\$\$(.*?)(?<!\\)\$\$")
+    BLOCK_MATH_LATEX = _dotall(r"(?<!\\)\\\\\[(.*?)(?<!\\)\\\\\]")
+    INLINE_MATH_TEX = _dotall(r"(?<![$\\])\$(.+?)(?<![$\\])\$")
+    INLINE_MATH_LATEX = _dotall(r"(?<!\\)\\\\\((.*?)(?<!\\)\\\\\)")
+    LATEX_ENVIRONMENT = _dotall(r"\\begin\{([a-z]*\*?)\}(.*?)\\end\{\1\}")
+
+    # The order is important here
+    RULE_NAMES = (
+        "block_math_tex",
+        "block_math_latex",
+        "inline_math_tex",
+        "inline_math_latex",
         "latex_environment",
-    ] + mistune.InlineLexer.default_rules
-
-    def __init__(self, renderer, rules=None, **kwargs):
-        if rules is None:
-            rules = MathInlineGrammar()
-        super().__init__(renderer, rules, **kwargs)
-
-    def output_inline_math(self, m):
-        return self.renderer.inline_math(m.group(1) or m.group(2))
-
-    def output_block_math(self, m):
-        return self.renderer.block_math(m.group(1) or m.group(2) or "")
-
-    def output_latex_environment(self, m):
-        return self.renderer.latex_environment(m.group(1), m.group(2))
-
-
-class MarkdownWithMath(mistune.Markdown):
-    def __init__(self, renderer, **kwargs):
-        if "inline" not in kwargs:
-            kwargs["inline"] = MathInlineLexer
-        if "block" not in kwargs:
-            kwargs["block"] = MathBlockLexer
-        super().__init__(renderer, **kwargs)
-
-    def output_multiline_math(self):
-        return self.inline(self.token["text"])
-
-
-class IPythonRenderer(mistune.Renderer):
-    def block_code(self, code, lang):
-        if lang:
+    ) + InlineParser.RULE_NAMES
+
+    def parse_block_math_tex(self, m, state):
+        # sometimes the Scanner keeps the final '$$', so we use the
+        # full matched string and remove the math markers
+        text = m.group(0)[2:-2]
+        return "block_math", text
+
+    def parse_block_math_latex(self, m, state):
+        text = m.group(1)
+        return "block_math", text
+
+    def parse_inline_math_tex(self, m, state):
+        text = m.group(1)
+        return "inline_math", text
+
+    def parse_inline_math_latex(self, m, state):
+        text = m.group(1)
+        return "inline_math", text
+
+    def parse_latex_environment(self, m, state):
+        name, text = m.group(1), m.group(2)
+        return "latex_environment", name, text
+
+
+class MarkdownWithMath(Markdown):
+    def __init__(self, renderer, block=None, inline=None, plugins=None):
+        if block is None:
+            block = MathBlockParser()
+        if inline is None:
+            inline = MathInlineParser(renderer, hard_wrap=False)
+        super().__init__(renderer, block, inline, plugins)
+
+    def render(self, s):
+        """Compatibility method with `mistune==0.8.4`."""
+        return self.parse(s)
+
+
+class IPythonRenderer(HTMLRenderer):
+    def __init__(
+        self,
+        escape=True,
+        allow_harmful_protocols=True,
+        embed_images=False,
+        exclude_anchor_links=False,
+        anchor_link_text="¶",
+        path="",
+        attachments=None,
+    ):
+        super().__init__(escape, allow_harmful_protocols)
+        self.embed_images = embed_images
+        self.exclude_anchor_links = exclude_anchor_links
+        self.anchor_link_text = anchor_link_text
+        self.path = path
+        if attachments is not None:
+            self.attachments = attachments
+        else:
+            self.attachments = {}
+
+    def block_code(self, code, info=None):
+        if info:
             try:
+                lang = info.strip().split(None, 1)[0]
                 lexer = get_lexer_by_name(lang, stripall=True)
             except ClassNotFound:
                 code = lang + "\n" + code
                 lang = None
 
         if not lang:
-            return "\n<pre><code>%s</code></pre>\n" % mistune.escape(code)
+            return super().block_code(code)
 
         formatter = HtmlFormatter()
         return highlight(code, lexer, formatter)
 
     def block_html(self, html):
-        embed_images = self.options.get("embed_images", False)
-
-        if embed_images:
+        if self.embed_images:
             html = self._html_embed_images(html)
 
         return super().block_html(html)
 
     def inline_html(self, html):
-        embed_images = self.options.get("embed_images", False)
-
-        if embed_images:
+        if self.embed_images:
             html = self._html_embed_images(html)
 
         return super().inline_html(html)
 
-    def header(self, text, level, raw=None):
-        html = super().header(text, level, raw=raw)
-        if self.options.get("exclude_anchor_links"):
+    def heading(self, text, level):
+        html = super().heading(text, level)
+        if self.exclude_anchor_links:
             return html
-        anchor_link_text = self.options.get("anchor_link_text", "¶")
-        return add_anchor(html, anchor_link_text=anchor_link_text)
+        return add_anchor(html, anchor_link_text=self.anchor_link_text)
 
     def escape_html(self, text):
         return html_escape(text)
 
+    def multiline_math(self, text):
+        return text
+
     def block_math(self, text):
-        return "$$%s$$" % self.escape_html(text)
+        return f"$${self.escape_html(text)}$$"
 
     def latex_environment(self, name, text):
-        name = self.escape_html(name)
-        text = self.escape_html(text)
-        return rf"\begin{{{name}}}{text}\end{{{name}}}"
+        name, text = self.escape_html(name), self.escape_html(text)
+        return f"\\begin{{{name}}}{text}\\end{{{name}}}"
 
     def inline_math(self, text):
-        return "$%s$" % self.escape_html(text)
+        return f"${self.escape_html(text)}$"
 
-    def image(self, src, title, text):
+    def image(self, src, text, title):
         """Rendering a image with title and text.
 
         :param src: source link of the image.
-        :param title: title text of the image.
         :param text: alt text of the image.
+        :param title: title text of the image.
         """
-        attachments = self.options.get("attachments", {})
         attachment_prefix = "attachment:"
-        embed_images = self.options.get("embed_images", False)
 
         if src.startswith(attachment_prefix):
             name = src[len(attachment_prefix) :]
 
-            if name not in attachments:
+            if name not in self.attachments:
                 raise InvalidNotebook(f"missing attachment: {name}")
 
-            attachment = attachments[name]
+            attachment = self.attachments[name]
             # we choose vector over raster, and lossless over lossy
             preferred_mime_types = ["image/svg+xml", "image/png", "image/jpeg"]
             for preferred_mime_type in preferred_mime_types:
@@ -197,13 +222,13 @@ class IPythonRenderer(mistune.Renderer):
             data = attachment[mime_type]
             src = "data:" + mime_type + ";base64," + data
 
-        elif embed_images:
+        elif self.embed_images:
             base64_url = self._src_to_base64(src)
 
             if base64_url is not None:
                 src = base64_url
 
-        return super().image(src, title, text)
+        return super().image(src, text, title)
 
     def _src_to_base64(self, src):
         """Turn the source file into a base64 url.
@@ -211,8 +236,7 @@ class IPythonRenderer(mistune.Renderer):
         :param src: source link of the file.
         :return: the base64 url or None if the file was not found.
         """
-        path = self.options.get("path", "")
-        src_path = os.path.join(path, src)
+        src_path = os.path.join(self.path, src)
 
         if not os.path.exists(src_path):
             return None
diff --git a/setup.py b/setup.py
index 7220a875..2dfa2534 100644
--- a/setup.py
+++ b/setup.py
@@ -245,7 +245,7 @@ setup_args["install_requires"] = [
     "jupyter_core>=4.7",
     "jupyterlab_pygments",
     "MarkupSafe>=2.0",
-    "mistune>=0.8.1,<2",
+    "mistune>=2.0.2",
     "nbclient>=0.5.0",
     "nbformat>=5.1",
     "packaging",
-- 
2.35.1