summary | refs | log | tree | commit | diff
path: root/dev-python/html5lib/files/html5lib-0.9999999-lxml-3.5.0-backport.patch
blob: fecfab9a4fb4fe11e306afe45e111a92d45bc7e3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
From 46046c0f7125911ff8205f09a7574573bb953105 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Mon, 23 Nov 2015 15:17:07 +0000
Subject: [PATCH 1/3] Make lxml tree-builder coerce comments to work with lxml
 3.5.

---
 html5lib/ihatexml.py                | 2 ++
 html5lib/treebuilders/etree_lxml.py | 2 +-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
index 0fc7930..b5b2e98 100644
--- a/html5lib/ihatexml.py
+++ b/html5lib/ihatexml.py
@@ -225,6 +225,8 @@ def coerceComment(self, data):
             while "--" in data:
                 warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
                 data = data.replace("--", "- -")
+            if data.endswith("-"):
+                data += " "
         return data
 
     def coerceCharacters(self, data):
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index 35d08ef..17007e3 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -189,7 +189,7 @@ class TreeBuilder(_base.TreeBuilder):
 
     def __init__(self, namespaceHTMLElements, fullTree=False):
         builder = etree_builders.getETreeModule(etree, fullTree=fullTree)
-        infosetFilter = self.infosetFilter = ihatexml.InfosetFilter()
+        infosetFilter = self.infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
         self.namespaceHTMLElements = namespaceHTMLElements
 
         class Attributes(dict):

From 1c22e1ce93dd4acc81a66cfa03cf9720fbd741c7 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Mon, 23 Nov 2015 15:35:21 +0000
Subject: [PATCH 2/3] fixup! Make lxml tree-builder coerce comments to work
 with lxml 3.5.

---
 html5lib/ihatexml.py                | 1 +
 html5lib/treebuilders/etree_lxml.py | 7 ++++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
index b5b2e98..5a81a12 100644
--- a/html5lib/ihatexml.py
+++ b/html5lib/ihatexml.py
@@ -226,6 +226,7 @@ def coerceComment(self, data):
                 warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
                 data = data.replace("--", "- -")
             if data.endswith("-"):
+                warnings.warn("Comments cannot contain end in a dash", DataLossWarning)
                 data += " "
         return data
 
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index 17007e3..c6c981f 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -54,7 +54,7 @@ def _getChildNodes(self):
 def testSerializer(element):
     rv = []
     finalText = None
-    infosetFilter = ihatexml.InfosetFilter()
+    infosetFilter = ihatexml.InfosetFilter(preventDoubleDashComments=True)
 
     def serializeElement(element, indent=0):
         if not hasattr(element, "tag"):
@@ -257,7 +257,7 @@ def _getData(self):
             data = property(_getData, _setData)
 
         self.elementClass = Element
-        self.commentClass = builder.Comment
+        self.commentClass = Comment
         # self.fragmentClass = builder.DocumentFragment
         _base.TreeBuilder.__init__(self, namespaceHTMLElements)
 
@@ -344,7 +344,8 @@ def insertRoot(self, token):
 
         # Append the initial comments:
         for comment_token in self.initial_comments:
-            root.addprevious(etree.Comment(comment_token["data"]))
+            comment = self.commentClass(comment_token["data"])
+            root.addprevious(comment._element)
 
         # Create the root document and add the ElementTree to it
         self.document = self.documentClass()

From 235a6d7ac7e0a3e2b431766e051094c2d3110ba3 Mon Sep 17 00:00:00 2001
From: Geoffrey Sneddon <geoffers@gmail.com>
Date: Mon, 23 Nov 2015 15:42:12 +0000
Subject: [PATCH 3/3] fixup! Make lxml tree-builder coerce comments to work
 with lxml 3.5.

---
 html5lib/ihatexml.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/html5lib/ihatexml.py b/html5lib/ihatexml.py
index 5a81a12..5da5d93 100644
--- a/html5lib/ihatexml.py
+++ b/html5lib/ihatexml.py
@@ -226,7 +226,7 @@ def coerceComment(self, data):
                 warnings.warn("Comments cannot contain adjacent dashes", DataLossWarning)
                 data = data.replace("--", "- -")
             if data.endswith("-"):
-                warnings.warn("Comments cannot contain end in a dash", DataLossWarning)
+                warnings.warn("Comments cannot end in a dash", DataLossWarning)
                 data += " "
         return data