Skip to content
This repository was archived by the owner on Apr 4, 2023. It is now read-only.

Commit 7b901fa

Browse files
Merge pull request #519 from eregs/remove-stray-unescape
Remove unnecessary unescape step
2 parents bcbe66e + 797fc99 commit 7b901fa

2 files changed

Lines changed: 42 additions & 8 deletions

File tree

regulations/generator/layers/layers_applier.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
import re
22

33
from six.moves.queue import PriorityQueue
4-
from six.moves.html_parser import HTMLParser
54

65
from regulations.generator.layers.location_replace import LocationReplace
76

@@ -29,11 +28,6 @@ def location_replace(self, xml_node, original, replacement, locations):
2928
LocationReplace().location_replace(xml_node, original, replacement,
3029
locations)
3130

32-
def unescape_text(self):
33-
""" Because of the way we do replace_all(), we need to unescape HTML
34-
entities. """
35-
self.text = HTMLParser().unescape(self.text)
36-
3731
def replace_all(self, original, replacement):
3832
""" Replace all occurrences of original with replacement. This is HTML
3933
aware; it effectively looks at all of the text in between HTML tags"""
@@ -46,7 +40,6 @@ def replace_all(self, original, replacement):
4640
index = match.end()
4741
text_chunks.append(self.text[index:]) # trailing text
4842
self.text = "".join(text_chunks)
49-
self.unescape_text()
5043

5144
def replace_at(self, original, replacement, locations):
5245
""" Replace the occurrences of original at all the locations with
@@ -55,7 +48,6 @@ def replace_at(self, original, replacement, locations):
5548
locations.sort()
5649
self.text = LocationReplace().location_replace_text(
5750
self.text, original, replacement, locations)
58-
self.unescape_text()
5951

6052
def apply_layers(self, original_text):
6153
self.text = original_text

regulations/tests/layers_appliers_test.py

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,3 +122,45 @@ def test_replace_skip_location(self):
122122
"law. </dfn> state law. <dfn> <a href=\"link_url\">state"
123123
"</a> liability. </dfn>")
124124
self.assertEquals(applier.text, result)
125+
126+
def test_apply_layers(self):
127+
# Same case as the test above, but exercised one level out, through apply_layers().
128+
original = 'state'
129+
replacement = '<a href="link_url">state</a>'
130+
locations = [0, 2]
131+
text = ("<em>(6)</em> <dfn> Under state law. </dfn> state "
132+
"law. <dfn> state liability. </dfn>")
133+
134+
applier = layers_applier.LayersApplier()
135+
applier.enqueue((original, replacement, locations))
136+
applier.apply_layers(text)
137+
138+
result = ("<em>(6)</em> <dfn> Under <a href=\"link_url\">state</a> "
139+
"law. </dfn> state law. <dfn> <a href=\"link_url\">state"
140+
"</a> liability. </dfn>")
141+
self.assertEquals(applier.text, result)
142+
143+
def test_apply_layers_escaping(self):
144+
# See https://github.com/eregs/regulations-site/issues/514 and
145+
# https://github.com/fecgov/fec-eregs/issues/382
146+
#
147+
# It appears that we had a holdover step of unescaping that, thanks to
148+
# looser interpretations in Python 3.6 (specifically, ``&sect`` was
149+
# treated as a valid escape even without a trailing semicolon) started
150+
# breaking links that have a ``&section`` parameter.
151+
original = 'state'
152+
replacement = '<a href="link_url">state</a>'
153+
locations = [0, 2]
154+
text = ("<em>(6)</em> <dfn> Under state law. </dfn> state "
155+
"law. <dfn> state liability. </dfn>"
156+
"<a href='http://example.org?one=1&section2'>test</a>")
157+
158+
applier = layers_applier.LayersApplier()
159+
applier.enqueue((original, replacement, locations))
160+
applier.apply_layers(text)
161+
162+
result = ("<em>(6)</em> <dfn> Under <a href=\"link_url\">state</a> "
163+
"law. </dfn> state law. <dfn> <a href=\"link_url\">state"
164+
"</a> liability. </dfn>"
165+
"<a href='http://example.org?one=1&section2'>test</a>")
166+
self.assertEquals(applier.text, result)

0 commit comments

Comments
 (0)