Commit 54ff87b7 authored by Lysander Trischler
Browse files

Don't let plain URLs end with commas or periods

parent 7ef24005
...@@ -217,6 +217,12 @@ class PlainLinkTest(unittest.TestCase): ...@@ -217,6 +217,12 @@ class PlainLinkTest(unittest.TestCase):
self.assertEqual(PlainLink(start_pos=0, end_pos=20, url="https://example.com/"), self.assertEqual(PlainLink(start_pos=0, end_pos=20, url="https://example.com/"),
_parse_plain_link("https://example.com/)", 0)) _parse_plain_link("https://example.com/)", 0))
def test_period_and_comma_at_the_end_are_not_part_of_the_url(self):
    """A trailing '.' or ',' is excluded from the parsed plain link."""
    # Both inputs must parse to the same link: the URL without the trailing
    # punctuation character, ending at position 28.
    expected = PlainLink(start_pos=0, end_pos=28, url="https://example.com/test.txt")
    inputs = (
        "https://example.com/test.txt.",
        "https://example.com/test.txt,",
    )
    for text in inputs:
        self.assertEqual(expected, _parse_plain_link(text, 0))
class ParseTwtTextTest(unittest.TestCase): class ParseTwtTextTest(unittest.TestCase):
......
...@@ -170,6 +170,12 @@ def _parse_plain_link(text, start_pos): ...@@ -170,6 +170,12 @@ def _parse_plain_link(text, start_pos):
closing_parenthesis_pos = text.find(")", start_pos + 7, end_pos) closing_parenthesis_pos = text.find(")", start_pos + 7, end_pos)
if closing_parenthesis_pos > -1: if closing_parenthesis_pos > -1:
end_pos = closing_parenthesis_pos end_pos = closing_parenthesis_pos
# URLs probably don't end in periods or commas, they're most likely just
# punctuation in the sentence, so strip them
while text[end_pos - 1] in ('.', ','):
end_pos -= 1
return PlainLink(start_pos=start_pos, end_pos=end_pos, url=text[start_pos:end_pos]) return PlainLink(start_pos=start_pos, end_pos=end_pos, url=text[start_pos:end_pos])
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment