Commit 87209450 authored by Lysander Trischler
Browse files

Try to extract subject hash from URL if missing

parent 7af10503
......@@ -157,6 +157,14 @@ class ParseSubjectTest(unittest.TestCase):
self.assertEqual(SubjectHash(start_pos=0, end_pos=31, hash=None, url="https://example.com/"),
_parse_subject("( #< https://example.com/ > ) more text", 0))
def test_only_url_hash_extracted_from_url(self):
    """Check that a URL-only subject yields the hash found at the URL's end.

    Each case pairs a subject text with the ``SubjectHash`` that
    ``_parse_subject`` is expected to return when parsing starts at
    position 0.
    """
    cases = (
        # hash is the last path segment
        ("(#<https://twtxt.net/twt/hfp2hca>)",
         SubjectHash(start_pos=0, end_pos=34, hash="hfp2hca",
                     url="https://twtxt.net/twt/hfp2hca")),
        ("(#<https://twtxt.net/conv/hfp2hca>)",
         SubjectHash(start_pos=0, end_pos=35, hash="hfp2hca",
                     url="https://twtxt.net/conv/hfp2hca")),
        # hash is the value of the trailing query parameter
        ("(#<https://txt.sour.is/search?tag=5jqioeq>)",
         SubjectHash(start_pos=0, end_pos=43, hash="5jqioeq",
                     url="https://txt.sour.is/search?tag=5jqioeq")),
    )
    for subject_text, expected in cases:
        self.assertEqual(expected, _parse_subject(subject_text, 0))
class ParseMarkdownLink(unittest.TestCase):
......
......@@ -23,6 +23,7 @@ Known problems and limitations:
"""
import collections
import re
def _token_class(name, *fields):
......@@ -106,6 +107,8 @@ def _parse_mention(text, start_pos):
return __parse_text_and_url_in_brackets(text, start_pos, None, "@<", ">", Mention)
# Pattern for a twt hash candidate: exactly seven characters, each a lowercase
# ASCII letter or a digit. _parse_subject applies it to the last seven
# characters of a subject URL when no explicit hash was given.
_TWT_HASH_RE = re.compile(r"^[a-z0-9]{7}$")
def _parse_subject(text, start_pos):
data, closing_pos = _extract_between(text, start_pos, None, "(", ")")
if data is None:
......@@ -122,7 +125,14 @@ def _parse_subject(text, start_pos):
# treat this as a subject to be safe…'
return None
return SubjectHash(start_pos, closing_pos, subject_hash.hash, subject_hash.url)
hash = subject_hash.hash
if not hash:
# try to extract the hash from the last part of the URL
hash = subject_hash.url[-7:]
match = _TWT_HASH_RE.match(hash)
if not match or subject_hash.url[-8] not in ("/", "="):
hash = None
return SubjectHash(start_pos, closing_pos, hash, subject_hash.url)
def _parse_subject_hash(text, start_pos, end_pos=None):
......
Supports Markdown
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment