Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Lyse
tt
Commits
87209450
Commit
87209450
authored
Dec 22, 2020
by
Lysander Trischler
Browse files
Try to extract subject hash from URL if missing
parent
7af10503
Changes
2
Hide whitespace changes
Inline
Side-by-side
test_twtxtparser.py
View file @
87209450
...
...
@@ -157,6 +157,14 @@ class ParseSubjectTest(unittest.TestCase):
self
.
assertEqual
(
SubjectHash
(
start_pos
=
0
,
end_pos
=
31
,
hash
=
None
,
url
=
"https://example.com/"
),
_parse_subject
(
"( #< https://example.com/ > ) more text"
,
0
))
def test_only_url_hash_extracted_from_url(self):
    # Subjects that carry only a URL: the expected hash is the trailing
    # seven-character segment of that URL (after a "/" or "=").
    cases = (
        # (subject text, expected end_pos, expected hash, expected url)
        ("(#<https://twtxt.net/twt/hfp2hca>)",
         34, "hfp2hca", "https://twtxt.net/twt/hfp2hca"),
        ("(#<https://twtxt.net/conv/hfp2hca>)",
         35, "hfp2hca", "https://twtxt.net/conv/hfp2hca"),
        ("(#<https://txt.sour.is/search?tag=5jqioeq>)",
         43, "5jqioeq", "https://txt.sour.is/search?tag=5jqioeq"),
    )
    for subject_text, expected_end, expected_hash, expected_url in cases:
        expected = SubjectHash(start_pos=0, end_pos=expected_end,
                               hash=expected_hash, url=expected_url)
        actual = _parse_subject(subject_text, 0)
        self.assertEqual(expected, actual)
class
ParseMarkdownLink
(
unittest
.
TestCase
):
...
...
twtxtparser.py
View file @
87209450
...
...
@@ -23,6 +23,7 @@ Known problems and limitations:
"""
import
collections
import
re
def
_token_class
(
name
,
*
fields
):
...
...
@@ -106,6 +107,8 @@ def _parse_mention(text, start_pos):
return
__parse_text_and_url_in_brackets
(
text
,
start_pos
,
None
,
"@<"
,
">"
,
Mention
)
# Pattern for a twt hash candidate: exactly seven characters drawn from
# lowercase ASCII letters and digits, anchored at both ends of the string.
_TWT_HASH_RE
=
re
.
compile
(
r
"^[a-z0-9]{7}$"
)
def
_parse_subject
(
text
,
start_pos
):
data
,
closing_pos
=
_extract_between
(
text
,
start_pos
,
None
,
"("
,
")"
)
if
data
is
None
:
...
...
@@ -122,7 +125,14 @@ def _parse_subject(text, start_pos):
# treat this as a subject to be safe…'
return
None
return
SubjectHash
(
start_pos
,
closing_pos
,
subject_hash
.
hash
,
subject_hash
.
url
)
hash
=
subject_hash
.
hash
if
not
hash
:
# try to extract the hash from the last part of the URL
hash
=
subject_hash
.
url
[
-
7
:]
match
=
_TWT_HASH_RE
.
match
(
hash
)
if
not
match
or
subject_hash
.
url
[
-
8
]
not
in
(
"/"
,
"="
):
hash
=
None
return
SubjectHash
(
start_pos
,
closing_pos
,
hash
,
subject_hash
.
url
)
def
_parse_subject_hash
(
text
,
start_pos
,
end_pos
=
None
):
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment