Skip to content
GitLab
Menu
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Lyse
tt
Commits
e24dd938
Commit
e24dd938
authored
Dec 14, 2020
by
Lysander Trischler
Browse files
Refactor code structure
parent
e84cca8e
Changes
8
Hide whitespace changes
Inline
Side-by-side
test_twtxthash.py
0 → 100644
View file @
e24dd938
import
datetime
import
twtxt.models
import
unittest
from
twtxthash
import
create_hash
,
create_old_hash
# Fixed-offset time zones used by the timestamps in the tests below:
# UTC itself and UTC+01:00.
UTC = datetime.timezone.utc
CET = datetime.timezone(datetime.timedelta(minutes=60))
class CreateHashTest(unittest.TestCase):
    """
    Check create_hash() and create_old_hash() against known twt hashes.
    """

    def _spec_twt(self, created_at):
        """
        Build the twt shared by all RFC 3339 tests. Only the creation
        timestamp differs between them.
        """
        feed = twtxt.models.Source(
            nick="nick",
            url="https://example.com/twtxt.txt")
        return twtxt.models.Tweet(
            created_at=created_at,
            text="The twt hash now uses the RFC 3339 timestamp format.",
            source=feed)

    def _assert_spec_hash(self, expected, created_at):
        """Assert that the shared twt created at the given time hashes to expected."""
        self.assertEqual(expected, create_hash(self._spec_twt(created_at)))

    def test_old_timestamp_format(self):
        timestamp = datetime.datetime(2020, 12, 6, 20, 20, 35, tzinfo=CET)
        feed = twtxt.models.Source(
            nick="nick",
            url="http://0.0.0.0:8000/user/lyse/twtxt.txt")
        twt = twtxt.models.Tweet(
            created_at=timestamp,
            text="This is a test tweet for testing.",
            source=feed)
        self.assertEqual("sqwl3la", create_old_hash(twt))

    def test_rfc3339_timestamp_with_milliseconds_precision_is_truncated_to_seconds_precision(self):
        # (The seventh positional argument of datetime is the sub-second part.)
        self._assert_spec_hash(
            "74qtyjq",
            datetime.datetime(2020, 12, 9, 15, 38, 42, 123, tzinfo=UTC))

    def test_rfc3339_timestamp_with_millisecomds_precision_is_truncated_to_seconds_precision_without_rounding(self):
        self._assert_spec_hash(
            "74qtyjq",
            datetime.datetime(2020, 12, 9, 15, 38, 42, 999, tzinfo=UTC))

    def test_rfc3339_timestamp_with_seconds_precision_and_utc_plus_1_offset_is_kept_intact(self):
        self._assert_spec_hash(
            "64u2m5a",
            datetime.datetime(2020, 12, 9, 16, 38, 42, tzinfo=CET))

    def test_rfc3339_timestamp_with_minutes_precision_is_expanded_to_seconds_precision(self):
        self._assert_spec_hash(
            "a3c3k5q",
            datetime.datetime(2020, 12, 9, 16, 38, 0, tzinfo=CET))

    def test_rfc3339_timestamp_with_utc_is_rendered_as_designated_zulu_offset_rather_than_numeric_offset(self):
        self._assert_spec_hash(
            "74qtyjq",
            datetime.datetime(2020, 12, 9, 15, 38, 42, tzinfo=UTC))
        # Same instant, but with a zero offset that is not the UTC singleton.
        zero_offset = datetime.timezone(datetime.timedelta(hours=-0))
        self._assert_spec_hash(
            "74qtyjq",
            datetime.datetime(2020, 12, 9, 15, 38, 42, tzinfo=zero_offset))

    def test_rfc3339_timestamp_without_explicit_timezone_information_is_assumed_to_be_in_utc(self):
        self._assert_spec_hash(
            "74qtyjq",
            datetime.datetime(2020, 12, 9, 15, 38, 42))
# Run all test cases of this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
test_twtxtparser.py
0 → 100644
View file @
e24dd938
import
unittest
from
twtxtparser
import
(
_find_whitespace
,
_extract_between
,
_parse_mention
,
_parse_subject_hash
,
_parse_subject
,
_parse_markdown_link
,
_parse_plain_link
,
parse_twt_text
,
Mention
,
SubjectHash
,
TwtxtLink
,
MarkdownLink
,
PlainLink
,
Text
,
)
class FindWhitespaceTest(unittest.TestCase):
    """
    Check the positions returned by _find_whitespace() for a text and a
    start position.
    """

    def _assert_positions(self, cases):
        """Each case is (expected position, text, start position)."""
        for expected, text, start in cases:
            self.assertEqual(expected, _find_whitespace(text, start))

    def test_no_whitespace(self):
        self._assert_positions((
            (0, '', 0),
            (1, 'a', 0),
            (3, ' ab', 1),
        ))

    def test_whitespace(self):
        self._assert_positions((
            (1, 'a ', 0),
            (2, 'ab ', 0),
            (2, 'ab cd', 0),
            (7, 'ab cdef gh', 3),
        ))
class ExtractBetweenTest(unittest.TestCase):
    """
    Check the (data, position) pairs returned by _extract_between().
    """

    def test_closing_not_found(self):
        not_found = (None, -1)
        cases = (
            ("(missing parenthesis", 0, None),
            ("(closing parenthesis)( before start_pos", 21, None),
            ("(closing parenthesis) after end_pos", 0, 20),
        )
        for text, start, end in cases:
            self.assertEqual(
                not_found,
                _extract_between(text, start, end, "(", ")"))

    def test_extract_data(self):
        cases = (
            (("foo", 5), "(foo)bar", "(", ")"),
            (("foo", 7), "({foo})bar", "({", "})"),
            (("foo", 7), "({foo})", "({", "})"),
        )
        for expected, text, opening, closing in cases:
            self.assertEqual(
                expected,
                _extract_between(text, 0, None, opening, closing))
class ParseMentionTest(unittest.TestCase):
    """
    Check _parse_mention() on inputs that start with a mention at position 0.

    NOTE(review): for the whitespace-padded inputs the expected end_pos is
    larger than the visible length of the mention; presumably runs of spaces
    were collapsed when this listing was extracted -- confirm the literals
    against the repository.
    """

    def _assert_mention(self, end_pos, nick, url, text):
        """Assert that text parses to a mention spanning 0..end_pos."""
        expected = Mention(start_pos=0, end_pos=end_pos, nick=nick, url=url)
        self.assertEqual(expected, _parse_mention(text, 0))

    def test_missing_closing_angle_bracket(self):
        for text in ("@<missing bracket", "@<missing bracket]})"):
            self.assertIsNone(_parse_mention(text, 0))

    def test_nick_and_url(self):
        url = "https://example.com/twtxt.txt"
        self._assert_mention(
            37, "nick", url,
            "@<nick https://example.com/twtxt.txt>")
        self._assert_mention(
            37, "nick", url,
            "@<nick https://example.com/twtxt.txt> more text")
        self._assert_mention(
            43, "nick", url,
            "@< nick https://example.com/twtxt.txt > more text")

    def test_only_nick(self):
        self._assert_mention(7, "nick", None, "@<nick>")
        self._assert_mention(7, "nick", None, "@<nick> more text")
        self._assert_mention(11, "nick", None, "@< nick > more text")

    def test_only_url(self):
        url = "https://example.com/twtxt.txt"
        self._assert_mention(
            32, None, url,
            "@<https://example.com/twtxt.txt>")
        self._assert_mention(
            32, None, url,
            "@<https://example.com/twtxt.txt> more text")
        self._assert_mention(
            36, None, url,
            "@< https://example.com/twtxt.txt > more text")
class ParseSubjectHashTest(unittest.TestCase):
    """
    Check _parse_subject_hash() on inputs that start with "#<" at position 0.

    NOTE(review): for the whitespace-padded inputs the expected end_pos is
    larger than the visible length of the construct; presumably runs of
    spaces were collapsed when this listing was extracted -- confirm the
    literals against the repository.
    """

    def _assert_subject_hash(self, end_pos, hash_, url, text):
        """Assert that text parses to a subject hash spanning 0..end_pos."""
        expected = SubjectHash(start_pos=0, end_pos=end_pos, hash=hash_, url=url)
        self.assertEqual(expected, _parse_subject_hash(text, 0))

    def test_missing_closing_angle_bracket(self):
        for text in ("#<abcdefg missing-bracket",
                     "#<abcdefg missing-bracket]})"):
            self.assertIsNone(_parse_subject_hash(text, 0))

    def test_non_7_chars_hash(self):
        url = "https://example.com/"
        self._assert_subject_hash(
            30, "123456", url, "#<123456 https://example.com/>")
        self._assert_subject_hash(
            9, "123456", None, "#<123456>")
        self._assert_subject_hash(
            32, "12345678", url, "#<12345678 https://example.com/>")
        self._assert_subject_hash(
            11, "12345678", None, "#<12345678>")

    def test_hash_and_url(self):
        url = "https://example.com/"
        self._assert_subject_hash(
            31, "abcd567", url,
            "#<abcd567 https://example.com/>")
        self._assert_subject_hash(
            31, "abcd567", url,
            "#<abcd567 https://example.com/> more text")
        self._assert_subject_hash(
            36, "abcd567", url,
            "#< abcd567 https://example.com/ > more text")

    def test_only_hash(self):
        self._assert_subject_hash(10, "abcd567", None, "#<abcd567>")
        self._assert_subject_hash(10, "abcd567", None, "#<abcd567> more text")
        self._assert_subject_hash(13, "abcd567", None, "#< abcd567 > more text")

    def test_only_url(self):
        url = "https://example.com/"
        self._assert_subject_hash(
            23, None, url, "#<https://example.com/>")
        self._assert_subject_hash(
            23, None, url, "#<https://example.com/> more text")
        self._assert_subject_hash(
            26, None, url, "#< https://example.com/ > more text")
class ParseSubjectTest(unittest.TestCase):
    """
    Check _parse_subject() on inputs that start with a parenthesized subject
    at position 0.

    NOTE(review): for the whitespace-padded inputs the expected end_pos is
    larger than the visible length of the subject; presumably runs of spaces
    were collapsed when this listing was extracted -- confirm the literals
    against the repository.
    """

    def _assert_subject(self, end_pos, hash_, url, text):
        """Assert that text parses to a subject hash spanning 0..end_pos."""
        expected = SubjectHash(start_pos=0, end_pos=end_pos, hash=hash_, url=url)
        self.assertEqual(expected, _parse_subject(text, 0))

    def test_missing_closing_angle_bracket(self):
        broken = (
            "(#<abcdefg missing-bracket>",
            "(#<abcdefg missing-bracket>]}",
            "(#<abcdefg missing-bracket]}",
            "(#<abcdefg missing-bracket]})",
        )
        for text in broken:
            self.assertIsNone(_parse_subject(text, 0))

    def test_other_text_after_subject_hash(self):
        text = "(#<abcd567 https://example.com/> a)"
        self.assertIsNone(_parse_subject(text, 0))

    def test_hash_and_url(self):
        url = "https://example.com/"
        self._assert_subject(
            33, "abcd567", url,
            "(#<abcd567 https://example.com/>)")
        self._assert_subject(
            33, "abcd567", url,
            "(#<abcd567 https://example.com/>) more text")
        self._assert_subject(
            36, "abcd567", url,
            "( #<abcd567 https://example.com/> ) more text")
        self._assert_subject(
            44, "abcd567", url,
            "( #< abcd567 https://example.com/ > ) more text")

    def test_only_hash(self):
        self._assert_subject(12, "abcd567", None, "(#<abcd567>)")
        self._assert_subject(12, "abcd567", None, "(#<abcd567>) more text")
        self._assert_subject(15, "abcd567", None, "( #<abcd567> ) more text")
        self._assert_subject(18, "abcd567", None, "( #< abcd567 > ) more text")

    def test_only_url(self):
        url = "https://example.com/"
        self._assert_subject(
            25, None, url, "(#<https://example.com/>)")
        self._assert_subject(
            25, None, url, "(#<https://example.com/>) more text")
        self._assert_subject(
            28, None, url, "( #<https://example.com/> ) more text")
        self._assert_subject(
            31, None, url, "( #< https://example.com/ > ) more text")
class ParseMarkdownLink(unittest.TestCase):
    """
    Check _parse_markdown_link() on inputs that start with "[" at position 0.
    """

    def _assert_rejected(self, *texts):
        """Assert that none of the texts is parsed as a markdown link."""
        for text in texts:
            self.assertIsNone(_parse_markdown_link(text, 0))

    def _assert_link(self, end_pos, title, text):
        """Assert that text parses to a link to example.com spanning 0..end_pos."""
        expected = MarkdownLink(
            start_pos=0,
            end_pos=end_pos,
            title=title,
            url="https://example.com/")
        self.assertEqual(expected, _parse_markdown_link(text, 0))

    def test_missing_closing_closing_bracket(self):
        self._assert_rejected(
            "[link title(https://example.com)",
            "[link title")

    def test_missing_opening_parenthesis(self):
        self._assert_rejected(
            "[link title]https://example.com)",
            "[link title]")

    def test_missing_closing_parenthesis(self):
        self._assert_rejected(
            "[link title](https://example.com",
            "[link title](")

    def test_missing_closing_parenthesis_or_after_text(self):
        self._assert_rejected(
            "[link title](https://example.com more)",
            "[link title](https://example.com more")

    def test_chars_between_closing_bracket_and_opening_parenthesis(self):
        self._assert_rejected(
            "[link title] (https://example.com)",
            "[link title]a(https://example.com)")

    def test_markdown_link(self):
        self._assert_link(
            34, "link title", "[link title](https://example.com/)")
        self._assert_link(
            34, "link title", "[link title](https://example.com/) more text")
        self._assert_link(
            29, "title", "[title](https://example.com/)")
        self._assert_link(
            29, "title", "[title](https://example.com/) more text")
class PlainLinkTest(unittest.TestCase):
    """
    Check _parse_plain_link() on inputs that start with a URL at position 0.
    """

    def _assert_link(self, url, *texts):
        """Assert that every text parses to a plain link covering exactly url."""
        for text in texts:
            expected = PlainLink(start_pos=0, end_pos=len(url), url=url)
            self.assertEqual(expected, _parse_plain_link(text, 0))

    def test_http_link(self):
        # len("http://example.com/") == 19
        self._assert_link(
            "http://example.com/",
            "http://example.com/",
            "http://example.com/ more text")

    def test_https_link(self):
        # len("https://example.com/") == 20
        self._assert_link(
            "https://example.com/",
            "https://example.com/",
            "https://example.com/ more text")

    def test_closing_parenthesis_at_the_end_is_not_part_of_url(self):
        self._assert_link(
            "https://example.com/",
            "https://example.com/)")
class ParseTwtTextTest(unittest.TestCase):
    """
    Check the complete token sequences produced by parse_twt_text().
    """

    def check(self, text, *expected_tokens):
        """Assert that parsing text yields exactly the expected tokens, in order."""
        parsed_tokens = tuple(parse_twt_text(text))
        self.assertEqual(expected_tokens, parsed_tokens)

    def test_only_text(self):
        text = "Hello world."
        token = Text(start_pos=0, end_pos=12, text="Hello world.")
        self.check(text, token)

    def test_only_mention(self):
        text = "@<hugo https://example.com/twtxt.txt>"
        token = Mention(start_pos=0, end_pos=37, nick="hugo",
                        url="https://example.com/twtxt.txt")
        self.check(text, token)

        text = "@<hugo>"
        token = Mention(start_pos=0, end_pos=7, nick="hugo", url=None)
        self.check(text, token)

        text = "@<https://example.com/twtxt.txt>"
        token = Mention(start_pos=0, end_pos=32, nick=None,
                        url="https://example.com/twtxt.txt")
        self.check(text, token)

    def test_text_and_mention(self):
        text = "Hello world. @<hugo https://example.com/twtxt.txt>"
        tokens = (
            Text(start_pos=0, end_pos=13, text="Hello world. "),
            Mention(start_pos=13, end_pos=50, nick="hugo",
                    url="https://example.com/twtxt.txt"),
        )
        self.check(text, *tokens)

    def test_only_subject(self):
        text = "(#<123defg https://example.com/>)"
        token = SubjectHash(start_pos=0, end_pos=33, hash="123defg",
                            url="https://example.com/")
        self.check(text, token)

        text = "(#<123defg>)"
        token = SubjectHash(start_pos=0, end_pos=12, hash="123defg", url=None)
        self.check(text, token)

        text = "(#<https://example.com/>)"
        token = SubjectHash(start_pos=0, end_pos=25, hash=None,
                            url="https://example.com/")
        self.check(text, token)

    def test_subject_with_text_after_subject_hash_is_text_token_and_twtxt_link_and_text(self):
        text = "(#<123defg https://example.com/> some text)"
        tokens = (
            Text(start_pos=0, end_pos=1, text="("),
            TwtxtLink(start_pos=1, end_pos=32, title="123defg",
                      url="https://example.com/"),
            Text(start_pos=32, end_pos=43, text=" some text)"),
        )
        self.check(text, *tokens)

    def test_subject_text_and_mention_and_text(self):
        text = "(#<abc4567 https://example.com/>) @<hugo https://example.com/twtxt.txt> Great!"
        tokens = (
            SubjectHash(start_pos=0, end_pos=33, hash="abc4567",
                        url="https://example.com/"),
            Text(start_pos=33, end_pos=34, text=" "),
            Mention(start_pos=34, end_pos=71, nick="hugo",
                    url="https://example.com/twtxt.txt"),
            Text(start_pos=71, end_pos=78, text=" Great!"),
        )
        self.check(text, *tokens)

    def test_mention_and_text_and_mention_and_subject_and_text_and_mention_and_text(self):
        text = ("@<hugo https://example.com/hugo> @<joe> (#<1234567 https://example.com/>) "
                "@<https://example.com/kate> This is wonderful.")
        tokens = (
            Mention(start_pos=0, end_pos=32, nick="hugo",
                    url="https://example.com/hugo"),
            Text(start_pos=32, end_pos=33, text=" "),
            Mention(start_pos=33, end_pos=39, nick="joe", url=None),
            Text(start_pos=39, end_pos=40, text=" "),
            SubjectHash(start_pos=40, end_pos=73, hash="1234567",
                        url="https://example.com/"),
            Text(start_pos=73, end_pos=74, text=" "),
            Mention(start_pos=74, end_pos=101, nick=None,
                    url="https://example.com/kate"),
            Text(start_pos=101, end_pos=120, text=" This is wonderful."),
        )
        self.check(text, *tokens)

    def test_only_twtxt_link(self):
        text = "#<eggs https://example.com/eggs>"
        token = TwtxtLink(start_pos=0, end_pos=32, title="eggs",
                          url="https://example.com/eggs")
        self.check(text, token)

    def test_only_markdown_link(self):
        text = "[link title](https://example.com/)"
        token = MarkdownLink(start_pos=0, end_pos=34, title="link title",
                             url="https://example.com/")
        self.check(text, token)

    def test_mention_text_subject_text_mention_text_markdown_link_text_mention_text_mention(self):
        text = ("@<hugo https://example.com/hugo> (#<1234567 https://example.com/>) "
                "What a great thing I've discovered: [Check this out](https://example.com/), "
                "@<kate> and @<joe>")
        tokens = (
            Mention(start_pos=0, end_pos=32, nick="hugo",
                    url="https://example.com/hugo"),
            Text(start_pos=32, end_pos=33, text=" "),
            SubjectHash(start_pos=33, end_pos=66, hash="1234567",
                        url="https://example.com/"),
            Text(start_pos=66, end_pos=103,
                 text=" What a great thing I've discovered: "),
            MarkdownLink(start_pos=103, end_pos=141, title="Check this out",
                         url="https://example.com/"),
            Text(start_pos=141, end_pos=143, text=", "),
            Mention(start_pos=143, end_pos=150, nick="kate", url=None),
            Text(start_pos=150, end_pos=155, text=" and "),
            Mention(start_pos=155, end_pos=161, nick="joe", url=None),
        )
        self.check(text, *tokens)

    def test_input_crashing_parser(self):
        text = ("Over two days, the upload feature is busy. "
                "!(https://twtxt.net/media/KuGwrmFJvpahhaJJsFPXKK) No comments in console.")
        tokens = (
            Text(start_pos=0, end_pos=45,
                 text="Over two days, the upload feature is busy. !("),
            PlainLink(start_pos=45, end_pos=91,
                      url="https://twtxt.net/media/KuGwrmFJvpahhaJJsFPXKK"),
            Text(start_pos=91, end_pos=116, text=") No comments in console."),
        )
        self.check(text, *tokens)

    def test_only_plain_link(self):
        text = "https://example.com/"
        token = PlainLink(start_pos=0, end_pos=20, url="https://example.com/")
        self.check(text, token)

    def test_mention_plain_link_text(self):
        text = "@<hugo https://example.com/hugo>https://example.com/foo/bar crazy!!1"
        tokens = (
            Mention(start_pos=0, end_pos=32, nick="hugo",
                    url="https://example.com/hugo"),
            PlainLink(start_pos=32, end_pos=59,
                      url="https://example.com/foo/bar"),
            Text(start_pos=59, end_pos=68, text=" crazy!!1"),
        )
        self.check(text, *tokens)
# Run all test cases of this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
twtxt
test_t
est
.py
→
test_t
wtxtrenderer
.py
View file @
e24dd938
import
datetime
import
twtxt
import
unittest
import
pytz
from
twtxttest
import
create_hash
,
create_old_hash
,
normalize_whitespace_markup
# Fixed-offset time zones used by the timestamps in the hash tests:
# UTC itself and UTC+01:00.
UTC = datetime.timezone.utc
CET = datetime.timezone(datetime.timedelta(minutes=60))
class CreateHashTest(unittest.TestCase):
    """
    Check create_hash() and create_old_hash() against known twt hashes.
    """

    def _spec_twt(self, created_at):
        """
        Build the twt shared by all RFC 3339 tests. Only the creation
        timestamp differs between them.
        """
        feed = twtxt.models.Source(
            nick="nick",
            url="https://example.com/twtxt.txt")
        return twtxt.models.Tweet(
            created_at=created_at,
            text="The twt hash now uses the RFC 3339 timestamp format.",
            source=feed)

    def _assert_spec_hash(self, expected, created_at):
        """Assert that the shared twt created at the given time hashes to expected."""
        self.assertEqual(expected, create_hash(self._spec_twt(created_at)))

    def test_old_timestamp_format(self):
        timestamp = datetime.datetime(2020, 12, 6, 20, 20, 35, tzinfo=CET)
        feed = twtxt.models.Source(
            nick="nick",
            url="http://0.0.0.0:8000/user/lyse/twtxt.txt")
        twt = twtxt.models.Tweet(
            created_at=timestamp,
            text="This is a test tweet for testing.",
            source=feed)
        self.assertEqual("sqwl3la", create_old_hash(twt))

    def test_rfc3339_timestamp_with_milliseconds_precision_is_truncated_to_seconds_precision(self):
        # (The seventh positional argument of datetime is the sub-second part.)
        self._assert_spec_hash(
            "74qtyjq",
            datetime.datetime(2020, 12, 9, 15, 38, 42, 123, tzinfo=UTC))

    def test_rfc3339_timestamp_with_millisecomds_precision_is_truncated_to_seconds_precision_without_rounding(self):
        self._assert_spec_hash(
            "74qtyjq",
            datetime.datetime(2020, 12, 9, 15, 38, 42, 999, tzinfo=UTC))

    def test_rfc3339_timestamp_with_seconds_precision_and_utc_plus_1_offset_is_kept_intact(self):
        self._assert_spec_hash(
            "64u2m5a",
            datetime.datetime(2020, 12, 9, 16, 38, 42, tzinfo=CET))

    def test_rfc3339_timestamp_with_minutes_precision_is_expanded_to_seconds_precision(self):
        self._assert_spec_hash(
            "a3c3k5q",
            datetime.datetime(2020, 12, 9, 16, 38, 0, tzinfo=CET))

    def test_rfc3339_timestamp_with_utc_is_rendered_as_designated_zulu_offset_rather_than_numeric_offset(self):
        self._assert_spec_hash(
            "74qtyjq",
            datetime.datetime(2020, 12, 9, 15, 38, 42, tzinfo=UTC))
        # Same instant, but with a zero offset that is not the UTC singleton.
        zero_offset = datetime.timezone(datetime.timedelta(hours=-0))
        self._assert_spec_hash(
            "74qtyjq",
            datetime.datetime(2020, 12, 9, 15, 38, 42, tzinfo=zero_offset))

    def test_rfc3339_timestamp_without_explicit_timezone_information_is_assumed_to_be_in_utc(self):
        self._assert_spec_hash(
            "74qtyjq",
            datetime.datetime(2020, 12, 9, 15, 38, 42))
from
twtxtrenderer
import
normalize_whitespace_markup
class
NormalizeWhitespaceMarkupTest
(
unittest
.
TestCase
):
...
...
@@ -146,3 +91,4 @@ class NormalizeWhitespaceMarkupTest(unittest.TestCase):
# Run all test cases of this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
twtxthash.py
0 → 100644
View file @
e24dd938
"""
Create hashes for twts.
"""
import
datetime
import
base64
import
hashlib
# Maps a numeric UTC offset, as rendered by strftime("%z"), to the time zone
# name that create_old_hash() appends to the formatted timestamp. Some
# offsets map to themselves rather than to an abbreviation. Offsets without
# an entry are reported by create_old_hash().
_tz_abbr_cache = {
    "+0000": "UTC",
    "+0100": "CET",
    "+0200": "CEST",
    "-0400": "-0400",
    "-0500": "-0500",
}
def create_hash(twt):
    """
    Create the hash of the given twt as specified in the Twt Hash Extension:
    https://dev.twtxt.net/doc/twthashextension.html

    The twt has to provide `created_at` (a datetime.datetime), `text` and
    `source.url`. Returns the hash as a string of seven lower case
    characters.
    """

    # (All comments in this function are taken from the aforementioned
    # specification. Comments in parenthesis are mine, including this one.)

    # If the timestamp does not explicitly include any timezone information, it
    # must be assumed to be in UTC.
    if twt.created_at.tzinfo is None:
        tz_aware_created_at = twt.created_at.replace(tzinfo=datetime.timezone.utc)
    else:
        tz_aware_created_at = twt.created_at

    # All timezones representing UTC must be formatted using the designated
    # Zulu indicator 'Z' rather than the numeric offsets '+00:00' or '-00:00'.
    # (RFC 3339 permits and special cases '-00:00', however, it looks like it's
    # not possible to create a negative zero offset in Python. The offset is
    # compared as a value instead of replacing '+00:00' in the rendered text,
    # so that an offset like '+00:00:30' is not mangled. Fractional seconds
    # are cut off here via timespec, without rounding, so the result does not
    # depend on the caller having stripped them already.)
    if tz_aware_created_at.utcoffset() == datetime.timedelta(0):
        created_at = tz_aware_created_at.replace(tzinfo=None).isoformat(timespec="seconds") + "Z"
    else:
        created_at = tz_aware_created_at.isoformat(timespec="seconds")

    # Each twt’s hash is calculated using its author, timestamp and contents.
    # The author feed URL, RFC 3339 formatted timestamp and twt text are joined
    # with line feeds:
    payload = "%s\n%s\n%s" % (twt.source.url, created_at, twt.text)

    # This UTF-8 encoded string is Blake2b hashed with 256 bits…
    # (256 bits are 32 bytes)
    sum256 = hashlib.blake2b(payload.encode("utf-8"), digest_size=32).digest()

    # …and Base32 encoded…
    # (b32encode() returns ASCII bytes, so we have to make a real string out
    # of them before processing it any further.)
    encoded = base64.b32encode(sum256).decode("ascii")

    # …without padding.
    # (The '=' padding only ever occurs at the end.)
    encoded = encoded.rstrip("=")

    # After converting to lower case the last seven characters make up the twt
    # hash.
    # (The substring is taken first and only then lower cased, which gives the
    # same result as the order given in the spec.)
    return encoded[-7:].lower()
def create_old_hash(twt):
    """
    Create the hash of the given tweet according to prologic's blog post which
    is used to form conversations:
    https://twtxt.net/blog/prologic/2020/10/18/making-twtxt-better

    The twt has to provide `created_at` (a datetime.datetime), `text` and
    `source.url`. Returns the hash as a string of seven lower case
    characters.
    """

    moment = twt.created_at
    if moment.tzinfo is None:
        # Without timezone information "%z" renders as an empty string, which
        # would leave the payload without any offset. Treat such timestamps
        # as UTC, the same way create_hash() does.
        # NOTE(review): assumes the old format has no separate rule for
        # timestamps lacking an offset -- confirm.
        moment = moment.replace(tzinfo=datetime.timezone.utc)

    created_at = moment.strftime("%Y-%m-%d %H:%M:%S %z")

    # The numeric offset is the last space separated field of the timestamp.
    tz = created_at.rpartition(" ")[2]
    tz_abbr = _tz_abbr_cache.get(tz)
    if tz_abbr is None:
        print("ERROR: no entry for TZ offset %s" % tz)
        # Best effort: repeat the numeric offset in place of the unknown name,
        # like the table's own "-0400" and "-0500" entries do, instead of
        # hashing the literal text "None".
        tz_abbr = tz
    created_at += " %s" % tz_abbr

    payload = "%s\n%s\n%s" % (twt.source.url, created_at, twt.text)
    sum256 = hashlib.blake2b(payload.encode("utf-8"), digest_size=32).digest()  # 32 bytes are 256 bits
    encoded = base64.b32encode(sum256).decode("ascii")  # b32encode() returns ASCII bytes
    encoded = encoded.rstrip("=")  # no padding
    return encoded[-7:].lower()
twtxtparser.py
View file @
e24dd938
...
...
@@ -199,299 +199,3 @@ def parse_twt_text(text):
pos
+=
1
yield
from
create_text_token
()
if
__name__
==
"__main__"
:
import
unittest
class
FindWhitespaceTest
(
unittest
.
TestCase
):
def
test_no_whitespace
(
self
):
self
.
assertEqual
(
0
,
_find_whitespace
(
''
,
0
))
self
.
assertEqual
(
1
,
_find_whitespace
(
'a'
,
0
))
self
.
assertEqual
(
3
,
_find_whitespace
(
' ab'
,
1
))
def
test_whitespace
(
self
):
self
.
assertEqual
(
1
,
_find_whitespace
(
'a '
,
0
))
self
.
assertEqual
(
2
,
_find_whitespace
(
'ab '
,
0
))
self
.
assertEqual
(
2
,
_find_whitespace
(
'ab cd'
,
0
))
self
.
assertEqual
(
7
,
_find_whitespace
(
'ab cdef gh'
,
3
))
class
ExtractBetweenTest
(
unittest
.
TestCase
):
def
test_closing_not_found
(
self
):
self
.
assertEqual
((
None
,
-
1
),
_extract_between
(
"(missing parenthesis"
,
0
,
None
,
"("
,
")"
))
self
.
assertEqual
((
None
,
-
1
),
_extract_between
(
"(closing parenthesis)( before start_pos"
,
21
,
None
,
"("
,
")"
))
self
.
assertEqual
((
None
,
-
1
),
_extract_between
(
"(closing parenthesis) after end_pos"
,
0
,
20
,
"("
,
")"
))
def
test_extract_data
(
self
):
self
.
assertEqual
((
"foo"
,
5
),
_extract_between
(
"(foo)bar"
,
0
,
None
,
"("
,
")"
))
self
.
assertEqual
((
"foo"
,
7
),
_extract_between
(
"({foo})bar"
,
0
,
None
,
"({"
,
"})"
))
self
.
assertEqual
((
"foo"
,
7
),
_extract_between
(
"({foo})"
,
0
,
None
,
"({"
,
"})"
))
class
ParseMentionTest
(
unittest
.
TestCase
):
def
test_missing_closing_angle_bracket
(
self
):
self
.
assertIsNone
(
_parse_mention
(
"@<missing bracket"
,
0
))
self
.
assertIsNone
(
_parse_mention
(
"@<missing bracket]})"
,
0
))
def
test_nick_and_url
(
self
):
self
.
assertEqual
(
Mention
(
start_pos
=
0
,
end_pos
=
37
,
nick
=
"nick"
,
url
=
"https://example.com/twtxt.txt"
),
_parse_mention
(
"@<nick https://example.com/twtxt.txt>"
,
0
))
self
.
assertEqual
(
Mention
(
start_pos
=
0
,
end_pos
=
37
,
nick
=
"nick"
,
url
=
"https://example.com/twtxt.txt"
),
_parse_mention
(
"@<nick https://example.com/twtxt.txt> more text"
,
0
))
self
.
assertEqual
(
Mention
(
start_pos
=
0
,
end_pos
=
43
,
nick
=
"nick"
,
url
=
"https://example.com/twtxt.txt"
),
_parse_mention
(
"@< nick https://example.com/twtxt.txt > more text"
,
0
))
def
test_only_nick
(
self
):
self
.
assertEqual
(
Mention
(
start_pos
=
0
,
end_pos
=
7
,
nick
=
"nick"
,
url
=
None
),
_parse_mention
(
"@<nick>"
,
0
))
self
.
assertEqual
(
Mention
(
start_pos
=
0
,
end_pos
=
7
,
nick
=
"nick"
,
url
=
None
),
_parse_mention
(
"@<nick> more text"
,
0
))
self
.
assertEqual
(
Mention
(
start_pos
=
0
,
end_pos
=
11
,
nick
=
"nick"
,
url
=
None
),