Commit e24dd938 authored by Lysander Trischler

Refactor code structure

parent e84cca8e
import datetime
import twtxt.models
import unittest
from twtxthash import create_hash, create_old_hash
# Fixed time zones used by the timestamps in the test cases below.
# UTC has a zero offset, CET is modelled as a constant +01:00 offset.
UTC = datetime.timezone.utc
CET = datetime.timezone(datetime.timedelta(hours=1))
class CreateHashTest(unittest.TestCase):
    """
    Pin the hashes that create_hash() and create_old_hash() produce for a
    handful of hand-picked twts.
    """

    def _rfc3339_hash(self, created_at):
        # All RFC 3339 cases hash the same text and feed URL; only the
        # timestamp differs between them.
        twt = twtxt.models.Tweet(
            created_at=created_at,
            text="The twt hash now uses the RFC 3339 timestamp format.",
            source=twtxt.models.Source(nick="nick", url="https://example.com/twtxt.txt"))
        return create_hash(twt)

    def test_old_timestamp_format(self):
        twt = twtxt.models.Tweet(
            created_at=datetime.datetime(2020, 12, 6, 20, 20, 35, tzinfo=CET),
            text="This is a test tweet for testing.",
            source=twtxt.models.Source(nick="nick", url="http://0.0.0.0:8000/user/lyse/twtxt.txt"))
        self.assertEqual("sqwl3la", create_old_hash(twt))

    def test_rfc3339_timestamp_with_milliseconds_precision_is_truncated_to_seconds_precision(self):
        # The seventh positional datetime argument is the microsecond field.
        stamp = datetime.datetime(2020, 12, 9, 15, 38, 42, 123, tzinfo=UTC)
        self.assertEqual("74qtyjq", self._rfc3339_hash(stamp))

    def test_rfc3339_timestamp_with_millisecomds_precision_is_truncated_to_seconds_precision_without_rounding(self):
        stamp = datetime.datetime(2020, 12, 9, 15, 38, 42, 999, tzinfo=UTC)
        self.assertEqual("74qtyjq", self._rfc3339_hash(stamp))

    def test_rfc3339_timestamp_with_seconds_precision_and_utc_plus_1_offset_is_kept_intact(self):
        stamp = datetime.datetime(2020, 12, 9, 16, 38, 42, tzinfo=CET)
        self.assertEqual("64u2m5a", self._rfc3339_hash(stamp))

    def test_rfc3339_timestamp_with_minutes_precision_is_expanded_to_seconds_precision(self):
        stamp = datetime.datetime(2020, 12, 9, 16, 38, 0, tzinfo=CET)
        self.assertEqual("a3c3k5q", self._rfc3339_hash(stamp))

    def test_rfc3339_timestamp_with_utc_is_rendered_as_designated_zulu_offset_rather_than_numeric_offset(self):
        # Same instant, once with the UTC singleton and once with an explicitly
        # constructed zero offset.
        with_utc = datetime.datetime(2020, 12, 9, 15, 38, 42, tzinfo=UTC)
        self.assertEqual("74qtyjq", self._rfc3339_hash(with_utc))

        zero_offset = datetime.timezone(datetime.timedelta(hours=-0))
        with_zero_offset = datetime.datetime(2020, 12, 9, 15, 38, 42, tzinfo=zero_offset)
        self.assertEqual("74qtyjq", self._rfc3339_hash(with_zero_offset))

    def test_rfc3339_timestamp_without_explicit_timezone_information_is_assumed_to_be_in_utc(self):
        naive = datetime.datetime(2020, 12, 9, 15, 38, 42)
        self.assertEqual("74qtyjq", self._rfc3339_hash(naive))
# Run the test cases when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
import unittest
from twtxtparser import (
_find_whitespace,
_extract_between,
_parse_mention,
_parse_subject_hash,
_parse_subject,
_parse_markdown_link,
_parse_plain_link,
parse_twt_text,
Mention,
SubjectHash,
TwtxtLink,
MarkdownLink,
PlainLink,
Text,
)
class FindWhitespaceTest(unittest.TestCase):
    """
    Tests for _find_whitespace(text, start_pos).

    Judging by the expectations below, the result is the index of the first
    whitespace at or after start_pos, or the text length if there is none.
    """

    def _verify(self, cases):
        # Each case is (expected index, text, start position).
        for expected, text, start_pos in cases:
            self.assertEqual(expected, _find_whitespace(text, start_pos))

    def test_no_whitespace(self):
        self._verify((
            (0, '', 0),
            (1, 'a', 0),
            (3, ' ab', 1),
        ))

    def test_whitespace(self):
        self._verify((
            (1, 'a ', 0),
            (2, 'ab ', 0),
            (2, 'ab cd', 0),
            (7, 'ab cdef gh', 3),
        ))
class ExtractBetweenTest(unittest.TestCase):
    """
    Tests for _extract_between(text, start_pos, end_pos, opening, closing).

    The expectations show a (data, position) tuple on success and
    (None, -1) when no closing delimiter is found in the searched range.
    """

    def test_closing_not_found(self):
        not_found = (None, -1)
        for text, start_pos, end_pos in (
                ("(missing parenthesis", 0, None),
                ("(closing parenthesis)( before start_pos", 21, None),
                ("(closing parenthesis) after end_pos", 0, 20)):
            self.assertEqual(not_found, _extract_between(text, start_pos, end_pos, "(", ")"))

    def test_extract_data(self):
        for expected, text, opening, closing in (
                (("foo", 5), "(foo)bar", "(", ")"),
                (("foo", 7), "({foo})bar", "({", "})"),
                (("foo", 7), "({foo})", "({", "})")):
            self.assertEqual(expected, _extract_between(text, 0, None, opening, closing))
class ParseMentionTest(unittest.TestCase):
    """
    Tests for _parse_mention(text, start_pos) on "@<...>" mentions.

    NOTE(review): for the inputs with padding inside the angle brackets the
    expected end_pos is larger than the visible length of the literal; runs of
    spaces may have been collapsed in this copy -- confirm against the
    repository.
    """

    def _assert_mention(self, text, end_pos, nick, url):
        # All cases start parsing at position 0.
        self.assertEqual(Mention(start_pos=0, end_pos=end_pos, nick=nick, url=url),
                         _parse_mention(text, 0))

    def test_missing_closing_angle_bracket(self):
        for text in ("@<missing bracket", "@<missing bracket]})"):
            self.assertIsNone(_parse_mention(text, 0))

    def test_nick_and_url(self):
        feed = "https://example.com/twtxt.txt"
        self._assert_mention("@<nick https://example.com/twtxt.txt>", 37, "nick", feed)
        self._assert_mention("@<nick https://example.com/twtxt.txt> more text", 37, "nick", feed)
        self._assert_mention("@< nick https://example.com/twtxt.txt > more text", 43, "nick", feed)

    def test_only_nick(self):
        self._assert_mention("@<nick>", 7, "nick", None)
        self._assert_mention("@<nick> more text", 7, "nick", None)
        self._assert_mention("@< nick > more text", 11, "nick", None)

    def test_only_url(self):
        feed = "https://example.com/twtxt.txt"
        self._assert_mention("@<https://example.com/twtxt.txt>", 32, None, feed)
        self._assert_mention("@<https://example.com/twtxt.txt> more text", 32, None, feed)
        self._assert_mention("@< https://example.com/twtxt.txt > more text", 36, None, feed)
class ParseSubjectHashTest(unittest.TestCase):
    """
    Tests for _parse_subject_hash(text, start_pos) on "#<...>" subject hashes.

    NOTE(review): for the inputs with padding inside the angle brackets the
    expected end_pos is larger than the visible length of the literal; runs of
    spaces may have been collapsed in this copy -- confirm against the
    repository.
    """

    def _assert_subject_hash(self, text, end_pos, hash_, url):
        # All cases start parsing at position 0.
        self.assertEqual(SubjectHash(start_pos=0, end_pos=end_pos, hash=hash_, url=url),
                         _parse_subject_hash(text, 0))

    def test_missing_closing_angle_bracket(self):
        for text in ("#<abcdefg missing-bracket", "#<abcdefg missing-bracket]})"):
            self.assertIsNone(_parse_subject_hash(text, 0))

    def test_non_7_chars_hash(self):
        # Hashes shorter or longer than seven characters are expected to be
        # accepted as well.
        self._assert_subject_hash("#<123456 https://example.com/>", 30, "123456", "https://example.com/")
        self._assert_subject_hash("#<123456>", 9, "123456", None)
        self._assert_subject_hash("#<12345678 https://example.com/>", 32, "12345678", "https://example.com/")
        self._assert_subject_hash("#<12345678>", 11, "12345678", None)

    def test_hash_and_url(self):
        url = "https://example.com/"
        self._assert_subject_hash("#<abcd567 https://example.com/>", 31, "abcd567", url)
        self._assert_subject_hash("#<abcd567 https://example.com/> more text", 31, "abcd567", url)
        self._assert_subject_hash("#< abcd567 https://example.com/ > more text", 36, "abcd567", url)

    def test_only_hash(self):
        self._assert_subject_hash("#<abcd567>", 10, "abcd567", None)
        self._assert_subject_hash("#<abcd567> more text", 10, "abcd567", None)
        self._assert_subject_hash("#< abcd567 > more text", 13, "abcd567", None)

    def test_only_url(self):
        url = "https://example.com/"
        self._assert_subject_hash("#<https://example.com/>", 23, None, url)
        self._assert_subject_hash("#<https://example.com/> more text", 23, None, url)
        self._assert_subject_hash("#< https://example.com/ > more text", 26, None, url)
class ParseSubjectTest(unittest.TestCase):
    """
    Tests for _parse_subject(text, start_pos) on "(#<...>)" subjects.

    NOTE(review): for the inputs with padding around or inside the brackets the
    expected end_pos is larger than the visible length of the literal; runs of
    spaces may have been collapsed in this copy -- confirm against the
    repository.
    """

    def _assert_subject(self, text, end_pos, hash_, url):
        # All cases start parsing at position 0.
        self.assertEqual(SubjectHash(start_pos=0, end_pos=end_pos, hash=hash_, url=url),
                         _parse_subject(text, 0))

    def test_missing_closing_angle_bracket(self):
        for text in (
                "(#<abcdefg missing-bracket>",
                "(#<abcdefg missing-bracket>]}",
                "(#<abcdefg missing-bracket]}",
                "(#<abcdefg missing-bracket]})"):
            self.assertIsNone(_parse_subject(text, 0))

    def test_other_text_after_subject_hash(self):
        self.assertIsNone(_parse_subject("(#<abcd567 https://example.com/> a)", 0))

    def test_hash_and_url(self):
        url = "https://example.com/"
        self._assert_subject("(#<abcd567 https://example.com/>)", 33, "abcd567", url)
        self._assert_subject("(#<abcd567 https://example.com/>) more text", 33, "abcd567", url)
        self._assert_subject("( #<abcd567 https://example.com/> ) more text", 36, "abcd567", url)
        self._assert_subject("( #< abcd567 https://example.com/ > ) more text", 44, "abcd567", url)

    def test_only_hash(self):
        self._assert_subject("(#<abcd567>)", 12, "abcd567", None)
        self._assert_subject("(#<abcd567>) more text", 12, "abcd567", None)
        self._assert_subject("( #<abcd567> ) more text", 15, "abcd567", None)
        self._assert_subject("( #< abcd567 > ) more text", 18, "abcd567", None)

    def test_only_url(self):
        url = "https://example.com/"
        self._assert_subject("(#<https://example.com/>)", 25, None, url)
        self._assert_subject("(#<https://example.com/>) more text", 25, None, url)
        self._assert_subject("( #<https://example.com/> ) more text", 28, None, url)
        self._assert_subject("( #< https://example.com/ > ) more text", 31, None, url)
class ParseMarkdownLink(unittest.TestCase):
    """Tests for _parse_markdown_link(text, start_pos) on "[title](url)" links."""

    def _assert_rejected(self, *texts):
        # Malformed input is expected to yield None instead of a token.
        for text in texts:
            self.assertIsNone(_parse_markdown_link(text, 0))

    def test_missing_closing_closing_bracket(self):
        self._assert_rejected(
            "[link title(https://example.com)",
            "[link title")

    def test_missing_opening_parenthesis(self):
        self._assert_rejected(
            "[link title]https://example.com)",
            "[link title]")

    def test_missing_closing_parenthesis(self):
        self._assert_rejected(
            "[link title](https://example.com",
            "[link title](")

    def test_missing_closing_parenthesis_or_after_text(self):
        self._assert_rejected(
            "[link title](https://example.com more)",
            "[link title](https://example.com more")

    def test_chars_between_closing_bracket_and_opening_parenthesis(self):
        self._assert_rejected(
            "[link title] (https://example.com)",
            "[link title]a(https://example.com)")

    def test_markdown_link(self):
        for text, end_pos, title in (
                ("[link title](https://example.com/)", 34, "link title"),
                ("[link title](https://example.com/) more text", 34, "link title"),
                ("[title](https://example.com/)", 29, "title"),
                ("[title](https://example.com/) more text", 29, "title")):
            self.assertEqual(
                MarkdownLink(start_pos=0, end_pos=end_pos, title=title, url="https://example.com/"),
                _parse_markdown_link(text, 0))
class PlainLinkTest(unittest.TestCase):
    """Tests for _parse_plain_link(text, start_pos) on bare http(s) URLs."""

    def _assert_link(self, text, end_pos, url):
        # All cases start parsing at position 0.
        self.assertEqual(PlainLink(start_pos=0, end_pos=end_pos, url=url),
                         _parse_plain_link(text, 0))

    def test_http_link(self):
        self._assert_link("http://example.com/", 19, "http://example.com/")
        self._assert_link("http://example.com/ more text", 19, "http://example.com/")

    def test_https_link(self):
        self._assert_link("https://example.com/", 20, "https://example.com/")
        self._assert_link("https://example.com/ more text", 20, "https://example.com/")

    def test_closing_parenthesis_at_the_end_is_not_part_of_url(self):
        self._assert_link("https://example.com/)", 20, "https://example.com/")
class ParseTwtTextTest(unittest.TestCase):
    """
    Tests for parse_twt_text(text): every case lists the complete token
    sequence expected for one input text.
    """

    def check(self, text, *expected_tokens):
        """Assert that parsing ``text`` yields exactly ``expected_tokens``."""
        self.assertEqual(expected_tokens, tuple(parse_twt_text(text)))

    def test_only_text(self):
        text = "Hello world."
        self.check(text, Text(start_pos=0, end_pos=12, text="Hello world."))

    def test_only_mention(self):
        feed = "https://example.com/twtxt.txt"

        both = Mention(start_pos=0, end_pos=37, nick="hugo", url=feed)
        self.check("@<hugo https://example.com/twtxt.txt>", both)

        nick_only = Mention(start_pos=0, end_pos=7, nick="hugo", url=None)
        self.check("@<hugo>", nick_only)

        url_only = Mention(start_pos=0, end_pos=32, nick=None, url=feed)
        self.check("@<https://example.com/twtxt.txt>", url_only)

    def test_text_and_mention(self):
        text = "Hello world. @<hugo https://example.com/twtxt.txt>"
        tokens = (
            Text(start_pos=0, end_pos=13, text="Hello world. "),
            Mention(start_pos=13, end_pos=50, nick="hugo", url="https://example.com/twtxt.txt"),
        )
        self.check(text, *tokens)

    def test_only_subject(self):
        url = "https://example.com/"

        both = SubjectHash(start_pos=0, end_pos=33, hash="123defg", url=url)
        self.check("(#<123defg https://example.com/>)", both)

        hash_only = SubjectHash(start_pos=0, end_pos=12, hash="123defg", url=None)
        self.check("(#<123defg>)", hash_only)

        url_only = SubjectHash(start_pos=0, end_pos=25, hash=None, url=url)
        self.check("(#<https://example.com/>)", url_only)

    def test_subject_with_text_after_subject_hash_is_text_token_and_twtxt_link_and_text(self):
        text = "(#<123defg https://example.com/> some text)"
        tokens = (
            Text(start_pos=0, end_pos=1, text="("),
            TwtxtLink(start_pos=1, end_pos=32, title="123defg", url="https://example.com/"),
            Text(start_pos=32, end_pos=43, text=" some text)"),
        )
        self.check(text, *tokens)

    def test_subject_text_and_mention_and_text(self):
        text = "(#<abc4567 https://example.com/>) @<hugo https://example.com/twtxt.txt> Great!"
        tokens = (
            SubjectHash(start_pos=0, end_pos=33, hash="abc4567", url="https://example.com/"),
            Text(start_pos=33, end_pos=34, text=" "),
            Mention(start_pos=34, end_pos=71, nick="hugo", url="https://example.com/twtxt.txt"),
            Text(start_pos=71, end_pos=78, text=" Great!"),
        )
        self.check(text, *tokens)

    def test_mention_and_text_and_mention_and_subject_and_text_and_mention_and_text(self):
        text = ("@<hugo https://example.com/hugo> @<joe> (#<1234567 https://example.com/>) "
                "@<https://example.com/kate> This is wonderful.")
        tokens = (
            Mention(start_pos=0, end_pos=32, nick="hugo", url="https://example.com/hugo"),
            Text(start_pos=32, end_pos=33, text=" "),
            Mention(start_pos=33, end_pos=39, nick="joe", url=None),
            Text(start_pos=39, end_pos=40, text=" "),
            SubjectHash(start_pos=40, end_pos=73, hash="1234567", url="https://example.com/"),
            Text(start_pos=73, end_pos=74, text=" "),
            Mention(start_pos=74, end_pos=101, nick=None, url="https://example.com/kate"),
            Text(start_pos=101, end_pos=120, text=" This is wonderful."),
        )
        self.check(text, *tokens)

    def test_only_twtxt_link(self):
        link = TwtxtLink(start_pos=0, end_pos=32, title="eggs", url="https://example.com/eggs")
        self.check("#<eggs https://example.com/eggs>", link)

    def test_only_markdown_link(self):
        link = MarkdownLink(start_pos=0, end_pos=34, title="link title", url="https://example.com/")
        self.check("[link title](https://example.com/)", link)

    def test_mention_text_subject_text_mention_text_markdown_link_text_mention_text_mention(self):
        text = ("@<hugo https://example.com/hugo> (#<1234567 https://example.com/>) "
                "What a great thing I've discovered: [Check this out](https://example.com/), "
                "@<kate> and @<joe>")
        tokens = (
            Mention(start_pos=0, end_pos=32, nick="hugo", url="https://example.com/hugo"),
            Text(start_pos=32, end_pos=33, text=" "),
            SubjectHash(start_pos=33, end_pos=66, hash="1234567", url="https://example.com/"),
            Text(start_pos=66, end_pos=103, text=" What a great thing I've discovered: "),
            MarkdownLink(start_pos=103, end_pos=141, title="Check this out", url="https://example.com/"),
            Text(start_pos=141, end_pos=143, text=", "),
            Mention(start_pos=143, end_pos=150, nick="kate", url=None),
            Text(start_pos=150, end_pos=155, text=" and "),
            Mention(start_pos=155, end_pos=161, nick="joe", url=None),
        )
        self.check(text, *tokens)

    def test_input_crashing_parser(self):
        # Regression case, going by the test name: this input once crashed
        # the parser.
        text = ("Over two days, the upload feature is busy. "
                "!(https://twtxt.net/media/KuGwrmFJvpahhaJJsFPXKK) No comments in console.")
        tokens = (
            Text(start_pos=0, end_pos=45, text="Over two days, the upload feature is busy. !("),
            PlainLink(start_pos=45, end_pos=91, url="https://twtxt.net/media/KuGwrmFJvpahhaJJsFPXKK"),
            Text(start_pos=91, end_pos=116, text=") No comments in console."),
        )
        self.check(text, *tokens)

    def test_only_plain_link(self):
        link = PlainLink(start_pos=0, end_pos=20, url="https://example.com/")
        self.check("https://example.com/", link)

    def test_mention_plain_link_text(self):
        text = "@<hugo https://example.com/hugo>https://example.com/foo/bar crazy!!1"
        tokens = (
            Mention(start_pos=0, end_pos=32, nick="hugo", url="https://example.com/hugo"),
            PlainLink(start_pos=32, end_pos=59, url="https://example.com/foo/bar"),
            Text(start_pos=59, end_pos=68, text=" crazy!!1"),
        )
        self.check(text, *tokens)
# Run the test cases when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
import datetime
import twtxt
import unittest
import pytz
from twtxttest import create_hash, create_old_hash, normalize_whitespace_markup
# Fixed time zones used by the timestamps in the test cases below.
# UTC has a zero offset, CET is modelled as a constant +01:00 offset.
UTC = datetime.timezone.utc
CET = datetime.timezone(datetime.timedelta(hours=1))
class CreateHashTest(unittest.TestCase):
    """
    Pin the hashes that create_hash() and create_old_hash() produce for a
    handful of hand-picked twts.
    """

    def _rfc3339_hash(self, created_at):
        # All RFC 3339 cases hash the same text and feed URL; only the
        # timestamp differs between them.
        twt = twtxt.models.Tweet(
            created_at=created_at,
            text="The twt hash now uses the RFC 3339 timestamp format.",
            source=twtxt.models.Source(nick="nick", url="https://example.com/twtxt.txt"))
        return create_hash(twt)

    def test_old_timestamp_format(self):
        twt = twtxt.models.Tweet(
            created_at=datetime.datetime(2020, 12, 6, 20, 20, 35, tzinfo=CET),
            text="This is a test tweet for testing.",
            source=twtxt.models.Source(nick="nick", url="http://0.0.0.0:8000/user/lyse/twtxt.txt"))
        self.assertEqual("sqwl3la", create_old_hash(twt))

    def test_rfc3339_timestamp_with_milliseconds_precision_is_truncated_to_seconds_precision(self):
        # The seventh positional datetime argument is the microsecond field.
        stamp = datetime.datetime(2020, 12, 9, 15, 38, 42, 123, tzinfo=UTC)
        self.assertEqual("74qtyjq", self._rfc3339_hash(stamp))

    def test_rfc3339_timestamp_with_millisecomds_precision_is_truncated_to_seconds_precision_without_rounding(self):
        stamp = datetime.datetime(2020, 12, 9, 15, 38, 42, 999, tzinfo=UTC)
        self.assertEqual("74qtyjq", self._rfc3339_hash(stamp))

    def test_rfc3339_timestamp_with_seconds_precision_and_utc_plus_1_offset_is_kept_intact(self):
        stamp = datetime.datetime(2020, 12, 9, 16, 38, 42, tzinfo=CET)
        self.assertEqual("64u2m5a", self._rfc3339_hash(stamp))

    def test_rfc3339_timestamp_with_minutes_precision_is_expanded_to_seconds_precision(self):
        stamp = datetime.datetime(2020, 12, 9, 16, 38, 0, tzinfo=CET)
        self.assertEqual("a3c3k5q", self._rfc3339_hash(stamp))

    def test_rfc3339_timestamp_with_utc_is_rendered_as_designated_zulu_offset_rather_than_numeric_offset(self):
        # Same instant, once with the UTC singleton and once with an explicitly
        # constructed zero offset.
        with_utc = datetime.datetime(2020, 12, 9, 15, 38, 42, tzinfo=UTC)
        self.assertEqual("74qtyjq", self._rfc3339_hash(with_utc))

        zero_offset = datetime.timezone(datetime.timedelta(hours=-0))
        with_zero_offset = datetime.datetime(2020, 12, 9, 15, 38, 42, tzinfo=zero_offset)
        self.assertEqual("74qtyjq", self._rfc3339_hash(with_zero_offset))

    def test_rfc3339_timestamp_without_explicit_timezone_information_is_assumed_to_be_in_utc(self):
        naive = datetime.datetime(2020, 12, 9, 15, 38, 42)
        self.assertEqual("74qtyjq", self._rfc3339_hash(naive))
from twtxtrenderer import normalize_whitespace_markup
class NormalizeWhitespaceMarkupTest(unittest.TestCase):
......@@ -146,3 +91,4 @@ class NormalizeWhitespaceMarkupTest(unittest.TestCase):
# Run the test cases when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
"""
Create hashes for twts.
"""
import datetime
import base64
import hashlib
# Maps a numeric UTC offset in strftime("%z") form to the time zone
# abbreviation that create_old_hash() appends to the formatted timestamp.
# Offsets without an abbreviation of their own ("-0400", "-0500") map to
# themselves.
_tz_abbr_cache = {
    "+0000": "UTC",
    "+0100": "CET",
    "+0200": "CEST",
    "-0400": "-0400",
    "-0500": "-0500",
}
def create_hash(twt):
    """
    Create the hash of the given twt as specified in the Twt Hash Extension:
    https://dev.twtxt.net/doc/twthashextension.html

    The twt has to provide ``created_at`` (a datetime), ``source.url`` and
    ``text``. The result is a string made up of the last seven characters of
    the lower-cased, unpadded Base32 encoding of the payload's Blake2b-256
    digest.
    """
    # (Unparenthesized comments in this function paraphrase the aforementioned
    # specification. Comments in parenthesis are implementation notes.)

    # If the timestamp does not explicitly include any timezone information, it
    # must be assumed to be in UTC.
    # (utcoffset() is None both for tzinfo=None and for a tzinfo object that
    # cannot tell its offset, so every naive timestamp is covered.)
    moment = twt.created_at
    if moment.utcoffset() is None:
        moment = moment.replace(tzinfo=datetime.timezone.utc)

    # (The hash is built from a timestamp with seconds precision. Asking for
    # timespec="seconds" drops any sub-second digits without rounding, so the
    # result no longer depends on the caller having truncated them already.)
    stamp = moment.isoformat(timespec="seconds")

    # All timezones representing UTC must be formatted using the designated
    # Zulu indicator 'Z' rather than the numeric offsets '+00:00' or '-00:00'.
    # (Python never renders '-00:00'. Only an exact trailing '+00:00' is
    # replaced, so an offset such as '+00:00:30' stays untouched.)
    zero_offset = "+00:00"
    if stamp.endswith(zero_offset):
        stamp = stamp[:-len(zero_offset)] + "Z"

    # Each twt's hash is calculated using its author, timestamp and contents.
    # The author feed URL, RFC 3339 formatted timestamp and twt text are joined
    # with line feeds:
    payload = "%s\n%s\n%s" % (twt.source.url, stamp, twt.text)

    # This UTF-8 encoded string is Blake2b hashed with 256 bits...
    # (256 bits are 32 bytes)
    digest = hashlib.blake2b(payload.encode("utf-8"), digest_size=32).digest()

    # ...and Base32 encoded without padding.
    # (b32encode() returns ASCII bytes; '=' only ever occurs as trailing
    # padding, so stripping it from the right removes all of it.)
    encoded = base64.b32encode(digest).decode("ascii").rstrip("=")

    # After converting to lower case the last seven characters make up the twt
    # hash.
    # (Slicing first means only seven characters have to be lower-cased.)
    return encoded[-7:].lower()
def create_old_hash(twt):
    """
    Create the hash of the given tweet according to prologic's blog post which
    is used to form conversations:
    https://twtxt.net/blog/prologic/2020/10/18/making-twtxt-better

    The payload consists of the feed URL, the timestamp rendered as
    "YYYY-MM-DD HH:MM:SS <numeric offset> <zone abbreviation>" and the twt
    text, joined with line feeds. The result is a string made up of the last
    seven characters of the lower-cased, unpadded Base32 encoding of the
    payload's Blake2b-256 digest.

    The twt has to provide ``created_at`` (a datetime), ``source.url`` and
    ``text``.
    """
    moment = twt.created_at
    # A naive timestamp has no offset to render at all; treat it as UTC, the
    # same assumption create_hash() makes.
    if moment.utcoffset() is None:
        moment = moment.replace(tzinfo=datetime.timezone.utc)

    # Ask strftime for the offset directly rather than slicing it out of the
    # formatted timestamp at a fixed column.
    tz = moment.strftime("%z")
    tz_abbr = _tz_abbr_cache.get(tz)
    if tz_abbr is None:
        print("ERROR: no entry for TZ offset %s" % tz)
        # Fall back to the numeric offset instead of appending the literal
        # "None"; the table already maps offsets without a known abbreviation
        # ("-0400", "-0500") onto themselves.
        tz_abbr = tz
    created_at = "%s %s %s" % (moment.strftime("%Y-%m-%d %H:%M:%S"), tz, tz_abbr)

    payload = "%s\n%s\n%s" % (twt.source.url, created_at, twt.text)
    # 32 bytes are 256 bits.
    digest = hashlib.blake2b(payload.encode("utf-8"), digest_size=32).digest()
    # b32encode() returns ASCII bytes; '=' only ever occurs as trailing
    # padding, so stripping it from the right removes all of it.
    encoded = base64.b32encode(digest).decode("ascii").rstrip("=")
    return encoded[-7:].lower()
......@@ -199,299 +199,3 @@ def parse_twt_text(text):
pos += 1
yield from create_text_token()
if __name__ == "__main__":
import unittest
class FindWhitespaceTest(unittest.TestCase):
def test_no_whitespace(self):
self.assertEqual(0, _find_whitespace('', 0))
self.assertEqual(1, _find_whitespace('a', 0))
self.assertEqual(3, _find_whitespace(' ab', 1))
def test_whitespace(self):
self.assertEqual(1, _find_whitespace('a ', 0))
self.assertEqual(2, _find_whitespace('ab ', 0))
self.assertEqual(2, _find_whitespace('ab cd', 0))
self.assertEqual(7, _find_whitespace('ab cdef gh', 3))
class ExtractBetweenTest(unittest.TestCase):
def test_closing_not_found(self):
self.assertEqual((None, -1), _extract_between("(missing parenthesis", 0, None, "(", ")"))
self.assertEqual((None, -1), _extract_between("(closing parenthesis)( before start_pos", 21, None, "(", ")"))
self.assertEqual((None, -1), _extract_between("(closing parenthesis) after end_pos", 0, 20, "(", ")"))
def test_extract_data(self):
self.assertEqual(("foo", 5), _extract_between("(foo)bar", 0, None, "(", ")"))
self.assertEqual(("foo", 7), _extract_between("({foo})bar", 0, None, "({", "})"))
self.assertEqual(("foo", 7), _extract_between("({foo})", 0, None, "({", "})"))
class ParseMentionTest(unittest.TestCase):
def test_missing_closing_angle_bracket(self):
self.assertIsNone(_parse_mention("@<missing bracket", 0))
self.assertIsNone(_parse_mention("@<missing bracket]})", 0))
def test_nick_and_url(self):
self.assertEqual(Mention(start_pos=0, end_pos=37, nick="nick", url="https://example.com/twtxt.txt"),
_parse_mention("@<nick https://example.com/twtxt.txt>", 0))
self.assertEqual(Mention(start_pos=0, end_pos=37, nick="nick", url="https://example.com/twtxt.txt"),
_parse_mention("@<nick https://example.com/twtxt.txt> more text", 0))
self.assertEqual(Mention(start_pos=0, end_pos=43, nick="nick", url="https://example.com/twtxt.txt"),
_parse_mention("@< nick https://example.com/twtxt.txt > more text", 0))
def test_only_nick(self):
self.assertEqual(Mention(start_pos=0, end_pos=7, nick="nick", url=None),
_parse_mention("@<nick>", 0))
self.assertEqual(Mention(start_pos=0, end_pos=7, nick="nick", url=None),
_parse_mention("@<nick> more text", 0))
self.assertEqual(Mention(start_pos=0, end_pos=11, nick="nick", url=None),
_parse_mention("@< nick > more text", 0))