Commit af0008ab authored by Lysander Trischler's avatar Lysander Trischler
Browse files

Refactor tags to be links, too

parent 489440bd
......@@ -55,27 +55,41 @@ class _Twter(ctypes.Structure):
class Link:
    """A link found in a twt; hash tags are represented as links of type TAG.

    Attributes:
        type: one of the STANDARD, MEDIA, PLAIN, NAKED or TAG constants,
            or None when no type has been set.
        text: the link text, or None.
        target: the link target, or None.
    """

    STANDARD, MEDIA, PLAIN, NAKED, TAG = range(1, 6)

    # Human-readable names of the link type constants, used by __repr__.
    _types = {
        STANDARD: "STANDARD",
        MEDIA: "MEDIA",
        PLAIN: "PLAIN",
        NAKED: "NAKED",
        TAG: "TAG",
    }

    def __init__(self, type=None, text=None, target=None):
        self.type = type
        self.text = text
        self.target = target

    def __repr__(self):
        type_name = self._types.get(self.type)
        if type_name is None:
            # Build the fallback only when it is needed and format it with
            # !r: a "%d" placeholder raises TypeError for the default
            # type=None, which made repr() of a fresh Link() blow up.
            type_name = f"UNKNOWN ({self.type!r})"
        return (f"{self.__class__.__name__}<"
                f"type={type_name} "
                f"text={self.text!r} "
                f"target={self.target!r}>")

    def __eq__(self, other):
        """Return True if other is a Link with equal type, text and target."""
        return isinstance(other, self.__class__) and \
            self.type == other.type and \
            self.text == other.text and \
            self.target == other.target
class _Link(ctypes.Structure):
_fields_ = [("text", ctypes.c_char_p),
_fields_ = [("type", ctypes.c_int),
("text", ctypes.c_char_p),
("target", ctypes.c_char_p)]
def to_python(self, tag=None):
link = Link(tag=tag)
def to_python(self):
link = Link(type=self.type)
if self.text:
link.text = self.text.decode("utf-8")
if self.target:
......@@ -106,7 +120,7 @@ class _Subject(ctypes.Structure):
if self.text:
subject.text = self.text.decode("utf-8")
if self.tag:
subject.tag = self.tag.contents.to_python(tag=True)
subject.tag = self.tag.contents.to_python()
return subject
......@@ -145,8 +159,7 @@ class _Elem(ctypes.Structure):
_TYPE_LINK, \
_TYPE_MENTION, \
_TYPE_SUBJECT, \
_TYPE_TAG, \
_TYPE_TEXT = range(1, 8)
_TYPE_TEXT = range(1, 7)
def to_python(self):
if self.type == self._TYPE_CODE:
......@@ -159,8 +172,6 @@ class _Elem(ctypes.Structure):
return ctypes.cast(self.data, ctypes.POINTER(_Twter)).contents.to_python()
elif self.type == self._TYPE_SUBJECT:
return ctypes.cast(self.data, ctypes.POINTER(_Subject)).contents.to_python()
elif self.type == self._TYPE_TAG:
return ctypes.cast(self.data, ctypes.POINTER(_Link)).contents.to_python(tag=True)
elif self.type == self._TYPE_TEXT:
if self.data is None:
return None
......@@ -237,7 +248,7 @@ class _Twt(ctypes.Structure):
twt.tags = []
for i in range(self.tags_len):
twt.tags.append(self.tags[i].contents.to_python(tag=True))
twt.tags.append(self.tags[i].contents.to_python())
twt.elems = []
for i in range(self.elems_len):
......
......@@ -11,6 +11,7 @@ struct twter {
};
struct link {
int type;
char *text;
char *target;
};
......@@ -69,7 +70,6 @@ const (
TypeLink
TypeMention
TypeSubject
TypeTag
TypeText
)
......@@ -79,6 +79,14 @@ const (
// one. We're very careful to not exceed the real array bounds, though.
const arraySize = 1<<30 - 1
// LinkTag is a newly invented link type indicating that a link is a hash
// tag. There's no real need to come up with a complete Tag type in this C
// bridge; we can simply reuse the Link instead.
//
// At the time of writing (2021-10-27), LinkNaked is the last defined link
// type in lextwt.
const LinkTag lextwt.LinkType = lextwt.LinkNaked + 1
//export parse_file
func parse_file(input *C.char, twter *C.struct_twter) (*C.struct_twt_file, *C.char) {
s := strings.NewReader(C.GoString(input))
......@@ -182,7 +190,7 @@ func convertTwt(t *lextwt.Twt) *C.struct_twt {
linksPtr := C.malloc(C.size_t(len(t.Links())) * C.size_t(unsafe.Sizeof(uintptr(0))))
links := (*[arraySize]*C.struct_link)(linksPtr)
for i, link := range t.Links() {
links[i] = convertLink(link)
links[i] = convertLink(link.(*lextwt.Link))
}
twt.links = (*C.struct_link)(linksPtr)
twt.links_len = C.int(len(t.Links()))
......@@ -190,7 +198,7 @@ func convertTwt(t *lextwt.Twt) *C.struct_twt {
tagsPtr := C.malloc(C.size_t(len(t.Tags())) * C.size_t(unsafe.Sizeof(uintptr(0))))
tags := (*[arraySize]*C.struct_link)(tagsPtr)
for i, tag := range t.Tags() {
tags[i] = convertLink(tag)
tags[i] = convertTagToLink(tag.(*lextwt.Tag))
}
twt.tags = (*C.struct_link)(tagsPtr)
twt.tags_len = C.int(len(t.Tags()))
......@@ -235,7 +243,7 @@ func convertSubject(s types.Subject) *C.struct_subject {
subject := (*C.struct_subject)(ptr)
subject.text = C.CString(s.Text())
if tag, ok := s.Tag().(*lextwt.Tag); ok && tag != nil {
subject.tag = convertTag(tag)
subject.tag = convertTagToLink(tag)
} else {
subject.tag = nil
}
......@@ -250,20 +258,36 @@ func freeSubject(s *C.struct_subject) {
}
}
func convertTag(t types.TwtTag) *C.struct_link {
func newLink(linkType lextwt.LinkType, text, target string) *C.struct_link {
ptr := C.malloc(C.sizeof_struct_link)
link := (*C.struct_link)(ptr)
link.text = C.CString(t.Text())
link.target = C.CString(t.Target())
link._type = C.int(linkType)
link.text = C.CString(text)
link.target = C.CString(target)
return link
}
func convertLink(l types.TwtLink) *C.struct_link {
ptr := C.malloc(C.sizeof_struct_link)
link := (*C.struct_link)(ptr)
link.text = C.CString(l.Text())
link.target = C.CString(l.Target())
return link
// convertLink converts a lextwt link into a C link struct built by newLink.
//
// Unfortunately, lextwt keeps the link type private and offers no accessor
// method for it, so the type has to be reconstructed: media links are
// detected via IsMedia(), all other links by the prefix of their literal.
func convertLink(l *lextwt.Link) *C.struct_link {
	if l.IsMedia() {
		return newLink(lextwt.LinkMedia, l.Text(), l.Target())
	}

	var kind lextwt.LinkType
	switch literal := l.Literal(); {
	case strings.HasPrefix(literal, "<"):
		kind = lextwt.LinkPlain
	case strings.HasPrefix(literal, "["):
		kind = lextwt.LinkStandard
	default:
		// neither angle brackets nor square brackets: a bare URL
		kind = lextwt.LinkNaked
	}
	return newLink(kind, l.Text(), l.Target())
}
// convertTagToLink converts a lextwt hash tag into a C link struct of the
// bridge-specific LinkTag type, carrying over the tag's text and target.
func convertTagToLink(t *lextwt.Tag) *C.struct_link {
	text, target := t.Text(), t.Target()
	return newLink(LinkTag, text, target)
}
func freeLink(l *C.struct_link) {
......@@ -303,7 +327,7 @@ func convertElem(e lextwt.Elem) *C.struct_elem {
case *lextwt.Subject:
return newElem(TypeSubject, unsafe.Pointer(convertSubject(el)))
case *lextwt.Tag:
return newElem(TypeTag, unsafe.Pointer(convertLink(el)))
return newElem(TypeLink, unsafe.Pointer(convertTagToLink(el)))
case *lextwt.Text:
return newElem(TypeText, unsafe.Pointer(C.CString(el.String())))
default:
......@@ -331,8 +355,6 @@ func freeElem(e *C.struct_elem) {
freeTwter((*C.struct_twter)(e.data))
case TypeSubject:
freeSubject((*C.struct_subject)(e.data))
case TypeTag:
freeLink((*C.struct_link)(e.data))
case TypeText:
free_string((*C.char)(e.data))
default:
......
......@@ -17,7 +17,7 @@ class ParseFileTest(unittest.TestCase):
Twt(twter=TWTER,
created=datetime.datetime(2021, 8, 2, 10, 27, 42, tzinfo=UTC_PLUS_2),
hash="slrnx6a",
subject=Subject(text="#slrnx6a", tag=Link(text="slrnx6a", target=None, tag=True)),
subject=Subject(text="#slrnx6a", tag=Link(type=Link.TAG, text="slrnx6a", target=None)),
mentions=[],
links=[],
tags=[],
......@@ -32,7 +32,7 @@ class ParseFileTest(unittest.TestCase):
Twt(twter=TWTER,
created=datetime.datetime(2021, 8, 2, 10, 27, 42, tzinfo=UTC_PLUS_2),
hash="slrnx6a",
subject=Subject(text="#slrnx6a", tag=Link(text="slrnx6a", target=None, tag=True)),
subject=Subject(text="#slrnx6a", tag=Link(type=Link.TAG, text="slrnx6a", target=None)),
mentions=[],
links=[],
tags=[],
......@@ -40,7 +40,7 @@ class ParseFileTest(unittest.TestCase):
Twt(twter=TWTER,
created=datetime.datetime(2021, 8, 3, 9, 28, 45, tzinfo=UTC_PLUS_2),
hash="zm7fnka",
subject=Subject(text="#zm7fnka", tag=Link(text="zm7fnka", target=None, tag=True)),
subject=Subject(text="#zm7fnka", tag=Link(type=Link.TAG, text="zm7fnka", target=None)),
mentions=[],
links=[],
tags=[],
......@@ -55,18 +55,69 @@ class ParseFileTest(unittest.TestCase):
Twt(twter=TWTER,
created=datetime.datetime(2021, 8, 3, 11, 16, 13, tzinfo=UTC_PLUS_2),
hash="zv6vujq",
subject=Subject(text="#zv6vujq", tag=Link(text="zv6vujq", target=None, tag=True)),
subject=Subject(text="#zv6vujq", tag=Link(type=Link.TAG, text="zv6vujq", target=None)),
mentions=[],
links=[Link(text="wonderful", target="https://example.com/"),
Link(text="nice", target="https://example.com/test")],
links=[Link(type=Link.STANDARD, text="wonderful", target="https://example.com/"),
Link(type=Link.STANDARD, text="nice", target="https://example.com/test")],
tags=[],
elems=["Hello ",
Link(text="wonderful", target="https://example.com/"),
Link(type=Link.STANDARD, text="wonderful", target="https://example.com/"),
" and ",
Link(text="nice", target="https://example.com/test"),
Link(type=Link.STANDARD, text="nice", target="https://example.com/test"),
" world",
"!"])]), twtfile)
def test_naked_links(self):
    """A bare URL in the twt text is expected as a NAKED link without text."""
    parsed = parse_file("2021-10-26T10:08:01+02:00\tCool website: https://example.com/", TWTER)
    expected_twt = Twt(
        twter=TWTER,
        created=datetime.datetime(2021, 10, 26, 10, 8, 1, tzinfo=UTC_PLUS_2),
        hash="hs2oe7a",
        subject=Subject(text="#hs2oe7a", tag=Link(type=Link.TAG, text="hs2oe7a", target=None)),
        mentions=[],
        links=[Link(type=Link.NAKED, text=None, target="https://example.com/")],
        tags=[],
        elems=["Cool website: ",
               Link(type=Link.NAKED, text=None, target="https://example.com/")])
    self.assertEqual(TwtFile(twter=TWTER, twts=[expected_twt]), parsed)
def test_plain_links(self):
    """A URL in angle brackets is expected as a PLAIN link without text."""
    parsed = parse_file("2021-10-26T12:45:04+02:00\tCheck this! <https://example.com/>", TWTER)
    expected_twt = Twt(
        twter=TWTER,
        created=datetime.datetime(2021, 10, 26, 12, 45, 4, tzinfo=UTC_PLUS_2),
        hash="xsgdioq",
        subject=Subject(text="#xsgdioq", tag=Link(type=Link.TAG, text="xsgdioq", target=None)),
        mentions=[],
        links=[Link(type=Link.PLAIN, text=None, target="https://example.com/")],
        tags=[],
        elems=["Check this",
               "! ",
               Link(type=Link.PLAIN, text=None, target="https://example.com/")])
    self.assertEqual(TwtFile(twter=TWTER, twts=[expected_twt]), parsed)
def test_media_links(self):
    """Media links are expected as MEDIA links, with and without text."""
    parsed = parse_file(
        "2021-10-26T12:49:38+02:00\tI built this: ![a bench](https://example.com/bench.jpg)\n"
        "2021-10-26T12:50:48+02:00\t![](https://example.com/closeup.jpg)",
        TWTER)
    # First twt: a media link with a text, preceded by plain text.
    with_text = Twt(
        twter=TWTER,
        created=datetime.datetime(2021, 10, 26, 12, 49, 38, tzinfo=UTC_PLUS_2),
        hash="z7lkhyq",
        subject=Subject(text="#z7lkhyq", tag=Link(type=Link.TAG, text="z7lkhyq", target=None)),
        mentions=[],
        links=[Link(type=Link.MEDIA, text="a bench", target="https://example.com/bench.jpg")],
        tags=[],
        elems=["I built this: ",
               Link(type=Link.MEDIA, text="a bench", target="https://example.com/bench.jpg")])
    # Second twt: nothing but a media link with an empty text.
    without_text = Twt(
        twter=TWTER,
        created=datetime.datetime(2021, 10, 26, 12, 50, 48, tzinfo=UTC_PLUS_2),
        hash="7qx6lga",
        subject=Subject(text="#7qx6lga", tag=Link(type=Link.TAG, text="7qx6lga", target=None)),
        mentions=[],
        links=[Link(type=Link.MEDIA, text=None, target="https://example.com/closeup.jpg")],
        tags=[],
        elems=[Link(type=Link.MEDIA, text=None, target="https://example.com/closeup.jpg")])
    self.assertEqual(TwtFile(twter=TWTER, twts=[with_text, without_text]), parsed)
def test_subjects(self):
twtfile = parse_file(
"2021-08-03T12:28:26+02:00\t(#<1234567 https://example.com/1234567>) I agree!\n"
......@@ -77,12 +128,12 @@ class ParseFileTest(unittest.TestCase):
created=datetime.datetime(2021, 8, 3, 12, 28, 26, tzinfo=UTC_PLUS_2),
hash="g2xdgsq",
subject=Subject(text="#<1234567 https://example.com/1234567>",
tag=Link(text="1234567", target="https://example.com/1234567", tag=True)),
tag=Link(type=Link.TAG, text="1234567", target="https://example.com/1234567")),
mentions=[],
links=[],
tags=[Link(text="1234567", target="https://example.com/1234567", tag=True)],
tags=[Link(type=Link.TAG, text="1234567", target="https://example.com/1234567")],
elems=[Subject(text="#<1234567 https://example.com/1234567>",
tag=Link(text="1234567", target="https://example.com/1234567", tag=True)),
tag=Link(type=Link.TAG, text="1234567", target="https://example.com/1234567")),
" I agree",
"!"]),
Twt(twter=TWTER,
......@@ -103,7 +154,7 @@ class ParseFileTest(unittest.TestCase):
Twt(twter=TWTER,
created=datetime.datetime(2021, 8, 3, 15, 4, 9, tzinfo=UTC_PLUS_2),
hash="vu5np6q",
subject=Subject(text="#vu5np6q", tag=Link(text="vu5np6q", target=None, tag=True)),
subject=Subject(text="#vu5np6q", tag=Link(type=Link.TAG, text="vu5np6q", target=None)),
mentions=[Twter(nick="eugen",
url="https://example.org/~eugen/twtxt.txt",
avatar="",
......@@ -125,12 +176,12 @@ class ParseFileTest(unittest.TestCase):
Twt(twter=TWTER,
created=datetime.datetime(2021, 8, 3, 16, 26, 53, tzinfo=UTC_PLUS_2),
hash="5thpeka",
subject=Subject(text="#5thpeka", tag=Link(text="5thpeka", target=None, tag=True)),
subject=Subject(text="#5thpeka", tag=Link(type=Link.TAG, text="5thpeka", target=None)),
mentions=[],
links=[],
tags=[Link(text="eggs", target="https://example.com/eggs", tag=True)],
tags=[Link(type=Link.TAG, text="eggs", target="https://example.com/eggs")],
elems=["Have a look at ",
Link(text="eggs", target="https://example.com/eggs", tag=True),
Link(type=Link.TAG, text="eggs", target="https://example.com/eggs"),
"!"])]), twtfile)
def test_metadata(self):
......@@ -150,7 +201,7 @@ class ParseFileTest(unittest.TestCase):
Twt(twter=expectedTwter,
created=datetime.datetime(2021, 10, 25, 14, 17, 13, tzinfo=UTC_PLUS_2),
hash="ckgkqaa",
subject=Subject(text="#ckgkqaa", tag=Link(text="ckgkqaa", target=None, tag=True)),
subject=Subject(text="#ckgkqaa", tag=Link(type=Link.TAG, text="ckgkqaa", target=None)),
mentions=[],
links=[],
tags=[],
......@@ -158,7 +209,7 @@ class ParseFileTest(unittest.TestCase):
Twt(twter=expectedTwter,
created=datetime.datetime(2021, 10, 25, 14, 18, 35, tzinfo=UTC_PLUS_2),
hash="k6xkknq",
subject=Subject(text="#k6xkknq", tag=Link(text="k6xkknq", target=None, tag=True)),
subject=Subject(text="#k6xkknq", tag=Link(type=Link.TAG, text="k6xkknq", target=None)),
mentions=[],
links=[],
tags=[],
......@@ -172,7 +223,7 @@ class ParseFileTest(unittest.TestCase):
Twt(twter=TWTER,
created=datetime.datetime(2021, 10, 26, 9,26, 56, tzinfo=UTC_PLUS_2),
hash="izcp6eq",
subject=Subject(text="#izcp6eq", tag=Link(text="izcp6eq", target=None, tag=True)),
subject=Subject(text="#izcp6eq", tag=Link(type=Link.TAG, text="izcp6eq", target=None)),
mentions=[],
links=[],
tags=[],
......@@ -189,7 +240,7 @@ class ParseFileTest(unittest.TestCase):
Twt(twter=TWTER,
created=datetime.datetime(2021, 10, 26, 9, 41, 41, tzinfo=UTC_PLUS_2),
hash="yb2hp2q",
subject=Subject(text="#yb2hp2q", tag=Link(text="yb2hp2q", target=None, tag=True)),
subject=Subject(text="#yb2hp2q", tag=Link(type=Link.TAG, text="yb2hp2q", target=None)),
mentions=[],
links=[],
tags=[],
......@@ -203,7 +254,7 @@ class ParseFileTest(unittest.TestCase):
Twt(twter=TWTER,
created=datetime.datetime(2021, 10, 26, 9, 53, 7, tzinfo=UTC_PLUS_2),
hash="3srzo5q",
subject=Subject(text="#3srzo5q", tag=Link(text="3srzo5q", target=None, tag=True)),
subject=Subject(text="#3srzo5q", tag=Link(type=Link.TAG, text="3srzo5q", target=None)),
mentions=[],
links=[],
tags=[],
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment