Commit 489440bd authored by Lysander Trischler
Browse files

Finally implement elements

parent 02e038a4
......@@ -55,25 +55,27 @@ class _Twter(ctypes.Structure):
class Link:
    """A link or tag found in a twt.

    Attributes:
        text: the link text, or None when not set.
        target: the link target, or None when not set.
        tag: flag handed in by the caller; the binding passes True when the
            link was converted from a tag and leaves it None otherwise.
    """

    def __init__(self, text=None, target=None, tag=None):
        self.text = text
        self.target = target
        self.tag = tag

    def __repr__(self):
        return f"{self.__class__.__name__}<text={self.text!r} target={self.target!r} tag={self.tag}>"

    def __eq__(self, other):
        # Two links are equal only if all three attributes match, so a tag
        # never compares equal to a plain link with the same text and target.
        return isinstance(other, self.__class__) and \
               self.text == other.text and \
               self.target == other.target and \
               self.tag == other.tag
class _Link(ctypes.Structure):
_fields_ = [("text", ctypes.c_char_p),
("target", ctypes.c_char_p)]
def to_python(self):
link = Link()
def to_python(self, tag=None):
link = Link(tag=tag)
if self.text:
link.text = self.text.decode("utf-8")
if self.target:
......@@ -104,14 +106,74 @@ class _Subject(ctypes.Structure):
if self.text:
subject.text = self.text.decode("utf-8")
if self.tag:
subject.tag = self.tag.contents.to_python()
subject.tag = self.tag.contents.to_python(tag=True)
return subject
class Code:
    """A piece of code inside a twt, either inline or a whole block.

    Attributes:
        type: Code.INLINE (1) or Code.BLOCK (2).
        text: the code text.
    """

    INLINE, BLOCK = range(1, 3)

    # NOTE: the parameter name `type` shadows the builtin, but it is part of
    # the public signature and therefore kept.
    def __init__(self, type, text):
        self.type = type
        self.text = text

    def __repr__(self):
        # Show the symbolic name for known types and the raw value for
        # anything else, so an unexpected type is not mislabelled as BLOCK.
        if self.type == self.INLINE:
            kind = "INLINE"
        elif self.type == self.BLOCK:
            kind = "BLOCK"
        else:
            kind = repr(self.type)
        return f"<{self.__class__.__name__} type={kind} text={self.text!r}>"

    def __eq__(self, other):
        return isinstance(other, self.__class__) and \
               self.type == other.type and \
               self.text == other.text
class _Code(ctypes.Structure):
    """ctypes mirror of the C ``struct code`` (an int type and a C string)."""

    _fields_ = [("type", ctypes.c_int),
                ("text", ctypes.c_char_p)]

    def to_python(self):
        """Convert this C structure into a Code object.

        The text is decoded as UTF-8. A NULL text pointer yields a Code whose
        text is None instead of failing with an AttributeError; an empty C
        string still yields an empty str.
        """
        raw = self.text  # bytes, or None for a NULL pointer
        text = None if raw is None else raw.decode("utf-8")
        return Code(self.type, text)
class _Elem(ctypes.Structure):
    """ctypes mirror of the C ``struct elem``: a type tag plus an untyped payload pointer."""

    _fields_ = [("type", ctypes.c_int),
                ("data", ctypes.c_void_p)]

    # Type tags 1..7; the order matches the Type* constants of the Go library.
    _TYPE_CODE, \
    _TYPE_LINE_SEPARATOR, \
    _TYPE_LINK, \
    _TYPE_MENTION, \
    _TYPE_SUBJECT, \
    _TYPE_TAG, \
    _TYPE_TEXT = range(1, 8)

    def to_python(self):
        """Convert the element into its Python representation.

        Returns:
            "\\u2028" for a line separator, a str for text, the converted
            payload object for code, link, mention, subject and tag elements,
            and None when the payload pointer is NULL or the type is unknown
            (the latter is also reported on stdout).
        """
        kind = self.type
        if kind == self._TYPE_LINE_SEPARATOR:
            # line separators carry no payload at all
            return "\u2028"

        # element type -> (ctypes structure of the payload, to_python() kwargs)
        struct_payloads = {
            self._TYPE_CODE: (_Code, {}),
            self._TYPE_LINK: (_Link, {}),
            self._TYPE_MENTION: (_Twter, {}),
            self._TYPE_SUBJECT: (_Subject, {}),
            self._TYPE_TAG: (_Link, {"tag": True}),
        }
        if kind != self._TYPE_TEXT and kind not in struct_payloads:
            print("TODO NOT IMPLEMENTED ELEMENT TYPE %d" % self.type)
            return None

        address = self.data  # int address, or None for a NULL pointer
        if address is None:
            # Dereferencing a NULL pointer via .contents would raise
            # ValueError; treat a missing payload the same for every type.
            return None

        if kind == self._TYPE_TEXT:
            return ctypes.string_at(address).decode("utf-8")

        struct_type, kwargs = struct_payloads[kind]
        payload = ctypes.cast(address, ctypes.POINTER(struct_type)).contents
        return payload.to_python(**kwargs)
class Twt:
def __init__(self, twter=None, created=None, hash=None,
subject=None, mentions=None, links=None,
tags=None):
tags=None, elems=None):
self.twter = twter
self.created = created
self.hash = hash
......@@ -119,12 +181,14 @@ class Twt:
self.mentions = mentions
self.links = links
self.tags = tags
self.elems = elems
def __repr__(self):
return f"{self.__class__.__name__}<twter={self.twter!r} " + \
f"created={self.created!r} hash={self.hash!r} " + \
f"subject={self.subject!r} mentions={self.mentions!r} " + \
f"links={self.links!r} tags={self.tags!r}>"
f"links={self.links!r} tags={self.tags!r} " + \
f"elems={self.elems!r}>"
def __eq__(self, other):
return isinstance(other, self.__class__) and \
......@@ -134,7 +198,8 @@ class Twt:
self.subject == other.subject and \
self.mentions == other.mentions and \
self.links == other.links and \
self.tags == other.tags
self.tags == other.tags and \
self.elems == other.elems
class _Twt(ctypes.Structure):
......@@ -147,7 +212,9 @@ class _Twt(ctypes.Structure):
("links", ctypes.POINTER(ctypes.POINTER(_Link))),
("links_len", ctypes.c_int),
("tags", ctypes.POINTER(ctypes.POINTER(_Link))),
("tags_len", ctypes.c_int)]
("tags_len", ctypes.c_int),
("elems_len", ctypes.c_int),
("elems", ctypes.POINTER(ctypes.POINTER(_Elem)))]
def to_python(self):
twt = Twt()
......@@ -170,7 +237,11 @@ class _Twt(ctypes.Structure):
twt.tags = []
for i in range(self.tags_len):
twt.tags.append(self.tags[i].contents.to_python())
twt.tags.append(self.tags[i].contents.to_python(tag=True))
twt.elems = []
for i in range(self.elems_len):
twt.elems.append(self.elems[i].contents.to_python())
return twt
......
......@@ -20,6 +20,16 @@ struct subject {
struct link *tag;
};
/* A code span of a twt; filled in by convertCode and released by freeCode. */
struct code {
/* code kind: lextwt.CodeBlock or lextwt.CodeInline converted to int */
int type;
/* C string with the code text, surrounding backtick delimiters removed */
char *text;
};
/* One element of a twt: a type tag plus a payload whose layout depends on it. */
struct elem {
/* one of the Type* constants of the Go library (TypeCode = 1 ... TypeText = 7) */
int type;
/* payload for `type`: struct code, struct link, struct twter, struct subject
 * or a C string; NULL for line separators */
void *data;
};
struct twt {
struct twter *twter;
char *created;
......@@ -31,6 +41,8 @@ struct twt {
int links_len;
struct link *tags;
int tags_len;
int elems_len;
struct elem *elems;
};
struct twt_file {
......@@ -47,6 +59,18 @@ import (
"strings"
"time"
"unsafe"
"fmt"
"reflect"
)
// Element type tags stored in the type field of C.struct_elem. Numbering
// starts at 1; the Python binding declares its _TYPE_* constants as
// range(1, 8) in this very order, so both lists have to change together.
const (
TypeCode = iota + 1
TypeLineSeparator
TypeLink
TypeMention
TypeSubject
TypeTag
TypeText
)
// arraySize is a fairly large (hopefully large enough) array size for C-backed
......@@ -88,7 +112,7 @@ func parse_file(input *C.char, twter *C.struct_twter) (*C.struct_twt_file, *C.ch
// so we can index it and copy all the twts into it
twts := (*[arraySize]*C.struct_twt)(twtsPtr)
for i, twt := range file.Twts() {
twts[i] = convertTwt(twt)
twts[i] = convertTwt(twt.(*lextwt.Twt))
}
twtfile.twts = (*C.struct_twt)(twtsPtr)
......@@ -135,7 +159,7 @@ func freeTwters(t *C.struct_twter, length C.int) {
}
}
func convertTwt(t types.Twt) *C.struct_twt {
func convertTwt(t *lextwt.Twt) *C.struct_twt {
ptr := C.malloc(C.sizeof_struct_twt)
twt := (*C.struct_twt)(ptr)
twt.twter = convertTwter(t.Twter())
......@@ -171,6 +195,14 @@ func convertTwt(t types.Twt) *C.struct_twt {
twt.tags = (*C.struct_link)(tagsPtr)
twt.tags_len = C.int(len(t.Tags()))
elemsPtr := C.malloc(C.size_t(len(t.Elems())) * C.size_t(unsafe.Sizeof(uintptr(0))))
elems := (*[arraySize]*C.struct_elem)(elemsPtr)
for i, elem := range t.Elems() {
elems[i] = convertElem(elem)
}
twt.elems_len = C.int(len(t.Elems()))
twt.elems = (*C.struct_elem)(elemsPtr)
return twt
}
......@@ -183,6 +215,7 @@ func freeTwt(t *C.struct_twt) {
freeTwters(t.mentions, t.mentions_len)
freeLinks(t.links, t.links_len)
freeLinks(t.tags, t.tags_len)
freeElems(t.elems, t.elems_len)
C.free(unsafe.Pointer(t))
}
}
......@@ -251,6 +284,96 @@ func freeLinks(l *C.struct_link, length C.int) {
}
}
func newElem(elemType C.int, dataPointer unsafe.Pointer) *C.struct_elem {
ptr := C.malloc(C.sizeof_struct_elem)
elem := (*C.struct_elem)(ptr)
elem._type = elemType
elem.data = dataPointer
return elem
}
func convertElem(e lextwt.Elem) *C.struct_elem {
switch el := e.(type) {
case *lextwt.Code:
return newElem(TypeCode, unsafe.Pointer(convertCode(el)))
case *lextwt.Link:
return newElem(TypeLink, unsafe.Pointer(convertLink(el)))
case *lextwt.Mention:
return newElem(TypeMention, unsafe.Pointer(convertTwter(el.Twter())))
case *lextwt.Subject:
return newElem(TypeSubject, unsafe.Pointer(convertSubject(el)))
case *lextwt.Tag:
return newElem(TypeTag, unsafe.Pointer(convertLink(el)))
case *lextwt.Text:
return newElem(TypeText, unsafe.Pointer(C.CString(el.String())))
default:
// unfortunately, the lextwt.lineSeparator type is private, so we
// need to fall back to a variable comparison
if e == lextwt.LineSeparator {
return newElem(TypeLineSeparator, nil) // no data payload required
}
fmt.Println("ERROR: unsupported element type", reflect.TypeOf(e), e)
return nil
}
}
func freeElem(e *C.struct_elem) {
if e != nil {
if e.data != nil {
switch e._type {
case TypeCode:
freeCode((*C.struct_code)(e.data))
case TypeLineSeparator:
// nothing to do
case TypeLink:
freeLink((*C.struct_link)(e.data))
case TypeMention:
freeTwter((*C.struct_twter)(e.data))
case TypeSubject:
freeSubject((*C.struct_subject)(e.data))
case TypeTag:
freeLink((*C.struct_link)(e.data))
case TypeText:
free_string((*C.char)(e.data))
default:
fmt.Println("ERROR: unsupported element type", e._type)
}
}
C.free(unsafe.Pointer(e))
}
}
func freeElems(e *C.struct_elem, length C.int) {
if e != nil {
elems := (*[arraySize]*C.struct_elem)(unsafe.Pointer(e))
for i := 0; i < int(length); i++ {
freeElem(elems[i])
}
C.free(unsafe.Pointer(e))
}
}
func convertCode(c *lextwt.Code) *C.struct_code {
ptr := C.malloc(C.sizeof_struct_code)
code := (*C.struct_code)(ptr)
lit := c.Literal()
if strings.HasPrefix(lit, "```") {
code._type = C.int(lextwt.CodeBlock)
code.text = C.CString(lit[3:len(lit)-3])
} else {
code._type = C.int(lextwt.CodeInline)
code.text = C.CString(lit[1:len(lit)-1])
}
return code
}
func freeCode(c *C.struct_code) {
if c != nil {
free_string(c.text)
C.free(unsafe.Pointer(c))
}
}
//export free_string
func free_string(s *C.char) {
if s != nil {
......
......@@ -2,7 +2,7 @@
import datetime
import unittest
from libgotwtxt import Link, parse_file, Subject, Twt, Twter, TwtFile
from libgotwtxt import Code, Link, parse_file, Subject, Twt, Twter, TwtFile
UTC_PLUS_2 = datetime.timezone(datetime.timedelta(hours=2))
TWTER = Twter(nick="hugo",
......@@ -17,10 +17,11 @@ class ParseFileTest(unittest.TestCase):
Twt(twter=TWTER,
created=datetime.datetime(2021, 8, 2, 10, 27, 42, tzinfo=UTC_PLUS_2),
hash="slrnx6a",
subject=Subject(text="#slrnx6a", tag=Link(text="slrnx6a", target=None)),
subject=Subject(text="#slrnx6a", tag=Link(text="slrnx6a", target=None, tag=True)),
mentions=[],
links=[],
tags=[])]), twtfile)
tags=[],
elems=["Hello world."])]), twtfile)
def test_two_lines(self):
twtfile = parse_file(
......@@ -31,17 +32,19 @@ class ParseFileTest(unittest.TestCase):
Twt(twter=TWTER,
created=datetime.datetime(2021, 8, 2, 10, 27, 42, tzinfo=UTC_PLUS_2),
hash="slrnx6a",
subject=Subject(text="#slrnx6a", tag=Link(text="slrnx6a", target=None)),
subject=Subject(text="#slrnx6a", tag=Link(text="slrnx6a", target=None, tag=True)),
mentions=[],
links=[],
tags=[]),
tags=[],
elems=["Hello world."]),
Twt(twter=TWTER,
created=datetime.datetime(2021, 8, 3, 9, 28, 45, tzinfo=UTC_PLUS_2),
hash="zm7fnka",
subject=Subject(text="#zm7fnka", tag=Link(text="zm7fnka", target=None)),
subject=Subject(text="#zm7fnka", tag=Link(text="zm7fnka", target=None, tag=True)),
mentions=[],
links=[],
tags=[])]), twtfile)
tags=[],
elems=["Foo bar eggs and spam."])]), twtfile)
def test_markdown_links(self):
twtfile = parse_file(
......@@ -52,11 +55,17 @@ class ParseFileTest(unittest.TestCase):
Twt(twter=TWTER,
created=datetime.datetime(2021, 8, 3, 11, 16, 13, tzinfo=UTC_PLUS_2),
hash="zv6vujq",
subject=Subject(text="#zv6vujq", tag=Link(text="zv6vujq", target=None)),
subject=Subject(text="#zv6vujq", tag=Link(text="zv6vujq", target=None, tag=True)),
mentions=[],
links=[Link(text="wonderful", target="https://example.com/"),
Link(text="nice", target="https://example.com/test")],
tags=[])]), twtfile)
tags=[],
elems=["Hello ",
Link(text="wonderful", target="https://example.com/"),
" and ",
Link(text="nice", target="https://example.com/test"),
" world",
"!"])]), twtfile)
def test_subjects(self):
twtfile = parse_file(
......@@ -68,17 +77,23 @@ class ParseFileTest(unittest.TestCase):
created=datetime.datetime(2021, 8, 3, 12, 28, 26, tzinfo=UTC_PLUS_2),
hash="g2xdgsq",
subject=Subject(text="#<1234567 https://example.com/1234567>",
tag=Link(text="1234567", target="https://example.com/1234567")),
tag=Link(text="1234567", target="https://example.com/1234567", tag=True)),
mentions=[],
links=[],
tags=[Link(text="1234567", target="https://example.com/1234567")]),
tags=[Link(text="1234567", target="https://example.com/1234567", tag=True)],
elems=[Subject(text="#<1234567 https://example.com/1234567>",
tag=Link(text="1234567", target="https://example.com/1234567", tag=True)),
" I agree",
"!"]),
Twt(twter=TWTER,
created=datetime.datetime(2021, 8, 3, 12, 33, 17, tzinfo=UTC_PLUS_2),
hash="wqw7ipq",
subject=Subject(text="re: foo", tag=None),
mentions=[],
links=[],
tags=[])]), twtfile)
tags=[],
elems=[Subject(text="re: foo", tag=None),
" Well, it's quite complicated."])]), twtfile)
def test_mentions(self):
twtfile = parse_file(
......@@ -88,13 +103,19 @@ class ParseFileTest(unittest.TestCase):
Twt(twter=TWTER,
created=datetime.datetime(2021, 8, 3, 15, 4, 9, tzinfo=UTC_PLUS_2),
hash="vu5np6q",
subject=Subject(text="#vu5np6q", tag=Link(text="vu5np6q", target=None)),
subject=Subject(text="#vu5np6q", tag=Link(text="vu5np6q", target=None, tag=True)),
mentions=[Twter(nick="eugen",
url="https://example.org/~eugen/twtxt.txt",
avatar="",
tagline="")],
links=[],
tags=[])]), twtfile)
tags=[],
elems=["Hello ",
Twter(nick="eugen",
url="https://example.org/~eugen/twtxt.txt",
avatar="",
tagline=""),
"!"])]), twtfile)
def test_tags(self):
twtfile = parse_file(
......@@ -104,13 +125,15 @@ class ParseFileTest(unittest.TestCase):
Twt(twter=TWTER,
created=datetime.datetime(2021, 8, 3, 16, 26, 53, tzinfo=UTC_PLUS_2),
hash="5thpeka",
subject=Subject(text="#5thpeka", tag=Link(text="5thpeka", target=None)),
subject=Subject(text="#5thpeka", tag=Link(text="5thpeka", target=None, tag=True)),
mentions=[],
links=[],
tags=[Link(text="eggs", target="https://example.com/eggs")])]), twtfile)
tags=[Link(text="eggs", target="https://example.com/eggs", tag=True)],
elems=["Have a look at ",
Link(text="eggs", target="https://example.com/eggs", tag=True),
"!"])]), twtfile)
def test_metadata(self):
self.maxDiff = None
twtfile = parse_file(
"# nick = eugen\n"
"# url = https://example.org/eugen/twtxt.txt\n"
......@@ -123,24 +146,73 @@ class ParseFileTest(unittest.TestCase):
url="https://example.org/eugen/twtxt.txt",
avatar="https://example.org/eugen/avatar.jpg",
tagline="The greatest feed on earth")
self.assertEqual(expectedTwter, twtfile.twter, "twter of twtfile does not match")
self.assertEqual(expectedTwter, twtfile.twts[0].twter, "twter of first twt does not match")
self.assertEqual(TwtFile(twter=expectedTwter, twts=[
Twt(twter=expectedTwter,
created=datetime.datetime(2021, 10, 25, 14, 17, 13, tzinfo=UTC_PLUS_2),
hash="ckgkqaa",
subject=Subject(text="#ckgkqaa", tag=Link(text="ckgkqaa", target=None)),
subject=Subject(text="#ckgkqaa", tag=Link(text="ckgkqaa", target=None, tag=True)),
mentions=[],
links=[],
tags=[]),
tags=[],
elems=["The metadata should override the twter information."]),
Twt(twter=expectedTwter,
created=datetime.datetime(2021, 10, 25, 14, 18, 35, tzinfo=UTC_PLUS_2),
hash="k6xkknq",
subject=Subject(text="#k6xkknq", tag=Link(text="k6xkknq", target=None)),
subject=Subject(text="#k6xkknq", tag=Link(text="k6xkknq", target=None, tag=True)),
mentions=[],
links=[],
tags=[])]), twtfile)
tags=[],
elems=["And it does. Great."])]), twtfile)
def test_multiline_twt(self):
    """Each U+2028 LINE SEPARATOR becomes an element of its own, even when two follow each other."""
    twtfile = parse_file(
        "2021-10-26T09:26:56+02:00\tNow with\u2028a Unicode LINE SEPARATOR\u2028\u2028and two other ones",
        TWTER)

    created = datetime.datetime(2021, 10, 26, 9, 26, 56, tzinfo=UTC_PLUS_2)
    subject = Subject(text="#izcp6eq",
                      tag=Link(text="izcp6eq", target=None, tag=True))
    elems = [
        "Now with",
        "\u2028",
        "a Unicode LINE SEPARATOR",
        "\u2028",
        "\u2028",
        "and two other ones",
    ]
    expected_twt = Twt(twter=TWTER, created=created, hash="izcp6eq",
                       subject=subject, mentions=[], links=[], tags=[],
                       elems=elems)

    self.assertEqual(TwtFile(twter=TWTER, twts=[expected_twt]), twtfile)
def test_empty_twt(self):
    """A twt without any text after the tab is parsed into an empty element list."""
    twtfile = parse_file("2021-10-26T09:41:41+02:00\t", TWTER)

    created = datetime.datetime(2021, 10, 26, 9, 41, 41, tzinfo=UTC_PLUS_2)
    subject = Subject(text="#yb2hp2q",
                      tag=Link(text="yb2hp2q", target=None, tag=True))
    expected_twt = Twt(twter=TWTER, created=created, hash="yb2hp2q",
                       subject=subject, mentions=[], links=[], tags=[],
                       elems=[])

    self.assertEqual(TwtFile(twter=TWTER, twts=[expected_twt]), twtfile)
def test_code(self):
    """Inline code and code blocks become Code elements; a line separator inside a block stays in its text."""
    twtfile = parse_file(
        "2021-10-26T09:53:07+02:00\tNow `some inline code` and a ```whole code block``` even ```with\u2028LINE SEPARATOR```",
        TWTER)

    created = datetime.datetime(2021, 10, 26, 9, 53, 7, tzinfo=UTC_PLUS_2)
    subject = Subject(text="#3srzo5q",
                      tag=Link(text="3srzo5q", target=None, tag=True))
    elems = [
        "Now ",
        Code(Code.INLINE, "some inline code"),
        " and a ",
        Code(Code.BLOCK, "whole code block"),
        " even ",
        Code(Code.BLOCK, "with\u2028LINE SEPARATOR"),
    ]
    expected_twt = Twt(twter=TWTER, created=created, hash="3srzo5q",
                       subject=subject, mentions=[], links=[], tags=[],
                       elems=elems)

    self.assertEqual(TwtFile(twter=TWTER, twts=[expected_twt]), twtfile)
if __name__ == "__main__":
unittest.main()
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment