twtxtmanager.py 9.62 KB
Newer Older
1
2
3
4
5
"""
Manage twts.
"""

import collections
Lysander Trischler's avatar
Lysander Trischler committed
6
7
8
import configparser
import os
import os.path
9
10
import shelve
import twtxt.models
11
import twtxt.parser
12
13
import twtxthash
import twtxtparser
Lysander Trischler's avatar
Lysander Trischler committed
14
import xdg
15

16

17
18
class TwtxtManager:
    """
    Load twts from the feed cache, thread them into conversations and keep
    track of their read status.

    Two shelves are used: the feed cache at `config.cachefile` and the read
    status store at `config.datafile`. Both are opened by `load_twts()`,
    which has to be called before any method that accesses them.
    """

    # Number of twts taken from the end of each cached feed's twt list.
    MAX_TWEETS_LIMIT = 2000

    def __init__(self, config):
        """
        Create a manager for the given configuration object.

        Nothing is read from disk here, call `load_twts()` to populate the
        manager. The `config` object has to provide the attributes `nick`,
        `twturl`, `twtfile`, `cachefile` and `datafile`.
        """

        self.config = config
        self._cache = None
        self._data = None

        self.own_source = None
        self.max_author_nick_width = 0
        self.conversation_tree = []
        self.missing_conversation_root_count = 0
        self.twts_count = 0
        self.unread_twts_count = 0
        self.read_twts_count = 0

        self.show_read_conversations = False
        self.show_unread_missing_conversation_roots = False


    def close(self):
        """
        Close the cache and data shelves if they are open. It is safe to call
        this repeatedly or before anything has been loaded.
        """

        for attribute in ("_cache", "_data"):
            store = getattr(self, attribute, None)
            if store is not None:
                store.close()
            setattr(self, attribute, None)


    def load_twts(self):
        """
        (Re)load all twts from the cache and our own twtxt file, rebuild the
        conversation tree and recalculate all counters.

        Depending on `show_read_conversations` and
        `show_unread_missing_conversation_roots` the resulting
        `conversation_tree` contains either all conversations or only those
        which still have something unread in them.
        """

        self._open_stores()

        all_tweets = []

        # map a subject hash to the list of twts which replied to this hash
        replies_by_subject = collections.defaultdict(list)

        # all hashes (new and old style) of the twts we actually have
        known_tweet_hashes = set()

        self.max_author_nick_width = 0
        self.read_twts_count = 0
        self.unread_twts_count = 0

        for url, feed in self._cache.items():
            # this key shares the cache with the feeds but is no feed itself
            if url == "last_update":
                continue
            tweets = feed['tweets'][-self.MAX_TWEETS_LIMIT:]
            if not tweets:
                continue
            self.max_author_nick_width = max(self.max_author_nick_width,
                                             len(tweets[0].source.nick))
            all_tweets.extend(tweets)
            for twt in tweets:
                self.enhance_twt(twt)
                replies_by_subject[twt.subject].append(twt)
                known_tweet_hashes.update((twt.hash, twt.old_hash))
                # the read status may be stored under either hash
                twt.read = self._is_marked_read(twt.hash, twt.old_hash)
                if twt.read:
                    self.read_twts_count += 1
                else:
                    self.unread_twts_count += 1

        for tweets in replies_by_subject.values():
            tweets.sort()
        all_tweets.sort()

        # twts without any subject are the top level twts
        conversation_tree = replies_by_subject.pop(None, [])

        # build conversation trees
        for twt in all_tweets:
            twt.replies.extend(replies_by_subject.get(twt.hash, []))
            # do not attach the very same replies twice if both hashes match
            if twt.old_hash != twt.hash:
                twt.replies.extend(replies_by_subject.get(twt.old_hash, []))

        # ensure that every conversation has at least a fake root twt and
        # thus is reachable
        self.missing_conversation_root_count = 0
        for subject_hash, replies in replies_by_subject.items():
            if subject_hash in known_tweet_hashes:
                continue
            conversation_tree.append(self._create_missing_root(subject_hash, replies))
            self.missing_conversation_root_count += 1
        conversation_tree.sort()

        self.twts_count = len(all_tweets)

        if self.show_read_conversations:
            self.conversation_tree = conversation_tree
        else:
            self.conversation_tree = [twt for twt in conversation_tree
                                      if self._is_conversation_visible(twt)]


    def _open_stores(self):
        """
        Open the cache and data shelves and put our own twts in the cache.
        Shelves left open by an earlier load are closed first, so reloading
        does not leak the old handles.
        """

        self.close()

        self._cache = shelve.open(self.config.cachefile)
        self.own_source = twtxt.models.Source(nick=self.config.nick, url=self.config.twturl)
        with open(self.config.twtfile, 'r', encoding='utf-8') as fd:
            own_twts = twtxt.parser.parse_tweets(fd.readlines(), self.own_source)
        # NOTE(review): shelve keys must be strings, so this presumably fails
        # when no twturl is configured (None) -- confirm the desired handling.
        self._cache[self.own_source.url] = {'tweets': own_twts}

        os.makedirs(os.path.dirname(self.config.datafile), exist_ok=True)
        self._data = shelve.open(self.config.datafile)


    def _is_marked_read(self, *hashes):
        """
        Determine whether the data store flags at least one of the given
        hashes as read.
        """

        for twt_hash in hashes:
            entry = self._data.get(twt_hash)
            if entry and entry.get("read", False):
                return True
        return False


    def _create_missing_root(self, subject_hash, replies):
        """
        Create a fake root twt for a conversation whose real root twt with the
        given hash is unknown to us. The fake twt is flagged with
        `missing = True` and takes over the timestamp of its first reply.
        """

        if replies:
            created_at = replies[0].created_at
        else:
            # Imported locally, the module does not import datetime globally.
            import datetime
            print("WARNING: referenced tweet '%s' has no replies, WTF?!" % subject_hash)
            # NOTE(review): assumes parsed twts carry timezone-aware
            # timestamps, so that sorting does not mix naive and aware
            # values -- confirm against twtxt.parser.
            created_at = datetime.datetime.now(datetime.timezone.utc)

        twt = twtxt.models.Tweet(created_at=created_at,
                                 text=" ", # constructor raises on empty string, will be reset down below
                                 source=twtxt.models.Source(nick="UNKNOWN", url=None))
        twt.text = ""
        twt.hash = subject_hash
        twt.old_hash = "?"
        twt.subject = None
        twt.tokens = []
        twt.replies = replies
        twt.read = self._is_marked_read(subject_hash)
        twt.missing = True
        return twt


    def _is_conversation_visible(self, twt):
        """
        Determine whether the conversation starting at the given root twt
        has to be shown although read conversations are hidden.
        """

        if self.show_unread_missing_conversation_roots:
            considered_read = twt.read
        else:
            # fake roots do not count as unread on their own
            considered_read = getattr(twt, "missing", False) or twt.read
        return not considered_read or self._has_unread_replies(twt)


    def enhance_twt(self, twt):
        """
        Parse the text of the given twt, store all its enhanced information
        (such as hashes, tokens, subject, replies) in the twt and return it
        for further processing.
        """

        twt.hash = twtxthash.create_hash(twt)
        twt.old_hash = twtxthash.create_old_hash(twt)
        # the hashes above are calculated on the original text
        twt.text = twt.text.replace("\t", " ")
        twt.tokens = list(twtxtparser.parse_twt_text(twt.text))
        # the first subject hash token denotes the twt this one replies to
        twt.subject = next((token.hash for token in twt.tokens
                                       if isinstance(token, twtxtparser.SubjectHash)), None)
        twt.replies = []
        twt.read = False
        return twt


    def toggle_read(self, twt):
        """
        Toggle the read status of the given twt and sync it to disk.

        When a twt becomes unread, a read flag stored under its old hash is
        cleared as well. Otherwise the next `load_twts()` would find that
        flag and report the twt as read again.
        """

        twt.read = not twt.read

        entry = self._data.get(twt.hash, {})
        entry["read"] = twt.read
        self._data[twt.hash] = entry

        old_hash = getattr(twt, "old_hash", None)
        if not twt.read and old_hash and old_hash != twt.hash:
            old_entry = self._data.get(old_hash)
            if old_entry and old_entry.get("read", False):
                old_entry["read"] = False
                self._data[old_hash] = old_entry

        self._data.sync()

        # Fake roots of missing conversations are not part of the counters
        # calculated by load_twts(), so they must not change them either.
        if not getattr(twt, "missing", False):
            self.read_twts_count += 1 if twt.read else -1
            self.unread_twts_count -= 1 if twt.read else -1


    def resolve_nick_by_url_from_cache(self, url):
        """
        Resolve the given source URL to a nick using the cache. Returns None
        if the URL is empty, unknown or its feed does not have any twts.

        TODO: We could also use our config instead! This would even allow us to
        resolve our own nick which might be different from the twt's one.
        """

        if not url:
            return None

        feed = self._cache.get(url)
        # the cache also holds entries which are no feeds
        if not isinstance(feed, dict):
            return None

        tweets = feed['tweets']
        if tweets:
            return tweets[0].source.nick
        return None


    def is_following(self, nick, url):
        """
        Determine whether there is a feed in the cache for the given URL or,
        if no URL is given, a feed whose first twt was written by the given
        nick.
        """

        if url:
            return url in self._cache
        if nick:
            for feed in self._cache.values():
                # the cache also holds entries which are no feeds
                if not isinstance(feed, dict):
                    continue
                tweets = feed['tweets']
                if tweets and tweets[0].source.nick == nick:
                    return True
        return False


    def _has_unread_replies(self, twt):
        """
        Determine whether there are any unread replies in the given
        conversation without taking the read status of the given twt itself
        into account.

        The replies are walked iteratively and every twt is visited only
        once, so neither very deep nor cyclic conversations are a problem.
        """

        pending = list(twt.replies)
        seen = set()
        while pending:
            reply = pending.pop()
            if id(reply) in seen:
                continue
            seen.add(id(reply))
            if not reply.read:
                return True
            pending.extend(reply.replies)
        return False


    def publish_twt(self, created_at, text):
        """
        Publish the given text at the specified timestamp by writing it to the
        local twtxt.txt file.

        The text is written as is. NOTE(review): a line break in the text
        would presumably start a new line in the feed -- confirm that callers
        sanitize it.
        """

        with open(self.config.twtfile, 'a', encoding='utf-8') as fd:
            fd.write("%s\t%s\n" % (created_at.isoformat(), text))


Lysander Trischler's avatar
Lysander Trischler committed
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
class Config:
    """
    Read-only access to the settings of the twtxt configuration file.

    The wrapped object is a `configparser.ConfigParser`. The general
    settings are read from its "twtxt" section, the colors from the
    "colors" and "nick-colors" sections.
    """

    _CONFIG_DIR = os.path.join(xdg.xdg_config_home(), 'twtxt')
    _CONFIG_FILE = os.path.join(_CONFIG_DIR, 'config')
    _CACHE_DIR = os.path.join(xdg.xdg_cache_home(), 'twtxt')

    # The original twtxt implementation actually places its 'cache' file in the
    # config directory, which is wrong. The XDG Base Directory Specification
    # defines a dedicated cache directory. To be backwards-compatible, we cannot
    # fix this, though. :-(
    _CACHE_FILE = os.path.join(_CONFIG_DIR, 'cache')

    _DATA_DIR = os.path.join(xdg.xdg_data_home(), 'twtxt')
    _DATA_FILE = os.path.join(_DATA_DIR, 'data')

    def __init__(self, cfg):
        """
        Wrap the given, already populated config parser.
        """
        self._cfg = cfg

    @classmethod
    def load_file(cls):
        """
        Read the config file from its default location and return a new
        instance for it. Raises a ValueError if the file does not exist.
        """
        config_path = Config._CONFIG_FILE
        if os.path.exists(config_path):
            parser = configparser.ConfigParser()
            parser.read(config_path)
            return cls(parser)
        raise ValueError("Config file '%s' does not exist." % config_path)

    def _twtxt_option(self, option, default):
        """
        Look up the given option in the "twtxt" section and return the
        given default if it is not set.
        """
        return self._cfg.get("twtxt", option, fallback=default)

    @property
    def nick(self):
        """
        The configured nick, defaults to the lower-cased value of the USER
        environment variable (or an empty string if that is not set).
        """
        login_name = os.environ.get("USER", "")
        return self._twtxt_option("nick", login_name.lower())

    @property
    def twtfile(self):
        """
        Path of our own twtxt file, relative paths are resolved against the
        config directory. Defaults to "twtxt.txt".
        """
        configured = self._twtxt_option("twtfile", "twtxt.txt")
        return self._make_abs(Config._CONFIG_DIR, configured)

    @property
    def twturl(self):
        """
        The configured URL of our own feed or None if there is none.
        """
        return self._twtxt_option("twturl", None)

    @property
    def cachefile(self):
        """
        Path of the cache file, relative paths are resolved against the
        config directory. Defaults to "cache".
        """
        configured = self._twtxt_option("cachefile", "cache")
        return self._make_abs(Config._CONFIG_DIR, configured)

    @property
    def datafile(self):
        """
        Path of the data file, relative paths are resolved against the
        config directory. Defaults to the 'data' file in the data directory.
        """
        configured = self._twtxt_option("datafile", Config._DATA_FILE)
        return self._make_abs(Config._CONFIG_DIR, configured)

    def _make_abs(self, base_path, path):
        """
        Expand environment variables and user homes in the given `path`. An
        absolute result is returned as is, a relative one is appended to the
        given `base_path`.
        """
        expanded = os.path.expanduser(os.path.expandvars(path))
        return expanded if os.path.isabs(expanded) else os.path.join(base_path, expanded)

    def colors(self, name):
        """
        Return the value of the given option in the "colors" section or
        None if it is not set.
        """
        return self._cfg.get("colors", name, fallback=None)

    @property
    def nick_colors(self):
        """
        All (option, value) pairs of the "nick-colors" section or an empty
        list if there is no such section.
        """
        if not self._cfg.has_section("nick-colors"):
            return []
        return self._cfg.items("nick-colors")