From a5c3105a7af60a413f65b526e2045c8ccc5d7264 Mon Sep 17 00:00:00 2001
From: sweing <sjweingaertner@gmail.com>
Date: Tue, 9 Jan 2024 18:51:15 +0100
Subject: [PATCH 01/12] Remove Twitter, add Mastodon

---
 api/__init__.py                     |   4 +-
 api/modules/{tweets.py => toots.py} |  38 ++---
 config.template.json                |  10 ++
 downloadFromApi.py                  |  43 ++++++
 getOldToots.py                      |  61 ++++++++
 lib/Toot.py                         | 152 +++++++++++++++++++
 lib/TootForest.py                   | 218 ++++++++++++++++++++++++++++
 lib/Tweet.py                        | 146 -------------------
 lib/TweetForest.py                  | 108 --------------
 lib/mastodon_auth.py                |  15 ++
 lib/mastodon_base.py                |  55 +++++++
 lib/shared.py                       |   6 +-
 lib/{tweets_aux.py => toots_aux.py} |  15 +-
 lib/tweets_base.py                  |  43 ------
 lib/twitter_auth.py                 |  18 ---
 runInit.py                          |   3 +-
 runListener.py                      | 100 +++++++++----
 17 files changed, 660 insertions(+), 375 deletions(-)
 rename api/modules/{tweets.py => toots.py} (69%)
 create mode 100644 downloadFromApi.py
 create mode 100644 getOldToots.py
 create mode 100644 lib/Toot.py
 create mode 100644 lib/TootForest.py
 delete mode 100644 lib/Tweet.py
 delete mode 100644 lib/TweetForest.py
 create mode 100644 lib/mastodon_auth.py
 create mode 100644 lib/mastodon_base.py
 rename lib/{tweets_aux.py => toots_aux.py} (70%)
 delete mode 100644 lib/tweets_base.py
 delete mode 100644 lib/twitter_auth.py

diff --git a/api/__init__.py b/api/__init__.py
index e1b8ada..255f864 100644
--- a/api/__init__.py
+++ b/api/__init__.py
@@ -1,6 +1,6 @@
 from flask import Flask, current_app, render_template, request, session
 from flask_cors import CORS
-from .modules import tweets, log
+from .modules import toots, log
 from .modules.auth import login_exempt
 from lib.shared import authToken, secretKey
 from datetime import timedelta
@@ -12,7 +12,7 @@ def create_app(test_config = None):
 
     cors = CORS(app)
 
-    app.register_blueprint(tweets.bp)
+    app.register_blueprint(toots.bp)
     app.register_blueprint(log.bp)
 
     @app.route('/')
diff --git a/api/modules/tweets.py b/api/modules/toots.py
similarity index 69%
rename from api/modules/tweets.py
rename to api/modules/toots.py
index cddbbfb..920d4e3 100644
--- a/api/modules/tweets.py
+++ b/api/modules/toots.py
@@ -1,13 +1,14 @@
 from datetime import datetime
 import logging
+import json
 
 from flask import Blueprint, jsonify, Response, render_template, abort
 from flask_cors import cross_origin
 from .auth import login_exempt
 
-from lib.tweets_base import readTweetsApiJson
-from lib.Tweet import Tweet
-from lib.TweetForest import TweetForest
+from lib.mastodon_base import readTootsApiJson
+from lib.Toot import Toot
+from lib.TootForest import TootForest
 
 bp = Blueprint('tweets', __name__, url_prefix='/tweets')
 
@@ -15,37 +16,36 @@
 @cross_origin()
 @login_exempt
 def root():
-    return jsonify({
+    return Response(json.dumps({
         'date': int(datetime.timestamp(datetime.now())),
-        'tweets': readTweetsApiJson()
-    })
+        'tweets': readTootsApiJson()
+    }), mimetype='application/json')
 
 
 @bp.route('/<int:id>')
 def tweet(id):
-    tweet = Tweet.loadFromFile(id)
+    tweet = Toot.loadFromFile(id)
     return jsonify(tweet.data)
 
 @bp.route('/forest')
 def forest_show():
-    return render_template('forest.html.j2', forest = TweetForest.fromFolder())
+    return render_template('forest.html.j2', forest = TootForest.fromFolder())
     #return Response(str(forest), mimetype='text/plain')
 
 @bp.route('/forest/renew')
 def forest_create():
     logging.warning('Manual invocation of creating forest!')
-    # renew forest
-    forest = TweetForest.fromFolder()
+    forest = TootForest.fromFolder()
     forest.saveApiJson()
-    return jsonify(readTweetsApiJson())
+    return Response(json.dumps(readTootsApiJson()), mimetype='application/json')
 
 @bp.route('/add/<int:id>')
 def add(id):
     logging.warning('Manual invocation of adding tweet (id: {})!'.format(id))
-    tweet = Tweet.loadFromTwitter(id)
+    tweet = Toot.loadFromMastodon(id)
     tweet.save()
     # renew forest
-    forest = TweetForest.fromFolder()
+    forest = TootForest.fromFolder()
     forest.saveApiJson()
     return jsonify({
         'message': 'added',
@@ -58,10 +58,10 @@ def add(id):
 @bp.route('/delete/<int:id>')
 def delete(id):
     logging.warning('Manual invocation of deleting tweet (id: {})!'.format(id))
-    tweet = Tweet.loadFromFile(id)
+    tweet = Toot.loadFromFile(id)
     tweet.delete()
     # renew forest
-    forest = TweetForest.fromFolder()
+    forest = TootForest.fromFolder()
     forest.saveApiJson()
     return jsonify({
         'message': 'deleted',
@@ -73,19 +73,19 @@ def delete(id):
 
 @bp.route('/all')
 def all():
-    tweets = Tweet.loadFromFolder()
+    tweets = Toot.loadFromFolder()
     tweets.sort(key = lambda x: x.getDateTime(), reverse = True)
-    # [Tweet(i) for i in tweets]
+    # [Toot(i) for i in tweets]
     return render_template('all.html.j2', tweets = tweets)
 
 @bp.route('/stories')
 def info():
-    tweets = readTweetsApiJson()
+    tweets = readTootsApiJson()
     stories = {};
     for id, info in tweets.items():
         if 'story' in info:
             storyId = info['story']
             if storyId not in stories:
                 stories[storyId] = []
-            stories[storyId].append(Tweet.loadFromFile(id))
+            stories[storyId].append(Toot.loadFromFile(id))
     return render_template('stories.html.j2', stories = stories)
diff --git a/config.template.json b/config.template.json
index f54f6a2..6cf829d 100644
--- a/config.template.json
+++ b/config.template.json
@@ -1,6 +1,16 @@
 {
     "authToken": "xxx",
     "secretKey": "xxx",
+    "mastodon": {
+        "instance_url": "https://mastodon.instance",
+        "client": {
+            "key": "xxx",
+            "secret": "xxx"
+        },
+        "access": {
+            "token": "xxx"
+        }
+    },
     "twitter": {
         "api": {
             "key": "xxx",
diff --git a/downloadFromApi.py b/downloadFromApi.py
new file mode 100644
index 0000000..c4e7585
--- /dev/null
+++ b/downloadFromApi.py
@@ -0,0 +1,43 @@
+import os
+import requests
+import json
+from lib.shared import authToken
+
+BASE_URL = "https://dev.decarbnow.space"
+AUTH_URL = BASE_URL + "/authenticate/"
+DOWNLOAD_URL = BASE_URL + "/tweets/"
+
+# Create a session to maintain authentication
+session = requests.Session()
+
+# Authenticate using the provided token
+auth_response = session.get(AUTH_URL + authToken)
+if auth_response.status_code != 200:
+    print(f"Authentication failed with status code: {auth_response.status_code}")
+    exit()
+
+# Path to the data folder
+data_folder = "data"
+os.makedirs(data_folder, exist_ok=True)
+
+# Load tweet IDs from fromAPI.json
+with open(os.path.join(data_folder, "fromAPI.json"), "r") as json_file:
+    tweet_data = json.load(json_file)
+    tweet_ids = tweet_data.get("tweets", {}).keys()
+
+# Download files for each tweet ID
+for tweet_id in tweet_ids:
+    download_url = DOWNLOAD_URL + tweet_id
+
+    # Send request to download the file
+    file_response = session.get(download_url)
+    if file_response.status_code == 200:
+        # Save the file in the data folder
+        with open(os.path.join(data_folder, "live", f"{tweet_id}.json"), "wb") as file:
+            file.write(file_response.content)
+        print(f"Downloaded file for tweet ID {tweet_id}")
+    else:
+        print(f"Failed to download file for tweet ID {tweet_id} with status code: {file_response.status_code}")
+
+# Close the session
+session.close()
diff --git a/getOldToots.py b/getOldToots.py
new file mode 100644
index 0000000..728063f
--- /dev/null
+++ b/getOldToots.py
@@ -0,0 +1,61 @@
+from mastodon import Mastodon
+import json
+from datetime import datetime
+import os
+import shutil
+from lib.mastodon_auth import get_auth_user
+
+from lib.shared import base_folder, tootsFetchSettings, toots_folder
+
+# Define the URL you want to check for in the messages
+keyword = tootsFetchSettings['listen'] + '/@'
+
+# Define the path for the data folder and archive subfolder
+data_folder = os.path.join(toots_folder, tootsFetchSettings['folder'])
+
+archive_folder = os.path.join(data_folder, "archive")
+
+# Ensure the data and archive folders exist
+os.makedirs(data_folder, exist_ok=True)
+os.makedirs(archive_folder, exist_ok=True)
+
+# Get a list of existing message IDs in the data folder
+existing_ids = [filename.split(".")[0] for filename in os.listdir(data_folder) if filename.endswith(".json")]
+
+# Create an instance of Mastodon
+mastodon = get_auth_user()
+
+# Custom JSON encoder to handle datetime objects
+class DateTimeEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, datetime):
+            return obj.strftime('%Y-%m-%d %H:%M:%S')
+        return super().default(obj)
+
+# Search for messages containing the target URL
+search_results = mastodon.search(keyword)
+
+# Check each search result for the target URL
+for status in search_results['statuses']:
+    # Generate a unique filename using ID
+    message_id = str(status['id'])
+    filename = f'{message_id}.json'
+    filepath = os.path.join(data_folder, filename)
+
+    # Save the relevant information to a JSON file
+    with open(filepath, 'w') as output_file:
+        json.dump(status, output_file, cls=DateTimeEncoder)
+
+    print(f'Message with ID {message_id} saved to {filepath}')
+
+    # Remove the ID from the existing IDs list if found
+    if message_id in existing_ids:
+        existing_ids.remove(message_id)
+
+# Move any remaining files in the data folder to the archive folder
+for message_id in existing_ids:
+    filename = f'{message_id}.json'
+    filepath = os.path.join(data_folder, filename)
+    archive_filepath = os.path.join(archive_folder, filename)
+    shutil.move(filepath, archive_filepath)
+    print(f'Message with ID {message_id} moved to archive folder')
diff --git a/lib/Toot.py b/lib/Toot.py
new file mode 100644
index 0000000..987d7d6
--- /dev/null
+++ b/lib/Toot.py
@@ -0,0 +1,152 @@
+from .shared import toots_folder, tootsFetchSettings
+#from .mastodon_auth import get_api_app
+from .mastodon_base import *
+
+from datetime import datetime
+from html.parser import HTMLParser
+
+class URLExtractor(HTMLParser):
+    def __init__(self):
+        super().__init__()
+        self.urls = []
+
+    def handle_starttag(self, tag, attrs):
+        if tag == 'a':
+            self.urls.extend(value for name, value in attrs if name == 'href')
+
+class Toot(object):
+    data = None
+
+    @staticmethod
+    def loadFromFile(id, folder = tootsFetchSettings['folder']):
+        return Toot(readTootFromFolder(id, folder))
+
+    @staticmethod
+    def loadFromFolder(folder = tootsFetchSettings['folder']):
+        return [Toot(j) for j in readTootsFromFolder(folder)]
+
+    @staticmethod
+    def loadFromMastodon(id):
+        return Toot(readTootFromMastodon(id))
+
+
+    def __init__(self, data):
+        self.data = data
+        self.children = []
+
+    def save(self, folder = tootsFetchSettings['folder']):
+        writeTootToFolder(self.data, folder)
+
+    def delete(self, folder = tootsFetchSettings['folder']):
+        deleteTootFromFolder(self.getId(), folder)
+
+    def addChild(self, toot):
+        self.children.append(toot)
+
+    def getChildren(self):
+        return self.children
+
+    def hasChildren(self):
+        return len(self.children) > 0
+
+    def asApiDict(self, add = {}):
+        return {
+            'url': self.getPathOfLinksTo()[0],
+            'hashtags': self.getHashtags(),
+            'timestamp': str(self.getDateTime()),
+            'content': self.getText(),
+            'account': self.getUserName(),
+            'display_name': self.getUserScreenName(),
+            'avatar': self.getUserImageHttps(),
+            'media': self.getMedia(),
+            **add
+        }
+
+    def getData(self):
+        return self.data
+
+    def toStringList(self):
+        return [
+            "{}, {}, {}".format(self.getUserName(), self.getDateTime(), self.getId()),
+            "# {}".format(', '.join(self.getHashtags())),
+            "▶ {}".format(', '.join(self.getLinks())),
+            "¶ {}".format(self.getText())
+        ]
+
+    def __str__(self):
+        return '\n'.join(self.toStringList())
+
+    def getId(self):
+        return self.data['id']
+
+    def getUserId(self):
+        return self.data['account']['id']
+
+    def getUserName(self):
+        return self.data['account']['username']
+
+    def getUserScreenName(self):
+        return self.data['account']['display_name']
+    
+    def getUserImageHttps(self):
+        return self.data['account']['avatar_static']
+
+    def getDateTime(self):
+        return datetime.strptime(self.data['created_at'], '%Y-%m-%d %H:%M:%S')
+
+    def getMedia(self):
+        if 'media_attachments' in self.data:
+            return [item['preview_url'] for item in self.data['media_attachments'] if 'preview_url' in item]
+        return []
+
+    def getText(self):
+        if 'content' in self.data:
+            return self.data['content']
+        elif 'extended_toot' in self.data:
+            if 'full_text' in self.data['extended_toot']:
+                return self.data['extended_toot']['full_text']
+        elif 'retooted_status' in self.data and 'extended_toot' in self.data['retooted_status'] and self.data['retooted_status']['extended_toot']['full_text']:
+            return self.data['retooted_status']['extended_toot']['full_text']
+        elif 'text' in self.data:
+            return self.data['text']
+
+    def isReply(self):
+        return 'in_reply_to_id' in self.data
+
+    def getReplyToId(self):
+        return self.data['in_reply_to_id']
+
+    def getReplyToUserId(self):
+        return self.data['in_reply_to_account_id']
+
+    def isSelfReply(self):
+        return self.isReply() and self.getUserId() == self.getReplyToUserId()
+
+    def getHashtags(self):
+        hashtags = []
+        if self.data['tags']:
+            hashtags.extend([h['name'] for h in self.data['tags']])
+        if 'extended_toot' in self.data and self.data['extended_toot']['entities']['hashtags']:
+            hashtags.extend([h['text'] for h in self.data['extended_toot']['entities']['hashtags']])
+        return hashtags
+
+    def hasHashtag(self, hashtag):
+        return hashtag in self.getHashtags()
+
+    def getLinks(self):
+        urls = []
+        if 'content' in self.data:
+            extractor = URLExtractor()
+            extractor.feed(self.data['content'])
+            urls.extend(extractor.urls)
+        return urls
+
+    def getPathOfLinksTo(self, to = tootsFetchSettings['link']):
+        return [u.split(to, 1)[1] for u in self.getLinks() if to in u]
+
+    def hasLinkTo(self, to = f"{tootsFetchSettings['link']}/@"):
+        return len(self.getPathOfLinksTo(to)) > 0
+
+    def getUrl(self):
+        if 'url' in self.data:
+            return self.data['url']
diff --git a/lib/TootForest.py b/lib/TootForest.py
new file mode 100644
index 0000000..b70902a
--- /dev/null
+++ b/lib/TootForest.py
@@ -0,0 +1,218 @@
+from .shared import tootsFetchSettings
+from .mastodon_base import readTootsFromFolder, writeTootsApiJson
+from .Toot import Toot
+from collections import OrderedDict
+
+class TootForest(object):
+    trunks = []
+
+    @staticmethod
+    def fromFolder(folder = tootsFetchSettings['folder']):
+        toots = Toot.loadFromFolder(folder)
+        toots = filter(lambda t: t.hasLinkTo(), toots)
+        # toots = filter(lambda t: not t.hasHashtag('private'), toots)
+        return TootForest(toots)
+
+    def __init__(self, toots):
+        self.trunks = []
+        tootsDict = {t.getId(): t for t in toots}
+
+        for toot in tootsDict.values():
+            if toot.isSelfReply():
+                if toot.getReplyToId() in tootsDict:
+                    tootsDict[toot.getReplyToId()].addChild(toot)
+                    continue
+            self.trunks.append(toot)
+
+    def __str__(self):
+        def printLeafs(toots, i = 0):
+            a = []
+            for toot in toots:
+                a.extend(["{}{}".format('  ' * i, l) for l in toot.toStringList()])
+                a.append('')
+                if toot.hasChildren():
+                    a.extend(printLeafs(toot.getChildren(), i + 1))
+            return a
+        return '\n'.join(printLeafs(self.trunks))
+
+    def asApiJson(self):
+        apiJson = {};
+        for toot in self.trunks:
+            if toot.hasChildren():
+                story = toot.getId()
+                apiJson[story] = toot.asApiDict({'story': str(story)})
+                while toot.hasChildren():
+                    # ONLY FIRST CHILD
+                    toot = toot.getChildren()[0]
+                    apiJson[toot.getId()] = toot.asApiDict({'story': str(story)})
+            else:
+                apiJson[toot.getId()] = toot.asApiDict()
+
+        return apiJson
+    
+    def rename_ids(self, data):
+        # Create a dictionary to store the count of each story ID
+        story_counts = {}
+
+        # Create a dictionary to store the length of each story
+        story_lengths = {}
+
+        # Calculate the length of each story before the loop
+        for tweet_id, values in data.items():
+            # Check if "story" key is present, otherwise use tweet_id as the story ID
+            story_id = values.get("story", tweet_id)
+
+            # Update the length of the story
+            story_lengths[story_id] = story_lengths.get(story_id, 0) + 1
+
+        # Iterate through the original dictionary and create a new one with modified keys
+        result = {}
+        for tweet_id, values in data.items():
+            # Check if "story" key is present, otherwise use tweet_id as the story ID
+            story_id = values.get("story", tweet_id)
+
+            # Calculate the count as the difference between the length of the story and the current length
+            count = story_lengths[story_id]
+            story_lengths[story_id] = story_lengths[story_id] - 1
+
+            # Create the new key in the format "story_id.count_tweet_id"
+            new_key = f"{story_id}.{count}_{tweet_id}"
+            result[new_key] = values
+
+        return result
+    
+    def revert_ids(self, data):
+        # Iterate through the dictionary and create a new one with reverted keys
+        result = {}
+        for key, values in data.items():
+            # Split the key into story_id.count_tweet_id
+            parts = key.split('_')
+
+            # Extract the tweet_id from the second part
+            tweet_id = parts[1]
+
+            # Create the new key with the original tweet_id
+            new_key = tweet_id
+            result[new_key] = values
+
+        return result
+
+
+    def saveApiJson(self, file = tootsFetchSettings['file']):
+        data = self.asApiJson()
+        renamed_data = self.rename_ids(data)
+        # Sort toots based on id in descending order
+        sorted_toots = OrderedDict(sorted(renamed_data.items(), key=lambda x: x[0], reverse=True))
+        final_toots = self.revert_ids(sorted_toots)
+
+        writeTootsApiJson(final_toots, file)
+
+
+# from .shared import tweetsFetchSettings
+# from .mastodon_base import readTweetsFromFolder, writeTweetsApiJson
+# from .Tweet import Tweet
+# from collections import OrderedDict
+
+# class TweetForest(object):
+#     trunks = []
+
+#     @staticmethod
+#     def fromFolder(folder = tweetsFetchSettings['folder']):
+#         tweets = Tweet.loadFromFolder(folder)
+#         tweets = filter(lambda t: t.hasLinkTo(), tweets)
+#         # tweets = filter(lambda t: not t.hasHashtag('private'), tweets)
+#         return TweetForest(tweets)
+
+#     def __init__(self, tweets):
+#         self.trunks = []
+#         tweetsDict = {t.getId(): t for t in tweets}
+
+#         for tweet in tweetsDict.values():
+#             if tweet.isSelfReply():
+#                 if tweet.getReplyToId() in tweetsDict:
+#                     tweetsDict[tweet.getReplyToId()].addChild(tweet)
+#                     continue
+
+#             self.trunks.append(tweet)
+
+#     def __str__(self):
+#         def printLeafs(tweets, i = 0):
+#             a = []
+#             for tweet in tweets:
+#                 a.extend(["{}{}".format('  ' * i, l) for l in tweet.toStringList()])
+#                 a.append('')
+#                 if tweet.hasChildren():
+#                     a.extend(printLeafs(tweet.getChildren(), i + 1))
+#             return a
+#         return '\n'.join(printLeafs(self.trunks))
+
+#     def asApiJson(self):
+#         apiJson = {};
+#         for tweet in self.trunks:
+#             if tweet.hasChildren():
+#                 story = tweet.getId()
+#                 apiJson[story] = tweet.asApiDict({'story': story})
+#                 while tweet.hasChildren():
+#                     # ONLY FIRST CHILD
+#                     tweet = tweet.getChildren()[0]
+#                     apiJson[tweet.getId()] = tweet.asApiDict({'story': story})
+#             else:
+#                 apiJson[tweet.getId()] = tweet.asApiDict()
+
+#         return apiJson
+    
+#     def rename_ids(self, data):
+#         # Create a dictionary to store the count of each story ID
+#         story_counts = {}
+
+#         # Create a dictionary to store the length of each story
+#         story_lengths = {}
+
+#         # Calculate the length of each story before the loop
+#         for tweet_id, values in data.items():
+#             # Check if "story" key is present, otherwise use tweet_id as the story ID
+#             story_id = values.get("story", tweet_id)
+
+#             # Update the length of the story
+#             story_lengths[story_id] = story_lengths.get(story_id, 0) + 1
+
+#         # Iterate through the original dictionary and create a new one with modified keys
+#         result = {}
+#         for tweet_id, values in data.items():
+#             # Check if "story" key is present, otherwise use tweet_id as the story ID
+#             story_id = values.get("story", tweet_id)
+
+#             # Calculate the count as the difference between the length of the story and the current length
+#             count = story_lengths[story_id]
+#             story_lengths[story_id] = story_lengths[story_id] - 1
+
+#             # Create the new key in the format "story_id.count_tweet_id"
+#             new_key = f"{story_id}.{count}_{tweet_id}"
+#             result[new_key] = values
+
+#         return result
+    
+#     def revert_ids(self, data):
+#         # Iterate through the dictionary and create a new one with reverted keys
+#         result = {}
+#         for key, values in data.items():
+#             # Split the key into story_id.count_tweet_id
+#             parts = key.split('_')
+
+#             # Extract the tweet_id from the second part
+#             tweet_id = parts[1]
+
+#             # Create the new key with the original tweet_id
+#             new_key = tweet_id
+#             result[new_key] = values
+
+#         return result
+
+#     def saveApiJson(self, file = tweetsFetchSettings['file']):
+#         data = self.asApiJson()
+#         renamed_data = self.rename_ids(data)
+#         # Sort tweets based on id in descending order
+#         sorted_tweets = OrderedDict(sorted(renamed_data.items(), key=lambda x: x[0], reverse=True))
+#         final_tweets = self.revert_ids(sorted_tweets)
+
+#         writeTweetsApiJson(final_tweets, file)
\ No newline at end of file
diff --git a/lib/Tweet.py b/lib/Tweet.py
deleted file mode 100644
index 60e550c..0000000
--- a/lib/Tweet.py
+++ /dev/null
@@ -1,146 +0,0 @@
-from .shared import tweets_folder, tweetsFetchSettings
-from .twitter_auth import get_api_app
-from .tweets_base import *
-
-from datetime import datetime
-
-class Tweet(object):
-    data = None
-
-    @staticmethod
-    def loadFromFile(id, folder = tweetsFetchSettings['folder']):
-        return Tweet(readTweetFromFolder(id, folder))
-
-    @staticmethod
-    def loadFromFolder(folder = tweetsFetchSettings['folder']):
-        return [Tweet(j) for j in readTweetsFromFolder(folder)]
-
-    @staticmethod
-    def loadFromTwitter(id):
-        return Tweet(readTweetFromTwitter(id))
-
-
-    def __init__(self, data):
-        self.data = data
-        self.children = []
-
-    def save(self, folder = tweetsFetchSettings['folder']):
-        writeTweetToFolder(self.data, folder)
-
-    def delete(self, folder = tweetsFetchSettings['folder']):
-        deleteTweetFromFolder(self.getId(), folder)
-
-    def addChild(self, tweet):
-        self.children.append(tweet)
-
-    def getChildren(self):
-        return self.children
-
-    def hasChildren(self):
-        return len(self.children) > 0
-
-    def asApiDict(self, add = {}):
-        return {
-            'url': self.getPathOfLinksTo()[0],
-            'hashtags': self.getHashtags(),
-            'timestamp': str(self.getDateTime()),
-            'content': self.getText(),
-            'account': self.getUserName(),
-            'display_name': self.getUserScreenName(),
-            'avatar': self.getUserImageHttps(),
-            'media': self.getMedia(),
-            **add
-        }
-
-    def getData(self):
-        return self.data
-
-    def toStringList(self):
-        return [
-            "{}, {}, {}".format(self.getUserName(), self.getDateTime(), self.getId()),
-            "# {}".format(', '.join(self.getHashtags())),
-            "▶ {}".format(', '.join(self.getLinks())),
-            "¶ {}".format(self.getText())
-        ]
-
-    def __str__(self):
-        return '\n'.join(self.toStringList())
-
-    def getId(self):
-        return self.data['id_str']
-
-    def getUserId(self):
-        return self.data['user']['id_str']
-
-    def getUserName(self):
-        return self.data['user']['name']
-
-    def getUserScreenName(self):
-        return self.data['user']['screen_name']
-    
-    def getUserImageHttps(self):
-        return self.data['user']['profile_image_url_https']
-
-    def getDateTime(self):
-        return datetime.strptime(self.data['created_at'], '%a %b %d %H:%M:%S +0000 %Y')
-
-    def getMedia(self):
-        if 'extended_tweet' in self.data and 'extended_entities' in self.data['extended_tweet'] and 'media' in self.data['extended_tweet']['extended_entities']:
-            return([item['media_url_https'] for item in self.data['extended_tweet']['extended_entities']['media']])
-        elif 'extended_entities' in self.data and 'media' in self.data['extended_entities']:
-            return([item['media_url_https'] for item in self.data['extended_entities']['media']])
-        elif 'quoted_status' in self.data and 'extended_entities' in self.data['quoted_status'] and 'media' in self.data['quoted_status']['extended_entities']:
-            return([item['media_url_https'] for item in self.data['quoted_status']['extended_entities']['media']])
-        elif 'quoted_status' in self.data and 'extended_tweet' in self.data['quoted_status'] and 'extended_entities' in self.data['quoted_status']['extended_tweet'] and 'media' in self.data['quoted_status']['extended_tweet']['extended_entities']:
-            return([item['media_url_https'] for item in self.data['quoted_status']['extended_tweet']['extended_entities']['media']])
-
-    def getText(self):
-        if 'full_text' in self.data:
-            return self.data['full_text']
-        elif 'extended_tweet' in self.data:
-            if 'full_text' in self.data['extended_tweet']:
-                return self.data['extended_tweet']['full_text']
-        elif 'retweeted_status' in self.data and 'extended_tweet' in self.data['retweeted_status'] and self.data['retweeted_status']['extended_tweet']['full_text']:
-            return self.data['retweeted_status']['extended_tweet']['full_text']
-        elif 'text' in self.data:
-            return self.data['text']
-
-    def isReply(self):
-        return 'in_reply_to_status_id_str' in self.data
-
-    def getReplyToId(self):
-        return self.data['in_reply_to_status_id_str']
-
-    def getReplyToUserId(self):
-        return self.data['in_reply_to_user_id_str']
-
-    def isSelfReply(self):
-        return self.isReply() and self.getUserId() == self.getReplyToUserId()
-
-    def getHashtags(self):
-        hashtags = []
-        if self.data['entities']['hashtags']:
-            hashtags.extend([h['text'] for h in self.data['entities']['hashtags']])
-        if 'extended_tweet' in self.data and self.data['extended_tweet']['entities']['hashtags']:
-            hashtags.extend([h['text'] for h in self.data['extended_tweet']['entities']['hashtags']])
-        return hashtags
-
-    def hasHashtag(self, hashtag):
-        return hashtag in self.getHashtags()
-
-    def getLinks(self):
-        urls = []
-        if self.data['entities']['urls']:
-            urls.extend([url['expanded_url'] for url in self.data['entities']['urls']])
-        if 'extended_tweet' in self.data and self.data['extended_tweet']['entities']['urls']:
-            urls.extend([url['expanded_url'] for url in self.data['extended_tweet']['entities']['urls']])
-        return urls
-
-    def getPathOfLinksTo(self, to = tweetsFetchSettings['link']):
-        return [u.split(to, 1)[1] for u in self.getLinks() if to in u]
-
-    def hasLinkTo(self, to = f"{tweetsFetchSettings['link']}/@"):
-        return len(self.getPathOfLinksTo(to)) > 0
-
-    def getUrl(self):
-        return 'https://twitter.com/{}/status/{}'.format(self.getUserScreenName(), self.getId())
diff --git a/lib/TweetForest.py b/lib/TweetForest.py
deleted file mode 100644
index 1b3e37a..0000000
--- a/lib/TweetForest.py
+++ /dev/null
@@ -1,108 +0,0 @@
-from .shared import tweetsFetchSettings
-from .tweets_base import readTweetsFromFolder, writeTweetsApiJson
-from .Tweet import Tweet
-from collections import OrderedDict
-
-class TweetForest(object):
-    trunks = []
-
-    @staticmethod
-    def fromFolder(folder = tweetsFetchSettings['folder']):
-        tweets = Tweet.loadFromFolder(folder)
-        tweets = filter(lambda t: t.hasLinkTo(), tweets)
-        # tweets = filter(lambda t: not t.hasHashtag('private'), tweets)
-        return TweetForest(tweets)
-
-    def __init__(self, tweets):
-        self.trunks = []
-        tweetsDict = {t.getId(): t for t in tweets}
-
-        for tweet in tweetsDict.values():
-            if tweet.isSelfReply():
-                if tweet.getReplyToId() in tweetsDict:
-                    tweetsDict[tweet.getReplyToId()].addChild(tweet)
-                    continue
-
-            self.trunks.append(tweet)
-
-    def __str__(self):
-        def printLeafs(tweets, i = 0):
-            a = []
-            for tweet in tweets:
-                a.extend(["{}{}".format('  ' * i, l) for l in tweet.toStringList()])
-                a.append('')
-                if tweet.hasChildren():
-                    a.extend(printLeafs(tweet.getChildren(), i + 1))
-            return a
-        return '\n'.join(printLeafs(self.trunks))
-
-    def asApiJson(self):
-        apiJson = {};
-        for tweet in self.trunks:
-            if tweet.hasChildren():
-                story = tweet.getId()
-                apiJson[story] = tweet.asApiDict({'story': story})
-                while tweet.hasChildren():
-                    # ONLY FIRST CHILD
-                    tweet = tweet.getChildren()[0]
-                    apiJson[tweet.getId()] = tweet.asApiDict({'story': story})
-            else:
-                apiJson[tweet.getId()] = tweet.asApiDict()
-
-        return apiJson
-    
-    def rename_ids(self, data):
-        # Create a dictionary to store the count of each story ID
-        story_counts = {}
-
-        # Create a dictionary to store the length of each story
-        story_lengths = {}
-
-        # Calculate the length of each story before the loop
-        for tweet_id, values in data.items():
-            # Check if "story" key is present, otherwise use tweet_id as the story ID
-            story_id = values.get("story", tweet_id)
-
-            # Update the length of the story
-            story_lengths[story_id] = story_lengths.get(story_id, 0) + 1
-
-        # Iterate through the original dictionary and create a new one with modified keys
-        result = {}
-        for tweet_id, values in data.items():
-            # Check if "story" key is present, otherwise use tweet_id as the story ID
-            story_id = values.get("story", tweet_id)
-
-            # Calculate the count as the difference between the length of the story and the current length
-            count = story_lengths[story_id]
-            story_lengths[story_id] = story_lengths[story_id] - 1
-
-            # Create the new key in the format "story_id.count_tweet_id"
-            new_key = f"{story_id}.{count}_{tweet_id}"
-            result[new_key] = values
-
-        return result
-    
-    def revert_ids(self, data):
-        # Iterate through the dictionary and create a new one with reverted keys
-        result = {}
-        for key, values in data.items():
-            # Split the key into story_id.count_tweet_id
-            parts = key.split('_')
-
-            # Extract the tweet_id from the second part
-            tweet_id = parts[1]
-
-            # Create the new key with the original tweet_id
-            new_key = tweet_id
-            result[new_key] = values
-
-        return result
-
-    def saveApiJson(self, file = tweetsFetchSettings['file']):
-        data = self.asApiJson()
-        renamed_data = self.rename_ids(data)
-        # Sort tweets based on id in descending order
-        sorted_tweets = OrderedDict(sorted(renamed_data.items(), key=lambda x: x[0], reverse=True))
-        final_tweets = self.revert_ids(sorted_tweets)
-
-        writeTweetsApiJson(final_tweets, file)
diff --git a/lib/mastodon_auth.py b/lib/mastodon_auth.py
new file mode 100644
index 0000000..db7e41f
--- /dev/null
+++ b/lib/mastodon_auth.py
@@ -0,0 +1,15 @@
+from mastodon import Mastodon
+from .shared import base_folder, mastodonAuth
+import json
+import os
+
+with open(os.path.join(base_folder, 'config.json'), 'r') as f:
+    config = json.load(f)
+
+def get_auth_user():
+    return Mastodon(
+        client_id = config['mastodon']['client']['key'],
+        client_secret = config['mastodon']['client']['secret'],
+        access_token = config['mastodon']['access']['token'],
+        api_base_url = config['mastodon']['instance_url']
+    )
diff --git a/lib/mastodon_base.py b/lib/mastodon_base.py
new file mode 100644
index 0000000..05aafd9
--- /dev/null
+++ b/lib/mastodon_base.py
@@ -0,0 +1,55 @@
+import os
+import logging
+import json
+from datetime import datetime
+
+from .shared import toots_folder, data_folder, tootsFetchSettings
+from .mastodon_auth import get_auth_user
+
+class DateTimeEncoder(json.JSONEncoder):
+    def default(self, obj):
+        if isinstance(obj, datetime):
+            return obj.strftime('%Y-%m-%d %H:%M:%S')
+        return super().default(obj)
+
+
+def readTootFromFolder(id, folder = tootsFetchSettings['folder']):
+    ff = os.path.join(toots_folder, folder)
+    with open(os.path.join(ff, '{}.json'.format(id)), 'r') as f:
+        return json.load(f)
+
+def writeTootToFolder(toot, folder = tootsFetchSettings['folder']):
+    ff = os.path.join(toots_folder, folder)
+    with open(os.path.join(ff, '{}.json'.format(toot['id'])), 'w') as f:
+        json.dump(toot, f, cls=DateTimeEncoder)
+
+def writeTootToArchive(toot, folder = tootsFetchSettings['folder']):
+    ff = os.path.join(toots_folder, folder, "archive")
+    with open(os.path.join(ff, '{}.json'.format(toot['id'])), 'w') as f:
+        json.dump(toot, f, cls=DateTimeEncoder)
+
+def deleteTootFromFolder(id, folder = tootsFetchSettings['folder']):
+    ff = os.path.join(toots_folder, folder)
+    os.remove(os.path.join(ff, '{}.json'.format(id)))
+
+def readTootsFromFolder(folder = tootsFetchSettings['folder']):
+    ff = os.path.join(toots_folder, folder)
+    toots = [readTootFromFolder(f[:-5], folder) for f in os.listdir(ff) if f[-5:] == '.json']
+    logging.info('Read {} toots from folder: \'{}\''.format(len(toots), folder))
+    return toots
+
+
+def readTootFromMastodon(id):
+    mastodon = get_auth_user()
+    logging.info('Reading toot info from Mastodon API: {}'.format(id))
+    return mastodon.status(id)
+
+def writeTootsApiJson(data, tootsApiFile = tootsFetchSettings['file']):
+    filePath = os.path.join(data_folder, tootsApiFile)
+    logging.info('Writing new toots API file: \'{}\''.format(tootsApiFile))
+    with open(filePath, 'w') as f:
+        json.dump(data, f)
+
+def readTootsApiJson(tootsApiFile = tootsFetchSettings['file']):
+    with open(os.path.join(data_folder, tootsApiFile), 'r') as f:
+        return json.load(f)
diff --git a/lib/shared.py b/lib/shared.py
index d02db0c..f26947a 100644
--- a/lib/shared.py
+++ b/lib/shared.py
@@ -5,13 +5,13 @@
 
 base_folder = os.path.join(lib_folder, '..')
 data_folder = os.path.join(base_folder, 'data')
-tweets_folder = os.path.join(data_folder, 'tweets')
+toots_folder = os.path.join(data_folder, 'toots')
 
 with open(os.path.join(base_folder, 'config.json'), 'r') as f:
     config = json.load(f)
 
-tweetsFetchSettings = config['tweetsFetchSettings']['list'][config['tweetsFetchSettings']['default']]
+tootsFetchSettings = config['tootsFetchSettings']['list'][config['tootsFetchSettings']['default']]
 
 authToken = config['authToken']
 secretKey = config['secretKey']
-twitterAuth = config['twitter']
+mastodonAuth = config['mastodon']
diff --git a/lib/tweets_aux.py b/lib/toots_aux.py
similarity index 70%
rename from lib/tweets_aux.py
rename to lib/toots_aux.py
index e128f13..dd4e27c 100644
--- a/lib/tweets_aux.py
+++ b/lib/toots_aux.py
@@ -1,12 +1,11 @@
-import json
 import os
 import logging
 import GetOldTweets3 as got
 
-from .shared import tweets_folder, tweetsFetchSettings
-from .tweets_base import *
+from .shared import toots_folder, tootsFetchSettings
+from .mastodon_base import *
 
-def getOldTweetIds(searchString = tweetsFetchSettings['link'], max = 100):
+def getOldTweetIds(searchString = tootsFetchSettings['link'], max = 100):
     tweetCriteria = got.manager.TweetCriteria().setQuerySearch(searchString)\
                                                .setMaxTweets(max)
     logging.info('Loading old tweet ids from twitter (GoT):')
@@ -21,8 +20,8 @@ def getOldTweetIds(searchString = tweetsFetchSettings['link'], max = 100):
 
     return [t.id for t in tweets]
 
-def populateTweetsFolder(folder = tweetsFetchSettings['folder'], ids = [], init = False, refresh = False):
-    d = os.path.join(tweets_folder, folder)
+def populateTweetsFolder(folder = tootsFetchSettings['folder'], ids = [], init = False, refresh = False):
+    d = os.path.join(toots_folder, folder)
     if not os.path.exists(d):
         os.mkdir(d)
     ids_e = [f[:-5] for f in os.listdir(d)]
@@ -37,5 +36,5 @@ def populateTweetsFolder(folder = tweetsFetchSettings['folder'], ids = [], init
         ids_f = list(set(ids) - set(ids_e))
 
     for id in ids_f:
-        info = readTweetFromTwitter(id)
-        writeTweetToFolder(info, folder)
+        info = readTootFromMastodon(id)
+        writeTootToFolder(info, folder)
diff --git a/lib/tweets_base.py b/lib/tweets_base.py
deleted file mode 100644
index da29c50..0000000
--- a/lib/tweets_base.py
+++ /dev/null
@@ -1,43 +0,0 @@
-import os
-import logging
-import json
-
-from .shared import tweets_folder, data_folder, tweetsFetchSettings
-from .twitter_auth import get_api_app
-
-def readTweetFromFolder(id, folder = tweetsFetchSettings['folder']):
-    ff = os.path.join(tweets_folder, folder)
-    with open(os.path.join(ff, '{}.json'.format(id)), 'r') as f:
-        return json.load(f)
-
-def writeTweetToFolder(tweet, folder = tweetsFetchSettings['folder']):
-    ff = os.path.join(tweets_folder, folder)
-    with open(os.path.join(ff, '{}.json'.format(tweet['id_str'])), 'w') as f:
-        json.dump(tweet, f)
-
-def deleteTweetFromFolder(id, folder = tweetsFetchSettings['folder']):
-    ff = os.path.join(tweets_folder, folder)
-    os.remove(os.path.join(ff, '{}.json'.format(id)))
-
-def readTweetsFromFolder(folder = tweetsFetchSettings['folder']):
-    ff = os.path.join(tweets_folder, folder)
-    tweets = [readTweetFromFolder(f[:-5], folder) for f in os.listdir(ff) if f[-5:] == '.json']
-    logging.info('Read {} tweets from folder: \'{}\''.format(len(tweets), folder))
-    return tweets
-
-
-def readTweetFromTwitter(id):
-    api = get_api_app()
-    logging.info('Reading tweet info from twitter API: {}'.format(id))
-    return api.get_status(id, include_entities=True, tweet_mode='extended')._json
-
-
-def writeTweetsApiJson(data, tweetsApiFile = tweetsFetchSettings['file']):
-    filePath = os.path.join(data_folder, tweetsApiFile)
-    logging.info('Writing new tweets API file: \'{}\''.format(tweetsApiFile))
-    with open(filePath, 'w') as f:
-        json.dump(data, f)
-
-def readTweetsApiJson(tweetsApiFile = tweetsFetchSettings['file']):
-    with open(os.path.join(data_folder, tweetsApiFile), 'r') as f:
-        return json.load(f)
diff --git a/lib/twitter_auth.py b/lib/twitter_auth.py
deleted file mode 100644
index c6f965b..0000000
--- a/lib/twitter_auth.py
+++ /dev/null
@@ -1,18 +0,0 @@
-from tweepy import AppAuthHandler, OAuthHandler, API
-from .shared import base_folder, twitterAuth
-import json
-import os
-
-
-def get_auth_app():
-    ad = twitterAuth['api']
-    return AppAuthHandler(ad['key'], ad['secret'])
-
-def get_api_app():
-    return API(get_auth_app())
-
-def get_auth_user():
-    ad = twitterAuth
-    auth = OAuthHandler(ad['api']['key'], ad['api']['secret'])
-    auth.set_access_token(ad['access']['token'], ad['access']['token-secret'])
-    return auth
diff --git a/runInit.py b/runInit.py
index c1ca65b..2f7af00 100755
--- a/runInit.py
+++ b/runInit.py
@@ -1,11 +1,10 @@
 #!./venv/bin/python
-import json
 import os
 import logging
 import GetOldTweets3 as got
 
 from lib.shared import base_folder, tweetsFetchSettings
-from lib.tweets_aux import getOldTweetIds, populateTweetsFolder
+from lib.toots_aux import getOldTweetIds, populateTweetsFolder
 
 logging.basicConfig(
     level=logging.INFO,
diff --git a/runListener.py b/runListener.py
index aa064fa..e546361 100755
--- a/runListener.py
+++ b/runListener.py
@@ -1,14 +1,25 @@
-#!./venv/bin/python
-import json
-import tweepy
+##!./venv/bin/python
+
 import os
 import logging
+import time
+from mastodon import StreamListener
+
+from lib.shared import base_folder, tootsFetchSettings, toots_folder
+from lib.mastodon_auth import get_auth_user
+from lib.mastodon_base import writeTootToFolder, writeTootToArchive, readTootFromFolder, deleteTootFromFolder
+from lib.TootForest import TootForest
+
+
+mastodon = get_auth_user()
 
-from lib.shared import base_folder, tweetsFetchSettings
-from lib.twitter_auth import get_auth_user
-from lib.tweets_base import writeTweetToFolder
-from lib.TweetForest import TweetForest
+keyword = tootsFetchSettings['listen']
 
+dir_path = os.path.join(toots_folder, tootsFetchSettings['folder'])
+# Get all files in directory
+files = os.listdir(dir_path)
+# Extract ids (filenames without .json)
+existing_ids = [os.path.splitext(file)[0] for file in files]
 
 logging.basicConfig(
     level=logging.INFO,
@@ -19,36 +30,73 @@
     ]
 )
 
-searchString = tweetsFetchSettings['listen']
+searchString = tootsFetchSettings['listen']
 
 def update():
-    forest = TweetForest.fromFolder(tweetsFetchSettings['folder'])
-    forest.saveApiJson(tweetsFetchSettings['file'])
+    forest = TootForest.fromFolder(tootsFetchSettings['folder'])
+    forest.saveApiJson(tootsFetchSettings['file'])
+
+class DecarbnowStreamListener(StreamListener):
+    def __init__(self, mastodon):
+        self.mastodon = mastodon
+
+    def stream_with_reconnection(self):
+        retry_delay = 5
+        max_retries = 10
+        retry_count = 0
+
+        while retry_count < max_retries:
+            try:
+                self.mastodon.stream_public(self)
+                break
+            except Exception as e:
+                print("Error: ", e)
+                print("Versuche, die Verbindung nach 10 Sekunden erneut herzustellen...")
+                retry_count += 1
+                time.sleep(retry_delay)
 
-class DecarbnowStreamListener(tweepy.StreamListener):
-    def on_status(self, tweet):
-        id = tweet._json['id']
-        logging.info('Listener got new tweet: {}'.format(id))
-        writeTweetToFolder(tweet._json, tweetsFetchSettings['folder'])
-        update()
+    def on_update(self, toot):     
+        if keyword in toot['content']:
+            id = toot['id']
+            logging.info('Listener got new toot: {}'.format(id))
+            writeTootToFolder(toot, tootsFetchSettings['folder'])
+            update()
 
-    def on_error(self, status_code):
+    def on_status_update(self, toot):     
+        if keyword in toot['content']:
+            id = toot['id']
+            logging.info('Listener got new toot: {}'.format(id))
+            writeTootToFolder(toot, tootsFetchSettings['folder'])
+            update()
+
+    def on_delete(self, status_id):
+        str_status = str(status_id)
+        if str_status in existing_ids:
+            logging.info('Archiving toot (id: {})!'.format(str_status))
+            toot = readTootFromFolder(str_status)
+            archive_dir = os.path.join(toots_folder, tootsFetchSettings['folder'], 'archive')
+            os.makedirs(archive_dir, exist_ok=True)
+
+            writeTootToArchive(toot, tootsFetchSettings['folder'])
+            deleteTootFromFolder(status_id, tootsFetchSettings['folder'])
+
+            update()
+
+    def handle_heartbeat(self):
+        print("Received a heartbeat from the server!")
+
+    def on_abort(self, status_code):
         logging.warning('Listener got error, status code: {}'.format(status_code))
         return True
 
-
 logging.info('Init Listener:')
-logging.info('  - Tweets Folder: \'{}\''.format(tweetsFetchSettings['folder']))
+logging.info('  - Toots Folder: \'{}\''.format(tootsFetchSettings['folder']))
 logging.info('  - Search String: \'{}\''.format(searchString))
 
-listener = DecarbnowStreamListener()
-stream = tweepy.Stream(auth = get_auth_user(), listener = listener, tweet_mode = 'extended')
+listener = DecarbnowStreamListener(mastodon)
 
-logging.info('Init Tweets API File ...')
+logging.info('Init Toots API File ...')
 update()
 
 logging.info('Start Listener ...')
-try:
-    stream.filter(track=[searchString])
-except KeyboardInterrupt:
-    pass
+listener.stream_with_reconnection()
\ No newline at end of file

From 65671e86466147d51381bd4f742b678d5ae10f53 Mon Sep 17 00:00:00 2001
From: sweing <sjweingaertner@gmail.com>
Date: Tue, 9 Jan 2024 20:20:50 +0100
Subject: [PATCH 02/12] only heardbeat on connect

---
 runListener.py | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/runListener.py b/runListener.py
index e546361..6a2fb67 100755
--- a/runListener.py
+++ b/runListener.py
@@ -39,6 +39,7 @@ def update():
 class DecarbnowStreamListener(StreamListener):
     def __init__(self, mastodon):
         self.mastodon = mastodon
+        self.receivedHeartbeat = False
 
     def stream_with_reconnection(self):
         retry_delay = 5
@@ -50,6 +51,7 @@ def stream_with_reconnection(self):
                 self.mastodon.stream_public(self)
                 break
             except Exception as e:
+                self.receivedHeartbeat = False
                 print("Error: ", e)
                 print("Versuche, die Verbindung nach 10 Sekunden erneut herzustellen...")
                 retry_count += 1
@@ -83,7 +85,9 @@ def on_delete(self, status_id):
             update()
 
     def handle_heartbeat(self):
-        print("Received a heartbeat from the server!")
+        if not self.receivedHeartbeat:
+            print("Received a heartbeat from the server!")
+            self.receivedHeartbeat = True
 
     def on_abort(self, status_code):
         logging.warning('Listener got error, status code: {}'.format(status_code))

From bf0d75c6c112444fdd6dca84edf73613b215fb68 Mon Sep 17 00:00:00 2001
From: sweing <sjweingaertner@gmail.com>
Date: Tue, 9 Jan 2024 20:38:28 +0100
Subject: [PATCH 03/12] more beautiful messaging

---
 runListener.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/runListener.py b/runListener.py
index 6a2fb67..40caf29 100755
--- a/runListener.py
+++ b/runListener.py
@@ -86,7 +86,7 @@ def on_delete(self, status_id):
 
     def handle_heartbeat(self):
         if not self.receivedHeartbeat:
-            print("Received a heartbeat from the server!")
+            logging.info("Connected to server. Listening.")
             self.receivedHeartbeat = True
 
     def on_abort(self, status_code):

From 74644560c5653b6d1a4372e30fea8505c5a58ea4 Mon Sep 17 00:00:00 2001
From: sweing <sjweingaertner@gmail.com>
Date: Tue, 9 Jan 2024 20:40:01 +0100
Subject: [PATCH 04/12] more beautiful messaging2

---
 runListener.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/runListener.py b/runListener.py
index 40caf29..e6f1f0f 100755
--- a/runListener.py
+++ b/runListener.py
@@ -42,7 +42,7 @@ def __init__(self, mastodon):
         self.receivedHeartbeat = False
 
     def stream_with_reconnection(self):
-        retry_delay = 5
+        retry_delay = 10
         max_retries = 10
         retry_count = 0
 
@@ -52,8 +52,8 @@ def stream_with_reconnection(self):
                 break
             except Exception as e:
                 self.receivedHeartbeat = False
-                print("Error: ", e)
-                print("Versuche, die Verbindung nach 10 Sekunden erneut herzustellen...")
+                logging.warning("Error: ", e)
+                logging.info("Versuche, die Verbindung nach 10 Sekunden erneut herzustellen...")
                 retry_count += 1
                 time.sleep(retry_delay)
 

From a1c53f860405cd51815619ffc2ceb9812e015d93 Mon Sep 17 00:00:00 2001
From: sweing <sjweingaertner@gmail.com>
Date: Wed, 10 Jan 2024 10:04:50 +0100
Subject: [PATCH 05/12] messaging

---
 runListener.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/runListener.py b/runListener.py
index e6f1f0f..29829ba 100755
--- a/runListener.py
+++ b/runListener.py
@@ -36,7 +36,7 @@ def update():
     forest = TootForest.fromFolder(tootsFetchSettings['folder'])
     forest.saveApiJson(tootsFetchSettings['file'])
 
-class DecarbnowStreamListener(StreamListener):
+class MastodonStreamListener(StreamListener):
     def __init__(self, mastodon):
         self.mastodon = mastodon
         self.receivedHeartbeat = False
@@ -53,7 +53,7 @@ def stream_with_reconnection(self):
             except Exception as e:
                 self.receivedHeartbeat = False
                 logging.warning("Error: ", e)
-                logging.info("Versuche, die Verbindung nach 10 Sekunden erneut herzustellen...")
+                logging.info("Trying to reconnect after " + retry_delay + "seconds.")
                 retry_count += 1
                 time.sleep(retry_delay)
 
@@ -88,6 +88,7 @@ def handle_heartbeat(self):
         if not self.receivedHeartbeat:
             logging.info("Connected to server. Listening.")
             self.receivedHeartbeat = True
+            retry_count = 0
 
     def on_abort(self, status_code):
         logging.warning('Listener got error, status code: {}'.format(status_code))
@@ -97,7 +98,7 @@ def on_abort(self, status_code):
 logging.info('  - Toots Folder: \'{}\''.format(tootsFetchSettings['folder']))
 logging.info('  - Search String: \'{}\''.format(searchString))
 
-listener = DecarbnowStreamListener(mastodon)
+listener = MastodonStreamListener(mastodon)
 
 logging.info('Init Toots API File ...')
 update()

From 062957e97294ba777b51c6fac1f7ccf1148d3d37 Mon Sep 17 00:00:00 2001
From: sweing <sjweingaertner@gmail.com>
Date: Wed, 10 Jan 2024 13:56:22 +0100
Subject: [PATCH 06/12] reconnect on_abord

---
 runListener.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/runListener.py b/runListener.py
index 29829ba..547f60e 100755
--- a/runListener.py
+++ b/runListener.py
@@ -53,7 +53,7 @@ def stream_with_reconnection(self):
             except Exception as e:
                 self.receivedHeartbeat = False
                 logging.warning("Error: ", e)
-                logging.info("Trying to reconnect after " + retry_delay + "seconds.")
+                logging.info("Trying to reconnect after " + str(retry_delay) + "seconds.")
                 retry_count += 1
                 time.sleep(retry_delay)
 
@@ -92,7 +92,7 @@ def handle_heartbeat(self):
 
     def on_abort(self, status_code):
         logging.warning('Listener got error, status code: {}'.format(status_code))
-        return True
+        self.stream_with_reconnection()
 
 logging.info('Init Listener:')
 logging.info('  - Toots Folder: \'{}\''.format(tootsFetchSettings['folder']))

From 64c38a7108f2be11a7e567acd37e27306783f4db Mon Sep 17 00:00:00 2001
From: sweing <sjweingaertner@gmail.com>
Date: Wed, 10 Jan 2024 18:42:12 +0100
Subject: [PATCH 07/12] combine tweets & toots

---
 api/modules/toots.py | 59 ++++++++++++++++++++++++++++++++++++++++++--
 lib/Toot.py          |  1 +
 runListener.py       |  4 +--
 3 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/api/modules/toots.py b/api/modules/toots.py
index 920d4e3..3643bd7 100644
--- a/api/modules/toots.py
+++ b/api/modules/toots.py
@@ -16,9 +16,36 @@
 @cross_origin()
 @login_exempt
 def root():
+    # Read the current tweets from the API or other source
+    current_tweets = readTootsApiJson()
+
+    # Read additional tweets from file_tweets.json
+    try:
+        with open('data/file_tweets.json', 'r') as file:
+            file_tweets_data = json.load(file)
+            # Check if the data is a dictionary
+            if isinstance(file_tweets_data, dict):
+                # If the current tweets are also a dictionary, update it
+                if isinstance(current_tweets, dict):
+                    current_tweets.update(file_tweets_data)
+                # If the current tweets are a list, convert the dictionary to a list and extend it
+                elif isinstance(current_tweets, list):
+                    current_tweets.extend(file_tweets_data.values())
+                else:
+                    raise ValueError("The current tweets are neither a list nor a dictionary.")
+            else:
+                raise ValueError("The contents of file_tweets.json are not a dictionary")
+    except FileNotFoundError:
+        logging.error("The file file_tweets.json was not found.")
+    except json.JSONDecodeError:
+        logging.error("The file file_tweets.json does not contain valid JSON.")
+    except ValueError as e:
+        logging.error(e)
+
+    # Return the combined data as a JSON response
     return Response(json.dumps({
         'date': int(datetime.timestamp(datetime.now())),
-        'tweets': readTootsApiJson()
+        'tweets': current_tweets
     }), mimetype='application/json')
 
 
@@ -37,7 +64,35 @@ def forest_create():
     logging.warning('Manual invocation of creating forest!')
     forest = TootForest.fromFolder()
     forest.saveApiJson()
-    return Response(json.dumps(readTootsApiJson()), mimetype='application/json')
+
+    # Read the current tweets from the API or other source
+    current_tweets = readTootsApiJson()
+
+    # Read additional tweets from file_tweets.json
+    try:
+        with open('data/file_tweets.json', 'r') as file:
+            file_tweets_data = json.load(file)
+            # Check if the data is a dictionary
+            if isinstance(file_tweets_data, dict):
+                # If the current tweets are also a dictionary, update it
+                if isinstance(current_tweets, dict):
+                    current_tweets.update(file_tweets_data)
+                # If the current tweets are a list, convert the dictionary to a list and extend it
+                elif isinstance(current_tweets, list):
+                    current_tweets.extend(file_tweets_data.values())
+                else:
+                    raise ValueError("The current tweets are neither a list nor a dictionary.")
+            else:
+                raise ValueError("The contents of file_tweets.json are not a dictionary")
+    except FileNotFoundError:
+        logging.error("The file file_tweets.json was not found.")
+    except json.JSONDecodeError:
+        logging.error("The file file_tweets.json does not contain valid JSON.")
+    except ValueError as e:
+        logging.error(e)
+
+    # Return the combined data as a JSON response
+    return Response(json.dumps(current_tweets), mimetype='application/json')
 
 @bp.route('/add/<int:id>')
 def add(id):
diff --git a/lib/Toot.py b/lib/Toot.py
index 987d7d6..3d61439 100644
--- a/lib/Toot.py
+++ b/lib/Toot.py
@@ -59,6 +59,7 @@ def asApiDict(self, add = {}):
             'display_name': self.getUserScreenName(),
             'avatar': self.getUserImageHttps(),
             'media': self.getMedia(),
+            'source': 'mastodon.social',
             **add
         }
 
diff --git a/runListener.py b/runListener.py
index 547f60e..19f7259 100755
--- a/runListener.py
+++ b/runListener.py
@@ -49,11 +49,12 @@ def stream_with_reconnection(self):
         while retry_count < max_retries:
             try:
                 self.mastodon.stream_public(self)
+                retry_count = 0
                 break
             except Exception as e:
                 self.receivedHeartbeat = False
                 logging.warning("Error: ", e)
-                logging.info("Trying to reconnect after " + str(retry_delay) + "seconds.")
+                logging.info("Trying to reconnect after " + str(retry_delay) + " seconds.")
                 retry_count += 1
                 time.sleep(retry_delay)
 
@@ -88,7 +89,6 @@ def handle_heartbeat(self):
         if not self.receivedHeartbeat:
             logging.info("Connected to server. Listening.")
             self.receivedHeartbeat = True
-            retry_count = 0
 
     def on_abort(self, status_code):
         logging.warning('Listener got error, status code: {}'.format(status_code))

From dcbaaeb2409b87f7f29b9da3859ed6fb550b9116 Mon Sep 17 00:00:00 2001
From: sweing <sjweingaertner@gmail.com>
Date: Sun, 14 Jan 2024 16:54:02 +0100
Subject: [PATCH 08/12] update() now updates the existing_ids array

---
 requirements.txt |  3 +++
 runListener.py   | 13 ++++++++-----
 sync.sh          |  2 ++
 3 files changed, 13 insertions(+), 5 deletions(-)
 create mode 100755 sync.sh

diff --git a/requirements.txt b/requirements.txt
index 0054864..52cc1d1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,5 @@
 backcall==0.1.0
+blurhash==1.1.4
 certifi==2020.4.5.1
 chardet==3.0.4
 click==7.1.2
@@ -18,6 +19,7 @@ jupyter-client==6.1.3
 jupyter-core==4.6.3
 lxml==4.5.1
 MarkupSafe==1.1.1
+Mastodon.py==1.8.1
 oauthlib==3.1.0
 parso==0.7.0
 pexpect==4.8.0
@@ -28,6 +30,7 @@ Pygments==2.6.1
 pyquery==1.4.1
 PySocks==1.7.1
 python-dateutil==2.8.1
+python-magic==0.4.27
 pyzmq==19.0.1
 requests==2.23.0
 requests-oauthlib==1.3.0
diff --git a/runListener.py b/runListener.py
index 19f7259..50ae4bd 100755
--- a/runListener.py
+++ b/runListener.py
@@ -16,9 +16,7 @@
 keyword = tootsFetchSettings['listen']
 
 dir_path = os.path.join(toots_folder, tootsFetchSettings['folder'])
-# Get all files in directory
 files = os.listdir(dir_path)
-# Extract ids (filenames without .json)
 existing_ids = [os.path.splitext(file)[0] for file in files]
 
 logging.basicConfig(
@@ -33,8 +31,14 @@
 searchString = tootsFetchSettings['listen']
 
 def update():
+    global existing_ids
     forest = TootForest.fromFolder(tootsFetchSettings['folder'])
     forest.saveApiJson(tootsFetchSettings['file'])
+    
+    # Update the global existing_ids variable
+    files = os.listdir(dir_path)
+    existing_ids = [os.path.splitext(file)[0] for file in files]
+
 
 class MastodonStreamListener(StreamListener):
     def __init__(self, mastodon):
@@ -60,7 +64,7 @@ def stream_with_reconnection(self):
 
     def on_update(self, toot):     
         if keyword in toot['content']:
-            id = toot['id']
+            id = str(toot['id'])  # Convert the ID to a string, if it's not already
             logging.info('Listener got new toot: {}'.format(id))
             writeTootToFolder(toot, tootsFetchSettings['folder'])
             update()
@@ -68,7 +72,7 @@ def on_update(self, toot):
     def on_status_update(self, toot):     
         if keyword in toot['content']:
             id = toot['id']
-            logging.info('Listener got new toot: {}'.format(id))
+            logging.info('Listener got update of toot: {}'.format(id))
             writeTootToFolder(toot, tootsFetchSettings['folder'])
             update()
 
@@ -82,7 +86,6 @@ def on_delete(self, status_id):
 
             writeTootToArchive(toot, tootsFetchSettings['folder'])
             deleteTootFromFolder(status_id, tootsFetchSettings['folder'])
-
             update()
 
     def handle_heartbeat(self):
diff --git a/sync.sh b/sync.sh
new file mode 100755
index 0000000..cf3bb0d
--- /dev/null
+++ b/sync.sh
@@ -0,0 +1,2 @@
+#!/usr/bin/env bash
+rsync -a  --exclude='/data' . nebula@spica.uberspace.de:~/html/dev2.decarbnow.space

From a177ef576ec697c574b9df34552e2b03086bdb2c Mon Sep 17 00:00:00 2001
From: sweing <sjweingaertner@gmail.com>
Date: Sat, 27 Sep 2025 12:21:55 +0200
Subject: [PATCH 09/12] implement bluesky

---
 api/modules/toots.py        |  22 ++--
 lib/Toot.py                 |  99 ++++++++++++++--
 lib/TootForest.py           |   3 +-
 lib/bluesky_base.py         | 164 +++++++++++++++++++++++++
 lib/bluesky_did_resolver.py | 124 +++++++++++++++++++
 requirements.txt            |  98 +++++++++------
 runListener.py              | 230 ++++++++++++++++++++++++++++++++++--
 7 files changed, 669 insertions(+), 71 deletions(-)
 create mode 100644 lib/bluesky_base.py
 create mode 100644 lib/bluesky_did_resolver.py

diff --git a/api/modules/toots.py b/api/modules/toots.py
index 3643bd7..17c897b 100644
--- a/api/modules/toots.py
+++ b/api/modules/toots.py
@@ -1,6 +1,7 @@
 from datetime import datetime
 import logging
 import json
+import os
 
 from flask import Blueprint, jsonify, Response, render_template, abort
 from flask_cors import cross_origin
@@ -9,6 +10,7 @@
 from lib.mastodon_base import readTootsApiJson
 from lib.Toot import Toot
 from lib.TootForest import TootForest
+from lib.shared import data_folder
 
 bp = Blueprint('tweets', __name__, url_prefix='/tweets')
 
@@ -19,9 +21,9 @@ def root():
     # Read the current tweets from the API or other source
     current_tweets = readTootsApiJson()
 
-    # Read additional tweets from file_tweets.json
+    # Read additional tweets from file_tweets_transformed.json
     try:
-        with open('data/file_tweets.json', 'r') as file:
+        with open(os.path.join(data_folder, 'file_tweets_transformed.json'), 'r', encoding='utf-8') as file:
             file_tweets_data = json.load(file)
             # Check if the data is a dictionary
             if isinstance(file_tweets_data, dict):
@@ -34,11 +36,11 @@ def root():
                 else:
                     raise ValueError("The current tweets are neither a list nor a dictionary.")
             else:
-                raise ValueError("The contents of file_tweets.json are not a dictionary")
+                raise ValueError("The contents of file_tweets_transformed.json are not a dictionary")
     except FileNotFoundError:
-        logging.error("The file file_tweets.json was not found.")
+        logging.error("The file file_tweets_transformed.json was not found.")
     except json.JSONDecodeError:
-        logging.error("The file file_tweets.json does not contain valid JSON.")
+        logging.error("The file file_tweets_transformed.json does not contain valid JSON.")
     except ValueError as e:
         logging.error(e)
 
@@ -68,9 +70,9 @@ def forest_create():
     # Read the current tweets from the API or other source
     current_tweets = readTootsApiJson()
 
-    # Read additional tweets from file_tweets.json
+    # Read additional tweets from file_tweets_transformed.json
     try:
-        with open('data/file_tweets.json', 'r') as file:
+        with open(os.path.join(data_folder, 'file_tweets_transformed.json'), 'r', encoding='utf-8') as file:
             file_tweets_data = json.load(file)
             # Check if the data is a dictionary
             if isinstance(file_tweets_data, dict):
@@ -83,11 +85,11 @@ def forest_create():
                 else:
                     raise ValueError("The current tweets are neither a list nor a dictionary.")
             else:
-                raise ValueError("The contents of file_tweets.json are not a dictionary")
+                raise ValueError("The contents of file_tweets_transformed.json are not a dictionary")
     except FileNotFoundError:
-        logging.error("The file file_tweets.json was not found.")
+        logging.error("The file file_tweets_transformed.json was not found.")
     except json.JSONDecodeError:
-        logging.error("The file file_tweets.json does not contain valid JSON.")
+        logging.error("The file file_tweets_transformed.json does not contain valid JSON.")
     except ValueError as e:
         logging.error(e)
 
diff --git a/lib/Toot.py b/lib/Toot.py
index 3d61439..d5f21ba 100644
--- a/lib/Toot.py
+++ b/lib/Toot.py
@@ -50,6 +50,12 @@ def hasChildren(self):
         return len(self.children) > 0
 
     def asApiDict(self, add = {}):
+        # Determine source platform
+        source = 'mastodon.social'  # Default
+        if 'platform' in self.data:
+            if self.data['platform'] == 'bluesky':
+                source = 'bluesky'
+
         return {
             'url': self.getPathOfLinksTo()[0],
             'hashtags': self.getHashtags(),
@@ -59,7 +65,7 @@ def asApiDict(self, add = {}):
             'display_name': self.getUserScreenName(),
             'avatar': self.getUserImageHttps(),
             'media': self.getMedia(),
-            'source': 'mastodon.social',
+            'source': source,
             **add
         }
 
@@ -81,7 +87,7 @@ def getId(self):
         return self.data['id']
 
     def getUserId(self):
-        return self.data['account']['id']
+        return self.data['account'].get('id', self.data['account'].get('acct', ''))
 
     def getUserName(self):
         return self.data['account']['username']
@@ -90,15 +96,39 @@ def getUserScreenName(self):
         return self.data['account']['display_name']
     
     def getUserImageHttps(self):
-        return self.data['account']['avatar_static']
+        return self.data['account'].get('avatar_static', '')
 
     def getDateTime(self):
-        return datetime.strptime(self.data['created_at'], '%Y-%m-%d %H:%M:%S')
+        date_str = self.data['created_at']
+        # Handle different date formats (Mastodon vs Bluesky)
+        try:
+            return datetime.strptime(date_str, '%Y-%m-%d %H:%M:%S')
+        except ValueError:
+            try:
+                # ISO format with microseconds (Bluesky)
+                return datetime.fromisoformat(date_str.replace('Z', '+00:00'))
+            except ValueError:
+                # Fallback - return current time if parsing fails
+                return datetime.now()
 
     def getMedia(self):
+        media = []
+        # Handle Mastodon-style media
         if 'media_attachments' in self.data:
-            return [item['preview_url'] for item in self.data['media_attachments'] if 'preview_url' in item]
-        return []
+            media.extend([item['preview_url'] for item in self.data['media_attachments'] if 'preview_url' in item])
+        # Handle Bluesky media from embed
+        if 'platform' in self.data and self.data['platform'] == 'bluesky' and 'raw_record' in self.data:
+            raw_record = self.data['raw_record']
+            if 'embed' in raw_record:
+                # Handle image embeds
+                if raw_record['embed'].get('$type') == 'app.bsky.embed.images' and 'images' in raw_record['embed']:
+                    for image in raw_record['embed']['images']:
+                        if 'image' in image and '$link' in image['image']['ref']:
+                            # Convert blob reference to CDN URL
+                            blob_ref = image['image']['ref']['$link']
+                            cdn_url = f"https://cdn.bsky.app/img/feed_thumbnail/plain/did:plc:adsquh2z4vzbpeelyvkq4rbl/{blob_ref}@jpeg"
+                            media.append(cdn_url)
+        return media
 
     def getText(self):
         if 'content' in self.data:
@@ -125,10 +155,21 @@ def isSelfReply(self):
 
     def getHashtags(self):
         hashtags = []
-        if self.data['tags']:
+        # Handle Mastodon-style tags
+        if 'tags' in self.data and self.data['tags']:
             hashtags.extend([h['name'] for h in self.data['tags']])
-        if 'extended_toot' in self.data and self.data['extended_toot']['entities']['hashtags']:
+        # Handle extended_toot format
+        if 'extended_toot' in self.data and 'entities' in self.data['extended_toot'] and 'hashtags' in self.data['extended_toot']['entities']:
             hashtags.extend([h['text'] for h in self.data['extended_toot']['entities']['hashtags']])
+        # Handle Bluesky hashtags from facets
+        if 'platform' in self.data and self.data['platform'] == 'bluesky' and 'raw_record' in self.data:
+            raw_record = self.data['raw_record']
+            if 'facets' in raw_record:
+                for facet in raw_record['facets']:
+                    if 'features' in facet:
+                        for feature in facet['features']:
+                            if feature.get('$type') == 'app.bsky.richtext.facet#tag' and 'tag' in feature:
+                                hashtags.append(feature['tag'])
         return hashtags
 
     def hasHashtag(self, hashtag):
@@ -136,14 +177,50 @@ def hasHashtag(self, hashtag):
 
     def getLinks(self):
         urls = []
-        if 'content' in self.data:
+        content = self.getText()
+
+        # For Bluesky posts - extract URLs from structured data first
+        if 'platform' in self.data and self.data['platform'] == 'bluesky' and 'raw_record' in self.data:
+            raw_record = self.data['raw_record']
+
+            # Extract from embed.external.uri
+            if 'embed' in raw_record and 'external' in raw_record['embed'] and 'uri' in raw_record['embed']['external']:
+                urls.append(raw_record['embed']['external']['uri'])
+
+            # Extract from facets (rich text links)
+            if 'facets' in raw_record:
+                for facet in raw_record['facets']:
+                    if 'features' in facet:
+                        for feature in facet['features']:
+                            if feature.get('$type') == 'app.bsky.richtext.facet#link' and 'uri' in feature:
+                                urls.append(feature['uri'])
+
+        # For Mastodon posts with HTML content
+        if 'content' in self.data and '<' in self.data['content']:
             extractor = URLExtractor()
             extractor.feed(self.data['content'])
             urls.extend(extractor.urls)
-        return urls
+
+        # Fallback: extract URLs from plain text content using regex
+        if content:
+            import re
+            # Match URLs in plain text (http/https URLs)
+            url_pattern = r'https?://[^\s<>"]+|[a-zA-Z0-9][-a-zA-Z0-9]*\.(?:[a-zA-Z]{2,}(?:\.[a-zA-Z]{2,})?)[^\s<>"]*'
+            text_urls = re.findall(url_pattern, content)
+            urls.extend(text_urls)
+
+        return list(set(urls))  # Remove duplicates
 
     def getPathOfLinksTo(self, to = tootsFetchSettings['link']):
-        return [u.split(to, 1)[1] for u in self.getLinks() if to in u]
+        links = self.getLinks()
+        matching_links = [u for u in links if to in u]
+
+        # For Bluesky posts, prioritize full URLs (those starting with https://) over truncated ones
+        if 'platform' in self.data and self.data['platform'] == 'bluesky':
+            # Sort so full URLs come first
+            matching_links.sort(key=lambda x: not x.startswith('https://'))
+
+        return [u.split(to, 1)[1] for u in matching_links]
 
     def hasLinkTo(self, to = f"{tootsFetchSettings['link']}/@"):
         return len(self.getPathOfLinksTo(to)) > 0
diff --git a/lib/TootForest.py b/lib/TootForest.py
index b70902a..e931643 100644
--- a/lib/TootForest.py
+++ b/lib/TootForest.py
@@ -9,7 +9,8 @@ class TootForest(object):
     @staticmethod
     def fromFolder(folder = tootsFetchSettings['folder']):
         toots = Toot.loadFromFolder(folder)
-        toots = filter(lambda t: t.hasLinkTo(), toots)
+        # Filter for posts that contain links to the configured domain
+        toots = filter(lambda t: t.hasLinkTo(tootsFetchSettings['link']), toots)
         # toots = filter(lambda t: not t.hasHashtag('private'), toots)
         return TootForest(toots)
 
diff --git a/lib/bluesky_base.py b/lib/bluesky_base.py
new file mode 100644
index 0000000..46a2291
--- /dev/null
+++ b/lib/bluesky_base.py
@@ -0,0 +1,164 @@
+import json
+import logging
+import time
+import threading
+from urllib.parse import urlencode
+import websocket
+from datetime import datetime
+from .bluesky_did_resolver import did_resolver
+
+class BlueSkyStreamListener:
+    def __init__(self, keywords, callback_func, reconnect_attempts=10, reconnect_delay=5):
+        self.endpoint = 'wss://jetstream2.us-west.bsky.network/subscribe'
+        self.wanted_collections = ['app.bsky.feed.post']
+        self.keywords = keywords
+        self.callback_func = callback_func
+        self.ws = None
+        self.reconnect_attempts = 0
+        self.max_reconnect_attempts = reconnect_attempts
+        self.reconnect_delay = reconnect_delay
+        self.should_run = True
+
+    def _build_url(self):
+        params = [('wantedCollections', collection) for collection in self.wanted_collections]
+        query_string = urlencode(params)
+        return f"{self.endpoint}?{query_string}"
+
+    def on_message(self, ws, message):
+        try:
+            data = json.loads(message)
+            self._process_message(data)
+        except json.JSONDecodeError as e:
+            logging.error(f"Error parsing Bluesky message: {e}")
+        except Exception as e:
+            logging.error(f"Error processing Bluesky message: {e}")
+
+    def _process_message(self, message):
+        if message.get('kind') == 'commit' and message.get('commit'):
+            commit = message['commit']
+            operation = commit.get('operation', '')
+
+            # Handle deletion events
+            if operation == 'delete':
+                self._handle_deletion(message)
+                return
+
+            # Handle creation/update events (existing logic)
+            if commit.get('record') and operation == 'create':
+                record = commit['record']
+
+                if record.get('text'):
+                    text = record['text'].lower()
+                    matched_keywords = [keyword for keyword in self.keywords if keyword.lower() in text]
+
+                    if matched_keywords:
+                        # Extract ID from the message - use rkey which is the record key
+                        post_id = commit.get('rkey', '')
+
+                        # Construct the proper AT-URI
+                        at_uri = f"at://{message.get('did', '')}/app.bsky.feed.post/{post_id}"
+
+                        # Resolve DID to actual handle
+                        did = message.get('did', '')
+                        resolved_handle = did_resolver.resolve_did_to_handle(did)
+
+                        # Extract reply information if this is a reply
+                        in_reply_to_id = None
+                        in_reply_to_account_id = None
+                        if 'reply' in record and record['reply'].get('parent'):
+                            parent_uri = record['reply']['parent'].get('uri', '')
+                            if parent_uri.startswith('at://'):
+                                # Parse AT-URI to extract DID and post ID
+                                # Format: at://did:plc:xxxxx/app.bsky.feed.post/postid
+                                parts = parent_uri.split('/')
+                                if len(parts) >= 4:
+                                    parent_did = parts[2]  # The DID part
+                                    in_reply_to_id = parts[-1]  # The post ID part
+                                    in_reply_to_account_id = parent_did
+
+                        # Convert to a format similar to Mastodon toot
+                        bluesky_post = {
+                            'id': post_id,
+                            'content': record['text'],
+                            'created_at': datetime.fromtimestamp(message.get('time_us', 0) / 1000000).isoformat(),
+                            'account': {
+                                'acct': did,
+                                'username': resolved_handle.replace('@', '') if resolved_handle.startswith('@') else resolved_handle,
+                                'display_name': resolved_handle
+                            },
+                            'uri': at_uri,
+                            'url': f"https://bsky.app/profile/{message.get('did', '')}/post/{post_id}",
+                            'platform': 'bluesky',
+                            'matched_keywords': matched_keywords,
+                            'raw_record': record
+                        }
+
+                        # Add reply fields if this is a reply
+                        if in_reply_to_id:
+                            bluesky_post['in_reply_to_id'] = in_reply_to_id
+                            bluesky_post['in_reply_to_account_id'] = in_reply_to_account_id
+
+                        self.callback_func(bluesky_post)
+
+    def _handle_deletion(self, message):
+        """Handle Bluesky post deletion events - only for posts we've captured"""
+        commit = message.get('commit', {})
+        post_id = commit.get('rkey', '')
+
+        if post_id:
+            # Only process deletions for posts we actually have stored
+            # The deletion callback will check if the post exists locally
+            if hasattr(self, 'deletion_callback') and self.deletion_callback:
+                self.deletion_callback(post_id)
+
+    def set_deletion_callback(self, callback_func):
+        """Set a callback function to handle deletions"""
+        self.deletion_callback = callback_func
+
+    def on_error(self, ws, error):
+        logging.error(f"Bluesky WebSocket error: {error}")
+
+    def on_close(self, ws, close_status_code, close_msg):
+        logging.warning("Bluesky connection closed")
+        if self.should_run:
+            self._reconnect()
+
+    def on_open(self, ws):
+        logging.info("Connected to Bluesky Jetstream")
+        self.reconnect_attempts = 0
+
+    def _reconnect(self):
+        if self.reconnect_attempts < self.max_reconnect_attempts and self.should_run:
+            self.reconnect_attempts += 1
+            logging.info(f"Reconnecting to Bluesky in {self.reconnect_delay}s (attempt {self.reconnect_attempts}/{self.max_reconnect_attempts})")
+            time.sleep(self.reconnect_delay)
+            self.start_stream()
+        else:
+            logging.error("Max Bluesky reconnection attempts reached")
+
+    def start_stream(self):
+        if not self.should_run:
+            return
+
+        try:
+            url = self._build_url()
+            logging.info(f"Connecting to Bluesky: {url}")
+
+            self.ws = websocket.WebSocketApp(
+                url,
+                on_open=self.on_open,
+                on_message=self.on_message,
+                on_error=self.on_error,
+                on_close=self.on_close
+            )
+
+            self.ws.run_forever()
+        except Exception as e:
+            logging.error(f"Error starting Bluesky stream: {e}")
+            if self.should_run:
+                self._reconnect()
+
+    def stop_stream(self):
+        self.should_run = False
+        if self.ws:
+            self.ws.close()
\ No newline at end of file
diff --git a/lib/bluesky_did_resolver.py b/lib/bluesky_did_resolver.py
new file mode 100644
index 0000000..2d69211
--- /dev/null
+++ b/lib/bluesky_did_resolver.py
@@ -0,0 +1,124 @@
+import requests
+import logging
+import json
+import time
+from datetime import datetime, timedelta
+
+class BlueSkyDIDResolver:
+    def __init__(self):
+        self.cache = {}
+        self.cache_expiry = {}
+        self.cache_duration = timedelta(hours=24)  # Cache for 24 hours
+
+    def resolve_did_to_handle(self, did):
+        """
+        Resolve a DID to a Bluesky handle using AT Protocol
+
+        Args:
+            did (str): The DID to resolve (e.g., 'did:plc:adsquh2z4vzbpeelyvkq4rbl')
+
+        Returns:
+            str: The resolved handle (e.g., '@username.bsky.social') or formatted DID if resolution fails
+        """
+        # Check cache first
+        if self._is_cached(did):
+            return self.cache[did]
+
+        try:
+            # Use AT Protocol's com.atproto.identity.resolveHandle API
+            # But we need to resolve DID to handle, so we use the directory service
+
+            # Try multiple resolution methods
+            handle = self._resolve_via_plc_directory(did)
+            if not handle:
+                handle = self._resolve_via_bsky_api(did)
+
+            if handle:
+                # Cache the result
+                self._cache_result(did, handle)
+                return handle
+            else:
+                # Fallback: format the DID nicely
+                fallback = self._format_did_fallback(did)
+                self._cache_result(did, fallback)
+                return fallback
+
+        except Exception as e:
+            logging.warning(f"Failed to resolve DID {did}: {e}")
+            fallback = self._format_did_fallback(did)
+            self._cache_result(did, fallback)
+            return fallback
+
+    def _resolve_via_plc_directory(self, did):
+        """Try to resolve via PLC directory"""
+        try:
+            # PLC directory API
+            url = f"https://plc.directory/{did}"
+            response = requests.get(url, timeout=5)
+
+            if response.status_code == 200:
+                data = response.json()
+                # Look for the handle in the service endpoints
+                if 'alsoKnownAs' in data:
+                    for aka in data['alsoKnownAs']:
+                        if aka.startswith('at://'):
+                            handle = aka.replace('at://', '')
+                            return f"@{handle}"
+
+        except Exception as e:
+            logging.debug(f"PLC directory resolution failed for {did}: {e}")
+
+        return None
+
+    def _resolve_via_bsky_api(self, did):
+        """Try to resolve via Bluesky's public API"""
+        try:
+            # Use Bluesky's public API to get profile
+            url = f"https://public.api.bsky.app/xrpc/app.bsky.actor.getProfile"
+            params = {'actor': did}
+            response = requests.get(url, params=params, timeout=5)
+
+            if response.status_code == 200:
+                data = response.json()
+                if 'handle' in data:
+                    return f"@{data['handle']}"
+
+        except Exception as e:
+            logging.debug(f"Bluesky API resolution failed for {did}: {e}")
+
+        return None
+
+    def _format_did_fallback(self, did):
+        """Format DID as a more readable fallback"""
+        if did.startswith('did:plc:'):
+            # Take first 8 characters of the identifier
+            identifier = did.split(':')[-1][:8]
+            return f"@{identifier}...bsky"
+        return did
+
+    def _is_cached(self, did):
+        """Check if DID is in cache and not expired"""
+        if did not in self.cache:
+            return False
+
+        if did in self.cache_expiry:
+            if datetime.now() > self.cache_expiry[did]:
+                # Cache expired, remove it
+                del self.cache[did]
+                del self.cache_expiry[did]
+                return False
+
+        return True
+
+    def _cache_result(self, did, handle):
+        """Cache the resolution result"""
+        self.cache[did] = handle
+        self.cache_expiry[did] = datetime.now() + self.cache_duration
+
+    def clear_cache(self):
+        """Clear the entire cache"""
+        self.cache.clear()
+        self.cache_expiry.clear()
+
+# Global instance for reuse
+did_resolver = BlueSkyDIDResolver()
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 52cc1d1..f33961f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,43 +1,63 @@
-backcall==0.1.0
-blurhash==1.1.4
-certifi==2020.4.5.1
-chardet==3.0.4
-click==7.1.2
-cssselect==1.1.0
-decorator==4.4.2
+# Core Flask and web framework
 Flask==1.1.2
 Flask-Cors==3.0.8
-GetOldTweets3==0.0.11
-idna==2.9
-ipykernel==5.2.1
-ipython==7.14.0
-ipython-genutils==0.2.0
-itsdangerous==1.1.0
-jedi==0.17.0
-Jinja2==2.11.2
-jupyter-client==6.1.3
-jupyter-core==4.6.3
-lxml==4.5.1
-MarkupSafe==1.1.1
+Werkzeug>=0.15
+
+# Mastodon integration
 Mastodon.py==1.8.1
-oauthlib==3.1.0
-parso==0.7.0
-pexpect==4.8.0
-pickleshare==0.7.5
-prompt-toolkit==3.0.5
-ptyprocess==0.6.0
-Pygments==2.6.1
-pyquery==1.4.1
-PySocks==1.7.1
-python-dateutil==2.8.1
-python-magic==0.4.27
-pyzmq==19.0.1
-requests==2.23.0
-requests-oauthlib==1.3.0
-six==1.14.0
-tornado==6.0.4
-traitlets==4.3.3
+blurhash>=1.1.4
+python-magic>=0.4.27
+
+# Twitter integration
+GetOldTweets3==0.0.11
 tweepy==3.8.0
-urllib3==1.25.9
-wcwidth==0.1.9
-Werkzeug==1.0.1
+
+# Bluesky integration
+websocket-client==1.6.4
+
+# HTTP and networking
+requests>=2.23.0
+requests-oauthlib>=0.7.0
+oauthlib>=3.0.0
+urllib3>=1.25.9
+certifi>=2017.4.17
+PySocks>=1.5.7
+
+# Web scraping and parsing
+lxml>=4.5.1
+pyquery>=1.2.10
+cssselect>=1.1.0
+
+# Utilities
+python-dateutil>=2.8.1
+click>=7.1.2
+six>=1.14.0
+chardet>=3.0.2
+idna>=2.5
+decorator>=4.4.2
+
+# Template engine
+Jinja2>=2.10.1
+MarkupSafe>=1.1.1
+itsdangerous>=0.24
+
+# Optional: Development and notebook support (install only if needed)
+# ipython>=7.14.0
+# ipykernel>=5.2.1
+# jupyter-client>=6.1.3
+# jupyter-core>=4.6.3
+# ipython-genutils>=0.2.0
+# prompt-toolkit>=3.0.5
+# Pygments>=2.6.1
+# backcall>=0.1.0
+# pexpect>=4.8.0
+# pickleshare>=0.7.5
+# ptyprocess>=0.6.0
+# parso>=0.7.0
+# jedi>=0.17.0
+# wcwidth>=0.1.9
+# traitlets>=4.3.3
+# tornado>=6.0.4
+
+# Note: pyzmq has compatibility issues with Python 3.13 and is not required for core functionality
+# pyzmq==19.0.1  # Commented out due to Python 3.13 compatibility issues
\ No newline at end of file
diff --git a/runListener.py b/runListener.py
index 50ae4bd..925f3f5 100755
--- a/runListener.py
+++ b/runListener.py
@@ -3,15 +3,44 @@
 import os
 import logging
 import time
+import threading
+import argparse
 from mastodon import StreamListener
 
 from lib.shared import base_folder, tootsFetchSettings, toots_folder
-from lib.mastodon_auth import get_auth_user
+try:
+    from lib.mastodon_auth import get_auth_user
+except Exception as e:
+    logging.warning(f"Mastodon auth not configured: {e}")
+    get_auth_user = None
 from lib.mastodon_base import writeTootToFolder, writeTootToArchive, readTootFromFolder, deleteTootFromFolder
 from lib.TootForest import TootForest
+from lib.bluesky_base import BlueSkyStreamListener
 
+# Parse command line arguments
+parser = argparse.ArgumentParser(description='Social Media Listener')
+parser.add_argument('--platforms', type=str, default='all',
+                    help='Platforms to listen to: all, mastodon, bluesky, or comma-separated list (e.g., mastodon,bluesky)')
+parser.add_argument('--mastodon-only', action='store_true', help='Listen only to Mastodon')
+parser.add_argument('--bluesky-only', action='store_true', help='Listen only to Bluesky')
+args = parser.parse_args()
 
-mastodon = get_auth_user()
+# Determine which platforms to enable
+if args.mastodon_only:
+    enable_mastodon = True
+    enable_bluesky = False
+elif args.bluesky_only:
+    enable_mastodon = False
+    enable_bluesky = True
+elif args.platforms.lower() == 'all':
+    enable_mastodon = True
+    enable_bluesky = True
+else:
+    platforms = [p.strip().lower() for p in args.platforms.split(',')]
+    enable_mastodon = 'mastodon' in platforms
+    enable_bluesky = 'bluesky' in platforms
+
+mastodon = get_auth_user() if get_auth_user and enable_mastodon else None
 
 keyword = tootsFetchSettings['listen']
 
@@ -34,11 +63,73 @@ def update():
     global existing_ids
     forest = TootForest.fromFolder(tootsFetchSettings['folder'])
     forest.saveApiJson(tootsFetchSettings['file'])
-    
+
     # Update the global existing_ids variable
     files = os.listdir(dir_path)
     existing_ids = [os.path.splitext(file)[0] for file in files]
 
+def handle_bluesky_post(bluesky_post):
+    """Handle incoming Bluesky posts, converting them to toot format and saving"""
+    global existing_ids
+    id = str(bluesky_post['id'])
+    if keyword in bluesky_post['content']:
+        logging.info('Listener got new Bluesky post: {}'.format(id))
+
+        # Check for story threading (same user replying to themselves with libmap link)
+        if (bluesky_post.get('in_reply_to_id') and
+            bluesky_post.get('in_reply_to_account_id') and
+            bluesky_post.get('account', {}).get('acct') == bluesky_post.get('in_reply_to_account_id')):
+
+            # This is a self-reply, check if the parent has a libmap link and is in our collection
+            parent_id = str(bluesky_post['in_reply_to_id'])
+            if parent_id in existing_ids:
+                try:
+                    parent_toot = readTootFromFolder(parent_id, tootsFetchSettings['folder'])
+                    if keyword in parent_toot.get('content', ''):
+                        # Parent has libmap link, this should be part of a story
+                        # Find the root of the story by following the chain
+                        root_id = parent_id
+                        while True:
+                            try:
+                                root_toot = readTootFromFolder(root_id, tootsFetchSettings['folder'])
+                                if (root_toot.get('in_reply_to_id') and
+                                    str(root_toot.get('in_reply_to_account_id')) == str(bluesky_post.get('account', {}).get('acct')) and
+                                    str(root_toot['in_reply_to_id']) in existing_ids):
+                                    root_id = str(root_toot['in_reply_to_id'])
+                                else:
+                                    break
+                            except:
+                                break
+
+                        # Add story field to the post before saving
+                        bluesky_post['story'] = root_id
+                        logging.info('Added Bluesky story field: {} -> {}'.format(id, root_id))
+                except Exception as e:
+                    logging.warning('Error checking Bluesky story threading: {}'.format(e))
+
+        writeTootToFolder(bluesky_post, tootsFetchSettings['folder'])
+        # Update existing_ids immediately to include the new post
+        existing_ids.append(id)
+        update()
+
+def handle_bluesky_deletion(post_id):
+    """Handle Bluesky post deletions - only archive posts we have captured"""
+    global existing_ids
+    str_status = str(post_id)
+    if str_status in existing_ids:
+        # This is a post we actually captured and stored
+        logging.info('Archiving Bluesky post (id: {})!'.format(str_status))
+        toot = readTootFromFolder(str_status, tootsFetchSettings['folder'])
+        archive_dir = os.path.join(toots_folder, tootsFetchSettings['folder'], 'archive')
+        os.makedirs(archive_dir, exist_ok=True)
+
+        writeTootToArchive(toot, tootsFetchSettings['folder'])
+        deleteTootFromFolder(post_id, tootsFetchSettings['folder'])
+        # Remove from existing_ids immediately
+        existing_ids.remove(str_status)
+        update()
+    # If post_id not in existing_ids, we silently ignore it (not relevant to us)
+
 
 class MastodonStreamListener(StreamListener):
     def __init__(self, mastodon):
@@ -57,26 +148,101 @@ def stream_with_reconnection(self):
                 break
             except Exception as e:
                 self.receivedHeartbeat = False
-                logging.warning("Error: ", e)
+                logging.warning(f"Error: {e}")
                 logging.info("Trying to reconnect after " + str(retry_delay) + " seconds.")
                 retry_count += 1
                 time.sleep(retry_delay)
 
-    def on_update(self, toot):     
+    def on_update(self, toot):
+        global existing_ids
         if keyword in toot['content']:
             id = str(toot['id'])  # Convert the ID to a string, if it's not already
             logging.info('Listener got new toot: {}'.format(id))
+
+            # Check for story threading (same user replying to themselves with libmap link)
+            if (toot.get('in_reply_to_id') and
+                toot.get('in_reply_to_account_id') and
+                toot.get('account', {}).get('id') == toot.get('in_reply_to_account_id')):
+
+                # This is a self-reply, check if the parent has a libmap link and is in our collection
+                parent_id = str(toot['in_reply_to_id'])
+                if parent_id in existing_ids:
+                    try:
+                        parent_toot = readTootFromFolder(parent_id, tootsFetchSettings['folder'])
+                        if keyword in parent_toot.get('content', ''):
+                            # Parent has libmap link, this should be part of a story
+                            # Find the root of the story by following the chain
+                            root_id = parent_id
+                            while True:
+                                try:
+                                    root_toot = readTootFromFolder(root_id, tootsFetchSettings['folder'])
+                                    if (root_toot.get('in_reply_to_id') and
+                                        str(root_toot.get('in_reply_to_account_id')) == str(toot.get('account', {}).get('id')) and
+                                        str(root_toot['in_reply_to_id']) in existing_ids):
+                                        root_id = str(root_toot['in_reply_to_id'])
+                                    else:
+                                        break
+                                except:
+                                    break
+
+                            # Add story field to the toot before saving
+                            toot['story'] = root_id
+                            logging.info('Added story field: {} -> {}'.format(id, root_id))
+                    except Exception as e:
+                        logging.warning('Error checking story threading: {}'.format(e))
+
             writeTootToFolder(toot, tootsFetchSettings['folder'])
+            # Update existing_ids immediately to include the new post
+            if id not in existing_ids:
+                existing_ids.append(id)
             update()
 
-    def on_status_update(self, toot):     
+    def on_status_update(self, toot):
+        global existing_ids
         if keyword in toot['content']:
-            id = toot['id']
+            id = str(toot['id'])
             logging.info('Listener got update of toot: {}'.format(id))
+
+            # Check for story threading (same user replying to themselves with libmap link)
+            if (toot.get('in_reply_to_id') and
+                toot.get('in_reply_to_account_id') and
+                toot.get('account', {}).get('id') == toot.get('in_reply_to_account_id')):
+
+                # This is a self-reply, check if the parent has a libmap link and is in our collection
+                parent_id = str(toot['in_reply_to_id'])
+                if parent_id in existing_ids:
+                    try:
+                        parent_toot = readTootFromFolder(parent_id, tootsFetchSettings['folder'])
+                        if keyword in parent_toot.get('content', ''):
+                            # Parent has libmap link, this should be part of a story
+                            # Find the root of the story by following the chain
+                            root_id = parent_id
+                            while True:
+                                try:
+                                    root_toot = readTootFromFolder(root_id, tootsFetchSettings['folder'])
+                                    if (root_toot.get('in_reply_to_id') and
+                                        str(root_toot.get('in_reply_to_account_id')) == str(toot.get('account', {}).get('id')) and
+                                        str(root_toot['in_reply_to_id']) in existing_ids):
+                                        root_id = str(root_toot['in_reply_to_id'])
+                                    else:
+                                        break
+                                except:
+                                    break
+
+                            # Add story field to the toot before saving
+                            toot['story'] = root_id
+                            logging.info('Added story field to updated toot: {} -> {}'.format(id, root_id))
+                    except Exception as e:
+                        logging.warning('Error checking story threading on update: {}'.format(e))
+
             writeTootToFolder(toot, tootsFetchSettings['folder'])
+            # Update existing_ids immediately to include the new post
+            if id not in existing_ids:
+                existing_ids.append(id)
             update()
 
     def on_delete(self, status_id):
+        global existing_ids
         str_status = str(status_id)
         if str_status in existing_ids:
             logging.info('Archiving toot (id: {})!'.format(str_status))
@@ -86,6 +252,8 @@ def on_delete(self, status_id):
 
             writeTootToArchive(toot, tootsFetchSettings['folder'])
             deleteTootFromFolder(status_id, tootsFetchSettings['folder'])
+            # Remove from existing_ids immediately
+            existing_ids.remove(str_status)
             update()
 
     def handle_heartbeat(self):
@@ -100,11 +268,53 @@ def on_abort(self, status_code):
 logging.info('Init Listener:')
 logging.info('  - Toots Folder: \'{}\''.format(tootsFetchSettings['folder']))
 logging.info('  - Search String: \'{}\''.format(searchString))
+logging.info('  - Enabled Platforms: Mastodon={}, Bluesky={}'.format(enable_mastodon, enable_bluesky))
+
+listener = MastodonStreamListener(mastodon) if mastodon and enable_mastodon else None
+
+# Initialize Bluesky listener if enabled
+# Use the same keyword as Mastodon for consistency
+keywords = [searchString]  # Keep the full search string as one keyword
+bluesky_listener = BlueSkyStreamListener(keywords, handle_bluesky_post) if enable_bluesky else None
 
-listener = MastodonStreamListener(mastodon)
+# Set up deletion callback for Bluesky
+if bluesky_listener:
+    bluesky_listener.set_deletion_callback(handle_bluesky_deletion)
 
 logging.info('Init Toots API File ...')
 update()
 
-logging.info('Start Listener ...')
-listener.stream_with_reconnection()
\ No newline at end of file
+logging.info('Start Listeners ...')
+
+# Start Bluesky listener in a separate thread if enabled
+bluesky_thread = None
+if bluesky_listener:
+    bluesky_thread = threading.Thread(target=bluesky_listener.start_stream, daemon=True)
+    bluesky_thread.start()
+    logging.info('Bluesky listener started in background thread')
+else:
+    logging.info('Bluesky listener disabled')
+
+# Start Mastodon listener (blocks) if enabled
+if listener:
+    logging.info('Starting Mastodon listener...')
+    try:
+        listener.stream_with_reconnection()
+    except KeyboardInterrupt:
+        logging.info('Shutting down listeners...')
+        if bluesky_listener:
+            bluesky_listener.stop_stream()
+        raise
+else:
+    logging.info('Mastodon listener disabled')
+    if bluesky_listener:
+        logging.info('Running with Bluesky only - press Ctrl+C to stop')
+        try:
+            # Keep the main thread alive when only Bluesky is running
+            while True:
+                time.sleep(1)
+        except KeyboardInterrupt:
+            logging.info('Shutting down Bluesky listener...')
+            bluesky_listener.stop_stream()
+    else:
+        logging.error('No listeners enabled! Use --help for usage information.')
\ No newline at end of file

From bb970657a12d53b5d115933b213ffd11c5eb79d2 Mon Sep 17 00:00:00 2001
From: sweing <sjweingaertner@gmail.com>
Date: Sat, 27 Sep 2025 14:17:00 +0200
Subject: [PATCH 10/12] add ut8 encodings to json imports

---
 downloadFromApi.py   |  2 +-
 lib/mastodon_auth.py |  2 +-
 lib/mastodon_base.py | 10 +++++-----
 lib/shared.py        |  2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/downloadFromApi.py b/downloadFromApi.py
index c4e7585..f4f8748 100644
--- a/downloadFromApi.py
+++ b/downloadFromApi.py
@@ -21,7 +21,7 @@
 os.makedirs(data_folder, exist_ok=True)
 
 # Load tweet IDs from fromAPI.json
-with open(os.path.join(data_folder, "fromAPI.json"), "r") as json_file:
+with open(os.path.join(data_folder, "fromAPI.json"), "r", encoding='utf-8') as json_file:
     tweet_data = json.load(json_file)
     tweet_ids = tweet_data.get("tweets", {}).keys()
 
diff --git a/lib/mastodon_auth.py b/lib/mastodon_auth.py
index db7e41f..19266cf 100644
--- a/lib/mastodon_auth.py
+++ b/lib/mastodon_auth.py
@@ -3,7 +3,7 @@
 import json
 import os
 
-with open(os.path.join(base_folder, 'config.json'), 'r') as f:
+with open(os.path.join(base_folder, 'config.json'), 'r', encoding='utf-8') as f:
     config = json.load(f)
 
 def get_auth_user():
diff --git a/lib/mastodon_base.py b/lib/mastodon_base.py
index 05aafd9..49b8578 100644
--- a/lib/mastodon_base.py
+++ b/lib/mastodon_base.py
@@ -15,17 +15,17 @@ def default(self, obj):
 
 def readTootFromFolder(id, folder = tootsFetchSettings['folder']):
     ff = os.path.join(toots_folder, folder)
-    with open(os.path.join(ff, '{}.json'.format(id)), 'r') as f:
+    with open(os.path.join(ff, '{}.json'.format(id)), 'r', encoding='utf-8') as f:
         return json.load(f)
 
 def writeTootToFolder(toot, folder = tootsFetchSettings['folder']):
     ff = os.path.join(toots_folder, folder)
-    with open(os.path.join(ff, '{}.json'.format(toot['id'])), 'w') as f:
+    with open(os.path.join(ff, '{}.json'.format(toot['id'])), 'w', encoding='utf-8') as f:
         json.dump(toot, f, cls=DateTimeEncoder)
 
 def writeTootToArchive(toot, folder = tootsFetchSettings['folder']):
     ff = os.path.join(toots_folder, folder, "archive")
-    with open(os.path.join(ff, '{}.json'.format(toot['id'])), 'w') as f:
+    with open(os.path.join(ff, '{}.json'.format(toot['id'])), 'w', encoding='utf-8') as f:
         json.dump(toot, f, cls=DateTimeEncoder)
 
 def deleteTootFromFolder(id, folder = tootsFetchSettings['folder']):
@@ -47,9 +47,9 @@ def readTootFromMastodon(id):
 def writeTootsApiJson(data, tootsApiFile = tootsFetchSettings['file']):
     filePath = os.path.join(data_folder, tootsApiFile)
     logging.info('Writing new toots API file: \'{}\''.format(tootsApiFile))
-    with open(filePath, 'w') as f:
+    with open(filePath, 'w', encoding='utf-8') as f:
         json.dump(data, f)
 
 def readTootsApiJson(tootsApiFile = tootsFetchSettings['file']):
-    with open(os.path.join(data_folder, tootsApiFile), 'r') as f:
+    with open(os.path.join(data_folder, tootsApiFile), 'r', encoding='utf-8') as f:
         return json.load(f)
diff --git a/lib/shared.py b/lib/shared.py
index f26947a..35fb282 100644
--- a/lib/shared.py
+++ b/lib/shared.py
@@ -7,7 +7,7 @@
 data_folder = os.path.join(base_folder, 'data')
 toots_folder = os.path.join(data_folder, 'toots')
 
-with open(os.path.join(base_folder, 'config.json'), 'r') as f:
+with open(os.path.join(base_folder, 'config.json'), 'r', encoding='utf-8') as f:
     config = json.load(f)
 
 tootsFetchSettings = config['tootsFetchSettings']['list'][config['tootsFetchSettings']['default']]

From 1c2a31c3b534d72401c0f2a3ad5315acce116d76 Mon Sep 17 00:00:00 2001
From: sweing <sjweingaertner@gmail.com>
Date: Fri, 17 Oct 2025 21:36:42 +0200
Subject: [PATCH 11/12] fixing url issue

---
 lib/Toot.py | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/lib/Toot.py b/lib/Toot.py
index d5f21ba..8bf07c6 100644
--- a/lib/Toot.py
+++ b/lib/Toot.py
@@ -178,9 +178,11 @@ def hasHashtag(self, hashtag):
     def getLinks(self):
         urls = []
         content = self.getText()
+        is_bluesky = 'platform' in self.data and self.data['platform'] == 'bluesky'
+        has_html_content = 'content' in self.data and '<' in self.data['content']
 
         # For Bluesky posts - extract URLs from structured data first
-        if 'platform' in self.data and self.data['platform'] == 'bluesky' and 'raw_record' in self.data:
+        if is_bluesky and 'raw_record' in self.data:
             raw_record = self.data['raw_record']
 
             # Extract from embed.external.uri
@@ -196,13 +198,15 @@ def getLinks(self):
                                 urls.append(feature['uri'])
 
         # For Mastodon posts with HTML content
-        if 'content' in self.data and '<' in self.data['content']:
+        if has_html_content:
             extractor = URLExtractor()
             extractor.feed(self.data['content'])
             urls.extend(extractor.urls)
 
         # Fallback: extract URLs from plain text content using regex
-        if content:
+        # Skip if we already found URLs from structured data (Bluesky) or HTML (Mastodon)
+        # This avoids extracting HTML-encoded duplicates or truncated display text
+        if content and not ((is_bluesky or has_html_content) and len(urls) > 0):
             import re
             # Match URLs in plain text (http/https URLs)
             url_pattern = r'https?://[^\s<>"]+|[a-zA-Z0-9][-a-zA-Z0-9]*\.(?:[a-zA-Z]{2,}(?:\.[a-zA-Z]{2,})?)[^\s<>"]*'

From 94884e5d8e993624ac26cac1cfcb4c05a0b69eac Mon Sep 17 00:00:00 2001
From: Claude <noreply@anthropic.com>
Date: Mon, 5 Jan 2026 11:29:15 +0000
Subject: [PATCH 12/12] Add comprehensive documentation to TootForest class

- Added detailed class-level docstring explaining the forest/tree structure concept
- Documented all methods with parameter descriptions, return values, and examples
- Explained the thread organization algorithm and self-reply logic
- Described the ID transformation pipeline used for proper story sorting
- Included usage examples for key methods
- Clarified the distinction between trunks (top-level posts) and children (replies)

This documentation makes the TootForest class's purpose and functionality clear
for developers working with social media thread organization and API serialization.
---
 lib/TootForest.py | 221 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 221 insertions(+)

diff --git a/lib/TootForest.py b/lib/TootForest.py
index e931643..5b42f26 100644
--- a/lib/TootForest.py
+++ b/lib/TootForest.py
@@ -4,10 +4,58 @@
 from collections import OrderedDict
 
 class TootForest(object):
+    """
+    A TootForest represents a collection of social media posts organized into thread structures.
+
+    This class organizes individual posts (toots) into a hierarchical tree structure where:
+    - 'Trunks' are top-level posts (posts that are not replies to themselves)
+    - 'Children' are self-replies that form conversation threads
+
+    The forest structure enables:
+    - Thread visualization (showing posts and their reply chains)
+    - Story organization (grouping related posts that form a narrative)
+    - Proper ordering and serialization for API consumption
+
+    A post qualifies as a 'self-reply' when it's a reply to another post by the same author,
+    which is commonly used to create threaded content or longer narratives that exceed
+    character limits on social media platforms.
+
+    Attributes:
+        trunks (list): List of Toot objects representing top-level posts (thread starters).
+                      Each trunk may have children forming a thread.
+
+    Example:
+        # Load all toots from the default folder and organize into forest structure
+        forest = TootForest.fromFolder()
+
+        # Save the organized structure as API-ready JSON
+        forest.saveApiJson()
+
+        # Print a hierarchical text representation
+        print(forest)
+    """
     trunks = []
 
     @staticmethod
     def fromFolder(folder = tootsFetchSettings['folder']):
+        """
+        Load toots from a folder and construct a TootForest.
+
+        This factory method reads all toots from the specified folder, filters them
+        to only include posts containing links to the configured domain, and organizes
+        them into a forest structure.
+
+        Args:
+            folder (str, optional): Path to the folder containing toot JSON files.
+                                   Defaults to the configured folder from tootsFetchSettings.
+
+        Returns:
+            TootForest: A new TootForest instance containing the organized toots.
+
+        Note:
+            Only toots with links matching tootsFetchSettings['link'] are included.
+            This typically filters for posts linking to a specific domain or path.
+        """
         toots = Toot.loadFromFolder(folder)
         # Filter for posts that contain links to the configured domain
         toots = filter(lambda t: t.hasLinkTo(tootsFetchSettings['link']), toots)
@@ -15,6 +63,32 @@ def fromFolder(folder = tootsFetchSettings['folder']):
         return TootForest(toots)
 
     def __init__(self, toots):
+        """
+        Initialize a TootForest by organizing toots into thread structures.
+
+        This constructor processes a collection of toots and organizes them into a tree
+        structure by identifying parent-child relationships through self-replies. The
+        algorithm works as follows:
+
+        1. Create a dictionary mapping toot IDs to toot objects for quick lookup
+        2. For each toot, check if it's a self-reply (reply to the same author)
+        3. If it's a self-reply and the parent exists, add it as a child to the parent
+        4. Otherwise, add it as a trunk (top-level post)
+
+        This creates a forest where:
+        - Each trunk represents the start of a thread
+        - Children are nested under their parent toots
+        - Only self-replies are connected (replies to the same author)
+
+        Args:
+            toots (iterable): An iterable of Toot objects to organize into the forest.
+
+        Example:
+            >>> toots = [toot1, toot2, toot3]  # toot2 is a self-reply to toot1
+            >>> forest = TootForest(toots)
+            >>> len(forest.trunks)  # toot1 and toot3 are trunks, toot2 is a child
+            2
+        """
         self.trunks = []
         tootsDict = {t.getId(): t for t in toots}
 
@@ -26,6 +100,30 @@ def __init__(self, toots):
             self.trunks.append(toot)
 
     def __str__(self):
+        """
+        Generate a human-readable string representation of the forest.
+
+        Creates a hierarchical text view of all toots in the forest, with indentation
+        showing the thread structure. Each level of reply is indented with two spaces.
+
+        Returns:
+            str: A multi-line string showing the forest structure with:
+                 - Username, datetime, and ID
+                 - Hashtags prefixed with '#'
+                 - Links prefixed with '▶'
+                 - Content prefixed with '¶'
+                 - Indentation showing reply depth
+
+        Example output:
+            user1, 2024-01-15 10:30:00, 123456
+            # python, coding
+            ▶ https://example.com/article
+            ¶ This is the main post content
+
+              user1, 2024-01-15 10:35:00, 123457
+              # python
+              ¶ This is a reply to the above post
+        """
         def printLeafs(toots, i = 0):
             a = []
             for toot in toots:
@@ -37,6 +135,39 @@ def printLeafs(toots, i = 0):
         return '\n'.join(printLeafs(self.trunks))
 
     def asApiJson(self):
+        """
+        Convert the forest into API-ready JSON format.
+
+        Transforms the forest structure into a dictionary suitable for API responses.
+        For threaded posts (posts with children), this method:
+        - Assigns a 'story' ID (the ID of the root post) to link related posts
+        - Follows the chain of first children to create linear story progression
+        - Includes all posts with their metadata in API dictionary format
+
+        Returns:
+            dict: A dictionary mapping toot IDs to their API representations.
+                  Each value contains:
+                  - url: Link to the referenced content
+                  - hashtags: List of hashtags
+                  - timestamp: Post creation time
+                  - content: Post text
+                  - account: Username
+                  - display_name: User's display name
+                  - avatar: User avatar URL
+                  - media: List of media URLs
+                  - source: Platform (mastodon.social, bluesky, etc.)
+                  - story: Story ID (only for threaded posts)
+
+        Note:
+            For threads, only the FIRST child is followed at each level, creating
+            a linear narrative. Additional children (branches) are not included.
+
+        Example:
+            >>> forest = TootForest.fromFolder()
+            >>> api_data = forest.asApiJson()
+            >>> print(api_data['123456']['story'])  # Story ID for a threaded post
+            '123456'
+        """
         apiJson = {};
         for toot in self.trunks:
             if toot.hasChildren():
@@ -52,6 +183,42 @@ def asApiJson(self):
         return apiJson
     
     def rename_ids(self, data):
+        """
+        Transform toot IDs into sortable keys for proper story ordering.
+
+        This method prepends sequence numbers to toot IDs to enable proper sorting
+        of stories and their posts. The transformation allows stories to be sorted
+        in reverse chronological order while maintaining correct post order within
+        each story.
+
+        The algorithm:
+        1. Calculates the total length of each story (number of posts)
+        2. Assigns each post a descending count within its story
+        3. Creates composite keys: "story_id.count_tweet_id"
+
+        This ensures that when sorted alphabetically:
+        - Stories are ordered by their root post ID
+        - Posts within a story maintain their sequential order
+        - Later posts in a thread have lower counts (e.g., 3, 2, 1)
+
+        Args:
+            data (dict): Dictionary mapping toot IDs to their API representations.
+                        Each value may contain a 'story' key for threaded posts.
+
+        Returns:
+            dict: New dictionary with composite keys in format "story_id.count_tweet_id".
+                 The count starts from the story length and decrements for each post.
+
+        Example:
+            >>> data = {
+            ...     '100': {'story': '100', 'content': 'First post'},
+            ...     '101': {'story': '100', 'content': 'Second post'},
+            ...     '102': {'content': 'Standalone post'}
+            ... }
+            >>> renamed = forest.rename_ids(data)
+            >>> list(renamed.keys())
+            ['100.2_100', '100.1_101', '102.1_102']
+        """
         # Create a dictionary to store the count of each story ID
         story_counts = {}
 
@@ -83,6 +250,33 @@ def rename_ids(self, data):
         return result
     
     def revert_ids(self, data):
+        """
+        Revert composite keys back to original toot IDs.
+
+        This method reverses the transformation done by rename_ids(), extracting
+        the original toot ID from composite keys. After sorting with renamed keys,
+        this restores the original ID structure while preserving the sorted order.
+
+        The transformation:
+        - Input: "story_id.count_tweet_id" (e.g., "100.2_123")
+        - Output: "tweet_id" (e.g., "123")
+
+        Args:
+            data (dict): Dictionary with composite keys in format "story_id.count_tweet_id".
+
+        Returns:
+            dict: New dictionary with original toot IDs as keys, maintaining the
+                 sorted order from the input dictionary.
+
+        Example:
+            >>> sorted_data = OrderedDict([
+            ...     ('100.2_100', {'content': 'First'}),
+            ...     ('100.1_101', {'content': 'Second'})
+            ... ])
+            >>> reverted = forest.revert_ids(sorted_data)
+            >>> list(reverted.keys())
+            ['100', '101']  # Original IDs restored, sorted order preserved
+        """
         # Iterate through the dictionary and create a new one with reverted keys
         result = {}
         for key, values in data.items():
@@ -100,6 +294,33 @@ def revert_ids(self, data):
 
 
     def saveApiJson(self, file = tootsFetchSettings['file']):
+        """
+        Save the forest as sorted, API-ready JSON to a file.
+
+        This method performs a complete transformation pipeline:
+        1. Convert forest to API JSON format (asApiJson)
+        2. Rename IDs to make them sortable (rename_ids)
+        3. Sort in descending order (newest stories first)
+        4. Revert IDs to original format (revert_ids)
+        5. Write to JSON file
+
+        The sorting ensures:
+        - Stories appear in reverse chronological order (newest first)
+        - Posts within each story maintain their sequential order
+        - The final output has clean toot IDs despite complex sorting logic
+
+        Args:
+            file (str, optional): Path to the output JSON file.
+                                 Defaults to the configured file from tootsFetchSettings.
+
+        Side Effects:
+            Writes a JSON file containing the sorted toots with their metadata.
+
+        Example:
+            >>> forest = TootForest.fromFolder()
+            >>> forest.saveApiJson()  # Saves to default location
+            >>> forest.saveApiJson('/custom/path/toots.json')  # Custom location
+        """
         data = self.asApiJson()
         renamed_data = self.rename_ids(data)
         # Sort toots based on id in descending order