diff --git a/tools/migrations/26-02-10-a--add_monthly_activity_stats_cache.sql b/tools/migrations/26-02-10-a--add_monthly_activity_stats_cache.sql new file mode 100644 index 00000000..2cc0e370 --- /dev/null +++ b/tools/migrations/26-02-10-a--add_monthly_activity_stats_cache.sql @@ -0,0 +1,13 @@ +-- Cache table for monthly activity statistics by type +-- Historical months are cached permanently, current month refreshed periodically + +CREATE TABLE IF NOT EXISTS monthly_activity_stats_cache ( + id INT AUTO_INCREMENT PRIMARY KEY, + `year_month` VARCHAR(7) NOT NULL UNIQUE COMMENT 'Format: YYYY-MM', + `exercise_minutes` INT NOT NULL DEFAULT 0, + `reading_minutes` INT NOT NULL DEFAULT 0, + `browsing_minutes` INT NOT NULL DEFAULT 0, + `audio_minutes` INT NOT NULL DEFAULT 0, + `computed_at` DATETIME NOT NULL, + INDEX idx_year_month (`year_month`) +) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_bin; diff --git a/zeeguu/api/endpoints/article.py b/zeeguu/api/endpoints/article.py index 43eed856..a96a3593 100644 --- a/zeeguu/api/endpoints/article.py +++ b/zeeguu/api/endpoints/article.py @@ -627,19 +627,50 @@ def clear_article_cache(article_id): ) bookmark_count = len(bookmarks) + bookmark_ids_to_delete = {b.id for b in bookmarks} - # First pass: clear all preferred_bookmark_id references + # Track UserWords that need cleanup + user_words_to_check = {} for bookmark in bookmarks: user_word = bookmark.user_word - if user_word and user_word.preferred_bookmark_id == bookmark.id: - user_word.preferred_bookmark_id = None + if user_word: + user_words_to_check[user_word.id] = user_word + # Clear preferred_bookmark if it's one we're deleting + if user_word.preferred_bookmark_id == bookmark.id: + user_word.preferred_bookmark_id = None + db_session.flush() # Commit the nullifications before deleting - # Second pass: delete bookmarks + # Delete bookmarks for bookmark in bookmarks: db_session.delete(bookmark) + db_session.flush() + + # Handle UserWords that are now orphaned or need 
new preferred_bookmark + user_words_marked_unfit = 0 + for user_word in user_words_to_check.values(): + # Check for remaining bookmarks not in our delete set + remaining_bookmarks = ( + Bookmark.query + .filter(Bookmark.user_word_id == user_word.id) + .filter(Bookmark.id.notin_(bookmark_ids_to_delete)) + .all() + ) + + if remaining_bookmarks: + # Set a new preferred bookmark if needed + if user_word.preferred_bookmark is None: + user_word.preferred_bookmark = remaining_bookmarks[0] + else: + # No bookmarks left - keep UserWord for history but mark unfit for study + user_word.set_unfit_for_study(db_session) + user_words_marked_unfit += 1 + db_session.commit() + if user_words_marked_unfit > 0: + log(f"[DEV] Marked {user_words_marked_unfit} UserWords as unfit for study (no bookmarks)") + log(f"[DEV] Cleared cache and {bookmark_count} bookmarks for article {article_id}") return json_result({ diff --git a/zeeguu/api/endpoints/bookmarks_and_words.py b/zeeguu/api/endpoints/bookmarks_and_words.py index 903f0498..8d89d96a 100644 --- a/zeeguu/api/endpoints/bookmarks_and_words.py +++ b/zeeguu/api/endpoints/bookmarks_and_words.py @@ -241,7 +241,8 @@ def delete_bookmark(bookmark_id): # in the future we can generate an example for this user word with the help of the robots! 
user_word.set_unfit_for_study(db_session) else: - # No other bookmarks exist - ALWAYS keep the user_word for historical data + # No other bookmarks exist - keep UserWord for historical data + # but mark as unfit for study (won't appear in exercises) user_word.set_unfit_for_study(db_session) # Delete any ExampleSentenceContext records that reference this bookmark diff --git a/zeeguu/api/endpoints/exercises.py b/zeeguu/api/endpoints/exercises.py index 383da166..a0128924 100644 --- a/zeeguu/api/endpoints/exercises.py +++ b/zeeguu/api/endpoints/exercises.py @@ -309,7 +309,6 @@ def _user_words_as_json_result(user_words): log(f"Failed to get tokenized context for user_word {uw.id}: {e}") dicts = [] - words_to_delete = [] for user_word in user_words: try: @@ -320,28 +319,9 @@ def _user_words_as_json_result(user_words): schedule=schedule, pre_tokenized_context=tokenized_context )) - except ValueError as e: - # This means validate_data_integrity() couldn't repair the issue - # (i.e., UserWord has no bookmarks at all) - log(f"UserWord {user_word.id} failed validation and cannot be repaired: {str(e)}") - words_to_delete.append(user_word) except Exception as e: - # Log any other unexpected errors and skip + # Log unexpected errors and skip (orphaned UserWords are handled gracefully) log(f"Unexpected error processing UserWord {user_word.id}: {str(e)}") continue - # Delete UserWords that couldn't be repaired - if words_to_delete: - for word in words_to_delete: - try: - db.session.delete(word) - log(f"Deleted UserWord {word.id} due to unrepairable data integrity issues") - except: - log(f"Failed to delete UserWord {word.id}") - try: - db.session.commit() - except: - db.session.rollback() - log("Failed to commit UserWord deletions") - return json_result(dicts) diff --git a/zeeguu/api/endpoints/user_stats.py b/zeeguu/api/endpoints/user_stats.py index 5c7eaa76..39c8587f 100644 --- a/zeeguu/api/endpoints/user_stats.py +++ b/zeeguu/api/endpoints/user_stats.py @@ -2215,8 +2215,416 @@ 
def stats_index():
Chart and table showing active users per month
+
  • + +
    Monthly Activity Breakdown
    +
    Stacked chart showing exercises, reading, browsing, and audio minutes
    +
    +
  • """ return Response(html, mimetype="text/html") + + +def _compute_activity_stats_for_month(month_start, month_end): + """Compute activity minutes by type for a specific month range.""" + from sqlalchemy import func + + # Exercise minutes + exercise_ms = ( + db_session.query(func.sum(UserExerciseSession.duration)) + .filter(UserExerciseSession.start_time >= month_start) + .filter(UserExerciseSession.start_time < month_end) + .scalar() + ) or 0 + + # Reading minutes + reading_ms = ( + db_session.query(func.sum(UserReadingSession.duration)) + .filter(UserReadingSession.start_time >= month_start) + .filter(UserReadingSession.start_time < month_end) + .scalar() + ) or 0 + + # Browsing minutes + browsing_ms = ( + db_session.query(func.sum(UserBrowsingSession.duration)) + .filter(UserBrowsingSession.start_time >= month_start) + .filter(UserBrowsingSession.start_time < month_end) + .scalar() + ) or 0 + + # Audio minutes (stored in seconds) + audio_sec = ( + db_session.query(func.sum(DailyAudioLesson.duration_seconds)) + .filter(DailyAudioLesson.completed_at >= month_start) + .filter(DailyAudioLesson.completed_at < month_end) + .scalar() + ) or 0 + + return { + "exercise_minutes": round(exercise_ms / 60000), + "reading_minutes": round(reading_ms / 60000), + "browsing_minutes": round(browsing_ms / 60000), + "audio_minutes": round(audio_sec / 60), + } + + +def get_monthly_activity_stats(months=12): + """ + Get activity stats per month for the last N months. + Uses caching: historical months cached permanently, current month refreshed every 6 hours. 
+ """ + from zeeguu.core.model import MonthlyActivityStatsCache + + now = datetime.now() + current_year_month = now.strftime("%Y-%m") + monthly_data = [] + + # Get all cached data in one query + cache = MonthlyActivityStatsCache.get_all_cached() + + for i in range(months): + # Calculate month boundaries + if i == 0: + month_start = now.replace(day=1, hour=0, minute=0, second=0, microsecond=0) + month_end = now + else: + year = now.year + month = now.month - i + while month <= 0: + month += 12 + year -= 1 + month_start = datetime(year, month, 1) + next_month = month + 1 + next_year = year + if next_month > 12: + next_month = 1 + next_year += 1 + month_end = datetime(next_year, next_month, 1) + + year_month = month_start.strftime("%Y-%m") + is_current_month = (year_month == current_year_month) + + # Check cache + cached = cache.get(year_month) + use_cache = False + + if cached: + if is_current_month: + cache_age = now - cached.computed_at + if cache_age.total_seconds() < 6 * 3600: + use_cache = True + else: + use_cache = True + + if use_cache: + stats = { + "exercise_minutes": cached.exercise_minutes, + "reading_minutes": cached.reading_minutes, + "browsing_minutes": cached.browsing_minutes, + "audio_minutes": cached.audio_minutes, + } + else: + stats = _compute_activity_stats_for_month(month_start, month_end) + MonthlyActivityStatsCache.set_cached( + db_session, year_month, + stats["exercise_minutes"], + stats["reading_minutes"], + stats["browsing_minutes"], + stats["audio_minutes"] + ) + + monthly_data.append({ + "month": year_month, + "month_label": month_start.strftime("%b %Y"), + **stats, + }) + + return list(reversed(monthly_data)) + + +@api.route("/stats/monthly_activity", methods=["GET"]) +@cross_domain +def monthly_activity_page(): + """ + Public page showing monthly activity breakdown by type. + Stacked bar chart with exercises, reading, browsing, and audio. 
+ """ + months = min(int(request.args.get("months", 12)), 24) + monthly_data = get_monthly_activity_stats(months) + + # Calculate max total for chart scaling + max_total = max( + (m["exercise_minutes"] + m["reading_minutes"] + m["browsing_minutes"] + m["audio_minutes"] + for m in monthly_data), + default=1 + ) + + # Colors for each activity type + colors = { + "exercise": "#e74c3c", # Red + "reading": "#3498db", # Blue + "browsing": "#2ecc71", # Green + "audio": "#9b59b6", # Purple + } + + # Generate stacked bars + chart_bars = "" + for m in monthly_data: + total = m["exercise_minutes"] + m["reading_minutes"] + m["browsing_minutes"] + m["audio_minutes"] + total_height_pct = (total / max_total * 100) if max_total > 0 else 0 + + # Calculate individual heights as percentage of total height + if total > 0: + exercise_h = m["exercise_minutes"] / max_total * 100 + reading_h = m["reading_minutes"] / max_total * 100 + browsing_h = m["browsing_minutes"] / max_total * 100 + audio_h = m["audio_minutes"] / max_total * 100 + else: + exercise_h = reading_h = browsing_h = audio_h = 0 + + chart_bars += f""" +
    +
    {total}
    +
    +
    +
    +
    +
    +
    + {m['month_label'][:3]} +
    +""" + + # Generate table rows + table_rows = "" + for m in reversed(monthly_data): + total = m["exercise_minutes"] + m["reading_minutes"] + m["browsing_minutes"] + m["audio_minutes"] + table_rows += f""" + + {m['month_label']} + {m['exercise_minutes']} + {m['reading_minutes']} + {m['browsing_minutes']} + {m['audio_minutes']} + {total} + +""" + + html = f""" + + + Monthly Activity - Zeeguu + + + + + +
    +

    Monthly Activity Breakdown

    +

    Total minutes spent on different learning activities

    + +
    +
    +
    Exercises
    +
    Reading
    +
    Browsing
    +
    Audio
    +
    +
    +{chart_bars} +
    +
    + +
    +

    Details (minutes)

    + + + + + + + + + + + + +{table_rows} + +
    MonthExercisesReadingBrowsingAudioTotal
    +
    + + +
    + +""" + + return Response(html, mimetype="text/html") diff --git a/zeeguu/core/model/__init__.py b/zeeguu/core/model/__init__.py index 9609a458..6298d6a5 100644 --- a/zeeguu/core/model/__init__.py +++ b/zeeguu/core/model/__init__.py @@ -107,3 +107,4 @@ # stats caching from .monthly_active_users_cache import MonthlyActiveUsersCache +from .monthly_activity_stats_cache import MonthlyActivityStatsCache diff --git a/zeeguu/core/model/monthly_activity_stats_cache.py b/zeeguu/core/model/monthly_activity_stats_cache.py new file mode 100644 index 00000000..acf02344 --- /dev/null +++ b/zeeguu/core/model/monthly_activity_stats_cache.py @@ -0,0 +1,62 @@ +from datetime import datetime + +from sqlalchemy import Column, Integer, String, DateTime +from sqlalchemy.orm.exc import NoResultFound + +from zeeguu.core.model.db import db + + +class MonthlyActivityStatsCache(db.Model): + """ + Cache for monthly activity statistics by type. + Historical months are cached permanently. + Current month is refreshed periodically. 
+ """ + + __tablename__ = "monthly_activity_stats_cache" + __table_args__ = {"mysql_collate": "utf8mb4_bin"} + + id = Column(Integer, primary_key=True) + year_month = Column(String(7), unique=True, nullable=False) # e.g., "2026-01" + exercise_minutes = Column(Integer, nullable=False, default=0) + reading_minutes = Column(Integer, nullable=False, default=0) + browsing_minutes = Column(Integer, nullable=False, default=0) + audio_minutes = Column(Integer, nullable=False, default=0) + computed_at = Column(DateTime, nullable=False) + + def __init__(self, year_month, exercise_minutes, reading_minutes, browsing_minutes, audio_minutes): + self.year_month = year_month + self.exercise_minutes = exercise_minutes + self.reading_minutes = reading_minutes + self.browsing_minutes = browsing_minutes + self.audio_minutes = audio_minutes + self.computed_at = datetime.now() + + @classmethod + def get_cached(cls, year_month): + """Get cached stats for a month, or None if not cached.""" + try: + return cls.query.filter_by(year_month=year_month).one() + except NoResultFound: + return None + + @classmethod + def set_cached(cls, session, year_month, exercise_minutes, reading_minutes, browsing_minutes, audio_minutes): + """Set or update cached stats for a month.""" + existing = cls.get_cached(year_month) + if existing: + existing.exercise_minutes = exercise_minutes + existing.reading_minutes = reading_minutes + existing.browsing_minutes = browsing_minutes + existing.audio_minutes = audio_minutes + existing.computed_at = datetime.now() + else: + new_entry = cls(year_month, exercise_minutes, reading_minutes, browsing_minutes, audio_minutes) + session.add(new_entry) + session.commit() + + @classmethod + def get_all_cached(cls): + """Get all cached months as a dict {year_month: cache_entry}.""" + entries = cls.query.all() + return {e.year_month: e for e in entries} diff --git a/zeeguu/core/model/user_word.py b/zeeguu/core/model/user_word.py index 84b9bde9..8bc778ba 100644 --- 
a/zeeguu/core/model/user_word.py +++ b/zeeguu/core/model/user_word.py @@ -191,10 +191,19 @@ def as_dictionary(self, schedule=None, pre_tokenized_context=None, with_context_ If not provided, will tokenize on demand (slower for batch operations). with_context_tokenized: Whether to include tokenized context (default True). Set to False for list views where tokenization isn't needed - saves ~150ms per word. + """ - # Note: Data integrity validation removed from hot path for performance - # Run periodic checks with: python -m tools._check_and_fix_data_integrity - # Write-time validation happens via SQLAlchemy event listeners + # Auto-repair: if no preferred_bookmark but bookmarks exist, set one + is_orphaned = False + if self.preferred_bookmark is None: + bookmarks = self.bookmarks() + if bookmarks: + # Auto-repair: use first bookmark as preferred + self.preferred_bookmark = bookmarks[0] + log(f"Auto-repaired UserWord {self.id}: set preferred_bookmark to {bookmarks[0].id}") + else: + # Orphaned - no bookmarks, return minimal data (word/translation only) + is_orphaned = True try: translation_word = self.meaning.translation.content @@ -240,31 +249,56 @@ def as_dictionary(self, schedule=None, pre_tokenized_context=None, with_context_ is_about_to_be_learned = None next_practice_time = None - exercise_info_dict = dict( - to=translation_word, - from_lang=self.meaning.origin.language.code, - to_lang=translation_language, - url=self.preferred_bookmark.text.url(), - origin_rank=word_rank if word_rank != 100000 else "", - article_id=( - self.preferred_bookmark.text.article_id - if self.preferred_bookmark.text.article_id - else "" - ), - source_id=self.preferred_bookmark.source_id, - fit_for_study=self.fit_for_study == 1, - level=self.level, - cooling_interval=cooling_interval_in_days, - is_last_in_cycle=is_last_in_cycle, - is_about_to_be_learned=is_about_to_be_learned, - can_update_schedule=can_update_schedule, - user_preference=self.user_preference, - 
consecutive_correct_answers=consecutive_correct_answers, - context_in_content=self.preferred_bookmark.text.in_content, - left_ellipsis=self.preferred_bookmark.context.left_ellipsis, - right_ellipsis=self.preferred_bookmark.context.right_ellipsis, - next_practice_time=next_practice_time, - ) + # Build exercise info - some fields require bookmark + if is_orphaned: + # Minimal data for orphaned words (no bookmark context available) + exercise_info_dict = dict( + to=translation_word, + from_lang=self.meaning.origin.language.code, + to_lang=translation_language, + url="", + origin_rank=word_rank if word_rank != 100000 else "", + article_id="", + source_id=None, + fit_for_study=False, # Orphaned words are not fit for study + level=self.level, + cooling_interval=cooling_interval_in_days, + is_last_in_cycle=is_last_in_cycle, + is_about_to_be_learned=is_about_to_be_learned, + can_update_schedule=can_update_schedule, + user_preference=self.user_preference, + consecutive_correct_answers=consecutive_correct_answers, + context_in_content=False, + left_ellipsis=False, + right_ellipsis=False, + next_practice_time=next_practice_time, + ) + else: + exercise_info_dict = dict( + to=translation_word, + from_lang=self.meaning.origin.language.code, + to_lang=translation_language, + url=self.preferred_bookmark.text.url(), + origin_rank=word_rank if word_rank != 100000 else "", + article_id=( + self.preferred_bookmark.text.article_id + if self.preferred_bookmark.text.article_id + else "" + ), + source_id=self.preferred_bookmark.source_id, + fit_for_study=self.fit_for_study == 1, + level=self.level, + cooling_interval=cooling_interval_in_days, + is_last_in_cycle=is_last_in_cycle, + is_about_to_be_learned=is_about_to_be_learned, + can_update_schedule=can_update_schedule, + user_preference=self.user_preference, + consecutive_correct_answers=consecutive_correct_answers, + context_in_content=self.preferred_bookmark.text.in_content, + left_ellipsis=self.preferred_bookmark.context.left_ellipsis, 
+ right_ellipsis=self.preferred_bookmark.context.right_ellipsis, + next_practice_time=next_practice_time, + ) exercise_info_dict["from"] = self.meaning.origin.content @@ -295,14 +329,25 @@ def as_dictionary(self, schedule=None, pre_tokenized_context=None, with_context_ scheduling_reason = "early_practice" days_until_practice = days_diff - result = { - **self.preferred_bookmark.as_dictionary( + # Build bookmark dictionary - orphaned words get minimal placeholder + if is_orphaned: + bookmark_dict = { + "id": self.id, # Use user_word id as fallback + "context": "", + "context_tokenized": None, + } + else: + bookmark_dict = self.preferred_bookmark.as_dictionary( with_context_tokenized=with_context_tokenized, pre_tokenized_context=pre_tokenized_context - ), + ) + + result = { + **bookmark_dict, **exercise_info_dict, "user_word_id": self.id, "meaning_id": self.meaning_id, + "is_orphaned": is_orphaned, # True if no bookmark context available "is_user_added": ( self.is_user_added if self.is_user_added is not None else False ),