diff --git a/README.md b/README.md
index 37e6b66..90f6868 100644
--- a/README.md
+++ b/README.md
@@ -197,7 +197,7 @@ BioAnalyzer includes a formal validation workflow to compare automated predictio
 - **Ground truth**: Expert annotations in `feedback.csv` for the six BugSigDB fields  
 - **Predictions**: BioAnalyzer outputs in a predictions CSV (e.g. `analysis_results.csv` or `new.csv`)  
 - **Alignment**: PMIDs are aligned with `align_pmids.py`  
-- **Evaluation**: `confusion_matrix_analysis.py` computes 3-class confusion matrices (`ABSENT`, `PARTIALLY_PRESENT`, `PRESENT`) and per-field accuracy  
+- **Evaluation**: `scripts/eval/confusion_matrix_analysis.py` computes 3-class confusion matrices (`ABSENT`, `PARTIALLY_PRESENT`, `PRESENT`) and per-field accuracy  
 - **Outputs**: Metrics and PNG confusion matrices are written to `confusion_matrix_results/`
 
 For sharing/inspection, `create_validation_dataset.py` can generate a flat CSV:
diff --git a/app/utils/config.py b/app/utils/config.py
index 706f1aa..3971531 100644
--- a/app/utils/config.py
+++ b/app/utils/config.py
@@ -3,6 +3,7 @@
 import logging
 from typing import List, Optional
 from .credential_masking import mask_exception_message, mask_string
+from app.core.settings import get_settings
 
 try:
     from dotenv import load_dotenv  # type: ignore
@@ -326,5 +327,63 @@ def setup_logging() -> logging.Logger:
     return root_logger
 
 
-# Initialize logging when module is imported
 logger = setup_logging()
+
+# ---------------------------------------------------------------------------
+# Bridge to structured settings (app.core.settings)
+# This keeps app.core.settings as the single source of truth while preserving
+# existing config module attributes used across the codebase.
+# ---------------------------------------------------------------------------
+try:
+    _settings = get_settings()
+
+    # API / timeout settings
+    API_TIMEOUT = _settings.api.timeout
+    ANALYSIS_TIMEOUT = _settings.api.analysis_timeout
+    GEMINI_TIMEOUT = _settings.api.gemini_timeout
+    FRONTEND_TIMEOUT = _settings.api.frontend_timeout
+    NCBI_RATE_LIMIT_DELAY = _settings.api.ncbi_rate_limit_delay
+    MAX_CONCURRENT_REQUESTS = _settings.api.max_concurrent_requests
+
+    # LLM config
+    if _settings.llm.provider:
+        LLM_PROVIDER = _settings.llm.provider
+    if _settings.llm.model:
+        LLM_MODEL = _settings.llm.model
+
+    # RAG settings
+    RAG_SUMMARY_PROVIDER = _settings.rag.summary_provider or RAG_SUMMARY_PROVIDER
+    RAG_SUMMARY_MODEL = _settings.rag.summary_model or RAG_SUMMARY_MODEL
+    RAG_SUMMARY_LENGTH = _settings.rag.summary_length.value
+    RAG_SUMMARY_QUALITY = _settings.rag.summary_quality.value
+    RAG_RERANK_METHOD = _settings.rag.rerank_method.value
+    RAG_USE_SUMMARY_CACHE = _settings.rag.use_cache
+    RAG_MAX_SUMMARY_KEY_POINTS = _settings.rag.max_summary_key_points
+    RAG_TOP_K_CHUNKS = _settings.rag.top_k_chunks
+
+    # Cache settings
+    CACHE_VALIDITY_HOURS = _settings.cache.validity_hours
+    MAX_CACHE_SIZE = _settings.cache.max_size
+
+    # Retrieval settings
+    USE_FULLTEXT = _settings.retrieval.use_fulltext
+    if _settings.retrieval.ncbi_api_key:
+        NCBI_API_KEY = _settings.retrieval.ncbi_api_key
+    if _settings.retrieval.email:
+        EMAIL = _settings.retrieval.email
+
+    # Security / environment
+    ENVIRONMENT = _settings.environment.value
+    CORS_ORIGINS = _settings.security.cors_origins
+    ENABLE_REQUEST_ID = _settings.security.enable_request_id
+
+    # Rate limiting
+    ENABLE_RATE_LIMITING = _settings.rate_limit.enabled
+    RATE_LIMIT_PER_MINUTE = _settings.rate_limit.requests_per_minute
+
+    # Logging
+    LOG_LEVEL = _settings.logging.level.value
+except Exception as _bridge_err:
+    # Best-effort bridge: names assigned before the failure stay overridden, the rest keep their env-based values.
+    logger.warning("Settings bridge failed (%s); using env-based config values", type(_bridge_err).__name__)
+
diff --git a/curator_table/app.py b/curator_table/app.py
index 6c63c45..99346ca 100644
--- a/curator_table/app.py
+++ b/curator_table/app.py
@@ -3,63 +3,44 @@
 BioAnalyzer Curator Table (with column-level validation + curator ground truth)
 =============================================================================
 
-A Streamlit dashboard for real-world validation of BioAnalyzer predictions.
-
-This app supports:
-1) A sortable, searchable, filterable table of candidate curatable PubMed articles.
-2) A curator feedback workflow aligned by PMID.
-3) Column-level correctness checks for each BioAnalyzer field.
-4) Curator-provided TRUE labels (ground truth) for each field.
-5) Exportable feedback suitable for:
-   - confusion matrices
-   - per-field error analysis
-   - binary + multiclass evaluation
-   - MCC decisions on how to treat PARTIALLY_PRESENT
-
-Run:
-    streamlit run curator_table/app.py
-
-Input data:
-    CSV or Parquet with at minimum:
-        - PMID
-    Recommended:
-        - Title, Journal, Year, Summary
-    Expected BioAnalyzer outputs:
-        - Host Species Status
-        - Body Site Status
-        - Condition Status
-        - Sequencing Type Status
-        - Taxa Level Status
-        - Sample Size Status
-
-Status values expected:
-    ABSENT | PARTIALLY_PRESENT | PRESENT
-
-Feedback storage:
-    results/curator_feedback.csv
-    results/curator_feedback.parquet
-
-Design notes
-------------
-- Feedback is upserted by (PMID, curator_id) to prevent duplicates.
-- Feedback rows store BOTH:
-    (a) the BioAnalyzer predictions (pred__*)
-    (b) the curator's evaluation (col_feedback__*)
-    (c) the curator's ground truth labels (true__*)
-  This makes benchmarking reproducible even if the input dataset changes.
+Streamlit dashboard for real-world validation of BioAnalyzer predictions:
+sortable/searchable/filterable table, curator feedback by PMID, column-level
+correctness + ground truth, exportable feedback for confusion matrices and MCC.
+
+Run: streamlit run curator_table/app.py
+Input: CSV or Parquet with PMID (recommended: Title, Journal, Year, Summary).
+Status values: ABSENT | PARTIALLY_PRESENT | PRESENT.
+Feedback: curator_feedback.csv and .parquet under $FEEDBACK_DIR (default: results/), upserted by PMID + curator_id.
 """
 
 from __future__ import annotations
 
+import io
+import logging
 import os
 from pathlib import Path
-from typing import Optional, List, Dict
+from typing import Optional
 
 import pandas as pd
 import streamlit as st
 
 # -----------------------------
-# Expected prediction columns
+# Config (env overrides)
+# -----------------------------
+CONFIG = {
+    "feedback_dir": Path(os.getenv("FEEDBACK_DIR", "results")),
+    "curator_id_default": os.getenv("USER", ""),
+    "bioanalyzer_version_default": os.getenv("BIOANALYZER_VERSION", ""),
+}
+# parents=True: FEEDBACK_DIR may name a nested path whose parent directories do not exist yet.
+CONFIG["feedback_dir"].mkdir(parents=True, exist_ok=True)
+FEEDBACK_CSV = CONFIG["feedback_dir"] / "curator_feedback.csv"
+FEEDBACK_PARQUET = CONFIG["feedback_dir"] / "curator_feedback.parquet"
+
+logger = logging.getLogger(__name__)
+
+# -----------------------------
+# Schema (single source of truth)
 # -----------------------------
 STATUS_COLUMNS = [
     "Host Species Status",
@@ -69,312 +50,202 @@
     "Taxa Level Status",
     "Sample Size Status",
 ]
-
-OPTIONAL_COLUMNS = [
-    "Title",
-    "Journal",
-    "Summary",
-    "Processing Time",
-    "Year",
-    "Publication Date",
+OPTIONS = {
+    "valid_states": ["ABSENT", "PARTIALLY_PRESENT", "PRESENT"],
+    "col_feedback": ["Not reviewed", "Correct", "Incorrect", "Unclear"],
+    "true_label": ["Not reviewed", "ABSENT", "PARTIALLY_PRESENT", "PRESENT"],
+}
+
+_safe = lambda col: col.replace(" ", "_")
+FEEDBACK_BASE_COLS = [
+    "PMID", "curator_id", "overall_verdict", "comment", "timestamp", "bioanalyzer_version",
 ]
+PRED_PREFIX, TRUE_PREFIX, COL_FB_PREFIX = "pred__", "true__", "col_feedback__"
 
-VALID_STATES = ["ABSENT", "PARTIALLY_PRESENT", "PRESENT"]
 
-# Column-level feedback options (curator judgement of correctness)
-COL_FEEDBACK_OPTIONS = [
-    "Not reviewed",
-    "Correct",
-    "Incorrect",
-    "Unclear",
-]
-
-# True label options (ground truth)
-TRUE_LABEL_OPTIONS = [
-    "Not reviewed",
-    "ABSENT",
-    "PARTIALLY_PRESENT",
-    "PRESENT",
-]
-
-# -----------------------------
-# Feedback persistence
-# -----------------------------
-DEFAULT_FEEDBACK_DIR = Path("results")
-DEFAULT_FEEDBACK_DIR.mkdir(exist_ok=True)
-
-FEEDBACK_CSV = DEFAULT_FEEDBACK_DIR / "curator_feedback.csv"
-FEEDBACK_PARQUET = DEFAULT_FEEDBACK_DIR / "curator_feedback.parquet"
+def _feedback_schema() -> list[str]:
+    """Full feedback column schema (dynamic from STATUS_COLUMNS)."""
+    derived = [
+        f"{PRED_PREFIX}{_safe(c)}" for c in STATUS_COLUMNS
+    ] + [
+        f"{TRUE_PREFIX}{_safe(c)}" for c in STATUS_COLUMNS
+    ] + [
+        f"{COL_FB_PREFIX}{_safe(c)}" for c in STATUS_COLUMNS
+    ]
+    return FEEDBACK_BASE_COLS + derived
 
 
 # -----------------------------
 # Helpers
 # -----------------------------
-def make_pmid_link(pmid) -> str:
-    """Return PubMed URL for a PMID."""
+def _make_pmid_link(pmid) -> str:
     try:
-        pid = str(int(float(pmid)))
-        return f"https://pubmed.ncbi.nlm.nih.gov/{pid}/"
-    except Exception:
+        return f"https://pubmed.ncbi.nlm.nih.gov/{int(float(pmid))}/"
+    except (TypeError, ValueError):
         return ""
 
 
-def safe_int(x) -> Optional[int]:
+def _safe_int(x) -> Optional[int]:
     try:
         return int(float(x))
-    except Exception:
+    except (TypeError, ValueError):
         return None
 
 
-def normalize_status_value(x: str) -> str:
-    """Normalize status values to one of ABSENT/PARTIALLY_PRESENT/PRESENT."""
+def _normalize_status(x: str) -> str:
     if pd.isna(x):
         return ""
     x = str(x).strip().upper()
-    if x in VALID_STATES:
+    if x in OPTIONS["valid_states"]:
         return x
-
-    # Common variants
     if x in {"PARTIAL", "PARTIALLY", "PARTLY"}:
         return "PARTIALLY_PRESENT"
     if x in {"YES", "TRUE"}:
         return "PRESENT"
     if x in {"NO", "FALSE"}:
         return "ABSENT"
-
     return x
 
 
-def compute_priority_score(row: pd.Series) -> float:
-    """
-    Rank candidates by how many fields are predicted present.
-
-    - PRESENT contributes 1.0
-    - PARTIALLY_PRESENT contributes 0.5
-    """
-    score = 0.0
-    for col in STATUS_COLUMNS:
-        val = str(row.get(col, "")).strip().upper()
-        if val == "PRESENT":
-            score += 1.0
-        elif val == "PARTIALLY_PRESENT":
-            score += 0.5
-    return score
-
-
-@st.cache_data(show_spinner=False)
-def load_data_from_path(path: str) -> pd.DataFrame:
-    path = Path(path)
-    if not path.exists():
-        return pd.DataFrame()
-
-    suf = path.suffix.lower()
-    if suf == ".csv":
-        return pd.read_csv(path)
-    if suf in (".parquet", ".pq"):
-        return pd.read_parquet(path)
-
-    raise ValueError(f"Unsupported format: {suf}. Use .csv or .parquet.")
+def _priority_score(row: pd.Series) -> float:
+    weights = {"PRESENT": 1.0, "PARTIALLY_PRESENT": 0.5}
+    return sum(
+        weights.get(str(row.get(col, "")).strip().upper(), 0.0)
+        for col in STATUS_COLUMNS
+    )
 
 
+# -----------------------------
+# Data loading (unified)
+# -----------------------------
 @st.cache_data(show_spinner=False)
-def load_data_from_upload(uploaded) -> pd.DataFrame:
-    if uploaded is None:
+def _load_data(source, is_path: bool) -> pd.DataFrame:
+    """Load CSV/Parquet from a path or an uploaded file; empty DataFrame if source is missing, ValueError if unsupported."""
+    if source is None or (is_path and not source):
         return pd.DataFrame()
-
-    name = uploaded.name.lower()
-    if name.endswith(".csv"):
-        return pd.read_csv(uploaded)
-    if name.endswith(".parquet") or name.endswith(".pq"):
-        return pd.read_parquet(uploaded)
-
-    raise ValueError("Unsupported upload. Use .csv or .parquet.")
+    if is_path:
+        path = Path(source)
+        if not path.exists():
+            logger.warning("Path does not exist: %s", path)
+            return pd.DataFrame()
+        buf, ext = path, path.suffix.lower()
+    else:
+        name = source.name.lower()
+        buf = io.BytesIO(source.getvalue())
+        ext = ".csv" if name.endswith(".csv") else (".parquet" if name.endswith((".parquet", ".pq")) else "")
+    if ext == ".csv":
+        return pd.read_csv(buf)
+    if ext in (".parquet", ".pq"):
+        return pd.read_parquet(buf)
+    raise ValueError(f"Unsupported format: {ext or 'unknown'}. Use .csv or .parquet.")
 
 
 def normalize_dataset(df: pd.DataFrame) -> pd.DataFrame:
-    """Normalize dataset and derive helper columns."""
-    if df.empty:
-        return df
-
-    if "PMID" not in df.columns:
-        st.error("Data must contain a 'PMID' column.")
+    """Normalize PMID, statuses, derive year/link/priority. Returns empty DataFrame if invalid."""
+    if df.empty or "PMID" not in df.columns:
+        if not df.empty:
+            st.error("Data must contain a 'PMID' column.")
+        return pd.DataFrame()
+    try:
+        df = (
+            df.assign(PMID=df["PMID"].apply(_safe_int))
+            .dropna(subset=["PMID"])
+            .astype({"PMID": int})
+        )
+    except Exception as e:
+        logger.exception("PMID normalization failed: %s", e)
         return pd.DataFrame()
-
-    # Normalize PMID
-    df["PMID"] = df["PMID"].apply(safe_int)
-    df = df.dropna(subset=["PMID"]).copy()
-    df["PMID"] = df["PMID"].astype(int)
-
-    # Derive year
     if "Year" not in df.columns and "Publication Date" in df.columns:
         try:
-            df["Year"] = pd.to_datetime(df["Publication Date"], errors="coerce").dt.year
+            df = df.assign(Year=pd.to_datetime(df["Publication Date"], errors="coerce").dt.year)
         except Exception:
             pass
-
-    # Normalize statuses
     for col in STATUS_COLUMNS:
         if col in df.columns:
-            df[col] = df[col].apply(normalize_status_value)
-
-    # Priority score
-    df["Priority Score"] = df.apply(compute_priority_score, axis=1)
-
-    # PubMed link
-    df["PubMed Link"] = df["PMID"].apply(make_pmid_link)
-
+            df = df.assign(**{col: df[col].apply(_normalize_status)})
+    df = df.assign(
+        **{"Priority Score": df.apply(_priority_score, axis=1)},
+        **{"PubMed Link": df["PMID"].apply(_make_pmid_link)},
+    )
     return df
 
 
-def _safe_field_name(col: str) -> str:
-    """Convert 'Host Species Status' -> 'Host_Species_Status'."""
-    return col.replace(" ", "_")
-
-
-def _default_feedback_columns() -> List[str]:
-    """Defines the full schema for feedback rows."""
-    base = [
-        "PMID",
-        "curator_id",
-        "overall_verdict",
-        "comment",
-        "timestamp",
-        "bioanalyzer_version",
-    ]
-
-    # Predicted statuses
-    pred_cols = []
-    for col in STATUS_COLUMNS:
-        safe = _safe_field_name(col)
-        pred_cols.append(f"pred__{safe}")
-
-    # True labels (curator ground truth)
-    true_cols = []
-    for col in STATUS_COLUMNS:
-        safe = _safe_field_name(col)
-        true_cols.append(f"true__{safe}")
-
-    # Column-level feedback (Correct/Incorrect/etc.)
-    col_feedback_cols = []
-    for col in STATUS_COLUMNS:
-        safe = _safe_field_name(col)
-        col_feedback_cols.append(f"col_feedback__{safe}")
-
-    return base + pred_cols + true_cols + col_feedback_cols
-
-
+# -----------------------------
+# Feedback persistence
+# -----------------------------
 def load_feedback() -> pd.DataFrame:
-    """Load feedback from parquet/csv or return empty with schema."""
-    if FEEDBACK_PARQUET.exists():
-        try:
-            df = pd.read_parquet(FEEDBACK_PARQUET)
-            return df
-        except Exception:
-            pass
-
-    if FEEDBACK_CSV.exists():
-        try:
-            df = pd.read_csv(FEEDBACK_CSV)
-            return df
-        except Exception:
-            pass
-
-    return pd.DataFrame(columns=_default_feedback_columns())
+    """Load feedback from parquet then csv; empty DataFrame with schema if missing."""
+    for path, reader in [(FEEDBACK_PARQUET, pd.read_parquet), (FEEDBACK_CSV, pd.read_csv)]:
+        if path.exists():
+            try:
+                return reader(path)
+            except Exception as e:
+                logger.warning("Failed to load %s: %s", path, e)
+    return pd.DataFrame(columns=_feedback_schema())
 
 
 def save_feedback(df: pd.DataFrame) -> None:
-    """Save feedback in CSV and optionally parquet."""
-    DEFAULT_FEEDBACK_DIR.mkdir(exist_ok=True)
-
-    # Ensure schema
-    for col in _default_feedback_columns():
+    """Persist feedback to CSV and (best-effort) Parquet; missing schema columns are added to df in place."""
+    CONFIG["feedback_dir"].mkdir(parents=True, exist_ok=True)
+    for col in _feedback_schema():
         if col not in df.columns:
             df[col] = ""
-
     df.to_csv(FEEDBACK_CSV, index=False)
-
     try:
         df.to_parquet(FEEDBACK_PARQUET, index=False)
-    except Exception:
-        # Parquet may fail if pyarrow isn't installed
-        pass
+    except Exception as e:
+        logger.warning("Parquet save skipped: %s", e)
 
 
-def upsert_feedback(existing: pd.DataFrame, row: Dict) -> pd.DataFrame:
+def upsert_feedback(existing: pd.DataFrame, row: dict) -> pd.DataFrame:
     """Upsert by PMID + curator_id."""
-    if existing.empty:
-        return pd.DataFrame([row])
-
-    # Ensure schema
-    for col in _default_feedback_columns():
+    for col in _feedback_schema():
         if col not in existing.columns:
             existing[col] = ""
-
-    mask = (existing["PMID"].astype(str) == str(row["PMID"])) & (
-        existing["curator_id"].astype(str) == str(row["curator_id"])
-    )
-
-    if mask.any():
-        for k, v in row.items():
-            existing.loc[mask, k] = v
-        return existing
-
+    if not existing.empty:
+        mask = (
+            (existing["PMID"].astype(str) == str(row["PMID"]))
+            & (existing["curator_id"].astype(str) == str(row["curator_id"]))
+        )
+        if mask.any():
+            for k, v in row.items():
+                existing.loc[mask, k] = v
+            return existing
     return pd.concat([existing, pd.DataFrame([row])], ignore_index=True)
 
 
 # -----------------------------
-# UI rendering
+# UI
 # -----------------------------
 def render_filters(df: pd.DataFrame) -> pd.DataFrame:
     st.sidebar.header("Filters")
-
     search = st.sidebar.text_input(
         "Search (PMID, title, journal, summary)",
         placeholder="e.g. obesity, 2019, Lactobacillus",
     ).strip().lower()
-
-    status_filters = {}
-    for col in STATUS_COLUMNS:
-        if col in df.columns:
-            status_filters[col] = st.sidebar.multiselect(
-                col,
-                options=VALID_STATES,
-                default=[],
-            )
-
+    status_filters = {
+        col: st.sidebar.multiselect(col, options=OPTIONS["valid_states"], default=[])
+        for col in STATUS_COLUMNS
+        if col in df.columns
+    }
     year_range = None
     if "Year" in df.columns:
         years = df["Year"].dropna()
         if not years.empty:
-            years = years.astype(int)
             min_y, max_y = int(years.min()), int(years.max())
-            year_range = st.sidebar.slider(
-                "Year range",
-                min_value=min_y,
-                max_value=max_y,
-                value=(min_y, max_y),
-            )
-
+            year_range = st.sidebar.slider("Year range", min_y, max_y, (min_y, max_y))
     out = df.copy()
-
-    # Search
     if search:
-        mask = pd.Series(False, index=out.index)
-        mask |= out["PMID"].astype(str).str.contains(search, na=False)
+        mask = out["PMID"].astype(str).str.contains(search, na=False, regex=False)  # literal match: free text may hold regex metacharacters
         for col in ["Title", "Journal", "Summary"]:
             if col in out.columns:
-                mask |= out[col].astype(str).str.lower().str.contains(search, na=False)
+                mask = mask | out[col].astype(str).str.lower().str.contains(search, na=False, regex=False)
         out = out.loc[mask]
-
-    # Status filters
     for col, allowed in status_filters.items():
         if allowed:
             out = out[out[col].isin(allowed)]
-
-    # Year filter
     if year_range and "Year" in out.columns:
         out = out[(out["Year"] >= year_range[0]) & (out["Year"] <= year_range[1])]
-
     return out
 
 
@@ -382,56 +253,30 @@ def render_table(df: pd.DataFrame) -> Optional[int]:
     if df.empty:
         st.warning("No rows match your filters.")
         return None
-
     st.subheader("Candidate curatable articles")
     st.caption("Tip: Sort by Priority Score to review the most promising candidates first.")
-
     sort_options = ["Priority Score", "PMID"]
     if "Title" in df.columns:
         sort_options.append("Title")
-    for c in STATUS_COLUMNS:
-        if c in df.columns:
-            sort_options.append(c)
-
+    sort_options.extend(c for c in STATUS_COLUMNS if c in df.columns)
     sort_col = st.selectbox("Sort by", options=sort_options, index=0)
     ascending = st.checkbox("Ascending", value=False)
-
     if sort_col in df.columns:
         df = df.sort_values(by=sort_col, ascending=ascending, na_position="last")
-
     st.divider()
     max_rows = st.slider("Rows to display", 50, 2000, 300, 50)
     df_show = df.head(max_rows).copy()
-
-    display_cols: List[str] = ["PMID"]
-    if "PubMed Link" in df_show.columns:
-        display_cols.append("PubMed Link")
-
-    for c in ["Priority Score", "Title", "Journal", "Year"]:
-        if c in df_show.columns:
-            display_cols.append(c)
-
-    for c in STATUS_COLUMNS:
-        if c in df_show.columns:
-            display_cols.append(c)
-
-    if "Summary" in df_show.columns:
-        display_cols.append("Summary")
-
+    want = ["PMID", "PubMed Link", "Priority Score", "Title", "Journal", "Year"] + list(STATUS_COLUMNS) + ["Summary"]
+    display_cols = [c for c in want if c in df_show.columns]
     df_show = df_show[display_cols]
-
     st.dataframe(
         df_show,
         use_container_width=True,
         height=650,
-        column_config={
-            "PubMed Link": st.column_config.LinkColumn("PubMed", display_text="Open"),
-        },
+        column_config={"PubMed Link": st.column_config.LinkColumn("PubMed", display_text="Open")},
     )
-
     st.metric("Rows after filtering", len(df))
     st.metric("Rows displayed", len(df_show))
-
     st.divider()
     st.subheader("Quick select for feedback")
     selected = st.selectbox(
@@ -439,127 +284,89 @@ def render_table(df: pd.DataFrame) -> Optional[int]:
         options=[""] + df_show["PMID"].astype(str).tolist(),
         index=0,
     )
-    if selected:
-        return int(selected)
-    return None
-
-
-def render_column_level_validation(selected_row: pd.Series) -> Dict[str, str]:
-    """
-    Render per-column correctness + curator true label UI.
-    Returns a dict with keys:
-      - col_feedback__X
-      - true__X
-    """
-    st.markdown("### Field-by-field validation (ground truth)")
+    return int(selected) if selected else None
+
 
+def render_column_level_validation(selected_row: pd.Series) -> dict[str, str]:
+    """Per-column correctness + curator true label UI. Returns col_feedback__* and true__*."""
+    st.markdown("### Field-by-field validation (ground truth)")
     st.caption(
-        "For each field, provide the curator TRUE label (ground truth). "
-        "Optionally also mark whether BioAnalyzer's predicted status was correct."
+        "For each field, provide the curator TRUE label. "
+        "Optionally mark whether BioAnalyzer's predicted status was correct."
     )
-
     out = {}
-
     left, right = st.columns(2)
-    halves = [STATUS_COLUMNS[:3], STATUS_COLUMNS[3:]]
-
-    for pane, cols in zip([left, right], halves):
+    for pane, cols in zip([left, right], [STATUS_COLUMNS[:3], STATUS_COLUMNS[3:]]):
         with pane:
             for col in cols:
                 if col not in selected_row.index:
                     continue
-
-                safe = _safe_field_name(col)
+                safe = _safe(col)
                 pred = str(selected_row.get(col, "")).strip()
                 label = col.replace(" Status", "")
-
                 st.markdown(f"**{label}**")
                 st.write(f"BioAnalyzer predicted: `{pred}`")
-
-                # Curator true label
-                true_key = f"true__{safe}"
-                true_choice = st.selectbox(
+                true_key = f"{TRUE_PREFIX}{safe}"
+                out[true_key] = st.selectbox(
                     f"Curator TRUE label for {label}",
-                    options=TRUE_LABEL_OPTIONS,
+                    options=OPTIONS["true_label"],
                     index=0,
                     key=f"ui__{true_key}",
                 )
-                out[true_key] = true_choice
-
-                # Optional correctness judgement
-                fb_key = f"col_feedback__{safe}"
-                fb_choice = st.selectbox(
+                fb_key = f"{COL_FB_PREFIX}{safe}"
+                out[fb_key] = st.selectbox(
                     f"Was BioAnalyzer correct for {label}?",
-                    options=COL_FEEDBACK_OPTIONS,
+                    options=OPTIONS["col_feedback"],
                     index=0,
                     key=f"ui__{fb_key}",
                 )
-                out[fb_key] = fb_choice
-
                 st.divider()
-
     return out
 
 
 def render_feedback_section(selected_pmid: Optional[int], dataset_df: pd.DataFrame) -> None:
     st.subheader("Curator feedback")
     st.caption("Feedback is stored locally in results/. Entries are upserted by PMID + curator_id.")
-
     feedback_df = load_feedback()
-
     selected_row = None
-    title_prefill = ""
-
     if selected_pmid is not None:
         try:
             selected_row = dataset_df.loc[dataset_df["PMID"] == selected_pmid].iloc[0]
         except Exception:
             selected_row = None
-
-    if selected_row is not None and "Title" in selected_row.index:
-        title_prefill = str(selected_row.get("Title", ""))
+    title_prefill = str(selected_row.get("Title", "")) if selected_row is not None and "Title" in selected_row.index else ""
 
     with st.form("feedback_form", clear_on_submit=False):
         curator_id = st.text_input(
             "Curator ID / initials",
-            value=os.getenv("USER", ""),
+            value=CONFIG["curator_id_default"],
             placeholder="e.g. Ronald Ouma",
         ).strip()
-
         fb_pmid = st.text_input(
             "PMID",
             value=str(selected_pmid) if selected_pmid else "",
             placeholder="e.g. 31215600",
         ).strip()
-
         if title_prefill:
             st.write(f"**Title:** {title_prefill}")
-
         overall_verdict = st.selectbox(
             "Overall paper verdict",
             options=["Curatable", "Not curatable", "Uncertain", "Not reviewed"],
             index=0,
         )
-
         comment = st.text_area(
             "Comment (optional)",
             placeholder="Evidence, edge case, missing field, false positive reason, etc.",
             height=90,
         )
-
         bioanalyzer_version = st.text_input(
             "BioAnalyzer version (recommended)",
-            value=os.getenv("BIOANALYZER_VERSION", ""),
+            value=CONFIG["bioanalyzer_version_default"],
             placeholder="e.g. 1.0.0, commit SHA, docker tag",
         ).strip()
-
-        # Field-by-field validation
-        field_validation = {}
-        if selected_row is not None:
-            field_validation = render_column_level_validation(selected_row)
-        else:
+        field_validation = render_column_level_validation(selected_row) if selected_row is not None else {}
+        if selected_row is None:
             st.info("Select a PMID above to enable field-level validation.")
-
         submitted = st.form_submit_button("Save feedback")
 
         if submitted:
@@ -569,12 +376,12 @@ def render_feedback_section(selected_pmid: Optional[int], dataset_df: pd.DataFra
             if not fb_pmid:
                 st.error("Please provide a PMID.")
                 return
-
-            pid = safe_int(fb_pmid)
+            pid = _safe_int(fb_pmid)
             if pid is None:
                 st.error("PMID must be numeric.")
                 return
-
+            if selected_pmid is not None and pid != selected_pmid and pid not in dataset_df["PMID"].values:
+                st.warning("PMID not in current dataset; feedback will still be saved.")
             row = {
                 "PMID": int(pid),
                 "curator_id": curator_id,
@@ -583,62 +390,33 @@ def render_feedback_section(selected_pmid: Optional[int], dataset_df: pd.DataFra
                 "timestamp": pd.Timestamp.now(tz="UTC").isoformat(),
                 "bioanalyzer_version": bioanalyzer_version,
             }
-
-            # Always store predictions into feedback (reproducibility)
-            for col in STATUS_COLUMNS:
-                safe = _safe_field_name(col)
-                pred_key = f"pred__{safe}"
-
-                if selected_row is not None and col in selected_row.index:
-                    row[pred_key] = str(selected_row.get(col, "")).strip()
-                else:
-                    row[pred_key] = ""
-
-            # Store curator true labels + correctness
             for col in STATUS_COLUMNS:
-                safe = _safe_field_name(col)
-
-                true_key = f"true__{safe}"
-                fb_key = f"col_feedback__{safe}"
-
-                row[true_key] = field_validation.get(true_key, "Not reviewed")
-                row[fb_key] = field_validation.get(fb_key, "Not reviewed")
-
+                s = _safe(col)
+                row[f"{PRED_PREFIX}{s}"] = (
+                    str(selected_row.get(col, "")).strip()
+                    if selected_row is not None and col in selected_row.index else ""
+                )
+                row[f"{TRUE_PREFIX}{s}"] = field_validation.get(f"{TRUE_PREFIX}{s}", "Not reviewed")
+                row[f"{COL_FB_PREFIX}{s}"] = field_validation.get(f"{COL_FB_PREFIX}{s}", "Not reviewed")
             feedback_df = upsert_feedback(feedback_df, row)
             save_feedback(feedback_df)
-
+            logger.info("Saved feedback for PMID %s (curator=%s)", pid, curator_id)
             st.success(f"Saved feedback for PMID {pid} (curator={curator_id}).")
 
     st.divider()
     st.subheader("Existing feedback")
-
     if feedback_df.empty:
         st.info("No feedback recorded yet.")
         return
-
-    compact_cols = [
-        "PMID",
-        "curator_id",
-        "overall_verdict",
-        "timestamp",
-        "bioanalyzer_version",
+    compact_cols = [c for c in FEEDBACK_BASE_COLS if c in feedback_df.columns] + [
+        k for c in STATUS_COLUMNS for k in (f"{PRED_PREFIX}{_safe(c)}", f"{TRUE_PREFIX}{_safe(c)}", f"{COL_FB_PREFIX}{_safe(c)}")
+        if k in feedback_df.columns
     ]
-
-    # Add prediction + true + feedback columns in a readable order
-    for col in STATUS_COLUMNS:
-        safe = _safe_field_name(col)
-        for k in [f"pred__{safe}", f"true__{safe}", f"col_feedback__{safe}"]:
-            if k in feedback_df.columns:
-                compact_cols.append(k)
-
-    compact_cols = [c for c in compact_cols if c in feedback_df.columns]
-
     st.dataframe(
         feedback_df.sort_values("timestamp", ascending=False)[compact_cols],
         use_container_width=True,
         height=380,
     )
-
     st.download_button(
         "Download feedback CSV",
         data=feedback_df.to_csv(index=False),
@@ -647,103 +425,64 @@ def render_feedback_section(selected_pmid: Optional[int], dataset_df: pd.DataFra
     )
 
 
-# -----------------------------
-# Main
-# -----------------------------
-def main():
+def main() -> None:
     st.set_page_config(page_title="BioAnalyzer Curator Table", layout="wide")
     st.title("BioAnalyzer Curator Table")
-
     st.markdown(
         """
-This dashboard provides a **sortable, searchable, filterable** table of BioAnalyzer predictions for
-candidate curatable PubMed articles.
-
-### Why this is useful
-- Lets curators review predictions in a real-world workflow
-- Captures feedback aligned by PMID
-- Stores:
-  - BioAnalyzer predicted statuses
-  - curator ground truth statuses
-  - curator correctness flags
-
-This makes the exported feedback suitable for:
-- confusion matrices to evaluate accuracy 
-- per-field error profiling
-- MCC decisions for PARTIALLY_PRESENT
+This dashboard provides a **sortable, searchable, filterable** table of BioAnalyzer predictions.
+
+- Curators review predictions and capture feedback by PMID.
+- Stored: predicted statuses, curator ground truth, correctness flags.
+- Export suitable for confusion matrices, per-field error profiling, MCC for PARTIALLY_PRESENT.
         """
     )
-
     st.sidebar.header("Data source")
-
     data_source = st.sidebar.radio(
         "Choose input mode",
         options=["Upload CSV/Parquet", "Use file path"],
         index=0,
     )
-
     raw_df = pd.DataFrame()
-
     if data_source == "Upload CSV/Parquet":
-        uploaded = st.sidebar.file_uploader(
-            "Upload dataset",
-            type=["csv", "parquet", "pq"],
-        )
+        uploaded = st.sidebar.file_uploader("Upload dataset", type=["csv", "parquet", "pq"])
         if uploaded:
             try:
-                raw_df = load_data_from_upload(uploaded)
+                raw_df = _load_data(uploaded, is_path=False)
             except Exception as e:
                 st.error(f"Could not load file: {e}")
+                logger.exception("Upload load failed")
                 return
     else:
-        path = st.sidebar.text_input(
-            "Path to CSV/Parquet",
-            placeholder="e.g. analysis_results.csv",
-        ).strip()
+        path = st.sidebar.text_input("Path to CSV/Parquet", placeholder="e.g. analysis_results.csv").strip()
         if path:
             try:
-                raw_df = load_data_from_path(path)
+                raw_df = _load_data(path, is_path=True)
             except Exception as e:
                 st.error(str(e))
                 return
-
     if raw_df.empty:
         st.info("Upload a dataset or provide a file path to begin.")
         st.stop()
-
     df = normalize_dataset(raw_df)
-
     if df.empty:
         st.error("Dataset loaded, but no valid rows found after normalization.")
         st.stop()
-
     missing = [c for c in STATUS_COLUMNS if c not in df.columns]
     if missing:
         st.warning(
-            "Some expected status columns are missing. "
-            "Priority scoring and filtering will be partial.\n\n"
-            f"Missing columns: {missing}"
+            "Some expected status columns are missing. Priority and filtering will be partial.\n\n"
+            f"Missing: {missing}"
         )
-
     filtered_df = render_filters(df)
     selected_pmid = render_table(filtered_df)
-
     st.divider()
     render_feedback_section(selected_pmid, filtered_df)
-
     st.sidebar.divider()
     st.sidebar.header("Notes")
     st.sidebar.markdown(
-        f"""
-Feedback files are saved to:
-
-- `{FEEDBACK_CSV}`
-- `{FEEDBACK_PARQUET}` (if parquet supported)
-
-Tip: set an environment variable to track versions:
-
-- `BIOANALYZER_VERSION=commit_sha`
-        """
+        f"Feedback: `{FEEDBACK_CSV}` and `{FEEDBACK_PARQUET}`. "
+        "Tip: set `BIOANALYZER_VERSION` or `FEEDBACK_DIR` in environment."
     )
 
 
diff --git a/docker-setup.sh b/docker-setup.sh
index 934005e..8164653 100755
--- a/docker-setup.sh
+++ b/docker-setup.sh
@@ -68,7 +68,7 @@ fi
 print_status "Testing Docker image..."
 
 # Test the image with a simple command
-docker run --rm bioanalyzer-package python test_cli.py
+docker run --rm bioanalyzer-package python scripts/dev/test_cli.py
 
 if [ $? -eq 0 ]; then
     print_success "Docker image test passed!"
diff --git a/docs/CURATOR_TABLE_DESIGN.md b/docs/CURATOR_TABLE_DESIGN.md
index a1bd351..9856087 100644
--- a/docs/CURATOR_TABLE_DESIGN.md
+++ b/docs/CURATOR_TABLE_DESIGN.md
@@ -92,4 +92,4 @@ We can start with **PMID, Title, Year, Journal, the 6 field statuses, and one co
 
 - **Design:** `docs/CURATOR_TABLE_DESIGN.md` (this file).  
 - **App:** `curator_table/` (Streamlit app + README).  
-- **Data format:** Same as existing BioAnalyzer export (e.g. `analysis_results.csv` / validation dataset shape); see `create_validation_dataset.py` and `confusion_matrix_analysis.py` for column names.
+- **Data format:** Same as existing BioAnalyzer export (e.g. `analysis_results.csv` / validation dataset shape); see `create_validation_dataset.py` and `scripts/eval/confusion_matrix_analysis.py` for column names.
diff --git a/run_confusion_analysis.sh b/run_confusion_analysis.sh
index 30defd0..99ffee5 100755
--- a/run_confusion_analysis.sh
+++ b/run_confusion_analysis.sh
@@ -55,7 +55,7 @@ docker run --rm \
     -v "$SCRIPT_DIR:/app" \
     -w /app \
     bioanalyzer-package \
-    python confusion_matrix_analysis.py "$PREDICTIONS_FILE" "$FEEDBACK_FILE"
+    python scripts/eval/confusion_matrix_analysis.py "$PREDICTIONS_FILE" "$FEEDBACK_FILE"
 
 echo ""
 echo "=========================================="
diff --git a/scripts/dev/test_cli.py b/scripts/dev/test_cli.py
new file mode 100644
index 0000000..f60d6bb
--- /dev/null
+++ b/scripts/dev/test_cli.py
@@ -0,0 +1,124 @@
+#!/usr/bin/env python3
+"""
+BioAnalyzer CLI Test - Simple test without dependencies
+"""
+
+
+def test_cli_structure():
+    """Test that the CLI structure is correct."""
+    print("🧪 Testing BioAnalyzer Package CLI Structure...")
+
+    # Test CLI file exists
+    import os
+
+    cli_path = "cli.py"
+    if os.path.exists(cli_path):
+        print("✅ CLI file exists")
+    else:
+        print("❌ CLI file missing")
+        return False
+
+    # Test main.py exists
+    main_path = "main.py"
+    if os.path.exists(main_path):
+        print("✅ Main.py file exists")
+    else:
+        print("❌ Main.py file missing")
+        return False
+
+    # Test app directory structure
+    app_dirs = ["app", "app/api", "app/services", "app/models", "app/utils"]
+    for dir_path in app_dirs:
+        if os.path.exists(dir_path):
+            print(f"✅ {dir_path} directory exists")
+        else:
+            print(f"❌ {dir_path} directory missing")
+            return False
+
+    # Test configuration files
+    config_files = ["config/requirements.txt", "setup.py", "README.md"]
+    for config_file in config_files:
+        if os.path.exists(config_file):
+            print(f"✅ {config_file} exists")
+        else:
+            print(f"❌ {config_file} missing")
+            return False
+
+    print("\n🎉 All structure tests passed!")
+    return True
+
+
+def test_field_info():
+    """Test field information display."""
+    print("\n📋 BugSigDB Essential Fields:")
+    print("=" * 50)
+
+    fields = {
+        "host_species": {
+            "name": "Host Species",
+            "description": "The host organism being studied (e.g., Human, Mouse, Rat)",
+            "required": True,
+        },
+        "body_site": {
+            "name": "Body Site",
+            "description": "Where the microbiome sample was collected (e.g., Gut, Oral, Skin)",
+            "required": True,
+        },
+        "condition": {
+            "name": "Condition",
+            "description": "What disease, treatment, or exposure is being studied",
+            "required": True,
+        },
+        "sequencing_type": {
+            "name": "Sequencing Type",
+            "description": "What molecular method was used (e.g., 16S, metagenomics)",
+            "required": True,
+        },
+        "taxa_level": {
+            "name": "Taxa Level",
+            "description": "What taxonomic level was analyzed (e.g., phylum, genus, species)",
+            "required": True,
+        },
+        "sample_size": {
+            "name": "Sample Size",
+            "description": "Number of samples or participants analyzed",
+            "required": True,
+        },
+    }
+
+    for field_key, field_info in fields.items():
+        print(f"\n{field_info['name']} ({field_key}):")
+        print(f"  Description: {field_info['description']}")
+        print(f"  Required: {'Yes' if field_info['required'] else 'No'}")
+
+    print("\n\nField Status Values:")
+    print("-" * 30)
+    status_values = {
+        "PRESENT": "Information is complete and clear",
+        "PARTIALLY_PRESENT": "Some information available but incomplete",
+        "ABSENT": "Information is missing",
+    }
+
+    for status, description in status_values.items():
+        print(f"  {status}: {description}")
+
+    print("\n" + "=" * 50)
+
+
+if __name__ == "__main__":
+    print("🚀 BioAnalyzer Package CLI Test")
+    print("=" * 40)
+
+    # Paths are checked relative to the current working directory (run from the repo root).
+    if test_cli_structure():
+        # The field reference is only printed once the layout check has passed.
+        test_field_info()
+
+        print("\n✅ Package CLI structure is correct!")
+        print("📝 To use the CLI, install dependencies:")
+        print("   pip install -r config/requirements.txt")
+        print("   python3 cli.py fields")
+        print("   python3 cli.py analyze 12345678")
+    else:
+        print("\n❌ Package CLI structure has issues!")
+        raise SystemExit(1)  # non-zero status so `$?` checks in callers (docker-setup.sh) see the failure
diff --git a/confusion_matrix_analysis.py b/scripts/eval/confusion_matrix_analysis.py
similarity index 99%
rename from confusion_matrix_analysis.py
rename to scripts/eval/confusion_matrix_analysis.py
index 6feb7ea..102ac86 100644
--- a/confusion_matrix_analysis.py
+++ b/scripts/eval/confusion_matrix_analysis.py
@@ -248,3 +248,4 @@ def main():
 
 if __name__ == "__main__":
     main()
+
diff --git a/scripts/ops/log_cleanup.py b/scripts/ops/log_cleanup.py
new file mode 100644
index 0000000..14717e2
--- /dev/null
+++ b/scripts/ops/log_cleanup.py
@@ -0,0 +1,212 @@
+#!/usr/bin/env python3
+"""
+Archived: Log Cleanup for BioAnalyzer
+=====================================
+
+This script helps manage log files by cleaning up old entries and rotating logs.
+It lives in scripts/ops/ to indicate it's a dev/ops utility and not part of the
+core backend runtime.
+"""
+
+import os
+import sys
+import argparse
+from pathlib import Path
+from datetime import datetime, timedelta
+import shutil
+
+
+class LogCleanup:
+    """Log file cleanup and management."""
+
+    def __init__(self, log_dir="logs"):
+        self.log_dir = Path(log_dir)
+        self.log_files = {
+            "main": self.log_dir / "bioanalyzer.log",
+            "performance": self.log_dir / "performance.log",
+            "errors": self.log_dir / "errors.log",
+            "api": self.log_dir / "api_calls.log",
+        }
+
+    def cleanup_old_logs(self, days=7):
+        """Remove log files older than specified days."""
+        cutoff_date = datetime.now() - timedelta(days=days)
+        removed_count = 0
+
+        print(f"🧹 Cleaning up logs older than {days} days...")
+
+        for log_file in self.log_dir.glob("*.log.*"):
+            try:
+                # Check if it's a rotated log file
+                if log_file.name.endswith((".1", ".2", ".3", ".4", ".5")):
+                    # Get file modification time
+                    mtime = datetime.fromtimestamp(log_file.stat().st_mtime)
+                    if mtime < cutoff_date:
+                        log_file.unlink()
+                        print(f"✅ Removed old log: {log_file.name}")
+                        removed_count += 1
+            except Exception as e:
+                print(f"❌ Error removing {log_file.name}: {e}")
+
+        print(f"✅ Cleanup complete. Removed {removed_count} old log files.")
+
+    def rotate_logs(self):
+        """Manually rotate log files."""
+        print("🔄 Rotating log files...")
+
+        for log_type, log_file in self.log_files.items():
+            if not log_file.exists():
+                continue
+
+            try:
+                # Create backup with timestamp
+                timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
+                backup_name = f"{log_file.stem}_{timestamp}{log_file.suffix}"
+                backup_path = self.log_dir / backup_name
+
+                # Copy current log to backup
+                shutil.copy2(log_file, backup_path)
+
+                # Clear current log
+                with open(log_file, "w") as f:
+                    f.write("")
+
+                print(f"✅ Rotated {log_type}: {backup_name}")
+
+            except Exception as e:
+                print(f"❌ Error rotating {log_type}: {e}")
+
+    def compress_logs(self):
+        """Compress old log files to save space."""
+        import gzip
+
+        print("🗜️  Compressing old log files...")
+        compressed_count = 0
+
+        for log_file in self.log_dir.glob("*.log.*"):
+            if log_file.name.endswith((".1", ".2", ".3", ".4", ".5")):
+                try:
+                    # Skip already compressed files
+                    if log_file.suffix == ".gz":
+                        continue
+
+                    # Compress file
+                    with open(log_file, "rb") as f_in:
+                        compressed_path = log_file.with_suffix(log_file.suffix + ".gz")
+                        with gzip.open(compressed_path, "wb") as f_out:
+                            shutil.copyfileobj(f_in, f_out)
+
+                    # Remove original file
+                    log_file.unlink()
+                    print(f"✅ Compressed: {log_file.name} -> {compressed_path.name}")
+                    compressed_count += 1
+
+                except Exception as e:
+                    print(f"❌ Error compressing {log_file.name}: {e}")
+
+        print(f"✅ Compression complete. Compressed {compressed_count} files.")
+
+    def show_log_info(self):
+        """Print size and line count of each known log, then list rotated `*.log.*` files."""
+        print("📊 LOG FILES INFORMATION")
+        print("=" * 50)
+
+        total_size = 0
+
+        for log_type, log_file in self.log_files.items():
+            if log_file.exists():
+                size = log_file.stat().st_size
+                size_mb = size / (1024 * 1024)
+                total_size += size
+
+                # Count lines; an unreadable or non-UTF-8 file is reported as 0 lines
+                try:
+                    with open(log_file, "r", encoding="utf-8") as f:
+                        line_count = sum(1 for _ in f)
+                except (OSError, UnicodeDecodeError):
+                    line_count = 0
+
+                print(
+                    f"{log_type.upper():12} | {size_mb:6.2f} MB | {line_count:8d} lines"
+                )
+            else:
+                print(f"{log_type.upper():12} | Not found")
+
+        # Rotated logs are listed for reference only; their sizes are not added to total_size
+        rotated_logs = list(self.log_dir.glob("*.log.*"))
+        if rotated_logs:
+            print(f"\n🔄 ROTATED LOGS ({len(rotated_logs)} files):")
+            for log_file in sorted(rotated_logs):
+                info = log_file.stat()
+                size_kb = info.st_size / 1024
+                mtime = datetime.fromtimestamp(info.st_mtime)
+                age = datetime.now() - mtime
+
+                if age.days > 0:
+                    age_str = f"{age.days}d ago"
+                else:
+                    age_str = f"{age.seconds // 3600}h ago"
+
+                print(f"  {log_file.name:30} | {size_kb:6.1f} KB | {age_str}")
+
+        print(f"\n💾 Total log size: {total_size / (1024 * 1024):.2f} MB")
+
+    def reset_logs(self, confirm=True):
+        """Reset all log files (clear content)."""
+        if confirm:
+            response = input("Are you sure you want to reset ALL log files? (y/N): ")
+            if response.lower() != "y":
+                print("Log reset cancelled.")
+                return
+
+        print("🔄 Resetting all log files...")
+
+        for log_type, log_file in self.log_files.items():
+            if log_file.exists():
+                try:
+                    with open(log_file, "w") as f:
+                        f.write("")
+                    print(f"✅ Reset {log_type} log")
+                except Exception as e:
+                    print(f"❌ Error resetting {log_type}: {e}")
+
+        print("✅ All log files have been reset.")
+
+
+def main():
+    """Parse CLI flags and run the first matching action; with no action flag, show log info."""
+    parser = argparse.ArgumentParser(description="BioAnalyzer Log Cleanup (Archived)")
+    parser.add_argument(
+        "--cleanup", type=int, metavar="DAYS", help="Clean up logs older than DAYS"
+    )
+    parser.add_argument("--rotate", action="store_true", help="Rotate log files")
+    parser.add_argument("--compress", action="store_true", help="Compress old log files")
+    parser.add_argument("--info", action="store_true", help="Show log file information")
+    parser.add_argument("--reset", action="store_true", help="Reset all log files")
+    parser.add_argument(
+        "--logs", default="logs", help="Log directory path (default: logs)"
+    )
+
+    args = parser.parse_args()
+
+    cleanup = LogCleanup(args.logs)
+
+    # Compare against None so an explicit "--cleanup 0" is honored instead of falling through.
+    if args.cleanup is not None:
+        cleanup.cleanup_old_logs(args.cleanup)
+    elif args.rotate:
+        cleanup.rotate_logs()
+    elif args.compress:
+        cleanup.compress_logs()
+    elif args.info:
+        cleanup.show_log_info()
+    elif args.reset:
+        cleanup.reset_logs()
+    else:
+        # Default: show info
+        cleanup.show_log_info()
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/scripts/ops/log_dashboard.py b/scripts/ops/log_dashboard.py
new file mode 100644
index 0000000..4b0d0d8
--- /dev/null
+++ b/scripts/ops/log_dashboard.py
@@ -0,0 +1,254 @@
+#!/usr/bin/env python3
+"""
+Archived: Log Dashboard for BioAnalyzer
+=======================================
+
+A simple dashboard to monitor logs in real-time with performance metrics.
+It lives in scripts/ops/ to indicate it's a dev/ops utility and not part of the
+core backend runtime.
+"""
+
+import os
+import sys
+import time
+import json
+from pathlib import Path
+from datetime import datetime, timedelta
+import argparse
+import re
+
+
+class LogDashboard:
+    """Simple log monitoring dashboard."""
+
+    def __init__(self, log_dir="logs"):
+        self.log_dir = Path(log_dir)
+        self.performance_log = self.log_dir / "performance.log"
+        self.error_log = self.log_dir / "errors.log"
+        self.main_log = self.log_dir / "bioanalyzer.log"
+
+        # Statistics
+        self.stats = {
+            "total_queries": 0,
+            "successful_queries": 0,
+            "failed_queries": 0,
+            "cached_queries": 0,
+            "avg_response_time": 0,
+            "errors": [],
+            "recent_activity": [],
+        }
+
+        # Track file positions
+        self.file_positions = {}
+        for log_file in [self.performance_log, self.error_log, self.main_log]:
+            if log_file.exists():
+                self.file_positions[log_file] = log_file.stat().st_size
+            else:
+                self.file_positions[log_file] = 0
+
+    def update_stats(self):
+        """Update statistics from log files."""
+        self._update_performance_stats()
+        self._update_error_stats()
+        self._update_recent_activity()
+
+    def _update_performance_stats(self):
+        """Recount query outcomes and the mean duration from the whole performance log."""
+        if not self.performance_log.exists():
+            return
+
+        try:
+            with open(self.performance_log, "r", encoding="utf-8") as f:
+                lines = f.readlines()
+
+            # Reset counters: the file is re-read in full on every call
+            self.stats["total_queries"] = 0
+            self.stats["successful_queries"] = 0
+            self.stats["failed_queries"] = 0
+            self.stats["cached_queries"] = 0
+            response_times = []
+
+            for line in lines:
+                if "PMID_QUERY_END" in line:
+                    self.stats["total_queries"] += 1
+
+                    # Parse status
+                    if "Status: SUCCESS" in line:
+                        self.stats["successful_queries"] += 1
+                    elif "Status: FAILED" in line:
+                        self.stats["failed_queries"] += 1
+
+                    # Parse cache status
+                    if "Cache: CACHED" in line:
+                        self.stats["cached_queries"] += 1
+
+                    # Parse duration
+                    duration_match = re.search(r"Duration: ([\d.]+)s", line)
+                    if duration_match:
+                        response_times.append(float(duration_match.group(1)))
+
+            # Average over the parsed durations. Reset to 0 when none were found so a
+            # truncated/reset log does not keep showing the previous average.
+            self.stats["avg_response_time"] = (
+                sum(response_times) / len(response_times) if response_times else 0
+            )
+
+        except Exception as e:
+            print(f"Error updating performance stats: {e}")
+
+    def _update_error_stats(self):
+        """Update error statistics."""
+        if not self.error_log.exists():
+            return
+
+        try:
+            with open(self.error_log, "r", encoding="utf-8") as f:
+                lines = f.readlines()
+
+            # Get last 10 errors
+            recent_errors = []
+            for line in lines[-10:]:
+                if line.strip():
+                    # Extract error summary
+                    error_match = re.search(
+                        r"ERROR - PMID: (\d+) \| Context: (.+?) \|", line
+                    )
+                    if error_match:
+                        pmid = error_match.group(1)
+                        context = error_match.group(2)
+                        recent_errors.append(f"PMID {pmid}: {context}")
+
+            self.stats["errors"] = recent_errors[-5:]  # Keep last 5 errors
+
+        except Exception as e:
+            print(f"Error updating error stats: {e}")
+
+    def _update_recent_activity(self):
+        """Update recent activity."""
+        if not self.main_log.exists():
+            return
+
+        try:
+            with open(self.main_log, "r", encoding="utf-8") as f:
+                lines = f.readlines()
+
+            # Get last 10 log entries
+            recent_lines = lines[-10:]
+            self.stats["recent_activity"] = []
+
+            for line in recent_lines:
+                if line.strip():
+                    # Extract timestamp and message
+                    timestamp_match = re.search(
+                        r"(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2})", line
+                    )
+                    if timestamp_match:
+                        timestamp = timestamp_match.group(1)
+                        # Extract meaningful part of the message
+                        message = (
+                            line.split(" - ", 2)[-1] if " - " in line else line.strip()
+                        )
+                        self.stats["recent_activity"].append(f"{timestamp}: {message}")
+
+        except Exception as e:
+            print(f"Error updating recent activity: {e}")
+
+    def display_dashboard(self):
+        """Display the dashboard."""
+        os.system("clear" if os.name == "posix" else "cls")
+
+        print("🚀 BioAnalyzer Log Dashboard (Archived)")
+        print("=" * 60)
+        print(f"📅 Last Updated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        print()
+
+        # Performance Overview
+        print("📊 PERFORMANCE OVERVIEW")
+        print("-" * 30)
+        print(f"Total PMID Queries: {self.stats['total_queries']}")
+        print(f"Successful: {self.stats['successful_queries']} ✅")
+        print(f"Failed: {self.stats['failed_queries']} ❌")
+        print(f"Cached Results: {self.stats['cached_queries']} 📋")
+        print(f"Avg Response Time: {self.stats['avg_response_time']:.2f}s")
+
+        # Success rate
+        if self.stats["total_queries"] > 0:
+            success_rate = (
+                self.stats["successful_queries"] / self.stats["total_queries"]
+            ) * 100
+            print(f"Success Rate: {success_rate:.1f}%")
+        print()
+
+        # Recent Errors
+        if self.stats["errors"]:
+            print("❌ RECENT ERRORS")
+            print("-" * 20)
+            for error in self.stats["errors"]:
+                print(f"• {error}")
+            print()
+
+        # Recent Activity
+        if self.stats["recent_activity"]:
+            print("📝 RECENT ACTIVITY")
+            print("-" * 20)
+            for activity in self.stats["recent_activity"][-5:]:  # Show last 5
+                print(f"• {activity}")
+            print()
+
+        # File Status
+        print("📁 LOG FILES STATUS")
+        print("-" * 20)
+        for log_name, log_file in [
+            ("Main", self.main_log),
+            ("Performance", self.performance_log),
+            ("Errors", self.error_log),
+        ]:
+            if log_file.exists():
+                size = log_file.stat().st_size
+                size_kb = size / 1024
+                print(f"{log_name}: {size_kb:.1f} KB")
+            else:
+                print(f"{log_name}: Not found")
+
+        print()
+        print("Press Ctrl+C to stop monitoring")
+
+    def monitor(self, refresh_interval=5):
+        """Monitor logs with periodic updates."""
+        print("Starting log monitoring (Archived script)...")
+        print("Press Ctrl+C to stop")
+
+        try:
+            while True:
+                self.update_stats()
+                self.display_dashboard()
+                time.sleep(refresh_interval)
+
+        except KeyboardInterrupt:
+            print("\n👋 Monitoring stopped.")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="BioAnalyzer Log Dashboard (Archived)"
+    )
+    parser.add_argument(
+        "--refresh",
+        "-r",
+        type=int,
+        default=5,
+        help="Refresh interval in seconds (default: 5)",
+    )
+    parser.add_argument(
+        "--logs", default="logs", help="Log directory path (default: logs)"
+    )
+
+    args = parser.parse_args()
+
+    dashboard = LogDashboard(args.logs)
+    dashboard.monitor(args.refresh)
+
+
+if __name__ == "__main__":
+    main()
+
diff --git a/scripts/ops/performance_monitor.py b/scripts/ops/performance_monitor.py
new file mode 100644
index 0000000..063d04c
--- /dev/null
+++ b/scripts/ops/performance_monitor.py
@@ -0,0 +1,171 @@
+#!/usr/bin/env python3
+"""
+Archived: Performance Monitor for BioAnalyzer
+============================================
+
+This script monitors the performance of PMID queries and helps identify
+API bottlenecks. It lives under scripts/ops/ to indicate it's a dev/ops
+utility and not part of the core backend runtime.
+"""
+
+import requests
+import time
+import json
+from datetime import datetime
+import argparse
+
+
+def test_pmid_query(pmid, base_url="http://localhost:8000"):
+    """Test a single PMID query and measure performance."""
+    print(f"Testing PMID: {pmid}")
+
+    start_time = time.time()
+
+    try:
+        # Test health endpoint first
+        health_response = requests.get(f"{base_url}/health", timeout=10)
+        health_time = time.time() - start_time
+
+        if health_response.status_code != 200:
+            print(f"❌ Health check failed: {health_response.status_code}")
+            return False
+
+        print(f"✅ Health check: {health_time:.2f}s")
+
+        # Test enhanced analysis endpoint
+        analysis_start = time.time()
+        response = requests.get(
+            f"{base_url}/enhanced_analysis/{pmid}", timeout=150
+        )  # Increased timeout to 150 seconds
+        analysis_time = time.time() - analysis_start
+
+        total_time = time.time() - start_time
+
+        if response.status_code == 200:
+            print(f"✅ Analysis successful: {analysis_time:.2f}s")
+            print(f"✅ Total time: {total_time:.2f}s")
+
+            # Check if result was cached
+            data = response.json()
+            if data.get("cached", False):
+                print("📋 Result served from cache")
+            else:
+                print("🔄 Result generated fresh")
+
+            return True
+        else:
+            print(f"❌ Analysis failed: {response.status_code}")
+            try:
+                error_data = response.json()
+                print(f"Error: {error_data.get('detail', 'Unknown error')}")
+            except:
+                print(f"Error: {response.text}")
+            return False
+
+    except requests.exceptions.Timeout:
+        print("❌ Request timed out")
+        return False
+    except Exception as e:
+        print(f"❌ Error: {str(e)}")
+        return False
+
+
+def test_multiple_pmids(pmids, base_url="http://localhost:8000"):
+    """Test multiple PMIDs and provide performance summary."""
+    print(f"Testing {len(pmids)} PMIDs...")
+    print("=" * 50)
+
+    results = []
+    total_time = 0
+
+    for i, pmid in enumerate(pmids, 1):
+        print(f"\n[{i}/{len(pmids)}] ", end="")
+        start_time = time.time()
+
+        success = test_pmid_query(pmid, base_url)
+        query_time = time.time() - start_time
+
+        results.append({"pmid": pmid, "success": success, "time": query_time})
+
+        total_time += query_time
+
+        # Add delay between requests to avoid overwhelming the server
+        if i < len(pmids):
+            time.sleep(1)
+
+    # Print summary
+    print("\n" + "=" * 50)
+    print("PERFORMANCE SUMMARY")
+    print("=" * 50)
+
+    successful = sum(1 for r in results if r["success"])
+    failed = len(results) - successful
+
+    print(f"Total PMIDs tested: {len(pmids)}")
+    print(f"Successful: {successful}")
+    print(f"Failed: {failed}")
+    print(f"Success rate: {(successful/len(pmids)*100):.1f}%")
+    print(f"Total time: {total_time:.2f}s")
+    print(f"Average time per PMID: {(total_time/len(pmids)):.2f}s")
+
+    if successful > 0:
+        successful_times = [r["time"] for r in results if r["success"]]
+        print(f"Fastest query: {min(successful_times):.2f}s")
+        print(f"Slowest query: {max(successful_times):.2f}s")
+
+    # Check cache performance
+    try:
+        metrics_response = requests.get(f"{base_url}/metrics", timeout=10)
+        if metrics_response.status_code == 200:
+            metrics = metrics_response.json()
+            cache_stats = metrics.get("cache", {})
+            print(f"\nCache Statistics:")
+            print(
+                f"  Total analyzed: {cache_stats.get('total_curation_analyzed', 'N/A')}"
+            )
+            print(
+                f"  Cache hit rate: {cache_stats.get('curation_readiness_rate', 'N/A'):.1%}"
+            )
+            print(
+                f"  Recent activity (24h): {cache_stats.get('recent_analysis_24h', 'N/A')}"
+            )
+    except:
+        print("\nCould not retrieve cache statistics")
+
+
+def main():
+    parser = argparse.ArgumentParser(
+        description="Performance Monitor for BioAnalyzer (Archived)"
+    )
+    parser.add_argument("--pmid", help="Single PMID to test")
+    parser.add_argument("--pmids", nargs="+", help="Multiple PMIDs to test")
+    parser.add_argument("--file", help="File containing PMIDs (one per line)")
+    parser.add_argument(
+        "--url", default="http://localhost:8000", help="Base URL of the API"
+    )
+
+    args = parser.parse_args()
+
+    if args.pmid:
+        test_pmid_query(args.pmid, args.url)
+    elif args.pmids:
+        test_multiple_pmids(args.pmids, args.url)
+    elif args.file:
+        try:
+            with open(args.file, "r") as f:
+                pmids = [line.strip() for line in f if line.strip()]
+            test_multiple_pmids(pmids, args.url)
+        except FileNotFoundError:
+            print(f"File not found: {args.file}")
+        except Exception as e:
+            print(f"Error reading file: {str(e)}")
+    else:
+        # Test with some sample PMIDs
+        sample_pmids = ["12345", "67890", "11111"]
+        print("No PMIDs specified. Testing with sample PMIDs...")
+        test_multiple_pmids(sample_pmids, args.url)
+
+
+if __name__ == "__main__":
+    main()
+