From 871c550d11408fed6df6195587d518c84f4fe2dd Mon Sep 17 00:00:00 2001 From: RichardCMX Date: Sun, 28 Sep 2025 17:22:30 -0600 Subject: [PATCH 01/12] feat(dal): enrich schedule departures with route names; format times as HH:MM:SS; validate stop_id; add Fuseki flags and DAL docs; add /api/schedule/departures/ endpoint --- .env.local.example | 6 +++ README.md | 42 ++++++++++++++++ api/serializers.py | 21 ++++++++ api/urls.py | 1 + api/views.py | 95 ++++++++++++++++++++++++++++++++++++ datahub/settings.py | 4 ++ docs/architecture.md | 20 ++++++++ gtfs | 2 +- storage/__init__.py | 1 + storage/cached_schedule.py | 63 ++++++++++++++++++++++++ storage/factory.py | 31 ++++++++++++ storage/fuseki_schedule.py | 21 ++++++++ storage/interfaces.py | 47 ++++++++++++++++++ storage/postgres_schedule.py | 71 +++++++++++++++++++++++++++ storage/redis_cache.py | 36 ++++++++++++++ 15 files changed, 460 insertions(+), 1 deletion(-) create mode 100644 storage/__init__.py create mode 100644 storage/cached_schedule.py create mode 100644 storage/factory.py create mode 100644 storage/fuseki_schedule.py create mode 100644 storage/interfaces.py create mode 100644 storage/postgres_schedule.py create mode 100644 storage/redis_cache.py diff --git a/.env.local.example b/.env.local.example index 1b2cb42..dc3e9cb 100644 --- a/.env.local.example +++ b/.env.local.example @@ -13,3 +13,9 @@ DEBUG=True # For example: # DB_PASSWORD=my-local-password # ALLOWED_HOSTS=localhost,127.0.0.1,0.0.0.0,mylocal.dev + +# Optional Fuseki (SPARQL) backend configuration +# Set to true to enable using Fuseki adapter for reads (experimental) +FUSEKI_ENABLED=false +# SPARQL endpoint URL (e.g. http://localhost:3030/dataset/sparql) +FUSEKI_ENDPOINT= diff --git a/README.md b/README.md index d4a4fa8..189ba7c 100644 --- a/README.md +++ b/README.md @@ -190,6 +190,48 @@ docker-compose down ## 📚 API Documentation +### New: Schedule Departures (Data Access Layer) +An HTTP endpoint backed by the new DAL returns scheduled departures at a stop. It uses PostgreSQL as the source of truth and Redis for caching (read-through) by default. 
+ +- Endpoint: GET /api/schedule/departures/ +- Query params: + - stop_id (required) + - feed_id (optional; defaults to current feed) + - date (optional; YYYY-MM-DD; defaults to today) + - time (optional; HH:MM or HH:MM:SS; defaults to now) + - limit (optional; default 10; max 100) + +Example: +```bash +curl "http://localhost:8000/api/schedule/departures/?stop_id=STOP_123&limit=5" +``` + +Response shape: +```json +{ + "feed_id": "FEED_1", + "stop_id": "STOP_123", + "service_date": "2025-09-28", + "from_time": "08:00:00", + "limit": 5, + "departures": [ + { + "route_id": "R1", + "trip_id": "T1", + "stop_id": "STOP_123", + "headsign": "Terminal Central", + "direction_id": 0, + "arrival_time": "08:05:00", + "departure_time": "08:06:00" + } + ] +} +``` + +Configuration flags (optional): +- FUSEKI_ENABLED=false +- FUSEKI_ENDPOINT= + ### REST API Endpoints - **`/api/`** - Main API endpoints with DRF browsable interface - **`/api/gtfs/`** - GTFS Schedule and Realtime data diff --git a/api/serializers.py b/api/serializers.py index 2e9df54..8601fda 100644 --- a/api/serializers.py +++ b/api/serializers.py @@ -198,6 +198,27 @@ class Meta: fields = "__all__" +class DalDepartureSerializer(serializers.Serializer): + route_id = serializers.CharField() + route_short_name = serializers.CharField(allow_null=True, required=False) + route_long_name = serializers.CharField(allow_null=True, required=False) + trip_id = serializers.CharField() + stop_id = serializers.CharField() + headsign = serializers.CharField(allow_null=True, required=False) + direction_id = serializers.IntegerField(allow_null=True, required=False) + arrival_time = serializers.CharField(allow_null=True, required=False) + departure_time = serializers.CharField(allow_null=True, required=False) + + +class DalDeparturesResponseSerializer(serializers.Serializer): + feed_id = serializers.CharField() + stop_id = serializers.CharField() + service_date = serializers.DateField() + from_time = serializers.CharField() + limit = serializers.IntegerField() + departures = DalDepartureSerializer(many=True) + + class FareAttributeSerializer(serializers.HyperlinkedModelSerializer): feed = serializers.PrimaryKeyRelatedField(read_only=True) diff --git a/api/urls.py b/api/urls.py index 2bbf18c..b194fd9 100644 --- a/api/urls.py +++ b/api/urls.py @@ -29,6 +29,7 @@ path("next-trips/", views.NextTripView.as_view(), name="next-trips"), path("next-stops/", views.NextStopView.as_view(), name="next-stops"), path("route-stops/", views.RouteStopView.as_view(), name="route-stops"), + path("schedule/departures/", views.ScheduleDeparturesView.as_view(), name="schedule-departures"), path("api-auth/", include("rest_framework.urls", namespace="rest_framework")), path("docs/schema/", views.get_schema, name="schema"), path("docs/", SpectacularRedocView.as_view(url_name="schema"), name="api_docs"), diff --git a/api/views.py b/api/views.py index a8a6445..7721a6c 100644 --- a/api/views.py +++ b/api/views.py @@ -21,6 +21,9 @@ from django.conf import settings from .serializers import * +from django.utils import timezone as dj_timezone +from storage.factory import get_schedule_repository +from gtfs.models import Feed, Stop # from .serializers import InfoServiceSerializer, GTFSProviderSerializer, RouteSerializer, TripSerializer @@ -37,6 +40,98 @@ def get_filtered_queryset(self, allowed_query_params): return queryset.filter(**filter_args) +class ScheduleDeparturesView(APIView): + """Simple endpoint backed by the DAL to get next scheduled departures at a stop. 
+ + Query params: + - stop_id (required) + - feed_id (optional, defaults to current feed) + - date (optional, YYYY-MM-DD; defaults to today in settings.TIME_ZONE) + - time (optional, HH:MM or HH:MM:SS; defaults to now in settings.TIME_ZONE) + - limit (optional, integer; default 10) + """ + + def get(self, request): + stop_id = request.query_params.get("stop_id") + if not stop_id: + return Response({"error": "stop_id is required"}, status=status.HTTP_400_BAD_REQUEST) + + # Resolve feed_id + feed_id = request.query_params.get("feed_id") + if not feed_id: + try: + current_feed = Feed.objects.filter(is_current=True).latest("retrieved_at") + except Feed.DoesNotExist: + return Response( + {"error": "No GTFS feed configured as current (is_current=True). Load GTFS fixtures or import a feed and set one as current."}, + status=status.HTTP_404_NOT_FOUND, + ) + feed_id = current_feed.feed_id + else: + if not Feed.objects.filter(feed_id=feed_id).exists(): + return Response( + {"error": f"feed_id '{feed_id}' not found"}, status=status.HTTP_404_NOT_FOUND + ) + + # Validate stop exists for the chosen feed + if not Stop.objects.filter(feed__feed_id=feed_id, stop_id=stop_id).exists(): + return Response( + {"error": f"stop_id '{stop_id}' not found for feed '{feed_id}'"}, + status=status.HTTP_404_NOT_FOUND, + ) + + # Parse date/time with TZ defaults + try: + date_str = request.query_params.get("date") + if date_str: + service_date = datetime.strptime(date_str, "%Y-%m-%d").date() + else: + service_date = dj_timezone.localdate() + except Exception: + return Response({"error": "Invalid date format. Use YYYY-MM-DD"}, status=status.HTTP_400_BAD_REQUEST) + + try: + time_str = request.query_params.get("time") + if time_str: + fmt = "%H:%M:%S" if len(time_str.split(":")) == 3 else "%H:%M" + from_time = datetime.strptime(time_str, fmt).time() + else: + from_time = dj_timezone.localtime().time() + except Exception: + return Response({"error": "Invalid time format. Use HH:MM or HH:MM:SS"}, status=status.HTTP_400_BAD_REQUEST) + + try: + limit = int(request.query_params.get("limit", 10)) + if limit <= 0 or limit > 100: + return Response({"error": "limit must be between 1 and 100"}, status=status.HTTP_400_BAD_REQUEST) + except ValueError: + return Response({"error": "limit must be an integer"}, status=status.HTTP_400_BAD_REQUEST) + + # Build response using DAL + repo = get_schedule_repository(use_cache=True) + departures = repo.get_next_departures( + feed_id=feed_id, + stop_id=stop_id, + service_date=service_date, + from_time=from_time, + limit=limit, + ) + + # Format from_time as HH:MM:SS for a cleaner API response + from_time_str = from_time.strftime("%H:%M:%S") + + payload = { + "feed_id": feed_id, + "stop_id": stop_id, + "service_date": service_date, + "from_time": from_time_str, + "limit": limit, + "departures": departures, + } + serializer = DalDeparturesResponseSerializer(payload) + return Response(serializer.data) + + class GTFSProviderViewSet(viewsets.ModelViewSet): """ Proveedores de datos GTFS. 
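
The view above is the only in-tree consumer of the new repository. As a minimal sketch (assuming the `get_schedule_repository` factory and the `get_next_departures` signature introduced in this patch, plus the example identifiers `FEED_1`/`STOP_123` from the README snippet earlier), the DAL can also be exercised directly from `python manage.py shell`, without the HTTP layer:

```python
# Hypothetical Django shell session exercising the DAL directly.
# Assumes GTFS data is already loaded for feed "FEED_1" and stop "STOP_123"
# (identifiers taken from the README example above).
from datetime import date, time

from storage.factory import get_schedule_repository

# Postgres-backed repository wrapped in the Redis read-through cache
repo = get_schedule_repository(use_cache=True)

departures = repo.get_next_departures(
    feed_id="FEED_1",
    stop_id="STOP_123",
    service_date=date(2025, 9, 28),
    from_time=time(8, 0, 0),
    limit=5,
)

# Each item is a Departure TypedDict (see storage/interfaces.py)
for dep in departures:
    print(dep["route_id"], dep["departure_time"], dep["headsign"])
```

With `use_cache=True`, repeated calls with the same parameters within the cache TTL are served from Redis rather than hitting PostgreSQL again.
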
diff --git a/datahub/settings.py b/datahub/settings.py index a3c2c3a..246f933 100644 --- a/datahub/settings.py +++ b/datahub/settings.py @@ -130,6 +130,10 @@ REDIS_HOST = config("REDIS_HOST") REDIS_PORT = config("REDIS_PORT") +# Optional Fuseki (SPARQL) backend +FUSEKI_ENABLED = config("FUSEKI_ENABLED", cast=bool, default=False) +FUSEKI_ENDPOINT = config("FUSEKI_ENDPOINT", default=None) + # Celery settings CELERY_BROKER_URL = f"redis://{REDIS_HOST}:{REDIS_PORT}/0" diff --git a/docs/architecture.md b/docs/architecture.md index 89dd9ac..8a6e701 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -48,6 +48,26 @@ Nota: las pantallas por ahora asumimos que son Raspberry Pi en [modo kiosko](htt ### Django app: `gtfs` +## Estrategia de almacenamiento y capa de acceso a datos (DAL) + +- PostgreSQL/PostGIS es la fuente de verdad para GTFS Schedule. +- Redis se utiliza como caché de alto desempeño (lecturas read-through/write-through donde aplique) y para mensajería (Channels, Celery). +- Fuseki (Jena) es un backend opcional para consultas SPARQL. Se controla con variables de entorno: + - FUSEKI_ENABLED (bool) + - FUSEKI_ENDPOINT (URL) + +Se define una capa de acceso a datos (DAL) con interfaces claras: +- ScheduleRepository: obtiene salidas programadas (next departures) por parada. +- CacheProvider: wrapper de caché (implementación en Redis). + +Implementaciones actuales: +- PostgresScheduleRepository (Django ORM) +- CachedScheduleRepository (envoltorio con Redis) +- FusekiScheduleRepository (stub opcional para desarrollo futuro) + +Endpoint nuevo (ejemplo): +- GET /api/schedule/departures/?stop_id=STOP_123&limit=5 + > Páginas de administación de información GTFS Schedule y GTFS Realtime. - `/gtfs/`: diff --git a/gtfs b/gtfs index 34fa77e..719e08f 160000 --- a/gtfs +++ b/gtfs @@ -1 +1 @@ -Subproject commit 34fa77e278a643ee846bb60c128dec7eda3f0f97 +Subproject commit 719e08f53511c286106c3f17f3d4a532e833feec diff --git a/storage/__init__.py b/storage/__init__.py new file mode 100644 index 0000000..c137c20 --- /dev/null +++ b/storage/__init__.py @@ -0,0 +1 @@ +# Storage/Data Access Layer package diff --git a/storage/cached_schedule.py b/storage/cached_schedule.py new file mode 100644 index 0000000..d999f1d --- /dev/null +++ b/storage/cached_schedule.py @@ -0,0 +1,63 @@ +from __future__ import annotations + +import json +from datetime import date, time +from typing import List + +from .interfaces import CacheProvider, Departure, ScheduleRepository + + +class CachedScheduleRepository(ScheduleRepository): + """Cache wrapper for any ScheduleRepository. + + Keys are namespaced to avoid collisions and include parameters for safety. 
+ """ + + def __init__(self, repo: ScheduleRepository, cache: CacheProvider, *, ttl_seconds: int = 60): + self._repo = repo + self._cache = cache + self._ttl = ttl_seconds + + @staticmethod + def _key(*, feed_id: str, stop_id: str, service_date: date, from_time: time, limit: int) -> str: + return ( + f"schedule:next_departures:feed={feed_id}:stop={stop_id}:" + f"date={service_date.isoformat()}:time={from_time.strftime('%H%M%S')}:limit={limit}:v1" + ) + + def get_next_departures( + self, + *, + feed_id: str, + stop_id: str, + service_date: date, + from_time: time, + limit: int = 10, + ) -> List[Departure]: + key = self._key( + feed_id=feed_id, + stop_id=stop_id, + service_date=service_date, + from_time=from_time, + limit=limit, + ) + cached = self._cache.get(key) + if cached: + try: + return json.loads(cached) + except Exception: + # Fallback to fetching from source if cache content is invalid + pass + + result = self._repo.get_next_departures( + feed_id=feed_id, + stop_id=stop_id, + service_date=service_date, + from_time=from_time, + limit=limit, + ) + try: + self._cache.set(key, json.dumps(result), self._ttl) + except Exception: + pass + return result diff --git a/storage/factory.py b/storage/factory.py new file mode 100644 index 0000000..154aa1d --- /dev/null +++ b/storage/factory.py @@ -0,0 +1,31 @@ +from __future__ import annotations + +from datetime import date, time +from typing import List + +from django.conf import settings + +from .cached_schedule import CachedScheduleRepository +from .interfaces import ScheduleRepository +from .postgres_schedule import PostgresScheduleRepository +from .redis_cache import RedisCacheProvider +from .fuseki_schedule import FusekiScheduleRepository + + +def get_schedule_repository(*, use_cache: bool = True) -> ScheduleRepository: + """Factory to obtain a ScheduleRepository according to settings. + + - Uses PostgreSQL (Django ORM) by default. + - Optionally wraps with Redis cache. + - If FUSEKI_ENABLED is true and endpoint configured, uses Fuseki adapter instead. + """ + base_repo: ScheduleRepository + if getattr(settings, "FUSEKI_ENABLED", False) and getattr(settings, "FUSEKI_ENDPOINT", None): + base_repo = FusekiScheduleRepository(endpoint=settings.FUSEKI_ENDPOINT) # type: ignore[arg-type] + else: + base_repo = PostgresScheduleRepository() + + if use_cache: + cache = RedisCacheProvider() + return CachedScheduleRepository(base_repo, cache) + return base_repo diff --git a/storage/fuseki_schedule.py b/storage/fuseki_schedule.py new file mode 100644 index 0000000..9fd1aab --- /dev/null +++ b/storage/fuseki_schedule.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +from typing import List + +from .interfaces import Departure, ScheduleRepository + + +class FusekiScheduleRepository(ScheduleRepository): + """Optional Fuseki-backed schedule repository. + + This is a stub implementation. It outlines the expected interface and can be + filled in later to execute SPARQL queries against a Jena Fuseki endpoint. + """ + + def __init__(self, *, endpoint: str): + self._endpoint = endpoint + + def get_next_departures(self, **kwargs) -> List[Departure]: + raise NotImplementedError( + "FusekiScheduleRepository is not yet implemented. Set FUSEKI_ENABLED=false to use Postgres." 
+ ) \ No newline at end of file diff --git a/storage/interfaces.py b/storage/interfaces.py new file mode 100644 index 0000000..f950659 --- /dev/null +++ b/storage/interfaces.py @@ -0,0 +1,47 @@ +from __future__ import annotations + +from typing import List, Optional, Protocol, TypedDict, runtime_checkable +from datetime import date, time + + +class Departure(TypedDict): + route_id: str + route_short_name: Optional[str] + route_long_name: Optional[str] + trip_id: str + stop_id: str + headsign: Optional[str] + direction_id: Optional[int] + arrival_time: Optional[str] # HH:MM:SS + departure_time: Optional[str] # HH:MM:SS + + +@runtime_checkable +class ScheduleRepository(Protocol): + """Abstract interface for reading scheduled service information.""" + + def get_next_departures( + self, + *, + feed_id: str, + stop_id: str, + service_date: date, + from_time: time, + limit: int = 10, + ) -> List[Departure]: + """Return the next scheduled departures at a stop. + + Notes: + - Implementations may approximate service availability and ignore + service_date exceptions initially; exact filtering can be added later. + """ + ... + + +@runtime_checkable +class CacheProvider(Protocol): + def get(self, key: str) -> Optional[str]: + ... + + def set(self, key: str, value: str, ttl_seconds: int) -> None: + ... diff --git a/storage/postgres_schedule.py b/storage/postgres_schedule.py new file mode 100644 index 0000000..5f1d373 --- /dev/null +++ b/storage/postgres_schedule.py @@ -0,0 +1,71 @@ +from __future__ import annotations + +from datetime import date, time +from typing import List + +from django.db.models import F + +from gtfs.models import StopTime, Trip, Route +from .interfaces import Departure, ScheduleRepository + + +class PostgresScheduleRepository(ScheduleRepository): + """PostgreSQL-backed schedule repository using Django ORM. + + NOTE: This initial implementation does not yet filter by service_date + (Calendar/CalendarDate). That logic can be layered in a future iteration. 
+ """ + + def get_next_departures( + self, + *, + feed_id: str, + stop_id: str, + service_date: date, + from_time: time, + limit: int = 10, + ) -> List[Departure]: + qs = ( + StopTime.objects.select_related("_trip") + .filter( + feed__feed_id=feed_id, + stop_id=stop_id, + departure_time__isnull=False, + departure_time__gte=from_time, + ) + .order_by("departure_time") + ) + qs = qs[:limit] + + results: List[Departure] = [] + for st in qs: + # Ensure we can resolve the Trip, even if _trip is not populated + trip: Trip | None = getattr(st, "_trip", None) # type: ignore + if trip is None: + trip = Trip.objects.filter(feed=st.feed, trip_id=st.trip_id).first() + + route_id_val = trip.route_id if trip else "" + route_short_name = None + route_long_name = None + if route_id_val: + route = Route.objects.filter(feed=st.feed, route_id=route_id_val).only( + "route_short_name", "route_long_name" + ).first() + if route is not None: + route_short_name = route.route_short_name + route_long_name = route.route_long_name + + results.append( + { + "route_id": route_id_val, + "route_short_name": route_short_name, + "route_long_name": route_long_name, + "trip_id": st.trip_id, + "stop_id": st.stop_id, + "headsign": getattr(trip, "trip_headsign", None) if trip else None, + "direction_id": getattr(trip, "direction_id", None) if trip else None, + "arrival_time": st.arrival_time.strftime("%H:%M:%S") if st.arrival_time else None, + "departure_time": st.departure_time.strftime("%H:%M:%S") if st.departure_time else None, + } + ) + return results diff --git a/storage/redis_cache.py b/storage/redis_cache.py new file mode 100644 index 0000000..7c1c367 --- /dev/null +++ b/storage/redis_cache.py @@ -0,0 +1,36 @@ +from __future__ import annotations + +import json +from typing import Optional + +from django.conf import settings +import redis + +from .interfaces import CacheProvider + + +class RedisCacheProvider(CacheProvider): + """Simple Redis-backed cache for DAL results. + + Stores JSON-encoded strings under namespaced keys. 
+ """ + + def __init__(self, *, host: Optional[str] = None, port: Optional[int] = None): + self._host = host or settings.REDIS_HOST + self._port = int(port or settings.REDIS_PORT) + # decode_responses=True to work with str values + self._client = redis.Redis(host=self._host, port=self._port, decode_responses=True) + + def get(self, key: str) -> Optional[str]: + try: + return self._client.get(key) + except Exception: + # Cache failures should not break the application + return None + + def set(self, key: str, value: str, ttl_seconds: int) -> None: + try: + self._client.setex(key, ttl_seconds, value) + except Exception: + # Best-effort cache set + pass From f2953bff8f9058a29f6ef3b6b3c038eea09bb5fd Mon Sep 17 00:00:00 2001 From: RichardCMX Date: Sun, 28 Sep 2025 17:27:52 -0600 Subject: [PATCH 02/12] docs(api): add route_short_name and route_long_name to schedule departures example --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 189ba7c..e141edc 100644 --- a/README.md +++ b/README.md @@ -217,6 +217,8 @@ Response shape: "departures": [ { "route_id": "R1", + "route_short_name": "R1", + "route_long_name": "Ruta 1 - Centro", "trip_id": "T1", "stop_id": "STOP_123", "headsign": "Terminal Central", From 8232e908a496957ec2286d34bce10418513929ea Mon Sep 17 00:00:00 2001 From: RichardCMX Date: Thu, 2 Oct 2025 08:43:52 -0600 Subject: [PATCH 03/12] docs(openapi): switch to generated schema and annotate schedule departures endpoint; tests(api): add tests for DAL-backed schedule departures --- api/tests/test_schedule_departures.py | 73 +++++++++++++++++++++++++++ api/urls.py | 2 +- api/views.py | 25 +++++---- 3 files changed, 89 insertions(+), 11 deletions(-) create mode 100644 api/tests/test_schedule_departures.py diff --git a/api/tests/test_schedule_departures.py b/api/tests/test_schedule_departures.py new file mode 100644 index 0000000..536f559 --- /dev/null +++ b/api/tests/test_schedule_departures.py @@ -0,0 +1,73 @@ +from __future__ import annotations + +import re +from typing import List + +from django.urls import reverse +from django.test import TestCase +from rest_framework.test import APITestCase +from rest_framework import status + +from gtfs.models import Feed, Stop, StopTime + + +class ScheduleDeparturesTests(APITestCase): + fixtures = ["gtfs/fixtures/gtfs_test.json"] + + def setUp(self): + # Ensure there is a current feed for defaults + feed = Feed.objects.first() + if feed: + feed.is_current = True + feed.save() + + def test_returns_404_when_stop_missing(self): + url = "/api/schedule/departures/?stop_id=THIS_DOES_NOT_EXIST&limit=1" + resp = self.client.get(url) + self.assertEqual(resp.status_code, status.HTTP_404_NOT_FOUND) + self.assertIn("error", resp.json()) + + def test_returns_departures_with_expected_shape(self): + feed = Feed.objects.filter(is_current=True).first() or Feed.objects.first() + self.assertIsNotNone(feed, "Expected fixture to provide at least one feed") + + # Find a stop_id that actually has stoptimes + st = StopTime.objects.filter(feed=feed).order_by("departure_time").first() + self.assertIsNotNone(st, "Expected fixture to provide at least one StopTime") + stop_id = st.stop_id + + url = f"/api/schedule/departures/?stop_id={stop_id}&limit=1" + resp = self.client.get(url) + self.assertEqual(resp.status_code, status.HTTP_200_OK) + data = resp.json() + + # Top-level keys + for key in ["feed_id", "stop_id", "service_date", "from_time", "limit", "departures"]: + self.assertIn(key, data) + + self.assertIsInstance(data["departures"], 
list) + self.assertGreaterEqual(len(data["departures"]), 1) + + item = data["departures"][0] + for key in [ + "route_id", + "route_short_name", + "route_long_name", + "trip_id", + "stop_id", + "headsign", + "direction_id", + "arrival_time", + "departure_time", + ]: + self.assertIn(key, item) + + # Time fields formatted HH:MM:SS + time_pattern = re.compile(r"^\d{2}:\d{2}:\d{2}$") + if item["arrival_time"] is not None: + self.assertRegex(item["arrival_time"], time_pattern) + if item["departure_time"] is not None: + self.assertRegex(item["departure_time"], time_pattern) + + # from_time string formatted HH:MM:SS + self.assertRegex(data["from_time"], time_pattern) diff --git a/api/urls.py b/api/urls.py index b194fd9..2375e52 100644 --- a/api/urls.py +++ b/api/urls.py @@ -31,6 +31,6 @@ path("route-stops/", views.RouteStopView.as_view(), name="route-stops"), path("schedule/departures/", views.ScheduleDeparturesView.as_view(), name="schedule-departures"), path("api-auth/", include("rest_framework.urls", namespace="rest_framework")), - path("docs/schema/", views.get_schema, name="schema"), + path("docs/schema/", SpectacularAPIView.as_view(), name="schema"), path("docs/", SpectacularRedocView.as_view(url_name="schema"), name="api_docs"), ] diff --git a/api/views.py b/api/views.py index 7721a6c..b684601 100644 --- a/api/views.py +++ b/api/views.py @@ -24,6 +24,7 @@ from django.utils import timezone as dj_timezone from storage.factory import get_schedule_repository from gtfs.models import Feed, Stop +from drf_spectacular.utils import extend_schema, OpenApiParameter, OpenApiTypes # from .serializers import InfoServiceSerializer, GTFSProviderSerializer, RouteSerializer, TripSerializer @@ -41,16 +42,20 @@ def get_filtered_queryset(self, allowed_query_params): class ScheduleDeparturesView(APIView): - """Simple endpoint backed by the DAL to get next scheduled departures at a stop. 
- - Query params: - - stop_id (required) - - feed_id (optional, defaults to current feed) - - date (optional, YYYY-MM-DD; defaults to today in settings.TIME_ZONE) - - time (optional, HH:MM or HH:MM:SS; defaults to now in settings.TIME_ZONE) - - limit (optional, integer; default 10) - """ - + """Simple endpoint backed by the DAL to get next scheduled departures at a stop.""" + + @extend_schema( + parameters=[ + OpenApiParameter(name="stop_id", type=OpenApiTypes.STR, required=True, description="Stop identifier (must exist in Stop for the chosen feed)"), + OpenApiParameter(name="feed_id", type=OpenApiTypes.STR, required=False, description="Feed identifier (defaults to current feed)") , + OpenApiParameter(name="date", type=OpenApiTypes.DATE, required=False, description="Service date (YYYY-MM-DD, defaults to today)"), + OpenApiParameter(name="time", type=OpenApiTypes.STR, required=False, description="Start time (HH:MM or HH:MM:SS, defaults to now)"), + OpenApiParameter(name="limit", type=OpenApiTypes.INT, required=False, description="Number of results (default 10, max 100)"), + ], + responses={200: DalDeparturesResponseSerializer}, + description="Return next scheduled departures at a stop using the DAL (PostgreSQL + Redis cache).", + tags=["schedule"], + ) def get(self, request): stop_id = request.query_params.get("stop_id") if not stop_id: From 0425c26b60427677ff5c4696abb9e8a0ca3ccd83 Mon Sep 17 00:00:00 2001 From: RichardCMX Date: Thu, 2 Oct 2025 08:54:38 -0600 Subject: [PATCH 04/12] test(api): programmatic dataset for schedule departures tests; add tests package __init__ --- api/tests/__init__.py | 1 + api/tests/test_schedule_departures.py | 39 +++++++++++++++++++++------ 2 files changed, 32 insertions(+), 8 deletions(-) create mode 100644 api/tests/__init__.py diff --git a/api/tests/__init__.py b/api/tests/__init__.py new file mode 100644 index 0000000..2245dc8 --- /dev/null +++ b/api/tests/__init__.py @@ -0,0 +1 @@ +# makes tests a package for unittest discovery \ No newline at end of file diff --git a/api/tests/test_schedule_departures.py b/api/tests/test_schedule_departures.py index 536f559..80ff2f3 100644 --- a/api/tests/test_schedule_departures.py +++ b/api/tests/test_schedule_departures.py @@ -11,15 +11,38 @@ from gtfs.models import Feed, Stop, StopTime -class ScheduleDeparturesTests(APITestCase): - fixtures = ["gtfs/fixtures/gtfs_test.json"] +from django.contrib.gis.geos import Point +from datetime import time + +class ScheduleDeparturesTests(APITestCase): def setUp(self): - # Ensure there is a current feed for defaults - feed = Feed.objects.first() - if feed: - feed.is_current = True - feed.save() + # Minimal dataset for the endpoint + self.feed = Feed.objects.create( + feed_id="TEST", + is_current=True, + ) + self.stop = Stop.objects.create( + feed=self.feed, + stop_id="S1", + stop_name="Test Stop", + stop_point=Point(0.0, 0.0), + ) + # Create StopTime without triggering model save() logic that requires Trip + StopTime.objects.bulk_create( + [ + StopTime( + feed=self.feed, + trip_id="T1", + stop_id=self.stop.stop_id, + stop_sequence=1, + pickup_type=0, + drop_off_type=0, + arrival_time=time(8, 5, 0), + departure_time=time(8, 6, 0), + ) + ] + ) def test_returns_404_when_stop_missing(self): url = "/api/schedule/departures/?stop_id=THIS_DOES_NOT_EXIST&limit=1" @@ -36,7 +59,7 @@ def test_returns_departures_with_expected_shape(self): self.assertIsNotNone(st, "Expected fixture to provide at least one StopTime") stop_id = st.stop_id - url = 
f"/api/schedule/departures/?stop_id={stop_id}&limit=1" + url = f"/api/schedule/departures/?stop_id={stop_id}&time=08:00:00&limit=1" resp = self.client.get(url) self.assertEqual(resp.status_code, status.HTTP_200_OK) data = resp.json() From ed4b0f67c3739e870ac1dabd333f502a4db25a30 Mon Sep 17 00:00:00 2001 From: RichardCMX Date: Thu, 2 Oct 2025 09:03:18 -0600 Subject: [PATCH 05/12] docs(cache): document key format and TTL; feat(config): add SCHEDULE_CACHE_TTL_SECONDS and pass via factory --- README.md | 5 +++++ datahub/settings.py | 3 +++ docs/architecture.md | 6 ++++++ storage/factory.py | 3 ++- 4 files changed, 16 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index e141edc..f0d25f4 100644 --- a/README.md +++ b/README.md @@ -234,6 +234,11 @@ Configuration flags (optional): - FUSEKI_ENABLED=false - FUSEKI_ENDPOINT= +Caching (keys and TTLs): +- Key pattern: schedule:next_departures:feed={FEED_ID}:stop={STOP_ID}:date={YYYY-MM-DD}:time={HHMMSS}:limit={N}:v1 +- Default TTL: 60 seconds +- Configure TTL via env: SCHEDULE_CACHE_TTL_SECONDS=60 + ### REST API Endpoints - **`/api/`** - Main API endpoints with DRF browsable interface - **`/api/gtfs/`** - GTFS Schedule and Realtime data diff --git a/datahub/settings.py b/datahub/settings.py index 246f933..69deb99 100644 --- a/datahub/settings.py +++ b/datahub/settings.py @@ -134,6 +134,9 @@ FUSEKI_ENABLED = config("FUSEKI_ENABLED", cast=bool, default=False) FUSEKI_ENDPOINT = config("FUSEKI_ENDPOINT", default=None) +# DAL caching configuration +SCHEDULE_CACHE_TTL_SECONDS = config("SCHEDULE_CACHE_TTL_SECONDS", cast=int, default=60) + # Celery settings CELERY_BROKER_URL = f"redis://{REDIS_HOST}:{REDIS_PORT}/0" diff --git a/docs/architecture.md b/docs/architecture.md index 8a6e701..bcd5613 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -68,6 +68,12 @@ Implementaciones actuales: Endpoint nuevo (ejemplo): - GET /api/schedule/departures/?stop_id=STOP_123&limit=5 +### Capa de caché (Redis) +- Claves (key): + - schedule:next_departures:feed={FEED_ID}:stop={STOP_ID}:date={YYYY-MM-DD}:time={HHMMSS}:limit={N}:v1 +- TTL por defecto: 60 segundos +- Configuración por entorno: SCHEDULE_CACHE_TTL_SECONDS (entero) + > Páginas de administación de información GTFS Schedule y GTFS Realtime. 
- `/gtfs/`: diff --git a/storage/factory.py b/storage/factory.py index 154aa1d..f46f834 100644 --- a/storage/factory.py +++ b/storage/factory.py @@ -27,5 +27,6 @@ def get_schedule_repository(*, use_cache: bool = True) -> ScheduleRepository: if use_cache: cache = RedisCacheProvider() - return CachedScheduleRepository(base_repo, cache) + ttl = getattr(settings, "SCHEDULE_CACHE_TTL_SECONDS", 60) + return CachedScheduleRepository(base_repo, cache, ttl_seconds=int(ttl)) return base_repo From 32849c485462dc3b963b14ef7cbede20dfc00674 Mon Sep 17 00:00:00 2001 From: RichardCMX Date: Thu, 2 Oct 2025 10:57:48 -0600 Subject: [PATCH 06/12] feat(fuseki): implement minimal SPARQL-based adapter and integration test; add tiny TTL sample --- api/tests/data/fuseki_sample.ttl | 17 +++++++ api/tests/test_fuseki_schedule.py | 70 ++++++++++++++++++++++++++ docker-compose.yml | 15 ++++++ storage/fuseki_schedule.py | 83 +++++++++++++++++++++++++++---- 4 files changed, 176 insertions(+), 9 deletions(-) create mode 100644 api/tests/data/fuseki_sample.ttl create mode 100644 api/tests/test_fuseki_schedule.py diff --git a/api/tests/data/fuseki_sample.ttl b/api/tests/data/fuseki_sample.ttl new file mode 100644 index 0000000..471810d --- /dev/null +++ b/api/tests/data/fuseki_sample.ttl @@ -0,0 +1,17 @@ +@prefix ex: . + +# Minimal sample data for Fuseki integration tests +# One departure at stop S1 for feed TEST + +[] a ex:Departure ; + ex:feed_id "TEST" ; + ex:stop_id "S1" ; + ex:trip_id "T1" ; + ex:route_id "R1" ; + ex:route_short_name "R1" ; + ex:route_long_name "Ruta 1" ; + ex:headsign "Terminal" ; + ex:direction_id "0" ; + ex:service_date "2099-01-01" ; + ex:arrival_time "08:05:00" ; + ex:departure_time "08:06:00" . diff --git a/api/tests/test_fuseki_schedule.py b/api/tests/test_fuseki_schedule.py new file mode 100644 index 0000000..6354d3c --- /dev/null +++ b/api/tests/test_fuseki_schedule.py @@ -0,0 +1,70 @@ +from __future__ import annotations + +import os +import time +from pathlib import Path + +import requests +from django.test import override_settings +from rest_framework import status +from rest_framework.test import APITestCase +from django.contrib.gis.geos import Point + +from gtfs.models import Feed, Stop + + +FUSEKI_URL = os.environ.get("FUSEKI_TEST_URL", "http://fuseki:3030") +DATASET = os.environ.get("FUSEKI_TEST_DATASET", "dataset") +SPARQL_ENDPOINT = f"{FUSEKI_URL}/{DATASET}/sparql" +DATA_ENDPOINT = f"{FUSEKI_URL}/{DATASET}/data?default" + + +class FusekiScheduleIntegrationTests(APITestCase): + @override_settings(FUSEKI_ENABLED=True, FUSEKI_ENDPOINT=SPARQL_ENDPOINT) + def test_fuseki_departures_via_dal(self): + # Ensure DB has feed and stop for validation in API view + feed = Feed.objects.create(feed_id="TEST", is_current=True) + Stop.objects.create(feed=feed, stop_id="S1", stop_name="Stop 1", stop_point=Point(0.0, 0.0)) + + # Wait for Fuseki to be ready + self._wait_for_fuseki_ready() + + # Load tiny TTL into dataset (default graph) + ttl_path = Path(__file__).parent / "data" / "fuseki_sample.ttl" + with open(ttl_path, "rb") as f: + r = requests.post(DATA_ENDPOINT, data=f.read(), headers={"Content-Type": "text/turtle"}, timeout=10, auth=("admin", "admin")) + # Some images allow anonymous writes; if 401, try again without auth + if r.status_code == 401: + r = requests.post(DATA_ENDPOINT, data=open(ttl_path, "rb").read(), headers={"Content-Type": "text/turtle"}, timeout=10) + r.raise_for_status() + + # Call the API endpoint; service_date in TTL is far future (2099-01-01), so pass date to match + url = 
"/api/schedule/departures/?feed_id=TEST&stop_id=S1&date=2099-01-01&time=08:00:00&limit=1" + resp = self.client.get(url) + self.assertEqual(resp.status_code, status.HTTP_200_OK, resp.content) + data = resp.json() + self.assertEqual(data["feed_id"], "TEST") + self.assertEqual(data["stop_id"], "S1") + self.assertEqual(data["limit"], 1) + self.assertEqual(len(data["departures"]), 1) + item = data["departures"][0] + # Validate enriched fields + self.assertEqual(item["route_id"], "R1") + self.assertEqual(item["route_short_name"], "R1") + self.assertEqual(item["route_long_name"], "Ruta 1") + self.assertEqual(item["trip_id"], "T1") + self.assertEqual(item["arrival_time"], "08:05:00") + self.assertEqual(item["departure_time"], "08:06:00") + + def _wait_for_fuseki_ready(self, timeout_seconds: int = 20): + start = time.time() + while time.time() - start < timeout_seconds: + try: + # ASK {} to ensure SPARQL endpoint is responsive + r = requests.post(SPARQL_ENDPOINT, data=b"ASK {}", headers={"Content-Type": "application/sparql-query"}, timeout=3) + if r.status_code == 200: + return + except Exception: + pass + time.sleep(1) + raise RuntimeError("Fuseki SPARQL endpoint did not become ready in time") diff --git a/docker-compose.yml b/docker-compose.yml index 4c62a04..8c7d3bf 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,6 +29,17 @@ services: timeout: 5s retries: 5 + # Apache Jena Fuseki (optional) + fuseki: + image: stain/jena-fuseki:latest + ports: + - "3030:3030" + environment: + - FUSEKI_DATASET=dataset + - FUSEKI_ADMIN_PASSWORD=admin + volumes: + - fuseki_data:/fuseki + # Django Web App (Daphne ASGI server) web: build: @@ -51,6 +62,8 @@ services: # GDAL library path for ARM64 architecture - GDAL_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/libgdal.so - GEOS_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/libgeos_c.so + - FUSEKI_ENDPOINT=http://fuseki:3030/dataset/sparql + - FUSEKI_ENDPOINT=http://fuseki:3030/dataset/sparql volumes: - .:/app # Mount current directory for live development - /app/.venv # Exclude .venv directory to prevent conflicts @@ -129,6 +142,7 @@ services: # GDAL library path for ARM64 architecture - GDAL_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/libgdal.so - GEOS_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/libgeos_c.so + - FUSEKI_ENDPOINT=http://fuseki:3030/dataset/sparql volumes: - .:/app # Mount source code for hot reload - /app/.venv # Exclude .venv directory to prevent conflicts @@ -159,6 +173,7 @@ volumes: static_volume: media_volume: celery_beat_data: + fuseki_data: networks: default: diff --git a/storage/fuseki_schedule.py b/storage/fuseki_schedule.py index 9fd1aab..d009ce1 100644 --- a/storage/fuseki_schedule.py +++ b/storage/fuseki_schedule.py @@ -1,21 +1,86 @@ from __future__ import annotations -from typing import List +from typing import List, Optional +from datetime import date, time + +import requests from .interfaces import Departure, ScheduleRepository class FusekiScheduleRepository(ScheduleRepository): - """Optional Fuseki-backed schedule repository. + """Fuseki-backed schedule repository using SPARQL queries. + + Minimal vocabulary expected for each ex:Departure resource: + - ex:feed_id, ex:stop_id, ex:trip_id (xsd:string) + - ex:arrival_time, ex:departure_time (xsd:string HH:MM:SS) + - optional: ex:route_id, ex:headsign, ex:direction_id, ex:route_short_name, ex:route_long_name + - optional: ex:service_date (xsd:string YYYY-MM-DD) - This is a stub implementation. 
It outlines the expected interface and can be - filled in later to execute SPARQL queries against a Jena Fuseki endpoint. + PREFIX ex: """ def __init__(self, *, endpoint: str): - self._endpoint = endpoint + self._endpoint = endpoint.rstrip("/") + + def get_next_departures( + self, + *, + feed_id: str, + stop_id: str, + service_date: date, + from_time: time, + limit: int = 10, + ) -> List[Departure]: + date_str = service_date.isoformat() + time_str = from_time.strftime("%H:%M:%S") + query = f""" + PREFIX ex: + SELECT ?route_id ?route_short_name ?route_long_name ?trip_id ?stop_id ?headsign ?direction_id ?arrival ?departure + WHERE {{ + ?d a ex:Departure ; + ex:feed_id "{feed_id}" ; + ex:stop_id "{stop_id}" ; + ex:trip_id ?trip_id ; + ex:arrival_time ?arrival ; + ex:departure_time ?departure . + OPTIONAL {{ ?d ex:route_id ?route_id }} + OPTIONAL {{ ?d ex:headsign ?headsign }} + OPTIONAL {{ ?d ex:direction_id ?direction_id }} + OPTIONAL {{ ?d ex:route_short_name ?route_short_name }} + OPTIONAL {{ ?d ex:route_long_name ?route_long_name }} + OPTIONAL {{ ?d ex:service_date ?svc_date }} + FILTER ( ?departure >= "{time_str}" ) + FILTER ( !BOUND(?svc_date) || ?svc_date = "{date_str}" ) + }} + ORDER BY ?departure + LIMIT {int(limit)} + """ + + headers = { + "Accept": "application/sparql-results+json", + "Content-Type": "application/sparql-query", + } + resp = requests.post(self._endpoint, data=query.encode("utf-8"), headers=headers, timeout=10) + resp.raise_for_status() + js = resp.json() + results: List[Departure] = [] + for b in js.get("results", {}).get("bindings", []): + def val(name: str) -> Optional[str]: + v = b.get(name, {}).get("value") + return v if v != "" else None - def get_next_departures(self, **kwargs) -> List[Departure]: - raise NotImplementedError( - "FusekiScheduleRepository is not yet implemented. Set FUSEKI_ENABLED=false to use Postgres." - ) \ No newline at end of file + results.append( + { + "route_id": val("route_id") or "", + "route_short_name": val("route_short_name"), + "route_long_name": val("route_long_name"), + "trip_id": val("trip_id") or "", + "stop_id": val("stop_id") or stop_id, + "headsign": val("headsign"), + "direction_id": int(val("direction_id")) if val("direction_id") else None, + "arrival_time": val("arrival"), + "departure_time": val("departure"), + } + ) + return results From 4757a264d68dcbee9030e84464b826bddf9e175f Mon Sep 17 00:00:00 2001 From: RichardCMX Date: Thu, 2 Oct 2025 12:35:46 -0600 Subject: [PATCH 07/12] dev(fuseki): add dataset config and shiro.ini; compose mounts; docs: add Fuseki dev guide and update README/architecture; fix duplicate FUSEKI_ENDPOINT --- README.md | 30 +++++++++++ docker-compose.yml | 3 +- docker/fuseki/configuration/dataset.ttl | 14 +++++ docker/fuseki/shiro.ini | 11 ++++ docs/architecture.md | 2 +- docs/dev/fuseki.md | 70 +++++++++++++++++++++++++ 6 files changed, 128 insertions(+), 2 deletions(-) create mode 100755 docker/fuseki/configuration/dataset.ttl create mode 100644 docker/fuseki/shiro.ini create mode 100644 docs/dev/fuseki.md diff --git a/README.md b/README.md index f0d25f4..6ec444f 100644 --- a/README.md +++ b/README.md @@ -234,6 +234,36 @@ Configuration flags (optional): - FUSEKI_ENABLED=false - FUSEKI_ENDPOINT= +### Using the optional Fuseki (SPARQL) backend in development + +For development and tests, you can run an optional Apache Jena Fuseki server and point the app/tests at its SPARQL endpoint. 
+ +1) Start Fuseki +- docker-compose up -d fuseki +- The dataset is defined by docker/fuseki/configuration/dataset.ttl as "dataset" with SPARQL and graph store endpoints. +- Auth rules are controlled by docker/fuseki/shiro.ini (anon allowed for /dataset/sparql and /dataset/data in dev/tests). + +2) Verify readiness +- GET: curl "http://localhost:3030/dataset/sparql?query=ASK%20%7B%7D" +- POST: curl -X POST -H 'Content-Type: application/sparql-query' --data 'ASK {}' http://localhost:3030/dataset/sparql + +3) Admin UI +- http://localhost:3030/#/ +- The mounted shiro.ini does not define users by default. Add users under [users] in that file if you need UI access, then recreate the container. + +4) Using Fuseki from the app (optional) +- To have the app use Fuseki for reads instead of PostgreSQL, set these in .env.local: + - FUSEKI_ENABLED=true + - FUSEKI_ENDPOINT=http://fuseki:3030/dataset/sparql + +5) Reset state (optional) +- The dataset persists in the fuseki_data Docker volume. To reset: + - docker-compose stop fuseki + - docker volume rm infobus_fuseki_data (name may vary) + - docker-compose up -d fuseki + +See also: docs/dev/fuseki.md for a deeper guide and troubleshooting. + Caching (keys and TTLs): - Key pattern: schedule:next_departures:feed={FEED_ID}:stop={STOP_ID}:date={YYYY-MM-DD}:time={HHMMSS}:limit={N}:v1 - Default TTL: 60 seconds diff --git a/docker-compose.yml b/docker-compose.yml index 8c7d3bf..1ff8000 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -39,6 +39,8 @@ services: - FUSEKI_ADMIN_PASSWORD=admin volumes: - fuseki_data:/fuseki + - ./docker/fuseki/configuration:/fuseki/configuration + - ./docker/fuseki/shiro.ini:/fuseki/shiro.ini:ro # Django Web App (Daphne ASGI server) web: @@ -63,7 +65,6 @@ services: - GDAL_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/libgdal.so - GEOS_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/libgeos_c.so - FUSEKI_ENDPOINT=http://fuseki:3030/dataset/sparql - - FUSEKI_ENDPOINT=http://fuseki:3030/dataset/sparql volumes: - .:/app # Mount current directory for live development - /app/.venv # Exclude .venv directory to prevent conflicts diff --git a/docker/fuseki/configuration/dataset.ttl b/docker/fuseki/configuration/dataset.ttl new file mode 100755 index 0000000..e714ee1 --- /dev/null +++ b/docker/fuseki/configuration/dataset.ttl @@ -0,0 +1,14 @@ +@prefix tdb2: . +@prefix fuseki: . +@prefix ja: . + +[] a fuseki:Server ; + fuseki:services ( + [ a fuseki:Service ; + fuseki:name "dataset" ; + fuseki:serviceQuery "sparql" ; + fuseki:serviceUpdate "update" ; + fuseki:serviceUpload "upload" ; + fuseki:serviceReadWriteGraphStore "data" ; + fuseki:dataset [ a tdb2:DatasetTDB2 ; tdb2:location "databases/dataset" ] ] + ) . 
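
As a complement to the curl checks described above, here is a minimal sketch of the same readiness check and sample-data load from Python. It assumes the `fuseki` compose service above is running with port 3030 mapped to the host, anonymous access to `/dataset/sparql` and `/dataset/data` as configured in `docker/fuseki/shiro.ini`, and that the script is run from the repository root so the sample file `api/tests/data/fuseki_sample.ttl` (added in the previous patch) is reachable:

```python
# Dev-only sketch: verify the Fuseki SPARQL endpoint and load the sample triples.
import requests

FUSEKI_URL = "http://localhost:3030"
DATASET = "dataset"

# Readiness check: ASK {} should return HTTP 200 once the endpoint is up
resp = requests.post(
    f"{FUSEKI_URL}/{DATASET}/sparql",
    data=b"ASK {}",
    headers={"Content-Type": "application/sparql-query"},
    timeout=5,
)
resp.raise_for_status()

# Load the tiny sample dataset into the default graph via the graph store endpoint
with open("api/tests/data/fuseki_sample.ttl", "rb") as f:
    resp = requests.post(
        f"{FUSEKI_URL}/{DATASET}/data?default",
        data=f.read(),
        headers={"Content-Type": "text/turtle"},
        timeout=10,
    )
resp.raise_for_status()
print("Fuseki is up and the sample triples are loaded")
```

This mirrors what the integration test in `api/tests/test_fuseki_schedule.py` does before calling the API endpoint.
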
diff --git a/docker/fuseki/shiro.ini b/docker/fuseki/shiro.ini new file mode 100644 index 0000000..9c0aaa0 --- /dev/null +++ b/docker/fuseki/shiro.ini @@ -0,0 +1,11 @@ +[main] +[users] +admin=admin +[roles] +admin=* +[urls] +/$/** = authcBasic +/dataset/update = authcBasic +/dataset/data = anon +/dataset/sparql = anon +/** = anon diff --git a/docs/architecture.md b/docs/architecture.md index bcd5613..6ea0428 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -63,7 +63,7 @@ Se define una capa de acceso a datos (DAL) con interfaces claras: Implementaciones actuales: - PostgresScheduleRepository (Django ORM) - CachedScheduleRepository (envoltorio con Redis) -- FusekiScheduleRepository (stub opcional para desarrollo futuro) +- FusekiScheduleRepository (backend opcional habilitable por entorno; utilizado en desarrollo y tests de integración) Endpoint nuevo (ejemplo): - GET /api/schedule/departures/?stop_id=STOP_123&limit=5 diff --git a/docs/dev/fuseki.md b/docs/dev/fuseki.md new file mode 100644 index 0000000..d616c3d --- /dev/null +++ b/docs/dev/fuseki.md @@ -0,0 +1,70 @@ +# Optional Apache Jena Fuseki (SPARQL) backend for development + +This project can optionally use Apache Jena Fuseki as a SPARQL backend for schedule queries in development and for integration tests. + +When to use it +- Default reads use PostgreSQL with Redis caching. +- Fuseki is useful for experimenting with SPARQL-based data access and for the provided integration test that validates our DAL against a live SPARQL endpoint. + +What the dev setup provides +- A dataset named "dataset" exposed at: + - Query (SPARQL): http://localhost:3030/dataset/sparql + - Graph store (read/write): http://localhost:3030/dataset/data +- A permissive shiro.ini for tests, allowing anonymous access to SPARQL query and data upload endpoints (admin endpoints are still protected). + +Files in this repo +- docker/fuseki/configuration/dataset.ttl + - Declares a Fuseki server with a single TDB2 dataset named "dataset" and the services: sparql, update, upload, data. +- docker/fuseki/shiro.ini + - Dev/test-friendly auth rules: anon access for /dataset/sparql and /dataset/data; admin areas require auth. + +Start and verify Fuseki +- Start the service: + - docker-compose up -d fuseki +- Check logs: + - docker-compose logs --tail=200 fuseki +- Verify readiness (expect 200): + - GET: curl "http://localhost:3030/dataset/sparql?query=ASK%20%7B%7D" + - POST: curl -X POST -H 'Content-Type: application/sparql-query' --data 'ASK {}' http://localhost:3030/dataset/sparql + +Admin UI and credentials +- UI: http://localhost:3030/#/ +- By default, our mounted shiro.ini does not define users. If you need to log in to the UI, add a user under [users] in docker/fuseki/shiro.ini, e.g.: + + [users] + admin = admin,admin + + [roles] + admin = * + + Then restart Fuseki: docker-compose up -d --force-recreate fuseki + +Resetting the dataset +- The dataset is persisted to the fuseki_data Docker volume. To reset: + - docker-compose stop fuseki + - docker volume rm infobus_fuseki_data (volume name may vary; list with docker volume ls) + - docker-compose up -d fuseki + +Using Fuseki from Django (optional) +- You can force the application to use the Fuseki-backed repository by setting in .env.local: + + FUSEKI_ENABLED=true + FUSEKI_ENDPOINT=http://fuseki:3030/dataset/sparql + +- Note: the integration test overrides these settings automatically; .env.local is not required for that test. 
+ +Integration test +- The test api/tests/test_fuseki_schedule.py: + - Waits for the SPARQL endpoint to be ready using ASK {} + - Uploads a tiny TTL into the default graph + - Calls /api/schedule/departures/ and asserts the enriched fields + +Troubleshooting +- 404 on /dataset or /dataset/sparql + - Ensure docker/fuseki/configuration/dataset.ttl is mounted at /fuseki/configuration and the volume fuseki_data is cleanly initialized (docker-compose down -v; docker-compose up -d fuseki). +- 405 on SPARQL POST + - Try a GET ASK first (as above). If only GET works, your shiro.ini or services configuration may be missing update/upload permissions or the endpoint is still starting. +- Fuseki logs show "Not writable: /fuseki/configuration" + - Make sure the /fuseki/configuration mount is writable by the container user. In dev, making the host directory writable (chmod -R 777 docker/fuseki/configuration) is acceptable. +- Random admin password printed in logs + - That occurs when the image initializes with its own config (no mounted shiro.ini). When using our mounted shiro.ini, define users there instead, or set the image-specific admin envs and avoid mounting shiro.ini. From 2dd9696a27542a6af646a02c123db40858684e76 Mon Sep 17 00:00:00 2001 From: RichardCMX Date: Thu, 9 Oct 2025 15:43:04 -0600 Subject: [PATCH 08/12] docs(README): include storage/ in project structure tree --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6ec444f..3741751 100644 --- a/README.md +++ b/README.md @@ -249,7 +249,7 @@ For development and tests, you can run an optional Apache Jena Fuseki server and 3) Admin UI - http://localhost:3030/#/ -- The mounted shiro.ini does not define users by default. Add users under [users] in that file if you need UI access, then recreate the container. +- The mounted shiro.ini defines an Admin user by default. Also you can add users under [users] in that file if you need UI access, then recreate the container. 4) Using Fuseki from the app (optional) - To have the app use Fuseki for reads instead of PostgreSQL, set these in .env.local: @@ -292,6 +292,7 @@ infobus/ ├── 📁 gtfs/ # GTFS data processing (submodule) ├── 📁 feed/ # Data feed management ├── 📁 api/ # REST API endpoints +├── 📁 storage/ # Data Access Layer (Postgres, Fuseki) and cache providers ├── 📦 docker-compose.yml # Development environment ├── 📦 docker-compose.production.yml # Production environment ├── 📄 Dockerfile # Multi-stage container build From f56075764350732489ed05b21302bfd7999dd8ce Mon Sep 17 00:00:00 2001 From: Ojem22 Date: Thu, 13 Nov 2025 12:00:14 -0600 Subject: [PATCH 09/12] refactor: remove Fuseki (Apache Jena) implementation - Remove Fuseki Docker service from docker-compose.yml - Remove fuseki_data volume - Delete storage/fuseki_schedule.py implementation - Delete api/tests/test_fuseki_schedule.py integration tests - Remove docker/fuseki/ configuration directory - Remove docs/dev/fuseki.md documentation - Update storage/factory.py to use only PostgreSQL repository - Remove FUSEKI_ENABLED and FUSEKI_ENDPOINT from settings.py - Remove Fuseki environment variables from .env.local.example - Update README.md and docs/architecture.md to remove Fuseki references PostgreSQL with Redis caching is now the sole storage backend. 
--- .env.local.example | 6 -- README.md | 36 +---------- api/tests/test_fuseki_schedule.py | 70 -------------------- datahub/settings.py | 4 -- docker-compose.yml | 16 ----- docker/fuseki/configuration/dataset.ttl | 14 ---- docker/fuseki/shiro.ini | 11 ---- docs/architecture.md | 4 -- docs/dev/fuseki.md | 70 -------------------- storage/factory.py | 10 +-- storage/fuseki_schedule.py | 86 ------------------------- 11 files changed, 3 insertions(+), 324 deletions(-) delete mode 100644 api/tests/test_fuseki_schedule.py delete mode 100755 docker/fuseki/configuration/dataset.ttl delete mode 100644 docker/fuseki/shiro.ini delete mode 100644 docs/dev/fuseki.md delete mode 100644 storage/fuseki_schedule.py diff --git a/.env.local.example b/.env.local.example index dc3e9cb..1b2cb42 100644 --- a/.env.local.example +++ b/.env.local.example @@ -13,9 +13,3 @@ DEBUG=True # For example: # DB_PASSWORD=my-local-password # ALLOWED_HOSTS=localhost,127.0.0.1,0.0.0.0,mylocal.dev - -# Optional Fuseki (SPARQL) backend configuration -# Set to true to enable using Fuseki adapter for reads (experimental) -FUSEKI_ENABLED=false -# SPARQL endpoint URL (e.g. http://localhost:3030/dataset/sparql) -FUSEKI_ENDPOINT= diff --git a/README.md b/README.md index 9208046..7e158d5 100644 --- a/README.md +++ b/README.md @@ -230,40 +230,6 @@ Response shape: } ``` -Configuration flags (optional): -- FUSEKI_ENABLED=false -- FUSEKI_ENDPOINT= - -### Using the optional Fuseki (SPARQL) backend in development - -For development and tests, you can run an optional Apache Jena Fuseki server and point the app/tests at its SPARQL endpoint. - -1) Start Fuseki -- docker-compose up -d fuseki -- The dataset is defined by docker/fuseki/configuration/dataset.ttl as "dataset" with SPARQL and graph store endpoints. -- Auth rules are controlled by docker/fuseki/shiro.ini (anon allowed for /dataset/sparql and /dataset/data in dev/tests). - -2) Verify readiness -- GET: curl "http://localhost:3030/dataset/sparql?query=ASK%20%7B%7D" -- POST: curl -X POST -H 'Content-Type: application/sparql-query' --data 'ASK {}' http://localhost:3030/dataset/sparql - -3) Admin UI -- http://localhost:3030/#/ -- The mounted shiro.ini defines an Admin user by default. Also you can add users under [users] in that file if you need UI access, then recreate the container. - -4) Using Fuseki from the app (optional) -- To have the app use Fuseki for reads instead of PostgreSQL, set these in .env.local: - - FUSEKI_ENABLED=true - - FUSEKI_ENDPOINT=http://fuseki:3030/dataset/sparql - -5) Reset state (optional) -- The dataset persists in the fuseki_data Docker volume. To reset: - - docker-compose stop fuseki - - docker volume rm infobus_fuseki_data (name may vary) - - docker-compose up -d fuseki - -See also: docs/dev/fuseki.md for a deeper guide and troubleshooting. 
- Caching (keys and TTLs): - Key pattern: schedule:next_departures:feed={FEED_ID}:stop={STOP_ID}:date={YYYY-MM-DD}:time={HHMMSS}:limit={N}:v1 - Default TTL: 60 seconds @@ -292,7 +258,7 @@ infobus/ ├── 📁 gtfs/ # GTFS data processing (submodule) ├── 📁 feed/ # Data feed management ├── 📁 api/ # REST API endpoints -├── 📁 storage/ # Data Access Layer (Postgres, Fuseki) and cache providers +├── 📁 storage/ # Data Access Layer (Postgres) and cache providers ├── 📦 docker-compose.yml # Development environment ├── 📦 docker-compose.production.yml # Production environment ├── 📄 Dockerfile # Multi-stage container build diff --git a/api/tests/test_fuseki_schedule.py b/api/tests/test_fuseki_schedule.py deleted file mode 100644 index 6354d3c..0000000 --- a/api/tests/test_fuseki_schedule.py +++ /dev/null @@ -1,70 +0,0 @@ -from __future__ import annotations - -import os -import time -from pathlib import Path - -import requests -from django.test import override_settings -from rest_framework import status -from rest_framework.test import APITestCase -from django.contrib.gis.geos import Point - -from gtfs.models import Feed, Stop - - -FUSEKI_URL = os.environ.get("FUSEKI_TEST_URL", "http://fuseki:3030") -DATASET = os.environ.get("FUSEKI_TEST_DATASET", "dataset") -SPARQL_ENDPOINT = f"{FUSEKI_URL}/{DATASET}/sparql" -DATA_ENDPOINT = f"{FUSEKI_URL}/{DATASET}/data?default" - - -class FusekiScheduleIntegrationTests(APITestCase): - @override_settings(FUSEKI_ENABLED=True, FUSEKI_ENDPOINT=SPARQL_ENDPOINT) - def test_fuseki_departures_via_dal(self): - # Ensure DB has feed and stop for validation in API view - feed = Feed.objects.create(feed_id="TEST", is_current=True) - Stop.objects.create(feed=feed, stop_id="S1", stop_name="Stop 1", stop_point=Point(0.0, 0.0)) - - # Wait for Fuseki to be ready - self._wait_for_fuseki_ready() - - # Load tiny TTL into dataset (default graph) - ttl_path = Path(__file__).parent / "data" / "fuseki_sample.ttl" - with open(ttl_path, "rb") as f: - r = requests.post(DATA_ENDPOINT, data=f.read(), headers={"Content-Type": "text/turtle"}, timeout=10, auth=("admin", "admin")) - # Some images allow anonymous writes; if 401, try again without auth - if r.status_code == 401: - r = requests.post(DATA_ENDPOINT, data=open(ttl_path, "rb").read(), headers={"Content-Type": "text/turtle"}, timeout=10) - r.raise_for_status() - - # Call the API endpoint; service_date in TTL is far future (2099-01-01), so pass date to match - url = "/api/schedule/departures/?feed_id=TEST&stop_id=S1&date=2099-01-01&time=08:00:00&limit=1" - resp = self.client.get(url) - self.assertEqual(resp.status_code, status.HTTP_200_OK, resp.content) - data = resp.json() - self.assertEqual(data["feed_id"], "TEST") - self.assertEqual(data["stop_id"], "S1") - self.assertEqual(data["limit"], 1) - self.assertEqual(len(data["departures"]), 1) - item = data["departures"][0] - # Validate enriched fields - self.assertEqual(item["route_id"], "R1") - self.assertEqual(item["route_short_name"], "R1") - self.assertEqual(item["route_long_name"], "Ruta 1") - self.assertEqual(item["trip_id"], "T1") - self.assertEqual(item["arrival_time"], "08:05:00") - self.assertEqual(item["departure_time"], "08:06:00") - - def _wait_for_fuseki_ready(self, timeout_seconds: int = 20): - start = time.time() - while time.time() - start < timeout_seconds: - try: - # ASK {} to ensure SPARQL endpoint is responsive - r = requests.post(SPARQL_ENDPOINT, data=b"ASK {}", headers={"Content-Type": "application/sparql-query"}, timeout=3) - if r.status_code == 200: - return - except 
Exception: - pass - time.sleep(1) - raise RuntimeError("Fuseki SPARQL endpoint did not become ready in time") diff --git a/datahub/settings.py b/datahub/settings.py index d596466..dedd355 100644 --- a/datahub/settings.py +++ b/datahub/settings.py @@ -131,10 +131,6 @@ REDIS_HOST = config("REDIS_HOST") REDIS_PORT = config("REDIS_PORT") -# Optional Fuseki (SPARQL) backend -FUSEKI_ENABLED = config("FUSEKI_ENABLED", cast=bool, default=False) -FUSEKI_ENDPOINT = config("FUSEKI_ENDPOINT", default=None) - # DAL caching configuration SCHEDULE_CACHE_TTL_SECONDS = config("SCHEDULE_CACHE_TTL_SECONDS", cast=int, default=60) diff --git a/docker-compose.yml b/docker-compose.yml index 1ff8000..4c62a04 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -29,19 +29,6 @@ services: timeout: 5s retries: 5 - # Apache Jena Fuseki (optional) - fuseki: - image: stain/jena-fuseki:latest - ports: - - "3030:3030" - environment: - - FUSEKI_DATASET=dataset - - FUSEKI_ADMIN_PASSWORD=admin - volumes: - - fuseki_data:/fuseki - - ./docker/fuseki/configuration:/fuseki/configuration - - ./docker/fuseki/shiro.ini:/fuseki/shiro.ini:ro - # Django Web App (Daphne ASGI server) web: build: @@ -64,7 +51,6 @@ services: # GDAL library path for ARM64 architecture - GDAL_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/libgdal.so - GEOS_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/libgeos_c.so - - FUSEKI_ENDPOINT=http://fuseki:3030/dataset/sparql volumes: - .:/app # Mount current directory for live development - /app/.venv # Exclude .venv directory to prevent conflicts @@ -143,7 +129,6 @@ services: # GDAL library path for ARM64 architecture - GDAL_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/libgdal.so - GEOS_LIBRARY_PATH=/usr/lib/aarch64-linux-gnu/libgeos_c.so - - FUSEKI_ENDPOINT=http://fuseki:3030/dataset/sparql volumes: - .:/app # Mount source code for hot reload - /app/.venv # Exclude .venv directory to prevent conflicts @@ -174,7 +159,6 @@ volumes: static_volume: media_volume: celery_beat_data: - fuseki_data: networks: default: diff --git a/docker/fuseki/configuration/dataset.ttl b/docker/fuseki/configuration/dataset.ttl deleted file mode 100755 index e714ee1..0000000 --- a/docker/fuseki/configuration/dataset.ttl +++ /dev/null @@ -1,14 +0,0 @@ -@prefix tdb2: . -@prefix fuseki: . -@prefix ja: . - -[] a fuseki:Server ; - fuseki:services ( - [ a fuseki:Service ; - fuseki:name "dataset" ; - fuseki:serviceQuery "sparql" ; - fuseki:serviceUpdate "update" ; - fuseki:serviceUpload "upload" ; - fuseki:serviceReadWriteGraphStore "data" ; - fuseki:dataset [ a tdb2:DatasetTDB2 ; tdb2:location "databases/dataset" ] ] - ) . diff --git a/docker/fuseki/shiro.ini b/docker/fuseki/shiro.ini deleted file mode 100644 index 9c0aaa0..0000000 --- a/docker/fuseki/shiro.ini +++ /dev/null @@ -1,11 +0,0 @@ -[main] -[users] -admin=admin -[roles] -admin=* -[urls] -/$/** = authcBasic -/dataset/update = authcBasic -/dataset/data = anon -/dataset/sparql = anon -/** = anon diff --git a/docs/architecture.md b/docs/architecture.md index 6ea0428..57b3f21 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -52,9 +52,6 @@ Nota: las pantallas por ahora asumimos que son Raspberry Pi en [modo kiosko](htt - PostgreSQL/PostGIS es la fuente de verdad para GTFS Schedule. - Redis se utiliza como caché de alto desempeño (lecturas read-through/write-through donde aplique) y para mensajería (Channels, Celery). -- Fuseki (Jena) es un backend opcional para consultas SPARQL. 
-  - FUSEKI_ENABLED (bool)
-  - FUSEKI_ENDPOINT (URL)

 Se define una capa de acceso a datos (DAL) con interfaces claras:
 - ScheduleRepository: obtiene salidas programadas (next departures) por parada.
@@ -63,7 +60,6 @@ Se define una capa de acceso a datos (DAL) con interfaces claras:
 Implementaciones actuales:
 - PostgresScheduleRepository (Django ORM)
 - CachedScheduleRepository (envoltorio con Redis)
-- FusekiScheduleRepository (backend opcional habilitable por entorno; utilizado en desarrollo y tests de integración)

 Endpoint nuevo (ejemplo):
 - GET /api/schedule/departures/?stop_id=STOP_123&limit=5
diff --git a/docs/dev/fuseki.md b/docs/dev/fuseki.md
deleted file mode 100644
index d616c3d..0000000
--- a/docs/dev/fuseki.md
+++ /dev/null
@@ -1,70 +0,0 @@
-# Optional Apache Jena Fuseki (SPARQL) backend for development
-
-This project can optionally use Apache Jena Fuseki as a SPARQL backend for schedule queries in development and for integration tests.
-
-When to use it
-- Default reads use PostgreSQL with Redis caching.
-- Fuseki is useful for experimenting with SPARQL-based data access and for the provided integration test that validates our DAL against a live SPARQL endpoint.
-
-What the dev setup provides
-- A dataset named "dataset" exposed at:
-  - Query (SPARQL): http://localhost:3030/dataset/sparql
-  - Graph store (read/write): http://localhost:3030/dataset/data
-- A permissive shiro.ini for tests, allowing anonymous access to SPARQL query and data upload endpoints (admin endpoints are still protected).
-
-Files in this repo
-- docker/fuseki/configuration/dataset.ttl
-  - Declares a Fuseki server with a single TDB2 dataset named "dataset" and the services: sparql, update, upload, data.
-- docker/fuseki/shiro.ini
-  - Dev/test-friendly auth rules: anon access for /dataset/sparql and /dataset/data; admin areas require auth.
-
-Start and verify Fuseki
-- Start the service:
-  - docker-compose up -d fuseki
-- Check logs:
-  - docker-compose logs --tail=200 fuseki
-- Verify readiness (expect 200):
-  - GET: curl "http://localhost:3030/dataset/sparql?query=ASK%20%7B%7D"
-  - POST: curl -X POST -H 'Content-Type: application/sparql-query' --data 'ASK {}' http://localhost:3030/dataset/sparql
-
-Admin UI and credentials
-- UI: http://localhost:3030/#/
-- By default, our mounted shiro.ini does not define users. If you need to log in to the UI, add a user under [users] in docker/fuseki/shiro.ini, e.g.:
-
-  [users]
-  admin = admin,admin
-
-  [roles]
-  admin = *
-
-  Then restart Fuseki: docker-compose up -d --force-recreate fuseki
-
-Resetting the dataset
-- The dataset is persisted to the fuseki_data Docker volume. To reset:
-  - docker-compose stop fuseki
-  - docker volume rm infobus_fuseki_data (volume name may vary; list with docker volume ls)
-  - docker-compose up -d fuseki
-
-Using Fuseki from Django (optional)
-- You can force the application to use the Fuseki-backed repository by setting in .env.local:
-
-  FUSEKI_ENABLED=true
-  FUSEKI_ENDPOINT=http://fuseki:3030/dataset/sparql
-
-- Note: the integration test overrides these settings automatically; .env.local is not required for that test.
-
-Integration test
-- The test api/tests/test_fuseki_schedule.py:
-  - Waits for the SPARQL endpoint to be ready using ASK {}
-  - Uploads a tiny TTL into the default graph
-  - Calls /api/schedule/departures/ and asserts the enriched fields
-
-Troubleshooting
-- 404 on /dataset or /dataset/sparql
-  - Ensure docker/fuseki/configuration/dataset.ttl is mounted at /fuseki/configuration and the volume fuseki_data is cleanly initialized (docker-compose down -v; docker-compose up -d fuseki).
-- 405 on SPARQL POST
-  - Try a GET ASK first (as above). If only GET works, your shiro.ini or services configuration may be missing update/upload permissions or the endpoint is still starting.
-- Fuseki logs show "Not writable: /fuseki/configuration"
-  - Make sure the /fuseki/configuration mount is writable by the container user. In dev, making the host directory writable (chmod -R 777 docker/fuseki/configuration) is acceptable.
-- Random admin password printed in logs
-  - That occurs when the image initializes with its own config (no mounted shiro.ini). When using our mounted shiro.ini, define users there instead, or set the image-specific admin envs and avoid mounting shiro.ini.
diff --git a/storage/factory.py b/storage/factory.py
index f46f834..5997a9b 100644
--- a/storage/factory.py
+++ b/storage/factory.py
@@ -9,21 +9,15 @@
 from .interfaces import ScheduleRepository
 from .postgres_schedule import PostgresScheduleRepository
 from .redis_cache import RedisCacheProvider
-from .fuseki_schedule import FusekiScheduleRepository


 def get_schedule_repository(*, use_cache: bool = True) -> ScheduleRepository:
     """Factory to obtain a ScheduleRepository according to settings.

     - Uses PostgreSQL (Django ORM) by default.
-    - Optionally wraps with Redis cache.
-    - If FUSEKI_ENABLED is true and endpoint configured, uses Fuseki adapter instead.
+    - Optionally wraps with Redis cache for improved performance.
     """

-    base_repo: ScheduleRepository
-    if getattr(settings, "FUSEKI_ENABLED", False) and getattr(settings, "FUSEKI_ENDPOINT", None):
-        base_repo = FusekiScheduleRepository(endpoint=settings.FUSEKI_ENDPOINT)  # type: ignore[arg-type]
-    else:
-        base_repo = PostgresScheduleRepository()
+    base_repo: ScheduleRepository = PostgresScheduleRepository()

     if use_cache:
         cache = RedisCacheProvider()
diff --git a/storage/fuseki_schedule.py b/storage/fuseki_schedule.py
deleted file mode 100644
index d009ce1..0000000
--- a/storage/fuseki_schedule.py
+++ /dev/null
@@ -1,86 +0,0 @@
-from __future__ import annotations
-
-from typing import List, Optional
-from datetime import date, time
-
-import requests
-
-from .interfaces import Departure, ScheduleRepository
-
-
-class FusekiScheduleRepository(ScheduleRepository):
-    """Fuseki-backed schedule repository using SPARQL queries.
-
-    Minimal vocabulary expected for each ex:Departure resource:
-    - ex:feed_id, ex:stop_id, ex:trip_id (xsd:string)
-    - ex:arrival_time, ex:departure_time (xsd:string HH:MM:SS)
-    - optional: ex:route_id, ex:headsign, ex:direction_id, ex:route_short_name, ex:route_long_name
-    - optional: ex:service_date (xsd:string YYYY-MM-DD)
-
-    PREFIX ex:
-    """
-
-    def __init__(self, *, endpoint: str):
-        self._endpoint = endpoint.rstrip("/")
-
-    def get_next_departures(
-        self,
-        *,
-        feed_id: str,
-        stop_id: str,
-        service_date: date,
-        from_time: time,
-        limit: int = 10,
-    ) -> List[Departure]:
-        date_str = service_date.isoformat()
-        time_str = from_time.strftime("%H:%M:%S")
-        query = f"""
-        PREFIX ex:
-        SELECT ?route_id ?route_short_name ?route_long_name ?trip_id ?stop_id ?headsign ?direction_id ?arrival ?departure
-        WHERE {{
-          ?d a ex:Departure ;
-             ex:feed_id "{feed_id}" ;
-             ex:stop_id "{stop_id}" ;
-             ex:trip_id ?trip_id ;
-             ex:arrival_time ?arrival ;
-             ex:departure_time ?departure .
-          OPTIONAL {{ ?d ex:route_id ?route_id }}
-          OPTIONAL {{ ?d ex:headsign ?headsign }}
-          OPTIONAL {{ ?d ex:direction_id ?direction_id }}
-          OPTIONAL {{ ?d ex:route_short_name ?route_short_name }}
-          OPTIONAL {{ ?d ex:route_long_name ?route_long_name }}
-          OPTIONAL {{ ?d ex:service_date ?svc_date }}
-          FILTER ( ?departure >= "{time_str}" )
-          FILTER ( !BOUND(?svc_date) || ?svc_date = "{date_str}" )
-        }}
-        ORDER BY ?departure
-        LIMIT {int(limit)}
-        """
-
-        headers = {
-            "Accept": "application/sparql-results+json",
-            "Content-Type": "application/sparql-query",
-        }
-        resp = requests.post(self._endpoint, data=query.encode("utf-8"), headers=headers, timeout=10)
-        resp.raise_for_status()
-        js = resp.json()
-        results: List[Departure] = []
-        for b in js.get("results", {}).get("bindings", []):
-            def val(name: str) -> Optional[str]:
-                v = b.get(name, {}).get("value")
-                return v if v != "" else None
-
-            results.append(
-                {
-                    "route_id": val("route_id") or "",
-                    "route_short_name": val("route_short_name"),
-                    "route_long_name": val("route_long_name"),
-                    "trip_id": val("trip_id") or "",
-                    "stop_id": val("stop_id") or stop_id,
-                    "headsign": val("headsign"),
-                    "direction_id": int(val("direction_id")) if val("direction_id") else None,
-                    "arrival_time": val("arrival"),
-                    "departure_time": val("departure"),
-                }
-            )
-        return results

From e5cd20fca433dbdd8cf7816355af0398c66db9c4 Mon Sep 17 00:00:00 2001
From: Ojem22
Date: Thu, 13 Nov 2025 12:04:45 -0600
Subject: [PATCH 10/12] docs: add CHANGELOG.md documenting storage/DAL feature

- Document Data Access Layer implementation
- Document new /api/schedule/departures/ endpoint
- Document Redis caching configuration
- Document Fuseki removal
- Follow Keep a Changelog format
---
 CHANGELOG.md | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 CHANGELOG.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..ea950d9
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,81 @@
+# Changelog
+
+All notable changes to the Infobús project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+### Added - Storage and Data Access Layer (feat/storage-reading-dal)
+
+#### Storage Layer
+- **Data Access Layer (DAL)** with repository pattern for GTFS schedule data
+  - `ScheduleRepository` interface defining contract for schedule data access
+  - `PostgresScheduleRepository` implementation using Django ORM
+  - `CachedScheduleRepository` decorator for Redis caching with configurable TTL
+  - `RedisCacheProvider` for cache operations
+  - Factory pattern (`get_schedule_repository()`) for obtaining configured repository instances
+
+#### API Endpoints
+- **GET /api/schedule/departures/** - Retrieve scheduled departures for a stop
+  - Query parameters:
+    - `stop_id` (required): Stop identifier
+    - `feed_id` (optional): Feed identifier, defaults to current feed
+    - `date` (optional): Service date in YYYY-MM-DD format, defaults to today
+    - `time` (optional): Departure time in HH:MM or HH:MM:SS format, defaults to now
+    - `limit` (optional): Maximum number of results (1-100), defaults to 10
+  - Returns enriched departure data with route information:
+    - Route short name and long name
+    - Trip headsign and direction
+    - Formatted arrival and departure times (HH:MM:SS)
+  - Validates stop existence (returns 404 if not found)
+  - Uses PostgreSQL as data source with Redis read-through caching
+
+#### Configuration
+- `SCHEDULE_CACHE_TTL_SECONDS` environment variable for cache duration (default: 60 seconds)
+- Cache key format: `schedule:next_departures:feed={FEED_ID}:stop={STOP_ID}:date={YYYY-MM-DD}:time={HHMMSS}:limit={N}:v1`
+
+#### Testing
+- Comprehensive test suite for schedule departures endpoint
+  - Response structure validation
+  - Stop validation (404 handling)
+  - Time format validation (HH:MM:SS)
+  - Programmatic test dataset creation
+
+#### Documentation
+- OpenAPI/Swagger schema generation with drf-spectacular
+- API endpoint annotations for automatic documentation
+- Architecture documentation for DAL strategy
+- README updates with endpoint usage examples and cache configuration
+
+### Removed - Storage and Data Access Layer (feat/storage-reading-dal)
+
+#### Fuseki Implementation
+- Removed Apache Jena Fuseki as optional SPARQL backend
+  - Deleted `storage/fuseki_schedule.py` implementation
+  - Removed `api/tests/test_fuseki_schedule.py` integration tests
+  - Removed Fuseki Docker service from docker-compose.yml
+  - Deleted `fuseki_data` Docker volume
+  - Removed `docker/fuseki/` configuration directory
+  - Deleted `docs/dev/fuseki.md` documentation
+- Removed Fuseki-related configuration
+  - `FUSEKI_ENABLED` environment variable
+  - `FUSEKI_ENDPOINT` environment variable
+  - Fuseki references in `.env.local.example`
+- Updated `storage/factory.py` to use only PostgreSQL repository
+- PostgreSQL with Redis caching is now the sole storage backend
+
+### Changed - Storage and Data Access Layer (feat/storage-reading-dal)
+
+#### Documentation
+- Updated README.md to document new DAL architecture and API endpoints
+- Updated docs/architecture.md with storage strategy and repository pattern
+- Added project structure documentation including `storage/` directory
+- Removed all Fuseki references from documentation
+
+---
+
+## [Previous Releases]
+
+

From 521a3da86973fe6567d404a9bd8dcbddbf1ecccc Mon Sep 17 00:00:00 2001
From: Ojem22
Date: Thu, 13 Nov 2025 12:07:41 -0600
Subject: [PATCH 11/12] docs(tests): add docstrings to schedule departures test suite

- Add class-level docstring explaining DAL testing
- Document setUp method for test data preparation
- Add docstrings for test_returns_404_when_stop_missing
- Add docstrings for test_returns_departures_with_expected_shape
- Improve test readability and maintainability
---
 api/tests/test_schedule_departures.py | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/api/tests/test_schedule_departures.py b/api/tests/test_schedule_departures.py
index 80ff2f3..b79056c 100644
--- a/api/tests/test_schedule_departures.py
+++ b/api/tests/test_schedule_departures.py
@@ -16,7 +16,14 @@


 class ScheduleDeparturesTests(APITestCase):
+    """Test suite for the /api/schedule/departures/ endpoint.
+
+    This endpoint uses the Data Access Layer (DAL) to retrieve scheduled
+    departures from PostgreSQL with Redis caching.
+    """
+
     def setUp(self):
+        """Set up minimal test data: feed, stop, and stop_time records."""
         # Minimal dataset for the endpoint
         self.feed = Feed.objects.create(
             feed_id="TEST",
@@ -45,12 +52,18 @@ def setUp(self):
         )

     def test_returns_404_when_stop_missing(self):
+        """Verify endpoint returns 404 when querying a non-existent stop_id."""
         url = "/api/schedule/departures/?stop_id=THIS_DOES_NOT_EXIST&limit=1"
         resp = self.client.get(url)
         self.assertEqual(resp.status_code, status.HTTP_404_NOT_FOUND)
         self.assertIn("error", resp.json())

     def test_returns_departures_with_expected_shape(self):
+        """Verify endpoint returns departures with expected JSON structure.
+
+        Validates that all required fields are present in the response and
+        time fields are formatted correctly (HH:MM:SS).
+        """
         feed = Feed.objects.filter(is_current=True).first() or Feed.objects.first()
         self.assertIsNotNone(feed, "Expected fixture to provide at least one feed")

From 69f928fde09e8b46b4a7d1907b0ee916daf8b492 Mon Sep 17 00:00:00 2001
From: Ojem22
Date: Thu, 13 Nov 2025 12:08:35 -0600
Subject: [PATCH 12/12] docs(tests): add README for api/tests directory

- Document test structure and organization
- Explain test coverage for schedule departures endpoint
- Provide examples for running tests
- Document test data setup approach
- Add guidelines for adding new tests
---
 api/tests/README.md | 77 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)
 create mode 100644 api/tests/README.md

diff --git a/api/tests/README.md b/api/tests/README.md
new file mode 100644
index 0000000..ffc2356
--- /dev/null
+++ b/api/tests/README.md
@@ -0,0 +1,77 @@
+# API Tests
+
+This directory contains test suites for the Infobús API endpoints.
+
+## Test Structure
+
+### `test_schedule_departures.py`
+Tests for the `/api/schedule/departures/` endpoint which provides scheduled departure information using the Data Access Layer (DAL).
+
+**Test Cases:**
+- `ScheduleDeparturesTests`: Complete test suite for the schedule departures endpoint
+  - `test_returns_404_when_stop_missing`: Validates 404 error handling for non-existent stops
+  - `test_returns_departures_with_expected_shape`: Validates response structure and data format
+
+**What's Tested:**
+- Endpoint returns proper HTTP status codes
+- Response JSON structure matches API specification
+- Required fields are present in response
+- Time fields are formatted correctly (HH:MM:SS)
+- Stop validation and error handling
+- Integration with PostgreSQL via DAL
+- Data enrichment (route names, trip information)
+
+## Running Tests
+
+### Run all API tests
+```bash
+docker compose exec web uv run python manage.py test api
+```
+
+### Run specific test file
+```bash
+docker compose exec web uv run python manage.py test api.tests.test_schedule_departures
+```
+
+### Run specific test class
+```bash
+docker compose exec web uv run python manage.py test api.tests.test_schedule_departures.ScheduleDeparturesTests
+```
+
+### Run specific test method
+```bash
+docker compose exec web uv run python manage.py test api.tests.test_schedule_departures.ScheduleDeparturesTests.test_returns_404_when_stop_missing
+```
+
+## Test Data
+
+Tests use Django's test database which is created and destroyed automatically. Each test case sets up its own minimal test data using:
+- `Feed.objects.create()` for GTFS feeds
+- `Stop.objects.create()` for stop locations
+- `StopTime.objects.bulk_create()` for scheduled stop times
+
+## Test Dependencies
+
+- `rest_framework.test.APITestCase`: Base class for API testing
+- `django.test.TestCase`: Django test framework
+- `gtfs.models`: GTFS data models (Feed, Stop, StopTime)
+- PostgreSQL test database with PostGIS extension
+
+## Coverage
+
+Current test coverage focuses on:
+- ✅ Schedule departures endpoint functionality
+- ✅ Error handling and validation
+- ✅ Response format verification
+- ✅ DAL integration (PostgreSQL)
+
+## Adding New Tests
+
+When adding new API endpoint tests:
+1. Create a new test file named `test_<endpoint_name>.py`
+2. Import necessary test base classes and models
+3. Add class-level and method-level docstrings
+4. Set up minimal test data in `setUp()` method
+5. Test both success and error cases
+6. Validate response structure and data types
+7. Update this README with the new test file information
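+
+### Example: skeleton for a new test module
+
+A minimal sketch of what a new test module could look like when following the guidelines above. The `/api/example/` path and the `ExampleEndpointTests` class are placeholders used only for illustration; they are not part of the existing API:
+
+```python
+from rest_framework import status
+from rest_framework.test import APITestCase
+
+from gtfs.models import Feed
+
+
+class ExampleEndpointTests(APITestCase):
+    """Sketch of a test suite for a hypothetical /api/example/ endpoint."""
+
+    def setUp(self):
+        """Create the minimal data the endpoint needs."""
+        self.feed = Feed.objects.create(feed_id="TEST", is_current=True)
+
+    def test_rejects_request_without_required_param(self):
+        """The endpoint should return an error when its required parameter is missing."""
+        resp = self.client.get("/api/example/")
+        self.assertEqual(resp.status_code, status.HTTP_400_BAD_REQUEST)
+```
+
+Replace the placeholder path and assertion with the real endpoint's contract, and extend `setUp()` with whatever GTFS records (stops, trips, stop times) the endpoint reads.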