diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..06461de --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,86 @@ +# CLAUDE.md + +This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository. + +## Project Overview + +Unofficial Python client for the [TooGoodToGo](https://toogoodtogo.com) API. Published on PyPI as `tgtg`. Python 3.9+. Licensed under GPL-3.0. + +## Commands + +```bash +# Install dependencies (requires poetry) +# Option 1: if pipx is available +pipx install poetry + +# Option 2: via venv (if pipx/poetry not available) +python3 -m venv .venv +source .venv/bin/activate +pip install poetry + +# Then install project deps +poetry install + +# Run all tests (with coverage) +make test # or: poetry run pytest + +# Run a single test file +poetry run pytest tests/test_login.py + +# Run a single test function +poetry run pytest tests/test_login.py::test_login_with_tokens + +# Run linting (black, isort, flake8) +make lint # or: poetry run pre-commit run -a + +# Build and publish to PyPI +make publish +``` + +## Code Style + +- Formatter: **Black** +- Import sorting: **isort** (profile: black) +- Linter: **flake8** (max-line-length: 119, max-complexity: 10) +- All enforced via pre-commit hooks + +## Architecture + +The entire library is a single class `TgtgClient` in `tgtg/__init__.py` (~500 lines). There are no subpackages or complex abstractions. + +### Key components + +- **`tgtg/__init__.py`** - `TgtgClient` class plus all API endpoint constants. Every public method calls `self.login()` first, which handles token refresh automatically. The client uses `requests.Session` for HTTP, with Android user-agent spoofing. All API calls go through the central `_post()` method which handles DataDome cookie management and 403 retry logic. +- **`tgtg/exceptions.py`** - Three exception types: `TgtgLoginError`, `TgtgAPIError`, `TgtgPollingError`. +- **`tgtg/google_play_scraper.py`** - Scrapes the Google Play Store page to get the current TooGoodToGo APK version for user-agent strings. Falls back to `DEFAULT_APK_VERSION`. + +### DataDome bot protection + +TooGoodToGo uses DataDome for bot protection. The client handles this by: + +1. Fetching a `datadome` cookie from `https://api-sdk.datadome.co/sdk/` before API requests, mimicking the Android app's DataDome SDK integration (device fingerprint parameters like model, OS version, screen size). +2. The `_post()` method automatically ensures a DataDome cookie is present. On 403, it invalidates the cookie and retries with a fresh one. +3. The `_generate_datadome_cid()` helper creates random 120-char client IDs matching DataDome's expected format. +4. VPN/datacenter IPs are frequently blocked by DataDome regardless of cookie validity — residential IPs work best. + +### Authentication flow + +1. Email-based login sends a request to `auth/v5/authByEmail` +2. User receives a PIN code via email +3. Client submits PIN via `auth/v5/authByRequestPin` to complete authentication +4. Fallback: client can also poll `auth/v5/authByRequestPollingId` (legacy link-click flow) +5. On success, stores `access_token`, `refresh_token`, and `cookie` +6. Tokens auto-refresh after `access_token_lifetime` (default 4 hours) + +### API versions in use + +Auth endpoints use `v5`, item endpoints use `v8`, order endpoints use `v8`, token refresh uses `v1`, favorites use `v1`, discover uses `v1`. + +## Testing + +- Framework: **pytest** with **responses** library for HTTP mocking +- Coverage: branch coverage enabled, configured in `setup.cfg` (`addopts = --cov=tgtg`) +- Tests in `tests/` use fixtures from `conftest.py` that mock auth/refresh endpoints +- `tests/constants.py` contains expected API response property lists and fake token credentials +- `test_api.py` contains integration tests marked `@pytest.mark.withoutresponses` that hit the real API (requires `TGTG_EMAIL` env var) +- Time-dependent tests use **freezegun** for mocking `datetime.datetime.now()` diff --git a/tgtg/__init__.py b/tgtg/__init__.py index d768eb7..d1d5f9d 100644 --- a/tgtg/__init__.py +++ b/tgtg/__init__.py @@ -1,10 +1,11 @@ import datetime import random +import re import sys import time import uuid from http import HTTPStatus -from urllib.parse import urljoin +from urllib.parse import urljoin, urlsplit import requests @@ -17,6 +18,7 @@ FAVORITE_ITEM_ENDPOINT = "user/favorite/v1/{}/update" AUTH_BY_EMAIL_ENDPOINT = "auth/v5/authByEmail" AUTH_POLLING_ENDPOINT = "auth/v5/authByRequestPollingId" +AUTH_BY_REQUEST_PIN_ENDPOINT = "auth/v5/authByRequestPin" SIGNUP_BY_EMAIL_ENDPOINT = "auth/v5/signUpByEmail" REFRESH_ENDPOINT = "token/v1/refresh" ACTIVE_ORDER_ENDPOINT = "order/v8/active" @@ -25,6 +27,7 @@ ABORT_ORDER_ENDPOINT = "order/v8/{}/abort" ORDER_STATUS_ENDPOINT = "order/v8/{}/status" API_BUCKET_ENDPOINT = "discover/v1/bucket" +DATADOME_SDK_URL = "https://api-sdk.datadome.co/sdk/" DEFAULT_APK_VERSION = "24.11.0" USER_AGENTS = [ "TGTG/{} Dalvik/2.1.0 (Linux; U; Android 9; Nexus 5 Build/M4B30Z)", @@ -36,6 +39,11 @@ POLLING_WAIT_TIME = 5 # Seconds +def _generate_datadome_cid(): + chars = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789~_" + return "".join(random.choice(chars) for _ in range(120)) + + class TgtgClient: def __init__( self, @@ -103,9 +111,8 @@ def _headers(self): "accept-language": self.language, "content-type": "application/json; charset=utf-8", "user-agent": self.user_agent, + "x-correlation-id": self.correlation_id, } - if not self.email: - headers["x-correlation-id"] = self.correlation_id if self.cookie: headers["Cookie"] = self.cookie if self.access_token: @@ -116,6 +123,86 @@ def _headers(self): def _already_logged(self): return bool(self.access_token and self.refresh_token) + def _fetch_datadome_cookie(self, request_url): + """Fetch a DataDome cookie from the SDK endpoint, mimicking the Android app.""" + cid = _generate_datadome_cid() + apk_version = getattr(self, "version", DEFAULT_APK_VERSION) + params = { + "camera": '{"auth":"true", "info":"{\\"front\\":\\"2000x1500\\",\\"back\\":\\"5472x3648\\"}"}', + "cid": cid, + "ddk": "1D42C2CA6131C526E09F294FE96F94", + "ddv": "3.0.4", + "ddvc": apk_version, + "events": '[{"id":1,"message":"response validation","source":"sdk","date":' + + str(int(time.time() * 1000)) + + "}]", + "inte": "android-java-okhttp", + "mdl": "Pixel 7 Pro", + "os": "Android", + "osn": "UPSIDE_DOWN_CAKE", + "osr": "14", + "osv": "34", + "request": request_url, + "screen_d": "3.5", + "screen_x": "1440", + "screen_y": "3120", + "ua": self.user_agent, + } + try: + r = requests.post( + DATADOME_SDK_URL, + data=params, + headers={ + "Content-Type": "application/x-www-form-urlencoded", + "Accept": "*/*", + "User-Agent": self.user_agent, + "Accept-Encoding": "gzip, deflate, br", + }, + timeout=10, + ) + r.raise_for_status() + data = r.json() + if data.get("status") == 200 and data.get("cookie"): + m = re.search(r"datadome=([^;]+)", data["cookie"]) + if m: + domain = urlsplit(self.base_url).hostname + self.session.cookies.set( + "datadome", + m.group(1), + domain=f".{domain}", + path="/", + secure=True, + ) + except Exception: + sys.stdout.write("Failed to fetch DataDome cookie\n") + + def _ensure_datadome_cookie(self, request_url): + if "datadome" not in self.session.cookies: + self._fetch_datadome_cookie(request_url) + + def _post(self, url, **kwargs): + """POST with automatic DataDome cookie handling.""" + self._ensure_datadome_cookie(url) + response = self.session.post( + url, + headers=self._headers, + proxies=self.proxies, + timeout=self.timeout, + **kwargs, + ) + if response.status_code == 403: + # Invalidate and retry with fresh DataDome cookie + self.session.cookies.clear() + self._fetch_datadome_cookie(url) + response = self.session.post( + url, + headers=self._headers, + proxies=self.proxies, + timeout=self.timeout, + **kwargs, + ) + return response + def _refresh_token(self): if ( self.last_time_token_refreshed @@ -124,12 +211,9 @@ def _refresh_token(self): ): return - response = self.session.post( + response = self._post( self._get_url(REFRESH_ENDPOINT), json={"refresh_token": self.refresh_token}, - headers=self._headers, - proxies=self.proxies, - timeout=self.timeout, ) if response.status_code == HTTPStatus.OK: self.access_token = response.json()["access_token"] @@ -147,17 +231,13 @@ def login(self): if self._already_logged: self._refresh_token() else: - for _ in range(2): # doing twice the request to try to bypass captcha - response = self.session.post( - self._get_url(AUTH_BY_EMAIL_ENDPOINT), - headers=self._headers, - json={ - "device_type": self.device_type, - "email": self.email, - }, - proxies=self.proxies, - timeout=self.timeout, - ) + response = self._post( + self._get_url(AUTH_BY_EMAIL_ENDPOINT), + json={ + "device_type": self.device_type, + "email": self.email, + }, + ) if response.status_code == HTTPStatus.OK: first_login_response = response.json() if first_login_response["state"] == "TERMS": @@ -178,17 +258,19 @@ def login(self): raise TgtgLoginError(response.status_code, response.content) def start_polling(self, polling_id): + sys.stdout.write("Check your email for a login PIN code.\n") + pin = input("Enter PIN from email: ").strip() + if pin: + return self._auth_by_pin(polling_id, pin) + for _ in range(MAX_POLLING_TRIES): - response = self.session.post( + response = self._post( self._get_url(AUTH_POLLING_ENDPOINT), - headers=self._headers, json={ "device_type": self.device_type, "email": self.email, "request_polling_id": polling_id, }, - proxies=self.proxies, - timeout=self.timeout, ) if response.status_code == HTTPStatus.ACCEPTED: sys.stdout.write( @@ -217,6 +299,27 @@ def start_polling(self, polling_id): f"Max retries ({MAX_POLLING_TRIES * POLLING_WAIT_TIME} seconds) reached. Try again." ) + def _auth_by_pin(self, polling_id, pin): + response = self._post( + self._get_url(AUTH_BY_REQUEST_PIN_ENDPOINT), + json={ + "device_type": self.device_type, + "email": self.email, + "request_pin": pin, + "request_polling_id": polling_id, + }, + ) + if response.status_code == HTTPStatus.OK: + sys.stdout.write("Logged in!\n") + login_response = response.json() + self.access_token = login_response["access_token"] + self.refresh_token = login_response["refresh_token"] + self.last_time_token_refreshed = datetime.datetime.now() + self.cookie = response.headers.get("Set-Cookie", "") + return + else: + raise TgtgLoginError(response.status_code, response.content) + def get_items( self, *, @@ -255,12 +358,9 @@ def get_items( "hidden_only": hidden_only, "we_care_only": we_care_only, } - response = self.session.post( + response = self._post( self._get_url(API_ITEM_ENDPOINT), - headers=self._headers, json=data, - proxies=self.proxies, - timeout=self.timeout, ) if response.status_code == HTTPStatus.OK: return response.json()["items"] @@ -269,12 +369,9 @@ def get_items( def get_item(self, item_id): self.login() - response = self.session.post( + response = self._post( urljoin(self._get_url(API_ITEM_ENDPOINT), str(item_id)), - headers=self._headers, json={"origin": None}, - proxies=self.proxies, - timeout=self.timeout, ) if response.status_code == HTTPStatus.OK: return response.json() @@ -298,12 +395,9 @@ def get_favorites( "paging": {"page": page, "size": page_size}, "bucket": {"filler_type": "Favorites"}, } - response = self.session.post( + response = self._post( self._get_url(API_BUCKET_ENDPOINT), - headers=self._headers, json=data, - proxies=self.proxies, - timeout=self.timeout, ) if response.status_code == HTTPStatus.OK: return response.json().get("mobile_bucket", {}).get("items", []) @@ -312,12 +406,9 @@ def get_favorites( def set_favorite(self, item_id, is_favorite): self.login() - response = self.session.post( + response = self._post( self._get_url(FAVORITE_ITEM_ENDPOINT.format(item_id)), - headers=self._headers, json={"is_favorite": is_favorite}, - proxies=self.proxies, - timeout=self.timeout, ) if response.status_code != HTTPStatus.OK: raise TgtgAPIError(response.status_code, response.content) @@ -325,12 +416,9 @@ def set_favorite(self, item_id, is_favorite): def create_order(self, item_id, item_count): self.login() - response = self.session.post( + response = self._post( urljoin(self._get_url(CREATE_ORDER_ENDPOINT), str(item_id)), - headers=self._headers, json={"item_count": item_count}, - proxies=self.proxies, - timeout=self.timeout, ) if response.status_code != HTTPStatus.OK: raise TgtgAPIError(response.status_code, response.content) @@ -342,11 +430,8 @@ def create_order(self, item_id, item_count): def get_order_status(self, order_id): self.login() - response = self.session.post( + response = self._post( self._get_url(ORDER_STATUS_ENDPOINT.format(order_id)), - headers=self._headers, - proxies=self.proxies, - timeout=self.timeout, ) if response.status_code == HTTPStatus.OK: return response.json() @@ -357,12 +442,9 @@ def abort_order(self, order_id): """Use this when your order is not yet paid""" self.login() - response = self.session.post( + response = self._post( self._get_url(ABORT_ORDER_ENDPOINT.format(order_id)), - headers=self._headers, json={"cancel_reason_id": 1}, - proxies=self.proxies, - timeout=self.timeout, ) if response.status_code != HTTPStatus.OK: raise TgtgAPIError(response.status_code, response.content) @@ -380,9 +462,8 @@ def signup_by_email( newsletter_opt_in=False, push_notification_opt_in=True, ): - response = self.session.post( + response = self._post( self._get_url(SIGNUP_BY_EMAIL_ENDPOINT), - headers=self._headers, json={ "country_id": country_id, "device_type": self.device_type, @@ -391,8 +472,6 @@ def signup_by_email( "newsletter_opt_in": newsletter_opt_in, "push_notification_opt_in": push_notification_opt_in, }, - proxies=self.proxies, - timeout=self.timeout, ) if response.status_code == HTTPStatus.OK: self.access_token = response.json()["login_response"]["access_token"] @@ -405,12 +484,9 @@ def signup_by_email( def get_active(self): self.login() - response = self.session.post( + response = self._post( self._get_url(ACTIVE_ORDER_ENDPOINT), - headers=self._headers, json={}, - proxies=self.proxies, - timeout=self.timeout, ) if response.status_code == HTTPStatus.OK: return response.json() @@ -419,12 +495,9 @@ def get_active(self): def get_inactive(self, page=0, page_size=20): self.login() - response = self.session.post( + response = self._post( self._get_url(INACTIVE_ORDER_ENDPOINT), - headers=self._headers, json={"paging": {"page": page, "size": page_size}}, - proxies=self.proxies, - timeout=self.timeout, ) if response.status_code == HTTPStatus.OK: return response.json()