diff --git a/README.md b/README.md index 4402a14..8a65b59 100644 --- a/README.md +++ b/README.md @@ -77,6 +77,17 @@ Some parameters can be used in the query string. You can force an encoding (e.g. `utf-8`) using this parameter, instead of relying on the automatic detection. +#### `analysis` + +**default**: no analysis + +You can request an analysis of the CSV file (csv detective and pandas profiling checks) by adding `&analysis=yes` to the query string. + +#### `refresh` + +**default**: no refresh + +You can force the SQLite database to be re-created, even if it already exists, by adding `&refresh=yes` to the query string. ### Data API diff --git a/csvapi/parseview.py b/csvapi/parseview.py index 3f6c35d..365f44c 100644 --- a/csvapi/parseview.py +++ b/csvapi/parseview.py @@ -108,13 +108,14 @@ async def get(self): app.logger.debug('* Starting ParseView.get') url = request.args.get('url') encoding = request.args.get('encoding') + refresh = request.args.get('refresh') if not url: raise APIError('Missing url query string variable.', status=400) if not validators.url(url): raise APIError('Malformed url parameter.', status=400) urlhash = get_hash(url) analysis = request.args.get('analysis') - if not await already_exists(urlhash, analysis): + if not await already_exists(urlhash, analysis) or refresh == 'yes': try: storage = app.config['DB_ROOT_DIR'] await self.do_parse(url=url, diff --git a/tests/test_api.py b/tests/test_api.py index 84b1a45..a48d2a0 100644 --- a/tests/test_api.py +++ b/tests/test_api.py @@ -159,6 +159,18 @@ async def test_apify(rmock, csv, client): assert db_path.exists() +async def test_apify_refresh(rmock, csv, client): + rmock.get(MOCK_CSV_URL, status=200, body=csv.encode('utf-8')) + res = await client.get(f"/apify?url={MOCK_CSV_URL}&refresh=yes") + assert res.status_code == 200 + jsonres = await res.json + assert jsonres['ok'] + assert 'endpoint' in jsonres + assert f"/api/{MOCK_CSV_HASH}" in jsonres['endpoint'] + db_path = Path(DB_ROOT_DIR) / f"{MOCK_CSV_HASH}.db" + assert db_path.exists() 
+ + async def test_apify_not_found(rmock, csv, client): rmock.get(MOCK_CSV_URL, status=404) res = await client.get(f"/apify?url={MOCK_CSV_URL}") @@ -619,6 +631,20 @@ async def test_apify_analysed_check_general_infos(rmock, csv_top, client): assert jsonres['general_infos']['header_row_idx'] == 0 +async def test_apify_analysis_and_refresh(rmock, csv_top, client): + content = csv_top.replace('<sep>', ';').encode('utf-8') + url = random_url() + rmock.get(url, body=content) + await client.get(f"/apify?url={url}&analysis=yes&refresh=yes") + res = await client.get(f"/api/{get_hash(url)}") + assert res.status_code == 200 + jsonres = await res.json + assert jsonres['general_infos']['nb_columns'] == 2 + assert jsonres['general_infos']['total_lines'] == 4 + assert jsonres['general_infos']['separator'] == ';' + assert jsonres['general_infos']['header_row_idx'] == 0 + + @pytest.mark.parametrize('extension', ['xls', 'xlsx']) async def test_no_analysis_when_excel(client, rmock, extension): here = os.path.dirname(os.path.abspath(__file__))