Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,17 @@ Some parameters can be used in the query string.

You can force an encoding (e.g. `utf-8`) using this parameter, instead of relying on the automatic detection.

#### `analysis`

**default**: no analysis

You can request an analysis of the CSV file (csv detective and pandas profiling checks) by adding `&analysis=yes` to the endpoint's query string.

#### `refresh`

**default**: no refresh

You can force the SQLite database to be re-created, even if it already exists, by adding `&refresh=yes` to the endpoint's query string.

### Data API

Expand Down
3 changes: 2 additions & 1 deletion csvapi/parseview.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,13 +108,14 @@ async def get(self):
app.logger.debug('* Starting ParseView.get')
url = request.args.get('url')
encoding = request.args.get('encoding')
refresh = request.args.get('refresh')
if not url:
raise APIError('Missing url query string variable.', status=400)
if not validators.url(url):
raise APIError('Malformed url parameter.', status=400)
urlhash = get_hash(url)
analysis = request.args.get('analysis')
if not await already_exists(urlhash, analysis):
if not await already_exists(urlhash, analysis) or refresh == 'yes':
try:
storage = app.config['DB_ROOT_DIR']
await self.do_parse(url=url,
Expand Down
26 changes: 26 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,6 +159,18 @@ async def test_apify(rmock, csv, client):
assert db_path.exists()


async def test_apify_refresh(rmock, csv, client):
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I don't think it actually tests the refresh feature, though. You should make two calls, with a different mock on the second one, right?

rmock.get(MOCK_CSV_URL, status=200, body=csv.encode('utf-8'))
res = await client.get(f"/apify?url={MOCK_CSV_URL}&refresh=yes")
assert res.status_code == 200
jsonres = await res.json
assert jsonres['ok']
assert 'endpoint' in jsonres
assert f"/api/{MOCK_CSV_HASH}" in jsonres['endpoint']
db_path = Path(DB_ROOT_DIR) / f"{MOCK_CSV_HASH}.db"
assert db_path.exists()


async def test_apify_not_found(rmock, csv, client):
rmock.get(MOCK_CSV_URL, status=404)
res = await client.get(f"/apify?url={MOCK_CSV_URL}")
Expand Down Expand Up @@ -619,6 +631,20 @@ async def test_apify_analysed_check_general_infos(rmock, csv_top, client):
assert jsonres['general_infos']['header_row_idx'] == 0


async def test_apify_analysis_and_refresh(rmock, csv_top, client):
    """Apify a mocked CSV with both `analysis=yes` and `refresh=yes`.

    The mocked body is `csv_top` with its `<sep>` placeholder replaced by
    `;`. After the apify call, the `/api/<hash>` endpoint must answer 200
    and expose the expected `general_infos` values.
    """
    # Serve the semicolon-separated payload from a random URL.
    url = random_url()
    payload = csv_top.replace('<sep>', ';').encode('utf-8')
    rmock.get(url, body=payload)

    # Trigger parsing with analysis and forced refresh in a single request.
    await client.get(f"/apify?url={url}&analysis=yes&refresh=yes")

    response = await client.get(f"/api/{get_hash(url)}")
    assert response.status_code == 200

    body = await response.json
    infos = body['general_infos']
    assert infos['nb_columns'] == 2
    assert infos['total_lines'] == 4
    assert infos['separator'] == ';'
    assert infos['header_row_idx'] == 0


@pytest.mark.parametrize('extension', ['xls', 'xlsx'])
async def test_no_analysis_when_excel(client, rmock, extension):
here = os.path.dirname(os.path.abspath(__file__))
Expand Down