diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index 05fe71e..d88a5fc 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -53,4 +53,4 @@ jobs:
 
       - name: Test with pytest
        run: |
-          pytest -s -rx tests
+          pytest -s tests
diff --git a/backend/connectors/wqp/source.py b/backend/connectors/wqp/source.py
index f9550ab..926b2b4 100644
--- a/backend/connectors/wqp/source.py
+++ b/backend/connectors/wqp/source.py
@@ -28,6 +28,12 @@
     DT_MEASURED,
     EARLIEST,
     LATEST,
+    TDS,
+    WATERLEVELS,
+    USGS_PCODE_30210,
+    USGS_PCODE_70300,
+    USGS_PCODE_70301,
+    USGS_PCODE_70303,
 )
 from backend.connectors.wqp.transformer import (
     WQPSiteTransformer,
@@ -97,7 +103,7 @@ def get_records(self):
         else:
             # every record with pCode 30210 (depth in m) has a corresponding
             # record with pCode 72019 (depth in ft) but not vice versa
-            params["pCode"] = "30210"
+            params["pCode"] = USGS_PCODE_30210
 
         params.update(get_date_range(config))
 
@@ -130,7 +136,44 @@
         return [ri["ResultMeasureValue"] for ri in records]
 
     def _clean_records(self, records):
-        return [ri for ri in records if ri["ResultMeasureValue"]]
+        clean_records = [r for r in records if r["ResultMeasureValue"]]
+
+        if self.config.parameter == TDS and len(clean_records) > 1:
+            site_id = clean_records[0]["MonitoringLocationIdentifier"]
+            return_records = []
+            activity_identifiers = [record["ActivityIdentifier"] for record in records]
+            activity_identifiers = list(set(activity_identifiers))
+            for activity_identifier in activity_identifiers:
+                # get all records for this activity identifier
+                ai_records = {
+                    record["USGSPCode"]: record
+                    for record in records
+                    if record["ActivityIdentifier"] == activity_identifier
+                }
+                if len(ai_records.items()) > 1:
+                    if USGS_PCODE_70300 in ai_records.keys():
+                        kept_record = ai_records[USGS_PCODE_70300]
+                        pcode = USGS_PCODE_70300
+                    elif USGS_PCODE_70301 in ai_records.keys():
+                        kept_record = ai_records[USGS_PCODE_70301]
+                        pcode = USGS_PCODE_70301
+                    elif USGS_PCODE_70303 in ai_records.keys():
+                        kept_record = ai_records[USGS_PCODE_70303]
+                        pcode = USGS_PCODE_70303
+                    else:
+                        raise ValueError(
+                            f"Multiple TDS records found for {site_id} with ActivityIdentifier {activity_identifier} but no 70300, 70301, or 70303 pcodes found."
+                        )
+                    record_date = kept_record["ActivityStartDate"]
+                    self.log(
+                        f"Removing duplicates for {site_id} on {record_date} with ActivityIdentifier {activity_identifier}. Keeping record with pcode {pcode}."
+                    )
+                else:
+                    kept_record = list(ai_records.values())[0]
+                return_records.append(kept_record)
+            return return_records
+        else:
+            return clean_records
 
     def _extract_source_parameter_units(self, records):
         return [ri["ResultMeasure/MeasureUnitCode"] for ri in records]
@@ -160,7 +203,7 @@ def get_records(self, site_record):
         }
         params.update(get_date_range(self.config))
 
-        if config.parameter.lower() != "waterlevels":
+        if config.parameter.lower() != WATERLEVELS:
             params["characteristicName"] = get_analyte_search_param(
                 config.parameter, WQP_ANALYTE_MAPPING
             )
diff --git a/backend/constants.py b/backend/constants.py
index b7635ab..2482900 100644
--- a/backend/constants.py
+++ b/backend/constants.py
@@ -57,6 +57,11 @@
 SOURCE_PARAMETER_UNITS = "source_parameter_units"
 CONVERSION_FACTOR = "conversion_factor"
 
+USGS_PCODE_30210 = "30210"
+USGS_PCODE_70300 = "70300"
+USGS_PCODE_70301 = "70301"
+USGS_PCODE_70303 = "70303"
+
 ANALYTE_OPTIONS = sorted(
     [
         ARSENIC,
diff --git a/frontend/cli.py b/frontend/cli.py
index 879e5d3..3efb46b 100644
--- a/frontend/cli.py
+++ b/frontend/cli.py
@@ -187,11 +187,11 @@ def cli():
     )
 ]
 
-OUTPUT_FORMATS = sorted([value for value in OutputFormat])
+OUTPUT_FORMATS = sorted([of for of in OutputFormat])
 OUTPUT_FORMAT_OPTIONS = [
     click.option(
         "--output-format",
-        type=click.Choice(OUTPUT_FORMATS),
+        type=click.Choice(OUTPUT_FORMATS, case_sensitive=False),
         default="csv",
         help=f"Output file format for sites: {OUTPUT_FORMATS}. Default is csv",
     )
diff --git a/setup.py b/setup.py
index 2675e8e..c847cd6 100644
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,7 @@
 
 setup(
     name="nmuwd",
-    version="0.9.5",
+    version="0.9.6",
     author="Jake Ross",
     description="New Mexico Water Data Integration Engine",
     long_description=long_description,
diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py
index 4d342ae..2029c38 100644
--- a/tests/test_cli/__init__.py
+++ b/tests/test_cli/__init__.py
@@ -104,74 +104,82 @@ def _test_weave(
         result = self.runner.invoke(weave, arguments, standalone_mode=False)
 
         # Assert
-        assert result.exit_code == 0
-
-        """
-        For the config, check that
-
-        0. (set output dir to clean up tests results even in event of failure)
-        1. The parameter is set correctly
-        2. The agencies are set correctly
-        3. The output types are set correctly
-        4. The site limit is set correctly
-        5. The dry is set correctly
-        6. The start date is set correctly
-        7. The end date is set correctly
-        8. The geographic filter is set correctly
-        9. The site output type is set correctly
-        """
-        config = result.return_value
-
-        # 0
-        self.output_dir = Path(config.output_path)
-
-        # 1
-        assert getattr(config, "parameter") == parameter
-
-        # 2
-        agency_with_underscore = self.agency.replace("-", "_")
-        if self.agency_reports_parameter[parameter]:
-            assert getattr(config, f"use_source_{agency_with_underscore}") is True
-        else:
-            assert getattr(config, f"use_source_{agency_with_underscore}") is False
-
-        for no_agency in no_agencies:
-            no_agency_with_underscore = no_agency.replace("--no-", "").replace("-", "_")
-            assert getattr(config, f"use_source_{no_agency_with_underscore}") is False
-
-        # 3
-        output_types = ["summary", "timeseries_unified", "timeseries_separated"]
-        for ot in output_types:
-            if ot == output_type:
-                assert getattr(config, f"output_{ot}") is True
+        try:
+            assert result.exit_code == 0
+
+            """
+            For the config, check that
+
+            0. (set output dir to clean up tests results even in event of failure)
+            1. The parameter is set correctly
+            2. The agencies are set correctly
+            3. The output types are set correctly
+            4. The site limit is set correctly
+            5. The dry is set correctly
+            6. The start date is set correctly
+            7. The end date is set correctly
+            8. The geographic filter is set correctly
+            9. The site output type is set correctly
+            """
+            config = result.return_value
+
+            # 0
+            self.output_dir = Path(config.output_path)
+
+            # 1
+            assert getattr(config, "parameter") == parameter
+
+            # 2
+            agency_with_underscore = self.agency.replace("-", "_")
+            if self.agency_reports_parameter[parameter]:
+                assert getattr(config, f"use_source_{agency_with_underscore}") is True
             else:
-                assert getattr(config, f"output_{ot}") is False
-
-        # 4
-        assert getattr(config, "site_limit") == 4
-
-        # 5
-        assert getattr(config, "dry") is True
-
-        # 6
-        assert getattr(config, "start_date") == start_date
-
-        # 7
-        assert getattr(config, "end_date") == end_date
-
-        # 8
-        if geographic_filter_name and geographic_filter_value:
-            for _geographic_filter_name in ["bbox", "county", "wkt"]:
-                if _geographic_filter_name == geographic_filter_name:
-                    assert (
-                        getattr(config, _geographic_filter_name)
-                        == geographic_filter_value
-                    )
+                assert getattr(config, f"use_source_{agency_with_underscore}") is False
+
+            for no_agency in no_agencies:
+                no_agency_with_underscore = no_agency.replace("--no-", "").replace(
+                    "-", "_"
+                )
+                assert (
+                    getattr(config, f"use_source_{no_agency_with_underscore}") is False
+                )
+
+            # 3
+            output_types = ["summary", "timeseries_unified", "timeseries_separated"]
+            for ot in output_types:
+                if ot == output_type:
+                    assert getattr(config, f"output_{ot}") is True
                 else:
-                    assert getattr(config, _geographic_filter_name) == ""
-
-        # 9
-        assert getattr(config, "output_format") == output_format
+                    assert getattr(config, f"output_{ot}") is False
+
+            # 4
+            assert getattr(config, "site_limit") == 4
+
+            # 5
+            assert getattr(config, "dry") is True
+
+            # 6
+            assert getattr(config, "start_date") == start_date
+
+            # 7
+            assert getattr(config, "end_date") == end_date
+
+            # 8
+            if geographic_filter_name and geographic_filter_value:
+                for _geographic_filter_name in ["bbox", "county", "wkt"]:
+                    if _geographic_filter_name == geographic_filter_name:
+                        assert (
+                            getattr(config, _geographic_filter_name)
+                            == geographic_filter_value
+                        )
+                    else:
+                        assert getattr(config, _geographic_filter_name) == ""
+
+            # 9
+            assert getattr(config, "output_format") == output_format
+        except Exception as e:
+            print(result)
+            assert False
 
     def test_weave_summary(self):
         self._test_weave(parameter=WATERLEVELS, output_type="summary")
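
Note: the heart of this change is the new TDS de-duplication in WQPSource._clean_records. The snippet below is a minimal, standalone sketch of that rule for readers who want to see it outside the diff context: within a single ActivityIdentifier, prefer pCode 70300, then 70301, then 70303. The helper name dedup_tds_records and the sample records are illustrative only and are not part of this PR; the dictionary keys mirror the WQP result columns referenced above.

TDS_PCODE_PREFERENCE = ("70300", "70301", "70303")


def dedup_tds_records(records):
    """Keep one TDS result per ActivityIdentifier, preferring 70300 > 70301 > 70303."""
    kept = []
    for activity_id in sorted({r["ActivityIdentifier"] for r in records}):
        # index this activity's records by their USGS parameter code
        by_pcode = {
            r["USGSPCode"]: r
            for r in records
            if r["ActivityIdentifier"] == activity_id
        }
        if len(by_pcode) > 1:
            # multiple pcodes reported for the same activity: keep the preferred one
            pcode = next((p for p in TDS_PCODE_PREFERENCE if p in by_pcode), None)
            if pcode is None:
                raise ValueError(f"No preferred TDS pCode for {activity_id}")
            kept.append(by_pcode[pcode])
        else:
            kept.append(next(iter(by_pcode.values())))
    return kept


# Two results reported for the same sampling activity: the 70300 record wins.
sample = [
    {"ActivityIdentifier": "A1", "USGSPCode": "70301", "ResultMeasureValue": "512"},
    {"ActivityIdentifier": "A1", "USGSPCode": "70300", "ResultMeasureValue": "510"},
]
assert [r["USGSPCode"] for r in dedup_tds_records(sample)] == ["70300"]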