From 49949c282424666422fa77b7062f7cfe51b24e76 Mon Sep 17 00:00:00 2001
From: Jacob Brown
Date: Mon, 19 May 2025 17:21:06 -0600
Subject: [PATCH 01/16] WQP TDS duplicate handling

---
 backend/connectors/wqp/source.py | 35 +++++++++++++++++++++++++++++++-
 1 file changed, 34 insertions(+), 1 deletion(-)

diff --git a/backend/connectors/wqp/source.py b/backend/connectors/wqp/source.py
index f9550ab..e971954 100644
--- a/backend/connectors/wqp/source.py
+++ b/backend/connectors/wqp/source.py
@@ -28,6 +28,8 @@
     DT_MEASURED,
     EARLIEST,
     LATEST,
+    TDS,
+    WATERLEVELS
 )
 from backend.connectors.wqp.transformer import (
     WQPSiteTransformer,
@@ -160,7 +162,7 @@ def get_records(self, site_record):
         }
         params.update(get_date_range(self.config))
 
-        if config.parameter.lower() != "waterlevels":
+        if config.parameter.lower() != WATERLEVELS:
             params["characteristicName"] = get_analyte_search_param(
                 config.parameter, WQP_ANALYTE_MAPPING
             )
@@ -181,6 +183,37 @@ def _parameter_units_hook(self):
         raise NotImplementedError(
             f"{self.__class__.__name__} must implement _parameter_units_hook"
         )
+
+    def _clean_records(self, records) -> list:
+        """
+        Remove duplicate TDS records. This is called on a site-by-site basis so does not need to account for
+        different sites having observations on the same date.
+        """
+        if self.config.parameter == TDS:
+            site_id = records[0]["MonitoringLocationIdentifier"]
+            return_records = []
+            dates = [record["ActivityStartDate"] for record in records]
+            dates = list(set(dates))
+            for date in dates:
+                # get all records for this date
+                date_records = {
+                    record["USGSPCode"]: record for record in records if record["ActivityStartDate"] == date
+                }
+                if len(date_records.items()) > 1:
+                    if "70301" in date_records.keys():
+                        kept_record = date_records["70301"]
+                    elif "70303" in date_records.keys():
+                        kept_record = date_records["70303"]
+                    else:
+                        raise ValueError(
+                            f"Multiple TDS records found for {site_id} on date {date} but no 70301 or 70303 pcodes found."
+                        )
+                else:
+                    kept_record = list(date_records.values())[0]
+                return_records.append(kept_record)
+            return return_records
+        else:
+            return super()._clean_records(records)
 
 
 class WQPAnalyteSource(WQPParameterSource, BaseAnalyteSource):
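The selection rule patch 01 introduces, distilled: within one site-and-date group, a record with pcode 70301 wins, then 70303, and anything else is an error. A minimal sketch with invented records (field names follow the patch; the values are made up for illustration):

```python
# Invented WQP-style result records; field names match the patch, values are illustrative.
records = [
    {"MonitoringLocationIdentifier": "USGS-001", "ActivityStartDate": "2020-05-01",
     "USGSPCode": "70301", "ResultMeasureValue": "510"},
    {"MonitoringLocationIdentifier": "USGS-001", "ActivityStartDate": "2020-05-01",
     "USGSPCode": "70303", "ResultMeasureValue": "0.69"},
]

# Same grouping trick as the patch: key the date's records by pcode ...
by_pcode = {r["USGSPCode"]: r for r in records}

# ... then prefer 70301, fall back to 70303, and fail loudly otherwise.
kept = by_pcode.get("70301") or by_pcode.get("70303")
if kept is None:
    raise ValueError("duplicate TDS records with no 70301/70303 pcode")
print(kept["USGSPCode"], kept["ResultMeasureValue"])  # 70301 510
```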
""" if self.config.parameter == TDS: @@ -197,7 +197,9 @@ def _clean_records(self, records) -> list: for date in dates: # get all records for this date date_records = { - record["USGSPCode"]: record for record in records if record["ActivityStartDate"] == date + record["USGSPCode"]: record + for record in records + if record["ActivityStartDate"] == date } if len(date_records.items()) > 1: if "70301" in date_records.keys(): From 15b7c9d4f72bc0e887036aff0a2a43b2bb0d8a03 Mon Sep 17 00:00:00 2001 From: Jacob Brown Date: Mon, 19 May 2025 17:30:57 -0600 Subject: [PATCH 03/16] mypy and _clean_records fix --- backend/connectors/wqp/source.py | 60 +++++++++++++++----------------- 1 file changed, 28 insertions(+), 32 deletions(-) diff --git a/backend/connectors/wqp/source.py b/backend/connectors/wqp/source.py index e971954..762b825 100644 --- a/backend/connectors/wqp/source.py +++ b/backend/connectors/wqp/source.py @@ -132,7 +132,34 @@ def _extract_source_parameter_results(self, records): return [ri["ResultMeasureValue"] for ri in records] def _clean_records(self, records): - return [ri for ri in records if ri["ResultMeasureValue"]] + records_with_values = [r for r in records if r["ResultMeasureValue"]] + + if self.config.parameter == TDS and len(records_with_values) > 1: + site_id = records_with_values[0]["MonitoringLocationIdentifier"] + return_records = [] + dates = [record["ActivityStartDate"] for record in records] + dates = list(set(dates)) + for date in dates: + # get all records for this date + date_records = { + record["USGSPCode"]: record for record in records if record["ActivityStartDate"] == date + } + if len(date_records.items()) > 1: + if "70301" in date_records.keys(): + kept_record = date_records["70301"] + elif "70303" in date_records.keys(): + kept_record = date_records["70303"] + else: + raise ValueError( + f"Multiple TDS records found for {site_id} on date {date} but no 70301 or 70303 pcodes found." + ) + else: + kept_record = list(date_records.values())[0] + return_records.append(kept_record) + return return_records + else: + return records_with_values + def _extract_source_parameter_units(self, records): return [ri["ResultMeasure/MeasureUnitCode"] for ri in records] @@ -183,37 +210,6 @@ def _parameter_units_hook(self): raise NotImplementedError( f"{self.__class__.__name__} must implement _parameter_units_hook" ) - - def _clean_records(self, records) -> list: - """ - Remove duplicate TDS records. This is called on a site-by-site basis so does not need to account for - different sites having observations on the same date. - """ - if self.config.parameter == TDS: - site_id = records[0]["MonitoringLocationIdentifier"] - return_records = [] - dates = [record["ActivityStartDate"] for record in records] - dates = list(set(dates)) - for date in dates: - # get all records for this date - date_records = { - record["USGSPCode"]: record for record in records if record["ActivityStartDate"] == date - } - if len(date_records.items()) > 1: - if "70301" in date_records.keys(): - kept_record = date_records["70301"] - elif "70303" in date_records.keys(): - kept_record = date_records["70303"] - else: - raise ValueError( - f"Multiple TDS records found for {site_id} on date {date} but no 70301 or 70303 pcodes found." 
From 21dd9aca9f698bf898d861dea140a31af1256266 Mon Sep 17 00:00:00 2001
From: Jacob Brown
Date: Mon, 19 May 2025 17:39:56 -0600
Subject: [PATCH 04/16] log removing duplicate TDS data for WQP

---
 backend/connectors/wqp/source.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/backend/connectors/wqp/source.py b/backend/connectors/wqp/source.py
index 85489af..80242dc 100644
--- a/backend/connectors/wqp/source.py
+++ b/backend/connectors/wqp/source.py
@@ -147,12 +147,15 @@ def _clean_records(self, records):
                 if len(date_records.items()) > 1:
                     if "70301" in date_records.keys():
                         kept_record = date_records["70301"]
+                        pcode = "70301"
                     elif "70303" in date_records.keys():
                         kept_record = date_records["70303"]
+                        pcode = "70303"
                     else:
                         raise ValueError(
                             f"Multiple TDS records found for {site_id} on date {date} but no 70301 or 70303 pcodes found."
                         )
+                    self.log(f"Removing duplicates for {site_id} on date {date}. Keeping record with pcode {pcode}.")
                 else:
                     kept_record = list(date_records.values())[0]
                 return_records.append(kept_record)

From d2df6f4ac1293adac3f648a7d898dd1d5a0ce030 Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Mon, 19 May 2025 23:41:22 +0000
Subject: [PATCH 05/16] Formatting changes

---
 backend/connectors/wqp/source.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/backend/connectors/wqp/source.py b/backend/connectors/wqp/source.py
index 80242dc..3b581c6 100644
--- a/backend/connectors/wqp/source.py
+++ b/backend/connectors/wqp/source.py
@@ -142,7 +142,9 @@ def _clean_records(self, records):
             for date in dates:
                 # get all records for this date
                 date_records = {
-                    record["USGSPCode"]: record for record in records if record["ActivityStartDate"] == date
+                    record["USGSPCode"]: record
+                    for record in records
+                    if record["ActivityStartDate"] == date
                 }
                 if len(date_records.items()) > 1:
                     if "70301" in date_records.keys():
@@ -155,7 +157,9 @@ def _clean_records(self, records):
                         raise ValueError(
                             f"Multiple TDS records found for {site_id} on date {date} but no 70301 or 70303 pcodes found."
                         )
-                    self.log(f"Removing duplicates for {site_id} on date {date}. Keeping record with pcode {pcode}.")
+                    self.log(
+                        f"Removing duplicates for {site_id} on date {date}. Keeping record with pcode {pcode}."
+                    )
                 else:
                     kept_record = list(date_records.values())[0]
                 return_records.append(kept_record)
@@ -163,7 +167,6 @@ def _clean_records(self, records):
             return return_records
         else:
            return records_with_values
-
 
     def _extract_source_parameter_units(self, records):
         return [ri["ResultMeasure/MeasureUnitCode"] for ri in records]
From 572139ef87f12eb5dac81f3063dd769467bb189a Mon Sep 17 00:00:00 2001
From: Jacob Brown
Date: Tue, 20 May 2025 09:36:14 -0600
Subject: [PATCH 06/16] use ActivityIdentifier to identify duplicate records

---
 backend/connectors/wqp/source.py | 49 ++++++++++++++++++--------------
 backend/constants.py             |  5 ++++
 2 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/backend/connectors/wqp/source.py b/backend/connectors/wqp/source.py
index 3b581c6..dd0be86 100644
--- a/backend/connectors/wqp/source.py
+++ b/backend/connectors/wqp/source.py
@@ -30,6 +30,10 @@
     LATEST,
     TDS,
     WATERLEVELS,
+    USGS_PCODE_30210,
+    USGS_PCODE_70300,
+    USGS_PCODE_70301,
+    USGS_PCODE_70303,
 )
 from backend.connectors.wqp.transformer import (
     WQPSiteTransformer,
@@ -99,7 +103,7 @@ def get_records(self):
         else:
             # every record with pCode 30210 (depth in m) has a corresponding
             # record with pCode 72019 (depth in ft) but not vice versa
-            params["pCode"] = "30210"
+            params["pCode"] = USGS_PCODE_30210
 
         params.update(get_date_range(config))
 
@@ -132,40 +136,43 @@ def _extract_source_parameter_results(self, records):
         return [ri["ResultMeasureValue"] for ri in records]
 
     def _clean_records(self, records):
-        records_with_values = [r for r in records if r["ResultMeasureValue"]]
+        clean_records = [r for r in records if r["ResultMeasureValue"]]
 
-        if self.config.parameter == TDS and len(records_with_values) > 1:
-            site_id = records_with_values[0]["MonitoringLocationIdentifier"]
+        if self.config.parameter == TDS and len(clean_records) > 1:
+            site_id = clean_records[0]["MonitoringLocationIdentifier"]
             return_records = []
-            dates = [record["ActivityStartDate"] for record in records]
-            dates = list(set(dates))
-            for date in dates:
-                # get all records for this date
-                date_records = {
+            activity_identifiers = [record["ActivityIdentifier"] for record in records]
+            activity_identifiers = list(set(activity_identifiers))
+            for activity_identifier in activity_identifiers:
+                # get all records for this activity identifier
+                ai_records = {
                     record["USGSPCode"]: record
                     for record in records
-                    if record["ActivityStartDate"] == date
+                    if record["ActivityIdentifier"] == activity_identifier
                 }
-                if len(date_records.items()) > 1:
-                    if "70301" in date_records.keys():
-                        kept_record = date_records["70301"]
-                        pcode = "70301"
-                    elif "70303" in date_records.keys():
-                        kept_record = date_records["70303"]
-                        pcode = "70303"
+                if len(ai_records.items()) > 1:
+                    if USGS_PCODE_70300 in ai_records.keys():
+                        kept_record = ai_records[USGS_PCODE_70300]
+                        pcode = USGS_PCODE_70300
+                    elif USGS_PCODE_70301 in ai_records.keys():
+                        kept_record = ai_records[USGS_PCODE_70301]
+                        pcode = USGS_PCODE_70301
+                    elif USGS_PCODE_70303 in ai_records.keys():
+                        kept_record = ai_records[USGS_PCODE_70303]
+                        pcode = USGS_PCODE_70303
                     else:
                         raise ValueError(
-                            f"Multiple TDS records found for {site_id} on date {date} but no 70301 or 70303 pcodes found."
+                            f"Multiple TDS records found for {site_id} with ActivityIdentifier {activity_identifier} but no 70300, 70301, or 70303 pcodes found."
                         )
                     self.log(
-                        f"Removing duplicates for {site_id} on date {date}. Keeping record with pcode {pcode}."
+                        f"Removing duplicates for {site_id} with ActivityIdentifier {activity_identifier}. Keeping record with pcode {pcode}."
                     )
                 else:
-                    kept_record = list(date_records.values())[0]
+                    kept_record = list(ai_records.values())[0]
                 return_records.append(kept_record)
             return return_records
         else:
-            return records_with_values
+            return clean_records
 
     def _extract_source_parameter_units(self, records):
         return [ri["ResultMeasure/MeasureUnitCode"] for ri in records]
diff --git a/backend/constants.py b/backend/constants.py
index b7635ab..2482900 100644
--- a/backend/constants.py
+++ b/backend/constants.py
@@ -57,6 +57,11 @@
 SOURCE_PARAMETER_UNITS = "source_parameter_units"
 CONVERSION_FACTOR = "conversion_factor"
 
+USGS_PCODE_30210 = "30210"
+USGS_PCODE_70300 = "70300"
+USGS_PCODE_70301 = "70301"
+USGS_PCODE_70303 = "70303"
+
 ANALYTE_OPTIONS = sorted(
     [
         ARSENIC,
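The move to `ActivityIdentifier` in patch 06 matters when one site has two distinct sampling activities on the same day: grouping by date would force one of them to be discarded, while grouping by activity keeps a record for each. A standalone sketch of the grouping and the 70300 > 70301 > 70303 preference (the identifiers and values are invented; the patch additionally raises `ValueError` when duplicates carry none of the preferred pcodes, which this sketch handles the same way single-record groups are handled):

```python
from itertools import groupby

# Invented records: two separate sampling activities on the same date.
records = [
    {"ActivityIdentifier": "nwisnm.01.A", "ActivityStartDate": "2020-05-01",
     "USGSPCode": "70300", "ResultMeasureValue": "498"},
    {"ActivityIdentifier": "nwisnm.01.A", "ActivityStartDate": "2020-05-01",
     "USGSPCode": "70301", "ResultMeasureValue": "505"},
    {"ActivityIdentifier": "nwisnm.01.B", "ActivityStartDate": "2020-05-01",
     "USGSPCode": "70301", "ResultMeasureValue": "512"},
]

PCODE_PREFERENCE = ("70300", "70301", "70303")

def dedupe(records):
    kept = []
    key = lambda r: r["ActivityIdentifier"]
    for _, group in groupby(sorted(records, key=key), key=key):
        by_pcode = {r["USGSPCode"]: r for r in group}
        if len(by_pcode) == 1:
            # Lone record for this activity: keep it whatever its pcode.
            kept.append(next(iter(by_pcode.values())))
        else:
            # True duplicates: keep the most-preferred pcode present.
            pcode = next(p for p in PCODE_PREFERENCE if p in by_pcode)
            kept.append(by_pcode[pcode])
    return kept

# Grouping by date would have collapsed activities A and B into one choice;
# grouping by activity keeps one record for each: "498" (A) and "512" (B).
print([r["ResultMeasureValue"] for r in dedupe(records)])
```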
From 54a74b9625a2ad355a1157c84c10b48e7bddcacc Mon Sep 17 00:00:00 2001
From: Jacob Brown
Date: Tue, 20 May 2025 16:33:30 -0600
Subject: [PATCH 07/16] bump version to 0.9.6 for duplicate handling fix

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 2675e8e..c847cd6 100644
--- a/setup.py
+++ b/setup.py
@@ -22,7 +22,7 @@
 
 setup(
     name="nmuwd",
-    version="0.9.5",
+    version="0.9.6",
     author="Jake Ross",
     description="New Mexico Water Data Integration Engine",
     long_description=long_description,

From ed5cc4199e9fe60680f27c67a4445777d8deb46e Mon Sep 17 00:00:00 2001
From: Jacob Brown
Date: Tue, 20 May 2025 16:58:21 -0600
Subject: [PATCH 08/16] remove -rx flag to make parsing logs easier

---
 .github/workflows/cicd.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index 05fe71e..d88a5fc 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -53,4 +53,4 @@ jobs:
 
     - name: Test with pytest
       run: |
-        pytest -s -rx tests
+        pytest -s tests

From 3efe7984eb30e63ed89a9025e1af0dbe93e641cc Mon Sep 17 00:00:00 2001
From: Jacob Brown
Date: Tue, 20 May 2025 17:13:10 -0600
Subject: [PATCH 09/16] test tests

---
 .github/workflows/cicd.yml | 2 +-
 tests/test_cli/__init__.py | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index d88a5fc..991d888 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -53,4 +53,4 @@ jobs:
 
     - name: Test with pytest
       run: |
-        pytest -s tests
+        pytest -sx tests/test_cli
diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py
index 4d342ae..69bb8fb 100644
--- a/tests/test_cli/__init__.py
+++ b/tests/test_cli/__init__.py
@@ -102,6 +102,8 @@ def _test_weave(
 
         # Act
         result = self.runner.invoke(weave, arguments, standalone_mode=False)
+        print(result.output)
+        print(result.__dir__)
 
         # Assert
         assert result.exit_code == 0

From 503cbde69446fd802e548934c5bf405b27b14e8a Mon Sep 17 00:00:00 2001
From: Jacob Brown
Date: Tue, 20 May 2025 17:16:53 -0600
Subject: [PATCH 10/16] more test tests

---
 tests/test_cli/__init__.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py
index 69bb8fb..d27f1e7 100644
--- a/tests/test_cli/__init__.py
+++ b/tests/test_cli/__init__.py
@@ -102,8 +102,13 @@ def _test_weave(
 
         # Act
         result = self.runner.invoke(weave, arguments, standalone_mode=False)
+        print(result)
         print(result.output)
         print(result.__dir__)
+        print(result.__dict__)
+        print(result.stdout)
+        print(result.stderr)
+
 
         # Assert
         assert result.exit_code == 0
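A side note on the debugging strategy in patches 09 and 10: rather than printing `result.__dir__` and `result.__dict__` by hand, Click's `CliRunner` keeps the caught exception and traceback on the `Result` object, so a failing invocation can be surfaced directly. A sketch against Click's public testing API, independent of this repo's fixtures:

```python
import traceback

import click
from click.testing import CliRunner

@click.command()
def boom():
    raise RuntimeError("something broke")

result = CliRunner().invoke(boom, [])
if result.exit_code != 0 and result.exc_info is not None:
    # Print the original traceback instead of dumping result.__dict__.
    traceback.print_exception(*result.exc_info)
```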
From d28a6f6e0321e74db0dc56eb1a2add4509ab8f64 Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Tue, 20 May 2025 23:18:22 +0000
Subject: [PATCH 11/16] Formatting changes

---
 tests/test_cli/__init__.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py
index d27f1e7..5d9a4bd 100644
--- a/tests/test_cli/__init__.py
+++ b/tests/test_cli/__init__.py
@@ -108,7 +108,6 @@ def _test_weave(
         print(result.__dict__)
         print(result.stdout)
         print(result.stderr)
-
 
         # Assert
         assert result.exit_code == 0

From c82ab3547a038993906c68f1a6e15d98d115bce4 Mon Sep 17 00:00:00 2001
From: Jacob Brown
Date: Tue, 20 May 2025 17:31:00 -0600
Subject: [PATCH 12/16] make output format case insensitive

---
 frontend/cli.py            |   4 +-
 tests/test_cli/__init__.py | 140 ++++++++++++++++++-------------------
 2 files changed, 71 insertions(+), 73 deletions(-)

diff --git a/frontend/cli.py b/frontend/cli.py
index 879e5d3..3efb46b 100644
--- a/frontend/cli.py
+++ b/frontend/cli.py
@@ -187,11 +187,11 @@ def cli():
     )
 ]
 
-OUTPUT_FORMATS = sorted([value for value in OutputFormat])
+OUTPUT_FORMATS = sorted([of for of in OutputFormat])
 OUTPUT_FORMAT_OPTIONS = [
     click.option(
         "--output-format",
-        type=click.Choice(OUTPUT_FORMATS),
+        type=click.Choice(OUTPUT_FORMATS, case_sensitive=False),
         default="csv",
         help=f"Output file format for sites: {OUTPUT_FORMATS}. Default is csv",
     )
diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py
index d27f1e7..b1a6977 100644
--- a/tests/test_cli/__init__.py
+++ b/tests/test_cli/__init__.py
@@ -102,83 +102,81 @@ def _test_weave(
 
         # Act
         result = self.runner.invoke(weave, arguments, standalone_mode=False)
-        print(result)
-        print(result.output)
-        print(result.__dir__)
-        print(result.__dict__)
-        print(result.stdout)
-        print(result.stderr)
-
         # Assert
-        assert result.exit_code == 0
+        try:
+            assert result.exit_code == 0
+
+            """
+            For the config, check that
+
+            0. (set output dir to clean up tests results even in event of failure)
+            1. The parameter is set correctly
+            2. The agencies are set correctly
+            3. The output types are set correctly
+            4. The site limit is set correctly
+            5. The dry is set correctly
+            6. The start date is set correctly
+            7. The end date is set correctly
+            8. The geographic filter is set correctly
+            9. The site output type is set correctly
+            """
+            config = result.return_value
+
+            # 0
+            self.output_dir = Path(config.output_path)
+
+            # 1
+            assert getattr(config, "parameter") == parameter
+
+            # 2
+            agency_with_underscore = self.agency.replace("-", "_")
+            if self.agency_reports_parameter[parameter]:
+                assert getattr(config, f"use_source_{agency_with_underscore}") is True
+            else:
+                assert getattr(config, f"use_source_{agency_with_underscore}") is False
+
+            for no_agency in no_agencies:
+                no_agency_with_underscore = no_agency.replace("--no-", "").replace("-", "_")
+                assert getattr(config, f"use_source_{no_agency_with_underscore}") is False
+
+            # 3
+            output_types = ["summary", "timeseries_unified", "timeseries_separated"]
+            for ot in output_types:
+                if ot == output_type:
+                    assert getattr(config, f"output_{ot}") is True
+                else:
+                    assert getattr(config, f"output_{ot}") is False
+
+            # 4
+            assert getattr(config, "site_limit") == 4
+
+            # 5
+            assert getattr(config, "dry") is True
+
+            # 6
+            assert getattr(config, "start_date") == start_date
+
+            # 7
+            assert getattr(config, "end_date") == end_date
+
+            # 8
+            if geographic_filter_name and geographic_filter_value:
+                for _geographic_filter_name in ["bbox", "county", "wkt"]:
+                    if _geographic_filter_name == geographic_filter_name:
+                        assert (
+                            getattr(config, _geographic_filter_name)
+                            == geographic_filter_value
+                        )
+                    else:
+                        assert getattr(config, _geographic_filter_name) == ""
+
+            # 9
+            assert getattr(config, "output_format") == output_format
+        except Exception as e:
+            print(result)
+            assert False
 
     def test_weave_summary(self):
         self._test_weave(parameter=WATERLEVELS, output_type="summary")
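For what patch 12 changes in `frontend/cli.py`: with `case_sensitive=False`, `click.Choice` accepts any casing on the command line and hands the command the canonical value from the declared choices. A minimal standalone check (the choice values here are invented, not the project's `OutputFormat` members):

```python
import click
from click.testing import CliRunner

@click.command()
@click.option(
    "--output-format",
    type=click.Choice(["csv", "geojson"], case_sensitive=False),
    default="csv",
)
def cmd(output_format):
    click.echo(output_format)

# "CSV" now matches and is normalized to the declared choice "csv".
result = CliRunner().invoke(cmd, ["--output-format", "CSV"])
print(result.output)  # csv
```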
From 9203a5b63f0f722b4c731fa093bb70924bd3c7da Mon Sep 17 00:00:00 2001
From: jacob-a-brown
Date: Tue, 20 May 2025 23:33:35 +0000
Subject: [PATCH 13/16] Formatting changes

---
 tests/test_cli/__init__.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tests/test_cli/__init__.py b/tests/test_cli/__init__.py
index b1a6977..2029c38 100644
--- a/tests/test_cli/__init__.py
+++ b/tests/test_cli/__init__.py
@@ -106,7 +106,6 @@ def _test_weave(
         # Assert
         try:
             assert result.exit_code == 0
-
             """
             For the config, check that
 
@@ -138,8 +137,12 @@ def _test_weave(
             assert getattr(config, f"use_source_{agency_with_underscore}") is False
 
         for no_agency in no_agencies:
-            no_agency_with_underscore = no_agency.replace("--no-", "").replace("-", "_")
-            assert getattr(config, f"use_source_{no_agency_with_underscore}") is False
+            no_agency_with_underscore = no_agency.replace("--no-", "").replace(
+                "-", "_"
+            )
+            assert (
+                getattr(config, f"use_source_{no_agency_with_underscore}") is False
+            )

From 781f57734b8db5f6fa4af45505ff22b43ee1c1e2 Mon Sep 17 00:00:00 2001
From: Jacob Brown
Date: Tue, 20 May 2025 17:33:50 -0600
Subject: [PATCH 14/16] test cli and sources in GitHub Actions

---
 .github/workflows/cicd.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index 991d888..907a1c0 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -53,4 +53,4 @@ jobs:
 
     - name: Test with pytest
       run: |
-        pytest -sx tests/test_cli
+        pytest -sx tests
From 99cf02b48fedc4b7650e051ff66c223a5f6a6f85 Mon Sep 17 00:00:00 2001
From: Jacob Brown
Date: Tue, 20 May 2025 17:38:49 -0600
Subject: [PATCH 15/16] complete all tests even if failure

---
 .github/workflows/cicd.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml
index 907a1c0..d88a5fc 100644
--- a/.github/workflows/cicd.yml
+++ b/.github/workflows/cicd.yml
@@ -53,4 +53,4 @@ jobs:
 
     - name: Test with pytest
       run: |
-        pytest -sx tests
+        pytest -s tests

From 93880d9100fcb47fcf5af08fc3632415917e1ce7 Mon Sep 17 00:00:00 2001
From: Jacob Brown
Date: Wed, 21 May 2025 12:52:39 -0600
Subject: [PATCH 16/16] log date of duplicate records removed

---
 backend/connectors/wqp/source.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/backend/connectors/wqp/source.py b/backend/connectors/wqp/source.py
index dd0be86..926b2b4 100644
--- a/backend/connectors/wqp/source.py
+++ b/backend/connectors/wqp/source.py
@@ -164,8 +164,9 @@ def _clean_records(self, records):
                     raise ValueError(
                         f"Multiple TDS records found for {site_id} with ActivityIdentifier {activity_identifier} but no 70300, 70301, or 70303 pcodes found."
                     )
+                record_date = kept_record["ActivityStartDate"]
                 self.log(
-                    f"Removing duplicates for {site_id} with ActivityIdentifier {activity_identifier}. Keeping record with pcode {pcode}."
+                    f"Removing duplicates for {site_id} on {record_date} with ActivityIdentifier {activity_identifier}. Keeping record with pcode {pcode}."
                )
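Where the series leaves the TDS branch of `_clean_records` after patch 16, condensed into a dependency-free sketch for reference. The function name, the `PCODE_PREFERENCE` tuple, and the `log` parameter are stand-ins for the real method, the `USGS_PCODE_*` constants, and `self.log`; the real method also checks `self.config.parameter == TDS` before taking this path:

```python
PCODE_PREFERENCE = ("70300", "70301", "70303")  # stand-ins for the USGS_PCODE_* constants

def clean_tds_records(records, log=print):
    # Drop records with no reported value, as the real method does first.
    records = [r for r in records if r["ResultMeasureValue"]]
    if len(records) <= 1:
        return records
    site_id = records[0]["MonitoringLocationIdentifier"]
    kept_records = []
    for aid in set(r["ActivityIdentifier"] for r in records):
        ai_records = {
            r["USGSPCode"]: r for r in records if r["ActivityIdentifier"] == aid
        }
        if len(ai_records) > 1:
            pcode = next((p for p in PCODE_PREFERENCE if p in ai_records), None)
            if pcode is None:
                raise ValueError(
                    f"Multiple TDS records for {site_id} with ActivityIdentifier {aid} "
                    f"but no 70300, 70301, or 70303 pcodes found."
                )
            kept = ai_records[pcode]
            log(
                f"Removing duplicates for {site_id} on {kept['ActivityStartDate']} "
                f"with ActivityIdentifier {aid}. Keeping record with pcode {pcode}."
            )
        else:
            kept = next(iter(ai_records.values()))
        kept_records.append(kept)
    return kept_records
```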