diff --git a/.gitignore b/.gitignore index e51bca5e..3bfeecc1 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,6 @@ local.properties # InelliJ .idea + +local_data +*~ diff --git a/ols-geocoder-admin/pom.xml b/ols-geocoder-admin/pom.xml index eea64378..bedfdb07 100644 --- a/ols-geocoder-admin/pom.xml +++ b/ols-geocoder-admin/pom.xml @@ -3,7 +3,7 @@ ca.bc.gov.ols ols-geocoder - 4.5.2 + 4.5.3 OLS Geocoder Admin diff --git a/ols-geocoder-core/pom.xml b/ols-geocoder-core/pom.xml index a7f255d1..daf16f59 100644 --- a/ols-geocoder-core/pom.xml +++ b/ols-geocoder-core/pom.xml @@ -6,7 +6,7 @@ ca.bc.gov.ols ols-geocoder - 4.5.2 + 4.5.3 OLS Geocoder Core diff --git a/ols-geocoder-core/src/main/java/ca/bc/gov/ols/geocoder/Geocoder.java b/ols-geocoder-core/src/main/java/ca/bc/gov/ols/geocoder/Geocoder.java index 97247cb5..6bd117d6 100644 --- a/ols-geocoder-core/src/main/java/ca/bc/gov/ols/geocoder/Geocoder.java +++ b/ols-geocoder-core/src/main/java/ca/bc/gov/ols/geocoder/Geocoder.java @@ -252,12 +252,43 @@ public SearchResults geocode(GeocodeQuery query) { } if(query.isFuzzyMatch() && query.getAddressString() != null && !query.getAddressString().isEmpty()) { - // sort by fuzzy score (higher fuzzy score is better) + // When fuzzy matching is enabled, we need to sort results more intelligently + // than just by fuzzy score alone. We prioritize: + // 1. Exact locality matches (no penalty) over prefix matches (with penalty) + // 2. LOCALITY precision matches for simple word queries without numbers + // 3. Fuzzy score within each priority group (case-insensitive) + final String queryStr = query.getAddressString(); + final String normalizedInput = queryStr.toLowerCase(); matches.sort( - Comparator.comparingInt((GeocodeMatch match) -> - FuzzySearch.ratio(query.getAddressString(), match.getAddressString()) - ).reversed() + Comparator + // First, prioritize matches without locality partialMatch faults (exact matches) + .comparing((GeocodeMatch match) -> { + for(MatchFault fault : match.getFaults()) { + if(fault.getElement() == MatchFault.MatchElement.LOCALITY + && fault.getFault().equals("partialMatch") + && fault.getPenalty() > 0) { + return 1; // Deprioritize prefix matches + } + } + return 0; // Prioritize exact matches + }) + // Second, for locality-only queries, prioritize LOCALITY precision matches + .thenComparing((GeocodeMatch match) -> { + // Check if this is likely a locality-only query (simple word(s), no numbers) + boolean likelyLocalityQuery = !queryStr.matches(".*\\d+.*"); + // If it's a locality query and this is a LOCALITY match, boost it + if (likelyLocalityQuery && match.getPrecision() == MatchPrecision.LOCALITY) { + return 0; // Higher priority + } + return 1; // Lower priority + }) + // Then sort by fuzzy score (higher is better, case-insensitive) + .thenComparing((GeocodeMatch match) -> + FuzzySearch.ratio(normalizedInput, match.getAddressString().toLowerCase()), + Comparator.reverseOrder() + ) ); + // limit to maxResults matches = matches.subList(0, Math.min(query.getMaxResults(), matches.size())); } diff --git a/ols-geocoder-core/src/main/java/ca/bc/gov/ols/geocoder/GeocoderDataStore.java b/ols-geocoder-core/src/main/java/ca/bc/gov/ols/geocoder/GeocoderDataStore.java index c41c4e8a..154d3cff 100644 --- a/ols-geocoder-core/src/main/java/ca/bc/gov/ols/geocoder/GeocoderDataStore.java +++ b/ols-geocoder-core/src/main/java/ca/bc/gov/ols/geocoder/GeocoderDataStore.java @@ -1461,6 +1461,8 @@ private Map buildAbbreviationMappings( // Special case to handle the fact that Rue needs to map to St for french // but Rue is also a valid street type on its own wordMapBuilder.addWordMapping("Rue", "St"); + // Add Wye as a valid street type (railway term for Y-shaped track junction) + wordMapBuilder.addWord("Wye", WordClass.STREET_TYPE); // Add directionals rr = dataSource.getStreetDirs(); diff --git a/ols-geocoder-core/src/main/java/ca/bc/gov/ols/geocoder/api/GeocodeQuery.java b/ols-geocoder-core/src/main/java/ca/bc/gov/ols/geocoder/api/GeocodeQuery.java index 2d8f60aa..79876ccf 100644 --- a/ols-geocoder-core/src/main/java/ca/bc/gov/ols/geocoder/api/GeocodeQuery.java +++ b/ols-geocoder-core/src/main/java/ca/bc/gov/ols/geocoder/api/GeocodeQuery.java @@ -58,6 +58,7 @@ public class GeocodeQuery extends SharedParameters{ private String streetQualifier; private String localityName; private String stateProvTerr; + private String tagCondition; private int minScore = 0; private EnumSet matchPrecision = null; @@ -428,6 +429,14 @@ public void setExactSpelling(boolean exactSpelling) { this.exactSpelling = exactSpelling; } + public String getTagCondition() { + return tagCondition; + } + + public void setTagCondition(String tagCondition) { + this.tagCondition = tagCondition; + } + public int getNumPrelimResults() { if(fuzzyMatch) { return 100; @@ -470,12 +479,45 @@ public boolean pass(GeocodeMatch match) { filters.add(new Filter() { @Override public boolean pass(GeocodeMatch match) { + if(match instanceof AddressMatch - && ((AddressMatch)match).getAddress() instanceof OccupantAddress - && ((OccupantAddress)(((AddressMatch)match).getAddress())).getKeywordList() - .containsAll(Arrays.asList(tags.toLowerCase().split(";"))) - ) { - return true; + && ((AddressMatch)match).getAddress() instanceof OccupantAddress) { + List keywords = ((OccupantAddress)(((AddressMatch)match).getAddress())).getKeywordList(); + if(keywords == null || keywords.isEmpty()) { + return false; + } + String lowerTags = tags.toLowerCase(); + + boolean useOr = true; + if(tagCondition != null && !tagCondition.trim().isEmpty()) { + String c = tagCondition.trim().toLowerCase(); + if(c.contains("or")) { + useOr = true; + } else if(c.contains("and")) { + useOr = false; + } + } + + if(useOr) { + String[] tagArray = lowerTags.split(";"); + System.out.println(">>>>>>>> keywords: " + keywords + " : " + Arrays.toString(tagArray)); + for(String t : tagArray) { + t = t.trim(); + if(keywords.contains(t)) { + System.out.println(">>>>>>>> matched tag: " + t); + return true; + } + } + return false; + } else { + // default + String[] tagArray = lowerTags.split(";"); + List required = new ArrayList(tagArray.length); + for(String t : tagArray) { + required.add(t.trim()); + } + return keywords.containsAll(required); + } } return false; } diff --git a/ols-geocoder-process/pom.xml b/ols-geocoder-process/pom.xml index 1dec3559..82bc5d5a 100644 --- a/ols-geocoder-process/pom.xml +++ b/ols-geocoder-process/pom.xml @@ -4,7 +4,7 @@ ca.bc.gov.ols ols-geocoder - 4.5.2 + 4.5.3 ols-geocoder-process OLS Geocoder Process diff --git a/ols-geocoder-process/src/main/java/ca/bc/gov/ols/siteloaderprep/RawStreetName.java b/ols-geocoder-process/src/main/java/ca/bc/gov/ols/siteloaderprep/RawStreetName.java index b4ce81cc..0ae46356 100644 --- a/ols-geocoder-process/src/main/java/ca/bc/gov/ols/siteloaderprep/RawStreetName.java +++ b/ols-geocoder-process/src/main/java/ca/bc/gov/ols/siteloaderprep/RawStreetName.java @@ -8,6 +8,7 @@ public class RawStreetName { public String qual; public boolean typeIsPrefix; public boolean dirIsPrefix; + public boolean isPrivateRoad; // indicates if this street name is a private road name @Override public String toString() { diff --git a/ols-geocoder-process/src/main/java/ca/bc/gov/ols/streetprep/StreetPrep.java b/ols-geocoder-process/src/main/java/ca/bc/gov/ols/streetprep/StreetPrep.java index 3e9e258d..e96fc1ae 100644 --- a/ols-geocoder-process/src/main/java/ca/bc/gov/ols/streetprep/StreetPrep.java +++ b/ols-geocoder-process/src/main/java/ca/bc/gov/ols/streetprep/StreetPrep.java @@ -851,6 +851,13 @@ private TIntObjectMap readStreetNames(Map streetT name.body = rr.getString("NAME_BODY"); name.qual = rr.getString("NAME_DESCRIPTOR_CODE"); + + if(rr.getString("FULL_NAME") != null && (rr.getString("FULL_NAME").toLowerCase().contains("private rd") || + rr.getString("FULL_NAME").toLowerCase().contains("private road"))) { + // special case for "Private Rd" + name.isPrivateRoad = true; + } + if(name.qual != null) { String newQual = streetQualMap.get(name.qual); if(newQual == null) { @@ -1179,16 +1186,43 @@ private void writeStreetSegments(TIntObjectMap segMap) { private void writeStreetNameOnSegs(TIntObjectMap segMap, TIntObjectMap streetNameIdMap) { try(RowWriter rw = new JsonRowWriter(new File(outputDir + STREET_LOAD_STREET_NAME_ON_SEG_XREF_FILE), "bgeo_street_name_on_seg_xref", dates)) { TIntObjectIterator segIterator = segMap.iterator(); + // find the name ID for "private road" and "private rd" from the streetNameIdMap + // streetNameIdMap is keyed by STRUCTURED_NAME_ID, so we need to iterate through it to find the ID (the value is a RawStreetName. It has isPrivateRoad boolean) + int privateRoadNameId = -1; + for(TIntObjectIterator nameIterator = streetNameIdMap.iterator(); nameIterator.hasNext(); ) { + nameIterator.advance(); + RawStreetName name = nameIterator.value(); + if(name.isPrivateRoad) { + privateRoadNameId = name.id; + break; + } + } + while(segIterator.hasNext()) { segIterator.advance(); RawStreetSeg seg = segIterator.value(); - for(int nameIdx = 0; nameIdx < seg.nameIds.size(); nameIdx++) { + // new logic. Use STREET_SEGMENT_ID_2 if STREET_SEGMENT_ID_1 is "private road" or "private rd" + // and STREET_SEGMENT_ID_2 is not null. Please note the IDs are just number. + // Note: if the value was null in the source data, it will not be in the nameIds list. + // case 1: nameId 1 is private road and nameId 2 is not null -> use nameId 2 as primary + if(seg.nameIds.size() >= 2 && seg.nameIds.get(0) == privateRoadNameId) { Map row = new THashMap(); - row.put("STREET_NAME_ID", seg.nameIds.get(nameIdx)); + row.put("STREET_NAME_ID", seg.nameIds.get(1)); row.put("STREET_SEGMENT_ID", seg.streetSegmentId); - row.put("IS_PRIMARY_IND", nameIdx == 0); + row.put("IS_PRIMARY_IND", true); rw.writeRow(row); } + else{ + // case 2: nameId 1 is private road and nameId 2 is null -> do not change + // case 3: nameId 1 is not private road -> do not change + for(int nameIdx = 0; nameIdx < seg.nameIds.size(); nameIdx++) { + Map row = new THashMap(); + row.put("STREET_NAME_ID", seg.nameIds.get(nameIdx)); + row.put("STREET_SEGMENT_ID", seg.streetSegmentId); + row.put("IS_PRIMARY_IND", nameIdx == 0); + rw.writeRow(row); + } + } } } } diff --git a/ols-geocoder-web/pom.xml b/ols-geocoder-web/pom.xml index ad396dd9..78c87f2a 100644 --- a/ols-geocoder-web/pom.xml +++ b/ols-geocoder-web/pom.xml @@ -6,7 +6,7 @@ ca.bc.gov.ols ols-geocoder - 4.5.2 + 4.5.3 OLS Geocoder Web diff --git a/pom.xml b/pom.xml index f96e63e4..e0d2d632 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ ca.bc.gov.ols ols-geocoder - 4.5.2 + 4.5.3 pom OLS Geocoder https://bcgov.github.io/ols-geocoder/