Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
52 commits
Select commit Hold shift + click to select a range
d40503b
Updated service with a lot of improvements:
krickert Aug 25, 2024
f263573
temp commit
krickert Aug 26, 2024
1d764c5
Merge branch 'main' of github.com:krickert/rag-models
krickert Aug 26, 2024
533e5fe
minor fixes to proto
krickert Aug 26, 2024
20a17d3
added health checks
krickert Aug 27, 2024
baceefe
Merge branch 'main' of github.com:krickert/rag-models
krickert Aug 27, 2024
893a0f1
Bump org.apache.maven.plugins:maven-surefire-plugin from 3.4.0 to 3.5.0
dependabot[bot] Aug 27, 2024
70a3faa
added new configuration fields for authentication with solr on the in…
krickert Sep 12, 2024
a93fe77
new features: filters, start from, max chars
krickert Sep 12, 2024
9343112
Search proto
krickert Sep 17, 2024
cd0ca30
Bump commons-io:commons-io from 2.16.1 to 2.17.0
dependabot[bot] Sep 18, 2024
b5d78de
Bump grpc.version from 1.66.0 to 1.68.0
dependabot[bot] Sep 20, 2024
e0b0deb
Merge pull request #13 from krickert/dependabot/maven/grpc.version-1.…
krickert Sep 24, 2024
1d246d9
Merge pull request #12 from krickert/dependabot/maven/commons-io-comm…
krickert Sep 24, 2024
18354f0
Bump org.apache.commons:commons-lang3 from 3.16.0 to 3.17.0
dependabot[bot] Sep 24, 2024
a609888
Merge pull request #9 from krickert/dependabot/maven/org.apache.commo…
krickert Sep 24, 2024
5265e6d
Merge pull request #7 from krickert/dependabot/maven/org.apache.maven…
krickert Sep 24, 2024
27b83c0
deleted transative dependency
krickert Sep 24, 2024
b64a7f4
Merge branch 'main' of github.com:krickert/rag-models
krickert Sep 24, 2024
7acaa08
Bump com.google.guava:guava from 33.3.0-jre to 33.3.1-jre
dependabot[bot] Sep 24, 2024
5e0cf57
Merge pull request #14 from krickert/dependabot/maven/com.google.guav…
krickert Sep 25, 2024
6345b67
refactored where the files are pointing to because we are taking wiki…
krickert Sep 25, 2024
7d4881a
Merge branch 'main' of github.com:krickert/rag-models
krickert Sep 25, 2024
c7df060
added search api
krickert Sep 25, 2024
806ce1a
Merge branch 'main' of https://github.com/krickert/rag-models
krickert Sep 29, 2024
f6ce12e
trying first search api definition
krickert Sep 30, 2024
2a90c1c
updated search api for better features
krickert Sep 30, 2024
c900fd7
refactored search api per advanced specs
krickert Oct 2, 2024
76e8ee0
fixed facet design issue in the response.
krickert Oct 3, 2024
b078381
made the tests more generic and loosened up the highlighting rules
krickert Oct 4, 2024
a5b965a
Bump org.junit.jupiter:junit-jupiter-engine from 5.11.0 to 5.11.2
dependabot[bot] Oct 5, 2024
7c674d9
added boost queries
krickert Oct 7, 2024
c38bdb4
Bump org.apache.maven.plugins:maven-surefire-plugin from 3.5.0 to 3.5.1
dependabot[bot] Oct 7, 2024
580f506
more features
krickert Oct 10, 2024
e618960
updated to have doc metadata
krickert Oct 10, 2024
3b6144e
Merge pull request #17 from krickert/dependabot/maven/org.apache.mave…
krickert Oct 11, 2024
263db38
Merge pull request #16 from krickert/dependabot/maven/org.junit.jupit…
krickert Oct 11, 2024
147932c
Bump org.junit.jupiter:junit-jupiter-engine from 5.11.2 to 5.11.3
dependabot[bot] Oct 21, 2024
d38a986
confused why intellij is freaking out over this.
krickert Oct 29, 2024
9abb069
Merge remote-tracking branch 'origin/main'
krickert Oct 29, 2024
238c317
Bump grpc.version from 1.68.0 to 1.68.1
dependabot[bot] Oct 29, 2024
0622101
Bump com.fasterxml.jackson.core:jackson-databind from 2.18.0 to 2.18.1
dependabot[bot] Oct 29, 2024
9534f2c
Bump org.apache.maven.plugins:maven-surefire-plugin from 3.5.1 to 3.5.2
dependabot[bot] Nov 4, 2024
10a4123
fixed some number types
krickert Nov 6, 2024
fcbfd4b
updated example to convert
krickert Nov 6, 2024
984b054
updated example to convert
krickert Nov 7, 2024
e53163c
Merge pull request #22 from krickert/dependabot/maven/org.apache.mave…
krickert Nov 16, 2024
424fbe3
Merge pull request #20 from krickert/dependabot/maven/com.fasterxml.j…
krickert Nov 16, 2024
4e4db9c
Merge pull request #19 from krickert/dependabot/maven/grpc.version-1.…
krickert Nov 16, 2024
f00deb0
Merge branch 'main' into dependabot/maven/org.junit.jupiter-junit-jup…
krickert Nov 16, 2024
4f3993d
Merge pull request #18 from krickert/dependabot/maven/org.junit.jupit…
krickert Nov 16, 2024
d7fe0a6
Bump protobuf.version from 3.25.1 to 4.28.3
dependabot[bot] Nov 16, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
21 changes: 8 additions & 13 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@
<release.version>17</release.version>
<assertj-core.version>3.26.3</assertj-core.version>
<maven.compiler.version>3.13.0</maven.compiler.version>
<maven.surefire.plugin>3.4.0</maven.surefire.plugin>
<protobuf.version>3.25.1</protobuf.version>
<junit-jupiter-engine.version>5.11.0</junit-jupiter-engine.version>
<grpc.version>1.66.0</grpc.version>
<maven.surefire.plugin>3.5.2</maven.surefire.plugin>
<protobuf.version>4.28.3</protobuf.version>
<junit-jupiter-engine.version>5.11.3</junit-jupiter-engine.version>
<grpc.version>1.68.1</grpc.version>
<javax.annotation.api.version>1.3.2</javax.annotation.api.version>
<commons-lang3.version>3.16.0</commons-lang3.version>
<commons.io.version>2.16.1</commons.io.version>
<commons-lang3.version>3.17.0</commons-lang3.version>
<commons.io.version>2.17.0</commons.io.version>
<protoc-jar-maven-plugin.version>3.11.4</protoc-jar-maven-plugin.version>
</properties>

Expand Down Expand Up @@ -51,16 +51,11 @@
</distributionManagement>

<modules>
<module>wikisearch-model</module>
<module>wikisearch-model-test-resources</module>
<module>search-models</module>
<module>search-models-test-resources</module>
</modules>

<dependencies>
<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>${protobuf.version}</version>
</dependency>
<dependency>
<groupId>org.assertj</groupId>
<artifactId>assertj-core</artifactId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<artifactId>wikisearch-model-test-resources</artifactId>
<artifactId>search-models-test-resources</artifactId>
<packaging>jar</packaging>
<version>1.0-SNAPSHOT</version>
<properties>
Expand All @@ -19,9 +19,27 @@
</parent>

<dependencies>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.18.1</version>
</dependency>

<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java-util</artifactId>
<version>${protobuf.version}</version>
</dependency>

<dependency>
<groupId>com.google.protobuf</groupId>
<artifactId>protobuf-java</artifactId>
<version>${protobuf.version}</version>
</dependency>

<dependency>
<groupId>com.krickert.search</groupId>
<artifactId>wikisearch-model</artifactId>
<artifactId>search-models</artifactId>
<version>${project.version}</version>
</dependency>

Expand All @@ -37,7 +55,7 @@
<dependency>
<groupId>com.google.guava</groupId>
<artifactId>guava</artifactId>
<version>33.3.0-jre</version>
<version>33.3.1-jre</version>
</dependency>

</dependencies>
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
package com.krickert.search.model.test.util;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.protobuf.ListValue;
import com.google.protobuf.NullValue;
import com.google.protobuf.Struct;
import com.google.protobuf.Timestamp;
import com.google.protobuf.Value;
import com.google.protobuf.util.JsonFormat;
import com.google.protobuf.util.Timestamps;
import org.apache.commons.lang3.time.DateUtils;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.text.ParseException;
import java.time.Instant;
import java.time.LocalDate;
import java.time.OffsetDateTime;
import java.time.ZoneOffset;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.time.format.ResolverStyle;
import java.util.ArrayList;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

public class JsonToProtoStructConverter {

private static final ObjectMapper objectMapper = new ObjectMapper();
private static final String[] DATE_FORMATS = new String[] {
"yyyy-MM-dd'T'HH:mm:ss'Z'",
"yyyy-MM-dd'T'HH:mm:ss.SSS'Z'",
"yyyy-MM-dd'T'HH:mm:ssX",
"yyyy-MM-dd"
};

public static List<Struct> convertJsonToStruct(String jsonString) throws IOException {
JsonNode jsonNode = objectMapper.readTree(jsonString);
if (jsonNode.isArray()) {
return convertJsonArrayToStructList(jsonNode);
} else {
List<Struct> singleStructList = new ArrayList<>();
singleStructList.add(convertJsonNodeToStruct(jsonNode, null));
return singleStructList;
}
}

public static List<Struct> convertJsonToStruct(String jsonString, String regex) throws IOException {
JsonNode jsonNode = objectMapper.readTree(jsonString);
Pattern pattern = (regex != null) ? Pattern.compile(regex) : null;
if (jsonNode.isArray()) {
return convertJsonArrayToStructList(jsonNode, pattern);
} else {
List<Struct> singleStructList = new ArrayList<>();
singleStructList.add(convertJsonNodeToStruct(jsonNode, pattern));
return singleStructList;
}
}

private static List<Struct> convertJsonArrayToStructList(JsonNode jsonArrayNode) {
return convertJsonArrayToStructList(jsonArrayNode, null);
}

private static List<Struct> convertJsonArrayToStructList(JsonNode jsonArrayNode, Pattern pattern) {
List<Struct> structList = new ArrayList<>();
for (JsonNode element : jsonArrayNode) {
structList.add(convertJsonNodeToStruct(element, pattern));
}
return structList;
}

private static Struct convertJsonNodeToStruct(JsonNode jsonNode, Pattern pattern) {
Struct.Builder structBuilder = Struct.newBuilder();

Iterator<Map.Entry<String, JsonNode>> fields = jsonNode.fields();
while (fields.hasNext()) {
Map.Entry<String, JsonNode> field = fields.next();
if (pattern == null || !pattern.matcher(field.getKey()).matches()) {
structBuilder.putFields(field.getKey(), convertJsonNodeToValue(field.getValue(), pattern));
}
}

return structBuilder.build();
}

private static Value convertJsonNodeToValue(JsonNode jsonNode, Pattern pattern) {
Value.Builder valueBuilder = Value.newBuilder();

if (jsonNode.isNull()) {
valueBuilder.setNullValueValue(0);
} else if (jsonNode.isNumber()) {
if (jsonNode.isInt() || jsonNode.isLong()) {
valueBuilder.setNumberValue(jsonNode.longValue());
} else if (jsonNode.isDouble()) {
valueBuilder.setNumberValue(jsonNode.doubleValue());
}
} else if (jsonNode.isTextual()) {
String textValue = jsonNode.textValue();
try {
// Try parsing as Instant
Instant instant = Instant.parse(textValue);
Timestamp timestamp = Timestamps.fromMillis(instant.toEpochMilli());
valueBuilder.setStringValue(Timestamps.toString(timestamp));
} catch (DateTimeParseException e) {
try {
// Try parsing with multiple date formats
Date date = DateUtils.parseDate(textValue, DATE_FORMATS);
Timestamp timestamp = Timestamps.fromMillis(date.getTime());
valueBuilder.setStringValue(Timestamps.toString(timestamp));
} catch (ParseException ex) {
valueBuilder.setStringValue(textValue);
}
}
} else if (jsonNode.isBoolean()) {
valueBuilder.setBoolValue(jsonNode.booleanValue());
} else if (jsonNode.isObject()) {
valueBuilder.setStructValue(convertJsonNodeToStruct(jsonNode, pattern));
} else if (jsonNode.isArray()) {
ListValue.Builder listValueBuilder = ListValue.newBuilder();
for (JsonNode element : jsonNode) {
listValueBuilder.addValues(convertJsonNodeToValue(element, pattern));
}
valueBuilder.setListValue(listValueBuilder);
}

return valueBuilder.build();
}

public static List<Struct> loadJsonFromResource(String resourceName) throws IOException {
InputStream inputStream = JsonToProtoStructConverter.class.getClassLoader().getResourceAsStream(resourceName);
if (inputStream == null) {
Path path = Path.of(resourceName);
if (Files.exists(path)) {
inputStream = Files.newInputStream(path);
} else {
throw new IOException("Resource not found: " + resourceName);
}
}

try (BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream))) {
String jsonString = reader.lines().collect(Collectors.joining(System.lineSeparator()));
return convertJsonToStruct(jsonString);
}
}

public static String convertStructToJson(Struct struct) throws IOException {
JsonFormat.Printer printer = JsonFormat.printer();
String jsonString = printer.print(struct);
JsonNode jsonNode = objectMapper.readTree(jsonString);

// Fix timestamp fields by converting them back to string representations
Iterator<Map.Entry<String, JsonNode>> fields = jsonNode.fields();
while (fields.hasNext()) {
Map.Entry<String, JsonNode> field = fields.next();
JsonNode valueNode = field.getValue();
if (valueNode.isObject() && valueNode.has("timestamp")) {
field.setValue(objectMapper.getNodeFactory().textNode(valueNode.get("timestamp").asText()));
}
}

return objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(jsonNode);
}

public static void main(String[] args) {
String json = "{" +
"\"id\": 1234," +
"\"title\": \"Sample Document\"," +
"\"body\": \"This is a test document\"," +
"\"keywords\": [\"sample\", \"test\"]," +
"\"creation_date\": \"2024-01-01T00:00:00Z\"," +
"\"last_updated_date\": \"2024-10-28T12:00:00Z\"," +
"\"is_published\": true," +
"\"_version_\": 1.0," +
"\"custom_data\": {\"additional_field\": \"custom_value\"}" +
"}";

try {
List<Struct> structs = convertJsonToStruct(json, "_version_");
for (Struct struct : structs) {
System.out.println(JsonFormat.printer().print(struct));
String originalJson = convertStructToJson(struct);
System.out.println("Converted back to JSON:\n" + originalJson);
}

try {
List<Struct> resourceStructs = loadJsonFromResource("example.json");
for (Struct resourceStruct : resourceStructs) {
System.out.println(JsonFormat.printer().print(resourceStruct));
String originalJson = convertStructToJson(resourceStruct);
System.out.println("Converted back to JSON:\n" + originalJson);
}
} catch (IOException e) {
System.err.println("Warning: " + e.getMessage());
}
} catch (IOException e) {
e.printStackTrace();
}
}
}
Loading