diff --git a/pom.xml b/pom.xml
index 997b887..1e8cfbc 100644
--- a/pom.xml
+++ b/pom.xml
@@ -47,33 +47,33 @@
17
17
- 24.0.1
- 1.16.1
- 1.18.28
- 1.1.2
- 3.5.8
- 2.0.7
- 3.3.5
- 6.1.14
+ 26.0.2
+ 1.18.3
+ 1.18.36
+ 1.2.3
+ 3.7.3
+ 2.0.16
+ 3.4.3
+ 6.2.3
5.15.0
- 5.10.0
- 3.24.2
- 5.4.0
+ 5.12.0
+ 3.27.3
+ 5.15.2
- 0.8.10
- 3.11.0
- 3.6.0
- 3.1.1
- 3.1.0
- 3.5.0
- 3.0.1
- 2.0.1
- 2.0.1
- 3.3.0
- 3.1.2
- 1.6.13
- 2.16.0
+ 0.8.12
+ 3.14.0
+ 3.8.1
+ 3.1.3
+ 3.2.7
+ 3.11.2
+ 3.1.1
+ 2.1.0
+ 2.1.0
+ 3.3.1
+ 3.5.2
+ 1.7.0
+ 2.18.0
diff --git a/scraphead-core/src/main/java/fr/ght1pc9kc/scraphead/core/scrap/OGScrapperUtils.java b/scraphead-core/src/main/java/fr/ght1pc9kc/scraphead/core/scrap/OGScrapperUtils.java
index 6b96c9f..042c985 100644
--- a/scraphead-core/src/main/java/fr/ght1pc9kc/scraphead/core/scrap/OGScrapperUtils.java
+++ b/scraphead-core/src/main/java/fr/ght1pc9kc/scraphead/core/scrap/OGScrapperUtils.java
@@ -13,11 +13,12 @@
@Slf4j
@UtilityClass
public class OGScrapperUtils {
- public static final String META_PROPERTY = "property";
- public static final String META_NAME = "name";
public static final String META_CONTENT = "content";
- public static final String META_REL = "rel";
public static final String META_HREF = "href";
+ public static final String META_NAME = "name";
+ public static final String META_PROPERTY = "property";
+ public static final String META_REL = "rel";
+ public static final String META_TYPE = "type";
public static String removeQueryString(String uri) {
int idx = uri.indexOf('?');
diff --git a/scraphead-core/src/main/java/fr/ght1pc9kc/scraphead/core/scrap/collectors/LinksCollector.java b/scraphead-core/src/main/java/fr/ght1pc9kc/scraphead/core/scrap/collectors/LinksCollector.java
index e248c5d..a9e4245 100644
--- a/scraphead-core/src/main/java/fr/ght1pc9kc/scraphead/core/scrap/collectors/LinksCollector.java
+++ b/scraphead-core/src/main/java/fr/ght1pc9kc/scraphead/core/scrap/collectors/LinksCollector.java
@@ -16,13 +16,19 @@
import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_HREF;
import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_REL;
+import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_TYPE;
+import static java.util.Objects.isNull;
@Slf4j
public final class LinksCollector implements MetaDataCollector, Collector, WithErrors> {
private static final String REL_CANONICAL = "canonical";
private static final String REL_ICON = "icon";
+ private static final String REL_SHORTCUT_ICON = "shortcut icon";
private static final String REL_LICENSE = "license";
private static final String REL_SHORTLINK = "shortlink";
+ private static final String REL_TYPE_ICON = "image/x-icon";
+ private static final String ABSOLUTE_PREFIX = "abs:";
+ private static final String TAG_LINK = "link";
@Override
public Collector, WithErrors> collector() {
@@ -37,19 +43,24 @@ public Supplier> supplier() {
@Override
public BiConsumer, Element> accumulator() {
return (builder, element) -> {
- if (!"link".equals(element.tagName()) || !element.hasAttr(META_REL)) {
+ if (!TAG_LINK.equals(element.tagName()) || !element.hasAttr(META_REL)) {
return;
}
String relation = element.attr(META_REL);
switch (relation) {
- case REL_CANONICAL -> OGScrapperUtils.toUri(element.attr("abs:" + META_HREF))
+ case REL_CANONICAL -> OGScrapperUtils.toUri(element.attr(ABSOLUTE_PREFIX + META_HREF))
.ifPresent(builder.object()::canonical);
- case REL_ICON -> OGScrapperUtils.toUri(element.attr("abs:" + META_HREF))
- .ifPresent(builder.object()::icon);
- case REL_LICENSE -> OGScrapperUtils.toUri(element.attr("abs:" + META_HREF))
+ case REL_ICON, REL_SHORTCUT_ICON -> {
+ if (isNull(builder.object().build().icon())
+ || REL_TYPE_ICON.equals(element.attr(META_TYPE))) {
+ OGScrapperUtils.toUri(element.attr(ABSOLUTE_PREFIX + META_HREF))
+ .ifPresent(builder.object()::icon);
+ }
+ }
+ case REL_LICENSE -> OGScrapperUtils.toUri(element.attr(ABSOLUTE_PREFIX + META_HREF))
.ifPresent(builder.object()::license);
- case REL_SHORTLINK -> OGScrapperUtils.toUri(element.attr("abs:" + META_HREF))
+ case REL_SHORTLINK -> OGScrapperUtils.toUri(element.attr(ABSOLUTE_PREFIX + META_HREF))
.ifPresent(builder.object()::shortlink);
default -> log.trace("Unmanaged relation for {}", relation);
}
diff --git a/scraphead-core/src/test/java/fr/ght1pc9kc/scraphead/core/scrap/collectors/LinksCollectorTest.java b/scraphead-core/src/test/java/fr/ght1pc9kc/scraphead/core/scrap/collectors/LinksCollectorTest.java
index c2e94b5..af6a24f 100644
--- a/scraphead-core/src/test/java/fr/ght1pc9kc/scraphead/core/scrap/collectors/LinksCollectorTest.java
+++ b/scraphead-core/src/test/java/fr/ght1pc9kc/scraphead/core/scrap/collectors/LinksCollectorTest.java
@@ -15,6 +15,7 @@
import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_HREF;
import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_NAME;
import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_REL;
+import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_TYPE;
class LinksCollectorTest {
@@ -33,7 +34,8 @@ void should_collect_elements_links() {
new Element(link, baseUrl)
.attr(META_REL, "icon").attr(META_HREF, "favicon.ico"),
new Element(link, baseUrl)
- .attr(META_REL, "icon").attr(META_HREF, "favicon.png"),
+ .attr(META_REL, "icon").attr(META_TYPE, "image/x-icon")
+ .attr(META_HREF, "favicon.png"),
new Element(link, baseUrl)
.attr(META_REL, "license").attr(META_HREF, "//www.wtfpl.net/"),
new Element(link, baseUrl)
diff --git a/scraphead-netty/src/main/java/fr/ght1pc9kc/scraphead/netty/http/config/NettyClientBuilder.java b/scraphead-netty/src/main/java/fr/ght1pc9kc/scraphead/netty/http/config/NettyClientBuilder.java
index 7641be1..ed53dd3 100644
--- a/scraphead-netty/src/main/java/fr/ght1pc9kc/scraphead/netty/http/config/NettyClientBuilder.java
+++ b/scraphead-netty/src/main/java/fr/ght1pc9kc/scraphead/netty/http/config/NettyClientBuilder.java
@@ -1,8 +1,8 @@
package fr.ght1pc9kc.scraphead.netty.http.config;
import lombok.experimental.UtilityClass;
-import reactor.netty.http.Http11SslContextSpec;
import reactor.netty.http.client.HttpClient;
+import reactor.netty.tcp.SslProvider;
import java.util.Set;
@@ -11,7 +11,7 @@ public class NettyClientBuilder {
public static HttpClient getNettyHttpClient() {
return HttpClient.create()
- .secure(spec -> spec.sslContext(Http11SslContextSpec.forClient()))
+ .secure(spec -> spec.sslContext(SslProvider.defaultClientProvider().getSslContext()))
.followRedirect((req, res) -> // 303 was not in the default code
Set.of(301, 302, 303, 307, 308).contains(res.status().code()))
.compress(true);
diff --git a/scraphead-spring/pom.xml b/scraphead-spring/pom.xml
index 95f9f98..0e7c2f4 100644
--- a/scraphead-spring/pom.xml
+++ b/scraphead-spring/pom.xml
@@ -1,5 +1,6 @@
-
+
4.0.0
scraphead
@@ -70,6 +71,13 @@
org.mock-server
mockserver-netty
test
+
+
+
+ io.netty
+ *
+
+
org.slf4j
diff --git a/scraphead-spring/src/main/java/fr/ght1pc9kc/scraphead/spring/config/ScrapheadWebClientConfiguration.java b/scraphead-spring/src/main/java/fr/ght1pc9kc/scraphead/spring/config/ScrapheadWebClientConfiguration.java
index c83f6c0..dcbe548 100644
--- a/scraphead-spring/src/main/java/fr/ght1pc9kc/scraphead/spring/config/ScrapheadWebClientConfiguration.java
+++ b/scraphead-spring/src/main/java/fr/ght1pc9kc/scraphead/spring/config/ScrapheadWebClientConfiguration.java
@@ -5,8 +5,8 @@
import org.springframework.context.annotation.Configuration;
import org.springframework.http.client.reactive.ReactorClientHttpConnector;
import org.springframework.web.reactive.function.client.WebClient;
-import reactor.netty.http.Http11SslContextSpec;
import reactor.netty.http.client.HttpClient;
+import reactor.netty.tcp.SslProvider;
import java.util.Set;
@@ -19,7 +19,7 @@ public class ScrapheadWebClientConfiguration {
public WebClient scrapheadWebclient() {
return WebClient.builder().clientConnector(new ReactorClientHttpConnector(
HttpClient.create()
- .secure(spec -> spec.sslContext(Http11SslContextSpec.forClient()))
+ .secure(spec -> spec.sslContext(SslProvider.defaultClientProvider().getSslContext()))
.followRedirect(true)
.followRedirect((req, res) -> // 303 was not in the default code
Set.of(301, 302, 303, 307, 308).contains(res.status().code()))