diff --git a/pom.xml b/pom.xml index 997b887..1e8cfbc 100644 --- a/pom.xml +++ b/pom.xml @@ -47,33 +47,33 @@ 17 17 - 24.0.1 - 1.16.1 - 1.18.28 - 1.1.2 - 3.5.8 - 2.0.7 - 3.3.5 - 6.1.14 + 26.0.2 + 1.18.3 + 1.18.36 + 1.2.3 + 3.7.3 + 2.0.16 + 3.4.3 + 6.2.3 5.15.0 - 5.10.0 - 3.24.2 - 5.4.0 + 5.12.0 + 3.27.3 + 5.15.2 - 0.8.10 - 3.11.0 - 3.6.0 - 3.1.1 - 3.1.0 - 3.5.0 - 3.0.1 - 2.0.1 - 2.0.1 - 3.3.0 - 3.1.2 - 1.6.13 - 2.16.0 + 0.8.12 + 3.14.0 + 3.8.1 + 3.1.3 + 3.2.7 + 3.11.2 + 3.1.1 + 2.1.0 + 2.1.0 + 3.3.1 + 3.5.2 + 1.7.0 + 2.18.0 diff --git a/scraphead-core/src/main/java/fr/ght1pc9kc/scraphead/core/scrap/OGScrapperUtils.java b/scraphead-core/src/main/java/fr/ght1pc9kc/scraphead/core/scrap/OGScrapperUtils.java index 6b96c9f..042c985 100644 --- a/scraphead-core/src/main/java/fr/ght1pc9kc/scraphead/core/scrap/OGScrapperUtils.java +++ b/scraphead-core/src/main/java/fr/ght1pc9kc/scraphead/core/scrap/OGScrapperUtils.java @@ -13,11 +13,12 @@ @Slf4j @UtilityClass public class OGScrapperUtils { - public static final String META_PROPERTY = "property"; - public static final String META_NAME = "name"; public static final String META_CONTENT = "content"; - public static final String META_REL = "rel"; public static final String META_HREF = "href"; + public static final String META_NAME = "name"; + public static final String META_PROPERTY = "property"; + public static final String META_REL = "rel"; + public static final String META_TYPE = "type"; public static String removeQueryString(String uri) { int idx = uri.indexOf('?'); diff --git a/scraphead-core/src/main/java/fr/ght1pc9kc/scraphead/core/scrap/collectors/LinksCollector.java b/scraphead-core/src/main/java/fr/ght1pc9kc/scraphead/core/scrap/collectors/LinksCollector.java index e248c5d..a9e4245 100644 --- a/scraphead-core/src/main/java/fr/ght1pc9kc/scraphead/core/scrap/collectors/LinksCollector.java +++ b/scraphead-core/src/main/java/fr/ght1pc9kc/scraphead/core/scrap/collectors/LinksCollector.java @@ -16,13 +16,19 @@ import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_HREF; import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_REL; +import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_TYPE; +import static java.util.Objects.isNull; @Slf4j public final class LinksCollector implements MetaDataCollector, Collector, WithErrors> { private static final String REL_CANONICAL = "canonical"; private static final String REL_ICON = "icon"; + private static final String REL_SHORTCUT_ICON = "shortcut icon"; private static final String REL_LICENSE = "license"; private static final String REL_SHORTLINK = "shortlink"; + private static final String REL_TYPE_ICON = "image/x-icon"; + private static final String ABSOLUTE_PREFIX = "abs:"; + private static final String TAG_LINK = "link"; @Override public Collector, WithErrors> collector() { @@ -37,19 +43,24 @@ public Supplier> supplier() { @Override public BiConsumer, Element> accumulator() { return (builder, element) -> { - if (!"link".equals(element.tagName()) || !element.hasAttr(META_REL)) { + if (!TAG_LINK.equals(element.tagName()) || !element.hasAttr(META_REL)) { return; } String relation = element.attr(META_REL); switch (relation) { - case REL_CANONICAL -> OGScrapperUtils.toUri(element.attr("abs:" + META_HREF)) + case REL_CANONICAL -> OGScrapperUtils.toUri(element.attr(ABSOLUTE_PREFIX + META_HREF)) .ifPresent(builder.object()::canonical); - case REL_ICON -> OGScrapperUtils.toUri(element.attr("abs:" + META_HREF)) - .ifPresent(builder.object()::icon); - case REL_LICENSE -> OGScrapperUtils.toUri(element.attr("abs:" + META_HREF)) + case REL_ICON, REL_SHORTCUT_ICON -> { + if (isNull(builder.object().build().icon()) + || REL_TYPE_ICON.equals(element.attr(META_TYPE))) { + OGScrapperUtils.toUri(element.attr(ABSOLUTE_PREFIX + META_HREF)) + .ifPresent(builder.object()::icon); + } + } + case REL_LICENSE -> OGScrapperUtils.toUri(element.attr(ABSOLUTE_PREFIX + META_HREF)) .ifPresent(builder.object()::license); - case REL_SHORTLINK -> OGScrapperUtils.toUri(element.attr("abs:" + META_HREF)) + case REL_SHORTLINK -> OGScrapperUtils.toUri(element.attr(ABSOLUTE_PREFIX + META_HREF)) .ifPresent(builder.object()::shortlink); default -> log.trace("Unmanaged relation for {}", relation); } diff --git a/scraphead-core/src/test/java/fr/ght1pc9kc/scraphead/core/scrap/collectors/LinksCollectorTest.java b/scraphead-core/src/test/java/fr/ght1pc9kc/scraphead/core/scrap/collectors/LinksCollectorTest.java index c2e94b5..af6a24f 100644 --- a/scraphead-core/src/test/java/fr/ght1pc9kc/scraphead/core/scrap/collectors/LinksCollectorTest.java +++ b/scraphead-core/src/test/java/fr/ght1pc9kc/scraphead/core/scrap/collectors/LinksCollectorTest.java @@ -15,6 +15,7 @@ import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_HREF; import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_NAME; import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_REL; +import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_TYPE; class LinksCollectorTest { @@ -33,7 +34,8 @@ void should_collect_elements_links() { new Element(link, baseUrl) .attr(META_REL, "icon").attr(META_HREF, "favicon.ico"), new Element(link, baseUrl) - .attr(META_REL, "icon").attr(META_HREF, "favicon.png"), + .attr(META_REL, "icon").attr(META_TYPE, "image/x-icon") + .attr(META_HREF, "favicon.png"), new Element(link, baseUrl) .attr(META_REL, "license").attr(META_HREF, "//www.wtfpl.net/"), new Element(link, baseUrl) diff --git a/scraphead-netty/src/main/java/fr/ght1pc9kc/scraphead/netty/http/config/NettyClientBuilder.java b/scraphead-netty/src/main/java/fr/ght1pc9kc/scraphead/netty/http/config/NettyClientBuilder.java index 7641be1..ed53dd3 100644 --- a/scraphead-netty/src/main/java/fr/ght1pc9kc/scraphead/netty/http/config/NettyClientBuilder.java +++ b/scraphead-netty/src/main/java/fr/ght1pc9kc/scraphead/netty/http/config/NettyClientBuilder.java @@ -1,8 +1,8 @@ package fr.ght1pc9kc.scraphead.netty.http.config; import lombok.experimental.UtilityClass; -import reactor.netty.http.Http11SslContextSpec; import reactor.netty.http.client.HttpClient; +import reactor.netty.tcp.SslProvider; import java.util.Set; @@ -11,7 +11,7 @@ public class NettyClientBuilder { public static HttpClient getNettyHttpClient() { return HttpClient.create() - .secure(spec -> spec.sslContext(Http11SslContextSpec.forClient())) + .secure(spec -> spec.sslContext(SslProvider.defaultClientProvider().getSslContext())) .followRedirect((req, res) -> // 303 was not in the default code Set.of(301, 302, 303, 307, 308).contains(res.status().code())) .compress(true); diff --git a/scraphead-spring/pom.xml b/scraphead-spring/pom.xml index 95f9f98..0e7c2f4 100644 --- a/scraphead-spring/pom.xml +++ b/scraphead-spring/pom.xml @@ -1,5 +1,6 @@ - + 4.0.0 scraphead @@ -70,6 +71,13 @@ org.mock-server mockserver-netty test + + + + io.netty + * + + org.slf4j diff --git a/scraphead-spring/src/main/java/fr/ght1pc9kc/scraphead/spring/config/ScrapheadWebClientConfiguration.java b/scraphead-spring/src/main/java/fr/ght1pc9kc/scraphead/spring/config/ScrapheadWebClientConfiguration.java index c83f6c0..dcbe548 100644 --- a/scraphead-spring/src/main/java/fr/ght1pc9kc/scraphead/spring/config/ScrapheadWebClientConfiguration.java +++ b/scraphead-spring/src/main/java/fr/ght1pc9kc/scraphead/spring/config/ScrapheadWebClientConfiguration.java @@ -5,8 +5,8 @@ import org.springframework.context.annotation.Configuration; import org.springframework.http.client.reactive.ReactorClientHttpConnector; import org.springframework.web.reactive.function.client.WebClient; -import reactor.netty.http.Http11SslContextSpec; import reactor.netty.http.client.HttpClient; +import reactor.netty.tcp.SslProvider; import java.util.Set; @@ -19,7 +19,7 @@ public class ScrapheadWebClientConfiguration { public WebClient scrapheadWebclient() { return WebClient.builder().clientConnector(new ReactorClientHttpConnector( HttpClient.create() - .secure(spec -> spec.sslContext(Http11SslContextSpec.forClient())) + .secure(spec -> spec.sslContext(SslProvider.defaultClientProvider().getSslContext())) .followRedirect(true) .followRedirect((req, res) -> // 303 was not in the default code Set.of(301, 302, 303, 307, 308).contains(res.status().code()))