Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 24 additions & 24 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -47,33 +47,33 @@
<maven.compiler.source>17</maven.compiler.source>
<maven.compiler.target>17</maven.compiler.target>

<jetbrains.version>24.0.1</jetbrains.version>
<jsoup.version>1.16.1</jsoup.version>
<lombok.version>1.18.28</lombok.version>
<netty.version>1.1.2</netty.version>
<reactor.version>3.5.8</reactor.version>
<slf4j.version>2.0.7</slf4j.version>
<spring-boot.version>3.3.5</spring-boot.version>
<spring.version>6.1.14</spring.version>
<jetbrains.version>26.0.2</jetbrains.version>
<jsoup.version>1.18.3</jsoup.version>
<lombok.version>1.18.36</lombok.version>
<netty.version>1.2.3</netty.version>
<reactor.version>3.7.3</reactor.version>
<slf4j.version>2.0.16</slf4j.version>
<spring-boot.version>3.4.3</spring-boot.version>
<spring.version>6.2.3</spring.version>

<mockserver-netty.version>5.15.0</mockserver-netty.version>
<junit.version>5.10.0</junit.version>
<assertj.version>3.24.2</assertj.version>
<mockito.version>5.4.0</mockito.version>
<junit.version>5.12.0</junit.version>
<assertj.version>3.27.3</assertj.version>
<mockito.version>5.15.2</mockito.version>

<jacoco-maven-plugin.version>0.8.10</jacoco-maven-plugin.version>
<maven-compiler-plugin.version>3.11.0</maven-compiler-plugin.version>
<maven-dependency-plugin.version>3.6.0</maven-dependency-plugin.version>
<maven-deploy-plugin.version>3.1.1</maven-deploy-plugin.version>
<maven-gpg-plugin.version>3.1.0</maven-gpg-plugin.version>
<maven-javadoc-plugin.version>3.5.0</maven-javadoc-plugin.version>
<maven-release-plugin.version>3.0.1</maven-release-plugin.version>
<maven-scm-api.version>2.0.1</maven-scm-api.version>
<maven-scm-provider-gitexe.version>2.0.1</maven-scm-provider-gitexe.version>
<maven-source-plugin.version>3.3.0</maven-source-plugin.version>
<maven-surefire-plugin.version>3.1.2</maven-surefire-plugin.version>
<nexus-staging-maven-plugin.version>1.6.13</nexus-staging-maven-plugin.version>
<versions-maven-plugin.version>2.16.0</versions-maven-plugin.version>
<jacoco-maven-plugin.version>0.8.12</jacoco-maven-plugin.version>
<maven-compiler-plugin.version>3.14.0</maven-compiler-plugin.version>
<maven-dependency-plugin.version>3.8.1</maven-dependency-plugin.version>
<maven-deploy-plugin.version>3.1.3</maven-deploy-plugin.version>
<maven-gpg-plugin.version>3.2.7</maven-gpg-plugin.version>
<maven-javadoc-plugin.version>3.11.2</maven-javadoc-plugin.version>
<maven-release-plugin.version>3.1.1</maven-release-plugin.version>
<maven-scm-api.version>2.1.0</maven-scm-api.version>
<maven-scm-provider-gitexe.version>2.1.0</maven-scm-provider-gitexe.version>
<maven-source-plugin.version>3.3.1</maven-source-plugin.version>
<maven-surefire-plugin.version>3.5.2</maven-surefire-plugin.version>
<nexus-staging-maven-plugin.version>1.7.0</nexus-staging-maven-plugin.version>
<versions-maven-plugin.version>2.18.0</versions-maven-plugin.version>

<surefire.argline.opens />
</properties>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,12 @@
@Slf4j
@UtilityClass
public class OGScrapperUtils {
public static final String META_PROPERTY = "property";
public static final String META_NAME = "name";
public static final String META_CONTENT = "content";
public static final String META_REL = "rel";
public static final String META_HREF = "href";
public static final String META_NAME = "name";
public static final String META_PROPERTY = "property";
public static final String META_REL = "rel";
public static final String META_TYPE = "type";

public static String removeQueryString(String uri) {
int idx = uri.indexOf('?');
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,19 @@

import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_HREF;
import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_REL;
import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_TYPE;
import static java.util.Objects.isNull;

@Slf4j
public final class LinksCollector implements MetaDataCollector<Links>, Collector<Element, WithErrors<Links.LinksBuilder>, WithErrors<Links>> {
private static final String REL_CANONICAL = "canonical";
private static final String REL_ICON = "icon";
private static final String REL_SHORTCUT_ICON = "shortcut icon";
private static final String REL_LICENSE = "license";
private static final String REL_SHORTLINK = "shortlink";
private static final String REL_TYPE_ICON = "image/x-icon";
private static final String ABSOLUTE_PREFIX = "abs:";
private static final String TAG_LINK = "link";

@Override
public Collector<Element, WithErrors<Links.LinksBuilder>, WithErrors<Links>> collector() {
Expand All @@ -37,19 +43,24 @@ public Supplier<WithErrors<Links.LinksBuilder>> supplier() {
@Override
public BiConsumer<WithErrors<Links.LinksBuilder>, Element> accumulator() {
return (builder, element) -> {
if (!"link".equals(element.tagName()) || !element.hasAttr(META_REL)) {
if (!TAG_LINK.equals(element.tagName()) || !element.hasAttr(META_REL)) {
return;
}
String relation = element.attr(META_REL);

switch (relation) {
case REL_CANONICAL -> OGScrapperUtils.toUri(element.attr("abs:" + META_HREF))
case REL_CANONICAL -> OGScrapperUtils.toUri(element.attr(ABSOLUTE_PREFIX + META_HREF))
.ifPresent(builder.object()::canonical);
case REL_ICON -> OGScrapperUtils.toUri(element.attr("abs:" + META_HREF))
.ifPresent(builder.object()::icon);
case REL_LICENSE -> OGScrapperUtils.toUri(element.attr("abs:" + META_HREF))
case REL_ICON, REL_SHORTCUT_ICON -> {
if (isNull(builder.object().build().icon())
|| REL_TYPE_ICON.equals(element.attr(META_TYPE))) {
OGScrapperUtils.toUri(element.attr(ABSOLUTE_PREFIX + META_HREF))
.ifPresent(builder.object()::icon);
}
}
case REL_LICENSE -> OGScrapperUtils.toUri(element.attr(ABSOLUTE_PREFIX + META_HREF))
.ifPresent(builder.object()::license);
case REL_SHORTLINK -> OGScrapperUtils.toUri(element.attr("abs:" + META_HREF))
case REL_SHORTLINK -> OGScrapperUtils.toUri(element.attr(ABSOLUTE_PREFIX + META_HREF))
.ifPresent(builder.object()::shortlink);
default -> log.trace("Unmanaged relation for {}", relation);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_HREF;
import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_NAME;
import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_REL;
import static fr.ght1pc9kc.scraphead.core.scrap.OGScrapperUtils.META_TYPE;

class LinksCollectorTest {

Expand All @@ -33,7 +34,8 @@ void should_collect_elements_links() {
new Element(link, baseUrl)
.attr(META_REL, "icon").attr(META_HREF, "favicon.ico"),
new Element(link, baseUrl)
.attr(META_REL, "icon").attr(META_HREF, "favicon.png"),
.attr(META_REL, "icon").attr(META_TYPE, "image/x-icon")
.attr(META_HREF, "favicon.png"),
new Element(link, baseUrl)
.attr(META_REL, "license").attr(META_HREF, "//www.wtfpl.net/"),
new Element(link, baseUrl)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package fr.ght1pc9kc.scraphead.netty.http.config;

import lombok.experimental.UtilityClass;
import reactor.netty.http.Http11SslContextSpec;
import reactor.netty.http.client.HttpClient;
import reactor.netty.tcp.SslProvider;

import java.util.Set;

Expand All @@ -11,7 +11,7 @@ public class NettyClientBuilder {

public static HttpClient getNettyHttpClient() {
return HttpClient.create()
.secure(spec -> spec.sslContext(Http11SslContextSpec.forClient()))
.secure(spec -> spec.sslContext(SslProvider.defaultClientProvider().getSslContext()))
.followRedirect((req, res) -> // 303 was not in the default code
Set.of(301, 302, 303, 307, 308).contains(res.status().code()))
.compress(true);
Expand Down
10 changes: 9 additions & 1 deletion scraphead-spring/pom.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<artifactId>scraphead</artifactId>
Expand Down Expand Up @@ -70,6 +71,13 @@
<groupId>org.mock-server</groupId>
<artifactId>mockserver-netty</artifactId>
<scope>test</scope>
<exclusions>
<!-- Avoid dependency conflict with 4.1.118 from reactor-netty -->
<exclusion>
<groupId>io.netty</groupId>
<artifactId>*</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
import org.springframework.context.annotation.Configuration;
import org.springframework.http.client.reactive.ReactorClientHttpConnector;
import org.springframework.web.reactive.function.client.WebClient;
import reactor.netty.http.Http11SslContextSpec;
import reactor.netty.http.client.HttpClient;
import reactor.netty.tcp.SslProvider;

import java.util.Set;

Expand All @@ -19,7 +19,7 @@ public class ScrapheadWebClientConfiguration {
public WebClient scrapheadWebclient() {
return WebClient.builder().clientConnector(new ReactorClientHttpConnector(
HttpClient.create()
.secure(spec -> spec.sslContext(Http11SslContextSpec.forClient()))
.secure(spec -> spec.sslContext(SslProvider.defaultClientProvider().getSslContext()))
.followRedirect(true)
.followRedirect((req, res) -> // 303 was not in the default code
Set.of(301, 302, 303, 307, 308).contains(res.status().code()))
Expand Down