Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/self-hosted-docker-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ jobs:
run: |
# Keep the latest and previous tags; remove all other huntly images
docker images lcomplete/huntly --format "table {{.Repository}}:{{.Tag}}" | grep -v "latest\|previous\|REPOSITORY" | xargs -r docker rmi || true
continue-on-error: true
continue-on-error: false

- name: Ensure data directory exists
run: |
Expand Down
10 changes: 5 additions & 5 deletions app/client/src/components/SettingModal/BatchOrganizeSetting.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -262,11 +262,11 @@ export default function BatchOrganizeSetting() {
{filterResult.items.length > 0 && (
<>
<BatchPageItemList items={filterResult.items.slice(0, 5)} />
{filterResult.totalCount > 5 && (
<Button variant="outlined" size="small" onClick={handleViewMore}>
View More ({filterResult.totalCount} total)
</Button>
)}
<Button variant="outlined" size="small" onClick={handleViewMore}>
{filterResult.totalCount === 1
? "Organize (1)"
: `Batch Organize (${filterResult.totalCount})`}
</Button>
</>
)}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,12 +13,16 @@
@Setter
public class ConnectorProperties {
/**
 * NOTE(review): presumably the time of this connector's most recent fetch;
 * no reader or writer of this field is visible here - confirm against callers.
 */
private Instant lastFetchAt;

/**
 * URL of the subscribed feed. The RSS connector passes it to
 * FeedUtils.fetchFeed and returns an empty result when it is blank.
 */
private String subscribeUrl;

/**
 * NOTE(review): presumably a credential for connectors that call an
 * authenticated API; usage is not visible here - confirm.
 */
private String apiToken;

/**
 * NOTE(review): presumably toggles fetching the full article body for each
 * page; nullable wrapper type, default handling is not visible here - confirm.
 */
private Boolean crawlFullContent;

/**
 * NOTE(review): presumably the proxy configuration used when the connector's
 * HTTP client is built - confirm against buildHttpClient.
 */
private ProxySetting proxySetting;

/**
 * ETag remembered from an earlier feed response. The RSS connector hands it
 * to FeedUtils.fetchFeed, which sends it as the If-None-Match request header
 * when it is not blank.
 */
private String httpEtag;

/**
 * Last-Modified value remembered from an earlier feed response. The RSS
 * connector hands it to FeedUtils.fetchFeed, which sends it as the
 * If-Modified-Since request header when it is not blank.
 */
private String httpLastModified;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
package com.huntly.server.connector;

import com.huntly.interfaces.external.model.CapturePage;
import lombok.Getter;
import lombok.Setter;

import java.util.ArrayList;
import java.util.List;

/**
 * Result of fetching pages from a connector, including HTTP cache headers.
 *
 * <p>Getters and setters are generated by the {@code @Getter} and
 * {@code @Setter} annotations. Instances are normally created through
 * {@link #notModified()} or {@link #of(List, String, String)}.
 */
@Getter
@Setter
public class FetchPagesResult {
    /**
     * The fetched pages; an empty list if the feed was not modified.
     */
    private List<CapturePage> pages;

    /**
     * Whether the feed was not modified (HTTP 304).
     */
    private boolean notModified;

    /**
     * ETag header from the response, or null when none was reported.
     */
    private String httpEtag;

    /**
     * Last-Modified header from the response, or null when none was reported.
     */
    private String httpLastModified;

    /**
     * Creates the result for a feed that has not changed since the last fetch.
     *
     * @return a result flagged as not modified, with an empty (never null)
     *         page list and both cache headers left null
     */
    public static FetchPagesResult notModified() {
        FetchPagesResult result = new FetchPagesResult();
        // The pages field is documented as "empty if the feed was not modified";
        // set a real empty list so consumers can iterate without a null check.
        result.setPages(new ArrayList<>());
        result.setNotModified(true);
        return result;
    }

    /**
     * Creates the result for a feed that was fetched and converted to pages.
     *
     * @param pages            the pages produced by the fetch, stored as given
     * @param httpEtag         ETag header of the response, may be null
     * @param httpLastModified Last-Modified header of the response, may be null
     * @return a result that is not flagged as not modified
     */
    public static FetchPagesResult of(List<CapturePage> pages, String httpEtag, String httpLastModified) {
        FetchPagesResult result = new FetchPagesResult();
        result.setPages(pages);
        result.setNotModified(false);
        result.setHttpEtag(httpEtag);
        result.setHttpLastModified(httpLastModified);
        return result;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,15 @@ protected HttpClient buildHttpClient(ConnectorProperties properties) {
/**
 * Abstract hook implemented by each concrete connector.
 * NOTE(review): presumably returns every page the source currently exposes,
 * as opposed to only the newest ones - confirm against the implementations.
 *
 * @return the pages fetched by the connector
 */
public abstract List<CapturePage> fetchAllPages();

/**
 * Abstract hook implemented by each concrete connector.
 * NOTE(review): presumably loads the content for the given page; whether the
 * argument is mutated and returned or a new object is created is not visible
 * here - confirm against the implementations.
 *
 * @param capturePage the page to fetch content for
 * @return the page as returned by the implementation
 */
public abstract CapturePage fetchPageContent(CapturePage capturePage);

/**
 * Retrieves the newest pages wrapped in a {@link FetchPagesResult}.
 *
 * <p>This base implementation does no HTTP 304 handling of its own: it simply
 * forwards to {@code fetchNewestPages()} and reports no cache headers.
 * Connectors that can issue conditional requests override it.
 *
 * @return a result holding the pages from {@code fetchNewestPages()}, with
 *         null ETag and Last-Modified values
 */
public FetchPagesResult fetchNewestPagesWithCache() {
    return FetchPagesResult.of(fetchNewestPages(), null, null);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
package com.huntly.server.connector.rss;

import com.rometools.rome.feed.synd.SyndFeed;
import lombok.Getter;
import lombok.Setter;

/**
 * Outcome of a single feed download: either the parsed feed together with the
 * cache headers of the response, or a marker that the server answered
 * 304 Not Modified.
 */
@Getter
@Setter
public class FeedFetchResult {
    /**
     * Parsed feed; stays null when the server answered 304 Not Modified.
     */
    private SyndFeed feed;

    /**
     * True when the server answered 304 Not Modified.
     */
    private boolean notModified;

    /**
     * Value of the response's ETag header.
     */
    private String etag;

    /**
     * Value of the response's Last-Modified header.
     */
    private String lastModified;

    /**
     * Builds the marker result for an unchanged feed.
     *
     * @return a result with the not-modified flag set and every other field unset
     */
    public static FeedFetchResult notModified() {
        FeedFetchResult unchanged = new FeedFetchResult();
        unchanged.notModified = true;
        return unchanged;
    }

    /**
     * Builds the result for a feed that was downloaded and parsed.
     *
     * @param feed         the parsed feed
     * @param etag         ETag header of the response, may be null
     * @param lastModified Last-Modified header of the response, may be null
     * @return a result carrying the feed and headers, not flagged as not modified
     */
    public static FeedFetchResult of(SyndFeed feed, String etag, String lastModified) {
        FeedFetchResult fetched = new FeedFetchResult();
        fetched.feed = feed;
        fetched.notModified = false;
        fetched.etag = etag;
        fetched.lastModified = lastModified;
        return fetched;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,61 +23,104 @@
*/
@UtilityClass
public class FeedUtils {
public static SyndFeed parseFeedUrl(String feedUrl, OkHttpClient client) {
Request request = new Request.Builder()
.url(feedUrl)
.build();
try(Response response = client.newCall(request).execute()) {
byte[] xmlBytes = null;

private static final int HTTP_NOT_MODIFIED = 304;

/**
 * Fetch feed with conditional request support (HTTP 304).
 *
 * <p>When {@code etag} or {@code lastModified} is not blank it is sent as the
 * {@code If-None-Match} or {@code If-Modified-Since} request header
 * respectively. A 304 answer yields {@link FeedFetchResult#notModified()}
 * without reading the body. Otherwise the body is decoded with the charset
 * from {@code guessEncoding}, passed through
 * {@code XmlUtils.removeInvalidXmlCharacters}, parsed, and returned together
 * with the response's {@code ETag} and {@code Last-Modified} headers.
 *
 * @param feedUrl      The feed URL to fetch
 * @param client       The OkHttp client
 * @param etag         The ETag from previous request (can be null)
 * @param lastModified The Last-Modified from previous request (can be null)
 * @return FeedFetchResult containing the feed or notModified flag
 * @throws ConnectorFetchException if the response has no body or the cleaned XML is null
 * @throws RuntimeException        wrapping the IOException or FeedException raised
 *                                 while downloading or parsing the feed
 */
public static FeedFetchResult fetchFeed(String feedUrl, OkHttpClient client, String etag, String lastModified) {
    Request.Builder requestBuilder = new Request.Builder().url(feedUrl);

    // Add conditional request headers if available
    if (StringUtils.isNotBlank(etag)) {
        requestBuilder.header("If-None-Match", etag);
    }
    if (StringUtils.isNotBlank(lastModified)) {
        requestBuilder.header("If-Modified-Since", lastModified);
    }

    Request request = requestBuilder.build();

    try (Response response = client.newCall(request).execute()) {
        // Check for 304 Not Modified before touching the body
        if (response.code() == HTTP_NOT_MODIFIED) {
            return FeedFetchResult.notModified();
        }

        if (response.body() == null) {
            throw new ConnectorFetchException("xml response null for url: " + feedUrl);
        }

        // Read the body exactly once; the bytes are needed both for charset
        // detection and for decoding.
        byte[] xmlBytes = response.body().bytes();
        Charset encoding = FeedUtils.guessEncoding(xmlBytes);
        String xmlString = XmlUtils.removeInvalidXmlCharacters(new String(xmlBytes, encoding));
        if (xmlString == null) {
            throw new ConnectorFetchException("xml fetch failed for url: " + feedUrl);
        }

        SyndFeed feed = new SyndFeedInput().build(new StringReader(xmlString));

        // Extract cache headers from response so the caller can send them
        // back on the next request.
        String responseEtag = response.header("ETag");
        String responseLastModified = response.header("Last-Modified");

        return FeedFetchResult.of(feed, responseEtag, responseLastModified);
    } catch (IOException | FeedException e) {
        throw new RuntimeException(e);
    }
}

// public static SyndFeed parseFeedUrl(String feedUrl, HttpClient client) {
// HttpRequest request = HttpRequest.newBuilder().GET().uri(URI.create(feedUrl))
// .build();
// HttpResponse<byte[]> response = null;
// try {
// response = client.send(request, HttpResponse.BodyHandlers.ofByteArray());
// } catch (IOException e) {
// throw new RuntimeException(e);
// } catch (InterruptedException e) {
// throw new RuntimeException(e);
// }
// var xmlBytes = response.body();
// Charset encoding = FeedUtils.guessEncoding(xmlBytes);
// String xmlString = XmlUtils.removeInvalidXmlCharacters(new String(xmlBytes, encoding));
// if (xmlString == null) {
// throw new ConnectorFetchException("xml fetch failed for url: " + feedUrl);
// }
//
// try {
// SyndFeed feed = new SyndFeedInput().build(new StringReader(xmlString));
// return feed;
// } catch (FeedException e) {
// throw new RuntimeException(e);
// }
// }

// public static SyndFeed parseFeedUrl(String feedUrl) {
// var client = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(60))
// .followRedirects(HttpClient.Redirect.ALWAYS).build();
// return parseFeedUrl(feedUrl, client);
// }
/**
 * Downloads and parses a feed without sending any cache validators.
 *
 * @param feedUrl the feed URL to fetch
 * @param client  the OkHttp client
 * @return the parsed feed taken from the fetch result
 * @deprecated Use {@link #fetchFeed(String, OkHttpClient, String, String)} for
 *             HTTP 304 support
 */
@Deprecated
public static SyndFeed parseFeedUrl(String feedUrl, OkHttpClient client) {
    return fetchFeed(feedUrl, client, null, null).getFeed();
}

// public static SyndFeed parseFeedUrl(String feedUrl, HttpClient client) {
// HttpRequest request = HttpRequest.newBuilder().GET().uri(URI.create(feedUrl))
// .build();
// HttpResponse<byte[]> response = null;
// try {
// response = client.send(request, HttpResponse.BodyHandlers.ofByteArray());
// } catch (IOException e) {
// throw new RuntimeException(e);
// } catch (InterruptedException e) {
// throw new RuntimeException(e);
// }
// var xmlBytes = response.body();
// Charset encoding = FeedUtils.guessEncoding(xmlBytes);
// String xmlString = XmlUtils.removeInvalidXmlCharacters(new String(xmlBytes,
// encoding));
// if (xmlString == null) {
// throw new ConnectorFetchException("xml fetch failed for url: " + feedUrl);
// }
//
// try {
// SyndFeed feed = new SyndFeedInput().build(new StringReader(xmlString));
// return feed;
// } catch (FeedException e) {
// throw new RuntimeException(e);
// }
// }

// public static SyndFeed parseFeedUrl(String feedUrl) {
// var client = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(60))
// .followRedirects(HttpClient.Redirect.ALWAYS).build();
// return parseFeedUrl(feedUrl, client);
// }

public static Charset guessEncoding(byte[] bytes) {
String extracted = extractDeclaredEncoding(bytes);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import com.huntly.common.util.UrlUtils;
import com.huntly.interfaces.external.model.CapturePage;
import com.huntly.server.connector.ConnectorProperties;
import com.huntly.server.connector.FetchPagesResult;
import com.huntly.server.connector.InfoConnector;
import com.huntly.server.domain.exceptions.ConnectorFetchException;
import com.huntly.server.util.HttpUtils;
Expand Down Expand Up @@ -47,31 +48,54 @@ public List<CapturePage> fetchAllPages() {

@Override
public List<CapturePage> fetchNewestPages() {
    // Reuse the cache-aware fetch; a result that carries no page list (null)
    // is exposed to callers as a fresh empty list.
    List<CapturePage> fetched = fetchNewestPagesWithCache().getPages();
    if (fetched == null) {
        return new ArrayList<>();
    }
    return fetched;
}

@Override
public FetchPagesResult fetchNewestPagesWithCache() {
if (StringUtils.isBlank(connectorProperties.getSubscribeUrl())) {
return new ArrayList<>();
return FetchPagesResult.of(new ArrayList<>(), null, null);
}

try {
SyndFeed feed = FeedUtils.parseFeedUrl(connectorProperties.getSubscribeUrl(), okClient);
// Use conditional request with cached ETag and Last-Modified
FeedFetchResult feedResult = FeedUtils.fetchFeed(
connectorProperties.getSubscribeUrl(),
okClient,
connectorProperties.getHttpEtag(),
connectorProperties.getHttpLastModified());

// If feed was not modified, return early with notModified flag
if (feedResult.isNotModified()) {
log.debug("Feed not modified (HTTP 304): {}", connectorProperties.getSubscribeUrl());
return FetchPagesResult.notModified();
}

SyndFeed feed = feedResult.getFeed();
var entries = feed.getEntries();
List<CapturePage> pages = new ArrayList<>();
for (var entry : entries) {
CapturePage capturePage = new CapturePage();
String content = getContent(entry);
String description = StringUtils.trimToEmpty(entry.getDescription() == null ? null : entry.getDescription().getValue());
String description = StringUtils
.trimToEmpty(entry.getDescription() == null ? null : entry.getDescription().getValue());
capturePage.setUrl(entry.getLink());
capturePage.setDomain(UrlUtils.getDomainName(entry.getLink()));
capturePage.setContent(content);
capturePage.setDescription(description);
capturePage.setTitle(getTitle(entry));
capturePage.setConnectedAt(ObjectUtils.firstNonNull(entry.getPublishedDate(), entry.getUpdatedDate(), feed.getPublishedDate(), new Date()).toInstant());
capturePage.setConnectedAt(ObjectUtils.firstNonNull(entry.getPublishedDate(), entry.getUpdatedDate(),
feed.getPublishedDate(), new Date()).toInstant());
capturePage.setAuthor(StringUtils.trimToEmpty(entry.getAuthor()));
capturePage.setCategory(entry.getCategories().stream().map(SyndCategory::getName).collect(Collectors.joining(", ")));
capturePage.setCategory(
entry.getCategories().stream().map(SyndCategory::getName).collect(Collectors.joining(", ")));
capturePage.setNeedFindThumbUrl(true);
pages.add(capturePage);
}

return pages;
// Return pages with cache headers from response
return FetchPagesResult.of(pages, feedResult.getEtag(), feedResult.getLastModified());
} catch (Exception e) {
throw new ConnectorFetchException(e);
}
Expand All @@ -93,7 +117,8 @@ private String getTitle(SyndEntry item) {
/**
 * Joins the values of all content elements of a feed entry into one string.
 *
 * @param entry the feed entry to read
 * @return the content values joined with the system line separator and
 *         trimmed; an empty string when the entry has no content elements
 */
private String getContent(SyndEntry entry) {
    String content = null;
    if (!entry.getContents().isEmpty()) {
        // Build the joined text once; a second identical assignment would only
        // repeat the same stream work.
        content = entry.getContents().stream()
                .map(SyndContent::getValue)
                .collect(Collectors.joining(System.lineSeparator()));
    }
    return StringUtils.trimToEmpty(content);
}
Expand Down
Loading
Loading