Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@ db.sqlite*

.vscode/

.agents/
.claude/
.cursor/
.playwright-mcp/
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ ARG JAR_PATH=/app/server.jar

COPY ${JAR_FILE} ${JAR_PATH}

ENV JAVA_ARGS="-Xms128m -Xmx768m"
ENV JAVA_ARGS="-Xms128m -Xmx1024m"
ENV VM_ARGS="-Duser.timezone=GMT+08"
ENV APP_ARGS=""
ENV PROFILE="default"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -72,56 +72,16 @@ public static FeedFetchResult fetchFeed(String feedUrl, OkHttpClient client, Str
String responseLastModified = response.header("Last-Modified");

return FeedFetchResult.of(feed, responseEtag, responseLastModified);
} catch (IOException e) {
throw new RuntimeException(e);
} catch (FeedException e) {
throw new RuntimeException(e);
} catch (IOException | FeedException e) {
throw new ConnectorFetchException(e);
}
}

/**
 * Fetches the feed at {@code feedUrl} with the given client and returns only
 * the parsed feed, discarding the rest of the fetch result.
 *
 * <p>Delegates to {@link #fetchFeed(String, OkHttpClient, String, String)},
 * passing {@code null} for both trailing {@code String} arguments.
 * NOTE(review): those arguments are presumably the cached ETag /
 * Last-Modified validators, so this path never sends a conditional
 * request; confirm against {@code fetchFeed}.
 *
 * @param feedUrl URL of the feed to download and parse
 * @param client  HTTP client used to perform the request
 * @return the feed carried by the fetch result
 * @deprecated Use {@link #fetchFeed(String, OkHttpClient, String, String)} for
 * HTTP 304 support
 */
@Deprecated
public static SyndFeed parseFeedUrl(String feedUrl, OkHttpClient client) {
    return fetchFeed(feedUrl, client, null, null).getFeed();
}

// public static SyndFeed parseFeedUrl(String feedUrl, HttpClient client) {
// HttpRequest request = HttpRequest.newBuilder().GET().uri(URI.create(feedUrl))
// .build();
// HttpResponse<byte[]> response = null;
// try {
// response = client.send(request, HttpResponse.BodyHandlers.ofByteArray());
// } catch (IOException e) {
// throw new RuntimeException(e);
// } catch (InterruptedException e) {
// throw new RuntimeException(e);
// }
// var xmlBytes = response.body();
// Charset encoding = FeedUtils.guessEncoding(xmlBytes);
// String xmlString = XmlUtils.removeInvalidXmlCharacters(new String(xmlBytes,
// encoding));
// if (xmlString == null) {
// throw new ConnectorFetchException("xml fetch failed for url: " + feedUrl);
// }
//
// try {
// SyndFeed feed = new SyndFeedInput().build(new StringReader(xmlString));
// return feed;
// } catch (FeedException e) {
// throw new RuntimeException(e);
// }
// }

// public static SyndFeed parseFeedUrl(String feedUrl) {
// var client = HttpClient.newBuilder().connectTimeout(Duration.ofSeconds(60))
// .followRedirects(HttpClient.Redirect.ALWAYS).build();
// return parseFeedUrl(feedUrl, client);
// }

public static Charset guessEncoding(byte[] bytes) {
String extracted = extractDeclaredEncoding(bytes);
if (StringUtils.startsWithIgnoreCase(extracted, "iso-8859-")) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,12 @@ public synchronized void indexPage(Page page) {
boolean docExists = false;
Directory dir = getDirectory();
if (DirectoryReader.indexExists(dir)) {
DirectoryReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = new IndexSearcher(reader);
Query idQuery = new TermQuery(new Term("id", page.getId().toString()));
TopDocs docs = searcher.search(idQuery, 1);
docExists = docs.totalHits.value > 0;
try (DirectoryReader reader = DirectoryReader.open(dir)) {
IndexSearcher searcher = new IndexSearcher(reader);
Query idQuery = new TermQuery(new Term("id", page.getId().toString()));
TopDocs docs = searcher.search(idQuery, 1);
docExists = docs.totalHits.value > 0;
}
}
Document doc = pageToDocument(page);
if (docExists) {
Expand Down Expand Up @@ -307,136 +308,137 @@ public PageSearchResult searchPages(@NonNull SearchQuery searchQuery) {
try {
Directory dir = getDirectory();
if (DirectoryReader.indexExists(dir)) {
DirectoryReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = new IndexSearcher(reader);
List<FieldQueryInfo> fields = new ArrayList<>();
fields.add(new FieldQueryInfo().setName(DocFields.TITLE).setWildcard(true).setBoost(100));
if (!Boolean.TRUE.equals(option.getOnlySearchTitle())) {
fields.add(new FieldQueryInfo().setName(DocFields.CONTENT).setWildcard(true).setBoost(5));
}
var boolQueryBuilder = new BooleanQuery.Builder();
if (Boolean.TRUE.equals(option.getAlreadyRead())) {
Query query = LongPoint.newRangeQuery(DocFields.LAST_READ_AT, 1, Long.MAX_VALUE);
boolQueryBuilder.add(query, BooleanClause.Occur.MUST);
}
if (option.getType() != null) {
Query query = null;
switch (option.getType()) {
case TWEET:
query = IntPoint.newExactQuery(DocFields.CONTENT_TYPE, ContentType.TWEET.getCode());
break;
case GITHUB_STARRED_REPO:
query = IntPoint.newExactQuery(DocFields.CONNECTOR_TYPE, ConnectorType.GITHUB.getCode());
break;
case BROWSER_HISTORY:
query = IntPoint.newExactQuery(DocFields.CONTENT_TYPE, ContentType.BROWSER_HISTORY.getCode());
break;
case FEEDS:
query = IntPoint.newExactQuery(DocFields.CONNECTOR_TYPE, ConnectorType.RSS.getCode());
break;
default:
break;
try (DirectoryReader reader = DirectoryReader.open(dir)) {
IndexSearcher searcher = new IndexSearcher(reader);
List<FieldQueryInfo> fields = new ArrayList<>();
fields.add(new FieldQueryInfo().setName(DocFields.TITLE).setWildcard(true).setBoost(100));
if (!Boolean.TRUE.equals(option.getOnlySearchTitle())) {
fields.add(new FieldQueryInfo().setName(DocFields.CONTENT).setWildcard(true).setBoost(5));
}
if (query != null) {
var boolQueryBuilder = new BooleanQuery.Builder();
if (Boolean.TRUE.equals(option.getAlreadyRead())) {
Query query = LongPoint.newRangeQuery(DocFields.LAST_READ_AT, 1, Long.MAX_VALUE);
boolQueryBuilder.add(query, BooleanClause.Occur.MUST);
}
}
if (option.getLibrary() != null) {
Query query = null;
switch (option.getLibrary()) {
case MY_LIST:
query = IntPoint.newExactQuery(DocFields.LIBRARY_SAVE_STATUS, LibrarySaveStatus.SAVED.getCode());
break;
case STARRED:
query = new TermQuery(new Term(DocFields.STARRED, "1"));
break;
case READ_LATER:
query = new TermQuery(new Term(DocFields.READ_LATER, "1"));
break;
case ARCHIVE:
query = IntPoint.newExactQuery(DocFields.LIBRARY_SAVE_STATUS, LibrarySaveStatus.ARCHIVED.getCode());
break;
case HIGHLIGHTS:
query = IntPoint.newRangeQuery(DocFields.HIGHLIGHT_COUNT, 1, Integer.MAX_VALUE);
break;
case UNSORTED:
// Unsorted: must be in library (librarySaveStatus > 0) AND no collection assigned
// For backward compatibility with old docs that don't have collection_id field:
// Match docs that do NOT have a valid collectionId (>= 1)
BooleanQuery.Builder unsortedBuilder = new BooleanQuery.Builder();
unsortedBuilder.add(IntPoint.newRangeQuery(DocFields.LIBRARY_SAVE_STATUS, 1, Integer.MAX_VALUE), BooleanClause.Occur.MUST);

// Exclude docs that have a valid collection (collectionId >= 1)
// This covers: collectionId = -1, collectionId = 0, or field doesn't exist (old docs)
unsortedBuilder.add(LongPoint.newRangeQuery(DocFields.COLLECTION_ID, 1L, Long.MAX_VALUE), BooleanClause.Occur.MUST_NOT);

query = unsortedBuilder.build();
break;
default:
break;
if (option.getType() != null) {
Query query = null;
switch (option.getType()) {
case TWEET:
query = IntPoint.newExactQuery(DocFields.CONTENT_TYPE, ContentType.TWEET.getCode());
break;
case GITHUB_STARRED_REPO:
query = IntPoint.newExactQuery(DocFields.CONNECTOR_TYPE, ConnectorType.GITHUB.getCode());
break;
case BROWSER_HISTORY:
query = IntPoint.newExactQuery(DocFields.CONTENT_TYPE, ContentType.BROWSER_HISTORY.getCode());
break;
case FEEDS:
query = IntPoint.newExactQuery(DocFields.CONNECTOR_TYPE, ConnectorType.RSS.getCode());
break;
default:
break;
}
if (query != null) {
boolQueryBuilder.add(query, BooleanClause.Occur.MUST);
}
}
if (query != null) {
boolQueryBuilder.add(query, BooleanClause.Occur.MUST);
if (option.getLibrary() != null) {
Query query = null;
switch (option.getLibrary()) {
case MY_LIST:
query = IntPoint.newExactQuery(DocFields.LIBRARY_SAVE_STATUS, LibrarySaveStatus.SAVED.getCode());
break;
case STARRED:
query = new TermQuery(new Term(DocFields.STARRED, "1"));
break;
case READ_LATER:
query = new TermQuery(new Term(DocFields.READ_LATER, "1"));
break;
case ARCHIVE:
query = IntPoint.newExactQuery(DocFields.LIBRARY_SAVE_STATUS, LibrarySaveStatus.ARCHIVED.getCode());
break;
case HIGHLIGHTS:
query = IntPoint.newRangeQuery(DocFields.HIGHLIGHT_COUNT, 1, Integer.MAX_VALUE);
break;
case UNSORTED:
// Unsorted: must be in library (librarySaveStatus > 0) AND no collection assigned
// For backward compatibility with old docs that don't have collection_id field:
// Match docs that do NOT have a valid collectionId (>= 1)
BooleanQuery.Builder unsortedBuilder = new BooleanQuery.Builder();
unsortedBuilder.add(IntPoint.newRangeQuery(DocFields.LIBRARY_SAVE_STATUS, 1, Integer.MAX_VALUE), BooleanClause.Occur.MUST);

// Exclude docs that have a valid collection (collectionId >= 1)
// This covers: collectionId = -1, collectionId = 0, or field doesn't exist (old docs)
unsortedBuilder.add(LongPoint.newRangeQuery(DocFields.COLLECTION_ID, 1L, Long.MAX_VALUE), BooleanClause.Occur.MUST_NOT);

query = unsortedBuilder.build();
break;
default:
break;
}
if (query != null) {
boolQueryBuilder.add(query, BooleanClause.Occur.MUST);
}
}
}

for (AdvancedSearch advancedSearch : completeSearch.advancedSearches) {
if (CollectionUtils.isEmpty(advancedSearch.words)) {
continue;
}
var advancedSearchQueryBuilder = new BooleanQuery.Builder();
for (String word : advancedSearch.words) {
var query = new WildcardQuery(new Term(advancedSearch.docField, "*" + word + "*"));
advancedSearchQueryBuilder.add(query, BooleanClause.Occur.SHOULD);
for (AdvancedSearch advancedSearch : completeSearch.advancedSearches) {
if (CollectionUtils.isEmpty(advancedSearch.words)) {
continue;
}
var advancedSearchQueryBuilder = new BooleanQuery.Builder();
for (String word : advancedSearch.words) {
var query = new WildcardQuery(new Term(advancedSearch.docField, "*" + word + "*"));
advancedSearchQueryBuilder.add(query, BooleanClause.Occur.SHOULD);
}
boolQueryBuilder.add(advancedSearchQueryBuilder.build(), BooleanClause.Occur.MUST);
}
boolQueryBuilder.add(advancedSearchQueryBuilder.build(), BooleanClause.Occur.MUST);
}

// Filter by collection IDs if specified
if (!CollectionUtils.isEmpty(completeSearch.getCollectionIds())) {
var collectionQueryBuilder = new BooleanQuery.Builder();
for (Long collectionId : completeSearch.getCollectionIds()) {
Query collectionQuery = LongPoint.newExactQuery(DocFields.COLLECTION_ID, collectionId);
collectionQueryBuilder.add(collectionQuery, BooleanClause.Occur.SHOULD);
// Filter by collection IDs if specified
if (!CollectionUtils.isEmpty(completeSearch.getCollectionIds())) {
var collectionQueryBuilder = new BooleanQuery.Builder();
for (Long collectionId : completeSearch.getCollectionIds()) {
Query collectionQuery = LongPoint.newExactQuery(DocFields.COLLECTION_ID, collectionId);
collectionQueryBuilder.add(collectionQuery, BooleanClause.Occur.SHOULD);
}
boolQueryBuilder.add(collectionQueryBuilder.build(), BooleanClause.Occur.MUST);
}
boolQueryBuilder.add(collectionQueryBuilder.build(), BooleanClause.Occur.MUST);
}

for (String word : words) {
var wordQueryBuilder = new BooleanQuery.Builder();
for (FieldQueryInfo field : fields) {
Query query;
if (field.isWildcard()) {
query = new WildcardQuery(new Term(field.getName(), "*" + word + "*"));
} else {
query = new TermQuery(new Term(field.getName(), word));
for (String word : words) {
var wordQueryBuilder = new BooleanQuery.Builder();
for (FieldQueryInfo field : fields) {
Query query;
if (field.isWildcard()) {
query = new WildcardQuery(new Term(field.getName(), "*" + word + "*"));
} else {
query = new TermQuery(new Term(field.getName(), word));
}
BoostQuery boosted = new BoostQuery(query, field.getBoost());
wordQueryBuilder.add(boosted, BooleanClause.Occur.SHOULD);
}
BoostQuery boosted = new BoostQuery(query, field.getBoost());
wordQueryBuilder.add(boosted, BooleanClause.Occur.SHOULD);
boolQueryBuilder.add(wordQueryBuilder.build(), BooleanClause.Occur.MUST);
}
boolQueryBuilder.add(wordQueryBuilder.build(), BooleanClause.Occur.MUST);
}
StopWatch sw = new StopWatch();
sw.start();
var page = ObjectUtils.defaultIfNull(searchQuery.getPage(), 1);
var size = PageSizeUtils.getPageSize(searchQuery.getSize(), 100);
var maxPage = 10000;
int startIndex = (page - 1) * size;
TopScoreDocCollector collector = TopScoreDocCollector.create(page * size, maxPage);
searcher.search(boolQueryBuilder.build(), collector);
TopDocs docs = collector.topDocs(startIndex, size);
if (docs.totalHits.value > 0) {
var hits = docs.scoreDocs;
for (ScoreDoc hit : hits) {
var doc = searcher.doc(hit.doc);
PageItem item = docToPageItem(doc);
pageItems.add(item);
StopWatch sw = new StopWatch();
sw.start();
var page = ObjectUtils.defaultIfNull(searchQuery.getPage(), 1);
var size = PageSizeUtils.getPageSize(searchQuery.getSize(), 100);
var maxPage = 10000;
int startIndex = (page - 1) * size;
TopScoreDocCollector collector = TopScoreDocCollector.create(page * size, maxPage);
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If a request passes page <= 0, then startIndex = (page - 1) * size becomes negative and the page * size value passed to TopScoreDocCollector.create(...) becomes zero or negative, so Lucene may throw at query time. Consider clamping page to at least 1 before using it in these calculations.

Severity: medium

Fix This in Augment

🤖 Was this useful? React with 👍 or 👎, or 🚀 if it prevented an incident/outage.

searcher.search(boolQueryBuilder.build(), collector);
TopDocs docs = collector.topDocs(startIndex, size);
if (docs.totalHits.value > 0) {
var hits = docs.scoreDocs;
for (ScoreDoc hit : hits) {
var doc = searcher.doc(hit.doc);
PageItem item = docToPageItem(doc);
pageItems.add(item);
}
}
sw.stop();
searchResult.setPage(page);
searchResult.setTotalHits(docs.totalHits.value);
searchResult.setCostSeconds(sw.getTotalTimeSeconds());
}
sw.stop();
searchResult.setPage(page);
searchResult.setTotalHits(docs.totalHits.value);
searchResult.setCostSeconds(sw.getTotalTimeSeconds());
}
} catch (IOException e) {
throw new RuntimeException(e);
Expand Down
Loading
Loading