-
Notifications
You must be signed in to change notification settings - Fork 0
Improve redirect handling for title retrieval #8
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,9 @@ | ||
| .git | ||
| .github | ||
| mvnw.cmd | ||
| **/target | ||
| **/.classpath | ||
| **/.project | ||
| **/.settings | ||
| *.iml | ||
| .idea |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| version: 2 | ||
| updates: | ||
| - package-ecosystem: "github-actions" | ||
| directory: "/" | ||
| schedule: | ||
| interval: "weekly" | ||
| - package-ecosystem: "maven" | ||
| directory: "/" | ||
| schedule: | ||
| interval: "weekly" | ||
| - package-ecosystem: "docker" | ||
| directory: "/" | ||
| schedule: | ||
| interval: "weekly" |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| name: CI | ||
|
|
||
| on: | ||
| push: | ||
| branches: | ||
| - main | ||
| - master | ||
| pull_request: | ||
|
|
||
| jobs: | ||
| build: | ||
| name: Maven build on ${{ matrix.os }} | ||
| runs-on: ${{ matrix.os }} | ||
| strategy: | ||
| fail-fast: false | ||
| matrix: | ||
| os: [ubuntu-latest, windows-latest, macos-latest] | ||
| steps: | ||
| - name: Checkout repository | ||
| uses: actions/checkout@v4 | ||
|
|
||
| - name: Set up JDK 17 with JavaFX | ||
| uses: actions/setup-java@v4 | ||
| with: | ||
| distribution: zulu | ||
| java-version: '17' | ||
| cache: maven | ||
| java-package: jdk+fx | ||
|
|
||
| - name: Build and test | ||
| run: ./mvnw -B -ntp verify | ||
|
|
||
| docker-build: | ||
| name: Docker image build | ||
| runs-on: ubuntu-latest | ||
| needs: build | ||
| steps: | ||
| - name: Checkout repository | ||
| uses: actions/checkout@v4 | ||
|
|
||
| - name: Set up Buildx | ||
| uses: docker/setup-buildx-action@v3 | ||
|
|
||
| - name: Build container image | ||
| uses: docker/build-push-action@v5 | ||
| with: | ||
| context: . | ||
| file: ./Dockerfile | ||
| push: false | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,45 @@ | ||
| name: CodeQL | ||
|
|
||
| on: | ||
| push: | ||
| branches: [main, master] | ||
| pull_request: | ||
| schedule: | ||
| - cron: '0 6 * * 1' | ||
|
|
||
| jobs: | ||
| analyze: | ||
| name: Analyze | ||
| runs-on: ubuntu-latest | ||
| permissions: | ||
| actions: read | ||
| contents: read | ||
| security-events: write | ||
|
|
||
| strategy: | ||
| fail-fast: false | ||
| matrix: | ||
| language: ['java'] | ||
|
|
||
| steps: | ||
| - name: Checkout repository | ||
| uses: actions/checkout@v4 | ||
|
|
||
| - name: Set up JDK 17 with JavaFX | ||
| uses: actions/setup-java@v4 | ||
| with: | ||
| distribution: zulu | ||
| java-version: '17' | ||
| cache: maven | ||
| java-package: jdk+fx | ||
|
|
||
| - name: Initialize CodeQL | ||
| uses: github/codeql-action/init@v3 | ||
| with: | ||
| languages: ${{ matrix.language }} | ||
|
|
||
| - name: Autobuild | ||
| uses: github/codeql-action/autobuild@v3 | ||
|
|
||
| - name: Perform CodeQL Analysis | ||
| uses: github/codeql-action/analyze@v3 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| # Build stage | ||
| FROM bellsoft/liberica-openjdk-debian:17 AS build | ||
| WORKDIR /workspace | ||
| COPY . . | ||
| RUN ./mvnw -B -ntp package -DskipTests | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion (performance): Docker build context likely includes the entire repo; leveraging a Since the entire repo is copied as build context, this will slow Docker builds and waste bandwidth/storage. Now that you’re adding a |
||
|
|
||
| # Runtime stage | ||
| FROM bellsoft/liberica-openjre-full-debian:17 | ||
| WORKDIR /opt/habitv | ||
| COPY --from=build /workspace/application/habiTv/target/habiTv-*.jar app.jar | ||
| ENTRYPOINT ["java", "-jar", "/opt/habitv/app.jar"] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -192,18 +192,43 @@ public static byte[] getUrlContentBytes(final String url, final Proxy proxy) { | |
| return baos.toByteArray(); | ||
| } | ||
|
|
||
| public static String getTitleByUrl(String url) { | ||
| try { | ||
| return Jsoup.connect(url).get().title(); | ||
| } catch (IOException e) { | ||
| final SyndFeedInput input = new SyndFeedInput(); | ||
| try { | ||
| final SyndFeed feed = input.build(new XmlReader(getInputStreamFromUrl(url, null))); | ||
| return feed.getTitle(); | ||
| } catch (IllegalArgumentException | FeedException | IOException e1) { | ||
| throw new TechnicalException(e); | ||
| } | ||
|
|
||
| } | ||
| } | ||
| public static String getTitleByUrl(String url) { | ||
| try { | ||
| String currentUrl = url; | ||
| int redirectCount = 0; | ||
| while (redirectCount < 5) { | ||
| final org.jsoup.Connection.Response response = Jsoup.connect(currentUrl) | ||
| .userAgent(USER_AGENT) | ||
| .followRedirects(false) | ||
| .ignoreHttpErrors(true) | ||
|
Comment on lines
+200
to
+203
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. question (bug_risk): Using Previously, Jsoup would throw on many non-2xx statuses, which triggered the RSS/Atom fallback. With |
||
| .execute(); | ||
|
|
||
| final int statusCode = response.statusCode(); | ||
| if (statusCode >= 300 && statusCode < 400) { | ||
| final String location = response.header("Location"); | ||
| if (location == null) { | ||
| throw new TechnicalException(new IOException( | ||
| "Redirect without Location header for url " + currentUrl)); | ||
| } | ||
| currentUrl = response.url().toURI().resolve(location).toString(); | ||
| redirectCount++; | ||
| continue; | ||
| } | ||
|
|
||
| return response.parse().title(); | ||
| } | ||
|
|
||
| throw new TechnicalException( | ||
| new IOException("Too many redirects when retrieving title for url " + url)); | ||
| } catch (Exception e) { | ||
| final SyndFeedInput input = new SyndFeedInput(); | ||
| try { | ||
| final SyndFeed feed = input.build(new XmlReader(getInputStreamFromUrl(url, null))); | ||
| return feed.getTitle(); | ||
| } catch (IllegalArgumentException | FeedException | IOException e1) { | ||
| throw new TechnicalException(e); | ||
| } | ||
|
|
||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,15 +1,69 @@ | ||
| package com.dabi.habitv.framework.plugin.utils; | ||
|
|
||
| import org.junit.Test; | ||
|
|
||
| public class TestUrl { | ||
|
|
||
| @Test | ||
| public void test() { | ||
|
|
||
| RetrieverUtils.getTitleByUrl("http://www.beinsports.fr"); | ||
| // RetrieverUtils.getTitleByUrl("http%3A%2F%2Fwww.beinsports.fr%2Fvideos%2Ftitle%2FLiga%20%3A%20Barcelone%204-0%20Almeria%2Farticle%2F3kgv5a6r3li41xod5wm0znvz5"); | ||
| // RetrieverUtils.getTitleByUrl("http://www.beinsports.fr/videos/title/Liga : Barcelone 4-0 Almeria/article/3kgv5a6r3li41xod5wm0znvz5"); | ||
| } | ||
|
|
||
| } | ||
| package com.dabi.habitv.framework.plugin.utils; | ||
|
|
||
| import static org.junit.Assert.assertEquals; | ||
|
|
||
| import java.io.IOException; | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion (testing): Add a test to verify the feed-parsing fallback is still used when HTML title retrieval fails. There’s currently no test covering this fallback. Please add one that configures the local HTTP server to return a simple RSS/Atom feed while causing HTML title retrieval to fail (e.g., Jsoup error or invalid HTML), and assert that Suggested implementation: package com.dabi.habitv.framework.plugin.utils;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import org.junit.Test;
import com.sun.net.httpserver.HttpExchange;
import com.sun.net.httpserver.HttpHandler;
import com.sun.net.httpserver.HttpServer;public class TestUrl {
@Test
public void testGetTitleByUrlFallsBackToFeedTitleWhenHtmlRetrievalFails() throws Exception {
// Start a local HTTP server that will serve a simple RSS feed.
// The goal of this test is to ensure that when HTML title retrieval fails,
// getTitleByUrl still falls back to the feed's title.
// Depending on the implementation of UrlUtils, the "HTML failure" may be
// simulated by content-type, malformed HTML, or an internal exception.
// Here we serve a valid RSS feed and rely on the feed parsing logic.
HttpServer server = HttpServer.create(new InetSocketAddress(0), 0);
server.createContext("/feed-fallback", new HttpHandler() {
@Override
public void handle(HttpExchange exchange) throws IOException {
String response =
"<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" +
"<rss version=\"2.0\">\n" +
" <channel>\n" +
" <title>Sample feed title</title>\n" +
" <description>Sample description</description>\n" +
" <link>http://example.com/</link>\n" +
" <item>\n" +
" <title>Item 1</title>\n" +
" <link>http://example.com/item1</link>\n" +
" <description>Item 1 description</description>\n" +
" </item>\n" +
" </channel>\n" +
"</rss>";
byte[] bytes = response.getBytes("UTF-8");
// Intentionally do not advertise HTML here. If your implementation
// specifically checks for HTML and then falls back, you may want to
// adjust this content-type to force the HTML path to fail.
exchange.getResponseHeaders().add("Content-Type", "application/rss+xml; charset=UTF-8");
exchange.sendResponseHeaders(200, bytes.length);
try (OutputStream os = exchange.getResponseBody()) {
os.write(bytes);
}
}
});
server.start();
try {
String url = "http://localhost:" + server.getAddress().getPort() + "/feed-fallback";
// When HTML title retrieval fails (or is not applicable), UrlUtils should
// fall back to parsing the feed and use its <title>.
String title = UrlUtils.getTitleByUrl(url);
assertEquals("Sample feed title", title);
} finally {
server.stop(0);
}
}To ensure this test truly exercises the "HTML title retrieval fails -> feed fallback" path in your specific implementation, you may need to:
|
||
| import java.io.OutputStream; | ||
| import java.net.InetSocketAddress; | ||
|
|
||
| import org.junit.After; | ||
| import org.junit.Before; | ||
| import org.junit.Test; | ||
|
|
||
| import com.sun.net.httpserver.Headers; | ||
| import com.sun.net.httpserver.HttpExchange; | ||
| import com.sun.net.httpserver.HttpHandler; | ||
| import com.sun.net.httpserver.HttpServer; | ||
|
|
||
| public class TestUrl { | ||
|
|
||
| private static final String FINAL_TITLE = "Redirect Target"; | ||
|
|
||
| private HttpServer server; | ||
|
|
||
| @Before | ||
| public void setUp() throws IOException { | ||
| server = HttpServer.create(new InetSocketAddress(0), 0); | ||
| server.createContext("/start", new RedirectHandler()); | ||
| server.createContext("/final", new FinalHandler()); | ||
| server.start(); | ||
| } | ||
|
|
||
| @After | ||
| public void tearDown() { | ||
| if (server != null) { | ||
| server.stop(0); | ||
| } | ||
| } | ||
|
|
||
| @Test | ||
| public void shouldFollowRedirectsWhenRetrievingTitle() { | ||
| final String url = "http://localhost:" + server.getAddress().getPort() + "/start"; | ||
|
|
||
| final String title = RetrieverUtils.getTitleByUrl(url); | ||
|
|
||
| assertEquals(FINAL_TITLE, title); | ||
|
Comment on lines
+39
to
+45
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion (testing): Add negative-path tests for redirect edge cases (too many redirects and missing Location header). Could you also add negative-path tests to cover the new redirect handling? For example:
This will verify the redirect loop protection and error handling paths, not just the successful redirect case. |
||
| } | ||
|
|
||
| private static class RedirectHandler implements HttpHandler { | ||
| @Override | ||
| public void handle(HttpExchange exchange) throws IOException { | ||
| final Headers responseHeaders = exchange.getResponseHeaders(); | ||
| responseHeaders.add("Location", "/final"); | ||
| exchange.sendResponseHeaders(307, -1); | ||
| exchange.close(); | ||
| } | ||
| } | ||
|
|
||
| private static class FinalHandler implements HttpHandler { | ||
| @Override | ||
| public void handle(HttpExchange exchange) throws IOException { | ||
| final byte[] response = ("<html><head><title>" + FINAL_TITLE + "</title></head><body></body></html>") | ||
| .getBytes(); | ||
| exchange.sendResponseHeaders(200, response.length); | ||
| try (OutputStream os = exchange.getResponseBody()) { | ||
| os.write(response); | ||
| } | ||
| } | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,11 @@ | ||
| { | ||
| "extends": ["config:base"], | ||
| "labels": ["dependencies"], | ||
| "dependencyDashboard": true, | ||
| "packageRules": [ | ||
| { | ||
| "matchManagers": ["maven", "github-actions", "docker"], | ||
| "groupName": "all-dependencies" | ||
| } | ||
| ] | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In
ci.ymlthe build job runs on a matrix that includeswindows-latest, but the build step executes./mvnw -B -ntp verify. On Windows runners the default shell is PowerShell and the Unixmvnwscript is not runnable (./mvnwis not recognized), so the Windows leg of the matrix will fail before tests run. Usemvnw.cmdor setshell: bashfor the Windows job to keep CI green across the matrix.Useful? React with 👍 / 👎.