diff --git a/CHANGES b/CHANGES index e8dbb535fd..b199135ccb 100644 --- a/CHANGES +++ b/CHANGES @@ -29,6 +29,8 @@ Release 1.17.1 [PENDING] * Bugfix: in W3CDom, if the jsoup input document contained an empty doctype, the conversion would fail with a DOMException. Now, said doctype is discarded, and the conversion continues. + * Build Improvement: added a local test proxy implementation, for proxy integration tests. + Release 1.16.2 [20-Oct-2023] * Improvement: optimized the performance of complex CSS selectors, by adding a cost-based query planner. Evaluators are sorted by their relative execution cost, and executed in order of lower to higher cost. This speeds the diff --git a/src/test/java/org/jsoup/integration/ConnectTest.java b/src/test/java/org/jsoup/integration/ConnectTest.java index f02840a0cb..1043489720 100644 --- a/src/test/java/org/jsoup/integration/ConnectTest.java +++ b/src/test/java/org/jsoup/integration/ConnectTest.java @@ -78,7 +78,7 @@ public void exceptOnUnsupportedProtocol() { assertTrue(threw); } - private static String ihVal(String key, Document doc) { + static String ihVal(String key, Document doc) { final Element first = doc.select("th:contains(" + key + ") + td").first(); return first != null ? 
first.text() : null; } @@ -403,7 +403,7 @@ public void multiCookieSet() throws IOException { @Test public void supportsDeflate() throws IOException { - Connection.Response res = Jsoup.connect(Deflateservlet.Url).execute(); + Connection.Response res = Jsoup.connect(DeflateServlet.Url).execute(); assertEquals("deflate", res.header("Content-Encoding")); Document doc = res.parse(); diff --git a/src/test/java/org/jsoup/integration/ProxyTest.java b/src/test/java/org/jsoup/integration/ProxyTest.java new file mode 100644 index 0000000000..2650bf48f9 --- /dev/null +++ b/src/test/java/org/jsoup/integration/ProxyTest.java @@ -0,0 +1,75 @@ +package org.jsoup.integration; + +import org.jsoup.Connection; +import org.jsoup.Jsoup; +import org.jsoup.integration.servlets.EchoServlet; +import org.jsoup.integration.servlets.FileServlet; +import org.jsoup.integration.servlets.HelloServlet; +import org.jsoup.integration.servlets.ProxyServlet; +import org.jsoup.integration.servlets.RedirectServlet; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.Test; + +import java.io.IOException; + +import static org.jsoup.integration.ConnectTest.ihVal; +import static org.junit.jupiter.api.Assertions.assertEquals; + +/** + Tests Jsoup.connect proxy support + */ +public class ProxyTest { + private static String echoUrl; + private static TestServer.ProxySettings proxy; + + @BeforeAll + public static void setUp() { + echoUrl = EchoServlet.Url; + proxy = ProxyServlet.ProxySettings; + } + + @Test void fetchViaProxy() throws IOException { + Connection con = Jsoup.connect(HelloServlet.Url) + .proxy(proxy.hostname, proxy.port); + + Connection.Response res = con.execute(); + assertVia(res); + + Document doc = res.parse(); + Element p = doc.expectFirst("p"); + assertEquals("Hello, World!", p.text()); + } + + private static void assertVia(Connection.Response res) { + assertEquals(res.header("Via"), ProxyServlet.Via); + } + + 
@Test void redirectViaProxy() throws IOException { + Connection.Response res = Jsoup + .connect(RedirectServlet.Url) + .data(RedirectServlet.LocationParam, echoUrl) + .header("Random-Header-name", "hello") + .proxy(proxy.hostname, proxy.port) + .execute(); + + assertVia(res); + Document doc = res.parse(); + assertEquals(echoUrl, doc.location()); + assertEquals("hello", ihVal("Random-Header-name", doc)); + assertVia(res); + } + + @Test void proxyForSession() throws IOException { + Connection session = Jsoup.newSession().proxy(proxy.hostname, proxy.port); + + Connection.Response medRes = session.newRequest().url(FileServlet.urlTo("/htmltests/medium.html")).execute(); + Connection.Response largeRes = session.newRequest().url(FileServlet.urlTo("/htmltests/large.html")).execute(); + + assertVia(medRes); + assertVia(largeRes); + assertEquals("Medium HTML", medRes.parse().title()); + assertEquals("Large HTML", largeRes.parse().title()); + } +} diff --git a/src/test/java/org/jsoup/integration/TestServer.java b/src/test/java/org/jsoup/integration/TestServer.java index fa370d1524..3284c2aca6 100644 --- a/src/test/java/org/jsoup/integration/TestServer.java +++ b/src/test/java/org/jsoup/integration/TestServer.java @@ -4,15 +4,28 @@ import org.eclipse.jetty.server.ServerConnector; import org.eclipse.jetty.servlet.ServletHandler; import org.jsoup.integration.servlets.BaseServlet; +import org.jsoup.integration.servlets.ProxyServlet; import java.net.InetSocketAddress; public class TestServer { - private static final Server jetty = new Server(new InetSocketAddress("localhost", 0)); + private static final String localhost = "localhost"; + private static final Server jetty = newServer(); private static final ServletHandler handler = new ServletHandler(); + static int port; + + private static final Server proxy = newServer(); + private static final ServletHandler proxyHandler = new ServletHandler(); + private static final ProxySettings proxySettings = new ProxySettings(); + + private 
static Server newServer() { + return new Server(new InetSocketAddress(localhost, 0)); + } static { jetty.setHandler(handler); + proxy.setHandler(proxyHandler); + proxyHandler.addServletWithMapping(ProxyServlet.class, "/*"); } private TestServer() { @@ -20,8 +33,14 @@ private TestServer() { public static void start() { synchronized (jetty) { + if (jetty.isStarted()) return; + try { jetty.start(); // jetty will safely no-op a start on an already running instance + port = ((ServerConnector) jetty.getConnectors()[0]).getLocalPort(); + + proxy.start(); + proxySettings.port = ((ServerConnector) proxy.getConnectors()[0]).getLocalPort(); } catch (Exception e) { throw new IllegalStateException(e); } @@ -35,8 +54,22 @@ public static String map(Class servletClass) { String path = "/" + servletClass.getSimpleName(); handler.addServletWithMapping(servletClass, path + "/*"); - int port = ((ServerConnector) jetty.getConnectors()[0]).getLocalPort(); - return "http://localhost:" + port + path; + return "http://" + localhost + ":" + port + path; + } + } + + public static ProxySettings proxySettings(Class servletClass) { + synchronized (jetty) { + if (!jetty.isStarted()) + start(); // if running out of the test cases + + return proxySettings; } } + + //public static String proxy + public static class ProxySettings { + final String hostname = localhost; + int port; + } } diff --git a/src/test/java/org/jsoup/integration/servlets/BaseServlet.java b/src/test/java/org/jsoup/integration/servlets/BaseServlet.java index 46d8e477e0..57acca8fe7 100644 --- a/src/test/java/org/jsoup/integration/servlets/BaseServlet.java +++ b/src/test/java/org/jsoup/integration/servlets/BaseServlet.java @@ -9,24 +9,25 @@ public abstract class BaseServlet extends HttpServlet { static final String TextHtml = "text/html; charset=UTF-8"; - // these are overridden just to get the response name to be 'res' not 'resp' + abstract protected void doIt(HttpServletRequest req, HttpServletResponse res) throws IOException, 
ServletException; + @Override - protected void doGet(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException { - super.doGet(req, res); + protected void doGet(HttpServletRequest req, HttpServletResponse res) throws IOException, ServletException { + doIt(req, res); } @Override - protected void doPost(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException { - super.doPost(req, res); + protected void doPost(HttpServletRequest req, HttpServletResponse res) throws IOException, ServletException { + doIt(req, res); } @Override - protected void doPut(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException { - super.doPut(req, res); + protected void doPut(HttpServletRequest req, HttpServletResponse res) throws IOException, ServletException { + doIt(req, res); } @Override - protected void doDelete(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException { - super.doPut(req, res); + protected void doDelete(HttpServletRequest req, HttpServletResponse res) throws IOException, ServletException { + doIt(req, res); } } diff --git a/src/test/java/org/jsoup/integration/servlets/CookieServlet.java b/src/test/java/org/jsoup/integration/servlets/CookieServlet.java index 2249f971d8..fefe3fe08e 100644 --- a/src/test/java/org/jsoup/integration/servlets/CookieServlet.java +++ b/src/test/java/org/jsoup/integration/servlets/CookieServlet.java @@ -2,35 +2,19 @@ import org.jsoup.integration.TestServer; -import javax.servlet.ServletException; import javax.servlet.http.Cookie; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; import java.io.IOException; import java.io.PrintWriter; -public class CookieServlet extends BaseServlet{ +public class CookieServlet extends BaseServlet { public static final String Url = TestServer.map(CookieServlet.class); public static final String SetCookiesParam = "setCookies"; public static final String 
LocationParam = "loc"; - @Override - protected void doGet(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException { - doIt(req, res); - } - - @Override - protected void doPost(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException { - doIt(req, res); - } - - @Override - protected void doPut(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException { - doIt(req, res); - } - - private void doIt(HttpServletRequest req, HttpServletResponse res) throws IOException { + protected void doIt(HttpServletRequest req, HttpServletResponse res) throws IOException { // Do we want to set cookies? if (req.getParameter(SetCookiesParam) != null) setCookies(res); diff --git a/src/test/java/org/jsoup/integration/servlets/Deflateservlet.java b/src/test/java/org/jsoup/integration/servlets/DeflateServlet.java similarity index 83% rename from src/test/java/org/jsoup/integration/servlets/Deflateservlet.java rename to src/test/java/org/jsoup/integration/servlets/DeflateServlet.java index 13af91624e..bf321c19c4 100644 --- a/src/test/java/org/jsoup/integration/servlets/Deflateservlet.java +++ b/src/test/java/org/jsoup/integration/servlets/DeflateServlet.java @@ -9,11 +9,11 @@ import java.util.zip.Deflater; import java.util.zip.DeflaterOutputStream; -public class Deflateservlet extends BaseServlet { - public static final String Url = TestServer.map(Deflateservlet.class); +public class DeflateServlet extends BaseServlet { + public static final String Url = TestServer.map(DeflateServlet.class); @Override - protected void doGet(HttpServletRequest req, HttpServletResponse res) throws IOException { + protected void doIt(HttpServletRequest req, HttpServletResponse res) throws IOException { res.setContentType(TextHtml); res.setStatus(HttpServletResponse.SC_OK); res.setHeader("Content-Encoding", "deflate"); diff --git a/src/test/java/org/jsoup/integration/servlets/EchoServlet.java 
b/src/test/java/org/jsoup/integration/servlets/EchoServlet.java index c3a9c44a3f..0039c0420b 100644 --- a/src/test/java/org/jsoup/integration/servlets/EchoServlet.java +++ b/src/test/java/org/jsoup/integration/servlets/EchoServlet.java @@ -26,26 +26,7 @@ public class EchoServlet extends BaseServlet { private static final int DefaultCode = HttpServletResponse.SC_OK; @Override - protected void doGet(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException { - doIt(req, res); - } - - @Override - protected void doPost(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException { - doIt(req, res); - } - - @Override - protected void doPut(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException { - doIt(req, res); - } - - @Override - protected void doDelete(HttpServletRequest req, HttpServletResponse res) throws ServletException, IOException { - doIt(req, res); - } - - private void doIt(HttpServletRequest req, HttpServletResponse res) throws IOException, ServletException { + protected void doIt(HttpServletRequest req, HttpServletResponse res) throws IOException, ServletException { int intCode = DefaultCode; String code = req.getHeader(CodeParam); if (code != null) diff --git a/src/test/java/org/jsoup/integration/servlets/FileServlet.java b/src/test/java/org/jsoup/integration/servlets/FileServlet.java index 7447e8c777..db68e18fc1 100644 --- a/src/test/java/org/jsoup/integration/servlets/FileServlet.java +++ b/src/test/java/org/jsoup/integration/servlets/FileServlet.java @@ -16,7 +16,7 @@ public class FileServlet extends BaseServlet { public static final String DefaultType = "text/html"; @Override - protected void doGet(HttpServletRequest req, HttpServletResponse res) throws IOException { + protected void doIt(HttpServletRequest req, HttpServletResponse res) throws IOException { String contentType = req.getParameter(ContentTypeParam); if (contentType == null) contentType = DefaultType; @@ 
-33,16 +33,11 @@ protected void doGet(HttpServletRequest req, HttpServletResponse res) throws IOE Files.copy(file.toPath(), out); out.flush(); } else { - res.setStatus(HttpServletResponse.SC_NOT_FOUND); + res.sendError(HttpServletResponse.SC_NOT_FOUND); } } public static String urlTo(String path) { return Url + path; } - - @Override - protected void doPost(HttpServletRequest req, HttpServletResponse res) throws IOException { - doGet(req, res); - } } diff --git a/src/test/java/org/jsoup/integration/servlets/HelloServlet.java b/src/test/java/org/jsoup/integration/servlets/HelloServlet.java index 7a7924c775..4c9f380287 100644 --- a/src/test/java/org/jsoup/integration/servlets/HelloServlet.java +++ b/src/test/java/org/jsoup/integration/servlets/HelloServlet.java @@ -10,7 +10,7 @@ public class HelloServlet extends BaseServlet { public static final String Url = TestServer.map(HelloServlet.class); @Override - protected void doGet(HttpServletRequest req, HttpServletResponse res) throws IOException { + protected void doIt(HttpServletRequest req, HttpServletResponse res) throws IOException { res.setContentType(TextHtml); res.setStatus(HttpServletResponse.SC_OK); diff --git a/src/test/java/org/jsoup/integration/servlets/InterruptedServlet.java b/src/test/java/org/jsoup/integration/servlets/InterruptedServlet.java index 22180e13d5..67554c5a3a 100644 --- a/src/test/java/org/jsoup/integration/servlets/InterruptedServlet.java +++ b/src/test/java/org/jsoup/integration/servlets/InterruptedServlet.java @@ -12,9 +12,8 @@ public class InterruptedServlet extends BaseServlet { public static final String Magnitude = "magnitude"; public static final String Larger = "larger"; - @Override - protected void doGet(HttpServletRequest req, HttpServletResponse res) throws IOException { + protected void doIt(HttpServletRequest req, HttpServletResponse res) throws IOException { String magnitude = req.getParameter(Magnitude); magnitude = magnitude == null ? 
"" : magnitude; res.setContentType(TextHtml); diff --git a/src/test/java/org/jsoup/integration/servlets/ProxyServlet.java b/src/test/java/org/jsoup/integration/servlets/ProxyServlet.java new file mode 100644 index 0000000000..0c9e30e55b --- /dev/null +++ b/src/test/java/org/jsoup/integration/servlets/ProxyServlet.java @@ -0,0 +1,74 @@ +package org.jsoup.integration.servlets; + +import org.jsoup.Connection; +import org.jsoup.Jsoup; +import org.jsoup.integration.TestServer; + +import javax.servlet.ServletException; +import javax.servlet.ServletOutputStream; +import javax.servlet.http.HttpServletRequest; +import javax.servlet.http.HttpServletResponse; +import java.io.BufferedInputStream; +import java.io.IOException; +import java.util.Enumeration; +import java.util.Iterator; +import java.util.List; +import java.util.Map; + +public class ProxyServlet extends BaseServlet{ + public static TestServer.ProxySettings ProxySettings = TestServer.proxySettings(ProxyServlet.class); + public static String Via = "1.1 jsoup test proxy"; + + @Override + protected void doIt(HttpServletRequest req, HttpServletResponse res) throws IOException, ServletException { + StringBuffer urlBuf = req.getRequestURL(); + if (req.getQueryString() != null) { + urlBuf.append('?').append(req.getQueryString()); + } + String url = urlBuf.toString(); + //log("Proxying URL: " + url); + + Connection.Method method = Enum.valueOf(Connection.Method.class, req.getMethod()); + Connection fetch = Jsoup.connect(url) + .method(method) + .followRedirects(false) + .ignoreHttpErrors(true); + + // request headers + Enumeration headerNames = req.getHeaderNames(); + while (headerNames.hasMoreElements()) { + String name = headerNames.nextElement(); + Enumeration values = req.getHeaders(name); + while (values.hasMoreElements()) { + String value = values.nextElement(); + //System.out.println("Header: " + name + " = " + value); + fetch.header(name, value); // todo - this invocation will replace existing header, not add + } + 
} + + // execute + Connection.Response fetchRes = fetch.execute(); + res.setStatus(fetchRes.statusCode()); + + // write the response headers + res.addHeader("Via", Via); + for (Map.Entry<String, List<String>> entry : fetchRes.multiHeaders().entrySet()) { + String header = entry.getKey(); + for (String value : entry.getValue()) { + res.addHeader(header,value); + } + } + + // write the body + ServletOutputStream outputStream = res.getOutputStream(); + BufferedInputStream inputStream = fetchRes.bodyStream(); + byte[] buffer = new byte[1024]; + int bytesRead; + while ((bytesRead = inputStream.read(buffer)) != -1) { + outputStream.write(buffer, 0, bytesRead); + } + + outputStream.close(); + inputStream.close(); + } +} diff --git a/src/test/java/org/jsoup/integration/servlets/RedirectServlet.java b/src/test/java/org/jsoup/integration/servlets/RedirectServlet.java index 5ab52a0bd5..1f90c2f999 100644 --- a/src/test/java/org/jsoup/integration/servlets/RedirectServlet.java +++ b/src/test/java/org/jsoup/integration/servlets/RedirectServlet.java @@ -5,6 +5,7 @@ import javax.servlet.http.Cookie; import javax.servlet.http.HttpServletRequest; import javax.servlet.http.HttpServletResponse; +import java.io.IOException; public class RedirectServlet extends BaseServlet { public static final String Url = TestServer.map(RedirectServlet.class); @@ -14,7 +15,7 @@ public class RedirectServlet extends BaseServlet { private static final int DefaultCode = HttpServletResponse.SC_MOVED_TEMPORARILY; @Override - protected void doGet(HttpServletRequest req, HttpServletResponse res) { + protected void doIt(HttpServletRequest req, HttpServletResponse res) throws IOException { String location = req.getParameter(LocationParam); if (location == null) location = ""; @@ -31,10 +32,7 @@ protected void doGet(HttpServletRequest req, HttpServletResponse res) { res.setHeader("Location", location); res.setStatus(intCode); + res.flushBuffer(); } - @Override - protected void doPost(HttpServletRequest req, HttpServletResponse res) {
- doGet(req, res); - } } diff --git a/src/test/java/org/jsoup/integration/servlets/SlowRider.java b/src/test/java/org/jsoup/integration/servlets/SlowRider.java index e8db460073..84e876e5a1 100644 --- a/src/test/java/org/jsoup/integration/servlets/SlowRider.java +++ b/src/test/java/org/jsoup/integration/servlets/SlowRider.java @@ -16,7 +16,7 @@ public class SlowRider extends BaseServlet { public static final String MaxTimeParam = "maxTime"; @Override - protected void doGet(HttpServletRequest req, HttpServletResponse res) throws IOException { + protected void doIt(HttpServletRequest req, HttpServletResponse res) throws IOException { pause(1000); res.setContentType(TextHtml); res.setStatus(HttpServletResponse.SC_OK);