From c3d1c7b6541ddaca689334ff3b2d0719e27f473d Mon Sep 17 00:00:00 2001 From: Wiiplay123 Date: Tue, 25 Apr 2017 20:17:26 -0500 Subject: [PATCH 1/8] Fixed deviantArt ripping Uses the data-super-full-img attribute of thumbnails as first attempt to get a full image URL. If that doesn't work (as is the case with mature items), the JSON is used. thumbToFull is still broken in this commit, but shouldn't be needed. --- .../ripper/rippers/DeviantartRipper.java | 34 ++++++++++++++----- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index b170c2e75..7dcd2747b 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -125,7 +125,7 @@ public List getURLsFromPage(Document page) { List imageURLs = new ArrayList(); // Iterate over all thumbnails - for (Element thumb : page.select("div.zones-container a.thumb")) { + for (Element thumb : page.select("div.zones-container span.thumb")) { if (isStopped()) { break; } @@ -133,15 +133,33 @@ public List getURLsFromPage(Document page) { if (img.attr("transparent").equals("false")) { continue; // a.thumbs to other albums are invisible } - // Get full-sized image via helper methods String fullSize = null; - try { - fullSize = thumbToFull(img.attr("src"), true); - } catch (Exception e) { - logger.info("Attempting to get full size image from " + thumb.attr("href")); - fullSize = smallToFull(img.attr("src"), thumb.attr("href")); - } + if (!thumb.attr("data-super-full-img").isEmpty()) { + fullSize = thumb.attr("data-super-full-img"); + } else { + String spanUrl = thumb.attr("href"); + // id = spanUrl.substring(spanUrl.lastIndexOf('-') + 1) + Elements js = page.select("script[type=\"text/javascript\"]"); + for (Element tag : js) { + if (tag.html().contains("window.__pageload")) { + 
String script = tag.html(); + script = script.substring(script.indexOf("window.__pageload")); + script = script.substring(script.indexOf(spanUrl.substring(spanUrl.lastIndexOf('-') + 1))); + script = script.substring(script.indexOf("},\"src\":\"") + 9,script.indexOf("\",\"type\"")); // first },"src":"url" after id + fullSize = script.replace("\\/","/"); + break; + } + } + if (fullSize == null) { + try { + fullSize = thumbToFull(img.attr("src"), true); + } catch (Exception e) { + logger.info("Attempting to get full size image from " + thumb.attr("href")); + fullSize = smallToFull(img.attr("src"), thumb.attr("href")); + } + } + } if (fullSize == null) { continue; } From 8da945a8fe555c8e9c4c584e2a943d77177976d1 Mon Sep 17 00:00:00 2001 From: Wiiplay123 Date: Tue, 25 Apr 2017 21:18:03 -0500 Subject: [PATCH 2/8] Fixed deviantArt description ripping again Forgot to change an "a" to a "span", fixed now. --- .../com/rarchives/ripme/ripper/rippers/DeviantartRipper.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index 7dcd2747b..994efa7a9 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -180,9 +180,9 @@ public List getURLsFromPage(Document page) { @Override public List getDescriptionsFromPage(Document page) { List textURLs = new ArrayList(); - // Iterate over all thumbnails - for (Element thumb : page.select("div.zones-container a.thumb")) { + for (Element thumb : page.select("div.zones-container span.thumb")) { + logger.info(thumb.attr("href")); if (isStopped()) { break; } @@ -191,6 +191,7 @@ public List getDescriptionsFromPage(Document page) { continue; // a.thumbs to other albums are invisible } textURLs.add(thumb.attr("href")); + } return textURLs; } From 
f2afb840ddd56281782f56f9cd1e21a0ff2e1a7a Mon Sep 17 00:00:00 2001 From: Wiiplay123 Date: Wed, 26 Apr 2017 23:13:11 -0500 Subject: [PATCH 3/8] Fixed deviantArt description file name issue. The deviantArt description file name will now match the image file name, rather than the URL. --- .../ripme/ripper/AbstractHTMLRipper.java | 26 +++++---- .../ripper/rippers/DeviantartRipper.java | 54 ++++++++++++------- 2 files changed, 51 insertions(+), 29 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index 35b2aa432..6111db79b 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -53,7 +53,7 @@ public URL sanitizeURL(URL url) throws MalformedURLException { public boolean hasDescriptionSupport() { return false; } - public String getDescription(String page) throws IOException { + public String[] getDescription(String url,Document page) throws IOException { throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function? 
} public int descSleepTime() { @@ -95,15 +95,16 @@ public void rip() throws IOException { logger.debug("Found description link(s) from " + doc.location()); for (String textURL : textURLs) { if (isStopped()) { + break; } textindex += 1; logger.debug("Getting description from " + textURL); sleep(descSleepTime()); - String tempDesc = getDescription(textURL); + String[] tempDesc = getDescription(textURL,doc); if (tempDesc != null) { - logger.debug("Got description: " + tempDesc); - saveText(new URL(textURL), "", tempDesc, textindex); + logger.debug("Got description from " + textURL); + saveText(new URL(textURL), "", tempDesc[0], textindex,tempDesc[1]); } } } @@ -130,18 +131,21 @@ public void rip() throws IOException { waitForThreads(); } public boolean saveText(URL url, String subdirectory, String text, int index) { - // Not the best for some cases, like FurAffinity. Overridden there. - try { - stopCheck(); - } catch (IOException e) { - return false; - } String saveAs = url.toExternalForm(); saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); } if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); } if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); } if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); } + return saveText(url,subdirectory,text,index,saveAs); + } + public boolean saveText(URL url, String subdirectory, String text, int index, String fileName) { + // Not the best for some cases, like FurAffinity. Overridden there. 
+ try { + stopCheck(); + } catch (IOException e) { + return false; + } File saveFileAs; try { if (!subdirectory.equals("")) { // Not sure about this part @@ -153,7 +157,7 @@ public boolean saveText(URL url, String subdirectory, String text, int index) { + subdirectory + File.separator + getPrefix(index) - + saveAs + + fileName + ".txt"); // Write the file FileOutputStream out = (new FileOutputStream(saveFileAs)); diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index 994efa7a9..b717437ff 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -119,7 +119,19 @@ public Document getFirstPage() throws IOException { .cookies(cookies) .get(); } - + public String jsonToImage(Document page,String id) { + Elements js = page.select("script[type=\"text/javascript\"]"); + for (Element tag : js) { + if (tag.html().contains("window.__pageload")) { + String script = tag.html(); + script = script.substring(script.indexOf("window.__pageload")); + script = script.substring(script.indexOf(id)); + script = script.substring(script.indexOf("},\"src\":\"") + 9,script.indexOf("\",\"type\"")); // first },"src":"url" after id + return script.replace("\\/","/"); + } + } + return null; + } @Override public List getURLsFromPage(Document page) { List imageURLs = new ArrayList(); @@ -139,18 +151,7 @@ public List getURLsFromPage(Document page) { fullSize = thumb.attr("data-super-full-img"); } else { String spanUrl = thumb.attr("href"); - // id = spanUrl.substring(spanUrl.lastIndexOf('-') + 1) - Elements js = page.select("script[type=\"text/javascript\"]"); - for (Element tag : js) { - if (tag.html().contains("window.__pageload")) { - String script = tag.html(); - script = script.substring(script.indexOf("window.__pageload")); - script = 
script.substring(script.indexOf(spanUrl.substring(spanUrl.lastIndexOf('-') + 1))); - script = script.substring(script.indexOf("},\"src\":\"") + 9,script.indexOf("\",\"type\"")); // first },"src":"url" after id - fullSize = script.replace("\\/","/"); - break; - } - } + fullSize = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1)); if (fullSize == null) { try { fullSize = thumbToFull(img.attr("src"), true); @@ -263,17 +264,18 @@ public static String thumbToFull(String thumb, boolean throwException) throws Ex * Attempts to download description for image. * Comes in handy when people put entire stories in their description. * If no description was found, returns null. - * @param page The page the description will be retrieved from - * @return The description + * @param url The URL the description will be retrieved from + * @param page The gallery page the URL was found on + * @return A String[] with first object being the description, and the second object being image file name if found. 
*/ @Override - public String getDescription(String page) { + public String[] getDescription(String url,Document page) { if (isThisATest()) { return null; } try { // Fetch the image page - Response resp = Http.url(page) + Response resp = Http.url(url) .referrer(this.url) .cookies(cookies) .response(); @@ -289,7 +291,23 @@ public String getDescription(String page) { documentz.outputSettings(new Document.OutputSettings().prettyPrint(false)); ele.select("br").append("\\n"); ele.select("p").prepend("\\n\\n"); - return Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)); + String fullSize = null; + Element thumb = page.select("div.zones-container span.thumb[href=\"" + url + "\"]").get(0); + if (!thumb.attr("data-super-full-img").isEmpty()) { + fullSize = thumb.attr("data-super-full-img"); + String[] split = fullSize.split("/"); + fullSize = split[split.length - 1]; + } else { + String spanUrl = thumb.attr("href"); + fullSize = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1)); + String[] split = fullSize.split("/"); + fullSize = split[split.length - 1]; + } + if (fullSize == null) { + return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false))}; + } + fullSize = fullSize.substring(0,fullSize.lastIndexOf(".")); + return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)),fullSize}; // TODO Make this not make a newline if someone just types \n into the description. 
} catch (IOException ioe) { logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'"); From 4cf9f09293390d4ce2aca4cc6c78f1201a10b8a7 Mon Sep 17 00:00:00 2001 From: Wiiplay123 Date: Thu, 27 Apr 2017 00:18:42 -0500 Subject: [PATCH 4/8] Fixed deviantArt "no next page found" error The pagination buttons are gone. The "coffset" buttons for comment pages, not gallery pages. I patched this by using a link found in the HTML itself that I think is used by JavaScript to generate the pagination buttons. --- .../ripme/ripper/rippers/DeviantartRipper.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index b717437ff..cde05d9f5 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -201,14 +201,15 @@ public Document getNextPage(Document page) throws IOException { if (isThisATest()) { return null; } - Elements nextButtons = page.select("li.next > a"); + Elements nextButtons = page.select("link[rel=\"next\"]"); if (nextButtons.size() == 0) { - throw new IOException("No next page found"); + if (page.select("link[rel=\"prev\"]").size() == 0) { + throw new IOException("No next page found"); + } else { + throw new IOException("Hit end of pages"); + } } Element a = nextButtons.first(); - if (a.hasClass("disabled")) { - throw new IOException("Hit end of pages"); - } String nextPage = a.attr("href"); if (nextPage.startsWith("/")) { nextPage = "http://" + this.url.getHost() + nextPage; @@ -306,7 +307,7 @@ public String[] getDescription(String url,Document page) { if (fullSize == null) { return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false))}; } - fullSize = 
fullSize.substring(0,fullSize.lastIndexOf(".")); + fullSize = fullSize.substring(0, fullSize.lastIndexOf(".")); return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)),fullSize}; // TODO Make this not make a newline if someone just types \n into the description. } catch (IOException ioe) { From 8f409eb28ac5e270dd76eeee88db8d9613857172 Mon Sep 17 00:00:00 2001 From: Wiiplay123 Date: Sat, 29 Apr 2017 14:35:39 -0500 Subject: [PATCH 5/8] Fixed deviantArt downloading low res when higher res is available Also fixed a few bugs related to non-image items that crashed the rip. --- .../ripme/ripper/AbstractHTMLRipper.java | 32 ++++-- .../ripper/rippers/DeviantartRipper.java | 102 ++++++++++++------ 2 files changed, 91 insertions(+), 43 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index 6111db79b..99ce1fec0 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -57,7 +57,7 @@ public String[] getDescription(String url,Document page) throws IOException { throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function? 
} public int descSleepTime() { - return 0; + return 100; } @Override public void rip() throws IOException { @@ -95,17 +95,27 @@ public void rip() throws IOException { logger.debug("Found description link(s) from " + doc.location()); for (String textURL : textURLs) { if (isStopped()) { - break; } textindex += 1; logger.debug("Getting description from " + textURL); - sleep(descSleepTime()); - String[] tempDesc = getDescription(textURL,doc); - if (tempDesc != null) { - logger.debug("Got description from " + textURL); - saveText(new URL(textURL), "", tempDesc[0], textindex,tempDesc[1]); - } + String[] tempDesc = getDescription(textURL,doc); + if (tempDesc != null) { + if (Utils.getConfigBoolean("file.overwrite", false) || !(new File( + workingDir.getCanonicalPath() + + "" + + File.separator + + getPrefix(textindex) + + (tempDesc.length > 1 ? tempDesc[1] : fileNameFromURL(new URL(textURL))) + + ".txt").exists())) { + logger.debug("Got description from " + textURL); + saveText(new URL(textURL), "", tempDesc[0], textindex, (tempDesc.length > 1 ? 
tempDesc[1] : fileNameFromURL(new URL(textURL)))); + sleep(descSleepTime()); + } else { + logger.debug("Description from " + textURL + " already exists."); + } + } + } } } @@ -130,13 +140,17 @@ public void rip() throws IOException { } waitForThreads(); } - public boolean saveText(URL url, String subdirectory, String text, int index) { + public String fileNameFromURL(URL url) { String saveAs = url.toExternalForm(); saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); } if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); } if (saveAs.indexOf('&') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('&')); } if (saveAs.indexOf(':') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf(':')); } + return saveAs; + } + public boolean saveText(URL url, String subdirectory, String text, int index) { + String saveAs = fileNameFromURL(url); return saveText(url,subdirectory,text,index,saveAs); } public boolean saveText(URL url, String subdirectory, String text, int index, String fileName) { diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index cde05d9f5..248169929 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -1,6 +1,7 @@ package com.rarchives.ripme.ripper.rippers; import java.io.IOException; +import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; @@ -114,6 +115,7 @@ public Document getFirstPage() throws IOException { cookies = loginToDeviantart(); } catch (Exception e) { logger.warn("Failed to login: ", e); + cookies.put("agegate_state","1"); // Bypasses the age gate } return Http.url(this.url) .cookies(cookies) @@ -123,11 +125,18 @@ public String jsonToImage(Document 
page,String id) { Elements js = page.select("script[type=\"text/javascript\"]"); for (Element tag : js) { if (tag.html().contains("window.__pageload")) { - String script = tag.html(); - script = script.substring(script.indexOf("window.__pageload")); - script = script.substring(script.indexOf(id)); - script = script.substring(script.indexOf("},\"src\":\"") + 9,script.indexOf("\",\"type\"")); // first },"src":"url" after id - return script.replace("\\/","/"); + try { + String script = tag.html(); + script = script.substring(script.indexOf("window.__pageload")); + if (script.indexOf(id) < 0) { + continue; + } + script = script.substring(script.indexOf(id)); + script = script.substring(script.indexOf("},\"src\":\"") + 9, script.indexOf("\",\"type\"")); // first },"src":"url" after id + return script.replace("\\/", "/"); + } catch (StringIndexOutOfBoundsException e) { + logger.debug("Unable to get json link from " + page.location()); + } } } return null; @@ -147,22 +156,26 @@ public List getURLsFromPage(Document page) { } // Get full-sized image via helper methods String fullSize = null; - if (!thumb.attr("data-super-full-img").isEmpty()) { + if (thumb.attr("data-super-full-img").contains("//orig")) { fullSize = thumb.attr("data-super-full-img"); } else { String spanUrl = thumb.attr("href"); - fullSize = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1)); - if (fullSize == null) { - try { - fullSize = thumbToFull(img.attr("src"), true); - } catch (Exception e) { - logger.info("Attempting to get full size image from " + thumb.attr("href")); - fullSize = smallToFull(img.attr("src"), thumb.attr("href")); - } + String fullSize1 = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1)); + if (fullSize1 == null || !fullSize1.contains("//orig")) { + fullSize = smallToFull(img.attr("src"), spanUrl); } + if (fullSize == null && fullSize1 != null) { + fullSize = fullSize1; + } } if (fullSize == null) { - continue; + if (thumb.attr("data-super-full-img") 
!= null) { + fullSize = thumb.attr("data-super-full-img"); + } else if (thumb.attr("data-super-img") != null) { + fullSize = thumb.attr("data-super-img"); + } else { + continue; + } } if (triedURLs.contains(fullSize)) { logger.warn("Already tried to download " + fullSize); @@ -283,12 +296,11 @@ public String[] getDescription(String url,Document page) { cookies.putAll(resp.cookies()); // Try to find the description - Elements els = resp.parse().select("div[class=dev-description]"); - if (els.size() == 0) { + Document documentz = resp.parse(); + Element ele = documentz.select("div.dev-description").first(); + if (ele == null) { throw new IOException("No description found"); } - Document documentz = resp.parse(); - Element ele = documentz.select("div[class=dev-description]").get(0); documentz.outputSettings(new Document.OutputSettings().prettyPrint(false)); ele.select("br").append("\\n"); ele.select("p").prepend("\\n\\n"); @@ -301,8 +313,10 @@ public String[] getDescription(String url,Document page) { } else { String spanUrl = thumb.attr("href"); fullSize = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1)); - String[] split = fullSize.split("/"); - fullSize = split[split.length - 1]; + if (fullSize != null) { + String[] split = fullSize.split("/"); + fullSize = split[split.length - 1]; + } } if (fullSize == null) { return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false))}; @@ -311,7 +325,7 @@ public String[] getDescription(String url,Document page) { return new String[] {Jsoup.clean(ele.html().replaceAll("\\\\n", System.getProperty("line.separator")), "", Whitelist.none(), new Document.OutputSettings().prettyPrint(false)),fullSize}; // TODO Make this not make a newline if someone just types \n into the description. 
} catch (IOException ioe) { - logger.info("Failed to get description " + page + " : '" + ioe.getMessage() + "'"); + logger.info("Failed to get description at " + url + ": '" + ioe.getMessage() + "'"); return null; } } @@ -332,23 +346,43 @@ public String smallToFull(String thumb, String page) { .cookies(cookies) .response(); cookies.putAll(resp.cookies()); - - // Try to find the download button Document doc = resp.parse(); - Elements els = doc.select("a.dev-page-download"); - if (els.size() > 0) { - // Full-size image - String fsimage = els.get(0).attr("href"); - logger.info("Found download page: " + fsimage); - return fsimage; - } - + Elements els = doc.select("img.dev-content-full"); + String fsimage = null; // Get the largest resolution image on the page - els = doc.select("img.dev-content-full"); if (els.size() > 0) { // Large image - String fsimage = els.get(0).attr("src"); + fsimage = els.get(0).attr("src"); logger.info("Found large-scale: " + fsimage); + if (fsimage.contains("//orig")) { + return fsimage; + } + } + // Try to find the download button + els = doc.select("a.dev-page-download"); + if (els.size() > 0) { + // Full-size image + String downloadLink = els.get(0).attr("href"); + logger.info("Found download page: " + downloadLink); + HttpURLConnection con = (HttpURLConnection) new URL(downloadLink).openConnection(); + con.setRequestProperty("Referer",this.url.toString()); + String cookieString = ""; + for (Map.Entry entry : cookies.entrySet()) { + cookieString = cookieString + entry.getKey() + "=" + entry.getValue() + "; "; + } + cookieString = cookieString.substring(0,cookieString.length() - 1); + con.setRequestProperty("Cookie",cookieString); + con.setRequestProperty("User-Agent",this.USER_AGENT); + con.setInstanceFollowRedirects(true); + con.connect(); + int code = con.getResponseCode(); + String location = con.getHeaderField("Location"); + con.disconnect(); + if (location.contains("//orig")) { + fsimage = location; + } + } + if (fsimage != null) { 
return fsimage; } throw new IOException("No download page found"); From 624c28befa811b2c205f8ed31158f082758a2d2e Mon Sep 17 00:00:00 2001 From: Wiiplay123 Date: Sat, 29 Apr 2017 21:07:49 -0500 Subject: [PATCH 6/8] Fixed deviantArt download link resolving Turns out getURL works perfectly fine, but the Location header doesn't. --- .../com/rarchives/ripme/ripper/rippers/DeviantartRipper.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index 248169929..5bb0fae3d 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -363,7 +363,7 @@ public String smallToFull(String thumb, String page) { if (els.size() > 0) { // Full-size image String downloadLink = els.get(0).attr("href"); - logger.info("Found download page: " + downloadLink); + logger.info("Found download button link: " + downloadLink); HttpURLConnection con = (HttpURLConnection) new URL(downloadLink).openConnection(); con.setRequestProperty("Referer",this.url.toString()); String cookieString = ""; @@ -376,10 +376,11 @@ public String smallToFull(String thumb, String page) { con.setInstanceFollowRedirects(true); con.connect(); int code = con.getResponseCode(); - String location = con.getHeaderField("Location"); + String location = con.getURL().toString(); con.disconnect(); if (location.contains("//orig")) { fsimage = location; + logger.info("Found image download: " + location); } } if (fsimage != null) { From 3d359be95866720bb9133dd62abd6d2935671d41 Mon Sep 17 00:00:00 2001 From: Wiiplay123 Date: Sun, 14 May 2017 20:15:27 -0500 Subject: [PATCH 7/8] Added precaution for URLs with slashes at the end when determining file name Made for my FurAffinity branch, brought it over to master branch because it could come in handy for other things. 
--- src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index 99ce1fec0..300e8a64b 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -142,6 +142,7 @@ public void rip() throws IOException { } public String fileNameFromURL(URL url) { String saveAs = url.toExternalForm(); + if (saveAs.endsWith("/")) { saveAs = saveAs.substring(0,saveAs.length() - 1) ;} saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); if (saveAs.indexOf('?') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('?')); } if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); } From 2055cb0d9d9449e323e29a0df4737f811f1257c4 Mon Sep 17 00:00:00 2001 From: MetaPrime Date: Mon, 15 May 2017 10:24:36 -0700 Subject: [PATCH 8/8] Fix indentation. --- .../ripme/ripper/AbstractHTMLRipper.java | 36 +++++++++---------- .../ripper/rippers/DeviantartRipper.java | 33 ++++++++--------- 2 files changed, 35 insertions(+), 34 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java index 300e8a64b..cdab1664b 100644 --- a/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/AbstractHTMLRipper.java @@ -16,7 +16,7 @@ * Simplified ripper, designed for ripping from sites by parsing HTML. 
*/ public abstract class AbstractHTMLRipper extends AlbumRipper { - + public AbstractHTMLRipper(URL url) throws IOException { super(url); } @@ -30,7 +30,7 @@ public Document getNextPage(Document doc) throws IOException { } public abstract List getURLsFromPage(Document page); public List getDescriptionsFromPage(Document doc) throws IOException { - throw new IOException("getDescriptionsFromPage not implemented"); // Do I do this or make an abstract function? + throw new IOException("getDescriptionsFromPage not implemented"); // Do I do this or make an abstract function? } public abstract void downloadURL(URL url, int index); public DownloadThreadPool getThreadPool() { @@ -45,16 +45,16 @@ public boolean keepSortOrder() { public boolean canRip(URL url) { return url.getHost().endsWith(getDomain()); } - + @Override public URL sanitizeURL(URL url) throws MalformedURLException { return url; } public boolean hasDescriptionSupport() { - return false; + return false; } public String[] getDescription(String url,Document page) throws IOException { - throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function? + throw new IOException("getDescription not implemented"); // Do I do this or make an abstract function? 
} public int descSleepTime() { return 100; @@ -66,7 +66,7 @@ public void rip() throws IOException { logger.info("Retrieving " + this.url); sendUpdate(STATUS.LOADING_RESOURCE, this.url.toExternalForm()); Document doc = getFirstPage(); - + while (doc != null) { List imageURLs = getURLsFromPage(doc); // Remove all but 1 image @@ -79,7 +79,7 @@ public void rip() throws IOException { if (imageURLs.size() == 0) { throw new IOException("No images found at " + doc.location()); } - + for (String imageURL : imageURLs) { index += 1; logger.debug("Found image url #" + index + ": " + imageURL); @@ -90,15 +90,15 @@ public void rip() throws IOException { } if (hasDescriptionSupport() && Utils.getConfigBoolean("descriptions.save", false)) { logger.debug("Fetching description(s) from " + doc.location()); - List textURLs = getDescriptionsFromPage(doc); - if (textURLs.size() > 0) { + List textURLs = getDescriptionsFromPage(doc); + if (textURLs.size() > 0) { logger.debug("Found description link(s) from " + doc.location()); - for (String textURL : textURLs) { - if (isStopped()) { - break; - } - textindex += 1; - logger.debug("Getting description from " + textURL); + for (String textURL : textURLs) { + if (isStopped()) { + break; + } + textindex += 1; + logger.debug("Getting description from " + textURL); String[] tempDesc = getDescription(textURL,doc); if (tempDesc != null) { if (Utils.getConfigBoolean("file.overwrite", false) || !(new File( @@ -116,8 +116,8 @@ public void rip() throws IOException { } } - } - } + } + } } if (isStopped() || isThisATest()) { @@ -142,7 +142,7 @@ public void rip() throws IOException { } public String fileNameFromURL(URL url) { String saveAs = url.toExternalForm(); - if (saveAs.endsWith("/")) { saveAs = saveAs.substring(0,saveAs.length() - 1) ;} + if (saveAs.endsWith("/")) { saveAs = saveAs.substring(0,saveAs.length() - 1) ;} saveAs = saveAs.substring(saveAs.lastIndexOf('/')+1); if (saveAs.indexOf('?') >= 0) { 
saveAs = saveAs.substring(0, saveAs.indexOf('?')); } if (saveAs.indexOf('#') >= 0) { saveAs = saveAs.substring(0, saveAs.indexOf('#')); } diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java index 5bb0fae3d..e61cb0075 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/DeviantartRipper.java @@ -49,7 +49,7 @@ public String getDomain() { } @Override public boolean hasDescriptionSupport() { - return true; + return true; } @Override public URL sanitizeURL(URL url) throws MalformedURLException { @@ -132,7 +132,8 @@ public String jsonToImage(Document page,String id) { continue; } script = script.substring(script.indexOf(id)); - script = script.substring(script.indexOf("},\"src\":\"") + 9, script.indexOf("\",\"type\"")); // first },"src":"url" after id + // first },"src":"url" after id + script = script.substring(script.indexOf("},\"src\":\"") + 9, script.indexOf("\",\"type\"")); return script.replace("\\/", "/"); } catch (StringIndexOutOfBoundsException e) { logger.debug("Unable to get json link from " + page.location()); @@ -156,23 +157,23 @@ public List getURLsFromPage(Document page) { } // Get full-sized image via helper methods String fullSize = null; - if (thumb.attr("data-super-full-img").contains("//orig")) { - fullSize = thumb.attr("data-super-full-img"); - } else { - String spanUrl = thumb.attr("href"); - String fullSize1 = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1)); - if (fullSize1 == null || !fullSize1.contains("//orig")) { + if (thumb.attr("data-super-full-img").contains("//orig")) { + fullSize = thumb.attr("data-super-full-img"); + } else { + String spanUrl = thumb.attr("href"); + String fullSize1 = jsonToImage(page,spanUrl.substring(spanUrl.lastIndexOf('-') + 1)); + if (fullSize1 == null || !fullSize1.contains("//orig")) { fullSize = 
smallToFull(img.attr("src"), spanUrl); - } - if (fullSize == null && fullSize1 != null) { + } + if (fullSize == null && fullSize1 != null) { fullSize = fullSize1; } - } + } if (fullSize == null) { - if (thumb.attr("data-super-full-img") != null) { - fullSize = thumb.attr("data-super-full-img"); + if (thumb.attr("data-super-full-img") != null) { + fullSize = thumb.attr("data-super-full-img"); } else if (thumb.attr("data-super-img") != null) { - fullSize = thumb.attr("data-super-img"); + fullSize = thumb.attr("data-super-img"); } else { continue; } @@ -273,7 +274,7 @@ public static String thumbToFull(String thumb, boolean throwException) throws Ex } return result.toString(); } - + /** * Attempts to download description for image. * Comes in handy when people put entire stories in their description. @@ -329,7 +330,7 @@ public String[] getDescription(String url,Document page) { return null; } } - + /** * If largest resolution for image at 'thumb' is found, starts downloading * and returns null.