From 07bc76c6275332b980823057a0055342e25b75c5 Mon Sep 17 00:00:00 2001 From: Avinash13iitkgp Date: Mon, 5 Jun 2017 21:05:37 +0530 Subject: [PATCH 1/2] TwitterResponse.java Has a wrong check for Long with string. It will fail if the condition is met --- .../java/com/rarchives/ripme/ripper/rippers/TwitterRipper.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TwitterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TwitterRipper.java index fa90d4a89..e721d0565 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/TwitterRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TwitterRipper.java @@ -235,7 +235,7 @@ public void rip() throws IOException { } logger.debug("Twitter response #" + (i + 1) + " Tweets:\n" + tweets); if (tweets.size() == 1 && - lastMaxID.equals(tweets.get(0).getString("id_str")) + lastMaxID== Long.parseLong(tweets.get(0).getString("id_str")) ) { logger.info(" No more tweet found."); break; From 1e423f324780485be0b9dd30760b3a9731133608 Mon Sep 17 00:00:00 2001 From: Avinash13iitkgp Date: Mon, 5 Jun 2017 22:45:59 +0530 Subject: [PATCH 2/2] Update TwitterRipper.java --- .../ripme/ripper/rippers/TwitterRipper.java | 646 +++++++++++------- 1 file changed, 404 insertions(+), 242 deletions(-) diff --git a/src/main/java/com/rarchives/ripme/ripper/rippers/TwitterRipper.java b/src/main/java/com/rarchives/ripme/ripper/rippers/TwitterRipper.java index e721d0565..451fbf1a4 100644 --- a/src/main/java/com/rarchives/ripme/ripper/rippers/TwitterRipper.java +++ b/src/main/java/com/rarchives/ripme/ripper/rippers/TwitterRipper.java @@ -1,301 +1,463 @@ -package com.rarchives.ripme.ripper.rippers; +package com.rarchives.ripme.utils; +import java.io.File; import java.io.IOException; -import java.net.MalformedURLException; +import java.io.InputStream; +import java.io.UnsupportedEncodingException; +import java.lang.reflect.Constructor; +import java.net.URISyntaxException; import java.net.URL; +import java.net.URLDecoder; import java.util.ArrayList; +import java.util.Enumeration; +import java.util.HashMap; import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import org.json.JSONArray; -import org.json.JSONException; -import org.json.JSONObject; -import org.json.JSONTokener; -import org.jsoup.nodes.Document; - -import com.rarchives.ripme.ripper.AlbumRipper; -import com.rarchives.ripme.utils.Http; -import com.rarchives.ripme.utils.Utils; - -public class TwitterRipper extends AlbumRipper { - - private static final String DOMAIN = "twitter.com", - HOST = "twitter"; - - private static final int MAX_REQUESTS = Utils.getConfigInteger("twitter.max_requests", 10); - private static final int WAIT_TIME = 2000; - - // Base 64 of consumer key : consumer secret - private String authKey; - private String accessToken; - - private enum ALBUM_TYPE { - ACCOUNT, - SEARCH +import java.util.Map; +import java.util.jar.JarEntry; +import java.util.jar.JarFile; + +import javax.sound.sampled.AudioSystem; +import javax.sound.sampled.Clip; +import javax.sound.sampled.Line; +import javax.sound.sampled.LineEvent; +import javax.sound.sampled.LineListener; + +import org.apache.commons.configuration.ConfigurationException; +import org.apache.commons.configuration.PropertiesConfiguration; +import org.apache.log4j.LogManager; +import org.apache.log4j.Logger; +import org.apache.log4j.PropertyConfigurator; + +import com.rarchives.ripme.ripper.AbstractRipper; + +/** + * Common utility functions used in various places throughout the project. + */ +public class Utils { + public static final String RIP_DIRECTORY = "rips"; + private static final String configFile = "rip.properties"; + private static final Logger logger = Logger.getLogger(Utils.class); + + private static PropertiesConfiguration config; + static { + try { + String configPath = getConfigPath(); + File f = new File(configPath); + if (!f.exists()) { + // Use default bundled with .jar + configPath = configFile; + } + config = new PropertiesConfiguration(configPath); + logger.info("Loaded " + config.getPath()); + if (f.exists()) { + // Config was loaded from file + if ( !config.containsKey("twitter.auth") + || !config.containsKey("twitter.max_requests") + || !config.containsKey("tumblr.auth") + || !config.containsKey("error.skip404") + || !config.containsKey("gw.api") + || !config.containsKey("page.timeout") + || !config.containsKey("download.max_size") + ) { + // Config is missing key fields + // Need to reload the default config + // See https://github.com/4pr0n/ripme/issues/158 + logger.warn("Config does not contain key fields, deleting old config"); + f.delete(); + config = new PropertiesConfiguration(configFile); + logger.info("Loaded " + config.getPath()); + } + } + } catch (Exception e) { + logger.error("[!] Failed to load properties file from " + configFile, e); + } } - private ALBUM_TYPE albumType; - private String searchText, accountName; - - public TwitterRipper(URL url) throws IOException { - super(url); - authKey = Utils.getConfigString("twitter.auth", null); - if (authKey == null) { - throw new IOException("Could not find twitter authentication key in configuration"); + /** + * Get the root rips directory. + * @return + * Root directory to save rips to. + * @throws IOException + */ + public static File getWorkingDirectory() { + String currentDir = "."; + try { + currentDir = new File(".").getCanonicalPath() + File.separator + RIP_DIRECTORY + File.separator; + } catch (IOException e) { + logger.error("Error while finding working dir: ", e); + } + if (config != null) { + currentDir = getConfigString("rips.directory", currentDir); } + File workingDir = new File(currentDir); + if (!workingDir.exists()) { + workingDir.mkdirs(); + } + return workingDir; } - @Override - public boolean canRip(URL url) { - return url.getHost().endsWith(DOMAIN); + public static String getConfigString(String key, String defaultValue) { + return config.getString(key, defaultValue); } - - @Override - public URL sanitizeURL(URL url) throws MalformedURLException { - // https://twitter.com/search?q=from%3Apurrbunny%20filter%3Aimages&src=typd - Pattern p = Pattern.compile("^https?://(m\\.)?twitter\\.com/search\\?q=([a-zA-Z0-9%\\-_]{1,}).*$"); - Matcher m = p.matcher(url.toExternalForm()); - if (m.matches()) { - albumType = ALBUM_TYPE.SEARCH; - searchText = m.group(2); - return url; + public static int getConfigInteger(String key, int defaultValue) { + return config.getInt(key, defaultValue); + } + public static boolean getConfigBoolean(String key, boolean defaultValue) { + return config.getBoolean(key, defaultValue); + } + public static List getConfigList(String key) { + List result = new ArrayList(); + for (Object obj : config.getList(key, new ArrayList())) { + if (obj instanceof String) { + result.add( (String) obj); + } } - p = Pattern.compile("^https?://(m\\.)?twitter\\.com/([a-zA-Z0-9\\-_]{1,}).*$"); - m = p.matcher(url.toExternalForm()); - if (m.matches()) { - albumType = ALBUM_TYPE.ACCOUNT; - accountName = m.group(2); - return url; + return result; + } + public static void setConfigBoolean(String key, boolean value) { config.setProperty(key, value); } + public static void setConfigString(String key, String value) { config.setProperty(key, value); } + public static void setConfigInteger(String key, int value) { config.setProperty(key, value); } + public static void setConfigList(String key, List list) { + config.clearProperty(key); + config.addProperty(key, list); + } + public static void setConfigList(String key, Enumeration enumeration) { + config.clearProperty(key); + List list = new ArrayList(); + while (enumeration.hasMoreElements()) { + list.add(enumeration.nextElement()); } - throw new MalformedURLException("Expected username or search string in url: " + url); + config.addProperty(key, list); } - private void getAccessToken() throws IOException { - Document doc = Http.url("https://api.twitter.com/oauth2/token") - .ignoreContentType() - .header("Authorization", "Basic " + authKey) - .header("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8") - .header("User-agent", "ripe and zipe") - .data("grant_type", "client_credentials") - .post(); - String body = doc.body().html().replaceAll(""", "\""); + public static void saveConfig() { try { - JSONObject json = new JSONObject(body); - accessToken = json.getString("access_token"); - return; - } catch (JSONException e) { - // Fall through - throw new IOException("Failure while parsing JSON: " + body, e); + config.save(getConfigPath()); + logger.info("Saved configuration to " + getConfigPath()); + } catch (ConfigurationException e) { + logger.error("Error while saving configuration: ", e); } } - - private void checkRateLimits(String resource, String api) throws IOException { - Document doc = Http.url("https://api.twitter.com/1.1/application/rate_limit_status.json?resources=" + resource) - .ignoreContentType() - .header("Authorization", "Bearer " + accessToken) - .header("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8") - .header("User-agent", "ripe and zipe") - .get(); - String body = doc.body().html().replaceAll(""", "\""); + private static String getConfigPath() { try { - JSONObject json = new JSONObject(body); - JSONObject stats = json.getJSONObject("resources") - .getJSONObject(resource) - .getJSONObject(api); - int remaining = stats.getInt("remaining"); - logger.info(" Twitter " + resource + " calls remaining: " + remaining); - if (remaining < 20) { - logger.error("Twitter API calls exhausted: " + stats.toString()); - throw new IOException("Less than 20 API calls remaining; not enough to rip."); - } - } catch (JSONException e) { - logger.error("JSONException: ", e); - throw new IOException("Error while parsing JSON: " + body, e); + return new File(".").getCanonicalPath() + File.separator + configFile; + } catch (Exception e) { + return "." + File.separator + configFile; } } - private String getApiURL(Long maxID) { - StringBuilder req = new StringBuilder(); - switch (albumType) { - case ACCOUNT: - req.append("https://api.twitter.com/1.1/statuses/user_timeline.json") - .append("?screen_name=" + this.accountName) - .append("&include_entities=true") - .append("&exclude_replies=true") - .append("&trim_user=true") - .append("&include_rts=false") - .append("&count=" + 200); - break; - case SEARCH: - req.append("https://api.twitter.com/1.1/search/tweets.json") - .append("?q=" + this.searchText) - .append("&include_entities=true") - .append("&result_type=recent") - .append("&count=100"); - break; - } - if (maxID > 0) { - req.append("&max_id=" + Long.toString(maxID)); + /** + * Removes the current working directory (CWD) from a File. + * @param saveAs + * The File path + * @return + * saveAs in relation to the CWD + */ + public static String removeCWD(File saveAs) { + String prettySaveAs = saveAs.toString(); + try { + prettySaveAs = saveAs.getCanonicalPath(); + String cwd = new File(".").getCanonicalPath() + File.separator; + prettySaveAs = prettySaveAs.replace( + cwd, + "." + File.separator); + } catch (Exception e) { + logger.error("Exception: ", e); } - return req.toString(); + return prettySaveAs; } - private List getTweets(String url) throws IOException { - List tweets = new ArrayList(); - logger.info(" Retrieving " + url); - Document doc = Http.url(url) - .ignoreContentType() - .header("Authorization", "Bearer " + accessToken) - .header("Content-Type", "application/x-www-form-urlencoded;charset=UTF-8") - .header("User-agent", "ripe and zipe") - .get(); - String body = doc.body().html().replaceAll(""", "\""); - Object jsonObj = new JSONTokener(body).nextValue(); - JSONArray statuses; - if (jsonObj instanceof JSONObject) { - JSONObject json = (JSONObject) jsonObj; - if (json.has("errors")) { - String msg = json.getJSONObject("errors").getString("message"); - throw new IOException("Twitter responded with errors: " + msg); - } - statuses = json.getJSONArray("statuses"); - } else { - statuses = (JSONArray) jsonObj; + public static String stripURLParameter(String url, String parameter) { + int paramIndex = url.indexOf("?" + parameter); + boolean wasFirstParam = true; + if (paramIndex < 0) { + wasFirstParam = false; + paramIndex = url.indexOf("&" + parameter); } - for (int i = 0; i < statuses.length(); i++) { - tweets.add((JSONObject) statuses.get(i)); + + if (paramIndex > 0) { + int nextParam = url.indexOf("&", paramIndex+1); + if (nextParam != -1) { + String c = "&"; + if (wasFirstParam) { + c = "?"; + } + url = url.substring(0, paramIndex) + c + url.substring(nextParam+1, url.length()); + } else { + url = url.substring(0, paramIndex); + } } - return tweets; + + return url; } - private int parseTweet(JSONObject tweet) throws MalformedURLException { - int parsedCount = 0; - if (!tweet.has("extended_entities")) { - logger.error("XXX Tweet doesn't have entitites"); - return 0; + /** + * Removes the current working directory from a given filename + * @param file + * @return + * 'file' without the leading current working directory + */ + public static String removeCWD(String file) { + return removeCWD(new File(file)); + } + + /** + * Get a list of all Classes within a package. + * Works with file system projects and jar files! + * Borrowed from StackOverflow, but I don't have a link :[ + * @param pkgname + * The name of the package + * @return + * List of classes within the package + */ + public static ArrayList> getClassesForPackage(String pkgname) { + ArrayList> classes = new ArrayList>(); + String relPath = pkgname.replace('.', '/'); + URL resource = ClassLoader.getSystemClassLoader().getResource(relPath); + if (resource == null) { + throw new RuntimeException("No resource for " + relPath); } - JSONObject entities = tweet.getJSONObject("extended_entities"); - - if (entities.has("media")) { - JSONArray medias = entities.getJSONArray("media"); - String url; - JSONObject media; - - for (int i = 0; i < medias.length(); i++) { - media = (JSONObject) medias.get(i); - url = media.getString("media_url"); - if (media.getString("type").equals("video")) { - JSONArray variants = media.getJSONObject("video_info").getJSONArray("variants"); - for (int j = 0; j < medias.length(); j++) { - JSONObject variant = (JSONObject) variants.get(i); - if (variant.has("bitrate") && variant.getInt("bitrate") == 832000) { - addURLToDownload(new URL(variant.getString("url"))); - parsedCount++; - break; - } + String fullPath = resource.getFile(); + File directory = null; + try { + directory = new File(resource.toURI()); + } catch (URISyntaxException e) { + throw new RuntimeException(pkgname + " (" + resource + ") does not appear to be a valid URL / URI. Strange, since we got it from the system...", e); + } catch (IllegalArgumentException e) { + directory = null; + } + + if (directory != null && directory.exists()) { + // Get the list of the files contained in the package + String[] files = directory.list(); + for (String file : files) { + if (file.endsWith(".class") && !file.contains("$")) { + String className = pkgname + '.' + file.substring(0, file.length() - 6); + try { + classes.add(Class.forName(className)); + } catch (ClassNotFoundException e) { + throw new RuntimeException("ClassNotFoundException loading " + className); } - } else if (media.getString("type").equals("photo")) { - if (url.contains(".twimg.com/")) { - url += ":orig"; - addURLToDownload(new URL(url)); - parsedCount++; - } else { - logger.debug("Unexpected media_url: " + url); + } + } + } + else { + // Load from JAR + try { + String jarPath = fullPath + .replaceFirst("[.]jar[!].*", ".jar") + .replaceFirst("file:", ""); + jarPath = URLDecoder.decode(jarPath, "UTF-8"); + JarFile jarFile = new JarFile(jarPath); + Enumeration entries = jarFile.entries(); + while (entries.hasMoreElements()) { + JarEntry nextElement = entries.nextElement(); + String entryName = nextElement.getName(); + if (entryName.startsWith(relPath) + && entryName.length() > (relPath.length() + "/".length()) + && !nextElement.isDirectory()) { + String className = entryName.replace('/', '.').replace('\\', '.').replace(".class", ""); + try { + classes.add(Class.forName(className)); + } catch (ClassNotFoundException e) { + logger.error("ClassNotFoundException loading " + className); + jarFile.close(); // Resource leak fix? + throw new RuntimeException("ClassNotFoundException loading " + className); + } } } + jarFile.close(); // Eclipse said not closing it would have a resource leak + } catch (IOException e) { + logger.error("Error while loading jar file:", e); + throw new RuntimeException(pkgname + " (" + directory + ") does not appear to be a valid package", e); } } - - - return parsedCount; + return classes; } - @Override - public void rip() throws IOException { - getAccessToken(); + public static final int SHORTENED_PATH_LENGTH = 12; + public static String shortenPath(String path) { + return shortenPath(new File(path)); + } + public static String shortenPath(File file) { + String path = removeCWD(file); + if (path.length() < SHORTENED_PATH_LENGTH * 2) { + return path; + } + return path.substring(0, SHORTENED_PATH_LENGTH) + + "..." + + path.substring(path.length() - SHORTENED_PATH_LENGTH); + } - switch (albumType) { - case ACCOUNT: - checkRateLimits("statuses", "/statuses/user_timeline"); - break; - case SEARCH: - checkRateLimits("search", "/search/tweets"); - break; + public static String filesystemSafe(String text) { + text = text.replaceAll("[^a-zA-Z0-9.-]", "_") + .replaceAll("__", "_") + .replaceAll("_+$", ""); + if (text.length() > 100) { + text = text.substring(0, 99); } + return text; + } - Long lastMaxID = 0L; - int parsedCount = 0; - for (int i = 0; i < MAX_REQUESTS; i++) { - List tweets = getTweets(getApiURL(lastMaxID - 1)); - if (tweets.size() == 0) { - logger.info(" No more tweets found."); - break; - } - logger.debug("Twitter response #" + (i + 1) + " Tweets:\n" + tweets); - if (tweets.size() == 1 && - lastMaxID== Long.parseLong(tweets.get(0).getString("id_str")) - ) { - logger.info(" No more tweet found."); - break; - } + public static String bytesToHumanReadable(int bytes) { + float fbytes = (float) bytes; + String[] mags = new String[] {"", "k", "m", "g", "t"}; + int magIndex = 0; + while (fbytes >= 1024) { + fbytes /= 1024; + magIndex++; + } + return String.format("%.2f%sb", fbytes, mags[magIndex]); + } - for (JSONObject tweet : tweets) { - lastMaxID = tweet.getLong("id"); - parsedCount += parseTweet(tweet); + public static List getListOfAlbumRippers() throws Exception { + List list = new ArrayList(); + for (Constructor ripper : AbstractRipper.getRipperConstructors("com.rarchives.ripme.ripper.rippers")) { + list.add(ripper.getName()); + } + return list; + } + public static List getListOfVideoRippers() throws Exception { + List list = new ArrayList(); + for (Constructor ripper : AbstractRipper.getRipperConstructors("com.rarchives.ripme.ripper.rippers.video")) { + list.add(ripper.getName()); + } + return list; + } - if (isStopped() || (isThisATest() && parsedCount > 0)) { - break; + public static void playSound(String filename) { + URL resource = ClassLoader.getSystemClassLoader().getResource(filename); + try { + final Clip clip = (Clip) AudioSystem.getLine(new Line.Info(Clip.class)); + clip.addLineListener(new LineListener() { + @Override + public void update(LineEvent event) { + if (event.getType() == LineEvent.Type.STOP) { + clip.close(); + } } - } + }); + clip.open(AudioSystem.getAudioInputStream(resource)); + clip.start(); + } catch (Exception e) { + logger.error("Failed to play sound " + filename, e); + } + } + + /** + * Configures root logger, either for FILE output or just console. + */ + public static void configureLogger() { + LogManager.shutdown(); + String logFile; + if (getConfigBoolean("log.save", false)) { + logFile = "log4j.file.properties"; + } + else { + logFile = "log4j.properties"; + } + InputStream stream = Utils.class.getClassLoader().getResourceAsStream(logFile); + if (stream == null) { + PropertyConfigurator.configure("src/main/resources/" + logFile); + } else { + PropertyConfigurator.configure(stream); + } + logger.info("Loaded " + logFile); + try { + stream.close(); + } catch (IOException e) { } + } - if (isStopped() || (isThisATest() && parsedCount > 0)) { + /** + * Gets list of strings between two strings. + * @param fullText Text to retrieve from. + * @param start String that precedes the desired text + * @param finish String that follows the desired text + * @return List of all strings that are between 'start' and 'finish' + */ + public static List between(String fullText, String start, String finish) { + List result = new ArrayList(); + int i, j; + i = fullText.indexOf(start); + while (i >= 0) { + i += start.length(); + j = fullText.indexOf(finish, i); + if (j < 0) { break; } + result.add(fullText.substring(i, j)); + i = fullText.indexOf(start, j + finish.length()); + } + return result; + } - try { - Thread.sleep(WAIT_TIME); - } catch (InterruptedException e) { - logger.error("[!] Interrupted while waiting to load more results", e); - break; + /** + * Parses an URL query + * + * @param query + * The query part of an URL + * @return The map of all query parameters + */ + public static Map parseUrlQuery(String query) { + Map res = new HashMap(); + + if (query.equals("")) { + return res; + } + + String[] parts = query.split("&"); + int pos; + + try { + for (String part : parts) { + if ((pos = part.indexOf('=')) >= 0) { + res.put(URLDecoder.decode(part.substring(0, pos), "UTF-8"), URLDecoder.decode(part.substring(pos + 1), "UTF-8")); + } else { + res.put(URLDecoder.decode(part, "UTF-8"), ""); + } } + } catch (UnsupportedEncodingException e) { + // Shouldn't happen since UTF-8 is required to be supported + throw new RuntimeException(e); } - waitForThreads(); + return res; } - @Override - public String getHost() { - return HOST; - } + /** + * Parses an URL query and returns the requested parameter's value + * + * @param query + * The query part of an URL + * @param key + * The key whose value is requested + * @return The associated value or null if key wasn't found + */ + public static String parseUrlQuery(String query, String key) { + if (query.equals("")) { + return null; + } + + String[] parts = query.split("&"); + int pos; - @Override - public String getGID(URL url) throws MalformedURLException { - switch (albumType) { - case ACCOUNT: - return "account_" + accountName; - case SEARCH: - StringBuilder gid = new StringBuilder(); - for (int i = 0; i < searchText.length(); i++) { - char c = searchText.charAt(i); - // Ignore URL-encoded chars - if (c == '%') { - gid.append('_'); - i += 2; - continue; - // Ignore non-alphanumeric chars - } else if ( - (c >= 'a' && c <= 'z') - || (c >= 'A' && c <= 'Z') - || (c >= '0' && c <= '9') - ) { - gid.append(c); + try { + for (String part : parts) { + if ((pos = part.indexOf('=')) >= 0) { + if (URLDecoder.decode(part.substring(0, pos), "UTF-8").equals(key)) { + return URLDecoder.decode(part.substring(pos + 1), "UTF-8"); } + + } else if (URLDecoder.decode(part, "UTF-8").equals(key)) { + return ""; } - return "search_" + gid.toString(); + } + } catch (UnsupportedEncodingException e) { + // Shouldn't happen since UTF-8 is required to be supported + throw new RuntimeException(e); } - throw new MalformedURLException("Could not decide type of URL (search/account): " + url); - } + return null; + } }