diff --git a/README.md b/README.md index 8ee62e2..f1b575b 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ TumblThree is the code rewrite of [TumblTwo](https://github.com/johanneszab/TumblTwo), a free and open source Tumblr blog backup application, using C# with WPF and the MVVM pattern. It uses the [Win Application Framework (WAF)](https://github.com/jbe2277/waf). It downloads photo, video, audio and text posts from a given tumblr blog. ### New Features (over TumblTwo): -* Internationalization support. +* Internationalization support (currently available: zh, ru, de, fr, es). * Autosave of the queuelist. * Save, clear and restore the queuelist. * Download of text, audio, quote, conversation, link and question posts. @@ -12,6 +12,8 @@ TumblThree is the code rewrite of [TumblTwo](https://github.com/johanneszab/Tumb * Download of \_raw image files (original/higher resolution pictures). * A downloader for private blogs (login required blogs). * A downloader for downloading "liked by" photos and videos instead of a tumblr blog. +* A downloader for downloading photos and videos from the tumblr tag search (e.g. http://www.tumblr.com/tagged/keyword) (login required). +* A downloader for downloading photos and videos from the tumblr search (e.g. http://www.tumblr.com/search/keywords). * An option to download an url list instead of the actual files. * Allows to download only original content of the blog and skip reblogged posts. * Set a time interval for a automatic download (e.g. during nights). @@ -76,6 +78,22 @@ TumblThree is the code rewrite of [TumblTwo](https://github.com/johanneszab/Tumb * You can use the _portable mode_ (settings->general) to stores the application settings in the same folder as the executable. * For each blog there is also an index file in the download location (default: in the _.\\Blogs\\_ folder relative to the executable) named after the _blogname_.tumblr. 
Here are blog relative information stored like what files have been downloaded, the url of the blog and when it was added. This allows you to move your downloaded files (photos, videos, audio files) to a different location without interfering with the backup process. +### Getting Started: ### + +The default settings should cover most users. You should only have to change the download location and the kind of posts you want to download. For this, in the Settings (click on the Settings button in the lower panel of the main user interface) you might want to change: +* General -> Download location: Specifies where to download the files. The default is in a folder _Blogs_ relative to the TumblThree.exe +* Blog -> Settings applied to each blog upon addition: + * Here you can set what posts newly added blogs will download per default. To change what each blog downloads, click on a blog in the main interface, select the Details Tab on the right and change the settings. This separation allows downloading different kinds of posts for different blogs. You can change the download settings for multiple existing blogs by selecting them with shift+left click for a range or ctrl-a for all of them. + * Note: You might want to always select: + * _Download Reblogged posts_: Downloads reblogs, not just original content of the blog author. + * _Force Rescan_: Force Rescan always crawls the whole blog and not just new posts which were added after the last successful crawl. The statistics of a blog (total posts, number of posts, number of duplicates) currently can only be updated if the whole blog is crawled. Thus, disabling this might result in downloading "more" posts than displayed in TumblThree. If you don't care about the displayed blog statistics, turning Force Rescan off will decrease the scanning time since already downloaded posts are skipped in the scanning. 
+ +Settings you might want to change if the download speed is not satisfactory: +* General -> Parallel connections: Specifies the number of connections used for downloading posts. The number is shared between all actively downloading blogs. +* General -> Parallel Blogs: Number of blogs to download in parallel. + +Most likely you don't have to change any of the other connection settings. + ### Current Limitations: ### * The old datasets from TumblTwo and TumblOne are NOT compatible yet. diff --git a/src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs b/src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs index a04d137..12674ac 100644 --- a/src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs +++ b/src/TumblThree/TumblThree.Applications/Controllers/ManagerController.cs @@ -257,7 +257,10 @@ private void EnqueueAutoDownload() private bool CanAddBlog() { - return Validator.IsValidTumblrUrl(crawlerService.NewBlogUrl) || Validator.IsValidTumblrLikedByUrl(crawlerService.NewBlogUrl) || Validator.IsValidTumblrSearchUrl(crawlerService.NewBlogUrl); + return Validator.IsValidTumblrUrl(crawlerService.NewBlogUrl) + || Validator.IsValidTumblrLikedByUrl(crawlerService.NewBlogUrl) + || Validator.IsValidTumblrSearchUrl(crawlerService.NewBlogUrl) + || Validator.IsValidTumblrTaggedUrl(crawlerService.NewBlogUrl); } private async Task AddBlog() diff --git a/src/TumblThree/TumblThree.Applications/Downloader/DownloaderFactory.cs b/src/TumblThree/TumblThree.Applications/Downloader/DownloaderFactory.cs index 5dae26c..6bd81c3 100644 --- a/src/TumblThree/TumblThree.Applications/Downloader/DownloaderFactory.cs +++ b/src/TumblThree/TumblThree.Applications/Downloader/DownloaderFactory.cs @@ -47,8 +47,10 @@ public IDownloader GetDownloader(BlogTypes blogtype, CancellationToken ct, Pause return new TumblrPrivateDownloader(shellService, ct, pt, progress, new PostCounter(blog), GetFileDownloader(ct), crawlerService, blog, LoadFiles(blog)); 
case BlogTypes.tlb: return new TumblrLikedByDownloader(shellService, ct, pt, progress, new PostCounter(blog), GetFileDownloader(ct), crawlerService, blog, LoadFiles(blog)); - case BlogTypes.ts: + case BlogTypes.tumblrsearch: return new TumblrSearchDownloader(shellService, ct, pt, progress, new PostCounter(blog), GetFileDownloader(ct), crawlerService, blog, LoadFiles(blog)); + case BlogTypes.tumblrtagged: + return new TumblrTaggedDownloader(shellService, ct, pt, progress, new PostCounter(blog), GetFileDownloader(ct), crawlerService, blog, LoadFiles(blog)); default: throw new ArgumentException("Website is not supported!", "blogType"); } diff --git a/src/TumblThree/TumblThree.Applications/Downloader/TumblrLikedByDownloader.cs b/src/TumblThree/TumblThree.Applications/Downloader/TumblrLikedByDownloader.cs index fbed4a3..5371890 100644 --- a/src/TumblThree/TumblThree.Applications/Downloader/TumblrLikedByDownloader.cs +++ b/src/TumblThree/TumblThree.Applications/Downloader/TumblrLikedByDownloader.cs @@ -119,6 +119,13 @@ private async Task GetUrlsAsync() var semaphoreSlim = new SemaphoreSlim(shellService.Settings.ParallelScans); var trackedTasks = new List(); + if (!await CheckIfLoggedIn()) + { + Logger.Error("TumblrLikedByDownloader:GetUrlsAsync: {0}", "User not logged in"); + shellService.ShowError(new Exception("User not logged in"), Resources.NotLoggedIn, blog.Name); + return; + } + foreach (int crawlerNumber in Enumerable.Range(0, shellService.Settings.ParallelScans)) { await semaphoreSlim.WaitAsync(); @@ -128,13 +135,6 @@ private async Task GetUrlsAsync() try { string document = await RequestDataAsync(blog.Url + "/page/" + crawlerNumber); - if (!CheckIfLoggedIn(document)) - { - Logger.Error("TumblrLikedByDownloader:GetUrlsAsync: {0}", "User not logged in"); - shellService.ShowError(new Exception("User not logged in"), Resources.NotLoggedIn, blog.Name); - return; - } - await AddUrlsToDownloadList(document, crawlerNumber); } catch @@ -156,8 +156,9 @@ private async Task 
GetUrlsAsync() } } - private bool CheckIfLoggedIn(string document) + private async Task CheckIfLoggedIn() { + string document = await RequestDataAsync(blog.Url + "/page/1"); return !document.Contains("
tags, in var jsonDeserializer = new System.Web.Script.Serialization.JavaScriptSerializer { MaxJsonLength = 2147483644 }; var result = jsonDeserializer.Deserialize(response); - if (result.response.posts_html == null) + if (string.IsNullOrEmpty(result.response.posts_html)) { return; } diff --git a/src/TumblThree/TumblThree.Applications/Downloader/TumblrTaggedDownloader.cs b/src/TumblThree/TumblThree.Applications/Downloader/TumblrTaggedDownloader.cs new file mode 100644 index 0000000..5185eb9 --- /dev/null +++ b/src/TumblThree/TumblThree.Applications/Downloader/TumblrTaggedDownloader.cs @@ -0,0 +1,492 @@ +using System; +using System.Collections.Generic; +using System.ComponentModel.Composition; +using System.IO; +using System.Linq; +using System.Net; +using System.Text; +using System.Text.RegularExpressions; +using System.Threading; +using System.Threading.Tasks; + +using TumblThree.Applications.DataModels; +using TumblThree.Applications.Properties; +using TumblThree.Applications.Services; +using TumblThree.Domain; +using TumblThree.Domain.Models; + +namespace TumblThree.Applications.Downloader +{ + [Export(typeof(IDownloader))] + [ExportMetadata("BlogType", BlogTypes.tumblrtagged)] + public class TumblrTaggedDownloader : Downloader, IDownloader + { + private int numberOfPagesCrawled = 0; + + public TumblrTaggedDownloader(IShellService shellService, CancellationToken ct, PauseToken pt, IProgress progress, PostCounter counter, FileDownloader fileDownloader, ICrawlerService crawlerService, IBlog blog, IFiles files) + : base(shellService, ct, pt, progress, counter, fileDownloader, crawlerService, blog, files) + { + } + + public async Task Crawl() + { + Logger.Verbose("TumblrTaggedDownloader.Crawl:Start"); + + Task grabber = GetUrlsAsync(); + Task downloader = DownloadBlogAsync(); + + await grabber; + + UpdateProgressQueueInformation(Resources.ProgressUniqueDownloads); + blog.DuplicatePhotos = DetermineDuplicates(PostTypes.Photo); + blog.DuplicateVideos = 
DetermineDuplicates(PostTypes.Video); + blog.DuplicateAudios = DetermineDuplicates(PostTypes.Audio); + blog.TotalCount = (blog.TotalCount - blog.DuplicatePhotos - blog.DuplicateAudios - blog.DuplicateVideos); + + CleanCollectedBlogStatistics(); + + await downloader; + + if (!ct.IsCancellationRequested) + { + blog.LastCompleteCrawl = DateTime.Now; + } + + blog.Save(); + + UpdateProgressQueueInformation(""); + } + + private string ImageSize() + { + if (shellService.Settings.ImageSize == "raw") + return "1280"; + return shellService.Settings.ImageSize; + } + + private string ResizeTumblrImageUrl(string imageUrl) + { + var sb = new StringBuilder(imageUrl); + return sb + .Replace("_raw", "_" + ImageSize()) + .Replace("_1280", "_" + ImageSize()) + .Replace("_540", "_" + ImageSize()) + .Replace("_500", "_" + ImageSize()) + .Replace("_400", "_" + ImageSize()) + .Replace("_250", "_" + ImageSize()) + .Replace("_100", "_" + ImageSize()) + .Replace("_75sq", "_" + ImageSize()) + .ToString(); + } + + protected override bool CheckIfFileExistsInDirectory(string url) + { + string fileName = url.Split('/').Last(); + Monitor.Enter(lockObjectDirectory); + string blogPath = blog.DownloadLocation(); + if (Directory.EnumerateFiles(blogPath).Any(file => file.Contains(fileName))) + { + Monitor.Exit(lockObjectDirectory); + return true; + } + Monitor.Exit(lockObjectDirectory); + return false; + } + + private int DetermineDuplicates(PostTypes type) + { + return statisticsBag.Where(url => url.PostType.Equals(type)) + .GroupBy(url => url.Url) + .Where(g => g.Count() > 1) + .Sum(g => g.Count() - 1); + } + + private IEnumerable GetPageNumbers() + { + if (!TestRange(blog.PageSize, 1, 100)) + blog.PageSize = 100; + + if (string.IsNullOrEmpty(blog.DownloadPages)) + { + return Enumerable.Range(0, shellService.Settings.ParallelScans); + } + return RangeToSequence(blog.DownloadPages); + } + + private static bool TestRange(int numberToCheck, int bottom, int top) + { + return (numberToCheck >= bottom && 
numberToCheck <= top); + } + + static IEnumerable RangeToSequence(string input) + { + string[] parts = input.Split(','); + foreach (string part in parts) + { + if (!part.Contains('-')) + { + yield return int.Parse(part); + continue; + } + string[] rangeParts = part.Split('-'); + int start = int.Parse(rangeParts[0]); + int end = int.Parse(rangeParts[1]); + + while (start <= end) + { + yield return start; + start++; + } + } + } + + + private ulong GetLastPostId() + { + ulong lastId = blog.LastId; + if (blog.ForceRescan) + { + blog.ForceRescan = false; + return 0; + } + if (!string.IsNullOrEmpty(blog.DownloadPages)) + { + blog.ForceRescan = false; + return 0; + } + return lastId; + } + + private static bool CheckPostAge(TumblrJson document, ulong lastId) + { + ulong highestPostId = 0; + ulong.TryParse(document.response.posts.FirstOrDefault().id, + out highestPostId); + + if (highestPostId < lastId) + { + return false; + } + return true; + } + + protected override bool CheckIfFileExistsInDB(string url) + { + string fileName = url.Split('/').Last(); + Monitor.Enter(lockObjectDb); + if (files.Links.Contains(fileName)) + { + Monitor.Exit(lockObjectDb); + return true; + } + Monitor.Exit(lockObjectDb); + return false; + } + + private async Task GetUrlsAsync() + { + var semaphoreSlim = new SemaphoreSlim(shellService.Settings.ParallelScans); + var trackedTasks = new List(); + + if (!await CheckIfLoggedIn()) + { + Logger.Error("TumblrTaggedDownloader:GetUrlsAsync: {0}", "User not logged in"); + shellService.ShowError(new Exception("User not logged in"), Resources.NotLoggedIn, blog.Name); + return; + } + + long crawlerTimeOffset = GenerateCrawlerTimeOffsets(); + + foreach (int crawlerNumber in Enumerable.Range(0, shellService.Settings.ParallelScans)) + { + await semaphoreSlim.WaitAsync(); + + trackedTasks.Add(new Func(async () => + { + var tags = new List(); + if (!string.IsNullOrWhiteSpace(blog.Tags)) + { + tags = blog.Tags.Split(',').Select(x => x.Trim()).ToList(); + } + + 
try + { + long crawlerTime = DateTimeOffset.Now.ToUnixTimeSeconds() - (crawlerNumber * crawlerTimeOffset); + await AddUrlsToDownloadList(tags, crawlerTime); + } + catch + { + } + finally + { + semaphoreSlim.Release(); + } + })()); + } + await Task.WhenAll(trackedTasks); + + producerConsumerCollection.CompleteAdding(); + + if (!ct.IsCancellationRequested) + { + UpdateBlogStats(); + } + } + + private long GenerateCrawlerTimeOffsets() + { + long tagsIntroduced = 1173570824; // Unix time of 03/10/2007 @ 11:53pm (UTC) + long unixTimeNow = DateTimeOffset.Now.ToUnixTimeSeconds(); + long tagsLifeTime = unixTimeNow - tagsIntroduced; + return tagsLifeTime / shellService.Settings.ParallelScans; + } + + private async Task CheckIfLoggedIn() + { + string document = await GetTaggedSearchPageAsync(DateTimeOffset.Now.ToUnixTimeSeconds()); + return !document.Contains("SearchResultsModel"); + } + + private long ExtractNextPageLink(string document) + { + long unixTime = 0; + string pagination = "id=\"next_page_link\" href=\"/tagged/" + blog.Name + "\\?before="; + long.TryParse(Regex.Match(document, pagination + "([\\d]*)\"").Groups[1].Value, out unixTime); + return unixTime; + } + + private async Task GetTaggedSearchPageAsync(long time) + { + if (shellService.Settings.LimitConnections) + { + return await RequestGetAsync(time); + } + return await RequestGetAsync(time); + } + + protected virtual async Task RequestGetAsync(long time) + { + var requestRegistration = new CancellationTokenRegistration(); + try + { + HttpWebRequest request = CreateGetReqeust(time); + + requestRegistration = ct.Register(() => request.Abort()); + using (var response = await request.GetResponseAsync() as HttpWebResponse) + { + using (var stream = GetStreamForApiRequest(response.GetResponseStream())) + { + using (var buffer = new BufferedStream(stream)) + { + using (var reader = new StreamReader(buffer)) + { + return reader.ReadToEnd(); + } + } + } + } + } + finally + { + requestRegistration.Dispose(); + } + } + 
+ protected HttpWebRequest CreateGetReqeust(long pagination) + { + string url = "https://www.tumblr.com/tagged/" + blog.Name + "?before=" + pagination; + var request = (HttpWebRequest)WebRequest.Create(url); + request.Method = "GET"; + request.ProtocolVersion = HttpVersion.Version11; + request.UserAgent = + "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36"; + request.AllowAutoRedirect = true; + request.KeepAlive = true; + request.Pipelined = true; + request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate; + // Timeouts don't work with GetResponseAsync() as it internally uses BeginGetResponse. + // See docs: https://msdn.microsoft.com/en-us/library/system.net.httpwebrequest.timeout(v=vs.110).aspx + // Quote: The Timeout property has no effect on asynchronous requests made with the BeginGetResponse or BeginGetRequestStream method. + // TODO: Use HttpClient instead? + request.ReadWriteTimeout = shellService.Settings.TimeOut * 1000; + request.Timeout = -1; + request.CookieContainer = SharedCookieService.GetUriCookieContainer(new Uri("https://www.tumblr.com/")); + ServicePointManager.DefaultConnectionLimit = 400; + request = SetWebRequestProxy(request, shellService.Settings); + //request.Referer = @"https://www.tumblr.com/tagged/" + blog.Name + "?before=" oldPagination; + request.Headers["DNT"] = "1"; + return request; + } + + private async Task AddUrlsToDownloadList(IList tags, long time) + { + while (true) + { + if (ct.IsCancellationRequested) + { + return; + } + if (pt.IsPaused) + { + pt.WaitWhilePausedWithResponseAsyc().Wait(); + } + + string document = await GetTaggedSearchPageAsync(time); + if (document.Contains("No posts found.")) //
+ { + return; + } + + try + { + AddPhotoUrlToDownloadList(document, tags); + AddVideoUrlToDownloadList(document, tags); + } + catch (NullReferenceException) + { + } + + Interlocked.Increment(ref numberOfPagesCrawled); + UpdateProgressQueueInformation(Resources.ProgressGetUrlShort, numberOfPagesCrawled); + time = ExtractNextPageLink(document); + } + } + + protected override async Task DownloadPhotoAsync(TumblrPost downloadItem) + { + string url = Url(downloadItem); + + if (blog.ForceSize) + { + url = ResizeTumblrImageUrl(url); + } + + foreach (string host in shellService.Settings.TumblrHosts) + { + url = BuildRawImageUrl(url, host); + if (await DownloadDetectedImageUrl(url, PostDate(downloadItem))) + return; + } + + await DownloadDetectedImageUrl(Url(downloadItem), PostDate(downloadItem)); + } + + private async Task DownloadDetectedImageUrl(string url, DateTime postDate) + { + if (!(CheckIfFileExistsInDB(url) || CheckIfBlogShouldCheckDirectory(GetCoreImageUrl(url)))) + { + string blogDownloadLocation = blog.DownloadLocation(); + string fileName = url.Split('/').Last(); + string fileLocation = FileLocation(blogDownloadLocation, fileName); + string fileLocationUrlList = FileLocationLocalized(blogDownloadLocation, Resources.FileNamePhotos); + UpdateProgressQueueInformation(Resources.ProgressDownloadImage, fileName); + if (await DownloadBinaryFile(fileLocation, fileLocationUrlList, url)) + { + SetFileDate(fileLocation, postDate); + UpdateBlogPostCount(ref counter.Photos, value => blog.DownloadedPhotos = value); + UpdateBlogProgress(ref counter.TotalDownloads); + UpdateBlogDB(fileName); + if (shellService.Settings.EnablePreview) + { + if (!fileName.EndsWith(".gif")) + { + blog.LastDownloadedPhoto = Path.GetFullPath(fileLocation); + } + else + { + blog.LastDownloadedVideo = Path.GetFullPath(fileLocation); + } + } + return true; + } + return false; + } + return true; + } + + /// + /// Builds a tumblr raw image url from any sized tumblr image url if the ImageSize is set to 
"raw". + /// + /// The url detected from the crawler. + /// Hostname to insert in the original url. + /// + public string BuildRawImageUrl(string url, string host) + { + if (shellService.Settings.ImageSize == "raw") + { + string path = new Uri(url).LocalPath.TrimStart('/'); + var imageDimension = new Regex("_\\d+"); + path = imageDimension.Replace(path, "_raw"); + return "https://" + host + "/" + path; + } + return url; + } + + private void AddPhotoUrlToDownloadList(string document, IList tags) + { + if (blog.DownloadPhoto) + { + var regex = new Regex("\"(http[A-Za-z0-9_/:.]*media.tumblr.com[A-Za-z0-9_/:.]*(jpg|png|gif))\""); + foreach (Match match in regex.Matches(document)) + { + string imageUrl = match.Groups[1].Value; + if (imageUrl.Contains("avatar") || imageUrl.Contains("previews")) + continue; + if (blog.SkipGif && imageUrl.EndsWith(".gif")) + { + continue; + } + imageUrl = ResizeTumblrImageUrl(imageUrl); + // TODO: postID + AddToDownloadList(new TumblrPost(PostTypes.Photo, imageUrl, Guid.NewGuid().ToString("N"))); + } + } + } + + private void AddVideoUrlToDownloadList(string document, IList tags) + { + if (blog.DownloadVideo) + { + var regex = new Regex("\"(http[A-Za-z0-9_/:.]*.com/video_file/[A-Za-z0-9_/:.]*)\""); + foreach (Match match in regex.Matches(document)) + { + string videoUrl = match.Groups[1].Value; + // TODO: postId + if (shellService.Settings.VideoSize == 1080) + { + // TODO: postID + AddToDownloadList(new TumblrPost(PostTypes.Video, videoUrl.Replace("/480", "") + ".mp4", Guid.NewGuid().ToString("N"))); + } + else if (shellService.Settings.VideoSize == 480) + { + // TODO: postID + AddToDownloadList(new TumblrPost(PostTypes.Video, + "https://vt.tumblr.com/" + videoUrl.Replace("/480", "").Split('/').Last() + "_480.mp4", + Guid.NewGuid().ToString("N"))); + } + } + } + } + + private void UpdateBlogStats() + { + blog.TotalCount = statisticsBag.Count; + blog.Photos = statisticsBag.Count(url => url.PostType.Equals(PostTypes.Photo)); + blog.Videos = 
statisticsBag.Count(url => url.PostType.Equals(PostTypes.Video)); + blog.Audios = statisticsBag.Count(url => url.PostType.Equals(PostTypes.Audio)); + blog.Texts = statisticsBag.Count(url => url.PostType.Equals(PostTypes.Text)); + blog.Conversations = statisticsBag.Count(url => url.PostType.Equals(PostTypes.Conversation)); + blog.Quotes = statisticsBag.Count(url => url.PostType.Equals(PostTypes.Quote)); + blog.NumberOfLinks = statisticsBag.Count(url => url.PostType.Equals(PostTypes.Link)); + blog.PhotoMetas = statisticsBag.Count(url => url.PostType.Equals(PostTypes.PhotoMeta)); + blog.VideoMetas = statisticsBag.Count(url => url.PostType.Equals(PostTypes.VideoMeta)); + blog.AudioMetas = statisticsBag.Count(url => url.PostType.Equals(PostTypes.AudioMeta)); + } + + private void AddToDownloadList(TumblrPost addToList) + { + producerConsumerCollection.Add(addToList); + statisticsBag.Add(addToList); + } + } +} diff --git a/src/TumblThree/TumblThree.Applications/TumblThree.Applications.csproj b/src/TumblThree/TumblThree.Applications/TumblThree.Applications.csproj index 6dbd5c0..2e788e3 100644 --- a/src/TumblThree/TumblThree.Applications/TumblThree.Applications.csproj +++ b/src/TumblThree/TumblThree.Applications/TumblThree.Applications.csproj @@ -80,6 +80,7 @@ + diff --git a/src/TumblThree/TumblThree.Domain/Models/BlogFactory.cs b/src/TumblThree/TumblThree.Domain/Models/BlogFactory.cs index 12f4c3a..804a6b3 100644 --- a/src/TumblThree/TumblThree.Domain/Models/BlogFactory.cs +++ b/src/TumblThree/TumblThree.Domain/Models/BlogFactory.cs @@ -18,7 +18,9 @@ public IBlog GetBlog(string blogUrl, string path) if (Validator.IsValidTumblrLikedByUrl(blogUrl)) return new TumblrLikeByBlog(blogUrl, path, BlogTypes.tlb); if (Validator.IsValidTumblrSearchUrl(blogUrl)) - return new TumblrSearchBlog(blogUrl, path, BlogTypes.ts); + return new TumblrSearchBlog(blogUrl, path, BlogTypes.tumblrsearch); + if (Validator.IsValidTumblrTaggedUrl(blogUrl)) + return new TumblrTaggedBlog(blogUrl, path, 
BlogTypes.tumblrtagged); throw new ArgumentException("Website is not supported!", nameof(blogUrl)); } } diff --git a/src/TumblThree/TumblThree.Domain/Models/BlogTypes.cs b/src/TumblThree/TumblThree.Domain/Models/BlogTypes.cs index 193cf4e..ce1ebf1 100644 --- a/src/TumblThree/TumblThree.Domain/Models/BlogTypes.cs +++ b/src/TumblThree/TumblThree.Domain/Models/BlogTypes.cs @@ -7,6 +7,7 @@ public enum BlogTypes instagram, twitter, tlb, - ts + tumblrsearch, + tumblrtagged } } diff --git a/src/TumblThree/TumblThree.Domain/Models/TumblrTaggedBlog.cs b/src/TumblThree/TumblThree.Domain/Models/TumblrTaggedBlog.cs new file mode 100644 index 0000000..cc19284 --- /dev/null +++ b/src/TumblThree/TumblThree.Domain/Models/TumblrTaggedBlog.cs @@ -0,0 +1,26 @@ +using System.Runtime.Serialization; + +namespace TumblThree.Domain.Models +{ + [DataContract] + public class TumblrTaggedBlog : Blog + { + public TumblrTaggedBlog(string url, string location, BlogTypes blogType) : base(url, location, blogType) + { + } + + protected override string ExtractName() + { + return Url.Split('/')[4]; + } + + protected override string ExtractUrl() + { + if (Url.StartsWith("http://")) + Url = Url.Insert(4, "s"); + int blogNameLength = Url.Split('/')[4].Length; + var urlLength = 30; + return Url.Substring(0, blogNameLength + urlLength); + } + } +} diff --git a/src/TumblThree/TumblThree.Domain/Models/Validator.cs b/src/TumblThree/TumblThree.Domain/Models/Validator.cs index b4d7404..927e6be 100644 --- a/src/TumblThree/TumblThree.Domain/Models/Validator.cs +++ b/src/TumblThree/TumblThree.Domain/Models/Validator.cs @@ -21,5 +21,11 @@ public static bool IsValidTumblrSearchUrl(string url) return url != null && url.Length > 29 && url.Contains("www.tumblr.com/search/") && !url.Any(char.IsWhiteSpace) && (url.StartsWith("http://", true, null) || url.StartsWith("https://", true, null)); } + + public static bool IsValidTumblrTaggedUrl(string url) + { + return url != null && url.Length > 29 && 
url.Contains("www.tumblr.com/tagged/") && !url.Any(char.IsWhiteSpace) && + (url.StartsWith("http://", true, null) || url.StartsWith("https://", true, null)); + } } } diff --git a/src/TumblThree/TumblThree.Domain/TumblThree.Domain.csproj b/src/TumblThree/TumblThree.Domain/TumblThree.Domain.csproj index 857575f..e1c21a6 100644 --- a/src/TumblThree/TumblThree.Domain/TumblThree.Domain.csproj +++ b/src/TumblThree/TumblThree.Domain/TumblThree.Domain.csproj @@ -72,6 +72,7 @@ +