Skip to content

Commit 05eaf34

Browse files
committed
Create RestClient once rather than on every request
1 parent 6248312 commit 05eaf34

File tree

5 files changed

+36 -21 lines changed

5 files changed

+36 -21 lines changed

LinkCrawler/LinkCrawler.Tests/LinkCrawlerTests.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@ public void CrawlForLinksInResponse_ResponseModelWithMarkup_ValidUrlFoundInMarku
4949
var mockResponseModel = new Mock<IResponseModel>();
5050
mockResponseModel.Setup(x => x.Markup).Returns(markup);
5151

52-
LinkCrawler.CrawlForLinksInResponse(mockResponseModel.Object);
52+
LinkCrawler.CrawlLinksInResponse(mockResponseModel.Object);
5353
Assert.That(LinkCrawler.UrlList.Where(l=>l.Address == url).Count() > 0);
5454
}
5555
}

LinkCrawler/LinkCrawler/LinkCrawler.cs

+30-17
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,12 @@ public class LinkCrawler
1717
public string BaseUrl { get; set; }
1818
public bool CheckImages { get; set; }
1919
public bool FollowRedirects { get; set; }
20-
public RestRequest RestRequest { get; set; }
20+
private RestRequest GetRequest { get; set; }
21+
private RestClient Client{ get; set; }
2122
public IEnumerable<IOutput> Outputs { get; set; }
2223
public IValidUrlParser ValidUrlParser { get; set; }
2324
public bool OnlyReportBrokenLinksToOutput { get; set; }
24-
public static List<LinkModel> UrlList;
25+
public List<LinkModel> UrlList;
2526
private ISettings _settings;
2627
private Stopwatch timer;
2728

@@ -33,7 +34,10 @@ public LinkCrawler(IEnumerable<IOutput> outputs, IValidUrlParser validUrlParser,
3334
CheckImages = settings.CheckImages;
3435
FollowRedirects = settings.FollowRedirects;
3536
UrlList = new List<LinkModel>();
36-
RestRequest = new RestRequest(Method.GET).SetHeader("Accept", "*/*");
37+
GetRequest = new RestRequest(Method.GET).SetHeader("Accept", "*/*");
38+
Client = new RestClient() { FollowRedirects = false }; // we don't want RestSharp following the redirects, otherwise we won't see them
39+
// https://stackoverflow.com/questions/8823349/how-do-i-use-the-cookie-container-with-restsharp-and-asp-net-sessions - set cookies up according to this link?
40+
3741
OnlyReportBrokenLinksToOutput = settings.OnlyReportBrokenLinksToOutput;
3842
_settings = settings;
3943
this.timer = new Stopwatch();
@@ -49,9 +53,9 @@ public void Start()
4953
public void SendRequest(string crawlUrl, string referrerUrl = "")
5054
{
5155
var requestModel = new RequestModel(crawlUrl, referrerUrl, BaseUrl);
52-
var restClient = new RestClient(new Uri(crawlUrl)) { FollowRedirects = false }; // we don't want RestSharp following the redirects, otherwise we won't see them
56+
Client.BaseUrl = new Uri(crawlUrl);
5357

54-
restClient.ExecuteAsync(RestRequest, response =>
58+
Client.ExecuteAsync(GetRequest, response =>
5559
{
5660
if (response == null)
5761
return;
@@ -67,25 +71,34 @@ public void ProcessResponse(IResponseModel responseModel)
6771

6872
// follow 3xx redirects
6973
if (FollowRedirects && responseModel.IsRedirect)
70-
{
71-
string redirectUrl;
72-
if (responseModel.Location.StartsWith("/"))
73-
redirectUrl = responseModel.RequestedUrl.GetUrlBase() + responseModel.Location;
74-
else
75-
redirectUrl = responseModel.Location;
76-
SendRequest(redirectUrl, responseModel.RequestedUrl);
77-
}
74+
FollowRedirect(responseModel);
7875

7976
// follow internal links in response
8077
if (responseModel.ShouldCrawl)
81-
CrawlForLinksInResponse(responseModel);
78+
CrawlLinksInResponse(responseModel);
79+
}
80+
81+
private void FollowRedirect(IResponseModel responseModel)
82+
{
83+
string redirectUrl;
84+
if (responseModel.Location.StartsWith("/"))
85+
redirectUrl = responseModel.RequestedUrl.GetUrlBase() + responseModel.Location; // add base URL to relative links
86+
else
87+
redirectUrl = responseModel.Location;
88+
89+
SendRequest(redirectUrl, responseModel.RequestedUrl);
8290
}
8391

84-
public void CrawlForLinksInResponse(IResponseModel responseModel)
92+
public void CrawlLinksInResponse(IResponseModel responseModel)
8593
{
8694
var linksFoundInMarkup = MarkupHelpers.GetValidUrlListFromMarkup(responseModel.Markup, ValidUrlParser, CheckImages);
8795

88-
foreach (var url in linksFoundInMarkup)
96+
SendRequestsToLinks(linksFoundInMarkup, responseModel.RequestedUrl);
97+
}
98+
99+
private void SendRequestsToLinks(List<string> urls, string referrerUrl)
100+
{
101+
foreach (string url in urls)
89102
{
90103
lock (UrlList)
91104
{
@@ -94,7 +107,7 @@ public void CrawlForLinksInResponse(IResponseModel responseModel)
94107

95108
UrlList.Add(new LinkModel(url));
96109
}
97-
SendRequest(url, responseModel.RequestedUrl);
110+
SendRequest(url, referrerUrl);
98111
}
99112
}
100113

LinkCrawler/LinkCrawler/Models/RequestModel.cs

+1-1
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ public class RequestModel
77
public string Url;
88
public string ReferrerUrl;
99
public bool IsInternalUrl { get; set; }
10-
public RestClient Client;
10+
private readonly RestClient Client;
1111

1212
public RequestModel(string url, string referrerUrl, string baseUrl)
1313
{

LinkCrawler/LinkCrawler/Models/ResponseModel.cs

+4-1
Original file line numberDiff line numberDiff line change
@@ -13,14 +13,15 @@ public class ResponseModel : IResponseModel
1313
public string RequestedUrl { get; }
1414
public string ReferrerUrl { get; }
1515
public string Location { get; }
16+
public IList<RestResponseCookie> Cookies { get; }
1617

1718
public HttpStatusCode StatusCode { get; }
1819
public int StatusCodeNumber { get { return (int)StatusCode; } }
1920
public bool IsSuccess { get; }
2021
public bool IsInteresting { get; }
2122
public bool IsRedirect { get; }
2223
public bool ShouldCrawl { get; }
23-
public string ErrorMessage { get; }
24+
private string ErrorMessage { get; }
2425

2526
public ResponseModel(IRestResponse restResponse, RequestModel requestModel, ISettings settings)
2627
{
@@ -29,6 +30,8 @@ public ResponseModel(IRestResponse restResponse, RequestModel requestModel, ISet
2930
RequestedUrl = requestModel.Url;
3031
Location = restResponse.GetHeaderByName("Location"); // returns null if no Location header present in the response
3132
ErrorMessage = restResponse.ErrorMessage;
33+
Cookies = restResponse.Cookies;
34+
3235
IsSuccess = settings.IsSuccess(StatusCode);
3336
IsInteresting = settings.IsInteresting(StatusCode);
3437
IsRedirect = settings.IsRedirect(StatusCode);

LinkCrawler/LinkCrawler/Program.cs

-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ class Program
1111
{
1212
static void Main(string[] args)
1313
{
14-
1514
using (var container = Container.For<StructureMapRegistry>())
1615
{
1716
var linkCrawler = container.GetInstance<LinkCrawler>();

0 commit comments

Comments (0)