@@ -17,11 +17,12 @@ public class LinkCrawler
17
17
public string BaseUrl { get ; set ; }
18
18
public bool CheckImages { get ; set ; }
19
19
public bool FollowRedirects { get ; set ; }
20
- public RestRequest RestRequest { get ; set ; }
20
+ private RestRequest GetRequest { get ; set ; }
21
+ private RestClient Client { get ; set ; }
21
22
public IEnumerable < IOutput > Outputs { get ; set ; }
22
23
public IValidUrlParser ValidUrlParser { get ; set ; }
23
24
public bool OnlyReportBrokenLinksToOutput { get ; set ; }
24
- public static List < LinkModel > UrlList ;
25
+ public List < LinkModel > UrlList ;
25
26
private ISettings _settings ;
26
27
private Stopwatch timer ;
27
28
@@ -33,7 +34,10 @@ public LinkCrawler(IEnumerable<IOutput> outputs, IValidUrlParser validUrlParser,
33
34
CheckImages = settings . CheckImages ;
34
35
FollowRedirects = settings . FollowRedirects ;
35
36
UrlList = new List < LinkModel > ( ) ;
36
- RestRequest = new RestRequest ( Method . GET ) . SetHeader ( "Accept" , "*/*" ) ;
37
+ GetRequest = new RestRequest ( Method . GET ) . SetHeader ( "Accept" , "*/*" ) ;
38
+ Client = new RestClient ( ) { FollowRedirects = false } ; // we don't want RestSharp following the redirects, otherwise we won't see them
39
+ // https://stackoverflow.com/questions/8823349/how-do-i-use-the-cookie-container-with-restsharp-and-asp-net-sessions - set cookies up according to this link?
40
+
37
41
OnlyReportBrokenLinksToOutput = settings . OnlyReportBrokenLinksToOutput ;
38
42
_settings = settings ;
39
43
this . timer = new Stopwatch ( ) ;
@@ -49,9 +53,9 @@ public void Start()
49
53
public void SendRequest ( string crawlUrl , string referrerUrl = "" )
50
54
{
51
55
var requestModel = new RequestModel ( crawlUrl , referrerUrl , BaseUrl ) ;
52
- var restClient = new RestClient ( new Uri ( crawlUrl ) ) { FollowRedirects = false } ; // we don't want RestSharp following the redirects, otherwise we won't see them
56
+ Client . BaseUrl = new Uri ( crawlUrl ) ;
53
57
54
- restClient . ExecuteAsync ( RestRequest , response =>
58
+ Client . ExecuteAsync ( GetRequest , response =>
55
59
{
56
60
if ( response == null )
57
61
return ;
@@ -67,25 +71,34 @@ public void ProcessResponse(IResponseModel responseModel)
67
71
68
72
// follow 3xx redirects
69
73
if ( FollowRedirects && responseModel . IsRedirect )
70
- {
71
- string redirectUrl ;
72
- if ( responseModel . Location . StartsWith ( "/" ) )
73
- redirectUrl = responseModel . RequestedUrl . GetUrlBase ( ) + responseModel . Location ;
74
- else
75
- redirectUrl = responseModel . Location ;
76
- SendRequest ( redirectUrl , responseModel . RequestedUrl ) ;
77
- }
74
+ FollowRedirect ( responseModel ) ;
78
75
79
76
// follow internal links in response
80
77
if ( responseModel . ShouldCrawl )
81
- CrawlForLinksInResponse ( responseModel ) ;
78
+ CrawlLinksInResponse ( responseModel ) ;
79
+ }
80
+
81
/// <summary>
/// Follows a redirect response by sending a request to the URL named in its Location value.
/// </summary>
/// <remarks>
/// Protocol-relative locations ("//host/path") inherit the scheme of the requested URL.
/// Root-relative locations ("/path") are prefixed with the base of the requested URL.
/// Any other value is requested as-is. A response with no Location value is ignored.
/// </remarks>
/// <param name="responseModel">The redirect response; its RequestedUrl is passed on as the referrer.</param>
private void FollowRedirect(IResponseModel responseModel)
{
    string location = responseModel.Location;

    // A redirect without a Location value gives us nothing to follow, and
    // calling StartsWith on it would throw a NullReferenceException.
    if (string.IsNullOrWhiteSpace(location))
        return;

    string redirectUrl;
    Uri requestedUri;
    if (location.StartsWith("//", StringComparison.Ordinal)
        && Uri.TryCreate(responseModel.RequestedUrl, UriKind.Absolute, out requestedUri))
    {
        // network-path reference: keep the host from Location, borrow only the scheme
        redirectUrl = requestedUri.Scheme + ":" + location;
    }
    else if (location.StartsWith("/", StringComparison.Ordinal))
    {
        // add base URL to relative links
        redirectUrl = responseModel.RequestedUrl.GetUrlBase() + location;
    }
    else
    {
        redirectUrl = location;
    }

    SendRequest(redirectUrl, responseModel.RequestedUrl);
}
83
91
84
- public void CrawlForLinksInResponse ( IResponseModel responseModel )
92
/// <summary>
/// Parses the response markup for valid URLs and hands them to SendRequestsToLinks.
/// </summary>
/// <param name="responseModel">
/// Response whose Markup is scanned; its RequestedUrl is passed along as the referrer.
/// </param>
public void CrawlLinksInResponse(IResponseModel responseModel)
{
    // CheckImages is forwarded to the markup parser together with the URL validator.
    SendRequestsToLinks(
        MarkupHelpers.GetValidUrlListFromMarkup(responseModel.Markup, ValidUrlParser, CheckImages),
        responseModel.RequestedUrl);
}
98
+
99
+ private void SendRequestsToLinks ( List < string > urls , string referrerUrl )
100
+ {
101
+ foreach ( string url in urls )
89
102
{
90
103
lock ( UrlList )
91
104
{
@@ -94,7 +107,7 @@ public void CrawlForLinksInResponse(IResponseModel responseModel)
94
107
95
108
UrlList . Add ( new LinkModel ( url ) ) ;
96
109
}
97
- SendRequest ( url , responseModel . RequestedUrl ) ;
110
+ SendRequest ( url , referrerUrl ) ;
98
111
}
99
112
}
100
113
0 commit comments