diff --git a/CHANGELOG.md b/CHANGELOG.md
index f0699d7..807d6a4 100755
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -31,6 +31,7 @@
- Updated regex limit [#148](https://github.com/LeonardoCardoso/SwiftLinkPreview/issues/148)
- Changed by [kinhvodoi92](https://github.com/kinhvodoi92)
- Annotated `Cancelable.cancel()` as `@objc` to make it compatibale with Objective-C [#135](https://github.com/LeonardoCardoso/SwiftLinkPreview/issues/135)
+- Capture base URL [#45](https://github.com/LeonardoCardoso/SwiftLinkPreview/issues/45)
- Changed by [LeonardoCardoso](https://github.com/LeonardoCardoso)
diff --git a/README.md b/README.md
index e5f1435..c2e23ca 100644
--- a/README.md
+++ b/README.md
@@ -121,16 +121,17 @@ let preview = slp.preview("Text containing URL",
```swift
Response {
- let url: URL // URL
- let finalUrl: URL // unshortened URL
- let canonicalUrl: String // canonical URL
- let title: String // title
- let description: String // page description or relevant text
- let images: [String] // array of URLs of the images
- let image: String // main image
- let icon: String // favicon
- let video: String // video
- let price: String // price
+ let baseURL: String? // base URL
+ let url: URL? // URL
+ let finalUrl: URL? // unshortened URL
+ let canonicalUrl: String? // canonical URL
+ let title: String? // title
+ let description: String? // page description or relevant text
+ let images: [String]? // array of URLs of the images
+ let image: String? // main image
+ let icon: String? // favicon
+ let video: String? // video
+ let price: String? // price
}
```
diff --git a/Sources/Regex.swift b/Sources/Regex.swift
index 843db2b..06c1738 100644
--- a/Sources/Regex.swift
+++ b/Sources/Regex.swift
@@ -17,8 +17,9 @@ class Regex {
static let imageTagPattern = ""
static let secondaryImageTagPattern = "og:image\"(.+?)content=\"([^\"](.+?))\"(.+?)[/]?>"
static let titlePattern = "
(.*?)"
- static let metatagPattern = ""
- static let metatagContentPattern = "content=(\"(.*?)\")|('(.*?)')"
+ static let metaTagPattern = ""
+ static let baseTagPattern = ""
+ static let metaTagContentPattern = "content=(\"(.*?)\")|('(.*?)')"
static let cannonicalUrlPattern = "([^\\+@%\\?=~_\\|!:,;]+)"
static let rawTagPattern = "<[^>]+>"
static let inlineStylePattern = ""
diff --git a/Sources/Response.swift b/Sources/Response.swift
index d20f657..c3ef506 100644
--- a/Sources/Response.swift
+++ b/Sources/Response.swift
@@ -9,7 +9,8 @@
import Foundation
public struct Response {
-
+
+ public internal(set) var baseURL: String?
public internal(set) var url: URL?
public internal(set) var finalUrl: URL?
public internal(set) var canonicalUrl: String?
diff --git a/Sources/ResponseExtension.swift b/Sources/ResponseExtension.swift
index d3dd9b3..169748b 100644
--- a/Sources/ResponseExtension.swift
+++ b/Sources/ResponseExtension.swift
@@ -12,6 +12,7 @@ internal extension Response {
var dictionary: [String: Any] {
var responseData:[String: Any] = [:]
+ responseData["baseURL"] = baseURL
responseData["url"] = url
responseData["finalUrl"] = finalUrl
responseData["canonicalUrl"] = canonicalUrl
@@ -35,11 +36,14 @@ internal extension Response {
case images
case icon
case video
+ case baseURL
case price
}
mutating func set(_ value: Any, for key: Key) {
switch key {
+ case Key.baseURL:
+ if let value = value as? String { self.baseURL = value }
case Key.url:
if let value = value as? URL { self.url = value }
case Key.finalUrl:
@@ -65,6 +69,8 @@ internal extension Response {
func value(for key: Key) -> Any? {
switch key {
+ case Key.baseURL:
+ return self.baseURL
case Key.url:
return self.url
case Key.finalUrl:
diff --git a/Sources/SwiftLinkPreview.swift b/Sources/SwiftLinkPreview.swift
index 86f2f36..1ac52cd 100644
--- a/Sources/SwiftLinkPreview.swift
+++ b/Sources/SwiftLinkPreview.swift
@@ -132,10 +132,11 @@ open class SwiftLinkPreview: NSObject {
result.title = $0.title
result.description = $0.description
- result.image = $0.image
- result.images = $0.images
- result.icon = $0.icon
- result.video = $0.video
+
+ result.image = self.formatImageURL($0.image, base: $0.baseURL)
+ result.images = self.formatImageURLs($0.images, base: $0.baseURL)
+ result.icon = self.formatImageURL($0.icon, base: $0.baseURL)
+ result.video = self.formatImageURL($0.video, base: $0.baseURL)
result.price = $0.price
self.cache.slp_setCachedResponse(url: unshortened.absoluteString, response: result)
@@ -154,6 +155,28 @@ open class SwiftLinkPreview: NSObject {
return cancellable
}
+ /// Returns `url` made absolute against `base`: absolute ("http…") URLs and a nil `base` leave `url` untouched,
+ /// and a nil `url` yields nil.
+ private func formatImageURL(_ url: String?, base: String?) -> String? {
+     guard let url = url else { return nil }
+     guard !url.starts(with: "http"), let base = base else { return url }
+     // Resolve per RFC 3986 so "/img.png" or "../img.png" work; fall back to the old concatenation if parsing fails.
+     let resolved = URL(string: base).flatMap { URL(string: url, relativeTo: $0) }?.absoluteString
+     return resolved ?? "\(base)\(url)"
+ }
+
+ /// Makes every entry of `array` absolute against `base` (see `formatImageURL`) and removes duplicates,
+ /// keeping the first occurrence of each URL in its original position.
+ /// Returns nil when `array` is nil.
+ func formatImageURLs(_ array: [String]?, base: String?) -> [String]? {
+     guard let array = array else { return nil }
+     // Format each element itself — the previous loop always read array[0] — and de-duplicate without
+     // going through an unordered Set round-trip, so the page's image order is preserved.
+     var seen = Set<String>()
+     let formatted = array.map { formatImageURL($0, base: base) ?? $0 }
+     return formatted.filter { seen.insert($0).inserted }
+ }
+
/*
Extract url redirection inside the GET query.
Like https://www.dji.com/404?url=http%3A%2F%2Fwww.dji.com%2Fmatrice600-pro%2Finfo#specs -> http://www.dji.com/de/matrice600-pro/info#specs
@@ -287,9 +310,9 @@ extension SwiftLinkPreview {
CFStringConvertIANACharSetNameToEncoding( $0 as CFString ) ) )
} ?? .utf8
if let html = String( data: data, encoding: encoding ) {
- for meta in Regex.pregMatchAll( html, regex: Regex.metatagPattern, index: 1 ) {
+ for meta in Regex.pregMatchAll( html, regex: Regex.metaTagPattern, index: 1 ) {
if (meta.contains( "http-equiv=\"refresh\"" ) || meta.contains( "http-equiv='refresh'" )),
- let value = Regex.pregMatchFirst( meta, regex: Regex.metatagContentPattern, index: 2 )?.decoded.extendedTrim,
+ let value = Regex.pregMatchFirst( meta, regex: Regex.metaTagContentPattern, index: 2 )?.decoded.extendedTrim,
let redirectString = value.split( separator: ";" )
.first( where: { $0.lowercased().starts( with: "url=" ) } )?
.split( separator: "=", maxSplits: 1 ).last,
@@ -444,6 +467,8 @@ extension SwiftLinkPreview {
result = self.crawlMetaTags(sanitizedHtmlCode, result: result)
+ result = self.crawlMetaBase(sanitizedHtmlCode, result: result)
+
var otherResponse = self.crawlTitle(sanitizedHtmlCode, result: result)
otherResponse = self.crawlDescription(otherResponse.htmlCode, result: otherResponse.result)
@@ -534,10 +559,10 @@ extension SwiftLinkPreview {
Response.Key.title.rawValue,
Response.Key.description.rawValue,
Response.Key.image.rawValue,
- Response.Key.video.rawValue,
+ Response.Key.video.rawValue
]
- let metatags = Regex.pregMatchAll(htmlCode, regex: Regex.metatagPattern, index: 1)
+ let metatags = Regex.pregMatchAll(htmlCode, regex: Regex.metaTagPattern, index: 1)
for metatag in metatags {
for tag in possibleTags {
@@ -552,7 +577,7 @@ extension SwiftLinkPreview {
if let key = Response.Key(rawValue: tag),
result.value(for: key) == nil {
- if let value = Regex.pregMatchFirst(metatag, regex: Regex.metatagContentPattern, index: 2) {
+ if let value = Regex.pregMatchFirst(metatag, regex: Regex.metaTagContentPattern, index: 2) {
let value = value.decoded.extendedTrim
if tag == "image" {
let value = addImagePrefixIfNeeded(value, result: result)
@@ -572,6 +597,17 @@ extension SwiftLinkPreview {
return result
}
+ /// Applies `Regex.baseTagPattern` to `htmlCode` and stores the first capture-group-2 match as `baseURL`.
+ /// Returns `result` unchanged when the pattern yields no match.
+ internal func crawlMetaBase(_ htmlCode: String, result: Response) -> Response {
+     let matches = Regex.pregMatchAll(htmlCode, regex: Regex.baseTagPattern, index: 2)
+     guard let firstMatch = matches.first else { return result }
+
+     var updated = result
+     updated.set(firstMatch, for: .baseURL)
+     return updated
+ }
+
// Crawl for title if needed
internal func crawlTitle(_ htmlCode: String, result: Response) -> (htmlCode: String, result: Response) {
var result = result
diff --git a/SwiftLinkPreview.xcodeproj/project.pbxproj b/SwiftLinkPreview.xcodeproj/project.pbxproj
index 403e874..3ee57ac 100644
--- a/SwiftLinkPreview.xcodeproj/project.pbxproj
+++ b/SwiftLinkPreview.xcodeproj/project.pbxproj
@@ -8,6 +8,12 @@
/* Begin PBXBuildFile section */
1F8164ED26287866000F2905 /* VideoTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 1F8164EC26287866000F2905 /* VideoTests.swift */; };
+ 27BCC85826FCF22E00886BDA /* BaseURLTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 27BCC85726FCF22E00886BDA /* BaseURLTests.swift */; };
+ 27BCC85D26FCF3BF00886BDA /* BaseURLTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 27BCC85726FCF22E00886BDA /* BaseURLTests.swift */; };
+ 27BCC85E26FCF3C000886BDA /* BaseURLTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 27BCC85726FCF22E00886BDA /* BaseURLTests.swift */; };
+ 27BCC86026FCF4C000886BDA /* head-meta-base.html in Resources */ = {isa = PBXBuildFile; fileRef = 27BCC85F26FCF4C000886BDA /* head-meta-base.html */; };
+ 27BCC86126FCF4C000886BDA /* head-meta-base.html in Resources */ = {isa = PBXBuildFile; fileRef = 27BCC85F26FCF4C000886BDA /* head-meta-base.html */; };
+ 27BCC86226FCF4C000886BDA /* head-meta-base.html in Resources */ = {isa = PBXBuildFile; fileRef = 27BCC85F26FCF4C000886BDA /* head-meta-base.html */; };
68074FFA1F23B6C900649DE6 /* head-meta-icon.html in Resources */ = {isa = PBXBuildFile; fileRef = 68074FF91F23B6C900649DE6 /* head-meta-icon.html */; };
68074FFB1F23BB1100649DE6 /* head-meta-icon.html in Resources */ = {isa = PBXBuildFile; fileRef = 68074FF91F23B6C900649DE6 /* head-meta-icon.html */; };
68074FFC1F23BB1400649DE6 /* head-meta-icon.html in Resources */ = {isa = PBXBuildFile; fileRef = 68074FF91F23B6C900649DE6 /* head-meta-icon.html */; };
@@ -150,6 +156,8 @@
/* Begin PBXFileReference section */
1F8164EC26287866000F2905 /* VideoTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = VideoTests.swift; sourceTree = ""; };
+ 27BCC85726FCF22E00886BDA /* BaseURLTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = BaseURLTests.swift; sourceTree = ""; };
+ 27BCC85F26FCF4C000886BDA /* head-meta-base.html */ = {isa = PBXFileReference; lastKnownFileType = text.html; path = "head-meta-base.html"; sourceTree = ""; };
68074FF91F23B6C900649DE6 /* head-meta-icon.html */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text.html; path = "head-meta-icon.html"; sourceTree = ""; };
686E58DE1F22416D000C2A33 /* IconTests.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = IconTests.swift; sourceTree = ""; };
7A552DE121A460910019E8B1 /* Response.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Response.swift; sourceTree = ""; };
@@ -257,17 +265,18 @@
985DCEB01D2BFD2700B40D76 /* Files */ = {
isa = PBXGroup;
children = (
- 98B5ED421D3E5F5C00AEBD54 /* head-meta-itemprop.html */,
- 986D5BE61D33E0FD0025555F /* head-title.html */,
- 985DCEBB1D2BFFAF00B40D76 /* body-text-span.html */,
- 985DCEBC1D2BFFAF00B40D76 /* body-text-p.html */,
- 985DCEBD1D2BFFAF00B40D76 /* body-text-div.html */,
- 985DCEB71D2BFE4100B40D76 /* body-image-single.html */,
985DCEB81D2BFE4100B40D76 /* body-image-gallery.html */,
- 985DCEB11D2BFD3400B40D76 /* head-meta-twitter.html */,
- 985DCEB21D2BFD3400B40D76 /* head-meta-meta.html */,
+ 985DCEB71D2BFE4100B40D76 /* body-image-single.html */,
+ 985DCEBD1D2BFFAF00B40D76 /* body-text-div.html */,
+ 985DCEBC1D2BFFAF00B40D76 /* body-text-p.html */,
+ 985DCEBB1D2BFFAF00B40D76 /* body-text-span.html */,
+ 27BCC85F26FCF4C000886BDA /* head-meta-base.html */,
985DCEB31D2BFD3400B40D76 /* head-meta-facebook.html */,
68074FF91F23B6C900649DE6 /* head-meta-icon.html */,
+ 98B5ED421D3E5F5C00AEBD54 /* head-meta-itemprop.html */,
+ 985DCEB21D2BFD3400B40D76 /* head-meta-meta.html */,
+ 985DCEB11D2BFD3400B40D76 /* head-meta-twitter.html */,
+ 986D5BE61D33E0FD0025555F /* head-title.html */,
);
name = Files;
sourceTree = "";
@@ -358,18 +367,19 @@
98DC53391D1D73DB001134E3 /* SwiftLinkPreviewTests */ = {
isa = PBXGroup;
children = (
- 98B5ED491D3E7DC600AEBD54 /* HugeTests.swift */,
+ 27BCC85726FCF22E00886BDA /* BaseURLTests.swift */,
985DCEC81D2C029700B40D76 /* BodyTests.swift */,
+ 988B48D61D2C3C2E0040A4AD /* Constants */,
+ 985DCEB01D2BFD2700B40D76 /* Files */,
+ 98B5ED491D3E7DC600AEBD54 /* HugeTests.swift */,
+ 686E58DE1F22416D000C2A33 /* IconTests.swift */,
985DCEC61D2C026000B40D76 /* ImageTests.swift */,
- 1F8164EC26287866000F2905 /* VideoTests.swift */,
+ 98E7C3121D3B23F5009E5F6D /* Info */,
985DCEC41D2C022E00B40D76 /* MetaTests.swift */,
982812911D3A9293000D3ABB /* RegexTests.swift */,
986D5BE41D33DFE50025555F /* TitleTests.swift */,
- 686E58DE1F22416D000C2A33 /* IconTests.swift */,
- 988B48D61D2C3C2E0040A4AD /* Constants */,
- 985DCEB01D2BFD2700B40D76 /* Files */,
- 98E7C3121D3B23F5009E5F6D /* Info */,
988B48D11D2C39790040A4AD /* Utils */,
+ 1F8164EC26287866000F2905 /* VideoTests.swift */,
);
path = SwiftLinkPreviewTests;
sourceTree = "";
@@ -641,6 +651,7 @@
985DCEBF1D2BFFAF00B40D76 /* body-text-span.html in Resources */,
985DCEC01D2BFFAF00B40D76 /* body-text-p.html in Resources */,
985DCEC11D2BFFAF00B40D76 /* body-text-div.html in Resources */,
+ 27BCC86026FCF4C000886BDA /* head-meta-base.html in Resources */,
985DCEB91D2BFE4100B40D76 /* body-image-single.html in Resources */,
985DCEB61D2BFD3400B40D76 /* head-meta-facebook.html in Resources */,
986D5BE71D33E0FD0025555F /* head-title.html in Resources */,
@@ -666,6 +677,7 @@
98E7C32F1D3B24DA009E5F6D /* body-image-single.html in Resources */,
98B5ED461D3E62A200AEBD54 /* head-meta-itemprop.html in Resources */,
98E7C3301D3B24DA009E5F6D /* body-image-gallery.html in Resources */,
+ 27BCC86226FCF4C000886BDA /* head-meta-base.html in Resources */,
98E7C3311D3B24DA009E5F6D /* head-meta-twitter.html in Resources */,
98E7C3321D3B24DA009E5F6D /* head-meta-meta.html in Resources */,
98E7C3331D3B24DA009E5F6D /* head-meta-facebook.html in Resources */,
@@ -691,6 +703,7 @@
98F76D1D1D3AF87100E9B10E /* body-image-single.html in Resources */,
98B5ED441D3E62A000AEBD54 /* head-meta-itemprop.html in Resources */,
98F76D1E1D3AF87100E9B10E /* body-image-gallery.html in Resources */,
+ 27BCC86126FCF4C000886BDA /* head-meta-base.html in Resources */,
98F76D1F1D3AF87100E9B10E /* head-meta-twitter.html in Resources */,
98F76D201D3AF87100E9B10E /* head-meta-meta.html in Resources */,
98F76D211D3AF87100E9B10E /* head-meta-facebook.html in Resources */,
@@ -740,6 +753,7 @@
986D5BE51D33DFE60025555F /* TitleTests.swift in Sources */,
985DCEC71D2C026000B40D76 /* ImageTests.swift in Sources */,
988B48D81D2C3C3D0040A4AD /* Constants.swift in Sources */,
+ 27BCC85826FCF22E00886BDA /* BaseURLTests.swift in Sources */,
9272A10D1E2EF0E600F9F17E /* Regex.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
@@ -777,6 +791,7 @@
98E7C3281D3B24C6009E5F6D /* File.swift in Sources */,
98E7C3291D3B24C6009E5F6D /* IntExtension.swift in Sources */,
9272A10F1E2EF0E800F9F17E /* Regex.swift in Sources */,
+ 27BCC85E26FCF3C000886BDA /* BaseURLTests.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
@@ -813,6 +828,7 @@
98F76D121D3AF78600E9B10E /* File.swift in Sources */,
98F76D131D3AF78600E9B10E /* IntExtension.swift in Sources */,
9272A10E1E2EF0E700F9F17E /* Regex.swift in Sources */,
+ 27BCC85D26FCF3BF00886BDA /* BaseURLTests.swift in Sources */,
);
runOnlyForDeploymentPostprocessing = 0;
};
diff --git a/SwiftLinkPreviewTests/BaseURLTests.swift b/SwiftLinkPreviewTests/BaseURLTests.swift
new file mode 100644
index 0000000..33ee379
--- /dev/null
+++ b/SwiftLinkPreviewTests/BaseURLTests.swift
@@ -0,0 +1,55 @@
+//
+// BaseURLTests.swift
+// SwiftLinkPreviewTests
+//
+// Created by Leonardo Cardoso on 23.09.21.
+// Copyright © 2021 leocardz.com. All rights reserved.
+//
+
+import XCTest
+@testable import SwiftLinkPreview
+
+// Tests base URL extraction (crawlMetaBase) and prefixing of relative URLs with the base (formatImageURLs)
+class BaseURLTests: XCTestCase {
+
+ // MARK: - Vars
+ var baseTemplate = ""
+ let slp = SwiftLinkPreview()
+
+ // MARK: - SetUps
+ // Reads the template named by Constants.headMetaBase via File.toString before each test
+ override func setUp() {
+ super.setUp()
+
+ self.baseTemplate = File.toString(Constants.headMetaBase)
+
+ }
+
+ // MARK: - Base
+ func setUpBaseAndRun() {
+ // Replace the head/body placeholders with random tags, then crawl the result for the base URL
+ var baseTemplate = self.baseTemplate
+ baseTemplate = baseTemplate.replace(Constants.headRandom, with: String.randomTag())
+ baseTemplate = baseTemplate.replace(Constants.bodyRandom, with: String.randomTag()).extendedTrim
+
+ let result = self.slp.crawlMetaBase(baseTemplate, result: Response())
+
+ XCTAssertEqual(result.baseURL, "https://host/resource/index/")
+ }
+
+ func testBase() {
+ // Repeat the check 100 times; each run substitutes freshly generated random tags
+ for _ in 0 ..< 100 {
+
+ self.setUpBaseAndRun()
+
+ }
+
+ }
+
+ func testResultBase() {
+ XCTAssertEqual(slp.formatImageURLs(["assets/test.png"], base: "https://host/resource/index/")?.first,
+ "https://host/resource/index/assets/test.png")
+ }
+
+}
diff --git a/SwiftLinkPreviewTests/Constants.swift b/SwiftLinkPreviewTests/Constants.swift
index db15838..e04a9fb 100644
--- a/SwiftLinkPreviewTests/Constants.swift
+++ b/SwiftLinkPreviewTests/Constants.swift
@@ -20,6 +20,7 @@ struct Constants {
static let bodyIcon = "head-meta-icon"
static let headMetaTwitter = "head-meta-twitter"
static let headMetaMeta = "head-meta-meta"
+ static let headMetaBase = "head-meta-base"
static let headMetaItemprop = "head-meta-itemprop"
static let headMetaFacebook = "head-meta-facebook"
static let headTitle = "head-title"
diff --git a/SwiftLinkPreviewTests/head-meta-base.html b/SwiftLinkPreviewTests/head-meta-base.html
new file mode 100644
index 0000000..7f66076
--- /dev/null
+++ b/SwiftLinkPreviewTests/head-meta-base.html
@@ -0,0 +1,10 @@
+
+
+ [:head-random]
+
+
+
+ [:body-random]
+
+
+
diff --git a/SwiftLinkPreviewTests/head-meta-facebook.html b/SwiftLinkPreviewTests/head-meta-facebook.html
index 7d1b9d9..79e01b1 100644
--- a/SwiftLinkPreviewTests/head-meta-facebook.html
+++ b/SwiftLinkPreviewTests/head-meta-facebook.html
@@ -10,4 +10,4 @@
[:body-random]
-