Skip to content

Commit

Permalink
Merge pull request #18 from kubero-dev/feature/multiplle-domains
Browse files Browse the repository at this point in the history
Feature/multiple domains
  • Loading branch information
mms-gianni authored Nov 9, 2023
2 parents b127f81 + 5442da8 commit bc346a3
Show file tree
Hide file tree
Showing 3 changed files with 137 additions and 35 deletions.
5 changes: 4 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -121,12 +121,15 @@ See in [ruleset.yaml](ruleset.yaml) for an example.

```yaml
- domain: www.example.com
domains: # Additional domains this rule also applies to
- www.example.com
- www.beispiel.de
regexRules:
- match: <script\s+([^>]*\s+)?src="(/)([^"]*)"
replace: <script $1 script="/https://www.example.com/$3"
injections:
- position: head # Position where to inject the code
append: |
append: | # possible keys: append, prepend, replace
<script>
window.localStorage.clear();
console.log("test");
Expand Down
62 changes: 35 additions & 27 deletions handlers/proxy.go
Original file line number Diff line number Diff line change
Expand Up @@ -155,14 +155,17 @@ func loadRules() RuleSet {
yaml.Unmarshal(yamlFile, &ruleSet)
}

domains := []string{}
for _, rule := range ruleSet {
//log.Println("Loaded rules for", rule.Domain)

domains = append(domains, rule.Domain)
domains = append(domains, rule.Domains...)
if os.Getenv("ALLOWED_DOMAINS_RULESET") == "true" {
allowedDomains = append(allowedDomains, rule.Domain)
allowedDomains = append(allowedDomains, domains...)
}
}

log.Println("Loaded rules for", len(ruleSet), "Domains")
log.Println("Loaded ", len(ruleSet), " rules for", len(domains), "Domains")
return ruleSet
}

Expand All @@ -172,33 +175,37 @@ func applyRules(domain string, path string, body string) string {
}

for _, rule := range rulesSet {
if rule.Domain != domain {
continue
}
if len(rule.Paths) > 0 && !StringInSlice(path, rule.Paths) {
continue
}
for _, regexRule := range rule.RegexRules {
re := regexp.MustCompile(regexRule.Match)
body = re.ReplaceAllString(body, regexRule.Replace)
}
for _, injection := range rule.Injections {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
if err != nil {
log.Fatal(err)
}
if injection.Replace != "" {
doc.Find(injection.Position).ReplaceWithHtml(injection.Replace)
domains := rule.Domains
domains = append(domains, rule.Domain)
for _, ruleDomain := range domains {
if ruleDomain != domain {
continue
}
if injection.Append != "" {
doc.Find(injection.Position).AppendHtml(injection.Append)
if len(rule.Paths) > 0 && !StringInSlice(path, rule.Paths) {
continue
}
if injection.Prepend != "" {
doc.Find(injection.Position).PrependHtml(injection.Prepend)
for _, regexRule := range rule.RegexRules {
re := regexp.MustCompile(regexRule.Match)
body = re.ReplaceAllString(body, regexRule.Replace)
}
body, err = doc.Html()
if err != nil {
log.Fatal(err)
for _, injection := range rule.Injections {
doc, err := goquery.NewDocumentFromReader(strings.NewReader(body))
if err != nil {
log.Fatal(err)
}
if injection.Replace != "" {
doc.Find(injection.Position).ReplaceWithHtml(injection.Replace)
}
if injection.Append != "" {
doc.Find(injection.Position).AppendHtml(injection.Append)
}
if injection.Prepend != "" {
doc.Find(injection.Position).PrependHtml(injection.Prepend)
}
body, err = doc.Html()
if err != nil {
log.Fatal(err)
}
}
}
}
Expand All @@ -213,6 +220,7 @@ type Rule struct {

type RuleSet []struct {
Domain string `yaml:"domain"`
Domains []string `yaml:"domains,omitempty"`
Paths []string `yaml:"paths,omitempty"`
GoogleCache bool `yaml:"googleCache,omitempty"`
RegexRules []Rule `yaml:"regexRules"`
Expand Down
105 changes: 98 additions & 7 deletions ruleset.yaml
Original file line number Diff line number Diff line change
@@ -1,24 +1,26 @@
- domain: www.example.com
domains:
- www.beispiel.com
regexRules:
- match: <script\s+([^>]*\s+)?src="(/)([^"]*)"
replace: <script $1 script="/https://www.example.com/$3"
injections:
- position: head # Position where to inject the code
append: |
append: |
<script>
window.localStorage.clear();
console.log("test");
alert("Hello!");
</script>
- position: h1
replace: |
replace: |
<h1>An example with a ladder ;-)</h1>
- domain: www.americanbanker.com
paths:
paths:
- /news
injections:
- position: head
append: |
append: |
<script>
document.addEventListener("DOMContentLoaded", () => {
const inlineGate = document.querySelector('.inline-gate');
Expand All @@ -30,7 +32,7 @@
});
</script>
- domain: www.nzz.ch
paths:
paths:
- /international
- /sport
- /wirtschaft
Expand All @@ -46,10 +48,99 @@
- /finanze
injections:
- position: head
append: |
append: |
<script>
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelector('.dynamic-regwall');
removeDOMElement(paywall)
});
</script>
</script>
- domains:
- www.architecturaldigest.com
- www.bonappetit.com
- www.cntraveler.com
- www.epicurious.com
- www.gq.com
- www.newyorker.com
- www.vanityfair.com
- www.vogue.com
- www.wired.com
injections:
- position: head
append: |
<script>
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll('.paywall-bar, div[class^="MessageBannerWrapper-"');
banners.forEach(el => { el.remove(); });
});
</script>
- domains:
- www.nytimes.com
- www.time.com
injections:
- position: head
append: |
<script>
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll('div[data-testid="inline-message"], div[id^="ad-"], div[id^="leaderboard-"], div.expanded-dock, div.pz-ad-box, div[id="top-wrapper"], div[id="bottom-wrapper"]');
banners.forEach(el => { el.remove(); });
});
</script>
- domains:
- www.thestar.com
- www.niagarafallsreview.ca
- www.stcatharinesstandard.ca
- www.thepeterboroughexaminer.com
- www.therecord.com
- www.thespec.com
- www.wellandtribune.ca
injections:
- position: head
append: |
<script>
window.localStorage.clear();
document.addEventListener("DOMContentLoaded", () => {
const paywall = document.querySelectorAll('div.subscriber-offers');
paywall.forEach(el => { el.remove(); });
const subscriber_only = document.querySelectorAll('div.subscriber-only');
for (const elem of subscriber_only) {
if (elem.classList.contains('encrypted-content') && dompurify_loaded) {
const parser = new DOMParser();
const doc = parser.parseFromString('<div>' + DOMPurify.sanitize(unscramble(elem.innerText)) + '</div>', 'text/html');
const content_new = doc.querySelector('div');
elem.parentNode.replaceChild(content_new, elem);
}
elem.removeAttribute('style');
elem.removeAttribute('class');
}
const banners = document.querySelectorAll('div.subscription-required, div.redacted-overlay, div.subscriber-hide, div.tnt-ads-container');
banners.forEach(el => { el.remove(); });
const ads = document.querySelectorAll('div.tnt-ads-container, div[class*="adLabelWrapper"]');
ads.forEach(el => { el.remove(); });
const recommendations = document.querySelectorAll('div[id^="tncms-region-article"]');
recommendations.forEach(el => { el.remove(); });
});
</script>
- domain: www.usatoday.com
injections:
- position: head
append: |
<script>
document.addEventListener("DOMContentLoaded", () => {
const banners = document.querySelectorAll('div.roadblock-container, .gnt_nb, [aria-label="advertisement"], div[id="main-frame-error"]');
banners.forEach(el => { el.remove(); });
});
</script>
- domain: www.washingtonpost.com
injections:
- position: head
append: |
<script>
document.addEventListener("DOMContentLoaded", () => {
let paywall = document.querySelectorAll('div[data-qa$="-ad"], div[id="leaderboard-wrapper"], div[data-qa="subscribe-promo"]');
paywall.forEach(el => { el.remove(); });
const images = document.querySelectorAll('img');
images.forEach(image => { image.parentElement.style.filter = ''; });
});
</script>

0 comments on commit bc346a3

Please sign in to comment.