Skip to content

Commit def39cc

Browse files
sanitize snippets
1 parent 4ace042 commit def39cc

File tree

7 files changed

+89
-19
lines changed

7 files changed

+89
-19
lines changed

docs/contributing/documentation.md

+1
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ python -m tools.github_readme_sync.cli check docs
155155

156156
See the [Style Guide](style-guide.md#images) images section for details about creating and referencing images correctly.
157157

158+
158159
# VS Code Snippets
159160

160161
> 👍 You have access to VS Code snippets

docs/contributing/pull-requests/pull-request-flow.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
---
22
title: Pull Request Flow
33
---
4-
![Blue: Contributor responsibility <br/>Gray: Maintainer responsibility](../../figures/contributing/pull_request_flow.png)
4+
![Blue: Contributor responsibility <br>Gray: Maintainer responsibility](../../figures/contributing/pull_request_flow.png)
55

66

77
The Pull Request flow begins when you **Create a Pull Request** to our [GitHub repository](https://github.com/thousandbrainsproject/tbp.monty/pulls).

docs/figures/contributing/pull_request_flow.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ flowchart LR
2020
NBCP -- No --> UPR2(Update Pull Request):::contributor
2121
UPR2 --> NBCP
2222
NBCP -- Yes --> M(Merge):::maintainer
23-
M --> AMCP{Post-merge<br/>checks and tasks<br/>pass?}
23+
M --> AMCP{Post-merge<br>checks and tasks<br>pass?}
2424
AMCP -- No --> RV(((Revert))):::endFail
2525
AMCP -- Yes --> D(((Done))):::endSuccess
2626

docs/snippets/edit-this-page.md

+19-6
Original file line numberDiff line numberDiff line change
@@ -12,24 +12,37 @@
1212
margin-top: -10px;
1313
align-items: center;
1414
border: 1px solid #CCCCCC;
15-
text-decoration: none;
15+
text-decoration: none !important;
1616
}
1717
.contribution-button:hover {
1818
background-color: #DDDDDD;
1919
cursor: pointer;
2020
color: #000000;
21+
text-decoration:none;
22+
}
23+
.contribution-button img {
24+
display: inline-block;
25+
vertical-align: middle;
26+
}
27+
.contribution-button div {
28+
margin-left: 5px;
29+
}
30+
.contribution-button span {
31+
margin:0;
32+
padding:0;
2133
}
2234
</style>
23-
<br/><br/>
35+
<br><br>
2436

2537
----
2638

2739
#### Help Us Make This Page Better
2840

2941
All our docs are open-source. If something is wrong or unclear, submit a PR to fix it!
30-
31-
<a class="contribution-button" style="text-decoration:none" href="!!LINK!!" target="_blank">
32-
<img src="https://raw.githubusercontent.com/primer/octicons/main/icons/git-pull-request-16.svg" width="16" height="16" style="display: inline-block; vertical-align: middle;" alt="Pull request icon">
33-
<span style="margin-left: 5px;">Make a Contribution</span></a>
42+
<div>
43+
<a class="contribution-button" href="!!LINK!!" target="_blank">
44+
<img src="https://raw.githubusercontent.com/primer/octicons/main/icons/git-pull-request-16.svg" width="16" height="16" alt="Pull request icon">
45+
<div>Make a Contribution</div></a>
46+
</div>
3447

3548
[Learn how to contribute to our docs](../contributing/documentation.md)

tools/github_readme_sync/readme.py

+40-11
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
import os
1616
import re
1717
from collections import OrderedDict
18+
from copy import deepcopy
1819
from typing import Any, List, Tuple
1920
from urllib.parse import parse_qs
2021

@@ -262,20 +263,12 @@ def replace_match(match):
262263
def create_or_update_doc(
263264
self, order: int, category_id: str, doc: dict, parent_id: str, file_path: str
264265
) -> Tuple[str, bool]:
265-
body = doc["body"]
266-
body = self.insert_edit_this_page(body, doc["slug"], file_path)
267-
body = self.insert_markdown_snippet(body, file_path)
268-
body = self.convert_csv_to_html_table(body, file_path)
269-
body = self.correct_image_locations(body)
270-
body = self.correct_file_locations(body)
271-
body = self.convert_note_tags(body)
272-
body = self.parse_images(body)
273-
body = self.convert_cloudinary_videos(body)
266+
markdown = self.process_markdown(doc["body"], file_path, doc["slug"])
274267

275268
create_doc_request = {
276269
"title": doc["title"],
277270
"type": "basic",
278-
"body": body,
271+
"body": markdown,
279272
"category": category_id,
280273
"hidden": doc.get("hidden", False),
281274
"order": order,
@@ -301,6 +294,40 @@ def create_or_update_doc(
301294

302295
return doc_id, created
303296

297+
def process_markdown(self, body: str, file_path: str, slug: str) -> str:
    """Run a document body through every markdown transformation, in order.

    Args:
        body: Markdown text to transform.
        file_path: Forwarded to the steps that take it (edit-this-page
            insertion, snippet insertion and CSV table conversion).
        slug: Forwarded to ``insert_edit_this_page``.

    Returns:
        The body after all transformations have been applied.
    """
    # These steps take extra context besides the text itself.
    processed = self.insert_edit_this_page(body, slug, file_path)
    processed = self.insert_markdown_snippet(processed, file_path)
    processed = self.convert_csv_to_html_table(processed, file_path)

    # The remaining steps only take the text; the order is significant and
    # matches the sequence the body was previously processed in.
    text_only_steps = (
        self.correct_image_locations,
        self.correct_file_locations,
        self.convert_note_tags,
        self.parse_images,
        self.convert_cloudinary_videos,
    )
    for step in text_only_steps:
        processed = step(processed)

    return processed
307+
308+
def sanitize_html(self, body: str) -> str:
309+
allowed_attributes = deepcopy(nh3.ALLOWED_ATTRIBUTES)
310+
allowed_tags = deepcopy(nh3.ALLOWED_TAGS)
311+
312+
allowed_tags.add("style")
313+
allowed_tags.add("a")
314+
allowed_tags.add("label")
315+
for tag in allowed_attributes:
316+
allowed_attributes[tag].add("width")
317+
allowed_attributes[tag].add("style")
318+
allowed_attributes[tag].add("target")
319+
allowed_attributes[tag].add("class")
320+
321+
return nh3.clean(
322+
body,
323+
tags=allowed_tags,
324+
attributes=allowed_attributes,
325+
link_rel=None,
326+
strip_comments=False,
327+
generic_attribute_prefixes={"data-"},
328+
clean_content_tags={"script"},
329+
)
330+
304331
def insert_edit_this_page(self, body: str, filename: str, file_path: str) -> str:
305332
depth = len(file_path.split("/")) - 1
306333
relative_path = "../" * depth
@@ -468,7 +495,9 @@ def replace_match(match):
468495

469496
try:
470497
with open(snippet_path, "r") as f:
471-
return f.read()
498+
unsafe_content = f.read()
499+
return self.sanitize_html(unsafe_content)
500+
472501
except Exception as e:
473502
return f"[File not found or could not be read: {snippet_path}]"
474503

tools/github_readme_sync/tests/readme_test.py

+26
Original file line numberDiff line numberDiff line change
@@ -661,6 +661,32 @@ def test_insert_markdown_snippet(self):
661661
)
662662
self.assertIn("File not found", result)
663663

664+
def test_sanitize_html_removes_scripts(self):
665+
html_with_script = """
666+
<div>
667+
<h1>Test Content</h1>
668+
<p>This is a test paragraph</p>
669+
<script>
670+
alert('This is a malicious script');
671+
document.cookie = "session=stolen";
672+
</script>
673+
<p>More content after the script</p>
674+
</div>
675+
"""
676+
677+
sanitized_html = self.readme.sanitize_html(html_with_script)
678+
679+
# Verify script tag is removed
680+
self.assertNotIn("<script>", sanitized_html)
681+
self.assertNotIn("</script>", sanitized_html)
682+
self.assertNotIn("alert('This is a malicious script')", sanitized_html)
683+
self.assertNotIn("document.cookie", sanitized_html)
684+
685+
# Verify legitimate content is preserved
686+
self.assertIn("<h1>Test Content</h1>", sanitized_html)
687+
self.assertIn("<p>This is a test paragraph</p>", sanitized_html)
688+
self.assertIn("<p>More content after the script</p>", sanitized_html)
689+
664690

665691
if __name__ == "__main__":
666692
unittest.main()

tools/github_readme_sync/upload.py

+1
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919

2020
def upload(new_hierarchy, file_path: str, rdme: ReadMe):
2121
logging.info(f"Uploading export folder: {file_path}")
22+
logging.info(f"Url: https://thousandbrainsproject.readme.io/v{rdme.version}/docs")
2223
rdme.create_version_if_not_exists()
2324
to_be_deleted = get_all_categories_docs(rdme)
2425

0 commit comments

Comments
 (0)