15
15
import os
16
16
import re
17
17
from collections import OrderedDict
18
+ from copy import deepcopy
18
19
from typing import Any , List , Tuple
19
20
from urllib .parse import parse_qs
20
21
@@ -262,20 +263,12 @@ def replace_match(match):
262
263
def create_or_update_doc (
263
264
self , order : int , category_id : str , doc : dict , parent_id : str , file_path : str
264
265
) -> Tuple [str , bool ]:
265
- body = doc ["body" ]
266
- body = self .insert_edit_this_page (body , doc ["slug" ], file_path )
267
- body = self .insert_markdown_snippet (body , file_path )
268
- body = self .convert_csv_to_html_table (body , file_path )
269
- body = self .correct_image_locations (body )
270
- body = self .correct_file_locations (body )
271
- body = self .convert_note_tags (body )
272
- body = self .parse_images (body )
273
- body = self .convert_cloudinary_videos (body )
266
+ markdown = self .process_markdown (doc ["body" ], file_path , doc ["slug" ])
274
267
275
268
create_doc_request = {
276
269
"title" : doc ["title" ],
277
270
"type" : "basic" ,
278
- "body" : body ,
271
+ "body" : markdown ,
279
272
"category" : category_id ,
280
273
"hidden" : doc .get ("hidden" , False ),
281
274
"order" : order ,
@@ -301,6 +294,40 @@ def create_or_update_doc(
301
294
302
295
return doc_id , created
303
296
297
def process_markdown(self, body: str, file_path: str, slug: str) -> str:
    """Run a document body through the markdown transformation pipeline.

    The steps are applied strictly in sequence, each one receiving the
    output of the previous one.

    Args:
        body: Raw markdown text of the document.
        file_path: Path of the source file; forwarded to the steps that
            take it (edit link, snippet insertion, CSV conversion).
        slug: Document slug; forwarded to ``insert_edit_this_page``.

    Returns:
        The body after every transformation step has been applied.
    """
    # The first three steps need extra context besides the text itself.
    markdown = self.insert_edit_this_page(body, slug, file_path)
    markdown = self.insert_markdown_snippet(markdown, file_path)
    markdown = self.convert_csv_to_html_table(markdown, file_path)

    # The remaining steps are text-in/text-out and run in this fixed order.
    for transform in (
        self.correct_image_locations,
        self.correct_file_locations,
        self.convert_note_tags,
        self.parse_images,
        self.convert_cloudinary_videos,
    ):
        markdown = transform(markdown)

    return markdown
307
+
308
def sanitize_html(self, body: str) -> str:
    """Clean an HTML/markdown fragment with ``nh3`` using a widened allow-list.

    The allow-list starts from ``nh3.ALLOWED_TAGS`` and
    ``nh3.ALLOWED_ATTRIBUTES``; fresh sets are built on every call so the
    module-level defaults are never mutated.

    Args:
        body: Text to sanitize.

    Returns:
        The value returned by ``nh3.clean`` for ``body``.
    """
    extra_tags = {"style", "a", "label"}
    extra_attributes = {"width", "style", "target", "class"}

    permitted_tags = set(nh3.ALLOWED_TAGS) | extra_tags

    # NOTE(review): only tags that already have an entry in
    # nh3.ALLOWED_ATTRIBUTES receive the extra attributes; a tag without an
    # entry there gets none from this method. Confirm that is intended.
    permitted_attributes = {
        tag: set(attrs) | extra_attributes
        for tag, attrs in nh3.ALLOWED_ATTRIBUTES.items()
    }

    return nh3.clean(
        body,
        tags=permitted_tags,
        attributes=permitted_attributes,
        # NOTE(review): presumably disables nh3's automatic rel= on links
        # while "target" is allowed above; verify against the nh3 docs.
        link_rel=None,
        strip_comments=False,
        generic_attribute_prefixes={"data-"},
        # "style" is deliberately absent here because it is an allowed tag.
        clean_content_tags={"script"},
    )
330
+
304
331
def insert_edit_this_page (self , body : str , filename : str , file_path : str ) -> str :
305
332
depth = len (file_path .split ("/" )) - 1
306
333
relative_path = "../" * depth
@@ -468,7 +495,9 @@ def replace_match(match):
468
495
469
496
try :
470
497
with open (snippet_path , "r" ) as f :
471
- return f .read ()
498
+ unsafe_content = f .read ()
499
+ return self .sanitize_html (unsafe_content )
500
+
472
501
except Exception as e :
473
502
return f"[File not found or could not be read: { snippet_path } ]"
474
503
0 commit comments