Skip to content

Commit

Permalink
chore: get English duplicated release notes (#15140)
Browse files Browse the repository at this point in the history
  • Loading branch information
qiancai authored Feb 22, 2024
1 parent 872df99 commit cc497a2
Showing 1 changed file with 90 additions and 0 deletions.
90 changes: 90 additions & 0 deletions scripts/get-en-duplicated-notes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# This script generates the English version of duplicated release notes once the duplicated release notes in Chinese are ready.
# Before running this script, first copy the Chinese duplicated release notes (the lines marked with "(dup)") into your target English release note file. The script then replaces each of them with the corresponding English note, which it looks up by the release file name and the issue or pull request link recorded in the duplicated note.

import re, os
from tempfile import mkstemp
from shutil import move
from os import remove

# Path of the folder that contains the existing English release notes.
# The folder is searched recursively for .md files.
ext_path = r'/Users/userid/Documents/GitHub/mygithubid/docs/releases'
# Path of the English release note FILE that you are preparing. This file is
# skipped when existing notes are collected, and it is rewritten in place.
main_path = r'/Users/userid/Documents/GitHub/mygithubid/docs/releases/release-7.1.2.md'

# Get existing release notes from the English release notes folder
def store_exst_rn(ext_path,main_path):
    """Collect the release notes that already exist in the English docs.

    Every ``.md`` file under ``ext_path`` (searched recursively) except
    ``main_path`` is scanned line by line. A line that contains a GitHub
    issue or pull request link of the ``pingcap`` or ``tikv`` organization
    is treated as a release note.

    Args:
        ext_path: Folder that holds the existing release note files.
        main_path: Release note file being prepared; it is skipped so that
            its own notes are not reported as existing ones.

    Returns:
        A list of ``[issue_url, note_line, file_name, note_level, authors]``
        entries, or ``0`` when no note is found (kept for backward
        compatibility; both values are falsy). ``note_level`` is the
        "> heading> bullet" trail under which the note was found, and
        ``authors`` lists the names found in ``@[name]`` mentions.
    """
    issue_pattern = re.compile(r'https://github.com/(pingcap|tikv)/[\w-]+/(issues|pull)/\d+')
    author_pattern = re.compile(r'@\[([^\]]+)\]')
    # Compare normalized paths so that "./x.md" and "x.md" are the same file.
    current_file = os.path.abspath(main_path)

    exst_notes = []
    # Notes are later looked up by (issue URL, file name), so a note is only
    # a repeat when BOTH are equal. De-duplicating on the URL alone would keep
    # just the first file in which an issue shows up and make the same issue
    # unfindable for every other release file.
    seen_notes = set()

    for maindir, subdir, files in os.walk(ext_path):
        # os.walk yields entries in an arbitrary, platform-dependent order;
        # sorting makes the result reproducible.
        subdir.sort()
        for afile in sorted(files):
            file_path = os.path.join(maindir, afile)
            if not file_path.endswith('.md'):
                continue
            if os.path.abspath(file_path) == current_file:
                continue  # Exclude notes that are in the current release file
            with open(file_path, 'r', encoding='utf-8') as fp:
                level1 = level2 = level3 = ""
                for line in fp:
                    exst_issue_num = issue_pattern.search(line)
                    authors = author_pattern.findall(line)  # Authors mentioned in this line
                    if exst_issue_num:
                        issue_url = exst_issue_num.group()
                        note_key = (issue_url, afile)
                        if note_key not in seen_notes:
                            seen_notes.add(note_key)
                            note_level = level1 + level2 + level3
                            exst_notes.append([issue_url, line.strip(), afile, note_level, authors])
                    elif line.startswith("##"):
                        # Strip only the leading heading marker, whatever its depth.
                        level1 = "> " + line.lstrip("#").strip()
                        level2 = level3 = ""
                    elif authors:
                        # A bullet that credits an author is a note without a
                        # link, not a category label.
                        continue
                    elif line.startswith(("+", "-")):
                        # Drop only the bullet marker; "+"/"-" inside the label
                        # (for example "TiDB-Binlog") must be preserved.
                        level2 = "> " + line[1:].strip()
                        level3 = ""
                    elif line.startswith((" +", " -")):
                        # NOTE(review): the nested-bullet prefix is a single
                        # space here; confirm that this matches the indentation
                        # actually used in the release note files.
                        level3 = "> " + line[2:].strip()

    return exst_notes if exst_notes else 0

# Replace Chinese duplicated release notes with the corresponding English translation
def replace_zh_dup_with_en_dup(note_pairs, main_path):
    """Rewrite ``main_path``, replacing duplicated notes with English ones.

    A line of ``main_path`` counts as a note when it contains both a release
    file name (``release-X.Y.Z.md``) and a GitHub issue or pull request link
    of the ``pingcap`` or ``tikv`` organization. When such a line is marked
    with ``(dup)`` and an entry of ``note_pairs`` has the same link and the
    same file name, the ``- (dup): release-X.Y.Z.md ...`` part of the line is
    replaced with ``- (dup): <file> <note level> <English note>``. All other
    lines are copied unchanged. A summary is printed at the end.

    Args:
        note_pairs: Entries of the form
            ``[issue_url, note_line, file_name, note_level, authors]`` as
            returned by ``store_exst_rn``. A falsy value (such as the ``0``
            that ``store_exst_rn`` returns when nothing is found) is treated
            as "no existing notes".
        main_path: Path of the release note file to rewrite in place.
    """
    release_file_pattern = re.compile(r'release-\d+\.\d+\.\d+\.md')
    # Repository names may contain hyphens (for example "tidb-binlog"), so the
    # pattern must accept them, as the one used to collect the notes does.
    issue_pattern = re.compile(r'https://github.com/(pingcap|tikv)/[\w-]+/(issues|pull)/\d+')
    # Match up to, but not including, the line break so that a last line
    # without a trailing newline is handled as well.
    dup_pattern = re.compile(r'- \(dup\): release-\d+\.\d+\.\d+\.md[^\n]*')

    # Index the notes by (issue URL, file name) for constant-time lookup.
    # setdefault keeps the first entry, like a linear scan that stops at the
    # first match.
    en_notes = {}
    for note_pair in note_pairs or []:
        en_notes.setdefault((note_pair[0], note_pair[2]), note_pair)

    dup_num = 0
    note_num = 0
    # mkstemp returns an OPEN file descriptor; wrap it so that it gets closed
    # instead of opening the path a second time and leaking the descriptor.
    fd, target_file_path = mkstemp()
    try:
        with os.fdopen(fd, 'w', encoding='utf-8') as target_file, \
                open(main_path, 'r', encoding='utf-8') as source_file:
            for line_num, line in enumerate(source_file, start=1):
                newline = line
                original_release_file = release_file_pattern.search(line)
                issue_num = issue_pattern.search(line)
                if issue_num and original_release_file:
                    note_num += 1
                    note_pair = None
                    if "(dup)" in line:
                        note_pair = en_notes.get((issue_num.group(), original_release_file.group()))
                    if note_pair is not None:
                        print('A duplicated note is found in line ' + str(line_num) + " from " + note_pair[2] + note_pair[1])
                        dup_note = '- (dup): {} {} {}'.format(note_pair[2], note_pair[3], note_pair[1]).strip()
                        # Use a function as the replacement so that backslashes
                        # in the note (for example "\_") are inserted literally
                        # rather than parsed as regex escapes.
                        newline = dup_pattern.sub(lambda _match: dup_note, line)
                        print('The duplicated note is replaced with ' + newline)
                        dup_num += 1
                target_file.write(newline)
    except BaseException:
        # Do not leave the temporary file behind when processing fails.
        remove(target_file_path)
        raise

    remove(main_path)
    move(target_file_path, main_path)
    # Avoid dividing by zero when the file contains no note at all.
    dup_rate = "%.0f%%" % (dup_num / note_num * 100) if note_num else "0%"
    print (str(dup_num) + " duplicated notes are found in " + str(note_num) + " notes. The duplicated rate is " + str(dup_rate) + ".")


if __name__ == "__main__":
    # Step 1: gather the notes that already exist in the other English
    # release note files.
    note_pairs = store_exst_rn(ext_path=ext_path, main_path=main_path)
    # Step 2: rewrite the file being prepared, using the gathered notes.
    replace_zh_dup_with_en_dup(note_pairs=note_pairs, main_path=main_path)

0 comments on commit cc497a2

Please sign in to comment.