-
Notifications
You must be signed in to change notification settings - Fork 5
/
Copy pathjupyter-to-mdx.py
107 lines (89 loc) · 3.71 KB
/
jupyter-to-mdx.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import os
import nbformat
from pathlib import Path
def _render_output(output):
    """
    Build the MDX lines for a single code-cell output.

    Args:
        output: One entry of ``cell.outputs``; it must support ``in`` tests
            for the keys 'text' and 'data' and attribute access to them.

    Returns:
        list[str]: Lines to append to the MDX document (possibly empty).
    """
    if 'text' in output:
        return ['```', output.text, '```']
    if 'data' not in output:
        # Outputs carrying neither key (presumably error outputs -- confirm
        # against the nbformat schema) are skipped.
        return []

    lines = []
    data = output.data
    if 'text/plain' in data:
        lines.extend(['```', data['text/plain'], '```'])
    # Handle images if present
    if 'image/png' in data:
        # The whole image must be emitted as ONE list item: the caller joins
        # items with newlines, and a line break inside the URL breaks the
        # Markdown image. Base64 payloads may also carry embedded/trailing
        # newlines, so all whitespace is removed from the payload.
        payload = ''.join(data['image/png'].split())
        lines.append(f'![Output](data:image/png;base64,{payload})\n')
    return lines


def convert_notebook_to_mdx(notebook_path, output_path):
    """
    Convert a Jupyter notebook to MDX format.

    Markdown cells are copied through unchanged. Code cells are wrapped in a
    ``python`` code fence; if a code cell has outputs they are emitted inside
    an ``<Output>`` element (text in plain code fences, PNG images as inline
    base64 data-URI Markdown images). Cells of any other type are ignored.

    Args:
        notebook_path (str): Path to the input notebook file
        output_path (str): Path where the MDX file will be saved
    """
    # Read the notebook
    with open(notebook_path, 'r', encoding='utf-8') as f:
        notebook = nbformat.read(f, as_version=4)

    # Escape backslashes and double quotes so a file name containing them
    # still yields a valid double-quoted YAML string in the frontmatter.
    title = Path(notebook_path).stem.replace('\\', '\\\\').replace('"', '\\"')

    # Initialize MDX content with frontmatter
    mdx_content = [
        '---',
        f'title: "{title}"',
        '---\n',
    ]

    # Process each cell
    for cell in notebook.cells:
        if cell.cell_type == 'markdown':
            # Add markdown content directly
            mdx_content.append(cell.source)
            mdx_content.append('\n')
        elif cell.cell_type == 'code':
            # Wrap code in markdown code fence
            mdx_content.append('```python')
            mdx_content.append(cell.source)
            mdx_content.append('```\n')

            # Add output if present
            if cell.outputs:
                mdx_content.append('<Output>\n')
                for output in cell.outputs:
                    mdx_content.extend(_render_output(output))
                mdx_content.append('</Output>\n')

    # Write the MDX file
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write('\n'.join(mdx_content))
def process_directory(input_dir):
    """
    Recursively process all .ipynb files in a directory and its subdirectories.

    Converted files are written under ``<input_dir>/mdx_output``, mirroring
    each notebook's path relative to ``input_dir`` with the suffix changed to
    ``.mdx``. Notebooks located inside a ``.ipynb_checkpoints`` directory are
    skipped. A failure on one notebook is printed and does not stop the run.

    Args:
        input_dir (str): Path to the directory to process
    """
    # Convert input_dir to Path object
    input_path = Path(input_dir)

    # Create a directory for MDX files if it doesn't exist
    mdx_dir = input_path / 'mdx_output'
    mdx_dir.mkdir(exist_ok=True)

    # Find all .ipynb files
    for notebook_path in input_path.rglob('*.ipynb'):
        relative_path = notebook_path.relative_to(input_path)

        # Skip checkpoint files. Match the exact directory name on the path
        # *relative to input_dir*: a substring test on the full path would
        # also skip everything when input_dir itself lives under a path that
        # merely contains the text '.ipynb_checkpoints'.
        if '.ipynb_checkpoints' in relative_path.parts:
            continue

        # Create corresponding MDX path
        mdx_path = mdx_dir / relative_path.with_suffix('.mdx')

        # Create necessary subdirectories
        mdx_path.parent.mkdir(parents=True, exist_ok=True)

        print(f'Converting {notebook_path} to {mdx_path}')
        try:
            convert_notebook_to_mdx(str(notebook_path), str(mdx_path))
        except Exception as e:
            # Deliberately broad: this is the per-file boundary of a batch
            # job, so one bad notebook is reported and the rest still run.
            print(f'Error converting {notebook_path}: {str(e)}')
if __name__ == '__main__':
    # Command-line entry point: expects exactly one argument, the directory
    # whose notebooks should be converted.
    import sys

    if len(sys.argv) != 2:
        # Report the name the script was actually invoked with rather than a
        # hard-coded file name that can drift from the real one.
        print(f'Usage: python {os.path.basename(sys.argv[0])} <directory_path>')
        sys.exit(1)

    directory_path = sys.argv[1]
    if not os.path.isdir(directory_path):
        print(f'Error: {directory_path} is not a valid directory')
        sys.exit(1)

    process_directory(directory_path)
    print('Conversion complete! Check the mdx_output directory for the converted files.')