Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
bertkdowns committed Nov 5, 2024
2 parents ba35b24 + 97ed856 commit c4969b0
Show file tree
Hide file tree
Showing 559 changed files with 116,112 additions and 1,608 deletions.
32 changes: 32 additions & 0 deletions Initial_gen/build_data_files.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
import xml.etree.ElementTree as ET
import os

"""
Splits chemsep1 and chemsep2 into
individual compound files.
"""

def build(filename, output_dir="data/data_files"):
    """Split a multi-compound XML file into one XML file per compound.

    Every direct child of the document root is treated as one compound.
    The ``value`` attribute of its ``CompoundID`` element is lower-cased in
    place and used as the output file name
    (``<output_dir>/<lower-cased value>.xml``).

    Children that have no ``CompoundID`` element, or whose ``CompoundID``
    has no ``value`` attribute, are skipped instead of raising
    ``AttributeError``.

    Args:
        filename: Path of the XML file to split.
        output_dir: Directory that receives the per-compound files. It is
            created if it does not exist yet.
    """
    os.makedirs(output_dir, exist_ok=True)

    root = ET.parse(filename).getroot()
    for compound in root:
        id_elem = compound.find("CompoundID")
        if id_elem is None:
            continue

        raw_name = id_elem.get("value")
        if raw_name is None:
            # The None check has to happen before .lower(), not after it.
            continue

        name = raw_name.lower()
        # Keep the stored identifier consistent with the file name.
        id_elem.set("value", name)

        out_path = os.path.join(output_dir, f"{name}.xml")
        ET.ElementTree(compound).write(out_path)

def run():
    """Regenerate the per-compound files in ``data/data_files``.

    Existing files in the output directory are deleted first, then
    ``data/chemsep/chemsep1.xml`` and ``data/chemsep/chemsep2.xml`` are
    split with ``build``. The output directory is created when missing,
    and subdirectories inside it are left alone because ``os.remove``
    cannot delete them.
    """
    data_directory = "data/data_files"
    os.makedirs(data_directory, exist_ok=True)

    # Remove all files in the directory
    for fname in os.listdir(data_directory):
        file_path = os.path.join(data_directory, fname)
        # islink also catches dangling symlinks, which isfile reports False for.
        if os.path.isfile(file_path) or os.path.islink(file_path):
            os.remove(file_path)

    # Build from XML files
    build("data/chemsep/chemsep1.xml")
    build("data/chemsep/chemsep2.xml")

# Unguarded module-level call: the data files are rebuilt whenever this
# module is executed, including when it is merely imported.
run()
Loading

0 comments on commit c4969b0

Please sign in to comment.