-
Notifications
You must be signed in to change notification settings - Fork 24
adding boilerplate text at top of scripts missing it NO_JIRA #84
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,110 @@ | ||
#!/usr/bin/env python | ||
# | ||
# This script can be used for any purpose without limitation subject to the | ||
# conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx | ||
# | ||
# This permission notice and the following statement of attribution must be | ||
# included in all copies or substantial portions of this script. | ||
# | ||
|
||
from ccdc.io import MoleculeReader | ||
import os | ||
import csv | ||
import argparse | ||
from utilities import file_list, string_scrubber, read_experimental_csv | ||
|
||
|
||
def read_mol_file(directory, file):
    """Read the first molecule in a file and return its identifier and SMILES.

    Args:
        directory: Directory that contains the molecule file.
        file: Name of the molecule file inside ``directory``.

    Returns:
        A ``(identifier, smiles)`` tuple, where ``smiles`` is the SMILES
        string of the molecule's heaviest component.
    """
    # Use the reader as a context manager so it is closed as soon as the
    # values have been extracted; this function is called once per file in
    # a nested loop, so unclosed readers would otherwise accumulate.
    with MoleculeReader(os.path.join(directory, file)) as mol_reader:
        mol = mol_reader[0]
        return mol.identifier, mol.heaviest_component.smiles
|
||
|
||
def main():
    """Write a CSV of every API/coformer combination found under --input_dir.

    Each sub-directory of ``--input_dir`` is treated as an API group holding
    one or more API files and a ``coformers`` directory. For every
    API/coformer pair one row is written with a combined identifier
    (``<api_id>.<coformer_id>.<label>``), the number of distinct components
    and both SMILES strings. The label is ``?`` unless ``--experimental_csv``
    supplies a value for the pair (in either order).
    """
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--input_dir",
        type=str,
        required=True,
        help="Directory containing API folders."
    )
    parser.add_argument(
        "--output_filename",
        type=str,
        required=True,
        help="Filename of formatted .csv file containing SMILES."
    )
    parser.add_argument(
        "--experimental_csv",
        type=str,
        required=False,
        help="Filename of formatted .csv file containing identifier names and experimental bool."
    )
    parser.add_argument(
        '--clean_id',
        action='store_true',
        help='Removes special characters from ids that may be problematic.'
    )
    args = parser.parse_args()

    output_path = os.path.join(args.input_dir, args.output_filename)

    # Load the optional experimental labels up front so that a bad path fails
    # before any output is written.
    experimental_dict = {}
    if args.experimental_csv:
        experimental_dict = read_experimental_csv(args.experimental_csv)

    # API group directories contain one or more API files and a directory of coformers
    API_groups = [
        name for name in os.listdir(args.input_dir)
        if os.path.isdir(os.path.join(args.input_dir, name))
    ]

    exp_replaced = combo_count = 0

    with open(output_path, 'w', newline='', encoding="utf-8") as output_file:
        csvwriter = csv.writer(output_file, delimiter=',', quotechar='|')
        csvwriter.writerow(['identifier', 'n_components',
                            'component_a', 'component_b', 'neutral_a', 'neutral_b'])

        for API_group in API_groups:
            API_group_path = os.path.join(args.input_dir, API_group)
            coformer_dir_path = os.path.join(API_group_path, 'coformers')

            for API_file in file_list(API_group_path):
                api_id, api_smiles = read_mol_file(API_group_path, API_file)
                print(api_id)

                for coformer_file in file_list(coformer_dir_path):
                    coformer_id, coformer_smiles = read_mol_file(
                        coformer_dir_path, coformer_file)

                    combo_count += 1
                    exp_bool = "?"
                    # Look the pair up in both orders using the *raw* ids.
                    # If both orders are present the (coformer, api) entry
                    # wins, and the pair is still only counted once.
                    found = False
                    for key in ((api_id, coformer_id), (coformer_id, api_id)):
                        if key in experimental_dict:
                            exp_bool = experimental_dict[key]
                            found = True
                    if found:
                        exp_replaced += 1

                    # Clean the ids if the option is turned on. Separate
                    # names are used so the raw api_id stays intact for the
                    # lookups of the remaining coformers.
                    out_api_id, out_coformer_id = api_id, coformer_id
                    if args.clean_id:
                        out_api_id = string_scrubber(api_id)
                        out_coformer_id = string_scrubber(coformer_id)

                    # Identical SMILES means the "pair" is a single component.
                    n_components = 1 if api_smiles == coformer_smiles else 2

                    combo_id = ".".join(
                        [out_api_id, out_coformer_id, str(exp_bool)])
                    csvwriter.writerow(
                        [f'"{combo_id}"', n_components, api_smiles, coformer_smiles, "", ""])

    if args.experimental_csv:
        print(f"Found experimental labels for {exp_replaced} out of {combo_count} combinations")
|
||
|
||
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
blank line contains whitespace