Skip to content

Commit

Permalink
added weird point separation pattern
Browse files Browse the repository at this point in the history
  • Loading branch information
dominik-kopczynski committed Apr 22, 2024
1 parent 56e03be commit 3990dd4
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 18 deletions.
35 changes: 21 additions & 14 deletions pygoslin/data/goslin/SwissLipids.g4
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,13 @@ grammar SwissLipids;


/* first rule is always start rule */
lipid : lipid_pure EOF | lipid_pure adduct_info EOF;
lipid : lipid_pure EOF | lipid_pure adduct_info EOF | lipid_pure variants EOF;
lipid_pure : fatty_acid | gl | pl | sl | st;

/* one or more variant tags that may follow lipid_pure in place of adduct_info */
variants : variant | variant variants;
/* a single tag: a letter tag or a numbered tag, optionally preceded by adduct_separator (a SPACE) */
variant : adduct_separator abc | abc | adduct_separator npos | npos;
/* letter tag in round brackets, '(a)' through '(g)' */
abc : '(a)' | '(b)' | '(c)' | '(d)' | '(e)' | '(f)' | '(g)';
/* numbered tag: '(n<number>)', '[sn<number>]' or '[<number>_sn<number>]' */
npos : '(n' number ')' | '[sn' number ']' | '[' number '_sn' number ']';

/* adduct information */
adduct_info : adduct_sep | adduct_separator adduct_sep;
Expand All @@ -41,8 +45,9 @@ adduct_element : element | element number | number element | plus_minus element


/* fatty acyl rules */
fa : fa_core | fa_lcb_prefix fa_core | fa_core fa_lcb_suffix | fa_lcb_prefix fa_core fa_lcb_suffix;
fa_core : carbon carbon_db_separator db | ether carbon carbon_db_separator db;
fa : fa_p | fa_p '.';
fa_p : fa_core | fa_lcb_prefix fa_core | fa_core fa_lcb_suffix | fa_lcb_prefix fa_core fa_lcb_suffix;
fa_core : carbon carbon_db_separator db | ether_type carbon carbon_db_separator db;

lcb : lcb_core | fa_lcb_prefix lcb_core | lcb_core fa_lcb_suffix | fa_lcb_prefix lcb_core fa_lcb_suffix;
lcb_core : hydroxyl carbon carbon_db_separator db;
Expand All @@ -55,7 +60,8 @@ db_position : db_single_position | db_position db_position_separator db_position
db_single_position : db_position_number | db_position_number cistrans;
db_position_number : number;
cistrans : 'E' | 'Z';
ether : 'O-' | 'P-';
/* ether marker letter followed by its separator, either '-' or '.' */
ether_type : ether '-' | ether '.';
/* bare ether marker letter; the separator is no longer part of this token */
ether : 'O' | 'P';
hydroxyl : 'm' | 'd' | 't';
fa_lcb_suffix : fa_lcb_suffix_core | fa_lcb_suffix_separator fa_lcb_suffix_core | ROB fa_lcb_suffix_core RCB;
fa_lcb_suffix_core : fa_lcb_suffix_number fa_lcb_suffix_type | fa_lcb_suffix_number fa_lcb_suffix_separator fa_lcb_suffix_type;
Expand Down Expand Up @@ -113,9 +119,10 @@ med_suffix : 'S' | 'R';
/* glycerolipid rules */
gl : gl_regular | gl_mono | gl_molecular;

gl_regular : gl_hg gl_fa | gl_hg headgroup_separator gl_fa;
gl_regular : gl_hg gl_fa | gl_hg headgroup_separator gl_fa | gl_hg_tg gl_half_sub_fa | gl_hg_tg headgroup_separator gl_half_sub_fa;
gl_fa : ROB fa_species RCB | ROB fa3 RCB;
gl_hg : 'MG' | 'DG' | 'TG' | 'MAG' | 'DAG' | 'TAG';
gl_hg : 'MG' | 'DG' | gl_hg_tg | 'MAG' | 'DAG';
gl_hg_tg : 'TG' | 'TAG';

gl_molecular : gl_molecular_hg gl_molecular_fa | gl_molecular_hg headgroup_separator gl_molecular_fa;
gl_molecular_fa : ROB fa2 RCB;
Expand All @@ -126,7 +133,7 @@ gl_mono : gl_mono_hg gl_mono_fa | gl_mono_hg headgroup_separator gl_mono_fa;
gl_mono_fa : ROB fa_species RCB | ROB fa2 RCB;
gl_mono_hg : 'MHDG' | 'DHDG' | 'MGDG' | 'DGDG';


/* fatty acyl part used after gl_hg_tg in gl_regular: one fa_species, the literal 'FA', then a second fa_species */
gl_half_sub_fa : fa_species 'FA' fa_species;



Expand All @@ -151,7 +158,7 @@ pl_four_hg : 'BMP' | 'LBPA' | 'Lysobisphosphatidate' | 'CL' | 'MLCL' | 'DLCL';


/* sphingolipid rules */
sl : sl_hg sl_lcb | sl_hg headgroup_separator sl_lcb;
sl : sl_hg sl_lcb | sl_hg headgroup_separator sl_lcb | sl_hg lcb_18_1_fa;
sl_hg : sl_hg_names | sl_hg_prefix sl_hg_names | sl_hg_names sl_hg_suffix | sl_hg_prefix sl_hg_names sl_hg_suffix;
sl_hg_names : 'HexCer' | 'Hex2Cer' | 'SM' | 'PE-Cer' | 'Cer' | 'CerP' | 'GD1a' | 'GM1b' | 'GT1b' | 'GQ1b' | 'GT1a' | 'GQ1c' | 'GP1c' | 'GD1c' | 'GD1b' | 'GT1c' | 'IPC' | 'MIPC' | 'M(IP)2C' | 'Gb3Cer' | 'Gb4Cer' | 'Forssman' | 'MSGG' | 'DSGG' | 'NOR1' | 'NORint' | 'NOR2' | 'Globo-H' | 'Globo-A' | 'SB1a' | 'SM1b' | 'SM1a' | 'Branched-Forssman' | 'Globo-B' | 'Para-Forssman' | 'Globo-Lex-9' | glyco_sphingo_lipid;
glyco_sphingo_lipid : 'GA1' | 'Ga1' | 'GA2' | 'Ga2' |
Expand All @@ -168,7 +175,7 @@ sl_hg_suffix : sl_hg_suffix sl_hg_suffix | sl_hg_suffix '/' | ROB sl_hg_suffix R
sl_lcb : sl_lcb_species | sl_lcb_subspecies;
sl_lcb_species : ROB lcb RCB;
sl_lcb_subspecies : ROB lcb sorted_fa_separator fa RCB;

lcb_18_1_fa : fa_fa;



Expand Down Expand Up @@ -196,14 +203,14 @@ UNDERSCORE : '_';
SLASH : '/';
BACKSLASH : '\\';
COMMA: ',';
ROB: '(';
RCB: ')';
ROB: '(' | '.';
RCB: ')' | '.';

unsorted_fa_separator : UNDERSCORE;
unsorted_fa_separator : UNDERSCORE | '.';
adduct_separator : SPACE;
sorted_fa_separator : SLASH;
sorted_fa_separator : SLASH | '.';
headgroup_separator : SPACE;
carbon_db_separator : COLON;
carbon_db_separator : COLON | '.';
db_position_separator : COMMA;
med_position_separator : COMMA;
fa_lcb_suffix_separator : DASH;
Expand Down
7 changes: 6 additions & 1 deletion pygoslin/parser/LipidBaseParserEventHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,12 @@ def prepare_headgroup_and_checks(self, allow_class_shift = True):
if not LipidBaseParserEventHandler.check_full_structure(fa):
self.set_lipid_level(LipidLevel.STRUCTURE_DEFINED)
break


if self.level == LipidLevel.MOLECULAR_SPECIES and headgroup.lipid_category == LipidCategory.GP and len(self.fa_list) == 2 and sum(fa.lipid_FA_bond_type == LipidFaBondType.ESTER for fa in self.fa_list) < 2:
self.level = LipidLevel.SN_POSITION
if self.fa_list[0].lipid_FA_bond_type == LipidFaBondType.ESTER:
self.fa_list[0], self.fa_list[1] = self.fa_list[1], self.fa_list[0]


if self.level == LipidLevel.SPECIES:
Expand Down Expand Up @@ -151,7 +157,6 @@ def prepare_headgroup_and_checks(self, allow_class_shift = True):


def assemble_lipid(self, headgroup):

for fa in self.fa_list:
if fa.stereo_information_missing():
self.set_lipid_level(LipidLevel.FULL_STRUCTURE)
Expand Down
29 changes: 27 additions & 2 deletions pygoslin/parser/SwissLipidsParserEventHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,7 @@ def __init__(self):
self.registered_events["charge_sign_pre_event"] = self.add_charge_sign

self.registered_events["pl_hg_pre_event"] = self.set_head_group_name
self.registered_events["gl_hg_tg_pre_event"] = self.set_head_group_name
self.registered_events["pl_three_hg_pre_event"] = self.set_head_group_name
self.registered_events["pl_four_hg_pre_event"] = self.set_head_group_name
self.registered_events["sl_hg_pre_event"] = self.set_head_group_name
Expand All @@ -77,6 +78,7 @@ def __init__(self):
self.registered_events["fa2_unsorted_pre_event"] = self.set_molecular_level
self.registered_events["fa3_unsorted_pre_event"] = self.set_molecular_level
self.registered_events["fa4_unsorted_pre_event"] = self.set_molecular_level
self.registered_events["lcb_18_1_fa_pre_event"] = self.set_lcb_18_1

self.registered_events["st_species_fa_post_event"] = self.set_species_fa
self.registered_events["pl_three_post_event"] = self.set_nape
Expand All @@ -97,6 +99,7 @@ def __init__(self):
self.registered_events["db_count_pre_event"] = self.add_double_bonds
self.registered_events["carbon_pre_event"] = self.add_carbon
self.registered_events["fa_lcb_suffix_number_pre_event"] = self.add_suffix_number
self.registered_events["gl_half_sub_fa_pre_event"] = self.gl_half_sub_fa

self.debug = ""

Expand All @@ -114,6 +117,7 @@ def reset_lipid(self, node):
self.db_numbers = -1
self.headgroup_decorators = []
self.suffix_number = -1
self.fa_suffix_molecular = False


def add_db_position(self, node):
Expand All @@ -125,6 +129,10 @@ def add_db_position(self, node):
if self.db_cistrans not in {"E", "Z"}: self.set_lipid_level(LipidLevel.STRUCTURE_DEFINED)


def gl_half_sub_fa(self, node):
    """Record that the ``gl_half_sub_fa`` grammar rule was matched.

    Registered as the handler for ``gl_half_sub_fa_pre_event``. It only
    raises the ``fa_suffix_molecular`` flag (cleared again in
    ``reset_lipid``); the parse-tree ``node`` is not inspected.
    """
    self.fa_suffix_molecular = True


def set_nape(self, node):
self.head_group = "PE-N"
hgd = HeadgroupDecorator("decorator_acyl", suffix = True)
Expand All @@ -133,6 +141,17 @@ def set_nape(self, node):
self.fa_list.pop()


def set_lcb_18_1(self, node):
    """Create a long-chain base fixed at 18 carbons and 1 double bond.

    Registered as the handler for ``lcb_18_1_fa_pre_event``. The work is
    bracketed by ``new_lcb`` / ``clean_lcb``; in between, the current fatty
    acyl gets its carbon and double-bond counts set and receives the
    functional group returned by ``get_functional_group("OH")``.

    NOTE(review): presumably ``new_lcb`` makes a fresh LCB the
    ``current_fa`` and ``clean_lcb`` finalises it -- confirm against the
    base handler.
    """
    self.new_lcb(node)

    lcb = self.current_fa
    lcb.num_carbon, lcb.double_bonds = 18, 1

    # Fetch the group first so a failing lookup leaves no empty "OH" list behind.
    oh_group = get_functional_group("OH")
    groups = lcb.functional_groups
    if "OH" not in groups:
        groups["OH"] = []
    groups["OH"].append(oh_group)

    self.clean_lcb(node)




def add_db_position_number(self, node):
self.db_position = int(node.get_text())
Expand Down Expand Up @@ -219,6 +238,12 @@ def build_lipid(self, node):
self.fa_list = [self.lcb] + self.fa_list

headgroup = self.prepare_headgroup_and_checks()

if self.fa_suffix_molecular:
self.fa_list[0].num_carbon -= self.fa_list[1].num_carbon
self.fa_list[0].double_bonds -= self.fa_list[1].double_bonds
if self.level.value < LipidLevel.MOLECULAR_SPECIES.value: self.level = LipidLevel.MOLECULAR_SPECIES

lipid = LipidAdduct()
lipid.lipid = self.assemble_lipid(headgroup)
lipid.adduct = self.adduct
Expand All @@ -228,8 +253,8 @@ def build_lipid(self, node):

def add_ether(self, node):
    """Set the ether bond type of the current fatty acyl from the matched marker.

    The grammar now emits the bare letter (``ether : 'O' | 'P'``) while the
    separator lives in ``ether_type`` (``ether '-' | ether '.'``). A trailing
    ``-`` or ``.`` is stripped before comparing, so the bare marker and the
    marker-with-separator forms are both accepted.

    ``O`` selects ``ETHER_PLASMANYL`` and ``P`` selects ``ETHER_PLASMENYL``;
    any other text leaves the bond type untouched.
    """
    # Tolerate "O", "O-", "O." (and the same for "P") regardless of which
    # grammar rule this handler is bound to.
    marker = node.get_text().rstrip("-.")
    if marker == "O":
        self.current_fa.lipid_FA_bond_type = LipidFaBondType.ETHER_PLASMANYL
    elif marker == "P":
        self.current_fa.lipid_FA_bond_type = LipidFaBondType.ETHER_PLASMENYL


Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@

setup(
name = 'pygoslin',
version = '2.1.3',
version = '2.1.4',
url = 'https://github.com/lifs-tools/pygoslin',
license = 'MIT',
author = 'Dominik Kopczynski',
Expand Down

0 comments on commit 3990dd4

Please sign in to comment.