1
2
3
4
5 """Monomer and element library data classes. The Library classes are used
6 for the identification and construction of biopolymers and ligands.
7 """
8 import os
9 import sys
10 import types
11
12 import ConsoleOutput
13 import mmCIF
14
15
16
17
18
# Locations of the data files that are expected in the "Data" directory
# next to this module.
(MMLIB_PATH, JUNK) = os.path.split(__file__)
DATA_PATH = os.path.join(MMLIB_PATH, "Data")
ELEMENT_DATA_PATH = os.path.join(MMLIB_PATH, "Data", "elements.cif")
MMLIB_MONOMER_DATA_PATH = os.path.join(MMLIB_PATH, "Data", "monomers.cif")
RCSB_MONOMER_DATA_FILE = os.path.join(MMLIB_PATH, "Data", "Monomers.zip")
RCSB_MONOMER_DATA_PATH = os.path.join(MMLIB_PATH, "Data", "Monomers")


def _load_cif_file(path):
    """Parses the mmCIF file at path and returns the loaded mmCIFFile.
    The file handle is closed once loading finishes or fails.
    """
    cif_file = mmCIF.mmCIFFile()
    fil = open(path, "r")
    try:
        cif_file.load_file(fil)
    finally:
        # the loaded mmCIFFile is used after the handle is closed, exactly
        # as library_construct_monomer_desc() does with its library file
        fil.close()
    return cif_file


# Descriptions already built, keyed by element symbol / residue name; filled
# by library_get_element_desc() and library_get_monomer_desc().
ELEMENT_CACHE = {}
MONOMER_RES_NAME_CACHE = {}

# Both data files are read once, when this module is imported.
ELEMENT_CIF_FILE = _load_cif_file(ELEMENT_DATA_PATH)
MMLIB_MONOMERS_CIF = _load_cif_file(MMLIB_MONOMER_DATA_PATH)

# State of the zipped RCSB monomer library. RCSB_ZIP is read by
# library_open_monomer_lib_zipfile().
# NOTE(review): presumably both are initialised lazily by
# library_use_monomer_zipfile() -- confirm against that function.
RCSB_USE_ZIP = None
RCSB_ZIP = None
40
41
42
43
# Element symbols from hydrogen through uranium, in order of atomic number.
_ELEMENT_SYMBOLS = (
    "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne",
    "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar", "K", "Ca",
    "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn",
    "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y", "Zr",
    "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn",
    "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd",
    "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb",
    "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg",
    "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th",
    "Pa", "U",
)


def _element_symbol_spellings(symbols):
    """Returns a dictionary mapping every symbol to True under its
    capitalised, all-lowercase and all-uppercase spelling. For one-letter
    symbols the capitalised and uppercase spellings are the same key.
    """
    spellings = {}
    for symbol in symbols:
        for spelling in (symbol, symbol.lower(), symbol.upper()):
            spellings[spelling] = True
    return spellings


# Lookup table of accepted element symbol spellings; mixed spellings such
# as "hE" are deliberately absent.
ELEMENT_SYMBOL_DICT = _element_symbol_spellings(_ELEMENT_SYMBOLS)
137
# (three-letter residue name, one-letter code) for the 20 standard amino
# acids; the three tables below are all derived from this single table.
_AMINO_ACID_CODES = (
    ("GLY", "G"), ("ALA", "A"), ("VAL", "V"), ("LEU", "L"), ("ILE", "I"),
    ("PRO", "P"), ("PHE", "F"), ("TYR", "Y"), ("TRP", "W"), ("MET", "M"),
    ("CYS", "C"), ("SER", "S"), ("THR", "T"), ("ASP", "D"), ("GLU", "E"),
    ("HIS", "H"), ("LYS", "K"), ("ARG", "R"), ("ASN", "N"), ("GLN", "Q"),
)

# Three-letter residue names, in the order listed above.
AMINO_ACID3_LIST = list(code3 for (code3, code1) in _AMINO_ACID_CODES)

# Three-letter residue name -> one-letter code.
AMINO_ACID31_DICT = dict(_AMINO_ACID_CODES)

# One-letter code -> three-letter residue name.
AMINO_ACID13_DICT = dict((code1, code3) for (code3, code1) in _AMINO_ACID_CODES)
156
# One-letter names of the nucleic acid bases.
NUCLEIC_ACID_LIST = ["A", "G", "C", "T", "U"]


def _build_nucleic_acid_name_dicts():
    """Returns the 2-tuple (res_name_dict, alt_name_dict). Both map the
    "X+", "Xr" and "+X" spellings of each base X to X. res_name_dict also
    maps X to itself, and alt_name_dict also maps the "Xd" spellings of
    A, T, G and C.
    """
    res_name_dict = {}
    alt_name_dict = {}

    for base in ("C", "G", "A", "T", "U"):
        res_name_dict[base] = base
        for spelling in (base + "+", base + "r", "+" + base):
            res_name_dict[spelling] = base
            alt_name_dict[spelling] = base

    for base in ("A", "T", "G", "C"):
        alt_name_dict[base + "d"] = base

    return res_name_dict, alt_name_dict


# NUCLEIC_ACID_RES_NAME_DICT: every accepted nucleic acid residue name
# mapped to its base letter.
# ALT_RES_NAME_DICT: alternate residue names only, mapped to the name that
# library_construct_monomer_desc() uses for the monomer library lookup.
(NUCLEIC_ACID_RES_NAME_DICT,
 ALT_RES_NAME_DICT) = _build_nucleic_acid_name_dicts()
176
177
178
179
180
class ElementDesc(object):
    """Element description class returned by library_get_element_desc().

    Every attribute starts out as None. library_construct_element_desc()
    assigns cif_data, name, symbol, number, atomic_weight, vdw_radius,
    covalent_radius and color_rgbf (a 3-tuple of floats computed as
    channel / 255.0). Any other attribute stays None unless the code
    building the instance fills it in.
    """
    # NOTE(review): the class header was not visible in the reviewed text;
    # the name is taken from the ElementDesc() call in
    # library_construct_element_desc() and the (object) base is assumed.
    def __init__(self):
        self.cif_data = None
        self.name = None
        self.symbol = None
        self.group = None
        self.period = None
        self.atomic_number = None
        # "number" and "vdw_radius" are the names the constructor function
        # writes; they are declared here so they can be read on any instance
        self.number = None
        self.atomic_weight = None
        self.atomic_radius = None
        self.covalent_radius = None
        self.van_der_waals_radius = None
        self.vdw_radius = None
        self.electronegativity = None
        self.color_rgbf = None
198
199
class MonomerDesc(object):
    """Monomer description class returned by library_get_monomer_desc().

    library_construct_monomer_desc() fills the instance as follows:
    atom_list holds one {"name", "symbol"} dictionary per atom, atom_dict
    maps atom name to symbol, alt_atom_dict maps atom name to alternate
    atom name, bond_list holds {"atom1", "atom2"} dictionaries, and
    torsion_angle_dict maps a torsion angle name to a 4-tuple of atom names.
    """
    # NOTE(review): the class and method headers were not visible in the
    # reviewed text. The names below come from call sites in this module,
    # except is_non_standard_residue, which is reconstructed -- confirm.
    def __init__(self):
        # classification flags read by the is_*() methods
        self.amino_acid = False
        self.nucleic_acid = False
        self.water = False

        # scalar descriptions of the monomer
        self.res_name = None
        self.full_name = None
        self.one_letter_code = None
        self.type = None
        self.pdbx_type = None
        self.formula = None
        self.rcsb_class_1 = None
        self.chem_type = None

        # per-instance containers for atoms, bonds and torsion angles
        self.atom_list = []
        self.atom_dict = {}
        self.alt_atom_dict = {}
        self.bond_list = []
        self.torsion_angle_dict = {}

    def is_amino_acid(self):
        """Returns the amino_acid flag: True for an amino acid monomer,
        otherwise False.
        """
        return self.amino_acid

    def is_nucleic_acid(self):
        """Returns the nucleic_acid flag: True for a nucleic acid monomer,
        otherwise False.
        """
        return self.nucleic_acid

    def is_standard_residue(self):
        """Returns a true value if the Monomer is flagged as an amino acid
        or as a nucleic acid.
        """
        return self.amino_acid or self.nucleic_acid

    def is_non_standard_residue(self):
        """Returns True if the Monomer is flagged as neither an amino acid
        nor a nucleic acid, otherwise returns False.
        """
        return not (self.amino_acid or self.nucleic_acid)

    def is_water(self):
        """Returns the water flag: True for a water molecule, otherwise
        False.
        """
        return self.water
249
250
251
252
253
254
def library_construct_element_desc(symbol):
    """Constructs the ElementDesc object for the given element symbol.

    The data comes from the "element" table of the ELEMENT_CIF_FILE data
    block named symbol. Returns None, after writing a warning, if there is
    no such data block.
    """
    cif_data = ELEMENT_CIF_FILE.get_data(symbol)
    if cif_data is None:
        ConsoleOutput.warning("element description not found for %s" % (symbol))
        return None

    element = cif_data.get_table("element")

    element_desc = ElementDesc()
    element_desc.cif_data = cif_data
    element_desc.name = element["name"]
    element_desc.symbol = element["symbol"]
    element_desc.atomic_weight = float(element["atomic_weight"])
    element_desc.covalent_radius = float(element.get("covalent_radius", 0.0))

    # ElementDesc declares atomic_number and van_der_waals_radius, while
    # this function has always written number and vdw_radius; fill both
    # spellings so neither is left as None
    element_desc.number = int(element["number"])
    element_desc.atomic_number = element_desc.number

    # "van_der_walls_radius" is the column name as spelled in the data file
    element_desc.vdw_radius = float(element["van_der_walls_radius"])
    element_desc.van_der_waals_radius = element_desc.vdw_radius

    # characters 1-6 of the color value are read as three two-digit
    # hexadecimal channels and scaled to floats by dividing by 255
    # NOTE(review): presumably a "#RRGGBB" string -- confirm against
    # elements.cif
    rgb8 = element["color_rgb"]
    element_desc.color_rgbf = (int(rgb8[1:3], 16) / 255.0,
                               int(rgb8[3:5], 16) / 255.0,
                               int(rgb8[5:7], 16) / 255.0)

    return element_desc
282
283
def library_get_element_desc(symbol):
    """Loads/caches/returns an instance of the ElementDesc class for the given
    element symbol. The source of the element data is the
    mmLib/Data/elements.cif file.

    Returns None if no description can be constructed for symbol; failed
    lookups are not cached.
    """
    assert isinstance(symbol, str)

    try:
        return ELEMENT_CACHE[symbol]
    except KeyError:
        pass

    # library_construct_element_desc() already writes the "element
    # description not found" warning, so it is not repeated here
    element_desc = library_construct_element_desc(symbol)
    if element_desc is None:
        return None

    ELEMENT_CACHE[symbol] = element_desc
    return element_desc
303
304
322
323
def library_open_monomer_lib_zipfile(monomer_name):
    """Looks the monomer up in the monomer library zipfile and returns its
    mmCIF text wrapped in a file-like StringIO object.

    Returns None if library_use_monomer_zipfile() is false, or if the
    zipfile has no entry named after the upper-cased monomer name; the
    latter case also writes a warning.
    """
    if not library_use_monomer_zipfile():
        return None

    entry_name = monomer_name.upper()
    try:
        blob = RCSB_ZIP.read(entry_name)
    except KeyError:
        ConsoleOutput.warning("monomer description not found in zipfile for '%s'" % (monomer_name))
        return None

    # imported only when an entry was actually found
    from cStringIO import StringIO
    return StringIO(blob)
338
339
def library_open_monomer_lib_directory(monomer_name):
    """Opens the uncompressed mmCIF monomer library file for the monomer
    and returns the file object, or returns None if there is no such file.
    The file is looked for at the path:
    mmLib/Data/Monomers/NAME[0]/NAME.cif
    """
    assert len(monomer_name) > 0

    cif_name = "%s.cif" % (monomer_name.upper())
    # files are grouped in sub-directories named by their first character
    cif_path = os.path.join(RCSB_MONOMER_DATA_PATH, cif_name[0], cif_name)

    if not os.path.isfile(cif_path):
        return None
    return open(cif_path, "r")
351
352
def library_open_monomer_lib_file(monomer_name):
    """Returns the open file object for the mmCIF monomer library file,
    or None if neither source has it. The directory structure is tried
    first through library_open_monomer_lib_directory() because loading the
    file from there is much faster than loading it from a zipfile;
    library_open_monomer_lib_zipfile() is the fallback.
    """
    opened = library_open_monomer_lib_directory(monomer_name)
    if opened is None:
        opened = library_open_monomer_lib_zipfile(monomer_name)
    return opened
365
366
def library_construct_monomer_desc(res_name):
    """Constructs the MonomerDesc object for the given residue name.

    The RCSB monomer library file supplies the chem_comp description, the
    atoms and the bonds. The mmLib monomers.cif data supplies the
    one-letter code, the chem_type and the torsion angle definitions.

    Returns None for an empty res_name, or, after writing a warning, if no
    monomer library file can be opened for it.
    """
    if len(res_name) < 1:
        return None

    # known alternate spellings are translated; any other name is
    # looked up in upper case
    if res_name in ALT_RES_NAME_DICT:
        lookup_name = ALT_RES_NAME_DICT[res_name]
    else:
        lookup_name = res_name.upper()

    libfil = library_open_monomer_lib_file(lookup_name)
    if libfil is None:
        ConsoleOutput.warning("monomer description not found for '%s'" % (res_name))
        return None

    # close the library file even when parsing fails
    rcsb_cif_file = mmCIF.mmCIFFile()
    try:
        rcsb_cif_file.load_file(libfil)
    finally:
        libfil.close()
    rcsb_cif_data = rcsb_cif_file[0]

    mon_desc = MonomerDesc()

    chem_comp = rcsb_cif_data.get_table("chem_comp")[0]
    mon_desc.res_name = chem_comp.get_lower("res_name")
    mon_desc.full_name = chem_comp.get_lower("name")
    mon_desc.type = chem_comp.get_lower("type")
    mon_desc.pdbx_type = chem_comp.get_lower("pdbx_type")
    mon_desc.formula = chem_comp.get_lower("formula")
    mon_desc.rcsb_class_1 = chem_comp.get_lower("rcsb_class_1")

    chem_comp_atom = rcsb_cif_data.get_table("chem_comp_atom")
    if chem_comp_atom is not None:
        for cif_row in chem_comp_atom:
            name = cif_row.getitem_lower("atom_id")

            try:
                symbol = cif_row.getitem_lower("type_symbol")
            except KeyError:
                # no element symbol in this row: fall back to the atom
                # name and report it
                symbol = name
                msg = "unrecognized atom name: '%s' in residue '%s'" % (
                    symbol, res_name)
                ConsoleOutput.warning(msg)

            mon_desc.atom_list.append({"name": name, "symbol": symbol})
            mon_desc.atom_dict[name] = symbol

            # the alternate atom name is optional
            try:
                alt_name = cif_row.getitem_lower("alt_atom_id")
            except KeyError:
                pass
            else:
                mon_desc.alt_atom_dict[name] = alt_name

    chem_comp_bond = rcsb_cif_data.get_table("chem_comp_bond")
    if chem_comp_bond is not None:
        for cif_row in chem_comp_bond:
            atom1 = cif_row.getitem_lower("atom_id_1")
            atom2 = cif_row.getitem_lower("atom_id_2")
            mon_desc.bond_list.append({"atom1": atom1, "atom2": atom2})

    # supplemental data kept in mmLib's own monomers.cif
    # NOTE(review): this lookup uses res_name rather than lookup_name, so
    # alternate spellings only match if monomers.cif lists them -- confirm
    # that this is intended
    mmlib_cif_data = MMLIB_MONOMERS_CIF.get_data(res_name)
    if mmlib_cif_data is not None:

        chem_comp = mmlib_cif_data.get_table("chem_comp")
        if chem_comp is not None:
            mon_desc.one_letter_code = chem_comp["one_letter_code"]
            mon_desc.chem_type = chem_comp["chem_type"]

        torsion_angles = mmlib_cif_data.get_table("torsion_angles")
        if torsion_angles is not None:
            for cif_row in torsion_angles:
                mon_desc.torsion_angle_dict[cif_row["name"]] = (
                    cif_row["atom1"], cif_row["atom2"],
                    cif_row["atom3"], cif_row["atom4"])

    # classify the monomer
    # NOTE(review): get_lower() presumably returns None for a missing
    # column; a missing type is treated as "no type" instead of raising
    mon_type = mon_desc.type
    if mon_type is None:
        mon_type = ""
    mon_type = mon_type.upper()

    if mon_type == "L-PEPTIDE LINKING":
        mon_desc.amino_acid = True

    elif mon_type == "DNA LINKING" or mon_type == "RNA LINKING":
        mon_desc.nucleic_acid = True

    # water is recognised by residue name, as in library_is_water(); the
    # original comparison against the type value is kept for compatibility
    elif lookup_name in ("HOH", "WAT") or mon_type in ("HOH", "WAT"):
        mon_desc.water = True

    return mon_desc
461
def library_get_monomer_desc(res_name):
    """Loads/caches/returns the monomer description object MonomerDesc
    for the given monomer residue name. Returns None if no description
    can be constructed; failed lookups are not cached.
    """
    assert isinstance(res_name, str)

    if res_name in MONOMER_RES_NAME_CACHE:
        return MONOMER_RES_NAME_CACHE[res_name]

    mon_desc = library_construct_monomer_desc(res_name)
    if mon_desc is not None:
        MONOMER_RES_NAME_CACHE[res_name] = mon_desc
    return mon_desc
479
480
def library_is_amino_acid(res_name):
    """Returns True if the monomer description found for res_name is an
    amino acid. Returns False if no description is found.
    """
    assert isinstance(res_name, str)

    mdesc = library_get_monomer_desc(res_name)
    return mdesc is not None and mdesc.is_amino_acid()
491
492
def library_is_nucleic_acid(res_name):
    """Returns True if the monomer description found for res_name is a
    nucleic acid. Returns False if no description is found.
    """
    assert isinstance(res_name, str)

    mdesc = library_get_monomer_desc(res_name)
    return mdesc is not None and mdesc.is_nucleic_acid()
503
504
def library_is_standard_residue(res_name):
    """Returns a true value if the monomer description found for res_name
    is a standard amino or nucleic acid. Returns False if no description
    is found.
    """
    assert isinstance(res_name, str)

    mdesc = library_get_monomer_desc(res_name)
    return mdesc is not None and mdesc.is_standard_residue()
515
516
def library_is_water(res_name):
    """Returns True if the res_name is exactly "HOH" or "WAT", the two
    residue names treated as water, otherwise returns False. The
    comparison is case-sensitive.
    """
    assert isinstance(res_name, str)

    return res_name in ("HOH", "WAT")
526
527
def library_guess_element_from_name(name0, res_name):
    """Try everything we can possibly think of to extract the element
    symbol from the atom name. If available, use the monomer dictionary to
    help narrow down the search.

    name0 is the atom name as read, including any padding spaces, and
    res_name is the name of the residue containing the atom. Returns the
    element symbol as a string, or None if no guess can be made.
    """
    name = name0.strip()
    if name == "":
        return None

    # first choice: the atom table of the monomer description
    # NOTE(review): the lookup is skipped when the atom name equals the
    # residue name, presumably for single-atom residues such as ions --
    # confirm
    if name0 != res_name:
        mdesc = library_get_monomer_desc(res_name)
        if mdesc is not None:
            symbol = mdesc.atom_dict.get(name)
            if symbol is not None:
                return symbol

            if mdesc.is_amino_acid():
                if name == "OXT":
                    return "O"

                # unknown atom in a known amino acid: report it, then
                # fall through to guessing from the name
                msg = "invalid amino acid atom name '%s' in residue '%s'" % (
                    name, res_name)
                ConsoleOutput.warning(msg)

    # a leading space in the unstripped name decides between the one- and
    # two-letter reading when both are valid element symbols
    # NOTE(review): presumably this follows the fixed-column PDB atom name
    # layout -- confirm
    space_flag = name0.startswith(" ")

    # keep only the letters of the name
    alpha_name = "".join(c for c in name if c.isalpha())
    if len(alpha_name) == 0:
        return None

    e1_symbol = alpha_name[0]
    e1_valid = e1_symbol in ELEMENT_SYMBOL_DICT

    if len(alpha_name) > 1:
        e2_symbol = alpha_name[:2]
        e2_valid = e2_symbol in ELEMENT_SYMBOL_DICT
    else:
        e2_symbol = None
        e2_valid = False

    if not e1_valid and not e2_valid:
        return None
    if not e2_valid:
        return e1_symbol
    if not e1_valid:
        return e2_symbol

    # both readings are valid element symbols
    if space_flag:
        return e1_symbol
    return e2_symbol
608
609
610
def test_module():
    """Exercises the element library: looks up hydrogen, then prints one
    line of ELEMENT_SYMBOL_DICT source text for every data block in
    ELEMENT_CIF_FILE.
    """
    # NOTE(review): the function header was not visible in the reviewed
    # text; the name is taken from the call in the __main__ guard below.
    library_get_element_desc("H")

    for cif_data in ELEMENT_CIF_FILE:
        symbol = cif_data.name
        # a parenthesised single argument prints the same text under the
        # Python 2 print statement and the Python 3 print function
        if len(symbol) == 1:
            print(' "%s" : True, "%s" : True,' % (
                symbol, symbol.lower()))
        else:
            print(' "%s": True, "%s": True, "%s": True,' % (
                symbol, symbol.lower(), symbol.upper()))


if __name__ == "__main__":
    test_module()
624
625