1
2
3
4
5
6
7
8 from mmCIF import mmCIFSyntaxError
11
12
13
14
# Token type tags. The lexer passes one of these as the first argument of
# self.token(...) for every token it produces.
15 L_EOF = "<eof>"  # end of input reached
16 L_DATA = "<data>"  # word starting with "data_" (matched case-insensitively)
17 L_LOOP = "<loop>"  # word starting with "loop_"
18 L_STOP = "<stop>"  # word starting with "stop_"
19 L_SAVE = "<save>"  # word starting with "save_"
20 L_GLOBAL = "<global>"  # word starting with "global_"
21 L_TAG = "<tag>"  # word starting with "_"; the token value omits the underscore
22 L_VALUE = "<value>"  # quoted string, text field, "?", "." or any other bare word
23
24
25
26
28
29 "Parser for reading a CIF 1.1 file."
30
33
# Parse CIF content from `f`, which may be either a file name or an object
# that is handed to self.parse() as-is.
# NOTE(review): the enclosing def line is not visible in this excerpt.
35 import types
# NOTE(review): types.StringTypes exists only in Python 2.
36 if isinstance(f, types.StringTypes):
# A string is treated as a path: it is opened here, so it is also closed here.
37 name = f
38 f = open(f)
39 needClose = True
40 else:
# Anything else is used directly and reported under a placeholder name;
# the caller keeps ownership, so it is not closed.
41 name = "<input>"
42 needClose = False
43 try:
44 self.parse(f, name)
45 finally:
# Close only what was opened above, even if parsing raised.
46 if needClose:
47 f.close()
48
49 - def parse(self, f, name):
57
58
60
61 "CIF data block of tags and tables."
62
63 - def __init__(self, name, lexer, filename):
84
85 - def get_tag(self, token, lexer, filename):
95
98
101
102
104 """CIF data table.
105 """
106
132
# Return a single cell: translate `column` to a position through
# self._columnIndex, pick the row from self.rows, then index into it.
# A missing column or row surfaces as whatever the containers raise.
# NOTE(review): the enclosing def line is not visible in this excerpt.
134 n = self._columnIndex[column]
135 r = self.rows[row]
136 return r[n]
137
138
139
140
141
143 """Lexical analyzer for reading a CIF 1.1 file.
144 """
145
# Remember the input stream and reset all scanner state.
# NOTE(review): the enclosing def line is not visible in this excerpt.
147 self.f = f
148 self.filename = filename
# Character consumed before cur_char; compared with '\n' for line counting
# and for recognising a ';' that opens a text field.
149 self.prev_char = None
# Most recently consumed character.
150 self.cur_char = None
# One character of lookahead, filled when peeking and drained by the next read.
151 self.peeked_char = None
# At most one token that was handed back and must be returned first.
152 self.pushed_token = None
# Line counter used to stamp tokens and syntax errors; starts at 1.
153 self.line = 1
154
156
def next_token(self):
    """Scan the input and return the next lexical token.

    NOTE(review): the ``def`` line of this method is not present in the
    excerpt under review; the name ``next_token`` and the bare ``self``
    signature are reconstructed -- confirm against the full file.
    ``CIFSyntaxError`` and the ``L_*`` constants are expected at module
    level.

    A token that was handed back earlier (``self.pushed_token``) is
    returned first.  Otherwise whitespace and ``#`` comments are skipped
    and one of the following is produced through ``self.token()``:

    * ``L_EOF``    -- end of input (also when it is hit inside a comment).
    * ``L_VALUE``  -- a quoted string, a semicolon-delimited text field,
      ``?``, a ``.`` followed by whitespace, or any other bare word.
    * ``L_TAG``    -- a word starting with ``_``; the value omits the ``_``.
    * ``L_DATA``, ``L_LOOP``, ``L_SAVE``, ``L_STOP``, ``L_GLOBAL`` -- a bare
      word starting (case-insensitively) with the matching reserved
      prefix; the value is the text after the prefix.

    Raises ``CIFSyntaxError`` when the input ends inside a quoted string,
    a text field or a tag, and when a word starts with ``[``.
    """
    # A pushed-back token takes priority over reading new input.
    if self.pushed_token is not None:
        t = self.pushed_token
        self.pushed_token = None
        return t

    from string import whitespace

    # Reserved-word prefixes, tested in this order against the lowercased
    # word.  The token value is whatever follows the *whole* prefix, so
    # the slice length is taken from the prefix itself ("global_" is seven
    # characters long, not five).
    reserved = (
        ("data_", L_DATA),
        ("loop_", L_LOOP),
        ("save_", L_SAVE),
        ("stop_", L_STOP),
        ("global_", L_GLOBAL),
    )

    while True:
        # Skip whitespace; running out of input here is a normal EOF.
        while True:
            c = self.next_char()
            if not c:
                return self.token(L_EOF, None)
            if c not in whitespace:
                break

        # Comment: discard through end of line, then start over.
        if c == '#':
            while True:
                c = self.next_char()
                if not c:
                    return self.token(L_EOF, None)
                if c == '\n':
                    break
            continue

        # Quoted string.  A quote character closes the string only when
        # it is followed by whitespace (or the end of input); otherwise
        # it is part of the value.
        if c == "'" or c == '"':
            endQuote = c
            atEnd = False      # True right after a candidate closing quote
            chars = []
            while True:
                c = self.next_char()
                if atEnd and (not c or c in whitespace):
                    # The candidate quote really was the terminator.  End
                    # of input counts too, so a file whose last character
                    # is the closing quote is not rejected.
                    return self.token(L_VALUE, ''.join(chars))
                if not c:
                    raise CIFSyntaxError(self.line,
                                         "<eof> in quoted string")
                if atEnd:
                    # The previous quote was literal text after all.
                    chars.append(endQuote)
                    if c != endQuote:
                        chars.append(c)
                        atEnd = False
                    # else: this quote is the new terminator candidate.
                else:
                    if c == endQuote:
                        atEnd = True
                    else:
                        chars.append(c)
                        atEnd = False

        # Bracket-delimited strings are rejected outright.
        if c == '[':
            raise CIFSyntaxError(self.line,
                                 "bracket strings not permitted in CIF")

        # Text field: opened by ';' directly after a newline and closed by
        # the next ';' that directly follows a newline.
        if c == ';' and self.prev_char == '\n':
            chars = []
            atStart = False    # True when the previous character was '\n'
            while True:
                c = self.next_char()
                if not c:
                    raise CIFSyntaxError(self.line, "<eof> in text field")
                if c == ';' and atStart:
                    return self.token(L_VALUE, ''.join(chars))
                # Newlines are appended one step late so that the newline
                # in front of the closing ';' is left out of the value.
                if atStart:
                    chars.append('\n')
                if c == '\n':
                    atStart = True
                else:
                    chars.append(c)
                    atStart = False

        # Tag: everything after '_' up to the next whitespace character.
        if c == '_':
            chars = []
            while True:
                c = self.next_char()
                if not c:
                    raise CIFSyntaxError(self.line, "<eof> in tag")
                if c in whitespace:
                    return self.token(L_TAG, ''.join(chars))
                chars.append(c)

        # '?' is always a value of its own; '.' only when it stands alone
        # (a '.' followed by other characters is scanned as a bare word).
        if c == '?':
            return self.token(L_VALUE, c)
        if c == '.':
            if self.peek_char() in whitespace:
                return self.token(L_VALUE, c)

        # Bare word: runs until whitespace or end of input.
        chars = [c]
        while True:
            c = self.next_char()
            if not c or c in whitespace:
                break
            chars.append(c)

        data = ''.join(chars)
        lc = data.lower()

        for prefix, kind in reserved:
            if lc.startswith(prefix):
                return self.token(kind, data[len(prefix):])
        return self.token(L_VALUE, data)
281
def next_char(self):
    """Consume one character of input and return it.

    The character comes from the one-character lookahead buffer
    (``self.peeked_char``) when that is filled, otherwise from
    ``self.f.read(1)``; callers treat a falsy result as end of input.
    ``self.prev_char`` and ``self.cur_char`` are shifted along, and
    ``self.line`` is advanced when the read moves past a newline.

    NOTE(review): the ``def`` line is not present in the excerpt under
    review; the name is taken from the ``self.next_char()`` call sites.
    """
    # The character consumed by the previous call is still in cur_char
    # here.  If it was a newline, the character about to be read is the
    # first one of the next line, so the counter must advance now.
    # Testing prev_char instead would lag one character behind and stamp
    # a one-character token at the start of a line with the wrong line.
    if self.cur_char == '\n':
        self.line += 1
    self.prev_char = self.cur_char
    if self.peeked_char is None:
        self.cur_char = self.f.read(1)
    else:
        # Drain the lookahead buffer before touching the stream again.
        self.cur_char = self.peeked_char
        self.peeked_char = None
    return self.cur_char
292
# Return the upcoming character without consuming it.  The character is read
# from the stream at most once and cached in peeked_char, where the next
# consuming read picks it up.
# NOTE(review): the enclosing def line is not visible in this excerpt.
294 if self.peeked_char is None:
295 self.peeked_char = self.f.read(1)
296 return self.peeked_char
297
# Build a Token from the given type and value, stamped with the lexer's
# current line number.
298 - def token(self, type, value):
299 return Token(type, value, self.line)
300
# Hand `token` back to the lexer by storing it in pushed_token.
# Only one token can be held at a time; the assert guards against overwriting
# one that has not been taken back yet (asserts are skipped under python -O).
# NOTE(review): the enclosing def line is not visible in this excerpt.
302 assert(self.pushed_token is None)
303 self.pushed_token = token
304
307
308
310 """Lexical token with type, value and line number.
311 """
# Store the three constructor arguments unchanged as attributes.
# NOTE(review): the class and def lines are not visible in this excerpt.
313 self.type = type
314 self.value = value
315 self.line = line
316
317
318
319
320
323
# Convert the text `s` to a number.
# NOTE(review): the enclosing def line is not visible in this excerpt.
# NOTE(review): the print below looks like leftover debugging output and uses
# Python 2 statement syntax -- confirm whether it can be removed.
325 print "%s\n" % s
326
# Drop everything from the first '(' onward before converting.
327 paren = s.find('(')
328 if paren != -1:
329 s = s[:paren]
# Prefer an int; fall back to float.  A ValueError raised by float()
# propagates to the caller.
330 try:
331 return int(s)
332 except ValueError:
333 return float(s)
334
335
336
# Self-test, executed only when the module is run as a script.
337 if __name__ == "__main__":
338 """Module tests.
339 """
351
# Load test_file into a fresh CIFFile, then print the number of data blocks
# and, for each block, its name, tag count, table count and the tags.
# NOTE(review): the def line that binds test_file is not visible in this
# excerpt.
353 cif = CIFFile()
354 cif.load_file(test_file)
355 print "%d data blocks" % len(cif.data_blocks)
356 import pprint
357 for db in cif.data_blocks:
358 print "%s: %d tags, %d tables" % (db.name,
359 len(db.tags), len(db.tables))
360 pprint.pprint(db.tags)
361
362
# Run the self-test against a file named "ccd.cif".
363 parser_test("ccd.cif")
364
365