ODFPY  1.2.0
element.py
Go to the documentation of this file.
1 #!/usr/bin/python
2 # -*- coding: utf-8 -*-
3 # Copyright (C) 2007-2010 Søren Roug, European Environment Agency
4 #
5 # This library is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU Lesser General Public
7 # License as published by the Free Software Foundation; either
8 # version 2.1 of the License, or (at your option) any later version.
9 #
10 # This library is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # Lesser General Public License for more details.
14 #
15 # You should have received a copy of the GNU Lesser General Public
16 # License along with this library; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 #
19 # Contributor(s):
20 #
21 
22 # Note: This script has copied a lot of text from xml.dom.minidom.
23 # Whatever license applies to that file also applies to this file.
24 #
25 import sys, os.path
26 sys.path.append(os.path.dirname(__file__))
27 import re
28 import xml.dom
29 from xml.dom.minicompat import *
30 from odf.namespaces import nsdict
31 import odf.grammar as grammar
32 from odf.attrconverters import AttrConverters
33 
if sys.version_info[0] == 3:
    # Python 3 has neither builtin; bind the Python 2 names to their
    # Python 3 counterparts so the rest of the module can keep using them.
    unicode, unichr = str, chr
37 
# Every pair below is a closed interval (lo, hi) of code points.
_xml11_illegal_ranges = (
    (0x0, 0x0),
    (0xd800, 0xdfff),
    (0xfffe, 0xffff),
)

_xml10_illegal_ranges = _xml11_illegal_ranges + (
    (0x01, 0x08),
    (0x0b, 0x0c),
    (0x0e, 0x1f),
)

_xml_discouraged_ranges = (
    (0x7f, 0x84),
    (0x86, 0x9f),
)

if sys.maxunicode >= 0x10000:
    # modern or "wide" python build: add the last two code points of each
    # of the planes 1 through 16 (0x1fffe-0x1ffff ... 0x10fffe-0x10ffff)
    _xml_discouraged_ranges += tuple(
        ((plane << 16) | 0xfffe, (plane << 16) | 0xffff)
        for plane in range(0x1, 0x11)
    )
75 # else "narrow" python build - only possible with old versions
76 
def _range_seq_to_re(range_seq):
    """Compile a character class that matches every code point in *range_seq*.

    Each item of *range_seq* is a ``(lo, hi)`` pair of code points that is
    read as a closed interval, i.e. both end points belong to the class.
    """
    spans = []
    for first, last in range_seq:
        spans.append(
            u"{}-{}".format(re.escape(unichr(first)), re.escape(unichr(last)))
        )
    char_class = u"[{}]".format(u"".join(spans))
    return re.compile(char_class, flags=re.UNICODE)
85 
# One pattern for everything that gets filtered out of text: the XML 1.0
# illegal ranges plus the discouraged ranges.
_xml_filtered_chars_re = _range_seq_to_re(
    _xml10_illegal_ranges + _xml_discouraged_ranges
)


def _handle_unrepresentable(data):
    """Return *data* with every filtered character replaced by U+FFFD."""
    replacement = u"\ufffd"
    return _xml_filtered_chars_re.sub(replacement, data)
90 
91 # The following code is pasted from xml.sax.saxutils
92 # It makes it possible to run the code without the xml sax package installed
93 # To make it possible to have <rubbish> in your text elements, it is necessary to escape the texts
94 
def _escape(data, entities={}):
    """Escape ``&``, ``<`` and ``>`` in a string of data.

    *entities* may supply further replacements as a mapping of
    ``text to replace -> replacement``; they are applied after the three
    built-in ones. The mapping is only read, never modified.
    """
    # "&" must be handled first, otherwise the ampersands introduced by the
    # later replacements would be escaped a second time.
    substitutions = [("&", "&amp;"), ("<", "&lt;"), (">", "&gt;")]
    substitutions.extend(entities.items())
    for needle, substitute in substitutions:
        data = data.replace(needle, substitute)
    return data
108 
def _sanitize(data, entities={}):
    """Replace filtered characters in *data*, then escape the result.

    *entities* is handed on unchanged to ``_escape``.
    """
    cleaned = _handle_unrepresentable(data)
    return _escape(cleaned, entities=entities)
111 
112 
def _quoteattr(data, entities={}):
    """Escape and quote *data* so it can be written as an XML attribute value.

    The text is sanitized (filtered characters replaced, ``&``, ``<`` and
    ``>`` escaped) and line breaks are written as character references.
    *entities* may supply additional ``text -> replacement`` pairs; it is
    not modified.

    Returns the value including its surrounding quotes: double quotes by
    default, single quotes if the value contains only double quotes, and
    double quotes with ``&quot;`` substitution if it contains both kinds.
    """
    # Work on a copy: assigning into ``entities`` would leak the line-break
    # entries into the shared default dict and into the caller's mapping.
    replacements = dict(entities)
    replacements['\n'] = '&#10;'
    # Carriage return is U+000D, i.e. &#13;. The previous &#12; is a form
    # feed, which is not even a legal character in XML 1.0.
    replacements['\r'] = '&#13;'
    data = _sanitize(data, replacements)
    if '"' not in data:
        return '"%s"' % data
    if "'" in data:
        return '"%s"' % data.replace('"', "&quot;")
    return "'%s'" % data
135 
136 
def _nssplit(qualifiedName):
    """Split a qualified name at its first colon into prefix and local part.

    Returns the two-item list produced by ``str.split`` when a colon is
    present, and the tuple ``(None, qualifiedName)`` when there is none.
    """
    pieces = qualifiedName.split(':', 1)
    if len(pieces) != 2:
        return (None, pieces[0])
    return pieces
144 
def _nsassign(namespace):
    """Return the prefix registered for *namespace* in ``nsdict``.

    An unknown namespace is registered on the spot under the prefix
    ``"ns"`` followed by the current number of entries in ``nsdict``.
    """
    generated_prefix = "ns" + str(len(nsdict))
    return nsdict.setdefault(namespace, generated_prefix)
147 
148 
149 # Exceptions
150 
# NOTE(review): the definition of IllegalChild is missing from this listing
# although the module raises it; it is restored here. Its base class is not
# visible in the listing - Exception is assumed, mirroring IllegalText.
class IllegalChild(Exception):
    """Complains if you add an element to a parent where it is not allowed."""


class IllegalText(Exception):
    """Complains if you add text or cdata to an element where it is not allowed."""
156 
157 
class Node(xml.dom.Node):
    """Super class for more specific nodes.

    Keeps the parent and sibling links of a tree of nodes consistent.
    ``childNodes``, ``tagName``, ``_child_node_types`` and ``ownerDocument``
    are read by the methods below but are not defined here; the concrete
    node classes have to provide them.
    """

    parentNode = None
    nextSibling = None
    previousSibling = None

    def hasChildNodes(self):
        """Tell whether this node has any children; text nodes,
        subelements, whatever."""
        return bool(self.childNodes)

    def _get_childNodes(self):
        return self.childNodes

    def _get_firstChild(self):
        children = self.childNodes
        return children[0] if children else None

    def _get_lastChild(self):
        children = self.childNodes
        return children[-1] if children else None

    def insertBefore(self, newChild, refChild):
        """Insert the node newChild before the existing child node refChild.

        If refChild is None, newChild is appended to the end of the list of
        children. A newChild that already has a parent is removed from that
        parent first. Raises IllegalChild if the node type of newChild is
        not accepted here and xml.dom.NotFoundErr if refChild is not a
        child of this node. Returns newChild.
        """
        if newChild.nodeType not in self._child_node_types:
            raise IllegalChild( "%s cannot be child of %s" % (newChild.tagName, self.tagName))
        previous_parent = newChild.parentNode
        if previous_parent is not None:
            previous_parent.removeChild(newChild)
        if refChild is None:
            self.appendChild(newChild)
        else:
            siblings = self.childNodes
            try:
                position = siblings.index(refChild)
            except ValueError:
                raise xml.dom.NotFoundErr()
            siblings.insert(position, newChild)
            newChild.nextSibling = refChild
            refChild.previousSibling = newChild
            if position == 0:
                # inserted at the very front: nothing precedes newChild
                newChild.previousSibling = None
            else:
                before = siblings[position - 1]
                before.nextSibling = newChild
                newChild.previousSibling = before
            newChild.parentNode = self
        return newChild

    def appendChild(self, newChild):
        """Add the node newChild to the end of the list of children of this
        node.

        For a document fragment the children of the fragment are appended
        one by one instead of the fragment itself. A newChild that already
        has a parent is removed from it first. Raises IllegalChild if the
        node type of newChild is not accepted here. Returns newChild.
        """
        if newChild.nodeType == self.DOCUMENT_FRAGMENT_NODE:
            # Walk a snapshot: appending a child can remove it from the
            # fragment's own child list while we are looping.
            for fragment_child in tuple(newChild.childNodes):
                self.appendChild(fragment_child)
            return newChild
        if newChild.nodeType not in self._child_node_types:
            raise IllegalChild( "<%s> is not allowed in %s" % ( newChild.tagName, self.tagName))
        if newChild.parentNode is not None:
            newChild.parentNode.removeChild(newChild)
        _append_child(self, newChild)
        newChild.nextSibling = None
        return newChild

    def removeChild(self, oldChild):
        """Remove the child node indicated by oldChild from the list of
        children, and return it.

        The neighbours of oldChild are linked to each other, the links of
        oldChild itself are reset and, if this node has an owner document,
        its caches are cleared. Raises xml.dom.NotFoundErr if oldChild is
        not a child of this node.
        """
        #FIXME: update ownerDocument.element_dict or find other solution
        try:
            self.childNodes.remove(oldChild)
        except ValueError:
            raise xml.dom.NotFoundErr()
        following = oldChild.nextSibling
        preceding = oldChild.previousSibling
        if following is not None:
            following.previousSibling = preceding
        if preceding is not None:
            preceding.nextSibling = following
        oldChild.nextSibling = oldChild.previousSibling = None
        if self.ownerDocument:
            self.ownerDocument.clear_caches()
        oldChild.parentNode = None
        return oldChild

    def __str__(self):
        return ''.join(str(child) for child in self.childNodes)

    def __unicode__(self):
        return u''.join(unicode(child) for child in self.childNodes)


# Expose the _get_* accessors above as properties of Node.
defproperty(Node, "firstChild", doc="First child node, or None.")
defproperty(Node, "lastChild", doc="Last child node, or None.")
264 
def _append_child(self, node):
    """Append *node* to ``self.childNodes`` and link it into the tree.

    The sibling and parent links are written straight into the instance
    ``__dict__`` of the nodes involved. No validation is done.
    """
    # fast path with less checks; usable by DOM builders if careful
    siblings = self.childNodes
    if siblings:
        tail = siblings[-1]
        node.__dict__["previousSibling"] = tail
        tail.__dict__["nextSibling"] = node
    siblings.append(node)
    node.__dict__["parentNode"] = self
273  node.__dict__["parentNode"] = self
274 
275 
class Childless:
    """Mixin that makes childless-ness easy to implement.

    Every attempt to add, insert, remove or replace a child raises; the
    read accessors report that there are no children. ``tagName`` is used
    in the error messages and has to be provided by the class that mixes
    this in.
    """

    attributes = None
    childNodes = EmptyNodeList()
    firstChild = None
    lastChild = None

    def _get_firstChild(self):
        return None

    def _get_lastChild(self):
        return None

    def appendChild(self, node):
        """Raises an error."""
        message = self.tagName + " nodes cannot have children"
        raise xml.dom.HierarchyRequestErr(message)

    def hasChildNodes(self):
        return False

    def insertBefore(self, newChild, refChild):
        """Raises an error."""
        message = self.tagName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)

    def removeChild(self, oldChild):
        """Raises an error."""
        message = self.tagName + " nodes do not have children"
        raise xml.dom.NotFoundErr(message)

    def replaceChild(self, newChild, oldChild):
        """Raises an error."""
        message = self.tagName + " nodes do not have children"
        raise xml.dom.HierarchyRequestErr(message)
318 
320  nodeType = Node.TEXT_NODE
321  tagName = "Text"
322 
def __init__(self, data):
    """Keep *data* as the content of this node; it is stored as given."""
    self.data = data
325 
def __str__(self):
    """Return the stored data object itself, without any escaping."""
    content = self.data
    return content
328 
def __unicode__(self):
    """Return the stored data object itself, without any escaping."""
    content = self.data
    return content
331 
332 
def toXml(self,level,f):
    """Write the sanitized text of this node to *f*.

    Nothing is written when the data is empty. *level* is not used.
    """
    if not self.data:
        return
    f.write(_sanitize(unicode(self.data)))
337 
339  nodeType = Node.CDATA_SECTION_NODE
340 
341 
def toXml(self,level,f):
    """Generate XML output of the node.

    Writes the data to *f* wrapped in a CDATA section. Nothing is written
    when the data is empty. *level* is not used.
    """
    if self.data:
        # "]]>" can neither occur inside a CDATA section nor as literal
        # text after it, so the sequence is split over two sections: "]]"
        # ends the first one and ">" starts the next.
        f.write('<![CDATA[%s]]>' % self.data.replace(']]>', ']]]]><![CDATA[>'))
349 
350 
357 class Element(Node):
358 
359  nodeType = Node.ELEMENT_NODE
360  namespaces = {} # Due to shallow copy this is a static variable
361 
362  _child_node_types = (Node.ELEMENT_NODE,
363  Node.PROCESSING_INSTRUCTION_NODE,
364  Node.COMMENT_NODE,
365  Node.TEXT_NODE,
366  Node.CDATA_SECTION_NODE,
367  Node.ENTITY_REFERENCE_NODE)
368 
def __init__(self, attributes=None, text=None, cdata=None, qname=None, qattributes=None, check_grammar=True, **args):
    """Set up the element, its initial content and its attributes.

    attributes    -- mapping of simple attribute names to values, applied
                     with setAttribute
    text          -- text to add as a child, if not None
    cdata         -- CDATA content to add as a child, if not None
    qname         -- (namespace, localpart) of the element; may be omitted
                     when the class already provides ``qname``
    qattributes   -- mapping of (namespace, localpart) to values, applied
                     with setAttrNS
    check_grammar -- when false, the check for required attributes at the
                     end is skipped
    **args        -- further attributes given as keyword arguments

    Raises AttributeError if a required attribute is missing.
    """
    if qname is not None:
        self.qname = qname
    assert(hasattr(self, 'qname'))
    self.ownerDocument = None
    self.childNodes = []
    self.allowed_children = grammar.allowed_children.get(self.qname)
    prefix = self.get_nsprefix(self.qname[0])
    self.tagName = prefix + ":" + self.qname[1]
    # NOTE(review): text and cdata are added with the default grammar
    # checking even when check_grammar is false - confirm this is intended.
    if text is not None:
        self.addText(text)
    if cdata is not None:
        self.addCDATA(cdata)

    allowed_attrs = self.allowed_attributes()
    self.attributes = {}
    # Load the attributes from the 'attributes' argument
    if attributes:
        for attr, value in attributes.items():
            self.setAttribute(attr, value)
    # Load the qualified attributes
    if qattributes:
        for attr, value in qattributes.items():
            self.setAttrNS(attr[0], attr[1], value)
    if allowed_attrs is not None:
        # Load the attributes from the 'args' argument
        for arg, value in args.items():
            self.setAttribute(arg, value)
    else:
        # If any attribute is allowed the values are stored as given
        for arg, value in args.items():
            self.attributes[arg] = value
    if not check_grammar:
        return
    # Test that all mandatory attributes have been added.
    required = grammar.required_attributes.get(self.qname)
    if required:
        for r in required:
            if self.getAttrNS(r[0], r[1]) is None:
                raise AttributeError( "Required attribute missing: %s in <%s>" % (r[1].lower().replace('-',''), self.tagName))
410 
411 
def get_knownns(self, prefix):
    """Look up the namespace that is registered under *prefix*.

    Searches the module's ``nsdict`` of known namespaces and returns the
    first namespace whose prefix equals *prefix*, or None if there is none.
    """
    return next(
        (namespace for namespace, known in nsdict.items() if known == prefix),
        None,
    )
420 
421 
def get_nsprefix(self, namespace):
    """Return the prefix to use for *namespace*.

    None is treated as the empty namespace "". The prefix comes from
    ``_nsassign``; the pair is also recorded in ``self.namespaces`` unless
    the namespace is already present there.
    """
    if namespace is None:
        namespace = ""
    prefix = _nsassign(namespace)
    self.namespaces.setdefault(namespace, prefix)
    return prefix
431 
def allowed_attributes(self):
    """Return the grammar's entry of allowed attributes for this element.

    The lookup is done by ``self.qname``; None is returned when the grammar
    has no entry for it.
    """
    return grammar.allowed_attributes.get(self.qname)
434 
def _setOwnerDoc(self, element):
    """Give *element* and all nodes below it this node's ownerDocument."""
    element.ownerDocument = self.ownerDocument
    descendants = element.childNodes
    for descendant in descendants:
        self._setOwnerDoc(descendant)
439 
440 
def addElement(self, element, check_grammar=True):
    """Add an element to an Element.

    Setting check_grammar=False turns off the check against the allowed
    children. Raises IllegalChild if the check is on, a list of allowed
    children exists and the qname of *element* is not in it. After the
    append the owner document is propagated to *element* and, if there is
    an owner document, its caches are rebuilt for *element*.
    """
    grammar_applies = check_grammar and self.allowed_children is not None
    if grammar_applies and element.qname not in self.allowed_children:
        raise IllegalChild( "<%s> is not allowed in <%s>" % ( element.tagName, self.tagName))
    self.appendChild(element)
    self._setOwnerDoc(element)
    owner = self.ownerDocument
    if owner:
        owner.rebuild_caches(element)
453 
454 
def addText(self, text, check_grammar=True):
    """Add text to an element.

    Setting check_grammar=False turns off grammar checking. Raises
    IllegalText if the check is on and the grammar does not allow text in
    this element. An empty string is silently ignored.
    """
    if check_grammar and self.qname not in grammar.allows_text:
        raise IllegalText( "The <%s> element does not allow text" % self.tagName)
    if text != '':
        self.appendChild(Text(text))
464 
465 
def addCDATA(self, cdata, check_grammar=True):
    """Add CDATA to an element.

    Setting check_grammar=False turns off grammar checking. Raises
    IllegalText if the check is on and the grammar does not allow text in
    this element.
    """
    if check_grammar and self.qname not in grammar.allows_text:
        raise IllegalText( "The <%s> element does not allow text" % self.tagName)
    section = CDATASection(cdata)
    self.appendChild(section)
474 
475 
def removeAttribute(self, attr, check_grammar=True):
    """Remove an attribute by name.

    For an element without a list of allowed attributes *attr* has to be
    a (namespace, localpart) tuple. Otherwise *attr* is the simple name,
    i.e. the local part of an allowed attribute in lower case with the
    dashes removed.

    Raises AttributeError if a simple name is used where a tuple is
    needed, or if check_grammar is on and *attr* is not allowed.
    """
    allowed_attrs = self.allowed_attributes()
    if allowed_attrs is None:
        if isinstance(attr, tuple):
            prefix, localname = attr
            self.removeAttrNS(prefix, localname)
            return
        # The message used to say "add", copied from setAttribute.
        raise AttributeError( "Unable to remove simple attribute - use (namespace, localpart)")
    # Construct a list of allowed arguments
    allowed_args = [ a[1].lower().replace('-','') for a in allowed_attrs]
    if check_grammar and attr not in allowed_args:
        raise AttributeError( "Attribute %s is not allowed in <%s>" % ( attr, self.tagName))
    # With check_grammar off an unknown name still fails here (ValueError),
    # because there is no qualified name to map it to.
    i = allowed_args.index(attr)
    self.removeAttrNS(allowed_attrs[i][0], allowed_attrs[i][1])
492 
493 
def setAttribute(self, attr, value, check_grammar=True):
    """Add an attribute to the element.

    This is sort of a convenience method. *attr* is the simple name of an
    allowed attribute: its local part in lower case with the dashes
    removed. For an element without a list of allowed attributes *attr*
    has to be a (namespace, localpart) tuple. The special name 'parent'
    with a value other than None adds this element to that value instead.

    Raises AttributeError if a simple name is used where a tuple is
    needed, or if check_grammar is on and *attr* is not allowed.
    """
    if attr == 'parent' and value is not None:
        value.addElement(self)
        return
    allowed_attrs = self.allowed_attributes()
    if allowed_attrs is None:
        if type(attr) is not tuple:
            raise AttributeError( "Unable to add simple attribute - use (namespace, localpart)")
        prefix, localname = attr
        self.setAttrNS(prefix, localname, value)
        return
    # Construct a list of allowed arguments
    simple_names = [ a[1].lower().replace('-','') for a in allowed_attrs]
    if check_grammar and attr not in simple_names:
        raise AttributeError( "Attribute %s is not allowed in <%s>" % ( attr, self.tagName))
    namespace, localpart = allowed_attrs[simple_names.index(attr)][:2]
    self.setAttrNS(namespace, localpart, value)
519 
520 
def setAttrNS(self, namespace, localpart, value):
    """Add an attribute to the element by its qualified name.

    The value is passed through AttrConverters and stored under the key
    (namespace, localpart). Whether the attribute is in the list of
    allowed attributes is not enforced here.
    """
    self.allowed_attributes()
    # Called for its effect of recording the namespace and its prefix.
    self.get_nsprefix(namespace)
    key = (namespace, localpart)
    converter = AttrConverters()
    self.attributes[key] = converter.convert(key, value, self)
534 
535 
def getAttrNS(self, namespace, localpart):
    """Get an attribute, given a namespace and a key.

    Returns the value stored for (namespace, localpart), or None if the
    attribute is not set.
    """
    # Kept for its effect of recording the namespace and its prefix.
    self.get_nsprefix(namespace)
    # The assert that used to follow tested a non-empty tuple, which is
    # always true, so it could never fail and has been dropped.
    return self.attributes.get((namespace, localpart))
557 
def removeAttrNS(self, namespace, localpart):
    """Delete the attribute stored under (namespace, localpart).

    Raises KeyError if the attribute is not set.
    """
    key = (namespace, localpart)
    del self.attributes[key]
560 
561 
def getAttribute(self, attr):
    """Get an attribute value.

    *attr* is the simple name of an allowed attribute: its local part in
    lower case with the dashes removed. For an element without a list of
    allowed attributes *attr* has to be a (namespace, localpart) tuple;
    a simple name raises AttributeError in that case.
    """
    allowed_attrs = self.allowed_attributes()
    if allowed_attrs is None:
        if type(attr) is not tuple:
            raise AttributeError( "Unable to get simple attribute - use (namespace, localpart)")
        prefix, localname = attr
        return self.getAttrNS(prefix, localname)
    # Construct a list of allowed arguments
    simple_names = [ a[1].lower().replace('-','') for a in allowed_attrs]
    match = allowed_attrs[simple_names.index(attr)]
    return self.getAttrNS(match[0], match[1])
577 
def write_open_tag(self, level, f):
    """Write the start tag of this element to *f*.

    The namespace declarations recorded in ``self.namespaces`` are only
    written when *level* is 0. Attribute values are quoted and escaped.
    """
    f.write(u'<' + self.tagName)
    if level == 0:
        for namespace, prefix in self.namespaces.items():
            # unicode() rather than str(), as toXml does for attribute
            # names: str() fails on non-ASCII text under Python 2.
            f.write(u' xmlns:' + prefix + u'="' + _sanitize(unicode(namespace)) + u'"')
    for qname, value in self.attributes.items():
        prefix = self.get_nsprefix(qname[0])
        f.write(u' ' + _sanitize(unicode(prefix + u':' + qname[1])) + u'=' + _quoteattr(unicode(value)))
    f.write(u'>')
587 
def write_close_tag(self, level, f):
    """Write the end tag of this element to *f*. *level* is not used."""
    closing = '</' + self.tagName + '>'
    f.write(closing)
590 
591 
def toXml(self, level, f):
    """Generate an XML stream out of the tree structure.

    Writes this element and, recursively, its children to *f*. The
    namespace declarations recorded in ``self.namespaces`` are only
    written when *level* is 0; children are written with level + 1. An
    element without children is written as an empty-element tag.
    """
    f.write(u'<' + self.tagName)
    if level == 0:
        for namespace, prefix in self.namespaces.items():
            # unicode() rather than str(), matching the attribute names
            # below: str() fails on non-ASCII text under Python 2.
            f.write(u' xmlns:' + prefix + u'="' + _sanitize(unicode(namespace)) + u'"')
    for qname, value in self.attributes.items():
        prefix = self.get_nsprefix(qname[0])
        f.write(u' ' + _sanitize(unicode(prefix + u':' + qname[1])) + u'=' + _quoteattr(unicode(value)))
    if self.childNodes:
        f.write(u'>')
        for element in self.childNodes:
            element.toXml(level + 1, f)
        f.write(u'</' + self.tagName + u'>')
    else:
        f.write(u'/>')
612 
def _getElementsByObj(self, obj, accumulator):
    """Collect this element and all elements below it that share the
    qname of *obj*.

    Matches are appended to *accumulator*; only children that are element
    nodes are searched. Returns the accumulator.
    """
    if self.qname == obj.qname:
        accumulator.append(self)
    for child in self.childNodes:
        if child.nodeType != Node.ELEMENT_NODE:
            continue
        accumulator = child._getElementsByObj(obj, accumulator)
    return accumulator
620 
621 
def getElementsByType(self, element):
    """Get elements based on the type, which is a function from text.py,
    draw.py etc.

    *element* is called with check_grammar=False to obtain an object to
    compare against; the result is a list of the matching elements.
    """
    probe = element(check_grammar=False)
    matches = []
    return self._getElementsByObj(probe, matches)
626 
627 
def isInstanceOf(self, element):
    """Check whether this object is an instance of a type.

    *element* is called with check_grammar=False and the qname of the
    result is compared with the qname of this object.
    """
    probe = element(check_grammar=False)
    same_type = self.qname == probe.qname
    return same_type
def setAttribute(self, attr, value, check_grammar=True)
Add an attribute to the element This is sort of a convenience method.
Definition: element.py:501
def toXml(self, level, f)
Generate an XML stream out of the tree structure.
Definition: element.py:597
def replaceChild(self, newChild, oldChild)
Raises an error.
Definition: element.py:315
def toXml(self, level, f)
Generate XML output of the node.
Definition: element.py:346
def addCDATA(self, cdata, check_grammar=True)
Adds CDATA to an element Setting check_grammar=False turns off grammar checking.
Definition: element.py:469
def get_knownns(self, prefix)
Odfpy maintains a list of known namespaces.
Definition: element.py:415
def removeAttribute(self, attr, check_grammar=True)
Removes an attribute by name.
Definition: element.py:477
def removeAttrNS(self, namespace, localpart)
Definition: element.py:558
def getAttribute(self, attr)
Get an attribute value.
Definition: element.py:564
super class for more specific nodes
Definition: element.py:159
def __unicode__(self)
Definition: element.py:329
def getAttrNS(self, namespace, localpart)
gets an attribute, given a namespace and a key
Definition: element.py:545
Complains if you add text or cdata to an element where it is not allowed.
Definition: element.py:155
def removeChild(self, oldChild)
Removes the child node indicated by oldChild from the list of children, and returns it...
Definition: element.py:234
def allowed_attributes(self)
Definition: element.py:432
def _setOwnerDoc(self, element)
Definition: element.py:435
def insertBefore(self, newChild, refChild)
Raises an error.
Definition: element.py:303
def getElementsByType(self, element)
Gets elements based on the type, which is function from text.py, draw.py etc.
Definition: element.py:623
def _getElementsByObj(self, obj, accumulator)
Definition: element.py:613
Complains if you add an element to a parent where it is not allowed.
Definition: element.py:152
def toXml(self, level, f)
Write XML in UTF-8.
Definition: element.py:334
Mixin that makes childless-ness easy to implement and avoids the complexity of the Node methods that ...
Definition: element.py:279
def __unicode__(self)
Definition: element.py:256
def __init__(self, attributes=None, text=None, cdata=None, qname=None, qattributes=None, check_grammar=True, **args)
Definition: element.py:369
def setAttrNS(self, namespace, localpart, value)
Add an attribute to the element. In case you need to add an attribute the library doesn't know about t...
Definition: element.py:527
def removeChild(self, oldChild)
Raises an error.
Definition: element.py:309
def addText(self, text, check_grammar=True)
Adds text to an element Setting check_grammar=False turns off grammar checking.
Definition: element.py:458
def write_open_tag(self, level, f)
Definition: element.py:578
def hasChildNodes(self)
Definition: element.py:298
def appendChild(self, node)
Raises an error.
Definition: element.py:294
Creates an arbitrary element and is intended to be subclassed, not used on its own. ...
Definition: element.py:357
def isInstanceOf(self, element)
This is a check to see if the object is an instance of a type.
Definition: element.py:629
def write_close_tag(self, level, f)
Definition: element.py:588
def addElement(self, element, check_grammar=True)
adds an element to an Element
Definition: element.py:445
def __str__(self)
Definition: element.py:326
def __init__(self, data)
Definition: element.py:323
def appendChild(self, newChild)
Adds the node newChild to the end of the list of children of this node.
Definition: element.py:217
def __str__(self)
Definition: element.py:250
def get_nsprefix(self, namespace)
Odfpy maintains a list of known namespaces.
Definition: element.py:425
def insertBefore(self, newChild, refChild)
Inserts the node newChild before the existing child node refChild.
Definition: element.py:189
dictionary namespaces
Definition: element.py:360
def hasChildNodes(self)
Tells whether this element has any children; text nodes, subelements, whatever.
Definition: element.py:168