ChimeraTK-ControlSystemAdapter-OPCUAAdapter  04.00.01
open62541_XMLPreprocessor.py
Go to the documentation of this file.
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 
4 # This Source Code Form is subject to the terms of the Mozilla Public
5 # License, v. 2.0. If a copy of the MPL was not distributed with this
6 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
7 
8 
21 
22 import logging
23 from ua_constants import *
24 import tempfile
25 import xml.dom.minidom as dom
26 import os
27 import string
28 from collections import Counter
29 import re
30 
31 from ua_namespace import opcua_node_id_t
32 
33 
34 logger = logging.getLogger(__name__)
35 
37  originXML = '' # Original XML passed to the preprocessor
38  targetXML = () # tuple of (fileHandle, fileName)
39  nodeset = '' # Parsed DOM XML object
40  parseOK = False;
41  containedNodes = [] # contains tuples of (opcua_node_id_t, xmlelement)
42  referencedNodes = [] # contains tuples of (opcua_node_id_t, xmlelement)
43  namespaceOrder = [] # contains xmlns:sX attributed as tuples (int ns, string name)
44  namespaceQualifiers = [] # contains all xmlns:XYZ qualifiers that might prefix value aliases (like "<uax:Int32>")
45  referencedNamesSpaceUris = [] # contains <NamespaceUris> URI elements
46 
def __init__(self, originXML):
    """ __init__

    Reserves a temporary output file for the preprocessed copy of originXML
    and parses originXML with minidom. parseOK ends up False when parsing
    raises or when the document does not contain exactly one <UANodeSet>
    element.

    originXML: path of the nodeset XML file to preprocess

    returns: nothing
    """
    self.originXML = originXML
    self.targetXML = tempfile.mkstemp(prefix=os.path.basename(originXML)+"_preProcessed-", suffix=".xml")
    self.parseOK = True
    # Every list attribute has a class-level default. Rebind all of them on
    # the instance so no document appends to a list shared through the class.
    self.containedNodes = []
    self.referencedNodes = []
    self.namespaceOrder = []
    self.namespaceQualifiers = []
    self.referencedNamesSpaceUris = []
    try:
        self.nodeset = dom.parse(originXML)
        if len(self.nodeset.getElementsByTagName("UANodeSet")) != 1:
            logger.error("Document " + self.targetXML[1] + " contains no or more then 1 nodeset")
            self.parseOK = False
    except Exception:
        # Unreadable or malformed XML: keep the reason in the log instead of
        # silently flagging the document as unusable.
        logger.exception("Failed to parse " + str(originXML))
        self.parseOK = False
    logger.debug("Adding new document to be preprocessed " + os.path.basename(originXML) + " as " + self.targetXML[1])
64 
def clean(self):
    """ clean

    Deletes the temporary file whose name is stored in targetXML. The file
    descriptor in targetXML is deliberately left alone here; finalize()
    closes it after writing.

    returns: nothing
    """
    temporaryFileName = self.targetXML[1]
    os.remove(temporaryFileName)
68 
def getTargetXMLName(self):
    """ getTargetXMLName

    returns: name of the temporary preprocessed file if parseOK is set,
             None otherwise
    """
    return self.targetXML[1] if self.parseOK else None
73 
def extractNamespaceURIs(self):
    """ extractNamespaceURIs

    minidom gobbles up <NamespaceUris></NamespaceUris> elements, without a decent
    way to reliably access this dom2 <uri></uri> elements (only attribute xmlns= are
    accessible using minidom). We need them for dereferencing though... This
    function attempts to do just that: it scans the original file as text,
    cuts out the <NamespaceUris>...</NamespaceUris> span (tag names are
    matched case-insensitively) and parses only that span.

    The text of every child element is appended to referencedNamesSpaceUris
    in document order. An empty child contributes "" so that list positions
    stay aligned with the namespace indices. Nothing is appended when the
    file has no complete <NamespaceUris> element.

    returns: Nothing
    """
    collected = []
    collecting = False
    terminated = False
    # "with" closes the file even when reading raises.
    with open(self.originXML) as infile:
        for line in infile:
            lowered = line.lower()
            if not collecting and "<namespaceuris>" in lowered:
                collecting = True
            if collecting:
                collected.append(line)
                # Checked on the opening line as well, so an element written
                # on a single line is recognised.
                if "</namespaceuris>" in lowered:
                    terminated = True
                    break

    if not terminated:
        return

    # Drop anything that shares a line with the tags but lies outside them.
    span = re.search(r"<namespaceuris>.*?</namespaceuris>", "".join(collected), re.IGNORECASE | re.DOTALL)
    if span is None:
        return

    root = dom.parseString(span.group(0)).documentElement
    for uri in root.childNodes:
        if uri.nodeType != uri.ELEMENT_NODE:
            continue
        if uri.firstChild is None:
            self.referencedNamesSpaceUris.append("")
        else:
            self.referencedNamesSpaceUris.append(uri.firstChild.data)
106 
def analyze(self):
    """ analyze()

    analyze will gather information about the nodes and references contained in a XML File
    to facilitate later preprocessing stages that address XML dependency issues.

    Fills namespaceQualifiers and namespaceOrder from the xmlns attributes of
    the <UANodeSet> element, referencedNamesSpaceUris via
    extractNamespaceURIs(), containedNodes from every child element carrying
    a NodeId attribute and referencedNodes from the entries of that child's
    first <References> element.

    returns: No return value
    """
    ns = self.nodeset.getElementsByTagName("UANodeSet")

    # We need to find out what the namespace calls itself and other referenced, as numeric id's are pretty
    # useless sans linked nodes. There is two information sources...
    self.extractNamespaceURIs() # From <URI>...</URI> definitions

    for key in ns[0].attributes.keys(): # from xmlns:sX attributes
        if "xmlns:" in key: # Any key: we will be removing these qualifiers from Values later
            self.namespaceQualifiers.append(key.replace("xmlns:", ""))
        # Only xmlns:s<digits> carries a numeric namespace id; other prefixes
        # that merely start with "s" must not be fed to int().
        numbered = re.match(r"xmlns:s(\d+)$", key)
        if numbered: # get a numeric nsId and modelname/uri
            modelUri = re.sub(r"[A-Za-z0-9-_\.]+\.[xXsSdD]{3}$", "", ns[0].getAttribute(key))
            self.namespaceOrder.append((int(numbered.group(1)), modelUri))

    # Get all nodeIds contained in this XML
    for nd in ns[0].childNodes:
        if nd.nodeType != nd.ELEMENT_NODE:
            continue
        if not nd.hasAttribute(u'NodeId'):
            continue
        self.containedNodes.append( (opcua_node_id_t(nd.getAttribute(u'NodeId')), nd) )
        refs = nd.getElementsByTagName(u'References')
        if not refs:
            continue # node without a <References> element
        for ref in refs[0].childNodes:
            if ref.nodeType != ref.ELEMENT_NODE:
                continue
            if ref.firstChild is None:
                continue # reference element without a target node id
            self.referencedNodes.append( (opcua_node_id_t(ref.firstChild.data), ref) )

    logger.debug("Nodes: " + str(len(self.containedNodes)) + " References: " + str(len(self.referencedNodes)))
140 
def getNamespaceId(self):
    """ namespaceId()

    Tallies the namespace index of every entry in containedNodes and picks
    the index that occurs most often as the numeric identifier of this data
    model. On a tie the index that was seen first wins.

    returns: Integer ID of the most probable/most used namespace in this XML,
             0 if containedNodes is empty
    """
    usage = Counter(entry[0].ns for entry in self.containedNodes)
    if not usage:
        return 0
    # max() returns the first key holding the highest count.
    return max(usage, key=usage.get)
165 
def getReferencedNamespaceUri(self, nsId):
    """ getReferencedNamespaceUri

    returns an URL that hopefully corresponds to the nsId that was used to reference this model

    The numbered xmlns:sX entries in namespaceOrder are consulted first. If
    none matches, entry nsId-1 of referencedNamesSpaceUris is used, provided
    that nsId lies within 1..len(referencedNamesSpaceUris).

    return: URI string corresponding to nsId, "" if nothing is known about nsId
    """
    # Might be the more reliable method: Get the URI from the xmlns attributes (they have numbers)
    for el in self.namespaceOrder:
        if el[0] == nsId:
            return el[1]

    # Fallback:
    # Some models do not have xmlns:sX attributes, but still <URI>s (usually when they only reference NS0)
    # The explicit range check keeps nsId=0 from wrapping around to the last
    # entry and keeps an nsId past the end from raising IndexError.
    if 1 <= nsId <= len(self.referencedNamesSpaceUris):
        return self.referencedNamesSpaceUris[nsId-1]

    #Nope, not found.
    return ""
186 
def getNamespaceDependencies(self):
    """ getNamespaceDependencies

    returns: list of the distinct namespace indices found in referencedNodes,
             in order of first appearance
    """
    distinctNs = []
    for entry in self.referencedNodes:
        nsIndex = entry[0].ns
        if nsIndex in distinctNs:
            continue
        distinctNs.append(nsIndex)
    return distinctNs
193 
def finalize(self):
    """ finalize

    Serializes nodeset, strips every "<qualifier>:" listed in
    namespaceQualifiers from the serialized text and writes the result,
    UTF-8 encoded, to the temporary file descriptor in targetXML. The
    descriptor is closed afterwards.

    returns: nothing
    """
    outline = self.nodeset.toxml()
    for qualifier in self.namespaceQualifiers:
        outline = outline.replace(qualifier + ":", "")
    # A buffered file object writes the complete payload (a bare os.write may
    # stop short) and the with-block closes the descriptor even if the write
    # fails.
    with os.fdopen(self.targetXML[0], "wb") as outfile:
        outfile.write(outline.encode('UTF-8'))
202 
def reassignReferencedNamespaceId(self, currentNsId, newNsId):
    """ reassignReferencedNamespaceId

    Iterates over all references in this document, find references to currentNsId and changes them to newNsId.
    NodeIds themselves are not altered.

    Both the text of the reference element and the ns attribute of the stored
    node id object are updated. Only the first "ns=<currentNsId>" in the text
    is rewritten, so an identifier that happens to contain the same character
    sequence stays intact.

    returns: nothing
    """
    # (?!\d) keeps "ns=1" from matching the start of "ns=10".
    nsPattern = re.compile("ns=" + re.escape(str(currentNsId)) + r"(?!\d)")
    nsReplacement = "ns=" + str(newNsId)
    for refNd in self.referencedNodes:
        if refNd[0].ns != currentNsId:
            continue
        textNode = refNd[1].firstChild
        textNode.data = nsPattern.sub(lambda _match: nsReplacement, textNode.data, 1)
        refNd[0].ns = newNsId
        refNd[0].toString()
216 
def reassignNamespaceId(self, currentNsId, newNsId):
    """ reassignNamespaceId

    Iterates over all nodes in this document, find those in namespace currentNsId and changes them to newNsId.

    Three places are rewritten: the text of every <Alias> element that has an
    Alias attribute, the NodeId attribute of every contained node in
    currentNsId, and every reference of this document that targets such a
    node. The stored node id objects get their ns attribute updated too.

    returns: nothing
    """
    # Match "ns=<currentNsId>" only when no further digit follows, so that
    # migrating ns=1 cannot turn "ns=10;i=3" into "ns=<new>0;i=3".
    nsPattern = re.compile("ns=" + re.escape(str(currentNsId)) + r"(?!\d)")
    nsReplacement = "ns=" + str(newNsId)

    def migrate(text):
        # Rewrite the leading namespace prefix only (first occurrence).
        return nsPattern.sub(lambda _match: nsReplacement, text, 1)

    #change ids in aliases
    for al in self.nodeset.getElementsByTagName("Alias"):
        if al.nodeType != al.ELEMENT_NODE or not al.hasAttribute("Alias"):
            continue
        if al.firstChild is None:
            continue # empty alias element, nothing to rewrite
        al.firstChild.data = migrate(al.firstChild.data)

    logger.debug("Migrating nodes /w ns index " + str(currentNsId) + " to " + str(newNsId))
    for nd in self.containedNodes:
        if nd[0].ns != currentNsId:
            continue
        # In our own document, update any references to this node
        for refNd in self.referencedNodes:
            if refNd[0].ns == currentNsId and refNd[0] == nd[0]:
                refNd[1].firstChild.data = migrate(refNd[1].firstChild.data)
                refNd[0].ns = newNsId
                refNd[0].toString()
        nd[1].setAttribute(u'NodeId', migrate(nd[1].getAttribute(u'NodeId')))
        nd[0].ns = newNsId
        nd[0].toString()
244 
246  preProcDocuments = []
247 
248  def __init__(self):
250 
def addDocument(self, documentPath):
    """ addDocument

    Wraps documentPath in a preProcessDocument and appends it to
    preProcDocuments.

    returns: nothing
    """
    document = preProcessDocument(documentPath)
    self.preProcDocuments.append(document)
253 
def removePreprocessedFiles(self):
    """ removePreprocessedFiles

    Calls clean() on every document in preProcDocuments.

    returns: nothing
    """
    for document in self.preProcDocuments:
        document.clean()
257 
def getPreProcessedFiles(self):
    """ getPreProcessedFiles

    returns: list with the target XML name of every document in
             preProcDocuments whose parseOK flag is set
    """
    return [document.getTargetXMLName() for document in self.preProcDocuments if document.parseOK]
264 
266  """ testModelCongruencyAgainstReferences
267 
268  Counts how many of the nodes referencef in refs can be found in the model
269  doc.
270 
271  returns: double corresponding to the percentage of hits
272  """
273  sspace = len(refs)
274  if sspace == 0:
275  return float(0)
276  found = 0
277  for ref in refs:
278  for n in doc.containedNodes:
279  if str(ref) == str(n[0]):
280  print(ref, n[0])
281  found = found + 1
282  break
283  return float(found)/float(sspace)
284 
def preprocess_assignUniqueNsIds(self):
    """ preprocess_assignUniqueNsIds

    Moves every document whose getNamespaceId() is 0 to the front of
    preProcDocuments without renumbering it, then renumbers the remaining
    documents to 2, 3, 4, ... in their current order.

    returns: nothing
    """
    # Search for namespace 0('s) - plural possible if user is overwriting NS0 defaults
    # Remove them from the list of namespaces, zero does not get demangled
    nsZeroDocuments = [document for document in self.preProcDocuments if document.getNamespaceId() == 0]
    for document in nsZeroDocuments:
        self.preProcDocuments.remove(document)

    # Reassign namespace id's to be in ascending order (first one will be "2")
    reordered = list(nsZeroDocuments)
    for assignedId, document in enumerate(self.preProcDocuments, 2):
        document.reassignNamespaceId(document.getNamespaceId(), assignedId)
        reordered.append(document)
        logger.info("Document " + document.originXML + " is now namespace " + str(assignedId))
    self.preProcDocuments = reordered
305 
def getUsedNamespaceArrayNames(self):
    """ getUsedNamespaceArrayNames

    Returns the XML xmlns:s1 or <URI>[0] of each XML document (if contained/possible).
    A document for which no URI can be retrieved is listed with the
    placeholder "http://modeluri.not/retrievable/from/xml".

    returns: dict of int:nsId -> string:url
    """
    nsName = {}
    for doc in self.preProcDocuments:
        uri = doc.getReferencedNamespaceUri(1)
        # The lookup signals "unknown" with an empty string, so test for
        # falsiness and store the value that was actually resolved here.
        if not uri:
            uri = "http://modeluri.not/retrievable/from/xml"
        nsName[doc.getNamespaceId()] = uri
    return nsName
320 
def preprocess_linkDependantModels(self):
    """ preprocess_linkDependantModels

    Rewrites the references of every document so that they use the namespace
    id of the document that actually holds the referenced nodes.

    For each namespace index a document references (other than its own and
    0) the target is first searched by URI: the document whose
    getReferencedNamespaceUri(1) equals the URI that the referencing document
    has for that index. If no URI is known or no document matches, the
    target is guessed instead: the document in which the largest fraction of
    the references can be found (testModelCongruencyAgainstReferences) wins.

    returns: nothing
    """
    revertToStochastic = [] # (doc, int id), where id was not resolvable using model URIs

    # Attempt to identify the model relations by using model URIs in xmlns:sX or <URI> contents
    for doc in self.preProcDocuments:
        nsid = doc.getNamespaceId()
        for d in doc.getNamespaceDependencies():
            if d == nsid or d == 0:
                continue
            # Attempt to identify the namespace URI this d refers to...
            nsUri = doc.getReferencedNamespaceUri(d)
            logger.info("Need a namespace referenced as " + str(d) + ". Which hopefully is " + nsUri)
            targetDoc = None
            # An empty URI means "unknown"; comparing it would match any
            # other document whose own URI is unknown as well.
            if nsUri:
                for tgt in self.preProcDocuments:
                    # That model, whose URI is known but its current id is not, will
                    # have referred to itself as "1"
                    if tgt.getReferencedNamespaceUri(1) == nsUri:
                        targetDoc = tgt
                        break
            if targetDoc is not None:
                # Found the model... relink the references
                doc.reassignReferencedNamespaceId(d, targetDoc.getNamespaceId())
            else:
                revertToStochastic.append((doc, d))
                logger.warning("Failed to reliably identify which XML/Model " + os.path.basename(doc.originXML) + " calls ns=" +str(d))

    for (doc, d) in revertToStochastic:
        logger.warning("Attempting to find stochastic match for target namespace ns=" + str(d) + " of " + os.path.basename(doc.originXML))
        # Copy all references to the given namespace
        refs = [opcua_node_id_t(str(ref[0])) for ref in doc.referencedNodes if ref[0].ns == d]
        best = (0, None) # (match fraction, targetDoc)
        for tDoc in self.preProcDocuments:
            tDocId = tDoc.getNamespaceId()
            # Scenario: If these references did target this documents namespace...
            for r in refs:
                r.ns = tDocId
                r.toString()
            # ... how many of them would be found!?
            c = self.testModelCongruencyAgainstReferences(tDoc, refs)
            # Strictly greater: on a tie the earlier document is kept.
            if c > best[0]:
                best = (c, tDoc)
        if best[1] is not None:
            # best[0] is the match fraction, best[1] the matching document
            logger.warning("Best match (" + str(best[0]*100) + "%) for what " + os.path.basename(doc.originXML) + " refers to as ns="+str(d)+" was " + os.path.basename(best[1].originXML))
            doc.reassignReferencedNamespaceId(d, best[1].getNamespaceId())
        else:
            logger.error("Failed to find a match for what " + os.path.basename(doc.originXML) + " refers to as ns=" + str(d))
377 
def preprocessAll(self):
    """ preprocessAll

    Runs analyze() on every document in preProcDocuments and afterwards
    finalize() on every document.

    returns: True
    """
    # NOTE(review): the listing this method was taken from skips several
    # numbered lines between the two loops - confirm against the original
    # file that no statement is missing here.

    # First: gather statistics about the namespaces
    for document in self.preProcDocuments:
        document.analyze()

    # Preprocess step: Remove XML specific Naming scheme ("uax:")
    # FIXME: Not implemented

    # Final: write the modified XML to the temporary files
    for document in self.preProcDocuments:
        document.finalize()

    return True
open62541_XMLPreprocessor.open62541_XMLPreprocessor.preprocessAll
def preprocessAll(self)
First: Gather statistics about the namespaces:
Definition: open62541_XMLPreprocessor.py:378
open62541_XMLPreprocessor.preProcessDocument.analyze
def analyze(self)
Definition: open62541_XMLPreprocessor.py:107
open62541_XMLPreprocessor.preProcessDocument.getNamespaceDependencies
def getNamespaceDependencies(self)
Definition: open62541_XMLPreprocessor.py:187
open62541_XMLPreprocessor.preProcessDocument.containedNodes
list containedNodes
Definition: open62541_XMLPreprocessor.py:41
open62541_XMLPreprocessor.open62541_XMLPreprocessor
Definition: open62541_XMLPreprocessor.py:245
open62541_XMLPreprocessor.preProcessDocument.namespaceOrder
list namespaceOrder
Definition: open62541_XMLPreprocessor.py:43
open62541_XMLPreprocessor.preProcessDocument.clean
def clean(self)
Definition: open62541_XMLPreprocessor.py:65
open62541_XMLPreprocessor.preProcessDocument.reassignNamespaceId
def reassignNamespaceId(self, currentNsId, newNsId)
Definition: open62541_XMLPreprocessor.py:217
open62541_XMLPreprocessor.open62541_XMLPreprocessor.preprocess_linkDependantModels
def preprocess_linkDependantModels(self)
Definition: open62541_XMLPreprocessor.py:321
open62541_XMLPreprocessor.preProcessDocument.getNamespaceId
def getNamespaceId(self)
Definition: open62541_XMLPreprocessor.py:141
open62541_XMLPreprocessor.preProcessDocument.reassignReferencedNamespaceId
def reassignReferencedNamespaceId(self, currentNsId, newNsId)
Definition: open62541_XMLPreprocessor.py:203
open62541_XMLPreprocessor.open62541_XMLPreprocessor.addDocument
def addDocument(self, documentPath)
Definition: open62541_XMLPreprocessor.py:251
open62541_XMLPreprocessor.preProcessDocument.getReferencedNamespaceUri
def getReferencedNamespaceUri(self, nsId)
Definition: open62541_XMLPreprocessor.py:166
open62541_XMLPreprocessor.open62541_XMLPreprocessor.removePreprocessedFiles
def removePreprocessedFiles(self)
Definition: open62541_XMLPreprocessor.py:254
open62541_XMLPreprocessor.preProcessDocument.finalize
def finalize(self)
Definition: open62541_XMLPreprocessor.py:194
str
#define str(a)
Definition: ua_adapter.cpp:52
open62541_XMLPreprocessor.preProcessDocument.originXML
string originXML
Definition: open62541_XMLPreprocessor.py:37
open62541_XMLPreprocessor.preProcessDocument.nodeset
string nodeset
Definition: open62541_XMLPreprocessor.py:39
open62541_XMLPreprocessor.open62541_XMLPreprocessor.getPreProcessedFiles
def getPreProcessedFiles(self)
Definition: open62541_XMLPreprocessor.py:258
open62541_XMLPreprocessor.open62541_XMLPreprocessor.preprocess_assignUniqueNsIds
def preprocess_assignUniqueNsIds(self)
Definition: open62541_XMLPreprocessor.py:285
open62541_XMLPreprocessor.preProcessDocument.namespaceQualifiers
list namespaceQualifiers
Definition: open62541_XMLPreprocessor.py:44
open62541_XMLPreprocessor.preProcessDocument
Definition: open62541_XMLPreprocessor.py:36
open62541_XMLPreprocessor.preProcessDocument.parseOK
bool parseOK
Definition: open62541_XMLPreprocessor.py:40
open62541_XMLPreprocessor.open62541_XMLPreprocessor.__init__
def __init__(self)
Definition: open62541_XMLPreprocessor.py:248
open62541_XMLPreprocessor.open62541_XMLPreprocessor.preProcDocuments
list preProcDocuments
Definition: open62541_XMLPreprocessor.py:246
open62541_XMLPreprocessor.preProcessDocument.__init__
def __init__(self, originXML)
Definition: open62541_XMLPreprocessor.py:47
open62541_XMLPreprocessor.preProcessDocument.targetXML
tuple targetXML
Definition: open62541_XMLPreprocessor.py:38
open62541_XMLPreprocessor.open62541_XMLPreprocessor.testModelCongruencyAgainstReferences
def testModelCongruencyAgainstReferences(self, doc, refs)
Definition: open62541_XMLPreprocessor.py:265
open62541_XMLPreprocessor.open62541_XMLPreprocessor.getUsedNamespaceArrayNames
def getUsedNamespaceArrayNames(self)
Definition: open62541_XMLPreprocessor.py:306
open62541_XMLPreprocessor.preProcessDocument.getTargetXMLName
def getTargetXMLName(self)
Definition: open62541_XMLPreprocessor.py:69
ua_node_types.opcua_node_id_t
Definition: ua_node_types.py:162
open62541_XMLPreprocessor.preProcessDocument.referencedNamesSpaceUris
list referencedNamesSpaceUris
Definition: open62541_XMLPreprocessor.py:45
open62541_XMLPreprocessor.preProcessDocument.referencedNodes
list referencedNodes
Definition: open62541_XMLPreprocessor.py:42
open62541_XMLPreprocessor.preProcessDocument.extractNamespaceURIs
def extractNamespaceURIs(self)
Definition: open62541_XMLPreprocessor.py:74