ChimeraTK-ControlSystemAdapter-OPCUAAdapter 04.00.04
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
open62541_XMLPreprocessor.py
Go to the documentation of this file.
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4# This Source Code Form is subject to the terms of the Mozilla Public
5# License, v. 2.0. If a copy of the MPL was not distributed with this
6# file, You can obtain one at http://mozilla.org/MPL/2.0/.
7
8
21
22import logging
23from ua_constants import *
24import tempfile
25import xml.dom.minidom as dom
26import os
27import string
28from collections import Counter
29import re
30
31from ua_namespace import opcua_node_id_t
32
33
34logger = logging.getLogger(__name__)
35
# Class-level defaults describing one XML document handled by the preprocessor.
# NOTE(review): the list defaults are mutable objects shared by every instance
# that does not rebind them; confirm each instance gets its own lists.
originXML = ''  # Original XML passed to the preprocessor
targetXML = ()  # tuple of (fileHandle, fileName)
nodeset = ''  # Parsed DOM XML object
parseOK = False
containedNodes = []  # contains tuples of (opcua_node_id_t, xmlelement)
referencedNodes = []  # contains tuples of (opcua_node_id_t, xmlelement)
namespaceOrder = []  # contains xmlns:sX attributed as tuples (int ns, string name)
namespaceQualifiers = []  # contains all xmlns:XYZ qualifiers that might prefix value aliases (like "<uax:Int32>")
referencedNamesSpaceUris = []  # contains <NamespaceUris> URI elements
46
def __init__(self, originXML):
    """Parse ``originXML`` and reserve a temporary file for its preprocessed copy.

    The temporary file is created with ``tempfile.mkstemp`` and stored as the
    ``(fileHandle, fileName)`` tuple ``self.targetXML``.  ``self.parseOK`` is
    set to False when the file cannot be parsed or when it does not contain
    exactly one ``UANodeSet`` element.

    originXML: path of the nodeset XML file to preprocess.
    """
    self.originXML = originXML
    self.targetXML = tempfile.mkstemp(prefix=os.path.basename(originXML)+"_preProcessed-", suffix=".xml")
    self.parseOK = True
    # Give every document its own bookkeeping lists; appending to class-level
    # lists would mix the nodes and namespaces of all documents together.
    self.containedNodes = []
    self.referencedNodes = []
    self.namespaceOrder = []
    self.namespaceQualifiers = []
    self.referencedNamesSpaceUris = []
    try:
        self.nodeset = dom.parse(originXML)
        if len(self.nodeset.getElementsByTagName("UANodeSet")) != 1:
            logger.error("Document " + self.targetXML[1] + " contains no or more then 1 nodeset")
            self.parseOK = False
    except Exception as err:
        # An unreadable or malformed file is reported through parseOK rather
        # than by raising, so callers can skip the document.
        logger.error("Failed to parse " + os.path.basename(originXML) + ": " + str(err))
        self.parseOK = False
    logger.debug("Adding new document to be preprocessed " + os.path.basename(originXML) + " as " + self.targetXML[1])
64
def clean(self):
    """Delete the temporary output file named in ``self.targetXML[1]``.

    Calling this again after the file is gone is a no-op; any other removal
    error is re-raised.  The descriptor in ``self.targetXML[0]`` is
    deliberately not closed here (finalize() closes it after writing).
    """
    import errno

    try:
        os.remove(self.targetXML[1])
    except OSError as err:
        # Only "file does not exist" is tolerated, so repeated cleanup is safe.
        if err.errno != errno.ENOENT:
            raise
68
def getTargetXMLName(self):
    """Return the file name of the temporary output XML.

    returns: ``self.targetXML[1]`` if the document parsed successfully,
             otherwise None
    """
    if self.parseOK:
        return self.targetXML[1]
    return None
73
def extractNamespaceURIs(self):
    """ extractNamespaceURIs

    minidom gobbles up <NamespaceUris></NamespaceUris> elements, without a decent
    way to reliably access this dom2 <uri></uri> elements (only attribute xmlns= are
    accessible using minidom). We need them for dereferencing though... This
    function re-reads the original file as text, cuts out the <NamespaceUris>
    section and parses that fragment on its own.

    The text of every child element of the section is appended to
    self.referencedNamesSpaceUris. Nothing is appended if the file has no
    complete <NamespaceUris>...</NamespaceUris> section.

    returns: Nothing
    """
    section = []
    # 'with' guarantees the file is closed even if parsing below raises.
    with open(self.originXML) as infile:
        for line in infile:
            lowered = line.lower()
            if not section and "<namespaceuris>" not in lowered:
                continue  # still in front of the section
            section.append(line)
            if "</namespaceuris>" in lowered:
                break  # opening and closing tag may share a line

    # Cut exactly from the opening to the closing tag, so markup sharing a line
    # with either tag does not break the fragment parser.
    match = re.search(r"<namespaceuris>.*?</namespaceuris>", "".join(section), re.IGNORECASE | re.DOTALL)
    if match is None:
        return

    ns = dom.parseString(match.group(0)).getElementsByTagName("NamespaceUris")
    if len(ns) == 0:
        return
    for uri in ns[0].childNodes:
        if uri.nodeType != uri.ELEMENT_NODE:
            continue
        if uri.firstChild is None:
            continue  # empty element, carries no URI text
        self.referencedNamesSpaceUris.append(uri.firstChild.data)
106
def analyze(self):
    """ analyze()

    analyze will gather information about the nodes and references contained in a XML File
    to facilitate later preprocessing stages that address XML dependency issues.

    Fills self.namespaceQualifiers, self.namespaceOrder, self.containedNodes and
    self.referencedNodes (and, through extractNamespaceURIs(),
    self.referencedNamesSpaceUris).

    returns: No return value
    """
    ns = self.nodeset.getElementsByTagName("UANodeSet")

    # We need to find out what the namespace calls itself and other referenced, as numeric id's are pretty
    # useless sans linked nodes. There is two information sources...
    self.extractNamespaceURIs()  # From <URI>...</URI> definitions

    root = ns[0]
    for key in root.attributes.keys():  # from xmlns:sX attributes
        if "xmlns:" in key:  # Any key: we will be removing these qualifiers from Values later
            self.namespaceQualifiers.append(key.replace("xmlns:", ""))
        # Only xmlns:s<digits> carries a numeric namespace id; other prefixes
        # starting with "s" (e.g. xmlns:soap) must not be fed to int().
        numbered = re.match(r"xmlns:s([0-9]+)$", key)
        if numbered:
            # Strip a trailing schema file name (e.g. "Types.xsd") from the URI
            modelUri = re.sub(r"[A-Za-z0-9-_\.]+\.[xXsSdD]{3}$", "", root.getAttribute(key))
            self.namespaceOrder.append((int(numbered.group(1)), modelUri))

    # Get all nodeIds contained in this XML
    for nd in root.childNodes:
        if nd.nodeType != nd.ELEMENT_NODE:
            continue
        if not nd.hasAttribute(u'NodeId'):
            continue
        self.containedNodes.append((opcua_node_id_t(nd.getAttribute(u'NodeId')), nd))
        refLists = nd.getElementsByTagName(u'References')
        if len(refLists) == 0:
            continue  # node without a <References> section
        for ref in refLists[0].childNodes:
            # Skip whitespace text nodes and empty reference elements
            if ref.nodeType == ref.ELEMENT_NODE and ref.firstChild is not None:
                self.referencedNodes.append((opcua_node_id_t(ref.firstChild.data), ref))

    logger.debug("Nodes: " + str(len(self.containedNodes)) + " References: " + str(len(self.referencedNodes)))
140
def getNamespaceId(self):
    """ namespaceId()

    Counts the namespace IDs in all nodes of this XML and picks the most used
    namespace as the numeric identifier of this data model. If several ids are
    used equally often, the one counted first wins; without any nodes the
    result is 0.

    returns: Integer ID of the most probable/most used namespace in this XML
    """
    usage = Counter(entry[0].ns for entry in self.containedNodes)

    namespaceIdGuessed = 0
    highest = 0
    for nsId, count in usage.items():
        # Strict '>' keeps the first id on a tie
        if count > highest:
            highest = count
            namespaceIdGuessed = nsId
    return namespaceIdGuessed
165
167 """ getReferencedNamespaceUri
168
169 returns an URL that hopefully corresponds to the nsId that was used to reference this model
170
171 return: URI string corresponding to nsId
172 """
173 # Might be the more reliable method: Get the URI from the xmlns attributes (they have numers)
174 if len(self.namespaceOrdernamespaceOrder) > 0:
175 for el in self.namespaceOrdernamespaceOrder:
176 if el[0] == nsId:
177 return el[1]
178
179 # Fallback:
180 # Some models do not have xmlns:sX attributes, but still <URI>s (usually when they only reference NS0)
183
184 #Nope, not found.
185 return ""
186
def getNamespaceDependencies(self):
    """Return the namespace ids used by the references of this document.

    returns: list of the distinct ``ns`` values found in self.referencedNodes,
             in order of first appearance
    """
    deps = []
    for ndid in self.referencedNodes:
        nsId = ndid[0].ns
        if nsId not in deps:
            deps.append(nsId)
    return deps
193
def finalize(self):
    """Write the (possibly modified) DOM to the temporary target file.

    The document is serialized with toxml(), every "<qualifier>:" prefix
    listed in self.namespaceQualifiers is removed from the text, and the result
    is written UTF-8 encoded to the descriptor in self.targetXML[0]. The
    descriptor is closed afterwards, also if serializing or writing fails.

    returns: nothing
    """
    # Wrapping the descriptor in a file object writes the complete buffer
    # (a single os.write may write only part of it) and closes it on exit.
    with os.fdopen(self.targetXML[0], "wb") as outfile:
        outline = self.nodeset.toxml()
        for qualifier in self.namespaceQualifiers:
            outline = outline.replace(qualifier + ":", "")
        outfile.write(outline.encode('UTF-8'))
202
def reassignReferencedNamespaceId(self, currentNsId, newNsId):
    """ reassignReferencedNamespaceId

    Iterates over all references in this document, find references to currentNsId and changes them to newNsId.
    NodeIds themselves are not altered.

    Only the first "ns=<currentNsId>" in the reference text is rewritten, so an
    identifier that happens to contain the same text stays untouched.

    returns: nothing
    """
    oldPrefix = "ns=" + str(currentNsId)
    newPrefix = "ns=" + str(newNsId)
    for nodeId, element in self.referencedNodes:
        if nodeId.ns != currentNsId:
            continue
        textNode = element.firstChild
        textNode.data = textNode.data.replace(oldPrefix, newPrefix, 1)
        nodeId.ns = newNsId
        # Result is unused; presumably refreshes the id's cached string form - TODO confirm
        nodeId.toString()
216
def reassignNamespaceId(self, currentNsId, newNsId):
    """ reassignNamespaceId

    Iterates over all nodes in this document, find those in namespace currentNsId and changes them to newNsId.
    Alias values and references inside this document that point at a migrated
    node are rewritten as well.

    returns: nothing
    """
    oldPrefix = "ns=" + str(currentNsId)
    newPrefix = "ns=" + str(newNsId)
    # The negative lookahead keeps "ns=1" from matching the front of "ns=10".
    oldPattern = re.compile(re.escape(oldPrefix) + r"(?![0-9])")

    # change ids in aliases
    for al in self.nodeset.getElementsByTagName("Alias"):
        if al.nodeType != al.ELEMENT_NODE or not al.hasAttribute("Alias"):
            continue
        if al.firstChild is None:
            continue  # empty alias, nothing to rewrite
        al.firstChild.data = oldPattern.sub(lambda _m: newPrefix, al.firstChild.data)

    logger.debug("Migrating nodes /w ns index " + str(currentNsId) + " to " + str(newNsId))
    for nodeId, element in self.containedNodes:
        if nodeId.ns != currentNsId:
            continue
        # In our own document, update any references to this node. This has to
        # happen before nodeId.ns changes, because the comparison uses the old id.
        for refId, refElement in self.referencedNodes:
            if refId.ns == currentNsId and refId == nodeId:
                refElement.firstChild.data = refElement.firstChild.data.replace(oldPrefix, newPrefix, 1)
                refId.ns = newNsId
                refId.toString()
        # Only the leading namespace part is replaced; identifier text stays as is.
        element.setAttribute(u'NodeId', element.getAttribute(u'NodeId').replace(oldPrefix, newPrefix, 1))
        nodeId.ns = newNsId
        nodeId.toString()
244
246 preProcDocuments = []
247
251 def addDocument(self, documentPath):
253
255 for doc in self.preProcDocumentspreProcDocuments:
256 doc.clean()
257
259 files = []
260 for doc in self.preProcDocumentspreProcDocuments:
261 if (doc.parseOK):
262 files.append(doc.getTargetXMLName())
263 return files
264
def testModelCongruencyAgainstReferences(self, doc, refs):
    """ testModelCongruencyAgainstReferences

    Counts how many of the nodes referenced in refs can be found in the model
    doc. Node ids are compared by their str() form; a reference that occurs
    several times in refs is counted each time.

    doc:  document whose containedNodes are searched
    refs: list of node ids to look up

    returns: float in [0, 1], the share of refs found in doc (0.0 for empty refs)
    """
    sspace = len(refs)
    if sspace == 0:
        return float(0)
    # One pass to collect the known ids instead of rescanning the model per reference
    known = set(str(n[0]) for n in doc.containedNodes)
    found = sum(1 for ref in refs if str(ref) in known)
    return float(found)/float(sspace)
284
286 nsdep = []
287 docLst = []
288 # Search for namespace 0('s) - plural possible if user is overwriting NS0 defaults
289 # Remove them from the list of namespaces, zero does not get demangled
290 for doc in self.preProcDocumentspreProcDocuments:
291 if doc.getNamespaceId() == 0:
292 docLst.append(doc)
293 for doc in docLst:
294 self.preProcDocumentspreProcDocuments.remove(doc)
295
296 # Reassign namespace id's to be in ascending order
297 nsidx = 1 # next namespace id to assign on collision (first one will be "2")
298 for doc in self.preProcDocumentspreProcDocuments:
299 nsidx = nsidx + 1
300 nsid = doc.getNamespaceId()
301 doc.reassignNamespaceId(nsid, nsidx)
302 docLst.append(doc)
303 logger.info("Document " + doc.originXML + " is now namespace " + str(nsidx))
305
def getUsedNamespaceArrayNames(self):
    """ getUsedNamespaceArrayNames

    Returns the XML xmlns:s1 or <URI>[0] of each XML document (if contained/possible).
    Documents whose URI cannot be determined (lookup yields None or an empty
    string) are listed with a placeholder URI.

    returns: dict of int:nsId -> string:url
    """
    nsName = {}
    for doc in self.preProcDocuments:
        uri = doc.getReferencedNamespaceUri(1)
        if not uri:
            uri = "http://modeluri.not/retrievable/from/xml"
        # Store the value checked above, not a second raw lookup
        nsName[doc.getNamespaceId()] = uri
    return nsName
320
# Body of the method that links references between the loaded models; its
# 'def' line is not part of this listing.
#
# Step 1 resolves each foreign namespace id through model URIs. Step 2 falls
# back to a statistical match for the ids that step 1 could not resolve.
revertToStochastic = []  # (doc, int id), where id was not resolvable using model URIs

# Attempt to identify the model relations by using model URIs in xmlns:sX or <URI> contents
for doc in self.preProcDocuments:
    nsid = doc.getNamespaceId()
    dependencies = doc.getNamespaceDependencies()
    for d in dependencies:
        if d == nsid or d == 0:
            continue
        # Attempt to identify the namespace URI this d refers to...
        nsUri = doc.getReferencedNamespaceUri(d)
        logger.info("Need a namespace referenced as " + str(d) + ". Which hopefully is " + nsUri)
        targetDoc = None
        # An empty URI means the lookup failed; comparing it would match any
        # other model whose own URI is unknown as well.
        if nsUri:
            for tgt in self.preProcDocuments:
                # That model, whose URI is known but its current id is not, will
                # have referred to itself as "1"
                if tgt.getReferencedNamespaceUri(1) == nsUri:
                    targetDoc = tgt
                    break
        if targetDoc is not None:
            # Found the model... relink the references
            doc.reassignReferencedNamespaceId(d, targetDoc.getNamespaceId())
        else:
            revertToStochastic.append((doc, d))
            logger.warning("Failed to reliably identify which XML/Model " + os.path.basename(doc.originXML) + " calls ns=" +str(d))

for (doc, d) in revertToStochastic:
    logger.warning("Attempting to find stochastic match for target namespace ns=" + str(d) + " of " + os.path.basename(doc.originXML))
    # Copy all references to the given namespace
    refs = [opcua_node_id_t(str(ref[0])) for ref in doc.referencedNodes if ref[0].ns == d]
    matches = []  # list of (match%, targetDoc) to pick from later
    for tDoc in self.preProcDocuments:
        tDocId = tDoc.getNamespaceId()
        # Scenario: If these references did target this documents namespace...
        for r in refs:
            r.ns = tDocId
            r.toString()
        # ... how many of them would be found!?
        c = self.testModelCongruencyAgainstReferences(tDoc, refs)
        if c > 0:
            matches.append((c, tDoc))
    best = (0, None)
    for m in matches:
        if m[0] > best[0]:
            best = m
    if best[1] is not None:
        # best is (score, document): the percentage comes from best[0]
        logger.warning("Best match (" + str(best[0]*100) + "%) for what " + os.path.basename(doc.originXML) + " refers to as ns="+str(d)+" was " + os.path.basename(best[1].originXML))
        doc.reassignReferencedNamespaceId(d, best[1].getNamespaceId())
    else:
        logger.error("Failed to find a match for what " + os.path.basename(doc.originXML) + " refers to as ns=" + str(d))
377
378 def preprocessAll(self):
379
381 for doc in self.preProcDocumentspreProcDocuments:
382 doc.analyze()
383
384 # Preprocess step: Remove XML specific Naming scheme ("uax:")
385 # FIXME: Not implemented
386
387
392
393
394
397
398
400 for doc in self.preProcDocumentspreProcDocuments:
401 doc.finalize()
402
403 return True
preprocessAll(self)
First: Gather statistics about the namespaces:
#define str(a)