/* * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. */ /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * $Id: DOMHelper.java,v 1.2.4.1 2005/09/15 08:15:40 suresh_emailid Exp $ */ package com.sun.org.apache.xml.internal.utils; import com.sun.org.apache.xml.internal.dtm.ref.DTMNodeProxy; import com.sun.org.apache.xml.internal.res.XMLErrorResources; import com.sun.org.apache.xml.internal.res.XMLMessages; import java.util.HashMap; import java.util.Map; import java.util.Vector; import javax.xml.XMLConstants; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; import org.w3c.dom.Attr; import org.w3c.dom.DOMImplementation; import org.w3c.dom.Document; import org.w3c.dom.DocumentType; import org.w3c.dom.Element; import org.w3c.dom.Entity; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.w3c.dom.Text; /** * @deprecated Since the introduction of the DTM, this class will be removed. 
* This class provides a front-end to DOM implementations, providing
* a number of utility functions that either aren't yet standardized
* by the DOM spec or that are defined in optional DOM modules and
* hence may not be present in all DOMs.
*/
public class DOMHelper {
/**
* DOM Level 1 did not have a standard mechanism for creating a new
* Document object. This function provides a DOM-implementation-independent
* abstraction for that concept. It's typically used when
* outputting a new DOM as the result of an operation.
*
* TODO: This isn't directly compatable with DOM Level 2. * The Level 2 createDocument call also creates the root * element, and thus requires that you know what that element will be * before creating the Document. We should think about whether we want * to change this code, and the callers, so we can use the DOM's own * method. (It's also possible that DOM Level 3 may relax this * sequence, but you may give up some intelligence in the DOM by * doing so; the intent was that knowing the document type and root * element might let the DOM automatically switch to a specialized * subclass for particular kinds of documents.) * * @param isSecureProcessing state of the secure processing feature. * @return The newly created DOM Document object, with no children, or * null if we can't find a DOM implementation that permits creating * new empty Documents. */ public static Document createDocument(boolean isSecureProcessing) { try { // Use an implementation of the JAVA API for XML Parsing 1.0 to // create a DOM Document node to contain the result. DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance(); dfactory.setNamespaceAware(true); dfactory.setValidating(true); if (isSecureProcessing) { try { dfactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); } catch (ParserConfigurationException pce) {} } DocumentBuilder docBuilder = dfactory.newDocumentBuilder(); Document outNode = docBuilder.newDocument(); return outNode; } catch (ParserConfigurationException pce) { throw new RuntimeException( XMLMessages.createXMLMessage( XMLErrorResources.ER_CREATEDOCUMENT_NOT_SUPPORTED, null)); //"createDocument() not supported in XPathContext!"); // return null; } } /** * DOM Level 1 did not have a standard mechanism for creating a new * Document object. This function provides a DOM-implementation-independent * abstraction for that for that concept. It's typically used when * outputting a new DOM as the result of an operation. 
* * @return The newly created DOM Document object, with no children, or * null if we can't find a DOM implementation that permits creating * new empty Documents. */ public static Document createDocument() { return createDocument(false); } /** * Tells, through the combination of the default-space attribute * on xsl:stylesheet, xsl:strip-space, xsl:preserve-space, and the * xml:space attribute, whether or not extra whitespace should be stripped * from the node. Literal elements from template elements should * not be tested with this function. * @param textNode A text node from the source tree. * @return true if the text node should be stripped of extra whitespace. * * @throws javax.xml.transform.TransformerException * @xsl.usage advanced */ public boolean shouldStripSourceNode(Node textNode) throws javax.xml.transform.TransformerException { // return (null == m_envSupport) ? false : m_envSupport.shouldStripSourceNode(textNode); return false; } /** * Supports the XPath function GenerateID by returning a unique * identifier string for any given DOM Node. *
* Warning: The base implementation uses the Node object's hashCode(),
* which is NOT guaranteed to be unique. If that method hasn't been
* overridden in this DOM implementation, most Java implementations will
* derive it from the object's address and should be OK... but if
* your DOM uses a different definition of hashCode (eg hashing the
* contents of the subtree), or if your DOM may have multiple objects
* that represent a single Node in the data structure (eg via proxying),
* you may need to find another way to assign a unique identifier.
*
* Also, be aware that if nodes are destroyed and recreated, there is * an open issue regarding whether an ID may be reused. Currently * we're assuming that the input document is stable for the duration * of the XPath/XSLT operation, so this shouldn't arise in this context. *
* (DOM Level 3 is investigating providing a unique node "key", but
* that won't help Level 1 and Level 2 implementations.)
*
* @param node the Node for which an identifier is wanted
*
* @return "N" followed by the node's hashCode() in upper-case
* hexadecimal; intended to differ for every Node object.
*/
public String getUniqueID(Node node)
{
  final String hexHash = Integer.toHexString(node.hashCode());
  return "N" + hexHash.toUpperCase();
}

/**
* Figure out whether node2 should be considered as being later
* in the document than node1, in Document Order as defined
* by the XPath model. This may not agree with the ordering defined
* by other XML applications.
*
* There are some cases where ordering isn't defined, and neither are
* the results of this function -- though we'll generally return true.
*
* TODO: Make sure this does the right thing with attribute nodes!!!
*
* @param node1 DOM Node to perform position comparison on.
* @param node2 DOM Node to perform position comparison on .
*
* @return false if node2 comes before node1, otherwise return true.
* You can think of this as
* {@code (node1.documentOrderPosition <= node2.documentOrderPosition)}.
*/
public static boolean isNodeAfter(Node node1, Node node2)
{
  // A node is never "before" itself, so identical nodes answer true.
  if (node1 == node2 || isNodeTheSame(node1, node2))
  {
    return true;
  }

  // Answer used whenever no ordering can be established.
  boolean node2IsAfter = true;

  Node up1 = getParentOfNode(node1);
  Node up2 = getParentOfNode(node2);

  if (up1 == up2 || isNodeTheSame(up1, up2))
  {
    // Most common case: the two nodes share a parent, so a sibling
    // comparison settles it.
    if (null != up1)
    {
      node2IsAfter = isNodeAfterSibling(up1, node1, node2);
    }
    // else: both nodes are parentless, ordering is undefined. We keep
    // the default answer rather than throwing; not expected to arise
    // in XPath, but beware if this method is reused elsewhere.
  }
  else
  {
    // General strategy: measure both ancestor chains, bring the deeper
    // node up to the depth of the shallower one, then climb in lock
    // step until the chains meet. If the meeting point is one of the
    // two nodes, that node (the ancestor) comes first; otherwise the
    // two children just below the meeting point are compared as
    // siblings. If the chains never meet, the default answer stands.

    // Both counters start at 2: the node itself plus the parent
    // already fetched above. Only their difference matters.
    int depth1 = 2;
    for (Node a = up1; a != null; a = getParentOfNode(a))
    {
      depth1++;
    }
    int depth2 = 2;
    for (Node a = up2; a != null; a = getParentOfNode(a))
    {
      depth2++;
    }

    // Equalize depths; at most one of these loops executes.
    Node walker1 = node1;
    Node walker2 = node2;
    for (int surplus = depth2 - depth1; surplus > 0; surplus--)
    {
      walker2 = getParentOfNode(walker2);
    }
    for (int surplus = depth1 - depth2; surplus > 0; surplus--)
    {
      walker1 = getParentOfNode(walker1);
    }

    // Remember the nodes one step below the walkers so we can "back up"
    // once the common ancestor is found.
    Node below1 = null;
    Node below2 = null;
    while (null != walker1)
    {
      if (walker1 == walker2 || isNodeTheSame(walker1, walker2))
      {
        node2IsAfter = (null == below1)
          // Met before climbing at all: one input is an ancestor of
          // the other, and the shallower one comes first.
          ? (depth1 < depth2)
          // Otherwise order the two branches under the common ancestor.
          : isNodeAfterSibling(walker1, below1, below2);
        break;
      }

      // No match at this level; climb one step on both chains.
      below1 = walker1;
      walker1 = getParentOfNode(walker1);
      below2 = walker2;
      walker2 = getParentOfNode(walker2);
    }
  }

  // WARNING: The following diagnostic won't report the early
  // "same node" case. Fix if/when needed.
  /* -- please do not remove... very useful for diagnostics --
  System.out.println("node1 = "+node1.getNodeName()+"("+node1.getNodeType()+")"+
  ", node2 = "+node2.getNodeName()
  +"("+node2.getNodeType()+")"+
  ", isNodeAfter = "+node2IsAfter); */
  return node2IsAfter;
} // end isNodeAfter(Node node1, Node node2)
/**
 * Decide whether two Node references denote the same node.
 *
 * When both arguments are DTMNodeProxy instances the decision is
 * delegated to DTMNodeProxy's equals; in every other case (including
 * null arguments) plain reference identity is used.
 *
 * @param node1 The first DOM node to compare.
 * @param node2 The second DOM node to compare.
 * @return true if the two nodes are the same.
 */
public static boolean isNodeTheSame(Node node1, Node node2)
{
  final boolean bothAreProxies =
    (node1 instanceof DTMNodeProxy) && (node2 instanceof DTMNodeProxy);

  if (!bothAreProxies)
  {
    return node1 == node2;
  }
  return ((DTMNodeProxy) node1).equals((DTMNodeProxy) node2);
}
/**
 * Decide whether child2 follows child1 in document order, given that
 * both hang off the same parent.
 *
 * Warning: Some aspects of "document order" are not well defined.
 * For example, the order of attributes is considered
 * meaningless in XML, and the order reported by our model will
 * be consistent for a given invocation but may not
 * match that of either the source file or the serialized output.
 *
 * Rules applied, in order:
 * an attribute always sorts before a non-attribute;
 * two attributes are ordered by their position in the parent's
 * attribute map;
 * two non-attributes are ordered by their position in the parent's
 * child list.
 * If the scan ends without seeing both nodes, the answer is false.
 *
 * @param parent Must be the parent of both child1 and child2.
 * @param child1 Must be the child of parent and not equal to child2.
 * @param child2 Must be the child of parent and not equal to child1.
 * @return true if child 2 is after child1 in document order.
 */
private static boolean isNodeAfterSibling(Node parent, Node child1,
                                          Node child2)
{
  final boolean firstIsAttr = (Node.ATTRIBUTE_NODE == child1.getNodeType());
  final boolean secondIsAttr = (Node.ATTRIBUTE_NODE == child2.getNodeType());

  if (firstIsAttr != secondIsAttr)
  {
    // Mixed pair: attributes always sort before non-attributes, so
    // child2 is "after" exactly when child1 is the attribute.
    return firstIsAttr;
  }

  boolean seen1 = false;
  boolean seen2 = false;

  if (firstIsAttr)
  {
    // Both are attributes: walk the attribute map from the start and
    // report which of the two turns up second.
    NamedNodeMap attrs = parent.getAttributes();
    int attrCount = attrs.getLength();
    for (int idx = 0; idx < attrCount; idx++)
    {
      Node candidate = attrs.item(idx);
      if (child1 == candidate || isNodeTheSame(child1, candidate))
      {
        if (seen2)
        {
          return false;
        }
        seen1 = true;
      }
      else if (child2 == candidate || isNodeTheSame(child2, candidate))
      {
        if (seen1)
        {
          return true;
        }
        seen2 = true;
      }
    }
    return false;
  }

  // Both are ordinary children: walk the sibling list from the first
  // child. (TODO carried over from the earlier implementation: measure
  // whether scanning outward from one of the nodes would be cheaper
  // than scanning from the start; either can touch every sibling in
  // the worst case.)
  for (Node candidate = parent.getFirstChild(); null != candidate;
       candidate = candidate.getNextSibling())
  {
    if (child1 == candidate || isNodeTheSame(child1, candidate))
    {
      if (seen2)
      {
        return false;
      }
      seen1 = true;
    }
    else if (child2 == candidate || isNodeTheSame(child2, candidate))
    {
      if (seen1)
      {
        return true;
      }
      seen2 = true;
    }
  }
  return false;
} // end isNodeAfterSibling(Node parent, Node child1, Node child2)
//==========================================================
// SECTION: Namespace resolution
//==========================================================
/**
 * Compute how deep a node sits in its tree, counting the node itself:
 * a parentless node is at level 1, its children at level 2, and so on.
 * Parents are obtained through getParentOfNode, so an attribute's
 * parent is its owning element.
 *
 * @param n Node to be examined.
 * @return the number of ancestors, plus one
 * @xsl.usage internal
 */
public short getLevel(Node n)
{
  short depth = 1;
  for (Node up = getParentOfNode(n); up != null; up = getParentOfNode(up))
  {
    depth++;
  }
  return depth;
}
/**
 * Given an XML Namespace prefix and a context in which the prefix
 * is to be evaluated, return the Namespace Name this prefix was
 * bound to. Note that DOM Level 3 is expected to provide a version of
 * this which deals with the DOM's "early binding" behavior.
 *
 * Handling:
 * the prefix "xml" and the prefix "xmlns" map to their fixed,
 * hardcoded namespace names;
 * any other prefix is resolved by walking up from the context element
 * looking for an "xmlns:prefix" attribute (or plain "xmlns" when the
 * prefix is empty). The walk passes through Element and
 * EntityReference ancestors and stops at the first node of any other
 * type.
 *
 * @param prefix String containing namespace prefix to be resolved,
 * without the ':' which separates it from the localname when used
 * in a Node Name. The empty string signifies the default namespace
 * at this point in the document. Must not be null.
 * @param namespaceContext Element which provides context for resolution.
 * (We could extend this to work for other nodes by first seeking their
 * nearest Element ancestor.)
 *
 * @return a String containing the Namespace URI which this prefix
 * represents in the specified context, or null if no declaration
 * for the prefix is found.
 */
public String getNamespaceForPrefix(String prefix, Element namespaceContext)
{
  if (prefix.equals("xml"))
  {
    return QName.S_XMLNAMESPACEURI; // Hardcoded, per Namespace spec
  }
  if (prefix.equals("xmlns"))
  {
    // Hardcoded in the DOM spec, expected to be adopted by
    // Namespace spec. NOTE: Namespace declarations _must_ use
    // the xmlns: prefix; other prefixes declared as belonging
    // to this namespace will not be recognized and should
    // probably be rejected by parsers as erroneous declarations.
    return "http://www.w3.org/2000/xmlns/";
  }

  // Attribute name for this prefix's declaration. The emptiness test
  // must compare content, not references: an empty prefix produced at
  // runtime (e.g. by substring) is not the same object as the ""
  // literal and would otherwise be looked up as "xmlns:".
  String declname = prefix.isEmpty() ? "xmlns" : "xmlns:" + prefix;

  // Scan until we run out of Elements or have resolved the namespace.
  Node parent = namespaceContext;
  while (null != parent)
  {
    int type = parent.getNodeType();
    if (type == Node.ELEMENT_NODE)
    {
      // Look for the appropriate Namespace Declaration attribute,
      // either "xmlns:prefix" or (if prefix is "") "xmlns".
      // TODO: This does not handle "implicit declarations"
      // which may be created when the DOM is edited. DOM Level
      // 3 will define how those should be interpreted. But
      // this issue won't arise in freshly-parsed DOMs.
      Attr attr = ((Element) parent).getAttributeNode(declname);
      if (attr != null)
      {
        return attr.getNodeValue();
      }
    }
    else if (type != Node.ENTITY_REFERENCE_NODE)
    {
      // Only Elements and EntityReferences are climbed through.
      break;
    }
    parent = getParentOfNode(parent);
  }
  return null;
}
/**
* An experiment for the moment.
*/
Map
/**
* Get the root node of the document tree, regardless of
* whether or not the node passed in is a document node.
*
* TODO: This doesn't handle DocumentFragments or "orphaned" subtrees
* -- it's currently returning ownerDocument even when the tree is
* not actually part of the main Document tree. We should either
* rewrite the description to say that it finds the Document node,
* or change the code to walk up the ancestor chain.
*
* @param n Node to be examined
*
* @return the Document node. Note that this is not the correct answer
* if n was (or was a child of) a DocumentFragment or an orphaned node,
* as can arise if the DOM has been edited rather than being generated
* by a parser.
*/
public Node getRootNode(Node n)
{
  switch (n.getNodeType())
  {
    // A Document or DocumentFragment is treated as its own root.
    case Node.DOCUMENT_NODE :
    case Node.DOCUMENT_FRAGMENT_NODE :
      return n;

    // Every other node answers with its owner document, without
    // walking the ancestor chain.
    default :
      return n.getOwnerDocument();
  }
}
/**
 * Report whether a node is a namespace declaration. In DOM Level 2
 * this can be done in a namespace-aware manner, but in Level 1 DOMs
 * it has to be done by testing the node name, which is what happens
 * here.
 *
 * @param n Node to be examined.
 *
 * @return boolean -- true iff the node is an Attr whose name is
 * "xmlns" or has the "xmlns:" prefix.
 */
public boolean isNamespaceNode(Node n)
{
  // Only attributes can be namespace declarations.
  if (Node.ATTRIBUTE_NODE != n.getNodeType())
  {
    return false;
  }

  final String name = n.getNodeName();
  return name.equals("xmlns") || name.startsWith("xmlns:");
}
/**
 * Obtain the XPath-model parent of a DOM node -- ownerElement for Attrs,
 * parent for other nodes.
 *
 * Background: The DOM believes that you must be your Parent's
 * Child, and thus Attrs don't have parents. XPath said that Attrs
 * do have their owning Element as their parent. This function
 * bridges the difference: when the owning document's implementation
 * reports the "Core" "2.0" feature it uses Attr.getOwnerElement();
 * otherwise it falls back to searching the whole document, starting
 * at the document element, for the Element that carries the Attr.
 *
 * (There's some discussion of future DOMs generalizing ownerElement
 * into ownerNode and making it work on all types of nodes. This
 * still wouldn't help the users of Level 1 or Level 2 DOMs)
 *
 * @param node Node whose XPath parent we want to obtain
 *
 * @return the parent of the node, or the ownerElement if it's an
 * Attr node, or null if the node is an orphan.
 *
 * @throws RuntimeException if the node is an Attr, the Level 1
 * fallback is needed, and the Document has no root element.
 * This can't arise if the Document was created
 * via the DOM Level 2 factory methods, but is possible if other
 * mechanisms were used to obtain it
 */
public static Node getParentOfNode(Node node) throws RuntimeException
{
  // For anything but an attribute the DOM parent is the XPath parent.
  // No complaint is raised when that parent is null.
  if (Node.ATTRIBUTE_NODE != node.getNodeType())
  {
    return node.getParentNode();
  }

  // TBD: an Attr without an owner document is not checked for here.
  Document owner = node.getOwnerDocument();

  // Given how expensive the tree walk may be, first ask whether this
  // DOM can answer the question for us. The additional test does slow
  // down Level 1 DOMs slightly.
  // (Shouldn't have to check whether the implementation is null in a
  // compliant DOM, but let's be paranoid...)
  DOMImplementation domImpl = owner.getImplementation();
  boolean hasLevel2Core = (domImpl != null) && domImpl.hasFeature("Core", "2.0");
  if (hasLevel2Core)
  {
    return ((Attr) node).getOwnerElement();
  }

  // DOM Level 1 solution, as fallback. Hugely expensive.
  Element docElem = owner.getDocumentElement();
  if (docElem == null)
  {
    throw new RuntimeException(
      XMLMessages.createXMLMessage(
        XMLErrorResources.ER_CHILD_HAS_NO_OWNER_DOCUMENT_ELEMENT,
        null)); //"Attribute child does not have an owner document element!"
  }
  return locateAttrParent(docElem, node);
}
/**
 * Look up an element by its ID. Such a lookup can work only if the
 * document is interpreted in the context of a DTD or Schema, since
 * otherwise we don't know which attributes are or aren't IDs.
 *
 * Note that DOM Level 1 had no ability to retrieve this information.
 * DOM Level 2 introduced it but does not promise that it will be
 * supported in all DOMs; those which can't support it will always
 * return null.
 *
 * TODO: getElementByID is currently unimplemented. Support DOM Level 2?
 *
 * @param id The unique identifier to be searched for (ignored here).
 * @param doc The document to search within (ignored here).
 * @return CURRENTLY HARDCODED TO NULL, but it should be:
 * The node which has this unique identifier, or null if there
 * is no such node or this DOM can't reliably recognize it.
 */
public Element getElementByID(String id, Document doc)
{
  // Stub: neither argument is inspected.
  return null;
}
/**
 * The getUnparsedEntityURI function returns the URI of the unparsed
 * entity with the specified name in the same document as the context
 * node (see [3.3 Unparsed Entities]). It returns the empty string if
 * there is no such entity.
 *
 * XML processors may choose to use the System Identifier (if one
 * is provided) to resolve the entity, rather than the URI in the
 * Public Identifier. The details are dependent on the processor, and
 * we would have to support some form of plug-in resolver to handle
 * this properly. Currently, we simply return the System Identifier if
 * present (falling back to the Public Identifier otherwise), and hope
 * that it is a usable URI or that our caller can map it to one.
 * TODO: Resolve Public Identifiers... or consider changing function name.
 *
 * If we find a relative URI reference, XML expects it to be resolved
 * in terms of the base URI of the document. The DOM doesn't do that
 * for us, and it isn't entirely clear whether that should be done
 * here; currently that's pushed up to a higher level of our
 * application. (Note that DOM Level 1 didn't store the document's
 * base URI.)
 * TODO: Consider resolving Relative URIs.
 *
 * (The DOM's statement that "An XML processor may choose to
 * completely expand entities before the structure model is passed
 * to the DOM" refers only to parsed entities, not unparsed, and hence
 * doesn't affect this function.)
 *
 * @param name A string containing the Entity Name of the unparsed
 * entity.
 * @param doc Document node for the document to be searched.
 *
 * @return String containing the URI of the Unparsed Entity, or an
 * empty string if the document has no doctype, the doctype exposes no
 * entities, no entity of that name exists, or the entity is a parsed
 * one (it has no notation name).
 */
public String getUnparsedEntityURI(String name, Document doc)
{
  final DocumentType dtd = doc.getDoctype();
  if (dtd == null)
  {
    return "";
  }

  final NamedNodeMap declared = dtd.getEntities();
  if (declared == null)
  {
    return "";
  }

  final Entity found = (Entity) declared.getNamedItem(name);
  if (found == null)
  {
    return "";
  }

  // Only entities carrying a notation name are unparsed.
  if (found.getNotationName() == null)
  {
    return "";
  }

  // The draft says: "The XSLT processor may use the public
  // identifier to generate a URI for the entity instead of the URI
  // specified in the system identifier. If the XSLT processor does
  // not use the public identifier to generate the URI, it must use
  // the system identifier; if the system identifier is a relative
  // URI, it must be resolved into an absolute URI using the URI of
  // the resource containing the entity declaration as the base
  // URI [RFC2396]."
  // We fall a bit short of that: the system identifier is returned
  // as-is (it should be resolved to an absolute URL, but that's hard
  // to do from here), and the public identifier is only a fallback.
  final String systemId = found.getSystemId();
  return (systemId != null) ? systemId : found.getPublicId();
}
/**
 * Support for getParentOfNode; searches a DOM subtree, depth first,
 * for the Element which owns the Attr. This is hugely expensive, and
 * if at all possible you should use the DOM Level 2
 * Attr.getOwnerElement() method instead.
 *
 * The DOM Level 1 developers expected that folks would keep track
 * of the last Element they'd seen and could recover the info from
 * that source. Obviously that doesn't work very well if the only
 * information you've been presented with is the Attr. The DOM Level 2
 * getOwnerElement() method fixes that, but only for Level 2 and
 * later DOMs.
 *
 * An Element is considered the owner when the attribute it holds
 * under the Attr's node name is the very same object as the Attr.
 *
 * @param elem Element whose subtree is to be searched for this Attr
 * @param attr Attr whose owner is to be located.
 *
 * @return the first Element whose attribute list includes the provided
 * attr, or null if the subtree contains none. In modern DOMs, this will
 * also be the only such Element. (Early DOMs had some hope that Attrs
 * might be sharable, but this idea has been abandoned.)
 */
private static Node locateAttrParent(Element elem, Node attr)
{
  // This should only be called for Level 1 DOMs, so we don't have to
  // worry about namespace issues. In later levels, it's possible
  // for a DOM to have two Attrs with the same NodeName but
  // different namespaces, and we'd need getAttributeNodeNS...
  // but later levels also have Attr.getOwnerElement.
  if (elem.getAttributeNode(attr.getNodeName()) == attr)
  {
    return elem;
  }

  // Not on this element: try each child element's subtree in turn.
  Node kid = elem.getFirstChild();
  while (kid != null)
  {
    if (kid.getNodeType() == Node.ELEMENT_NODE)
    {
      Node owner = locateAttrParent((Element) kid, attr);
      if (owner != null)
      {
        return owner;
      }
    }
    kid = kid.getNextSibling();
  }
  return null;
}
/**
* The factory object used for creating nodes
* in the result tree. Starts out null; it is assigned by
* setDOMFactory, and getDOMFactory fills it in with a new
* Document from createDocument() if it is still null when asked for.
*/
protected Document m_DOMFactory = null;
/**
 * Remember the factory object used to create DOM nodes
 * in the result tree. In fact, that's just the result tree's
 * Document node... The reference is stored as given, with no
 * validation; passing null clears it.
 *
 * @param domFactory The DOM Document Node within whose context
 * the result tree will be built.
 */
public void setDOMFactory(Document domFactory)
{
  m_DOMFactory = domFactory;
}
/**
 * Hand back the factory object required to create DOM nodes
 * in the result tree. If none has been stored yet, a new empty
 * Document is obtained from createDocument(), stored, and returned.
 *
 * @return The result tree's DOM Document Node.
 */
public Document getDOMFactory()
{
  if (m_DOMFactory != null)
  {
    return m_DOMFactory;
  }

  // Nothing stored yet: create the Document on first use.
  m_DOMFactory = createDocument();
  return m_DOMFactory;
}
/**
 * Return the textual contents of the node as a String. The text is
 * gathered by getNodeData(Node,FastStringBuffer) into a buffer
 * borrowed from StringBufferPool; the buffer is handed back to the
 * pool even if gathering fails. See that method for discussion of
 * how whitespace nodes are handled.
 *
 * @param node DOM Node to be examined
 * @return String containing a concatenation of all the
 * textual content within that node; the empty string if there is none.
 * @see #getNodeData(Node,FastStringBuffer)
 *
 */
public static String getNodeData(Node node)
{
  final FastStringBuffer scratch = StringBufferPool.get();
  try
  {
    getNodeData(node, scratch);
    if (scratch.length() > 0)
    {
      return scratch.toString();
    }
    return "";
  }
  finally
  {
    StringBufferPool.free(scratch);
  }
}
/**
 * Gather the text content of a DOM subtree, appending it to a
 * user-supplied FastStringBuffer object. Elements, Documents and
 * DocumentFragments contribute the content of their children,
 * recursively; Text, CDATASection and Attr nodes contribute their
 * node value; every other node type contributes nothing. Note that
 * attributes are not considered part of the content of an element.
 *
 * There are open questions regarding whitespace stripping.
 * Currently we make no special effort in that regard, since the standard
 * DOM doesn't yet provide DTD-based information to distinguish
 * whitespace-in-element-context from genuine #PCDATA. Note that we
 * should probably also consider xml:space if/when we address this.
 * DOM Level 3 may solve the problem for us.
 *
 * @param node Node whose subtree is to be walked, gathering the
 * contents of all Text or CDATASection nodes.
 * @param buf FastStringBuffer into which the contents of the text
 * nodes are to be concatenated.
 */
public static void getNodeData(Node node, FastStringBuffer buf)
{
  final short kind = node.getNodeType();

  if (kind == Node.TEXT_NODE
      || kind == Node.CDATA_SECTION_NODE
      || kind == Node.ATTRIBUTE_NODE)
  {
    // Leaf carrying text: take its value verbatim.
    buf.append(node.getNodeValue());
  }
  else if (kind == Node.DOCUMENT_FRAGMENT_NODE
           || kind == Node.DOCUMENT_NODE
           || kind == Node.ELEMENT_NODE)
  {
    // Container: descend into each child in document order.
    Node child = node.getFirstChild();
    while (child != null)
    {
      getNodeData(child, buf);
      child = child.getNextSibling();
    }
  }
  // Processing instructions and all remaining node types are ignored.
  // (A warning for processing instructions,
  // XPATHErrorResources.WG_PARSING_AND_PREPARING, was once considered.)
}
}