# test_parser.py -- source listing from the zope-parsedxml package

##############################################################################
# 
# Zope Public License (ZPL) Version 1.0
# -------------------------------------
# 
# Copyright (c) Digital Creations.  All rights reserved.
# 
# This license has been certified as Open Source(tm).
# 
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are
# met:
# 
# 1. Redistributions in source code must retain the above copyright
#    notice, this list of conditions, and the following disclaimer.
# 
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions, and the following disclaimer in
#    the documentation and/or other materials provided with the
#    distribution.
# 
# 3. Digital Creations requests that attribution be given to Zope
#    in any manner possible. Zope includes a "Powered by Zope"
#    button that is installed by default. While it is not a license
#    violation to remove this button, it is requested that the
#    attribution remain. A significant investment has been put
#    into Zope, and this effort will continue if the Zope community
#    continues to grow. This is one way to assure that growth.
# 
# 4. All advertising materials and documentation mentioning
#    features derived from or use of this software must display
#    the following acknowledgement:
# 
#      "This product includes software developed by Digital Creations
#      for use in the Z Object Publishing Environment
#      (http://www.zope.org/)."
# 
#    In the event that the product being advertised includes an
#    intact Zope distribution (with copyright and license included)
#    then this clause is waived.
# 
# 5. Names associated with Zope or Digital Creations must not be used to
#    endorse or promote products derived from this software without
#    prior written permission from Digital Creations.
# 
# 6. Modified redistributions of any form whatsoever must retain
#    the following acknowledgment:
# 
#      "This product includes software developed by Digital Creations
#      for use in the Z Object Publishing Environment
#      (http://www.zope.org/)."
# 
#    Intact (re-)distributions of any official Zope release do not
#    require an external acknowledgement.
# 
# 7. Modifications are encouraged but must be packaged separately as
#    patches to official Zope releases.  Distributions that do not
#    clearly separate the patches from the original work must be clearly
#    labeled as unofficial distributions.  Modifications which do not
#    carry the name Zope may be packaged in any form, as long as they
#    conform to all of the clauses above.
# 
# 
# Disclaimer
# 
#   THIS SOFTWARE IS PROVIDED BY DIGITAL CREATIONS ``AS IS'' AND ANY
#   EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
#   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
#   PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL DIGITAL CREATIONS OR ITS
#   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
#   USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
#   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
#   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
#   SUCH DAMAGE.
# 
# 
# This software consists of contributions made by Digital Creations and
# many individuals on behalf of Digital Creations.  Specific
# attributions are listed in the accompanying credits file.
# 
##############################################################################

import unittest
import ZODB # for Persistent
import os
import string
import sys

from domapi.Base import checkAttribute

from Products.ParsedXML import ParsedXML, Printer, ExtraDOM, DOM
from Products.ParsedXML.StrIO import StringIO
# the printer is a convenient way to track changes made by the parser
from test_printer import printElement, checkOutput

class ParsedXMLTestCaseBase(unittest.TestCase):
    """Common base class of the parser tests; provides shotgunParse."""

    def shotgunParse(self, doc, namespaces = 1):
        """Reparse the printed form of every node of doc, bottom-up.

        doc is first replaced by the result of parsing its own printed
        form, so that the comparisons below are not upset by information
        that is lost between parsing and printing.  Then, for every node
        except the document type node, the node's printed form is parsed
        back into the node with ExtraDOM.parseFile, and checkOutput
        compares the printed node and the printed document taken before
        the reparse with the ones taken after it.

        namespaces is passed unchanged to ExtraDOM.parseFile.
        """
        printedDoc = ExtraDOM.writeStream(doc).getvalue()
        doc = ExtraDOM.parseFile(doc, StringIO(printedDoc), namespaces)
        from Products.ParsedXML.DOM.Traversal import NodeFilter
        walker = doc.createNodeIterator(doc, NodeFilter.SHOW_ALL, None, 0)
        # Run the iterator to its end and then walk it backwards: nodes
        # must be handled from the bottom up because parsing into a node
        # replaces its children.  Going through the iterator in both
        # directions (rather than recursing by hand) also exercises it.
        while walker.nextNode():
            pass
        bottomUp = []
        while 1:
            current = walker.previousNode()
            if not current:
                break
            if current.nodeType != DOM.Core.Node.DOCUMENT_TYPE_NODE:
                bottomUp.append(current)
        # Reparse each collected node from its own print; neither the
        # node's print nor the whole document's print may change.
        for target in bottomUp:
            docBefore = ExtraDOM.writeStream(doc).getvalue()
            nodeBefore = ExtraDOM.writeStream(target).getvalue()
            ExtraDOM.parseFile(target, StringIO(nodeBefore), namespaces)
            nodeAfter = ExtraDOM.writeStream(target).getvalue()
            docAfter = ExtraDOM.writeStream(doc).getvalue()
            checkOutput(nodeBefore, nodeAfter,
                        "parsing print of node %s changed node print" % target)
            checkOutput(docBefore, docAfter,
                        "parsing print of node %s changed doc print" % target)

class ParseOasisXMLTestSaTestCase(ParsedXMLTestCaseBase):
    """Parse the files under tests/xml/conf/xmltest/sa and compare the
    printed DOM with the expected output kept in ParsedXMLTestOut.

    Each checkParseNNN method handles the input file NNN.xml.  The
    methods deliberately carry no docstrings, so that verbose test
    output shows the method name (and therefore the file number).
    """

    # set this to true to generate new output files
    generate = 0

    def checkParse001(self):
        self._checkParse("001.xml")
    def checkParse002(self):
        self._checkParse("002.xml")
    def checkParse003(self):
        self._checkParse("003.xml")
    def checkParse004(self):
        self._checkParse("004.xml")
    def checkParse005(self):
        self._checkParse("005.xml")
    def checkParse006(self):
        self._checkParse("006.xml")
    def checkParse007(self):
        self._checkParse("007.xml")
    def checkParse008(self):
        self._checkParse("008.xml")
    def checkParse009(self):
        self._checkParse("009.xml")
    def checkParse010(self):
        self._checkParse("010.xml")
    def checkParse011(self):
        self._checkParse("011.xml")
    # This test fails when we use namespaces to determine valid tagnames.
    # We want to parse namespaces always.
    #def checkParse012(self):
    #    self._checkParse("012.xml")
    def checkParse013(self):
        self._checkParse("013.xml")
    def checkParse014(self):
        self._checkParse("014.xml")
    def checkParse015(self):
        self._checkParse("015.xml")
    def checkParse016(self):
        self._checkParse("016.xml")
    def checkParse017(self):
        self._checkParse("017.xml")
    def checkParse018(self):
        self._checkParse("018.xml")
    def checkParse019(self):
        self._checkParse("019.xml")
    def checkParse020(self):
        self._checkParse("020.xml")
    def checkParse021(self):
        self._checkParse("021.xml")
    def checkParse022(self):
        self._checkParse("022.xml")
    def checkParse023(self):
        self._checkParse("023.xml")
    def checkParse024(self):
        self._checkParse("024.xml")
    def checkParse025(self):
        self._checkParse("025.xml")
    def checkParse026(self):
        self._checkParse("026.xml")
    def checkParse027(self):
        self._checkParse("027.xml")
    def checkParse028(self):
        self._checkParse("028.xml")
    def checkParse029(self):
        self._checkParse("029.xml")
    def checkParse030(self):
        self._checkParse("030.xml")
    def checkParse031(self):
        self._checkParse("031.xml")
    def checkParse032(self):
        self._checkParse("032.xml")
    def checkParse033(self):
        self._checkParse("033.xml")
    def checkParse034(self):
        self._checkParse("034.xml")
    def checkParse035(self):
        self._checkParse("035.xml")
    def checkParse036(self):
        self._checkParse("036.xml")
    def checkParse037(self):
        self._checkParse("037.xml")
    def checkParse038(self):
        self._checkParse("038.xml")
    def checkParse039(self):
        self._checkParse("039.xml")
    def checkParse040(self):
        self._checkParse("040.xml")
    def checkParse041(self):
        self._checkParse("041.xml")
    def checkParse042(self):
        self._checkParse("042.xml")
    def checkParse043(self):
        self._checkParse("043.xml")
    def checkParse044(self):
        self._checkParse("044.xml")
    def checkParse045(self):
        self._checkParse("045.xml")
    def checkParse046(self):
        self._checkParse("046.xml")
    def checkParse047(self):
        self._checkParse("047.xml")
    def checkParse048(self):
        self._checkParse("048.xml")
    def checkParse049(self):
        self._checkParse("049.xml")
    def checkParse050(self):
        self._checkParse("050.xml")
    # TODO: replace when we get unicode
    #def checkParse051(self):
    #    self._checkParse("051.xml")
    def checkParse052(self):
        self._checkParse("052.xml")
    def checkParse053(self):
        self._checkParse("053.xml")
    def checkParse054(self):
        self._checkParse("054.xml")
    def checkParse055(self):
        self._checkParse("055.xml")
    def checkParse056(self):
        self._checkParse("056.xml")
    def checkParse057(self):
        self._checkParse("057.xml")
    def checkParse058(self):
        self._checkParse("058.xml")
    def checkParse059(self):
        self._checkParse("059.xml")
    def checkParse060(self):
        self._checkParse("060.xml")
    def checkParse061(self):
        self._checkParse("061.xml")
    def checkParse062(self):
        self._checkParse("062.xml")
    # TODO: replace when we get unicode
    #def checkParse063(self):
    #    self._checkParse("063.xml")
    def checkParse064(self):
        self._checkParse("064.xml")
    def checkParse065(self):
        self._checkParse("065.xml")
    def checkParse066(self):
        self._checkParse("066.xml")
    def checkParse067(self):
        self._checkParse("067.xml")
    def checkParse068(self):
        self._checkParse("068.xml")
    def checkParse069(self):
        self._checkParse("069.xml")
    def checkParse070(self):
        self._checkParse("070.xml")
    def checkParse071(self):
        self._checkParse("071.xml")
    def checkParse072(self):
        self._checkParse("072.xml")
    def checkParse073(self):
        self._checkParse("073.xml")
    def checkParse074(self):
        self._checkParse("074.xml")
    def checkParse075(self):
        self._checkParse("075.xml")
    def checkParse076(self):
        self._checkParse("076.xml")
    def checkParse077(self):
        self._checkParse("077.xml")
    def checkParse078(self):
        self._checkParse("078.xml")
    def checkParse079(self):
        self._checkParse("079.xml")
    def checkParse080(self):
        self._checkParse("080.xml")
    def checkParse081(self):
        self._checkParse("081.xml")
    def checkParse082(self):
        self._checkParse("082.xml")
    def checkParse083(self):
        self._checkParse("083.xml")
    def checkParse084(self):
        self._checkParse("084.xml")
    def checkParse085(self):
        self._checkParse("085.xml")
    def checkParse086(self):
        self._checkParse("086.xml")
    def checkParse087(self):
        self._checkParse("087.xml")
    def checkParse088(self):
        self._checkParse("088.xml")
    def checkParse089(self):
        self._checkParse("089.xml")
    def checkParse090(self):
        self._checkParse("090.xml")
    def checkParse091(self):
        self._checkParse("091.xml")
    def checkParse092(self):
        self._checkParse("092.xml")
    def checkParse093(self):
        self._checkParse("093.xml")
    def checkParse094(self):
        self._checkParse("094.xml")
    def checkParse095(self):
        self._checkParse("095.xml")
    def checkParse096(self):
        self._checkParse("096.xml")
    def checkParse097(self):
        self._checkParse("097.xml")
    def checkParse098(self):
        self._checkParse("098.xml")
    def checkParse099(self):
        self._checkParse("099.xml")
    def checkParse100(self):
        self._checkParse("100.xml")
    def checkParse101(self):
        self._checkParse("101.xml")
    def checkParse102(self):
        self._checkParse("102.xml")
    def checkParse103(self):
        self._checkParse("103.xml")
    def checkParse104(self):
        self._checkParse("104.xml")
    def checkParse105(self):
        self._checkParse("105.xml")
    def checkParse106(self):
        self._checkParse("106.xml")
    def checkParse107(self):
        self._checkParse("107.xml")
    def checkParse108(self):
        self._checkParse("108.xml")
    def checkParse109(self):
        self._checkParse("109.xml")
    def checkParse110(self):
        self._checkParse("110.xml")
    def checkParse111(self):
        self._checkParse("111.xml")
    def checkParse112(self):
        self._checkParse("112.xml")
    def checkParse113(self):
        self._checkParse("113.xml")
    def checkParse114(self):
        self._checkParse("114.xml")
    def checkParse115(self):
        self._checkParse("115.xml")
    def checkParse116(self):
        self._checkParse("116.xml")
    def checkParse117(self):
        self._checkParse("117.xml")
    def checkParse118(self):
        self._checkParse("118.xml")
    def checkParse119(self):
        self._checkParse("119.xml")

    def _readFile(self, filename):
        "Return the whole content of filename, always closing the file."
        fp = open(filename)
        try:
            return fp.read()
        finally:
            fp.close()

    def _checkParse(self, iterFileName):
        """Parse one input file and compare its print with the stored one.

        iterFileName is the bare file name (for example "001.xml"); it is
        looked up in tests/xml/conf/xmltest/sa below the Products.ParsedXML
        package directory, and the expected output is the file of the same
        name in the ParsedXMLTestOut subdirectory.

        When self.generate is false, the printed DOM is compared with the
        expected output through checkOutput and the document is then run
        through shotgunParse.  When self.generate is true, nothing is
        checked: the expected output file is (re)written if it is missing
        or differs from the printed DOM.

        Raises IOError if the input file cannot be read, or if the
        expected output cannot be read while self.generate is false.
        """
        testDir = os.path.join(sys.modules['Products.ParsedXML'].__path__[0],
            'tests')
        saDir = os.path.join(testDir, 'xml', 'conf', 'xmltest', 'sa')
        outDir = os.path.join(saDir, 'ParsedXMLTestOut')

        # Read the test input
        inFilename = os.path.join(saDir, iterFileName)
        doc = ParsedXML.ParsedXML('foo', self._readFile(inFilename))

        # Read the output to test against.  In generate mode the file may
        # legitimately not exist yet; None then compares unequal to any
        # printed output, so the file gets written below.
        outFilename = os.path.join(outDir, iterFileName)
        if self.generate and not os.path.exists(outFilename):
            outFile = None
        else:
            outFile = self._readFile(outFilename)
        # FIXME: if the next line is enabled, the tests will succeed
        # with python 2.1. Unfortunately the whole testsuite will
        # segfault at about test 23 (it'll vary)
        # outFile = unicode(outFile)

        # Print the DOM, and compare against the expected output.
        output = printElement(doc)

        # All line separators are supposed to normalize to Unix-style
        # (XML 1.0, section 2.11).
##         outFile = string.replace(outFile, "\r\n", "\n")
##         outFile = string.replace(outFile, "\r", "\n")

        if self.generate:
            # re-generate the expected output file, but only if it changed
            if outFile != output:
                if not os.path.isdir(outDir):
                    os.makedirs(outDir)
                fp = open(outFilename, "w")
                try:
                    fp.write(output)
                finally:
                    fp.close()
        else:
            checkOutput(repr(outFile), repr(output))
            self.shotgunParse(doc.getDOMObj())

class ParseTestCase(ParsedXMLTestCaseBase):
    """Check that malformed XML raises expat.error carrying the position
    (lineno, offset) of the error.

    The checks use TestCase.assertEqual / TestCase.fail rather than the
    assert statement, so they still run when Python is started with -O.
    """

    def checkParseException(self):
        "document parse: expat.error carries the right line and offset"
        from xml.parsers import expat
        text = "<doc>\nfoobar<</doc>" # parse error line 2 column 7
        try:
            ParsedXML.ParsedXML('foo', text)
        except expat.error:
            # Fetch the exception through sys.exc_info(): unlike
            # "except X, e" or "except X as e" this spelling is accepted
            # by every Python version.
            e = sys.exc_info()[1]
            self.assertEqual(e.lineno, 2,
                "parse exception give wrong line number.  Wanted %s got %s"
                % (2, e.lineno))
            self.assertEqual(e.offset, 7,
                "parse exception give wrong column number.  Wanted %s got %s"
                % (7, e.offset))
        else:
            self.fail("parse of malformed XML doesn't raise properly")

    def checkSubnodeParseException(self):
        "subnode parseXML: expat.error carries the right line and offset"
        from xml.parsers import expat
        docText = "<doc><child/></doc>"
        subText = "<child>\nfoobar<</child>" # parse error line 2 column 7
        doc = ParsedXML.ParsedXML('foo', docText)
        try:
            doc.documentElement.firstChild.parseXML(StringIO(subText))
        except expat.error:
            # See checkParseException for why sys.exc_info() is used.
            e = sys.exc_info()[1]
            self.assertEqual(e.lineno, 2,
                "parse exception gives wrong line number.  Wanted %s, got %s"
                % (2, e.lineno))
            self.assertEqual(e.offset, 7,
                "parse exception gives wrong offset.  Wanted %s, got %s"
                % (7, e.offset))
        else:
            self.fail("parse of malformed XML doesn't raise properly")

class Lvl2ParseTestCase(ParsedXMLTestCaseBase):
    """Namespace-related parser checks."""

    def _shotgunParseString(self, inStr):
        "Parse inStr into a ParsedXML document and shotgunParse its DOM."
        doc = ParsedXML.ParsedXML('foo', inStr)
        self.shotgunParse(doc.getDOMObj())

    def checkNamespaceAttrOfDocumentElement(self):
        """we should be able to parse a document element that uses a
        namespace declared on the element itself"""
        inStr = '<?xml version="1.0" ?>\n' \
                '<foo:bar xmlns:foo="uri:test_namespace"/>\n'
        self._shotgunParseString(inStr)

    def checkNamespaceAttrOfElement(self):
        """we should be able to parse a non-root element that uses a
        namespace declared on the element itself"""
        inStr = '<?xml version="1.0" ?>\n' \
                '<doc><foo:bar xmlns:foo="uri:test_namespace"/></doc>\n'
        self._shotgunParseString(inStr)

    def checkSubnodeAncestorNamespace(self):
        """we should be able to parse a subtree that uses a namespace
        declared on an ancestor that we don't parse"""
        inStr = ('<?xml version="1.0" ?>\n'
                 '<doc><spam xmlns:spamNs="uri:test_namespace">'
                 '<eggs><spamNs:ham spamNs:foo="bar"/></eggs>'
                 '</spam></doc>\n')
        self._shotgunParseString(inStr)

    def checkSubnodeParseXMLNamepsaceDecl(self):
        """Check that we can parse at a subnode with an xml ns decl attr, and
        that parsing a subnode's output doesn't change the document.
        The external entity parser that the fragment builder uses likes
        to add this namespace, so we want to make sure we don't break
        anything that uses it."""
        inStr = '<spam xmlns:xml="http://www.w3.org/XML/1998/namespace"/>'
        self._shotgunParseString(inStr)

    def checkSubnodeParseXMLNamepsace(self):
        """Check that we can parse at a subnode with an xml ns attr, and
        that parsing a subnode's output doesn't change the document.
        The external entity parser that the fragment builder uses likes
        to add this namespace, so we want to make sure we don't break
        anything that uses it."""
        # we should use a real XML name here; it's not an error if the
        # parser notices that we're abusing the xml namespace
        inStr = '<spam xml:spam="spam"/>'
        self._shotgunParseString(inStr)

    def checkXMLNSPrefixParse(self):
        "check that xmlns prefix attrs are parsed properly"
        inStr = '<doc xmlns:spamNS="uri:test_namespace"/>'
        doc = ParsedXML.ParsedXML('foo', inStr)
        attr = doc.documentElement.attributes.item(0)
        checkAttribute(attr, 'prefix', 'xmlns')
        checkAttribute(attr, 'localName', 'spamNS')
        checkAttribute(attr, 'namespaceURI', 'http://www.w3.org/2000/xmlns/')
        checkAttribute(attr, 'value', 'uri:test_namespace')

    def checkXMLNSParse(self):
        "check that xmlns attrs are parsed properly"
        inStr = '<doc xmlns="uri:test_namespace"/>'
        doc = ParsedXML.ParsedXML('foo', inStr)
        attr = doc.documentElement.attributes.item(0)
        checkAttribute(attr, 'prefix', 'xmlns')
        checkAttribute(attr, 'localName', None)
        checkAttribute(attr, 'namespaceURI', 'http://www.w3.org/2000/xmlns/')
        checkAttribute(attr, 'value', 'uri:test_namespace')

    def checkNoNSParse(self):
        """check that attrs parsed with no NS but with NS aware parse
        can be retrieved with NS of None"""
        inStr = '<doc version="1.0"/>'
        doc = ParsedXML.ParsedXML('foo', inStr)
        # self.fail instead of the assert statement, so that the check is
        # not compiled away when Python runs with -O.
        if not doc.documentElement.getAttributeNS(None, 'version'):
            self.fail("NS-free attribute not gotten by NS-free getAttributeNS")

def test_suite():
    """Build the suite used by the Zope testing framework.

    The suite holds every method whose name starts with 'check' from
    ParseOasisXMLTestSaTestCase, ParseTestCase and Lvl2ParseTestCase,
    added in that order.
    """
    allChecks = unittest.TestSuite()
    for caseClass in (ParseOasisXMLTestSaTestCase,
                      ParseTestCase,
                      Lvl2ParseTestCase):
        allChecks.addTest(unittest.makeSuite(caseClass, 'check'))
    return allChecks

def main():
    "Run the suite built by test_suite() with a verbose text runner."
    runner = unittest.TextTestRunner(verbosity=3)
    runner.run(test_suite())

# Run the tests when this file is executed as a script.
if __name__ == "__main__":
    main()








# Listing generated by Doxygen 1.6.0