khtml Library API Documentation

xml_tokenizer.cpp

00001 
00023 #include "xml_tokenizer.h"
00024 #include "xml/dom_docimpl.h"
00025 #include "xml/dom_textimpl.h"
00026 #include "xml/dom_xmlimpl.h"
00027 #include "html/html_headimpl.h"
00028 #include "rendering/render_object.h"
00029 #include "misc/htmltags.h"
00030 #include "misc/htmlattrs.h"
00031 #include "misc/loader.h"
00032 
00033 #include "khtmlview.h"
00034 #include "khtml_part.h"
00035 #include <qvariant.h>
00036 #include <kdebug.h>
00037 #include <klocale.h>
00038 
00039 using namespace DOM;
00040 using namespace khtml;
00041 
00042 XMLHandler::XMLHandler(DocumentPtr *_doc, KHTMLView *_view)
00043 {
00044     m_doc = _doc;
00045     if ( m_doc ) m_doc->ref();
00046     m_view = _view;
00047     m_currentNode = _doc->document();
00048 }
00049 
00050 
00051 XMLHandler::~XMLHandler()
00052 {
00053     if ( m_doc ) m_doc->deref();
00054 }
00055 
00056 
00057 QString XMLHandler::errorProtocol()
00058 {
00059     return errorProt;
00060 }
00061 
00062 
00063 bool XMLHandler::startDocument()
00064 {
00065     // at the beginning of parsing: do some initialization
00066     errorProt = "";
00067     state = StateInit;
00068 
00069     return true;
00070 }
00071 
00072 
00073 bool XMLHandler::startElement( const QString& namespaceURI, const QString& /*localName*/, const QString& qName, const QXmlAttributes& atts )
00074 {
00075     if (m_currentNode->nodeType() == Node::TEXT_NODE)
00076         exitText();
00077 
00078     ElementImpl *newElement;
00079     newElement = m_doc->document()->createElementNS(namespaceURI,qName);
00080 
00081     int i;
00082     for (i = 0; i < atts.length(); i++) {
00083         int exceptioncode = 0;
00084         DOMString uri(atts.uri(i));
00085         DOMString ln(atts.localName(i));
00086         DOMString val(atts.value(i));
00087         NodeImpl::Id id = m_doc->document()->attrId(uri.implementation(),
00088                                                     ln.implementation(),
00089                                                     false /* allocate */);
00090         newElement->setAttribute(id, val.implementation(), exceptioncode);
00091         if (exceptioncode) // exception setting attributes
00092             return false;
00093     }
00094     if (m_currentNode->addChild(newElement)) {
00095         if (m_view && !newElement->attached())
00096             newElement->attach();
00097         m_currentNode = newElement;
00098         return true;
00099     }
00100     else {
00101         delete newElement;
00102         return false;
00103     }
00104 
00105     // ### DOM spec states: "if there is no markup inside an element's content, the text is contained in a
00106     // single object implementing the Text interface that is the only child of the element."... do we
00107     // need to ensure that empty elements always have an empty text child?
00108 }
00109 
00110 
00111 bool XMLHandler::endElement( const QString& /*namespaceURI*/, const QString& /*localName*/, const QString& /*qName*/ )
00112 {
00113     if (m_currentNode->nodeType() == Node::TEXT_NODE)
00114         exitText();
00115     if (m_currentNode->parentNode() != 0) {
00116         if (m_currentNode->renderer())
00117             m_currentNode->renderer()->close();
00118         m_currentNode = m_currentNode->parentNode();
00119     }
00120 // ###  else error
00121 
00122     return true;
00123 }
00124 
00125 
00126 bool XMLHandler::startCDATA()
00127 {
00128     if (m_currentNode->nodeType() == Node::TEXT_NODE)
00129         exitText();
00130 
00131     NodeImpl *newNode = m_doc->document()->createCDATASection(new DOMStringImpl(""));
00132     if (m_currentNode->addChild(newNode)) {
00133         if (m_view && !newNode->attached())
00134             newNode->attach();
00135         m_currentNode = newNode;
00136         return true;
00137     }
00138     else {
00139         delete newNode;
00140         return false;
00141     }
00142 
00143 }
00144 
00145 bool XMLHandler::endCDATA()
00146 {
00147     if (m_currentNode->parentNode() != 0)
00148         m_currentNode = m_currentNode->parentNode();
00149     return true;
00150 }
00151 
00152 bool XMLHandler::characters( const QString& ch )
00153 {
00154     if (ch.stripWhiteSpace().isEmpty())
00155         return true;
00156 
00157     if (m_currentNode->nodeType() == Node::TEXT_NODE ||
00158         m_currentNode->nodeType() == Node::CDATA_SECTION_NODE ||
00159         enterText()) {
00160 
00161         unsigned short parentId = m_currentNode->parentNode() ? m_currentNode->parentNode()->id() : 0;
00162         if (parentId == ID_SCRIPT || parentId == ID_STYLE || parentId == ID_XMP || parentId == ID_TEXTAREA) {
00163             // ### hack.. preserve whitespace for script, style, xmp and textarea... is this the correct
00164             // way of doing this?
00165             int exceptioncode = 0;
00166             static_cast<TextImpl*>(m_currentNode)->appendData(ch,exceptioncode);
00167             if (exceptioncode)
00168                 return false;
00169         }
00170         else {
00171             // for all others, simplify the whitespace
00172             int exceptioncode = 0;
00173             static_cast<TextImpl*>(m_currentNode)->appendData(ch.simplifyWhiteSpace(),exceptioncode);
00174             if (exceptioncode)
00175                 return false;
00176         }
00177         return true;
00178     }
00179     else
00180         return false;
00181 }
00182 
00183 bool XMLHandler::comment(const QString & ch)
00184 {
00185     if (m_currentNode->nodeType() == Node::TEXT_NODE)
00186         exitText();
00187     // ### handle exceptions
00188     m_currentNode->addChild(m_doc->document()->createComment(new DOMStringImpl(ch.unicode(), ch.length())));
00189     return true;
00190 }
00191 
00192 bool XMLHandler::processingInstruction(const QString &target, const QString &data)
00193 {
00194     if (m_currentNode->nodeType() == Node::TEXT_NODE)
00195         exitText();
00196     // ### handle exceptions
00197     ProcessingInstructionImpl *pi =
00198         m_doc->document()->createProcessingInstruction(target, new DOMStringImpl(data.unicode(), data.length()));
00199     m_currentNode->addChild(pi);
00200     pi->checkStyleSheet();
00201     return true;
00202 }
00203 
00204 
00205 QString XMLHandler::errorString()
00206 {
00207     return i18n("the document is not in the correct file format");
00208 }
00209 
00210 
00211 bool XMLHandler::fatalError( const QXmlParseException& exception )
00212 {
00213     errorProt += i18n( "fatal parsing error: %1 in line %2, column %3" )
00214         .arg( exception.message() )
00215         .arg( exception.lineNumber() )
00216         .arg( exception.columnNumber() );
00217 
00218     errorLine = exception.lineNumber();
00219     errorCol = exception.columnNumber();
00220 
00221     return false;
00222 }
00223 
00224 bool XMLHandler::enterText()
00225 {
00226     NodeImpl *newNode = m_doc->document()->createTextNode("");
00227     if (m_currentNode->addChild(newNode)) {
00228         if (m_view && !newNode->attached())
00229             newNode->attach();
00230         m_currentNode = newNode;
00231         return true;
00232     }
00233     else {
00234         delete newNode;
00235         return false;
00236     }
00237 }
00238 
00239 void XMLHandler::exitText()
00240 {
00241     NodeImpl* par = m_currentNode->parentNode();
00242     if (par != 0)
00243         m_currentNode = par;
00244 }
00245 
00246 bool XMLHandler::attributeDecl(const QString &/*eName*/, const QString &/*aName*/, const QString &/*type*/,
00247                                const QString &/*valueDefault*/, const QString &/*value*/)
00248 {
00249     // qt's xml parser (as of 2.2.3) does not currently give us values for type, valueDefault and
00250     // value. When it does, we can store these somewhere and have default attributes on elements
00251     return true;
00252 }
00253 
00254 bool XMLHandler::externalEntityDecl(const QString &/*name*/, const QString &/*publicId*/, const QString &/*systemId*/)
00255 {
00256     // ### insert these too - is there anything special we have to do here?
00257     return true;
00258 }
00259 
00260 bool XMLHandler::internalEntityDecl(const QString &name, const QString &value)
00261 {
00262     EntityImpl *e = new EntityImpl(m_doc,name);
00263     // ### further parse entities inside the value and add them as separate nodes (or entityreferences)?
00264     e->addChild(m_doc->document()->createTextNode(new DOMStringImpl(value.unicode(), value.length())));
00265 // ### FIXME
00266 //     if (m_doc->document()->doctype())
00267 //         static_cast<GenericRONamedNodeMapImpl*>(m_doc->document()->doctype()->entities())->addNode(e);
00268     return true;
00269 }
00270 
00271 bool XMLHandler::notationDecl(const QString &/*name*/, const QString &/*publicId*/, const QString &/*systemId*/)
00272 {
00273 // ### FIXME
00274 //     if (m_doc->document()->doctype()) {
00275 //         NotationImpl *n = new NotationImpl(m_doc,name,publicId,systemId);
00276 //         static_cast<GenericRONamedNodeMapImpl*>(m_doc->document()->doctype()->notations())->addNode(n);
00277 //     }
00278     return true;
00279 }
00280 
00281 bool XMLHandler::unparsedEntityDecl(const QString &/*name*/, const QString &/*publicId*/,
00282                                     const QString &/*systemId*/, const QString &/*notationName*/)
00283 {
00284     // ###
00285     return true;
00286 }
00287 
00288 
00289 //------------------------------------------------------------------------------
00290 
00291 XMLTokenizer::XMLTokenizer(DOM::DocumentPtr *_doc, KHTMLView *_view)
00292 {
00293     m_doc = _doc;
00294     if ( m_doc ) m_doc->ref();
00295     m_view = _view;
00296     m_xmlCode = "";
00297     m_scriptsIt = 0;
00298     m_cachedScript = 0;
00299 }
00300 
00301 XMLTokenizer::~XMLTokenizer()
00302 {
00303     if ( m_doc ) m_doc->deref();
00304     if (m_scriptsIt)
00305         delete m_scriptsIt;
00306     if (m_cachedScript)
00307         m_cachedScript->deref(this);
00308 }
00309 
00310 
00311 void XMLTokenizer::begin()
00312 {
00313 }
00314 
00315 void XMLTokenizer::write( const QString &str, bool /*appendData*/ )
00316 {
00317     m_xmlCode += str;
00318 }
00319 
00320 void XMLTokenizer::end()
00321 {
00322     emit finishedParsing();
00323 }
00324 
00325 void XMLTokenizer::finish()
00326 {
00327     // parse xml file
00328     XMLHandler handler(m_doc,m_view);
00329     QXmlInputSource source;
00330     source.setData(m_xmlCode);
00331     QXmlSimpleReader reader;
00332     reader.setContentHandler( &handler );
00333     reader.setLexicalHandler( &handler );
00334     reader.setErrorHandler( &handler );
00335     reader.setDeclHandler( &handler );
00336     reader.setDTDHandler( &handler );
00337     bool ok = reader.parse( source );
00338 
00339     if (!ok) {
00340         // An error occurred during parsing of the code. Display an error page to the user (the DOM
00341         // tree is created manually and includes an excerpt from the code where the error is located)
00342 
00343         // ### for multiple error messages, display the code for each (can this happen?)
00344 
00345         // Clear the document
00346         int exceptioncode = 0;
00347         while (m_doc->document()->hasChildNodes())
00348             static_cast<NodeImpl*>(m_doc->document())->removeChild(m_doc->document()->firstChild(),exceptioncode);
00349 
00350         QTextIStream stream(&m_xmlCode);
00351         unsigned long lineno;
00352         for (lineno = 0; lineno < handler.errorLine-1; lineno++)
00353           stream.readLine();
00354         QString line = stream.readLine();
00355 
00356         unsigned long colno;
00357         QString errorLocPtr = "";
00358         for (colno = 0; colno < handler.errorCol-1; colno++)
00359             errorLocPtr += " ";
00360         errorLocPtr += "^";
00361 
00362         // Create elements for display
00363         DocumentImpl *doc = m_doc->document();
00364         NodeImpl *html = doc->createElementNS(XHTML_NAMESPACE,"html");
00365         NodeImpl   *body = doc->createElementNS(XHTML_NAMESPACE,"body");
00366         NodeImpl     *h1 = doc->createElementNS(XHTML_NAMESPACE,"h1");
00367         NodeImpl       *headingText = doc->createTextNode(i18n("XML parsing error"));
00368         NodeImpl     *errorText = doc->createTextNode(handler.errorProtocol());
00369         NodeImpl     *hr = doc->createElementNS(XHTML_NAMESPACE,"hr");
00370         NodeImpl     *pre = doc->createElementNS(XHTML_NAMESPACE,"pre");
00371         NodeImpl       *lineText = doc->createTextNode(line+"\n");
00372         NodeImpl       *errorLocText = doc->createTextNode(errorLocPtr);
00373 
00374         // Construct DOM tree. We ignore exceptions as we assume they will not be thrown here (due to the
00375         // fact we are using a known tag set)
00376         doc->appendChild(html,exceptioncode);
00377         html->appendChild(body,exceptioncode);
00378         body->appendChild(h1,exceptioncode);
00379         h1->appendChild(headingText,exceptioncode);
00380         body->appendChild(errorText,exceptioncode);
00381         body->appendChild(hr,exceptioncode);
00382         body->appendChild(pre,exceptioncode);
00383         pre->appendChild(lineText,exceptioncode);
00384         pre->appendChild(errorLocText,exceptioncode);
00385 
00386         // Close the renderers so that they update their display correctly
00387         // ### this should not be necessary, but requires changes in the rendering code...
00388         h1->renderer()->close();
00389         pre->renderer()->close();
00390         body->renderer()->close();
00391 
00392         m_doc->document()->recalcStyle( NodeImpl::Inherit );
00393         m_doc->document()->updateRendering();
00394 
00395         end();
00396     }
00397     else {
00398         // Parsing was successful. Now locate all html <script> tags in the document and execute them
00399         // one by one
00400         addScripts(m_doc->document());
00401         m_scriptsIt = new QPtrListIterator<HTMLScriptElementImpl>(m_scripts);
00402         executeScripts();
00403     }
00404 
00405 }
00406 
00407 void XMLTokenizer::addScripts(NodeImpl *n)
00408 {
00409     // Recursively go through the entire document tree, looking for html <script> tags. For each of these
00410     // that is found, add it to the m_scripts list from which they will be executed
00411 
00412     if (n->id() == ID_SCRIPT) {
00413         m_scripts.append(static_cast<HTMLScriptElementImpl*>(n));
00414     }
00415 
00416     NodeImpl *child;
00417     for (child = n->firstChild(); child; child = child->nextSibling())
00418         addScripts(child);
00419 }
00420 
00421 void XMLTokenizer::executeScripts()
00422 {
00423     // Iterate through all of the html <script> tags in the document. For those that have a src attribute,
00424     // start loading the script and return (executeScripts() will be called again once the script is loaded
00425     // and continue where it left off). For scripts that don't have a src attribute, execute the code
00426     // inside the tag
00427     while (m_scriptsIt->current()) {
00428         DOMString scriptSrc = m_scriptsIt->current()->getAttribute(ATTR_SRC);
00429         QString charset = m_scriptsIt->current()->getAttribute(ATTR_CHARSET).string();
00430 
00431         if (scriptSrc != "") {
00432             // we have a src attribute
00433             m_cachedScript = m_doc->document()->docLoader()->requestScript(scriptSrc, charset);
00434             ++(*m_scriptsIt);
00435             m_cachedScript->ref(this); // will call executeScripts() again if already cached
00436             return;
00437         }
00438         else {
00439             // no src attribute - execute from contents of tag
00440             QString scriptCode = "";
00441             NodeImpl *child;
00442             for (child = m_scriptsIt->current()->firstChild(); child; child = child->nextSibling()) {
00443                 if ( ( child->nodeType() == Node::TEXT_NODE || child->nodeType() == Node::CDATA_SECTION_NODE) &&
00444                      static_cast<TextImpl*>(child)->string() )
00445                     scriptCode += QConstString(static_cast<TextImpl*>(child)->string()->s,
00446                                                static_cast<TextImpl*>(child)->string()->l).string();
00447             }
00448             // the script cannot do document.write until we support incremental parsing
00449             // ### handle the case where the script deletes the node or redirects to
00450             // another page, etc. (also in notifyFinished())
00451             // ### the script may add another script node after this one which should be executed
00452             if (m_view) {
00453                 m_view->part()->executeScript(scriptCode);
00454             }
00455             ++(*m_scriptsIt);
00456         }
00457     }
00458 
00459     // All scripts have finished executing, so calculate the style for the document and close
00460     // the last element
00461     m_doc->document()->updateStyleSelector();
00462 
00463     // We are now finished parsing
00464     end();
00465 }
00466 
00467 void XMLTokenizer::notifyFinished(CachedObject *finishedObj)
00468 {
00469     // This is called when a script has finished loading that was requested from executeScripts(). We execute
00470     // the script, and then call executeScripts() again to continue iterating through the list of scripts in
00471     // the document
00472     if (finishedObj == m_cachedScript) {
00473         DOMString scriptSource = m_cachedScript->script();
00474         m_cachedScript->deref(this);
00475         m_cachedScript = 0;
00476         m_view->part()->executeScript(scriptSource.string());
00477         executeScripts();
00478     }
00479 }
00480 
00481 #include "xml_tokenizer.moc"
00482 
KDE Logo
This file is part of the documentation for kdelibs Version 3.1.0.
Documentation copyright © 1996-2002 the KDE developers.
Generated on Wed Oct 8 12:22:43 2003 by doxygen 1.2.18 written by Dimitri van Heesch, © 1997-2001