lib

KoXmlWriter.cpp

00001 /* This file is part of the KDE project
00002    Copyright (C) 2004 David Faure <faure@kde.org>
00003 
00004    This library is free software; you can redistribute it and/or
00005    modify it under the terms of the GNU Library General Public
00006    License as published by the Free Software Foundation; either
00007    version 2 of the License, or (at your option) any later version.
00008 
00009    This library is distributed in the hope that it will be useful,
00010    but WITHOUT ANY WARRANTY; without even the implied warranty of
00011    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012    Library General Public License for more details.
00013 
00014    You should have received a copy of the GNU Library General Public License
00015    along with this library; see the file COPYING.LIB.  If not, write to
00016    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00017  * Boston, MA 02110-1301, USA.
00018 */
00019 
00020 #include "KoXmlWriter.h"
00021 
00022 #include <kglobal.h> // kMin
00023 #include <kdebug.h>
00024 #include <qiodevice.h>
00025 #include <float.h>
00026 
// Size, in bytes, of the indentation buffer allocated in KoXmlWriter::init():
// one leading '\n' followed by spaces. writeIndent() never writes more than
// this many bytes, so deeper nesting levels are capped at this width.
static const int s_indentBufferLength = 100;
00028 
00029 KoXmlWriter::KoXmlWriter( QIODevice* dev, int indentLevel )
00030     : m_dev( dev ), m_baseIndentLevel( indentLevel )
00031 {
00032     init();
00033 }
00034 
00035 void KoXmlWriter::init()
00036 {
00037     m_indentBuffer = new char[ s_indentBufferLength ];
00038     memset( m_indentBuffer, ' ', s_indentBufferLength );
00039     *m_indentBuffer = '\n'; // write newline before indentation, in one go
00040 
00041     m_escapeBuffer = new char[s_escapeBufferLen];
00042 }
00043 
00044 KoXmlWriter::~KoXmlWriter()
00045 {
00046     delete[] m_indentBuffer;
00047     delete[] m_escapeBuffer;
00048 }
00049 
00050 void KoXmlWriter::startDocument( const char* rootElemName, const char* publicId, const char* systemId )
00051 {
00052     Q_ASSERT( m_tags.isEmpty() );
00053     writeCString( "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" );
00054     // There isn't much point in a doctype if there's no DTD to refer to
00055     // (I'm told that files that are validated by a RelaxNG schema cannot refer to the schema)
00056     if ( publicId ) {
00057         writeCString( "<!DOCTYPE " );
00058         writeCString( rootElemName );
00059         writeCString( " PUBLIC \"" );
00060         writeCString( publicId );
00061         writeCString( "\" \"" );
00062         writeCString( systemId );
00063         writeCString( "\"" );
00064         writeCString( ">\n" );
00065     }
00066 }
00067 
00068 void KoXmlWriter::endDocument()
00069 {
00070     // just to do exactly like QDom does (newline at end of file).
00071     writeChar( '\n' );
00072     Q_ASSERT( m_tags.isEmpty() );
00073 }
00074 
00075 // returns the value of indentInside of the parent
00076 bool KoXmlWriter::prepareForChild()
00077 {
00078     if ( !m_tags.isEmpty() ) {
00079         Tag& parent = m_tags.top();
00080         if ( !parent.hasChildren ) {
00081             closeStartElement( parent );
00082             parent.hasChildren = true;
00083             parent.lastChildIsText = false;
00084         }
00085         if ( parent.indentInside ) {
00086             writeIndent();
00087         }
00088         return parent.indentInside;
00089     }
00090     return true;
00091 }
00092 
00093 void KoXmlWriter::prepareForTextNode()
00094 {
00095     Tag& parent = m_tags.top();
00096     if ( !parent.hasChildren ) {
00097         closeStartElement( parent );
00098         parent.hasChildren = true;
00099         parent.lastChildIsText = true;
00100     }
00101 }
00102 
00103 void KoXmlWriter::startElement( const char* tagName, bool indentInside )
00104 {
00105     Q_ASSERT( tagName != 0 );
00106 
00107     // Tell parent that it has children
00108     bool parentIndent = prepareForChild();
00109 
00110     m_tags.push( Tag( tagName, parentIndent && indentInside ) );
00111     writeChar( '<' );
00112     writeCString( tagName );
00113     //kdDebug() << k_funcinfo << tagName << endl;
00114 }
00115 
00116 void KoXmlWriter::addCompleteElement( const char* cstr )
00117 {
00118     prepareForChild();
00119     writeCString( cstr );
00120 }
00121 
00122 
00123 void KoXmlWriter::addCompleteElement( QIODevice* indev )
00124 {
00125     prepareForChild();
00126     bool openOk = indev->open( IO_ReadOnly );
00127     Q_ASSERT( openOk );
00128     if ( !openOk )
00129         return;
00130     static const int MAX_CHUNK_SIZE = 8*1024; // 8 KB
00131     QByteArray buffer(MAX_CHUNK_SIZE);
00132     while ( !indev->atEnd() ) {
00133         Q_LONG len = indev->readBlock( buffer.data(), buffer.size() );
00134         if ( len <= 0 ) // e.g. on error
00135             break;
00136         m_dev->writeBlock( buffer.data(), len );
00137     }
00138 }
00139 
00140 void KoXmlWriter::endElement()
00141 {
00142     if ( m_tags.isEmpty() )
00143         kdWarning() << "Ouch, endElement() was called more times than startElement(). "
00144             "The generated XML will be invalid! "
00145             "Please report this bug (by saving the document to another format...)" << endl;
00146 
00147     Tag tag = m_tags.pop();
00148     //kdDebug() << k_funcinfo << " tagName=" << tag.tagName << " hasChildren=" << tag.hasChildren << endl;
00149     if ( !tag.hasChildren ) {
00150         writeCString( "/>" );
00151     }
00152     else {
00153         if ( tag.indentInside && !tag.lastChildIsText ) {
00154             writeIndent();
00155         }
00156         writeCString( "</" );
00157         Q_ASSERT( tag.tagName != 0 );
00158         writeCString( tag.tagName );
00159         writeChar( '>' );
00160     }
00161 }
00162 
00163 void KoXmlWriter::addTextNode( const char* cstr )
00164 {
00165     prepareForTextNode();
00166     char* escaped = escapeForXML( cstr, -1 );
00167     writeCString( escaped );
00168     if(escaped != m_escapeBuffer)
00169         delete[] escaped;
00170 }
00171 
00172 void KoXmlWriter::addProcessingInstruction( const char* cstr )
00173 {
00174     prepareForTextNode();
00175     writeCString( "<?" );
00176     addTextNode( cstr );
00177     writeCString( "?>");
00178 }
00179 
00180 void KoXmlWriter::addAttribute( const char* attrName, const char* value )
00181 {
00182     writeChar( ' ' );
00183     writeCString( attrName );
00184     writeCString("=\"");
00185     char* escaped = escapeForXML( value, -1 );
00186     writeCString( escaped );
00187     if(escaped != m_escapeBuffer)
00188         delete[] escaped;
00189     writeChar( '"' );
00190 }
00191 
00192 void KoXmlWriter::addAttribute( const char* attrName, double value )
00193 {
00194     QCString str;
00195     str.setNum( value, 'g', DBL_DIG );
00196     addAttribute( attrName, str.data() );
00197 }
00198 
00199 void KoXmlWriter::addAttributePt( const char* attrName, double value )
00200 {
00201     QCString str;
00202     str.setNum( value, 'g', DBL_DIG );
00203     str += "pt";
00204     addAttribute( attrName, str.data() );
00205 }
00206 
00207 void KoXmlWriter::writeIndent()
00208 {
00209     // +1 because of the leading '\n'
00210     m_dev->writeBlock( m_indentBuffer, kMin( indentLevel() + 1,
00211                                              s_indentBufferLength ) );
00212 }
00213 
00214 void KoXmlWriter::writeString( const QString& str )
00215 {
00216     // cachegrind says .utf8() is where most of the time is spent
00217     QCString cstr = str.utf8();
00218     m_dev->writeBlock( cstr.data(), cstr.size() - 1 );
00219 }
00220 
// Returns a NUL-terminated copy of @p source in which '<', '>', '"' and '&' are
// replaced by the entities &lt; &gt; &quot; and &amp; (the apostrophe case is
// compiled out).
//
// The result is built in m_escapeBuffer when it fits. Otherwise a larger buffer
// is allocated: in that case (return value != m_escapeBuffer) the caller owns
// the return value and must delete it (with []).
//
// @p length is the length of @p source, or -1 to have it computed with qstrlen()
// if the larger buffer turns out to be needed.
// NOTE(review): the loop stops only at the terminating NUL of @p source; @p length
// is used solely to size the fallback buffer. A caller passing a length shorter
// than the actual string would presumably overrun that buffer -- confirm callers.
char* KoXmlWriter::escapeForXML( const char* source, int length = -1 ) const
{
    // Be pessimistic about the expansion of the next char: stop using the
    // escape buffer 6 bytes before its end, 6 being the most one char can take.
    char* destBoundary = m_escapeBuffer + s_escapeBufferLen - 6;
    char* destination = m_escapeBuffer;
    char* output = m_escapeBuffer;
    const char* src = source; // src moves, source remains
    for ( ;; ) {
        if(destination >= destBoundary) {
            // When we come to realize that our escaped string is going to
            // be bigger than the escape buffer (this shouldn't happen very often...),
            // we drop the idea of using it, and we allocate a bigger buffer.
            // Note that this if() can only be hit once per call to the method,
            // provided length really is the length of source: the new buffer
            // is sized for the worst case.
            if ( length == -1 )
                length = qstrlen( source ); // expensive...
            uint newLength = length * 6 + 1; // worst case. 6 is due to &quot; and &apos;
            char* buffer = new char[ newLength ];
            destBoundary = buffer + newLength;
            // Carry over what was already escaped into the new buffer.
            uint amountOfCharsAlreadyCopied = destination - m_escapeBuffer;
            memcpy( buffer, m_escapeBuffer, amountOfCharsAlreadyCopied );
            output = buffer;
            destination = buffer + amountOfCharsAlreadyCopied;
        }
        switch( *src ) {
        case 60: // <
            memcpy( destination, "&lt;", 4 );
            destination += 4;
            break;
        case 62: // >
            memcpy( destination, "&gt;", 4 );
            destination += 4;
            break;
        case 34: // "
            memcpy( destination, "&quot;", 6 );
            destination += 6;
            break;
#if 0 // needed?
        case 39: // '
            memcpy( destination, "&apos;", 6 );
            destination += 6;
            break;
#endif
        case 38: // &
            memcpy( destination, "&amp;", 5 );
            destination += 5;
            break;
        case 0:
            // End of input: terminate the output and hand it back.
            *destination = '\0';
            return output;
        default:
            // Ordinary char: copy it and advance both pointers.
            *destination++ = *src++;
            continue;
        }
        // An entity was written; move past the char it replaces.
        ++src;
    }
    // NOTREACHED (see case 0)
    return output;
}
00282 
00283 void KoXmlWriter::addManifestEntry( const QString& fullPath, const QString& mediaType )
00284 {
00285     startElement( "manifest:file-entry" );
00286     addAttribute( "manifest:media-type", mediaType );
00287     addAttribute( "manifest:full-path", fullPath );
00288     endElement();
00289 }
00290 
00291 void KoXmlWriter::addConfigItem( const QString & configName, const QString& value )
00292 {
00293     startElement( "config:config-item" );
00294     addAttribute( "config:name", configName );
00295     addAttribute( "config:type",  "string" );
00296     addTextNode( value );
00297     endElement();
00298 }
00299 
00300 void KoXmlWriter::addConfigItem( const QString & configName, bool value )
00301 {
00302     startElement( "config:config-item" );
00303     addAttribute( "config:name", configName );
00304     addAttribute( "config:type",  "boolean" );
00305     addTextNode( value ? "true" : "false" );
00306     endElement();
00307 }
00308 
00309 void KoXmlWriter::addConfigItem( const QString & configName, int value )
00310 {
00311     startElement( "config:config-item" );
00312     addAttribute( "config:name", configName );
00313     addAttribute( "config:type",  "int");
00314     addTextNode(QString::number( value ) );
00315     endElement();
00316 }
00317 
00318 void KoXmlWriter::addConfigItem( const QString & configName, double value )
00319 {
00320     startElement( "config:config-item" );
00321     addAttribute( "config:name", configName );
00322     addAttribute( "config:type", "double" );
00323     addTextNode( QString::number( value ) );
00324     endElement();
00325 }
00326 
00327 void KoXmlWriter::addConfigItem( const QString & configName, long value )
00328 {
00329     startElement( "config:config-item" );
00330     addAttribute( "config:name", configName );
00331     addAttribute( "config:type", "long" );
00332     addTextNode( QString::number( value ) );
00333     endElement();
00334 }
00335 
00336 void KoXmlWriter::addConfigItem( const QString & configName, short value )
00337 {
00338     startElement( "config:config-item" );
00339     addAttribute( "config:name", configName );
00340     addAttribute( "config:type", "short" );
00341     addTextNode( QString::number( value ) );
00342     endElement();
00343 }
00344 
00345 void KoXmlWriter::addTextSpan( const QString& text )
00346 {
00347     QMap<int, int> tabCache;
00348     addTextSpan( text, tabCache );
00349 }
00350 
00351 void KoXmlWriter::addTextSpan( const QString& text, const QMap<int, int>& tabCache )
00352 {
00353     uint len = text.length();
00354     int nrSpaces = 0; // number of consecutive spaces
00355     bool leadingSpace = false;
00356     QString str;
00357     str.reserve( len );
00358 
00359     // Accumulate chars either in str or in nrSpaces (for spaces).
00360     // Flush str when writing a subelement (for spaces or for another reason)
00361     // Flush nrSpaces when encountering two or more consecutive spaces
00362     for ( uint i = 0; i < len ; ++i ) {
00363         QChar ch = text[i];
00364         if ( ch != ' ' ) {
00365             if ( nrSpaces > 0 ) {
00366                 // For the first space we use ' '.
00367                 // "it is good practice to use (text:s) for the second and all following SPACE 
00368                 // characters in a sequence." (per the ODF spec)
00369                 // however, per the HTML spec, "authors should not rely on user agents to render 
00370                 // white space immediately after a start tag or immediately before an end tag"
00371                 // (and both we and OO.o ignore leading spaces in <text:p> or <text:h> elements...)
00372                 if (!leadingSpace)
00373                 {
00374                     str += ' ';
00375                     --nrSpaces;
00376                 }
00377                 if ( nrSpaces > 0 ) { // there are more spaces
00378                     if ( !str.isEmpty() )
00379                         addTextNode( str );
00380                     str = QString::null;
00381                     startElement( "text:s" );
00382                     if ( nrSpaces > 1 ) // it's 1 by default
00383                         addAttribute( "text:c", nrSpaces );
00384                     endElement();
00385                 }
00386             }
00387             nrSpaces = 0;
00388             leadingSpace = false;
00389         }
00390         switch ( ch.unicode() ) {
00391         case '\t':
00392             if ( !str.isEmpty() )
00393                 addTextNode( str );
00394             str = QString::null;
00395             startElement( "text:tab" );
00396             if ( tabCache.contains( i ) )
00397                 addAttribute( "text:tab-ref", tabCache[i] + 1 );
00398             endElement();
00399             break;
00400         case '\n':
00401             if ( !str.isEmpty() )
00402                 addTextNode( str );
00403             str = QString::null;
00404             startElement( "text:line-break" );
00405             endElement();
00406             break;
00407         case ' ':
00408             if ( i == 0 )
00409                 leadingSpace = true;
00410             ++nrSpaces;
00411             break;
00412         default:
00413             str += text[i];
00414             break;
00415         }
00416     }
00417     // either we still have text in str or we have spaces in nrSpaces
00418     if ( !str.isEmpty() ) {
00419         addTextNode( str );
00420     }
00421     if ( nrSpaces > 0 ) { // there are more spaces
00422         startElement( "text:s" );
00423         if ( nrSpaces > 1 ) // it's 1 by default
00424             addAttribute( "text:c", nrSpaces );
00425         endElement();
00426     }
00427 }
KDE Home | KDE Accessibility Home | Description of Access Keys