filters

wpimport.cc

00001 /* This file is part of the KDE project
00002    Copyright (C) 2001-2005 Ariya Hidayat <ariya@kde.org>
00003 
00004    This library is free software; you can redistribute it and/or
00005    modify it under the terms of the GNU Library General Public
00006    License as published by the Free Software Foundation; either
00007    version 2 of the License, or (at your option) any later version.
00008 
00009    This library is distributed in the hope that it will be useful,
00010    but WITHOUT ANY WARRANTY; without even the implied warranty of
00011    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012    Library General Public License for more details.
00013 
00014    You should have received a copy of the GNU Library General Public License
00015    along with this library; see the file COPYING.LIB.  If not, write to
00016    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00017    Boston, MA 02110-1301, USA.
00018 */
00019 
00020 #include <config.h>
00021 
00022 #ifdef HAVE_UNISTD_H
00023 #include <unistd.h>
00024 #endif
00025 
00026 #include <kdebug.h>
00027 #include <KoFilterChain.h>
00028 #include <kgenericfactory.h>
00029 
00030 #include <wpimport.h>
00031 
00032 #include <stdio.h> // debug
00033 
// Plugin registration: WPImportFactory names the KGenericFactory instantiation
// for WPImport (with KoFilter as the second template argument). The macro below
// exports it for libwpimport, constructing the factory with "kofficefilters".
00034 typedef KGenericFactory<WPImport, KoFilter> WPImportFactory;
00035 K_EXPORT_COMPONENT_FACTORY( libwpimport, WPImportFactory( "kofficefilters" ) )
00036 
00037 #include <libwpd/libwpd.h>
00038 #include <libwpd/WPXStream.h>
00039 #include "DocumentHandler.hxx"
00040 #include "WordPerfectCollector.hxx"
00041 
00042 class WPXMemoryInputStream : public WPXInputStream
00043 {
00044 public:
00045         WPXMemoryInputStream(uint8_t *data, size_t size);
00046         virtual ~WPXMemoryInputStream();
00047 
00048         virtual bool isOLEStream() { return false; }
00049         virtual WPXInputStream * getDocumentOLEStream() { return NULL; }
00050 
00051         const virtual uint8_t *read(size_t numBytes, size_t &numBytesRead);
00052         virtual int seek(long offset, WPX_SEEK_TYPE seekType);
00053         virtual long tell();
00054         virtual bool atEOS();
00055 
00056 private:
00057         long m_offset;
00058         size_t m_size;
00059         uint8_t *m_data;
00060         uint8_t *m_tmpBuf;
00061 };
00062 
00063 
00064 WPXMemoryInputStream::WPXMemoryInputStream(uint8_t *data, size_t size) :
00065     WPXInputStream(false),
00066     m_offset(0),
00067     m_data(data),
00068     m_size(size),
00069     m_tmpBuf(NULL)
00070 {
00071 }
00072 
00073 WPXMemoryInputStream::~WPXMemoryInputStream()
00074 {
00075     delete [] m_tmpBuf;
00076     delete [] m_data;
00077 }
00078 
00079 const uint8_t * WPXMemoryInputStream::read(size_t numBytes, size_t &numBytesRead)
00080 {
00081     delete [] m_tmpBuf;
00082     int numBytesToRead;
00083 
00084     if ((m_offset+numBytes) < m_size)
00085         numBytesToRead = numBytes;
00086     else
00087         numBytesToRead = m_size - m_offset;
00088     
00089     numBytesRead = numBytesToRead; // about as paranoid as we can be..
00090 
00091     if (numBytesToRead == 0)
00092         return NULL;
00093 
00094     m_tmpBuf = new uint8_t[numBytesToRead];
00095     for (size_t i=0; i<numBytesToRead; i++)
00096     {
00097         m_tmpBuf[i] = m_data[m_offset];
00098         m_offset++;
00099     }
00100     
00101     return m_tmpBuf;
00102 }
00103 
00104 int WPXMemoryInputStream::seek(long offset, WPX_SEEK_TYPE seekType)
00105 {
00106     if (seekType == WPX_SEEK_CUR)
00107         m_offset += offset;
00108     else if (seekType == WPX_SEEK_SET)
00109         m_offset = offset;
00110 
00111     if (m_offset < 0)
00112         m_offset = 0;
00113     else if (m_offset >= m_size)
00114         m_offset = m_size;
00115 
00116     return 0;
00117 }
00118 
00119 long WPXMemoryInputStream::tell()
00120 {
00121     return m_offset;
00122 }
00123 
00124 bool WPXMemoryInputStream::atEOS()
00125 {
00126     if (m_offset >= m_size ) 
00127         return true; 
00128 
00129     return false;
00130 }
00131 
00132 class KWordHandler : public DocumentHandler
00133 {
00134 public:
00135         KWordHandler();
00136     virtual ~KWordHandler() {};
00137     void startDocument();
00138         void endDocument();
00139         void startElement(const char *psName, const WPXPropertyList &xPropList);
00140         void endElement(const char *psName);
00141         void characters(const WPXString &sCharacters);
00142     WPXString documentstring;
00143 private:
00144     bool isTagOpened;
00145     WPXString openedTagName;
00146 };
00147 
00148 KWordHandler::KWordHandler() :
00149     isTagOpened(false)
00150 {
00151 }
00152 
00153 void KWordHandler::startDocument()
00154 {
00155   documentstring.clear();
00156 }
00157 
00158 void KWordHandler::startElement(const char *psName, const WPXPropertyList &xPropList)
00159 {
00160     if (isTagOpened)
00161     {
00162         documentstring.append( ">" );
00163         isTagOpened = false;
00164     }
00165     WPXString tempString;
00166         tempString.sprintf("<%s", psName);
00167     documentstring.append( tempString );
00168         WPXPropertyList::Iter i(xPropList);
00169         for (i.rewind(); i.next(); )
00170         {
00171                 // filter out libwpd elements
00172                 if (strlen(i.key()) > 6 && strncmp(i.key(), "libwpd", 6) != 0)
00173         {
00174             tempString.sprintf(" %s=\"%s\"", i.key(), i()->getStr().cstr());
00175                         documentstring.append( tempString );
00176             }
00177     }
00178     isTagOpened = true;
00179     openedTagName.sprintf("%s", psName);
00180 }
00181 
00182 void KWordHandler::endElement(const char *psName)
00183 {
00184     if ((isTagOpened) && (openedTagName == psName))
00185         documentstring.append( " />" );
00186     else
00187     {
00188         WPXString tempString;
00189         tempString.sprintf("</%s>", psName);
00190             documentstring.append( tempString );
00191     }
00192     isTagOpened = false;
00193 }
00194 
00195 void KWordHandler::characters(const WPXString &sCharacters)
00196 {
00197     if (isTagOpened)
00198     {
00199         documentstring.append( ">" );
00200         isTagOpened = false;
00201     }
00202         documentstring.append( WPXString(sCharacters, true) );
00203 }
00204 
00205 
00206 void KWordHandler::endDocument()
00207 {
00208     if (isTagOpened)
00209     {
00210         documentstring.append( ">" );
00211         isTagOpened = false;
00212     }
00213 }
00214     
00215 
00216 
00217 WPImport::WPImport( KoFilter *, const char *, const QStringList& ):  KoFilter()
00218 {
00219 }
00220 
00221 KoFilter::ConversionStatus WPImport::convert( const QCString& from, const QCString& to )
00222 {
00223   // check for proper conversion
00224   if(to!= "application/vnd.sun.xml.writer" || from != "application/wordperfect" )
00225      return KoFilter::NotImplemented;
00226 
00227   // open input file
00228   const char* infile = m_chain->inputFile().latin1();
00229   FILE *f = fopen( infile, "rb" );
00230   if( !f )
00231      return KoFilter::StupidError;
00232   
00233   fseek( f, 0, SEEK_END );
00234   long fsize = ftell( f );
00235   fseek( f, 0, SEEK_SET );
00236   
00237   unsigned char* buf = new unsigned char[fsize];
00238   fread( buf, 1, fsize, f );
00239   fclose( f );
00240   
00241   // instream now owns buf, no need to delete buf later
00242   WPXMemoryInputStream instream = WPXMemoryInputStream( buf, fsize );
00243 
00244   WPDConfidence confidence = WPDocument::isFileFormatSupported(&instream, false);
00245   if( confidence == WPD_CONFIDENCE_NONE )
00246   {
00247     fprintf(stderr, "ERROR: We have no confidence that you are giving us a valid WordPerfect document.\n");
00248     return KoFilter::StupidError;
00249   }
00250   instream.seek(0, WPX_SEEK_SET);
00251 
00252   // open and parse the file    
00253   KWordHandler handler;
00254         
00255   WordPerfectCollector collector(&instream, &handler);
00256   
00257   if ( !collector.filter() ) return KoFilter::StupidError;
00258   
00259   // prepare storage
00260   KoStoreDevice* manifest = m_chain->storageFile( "META-INF/manifest.xml", KoStore::Write );
00261   if ( manifest )
00262     {
00263       QCString manifeststring = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\
00264 <!DOCTYPE manifest:manifest PUBLIC \"-//OpenOffice.org//DTD Manifest 1.0//EN\" \"Manifest.dtd\">\n\
00265 <manifest:manifest xmlns:manifest=\"http://openoffice.org/2001/manifest\">\n\
00266 <manifest:file-entry manifest:media-type=\"application/vnd.sun.xml.writer\" manifest:full-path=\"/\"/>\n\
00267 <manifest:file-entry manifest:media-type=\"text/xml\" manifest:full-path=\"content.xml\"/>\n\
00268 <manifest:file-entry manifest:media-type=\"text/xml\" manifest:full-path=\"styles.xml\"/>\n\
00269 </manifest:manifest>\n";
00270       manifest->writeBlock( (const char*) manifeststring, manifeststring.length() );
00271     }
00272     
00273   KoStoreDevice* styles = m_chain->storageFile( "styles.xml", KoStore::Write );
00274   if ( styles )
00275     {
00276       QCString stylesstring = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\
00277 <!DOCTYPE office:document-styles PUBLIC \"-//OpenOffice.org//DTD OfficeDocument 1.0//EN\" \"office.dtd\">\
00278 <office:document-styles xmlns:office=\"http://openoffice.org/2000/office\" xmlns:style=\"http://openoffice.org/2000/style\"\
00279  xmlns:text=\"http://openoffice.org/2000/text\" xmlns:table=\"http://openoffice.org/2000/table\"\
00280  xmlns:draw=\"http://openoffice.org/2000/drawing\" xmlns:fo=\"http://www.w3.org/1999/XSL/Format\"\
00281  xmlns:xlink=\"http://www.w3.org/1999/xlink\" xmlns:number=\"http://openoffice.org/2000/datastyle\"\
00282  xmlns:svg=\"http://www.w3.org/2000/svg\" xmlns:chart=\"http://openoffice.org/2000/chart\" xmlns:dr3d=\"http://openoffice.org/2000/dr3d\"\
00283  xmlns:math=\"http://www.w3.org/1998/Math/MathML\" xmlns:form=\"http://openoffice.org/2000/form\"\
00284  xmlns:script=\"http://openoffice.org/2000/script\" office:version=\"1.0\">\
00285 <office:styles>\
00286 <style:default-style style:family=\"paragraph\">\
00287 <style:properties style:use-window-font-color=\"true\" style:text-autospace=\"ideograph-alpha\"\
00288  style:punctuation-wrap=\"hanging\" style:line-break=\"strict\" style:writing-mode=\"page\"/>\
00289 </style:default-style>\
00290 <style:default-style style:family=\"table\"/>\
00291 <style:default-style style:family=\"table-row\"/>\
00292 <style:default-style style:family=\"table-column\"/>\
00293 <style:style style:name=\"Standard\" style:family=\"paragraph\" style:class=\"text\"/>\
00294 <style:style style:name=\"Text body\" style:family=\"paragraph\" style:parent-style-name=\"Standard\" style:class=\"text\"/>\
00295 <style:style style:name=\"List\" style:family=\"paragraph\" style:parent-style-name=\"Text body\" style:class=\"list\"/>\
00296 <style:style style:name=\"Header\" style:family=\"paragraph\" style:parent-style-name=\"Standard\" style:class=\"extra\"/>\
00297 <style:style style:name=\"Footer\" style:family=\"paragraph\" style:parent-style-name=\"Standard\" style:class=\"extra\"/>\
00298 <style:style style:name=\"Caption\" style:family=\"paragraph\" style:parent-style-name=\"Standard\" style:class=\"extra\"/>\
00299 <style:style style:name=\"Footnote\" style:family=\"paragraph\" style:parent-style-name=\"Standard\" style:class=\"extra\"/>\
00300 <style:style style:name=\"Endnote\" style:family=\"paragraph\" style:parent-style-name=\"Standard\" style:class=\"extra\"/>\
00301 <style:style style:name=\"Index\" style:family=\"paragraph\" style:parent-style-name=\"Standard\" style:class=\"index\"/>\
00302 <style:style style:name=\"Footnote Symbol\" style:family=\"text\">\
00303 <style:properties style:text-position=\"super 58%\"/>\
00304 </style:style>\
00305 <style:style style:name=\"Endnote Symbol\" style:family=\"text\">\
00306 <style:properties style:text-position=\"super 58%\"/>\
00307 </style:style>\
00308 <style:style style:name=\"Footnote anchor\" style:family=\"text\">\
00309 <style:properties style:text-position=\"super 58%\"/>\
00310 </style:style>\
00311 <style:style style:name=\"Endnote anchor\" style:family=\"text\">\
00312 <style:properties style:text-position=\"super 58%\"/>\
00313 </style:style>\
00314 <text:footnotes-configuration text:citation-style-name=\"Footnote Symbol\" text:citation-body-style-name=\"Footnote anchor\"\
00315  style:num-format=\"1\" text:start-value=\"0\" text:footnotes-position=\"page\" text:start-numbering-at=\"document\"/>\
00316 <text:endnotes-configuration text:citation-style-name=\"Endnote Symbol\" text:citation-body-style-name=\"Endnote anchor\"\
00317  text:master-page-name=\"Endnote\" style:num-format=\"i\" text:start-value=\"0\"/>\
00318 <text:linenumbering-configuration text:number-lines=\"false\" text:offset=\"0.1965inch\" style:num-format=\"1\"\
00319  text:number-position=\"left\" text:increment=\"5\"/>\
00320 </office:styles>\
00321 <office:automatic-styles>\
00322 <style:page-master style:name=\"PM0\">\
00323 <style:properties fo:margin-bottom=\"1.0000inch\" fo:margin-left=\"1.0000inch\" fo:margin-right=\"1.0000inch\" fo:margin-top=\"1.0000inch\"\
00324  fo:page-height=\"11.0000inch\" fo:page-width=\"8.5000inch\" style:print-orientation=\"portrait\">\
00325 <style:footnote-sep style:adjustment=\"left\" style:color=\"#000000\" style:distance-after-sep=\"0.0398inch\"\
00326  style:distance-before-sep=\"0.0398inch\" style:rel-width=\"25%\" style:width=\"0.0071inch\"/>\
00327 </style:properties>\
00328 </style:page-master>\
00329 <style:page-master style:name=\"PM1\">\
00330 <style:properties fo:margin-bottom=\"1.0000inch\" fo:margin-left=\"1.0000inch\" fo:margin-right=\"1.0000inch\" fo:margin-top=\"1.0000inch\"\
00331  fo:page-height=\"11.0000inch\" fo:page-width=\"8.5000inch\" style:print-orientation=\"portrait\">\
00332 <style:footnote-sep style:adjustment=\"left\" style:color=\"#000000\" style:rel-width=\"25%\"/>\
00333 </style:properties>\
00334 </style:page-master>\
00335 </office:automatic-styles>\
00336 <office:master-styles>\
00337 <style:master-page style:name=\"Standard\" style:page-master-name=\"PM0\"/>\
00338 <style:master-page style:name=\"Endnote\" style:page-master-name=\"PM1\"/>\
00339 </office:master-styles>\
00340 </office:document-styles>";
00341       styles->writeBlock( (const char*) stylesstring, stylesstring.length() );
00342     }
00343   
00344   KoStoreDevice* out = m_chain->storageFile( "content.xml", KoStore::Write );
00345 
00346   if( out )
00347       out->writeBlock( (const char*) handler.documentstring.cstr(), strlen(handler.documentstring.cstr()) );
00348 
00349   return KoFilter::OK;
00350 }
00351 
00352 #include "wpimport.moc"
KDE Home | KDE Accessibility Home | Description of Access Keys