filters

kwordfilter.cpp

00001 /* This file is part of the KDE project
00002    Copyright (C) 2001 Ariya Hidayat <ariyahidayat@yahoo.de>
00003 
00004    This library is free software; you can redistribute it and/or
00005    modify it under the terms of the GNU Library General Public
00006    License as published by the Free Software Foundation; either
00007    version 2 of the License, or (at your option) any later version.
00008 
00009    This library is distributed in the hope that it will be useful,
00010    but WITHOUT ANY WARRANTY; without even the implied warranty of
00011    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012    Library General Public License for more details.
00013 
00014    You should have received a copy of the GNU Library General Public License
00015    along with this library; see the file COPYING.LIB.  If not, write to
00016    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00017  * Boston, MA 02110-1301, USA.
00018 */
00019 
00020 
00021 #include "kwordfilter.h"
00022 #include "parser.h"
00023 
00024 #include <qfileinfo.h>
00025 #include <qptrlist.h>
00026 #include <qstring.h>
00027 #include <qregexp.h>
00028 
00029 using namespace WP;
00030 
// Converts a length in WordPerfect units (wpu) to points.
// NOTE: 1 wpu = 1/1200 inch and 1 inch = 72 point
//
// The arithmetic is done in floating point: an unsigned computation would
// drop the fractional part and wrap around for very large inputs.
static double WPUToPoint( unsigned wpu )
{
    return wpu * 72.0 / 1200.0;
}
00036 
00037 // helper class
00038 class KWordFormat
00039 {
00040   public:
00041     bool bold, italic, underline, doubleunderline;
00042     bool striked, superscript, subscript, redline;
00043     bool color, highlight;
00044     int red, green, blue;
00045     int bgred, bggreen, bgblue;
00046     double fontsize;
00047     QString fontface;
00048 
00049     KWordFormat();
00050     QString asXML();
00051 };
00052 
00053 KWordFormat::KWordFormat()
00054 {
00055   bold = italic = underline = doubleunderline = FALSE;
00056   striked = superscript = subscript = redline = FALSE;
00057   color = highlight = FALSE;
00058   red = green = blue = 0;
00059   bgred = bggreen = bgblue = 255;
00060   fontsize = 0.0;
00061   fontface = "";
00062 }
00063 
00064 QString KWordFormat::asXML()
00065 {
00066   QString result;
00067 
00068   if( bold ) result.append( "    <WEIGHT value=\"75\" />\n" );
00069   if( italic ) result.append( "    <ITALIC value=\"1\" />\n" );
00070   if( underline ) result.append( "    <UNDERLINE value=\"1\" />\n" );
00071   if( doubleunderline ) result.append( "    <UNDERLINE value=\"double\" />\n" );
00072   if( striked ) result.append( "    <STRIKEOUT value=\"1\" />\n" );
00073   if( subscript ) result.append( "<VERTALIGN value=\"1\" />\n" );
00074   if( superscript ) result.append( "<VERTALIGN value=\"2\" />\n" );
00075 
00076   if( !fontface.isEmpty() )
00077     result.append( "<FONT name=\"" + fontface + "\" />\n" );
00078 
00079   if( fontsize > 0.0 )
00080     result.append( "    <SIZE value=\"" + QString::number(fontsize) + "\" />\n" );
00081 
00082   if( color )
00083     result.append( "    <COLOR red=\"" + QString::number(red) +
00084                    "\" green=\"" + QString::number(green) +
00085                    "\" blue=\"" + QString::number(blue) + "\" />\n" );
00086   
00087   if( highlight )
00088     result.append( "    <TEXTBACKGROUNDCOLOR red=\"" + QString::number(bgred) +
00089                    "\" green=\"" + QString::number(bggreen) +
00090                    "\" blue=\"" + QString::number(bgblue) + "\" />\n" );
00091 
00092   return result;
00093 }
00094 
00095 static QString mapAlign( Token::Align align )
00096 {
00097   switch( align )
00098   {
00099   case Token::Left: return "left";
00100   case Token::Right: return "right";
00101   case Token::Center: return "center";
00102   case Token::Full: return "justify";
00103   case Token::All: return "justify";
00104   }
00105   return "left";
00106 }
00107 
00108 // NOTE: KWord value for linespace: 72=one, 144=double, ..
00109 // Special case: "0" is normal, "oneandhalf" is 108, "double" is 144.
00110 static QString mapLinespace( double linespace )
00111 {
00112   return QString::number( linespace );
00113 }
00114 
00115 KWordFilter::KWordFilter ():Parser ()
00116 {
00117 }
00118 
00119 bool
00120 KWordFilter::parse (const QString & filename)
00121 {
00122   int frameLeftMargin = 36, frameRightMargin = 36; // quick hack
00123 
00124   if (!Parser::parse (filename))
00125     return FALSE;
00126 
00127   // this will force very last text and formatting to be flushed as well
00128   tokens.append( new Token( Token::HardReturn ) );
00129 
00130   QString text;
00131   QString layout;
00132   QString formats;
00133   int LeftMargin = 0;
00134   int TopMargin = 36;
00135   int RightMargin = 0;
00136   int BottomMargin = 36;
00137   int LeftMarginAdjust = 0;
00138   int RightMarginAdjust = 0;
00139   int lm = 0, rm = 0;
00140   Token::Align align = Token::Left;
00141   double linespace = 1.0;
00142 
00143   root = "";
00144 
00145   KWordFormat flag;
00146   int format_pos;
00147   QString fmt;
00148 
00149   // FIXME replace with doc initial code or default style
00150   format_pos = 0;
00151   fmt = flag.asXML();
00152 
00153   for (QPtrListIterator < Token > it (tokens); it; ++it)
00154     {
00155       unsigned int ucode;
00156       int attr;
00157       int len;
00158       Token *t = it.current ();
00159       Token::Type type = t->type ();
00160 
00161       switch (type)
00162         {
00163 
00164         case Token::Text:
00165           text.append( t->text() );
00166           break;
00167 
00168         case Token::SoftSpace:
00169         case Token::HardSpace:
00170           text.append( " " );
00171           break;
00172 
00173         case Token::SoftReturn:
00174           // ignore
00175           break;
00176 
00177         case Token::AttrOff:
00178         case Token::AttrOn:
00179         case Token::FontColor:
00180         case Token::FontSize:
00181         case Token::FontFace:
00182         case Token::HighlightOn:
00183         case Token::HighlightOff:
00184 
00185           if( type == Token::FontColor )
00186           {
00187             flag.color = true;
00188             flag.red = t->red();
00189             flag.green = t->green();
00190             flag.blue= t->blue();
00191           }
00192           else if( type == Token::HighlightOn )
00193           {
00194             flag.highlight = true;
00195             flag.bgred = t->red();
00196             flag.bggreen = t->green();
00197             flag.bgblue = t->blue();
00198           }
00199           else if( type == Token::HighlightOff )
00200           {
00201             // RGB in the data is last used highlight color
00202             // to go back to normal color, simply XOR would do the trick
00203             flag.highlight = false;
00204             flag.bgred ^= t->red();
00205             flag.bggreen ^= t->green();
00206             flag.bgblue ^= t->blue();
00207           }
00208           else if( type == Token::FontSize )
00209           {
00210             // WP font size is 1/3600th inch
00211             // NOTE 72 pt is 1 inch
00212             if( t->value() > 50 )
00213               flag.fontsize = t->value()*72.0/3600;
00214           }
00215           else if( type == Token::FontFace )
00216           {
00217             flag.fontface = t->fontface();
00218           }
00219           else
00220           {
00221             attr = t->attr();
00222             if( attr == Token::Bold ) flag.bold = ( type == Token::AttrOn );
00223             if( attr == Token::Italic ) flag.italic = ( type == Token::AttrOn );
00224             if( attr == Token::Underline) flag.underline = ( type == Token::AttrOn );
00225             if( attr == Token::DoubleUnderline ) flag.doubleunderline = ( type == Token::AttrOn );
00226             if( attr == Token::StrikedOut ) flag.striked = ( type == Token::AttrOn );
00227             if( attr == Token::Subscript ) flag.subscript = ( type == Token::AttrOn );
00228             if( attr == Token::Superscript ) flag.superscript = ( type == Token::AttrOn );
00229             if( attr == Token::Redline ) flag.redline = ( type == Token::AttrOn );
00230           }
00231 
00232           // process previous fmt first
00233           len = text.length() - format_pos;
00234           formats.append ( "<FORMAT id=\"1\" pos=\"" + QString::number( format_pos ) +
00235                              "\" len=\"" + QString::number( len )+ "\">\n" );
00236           formats.append( fmt );
00237           formats.append ( "</FORMAT>\n" );
00238 
00239           // now current format
00240           fmt = flag.asXML();
00241           format_pos= text.length();
00242 
00243           break;
00244 
00245         case Token::HardReturn:
00246         case Token::DormantHardReturn:
00247 
00248           // last formatting not flushed
00249           // SEE ABOVE
00250           len = text.length() - format_pos;
00251           formats.append ( "  <FORMAT id=\"1\" pos=\"" + QString::number( format_pos ) +
00252                              "\" len=\"" + QString::number( len )+ "\">\n" );
00253           formats.append( "  " + fmt );
00254           formats.append ( "  </FORMAT>\n" );
00255 
00256           layout = "";
00257           layout.append( "<LAYOUT>\n" );
00258           layout.append( "  <NAME value=\"Standard\" />\n" );
00259           layout.append( "  <FLOW align=\"" + mapAlign( align ) + "\" />\n" );
00260           layout.append( "  <LINESPACING value=\"" + mapLinespace( linespace) + "\" />\n" );
00261           layout.append( "  <LEFTBORDER width=\"0\" style=\"0\" />\n" );
00262           layout.append( "  <RIGHTBORDER width=\"0\" style=\"0\" />\n" );
00263           layout.append( "  <TOPBORDER width=\"0\" style=\"0\" />\n" );
00264           layout.append( "  <BOTTOMBORDER width=\"0\" style=\"0\" />\n" );
00265           lm = LeftMargin + LeftMarginAdjust - frameLeftMargin;
00266           rm = RightMargin + RightMarginAdjust - frameRightMargin;
00267           layout.append( "  <INDENTS left=\"" + QString::number( QMAX( 0, lm ) ) + "\"" + 
00268                          " right=\"" + QString::number( QMAX( 0 , rm ) ) + "\"" +
00269                          " first=\"0\" />\n" );
00270           layout.append( "  <OFFSETS />\n" );
00271           layout.append( "  <PAGEBREAKING />\n" );
00272           layout.append( "  <COUNTER />\n" );
00273           layout.append( "  <FORMAT id=\"1\">\n" );
00274           layout.append( "    <WEIGHT value=\"50\" />\n" );
00275           layout.append( "    <ITALIC value=\"0\" />\n" );
00276           layout.append( "    <UNDERLINE value=\"0\" />\n" );
00277           layout.append( "    <STRIKEOUT value=\"0\" />\n" );
00278           layout.append( "    <CHARSET value=\"0\" />\n" );
00279           layout.append( "    <VERTALIGN value=\"0\" />\n" );
00280           layout.append( "  </FORMAT>\n" );
00281           layout.append( "</LAYOUT>\n" );
00282 
00283           // encode text for XML-ness
00284           // FIXME could be faster without QRegExp
00285           text.replace( QRegExp("&"), "&amp;" );
00286           text.replace( QRegExp("<"), "&lt;" );
00287           text.replace( QRegExp(">"), "&gt;" );
00288           text.replace( QRegExp("\""), "&quot;" );
00289           text.replace( QRegExp("'"), "&apos;" );
00290 
00291           // construct the <PARAGRAPH>
00292           root.append( "<PARAGRAPH>\n" );
00293           root.append( "<TEXT>" + text + "</TEXT>\n" );
00294           root.append( "<FORMATS>\n");
00295           root.append( formats );
00296           root.append( "</FORMATS>\n");
00297           root.append( layout );
00298           root.append( "</PARAGRAPH>\n" );
00299 
00300           // for the next paragraph
00301           text = "";
00302           formats = "";
00303           format_pos = 0;
00304           fmt = flag.asXML();
00305 
00306           break;
00307 
00308         case Token::HardHyphen:
00309           text.append( "-" );
00310           break;
00311 
00312         case Token::LeftMargin:
00313           LeftMargin = (int) WPUToPoint( t->value() );
00314           break;
00315 
00316         case Token::RightMargin:
00317           RightMargin = (int) WPUToPoint( t->value() );
00318           break;
00319 
00320         case Token::TopMargin:
00321           TopMargin = (int) WPUToPoint( t->value() );
00322           break;
00323 
00324         case Token::BottomMargin:
00325           BottomMargin = (int) WPUToPoint( t->value() );
00326           break;
00327 
00328         case Token::LeftMarginAdjust:
00329           LeftMarginAdjust = (int)WPUToPoint( t->value() );
00330           break;
00331 
00332         case Token::RightMarginAdjust:
00333           RightMarginAdjust = (int)WPUToPoint( t->value() );
00334           break;
00335 
00336         case Token::Justification:
00337           align = t->align();
00338           break;
00339 
00340         case Token::Linespace:
00341           // NOTE assume 1.0 = 12 pt, 2.0 = 24 pt, 1.5=18
00342           // from parser.cpp, linespace is stored as 1/65536th inch
00343           linespace = t->value() * 12.0 / 65536;
00344           break;
00345 
00346         case Token::ExtChar:
00347           ucode = Parser::ExtCharToUnicode (t->charset (), t->charcode ());
00348           if (ucode == 0) ucode = 32;
00349           text.append( QChar (ucode) );
00350           break;
00351 
00352         case Token::TabHardFlushRight:
00353           // FIXME
00354           text.append( "    " );
00355           break;
00356 
00357         case Token::None:
00358         default:
00359           break;
00360         };
00361 
00362     }
00363 
00364   QString content = root;
00365 
00366   root = "<!DOCTYPE DOC>\n";
00367   root.append( "<DOC mime=\"application/x-kword\" syntaxVersion=\"2\" editor=\"KWord\">\n");
00368 
00369   // quick hack, think of something better in the future
00370   LeftMargin = RightMargin = 36;
00371 
00372   // paper definition
00373   root.append( "<PAPER width=\"595\" height=\"841\" format=\"1\" fType=\"0\" orientation=\"0\" hType=\"0\" columns=\"1\">\n" );
00374   root.append( " <PAPERBORDERS left=\"" + QString::number(frameLeftMargin) +
00375                "\" right=\"" + QString::number(frameRightMargin) +
00376                "\" top=\"" + QString::number(TopMargin) +
00377                "\" bottom=\"" + QString::number(BottomMargin) + "\" />\n" );
00378   root.append( "</PAPER>\n" );
00379 
00380   root.append( "<ATTRIBUTES standardpage=\"1\" hasFooter=\"0\" hasHeader=\"0\" processing=\"0\" />\n" );
00381 
00382   root.append( "<FRAMESETS>\n" );
00383   root.append( "<FRAMESET removable=\"0\" frameType=\"1\" frameInfo=\"0\" autoCreateNewFrame=\"1\">\n" );
00384   root.append( "<FRAME right=\"567\" left=\"28\" top=\"42\" bottom=\"799\" />\n" );
00385   root.append( content );
00386   root.append( "</FRAMESET>\n" );
00387   root.append( "</FRAMESETS>\n" );
00388 
00389   root.append( "</DOC>\n" );
00390 
00391   // in case no document summary is available, then make default
00392   // set so that basename of the filename becomes the document title
00393   // e.g /home/ariya/test/resume.wpd will have 'resume' as the title
00394   if( docTitle.isEmpty() )
00395   {
00396     QFileInfo info( filename );
00397     docTitle = info.baseName();
00398   }
00399 
00400   // create document information
00401   documentInfo = "<!DOCTYPE document-info>\n";
00402 
00403   documentInfo += "<document-info>\n";
00404   documentInfo += "<log><text></text></log>\n";
00405 
00406   documentInfo += "<author>\n";
00407   documentInfo += "<full-name>" + docAuthor + "</full-name>\n";
00408   documentInfo += "<title></title>\n";
00409   documentInfo += "<company></company>\n";
00410   documentInfo += "<email></email>\n";
00411   documentInfo += "<telephone></telephone>\n";
00412   documentInfo += "</author>\n";
00413 
00414   documentInfo += "<about>\n";
00415   documentInfo += "<abstract><![CDATA[" + docAbstract + "]]></abstract>\n";
00416   documentInfo += "<title>" + docTitle + "</title>\n";
00417   documentInfo += "</about>\n";
00418 
00419   documentInfo += "</document-info>";
00420 
00421   return TRUE;
00422 }
KDE Home | KDE Accessibility Home | Description of Access Keys