filters

libmswrite.cpp

00001 /* This file is part of the LibMSWrite Library
00002    Copyright (C) 2001-2003 Clarence Dang <clarencedang@users.sourceforge.net>
00003 
00004    This library is free software; you can redistribute it and/or
00005    modify it under the terms of the GNU Library General Public
00006    License Version 2 as published by the Free Software Foundation.
00007 
00008    This library is distributed in the hope that it will be useful,
00009    but WITHOUT ANY WARRANTY; without even the implied warranty of
00010    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00011    Library General Public License Version 2 for more details.
00012 
00013    You should have received a copy of the GNU Library General Public License
00014    Version 2 along with this library; see the file COPYING.LIB.  If not,
00015    write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00016  * Boston, MA 02110-1301, USA.
00017 
00018    LibMSWrite Project Website:
00019    http://sourceforge.net/projects/libmswrite/
00020 */
00021 
00022 #include <assert.h>
00023 #include <stdio.h>
00024 #include <string.h>
00025 #include <ctype.h>
00026 
00027 #include "libmswrite.h"
00028 
00029 namespace MSWrite
00030 {
00031     FormatInfo::FormatInfo () : m_nextChar (0),
00032                                             m_leftMargin (0xFFFF), m_rightMargin (0xFFFF),
00033                                             m_fontTable (NULL)
00034     {
00035         // m_type =
00036     }
00037 
00038     FormatInfo::~FormatInfo ()
00039     {
00040     }
00041 
00042     bool FormatInfo::readFromDevice (void)
00043     {
00044     CHECK_DEVICE;
00045 
00046     #ifdef DEBUG_FORMATINFO
00047         if (m_type == ParaType)
00048             m_device->debug ("\n<<<< libmswrite.cpp::FormatInfo::readFromDevice (ParaType) >>>>\n");
00049         else    // if (m_type == CharType)
00050             m_device->debug ("\n<<<< libmswrite.cpp::FormatInfo::readFromDevice (CharType) >>>>\n");
00051     #endif
00052 
00053         int formatInfoPageNum;
00054         if (m_type == ParaType)
00055             formatInfoPageNum = m_header->getNumPageParaInfo ();
00056         else    // if (m_type == CharType)
00057             formatInfoPageNum = m_header->getNumPageCharInfo ();
00058 
00059 
00060     #ifdef DEBUG_FORMATINFO
00061         m_device->debug ("numpages format info=", formatInfoPageNum);
00062     #endif
00063 
00064         // you have to have information pages that cover the _entire_ document (if it's not empty that is)
00065         if (m_header->getNumCharBytes () && formatInfoPageNum == 0)
00066         {
00067             if (m_type == ParaType)
00068             {
00069                 ErrorAndQuit (Error::InvalidFormat, "no paragraph formatting information page\n");
00070             }
00071             else    // if (m_type == CharType)
00072             {
00073                 ErrorAndQuit (Error::InvalidFormat, "no character formatting information page\n");
00074             }
00075         }
00076 
00077         // seek to start of info pages
00078         if (!m_device->seek (((m_type == ParaType) ?
00079                                         m_header->getPageParaInfo () : m_header->getPageCharInfo ()) * 128, SEEK_SET))
00080             return false;
00081 
00082         //
00083         // read in every info page
00084         // (this eats up all your memory...)
00085         //
00086         // The reason this is done is because infoPages (formatting information) are required
00087         // in between reads of text and on most devices, continually seeking back and forth
00088         // between the text pages and information pages is inconvenient and inefficient to say
00089         // the least.
00090         //
00091         // Generally speaking there is more text than formatting information some it is probably
00092         // cheaper (memory-wise) to cache the formatting in memory, rather than the text.
00093         //
00094         // A few more good reasons for caching formatting information:
00095         //
00096         // * we don't actually expand the formatting pages into the full FormatPointers
00097         //   and FormatProperty's (until begin()/next() are called) so we save quite a bit of
00098         //   memory
00099         // * some filters need to know information like the number of objects in advance
00100         //   so an extra parse of the formatting information (already cached :)) is required
00101         //
00102         for (int i = 0; i < formatInfoPageNum; i++)
00103         {
00104             if (!m_formatInfoPageList.addToBack ())
00105                 ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for an information page\n");
00106 
00107             FormatInfoPage &fip = *m_formatInfoPageList.begin (false);
00108             if (m_device->bad ()) return false;
00109 
00110             fip.setHeader (m_header);
00111             fip.setDevice (m_device);
00112             fip.setType (m_type);
00113             
00114             if (m_type == ParaType)
00115                 fip.setMargins (m_leftMargin, m_rightMargin);
00116             else    // if (m_type == CharType)
00117                 fip.setFontTable (m_fontTable);
00118 
00119             if (!fip.readFromDevice ())
00120                 return false;
00121         }
00122 
00123         return true;
00124     }
00125 
00126     bool FormatInfo::writeToDevice (const void *defaultProperty)
00127     {
00128     CHECK_DEVICE;
00129 
00130     #ifdef DEBUG_FORMATINFO
00131         if (m_type == ParaType)
00132             m_device->debug ("\n<<<< libmswrite.cpp::FormatInfo::writeToDevice (ParaType) >>>>\n");
00133         else    // if (m_type == CharType)
00134             m_device->debug ("\n<<<< libmswrite.cpp::FormatInfo::writeToDevice (CharType) >>>>\n");
00135     #endif
00136 
00137         // set page number of formatting info in header
00138         if (m_type == ParaType)
00139             m_header->setPageParaInfo (m_device->tellInternal () / 128);
00140         else    // if (m_type == CharType)
00141         {
00142             // ms programmers were so space conservative that they didn't
00143             // store charInfo page
00144             //m_header->setPageCharInfo (m_device->tellInternal () / 128);
00145         }
00146 
00147         //
00148         // No formatting table?
00149         //
00150         // Mimick what Write does and make one.
00151         // It seems that only a fontTable is mandatory but this is conveniently
00152         // updated when adding to a character formatting page.
00153         //
00154         if (!m_formatInfoPageList.getNumElements ())
00155         {
00156         #ifdef DEBUG_FORMATINFO
00157             m_device->debug ("\tno formatting information pages, creating one\n");
00158         #endif
00159         
00160             // --- but we should only get in here if numCharBytes = 0 ---
00161             
00162             if (m_header->getNumCharBytes ())
00163             {
00164                 if (m_type == ParaType)
00165                     m_device->error (Error::Warn, "data but no paragraph formatting info\n");
00166                 else    // if (m_type == CharType)
00167                     m_device->error (Error::Warn, "data but no character formatting info\n");
00168             }
00169     
00170             long currentOffset = m_device->tellInternal ();
00171                 if (!m_device->seekInternal (128 + m_header->getNumCharBytes (), SEEK_SET)) return false;
00172                 if (!add (defaultProperty, true/* force */)) return false;
00173             if (!m_device->seekInternal (currentOffset, SEEK_SET)) return false;
00174         }
00175 
00176         List <FormatInfoPage>::Iterator it;
00177         for (it = m_formatInfoPageList.begin (); it != m_formatInfoPageList.end (); ++it)
00178         {
00179             FormatInfoPage &fip = *it;
00180 
00181             // OPT: should already have done in add but we have to handle the impexp case
00182             fip.setHeader (m_header);
00183             fip.setDevice (m_device);
00184             fip.setType (m_type);
00185             
00186             if (m_type == ParaType)
00187                 fip.setMargins (m_leftMargin, m_rightMargin);
00188             else    // if (m_type == CharType)
00189                 fip.setFontTable (m_fontTable);
00190 
00191             if (!fip.writeToDevice ())
00192                 return false;
00193         }
00194 
00195         return true;
00196     }
00197 
00198     void *FormatInfo::begin (void)
00199     {
00200         m_nextChar = 0;
00201 
00202         m_formatInfoPageIterator = m_formatInfoPageList.begin ();
00203 
00204         if (m_formatInfoPageIterator == m_formatInfoPageList.end ())
00205             return NULL;
00206 
00207         void *ret = (*m_formatInfoPageIterator).begin ();
00208 
00209         if (ret)
00210         {
00211             if (m_type == ParaType)
00212                 m_nextChar = ((FormatParaProperty *) ret)->getAfterEndCharByte ();
00213             else    // if (m_type == CharType)
00214                 m_nextChar = ((FormatCharProperty *) ret)->getAfterEndCharByte ();
00215         }
00216 
00217         return ret;
00218     }
00219 
00220     void *FormatInfo::next (void)
00221     {
00222     #ifdef CHECK_INTERNAL
00223         if (!m_device)
00224         {
00225             CHECK_DEVICE_ERROR;
00226             return NULL;
00227         }
00228     #endif
00229 
00230         void *ret = NULL;
00231         if ((*m_formatInfoPageIterator).end ())
00232         {
00233             m_formatInfoPageIterator++;
00234 
00235             // out of pages?
00236             if (m_formatInfoPageIterator == m_formatInfoPageList.end ())
00237                 return NULL;
00238 
00239             if ((*m_formatInfoPageIterator).getFirstCharByte () != m_nextChar)
00240                 m_device->error (Error::Warn, "FormatInfoPage::firstCharByte does not flow on from nextChar\n");
00241 
00242             ret = (*m_formatInfoPageIterator).begin ();
00243         }
00244 
00245         if (!ret)
00246             ret = (*m_formatInfoPageIterator).next ();
00247 
00248         if (ret)
00249         {
00250             if (m_type == ParaType)
00251                 m_nextChar = ((FormatParaProperty *) ret)->getAfterEndCharByte ();
00252             else    // if (m_type == CharType)
00253                 m_nextChar = ((FormatCharProperty *) ret)->getAfterEndCharByte ();
00254         }
00255 
00256         return ret;
00257     }
00258 
00259     bool FormatInfo::end (void) /*const*/
00260     {
00261         return m_formatInfoPageIterator == m_formatInfoPageList.end ();
00262     }
00263 
00264     bool FormatInfo::add (const void *property, const bool force)
00265     {
00266     CHECK_DEVICE;
00267 
00268     #ifdef DEBUG_FORMATINFO
00269         m_device->debug (">>>> FormatInfo::add <<<<\n");
00270     #endif
00271     
00272         DWord currentChar = m_device->tellInternal () - 128;
00273         
00274         // so that export filter writers can be lazy...
00275         if (m_nextChar == currentChar && !force)
00276         {
00277         #ifdef DEBUG_FORMATINFO
00278             m_device->debug ("\tEmpty FormatProperty, ignoring\n");
00279         #endif
00280             return true;
00281         }
00282 
00283         bool needToAllocate = false;
00284 
00285         if (m_formatInfoPageList.getNumElements ())
00286         {
00287             FormatInfoPage &fip = *m_formatInfoPageList.begin (false);
00288             if (!fip.add (property))
00289             {
00290                 // a real error
00291                 if (m_device->bad ())
00292                     return false;
00293 
00294                 needToAllocate = true;
00295             }
00296         }
00297         else
00298             needToAllocate = true;
00299 
00300         if (needToAllocate)
00301         {
00302             #ifdef DEBUG_FORMATINFO
00303                 m_device->debug ("\tneedToAllocate=yes, FormatInfoPage::firstCharByte=", m_nextChar);
00304             #endif
00305                 
00306             if (!m_formatInfoPageList.addToBack ())
00307                 ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for new formatInfoPage\n");
00308             
00309             FormatInfoPage &fip = *m_formatInfoPageList.begin (false);
00310             fip.setDevice (m_device);
00311             fip.setFirstCharByte (m_nextChar);
00312             fip.setType (m_type);
00313             
00314             if (m_type == ParaType)
00315                 fip.setMargins (m_leftMargin, m_rightMargin);
00316             else    // if (m_type == CharType)
00317             {
00318                 assert (m_fontTable);
00319                 fip.setFontTable (m_fontTable);
00320             }
00321             
00322             if (!fip.add (property))
00323                 return false;
00324         }
00325 
00326         m_nextChar = currentChar;
00327         return true;
00328     }
00329 
00330 
00331     Generator::Generator ()
00332     {
00333     }
00334 
00335     Generator::~Generator ()
00336     {
00337     }
00338 
00339     bool Generator::processText (const Byte *string, bool willReachEndOfParagraph)
00340     {
00341         //
00342         // Look out for characters in the string and emit signals as appropriate:
00343         //
00344         //  1   pageNumber
00345         //  10  newLine
00346         //  13  carriageReturn
00347         //  12  pageBreak
00348         //  31  optionalHyphen
00349         //  ?   text
00350         //
00351 
00352         const int outBufferMaxLen = 1024;
00353         Byte outBuffer [outBufferMaxLen];
00354         DWord outBufferLen = 0;
00355 
00356         for (; *string; string++)
00357         {
00358             // buffer full?
00359             if (outBufferLen >= outBufferMaxLen - 1)
00360             {
00361                 // flush
00362                 outBuffer [outBufferMaxLen - 1] = '\0';
00363                 if (!writeText (outBuffer)) return false;
00364                 outBufferLen = 0;
00365             }
00366         
00367             switch (*string)
00368             {
00369             // write text, generate signals for special characters, write more text...
00370             case 1:     // pageNumber anchor
00371             case 12:        // pageBreak (some silly document might have this in the middle of a paragraph!)
00372             case 13:
00373             case 10:        // newLine (some silly document _does_ have a newline in the middle of a paragraph!)
00374             case 31:    // optionalHyphen (aka "soft hyphen" an invisible hyphen, unless at end of line)
00375                 // output text before this character
00376                 if (outBufferLen)
00377                 {
00378                     outBuffer [outBufferLen] = 0;   // null terminate
00379                     if (!writeText (outBuffer)) return false;
00380                     outBufferLen = 0;
00381                 }
00382 
00383                 // generate signal
00384                 switch (*string)
00385                 {
00386                 case 1: if (!writePageNumber ()) return false;  break;
00387                 case 12:    if (!writePageBreak ()) return false;   break;
00388                 case 10:    if (!writeNewLine (willReachEndOfParagraph && string [1] == 0)) return false;   break;
00389                 case 13:    if (!writeCarriageReturn ()) return false;  break;
00390                 case 31: if (!writeOptionalHyphen ()) return false; break;
00391                 }
00392 
00393                 break;
00394 
00395             // normal text character
00396             default:
00397                 outBuffer [outBufferLen++] = *string;
00398                 break;
00399             }   // switch (*string) {
00400         }   // for (; *string; string++)    {
00401 
00402         // flush
00403         if (outBufferLen)
00404         {
00405             outBuffer [outBufferLen] = 0;
00406             if (!writeText (outBuffer)) return false;
00407         }
00408 
00409         return true;
00410     }
00411 
00412 
00413     InternalParser::InternalParser () : m_header (NULL),
00414                                                     m_sectionTable (NULL),
00415                                                     m_pageLayout (NULL),
00416                                                     m_pageTable (NULL),
00417                                                     m_fontTable (NULL),
00418                                                     m_paragraphInfo (NULL),
00419                                                     m_characterInfo (NULL),
00420                                                     m_image (NULL),
00421                                                     m_ole (NULL)
00422     {
00423     }
00424 
00425     InternalParser::~InternalParser ()
00426     {
00427         delete m_ole;
00428         delete m_image;
00429 
00430         delete m_characterInfo;
00431         delete m_paragraphInfo;
00432         delete m_fontTable;
00433         delete m_pageTable;
00434         delete m_pageLayout;
00435         delete m_sectionTable;
00436         delete m_header;
00437     }
00438 
00439     // use this parser for "import" filters
00440     bool InternalParser::parse (void)
00441     {
00442         if (!m_device)
00443         {
00444             fprintf (stderr, "INTERNAL ERROR: InternalParser::parse() called without a device\n");
00445             return false;   // cannot use ErrorAndQuit() because that calls m_device->error()
00446         }
00447 
00448         if (!m_generator)
00449             ErrorAndQuit (Error::InternalError, "generator not passed to parser\n");
00450 
00451         //
00452         // allocate memory
00453         //
00454 
00455         m_header = new Header;
00456         if (!m_header)
00457             ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for header\n");
00458 
00459         m_sectionTable = new SectionTable;
00460         if (!m_sectionTable)
00461             ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for sectionTable\n");
00462 
00463         m_pageLayout = new PageLayout;
00464         if (!m_pageLayout)
00465             ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for pageLayout\n");
00466 
00467         m_pageTable = new PageTable;
00468         if (!m_pageTable)
00469             ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for pageTable\n");
00470 
00471         m_fontTable = new FontTable;
00472         if (!m_fontTable)
00473             ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for fontTable\n");
00474 
00475         m_paragraphInfo = new FormatInfo;
00476         if (!m_paragraphInfo)
00477             ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for paragraphInfo\n");
00478 
00479         m_characterInfo = new FormatInfo;
00480         if (!m_characterInfo)
00481             ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for charInfo\n");
00482 
00483         //
00484         // Read everything from .WRI file
00485         //
00486 
00487         m_header->setDevice (m_device);
00488         if (!m_header->readFromDevice ()) return false;
00489         DWord numCharBytes = m_header->getNumCharBytes ();
00490 
00491         m_sectionTable->setHeader (m_header);
00492         m_sectionTable->setDevice (m_device);
00493         if (!m_sectionTable->readFromDevice ())
00494             return false;
00495 
00496         m_pageLayout->setHeader (m_header);
00497         m_pageLayout->setDevice (m_device);
00498         if (!m_pageLayout->readFromDevice ())
00499             return false;
00500 
00501         m_pageTable->setPageNumberStart (m_pageLayout->getPageNumberStart ());
00502         m_pageTable->setHeader (m_header);
00503         m_pageTable->setDevice (m_device);
00504         if (!m_pageTable->readFromDevice ())
00505             return false;
00506 
00507         m_fontTable->setHeader (m_header);
00508         m_fontTable->setDevice (m_device);
00509         if (!m_fontTable->readFromDevice ())
00510             return false;
00511 
00512         m_paragraphInfo->setHeader (m_header);
00513         m_paragraphInfo->setDevice (m_device);
00514         m_paragraphInfo->setType (ParaType);
00515         m_paragraphInfo->setMargins (m_pageLayout->getLeftMargin (), m_pageLayout->getRightMargin ());
00516         if (!m_paragraphInfo->readFromDevice ())
00517             return false;
00518 
00519         m_characterInfo->setHeader (m_header);
00520         m_characterInfo->setDevice (m_device);
00521         m_characterInfo->setType (CharType);
00522         m_characterInfo->setFontTable (m_fontTable);
00523         if (!m_characterInfo->readFromDevice ())
00524             return false;
00525 
00526 #if 0
00527         DocumentInfo documentInfo;
00528         FormatParaProperty *paraProperty = (FormatParaProperty *) m_paragraphInfo->begin ();
00529         int numObjects = 0;
00530         while (paraProperty)
00531         {
00532             if (paraProperty->getIsObject ())
00533                 numObjects++;
00534             if (paraProperty->getIsHeader ())
00535             {
00536                 documentInfo.setHasHeader (true);
00537                 if (paraProperty->getIsOnFirstPage ())
00538                     documentInfo.setHasHeaderOnFirstPage (true);
00539             }
00540             if (paraProperty->getIsFooter ())
00541             {
00542                 documentInfo.setHasFooter (true);
00543                 if (paraProperty->getIsOnFirstPage ())
00544                     documentInfo.setHasFooterOnFirstPage (true);
00545             }
00546             paraProperty = (FormatParaProperty *) m_paragraphInfo->next ();
00547         }
00548         documentInfo.setNumObjects (numObjects);
00549 #endif
00550 
00551         // Get Ready!
00552         //
00553 
00554         enum Section { InNothing, InBody, InHeader, InFooter } inWhat = InNothing;
00555 
00556         // must call writeBody*() even if document doesn't have one
00557         bool wroteBody = false;
00558 
00559         PagePointer *pp = m_pageTable->begin ();
00560         if (m_device->bad ()) return false;
00561 
00562         //
00563         // Signal callbacks
00564         //
00565         #ifdef DEBUG_INTERNALPARSER
00566             m_device->debug ("@@@ InternalParser: start of document write\n");
00567         #endif
00568 
00569         if (!m_generator->writeDocumentBegin (m_header->getMagic (), m_pageLayout)) return false;
00570         m_generator->sigProgress (0);
00571 
00572         // start of text
00573         if (!m_device->seekInternal (1 * 128, SEEK_SET))
00574             return false;
00575 
00576         FormatParaProperty *paraProp = (FormatParaProperty *) m_paragraphInfo->begin ();
00577         if (m_device->bad ()) return false;
00578         FormatCharProperty *charProp = (FormatCharProperty *) m_characterInfo->begin ();
00579         if (m_device->bad ()) return false;
00580         DWord paraStartByte = 0;
00581         
00582         if (numCharBytes) while (paraProp)  // loop if not empty document
00583         {
00584         #ifdef DEBUG_INTERNALPARSER
00585             m_device->debug ("@@@ InternalParser: Start of loop - section write\n");
00586         #endif
00587             //
00588             // Section work
00589             //
00590 
00591             enum Section inWhatNext = InNothing;
00592             if (paraProp->getIsFooter ())
00593                 inWhatNext = InFooter;
00594             else if (paraProp->getIsHeader ())
00595                 inWhatNext = InHeader;
00596             else
00597                 inWhatNext = InBody;
00598 
00599             // beginning of a new section?
00600             if (inWhatNext != inWhat)
00601             {
00602                 // end last thing we were in
00603                 switch (inWhat)
00604                 {
00605                 case InFooter:  if (!m_generator->writeFooterEnd ()) return false; else break;
00606                 case InHeader:  if (!m_generator->writeHeaderEnd ()) return false; else break;
00607                 case InBody:    if (!m_generator->writeBodyEnd ()) return false; else break;
00608                 default:    break;  // keep compiler happy
00609                 }
00610 
00611                 // start next section
00612                 switch (inWhat = inWhatNext)
00613                 {
00614                 case InFooter:  if (!m_generator->writeFooterBegin ()) return false; else break;
00615                 case InHeader:  if (!m_generator->writeHeaderBegin ()) return false; else break;
00616                 case InBody:    if (!m_generator->writeBodyBegin ()) return false;
00617                     // if there's not pageTable, manually signal start of page for the first and last time
00618                     if (!pp)
00619                         if (!m_generator->writePageNew ())
00620                             return false;
00621 
00622                     wroteBody = true;
00623                 default:    break;  // keep compiler happy
00624                 }
00625             }
00626 
00627 
00628             //
00629             // start paragraph
00630             //
00631 
00632             bool paraIsText = paraProp->getIsText ();
00633             int objectType = ObjectType::NotObject;
00634 
00635             DWord paraEndByte, paraAfterEndByte;
00636             paraAfterEndByte = paraProp->getAfterEndCharByte ();
00637             paraEndByte = paraAfterEndByte - 1;
00638 
00639             if (paraIsText)
00640             {
00641             #ifdef DEBUG_INTERNALPARSER
00642                 m_device->debug ("@@@ InternalParser: Start of paragraph write\n");
00643             #endif
00644 
00645                 // signal paragraph
00646                 if (!m_generator->writeParaInfoBegin (paraProp, NULL, NULL)) return false;
00647             }
00648             else
00649             {
00650                 //
00651                 // Determine whether the object is OLE or not
00652                 // Yes, I know this isn't entirely clean but
00653                 // name a cleaner and more efficient way of doing this...
00654                 //
00655 
00656                 Byte data [2];
00657                 Word mappingMode;
00658 
00659                 if (!m_device->readInternal (data, 2)) return false;
00660                 ReadWord (mappingMode, data);
00661                 if (!m_device->seekInternal (-2, SEEK_CUR)) return false;   // ungetc()x2
00662 
00663                 switch (mappingMode)
00664                 {
00665                 case 0xE4:
00666                     objectType = ObjectType::OLE;
00667 
00668                     m_ole = new OLE;
00669                     if (!m_ole)
00670                         ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for OLE object\n");
00671 
00672                     m_ole->setDevice (m_device);
00673                     if (!m_ole->readFromDevice ())
00674                         return false;
00675 
00676                     if (!m_generator->writeParaInfoBegin (paraProp, m_ole, NULL)) return false;
00677                     break;
00678                 case 0xE3:  // monochrome bitmap?
00679                     objectType = ObjectType::BMP;
00680 
00681                     m_image = new Image;
00682                     if (!m_image)
00683                         ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for BMP image object\n");
00684 
00685                     m_image->setDevice (m_device);
00686                     if (!m_image->readFromDevice ())
00687                         return false;
00688 
00689                     if (!m_generator->writeParaInfoBegin (paraProp, NULL, m_image)) return false;
00690                     break;
00691                 default:
00692                     objectType = ObjectType::WMF;
00693 
00694                     m_image = new Image;
00695                     if (!m_image)
00696                         ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for WMF image object\n");
00697 
00698                     m_image->setDevice (m_device);
00699                     if (!m_image->readFromDevice ())
00700                         return false;
00701 
00702                     if (!m_generator->writeParaInfoBegin (paraProp, NULL, m_image)) return false;
00703                     break;
00704                 }
00705 
00706                 // image: BMP/WMF
00707                 if (objectType != ObjectType::OLE)
00708                 {
00709                     if (m_image->getNumHeaderBytes () + m_image->getNumDataBytes ()
00710                             != paraAfterEndByte - paraStartByte)
00711                     {
00712                         m_device->error (Error::Warn, "imageHeader: numHeaderBytes + numDataBytes != paragraph length\n");
00713 
00714                         // we had better seek to where the paragraphs expect us to be to avoid trouble
00715                         if (!m_device->seekInternal (paraAfterEndByte + 128, SEEK_SET)) return false;
00716                     }
00717                 }
00718             }
00719 
00720         #ifdef DEBUG_INTERNALPARSER
00721             m_device->debug ("@@@ InternalParser: Start of text write\n");
00722         #endif
00723 
00724             DWord uptoByte = paraStartByte;
00725 
00726             // loop until we hit end of paragraph (getting one CharProperty on every iteration)
00727             while (uptoByte < paraAfterEndByte)
00728             {
00729                 if (charProp)
00730                 {
00731                     if (uptoByte >= charProp->getAfterEndCharByte ())
00732                     {
00733                         charProp = (FormatCharProperty *) m_characterInfo->next ();
00734                         if (m_device->bad ()) return false;
00735                     }
00736 
00737                 #ifdef DEBUG_INTERNALPARSER
00738                     m_device->debug ("@@@ InternalParser: character write\n");
00739                 #endif
00740 
00741                     if (!m_generator->writeCharInfoBegin (charProp)) return false;
00742                 }
00743 
00744                 // ultimately aim for end of CharProperty block; if that's not possible, aim for end of paragraph
00745                 DWord aimUltimateByte = charProp->getEndCharByte () > paraEndByte ? paraEndByte : charProp->getEndCharByte ();
00746 
00747                 while (uptoByte <= aimUltimateByte)
00748                 {
00749                     // flag to tell us to generate pageNewWrite/pageTable signal, after writing some normal text
00750                     bool pageTableAck = false;
00751 
00752                     // short-term goal (before end of CharProperty, ParaProperty or page)
00753                     DWord aimByte = aimUltimateByte;
00754 
00755                     // we want to efficiently send pageNewWrite signals which might be sandwhiched inside
00756                     // a CharProperty block (note: we do NOT end and then restart a CharProperty block because of a writePageNew())
00757                     if (pp)
00758                     {
00759                         if (pp->getFirstCharByte () <= aimByte)
00760                         {
00761                             pageTableAck = true;
00762                             if (pp->getFirstCharByte ())
00763                                 // - 1 is because we want the PageNew signal called before the character
00764                                 aimByte = pp->getFirstCharByte () - 1;
00765                             else
00766                                 aimByte = 0xFFFFFFFF;   // 0 - 1 :)
00767 
00768                         #ifdef DEBUG_INTERNALPARSER
00769                             m_device->debug ("@@@ InternalParser: writePageNew pending\n");
00770                         #endif
00771                         }
00772                     }
00773 
00774                     // write text (using buffering)
00775                     while (uptoByte <= aimByte && aimByte != 0xFFFFFFFF)
00776                     {
00777                         const DWord amountToRead = aimByte - uptoByte + 1 > 1023
00778                                                             ? 1023 : aimByte - uptoByte + 1;
00779 
00780                         if (paraIsText)
00781                         {
00782                             Byte buffer [1024];
00783 
00784                             if (!m_device->readInternal (buffer, amountToRead)) return false;
00785 
00786                             buffer [amountToRead] = '\0';
00787                             if (!m_generator->processText (buffer, uptoByte + amountToRead - 1 == paraEndByte)) return false;
00788                         }
00789 
00790                         uptoByte += amountToRead;
00791                     }       // while (uptoByte <= aimByte && aimByte != 0xFFFFFFFF) {
00792 
00793                     // generate pageNewWrite/pageTable signal, if requested
00794                     if (pageTableAck)
00795                     {
00796                     #ifdef DEBUG_INTERNALPARSER
00797                         m_device->debug ("@@@ InternalParser: writePageNew\n");
00798                     #endif
00799 
00800                         if (!m_generator->writePageNew (pp->getPageNumber ())) return false;
00801 
00802                         pp = m_pageTable->next ();
00803                         if (m_device->bad ()) return false;
00804                     }
00805                 }       // while (uptoByte <= aimUltimateByte) {
00806 
00807 
00808                 #ifdef DEBUG_INTERNALPARSER
00809                     m_device->debug ("@@@ InternalParser: character end write\n");
00810                 #endif
00811 
00812                 // end char info
00813                 if (charProp)
00814                     if (!m_generator->writeCharInfoEnd (charProp, uptoByte == paraAfterEndByte))
00815                         return false;
00816 
00817             }       // while (uptoByte < paraAfterEndByte) {
00818 
00819 
00820             //
00821             // output object
00822             //
00823 
00824             if (!paraIsText)
00825             {
00826                 if (objectType == ObjectType::OLE)
00827                 {
00828                     if (!m_generator->writeBinary (m_ole->getExternalObject (), m_ole->getExternalObjectSize ()))
00829                         return false;
00830                 }
00831                 else    // if (objectType == ObjectType::BMP || objectType == ObjectType::WMF)
00832                 {
00833                     if (!m_generator->writeBinary (m_image->getExternalImage (), m_image->getExternalImageSize ()))
00834                         return false;
00835                 }
00836             }
00837 
00838             //
00839             // end paragraph
00840             //
00841 
00842             if (paraIsText)
00843             {
00844                 // end paragraph
00845                 if (!m_generator->writeParaInfoEnd (paraProp, NULL)) return false;
00846             }
00847             else
00848             {
00849                 switch (objectType)
00850                 {
00851                 case ObjectType::OLE:
00852                     if (!m_generator->writeParaInfoEnd (paraProp, m_ole, NULL)) return false;
00853                     delete m_ole;
00854                     m_ole = NULL;
00855                     break;
00856                 case ObjectType::BMP:
00857                     if (!m_generator->writeParaInfoEnd (paraProp, NULL, m_image)) return false;
00858                     delete m_image;
00859                     m_image = NULL;
00860                     break;
00861                 case ObjectType::WMF:
00862                     if (!m_generator->writeParaInfoEnd (paraProp, NULL, m_image)) return false;
00863                     delete m_image;
00864                     m_image = NULL;
00865                     break;
00866                 }
00867             }
00868 
00869             paraStartByte = paraAfterEndByte;
00870 
00871             // numCharBytes != 0 because we checked it before we entered the loop
00872             m_generator->sigProgress (paraStartByte * 100 / numCharBytes);
00873 
00874             // get next paragraph properties
00875             paraProp = (FormatParaProperty *) m_paragraphInfo->next ();
00876             if (m_device->bad ()) return false;
00877         }
00878 
00879         // end last thing we were in
00880         switch (inWhat)
00881         {
00882         case InFooter:  if (!m_generator->writeFooterEnd ()) return false; else break;
00883         case InHeader:  if (!m_generator->writeHeaderEnd ()) return false; else break;
00884         case InBody:    if (!m_generator->writeBodyEnd ()) return false; else break;
00885         default:    break;  // keep compiler happy
00886         }
00887 
00888         // didn't output a body (usually due to a blank document)
00889         if (!wroteBody)
00890         {
00891         #ifdef DEBUG_INTERNALPARSER
00892             m_device->debug ("@@@ InternalParser: did not write body, writing one now\n");
00893         #endif
00894         
00895             if (!m_generator->writeBodyBegin ()) return false;
00896             if (!m_generator->writeBodyEnd ()) return false;
00897         }
00898 
00899         m_generator->sigProgress (100);
00900         if (!m_generator->writeDocumentEnd (m_header->getMagic (), m_pageLayout)) return false;
00901 
00902         return true;
00903     }
00904 
00905 
00906     InternalGenerator::InternalGenerator () : m_header (NULL),
00907                                                             m_sectionTable (NULL),
00908                                                             m_pageLayout (NULL),
00909                                                             m_pageTable (NULL),
00910                                                             m_fontTable (NULL),
00911                                                             m_paragraphInfo (NULL),
00912                                                             m_characterInfo (NULL),
00913                                                             m_image (NULL),
00914                                                             m_ole (NULL)
00915     {
00916     }
00917 
00918     InternalGenerator::~InternalGenerator ()
00919     {
00920         delete m_ole;
00921         delete m_image;
00922 
00923         delete m_characterInfo;
00924         delete m_paragraphInfo;
00925         delete m_fontTable;
00926         delete m_pageTable;
00927         delete m_pageLayout;
00928         delete m_sectionTable;
00929         delete m_header;
00930     }
00931 
00932     bool InternalGenerator::seekNextPage (void)
00933     {
00934         return m_device->seekInternal ((m_device->tellInternal () + 127) / 128 * 128, SEEK_SET);
00935     }
00936 
00937     bool InternalGenerator::writeDocumentBegin (const Word, const PageLayout *pageLayout)
00938     {
00939         if (!m_device)
00940         {
00941             fprintf (stderr, "INTERNAL ERROR: InternalGenerator::writeDocumentBegin() called without a device\n");
00942             return false;   // cannot use ErrorAndQuit() because that calls m_device->error()
00943         }
00944 
00945         m_header = new Header;
00946         if (!m_header)
00947             ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for header\n");
00948         m_header->setDevice (m_device);
00949 
00950         m_pageLayout = new PageLayout;
00951         if (!m_pageLayout)
00952             ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for pageLayout\n");
00953         m_pageLayout->setDevice (m_device);
00954 
00955         m_sectionTable = new SectionTable;
00956         if (!m_sectionTable)
00957             ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for sectionTable\n");
00958         m_sectionTable->setDevice (m_device);
00959 
00960         m_pageTable = new PageTable;
00961         if (!m_pageTable)
00962             ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for pageTable\n");
00963         m_pageTable->setDevice (m_device);
00964 
00965         m_fontTable = new FontTable;
00966         if (!m_fontTable)
00967             ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for fontTable\n");
00968         m_fontTable->setDevice (m_device);
00969 
00970         m_paragraphInfo = new FormatInfo;
00971         if (!m_paragraphInfo)
00972             ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for paragraphInfo\n");
00973         m_paragraphInfo->setType (ParaType);
00974         m_paragraphInfo->setDevice (m_device);
00975 
00976         m_characterInfo = new FormatInfo;
00977         if (!m_characterInfo)
00978             ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for characterInfo\n");
00979         m_characterInfo->setType (CharType);
00980         m_characterInfo->setDevice (m_device);
00981         
00982         *m_pageLayout = *pageLayout;
00983         
00984         // get ready to output text
00985         if (!m_device->seekInternal (1 * 128, SEEK_SET)) return false;
00986 
00987         return true;
00988     }
00989 
00990     bool InternalGenerator::writeDocumentEnd (const Word format, const PageLayout *)
00991     {
00992     #ifdef DEBUG_INTERNALGENERATOR
00993         m_device->debug ("!!!! InternalGenerator: writeDocumentEnd\n");
00994     #endif
00995 
00996         m_header->setNumCharBytes (m_device->tellInternal () - 128);
00997 
00998     #ifdef DEBUG_INTERNALGENERATOR
00999         m_device->debug ("before charinfo seek next page: ", m_device->tellInternal ());
01000     #endif
01001         if (!seekNextPage ()) return false; // from text pages
01002     #ifdef DEBUG_INTERNALGENERATOR
01003         m_device->debug ("!!!! InternalGenerator: Write characterInfo; page=", m_device->tellInternal () / 128);
01004     #endif
01005         FormatCharProperty defaultCharProperty;
01006         defaultCharProperty.setFontTable (m_fontTable); 
01007             Font defaultFont ((const Byte *) "Arial");  // better than "unknown", I guess
01008         defaultCharProperty.setFont (&defaultFont);
01009             
01010         // write characterInfo pages
01011         m_characterInfo->setDevice (m_device);
01012         m_characterInfo->setHeader (m_header);
01013         m_characterInfo->setFontTable (m_fontTable);
01014         if (!m_characterInfo->writeToDevice (&defaultCharProperty)) return false;
01015 
01016     #ifdef DEBUG_INTERNALGENERATOR
01017         m_device->debug ("before parainfo seek next page: ", m_device->tellInternal ());
01018     #endif
01019         if (!seekNextPage ()) return false;
01020     #ifdef DEBUG_INTERNALGENERATOR
01021         m_device->debug ("!!!! InternalGenerator: Write paragraphInfo; page=", m_device->tellInternal () / 128);
01022     #endif
01023         FormatParaProperty defaultParaProperty; 
01024         defaultParaProperty.setMargins (m_pageLayout->getLeftMargin (), m_pageLayout->getRightMargin ());
01025     
01026         // write paragraphInfo pages
01027         m_paragraphInfo->setDevice (m_device);
01028         m_paragraphInfo->setHeader (m_header);
01029         m_paragraphInfo->setMargins (m_pageLayout->getLeftMargin (), m_pageLayout->getRightMargin ());
01030         if (!m_paragraphInfo->writeToDevice (&defaultParaProperty)) return false;
01031 
01032     #ifdef DEBUG_INTERNALGENERATOR
01033         m_device->debug ("!!!! InternalGenerator: Write footnoteTable\n");
01034     #endif
01035 
01036         // there's "no such thing (tm)" as a FootnoteTable
01037         m_header->setPageFootnoteTable ((m_device->tellInternal () + 127) / 128);
01038 
01039         if (!seekNextPage ()) return false;
01040     #ifdef DEBUG_INTERNALGENERATOR
01041         m_device->debug ("!!!! InternalGenerator: Write pageLayout; page=", m_device->tellInternal () / 128);
01042     #endif
01043         m_pageLayout->setDevice (m_device);
01044         m_pageLayout->setHeader (m_header);
01045         if (!m_pageLayout->writeToDevice ()) return false;
01046 
01047     #ifdef DEBUG_INTERNALGENERATOR
01048         m_device->debug ("!!!! InternalGenerator: Write sectionTable\n");
01049     #endif
01050         if (!seekNextPage ()) return false;
01051         m_sectionTable->setDevice (m_device);
01052         m_sectionTable->setHeader (m_header);
01053         if (!m_sectionTable->writeToDevice (m_pageLayout->getIsModified ())) return false;
01054 
01055     #ifdef DEBUG_INTERNALGENERATOR
01056         m_device->debug ("!!!! InternalGenerator: Write pageTable\n");
01057     #endif
01058         if (!seekNextPage ()) return false;
01059         m_pageTable->setDevice (m_device);
01060         m_pageTable->setHeader (m_header);
01061         m_pageTable->setPageNumberStart (m_pageLayout->getPageNumberStart ());
01062         if (!m_pageTable->writeToDevice ()) return false;
01063 
01064     #ifdef DEBUG_INTERNALGENERATOR
01065         m_device->debug ("!!!! InternalGenerator: Write fontTable\n");
01066     #endif
01067         if (!seekNextPage ()) return false;
01068         m_fontTable->setDevice (m_device);
01069         m_fontTable->setHeader (m_header);
01070         if (!m_fontTable->writeToDevice ()) return false;
01071 
01072     #ifdef DEBUG_INTERNALGENERATOR
01073         m_device->debug ("!!!! InternalGenerator: Write header\n");
01074     #endif
01075         // write header
01076         m_header->setFormat (format);
01077         m_header->setNumPages ((m_device->tellInternal () + 127) / 128);
01078         m_header->setDevice (m_device);
01079         if (!m_device->seekInternal (0 * 128, SEEK_SET)) return false;
01080         if (!m_header->writeToDevice ()) return false;
01081 
01082         // pad up to 128 (it seems that the ms programmers really did expect 128-byte pages)
01083         // BTW, if you look in a Write file, you can normally see some garbage after the
01084         // fontTable that appears to be part of an earlier part of the document
01085         // (or even some _other_ document!) suggesting that ms really did reuse their memory
01086         // and that we shouldn't be all that concerned with "unknown", "reserved" and "zero"
01087         // fields
01088         if (!m_device->seekInternal (m_header->getNumPages () * 128, SEEK_SET)) return false;
01089 
01090         return true;
01091     }
01092 
01093     bool InternalGenerator::writeParaInfoBegin (const FormatParaProperty * /*paraProperty*/,
01094                                                                 const OLE *ole,
01095                                                                 const Image *image)
01096     {
01097     #ifdef DEBUG_INTERNALGENERATOR
01098         m_device->debug ("!!!! InternalGenerator: writeParaInfoBegin\n");
01099     #endif
01100 
01101         if (ole)
01102         {
01103             m_ole = new OLE;
01104             if (!m_ole)
01105                 ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for OLE\n");
01106             *m_ole = *ole;
01107 
01108             m_ole->setDevice (m_device);
01109             m_ole->setExternalObjectSize (ole->getExternalObjectSize ());
01110         }
01111 
01112         if (image)
01113         {
01114             m_image = new Image;
01115             if (!m_image)
01116                 ErrorAndQuit (Error::OutOfMemory, "could not allocate memory for image\n");
01117             *m_image = *image;
01118 
01119             m_image->setDevice (m_device);
01120             m_image->setExternalImageSize (image->getExternalImageSize ());
01121         }
01122 
01123         return true;
01124     }
01125 
01126     bool InternalGenerator::writeParaInfoEnd (const FormatParaProperty *paraProperty,
01127                                                             const OLE */*ole*/,
01128                                                             const Image */*image*/)
01129     {
01130     #ifdef DEBUG_INTERNALGENERATOR
01131         m_device->debug ("!!!! InternalGenerator: writeParaInfoEnd\n");
01132     #endif
01133 
01134         if (m_ole)
01135         {
01136             if (!m_ole->writeToDevice ())
01137                 return false;
01138 
01139             delete m_ole;
01140             m_ole = NULL;
01141         }
01142         else if (m_image)
01143         {
01144             if (!m_image->writeToDevice ())
01145                 return false;
01146 
01147             delete m_image;
01148             m_image = NULL;
01149         }
01150 
01151         m_paragraphInfo->setMargins (m_pageLayout->getLeftMargin (), m_pageLayout->getRightMargin ());
01152         return m_paragraphInfo->add (paraProperty, false/* don't force */);
01153     }
01154 
01155     bool InternalGenerator::writeCharInfoBegin (const FormatCharProperty * /*charProperty*/)
01156     {
01157     #ifdef DEBUG_INTERNALGENERATOR
01158         m_device->debug ("!!!! InternalGenerator: writeCharInfoBegin\n");
01159     #endif
01160         
01161         return true;
01162     }
01163 
01164     bool InternalGenerator::writeCharInfoEnd (const FormatCharProperty *charProperty,
01165                                                             const bool)
01166     {
01167     #ifdef DEBUG_INTERNALGENERATOR
01168         m_device->debug ("!!!! InternalGenerator: writeCharInfoEnd\n");
01169     #endif
01170 
01171         m_characterInfo->setFontTable (m_fontTable);
01172         return m_characterInfo->add (charProperty, false/* don't force */);
01173     }
01174 
01175     bool InternalGenerator::writeBinary (const Byte *buffer, const DWord length)
01176     {
01177         if (m_ole)
01178         {
01179             if (!m_ole->setExternalObject (buffer, length))
01180                 return false;
01181         }
01182         else if (m_image)
01183         {
01184             if (!m_image->setExternalImage (buffer, length))
01185                 return false;
01186         }
01187         else
01188             ErrorAndQuit (Error::InternalError, "attempt to write unknown type of binary data\n");
01189 
01190         return true;
01191     }
01192 
01193     bool InternalGenerator::writeText (const Byte *string)
01194     {
01195     #ifdef DEBUG_INTERNALGENERATOR
01196 //      m_device->debug ("!!!! InternalGenerator: writeText> ", (const char *) string);
01197     #endif
01198 
01199         DWord length = DWord (strlen ((const char *) string));
01200 
01201         if (!m_device->writeInternal (string, length)) return false;
01202 
01203         return true;
01204     }
01205 
01206     bool InternalGenerator::writePageNew (const int pageNumberClaimed)
01207     {
01208     #ifdef DEBUG_INTERNALGENERATOR
01209         m_device->debug ("!!!! InternalGenerator: writePageNew() with pageNo ",
01210                                 pageNumberClaimed);
01211     #endif
01212 
01213         // is this a forced new page (as in, the signal was only generated because I put it in the spec?)
01214         if (pageNumberClaimed == 0)
01215             return true;    // no real pageTable
01216 
01217         PagePointer pp;
01218         pp.setPageNumber (pageNumberClaimed);
01219         pp.setFirstCharByte (m_device->tellInternal () - 128);
01220 
01221         return m_pageTable->add (&pp);
01222     }
01223 
01224 }   // namespace MSWrite    {
01225 
01226 // end of libmswrite.cpp
KDE Home | KDE Accessibility Home | Description of Access Keys