filters

pole.cpp

00001 /* POLE - Portable C++ library to access OLE Storage
00002    Copyright (C) 2002-2005 Ariya Hidayat <ariya@kde.org>
00003 
00004    Redistribution and use in source and binary forms, with or without
00005    modification, are permitted provided that the following conditions
00006    are met:
00007    * Redistributions of source code must retain the above copyright notice,
00008      this list of conditions and the following disclaimer.
00009    * Redistributions in binary form must reproduce the above copyright notice,
00010      this list of conditions and the following disclaimer in the documentation
00011      and/or other materials provided with the distribution.
00012    * Neither the name of the authors nor the names of its contributors may be
00013      used to endorse or promote products derived from this software without
00014      specific prior written permission.
00015 
00016    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
00017    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
00018    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
00019    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
00020    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
00021    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
00022    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
00023    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
00024    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
00025    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
00026    THE POSSIBILITY OF SUCH DAMAGE.
00027 */
00028 
00029 #include <fstream>
00030 #include <iostream>
00031 #include <list>
00032 #include <string>
00033 #include <vector>
00034 #include <string.h>
00035 
00036 #include "pole.h"
00037 
00038 // enable to activate debugging output
00039 // #define POLE_DEBUG
00040 
00041 namespace POLE
00042 {
00043 
00044 class Header
00045 {
00046   public:
00047     unsigned char id[8];       // signature, or magic identifier
00048     unsigned b_shift;          // bbat->blockSize = 1 << b_shift
00049     unsigned s_shift;          // sbat->blockSize = 1 << s_shift
00050     unsigned num_bat;          // blocks allocated for big bat
00051     unsigned dirent_start;     // starting block for directory info
00052     unsigned threshold;        // switch from small to big file (usually 4K)
00053     unsigned sbat_start;       // starting block index to store small bat
00054     unsigned num_sbat;         // blocks allocated for small bat
00055     unsigned mbat_start;       // starting block to store meta bat
00056     unsigned num_mbat;         // blocks allocated for meta bat
00057     unsigned long bb_blocks[109];
00058 
00059     Header();
00060     bool valid();
00061     void load( const unsigned char* buffer );
00062     void save( unsigned char* buffer );
00063     void debug();
00064 };
00065 
00066 class AllocTable
00067 {
00068   public:
00069     static const unsigned Eof;
00070     static const unsigned Avail;
00071     static const unsigned Bat;
00072     static const unsigned MetaBat;
00073     unsigned blockSize;
00074     AllocTable();
00075     void clear();
00076     unsigned long count();
00077     void resize( unsigned long newsize );
00078     void preserve( unsigned long n );
00079     void set( unsigned long index, unsigned long val );
00080     unsigned unused();
00081     void setChain( std::vector<unsigned long> );
00082     std::vector<unsigned long> follow( unsigned long start );
00083     unsigned long operator[](unsigned long index );
00084     void load( const unsigned char* buffer, unsigned len );
00085     void save( unsigned char* buffer );
00086     unsigned size();
00087     void debug();
00088   private:
00089     std::vector<unsigned long> data;
00090     AllocTable( const AllocTable& );
00091     AllocTable& operator=( const AllocTable& );
00092 };
00093 
00094 class DirEntry
00095 {
00096   public:
00097     bool valid;            // false if invalid (should be skipped)
00098     std::string name;      // the name, not in unicode anymore
00099     bool dir;              // true if directory
00100     unsigned long size;    // size (not valid if directory)
00101     unsigned long start;   // starting block
00102     unsigned prev;         // previous sibling
00103     unsigned next;         // next sibling
00104     unsigned child;        // first child
00105 };
00106 
00107 class DirTree
00108 {
00109   public:
00110     static const unsigned End;
00111     DirTree();
00112     void clear();
00113     unsigned entryCount();
00114     DirEntry* entry( unsigned index );
00115     DirEntry* entry( const std::string& name, bool create=false );
00116     int indexOf( DirEntry* e );
00117     int parent( unsigned index );
00118     std::string fullName( unsigned index );
00119     std::vector<unsigned> children( unsigned index );
00120     void load( unsigned char* buffer, unsigned len );
00121     void save( unsigned char* buffer );
00122     unsigned size();
00123     void debug();
00124   private:
00125     std::vector<DirEntry> entries;
00126     DirTree( const DirTree& );
00127     DirTree& operator=( const DirTree& );
00128 };
00129 
00130 class StorageIO
00131 {
00132   public:
00133     Storage* storage;         // owner
00134     std::string filename;     // filename
00135     std::fstream file;        // associated with above name
00136     int result;               // result of operation
00137     bool opened;              // true if file is opened
00138     unsigned long filesize;   // size of the file
00139 
00140     Header* header;           // storage header
00141     DirTree* dirtree;         // directory tree
00142     AllocTable* bbat;         // allocation table for big blocks
00143     AllocTable* sbat;         // allocation table for small blocks
00144 
00145     unsigned long lastBlockIndex;         // last read, for simple caching
00146     unsigned char* lastBlockData;
00147 
00148     std::vector<unsigned long> sb_blocks; // blocks for "small" files
00149 
00150     std::list<Stream*> streams;
00151 
00152     StorageIO( Storage* storage, const char* filename );
00153     ~StorageIO();
00154 
00155     bool open();
00156     void close();
00157     void flush();
00158     void load();
00159     void create();
00160 
00161     unsigned long loadBigBlocks( std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen );
00162 
00163     unsigned long loadBigBlock( unsigned long block, unsigned char* buffer, unsigned long maxlen );
00164 
00165     unsigned long loadSmallBlocks( std::vector<unsigned long> blocks, unsigned char* buffer, unsigned long maxlen );
00166 
00167     unsigned long loadSmallBlock( unsigned long block, unsigned char* buffer, unsigned long maxlen );
00168 
00169     StreamIO* streamIO( const std::string& name );
00170 
00171   private:
00172     // no copy or assign
00173     StorageIO( const StorageIO& );
00174     StorageIO& operator=( const StorageIO& );
00175 
00176 };
00177 
00178 class StreamIO
00179 {
00180   public:
00181     StorageIO* io;
00182     DirEntry* entry;
00183     std::string fullName;
00184     bool eof;
00185     bool fail;
00186 
00187     StreamIO( StorageIO* io, DirEntry* entry );
00188     ~StreamIO();
00189     unsigned long size();
00190     void seek( unsigned long pos );
00191     unsigned long tell();
00192     int getch();
00193     unsigned long read( unsigned char* data, unsigned long maxlen );
00194     unsigned long read( unsigned long pos, unsigned char* data, unsigned long maxlen );
00195 
00196 
00197   private:
00198     std::vector<unsigned long> blocks;
00199 
00200     // no copy or assign
00201     StreamIO( const StreamIO& );
00202     StreamIO& operator=( const StreamIO& );
00203 
00204     // pointer for read
00205     unsigned long m_pos;
00206 
00207     // simple cache system to speed-up getch()
00208     unsigned char* cache_data;
00209     unsigned long cache_size;
00210     unsigned long cache_pos;
00211     void updateCache();
00212 };
00213 
00214 } // namespace POLE
00215 
00216 using namespace POLE;
00217 
00218 static inline unsigned long readU16( const unsigned char* ptr )
00219 {
00220   return ptr[0]+(ptr[1]<<8);
00221 }
00222 
00223 static inline unsigned long readU32( const unsigned char* ptr )
00224 {
00225   return ptr[0]+(ptr[1]<<8)+(ptr[2]<<16)+(ptr[3]<<24);
00226 }
00227 
00228 static inline void writeU16( unsigned char* ptr, unsigned long data )
00229 {
00230   ptr[0] = (unsigned char)(data & 0xff);
00231   ptr[1] = (unsigned char)((data >> 8) & 0xff);
00232 }
00233 
00234 static inline void writeU32( unsigned char* ptr, unsigned long data )
00235 {
00236   ptr[0] = (unsigned char)(data & 0xff);
00237   ptr[1] = (unsigned char)((data >> 8) & 0xff);
00238   ptr[2] = (unsigned char)((data >> 16) & 0xff);
00239   ptr[3] = (unsigned char)((data >> 24) & 0xff);
00240 }
00241 
00242 static const unsigned char pole_magic[] =
00243  { 0xd0, 0xcf, 0x11, 0xe0, 0xa1, 0xb1, 0x1a, 0xe1 };
00244 
00245 // =========== Header ==========
00246 
00247 Header::Header()
00248 {
00249   b_shift = 9;
00250   s_shift = 6;
00251   num_bat = 0;
00252   dirent_start = 0;
00253   threshold = 4096;
00254   sbat_start = 0;
00255   num_sbat = 0;
00256   mbat_start = 0;
00257   num_mbat = 0;
00258 
00259   for( unsigned i = 0; i < 8; i++ )
00260     id[i] = pole_magic[i];
00261   for( unsigned i=0; i<109; i++ )
00262     bb_blocks[i] = AllocTable::Avail;
00263 }
00264 
00265 bool Header::valid()
00266 {
00267   if( threshold != 4096 ) return false;
00268   if( num_bat == 0 ) return false;
00269   if( (num_bat > 109) && (num_bat > (num_mbat * 127) + 109)) return false;
00270   if( (num_bat < 109) && (num_mbat != 0) ) return false;
00271   if( s_shift > b_shift ) return false;
00272   if( b_shift <= 6 ) return false;
00273   if( b_shift >=31 ) return false;
00274 
00275   return true;
00276 }
00277 
00278 void Header::load( const unsigned char* buffer )
00279 {
00280   b_shift      = readU16( buffer + 0x1e );
00281   s_shift      = readU16( buffer + 0x20 );
00282   num_bat      = readU32( buffer + 0x2c );
00283   dirent_start = readU32( buffer + 0x30 );
00284   threshold    = readU32( buffer + 0x38 );
00285   sbat_start   = readU32( buffer + 0x3c );
00286   num_sbat     = readU32( buffer + 0x40 );
00287   mbat_start   = readU32( buffer + 0x44 );
00288   num_mbat     = readU32( buffer + 0x48 );
00289 
00290   for( unsigned i = 0; i < 8; i++ )
00291     id[i] = buffer[i];
00292   for( unsigned i=0; i<109; i++ )
00293     bb_blocks[i] = readU32( buffer + 0x4C+i*4 );
00294 }
00295 
00296 void Header::save( unsigned char* buffer )
00297 {
00298   memset( buffer, 0, 0x4c );
00299   memcpy( buffer, pole_magic, 8 );        // ole signature
00300   writeU32( buffer + 8, 0 );              // unknown
00301   writeU32( buffer + 12, 0 );             // unknown
00302   writeU32( buffer + 16, 0 );             // unknown
00303   writeU16( buffer + 24, 0x003e );        // revision ?
00304   writeU16( buffer + 26, 3 );             // version ?
00305   writeU16( buffer + 28, 0xfffe );        // unknown
00306   writeU16( buffer + 0x1e, b_shift );
00307   writeU16( buffer + 0x20, s_shift );
00308   writeU32( buffer + 0x2c, num_bat );
00309   writeU32( buffer + 0x30, dirent_start );
00310   writeU32( buffer + 0x38, threshold );
00311   writeU32( buffer + 0x3c, sbat_start );
00312   writeU32( buffer + 0x40, num_sbat );
00313   writeU32( buffer + 0x44, mbat_start );
00314   writeU32( buffer + 0x48, num_mbat );
00315 
00316   for( unsigned i=0; i<109; i++ )
00317     writeU32( buffer + 0x4C+i*4, bb_blocks[i] );
00318 }
00319 
00320 void Header::debug()
00321 {
00322   std::cout << std::endl;
00323   std::cout << "b_shift " << b_shift << std::endl;
00324   std::cout << "s_shift " << s_shift << std::endl;
00325   std::cout << "num_bat " << num_bat << std::endl;
00326   std::cout << "dirent_start " << dirent_start << std::endl;
00327   std::cout << "threshold " << threshold << std::endl;
00328   std::cout << "sbat_start " << sbat_start << std::endl;
00329   std::cout << "num_sbat " << num_sbat << std::endl;
00330   std::cout << "mbat_start " << mbat_start << std::endl;
00331   std::cout << "num_mbat " << num_mbat << std::endl;
00332 
00333   unsigned s = (num_bat<=109) ? num_bat : 109;
00334   std::cout << "bat blocks: ";
00335   for( unsigned i = 0; i < s; i++ )
00336     std::cout << bb_blocks[i] << " ";
00337   std::cout << std::endl;
00338 }
00339 
00340 // =========== AllocTable ==========
00341 
00342 const unsigned AllocTable::Avail = 0xffffffff;
00343 const unsigned AllocTable::Eof = 0xfffffffe;
00344 const unsigned AllocTable::Bat = 0xfffffffd;
00345 const unsigned AllocTable::MetaBat = 0xfffffffc;
00346 
00347 AllocTable::AllocTable()
00348 {
00349   blockSize = 4096;
00350   // initial size
00351   resize( 128 );
00352 }
00353 
00354 unsigned long AllocTable::count()
00355 {
00356   return data.size();
00357 }
00358 
00359 void AllocTable::resize( unsigned long newsize )
00360 {
00361   unsigned oldsize = data.size();
00362   data.resize( newsize );
00363   if( newsize > oldsize )
00364     for( unsigned i = oldsize; i<newsize; i++ )
00365       data[i] = Avail;
00366 }
00367 
00368 // make sure there're still free blocks
00369 void AllocTable::preserve( unsigned long n )
00370 {
00371   std::vector<unsigned long> pre;
00372   for( unsigned i=0; i < n; i++ )
00373     pre.push_back( unused() );
00374 }
00375 
00376 unsigned long AllocTable::operator[]( unsigned long index )
00377 {
00378   unsigned long result;
00379   result = data[index];
00380   return result;
00381 }
00382 
00383 void AllocTable::set( unsigned long index, unsigned long value )
00384 {
00385   if( index >= count() ) resize( index + 1);
00386   data[ index ] = value;
00387 }
00388 
00389 void AllocTable::setChain( std::vector<unsigned long> chain )
00390 {
00391   if( chain.size() )
00392   {
00393     for( unsigned i=0; i<chain.size()-1; i++ )
00394       set( chain[i], chain[i+1] );
00395     set( chain[ chain.size()-1 ], AllocTable::Eof );
00396   }
00397 }
00398 
00399 // follow
00400 std::vector<unsigned long> AllocTable::follow( unsigned long start )
00401 {
00402   std::vector<unsigned long> chain;
00403 
00404   if( start >= count() ) return chain;
00405 
00406   unsigned long p = start;
00407   while( p < count() )
00408   {
00409     if( p == (unsigned long)Eof ) break;
00410     if( p == (unsigned long)Bat ) break;
00411     if( p == (unsigned long)MetaBat ) break;
00412     if( p >= count() ) break;
00413     chain.push_back( p );
00414     if( data[p] >= count() ) break;
00415     p = data[ p ];
00416   }
00417 
00418   return chain;
00419 }
00420 
00421 unsigned AllocTable::unused()
00422 {
00423   // find first available block
00424   for( unsigned i = 0; i < data.size(); i++ )
00425     if( data[i] == Avail )
00426       return i;
00427 
00428   // completely full, so enlarge the table
00429   unsigned block = data.size();
00430   resize( data.size()+10 );
00431   return block;
00432 }
00433 
00434 void AllocTable::load( const unsigned char* buffer, unsigned len )
00435 {
00436   resize( len / 4 );
00437   for( unsigned i = 0; i < count(); i++ )
00438     set( i, readU32( buffer + i*4 ) );
00439 }
00440 
00441 // return space required to save this dirtree
00442 unsigned AllocTable::size()
00443 {
00444   return count() * 4;
00445 }
00446 
00447 void AllocTable::save( unsigned char* buffer )
00448 {
00449   for( unsigned i = 0; i < count(); i++ )
00450     writeU32( buffer + i*4, data[i] );
00451 }
00452 
00453 void AllocTable::debug()
00454 {
00455   std::cout << "block size " << data.size() << std::endl;
00456   for( unsigned i=0; i< data.size(); i++ )
00457   {
00458      if( data[i] == Avail ) continue;
00459      std::cout << i << ": ";
00460      if( data[i] == Eof ) std::cout << "[eof]";
00461      else if( data[i] == Bat ) std::cout << "[bat]";
00462      else if( data[i] == MetaBat ) std::cout << "[metabat]";
00463      else std::cout << data[i];
00464      std::cout << std::endl;
00465   }
00466 }
00467 
00468 // =========== DirTree ==========
00469 
00470 const unsigned DirTree::End = 0xffffffff;
00471 
00472 DirTree::DirTree()
00473 {
00474   clear();
00475 }
00476 
00477 void DirTree::clear()
00478 {
00479   // leave only root entry
00480   entries.resize( 1 );
00481   entries[0].valid = true;
00482   entries[0].name = "Root Entry";
00483   entries[0].dir = true;
00484   entries[0].size = 0;
00485   entries[0].start = End;
00486   entries[0].prev = End;
00487   entries[0].next = End;
00488   entries[0].child = End;
00489 }
00490 
00491 unsigned DirTree::entryCount()
00492 {
00493   return entries.size();
00494 }
00495 
00496 DirEntry* DirTree::entry( unsigned index )
00497 {
00498   if( index >= entryCount() ) return (DirEntry*) 0;
00499   return &entries[ index ];
00500 }
00501 
00502 int DirTree::indexOf( DirEntry* e )
00503 {
00504   for( unsigned i = 0; i < entryCount(); i++ )
00505     if( entry( i ) == e ) return i;
00506 
00507   return -1;
00508 }
00509 
00510 int DirTree::parent( unsigned index )
00511 {
00512   // brute-force, basically we iterate for each entries, find its children
00513   // and check if one of the children is 'index'
00514   for( unsigned j=0; j<entryCount(); j++ )
00515   {
00516     std::vector<unsigned> chi = children( j );
00517     for( unsigned i=0; i<chi.size();i++ )
00518       if( chi[i] == index )
00519         return j;
00520   }
00521 
00522   return -1;
00523 }
00524 
00525 std::string DirTree::fullName( unsigned index )
00526 {
00527   // don't use root name ("Root Entry"), just give "/"
00528   if( index == 0 ) return "/";
00529 
00530   std::string result = entry( index )->name;
00531   result.insert( 0,  "/" );
00532   int p = parent( index );
00533   DirEntry * _entry = 0;
00534   while( p > 0 )
00535   {
00536     _entry = entry( p );
00537     if (_entry->dir && _entry->valid)
00538     {
00539       result.insert( 0,  _entry->name);
00540       result.insert( 0,  "/" );
00541     }
00542     --p;
00543     index = p;
00544     if( index <= 0 ) break;
00545   }
00546   return result;
00547 }
00548 
00549 // given a fullname (e.g "/ObjectPool/_1020961869"), find the entry
00550 // if not found and create is false, return 0
00551 // if create is true, a new entry is returned
00552 DirEntry* DirTree::entry( const std::string& name, bool create )
00553 {
00554    if( !name.length() ) return (DirEntry*)0;
00555 
00556    // quick check for "/" (that's root)
00557    if( name == "/" ) return entry( 0 );
00558 
00559    // split the names, e.g  "/ObjectPool/_1020961869" will become:
00560    // "ObjectPool" and "_1020961869"
00561    std::list<std::string> names;
00562    std::string::size_type start = 0, end = 0;
00563    if( name[0] == '/' ) start++;
00564    while( start < name.length() )
00565    {
00566      end = name.find_first_of( '/', start );
00567      if( end == std::string::npos ) end = name.length();
00568      names.push_back( name.substr( start, end-start ) );
00569      start = end+1;
00570    }
00571 
00572    // start from root
00573    int index = 0 ;
00574 
00575    // trace one by one
00576    std::list<std::string>::iterator it;
00577 
00578    for( it = names.begin(); it != names.end(); ++it )
00579    {
00580      // find among the children of index
00581      std::vector<unsigned> chi = children( index );
00582      unsigned child = 0;
00583      for( unsigned i = 0; i < chi.size(); i++ )
00584      {
00585        DirEntry* ce = entry( chi[i] );
00586        if( ce )
00587        if( ce->valid && ( ce->name.length()>1 ) )
00588        if( ce->name == *it )
00589              child = chi[i];
00590      }
00591 
00592      // traverse to the child
00593      if( child > 0 ) index = child;
00594      else
00595      {
00596        // not found among children
00597        if( !create ) return (DirEntry*)0;
00598 
00599        // create a new entry
00600        unsigned parent = index;
00601        entries.push_back( DirEntry() );
00602        index = entryCount()-1;
00603        DirEntry* e = entry( index );
00604        e->valid = true;
00605        e->name = *it;
00606        e->dir = false;
00607        e->size = 0;
00608        e->start = 0;
00609        e->child = End;
00610        e->prev = End;
00611        e->next = entry(parent)->child;
00612        entry(parent)->child = index;
00613      }
00614    }
00615 
00616    return entry( index );
00617 }
00618 
00619 // helper function: recursively find siblings of index
00620 void dirtree_find_siblings( DirTree* dirtree, std::vector<unsigned>& result,
00621   unsigned index )
00622 {
00623   DirEntry* e = dirtree->entry( index );
00624   if( !e ) return;
00625   if( !e->valid ) return;
00626 
00627   // prevent infinite loop
00628   for( unsigned i = 0; i < result.size(); i++ )
00629     if( result[i] == index ) return;
00630 
00631   // add myself
00632   result.push_back( index );
00633 
00634   // visit previous sibling, don't go infinitely
00635   unsigned prev = e->prev;
00636   if( ( prev > 0 ) && ( prev < dirtree->entryCount() ) )
00637   {
00638     for( unsigned i = 0; i < result.size(); i++ )
00639       if( result[i] == prev ) prev = 0;
00640     if( prev ) dirtree_find_siblings( dirtree, result, prev );
00641   }
00642 
00643   // visit next sibling, don't go infinitely
00644   unsigned next = e->next;
00645   if( ( next > 0 ) && ( next < dirtree->entryCount() ) )
00646   {
00647     for( unsigned i = 0; i < result.size(); i++ )
00648       if( result[i] == next ) next = 0;
00649     if( next ) dirtree_find_siblings( dirtree, result, next );
00650   }
00651 }
00652 
00653 std::vector<unsigned> DirTree::children( unsigned index )
00654 {
00655   std::vector<unsigned> result;
00656 
00657   DirEntry* e = entry( index );
00658   if( e ) if( e->valid && e->child < entryCount() )
00659     dirtree_find_siblings( this, result, e->child );
00660 
00661   return result;
00662 }
00663 
00664 void DirTree::load( unsigned char* buffer, unsigned size )
00665 {
00666   entries.clear();
00667 
00668   for( unsigned i = 0; i < size/128; i++ )
00669   {
00670     unsigned p = i * 128;
00671 
00672     // would be < 32 if first char in the name isn't printable
00673     unsigned prefix = 32;
00674 
00675     // parse name of this entry, which stored as Unicode 16-bit
00676     std::string name;
00677     int name_len = readU16( buffer + 0x40+p );
00678     if( name_len > 64 ) name_len = 64;
00679     for( int j=0; ( buffer[j+p]) && (j<name_len); j+= 2 )
00680       name.append( 1, buffer[j+p] );
00681 
00682     // first char isn't printable ? remove it...
00683     if( buffer[p] < 32 )
00684     {
00685       prefix = buffer[0];
00686       name.erase( 0,1 );
00687     }
00688 
00689     // 2 = file (aka stream), 1 = directory (aka storage), 5 = root
00690     unsigned type = buffer[ 0x42 + p];
00691 
00692     DirEntry e;
00693     e.valid = true;
00694     e.name = name;
00695     e.start = readU32( buffer + 0x74+p );
00696     e.size = readU32( buffer + 0x78+p );
00697     e.prev = readU32( buffer + 0x44+p );
00698     e.next = readU32( buffer + 0x48+p );
00699     e.child = readU32( buffer + 0x4C+p );
00700     e.dir = ( type!=2 );
00701 
00702     // sanity checks
00703     if( (type != 2) && (type != 1 ) && (type != 5 ) ) e.valid = false;
00704     if( name_len < 1 ) e.valid = false;
00705 
00706     entries.push_back( e );
00707   }
00708 }
00709 
00710 // return space required to save this dirtree
00711 unsigned DirTree::size()
00712 {
00713   return entryCount() * 128;
00714 }
00715 
00716 void DirTree::save( unsigned char* buffer )
00717 {
00718   memset( buffer, 0, size() );
00719 
00720   // root is fixed as "Root Entry"
00721   DirEntry* root = entry( 0 );
00722   std::string name = "Root Entry";
00723   for( unsigned j = 0; j < name.length(); j++ )
00724     buffer[ j*2 ] = name[j];
00725   writeU16( buffer + 0x40, name.length()*2 + 2 );
00726   writeU32( buffer + 0x74, 0xffffffff );
00727   writeU32( buffer + 0x78, 0 );
00728   writeU32( buffer + 0x44, 0xffffffff );
00729   writeU32( buffer + 0x48, 0xffffffff );
00730   writeU32( buffer + 0x4c, root->child );
00731   buffer[ 0x42 ] = 5;
00732   buffer[ 0x43 ] = 1;
00733 
00734   for( unsigned i = 1; i < entryCount(); i++ )
00735   {
00736     DirEntry* e = entry( i );
00737     if( !e ) continue;
00738     if( e->dir )
00739     {
00740       e->start = 0xffffffff;
00741       e->size = 0;
00742     }
00743 
00744     // max length for name is 32 chars
00745     std::string name = e->name;
00746     if( name.length() > 32 )
00747       name.erase( 32, name.length() );
00748 
00749     // write name as Unicode 16-bit
00750     for( unsigned j = 0; j < name.length(); j++ )
00751       buffer[ i*128 + j*2 ] = name[j];
00752 
00753     writeU16( buffer + i*128 + 0x40, name.length()*2 + 2 );
00754     writeU32( buffer + i*128 + 0x74, e->start );
00755     writeU32( buffer + i*128 + 0x78, e->size );
00756     writeU32( buffer + i*128 + 0x44, e->prev );
00757     writeU32( buffer + i*128 + 0x48, e->next );
00758     writeU32( buffer + i*128 + 0x4c, e->child );
00759     buffer[ i*128 + 0x42 ] = e->dir ? 1 : 2;
00760     buffer[ i*128 + 0x43 ] = 1; // always black
00761   }
00762 }
00763 
00764 void DirTree::debug()
00765 {
00766   for( unsigned i = 0; i < entryCount(); i++ )
00767   {
00768     DirEntry* e = entry( i );
00769     if( !e ) continue;
00770     std::cout << i << ": ";
00771     if( !e->valid ) std::cout << "INVALID ";
00772     std::cout << e->name << " ";
00773     if( e->dir ) std::cout << "(Dir) ";
00774     else std::cout << "(File) ";
00775     std::cout << e->size << " ";
00776     std::cout << "s:" << e->start << " ";
00777     std::cout << "(";
00778     if( e->child == End ) std::cout << "-"; else std::cout << e->child;
00779     std::cout << " ";
00780     if( e->prev == End ) std::cout << "-"; else std::cout << e->prev;
00781     std::cout << ":";
00782     if( e->next == End ) std::cout << "-"; else std::cout << e->next;
00783     std::cout << ")";
00784     std::cout << std::endl;
00785   }
00786 }
00787 
00788 // =========== StorageIO ==========
00789 
00790 StorageIO::StorageIO( Storage* st, const char* fname )
00791 {
00792   storage = st;
00793   filename = fname;
00794   result = Storage::Ok;
00795   opened = false;
00796 
00797   header = new Header();
00798   dirtree = new DirTree();
00799   bbat = new AllocTable();
00800   sbat = new AllocTable();
00801 
00802   lastBlockIndex = 0;
00803   lastBlockData = 0;
00804 
00805   filesize = 0;
00806   bbat->blockSize = 1 << header->b_shift;
00807   sbat->blockSize = 1 << header->s_shift;
00808 }
00809 
00810 StorageIO::~StorageIO()
00811 {
00812   if( opened ) close();
00813 
00814   delete [] lastBlockData;
00815   delete sbat;
00816   delete bbat;
00817   delete dirtree;
00818   delete header;
00819 }
00820 
00821 bool StorageIO::open()
00822 {
00823   // already opened ? close first
00824   if( opened ) close();
00825 
00826   load();
00827 
00828   return result == Storage::Ok;
00829 }
00830 
00831 void StorageIO::load()
00832 {
00833   unsigned char* buffer = 0;
00834   unsigned long buflen = 0;
00835   std::vector<unsigned long> blocks;
00836 
00837   // open the file, check for error
00838   result = Storage::OpenFailed;
00839   file.open( filename.c_str(), std::ios::binary | std::ios::in );
00840   if( !file.good() ) return;
00841 
00842   // find size of input file
00843   file.seekg( 0, std::ios::end );
00844   filesize = file.tellg();
00845 
00846   // load header
00847   buffer = new unsigned char[512];
00848   file.seekg( 0 );
00849   file.read( (char*)buffer, 512 );
00850   header->load( buffer );
00851   delete[] buffer;
00852 
00853   // check OLE magic id
00854   result = Storage::NotOLE;
00855   for( unsigned i=0; i<8; i++ )
00856     if( header->id[i] != pole_magic[i] )
00857       return;
00858 
00859   // sanity checks
00860   result = Storage::BadOLE;
00861   if( !header->valid() ) return;
00862   if( header->threshold != 4096 ) return;
00863 
00864   // important block size
00865   bbat->blockSize = 1 << header->b_shift;
00866   sbat->blockSize = 1 << header->s_shift;
00867 
00868   // find blocks allocated to store big bat
00869   // the first 109 blocks are in header, the rest in meta bat
00870   blocks.clear();
00871   blocks.resize( header->num_bat );
00872   for( unsigned i = 0; i < 109; i++ )
00873     if( i >= header->num_bat ) break;
00874     else blocks[i] = header->bb_blocks[i];
00875   if( (header->num_bat > 109) && (header->num_mbat > 0) )
00876   {
00877     unsigned char* buffer2 = new unsigned char[ bbat->blockSize ];
00878     unsigned k = 109;
00879     unsigned mblock = header->mbat_start;
00880     for( unsigned r = 0; r < header->num_mbat; r++ )
00881     {
00882       loadBigBlock( mblock, buffer2, bbat->blockSize );
00883       for( unsigned s=0; s < bbat->blockSize-4; s+=4 )
00884       {
00885         if( k >= header->num_bat ) break;
00886         else  blocks[k++] = readU32( buffer2 + s );
00887       }
00888       mblock = readU32( buffer2 + bbat->blockSize-4 );
00889      }
00890     delete[] buffer2;
00891   }
00892 
00893   // load big bat
00894   buflen = blocks.size()*bbat->blockSize;
00895   if( buflen > 0 )
00896   {
00897     buffer = new unsigned char[ buflen ];
00898     loadBigBlocks( blocks, buffer, buflen );
00899     bbat->load( buffer, buflen );
00900     delete[] buffer;
00901   }
00902 
00903   // load small bat
00904   blocks.clear();
00905   blocks = bbat->follow( header->sbat_start );
00906   buflen = blocks.size()*bbat->blockSize;
00907   if( buflen > 0 )
00908   {
00909     buffer = new unsigned char[ buflen ];
00910     loadBigBlocks( blocks, buffer, buflen );
00911     sbat->load( buffer, buflen );
00912     delete[] buffer;
00913   }
00914 
00915   // load directory tree
00916   blocks.clear();
00917   blocks = bbat->follow( header->dirent_start );
00918   buflen = blocks.size()*bbat->blockSize;
00919   buffer = new unsigned char[ buflen ];
00920   loadBigBlocks( blocks, buffer, buflen );
00921   dirtree->load( buffer, buflen );
00922   unsigned sb_start = readU32( buffer + 0x74 );
00923   delete[] buffer;
00924 
00925   // fetch block chain as data for small-files
00926   sb_blocks = bbat->follow( sb_start ); // small files
00927 
00928   // for troubleshooting, just enable this block
00929 #if 0
00930   header->debug();
00931   sbat->debug();
00932   bbat->debug();
00933   dirtree->debug();
00934 #endif
00935 
00936   // so far so good
00937   result = Storage::Ok;
00938   opened = true;
00939 }
00940 
00941 void StorageIO::create()
00942 {
00943   // std::cout << "Creating " << filename << std::endl;
00944 
00945   file.open( filename.c_str(), std::ios::out|std::ios::binary );
00946   if( !file.good() )
00947   {
00948     std::cerr << "Can't create " << filename << std::endl;
00949     result = Storage::OpenFailed;
00950     return;
00951   }
00952 
00953   // so far so good
00954   opened = true;
00955   result = Storage::Ok;
00956 }
00957 
00958 void StorageIO::flush()
00959 {
00960   /* Note on Microsoft implementation:
00961      - directory entries are stored in the last block(s)
00962      - BATs are as second to the last
00963      - Meta BATs are third to the last
00964   */
00965 }
00966 
00967 void StorageIO::close()
00968 {
00969   if( !opened ) return;
00970 
00971   file.close();
00972   opened = false;
00973 
00974   std::list<Stream*>::iterator it;
00975   for( it = streams.begin(); it != streams.end(); ++it )
00976     delete *it;
00977 }
00978 
00979 StreamIO* StorageIO::streamIO( const std::string& name )
00980 {
00981   // sanity check
00982   if( !name.length() ) return (StreamIO*)0;
00983 
00984   // search in the entries
00985   DirEntry* entry = dirtree->entry( name );
00986   //if( entry) std::cout << "FOUND\n";
00987   if( !entry ) return (StreamIO*)0;
00988   //if( !entry->dir ) std::cout << "  NOT DIR\n";
00989   if( entry->dir ) return (StreamIO*)0;
00990 
00991   StreamIO* result = new StreamIO( this, entry );
00992   result->fullName = name;
00993 
00994   return result;
00995 }
00996 
00997 unsigned long StorageIO::loadBigBlocks( std::vector<unsigned long> blocks,
00998   unsigned char* data, unsigned long maxlen )
00999 {
01000   // sentinel
01001   if( !data ) return 0;
01002   if( !file.good() ) return 0;
01003   if( blocks.size() < 1 ) return 0;
01004   if( maxlen == 0 ) return 0;
01005 
01006   // read block one by one, seems fast enough
01007   unsigned long bytes = 0;
01008   for( unsigned long i=0; (i < blocks.size() ) && ( bytes<maxlen ); i++ )
01009   {
01010     unsigned long block = blocks[i];
01011     unsigned long pos =  bbat->blockSize * ( block+1 );
01012     unsigned long p = (bbat->blockSize < maxlen-bytes) ? bbat->blockSize : maxlen-bytes;
01013     if( pos + p > filesize ) p = filesize - pos;
01014     file.seekg( pos );
01015     file.read( (char*)data + bytes, p );
01016     bytes += p;
01017   }
01018 
01019   return bytes;
01020 }
01021 
01022 // this will avoid reading the same big block from the disk
01023 #define POLE_CACHE_BLOCK
01024 
01025 unsigned long StorageIO::loadBigBlock( unsigned long block,
01026   unsigned char* data, unsigned long maxlen )
01027 {
01028   // sentinel
01029   if( !data ) return 0;
01030   if( !file.good() ) return 0;
01031 
01032 #ifdef POLE_CACHE_BLOCK
01033   // just before, we also did read this block
01034   // so just reuse from cached data
01035   if( block == lastBlockIndex )
01036   if( lastBlockData) if( maxlen <= bbat->blockSize )
01037   {
01038     memcpy( data, lastBlockData, maxlen );
01039     return maxlen;
01040   }
01041 #endif
01042 
01043   // wraps call for loadBigBlocks
01044   std::vector<unsigned long> blocks;
01045   blocks.resize( 1 );
01046   blocks[0] = block;
01047 
01048   unsigned long result = loadBigBlocks( blocks, data, maxlen );
01049 
01050 #ifdef POLE_CACHE_BLOCK
01051   // save cached data for future use
01052   if(maxlen == bbat->blockSize )
01053   {
01054     if( !lastBlockData )
01055       lastBlockData = new unsigned char[bbat->blockSize];
01056     memcpy(lastBlockData, data, bbat->blockSize);
01057     lastBlockIndex = block;
01058   }
01059 #endif
01060 
01061   return result;
01062 }
01063 
01064 // return number of bytes which has been read
01065 unsigned long StorageIO::loadSmallBlocks( std::vector<unsigned long> blocks,
01066   unsigned char* data, unsigned long maxlen )
01067 {
01068   // sentinel
01069   if( !data ) return 0;
01070   if( !file.good() ) return 0;
01071   if( blocks.size() < 1 ) return 0;
01072   if( maxlen == 0 ) return 0;
01073 
01074   // our own local buffer
01075   unsigned char* buf = new unsigned char[ bbat->blockSize ];
01076 
01077   // read small block one by one
01078   unsigned long bytes = 0;
01079   for( unsigned long i=0; ( i<blocks.size() ) && ( bytes<maxlen ); i++ )
01080   {
01081     unsigned long block = blocks[i];
01082 
01083     // find where the small-block exactly is
01084     unsigned long pos = block * sbat->blockSize;
01085     unsigned long bbindex = pos / bbat->blockSize;
01086     if( bbindex >= sb_blocks.size() ) break;
01087 
01088     loadBigBlock( sb_blocks[ bbindex ], buf, bbat->blockSize );
01089 
01090     // copy the data
01091     unsigned offset = pos % bbat->blockSize;
01092     unsigned long p = (maxlen-bytes < bbat->blockSize-offset ) ? maxlen-bytes :  bbat->blockSize-offset;
01093     p = (sbat->blockSize<p ) ? sbat->blockSize : p;
01094     memcpy( data + bytes, buf + offset, p );
01095     bytes += p;
01096   }
01097 
01098   delete[] buf;
01099 
01100   return bytes;
01101 }
01102 
01103 unsigned long StorageIO::loadSmallBlock( unsigned long block,
01104   unsigned char* data, unsigned long maxlen )
01105 {
01106   // sentinel
01107   if( !data ) return 0;
01108   if( !file.good() ) return 0;
01109 
01110   // wraps call for loadSmallBlocks
01111   std::vector<unsigned long> blocks;
01112   blocks.resize( 1 );
01113   blocks.assign( 1, block );
01114 
01115   return loadSmallBlocks( blocks, data, maxlen );
01116 }
01117 
01118 // =========== StreamIO ==========
01119 
01120 StreamIO::StreamIO( StorageIO* s, DirEntry* e)
01121 {
01122   io = s;
01123   entry = e;
01124   eof = false;
01125   fail = false;
01126 
01127   m_pos = 0;
01128 
01129   if( entry->size >= io->header->threshold )
01130     blocks = io->bbat->follow( entry->start );
01131   else
01132     blocks = io->sbat->follow( entry->start );
01133 
01134   // prepare cache
01135   cache_pos = 0;
01136   cache_size = 4096; // optimal ?
01137   cache_data = new unsigned char[cache_size];
01138   updateCache();
01139 }
01140 
01141 // FIXME tell parent we're gone
01142 StreamIO::~StreamIO()
01143 {
01144   delete[] cache_data;
01145 }
01146 
01147 void StreamIO::seek( unsigned long pos )
01148 {
01149   m_pos = pos;
01150 }
01151 
01152 unsigned long StreamIO::tell()
01153 {
01154   return m_pos;
01155 }
01156 
01157 int StreamIO::getch()
01158 {
01159   // past end-of-file ?
01160   if( m_pos > entry->size ) return -1;
01161 
01162   // need to update cache ?
01163   if( !cache_size || ( m_pos < cache_pos ) ||
01164     ( m_pos >= cache_pos + cache_size ) )
01165       updateCache();
01166 
01167   // something bad if we don't get good cache
01168   if( !cache_size ) return -1;
01169 
01170   int data = cache_data[m_pos - cache_pos];
01171   m_pos++;
01172 
01173   return data;
01174 }
01175 
01176 unsigned long StreamIO::read( unsigned long pos, unsigned char* data, unsigned long maxlen )
01177 {
01178   // sanity checks
01179   if( !data ) return 0;
01180   if( maxlen == 0 ) return 0;
01181 
01182   unsigned long totalbytes = 0;
01183 
01184   if ( entry->size < io->header->threshold )
01185   {
01186     // small file
01187     unsigned long index = pos / io->sbat->blockSize;
01188 
01189     if( index >= blocks.size() ) return 0;
01190 
01191     unsigned char* buf = new unsigned char[ io->sbat->blockSize ];
01192     unsigned long offset = pos % io->sbat->blockSize;
01193     while( totalbytes < maxlen )
01194     {
01195       if( index >= blocks.size() ) break;
01196       io->loadSmallBlock( blocks[index], buf, io->bbat->blockSize );
01197       unsigned long count = io->sbat->blockSize - offset;
01198       if( count > maxlen-totalbytes ) count = maxlen-totalbytes;
01199       memcpy( data+totalbytes, buf + offset, count );
01200       totalbytes += count;
01201       offset = 0;
01202       index++;
01203     }
01204     delete[] buf;
01205 
01206   }
01207   else
01208   {
01209     // big file
01210     unsigned long index = pos / io->bbat->blockSize;
01211 
01212     // sanity check
01213     if( index >= blocks.size() ) return 0;
01214 
01215     unsigned char* buf = new unsigned char[ io->bbat->blockSize ];
01216     unsigned long offset = pos % io->bbat->blockSize;
01217     while( totalbytes < maxlen )
01218     {
01219       if( index >= blocks.size() ) break;
01220       io->loadBigBlock( blocks[index], buf, io->bbat->blockSize );
01221       unsigned long count = io->bbat->blockSize - offset;
01222       if( count > maxlen-totalbytes ) count = maxlen-totalbytes;
01223       memcpy( data+totalbytes, buf + offset, count );
01224       totalbytes += count;
01225       index++;
01226       offset = 0;
01227     }
01228     delete [] buf;
01229 
01230   }
01231 
01232   return totalbytes;
01233 }
01234 
01235 unsigned long StreamIO::read( unsigned char* data, unsigned long maxlen )
01236 {
01237   unsigned long bytes = read( tell(), data, maxlen );
01238   m_pos += bytes;
01239   return bytes;
01240 }
01241 
01242 void StreamIO::updateCache()
01243 {
01244   // sanity check
01245   if( !cache_data ) return;
01246 
01247   cache_pos = m_pos - ( m_pos % cache_size );
01248   unsigned long bytes = cache_size;
01249   if( cache_pos + bytes > entry->size ) bytes = entry->size - cache_pos;
01250   cache_size = read( cache_pos, cache_data, bytes );
01251 }
01252 
01253 
01254 // =========== Storage ==========
01255 
01256 Storage::Storage( const char* filename )
01257 {
01258   io = new StorageIO( this, filename );
01259 }
01260 
01261 Storage::~Storage()
01262 {
01263   delete io;
01264 }
01265 
01266 int Storage::result()
01267 {
01268   return io->result;
01269 }
01270 
01271 bool Storage::open()
01272 {
01273   return io->open();
01274 }
01275 
01276 void Storage::close()
01277 {
01278   io->close();
01279 }
01280 
01281 std::list<std::string> Storage::entries( const std::string& path )
01282 {
01283   std::list<std::string> result;
01284   DirTree* dt = io->dirtree;
01285   DirEntry* e = dt->entry( path, false );
01286   if( e  && e->dir )
01287   {
01288     unsigned parent = dt->indexOf( e );
01289     std::vector<unsigned> children = dt->children( parent );
01290     for( unsigned i = 0; i < children.size(); i++ )
01291       result.push_back( dt->entry( children[i] )->name );
01292   }
01293 
01294   return result;
01295 }
01296 
01297 bool Storage::isDirectory( const std::string& name )
01298 {
01299   DirEntry* e = io->dirtree->entry( name, false );
01300   return e ? e->dir : false;
01301 }
01302 
01303 // =========== Stream ==========
01304 
01305 Stream::Stream( Storage* storage, const std::string& name )
01306 {
01307   io = storage->io->streamIO( name );
01308 }
01309 
01310 // FIXME tell parent we're gone
01311 Stream::~Stream()
01312 {
01313   delete io;
01314 }
01315 
01316 std::string Stream::fullName()
01317 {
01318   return io ? io->fullName : std::string();
01319 }
01320 
01321 unsigned long Stream::tell()
01322 {
01323   return io ? io->tell() : 0;
01324 }
01325 
01326 void Stream::seek( unsigned long newpos )
01327 {
01328   if( io ) io->seek( newpos );
01329 }
01330 
01331 unsigned long Stream::size()
01332 {
01333   return io ? io->entry->size : 0;
01334 }
01335 
01336 int Stream::getch()
01337 {
01338   return io ? io->getch() : 0;
01339 }
01340 
01341 unsigned long Stream::read( unsigned char* data, unsigned long maxlen )
01342 {
01343   return io ? io->read( data, maxlen ) : 0;
01344 }
01345 
01346 bool Stream::eof()
01347 {
01348   return io ? io->eof : false;
01349 }
01350 
01351 bool Stream::fail()
01352 {
01353   return io ? io->fail : true;
01354 }
KDE Home | KDE Accessibility Home | Description of Access Keys