filters

ailexer.cc

00001 /* This file is part of the KDE project
00002    Copyright (C) 2002, Dirk Schönberger <dirk.schoenberger@sz-online.de>
00003 
00004    This library is free software; you can redistribute it and/or
00005    modify it under the terms of the GNU Library General Public
00006    License as published by the Free Software Foundation; either
00007    version 2 of the License, or (at your option) any later version.
00008 
00009    This library is distributed in the hope that it will be useful,
00010    but WITHOUT ANY WARRANTY; without even the implied warranty of
00011    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00012    Library General Public License for more details.
00013 
00014    You should have received a copy of the GNU Library General Public License
00015    along with this library; see the file COPYING.LIB.  If not, write to
00016    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00017  * Boston, MA 02110-1301, USA.
00018 */
00019 
#include <ctype.h>
#include <stdlib.h>
#include <string.h>
#include <qstringlist.h>
#include "ailexer.h"
00024 
// Character-class keys for the character column of the transition table.
// All of them are negative, whereas the literal characters stored in the
// same column are plain ASCII codes. AILexer::nextStep() decides what each
// key matches.
#define CATEGORY_WHITESPACE -1   // matched with isspace()
#define CATEGORY_ALPHA -2        // matched with isalpha()
#define CATEGORY_DIGIT -3        // matched with isdigit()
#define CATEGORY_SPECIAL -4      // matched with isSpecial()
#define CATEGORY_LETTERHEX -5    // matched with isletterhex()
#define CATEGORY_INTTOOLONG -6   // matches when the buffer holds more than MAX_INTLEN chars

// Wildcard: matches every character.
#define CATEGORY_ANY -127

// Buffer length above which CATEGORY_INTTOOLONG matches.
#define MAX_INTLEN 9
// Byte-array buffers shorter than this are reported as plain tokens.
#define MIN_HEXCHARS 6

// Character value of the sentinel row that terminates the transition table.
#define STOP 0
00038 
// Returns 1 when c is a blank, newline, tab or carriage return, else 0.
int iswhitespace(char c){
  switch (c) {
    case ' ':
    case '\n':
    case '\t':
    case '\r':
      return 1;
    default:
      return 0;
  }
}
00042 
// Returns 1 when c is one of the punctuation characters that may appear
// inside a token or reference name, else 0.
int isSpecial(char c){
  switch (c) {
    case '*': case '_': case '?': case '~':
    case '-': case '^': case '`': case '!':
    case '.': case '@': case '&': case '$':
    case '=':
      return 1;
    default:
      return 0;
  }
}
00046 
// Returns 1 for the upper-case hexadecimal letters 'A'..'F', else 0.
// Lower-case letters and digits are deliberately not accepted here.
int isletterhex(char c){
  switch (c) {
    case 'A': case 'B': case 'C':
    case 'D': case 'E': case 'F':
      return 1;
    default:
      return 0;
  }
}
00050 
00051 const char*statetoa (State state){
00052   switch (state)
00053   {
00054     case State_Comment : return "comment";
00055     case State_Integer : return "integer";
00056     case State_Float : return "float";
00057     case State_String : return "string";
00058     case State_Token : return "token";
00059     case State_Reference : return "reference";
00060     case State_Start : return "start";
00061     case State_BlockStart : return "block start";
00062     case State_BlockEnd : return "block end";
00063     case State_ArrayStart : return "array start";
00064     case State_ArrayEnd : return "array end";
00065     case State_Byte : return "byte";
00066     case State_ByteArray : return "byte array";
00067     case State_StringEncodedChar : return "encoded char (string)";
00068     case State_CommentEncodedChar : return "encoded char (comment)";
00069     case State_ByteArray2 : return "byte array (mode 2)";
00070     default : return "unknown";
00071   }
00072 }
00073 
00074 typedef struct {
00075   State oldState;
00076   char c;
00077   State newState;
00078   Action action;
00079 } Transition;
00080 
00081 static Transition transitions[] = {
00082   { State_Comment, '\n', State_Start, Action_Output},
00083   { State_Comment, '\r', State_Start, Action_Output},
00084   { State_Comment, '\\', State_CommentEncodedChar, Action_InitTemp},
00085   { State_Comment, CATEGORY_ANY, State_Comment, Action_Copy},
00086   { State_Integer, CATEGORY_DIGIT, State_Integer, Action_Copy},
00087   { State_Integer, CATEGORY_WHITESPACE, State_Start, Action_Output},
00088   { State_Integer, '.', State_Float, Action_Copy},
00089   { State_Integer, ']', State_Start, Action_OutputUnget},
00090   { State_Integer, '}', State_Start, Action_OutputUnget},
00091   { State_Integer, '#', State_Byte, Action_Copy },
00092   { State_Integer, '/', State_Start, Action_OutputUnget },
00093   { State_Integer, '{', State_Start, Action_OutputUnget },
00094   { State_Integer, '%', State_Start, Action_OutputUnget },
00095   { State_Integer, CATEGORY_LETTERHEX, State_ByteArray2, Action_Copy },
00096   { State_Integer, CATEGORY_INTTOOLONG, State_ByteArray2, Action_Copy },
00097   { State_Integer, CATEGORY_ANY, State_Start, Action_Abort},
00098   { State_Float, CATEGORY_DIGIT, State_Float, Action_Copy},
00099   { State_Float, CATEGORY_WHITESPACE, State_Start, Action_Output},
00100   { State_Float, ']', State_Start, Action_OutputUnget},
00101   { State_Float, '}', State_Start, Action_OutputUnget},
00102   { State_Float, CATEGORY_ANY, State_Start, Action_Abort},
00103   { State_Token, CATEGORY_ALPHA, State_Token, Action_Copy},
00104   { State_Token, CATEGORY_DIGIT, State_Token, Action_Copy},
00105   { State_Token, CATEGORY_SPECIAL, State_Token, Action_Copy},
00106   { State_Token, '}', State_Start, Action_OutputUnget},
00107   { State_Token, ']', State_Start, Action_OutputUnget},
00108   { State_Token, '{', State_BlockStart, Action_Output},
00109   { State_Token, '}', State_BlockEnd, Action_Output},
00110   { State_Token, '/', State_Start, Action_OutputUnget},
00111   { State_Token, CATEGORY_WHITESPACE, State_Start, Action_Output},
00112   { State_Token, CATEGORY_ANY, State_Start, Action_Abort},
00113   { State_String, ')', State_Start, Action_Output},
00114   { State_String, '\\', State_StringEncodedChar, Action_InitTemp},
00115   { State_String, CATEGORY_ANY, State_String, Action_Copy},
00116 //  { State_Array, CATEGORY_ALPHA, State_Array, Action_Copy},
00117 //  { State_Array, CATEGORY_DIGIT, State_Array, Action_Copy},
00118 //  { State_Array, ' ', State_Array, Action_Copy},
00119   { State_BlockStart, CATEGORY_ANY, State_Start, Action_OutputUnget },
00120   { State_BlockEnd, CATEGORY_ANY, State_Start, Action_OutputUnget },
00121   { State_ArrayStart, CATEGORY_ANY, State_Start, Action_OutputUnget },
00122   { State_ArrayEnd, CATEGORY_ANY, State_Start, Action_OutputUnget },
00123   { State_Reference, '#', State_Reference, Action_Copy },
00124   { State_Reference, CATEGORY_ALPHA, State_Reference, Action_Copy },
00125   { State_Reference, CATEGORY_DIGIT, State_Reference, Action_Copy },
00126   { State_Reference, CATEGORY_SPECIAL, State_Reference, Action_Copy },
00127   { State_Reference, CATEGORY_ANY, State_Start, Action_OutputUnget },
00128   { State_Byte, '/', State_Start, Action_OutputUnget },
00129   { State_Byte, CATEGORY_DIGIT, State_Byte, Action_Copy},
00130   { State_Byte, CATEGORY_ALPHA, State_Byte, Action_Copy},
00131   { State_Byte, CATEGORY_WHITESPACE, State_Start, Action_Output},
00132   { State_ByteArray, '>', State_Start, Action_Output },
00133   { State_ByteArray, CATEGORY_ALPHA, State_ByteArray, Action_Copy },
00134   { State_ByteArray, CATEGORY_DIGIT, State_ByteArray, Action_Copy },
00135   { State_ByteArray, CATEGORY_WHITESPACE, State_ByteArray, Action_Ignore },
00136   { State_ByteArray, CATEGORY_ANY, State_Start, Action_Abort },
00137   { State_StringEncodedChar, '\\', State_String, Action_Copy},
00138   { State_StringEncodedChar, CATEGORY_DIGIT, State_StringEncodedChar, Action_CopyTemp},
00139   { State_StringEncodedChar, CATEGORY_ANY, State_String, Action_DecodeUnget},
00140   { State_CommentEncodedChar, '\\', State_Comment, Action_Copy},
00141   { State_CommentEncodedChar, CATEGORY_DIGIT, State_CommentEncodedChar, Action_CopyTemp},
00142   { State_CommentEncodedChar, CATEGORY_ANY, State_Comment, Action_DecodeUnget},
00143   { State_ByteArray2, '\n', State_Start, Action_Output},
00144   { State_ByteArray2, '\r', State_Start, Action_Output},
00145   { State_ByteArray2, '}', State_Start, Action_ByteArraySpecial},
00146   { State_ByteArray2, CATEGORY_WHITESPACE, State_Start, Action_Output},
00147   { State_ByteArray2, CATEGORY_DIGIT, State_ByteArray2, Action_Copy},
00148   { State_ByteArray2, CATEGORY_LETTERHEX, State_ByteArray2, Action_Copy},
00149   { State_ByteArray2, CATEGORY_ALPHA, State_Token, Action_Copy},
00150   { State_ByteArray2, CATEGORY_ANY, State_Start, Action_Abort},
00151   { State_Start, '%', State_Comment, Action_Ignore},
00152   { State_Start, CATEGORY_DIGIT, State_Integer, Action_Copy},
00153   { State_Start, '-', State_Integer, Action_Copy},
00154   { State_Start, '+', State_Integer, Action_Copy},
00155   { State_Start, '.', State_Float, Action_Copy},
00156   { State_Start, '/', State_Reference, Action_Ignore },
00157   { State_Start, '(', State_String, Action_Ignore},
00158   { State_Start, '{', State_BlockStart, Action_Copy},
00159   { State_Start, '}', State_BlockEnd, Action_Copy},
00160   { State_Start, '[', State_ArrayStart, Action_Copy},
00161   { State_Start, ']', State_ArrayEnd, Action_Copy},
00162   { State_Start, '<', State_ByteArray, Action_Ignore},
00163   { State_Start, CATEGORY_ALPHA, State_Token, Action_Copy},
00164   { State_Start, CATEGORY_WHITESPACE, State_Start, Action_Output},
00165   { State_Start, CATEGORY_SPECIAL, State_Token, Action_Copy},
00166   { State_Start, CATEGORY_LETTERHEX, State_ByteArray2, Action_Copy},
00167   { State_Start, CATEGORY_ANY, State_Start, Action_Abort},
00168   { State_Start, STOP, State_Start, Action_Abort}
00169 };
00170 
// Constructs a lexer. Nothing is initialised here; parse() resets the
// token buffer and the current state each time it is called.
AILexer::AILexer(){
}
// Destroys the lexer. The body is empty.
AILexer::~AILexer(){
}
00175 
00176 bool AILexer::parse (QIODevice& fin){
00177   char c;
00178 
00179   m_buffer.clear();
00180   m_curState = State_Start;
00181 
00182   parsingStarted();
00183 
00184   while (!fin.atEnd())
00185   {
00186     c = fin.getch ();
00187 
00188 //    qDebug ("got %c", c);
00189 
00190     State newState;
00191     Action action;
00192 
00193     nextStep (c, &newState, &action);
00194 
00195     switch (action)
00196     {
00197       case Action_Copy :
00198         m_buffer.append (c);
00199         break;
00200       case Action_CopyOutput :
00201         m_buffer.append (c);
00202         doOutput();
00203         break;
00204       case Action_Output :
00205         doOutput();
00206         break;
00207       case Action_OutputUnget :
00208         doOutput();
00209         fin.ungetch(c);
00210         break;
00211       case Action_Ignore :
00212         /* ignore */
00213         break;
00214       case Action_Abort :
00215         qWarning ( "state %s / %s char %c (%d)" , statetoa(m_curState), statetoa(newState), c, c );
00216         parsingAborted();
00217         return false;
00218         break;
00219       case Action_InitTemp :
00220         m_temp.clear();
00221         break;
00222       case Action_CopyTemp :
00223         m_temp.append (c);
00224         break;
00225       case Action_DecodeUnget :
00226         m_buffer.append (decode());
00227         fin.ungetch(c);
00228         break;
00229       // in Postscript Quelltext: Kombination F}
00230       case Action_ByteArraySpecial :
00231         m_curState = State_Token;
00232         doOutput();
00233         fin.ungetch(c);
00234         break;
00235       default :
00236         qWarning ( "unknown action: %d ", action);
00237     }
00238 
00239     m_curState = newState;
00240   }
00241 
00242   parsingFinished();
00243   return true;
00244 }
00245 
00246 void AILexer::doOutput ()
00247 {
00248   if (m_buffer.length() == 0) return;
00249   switch (m_curState)
00250   {
00251     case State_Comment :
00252       gotComment (m_buffer.latin1());
00253       break;
00254     case State_Integer :
00255       gotIntValue (m_buffer.toInt());
00256       break;
00257     case State_Float :
00258       gotDoubleValue (m_buffer.toFloat());
00259       break;
00260     case State_String :
00261       gotStringValue (m_buffer.latin1());
00262       break;
00263     case State_Token :
00264       gotToken (m_buffer.latin1());
00265       break;
00266     case State_Reference :
00267       gotReference (m_buffer.latin1());
00268       break;
00269     case State_BlockStart :
00270       gotBlockStart ();
00271       break;
00272     case State_BlockEnd :
00273       gotBlockEnd ();
00274       break;
00275     case State_Start :
00276       break;
00277     case State_ArrayStart :
00278       gotArrayStart ();
00279       break;
00280     case State_ArrayEnd :
00281       gotArrayEnd ();
00282       break;
00283     case State_Byte :
00284       gotByte (getByte());
00285       break;
00286     case State_ByteArray :
00287     case State_ByteArray2 :
00288       doHandleByteArray ();
00289       break;
00290     default:
00291       qWarning ( "unknown state: %d", m_curState );
00292   }
00293 
00294   m_buffer.clear();
00295 }
00296 
// Default handlers. Each one only logs what was recognised via qDebug().
// NOTE(review): presumably these are virtual and meant to be overridden by
// subclasses -- confirm against ailexer.h.

// Called by doOutput() with the text of a finished comment.
void AILexer::gotComment (const char *value) {
  qDebug ( "gotComment: %s ", value );
}

// Called by doOutput() with the value of a finished integer literal.
void AILexer::gotIntValue (int value) {
  qDebug ( "gotInt: %d ", value );
}

// Called by doOutput() with the value of a finished floating point literal.
void AILexer::gotDoubleValue (double value) {
  qDebug ( "gotDouble: %f ", value );
}

// Called by doOutput() with the contents of a finished string.
void AILexer::gotStringValue (const char *value) {
  qDebug ( "gotString: %s ", value );
}

// Called by doOutput() with the text of a finished token; also used by
// doHandleByteArray() for hex runs that are too short to be a byte array.
void AILexer::gotToken (const char *value) {
  qDebug ( "gotToken: %s ", value );
}

// Called by doOutput() with the text of a finished reference.
void AILexer::gotReference (const char *value) {
  qDebug ( "gotReference: %s ", value );
}

// Called by doOutput() when a block start has been recognised.
void AILexer::gotBlockStart (){
  qDebug ( "gotBlockStart" );
}

// Called by doOutput() when a block end has been recognised.
void AILexer::gotBlockEnd (){
  qDebug ( "gotBlockEnd" );
}

// Called by doOutput() when an array start has been recognised.
void AILexer::gotArrayStart (){
  qDebug ( "gotArrayStart" );
}

// Called by doOutput() when an array end has been recognised.
void AILexer::gotArrayEnd (){
  qDebug ( "gotArrayEnd" );
}

// Called at the beginning of parse(), before any input is read.
void AILexer::parsingStarted() {
  qDebug ( "parsing started" );
}

// Called at the end of parse() once the input is exhausted.
void AILexer::parsingFinished() {
  qDebug ( "parsing finished" );
}

// Called by parse() just before it returns false.
void AILexer::parsingAborted() {
  qDebug ( "parsing aborted" );
}

// Called by doOutput() with the value computed by getByte().
void AILexer::gotByte (uchar value) {
  qDebug ( "got byte %d" , value );
}

// Called by doHandleByteArray() with the decoded bytes. The default
// implementation does not look at `data`; the per-byte dump is disabled.
void AILexer::gotByteArray (const QByteArray &data) {
  qDebug ( "got byte array" );
/*  for ( uint i = 0; i < data.size(); i++ )
  {
    uchar value = data[i];
    qDebug( "%d: %x", i, value );
  }
  qDebug ( "/byte array" ); */

}
00363 
00364 
00365 void AILexer::nextStep (char c, State *newState, Action *newAction) {
00366   int i=0;
00367 
00368   while (true) {
00369     Transition trans = transitions[i];
00370 
00371     if (trans.c == STOP) {
00372       *newState = trans.newState;
00373       *newAction = trans.action;
00374       return;
00375     }
00376 
00377     bool found = false;
00378 
00379     if (trans.oldState == m_curState) {
00380       switch (trans.c) {
00381         case CATEGORY_WHITESPACE : found = isspace(c); break;
00382         case CATEGORY_ALPHA : found = isalpha(c); break;
00383         case CATEGORY_DIGIT : found = isdigit(c); break;
00384         case CATEGORY_SPECIAL : found = isSpecial(c); break;
00385         case CATEGORY_LETTERHEX : found = isletterhex(c); break;
00386         case CATEGORY_INTTOOLONG : found = m_buffer.length() > MAX_INTLEN; break;
00387         case CATEGORY_ANY : found = true; break;
00388         default : found = (trans.c == c);
00389       }
00390 
00391       if (found) {
00392         *newState = trans.newState;
00393         *newAction = trans.action;
00394 
00395         return;
00396       }
00397     }
00398 
00399 
00400     i++;
00401   }
00402 }
00403 
00404 void AILexer::doHandleByteArray ()
00405 {
00406   // Special case - too short
00407   if (m_buffer.length () < MIN_HEXCHARS)
00408   {
00409     gotToken (m_buffer.latin1());
00410     return;
00411   }
00412 
00413   uint strIdx = 0;
00414   uint arrayIdx = 0;
00415 
00416   QByteArray data (m_buffer.length() >> 1);
00417 
00418   while (strIdx < m_buffer.length())
00419   {
00420     const QString &item = m_buffer.mid (strIdx, 2);
00421     uchar val = item.toShort(NULL, 16);
00422     data[arrayIdx] = val;
00423     strIdx += 2;
00424     arrayIdx++;
00425   }
00426 
00427   gotByteArray (data);
00428 }
00429 
00430 uchar AILexer::getByte()
00431 {
00432 //  qDebug ("convert string to byte (%s)", m_buffer.latin1());
00433 
00434   QStringList list = QStringList::split ("#", m_buffer.toString());
00435   int radix = list[0].toShort();
00436   uchar value = list[1].toShort (NULL, radix);
00437 
00438   return value;
00439 }
00440 
00441 uchar AILexer::decode()
00442 {
00443   uchar value = m_temp.toString().toShort(NULL, 8);
00444 //  qDebug ("got encoded char %c",value);
00445   return value;
00446 }
00447 
00448 /* StringBuffer implementation */
00449 
// Number of chars a new StringBuffer allocates.
int initialSize = 20;
// Minimum number of chars a StringBuffer grows by when it runs out of room.
int addSize = 10;
00452 
00453 StringBuffer::StringBuffer () {
00454   m_buffer = (char*)calloc (initialSize, sizeof(char));
00455   m_length = 0;
00456   m_capacity = initialSize;
00457 }
00458 
00459 StringBuffer::~StringBuffer (){
00460   free(m_buffer);
00461 }
00462 
00463 void StringBuffer::append (char c){
00464   ensureCapacity(m_length + 1);
00465   m_buffer[m_length] = c;
00466   m_length++;
00467 }
00468 
00469 void StringBuffer::clear(){
00470   for (uint i=0; i<m_length; i++) m_buffer[i] = '\0';
00471   m_length = 0;
00472 }
00473 
00474 QString StringBuffer::toString() const {
00475   QString ret(m_buffer);
00476   return ret;
00477 }
00478 
00479 void StringBuffer::ensureCapacity (int p_capacity) {
00480   if (m_capacity >= p_capacity) return;
00481 
00482   int newSize = m_capacity + addSize;
00483   if (p_capacity > newSize) newSize = p_capacity;
00484 
00485   char* oldBuffer = m_buffer;
00486   char *newBuffer = (char*)calloc (newSize, sizeof(char));
00487   strcpy (newBuffer, m_buffer);
00488   free(oldBuffer);
00489   m_buffer = newBuffer;
00490   m_capacity = newSize;
00491 }
00492 
// Returns the number of characters appended since the last clear().
uint StringBuffer::length() {
  return m_length;
}
00496 
00497 double StringBuffer::toFloat() {
00498   QString data = toString();
00499   return data.toFloat();
00500 }
00501 
00502 int StringBuffer::toInt() {
00503   QString data = toString();
00504   return data.toInt();
00505 }
00506 
// Returns the internal storage itself, not a copy. ensureCapacity() frees
// and replaces that storage when the buffer grows, so the pointer should not
// be kept across later append() calls.
const char *StringBuffer::latin1() {
  return m_buffer;
}
00510 
00511 QString StringBuffer::mid( uint index, uint len) const {
00512   QString data = toString();
00513   return data.mid(index,len);
00514 }
KDE Home | KDE Accessibility Home | Description of Access Keys