Drizzled Public API Documentation

temporal_format.cc

Go to the documentation of this file.
00001 /* - mode: c; c-basic-offset: 2; indent-tabs-mode: nil; -*-
00002  *  vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
00003  *
00004  *  Copyright (C) 2008 Sun Microsystems, Inc.
00005  *
00006  *  Authors:
00007  *
00008  *  Jay Pipes <jay.pipes@sun.com>
00009  *
00010  *  This program is free software; you can redistribute it and/or modify
00011  *  it under the terms of the GNU General Public License as published by
00012  *  the Free Software Foundation; either version 2 of the License, or
00013  *  (at your option) any later version.
00014  *
00015  *  This program is distributed in the hope that it will be useful,
00016  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00017  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00018  *  GNU General Public License for more details.
00019  *
00020  *  You should have received a copy of the GNU General Public License
00021  *  along with this program; if not, write to the Free Software
00022  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00023  */
00024 
00031 #include <config.h>
00032 
00033 #include <drizzled/temporal_format.h>
00034 #include <drizzled/temporal.h>
00035 
00036 #include <string.h>
00037 #include PCRE_HEADER
00038 
00039 #include <string>
00040 #include <vector>
00041 
00042 using namespace std;
00043 
00044 namespace drizzled
00045 {
00046 
00047 TemporalFormat::TemporalFormat(const char *pattern) :
00048   _pattern(pattern)
00049 , _error_offset(0)
00050 , _error(NULL)
00051 , _year_part_index(0)
00052 , _month_part_index(0)
00053 , _day_part_index(0)
00054 , _hour_part_index(0)
00055 , _minute_part_index(0)
00056 , _second_part_index(0)
00057 , _usecond_part_index(0)
00058 , _nsecond_part_index(0)
00059 {
00060   /* Compile our regular expression */
00061   _re= pcre_compile(pattern
00062                     , 0 /* Default options */
00063                     , &_error
00064                     , &_error_offset
00065                     , NULL /* Use default character table */
00066                     );
00067 }
00068 
00069 bool TemporalFormat::matches(const char *data, size_t data_len, Temporal *to)
00070 {
00071   if (! is_valid()) 
00072     return false;
00073 
00074   int32_t match_vector[OUT_VECTOR_SIZE]; 
00076   /* Make sure we've got no junk in the match_vector. */
00077   memset(match_vector, 0, sizeof(match_vector));
00078 
00079   /* Simply check the subject against the compiled regular expression */
00080   int32_t result= pcre_exec(_re
00081                             , NULL /* No extra data */
00082                             , data
00083                             , data_len
00084                             , 0 /* Start at offset 0 of subject...*/
00085                             , 0 /* Default options */
00086                             , match_vector
00087                             , OUT_VECTOR_SIZE
00088                             );
00089   if (result < 0)
00090   {
00091     switch (result)
00092     {
00093       case PCRE_ERROR_NOMATCH:
00094         return false; /* No match, just return false */
00095       default:
00096         return false;
00097     }
00098     return false;
00099   }
00100 
00101   int32_t expected_match_count= (_year_part_index > 1 ? 1 : 0)
00102                               + (_month_part_index > 1 ? 1 : 0)
00103                               + (_day_part_index > 1 ? 1 : 0)
00104                               + (_hour_part_index > 1 ? 1 : 0)
00105                               + (_minute_part_index > 1 ? 1 : 0)
00106                               + (_second_part_index > 1 ? 1 : 0)
00107                               + (_usecond_part_index > 1 ? 1 : 0)
00108                               + (_nsecond_part_index > 1 ? 1 : 0)
00109                               + 1; /* Add one for the entire match... */
00110   if (result != expected_match_count)
00111     return false;
00112 
00113   /* C++ string class easy to use substr() method is very useful here */
00114   string copy_data(data, data_len);
00115   /* 
00116    * OK, we have the expected substring matches, so grab
00117    * the various temporal parts from the subject string
00118    *
00119    * @note 
00120    *
00121    * TemporalFormatMatch is a friend class to Temporal, so
00122    * we can access the temporal instance's protected data.
00123    */
00124   if (_year_part_index > 1)
00125   {
00126     size_t year_start= match_vector[_year_part_index];
00127     size_t year_len= match_vector[_year_part_index + 1] - match_vector[_year_part_index];
00128     to->_years= atoi(copy_data.substr(year_start, year_len).c_str());
00129     if (year_len == 2)
00130       to->_years+= (to->_years >= DRIZZLE_YY_PART_YEAR ? 1900 : 2000);
00131   }
00132   if (_month_part_index > 1)
00133   {
00134     size_t month_start= match_vector[_month_part_index];
00135     size_t month_len= match_vector[_month_part_index + 1] - match_vector[_month_part_index];
00136     to->_months= atoi(copy_data.substr(month_start, month_len).c_str());
00137   }
00138   if (_day_part_index > 1)
00139   {
00140     size_t day_start= match_vector[_day_part_index];
00141     size_t day_len= match_vector[_day_part_index + 1] - match_vector[_day_part_index];
00142     to->_days= atoi(copy_data.substr(day_start, day_len).c_str());
00143   }
00144   if (_hour_part_index > 1)
00145   {
00146     size_t hour_start= match_vector[_hour_part_index];
00147     size_t hour_len= match_vector[_hour_part_index + 1] - match_vector[_hour_part_index];
00148     to->_hours= atoi(copy_data.substr(hour_start, hour_len).c_str());
00149   }
00150   if (_minute_part_index > 1)
00151   {
00152     size_t minute_start= match_vector[_minute_part_index];
00153     size_t minute_len= match_vector[_minute_part_index + 1] - match_vector[_minute_part_index];
00154     to->_minutes= atoi(copy_data.substr(minute_start, minute_len).c_str());
00155   }
00156   if (_second_part_index > 1)
00157   {
00158     size_t second_start= match_vector[_second_part_index];
00159     size_t second_len= match_vector[_second_part_index + 1] - match_vector[_second_part_index];
00160     to->_seconds= atoi(copy_data.substr(second_start, second_len).c_str());
00161   }
00162   if (_usecond_part_index > 1)
00163   {
00164     size_t usecond_start= match_vector[_usecond_part_index];
00165     size_t usecond_len= match_vector[_usecond_part_index + 1] - match_vector[_usecond_part_index];
00166     /* 
00167      * For microseconds, which are millionth of 1 second, 
00168      * we must ensure that we produce a correct result, 
00169      * even if < 6 places were specified.  For instance, if we get .1, 
00170      * we must produce 100000. .11 should produce 110000, etc.
00171      */
00172     uint32_t multiplier= 1;
00173     int32_t x= usecond_len;
00174     while (x < 6)
00175     {
00176       multiplier*= 10;
00177       ++x;
00178     }
00179     to->_useconds= atoi(copy_data.substr(usecond_start, usecond_len).c_str()) * multiplier;
00180   }
00181   if (_nsecond_part_index > 1)
00182   {
00183     size_t nsecond_start= match_vector[_nsecond_part_index];
00184     size_t nsecond_len= match_vector[_nsecond_part_index + 1] - match_vector[_nsecond_part_index];
00185     /* 
00186      * For nanoseconds, which are 1 billionth of a second, 
00187      * we must ensure that we produce a correct result, 
00188      * even if < 9 places were specified.  For instance, if we get .1, 
00189      * we must produce 100000000. .11 should produce 110000000, etc.
00190      */
00191     uint32_t multiplier= 1;
00192     int32_t x= nsecond_len;
00193     while (x < 9)
00194     {
00195       multiplier*= 10;
00196       ++x;
00197     }
00198     to->_nseconds= atoi(copy_data.substr(nsecond_start, nsecond_len).c_str()) * multiplier;
00199   }
00200   return true;
00201 }
00202 
00203 
/* Number of entries in the __format_args table below; keep the two in sync. */
#define COUNT_KNOWN_FORMATS 19

/**
 * Static description of one known temporal format: the regular
 * expression plus, for every temporal part, the 1-based number of the
 * capturing group that holds it.  A group number of 0 means the format
 * does not contain that part.
 */
struct temporal_format_args
{
  const char *pattern;         /* Regular expression text */
  int32_t year_part_index;     /* Capturing group holding the year */
  int32_t month_part_index;    /* Capturing group holding the month */
  int32_t day_part_index;      /* Capturing group holding the day */
  int32_t hour_part_index;     /* Capturing group holding the hour */
  int32_t minute_part_index;   /* Capturing group holding the minute */
  int32_t second_part_index;   /* Capturing group holding the second */
  int32_t usecond_part_index;  /* Capturing group holding the microseconds */
  int32_t nsecond_part_index;  /* Capturing group holding the nanoseconds */
};

/**
 * The known temporal formats, in the order in which they are registered.
 *
 * The separator between the date and the time portion is written as
 * [T\s] (or plain \s).  It used to be written [T|\s+] and [\s+], but
 * inside a character class '|' and '+' are ordinary characters, so
 * subjects such as "2010-01-02|03:04:05" were accepted by mistake.
 */
static struct temporal_format_args __format_args[COUNT_KNOWN_FORMATS]=
{
  {"^(\\d{4})(\\d{2})(\\d{2})(\\d{2})(\\d{2})(\\d{2})\\.(\\d{1,6})$", 1, 2, 3, 4, 5, 6, 7, 0} /* YYYYMMDDHHmmSS.uuuuuu */
, {"^(\\d{4})(\\d{2})(\\d{2})(\\d{2})(\\d{2})(\\d{2})$", 1, 2, 3, 4, 5, 6, 0, 0} /* YYYYMMDDHHmmSS */
, {"^(\\d{4})[-/.](\\d{1,2})[-/.](\\d{1,2})[T\\s](\\d{2}):(\\d{2}):(\\d{2})\\.(\\d{1,6})$", 1, 2, 3, 4, 5, 6, 7, 0} /* YYYY[/-.][M]M[/-.][D]D[T]HH:mm:SS.uuuuuu */
, {"^(\\d{4})[-/.](\\d{1,2})[-/.](\\d{1,2})[T\\s](\\d{2}):(\\d{2}):(\\d{2})$", 1, 2, 3, 4, 5, 6, 0, 0} /* YYYY[/-.][M]M[/-.][D]D[T]HH:mm:SS */
, {"^(\\d{2})[-/.](\\d{1,2})[-/.](\\d{1,2})\\s(\\d{2}):(\\d{2}):(\\d{2})$", 1, 2, 3, 4, 5, 6, 0, 0} /* YY[/-.][M]M[/-.][D]D HH:mm:SS */
, {"^(\\d{2})[-/.](\\d{1,2})[-/.](\\d{1,2})\\s(\\d{2}):(\\d{2})$", 1, 2, 3, 4, 5, 0, 0, 0} /* YY[/-.][M]M[/-.][D]D HH:mm */
, {"^(\\d{4})[-/.](\\d{1,2})[-/.](\\d{1,2})\\s(\\d{2}):(\\d{2})$", 1, 2, 3, 4, 5, 0, 0, 0} /* YYYY[/-.][M]M[/-.][D]D HH:mm */
, {"^(\\d{4})[-/.](\\d{1,2})[-/.](\\d{1,2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YYYY-[M]M-[D]D, YYYY.[M]M.[D]D, YYYY/[M]M/[D]D */
, {"^(\\d{4})(\\d{2})(\\d{2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YYYYMMDD */
, {"^(\\d{2})[-/.]*(\\d{2})[-/.]*(\\d{4})$", 3, 1, 2, 0, 0, 0, 0, 0} /* MM[-/.]DD[-/.]YYYY (US common format)*/
, {"^(\\d{2})[-/.]*(\\d{2})[-/.]*(\\d{2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YY[-/.]MM[-/.]DD */
, {"^(\\d{2})[-/.]*(\\d{1,2})[-/.]*(\\d{1,2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YY[-/.][M]M[-/.][D]D */
, {"^(\\d{4})[-/.]*(\\d{1,2})[-/.]*(\\d{1,2})$", 1, 2, 3, 0, 0, 0, 0, 0} /* YYYY[-/.][M]M[-/.][D]D */
, {"^(\\d{2}):*(\\d{2}):*(\\d{2})\\.(\\d{1,6})$", 0, 0, 0, 1, 2, 3, 4, 0} /* HHmmSS.uuuuuu, HH:mm:SS.uuuuuu */
, {"^(\\d{1,2}):*(\\d{2}):*(\\d{2})$", 0, 0, 0, 1, 2, 3, 0, 0} /* [H]HmmSS, [H]H:mm:SS */
, {"^(\\d{1,2}):(\\d{1,2}):(\\d{1,2})$", 0, 0, 0, 1, 2, 3, 0, 0} /* [H]H:[m]m:[S]S */
, {"^(\\d{1,2}):*(\\d{2})$", 0, 0, 0, 0, 1, 2, 0, 0} /* [m]mSS, [m]m:SS */
, {"^(\\d{1,2})$", 0, 0, 0, 0, 0, 1, 0, 0} /* SS, S */
, {"^(\\d{1,2})\\.(\\d{1,6})$", 0, 0, 0, 0, 0, 1, 2, 0} /* [S]S.uuuuuu */
};
00253 
00254 vector<TemporalFormat *> known_datetime_formats;
00255 vector<TemporalFormat *> known_date_formats;
00256 vector<TemporalFormat *> known_time_formats;
00257 vector<TemporalFormat *> all_temporal_formats;
00258 
00264 bool init_temporal_formats()
00265 {
00266   /* Compile all the regular expressions for the datetime formats */
00267   TemporalFormat *tmp;
00268   struct temporal_format_args current_format_args;
00269   
00270   for (int32_t x= 0; x < COUNT_KNOWN_FORMATS; ++x)
00271   {
00272     current_format_args= __format_args[x];
00273     tmp= new TemporalFormat(current_format_args.pattern);
00274     tmp->set_year_part_index(current_format_args.year_part_index);
00275     tmp->set_month_part_index(current_format_args.month_part_index);
00276     tmp->set_day_part_index(current_format_args.day_part_index);
00277     tmp->set_hour_part_index(current_format_args.hour_part_index);
00278     tmp->set_minute_part_index(current_format_args.minute_part_index);
00279     tmp->set_second_part_index(current_format_args.second_part_index);
00280     tmp->set_usecond_part_index(current_format_args.usecond_part_index);
00281     tmp->set_nsecond_part_index(current_format_args.nsecond_part_index);
00282     
00283     /* 
00284      * We store the pointer in all_temporal_formats because we 
00285      * delete pointers from that vector and only that vector
00286      */
00287     all_temporal_formats.push_back(tmp); 
00288 
00289     if (current_format_args.year_part_index > 0) /* A date must have a year */
00290     {
00291       known_datetime_formats.push_back(tmp);
00292       if (current_format_args.second_part_index == 0) /* A time must have seconds. */
00293         known_date_formats.push_back(tmp);
00294     }
00295 
00296     if (current_format_args.second_part_index > 0) /* A time must have seconds, but may not have minutes or hours */
00297       known_time_formats.push_back(tmp);
00298   }
00299   return true;
00300 }
00301 
00303 void deinit_temporal_formats()
00304 {
00305   vector<TemporalFormat *>::iterator p= all_temporal_formats.begin();
00306   while (p != all_temporal_formats.end())
00307   {
00308     delete *p;
00309     ++p;
00310   }
00311   known_date_formats.clear();
00312   known_datetime_formats.clear();
00313   known_time_formats.clear();
00314   all_temporal_formats.clear();
00315 }
00316 
00317 } /* end namespace drizzled */