Drizzled Public API Documentation

logging_query.cc

00001 /* -*- mode: c++; c-basic-offset: 2; indent-tabs-mode: nil; -*-
00002  *  vim:expandtab:shiftwidth=2:tabstop=2:smarttab:
00003  *
00004  *  Copyright (C) 2008, 2009 Sun Microsystems, Inc.
00005  *
00006  *  This program is free software; you can redistribute it and/or modify
00007  *  it under the terms of the GNU General Public License as published by
00008  *  the Free Software Foundation; version 2 of the License.
00009  *
00010  *  This program is distributed in the hope that it will be useful,
00011  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00013  *  GNU General Public License for more details.
00014  *
00015  *  You should have received a copy of the GNU General Public License
00016  *  along with this program; if not, write to the Free Software
00017  *  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
00018  */
00019 
00020 #include <config.h>
00021 #include <drizzled/plugin/logging.h>
00022 #include <drizzled/gettext.h>
00023 #include <drizzled/session.h>
00024 #include <drizzled/sql_parse.h>
00025 #include PCRE_HEADER
00026 #include <limits.h>
00027 #include <sys/time.h>
00028 #include <sys/types.h>
00029 #include <sys/stat.h>
00030 #include <fcntl.h>
00031 #include <string>
00032 #include <boost/format.hpp>
00033 #include <boost/program_options.hpp>
00034 #include <drizzled/module/option_map.h>
00035 #include <cstdio>
00036 #include <cerrno>
00037 
00038 namespace po= boost::program_options;
00039 using namespace drizzled;
00040 using namespace std;
00041 
00042 #define ESCAPE_CHAR      '\\'
00043 #define SEPARATOR_CHAR   ','
00044 
00045 namespace drizzle_plugin
00046 {
00047 
00048 static bool sysvar_logging_query_enable= false;
00049 /* TODO fix these to not be unsigned long once we have sensible sys_var system */
00050 static uint32_constraint sysvar_logging_query_threshold_slow;
00051 static uint32_constraint sysvar_logging_query_threshold_big_resultset;
00052 static uint32_constraint sysvar_logging_query_threshold_big_examined;
00053 
00054 /* quote a string to be safe to include in a CSV line
00055    that means backslash quoting all commas, doublequotes, backslashes,
00056    and all the ASCII unprintable characters
00057    as long as we pass the high-bit bytes unchanged
00058    this is safe to do to a UTF8 string
00059    we dont allow overrunning the targetbuffer
00060    to avoid having a very long query overwrite memory
00061 
00062    TODO consider remapping the unprintables instead to "Printable
00063    Representation", the Unicode characters from the area U+2400 to
00064    U+2421 reserved for representing control characters when it is
00065    necessary to print or display them rather than have them perform
00066    their intended function.
00067 
00068 */
00069 
00070 static void quotify(const string &src, string &dst)
00071 {
00072   static const char hexit[]= { '0', '1', '2', '3', '4', '5', '6', '7',
00073         '8', '9', 'a', 'b', 'c', 'd', 'e', 'f' };
00074   string::const_iterator src_iter;
00075 
00076   for (src_iter= src.begin(); src_iter < src.end(); ++src_iter)
00077   {
00078     if (static_cast<unsigned char>(*src_iter) > 0x7f)
00079     {
00080       dst.push_back(*src_iter);
00081     }
00082     else if (*src_iter == 0x00)  // null
00083     {
00084       dst.push_back(ESCAPE_CHAR); dst.push_back('0');
00085     }
00086     else if (*src_iter == 0x07)  // bell
00087     {
00088       dst.push_back(ESCAPE_CHAR); dst.push_back('a');
00089     }
00090     else if (*src_iter == 0x08)  // backspace
00091     {
00092       dst.push_back(ESCAPE_CHAR); dst.push_back('b');
00093     }
00094     else if (*src_iter == 0x09)  // horiz tab
00095     {
00096       dst.push_back(ESCAPE_CHAR); dst.push_back('t');
00097     }
00098     else if (*src_iter == 0x0a)  // line feed
00099     {
00100       dst.push_back(ESCAPE_CHAR); dst.push_back('n');
00101     }
00102     else if (*src_iter == 0x0b)  // vert tab
00103     {
00104       dst.push_back(ESCAPE_CHAR); dst.push_back('v');
00105     }
00106     else if (*src_iter == 0x0c)  // formfeed
00107     {
00108       dst.push_back(ESCAPE_CHAR); dst.push_back('f');
00109     }
00110     else if (*src_iter == 0x0d)  // carrage return
00111     {
00112       dst.push_back(ESCAPE_CHAR); dst.push_back('r');
00113     }
00114     else if (*src_iter == 0x1b)  // escape
00115     {
00116       dst.push_back(ESCAPE_CHAR); dst.push_back('e');
00117     }
00118     else if (*src_iter == 0x22)  // quotation mark
00119     {
00120       dst.push_back(ESCAPE_CHAR); dst.push_back(0x22);
00121     }
00122     else if (*src_iter == SEPARATOR_CHAR)
00123     {
00124       dst.push_back(ESCAPE_CHAR); dst.push_back(SEPARATOR_CHAR);
00125     }
00126     else if (*src_iter == ESCAPE_CHAR)
00127     {
00128       dst.push_back(ESCAPE_CHAR); dst.push_back(ESCAPE_CHAR);
00129     }
00130     else if ((*src_iter < 0x20) || (*src_iter == 0x7F))  // other unprintable ASCII
00131     {
00132       dst.push_back(ESCAPE_CHAR);
00133       dst.push_back('x');
00134       dst.push_back(hexit[(*src_iter >> 4) & 0x0f]);
00135       dst.push_back(hexit[*src_iter & 0x0f]);
00136     }
00137     else  // everything else
00138     {
00139       dst.push_back(*src_iter);
00140     }
00141   }
00142 }
00143 
00144 
00145 class Logging_query: public drizzled::plugin::Logging
00146 {
00147   const std::string _filename;
00148   const std::string _query_pcre;
00149   int fd;
00150   pcre *re;
00151   pcre_extra *pe;
00152 
00154   boost::format formatter;
00155 
00156 public:
00157 
00158   Logging_query(const std::string &filename,
00159                 const std::string &query_pcre) :
00160     drizzled::plugin::Logging("Logging_query"),
00161     _filename(filename),
00162     _query_pcre(query_pcre),
00163     fd(-1), re(NULL), pe(NULL),
00164     formatter("%1%,%2%,%3%,\"%4%\",\"%5%\",\"%6%\",%7%,%8%,"
00165               "%9%,%10%,%11%,%12%,%13%,%14%,\"%15%\"\n")
00166   {
00167 
00168     /* if there is no destination filename, dont bother doing anything */
00169     if (_filename.empty())
00170       return;
00171 
00172     fd= open(_filename.c_str(),
00173              O_WRONLY | O_APPEND | O_CREAT,
00174              S_IRUSR|S_IWUSR);
00175 
00176     if (fd < 0)
00177     {
00178       sql_perror( _("fail open()"), _filename);
00179       return;
00180     }
00181 
00182     if (not _query_pcre.empty())
00183     {
00184       const char *this_pcre_error;
00185       int this_pcre_erroffset;
00186       re= pcre_compile(_query_pcre.c_str(), 0, &this_pcre_error,
00187                        &this_pcre_erroffset, NULL);
00188       pe= pcre_study(re, 0, &this_pcre_error);
00189       /* TODO emit error messages if there is a problem */
00190     }
00191   }
00192 
00193   ~Logging_query()
00194   {
00195     if (fd >= 0)
00196     {
00197       close(fd);
00198     }
00199 
00200     if (pe != NULL)
00201     {
00202       pcre_free(pe);
00203     }
00204 
00205     if (re != NULL)
00206     {
00207       pcre_free(re);
00208     }
00209   }
00210 
00211   virtual bool post (Session *session)
00212   {
00213     size_t wrv;
00214 
00215     assert(session != NULL);
00216 
00217     if (fd < 0)
00218       return false;
00219 
00220     /* Yes, we know that checking sysvar_logging_query_enable,
00221        sysvar_logging_query_threshold_big_resultset, and
00222        sysvar_logging_query_threshold_big_examined is not threadsafe,
00223        because some other thread might change these sysvars.  But we
00224        don't care.  We might start logging a little late as it spreads
00225        to other threads.  Big deal. */
00226 
00227     // return if not enabled or query was too fast or resultset was too small
00228     if (sysvar_logging_query_enable == false)
00229       return false;
00230     if (session->sent_row_count < sysvar_logging_query_threshold_big_resultset.get())
00231       return false;
00232     if (session->examined_row_count < sysvar_logging_query_threshold_big_examined.get())
00233       return false;
00234 
00235     /*
00236       TODO, the session object should have a "utime command completed"
00237       inside itself, so be more accurate, and so this doesnt have to
00238       keep calling current_utime, which can be slow.
00239     */
00240     uint64_t t_mark= session->getCurrentTimestamp(false);
00241 
00242     if (session->getElapsedTime() < (sysvar_logging_query_threshold_slow.get()))
00243       return false;
00244 
00245     Session::QueryString query_string(session->getQueryString());
00246     if (query_string == NULL)
00247     {
00248       return false;
00249     }
00250 
00251     if (re)
00252     {
00253       int this_pcre_rc;
00254       this_pcre_rc= pcre_exec(re, pe, query_string->c_str(), query_string->length(), 0, 0, NULL, 0);
00255       if (this_pcre_rc < 0)
00256         return false;
00257     }
00258 
00259     // buffer to quotify the query
00260     string qs;
00261 
00262     // Since quotify() builds the quoted string incrementally, we can
00263     // avoid some reallocating if we reserve some space up front.
00264     qs.reserve(query_string->length());
00265 
00266     quotify(*query_string, qs);
00267 
00268     // to avoid trying to printf %s something that is potentially NULL
00269     util::string::const_shared_ptr schema(session->schema());
00270     const char *dbs= (schema and not schema->empty()) ? schema->c_str() : "";
00271 
00272     formatter % t_mark
00273               % session->thread_id
00274               % session->getQueryId()
00275               % dbs
00276               % qs
00277               % getCommandName(session->command)
00278               % (t_mark - session->getConnectMicroseconds())
00279               % session->getElapsedTime()
00280               % (t_mark - session->utime_after_lock)
00281               % session->sent_row_count
00282               % session->examined_row_count
00283               % session->tmp_table
00284               % session->total_warn_count
00285               % session->getServerId()
00286               % getServerHostname();
00287 
00288     string msgbuf= formatter.str();
00289 
00290     // a single write has a kernel thread lock, thus no need mutex guard this
00291     wrv= write(fd, msgbuf.c_str(), msgbuf.length());
00292     assert(wrv == msgbuf.length());
00293 
00294     return false;
00295   }
00296 };
00297 
00298 static int logging_query_plugin_init(drizzled::module::Context &context)
00299 {
00300 
00301   const module::option_map &vm= context.getOptions();
00302 
00303   if (vm.count("filename") > 0)
00304   {
00305     context.add(new Logging_query(vm["filename"].as<string>(),
00306                                   vm["pcre"].as<string>()));
00307     context.registerVariable(new sys_var_bool_ptr("enable", &sysvar_logging_query_enable));
00308     context.registerVariable(new sys_var_const_string_val("filename", vm["filename"].as<string>()));
00309     context.registerVariable(new sys_var_const_string_val("pcre", vm["pcre"].as<string>()));
00310     context.registerVariable(new sys_var_constrained_value<uint32_t>("threshold_slow", sysvar_logging_query_threshold_slow));
00311     context.registerVariable(new sys_var_constrained_value<uint32_t>("threshold_big_resultset", sysvar_logging_query_threshold_big_resultset));
00312     context.registerVariable(new sys_var_constrained_value<uint32_t>("threshold_big_examined", sysvar_logging_query_threshold_big_examined));
00313   }
00314 
00315   return 0;
00316 }
00317 
00318 static void init_options(drizzled::module::option_context &context)
00319 {
00320   context("enable",
00321           po::value<bool>(&sysvar_logging_query_enable)->default_value(false)->zero_tokens(),
00322           _("Enable logging to CSV file"));
00323   context("filename",
00324           po::value<string>(),
00325           _("File to log to"));
00326   context("pcre",
00327           po::value<string>()->default_value(""),
00328           _("PCRE to match the query against"));
00329   context("threshold-slow",
00330           po::value<uint32_constraint>(&sysvar_logging_query_threshold_slow)->default_value(0),
00331           _("Threshold for logging slow queries, in microseconds"));
00332   context("threshold-big-resultset",
00333           po::value<uint32_constraint>(&sysvar_logging_query_threshold_big_resultset)->default_value(0),
00334           _("Threshold for logging big queries, for rows returned"));
00335   context("threshold-big-examined",
00336           po::value<uint32_constraint>(&sysvar_logging_query_threshold_big_examined)->default_value(0),
00337           _("Threshold for logging big queries, for rows examined"));
00338 }
00339 
00340 } /* namespace drizzle_plugin */
00341 
00342 DRIZZLE_DECLARE_PLUGIN
00343 {
00344   DRIZZLE_VERSION_ID,
00345   "logging-query",
00346   "0.2",
00347   "Mark Atwood <mark@fallenpegasus.com>",
00348   N_("Log queries to a CSV file"),
00349   PLUGIN_LICENSE_GPL,
00350   drizzle_plugin::logging_query_plugin_init,
00351   NULL,
00352   drizzle_plugin::init_options
00353 }
00354 DRIZZLE_DECLARE_PLUGIN_END;