From 475c83618a043c88e7414b839228c28452658166 Mon Sep 17 00:00:00 2001
From: Přemysl Janouch
Date: Sun, 13 Jul 2014 04:30:23 +0200
Subject: Only compile regex's once
---
src/common.c | 106 +++++++++++++++++++++++++++++++++++++---------------------
src/kike.c | 45 +++++++++++++++++--------
src/zyklonb.c | 45 +++++++++++++++----------
3 files changed, 126 insertions(+), 70 deletions(-)
diff --git a/src/common.c b/src/common.c
index 283535f..fcb9742 100644
--- a/src/common.c
+++ b/src/common.c
@@ -1417,45 +1417,6 @@ set_boolean_if_valid (bool *out, const char *s)
return true;
}
-static void
-regerror_to_str (int code, const regex_t *preg, struct str *out)
-{
- size_t required = regerror (code, preg, NULL, 0);
- str_ensure_space (out, required);
- out->len += regerror (code, preg,
- out->str + out->len, out->alloc - out->len) - 1;
-}
-
-static size_t regex_error_domain_tag;
-#define REGEX_ERROR (error_resolve_domain (&regex_error_domain_tag))
-
-enum
-{
- REGEX_ERROR_COMPILATION_FAILED
-};
-
-static bool
-regex_match (const char *regex, const char *s, struct error **e)
-{
- regex_t re;
- int err = regcomp (&re, regex, REG_EXTENDED | REG_NOSUB);
- if (err)
- {
- struct str desc;
-
- str_init (&desc);
- regerror_to_str (err, &re, &desc);
- error_set (e, REGEX_ERROR, REGEX_ERROR_COMPILATION_FAILED,
- "failed to compile regular expression: %s", desc.str);
- str_free (&desc);
- return false;
- }
-
- bool result = regexec (&re, s, 0, NULL, 0) != REG_NOMATCH;
- regfree (&re);
- return result;
-}
-
static bool
read_line (FILE *fp, struct str *s)
{
@@ -1512,6 +1473,73 @@ xssl_get_error (SSL *ssl, int result, const char **error_info)
}
}
+// --- Regular expressions -----------------------------------------------------
+
+// Object whose address is handed to error_resolve_domain() to identify
+// the error domain used for regular expression failures
+static size_t regex_error_domain_tag;
+
+// Error domain passed to error_set() by the regex helpers
+#define REGEX_ERROR (error_resolve_domain (&regex_error_domain_tag))
+
+// Error codes reported within REGEX_ERROR
+enum
+{
+	// regcomp() rejected the pattern
+	REGEX_ERROR_COMPILATION_FAILED
+};
+
+// Compile `regex' with the given regcomp() flags into a heap-allocated
+// regex_t.  Returns the compiled expression on success; on failure the
+// allocation is released, `e' is set to a REGEX_ERROR describing what
+// regcomp() complained about, and NULL is returned.
+static regex_t *
+regex_compile (const char *regex, int flags, struct error **e)
+{
+	regex_t *compiled = xmalloc (sizeof *compiled);
+	int code = regcomp (compiled, regex, flags);
+	if (code)
+	{
+		// Query the message size first, then have regerror() write right
+		// into the string buffer; its return value counts the terminator,
+		// which is not part of the string length
+		struct str message;
+		str_init (&message);
+		str_ensure_space (&message, regerror (code, compiled, NULL, 0));
+		message.len += regerror (code, compiled,
+			message.str + message.len, message.alloc - message.len) - 1;
+
+		free (compiled);
+		error_set (e, REGEX_ERROR, REGEX_ERROR_COMPILATION_FAILED,
+			"%s: %s", "failed to compile regular expression", message.str);
+		str_free (&message);
+		return NULL;
+	}
+	return compiled;
+}
+
+// Destroy a heap-allocated regex_t: release the compiled pattern first,
+// then the memory block holding it.  The parameter is a `void *' so that
+// the function can be installed as the value destructor of a
+// `struct str_map' (see regex_cache_init()).
+static void
+regex_free (void *regex)
+{
+ regfree (regex);
+ free (regex);
+}
+
+// The cost of hashing a string is likely to be significantly smaller than that
+// of compiling the whole regular expression anew, so here is a simple cache.
+// Adding basic support for subgroups is easy: check `re_nsub' and output into
+// a `struct str_vector' (if all we want is the substrings).
+
+// Prepare `cache' for use with regex_cache_match(): an empty string map
+// whose stored values are destroyed with regex_free().
+static void
+regex_cache_init (struct str_map *cache)
+{
+ str_map_init (cache);
+ // Installed after str_map_init() so that the initializer cannot reset it
+ cache->free = regex_free;
+}
+
+// Match `s' against `regex', compiling the expression only the first time
+// it is seen.  Compiled expressions are stored in `cache' under the
+// pattern text.
+//
+// Returns true only when regexec() reports a match.  When the pattern
+// fails to compile, returns false with `e' set by regex_compile().
+//
+// NOTE: the cache is keyed by the pattern alone, so `flags' only takes
+// effect the first time a given pattern is compiled; use a single set of
+// flags per cache.
+static bool
+regex_cache_match (struct str_map *cache, const char *regex, int flags,
+	const char *s, struct error **e)
+{
+	regex_t *re = str_map_find (cache, regex);
+	if (!re)
+	{
+		re = regex_compile (regex, flags, e);
+		if (!re)
+			return false;
+		str_map_set (cache, regex, re);
+	}
+
+	// regexec() can fail with codes other than REG_NOMATCH (e.g. REG_ESPACE);
+	// such a failure must not be mistaken for a successful match
+	return regexec (re, s, 0, NULL, 0) == 0;
+}
+
// --- IRC utilities -----------------------------------------------------------
struct irc_message
diff --git a/src/kike.c b/src/kike.c
index d5a777c..0ce7092 100644
--- a/src/kike.c
+++ b/src/kike.c
@@ -103,13 +103,24 @@ enum validation_result
#define IRC_NICKNAME_MAX 9
#define IRC_HOSTNAME_MAX 63
-// Anything to keep it as short as possible
-#define SN "[0-9A-Za-z][-0-9A-Za-z]*[0-9A-Za-z]*"
-#define N4 "[0-9]{1,3}"
-#define N6 "[0-9ABCDEFabcdef]{1,}"
+// Match `s' against `regex' as an extended regular expression without
+// subexpression reporting.  Compiled expressions are kept in a
+// function-local cache with static storage that is set up on first call.
+static bool
+irc_regex_match (const char *regex, const char *s)
+{
+ static struct str_map cache;
+ static bool initialized;
-#define LE "A-Za-z"
-#define SP "\\[\\]\\\\`_^{|}"
+ if (!initialized)
+ {
+ regex_cache_init (&cache);
+ initialized = true;
+ }
+
+ struct error *e = NULL;
+ bool result = regex_cache_match (&cache, regex,
+ REG_EXTENDED | REG_NOSUB, s, &e);
+ // The callers in view pass constant patterns only, so a compilation
+ // error is asserted never to happen rather than reported
+ hard_assert (!e);
+ return result;
+}
static const char *
irc_validate_to_str (enum validation_result result)
@@ -124,14 +135,20 @@ irc_validate_to_str (enum validation_result result)
}
}
-// TODO: at least cache the resulting `regex_t' in a `struct str_map'
+// Anything to keep it as short as possible
+// SN: one alphanumeric followed by any run of alphanumerics and hyphens
+//     (the trailing `[0-9A-Za-z]*' may match nothing, so a label is allowed
+//     to end with a hyphen)
+// N4: one to three decimal digits; the numeric range is not checked
+// N6: one or more hexadecimal digits; the group length is not limited
+#define SN "[0-9A-Za-z][-0-9A-Za-z]*[0-9A-Za-z]*"
+#define N4 "[0-9]{1,3}"
+#define N6 "[0-9ABCDEFabcdef]{1,}"
+
+// The following two are spliced into bracket expressions by their users.
+// LE: ASCII letters
+// SP: the characters [ ] \ ` _ ^ { | }
+// NOTE(review): POSIX bracket expressions treat a backslash as an ordinary
+// character, so the `\]' in SP may terminate the enclosing bracket
+// expression early -- confirm against the target regcomp(); a literal `]'
+// is normally placed first in the list instead.
+#define LE "A-Za-z"
+#define SP "\\[\\]\\\\`_^{|}"
static enum validation_result
irc_validate_hostname (const char *hostname)
{
if (!*hostname)
return VALIDATION_ERROR_EMPTY;
- if (!regex_match ("^" SN "(\\." SN ")*$", hostname, NULL))
+ if (!irc_regex_match ("^" SN "(\\." SN ")*$", hostname))
return VALIDATION_ERROR_INVALID;
if (strlen (hostname) > IRC_HOSTNAME_MAX)
return VALIDATION_ERROR_TOO_LONG;
@@ -141,11 +158,11 @@ irc_validate_hostname (const char *hostname)
+// Check whether `hostaddr' has one of three accepted textual shapes:
+//  - four dot-separated fields of one to three decimal digits,
+//  - eight colon-separated groups of hexadecimal digits,
+//  - "0:0:0:0:0:" followed by "0" or four F's, a colon, and four
+//    dot-separated decimal fields.
+// Only the shape is checked; numeric ranges are not, and no abbreviated
+// form using "::" is matched by these patterns.
static bool
irc_is_valid_hostaddr (const char *hostaddr)
{
- if (regex_match ("^" N4 "\\." N4 "\\." N4 "\\." N4 "$", hostaddr, NULL)
- || regex_match ("^" N6 ":" N6 ":" N6 ":" N6 ":"
- N6 ":" N6 ":" N6 ":" N6 "$", hostaddr, NULL)
- || regex_match ("^0:0:0:0:0:(0|[Ff]{4}):"
- N4 "\\." N4 "\\." N4 "\\." N4 "$", hostaddr, NULL))
+ if (irc_regex_match ("^" N4 "\\." N4 "\\." N4 "\\." N4 "$", hostaddr)
+ || irc_regex_match ("^" N6 ":" N6 ":" N6 ":" N6 ":"
+ N6 ":" N6 ":" N6 ":" N6 "$", hostaddr)
+ || irc_regex_match ("^0:0:0:0:0:(0|[Ff]{4}):"
+ N4 "\\." N4 "\\." N4 "\\." N4 "$", hostaddr))
return true;
return false;
}
@@ -162,7 +179,7 @@ irc_validate_nickname (const char *nickname)
{
if (!*nickname)
return VALIDATION_ERROR_EMPTY;
- if (!regex_match ("^[" LE SP "][-0-9" LE SP "]*$", nickname, NULL))
+ if (!irc_regex_match ("^[" LE SP "][-0-9" LE SP "]*$", nickname))
return VALIDATION_ERROR_INVALID;
if (strlen (nickname) > IRC_NICKNAME_MAX)
return VALIDATION_ERROR_TOO_LONG;
diff --git a/src/zyklonb.c b/src/zyklonb.c
index 69c41c1..13e7f3b 100644
--- a/src/zyklonb.c
+++ b/src/zyklonb.c
@@ -118,6 +118,7 @@ enum
struct bot_context
{
struct str_map config; ///< User configuration
+ regex_t *admin_re; ///< Regex to match our administrator
int irc_fd; ///< Socket FD of the server
struct str read_buffer; ///< Input yet to be processed
@@ -140,6 +141,7 @@ bot_context_init (struct bot_context *self)
str_map_init (&self->config);
self->config.free = free;
load_config_defaults (&self->config, g_config_table);
+ self->admin_re = NULL;
self->irc_fd = -1;
str_init (&self->read_buffer);
@@ -160,6 +162,8 @@ static void
bot_context_free (struct bot_context *self)
{
str_map_free (&self->config);
+ if (self->admin_re)
+ regex_free (self->admin_re);
str_free (&self->read_buffer);
// TODO: terminate the plugins properly before this is called
@@ -1110,25 +1114,10 @@ is_private_message (const struct irc_message *msg)
+// Decide whether `msg' was sent by the bot's administrator by matching
+// the message prefix against the precompiled `ctx->admin_re'.
+// NOTE(review): `msg->prefix' is passed to regexec() unchecked -- confirm
+// that callers never hand over a message without a prefix.
static bool
is_sent_by_admin (struct bot_context *ctx, const struct irc_message *msg)
{
- const char *admin = str_map_find (&ctx->config, "admin");
-
// No administrator set -> everyone is an administrator
- if (!admin)
- return true;
-
- // TODO: precompile the regex
- struct error *e = NULL;
- if (regex_match (admin, msg->prefix, NULL))
- return true;
-
- if (e)
- {
- print_error ("%s: %s", "invalid admin mask", e->message);
- error_free (e);
+ if (!ctx->admin_re)
return true;
- }
-
- return false;
+ // Only a successful match grants administrator rights; a regexec()
+ // failure other than REG_NOMATCH (e.g. REG_ESPACE) must not let
+ // arbitrary users through
+ return regexec (ctx->admin_re, msg->prefix, 0, NULL, 0) == 0;
}
static void respond_to_user (struct bot_context *ctx, const struct
@@ -1591,6 +1580,26 @@ irc_connect (struct bot_context *ctx, struct error **e)
return true;
}
+// Compile the "admin" configuration value, if there is one, and store the
+// result in `ctx->admin_re'.  A missing value is not an error.  Returns
+// false after printing a message when the value fails to compile.
+static bool
+load_admin_regex (struct bot_context *ctx)
+{
+	hard_assert (!ctx->admin_re);
+
+	const char *mask = str_map_find (&ctx->config, "admin");
+	if (mask)
+	{
+		struct error *failure = NULL;
+		ctx->admin_re = regex_compile (mask,
+			REG_EXTENDED | REG_NOSUB, &failure);
+		if (failure)
+		{
+			print_error ("invalid configuration value for `%s': %s",
+				"admin", failure->message);
+			error_free (failure);
+			return false;
+		}
+	}
+	return true;
+}
+
static void
on_signal_pipe_readable (const struct pollfd *fd, struct bot_context *ctx)
{
@@ -1758,6 +1767,8 @@ main (int argc, char *argv[])
(poller_dispatcher_func) on_signal_pipe_readable, &ctx);
plugin_load_all_from_config (&ctx);
+ if (!load_admin_regex (&ctx))
+ exit (EXIT_FAILURE);
if (!irc_connect (&ctx, &e))
{
print_error ("%s", e->message);
--
cgit v1.2.3-70-g09d2