This patch improves the speed of table scans involving the 'LIKE' keyword. In my informal tests, the stock 1.0.9 server took about 4.1s to do a full scan of a 25000 row table using the LIKE operator, and the patched server took about 0.6s to do the same scan. It's unfortunate that this patch touches so many files, but if the 7x speedup is worth it for your application, give it a try... NOTE that this will break any query of the form: SELECT ... FROM table_a, table_b, ... WHERE table_a.attr LIKE table_b.attr ie, any query in which the right hand side of the LIKE operator is used as the pattern. That's because this patch achieves its speedup by compiling the pattern only once. It will generate a type error for this now. I can't verify that that construct is ANSI SQL, so I made no effort to retain that functionality. If you really want to compile the pattern for every line, there are still a couple of small optimizations to the SQL pattern to regexp converter, and a newline added to some trace output. diff -cr src/msql/acl.c targets/FreeBSD/msql/acl.c *** src/msql/acl.c Mon Oct 2 17:34:51 1995 --- targets/FreeBSD/msql/acl.c Sun Nov 12 04:54:40 1995 *************** *** 28,33 **** --- 28,34 ---- #include + #include #include "msql_priv.h" diff -cr src/msql/msql.h targets/FreeBSD/msql/msql.h *** src/msql/msql.h Sun Oct 15 20:04:42 1995 --- targets/FreeBSD/msql/msql.h Sun Nov 12 04:40:39 1995 *************** *** 71,76 **** --- 71,77 ---- #define REAL_TYPE 3 #define IDENT_TYPE 4 #define NULL_TYPE 5 + #define REGEXP_TYPE 6 #define NOT_NULL_FLAG 1 #define PRI_KEY_FLAG 2 diff -cr src/msql/msql_lex.c targets/FreeBSD/msql/msql_lex.c *** src/msql/msql_lex.c Mon Oct 2 17:34:51 1995 --- targets/FreeBSD/msql/msql_lex.c Sun Nov 12 04:56:26 1995 *************** *** 41,46 **** --- 41,47 ---- #include + #include #include "msql_priv.h" #include "y.tab.h" *************** *** 659,665 **** } ! #ifdef DEBUG main() { --- 660,666 ---- } ! #ifdef LEX_DEBUG main() { diff -cr src/msql/msql_priv.h targets/FreeBSD/msql/msql_priv.h *** src/msql/msql_priv.h Mon Oct 16 18:06:55 1995 --- targets/FreeBSD/msql/msql_priv.h Sun Nov 12 04:39:53 1995 *************** *** 63,68 **** --- 63,69 ---- u_char *charVal; double realVal; ident_t *identVal; + regexp *regexpVal; } val; int type, nullVal, diff -cr src/msql/msql_proc.c targets/FreeBSD/msql/msql_proc.c *** src/msql/msql_proc.c Mon Oct 2 17:34:51 1995 --- targets/FreeBSD/msql/msql_proc.c Sun Nov 12 05:13:39 1995 *************** *** 35,40 **** --- 35,42 ---- #include #include + #include + #define MSQL_ADT #include "msql_priv.h" *************** *** 360,365 **** --- 362,371 ---- if (!val->nullVal) (void)free(val->val.charVal); break; + case REGEXP_TYPE: + if (val->val.regexpVal) + (void)free(val->val.regexpVal); + break; } (void)free(val); msqlTrace(TRACE_OUT,"freeValue()"); *************** *** 531,536 **** --- 537,621 ---- } + static regexp *SQLpattern2regexp(pat) + char *pat; + { + char regbuf[1024]; + REG char *cp1, *cp2; + regexp *reg; + + /* + ** Map an SQL regexp into a UNIX regexp + */ + cp1 = pat; + cp2 = regbuf; + /* (void)bzero(regbuf,sizeof(regbuf)); * a crutch for bad code below */ + /* + ** unqualified strings in SQL patterns must match the whole + ** string, so usually we wrap ^$ around the regexp to get this + ** behavior. if the SQL pattern starts with %, let it float + ** by not including ^ rather than mapping to ^.* + */ + if ('%' == *cp1) { + ++cp1; + } else { + *cp2++ = '^'; + } + while(*cp1) + { + switch(*cp1) + { + case '\\': + cp1++; + if (*cp1) + { + *cp2++ = *cp1++; + } + break; + + case '_': + *cp2++ = '.'; + cp1++; + break; + + case '%': + cp1++; + if (!*cp1) { + /* + ** a % at the end of the string can + ** be optimized to "nothing" rather + ** than .*$, see ^.* info above, and + ** note cleanup below + */ + break; + } + *cp2++ = '.'; + *cp2++ = '*'; + break; + + case '.': + case '*': + case '+': + *cp2++ = '\\'; + *cp2++ = *cp1++; + break; + + default: + *cp2++ = *cp1++; + break; + } + } + /* + ** optimize trailing %, as above + */ + if (cp1 == pat || '%' != cp1[-1]) { + *cp2++ = '$'; + } + *cp2 = 0; + + reg = regcomp(regbuf); + return reg; + } /**************************************************************************** *************** *** 577,582 **** --- 662,677 ---- new->bool = bool; new->value = value; + /* + ** do conversion of the CHAR buf to a REGEXP here if we know this + ** is a LIKE or NOT LIKE cond. If it's not a CHAR type, we should + ** error out, but we can't (?) so just let the bad type fall through + ** to the "runtime" typecheck. + */ + if ((NOT_LIKE_OP == op || LIKE_OP == op) && CHAR_TYPE == value->type) { + value->type = REGEXP_TYPE; + value->val.regexpVal = SQLpattern2regexp(value->val.charVal); + } if (!condHead) { condHead = condTail = new; diff -cr src/msql/msqld.c targets/FreeBSD/msql/msqld.c *** src/msql/msqld.c Sun Oct 15 17:44:09 1995 --- targets/FreeBSD/msql/msqld.c Sun Nov 12 04:55:49 1995 *************** *** 31,37 **** #include "version.h" ! #include "common/portability.h" #ifdef HAVE_SYS_UN_H # include --- 31,38 ---- #include "version.h" ! #include ! #include #ifdef HAVE_SYS_UN_H # include *** src/msql/msqldb.c Sun Oct 15 17:44:09 1995 --- targets/FreeBSD/msql/msqldb.c Sun Nov 12 04:55:49 1995 *************** *** 3080,3143 **** ! static int regexpTest(str,re,maxLen) ! char *str, ! *re; int maxLen; { ! char regbuf[1024], ! hold; ! REG char *cp1, *cp2; ! regexp *reg; int res; /* - ** Map an SQL regexp into a UNIX regexp - */ - cp1 = re; - cp2 = regbuf; - (void)bzero(regbuf,sizeof(regbuf)); - *cp2++ = '^'; - while(*cp1 && maxLen) - { - switch(*cp1) - { - case '\\': - if (*(cp1+1)) - { - cp1++; - *cp2 = *cp1; - } - cp1++; - cp2++; - break; - - case '_': - *cp2++ = '.'; - cp1++; - break; - - case '%': - *cp2++ = '.'; - *cp2++ = '*'; - cp1++; - break; - - case '.': - case '*': - case '+': - *cp2++ = '\\'; - *cp2++ = *cp1++; - break; - - default: - *cp2++ = *cp1++; - break; - } - } - *cp2 = '$'; - - /* ** Do the regexp thang. We do an ugly hack here : The data of ** a field may be exactly the same length as the field itself. ** Seeing as the regexp routines work on null rerminated strings --- 3083,3097 ---- ! static int regexpTest(str,reg,maxLen) ! char *str; ! regexp *reg; int maxLen; { ! char hold; int res; /* ** Do the regexp thang. We do an ugly hack here : The data of ** a field may be exactly the same length as the field itself. ** Seeing as the regexp routines work on null rerminated strings *************** *** 3148,3157 **** regErrFlag = 0; hold = *(str + maxLen); *(str + maxLen) = 0; - reg = regcomp(regbuf); res = regexec(reg,str); *(str + maxLen) = hold; - safeFree(reg); if (regErrFlag) { strcpy(errMsg, BAD_LIKE_ERROR); --- 3102,3109 ---- *************** *** 3221,3228 **** */ static int charMatch(v1,v2,op,maxLen) ! char *v1, ! *v2; int op, maxLen; { --- 3173,3180 ---- */ static int charMatch(v1,v2,op,maxLen) ! char *v1; ! val_t *v2; int op, maxLen; { *************** *** 3231,3240 **** REG char *c1,*c2; REG int offset; ! if (op != LIKE_OP) { c1 = v1; ! c2 = v2; offset=0; cmp = 0; while(offset < maxLen) --- 3183,3192 ---- REG char *c1,*c2; REG int offset; ! if (op != LIKE_OP && op != NOT_LIKE_OP) { c1 = v1; ! c2 = v2->val.charVal; offset=0; cmp = 0; while(offset < maxLen) *************** *** 3275,3285 **** break; case LIKE_OP: ! result = regexpTest(v1,v2,maxLen); break; case NOT_LIKE_OP: ! result = !(regexpTest(v1,v2,maxLen)); break; } return(result); --- 3227,3237 ---- break; case LIKE_OP: ! result = regexpTest(v1, v2->val.regexpVal, maxLen); break; case NOT_LIKE_OP: ! result = !(regexpTest(v1, v2->val.regexpVal, maxLen)); break; } return(result); *************** *** 3461,3466 **** --- 3413,3419 ---- } break; + case REGEXP_TYPE: case INT_TYPE: case REAL_TYPE: case CHAR_TYPE: *************** *** 3486,3495 **** value->val.realVal = value->val.intVal; value->type = REAL_TYPE; } ! if (curCond->type != value->type) { sprintf(errMsg,BAD_TYPE_ERROR, curCond->name); ! msqlDebug(MOD_ERR,"Bad type for comparison of '%s'", curCond->name); return(-1); } --- 3439,3460 ---- value->val.realVal = value->val.intVal; value->type = REAL_TYPE; } ! if (LIKE_OP == curCond->op || NOT_LIKE_OP == curCond->op) ! { ! if (REGEXP_TYPE != curCond->value->type || ! CHAR_TYPE != curCond->type) ! { ! sprintf(errMsg,BAD_TYPE_ERROR, curCond->name); ! msqlDebug(MOD_ERR,"Bad type for comparison of '%s'\n", ! curCond->name); ! return(-1); ! } ! ! } ! else if (curCond->type != value->type) { sprintf(errMsg,BAD_TYPE_ERROR, curCond->name); ! msqlDebug(MOD_ERR,"Bad type for comparison of '%s'\n", curCond->name); return(-1); } *************** *** 3542,3548 **** break; } cp = (char *)row + *offset +1; ! tmp = charMatch(cp,value->val.charVal, curCond->op, curCond->length); if (value == &tmpVal) { --- 3507,3513 ---- break; } cp = (char *)row + *offset +1; ! tmp = charMatch(cp,value, curCond->op, curCond->length); if (value == &tmpVal) { diff -cr src/msql/relshow.c targets/FreeBSD/msql/relshow.c *** src/msql/relshow.c Mon Oct 2 17:34:51 1995 --- targets/FreeBSD/msql/relshow.c Sun Nov 12 04:55:08 1995 *************** *** 27,32 **** --- 27,33 ---- #include #include + #include #include "msql_priv.h" #include "msql.h" *** src/msql/net.c Mon Oct 16 18:24:03 1995 --- targets/FreeBSD/msql/net.c Sun Nov 12 06:47:43 1995 *************** *** 26,31 **** --- 26,32 ---- #include #include + #include #include "msql_priv.h" *** src/msql/libmsql.c Sun Oct 15 20:06:12 1995 --- targets/FreeBSD/msql/libmsql.c Sun Nov 12 06:46:27 1995 *************** *** 32,37 **** --- 32,38 ---- #include #include "common/portability.h" + #include #ifdef HAVE_SYS_UN_H # include *** src/msql/msql_yacc.y Mon Oct 2 16:11:27 1995 --- targets/FreeBSD/msql/msql_yacc.y Sun Nov 12 06:48:57 1995 *************** *** 29,37 **** #include #include #include "msql_priv.h" #include "msql.h" - #include "common/portability.h" int yylineno; extern int selectWildcard, --- 29,38 ---- #include #include + #include "common/portability.h" + #include #include "msql_priv.h" #include "msql.h" int yylineno; extern int selectWildcard,