|  | Posted by Jochem Maas on 08/26/05 14:37 
Robin Vickery wrote:
 > This is another rather hackish attempt at using the tokeniser.
 >
 
 you beast ;-)
 apart from the fact that I don't use mysql this is really cool.
 I have no time to play right now (gotta catcha plana) but I have
 it on my to do list of things I need to take apart and
 understand properly.
 
 thanks!
 
 > It turns this:
 >
 > <?php
 > $emu = "my_column {$banana}";
 > $wallaby = 'my_table';
 > $kookaburra = 'SELECT * FROM';
 > $kookaburra .= $wallaby;
 > $koala = 'ASC';
 > $taipan =  ' ORDER BY' . $emu;
 > $dropBear = 'group by something';
 >
 > mysql_query("$kookaburra WHERE (up = 'down') $taipan $koala " .
 > strtoupper($dropBear));
 > ?>
 >
 > into this:
 >
 > SELECT * FROM my_table WHERE (up = 'down') ORDER BY my_column $banana
 > ASC strtoupper( group by something)
 >
 > Which isn't perfect by a long shot, but I'm away home now so it'll have to do.
 >
 > I'd be interested to know what it makes of your queries.
 >
 >  -robin
 >
 > <?php
 >
 > // Scans every $dir/*.php file and prints, one per line, whatever text
 > // can be reconstructed for the argument list of each mysql_query() call.
 > $dir = '/path/to/php/files';
 >
 > foreach (getPhpFileList($dir) as $file) {
 >   print "===== $file =====\n";
 >   $token = tokeniseFile($file);
 >
 >   // each entry is a pair of token indexes delimiting one call's arguments
 >   foreach (getMysqlQueryCalls($token) as $range) {
 >     $sql = resolveExpression($token, $range[0], $range[0], $range[1]);
 >     // squash runs of whitespace so the query fits on a single line
 >     $sql = preg_replace('/\s+/', ' ', $sql);
 >     echo trim($sql), "\n";
 >   }
 > }
 >
 >
 > // Collects the argument range of every mysql_query() call in a token list.
 > //
 > //   $token - token list as produced by token_get_all()
 > //
 > // Returns an array of array(first, end) index pairs as reported by
 > // getMysqlQueryArguments(); the array is empty when no call is found.
 > function getMysqlQueryCalls($token) {
 >   $callList = array();
 >   foreach ($token as $i => $tok) {
 >     if (!is_array($tok)) continue; // not interested in atomic tokens
 >     if (strtolower($tok[1]) !== 'mysql_query') continue;
 >     $args = getMysqlQueryArguments($i, $token);
 >     // always append to (and return) the initialised list, so callers get
 >     // an array even when the file contains no mysql_query() call at all
 >     if ($args !== false) $callList[] = $args;
 >   }
 >   return $callList;
 > }
 >
 > // Debugging aid: returns the tokens from index $start up to (not
 > // including) $end, with each numeric token id replaced by its
 > // token_name(). $end defaults to the end of the list.
 > function nameTokens($token, $start = 0, $end = null)
 > {
 >   if ($end === null) {
 >     $end = count($token);
 >   }
 >
 >   $named = array();
 >   foreach (array_slice($token, $start, $end - $start) as $key => $tok) {
 >     if (is_array($tok)) {
 >       $tok[0] = token_name($tok[0]);
 >     }
 >     $named[$key] = $tok;
 >   }
 >   return $named;
 > }
 >
 > // Works out the text that the single token at index $i contributes to
 > // the reconstructed string.
 > //
 > //   $token  - token list being examined
 > //   $cursor - index handed to findLastDefinition() as the point from
 > //             which to search backwards for a variable's assignment
 > //   $i      - index of the token to resolve
 > function resolveToken($token, $cursor, $i) {
 >   $tok = $token[$i];
 >
 >   // atomic tokens: brackets are kept, everything else is dropped
 >   if (!is_array($tok)) {
 >     if ($tok == '(' || $tok == ')') return $tok;
 >     return '';
 >   }
 >
 >   $type = $tok[0];
 >   $text = $tok[1];
 >
 >   switch ($type) {
 >   case T_DOLLAR_OPEN_CURLY_BRACES:
 >   case T_CURLY_OPEN:
 >     // the brace openers of an interpolated variable add nothing
 >     return null;
 >   case T_STRING:
 >   case T_WHITESPACE:
 >   case T_ENCAPSED_AND_WHITESPACE:
 >     return $text;
 >   case T_CONSTANT_ENCAPSED_STRING:
 >     // NOTE(review): eval() is used to unquote the literal; the text comes
 >     // straight from the scanned file, so only run this on trusted code.
 >     return eval("return {$text};");
 >   case T_VARIABLE:
 >   case T_STRING_VARNAME:
 >     $def = findLastDefinition($token, $cursor, $i);
 >     if ($def === false) {
 >       // no assignment found: leave the variable in place, normalised
 >       // to the $name form
 >       return preg_replace('/^\$*/', '$', $text);
 >     }
 >     // for '.=' the value built up before that statement comes first
 >     $prefix = ($def[3] == '.=') ? resolveToken($token, $def[0], $i) : '';
 >     return $prefix . resolveExpression($token, $def[0], $def[1], $def[2]);
 >   default:
 >     // anything unrecognised is shown as (TOKEN_NAME:text)
 >     return '(' . token_name($type) . ':' . $text . ')';
 >   }
 > }
 >
 > // Resolves every token from index $start up to (not including) $end with
 > // resolveToken() and returns the pieces concatenated into one string.
 > // $cursor is passed through to resolveToken() unchanged.
 > function ResolveExpression($token, $cursor, $start, $end) {
 >   $output = '';
 >   // just try and resolve all the tokens in the expression, concat
 >   // them and throw them back.
 >   for ($i = $start; $i < $end; $i++) {
 >     $output .= resolveToken($token, $cursor, $i);
 >   }
 >   return $output;
 > }
 >
 > // Searches backwards from index $i for the nearest '=' or '.=' whose
 > // left-hand side (as reported by getLHS()) is the variable named by
 > // the token at index $id.
 > //
 > // Returns array(cursor, start, end, operator) - the three values from
 > // getRHS() plus the operator text - or false when no usable assignment
 > // is found.
 > function findLastDefinition($token, $i, $id) {
 >   // make sure all variables are in the form $name as ${name} ones are
 >   // just 'name' by the time they end up here.
 >   $name = preg_replace('/^\$*/', '$', $token[$id][1]);
 >
 >   // rewind until we hit an assignment or run out of tokens
 >   while (isset($token[--$i])) {
 >     $tok = $token[$i];
 >     $isAssign = ($tok === '=');
 >     $isConcatAssign = is_array($tok) && $tok[0] === T_CONCAT_EQUAL;
 >     if (!$isAssign && !$isConcatAssign) continue;
 >
 >     // only interested when our $name is to the left of the operator
 >     if (getLHS($token, $i) !== $name) continue;
 >
 >     // getRHS() gives false for an empty or unterminated right-hand
 >     // side; skip that assignment instead of appending to false and
 >     // handing the caller an array without the expected indexes.
 >     $RHS = getRHS($token, $i);
 >     if ($RHS === false) continue;
 >
 >     $RHS[] = $isConcatAssign ? $tok[1] : $tok;
 >     return $RHS;
 >   }
 >   // we've run out of tokens, so seems like we can't find where this
 >   // variable was defined.
 >   return false;
 > }
 >
 >
 > // Walks backwards from the token just before index $i and returns the
 > // text of the first T_VARIABLE token met, or false if the start of the
 > // list is reached without finding one.
 > function getLHS($token, $i)
 > {
 >   for ($pos = $i - 1; isset($token[$pos]); $pos--) {
 >     $tok = $token[$pos];
 >     if (is_array($tok) && $tok[0] == T_VARIABLE) {
 >       return $tok[1];
 >     }
 >   }
 >   // no variable anywhere to the left
 >   return false;
 > }
 >
 > // Finds the extent of the right-hand side of the assignment operator at
 > // index $i.
 > //
 > // Returns array(cursor, start, end): cursor is the operator's own index
 > // (kept so a later lookup of the same name starts before this
 > // statement), start is the first token after the operator and end is
 > // the index of the terminating ';'. Returns false when the right-hand
 > // side is empty or no ';' is found.
 > function getRHS($token, $i)
 > {
 >   $cursor = $i;
 >   $first = $cursor + 1;
 >
 >   // advance to the ';' or off the end of the list
 >   $stop = $first;
 >   while (isset($token[$stop]) && $token[$stop] != ';') {
 >     $stop++;
 >   }
 >
 >   $isEmpty = ($stop === $first);
 >   $ranOut = !isset($token[$stop]);
 >   if ($isEmpty || $ranOut) {
 >     return false;
 >   }
 >   return array($cursor, $first, $stop);
 > }
 >
 > // Debugging aid: glues the raw text of the tokens from index $start up
 > // to (not including) $end back together.
 > function renderTokens($token, $start, $end)
 > {
 >   $text = '';
 >   $pos = $start;
 >   while ($pos < $end) {
 >     $tok = $token[$pos];
 >     if (is_array($tok)) {
 >       $text .= $tok[1];
 >     } else {
 >       $text .= $tok;
 >     }
 >     $pos++;
 >   }
 >   return $text;
 > }
 >
 >
 > // Given the index $i of a function-name token, locates the bracketed
 > // argument list that follows it.
 > //
 > // Returns array(first, end) where first is the index just after the
 > // opening '(' and end is the index of the matching ')'. Returns false
 > // if anything other than whitespace sits between the name and the '(',
 > // or if the brackets never balance.
 > function getMysqlQueryArguments($i, $token) {
 >   // only whitespace may separate the name from its '('
 >   $open = $i + 1;
 >   while (isset($token[$open]) && $token[$open] !== '(') {
 >     $tok = $token[$open];
 >     if (!is_array($tok) || $tok[0] !== T_WHITESPACE) {
 >       return false;
 >     }
 >     $open++;
 >   }
 >
 >   // $open is now on the '(' (or past the end of the list, in which
 >   // case the scan below finds nothing); look for the matching ')'
 >   $depth = 1;
 >   for ($pos = $open + 1; isset($token[$pos]); $pos++) {
 >     if ($token[$pos] === '(') {
 >       $depth++;
 >     } elseif ($token[$pos] === ')') {
 >       $depth--;
 >       if ($depth === 0) {
 >         return array($open + 1, $pos);
 >       }
 >     }
 >   }
 >
 >   // ran out of tokens with brackets still open
 >   return false;
 > }
 >
 > // Returns glob()'s matches for *.php directly inside $dir; when $dir is
 > // not a directory the current directory is searched instead.
 > function getPhpFileList($dir)
 > {
 >   $base = is_dir($dir) ? $dir : '.';
 >   $pattern = $base . '/*.php';
 >   return glob($pattern);
 > }
 >
 > // Reads $file and returns its contents split into tokens by
 > // token_get_all().
 > function tokeniseFile($file)
 > {
 >   $source = file_get_contents($file);
 >   return token_get_all($source);
 > }
 >
 > // Returns the tokens for which isMysqlQuery() is true, keeping each
 > // one's original index as its key.
 > function findMysqlQueries($tokens)
 > {
 >   $matches = array();
 >   foreach ($tokens as $key => $tok) {
 >     if (isMysqlQuery($tok)) {
 >       $matches[$key] = $tok;
 >     }
 >   }
 >   return $matches;
 > }
 >
 > // Tells whether $token is an identifier token spelling mysql_query
 > // (in any letter case).
 > //
 > // Returns 0 for atomic (non-array) tokens, otherwise a boolean.
 > function isMysqlQuery($token) {
 >   if (!is_array($token)) return 0;
 >   // A function name is tokenised as T_STRING; T_FUNCTION is the
 >   // 'function' keyword, and accepting it reported every function
 >   // declaration as a mysql_query() call.
 >   return ($token[0] == T_STRING) && (strtolower($token[1]) == 'mysql_query');
 > }
 > ?>
 >
 [Back to original message] |