|
Posted by Jochem Maas on 08/26/05 14:37
Robin Vickery wrote:
> This is another rather hackish attempt at using the tokeniser.
>
you beast ;-)
apart from the fact that I don't use mysql this is really cool.
I have no time to play right now (gotta catcha plana) but I have
it on my to do list of things I need to take apart and
understand properly.
thanks!
> It turns this:
>
> <?php
> $emu = "my_column {$banana}";
> $wallaby = 'my_table';
> $kookaburra = 'SELECT * FROM';
> $kookaburra .= $wallaby;
> $koala = 'ASC';
> $taipan = ' ORDER BY' . $emu;
> $dropBear = 'group by something';
>
> mysql_query("$kookaburra WHERE (up = 'down') $taipan $koala " .
> strtoupper($dropBear));
> ?>
>
> into this:
>
> SELECT * FROM my_table WHERE (up = 'down') ORDER BY my_column $banana
> ASC strtoupper( group by something)
>
> Which isn't perfect by a long shot, but I'm away home now so it'll have to do.
>
> I'd be interested to know what it makes of your queries.
>
> -robin
>
> <?php
>
> // Scan every *.php file directly inside $dir and print, per file, each
> // SQL statement handed to mysql_query() as far as it can be resolved.
> $dir = '/path/to/php/files';
>
> foreach (getPhpFileList($dir) as $file) {
>     print "===== $file =====\n";
>     $token = tokeniseFile($file);
>
>     // each entry is a pair of token indices delimiting the argument
>     // list of one mysql_query() call
>     foreach (getMysqlQueryCalls($token) as $range) {
>         list($first, $last) = $range;
>         $sql = resolveExpression($token, $first, $first, $last);
>         // collapse runs of whitespace so the query prints on one line
>         $tidy = preg_replace('/\s+/', ' ', $sql);
>         echo trim($tidy), "\n";
>     }
> }
>
>
> /**
>  * Locate every mysql_query() call in a token stream.
>  *
>  * @param array $token Token list as produced by token_get_all().
>  * @return array One entry per call, each being whatever
>  *               getMysqlQueryArguments() returned for it (a pair of
>  *               token indices). Empty array when nothing matched.
>  */
> function getMysqlQueryCalls($token) {
>     $callList = array();
>     for ($i = 0; isset($token[$i]); $i++) {
>         if (!is_array($token[$i])) continue; // not interested in atomic tokens
>         if (strtolower($token[$i][1]) !== 'mysql_query') continue;
>         $args = getMysqlQueryArguments($i, $token);
>         // append to the list that is returned, so a stream without any
>         // call yields an empty array rather than an undefined variable
>         if ($args !== false) $callList[] = $args;
>     }
>     return $callList;
> }
>
> /**
>  * Copy a slice of the token list, replacing each numeric token id with
>  * its symbolic name (debugging aid).
>  *
>  * @param array    $token Token list from token_get_all().
>  * @param int      $start First index of the slice.
>  * @param int|null $end   Index one past the slice; null means "to the end".
>  * @return array The re-indexed slice with token ids turned into names.
>  */
> function nameTokens($token, $start = 0, $end = null)
> {
>     if ($end === null) {
>         $end = count($token);
>     }
>
>     $named = array();
>     foreach (array_slice($token, $start, $end - $start) as $index => $entry) {
>         // atomic tokens are plain strings and are passed through untouched
>         if (is_array($entry)) {
>             $entry[0] = token_name($entry[0]);
>         }
>         $named[$index] = $entry;
>     }
>     return $named;
> }
>
> /**
>  * Resolve a single token to the text it contributes to an expression.
>  *
>  * @param array $token  Token list from token_get_all().
>  * @param int   $cursor Position handed to findLastDefinition(); variable
>  *                      definitions are searched backwards from here.
>  * @param int   $i      Index of the token to resolve.
>  * @return string|null Resolved text; null for the curly-brace tokens
>  *                     that open an interpolation.
>  */
> function resolveToken($token, $cursor, $i) {
>     $current = $token[$i];
>
>     // atomic tokens: keep brackets, drop every other punctuation mark
>     if (!is_array($current)) {
>         return ($current == '(' || $current == ')') ? $current : '';
>     }
>
>     $type = $current[0];
>     switch ($type) {
>         case T_CURLY_OPEN:
>         case T_DOLLAR_OPEN_CURLY_BRACES:
>             // interpolation delimiters contribute nothing
>             return null;
>
>         case T_WHITESPACE:
>         case T_STRING:
>         case T_ENCAPSED_AND_WHITESPACE:
>             return $current[1];
>
>         case T_CONSTANT_ENCAPSED_STRING:
>             // NOTE(review): eval() runs text taken from the scanned file
>             // to unquote the literal; only scan files you trust.
>             return eval( "return {$current[1]};");
>
>         case T_STRING_VARNAME:
>         case T_VARIABLE:
>             $def = findLastDefinition($token, $cursor, $i);
>             if ($def === false) {
>                 // no definition found: presume it is defined elsewhere
>                 // and emit the variable name itself
>                 return preg_replace('/^\$*/', '$', $current[1]);
>             }
>             // for '.=' first resolve whatever the variable held before
>             // this assignment, then append the right-hand side
>             $prefix = ($def[3] == '.=')
>                 ? resolveToken($token, $def[0], $i)
>                 : '';
>             return $prefix . resolveExpression($token, $def[0], $def[1], $def[2]);
>
>         default:
>             // anything unhandled is shown as (TOKEN_NAME:text)
>             return '(' . token_name($type) . ':' . $current[1] . ')';
>     }
> }
>
> /**
>  * Resolve a run of tokens and concatenate the results.
>  *
>  * @param array $token  Token list from token_get_all().
>  * @param int   $cursor Position passed through to resolveToken() for
>  *                      variable look-ups.
>  * @param int   $start  Index of the first token to resolve.
>  * @param int   $end    Index one past the last token to resolve.
>  * @return string The concatenated resolved text ('' for an empty range).
>  */
> function ResolveExpression($token, $cursor, $start, $end) {
>     $output = '';
>     // just try and resolve all the tokens in the expression, concat
>     // them and throw them back.
>     for ($i = $start; $i < $end; $i++) {
>         $output .= resolveToken($token, $cursor, $i);
>     }
>     return $output;
> }
>
> /**
>  * Find the nearest assignment to a variable, searching backwards.
>  *
>  * @param array $token Token list from token_get_all().
>  * @param int   $i     Search starts at the token just before this index.
>  * @param int   $id    Index of the variable token being looked up.
>  * @return array|false array(cursor, start, end, operator): cursor is the
>  *         index of the assignment operator, start is the first token of
>  *         the right-hand side, end is the index of the terminating ';'
>  *         and operator is the operator's text ('=' or '.='). False when
>  *         no usable assignment is found.
>  */
> function findLastDefinition($token, $i, $id) {
>     // make sure all variables are in the form $name as ${name} ones are
>     // just 'name' by the time they end up here.
>     $name = preg_replace('/^\$*/', '$', $token[$id][1]);
>
>     // rewind until we hit an assignment or run out of tokens
>     while (isset($token[--$i])) {
>         $isAssign = ($token[$i] === '=');
>         $isConcat = is_array($token[$i]) && $token[$i][0] == T_CONCAT_EQUAL;
>         if (!$isAssign && !$isConcat) continue;
>
>         // only interested when our $name is on the left of the operator
>         if (getLHS($token, $i) !== $name) continue;
>
>         $RHS = getRHS($token, $i);
>         // getRHS() gives up with false on an empty or unterminated
>         // right-hand side; report "not found" instead of building a
>         // malformed result from it
>         if ($RHS === false) return false;
>
>         $RHS[] = $isConcat ? $token[$i][1] : $token[$i];
>         return $RHS;
>     }
>     // we've run out of tokens, so seems like we can't find where this
>     // variable was defined.
>     return false;
> }
>
>
> /**
>  * Return the name of the nearest variable token before position $i.
>  *
>  * @param array $token Token list from token_get_all().
>  * @param int   $i     Index to search backwards from (exclusive).
>  * @return string|false The variable's text (e.g. '$foo'), or false when
>  *                      the start of the token list is reached first.
>  */
> function getLHS($token, $i)
> {
>     for ($pos = $i - 1; isset($token[$pos]); $pos--) {
>         $candidate = $token[$pos];
>         if (!is_array($candidate)) continue; // atomic tokens are never variables
>         if ($candidate[0] == T_VARIABLE) return $candidate[1];
>     }
>     // run out of tokens, we can't get a left hand side.
>     return false;
> }
>
> /**
>  * Delimit the right-hand side of the assignment operator at index $i.
>  *
>  * @param array $token Token list from token_get_all().
>  * @param int   $i     Index of the assignment operator.
>  * @return array|false array(cursor, start, end) where cursor is $i,
>  *         start is the first token after the operator and end is the
>  *         index of the terminating ';'. False when the right-hand side
>  *         is empty or no ';' follows.
>  */
> function getRHS($token, $i)
> {
>     // remember the operator position so that a later look-up of the
>     // same name from inside the RHS does not land on this assignment
>     $cursor = $i;
>
>     // the RHS is empty if nothing follows, or a ';' follows directly
>     $start = $cursor + 1;
>     if (!isset($token[$start]) || $token[$start] == ';') return false;
>
>     // fast forward until we get to a ';' or run out of tokens
>     $end = $start;
>     while (isset($token[$end]) && $token[$end] != ';') {
>         $end++;
>     }
>     if (!isset($token[$end])) return false;
>
>     return array($cursor, $start, $end);
> }
>
> /**
>  * Glue the raw text of a range of tokens back together, to see what is
>  * going on (debugging aid).
>  *
>  * @param array $token Token list from token_get_all().
>  * @param int   $start Index of the first token to render.
>  * @param int   $end   Index one past the last token to render.
>  * @return string The concatenated source text of the range.
>  */
> function renderTokens($token, $start, $end)
> {
>     $pieces = array();
>     for ($pos = $start; $pos < $end; $pos++) {
>         $tok = $token[$pos];
>         // array tokens carry their text in slot 1; atomic ones are the text
>         $pieces[] = is_array($tok) ? $tok[1] : $tok;
>     }
>     return implode('', $pieces);
> }
>
>
> /**
>  * Find the token range of the argument list following a function name.
>  *
>  * @param int   $i     Index of the function-name token.
>  * @param array $token Token list from token_get_all().
>  * @return array|false array(first, end): first is the index just after
>  *         the opening '(' and end is the index of the matching ')'.
>  *         False when something other than whitespace separates the name
>  *         from '(' or the brackets never balance.
>  */
> function getMysqlQueryArguments($i, $token) {
>     /* only allowed whitespace before brackets */
>     $pos = $i + 1;
>     while (isset($token[$pos]) && $token[$pos] !== '(') {
>         if (!is_array($token[$pos]) || $token[$pos][0] !== T_WHITESPACE) {
>             return false;
>         }
>         $pos++;
>     }
>
>     // $open is the '(' itself (or past the end if none was found);
>     // walk forward until its matching ')' closes it
>     $open = $pos;
>     $depth = 1;
>     for ($pos = $open + 1; isset($token[$pos]); $pos++) {
>         if ($token[$pos] === '(') {
>             $depth++;
>         } elseif ($token[$pos] === ')') {
>             $depth--;
>             if ($depth == 0) {
>                 // start and end positions of the parameters
>                 return array($open + 1, $pos);
>             }
>         }
>     }
>
>     // ran out of tokens with unmatched brackets, something's wrong
>     return false;
> }
>
> /**
>  * List the *.php files directly inside a directory.
>  *
>  * @param string $dir Directory to scan; the current directory is used
>  *                    when $dir is not a directory.
>  * @return array Matching paths; empty when none match or glob() fails.
>  */
> function getPhpFileList($dir)
> {
>     if (!is_dir($dir)) $dir = '.';
>     $files = glob($dir . '/*.php');
>     // glob() returns false on error, and callers iterate the result
>     return ($files === false) ? array() : $files;
> }
>
> /**
>  * Read a PHP source file and split it into tokens.
>  *
>  * @param string $file Path of the file to read.
>  * @return array Token list as produced by token_get_all().
>  */
> function tokeniseFile($file)
> {
>     $source = file_get_contents($file);
>     return token_get_all($source);
> }
>
> /**
>  * Keep only the tokens accepted by isMysqlQuery(), preserving their
>  * original indices.
>  *
>  * @param array $tokens Token list from token_get_all().
>  * @return array The accepted tokens, keyed by their original position.
>  */
> function findMysqlQueries($tokens)
> {
>     $matches = array();
>     foreach ($tokens as $key => $tok) {
>         if (isMysqlQuery($tok)) {
>             $matches[$key] = $tok;
>         }
>     }
>     return $matches;
> }
>
> /**
>  * Filter predicate used by findMysqlQueries().
>  *
>  * @param mixed $token A single entry of a token_get_all() list.
>  * @return bool|int 0 for atomic (non-array) tokens; otherwise true when
>  *                  the token is T_FUNCTION or its text is 'mysql_query'
>  *                  in any letter case, false if neither.
>  */
> function isMysqlQuery($token) {
>     if (is_array($token)) {
>         if ($token[0] == T_FUNCTION) {
>             return true;
>         }
>         return strtolower($token[1]) == 'mysql_query';
>     }
>     return 0;
> }
> ?>
>
[Back to original message]
|