|
Posted by GamblerZG on 04/04/05 16:38
Somewhere around here I asked about human-readable serialization and
there was no solution that fitted my needs. So I wrote my own
serialize/deserialize pair. The decode function is different from the
one I posted here before. This function, in my opinion, is written very
efficiently, but it's still about 10-50 times slower than serialize. I
guess it's not too bad, because I compete with pure C++ code, but I
would still appreciate any suggestion for performance tune-up.
PS: If someone would write the same functions in C, they would probably
be faster than PHP's de/serialize. And they provide output that is much
more compact. And it's human-editable. PHP developers, if you read this,
think about it.
---
/**
 * Serializes a PHP value into a compact, human-editable notation.
 *
 * - arrays become "(key=value,key=value)", with keys and values encoded
 *   recursively; an empty array becomes "()"
 * - strings are wrapped in single quotes, with every embedded single quote
 *   doubled ('it''s')
 * - non-string numeric values (int/float) are returned unchanged
 * - booleans become T or F
 * - everything else (NULL, objects, resources) becomes N
 *
 * @param mixed $var value to encode
 * @return mixed the encoded text; for a non-string numeric input the
 *               number itself is returned as-is
 */
function encode($var) {
    if (is_array($var)) {
        // Collect "key=value" pairs and join them, so there is no trailing
        // comma to strip afterwards.
        $pairs = array();
        foreach ($var as $key => $value) {
            $pairs[] = encode($key).'='.encode($value);
        }
        return '('.implode(',', $pairs).')';
    }
    // Strings are tested before is_numeric() so that numeric strings such
    // as "42" stay strings instead of being emitted as bare numbers.
    if (is_string($var)) {
        return "'".str_replace("'", "''", $var)."'";
    }
    if (is_numeric($var)) {
        return $var;
    }
    if (is_bool($var)) {
        return ($var ? 'T' : 'F');
    }
    return 'N';
}
/**
 * Parses the notation produced by encode() (or written by hand) back into
 * a PHP value.
 *
 * Accepted notation:
 * - arrays: "(key=value,...)" and/or positional "(value,...)"; a trailing
 *   comma before ")" is allowed
 * - strings: quoted with ' or "; a quote of the same kind is embedded by
 *   doubling it ('it''s')
 * - T, F, N: TRUE, FALSE, NULL
 * - any other token must be numeric
 * Whitespace outside of quoted strings is ignored.
 *
 * The parser works on a "skeleton" copy of the input in which every value
 * is collapsed to a single placeholder character, while the actual values
 * are kept in $heap, indexed by the placeholder's offset in the skeleton.
 *
 * @param string $str encoded text
 * @return mixed the decoded value; on malformed input an E_USER_WARNING is
 *               raised and NULL is returned
 */
function decode($str) {
    // Step 1: lift quoted strings out of the text in a single left-to-right
    // pass, so that a quote character of one kind inside a string of the
    // other kind is plain data. Each string is replaced by a placeholder
    // (D = double-quoted, S = single-quoted) and its content is stacked.
    $pieces = preg_split('/(".*?"|\'.*?\')/s', $str, -1, PREG_SPLIT_DELIM_CAPTURE);
    if ($pieces === FALSE) {
        user_error("Unable to tokenize input", E_USER_WARNING);
        return;
    }
    $dStack = array();
    $sStack = array();
    $str = '';
    foreach ($pieces as $index => $piece) {
        if ($index % 2 == 0) {
            // even entries are the text between quoted strings
            $str .= $piece;
        } elseif ($piece[0] == '"') {
            $dStack[] = (string) substr($piece, 1, -1);
            $str .= 'D';
        } else {
            $sStack[] = (string) substr($piece, 1, -1);
            $str .= 'S';
        }
    }
    if (preg_match('/[\'"]/', $str)) {
        user_error("Unpaired quotes", E_USER_WARNING);
        return;
    }

    // Step 2: normalise the skeleton and collapse every remaining run of
    // non-structural characters (number candidates) into an X placeholder.
    $str = preg_replace('/\s/', '', $str);
    $str = preg_replace('/,\)/', ')', $str);
    preg_match_all('/([^TFNDS=,\(\)]+)/i', $str, $matches, PREG_PATTERN_ORDER);
    $xStack = $matches[1];
    $str = preg_replace('/[^TFNDS=,\(\)]+/i', 'X', $str);

    // Step 3: walk the skeleton right to left (so the stacks can be popped),
    // turning every scalar placeholder into '$' and storing its value in
    // $heap under the same offset. Offset 0 is included so that a
    // top-level scalar is decoded as well.
    $heap = array();
    for ($ptr = strlen($str) - 1; $ptr >= 0; --$ptr) {
        switch ($str[$ptr]) {
            case 'D':
                $string = array_pop($dStack);
                // adjacent placeholders are one string with a doubled quote
                while ($ptr > 0 && $str[$ptr - 1] == 'D') {
                    $string = array_pop($dStack).'"'.$string;
                    $str[$ptr] = '_';
                    --$ptr;
                }
                $str[$ptr] = '$';
                $heap[$ptr] = $string;
                break;
            case 'S':
                $string = array_pop($sStack);
                while ($ptr > 0 && $str[$ptr - 1] == 'S') {
                    $string = array_pop($sStack)."'".$string;
                    $str[$ptr] = '_';
                    --$ptr;
                }
                $str[$ptr] = '$';
                $heap[$ptr] = $string;
                break;
            case 'T':
                $heap[$ptr] = TRUE;
                $str[$ptr] = '$';
                break;
            case 'F':
                $heap[$ptr] = FALSE;
                $str[$ptr] = '$';
                break;
            case 'N':
                $heap[$ptr] = NULL;
                $str[$ptr] = '$';
                break;
            case 'X':
                $number = array_pop($xStack);
                $str[$ptr] = '$';
                if (is_numeric($number)) {
                    $heap[$ptr] = $number + 0; // numeric string -> int/float
                } else {
                    user_error('Invalid character sequence in array ['.($ptr - 1).']', E_USER_WARNING);
                    return;
                }
                break;
        } //braces, commas and equal signs are ignored
    }

    // Step 4: reduce arrays innermost-first. The last '(' in the skeleton
    // always opens an array with no unreduced array inside it. Consumed
    // characters are overwritten with '_', and the whole array finally
    // becomes a single '$' at the offset of its opening brace.
    while (($aStart = strrpos($str, '(')) !== FALSE) {
        $aEnd = strpos($str, ')', $aStart);
        if ($aEnd === FALSE) {
            user_error("Array beginning at [$aStart] is not closed", E_USER_WARNING);
            return;
        }
        $ptr = $aStart + 1;
        if ($str[$ptr] == ',' || $str[$ptr] == '=') {
            user_error("Invalid array entry [$ptr]", E_USER_WARNING);
            return;
        }
        $aStack = array();
        if ($ptr == $aEnd) {
            // empty array "()": nothing to collect, just consume the brace
            $str[$aEnd] = '_';
        } else {
            // a comma triggers collection of the value before it, so the
            // closing brace is turned into one and the scan includes it
            $str[$aEnd] = ',';
            for (; $ptr <= $aEnd; ++$ptr) {
                switch ($str[$ptr]) {
                    case '=':
                        $str[$ptr] = '_';
                        // the key is the nearest '$' to the left
                        $keyPtr = $ptr - 1;
                        while ($str[$keyPtr] != '$') {
                            if ($str[$keyPtr] != '_') {
                                user_error("Invalid character sequence in array [$keyPtr]", E_USER_WARNING);
                                return;
                            }
                            $str[$keyPtr] = '_';
                            --$keyPtr;
                        }
                        $str[$keyPtr] = '_';
                        // the value must follow the '=' immediately
                        $valPtr = $ptr + 1;
                        if ($str[$valPtr] != '$') {
                            user_error("Invalid character sequence in array [$valPtr]", E_USER_WARNING);
                            return;
                        }
                        $str[$valPtr] = '_';
                        if (is_array($heap[$keyPtr])) {
                            user_error("Invalid array key [$keyPtr]", E_USER_WARNING);
                            return;
                        }
                        $aStack[$heap[$keyPtr]] = $heap[$valPtr];
                        // consume the comma that ends this entry so it does
                        // not trigger a positional append later
                        $commaPtr = $valPtr + 1;
                        while ($str[$commaPtr] != ',') {
                            if ($str[$commaPtr] != '_') {
                                user_error("Invalid character sequence in array [$commaPtr]", E_USER_WARNING);
                                return;
                            }
                            ++$commaPtr;
                        }
                        $str[$commaPtr] = '_';
                        break;
                    case ',':
                        // positional entry: append the nearest '$' to the left
                        $str[$ptr] = '_';
                        $valPtr = $ptr - 1;
                        while ($str[$valPtr] != '$') {
                            if ($str[$valPtr] != '_') {
                                user_error("Invalid character sequence in array [$valPtr]", E_USER_WARNING);
                                return;
                            }
                            $str[$valPtr] = '_';
                            --$valPtr;
                        }
                        $str[$valPtr] = '_';
                        $aStack[] = $heap[$valPtr];
                        break;
                }
            }
        }
        $str[$aStart] = '$'; //array is now reduced to a single variable
        $heap[$aStart] = $aStack;
    }

    if (!array_key_exists(0, $heap)) {
        user_error("No value found at the beginning of input", E_USER_WARNING);
        return;
    }
    return $heap[0];
}
[Back to original message]
|