Key
This line was removed.
This word was removed. This word was added.
This line was added.

Changes (35)

View Page History
* @link http://noteslog.com/
* @link http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
*
* @package Zend_Utf8
*/
class Zend_Utf8
* Unescape UTF-8 characters from a given escape format
*
* About the read.pattern option
* -- no delimiters and no modifiers are allowed
* -- for back references, your groups start at 3.
* About the read.callback option
* -- it receives
* @param array $options
* 'read' => array(
* 'pattern' => preg (default: '@\\\\u([0-9A-Fa-f]{4})@'),
* 'callback' => callable (default: create_function('$all, $code', 'return hexdec($code);')),
* 'arguments' => array (default: array()),
$options = array_merge(array(
'read' => array(
'pattern' => '@\\\\u([0-9A-Fa-f]{4})@',
'callback' => create_function('$all, $code', 'return hexdec($code);'),
'arguments' => array(),
'extendedUseSurrogate' => true,
), $options);

if (! self::isCallable($options['read']))
{
require_once 'Zend/Utf8/Exception.php';
throw new Zend_Utf8_Exception('Expected a valid read handler (callable, array).');
}
$thereAreFilters = self::validateFilters($options);

$result = "";
$length = strlen($value);
$pattern = '@([\w\W]*?)(' . $options['read']['pattern'] . ')|([\w\W]+)@';
$offset = 0;
while (preg_match($pattern, $value, $matches, 0, $offset))
while (preg_match($options['read']['pattern'], $value, $matches, PREG_OFFSET_CAPTURE))
{
if (! $matches[2]) $unicode = self::eatUpMatches($result, $value, $matches, $options['read']);
if ($options['extendedUseSurrogate'] && (0xD800 <= $unicode && $unicode < 0xDC00))
{
//no more escape patterns $upperSurrogate = $unicode;
if (! preg_match($options['read']['pattern'], $value, $matches, PREG_OFFSET_CAPTURE))
$result .= $matches[0]; {
require_once 'Zend/Utf8/Exception.php';
throw new Zend_Utf8_Exception('Expected an extended UTF-8 character.');
$offset += strlen($matches[0]); }
$unicode = self::eatUpMatches($result, $value, $matches, $options['read']);
$utf8Char = self::utf8CharFromSurrogatePair(array($upperSurrogate, $unicode));
}
else
{
//one more escape pattern $utf8Char = self::utf8CharFromCodePoint($unicode);
$result .= $matches[1];
$offset += strlen($matches[0]);
$args = array_splice($matches, 2, count($matches) - 1);
$unicode = self::call($options['read'], $args);// call_user_func($options['integer'], $matches[2]);
if ($options['extendedUseSurrogate'] && (0xD800 <= $unicode && $unicode < 0xDC00))
{
$upperSurrogate = $unicode;
preg_match($pattern, $value, $matches, 0, $offset);
if (! $matches[2])
{
require_once 'Zend/Utf8/Exception.php';
throw new Zend_Utf8_Exception('Expected an extended UTF-8 character.');
}
$offset += strlen($matches[0]);
$args = array_splice($matches, 2, count($matches) - 1);
$unicode = self::call($options['read'], $args);//$lowerSurrogate = call_user_func($options['integer'], $matches[2]);
$utf8Char = self::utf8CharFromSurrogatePair(array($upperSurrogate, $unicode));
}
else
{
$utf8Char = self::utf8CharFromCodePoint($unicode);
}
$result .= $utf8Char;
}
$result .= $utf8Char;
}
$result .= $value;

if ($thereAreFilters && isset($options['filters']['after-read']))
{

/**
* Validate filters. If there are filters return true, else false
*
* @param array $options
* @throws Zend_Utf8_Exception If there are malformed filters
* @return boolean
*/
protected static function validateFilters($options)
{
    // Reports whether $options carries a 'filters' entry, rejecting
    // malformed ones along the way.

    // No 'filters' entry at all: nothing to validate.
    if (! isset($options['filters']))
    {
        return false;
    }

    $filters = $options['filters'];

    // The filters entry must be an array of handlers.
    if (! is_array($filters))
    {
        require_once 'Zend/Utf8/Exception.php';
        throw new Zend_Utf8_Exception('Expected valid filters.');
    }

    // Every entry has to pass the class's own callable check; the first
    // one that does not aborts validation, naming the offending key.
    foreach ($filters as $key => $filter)
    {
        if (self::isCallable($filter))
        {
            continue;
        }
        require_once 'Zend/Utf8/Exception.php';
        throw new Zend_Utf8_Exception("Expected a valid $key handler.");
    }

    return true;
}

/**
* A little calling interface: validation
*
}

/**
* Return the transposition of the given array
*
* @param array $rows
* @return array
*/
private static function transpose($rows)
{
    // Returns the transposition of $rows: element [c][r] of the result is
    // element [r][c] of the input. Keys of $rows and of each row are
    // ignored (positions are used), and rows shorter than the widest one
    // are padded with null.
    //
    // This is written out explicitly instead of delegating to
    // array_map(null, ...$rows) because that shortcut
    //  - returns a single row unchanged instead of transposing it,
    //  - fails when $rows is empty (array_map needs at least one array),
    //  - and, through call_user_func_array, treats string keys of $rows
    //    as named arguments on PHP 8.

    // Work on positional copies so that string or sparse keys cannot
    // influence the outcome.
    $rows = array_values($rows);
    $width = 0;
    foreach ($rows as $index => $row)
    {
        $rows[$index] = array_values($row);
        $width = max($width, count($rows[$index]));
    }

    // Build one output row per input column.
    $result = array();
    for ($column = 0; $column < $width; $column++)
    {
        $cells = array();
        foreach ($rows as $row)
        {
            // array_key_exists (not isset) so that stored nulls are kept
            // and only missing cells are padded.
            $cells[] = array_key_exists($column, $row) ? $row[$column] : null;
        }
        $result[] = $cells;
    }
    return $result;
}

/**
* Validate filters. If there are filters return true, else false
* 1: update $processed with the unmatched substring before $matches
* 2: update $value with the rest of the substring after $matches
* 3: return unicode read from the matched substring in $matches
*
* @param string $processed
* @param string $value
* @param array $options $matches
* @throws Zend_Utf8_Exception If there are malformed filters @param array $handler
* @return boolean integer
*/
protected static function validateFilters($options)
private static function eatUpMatches(&$processed, &$value, $matches, $handler)
{
$match = $matches[0][0];
$offset = $matches[0][1];
$processed .= substr($value, 0, $offset);
$value = substr($value, $offset + strlen($match));

if (isset($options['filters'])) $matches = self::transpose($matches);
$args = $matches[0];
$result = self::call($handler, $args);

{ return $result;
if (! is_array($options['filters']))
{
require_once 'Zend/Utf8/Exception.php';
throw new Zend_Utf8_Exception('Expected valid filters.');
}
foreach ($options['filters'] as $key => $value)
{
if (! self::isCallable($value))
{
require_once 'Zend/Utf8/Exception.php';
throw new Zend_Utf8_Exception("Expected a valid $key filter.");
}
}
return true;
}
return false;
}