PHPXRef 0.7.1 : MyBB : /inc/3rdparty/diff/Diff/String.php source

[Summary view] [Print] [Text view]
   1  <?php
   2  /**
   3   * The Horde_String:: class provides static methods for charset and locale
   4   * safe string manipulation.
   5   *
   6   * Copyright 2003-2012 Horde LLC (http://www.horde.org/)
   7   *
   8   * See the enclosed file COPYING for license information (LGPL). If you
   9   * did not receive this file, see http://www.horde.org/licenses/lgpl21.
  10   *
  11   * @author   Jan Schneider <jan@horde.org>
  12   * @category Horde
  13   * @license  http://www.horde.org/licenses/lgpl21 LGPL 2.1
  14   * @package  Util
  15   */
  16  
  17  // Disallow direct access to this file for security reasons
  18  if(!defined("IN_MYBB"))
  19  {
  20      die("Direct initialization of this file is not allowed.<br /><br />Please make sure IN_MYBB is defined.");
  21  }
  22  
  23  class Horde_String
  24  {
  25      /**
  26       * lower() cache.
  27       *
  28       * @var array
  29       */
  30      static protected $_lowers = array();
  31  
  32      /**
  33       * upper() cache.
  34       *
  35       * @var array
  36       */
  37      static protected $_uppers = array();
  38  
  39      /**
  40       * Converts a string from one charset to another.
  41       *
  42       * Uses the iconv or the mbstring extensions.
  43       * The original string is returned if conversion failed or none
  44       * of the extensions were available.
  45       *
  46       * @param mixed $input    The data to be converted. If $input is an an
  47       *                        array, the array's values get converted
  48       *                        recursively.
  49       * @param string $from    The string's current charset.
  50       * @param string $to      The charset to convert the string to.
  51       * @param boolean $force  Force conversion?
  52       *
  53       * @return mixed  The converted input data.
  54       */
  55      static public function convertCharset($input, $from, $to, $force = false)
  56      {
  57          /* Don't bother converting numbers. */
  58          if (is_numeric($input)) {
  59              return $input;
  60          }
  61  
  62          /* If the from and to character sets are identical, return now. */
  63          if (!$force && $from == $to) {
  64              return $input;
  65          }
  66          $from = self::lower($from);
  67          $to = self::lower($to);
  68          if (!$force && $from == $to) {
  69              return $input;
  70          }
  71  
  72          if (is_array($input)) {
  73              $tmp = array();
  74              reset($input);
  75              while (list($key, $val) = each($input)) {
  76                  $tmp[self::_convertCharset($key, $from, $to)] = self::convertCharset($val, $from, $to, $force);
  77              }
  78              return $tmp;
  79          }
  80  
  81          if (is_object($input)) {
  82              // PEAR_Error/Exception objects are almost guaranteed to contain
  83              // recursion, which will cause a segfault in PHP. We should never
  84              // reach this line, but add a check.
  85              if (($input instanceof Exception) ||
  86                  ($input instanceof PEAR_Error)) {
  87                  return '';
  88              }
  89  
  90              $input = Horde_Util::cloneObject($input);
  91              $vars = get_object_vars($input);
  92              while (list($key, $val) = each($vars)) {
  93                  $input->$key = self::convertCharset($val, $from, $to, $force);
  94              }
  95              return $input;
  96          }
  97  
  98          if (!is_string($input)) {
  99              return $input;
 100          }
 101  
 102          return self::_convertCharset($input, $from, $to);
 103      }
 104  
 105      /**
 106       * Internal function used to do charset conversion.
 107       *
 108       * @param string $input  See self::convertCharset().
 109       * @param string $from   See self::convertCharset().
 110       * @param string $to     See self::convertCharset().
 111       *
 112       * @return string  The converted string.
 113       */
 114      static protected function _convertCharset($input, $from, $to)
 115      {
 116          /* Use utf8_[en|de]code() if possible and if the string isn't too
 117           * large (less than 16 MB = 16 * 1024 * 1024 = 16777216 bytes) - these
 118           * functions use more memory. */
 119          if (Horde_Util::extensionExists('xml') &&
 120              ((strlen($input) < 16777216) ||
 121               !Horde_Util::extensionExists('iconv') ||
 122               !Horde_Util::extensionExists('mbstring'))) {
 123              if (($to == 'utf-8') &&
 124                  in_array($from, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
 125                  return utf8_encode($input);
 126              }
 127  
 128              if (($from == 'utf-8') &&
 129                  in_array($to, array('iso-8859-1', 'us-ascii', 'utf-8'))) {
 130                  return utf8_decode($input);
 131              }
 132          }
 133  
 134          /* Try UTF7-IMAP conversions. */
 135          if (($from == 'utf7-imap') || ($to == 'utf7-imap')) {
 136              try {
 137                  if ($from == 'utf7-imap') {
 138                      return self::convertCharset(Horde_Imap_Client_Utf7imap::Utf7ImapToUtf8($input), 'UTF-8', $to);
 139                  } else {
 140                      if ($from == 'utf-8') {
 141                          $conv = $input;
 142                      } else {
 143                          $conv = self::convertCharset($input, $from, 'UTF-8');
 144                      }
 145                      return Horde_Imap_Client_Utf7imap::Utf8ToUtf7Imap($conv);
 146                  }
 147              } catch (Horde_Imap_Client_Exception $e) {
 148                  return $input;
 149              }
 150          }
 151  
 152          /* Try iconv with transliteration. */
 153          if (Horde_Util::extensionExists('iconv')) {
 154              unset($php_errormsg);
 155              ini_set('track_errors', 1);
 156              $out = @iconv($from, $to . '//TRANSLIT', $input);
 157              $errmsg = isset($php_errormsg);
 158              ini_restore('track_errors');
 159              if (!$errmsg) {
 160                  return $out;
 161              }
 162          }
 163  
 164          /* Try mbstring. */
 165          if (Horde_Util::extensionExists('mbstring')) {
 166              $out = @mb_convert_encoding($input, $to, self::_mbstringCharset($from));
 167              if (!empty($out)) {
 168                  return $out;
 169              }
 170          }
 171  
 172          return $input;
 173      }
 174  
 175      /**
 176       * Makes a string lowercase.
 177       *
 178       * @param string $string   The string to be converted.
 179       * @param boolean $locale  If true the string will be converted based on
 180       *                         a given charset, locale independent else.
 181       * @param string $charset  If $locale is true, the charset to use when
 182       *                         converting.
 183       *
 184       * @return string  The string with lowercase characters.
 185       */
 186      static public function lower($string, $locale = false, $charset = null)
 187      {
 188          if ($locale) {
 189              if (Horde_Util::extensionExists('mbstring')) {
 190                  if (is_null($charset)) {
 191                      throw new InvalidArgumentException('$charset argument must not be null');
 192                  }
 193                  $ret = @mb_strtolower($string, self::_mbstringCharset($charset));
 194                  if (!empty($ret)) {
 195                      return $ret;
 196                  }
 197              }
 198              return strtolower($string);
 199          }
 200  
 201          if (!isset(self::$_lowers[$string])) {
 202              $language = setlocale(LC_CTYPE, 0);
 203              setlocale(LC_CTYPE, 'C');
 204              self::$_lowers[$string] = strtolower($string);
 205              setlocale(LC_CTYPE, $language);
 206          }
 207  
 208          return self::$_lowers[$string];
 209      }
 210  
 211      /**
 212       * Makes a string uppercase.
 213       *
 214       * @param string $string   The string to be converted.
 215       * @param boolean $locale  If true the string will be converted based on a
 216       *                         given charset, locale independent else.
 217       * @param string $charset  If $locale is true, the charset to use when
 218       *                         converting. If not provided the current charset.
 219       *
 220       * @return string  The string with uppercase characters.
 221       */
 222      static public function upper($string, $locale = false, $charset = null)
 223      {
 224          if ($locale) {
 225              if (Horde_Util::extensionExists('mbstring')) {
 226                  if (is_null($charset)) {
 227                      throw new InvalidArgumentException('$charset argument must not be null');
 228                  }
 229                  $ret = @mb_strtoupper($string, self::_mbstringCharset($charset));
 230                  if (!empty($ret)) {
 231                      return $ret;
 232                  }
 233              }
 234              return strtoupper($string);
 235          }
 236  
 237          if (!isset(self::$_uppers[$string])) {
 238              $language = setlocale(LC_CTYPE, 0);
 239              setlocale(LC_CTYPE, 'C');
 240              self::$_uppers[$string] = strtoupper($string);
 241              setlocale(LC_CTYPE, $language);
 242          }
 243  
 244          return self::$_uppers[$string];
 245      }
 246  
 247      /**
 248       * Returns a string with the first letter capitalized if it is
 249       * alphabetic.
 250       *
 251       * @param string $string   The string to be capitalized.
 252       * @param boolean $locale  If true the string will be converted based on a
 253       *                         given charset, locale independent else.
 254       * @param string $charset  The charset to use, defaults to current charset.
 255       *
 256       * @return string  The capitalized string.
 257       */
 258      static public function ucfirst($string, $locale = false, $charset = null)
 259      {
 260          if ($locale) {
 261              if (is_null($charset)) {
 262                  throw new InvalidArgumentException('$charset argument must not be null');
 263              }
 264              $first = self::substr($string, 0, 1, $charset);
 265              if (self::isAlpha($first, $charset)) {
 266                  $string = self::upper($first, true, $charset) . self::substr($string, 1, null, $charset);
 267              }
 268          } else {
 269              $string = self::upper(substr($string, 0, 1), false) . substr($string, 1);
 270          }
 271  
 272          return $string;
 273      }
 274  
 275      /**
 276       * Returns a string with the first letter of each word capitalized if it is
 277       * alphabetic.
 278       *
 279       * Sentences are splitted into words at whitestrings.
 280       *
 281       * @param string $string   The string to be capitalized.
 282       * @param boolean $locale  If true the string will be converted based on a
 283       *                         given charset, locale independent else.
 284       * @param string $charset  The charset to use, defaults to current charset.
 285       *
 286       * @return string  The capitalized string.
 287       */
 288      static public function ucwords($string, $locale = false, $charset = null)
 289      {
 290          $words = preg_split('/(\s+)/', $string, -1, PREG_SPLIT_DELIM_CAPTURE);
 291          for ($i = 0, $c = count($words); $i < $c; $i += 2) {
 292              $words[$i] = self::ucfirst($words[$i], $locale, $charset);
 293          }
 294          return implode('', $words);
 295      }
 296  
 297      /**
 298       * Returns part of a string.
 299       *
 300       * @param string $string   The string to be converted.
 301       * @param integer $start   The part's start position, zero based.
 302       * @param integer $length  The part's length.
 303       * @param string $charset  The charset to use when calculating the part's
 304       *                         position and length, defaults to current
 305       *                         charset.
 306       *
 307       * @return string  The string's part.
 308       */
 309      static public function substr($string, $start, $length = null,
 310                                    $charset = 'UTF-8')
 311      {
 312          if (is_null($length)) {
 313              $length = self::length($string, $charset) - $start;
 314          }
 315  
 316          if ($length == 0) {
 317              return '';
 318          }
 319  
 320          /* Try mbstring. */
 321          if (Horde_Util::extensionExists('mbstring')) {
 322              $ret = @mb_substr($string, $start, $length, self::_mbstringCharset($charset));
 323  
 324              /* mb_substr() returns empty string on failure. */
 325              if (strlen($ret)) {
 326                  return $ret;
 327              }
 328          }
 329  
 330          /* Try iconv. */
 331          if (Horde_Util::extensionExists('iconv')) {
 332              $ret = @iconv_substr($string, $start, $length, $charset);
 333  
 334              /* iconv_substr() returns false on failure. */
 335              if ($ret !== false) {
 336                  return $ret;
 337              }
 338          }
 339  
 340          return substr($string, $start, $length);
 341      }
 342  
 343      /**
 344       * Returns the character (not byte) length of a string.
 345       *
 346       * @param string $string  The string to return the length of.
 347       * @param string $charset The charset to use when calculating the string's
 348       *                        length.
 349       *
 350       * @return integer  The string's length.
 351       */
 352      static public function length($string, $charset = 'UTF-8')
 353      {
 354          $charset = self::lower($charset);
 355  
 356          if ($charset == 'utf-8' || $charset == 'utf8') {
 357              return strlen(utf8_decode($string));
 358          }
 359  
 360          if (Horde_Util::extensionExists('mbstring')) {
 361              $ret = @mb_strlen($string, self::_mbstringCharset($charset));
 362              if (!empty($ret)) {
 363                  return $ret;
 364              }
 365          }
 366  
 367          return strlen($string);
 368      }
 369  
 370      /**
 371       * Returns the numeric position of the first occurrence of $needle
 372       * in the $haystack string.
 373       *
 374       * @param string $haystack  The string to search through.
 375       * @param string $needle    The string to search for.
 376       * @param integer $offset   Allows to specify which character in haystack
 377       *                          to start searching.
 378       * @param string $charset   The charset to use when searching for the
 379       *                          $needle string.
 380       *
 381       * @return integer  The position of first occurrence.
 382       */
 383      static public function pos($haystack, $needle, $offset = 0,
 384                                 $charset = 'UTF-8')
 385      {
 386          if (Horde_Util::extensionExists('mbstring')) {
 387              $track_errors = ini_set('track_errors', 1);
 388              $ret = @mb_strpos($haystack, $needle, $offset, self::_mbstringCharset($charset));
 389              ini_set('track_errors', $track_errors);
 390              if (!isset($php_errormsg)) {
 391                  return $ret;
 392              }
 393          }
 394  
 395          return strpos($haystack, $needle, $offset);
 396      }
 397  
 398      /**
 399       * Returns the numeric position of the last occurrence of $needle
 400       * in the $haystack string.
 401       *
 402       * @param string $haystack  The string to search through.
 403       * @param string $needle    The string to search for.
 404       * @param integer $offset   Allows to specify which character in haystack
 405       *                          to start searching.
 406       * @param string $charset   The charset to use when searching for the
 407       *                          $needle string.
 408       *
 409       * @return integer  The position of first occurrence.
 410       */
 411      static public function rpos($haystack, $needle, $offset = 0,
 412                                  $charset = 'UTF-8')
 413      {
 414          if (Horde_Util::extensionExists('mbstring')) {
 415              $track_errors = ini_set('track_errors', 1);
 416              $ret = @mb_strrpos($haystack, $needle, $offset, self::_mbstringCharset($charset));
 417              ini_set('track_errors', $track_errors);
 418              if (!isset($php_errormsg)) {
 419                  return $ret;
 420              }
 421          }
 422  
 423          return strrpos($haystack, $needle, $offset);
 424      }
 425  
 426      /**
 427       * Returns a string padded to a certain length with another string.
 428       * This method behaves exactly like str_pad() but is multibyte safe.
 429       *
 430       * @param string $input    The string to be padded.
 431       * @param integer $length  The length of the resulting string.
 432       * @param string $pad      The string to pad the input string with. Must
 433       *                         be in the same charset like the input string.
 434       * @param const $type      The padding type. One of STR_PAD_LEFT,
 435       *                         STR_PAD_RIGHT, or STR_PAD_BOTH.
 436       * @param string $charset  The charset of the input and the padding
 437       *                         strings.
 438       *
 439       * @return string  The padded string.
 440       */
 441      static public function pad($input, $length, $pad = ' ',
 442                                 $type = STR_PAD_RIGHT, $charset = 'UTF-8')
 443      {
 444          $mb_length = self::length($input, $charset);
 445          $sb_length = strlen($input);
 446          $pad_length = self::length($pad, $charset);
 447  
 448          /* Return if we already have the length. */
 449          if ($mb_length >= $length) {
 450              return $input;
 451          }
 452  
 453          /* Shortcut for single byte strings. */
 454          if ($mb_length == $sb_length && $pad_length == strlen($pad)) {
 455              return str_pad($input, $length, $pad, $type);
 456          }
 457  
 458          switch ($type) {
 459          case STR_PAD_LEFT:
 460              $left = $length - $mb_length;
 461              $output = self::substr(str_repeat($pad, ceil($left / $pad_length)), 0, $left, $charset) . $input;
 462              break;
 463  
 464          case STR_PAD_BOTH:
 465              $left = floor(($length - $mb_length) / 2);
 466              $right = ceil(($length - $mb_length) / 2);
 467              $output = self::substr(str_repeat($pad, ceil($left / $pad_length)), 0, $left, $charset) .
 468                  $input .
 469                  self::substr(str_repeat($pad, ceil($right / $pad_length)), 0, $right, $charset);
 470              break;
 471  
 472          case STR_PAD_RIGHT:
 473              $right = $length - $mb_length;
 474              $output = $input . self::substr(str_repeat($pad, ceil($right / $pad_length)), 0, $right, $charset);
 475              break;
 476          }
 477  
 478          return $output;
 479      }
 480  
 481      /**
 482       * Wraps the text of a message.
 483       *
 484       * @param string $string         String containing the text to wrap.
 485       * @param integer $width         Wrap the string at this number of
 486       *                               characters.
 487       * @param string $break          Character(s) to use when breaking lines.
 488       * @param boolean $cut           Whether to cut inside words if a line
 489       *                               can't be wrapped.
 490       * @param boolean $line_folding  Whether to apply line folding rules per
 491       *                               RFC 822 or similar. The correct break
 492       *                               characters including leading whitespace
 493       *                               have to be specified too.
 494       *
 495       * @return string  String containing the wrapped text.
 496       */
 497      static public function wordwrap($string, $width = 75, $break = "\n",
 498                                      $cut = false, $line_folding = false)
 499      {
 500          $wrapped = '';
 501  
 502          while (self::length($string, 'UTF-8') > $width) {
 503              $line = self::substr($string, 0, $width, 'UTF-8');
 504              $string = self::substr($string, self::length($line, 'UTF-8'), null, 'UTF-8');
 505  
 506              // Make sure we didn't cut a word, unless we want hard breaks
 507              // anyway.
 508              if (!$cut && preg_match('/^(.+?)((\s|\r?\n).*)/us', $string, $match)) {
 509                  $line .= $match[1];
 510                  $string = $match[2];
 511              }
 512  
 513              // Wrap at existing line breaks.
 514              if (preg_match('/^(.*?)(\r?\n)(.*)$/su', $line, $match)) {
 515                  $wrapped .= $match[1] . $match[2];
 516                  $string = $match[3] . $string;
 517                  continue;
 518              }
 519  
 520              // Wrap at the last colon or semicolon followed by a whitespace if
 521              // doing line folding.
 522              if ($line_folding &&
 523                  preg_match('/^(.*?)(;|:)(\s+.*)$/u', $line, $match)) {
 524                  $wrapped .= $match[1] . $match[2] . $break;
 525                  $string = $match[3] . $string;
 526                  continue;
 527              }
 528  
 529              // Wrap at the last whitespace of $line.
 530              $sub = $line_folding
 531                  ? '(.+[^\s])'
 532                  : '(.*)';
 533  
 534              if (preg_match('/^' . $sub . '(\s+)(.*)$/u', $line, $match)) {
 535                  $wrapped .= $match[1] . $break;
 536                  $string = ($line_folding ? $match[2] : '') . $match[3] . $string;
 537                  continue;
 538              }
 539  
 540              // Hard wrap if necessary.
 541              if ($cut) {
 542                  $wrapped .= $line . $break;
 543                  continue;
 544              }
 545  
 546              $wrapped .= $line;
 547          }
 548  
 549          return $wrapped . $string;
 550      }
 551  
 552      /**
 553       * Wraps the text of a message.
 554       *
 555       * @param string $text        String containing the text to wrap.
 556       * @param integer $length     Wrap $text at this number of characters.
 557       * @param string $break_char  Character(s) to use when breaking lines.
 558       * @param boolean $quote      Ignore lines that are wrapped with the '>'
 559       *                            character (RFC 2646)? If true, we don't
 560       *                            remove any padding whitespace at the end of
 561       *                            the string.
 562       *
 563       * @return string  String containing the wrapped text.
 564       */
 565      static public function wrap($text, $length = 80, $break_char = "\n",
 566                                  $quote = false)
 567      {
 568          $paragraphs = array();
 569  
 570          foreach (preg_split('/\r?\n/', $text) as $input) {
 571              if ($quote && (strpos($input, '>') === 0)) {
 572                  $line = $input;
 573              } else {
 574                  /* We need to handle the Usenet-style signature line
 575                   * separately; since the space after the two dashes is
 576                   * REQUIRED, we don't want to trim the line. */
 577                  if ($input != '-- ') {
 578                      $input = rtrim($input);
 579                  }
 580                  $line = self::wordwrap($input, $length, $break_char);
 581              }
 582  
 583              $paragraphs[] = $line;
 584          }
 585  
 586          return implode($break_char, $paragraphs);
 587      }
 588  
 589      /**
 590       * Return a truncated string, suitable for notifications.
 591       *
 592       * @param string $text     The original string.
 593       * @param integer $length  The maximum length.
 594       *
 595       * @return string  The truncated string, if longer than $length.
 596       */
 597      static public function truncate($text, $length = 100)
 598      {
 599          return (self::length($text) > $length)
 600              ? rtrim(self::substr($text, 0, $length - 3)) . '...'
 601              : $text;
 602      }
 603  
 604      /**
 605       * Return an abbreviated string, with characters in the middle of the
 606       * excessively long string replaced by '...'.
 607       *
 608       * @param string $text     The original string.
 609       * @param integer $length  The length at which to abbreviate.
 610       *
 611       * @return string  The abbreviated string, if longer than $length.
 612       */
 613      static public function abbreviate($text, $length = 20)
 614      {
 615          return (self::length($text) > $length)
 616              ? rtrim(self::substr($text, 0, round(($length - 3) / 2))) . '...' . ltrim(self::substr($text, (($length - 3) / 2) * -1))
 617              : $text;
 618      }
 619  
 620      /**
 621       * Returns the common leading part of two strings.
 622       *
 623       * @param string $str1  A string.
 624       * @param string $str2  Another string.
 625       *
 626       * @return string  The start of $str1 and $str2 that is identical in both.
 627       */
 628      static public function common($str1, $str2)
 629      {
 630          for ($result = '', $i = 0;
 631               isset($str1[$i]) && isset($str2[$i]) && $str1[$i] == $str2[$i];
 632               $i++) {
 633              $result .= $str1[$i];
 634          }
 635          return $result;
 636      }
 637  
 638      /**
 639       * Returns true if the every character in the parameter is an alphabetic
 640       * character.
 641       *
 642       * @param string $string   The string to test.
 643       * @param string $charset  The charset to use when testing the string.
 644       *
 645       * @return boolean  True if the parameter was alphabetic only.
 646       */
 647      static public function isAlpha($string, $charset)
 648      {
 649          if (!Horde_Util::extensionExists('mbstring')) {
 650              return ctype_alpha($string);
 651          }
 652  
 653          $charset = self::_mbstringCharset($charset);
 654          $old_charset = mb_regex_encoding();
 655  
 656          if ($charset != $old_charset) {
 657              @mb_regex_encoding($charset);
 658          }
 659          $alpha = !@mb_ereg_match('[^[:alpha:]]', $string);
 660          if ($charset != $old_charset) {
 661              @mb_regex_encoding($old_charset);
 662          }
 663  
 664          return $alpha;
 665      }
 666  
 667      /**
 668       * Returns true if ever character in the parameter is a lowercase letter in
 669       * the current locale.
 670       *
 671       * @param string $string   The string to test.
 672       * @param string $charset  The charset to use when testing the string.
 673       *
 674       * @return boolean  True if the parameter was lowercase.
 675       */
 676      static public function isLower($string, $charset)
 677      {
 678          return ((self::lower($string, true, $charset) === $string) &&
 679                  self::isAlpha($string, $charset));
 680      }
 681  
 682      /**
 683       * Returns true if every character in the parameter is an uppercase letter
 684       * in the current locale.
 685       *
 686       * @param string $string   The string to test.
 687       * @param string $charset  The charset to use when testing the string.
 688       *
 689       * @return boolean  True if the parameter was uppercase.
 690       */
 691      static public function isUpper($string, $charset)
 692      {
 693          return ((self::upper($string, true, $charset) === $string) &&
 694                  self::isAlpha($string, $charset));
 695      }
 696  
 697      /**
 698       * Performs a multibyte safe regex match search on the text provided.
 699       *
 700       * @param string $text     The text to search.
 701       * @param array $regex     The regular expressions to use, without perl
 702       *                         regex delimiters (e.g. '/' or '|').
 703       * @param string $charset  The character set of the text.
 704       *
 705       * @return array  The matches array from the first regex that matches.
 706       */
 707      static public function regexMatch($text, $regex, $charset = null)
 708      {
 709          if (!empty($charset)) {
 710              $regex = self::convertCharset($regex, $charset, 'utf-8');
 711              $text = self::convertCharset($text, $charset, 'utf-8');
 712          }
 713  
 714          $matches = array();
 715          foreach ($regex as $val) {
 716              if (preg_match('/' . $val . '/u', $text, $matches)) {
 717                  break;
 718              }
 719          }
 720  
 721          if (!empty($charset)) {
 722              $matches = self::convertCharset($matches, 'utf-8', $charset);
 723          }
 724  
 725          return $matches;
 726      }
 727  
 728      /**
 729       * Check to see if a string is valid UTF-8.
 730       *
 731       * @since 1.1.0
 732       *
 733       * @param string $text  The text to check.
 734       *
 735       * @return boolean  True if valid UTF-8.
 736       */
 737      static public function validUtf8($text)
 738      {
 739          /* Regex from:
 740           * http://stackoverflow.com/questions/1523460/ensuring-valid-utf-8-in-php
 741           */
 742          return preg_match('/^(?:
 743                [\x09\x0A\x0D\x20-\x7E]            # ASCII
 744              | [\xC2-\xDF][\x80-\xBF]             # non-overlong 2-byte
 745              | \xE0[\xA0-\xBF][\x80-\xBF]         # excluding overlongs
 746              | [\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}  # straight 3-byte
 747              | \xED[\x80-\x9F][\x80-\xBF]         # excluding surrogates
 748              | \xF0[\x90-\xBF][\x80-\xBF]{2}      # planes 1-3
 749              | [\xF1-\xF3][\x80-\xBF]{3}          # planes 4-15
 750              | \xF4[\x80-\x8F][\x80-\xBF]{2}      # plane 16
 751          )*$/xs', $text);
 752      }
 753  
 754      /**
 755       * Workaround charsets that don't work with mbstring functions.
 756       *
 757       * @param string $charset  The original charset.
 758       *
 759       * @return string  The charset to use with mbstring functions.
 760       */
 761      static protected function _mbstringCharset($charset)
 762      {
 763          /* mbstring functions do not handle the 'ks_c_5601-1987' &
 764           * 'ks_c_5601-1989' charsets. However, these charsets are used, for
 765           * example, by various versions of Outlook to send Korean characters.
 766           * Use UHC (CP949) encoding instead. See, e.g.,
 767           * http://lists.w3.org/Archives/Public/ietf-charsets/2001AprJun/0030.html */
 768          if ($charset == 'UTF-8' || $charset == 'utf-8') {
 769              return $charset;
 770          }
 771          if (in_array(self::lower($charset), array('ks_c_5601-1987', 'ks_c_5601-1989'))) {
 772              $charset = 'UHC';
 773          }
 774  
 775          return $charset;
 776      }
 777  
 778  }
PHP Cross Reference of MyBB

/inc/3rdparty/diff/Diff/ -> String.php (source)