mirror of
https://tildegit.org/sbgodin/HtmGem.git
synced 2023-08-25 13:53:12 +02:00
36 lines
1001 B
PHP
36 lines
1001 B
PHP
<?php declare(strict_types=1);
|
|
|
|
namespace htmgem\io;
|
|
|
|
// Byte Order Marks (BOM) of the Unicode encodings, keyed by encoding name.
// Reference: https://www.unicode.org/faq/utf_bom.html
// The entry order is significant: the UTF-32LE mark begins with the same two
// bytes as the UTF-16LE mark, so any first-match prefix lookup over this
// table needs UTF-32LE listed before UTF-16LE.
define('_BOMS', [
    'UTF-32LE' => "\xFF\xFE\x00\x00",
    'UTF-16LE' => "\xFF\xFE",
    'UTF-16BE' => "\xFE\xFF",
    'UTF-8'    => "\xEF\xBB\xBF",
    'UTF-32BE' => "\x00\x00\xFE\xFF",
]);
|
|
|
|
/**
 * Detects the Unicode encoding of a text from its Byte Order Mark (BOM).
 *
 * The PHP built-in function mb_detect_encoding() doesn't detect UTF-16,
 * hence this lookup against the _BOMS table. The first matching entry
 * wins, so the result depends on the iteration order of _BOMS.
 *
 * The text is taken by reference but is not modified by this function.
 *
 * @param string $text the raw text, possibly starting with a BOM
 * @return string the name of the matching _BOMS entry, or "UTF-8" if the
 *                text starts with none of the known BOMs
 */
function _detectUnicodeEncoding(string &$text): string {
    foreach (_BOMS as $bomName => $bomBytes) {
        // Compare only the leading bytes: strpos() would scan the whole
        // text for every BOM that does not occur in it.
        if (strncmp($text, $bomBytes, strlen($bomBytes)) === 0) {
            return $bomName;
        }
    }
    return "UTF-8";
}
|
|
|
|
/**
 * Converts a Unicode text to UTF-8, in place, and removes the BOM.
 *
 * The source encoding is the one reported by _detectUnicodeEncoding().
 *
 * @param string $text the text to convert; it is replaced by its UTF-8
 *                     version, with a leading UTF-8 BOM stripped if present
 * @return string the encoding detected before the conversion
 */
function convertToUTF8(string &$text): string {
    $encoding = _detectUnicodeEncoding($text);
    $text = mb_convert_encoding($text, "UTF-8", $encoding);
    // Only the UTF-8 form of the BOM is looked for, since the text has just
    // been converted. A plain prefix check is used rather than a regex:
    // preg_replace() yields null on a PCRE error, which would overwrite the
    // caller's text.
    $utf8Bom = _BOMS['UTF-8'];
    $bomLength = strlen($utf8Bom);
    if (strncmp($text, $utf8Bom, $bomLength) === 0) {
        $text = substr($text, $bomLength);
    }
    return $encoding;
}
|