Merge branch '10406' into 10500
This commit is contained in:
commit
f7d2c36086
540
core/class/SitemapGenerator.class.php
Executable file
540
core/class/SitemapGenerator.class.php
Executable file
@ -0,0 +1,540 @@
|
|||||||
|
<?php
|
||||||
|
|
||||||
|
namespace Icamys\SitemapGenerator;
|
||||||
|
|
||||||
|
/**
 * Collects URLs and renders them as sitemap XML documents (plus a sitemap
 * index when more than one document is needed), optionally writing them to
 * disk, registering them in robots.txt and pinging search engines.
 *
 * Typical use: addUrl()/addUrls(), then createSitemap(), then any of
 * toArray(), writeSitemap(), updateRobots(), submitSitemap().
 */
class SitemapGenerator
{
    const MAX_FILE_SIZE = 10485760;
    const MAX_URLS_PER_SITEMAP = 50000;

    const URL_PARAM_LOC = 0;
    const URL_PARAM_LASTMOD = 1;
    const URL_PARAM_CHANGEFREQ = 2;
    const URL_PARAM_PRIORITY = 3;

    /**
     * Name of sitemap file
     * @var string
     * @access public
     */
    public $sitemapFileName = "sitemap.xml";
    /**
     * Name of sitemap index file
     * @var string
     * @access public
     */
    public $sitemapIndexFileName = "sitemap-index.xml";
    /**
     * Robots file name
     * @var string
     * @access public
     */
    public $robotsFileName = "robots.txt";
    /**
     * Quantity of URLs per single sitemap file.
     * According to the specification the maximum value is 50,000.
     * If your links are very long, a sitemap file can grow beyond 10MB;
     * in that case use a smaller value.
     * @var int
     * @access public
     */
    public $maxURLsPerSitemap = self::MAX_URLS_PER_SITEMAP;

    /**
     * Quantity of sitemaps per index file.
     * According to the specification the maximum value is 50,000.
     * If your index file is very long it can grow beyond 10MB;
     * in that case use a smaller value.
     * @see http://www.sitemaps.org/protocol.html
     * @var int
     * @access public
     */
    public $maxSitemaps = 50000;
    /**
     * When true and a single sitemap is produced, writeSitemap() writes both
     * the .xml and the .xml.gz file and updateRobots() lists both.
     * When a sitemap index is produced, every sitemap file except the index
     * itself is written compressed.
     * The URL submitted to search engines points at the .gz file when enabled.
     * @var bool
     * @access public
     */
    public $createGZipFile = false;
    /**
     * URL of your site.
     * Prefixed to every added URL and used to build the sitemap URLs.
     * @var string
     * @access private
     */
    private $baseURL;
    /**
     * Base path, prefixed verbatim to every file name that is written.
     * Use this if your sitemap and robots files should be stored in a
     * directory other than the working directory.
     * @var string
     * @access private
     */
    private $basePath;
    /**
     * Version of this class
     * @var string
     * @access private
     */
    private $classVersion = "1.0.0";
    /**
     * Search engine ping URLs. The first entry holds the two Yahoo variants
     * (with and without an application id).
     * @var array
     * @access private
     */
    private $searchEngines = array(
        array(
            "http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=USERID&url=",
            "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap="
        ),
        "http://www.google.com/webmasters/tools/ping?sitemap=",
        "http://submissions.ask.com/ping?sitemap=",
        "http://www.bing.com/webmaster/ping.aspx?siteMap="
    );
    /**
     * Storage for the added URLs. Capacity is doubled when full, so slots at
     * index >= $urlCount are unused padding.
     * @var \SplFixedArray
     * @access private
     */
    private $urls;
    /**
     * Number of URLs actually stored in $urls.
     * Tracked explicitly because SplFixedArray::key()/next() no longer exist
     * in PHP 8 and because the capacity of $urls is not the URL count.
     * @var int
     * @access private
     */
    private $urlCount = 0;
    /**
     * Created sitemaps as array(fileName, xml) pairs; null until createSitemap() ran.
     * @var array|null
     * @access private
     */
    private $sitemaps;
    /**
     * Sitemap index as array(fileName, xml); null when a single sitemap suffices.
     * @var array|null
     * @access private
     */
    private $sitemapIndex;
    /**
     * Full URL of the sitemap (or sitemap index) to announce
     * @var string
     * @access private
     */
    private $sitemapFullURL;

    /**
     * Used to pretty-print XML before it is written to disk.
     * @var \DOMDocument
     */
    private $document;

    /**
     * Constructor.
     * @param string $baseURL Your site URL. It is prepended as-is to every added URL.
     * @param string|null $basePath Path prefix for the sitemap and robots files.
     */
    public function __construct($baseURL, $basePath = "")
    {
        $this->urls = new \SplFixedArray();
        $this->urlCount = 0;
        $this->baseURL = $baseURL;
        $this->basePath = $basePath;
        $this->document = new \DOMDocument("1.0");
        $this->document->preserveWhiteSpace = false;
        $this->document->formatOutput = true;
    }

    /**
     * Use this to add many URLs at one time.
     * Each inner array can have 1 to 4 fields: loc, lastmod, changefreq, priority.
     * @param array $urlsArray
     * @throws \InvalidArgumentException
     */
    public function addUrls($urlsArray)
    {
        if (!is_array($urlsArray)) {
            throw new \InvalidArgumentException("Array as argument should be given.");
        }
        foreach ($urlsArray as $url) {
            $this->addUrl(
                isset($url[0]) ? $url[0] : null,
                isset($url[1]) ? $url[1] : null,
                isset($url[2]) ? $url[2] : null,
                isset($url[3]) ? $url[3] : null
            );
        }
    }

    /**
     * Use this to add a single URL to the sitemap.
     * @param string $url URL (appended to the base URL when rendered)
     * @param \DateTime $lastModified When it was modified; rendered in ISO 8601 (ATOM) format
     * @param string $changeFrequency How often search engines should revisit this URL
     * @param string $priority Priority of the URL on your site
     * @see http://en.wikipedia.org/wiki/ISO_8601
     * @see http://php.net/manual/en/function.date.php
     * @throws \InvalidArgumentException
     */
    public function addUrl($url, \DateTime $lastModified = null, $changeFrequency = null, $priority = null)
    {
        if ($url == null) {
            throw new \InvalidArgumentException("URL is mandatory. At least one argument should be given.");
        }
        $urlLength = extension_loaded('mbstring') ? mb_strlen($url) : strlen($url);
        if ($urlLength > 2048) {
            throw new \InvalidArgumentException(
                "URL length can't be bigger than 2048 characters.
                Note, that precise url length check is guaranteed only using mb_string extension.
                Make sure Your server allow to use mbstring extension."
            );
        }

        // The entry is only as long as the highest field that was supplied;
        // skipped fields in between stay null.
        $entry = new \SplFixedArray(1);
        $entry[self::URL_PARAM_LOC] = $url;

        if (isset($lastModified)) {
            $entry->setSize(2);
            $entry[self::URL_PARAM_LASTMOD] = $lastModified->format(\DateTime::ATOM);
        }

        if (isset($changeFrequency)) {
            $entry->setSize(3);
            $entry[self::URL_PARAM_CHANGEFREQ] = $changeFrequency;
        }

        if (isset($priority)) {
            $entry->setSize(4);
            $entry[self::URL_PARAM_PRIORITY] = $priority;
        }

        // Grow geometrically so that adding n URLs costs O(n) overall.
        $capacity = $this->urls->getSize();
        if ($capacity === 0) {
            $this->urls->setSize(1);
        } elseif ($this->urlCount === $capacity) {
            $this->urls->setSize($capacity * 2);
        }

        $this->urls[$this->urlCount] = $entry;
        $this->urlCount++;
    }

    /**
     * Renders the added URLs into one or more sitemap documents and, when
     * more than one is needed, a sitemap index. Results replace those of any
     * previous call and are only stored once everything validated.
     * @throws \BadMethodCallException when no URL has been added
     * @throws \InvalidArgumentException when maxURLsPerSitemap is out of range
     * @throws \LengthException when a document or the index exceeds its limit
     */
    public function createSitemap()
    {
        if ($this->urlCount === 0) {
            throw new \BadMethodCallException("To create sitemap, call addUrl or addUrls function first.");
        }
        if ($this->maxURLsPerSitemap > self::MAX_URLS_PER_SITEMAP) {
            throw new \InvalidArgumentException(
                "More than " . self::MAX_URLS_PER_SITEMAP . " URLs per single sitemap is not allowed."
            );
        }
        $urlsPerFile = (int) $this->maxURLsPerSitemap;
        if ($urlsPerFile < 1) {
            // Guards the chunk loop below against a zero or negative step.
            throw new \InvalidArgumentException("At least one URL per single sitemap is required.");
        }

        $generatorInfo = '<!-- generated-on="' . date('c') . '" -->';

        $sitemapHeader = '<?xml version="1.0" encoding="UTF-8"?>' . $generatorInfo . "\n"
            . '<urlset xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
            . ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9'
            . ' http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"'
            . ' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n"
            . '</urlset>';

        $sitemapIndexHeader = '<?xml version="1.0" encoding="UTF-8"?>' . $generatorInfo . "\n"
            . '<sitemapindex xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"'
            . ' xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9'
            . ' http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd"'
            . ' xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">' . "\n"
            . '</sitemapindex>';

        $documents = array();
        for ($offset = 0; $offset < $this->urlCount; $offset += $urlsPerFile) {
            $xml = new \SimpleXMLElement($sitemapHeader);
            $end = min($offset + $urlsPerFile, $this->urlCount);
            for ($position = $offset; $position < $end; $position++) {
                $entry = $this->urls[$position];
                $row = $xml->addChild('url');

                // addChild() does not escape "&", so the value is escaped here.
                $row->addChild(
                    'loc',
                    htmlspecialchars($this->baseURL . $entry[self::URL_PARAM_LOC], ENT_QUOTES, 'UTF-8')
                );

                // Optional fields are emitted only when a value was supplied;
                // a skipped field must not become an empty element.
                $lastModified = $this->entryField($entry, self::URL_PARAM_LASTMOD);
                if ($lastModified !== null) {
                    $row->addChild('lastmod', $lastModified);
                }
                $changeFrequency = $this->entryField($entry, self::URL_PARAM_CHANGEFREQ);
                if ($changeFrequency !== null) {
                    $row->addChild('changefreq', $changeFrequency);
                }
                $priority = $this->entryField($entry, self::URL_PARAM_PRIORITY);
                if ($priority !== null) {
                    $row->addChild('priority', $priority);
                }
            }

            $serialized = $xml->asXML();
            $byteCount = strlen($serialized);
            if ($byteCount > self::MAX_FILE_SIZE) {
                throw new \LengthException(
                    "Sitemap size equals to " . $byteCount
                    . " bytes is more than 10MB (" . self::MAX_FILE_SIZE . " bytes),
                    please decrease maxURLsPerSitemap variable."
                );
            }
            $documents[] = $serialized;
        }

        $documentCount = count($documents);
        if ($documentCount > $this->maxSitemaps) {
            throw new \LengthException(
                "Sitemap index can contain {$this->maxSitemaps} sitemaps.
                Perhaps You trying to submit too many maps."
            );
        }

        // Exactly one slash between base URL and file name, whether or not
        // the configured base URL already ends with one.
        $rootURL = rtrim($this->baseURL, '/') . '/';

        $sitemaps = array();
        if ($documentCount > 1) {
            foreach ($documents as $number => $document) {
                $sitemaps[] = array(
                    str_replace(".xml", ($number + 1) . ".xml", $this->sitemapFileName),
                    $document
                );
            }
            $index = new \SimpleXMLElement($sitemapIndexHeader);
            foreach ($sitemaps as $sitemap) {
                $row = $index->addChild('sitemap');
                $row->addChild(
                    'loc',
                    htmlspecialchars($rootURL . $this->getSitemapFileName($sitemap[0]), ENT_QUOTES, 'UTF-8')
                );
                $row->addChild('lastmod', date('c'));
            }
            $this->sitemapFullURL = $rootURL . $this->sitemapIndexFileName;
            $this->sitemapIndex = array(
                $this->sitemapIndexFileName,
                $index->asXML()
            );
        } else {
            $sitemaps[] = array(
                $this->sitemapFileName,
                $documents[0]
            );
            $this->sitemapFullURL = $rootURL . $this->getSitemapFileName();
            $this->sitemapIndex = null;
        }
        $this->sitemaps = $sitemaps;
    }

    /**
     * Returns the value stored at $index of a URL entry, or null when the
     * entry is too short to hold that field or the field was skipped.
     * @param \SplFixedArray $entry
     * @param int $index One of the URL_PARAM_* constants
     * @return mixed|null
     */
    private function entryField(\SplFixedArray $entry, $index)
    {
        return $index < $entry->getSize() ? $entry[$index] : null;
    }

    /**
     * Returns the created sitemaps as array(fileName, xml) pairs, preceded
     * by the sitemap index pair when one was created.
     * Use it if you want to work with the sitemap without saving it as files.
     * @return array|null null when createSitemap() has not been called
     * @access public
     */
    public function toArray()
    {
        if (isset($this->sitemapIndex)) {
            return array_merge(array($this->sitemapIndex), $this->sitemaps);
        }
        return $this->sitemaps;
    }

    /**
     * Writes the created sitemaps as files below the base path.
     * @access public
     * @throws \BadMethodCallException
     */
    public function writeSitemap()
    {
        if (!isset($this->sitemaps)) {
            throw new \BadMethodCallException("To write sitemap, call createSitemap function first.");
        }
        if (isset($this->sitemapIndex)) {
            // The index is always written uncompressed and pretty-printed.
            $this->document->loadXML($this->sitemapIndex[1]);
            $this->writeFile($this->document->saveXML(), $this->basePath, $this->sitemapIndex[0], true);
            foreach ($this->sitemaps as $sitemap) {
                $this->writeFile($sitemap[1], $this->basePath, $sitemap[0]);
            }
            return;
        }

        $this->document->loadXML($this->sitemaps[0][1]);
        $this->writeFile($this->document->saveXML(), $this->basePath, $this->sitemaps[0][0], true);
        if ($this->createGZipFile) {
            // Only write a second time when it yields the additional .gz
            // file; otherwise it would overwrite the formatted file above.
            $this->writeFile($this->sitemaps[0][1], $this->basePath, $this->sitemaps[0][0]);
        }
    }

    /**
     * Returns the given file name (default: the sitemap file name) with
     * ".gz" appended when compression is enabled.
     * @param string|null $name
     * @return string
     */
    private function getSitemapFileName($name = null)
    {
        if (!$name) {
            $name = $this->sitemapFileName;
        }
        if ($this->createGZipFile) {
            $name .= ".gz";
        }
        return $name;
    }

    /**
     * Save file.
     * @param string $content
     * @param string $filePath
     * @param string $fileName
     * @param bool $noGzip Write plain text even when compression is enabled
     * @return bool false when the file could not be opened, written or closed
     * @access private
     */
    private function writeFile($content, $filePath, $fileName, $noGzip = false)
    {
        if (!$noGzip && $this->createGZipFile) {
            return $this->writeGZipFile($content, $filePath, $fileName);
        }
        $handle = fopen($filePath . $fileName, 'w');
        if ($handle === false) {
            // Passing a failed handle on to fwrite() is a TypeError on PHP 8.
            return false;
        }
        $written = fwrite($handle, $content);
        $closed = fclose($handle);
        return $written !== false && $closed;
    }

    /**
     * Save GZipped file; ".gz" is appended to the file name.
     * @param string $content
     * @param string $filePath
     * @param string $fileName
     * @return bool false when the file could not be opened, written or closed
     * @access private
     */
    private function writeGZipFile($content, $filePath, $fileName)
    {
        $fileName .= '.gz';
        $handle = gzopen($filePath . $fileName, 'w');
        if ($handle === false) {
            return false;
        }
        $written = gzwrite($handle, $content);
        $closed = gzclose($handle);
        return $written !== false && $closed;
    }

    /**
     * If a robots.txt file exists, replaces its "Sitemap:" lines with the
     * newly created sitemaps; otherwise creates one that allows everything
     * and lists the sitemaps.
     * @access public
     * @throws \BadMethodCallException
     */
    public function updateRobots()
    {
        if (!isset($this->sitemaps)) {
            throw new \BadMethodCallException("To update robots.txt, call createSitemap function first.");
        }
        $robotsPath = $this->basePath . $this->robotsFileName;

        if (file_exists($robotsPath)) {
            $retained = array();
            foreach (explode("\n", (string) file_get_contents($robotsPath)) as $line) {
                if (substr($line, 0, 8) != 'Sitemap:') {
                    $retained[] = $line;
                }
            }
            // Trailing blank lines are dropped so that repeated updates do
            // not keep growing the file.
            $preamble = rtrim(implode("\n", $retained), "\r\n");
        } else {
            $preamble = "User-agent: *\nAllow: /";
        }

        $content = ($preamble === '' ? '' : $preamble . "\n\n") . implode("\n", $this->robotsSitemapLines());
        file_put_contents($robotsPath, $content);
    }

    /**
     * Builds the "Sitemap:" lines for robots.txt: the index URL when an
     * index exists, otherwise the plain sitemap URL plus its .gz twin when
     * compression is enabled.
     * @return array of strings
     */
    private function robotsSitemapLines()
    {
        if (isset($this->sitemapIndex)) {
            return array("Sitemap: " . $this->sitemapFullURL);
        }
        $plainURL = rtrim($this->baseURL, '/') . '/' . $this->sitemaps[0][0];
        $lines = array("Sitemap: " . $plainURL);
        if ($this->createGZipFile) {
            $lines[] = "Sitemap: " . $plainURL . ".gz";
        }
        return $lines;
    }

    /**
     * Informs search engines about the newly created sitemap.
     * Google, Ask, Bing and Yahoo are pinged.
     * Without $yahooAppId the generic Yahoo ping URL is used instead of the
     * application-specific one.
     * @param string $yahooAppId Your site Yahoo appid.
     * @return array of messages and http codes from each search engine
     * @access public
     * @throws \BadMethodCallException
     */
    public function submitSitemap($yahooAppId = null)
    {
        if (!isset($this->sitemaps)) {
            throw new \BadMethodCallException("To submit sitemap, call createSitemap function first.");
        }
        if (!extension_loaded('curl')) {
            throw new \BadMethodCallException("cURL library is needed to do submission.");
        }
        $searchEngines = $this->searchEngines;
        $searchEngines[0] = isset($yahooAppId) ?
            str_replace("USERID", $yahooAppId, $searchEngines[0][0]) :
            $searchEngines[0][1];

        // The sitemap URL travels as a query-string value, so it has to be
        // URL-encoded; HTML escaping would corrupt any "&" it contains.
        $encodedSitemapURL = urlencode($this->sitemapFullURL);

        $result = array();
        foreach ($searchEngines as $engine) {
            $pingURL = $engine . $encodedSitemapURL;
            $request = curl_init($pingURL);
            curl_setopt($request, CURLOPT_RETURNTRANSFER, true);
            $responseContent = curl_exec($request);
            $response = curl_getinfo($request);

            // Reduce e.g. "www.google.com" to "google.com".
            $hostParts = array_reverse(explode(".", (string) parse_url($engine, PHP_URL_HOST)));
            $site = isset($hostParts[1]) ? $hostParts[1] . "." . $hostParts[0] : $hostParts[0];

            $result[] = array(
                "site" => $site,
                "fullsite" => $pingURL,
                "http_code" => $response['http_code'],
                "message" => str_replace("\n", " ", strip_tags((string) $responseContent))
            );
        }
        return $result;
    }

    /**
     * Returns the added URLs as plain arrays.
     *
     * Each URL becomes an array keyed by 'loc', 'lastmod', 'changefreq' and
     * 'priority', containing the fields up to the last one that was supplied.
     * @return array
     */
    public function getUrls()
    {
        $fieldNames = array(
            self::URL_PARAM_LOC => 'loc',
            self::URL_PARAM_LASTMOD => 'lastmod',
            self::URL_PARAM_CHANGEFREQ => 'changefreq',
            self::URL_PARAM_PRIORITY => 'priority',
        );

        $urls = array();
        // Only the first $urlCount slots hold URLs; the rest is padding.
        for ($position = 0; $position < $this->urlCount; $position++) {
            $url = array();
            foreach ($this->urls[$position]->toArray() as $paramIndex => $paramValue) {
                if (isset($fieldNames[$paramIndex])) {
                    $url[$fieldNames[$paramIndex]] = $paramValue;
                }
            }
            $urls[] = $url;
        }

        return $urls;
    }

    /**
     * Returns the number of URLs that have been added.
     * @return int
     */
    public function countUrls()
    {
        return $this->urlCount;
    }
}
|
@ -4,9 +4,7 @@ class autoload {
|
|||||||
public static function autoloader () {
|
public static function autoloader () {
|
||||||
require_once 'core/class/helper.class.php';
|
require_once 'core/class/helper.class.php';
|
||||||
require_once 'core/class/template.class.php';
|
require_once 'core/class/template.class.php';
|
||||||
require_once 'core/class/sitemap/Runtime.class.php';
|
require_once 'core/class/SitemapGenerator.class.php';
|
||||||
require_once 'core/class/sitemap/FileSystem.class.php';
|
|
||||||
require_once 'core/class/sitemap/SitemapGenerator.class.php';
|
|
||||||
require_once 'core/class/phpmailer/PHPMailer.class.php';
|
require_once 'core/class/phpmailer/PHPMailer.class.php';
|
||||||
require_once 'core/class/phpmailer/Exception.class.php';
|
require_once 'core/class/phpmailer/Exception.class.php';
|
||||||
require_once 'core/class/phpmailer/SMTP.class.php';
|
require_once 'core/class/phpmailer/SMTP.class.php';
|
||||||
|
@ -64,7 +64,7 @@ class helper {
|
|||||||
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
|
curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
|
||||||
curl_setopt($ch, CURLOPT_URL, $url);
|
curl_setopt($ch, CURLOPT_URL, $url);
|
||||||
$url_get_contents_data = curl_exec($ch);
|
$url_get_contents_data = curl_exec($ch);
|
||||||
curl_close($ch);
|
curl_close($ch);
|
||||||
}else{
|
}else{
|
||||||
$url_get_contents_data = false;
|
$url_get_contents_data = false;
|
||||||
}
|
}
|
||||||
|
@ -1,36 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
namespace Icamys\SitemapGenerator;
|
|
||||||
|
|
||||||
/**
 * Thin object wrapper around PHP's global file-system functions.
 *
 * Every method forwards its arguments unchanged to the function of the same
 * name and returns that function's result, so an instance can be swapped for
 * a test double wherever file access needs to be intercepted.
 */
class FileSystem
{
    /**
     * Reads a whole file.
     * @param string $filepath
     * @return string|false File contents, or false on failure
     */
    public function file_get_contents($filepath)
    {
        $contents = file_get_contents($filepath);

        return $contents;
    }

    /**
     * Writes data to a file.
     * @param string $filepath
     * @param mixed $content
     * @param int $flags Flags as accepted by the global file_put_contents()
     * @return int|false Number of bytes written, or false on failure
     */
    public function file_put_contents($filepath, $content, $flags = 0)
    {
        $bytesWritten = file_put_contents($filepath, $content, $flags);

        return $bytesWritten;
    }

    /**
     * Tells whether a file or directory exists.
     * @param string $filepath
     * @return bool
     */
    public function file_exists($filepath)
    {
        $exists = file_exists($filepath);

        return $exists;
    }

    /**
     * Renames (moves) a file or directory.
     * @param string $oldname
     * @param string $newname
     * @return bool
     */
    public function rename($oldname, $newname)
    {
        $renamed = rename($oldname, $newname);

        return $renamed;
    }

    /**
     * Copies a file.
     * @param string $source
     * @param string $destination
     * @return bool
     */
    public function copy($source, $destination)
    {
        $copied = copy($source, $destination);

        return $copied;
    }

    /**
     * Deletes a file.
     * @param string $filepath
     * @return bool
     */
    public function unlink($filepath)
    {
        $removed = unlink($filepath);

        return $removed;
    }
}
|
|
@ -1,36 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
namespace Icamys\SitemapGenerator;
|
|
||||||
|
|
||||||
/**
 * Thin object wrapper around the runtime and cURL functions used by the
 * sitemap generator, so that they can be replaced by a test double.
 *
 * Every method forwards to the global function of the same name.
 */
class Runtime
{
    /**
     * Tells whether a PHP extension is loaded.
     * @param string $extname
     * @return bool
     */
    public function extension_loaded($extname)
    {
        return extension_loaded($extname);
    }

    /**
     * Tells whether a path exists and is writable.
     * @param string $filepath
     * @return bool
     */
    public function is_writable($filepath)
    {
        return is_writable($filepath);
    }

    /**
     * Initialises a cURL session for the given URL.
     * @param string $url
     * @return resource|object|false cURL handle, or false on failure
     */
    public function curl_init($url)
    {
        return curl_init($url);
    }

    /**
     * Sets an option on a cURL handle.
     * @param resource|object $handle
     * @param int $option
     * @param mixed $value
     * @return bool
     */
    public function curl_setopt($handle, $option, $value)
    {
        return curl_setopt($handle, $option, $value);
    }

    /**
     * Executes a cURL session.
     * @param resource|object $handle
     * @return string|bool
     */
    public function curl_exec($handle)
    {
        return curl_exec($handle);
    }

    /**
     * Returns information about a cURL transfer.
     * @param resource|object $handle
     * @param int|null $option A CURLINFO_* constant, or null for the full info array
     * @return mixed
     */
    public function curl_getinfo($handle, $option = null)
    {
        if ($option === null) {
            // Before PHP 8 an explicit null is coerced to option 0, which
            // makes curl_getinfo() return false instead of the info array,
            // so the argument has to be omitted entirely.
            return curl_getinfo($handle);
        }

        return curl_getinfo($handle, $option);
    }
}
|
|
@ -1,705 +0,0 @@
|
|||||||
<?php
|
|
||||||
|
|
||||||
namespace Icamys\SitemapGenerator;
|
|
||||||
|
|
||||||
use BadMethodCallException;
|
|
||||||
use DateTime;
|
|
||||||
use Icamys\SitemapGenerator\Extensions\GoogleVideoExtension;
|
|
||||||
use InvalidArgumentException;
|
|
||||||
use OutOfRangeException;
|
|
||||||
use RuntimeException;
|
|
||||||
use XMLWriter;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Class SitemapGenerator
|
|
||||||
* @package Icamys\SitemapGenerator
|
|
||||||
*/
|
|
||||||
class SitemapGenerator
|
|
||||||
{
|
|
||||||
/**
|
|
||||||
* Max size of a sitemap according to spec.
|
|
||||||
* @see https://www.sitemaps.org/protocol.html
|
|
||||||
*/
|
|
||||||
private const MAX_FILE_SIZE = 52428800;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Max number of urls per sitemap according to spec.
|
|
||||||
* @see https://www.sitemaps.org/protocol.html
|
|
||||||
*/
|
|
||||||
private const MAX_URLS_PER_SITEMAP = 50000;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Max number of sitemaps per index file according to spec.
|
|
||||||
* @see http://www.sitemaps.org/protocol.html
|
|
||||||
*/
|
|
||||||
private const MAX_SITEMAPS_PER_INDEX = 50000;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Total max number of URLs.
|
|
||||||
*/
|
|
||||||
private const TOTAL_MAX_URLS = self::MAX_URLS_PER_SITEMAP * self::MAX_SITEMAPS_PER_INDEX;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Max url length according to spec.
|
|
||||||
* @see https://www.sitemaps.org/protocol.html#xmlTagDefinitions
|
|
||||||
*/
|
|
||||||
private const MAX_URL_LEN = 2048;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Robots file name
|
|
||||||
* @var string
|
|
||||||
* @access private
|
|
||||||
*/
|
|
||||||
private $robotsFileName = "robots.txt";
|
|
||||||
/**
|
|
||||||
* Name of sitemap file
|
|
||||||
* @var string
|
|
||||||
* @access private
|
|
||||||
*/
|
|
||||||
private $sitemapFileName = "sitemap.xml";
|
|
||||||
/**
|
|
||||||
* Name of sitemap index file
|
|
||||||
* @var string
|
|
||||||
* @access private
|
|
||||||
*/
|
|
||||||
private $sitemapIndexFileName = "sitemap-index.xml";
|
|
||||||
/**
|
|
||||||
* Quantity of URLs per single sitemap file.
|
|
||||||
* If your links are very long, a sitemap file can grow beyond the 50MB limit (MAX_FILE_SIZE),
|
|
||||||
* in this case use smaller value.
|
|
||||||
* @var int
|
|
||||||
* @access private
|
|
||||||
*/
|
|
||||||
private $maxUrlsPerSitemap = self::MAX_URLS_PER_SITEMAP;
|
|
||||||
/**
|
|
||||||
* If true, two sitemap files (.xml and .xml.gz) will be created and added to robots.txt.
|
|
||||||
* If true, .gz file will be submitted to search engines.
|
|
||||||
* If quantity of URLs will be bigger than 50.000, option will be ignored,
|
|
||||||
* all sitemap files except sitemap index will be compressed.
|
|
||||||
* @var bool
|
|
||||||
* @access private
|
|
||||||
*/
|
|
||||||
private $isCompressionEnabled = false;
|
|
||||||
/**
|
|
||||||
* URL to Your site.
|
|
||||||
* Script will use it to send sitemaps to search engines.
|
|
||||||
* @var string
|
|
||||||
* @access private
|
|
||||||
*/
|
|
||||||
private $baseURL;
|
|
||||||
/**
|
|
||||||
* Base path. Relative to script location.
|
|
||||||
* Use this if Your sitemap and robots files should be stored in other
|
|
||||||
* directory then script.
|
|
||||||
* @var string
|
|
||||||
* @access private
|
|
||||||
*/
|
|
||||||
private $basePath;
|
|
||||||
/**
|
|
||||||
* Version of this class
|
|
||||||
* @var string
|
|
||||||
* @access private
|
|
||||||
*/
|
|
||||||
private $classVersion = "4.3.2";
|
|
||||||
/**
|
|
||||||
* Search engines URLs
|
|
||||||
* @var array of strings
|
|
||||||
* @access private
|
|
||||||
*/
|
|
||||||
private $searchEngines = [
|
|
||||||
[
|
|
||||||
"http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=USERID&url=",
|
|
||||||
"http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=",
|
|
||||||
],
|
|
||||||
"http://www.google.com/ping?sitemap=",
|
|
||||||
"http://submissions.ask.com/ping?sitemap=",
|
|
||||||
"http://www.bing.com/ping?sitemap=",
|
|
||||||
"http://www.webmaster.yandex.ru/ping?sitemap=",
|
|
||||||
];
|
|
||||||
/**
|
|
||||||
* Array with urls
|
|
||||||
* @var array
|
|
||||||
* @access private
|
|
||||||
*/
|
|
||||||
private $urls;
|
|
||||||
/**
|
|
||||||
* Lines for robots.txt file that are written if file does not exist
|
|
||||||
* @var array
|
|
||||||
*/
|
|
||||||
private $sampleRobotsLines = [
|
|
||||||
"User-agent: *",
|
|
||||||
"Allow: /",
|
|
||||||
];
|
|
||||||
/**
|
|
||||||
* @var array list of valid changefreq values according to the spec
|
|
||||||
*/
|
|
||||||
private $validChangefreqValues = [
|
|
||||||
'always',
|
|
||||||
'hourly',
|
|
||||||
'daily',
|
|
||||||
'weekly',
|
|
||||||
'monthly',
|
|
||||||
'yearly',
|
|
||||||
'never',
|
|
||||||
];
|
|
||||||
/**
|
|
||||||
* @var float[] list of valid priority values according to the spec
|
|
||||||
*/
|
|
||||||
private $validPriorities = [
|
|
||||||
0.0,
|
|
||||||
0.1,
|
|
||||||
0.2,
|
|
||||||
0.3,
|
|
||||||
0.4,
|
|
||||||
0.5,
|
|
||||||
0.6,
|
|
||||||
0.7,
|
|
||||||
0.8,
|
|
||||||
0.9,
|
|
||||||
1.0,
|
|
||||||
];
|
|
||||||
/**
|
|
||||||
* @var FileSystem object used to communicate with file system
|
|
||||||
*/
|
|
||||||
private $fs;
|
|
||||||
/**
|
|
||||||
* @var Runtime object used to communicate with runtime
|
|
||||||
*/
|
|
||||||
private $runtime;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @var XMLWriter Used for writing xml to files
|
|
||||||
*/
|
|
||||||
private $xmlWriter;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @var string
|
|
||||||
*/
|
|
||||||
private $flushedSitemapFilenameFormat;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @var int
|
|
||||||
*/
|
|
||||||
private $flushedSitemapSize = 0;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @var int
|
|
||||||
*/
|
|
||||||
private $flushedSitemapCounter = 0;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @var array
|
|
||||||
*/
|
|
||||||
private $flushedSitemaps = [];
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @var bool
|
|
||||||
*/
|
|
||||||
private $isSitemapStarted = false;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @var int
|
|
||||||
*/
|
|
||||||
private $totalUrlCount = 0;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* @var int
|
|
||||||
*/
|
|
||||||
private $urlsetClosingTagLen = 10; // strlen("</urlset>\n")
|
|
||||||
private $sitemapUrlCount = 0;
|
|
||||||
private $generatedFiles = [];
|
|
||||||
|
|
||||||
/**
 * Stores the site URL and output directory and prepares the XML writer.
 *
 * @param string $baseURL Your site URL; a trailing slash is removed.
 * @param string $basePath Directory where sitemap and robots files should be stored.
 *                         An empty string means the current working directory.
 * @param FileSystem|null $fs File-system adapter; a default one is created when omitted.
 * @param Runtime|null $runtime Runtime adapter; a default one is created when omitted.
 * @throws InvalidArgumentException when the target directory is not writable
 */
public function __construct(string $baseURL, string $basePath = "", FileSystem $fs = null, Runtime $runtime = null)
{
    $this->urls = [];
    $this->baseURL = rtrim($baseURL, '/');

    $this->fs = $fs === null ? new FileSystem() : $fs;
    $this->runtime = $runtime === null ? new Runtime() : $runtime;

    // Files are addressed as $basePath . $fileName, so an empty base path
    // targets the working directory. is_writable('') is always false, which
    // would make the default argument unusable, so "." is checked instead.
    $directoryToCheck = $basePath === '' ? '.' : $basePath;
    if ($this->runtime->is_writable($directoryToCheck) === false) {
        throw new InvalidArgumentException(
            sprintf('the provided basePath (%s) should be a writable directory,', $basePath) .
            ' please check its existence and permissions'
        );
    }
    if (strlen($basePath) > 0 && substr($basePath, -1) != DIRECTORY_SEPARATOR) {
        $basePath = $basePath . DIRECTORY_SEPARATOR;
    }
    $this->basePath = $basePath;

    $this->xmlWriter = $this->createXmlWriter();
    // "%%d" leaves a "%d" placeholder for the sitemap number; the timestamp
    // is fixed once per generator instance.
    $this->flushedSitemapFilenameFormat = sprintf("sm-%%d-%d.xml", time());
}
|
|
||||||
|
|
||||||
/**
 * Create the in-memory XML writer used for all generated documents.
 *
 * @return XMLWriter writer with a memory buffer opened and indentation enabled
 */
private function createXmlWriter(): XMLWriter
{
    $writer = new XMLWriter();
    $writer->openMemory();
    $writer->setIndent(true);

    return $writer;
}
|
|
||||||
|
|
||||||
/**
 * Set the name of the sitemap file.
 *
 * @param string $filename Non-empty file name ending in ".xml".
 * @return SitemapGenerator
 * @throws InvalidArgumentException when the name is empty or its extension is not "xml".
 */
public function setSitemapFilename(string $filename = ''): SitemapGenerator
{
    if ($filename === '') {
        throw new InvalidArgumentException('sitemap filename should not be empty');
    }

    $extension = pathinfo($filename, PATHINFO_EXTENSION);
    if ($extension !== 'xml') {
        throw new InvalidArgumentException('sitemap filename should have *.xml extension');
    }

    $this->sitemapFileName = $filename;

    return $this;
}
|
|
||||||
|
|
||||||
/**
 * Set the name of the sitemap index file.
 *
 * @param string $filename Non-empty file name.
 * @return $this
 * @throws InvalidArgumentException when the name is empty.
 */
public function setSitemapIndexFilename(string $filename = ''): SitemapGenerator
{
    if ($filename === '') {
        throw new InvalidArgumentException('filename should not be empty');
    }

    $this->sitemapIndexFileName = $filename;

    return $this;
}
|
|
||||||
|
|
||||||
/**
 * Set the name of the robots file.
 *
 * @param string $filename Non-empty file name.
 * @return $this
 * @throws InvalidArgumentException when the name is empty.
 */
public function setRobotsFileName(string $filename): SitemapGenerator
{
    if ($filename === '') {
        throw new InvalidArgumentException('filename should not be empty');
    }

    $this->robotsFileName = $filename;

    return $this;
}
|
|
||||||
|
|
||||||
/**
 * Set how many URLs may be written into a single sitemap file.
 *
 * @param int $value Limit between 1 and MAX_URLS_PER_SITEMAP (inclusive).
 * @return $this
 * @throws OutOfRangeException when the value lies outside that range.
 */
public function setMaxUrlsPerSitemap(int $value): SitemapGenerator
{
    $isWithinRange = $value >= 1 && $value <= self::MAX_URLS_PER_SITEMAP;

    if (!$isWithinRange) {
        $message = sprintf('value %d is out of range 1-%d', $value, self::MAX_URLS_PER_SITEMAP);
        throw new OutOfRangeException($message);
    }

    $this->maxUrlsPerSitemap = $value;

    return $this;
}
|
|
||||||
|
|
||||||
/**
 * Turn the compression flag on; finalize() then writes gzip-compressed sitemap files.
 *
 * @return SitemapGenerator
 */
public function enableCompression(): SitemapGenerator
{
    $this->isCompressionEnabled = true;

    return $this;
}
|
|
||||||
|
|
||||||
/**
 * Turn the compression flag off; finalize() then moves the sitemap files uncompressed.
 *
 * @return SitemapGenerator
 */
public function disableCompression(): SitemapGenerator
{
    $this->isCompressionEnabled = false;

    return $this;
}
|
|
||||||
|
|
||||||
/**
 * Report the current state of the compression flag.
 *
 * @return bool
 */
public function isCompressionEnabled(): bool
{
    return $this->isCompressionEnabled;
}
|
|
||||||
|
|
||||||
/**
 * Validate the components of a URL entry before it is written.
 *
 * @param string $path URL path; its length must be between 1 and MAX_URL_LEN characters.
 * @param DateTime|null $lastModified Not checked by this method.
 * @param string|null $changeFrequency When given, must be one of $this->validChangefreqValues.
 * @param float|null $priority When given, must be one of $this->validPriorities.
 * @param array|null $alternates Not checked by this method.
 * @param array $extensions When it has a 'google_video' key, that entry is validated
 *                          by GoogleVideoExtension::validate().
 * @throws InvalidArgumentException when the path, change frequency or priority is rejected.
 */
public function validate(
    string $path,
    DateTime $lastModified = null,
    string $changeFrequency = null,
    float $priority = null,
    array $alternates = null,
    array $extensions = [])
{
    $pathLength = mb_strlen($path);
    if ($pathLength < 1 || $pathLength > self::MAX_URL_LEN) {
        throw new InvalidArgumentException(
            sprintf("The urlPath argument length must be between 1 and %d.", self::MAX_URL_LEN)
        );
    }

    if ($changeFrequency !== null && !in_array($changeFrequency, $this->validChangefreqValues)) {
        // The allowed values are formatted into the message; previously a literal
        // "%s" was emitted because the placeholder was concatenated, not formatted.
        throw new InvalidArgumentException(
            sprintf(
                'The change frequency argument should be one of: %s',
                implode(',', $this->validChangefreqValues)
            )
        );
    }

    if ($priority !== null && !in_array($priority, $this->validPriorities)) {
        throw new InvalidArgumentException("Priority argument should be a float number in the range [0.0..1.0]");
    }

    if ($extensions !== null && isset($extensions['google_video'])) {
        GoogleVideoExtension::validate($this->baseURL . $path, $extensions['google_video']);
    }
}
|
|
||||||
|
|
||||||
/**
 * Add url components.
 * Instead of storing all urls in the memory, the generator flushes sets of added urls
 * to temporary files created on your disk.
 * The file format is 'sm-{index}-{timestamp}.xml'
 *
 * @param string $path Path appended to the base URL.
 * @param DateTime|null $lastModified
 * @param string|null $changeFrequency
 * @param float|null $priority
 * @param array|null $alternates
 * @param array $extensions
 * @return $this
 * @throws OutOfRangeException when TOTAL_MAX_URLS entries have already been added.
 */
public function addURL(
    string $path,
    DateTime $lastModified = null,
    string $changeFrequency = null,
    float $priority = null,
    array $alternates = null,
    array $extensions = []
): SitemapGenerator
{
    $this->validate($path, $lastModified, $changeFrequency, $priority, $alternates, $extensions);

    if (self::TOTAL_MAX_URLS <= $this->totalUrlCount) {
        $message = sprintf("Max url limit reached (%d)", self::TOTAL_MAX_URLS);
        throw new OutOfRangeException($message);
    }

    // Open a new urlset lazily, only when there is a URL to put into it.
    if (!$this->isSitemapStarted) {
        $this->writeSitemapStart();
    }

    $location = $this->baseURL . $path;
    $this->writeSitemapUrl($location, $lastModified, $changeFrequency, $priority, $alternates, $extensions);

    // Push the buffer to disk every 1000 URLs, or as soon as the current sitemap is full.
    $reachedFlushInterval = $this->totalUrlCount % 1000 === 0;
    if ($reachedFlushInterval || $this->sitemapUrlCount >= $this->maxUrlsPerSitemap) {
        $this->flushWriter();
    }

    // Re-read the counter here: flushWriter() may already have closed the sitemap.
    if ($this->sitemapUrlCount === $this->maxUrlsPerSitemap) {
        $this->writeSitemapEnd();
    }

    return $this;
}
|
|
||||||
|
|
||||||
/**
 * Begin a new sitemap document: XML prolog, generator comments and the opening
 * urlset element with its namespace attributes. Marks the sitemap as started.
 */
private function writeSitemapStart()
{
    $writer = $this->xmlWriter;
    $writer->startDocument("1.0", "UTF-8");

    $comments = [
        sprintf('generator-class="%s"', get_class($this)),
        sprintf('generator-version="%s"', $this->classVersion),
        sprintf('generated-on="%s"', date('c')),
    ];
    foreach ($comments as $comment) {
        $writer->writeComment($comment);
    }

    $writer->startElement('urlset');

    $attributes = [
        'xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9',
        'xmlns:xhtml' => 'http://www.w3.org/1999/xhtml',
        'xmlns:video' => 'http://www.google.com/schemas/sitemap-video/1.1',
        'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
        'xsi:schemaLocation' => 'http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd',
    ];
    foreach ($attributes as $attributeName => $attributeValue) {
        $writer->writeAttribute($attributeName, $attributeValue);
    }

    $this->isSitemapStarted = true;
}
|
|
||||||
|
|
||||||
/**
 * Write one url element into the XML buffer and bump both URL counters.
 *
 * @param string $loc Absolute URL of the page.
 * @param DateTime|null $lastModified Written as lastmod in ATOM format when not null.
 * @param string|null $changeFrequency Written as changefreq when not null.
 * @param float|null $priority Written as priority with one decimal when not null.
 * @param array|null $alternates Items that are arrays with both 'hreflang' and 'href'
 *                               become xhtml:link alternates; other items are ignored.
 * @param array $extensions Only the 'google_video' key is handled; it is passed to
 *                          GoogleVideoExtension::writeVideoTag().
 */
private function writeSitemapUrl($loc, $lastModified, $changeFrequency, $priority, $alternates, $extensions)
{
    $this->xmlWriter->startElement('url');
    // XMLWriter escapes element content itself; running htmlspecialchars() first
    // double-escaped the URL ("&" ended up as "&amp;amp;" in the file).
    $this->xmlWriter->writeElement('loc', $loc);

    if ($lastModified !== null) {
        $this->xmlWriter->writeElement('lastmod', $lastModified->format(DateTime::ATOM));
    }

    if ($changeFrequency !== null) {
        $this->xmlWriter->writeElement('changefreq', $changeFrequency);
    }

    if ($priority !== null) {
        $this->xmlWriter->writeElement('priority', number_format($priority, 1, ".", ""));
    }

    if (is_array($alternates) && count($alternates) > 0) {
        foreach ($alternates as $alternate) {
            if (is_array($alternate) && isset($alternate['hreflang']) && isset($alternate['href'])) {
                $this->xmlWriter->startElement('xhtml:link');
                $this->xmlWriter->writeAttribute('rel', 'alternate');
                $this->xmlWriter->writeAttribute('hreflang', $alternate['hreflang']);
                $this->xmlWriter->writeAttribute('href', $alternate['href']);
                $this->xmlWriter->endElement();
            }
        }
    }

    foreach ($extensions as $extName => $extFields) {
        if ($extName === 'google_video') {
            GoogleVideoExtension::writeVideoTag($this->xmlWriter, $loc, $extFields);
        }
    }

    $this->xmlWriter->endElement(); // url
    $this->sitemapUrlCount++;
    $this->totalUrlCount++;
}
|
|
||||||
|
|
||||||
/**
 * Append the buffered XML to the current temporary sitemap file.
 *
 * When the accumulated size of the current file passes MAX_FILE_SIZE (minus the
 * room reserved for the closing tag), the sitemap is closed and a new one is started.
 * Does nothing when the buffer is empty.
 */
private function flushWriter()
{
    $targetSitemapFilepath = $this->basePath . sprintf($this->flushedSitemapFilenameFormat, $this->flushedSitemapCounter);
    $flushedString = $this->xmlWriter->outputMemory(true);
    // The limit is a file size, so count bytes; mb_strlen() counts characters and
    // under-reports any multi-byte content.
    $flushedStringLen = strlen($flushedString);

    if ($flushedStringLen === 0) {
        return;
    }

    $this->flushedSitemapSize += $flushedStringLen;

    // Write the pending chunk BEFORE a possible rollover: writeSitemapEnd() appends
    // the closing urlset tag to this same file, and the chunk must precede it.
    $this->fs->file_put_contents($targetSitemapFilepath, $flushedString, FILE_APPEND);

    if ($this->flushedSitemapSize > self::MAX_FILE_SIZE - $this->urlsetClosingTagLen) {
        $this->writeSitemapEnd();
        // The next file starts empty; without this reset every later flush would
        // immediately trigger another rollover.
        $this->flushedSitemapSize = 0;
        $this->writeSitemapStart();
    }
}
|
|
||||||
|
|
||||||
/**
 * Close the current sitemap: end the urlset element and the document, append the
 * remaining buffer to the temporary file, register that file for finalize() and
 * reset the per-sitemap state so the next sitemap starts clean.
 */
private function writeSitemapEnd()
{
    $targetSitemapFilepath = $this->basePath . sprintf($this->flushedSitemapFilenameFormat, $this->flushedSitemapCounter);
    $this->xmlWriter->endElement(); // urlset
    $this->xmlWriter->endDocument();
    $this->fs->file_put_contents($targetSitemapFilepath, $this->xmlWriter->flush(true), FILE_APPEND);
    $this->isSitemapStarted = false;
    $this->flushedSitemaps[] = $targetSitemapFilepath;
    $this->flushedSitemapCounter++;
    $this->sitemapUrlCount = 0;
    // The size counter tracks the file being built; carrying it over would make
    // the following sitemaps hit the size limit far too early.
    $this->flushedSitemapSize = 0;
}
|
|
||||||
|
|
||||||
/**
 * Write whatever is still buffered to disk and, if a sitemap is open, close it.
 */
public function flush()
{
    $this->flushWriter();

    if (!$this->isSitemapStarted) {
        return;
    }

    $this->writeSitemapEnd();
}
|
|
||||||
|
|
||||||
/**
 * Move flushed files to their final location. Compress if necessary.
 *
 * With a single flushed sitemap the file is published under the configured sitemap
 * name. With several, each one is published as "{name}{n}.{ext}" (n starting at 1)
 * and a sitemap index referencing them is created. The outcome is stored for
 * getGeneratedFiles() under 'sitemaps_location', 'sitemaps_index_url' and, for the
 * multi-file case, 'sitemaps_index_location'.
 *
 * @throws RuntimeException when no sitemap has been flushed yet.
 */
public function finalize()
{
    $this->generatedFiles = [];
    $flushedCount = count($this->flushedSitemaps);

    if ($flushedCount === 0) {
        throw new RuntimeException('failed to finalize, please add urls and flush first');
    }

    $gzSuffix = $this->isCompressionEnabled ? '.gz' : '';

    // Publish one temporary file: gzip it through the zlib stream wrapper, or just move it.
    $publish = function ($source, $target) {
        if ($this->isCompressionEnabled) {
            $this->fs->copy($source, 'compress.zlib://' . $target);
            $this->fs->unlink($source);
        } else {
            $this->fs->rename($source, $target);
        }
    };

    if ($flushedCount === 1) {
        $targetSitemapFilename = $this->sitemapFileName . $gzSuffix;
        $targetSitemapFilepath = $this->basePath . $targetSitemapFilename;

        $publish($this->flushedSitemaps[0], $targetSitemapFilepath);

        $this->generatedFiles['sitemaps_location'] = [$targetSitemapFilepath];
        $this->generatedFiles['sitemaps_index_url'] = $this->baseURL . '/' . $targetSitemapFilename;

        return;
    }

    // Split the configured name at its LAST extension only; a global str_replace()
    // would also rewrite an earlier occurrence such as "my.xml.sitemap.xml".
    $extension = pathinfo($this->sitemapFileName, PATHINFO_EXTENSION);
    $ext = $extension === '' ? '' : '.' . $extension;
    $stem = $ext === '' ? $this->sitemapFileName : substr($this->sitemapFileName, 0, -strlen($ext));

    $sitemapsUrls = [];
    $targetSitemapFilepaths = [];
    foreach ($this->flushedSitemaps as $i => $flushedSitemap) {
        $targetSitemapFilename = $stem . ($i + 1) . $ext . $gzSuffix;
        $targetSitemapFilepath = $this->basePath . $targetSitemapFilename;

        $publish($flushedSitemap, $targetSitemapFilepath);

        // Pass the raw URL on: the XML writer escapes element content when the
        // index is written, so escaping here would encode it more than once.
        $sitemapsUrls[] = $this->baseURL . '/' . $targetSitemapFilename;
        $targetSitemapFilepaths[] = $targetSitemapFilepath;
    }

    $targetSitemapIndexFilepath = $this->basePath . $this->sitemapIndexFileName;
    $this->createSitemapIndex($sitemapsUrls, $targetSitemapIndexFilepath);

    $this->generatedFiles['sitemaps_location'] = $targetSitemapFilepaths;
    $this->generatedFiles['sitemaps_index_location'] = $targetSitemapIndexFilepath;
    $this->generatedFiles['sitemaps_index_url'] = $this->baseURL . '/' . $this->sitemapIndexFileName;
}
|
|
||||||
|
|
||||||
/**
 * Build the sitemap index document and write it to disk.
 *
 * @param array $sitemapsUrls URLs of the sitemap files to reference.
 * @param string $sitemapIndexFileName Path of the index file to write.
 */
private function createSitemapIndex($sitemapsUrls, $sitemapIndexFileName)
{
    // Drop anything left in the writer buffer so the index starts from a clean state.
    $this->xmlWriter->flush(true);
    $this->writeSitemapIndexStart();
    foreach ($sitemapsUrls as $sitemapsUrl) {
        $this->writeSitemapIndexUrl($sitemapsUrl);
    }
    $this->writeSitemapIndexEnd();

    // Overwrite rather than append: the index is written to its final path, and
    // appending to an index left by a previous run would produce a file holding
    // two XML documents.
    $this->fs->file_put_contents(
        $sitemapIndexFileName,
        $this->xmlWriter->flush(true)
    );
}
|
|
||||||
|
|
||||||
/**
 * Begin the sitemap index document: XML prolog, generator comments and the
 * opening sitemapindex element with its namespace attributes.
 */
private function writeSitemapIndexStart()
{
    $this->xmlWriter->startDocument("1.0", "UTF-8");
    $this->xmlWriter->writeComment(sprintf('generator-class="%s"', get_class($this)));
    $this->xmlWriter->writeComment(sprintf('generator-version="%s"', $this->classVersion));
    $this->xmlWriter->writeComment(sprintf('generated-on="%s"', date('c')));
    $this->xmlWriter->startElement('sitemapindex');
    $this->xmlWriter->writeAttribute('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9');
    $this->xmlWriter->writeAttribute('xmlns:xsi', 'http://www.w3.org/2001/XMLSchema-instance');
    // Index files are described by siteindex.xsd; sitemap.xsd only defines urlset
    // documents, so validating against it would reject the sitemapindex root.
    $this->xmlWriter->writeAttribute('xsi:schemaLocation', 'http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd');
}
|
|
||||||
|
|
||||||
/**
 * Write one sitemap entry (loc plus the current time as lastmod) into the index.
 *
 * @param string $url URL of the sitemap file.
 */
private function writeSitemapIndexUrl($url)
{
    $this->xmlWriter->startElement('sitemap');
    // XMLWriter escapes element content on its own; escaping here as well would
    // store "&" as "&amp;amp;".
    $this->xmlWriter->writeElement('loc', $url);
    $this->xmlWriter->writeElement('lastmod', date('c'));
    $this->xmlWriter->endElement(); // sitemap
}
|
|
||||||
|
|
||||||
/**
 * Close the sitemapindex element and end the index document.
 */
private function writeSitemapIndexEnd()
{
    $writer = $this->xmlWriter;
    $writer->endElement(); // sitemapindex
    $writer->endDocument();
}
|
|
||||||
|
|
||||||
/**
 * Return the file information recorded by the last finalize() call.
 *
 * @return array Array of previously generated files; empty before finalize() has run.
 */
public function getGeneratedFiles(): array
{
    return $this->generatedFiles;
}
|
|
||||||
|
|
||||||
/**
 * Inform search engines about newly created sitemaps.
 *
 * Each URL in $this->searchEngines is requested with the sitemap index URL appended.
 * The first entry holds two variants: index 0 contains a "USERID" placeholder that is
 * replaced by $yahooAppId, index 1 is used when no id is given.
 *
 * @param string $yahooAppId Your site Yahoo appid.
 * @return array One entry per search engine with 'site', 'fullsite', 'http_code' and 'message'.
 * @access public
 * @throws BadMethodCallException when finalize() has not been called or cURL is not loaded.
 */
public function submitSitemap($yahooAppId = null): array
{
    if (count($this->generatedFiles) === 0) {
        // This guard used to reuse the robots.txt wording from updateRobots().
        throw new BadMethodCallException("To submit the sitemap, call finalize() first.");
    }
    if (!$this->runtime->extension_loaded('curl')) {
        throw new BadMethodCallException("cURL extension is needed to do submission.");
    }

    $searchEngines = $this->searchEngines;
    $searchEngines[0] = isset($yahooAppId) ?
        str_replace("USERID", $yahooAppId, $searchEngines[0][0]) :
        $searchEngines[0][1];

    // NOTE(review): the sitemap URL is HTML-escaped, not URL-encoded, before being
    // appended to the ping URL — confirm this is what the endpoints expect.
    $sitemapUrl = htmlspecialchars($this->generatedFiles['sitemaps_index_url'], ENT_QUOTES);

    $result = [];
    foreach ($searchEngines as $searchEngine) {
        $submitUrl = $searchEngine . $sitemapUrl;
        $submitSite = $this->runtime->curl_init($submitUrl);
        $this->runtime->curl_setopt($submitSite, CURLOPT_RETURNTRANSFER, true);
        $responseContent = $this->runtime->curl_exec($submitSite);
        $response = $this->runtime->curl_getinfo($submitSite);

        // Reverse the host labels so the last two ("name.tld") sit at indexes 1 and 0.
        $submitSiteShort = array_reverse(explode(".", parse_url($searchEngine, PHP_URL_HOST)));
        $result[] = [
            "site" => $submitSiteShort[1] . "." . $submitSiteShort[0],
            "fullsite" => $submitUrl,
            "http_code" => $response['http_code'],
            "message" => str_replace("\n", " ", strip_tags($responseContent)),
        ];
    }

    return $result;
}
|
|
||||||
|
|
||||||
/**
 * Write the robots file in basePath so that it references the generated sitemap.
 * The new content comes from createNewRobotsContentFromFile(): an existing file is
 * kept minus its old "Sitemap:" lines, otherwise sample content is used.
 *
 * @access public
 * @return SitemapGenerator
 * @throws BadMethodCallException when finalize() has not produced any files yet.
 * @throws RuntimeException
 */
public function updateRobots(): SitemapGenerator
{
    if ($this->generatedFiles === []) {
        throw new BadMethodCallException("To update robots.txt, call finalize() first.");
    }

    $targetPath = $this->basePath . $this->robotsFileName;
    $updatedContent = $this->createNewRobotsContentFromFile($targetPath);
    $this->fs->file_put_contents($targetPath, $updatedContent);

    return $this;
}
|
|
||||||
|
|
||||||
/**
 * Build the robots file content ending with a "Sitemap:" line for the generated index URL.
 *
 * @param string $filepath Path of the robots file to read when it exists.
 * @return string Existing content without its previous "Sitemap:" lines (or the sample
 *                content when the file is missing), followed by the new "Sitemap:" line.
 */
private function createNewRobotsContentFromFile($filepath): string
{
    if (!$this->fs->file_exists($filepath)) {
        $content = $this->getSampleRobotsContent();
    } else {
        $content = "";
        $lines = explode(PHP_EOL, $this->fs->file_get_contents($filepath));
        foreach ($lines as $line) {
            // Skip stale sitemap references; every other line is carried over as is.
            if (substr($line, 0, 8) == 'Sitemap:') {
                continue;
            }
            $content .= $line . PHP_EOL;
        }
    }

    return $content . "Sitemap: {$this->generatedFiles['sitemaps_index_url']}";
}
|
|
||||||
|
|
||||||
/**
 * Join the sample robots lines into file content terminated by a line break.
 *
 * @return string
 * @access private
 */
private function getSampleRobotsContent(): string
{
    $joinedLines = implode(PHP_EOL, $this->sampleRobotsLines);

    return $joinedLines . PHP_EOL;
}
|
|
||||||
}
|
|
@ -741,26 +741,22 @@ class common {
|
|||||||
|
|
||||||
$timezone = $this->getData(['config','timezone']);
|
$timezone = $this->getData(['config','timezone']);
|
||||||
|
|
||||||
$outputDir = getcwd();
|
$sitemap = new \Icamys\SitemapGenerator\SitemapGenerator(helper::baseurl());
|
||||||
|
|
||||||
$sitemap = new \Icamys\SitemapGenerator\SitemapGenerator(helper::baseurl(false),$outputDir);
|
|
||||||
|
|
||||||
// will create also compressed (gzipped) sitemap
|
// will create also compressed (gzipped) sitemap
|
||||||
$sitemap->enableCompression();
|
$sitemap->createGZipFile = true;
|
||||||
|
|
||||||
// determine how many urls should be put into one file
|
// determine how many urls should be put into one file
|
||||||
// according to standard protocol 50000 is maximum value (see http://www.sitemaps.org/protocol.html)
|
// according to standard protocol 50000 is maximum value (see http://www.sitemaps.org/protocol.html)
|
||||||
$sitemap->setMaxUrlsPerSitemap(50000);
|
$sitemap->maxURLsPerSitemap = 50000;
|
||||||
|
|
||||||
// sitemap file name
|
// sitemap file name
|
||||||
$sitemap->setSitemapFileName("sitemap.xml");
|
$sitemap->sitemapFileName = "sitemap.xml";
|
||||||
|
|
||||||
// Set the sitemap index file name
|
|
||||||
$sitemap->setSitemapIndexFileName("sitemap-index.xml");
|
|
||||||
|
|
||||||
$datetime = new DateTime(date('c'));
|
$datetime = new DateTime(date('c'));
|
||||||
$datetime->format(DateTime::ATOM); // Updated ISO8601
|
$datetime->format(DateTime::ATOM); // Updated ISO8601
|
||||||
|
// sitemap index file name
|
||||||
|
$sitemap->sitemapIndexFileName = "sitemap-index.xml";
|
||||||
foreach($this->getHierarchy(null, null, null) as $parentPageId => $childrenPageIds) {
|
foreach($this->getHierarchy(null, null, null) as $parentPageId => $childrenPageIds) {
|
||||||
// Exclure les barres et les pages non publiques et les pages masquées
|
// Exclure les barres et les pages non publiques et les pages masquées
|
||||||
if ($this->getData(['page',$parentPageId,'group']) !== 0 ||
|
if ($this->getData(['page',$parentPageId,'group']) !== 0 ||
|
||||||
@ -802,22 +798,17 @@ class common {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Flush all stored urls from memory to the disk and close all necessary tags.
|
// generating internally a sitemap
|
||||||
$sitemap->flush();
|
$sitemap->createSitemap();
|
||||||
|
|
||||||
// Move flushed files to their final location. Compress if the option is enabled.
|
// writing early generated sitemap to file
|
||||||
$sitemap->finalize();
|
$sitemap->writeSitemap();
|
||||||
|
|
||||||
// Update robots.txt file in output directory or create a new one
|
|
||||||
$sitemap->updateRobots();
|
|
||||||
|
|
||||||
// Submit your sitemaps to Google, Yahoo, Bing and Ask.com
|
|
||||||
$sitemap->submitSitemap();
|
|
||||||
|
|
||||||
return(file_exists('sitemap.xml'));
|
return(file_exists('sitemap.xml'));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Création d'une miniature
|
* Création d'une miniature
|
||||||
* Fonction utilisée lors de la mise à jour d'une version 9 à une version 10
|
* Fonction utilisée lors de la mise à jour d'une version 9 à une version 10
|
||||||
@ -1611,6 +1602,11 @@ class common {
|
|||||||
if ($this->getData(['core', 'dataVersion']) < 10405) {
|
if ($this->getData(['core', 'dataVersion']) < 10405) {
|
||||||
$this->setData(['core', 'dataVersion', 10405]);
|
$this->setData(['core', 'dataVersion', 10405]);
|
||||||
}
|
}
|
||||||
|
// Version 10.4.06
|
||||||
|
if ($this->getData(['core', 'dataVersion']) < 10406) {
|
||||||
|
$this->removeDir ('core/class/sitemap');
|
||||||
|
$this->setData(['core', 'dataVersion', 10406]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user