2023-09-05 15:21:01 +02:00
|
|
|
<?php
|
|
|
|
|
|
|
|
namespace Icamys\SitemapGenerator;
|
|
|
|
|
|
|
|
use BadMethodCallException;
|
|
|
|
use DateTime;
|
|
|
|
use Icamys\SitemapGenerator\Extensions\GoogleVideoExtension;
|
|
|
|
use InvalidArgumentException;
|
|
|
|
use OutOfRangeException;
|
|
|
|
use RuntimeException;
|
|
|
|
use XMLWriter;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Class SitemapGenerator
|
|
|
|
* @package Icamys\SitemapGenerator
|
|
|
|
*/
|
|
|
|
class SitemapGenerator
|
|
|
|
{
|
|
|
|
/**
|
|
|
|
* Max size of a sitemap according to spec.
|
|
|
|
* @see https://www.sitemaps.org/protocol.html
|
|
|
|
*/
|
|
|
|
private const MAX_FILE_SIZE = 52428800;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Max number of urls per sitemap according to spec.
|
|
|
|
* @see https://www.sitemaps.org/protocol.html
|
|
|
|
*/
|
|
|
|
private const MAX_URLS_PER_SITEMAP = 50000;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Max number of sitemaps per index file according to spec.
|
|
|
|
* @see http://www.sitemaps.org/protocol.html
|
|
|
|
*/
|
|
|
|
private const MAX_SITEMAPS_PER_INDEX = 50000;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Total max number of URLs.
|
|
|
|
*/
|
|
|
|
private const TOTAL_MAX_URLS = self::MAX_URLS_PER_SITEMAP * self::MAX_SITEMAPS_PER_INDEX;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Max url length according to spec.
|
|
|
|
* @see https://www.sitemaps.org/protocol.html#xmlTagDefinitions
|
|
|
|
*/
|
|
|
|
private const MAX_URL_LEN = 2048;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Robots file name
|
|
|
|
* @var string
|
|
|
|
* @access private
|
|
|
|
*/
|
|
|
|
private $robotsFileName = "robots.txt";
|
|
|
|
/**
|
|
|
|
* Name of sitemap file
|
|
|
|
* @var string
|
|
|
|
* @access private
|
|
|
|
*/
|
|
|
|
private $sitemapFileName = "sitemap.xml";
|
|
|
|
/**
|
|
|
|
* Name of sitemap index file
|
|
|
|
* @var string
|
|
|
|
* @access private
|
|
|
|
*/
|
|
|
|
private $sitemapIndexFileName = "sitemap-index.xml";
|
|
|
|
/**
|
|
|
|
* Quantity of URLs per single sitemap file.
|
|
|
|
* If your links are very long, the sitemap file can exceed the 50MB limit (see MAX_FILE_SIZE),
|
|
|
|
* in this case use smaller value.
|
|
|
|
* @var int
|
|
|
|
* @access private
|
|
|
|
*/
|
|
|
|
private $maxUrlsPerSitemap = self::MAX_URLS_PER_SITEMAP;
|
|
|
|
/**
|
|
|
|
* If true, two sitemap files (.xml and .xml.gz) will be created and added to robots.txt.
|
|
|
|
* If true, .gz file will be submitted to search engines.
|
|
|
|
* If quantity of URLs will be bigger than 50.000, option will be ignored,
|
|
|
|
* all sitemap files except sitemap index will be compressed.
|
|
|
|
* @var bool
|
|
|
|
* @access private
|
|
|
|
*/
|
|
|
|
private $isCompressionEnabled = false;
|
|
|
|
/**
|
|
|
|
* URL to Your site.
|
|
|
|
* Script will use it to send sitemaps to search engines.
|
|
|
|
* @var string
|
|
|
|
* @access private
|
|
|
|
*/
|
|
|
|
private $baseURL;
|
|
|
|
/**
|
|
|
|
* Base path. Relative to script location.
|
|
|
|
* Use this if Your sitemap and robots files should be stored in other
|
|
|
|
* directory than the script.
|
|
|
|
* @var string
|
|
|
|
* @access private
|
|
|
|
*/
|
|
|
|
private $basePath;
|
|
|
|
/**
|
|
|
|
* Version of this class
|
|
|
|
* @var string
|
|
|
|
* @access private
|
|
|
|
*/
|
|
|
|
private $classVersion = "4.3.1";
|
|
|
|
/**
|
|
|
|
* Search engines URLs
|
|
|
|
* @var array list of ping URL prefixes; the first entry is itself a pair of Yahoo URLs (with and without appid)
|
|
|
|
* @access private
|
|
|
|
*/
|
|
|
|
private $searchEngines = [
|
|
|
|
[
|
|
|
|
"http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=USERID&url=",
|
|
|
|
"http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap=",
|
|
|
|
],
|
|
|
|
"http://www.google.com/ping?sitemap=",
|
|
|
|
"http://submissions.ask.com/ping?sitemap=",
|
|
|
|
"http://www.bing.com/ping?sitemap=",
|
|
|
|
"http://www.webmaster.yandex.ru/ping?sitemap=",
|
|
|
|
];
|
|
|
|
/**
|
|
|
|
* Array with urls
|
|
|
|
* @var array
|
|
|
|
* @access private
|
|
|
|
*/
|
|
|
|
private $urls;
|
|
|
|
/**
|
|
|
|
* Lines for robots.txt file that are written if file does not exist
|
|
|
|
* @var array
|
|
|
|
*/
|
|
|
|
private $sampleRobotsLines = [
|
|
|
|
"User-agent: *",
|
2024-09-07 14:12:22 +02:00
|
|
|
"Disallow: /",
|
|
|
|
"User-agent: Googlebot",
|
2023-09-05 15:21:01 +02:00
|
|
|
"Allow: /",
|
2024-09-07 14:12:22 +02:00
|
|
|
"User-agent: bingbot",
|
|
|
|
"Allow: /",
|
|
|
|
"User-agent: Slurp",
|
|
|
|
"Allow: /",
|
|
|
|
"User-agent: DuckDuckBot",
|
|
|
|
"Allow: /",
|
|
|
|
"User-agent: Baiduspider",
|
|
|
|
"Allow: /"
|
2023-09-05 15:21:01 +02:00
|
|
|
];
|
|
|
|
/**
|
|
|
|
* @var array list of valid changefreq values according to the spec
|
|
|
|
*/
|
|
|
|
private $validChangefreqValues = [
|
|
|
|
'always',
|
|
|
|
'hourly',
|
|
|
|
'daily',
|
|
|
|
'weekly',
|
|
|
|
'monthly',
|
|
|
|
'yearly',
|
|
|
|
'never',
|
|
|
|
];
|
|
|
|
/**
|
|
|
|
* @var float[] list of valid priority values according to the spec
|
|
|
|
*/
|
|
|
|
private $validPriorities = [
|
|
|
|
0.0,
|
|
|
|
0.1,
|
|
|
|
0.2,
|
|
|
|
0.3,
|
|
|
|
0.4,
|
|
|
|
0.5,
|
|
|
|
0.6,
|
|
|
|
0.7,
|
|
|
|
0.8,
|
|
|
|
0.9,
|
|
|
|
1.0,
|
|
|
|
];
|
|
|
|
/**
|
|
|
|
* @var FileSystem object used to communicate with file system
|
|
|
|
*/
|
|
|
|
private $fs;
|
|
|
|
/**
|
|
|
|
* @var Runtime object used to communicate with runtime
|
|
|
|
*/
|
|
|
|
private $runtime;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @var XMLWriter Used for writing xml to files
|
|
|
|
*/
|
|
|
|
private $xmlWriter;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @var string
|
|
|
|
*/
|
|
|
|
private $flushedSitemapFilenameFormat;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @var int
|
|
|
|
*/
|
|
|
|
private $flushedSitemapSize = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @var int
|
|
|
|
*/
|
|
|
|
private $flushedSitemapCounter = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @var array
|
|
|
|
*/
|
|
|
|
private $flushedSitemaps = [];
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @var bool
|
|
|
|
*/
|
|
|
|
private $isSitemapStarted = false;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @var int
|
|
|
|
*/
|
|
|
|
private $totalUrlCount = 0;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* @var int
|
|
|
|
*/
|
|
|
|
private $urlsetClosingTagLen = 10; // strlen("</urlset>\n")
|
|
|
|
private $sitemapUrlCount = 0;
|
|
|
|
private $generatedFiles = [];
|
|
|
|
|
|
|
|
/**
 * Create a generator for the given site, writing its files below $basePath.
 *
 * @param string $baseURL Your site URL; a trailing slash is stripped.
 * @param string $basePath Relative path where sitemap and robots should be stored.
 *                         A directory separator is appended when it is missing.
 * @param FileSystem|null $fs File system adapter; a new FileSystem is created when null.
 * @param Runtime|null $runtime Runtime adapter; a new Runtime is created when null.
 * @throws InvalidArgumentException when the runtime reports $basePath as not writable.
 */
public function __construct(string $baseURL, string $basePath = "", ?FileSystem $fs = null, ?Runtime $runtime = null)
{
    $this->urls = [];
    $this->baseURL = rtrim($baseURL, '/');

    // Fall back to the default adapters when none are injected.
    $this->fs = $fs ?? new FileSystem();
    $this->runtime = $runtime ?? new Runtime();

    if ($this->runtime->is_writable($basePath) === false) {
        throw new InvalidArgumentException(
            sprintf('the provided basePath (%s) should be a writable directory,', $basePath) .
            ' please check its existence and permissions'
        );
    }
    if (strlen($basePath) > 0 && substr($basePath, -1) !== DIRECTORY_SEPARATOR) {
        $basePath .= DIRECTORY_SEPARATOR;
    }
    $this->basePath = $basePath;

    $this->xmlWriter = $this->createXmlWriter();
    // "%%d" survives this sprintf as "%d"; it is filled with the sitemap counter
    // each time a temporary file path is built.
    $this->flushedSitemapFilenameFormat = sprintf("sm-%%d-%d.xml", time());
}
|
|
|
|
|
|
|
|
/**
 * Build the XMLWriter used for all output: memory-backed, with indentation enabled.
 *
 * @return XMLWriter
 */
private function createXmlWriter(): XMLWriter
{
    $writer = new XMLWriter();
    $writer->openMemory();
    $writer->setIndent(true);

    return $writer;
}
|
|
|
|
|
|
|
|
/**
 * Set the file name used for the final sitemap file.
 *
 * @param string $filename non-empty name whose extension is "xml"
 * @return SitemapGenerator
 * @throws InvalidArgumentException when the name is empty or does not have the xml extension
 */
public function setSitemapFilename(string $filename = ''): SitemapGenerator
{
    if ($filename === '') {
        throw new InvalidArgumentException('sitemap filename should not be empty');
    }

    $extension = pathinfo($filename, PATHINFO_EXTENSION);
    if ($extension !== 'xml') {
        throw new InvalidArgumentException('sitemap filename should have *.xml extension');
    }

    $this->sitemapFileName = $filename;

    return $this;
}
|
|
|
|
|
|
|
|
/**
 * Set the file name used for the sitemap index file.
 *
 * @param string $filename non-empty file name
 * @return $this
 * @throws InvalidArgumentException when the name is empty
 */
public function setSitemapIndexFilename(string $filename = ''): SitemapGenerator
{
    if ($filename === '') {
        throw new InvalidArgumentException('filename should not be empty');
    }

    $this->sitemapIndexFileName = $filename;

    return $this;
}
|
|
|
|
|
|
|
|
/**
 * Set the name of the robots file that updateRobots() reads and writes.
 *
 * @param string $filename non-empty file name
 * @return $this
 * @throws InvalidArgumentException when the name is empty
 */
public function setRobotsFileName(string $filename): SitemapGenerator
{
    if ($filename === '') {
        throw new InvalidArgumentException('filename should not be empty');
    }

    $this->robotsFileName = $filename;

    return $this;
}
|
|
|
|
|
|
|
|
/**
 * Limit the number of urls written into a single sitemap file.
 *
 * @param int $value allowed range is 1 to MAX_URLS_PER_SITEMAP inclusive
 * @return $this
 * @throws OutOfRangeException when the value lies outside the allowed range
 */
public function setMaxUrlsPerSitemap(int $value): SitemapGenerator
{
    $isWithinRange = 1 <= $value && $value <= self::MAX_URLS_PER_SITEMAP;

    if (!$isWithinRange) {
        $message = sprintf('value %d is out of range 1-%d', $value, self::MAX_URLS_PER_SITEMAP);
        throw new OutOfRangeException($message);
    }

    $this->maxUrlsPerSitemap = $value;

    return $this;
}
|
|
|
|
|
|
|
|
/**
 * Switch compression on; finalize() then stores sitemap files with a ".gz" suffix
 * through the compress.zlib:// stream wrapper.
 *
 * @return SitemapGenerator
 */
public function enableCompression(): SitemapGenerator
{
    $this->isCompressionEnabled = true;

    return $this;
}
|
|
|
|
|
|
|
|
/**
 * Switch compression off; finalize() then moves the sitemap files uncompressed.
 *
 * @return SitemapGenerator
 */
public function disableCompression(): SitemapGenerator
{
    $this->isCompressionEnabled = false;

    return $this;
}
|
|
|
|
|
|
|
|
/**
 * Tell whether compression is currently switched on.
 *
 * @return bool
 */
public function isCompressionEnabled(): bool
{
    $enabled = $this->isCompressionEnabled;

    return $enabled;
}
|
|
|
|
|
|
|
|
/**
 * Validate url components before they are written to a sitemap.
 *
 * Only $path, $changeFrequency, $priority and the 'google_video' extension are
 * checked; $lastModified and $alternates are accepted without inspection.
 *
 * @param string $path url path, 1 to MAX_URL_LEN characters long
 * @param DateTime|null $lastModified
 * @param string|null $changeFrequency one of the changefreq values listed in $validChangefreqValues
 * @param float|null $priority one of the values listed in $validPriorities
 * @param array|null $alternates
 * @param array $extensions extension data keyed by extension name
 * @throws InvalidArgumentException when the path length, change frequency or priority is invalid
 */
public function validate(
    string $path,
    ?DateTime $lastModified = null,
    ?string $changeFrequency = null,
    ?float $priority = null,
    ?array $alternates = null,
    array $extensions = []
) {
    // NOTE(review): only the path is measured here, not baseURL . path — confirm
    // this is the intended reading of the url length limit.
    $pathLength = mb_strlen($path);
    if ($pathLength < 1 || $pathLength > self::MAX_URL_LEN) {
        throw new InvalidArgumentException(
            sprintf("The urlPath argument length must be between 1 and %d.", self::MAX_URL_LEN)
        );
    }
    if ($changeFrequency !== null && !in_array($changeFrequency, $this->validChangefreqValues, true)) {
        // The placeholder has to go through sprintf, otherwise "%s" shows up verbatim in the message.
        throw new InvalidArgumentException(
            sprintf(
                'The change frequency argument should be one of: %s',
                implode(',', $this->validChangefreqValues)
            )
        );
    }
    if ($priority !== null && !in_array($priority, $this->validPriorities, true)) {
        throw new InvalidArgumentException("Priority argument should be a float number in the range [0.0..1.0]");
    }
    if (isset($extensions['google_video'])) {
        GoogleVideoExtension::validate($this->baseURL . $path, $extensions['google_video']);
    }
}
|
|
|
|
|
|
|
|
/**
 * Add url components.
 * Instead of storing all urls in the memory, the generator will flush sets of added urls
 * to the temporary files created on your disk.
 * The file format is 'sm-{index}-{timestamp}.xml'
 *
 * The writer is flushed to disk whenever the total url count is a multiple of 1000
 * or the current sitemap has reached its url limit; in the latter case the sitemap
 * is also closed so the next url starts a new one.
 *
 * @param string $path url path, appended to the base URL
 * @param DateTime|null $lastModified
 * @param string|null $changeFrequency
 * @param float|null $priority
 * @param array|null $alternates items with 'hreflang' and 'href' keys
 * @param array $extensions extension data keyed by extension name
 * @return $this
 * @throws InvalidArgumentException when validate() rejects the arguments
 * @throws OutOfRangeException when TOTAL_MAX_URLS urls have already been added
 */
public function addURL(
    string $path,
    ?DateTime $lastModified = null,
    ?string $changeFrequency = null,
    ?float $priority = null,
    ?array $alternates = null,
    array $extensions = []
): SitemapGenerator {
    $this->validate($path, $lastModified, $changeFrequency, $priority, $alternates, $extensions);

    if ($this->totalUrlCount >= self::TOTAL_MAX_URLS) {
        throw new OutOfRangeException(
            sprintf("Max url limit reached (%d)", self::TOTAL_MAX_URLS)
        );
    }
    if ($this->isSitemapStarted === false) {
        $this->writeSitemapStart();
    }

    $this->writeSitemapUrl($this->baseURL . $path, $lastModified, $changeFrequency, $priority, $alternates, $extensions);

    // Keep the in-memory buffer small: flush every 1000 urls and when the sitemap is full.
    if ($this->totalUrlCount % 1000 === 0 || $this->sitemapUrlCount >= $this->maxUrlsPerSitemap) {
        $this->flushWriter();
    }

    if ($this->sitemapUrlCount === $this->maxUrlsPerSitemap) {
        $this->writeSitemapEnd();
    }

    return $this;
}
|
|
|
|
|
|
|
|
/**
 * Begin a new sitemap document in the XML writer: XML declaration, generator
 * comments and the opening urlset element with its namespace attributes.
 * Marks the sitemap as started.
 */
private function writeSitemapStart()
{
    $writer = $this->xmlWriter;
    $writer->startDocument("1.0", "UTF-8");

    $headerComments = [
        sprintf('generator-class="%s"', get_class($this)),
        sprintf('generator-version="%s"', $this->classVersion),
        sprintf('generated-on="%s"', date('c')),
    ];
    foreach ($headerComments as $headerComment) {
        $writer->writeComment($headerComment);
    }

    $writer->startElement('urlset');

    $urlsetAttributes = [
        'xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9',
        'xmlns:xhtml' => 'http://www.w3.org/1999/xhtml',
        'xmlns:video' => 'http://www.google.com/schemas/sitemap-video/1.1',
        'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
        'xsi:schemaLocation' => 'http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd',
    ];
    foreach ($urlsetAttributes as $attributeName => $attributeValue) {
        $writer->writeAttribute($attributeName, $attributeValue);
    }

    $this->isSitemapStarted = true;
}
|
|
|
|
|
|
|
|
/**
 * Write one url element into the current sitemap and count it.
 *
 * @param string $loc full url (base URL plus path)
 * @param DateTime|null $lastModified written as lastmod in ATOM format when given
 * @param string|null $changeFrequency written as changefreq when given
 * @param float|null $priority written as priority with one decimal when given
 * @param array|null $alternates items with 'hreflang' and 'href' keys; other items are skipped
 * @param array $extensions extension data keyed by extension name; only 'google_video' is written
 */
private function writeSitemapUrl($loc, $lastModified, $changeFrequency, $priority, $alternates, $extensions)
{
    $this->xmlWriter->startElement('url');
    // XMLWriter::writeElement() entity-escapes the text itself. Escaping it with
    // htmlspecialchars() beforehand turned every "&" into "&amp;amp;" in the file.
    $this->xmlWriter->writeElement('loc', $loc);

    if ($lastModified !== null) {
        $this->xmlWriter->writeElement('lastmod', $lastModified->format(DateTime::ATOM));
    }

    if ($changeFrequency !== null) {
        $this->xmlWriter->writeElement('changefreq', $changeFrequency);
    }

    if ($priority !== null) {
        $this->xmlWriter->writeElement('priority', number_format($priority, 1, ".", ""));
    }

    if (is_array($alternates)) {
        foreach ($alternates as $alternate) {
            if (!is_array($alternate) || !isset($alternate['hreflang'], $alternate['href'])) {
                continue;
            }
            $this->xmlWriter->startElement('xhtml:link');
            $this->xmlWriter->writeAttribute('rel', 'alternate');
            $this->xmlWriter->writeAttribute('hreflang', $alternate['hreflang']);
            $this->xmlWriter->writeAttribute('href', $alternate['href']);
            $this->xmlWriter->endElement();
        }
    }

    foreach ($extensions as $extName => $extFields) {
        if ($extName === 'google_video') {
            GoogleVideoExtension::writeVideoTag($this->xmlWriter, $loc, $extFields);
        }
    }

    $this->xmlWriter->endElement(); // url
    $this->sitemapUrlCount++;
    $this->totalUrlCount++;
}
|
|
|
|
|
|
|
|
/**
 * Append the XML buffered in memory to the current temporary sitemap file.
 *
 * Does nothing when the buffer is empty. When the bytes flushed for the current
 * sitemap exceed MAX_FILE_SIZE (minus room for the closing urlset tag), the sitemap
 * is closed and a new one is started.
 */
private function flushWriter()
{
    $targetSitemapFilepath = $this->basePath . sprintf($this->flushedSitemapFilenameFormat, $this->flushedSitemapCounter);
    $flushedString = $this->xmlWriter->outputMemory(true);
    // MAX_FILE_SIZE is a byte limit, so count bytes; mb_strlen() would count
    // characters and under-report any multi-byte content.
    $flushedStringLen = strlen($flushedString);

    if ($flushedStringLen === 0) {
        return;
    }

    $this->flushedSitemapSize += $flushedStringLen;

    // Write the chunk BEFORE possibly closing the sitemap: it belongs to the
    // document that is still open, so it must precede the closing urlset tag.
    $this->fs->file_put_contents($targetSitemapFilepath, $flushedString, FILE_APPEND);

    if ($this->flushedSitemapSize > self::MAX_FILE_SIZE - $this->urlsetClosingTagLen) {
        // NOTE(review): the limit is detected only after the chunk has been written,
        // so the closed file can exceed it by up to one chunk — same as before this fix.
        $this->writeSitemapEnd();
        // The next sitemap starts from zero bytes.
        $this->flushedSitemapSize = 0;
        $this->writeSitemapStart();
    }
}
|
|
|
|
|
|
|
|
/**
 * Close the current sitemap: write the closing urlset tag to its temporary file,
 * register that file as flushed and reset the per-sitemap state so the next url
 * starts a new sitemap.
 */
private function writeSitemapEnd()
{
    $targetSitemapFilepath = $this->basePath . sprintf($this->flushedSitemapFilenameFormat, $this->flushedSitemapCounter);
    $this->xmlWriter->endElement(); // urlset
    $this->xmlWriter->endDocument();
    $this->fs->file_put_contents($targetSitemapFilepath, $this->xmlWriter->flush(true), FILE_APPEND);
    $this->isSitemapStarted = false;
    $this->flushedSitemaps[] = $targetSitemapFilepath;
    $this->flushedSitemapCounter++;
    $this->sitemapUrlCount = 0;
    // The size counter is per sitemap; without this reset it keeps growing across
    // files and eventually forces a file split on every flush.
    $this->flushedSitemapSize = 0;
}
|
|
|
|
|
|
|
|
/**
 * Write everything still buffered in memory to disk and, if a sitemap is
 * currently open, close it.
 */
public function flush()
{
    $this->flushWriter();

    if (!$this->isSitemapStarted) {
        return;
    }

    $this->writeSitemapEnd();
}
|
|
|
|
|
|
|
|
/**
 * Move flushed files to their final location. Compress if necessary.
 *
 * A single flushed sitemap is stored under the configured sitemap file name.
 * Several flushed sitemaps are stored as "<name><N>.<ext>" (N starting at 1) and a
 * sitemap index referencing all of them is created. With compression enabled every
 * sitemap file gets an extra ".gz" suffix; the index file does not.
 *
 * The resulting paths and the url to announce are available via getGeneratedFiles().
 *
 * @throws RuntimeException when no sitemap has been flushed yet
 */
public function finalize()
{
    $this->generatedFiles = [];

    $flushedCount = count($this->flushedSitemaps);
    if ($flushedCount === 0) {
        throw new RuntimeException('failed to finalize, please add urls and flush first');
    }

    $compressionSuffix = $this->isCompressionEnabled ? '.gz' : '';

    if ($flushedCount === 1) {
        $targetSitemapFilename = $this->sitemapFileName . $compressionSuffix;
        $targetSitemapFilepath = $this->basePath . $targetSitemapFilename;

        $this->moveFlushedSitemap($this->flushedSitemaps[0], $targetSitemapFilepath);

        $this->generatedFiles['sitemaps_location'] = [$targetSitemapFilepath];
        $this->generatedFiles['sitemaps_index_url'] = $this->baseURL . '/' . $targetSitemapFilename;

        return;
    }

    // Split the configured name into stem and extension at the END only;
    // str_replace() would also rewrite an extension-like substring inside the stem.
    $extension = pathinfo($this->sitemapFileName, PATHINFO_EXTENSION);
    $dotExtension = $extension === '' ? '' : '.' . $extension;
    $stem = $dotExtension === ''
        ? $this->sitemapFileName
        : substr($this->sitemapFileName, 0, -strlen($dotExtension));

    $sitemapsUrls = [];
    $targetSitemapFilepaths = [];
    foreach ($this->flushedSitemaps as $i => $flushedSitemap) {
        $targetSitemapFilename = $stem . ($i + 1) . $dotExtension . $compressionSuffix;
        $targetSitemapFilepath = $this->basePath . $targetSitemapFilename;

        $this->moveFlushedSitemap($flushedSitemap, $targetSitemapFilepath);

        // Pass the url on unescaped: entity escaping is the job of the XML writing
        // code, and escaping here as well would encode "&" more than once.
        $sitemapsUrls[] = $this->baseURL . '/' . $targetSitemapFilename;
        $targetSitemapFilepaths[] = $targetSitemapFilepath;
    }

    $targetSitemapIndexFilepath = $this->basePath . $this->sitemapIndexFileName;
    $this->createSitemapIndex($sitemapsUrls, $targetSitemapIndexFilepath);
    $this->generatedFiles['sitemaps_location'] = $targetSitemapFilepaths;
    $this->generatedFiles['sitemaps_index_location'] = $targetSitemapIndexFilepath;
    $this->generatedFiles['sitemaps_index_url'] = $this->baseURL . '/' . $this->sitemapIndexFileName;
}

/**
 * Move one flushed temporary sitemap to its final path; with compression enabled
 * the file is copied through the compress.zlib:// wrapper and the source removed.
 *
 * @param string $flushedSitemap path of the temporary sitemap file
 * @param string $targetSitemapFilepath final path of the sitemap file
 */
private function moveFlushedSitemap(string $flushedSitemap, string $targetSitemapFilepath)
{
    if ($this->isCompressionEnabled) {
        $this->fs->copy($flushedSitemap, 'compress.zlib://' . $targetSitemapFilepath);
        $this->fs->unlink($flushedSitemap);

        return;
    }

    $this->fs->rename($flushedSitemap, $targetSitemapFilepath);
}
|
|
|
|
|
|
|
|
/**
 * Write a sitemap index document listing the given sitemap urls.
 *
 * @param array $sitemapsUrls urls of the generated sitemap files
 * @param string $sitemapIndexFileName path of the index file to write
 */
private function createSitemapIndex($sitemapsUrls, $sitemapIndexFileName)
{
    // Discard whatever is still buffered so the index starts from a clean writer.
    $this->xmlWriter->flush(true);
    $this->writeSitemapIndexStart();
    foreach ($sitemapsUrls as $sitemapsUrl) {
        $this->writeSitemapIndexUrl($sitemapsUrl);
    }
    $this->writeSitemapIndexEnd();

    // Overwrite instead of appending: the index has a fixed name, so appending to
    // a file left by a previous run would concatenate two XML documents.
    $this->fs->file_put_contents(
        $sitemapIndexFileName,
        $this->xmlWriter->flush(true)
    );
}
|
|
|
|
|
|
|
|
/**
 * Begin a sitemap index document in the XML writer: XML declaration, generator
 * comments and the opening sitemapindex element with its namespace attributes.
 */
private function writeSitemapIndexStart()
{
    $writer = $this->xmlWriter;
    $writer->startDocument("1.0", "UTF-8");

    $headerComments = [
        sprintf('generator-class="%s"', get_class($this)),
        sprintf('generator-version="%s"', $this->classVersion),
        sprintf('generated-on="%s"', date('c')),
    ];
    foreach ($headerComments as $headerComment) {
        $writer->writeComment($headerComment);
    }

    $writer->startElement('sitemapindex');

    $indexAttributes = [
        'xmlns' => 'http://www.sitemaps.org/schemas/sitemap/0.9',
        'xmlns:xsi' => 'http://www.w3.org/2001/XMLSchema-instance',
        'xsi:schemaLocation' => 'http://www.sitemaps.org/schemas/sitemap/0.9 http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd',
    ];
    foreach ($indexAttributes as $attributeName => $attributeValue) {
        $writer->writeAttribute($attributeName, $attributeValue);
    }
}
|
|
|
|
|
|
|
|
/**
 * Write one sitemap entry (loc plus the current time as lastmod) into the index.
 *
 * @param string $url url of a generated sitemap file
 */
private function writeSitemapIndexUrl($url)
{
    $this->xmlWriter->startElement('sitemap');
    // XMLWriter::writeElement() entity-escapes the text itself; an additional
    // htmlspecialchars() here would encode "&" twice.
    $this->xmlWriter->writeElement('loc', $url);
    $this->xmlWriter->writeElement('lastmod', date('c'));
    $this->xmlWriter->endElement(); // sitemap
}
|
|
|
|
|
|
|
|
/**
 * Close the sitemapindex element and end the index document.
 */
private function writeSitemapIndexEnd()
{
    $writer = $this->xmlWriter;

    $writer->endElement(); // sitemapindex
    $writer->endDocument();
}
|
|
|
|
|
|
|
|
/**
 * Return what the last finalize() call produced; empty until finalize() has run.
 *
 * @return array Array of previously generated files
 */
public function getGeneratedFiles(): array
{
    $generatedFiles = $this->generatedFiles;

    return $generatedFiles;
}
|
|
|
|
|
|
|
|
/**
 * Will inform search engines about newly created sitemaps.
 * Every endpoint listed in $searchEngines is requested once with the sitemap url.
 * If You don't pass yahooAppId, Yahoo still will be informed,
 * but this method can be used once per day. If You will do this often,
 * message that limit was exceeded will be returned from Yahoo.
 *
 * NOTE(review): several of these ping endpoints may no longer be operated by
 * their providers — verify before relying on the returned codes.
 *
 * @param string $yahooAppId Your site Yahoo appid.
 * @return array of messages and http codes from each search engine
 * @access public
 * @throws BadMethodCallException when finalize() has not been called or cURL is not loaded
 */
public function submitSitemap($yahooAppId = null): array
{
    if (count($this->generatedFiles) === 0) {
        throw new BadMethodCallException("To submit the sitemap, call finalize() first.");
    }
    if (!$this->runtime->extension_loaded('curl')) {
        throw new BadMethodCallException("cURL extension is needed to do submission.");
    }

    $searchEngines = $this->searchEngines;
    // The first entry holds two Yahoo variants: [0] with an appid, [1] without.
    $searchEngines[0] = isset($yahooAppId)
        ? str_replace("USERID", $yahooAppId, $searchEngines[0][0])
        : $searchEngines[0][1];

    // The sitemap url is sent as a query-string value, so it must be url-encoded;
    // HTML escaping leaves "&", "?" and "#" free to break the ping url.
    $encodedSitemapUrl = urlencode($this->generatedFiles['sitemaps_index_url']);

    $result = [];
    foreach ($searchEngines as $searchEngine) {
        $submitUrl = $searchEngine . $encodedSitemapUrl;
        $submitSite = $this->runtime->curl_init($submitUrl);
        $this->runtime->curl_setopt($submitSite, CURLOPT_RETURNTRANSFER, true);
        $responseContent = $this->runtime->curl_exec($submitSite);
        $response = $this->runtime->curl_getinfo($submitSite);

        // Reduce the host to "domain.tld" for the report.
        $hostParts = array_reverse(explode(".", parse_url($searchEngine, PHP_URL_HOST)));

        $result[] = [
            "site" => $hostParts[1] . "." . $hostParts[0],
            "fullsite" => $submitUrl,
            "http_code" => $response['http_code'],
            // A failed transfer yields false; report it as an empty message.
            "message" => str_replace("\n", " ", strip_tags((string) $responseContent)),
        ];
    }

    return $result;
}
|
|
|
|
|
|
|
|
/**
 * Adds sitemap url to robots.txt file located in basePath.
 * An existing robots file is rewritten with its content plus the sitemap url;
 * a missing one is created from the sample content plus the sitemap url.
 *
 * @access public
 * @return SitemapGenerator
 * @throws BadMethodCallException when finalize() has not been called yet
 * @throws RuntimeException not raised by this method itself — presumably from the layers it calls; confirm
 */
public function updateRobots(): SitemapGenerator
{
    if (empty($this->generatedFiles)) {
        throw new BadMethodCallException("To update robots.txt, call finalize() first.");
    }

    $robotsFilePath = $this->basePath . $this->robotsFileName;

    $this->fs->file_put_contents(
        $robotsFilePath,
        $this->createNewRobotsContentFromFile($robotsFilePath)
    );

    return $this;
}
|
|
|
|
|
|
|
|
/**
 * Build the robots file content that announces the generated sitemap.
 *
 * When the file at $filepath exists, its lines are kept except for existing
 * "Sitemap:" directives; otherwise the sample robots content is used. In both
 * cases a "Sitemap:" line with the generated sitemap (index) url is appended.
 *
 * @param string $filepath path of the robots file to read
 * @return string
 * @throws RuntimeException when the existing robots file cannot be read
 */
private function createNewRobotsContentFromFile($filepath): string
{
    if ($this->fs->file_exists($filepath)) {
        $existingContent = $this->fs->file_get_contents($filepath);
        // Assumes the FileSystem adapter mirrors native file_get_contents() and
        // returns false on failure — confirm. Continuing would replace the robots
        // file with nearly empty content.
        if ($existingContent === false) {
            throw new RuntimeException(sprintf('failed to read robots file (%s)', $filepath));
        }

        $robotsFileContent = "";
        foreach (explode(PHP_EOL, $existingContent) as $line) {
            // Directive names in robots.txt are case-insensitive, so "sitemap:"
            // entries have to be dropped as well to avoid stale duplicates.
            if (strncasecmp($line, 'Sitemap:', 8) === 0) {
                continue;
            }
            $robotsFileContent .= $line . PHP_EOL;
        }
    } else {
        $robotsFileContent = $this->getSampleRobotsContent();
    }

    $robotsFileContent .= "Sitemap: {$this->generatedFiles['sitemaps_index_url']}";

    return $robotsFileContent;
}
|
|
|
|
|
|
|
|
/**
 * Join the sample robots lines with PHP_EOL, terminated by a trailing PHP_EOL.
 *
 * @return string
 * @access private
 */
private function getSampleRobotsContent(): string
{
    $joinedLines = implode(PHP_EOL, $this->sampleRobotsLines);

    return sprintf('%s%s', $joinedLines, PHP_EOL);
}
|
|
|
|
}
|