<?php

namespace Icamys\SitemapGenerator;

class SitemapGenerator
{
    const MAX_FILE_SIZE = 10485760;
    const MAX_URLS_PER_SITEMAP = 50000;

    const URL_PARAM_LOC = 0;
    const URL_PARAM_LASTMOD = 1;
    const URL_PARAM_CHANGEFREQ = 2;
    const URL_PARAM_PRIORITY = 3;

    /**
     * Name of sitemap file
     * @var string
     * @access public
     */
    public $sitemapFileName = "sitemap.xml";
    /**
     * Name of sitemap index file
     * @var string
     * @access public
     */
    public $sitemapIndexFileName = "sitemap-index.xml";
    /**
     * Robots file name
     * @var string
     * @access public
     */
    public $robotsFileName = "robots.txt";
    /**
     * Quantity of URLs per single sitemap file.
     * According to specification max value is 50.000.
     * If Your links are very long, sitemap file can be bigger than 10MB,
     * in this case use smaller value.
     * @var int
     * @access public
     */
    public $maxURLsPerSitemap = self::MAX_URLS_PER_SITEMAP;

    /**
     * Quantity of sitemaps per index file.
     * According to specification max value is 50.000
     * If Your index file is very long, index file can be bigger than 10MB,
     * in this case use smaller value.
     * @see http://www.sitemaps.org/protocol.html
     * @var int
     * @access public
     */
    public $maxSitemaps = 50000;
    /**
     * If true, two sitemap files (.xml and .xml.gz) will be created and added to robots.txt.
     * If true, .gz file will be submitted to search engines.
     * If quantity of URLs will be bigger than 50.000, option will be ignored,
     * all sitemap files except sitemap index will be compressed.
     * @var bool
     * @access public
     */
    public $createGZipFile = false;
    /**
     * URL to Your site.
     * Script will use it to send sitemaps to search engines.
     * @var string
     * @access private
     */
    private $baseURL;
    /**
     * Base path. Relative to script location.
     * Use this if Your sitemap and robots files should be stored in other
     * directory then script.
     * @var string
     * @access private
     */
    private $basePath;
    /**
     * Version of this class
     * @var string
     * @access private
     */
    private $classVersion = "1.0.0";
    /**
     * Search engines URLs
     * @var array of strings
     * @access private
     */
    private $searchEngines = array(
        array(
            "http://search.yahooapis.com/SiteExplorerService/V1/updateNotification?appid=USERID&url=",
            "http://search.yahooapis.com/SiteExplorerService/V1/ping?sitemap="
        ),
        "http://www.google.com/webmasters/tools/ping?sitemap=",
        "http://submissions.ask.com/ping?sitemap=",
        "http://www.bing.com/webmaster/ping.aspx?siteMap="
    );
    /**
     * Array with urls
     * @var \SplFixedArray of strings
     * @access private
     */
    private $urls;
    /**
     * Array with sitemap
     * @var array of strings
     * @access private
     */
    private $sitemaps;
    /**
     * Array with sitemap index
     * @var array of strings
     * @access private
     */
    private $sitemapIndex;
    /**
     * Current sitemap full URL
     * @var string
     * @access private
     */
    private $sitemapFullURL;

    /**
     * @var \DOMDocument
     */
    private $document;

    /**
     * Constructor.
     * @param string $baseURL You site URL, with / at the end.
     * @param string|null $basePath Relative path where sitemap and robots should be stored.
     */
    public function __construct($baseURL, $basePath = "")
    {
        $this->urls = new \SplFixedArray();
        $this->baseURL = $baseURL;
        $this->basePath = $basePath;
        $this->document = new \DOMDocument("1.0");
        $this->document->preserveWhiteSpace = false;
        $this->document->formatOutput = true;
    }

    /**
     * Use this to add many URL at one time.
     * Each inside array can have 1 to 4 fields.
     * @param $urlsArray
     * @throws \InvalidArgumentException
     */
    public function addUrls($urlsArray)
    {
        if (!is_array($urlsArray)) {
            throw new \InvalidArgumentException("Array as argument should be given.");
        }
        foreach ($urlsArray as $url) {
            $this->addUrl(
                isset($url[0]) ? $url[0] : null,
                isset($url[1]) ? $url[1] : null,
                isset($url[2]) ? $url[2] : null,
                isset($url[3]) ? $url[3] : null
            );
        }
    }

    /**
     * Use this to add single URL to sitemap.
     * @param string $url URL
     * @param \DateTime $lastModified When it was modified, use ISO 8601
     * @param string $changeFrequency How often search engines should revisit this URL
     * @param string $priority Priority of URL on You site
     * @see http://en.wikipedia.org/wiki/ISO_8601
     * @see http://php.net/manual/en/function.date.php
     * @throws \InvalidArgumentException
     */
    public function addUrl($url, \DateTime $lastModified = null, $changeFrequency = null, $priority = null)
    {
        if ($url == null) {
            throw new \InvalidArgumentException("URL is mandatory. At least one argument should be given.");
        }
        $urlLength = extension_loaded('mbstring') ? mb_strlen($url) : strlen($url);
        if ($urlLength > 2048) {
            throw new \InvalidArgumentException(
                "URL length can't be bigger than 2048 characters.
                Note, that precise url length check is guaranteed only using mb_string extension.
                Make sure Your server allow to use mbstring extension."
            );
        }
        $tmp = new \SplFixedArray(1);

        $tmp[self::URL_PARAM_LOC] = $url;

        if (isset($lastModified)) {
            $tmp->setSize(2);
            $tmp[self::URL_PARAM_LASTMOD] = $lastModified->format(\DateTime::ATOM);
        }

        if (isset($changeFrequency)) {
            $tmp->setSize(3);
            $tmp[self::URL_PARAM_CHANGEFREQ] = $changeFrequency;
        }

        if (isset($priority)) {
            $tmp->setSize(4);
            $tmp[self::URL_PARAM_PRIORITY] = $priority;
        }

        if ($this->urls->getSize() === 0) {
            $this->urls->setSize(1);
        } else {
            if ($this->urls->getSize() === $this->urls->key()) {
                $this->urls->setSize($this->urls->getSize() * 2);
            }
        }

        $this->urls[$this->urls->key()] = $tmp;
        $this->urls->next();
    }


    /**
     * @throws \BadMethodCallException
     * @throws \InvalidArgumentException
     * @throws \LengthException
     */
    public function createSitemap()
    {
        if (!isset($this->urls)) {
            throw new \BadMethodCallException("To create sitemap, call addUrl or addUrls function first.");
        }
        if ($this->maxURLsPerSitemap > self::MAX_URLS_PER_SITEMAP) {
            throw new \InvalidArgumentException(
                "More than " . self::MAX_URLS_PER_SITEMAP . " URLs per single sitemap is not allowed."
            );
        }
        $generatorInfo = '<!-- generated-on="' . date('c') . '" -->';


        $sitemapHeader = '<?xml version="1.0" encoding="UTF-8"?>' . $generatorInfo . '
                            <urlset
                                xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"' . "\r\n" . '
                                xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9' . "\n" . '
                                http://www.sitemaps.org/schemas/sitemap/0.9/sitemap.xsd"' . "\n" . '
                                xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
                         </urlset>';

        $sitemapIndexHeader = '<?xml version="1.0" encoding="UTF-8"?>' . $generatorInfo . '
                                <sitemapindex
                                    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
                                    xsi:schemaLocation="http://www.sitemaps.org/schemas/sitemap/0.9
                                    http://www.sitemaps.org/schemas/sitemap/0.9/siteindex.xsd"
                                    xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
                              </sitemapindex>';


        $nullUrls = 0;
        foreach ($this->urls as $url) {
            if (is_null($url)) {
                $nullUrls++;
            }
        }

        $nonEmptyUrls = $this->urls->getSize() - $nullUrls;

        $chunks = ceil($nonEmptyUrls / $this->maxURLsPerSitemap);

        for ($chunkCounter = 0; $chunkCounter < $chunks; $chunkCounter++) {
            $xml = new \SimpleXMLElement($sitemapHeader);
            for ($urlCounter = $chunkCounter * $this->maxURLsPerSitemap;
                 $urlCounter < ($chunkCounter + 1) * $this->maxURLsPerSitemap && $urlCounter < $nonEmptyUrls;
                 $urlCounter++
            ) {
                $row = $xml->addChild('url');

                $row->addChild(
                    'loc',
                    htmlspecialchars($this->baseURL . $this->urls[$urlCounter][self::URL_PARAM_LOC], ENT_QUOTES, 'UTF-8')
                );

                if ($this->urls[$urlCounter]->getSize() > 1) {
                    $row->addChild('lastmod', $this->urls[$urlCounter][self::URL_PARAM_LASTMOD]);
                }
                if ($this->urls[$urlCounter]->getSize() > 2) {
                    $row->addChild('changefreq', $this->urls[$urlCounter][self::URL_PARAM_CHANGEFREQ]);
                }
                if ($this->urls[$urlCounter]->getSize() > 3) {
                    $row->addChild('priority', $this->urls[$urlCounter][self::URL_PARAM_PRIORITY]);
                }
            }
            if (strlen($xml->asXML()) > self::MAX_FILE_SIZE) {
                throw new \LengthException(
                    "Sitemap size equals to " . strlen($xml->asXML())
                    . " bytes is more than 10MB (" . self::MAX_FILE_SIZE . " bytes),
                    please decrease maxURLsPerSitemap variable."
                );
            }
            $this->sitemaps[] = $xml->asXML();
        }
        if (count($this->sitemaps) > $this->maxSitemaps) {
            throw new \LengthException(
                "Sitemap index can contain {$this->maxSitemaps} sitemaps.
                Perhaps You trying to submit too many maps."
            );
        }
        if (count($this->sitemaps) > 1) {
            for ($i = 0; $i < count($this->sitemaps); $i++) {
                $this->sitemaps[$i] = array(
                    str_replace(".xml", ($i + 1) . ".xml", $this->sitemapFileName),
                    $this->sitemaps[$i]
                );
            }
            $xml = new \SimpleXMLElement($sitemapIndexHeader);
            foreach ($this->sitemaps as $sitemap) {
                $row = $xml->addChild('sitemap');
                $row->addChild('loc', $this->baseURL . "/" . $this->getSitemapFileName(htmlentities($sitemap[0])));
                $row->addChild('lastmod', date('c'));
            }
            $this->sitemapFullURL = $this->baseURL . "/" . $this->sitemapIndexFileName;
            $this->sitemapIndex = array(
                $this->sitemapIndexFileName,
                $xml->asXML()
            );
        } else {
            $this->sitemapFullURL = $this->baseURL . "/" . $this->getSitemapFileName();

            $this->sitemaps[0] = array(
                $this->sitemapFileName,
                $this->sitemaps[0]
            );
        }
    }


    /**
     * Returns created sitemaps as array of strings.
     * Use it You want to work with sitemap without saving it as files.
     * @return array of strings
     * @access public
     */
    public function toArray()
    {
        if (isset($this->sitemapIndex)) {
            return array_merge(array($this->sitemapIndex), $this->sitemaps);
        } else {
            return $this->sitemaps;
        }
    }

    /**
     * Will write sitemaps as files.
     * @access public
     * @throws \BadMethodCallException
     */
    public function writeSitemap()
    {
        if (!isset($this->sitemaps)) {
            throw new \BadMethodCallException("To write sitemap, call createSitemap function first.");
        }
        if (isset($this->sitemapIndex)) {
            $this->document->loadXML($this->sitemapIndex[1]);
            $this->writeFile($this->document->saveXML(), $this->basePath, $this->sitemapIndex[0], true);
            foreach ($this->sitemaps as $sitemap) {
                $this->writeFile($sitemap[1], $this->basePath, $sitemap[0]);
            }
        } else {
            $this->document->loadXML($this->sitemaps[0][1]);
            $this->writeFile($this->document->saveXML(), $this->basePath, $this->sitemaps[0][0], true);
            $this->writeFile($this->sitemaps[0][1], $this->basePath, $this->sitemaps[0][0]);
        }
    }


    private function getSitemapFileName($name = null)
    {
        if (!$name) {
            $name = $this->sitemapFileName;
        }
        if ($this->createGZipFile) {
            $name .= ".gz";
        }
        return $name;
    }

    /**
     * Save file.
     * @param string $content
     * @param string $filePath
     * @param string $fileName
     * @param bool $noGzip
     * @return bool
     * @access private
     */
    private function writeFile($content, $filePath, $fileName, $noGzip = false)
    {
        if (!$noGzip && $this->createGZipFile) {
            return $this->writeGZipFile($content, $filePath, $fileName);
        }
        $file = fopen($filePath . $fileName, 'w');
        fwrite($file, $content);
        return fclose($file);
    }

    /**
     * Save GZipped file.
     * @param string $content
     * @param string $filePath
     * @param string $fileName
     * @return bool
     * @access private
     */
    private function writeGZipFile($content, $filePath, $fileName)
    {
        $fileName .= '.gz';
        $file = gzopen($filePath . $fileName, 'w');
        gzwrite($file, $content);
        return gzclose($file);
    }

    /**
     * If robots.txt file exist, will update information about newly created sitemaps.
     * If there is no robots.txt will, create one and put into it information about sitemaps.
     * @access public
     * @throws \BadMethodCallException
     */
    public function updateRobots()
    {
        if (!isset($this->sitemaps)) {
            throw new \BadMethodCallException("To update robots.txt, call createSitemap function first.");
        }
        $sampleRobotsFile = "User-agent: *\nAllow: /";
        if (file_exists($this->basePath . $this->robotsFileName)) {
            $robotsFile = explode("\n", file_get_contents($this->basePath . $this->robotsFileName));
            $robotsFileContent = "";
            foreach ($robotsFile as $key => $value) {
                if (substr($value, 0, 8) == 'Sitemap:') {
                    unset($robotsFile[$key]);
                } else {
                    $robotsFileContent .= $value . "\n";
                }
            }
            $robotsFileContent .= "Sitemap: $this->sitemapFullURL";
            if (!isset($this->sitemapIndex)) {
                $robotsFileContent .= "\nSitemap: " . $this->getSitemapFileName($this->sitemapFullURL);
            }
            file_put_contents($this->basePath . $this->robotsFileName, $robotsFileContent);
        } else {
            $sampleRobotsFile = $sampleRobotsFile . "\n\nSitemap: " . $this->sitemapFullURL;
            if (!isset($this->sitemapIndex)) {
                $sampleRobotsFile .= "\nSitemap: " . $this->getSitemapFileName($this->sitemapFullURL);
            }
            file_put_contents($this->basePath . $this->robotsFileName, $sampleRobotsFile);
        }
    }

    /**
     * Will inform search engines about newly created sitemaps.
     * Google, Ask, Bing and Yahoo will be noticed.
     * If You don't pass yahooAppId, Yahoo still will be informed,
     * but this method can be used once per day. If You will do this often,
     * message that limit was exceeded  will be returned from Yahoo.
     * @param string $yahooAppId Your site Yahoo appid.
     * @return array of messages and http codes from each search engine
     * @access public
     * @throws \BadMethodCallException
     */
    public function submitSitemap($yahooAppId = null)
    {
        if (!isset($this->sitemaps)) {
            throw new \BadMethodCallException("To submit sitemap, call createSitemap function first.");
        }
        if (!extension_loaded('curl')) {
            throw new \BadMethodCallException("cURL library is needed to do submission.");
        }
        $searchEngines = $this->searchEngines;
        $searchEngines[0] = isset($yahooAppId) ?
            str_replace("USERID", $yahooAppId, $searchEngines[0][0]) :
            $searchEngines[0][1];
        $result = array();
        for ($i = 0; $i < count($searchEngines); $i++) {
            $submitSite = curl_init($searchEngines[$i] . htmlspecialchars($this->sitemapFullURL, ENT_QUOTES, 'UTF-8'));
            curl_setopt($submitSite, CURLOPT_RETURNTRANSFER, true);
            $responseContent = curl_exec($submitSite);
            $response = curl_getinfo($submitSite);
            $submitSiteShort = array_reverse(explode(".", parse_url($searchEngines[$i], PHP_URL_HOST)));
            $result[] = array(
                "site" => $submitSiteShort[1] . "." . $submitSiteShort[0],
                "fullsite" => $searchEngines[$i] . htmlspecialchars($this->sitemapFullURL, ENT_QUOTES, 'UTF-8'),
                "http_code" => $response['http_code'],
                "message" => str_replace("\n", " ", strip_tags($responseContent))
            );
        }
        return $result;
    }


    /**
     * Returns array of URLs
     *
     * Converts internal SplFixedArray to array
     * @return array
     */
    public function getUrls()
    {
        $urls = $this->urls->toArray();

        /**
         * @var int $key
         * @var \SplFixedArray $urlSplArr
         */
        foreach ($urls as $key => $urlSplArr) {
            if (!is_null($urlSplArr)) {
                $urlArr = $urlSplArr->toArray();
                $url = [];
                foreach ($urlArr as $paramIndex => $paramValue) {
                    switch ($paramIndex) {
                        case static::URL_PARAM_LOC:
                            $url['loc'] = $paramValue;
                            break;
                        case static::URL_PARAM_CHANGEFREQ:
                            $url['changefreq'] = $paramValue;
                            break;
                        case static::URL_PARAM_LASTMOD:
                            $url['lastmod'] = $paramValue;
                            break;
                        case static::URL_PARAM_PRIORITY:
                            $url['priority'] = $paramValue;
                            break;
                        default:
                            break;
                    }
                }
                $urls[$key] = $url;
            }
        }

        return $urls;
    }

    public function countUrls()
    {
        return $this->urls->getSize();
    }
}