sourceUrl = $sourceUrl; $this->targetUrl = $targetUrl; $this->redirectUrl = $redirectUrl; $this->httpcode = $httpcode; } public function toArray() : array { return [ 'sourceUrl' => $this->sourceUrl, 'targetUrl' => $this->targetUrl, 'redirectUrl' => $this->redirectUrl, 'httpcode' => $this->httpcode ]; } }

/**
 * Tests a generated static site.
 *
 * A run walks the sitemap tree starting at the builder's root sitemap,
 * requests every page listed there, scans the local static templates for
 * href/src/url() links, requests each distinct link once, and finally emails
 * a summary with a spreadsheet of the results to the webmaster address.
 */
class StaticSiteTester
{
    public string $projectDir;
    public CurlService $curlService;
    public MailerService $mailerService;
    public SpreadsheetWriter $spreadsheetWriter;
    public StaticSiteBuilder $staticSiteBuilder;
    public bool $testing;

    /** When true, progress is echoed to the command line. */
    private bool $verbose = false;

    /** Link results grouped by template basename; one entry per link occurrence. */
    private array $hrefTestResultsByFilename = [];

    /** Link results keyed by target URL; doubles as the "already requested" cache. */
    private array $hrefTestResultsByUrl = [];

    /** URLs of every sitemap visited so far. */
    private array $sitemaps = [];

    /** Page URLs collected from sitemaps, keyed by URL. */
    private array $pagesToFetch = [];

    private int $countPagesFetched = 0;
    private int $countStaticTemplates = 0;

    /** URLs of local templates that no sitemap listed, keyed by URL. */
    private array $pagesNotInSitemaps = [];

    private int $countHttp200 = 0;
    private int $countHttp300 = 0;
    private int $countHttp400 = 0;
    private int $countHttp500 = 0;
    private int $countHttpOther = 0;

    public function __construct(
        string $projectDir,
        CurlService $curlService,
        MailerService $mailerService,
        SpreadsheetWriter $spreadsheetWriter,
        StaticSiteBuilder $staticSiteBuilder
    ) {
        $this->projectDir = $projectDir;
        $this->curlService = $curlService;
        $this->mailerService = $mailerService;
        $this->spreadsheetWriter = $spreadsheetWriter;
        $this->staticSiteBuilder = $staticSiteBuilder;
        $this->testing = StaticSiteTesterConfig::$testing;
    }

    /**
     * Tells whether $url is a syntactically valid absolute http(s) URL.
     *
     * The scheme comparison is case-sensitive, so only lower-case "http" and
     * "https" are accepted.
     */
    private function isValidUrl(string $url) : bool
    {
        if (filter_var($url, FILTER_VALIDATE_URL) === false) {
            return false;
        }

        return in_array(parse_url($url, PHP_URL_SCHEME), ['http', 'https'], true);
    }

    /**
     * Tests one link found in a template and records the outcome.
     *
     * Excluded and non-http(s) URLs are ignored. Each distinct URL is
     * requested only once; later occurrences reuse the stored result but are
     * still recorded against the file and counted in the status totals.
     *
     * @param string $filename Path of the template the link was found in.
     * @param string $url      The link target.
     */
    private function ___scanHref(string $filename, string $url)
    {
        if (in_array($url, StaticSiteTesterConfig::$hrefExclusions, true)) {
            return;
        }
        if (!$this->isValidUrl($url)) {
            return;
        }

        $pathinfo = pathinfo($filename);

        // The lookup must go through $this: testing a local variable here
        // would never hit, and every occurrence would be requested again.
        if (!isset($this->hrefTestResultsByUrl[$url])) {
            $ch = $this->curlService->initCurlWithHeaders($url, true);
            curl_exec($ch);
            $effectiveUrl = curl_getinfo($ch, CURLINFO_EFFECTIVE_URL);
            $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);

            // Only report a redirect URL when the request ended somewhere else.
            $redirectUrl = ($effectiveUrl !== $url) ? $effectiveUrl : '';

            // Template file names encode the page path; map them back to a URL.
            $sourceUrl = $this->staticSiteBuilder->staticDomain . str_replace(
                $this->staticSiteBuilder->slashReplace,
                '/',
                $pathinfo['filename']
            );

            $result = new HrefTestResult($sourceUrl, $url, $redirectUrl, $httpcode);
            $this->hrefTestResultsByUrl[$url] = $result;
        } else {
            $result = $this->hrefTestResultsByUrl[$url];
            // Reuse the stored status so the totals below stay per occurrence.
            $httpcode = $result->toArray()['httpcode'];
        }

        $this->hrefTestResultsByFilename[$pathinfo['basename']][] = $result;

        // Classify on the first two digits of the status code (20x, 30x, ...).
        $___code = substr((string)$httpcode, 0, 2);
        switch ($___code) {
            case '20':
                $analysis = 'Found';
                $this->countHttp200++;
                break;
            case '30':
                $analysis = 'Redirect';
                $this->countHttp300++;
                break;
            case '40':
                $analysis = 'Inaccessible';
                $this->countHttp400++;
                break;
            case '50':
                $analysis = 'Error';
                $this->countHttp500++;
                break;
            default:
                $analysis = 'Unknown ' . $httpcode;
                $this->countHttpOther++;
                break;
        }

        if ($this->verbose && $___code === '40') {
            echo $analysis . ': ' . $url . PHP_EOL;
        }
    }

    /**
     * Scans one template file for href="", src="" and url() targets and tests
     * each of them.
     *
     * @param string $filename Path of the template file to scan.
     */
    public function scanHrefs($filename)
    {
        $pathinfo = pathinfo($filename);
        if ($this->verbose) {
            echo 'SCANNING: HREFS IN ' . $pathinfo['basename'] . PHP_EOL;
        }

        $content = file_get_contents($filename);
        if ($content === false) {
            // Unreadable file: there is nothing to scan.
            return;
        }

        //loop through all standard link types... href="", src="", url()
        $matches = [];
        $matchGroup = 4; // capture group that holds the link target
        $regexp = "(<.*((href|src)=[\"\'])|url\()(.*)(([\"\'])|\))";
        if (preg_match_all("/$regexp/iU", $content, $matches, PREG_SET_ORDER)) {
            foreach ($matches as $match) {
                $this->___scanHref($filename, $match[$matchGroup]);
            }
        }
    }

    /**
     * Scans every *.html file in the static template directory and notes
     * which of those pages were not listed in any sitemap.
     *
     * In testing mode the loop stops after the first file.
     */
    public function scanAllHrefs()
    {
        if ($this->verbose) {
            echo 'SCANNING: HREFS ' . PHP_EOL;
        }

        $pattern = $this->projectDir . $this->staticSiteBuilder->staticTemplatePath . '*.html';
        // glob() returns false on error; treat that as "no templates".
        $filenames = glob($pattern) ?: [];

        $count = 0;
        foreach ($filenames as $filename) {
            $this->scanHrefs($filename);
            $count++;

            //check if page was discoverable via sitemap
            $pathinfo = pathinfo($filename);
            $path = str_replace($this->staticSiteBuilder->slashReplace, '/', $pathinfo['filename']);
            $url = $this->staticSiteBuilder->staticDomain . $path;
            if (!isset($this->pagesToFetch[$url])) {
                $this->pagesNotInSitemaps[$url] = $url;
            }

            //early exit if testing
            if ($this->testing && $count > 0) {
                echo '*** TESTING: EXITING EARLY ***' . PHP_EOL;
                break;
            }
        }

        $this->countStaticTemplates = $count;
    }

    /**
     * Turns the "sitemap"/"url" member of a decoded sitemap into a list.
     *
     * After the XML -> JSON -> array round trip a single child element is
     * decoded as one associative entry, while repeated children are decoded
     * as a list of entries. This returns a list in both cases.
     *
     * @param mixed $entries Value found under the 'sitemap' or 'url' key.
     */
    private function normaliseSitemapEntries($entries) : array
    {
        if (!is_array($entries)) {
            return [];
        }
        if (array_key_exists('loc', $entries)) {
            return [$entries];
        }

        return $entries;
    }

    /**
     * Loads the sitemap at $url, follows nested sitemaps recursively and
     * collects every listed page URL into the pages-to-fetch map.
     *
     * A sitemap that was already visited is skipped so that an index
     * referring back to itself cannot recurse forever.
     *
     * @param string $url Location of a sitemap or sitemap index.
     */
    public function scanSitemaps(string $url)
    {
        if (in_array($url, $this->sitemaps, true)) {
            return;
        }

        if ($this->verbose) {
            echo 'SITEMAP: ' . $url . PHP_EOL;
        }
        $this->sitemaps[] = $url;

        $xml = simplexml_load_file($url);
        if ($xml === false) {
            if ($this->verbose) {
                echo 'SITEMAP: FAILED TO LOAD ' . $url . PHP_EOL;
            }
            return;
        }

        //get content as array so we can fetch pages within
        $arr = json_decode(json_encode($xml), true);
        if (!is_array($arr)) {
            return;
        }

        foreach ($this->normaliseSitemapEntries($arr['sitemap'] ?? null) as $entry) {
            if (is_array($entry) && isset($entry['loc']) && is_string($entry['loc'])) {
                $this->scanSitemaps($entry['loc']);
            }
        }

        foreach ($this->normaliseSitemapEntries($arr['url'] ?? null) as $entry) {
            if (is_array($entry) && isset($entry['loc']) && is_string($entry['loc'])) {
                $this->pagesToFetch[$entry['loc']] = $entry['loc'];
            }
        }
    }

    /**
     * Requests every page collected from the sitemaps and counts those that
     * answer with a 20x status.
     */
    public function scanPages()
    {
        if ($this->verbose) {
            echo 'SCANNING: PAGES ' . PHP_EOL;
        }

        foreach ($this->pagesToFetch as $url) {
            if ($this->verbose) {
                echo 'PAGE: ' . $url . PHP_EOL;
            }

            $ch = $this->curlService->initCurlWithHeaders($url, true);
            curl_exec($ch);
            $httpcode = curl_getinfo($ch, CURLINFO_HTTP_CODE);

            if (substr((string)$httpcode, 0, 2) === '20') {
                $this->countPagesFetched++;
            }
        }
    }

    /**
     * Runs the complete test and emails the results.
     *
     * Terminates the script immediately when the static site builder reports
     * that it is still running.
     */
    public function run()
    {
        //output stuff to the command line
        $this->verbose = true;

        if ($this->staticSiteBuilder->isRunning()) {
            echo '*** STATIC SITE BUILDER IS RUNNING, TRY AGAIN SHORTLY ***';
            exit;
        }

        //check coverage vs sitemaps
        $url = $this->staticSiteBuilder->staticDomain . $this->staticSiteBuilder->rootSitemap;
        if ($this->verbose) {
            echo 'SCANNING: SITEMAPS ' . PHP_EOL;
        }
        $this->scanSitemaps($url);
        $this->scanPages();

        //scan all our local static templates for 404s etc
        $this->scanAllHrefs();

        //prepare email
        // The literals below deliberately contain raw newlines and must stay
        // unindented so the message text is unchanged.
        // NOTE(review): the same body is passed to html() yet carries no
        // markup - confirm whether tags were lost from these literals.
        $body = '';
        $body .= '
Static Site testing completed, results attached
';
        $body .= 'Number of sitemaps: ' . count($this->sitemaps) . '
';
        $body .= 'Number of pages in sitemaps: ' . count($this->pagesToFetch) . '
';
        $body .= 'Number of pages in sitemaps found on static site: ' . $this->countPagesFetched . '
';
        $body .= 'Number of pages not in sitemaps found on static site: ' . count($this->pagesNotInSitemaps) . '
';
        $body .= 'Analyis of hrefs within pages
';
        $body .= 'Number of 200 results: ' . $this->countHttp200 . '
';
        $body .= 'Number of 300 results: ' . $this->countHttp300 . '
';
        $body .= 'Number of 400 results: ' . $this->countHttp400 . '
';
        $body .= 'Number of 500 results: ' . $this->countHttp500 . '
';
        $body .= 'Number of unknown results: ' . $this->countHttpOther . '
';

        $fromAddress = new Address(
            $this->mailerService->noReplyAddress,
            $this->mailerService->noReplyName
        );
        $email = (new Email())
            ->from($fromAddress)
            ->to($this->mailerService->webmasterAddress)
            ->subject('Static Site Testing Results')
            ->text(strip_tags($body))
            ->html($body);

        //create attachments to send via email
        $spreadsheet = $this->spreadsheetWriter->createEmptySpreadsheet();

        //SHEET 1
        //one row per distinct target URL, taken from hrefTestResultsByUrl
        $columns = [
            'Source URL',
            'Target URL',
            'Redirect URL',
            'HTTP CODE'
        ];
        $rows = [];
        foreach ($this->hrefTestResultsByUrl as $result) {
            $rows[] = $result->toArray();
        }
        $this->spreadsheetWriter->createWorksheet($spreadsheet, 'HREF ANALYSIS', $columns, $rows);

        //SHEET 2
        $columns = [
            'Source URL'
        ];
        $rows = [];
        foreach ($this->pagesNotInSitemaps as $pageUrl) {
            $rows[] = [$pageUrl];
        }
        $this->spreadsheetWriter->createWorksheet($spreadsheet, 'PAGES NOT IN SITEMAPS', $columns, $rows);

        // Attach the spreadsheet only when it was actually written.
        $tmpDir = $this->projectDir . $this->staticSiteBuilder->tmpPath;
        if ($this->spreadsheetWriter->writeSpreadsheet(
            $spreadsheet,
            $tmpDir,
            'StaticSiteTestResults.xlsx'
        )) {
            $filename = $tmpDir . 'StaticSiteTestResults.xlsx';
            $email->addPart(new DataPart(new File($filename)));
        }

        $this->mailerService->sendEmail($email);
    }
}