• Модуль: seo
  • Путь к файлу: ~/bitrix/modules/seo/classes/general/seo_page_checker.php
  • Класс: CSeoPageChecker
  • Вызов: CSeoPageChecker::_PrepareData
/**
 * Builds the word indexes and the list of SEO notes for the fetched page.
 *
 * Reads the page body from $this->__result_data and fills:
 *  - $this->__index           word indexes (TOTAL, BOLD, ITALIC, NOINDEX, LINK,
 *                             LINK_EXTERNAL, DESCRIPTION, KEYWORDS, TITLE, H1),
 *                             each produced by $this->__prepareText();
 *  - $this->__result_extended raw HTML fragments matched for every category;
 *  - $this->__result_meta     contents of the description / keywords meta tags;
 *  - $this->__result_errors   notes (SEO_PCRE, SEO_IMG_NO_ALT, SEO_LINKS_COUNT,
 *                             SEO_META_NO_DESCRIPTION, SEO_META_NO_KEYWORDS,
 *                             SEO_H1_UNIQUE, SEO_H1_ABSENT and handler-supplied ones).
 *
 * Image tags that carry an ALT or TITLE attribute are replaced in
 * $this->__result_data by that attribute's text before indexing.
 *
 * All patterns are written as single-quoted literals on purpose: inside a
 * double-quoted PHP string "\1" is an octal escape (byte 0x01), not a PCRE
 * backreference.
 *
 * @return void
 */
function _PrepareData()
{
	// Make sure PCRE is allowed to backtrack over the whole document.
	if ($this->pcre_backtrack_limit === false)
		$this->pcre_backtrack_limit = intval(ini_get("pcre.backtrack_limit"));

	// Byte length (not character length) is what matters for the PCRE limit.
	$text_len = strlen($this->__result_data) + 1;

	if ($this->pcre_backtrack_limit > 0 && $this->pcre_backtrack_limit < $text_len)
	{
		// Best effort: ini_set may be disabled, so re-read the effective value afterwards.
		@ini_set("pcre.backtrack_limit", $text_len);
		$this->pcre_backtrack_limit = intval(ini_get("pcre.backtrack_limit"));
	}

	// The limit could not be raised far enough - tell the user the analysis may be incomplete.
	if ($this->__bCheckErrors && $this->pcre_backtrack_limit > 0 && $this->pcre_backtrack_limit < $text_len)
	{
		$this->__result_errors[] = array(
			'CODE' => 'SEO_PCRE',
			'TYPE' => 'NOTE',
			'DETAIL' => array(
				'#PCRE_BACKTRACK_LIMIT#' => $this->pcre_backtrack_limit,
				'#TEXT_LEN#' => $text_len,
			)
		);
	}

	$this->__index = array('TOTAL' => array(), 'BOLD' => array(), 'ITALIC' => array(), 'LINK' => array(), 'DESCRIPTION' => array(), 'KEYWORDS' => array());

	$arRes = array();

	// Replace every image that has an ALT or TITLE attribute with that attribute's text.
	$replaced = preg_replace('/<img[^>]*(alt|title)="([^"]*)".*?>/is', '\2', $this->__result_data);
	if ($replaced !== null)
	{
		// preg_replace() returns null on a PCRE failure; keep the original text in that case.
		$this->__result_data = $replaced;
	}

	// Whatever <img> tags are left have neither ALT nor TITLE; preg_match_all() yields their number.
	if ($this->__bCheckErrors && ($img_cnt = preg_match_all('/<img\b[^>]*>/i', $this->__result_data, $arRes)))
	{
		$this->__result_errors[] = array(
			'CODE' => 'SEO_IMG_NO_ALT',
			'TYPE' => 'NOTE',
			'DETAIL' => array(
				'#COUNT#' => $img_cnt
			)
		);
	}

	// get full words index
	$this->__index['TOTAL'] = $this->__prepareText($this->__result_data);

	// get bold words index
	if (preg_match_all('/<(b|strong)>(.*?)<\/\1>/is', $this->__result_data, $arRes))
	{
		$this->__result_extended['BOLD'] = $arRes[0];
		$this->__index['BOLD'] = $this->__prepareText(implode(" ", $arRes[2]));
	}

	// get italic words index
	if (preg_match_all('/<(i|em)>(.*?)<\/\1>/is', $this->__result_data, $arRes))
	{
		$this->__result_extended['ITALIC'] = $arRes[0];
		$this->__index['ITALIC'] = $this->__prepareText(implode(" ", $arRes[2]));
	}

	// get noindex tags
	if (preg_match_all('/<(noindex)>(.*?)<\/\1>/is', $this->__result_data, $arRes))
	{
		$this->__result_extended['NOINDEX'] = $arRes[0];
		$this->__index['NOINDEX'] = $this->__prepareText(implode(" ", $arRes[2]));
	}

	// get link words index
	if (preg_match_all('/<(a) ([^>]*)>(.*?)<\/\1>/is', $this->__result_data, $arRes))
	{
		$this->__result_extended['LINK'] = $arRes[0];
		$this->__index['LINK'] = $this->__prepareText(implode(" ", $arRes[3]));

		$this->__result_extended['NOFOLLOW'] = array();
		$this->__result_extended['LINK_EXTERNAL'] = array();
		$this->__index['LINK_EXTERNAL'] = array();

		// $arRes[2] holds the attribute string of every anchor, $arRes[3] its inner HTML.
		foreach ($arRes[2] as $key => $attrs)
		{
			if (false !== mb_strpos($attrs, 'rel="nofollow"'))
				$this->__result_extended['NOFOLLOW'][] = $arRes[0][$key];

			$hrefPos = mb_strpos($attrs, 'href="');
			if (false !== $hrefPos)
			{
				// Skip the 6 characters of 'href="' so that only the attribute value is extracted.
				$valueStart = $hrefPos + 6;
				$valueEnd = mb_strpos($attrs, '"', $valueStart);

				// No closing quote: take everything up to the end of the attribute string.
				$url = ($valueEnd === false)
					? mb_substr($attrs, $valueStart)
					: mb_substr($attrs, $valueStart, $valueEnd - $valueStart);

				if ($this->IsOuterUrl($url))
				{
					$this->__index['LINK_EXTERNAL'] = array_merge($this->__index['LINK_EXTERNAL'], $this->__prepareText($arRes[3][$key]));
					$this->__result_extended['LINK_EXTERNAL'][] = $arRes[0][$key];
				}
			}
		}

		if ($this->__bCheckErrors && count($arRes[0]) > $this->__qualifier_links_count)
		{
			$this->__result_errors[] = array(
				'CODE' => 'SEO_LINKS_COUNT',
				'TYPE' => 'NOTE',
				'DETAIL' => array(
					'#COUNT#' => count($arRes[0]),
					'#COUNT_EXTERNAL#' => count($this->__result_extended['LINK_EXTERNAL']),
					'#QUALIFIER#' => $this->__qualifier_links_count,
				)
			);
		}
	}

	// get meta description words index (group 1 is the content attribute)
	if (preg_match('/<meta\s+name="description"\s+content="([^"]+)"[^>]*>/i', $this->__result_data, $arRes))
	{
		$this->__result_meta['DESCRIPTION'] = $arRes[1];
		$this->__result_extended['META_DESCRIPTION'] = $arRes[0];
		$this->__index['DESCRIPTION'] = $this->__prepareText($this->__result_meta['DESCRIPTION']);
	}
	else
	{
		// Recorded regardless of __bCheckErrors, as in the original code.
		$this->__result_errors[] = array(
			'CODE' => 'SEO_META_NO_DESCRIPTION',
			'TYPE' => 'NOTE',
			'DETAIL' => array()
		);
	}

	// get meta keywords words index (group 1 is the content attribute)
	if (preg_match('/<meta\s+name="keywords"\s+content="([^"]+)"[^>]*>/i', $this->__result_data, $arRes))
	{
		$this->__result_meta['KEYWORDS'] = $arRes[1];
		$this->__result_extended['META_KEYWORDS'] = $arRes[0];
		$this->__index['KEYWORDS'] = $this->__prepareText($this->__result_meta['KEYWORDS']);
	}
	else
	{
		// Recorded regardless of __bCheckErrors, as in the original code.
		$this->__result_errors[] = array(
			'CODE' => 'SEO_META_NO_KEYWORDS',
			'TYPE' => 'NOTE',
			'DETAIL' => array()
		);
	}

	// get titles words index
	if (preg_match('/<(title)>(.*?)<\/\1>/is', $this->__result_data, $arRes))
	{
		$this->__result_extended['TITLE'] = $arRes[0];
		$this->__index['TITLE'] = $this->__prepareText($arRes[2]);
	}

	// collect every heading <h0>..<h9> together with its content
	if (preg_match_all('/<(h\d).*?>.*?<\/\1>/is', $this->__result_data, $arRes))
	{
		$this->__result_extended['H'] = $arRes[0];
	}

	// get H1 words index; more than one H1, or none at all, is reported as a note
	if (preg_match_all('/<(h1).*?>(.*?)<\/\1>/is', $this->__result_data, $arRes))
	{
		if ($this->__bCheckErrors && count($arRes[0]) > 1)
		{
			$this->__result_errors[] = array(
				'CODE' => 'SEO_H1_UNIQUE',
				'TYPE' => 'NOTE',
				'DETAIL' => array(
					'#COUNT#' => count($arRes[0]),
					'#VALUES#' => htmlspecialcharsbx('"'.implode('", "', $arRes[2]).'"'),
				)
			);
		}

		$this->__index['H1'] = $this->__prepareText(implode(" ", $arRes[2]));
	}
	elseif ($this->__bCheckErrors)
	{
		$this->__result_errors[] = array(
			'CODE' => 'SEO_H1_ABSENT',
			'TYPE' => 'NOTE',
			'DETAIL' => array()
		);
	}

	// Let 'onPageCheck' handlers of the seo module add their own notes: a handler that
	// returns a falsy value and leaves an application exception contributes one entry.
	if ($this->__bCheckErrors)
	{
		foreach (GetModuleEvents('seo', 'onPageCheck', true) as $arEvent)
		{
			$handlerResult = ExecuteModuleEventEx($arEvent, array(
				array(
					'URL' => $this->__url,
					'LANG' => $this->__lang,
					'SERVER_NAME' => $this->__server_name,
					'SITE' => $this->__site,
				),
				array(
					'HEADERS' => $this->__result_headers,
					'BODY' => $this->__result_data,
				),
				$this->__result_meta,
				$this->__index,
			));

			if (!$handlerResult && ($ex = $GLOBALS['APPLICATION']->GetException()))
			{
				$this->__result_errors[] = array(
					'CODE' => $ex->GetId(),
					'TYPE' => 'NOTE',
					'TEXT' => $ex->GetString(),
				);
			}
		}
	}
}