/**
 * Builds the word indexes and tag lists for the page body held in
 * $this->__result_data and, when $this->__bCheckErrors is set, records
 * SEO notes about it.
 *
 * Steps, in order:
 *  - raises pcre.backtrack_limit when it is lower than the body length + 1;
 *  - replaces <img> tags that carry an alt/title attribute with that text;
 *  - fills $this->__index (word indexes produced by __prepareText()),
 *    $this->__result_extended (raw matched markup) and $this->__result_meta;
 *  - appends notes to $this->__result_errors;
 *  - runs the "seo" module's onPageCheck event handlers.
 *
 * The method takes no arguments and returns nothing; all results are stored
 * on the object.
 *
 * All tag patterns are single-quoted on purpose: inside a double-quoted PHP
 * string "\1" is an octal escape (byte 0x01), not a PCRE backreference.
 */
function _PrepareData()
{
	// Read the configured limit once and cache it on the object.
	if ($this->pcre_backtrack_limit === false)
		$this->pcre_backtrack_limit = intval(ini_get("pcre.backtrack_limit"));

	// Byte length (plus one) is what the backtrack limit is compared against.
	$text_len = strlen($this->__result_data);
	$text_len++;

	if ($this->pcre_backtrack_limit > 0 && $this->pcre_backtrack_limit < $text_len)
	{
		// Best effort: ini_set may be disabled, so re-read the effective value.
		@ini_set("pcre.backtrack_limit", $text_len);
		$this->pcre_backtrack_limit = intval(ini_get("pcre.backtrack_limit"));
	}

	// The limit is still too low: tell the user the checks may be incomplete.
	if ($this->__bCheckErrors && $this->pcre_backtrack_limit > 0 && $this->pcre_backtrack_limit < $text_len)
	{
		$this->__result_errors[] = array(
			'CODE' => 'SEO_PCRE',
			'TYPE' => 'NOTE',
			'DETAIL' => array(
				'#PCRE_BACKTRACK_LIMIT#' => $this->pcre_backtrack_limit,
				'#TEXT_LEN#' => $text_len,
			)
		);
	}

	$this->__index = array('TOTAL' => array(), 'BOLD' => array(), 'ITALIC' => array(), 'LINK' => array(), 'DESCRIPTION' => array(), 'KEYWORDS' => array());

	// Replace every image that has an ALT or TITLE attribute with that attribute's text.
	// NOTE(review): the "<img[^>" prefix was reconstructed from a damaged literal - confirm against upstream.
	$replaced = preg_replace('/<img[^>]*(alt|title)="([^"]*)".*?>/is', '\2', $this->__result_data);
	// preg_replace() returns null on a PCRE error; keep the body intact in that case.
	if ($replaced !== null)
		$this->__result_data = $replaced;

	// Any <img> still present had neither attribute. preg_match_all() is used
	// so that #COUNT# is the number of such images, not just 0 or 1.
	$arImg = array();
	if ($this->__bCheckErrors && ($img_cnt = preg_match_all('/<img.*?>/is', $this->__result_data, $arImg)))
	{
		$this->__result_errors[] = array(
			'CODE' => 'SEO_IMG_NO_ALT',
			'TYPE' => 'NOTE',
			'DETAIL' => array(
				'#COUNT#' => $img_cnt
			)
		);
	}

	// Full words index.
	$this->__index['TOTAL'] = $this->__prepareText($this->__result_data);

	// Bold words index.
	$arRes = array();
	if (preg_match_all('/<(b|strong)>(.*?)<\/\1>/is', $this->__result_data, $arRes))
	{
		$this->__result_extended['BOLD'] = $arRes[0];
		$this->__index['BOLD'] = $this->__prepareText(implode(" ", $arRes[2]));
	}

	// Italic words index.
	if (preg_match_all('/<(i|em)>(.*?)<\/\1>/is', $this->__result_data, $arRes))
	{
		$this->__result_extended['ITALIC'] = $arRes[0];
		$this->__index['ITALIC'] = $this->__prepareText(implode(" ", $arRes[2]));
	}

	// Content of <noindex> tags.
	if (preg_match_all('/<(noindex)>(.*?)<\/\1>/is', $this->__result_data, $arRes))
	{
		$this->__result_extended['NOINDEX'] = $arRes[0];
		$this->__index['NOINDEX'] = $this->__prepareText(implode(" ", $arRes[2]));
	}

	// Link words index: $arRes[2] holds each anchor's attributes, $arRes[3] its inner markup.
	if (preg_match_all('/<(a) ([^>]*)>(.*?)<\/\1>/is', $this->__result_data, $arRes))
	{
		$this->__result_extended['LINK'] = $arRes[0];
		$this->__index['LINK'] = $this->__prepareText(implode(" ", $arRes[3]));
		$this->__result_extended['NOFOLLOW'] = array();
		$this->__result_extended['LINK_EXTERNAL'] = array();
		$this->__index['LINK_EXTERNAL'] = array();

		foreach ($arRes[2] as $key => $attrs)
		{
			if (false !== mb_strpos($attrs, 'rel="nofollow"'))
				$this->__result_extended['NOFOLLOW'][] = $arRes[0][$key];

			if (false !== ($pos = mb_strpos($attrs, 'href="')))
			{
				// Skip the 6 characters of href=" so that only the attribute
				// value is handed to IsOuterUrl().
				$url_start = $pos + 6;
				$url_end = mb_strpos($attrs, '"', $url_start);

				// An unterminated href value cannot be extracted reliably.
				if ($url_end !== false)
				{
					$url = mb_substr($attrs, $url_start, $url_end - $url_start);
					if ($this->IsOuterUrl($url))
					{
						$this->__index['LINK_EXTERNAL'] = array_merge($this->__index['LINK_EXTERNAL'], $this->__prepareText($arRes[3][$key]));
						$this->__result_extended['LINK_EXTERNAL'][] = $arRes[0][$key];
					}
				}
			}
		}

		if ($this->__bCheckErrors && count($arRes[0]) > $this->__qualifier_links_count)
		{
			$this->__result_errors[] = array(
				'CODE' => 'SEO_LINKS_COUNT',
				'TYPE' => 'NOTE',
				'DETAIL' => array(
					'#COUNT#' => count($arRes[0]),
					'#COUNT_EXTERNAL#' => count($this->__result_extended['LINK_EXTERNAL']),
					'#QUALIFIER#' => $this->__qualifier_links_count,
				)
			);
		}
	}

	// Meta description words index; group 1 is the content attribute value.
	// NOTE(review): pattern reconstructed from a damaged literal (only "]*>" survived) - confirm against upstream.
	// NOTE(review): unlike the other notes, the "missing" note below is recorded even when __bCheckErrors is off - confirm that is intended.
	if (preg_match('/<meta[^>]*?name="description"[^>]*?content="([^"]+)"[^>]*>/i', $this->__result_data, $arRes))
	{
		$this->__result_meta['DESCRIPTION'] = $arRes[1];
		$this->__result_extended['META_DESCRIPTION'] = $arRes[0];
		$this->__index['DESCRIPTION'] = $this->__prepareText($this->__result_meta['DESCRIPTION']);
	}
	else
	{
		$this->__result_errors[] = array(
			'CODE' => 'SEO_META_NO_DESCRIPTION',
			'TYPE' => 'NOTE',
			'DETAIL' => array()
		);
	}

	// Meta keywords words index; same shape and same caveats as the description pattern above.
	if (preg_match('/<meta[^>]*?name="keywords"[^>]*?content="([^"]+)"[^>]*>/i', $this->__result_data, $arRes))
	{
		$this->__result_meta['KEYWORDS'] = $arRes[1];
		$this->__result_extended['META_KEYWORDS'] = $arRes[0];
		$this->__index['KEYWORDS'] = $this->__prepareText($this->__result_meta['KEYWORDS']);
	}
	else
	{
		$this->__result_errors[] = array(
			'CODE' => 'SEO_META_NO_KEYWORDS',
			'TYPE' => 'NOTE',
			'DETAIL' => array()
		);
	}

	// Page title words index.
	if (preg_match('/<(title)>(.*?)<\/\1>/is', $this->__result_data, $arRes))
	{
		$this->__result_extended['TITLE'] = $arRes[0];
		$this->__index['TITLE'] = $this->__prepareText($arRes[2]);
	}

	// Raw markup of every heading h1..h6.
	if (preg_match_all('/<(h[1-6]).*?>.*?<\/\1>/is', $this->__result_data, $arRes))
	{
		$this->__result_extended['H'] = $arRes[0];
	}

	// H1 words index, plus notes when there is more than one H1 or none at all.
	if (preg_match_all('/<(h1).*?>(.*?)<\/\1>/is', $this->__result_data, $arRes))
	{
		if ($this->__bCheckErrors && count($arRes[0]) > 1)
		{
			$this->__result_errors[] = array(
				'CODE' => 'SEO_H1_UNIQUE',
				'TYPE' => 'NOTE',
				'DETAIL' => array(
					'#COUNT#' => count($arRes[0]),
					'#VALUES#' => htmlspecialcharsbx('"'.implode('", "', $arRes[2]).'"'),
				)
			);
		}
		$this->__index['H1'] = $this->__prepareText(implode(" ", $arRes[2]));
	}
	elseif ($this->__bCheckErrors)
	{
		$this->__result_errors[] = array(
			'CODE' => 'SEO_H1_ABSENT',
			'TYPE' => 'NOTE',
			'DETAIL' => array()
		);
	}

	// Let onPageCheck handlers add their own notes: a handler that returns a
	// falsy value while an application exception is set contributes one entry.
	if ($this->__bCheckErrors)
	{
		foreach (GetModuleEvents('seo', 'onPageCheck', true) as $arEvent)
		{
			if (!ExecuteModuleEventEx($arEvent, array(
				array(
					'URL' => $this->__url,
					'LANG' => $this->__lang,
					'SERVER_NAME' => $this->__server_name,
					'SITE' => $this->__site,
				),
				array(
					'HEADERS' => $this->__result_headers,
					'BODY' => $this->__result_data,
				),
				$this->__result_meta,
				$this->__index,
			)) && ($ex = $GLOBALS['APPLICATION']->GetException()))
			{
				$this->__result_errors[] = array(
					'CODE' => $ex->GetId(),
					'TYPE' => 'NOTE',
					'TEXT' => $ex->GetString(),
				);
			}
		}
	}
}