- Модуль: mail
- Путь к файлу: ~/bitrix/modules/mail/classes/general/mail.php
- Класс: CMailFilter
- Вызов: CMailFilter::GetSpamRating
/**
 * Estimates how likely a message is to be spam from per-word statistics
 * stored in the b_mail_spam_weight table.
 *
 * The message is split into words by CMailFilter::getWords(); each word is
 * looked up by the md5 hash of its text. For every matching row a rating
 *   Si = (BAD_CNT / maxBad) / (2 * GOOD_CNT / maxGood + BAD_CNT / maxBad)
 * is computed in SQL, where maxGood / maxBad are the table-wide maxima of
 * GOOD_CNT / BAD_CNT (never less than 1). Up to "spam_word_count" rows with
 * the largest |Si - 0.5| are combined as a / (a + b), where a is the product
 * of Si and b is the product of (1 - Si). Missing rows count as Si = 0.4.
 *
 * @param mixed $message Message handed over unchanged to CMailFilter::getWords().
 * @return int|array Integer 0 when no words could be extracted (kept for
 *                   backward compatibility); otherwise
 *                   array("RATING" => percentage in the range 0..100,
 *                         "WORDS"  => one "word rating% BAD_CNT GOOD_CNT" line per used word).
 */
static function GetSpamRating($message)
{
    global $DB, $BX_MAIL_SPAM_CNT;

    $arWords = CMailFilter::getWords($message, 1000);
    if (empty($arWords))
        return 0;

    // Rows are keyed by the md5 hash of the word (WORD_ID).
    $arWords = array_map("md5", $arWords);

    // The table-wide maxima are loaded once and cached in a global.
    if (!is_set($BX_MAIL_SPAM_CNT, "G"))
    {
        $strSql = "SELECT MAX(GOOD_CNT) as G, MAX(BAD_CNT) as B FROM b_mail_spam_weight";
        if ($res = $DB->Query($strSql))
            $BX_MAIL_SPAM_CNT = $res->Fetch();

        // If the query failed or returned no row, start from an empty array
        // instead of indexing into a non-array value.
        if (!is_array($BX_MAIL_SPAM_CNT))
            $BX_MAIL_SPAM_CNT = array();

        // Both maxima are used as divisors, so they must be at least 1.
        foreach (array("G", "B") as $key)
        {
            if (!isset($BX_MAIL_SPAM_CNT[$key]) || intval($BX_MAIL_SPAM_CNT[$key]) <= 0)
                $BX_MAIL_SPAM_CNT[$key] = 1;
        }
    }

    // Cast before interpolating into SQL so only plain integers reach the query.
    $maxGood = max(1, intval($BX_MAIL_SPAM_CNT["G"]));
    $maxBad = max(1, intval($BX_MAIL_SPAM_CNT["B"]));

    $CNT_WORDS = COption::GetOptionInt("mail", "spam_word_count", B_MAIL_WORD_CNT);
    $MIN_COUNT = COption::GetOptionInt("mail", "spam_min_count", B_MAIL_MIN_CNT);

    // Si for a single word; the ".0" suffix forces non-integer division.
    $ratingSql = "(BAD_CNT/".$maxBad.".0) / (2*GOOD_CNT/".$maxGood.".0 + BAD_CNT/".$maxBad.".0)";
    // Distance of Si from the neutral value 0.5.
    $modRatingSql = "ABS(".$ratingSql." - 0.5)";

    // Select up to $CNT_WORDS words with the largest |Si - 0.5|.
    // Words whose TOTAL_CNT does not exceed $MIN_COUNT, or whose rating is
    // within 0.1 of neutral, are ignored.
    $strSql =
        "SELECT SW.*, ".
        " ".$ratingSql." as RATING, ".
        " ".$modRatingSql." as MOD_RATING ".
        "FROM b_mail_spam_weight SW ".
        "WHERE WORD_ID IN ('".implode("', '", $arWords)."') ".
        " AND ".$modRatingSql." > 0.1 ".
        " AND TOTAL_CNT>".$MIN_COUNT." ".
        "ORDER BY MOD_RATING DESC ".
        ($DB->type == "MYSQL" ? "LIMIT ".$CNT_WORDS : "");

    $dbr = $DB->Query($strSql, false, "File: ".__FILE__."\nLine: ".__LINE__);

    $a = 1;
    $b = 1;
    $words = "";
    $used = 0;

    // Fold the fetched words into the two products. The explicit counter also
    // caps the number of rows on databases where no LIMIT clause was added.
    while ($used < $CNT_WORDS && ($arr = $dbr->Fetch()))
    {
        $rating = $arr["RATING"];
        $words .= $arr["WORD_REAL"]." ".round($rating * 100, 4)." ".$arr["BAD_CNT"]." ".$arr["GOOD_CNT"]."\n";

        // Extreme ratings are replaced by near-extreme values so that a
        // single word cannot zero out a whole product.
        $a = $a * ($rating == 0 ? 0.00001 : $rating);
        $b = $b * (1 - ($rating == 1 ? 0.9999 : $rating));
        $used++;
    }

    // Every slot not filled by a fetched word gets the weight Si = 0.4.
    for ($i = $used; $i < $CNT_WORDS; $i++)
    {
        $a = $a * 0.4;
        $b = $b * (1 - 0.4);
    }

    // Combine the products into the rating for the whole message, in percent.
    $rating = $a / ($a + $b) * 100;

    return array("RATING" => $rating, "WORDS" => $words);
}