- Модуль: crm
- Путь к файлу: ~/bitrix/modules/crm/lib/ml/featurebuilder.php
- Класс: Bitrix\Crm\Ml\FeatureBuilder
- Вызов: FeatureBuilder::clearText
/**
 * Reduces free-form text to a space-separated list of "significant" words.
 *
 * Processing order:
 *  1. the input is passed through HTMLToTxt() (presumably HTML-to-plain-text
 *     conversion; the helper is defined outside this file);
 *  2. BBCode-style tags such as [b], [/b] or [url=...] are replaced by a space;
 *  3. punctuation characters are replaced by a space;
 *  4. runs of whitespace are collapsed into a single space;
 *  5. words of three characters or fewer (measured with mb_strlen) are dropped,
 *     as are words rejected by UtfSafeString::checkEncoding();
 *  6. optionally only the first $maxWords remaining words are kept.
 *
 * NOTE(review): the regular expressions run without the `u` modifier, so the
 * POSIX classes are applied byte-wise; confirm this is intended for UTF-8 input.
 *
 * @param string $input Source text to clean.
 * @param int $maxWords Maximum number of words to return; values <= 0 mean "no limit".
 * @return string Remaining words joined by single spaces (empty string if none remain).
 */
static function clearText($input, $maxWords = 0)
{
	$text = HTMLToTxt($input);

	// Strip BBCode. Both delimiting brackets are escaped so that a tag must
	// really start with "[": the unescaped form treated "[[\/\!]" as a
	// character class and therefore swallowed any text preceding a stray "]".
	$text = preg_replace('/\[[\/\!]*?[^\[\]]*?\]/si', ' ', $text);

	// Strip punctuation (this also removes any unmatched brackets left above).
	$text = preg_replace("/[[:punct:]]/", ' ', $text);

	// Collapse whitespace runs so that splitting on a single space is reliable.
	$text = preg_replace("/[[:space:]]+/", ' ', $text);

	// Keep only words longer than three characters with a valid encoding.
	// Empty fragments produced by leading/trailing spaces are dropped here too.
	$words = array_filter(
		explode(" ", $text),
		static function ($word)
		{
			return mb_strlen($word) > 3
				&& \Bitrix\Main\Text\UtfSafeString::checkEncoding($word);
		}
	);

	if ($maxWords > 0)
	{
		// array_slice() reindexes, so the key gaps left by array_filter() do not matter.
		$words = array_slice($words, 0, $maxWords);
	}

	return join(" ", $words);
}