<?php
/* vim: set expandtab tabstop=4 shiftwidth=4: */
// +------------------------------------------------------------------------
// Name       : Weight calculation
// Description: With minor changes this can also be used for word segmentation,
//              word-frequency statistics, full-text search and spam detection.
// Date       : 2013/12/16 08:51

/**
 * Scores text items against a lookup table of words.
 *
 * Every row of the table is a list of words separated by ',' or '|'. All words
 * are stored in a byte-wise trie; each word remembers the key(s) of the table
 * row(s) it came from. An item is scanned left to right, every recognised word
 * votes for its row keys, and the 'max' rule reports the row key(s) that
 * collected the most votes for that item.
 */
class weight {
    /**
     * Trie storage. Index 0 is the root node. Each node maps a single byte to
     * the index of its child node; a node that terminates a word additionally
     * carries an 'acc' entry listing the table keys of that word.
     */
    protected $aDict = array(array());

    /** Items to be scored, in the caller's original key order. */
    protected $aItems = array();

    /** Rule used to build the cached $aShow, or null when nothing is cached. */
    protected $sLastRule;

    /** Per-item vote counts (table key => number of hits) of the last run. */
    protected $aMatchs = array();

    /** Cached result of the last genShow() run. */
    protected $aShow = array();

    /**
     * Drops the cached match table and result so the next getShow() call
     * recomputes them.
     */
    private function init() {
        // Reset rather than unset(): the properties stay declared and typed as arrays.
        $this->aShow = array();
        $this->aMatchs = array();
        $this->sLastRule = null;
    }

    /**
     * Replaces the items to score.
     *
     * @param mixed $mItems An array of items, or a single item (wrapped into an array).
     */
    public function newItems($mItems) {
        $this->aItems = (is_array($mItems)) ? $mItems : array($mItems);
        $this->init();
    }

    /**
     * Replaces the lookup table and rebuilds the dictionary from it.
     *
     * @param array $aTable Row key => string of words separated by ',' or '|'.
     */
    public function newTable(array $aTable) {
        // Start from an empty trie: keeping the words of a previous table would
        // mix rows of different tables under the same row keys.
        $this->aDict = array(array());
        foreach ($aTable as $iTableKey => $sTableLine) {
            $aWords = explode(',', str_replace('|', ',', $sTableLine));
            foreach ($aWords as $sWord) {
                $this->genDict($sWord, $iTableKey);
            }
        }
        $this->init();
    }

    /**
     * Returns the result for the given rule, reusing the cached result when
     * neither the inputs nor the rule changed since it was computed.
     *
     * @param string $sRule Selection rule; only 'max' is implemented.
     * @return array Item key => list of winning table keys. Empty when there
     *               are no items or the dictionary contains no words.
     */
    public function getShow($sRule = 'max') {
        // The root node always exists, so "no dictionary" means "root has no children".
        if (empty($this->aItems) || empty($this->aDict[0])) {
            return array();
        }
        if (empty($this->aShow) || $sRule !== $this->sLastRule) {
            return $this->genShow($sRule);
        }
        return $this->aShow;
    }

    /**
     * Computes the result for every item and stores it as the cached result.
     *
     * @param string $sRule Selection rule; only 'max' is implemented. Any other
     *                      rule yields an empty result.
     * @return array Item key => list of winning table keys.
     */
    public function genShow($sRule) {
        $aShow = array();
        $aMatchs = array();
        foreach ($this->aItems as $mItemKey => $sItem) {
            // Table key => number of recognised words that voted for it.
            $aCounts = array_count_values($this->matchWord($sItem));
            $aMatchs[] = $aCounts;
            switch ($sRule) {
                case 'max':
                    // max() rejects an empty array, so an item without any
                    // recognised word simply has no winner.
                    $aShow[$mItemKey] = empty($aCounts)
                        ? array()
                        : array_keys($aCounts, max($aCounts), true);
                    break;
            }
        }
        $this->aShow = $aShow;
        $this->aMatchs = $aMatchs;
        // Remember the rule so getShow() can tell whether the cache is reusable.
        $this->sLastRule = $sRule;
        return $aShow;
    }

    /**
     * Inserts one word into the trie and tags its final node with a table key.
     *
     * @param mixed $mWord Word to insert; empty words are ignored.
     * @param mixed $iKey  Table row key recorded on the word's final node.
     */
    private function genDict($mWord, $iKey = '') {
        $sWord = (string) $mWord;
        if ($sWord === '') {
            // Produced by inputs such as 'a,,b'; an empty word can never match.
            return;
        }
        // Nodes are numbered consecutively, so the next free index is the node count.
        $iInsertPoint = count($this->aDict);
        $iCur = 0; // index of the current node, starting at the root
        foreach (str_split($sWord) as $sChar) {
            if (isset($this->aDict[$iCur][$sChar])) {
                $iCur = $this->aDict[$iCur][$sChar];
                continue;
            }
            $this->aDict[$iInsertPoint] = array();
            $this->aDict[$iCur][$sChar] = $iInsertPoint;
            $iCur = $iInsertPoint;
            $iInsertPoint++;
        }
        $this->aDict[$iCur]['acc'][] = $iKey;
    }

    /**
     * Scans a line and collects the table keys of every word recognised in it.
     *
     * Matching is byte-wise and non-overlapping, and the first word completed
     * from a start position wins (a word that extends another dictionary word
     * is therefore never reported).
     *
     * @param string $sLine Text to scan.
     * @return array Table keys of all recognised words, in order of occurrence
     *               (a key appears once per word that carries it).
     */
    public function matchWord($sLine) {
        $iCur = 0;      // current trie node
        $iOffset = 0;   // byte currently examined
        $iPosition = 0; // start of the current match attempt
        // Sentinel byte: forces a mismatch at the end of the line so that a
        // partial match still pending there is abandoned and retried.
        $sLine .= "\0";
        $iLen = strlen($sLine);
        $aReturn = array();
        while ($iOffset < $iLen) {
            $sChar = $sLine[$iOffset];
            if (isset($this->aDict[$iCur][$sChar])) {
                $iCur = $this->aDict[$iCur][$sChar];
                if (isset($this->aDict[$iCur]['acc'])) {
                    // Word complete: record its keys and start over right after it.
                    $aReturn = array_merge($aReturn, $this->aDict[$iCur]['acc']);
                    $iPosition = $iOffset + 1;
                    $iCur = 0;
                }
            } else {
                // Mismatch: restart from the root one byte after the start of
                // the failed attempt (the increment below performs the step).
                $iCur = 0;
                $iOffset = $iPosition;
                $iPosition = $iOffset + 1;
            }
            ++$iOffset;
        }
        return $aReturn;
    }
}
?>
外部调用示例
<?php
// Usage example: score three items against a four-row table with the
// `weight` class.
$aItems = array(
    'chinaisbig',
    'whichisnot',
    'totalyrightforme',
);
$aTable = array(
    'china,is|small',
    'china,big|me',
    'china,is|big,which|not,me',
    'totaly|right,for,me',
);

$oWeight = new weight;
$oWeight->newItems($aItems);
$oWeight->newTable($aTable);
// newTable() has no return value; the per-item result (item key => list of
// best-matching table row keys) is obtained from getShow().
$aResult = $oWeight->getShow();