﻿
// Keywords treated as noise when cleaning a search query. is_ch_noise_word()
// compares a trimmed, lower-cased keyword against these entries, so every
// alphabetic entry is lower-case. The list mixes single ASCII letters and
// digits, common English function words, common single Chinese characters and
// ASCII punctuation.
// Declared with `var` so the name is an explicit global: a bare assignment to
// an undeclared name throws a ReferenceError in strict mode / ES modules.
var noise_word_list_ch = ["?","about","$","1","2","3","4","5","6","7","8","9","0","_",
        "a","b","c","d","e","f","g","h","i","j","k","l","m","n","o",
        "p","q","r","s","t","u","v","w","x","y","z","after","all","also",
        "an","and","another","any","are","as","at","be","because","been",
        "before","being","between","both","but","by","came","can","come",
        "could","did","do","each","for","from","get","got","had","has",
        "have","he","her","here","him","himself","his","how","if","in","into",
        "is","it","like","make","many","me","might","more","most","much","must",
        "my","never","now","of","on","only","or","other","our","out","over","said",
        "same","see","should","since","some","still","such","take","than","that",
        "the","their","them","then","there","these","they","this","those","through",
        "to","too","under","up","very","was","way","we","well","were","what","where",
        "which","while","who","with","would","you","your",
        "的","一","不","在","人","有","是","为","以","于","上","他","而","后","之","来",
        "及","了","因","下","可","到","由","这","与","也","此","但","并","个","其","已",
         "无","小","我","们","起","最","再","今","去","好","只","又","或","很","亦","某",
        "把","那","你","乃","它","'","!","@","#","%","^","&","*","(",")","-","+","|","\/","/","=","~","`",",",".",":",";"];

// Single characters that is_ch_noise_symbol() reports as noise symbols.
var noise_symbol_list_ch = ["?","'","!","&","(",")"];
        
/**
 * Reports whether a keyword is listed in noise_word_list_ch.
 *
 * The keyword is passed through Trim() (a helper defined outside this
 * section; presumably it strips surrounding whitespace - confirm) and
 * lower-cased before it is compared with the list entries.
 *
 * @param {string} str_key keyword to look up
 * @returns {boolean} true when the normalised keyword equals a list entry
 */
function is_ch_noise_word(str_key){
   var key_word = Trim(str_key).toLowerCase();
   var listlength = noise_word_list_ch.length;
   // The counter is declared locally so it neither leaks into global scope
   // nor throws a ReferenceError when the script runs in strict mode.
   for (var i = 0; i < listlength; i++){
      if (noise_word_list_ch[i] === key_word){
         return true;
      }
   }
   return false;
}

/**
 * Reports whether a string is listed in noise_symbol_list_ch.
 *
 * The input is passed through Trim() (a helper defined outside this section;
 * presumably it strips surrounding whitespace - confirm) and lower-cased
 * before it is compared with the list entries.
 *
 * @param {string} str_key character (or short string) to look up
 * @returns {boolean} true when the normalised input equals a list entry
 */
function is_ch_noise_symbol(str_key)
{
   var key_word = Trim(str_key).toLowerCase();
   var listlength = noise_symbol_list_ch.length;
   // The counter is declared locally so it neither leaks into global scope
   // nor throws a ReferenceError when the script runs in strict mode.
   for (var i = 0; i < listlength; i++){
      if (noise_symbol_list_ch[i] === key_word){
         return true;
      }
   }
   return false;
}

/**
 * Cleans a space-separated search query.
 *
 * Steps:
 *   1. Trim the input and turn every ASCII parenthesis into its full-width
 *      form, so parentheses survive the symbol stripping in step 2.
 *   2. Split on single spaces and drop from each keyword every character that
 *      is_ch_noise_symbol() reports as noise.
 *   3. Discard keywords that end up empty (consecutive spaces, symbol-only
 *      keywords) or that is_ch_noise_word() reports as noise. This applies to
 *      every keyword, including the last one.
 *   4. Join the remaining keywords with single spaces.
 *
 * @param {string} str_source raw query text
 * @returns {string} the cleaned query, or "" when nothing is left
 */
function remove_noise_word(str_source){
  var kept = [];
  var words;
  var word;
  var cleaned;
  var ch;
  var w;
  var c;
  var str_out;

  str_source = Trim(str_source);
  // A string pattern would only replace the first occurrence, so use /g.
  str_source = str_source.replace(/\(/g, "（").replace(/\)/g, "）");

  if (str_source.length == 0){
    return "";
  }

  // A space marks the start of the next keyword.
  words = str_source.split(" ");
  for (w = 0; w < words.length; w++){
    word = words[w];
    cleaned = "";
    for (c = 0; c < word.length; c++){
      ch = word.charAt(c);
      // Noise symbols are removed from inside the keyword.
      if (!is_ch_noise_symbol(ch)){
        cleaned = cleaned + ch;
      }
    }
    // Skipping empty keywords is what keeps consecutive spaces (and keywords
    // made only of noise symbols) from producing doubled separators.
    if (cleaned !== "" && !is_ch_noise_word(cleaned)){
      kept.push(cleaned);
    }
  }

  str_out = Trim(kept.join(" "));
  // NOTE(review): a result that is exactly "1001it.COM" has always been
  // reported as empty; presumably this is a placeholder value that must not
  // be searched for - confirm before removing.
  if (str_out == "1001it.COM"){
    str_out = "";
  }
  return str_out;
}

//--------Percent-encode text (e.g. Chinese characters) as UTF-8--------//
/**
 * Returns s1 with every character outside the escape() safe set written as
 * %XX byte escapes.
 *
 * - ASCII characters are passed through escape(), so letters, digits and
 *   "@*_+-./" stay literal and other ASCII becomes a single %XX. This matches
 *   the output callers have always received for ASCII input.
 * - Every non-ASCII character is written as its UTF-8 byte sequence
 *   (2 bytes below U+0800, 3 bytes up to U+FFFF, 4 bytes for a surrogate
 *   pair). A mixed Latin-1 / fixed-3-byte form would not be valid UTF-8.
 * - A surrogate without its partner cannot be expressed in UTF-8 and is
 *   written as U+FFFD (%EF%BF%BD), because encodeURIComponent() would throw a
 *   URIError on it.
 *
 * @param {*} s1 value to encode; converted to a string first
 * @returns {string} the percent-encoded text
 */
 function EncodeUtf8(s1)
  {
      var text = String(s1);
      var retV = "";
      for(var i = 0; i < text.length; i ++)
      {
           var code = text.charCodeAt(i);
           var piece = text.charAt(i);
           if(code < 0x80)
           {
               retV += escape(piece);
               continue;
           }
           // Join a high surrogate with the low surrogate that follows it so
           // the pair is encoded as one 4-byte sequence.
           if(code >= 0xD800 && code <= 0xDBFF && i + 1 < text.length)
           {
               var low = text.charCodeAt(i + 1);
               if(low >= 0xDC00 && low <= 0xDFFF)
               {
                   piece += text.charAt(i + 1);
                   i ++;
               }
           }
           if(code >= 0xD800 && code <= 0xDFFF && piece.length == 1)
           {
               // Unpaired surrogate: substitute the replacement character.
               retV += "%EF%BF%BD";
           }
           else
           {
               retV += encodeURIComponent(piece);
           }
      }

      return retV;
  }
  /**
   * Expands a string of hexadecimal digits into binary digits, four bits per
   * input character (for example "4E" becomes "01001110").
   *
   * Upper- and lower-case hex digits are both accepted. Any character that
   * is not a hex digit is looked up as -1, which Dec2Dig() renders as "0000".
   *
   * @param {string} s hexadecimal digits
   * @returns {string} concatenated 4-bit groups, one per input character
   */
  function Str2Hex(s)
  {
      var ss = "0123456789ABCDEF";
      var digS = "";
      for(var i = 0; i < s.length; i ++)
      {
         // Upper-case first so "4e2d" and "4E2D" expand identically.
         var n = ss.indexOf(s.charAt(i).toUpperCase());
         // indexOf already yields a number, so it is handed over directly.
         digS += Dec2Dig(n);
      }
      return digS;
  }
  /**
   * Renders a number as four binary digits by testing the place values
   * 8, 4, 2 and 1 in turn and subtracting each one that fits.
   *
   * Values from 0 to 15 give their 4-bit binary form. Because only four
   * place values are tested, larger values give "1111" and values below 1
   * give "0000".
   *
   * @param {number} n1 value to convert
   * @returns {string} exactly four characters, each '0' or '1'
   */
  function Dec2Dig(n1)
  {
      var bits = "";
      var remaining = n1;
      for(var weight = 8; weight >= 1; weight = weight / 2)
      {
          var fits = remaining >= weight;
          bits += fits ? '1' : '0';
          if(fits)
          {
              remaining = remaining - weight;
          }
      }
      return bits;
  }
  /**
   * Converts a string of exactly four binary digits to its numeric value.
   *
   * Each character is compared with '0' and '1' directly rather than being
   * evaluated as code, so unexpected input is rejected instead of executed.
   *
   * @param {string} s four characters, each '0' or '1'
   * @returns {number} the value 0..15, or -1 when s is not four characters
   *                   long or contains anything other than '0' and '1'
   */
  function Dig2Dec(s)
  {
      var retV = 0;
      if(s.length == 4)
      {
          for(var i = 0; i < 4; i ++)
          {
              var bit = s.charAt(i);
              if(bit === "1")
              {
                  // Position i carries the place value 2^(3 - i).
                  retV += Math.pow(2, 3 - i);
              }
              else if(bit !== "0")
              {
                  return -1;
              }
          }
          return retV;
      }
      return -1;
  }
  /**
   * Converts one 16-bit character code, given as a string of sixteen binary
   * digits, into its percent-encoded UTF-8 bytes.
   *
   * The number of bytes follows the UTF-8 rules: one byte below 0x80, two
   * bytes below 0x800 and three bytes otherwise. Always emitting the
   * three-byte form would produce overlong (invalid) sequences for the
   * smaller values. A surrogate code unit (0xD800-0xDFFF) is encoded as three
   * bytes like any other value, because its partner is not available here.
   *
   * @param {string} s sixteen characters, each '0' or '1'
   * @returns {string} "%XX" groups with upper-case hex digits, or "" when s
   *                   is not sixteen binary digits
   */
  function Hex2Utf8(s)
  {
     if(s.length != 16 || !/^[01]+$/.test(s))
     {
         return "";
     }
     var code = parseInt(s, 2);
     var bytes;
     if(code < 0x80)
     {
         bytes = [code];
     }
     else if(code < 0x800)
     {
         // 110xxxxx 10xxxxxx
         bytes = [0xC0 | (code >> 6), 0x80 | (code & 0x3F)];
     }
     else
     {
         // 1110xxxx 10xxxxxx 10xxxxxx
         bytes = [0xE0 | (code >> 12), 0x80 | ((code >> 6) & 0x3F), 0x80 | (code & 0x3F)];
     }
     var sss = "0123456789ABCDEF";
     var retS = "";
     for(var i = 0; i < bytes.length; i ++)
     {
        retS += "%" + sss.charAt(bytes[i] >> 4) + sss.charAt(bytes[i] & 0x0F);
     }
     return retS;
  }