找回密码
 注册账户
查看: 1107|回复: 2

Typo Generator Class

[复制链接]
棋子 发表于 2008-9-10 01:40:59 | 显示全部楼层 |阅读模式
  1. <?php
  2. /*
  3. Typo Generator Class

  4. - PHP 5 is required
  5. - The class is not intended to be used for the construction of an object but rather as a namespace
  6. - The class has four methods, each of which accepts a string and returns an array of strings that are likely typos of the type that particular function produces
  7. - Copyrighted under the MIT License
  8. - Developer was Scott Horne of Takeshi Media and Web-Professor.net
  9. - http://web-professor.net for more info


  10. Class Functions:
  11. -----------------------------------------------------------

  12. cTypoGenerator::getWrongKeyTypos( $word )
  13.         Typos based on a user hitting the wrong key that is near the intended key, only uses characters valid in ascii domain names

  14. cTypoGenerator::getMissedCharTypos( $word )
  15.         Typos based on a missed key

  16. cTypoGenerator::getTransposedCharTypos( $word )
  17.         Typos based on transposition errors

  18. cTypoGenerator::getDoubleCharTypos( $word )
  19.         Typos based on hitting an intended key twice

  20. cTypoGenerator::getAllTypos( $word )
  21.         This calls all the typos and returns every variety


  22. Example Usage:
  23. -----------------------------------------------------------
  24. $word = "Hello";
  25. $typos = array();
  26. $typos = cTypoGenerator::getAllTypos( $word );

  27. print_r( $typos );







  28. Copyright (c) 2006, Takeshi Media

  29. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

  30. The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

  31. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

  32. */




  /**
   * Generates likely misspellings ("typos") of a word.
   *
   * The class is used purely as a namespace: every member is static and no
   * instance is ever needed. All generators lower-case their input first and
   * operate byte-by-byte, so they are intended for ASCII input such as
   * domain-name labels.
   *
   * Results are not de-duplicated. For example, a word with a doubled letter
   * yields the same "missed character" typo twice, and transposing two
   * identical neighbours yields the word itself.
   */
  class cTypoGenerator
  {
      /**
       * Keys adjacent to each character on a QWERTY keyboard.
       *
       * Only characters that are valid in a domain name are listed, both as
       * keys and as neighbours.
       */
      public static $keyboard = array(
          // top row
          '1' => array( '2', 'q' ),
          '2' => array( '1', 'q', 'w', '3' ),
          '3' => array( '2', 'w', 'e', '4' ),
          '4' => array( '3', 'e', 'r', '5' ),
          '5' => array( '4', 'r', 't', '6' ),
          '6' => array( '5', 't', 'y', '7' ),
          '7' => array( '6', 'y', 'u', '8' ),
          '8' => array( '7', 'u', 'i', '9' ),
          '9' => array( '8', 'i', 'o', '0' ),
          '0' => array( '9', 'o', 'p', '-' ),
          '-' => array( '0', 'p' ),
          // 2nd from top
          'q' => array( '1', '2', 'w', 'a' ),
          'w' => array( 'q', 'a', 's', 'e', '3', '2' ),
          'e' => array( 'w', 's', 'd', 'r', '4', '3' ),
          'r' => array( 'e', 'd', 'f', 't', '5', '4' ),
          't' => array( 'r', 'f', 'g', 'y', '6', '5' ),
          'y' => array( 't', 'g', 'h', 'u', '7', '6' ),
          'u' => array( 'y', 'h', 'j', 'i', '8', '7' ),
          'i' => array( 'u', 'j', 'k', 'o', '9', '8' ),
          'o' => array( 'i', 'k', 'l', 'p', '0', '9' ),
          'p' => array( 'o', 'l', '-', '0' ),
          // home row
          'a' => array( 'z', 's' , 'w', 'q' ),
          's' => array( 'a', 'z', 'x', 'd', 'e', 'w' ),
          'd' => array( 's', 'x', 'c', 'f', 'r', 'e' ),
          'f' => array( 'd', 'c', 'v', 'g', 't', 'r' ),
          'g' => array( 'f', 'v', 'b', 'h', 'y', 't' ),
          'h' => array( 'g', 'b', 'n', 'j', 'u', 'y' ),
          'j' => array( 'h', 'n', 'm', 'k', 'i', 'u' ),
          'k' => array( 'j', 'm', 'l', 'o', 'i' ),
          'l' => array( 'k', 'p', 'o' ),
          // bottom row
          'z' => array( 'x', 's', 'a' ),
          'x' => array( 'z', 'c', 'd', 's' ),
          'c' => array( 'x', 'v', 'f', 'd' ),
          'v' => array( 'c', 'b', 'g', 'f' ),
          'b' => array( 'v', 'n', 'h', 'g' ),
          'n' => array( 'b', 'm', 'j', 'h' ),
          'm' => array( 'n', 'k', 'j' )
      );

      /**
       * Returns every typo variety for a word.
       *
       * The result is the concatenation, in this order, of the wrong-key,
       * missed-character, transposed-character and double-character typos.
       *
       * @param string $word Word to generate typos for.
       * @return array List of typo strings (may contain duplicates).
       */
      public static function getAllTypos( $word )
      {
          return array_merge(
              self::getWrongKeyTypos( $word ),
              self::getMissedCharTypos( $word ),
              self::getTransposedCharTypos( $word ),
              self::getDoubleCharTypos( $word )
          );
      }

      /**
       * Returns single "wrong key" typos: each character replaced in turn by
       * every neighbouring key listed in self::$keyboard.
       *
       * Characters that have no entry in the keyboard map are left alone and
       * contribute no typos.
       *
       * @param string $word Word to generate typos for.
       * @return array List of typo strings, each the same length as $word.
       */
      public static function getWrongKeyTypos( $word )
      {
          $word = strtolower( $word );
          $typos = array();
          $length = strlen( $word );

          for ( $i = 0; $i < $length; $i++ ) {
              $char = $word[$i];

              // isset() avoids an undefined-index notice for characters
              // that are not on the map (e.g. '.', '_' or whitespace).
              if ( !isset( self::$keyboard[$char] ) ) {
                  continue;
              }

              foreach ( self::$keyboard[$char] as $neighbour ) {
                  $typo = $word;
                  $typo[$i] = $neighbour;
                  $typos[] = $typo;
              }
          }

          return $typos;
      }

      /**
       * Returns single "missed character" typos: the word with each character
       * removed in turn.
       *
       * @param string $word Word to generate typos for.
       * @return array One string per character of $word.
       */
      public static function getMissedCharTypos( $word )
      {
          $word = strtolower( $word );
          $typos = array();
          $length = strlen( $word );

          for ( $i = 0; $i < $length; $i++ ) {
              // Concatenation always yields a string, even on PHP versions
              // where substr() returns false at the end of the string.
              $typos[] = substr( $word, 0, $i ) . substr( $word, $i + 1 );
          }

          return $typos;
      }

      /**
       * Returns transposition typos: the word with each pair of adjacent
       * characters swapped in turn.
       *
       * @param string $word Word to generate typos for.
       * @return array One string per adjacent pair (empty for words shorter
       *               than two characters).
       */
      public static function getTransposedCharTypos( $word )
      {
          $word = strtolower( $word );
          $typos = array();
          // The last character has no right-hand neighbour to swap with.
          $lastIndex = strlen( $word ) - 1;

          for ( $i = 0; $i < $lastIndex; $i++ ) {
              $typo = $word;
              $typo[$i] = $word[$i + 1];
              $typo[$i + 1] = $word[$i];
              $typos[] = $typo;
          }

          return $typos;
      }

      /**
       * Returns "double character" typos: the word with each character
       * entered twice in turn.
       *
       * @param string $word Word to generate typos for.
       * @return array One string per character of $word, each one character
       *               longer than $word.
       */
      public static function getDoubleCharTypos( $word )
      {
          $word = strtolower( $word );
          $typos = array();
          $length = strlen( $word );

          for ( $i = 0; $i < $length; $i++ ) {
              // first part of the word up to and including position $i,
              // the repeated character, then the rest of the string if any
              $typos[] = substr( $word, 0, $i + 1 ) . $word[$i] . substr( $word, $i + 1 );
          }

          return $typos;
      }
  }


  195. ?>

复制代码
tonybuy 发表于 2008-9-11 00:06:11 | 显示全部楼层
牛人。。如此复杂,感情支持
您需要登录后才可以回帖 登录 | 注册账户

本版积分规则

存档|黑屋|手机|网络实验室 本站服务器由美国合租以及IDCLayer国际数据提供!!!

GMT+8, 2026-6-4 14:41 , Processed in 0.010571 second(s), 9 queries , Gzip On, Redis On.

Powered by Discuz! X3.5

© 2001-2025 Discuz! Team.

快速回复 返回顶部 返回列表