cnmqw 发表于 2013-1-28 09:18:30

Arch-03-10-自动投票机器人

由于生活中需要给某网站上的一张图片投票以决定排名,自然而然地想到做一个自动投票机器人。
 
经过一天的尝试,三种方案:
(1)保存投票页面到本地,分析代码,直接用 js 和 ajax 循环提交,每次循环中间随机休眠几秒。一开始居然有效,不过好景不长,第二天就被网站改了页面,不能直接提交了。
 
 
<html>
<head>
  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
  <title>自动投票机器人一</title>
  <link rel="stylesheet" href="style.css" type="text/css" media="screen">
  <script type="text/javascript">
    // Approach 1: fire a GET vote request over and over, pausing a random
    // amount of time between two requests.

    // Number of vote requests sent so far.
    var counters = 0;
    // Request object, re-created for every vote.
    var xmlHttp;

    // Creates a fresh request object, preferring the standard
    // XMLHttpRequest and falling back to ActiveX on old Internet Explorer.
    function S_xmlhttprequest() {
      if (window.XMLHttpRequest) {
        xmlHttp = new XMLHttpRequest();
      } else if (window.ActiveXObject) {
        xmlHttp = new ActiveXObject("Microsoft.XMLHTTP");
      }
    }

    // Sends one vote request, updates the on-page counter and schedules
    // the next vote after a random delay.
    function vote() {
      var id = 1; // id of the entry being voted for
      S_xmlhttprequest();
      // The random suffix defeats caching of the GET request.
      xmlHttp.open("GET", "http://www.xxxx.com/work/vote.php?id=" + id + "&" + Math.random(), true);
      xmlHttp.send(null);
      // NOTE: the request is asynchronous and its response is never
      // inspected, so this message only means "request sent".
      alert('   投票成功!\n 谢谢您的支持!');
      counters++;
      var counterElement = document.getElementById("counter");
      if (counterElement) {
        if ("textContent" in counterElement) {
          counterElement.textContent = counters;
        } else {
          counterElement.innerText = counters; // old Internet Explorer
        }
      }
      setTimeout(vote, nextTime());
    }

    // Starts the voting loop one second after the page has loaded.
    function Workspace_OnLoad() {
      setTimeout(vote, 1000);
    }

    // Returns a random delay in milliseconds between 0 and 60000.
    function nextTime() {
      return Math.round(Math.random() * 60000);
    }
  </script>
</head>
<body class="bgcolor" onload="Workspace_OnLoad()">
  <div align="center">
    <table class="bgjpg" align="center" border="0" cellpadding="0" cellspacing="0" width="100%">
      <tbody>
        <tr height="10">
          <td>
          </td>
        </tr>
        <tr>
          <td height="30">
            <span id="counter">0</span>
          </td>
        </tr>
      </tbody>
    </table>
  </div>
</body>
</html>
 
(2)发现需要提交表单到服务器,而不是直接发送 GET 请求,并且有 cookie 检查校验码。尝试用 java 编程,好像没成功,但理论上可行。
 

- 获取校验码图片
- 用 Tesseract OCR 解析校验码图片
- 尝试提交模拟表单
- 需要的 jar 包和 Tesseract OCR 可以在网上搜到。
 
package A;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;

import javax.servlet.RequestDispatcher;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpException;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.methods.PostMethod;

import com.overseas.ocr.ImageFilter;
import com.overseas.ocr.ImageIOHelper;
import com.overseas.sys.InitEnv;
import com.overseas.util.PicUtil;

import net.sourceforge.tess4j.*;

/**
 * Approach 2: download the captcha image, read it with Tesseract OCR and
 * submit the vote form with the recognised code, in a long loop with random
 * pauses between attempts.
 */
public class TesseractExample {

    /** Address the vote form is posted to. */
    private static final String VOTE_URL = "http://www.xxxx.com/work/chick.php?id=1";

    /** Address of the captcha image; a random suffix is appended per request. */
    private static final String CAPTCHA_URL = "http://www.xxxx.com/work/che.php?";

    /** Scratch file handed to Tesseract. */
    private static final String OCR_FILE = "C:\\temp\\ocr.tiff";

    /** Number of characters in a captcha code. */
    private static final int CODE_LENGTH = 4;

    /** Number of loop iterations performed by {@link #doit()}. */
    private static final int MAX_ATTEMPTS = 1000000;

    /** Upper bound, in milliseconds, of the random pause between attempts. */
    private static final int MAX_PAUSE_MILLIS = 50000;

    /** Number of vote requests that produced a readable, non-redirect response. */
    public int success = 0;

    /**
     * Runs the voting loop and prints the message of any exception that
     * escapes it.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            TesseractExample t = new TesseractExample();
            t.doit();
        } catch (Exception e) {
            System.err.println(e.getMessage());
        }
    }

    /**
     * Repeats "pause, read captcha, vote" up to {@link #MAX_ATTEMPTS} times.
     * An attempt is skipped when OCR does not yield four leading digits.
     * The loop ends early when the thread is interrupted.
     */
    private void doit() {
        for (int i = 0; i < MAX_ATTEMPTS; i++) {
            try {
                Thread.sleep((long) (Math.random() * MAX_PAUSE_MILLIS));
            } catch (InterruptedException e) {
                // Restore the flag and stop: somebody asked this thread to end.
                Thread.currentThread().interrupt();
                return;
            }

            String decodeText = decode();
            System.out.println("Start excute..." + i + "=====" + decodeText);
            if (decodeText == null) {
                continue;
            }
            decodeText = decodeText.trim();
            if (decodeText.length() < CODE_LENGTH) {
                continue;
            }
            // The code is kept as text so that leading zeros survive.
            String code = decodeText.substring(0, CODE_LENGTH);
            if (!isAllDigits(code)) {
                continue;
            }

            try {
                vote(code);
                System.out.println("call vote..." + code);
            } catch (RuntimeException e) {
                // Best effort: one failed vote must not end the whole run.
                e.printStackTrace();
            }
        }
    }

    /**
     * Tells whether every character of {@code text} is an ASCII digit.
     *
     * @param text text to check, not null
     * @return true when all characters are in '0'..'9'
     */
    private static boolean isAllDigits(String text) {
        for (int i = 0; i < text.length(); i++) {
            char c = text.charAt(i);
            if (c < '0' || c > '9') {
                return false;
            }
        }
        return true;
    }

    /**
     * Posts the vote form with the given captcha code and prints the outcome.
     * The connection is released on every path.
     *
     * @param code captcha code to send in the {@code numtext} field
     */
    private void vote(String code) {
        // NOTE(review): a new HttpClient is created per vote, so no cookie is
        // shared with the captcha download done through PicUtil - confirm
        // whether the site ties the captcha to a session cookie.
        HttpClient httpClient = new HttpClient();
        PostMethod postMethod = new PostMethod(VOTE_URL);
        // Fill in the form fields.
        NameValuePair[] data = {
            new NameValuePair("numtext", code),
            new NameValuePair("submit", "确认"),
        };
        // Put the form values into the POST request.
        postMethod.setRequestBody(data);

        try {
            // Execute the POST request.
            int statusCode;
            try {
                statusCode = httpClient.executeMethod(postMethod);
            } catch (HttpException e) {
                e.printStackTrace();
                return; // nothing was received, so nothing to report
            } catch (IOException e) {
                e.printStackTrace();
                return;
            }

            // HttpClient does not follow redirects automatically for requests
            // such as POST and PUT, so 301 and 302 are reported here.
            if (statusCode == HttpStatus.SC_MOVED_PERMANENTLY
                    || statusCode == HttpStatus.SC_MOVED_TEMPORARILY) {
                // Read the redirect target from the response headers.
                Header locationHeader = postMethod.getResponseHeader("location");
                if (locationHeader != null) {
                    String location = locationHeader.getValue();
                    System.out.println("The page was redirected to:" + location);
                } else {
                    System.err.println("Location field value is null.");
                }
                return;
            }

            System.out.println(postMethod.getStatusLine());
            String str = "";
            try {
                str = postMethod.getResponseBodyAsString();
                System.out.println("Success! ooooooooooooooooo" + success++);
            } catch (IOException e) {
                e.printStackTrace();
            }
            System.out.println("======================================================");
            System.out.println(utf8Togb2312(str));
        } finally {
            postMethod.releaseConnection();
        }
    }

    /**
     * Downloads a captcha image, converts it with
     * {@code ImageFilter.changeGrey()}, writes the result to
     * {@link #OCR_FILE} and runs Tesseract on that file.
     *
     * @return the text recognised by Tesseract, or null when any step fails
     */
    private String decode() {
        // The random suffix requests a fresh image each time.
        String url = CAPTCHA_URL + Math.random();
        String protocol = url.startsWith("https") ? "https" : "http";
        InputStream instream = InitEnv.class.getResourceAsStream(InitEnv.CERTPATH);
        try {
            ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
            PicUtil.getPic(protocol, url, 80, "", instream, outputStream);
            byte[] b = outputStream.toByteArray();

            ImageFilter imageFilter = new ImageFilter(new ByteArrayInputStream(b));
            ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
            ImageIOHelper.createImage(imageFilter.changeGrey(), byteArrayOutputStream);

            File file = new File(OCR_FILE);
            File parent = file.getParentFile();
            if (parent != null && !parent.exists()) {
                parent.mkdirs();
            }
            // Close the file before Tesseract opens it and so that the handle
            // is not leaked on every iteration.
            FileOutputStream to = new FileOutputStream(file);
            try {
                byteArrayOutputStream.writeTo(to);
            } finally {
                to.close();
            }

            Tesseract instance = Tesseract.getInstance();
            return instance.doOCR(file);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            if (instream != null) {
                try {
                    instream.close();
                } catch (IOException ignored) {
                    // Nothing useful can be done when closing fails.
                }
            }
        }
    }

    /**
     * Decodes percent escapes and '+' in {@code str}, then re-reads the
     * resulting characters as UTF-8 bytes.
     * <p>
     * Despite its name, no GB2312 conversion takes place. A '%' that is not
     * followed by two hexadecimal digits is kept as a literal character.
     *
     * @param str text to convert, may be null
     * @return the converted text; an empty string when {@code str} is null
     */
    private String utf8Togb2312(String str) {
        if (str == null) {
            return "";
        }
        StringBuffer sb = new StringBuffer(str.length());
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            switch (c) {
                case '+':
                    sb.append(' ');
                    break;
                case '%':
                    int high = i + 2 < str.length() ? hexValue(str.charAt(i + 1)) : -1;
                    int low = i + 2 < str.length() ? hexValue(str.charAt(i + 2)) : -1;
                    if (high >= 0 && low >= 0) {
                        sb.append((char) (high * 16 + low));
                        i += 2;
                    } else {
                        sb.append(c);
                    }
                    break;
                default:
                    sb.append(c);
                    break;
            }
        }
        // Undo conversion to external encoding.
        String result = sb.toString();
        try {
            byte[] inputBytes = result.getBytes("8859_1");
            return new String(inputBytes, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // Both charsets are required by the platform; fall back to the
            // percent-decoded text rather than returning null.
            return result;
        }
    }

    /**
     * Returns the value of an ASCII hexadecimal digit.
     *
     * @param c character to convert
     * @return 0..15, or -1 when {@code c} is not a hexadecimal digit
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        if (c >= 'A' && c <= 'F') {
            return c - 'A' + 10;
        }
        return -1;
    }
}
(3)既然前面两种方法都没办法成功,尝试用第三种方法吧,比较复杂,但理论上一定可用(做出来后才发现,投票时间已经结束,无法验证)
 

- 使用 greasemonkey 脚本
- 用 ajax 取得校验码图片
- 上传到图片解析服务器(需要搭建专门的 OCR 解析服务器)
- 从解析服务器返回解析后的校验码
- 填充表单校验码,提交
// ==UserScript==
// @name         AutoVote
// @namespace    autovote
// @include      http://www.xxxx.com/work/workshow.php?id=1
// @include      http://www.xxxx.com/work/index.php
// @include      http://www.xxxx.com/work/show.php
// @grant        GM_xmlhttpRequest
// ==/UserScript==

// Approach 3: fetch the captcha image from inside the page, send its bytes
// to a local decoding server, put the decoded code into the vote form and
// submit the form.

var VOTE_URL = 'http://www.xxxx.com/work/workshow.php?id=1';
var IMG_URL = 'http://www.xxxx.com/work/che.php?';
var DECODE_SERVER_URL = 'http://localhost/decode/image';
var MAX_COUNT = 10;

// Number of captcha downloads started on the current page.
// NOTE(review): the script runs again after every page load, so this counter
// starts from 0 each time and only limits retries within one page.
var counter = 0;

// Entry point: moves to the vote page if needed, otherwise starts the
// captcha round trip.
function start() {
    // 1. direct to vote page
    if (counter > MAX_COUNT) {
        return;
    }
    if (document.location.href != VOTE_URL) {
        document.location.href = VOTE_URL;
        // The script runs again once the vote page has loaded.
        return;
    }
    var title = document.getElementById('title');
    if (title) {
        title.style.display = "block";
    }
    // 2. get code from PLUS_VOTE_SERVER
    load_image();
    // 3. full code value
    // 4. submit
    // 5. open new window
    // 6. close current windows
}

// Downloads a fresh captcha image as a binary string and passes it on for
// decoding. Does nothing once MAX_COUNT downloads have been exceeded.
function load_image() {
    if (counter > MAX_COUNT) {
        return;
    }
    counter++;
    alert("2. get code from PLUS_VOTE_SERVER");
    // The random suffix requests a new image each time.
    var imageSrc = IMG_URL + Math.random();
    GM_xmlhttpRequest({
        method: 'GET',
        url: imageSrc,
        // Keeps every byte of the image as one character of responseText.
        overrideMimeType: 'text/plain; charset=x-user-defined',
        onload: function (response) { decode_image(response.responseText); }
    });
}

// Marks the captcha field as busy and uploads the image data.
function decode_image(data) {
    var textbox = document.getElementById('numtext');
    if (textbox) {
        textbox.value = 'working...';
    }
    upload(data);
}

// Posts the image bytes, as a comma-separated list of numbers, to the
// decoding server and hands the answer to submit_form.
function upload(data) {
    var darray = data_array(data);
    GM_xmlhttpRequest({
        method: 'POST',
        headers: { 'Content-type': 'application/x-www-form-urlencoded' },
        url: DECODE_SERVER_URL,
        data: 'data=' + encodeURIComponent(darray.join(',')),
        onload: function (response) { submit_form(response.responseText); }
    });
}

// Fills the decoded code into the form and submits it. An answer of -1 from
// the decoding server triggers a new captcha download instead.
function submit_form(data) {
    if (String(data).replace(/^\s+|\s+$/g, '') == '-1') {
        load_image();
        return;
    }
    var textbox = document.getElementById('numtext');
    if (!textbox) {
        return;
    }
    textbox.value = data;
    var form = textbox.form || document.forms[0];
    if (!form) {
        return;
    }
    //alert("submit ...");
    // Clicking the submit button also sends the button's own name/value pair.
    // NOTE(review): presumably the server expects that field - confirm.
    var button = form.querySelector('input[type="submit"], button[type="submit"]');
    if (button) {
        button.click();
    } else {
        // Called through the prototype so that a control named "submit"
        // cannot shadow the method.
        HTMLFormElement.prototype.submit.call(form);
    }
}

// Converts a binary string into an array holding the low byte of each
// character.
function data_array(data) {
    var bytes = [];
    for (var i = 0; i < data.length; i++) {
        bytes.push(data.charCodeAt(i) & 0xff);
    }
    return bytes;
}

start();
页: [1]
查看完整版本: Arch-03-10-自动投票机器人