@tenlee
2015-08-05T10:53:52.000000Z
字数 1677
阅读 2494
Python
通过对杭电OJ的观察,很容易找到规律,根据此规则写相应的代码.
Requests 实现模拟登陆
需要用 BeautifulSoup 处理 status 页面的表格, 获得runid,problemid,代码链接,其实runid就是代码链接
#!/usr/bin/env python3
# coding=utf-8
"""Save a user's HDU OJ submissions as local ``.cpp`` files.

The script logs in to acm.hdu.edu.cn with ``requests``, pages through the
user's status listing, and for every problem id it has not seen yet
downloads the linked source code into the current directory as
``HDU<problem id>.cpp``.
"""
import time
from urllib.parse import urljoin

from bs4 import BeautifulSoup
import requests

hdu_url = "http://acm.hdu.edu.cn/"
login_url = hdu_url + "userloginex.php?action=login"
status_url = hdu_url + "status.php"

# Keep one session for the whole run so the login state is reused.
cookieJar = requests.cookies.RequestsCookieJar()
session = requests.Session()


def saveFile(data, fname="temp.html"):
    """Write the text *data* to *fname* and print a confirmation line.

    The file is written as UTF-8 explicitly, so the result does not depend
    on the platform's locale encoding.
    """
    with open(fname, "w", encoding="utf-8") as f:
        f.write(data)
    print(fname + '保存成功')


def getCode(codeurl, proid):
    """Fetch the code page at *codeurl* and save its source for *proid*.

    *codeurl* is the ``href`` taken from the status table; it is resolved
    against ``hdu_url``. The source is read from the page's first
    ``<textarea>`` and saved in the current directory as
    ``HDU<proid>.cpp`` (for example ``HDU1001.cpp``). If the page has no
    ``<textarea>``, a message is printed and nothing is saved.
    """
    url = urljoin(hdu_url, codeurl)
    code_html = session.get(url, cookies=cookieJar)
    code_html.encoding = 'gb2312'
    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = BeautifulSoup(code_html.text, "html.parser")
    code_area = soup.textarea  # the tag that holds the source code
    if code_area is None:
        print('HDU' + proid + ' 未找到代码, 跳过')
        return
    saveFile(code_area.text, 'HDU' + proid + '.cpp')


def getStatus():
    """Walk the user's status pages and download one solution per problem.

    Reads the module-level ``uname`` (set when the file is run as a
    script) and requests ``status_url`` page by page. Each page is scanned
    cell by cell: an 8-digit cell is taken as a run id, the cell three
    columns later as the problem id, and the cell six columns later as the
    one carrying the link to the code. The loop stops at the first page
    that contains no run id.
    """
    first = ''
    vis = set()  # problem ids whose code has already been handled
    while True:
        # NOTE(review): status=5 presumably filters for accepted runs;
        # confirm against the site's status form.
        payload = {'first': first, "user": uname, "pid": "", "lang": "",
                   "status": 5}
        status_html = session.get(status_url, cookies=cookieJar,
                                  params=payload)
        print(status_html.url)
        status_html.encoding = 'gb2312'
        soup = BeautifulSoup(status_html.text, "html.parser")
        runid = ""  # reset for every page; stays empty if none is found
        for table in soup.find_all('table'):
            for row in table.find_all('tr'):
                i = 100  # column offset from the run id; 100 = not seen yet
                for cell in row.find_all('td'):
                    text = cell.text
                    # An 8-character decimal cell is treated as the run id.
                    if len(text) == 8 and text.isdecimal():
                        runid = text
                        i = 0
                    if i == 3:
                        proid = text
                        if proid in vis:
                            break
                        # Remember it so the same problem is not saved twice.
                        vis.add(proid)
                    if i == 6:
                        link = cell.a
                        if link is None:
                            # No code link in this cell: report and move on
                            # instead of failing on a missing attribute.
                            print('HDU' + proid + ' 没有代码链接, 跳过')
                        else:
                            getCode(link.get('href'), proid)
                            # Pause one second so the server can keep up.
                            time.sleep(1)
                    i += 1
        if runid == '':
            # No run id on this page: it was the last page.
            break
        # The next page starts at this page's last run id minus one.
        first = str(int(runid) - 1)


if __name__ == "__main__":
    uname = input("请输入用户名: ")
    upass = input("请输入密码: ")
    login_data = {'username': uname, "userpass": upass, "login": "Sign In"}
    # Log in; the session keeps the resulting cookies for later requests.
    login_resp = session.post(login_url, cookies=cookieJar, data=login_data)
    getStatus()