本节主要内容:
掌握 Python 多线程下载文件的方法
代码:
 
#!/bin/python
#
#site: www.jb200.com
import httplib
import urllib2
import time
from threading import Thread
from Queue import Queue
from time import sleep
proxy = 'your proxy';
opener = urllib2.build_opener( urllib2.ProxyHandler({'http':proxy}) )
urllib2.install_opener( opener )
ids = {};
for i in range(1,110):
    try:
        listUrl = "http://www.jb200.com/sort/list_8_%d.shtml" % (i);
        print listUrl;
        page = urllib2.urlopen(listUrl).read();
        speUrl = "http://www.jb200.com/soft/";
        speUrlLen = len(speUrl);
        idx = page.find(speUrl,0);
        while idx!=-1:
            dotIdx = page.find(".",idx + speUrlLen);
            if dotIdx != -1:
                id = page[idx + speUrlLen:dotIdx];
                ids[id] = 1;
            idx = page.find("http://www.jb200.com/soft/",idx + speUrlLen);
    except:
        pass;
q = Queue()
NUM = 5
failedId = [];
def do_somthing_using(id):
    try:
        url = "http://www.jb200.com/download.php?softid=%s&type=dx" % (id);
        h2 = httplib.HTTPConnection("your proxy", "you port");
        h2.request("HEAD", url);
        resp = h2.getresponse();
        header = resp.getheaders();
        location = header[3][1];        
        sContent = urllib2.urlopen(location).read();
        savePath = "C:someweb%s.rar" % (id);
        file=open(savePath,'wb');
        file.write(sContent);
        file.close();   
        print savePath + " saved";
    except:
        pass;
def working():
    """Worker loop: take ids from the queue ``q`` and download each one.

    Never returns; ``q.get()`` blocks while the queue is empty.
    """
    while True:
        arguments = q.get()
        try:
            do_somthing_using(arguments)
            # Pause between downloads, presumably to throttle requests.
            sleep(1)
        finally:
            # Always acknowledge the item: if the job raised and this were
            # skipped, q.join() in the main thread would block forever.
            q.task_done()
# Start NUM background workers. They are daemon threads, so their endless
# loops do not keep the process alive once the main thread is done.
for _ in range(NUM):
    worker = Thread(target=working)
    worker.daemon = True
    worker.start()

# Hand every collected id to the workers ...
for softId in ids:
    q.put(softId)

# ... and block until each queued id has been acknowledged with task_done().
q.join()