::-- ZoomQuiet [2008-03-11 09:58:19]

CPUG联盟::

CPUG::门户plone

BPUG

SPUG

ZPUG

SpreadPython Python宣传

1. 断点续传&多线程下载

CPyUG:42890~ python 实现断点续传和多线程下载

1.1. C实现的

{{{unyi sun <[email protected]> reply-to [email protected], to [email protected], date Tue, Mar 11, 2008 at 1:20 PM subject [CPyUG:42994] Re: python 实现断点续传和多线程下载

}}}

我用C写的一个下载器,支持断点续传、多线程以及服务器Redirect;用Python写的话,应该更简洁

/**
** description:qdown is a multithreaded downloader
** author:Sunjoy
** from:ICT.CAS.
** date:2007-9-10
**
*/
   8 
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <netdb.h>

#define MAX_THREAD 100
  20 
  21 typedef struct URLInfo
  22 {
  23     char schema[8];
  24     char host[256];
  25     char host_name[256];
  26     unsigned int port;
  27     char file[256];
  28 }URLInfo;
  29 
  30 typedef struct Connection
  31 {
  32     int sock;
  33     URLInfo url_info;
  34     int avaliable;
  35 }Connection;
  36 
  37 typedef struct Resource
  38 {
  39     char file_url[256];
  40     int file_size;
  41     char file_name[256];
  42 }Resource;
  43 
  44 typedef struct ThreadArg
  45 {
  46     Resource* res;
  47     int start_pos;
  48     int limit;
  49     int no;
  50 }ThreadArg;
  51 
  52 typedef struct BreakPoint
  53 {
  54     int downloaded;
  55     int thread_amount;
  56     int tasks[MAX_THREAD][2];
  57    
  58 }BreakPoint;
  59 
  60 pthread_mutex_t g_mut;
  61 int g_total=0;
  62 int g_downloaded=0;
  63 BreakPoint g_breakpoint;
  64 
  65 URLInfo parse_url(const char *url);
  66 Connection open_url(const char * url);
  67 Resource get_resource(const char *url);
  68 void join_url(const char* old_url,const char* redirect,char * new_url);
  69 void download(const char* url,int thread_amount,const char* file_name);
  70 void* download_part(void* args);
  71 void* monitor(void *args);
  72 void store_breakpoint(char * cfgName);
  73 
  74 void store_breakpoint(char * cfgName)
  75 {
  76     int z;
  77     FILE* f;
  78     f=fopen(cfgName,"w");
  79     fprintf(f,"%d\n",g_breakpoint.downloaded);
  80     fprintf(f,"%d\n",g_breakpoint.thread_amount);
  81     for(z=0;z<g_breakpoint.thread_amount;z++){
  82        fprintf(f,"%d-%d\n",g_breakpoint.tasks[z][0],g_breakpoint.tasks[z][1]);
  83     }
  84     fclose(f);
  85 }
  86 
  87 void join_url(const char* old_url,const char* redirect,char * new_url)
  88 {
  89     char stack1[256][256]={0},stack2[256][256]={0};
  90     int i=0,j=0,p1=0,p2=0;
  91     char seg[256]={0};
  92     URLInfo temp_urlinfo;
  93    
  94     memset(new_url,0,sizeof(new_url));
  95     if(strstr(redirect,"://")!=NULL){
  96         strcpy(new_url,redirect);
  97     }
  98     else{
  99         while(1){
 100             while(redirect[i]!='/' && redirect[i]!=0){
 101                 seg[j++]=redirect[i++];
 102             }   
 103             strcpy(stack1[p1++],seg);
 104             memset(seg,0,sizeof(seg));
 105             j=0;
 106             if(redirect[i]==0)
 107                 break;
 108             i++;
 109         }
 110         for(i=0;i<p1;i++){
 111             if(!strcmp(stack1[i],"..") && p2>-1)
 112                 p2--;
 113             else if(strcmp(stack1[i],".")){
 114                 strcpy(stack2[p2++],stack1[i]);
 115             }
 116         }
 117         //printf("##%s\n",stack2[0]);
 118   
 119         if(!strcmp(stack2[0],"")){
 120             temp_urlinfo=parse_url(old_url);
 121             sprintf(new_url,"%s://%s:%d/",temp_urlinfo.schema,temp_urlinfo.host,temp_urlinfo.port);         
 122         }
 123         else{
 124             i=strlen(old_url)-1;
 125             while(old_url[i]!='/')
 126                 i--;
 127             //printf("##%c\n",old_url[i]);
 128             strncpy(new_url,old_url,i+1);
 129             new_url[i+1]=0;
 130         }
 131         //printf("##%s\n",new_url);
 132         for(j=0;j<p2-1;j++){
 133             strcat(new_url,stack2[j]);
 134             strcat(new_url,"/");
 135         }
 136         strcat(new_url,stack2[p2-1]);
 137     }
 138 }
 139 
 140 URLInfo parse_url(const char* url){
 141     int i=0,j=0;
 142     char schema[8]={0};
 143     char host[256]={0};
 144     char port[8]={0};
 145     char file[256]={0};
 146     char IP[32]={0};
 147     URLInfo url_info;
 148     struct hostent* hptr;
 149    
 150     while(url[i]!=':'){
 151         schema[j++]=url[i++];
 152     }
 153 
 154     for(i+=3,j=0;url[i]!=':' && url[i]!='/' && url[i]!=0;){
 155         host[j++]=url[i++];
 156     }
 157    
 158     if(url[i]==':'){
 159         for(i+=1,j=0;url[i]!='/';){
 160             port[j++]=url[i++];
 161         }
 162         sscanf(port,"%d",&url_info.port);
 163     }
 164     else{
 165         url_info.port=80;
 166     }
 167    
 168     if(url[i]!=0){
 169         for(j=0;url[i]!=0;){
 170             file[j++]=url[i++];
 171         }
 172     }
 173     else{
 174         file[0]='/';
 175     }
 176    
 177     strcpy(url_info.schema,schema);
 178     strcpy(url_info.file,file);
 179     strcpy(url_info.host_name,host);
 180     hptr=gethostbyname(host);
 181   
 182     if(hptr!=NULL){
 183         strcpy(url_info.host,
 184             inet_ntop(hptr->h_addrtype,*(hptr->h_addr_list),IP,sizeof(IP))
 185         );
 186     }
 187     //printf("%s\n",url_info.host);
 188     return url_info;
 189 }
 190 Connection open_url(const char* url){
 191     Connection conn;
 192     struct sockaddr_in remote_addr,local_addr;
 193 
 194     conn.avaliable=0;
 195     conn.url_info=parse_url(url);
 196    
 197     local_addr.sin_family=AF_INET;
 198     local_addr.sin_addr.s_addr=htonl(INADDR_ANY);
 199     local_addr.sin_port=htons(0);
 200     remote_addr.sin_family=AF_INET;
 201     remote_addr.sin_addr.s_addr=inet_addr(conn.url_info.host);
 202     remote_addr.sin_port=htons(conn.url_info.port);
 203    
 204     conn.sock=socket(AF_INET,SOCK_STREAM,0);
 205     if(bind(conn.sock,
 206         (struct sockaddr*)&local_addr,
 207         sizeof(local_addr))<0){
 208             printf("bind error\n");
 209     }
 210    
 211    
 212    
 213     if(conn.sock){
 214         if(
 215             connect(conn.sock,(struct sockaddr*)&remote_addr,sizeof(remote_addr))!=-1
 216         ){
 217             conn.avaliable=1;
 218         }
 219     }
 220    
 221     return conn;
 222 }
 223 
 224 Resource get_resource(const char* url){
 225     char pack[1024]={0};
 226     char buf[1024]={0};
 227     char redirect[256]={0},new_url[256]={0},old_url[256]={0};
 228     static int redirect_count=0;
 229     char* i;
 230     char* j;
 231     char* z;
 232     Resource res;
 233    
 234     Connection conn=open_url(url);
 235     if(!conn.avaliable){
 236         return res;
 237     }
 238     sprintf(pack,"GET %s HTTP/1.1\nHost: %s\nAccept: */*\nReferer: http://%s\nUser-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\nPragma: no-cache\nCache-Control: no-cache\nConnection: close\n\n",conn.url_info.file,conn.url_info.host_name,conn.url_info.host_name);
 239     send(conn.sock,pack,strlen(pack),0);
 240     recv(conn.sock,buf,sizeof(buf),0);
 241     //printf("%s\n",buf);
 242     if(strstr(buf,"HTTP/1.1 404")!=NULL || strstr(buf,"HTTP/1.0 404")!=NULL){
 243        return res;
 244     }
 245     i=(char *)strstr(buf,"Location:");
 246     if(i!=NULL && redirect_count<5){
 247         sscanf(i,"Location: %s",redirect);
 248         sprintf(old_url,"%s://%s:%d%s",conn.url_info.schema,conn.url_info.host_name,conn.url_info.port,conn.url_info.file);
 249         join_url(old_url,redirect,new_url);
 250         //printf("@#%s\n",new_url);
 251         redirect_count++;
 252         return get_resource(new_url);
 253     }
 254     i=(char *)strstr(buf,"Content-Length:");
 255     if(i!=NULL){
 256         sscanf(i,"Content-Length: %d",&res.file_size);
 257     }
 258     strcpy(res.file_url,url);
 259     //printf("#%d\n",res.file_size);
 260     for(z=(char*)url;(j=strstr(z,"/"))!=NULL;){
 261         z=j+sizeof(char);
 262     }
 263     strcpy(res.file_name,z);
 264     close(conn.sock);
 265     return res;
 266 }
 267 
 268 void* download_part(void * args)
 269 {
 270     ThreadArg* targ=(ThreadArg*)args;
 271     Connection conn;
 272     FILE* f=NULL;
 273     char pack[1024]={0};
 274     char buf[1024]={0};
 275     int i=0,ct=0;
 276     char* body=NULL;
 277     //printf("%s,%d-%d\n",targ->res->file_url, targ->start_pos,targ->limit);
 278     conn=open_url(targ->res->file_url);
 279     while(!conn.avaliable){
 280         sleep(1);
 281         conn=open_url(targ->res->file_url);
 282     }
 283     if(conn.avaliable){
 284 
 285         f=fopen(targ->res->file_name,"rb+");
 286         fseek(f,targ->start_pos,0);
 287         sprintf(pack,"GET %s HTTP/1.1\nHost: %s\nAccept: */*\nReferer: http://%s\nUser-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\nRange: bytes=%d-%d\nPragma: no-cache\nCache-Control: no-cache\n\n",conn.url_info.file,conn.url_info.host_name,conn.url_info.host_name,targ->start_pos,targ->start_pos+targ->limit-1);
 288         //printf("%s",pack);
 289 begin_down:
 290         send(conn.sock,pack,strlen(pack),0);
 291         i=recv(conn.sock,buf,sizeof(buf),0);
 292        
 293         if(strstr(buf,"HTTP/1.1 206")==NULL && strstr(buf,"HTTP/1.0 206")==NULL && strstr(buf,"HTTP/1.1 200")==NULL && strstr(buf,"HTTP/1.0 200")==NULL){
 294             sleep(2);
 295             memset(buf,0,sizeof(buf));
 296             conn=open_url(targ->res->file_url);
 297             goto begin_down;
 298         }
 299         //printf("##%s\n",body);
 300         body=strstr(buf,"\r\n\r\n")+4;
 301         if(body!=NULL){
 302             i=i-(body-buf);
 303             fwrite(body,sizeof(char),i,f);
 304             //printf("@@@@%x\n",buf);
 305             fflush(f);
 306             ct+=i;
 307             pthread_mutex_lock(&g_mut);
 308             g_downloaded+=i;
 309             pthread_mutex_unlock(&g_mut);
 310            
 311             while(ct< targ->limit){
 312                 i=recv(conn.sock,buf,sizeof(buf),0);
 313                 if(i==0){
 314                     fclose(f);
 315                     conn.avaliable=0;
 316                     while(!conn.avaliable){
 317                         sleep(2);
 318                         //printf("waiting...\n");
 319                         conn=open_url(targ->res->file_url);
 320                     }
 321                     memset(pack,0,sizeof(pack));
 322                     memset(buf,0,sizeof(buf));
 323                     sprintf(pack,"GET %s HTTP/1.1\nHost: %s\nAccept: */*\nReferer: http://%s\nUser-Agent: Mozilla/4.0 (compatible; MSIE 5.00; Windows 98)\nRange: bytes=%d-%d\nPragma: no-cache\nCache-Control: no-cache\n\n",conn.url_info.file,conn.url_info.host_name,conn.url_info.host_name,targ->start_pos+ct,targ->start_pos+targ->limit-1);
 324                     f=fopen(targ->res->file_name,"rb+");
 325                     fseek(f,targ->start_pos+ct,0);
 326                     goto begin_down;
 327                 }
 328                
 329                 fwrite(buf,sizeof(char),i,f);
 330                 fflush(f);
 331                 ct+=i;
 332                 pthread_mutex_lock(&g_mut);
 333                 g_downloaded+=i;
 334                 g_breakpoint.tasks[targ->no][0]=targ->start_pos+ct;
 335                 g_breakpoint.tasks[targ->no][1]=targ->limit-ct;
 336                 g_breakpoint.downloaded=g_downloaded;
 337                 pthread_mutex_unlock(&g_mut);
 338             }
 339             fclose(f);
 340             g_breakpoint.downloaded=g_downloaded;
 341             close(conn.sock);
 342         }
 343     }
 344     pthread_exit(NULL);
 345 }
 346 void* monitor(void* args){
 347     float p;
 348     int i,j,z,old;
 349     FILE* f;
 350     char cfgName[256];
 351     strcpy(cfgName,(char*)args);
 352     strcat(cfgName,".cfg");
 353    
 354     while(1){
 355         p=g_downloaded/(g_total+0.0);
 356         if(g_downloaded>=g_total)
 357                 break;
 358         i=p*100/10;
 359         if(old!=g_downloaded){
 360            
 361 
 362             printf("\r");
 363             for(j=0;j<i;j++){
 364                 printf("==");
 365             }
 366             printf("%2.0f%%",p*100);
 367             fflush(stdout);
 368        
 369             store_breakpoint(cfgName);
 370             old=g_downloaded;
 371         }
 372     }
 373     printf("\r====================100%%\n");
 374     remove(cfgName);
 375     pthread_exit(NULL);
 376 }
 377 
 378 
 379 void download(const char* url,int thread_amount,const char* file_name)
 380 {
 381     ThreadArg targs[MAX_THREAD];
 382     pthread_attr_t * thAttr = NULL;
 383     pthread_t tids[MAX_THREAD],monitor_id,controler_id;
 384     Resource res;
 385     int i,block_size,t_start_pos,t_limit;
 386     FILE* f;
 387     char cfgName[256]={0};
 388    
 389     if(thread_amount>MAX_THREAD)
 390         return;
 391     res=get_resource(url);
 392    
 393     if(!strcmp(res.file_url,""))
 394         return;
 395    
 396     if(strcmp(file_name,""))
 397         strcpy(res.file_name,file_name);
 398    
 399     if(!strcmp(res.file_name,""))
 400         strcpy(res.file_name,"default_down");
 401    
 402     if(res.file_size<1000000)
 403         thread_amount=1;
 404    
 405     block_size=res.file_size/thread_amount;
 406     pthread_mutex_init(&g_mut,NULL);
 407    
 408     strcpy(cfgName,res.file_name);
 409     strcat(cfgName,".cfg");
 410     printf("downloading %s,%d bytes... \n",res.file_name,res.file_size);
 411    
 412     if(fopen(cfgName,"r")==NULL){
 413 new_task:      
 414         f=fopen(res.file_name,"wb");
 415         if(f==NULL){
 416             strcpy(res.file_name,"default_down");
 417             f=fopen(res.file_name,"wb");
 418         }
 419         fclose(f);
 420         g_total=res.file_size;
 421 
 422         for(i=0;i<thread_amount;i++){
 423             targs[i].res=&res;
 424             targs[i].start_pos=block_size*i;
 425             targs[i].limit=block_size;
 426             if(i==thread_amount-1)
 427                 targs[i].limit+= (res.file_size%thread_amount);
 428            
 429             targs[i].no=i;
 430             g_breakpoint.tasks[i][0]=targs[i].start_pos;
 431             g_breakpoint.tasks[i][1]=block_size;
 432             pthread_create(&tids[i], thAttr, download_part, (void *)&targs[i]);
 433         }
 434        
 435     }
 436     else{
 437         f=fopen(cfgName,"r");
 438         if(fscanf(f,"%d",&g_downloaded)==-1)
 439             goto new_task;
 440         //printf("#%d\n",g_downloaded);
 441         g_total=res.file_size;
 442         fscanf(f,"%d",&thread_amount);
 443         for(i=0;i<thread_amount;i++){
 444             fscanf(f,"%d-%d",&t_start_pos,&t_limit);
 445             targs[i].res=&res;
 446             targs[i].start_pos=t_start_pos;
 447             targs[i].limit=t_limit;
 448             targs[i].no=i;
 449             g_breakpoint.tasks[i][0]=targs[i].start_pos;
 450             g_breakpoint.tasks[i][1]=t_limit;
 451             pthread_create(&tids[i], thAttr, download_part, (void *)&targs[i]);
 452         }
 453         fclose(f);
 454     }
 455    
 456     pthread_create(&monitor_id,NULL,monitor,(void *)res.file_name);
 457     g_breakpoint.thread_amount=thread_amount;
 458     g_breakpoint.downloaded=g_downloaded;
 459     //printf("#%d\n",g_downloaded);
 460     /*for(i=0;i<thread_amount;i++){
 461         pthread_join(tids[i],NULL);
 462     }*/
 463 
 464     pthread_join(monitor_id,NULL);
 465 }
 466 
 467  
 468 
 469 int main (int ac, char * av[])
 470 {
 471   int thread_amount=5;
 472   char file_name[256]={0};
 473   if(ac<2){
 474         printf("usage: qdown URL [thread_amount] [save as]\n");
 475         printf("example: qdown http://www.baidu.com/img/logo.gif 5 /home/sunjoy/log.gif\n");
 476   }
 477   else{
 478         if(ac>=3)
 479             sscanf(av[2],"%d",&thread_amount);
 480         if(ac>=4){
 481             strcpy(file_name,av[3]);
 482         }
 483         download(av[1],thread_amount,file_name);
 484        
 485   }
 486  
 487   return 0;
 488 }

PageCommentData

MiscItems/2008-03-11 (last edited 2009-12-25 07:09:18 by localhost)