home *** CD-ROM | disk | FTP | other *** search
/ Chip 2004 July / CMCD0704.ISO / Software / Freeware / Comunicatii / htttrack / httrack-3.32-2.exe / {app} / src / htsparse.c < prev    next >
Encoding:
C/C++ Source or Header  |  2004-05-08  |  177.6 KB  |  4,100 lines

  1. /* ------------------------------------------------------------ */
  2. /*
  3. HTTrack Website Copier, Offline Browser for Windows and Unix
  4. Copyright (C) Xavier Roche and other contributors
  5.  
  6. This program is free software; you can redistribute it and/or
  7. modify it under the terms of the GNU General Public License
  8. as published by the Free Software Foundation; either version 2
  9. of the License, or any later version.
  10.  
  11. This program is distributed in the hope that it will be useful,
  12. but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14. GNU General Public License for more details.
  15.  
  16. You should have received a copy of the GNU General Public License
  17. along with this program; if not, write to the Free Software
  18. Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  19.  
  20.  
  21. Important notes:
  22.  
  23. - We hereby ask people using this source NOT to use it in purpose of grabbing
  24. emails addresses, or collecting any other private information on persons.
  25. This would disgrace our work, and spoil the many hours we spent on it.
  26.  
  27.  
  28. Please visit our Website: http://www.httrack.com
  29. */
  30.  
  31.  
  32. /* ------------------------------------------------------------ */
  33. /* File: htsparse.c parser                                      */
  34. /*       html/javascript/css parser                             */
  35. /*       and other parser routines                              */
  36. /* Author: Xavier Roche                                         */
  37. /* ------------------------------------------------------------ */
  38.  
  39.  
  40. /* Internal engine bytecode */
  41. #define HTS_INTERNAL_BYTECODE
  42.  
  43. #include <fcntl.h>
  44. #include <ctype.h>
  45.  
  46. /* File defs */
  47. #include "htscore.h"
  48.  
  49. /* specific definitions */
  50. #include "htsbase.h"
  51. #include "htsnet.h"
  52. #include "htsbauth.h"
  53. #include "htsmd5.h"
  54. #include "htsindex.h"
  55.  
  56. /* external modules */
  57. #include "htsmodules.h"
  58.  
  59. // htswrap_add
  60. #include "htswrap.h"
  61.  
  62. // parser
  63. #include "htsparse.h"
  64.  
  65.  
  66. // specific defines: shorthand for fields of the link being parsed; they expand in place, so `liens` and `ptr` (plus `parent_relative` for the relative* forms) must be in scope
  67. #define urladr   (liens[ptr]->adr)   /* adr field of the current link, liens[ptr] */
  68. #define urlfil   (liens[ptr]->fil)   /* fil field of the current link */
  69. #define savename (liens[ptr]->sav)   /* sav field of the current link */
  70. #define parenturladr   (liens[liens[ptr]->precedent]->adr)   /* same three fields, taken from the link indexed by liens[ptr]->precedent */
  71. #define parenturlfil   (liens[liens[ptr]->precedent]->fil)
  72. #define parentsavename (liens[liens[ptr]->precedent]->sav)
  73. #define relativeurladr   ((!parent_relative)?urladr:parenturladr)   /* current link when parent_relative is 0, otherwise the ->precedent link */
  74. #define relativeurlfil   ((!parent_relative)?urlfil:parenturlfil)
  75. #define relativesavename ((!parent_relative)?savename:parentsavename)
  76.  
  77. #define test_flush if (opt->flush) { if (opt->log) { fflush(opt->log); } if (opt->errlog) { fflush(opt->errlog);  } } /* when opt->flush is set, fflush() whichever of opt->log / opt->errlog is non-NULL. NOTE(review): expands to a bare if statement, not do{}while(0), so it is unsafe as the unbraced body of an if/else */
  78.  
  79. // does nothing; expanded just before abortLogFmt()/exit(1) in the out-of-memory paths of HT_ADD_CHK and HT_ADD_START
  80. #define XH_uninit do {} while(0)
  81.  
  82. // optimized version, which avoids touching unmodified html files (update): the rewritten page is accumulated in the ht_buff memory buffer by the HT_ADD* macros below
  83. #define REALLOC_SIZE 8192   /* slack added whenever ht_buff is sized or grown */
  84. #define HT_ADD_CHK(A) if (((int) (A)+ht_len+1) >= ht_size) { /* make room for A more bytes plus a terminating NUL in ht_buff; A is evaluated up to three times */ \
  85.   ht_size=(A)+ht_len+REALLOC_SIZE; \
  86.   ht_buff=(char*) realloct(ht_buff,ht_size); \
  87.   if (ht_buff==NULL) { /* allocation failure is fatal: report, then exit(1) */ \
  88.   printf("PANIC! : Not enough memory [%d]\n",__LINE__); \
  89.   XH_uninit; \
  90.   abortLogFmt("not enough memory for current html document in HT_ADD_CHK : realloct(%d) failed" _ ht_size); \
  91.   exit(1); \
  92.   } \
  93.   } \
  94.   ht_len+=A; /* runs unconditionally, after the if: the macro is a statement sequence and is written without a trailing semicolon by HT_ADD_ADR and HT_ADD */
  95. #define HT_ADD_ADR /* append the source bytes [lastsaved, adr) to ht_buff, keep it NUL-terminated, then advance lastsaved to adr */ \
  96.   if ((opt->getmode & 1) && (ptr>0)) { /* same condition under which HT_ADD_START allocates ht_buff */ \
  97.   int i=((int) (adr - lastsaved)),j=ht_len; HT_ADD_CHK(i) /* j keeps the write offset, since HT_ADD_CHK adds i to ht_len */ \
  98.   memcpy(ht_buff+j, lastsaved, i); \
  99.   ht_buff[j+i]='\0'; \
  100.   lastsaved=adr; \
  101.   }
  102. #define HT_ADD(A) /* append the NUL-terminated string A to ht_buff; A is evaluated twice (strlen and memcpy) and must not use the names i or j, which are declared below */ \
  103.   if ((opt->getmode & 1) && (ptr>0)) { /* same condition under which HT_ADD_START allocates ht_buff */ \
  104.   int i=strlen(A),j=ht_len; \
  105.   if (i) { /* an empty string is a no-op */ \
  106.   HT_ADD_CHK(i) \
  107.   memcpy(ht_buff+j, A, i); \
  108.   ht_buff[j+i]='\0'; \
  109.   } }
  110. #define HT_ADD_START /* declares ht_size, ht_len and ht_buff in the enclosing scope (so it must sit among declarations); ht_buff stays NULL unless (opt->getmode & 1) && ptr>0 */ \
  111.   int ht_size=(int)(r->size*5)/4+REALLOC_SIZE; /* initial capacity: 5/4 of the document size plus REALLOC_SIZE */ \
  112.   int ht_len=0; \
  113.   char* ht_buff=NULL; \
  114.   if ((opt->getmode & 1) && (ptr>0)) { \
  115.   ht_buff=(char*) malloct(ht_size); \
  116.   if (ht_buff==NULL) { /* allocation failure is fatal: report, then exit(1) */ \
  117.   printf("PANIC! : Not enough memory [%d]\n",__LINE__); \
  118.   XH_uninit; \
  119.   abortLogFmt("not enough memory for current html document in HT_ADD_START : malloct(%d) failed" _ ht_size); \
  120.   exit(1); \
  121.   } \
  122.   ht_buff[0]='\0'; /* start with an empty string */ \
  123.   }
  124. #define HT_ADD_END { /* write ht_buff to savename unless the file on disk is judged identical, store its digest in the cache, then free ht_buff; uses fp, r, cache, opt from the enclosing scope */ \
  125.   int ok=0; /* set to 1 when the existing file can be left untouched */\
  126.   if (ht_buff) { \
  127.   INTsys file_len=(INTsys) strlen(ht_buff); /* length up to the first NUL byte of the buffer */\
  128.   char digest[32+2];\
  129.   digest[0]='\0';\
  130.   domd5mem(ht_buff,file_len,digest,1); /* digest of the new content; presumably 32 hex characters - confirm against htsmd5 */\
  131.   if (fsize(fconv(savename))==file_len) { /* the cached digest is only consulted when the on-disk size already matches */ \
  132.   int mlen = 0;\
  133.   char* mbuff; /* only read below when cache_readdata() reported a non-zero mlen */\
  134.   cache_readdata(cache,"//[HTML-MD5]//",savename,&mbuff,&mlen);\
  135.   if (mlen) mbuff[mlen]='\0';\
  136.   if ((mlen == 32) && (strcmp(((mbuff!=NULL)?mbuff:""),digest)==0)) {\
  137.   ok=1;\
  138.   if ( (opt->debug>1) && (opt->log!=NULL) ) {\
  139.   fspc(opt->log,"debug"); fprintf(opt->log,"File not re-written (md5): %s"LF,savename);\
  140.   test_flush;\
  141.   }\
  142.   } else {\
  143.   ok=0;\
  144.   } \
  145.   }\
  146.   if (!ok) { /* content differs or is unknown: (re)create the file */ \
  147.   fp=filecreate(savename); \
  148.   if (fp) { \
  149.   if (file_len>0) {\
  150.   if ((INTsys)fwrite(ht_buff,1,file_len,fp) != file_len) { /* short write */ \
  151.   int fcheck;\
  152.   if ((fcheck=check_fatal_io_errno())) { /* non-zero is treated as fatal: request engine exit via opt->state.exit_xh */\
  153.   opt->state.exit_xh=-1;\
  154.   }\
  155.   if (opt->errlog) {   \
  156.   fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unable to write HTML file %s: %s"LF, savename, strerror(errno));\
  157.   if (fcheck) {\
  158.   fspc(opt->errlog,"error");\
  159.   fprintf(opt->errlog,"* * Fatal write error, giving up"LF);\
  160.   }\
  161.   test_flush;\
  162.   }\
  163.   }\
  164.   }\
  165.   fclose(fp); fp=NULL; \
  166.   if (strnotempty(r->lastmodified)) /* applied even after a failed write */ \
  167.   set_filetime_rfc822(savename,r->lastmodified); \
  168.   } else { /* the file could not be created */\
  169.   int fcheck;\
  170.   if ((fcheck=check_fatal_io_errno())) {\
  171.   opt->state.exit_xh=-1;\
  172.   }\
  173.   if (opt->errlog) { \
  174.   fspc(opt->errlog,"error");\
  175.   fprintf(opt->errlog,"Unable to save file %s : %s"LF, savename, strerror(errno));\
  176.   if (fcheck) {\
  177.   fspc(opt->errlog,"error");\
  178.   fprintf(opt->errlog,"* * Fatal write error, giving up"LF);\
  179.   }\
  180.   test_flush;\
  181.   }\
  182.   }\
  183.   } else { /* file kept as is; filenote() is still called for it (its semantics are defined elsewhere) */\
  184.   filenote(savename,NULL); \
  185.   }\
  186.   if (cache->ndx) /* store the digest under the same key that is read back above */\
  187.     cache_writedata(cache->ndx,cache->dat,"//[HTML-MD5]//",savename,digest,(int)strlen(digest));\
  188.   } \
  189.   freet(ht_buff); ht_buff=NULL; \
  190.   }
  191. #define HT_ADD_FOP /* expands to nothing; kept so the call site in htsparse() still compiles */ 
  192.  
  193. // COPY IN HTSCORE.C
  194. #define HT_INDEX_END do { \
  195. if (!makeindex_done) { \
  196. if (makeindex_fp) { \
  197.   char tempo[1024]; \
  198.   if (makeindex_links == 1) { \
  199.     sprintf(tempo,"<meta HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=%s\">"CRLF,makeindex_firstlink); \
  200.   } else \
  201.     tempo[0]='\0'; \
  202.   fprintf(makeindex_fp,template_footer, \
  203.     "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->", \
  204.     tempo \
  205.     ); \
  206.   fflush(makeindex_fp); \
  207.   fclose(makeindex_fp);  /* α ne pas oublier sinon on passe une nuit blanche */  \
  208.   makeindex_fp=NULL; \
  209.   usercommand(opt,0,NULL,fconcat(opt->path_html,"index.html"),"primary","primary");  \
  210. } \
  211. } \
  212. makeindex_done=1;    /* ok c'est fait */  \
  213. } while(0)
  214.  
  215. // Recording a link:
  216. // the required size is computed: the size of the 3 strings to store (size forced to an even value, plus 2 safety bytes)
  217. // then we check that enough room is left in the buffer - otherwise another one is allocated
  218. // finally we write at the current buffer address, which is then advanced; the available size is decreased accordingly
  219. // codebase: if non-empty and a .class file is being stored, it is recorded as the primary path for classes
  220. // FA,FF: former_adr and former_fil, the original link
  221. #if HTS_HASH /* when HTS_HASH is non-zero, no length is recorded */
  222. #define liens_record_sav_len(A) 
  223. #else
  224. #define liens_record_sav_len(A) (A)->sav_len=strlen((A)->sav) /* otherwise store strlen(sav) in the link's sav_len field */
  225. #endif
  226.  
  227. // COPY OF HTSCORE.C (keep both copies in sync) -- liens_record(A,F,S,FA,FF): store link number lien_tot (address A, file F, save name S, former address/file FA/FF) in the lien_buffer arena and register it with hash_write(); liens[lien_tot] is NULL afterwards when no memory could be obtained
  228. #define liens_record(A,F,S,FA,FF) { \
  229. int notecode=0; \
  230. int lienurl_len=((sizeof(lien_url)+HTS_ALIGN-1)/HTS_ALIGN)*HTS_ALIGN, /* sizeof(lien_url) rounded up to a multiple of HTS_ALIGN */\
  231.   adr_len=strlen(A),\
  232.   fil_len=strlen(F),\
  233.   sav_len=strlen(S),\
  234.   cod_len=0,\
  235.   former_adr_len=strlen(FA),\
  236.   former_fil_len=strlen(FF); \
  237. if (former_adr_len>0) { /* former_* strings are stored only when FA is non-empty */\
  238.   former_adr_len=(former_adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
  239.   former_fil_len=(former_fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
  240. } else former_adr_len=former_fil_len=0;\
  241. if (strlen(F)>6) if (strnotempty(codebase)) if (strfield(F+strlen(F)-6,".class")) { notecode=1; /* F ends in .class and a codebase is set: store the codebase too */ \
  242. cod_len=strlen(codebase); cod_len=(cod_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; } \
  243. adr_len=(adr_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; fil_len=(fil_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; sav_len=(sav_len/HTS_ALIGN)*HTS_ALIGN+HTS_ALIGN*2; \
  244. if ((int) lien_size < (int) (adr_len+fil_len+sav_len+cod_len+former_adr_len+former_fil_len+lienurl_len)) { /* not enough room left: start a fresh block of add_tab_alloc bytes */ \
  245. lien_buffer=(char*) ((void*) calloct(add_tab_alloc,1)); \
  246. lien_size=add_tab_alloc; \
  247. if (lien_buffer!=NULL) { \
  248. liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
  249. liens[lien_tot]->firstblock=1; \
  250. } else { liens[lien_tot]=NULL; lien_size=0; } /* fix: on allocation failure the slot used to keep its previous value and lien_size claimed room in a NULL buffer */ \
  251. } else { \
  252. liens[lien_tot]=(lien_url*) (void*) lien_buffer; lien_buffer+=lienurl_len; lien_size-=lienurl_len; \
  253. liens[lien_tot]->firstblock=0; \
  254. } \
  255. if (liens[lien_tot]!=NULL) { /* carve the string areas out of the block, then copy the strings in */ \
  256. liens[lien_tot]->adr=lien_buffer; lien_buffer+=adr_len; lien_size-=adr_len; \
  257. liens[lien_tot]->fil=lien_buffer; lien_buffer+=fil_len; lien_size-=fil_len; \
  258. liens[lien_tot]->sav=lien_buffer; lien_buffer+=sav_len; lien_size-=sav_len; \
  259. liens[lien_tot]->cod=NULL; \
  260. if (notecode) { liens[lien_tot]->cod=lien_buffer; lien_buffer+=cod_len; lien_size-=cod_len; strcpybuff(liens[lien_tot]->cod,codebase); } \
  261. if (former_adr_len>0) {\
  262. liens[lien_tot]->former_adr=lien_buffer; lien_buffer+=former_adr_len; lien_size-=former_adr_len; \
  263. liens[lien_tot]->former_fil=lien_buffer; lien_buffer+=former_fil_len; lien_size-=former_fil_len; \
  264. strcpybuff(liens[lien_tot]->former_adr,FA); \
  265. strcpybuff(liens[lien_tot]->former_fil,FF); \
  266. }\
  267. strcpybuff(liens[lien_tot]->adr,A); \
  268. strcpybuff(liens[lien_tot]->fil,F); \
  269. strcpybuff(liens[lien_tot]->sav,S); \
  270. liens_record_sav_len(liens[lien_tot]); \
  271. hash_write(hashptr,lien_tot,opt->urlhack);  \
  272. } \
  273. }
  274.  
  275. #define ENGINE_LOAD_CONTEXT() /* declares the parser's working locals from the two context structs str and stre; must be the first thing in the function body, with a trailing semicolon */ \
  276.   lien_url** liens = (lien_url**) str->liens; \
  277.   httrackp* opt = (httrackp*) str->opt; \
  278.   lien_back* back = (lien_back*) str->back; \
  279.   cache_back* cache = (cache_back*) str->cache; \
  280.   hash_struct* hashptr = (hash_struct*) str->hashptr; \
  281.   int back_max = str->back_max; \
  282.   int numero_passe = str->numero_passe; \
  283.   int add_tab_alloc = str->add_tab_alloc; \
  284.   /* local copies of values reached through pointers; ENGINE_SAVE_CONTEXT writes these four back */ \
  285.   int lien_tot = * ( (int*) (str->lien_tot_) ); \
  286.   int ptr = * ( (int*) (str->ptr_) ); \
  287.   int lien_size = * ( (int*) (str->lien_size_) ); \
  288.   char* lien_buffer = * ( (char**) (str->lien_buffer_) ); \
  289.   /* */ \
  290.   /* from the extended struct stre */ \
  291.   htsblk* r = stre->r_; \
  292.   hash_struct* hash = stre->hash_; \
  293.   int lien_max = *stre->lien_max_; /* written back by ENGINE_SAVE_CONTEXT */ \
  294.   /* */ \
  295.   int error = * stre->error_; /* written back by ENGINE_SAVE_CONTEXT */ \
  296.   int store_errpage = * stre->store_errpage_; /* written back by ENGINE_SAVE_CONTEXT */ \
  297.   char* codebase = stre->codebase; \
  298.   char* base = stre->base; \
  299.   /* index.html generation state; done, fp and links are written back by ENGINE_SAVE_CONTEXT */ \
  300.   int makeindex_done = *stre->makeindex_done_; \
  301.   FILE* makeindex_fp = *stre->makeindex_fp_; \
  302.   int makeindex_links = *stre->makeindex_links_; \
  303.   char* makeindex_firstlink = stre->makeindex_firstlink_; \
  304.   /* format strings passed to fprintf() when writing the index */ \
  305.   char *template_header = stre->template_header_; \
  306.   char *template_body = stre->template_body_; \
  307.   char *template_footer = stre->template_footer_; \
  308.   /* */ \
  309.   LLint stat_fragment = *stre->stat_fragment_; /* written back by ENGINE_SAVE_CONTEXT */ \
  310.   TStamp makestat_time = stre->makestat_time; \
  311.   FILE* makestat_fp = stre->makestat_fp
  312.  
  313. #define ENGINE_SAVE_CONTEXT() /* counterpart of ENGINE_LOAD_CONTEXT: stores the locals loaded through pointers back into str / stre; the other locals are not written back */ \
  314.   /* Apply changes */ \
  315.    * ( (int*) (str->lien_tot_) ) = lien_tot; \
  316.    * ( (int*) (str->ptr_) ) = ptr; \
  317.    * ( (int*) (str->lien_size_) ) = lien_size; \
  318.    * ( (char**) (str->lien_buffer_) ) = lien_buffer; \
  319.    /* */ \
  320.    * stre->error_ = error; \
  321.    * stre->store_errpage_ = store_errpage; \
  322.    * stre->lien_max_ = lien_max; \
  323.    /* index.html generation state */ \
  324.    *stre->makeindex_done_ = makeindex_done; \
  325.    *stre->makeindex_fp_ = makeindex_fp; \
  326.    *stre->makeindex_links_ = makeindex_links; \
  327.    /* */ \
  328.    *stre->stat_fragment_ = stat_fragment
  329.  
  330. #define _FILTERS     (*opt->filters.filters)   /* dereferences opt->filters.filters; needs `opt` in scope. NOTE(review): names starting with underscore + uppercase are reserved identifiers in C */
  331. #define _FILTERS_PTR (opt->filters.filptr)   /* the filptr member of opt->filters */
  332. #define _ROBOTS      ((robots_wizard*)opt->robotsptr)   /* opt->robotsptr cast to robots_wizard* */
  333.  
  334. /* Apply current *adr character for the script automate: when inscript is set, move inscript_state_pos to the state listed in inscript_state[] for that character. Uses inscript, inscript_state, inscript_state_pos and adr from the enclosing scope. */
  335. #define AUTOMATE_LOOKUP_CURRENT_ADR() do { \
  336.   if (inscript) { \
  337.     int new_state_pos; \
  338.     new_state_pos=inscript_state[inscript_state_pos][(unsigned char)*adr]; /* cast keeps the column index non-negative for bytes above 127 */ \
  339.     if (new_state_pos < 0) { /* negative entry: no transition for this character, use the state's INSCRIPT_DEFAULT column */ \
  340.     new_state_pos=inscript_state[inscript_state_pos][INSCRIPT_DEFAULT]; \
  341.     } \
  342.     assertf(new_state_pos >= 0); /* every reachable state must define a default */ \
  343.     assertf(new_state_pos*sizeof(inscript_state[0]) < sizeof(inscript_state)); /* the new state must be a valid row of the table */ \
  344.     inscript_state_pos=new_state_pos; \
  345.   } \
  346. } while(0)  
  347.  
  348. /* Increment current pointer to 'steps' characters, modifying automate if necessary: adr advances one character at a time and each new *adr is run through AUTOMATE_LOOKUP_CURRENT_ADR(). 'steps' is evaluated once; a value <= 0 does nothing. */
  349. #define INCREMENT_CURRENT_ADR(steps) do { \
  350.   int steps__ = (steps); \
  351.   while(steps__ > 0) { \
  352.     adr++; \
  353.     AUTOMATE_LOOKUP_CURRENT_ADR(); /* lookup happens after the increment, i.e. on the character just reached */ \
  354.     steps__ --; \
  355.   } \
  356. } while(0)
  357.  
  358.  
  359. /* Main parser */
  360. int htsparse(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
  361.   /* Load engine variables */
  362.   ENGINE_LOAD_CONTEXT();
  363.   
  364. #if HTS_ANALYSTE
  365.   if (hts_htmlcheck(r->adr,(int)r->size,urladr,urlfil)) {
  366. #endif          
  367.     FILE* fp=NULL;      // fichier Θcrit localement 
  368.     char* adr=r->adr;    // pointeur (on parcourt)
  369.     char* lastsaved;    // adresse du dernier octet sauvΘ + 1
  370.     if ( (opt->debug>1) && (opt->log!=NULL) ) {
  371.       fspc(opt->log,"debug"); fprintf(opt->log,"scan file.."LF); test_flush;
  372.     }
  373.     
  374.     
  375.     // Indexing!
  376. #if HTS_MAKE_KEYWORD_INDEX
  377.     if (opt->kindex) {
  378.       if (index_keyword(r->adr,r->size,r->contenttype,savename,opt->path_html)) {
  379.         if ( (opt->debug>1) && (opt->log!=NULL) ) {
  380.           fspc(opt->log,"debug"); fprintf(opt->log,"indexing file..done"LF); test_flush;
  381.         }
  382.       } else {
  383.         if ( (opt->debug>1) && (opt->log!=NULL) ) {
  384.           fspc(opt->log,"debug"); fprintf(opt->log,"indexing file..error!"LF); test_flush;
  385.         }
  386.       }
  387.     }
  388. #endif
  389.     
  390.     // Now, parsing
  391.     if ((opt->getmode & 1) && (ptr>0)) {  // rΘcupΘrer les html sur disque       
  392.       // crΘer le fichier html local
  393.       HT_ADD_FOP;   // Θcrire peu α peu le fichier
  394.     }
  395.     
  396.     if (!error) {
  397.       int detect_title=0;  // dΘtection  du title
  398.       int back_add_stats = opt->state.back_add_stats;
  399.       //
  400.       char* in_media=NULL; // in other media type (real media and so..)
  401.       int intag=0;         // on est dans un tag
  402.       int incomment=0;     // dans un <!--
  403.       int inscript=0;      // dans un scipt pour applets javascript)
  404.       signed char inscript_state[10][257];
  405.       typedef enum { 
  406.         INSCRIPT_START=0,
  407.         INSCRIPT_ANTISLASH,
  408.         INSCRIPT_INQUOTE,
  409.         INSCRIPT_INQUOTE2,
  410.         INSCRIPT_SLASH,
  411.         INSCRIPT_SLASHSLASH,
  412.         INSCRIPT_COMMENT,
  413.         INSCRIPT_COMMENT2,
  414.         INSCRIPT_ANTISLASH_IN_QUOTE,
  415.         INSCRIPT_ANTISLASH_IN_QUOTE2,
  416.         INSCRIPT_DEFAULT=256
  417.       } INSCRIPT;
  418.       INSCRIPT inscript_state_pos=INSCRIPT_START;
  419.       char* inscript_name=NULL; // script tag name
  420.       int inscript_tag=0;  // on est dans un <body onLoad="... terminΘ par >
  421.       char inscript_tag_lastc='\0';
  422.       // terminaison (" ou ') du "<body onLoad=.."
  423.       int inscriptgen=0;     // on est dans un code gΘnΘrant, ex aprΦs obj.write("..
  424.       //int inscript_check_comments=0, inscript_in_comments=0;    // javascript comments
  425.       char scriptgen_q='\0'; // caractΦre faisant office de guillemet (' ou ")
  426.       int no_esc_utf=0;      // ne pas echapper chars > 127
  427.       int nofollow=0;        // ne pas scanner
  428.       //
  429.       int parseall_lastc='\0';     // dernier caractΦre parsΘ pour parseall
  430.       //int parseall_incomment=0;   // dans un /* */ (exemple: a = /* URL */ "img.gif";)
  431.       //
  432.       char* intag_start=adr;
  433.       char* intag_startattr=NULL;
  434.       int intag_start_valid=0;
  435.       int intag_ctype=0;
  436.       //
  437.       int   parent_relative=0;    // the parent is the base path (.js, .css..)
  438.       HT_ADD_START;    // dΘbuter
  439.   
  440.       /* Initialize script automate for comments, quotes.. */
  441.       memset(inscript_state, 0xff, sizeof(inscript_state));
  442.       inscript_state[INSCRIPT_START][INSCRIPT_DEFAULT]=INSCRIPT_START;     /* by default, stay in START */
  443.       inscript_state[INSCRIPT_START]['\\']=INSCRIPT_ANTISLASH;             /* #1: \ escapes the next character whatever it is */
  444.       inscript_state[INSCRIPT_ANTISLASH][INSCRIPT_DEFAULT]=INSCRIPT_START;
  445.       inscript_state[INSCRIPT_START]['\'']=INSCRIPT_INQUOTE;               /* #2: ' opens quote and only ' returns to 0 */
  446.       inscript_state[INSCRIPT_INQUOTE][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE;
  447.       inscript_state[INSCRIPT_INQUOTE]['\'']=INSCRIPT_START;
  448.       inscript_state[INSCRIPT_INQUOTE]['\\']=INSCRIPT_ANTISLASH_IN_QUOTE;
  449.       inscript_state[INSCRIPT_START]['\"']=INSCRIPT_INQUOTE2;              /* #3: " opens double-quote and only " returns to 0 */
  450.       inscript_state[INSCRIPT_INQUOTE2][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE2;
  451.       inscript_state[INSCRIPT_INQUOTE2]['\"']=INSCRIPT_START;
  452.       inscript_state[INSCRIPT_INQUOTE2]['\\']=INSCRIPT_ANTISLASH_IN_QUOTE2;
  453.       inscript_state[INSCRIPT_START]['/']=INSCRIPT_SLASH;                  /* #4: / state, default to #0 */
  454.       inscript_state[INSCRIPT_SLASH][INSCRIPT_DEFAULT]=INSCRIPT_START;
  455.       inscript_state[INSCRIPT_SLASH]['/']=INSCRIPT_SLASHSLASH;             /* #5: // with only LF to escape */
  456.       inscript_state[INSCRIPT_SLASHSLASH][INSCRIPT_DEFAULT]=INSCRIPT_SLASHSLASH;
  457.       inscript_state[INSCRIPT_SLASHSLASH]['\n']=INSCRIPT_START;
  458.       inscript_state[INSCRIPT_SLASH]['*']=INSCRIPT_COMMENT;                /* #6: / * with only * / to escape */
  459.       inscript_state[INSCRIPT_COMMENT][INSCRIPT_DEFAULT]=INSCRIPT_COMMENT;
  460.       inscript_state[INSCRIPT_COMMENT]['*']=INSCRIPT_COMMENT2;             /* #7: closing comments */
  461.       inscript_state[INSCRIPT_COMMENT2][INSCRIPT_DEFAULT]=INSCRIPT_COMMENT;
  462.       inscript_state[INSCRIPT_COMMENT2]['/']=INSCRIPT_START;
  463.       inscript_state[INSCRIPT_COMMENT2]['*']=INSCRIPT_COMMENT2;
  464.       inscript_state[INSCRIPT_ANTISLASH_IN_QUOTE][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE;    /* #8: escape in "" */
  465.       inscript_state[INSCRIPT_ANTISLASH_IN_QUOTE2][INSCRIPT_DEFAULT]=INSCRIPT_INQUOTE2;  /* #9: escape in '' */
  466.  
  467.  
  468.       /* statistics */
  469.       if ((opt->getmode & 1) && (ptr>0)) { 
  470.       /*
  471.       HTS_STAT.stat_files++;
  472.       HTS_STAT.stat_bytes+=r->size;
  473.         */
  474.       }
  475.       
  476.       /* Primary list or URLs */
  477.       if (ptr == 0) {
  478.         intag=1;
  479.         intag_start_valid=0;
  480.       }
  481.       /* Check is the file is a .js file */
  482.       else if (
  483.         (compare_mime(r->contenttype, str->url_file, "application/x-javascript")!=0)
  484.         || (compare_mime(r->contenttype, str->url_file, "text/css")!=0)
  485.         ) {      /* JavaScript js file */
  486.         inscript=1;
  487.         if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); }
  488.         inscript_name="script";
  489.         intag=1;     // because aprΦs <script> on y est .. - pas utile
  490.         intag_start_valid=0;    // OUI car nous sommes dans du code, plus dans du "vrai" tag
  491.         if ((opt->debug>1) && (opt->log!=NULL)) {
  492.           fspc(opt->log,"debug"); fprintf(opt->log,"note: this file is a javascript file"LF); test_flush;
  493.         }
  494.         // for javascript only
  495.         if (compare_mime(r->contenttype, str->url_file, "application/x-javascript") != 0) {
  496.           // all links must be checked against parent, not this link
  497.           if (liens[ptr]->precedent != 0) {
  498.             parent_relative=1;
  499.           }
  500.         }
  501.       }
  502.       /* Or a real audio */
  503.       else if (compare_mime(r->contenttype, str->url_file, "audio/x-pn-realaudio")!=0) {      /* realaudio link file */
  504.         inscript=intag=0;
  505.         inscript_name="media";
  506.         intag_start_valid=0;
  507.         in_media="LNK";       // real media! -> links
  508.       } 
  509.       /* Or a m3u playlist */
  510.       else if (compare_mime(r->contenttype, str->url_file, "audio/x-mpegurl")!=0) {      /* mp3 link file */
  511.         inscript=intag=0;
  512.         inscript_name="media";
  513.         intag_start_valid=0;
  514.         in_media="LNK";       // m3u! -> links
  515.       } 
  516.       else if (compare_mime(r->contenttype, str->url_file, "application/x-authorware-map")!=0) {      /* macromedia aam file */
  517.         inscript=intag=0;
  518.         inscript_name="media";
  519.         intag_start_valid=0;
  520.         in_media="AAM";       // aam
  521.       } 
  522.       
  523.       // Detect UTF8 format
  524.       if (is_unicode_utf8((unsigned char*) r->adr, (unsigned int) r->size) == 1) {
  525.         no_esc_utf=1;
  526.       } else {
  527.         no_esc_utf=0;
  528.       }
  529.       // Hack to prevent any problems with ram files of other files
  530.       * ( r->adr + r->size ) = '\0';
  531.       
  532.       
  533.       // ------------------------------------------------------------
  534.       // analyser ce qu'il y a en mΘmoire (fichier html)
  535.       // on scanne les balises
  536.       // ------------------------------------------------------------
  537. #if HTS_ANALYSTE
  538.       _hts_in_html_done=0;     // 0% scannΘs
  539.       _hts_cancel=0;           // pas de cancel
  540.       _hts_in_html_parsing=1;  // flag pour indiquer un parsing
  541. #endif
  542.       base[0]='\0';    // effacer base-href
  543.       lastsaved=adr;
  544.       do {
  545.         int p=0;
  546.         int valid_p=0;      // force to take p even if == 0
  547.         int ending_p='\0';  // ending quote?
  548.         int archivetag_p=0;  // avoid multiple-archives with commas
  549.         int  unquoted_script=0;
  550.         INSCRIPT inscript_state_pos_prev=inscript_state_pos;
  551.         error=0;
  552.         
  553.         /* Hack to avoid NULL char problems with C syntax */
  554.         /* Yes, some bogus HTML pages can embed null chars
  555.         and therefore can not be properly handled if this hack is not done
  556.         */
  557.         if ( ! (*adr) ) {
  558.           if ( ((int) (adr - r->adr)) < r->size)
  559.             *adr=' ';
  560.         }
  561.         
  562.         
  563.         
  564.         /*
  565.         index.html built here
  566.         */
  567.         // Construction index.html (sommaire)
  568.         // Avant de tester les a href,
  569.         // Ici on teste si l'on doit construire l'index vers le(s) site(s) miroir(s)
  570.         if (!makeindex_done) {  // autoriation d'Θcrire un index
  571.           if (!detect_title) {
  572.             if (opt->depth == liens[ptr]->depth) {    // on note toujours les premiers liens
  573.               if (!in_media) {
  574.                 if (opt->makeindex && (ptr>0)) {
  575.                   if (opt->getmode & 1) {  // autorisation d'Θcrire
  576.                     p=strfield(adr,"title");  
  577.                     if (p) {
  578.                       if (*(adr-1)=='/') p=0;    // /title
  579.                     } else {
  580.                       if (strfield(adr,"/html"))
  581.                         p=-1;                    // noter, mais sans titre
  582.                       else if (strfield(adr,"body"))
  583.                         p=-1;                    // noter, mais sans titre
  584.                       else if ( ((int) (adr - r->adr) ) >= (r->size-1) )
  585.                         p=-1;                    // noter, mais sans titre
  586.                       else if ( (int) (adr - r->adr) >= r->size - 2)   // we got to hurry
  587.                         p=-1; // xxc xxc xxc
  588.                     }
  589.                   } else
  590.                     p=0;
  591.                   
  592.                   if (p) {    // ok center                            
  593.                     if (makeindex_fp==NULL) {
  594.                       verif_backblue(opt,opt->path_html);    // gΘnΘrer gif
  595.                       makeindex_fp=filecreate(fconcat(opt->path_html,"index.html"));
  596.                       if (makeindex_fp!=NULL) {
  597.                         
  598.                         // Header
  599.                         fprintf(makeindex_fp,template_header,
  600.                           "<!-- Mirror and index made by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"
  601.                           );
  602.                         
  603.                       } else makeindex_done=-1;    // fait, erreur
  604.                     }
  605.                     
  606.                     if (makeindex_fp!=NULL) {
  607.                       char tempo[HTS_URLMAXSIZE*2];
  608.                       char s[HTS_URLMAXSIZE*2];
  609.                       char* a=NULL;
  610.                       char* b=NULL;
  611.                       s[0]='\0';
  612.                       if (p>0) {
  613.                         a=strchr(adr,'>');
  614.                         if (a!=NULL) {
  615.                           a++;
  616.                           while(is_space(*a)) a++;    // sauter espaces & co
  617.                           b=strchr(a,'<');   // prochain tag
  618.                         }
  619.                       }
  620.                       if (lienrelatif(tempo,liens[ptr]->sav,concat(opt->path_html,"index.html"))==0) {
  621.                         detect_title=1;      // ok dΘtectΘ pour cette page!
  622.                         makeindex_links++;   // un de plus
  623.                         strcpybuff(makeindex_firstlink,tempo);
  624.                         //
  625.  
  626.                         /* Hack */
  627.                         if (opt->mimehtml) {
  628.                           strcpybuff(makeindex_firstlink, "cid:primary/primary");
  629.                         }
  630.  
  631.                         if ((b==a) || (a==NULL) || (b==NULL)) {    // pas de titre
  632.                           strcpybuff(s,tempo);
  633.                         } else if ((b-a)<256) {
  634.                           b--;
  635.                           while(is_space(*b)) b--;
  636.                           strncpy(s,a,b-a+1);
  637.                           *(s+(b-a)+1)='\0';
  638.                         }
  639.                         
  640.                         // Body
  641.                         fprintf(makeindex_fp,template_body,
  642.                           tempo,
  643.                           s
  644.                           );
  645.                         
  646.                       }
  647.                     }
  648.                   }
  649.                 }
  650.               }
  651.               
  652.             } else if (liens[ptr]->depth<opt->depth) {   // on a sautΘ level1+1 et level1
  653.               HT_INDEX_END;
  654.             }
  655.           } // if (opt->makeindex)
  656.         }
  657.         // FIN Construction index.html (sommaire)
  658.         /*
  659.         end -- index.html built here
  660.         */
  661.         
  662.         
  663.         
  664.         /* Parse */
  665.         if (
  666.           (*adr=='<')    /* No starting tag */
  667.           && (!inscript)    /* Not in (java)script */
  668.           && (!incomment)   /* Not in comment (<!--) */
  669.           && (!in_media)    /* Not in media */
  670.           ) { 
  671.           intag=1;
  672.           intag_ctype=0;
  673.           //parseall_incomment=0;
  674.           //inquote=0;  // effacer quote
  675.           intag_start=adr; intag_start_valid=1;
  676.           codebase[0]='\0';    // effacer Θventuel codebase
  677.           
  678.           if (opt->getmode & 1) {  // sauver html
  679.             p=strfield(adr,"</html");
  680.             if (p==0) p=strfield(adr,"<head>");
  681.             // if (p==0) p=strfield(adr,"<doctype");
  682.             if (p) {
  683.               char* eol="\n";
  684.               if (strchr(r->adr,'\r'))
  685.                 eol="\r\n";
  686.               if (strnotempty(opt->footer)) {
  687.                 char tempo[1024+HTS_URLMAXSIZE*2];
  688.                 char gmttime[256];
  689.                 tempo[0]='\0';
  690.                 time_gmt_rfc822(gmttime);
  691.                 strcatbuff(tempo,eol);
  692.                 sprintf(tempo+strlen(tempo),opt->footer,jump_identification(urladr),urlfil,gmttime,HTTRACK_VERSIONID,"","","","","","","");
  693.                 strcatbuff(tempo,eol);
  694.                 //fwrite(tempo,1,strlen(tempo),fp);
  695.                 HT_ADD(tempo);
  696.                 if (r->charset[0]) {
  697.                   HT_ADD("<!-- Added by HTTrack --><meta http-equiv=\"content-type\" content=\"text/html;charset=");
  698.                   HT_ADD(r->charset);
  699.                   HT_ADD("\"><!-- /Added by HTTrack -->");
  700.                   HT_ADD(eol);
  701.                 }
  702.               }
  703.             }
  704.           }        
  705.           
  706.           // éliminer les <!-- (commentaires) : intag dévalidé
  707.           if (*(adr+1)=='!')
  708.             if (*(adr+2)=='-')
  709.               if (*(adr+3)=='-') {
  710.                 intag=0;
  711.                 incomment=1;
  712.                 intag_start_valid=0;
  713.               }
  714.               
  715.         }
  716.         else if (
  717.           (*adr=='>')                        /* ending tag */
  718.           && ( (!inscript && !in_media) || (inscript_tag) )  /* and in tag (or in script) */
  719.           ) {
  720.           if (inscript_tag) {
  721.             inscript_tag=inscript=0;
  722.             intag=0;
  723.             incomment=0;
  724.             intag_start_valid=0;
  725.             if (opt->parsedebug) { HT_ADD("<@@ /inscript @@>"); }
  726.           } else if (!incomment) {
  727.             intag=0; //inquote=0;
  728.             
  729.             // entrée dans du javascript?
  730.             // on parse ICI car il se peut qu'on ait eu à parser les src=.. dedans
  731.             //if (!inscript) {  // sinon on est dans un obj.write("..
  732.             if ((intag_start_valid) && 
  733.               (
  734.               check_tag(intag_start,"script")
  735.               ||
  736.               check_tag(intag_start,"style")
  737.               )
  738.               ) {
  739.               char* a=intag_start;    // <
  740.               // ** while(is_realspace(*(--a)));
  741.               if (*a=='<') {  // s√r que c'est un tag?
  742.                 if (check_tag(intag_start,"script"))
  743.                   inscript_name="script";
  744.                 else
  745.                   inscript_name="style";
  746.                 inscript=1;
  747.                 inscript_state_pos=INSCRIPT_START;
  748.                 intag=1;     // because aprΦs <script> on y est .. - pas utile
  749.                 intag_start_valid=0;    // OUI car nous sommes dans du code, plus dans du "vrai" tag
  750.                 if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); }
  751.               }
  752.             }
  753.           } else {                               /* end of comment? */
  754.             // vΘrifier fermeture correcte
  755.             if ( (*(adr-1)=='-') && (*(adr-2)=='-') ) {
  756.               intag=0;
  757.               incomment=0;
  758.               intag_start_valid=0;
  759.             }
  760. #if GT_ENDS_COMMENT
  761.             /* wrong comment ending */
  762.             else {
  763.             /* check if correct ending does not exists
  764.             <!-- foo > example <!-- bar > is sometimes accepted by browsers
  765.             when no --> is used somewhere else.. darn those browsers are dirty
  766.               */
  767.               if (!strstr(adr,"-->")) {
  768.                 intag=0;
  769.                 incomment=0;
  770.                 intag_start_valid=0;
  771.               }
  772.             }
  773. #endif
  774.           }
  775.           //}
  776.         }
  777.         //else if (*adr==34) {
  778.         //  inquote=(inquote?0:1);
  779.         //}
  780.         else if (intag || inscript || in_media) {    // nous sommes dans un tag/commentaire, tester si on recoit un tag
  781.           int p_type=0;
  782.           int p_nocatch=0;
  783.           int p_searchMETAURL=0;  // chercher ..URL=<url>
  784.           int add_class=0;        // ajouter .class
  785.           int add_class_dots_to_patch=0;   // number of '.' in code="x.y.z<realname>"
  786.           char* p_flush=NULL;
  787.           
  788.           
  789.           // ------------------------------------------------------------
  790.           // parsing évolué
  791.           // ------------------------------------------------------------
  792.           if (((isalpha((unsigned char)*adr)) || (*adr=='/') || (inscript) || (in_media) || (inscriptgen))) {  // sinon pas la peine de tester..
  793.             
  794.             
  795.                                                                                                  /* caractΦre de terminaison pour "miniparsing" javascript=.. ? 
  796.             (ex: <a href="javascript:()" action="foo"> ) */
  797.             if (inscript_tag) {
  798.               if (inscript_tag_lastc) {
  799.                 if (*adr == inscript_tag_lastc) {
  800.                   /* sortir */
  801.                   inscript_tag=inscript=0;
  802.                   incomment=0;
  803.                   if (opt->parsedebug) { HT_ADD("<@@ /inscript @@>"); }
  804.                 }
  805.               }
  806.             }
  807.  
  808.             /* automate */
  809.             AUTOMATE_LOOKUP_CURRENT_ADR();
  810.             
  811.             
  812.             // Note:
  813.             // Certaines pages ne respectent pas le html
  814.             // notamment les guillemets ne sont pas fixés
  815.             // Nous sommes dans un tag, donc on peut faire un test plus
  816.             // large pour pouvoir prendre en compte ces particularités
  817.             
  818.             // à vérifier: ACTION, CODEBASE, VRML
  819.             
  820.             if (in_media) {
  821.               if (strcmp(in_media,"LNK")==0) { // real media
  822.                 p=0;
  823.                 valid_p=1;
  824.               }
  825.               else if (strcmp(in_media,"AAM")==0) { // AAM
  826.                 if (is_space((unsigned char)adr[0]) && ! is_space((unsigned char)adr[1])) {
  827.                   char* a = adr + 1;
  828.                   int n = 0;
  829.                   int ok = 0;
  830.                   int dot = 0;
  831.                   while(n < HTS_URLMAXSIZE/2 && a[n] != '\0' &&
  832.                     ( ! is_space((unsigned char)a[n]) || ! ( ok = 1) )
  833.                     ) {
  834.                     if (a[n] == '.') {
  835.                       dot = n;
  836.                     }
  837.                     n++;
  838.                   }
  839.                   if (ok && dot > 0) {
  840.                     char tmp[HTS_URLMAXSIZE/2 + 2];
  841.                     tmp[0] = '\0';
  842.                     strncat(tmp, a + dot + 1, n - dot - 1);
  843.                     if (is_knowntype(tmp) || ishtml_ext(tmp) != -1) {
  844.                       adr++;
  845.                       p = 0;
  846.                       valid_p = 1;
  847.                       unquoted_script = 1;
  848.                     }
  849.                   }
  850.                 }
  851.               }
  852.             } else if (ptr>0) {        /* pas premiΦre page 0 (primary) */
  853.               p=0;  // saut pour le nom de fichier: adresse nom fichier=adr+p
  854.               
  855.               // ------------------------------
  856.               // détection d'écriture JavaScript.
  857.               // osons les obj.write et les obj.href=.. ! osons!
  858.               // note: inscript==1 donc on sautera après les \"
  859.               if (inscript) {
  860.                 if (inscriptgen) {          // on est dΘja dans un objet gΘnΘrant..
  861.                   if (*adr==scriptgen_q) {  // fermeture des " ou '
  862.                     if (*(adr-1)!='\\') {   // non
  863.                       inscriptgen=0;        // ok parsing terminΘ
  864.                     }
  865.                   }
  866.                 } else {
  867.                   char* a=NULL;
  868.                   char check_this_fking_line=0;  // parsing code javascript..
  869.                   char must_be_terminated=0;     // caractΦre obligatoire de terminaison!
  870.                   int token_size;
  871.                   if (!(token_size=strfield(adr,".writeln"))) // dΘtection ...objet.write[ln]("code html")...
  872.                     token_size=strfield(adr,".write");
  873.                   if (token_size) {
  874.                     a=adr+token_size;
  875.                     while(is_realspace(*a)) a++; // sauter espaces
  876.                     if (*a=='(') {  // dΘbut parenthΦse
  877.                       check_this_fking_line=2;  // α parser!
  878.                       must_be_terminated=')';
  879.                       a++;  // sauter (
  880.                     }
  881.                   }
  882.                   // euhh ??? ???
  883.                   /* else if (strfield(adr,".href")) {  // dΘtection ...objet.href="...
  884.                   a=adr+5;
  885.                   while(is_realspace(*a)) a++; // sauter espaces
  886.                   if (*a=='=') {  // ohh un Θgal
  887.                   check_this_fking_line=1;  // α noter!
  888.                   must_be_terminated=';';   // et si t'as oubliΘ le ; tu sais pas coder
  889.                   a++;   // sauter =
  890.                   }
  891.                   
  892.                 }*/
  893.                   
  894.                   // on a un truc du genre instruction"code gΘnΘrΘ" dont on parse le code
  895.                   if (check_this_fking_line) {
  896.                     while(is_realspace(*a)) a++;
  897.                     if ((*a=='\'') || (*a=='"')) {  // dΘpart de '' ou ""
  898.                       char *b;
  899.                       scriptgen_q=*a;    // quote
  900.                       b=a+1;      // dΘpart de la chaεne
  901.                       // vΘrifier forme ("code") et pas ("code"+var), ingΘrable
  902.                       do {
  903.                         if (*a==scriptgen_q && *(a-1)!='\\')  // quote non slash
  904.                           break;            // sortie
  905.                         else if (*a==10 && *(a-1) != '\\'  /* LF and no continue (\) character */
  906.                             && ( *(a-1) != '\r' || *(a-2) != '\\' ) )  /* and not CRLF and no .. */
  907.                           break;
  908.                         else 
  909.                           a++;  // caractΦre suivant
  910.                       } while((a-b) < HTS_URLMAXSIZE / 2);
  911.                       if (*a==scriptgen_q) {  // fin du quote
  912.                         a++;
  913.                         while(is_realspace(*a)) a++;
  914.                         if (*a==must_be_terminated) {  // parenthΦse fermante: ("..")
  915.                           
  916.                           // bon, on doit parser une ligne javascript
  917.                           // 1) si check.. ==1 alors c'est un nom de fichier direct, donc
  918.                           // on fixe p sur le saut nécessaire pour atteindre le nom du fichier
  919.                           // et le moteur se débrouillera ensuite tout seul comme un grand
  920.                           // 2) si check==2 c'est un peu plus tordu car là on génère du
  921.                           // code html au sein de code javascript au sein de code html
  922.                           // dans ce cas on doit fixer un flag à un puis ensuite dans la boucle
  923.                           // on devra parser les instructions standard comme <a href etc
  924.                           // NOTE: le code javascript autogénéré n'est pas pris en compte!!
  925.                           // (et ne marche pas dans 50% des cas de toute façon!)
  926.                           if (check_this_fking_line==1) {
  927.                             p=(int) (b - adr);    // calculer saut!
  928.                           } else {
  929.                             inscriptgen=1;        // SCRIPTGEN actif
  930.                             adr=b;                // jump
  931.                           }
  932.                           
  933.                           if ((opt->debug>1) && (opt->log!=NULL)) {
  934.                             char str[512];
  935.                             str[0]='\0';
  936.                             strncatbuff(str,b,minimum((int) (a - b + 1), 32));
  937.                             fspc(opt->log,"debug"); fprintf(opt->log,"active code (%s) detected in javascript: %s"LF,(check_this_fking_line==2)?"parse":"pickup",str); test_flush;
  938.                           }
  939.                         }
  940.                         
  941.                       }
  942.                       
  943.                     }
  944.                     
  945.                     
  946.                   }
  947.                 }
  948.               }
  949.               // fin détection code générant javascript vers html
  950.               // ------------------------------
  951.               
  952.               
  953.               // analyse proprement dite, A HREF=.. etc..
  954.               if (!p) {
  955.                 // si dans un tag, et pas dans un script - sauf si on analyse un obj.write("..
  956.                 if ((intag && (!inscript)) || inscriptgen) {
  957.                   if ( (*(adr-1)=='<') || (is_space(*(adr-1))) ) {   // <tag < tag etc
  958.                     // <A HREF=.. pour les liens HTML
  959.                     p=rech_tageq(adr,"href");
  960.                     if (p) {    // href.. tester si c'est une bas href!
  961.                       if ((intag_start_valid) && check_tag(intag_start,"base")) {  // oui!
  962.                         // ** note: base href et codebase ne font pas bon mΘnage..
  963.                         p_type=2;    // c'est un chemin
  964.                       }
  965.                     }
  966.                     
  967.                     /* Tags supplΘmentaires α vΘrifier (<img src=..> etc) */
  968.                     if (p==0) {
  969.                       int i=0;
  970.                       while( (p==0) && (strnotempty(hts_detect[i])) ) {
  971.                         p=rech_tageq(adr,hts_detect[i]);
  972.                         if (p) {
  973.                           /* This is a temporary hack to avoid archive=foo.jar,bar.jar .. */
  974.                           if (strcmp(hts_detect[i], "archive") == 0) {
  975.                             archivetag_p = 1;
  976.                           }
  977.                         }
  978.                         i++;
  979.                       }
  980.                     }
  981.                     
  982.                     /* Tags supplΘmentaires en dΘbut α vΘrifier (<object .. hotspot1=..> etc) */
  983.                     if (p==0) {
  984.                       int i=0;
  985.                       while( (p==0) && (strnotempty(hts_detectbeg[i])) ) {
  986.                         p=rech_tageqbegdigits(adr,hts_detectbeg[i]);
  987.                         i++;
  988.                       }
  989.                     }
  990.                     
  991.                     /* Tags supplΘmentaires α vΘrifier : URL=.. */
  992.                     if (p==0) {
  993.                       int i=0;
  994.                       while( (p==0) && (strnotempty(hts_detectURL[i])) ) {
  995.                         p=rech_tageq(adr,hts_detectURL[i]);
  996.                         i++;
  997.                       }
  998.                       if (p) {
  999.                           if (intag_ctype == 1) {
  1000.                             p = 0;
  1001. #if 0
  1002.                           //if ((pos=rech_tageq(adr, "content"))) {
  1003.                               char temp[256];
  1004.                               char* token = NULL;
  1005.                               int len = rech_endtoken(adr + pos, &token);
  1006.                               if (len > 0 && len < sizeof(temp) - 2) {
  1007.                                   char* chpos;
  1008.                                   temp[0] = '\0';
  1009.                                   strncat(temp, token, len);
  1010.                                   if ((chpos = strstr(temp, "charset"))
  1011.                                       &&
  1012.                                       (chpos = strchr(chpos, '='))
  1013.                                       ) {
  1014.                                       chpos++;
  1015.                                       while(is_space(*chpos)) chpod++;
  1016.                                       chpos
  1017.                                   }
  1018.                               }
  1019. #endif
  1020.                           }
  1021.                           // <META HTTP-EQUIV="Refresh" CONTENT="3;URL=http://www.example.com">
  1022.                           else if (intag_ctype == 2) {
  1023.                               p_searchMETAURL=1;
  1024.                           } else {
  1025.                             p = 0;            /* cancel */
  1026.                           }
  1027.                       }
  1028.  
  1029.  
  1030.                     }
  1031.  
  1032.                     /* Tags supplΘmentaires α vΘrifier, mais α ne pas capturer */
  1033.                     if (p==0) {
  1034.                       int i=0;
  1035.                       while( (p==0) && (strnotempty(hts_detectandleave[i])) ) {
  1036.                         p=rech_tageq(adr,hts_detectandleave[i]);
  1037.                         i++;
  1038.                       }
  1039.                       if (p)
  1040.                         p_nocatch=1;      /* ne pas rechercher */
  1041.                     }
  1042.                     
  1043.                     /* EvΘnements */
  1044.                     if (p==0 && 
  1045.                       ! inscript          /* we don't want events inside document.write */
  1046.                       ) {
  1047.                       int i=0;
  1048.                       /* dΘtection onLoad etc */
  1049.                       while( (p==0) && (strnotempty(hts_detect_js[i])) ) {
  1050.                         p=rech_tageq(adr,hts_detect_js[i]);
  1051.                         i++;
  1052.                       }
  1053.                       /* non dΘtectΘ - dΘtecter Θgalement les onXxxxx= */
  1054.                       if (p==0) {
  1055.                         if ( (*adr=='o') && (*(adr+1)=='n') && isUpperLetter(*(adr+2)) ) {
  1056.                           p=0;
  1057.                           while(isalpha((unsigned char)adr[p]) && (p<64) ) p++;
  1058.                           if (p<64) {
  1059.                             while(is_space(adr[p])) p++;
  1060.                             if (adr[p]=='=')
  1061.                               p++;
  1062.                             else p=0;
  1063.                           } else p=0;
  1064.                         }
  1065.                       }
  1066.                       /* OK, ΘvΘnement repΘrΘ */
  1067.                       if (p) {
  1068.                         inscript_tag_lastc=*(adr+p);     /* α attendre α la fin */
  1069.                         adr+=p+1;   /* saut */
  1070.                                     /*
  1071.                                     On est dΘsormais dans du code javascript
  1072.                         */
  1073.                         inscript_name="";
  1074.                         inscript=inscript_tag=1;
  1075.                         inscript_state_pos=INSCRIPT_START;
  1076.                         if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); }
  1077.                       }
  1078.                       p=0;        /* quoi qu'il arrive, ne rien dΘmarrer ici */
  1079.                     }
  1080.                     
  1081.                     // <APPLET CODE=.. pour les applet java.. [CODEBASE (chemin..) α faire]
  1082.                     if (p==0) {
  1083.                       p=rech_tageq(adr,"code");
  1084.                       if (p) {
  1085.                         if ((intag_start_valid) && check_tag(intag_start,"applet")) {  // dans un <applet !
  1086.                           p_type=-1;  // juste le nom de fichier+dossier, Θcire avant codebase 
  1087.                           add_class=1;   // ajouter .class au besoin                         
  1088.                           
  1089.                           // vérifier qu'il n'y a pas de codebase APRES
  1090.                           // sinon on swappe les deux.
  1091.                           // pas très propre mais c'est ce qu'il y a de plus simple à faire!!
  1092.                           
  1093.                           {
  1094.                             char *a;
  1095.                             a=adr;
  1096.                             while((*a) && (*a!='>') && (!rech_tageq(a,"codebase"))) a++;
  1097.                             if (rech_tageq(a,"codebase")) {  // banzai! codebase=
  1098.                               char* b;
  1099.                               b=strchr(a,'>');
  1100.                               if (b) {
  1101.                                 if (((int) (b - adr)) < 1000) {    // au total < 1Ko
  1102.                                   char tempo[HTS_URLMAXSIZE*2];
  1103.                                   tempo[0]='\0';
  1104.                                   strncatbuff(tempo,a,(int) (b - a) );
  1105.                                   strcatbuff( tempo," ");
  1106.                                   strncatbuff(tempo,adr,(int) (a - adr - 1));
  1107.                                   // Θventuellement remplire par des espaces pour avoir juste la taille
  1108.                                   while((int) strlen(tempo)<((int) (b - adr)))
  1109.                                     strcatbuff(tempo," ");
  1110.                                   // pas d'erreur?
  1111.                                   if ((int) strlen(tempo) == ((int) (b - adr) )) {
  1112.                                     strncpy(adr,tempo,strlen(tempo));   // PAS d'octet nul α la fin!
  1113.                                     p=0;    // DEVALIDER!!
  1114.                                     p_type=0;
  1115.                                     add_class=0;
  1116.                                   }
  1117.                                 }
  1118.                               }
  1119.                             }
  1120.                           }
  1121.                           
  1122.                         }
  1123.                       }
  1124.                     }
  1125.                     
  1126.                     // liens α patcher mais pas α charger (ex: codebase)
  1127.                     if (p==0) {  // note: si non chargΘ (ex: ignorer .class) patchΘ tout de mΩme
  1128.                       p=rech_tageq(adr,"codebase");
  1129.                       if (p) {
  1130.                         if ((intag_start_valid) && check_tag(intag_start,"applet")) {  // dans un <applet !
  1131.                           p_type=-2;
  1132.                         } else p=-1;   // ne plus chercher
  1133.                       }
  1134.                     }
  1135.                     
  1136.                     
  1137.                     // Meta tags pour robots
  1138.                     if (p==0) {
  1139.                       if (opt->robots) {
  1140.                         if ((intag_start_valid) && check_tag(intag_start,"meta")) {
  1141.                           if (rech_tageq(adr,"name")) {    // name=robots.txt
  1142.                             char tempo[1100];
  1143.                             char* a;
  1144.                             tempo[0]='\0';
  1145.                             a=strchr(adr,'>');
  1146. #if DEBUG_ROBOTS
  1147.                             printf("robots.txt meta tag detected\n");
  1148. #endif
  1149.                             if (a) {
  1150.                               if (((int) (a - adr)) < 999 ) {
  1151.                                 strncatbuff(tempo,adr,(int) (a - adr));
  1152.                                 if (strstrcase(tempo,"content")) {
  1153.                                   if (strstrcase(tempo,"robots")) {
  1154.                                     if (strstrcase(tempo,"nofollow")) {
  1155. #if DEBUG_ROBOTS
  1156.                                       printf("robots.txt meta tag: nofollow in %s%s\n",urladr,urlfil);
  1157. #endif
  1158.                                       nofollow=1;       // NE PLUS suivre liens dans cette page
  1159.                                       if (opt->errlog) {
  1160.                                         fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s not scanned (follow robots meta tag)"LF,urladr,urlfil);
  1161.                                         test_flush;
  1162.                                       }
  1163.                                     }
  1164.                                   }
  1165.                                 }
  1166.                               }
  1167.                             }
  1168.                           }
  1169.                         }
  1170.                       }
  1171.                     }
  1172.  
  1173.                     // charset meta tags
  1174.                     if (p==0) {
  1175.                       if ((intag_start_valid) && check_tag(intag_start,"meta")) {
  1176.                         int pos;
  1177.                         // <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
  1178.                         if ((pos=rech_tageq(adr, "http-equiv"))) {
  1179.                           const char* token = NULL;
  1180.                           int len = rech_endtoken(adr + pos, &token);
  1181.                           if (len > 0) {
  1182.                             if (strfield(token, "content-type")) {
  1183.                               intag_ctype=1;
  1184.                             }
  1185.                             else if (strfield(token, "refresh")) {
  1186.                               intag_ctype=2;
  1187.                             }
  1188.                           }
  1189.                         }
  1190.                       }                    
  1191.                     }
  1192.  
  1193.                     // entrΘe dans une applet javascript
  1194.                     /*if (!inscript) {  // sinon on est dans un obj.write("..
  1195.                     if (p==0)
  1196.                     if (rech_sampletag(adr,"script"))
  1197.                     if (check_tag(intag_start,"script")) {
  1198.                     inscript=1;
  1199.                     }
  1200.                         }*/
  1201.                     
  1202.                     // Ici on procède à une analyse du code javascript pour tenter de récupérer
  1203.                     // certains fichiers évidents.
  1204.                     // C'est devenu obligatoire vu le nombre de pages qui intègrent
  1205.                     // des images réactives par exemple
  1206.                 }
  1207.               } else if (inscript) {
  1208.  
  1209. #if 0
  1210.                 /* Check // javascript comments */
  1211.                 if (*adr == 10 || *adr == 13) {
  1212.                   inscript_check_comments = 1;
  1213.                   inscript_in_comments = 0;
  1214.                 }
  1215.                 else if (inscript_check_comments) {
  1216.                   if (!is_realspace(*adr)) {
  1217.                     inscript_check_comments = 0;
  1218.                     if (adr[0] == '/' && adr[1] == '/') {
  1219.                       inscript_in_comments = 1;
  1220.                     }
  1221.                   }
  1222.                 }
  1223. #endif
  1224.  
  1225.                 /* Parse */
  1226.                 assertf(inscript_name != NULL);
  1227.                 if (
  1228.                   *adr == '/' &&
  1229.                   (
  1230.                   (strfield(adr,"/script") && strfield(inscript_name, "script"))
  1231.                   ||
  1232.                   (strfield(adr,"/style")  && strfield(inscript_name, "style"))
  1233.                   )
  1234.                   ) {
  1235.                   char* a=adr;
  1236.                   //while(is_realspace(*(--a)));
  1237.                   while( is_realspace(*a) ) a--;
  1238.                   a--;
  1239.                   if (*a=='<') {  // s√r que c'est un tag?
  1240.                     inscript=0;
  1241.                     if (opt->parsedebug) { HT_ADD("<@@ /inscript @@>"); }
  1242.                   }
  1243.                 } else if (inscript_state_pos == INSCRIPT_START /*!inscript_in_comments*/) {
  1244.                 /*
  1245.                 Script Analyzing - different types supported:
  1246.                 foo="url"
  1247.                 foo("url") or foo(url)
  1248.                 foo "url"
  1249.                   */
  1250.                   int nc;
  1251.                   char  expected     = '=';          // caractΦre attendu aprΦs
  1252.                   char* expected_end = ";";
  1253.                   int can_avoid_quotes=0;
  1254.                   char quotes_replacement='\0';
  1255.                   int ensure_not_mime=0;
  1256.                   if (inscript_tag)
  1257.                     expected_end=";\"\'";            // voir a href="javascript:doc.location='foo'"
  1258.                   nc = strfield(adr,".src");  // nom.src="image";
  1259.                   if (!nc) nc = strfield(adr,".location");  // document.location="doc"
  1260.                   if (!nc) nc = strfield(adr,":location");  // javascript:location="doc"
  1261.                   if (!nc) nc = strfield(adr,".href");  // document.location="doc"
  1262.                   if (!nc) if ( (nc = strfield(adr,".open")) ) { // window.open("doc",..
  1263.                     expected='(';    // parenthΦse
  1264.                     expected_end="),";  // fin: virgule ou parenthΦse
  1265.                     ensure_not_mime=1;  //* ensure the url is not a mime type */
  1266.                   }
  1267.                   if (!nc) if ( (nc = strfield(adr,".replace")) ) { // window.replace("url")
  1268.                     expected='(';    // parenthΦse
  1269.                     expected_end=")";  // fin: parenthΦse
  1270.                   }
  1271.                   if (!nc) if ( (nc = strfield(adr,".link")) ) { // window.link("url")
  1272.                     expected='(';    // parenthΦse
  1273.                     expected_end=")";  // fin: parenthΦse
  1274.                   }
  1275.                   if (!nc) if ( (nc = strfield(adr,"url")) && (!isalnum(*(adr - 1))) 
  1276.                                                            && *(adr - 1) != '_'
  1277.                                                            ) { // url(url)
  1278.                     expected='(';    // parenthΦse
  1279.                     expected_end=")";  // fin: parenthΦse
  1280.                     can_avoid_quotes=1;
  1281.                     quotes_replacement=')';
  1282.                   }
  1283.                   if (!nc) if ( (nc = strfield(adr,"import")) ) { // import "url"
  1284.                     if (is_space(*(adr+nc))) {
  1285.                       expected=0;    // no char expected
  1286.                     } else
  1287.                       nc=0;
  1288.                   }
  1289.                   if (nc) {
  1290.                     char *a;
  1291.                     a=adr+nc;
  1292.                     while(is_realspace(*a)) a++;
  1293.                     if ((*a == expected) || (!expected)) {
  1294.                       if (expected)
  1295.                         a++;
  1296.                       while(is_realspace(*a)) a++;
  1297.                       if ((*a==34) || (*a=='\'') || (can_avoid_quotes)) {
  1298.                         char *b,*c;
  1299.                         int ndelim=1;
  1300.                         if ((*a==34) || (*a=='\''))
  1301.                           a++;
  1302.                         else
  1303.                           ndelim=0;
  1304.                         b=a;
  1305.                         if (ndelim) {
  1306.                           while((*b!=34) && (*b!='\'') && (*b!='\0')) b++;
  1307.                         }
  1308.                         else {
  1309.                           while((*b != quotes_replacement) && (*b!='\0')) b++;
  1310.                         }
  1311.                         c=b--; c+=ndelim;
  1312.                         while(*c==' ') c++;
  1313.                         if ((strchr(expected_end,*c)) || (*c=='\n') || (*c=='\r')) {
  1314.                           c-=(ndelim+1);
  1315.                           if ((int) (c - a + 1)) {
  1316.                             if (ensure_not_mime) {
  1317.                               int i = 0;
  1318.                               while(a != NULL && hts_main_mime[i] != NULL && hts_main_mime[i][0] != '\0') {
  1319.                                 int p;
  1320.                                 if ((p=strfield(a, hts_main_mime[i])) && a[p] == '/') {
  1321.                                   a=NULL;
  1322.                                 }
  1323.                                 i++;
  1324.                               }
  1325.                             }
  1326.                             if (a != NULL) {
  1327.                               if ((opt->debug>1) && (opt->log!=NULL)) {
  1328.                                 char str[512];
  1329.                                 str[0]='\0';
  1330.                                 strncatbuff(str,a,minimum((int) (c - a + 1),32));
  1331.                                 fspc(opt->log,"debug"); fprintf(opt->log,"link detected in javascript: %s"LF,str); test_flush;
  1332.                               }
  1333.                               p=(int) (a - adr);    // p non nul: TRAITER CHAINE COMME FICHIER
  1334.                               if (can_avoid_quotes) {
  1335.                                 ending_p=quotes_replacement;
  1336.                               }
  1337.                             }
  1338.                           }
  1339.                         }
  1340.                         
  1341.                         
  1342.                       }
  1343.                     }
  1344.                   }
  1345.                   
  1346.                 }
  1347.               }
  1348.             }
  1349.             
  1350.           } else {      // ptr == 0
  1351.             //p=rech_tageq(adr,"primary");    // lien primaire, yeah
  1352.             p=0;          // No stupid tag anymore, raw link
  1353.             valid_p=1;    // Valid even if p==0
  1354.             while ((adr[p] == '\r') || (adr[p] == '\n'))
  1355.               p++;
  1356.             //can_avoid_quotes=1;
  1357.             ending_p='\r';
  1358.           }       
  1359.           
  1360.         } else if (isspace((unsigned char)*adr)) {
  1361.           intag_startattr=adr+1;        // attribute in tag (for dirty parsing)
  1362.         }
  1363.         
  1364.         
  1365.         // ------------------------------------------------------------
  1366.         // dernier recours - parsing "sale" : détection systématique des .gif, etc.
  1367.         // risque: générer de faux fichiers parasites
  1368.         // fix: ne parse plus dans les commentaires
  1369.         // ------------------------------------------------------------
  1370.         if ( (opt->parseall) && (ptr>0) && (!in_media) /* && (!inscript_in_comments)*/ ) {   // option parsing "brut"
  1371.           //int incomment_justquit=0;
  1372.           if (!is_realspace(*adr)) {
  1373.             int noparse=0;
  1374.             
  1375.             // Gestion des /* */
  1376. #if 0
  1377.             if (inscript) {
  1378.               if (parseall_incomment) {
  1379.                 if ((*adr=='/') && (*(adr-1)=='*'))
  1380.                   parseall_incomment=0;
  1381.                 incomment_justquit=1;       // ne pas noter dernier caractΦre
  1382.               } else {
  1383.                 if ((*adr=='/') && (*(adr+1)=='*'))
  1384.                   parseall_incomment=1;
  1385.               }
  1386.             } else
  1387.               parseall_incomment=0;
  1388. #endif
  1389.             /* ensure automate state  0 (not in comments, quotes..) */
  1390.             if (inscript && ( 
  1391.               inscript_state_pos != INSCRIPT_INQUOTE && inscript_state_pos != INSCRIPT_INQUOTE2
  1392.               ) ) {
  1393.               noparse=1;
  1394.             }
  1395.             
  1396.             /* vérifier que l'on est pas dans un <!-- --> pur */
  1397.             if ( (!intag) && (incomment) && (!inscript))
  1398.               noparse=1;        /* commentaire */
  1399.             
  1400.             // recherche d'URLs
  1401.             if (!noparse) {
  1402.             //if ((!parseall_incomment) && (!noparse)) {
  1403.               if (!p) {                   // non dΘja trouvΘ
  1404.                 if (adr != r->adr) {     // >1 caractΦre
  1405.                   // scanner les chaines
  1406.                   if ((*adr == '\"') || (*adr=='\'')) {         // "xx.gif" 'xx.gif'
  1407.                     if (strchr("=(,",parseall_lastc)) {    // exemple: a="img.gif.. (handles comments)
  1408.                       char *a=adr;
  1409.                       char stop=*adr;  // " ou '
  1410.                       int count=0;
  1411.                       
  1412.                       // sauter caractΦres
  1413.                       a++;
  1414.                       // copier
  1415.                       while((*a) && (*a!='\'') && (*a!='\"') && (count<HTS_URLMAXSIZE)) { count++; a++; }
  1416.                       
  1417.                       // ok chaine terminΘe par " ou '
  1418.                       if ((*a == stop) && (count<HTS_URLMAXSIZE) && (count>0)) {
  1419.                         char c;
  1420.                         char* aend;
  1421.                         //
  1422.                         aend=a;     // sauver dΘbut
  1423.                         a++;
  1424.                         while(is_taborspace(*a)) a++;
  1425.                         c=*a;
  1426.                         if (strchr("),;>/+\r\n",c)) {     // exemple: ..img.gif";
  1427.                           // le / est pour funct("img.gif" /* URL */);
  1428.                           char tempo[HTS_URLMAXSIZE*2];
  1429.                           char type[256];
  1430.                           int url_ok=0;      // url valide?
  1431.                           tempo[0]='\0'; type[0]='\0';
  1432.                           //
  1433.                           strncatbuff(tempo,adr+1,count);
  1434.                           //
  1435.                           if ((!strchr(tempo,' ')) || inscript) {   // espace dedans: mΘfiance! (sauf dans code javascript)
  1436.                             int invalid_url=0;
  1437.                             
  1438.                             // escape                              
  1439.                             unescape_amp(tempo);
  1440.                             
  1441.                             // Couper au # ou ? Θventuel
  1442.                             {
  1443.                               char* a=strchr(tempo,'#');
  1444.                               if (a)
  1445.                                 *a='\0';
  1446.                               a=strchr(tempo,'?');
  1447.                               if (a)
  1448.                                 *a='\0';
  1449.                             }
  1450.                             
  1451.                             // vΘrifier qu'il n'y a pas de caractΦres spΘciaux
  1452.                             if (!strnotempty(tempo))
  1453.                               invalid_url=1;
  1454.                             else if (strchr(tempo,'*')
  1455.                               || strchr(tempo,'<')
  1456.                               || strchr(tempo,'>')
  1457.                               || strchr(tempo,',')    /* list of files ? */
  1458.                               || strchr(tempo,'\"')    /* potential parsing bug */
  1459.                               || strchr(tempo,'\'')    /* potential parsing bug */
  1460.                               )
  1461.                               invalid_url=1;
  1462.                             else if (tempo[0] == '.' && isalnum(tempo[1]))   // ".gif"
  1463.                               invalid_url=1;
  1464.                             
  1465.                             /* non invalide? */
  1466.                             if (!invalid_url) {
  1467.                               // Un plus α la fin? Alors ne pas prendre sauf si extension ("/toto.html#"+tag)
  1468.                               if (c!='+') {    // PAS de plus α la fin
  1469. #if 0
  1470.                                 char* a;
  1471. #endif
  1472.                                 // "Comparisons of scheme names MUST be case-insensitive" (RFC2616)                                  
  1473.                                 //if ((strncmp(tempo,"http://",7)==0) || (strncmp(tempo,"ftp://",6)==0))  // ok pas de problΦme
  1474.                                 if (
  1475.                                   (strfield(tempo,"http:")) 
  1476.                                   || (strfield(tempo,"ftp:"))
  1477. #if HTS_USEOPENSSL
  1478.                                   || (
  1479.                                   SSL_is_available &&
  1480.                                   (strfield(tempo,"https:"))
  1481.                                   )
  1482. #endif
  1483.                                   )  // ok pas de problΦme
  1484.                                   url_ok=1;
  1485.                                 else if (tempo[strlen(tempo)-1]=='/') {        // un slash: ok..
  1486.                                   if (inscript)   // sinon si pas javascript, mΘfiance (rΘpertoire style base?)
  1487.                                     url_ok=1;
  1488.                                 } 
  1489. #if 0
  1490.                                 else if ((a=strchr(tempo,'/'))) {        // un slash: ok..
  1491.                                   if (inscript) {    // sinon si pas javascript, mΘfiance (style "text/css")
  1492.                                     if (strchr(a+1,'/'))     // un seul / : abandon (STYLE type='text/css')
  1493.                                     if (!strchr(tempo,' '))  // avoid spaces (too dangerous for comments)
  1494.                                       url_ok=1;
  1495.                                   }
  1496.                                 }
  1497. #endif
  1498.                               }
  1499.                               // Prendre si extension reconnue
  1500.                               if (!url_ok) {
  1501.                                 get_httptype(type,tempo,0);
  1502.                                 if (strnotempty(type))     // type reconnu!
  1503.                                   url_ok=1;
  1504.                                 else if (is_dyntype(get_ext(tempo)))  // reconnu php,cgi,asp..
  1505.                                   url_ok=1;
  1506.                                 // MAIS pas les foobar@aol.com !!
  1507.                                 if (strchr(tempo,'@'))
  1508.                                   url_ok=0;
  1509.                               }
  1510.                               //
  1511.                               // Ok, cela pourrait Ωtre une URL
  1512.                               if (url_ok) {
  1513.                                 
  1514.                                 // Check if not forbidden tag (id,name..)
  1515.                                 if (intag_start_valid) {
  1516.                                   if (intag_start)
  1517.                                     if (intag_startattr)
  1518.                                       if (intag)
  1519.                                         if (!inscript)
  1520.                                           if (!incomment) {
  1521.                                             int i=0,nop=0;
  1522.                                             while( (nop==0) && (strnotempty(hts_nodetect[i])) ) {
  1523.                                               nop=rech_tageq(intag_startattr,hts_nodetect[i]);
  1524.                                               i++;
  1525.                                             }
  1526.                                             // Forbidden tag
  1527.                                             if (nop) {
  1528.                                               url_ok=0;
  1529.                                               if ((opt->debug>1) && (opt->log!=NULL)) {
  1530.                                                 fspc(opt->log,"debug"); fprintf(opt->log,"dirty parsing: bad tag avoided: %s"LF,hts_nodetect[i-1]); test_flush;
  1531.                                               }
  1532.                                             }
  1533.                                           }
  1534.                                 }
  1535.                                 
  1536.                                 
  1537.                                 // Accepter URL, on la traitera comme une URL normale!!
  1538.                                 if (url_ok) {
  1539.                                   valid_p = 1;
  1540.                                   p = 0;
  1541.                                 }
  1542.                                 
  1543.                               }
  1544.                             }
  1545.                           }
  1546.                           }
  1547.                         }
  1548.                       }
  1549.                     }
  1550.                   }
  1551.                 }  // p == 0               
  1552.                 
  1553.               } // not in comment
  1554.               
  1555.               // plus dans un commentaire
  1556.               if ( inscript_state_pos == INSCRIPT_START 
  1557.                 && inscript_state_pos_prev == INSCRIPT_START) {
  1558.                 parseall_lastc=*adr;             // caractΦre avant le prochain
  1559.               }
  1560.  
  1561.  
  1562.             }  // if realspace
  1563.           }  // if parseall
  1564.           
  1565.           
  1566.           // ------------------------------------------------------------
  1567.           // p!=0 : on a repéré un éventuel lien
  1568.           // ------------------------------------------------------------
  1569.           //
  1570.           if ((p>0) || (valid_p)) {    // on a repΘrΘ un lien
  1571.             //int lien_valide=0;
  1572.             char* eadr=NULL;          /* fin de l'URL */
  1573.             char* quote_adr=NULL;     /* adresse du ? dans l'adresse */
  1574.             int ok=1;
  1575.             char quote='\0';
  1576.             int quoteinscript=0;
  1577.             int  noquote=0;
  1578.             
  1579.             // si nofollow ou un stop a été déclenché, réécrire tous les liens en externe
  1580.             if ((nofollow) || (opt->state.stop))
  1581.               p_nocatch=1;
  1582.             
  1583.             // écrire codebase avant, flusher avant code
  1584.             if ((p_type==-1) || (p_type==-2)) {
  1585.               if ((opt->getmode & 1) && (ptr>0)) {
  1586.                 HT_ADD_ADR;    // refresh
  1587.               }
  1588.               lastsaved=adr;    // dernier Θcrit+1
  1589.             }
  1590.             
  1591.             // sauter espaces
  1592.             // adr+=p;
  1593.             INCREMENT_CURRENT_ADR(p);
  1594.             while( ( is_space(*adr) || (
  1595.                                         inscriptgen 
  1596.                                         && adr[0] == '\\' 
  1597.                                         && is_space(adr[1])
  1598.                                        )
  1599.                    )
  1600.                    && quote == '\0'
  1601.                  ) {
  1602.               if (!quote)
  1603.                 if ((*adr=='\"') || (*adr=='\'')) {
  1604.                   quote=*adr;                     // on doit attendre cela α la fin
  1605.                   if (inscriptgen && *(adr - 1) == '\\') {
  1606.                     quoteinscript=1;  /* will wait for \" */
  1607.                   }
  1608.                 }
  1609.                 // puis quitter
  1610.                 // adr++;    // sauter les espaces, "" et cie
  1611.                 INCREMENT_CURRENT_ADR(1);
  1612.             }
  1613.             
  1614.             /* Stop at \n (LF) if primary links or link lists */
  1615.             if (ptr == 0 || (in_media && strcmp(in_media,"LNK")==0))
  1616.               quote='\n';
  1617.             /* s'arrêter que ce soit un ' ou un " : pour document.write('<img src="foo'+a); par exemple! */
  1618.             else if (inscript && ! unquoted_script)
  1619.               noquote=1;
  1620.             
  1621.             // sauter Θventuel \" ou \' javascript
  1622.             if (inscript) {    // on est dans un obj.write("..
  1623.               if (*adr=='\\') {
  1624.                 if ((*(adr+1)=='\'') || (*(adr+1)=='"')) {  // \" ou \'
  1625.                   // adr+=2;    // sauter
  1626.                   INCREMENT_CURRENT_ADR(2);
  1627.                 }
  1628.               }
  1629.             }
  1630.             
  1631.             // sauter content="1;URL=http://..
  1632.             if (p_searchMETAURL) {
  1633.               int l=0;
  1634.               while(
  1635.                 (adr + l + 4 < r->adr + r->size)
  1636.                 && (!strfield(adr+l,"URL=")) 
  1637.                 && (l<128) ) l++;
  1638.               if (!strfield(adr+l,"URL="))
  1639.                 ok=-1;
  1640.               else
  1641.                 adr+=(l+4);
  1642.             }
  1643.             
  1644.             /* éviter les javascript:document.location=.. : les parser, plutôt */
  1645.             if (ok!=-1) {
  1646.               if (strfield(adr,"javascript:") 
  1647.                 && ! inscript       /* we don't want to parse 'javascript:' inside document.write inside scripts */
  1648.                 ) {
  1649.                 ok=-1;
  1650.                 /*
  1651.                 On est dΘsormais dans du code javascript
  1652.                 */
  1653.                 inscript_name="";
  1654.                 inscript_tag=inscript=1;
  1655.                 inscript_state_pos=INSCRIPT_START;
  1656.                 inscript_tag_lastc=quote;     /* α attendre α la fin */
  1657.                 if (opt->parsedebug) { HT_ADD("<@@ inscript @@>"); }
  1658.               }
  1659.             }
  1660.             
  1661.             if (p_type==1) {
  1662.               if (*adr=='#') {
  1663.                 adr++;           // sauter # pour usemap etc
  1664.               }
  1665.             }
  1666.             eadr=adr;
  1667.             
  1668.             // ne pas flusher après code si on doit écrire le codebase avant!
  1669.             if ((p_type!=-1) && (p_type!=2) && (p_type!=-2)) {
  1670.               if ((opt->getmode & 1) && (ptr>0)) {
  1671.                 HT_ADD_ADR;    // refresh
  1672.               }
  1673.               lastsaved=adr;    // dernier Θcrit+1
  1674.               // après on écrira soit les données initiales,
  1675.               // soit une URL/lien modifié!
  1676.             } else if (p_type==-1) p_flush=adr;    // flusher jusqu'α adr ensuite
  1677.             
  1678.             if (ok!=-1) {    // continuer
  1679.               // dΘcouper le lien
  1680.               do {
  1681.                 if ((* (unsigned char*) eadr)<32) {   // caractΦre de contr⌠le (ou \0)
  1682.                   if (!is_space(*eadr))
  1683.                     ok=0; 
  1684.                 }
  1685.                 if ( ( ((int) (eadr - adr)) ) > HTS_URLMAXSIZE)  // ** trop long, >HTS_URLMAXSIZE caractΦres (on prΘvoit HTS_URLMAXSIZE autres pour path)
  1686.                   ok=-1;    // ne pas traiter ce lien
  1687.                 
  1688.                 if (ok > 0) {
  1689.                   //if (*eadr!=' ') {  
  1690.                   if (is_space(*eadr)) {   // guillemets,CR, etc
  1691.                     if ( 
  1692.                             ( *eadr == quote && ( !quoteinscript || *(eadr -1) == '\\') )  // end quote
  1693.                          || ( noquote && (*eadr == '\"' || *eadr == '\'') )       // end at any quote
  1694.                          || (!noquote && quote == '\0' && is_realspace(*eadr) )   // unquoted href
  1695.                        )     // si pas d'attente de quote spΘciale ou si quote atteinte
  1696.                       ok=0; 
  1697.                   } else if (ending_p && (*eadr==ending_p))
  1698.                     ok=0;
  1699.                   else {
  1700.                     switch(*eadr) {
  1701.                     case '>': 
  1702.                       if (!quote) {
  1703.                         if (!inscript && !in_media) {
  1704.                           intag=0;    // PLUS dans un tag!
  1705.                           intag_start_valid=0;
  1706.                         }
  1707.                         ok=0;
  1708.                       }
  1709.                       break;
  1710.                       /*case '<':*/ 
  1711.                     case '#': 
  1712.                       if (*(eadr-1) != '&')       // (
  1713.                         ok=0; 
  1714.                       break;
  1715.                       // case '?': non!
  1716.                     case '\\': if (inscript) ok=0; break;     // \" ou \' point d'arrΩt
  1717.                     case '?': quote_adr=adr; break;           // noter position query
  1718.                     }
  1719.                   }
  1720.                   //}
  1721.                 } 
  1722.                 eadr++;
  1723.               } while(ok==1);
  1724.               
  1725.               // Empty link detected
  1726.               if ( (((int) (eadr - adr))) <= 1) {       // link empty
  1727.                 ok=-1;        // No
  1728.                 if (*adr != '#') {        // Not empty+unique #
  1729.                   if ( (((int) (eadr - adr)) == 1)) {       // 1=link empty with delim (end_adr-start_adr)
  1730.                     if (quote) {
  1731.                       if ((opt->getmode & 1) && (ptr>0)) { 
  1732.                         HT_ADD("#");        // We add this for a <href="">
  1733.                       }
  1734.                     }
  1735.                   }
  1736.                 }
  1737.               }
  1738.  
  1739.               // This is a dirty and horrible hack to avoid parsing an Adobe GoLive bogus tag
  1740.               if (strfield(adr, "(Empty Reference!)")) {
  1741.                 ok=-1;        // No
  1742.               }
  1743.               
  1744.             }
  1745.             
  1746.             if (ok==0) {    // tester un lien
  1747.               char lien[HTS_URLMAXSIZE*2];
  1748.               int meme_adresse=0;      // 0 par dΘfaut pour primary
  1749.               //char *copie_de_adr=adr;
  1750.               //char* p;
  1751.               
  1752.               // construire lien (dΘcoupage)
  1753.               if ( (((int) (eadr -  adr))-1) < HTS_URLMAXSIZE  ) {    // pas trop long?
  1754.                 strncpy(lien,adr,((int) (eadr - adr))-1);
  1755.                 *(lien+  (((int) (eadr -  adr)))-1  )='\0';
  1756.                 //printf("link: %s\n",lien);          
  1757.                 // supprimer les espaces
  1758.                 while((lien[strlen(lien)-1]==' ') && (strnotempty(lien))) lien[strlen(lien)-1]='\0';
  1759.                 
  1760.                 
  1761.               } else
  1762.                 lien[0]='\0';    // erreur
  1763.               
  1764.  
  1765.               // ------------------------------------------------------
  1766.               // Lien repΘrΘ et extrait
  1767.               if (strnotempty(lien)>0) {           // construction du lien
  1768.                 char adr[HTS_URLMAXSIZE*2],fil[HTS_URLMAXSIZE*2];          // ATTENTION adr cache le "vrai" adr
  1769.                 int forbidden_url=-1;              // lien non interdit (mais non autorisΘ..)
  1770.                 int just_test_it=0;                // mode de test des liens
  1771.                 int set_prio_to=0;                 // pour capture de page isolΘe
  1772.                 int import_done=0;                 // lien importΘ (ne pas scanner ensuite *α priori*)
  1773.                 //
  1774.                 adr[0]='\0'; fil[0]='\0';
  1775.                 //
  1776.                 // 0: autorisΘ
  1777.                 // 1: interdit (patcher tout de mΩme adresse)
  1778.                 
  1779.                 if ((opt->debug>1) && (opt->log!=NULL)) {
  1780.                   fspc(opt->log,"debug"); fprintf(opt->log,"link detected in html: %s"LF,lien); test_flush;
  1781.                 }
  1782.                 
  1783.                 // external check
  1784. #if HTS_ANALYSTE
  1785.                 if (!hts_htmlcheck_linkdetected(lien)) {
  1786.                   error=1;    // erreur
  1787.                   if (opt->errlog) {
  1788.                     fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s refused by external wrapper"LF,lien);
  1789.                     test_flush;
  1790.                   }
  1791.                 }
  1792. #endif
  1793.  
  1794. #if HTS_STRIP_DOUBLE_SLASH
  1795.                 // supprimer les // en / (sauf pour http://)
  1796.                 if (opt->urlhack) {
  1797.                   char *a,*p,*q;
  1798.                   int done=0;
  1799.                   a=strchr(lien,':');    // http://
  1800.                   if (a) {
  1801.                     a++;
  1802.                     while(*a=='/') a++;    // position aprΦs http://
  1803.                   } else {
  1804.                     a=lien;                // dΘbut
  1805.                     while(*a=='/') a++;    // position aprΦs http://
  1806.                   }
  1807.                   q=strchr(a,'?');     // ne pas traiter aprΦs '?'
  1808.                   if (!q)
  1809.                     q=a+strlen(a)-1;
  1810.                   while(( p=strstr(a,"//")) && (!done) ) {    // remplacer // par /
  1811.                     if ((int) p>(int) q) {   // aprΦs le ? (toto.cgi?param=1//2.3)
  1812.                       done=1;    // stopper
  1813.                     } else {
  1814.                       char tempo[HTS_URLMAXSIZE*2];
  1815.                       tempo[0]='\0';
  1816.                       strncatbuff(tempo,a,(int) p - (int) a);
  1817.                       strcatbuff (tempo,p+1);
  1818.                       strcpybuff(a,tempo);    // recopier
  1819.                     }
  1820.                   }
  1821.                 }
  1822. #endif
  1823.                                
  1824.                 // purger espaces de dΘbut et fin, CR,LF rΘsiduels
  1825.                 // (IMG SRC="foo.<\n><\t>gif<\t>")
  1826.                 {
  1827.                   char* a = lien;
  1828.                   int llen;
  1829.  
  1830.                   // strip ending spaces
  1831.                   llen = ( *a != '\0' ) ? strlen(a) : 0;
  1832.                   while(llen > 0 && is_realspace(lien[llen - 1]) ) {
  1833.                     a[--llen]='\0';
  1834.                   } 
  1835.                   //  skip leading ones
  1836.                   while(is_realspace(*a)) a++;
  1837.                   // strip cr, lf, tab inside URL
  1838.                   llen = 0;
  1839.                   while(*a) {
  1840.                     if (*a != '\n' && *a != '\r' && *a != '\t') {
  1841.                       lien[llen++] = *a;
  1842.                     }
  1843.                     a++;
  1844.                   }
  1845.                   lien[llen] = '\0';
  1846.                 }
  1847.  
  1848.                 // commas are forbidden
  1849.                 if (archivetag_p) {
  1850.                   if (strchr(lien, ',')) {
  1851.                     error=1;    // erreur
  1852.                     if ((opt->debug>1) && (opt->log!=NULL)) {
  1853.                       fspc(opt->log,"debug"); fprintf(opt->log,"link rejected (multiple-archive) %s"LF,lien); test_flush;
  1854.                     }
  1855.                   }
  1856.                 }               
  1857.                 
  1858.                 /* Unescape/escape %20 and other   */
  1859.                 {
  1860.                   char query[HTS_URLMAXSIZE*2];
  1861.                   char* a=strchr(lien,'?');
  1862.                   if (a) {
  1863.                     strcpybuff(query,a);
  1864.                     *a='\0';
  1865.                   } else
  1866.                     query[0]='\0';
  1867.                   // conversion & -> & et autres joyeusetΘs
  1868.                   unescape_amp(lien);
  1869.                   unescape_amp(query);
  1870.                   // dΘcoder l'inutile (%2E par exemple) et coder espaces
  1871.                   // XXXXXXXXXXXXXXXXX strcpybuff(lien,unescape_http(lien));
  1872.                   strcpybuff(lien,unescape_http_unharm(lien, (no_esc_utf)?0:1));
  1873.                   escape_remove_control(lien);
  1874.                   escape_spc_url(lien);
  1875.                   strcatbuff(lien,query);     /* restore */
  1876.                 }
  1877.                 
  1878.                 // convertir les Θventuels \ en des / pour Θviter des problΦmes de reconnaissance!
  1879.                 {
  1880.                   char* a=jump_identification(lien);
  1881.                   while( (a=strchr(a,'\\')) ) *a='/';
  1882.                 }
  1883.                 
  1884.                 // supprimer le(s) ./
  1885.                 while ((lien[0]=='.') && (lien[1]=='/')) {
  1886.                   char tempo[HTS_URLMAXSIZE*2];
  1887.                   strcpybuff(tempo,lien+2);
  1888.                   strcpybuff(lien,tempo);
  1889.                 }
  1890.                 if (strnotempty(lien)==0)  // sauf si plus de nom de fichier
  1891.                   strcpybuff(lien,"./");
  1892.                 
  1893.                 // vΘrifie les /~machin -> /~machin/
  1894.                 // supposition dangereuse?
  1895.                 // OUI!!
  1896. #if HTS_TILDE_SLASH
  1897.                 if (lien[strlen(lien)-1]!='/') {
  1898.                   char *a=lien+strlen(lien)-1;
  1899.                   // Θviter aussi index~1.html
  1900.                   while (((int) a>(int) lien) && (*a!='~') && (*a!='/') && (*a!='.')) a--;
  1901.                   if (*a=='~') {
  1902.                     strcatbuff(lien,"/");    // ajouter slash
  1903.                   }
  1904.                 }
  1905. #endif
  1906.                 
  1907.                 // APPLET CODE="mixer.MixerApplet.class" --> APPLET CODE="mixer/MixerApplet.class"
  1908.                 // yes, this is dirty
  1909.                 // but I'm so lazy..
  1910.                 // and besides the java "code" convention is really a pain in html code
  1911.                 if (p_type==-1) {
  1912.                   char* a=strrchr(lien,'.');
  1913.                   add_class_dots_to_patch=0;
  1914.                   if (a) {
  1915.                     char* b;
  1916.                     do {
  1917.                       b=strchr(lien,'.');
  1918.                       if ((b != a) && (b)) {
  1919.                         add_class_dots_to_patch++;
  1920.                         *b='/';
  1921.                       }
  1922.                     } while((b != a) && (b));
  1923.                   }
  1924.                 }
  1925.  
  1926.                 // éliminer les éventuels :80 (port par défaut!)
  1927.                 if (link_has_authority(lien)) {
  1928.                   char * a;
  1929.                   a=strstr(lien,"//");    // "//" authority
  1930.                   if (a)
  1931.                     a+=2;
  1932.                   else
  1933.                     a=lien;
  1934.                   // while((*a) && (*a!='/') && (*a!=':')) a++;
  1935.                   a=jump_toport(a);
  1936.                   if (a) {  // port
  1937.                     int port=0;
  1938.                     int defport=80;
  1939.                     char* b=a+1;
  1940. #if HTS_USEOPENSSL
  1941.                     // FIXME
  1942.                     //if (strfield(adr, "https:")) {
  1943.                     //}
  1944. #endif
  1945.                     while(isdigit((unsigned char)*b)) { port*=10; port+=(int) (*b-'0'); b++; }
  1946.                     if (port==defport) {  // port 80, default - c'est dΘbile
  1947.                       char tempo[HTS_URLMAXSIZE*2];
  1948.                       tempo[0]='\0';
  1949.                       strncatbuff(tempo,lien,(int) (a - lien));
  1950.                       strcatbuff(tempo,a+3);  // sauter :80
  1951.                       strcpybuff(lien,tempo);
  1952.                     }
  1953.                   }
  1954.                 }
  1955.                 
  1956.                 // filtrer les parazites (mailto & cie)
  1957.                 /*
  1958.                 if (strfield(lien,"mailto:")) {  // ne pas traiter
  1959.                 error=1;
  1960.                 } else if (strfield(lien,"news:")) {  // ne pas traiter
  1961.                 error=1;
  1962.                 }
  1963.                 */
  1964.                 
  1965.                 // vérifier que l'on ne doit pas ajouter de .class
  1966.                 if (!error) {
  1967.                   if (add_class) {
  1968.                     char *a = lien+strlen(lien)-1;
  1969.                     while(( a > lien) && (*a!='/') && (*a!='.')) a--;
  1970.                     if (*a != '.')
  1971.                       strcatbuff(lien,".class");    // ajouter .class
  1972.                     else if (!strfield2(a,".class"))
  1973.                       strcatbuff(lien,".class");    // idem
  1974.                   }
  1975.                 }
  1976.                 
  1977.                 // si c'est un chemin, alors vérifier (toto/toto.html -> http://www/toto/)
  1978.                 if (!error) {
  1979.                   if ((opt->debug>1) && (opt->log!=NULL)) {
  1980.                     fspc(opt->log,"debug"); fprintf(opt->log,"position link check %s"LF,lien); test_flush;
  1981.                   }
  1982.                   
  1983.                   if ((p_type==2) || (p_type==-2)) {   // code ou codebase                        
  1984.                     // Vérifier les codebase=applet (au lieu de applet/)
  1985.                     if (p_type==-2) {    // codebase
  1986.                       if (strnotempty(lien)) {
  1987.                         if (fil[strlen(lien)-1]!='/') {  // pas rΘpertoire
  1988.                           strcatbuff(lien,"/");
  1989.                         }
  1990.                       }
  1991.                     }
  1992.  
  1993.                     /* base has always authority */
  1994.                     if (p_type==2 && !link_has_authority(lien)) {
  1995.                       char tmp[HTS_URLMAXSIZE*2];
  1996.                       strcpybuff(tmp, "http://");
  1997.                       strcatbuff(tmp, lien);
  1998.                       strcpybuff(lien, tmp);
  1999.                     }
  2000.  
  2001.                     /* only one ending / (bug on some pages) */
  2002.                     if ((int)strlen(lien)>2) {
  2003.                       int len = (int) strlen(lien);
  2004.                       while(len > 1 && lien[len-1] == '/' && lien[len-2] == '/' )    /* double // (bug) */
  2005.                         lien[--len]='\0';
  2006.                     }
  2007.                     // copier nom host si besoin est
  2008.                     if (!link_has_authority(lien)) {  // pas de http://
  2009.                       char adr2[HTS_URLMAXSIZE*2],fil2[HTS_URLMAXSIZE*2];  // ** euh ident_url_relatif??
  2010.                       if (ident_url_relatif(lien,urladr,urlfil,adr2,fil2)<0) {                        
  2011.                         error=1;
  2012.                       } else {
  2013.                         strcpybuff(lien,"http://");
  2014.                         strcatbuff(lien,adr2);
  2015.                         if (*fil2!='/')
  2016.                           strcatbuff(lien,"/");
  2017.                         strcatbuff(lien,fil2);
  2018.                         {
  2019.                           char* a;
  2020.                           a=lien+strlen(lien)-1;
  2021.                           while((*a) && (*a!='/') && ( a> lien)) a--;
  2022.                           if (*a=='/') {
  2023.                             *(a+1)='\0';
  2024.                           }
  2025.                         }
  2026.                         //char tempo[HTS_URLMAXSIZE*2];
  2027.                         //strcpybuff(tempo,"http://");
  2028.                         //strcatbuff(tempo,urladr);    // host
  2029.                         //if (*lien!='/')
  2030.                         //  strcatbuff(tempo,"/");
  2031.                         //strcatbuff(tempo,lien);
  2032.                         //strcpybuff(lien,tempo);
  2033.                       }
  2034.                     }
  2035.                     
  2036.                     if (!error) {  // pas d'erreur?
  2037.                       if (p_type==2) {   // code ET PAS codebase      
  2038.                         char* a=lien+strlen(lien)-1;
  2039.                         while( (a > lien) && (*a) && (*a!='/')) a--;
  2040.                         if (*a=='/')     // ok on a repΘrΘ le dernier /
  2041.                           *(a+1)='\0';   // couper
  2042.                         else {
  2043.                           *lien='\0';    // Θliminer
  2044.                           error=1;   // erreur, ne pas poursuivre
  2045.                         }      
  2046.                       }
  2047.                       
  2048.                       // stocker base ou codebase?
  2049.                       switch(p_type) {
  2050.                       case 2: { 
  2051.                         //if (*lien!='/') strcatbuff(base,"/");
  2052.                         strcpybuff(base,lien);
  2053.                               }
  2054.                         break;      // base
  2055.                       case -2: {
  2056.                         //if (*lien!='/') strcatbuff(codebase,"/");
  2057.                         strcpybuff(codebase,lien); 
  2058.                                }
  2059.                         break;  // base
  2060.                       }
  2061.                       
  2062.                       if ((opt->debug>1) && (opt->log!=NULL)) {
  2063.                         fspc(opt->log,"debug"); fprintf(opt->log,"code/codebase link %s base %s"LF,lien,base); test_flush;
  2064.                       }
  2065.                       //printf("base code: %s - %s\n",lien,base);
  2066.                     }
  2067.                     
  2068.                   } else {
  2069.                     char* _base;
  2070.                     if (p_type==-1)   // code (applet)
  2071.                       _base=codebase;
  2072.                     else
  2073.                       _base=base;
  2074.                     
  2075.                     
  2076.                     // ajouter chemin de base href..
  2077.                     if (strnotempty(_base)) {       // considΘrer base
  2078.                       if (!link_has_authority(lien)) {    // non absolue
  2079.                         if (*lien!='/') {           // non absolu sur le site (/)
  2080.                           if ( ((int) strlen(_base)+(int) strlen(lien))<HTS_URLMAXSIZE) {
  2081.                             // mailto: and co: do NOT add base
  2082.                             if (ident_url_relatif(lien,urladr,urlfil,adr,fil)>=0) {
  2083.                               char tempo[HTS_URLMAXSIZE*2];
  2084.                               // base est absolue
  2085.                               strcpybuff(tempo,_base);
  2086.                               strcatbuff(tempo,lien + ((*lien=='/')?1:0) );
  2087.                               strcpybuff(lien,tempo);        // patcher en considΘrant base
  2088.                               // ** vΘrifier que ../ fonctionne (ne doit pas arriver mais bon..)
  2089.                               
  2090.                               if ((opt->debug>1) && (opt->log!=NULL)) {
  2091.                                 fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush;
  2092.                               }
  2093.                             }
  2094.                           } else {
  2095.                             error=1;    // erreur
  2096.                             if (opt->errlog) {
  2097.                               fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s too long with base href"LF,lien);
  2098.                               test_flush;
  2099.                             }
  2100.                           }
  2101.                         } else {
  2102.                           char badr[HTS_URLMAXSIZE*2], bfil[HTS_URLMAXSIZE*2];
  2103.                           if (ident_url_absolute(_base, badr, bfil) >=0 ) {
  2104.                             if ( ((int) strlen(badr)+(int) strlen(lien)) < HTS_URLMAXSIZE) {
  2105.                               char tempo[HTS_URLMAXSIZE*2];
  2106.                               // base est absolue
  2107.                               tempo[0] = '\0';
  2108.                               if (!link_has_authority(badr)) {
  2109.                                 strcatbuff(tempo, "http://");
  2110.                               }
  2111.                               strcatbuff(tempo,badr);
  2112.                               strcatbuff(tempo,lien);
  2113.                               strcpybuff(lien,tempo);        // patcher en considΘrant base
  2114.                               
  2115.                               if ((opt->debug>1) && (opt->log!=NULL)) {
  2116.                                 fspc(opt->log,"debug"); fprintf(opt->log,"link modified with code/codebase %s"LF,lien); test_flush;
  2117.                               }
  2118.                             } else {
  2119.                               error=1;    // erreur
  2120.                               if (opt->errlog) {
  2121.                                 fspc(opt->errlog,"error"); fprintf(opt->errlog,"Link %s too long with base href"LF,lien);
  2122.                                 test_flush;
  2123.                               }
  2124.                             }
  2125.                           }
  2126.                         }
  2127.                       }
  2128.                     }
  2129.                     
  2130.                     
  2131.                   }
  2132.                 }
  2133.                 
  2134.                 
  2135.                 // transformer lien quelconque (http, relatif, etc) en une adresse
  2136.                 // et un chemin+fichier (adr,fil)
  2137.                 if (!error) {
  2138.                   int reponse;
  2139.                   if ((opt->debug>1) && (opt->log!=NULL)) {
  2140.                     fspc(opt->log,"debug"); fprintf(opt->log,"build relative link %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush;
  2141.                   }
  2142.                   if ((reponse=ident_url_relatif(lien,relativeurladr,relativeurlfil,adr,fil))<0) {                        
  2143.                     adr[0]='\0';    // erreur
  2144.                     if (reponse==-2) {
  2145.                       if (opt->errlog) {
  2146.                         fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s not caught (unknown protocol)"LF,lien);
  2147.                         test_flush;
  2148.                       }
  2149.                     } else {
  2150.                       if ((opt->debug>1) && (opt->errlog!=NULL)) {
  2151.                         fspc(opt->errlog,"debug"); fprintf(opt->errlog,"ident_url_relatif failed for %s with %s%s"LF,lien,relativeurladr,relativeurlfil); test_flush;
  2152.                       }
  2153.                     }
  2154.                   } else {
  2155.                     if ((opt->debug>1) && (opt->log!=NULL)) {
  2156.                       fspc(opt->log,"debug"); fprintf(opt->log,"built relative link %s with %s%s -> %s%s"LF,lien,relativeurladr,relativeurlfil,adr,fil); test_flush;
  2157.                     }
  2158.                   }
  2159.                 } else {
  2160.                   if ((opt->debug>1) && (opt->log!=NULL)) {
  2161.                     fspc(opt->log,"debug"); fprintf(opt->log,"link %s not build, error detected before"LF,lien); test_flush;
  2162.                   }
  2163.                   adr[0]='\0';
  2164.                 }
  2165.                 
  2166. #if HTS_CHECK_STRANGEDIR
  2167.                 // !ATTENTION!
  2168.                 // Ici on teste les exotiques du genre www.truc.fr/machin (sans slash à la fin)
  2169.                 // je n'ai pas encore trouvé le moyen de faire la différence entre un répertoire
  2170.                 // et un fichier en http A PRIORI : je fais donc un test
  2171.                 // En cas de moved xxx, on recalcule adr et fil, tout simplement
  2172.                 // DEFAUT: test effectué plusieurs fois! à revoir!!!
  2173.                 if ((adr[0]!='\0') && (strcmp(adr,"file://") && (p_type!=2) && (p_type!=-2)) {
  2174.                   //## if ((adr[0]!='\0') && (adr[0]!=lOCAL_CHAR) && (p_type!=2) && (p_type!=-2)) {
  2175.                   if (fil[strlen(fil)-1]!='/') {  // pas rΘpertoire
  2176.                     if (ishtml(fil)==-2) {    // pas d'extension
  2177.                       char loc[HTS_URLMAXSIZE*2];  // Θventuelle nouvelle position
  2178.                       loc[0]='\0';
  2179.                       if ((opt->debug>1) && (opt->log!=NULL)) {
  2180.                         fspc(opt->log,"debug"); fprintf(opt->log,"link-check-directory: %s%s"LF,adr,fil);
  2181.                         test_flush;
  2182.                       }
  2183.                       
  2184.                       // tester Θventuelle nouvelle position
  2185.                       switch (http_location(adr,fil,loc).statuscode) {
  2186.                       case 200: // ok au final
  2187.                         if (strnotempty(loc)) {  // a changΘ d'adresse
  2188.                           if (opt->errlog) {
  2189.                             fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link %s%s has moved to %s for %s%s"LF,adr,fil,loc,urladr,urlfil);
  2190.                             test_flush;
  2191.                           }
  2192.                           
  2193.                           // recalculer adr et fil!
  2194.                           if (ident_url_absolute(loc,adr,fil)==-1) {
  2195.                             adr[0]='\0';  // cancel
  2196.                             if ((opt->debug>1) && (opt->log!=NULL)) {
  2197.                               fspc(opt->log,"debug"); fprintf(opt->log,"link-check-dir: %s%s"LF,adr,fil);
  2198.                               test_flush;
  2199.                             }
  2200.                           }
  2201.                           
  2202.                         }
  2203.                         break;
  2204.                       case -2: case -3:  // timeout ou erreur grave
  2205.                         if (opt->errlog) {
  2206.                           fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Connection too slow for testing link %s%s (from %s%s)"LF,adr,fil,urladr,urlfil);
  2207.                           test_flush;
  2208.                         }
  2209.                         
  2210.                         break;
  2211.                       }
  2212.                       
  2213.                     }
  2214.                   } 
  2215.                 }
  2216. #endif
  2217.                 
  2218.                 // Le lien doit juste être réécrit, mais ne doit pas générer un lien
  2219.                 // exemple: <FORM ACTION="url_cgi">
  2220.                 if (p_nocatch) {
  2221.                   forbidden_url=1;    // interdire rΘcupΘration du lien
  2222.                   if ((opt->debug>1) && (opt->log!=NULL)) {
  2223.                     fspc(opt->log,"debug"); fprintf(opt->log,"link forced external at %s%s"LF,adr,fil);
  2224.                     test_flush;
  2225.                   }
  2226.                 }
  2227.                 
  2228.                 // Tester si un lien doit être accepté ou refusé (wizard)
  2229.                 // forbidden_url=1 : lien refusé
  2230.                 // forbidden_url=0 : lien accepté
  2231.                 //if ((ptr>0) && (p_type!=2) && (p_type!=-2)) {    // tester autorisations?
  2232.                 if ((p_type!=2) && (p_type!=-2)) {    // tester autorisations?
  2233.                   if (!p_nocatch) {
  2234.                     if (adr[0]!='\0') {          
  2235.                       if ((opt->debug>1) && (opt->log!=NULL)) {
  2236.                         fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test at %s%s.."LF,adr,fil);
  2237.                         test_flush;
  2238.                       }
  2239.                       forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens,
  2240.                         adr,fil,
  2241.                         NULL, NULL,
  2242.                         &set_prio_to,
  2243.                         &just_test_it);
  2244.                       if ((opt->debug>1) && (opt->log!=NULL)) {
  2245.                         fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard link test: %d"LF,forbidden_url);
  2246.                         test_flush;
  2247.                       }
  2248.                     }
  2249.                   }
  2250.                 }
  2251.                 
  2252.                 // calculer meme_adresse
  2253.                 meme_adresse=strfield2(jump_identification(adr),jump_identification(urladr));
  2254.                 
  2255.                 
  2256.                 
  2257.                 // Début partie sauvegarde
  2258.                 
  2259.                 // ici on forme le nom du fichier à sauver, et on patche l'URL
  2260.                 if (adr[0]!='\0') {
  2261.                   // savename: simplifier les ../ et autres joyeusetés
  2262.                   char save[HTS_URLMAXSIZE*2];
  2263.                   int r_sv=0;
  2264.                   // En cas de moved, adresse première
  2265.                   char former_adr[HTS_URLMAXSIZE*2];
  2266.                   char former_fil[HTS_URLMAXSIZE*2];
  2267.                   //
  2268.                   save[0]='\0'; former_adr[0]='\0'; former_fil[0]='\0';
  2269.                   //
  2270.                   
  2271.                   // nom du chemin à sauver si on doit le calculer
  2272.                   // note: url_savename peut décider de tester le lien si il le trouve
  2273.                   // suspect, et modifier alors adr et fil
  2274.                   // dans ce cas on aura une référence directe au lieu des traditionnels
  2275.                   // moved en cascade (impossible à reproduire à priori en local, lorsque des fichiers
  2276.                   // gif sont impliqués par exemple)
  2277.                   if ((p_type!=2) && (p_type!=-2)) {  // pas base href ou codebase
  2278.                     if (forbidden_url!=1) {
  2279.                       char last_adr[HTS_URLMAXSIZE*2];
  2280.                       last_adr[0]='\0';
  2281.                       //char last_fil[HTS_URLMAXSIZE*2]="";
  2282.                       strcpybuff(last_adr,adr);    // ancienne adresse
  2283.                       //strcpybuff(last_fil,fil);    // ancien chemin
  2284.                       r_sv=url_savename(adr,fil,save,former_adr,former_fil,liens[ptr]->adr,liens[ptr]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe);
  2285.                       if (strcmp(jump_identification(last_adr),jump_identification(adr)) != 0) {  // a changΘ
  2286.                         
  2287.                         // 2e test si moved
  2288.                         
  2289.                         // Tester si un lien doit être accepté ou refusé (wizard)
  2290.                         // forbidden_url=1 : lien refusé
  2291.                         // forbidden_url=0 : lien accepté
  2292.                         if ((ptr>0) && (p_type!=2) && (p_type!=-2)) {    // tester autorisations?
  2293.                           if (!p_nocatch) {
  2294.                             if (adr[0]!='\0') {          
  2295.                               if ((opt->debug>1) && (opt->log!=NULL)) {
  2296.                                 fspc(opt->log,"debug"); fprintf(opt->log,"wizard moved link retest at %s%s.."LF,adr,fil);
  2297.                                 test_flush;
  2298.                               }
  2299.                               forbidden_url=hts_acceptlink(opt,ptr,lien_tot,liens,
  2300.                                 adr,fil,
  2301.                                 NULL, NULL,
  2302.                                 &set_prio_to,
  2303.                                 &just_test_it);
  2304.                               if ((opt->debug>1) && (opt->log!=NULL)) {
  2305.                                 fspc(opt->log,"debug"); fprintf(opt->log,"result for wizard moved link retest: %d"LF,forbidden_url);
  2306.                                 test_flush;
  2307.                               }
  2308.                             }
  2309.                           }
  2310.                         }
  2311.                         
  2312.                         //import_done=1;    // c'est un import!
  2313.                         meme_adresse=0;   // on a changΘ
  2314.                       }
  2315.                     } else {
  2316.                       strcpybuff(save,"");  // dummy
  2317.                     }
  2318.                   }
  2319.                   if (r_sv!=-1) {  // pas d'erreur, on continue
  2320.                     /* log */
  2321.                     if ((opt->debug>1) && (opt->log!=NULL)) {
  2322.                       fspc(opt->log,"debug");
  2323.                       if (forbidden_url!=1) {    // le lien va Ωtre chargΘ
  2324.                         if ((p_type==2) || (p_type==-2)) {  // base href ou codebase, pas un lien
  2325.                           fprintf(opt->log,"Code/Codebase: %s%s"LF,adr,fil);
  2326.                         } else if ((opt->getmode & 4)==0) {
  2327.                           fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save);
  2328.                         } else {
  2329.                           if (!ishtml(fil))
  2330.                             fprintf(opt->log,"Record after: %s%s -> %s"LF,adr,fil,save);
  2331.                           else
  2332.                             fprintf(opt->log,"Record: %s%s -> %s"LF,adr,fil,save);
  2333.                         } 
  2334.                       } else
  2335.                         fprintf(opt->log,"External: %s%s"LF,adr,fil);
  2336.                       test_flush;
  2337.                     }
  2338.                     /* FIN log */
  2339.                     
  2340.                     // Θcrire lien
  2341.                     if ((p_type==2) || (p_type==-2)) {  // base href ou codebase, sauter
  2342.                       lastsaved=eadr-1+1;  // sauter "
  2343.                     }
  2344.                     /* */
  2345.                     else if (opt->urlmode==0) {    // URL absolue dans tous les cas
  2346.                       if ((opt->getmode & 1) && (ptr>0)) {    // ecrire les html
  2347.                         if (!link_has_authority(adr)) {
  2348.                           HT_ADD("http://");
  2349.                         } else {
  2350.                           char* aut = strstr(adr, "//");
  2351.                           if (aut) {
  2352.                             char tmp[256];
  2353.                             tmp[0]='\0';
  2354.                             strncatbuff(tmp, adr, (int) (aut - adr));   // scheme
  2355.                             HT_ADD(tmp);          // Protocol
  2356.                             HT_ADD("//");
  2357.                           }
  2358.                         }
  2359.                         
  2360.                         if (!opt->passprivacy) {
  2361.                           HT_ADD(jump_protocol(adr));           // Password
  2362.                         } else {
  2363.                           HT_ADD(jump_identification(adr));     // No Password
  2364.                         }
  2365.                         if (*fil!='/')
  2366.                           HT_ADD("/");
  2367.                         HT_ADD(fil);
  2368.                       }
  2369.                       lastsaved=eadr-1;    // dernier Θcrit+1 (enfin euh apres on fait un ++ alors hein)
  2370.                       /* */
  2371.                     } else if (opt->urlmode >= 4) {    // ne rien faire dans tous les cas!
  2372.                       /* */
  2373.                       /* leave the link 'as is' */
  2374.                       /* Sinon, dΘpend de interne/externe */
  2375.                     } else if (forbidden_url==1) {    // le lien ne sera pas chargΘ, rΘfΘrence externe!
  2376.                       if ((opt->getmode & 1) && (ptr>0)) {
  2377.                         if (p_type!=-1) {     // pas que le nom de fichier (pas classe java)
  2378.                           if (!opt->external) {
  2379.                             if (!link_has_authority(adr)) {
  2380.                               HT_ADD("http://");
  2381.                               if (!opt->passprivacy) {
  2382.                                 HT_ADD(adr);     // Password
  2383.                               } else {
  2384.                                 HT_ADD(jump_identification(adr));     // No Password
  2385.                               }
  2386.                               if (*fil!='/')
  2387.                                 HT_ADD("/");
  2388.                               HT_ADD(fil);
  2389.                             } else {
  2390.                               char* aut = strstr(adr, "//");
  2391.                               if (aut) {
  2392.                                 char tmp[256];
  2393.                                 tmp[0]='\0';
  2394.                                 strncatbuff(tmp, adr, (int) (aut - adr));   // scheme
  2395.                                 HT_ADD(tmp);          // Protocol
  2396.                                 HT_ADD("//");
  2397.                                 if (!opt->passprivacy) {
  2398.                                   HT_ADD(jump_protocol(adr));          // Password
  2399.                                 } else {
  2400.                                   HT_ADD(jump_identification(adr));     // No Password
  2401.                                 }
  2402.                                 if (*fil!='/')
  2403.                                   HT_ADD("/");
  2404.                                 HT_ADD(fil);
  2405.                               }
  2406.                             }
  2407.                             //
  2408.                           } else {    // fichier/page externe, mais on veut gΘnΘrer une erreur
  2409.                             //
  2410.                             int patch_it=0;
  2411.                             int add_url=0;
  2412.                             char* cat_name=NULL;
  2413.                             char* cat_data=NULL;
  2414.                             int cat_nb=0;
  2415.                             int cat_data_len=0;
  2416.                             
  2417.                             // ajouter lien external
  2418.                             switch ( (link_has_authority(adr)) ? 1 : ( (fil[strlen(fil)-1]=='/')?1:(ishtml(fil))  ) ) {
  2419.                             case 1: case -2:       // html ou rΘpertoire
  2420.                               if (opt->getmode & 1) {  // sauver html
  2421.                                 patch_it=1;   // redirect
  2422.                                 add_url=1;    // avec link?
  2423.                                 cat_name="external.html";
  2424.                                 cat_nb=0;
  2425.                                 cat_data=HTS_DATA_UNKNOWN_HTML;
  2426.                                 cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN;
  2427.                               }
  2428.                               break;
  2429.                             default:    // inconnu
  2430.                               // asp, cgi..
  2431.                               if ( (strfield2(fil+max(0,(int)strlen(fil)-4),".gif")) 
  2432.                                 || (strfield2(fil+max(0,(int)strlen(fil)-4),".jpg")) 
  2433.                                 || (strfield2(fil+max(0,(int)strlen(fil)-4),".xbm")) 
  2434.                                 /*|| (ishtml(fil)!=0)*/ ) {
  2435.                                 patch_it=1;   // redirect
  2436.                                 add_url=1;    // avec link aussi
  2437.                                 cat_name="external.gif";
  2438.                                 cat_nb=1;
  2439.                                 cat_data=HTS_DATA_UNKNOWN_GIF;
  2440.                                 cat_data_len=HTS_DATA_UNKNOWN_GIF_LEN;
  2441.                               } else /* if (is_dyntype(get_ext(fil))) */ {
  2442.                                 patch_it=1;   // redirect
  2443.                                 add_url=1;    // avec link?
  2444.                                 cat_name="external.html";
  2445.                                 cat_nb=0;
  2446.                                 cat_data=HTS_DATA_UNKNOWN_HTML;
  2447.                                 cat_data_len=HTS_DATA_UNKNOWN_HTML_LEN;
  2448.                               }
  2449.                               break;
  2450.                             }// html,gif
  2451.                             
  2452.                             if (patch_it) {
  2453.                               char save[HTS_URLMAXSIZE*2];
  2454.                               char tempo[HTS_URLMAXSIZE*2];
  2455.                               strcpybuff(save,opt->path_html);
  2456.                               strcatbuff(save,cat_name);
  2457.                               if (lienrelatif(tempo,save, relativesavename)==0) {
  2458.                                 if (!no_esc_utf)
  2459.                                   escape_uri(tempo);     // escape with %xx
  2460.                                 else
  2461.                                   escape_uri_utf(tempo);     // escape with %xx
  2462.                                 HT_ADD(tempo);    // page externe
  2463.                                 if (add_url) {
  2464.                                   HT_ADD("?link=");    // page externe
  2465.                                   
  2466.                                   // same as above
  2467.                                   if (!link_has_authority(adr)) {
  2468.                                     HT_ADD("http://");
  2469.                                     if (!opt->passprivacy) {
  2470.                                       HT_ADD(adr);     // Password
  2471.                                     } else {
  2472.                                       HT_ADD(jump_identification(adr));     // No Password
  2473.                                     }
  2474.                                     if (*fil!='/')
  2475.                                       HT_ADD("/");
  2476.                                     HT_ADD(fil);
  2477.                                   } else {
  2478.                                     char* aut = strstr(adr, "//");
  2479.                                     if (aut) {
  2480.                                       char tmp[256];
  2481.                                       tmp[0]='\0';
  2482.                                       strncatbuff(tmp, adr, (int) (aut - adr) + 2);   // scheme
  2483.                                       HT_ADD(tmp);
  2484.                                       if (!opt->passprivacy) {
  2485.                                         HT_ADD(jump_protocol(adr));          // Password
  2486.                                       } else {
  2487.                                         HT_ADD(jump_identification(adr));     // No Password
  2488.                                       }
  2489.                                       if (*fil!='/')
  2490.                                         HT_ADD("/");
  2491.                                       HT_ADD(fil);
  2492.                                     }
  2493.                                   }
  2494.                                   //
  2495.                                   
  2496.                                 }
  2497.                               }
  2498.                               
  2499.                               // écrire fichier?
  2500.                               if (verif_external(cat_nb,1)) {
  2501.                                 //if (!fexist(fconcat(opt->path_html,cat_name))) {
  2502.                                 FILE* fp = filecreate(fconcat(opt->path_html,cat_name));
  2503.                                 if (fp) {
  2504.                                   if (cat_data_len==0) {   // texte
  2505.                                     verif_backblue(opt,opt->path_html);
  2506.                                     fprintf(fp,"%s%s","<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"LF,cat_data);
  2507.                                   } else {                    // data
  2508.                                     fwrite(cat_data,cat_data_len,1,fp);
  2509.                                   }
  2510.                                   fclose(fp);
  2511.                                   usercommand(opt,0,NULL,fconcat(opt->path_html,cat_name),"","");
  2512.                                 }
  2513.                               }
  2514.                             }  else {    // Θcrire normalement le nom de fichier
  2515.                               HT_ADD("http://");
  2516.                               if (!opt->passprivacy) {
  2517.                                 HT_ADD(adr);       // Password
  2518.                               } else {
  2519.                                 HT_ADD(jump_identification(adr));       // No Password
  2520.                               }
  2521.                               if (*fil!='/')
  2522.                                 HT_ADD("/");
  2523.                               HT_ADD(fil);
  2524.                             }// patcher?
  2525.                             }  // external
  2526.                           } else {  // que le nom de fichier (classe java)
  2527.                             // en gros recopie de plus bas: copier codebase et base
  2528.                             if (p_flush) {
  2529.                               char tempo[HTS_URLMAXSIZE*2];    // <-- ajoutΘ
  2530.                               char tempo_pat[HTS_URLMAXSIZE*2];
  2531.                               
  2532.                               // Calculer chemin
  2533.                               tempo_pat[0]='\0';
  2534.                               strcpybuff(tempo,fil);  // <-- ajoutΘ
  2535.                               {
  2536.                                 char* a=strrchr(tempo,'/');
  2537.                                 
  2538.                                 // Example: we converted code="x.y.z.foo.class" into "x/y/z/foo.class"
  2539.                                 // we have to do the contrary now
  2540.                                 if (add_class_dots_to_patch>0) {
  2541.                                   while( (add_class_dots_to_patch>0) && (a) ) {
  2542.                                     *a='.';     // convert "false" java / into .
  2543.                                     add_class_dots_to_patch--;
  2544.                                     a=strrchr(tempo,'/');
  2545.                                   }
  2546.                                   // if add_class_dots_to_patch, this is because there is a problem!!
  2547.                                   if (add_class_dots_to_patch) {
  2548.                                     if (opt->errlog) {
  2549.                                       fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Error: can not rewind java path %s, check html code"LF,tempo);
  2550.                                       test_flush;
  2551.                                     }
  2552.                                   }
  2553.                                 }
  2554.                                 
  2555.                                 // Cut path/filename
  2556.                                 if (a) {
  2557.                                   char tempo2[HTS_URLMAXSIZE*2];
  2558.                                   strcpybuff(tempo2,a+1);         // FICHIER
  2559.                                   strncatbuff(tempo_pat,tempo,(int) (a - tempo)+1);  // chemin
  2560.                                   strcpybuff(tempo,tempo2);                     // fichier
  2561.                                 }
  2562.                               }
  2563.                               
  2564.                               // écrire codebase="chemin"
  2565.                               if ((opt->getmode & 1) && (ptr>0)) {
  2566.                                 char tempo4[HTS_URLMAXSIZE*2];
  2567.                                 tempo4[0]='\0';
  2568.                                 
  2569.                                 if (strnotempty(tempo_pat)) {
  2570.                                   HT_ADD("codebase=\"http://");
  2571.                                   if (!opt->passprivacy) {
  2572.                                     HT_ADD(adr);  // Password
  2573.                                   } else {
  2574.                                     HT_ADD(jump_identification(adr));  // No Password
  2575.                                   }
  2576.                                   if (*tempo_pat!='/') HT_ADD("/");
  2577.                                   HT_ADD(tempo_pat);
  2578.                                   HT_ADD("\" ");
  2579.                                 }
  2580.                                 
  2581.                                 strncatbuff(tempo4,lastsaved,(int) (p_flush - lastsaved));
  2582.                                 HT_ADD(tempo4);    // refresh code="
  2583.                                 HT_ADD(tempo);
  2584.                               }
  2585.                             }
  2586.                           }
  2587.                         }
  2588.                         lastsaved=eadr-1;
  2589.                       }
  2590.                       /*
  2591.                       else if (opt->urlmode==1) {    // ABSOLU, c'est le cas le moins courant
  2592.                       //  NE FONCTIONNE PAS!!  (et est inutile)
  2593.                       if ((opt->getmode & 1) && (ptr>0)) {    // ecrire les html
  2594.                       // Θcrire le lien modifiΘ, absolu
  2595.                       HT_ADD("file:");
  2596.                       if (*save=='/')
  2597.                       HT_ADD(save+1)
  2598.                       else
  2599.                       HT_ADD(save)
  2600.                       }
  2601.                       lastsaved=eadr-1;    // dernier Θcrit+1 (enfin euh apres on fait un ++ alors hein)
  2602.                       }
  2603.                       */
  2604.                     else if (opt->mimehtml) {
  2605.                       char buff[HTS_URLMAXSIZE*3];
  2606.                       HT_ADD("cid:");
  2607.                       strcpybuff(buff, adr);
  2608.                       strcatbuff(buff, fil);
  2609.                       escape_in_url(buff);
  2610.                       { char* a = buff; while((a = strchr(a, '%'))) { *a = 'X'; a++; } }
  2611.                       HT_ADD(buff);
  2612.                       lastsaved=eadr-1;    // dernier Θcrit+1 (enfin euh apres on fait un ++ alors hein)
  2613.                     }
  2614.                     else if (opt->urlmode==3) {    // URI absolue /
  2615.                       if ((opt->getmode & 1) && (ptr>0)) {    // ecrire les html
  2616.                         HT_ADD(fil);
  2617.                       }
  2618.                       lastsaved=eadr-1;    // dernier Θcrit+1 (enfin euh apres on fait un ++ alors hein)
  2619.                     }
  2620.                     else if (opt->urlmode==2) {  // RELATIF
  2621.                         char tempo[HTS_URLMAXSIZE*2];
  2622.                         tempo[0]='\0';
  2623.                         // calculer le lien relatif
  2624.                         
  2625.                         if (lienrelatif(tempo,save,relativesavename)==0) {
  2626.                           if (!in_media) {    // In media (such as real audio): don't patch
  2627.                             if (!no_esc_utf)
  2628.                               escape_uri(tempo);     // escape with %xx
  2629.                             else
  2630.                               escape_uri_utf(tempo);     // escape with %xx
  2631.                           }
  2632.                           if ((opt->debug>1) && (opt->log!=NULL)) {
  2633.                             fspc(opt->log,"debug"); fprintf(opt->log,"relative link at %s build with %s and %s: %s"LF,adr,save,relativesavename,tempo);
  2634.                             test_flush;
  2635.                           }
  2636.                           
  2637.                           // lien applet (code) - il faut placer un codebase avant
  2638.                           if (p_type==-1) {  // que le nom de fichier
  2639.                             
  2640.                             if (p_flush) {
  2641.                               char tempo_pat[HTS_URLMAXSIZE*2];
  2642.                               tempo_pat[0]='\0';
  2643.                               {
  2644.                                 char* a=strrchr(tempo,'/');
  2645.                                 
  2646.                                 // Example: we converted code="x.y.z.foo.class" into "x/y/z/foo.class"
  2647.                                 // we have to do the contrary now
  2648.                                 if (add_class_dots_to_patch>0) {
  2649.                                   while( (add_class_dots_to_patch>0) && (a) ) {
  2650.                                     *a='.';     // convert "false" java / into .
  2651.                                     add_class_dots_to_patch--;
  2652.                                     a=strrchr(tempo,'/');
  2653.                                   }
  2654.                                   // if add_class_dots_to_patch, this is because there is a problem!!
  2655.                                   if (add_class_dots_to_patch) {
  2656.                                     if (opt->errlog) {
  2657.                                       fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Error: can not rewind java path %s, check html code"LF,tempo);
  2658.                                       test_flush;
  2659.                                     }
  2660.                                   }
  2661.                                 }
  2662.                                 
  2663.                                 if (a) {
  2664.                                   char tempo2[HTS_URLMAXSIZE*2];
  2665.                                   strcpybuff(tempo2,a+1);
  2666.                                   strncatbuff(tempo_pat,tempo,(int) (a - tempo)+1);  // chemin
  2667.                                   strcpybuff(tempo,tempo2);                     // fichier
  2668.                                 }
  2669.                               }
  2670.                               
  2671.                               // écrire codebase="chemin"
  2672.                               if ((opt->getmode & 1) && (ptr>0)) {
  2673.                                 char tempo4[HTS_URLMAXSIZE*2];
  2674.                                 tempo4[0]='\0';
  2675.                                 
  2676.                                 if (strnotempty(tempo_pat)) {
  2677.                                   HT_ADD("codebase=\"");
  2678.                                   HT_ADD(tempo_pat);
  2679.                                   HT_ADD("\" ");
  2680.                                 }
  2681.                                 
  2682.                                 strncatbuff(tempo4,lastsaved,(int) (p_flush - lastsaved));
  2683.                                 HT_ADD(tempo4);    // refresh code="
  2684.                               }
  2685.                             }
  2686.                             //lastsaved=adr;    // dernier Θcrit+1
  2687.                           }                              
  2688.                           
  2689.                           if ((opt->getmode & 1) && (ptr>0)) {
  2690.                             // écrire le lien modifié, relatif
  2691.                             HT_ADD(tempo);
  2692.                             
  2693.                             // Add query-string, for informational purpose only
  2694.                             // Useless, because all parameters-pages are saved into different targets
  2695.                             if (opt->includequery) {
  2696.                               char* a=strchr(lien,'?');
  2697.                               if (a) {
  2698.                                 HT_ADD(a);
  2699.                               }
  2700.                             }
  2701.                           }
  2702.                           lastsaved=eadr-1;    // dernier Θcrit+1 (enfin euh apres on fait un ++ alors hein)
  2703.                         } else {
  2704.                           if (opt->errlog) {
  2705.                             fprintf(opt->errlog,"Error building relative link %s and %s"LF,save,relativesavename);
  2706.                             test_flush;
  2707.                           }
  2708.                         }
  2709.                       }  // sinon le lien sera Θcrit normalement
  2710.                       
  2711.                       
  2712. #if 0
  2713.                       if (fexist(save)) {    // le fichier existe..
  2714.                         adr[0]='\0';
  2715.                         //if ((opt->debug>0) && (opt->log!=NULL)) {
  2716.                         if (opt->errlog) {
  2717.                           fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link has already been written on disk, cancelled: %s"LF,save);
  2718.                           test_flush;
  2719.                         }
  2720.                       }
  2721. #endif                            
  2722.                       
  2723.                       /* Security check */
  2724.                       if (strlen(save) >= HTS_URLMAXSIZE) {
  2725.                         adr[0]='\0';
  2726.                         if (opt->errlog) {
  2727.                           fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Link is too long: %s"LF,save);
  2728.                           test_flush;
  2729.                         }
  2730.                       }
  2731.                       
  2732.                       if ((adr[0]!='\0') && (p_type!=2) && (p_type!=-2) && (forbidden_url!=1) ) {  // si le fichier n'existe pas, ajouter α la liste                            
  2733.                         // n'y a-t-il pas trop de liens?
  2734.                         if (lien_tot+1 >= lien_max-4) {    // trop de liens!
  2735.                           printf("PANIC! : Too many URLs : >%d [%d]\n",lien_tot,__LINE__);
  2736.                           if (opt->errlog) {
  2737.                             fprintf(opt->errlog,LF"Too many URLs, giving up..(>%d)"LF,lien_max);
  2738.                             fprintf(opt->errlog,"To avoid that: use #L option for more links (example: -#L1000000)"LF);
  2739.                             test_flush;
  2740.                           }
  2741.                           if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
  2742.                           XH_uninit;   // dΘsallocation mΘmoire & buffers
  2743.                           return -1;
  2744.                           
  2745.                         } else {    // noter le lien sur la listes des liens α charger
  2746.                           int pass_fix,dejafait=0;
  2747.                           
  2748.                           // Calculer la priorité de ce lien
  2749.                           if ((opt->getmode & 4)==0) {    // traiter html aprΦs
  2750.                             pass_fix=0;
  2751.                           } else {    // vΘrifier que ce n'est pas un !html
  2752.                             if (!ishtml(fil))
  2753.                               pass_fix=1;        // prioritΘ infΘrieure (traiter aprΦs)
  2754.                             else
  2755.                               pass_fix=max(0,numero_passe);    // prioritΘ normale
  2756.                           }
  2757.                           
  2758.                           /* If the file seems to be an html file, get depth-1 */
  2759.                           /*
  2760.                           if (strnotempty(save)) {
  2761.                           if (ishtml(save) == 1) {
  2762.                           // descore_prio = 2;
  2763.                           } else {
  2764.                           // descore_prio = 1;
  2765.                           }
  2766.                           }
  2767.                           */
  2768.                           
  2769.                           // vérifier que le lien n'a pas déjà été noté
  2770.                           // si c'est le cas, alors il faut s'assurer que la priorité associée
  2771.                           // au fichier est la plus grande des deux priorités
  2772.                           //
  2773.                           // On part de la fin et on essaye de se presser (économise temps machine)
  2774. #if HTS_HASH
  2775.                           {
  2776.                             int i=hash_read(hash,save,"",0,opt->urlhack);      // lecture type 0 (sav)
  2777.                             if (i>=0) {
  2778.                               if ((opt->debug>1) && (opt->log!=NULL)) {
  2779.                                 if (
  2780.                                   strcmp(adr, liens[i]->adr) != 0 
  2781.                                   || strcmp(fil, liens[i]->fil) != 0
  2782.                                   ) {
  2783.                                   fspc(opt->log,"debug"); fprintf(opt->log,"merging similar links %s%s and %s%s"LF,adr,fil,liens[i]->adr,liens[i]->fil);
  2784.                                   test_flush;
  2785.                                 }
  2786.                               }
  2787.                               liens[i]->depth=maximum(liens[i]->depth,liens[ptr]->depth - 1);
  2788.                               dejafait=1;
  2789.                             }
  2790.                           }
  2791. #else
  2792.                           {
  2793.                             int l;
  2794.                             int i;
  2795.                             l=strlen(save);  // opti
  2796.                             for(i=lien_tot-1;(i>=0) && (dejafait==0);i--) {
  2797.                               if (liens[i]->sav_len==l) {    // mΩme taille de chaεne
  2798.                                 if (strcmp(liens[i]->sav,save)==0) {    // existe dΘja
  2799.                                   liens[i]->depth=maximum(liens[i]->depth,liens[ptr]->depth - 1);
  2800.                                   dejafait=1;
  2801.                                 }
  2802.                               }
  2803.                             }
  2804.                           }
  2805. #endif
  2806.                           
  2807.                           // le lien n'a jamais été créé.
  2808.                           // cette fois ci, on le crée!
  2809.                           if (!dejafait) {                                
  2810.                             //
  2811.                             // >>>> CREER LE LIEN <<<<
  2812.                             //
  2813.                             // enregistrer lien à charger
  2814.                             //liens[lien_tot]->adr[0]=liens[lien_tot]->fil[0]=liens[lien_tot]->sav[0]='\0';
  2815.                             // même adresse: l'objet père est l'objet père de l'actuel
  2816.                             
  2817.                             // DEBUT ROBOTS.TXT AJOUT
  2818.                             if (!just_test_it) {
  2819.                               if (
  2820.                                 (!strfield(adr,"ftp://"))         // non ftp
  2821.                                 && (!strfield(adr,"file://")) ) {    // non file
  2822.                                 if (opt->robots) {    // rΘcupΘrer robots
  2823.                                   if (ishtml(fil)!=0) {                       // pas la peine pour des fichiers isolΘs
  2824.                                     if (checkrobots(_ROBOTS,adr,"") != -1) {    // robots.txt ?
  2825.                                       checkrobots_set(_ROBOTS ,adr,"");          // ajouter entrΘe vide
  2826.                                       if (checkrobots(_ROBOTS,adr,"") == -1) {    // robots.txt ?
  2827.                                         // enregistrer robots.txt (MACRO)
  2828.                                         liens_record(adr,"/robots.txt","","","");
  2829.                                         if (liens[lien_tot]==NULL) {  // erreur, pas de place rΘservΘe
  2830.                                           printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  2831.                                           if (opt->errlog) { 
  2832.                                             fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  2833.                                             test_flush;
  2834.                                           }
  2835.                                           if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
  2836.                                           XH_uninit;    // dΘsallocation mΘmoire & buffers
  2837.                                           return -1;
  2838.                                         }  
  2839.                                         liens[lien_tot]->testmode=0;          // pas mode test
  2840.                                         liens[lien_tot]->link_import=0;       // pas mode import     
  2841.                                         liens[lien_tot]->premier=lien_tot;
  2842.                                         liens[lien_tot]->precedent=ptr;
  2843.                                         liens[lien_tot]->depth=0;
  2844.                                         liens[lien_tot]->pass2=max(0,numero_passe);
  2845.                                         liens[lien_tot]->retry=0;
  2846.                                         lien_tot++;  // UN LIEN DE PLUS
  2847. #if DEBUG_ROBOTS
  2848.                                         printf("robots.txt: added file robots.txt for %s\n",adr);
  2849. #endif
  2850.                                         if ((opt->debug>1) && (opt->log!=NULL)) {
  2851.                                           fspc(opt->log,"debug"); fprintf(opt->log,"robots.txt added at %s"LF,adr);
  2852.                                           test_flush;
  2853.                                         }
  2854.                                       } else {
  2855.                                         if (opt->errlog) {   
  2856.                                           fprintf(opt->errlog,"Unexpected robots.txt error at %d"LF,__LINE__);
  2857.                                           test_flush;
  2858.                                         }
  2859.                                       }
  2860.                                     }
  2861.                                   }
  2862.                                 }
  2863.                               }
  2864.                             }
  2865.                             // FIN ROBOTS.TXT AJOUT
  2866.                             
  2867.                             // enregistrer (MACRO)
  2868.                             liens_record(adr,fil,save,former_adr,former_fil);
  2869.                             if (liens[lien_tot]==NULL) {  // erreur, pas de place rΘservΘe
  2870.                               printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  2871.                               if (opt->errlog) { 
  2872.                                 fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  2873.                                 test_flush;
  2874.                               }
  2875.                               if ((opt->getmode & 1) && (ptr>0)) { if (fp) { fclose(fp); fp=NULL; } }
  2876.                               XH_uninit;    // dΘsallocation mΘmoire & buffers
  2877.                               return -1;
  2878.                             }  
  2879.                             
  2880.                             // mode test?
  2881.                             if (!just_test_it)
  2882.                               liens[lien_tot]->testmode=0;          // pas mode test
  2883.                             else
  2884.                               liens[lien_tot]->testmode=1;          // mode test
  2885.                             if (!import_done)
  2886.                               liens[lien_tot]->link_import=0;       // pas mode import
  2887.                             else
  2888.                               liens[lien_tot]->link_import=1;       // mode import
  2889.                             // écrire autres paramètres de la structure-lien
  2890.                             if ((meme_adresse) && (!import_done) && (liens[ptr]->premier != 0))
  2891.                               liens[lien_tot]->premier=liens[ptr]->premier;
  2892.                             else    // sinon l'objet pΦre est le prΘcΘdent lui mΩme
  2893.                               liens[lien_tot]->premier=lien_tot;
  2894.                             // liens[lien_tot]->premier=ptr;
  2895.                             
  2896.                             liens[lien_tot]->precedent=ptr;
  2897.                             // noter la priorité
  2898.                             if (!set_prio_to)
  2899.                               liens[lien_tot]->depth=liens[ptr]->depth - 1;
  2900.                             else
  2901.                               liens[lien_tot]->depth=max(0,min(liens[ptr]->depth-1,set_prio_to-1));         // PRIORITE NULLE (catch page)
  2902.                             // noter pass
  2903.                             liens[lien_tot]->pass2=pass_fix;
  2904.                             liens[lien_tot]->retry=opt->retry;
  2905.                             
  2906.                             //strcpybuff(liens[lien_tot]->adr,adr);
  2907.                             //strcpybuff(liens[lien_tot]->fil,fil);
  2908.                             //strcpybuff(liens[lien_tot]->sav,save); 
  2909.                             if ((opt->debug>1) && (opt->log!=NULL)) {
  2910.                               if (!just_test_it) {
  2911.                                 fspc(opt->log,"debug"); fprintf(opt->log,"OK, NOTE: %s%s -> %s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil,liens[lien_tot]->sav);
  2912.                               } else {
  2913.                                 fspc(opt->log,"debug"); fprintf(opt->log,"OK, TEST: %s%s"LF,liens[lien_tot]->adr,liens[lien_tot]->fil);
  2914.                               }
  2915.                               test_flush;
  2916.                             }
  2917.                             
  2918.                             lien_tot++;  // UN LIEN DE PLUS
  2919.                           } else { // if !dejafait
  2920.                             if ((opt->debug>1) && (opt->log!=NULL)) {
  2921.                               fspc(opt->log,"debug"); fprintf(opt->log,"link has already been recorded, cancelled: %s"LF,save);
  2922.                               test_flush;
  2923.                             }
  2924.                             
  2925.                           }
  2926.                           
  2927.                           
  2928.                         }   // si pas trop de liens
  2929.                       }   // si adr[0]!='\0'
  2930.                       
  2931.                       
  2932.                     }  // if adr[0]!='\0' 
  2933.                     
  2934.                   }  // if adr[0]!='\0'
  2935.                   
  2936.                 }    // if strlen(lien)>0
  2937.                 
  2938.               }   // if ok==0      
  2939.  
  2940.               assertf(eadr - adr >= 0);       // Should not go back
  2941.               if (eadr > adr) {
  2942.                 INCREMENT_CURRENT_ADR(eadr - 1 - adr);
  2943.               }
  2944.               // adr=eadr-1;  // ** sauter
  2945.  
  2946.               /* We skipped bytes and skip the " : reset state */
  2947.               /*if (inscript) {
  2948.                 inscript_state_pos = INSCRIPT_START;
  2949.               }*/
  2950.  
  2951.             }  // if (p) 
  2952.             
  2953.           }  // si '<' ou '>'
  2954.           
  2955.           // plus loin
  2956.           adr++;      // automate will be checked next loop
  2957.           
  2958.           
  2959.           /* Otimization: if we are scanning in HTML data (not in tag or script), 
  2960.           then jump to the next starting tag */
  2961.           if (ptr>0) {
  2962.             if ( (!intag)         /* Not in tag */
  2963.               && (!inscript)      /* Not in (java)script */
  2964.               && (!in_media)      /* Not in media */
  2965.               && (!incomment)     /* Not in comment (<!--) */
  2966.               && (!inscript_tag)  /* Not in tag with script inside */
  2967.               ) 
  2968.             {
  2969.               /* Not at the end */
  2970.               if (( ((int) (adr - r->adr)) ) < r->size) {
  2971.                 /* Not on a starting tag yet */
  2972.                 if (*adr != '<') {
  2973.                   /* strchr does not well behave with null chrs.. */
  2974.                   /* char* adr_next = strchr(adr,'<'); */
  2975.                   char* adr_next = adr;
  2976.                   while(*adr_next != '<' && (adr_next - r->adr) < r->size ) {
  2977.                     adr_next++;
  2978.                   }
  2979.                   /* Jump to near end (index hack) */
  2980.                   if (!adr_next || *adr_next != '<') {
  2981.                     if (
  2982.                       ( (int)(adr - r->adr) < (r->size - 4)) 
  2983.                       &&
  2984.                       (r->size > 4)
  2985.                       ) {
  2986.                       adr = r->adr + r->size - 2;
  2987.                     }
  2988.                   } else {
  2989.                     adr = adr_next;
  2990.                   }
  2991.                 }
  2992.               }
  2993.             }
  2994.           }
  2995.           
  2996.           // ----------
  2997.           // Θcrire peu α peu
  2998.           if ((opt->getmode & 1) && (ptr>0)) HT_ADD_ADR;
  2999.           lastsaved=adr;    // dernier Θcrit+1
  3000.           // ----------
  3001.  
  3002.           // Checks
  3003.           if (back_add_stats != opt->state.back_add_stats) {
  3004.             back_add_stats = opt->state.back_add_stats;
  3005.  
  3006.             // Check max time
  3007.             if (!back_checkmirror(opt)) {
  3008.               adr = r->adr + r->size;
  3009.             }
  3010.           }
  3011.  
  3012.           // pour les stats du shell si parsing trop long
  3013. #if HTS_ANALYSTE
  3014.           if (r->size)
  3015.             _hts_in_html_done=(100 * ((int) (adr - r->adr)) ) / (int)(r->size);
  3016.           if (_hts_in_html_poll) {
  3017.             _hts_in_html_poll=0;
  3018.             // temps α attendre, et remplir autant que l'on peut le cache (backing)
  3019.             back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);        
  3020.             back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
  3021.             
  3022.             // Transfer rate
  3023.             engine_stats();
  3024.             
  3025.             // Refresh various stats
  3026.             HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
  3027.             HTS_STAT.stat_errors=fspc(NULL,"error");
  3028.             HTS_STAT.stat_warnings=fspc(NULL,"warning");
  3029.             HTS_STAT.stat_infos=fspc(NULL,"info");
  3030.             HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
  3031.             HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
  3032.             
  3033.             if (!hts_htmlcheck_loop(back,back_max,0,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
  3034.               if (opt->errlog) {
  3035.                 fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
  3036.                 test_flush;
  3037.               } 
  3038.               *stre->exit_xh_=1;  // exit requested
  3039.               XH_uninit;
  3040.               return -1;
  3041.               //adr = r->adr + r->size;  // exit
  3042.             } else if (_hts_cancel==1) {
  3043.               // adr = r->adr + r->size;  // exit
  3044.               nofollow=1;               // moins violent
  3045.               _hts_cancel=0;
  3046.             }
  3047.           }
  3048.           
  3049.           // refresh the backing system each 2 seconds
  3050.           if (engine_stats()) {
  3051.             back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);        
  3052.             back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
  3053.           }
  3054. #endif
  3055.         } while(( ((int) (adr - r->adr)) ) < r->size);
  3056. #if HTS_ANALYSTE
  3057.         _hts_in_html_parsing=0;  // flag
  3058.         _hts_cancel=0;           // pas de cancel
  3059. #endif
  3060.         if ((opt->getmode & 1) && (ptr>0)) {
  3061.           HT_ADD_END;    // achever
  3062.         }
  3063.         //
  3064.         //
  3065.         //
  3066.       }  // if !error
  3067.       
  3068.       
  3069.       if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
  3070.       // sauver fichier
  3071.       //structcheck(savename);
  3072.       //filesave(opt,r->adr,r->size,savename);
  3073.       
  3074. #if HTS_ANALYSTE
  3075.     }  // analyse OK
  3076. #endif
  3077.  
  3078.     /* Apply changes */
  3079.     ENGINE_SAVE_CONTEXT();
  3080.     
  3081.     return 0;
  3082. }
  3083.  
  3084.  
  3085.  
  3086.  
  3087. /*
  3088.   Check 301, 302, .. statuscodes (moved)
          
          Post-processes the response (r) received for the current link (liens[ptr]).
          Nothing is done if 'error' is already set on entry.
          
          - 301/302/303/307: r->location is resolved against the current URL
            (urladr/urlfil). Then, depending on the result:
              . same URL (exact same file): flagged as an error, nothing re-queued
              . same URL except for case, or accepted non-html target: the current
                link is cancelled and a new link to the new address is recorded,
                keeping the save name of the current link
              . otherwise: the response body is replaced by a small generated HTML
                page that refreshes/links to the new URL
          - 412/416: the local file is removed and the same link is recorded again,
            the current link being cancelled
          - any other status != 200: the link is recorded again with one retry less
            when a retry is possible and retries remain; otherwise the failure is
            logged. Unless store_errpage is set, the response body is freed and
            'error' is set.
          
          Returns 0 on every path. The out-of-memory paths call XH_uninit and return
          without going through ENGINE_SAVE_CONTEXT().
          
          NOTE(review): opt, r, liens, ptr, lien_tot, error, urladr, urlfil, savename,
          store_errpage, back, cache, hash, .. are not declared in this function;
          presumably they are provided by ENGINE_LOAD_CONTEXT() from str/stre and
          written back by ENGINE_SAVE_CONTEXT() - confirm against the macro definitions.
  3089. */
  3090. int hts_mirror_check_moved(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
  3091.   /* Load engine variables */
  3092.   ENGINE_LOAD_CONTEXT();  
  3093.   
  3094.   // BEGIN handling of 301,302,307..
  3095.   // ------------------------------------------------------------
  3096.   if (!error) {
  3097.     ////////{
  3098.     // one more file has been loaded
  3099.     // if (!error) stat_loaded+=r.size;
  3100.     
  3101.     // ------------------------------------------------------------
  3102.     // Handling of 301,302,307 (moved) and 412,416 - 304 responses are handled in the backing code
  3103.     // ------------------------------------------------------------
  3104.     if ( (r->statuscode==301) 
  3105.       || (r->statuscode==302)
  3106.       || (r->statuscode==303)
  3107.       || (r->statuscode==307)
  3108.       ) {          
  3109.       //if (r->adr!=NULL) {   // adr==null if direct file. [catch: davename (sic) normally if cgi]
  3110.       //int i=0;
  3111.       char *rn=NULL;      // buffer receiving the generated "page has moved" HTML (see get_it==0 below)
  3112.       // char* p;
  3113.       
  3114.       if ( (opt->debug>0) && (opt->errlog!=NULL) ) {
  3115.         //if (opt->errlog) {
  3116.         fspc(opt->errlog,"warning"); fprintf(opt->errlog,"%s for %s%s"LF,r->msg,urladr,urlfil);
  3117.         test_flush;
  3118.       }
  3119.       
  3120.       
  3121.       {
  3122.         char mov_url[HTS_URLMAXSIZE*2],mov_adr[HTS_URLMAXSIZE*2],mov_fil[HTS_URLMAXSIZE*2];
  3123.         int get_it=0;         // 0 (default): do not fetch at the new address, generate a redirect page; 1: re-queue at the new address; -1: do nothing
  3124.         int reponse=0;
  3125.         mov_url[0]='\0'; mov_adr[0]='\0'; mov_fil[0]='\0';
  3126.         //
  3127.         
  3128.         strcpybuff(mov_url,r->location);
  3129.         
  3130.         // any url -> address+file
  3131.         if ((reponse=ident_url_relatif(mov_url,urladr,urlfil,mov_adr,mov_fil))>=0) {                        
  3132.           int set_prio_to=0;    // no priority set by the wizard
  3133.           
  3134.           //if (ident_url_absolute(mov_url,mov_adr,mov_fil)!=-1) {    // ok URL recognized
  3135.           // this is (roughly) the same URL..
  3136.           // if this is a case problem in the host, then the server is buggy
  3137.           // ("RFC says.." : host name IS case insensitive)
  3138.           if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) {  // identical except for case
  3139.             // we are going round in circles
  3140.             if (strcmp(mov_fil,urlfil)==0) {
  3141.               error=1;
  3142.               get_it=-1;        // do nothing
  3143.               if (opt->errlog) {
  3144.                 fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Can not bear crazy server (%s) for %s%s"LF,r->msg,urladr,urlfil);
  3145.                 test_flush;
  3146.               }
  3147.             } else {    // wrong case: erase the entry in the stack and replay once
  3148.               get_it=1;
  3149.             }
  3150.           } else {        // different address
  3151.             if (ishtml(mov_url)==0) {   // not the same address BUT this is a non-html file (no "moved" page possible)
  3152.               // -> fetch at this address, the link will be recorded with lien_record() (hash)
  3153.               if ((opt->debug>1) && (opt->log!=NULL)) {
  3154.                 fspc(opt->log,"debug"); fprintf(opt->log,"wizard link test for moved file at %s%s.."LF,mov_adr,mov_fil);
  3155.                 test_flush;
  3156.               }
  3157.               // accepted?
  3158.               if (hts_acceptlink(opt,ptr,lien_tot,liens,
  3159.                 mov_adr,mov_fil,
  3160.                 NULL, NULL,
  3161.                 &set_prio_to,
  3162.                 NULL) != 1) {                /* new address not refused? */
  3163.                 get_it=1;
  3164.                 if ((opt->debug>1) && (opt->log!=NULL)) {
  3165.                   fspc(opt->log,"debug"); fprintf(opt->log,"moved link accepted: %s%s"LF,mov_adr,mov_fil);
  3166.                   test_flush;
  3167.                 }
  3168.               }
  3169.             } /* otherwise handled normally */
  3170.           }
  3171.           
  3172.           //if ((strfield2(mov_adr,urladr)!=0) && (strfield2(mov_fil,urlfil)!=0)) {  // identical except for case
  3173.           if (get_it==1) {
  3174.             // short-circuit the rest of the processing
  3175.             // and step back in order to jump better
  3176.             if (opt->errlog) {
  3177.               fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Warning moved treated for %s%s (real one is %s%s)"LF,urladr,urlfil,mov_adr,mov_fil);
  3178.               test_flush;
  3179.             }          
  3180.             // cancel the current link
  3181.             error=1;
  3182.             strcpybuff(liens[ptr]->adr,"!");  // dummy character (invalidates the hash)
  3183. #if HTS_HASH
  3184. #else
  3185.             liens[ptr]->sav_len=-1;       // invalid size
  3186. #endif
  3187.             // record the NEW link
  3188.             //xxc xxc
  3189.             //  set_prio_to=0+1;  // protection if the moved URL is an html page!!
  3190.             //xxc xxc
  3191.             {
  3192.               char mov_sav[HTS_URLMAXSIZE*2];
  3193.               // compute the save name and possibly modify address/file
  3194.               if (url_savename(mov_adr,mov_fil,mov_sav,NULL,NULL,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe)!=-1) { 
  3195.                 if (hash_read(hash,mov_sav,"",0,0)<0) {      // does not exist yet
  3196.                   // record link (MACRO) with the SAME SAV as the current link
  3197.                   liens_record(mov_adr,mov_fil,liens[ptr]->sav,"","");
  3198.                   //liens_record(mov_adr,mov_fil,mov_sav,"","");
  3199.                   if (liens[lien_tot]!=NULL) {    // OK, no error
  3200.                     // test mode?
  3201.                     liens[lien_tot]->testmode=liens[ptr]->testmode;
  3202.                     liens[lien_tot]->link_import=0;       // normal mode
  3203.                     if (!set_prio_to)
  3204.                       liens[lien_tot]->depth=liens[ptr]->depth;
  3205.                     else
  3206.                       liens[lien_tot]->depth=max(0,min(set_prio_to-1,liens[ptr]->depth));       // ZERO PRIORITY (catch page)
  3207.                     liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
  3208.                     liens[lien_tot]->retry=liens[ptr]->retry;
  3209.                     liens[lien_tot]->premier=liens[ptr]->premier;
  3210.                     liens[lien_tot]->precedent=liens[ptr]->precedent;
  3211.                     lien_tot++;
  3212.                   } else {  // oops, error: out of memory!!
  3213.                     printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  3214.                     if (opt->errlog) {
  3215.                       fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  3216.                       test_flush;
  3217.                     }
  3218.                     //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
  3219.                     XH_uninit;    // free memory & buffers
  3220.                     return 0;
  3221.                   }
  3222.                 } else {
  3223.                   if ( (opt->debug>0) && (opt->errlog!=NULL) ) {
  3224.                     fspc(opt->errlog,"warning"); fprintf(opt->errlog,"moving %s to an existing file %s"LF,liens[ptr]->fil,urlfil);
  3225.                     test_flush;
  3226.                   }
  3227.                 }
  3228.                 
  3229.               }
  3230.             }
  3231.             
  3232.             //printf("-> %s %s %s\n",liens[lien_tot-1]->adr,liens[lien_tot-1]->fil,liens[lien_tot-1]->sav);
  3233.             
  3234.             // metaphysical note: there may be both an index.html and an INDEX.HTML
  3235.             // under DOS this does not work very well... but since I am brilliant, url_savename()
  3236.             // is able to deal with this problem
  3237.           }
  3238.             } // ident_url_xx
  3239.             
  3240.             if (get_it==0) {    // really different address and potentially html (the page cannot be moved as is because of <img src..> and the like)
  3241.               rn=(char*) calloct(8192,1);
  3242.               if (rn!=NULL) {
  3243.                 if (opt->errlog) {
  3244.                   fspc(opt->errlog,"warning"); fprintf(opt->errlog,"File has moved from %s%s to %s"LF,urladr,urlfil,mov_url);
  3245.                   test_flush;
  3246.                 }
  3247.                 if (!opt->mimehtml) {
  3248.                   escape_uri(mov_url);
  3249.                 } else {
  3250.                   char buff[HTS_URLMAXSIZE*3];
  3251.                   strcpybuff(buff, mov_adr);
  3252.                   strcatbuff(buff, mov_fil);
  3253.                   escape_in_url(buff);
  3254.                   { char* a = buff; while((a = strchr(a, '%'))) { *a = 'X'; a++; } }   // replace every '%' by 'X'
  3255.                   strcpybuff(mov_url, "cid:");
  3256.                   strcatbuff(mov_url, buff);
  3257.                 }
  3258.                 // Prepare a page that will immediately jump to the right URL
  3259.                 // The scanner will then rewrite this URL again, to mirror it!
  3260.                 strcpybuff(rn,"<HTML>"CRLF);
  3261.                 strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
  3262.                 strcatbuff(rn,"<HEAD>"CRLF"<TITLE>Page has moved</TITLE>"CRLF"</HEAD>"CRLF"<BODY>"CRLF);
  3263.                 strcatbuff(rn,"<META HTTP-EQUIV=\"Refresh\" CONTENT=\"0; URL=");
  3264.                 strcatbuff(rn,mov_url);    // URL
  3265.                 strcatbuff(rn,"\">"CRLF);
  3266.                 strcatbuff(rn,"<A HREF=\"");
  3267.                 strcatbuff(rn,mov_url);
  3268.                 strcatbuff(rn,"\">");
  3269.                 strcatbuff(rn,"<B>Click here...</B></A>"CRLF);
  3270.                 strcatbuff(rn,"</BODY>"CRLF);
  3271.                 strcatbuff(rn,"<!-- Created by HTTrack Website Copier/"HTTRACK_VERSION" "HTTRACK_AFF_AUTHORS" -->"CRLF);
  3272.                 strcatbuff(rn,"</HTML>"CRLF);
  3273.                 
  3274.                 // replace the page: the generated buffer becomes the response body
  3275.                 if (r->adr) { 
  3276.                   freet(r->adr); 
  3277.                   r->adr=NULL; 
  3278.                 }
  3279.                 r->adr=rn;
  3280.                 r->size=strlen(r->adr);
  3281.                 strcpybuff(r->contenttype, "text/html");
  3282.               }
  3283.             }  // get_it==0
  3284.             
  3285.           }     // block
  3286.           // HTTP error (e.g. 404, not found)
  3287.         } else if (
  3288.           (r->statuscode==412)
  3289.           || (r->statuscode==416)
  3290.           ) {    // Precondition Failed, which for us means: request the WHOLE file again
  3291.           if (fexist(liens[ptr]->sav)) {
  3292.             remove(liens[ptr]->sav);    // Delete
  3293.             if (!fexist(liens[ptr]->sav)) {  // Really deleted? (otherwise we would loop..)
  3294. #if HDEBUG
  3295.               printf("Partial content NOT up-to-date, reget all file for %s\n",liens[ptr]->sav);
  3296. #endif
  3297.               if ( (opt->debug>1) && (opt->errlog!=NULL) ) {
  3298.                 //if (opt->errlog) {
  3299.                 fspc(opt->errlog,"debug"); fprintf(opt->errlog,"Partial file reget (%s) for %s%s"LF,r->msg,urladr,urlfil);
  3300.                 test_flush;
  3301.               }
  3302.               // record the SAME link (MACRO)
  3303.               liens_record(liens[ptr]->adr,liens[ptr]->fil,liens[ptr]->sav,"","");
  3304.               if (liens[lien_tot]!=NULL) {    // OK, no error
  3305.                 liens[lien_tot]->testmode=liens[ptr]->testmode;          // test mode?
  3306.                 liens[lien_tot]->link_import=0;       // not import mode
  3307.                 liens[lien_tot]->depth=liens[ptr]->depth;
  3308.                 liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
  3309.                 liens[lien_tot]->retry=liens[ptr]->retry;
  3310.                 liens[lien_tot]->premier=liens[ptr]->premier;
  3311.                 liens[lien_tot]->precedent=ptr;       // NOTE(review): the other two re-queue paths copy liens[ptr]->precedent instead - confirm this difference is intended
  3312.                 lien_tot++;
  3313.                 //
  3314.                 // cancel the current link
  3315.                 error=1;
  3316.                 strcpybuff(liens[ptr]->adr,"!");  // dummy character (invalidates the hash)
  3317. #if HTS_HASH
  3318. #else
  3319.                 liens[ptr]->sav_len=-1;       // invalid size
  3320. #endif
  3321.                 //
  3322.               } else {  // oops, error: out of memory!!
  3323.                 printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  3324.                 if (opt->errlog) {
  3325.                   fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  3326.                   test_flush;
  3327.                 }
  3328.                 //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
  3329.                 XH_uninit;    // free memory & buffers
  3330.                 return 0;
  3331.               } 
  3332.             } else {
  3333.               if (opt->errlog!=NULL) {
  3334.                 fspc(opt->errlog,"error"); fprintf(opt->errlog,"Can not remove old file %s"LF,urlfil);
  3335.                 test_flush;
  3336.               }
  3337.             }
  3338.           } else {
  3339.             if (opt->errlog!=NULL) {
  3340.               fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Unexpected 412/416 error (%s) for %s%s"LF,r->msg,urladr,urlfil);
  3341.               test_flush;
  3342.             }
  3343.           }
  3344.         } else if (r->statuscode!=200) {
  3345.           int can_retry=0;
  3346.           
  3347.           // cases where a retry is possible
  3348.           // -2=timeout -3=rateout (internal to httrack)
  3349.           switch(r->statuscode) {
  3350.             //case -1: can_retry=1; break;
  3351.           case -2: if (opt->hostcontrol) {    // timeout and retries exhausted
  3352.             if ((opt->hostcontrol & 1) && (liens[ptr]->retry<=0)) {
  3353.               if ((opt->debug>1) && (opt->log!=NULL)) {
  3354.                 fspc(opt->log,"debug"); fprintf(opt->log,"Link banned: %s%s"LF,urladr,urlfil); test_flush;
  3355.               }
  3356.               host_ban(opt,liens,ptr,lien_tot,back,back_max,jump_identification(urladr));
  3357.               if ((opt->debug>1) && (opt->log!=NULL)) {
  3358.                 fspc(opt->log,"debug"); fprintf(opt->log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush;
  3359.               }
  3360.             } else can_retry=1;
  3361.                    } else can_retry=1;
  3362.             break;
  3363.           case -3: if ((opt->hostcontrol) && (liens[ptr]->retry<=0)) {    // too slow
  3364.             if (opt->hostcontrol & 2) {
  3365.               if ((opt->debug>1) && (opt->log!=NULL)) {
  3366.                 fspc(opt->log,"debug"); fprintf(opt->log,"Link banned: %s%s"LF,urladr,urlfil); test_flush;
  3367.               }
  3368.               host_ban(opt,liens,ptr,lien_tot,back,back_max,jump_identification(urladr));
  3369.               if ((opt->debug>1) && (opt->log!=NULL)) {
  3370.                 fspc(opt->log,"debug"); fprintf(opt->log,"Info: previous log - link banned: %s%s"LF,urladr,urlfil); test_flush;
  3371.               }
  3372.             } else can_retry=1;
  3373.                    } else can_retry=1;
  3374.             break;
  3375.           case -4:            // connect closed
  3376.             can_retry=1;
  3377.             break;
  3378.           case -5:            // other (non fatal) error
  3379.             can_retry=1;
  3380.             break;
  3381.           case -6:            // bad SSL handshake
  3382.             can_retry=1;
  3383.             break;
  3384.           case 408: case 409: case 500: case 502: case 504: can_retry=1;
  3385.             break;
  3386.           }
  3387.           
  3388.           if ( strcmp(liens[ptr]->fil,"/primary") != 0 ) {  // no primary (internal page 0)
  3389.             if ((liens[ptr]->retry<=0) || (!can_retry) ) {  // retries exhausted (or retry impossible)
  3390.               if (opt->errlog) {
  3391.                 if ((opt->retry>0) && (can_retry)){
  3392.                   fspc(opt->errlog,"error"); 
  3393.                   fprintf(opt->errlog,"\"%s\" (%d) after %d retries at link %s%s (from %s%s)"LF,r->msg,r->statuscode,opt->retry,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
  3394.                 } else {
  3395.                   if (r->statuscode==-10) {    // test OK
  3396.                     if ((opt->debug>0) && (opt->errlog!=NULL)) {
  3397.                       fspc(opt->errlog,"info"); 
  3398.                       fprintf(opt->errlog,"Test OK at link %s%s (from %s%s)"LF,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
  3399.                     }
  3400.                   } else {
  3401.                     if (strcmp(urlfil,"/robots.txt")) {       // do not display information about robots.txt by default
  3402.                       fspc(opt->errlog,"error"); 
  3403.                       fprintf(opt->errlog,"\"%s\" (%d) at link %s%s (from %s%s)"LF,r->msg,r->statuscode,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
  3404.                     } else {
  3405.                       if (opt->debug>1) {
  3406.                         fspc(opt->errlog,"info"); fprintf(opt->errlog,"No robots.txt rules at %s"LF,urladr);
  3407.                         test_flush;
  3408.                       }
  3409.                     }
  3410.                   }
  3411.                 }
  3412.                 test_flush;
  3413.               }
  3414.               
  3415.               // NO error in top level
  3416.               // due to the "no connection -> previous restored" hack
  3417.               // This prevents the engine from wiping all data if the website has been deleted (or moved)
  3418.               // since last time (which is quite annoying)
  3419.               if (liens[ptr]->precedent != 0) {
  3420.                 // here we test whether the page must be stored anyway
  3421.                 if (opt->errpage) {
  3422.                   store_errpage=1;
  3423.                 }
  3424.               } else {
  3425.                 if (strcmp(urlfil,"/robots.txt") != 0) {
  3426.                 /*
  3427.                 This is an error caused by a link entered by the user
  3428.                 That is, link(s) entered by user are invalid (404, 500, connect error, proxy error...)
  3429.                 If all links entered are invalid, the session failed and we will attempt to restore
  3430.                 the previous one
  3431.                 Example: Try to update a website which has been deleted remotely: this may delete
  3432.                 the website locally, which is really not desired (especially if the website disappeared!)
  3433.                 With this hack, the engine won't wipe local files (how clever)
  3434.                   */
  3435.                   HTS_STAT.stat_errors_front++;
  3436.                 }
  3437.               }
  3438.               
  3439.             } else {    // retry!!
  3440.               if (opt->debug>0 && opt->errlog != NULL) {  // an alert will be raised if the retry fails
  3441.                 fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Retry after error %d (%s) at link %s%s (from %s%s)"LF,r->statuscode,r->msg,urladr,urlfil,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil);
  3442.                 test_flush;
  3443.               }
  3444.               // request the file again
  3445.               liens_record(urladr,urlfil,savename,"","");
  3446.               if (liens[lien_tot]!=NULL) {    // OK, no error
  3447.                 liens[lien_tot]->testmode=liens[ptr]->testmode;          // test mode?
  3448.                 liens[lien_tot]->link_import=0;       // not import mode
  3449.                 liens[lien_tot]->depth=liens[ptr]->depth;
  3450.                 liens[lien_tot]->pass2=max(liens[ptr]->pass2,numero_passe);
  3451.                 liens[lien_tot]->retry=liens[ptr]->retry-1;    // one retry less!
  3452.                 liens[lien_tot]->premier=liens[ptr]->premier;
  3453.                 liens[lien_tot]->precedent=liens[ptr]->precedent;
  3454.                 lien_tot++;
  3455.               } else {  // oops, error: out of memory!!
  3456.                 printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  3457.                 if (opt->errlog) {
  3458.                   fspc(opt->errlog,"panic"); 
  3459.                   fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  3460.                   test_flush;
  3461.                 }
  3462.                 //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
  3463.                 XH_uninit;    // free memory & buffers
  3464.                 return 0;
  3465.               } 
  3466.             }
  3467.           } else {
  3468.             if (opt->errlog) {
  3469.               if (opt->debug>1) {
  3470.                 fspc(opt->errlog,"info"); 
  3471.                 fprintf(opt->errlog,"Info: no robots.txt at %s%s"LF,urladr,urlfil);
  3472.               }
  3473.             }
  3474.           }
  3475.           if (!store_errpage) {
  3476.             if (r->adr) {     // free the response body
  3477.               freet(r->adr); 
  3478.               r->adr=NULL; 
  3479.             }
  3480.             error=1;  // error!
  3481.           }
  3482.         }
  3483.         // END handling of 301,302,307..
  3484.         // ------------------------------------------------------------
  3485.         
  3486.       }  // if !error
  3487.       
  3488.       
  3489.       /* Apply changes */
  3490.       ENGINE_SAVE_CONTEXT();
  3491.       
  3492.       return 0;
  3493.       
  3494.       
  3495. }
  3496.  
  3497.  
  3498.  
  3499. /*
  3500.   Wait for next file and
  3501.   check 301, 302, .. statuscodes (moved)
  3502. */
  3503. int hts_mirror_wait_for_next_file(htsmoduleStruct* str, htsmoduleStructExtended* stre) {
  3504.   /* Load engine variables */
  3505.   ENGINE_LOAD_CONTEXT();
  3506.   /* */
  3507.   int b;
  3508.   int n;
  3509.   
  3510. #if BDEBUG==1
  3511.   printf("\nBack test..\n");
  3512. #endif
  3513.   
  3514.   // pause/lock files
  3515.   {
  3516.     int do_pause=0;
  3517.     
  3518.     // user pause lockfile : create hts-paused.lock --> HTTrack will be paused
  3519.     if (fexist(fconcat(opt->path_log,"hts-stop.lock"))) {
  3520.       // remove lockfile
  3521.       remove(fconcat(opt->path_log,"hts-stop.lock"));
  3522.       if (!fexist(fconcat(opt->path_log,"hts-stop.lock"))) {
  3523.         do_pause=1;
  3524.       }
  3525.     }
  3526.     
  3527.     // after receving N bytes, pause
  3528.     if (opt->fragment>0) {
  3529.       if ((HTS_STAT.stat_bytes-stat_fragment) > opt->fragment) {
  3530.         do_pause=1;
  3531.       }
  3532.     }
  3533.     
  3534.     // pause?
  3535.     if (do_pause) {
  3536.       if ( (opt->debug>0) && (opt->log!=NULL) ) {
  3537.         fspc(opt->log,"info"); fprintf(opt->log,"engine: pause requested.."LF);
  3538.       }
  3539.       while (back_nsoc(back,back_max)>0) {                  // attendre fin des transferts
  3540.         back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
  3541.         Sleep(200);
  3542. #if HTS_ANALYSTE
  3543.         {
  3544.           back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
  3545.           
  3546.           // Transfer rate
  3547.           engine_stats();
  3548.           
  3549.           // Refresh various stats
  3550.           HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
  3551.           HTS_STAT.stat_errors=fspc(NULL,"error");
  3552.           HTS_STAT.stat_warnings=fspc(NULL,"warning");
  3553.           HTS_STAT.stat_infos=fspc(NULL,"info");
  3554.           HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
  3555.           HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
  3556.           
  3557.           b=0;
  3558.           if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)
  3559.             || !back_checkmirror(opt)) {
  3560.             if (opt->errlog) {
  3561.               fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
  3562.               test_flush;
  3563.             }
  3564.             *stre->exit_xh_=1;  // exit requested
  3565.             XH_uninit;
  3566.             return 0;
  3567.           }
  3568.         }
  3569. #endif
  3570.       }
  3571.       // On dΘsalloue le buffer d'enregistrement des chemins crΘΘe, au cas o∙ pendant la pause
  3572.       // l'utilisateur ferait un rm -r aprΦs avoir effectuΘ un tar
  3573.       // structcheck_init(1);
  3574.       {
  3575.         FILE* fp = fopen(fconcat(opt->path_log,"hts-paused.lock"),"wb");
  3576.         if (fp) {
  3577.           fspc(fp,"info");  // dater
  3578.           fprintf(fp,"Pause"LF"HTTrack is paused after retreiving "LLintP" bytes"LF"Delete this file to continue the mirror->.."LF""LF"",(LLint)HTS_STAT.stat_bytes);
  3579.           fclose(fp);
  3580.         }
  3581.       }
  3582.       stat_fragment=HTS_STAT.stat_bytes;
  3583.       /* Info for wrappers */
  3584.       if ( (opt->debug>0) && (opt->log!=NULL) ) {
  3585.         fspc(opt->log,"info"); fprintf(opt->log,"engine: pause: %s"LF,fconcat(opt->path_log,"hts-paused.lock"));
  3586.       }
  3587. #if HTS_ANALYSTE
  3588.       hts_htmlcheck_pause(fconcat(opt->path_log,"hts-paused.lock"));
  3589. #else
  3590.       while (fexist(fconcat(opt->path_log,"hts-paused.lock"))) {
  3591.         //back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);   inutile!! (plus de sockets actives)
  3592.         Sleep(1000);
  3593.       }
  3594. #endif
  3595.     }
  3596.     //
  3597.   }
  3598.   // end of pause/lock files
  3599.   
  3600. #if HTS_ANALYSTE
  3601.   // changement dans les préférences
  3602.   /*
  3603.   if (_hts_setopt) {
  3604.   copy_htsopt(_hts_setopt,opt);    // copier au besoin
  3605.   _hts_setopt=NULL;                 // effacer callback
  3606.   }
  3607.   */
  3608.   if (_hts_addurl) {
  3609.     char add_adr[HTS_URLMAXSIZE*2];
  3610.     char add_fil[HTS_URLMAXSIZE*2];
  3611.     while(*_hts_addurl) {
  3612.       char add_url[HTS_URLMAXSIZE*2];
  3613.       add_adr[0]=add_fil[0]=add_url[0]='\0';
  3614.       if (!link_has_authority(*_hts_addurl))
  3615.         strcpybuff(add_url,"http://");          // ajouter http://
  3616.       strcatbuff(add_url,*_hts_addurl);
  3617.       if (ident_url_absolute(add_url,add_adr,add_fil)>=0) {
  3618.         // ----Ajout----
  3619.         // noter NOUVEAU lien
  3620.         char add_sav[HTS_URLMAXSIZE*2];
  3621.         // calculer lien et éventuellement modifier adresse/fichier
  3622.         if (url_savename(add_adr,add_fil,add_sav,NULL,NULL,NULL,NULL,opt,liens,lien_tot,back,back_max,cache,hash,ptr,numero_passe)!=-1) { 
  3623.           if (hash_read(hash,add_sav,"",0,0)<0) {      // n'existe pas dΘja
  3624.             // enregistrer lien (MACRO)
  3625.             liens_record(add_adr,add_fil,add_sav,"","");
  3626.             if (liens[lien_tot]!=NULL) {    // OK, pas d'erreur
  3627.               liens[lien_tot]->testmode=0;          // mode test?
  3628.               liens[lien_tot]->link_import=0;       // mode normal
  3629.               liens[lien_tot]->depth=opt->depth;
  3630.               liens[lien_tot]->pass2=max(0,numero_passe);
  3631.               liens[lien_tot]->retry=opt->retry;
  3632.               liens[lien_tot]->premier=lien_tot;
  3633.               liens[lien_tot]->precedent=lien_tot;
  3634.               lien_tot++;
  3635.               //
  3636.               if ((opt->debug>0) && (opt->log!=NULL)) {
  3637.                 fspc(opt->log,"info"); fprintf(opt->log,"Link added by user: %s%s"LF,add_adr,add_fil); test_flush;
  3638.               }
  3639.               //
  3640.             } else {  // oups erreur, plus de mΘmoire!!
  3641.               printf("PANIC! : Not enough memory [%d]\n",__LINE__);
  3642.               if (opt->errlog) {
  3643.                 fprintf(opt->errlog,"Not enough memory, can not re-allocate %d bytes"LF,(int)((add_tab_alloc+1)*sizeof(lien_url)));
  3644.                 test_flush;
  3645.               }
  3646.               //if (opt->getmode & 1) { if (fp) { fclose(fp); fp=NULL; } }
  3647.               XH_uninit;    // dΘsallocation mΘmoire & buffers
  3648.               return 0;
  3649.             }
  3650.           } else {
  3651.             if ( (opt->debug>0) && (opt->errlog!=NULL) ) {
  3652.               fspc(opt->errlog,"warning"); fprintf(opt->errlog,"Existing link %s%s not added after user request"LF,add_adr,add_fil);
  3653.               test_flush;
  3654.             }
  3655.           }
  3656.           
  3657.         }
  3658.       } else {
  3659.         if (opt->errlog) {
  3660.           fspc(opt->errlog,"error");
  3661.           fprintf(opt->errlog,"Error during URL decoding for %s"LF,add_url);
  3662.           test_flush;
  3663.         }
  3664.       }
  3665.       // ----Fin Ajout----
  3666.       _hts_addurl++;                  // suivante
  3667.     }
  3668.     _hts_addurl=NULL;           // libΘrer _hts_addurl
  3669.   }
  3670.   // si une pause a été demandée
  3671.   if (_hts_setpause) {
  3672.     // index du lien actuel
  3673.     int b=back_index(back,back_max,urladr,urlfil,savename);
  3674.     if (b<0) b=0;    // forcer pour les stats
  3675.     while(_hts_setpause) {    // on fait la pause..
  3676.       back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
  3677.       
  3678.       // Transfer rate
  3679.       engine_stats();
  3680.       
  3681.       // Refresh various stats
  3682.       HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
  3683.       HTS_STAT.stat_errors=fspc(NULL,"error");
  3684.       HTS_STAT.stat_warnings=fspc(NULL,"warning");
  3685.       HTS_STAT.stat_infos=fspc(NULL,"info");
  3686.       HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
  3687.       HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
  3688.       
  3689.       if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
  3690.         if (opt->errlog) {
  3691.           fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
  3692.           test_flush;
  3693.         }
  3694.         *stre->exit_xh_=1;  // exit requested
  3695.         XH_uninit;
  3696.         return 0;
  3697.       }
  3698.       if (back_nsoc(back,back_max)==0)
  3699.         Sleep(250);  // tite pause
  3700.     }
  3701.   }
  3702. #endif
  3703.   
  3704.   // si le fichier n'est pas en backing, le mettre..
  3705.   if (!back_exist(back,back_max,urladr,urlfil,savename)) {
  3706. #if BDEBUG==1
  3707.     printf("crash backing: %s%s\n",liens[ptr]->adr,liens[ptr]->fil);
  3708. #endif
  3709.     if (back_add(back,back_max,opt,cache,urladr,urlfil,savename,liens[liens[ptr]->precedent]->adr,liens[liens[ptr]->precedent]->fil,liens[ptr]->testmode,&liens[ptr]->pass2)==-1) {
  3710.       printf("PANIC! : Crash adding error, unexpected error found.. [%d]\n",__LINE__);
  3711. #if BDEBUG==1
  3712.       printf("error while crash adding\n");
  3713. #endif
  3714.       if (opt->errlog) {
  3715.         fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected backing error for %s%s"LF,urladr,urlfil);
  3716.         test_flush;
  3717.       } 
  3718.       
  3719.     }
  3720.   }
  3721.   
  3722. #if BDEBUG==1
  3723.   printf("test number of socks\n");
  3724. #endif
  3725.   
  3726.   // ajouter autant de socket qu'on peut ajouter
  3727.   n=opt->maxsoc-back_nsoc(back,back_max);
  3728. #if BDEBUG==1
  3729.   printf("%d sockets available for backing\n",n);
  3730. #endif
  3731.   
  3732. #if HTS_ANALYSTE
  3733.   if ((n>0) && (!_hts_setpause)) {   // si sockets libre et pas en pause, ajouter
  3734. #else
  3735.     if (n>0) {                         // si sockets libre
  3736. #endif
  3737.       // remplir autant que l'on peut le cache (backing)
  3738.       back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
  3739.     }
  3740.     
  3741.     // index du lien actuel
  3742.     /*
  3743.     b=back_index(back,back_max,urladr,urlfil,savename);
  3744.     
  3745.       if (b>=0) 
  3746.     */
  3747.     {
  3748.       // ------------------------------------------------------------
  3749.       // attendre que le fichier actuel soit prêt - BOUCLE D'ATTENTE
  3750.       do {
  3751.         
  3752.         // index du lien actuel
  3753.         b=back_index(back,back_max,urladr,urlfil,savename);
  3754. #if BDEBUG==1
  3755.         printf("back index %d, waiting\n",b);
  3756. #endif
  3757.         // Continue to the loop if link still present
  3758.         if (b<0)
  3759.           continue;
  3760.         
  3761.         // Receive data
  3762.         if (back[b].status>0)
  3763.           back_wait(back,back_max,opt,cache,HTS_STAT.stat_timestart);
  3764.         
  3765.         // Continue to the loop if link still present
  3766.         b=back_index(back,back_max,urladr,urlfil,savename);
  3767.         if (b<0)
  3768.           continue;
  3769.         
  3770.         // Stop the mirror
  3771.         if (!back_checkmirror(opt)) {
  3772.           *stre->exit_xh_=1;  // exit requested
  3773.           XH_uninit;
  3774.           return 0;
  3775.         }
  3776.         
  3777.         // And fill the backing stack
  3778.         if (back[b].status>0)
  3779.           back_fillmax(back,back_max,opt,cache,liens,ptr,numero_passe,lien_tot);
  3780.         
  3781.         // Continue to the loop if link still present
  3782.         b=back_index(back,back_max,urladr,urlfil,savename);
  3783.         if (b<0)
  3784.           continue;
  3785.         
  3786.         // autres occupations de HTTrack: statistiques, boucle d'attente, etc.
  3787.         if ((opt->makestat) || (opt->maketrack)) {
  3788.           TStamp l=time_local();
  3789.           if ((int) (l-makestat_time) >= 60) {   
  3790.             if (makestat_fp != NULL) {
  3791.               fspc(makestat_fp,"info");
  3792.               fprintf(makestat_fp,"Rate= %d (/"LLintP") \11NewLinks= %d (/%d)"LF,(int) ((HTS_STAT.HTS_TOTAL_RECV-*stre->makestat_total_)/(l-makestat_time)), (LLint)HTS_STAT.HTS_TOTAL_RECV,(int) lien_tot-*stre->makestat_lnk_,(int) lien_tot);
  3793.               fflush(makestat_fp);
  3794.               *stre->makestat_total_=HTS_STAT.HTS_TOTAL_RECV;
  3795.               *stre->makestat_lnk_=lien_tot;
  3796.             }
  3797.             if (stre->maketrack_fp != NULL) {
  3798.               int i;
  3799.               fspc(stre->maketrack_fp,"info"); fprintf(stre->maketrack_fp,LF);
  3800.               for(i=0;i<back_max;i++) {
  3801.                 back_info(back,i,3,stre->maketrack_fp);
  3802.               }
  3803.               fprintf(stre->maketrack_fp,LF);
  3804.               fflush(stre->maketrack_fp);
  3805.               
  3806.             }
  3807.             makestat_time=l;
  3808.           }
  3809.         }
  3810. #if HTS_ANALYSTE
  3811.         {
  3812.           int i;
  3813.           {
  3814.             char* s=hts_cancel_file("");
  3815.             if (strnotempty(s)) {    // fichier α canceller
  3816.               for(i=0;i<back_max;i++) {
  3817.                 if ((back[i].status>0)) {
  3818.                   if (strcmp(back[i].url_sav,s)==0) {  // ok trouvΘ
  3819.                     if (back[i].status != 1000) {
  3820. #if HTS_DEBUG_CLOSESOCK
  3821.                       DEBUG_W("user cancel: deletehttp\n");
  3822. #endif
  3823.                       if (back[i].r.soc!=INVALID_SOCKET) deletehttp(&back[i].r);
  3824.                       back[i].r.soc=INVALID_SOCKET;
  3825.                       back[i].r.statuscode=-1;
  3826.                       strcpybuff(back[i].r.msg,"Cancelled by User");
  3827.                       back[i].status=0;  // terminΘ
  3828.                     } else    // cancel ftp.. flag α 1
  3829.                       back[i].stop_ftp = 1;
  3830.                   }
  3831.                 }
  3832.               }
  3833.               s[0]='\0';
  3834.             }
  3835.           }
  3836.           
  3837.           // Transfer rate
  3838.           engine_stats();
  3839.           
  3840.           // Refresh various stats
  3841.           HTS_STAT.stat_nsocket=back_nsoc(back,back_max);
  3842.           HTS_STAT.stat_errors=fspc(NULL,"error");
  3843.           HTS_STAT.stat_warnings=fspc(NULL,"warning");
  3844.           HTS_STAT.stat_infos=fspc(NULL,"info");
  3845.           HTS_STAT.nbk=backlinks_done(liens,lien_tot,ptr);
  3846.           HTS_STAT.nb=back_transfered(HTS_STAT.stat_bytes,back,back_max);
  3847.           
  3848.           if (!hts_htmlcheck_loop(back,back_max,b,ptr,lien_tot,(int) (time_local()-HTS_STAT.stat_timestart),&HTS_STAT)) {
  3849.             if (opt->errlog) {
  3850.               fspc(opt->errlog,"info"); fprintf(opt->errlog,"Exit requested by shell or user"LF);
  3851.               test_flush;
  3852.             } 
  3853.             *stre->exit_xh_=1;  // exit requested
  3854.             XH_uninit;
  3855.             return 0;
  3856.           }
  3857.         }
  3858.         
  3859. #endif
  3860. #if HTS_POLL
  3861.         if ((opt->shell) || (opt->keyboard) || (opt->verbosedisplay) || (!opt->quiet)) {
  3862.           TStamp tl;
  3863.           *stre->info_shell_=1;
  3864.           
  3865.           /* Toggle with ENTER */
  3866.           if (!opt->quiet) {
  3867.             if (check_stdin()) {
  3868.               char com[256];
  3869.               linput(stdin,com,200);
  3870.               if (opt->verbosedisplay==2)
  3871.                 opt->verbosedisplay=1;
  3872.               else
  3873.                 opt->verbosedisplay=2;
  3874.               /* Info for wrappers */
  3875.               if ( (opt->debug>0) && (opt->log!=NULL) ) {
  3876.                 fspc(opt->log,"info"); fprintf(opt->log,"engine: change-options"LF);
  3877.               }
  3878. #if HTS_ANALYSTE
  3879.               hts_htmlcheck_chopt(opt);
  3880. #endif
  3881.             }
  3882.           }
  3883.           
  3884.           tl=time_local();
  3885.           
  3886.           // générer un message d'infos sur l'état actuel
  3887.           if (opt->shell) {    // si shell
  3888.             if ((tl-*stre->last_info_shell_)>0) {    // toute les 1 sec
  3889.               FILE* fp=stdout;
  3890.               int a=0;
  3891.               *stre->last_info_shell_=tl;
  3892.               if (fexist(fconcat(opt->path_log,"hts-autopsy"))) {  // dΘbuggage: teste si le robot est vivant
  3893.                 // (oui je sais un robot vivant.. mais bon.. il a le droit de vivre lui aussi)
  3894.                 // (libérons les robots esclaves de l'internet!)
  3895.                 remove(fconcat(opt->path_log,"hts-autopsy"));
  3896.                 fp=fopen(fconcat(opt->path_log,"hts-isalive"),"wb");
  3897.                 a=1;
  3898.               }
  3899.               if ((*stre->info_shell_) || a) {
  3900.                 int i,j;
  3901.                 
  3902.                 fprintf(fp,"TIME %d"LF,(int) (tl-HTS_STAT.stat_timestart));
  3903.                 fprintf(fp,"TOTAL %d"LF,(int) HTS_STAT.stat_bytes);
  3904.                 fprintf(fp,"RATE %d"LF,(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart)));
  3905.                 fprintf(fp,"SOCKET %d"LF,back_nsoc(back,back_max));
  3906.                 fprintf(fp,"LINK %d"LF,lien_tot);
  3907.                 {
  3908.                   LLint mem=0;
  3909.                   for(i=0;i<back_max;i++)
  3910.                     if (back[i].r.adr!=NULL)
  3911.                       mem+=back[i].r.size;
  3912.                     fprintf(fp,"INMEM "LLintP""LF,(LLint)mem);
  3913.                 }
  3914.                 for(j=0;j<2;j++) {  // passes pour ready et wait
  3915.                   for(i=0;i<back_max;i++) {
  3916.                     back_info(back,i,j+1,stdout);    // maketrack_fp a la place de stdout ?? // **
  3917.                   }
  3918.                 }
  3919.                 fprintf(fp,LF);
  3920.                 if (a)
  3921.                   fclose(fp);
  3922.                 io_flush;
  3923.               }
  3924.             }
  3925.           }  // si shell
  3926.           
  3927.         }  // si shell ou keyboard (option)
  3928.         //
  3929. #endif
  3930.       } while((b>=0) && (back[max(b,0)].status>0));
  3931.             
  3932.             
  3933.       // If link not found on the stack, it's because it has already been downloaded
  3934.       // in background
  3935.       // Then, skip it and go to the next one
  3936.       if (b<0) {
  3937.         if ((opt->debug>1) && (opt->log!=NULL)) {
  3938.           fspc(opt->log,"debug"); fprintf(opt->log,"link #%d is ready, no more on the stack, skipping: %s%s.."LF,ptr,urladr,urlfil);
  3939.           test_flush;
  3940.         }
  3941.         
  3942.         // prochain lien
  3943.         // ptr++;
  3944.         
  3945.         return 2; // goto jump_if_done;
  3946.         
  3947.       }
  3948. #if 0
  3949.       /* FIXME - finalized HAS NO MORE THIS MEANING */
  3950.       /* link put in cache by the backing system for memory spare - reclaim */
  3951.       else if (back[b].finalized) {
  3952.         assertf(back[b].r.adr == NULL);
  3953.         /* read file in cache */
  3954.         back[b].r = cache_read_ro(opt,cache,back[b].url_adr,back[b].url_fil,back[b].url_sav, back[b].location_buffer);
  3955.         /* ensure correct location buffer set */
  3956.         back[b].r.location=back[b].location_buffer;
  3957.         if (back[b].r.statuscode == -1) {
  3958.           if (opt->errlog) {
  3959.             fspc(opt->errlog,"error"); fprintf(opt->errlog,"Unexpected error: %s%s not found anymore in cache"LF,back[b].url_adr,back[b].url_fil);
  3960.             test_flush;
  3961.           }
  3962.         } else {
  3963.           if ( (opt->debug>1) && (opt->log!=NULL) ) {
  3964.             fspc(opt->log,"debug"); fprintf(opt->log,"reclaim file %s%s (%d)"LF,back[b].url_adr,back[b].url_fil,back[b].r.statuscode); test_flush;
  3965.           }
  3966.         }
  3967.       }
  3968. #endif
  3969.             
  3970. #if HTS_ANALYSTE==2
  3971. #else
  3972.       //if (!opt->quiet) {  // petite animation
  3973.       if (!opt->verbosedisplay) {
  3974.         if (!opt->quiet) {
  3975.           static int roll=0;  /* static: ok */
  3976.           roll=(roll+1)%4;
  3977.           printf("%c\x0d",("/-\\|")[roll]);
  3978.           fflush(stdout);
  3979.         }
  3980.       } else if (opt->verbosedisplay==1) {
  3981.         if (back[b].r.statuscode==200)
  3982.           printf("%d/%d: %s%s ("LLintP" bytes) - OK\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size);
  3983.         else
  3984.           printf("%d/%d: %s%s ("LLintP" bytes) - %d\33[K\r",ptr,lien_tot,back[b].url_adr,back[b].url_fil,(LLint)back[b].r.size,back[b].r.statuscode);
  3985.         fflush(stdout);
  3986.       }
  3987.       //}
  3988. #endif
  3989.       // ------------------------------------------------------------
  3990.       // Vérificateur d'intégrité
  3991. #if DEBUG_CHECKINT
  3992.       _CHECKINT(&back[b],"Retour de back_wait, aprΦs le while")
  3993.       {
  3994.         int i;
  3995.         for(i=0;i<back_max;i++) {
  3996.           char si[256];
  3997.           sprintf(si,"Test global aprΦs back_wait, index %d",i);
  3998.           _CHECKINT(&back[i],si)
  3999.         }
  4000.       }
  4001. #endif
  4002.       
  4003.       // copier structure réponse htsblk
  4004.       memcpy(r, &(back[b].r), sizeof(htsblk));
  4005.       r->location=stre->loc_;    // ne PAS copier location!! adresse, pas de buffer
  4006.       if (back[b].r.location) 
  4007.         strcpybuff(r->location,back[b].r.location);
  4008.       back[b].r.adr=NULL;    // ne pas faire de desalloc ensuite
  4009.       
  4010.       // libérer emplacement backing
  4011.       back_maydelete(opt,cache,back,b);
  4012.       
  4013.       // progression
  4014. #if 0
  4015.       if (opt->aff_progress) {
  4016.         TStamp tl=time_local();
  4017.         if ((tl-HTS_STAT.stat_timestart)>0) {
  4018.           char s[32];
  4019.           int i=0;
  4020.           lastime=tl;
  4021.           _CLRSCR; _GOTOXY("1","1");
  4022.           printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(tl-HTS_STAT.stat_timestart)));
  4023.           while(i<minimum(back_max,99)) {  // **
  4024.             if (back[i].status>=0) {  // loading..
  4025.               s[0]='\0';
  4026.               if (strlen(back[i].url_fil)>16)
  4027.                 strcatbuff(s,back[i].url_fil+strlen(back[i].url_fil)-16);       
  4028.               else
  4029.                 strncatbuff(s,back[i].url_fil,16);
  4030.               printf("%s : ",s);
  4031.               
  4032.               printf("[");
  4033.               if (back[i].r.totalsize>0) {
  4034.                 int p;
  4035.                 int j;
  4036.                 p=(int)((back[i].r.size*10)/back[i].r.totalsize);
  4037.                 p=minimum(10,p);
  4038.                 for(j=0;j<p;j++) printf("*");
  4039.                 for(j=0;j<(10-p);j++) printf("-");
  4040.               } else { 
  4041.                 printf(LLintP,(LLint)back[i].r.size);                      
  4042.               }
  4043.               printf("]");
  4044.               
  4045.               //} else if (back[i].status==0) {
  4046.               //  strcpybuff(s,"ENDED");
  4047.             } 
  4048.             printf("\n");
  4049.             i++;
  4050.           }
  4051.           io_flush;
  4052.         }
  4053.       }
  4054. #endif
  4055.       
  4056.       // débug graphique
  4057. #if BDEBUG==2
  4058.       {
  4059.         char s[12];
  4060.         int i=0;
  4061.         _GOTOXY(1,1);
  4062.         printf("Rate=%d B/sec\n",(int) (HTS_STAT.HTS_TOTAL_RECV/(time_local()-HTS_STAT.stat_timestart)));
  4063.         while(i<minimum(back_max,160)) {
  4064.           if (back[i].status>0) {
  4065.             sprintf(s,"%d",back[i].r.size);
  4066.           } else if (back[i].status==0) {
  4067.             strcpybuff(s,"ENDED");
  4068.           } else 
  4069.             strcpybuff(s,"   -   ");
  4070.           while(strlen(s)<8) strcatbuff(s," ");
  4071.           printf("%s",s); io_flush;
  4072.           i++;
  4073.         }
  4074.       }
  4075. #endif
  4076.       
  4077.       
  4078. #if BDEBUG==1
  4079.       printf("statuscode=%d with %s / msg=%s\n",r->statuscode,r->contenttype,r->msg);
  4080. #endif
  4081.       
  4082.     }
  4083.     /*else {
  4084.     #if BDEBUG==1
  4085.     printf("back index error\n");
  4086.     #endif
  4087.     }
  4088.     */
  4089.     
  4090.     
  4091.     
  4092.     ENGINE_SAVE_CONTEXT();
  4093.     
  4094.     return 0;
  4095.     
  4096.     
  4097. }
  4098.  
  4099.  
  4100.