program Kayhannews;
{
  Arash Kamangir, arash@kamangir.net

  This code scans through the pages of the state-run newspaper Kayhannews
  to gather the names of "people" who leave comments there. It is suggested
  that these comments are in fact written by the staff.

  4 March 2008: Work started

  Outputs:
    comments.txt - one line ("- <text>") per extracted entry shorter than
                   100 characters.
    names.txt    - written by list.PrintToFile after every entry has been
                   passed to list.Increment.
}
{$APPTYPE CONSOLE}

uses
  SysUtils, ulog, udownload, uprogress, utools, UDBWStringList;

const
  FIRST_YEAR = 84;         // two-digit year codes as they appear in the URLs
  LAST_YEAR = 86;
  FIRST_MONTH_OF_FIRST_YEAR = 10;
  LAST_MONTH = 12;

// First month to scan for the given year: the first year starts at month 10,
// every later year at month 1.
function FirstMonthOf(aYear: Integer): Integer;
begin
  if aYear = FIRST_YEAR then
    Result := FIRST_MONTH_OF_FIRST_YEAR
  else
    Result := 1;
end;

// Highest day number to try for a month.
// NOTE(review): assumes the URL dates are Solar Hijri (year codes 84..86,
// work dated March 2008), where months 1..6 have 31 days. The original loop
// stopped at 30 for every month and so never requested day 31. Days that do
// not exist (e.g. day 30 of month 12 in a common year) are harmless: the
// download fails and the page is skipped.
function LastDayToTry(aMonth: Integer): Integer;
begin
  if aMonth <= 6 then
    Result := 31
  else
    Result := 30;
end;

var
  year, month, day: Integer;
  ss, s, url: String;
  download: Mdownload;
  index, cnt, total: Integer;
  src, dest: textfile;
  list: MDBWStringList;

begin
  log := MLog.create;
  log.Initialize(LOG_TYPE_CON);

  download := nil;
  list := nil;
  try
    download := Mdownload.create;
    download.Online := true;

    assign(dest, 'comments.txt');
    rewrite(dest);
    try
      list := MDBWStringList.Create;
      list.InitializeRandom;

      // Count the candidate dates so the progress estimate follows the loops
      // below instead of a hard-coded month count.
      total := 0;
      for year := FIRST_YEAR to LAST_YEAR do
        for month := FirstMonthOf(year) to LAST_MONTH do
          total := total + LastDayToTry(month);

      // The 6/7 factor is kept from the original estimate.
      // NOTE(review): presumably one non-publishing day per week - confirm.
      progress.Show('Fetching...', 1, round(total * 6 / 7));

      cnt := 0;
      for year := FIRST_YEAR to LAST_YEAR do
        for month := FirstMonthOf(year) to LAST_MONTH do
          for day := 1 to LastDayToTry(month) do
          begin
            progress.progress;

            // Build the page URL: year, then zero-padded month and day.
            url := 'http://kayhannews.ir/'
              + Format('%d%.2d%.2d', [year, month, day])
              + '/2.HTM'; //#other201

            // Does it exist?
            if download.execute(url) <> DOWNLOAD_DONE then
            begin
              Writeln(url + ' : Does not exist!');
              continue;
            end;
            Write(url + ' : ');

            // Analyze the downloaded page.
            AssignFile(src, TEMP_HTML);
            reset(src);
            try
              if not FindinFile(src, '¤ ', s, index) then
                Writeln('Tag not found!')
              else
              begin
                writeln('Found!');
                // NOTE(review): the two delimiter literals below contain a
                // character that shows up as a line break in this copy of
                // the source. They are reproduced exactly as received;
                // confirm the intended delimiter against the original file.
                // NOTE(review): the loop ends only once CopyTillEndS yields
                // an empty string - confirm in utools that this happens when
                // no delimiter is left in s.
                while s <> '' do
                begin
                  s := CopyTillEndS(s,'
');
                  ss := CopyToS(s,'
');
                  // Only short fragments are recorded.
                  if length(ss) < 100 then
                  begin
                    Writeln(dest, '- ' + ss);
                    list.Increment(ss);
                  end;
                end;
              end;
            finally
              // Always release the temporary page, even if parsing raised.
              closefile(src);
            end;

            cnt := cnt + 1;
          end;
    finally
      // Flush and close comments.txt before waiting for the user, so
      // closing the console window cannot lose buffered output.
      closefile(dest);
    end;

    // Report
    Writeln(inttostr(cnt) + ' pages analyzed.');
    list.PrintToFile('names.txt');

    // Finalize
    progress.Hide;
    Readln;
  finally
    // Free is safe on nil, so this is correct even if construction failed.
    list.Free;
    download.Free;
    log.free;
  end;
end.