program Kayhannews;
{Arash Kamangir, arash@kamangir.net
This code scans through the pages of the state-run newspaper Kayhannews to gather the names
of "people" who leave comments there. It is suggested that these comments are in fact written by the
staff.
4 March 2008: Work started
}
{$APPTYPE CONSOLE}
uses
SysUtils,ulog,udownload,uprogress,utools,UDBWStringList;
var
  year, month, day, lastday: Integer;
  ss, s, url: String;
  download: Mdownload;
  index, minmonth, maxmonth, cnt: Integer;
  src, dest: TextFile;
  list: MDBWStringList;
{ Main program.
  Walks every date from month 10 of year 84 to month 12 of year 86, downloads
  http://kayhannews.ir/YYMMDD/2.HTM for each one, looks for the marker string
  in the downloaded page and, for every fragment shorter than 100 characters
  found after it, appends the fragment to comments.txt and counts it in the
  list, which is finally written to names.txt.
  log and progress are not declared here; presumably they are globals exported
  by the ulog and uprogress units. }
begin
  // Nil the object references first so the outer finally can Free them
  // unconditionally, even if one of the constructors raises.
  download := nil;
  list := nil;
  log := MLog.create;
  try
    log.Initialize(LOG_TYPE_CON);
    download := Mdownload.create;
    download.Online := true;
    AssignFile(dest, 'comments.txt');
    Rewrite(dest);
    try
      list := MDBWStringList.Create;
      list.InitializeRandom;
      // Progress range: 3 months of year 84 plus 12 + 12 months, 30 days each,
      // scaled by 6/7.
      // NOTE(review): the 6/7 factor presumably accounts for one non-publishing
      // day per week, but progress.progress below is called for every attempted
      // day, so the range may be exceeded - confirm against uprogress.
      progress.Show('Fetching...', 1, round((12 + 12 + 3) * 30 * 6 / 7));
      cnt := 0;
      for year := 84 to 86 do
      begin
        // Scanning starts at month 10 of year 84; later years are scanned in full.
        if year = 84 then
          minmonth := 10
        else
          minmonth := 1;
        maxmonth := 12;
        for month := minmonth to maxmonth do
        begin
          // The dates in the URL are Solar Hijri: months 1..6 have 31 days.
          // Dates that do not exist (or were not published) simply fail the
          // download check below.
          if month <= 6 then
            lastday := 31
          else
            lastday := 30;
          for day := 1 to lastday do
          begin
            progress.progress;
            // Build the URL: two-digit year, zero-padded month and day.
            url := 'http://kayhannews.ir/' +
                   Format('%d%.2d%.2d', [year, month, day]) +
                   '/2.HTM';//#other201
            // Does it exist?
            if download.execute(url) <> DOWNLOAD_DONE then
            begin
              Writeln(url+' : Does not exist!');
              continue;
            end;
            Write(url+' : ');
            // Analyze the downloaded page.
            AssignFile(src, TEMP_HTML);
            Reset(src);
            try
              if not FindinFile(src, '¤ ', s, index) then
                Writeln('Tag not found!')
              else
              begin
                Writeln('Found!');
                // NOTE(review): the two delimiter literals below span a physical
                // line break exactly as found in the source. A quoted Pascal string
                // cannot span lines, so the real delimiter (possibly markup lost in
                // transcription) must be restored before this compiles.
                // NOTE(review): termination relies on CopyTillEndS eventually
                // returning an empty string - confirm in utools.
                while s <> '' do
                begin
                  s := CopyTillEndS(s, '
');
                  ss := CopyToS(s, '
');
                  // Only short fragments are recorded.
                  if Length(ss) < 100 then
                  begin
                    Writeln(dest, '- '+ss);
                    list.Increment(ss);
                  end;
                end;
              end;
            finally
              // Always release the page file, even if parsing raised.
              CloseFile(src);
            end;
            // Done with this page.
            cnt := cnt + 1;
          end;
        end;
      end;
      // Report.
      Writeln(inttostr(cnt)+' pages analyzed.');
      list.PrintToFile('names.txt');
      progress.Hide;
    finally
      // Close comments.txt before waiting for the user so buffered output
      // reaches disk even if the console is closed at the prompt.
      CloseFile(dest);
    end;
    // Keep the console window open until the user presses Enter.
    Readln;
  finally
    // Free on a nil reference is a no-op; log goes last so the other
    // destructors can still use it.
    list.Free;
    download.Free;
    log.Free;
  end;
end.