VasyOK
Full Member | Ðåäàêòèðîâàòü | Ïðîôèëü | Ñîîáùåíèå | Öèòèðîâàòü | Ñîîáùèòü ìîäåðàòîðó Äîáðûé äåíü! Åñòü ñêðèïò - ïàðñåð íîâîñòåé. Åãî çàäà÷à ïåðèîäè÷åñêè çàõîäèòü íà ñòðàíèöû ñ óêàçàííûìè url (àðõèâû íîâîñòåé), èñêàòü òàì âíîâü ïîÿâèâøèåñÿ url è äîáàâëÿòü èõ â áàçó, èç íàéäåííûõ url îòáèðàòü url, ñòðàíèöû êîòîðûõ ñîäåðæàò îïðåäåëåííûå êëþ÷åâûå ñëîâà. Ñêðèïò ðàáî÷èé, äà âîò òîëüêî ñåðâåð ïåðåãðóæàåò)) Êàê áûòü óæå íå çíàþ. Ìîæåò êàêîé íþàíñ óïóñòèë. Ïðîøó ïîìîùè. Áóäó áëàãîäàðåí ëþáûì ñîâåòàì ïî îïòèìèçàöèè ñêðèïòà Òåïåðü ïîäðîáíåå. Crons: Êîä: Êàæäûå ïÿòü ìèíóò áåðåì url (ñî ñòàòóñîì wait) îäíîãî èç àðõèâîâ íîâîñòåé äëÿ ïîèñêà âíîâü ïîÿâèâøèõñÿ ññûëîê: */05 * * * * /home/asob_admin/data/www/timeringer.ru/professional/cgi-bin/crons/search_news.pl get_url_to_check Êàæäûå ïÿòü ìèíóò áåðåì èç áàçû íåñêîëüêî íàéäåííûõ url (ñî ñòàòóñîì wait) äëÿ ïðîâåðêè íà íàëè÷èå êëþ÷åâûõ ñëîâ: */05 * * * * /home/asob_admin/data/www/timeringer.ru/professional/cgi-bin/crons/search_news.pl search  8:00 è 13:00 ìåíÿåì ñòàòóñ âñåõ ññûëîê íà àðõèâû íîâîñòåé íà - wait è òåì ñàìûì èíèöèèðóåì ïðîöåññ ïîèñêà âíîâü ïîÿâèâøèõñÿ ññûëîê: * 8,13 * * * /home/asob_admin/data/www/timeringer.ru/professional/cgi-bin/crons/search_news.pl source_wait Îòïðàâëÿåì ññûëêè íà èíòåðåñíûå íîâîñòè àäìèíèñòðàòîðó. 5 11,16 * * * /home/asob_admin/data/www/timeringer.ru/professional/cgi-bin/crons/search_news.pl send | À òåïåðü ñàì ñêðèïò: Êîä: #!/usr/local/bin/perl -w use DBI; use CGI; use CGI qw(:standard); use CGI::Cookie; use Date::Calc qw(Add_Delta_DHMS); use Text::Iconv; use HTML::TokeParser; use Encode; $converter = Text::Iconv->new("windows-1251", "utf-8"); $converter_to_win = Text::Iconv->new("utf-8","windows-1251"); use LWP; my $ua=LWP::UserAgent->new(); $ua->agent("PerlUA/0.1"); $fullpath="/home/asob_admin/data/www/timeringer.ru/professional/cgi-bin/"; $m=$fullpath; $m.="mod/datetimefunc.mod"; require "$m"; $m=$fullpath; $m.="mod/settings.mod"; require "$m"; $m=$fullpath; $m.="mod/db.mod"; require "$m"; &db_connect; $datevar=""; $timevar=""; &SetUnicIdAntAnd; ############################ # source_wait ############################ if ($ARGV[0] eq "source_wait"){ $sth = $dbh->prepare(" UPDATE `snews_source` SET `status`='wait' WHERE `status` LIKE 'ready' "); $sth->execute(); $sth->finish(); ######################## $sth = $dbh->prepare(" UPDATE `snews_urls_to_check` SET `status`='ready' WHERE `status`='wait' "); $sth->execute(); $sth->finish(); } ############################ # get_url_to_check ############################ if ($ARGV[0] eq "get_url_to_check"){ $sth = "SELECT source.id, source.url, source.view_root, source.key, source.coding, source.tag1, source.tag2 FROM `snews_source` as source WHERE source.status LIKE 'wait'"; $inf = $dbh->selectall_arrayref($sth); $qnt = @$inf; $sth2 = "SELECT * FROM `snews_source` as source WHERE source.status LIKE 'check'"; $inf2 = $dbh->selectall_arrayref($sth2); $qnt2 = @$inf2; if ($qnt>0 && $qnt2<2){ $sth = $dbh->prepare(" UPDATE `snews_source` SET `status`='check' WHERE `id`=$$inf[0][0] "); $sth->execute(); $sth->finish(); # ÔÎÐÌÈÐÓÅÌ ÑÏÈÑÎÊ URL`s äëÿ ïðîâåðêè âõîæäåíèÿ êëþ÷åâûõ ñëîâ $document=$ua->request(HTTP::Request->new(GET => $$inf[0][1])); if ($document->is_success){ $str=$document->content; $str=~s/\n//g; $str=~s/\r//g; if ($$inf[0][4] eq "win-1251"){$str=$converter->convert($str);} $str=Encode::decode_utf8($str); $parser = HTML::TokeParser->new(\$str); while (my $token = $parser->get_tag('a')) { my $url = $token->[1]{href}; my $text = $parser->get_trimmed_text('/a'); push(@html, $url) if $url; } for(@html){ $url=$_; if (index($url,"://")<0){$url="$$inf[0][2]"."$url"} $url=~s/\/\//\//g; $url=~s/:\//:\/\//g; $url=~ s/\'//g; $url=~ s/\"//g; $fl=1; if ($$inf[0][3] ne ""){ if (index($url,$$inf[0][3])<0 || index($url,"comm")>=0 || index($url,"mailto")>=0 || index($url,"#")>=0){$fl=0} } #$url=$converter->convert($url); if ($fl==1){ if (get_qnt_urls("snews_urls_to_check", $url)==0){ $date_create=DateConvert($datevar,"yyyy-mm-dd"); $time_create=$timevar; $dbh -> do("INSERT INTO `snews_urls_to_check` (`url`, `date_create`, `time_create`, `status`, `coding`, `tag1`, `tag2`) VALUES ('$url', '$date_create', '$time_create', 'wait', '$$inf[0][4]', '$$inf[0][5]', '$$inf[0][6]')"); } } } } $sth = $dbh->prepare(" UPDATE `snews_source` SET `status`='ready' WHERE `id`=$$inf[0][0] "); $sth->execute(); $sth->finish(); } } ############################ # search ############################ if ($ARGV[0] eq "search"){ $sth = "SELECT `keyword` FROM `snews_keywords`"; $strings = $dbh->selectall_arrayref($sth); $sth = "SELECT * FROM `snews_urls_to_check` WHERE `status` LIKE 'check'"; $inf = $dbh->selectall_arrayref($sth); $qnt = @$inf; $find=""; $which_found=""; if ($qnt<20){ $sth = "SELECT `id`, `url`, `coding` FROM `snews_urls_to_check` WHERE `status` LIKE 'wait'"; $inf = $dbh->selectall_arrayref($sth); $qnt = @$inf; if ($qnt>0){ if ($qnt>5){$qnt=5;} $check_list=""; for (my $i=0; $i<$qnt; $i++){ if ($check_list ne ""){$check_list.=","} $check_list.=$$inf[$i][0]; } $sth = $dbh->prepare(" UPDATE `snews_urls_to_check` SET `status`='check' WHERE `id` IN ($check_list) "); $sth->execute(); $sth->finish(); # ÏÐÎÂÅÐßÅÌ ÑÑÛËÊÈ ÍÀ ÊËÞ×ÅÂÛÅ ÑËÎÂÀ $sth = "SELECT `id`, `url`, `coding`, `tag1`, `tag2` FROM `snews_urls_to_check` WHERE `id` IN ($check_list)"; $inf = $dbh->selectall_arrayref($sth); $qnt = @$inf; for (my $i=0; $i<$qnt; $i++){ $which_found=""; $find=""; $url=$$inf[$i][1]; if (get_qnt_urls("snews_urls", $url)==0){ $url=$converter_to_win->convert($url); $document=$ua->request(HTTP::Request->new(GET => $url)); if ($document->is_success){ $str=$document->content; if ($$inf[$i][2] eq "utf-8"){$str=$converter_to_win->convert($str);} $search_in = lc($str); $search_in=~s/\n//g; $search_in=~s/\r//g; $search_in=~ s/\'//g; $search_in=~ s/\"//g; $tag1=$converter_to_win->convert($$inf[$i][3]); $tag2=$converter_to_win->convert($$inf[$i][4]); $search_result=""; while($search_in =~/$tag1(.*?)$tag2/g){$search_result.="$1 "} $search_in=$search_result; $qnt_words=@$strings; $find="false"; if ($qnt_words>0 && $search_in ne ""){ $search_in=~s/<(?:[^>'"]*|(['"]).*?\1)*>//gs; for (my $k=0; $k<$qnt_words; $k++){ if ($$inf[$i][2] eq "win-1251"){ $keyw=$converter_to_win->convert($$strings[$k][0]); } else { $keyw=$$strings[$k][0]; } if($search_in=~/$keyw/){ $find="true"; if ($which_found ne ""){$which_found.=", ";} $which_found.=$keyw; } } } if ($find eq "true"){ if ($$inf[$i][2] eq "win-1251"){$keyword=$converter->convert($which_found)}else{$keyword=$which_found} if ($$inf[$i][2] eq "win-1251"){$url=$converter->convert($url)} $date_create=DateConvert($datevar,"yyyy-mm-dd"); $time_create=$timevar; $dbh -> do("INSERT INTO `snews_urls` (`url`, `date_create`, `time_create`, `status`, `keyword`) VALUES ('$url', '$date_create', '$time_create', 'wait', '$keyword')"); } $sth = $dbh->prepare(" UPDATE `snews_urls_to_check` SET `status`='ready' WHERE `url` LIKE '$url' "); $sth->execute(); $sth->finish(); } else { $sth = $dbh->prepare(" UPDATE `snews_urls_to_check` SET `status`='error' WHERE `url` LIKE '$url' "); $sth->execute(); $sth->finish(); } } } } ############################## } } sub get_qnt_urls { my ($table, $url) = @_; my $sth = "SELECT * FROM `$table` WHERE `url` LIKE '%$url%'"; my $inf = $dbh->selectall_arrayref($sth); my $qnt = @$inf; return int($qnt); } ############################ # S E N D ############################ if ($ARGV[0] eq "send"){ my $sth = "SELECT su.url, su.keyword, suc.coding FROM `snews_urls` as su, `snews_urls_to_check` as suc WHERE su.status LIKE 'wait' AND su.url LIKE suc.url"; my $inf = $dbh->selectall_arrayref($sth); my $qnt = @$inf; my $news_list=""; my $url=""; my $keyw=""; if ($qnt>0){ for (my $i=0; $i<$qnt; $i++){ $url=$converter_to_win->convert($$inf[$i][0]); $keyw=$converter_to_win->convert($$inf[$i][1]); $news_list.="<p><a href=\"$url\">$url</a>"." $keyw</p>"; } $sth = $dbh->prepare(" UPDATE `snews_urls` SET `status`='send' WHERE `status` LIKE 'wait' "); $sth->execute(); $sth->finish(); } else { $news_list="<p>Íîâîñòè íå íàéäåíû.</p>"; } $text=<<HTML; <p>Äîáðûé äåíü!</p> <p>Ïîñëåäíèå íîâîñòè:</p> $news_list <p>Ñ Óâàæåíèåì ê Âàì,<br>Êîìàíäà TimeRinger.ru</p> HTML $subject="Ïîñëåäíèå íîâîñòè"; $usr_to_email="zyuzin\@list.ru, fproml\@mail.ru"; SendMessage($usr_to_email, "TimeRinger.ru <support\@timeringer.ru>", "$subject", $text); } sub SendMessage { my ($sendto, $sendfrom, $subject, $text) = @_; $mailprog = "/usr/sbin/sendmail -t"; open (OUTMAIL,"| $mailprog") || die "Îøèáêà â ïðîãðàììå ïî÷òû"; print OUTMAIL "To: $sendto\n"; print OUTMAIL "Subject: $subject\n"; print OUTMAIL "From: $sendfrom\n"; print OUTMAIL "Content-Type: text/html; charset=\"windows-1251\"\n\n"; print OUTMAIL "<html><head></head><body>"; print OUTMAIL "$text"; print OUTMAIL "</body></html>"; close(OUTMAIL); return "Âàøå ñîîáùåíèå óñïåøíî îòïðàâëåíî."; } exit 0; | | Âñåãî çàïèñåé: 403 | Çàðåãèñòð. 13-12-2003 | Îòïðàâëåíî: 12:33 04-06-2014 | Èñïðàâëåíî: VasyOK, 12:34 04-06-2014 |
|