Advertisement
zero50x

Общая схема паука curl

Apr 17th, 2014
106
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 1.35 KB | None | 0 0
  1. <?php
  // Make every problem visible while the spider runs: report all error levels
  // and print both runtime and startup errors to the output.
  // E_ALL is used on its own: it has included E_STRICT since PHP 5.4, and the
  // E_STRICT constant itself is deprecated as of PHP 8.4.
  error_reporting(E_ALL);
  ini_set('display_errors', '1');
  ini_set('display_startup_errors', '1');
  3.  
  /**
   * Download a URL with cURL and return the response body as a string.
   *
   * The request is sent with a fixed set of browser-like headers and a desktop
   * Chromium User-Agent string.
   *
   * @param string $url Address to fetch.
   * @return string|false The response body, or false when the cURL handle could
   *                      not be created or the transfer failed.
   */
  function getUrl($url) {
      $header = [
          "Accept: text/xml,application/xml,application/xhtml+xml, text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5",
          "Cache-Control: max-age=0",
          "Connection: keep-alive",
          "Keep-Alive: 300",
          "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7",
          "Accept-Language: en-us,en;q=0.5",
      ];

      // The handle has to exist before any option can be set on it; without
      // this call every curl_* function below would receive an undefined value.
      $curl = curl_init();
      if ($curl === false) {
          return false;
      }

      curl_setopt($curl, CURLOPT_URL, $url);
      curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Ubuntu/10.04 Chromium/6.0.472.53 Chrome/6.0.472.53 Safari/534.3');
      curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
      // Required: without RETURNTRANSFER curl_exec() prints the body instead of
      // returning it, so nothing would be captured in $html.
      curl_setopt($curl, CURLOPT_RETURNTRANSFER, true);

      // Run the transfer; curl_exec() yields false on failure.
      $html = curl_exec($curl);

      // No explicit curl_close(): the handle is released when $curl goes out of
      // scope, and curl_close() has no effect since PHP 8.0.
      return $html;
  }
  19.  
  // Walk over every collected link: download the page, extract the author,
  // store it, pause, and print the result so progress is visible.
  foreach ($links as $url):
      // Fetch the raw page for this link.
      $html = getUrl($url);

      // getAuthor() is expected to parse the HTML and return the author's name.
      $author = getAuthor($html);

      // Persist the extracted author in the database.
      addAuthorToDB($author);

      // Throttle: wait one second before the next request.
      sleep(1);

      // Show the author that was just processed.
      echo $author, "\n";
  endforeach;
  27.  
  28. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement