Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- error_reporting(E_ALL | E_STRICT); ini_set('display_errors', TRUE); ini_set('display_startup_errors', TRUE);
- function getUrl($url) {
- $header[0] = "Accept: text/xml,application/xml,application/xhtml+xml, text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5";
- $header[] = "Cache-Control: max-age=0";
- $header[] = "Connection: keep-alive";
- $header[] = "Keep-Alive: 300";
- $header[] = "Accept-Charset: ISO-8859-1,utf-8;q=0.7,*;q=0.7";
- $header[] = "Accept-Language: en-us,en;q=0.5";
- curl_setopt($curl, CURLOPT_URL, $url);
- curl_setopt($curl, CURLOPT_USERAGENT, 'Mozilla/5.0 (X11; U; Linux x86_64; en-US) AppleWebKit/534.3 (KHTML, like Gecko) Ubuntu/10.04 Chromium/6.0.472.53 Chrome/6.0.472.53 Safari/534.3');
- curl_setopt($curl, CURLOPT_HTTPHEADER, $header);
- curl_setopt($curl, CURLOPT_RETURNTRANSFER, true); // very important to set it to true, otherwise the content will be not be saved to string
- $html = curl_exec($curl); // execute the curl command
- return $html;
- }
- foreach($links as $url) {
- $html = getUrl($url); // the function from Step 3
- $author = getAuthor($html); // getAuthor is the function that parses the HTML and returns the name of the author.
- addAuthorToDB($author); // put it to your DataBase
- sleep(1); // one second break
- echo $author."\n"; // it's good to see the output while the screen is running
- }
- ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement