diff --git a/README.md b/README.md index 0525bba..0e18e28 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,9 @@ Httpful highly encourages sending in pull requests. When submitting a pull requ # Changelog +## 0.2.18 +- FEATURE [Allow to limit the amount of data retrieved](https://github.com/nategood/httpful/issues/137) + ## 0.2.17 - FEATURE [PR #144](https://github.com/nategood/httpful/pull/144) Adds additional parameter to the Response class to specify additional meta data about the request/response (e.g. number of redirect). @@ -201,4 +204,3 @@ Httpful highly encourages sending in pull requests. When submitting a pull requ - Created AbstractMimeHandler type that all Mime Handlers must extend - Pulled out the parsing/serializing logic from the Request/Response classes into their own MimeHandler classes - Added ability to register new mime handlers for mime types - diff --git a/composer.json b/composer.json index 52329c4..48e87ec 100644 --- a/composer.json +++ b/composer.json @@ -4,12 +4,17 @@ "homepage": "http://github.com/nategood/httpful", "license": "MIT", "keywords": ["http", "curl", "rest", "restful", "api", "requests"], - "version": "0.2.17", + "version": "0.2.18", "authors": [ { "name": "Nate Good", "email": "me@nategood.com", "homepage": "http://nategood.com" + }, + { + "name": "Diego Zanella", + "email": "admin@aelia.co", + "homepage": "http://aelia.co" } ], "require": { diff --git a/src/Httpful/Request.php b/src/Httpful/Request.php index 65ba63d..c582fc4 100755 --- a/src/Httpful/Request.php +++ b/src/Httpful/Request.php @@ -61,6 +61,11 @@ class Request // Template Request object private static $_template; + // @var int The maximum amount of data to retrieve. + protected $download_limit; + // @var string The data retrieved by the CURL request. Used only when a download limit is set. + protected $retrieved_data; + /** * We made the constructor private to force the factory style. 
This was * done to keep the syntax cleaner and better the support the idea of @@ -201,14 +206,43 @@ public function send() $result = curl_exec($this->_ch); if ($result === false) { - if ($curlErrorNumber = curl_errno($this->_ch)) { - $curlErrorString = curl_error($this->_ch); - $this->_error($curlErrorString); - throw new ConnectionErrorException('Unable to connect: ' . $curlErrorNumber . ' ' . $curlErrorString); + $curlErrorNumber = curl_errno($this->_ch); + + // If error number is CURLE_WRITE_ERROR, it may just be that we hit + // the download limit. In such case, we can get the data retrieved so + // far and carry on. No need to throw an exception, as we are not + // dealing with an actual error. + if(($curlErrorNumber == CURLE_WRITE_ERROR) && + ($this->download_limit > 0)) { + $result = true; + } + else { + // Any other error number represents an actual error + if ($curlErrorNumber) { + $curlErrorString = curl_error($this->_ch); + $this->_error($curlErrorString); + throw new ConnectionErrorException('Unable to connect: ' . $curlErrorNumber . ' ' . $curlErrorString); + } + + $this->_error('Unable to connect.'); + throw new ConnectionErrorException('Unable to connect.'); } + } - $this->_error('Unable to connect.'); - throw new ConnectionErrorException('Unable to connect.'); + /* Result can be "true" in two cases: + * - When download limit is greater than zero, and the limit set was + * larger than the page size (i.e. the whole page was fetched, despite + * the limit). + * - When download limit is greater than zero and error CURLE_WRITE_ERROR + * was raised (i.e. the transfer was interrupted because the limit was + * reached). + * + * In both cases, the data is actually stored in $this->retrieved_data, + * therefore it must be put back inside $result, where the library + * expects to find it. 
+ */ + if($result === true) { + $result = $this->retrieved_data; } $info = curl_getinfo($this->_ch); @@ -1135,4 +1169,51 @@ public static function options($uri) { return self::init(Http::OPTIONS)->uri($uri); } + + /** + * Limits the size of the retrieved data. Useful when you only need to + * get the headers of a page, as remote servers usually don't honour the + * "range" header in HTTP requests. + * + * IMPORTANT: setting the limit too low will cause the request to fail, because + * the response will not contain the headers and body expected by the parser. + * The limit should be at least 1000 bytes, to ensure that all headers are + * retrieved and the parsing can succeed. + * + * @param int $size The maximum amount of data to retrieve, in bytes. + * @return Httpful\Request + */ + public function limit($size) { + if((int)$size > 0) { + $this->download_limit = $size; + $this->retrieved_data = ''; + $this->addOnCurlOption(CURLOPT_BINARYTRANSFER, 1); + $this->addOnCurlOption(CURLOPT_WRITEFUNCTION, array($this, 'downloadLimiter')); + } + + return $this; + } + + /** + * Callback for the Request::limit() method. This method keeps track of the + * data retrieved, and interrupts the transfer once the limit has been + * reached. + * + * @param object $ch The CURL handle. + * @param string $chunk A chunk of the data retrieved by the CURL request. + * @return int The length of the retrieved chunk, or -1 if the limit has + * been reached. + */ + public function downloadLimiter($ch, $chunk) { + $len = strlen($this->retrieved_data) + strlen($chunk); + if($len >= $this->download_limit) { + $this->retrieved_data .= substr($chunk, 0, $this->download_limit - strlen($this->retrieved_data)); + //echo strlen($this->retrieved_data) , ' ', $this->retrieved_data; + return -1; + } + + $this->retrieved_data .= $chunk; + + return strlen($chunk); + } }