gnu-social/lib/httpclient.php

406 lines
14 KiB
PHP

<?php
/**
* StatusNet, the distributed open-source microblogging tool
*
* Utility for doing HTTP-related things
*
* PHP version 5
*
* LICENCE: This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* @category Action
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @copyright 2009 StatusNet, Inc.
* @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
* @link http://status.net/
*/
if (!defined('GNUSOCIAL')) { exit(1); }
/**
 * Useful structure for HTTP responses
 *
 * We make HTTP calls in several places, and we have several different
 * ways of doing them. This class hides the specifics of which underlying
 * library (curl or PHP-HTTP or whatever) is used.
 *
 * This extends the HTTP_Request2_Response class with methods to get info
 * about any followed redirects.
 *
 * Originally used the name 'HTTPResponse' to match earlier code, but
 * this conflicts with a class in the PECL HTTP extension.
 *
 * @category HTTP
 * @package StatusNet
 * @author Evan Prodromou <evan@status.net>
 * @author Brion Vibber <brion@status.net>
 * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
 * @link http://status.net/
 */
class GNUsocial_HTTPResponse extends HTTP_Request2_Response
{
    // These used to be created on the fly as dynamic properties, which is
    // deprecated as of PHP 8.2. Declaring them keeps the same public access
    // without the deprecation notice.

    /**
     * URL handed to the constructor, stored as a string.
     * @var string
     */
    public $url;

    /**
     * Number of redirects handed to the constructor, stored as an int.
     * @var int
     */
    public $redirectCount;

    /**
     * List of URLs visited, when the wrapped response carried one.
     * HTTPClient::send() attaches it to the response it wraps; it stays
     * null when the wrapped response had no such property.
     * @var array|null
     */
    public $redirUrls = null;

    /**
     * Wrap an existing response object.
     *
     * The parent constructor is intentionally not called: instead every
     * property of $response that is visible from this scope is copied
     * onto the new object, so it behaves like the original response.
     *
     * @param HTTP_Request2_Response $response response to copy from
     * @param mixed $url URL to report from getUrl(); cast to string
     * @param mixed $redirects redirect count; cast to int
     */
    public function __construct(HTTP_Request2_Response $response, $url, $redirects=0)
    {
        foreach (get_object_vars($response) as $key => $val) {
            $this->$key = $val;
        }
        $this->url = strval($url);
        $this->redirectCount = intval($redirects);
    }

    /**
     * Get the count of redirects that have been followed, if any.
     * @return int
     */
    public function getRedirectCount()
    {
        return $this->redirectCount;
    }

    /**
     * Gets the URL this response object was constructed with.
     *
     * NOTE(review): this was documented as "the target URL, before any
     * redirects", but HTTPClient::send() passes in the request's URL
     * after its redirect loop has run - confirm which one callers
     * expect. Use getEffectiveUrl() for the final target.
     *
     * @return string URL
     */
    public function getUrl()
    {
        return $this->url;
    }

    /**
     * Check if the response is OK, generally a 200 or other 2xx status code.
     * @return bool
     */
    public function isOk()
    {
        $status = $this->getStatus();
        return ($status >= 200 && $status < 300);
    }
}
/**
* Utility class for doing HTTP client stuff
*
* We make HTTP calls in several places, and we have several different
* ways of doing them. This class hides the specifics of which underlying
* library (curl or PHP-HTTP or whatever) is used.
*
* This extends the PEAR HTTP_Request2 package:
* - sends StatusNet-specific User-Agent header
* - 'follow_redirects' config option, defaulting on
* - 'max_redirs' config option, defaulting to 10
* - extended response class adds getRedirectCount() and getUrl() methods
* - get(), head() and post() convenience functions return a GNUsocial_HTTPResponse;
*   the static quickGet() and quickGetJson() helpers return the (decoded) body directly
*
* @category HTTP
* @package StatusNet
* @author Evan Prodromou <evan@status.net>
* @author Brion Vibber <brion@status.net>
* @license http://www.fsf.org/licensing/licenses/agpl-3.0.html GNU Affero General Public License version 3.0
* @link http://status.net/
*/
class HTTPClient extends HTTP_Request2
{
/**
 * Set up a request object with the site's HTTP settings preloaded.
 *
 * Values from the 'http' section of the site configuration are written
 * into $this->config before the parent constructor runs, then the
 * User-Agent header is set to the GNU social one.
 *
 * @param mixed  $url    passed through to the parent constructor
 * @param string $method passed through to the parent constructor
 * @param array  $config passed through to the parent constructor
 *                       (presumably applied on top of the values set
 *                       here - confirm against HTTP_Request2)
 */
function __construct($url=null, $method=self::METHOD_GET, $config=array())
{
    $timeout = common_config('http', 'timeout');
    if (!is_int($timeout)) {
        common_log(LOG_ERR, 'config option http/timeout is not an integer value: '._ve($timeout));
    } else {
        // Reasonably you shouldn't set http/timeout to 0 because of
        // malicious remote servers that can cause infinitely long
        // responses... But the default in HTTP_Request2 is 0 for
        // some reason and should probably be considered a valid value.
        $this->config['timeout'] = $timeout;
    }

    $connectTimeout = common_config('http', 'connect_timeout');
    if (!empty($connectTimeout)) {
        $this->config['connect_timeout'] = $connectTimeout;
    }

    $this->config['max_redirs'] = 10;
    $this->config['follow_redirects'] = true;

    // We've had some issues with keepalive breaking with
    // HEAD requests, such as to youtube which seems to be
    // emitting chunked encoding info for an empty body
    // instead of not emitting anything. This may be a
    // bug on YouTube's end, but the upstream library
    // ought to be investigated to see if we can handle
    // it gracefully in that case as well.
    $this->config['protocol_version'] = '1.0';

    // Default state of OpenSSL seems to have no trusted
    // SSL certificate authorities, which breaks hostname
    // verification and means we have a hard time communicating
    // with other sites' HTTPS interfaces.
    //
    // So peer verification is only left on when a CA bundle
    // has been configured; otherwise it is switched off.
    $caFile = common_config('http', 'ssl_cafile');
    if ($caFile) {
        $this->config['ssl_cafile'] = $caFile;
    } else {
        $this->config['ssl_verify_peer'] = false;
    }

    // This means "verify the cert hostname against what we connect to", it does not
    // imply CA trust or anything like that. Just the hostname.
    $this->config['ssl_verify_host'] = common_config('http', 'ssl_verify_host');

    // Use the curl adapter only when it is both requested and available.
    if (common_config('http', 'curl') && extension_loaded('curl')) {
        $this->config['adapter'] = 'HTTP_Request2_Adapter_Curl';
    }

    // Copy over whichever proxy_* settings have a non-empty value.
    foreach (array('host', 'port', 'user', 'password', 'auth_scheme') as $suffix) {
        $option = 'proxy_'.$suffix;
        $setting = common_config('http', $option);
        if (!empty($setting)) {
            $this->config[$option] = $setting;
        }
    }

    parent::__construct($url, $method, $config);
    $this->setHeader('User-Agent', self::userAgent());
}
/**
 * Convenience/back-compat instantiator.
 *
 * Equivalent to calling the constructor with no arguments.
 *
 * @return HTTPClient a fresh client with default URL, method and config
 */
public static function start()
{
    $client = new self();
    return $client;
}
/**
 * Quick static function to GET a URL and return the response body.
 *
 * @param string      $url     URL to fetch; may already contain a query string
 * @param string|null $accept  value for the Accept header, or null for none
 * @param array       $params  extra query parameters appended to $url
 * @param array       $headers extra headers, passed on to get()
 *
 * @return string the response body
 *
 * @throws Exception when the response status is not 2xx; the exception
 *                   code is the HTTP status
 */
public static function quickGet($url, $accept=null, array $params=array(), array $headers=array())
{
    if (!empty($params)) {
        // The numeric prefix must be a string: passing null to it is
        // deprecated since PHP 8.1. An empty string gives the same output.
        $query = http_build_query($params, '', '&');
        // Extend an existing query string instead of starting a second one.
        $url .= (strpos($url, '?') === false ? '?' : '&') . $query;
    }

    $client = new HTTPClient();
    if (!is_null($accept)) {
        $client->setHeader('Accept', $accept);
    }

    $response = $client->get($url, $headers);
    if (!$response->isOk()) {
        // TRANS: Exception. %s is the URL we tried to GET.
        throw new Exception(sprintf(_m('Could not GET URL %s.'), $url), $response->getStatus());
    }

    return $response->getBody();
}
/**
 * Quick static function to GET a URL and decode the body as JSON.
 *
 * A body that json_decode() turns into null is treated as a failure,
 * which includes a body consisting of the JSON literal null.
 *
 * @param string $url    URL to fetch
 * @param array  $params extra query parameters, passed on to quickGet()
 *
 * @return mixed the decoded JSON data
 *
 * @throws ServerException when the body cannot be decoded
 * @throws Exception       from quickGet() when the status is not 2xx
 */
public static function quickGetJson($url, $params=array())
{
    $body = self::quickGet($url, null, $params);
    $decoded = json_decode($body);

    if (!is_null($decoded)) {
        return $decoded;
    }

    common_debug('Could not decode JSON data from URL: '.$url);
    throw new ServerException('Could not decode JSON data from URL');
}
/**
 * Quick static function to send a HEAD request and return the headers.
 *
 * If you want an Accept header, put it in $headers
 *
 * @param string $url     URL to query; may already contain a query string
 * @param array  $params  extra query parameters appended to $url
 * @param array  $headers extra headers, passed on to head()
 *
 * @return mixed whatever the response's getHeader() returns when called
 *               without a header name (presumably all headers - confirm
 *               against HTTP_Request2_Response)
 *
 * @throws Exception when the response status is not 2xx; the exception
 *                   code is the HTTP status
 */
public static function quickHead($url, array $params=array(), array $headers=array())
{
    if (!empty($params)) {
        // The numeric prefix must be a string: passing null to it is
        // deprecated since PHP 8.1. An empty string gives the same output.
        $query = http_build_query($params, '', '&');
        // Extend an existing query string instead of starting a second one.
        $url .= (strpos($url, '?') === false ? '?' : '&') . $query;
    }

    $client = new HTTPClient();
    $response = $client->head($url, $headers);
    if (!$response->isOk()) {
        // The message says GET although this is a HEAD request; it is
        // left unchanged so the existing translations keep matching.
        // TRANS: Exception. %s is the URL we tried to GET.
        throw new Exception(sprintf(_m('Could not GET URL %s.'), $url), $response->getStatus());
    }

    return $response->getHeader();
}
/**
 * Convenience function to run a GET request.
 *
 * @param string $url     URL to fetch
 * @param array  $headers headers, handed unchanged to doRequest()
 *
 * @return GNUsocial_HTTPResponse
 * @throws HTTP_Request2_Exception
 */
public function get($url, $headers=array())
{
    $response = $this->doRequest($url, self::METHOD_GET, $headers);
    return $response;
}
/**
 * Convenience function to run a HEAD request.
 *
 * The 'follow_redirects' config value is replaced by $follow_redirects
 * for the duration of the request and put back afterwards, whether the
 * request returns or throws.
 *
 * NOTE: Will probably turn into a GET request if you let it follow redirects!
 * That option is only there to be flexible and may be removed in the future!
 *
 * NOTE(review): send() in this class runs its own redirect loop that
 * only consults 'max_redirs', so 3xx responses may still be followed
 * even with $follow_redirects=false - confirm whether that is intended.
 *
 * @param string $url              URL to query
 * @param array  $headers          headers, handed unchanged to doRequest()
 * @param bool   $follow_redirects temporary value for 'follow_redirects'
 *
 * @return GNUsocial_HTTPResponse
 * @throws HTTP_Request2_Exception
 */
public function head($url, $headers=array(), $follow_redirects=false)
{
    // Remember the configured value so it can be restored below.
    $savedFollow = $this->config['follow_redirects'];
    $this->config['follow_redirects'] = $follow_redirects;

    try {
        return $this->doRequest($url, self::METHOD_HEAD, $headers);
    } finally {
        // Runs on both return and exception; any exception keeps
        // propagating to the caller untouched.
        $this->config['follow_redirects'] = $savedFollow;
    }
}
/**
 * Convenience function to POST form data.
 *
 * Non-empty $data is registered through addPostParameter() before the
 * request is sent; empty $data is skipped.
 *
 * @param string $url
 * @param array $headers optional headers, handed unchanged to doRequest()
 * @param array $data optional associative array of form data to submit
 *                    (NOTE(review): earlier docs also mention a "blob";
 *                    confirm how addPostParameter() treats a plain string)
 * @return GNUsocial_HTTPResponse
 * @throws HTTP_Request2_Exception
 */
public function post($url, $headers=array(), $data=array())
{
    if (!empty($data)) {
        $this->addPostParameter($data);
    }

    $response = $this->doRequest($url, self::METHOD_POST, $headers);
    return $response;
}
/**
 * Shared implementation behind get(), head() and post(): point the
 * request at $url, apply method and headers, send it.
 *
 * @param string $url The URL including possible querystring
 * @param string $method The HTTP method to use
 * @param array $headers List of already formatted strings
 *                       (not an associative array, to allow
 *                       multiple same-named headers); only the
 *                       values are used, keys are ignored
 *
 * @return GNUsocial_HTTPResponse
 * @throws HTTP_Request2_Exception
 * @throws NoHttpResponseException if send() returns null
 */
protected function doRequest($url, $method, array $headers=array())
{
    $this->setUrl($url);

    // Workaround for HTTP_Request2 not setting up SNI in socket contexts;
    // This fixes cert validation for SSL virtual hosts using SNI.
    // Requires PHP 5.3.2 or later and OpenSSL with SNI support.
    $usesHttps = ($this->url->getScheme() == 'https');
    if ($usesHttps && defined('OPENSSL_TLSEXT_SERVER_NAME')) {
        $this->config['ssl_SNI_enabled'] = true;
        $this->config['ssl_SNI_server_name'] = $this->url->getHost();
    }

    $this->setMethod($method);

    // Each entry is a complete header line, set one at a time.
    foreach ($headers as $headerLine) {
        $this->setHeader($headerLine);
    }

    $response = $this->send();
    if (!is_null($response)) {
        return $response;
    }

    // Failed to retrieve a remote web resource; the exception gets the target URL.
    throw new NoHttpResponseException($url);
}
/**
 * Write a log line prefixed with this class name and the current
 * request's method and URL.
 *
 * @param int    $level  log level constant handed to common_log()
 * @param string $detail text appended after the request description
 *
 * @return void
 */
protected function log($level, $detail)
{
    $request = 'HTTP ' . $this->getMethod() . ' ' . $this->getUrl();
    common_log($level, __CLASS__ . ': ' . $request . ' - ' . $detail);
}
/**
 * Pulls up GNU Social's customized user-agent string, so services
 * we hit can track down the responsible software.
 *
 * The format is "ENGINE/VERSION (CODENAME)", built from the
 * GNUSOCIAL_ENGINE, GNUSOCIAL_VERSION and GNUSOCIAL_CODENAME constants.
 *
 * @return string
 */
static public function userAgent()
{
    return sprintf('%s/%s (%s)', GNUSOCIAL_ENGINE, GNUSOCIAL_VERSION, GNUSOCIAL_CODENAME);
}
/**
* Actually performs the HTTP request and returns a
* GNUsocial_HTTPResponse object with response body and header info.
*
* Wraps around parent send() to add logging and redirection processing:
* every response is logged, and 3xx responses are followed by this
* method itself, bounded by the 'max_redirs' config value.
*
* @return GNUsocial_HTTPResponse wraps the last response received, the
*         request URL as it stands after the loop, and the redirect counter
* @throws Exception whatever parent::send() throws (typically
*         HTTP_Request2_Exception); it is logged and rethrown unchanged
*/
public function send()
{
// An empty/zero 'max_redirs' means: send once, follow nothing.
$maxRedirs = intval($this->config['max_redirs']);
if (empty($this->config['max_redirs'])) {
$maxRedirs = 0;
}
// $redirs counts 3xx responses seen, including one that ends up skipped.
$redirs = 0;
// Effective URL of every response received so far, in order.
$redirUrls = array();
do {
try {
$response = parent::send();
} catch (Exception $e) {
// Log the failure, then let the caller deal with it.
$this->log(LOG_ERR, $e->getMessage());
throw $e;
}
$code = $response->getStatus();
$effectiveUrl = $response->getEffectiveUrl();
$redirUrls[] = $effectiveUrl;
// Attached to the raw response so the wrapper built at the end,
// which copies the response's properties, carries the list too.
$response->redirUrls = $redirUrls;
if ($code >= 200 && $code < 300) {
// Success: log and fall through to the break below.
$reason = $response->getReasonPhrase();
$this->log(LOG_INFO, "$code $reason");
} elseif ($code >= 300 && $code < 400) {
$url = $this->getUrl();
$target = $response->getHeader('Location');
// The counter is bumped before the comparison, so the limit is
// hit on the $maxRedirs-th redirect response, which is not followed.
if (++$redirs >= $maxRedirs) {
common_log(LOG_ERR, __CLASS__ . ": Too many redirects: skipping $code redirect from $url to $target");
break;
}
try {
// Re-target this same request object and go round again;
// 'continue' jumps to the while() condition.
$this->setUrl($target);
$this->setHeader('Referer', $url);
common_log(LOG_INFO, __CLASS__ . ": Following $code redirect from $url to $target");
continue;
} catch (HTTP_Request2_Exception $e) {
// The redirect target was rejected: log it and fall through to
// the break below, returning the 3xx response itself.
common_log(LOG_ERR, __CLASS__ . ": Invalid $code redirect from $url to $target");
}
} else {
// Any other status is logged as an error but still returned.
$reason = $response->getReasonPhrase();
$this->log(LOG_ERR, "$code $reason");
}
// Only a followed redirect (the 'continue' above) skips this break.
break;
} while ($maxRedirs);
// The URL passed here is the request's current one, i.e. the last
// redirect target that was set, if any.
return new GNUsocial_HTTPResponse($response, $this->getUrl(), $redirs);
}
}