From 6b887728b2c84fec39a576e6f2a76909b6318774 Mon Sep 17 00:00:00 2001 From: Zach Copley Date: Wed, 17 Feb 2010 19:24:38 +0000 Subject: [PATCH 01/62] Better logging for Twitter bridge account linking process --- .../TwitterBridge/twitterauthorization.php | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/plugins/TwitterBridge/twitterauthorization.php b/plugins/TwitterBridge/twitterauthorization.php index c154932bbc..8bfdacee91 100644 --- a/plugins/TwitterBridge/twitterauthorization.php +++ b/plugins/TwitterBridge/twitterauthorization.php @@ -131,8 +131,7 @@ class TwitterauthorizationAction extends Action } else if ($this->arg('connect')) { $this->connectNewUser(); } else { - common_debug('Twitter Connect Plugin - ' . - print_r($this->args, true)); + common_debug('Twitter bridge - ' . print_r($this->args, true)); $this->showForm(_('Something weird happened.'), $this->trimmed('newname')); } @@ -172,9 +171,15 @@ class TwitterauthorizationAction extends Action $auth_link = $client->getAuthorizeLink($req_tok, $this->signin); } catch (OAuthClientException $e) { - $msg = sprintf('OAuth client error - code: %1s, msg: %2s', - $e->getCode(), $e->getMessage()); - $this->serverError(_m('Couldn\'t link your Twitter account.')); + $msg = sprintf( + 'OAuth client error - code: %1s, msg: %2s', + $e->getCode(), + $e->getMessage() + ); + common_log(LOG_INFO, 'Twitter bridge - ' . $msg); + $this->serverError( + _m('Couldn\'t link your Twitter account: ') . 
$e->getMessage() + ); } common_redirect($auth_link); @@ -192,7 +197,9 @@ class TwitterauthorizationAction extends Action // token we sent them if ($_SESSION['twitter_request_token'] != $this->oauth_token) { - $this->serverError(_m('Couldn\'t link your Twitter account.')); + $this->serverError( + _m('Couldn\'t link your Twitter account: oauth_token mismatch.') + ); } $twitter_user = null; @@ -212,9 +219,15 @@ class TwitterauthorizationAction extends Action $twitter_user = $client->verifyCredentials(); } catch (OAuthClientException $e) { - $msg = sprintf('OAuth client error - code: %1$s, msg: %2$s', - $e->getCode(), $e->getMessage()); - $this->serverError(_m('Couldn\'t link your Twitter account.')); + $msg = sprintf( + 'OAuth client error - code: %1$s, msg: %2$s', + $e->getCode(), + $e->getMessage() + ); + common_log(LOG_INFO, 'Twitter bridge - ' . $msg); + $this->serverError( + _m('Couldn\'t link your Twitter account: ') . $e-getMessage() + ); } if (common_logged_in()) { From c498f6e1ba50d8c17cd1d8698f05f7604d5cfcf8 Mon Sep 17 00:00:00 2001 From: Zach Copley Date: Wed, 17 Feb 2010 20:53:16 +0000 Subject: [PATCH 02/62] Twitter bridge - fix for Ticket #2192 --- plugins/TwitterBridge/twitter.php | 14 +++++++++----- plugins/TwitterBridge/twitterauthorization.php | 6 +++--- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/plugins/TwitterBridge/twitter.php b/plugins/TwitterBridge/twitter.php index e5afde62ca..ceb83b037f 100644 --- a/plugins/TwitterBridge/twitter.php +++ b/plugins/TwitterBridge/twitter.php @@ -1,7 +1,7 @@ delete(); - if ($result != false) { - common_log(LOG_INFO, - "Twitter bridge - removed old Twitter user: $screen_name ($twitter_id)."); + if (!empty($luser)) { + $result = $luser->delete(); + if ($result != false) { + common_log( + LOG_INFO, + "Twitter bridge - removed old Twitter user: $screen_name ($twitter_id)." 
+ ); + } } $fuser = new Foreign_user(); diff --git a/plugins/TwitterBridge/twitterauthorization.php b/plugins/TwitterBridge/twitterauthorization.php index 8bfdacee91..cabf69d7a8 100644 --- a/plugins/TwitterBridge/twitterauthorization.php +++ b/plugins/TwitterBridge/twitterauthorization.php @@ -178,7 +178,7 @@ class TwitterauthorizationAction extends Action ); common_log(LOG_INFO, 'Twitter bridge - ' . $msg); $this->serverError( - _m('Couldn\'t link your Twitter account: ') . $e->getMessage() + _m('Couldn\'t link your Twitter account.') ); } @@ -226,7 +226,7 @@ class TwitterauthorizationAction extends Action ); common_log(LOG_INFO, 'Twitter bridge - ' . $msg); $this->serverError( - _m('Couldn\'t link your Twitter account: ') . $e-getMessage() + _m('Couldn\'t link your Twitter account.') ); } @@ -292,7 +292,7 @@ class TwitterauthorizationAction extends Action if (empty($flink_id)) { common_log_db_error($flink, 'INSERT', __FILE__); - $this->serverError(_('Couldn\'t link your Twitter account.')); + $this->serverError(_('Couldn\'t link your Twitter account.')); } return $flink_id; From c201baffbfbf812ecba504e6829dd9e9d17a4bac Mon Sep 17 00:00:00 2001 From: Zach Copley Date: Tue, 16 Feb 2010 06:12:08 +0000 Subject: [PATCH 03/62] Upgrade Twitter bridge to use OAuth 1.0a. It's more secure, and allows us to automatically send in a callback url instead of having to manually configure one for each StatusNet instance. 
--- lib/oauthclient.php | 88 ++++++++++++++----- .../TwitterBridge/twitterauthorization.php | 10 +-- plugins/TwitterBridge/twitteroauthclient.php | 28 ++++++ 3 files changed, 98 insertions(+), 28 deletions(-) diff --git a/lib/oauthclient.php b/lib/oauthclient.php index b22fd78974..bc7587183b 100644 --- a/lib/oauthclient.php +++ b/lib/oauthclient.php @@ -90,20 +90,47 @@ class OAuthClient /** * Gets a request token from the given url * - * @param string $url OAuth endpoint for grabbing request tokens + * @param string $url OAuth endpoint for grabbing request tokens + * @param string $callback authorized request token callback * * @return OAuthToken $token the request token */ - function getRequestToken($url) + function getRequestToken($url, $callback = null) { - $response = $this->oAuthGet($url); + $params = null; + + if (!is_null($callback)) { + $params['oauth_callback'] = $callback; + } + + $response = $this->oAuthGet($url, $params); + $arr = array(); parse_str($response, $arr); - if (isset($arr['oauth_token']) && isset($arr['oauth_token_secret'])) { - $token = new OAuthToken($arr['oauth_token'], @$arr['oauth_token_secret']); + + $token = $arr['oauth_token']; + $secret = $arr['oauth_token_secret']; + $confirm = $arr['oauth_callback_confirmed']; + + if (isset($token) && isset($secret)) { + + $token = new OAuthToken($token, $secret); + + if (isset($confirm)) { + if ($confirm == 'true') { + common_debug('Twitter bridge - callback confirmed.'); + return $token; + } else { + throw new OAuthClientException( + 'Callback was not confirmed by Twitter.' + ); + } + } return $token; } else { - throw new OAuthClientException(); + throw new OAuthClientException( + 'Could not get a request token from Twitter.' 
+ ); } } @@ -113,49 +140,64 @@ class OAuthClient * * @param string $url endpoint for authorizing request tokens * @param OAuthToken $request_token the request token to be authorized - * @param string $oauth_callback optional callback url * * @return string $authorize_url the url to redirect to */ - function getAuthorizeLink($url, $request_token, $oauth_callback = null) + function getAuthorizeLink($url, $request_token) { $authorize_url = $url . '?oauth_token=' . $request_token->key; - if (isset($oauth_callback)) { - $authorize_url .= '&oauth_callback=' . urlencode($oauth_callback); - } - return $authorize_url; } /** * Fetches an access token * - * @param string $url OAuth endpoint for exchanging authorized request tokens - * for access tokens + * @param string $url OAuth endpoint for exchanging authorized request tokens + * for access tokens + * @param string $verifier 1.0a verifier * * @return OAuthToken $token the access token */ - function getAccessToken($url) + function getAccessToken($url, $verifier = null) { - $response = $this->oAuthPost($url); - parse_str($response); - $token = new OAuthToken($oauth_token, $oauth_token_secret); - return $token; + $params = array(); + + if (!is_null($verifier)) { + $params['oauth_verifier'] = $verifier; + } + + $response = $this->oAuthPost($url, $params); + + $arr = array(); + parse_str($response, $arr); + + $token = $arr['oauth_token']; + $secret = $arr['oauth_token_secret']; + + if (isset($token) && isset($secret)) { + $token = new OAuthToken($token, $secret); + return $token; + } else { + throw new OAuthClientException( + 'Could not get a access token from Twitter.' 
+ ); + } } /** - * Use HTTP GET to make a signed OAuth request + * Use HTTP GET to make a signed OAuth requesta * - * @param string $url OAuth endpoint + * @param string $url OAuth request token endpoint + * @param array $params additional parameters * * @return mixed the request */ - function oAuthGet($url) + function oAuthGet($url, $params = null) { $request = OAuthRequest::from_consumer_and_token($this->consumer, - $this->token, 'GET', $url, null); + $this->token, 'GET', $url, $params); $request->sign_request($this->sha1_method, $this->consumer, $this->token); diff --git a/plugins/TwitterBridge/twitterauthorization.php b/plugins/TwitterBridge/twitterauthorization.php index 6822d33dd1..c154932bbc 100644 --- a/plugins/TwitterBridge/twitterauthorization.php +++ b/plugins/TwitterBridge/twitterauthorization.php @@ -56,6 +56,7 @@ class TwitterauthorizationAction extends Action var $tw_fields = null; var $access_token = null; var $signin = null; + var $verifier = null; /** * Initialize class members. Looks for 'oauth_token' parameter. 
@@ -70,6 +71,7 @@ class TwitterauthorizationAction extends Action $this->signin = $this->boolean('signin'); $this->oauth_token = $this->arg('oauth_token'); + $this->verifier = $this->arg('oauth_verifier'); return true; } @@ -160,8 +162,7 @@ class TwitterauthorizationAction extends Action // Get a new request token and authorize it $client = new TwitterOAuthClient(); - $req_tok = - $client->getRequestToken(TwitterOAuthClient::$requestTokenURL); + $req_tok = $client->getRequestToken(); // Sock the request token away in the session temporarily @@ -171,7 +172,7 @@ class TwitterauthorizationAction extends Action $auth_link = $client->getAuthorizeLink($req_tok, $this->signin); } catch (OAuthClientException $e) { - $msg = sprintf('OAuth client cURL error - code: %1s, msg: %2s', + $msg = sprintf('OAuth client error - code: %1s, msg: %2s', $e->getCode(), $e->getMessage()); $this->serverError(_m('Couldn\'t link your Twitter account.')); } @@ -187,7 +188,6 @@ class TwitterauthorizationAction extends Action */ function saveAccessToken() { - // Check to make sure Twitter returned the same request // token we sent them @@ -204,7 +204,7 @@ class TwitterauthorizationAction extends Action // Exchange the request token for an access token - $atok = $client->getAccessToken(TwitterOAuthClient::$accessTokenURL); + $atok = $client->getAccessToken($this->verifier); // Test the access token and get the user's Twitter info diff --git a/plugins/TwitterBridge/twitteroauthclient.php b/plugins/TwitterBridge/twitteroauthclient.php index 277e7ab409..ba45b533dc 100644 --- a/plugins/TwitterBridge/twitteroauthclient.php +++ b/plugins/TwitterBridge/twitteroauthclient.php @@ -91,6 +91,19 @@ class TwitterOAuthClient extends OAuthClient } } + /** + * Gets a request token from Twitter + * + * @return OAuthToken $token the request token + */ + function getRequestToken() + { + return parent::getRequestToken( + self::$requestTokenURL, + common_local_url('twitterauthorization') + ); + } + /** * Builds a 
link to Twitter's endpoint for authorizing a request token * @@ -107,6 +120,21 @@ class TwitterOAuthClient extends OAuthClient common_local_url('twitterauthorization')); } + /** + * Fetches an access token from Twitter + * + * @param string $verifier 1.0a verifier + * + * @return OAuthToken $token the access token + */ + function getAccessToken($verifier = null) + { + return parent::getAccessToken( + self::$accessTokenURL, + $verifier + ); + } + /** * Calls Twitter's /account/verify_credentials API method * From 05c50499c31b80d2a1c41e5256fae96cc06252ad Mon Sep 17 00:00:00 2001 From: Zach Copley Date: Wed, 17 Feb 2010 19:24:38 +0000 Subject: [PATCH 04/62] Better logging for Twitter bridge account linking process --- .../TwitterBridge/twitterauthorization.php | 31 +++++++++++++------ 1 file changed, 22 insertions(+), 9 deletions(-) diff --git a/plugins/TwitterBridge/twitterauthorization.php b/plugins/TwitterBridge/twitterauthorization.php index c154932bbc..8bfdacee91 100644 --- a/plugins/TwitterBridge/twitterauthorization.php +++ b/plugins/TwitterBridge/twitterauthorization.php @@ -131,8 +131,7 @@ class TwitterauthorizationAction extends Action } else if ($this->arg('connect')) { $this->connectNewUser(); } else { - common_debug('Twitter Connect Plugin - ' . - print_r($this->args, true)); + common_debug('Twitter bridge - ' . print_r($this->args, true)); $this->showForm(_('Something weird happened.'), $this->trimmed('newname')); } @@ -172,9 +171,15 @@ class TwitterauthorizationAction extends Action $auth_link = $client->getAuthorizeLink($req_tok, $this->signin); } catch (OAuthClientException $e) { - $msg = sprintf('OAuth client error - code: %1s, msg: %2s', - $e->getCode(), $e->getMessage()); - $this->serverError(_m('Couldn\'t link your Twitter account.')); + $msg = sprintf( + 'OAuth client error - code: %1s, msg: %2s', + $e->getCode(), + $e->getMessage() + ); + common_log(LOG_INFO, 'Twitter bridge - ' . 
$msg); + $this->serverError( + _m('Couldn\'t link your Twitter account: ') . $e->getMessage() + ); } common_redirect($auth_link); @@ -192,7 +197,9 @@ class TwitterauthorizationAction extends Action // token we sent them if ($_SESSION['twitter_request_token'] != $this->oauth_token) { - $this->serverError(_m('Couldn\'t link your Twitter account.')); + $this->serverError( + _m('Couldn\'t link your Twitter account: oauth_token mismatch.') + ); } $twitter_user = null; @@ -212,9 +219,15 @@ class TwitterauthorizationAction extends Action $twitter_user = $client->verifyCredentials(); } catch (OAuthClientException $e) { - $msg = sprintf('OAuth client error - code: %1$s, msg: %2$s', - $e->getCode(), $e->getMessage()); - $this->serverError(_m('Couldn\'t link your Twitter account.')); + $msg = sprintf( + 'OAuth client error - code: %1$s, msg: %2$s', + $e->getCode(), + $e->getMessage() + ); + common_log(LOG_INFO, 'Twitter bridge - ' . $msg); + $this->serverError( + _m('Couldn\'t link your Twitter account: ') . $e-getMessage() + ); } if (common_logged_in()) { From 73ba26efe3d9d97c478a507d351ac92d28d82655 Mon Sep 17 00:00:00 2001 From: Zach Copley Date: Wed, 17 Feb 2010 20:53:16 +0000 Subject: [PATCH 05/62] Twitter bridge - fix for Ticket #2192 --- plugins/TwitterBridge/twitter.php | 14 +++++++++----- plugins/TwitterBridge/twitterauthorization.php | 6 +++--- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/plugins/TwitterBridge/twitter.php b/plugins/TwitterBridge/twitter.php index e5afde62ca..ceb83b037f 100644 --- a/plugins/TwitterBridge/twitter.php +++ b/plugins/TwitterBridge/twitter.php @@ -1,7 +1,7 @@ delete(); - if ($result != false) { - common_log(LOG_INFO, - "Twitter bridge - removed old Twitter user: $screen_name ($twitter_id)."); + if (!empty($luser)) { + $result = $luser->delete(); + if ($result != false) { + common_log( + LOG_INFO, + "Twitter bridge - removed old Twitter user: $screen_name ($twitter_id)." 
+ ); + } } $fuser = new Foreign_user(); diff --git a/plugins/TwitterBridge/twitterauthorization.php b/plugins/TwitterBridge/twitterauthorization.php index 8bfdacee91..cabf69d7a8 100644 --- a/plugins/TwitterBridge/twitterauthorization.php +++ b/plugins/TwitterBridge/twitterauthorization.php @@ -178,7 +178,7 @@ class TwitterauthorizationAction extends Action ); common_log(LOG_INFO, 'Twitter bridge - ' . $msg); $this->serverError( - _m('Couldn\'t link your Twitter account: ') . $e->getMessage() + _m('Couldn\'t link your Twitter account.') ); } @@ -226,7 +226,7 @@ class TwitterauthorizationAction extends Action ); common_log(LOG_INFO, 'Twitter bridge - ' . $msg); $this->serverError( - _m('Couldn\'t link your Twitter account: ') . $e-getMessage() + _m('Couldn\'t link your Twitter account.') ); } @@ -292,7 +292,7 @@ class TwitterauthorizationAction extends Action if (empty($flink_id)) { common_log_db_error($flink, 'INSERT', __FILE__); - $this->serverError(_('Couldn\'t link your Twitter account.')); + $this->serverError(_('Couldn\'t link your Twitter account.')); } return $flink_id; From ce6be4f83624d8c39a93d2b54567cc2f33580812 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Wed, 17 Feb 2010 16:49:00 -0800 Subject: [PATCH 06/62] Queues: redid the breakout control model so we can start up and subscribe to queues without running through the complete site list, which is ok at 1k sites but too slow at 10k. All breakout queues that we're going to need to listen to now need to be explicitly listed in $config['queue']['breakout']. Until XMPP is moved to component model, this setting will let the individual processes work with their own queues: $config['queue']['breakout'][] = 'xmpp/xmppout/' . 
$config['site']['nickname']; --- lib/default.php | 11 ++-- lib/iomaster.php | 23 +++----- lib/stompqueuemanager.php | 117 ++++++++++++++++++++------------------ scripts/queuedaemon.php | 3 +- 4 files changed, 76 insertions(+), 78 deletions(-) diff --git a/lib/default.php b/lib/default.php index a74cccae12..c969c3b337 100644 --- a/lib/default.php +++ b/lib/default.php @@ -91,10 +91,13 @@ $default = 'spawndelay' => 1, // Wait at least N seconds between (re)spawns of child processes to avoid slamming the queue server with subscription startup 'debug_memory' => false, // true to spit memory usage to log 'inboxes' => true, // true to do inbox distribution & output queueing from in background via 'distrib' queue - 'breakout' => array('*' => 'shared'), // set global or per-handler queue breakout - // 'shared': use a shared queue for all sites - // 'handler': share each/this handler over multiple sites - // 'site': break out for each/this handler on this site + 'breakout' => array(), // List queue specifiers to break out when using Stomp queue. + // Default will share all queues for all sites within each group. + // Specify as <group>/<queue> or <group>/<queue>/<site>, + // using nickname identifier as site. 
+ // + // 'main/distrib' separate "distrib" queue covering all sites + // 'xmpp/xmppout/mysite' separate "xmppout" queue covering just 'mysite' 'max_retries' => 10, // drop messages after N failed attempts to process (Stomp) 'dead_letter_dir' => false, // set to directory to save dropped messages into (Stomp) ), diff --git a/lib/iomaster.php b/lib/iomaster.php index 54e2dfe841..d20837ba54 100644 --- a/lib/iomaster.php +++ b/lib/iomaster.php @@ -55,27 +55,18 @@ abstract class IoMaster if ($multiSite !== null) { $this->multiSite = $multiSite; } - if ($this->multiSite) { - $this->sites = StatusNet::findAllSites(); - } else { - $this->sites = array(StatusNet::currentSite()); - } - if (empty($this->sites)) { - throw new Exception("Empty status_network table, cannot init"); - } - - foreach ($this->sites as $site) { - StatusNet::switchSite($site); - $this->initManagers(); - } + $this->initManagers(); } /** - * Initialize IoManagers for the currently configured site - * which are appropriate to this instance. + * Initialize IoManagers which are appropriate to this instance; + * pass class names or instances into $this->instantiate(). * - * Pass class names into $this->instantiate() + * If setup and configuration may vary between sites in multi-site + * mode, it's the subclass's responsibility to set them up here. + * + * Switching site configurations is an acceptable side effect. 
*/ abstract function initManagers(); diff --git a/lib/stompqueuemanager.php b/lib/stompqueuemanager.php index bfeeb23b7f..9af8b2f482 100644 --- a/lib/stompqueuemanager.php +++ b/lib/stompqueuemanager.php @@ -63,7 +63,7 @@ class StompQueueManager extends QueueManager $this->password = common_config('queue', 'stomp_password'); $this->base = common_config('queue', 'queue_basename'); $this->control = common_config('queue', 'control_channel'); - $this->subscriptions = array($this->control => $this->control); + $this->breakout = common_config('queue', 'breakout'); } /** @@ -75,28 +75,6 @@ class StompQueueManager extends QueueManager return IoManager::INSTANCE_PER_PROCESS; } - /** - * Record queue subscriptions we'll need to handle the current site. - */ - public function addSite() - { - $this->sites[] = StatusNet::currentSite(); - - // Set up handlers active for this site... - $this->initialize(); - - foreach ($this->activeGroups as $group) { - if (isset($this->groups[$group])) { - // Actual queues may be broken out or consolidated... - // Subscribe to all the target queues we'll need. - foreach ($this->groups[$group] as $transport => $class) { - $target = $this->queueName($transport); - $this->subscriptions[$target] = $target; - } - } - } - } - /** * Optional; ping any running queue handler daemons with a notification * such as announcing a new site to handle or requesting clean shutdown. 
@@ -166,14 +144,15 @@ class StompQueueManager extends QueueManager $con = $this->cons[$idx]; $host = $con->getServer(); - $result = $con->send($this->queueName($queue), $msg, $props); + $target = $this->queueName($queue); + $result = $con->send($target, $msg, $props); if (!$result) { - $this->_log(LOG_ERR, "Error sending $rep to $queue queue on $host"); + $this->_log(LOG_ERR, "Error sending $rep to $queue queue on $host $target"); return false; } - $this->_log(LOG_DEBUG, "complete remote queueing $rep for $queue on $host"); + $this->_log(LOG_DEBUG, "complete remote queueing $rep for $queue on $host $target"); $this->stats('enqueued', $queue); return true; } @@ -432,11 +411,42 @@ class StompQueueManager extends QueueManager protected function doSubscribe(LiberalStomp $con) { $host = $con->getServer(); - foreach ($this->subscriptions as $queue) { - $this->_log(LOG_INFO, "Subscribing to $queue on $host"); - $con->subscribe($queue); + foreach ($this->subscriptions() as $sub) { + $this->_log(LOG_INFO, "Subscribing to $sub on $host"); + $con->subscribe($sub); } } + + /** + * Grab a full list of stomp-side queue subscriptions. + * Will include: + * - control broadcast channel + * - shared group queues for active groups + * - per-handler and per-site breakouts from $config['queue']['breakout'] + * that are rooted in the active groups. + * + * @return array of strings + */ + protected function subscriptions() + { + $subs = array(); + $subs[] = $this->control; + + foreach ($this->activeGroups as $group) { + $subs[] = $this->base . $group; + } + + foreach ($this->breakout as $spec) { + $parts = explode('/', $spec); + if (count($parts) < 2 || count($parts) > 3) { + common_log(LOG_ERR, "Bad queue breakout specifier $spec"); + } + if (in_array($parts[0], $this->activeGroups)) { + $subs[] = $this->base . $spec; + } + } + return array_unique($subs); + } /** * Handle and acknowledge an event that's come in through a queue. 
@@ -612,32 +622,26 @@ class StompQueueManager extends QueueManager } /** - * Set us up with queue subscriptions for a new site added at runtime, + * (Re)load runtime configuration for a given site by nickname, * triggered by a broadcast to the 'statusnet-control' topic. * + * Configuration changes in database should update, but config + * files might not. + * * @param array $frame Stomp frame * @return bool true to continue; false to stop further processing. */ protected function updateSiteConfig($nickname) { - if (empty($this->sites)) { - if ($nickname == common_config('site', 'nickname')) { - StatusNet::init(common_config('site', 'server')); - } else { - $this->_log(LOG_INFO, "Ignoring update ping for other site $nickname"); + $sn = Status_network::staticGet($nickname); + if ($sn) { + $this->switchSite($nickname); + if (!in_array($nickname, $this->sites)) { + $this->addSite(); } + $this->stats('siteupdate'); } else { - $sn = Status_network::staticGet($nickname); - if ($sn) { - $this->switchSite($nickname); - if (!in_array($nickname, $this->sites)) { - $this->addSite(); - } - // @fixme update subscriptions, if applicable - $this->stats('siteupdate'); - } else { - $this->_log(LOG_ERR, "Ignoring ping for unrecognized new site $nickname"); - } + $this->_log(LOG_ERR, "Ignoring ping for unrecognized new site $nickname"); } } @@ -646,24 +650,25 @@ class StompQueueManager extends QueueManager * group name for this queue to give eg: * * /queue/statusnet/main + * /queue/statusnet/main/distrib + * /queue/statusnet/xmpp/xmppout/site01 * * @param string $queue * @return string */ protected function queueName($queue) { - $base = common_config('queue', 'queue_basename'); $group = $this->queueGroup($queue); - $breakout = $this->breakoutMode($queue); - if ($breakout == 'shared') { - return $base . "$group"; - } else if ($breakout == 'handler') { - return $base . "$group/$queue"; - } else if ($breakout == 'site') { - $site = StatusNet::currentSite(); - return $base . 
"$group/$queue/$site"; + $site = StatusNet::currentSite(); + + $specs = array("$group/$queue/$site", + "$group/$queue"); + foreach ($specs as $spec) { + if (in_array($spec, $this->breakout)) { + return $this->base . $spec; + } } - throw Exception("Unrecognized queue breakout mode '$breakout' for '$queue'"); + return $this->base . $group; } /** diff --git a/scripts/queuedaemon.php b/scripts/queuedaemon.php index d372d898fa..6dba16f953 100755 --- a/scripts/queuedaemon.php +++ b/scripts/queuedaemon.php @@ -126,8 +126,7 @@ class QueueDaemon extends SpawningDaemon class QueueMaster extends IoMaster { /** - * Initialize IoManagers for the currently configured site - * which are appropriate to this instance. + * Initialize IoManagers which are appropriate to this instance. */ function initManagers() { From 80ef3946d016eeeef1682e73eddffb222d8db149 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 18 Feb 2010 06:36:32 -0500 Subject: [PATCH 07/62] more work on salmon --- plugins/OStatus/actions/salmon.php | 140 +++++++++++++++++++++++++++-- 1 file changed, 133 insertions(+), 7 deletions(-) diff --git a/plugins/OStatus/actions/salmon.php b/plugins/OStatus/actions/salmon.php index c79d09c95a..9ca0198266 100644 --- a/plugins/OStatus/actions/salmon.php +++ b/plugins/OStatus/actions/salmon.php @@ -62,20 +62,146 @@ class SalmonAction extends Action // XXX: check the signature $this->act = new Activity($dom->documentElement); + + return true; } function handle($args) { - common_log(LOG_DEBUG, 'Salmon: incoming post for user: '. $user_id); + common_log(LOG_INFO, 'Salmon: incoming post for user '. 
$this->user->id); // TODO : Insert new $xml -> notice code - switch ($this->act->verb) - { - case Activity::POST: - case Activity::SHARE: - case Activity::FAVORITE: - case Activity::FOLLOW: + if (Event::handle('StartHandleSalmon', array($this->user, $this->activity))) { + switch ($this->act->verb) + { + case ActivityVerb::POST: + $this->handlePost(); + break; + case ActivityVerb::SHARE: + $this->handleShare(); + break; + case ActivityVerb::FAVORITE: + $this->handleFavorite(); + break; + case ActivityVerb::FOLLOW: + case ActivityVerb::FRIEND: + $this->handleFollow(); + break; + } + Event::handle('EndHandleSalmon', array($this->user, $this->activity)); + } + } + + function handlePost() + { + switch ($this->act->object->type) { + case ActivityObject::ARTICLE: + case ActivityObject::BLOGENTRY: + case ActivityObject::NOTE: + case ActivityObject::STATUS: + case ActivityObject::COMMENT: + break; + default: + throw new Exception("Can't handle that kind of post."); + } + + $profile = $this->ensureProfile(); + } + + function handleFollow() + { + } + + function handleFavorite() + { + } + + function handleShare() + { + } + + function ensureProfile() + { + $actor = $this->act->actor; + + if (empty($actor->id)) { + throw new Exception("Received a salmon slap from unidentified actor."); + } + + $ostatusProfile = Ostatus_profile::staticGet('homeuri', $actor->id); + + if (empty($ostatusProfile)) { + return $this->createProfile(); + } else { + // XXX: can we receive a salmon slap from a group...? + assert(!empty($ostatusProfile->profile_id)); + return Profile::staticGet($ostatusProfile->profile_id); + } + } + + function createProfile() + { + $actor = $this->act->actor; + + $profile = new Profile(); + + $profile->nickname = $this->nicknameFromURI($actor->id); + + if (empty($profile->nickname)) { + $profile->nickname = common_nicknamize($actor->title); + } + + $profile->fullname = $actor->title; + $profile->bio = $actor->summary; // XXX: is that right? 
+ $profile->profileurl = $actor->link; // XXX: is that right? + $profile->created = common_sql_now(); + + $id = $profile->insert(); + + if (empty($id)) { + common_log_db_error($profile, 'INSERT', __FILE__); + throw new Exception("Couldn't save new profile for $actor->id\n"); + } + + // XXX: add avatars + + $op = new Ostatus_profile(); + + $op->profile_id = $id; + $op->homeuri = $actor->id; + $op->created = $profile->created; + + // XXX: determine feed URI from source or Webfinger or whatever + + $id = $op->insert(); + + if (empty($id)) { + common_log_db_error($op, 'INSERT', __FILE__); + throw new Exception("Couldn't save new ostatus profile for $actor->id\n"); + } + + return $profile; + } + + function nicknameFromURI($uri) + { + preg_match('/(\w+):/', $uri, $matches); + + $protocol = $matches[1]; + + switch ($protocol) { + case 'acct': + case 'mailto': + if (preg_match("/^$protocol:(.*)?@.*\$/", $uri, $matches)) { + return common_canonical_nickname($matches[1]); + } + return null; + case 'http': + return common_url_to_nickname($uri); + break; + default: + return null; } } } From c2ba7645359242590c8ac60b66f012110ae889ef Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 18 Feb 2010 07:11:20 -0500 Subject: [PATCH 08/62] always distribute to inbox of author immediately --- classes/Notice.php | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/classes/Notice.php b/classes/Notice.php index b0edb6de60..7e2b8b4a69 100644 --- a/classes/Notice.php +++ b/classes/Notice.php @@ -681,7 +681,20 @@ class Notice extends Memcached_DataObject { $ni = $this->whoGets($groups, $recipients); - Inbox::bulkInsert($this->id, array_keys($ni)); + $ids = array_keys($ni); + + // We remove the author (if they're a local user), + // since we'll have already done this in distribute() + + $i = array_search($this->profile_id, $ids); + + if ($i !== false) { + unset($ids[$i]); + } + + // Bulk insert + + Inbox::bulkInsert($this->id, $ids); return; } @@ 
-1487,6 +1500,14 @@ class Notice extends Memcached_DataObject function distribute() { + // We always insert for the author so they don't + // have to wait + + $user = User::staticGet('id', $this->profile_id); + if (!empty($user)) { + Inbox::insertNotice($user->id, $this->id); + } + if (common_config('queue', 'inboxes')) { // If there's a failure, we want to _force_ // distribution at this point. From c36155e2381df5bc00a3f89e6e35768fb822960a Mon Sep 17 00:00:00 2001 From: Sarven Capadisli Date: Thu, 18 Feb 2010 18:12:08 +0100 Subject: [PATCH 09/62] Fixes long strings from not breaking --- theme/base/css/display.css | 1 + 1 file changed, 1 insertion(+) diff --git a/theme/base/css/display.css b/theme/base/css/display.css index 89fe810c69..380975e324 100644 --- a/theme/base/css/display.css +++ b/theme/base/css/display.css @@ -1035,6 +1035,7 @@ text-decoration:underline; .notice .entry-title { overflow:hidden; +word-wrap:break-word; } .notice .entry-title.ov { overflow:visible; From 310ac319957d437ab3bc0619419c750c98ebeaf0 Mon Sep 17 00:00:00 2001 From: Sarven Capadisli Date: Thu, 18 Feb 2010 18:12:47 +0100 Subject: [PATCH 10/62] Minor adjustment to edit icon position --- theme/default/css/display.css | 2 +- theme/identica/css/display.css | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/theme/default/css/display.css b/theme/default/css/display.css index a2f1013428..71470d55d2 100644 --- a/theme/default/css/display.css +++ b/theme/default/css/display.css @@ -306,7 +306,7 @@ background-position:5px -1181px; } .entity_edit a { -background-position: 5px -718px; +background-position: 5px -719px; } .entity_send-a-message a { background-position: 5px -852px; diff --git a/theme/identica/css/display.css b/theme/identica/css/display.css index e214047451..14a82a8def 100644 --- a/theme/identica/css/display.css +++ b/theme/identica/css/display.css @@ -305,7 +305,7 @@ background-position:5px -1181px; } .entity_edit a { -background-position: 5px -718px; 
+background-position: 5px -719px; } .entity_send-a-message a { background-position: 5px -852px; From 22ff358ba8d1fd0396136e1de570d788dd0727b6 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Thu, 18 Feb 2010 18:20:48 +0000 Subject: [PATCH 11/62] OStatus sub/unsub updates: - fix for PuSH unsub verification - send Salmon notification on unsub --- lib/atom10entry.php | 5 +- lib/atom10feed.php | 11 +++- lib/atomnoticefeed.php | 8 +-- plugins/OStatus/OStatusPlugin.php | 55 +++++++++++++++++-- plugins/OStatus/actions/pushcallback.php | 2 +- plugins/OStatus/actions/pushhub.php | 9 ++- plugins/OStatus/actions/salmon.php | 2 + plugins/OStatus/classes/HubSub.php | 11 ++-- plugins/OStatus/classes/Ostatus_profile.php | 44 ++++++++------- plugins/OStatus/lib/activity.php | 6 ++ plugins/OStatus/lib/hubverifyqueuehandler.php | 3 +- 11 files changed, 108 insertions(+), 48 deletions(-) diff --git a/lib/atom10entry.php b/lib/atom10entry.php index 5710c80fc5..f8f16d5946 100644 --- a/lib/atom10entry.php +++ b/lib/atom10entry.php @@ -27,8 +27,7 @@ * @link http://status.net/ */ -if (!defined('STATUSNET') -{ +if (!defined('STATUSNET')) { exit(1); } @@ -87,7 +86,7 @@ class Atom10Entry extends XMLStringer * * @return void */ - function validate + function validate() { } diff --git a/lib/atom10feed.php b/lib/atom10feed.php index 14a3beb83e..5e17b20d3a 100644 --- a/lib/atom10feed.php +++ b/lib/atom10feed.php @@ -78,7 +78,7 @@ class Atom10Feed extends XMLStringer $this->authors = array(); $this->links = array(); $this->entries = array(); - $this->addNamespace('xmlns', 'http://www.w3.org/2005/Atom'); + $this->addNamespace('', 'http://www.w3.org/2005/Atom'); } /** @@ -162,7 +162,14 @@ class Atom10Feed extends XMLStringer { $this->xw->startDocument('1.0', 'UTF-8'); $commonAttrs = array('xml:lang' => 'en-US'); - $commonAttrs = array_merge($commonAttrs, $this->namespaces); + foreach ($this->namespaces as $prefix => $uri) { + if ($prefix == '') { + $attr = 'xmlns'; + } else { + $attr = 'xmlns:' . 
$prefix; + } + $commonAttrs[$attr] = $uri; + } $this->elementStart('feed', $commonAttrs); $this->element('id', null, $this->id); diff --git a/lib/atomnoticefeed.php b/lib/atomnoticefeed.php index b7a60bde6e..7653f91544 100644 --- a/lib/atomnoticefeed.php +++ b/lib/atomnoticefeed.php @@ -50,23 +50,23 @@ class AtomNoticeFeed extends Atom10Feed // Feeds containing notice info use these namespaces $this->addNamespace( - 'xmlns:thr', + 'thr', 'http://purl.org/syndication/thread/1.0' ); $this->addNamespace( - 'xmlns:georss', + 'georss', 'http://www.georss.org/georss' ); $this->addNamespace( - 'xmlns:activity', + 'activity', 'http://activitystrea.ms/spec/1.0/' ); // XXX: What should the uri be? $this->addNamespace( - 'xmlns:ostatus', + 'ostatus', 'http://ostatus.org/schema/1.0' ); } diff --git a/plugins/OStatus/OStatusPlugin.php b/plugins/OStatus/OStatusPlugin.php index b6c9fa1d4c..e548a151c7 100644 --- a/plugins/OStatus/OStatusPlugin.php +++ b/plugins/OStatus/OStatusPlugin.php @@ -112,7 +112,7 @@ class OStatusPlugin extends Plugin * Set up a PuSH hub link to our internal link for canonical timeline * Atom feeds for users and groups. */ - function onStartApiAtom(AtomNoticeFeed $feed) + function onStartApiAtom($feed) { $id = null; @@ -171,6 +171,12 @@ class OStatusPlugin extends Plugin { $base = dirname(__FILE__); $lower = strtolower($cls); + $map = array('activityverb' => 'activity', + 'activityobject' => 'activity', + 'activityutils' => 'activity'); + if (isset($map[$lower])) { + $lower = $map[$lower]; + } $files = array("$base/classes/$cls.php", "$base/lib/$lower.php"); if (substr($lower, -6) == 'action') { @@ -253,18 +259,45 @@ class OStatusPlugin extends Plugin } /** - * Garbage collect unused feeds on unsubscribe + * Notify remote server when one of our users subscribes. 
+ * @fixme Check and restart the PuSH subscription if needed + * + * @param User $user + * @param Profile $other + * @return hook return value + */ + function onEndSubscribe($user, $other) + { + $oprofile = Ostatus_profile::staticGet('profile_id', $other->id); + if ($oprofile) { + // Notify the remote server of the unsub, if supported. + $oprofile->notify($user->getProfile(), ActivityVerb::FOLLOW, $oprofile); + } + return true; + } + + /** + * Notify remote server and garbage collect unused feeds on unsubscribe. + * @fixme send these operations to background queues + * + * @param User $user + * @param Profile $other + * @return hook return value */ function onEndUnsubscribe($user, $other) { - $profile = Ostatus_profile::staticGet('profile_id', $other->id); - if ($feed) { + $oprofile = Ostatus_profile::staticGet('profile_id', $other->id); + if ($oprofile) { + // Notify the remote server of the unsub, if supported. + $oprofile->notify($user->getProfile(), ActivityVerb::UNFOLLOW, $oprofile); + + // Drop the PuSH subscription if there are no other subscribers. $sub = new Subscription(); $sub->subscribed = $other->id; $sub->limit(1); if (!$sub->find(true)) { - common_log(LOG_INFO, "Unsubscribing from now-unused feed $feed->feeduri on hub $feed->huburi"); - $profile->unsubscribe(); + common_log(LOG_INFO, "Unsubscribing from now-unused feed $oprofile->feeduri on hub $oprofile->huburi"); + $oprofile->unsubscribe(); } } return true; @@ -290,6 +323,16 @@ class OStatusPlugin extends Plugin return true; } + /** + * Override the "from ostatus" bit in notice lists to link to the + * original post and show the domain it came from. 
+ * + * @param Notice in $notice + * @param string out &$name + * @param string out &$url + * @param string out &$title + * @return mixed hook return code + */ function onStartNoticeSourceLink($notice, &$name, &$url, &$title) { if ($notice->source == 'ostatus') { diff --git a/plugins/OStatus/actions/pushcallback.php b/plugins/OStatus/actions/pushcallback.php index 388c8f9c3d..ed859a32f8 100644 --- a/plugins/OStatus/actions/pushcallback.php +++ b/plugins/OStatus/actions/pushcallback.php @@ -89,7 +89,7 @@ class PushCallbackAction extends Action if ($profile->verify_token !== $verify_token) { common_log(LOG_WARNING, __METHOD__ . ": bogus hub callback with bad token \"$verify_token\" for feed $topic"); - throw new ServerError("Bogus hub callback: bad token", 404); + throw new ServerException("Bogus hub callback: bad token", 404); } if ($mode != $profile->sub_state) { diff --git a/plugins/OStatus/actions/pushhub.php b/plugins/OStatus/actions/pushhub.php index 13ec09d528..19599d815f 100644 --- a/plugins/OStatus/actions/pushhub.php +++ b/plugins/OStatus/actions/pushhub.php @@ -83,6 +83,7 @@ class PushHubAction extends Action { $feed = $this->argUrl('hub.topic'); $callback = $this->argUrl('hub.callback'); + $token = $this->arg('hub.verify_token', null); common_log(LOG_DEBUG, __METHOD__ . 
": checking sub'd to $feed $callback"); if ($this->getSub($feed, $callback)) { @@ -96,7 +97,6 @@ class PushHubAction extends Action $sub = new HubSub(); $sub->topic = $feed; $sub->callback = $callback; - $sub->verify_token = $this->arg('hub.verify_token', null); $sub->secret = $this->arg('hub.secret', null); if (strlen($sub->secret) > 200) { throw new ClientException("hub.secret must be no longer than 200 chars", 400); @@ -115,7 +115,7 @@ class PushHubAction extends Action // @fixme check errors ;) - $data = array('sub' => $sub, 'mode' => 'subscribe'); + $data = array('sub' => $sub, 'mode' => 'subscribe', 'token' => $token); $qm = QueueManager::get(); $qm->enqueue($data, 'hubverify'); @@ -130,6 +130,8 @@ class PushHubAction extends Action * 202 Accepted - request saved and awaiting verification * 204 No Content - already subscribed * 400 Bad Request - invalid params or rejected feed + * + * @fixme background this */ function unsubscribe() { @@ -138,7 +140,8 @@ class PushHubAction extends Action $sub = $this->getSub($feed, $callback); if ($sub) { - if ($sub->verify('unsubscribe')) { + $token = $this->arg('hub.verify_token', null); + if ($sub->verify('unsubscribe', $token)) { $sub->delete(); common_log(LOG_INFO, "PuSH unsubscribed $feed for $callback"); } else { diff --git a/plugins/OStatus/actions/salmon.php b/plugins/OStatus/actions/salmon.php index 9ca0198266..224134cd7c 100644 --- a/plugins/OStatus/actions/salmon.php +++ b/plugins/OStatus/actions/salmon.php @@ -34,6 +34,8 @@ class SalmonAction extends Action function prepare($args) { + parent::prepare($args); + if ($_SERVER['REQUEST_METHOD'] != 'POST') { $this->clientError(_('This method requires a POST.')); } diff --git a/plugins/OStatus/classes/HubSub.php b/plugins/OStatus/classes/HubSub.php index 7071ee5b4f..0cd4281f8f 100644 --- a/plugins/OStatus/classes/HubSub.php +++ b/plugins/OStatus/classes/HubSub.php @@ -30,7 +30,6 @@ class HubSub extends Memcached_DataObject public $topic; public $callback; public 
$secret; - public $verify_token; public $challenge; public $lease; public $sub_start; @@ -62,7 +61,6 @@ class HubSub extends Memcached_DataObject 'topic' => DB_DATAOBJECT_STR + DB_DATAOBJECT_NOTNULL, 'callback' => DB_DATAOBJECT_STR + DB_DATAOBJECT_NOTNULL, 'secret' => DB_DATAOBJECT_STR, - 'verify_token' => DB_DATAOBJECT_STR, 'challenge' => DB_DATAOBJECT_STR, 'lease' => DB_DATAOBJECT_INT, 'sub_start' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME, @@ -84,8 +82,6 @@ class HubSub extends Memcached_DataObject 255, false), new ColumnDef('secret', 'text', null, true), - new ColumnDef('verify_token', 'text', - null, true), new ColumnDef('challenge', 'varchar', 32, true), new ColumnDef('lease', 'int', @@ -154,8 +150,9 @@ class HubSub extends Memcached_DataObject /** * Send a verification ping to subscriber * @param string $mode 'subscribe' or 'unsubscribe' + * @param string $token hub.verify_token value, if provided by client */ - function verify($mode) + function verify($mode, $token=null) { assert($mode == 'subscribe' || $mode == 'unsubscribe'); @@ -172,8 +169,8 @@ class HubSub extends Memcached_DataObject if ($mode == 'subscribe') { $params['hub.lease_seconds'] = $this->lease; } - if ($this->verify_token) { - $params['hub.verify_token'] = $this->verify_token; + if ($token !== null) { + $params['hub.verify_token'] = $token; } $url = $this->callback . '?' . 
http_build_query($params, '', '&'); // @fixme ugly urls diff --git a/plugins/OStatus/classes/Ostatus_profile.php b/plugins/OStatus/classes/Ostatus_profile.php index be01cdfe19..486417617c 100644 --- a/plugins/OStatus/classes/Ostatus_profile.php +++ b/plugins/OStatus/classes/Ostatus_profile.php @@ -484,7 +484,7 @@ class Ostatus_profile extends Memcached_DataObject } else { $this->sub_end = null; } - $this->lastupdate = common_sql_date(); + $this->lastupdate = common_sql_now(); return $this->update($original); } @@ -497,12 +497,13 @@ class Ostatus_profile extends Memcached_DataObject { $original = clone($this); - $this->verify_token = null; - $this->secret = null; - $this->sub_state = null; - $this->sub_start = null; - $this->sub_end = null; - $this->lastupdate = common_sql_date(); + // @fixme these should all be null, but DB_DataObject doesn't save null values...????? + $this->verify_token = ''; + $this->secret = ''; + $this->sub_state = ''; + $this->sub_start = ''; + $this->sub_end = ''; + $this->lastupdate = common_sql_now(); return $this->update($original); } @@ -527,24 +528,25 @@ class Ostatus_profile extends Memcached_DataObject ':' . $actor->id . ':' . 
time(); // @fixme - $entry = new Atom10Entry(); + //$entry = new Atom10Entry(); + $entry = new XMLStringer(); $entry->elementStart('entry'); $entry->element('id', null, $id); $entry->element('title', null, $text); $entry->element('summary', null, $text); - $entry->element('published', null, common_date_w3dtf()); + $entry->element('published', null, common_date_w3dtf(time())); $entry->element('activity:verb', null, $verb); - $entry->raw($profile->asAtomAuthor()); - $entry->raw($profile->asActivityActor()); + $entry->raw($actor->asAtomAuthor()); + $entry->raw($actor->asActivityActor()); $entry->raw($object->asActivityNoun('object')); - $entry->elmentEnd('entry'); + $entry->elementEnd('entry'); $feed = $this->atomFeed($actor); - $feed->initFeed(); + #$feed->initFeed(); $feed->addEntry($entry); - $feed->renderEntries(); - $feed->endFeed(); + #$feed->renderEntries(); + #$feed->endFeed(); $xml = $feed->getString(); common_log(LOG_INFO, "Posting to Salmon endpoint $salmon: $xml"); @@ -568,7 +570,7 @@ class Ostatus_profile extends Memcached_DataObject $feed = new Atom10Feed(); // @fixme should these be set up somewhere else? 
$feed->addNamespace('activity', 'http://activitystrea.ms/spec/1.0/'); - $feed->addNamesapce('thr', 'http://purl.org/syndication/thread/1.0'); + $feed->addNamespace('thr', 'http://purl.org/syndication/thread/1.0'); $feed->addNamespace('georss', 'http://www.georss.org/georss'); $feed->addNamespace('ostatus', 'http://ostatus.org/schema/1.0'); @@ -579,14 +581,14 @@ class Ostatus_profile extends Memcached_DataObject $feed->setUpdated(time()); $feed->setPublished(time()); - $feed->addLink(common_url('ApiTimelineUser', - array('id' => $actor->id, - 'type' => 'atom')), + $feed->addLink(common_local_url('ApiTimelineUser', + array('id' => $actor->id, + 'type' => 'atom')), array('rel' => 'self', 'type' => 'application/atom+xml')); - $feed->addLink(common_url('userbyid', - array('id' => $actor->id)), + $feed->addLink(common_local_url('userbyid', + array('id' => $actor->id)), array('rel' => 'alternate', 'type' => 'text/html')); diff --git a/plugins/OStatus/lib/activity.php b/plugins/OStatus/lib/activity.php index f137946ab4..3ed613dc7f 100644 --- a/plugins/OStatus/lib/activity.php +++ b/plugins/OStatus/lib/activity.php @@ -303,6 +303,12 @@ class ActivityVerb const FRIEND = 'http://activitystrea.ms/schema/1.0/make-friend'; const JOIN = 'http://activitystrea.ms/schema/1.0/join'; const TAG = 'http://activitystrea.ms/schema/1.0/tag'; + + // Custom OStatus verbs for the flipside until they're standardized + const DELETE = 'http://ostatus.org/schema/1.0/unfollow'; + const UNFAVORITE = 'http://ostatus.org/schema/1.0/unfavorite'; + const UNFOLLOW = 'http://ostatus.org/schema/1.0/unfollow'; + const LEAVE = 'http://ostatus.org/schema/1.0/leave'; } /** diff --git a/plugins/OStatus/lib/hubverifyqueuehandler.php b/plugins/OStatus/lib/hubverifyqueuehandler.php index 125d13a777..7ce9e14312 100644 --- a/plugins/OStatus/lib/hubverifyqueuehandler.php +++ b/plugins/OStatus/lib/hubverifyqueuehandler.php @@ -33,13 +33,14 @@ class HubVerifyQueueHandler extends QueueHandler { $sub = $data['sub']; 
$mode = $data['mode']; + $token = $data['token']; assert($sub instanceof HubSub); assert($mode === 'subscribe' || $mode === 'unsubscribe'); common_log(LOG_INFO, __METHOD__ . ": $mode $sub->callback $sub->topic"); try { - $sub->verify($mode); + $sub->verify($mode, $token); } catch (Exception $e) { common_log(LOG_ERR, "Failed PuSH $mode verify to $sub->callback for $sub->topic: " . $e->getMessage()); From 0dac13d197248bf24ea51cb7911d32286764c0c8 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Thu, 18 Feb 2010 21:22:21 +0000 Subject: [PATCH 12/62] OStatus refactoring to clean up profile vs feed and fix up subscription issues. PuSH subscription maintenance broken back out to FeedSub, letting Ostatus_profile deal with the profile level (user or group, with unique id URI) --- plugins/OStatus/OStatusPlugin.php | 53 +- plugins/OStatus/actions/feedsubsettings.php | 99 ++-- plugins/OStatus/actions/pushcallback.php | 22 +- plugins/OStatus/actions/salmon.php | 32 +- plugins/OStatus/classes/FeedSub.php | 443 ++++++++++++++++ plugins/OStatus/classes/Ostatus_profile.php | 558 +++++++------------- plugins/OStatus/lib/feeddiscovery.php | 52 +- plugins/OStatus/lib/feedmunger.php | 350 ------------ 8 files changed, 749 insertions(+), 860 deletions(-) create mode 100644 plugins/OStatus/classes/FeedSub.php delete mode 100644 plugins/OStatus/lib/feedmunger.php diff --git a/plugins/OStatus/OStatusPlugin.php b/plugins/OStatus/OStatusPlugin.php index e548a151c7..4ebe4551ec 100644 --- a/plugins/OStatus/OStatusPlugin.php +++ b/plugins/OStatus/OStatusPlugin.php @@ -1,17 +1,7 @@ -Author URI: http://status.net/ -*/ - /* * StatusNet - the distributed open-source microblogging tool - * Copyright (C) 2009, StatusNet, Inc. + * Copyright (C) 2009-2010, StatusNet, Inc. 
* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by @@ -28,17 +18,12 @@ Author URI: http://status.net/ */ /** - * @package FeedSubPlugin + * @package OStatusPlugin * @maintainer Brion Vibber */ if (!defined('STATUSNET') && !defined('LACONICA')) { exit(1); } -define('FEEDSUB_SERVICE', 100); // fixme -- avoid hardcoding these? - -// We bundle the XML_Parse_Feed library... -set_include_path(get_include_path() . PATH_SEPARATOR . dirname(__FILE__) . '/extlib'); - class FeedSubException extends Exception { } @@ -258,24 +243,6 @@ class OStatusPlugin extends Plugin } } - /** - * Notify remote server when one of our users subscribes. - * @fixme Check and restart the PuSH subscription if needed - * - * @param User $user - * @param Profile $other - * @return hook return value - */ - function onEndSubscribe($user, $other) - { - $oprofile = Ostatus_profile::staticGet('profile_id', $other->id); - if ($oprofile) { - // Notify the remote server of the unsub, if supported. - $oprofile->notify($user->getProfile(), ActivityVerb::FOLLOW, $oprofile); - } - return true; - } - /** * Notify remote server and garbage collect unused feeds on unsubscribe. * @fixme send these operations to background queues @@ -309,6 +276,7 @@ class OStatusPlugin extends Plugin function onCheckSchema() { $schema = Schema::get(); $schema->ensureTable('ostatus_profile', Ostatus_profile::schemaDef()); + $schema->ensureTable('feedsub', FeedSub::schemaDef()); $schema->ensureTable('hubsub', HubSub::schemaDef()); return true; } @@ -345,4 +313,19 @@ class OStatusPlugin extends Plugin return false; } } + + /** + * Send incoming PuSH feeds for OStatus endpoints in for processing. 
+ * + * @param FeedSub $feedsub + * @param DOMDocument $feed + * @return mixed hook return code + */ + function onStartFeedSubReceive($feedsub, $feed) + { + $oprofile = Ostatus_profile::staticGet('feeduri', $feedsub->uri); + if ($oprofile) { + $oprofile->processFeed($feed); + } + } } diff --git a/plugins/OStatus/actions/feedsubsettings.php b/plugins/OStatus/actions/feedsubsettings.php index 6933c9bf21..3e1d0aa823 100644 --- a/plugins/OStatus/actions/feedsubsettings.php +++ b/plugins/OStatus/actions/feedsubsettings.php @@ -26,7 +26,7 @@ if (!defined('STATUSNET') && !defined('LACONICA')) { exit(1); } class FeedSubSettingsAction extends ConnectSettingsAction { - protected $feedurl; + protected $profile_uri; protected $preview; protected $munger; @@ -88,7 +88,10 @@ class FeedSubSettingsAction extends ConnectSettingsAction $this->elementStart('ul', 'form_data'); $this->elementStart('li', array('id' => 'settings_twitter_login_button')); - $this->input('feedurl', _('Feed URL'), $this->feedurl, _('Enter the URL of a PubSubHubbub-enabled feed')); + $this->input('profile_uri', + _m('Feed URL'), + $this->profile_uri, + _m('Enter the profile URL of a PubSubHubbub-enabled feed')); $this->elementEnd('li'); $this->elementEnd('ul'); @@ -145,79 +148,55 @@ class FeedSubSettingsAction extends ConnectSettingsAction */ function validateFeed() { - $feedurl = trim($this->arg('feedurl')); + $profile_uri = trim($this->arg('profile_uri')); - if ($feedurl == '') { - $this->showForm(_m('Empty feed URL!')); + if ($profile_uri == '') { + $this->showForm(_m('Empty remote profile URL!')); return; } - $this->feedurl = $feedurl; + $this->profile_uri = $profile_uri; - // Get the canonical feed URI and check it + // @fixme validate, normalize bla bla try { - $discover = new FeedDiscovery(); - $uri = $discover->discoverFromURL($feedurl); + $oprofile = Ostatus_profile::ensureProfile($this->profile_uri); + $this->oprofile = $oprofile; + return true; } catch (FeedSubBadURLException $e) { - 
$this->showForm(_m('Invalid URL or could not reach server.')); - return false; + $err = _m('Invalid URL or could not reach server.'); } catch (FeedSubBadResponseException $e) { - $this->showForm(_m('Cannot read feed; server returned error.')); - return false; + $err = _m('Cannot read feed; server returned error.'); } catch (FeedSubEmptyException $e) { - $this->showForm(_m('Cannot read feed; server returned an empty page.')); - return false; + $err = _m('Cannot read feed; server returned an empty page.'); } catch (FeedSubBadHTMLException $e) { - $this->showForm(_m('Bad HTML, could not find feed link.')); - return false; + $err = _m('Bad HTML, could not find feed link.'); } catch (FeedSubNoFeedException $e) { - $this->showForm(_m('Could not find a feed linked from this URL.')); - return false; + $err = _m('Could not find a feed linked from this URL.'); } catch (FeedSubUnrecognizedTypeException $e) { - $this->showForm(_m('Not a recognized feed type.')); - return false; + $err = _m('Not a recognized feed type.'); } catch (FeedSubException $e) { // Any new ones we forgot about - $this->showForm(_m('Bad feed URL.')); - return false; + $err = sprintf(_m('Bad feed URL: %s %s'), get_class($e), $e->getMessage()); } - - $this->munger = $discover->feedMunger(); - $this->profile = $this->munger->ostatusProfile(); - if ($this->profile->huburi == '' && !common_config('feedsub', 'nohub')) { - $this->showForm(_m('Feed is not PuSH-enabled; cannot subscribe.')); - return false; - } - - return true; + $this->showForm($err); + return false; } function saveFeed() { if ($this->validateFeed()) { $this->preview = true; - $this->profile = Ostatus_profile::ensureProfile($this->munger); - if (!$this->profile) { - throw new ServerException("Feed profile was not saved properly."); - } - - // If not already in use, subscribe to updates via the hub - if ($this->profile->sub_start) { - common_log(LOG_INFO, __METHOD__ . ": double the fun! 
new sub for {$this->profile->feeduri} last subbed {$this->profile->sub_start}"); - } else { - $ok = $this->profile->subscribe(); - common_log(LOG_INFO, __METHOD__ . ": sub was $ok"); - if (!$ok) { - $this->showForm(_m('Feed subscription failed! Bad response from hub.')); - return; - } - } // And subscribe the current user to the local profile $user = common_current_user(); - if ($this->profile->isGroup()) { - $group = $this->profile->localGroup(); + if (!$this->oprofile->subscribe()) { + $this->showForm(_m("Failed to set up server-to-server subscription.")); + return; + } + + if ($this->oprofile->isGroup()) { + $group = $this->oprofile->localGroup(); if ($user->isMember($group)) { $this->showForm(_m('Already a member!')); } elseif (Group_member::join($this->profile->group_id, $user->id)) { @@ -226,13 +205,13 @@ class FeedSubSettingsAction extends ConnectSettingsAction $this->showForm(_m('Remote group join failed!')); } } else { - $local = $this->profile->localProfile(); + $local = $this->oprofile->localProfile(); if ($user->isSubscribed($local)) { $this->showForm(_m('Already subscribed!')); - } elseif ($user->subscribeTo($local)) { - $this->showForm(_m('Feed subscribed!')); + } elseif ($this->oprofile->subscribeLocalToRemote($user)) { + $this->showForm(_m('Remote user subscribed!')); } else { - $this->showForm(_m('Feed subscription failed!')); + $this->showForm(_m('Remote subscription failed!')); } } } @@ -248,17 +227,7 @@ class FeedSubSettingsAction extends ConnectSettingsAction function previewFeed() { - $profile = $this->munger->ostatusProfile(); - $notice = $this->munger->notice(0, true); // preview - - if ($notice) { - $this->element('b', null, 'Preview of latest post from this feed:'); - - $item = new NoticeList($notice, $this); - $item->show(); - } else { - $this->element('b', null, 'No posts in this feed yet.'); - } + $this->text('Profile preview should go here'); } function showScripts() diff --git a/plugins/OStatus/actions/pushcallback.php 
b/plugins/OStatus/actions/pushcallback.php index ed859a32f8..7e1227a66a 100644 --- a/plugins/OStatus/actions/pushcallback.php +++ b/plugins/OStatus/actions/pushcallback.php @@ -48,9 +48,9 @@ class PushCallbackAction extends Action throw new ServerException('Empty or invalid feed id', 400); } - $profile = Ostatus_profile::staticGet('id', $feedid); - if (!$profile) { - throw new ServerException('Unknown OStatus/PuSH feed id ' . $feedid, 400); + $feedsub = FeedSub::staticGet('id', $feedid); + if (!$feedsub) { + throw new ServerException('Unknown PuSH feed id ' . $feedid, 400); } $hmac = ''; @@ -62,7 +62,7 @@ class PushCallbackAction extends Action // @fixme Queue this to a background process; we should return // as quickly as possible from a distribution POST. - $profile->postUpdates($post, $hmac); + $feedsub->receive($post, $hmac); } /** @@ -81,29 +81,29 @@ class PushCallbackAction extends Action throw new ServerException("Bogus hub callback: bad mode", 404); } - $profile = Ostatus_profile::staticGet('feeduri', $topic); - if (!$profile) { + $feedsub = FeedSub::staticGet('uri', $topic); + if (!$feedsub) { common_log(LOG_WARNING, __METHOD__ . ": bogus hub callback for unknown feed $topic"); throw new ServerException("Bogus hub callback: unknown feed", 404); } - if ($profile->verify_token !== $verify_token) { + if ($feedsub->verify_token !== $verify_token) { common_log(LOG_WARNING, __METHOD__ . ": bogus hub callback with bad token \"$verify_token\" for feed $topic"); throw new ServerException("Bogus hub callback: bad token", 404); } - if ($mode != $profile->sub_state) { - common_log(LOG_WARNING, __METHOD__ . ": bogus hub callback with bad mode \"$mode\" for feed $topic in state \"{$profile->sub_state}\""); + if ($mode != $feedsub->sub_state) { + common_log(LOG_WARNING, __METHOD__ . 
": bogus hub callback with bad mode \"$mode\" for feed $topic in state \"{$feedsub->sub_state}\""); throw new ServerException("Bogus hub callback: mode doesn't match subscription state.", 404); } // OK! if ($mode == 'subscribe') { common_log(LOG_INFO, __METHOD__ . ': sub confirmed'); - $profile->confirmSubscribe($lease_seconds); + $feedsub->confirmSubscribe($lease_seconds); } else { common_log(LOG_INFO, __METHOD__ . ": unsub confirmed; deleting sub record for $topic"); - $profile->confirmUnsubscribe(); + $feedsub->confirmUnsubscribe(); } print $challenge; } diff --git a/plugins/OStatus/actions/salmon.php b/plugins/OStatus/actions/salmon.php index 224134cd7c..ea5b8e4ea8 100644 --- a/plugins/OStatus/actions/salmon.php +++ b/plugins/OStatus/actions/salmon.php @@ -68,6 +68,9 @@ class SalmonAction extends Action return true; } + /** + * @fixme probably call Ostatus_profile::processFeed + */ function handle($args) { common_log(LOG_INFO, 'Salmon: incoming post for user '. $this->user->id); @@ -95,6 +98,9 @@ class SalmonAction extends Action } } + /** + * @fixme probably call Ostatus_profile::processFeed + */ function handlePost() { switch ($this->act->object->type) { @@ -111,14 +117,23 @@ class SalmonAction extends Action $profile = $this->ensureProfile(); } + /** + * @fixme probably call Ostatus_profile::processFeed + */ function handleFollow() { } + /** + * @fixme probably call Ostatus_profile::processFeed + */ function handleFavorite() { } + /** + * @fixme probably call Ostatus_profile::processFeed + */ function handleShare() { } @@ -131,17 +146,13 @@ class SalmonAction extends Action throw new Exception("Received a salmon slap from unidentified actor."); } - $ostatusProfile = Ostatus_profile::staticGet('homeuri', $actor->id); - - if (empty($ostatusProfile)) { - return $this->createProfile(); - } else { - // XXX: can we receive a salmon slap from a group...? 
- assert(!empty($ostatusProfile->profile_id)); - return Profile::staticGet($ostatusProfile->profile_id); - } + $oprofile = Ostatus_profile::ensureActorProfile($this->act); + return $oprofile->localProfile(); } + /** + * @fixme anything new in here probably should be merged into Ostatus_profile::ensureActorProfile and friends + */ function createProfile() { $actor = $this->act->actor; @@ -186,6 +197,9 @@ class SalmonAction extends Action return $profile; } + /** + * @fixme should be merged into Ostatus_profile + */ function nicknameFromURI($uri) { preg_match('/(\w+):/', $uri, $matches); diff --git a/plugins/OStatus/classes/FeedSub.php b/plugins/OStatus/classes/FeedSub.php new file mode 100644 index 0000000000..dc2c0b710b --- /dev/null +++ b/plugins/OStatus/classes/FeedSub.php @@ -0,0 +1,443 @@ +. + */ + +/** + * @package OStatusPlugin + * @maintainer Brion Vibber + */ + +/* +PuSH subscription flow: + + $profile->subscribe() + generate random verification token + save to verify_token + sends a sub request to the hub... + + main/push/callback + hub sends confirmation back to us via GET + We verify the request, then echo back the challenge. + On our end, we save the time we subscribed and the lease expiration + + main/push/callback + hub sends us updates via POST + +*/ + +class FeedDBException extends FeedSubException +{ + public $obj; + + function __construct($obj) + { + parent::__construct('Database insert failure'); + $this->obj = $obj; + } +} + +/** + * FeedSub handles low-level PubSubHubbub (PuSH) subscriptions. + * Higher-level behavior building OStatus stuff on top is handled + * under Ostatus_profile.
+ */ +class FeedSub extends Memcached_DataObject +{ + public $__table = 'feedsub'; + + public $id; + public $uri; + + // PuSH subscription data + public $huburi; + public $secret; + public $verify_token; + public $sub_state; // subscribe, active, unsubscribe, inactive + public $sub_start; + public $sub_end; + public $last_update; + + public $created; + public $modified; + + public /*static*/ function staticGet($k, $v=null) + { + return parent::staticGet(__CLASS__, $k, $v); + } + + /** + * return table definition for DB_DataObject + * + * DB_DataObject needs to know something about the table to manipulate + * instances. This method provides all the DB_DataObject needs to know. + * + * @return array array of column definitions + */ + + function table() + { + return array('id' => DB_DATAOBJECT_INT + DB_DATAOBJECT_NOTNULL, + 'uri' => DB_DATAOBJECT_STR + DB_DATAOBJECT_NOTNULL, + 'huburi' => DB_DATAOBJECT_STR, + 'secret' => DB_DATAOBJECT_STR, + 'verify_token' => DB_DATAOBJECT_STR, + 'sub_state' => DB_DATAOBJECT_STR + DB_DATAOBJECT_NOTNULL, + 'sub_start' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME, + 'sub_end' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME, + 'last_update' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL, + 'created' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL, + 'modified' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL); + } + + static function schemaDef() + { + return array(new ColumnDef('id', 'integer', + /*size*/ null, + /*nullable*/ false, + /*key*/ 'PRI', + /*default*/ '0', + /*extra*/ null, + /*auto_increment*/ true), + new ColumnDef('uri', 'varchar', + 255, false, 'UNI'), + new ColumnDef('huburi', 'text', + null, true), + new ColumnDef('verify_token', 'text', + null, true), + new ColumnDef('secret', 'text', + null, true), + new ColumnDef('sub_state',
"enum('subscribe','active','unsubscribe','inactive')", + null, false), + new ColumnDef('sub_start', 'datetime', + null, true), + new ColumnDef('sub_end', 'datetime', + null, true), + new ColumnDef('last_update', 'datetime', + null, false), + new ColumnDef('created', 'datetime', + null, false), + new ColumnDef('modified', 'datetime', + null, false)); + } + + /** + * return key definitions for DB_DataObject + * + * DB_DataObject needs to know about keys that the table has; this function + * defines them. + * + * @return array key definitions + */ + + function keys() + { + return array_keys($this->keyTypes()); + } + + /** + * return key definitions for Memcached_DataObject + * + * Our caching system uses the same key definitions, but uses a different + * method to get them. + * + * @return array key definitions + */ + + function keyTypes() + { + return array('id' => 'K', 'uri' => 'U'); + } + + function sequenceKey() + { + return array('id', true, false); + } + + /** + * Fetch the StatusNet-side profile for this feed + * @return Profile + */ + public function localProfile() + { + if ($this->profile_id) { + return Profile::staticGet('id', $this->profile_id); + } + return null; + } + + /** + * Fetch the StatusNet-side profile for this feed + * @return Profile + */ + public function localGroup() + { + if ($this->group_id) { + return User_group::staticGet('id', $this->group_id); + } + return null; + } + + /** + * @param string $feeduri + * @return FeedSub + * @throws FeedSubException if feed is invalid or lacks PuSH setup + */ + public static function ensureFeed($feeduri) + { + $current = self::staticGet('uri', $feeduri); + if ($current) { + return $current; + } + + $discover = new FeedDiscovery(); + $discover->discoverFromFeedURL($feeduri); + + $huburi = $discover->getAtomLink('hub'); + if (!$huburi) { + throw new FeedSubNoHubException(); + } + + $feedsub = new FeedSub(); + $feedsub->uri = $feeduri; + $feedsub->huburi = $huburi; + $feedsub->sub_state = 'inactive'; + + 
$feedsub->created = common_sql_now(); + $feedsub->modified = common_sql_now(); + + $result = $feedsub->insert(); + if (empty($result)) { + throw new FeedDBException($feedsub); + } + + return $feedsub; + } + + /** + * Send a subscription request to the hub for this feed. + * The hub will later send us a confirmation POST to /main/push/callback. + * + * @return bool true on success, false on failure + * @throws ServerException if feed state is not valid + */ + public function subscribe($mode='subscribe') + { + if ($this->sub_state && $this->sub_state != 'inactive') { + throw new ServerException("Attempting to start PuSH subscription to feed in state $this->sub_state"); + } + if (empty($this->huburi)) { + if (common_config('feedsub', 'nohub')) { + // Fake it! We're just testing remote feeds w/o hubs. + return true; + } else { + throw new ServerException("Attempting to start PuSH subscription for feed with no hub"); + } + } + + return $this->doSubscribe('subscribe'); + } + + /** + * Send a PuSH unsubscription request to the hub for this feed. + * The hub will later send us a confirmation POST to /main/push/callback. + * + * @return bool true on success, false on failure + * @throws ServerException if feed state is not valid + */ + public function unsubscribe() { + if ($this->sub_state != 'active') { + throw new ServerException("Attempting to end PuSH subscription to feed in state $this->sub_state"); + } + if (empty($this->huburi)) { + if (common_config('feedsub', 'nohub')) { + // Fake it! We're just testing remote feeds w/o hubs. 
+ return true; + } else { + throw new ServerException("Attempting to end PuSH subscription for feed with no hub"); + } + } + + return $this->doSubscribe('unsubscribe'); + } + + protected function doSubscribe($mode) + { + $orig = clone($this); + $this->verify_token = common_good_rand(16); + if ($mode == 'subscribe') { + $this->secret = common_good_rand(32); + } + $this->sub_state = $mode; + $this->update($orig); + unset($orig); + + try { + $callback = common_local_url('pushcallback', array('feed' => $this->id)); + $headers = array('Content-Type: application/x-www-form-urlencoded'); + $post = array('hub.mode' => $mode, + 'hub.callback' => $callback, + 'hub.verify' => 'async', + 'hub.verify_token' => $this->verify_token, + 'hub.secret' => $this->secret, + //'hub.lease_seconds' => 0, + 'hub.topic' => $this->uri); + $client = new HTTPClient(); + $response = $client->post($this->huburi, $headers, $post); + $status = $response->getStatus(); + if ($status == 202) { + common_log(LOG_INFO, __METHOD__ . ': sub req ok, awaiting verification callback'); + return true; + } else if ($status == 204) { + common_log(LOG_INFO, __METHOD__ . ': sub req ok and verified'); + return true; + } else if ($status >= 200 && $status < 300) { + common_log(LOG_ERR, __METHOD__ . ": sub req returned unexpected HTTP $status: " . $response->getBody()); + return false; + } else { + common_log(LOG_ERR, __METHOD__ . ": sub req failed with HTTP $status: " . $response->getBody()); + return false; + } + } catch (Exception $e) { + // wtf! + common_log(LOG_ERR, __METHOD__ . ": error \"{$e->getMessage()}\" hitting hub $this->huburi subscribing to $this->uri"); + + $orig = clone($this); + $this->verify_token = null; + $this->sub_state = null; + $this->update($orig); + unset($orig); + + return false; + } + } + + /** + * Save PuSH subscription confirmation. + * Sets approximate lease start and end times and finalizes state. 
+ * + * @param int $lease_seconds provided hub.lease_seconds parameter, if given + */ + public function confirmSubscribe($lease_seconds=0) + { + $original = clone($this); + + $this->sub_state = 'active'; + $this->sub_start = common_sql_date(time()); + if ($lease_seconds > 0) { + $this->sub_end = common_sql_date(time() + $lease_seconds); + } else { + $this->sub_end = null; + } + $this->last_update = common_sql_now(); + + return $this->update($original); + } + + /** + * Save PuSH unsubscription confirmation. + * Wipes active PuSH sub info and resets state. + */ + public function confirmUnsubscribe() + { + $original = clone($this); + + // @fixme these should all be null, but DB_DataObject doesn't save null values...????? + $this->verify_token = ''; + $this->secret = ''; + $this->sub_state = ''; + $this->sub_start = ''; + $this->sub_end = ''; + $this->last_update = common_sql_now(); + + return $this->update($original); + } + + /** + * Accept updates from a PuSH feed. If validated, this object and the + * feed (as a DOMDocument) will be passed to the StartFeedSubReceive + * and EndFeedSubReceive events for processing. + * + * @param string $post source of Atom or RSS feed + * @param string $hmac X-Hub-Signature header, if present + */ + public function receive($post, $hmac) + { + common_log(LOG_INFO, __METHOD__ . ": packet for \"$this->uri\"! $hmac $post"); + + if ($this->sub_state != 'active') { + common_log(LOG_ERR, __METHOD__ . ": ignoring PuSH for inactive feed $this->uri (in state '$this->sub_state')"); + return; + } + + if ($post === '') { + common_log(LOG_ERR, __METHOD__ . ": ignoring empty post"); + return; + } + + if (!$this->validatePushSig($post, $hmac)) { + // Per spec we silently drop input with a bad sig, + // while reporting receipt to the server. + return; + } + + $feed = new DOMDocument(); + if (!$feed->loadXML($post)) { + // @fixme might help to include the err message + common_log(LOG_ERR, __METHOD__ .
": ignoring invalid XML"); + return; + } + + Event::handle('StartFeedSubReceive', array($this, $feed)); + Event::handle('EndFeedSubReceive', array($this, $feed)); + } + + /** + * Validate the given Atom chunk and HMAC signature against our + * shared secret that was set up at subscription time. + * + * If we don't have a shared secret, there should be no signature. + * If we we do, our the calculated HMAC should match theirs. + * + * @param string $post raw XML source as POSTed to us + * @param string $hmac X-Hub-Signature HTTP header value, or empty + * @return boolean true for a match + */ + protected function validatePushSig($post, $hmac) + { + if ($this->secret) { + if (preg_match('/^sha1=([0-9a-fA-F]{40})$/', $hmac, $matches)) { + $their_hmac = strtolower($matches[1]); + $our_hmac = hash_hmac('sha1', $post, $this->secret); + if ($their_hmac === $our_hmac) { + return true; + } + common_log(LOG_ERR, __METHOD__ . ": ignoring PuSH with bad SHA-1 HMAC: got $their_hmac, expected $our_hmac"); + } else { + common_log(LOG_ERR, __METHOD__ . ": ignoring PuSH with bogus HMAC '$hmac'"); + } + } else { + if (empty($hmac)) { + return true; + } else { + common_log(LOG_ERR, __METHOD__ . ": ignoring PuSH with unexpected HMAC '$hmac'"); + } + } + return false; + } + +} diff --git a/plugins/OStatus/classes/Ostatus_profile.php b/plugins/OStatus/classes/Ostatus_profile.php index 486417617c..1ce8ac4917 100644 --- a/plugins/OStatus/classes/Ostatus_profile.php +++ b/plugins/OStatus/classes/Ostatus_profile.php @@ -18,62 +18,24 @@ */ /** - * @package FeedSubPlugin + * @package OStatusPlugin * @maintainer Brion Vibber */ -/* -PuSH subscription flow: - - $profile->subscribe() - generate random verification token - save to verify_token - sends a sub request to the hub... - - main/push/callback - hub sends confirmation back to us via GET - We verify the request, then echo back the challenge. 
- On our end, we save the time we subscribed and the lease expiration - - main/push/callback - hub sends us updates via POST - -*/ - -class FeedDBException extends FeedSubException -{ - public $obj; - - function __construct($obj) - { - parent::__construct('Database insert failure'); - $this->obj = $obj; - } -} - class Ostatus_profile extends Memcached_DataObject { public $__table = 'ostatus_profile'; - public $id; + public $uri; + public $profile_id; public $group_id; public $feeduri; - public $homeuri; - - // PuSH subscription data - public $huburi; - public $secret; - public $verify_token; - public $sub_state; // subscribe, active, unsubscribe - public $sub_start; - public $sub_end; - public $salmonuri; public $created; - public $lastupdate; + public $modified; public /*static*/ function staticGet($k, $v=null) { @@ -91,56 +53,30 @@ class Ostatus_profile extends Memcached_DataObject function table() { - return array('id' => DB_DATAOBJECT_INT + DB_DATAOBJECT_NOTNULL, + return array('uri' => DB_DATAOBJECT_STR + DB_DATAOBJECT_NOTNULL, 'profile_id' => DB_DATAOBJECT_INT, 'group_id' => DB_DATAOBJECT_INT, 'feeduri' => DB_DATAOBJECT_STR + DB_DATAOBJECT_NOTNULL, - 'homeuri' => DB_DATAOBJECT_STR + DB_DATAOBJECT_NOTNULL, - 'huburi' => DB_DATAOBJECT_STR, - 'secret' => DB_DATAOBJECT_STR, - 'verify_token' => DB_DATAOBJECT_STR, - 'sub_state' => DB_DATAOBJECT_STR, - 'sub_start' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME, - 'sub_end' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME, 'salmonuri' => DB_DATAOBJECT_STR, 'created' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL, - 'lastupdate' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL); + 'modified' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL); } static function schemaDef() { - return array(new ColumnDef('id', 'integer', - /*size*/ null, - /*nullable*/ false, - /*key*/ 'PRI', - 
/*default*/ '0', - /*extra*/ null, - /*auto_increment*/ true), + return array(new ColumnDef('uri', 'varchar', + 255, false, 'PRI'), new ColumnDef('profile_id', 'integer', null, true, 'UNI'), new ColumnDef('group_id', 'integer', null, true, 'UNI'), new ColumnDef('feeduri', 'varchar', 255, false, 'UNI'), - new ColumnDef('homeuri', 'varchar', - 255, false), - new ColumnDef('huburi', 'text', - null, true), - new ColumnDef('verify_token', 'varchar', - 32, true), - new ColumnDef('secret', 'varchar', - 64, true), - new ColumnDef('sub_state', "enum('subscribe','active','unsubscribe')", - null, true), - new ColumnDef('sub_start', 'datetime', - null, true), - new ColumnDef('sub_end', 'datetime', - null, true), new ColumnDef('salmonuri', 'text', null, true), new ColumnDef('created', 'datetime', null, false), - new ColumnDef('lastupdate', 'datetime', + new ColumnDef('modified', 'datetime', null, false)); } @@ -169,12 +105,12 @@ class Ostatus_profile extends Memcached_DataObject function keyTypes() { - return array('id' => 'K', 'profile_id' => 'U', 'group_id' => 'U', 'feeduri' => 'U'); + return array('uri' => 'K', 'profile_id' => 'U', 'group_id' => 'U', 'feeduri' => 'U'); } function sequenceKey() { - return array('id', true, false); + return array(false, false, false); } /** @@ -201,101 +137,6 @@ class Ostatus_profile extends Memcached_DataObject return null; } - /** - * @param FeedMunger $munger - * @param boolean $isGroup is this a group record? - * @return Ostatus_profile - */ - public static function ensureProfile($munger) - { - $profile = $munger->ostatusProfile(); - - $current = self::staticGet('feeduri', $profile->feeduri); - if ($current) { - // @fixme we should probably update info as necessary - return $current; - } - - $profile->query('BEGIN'); - - try { - $local = $munger->profile(); - - if ($profile->isGroup()) { - $group = new User_group(); - $group->nickname = $local->nickname . 
'@remote'; // @fixme - $group->fullname = $local->fullname; - $group->homepage = $local->homepage; - $group->location = $local->location; - $group->created = $local->created; - $group->insert(); - if (empty($result)) { - throw new FeedDBException($group); - } - $profile->group_id = $group->id; - } else { - $result = $local->insert(); - if (empty($result)) { - throw new FeedDBException($local); - } - $profile->profile_id = $local->id; - } - - $profile->created = common_sql_now(); - $profile->lastupdate = common_sql_now(); - $result = $profile->insert(); - if (empty($result)) { - throw new FeedDBException($profile); - } - - $profile->query('COMMIT'); - } catch (FeedDBException $e) { - common_log_db_error($e->obj, 'INSERT', __FILE__); - $profile->query('ROLLBACK'); - return false; - } - - $avatar = $munger->getAvatar(); - if ($avatar) { - try { - $profile->updateAvatar($avatar); - } catch (Exception $e) { - common_log(LOG_ERR, "Exception setting OStatus avatar: " . - $e->getMessage()); - } - } - - return $profile; - } - - /** - * Download and update given avatar image - * @param string $url - * @throws Exception in various failure cases - */ - public function updateAvatar($url) - { - // @fixme this should be better encapsulated - // ripped from oauthstore.php (for old OMB client) - $temp_filename = tempnam(sys_get_temp_dir(), 'listener_avatar'); - copy($url, $temp_filename); - - // @fixme should we be using different ids? - $imagefile = new ImageFile($this->id, $temp_filename); - $filename = Avatar::filename($this->id, - image_type_to_extension($imagefile->type), - null, - common_timestamp()); - rename($temp_filename, Avatar::path($filename)); - if ($this->isGroup()) { - $group = $this->localGroup(); - $group->setOriginal($filename); - } else { - $profile = $this->localProfile(); - $profile->setOriginal($filename); - } - } - /** * Returns an XML string fragment with profile information as an * Activity Streams noun object with the given element type. 
@@ -345,7 +186,7 @@ class Ostatus_profile extends Memcached_DataObject $xs->element( 'id', null, - $this->homeuri); // ? + $this->uri); // ? $xs->element('title', null, $self->getBestName()); $xs->element( @@ -369,6 +210,61 @@ class Ostatus_profile extends Memcached_DataObject return (strpos($this->feeduri, '/groups/') !== false); } + /** + * Subscribe a local user to this remote user. + * PuSH subscription will be started if necessary, and we'll + * send a Salmon notification to the remote server if available + * notifying them of the sub. + * + * @param User $user + * @return boolean success + * @throws FeedException + */ + public function subscribeLocalToRemote(User $user) + { + if ($this->isGroup()) { + throw new ServerException("Can't subscribe to a remote group"); + } + + if ($this->subscribe()) { + if ($user->subscribeTo($this->localProfile())) { + $this->notify($user->getProfile(), ActivityVerb::FOLLOW, $this); + return true; + } + } + return false; + } + + /** + * Mark this remote profile as subscribing to the given local user, + * and send appropriate notifications to the user. + * + * This will generally be in response to a subscription notification + * from a foreign site to our local Salmon response channel. + * + * @param User $user + * @return boolean success + */ + public function subscribeRemoteToLocal(User $user) + { + if ($this->isGroup()) { + throw new ServerException("Remote groups can't subscribe to local users"); + } + + // @fixme use regular channels for subbing, once they accept remote profiles + $sub = new Subscription(); + $sub->subscriber = $this->profile_id; + $sub->subscribed = $user->id; + $sub->created = common_sql_now(); // current time + + if ($sub->insert()) { + // @fixme use subs_notify() if refactored to take profiles? + mail_subscribe_notify_profile($user, $this->localProfile()); + return true; + } + return false; + } + /** * Send a subscription request to the hub for this feed. 
* The hub will later send us a confirmation POST to /main/push/callback. @@ -378,19 +274,14 @@ class Ostatus_profile extends Memcached_DataObject */ public function subscribe($mode='subscribe') { - if ($this->sub_state != '') { - throw new ServerException("Attempting to start PuSH subscription to feed in state $this->sub_state"); + $feedsub = FeedSub::ensureFeed($this->feeduri); + if ($feedsub->sub_state == 'active' || $feedsub->sub_state == 'subscribe') { + return true; + } else if ($feedsub->sub_state == '' || $feedsub->sub_state == 'inactive') { + return $feedsub->subscribe(); + } else if ('unsubscribe') { + throw new FeedSubException("Unsub is pending, can't subscribe..."); } - if (empty($this->huburi)) { - if (common_config('feedsub', 'nohub')) { - // Fake it! We're just testing remote feeds w/o hubs. - return true; - } else { - throw new ServerException("Attempting to start PuSH subscription for feed with no hub"); - } - } - - return $this->doSubscribe('subscribe'); } /** @@ -401,111 +292,14 @@ class Ostatus_profile extends Memcached_DataObject * @throws ServerException if feed state is not valid */ public function unsubscribe() { - if ($this->sub_state != 'active') { - throw new ServerException("Attempting to end PuSH subscription to feed in state $this->sub_state"); + $feedsub = FeedSub::staticGet('uri', $this->feeduri); + if ($feedsub->sub_state == 'active') { + return $feedsub->unsubscribe(); + } else if ($feedsub->sub_state == '' || $feedsub->sub_state == 'inactive' || $feedsub->sub_state == 'unsubscribe') { + return true; + } else if ($feedsub->sub_state == 'subscribe') { + throw new FeedSubException("Feed is awaiting subscription, can't unsub..."); } - if (empty($this->huburi)) { - if (common_config('feedsub', 'nohub')) { - // Fake it! We're just testing remote feeds w/o hubs. 
- return true; - } else { - throw new ServerException("Attempting to end PuSH subscription for feed with no hub"); - } - } - - return $this->doSubscribe('unsubscribe'); - } - - protected function doSubscribe($mode) - { - $orig = clone($this); - $this->verify_token = common_good_rand(16); - if ($mode == 'subscribe') { - $this->secret = common_good_rand(32); - } - $this->sub_state = $mode; - $this->update($orig); - unset($orig); - - try { - $callback = common_local_url('pushcallback', array('feed' => $this->id)); - $headers = array('Content-Type: application/x-www-form-urlencoded'); - $post = array('hub.mode' => $mode, - 'hub.callback' => $callback, - 'hub.verify' => 'async', - 'hub.verify_token' => $this->verify_token, - 'hub.secret' => $this->secret, - //'hub.lease_seconds' => 0, - 'hub.topic' => $this->feeduri); - $client = new HTTPClient(); - $response = $client->post($this->huburi, $headers, $post); - $status = $response->getStatus(); - if ($status == 202) { - common_log(LOG_INFO, __METHOD__ . ': sub req ok, awaiting verification callback'); - return true; - } else if ($status == 204) { - common_log(LOG_INFO, __METHOD__ . ': sub req ok and verified'); - return true; - } else if ($status >= 200 && $status < 300) { - common_log(LOG_ERR, __METHOD__ . ": sub req returned unexpected HTTP $status: " . $response->getBody()); - return false; - } else { - common_log(LOG_ERR, __METHOD__ . ": sub req failed with HTTP $status: " . $response->getBody()); - return false; - } - } catch (Exception $e) { - // wtf! - common_log(LOG_ERR, __METHOD__ . ": error \"{$e->getMessage()}\" hitting hub $this->huburi subscribing to $this->feeduri"); - - $orig = clone($this); - $this->verify_token = null; - $this->sub_state = null; - $this->update($orig); - unset($orig); - - return false; - } - } - - /** - * Save PuSH subscription confirmation. - * Sets approximate lease start and end times and finalizes state. 
- * - * @param int $lease_seconds provided hub.lease_seconds parameter, if given - */ - public function confirmSubscribe($lease_seconds=0) - { - $original = clone($this); - - $this->sub_state = 'active'; - $this->sub_start = common_sql_date(time()); - if ($lease_seconds > 0) { - $this->sub_end = common_sql_date(time() + $lease_seconds); - } else { - $this->sub_end = null; - } - $this->lastupdate = common_sql_now(); - - return $this->update($original); - } - - /** - * Save PuSH unsubscription confirmation. - * Wipes active PuSH sub info and resets state. - */ - public function confirmUnsubscribe() - { - $original = clone($this); - - // @fixme these should all be null, but DB_DataObject doesn't save null values...????? - $this->verify_token = ''; - $this->secret = ''; - $this->sub_state = ''; - $this->sub_start = ''; - $this->sub_end = ''; - $this->lastupdate = common_sql_now(); - - return $this->update($original); } /** @@ -543,10 +337,7 @@ class Ostatus_profile extends Memcached_DataObject $entry->elementEnd('entry'); $feed = $this->atomFeed($actor); - #$feed->initFeed(); $feed->addEntry($entry); - #$feed->renderEntries(); - #$feed->endFeed(); $xml = $feed->getString(); common_log(LOG_INFO, "Posting to Salmon endpoint $salmon: $xml"); @@ -600,36 +391,10 @@ class Ostatus_profile extends Memcached_DataObject * Currently assumes that all items in the feed are new, * coming from a PuSH hub. * - * @param string $post source of Atom or RSS feed - * @param string $hmac X-Hub-Signature header, if present + * @param DOMDocument $feed */ - public function postUpdates($post, $hmac) + public function processFeed($feed) { - common_log(LOG_INFO, __METHOD__ . ": packet for \"$this->feeduri\"! $hmac $post"); - - if ($this->sub_state != 'active') { - common_log(LOG_ERR, __METHOD__ . ": ignoring PuSH for inactive feed $this->feeduri (in state '$this->sub_state')"); - return; - } - - if ($post === '') { - common_log(LOG_ERR, __METHOD__ . 
": ignoring empty post"); - return; - } - - if (!$this->validatePushSig($post, $hmac)) { - // Per spec we silently drop input with a bad sig, - // while reporting receipt to the server. - return; - } - - $feed = new DOMDocument(); - if (!$feed->loadXML($post)) { - // @fixme might help to include the err message - common_log(LOG_ERR, __METHOD__ . ": ignoring invalid XML"); - return; - } - $entries = $feed->getElementsByTagNameNS(Activity::ATOM, 'entry'); if ($entries->length == 0) { common_log(LOG_ERR, __METHOD__ . ": no entries in feed update, ignoring"); @@ -642,40 +407,6 @@ class Ostatus_profile extends Memcached_DataObject } } - /** - * Validate the given Atom chunk and HMAC signature against our - * shared secret that was set up at subscription time. - * - * If we don't have a shared secret, there should be no signature. - * If we we do, our the calculated HMAC should match theirs. - * - * @param string $post raw XML source as POSTed to us - * @param string $hmac X-Hub-Signature HTTP header value, or empty - * @return boolean true for a match - */ - protected function validatePushSig($post, $hmac) - { - if ($this->secret) { - if (preg_match('/^sha1=([0-9a-fA-F]{40})$/', $hmac, $matches)) { - $their_hmac = strtolower($matches[1]); - $our_hmac = hash_hmac('sha1', $post, $this->secret); - if ($their_hmac === $our_hmac) { - return true; - } - common_log(LOG_ERR, __METHOD__ . ": ignoring PuSH with bad SHA-1 HMAC: got $their_hmac, expected $our_hmac"); - } else { - common_log(LOG_ERR, __METHOD__ . ": ignoring PuSH with bogus HMAC '$hmac'"); - } - } else { - if (empty($hmac)) { - return true; - } else { - common_log(LOG_ERR, __METHOD__ . ": ignoring PuSH with unexpected HMAC '$hmac'"); - } - } - return false; - } - /** * Process a posted entry from this feed source. * @@ -704,14 +435,14 @@ class Ostatus_profile extends Memcached_DataObject { if ($this->isGroup()) { // @fixme validate these profiles in some way! 
- $oprofile = $this->ensureActorProfile($activity); + $oprofile = self::ensureActorProfile($activity); } else { - $actorUri = $this->getActorProfileURI($activity); - if ($actorUri == $this->homeuri) { + $actorUri = self::getActorProfileURI($activity); + if ($actorUri == $this->uri) { // @fixme check if profile info has changed and update it } else { // @fixme drop or reject the messages once we've got the canonical profile URI recorded sanely - common_log(LOG_INFO, "OStatus: Warning: non-group post with unexpected author: $actorUri expected $this->homeuri"); + common_log(LOG_INFO, "OStatus: Warning: non-group post with unexpected author: $actorUri expected $this->uri"); //return; } $oprofile = $this; @@ -787,6 +518,65 @@ class Ostatus_profile extends Memcached_DataObject return false; } + /** + * @param string $profile_url + * @return Ostatus_profile + * @throws FeedSubException + */ + public static function ensureProfile($profile_uri) + { + // Get the canonical feed URI and check it + $discover = new FeedDiscovery(); + $feeduri = $discover->discoverFromURL($profile_uri); + + $feedsub = FeedSub::ensureFeed($feeduri, $discover->feed); + $huburi = $discover->getAtomLink('hub'); + $salmonuri = $discover->getAtomLink('salmon'); + + if (!$huburi) { + // We can only deal with folks with a PuSH hub + throw new FeedSubNoHubException(); + } + + // Ok this is going to be a terrible hack! + // Won't be suitable for groups, empty feeds, or getting + // info that's only available on the profile page. 
+ $entries = $discover->feed->getElementsByTagNameNS(Activity::ATOM, 'entry'); + if (!$entries || $entries->length == 0) { + throw new FeedSubException('empty feed'); + } + $first = new Activity($entries->item(0), $discover->feed); + return self::ensureActorProfile($first, $feeduri); + } + + /** + * Download and update given avatar image + * @param string $url + * @throws Exception in various failure cases + */ + protected function updateAvatar($url) + { + // @fixme this should be better encapsulated + // ripped from oauthstore.php (for old OMB client) + $temp_filename = tempnam(sys_get_temp_dir(), 'listener_avatar'); + copy($url, $temp_filename); + + // @fixme should we be using different ids? + $imagefile = new ImageFile($this->id, $temp_filename); + $filename = Avatar::filename($this->id, + image_type_to_extension($imagefile->type), + null, + common_timestamp()); + rename($temp_filename, Avatar::path($filename)); + if ($this->isGroup()) { + $group = $this->localGroup(); + $group->setOriginal($filename); + } else { + $profile = $this->localProfile(); + $profile->setOriginal($filename); + } + } + /** * Get an appropriate avatar image source URL, if available. * @@ -794,7 +584,7 @@ class Ostatus_profile extends Memcached_DataObject * @param DOMElement $feed * @return string */ - function getAvatar($actor, $feed) + protected static function getAvatar($actor, $feed) { $url = ''; $icon = ''; @@ -833,13 +623,18 @@ class Ostatus_profile extends Memcached_DataObject } /** - * @fixme move off of ostatus_profile or static? + * Fetch, or build if necessary, an Ostatus_profile for the actor + * in a given Activity Streams activity. + * + * @param Activity $activity + * @param string $feeduri if we already know the canonical feed URI! 
+ * @return Ostatus_profile */ - function ensureActorProfile($activity) + public static function ensureActorProfile($activity, $feeduri=null) { - $profile = $this->getActorProfile($activity); + $profile = self::getActorProfile($activity); if (!$profile) { - $profile = $this->createActorProfile($activity); + $profile = self::createActorProfile($activity, $feeduri); } return $profile; } @@ -848,10 +643,10 @@ class Ostatus_profile extends Memcached_DataObject * @param Activity $activity * @return mixed matching Ostatus_profile or false if none known */ - function getActorProfile($activity) + protected static function getActorProfile($activity) { - $homeuri = $this->getActorProfileURI($activity); - return Ostatus_profile::staticGet('homeuri', $homeuri); + $homeuri = self::getActorProfileURI($activity); + return self::staticGet('uri', $homeuri); } /** @@ -859,7 +654,7 @@ class Ostatus_profile extends Memcached_DataObject * @return string * @throws ServerException */ - function getActorProfileURI($activity) + protected static function getActorProfileURI($activity) { $opts = array('allowed_schemes' => array('http', 'https')); $actor = $activity->actor; @@ -873,14 +668,19 @@ class Ostatus_profile extends Memcached_DataObject } /** - * + * @fixme validate stuff somewhere */ - function createActorProfile($activity) + protected static function createActorProfile($activity, $feeduri=null) { - $actor = $activity->actor(); - $homeuri = $this->getActivityProfileURI($activity); - $nickname = $this->getAuthorNick($activity); - $avatar = $this->getAvatar($actor, $feed); + $actor = $activity->actor; + $homeuri = self::getActorProfileURI($activity); + $nickname = self::getAuthorNick($activity); + $avatar = self::getAvatar($actor, $feed); + + if (!$homeuri) { + common_log(LOG_DEBUG, __METHOD__ . " empty actor profile URI: " . 
var_export($activity, true)); + throw new ServerException("No profile URI"); + } $profile = new Profile(); $profile->nickname = $nickname; @@ -894,9 +694,7 @@ class Ostatus_profile extends Memcached_DataObject // @todo lat/lon/location? $ok = $profile->insert(); - if ($ok) { - $this->updateAvatar($profile, $avatar); - } else { + if (!$ok) { throw new ServerException("Can't save local profile"); } @@ -904,11 +702,15 @@ class Ostatus_profile extends Memcached_DataObject // or need to split out some of the feed stuff // so we can leave it empty until later. $oprofile = new Ostatus_profile(); - $oprofile->homeuri = $homeuri; + $oprofile->uri = $homeuri; + if ($feeduri) { + $oprofile->feeduri = $feeduri; + } $oprofile->profile_id = $profile->id; $ok = $oprofile->insert(); if ($ok) { + $oprofile->updateAvatar($avatar); return $oprofile; } else { throw new ServerException("Can't save OStatus profile"); @@ -920,13 +722,13 @@ class Ostatus_profile extends Memcached_DataObject * @param Activity $activity * @return string */ - function getAuthorNick($activity) + protected static function getAuthorNick($activity) { // @fixme not technically part of the actor? 
foreach (array($activity->entry, $activity->feed) as $source) { - $author = ActivityUtil::child($source, 'author', Activity::ATOM); + $author = ActivityUtils::child($source, 'author', Activity::ATOM); if ($author) { - $name = ActivityUtil::child($author, 'name', Activity::ATOM); + $name = ActivityUtils::child($author, 'name', Activity::ATOM); if ($name) { return trim($name->textContent); } diff --git a/plugins/OStatus/lib/feeddiscovery.php b/plugins/OStatus/lib/feeddiscovery.php index 39985fc902..7afb71bdc1 100644 --- a/plugins/OStatus/lib/feeddiscovery.php +++ b/plugins/OStatus/lib/feeddiscovery.php @@ -48,6 +48,14 @@ class FeedSubNoFeedException extends FeedSubException { } +class FeedSubBadXmlException extends FeedSubException +{ +} + +class FeedSubNoHubException extends FeedSubException +{ +} + /** * Given a web page or feed URL, discover the final location of the feed * and return its current contents. @@ -57,21 +65,25 @@ class FeedSubNoFeedException extends FeedSubException * if ($feed->discoverFromURL($url)) { * print $feed->uri; * print $feed->type; - * processFeed($feed->body); + * processFeed($feed->feed); // DOMDocument * } */ class FeedDiscovery { public $uri; public $type; - public $body; + public $feed; - - public function feedMunger() + /** Post-initialize query helper... */ + public function getLink($rel, $type=null) { - require_once 'XML/Feed/Parser.php'; - $feed = new XML_Feed_Parser($this->body, false, false, true); // @fixme - return new FeedMunger($feed, $this->uri); + // @fixme check for non-Atom links in RSS2 feeds as well + return self::getAtomLink($rel, $type); + } + + public function getAtomLink($rel, $type=null) + { + return ActivityUtils::getLink($this->feed->documentElement, $rel, $type); } /** @@ -90,6 +102,7 @@ class FeedDiscovery $client = new HTTPClient(); $response = $client->get($url); } catch (HTTP_Request2_Exception $e) { + common_log(LOG_ERR, __METHOD__ . " Failure for $url - " . 
$e->getMessage()); throw new FeedSubBadURLException($e); } @@ -107,7 +120,12 @@ class FeedDiscovery return $this->initFromResponse($response); } - + + function discoverFromFeedURL($url) + { + return $this->discoverFromURL($url, false); + } + function initFromResponse($response) { if (!$response->isOk()) { @@ -122,16 +140,26 @@ class FeedDiscovery $type = $response->getHeader('Content-Type'); if (preg_match('!^(text/xml|application/xml|application/(rss|atom)\+xml)!i', $type)) { - $this->uri = $sourceurl; - $this->type = $type; - $this->body = $body; - return true; + return $this->init($sourceurl, $type, $body); } else { common_log(LOG_WARNING, "Unrecognized feed type $type for $sourceurl"); throw new FeedSubUnrecognizedTypeException($type); } } + function init($sourceurl, $type, $body) + { + $feed = new DOMDocument(); + if ($feed->loadXML($body)) { + $this->uri = $sourceurl; + $this->type = $type; + $this->feed = $feed; + return $this->uri; + } else { + throw new FeedSubBadXmlException($url); + } + } + /** * @param string $url source URL, used to resolve relative links * @param string $body HTML body text diff --git a/plugins/OStatus/lib/feedmunger.php b/plugins/OStatus/lib/feedmunger.php deleted file mode 100644 index e8c46de90e..0000000000 --- a/plugins/OStatus/lib/feedmunger.php +++ /dev/null @@ -1,350 +0,0 @@ -. 
- */ - -/** - * @package FeedSubPlugin - * @maintainer Brion Vibber - */ - -if (!defined('STATUSNET') && !defined('LACONICA')) { exit(1); } - -class FeedSubPreviewNotice extends Notice -{ - protected $fetched = true; - - function __construct($profile) - { - $this->profile = $profile; - $this->profile_id = 0; - } - - function getProfile() - { - return $this->profile; - } - - function find() - { - return true; - } - - function fetch() - { - $got = $this->fetched; - $this->fetched = false; - return $got; - } -} - -class FeedSubPreviewProfile extends Profile -{ - function getAvatar($width, $height=null) - { - return new FeedSubPreviewAvatar($width, $height, $this->avatar); - } -} - -class FeedSubPreviewAvatar extends Avatar -{ - function __construct($width, $height, $remote) - { - $this->remoteImage = $remote; - } - - function displayUrl() { - return $this->remoteImage; - } -} - -class FeedMunger -{ - /** - * @param XML_Feed_Parser $feed - */ - function __construct($feed, $url=null) - { - $this->feed = $feed; - $this->url = $url; - } - - function ostatusProfile() - { - $profile = new Ostatus_profile(); - $profile->feeduri = $this->url; - $profile->homeuri = $this->feed->link; - $profile->huburi = $this->getHubLink(); - $salmon = $this->getSalmonLink(); - if ($salmon) { - $profile->salmonuri = $salmon; - } - return $profile; - } - - function getAtomLink($item, $attribs=array()) - { - // XML_Feed_Parser gets confused by multiple elements. - $dom = $item->model; - - // Note that RSS feeds would embed an so this should work for both. 
- /// http://code.google.com/p/pubsubhubbub/wiki/RssFeeds - // - $links = $dom->getElementsByTagNameNS('http://www.w3.org/2005/Atom', 'link'); - for ($i = 0; $i < $links->length; $i++) { - $node = $links->item($i); - if ($node->hasAttributes()) { - $href = $node->attributes->getNamedItem('href'); - if ($href) { - $matches = 0; - foreach ($attribs as $name => $val) { - $attrib = $node->attributes->getNamedItem($name); - if ($attrib && $attrib->value == $val) { - $matches++; - } - } - if ($matches == count($attribs)) { - return $href->value; - } - } - } - } - return false; - } - - function getRssLink($item) - { - // XML_Feed_Parser gets confused by multiple elements. - $dom = $item->model; - - // Note that RSS feeds would embed an so this should work for both. - /// http://code.google.com/p/pubsubhubbub/wiki/RssFeeds - // - $links = $dom->getElementsByTagName('link'); - for ($i = 0; $i < $links->length; $i++) { - $node = $links->item($i); - if (!$node->hasAttributes()) { - return $node->textContent; - } - } - return false; - } - - function getAltLink($item) - { - // Check for an atom link... - $link = $this->getAtomLink($item, array('rel' => 'alternate', 'type' => 'text/html')); - if (!$link) { - $link = $this->getRssLink($item); - } - return $link; - } - - function getHubLink() - { - return $this->getAtomLink($this->feed, array('rel' => 'hub')); - } - - function getSalmonLink() - { - return $this->getAtomLink($this->feed, array('rel' => 'salmon')); - } - - function getSelfLink() - { - return $this->getAtomLink($this->feed, array('rel' => 'self')); - } - - /** - * Get an appropriate avatar image source URL, if available. 
- * @return mixed string or false - */ - function getAvatar() - { - $logo = $this->feed->logo; - if ($logo) { - return $logo; - } - $icon = $this->feed->icon; - if ($icon) { - return $icon; - } - return common_path('plugins/OStatus/images/48px-Feed-icon.svg.png'); - } - - function profile($preview=false) - { - if ($preview) { - $profile = new FeedSubPreviewProfile(); - } else { - $profile = new Profile(); - } - - // @todo validate/normalize nick? - $profile->nickname = $this->feed->title; - $profile->fullname = $this->feed->title; - $profile->homepage = $this->getAltLink($this->feed); - $profile->bio = $this->feed->description; - $profile->profileurl = $this->getAltLink($this->feed); - - if ($preview) { - $profile->avatar = $this->getAvatar(); - } - - // @todo tags from categories - // @todo lat/lon/location? - - return $profile; - } - - function notice($index=1, $preview=false) - { - $entry = $this->feed->getEntryByOffset($index); - if (!$entry) { - return null; - } - - if ($preview) { - $notice = new FeedSubPreviewNotice($this->profile(true)); - $notice->id = -1; - } else { - $notice = new Notice(); - $notice->profile_id = $this->profileIdForEntry($index); - } - - $link = $this->getAltLink($entry); - if (empty($link)) { - if (preg_match('!^https?://!', $entry->id)) { - $link = $entry->id; - common_log(LOG_DEBUG, "No link on entry, using URL from id: $link"); - } - } - $notice->uri = $link; - $notice->url = $link; - $notice->content = $this->noticeFromEntry($entry); - $notice->rendered = common_render_content($notice->content, $notice); // @fixme this is failing on group posts - $notice->created = common_sql_date($entry->updated); // @fixme - $notice->is_local = Notice::GATEWAY; - $notice->source = 'feed'; - - $location = $this->getLocation($entry); - if ($location) { - if ($location->location_id) { - $notice->location_ns = $location->location_ns; - $notice->location_id = $location->location_id; - } - $notice->lat = $location->lat; - $notice->lon = $location->lon; 
- } - - return $notice; - } - - function profileIdForEntry($index=1) - { - // hack hack hack - // should get profile for this entry's author... - $feeduri = $this->getSelfLink(); - $remote = Ostatus_profile::staticGet('feeduri', $feeduri); - if ($remote) { - return $remote->profile_id; - } else { - throw new Exception("Can't find feed profile for $feeduri"); - } - } - - /** - * Parse location given as a GeoRSS-simple point, if provided. - * http://www.georss.org/simple - * - * @param feed item $entry - * @return mixed Location or false - */ - function getLocation($entry) - { - $dom = $entry->model; - $points = $dom->getElementsByTagNameNS('http://www.georss.org/georss', 'point'); - - for ($i = 0; $i < $points->length; $i++) { - $point = $points->item(0)->textContent; - $point = str_replace(',', ' ', $point); // per spec "treat commas as whitespace" - $point = preg_replace('/\s+/', ' ', $point); - $point = trim($point); - $coords = explode(' ', $point); - if (count($coords) == 2) { - list($lat, $lon) = $coords; - if (is_numeric($lat) && is_numeric($lon)) { - common_log(LOG_INFO, "Looking up location for $lat $lon from georss"); - return Location::fromLatLon($lat, $lon); - } - } - common_log(LOG_ERR, "Ignoring bogus georss:point value $point"); - } - - return false; - } - - /** - * @param XML_Feed_Type $entry - * @return string notice text, within post size limit - */ - function noticeFromEntry($entry) - { - $max = Notice::maxContent(); - $ellipsis = "\xe2\x80\xa6"; // U+2026 HORIZONTAL ELLIPSIS - $title = $entry->title; - $link = $entry->link; - - // @todo We can get entries like this: - // $cats = $entry->getCategory('category', array(0, true)); - // but it feels like an awful hack. If it's accessible cleanly, - // try adding #hashtags from the categories/tags on a post. - - $title = $entry->title; - $link = $this->getAltLink($entry); - if ($link) { - // Blog post or such... - // @todo Should we force a language here? 
- $format = _m('New post: "%1$s" %2$s'); - $out = sprintf($format, $title, $link); - - // Trim link if needed... - if (mb_strlen($out) > $max) { - $link = common_shorten_url($link); - $out = sprintf($format, $title, $link); - } - - // Trim title if needed... - if (mb_strlen($out) > $max) { - $used = mb_strlen($out) - mb_strlen($title); - $available = $max - $used - mb_strlen($ellipsis); - $title = mb_substr($title, 0, $available) . $ellipsis; - $out = sprintf($format, $title, $link); - } - } else { - // No link? Consider a bare status update. - if (mb_strlen($title) > $max) { - $available = $max - mb_strlen($ellipsis); - $out = mb_substr($title, 0, $available) . $ellipsis; - } else { - $out = $title; - } - } - - return $out; - } -} From 3d665f82d17d36c11bf4f36e84fdeedd911d62c8 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 18 Feb 2010 22:13:47 -0500 Subject: [PATCH 13/62] add type='text/html' to alternate link in Notice Atom --- classes/Notice.php | 1 + 1 file changed, 1 insertion(+) diff --git a/classes/Notice.php b/classes/Notice.php index 7e2b8b4a69..a52cfed70c 100644 --- a/classes/Notice.php +++ b/classes/Notice.php @@ -1012,6 +1012,7 @@ class Notice extends Memcached_DataObject $xs->raw($profile->asActivityActor()); $xs->element('link', array('rel' => 'alternate', + 'type' => 'text/html', 'href' => $this->bestUrl())); $xs->element('id', null, $this->uri); From 48edade751cca3ac7363808264a76c470a520528 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Thu, 18 Feb 2010 22:18:14 -0500 Subject: [PATCH 14/62] add ActivityContext class and test it --- plugins/OStatus/actions/salmon.php | 1 + plugins/OStatus/classes/Ostatus_profile.php | 32 +------- plugins/OStatus/lib/activity.php | 85 +++++++++++++++++++- plugins/OStatus/tests/ActivityParseTests.php | 53 +++++++++++- 4 files changed, 138 insertions(+), 33 deletions(-) diff --git a/plugins/OStatus/actions/salmon.php b/plugins/OStatus/actions/salmon.php index ea5b8e4ea8..e9d6015f43 100644 --- 
a/plugins/OStatus/actions/salmon.php +++ b/plugins/OStatus/actions/salmon.php @@ -71,6 +71,7 @@ class SalmonAction extends Action /** * @fixme probably call Ostatus_profile::processFeed */ + function handle($args) { common_log(LOG_INFO, 'Salmon: incoming post for user '. $this->user->id); diff --git a/plugins/OStatus/classes/Ostatus_profile.php b/plugins/OStatus/classes/Ostatus_profile.php index 1ce8ac4917..e0cb467e5e 100644 --- a/plugins/OStatus/classes/Ostatus_profile.php +++ b/plugins/OStatus/classes/Ostatus_profile.php @@ -488,36 +488,6 @@ class Ostatus_profile extends Memcached_DataObject $params); } - /** - * Parse location given as a GeoRSS-simple point, if provided. - * http://www.georss.org/simple - * - * @param feed item $entry - * @return mixed Location or false - */ - function getLocation($dom) - { - $points = $dom->getElementsByTagNameNS('http://www.georss.org/georss', 'point'); - - for ($i = 0; $i < $points->length; $i++) { - $point = $points->item(0)->textContent; - $point = str_replace(',', ' ', $point); // per spec "treat commas as whitespace" - $point = preg_replace('/\s+/', ' ', $point); - $point = trim($point); - $coords = explode(' ', $point); - if (count($coords) == 2) { - list($lat, $lon) = $coords; - if (is_numeric($lat) && is_numeric($lon)) { - common_log(LOG_INFO, "Looking up location for $lat $lon from georss"); - return Location::fromLatLon($lat, $lon); - } - } - common_log(LOG_ERR, "Ignoring bogus georss:point value $point"); - } - - return false; - } - /** * @param string $profile_url * @return Ostatus_profile @@ -560,7 +530,7 @@ class Ostatus_profile extends Memcached_DataObject // ripped from oauthstore.php (for old OMB client) $temp_filename = tempnam(sys_get_temp_dir(), 'listener_avatar'); copy($url, $temp_filename); - + // @fixme should we be using different ids? 
$imagefile = new ImageFile($this->id, $temp_filename); $filename = Avatar::filename($this->id, diff --git a/plugins/OStatus/lib/activity.php b/plugins/OStatus/lib/activity.php index 3ed613dc7f..5bc8f78e52 100644 --- a/plugins/OStatus/lib/activity.php +++ b/plugins/OStatus/lib/activity.php @@ -311,6 +311,87 @@ class ActivityVerb const LEAVE = 'http://ostatus.org/schema/1.0/leave'; } +class ActivityContext +{ + public $replyToID; + public $replyToUrl; + public $location; + public $attention = array(); + public $conversation; + + const THR = 'http://purl.org/syndication/thread/1.0'; + const GEORSS = 'http://www.georss.org/georss'; + const OSTATUS = 'http://ostatus.org/schema/1.0'; + + const INREPLYTO = 'in-reply-to'; + const REF = 'ref'; + const HREF = 'href'; + + const POINT = 'point'; + + const ATTENTION = 'ostatus:attention'; + const CONVERSATION = 'ostatus:conversation'; + + function __construct($element) + { + $replyToEl = ActivityUtils::child($element, self::INREPLYTO, self::THR); + + if (!empty($replyToEl)) { + $this->replyToID = $replyToEl->getAttribute(self::REF); + $this->replyToUrl = $replyToEl->getAttribute(self::HREF); + } + + $this->location = $this->getLocation($element); + + $this->conversation = ActivityUtils::getLink($element, self::CONVERSATION); + + // Multiple attention links allowed + + $links = $element->getElementsByTagNameNS(ActivityUtils::ATOM, ActivityUtils::LINK); + + for ($i = 0; $i < $links->length; $i++) { + + $link = $links->item($i); + + $linkRel = $link->getAttribute(ActivityUtils::REL); + + if ($linkRel == self::ATTENTION) { + $this->attention[] = $link->getAttribute(self::HREF); + } + } + } + + /** + * Parse location given as a GeoRSS-simple point, if provided. 
+ * http://www.georss.org/simple + * + * @param feed item $entry + * @return mixed Location or false + */ + function getLocation($dom) + { + $points = $dom->getElementsByTagNameNS(self::GEORSS, self::POINT); + + for ($i = 0; $i < $points->length; $i++) { + $point = $points->item($i)->textContent; + $point = str_replace(',', ' ', $point); // per spec "treat commas as whitespace" + $point = preg_replace('/\s+/', ' ', $point); + $point = trim($point); + $coords = explode(' ', $point); + if (count($coords) == 2) { + list($lat, $lon) = $coords; + if (is_numeric($lat) && is_numeric($lon)) { + common_log(LOG_INFO, "Looking up location for $lat $lon from georss"); + return Location::fromLatLon($lat, $lon); + } + } + common_log(LOG_ERR, "Ignoring bogus georss:point value $point"); + } + + return null; + } +} + /** * An activity in the ActivityStrea.ms world * @@ -426,7 +507,9 @@ class Activity $contextEl = $this->_child($entry, self::CONTEXT); if (!empty($contextEl)) { - $this->context = new ActivityObject($contextEl); + $this->context = new ActivityContext($contextEl); + } else { + $this->context = new ActivityContext($entry); } $targetEl = $this->_child($entry, self::TARGET); diff --git a/plugins/OStatus/tests/ActivityParseTests.php b/plugins/OStatus/tests/ActivityParseTests.php index fa8bcdda23..35b4b0f9d7 100644 --- a/plugins/OStatus/tests/ActivityParseTests.php +++ b/plugins/OStatus/tests/ActivityParseTests.php @@ -57,12 +57,35 @@ class ActivityParseTests extends PHPUnit_Framework_TestCase $this->assertEquals($act->object->summary, 'Some text.'); $this->assertEquals($act->object->link, 'http://example.org/2003/12/13/atom03.html'); - $this->assertTrue(empty($act->context)); + $this->assertFalse(empty($act->context)); + $this->assertTrue(empty($act->target)); $this->assertEquals($act->entry, $entry); $this->assertEquals($act->feed, $feed); } + + public function testExample4() + { + global $_example4; + $dom = DOMDocument::loadXML($_example4); + + $entry = 
$dom->documentElement; + + $act = new Activity($entry); + + $this->assertFalse(empty($act)); + $this->assertEquals(1266547958, $act->time); + $this->assertEquals('http://example.net/notice/14', $act->link); + + $this->assertFalse(empty($act->context)); + $this->assertEquals('http://example.net/notice/12', $act->context->replyToID); + $this->assertEquals('http://example.net/notice/12', $act->context->replyToUrl); + $this->assertEquals('http://example.net/conversation/11', $act->context->conversation); + $this->assertEquals(array('http://example.net/user/1'), $act->context->attention); + + $this->assertFalse(empty($act->actor)); + } } $_example1 = << EXAMPLE3; + +$_example4 = << + + @evan now is the time for all good men to come to the aid of their country. #thetime + @evan now is the time for all good men to come to the aid of their country. #thetime + + spock + http://example.net/user/2 + + + http://activitystrea.ms/schema/1.0/person + http://example.net/user/2 + spock + + + + http://example.net/notice/14 + 2010-02-19T02:52:38+00:00 + 2010-02-19T02:52:38+00:00 + + + + + @<span class="vcard"><a href="http://example.net/user/1" class="url"><span class="fn nickname">evan</span></a></span> now is the time for all good men to come to the aid of their country. 
#<span class="tag"><a href="http://example.net/tag/thetime" rel="tag">thetime</a></span> + + +EXAMPLE4; From b03c7a383f00a97b08a270c6c472b46e11314b84 Mon Sep 17 00:00:00 2001 From: Sarven Capadisli Date: Fri, 19 Feb 2010 13:03:26 +0100 Subject: [PATCH 15/62] Only load json2.js if native JSON is not supported --- lib/action.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/action.php b/lib/action.php index b85f353a3d..cd84662c96 100644 --- a/lib/action.php +++ b/lib/action.php @@ -249,7 +249,7 @@ class Action extends HTMLOutputter // lawsuit $this->script('jquery.min.js'); $this->script('jquery.form.js'); $this->script('jquery.cookie.js'); - $this->script('json2.js'); + $this->inlineScript('if (typeof window.JSON !== "object") { $.getScript("'.common_path('js/json2.js').'"); }'); $this->script('jquery.joverlay.min.js'); Event::handle('EndShowJQueryScripts', array($this)); } From 8d59f7cc9d7f9f12d46530db2d9efe0b7aaac1a5 Mon Sep 17 00:00:00 2001 From: Sarven Capadisli Date: Fri, 19 Feb 2010 13:13:05 +0100 Subject: [PATCH 16/62] Using inlineScript instead for clickjacking check --- lib/action.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lib/action.php b/lib/action.php index cd84662c96..fa9ddb9110 100644 --- a/lib/action.php +++ b/lib/action.php @@ -259,8 +259,7 @@ class Action extends HTMLOutputter // lawsuit $this->script('util.js'); $this->script('geometa.js'); // Frame-busting code to avoid clickjacking attacks. 
- $this->element('script', array('type' => 'text/javascript'), - 'if (window.top !== window.self) { window.top.location.href = window.self.location.href; }'); + $this->inlineScript('if (window.top !== window.self) { window.top.location.href = window.self.location.href; }'); Event::handle('EndShowStatusNetScripts', array($this)); Event::handle('EndShowLaconicaScripts', array($this)); } From 52e8aa798a23b2832a748189b42c3bc77d65c9c7 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Fri, 19 Feb 2010 08:16:45 -0500 Subject: [PATCH 17/62] Refactor subs_* functions for remote use The subs_* functions in subs.php have made a lot of assumptions about users versus profiles. I've refactored the functions to be methods of the Subscription class instead, and to use Profile objects throughout. Some of the checks for blocks or existing subscriptions depended on users or profiles, so I've moved those methods around a bit. I've left stubs for the subs_* functions until we get time to replace them. --- classes/Profile.php | 12 +++ classes/Subscription.php | 154 ++++++++++++++++++++++++++++++++++++++- classes/User.php | 19 +---- lib/subs.php | 112 ++-------------------------- 4 files changed, 170 insertions(+), 127 deletions(-) diff --git a/classes/Profile.php b/classes/Profile.php index 494c697e42..6b396c8c34 100644 --- a/classes/Profile.php +++ b/classes/Profile.php @@ -868,4 +868,16 @@ class Profile extends Memcached_DataObject return $uri; } + function hasBlocked($other) + { + $block = Profile_block::get($this->id, $other->id); + + if (empty($block)) { + $result = false; + } else { + $result = true; + } + + return $result; + } } diff --git a/classes/Subscription.php b/classes/Subscription.php index faf1331cda..d6fb3fcbdd 100644 --- a/classes/Subscription.php +++ b/classes/Subscription.php @@ -24,7 +24,7 @@ if (!defined('STATUSNET') && !defined('LACONICA')) { exit(1); } */ require_once INSTALLDIR.'/classes/Memcached_DataObject.php'; -class Subscription extends 
Memcached_DataObject +class Subscription extends Memcached_DataObject { ###START_AUTOCODE /* the code below is auto generated do not remove the above tag */ @@ -34,8 +34,8 @@ class Subscription extends Memcached_DataObject public $subscribed; // int(4) primary_key not_null public $jabber; // tinyint(1) default_1 public $sms; // tinyint(1) default_1 - public $token; // varchar(255) - public $secret; // varchar(255) + public $token; // varchar(255) + public $secret; // varchar(255) public $created; // datetime() not_null public $modified; // timestamp() not_null default_CURRENT_TIMESTAMP @@ -45,9 +45,155 @@ class Subscription extends Memcached_DataObject /* the code above is auto generated do not remove the tag below */ ###END_AUTOCODE - + function pkeyGet($kv) { return Memcached_DataObject::pkeyGet('Subscription', $kv); } + + /** + * Make a new subscription + * + * @param Profile $subscriber party to receive new notices + * @param Profile $other party sending notices; publisher + * + * @return Subscription new subscription + */ + + static function start($subscriber, $other) + { + if (!$subscriber->hasRight(Right::SUBSCRIBE)) { + throw new Exception(_('You have been banned from subscribing.')); + } + + if (self::exists($subscriber, $other)) { + throw new Exception(_('Already subscribed!')); + } + + if ($other->hasBlocked($subscriber)) { + throw new Exception(_('User has blocked you.')); + } + + if (Event::handle('StartSubscribe', array($subscriber, $other))) { + + $sub = new Subscription(); + + $sub->subscriber = $subscriber->id; + $sub->subscribed = $other->id; + $sub->created = common_sql_now(); + + $result = $sub->insert(); + + if (!$result) { + common_log_db_error($sub, 'INSERT', __FILE__); + throw new Exception(_('Could not save subscription.')); + } + + $sub->notify(); + + self::blow('user:notices_with_friends:%d', $subscriber->id); + + $subscriber->blowSubscriptionsCount(); + $other->blowSubscribersCount(); + + $otherUser = User::staticGet('id', $other->id); + 
+ if (!empty($otherUser) && + $otherUser->autosubscribe && + !self::exists($other, $subscriber) && + !$subscriber->hasBlocked($other)) { + + $auto = new Subscription(); + + $auto->subscriber = $subscriber->id; + $auto->subscribed = $other->id; + $auto->created = common_sql_now(); + + $result = $auto->insert(); + + if (!$result) { + common_log_db_error($auto, 'INSERT', __FILE__); + throw new Exception(_('Could not save subscription.')); + } + + $auto->notify(); + } + + Event::handle('EndSubscribe', array($subscriber, $other)); + } + + return true; + } + + function notify() + { + # XXX: add other notifications (Jabber, SMS) here + # XXX: queue this and handle it offline + # XXX: Whatever happens, do it in Twitter-like API, too + + $this->notifyEmail(); + } + + function notifyEmail() + { + $subscribedUser = User::staticGet('id', $this->subscribed); + + if (!empty($subscribedUser)) { + + $subscriber = Profile::staticGet('id', $this->subscriber); + + mail_subscribe_notify_profile($subscribedUser, $subscriber); + } + } + + /** + * Cancel a subscription + * + */ + + function cancel($subscriber, $other) + { + if (!self::exists($subscriber, $other)) { + throw new Exception(_('Not subscribed!')); + } + + // Don't allow deleting self subs + + if ($subscriber->id == $other->id) { + throw new Exception(_('Couldn\'t delete self-subscription.')); + } + + if (Event::handle('StartUnsubscribe', array($subscriber, $other))) { + + $sub = Subscription::pkeyGet(array('subscriber' => $subscriber->id, + 'subscribed' => $other->id)); + + // note we checked for existence above + + assert(!empty($sub)); + + $result = $sub->delete(); + + if (!$result) { + common_log_db_error($sub, 'DELETE', __FILE__); + throw new Exception(_('Couldn\'t delete subscription.')); + } + + self::blow('user:notices_with_friends:%d', $subscriber->id); + + $subscriber->blowSubscriptionsCount(); + $other->blowSubscribersCount(); + + Event::handle('EndUnsubscribe', array($subscriber, $other)); + } + + return; + } + + 
function exists($subscriber, $other) + { + $sub = Subscription::pkeyGet(array('subscriber' => $subscriber->id, + 'subscribed' => $other->id)); + return (empty($sub)) ? false : true; + } } diff --git a/classes/User.php b/classes/User.php index 72c3f39e94..10b1f48651 100644 --- a/classes/User.php +++ b/classes/User.php @@ -80,11 +80,7 @@ class User extends Memcached_DataObject function isSubscribed($other) { - assert(!is_null($other)); - // XXX: cache results of this query - $sub = Subscription::pkeyGet(array('subscriber' => $this->id, - 'subscribed' => $other->id)); - return (is_null($sub)) ? false : true; + return Subscription::exists($this->getProfile(), $other); } // 'update' won't write key columns, so we have to do it ourselves. @@ -167,17 +163,8 @@ class User extends Memcached_DataObject function hasBlocked($other) { - - $block = Profile_block::get($this->id, $other->id); - - if (is_null($block)) { - $result = false; - } else { - $result = true; - $block->free(); - } - - return $result; + $profile = $this->getProfile(); + return $profile->hasBlocked($other); } /** diff --git a/lib/subs.php b/lib/subs.php index 5ac1a75a5c..5376e21bda 100644 --- a/lib/subs.php +++ b/lib/subs.php @@ -19,8 +19,6 @@ if (!defined('STATUSNET') && !defined('LACONICA')) { exit(1); } -require_once('XMPPHP/XMPP.php'); - /* Subscribe $user to nickname $other_nickname Returns true or an error message. 
*/ @@ -44,72 +42,12 @@ function subs_subscribe_user($user, $other_nickname) function subs_subscribe_to($user, $other) { - if (!$user->hasRight(Right::SUBSCRIBE)) { - return _('You have been banned from subscribing.'); - } - - if ($user->isSubscribed($other)) { - return _('Already subscribed!'); - } - - if ($other->hasBlocked($user)) { - return _('User has blocked you.'); - } - try { - if (Event::handle('StartSubscribe', array($user, $other))) { - - if (!$user->subscribeTo($other)) { - return _('Could not subscribe.'); - return; - } - - subs_notify($other, $user); - - $cache = common_memcache(); - - if ($cache) { - $cache->delete(common_cache_key('user:notices_with_friends:' . $user->id)); - } - - $profile = $user->getProfile(); - - $profile->blowSubscriptionsCount(); - $other->blowSubscribersCount(); - - if ($other->autosubscribe && !$other->isSubscribed($user) && !$user->hasBlocked($other)) { - if (!$other->subscribeTo($user)) { - return _('Could not subscribe other to you.'); - } - $cache = common_memcache(); - - if ($cache) { - $cache->delete(common_cache_key('user:notices_with_friends:' . $other->id)); - } - - subs_notify($user, $other); - } - - Event::handle('EndSubscribe', array($user, $other)); - } + Subscription::start($user->getProfile(), $other); + return true; } catch (Exception $e) { return $e->getMessage(); } - - return true; -} - -function subs_notify($listenee, $listener) -{ - # XXX: add other notifications (Jabber, SMS) here - # XXX: queue this and handle it offline - # XXX: Whatever happens, do it in Twitter-like API, too - subs_notify_email($listenee, $listener); -} - -function subs_notify_email($listenee, $listener) -{ - mail_subscribe_notify($listenee, $listener); } /* Unsubscribe $user from nickname $other_nickname @@ -128,52 +66,12 @@ function subs_unsubscribe_user($user, $other_nickname) return subs_unsubscribe_to($user, $other->getProfile()); } -/* Unsubscribe user $user from profile $other - * NB: other can be a remote user. 
*/ - function subs_unsubscribe_to($user, $other) { - if (!$user->isSubscribed($other)) - return _('Not subscribed!'); - - // Don't allow deleting self subs - - if ($user->id == $other->id) { - return _('Couldn\'t delete self-subscription.'); - } - try { - if (Event::handle('StartUnsubscribe', array($user, $other))) { - - $sub = DB_DataObject::factory('subscription'); - - $sub->subscriber = $user->id; - $sub->subscribed = $other->id; - - $sub->find(true); - - // note we checked for existence above - - if (!$sub->delete()) - return _('Couldn\'t delete subscription.'); - - $cache = common_memcache(); - - if ($cache) { - $cache->delete(common_cache_key('user:notices_with_friends:' . $user->id)); - } - - $profile = $user->getProfile(); - - $profile->blowSubscriptionsCount(); - $other->blowSubscribersCount(); - - Event::handle('EndUnsubscribe', array($user, $other)); - } + Subscription::cancel($user->getProfile(), $other); + return true; } catch (Exception $e) { return $e->getMessage(); } - - return true; -} - +} \ No newline at end of file From b0a75a2ab24673452674a8f296ca4dbb110bed6b Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Fri, 19 Feb 2010 08:31:20 -0500 Subject: [PATCH 18/62] replace calls to subs_(un)subscribe_user with Subscription methods --- actions/apifriendshipsdestroy.php | 9 +++------ lib/command.php | 29 +++++++++++++++++++++-------- 2 files changed, 24 insertions(+), 14 deletions(-) diff --git a/actions/apifriendshipsdestroy.php b/actions/apifriendshipsdestroy.php index 91c6fd0324..d48a577562 100644 --- a/actions/apifriendshipsdestroy.php +++ b/actions/apifriendshipsdestroy.php @@ -124,12 +124,9 @@ class ApiFriendshipsDestroyAction extends ApiAuthAction return; } - $result = subs_unsubscribe_user($this->user, $this->other->nickname); - - if (is_string($result)) { - $this->clientError($result, 403, $this->format); - return; - } + // throws an exception on error + Subscription::cancel($this->user->getProfile(), + $this->other->getProfile()); 
$this->initDocument($this->format); $this->showProfile($this->other, $this->format); diff --git a/lib/command.php b/lib/command.php index 2a51fd6872..ea7b60372d 100644 --- a/lib/command.php +++ b/lib/command.php @@ -548,12 +548,19 @@ class SubCommand extends Command return; } - $result = subs_subscribe_user($this->user, $this->other); + $otherUser = User::staticGet('nickname', $this->other); - if ($result == 'true') { + if (empty($otherUser)) { + $channel->error($this->user, _('No such user')); + return; + } + + try { + Subscription::start($this->user->getProfile(), + $otherUser->getProfile()); $channel->output($this->user, sprintf(_('Subscribed to %s'), $this->other)); - } else { - $channel->error($this->user, $result); + } catch (Exception $e) { + $channel->error($this->user, $e->getMessage()); } } } @@ -576,12 +583,18 @@ class UnsubCommand extends Command return; } - $result=subs_unsubscribe_user($this->user, $this->other); + $otherUser = User::staticGet('nickname', $this->other); - if ($result) { + if (empty($otherUser)) { + $channel->error($this->user, _('No such user')); + } + + try { + Subscription::cancel($this->user->getProfile(), + $otherUser->getProfile()); $channel->output($this->user, sprintf(_('Unsubscribed from %s'), $this->other)); - } else { - $channel->error($this->user, $result); + } catch (Exception $e) { + $channel->error($this->user, $e->getMessage()); } } } From 512e8b69f1e6020e97835f1c5c492c2be3eff4c1 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Fri, 19 Feb 2010 08:31:46 -0500 Subject: [PATCH 19/62] remove unused subs_(un)subscribe_user functions --- lib/subs.php | 32 -------------------------------- 1 file changed, 32 deletions(-) diff --git a/lib/subs.php b/lib/subs.php index 5376e21bda..1c240c475d 100644 --- a/lib/subs.php +++ b/lib/subs.php @@ -19,22 +19,6 @@ if (!defined('STATUSNET') && !defined('LACONICA')) { exit(1); } -/* Subscribe $user to nickname $other_nickname - Returns true or an error message. 
-*/ - -function subs_subscribe_user($user, $other_nickname) -{ - - $other = User::staticGet('nickname', $other_nickname); - - if (!$other) { - return _('No such user.'); - } - - return subs_subscribe_to($user, $other); -} - /* Subscribe user $user to other user $other. * Note: $other must be a local user, not a remote profile. * Because the other way is quite a bit more complicated. @@ -50,22 +34,6 @@ function subs_subscribe_to($user, $other) } } -/* Unsubscribe $user from nickname $other_nickname - Returns true or an error message. -*/ - -function subs_unsubscribe_user($user, $other_nickname) -{ - - $other = User::staticGet('nickname', $other_nickname); - - if (!$other) { - return _('No such user.'); - } - - return subs_unsubscribe_to($user, $other->getProfile()); -} - function subs_unsubscribe_to($user, $other) { try { From 114eb310ca4f53e09c98f2337850829a9036c133 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 19 Feb 2010 10:29:06 -0800 Subject: [PATCH 20/62] OStatus: fix up Salmon endpoint detection/saving, timestamp fixes. --- plugins/OStatus/actions/salmon.php | 2 +- plugins/OStatus/classes/FeedSub.php | 2 +- plugins/OStatus/classes/Ostatus_profile.php | 18 +++++++++++++----- 3 files changed, 15 insertions(+), 7 deletions(-) diff --git a/plugins/OStatus/actions/salmon.php b/plugins/OStatus/actions/salmon.php index e9d6015f43..2da9db9eb7 100644 --- a/plugins/OStatus/actions/salmon.php +++ b/plugins/OStatus/actions/salmon.php @@ -142,7 +142,7 @@ class SalmonAction extends Action function ensureProfile() { $actor = $this->act->actor; - + common_log(LOG_DEBUG, "Received salmon bit: " . 
var_export($this->act, true)); if (empty($actor->id)) { throw new Exception("Received a salmon slap from unidentified actor."); } diff --git a/plugins/OStatus/classes/FeedSub.php b/plugins/OStatus/classes/FeedSub.php index dc2c0b710b..bf9d063fa1 100644 --- a/plugins/OStatus/classes/FeedSub.php +++ b/plugins/OStatus/classes/FeedSub.php @@ -99,7 +99,7 @@ class FeedSub extends Memcached_DataObject 'sub_state' => DB_DATAOBJECT_STR + DB_DATAOBJECT_NOTNULL, 'sub_start' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME, 'sub_end' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME, - 'last_update' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL, + 'last_update' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME, 'created' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL, 'modified' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL); } diff --git a/plugins/OStatus/classes/Ostatus_profile.php b/plugins/OStatus/classes/Ostatus_profile.php index e0cb467e5e..319b76a661 100644 --- a/plugins/OStatus/classes/Ostatus_profile.php +++ b/plugins/OStatus/classes/Ostatus_profile.php @@ -328,7 +328,7 @@ class Ostatus_profile extends Memcached_DataObject $entry->element('id', null, $id); $entry->element('title', null, $text); $entry->element('summary', null, $text); - $entry->element('published', null, common_date_w3dtf(time())); + $entry->element('published', null, common_date_w3dtf(common_sql_now())); $entry->element('activity:verb', null, $verb); $entry->raw($actor->asAtomAuthor()); @@ -516,7 +516,7 @@ class Ostatus_profile extends Memcached_DataObject throw new FeedSubException('empty feed'); } $first = new Activity($entries->item(0), $discover->feed); - return self::ensureActorProfile($first, $feeduri); + return self::ensureActorProfile($first, $feeduri, $salmonuri); } /** @@ -598,13 +598,14 @@ class Ostatus_profile extends 
Memcached_DataObject * * @param Activity $activity * @param string $feeduri if we already know the canonical feed URI! + * @param string $salmonuri if we already know the salmon return channel URI * @return Ostatus_profile */ - public static function ensureActorProfile($activity, $feeduri=null) + public static function ensureActorProfile($activity, $feeduri=null, $salmonuri=null) { $profile = self::getActorProfile($activity); if (!$profile) { - $profile = self::createActorProfile($activity, $feeduri); + $profile = self::createActorProfile($activity, $feeduri, $salmonuri); } return $profile; } @@ -640,7 +641,7 @@ class Ostatus_profile extends Memcached_DataObject /** * @fixme validate stuff somewhere */ - protected static function createActorProfile($activity, $feeduri=null) + protected static function createActorProfile($activity, $feeduri=null, $salmonuri=null) { $actor = $activity->actor; $homeuri = self::getActorProfileURI($activity); @@ -674,10 +675,17 @@ class Ostatus_profile extends Memcached_DataObject $oprofile = new Ostatus_profile(); $oprofile->uri = $homeuri; if ($feeduri) { + // If we don't have these, we can look them up later. 
$oprofile->feeduri = $feeduri; + if ($salmonuri) { + $oprofile->salmonuri = $salmonuri; + } } $oprofile->profile_id = $profile->id; + $oprofile->created = common_sql_now(); + $oprofile->modified = common_sql_now(); + $ok = $oprofile->insert(); if ($ok) { $oprofile->updateAvatar($avatar); From a1a3ab1c58cf8636c4e9ac6b9d44bdc18946a547 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 19 Feb 2010 12:08:07 -0800 Subject: [PATCH 21/62] OStatus: hooked up follow/unfollow events on Salmon endpoint to create/destroy remote subscriber relationships --- plugins/OStatus/OStatusPlugin.php | 9 ++- plugins/OStatus/actions/feedsubsettings.php | 8 --- plugins/OStatus/actions/salmon.php | 66 ++++++++++++++++++--- plugins/OStatus/classes/Ostatus_profile.php | 22 +++++-- 4 files changed, 81 insertions(+), 24 deletions(-) diff --git a/plugins/OStatus/OStatusPlugin.php b/plugins/OStatus/OStatusPlugin.php index 4ebe4551ec..4c43d8a303 100644 --- a/plugins/OStatus/OStatusPlugin.php +++ b/plugins/OStatus/OStatusPlugin.php @@ -253,17 +253,22 @@ class OStatusPlugin extends Plugin */ function onEndUnsubscribe($user, $other) { + if ($user instanceof Profile) { + $profile = $user; + } else if ($user instanceof Profile) { + $profile = $user->getProfile(); + } $oprofile = Ostatus_profile::staticGet('profile_id', $other->id); if ($oprofile) { // Notify the remote server of the unsub, if supported. - $oprofile->notify($user->getProfile(), ActivityVerb::UNFOLLOW, $oprofile); + $oprofile->notify($profile, ActivityVerb::UNFOLLOW, $oprofile); // Drop the PuSH subscription if there are no other subscribers. 
$sub = new Subscription(); $sub->subscribed = $other->id; $sub->limit(1); if (!$sub->find(true)) { - common_log(LOG_INFO, "Unsubscribing from now-unused feed $oprofile->feeduri on hub $oprofile->huburi"); + common_log(LOG_INFO, "Unsubscribing from now-unused feed $oprofile->feeduri"); $oprofile->unsubscribe(); } } diff --git a/plugins/OStatus/actions/feedsubsettings.php b/plugins/OStatus/actions/feedsubsettings.php index 3e1d0aa823..aee4cee9af 100644 --- a/plugins/OStatus/actions/feedsubsettings.php +++ b/plugins/OStatus/actions/feedsubsettings.php @@ -68,14 +68,6 @@ class FeedSubSettingsAction extends ConnectSettingsAction $profile = $user->getProfile(); - $fuser = null; - - $flink = Foreign_link::getByUserID($user->id, FEEDSUB_SERVICE); - - if (!empty($flink)) { - $fuser = $flink->getForeignUser(); - } - $this->elementStart('form', array('method' => 'post', 'id' => 'form_settings_feedsub', 'class' => 'form_settings', diff --git a/plugins/OStatus/actions/salmon.php b/plugins/OStatus/actions/salmon.php index 2da9db9eb7..1ed322ed28 100644 --- a/plugins/OStatus/actions/salmon.php +++ b/plugins/OStatus/actions/salmon.php @@ -63,13 +63,17 @@ class SalmonAction extends Action // XXX: check that document element is Atom entry // XXX: check the signature - $this->act = new Activity($dom->documentElement); + $entries = $dom->getElementsByTagNameNS(Activity::ATOM, 'entry'); + if ($entries && $entries->length) { + // @fixme is it legit to have multiple entries? + $this->act = new Activity($entries->item(0), $dom->documentElement); + } return true; } /** - * @fixme probably call Ostatus_profile::processFeed + * Check the posted activity type and break out to appropriate processing. 
*/ function handle($args) @@ -94,13 +98,19 @@ class SalmonAction extends Action case ActivityVerb::FRIEND: $this->handleFollow(); break; + case ActivityVerb::UNFOLLOW: + $this->handleUnfollow(); + break; } Event::handle('EndHandleSalmon', array($this->user, $this->activity)); } } /** - * @fixme probably call Ostatus_profile::processFeed + * We've gotten a post event on the Salmon backchannel, probably a reply. + * + * @todo validate if we need to handle this post, then call into + * ostatus_profile's general incoming-post handling. */ function handlePost() { @@ -116,43 +126,81 @@ class SalmonAction extends Action } $profile = $this->ensureProfile(); + // @fixme do something with the post } /** - * @fixme probably call Ostatus_profile::processFeed + * We've gotten a follow/subscribe notification from a remote user. + * Save a subscription relationship for them. */ function handleFollow() { + $oprofile = $this->ensureProfile(); + if ($oprofile) { + common_log(LOG_INFO, "Setting up subscription from remote {$oprofile->uri} to local {$this->user->nickname}"); + $oprofile->subscribeRemoteToLocal($this->user); + } else { + common_log(LOG_INFO, "Can't set up subscription from remote; missing profile."); + } } /** - * @fixme probably call Ostatus_profile::processFeed + * We've gotten an unfollow/unsubscribe notification from a remote user. + * Check if we have a subscription relationship for them and kill it. + * + * @fixme probably catch exceptions on fail? + */ + function handleUnfollow() + { + $oprofile = $this->ensureProfile(); + if ($oprofile) { + common_log(LOG_INFO, "Canceling subscription from remote {$oprofile->uri} to local {$this->user->nickname}"); + Subscription::cancel($oprofile->localProfile(), $this->user->getProfile()); + } else { + common_log(LOG_ERR, "Can't cancel subscription from remote, didn't find the profile"); + } + } + + /** + * Remote user likes one of our posts. + * Confirm the post is ours, and save a local favorite event. 
*/ function handleFavorite() { } /** - * @fixme probably call Ostatus_profile::processFeed + * Remote user doesn't like one of our posts after all! + * Confirm the post is ours, and save a local favorite event. + */ + function handleUnfavorite() + { + } + + /** + * Hmmmm */ function handleShare() { } + /** + * @return Ostatus_profile + */ function ensureProfile() { $actor = $this->act->actor; common_log(LOG_DEBUG, "Received salmon bit: " . var_export($this->act, true)); if (empty($actor->id)) { + common_log(LOG_ERR, "broken actor: " . var_export($actor, true)); throw new Exception("Received a salmon slap from unidentified actor."); } - $ostatusProfile = Ostatus_profile::ensureActorProfile($this->act); - return $oprofile->localProfile(); + return Ostatus_profile::ensureActorProfile($this->act); } /** - * @fixme anything new in here probably should be merged into Ostatus_profile::ensureActorProfile and friends + * @fixme merge into Ostatus_profile::ensureActorProfile and friends */ function createProfile() { diff --git a/plugins/OStatus/classes/Ostatus_profile.php b/plugins/OStatus/classes/Ostatus_profile.php index 319b76a661..97d8eec101 100644 --- a/plugins/OStatus/classes/Ostatus_profile.php +++ b/plugins/OStatus/classes/Ostatus_profile.php @@ -310,8 +310,15 @@ class Ostatus_profile extends Memcached_DataObject * @param $verb eg Activity::SUBSCRIBE or Activity::JOIN * @param $object object of the action; if null, the remote entity itself is assumed */ - public function notify(Profile $actor, $verb, $object=null) + public function notify($actor, $verb, $object=null) { + if (!($actor instanceof Profile)) { + $type = gettype($actor); + if ($type == 'object') { + $type = get_class($actor); + } + throw new ServerException("Invalid actor passed to " . __METHOD__ . ": " . 
$type); + } if ($object == null) { $object = $this; } @@ -340,7 +347,7 @@ class Ostatus_profile extends Memcached_DataObject $feed->addEntry($entry); $xml = $feed->getString(); - common_log(LOG_INFO, "Posting to Salmon endpoint $salmon: $xml"); + common_log(LOG_INFO, "Posting to Salmon endpoint $this->salmonuri: $xml"); $salmon = new Salmon(); // ? $salmon->post($this->salmonuri, $xml); @@ -531,9 +538,14 @@ class Ostatus_profile extends Memcached_DataObject $temp_filename = tempnam(sys_get_temp_dir(), 'listener_avatar'); copy($url, $temp_filename); + if ($this->isGroup()) { + $id = $this->group_id; + } else { + $id = $this->profile_id; + } // @fixme should we be using different ids? - $imagefile = new ImageFile($this->id, $temp_filename); - $filename = Avatar::filename($this->id, + $imagefile = new ImageFile($id, $temp_filename); + $filename = Avatar::filename($id, image_type_to_extension($imagefile->type), null, common_timestamp()); @@ -646,7 +658,7 @@ class Ostatus_profile extends Memcached_DataObject $actor = $activity->actor; $homeuri = self::getActorProfileURI($activity); $nickname = self::getAuthorNick($activity); - $avatar = self::getAvatar($actor, $feed); + $avatar = self::getAvatar($actor, $activity->feed); if (!$homeuri) { common_log(LOG_DEBUG, __METHOD__ . " empty actor profile URI: " . 
var_export($activity, true)); From b0327506a491f029b239ae703e1acb1d42ae299c Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Fri, 19 Feb 2010 16:37:07 -0500 Subject: [PATCH 22/62] some more salmon stuff --- plugins/OStatus/actions/salmon.php | 90 +++++++++++++++++++++++++++++- 1 file changed, 89 insertions(+), 1 deletion(-) diff --git a/plugins/OStatus/actions/salmon.php b/plugins/OStatus/actions/salmon.php index e9d6015f43..7356c489a6 100644 --- a/plugins/OStatus/actions/salmon.php +++ b/plugins/OStatus/actions/salmon.php @@ -112,24 +112,112 @@ class SalmonAction extends Action case ActivityObject::COMMENT: break; default: - throw new Exception("Can't handle that kind of post."); + throw new ClientException("Can't handle that kind of post."); + } + + // Notice must either be a) in reply to a notice by this user + // or b) to the attention of this user + + $context = $this->act->context; + + if (!empty($context->replyToID)) { + $notice = Notice::staticGet('uri', $context->replyToID); + if (empty($notice)) { + throw new ClientException("In reply to unknown notice"); + } + if ($notice->profile_id != $this->user->id) { + throw new ClientException("In reply to a notice not by this user"); + } + } else if (!empty($context->attention)) { + if (!in_array($context->attention, $this->user->uri)) { + throw new ClientException("To the attention of user(s) not including this one!"); + } + } else { + throw new ClientException("Not to anyone in reply to anything!"); } $profile = $this->ensureProfile(); + } /** * @fixme probably call Ostatus_profile::processFeed */ + function handleFollow() { + $object = $this->act->object; + + if ($object->id != $this->user->uri) { + throw new ClientException("Subscription notice not for this user."); + } + + $profile = $this->ensureProfile(); + + $sub = Subscription::pkeyGet(array('subscriber' => $profile->id, + 'subscribed' => $this->user->id)); + + if (!empty($sub)) { + throw new ClientException("Already subscribed."); + } + + if 
($this->user->hasBlocked($profile)) { + throw new ClientException("Already subscribed."); + } + } /** * @fixme probably call Ostatus_profile::processFeed */ + function handleFavorite() { + // WORST VARIABLE NAME EVER + $object = $this->act->object; + + switch ($this->act->object->type) { + case ActivityObject::ARTICLE: + case ActivityObject::BLOGENTRY: + case ActivityObject::NOTE: + case ActivityObject::STATUS: + case ActivityObject::COMMENT: + break; + default: + throw new ClientException("Can't handle that kind of object for liking/faving."); + } + + $notice = Notice::staticGet('uri', $object->id); + + if (empty($notice)) { + throw new ClientException("Notice with ID $object->id unknown."); + } + + if ($notice->profile_id != $this->user->id) { + throw new ClientException("Notice with ID $object->id not posted by $this->user->id."); + } + + $profile = $this->ensureProfile(); + + $old = Fave::pkeyGet(array('user_id' => $profile->id, + 'notice_id' => $notice->id)); + + if (!empty($old)) { + throw new ClientException("We already know that's a fave!"); + } + + $fave = new Fave(); + + // @fixme need to change this attribute name, maybe references + $fave->user_id = $profile->id; + $fave->notice_id = $notice->id; + + $result = $fave->insert(); + + if (!$result) { + common_log_db_error($fave, 'INSERT', __FILE__); + throw new ServerException('Could not save new favorite.'); + } } /** From 557df3d3f78dbfce656c4c8e3ddf82ee0e34da0a Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 19 Feb 2010 16:21:17 -0800 Subject: [PATCH 23/62] OStatus: sub/unsub notifications working again. 
Fixed up autodetection of feed info at profile setup time --- plugins/OStatus/OStatusPlugin.php | 2 ++ plugins/OStatus/actions/salmon.php | 13 +++++++-- plugins/OStatus/classes/Ostatus_profile.php | 31 +++++++++++++-------- plugins/OStatus/lib/salmon.php | 3 ++ 4 files changed, 35 insertions(+), 14 deletions(-) diff --git a/plugins/OStatus/OStatusPlugin.php b/plugins/OStatus/OStatusPlugin.php index 4c43d8a303..1b58d3c356 100644 --- a/plugins/OStatus/OStatusPlugin.php +++ b/plugins/OStatus/OStatusPlugin.php @@ -331,6 +331,8 @@ class OStatusPlugin extends Plugin $oprofile = Ostatus_profile::staticGet('feeduri', $feedsub->uri); if ($oprofile) { $oprofile->processFeed($feed); + } else { + common_log(LOG_DEBUG, "No ostatus profile for incoming feed $feedsub->uri"); } } } diff --git a/plugins/OStatus/actions/salmon.php b/plugins/OStatus/actions/salmon.php index 7a4474ff61..43d79cf4ad 100644 --- a/plugins/OStatus/actions/salmon.php +++ b/plugins/OStatus/actions/salmon.php @@ -34,6 +34,8 @@ class SalmonAction extends Action function prepare($args) { + StatusNet::setApi(true); // Send smaller error pages + parent::prepare($args); if ($_SERVER['REQUEST_METHOD'] != 'POST') { @@ -63,8 +65,12 @@ class SalmonAction extends Action // XXX: check that document element is Atom entry // XXX: check the signature - $this->act = new Activity($dom->documentElement); - + // We need to run an entry into Activity, so get the first one + $entries = $dom->getElementsByTagNameNS(Activity::ATOM, 'entry'); + if ($entries && $entries->length) { + // @fixme is it legit to have multiple entries? + $this->act = new Activity($entries->item(0), $dom->documentElement); + } return true; } @@ -74,7 +80,9 @@ class SalmonAction extends Action function handle($args) { + StatusNet::setApi(true); // Send smaller error pages common_log(LOG_INFO, 'Salmon: incoming post for user '. $this->user->id); + common_log(LOG_DEBUG, "Received salmon bit: " . 
var_export($this->act, true)); // TODO : Insert new $xml -> notice code @@ -254,7 +262,6 @@ class SalmonAction extends Action function ensureProfile() { $actor = $this->act->actor; - common_log(LOG_DEBUG, "Received salmon bit: " . var_export($this->act, true)); if (empty($actor->id)) { common_log(LOG_ERR, "broken actor: " . var_export($actor, true)); throw new Exception("Received a salmon slap from unidentified actor."); diff --git a/plugins/OStatus/classes/Ostatus_profile.php b/plugins/OStatus/classes/Ostatus_profile.php index 97d8eec101..5fe135f962 100644 --- a/plugins/OStatus/classes/Ostatus_profile.php +++ b/plugins/OStatus/classes/Ostatus_profile.php @@ -56,7 +56,7 @@ class Ostatus_profile extends Memcached_DataObject return array('uri' => DB_DATAOBJECT_STR + DB_DATAOBJECT_NOTNULL, 'profile_id' => DB_DATAOBJECT_INT, 'group_id' => DB_DATAOBJECT_INT, - 'feeduri' => DB_DATAOBJECT_STR + DB_DATAOBJECT_NOTNULL, + 'feeduri' => DB_DATAOBJECT_STR, 'salmonuri' => DB_DATAOBJECT_STR, 'created' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL, 'modified' => DB_DATAOBJECT_STR + DB_DATAOBJECT_DATE + DB_DATAOBJECT_TIME + DB_DATAOBJECT_NOTNULL); @@ -71,7 +71,7 @@ class Ostatus_profile extends Memcached_DataObject new ColumnDef('group_id', 'integer', null, true, 'UNI'), new ColumnDef('feeduri', 'varchar', - 255, false, 'UNI'), + 255, true, 'UNI'), new ColumnDef('salmonuri', 'text', null, true), new ColumnDef('created', 'datetime', @@ -272,7 +272,7 @@ class Ostatus_profile extends Memcached_DataObject * @return bool true on success, false on failure * @throws ServerException if feed state is not valid */ - public function subscribe($mode='subscribe') + public function subscribe() { $feedsub = FeedSub::ensureFeed($this->feeduri); if ($feedsub->sub_state == 'active' || $feedsub->sub_state == 'subscribe') { @@ -506,7 +506,7 @@ class Ostatus_profile extends Memcached_DataObject $discover = new FeedDiscovery(); $feeduri = 
$discover->discoverFromURL($profile_uri); - $feedsub = FeedSub::ensureFeed($feeduri, $discover->feed); + //$feedsub = FeedSub::ensureFeed($feeduri, $discover->feed); $huburi = $discover->getAtomLink('hub'); $salmonuri = $discover->getAtomLink('salmon'); @@ -665,6 +665,20 @@ class Ostatus_profile extends Memcached_DataObject throw new ServerException("No profile URI"); } + if (!$feeduri || !$salmonuri) { + // Get the canonical feed URI and check it + $discover = new FeedDiscovery(); + $feeduri = $discover->discoverFromURL($homeuri); + + $huburi = $discover->getAtomLink('hub'); + $salmonuri = $discover->getAtomLink('salmon'); + + if (!$huburi) { + // We can only deal with folks with a PuSH hub + throw new FeedSubNoHubException(); + } + } + $profile = new Profile(); $profile->nickname = $nickname; $profile->fullname = $actor->displayName; @@ -686,13 +700,8 @@ class Ostatus_profile extends Memcached_DataObject // so we can leave it empty until later. $oprofile = new Ostatus_profile(); $oprofile->uri = $homeuri; - if ($feeduri) { - // If we don't have these, we can look them up later. - $oprofile->feeduri = $feeduri; - if ($salmonuri) { - $oprofile->salmonuri = $salmonuri; - } - } + $oprofile->feeduri = $feeduri; + $oprofile->salmonuri = $salmonuri; $oprofile->profile_id = $profile->id; $oprofile->created = common_sql_now(); diff --git a/plugins/OStatus/lib/salmon.php b/plugins/OStatus/lib/salmon.php index 8c77222a62..df17a70068 100644 --- a/plugins/OStatus/lib/salmon.php +++ b/plugins/OStatus/lib/salmon.php @@ -41,9 +41,12 @@ class Salmon $client->setBody($xml); $response = $client->post($endpoint_uri, $headers); } catch (HTTP_Request2_Exception $e) { + common_log(LOG_ERR, "Salmon post to $endpoint_uri failed: " . $e->getMessage()); return false; } if ($response->getStatus() != 200) { + common_log(LOG_ERR, "Salmon at $endpoint_uri returned status " . + $response->getStatus() . ': ' . 
$response->getBody()); return false; } From 50db2d5d69a8769dc2ddcc937afb130bcce0971d Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Fri, 19 Feb 2010 17:01:38 -0800 Subject: [PATCH 24/62] OStatus: Salmon notifications now being generated moderately correctly. :) Needs to be an <entry> not a <feed>. --- plugins/OStatus/actions/salmon.php | 12 +++++------- plugins/OStatus/classes/Ostatus_profile.php | 15 +++++++++------ 2 files changed, 14 insertions(+), 13 deletions(-) diff --git a/plugins/OStatus/actions/salmon.php b/plugins/OStatus/actions/salmon.php index 43d79cf4ad..f7c86dc0ce 100644 --- a/plugins/OStatus/actions/salmon.php +++ b/plugins/OStatus/actions/salmon.php @@ -62,15 +62,13 @@ class SalmonAction extends Action $dom = DOMDocument::loadXML($xml); - // XXX: check that document element is Atom entry + if ($dom->documentElement->namespaceURI != Activity::ATOM || + $dom->documentElement->localName != 'entry') { + $this->clientError(_m('Salmon post must be an Atom entry.')); + } // XXX: check the signature - // We need to run an entry into Activity, so get the first one - $entries = $dom->getElementsByTagNameNS(Activity::ATOM, 'entry'); - if ($entries && $entries->length) { - // @fixme is it legit to have multiple entries? - $this->act = new Activity($entries->item(0), $dom->documentElement); - } + $this->act = new Activity($dom->documentElement); return true; } diff --git a/plugins/OStatus/classes/Ostatus_profile.php b/plugins/OStatus/classes/Ostatus_profile.php index 5fe135f962..b14b4c9a37 100644 --- a/plugins/OStatus/classes/Ostatus_profile.php +++ b/plugins/OStatus/classes/Ostatus_profile.php @@ -329,9 +329,15 @@ class Ostatus_profile extends Memcached_DataObject ':' . $actor->id . ':' . 
time(); // @fixme - //$entry = new Atom10Entry(); + // @fixme consolidate all these NS settings somewhere + $attributes = array('xmlns' => Activity::ATOM, + 'xmlns:activity' => 'http://activitystrea.ms/spec/1.0/', + 'xmlns:thr' => 'http://purl.org/syndication/thread/1.0', + 'xmlns:georss' => 'http://www.georss.org/georss', + 'xmlns:ostatus' => 'http://ostatus.org/schema/1.0'); + $entry = new XMLStringer(); - $entry->elementStart('entry'); + $entry->elementStart('entry', $attributes); $entry->element('id', null, $id); $entry->element('title', null, $text); $entry->element('summary', null, $text); @@ -343,10 +349,7 @@ class Ostatus_profile extends Memcached_DataObject $entry->raw($object->asActivityNoun('object')); $entry->elementEnd('entry'); - $feed = $this->atomFeed($actor); - $feed->addEntry($entry); - - $xml = $feed->getString(); + $xml = $entry->getString(); common_log(LOG_INFO, "Posting to Salmon endpoint $this->salmonuri: $xml"); $salmon = new Salmon(); // ? From 2df3bbc80b6c9dd44134bcf3b5b4a09ea1a803d1 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 20 Feb 2010 11:12:43 -0500 Subject: [PATCH 25/62] Move some salmon processing to a superclass Moved some salmon processing to a superclass so we could handle group salmon posts, too. 
--- plugins/OStatus/OStatusPlugin.php | 4 +- plugins/OStatus/actions/groupsalmon.php | 108 ++++++++++++ plugins/OStatus/actions/usersalmon.php | 0 .../salmon.php => lib/salmonaction.php} | 156 +++--------------- 4 files changed, 133 insertions(+), 135 deletions(-) create mode 100644 plugins/OStatus/actions/groupsalmon.php create mode 100644 plugins/OStatus/actions/usersalmon.php rename plugins/OStatus/{actions/salmon.php => lib/salmonaction.php} (52%) diff --git a/plugins/OStatus/OStatusPlugin.php b/plugins/OStatus/OStatusPlugin.php index 1b58d3c356..e1f3fd9d37 100644 --- a/plugins/OStatus/OStatusPlugin.php +++ b/plugins/OStatus/OStatusPlugin.php @@ -63,10 +63,10 @@ class OStatusPlugin extends Plugin // Salmon endpoint $m->connect('main/salmon/user/:id', - array('action' => 'salmon'), + array('action' => 'usersalmon'), array('id' => '[0-9]+')); $m->connect('main/salmon/group/:id', - array('action' => 'salmongroup'), + array('action' => 'groupsalmon'), array('id' => '[0-9]+')); return true; } diff --git a/plugins/OStatus/actions/groupsalmon.php b/plugins/OStatus/actions/groupsalmon.php new file mode 100644 index 0000000000..64ae9f3cc0 --- /dev/null +++ b/plugins/OStatus/actions/groupsalmon.php @@ -0,0 +1,108 @@ +. + */ + +/** + * @package OStatusPlugin + * @author James Walker + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +class GroupsalmonAction extends SalmonAction +{ + var $group = null; + + function prepare($args) + { + parent::prepare($args); + + $id = $this->trimmed('id'); + + if (!$id) { + $this->clientError(_('No ID.')); + } + + $this->group = User_group::staticGet('id', $id); + + if (empty($this->group)) { + $this->clientError(_('No such group.')); + } + + return true; + } + + /** + * We've gotten a post event on the Salmon backchannel, probably a reply. 
+ */ + + function handlePost() + { + switch ($this->act->object->type) { + case ActivityObject::ARTICLE: + case ActivityObject::BLOGENTRY: + case ActivityObject::NOTE: + case ActivityObject::STATUS: + case ActivityObject::COMMENT: + break; + default: + throw new ClientException("Can't handle that kind of post."); + } + + // Notice must be to the attention of this group + + $context = $this->act->context; + + if (empty($context->attention)) { + throw new ClientException("Not to the attention of anyone."); + } else { + $uri = common_local_url('groupbyid', array('id' => $this->group->id)); + if (!in_array($context->attention, $uri)) { + throw new ClientException("Not to the attention of this group."); + } + } + + $profile = $this->ensureProfile(); + // @fixme save the post + } + + /** + * We've gotten a follow/subscribe notification from a remote user. + * Save a subscription relationship for them. + */ + + function handleFollow() + { + $this->handleJoin(); // ??? + } + + function handleUnfollow() + { + } + + /** + * A remote user joined our group. 
+ */ + + function handleJoin() + { + } + +} diff --git a/plugins/OStatus/actions/usersalmon.php b/plugins/OStatus/actions/usersalmon.php new file mode 100644 index 0000000000..e69de29bb2 diff --git a/plugins/OStatus/actions/salmon.php b/plugins/OStatus/lib/salmonaction.php similarity index 52% rename from plugins/OStatus/actions/salmon.php rename to plugins/OStatus/lib/salmonaction.php index f7c86dc0ce..7085e4583e 100644 --- a/plugins/OStatus/actions/salmon.php +++ b/plugins/OStatus/lib/salmonaction.php @@ -28,7 +28,6 @@ if (!defined('STATUSNET')) { class SalmonAction extends Action { - var $user = null; var $xml = null; var $activity = null; @@ -46,18 +45,6 @@ class SalmonAction extends Action $this->clientError(_('Salmon requires application/atom+xml')); } - $id = $this->trimmed('id'); - - if (!$id) { - $this->clientError(_('No ID.')); - } - - $this->user = User::staticGet($id); - - if (empty($this->user)) { - $this->clientError(_('No such user.')); - } - $xml = file_get_contents('php://input'); $dom = DOMDocument::loadXML($xml); @@ -79,12 +66,10 @@ class SalmonAction extends Action function handle($args) { StatusNet::setApi(true); // Send smaller error pages - common_log(LOG_INFO, 'Salmon: incoming post for user '. $this->user->id); - common_log(LOG_DEBUG, "Received salmon bit: " . 
var_export($this->act, true)); // TODO : Insert new $xml -> notice code - if (Event::handle('StartHandleSalmon', array($this->user, $this->activity))) { + if (Event::handle('StartHandleSalmon', array($this->activity))) { switch ($this->act->verb) { case ActivityVerb::POST: @@ -103,148 +88,44 @@ class SalmonAction extends Action case ActivityVerb::UNFOLLOW: $this->handleUnfollow(); break; + case ActivityVerb::JOIN: + $this->handleJoin(); + break; + default: + throw new ClientException(_("Unimplemented.")); } - Event::handle('EndHandleSalmon', array($this->user, $this->activity)); + Event::handle('EndHandleSalmon', array($this->activity)); } } - /** - * We've gotten a post event on the Salmon backchannel, probably a reply. - * - * @todo validate if we need to handle this post, then call into - * ostatus_profile's general incoming-post handling. - */ function handlePost() { - switch ($this->act->object->type) { - case ActivityObject::ARTICLE: - case ActivityObject::BLOGENTRY: - case ActivityObject::NOTE: - case ActivityObject::STATUS: - case ActivityObject::COMMENT: - break; - default: - throw new ClientException("Can't handle that kind of post."); - } - - // Notice must either be a) in reply to a notice by this user - // or b) to the attention of this user - - $context = $this->act->context; - - if (!empty($context->replyToID)) { - $notice = Notice::staticGet('uri', $context->replyToID); - if (empty($notice)) { - throw new ClientException("In reply to unknown notice"); - } - if ($notice->profile_id != $this->user->id) { - throw new ClientException("In reply to a notice not by this user"); - } - } else if (!empty($context->attention)) { - if (!in_array($context->attention, $this->user->uri)) { - throw new ClientException("To the attention of user(s) not including this one!"); - } - } else { - throw new ClientException("Not to anyone in reply to anything!"); - } - - $profile = $this->ensureProfile(); - // @fixme do something with the post + throw new 
ClientException(_("Unimplemented!")); } - /** - * We've gotten a follow/subscribe notification from a remote user. - * Save a subscription relationship for them. - */ - function handleFollow() { - $oprofile = $this->ensureProfile(); - if ($oprofile) { - common_log(LOG_INFO, "Setting up subscription from remote {$oprofile->uri} to local {$this->user->nickname}"); - $oprofile->subscribeRemoteToLocal($this->user); - } else { - common_log(LOG_INFO, "Can't set up subscription from remote; missing profile."); - } + throw new ClientException(_("Unimplemented!")); } - /** - * We've gotten an unfollow/unsubscribe notification from a remote user. - * Check if we have a subscription relationship for them and kill it. - * - * @fixme probably catch exceptions on fail? - */ function handleUnfollow() { - $oprofile = $this->ensureProfile(); - if ($oprofile) { - common_log(LOG_INFO, "Canceling subscription from remote {$oprofile->uri} to local {$this->user->nickname}"); - Subscription::cancel($oprofile->localProfile(), $this->user->getProfile()); - } else { - common_log(LOG_ERR, "Can't cancel subscription from remote, didn't find the profile"); - } + throw new ClientException(_("Unimplemented!")); } - /** - * Remote user likes one of our posts. - * Confirm the post is ours, and save a local favorite event. 
- */ - function handleFavorite() { - // WORST VARIABLE NAME EVER - $object = $this->act->object; - - switch ($this->act->object->type) { - case ActivityObject::ARTICLE: - case ActivityObject::BLOGENTRY: - case ActivityObject::NOTE: - case ActivityObject::STATUS: - case ActivityObject::COMMENT: - break; - default: - throw new ClientException("Can't handle that kind of object for liking/faving."); - } - - $notice = Notice::staticGet('uri', $object->id); - - if (empty($notice)) { - throw new ClientException("Notice with ID $object->id unknown."); - } - - if ($notice->profile_id != $this->user->id) { - throw new ClientException("Notice with ID $object->id not posted by $this->user->id."); - } - - $profile = $this->ensureProfile(); - - $old = Fave::pkeyGet(array('user_id' => $profile->id, - 'notice_id' => $notice->id)); - - if (!empty($old)) { - throw new ClientException("We already know that's a fave!"); - } - - $fave = new Fave(); - - // @fixme need to change this attribute name, maybe references - $fave->user_id = $profile->id; - $fave->notice_id = $notice->id; - - $result = $fave->insert(); - - if (!$result) { - common_log_db_error($fave, 'INSERT', __FILE__); - throw new ServerException('Could not save new favorite.'); - } + throw new ClientException(_("Unimplemented!")); } /** * Remote user doesn't like one of our posts after all! - * Confirm the post is ours, and save a local favorite event. + * Confirm the post is ours, and delete a local favorite event. */ + function handleUnfavorite() { + throw new ClientException(_("Unimplemented!")); } /** @@ -252,6 +133,15 @@ class SalmonAction extends Action */ function handleShare() { + throw new ClientException(_("Unimplemented!")); + } + + /** + * Hmmmm + */ + function handleJoin() + { + throw new ClientException(_("Unimplemented!")); } /** From ed45df045f661e9c3b85e0657986c99c320914f0 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 20 Feb 2010 11:17:54 -0500 Subject: [PATCH 26/62] Cool bug! 
Technically good PHP syntax --- plugins/OStatus/classes/Ostatus_profile.php | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/plugins/OStatus/classes/Ostatus_profile.php b/plugins/OStatus/classes/Ostatus_profile.php index b14b4c9a37..9f5c605612 100644 --- a/plugins/OStatus/classes/Ostatus_profile.php +++ b/plugins/OStatus/classes/Ostatus_profile.php @@ -149,7 +149,6 @@ class Ostatus_profile extends Memcached_DataObject function asActivityNoun($element) { $xs = new XMLStringer(true); - $avatarHref = Avatar::defaultImage(AVATAR_PROFILE_SIZE); $avatarType = 'image/png'; if ($this->isGroup()) { @@ -173,8 +172,8 @@ class Ostatus_profile extends Memcached_DataObject $self = $this->localProfile(); $avatar = $self->getAvatar(AVATAR_PROFILE_SIZE); if ($avatar) { - $avatarHref = $avatar-> - $avatarType = $avatar->mediatype; + $avatarHref = $avatar->url; + $avatarType = $avatar->mediatype; } } $xs->elementStart('activity:' . $element); @@ -672,10 +671,10 @@ class Ostatus_profile extends Memcached_DataObject // Get the canonical feed URI and check it $discover = new FeedDiscovery(); $feeduri = $discover->discoverFromURL($homeuri); - + $huburi = $discover->getAtomLink('hub'); $salmonuri = $discover->getAtomLink('salmon'); - + if (!$huburi) { // We can only deal with folks with a PuSH hub throw new FeedSubNoHubException(); From 81ea0f81173030c73cfc8dd46946d126d3d41622 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 20 Feb 2010 11:35:01 -0500 Subject: [PATCH 27/62] Add HTMLPurifier to extlib HTMLPurifier defangs arbitrary submitted HTML. We're using it in the OStatus plugin, but it may be valuable for other parts of the codebase (I think OEmbed might benefit, for example). 
--- extlib/HTMLPurifier/HTMLPurifier.auto.php | 11 + extlib/HTMLPurifier/HTMLPurifier.autoload.php | 21 + extlib/HTMLPurifier/HTMLPurifier.func.php | 23 + extlib/HTMLPurifier/HTMLPurifier.includes.php | 208 + extlib/HTMLPurifier/HTMLPurifier.kses.php | 30 + extlib/HTMLPurifier/HTMLPurifier.path.php | 11 + extlib/HTMLPurifier/HTMLPurifier.php | 237 + .../HTMLPurifier.safe-includes.php | 202 + .../HTMLPurifier/AttrCollections.php | 128 + extlib/HTMLPurifier/HTMLPurifier/AttrDef.php | 87 + .../HTMLPurifier/HTMLPurifier/AttrDef/CSS.php | 87 + .../HTMLPurifier/AttrDef/CSS/AlphaValue.php | 21 + .../HTMLPurifier/AttrDef/CSS/Background.php | 87 + .../AttrDef/CSS/BackgroundPosition.php | 126 + .../HTMLPurifier/AttrDef/CSS/Border.php | 43 + .../HTMLPurifier/AttrDef/CSS/Color.php | 78 + .../HTMLPurifier/AttrDef/CSS/Composite.php | 38 + .../AttrDef/CSS/DenyElementDecorator.php | 28 + .../HTMLPurifier/AttrDef/CSS/Filter.php | 54 + .../HTMLPurifier/AttrDef/CSS/Font.php | 149 + .../HTMLPurifier/AttrDef/CSS/FontFamily.php | 90 + .../AttrDef/CSS/ImportantDecorator.php | 40 + .../HTMLPurifier/AttrDef/CSS/Length.php | 47 + .../HTMLPurifier/AttrDef/CSS/ListStyle.php | 78 + .../HTMLPurifier/AttrDef/CSS/Multiple.php | 58 + .../HTMLPurifier/AttrDef/CSS/Number.php | 69 + .../HTMLPurifier/AttrDef/CSS/Percentage.php | 40 + .../AttrDef/CSS/TextDecoration.php | 38 + .../HTMLPurifier/AttrDef/CSS/URI.php | 56 + .../HTMLPurifier/AttrDef/Enum.php | 65 + .../HTMLPurifier/AttrDef/HTML/Bool.php | 28 + .../HTMLPurifier/AttrDef/HTML/Class.php | 34 + .../HTMLPurifier/AttrDef/HTML/Color.php | 32 + .../HTMLPurifier/AttrDef/HTML/FrameTarget.php | 21 + .../HTMLPurifier/AttrDef/HTML/ID.php | 70 + .../HTMLPurifier/AttrDef/HTML/Length.php | 41 + .../HTMLPurifier/AttrDef/HTML/LinkTypes.php | 53 + .../HTMLPurifier/AttrDef/HTML/MultiLength.php | 41 + .../HTMLPurifier/AttrDef/HTML/Nmtokens.php | 52 + .../HTMLPurifier/AttrDef/HTML/Pixels.php | 48 + .../HTMLPurifier/AttrDef/Integer.php | 73 + 
.../HTMLPurifier/AttrDef/Lang.php | 73 + .../HTMLPurifier/AttrDef/Switch.php | 34 + .../HTMLPurifier/AttrDef/Text.php | 15 + .../HTMLPurifier/HTMLPurifier/AttrDef/URI.php | 77 + .../HTMLPurifier/AttrDef/URI/Email.php | 17 + .../AttrDef/URI/Email/SimpleCheck.php | 21 + .../HTMLPurifier/AttrDef/URI/Host.php | 62 + .../HTMLPurifier/AttrDef/URI/IPv4.php | 39 + .../HTMLPurifier/AttrDef/URI/IPv6.php | 99 + .../HTMLPurifier/AttrTransform.php | 56 + .../HTMLPurifier/AttrTransform/Background.php | 23 + .../HTMLPurifier/AttrTransform/BdoDir.php | 19 + .../HTMLPurifier/AttrTransform/BgColor.php | 23 + .../HTMLPurifier/AttrTransform/BoolToCSS.php | 36 + .../HTMLPurifier/AttrTransform/Border.php | 18 + .../HTMLPurifier/AttrTransform/EnumToCSS.php | 58 + .../AttrTransform/ImgRequired.php | 42 + .../HTMLPurifier/AttrTransform/ImgSpace.php | 44 + .../HTMLPurifier/AttrTransform/Input.php | 40 + .../HTMLPurifier/AttrTransform/Lang.php | 28 + .../HTMLPurifier/AttrTransform/Length.php | 27 + .../HTMLPurifier/AttrTransform/Name.php | 21 + .../HTMLPurifier/AttrTransform/NameSync.php | 27 + .../HTMLPurifier/AttrTransform/SafeEmbed.php | 15 + .../HTMLPurifier/AttrTransform/SafeObject.php | 16 + .../HTMLPurifier/AttrTransform/SafeParam.php | 50 + .../AttrTransform/ScriptRequired.php | 16 + .../HTMLPurifier/AttrTransform/Textarea.php | 18 + .../HTMLPurifier/HTMLPurifier/AttrTypes.php | 77 + .../HTMLPurifier/AttrValidator.php | 162 + .../HTMLPurifier/HTMLPurifier/Bootstrap.php | 98 + .../HTMLPurifier/CSSDefinition.php | 292 ++ extlib/HTMLPurifier/HTMLPurifier/ChildDef.php | 48 + .../HTMLPurifier/ChildDef/Chameleon.php | 48 + .../HTMLPurifier/ChildDef/Custom.php | 90 + .../HTMLPurifier/ChildDef/Empty.php | 20 + .../HTMLPurifier/ChildDef/Optional.php | 26 + .../HTMLPurifier/ChildDef/Required.php | 117 + .../ChildDef/StrictBlockquote.php | 88 + .../HTMLPurifier/ChildDef/Table.php | 142 + extlib/HTMLPurifier/HTMLPurifier/Config.php | 580 +++ .../HTMLPurifier/ConfigSchema.php | 158 + 
.../ConfigSchema/Builder/ConfigSchema.php | 44 + .../HTMLPurifier/ConfigSchema/Builder/Xml.php | 106 + .../HTMLPurifier/ConfigSchema/Exception.php | 11 + .../HTMLPurifier/ConfigSchema/Interchange.php | 42 + .../ConfigSchema/Interchange/Directive.php | 77 + .../ConfigSchema/Interchange/Id.php | 37 + .../ConfigSchema/InterchangeBuilder.php | 180 + .../HTMLPurifier/ConfigSchema/Validator.php | 206 + .../ConfigSchema/ValidatorAtom.php | 66 + .../HTMLPurifier/ConfigSchema/schema.ser | Bin 0 -> 12999 bytes .../schema/Attr.AllowedClasses.txt | 8 + .../schema/Attr.AllowedFrameTargets.txt | 12 + .../ConfigSchema/schema/Attr.AllowedRel.txt | 9 + .../ConfigSchema/schema/Attr.AllowedRev.txt | 9 + .../schema/Attr.ClassUseCDATA.txt | 19 + .../schema/Attr.DefaultImageAlt.txt | 11 + .../schema/Attr.DefaultInvalidImage.txt | 9 + .../schema/Attr.DefaultInvalidImageAlt.txt | 8 + .../schema/Attr.DefaultTextDir.txt | 10 + .../ConfigSchema/schema/Attr.EnableID.txt | 16 + .../schema/Attr.ForbiddenClasses.txt | 8 + .../ConfigSchema/schema/Attr.IDBlacklist.txt | 5 + .../schema/Attr.IDBlacklistRegexp.txt | 9 + .../ConfigSchema/schema/Attr.IDPrefix.txt | 12 + .../schema/Attr.IDPrefixLocal.txt | 14 + .../schema/AutoFormat.AutoParagraph.txt | 31 + .../ConfigSchema/schema/AutoFormat.Custom.txt | 12 + .../schema/AutoFormat.DisplayLinkURI.txt | 11 + .../schema/AutoFormat.Linkify.txt | 12 + .../AutoFormat.PurifierLinkify.DocURL.txt | 12 + .../schema/AutoFormat.PurifierLinkify.txt | 12 + ...rmat.RemoveEmpty.RemoveNbsp.Exceptions.txt | 11 + .../AutoFormat.RemoveEmpty.RemoveNbsp.txt | 15 + .../schema/AutoFormat.RemoveEmpty.txt | 46 + .../schema/CSS.AllowImportant.txt | 8 + .../ConfigSchema/schema/CSS.AllowTricky.txt | 11 + .../schema/CSS.AllowedProperties.txt | 18 + .../ConfigSchema/schema/CSS.DefinitionRev.txt | 11 + .../ConfigSchema/schema/CSS.MaxImgLength.txt | 16 + .../ConfigSchema/schema/CSS.Proprietary.txt | 10 + .../schema/Cache.DefinitionImpl.txt | 14 + .../schema/Cache.SerializerPath.txt | 
13 + .../schema/Core.AggressivelyFixLt.txt | 18 + .../schema/Core.CollectErrors.txt | 12 + .../schema/Core.ColorKeywords.txt | 28 + .../schema/Core.ConvertDocumentToFragment.txt | 14 + .../Core.DirectLexLineNumberSyncInterval.txt | 17 + .../ConfigSchema/schema/Core.Encoding.txt | 15 + .../schema/Core.EscapeInvalidChildren.txt | 10 + .../schema/Core.EscapeInvalidTags.txt | 7 + .../schema/Core.EscapeNonASCIICharacters.txt | 13 + .../schema/Core.HiddenElements.txt | 19 + .../ConfigSchema/schema/Core.Language.txt | 10 + .../ConfigSchema/schema/Core.LexerImpl.txt | 34 + .../schema/Core.MaintainLineNumbers.txt | 16 + .../schema/Core.RemoveInvalidImg.txt | 12 + .../schema/Core.RemoveScriptContents.txt | 12 + .../ConfigSchema/schema/Filter.Custom.txt | 11 + .../Filter.ExtractStyleBlocks.Escaping.txt | 14 + .../Filter.ExtractStyleBlocks.Scope.txt | 29 + .../Filter.ExtractStyleBlocks.TidyImpl.txt | 16 + .../schema/Filter.ExtractStyleBlocks.txt | 74 + .../ConfigSchema/schema/Filter.YouTube.txt | 11 + .../ConfigSchema/schema/HTML.Allowed.txt | 22 + .../schema/HTML.AllowedAttributes.txt | 19 + .../schema/HTML.AllowedElements.txt | 18 + .../schema/HTML.AllowedModules.txt | 20 + .../schema/HTML.Attr.Name.UseCDATA.txt | 11 + .../ConfigSchema/schema/HTML.BlockWrapper.txt | 18 + .../ConfigSchema/schema/HTML.CoreModules.txt | 23 + .../schema/HTML.CustomDoctype.txt | 9 + .../ConfigSchema/schema/HTML.DefinitionID.txt | 33 + .../schema/HTML.DefinitionRev.txt | 16 + .../ConfigSchema/schema/HTML.Doctype.txt | 11 + .../schema/HTML.ForbiddenAttributes.txt | 21 + .../schema/HTML.ForbiddenElements.txt | 20 + .../ConfigSchema/schema/HTML.MaxImgLength.txt | 14 + .../ConfigSchema/schema/HTML.Parent.txt | 12 + .../ConfigSchema/schema/HTML.Proprietary.txt | 12 + .../ConfigSchema/schema/HTML.SafeEmbed.txt | 14 + .../ConfigSchema/schema/HTML.SafeObject.txt | 14 + .../ConfigSchema/schema/HTML.Strict.txt | 9 + .../ConfigSchema/schema/HTML.TidyAdd.txt | 8 + .../ConfigSchema/schema/HTML.TidyLevel.txt | 
24 + .../ConfigSchema/schema/HTML.TidyRemove.txt | 8 + .../ConfigSchema/schema/HTML.Trusted.txt | 8 + .../ConfigSchema/schema/HTML.XHTML.txt | 11 + .../schema/Output.CommentScriptContents.txt | 10 + .../ConfigSchema/schema/Output.Newline.txt | 13 + .../ConfigSchema/schema/Output.SortAttr.txt | 14 + .../ConfigSchema/schema/Output.TidyFormat.txt | 25 + .../ConfigSchema/schema/Test.ForceNoIconv.txt | 7 + .../schema/URI.AllowedSchemes.txt | 15 + .../ConfigSchema/schema/URI.Base.txt | 17 + .../ConfigSchema/schema/URI.DefaultScheme.txt | 10 + .../ConfigSchema/schema/URI.DefinitionID.txt | 11 + .../ConfigSchema/schema/URI.DefinitionRev.txt | 11 + .../ConfigSchema/schema/URI.Disable.txt | 14 + .../schema/URI.DisableExternal.txt | 11 + .../schema/URI.DisableExternalResources.txt | 13 + .../schema/URI.DisableResources.txt | 12 + .../ConfigSchema/schema/URI.Host.txt | 19 + .../ConfigSchema/schema/URI.HostBlacklist.txt | 9 + .../ConfigSchema/schema/URI.MakeAbsolute.txt | 13 + .../ConfigSchema/schema/URI.Munge.txt | 83 + .../schema/URI.MungeResources.txt | 17 + .../schema/URI.MungeSecretKey.txt | 30 + .../schema/URI.OverrideAllowedSchemes.txt | 9 + .../HTMLPurifier/ConfigSchema/schema/info.ini | 3 + .../HTMLPurifier/HTMLPurifier/ContentSets.php | 155 + extlib/HTMLPurifier/HTMLPurifier/Context.php | 82 + .../HTMLPurifier/HTMLPurifier/Definition.php | 39 + .../HTMLPurifier/DefinitionCache.php | 108 + .../DefinitionCache/Decorator.php | 62 + .../DefinitionCache/Decorator/Cleanup.php | 43 + .../DefinitionCache/Decorator/Memory.php | 46 + .../DefinitionCache/Decorator/Template.php.in | 47 + .../HTMLPurifier/DefinitionCache/Null.php | 39 + .../DefinitionCache/Serializer.php | 172 + .../DefinitionCache/Serializer/README | 3 + .../HTMLPurifier/DefinitionCacheFactory.php | 91 + extlib/HTMLPurifier/HTMLPurifier/Doctype.php | 60 + .../HTMLPurifier/DoctypeRegistry.php | 103 + .../HTMLPurifier/HTMLPurifier/ElementDef.php | 176 + extlib/HTMLPurifier/HTMLPurifier/Encoder.php | 426 ++ 
.../HTMLPurifier/EntityLookup.php | 44 + .../HTMLPurifier/EntityLookup/entities.ser | 1 + .../HTMLPurifier/EntityParser.php | 144 + .../HTMLPurifier/ErrorCollector.php | 209 + .../HTMLPurifier/HTMLPurifier/ErrorStruct.php | 60 + .../HTMLPurifier/HTMLPurifier/Exception.php | 12 + extlib/HTMLPurifier/HTMLPurifier/Filter.php | 46 + .../Filter/ExtractStyleBlocks.php | 135 + .../HTMLPurifier/Filter/YouTube.php | 39 + .../HTMLPurifier/HTMLPurifier/Generator.php | 183 + .../HTMLPurifier/HTMLDefinition.php | 420 ++ .../HTMLPurifier/HTMLPurifier/HTMLModule.php | 244 + .../HTMLPurifier/HTMLModule/Bdo.php | 31 + .../HTMLModule/CommonAttributes.php | 26 + .../HTMLPurifier/HTMLModule/Edit.php | 38 + .../HTMLPurifier/HTMLModule/Forms.php | 118 + .../HTMLPurifier/HTMLModule/Hypertext.php | 31 + .../HTMLPurifier/HTMLModule/Image.php | 40 + .../HTMLPurifier/HTMLModule/Legacy.php | 143 + .../HTMLPurifier/HTMLModule/List.php | 35 + .../HTMLPurifier/HTMLModule/Name.php | 21 + .../HTMLModule/NonXMLCommonAttributes.php | 14 + .../HTMLPurifier/HTMLModule/Object.php | 47 + .../HTMLPurifier/HTMLModule/Presentation.php | 36 + .../HTMLPurifier/HTMLModule/Proprietary.php | 33 + .../HTMLPurifier/HTMLModule/Ruby.php | 27 + .../HTMLPurifier/HTMLModule/SafeEmbed.php | 33 + .../HTMLPurifier/HTMLModule/SafeObject.php | 50 + .../HTMLPurifier/HTMLModule/Scripting.php | 54 + .../HTMLModule/StyleAttribute.php | 24 + .../HTMLPurifier/HTMLModule/Tables.php | 66 + .../HTMLPurifier/HTMLModule/Target.php | 23 + .../HTMLPurifier/HTMLModule/Text.php | 71 + .../HTMLPurifier/HTMLModule/Tidy.php | 207 + .../HTMLPurifier/HTMLModule/Tidy/Name.php | 24 + .../HTMLModule/Tidy/Proprietary.php | 23 + .../HTMLPurifier/HTMLModule/Tidy/Strict.php | 21 + .../HTMLModule/Tidy/Transitional.php | 9 + .../HTMLPurifier/HTMLModule/Tidy/XHTML.php | 17 + .../HTMLModule/Tidy/XHTMLAndHTML4.php | 161 + .../HTMLModule/XMLCommonAttributes.php | 14 + .../HTMLPurifier/HTMLModuleManager.php | 403 ++ .../HTMLPurifier/IDAccumulator.php | 53 
+ extlib/HTMLPurifier/HTMLPurifier/Injector.php | 239 + .../HTMLPurifier/Injector/AutoParagraph.php | 340 ++ .../HTMLPurifier/Injector/DisplayLinkURI.php | 26 + .../HTMLPurifier/Injector/Linkify.php | 46 + .../HTMLPurifier/Injector/PurifierLinkify.php | 45 + .../HTMLPurifier/Injector/RemoveEmpty.php | 51 + .../HTMLPurifier/Injector/SafeObject.php | 87 + extlib/HTMLPurifier/HTMLPurifier/Language.php | 163 + .../Language/classes/en-x-test.php | 12 + .../Language/messages/en-x-test.php | 11 + .../Language/messages/en-x-testmini.php | 12 + .../HTMLPurifier/Language/messages/en.php | 62 + .../HTMLPurifier/LanguageFactory.php | 198 + extlib/HTMLPurifier/HTMLPurifier/Length.php | 115 + extlib/HTMLPurifier/HTMLPurifier/Lexer.php | 298 ++ .../HTMLPurifier/Lexer/DOMLex.php | 213 + .../HTMLPurifier/Lexer/DirectLex.php | 490 +++ .../HTMLPurifier/Lexer/PEARSax3.php | 106 + .../HTMLPurifier/HTMLPurifier/Lexer/PH5P.php | 3906 +++++++++++++++++ .../HTMLPurifier/PercentEncoder.php | 98 + extlib/HTMLPurifier/HTMLPurifier/Printer.php | 176 + .../HTMLPurifier/Printer/CSSDefinition.php | 38 + .../HTMLPurifier/Printer/ConfigForm.css | 10 + .../HTMLPurifier/Printer/ConfigForm.js | 5 + .../HTMLPurifier/Printer/ConfigForm.php | 368 ++ .../HTMLPurifier/Printer/HTMLDefinition.php | 272 ++ .../HTMLPurifier/PropertyList.php | 86 + .../HTMLPurifier/PropertyListIterator.php | 32 + extlib/HTMLPurifier/HTMLPurifier/Strategy.php | 26 + .../HTMLPurifier/Strategy/Composite.php | 25 + .../HTMLPurifier/Strategy/Core.php | 18 + .../HTMLPurifier/Strategy/FixNesting.php | 328 ++ .../HTMLPurifier/Strategy/MakeWellFormed.php | 457 ++ .../Strategy/RemoveForeignElements.php | 171 + .../Strategy/ValidateAttributes.php | 39 + .../HTMLPurifier/HTMLPurifier/StringHash.php | 39 + .../HTMLPurifier/StringHashParser.php | 110 + .../HTMLPurifier/TagTransform.php | 36 + .../HTMLPurifier/TagTransform/Font.php | 96 + .../HTMLPurifier/TagTransform/Simple.php | 35 + extlib/HTMLPurifier/HTMLPurifier/Token.php | 57 + 
.../HTMLPurifier/Token/Comment.php | 22 + .../HTMLPurifier/HTMLPurifier/Token/Empty.php | 11 + .../HTMLPurifier/HTMLPurifier/Token/End.php | 19 + .../HTMLPurifier/HTMLPurifier/Token/Start.php | 11 + .../HTMLPurifier/HTMLPurifier/Token/Tag.php | 56 + .../HTMLPurifier/HTMLPurifier/Token/Text.php | 33 + .../HTMLPurifier/TokenFactory.php | 94 + extlib/HTMLPurifier/HTMLPurifier/URI.php | 173 + .../HTMLPurifier/URIDefinition.php | 93 + .../HTMLPurifier/HTMLPurifier/URIFilter.php | 45 + .../URIFilter/DisableExternal.php | 23 + .../URIFilter/DisableExternalResources.php | 12 + .../HTMLPurifier/URIFilter/HostBlacklist.php | 21 + .../HTMLPurifier/URIFilter/MakeAbsolute.php | 114 + .../HTMLPurifier/URIFilter/Munge.php | 58 + .../HTMLPurifier/HTMLPurifier/URIParser.php | 70 + .../HTMLPurifier/HTMLPurifier/URIScheme.php | 42 + .../HTMLPurifier/URIScheme/ftp.php | 43 + .../HTMLPurifier/URIScheme/http.php | 20 + .../HTMLPurifier/URIScheme/https.php | 12 + .../HTMLPurifier/URIScheme/mailto.php | 27 + .../HTMLPurifier/URIScheme/news.php | 22 + .../HTMLPurifier/URIScheme/nntp.php | 20 + .../HTMLPurifier/URISchemeRegistry.php | 68 + .../HTMLPurifier/UnitConverter.php | 254 ++ .../HTMLPurifier/HTMLPurifier/VarParser.php | 154 + .../HTMLPurifier/VarParser/Flexible.php | 96 + .../HTMLPurifier/VarParser/Native.php | 26 + .../HTMLPurifier/VarParserException.php | 11 + extlib/HTMLPurifier/LICENSE | 504 +++ 322 files changed, 24205 insertions(+) create mode 100644 extlib/HTMLPurifier/HTMLPurifier.auto.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier.autoload.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier.func.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier.includes.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier.kses.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier.path.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier.safe-includes.php create mode 100644 
extlib/HTMLPurifier/HTMLPurifier/AttrCollections.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/AlphaValue.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Background.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Border.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Color.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Composite.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Filter.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Font.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Length.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/ListStyle.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Multiple.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Number.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Percentage.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/TextDecoration.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/URI.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/Enum.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Bool.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Class.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Color.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/FrameTarget.php create mode 100644 
extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/ID.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Length.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/LinkTypes.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/MultiLength.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Nmtokens.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Pixels.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/Integer.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/Lang.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/Switch.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/Text.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/Email.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/Email/SimpleCheck.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/Host.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/IPv4.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/IPv6.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Background.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform/BdoDir.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform/BgColor.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform/BoolToCSS.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Border.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform/EnumToCSS.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ImgRequired.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ImgSpace.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Input.php create mode 100644 
extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Lang.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Length.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Name.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform/NameSync.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform/SafeEmbed.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform/SafeObject.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform/SafeParam.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ScriptRequired.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Textarea.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrTypes.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/AttrValidator.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Bootstrap.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/CSSDefinition.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ChildDef.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ChildDef/Chameleon.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ChildDef/Custom.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ChildDef/Empty.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ChildDef/Optional.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ChildDef/Required.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ChildDef/StrictBlockquote.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ChildDef/Table.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Config.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Builder/Xml.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Exception.php create mode 100644 
extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Interchange.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Interchange/Directive.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Interchange/Id.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/InterchangeBuilder.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Validator.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/ValidatorAtom.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema.ser create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.AllowedClasses.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.AllowedFrameTargets.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRel.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRev.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.ClassUseCDATA.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.DefaultImageAlt.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImage.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImageAlt.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.DefaultTextDir.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.EnableID.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.ForbiddenClasses.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklist.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklistRegexp.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefix.txt create mode 100644 
extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefixLocal.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.AutoParagraph.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.Custom.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.DisplayLinkURI.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.Linkify.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.DocURL.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.AllowImportant.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.AllowTricky.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.AllowedProperties.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.DefinitionRev.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.MaxImgLength.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.Proprietary.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Cache.DefinitionImpl.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPath.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyFixLt.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.CollectErrors.txt create mode 100644 
extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.ConvertDocumentToFragment.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.DirectLexLineNumberSyncInterval.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.Encoding.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidChildren.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidTags.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.EscapeNonASCIICharacters.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.HiddenElements.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.Language.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.LexerImpl.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.MaintainLineNumbers.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.RemoveInvalidImg.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.RemoveScriptContents.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.Custom.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Escaping.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Scope.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.TidyImpl.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.YouTube.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Allowed.txt create mode 100644 
extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.AllowedAttributes.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.AllowedElements.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.AllowedModules.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Attr.Name.UseCDATA.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.BlockWrapper.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.CoreModules.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.CustomDoctype.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionID.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionRev.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Doctype.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenAttributes.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenElements.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.MaxImgLength.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Parent.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Proprietary.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.SafeEmbed.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.SafeObject.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Strict.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TidyAdd.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TidyLevel.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TidyRemove.txt create mode 100644 
extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Trusted.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.XHTML.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Output.CommentScriptContents.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Output.Newline.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Output.SortAttr.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Output.TidyFormat.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Test.ForceNoIconv.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Base.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefinitionID.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefinitionRev.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Disable.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DisableExternal.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DisableExternalResources.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DisableResources.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Host.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.HostBlacklist.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.MakeAbsolute.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.MungeResources.txt create mode 100644 
extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.MungeSecretKey.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.OverrideAllowedSchemes.txt create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/info.ini create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ContentSets.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Context.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Definition.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/DefinitionCache.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Decorator.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Decorator/Memory.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Decorator/Template.php.in create mode 100644 extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Null.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Serializer.php create mode 100755 extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Serializer/README create mode 100644 extlib/HTMLPurifier/HTMLPurifier/DefinitionCacheFactory.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Doctype.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/DoctypeRegistry.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ElementDef.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Encoder.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/EntityLookup.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/EntityLookup/entities.ser create mode 100644 extlib/HTMLPurifier/HTMLPurifier/EntityParser.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ErrorCollector.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/ErrorStruct.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Exception.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Filter.php create mode 100644 
extlib/HTMLPurifier/HTMLPurifier/Filter/ExtractStyleBlocks.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Filter/YouTube.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Generator.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLDefinition.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Bdo.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/CommonAttributes.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Edit.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Forms.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Hypertext.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Image.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Legacy.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/List.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Name.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Object.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Presentation.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Proprietary.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Ruby.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/SafeEmbed.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/SafeObject.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Scripting.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/StyleAttribute.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tables.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Target.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Text.php create mode 100644 
extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tidy.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tidy/Name.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tidy/Proprietary.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tidy/Strict.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tidy/Transitional.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tidy/XHTML.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tidy/XHTMLAndHTML4.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModule/XMLCommonAttributes.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/HTMLModuleManager.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/IDAccumulator.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Injector.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Injector/AutoParagraph.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Injector/DisplayLinkURI.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Injector/Linkify.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Injector/PurifierLinkify.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Injector/RemoveEmpty.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Injector/SafeObject.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Language.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Language/classes/en-x-test.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Language/messages/en-x-test.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Language/messages/en-x-testmini.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Language/messages/en.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/LanguageFactory.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Length.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Lexer.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Lexer/DOMLex.php create mode 100644 
extlib/HTMLPurifier/HTMLPurifier/Lexer/DirectLex.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Lexer/PEARSax3.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Lexer/PH5P.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/PercentEncoder.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Printer.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Printer/CSSDefinition.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Printer/ConfigForm.css create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Printer/ConfigForm.js create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Printer/ConfigForm.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Printer/HTMLDefinition.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/PropertyList.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/PropertyListIterator.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Strategy.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Strategy/Composite.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Strategy/Core.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Strategy/FixNesting.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Strategy/MakeWellFormed.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Strategy/RemoveForeignElements.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Strategy/ValidateAttributes.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/StringHash.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/StringHashParser.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/TagTransform.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/TagTransform/Font.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/TagTransform/Simple.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Token.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Token/Comment.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Token/Empty.php create mode 100644 
extlib/HTMLPurifier/HTMLPurifier/Token/End.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Token/Start.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Token/Tag.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/Token/Text.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/TokenFactory.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/URI.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/URIDefinition.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/URIFilter.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/URIFilter/DisableExternal.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/URIFilter/DisableExternalResources.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/URIFilter/HostBlacklist.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/URIFilter/MakeAbsolute.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/URIFilter/Munge.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/URIParser.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/URIScheme.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/URIScheme/ftp.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/URIScheme/http.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/URIScheme/https.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/URIScheme/mailto.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/URIScheme/news.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/URIScheme/nntp.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/URISchemeRegistry.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/UnitConverter.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/VarParser.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/VarParser/Flexible.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/VarParser/Native.php create mode 100644 extlib/HTMLPurifier/HTMLPurifier/VarParserException.php create mode 100644 extlib/HTMLPurifier/LICENSE diff --git 
a/extlib/HTMLPurifier/HTMLPurifier.auto.php b/extlib/HTMLPurifier/HTMLPurifier.auto.php new file mode 100644 index 0000000000..1960c399f8 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier.auto.php @@ -0,0 +1,11 @@ +purify($html, $config); +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier.includes.php b/extlib/HTMLPurifier/HTMLPurifier.includes.php new file mode 100644 index 0000000000..7cfb970601 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier.includes.php @@ -0,0 +1,208 @@ + $attributes) { + $allowed_elements[$element] = true; + foreach ($attributes as $attribute => $x) { + $allowed_attributes["$element.$attribute"] = true; + } + } + $config->set('HTML.AllowedElements', $allowed_elements); + $config->set('HTML.AllowedAttributes', $allowed_attributes); + $allowed_schemes = array(); + if ($allowed_protocols !== null) { + $config->set('URI.AllowedSchemes', $allowed_protocols); + } + $purifier = new HTMLPurifier($config); + return $purifier->purify($string); +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier.path.php b/extlib/HTMLPurifier/HTMLPurifier.path.php new file mode 100644 index 0000000000..39b1b65319 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier.path.php @@ -0,0 +1,11 @@ +config = HTMLPurifier_Config::create($config); + + $this->strategy = new HTMLPurifier_Strategy_Core(); + + } + + /** + * Adds a filter to process the output. First come first serve + * @param $filter HTMLPurifier_Filter object + */ + public function addFilter($filter) { + trigger_error('HTMLPurifier->addFilter() is deprecated, use configuration directives in the Filter namespace or Filter.Custom', E_USER_WARNING); + $this->filters[] = $filter; + } + + /** + * Filters an HTML snippet/document to be XSS-free and standards-compliant. 
+ * + * @param $html String of HTML to purify + * @param $config HTMLPurifier_Config object for this operation, if omitted, + * defaults to the config object specified during this + * object's construction. The parameter can also be any type + * that HTMLPurifier_Config::create() supports. + * @return Purified HTML + */ + public function purify($html, $config = null) { + + // :TODO: make the config merge in, instead of replace + $config = $config ? HTMLPurifier_Config::create($config) : $this->config; + + // implementation is partially environment dependant, partially + // configuration dependant + $lexer = HTMLPurifier_Lexer::create($config); + + $context = new HTMLPurifier_Context(); + + // setup HTML generator + $this->generator = new HTMLPurifier_Generator($config, $context); + $context->register('Generator', $this->generator); + + // set up global context variables + if ($config->get('Core.CollectErrors')) { + // may get moved out if other facilities use it + $language_factory = HTMLPurifier_LanguageFactory::instance(); + $language = $language_factory->create($config, $context); + $context->register('Locale', $language); + + $error_collector = new HTMLPurifier_ErrorCollector($context); + $context->register('ErrorCollector', $error_collector); + } + + // setup id_accumulator context, necessary due to the fact that + // AttrValidator can be called from many places + $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context); + $context->register('IDAccumulator', $id_accumulator); + + $html = HTMLPurifier_Encoder::convertToUTF8($html, $config, $context); + + // setup filters + $filter_flags = $config->getBatch('Filter'); + $custom_filters = $filter_flags['Custom']; + unset($filter_flags['Custom']); + $filters = array(); + foreach ($filter_flags as $filter => $flag) { + if (!$flag) continue; + if (strpos($filter, '.') !== false) continue; + $class = "HTMLPurifier_Filter_$filter"; + $filters[] = new $class; + } + foreach ($custom_filters as $filter) { + 
// maybe "HTMLPurifier_Filter_$filter", but be consistent with AutoFormat + $filters[] = $filter; + } + $filters = array_merge($filters, $this->filters); + // maybe prepare(), but later + + for ($i = 0, $filter_size = count($filters); $i < $filter_size; $i++) { + $html = $filters[$i]->preFilter($html, $config, $context); + } + + // purified HTML + $html = + $this->generator->generateFromTokens( + // list of tokens + $this->strategy->execute( + // list of un-purified tokens + $lexer->tokenizeHTML( + // un-purified HTML + $html, $config, $context + ), + $config, $context + ) + ); + + for ($i = $filter_size - 1; $i >= 0; $i--) { + $html = $filters[$i]->postFilter($html, $config, $context); + } + + $html = HTMLPurifier_Encoder::convertFromUTF8($html, $config, $context); + $this->context =& $context; + return $html; + } + + /** + * Filters an array of HTML snippets + * @param $config Optional HTMLPurifier_Config object for this operation. + * See HTMLPurifier::purify() for more details. + * @return Array of purified HTML + */ + public function purifyArray($array_of_html, $config = null) { + $context_array = array(); + foreach ($array_of_html as $key => $html) { + $array_of_html[$key] = $this->purify($html, $config); + $context_array[$key] = $this->context; + } + $this->context = $context_array; + return $array_of_html; + } + + /** + * Singleton for enforcing just one HTML Purifier in your system + * @param $prototype Optional prototype HTMLPurifier instance to + * overload singleton with, or HTMLPurifier_Config + * instance to configure the generated version with. 
+ */ + public static function instance($prototype = null) { + if (!self::$instance || $prototype) { + if ($prototype instanceof HTMLPurifier) { + self::$instance = $prototype; + } elseif ($prototype) { + self::$instance = new HTMLPurifier($prototype); + } else { + self::$instance = new HTMLPurifier(); + } + } + return self::$instance; + } + + /** + * @note Backwards compatibility, see instance() + */ + public static function getInstance($prototype = null) { + return HTMLPurifier::instance($prototype); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier.safe-includes.php b/extlib/HTMLPurifier/HTMLPurifier.safe-includes.php new file mode 100644 index 0000000000..cf2c1d617a --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier.safe-includes.php @@ -0,0 +1,202 @@ +attr_collections as $coll_i => $coll) { + if (!isset($this->info[$coll_i])) { + $this->info[$coll_i] = array(); + } + foreach ($coll as $attr_i => $attr) { + if ($attr_i === 0 && isset($this->info[$coll_i][$attr_i])) { + // merge in includes + $this->info[$coll_i][$attr_i] = array_merge( + $this->info[$coll_i][$attr_i], $attr); + continue; + } + $this->info[$coll_i][$attr_i] = $attr; + } + } + } + // perform internal expansions and inclusions + foreach ($this->info as $name => $attr) { + // merge attribute collections that include others + $this->performInclusions($this->info[$name]); + // replace string identifiers with actual attribute objects + $this->expandIdentifiers($this->info[$name], $attr_types); + } + } + + /** + * Takes a reference to an attribute associative array and performs + * all inclusions specified by the zero index. 
+ * @param &$attr Reference to attribute array + */ + public function performInclusions(&$attr) { + if (!isset($attr[0])) return; + $merge = $attr[0]; + $seen = array(); // recursion guard + // loop through all the inclusions + for ($i = 0; isset($merge[$i]); $i++) { + if (isset($seen[$merge[$i]])) continue; + $seen[$merge[$i]] = true; + // foreach attribute of the inclusion, copy it over + if (!isset($this->info[$merge[$i]])) continue; + foreach ($this->info[$merge[$i]] as $key => $value) { + if (isset($attr[$key])) continue; // also catches more inclusions + $attr[$key] = $value; + } + if (isset($this->info[$merge[$i]][0])) { + // recursion + $merge = array_merge($merge, $this->info[$merge[$i]][0]); + } + } + unset($attr[0]); + } + + /** + * Expands all string identifiers in an attribute array by replacing + * them with the appropriate values inside HTMLPurifier_AttrTypes + * @param &$attr Reference to attribute array + * @param $attr_types HTMLPurifier_AttrTypes instance + */ + public function expandIdentifiers(&$attr, $attr_types) { + + // because foreach will process new elements we add, make sure we + // skip duplicates + $processed = array(); + + foreach ($attr as $def_i => $def) { + // skip inclusions + if ($def_i === 0) continue; + + if (isset($processed[$def_i])) continue; + + // determine whether or not attribute is required + if ($required = (strpos($def_i, '*') !== false)) { + // rename the definition + unset($attr[$def_i]); + $def_i = trim($def_i, '*'); + $attr[$def_i] = $def; + } + + $processed[$def_i] = true; + + // if we've already got a literal object, move on + if (is_object($def)) { + // preserve previous required + $attr[$def_i]->required = ($required || $attr[$def_i]->required); + continue; + } + + if ($def === false) { + unset($attr[$def_i]); + continue; + } + + if ($t = $attr_types->get($def)) { + $attr[$def_i] = $t; + $attr[$def_i]->required = $required; + } else { + unset($attr[$def_i]); + } + } + + } + +} + +// vim: et sw=4 sts=4 diff 
--git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef.php new file mode 100644 index 0000000000..d32fa62d6a --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef.php @@ -0,0 +1,87 @@ + by removing + * leading and trailing whitespace, ignoring line feeds, and replacing + * carriage returns and tabs with spaces. While most useful for HTML + * attributes specified as CDATA, it can also be applied to most CSS + * values. + * + * @note This method is not entirely standards compliant, as trim() removes + * more types of whitespace than specified in the spec. In practice, + * this is rarely a problem, as those extra characters usually have + * already been removed by HTMLPurifier_Encoder. + * + * @warning This processing is inconsistent with XML's whitespace handling + * as specified by section 3.3.3 and referenced XHTML 1.0 section + * 4.7. However, note that we are NOT necessarily + * parsing XML, thus, this behavior may still be correct. We + * assume that newlines have been normalized. + */ + public function parseCDATA($string) { + $string = trim($string); + $string = str_replace(array("\n", "\t", "\r"), ' ', $string); + return $string; + } + + /** + * Factory method for creating this class from a string. + * @param $string String construction info + * @return Created AttrDef object corresponding to $string + */ + public function make($string) { + // default implementation, return a flyweight of this object. + // If $string has an effect on the returned object (i.e. you + // need to overload this method), it is best + // to clone or instantiate new copies. (Instantiation is safer.) + return $this; + } + + /** + * Removes spaces from rgb(0, 0, 0) so that shorthand CSS properties work + * properly. THIS IS A HACK! 
+ */ + protected function mungeRgb($string) { + return preg_replace('/rgb\((\d+)\s*,\s*(\d+)\s*,\s*(\d+)\)/', 'rgb(\1,\2,\3)', $string); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS.php new file mode 100644 index 0000000000..953e706755 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS.php @@ -0,0 +1,87 @@ +parseCDATA($css); + + $definition = $config->getCSSDefinition(); + + // we're going to break the spec and explode by semicolons. + // This is because semicolon rarely appears in escaped form + // Doing this is generally flaky but fast + // IT MIGHT APPEAR IN URIs, see HTMLPurifier_AttrDef_CSSURI + // for details + + $declarations = explode(';', $css); + $propvalues = array(); + + /** + * Name of the current CSS property being validated. + */ + $property = false; + $context->register('CurrentCSSProperty', $property); + + foreach ($declarations as $declaration) { + if (!$declaration) continue; + if (!strpos($declaration, ':')) continue; + list($property, $value) = explode(':', $declaration, 2); + $property = trim($property); + $value = trim($value); + $ok = false; + do { + if (isset($definition->info[$property])) { + $ok = true; + break; + } + if (ctype_lower($property)) break; + $property = strtolower($property); + if (isset($definition->info[$property])) { + $ok = true; + break; + } + } while(0); + if (!$ok) continue; + // inefficient call, since the validator will do this again + if (strtolower(trim($value)) !== 'inherit') { + // inherit works for everything (but only on the base property) + $result = $definition->info[$property]->validate( + $value, $config, $context ); + } else { + $result = 'inherit'; + } + if ($result === false) continue; + $propvalues[$property] = $result; + } + + $context->destroy('CurrentCSSProperty'); + + // procedure does not write the new CSS simultaneously, so it's + // slightly inefficient, but it's the only way of getting 
rid of + // duplicates. Perhaps config to optimize it, but not now. + + $new_declarations = ''; + foreach ($propvalues as $prop => $value) { + $new_declarations .= "$prop:$value;"; + } + + return $new_declarations ? $new_declarations : false; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/AlphaValue.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/AlphaValue.php new file mode 100644 index 0000000000..292c040d4b --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/AlphaValue.php @@ -0,0 +1,21 @@ + 1.0) $result = '1'; + return $result; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Background.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Background.php new file mode 100644 index 0000000000..3a3d20cd6a --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Background.php @@ -0,0 +1,87 @@ +getCSSDefinition(); + $this->info['background-color'] = $def->info['background-color']; + $this->info['background-image'] = $def->info['background-image']; + $this->info['background-repeat'] = $def->info['background-repeat']; + $this->info['background-attachment'] = $def->info['background-attachment']; + $this->info['background-position'] = $def->info['background-position']; + } + + public function validate($string, $config, $context) { + + // regular pre-processing + $string = $this->parseCDATA($string); + if ($string === '') return false; + + // munge rgb() decl if necessary + $string = $this->mungeRgb($string); + + // assumes URI doesn't have spaces in it + $bits = explode(' ', strtolower($string)); // bits to process + + $caught = array(); + $caught['color'] = false; + $caught['image'] = false; + $caught['repeat'] = false; + $caught['attachment'] = false; + $caught['position'] = false; + + $i = 0; // number of catches + $none = false; + + foreach ($bits as $bit) { + if ($bit === '') continue; + foreach ($caught as $key => $status) { + if ($key != 'position') { 
+ if ($status !== false) continue; + $r = $this->info['background-' . $key]->validate($bit, $config, $context); + } else { + $r = $bit; + } + if ($r === false) continue; + if ($key == 'position') { + if ($caught[$key] === false) $caught[$key] = ''; + $caught[$key] .= $r . ' '; + } else { + $caught[$key] = $r; + } + $i++; + break; + } + } + + if (!$i) return false; + if ($caught['position'] !== false) { + $caught['position'] = $this->info['background-position']-> + validate($caught['position'], $config, $context); + } + + $ret = array(); + foreach ($caught as $value) { + if ($value === false) continue; + $ret[] = $value; + } + + if (empty($ret)) return false; + return implode(' ', $ret); + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php new file mode 100644 index 0000000000..35df3985e2 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/BackgroundPosition.php @@ -0,0 +1,126 @@ + | | left | center | right + ] + [ + | | top | center | bottom + ]? + ] | + [ // this signifies that the vertical and horizontal adjectives + // can be arbitrarily ordered, however, there can only be two, + // one of each, or none at all + [ + left | center | right + ] || + [ + top | center | bottom + ] + ] + top, left = 0% + center, (none) = 50% + bottom, right = 100% +*/ + +/* QuirksMode says: + keyword + length/percentage must be ordered correctly, as per W3C + + Internet Explorer and Opera, however, support arbitrary ordering. We + should fix it up. + + Minor issue though, not strictly necessary. +*/ + +// control freaks may appreciate the ability to convert these to +// percentages or something, but it's not necessary + +/** + * Validates the value of background-position. 
+ */ +class HTMLPurifier_AttrDef_CSS_BackgroundPosition extends HTMLPurifier_AttrDef +{ + + protected $length; + protected $percentage; + + public function __construct() { + $this->length = new HTMLPurifier_AttrDef_CSS_Length(); + $this->percentage = new HTMLPurifier_AttrDef_CSS_Percentage(); + } + + public function validate($string, $config, $context) { + $string = $this->parseCDATA($string); + $bits = explode(' ', $string); + + $keywords = array(); + $keywords['h'] = false; // left, right + $keywords['v'] = false; // top, bottom + $keywords['c'] = false; // center + $measures = array(); + + $i = 0; + + $lookup = array( + 'top' => 'v', + 'bottom' => 'v', + 'left' => 'h', + 'right' => 'h', + 'center' => 'c' + ); + + foreach ($bits as $bit) { + if ($bit === '') continue; + + // test for keyword + $lbit = ctype_lower($bit) ? $bit : strtolower($bit); + if (isset($lookup[$lbit])) { + $status = $lookup[$lbit]; + $keywords[$status] = $lbit; + $i++; + } + + // test for length + $r = $this->length->validate($bit, $config, $context); + if ($r !== false) { + $measures[] = $r; + $i++; + } + + // test for percentage + $r = $this->percentage->validate($bit, $config, $context); + if ($r !== false) { + $measures[] = $r; + $i++; + } + + } + + if (!$i) return false; // no valid values were caught + + + $ret = array(); + + // first keyword + if ($keywords['h']) $ret[] = $keywords['h']; + elseif (count($measures)) $ret[] = array_shift($measures); + elseif ($keywords['c']) { + $ret[] = $keywords['c']; + $keywords['c'] = false; // prevent re-use: center = center center + } + + if ($keywords['v']) $ret[] = $keywords['v']; + elseif (count($measures)) $ret[] = array_shift($measures); + elseif ($keywords['c']) $ret[] = $keywords['c']; + + if (empty($ret)) return false; + return implode(' ', $ret); + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Border.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Border.php new file mode 100644 index 
0000000000..42a1d1b4ae --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Border.php @@ -0,0 +1,43 @@ +getCSSDefinition(); + $this->info['border-width'] = $def->info['border-width']; + $this->info['border-style'] = $def->info['border-style']; + $this->info['border-top-color'] = $def->info['border-top-color']; + } + + public function validate($string, $config, $context) { + $string = $this->parseCDATA($string); + $string = $this->mungeRgb($string); + $bits = explode(' ', $string); + $done = array(); // segments we've finished + $ret = ''; // return value + foreach ($bits as $bit) { + foreach ($this->info as $propname => $validator) { + if (isset($done[$propname])) continue; + $r = $validator->validate($bit, $config, $context); + if ($r !== false) { + $ret .= $r . ' '; + $done[$propname] = true; + break; + } + } + } + return rtrim($ret); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Color.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Color.php new file mode 100644 index 0000000000..07f95a6719 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Color.php @@ -0,0 +1,78 @@ +get('Core.ColorKeywords'); + + $color = trim($color); + if ($color === '') return false; + + $lower = strtolower($color); + if (isset($colors[$lower])) return $colors[$lower]; + + if (strpos($color, 'rgb(') !== false) { + // rgb literal handling + $length = strlen($color); + if (strpos($color, ')') !== $length - 1) return false; + $triad = substr($color, 4, $length - 4 - 1); + $parts = explode(',', $triad); + if (count($parts) !== 3) return false; + $type = false; // to ensure that they're all the same type + $new_parts = array(); + foreach ($parts as $part) { + $part = trim($part); + if ($part === '') return false; + $length = strlen($part); + if ($part[$length - 1] === '%') { + // handle percents + if (!$type) { + $type = 'percentage'; + } elseif ($type !== 'percentage') { + return false; + } + $num = (float) 
substr($part, 0, $length - 1); + if ($num < 0) $num = 0; + if ($num > 100) $num = 100; + $new_parts[] = "$num%"; + } else { + // handle integers + if (!$type) { + $type = 'integer'; + } elseif ($type !== 'integer') { + return false; + } + $num = (int) $part; + if ($num < 0) $num = 0; + if ($num > 255) $num = 255; + $new_parts[] = (string) $num; + } + } + $new_triad = implode(',', $new_parts); + $color = "rgb($new_triad)"; + } else { + // hexadecimal handling + if ($color[0] === '#') { + $hex = substr($color, 1); + } else { + $hex = $color; + $color = '#' . $color; + } + $length = strlen($hex); + if ($length !== 3 && $length !== 6) return false; + if (!ctype_xdigit($hex)) return false; + } + + return $color; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Composite.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Composite.php new file mode 100644 index 0000000000..de1289cba8 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Composite.php @@ -0,0 +1,38 @@ +defs = $defs; + } + + public function validate($string, $config, $context) { + foreach ($this->defs as $i => $def) { + $result = $this->defs[$i]->validate($string, $config, $context); + if ($result !== false) return $result; + } + return false; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php new file mode 100644 index 0000000000..6599c5b2dd --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/DenyElementDecorator.php @@ -0,0 +1,28 @@ +def = $def; + $this->element = $element; + } + /** + * Checks if CurrentToken is set and equal to $this->element + */ + public function validate($string, $config, $context) { + $token = $context->get('CurrentToken', true); + if ($token && $token->name == $this->element) return false; + return $this->def->validate($string, $config, $context); + } +} + +// vim: et sw=4 
sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Filter.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Filter.php new file mode 100644 index 0000000000..147894b861 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Filter.php @@ -0,0 +1,54 @@ +intValidator = new HTMLPurifier_AttrDef_Integer(); + } + + public function validate($value, $config, $context) { + $value = $this->parseCDATA($value); + if ($value === 'none') return $value; + // if we looped this we could support multiple filters + $function_length = strcspn($value, '('); + $function = trim(substr($value, 0, $function_length)); + if ($function !== 'alpha' && + $function !== 'Alpha' && + $function !== 'progid:DXImageTransform.Microsoft.Alpha' + ) return false; + $cursor = $function_length + 1; + $parameters_length = strcspn($value, ')', $cursor); + $parameters = substr($value, $cursor, $parameters_length); + $params = explode(',', $parameters); + $ret_params = array(); + $lookup = array(); + foreach ($params as $param) { + list($key, $value) = explode('=', $param); + $key = trim($key); + $value = trim($value); + if (isset($lookup[$key])) continue; + if ($key !== 'opacity') continue; + $value = $this->intValidator->validate($value, $config, $context); + if ($value === false) continue; + $int = (int) $value; + if ($int > 100) $value = '100'; + if ($int < 0) $value = '0'; + $ret_params[] = "$key=$value"; + $lookup[$key] = true; + } + $ret_parameters = implode(',', $ret_params); + $ret_function = "$function($ret_parameters)"; + return $ret_function; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Font.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Font.php new file mode 100644 index 0000000000..699ee0b701 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Font.php @@ -0,0 +1,149 @@ +getCSSDefinition(); + $this->info['font-style'] = $def->info['font-style']; + $this->info['font-variant'] = $def->info['font-variant']; + 
$this->info['font-weight'] = $def->info['font-weight']; + $this->info['font-size'] = $def->info['font-size']; + $this->info['line-height'] = $def->info['line-height']; + $this->info['font-family'] = $def->info['font-family']; + } + + public function validate($string, $config, $context) { + + static $system_fonts = array( + 'caption' => true, + 'icon' => true, + 'menu' => true, + 'message-box' => true, + 'small-caption' => true, + 'status-bar' => true + ); + + // regular pre-processing + $string = $this->parseCDATA($string); + if ($string === '') return false; + + // check if it's one of the keywords + $lowercase_string = strtolower($string); + if (isset($system_fonts[$lowercase_string])) { + return $lowercase_string; + } + + $bits = explode(' ', $string); // bits to process + $stage = 0; // this indicates what we're looking for + $caught = array(); // which stage 0 properties have we caught? + $stage_1 = array('font-style', 'font-variant', 'font-weight'); + $final = ''; // output + + for ($i = 0, $size = count($bits); $i < $size; $i++) { + if ($bits[$i] === '') continue; + switch ($stage) { + + // attempting to catch font-style, font-variant or font-weight + case 0: + foreach ($stage_1 as $validator_name) { + if (isset($caught[$validator_name])) continue; + $r = $this->info[$validator_name]->validate( + $bits[$i], $config, $context); + if ($r !== false) { + $final .= $r . ' '; + $caught[$validator_name] = true; + break; + } + } + // all three caught, continue on + if (count($caught) >= 3) $stage = 1; + if ($r !== false) break; + + // attempting to catch font-size and perhaps line-height + case 1: + $found_slash = false; + if (strpos($bits[$i], '/') !== false) { + list($font_size, $line_height) = + explode('/', $bits[$i]); + if ($line_height === '') { + // ooh, there's a space after the slash! 
+ $line_height = false; + $found_slash = true; + } + } else { + $font_size = $bits[$i]; + $line_height = false; + } + $r = $this->info['font-size']->validate( + $font_size, $config, $context); + if ($r !== false) { + $final .= $r; + // attempt to catch line-height + if ($line_height === false) { + // we need to scroll forward + for ($j = $i + 1; $j < $size; $j++) { + if ($bits[$j] === '') continue; + if ($bits[$j] === '/') { + if ($found_slash) { + return false; + } else { + $found_slash = true; + continue; + } + } + $line_height = $bits[$j]; + break; + } + } else { + // slash already found + $found_slash = true; + $j = $i; + } + if ($found_slash) { + $i = $j; + $r = $this->info['line-height']->validate( + $line_height, $config, $context); + if ($r !== false) { + $final .= '/' . $r; + } + } + $final .= ' '; + $stage = 2; + break; + } + return false; + + // attempting to catch font-family + case 2: + $font_family = + implode(' ', array_slice($bits, $i, $size - $i)); + $r = $this->info['font-family']->validate( + $font_family, $config, $context); + if ($r !== false) { + $final .= $r . ' '; + // processing completed successfully + return rtrim($final); + } + return false; + } + } + return false; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php new file mode 100644 index 0000000000..705ac893d1 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/FontFamily.php @@ -0,0 +1,90 @@ + true, + 'sans-serif' => true, + 'monospace' => true, + 'fantasy' => true, + 'cursive' => true + ); + + // assume that no font names contain commas in them + $fonts = explode(',', $string); + $final = ''; + foreach($fonts as $font) { + $font = trim($font); + if ($font === '') continue; + // match a generic name + if (isset($generic_names[$font])) { + $final .= $font . 
', '; + continue; + } + // match a quoted name + if ($font[0] === '"' || $font[0] === "'") { + $length = strlen($font); + if ($length <= 2) continue; + $quote = $font[0]; + if ($font[$length - 1] !== $quote) continue; + $font = substr($font, 1, $length - 2); + + $new_font = ''; + for ($i = 0, $c = strlen($font); $i < $c; $i++) { + if ($font[$i] === '\\') { + $i++; + if ($i >= $c) { + $new_font .= '\\'; + break; + } + if (ctype_xdigit($font[$i])) { + $code = $font[$i]; + for ($a = 1, $i++; $i < $c && $a < 6; $i++, $a++) { + if (!ctype_xdigit($font[$i])) break; + $code .= $font[$i]; + } + // We have to be extremely careful when adding + // new characters, to make sure we're not breaking + // the encoding. + $char = HTMLPurifier_Encoder::unichr(hexdec($code)); + if (HTMLPurifier_Encoder::cleanUTF8($char) === '') continue; + $new_font .= $char; + if ($i < $c && trim($font[$i]) !== '') $i--; + continue; + } + if ($font[$i] === "\n") continue; + } + $new_font .= $font[$i]; + } + + $font = $new_font; + } + // $font is a pure representation of the font name + + if (ctype_alnum($font) && $font !== '') { + // very simple font, allow it in unharmed + $final .= $font . ', '; + continue; + } + + // complicated font, requires quoting + + // armor single quotes and new lines + $font = str_replace("\\", "\\\\", $font); + $font = str_replace("'", "\\'", $font); + $final .= "'$font', "; + } + $final = rtrim($final, ', '); + if ($final === '') return false; + return $final; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php new file mode 100644 index 0000000000..4e6b35e5a0 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/ImportantDecorator.php @@ -0,0 +1,40 @@ +def = $def; + $this->allow = $allow; + } + /** + * Intercepts and removes !important if necessary + */ + public function validate($string, $config, $context) { + // test for ! 
and important tokens + $string = trim($string); + $is_important = false; + // :TODO: optimization: test directly for !important and ! important + if (strlen($string) >= 9 && substr($string, -9) === 'important') { + $temp = rtrim(substr($string, 0, -9)); + // use a temp, because we might want to restore important + if (strlen($temp) >= 1 && substr($temp, -1) === '!') { + $string = rtrim(substr($temp, 0, -1)); + $is_important = true; + } + } + $string = $this->def->validate($string, $config, $context); + if ($this->allow && $is_important) $string .= ' !important'; + return $string; + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Length.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Length.php new file mode 100644 index 0000000000..a07ec58135 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Length.php @@ -0,0 +1,47 @@ +min = $min !== null ? HTMLPurifier_Length::make($min) : null; + $this->max = $max !== null ? HTMLPurifier_Length::make($max) : null; + } + + public function validate($string, $config, $context) { + $string = $this->parseCDATA($string); + + // Optimizations + if ($string === '') return false; + if ($string === '0') return '0'; + if (strlen($string) === 1) return false; + + $length = HTMLPurifier_Length::make($string); + if (!$length->isValid()) return false; + + if ($this->min) { + $c = $length->compareTo($this->min); + if ($c === false) return false; + if ($c < 0) return false; + } + if ($this->max) { + $c = $length->compareTo($this->max); + if ($c === false) return false; + if ($c > 0) return false; + } + + return $length->toString(); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/ListStyle.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/ListStyle.php new file mode 100644 index 0000000000..4406868c08 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/ListStyle.php @@ -0,0 +1,78 @@ +getCSSDefinition(); + 
$this->info['list-style-type'] = $def->info['list-style-type']; + $this->info['list-style-position'] = $def->info['list-style-position']; + $this->info['list-style-image'] = $def->info['list-style-image']; + } + + public function validate($string, $config, $context) { + + // regular pre-processing + $string = $this->parseCDATA($string); + if ($string === '') return false; + + // assumes URI doesn't have spaces in it + $bits = explode(' ', strtolower($string)); // bits to process + + $caught = array(); + $caught['type'] = false; + $caught['position'] = false; + $caught['image'] = false; + + $i = 0; // number of catches + $none = false; + + foreach ($bits as $bit) { + if ($i >= 3) return; // optimization bit + if ($bit === '') continue; + foreach ($caught as $key => $status) { + if ($status !== false) continue; + $r = $this->info['list-style-' . $key]->validate($bit, $config, $context); + if ($r === false) continue; + if ($r === 'none') { + if ($none) continue; + else $none = true; + if ($key == 'image') continue; + } + $caught[$key] = $r; + $i++; + break; + } + } + + if (!$i) return false; + + $ret = array(); + + // construct type + if ($caught['type']) $ret[] = $caught['type']; + + // construct image + if ($caught['image']) $ret[] = $caught['image']; + + // construct position + if ($caught['position']) $ret[] = $caught['position']; + + if (empty($ret)) return false; + return implode(' ', $ret); + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Multiple.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Multiple.php new file mode 100644 index 0000000000..4d62a40d7f --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Multiple.php @@ -0,0 +1,58 @@ +single = $single; + $this->max = $max; + } + + public function validate($string, $config, $context) { + $string = $this->parseCDATA($string); + if ($string === '') return false; + $parts = explode(' ', $string); // parseCDATA replaced \r, \t and \n + $length = 
count($parts); + $final = ''; + for ($i = 0, $num = 0; $i < $length && $num < $this->max; $i++) { + if (ctype_space($parts[$i])) continue; + $result = $this->single->validate($parts[$i], $config, $context); + if ($result !== false) { + $final .= $result . ' '; + $num++; + } + } + if ($final === '') return false; + return rtrim($final); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Number.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Number.php new file mode 100644 index 0000000000..3f99e12ec2 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Number.php @@ -0,0 +1,69 @@ +non_negative = $non_negative; + } + + /** + * @warning Some contexts do not pass $config, $context. These + * variables should not be used without checking HTMLPurifier_Length + */ + public function validate($number, $config, $context) { + + $number = $this->parseCDATA($number); + + if ($number === '') return false; + if ($number === '0') return '0'; + + $sign = ''; + switch ($number[0]) { + case '-': + if ($this->non_negative) return false; + $sign = '-'; + case '+': + $number = substr($number, 1); + } + + if (ctype_digit($number)) { + $number = ltrim($number, '0'); + return $number ? $sign . $number : '0'; + } + + // Period is the only non-numeric character allowed + if (strpos($number, '.') === false) return false; + + list($left, $right) = explode('.', $number, 2); + + if ($left === '' && $right === '') return false; + if ($left !== '' && !ctype_digit($left)) return false; + + $left = ltrim($left, '0'); + $right = rtrim($right, '0'); + + if ($right === '') { + return $left ? $sign . $left : '0'; + } elseif (!ctype_digit($right)) { + return false; + } + + return $sign . $left . '.' . 
$right; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Percentage.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Percentage.php new file mode 100644 index 0000000000..c34b8fc3c3 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/Percentage.php @@ -0,0 +1,40 @@ +number_def = new HTMLPurifier_AttrDef_CSS_Number($non_negative); + } + + public function validate($string, $config, $context) { + + $string = $this->parseCDATA($string); + + if ($string === '') return false; + $length = strlen($string); + if ($length === 1) return false; + if ($string[$length - 1] !== '%') return false; + + $number = substr($string, 0, $length - 1); + $number = $this->number_def->validate($number, $config, $context); + + if ($number === false) return false; + return "$number%"; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/TextDecoration.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/TextDecoration.php new file mode 100644 index 0000000000..772c922d80 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/TextDecoration.php @@ -0,0 +1,38 @@ + true, + 'overline' => true, + 'underline' => true, + ); + + $string = strtolower($this->parseCDATA($string)); + + if ($string === 'none') return $string; + + $parts = explode(' ', $string); + $final = ''; + foreach ($parts as $part) { + if (isset($allowed_values[$part])) { + $final .= $part . 
' '; + } + } + $final = rtrim($final); + if ($final === '') return false; + return $final; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/URI.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/URI.php new file mode 100644 index 0000000000..435d7930bb --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/CSS/URI.php @@ -0,0 +1,56 @@ +parseCDATA($uri_string); + if (strpos($uri_string, 'url(') !== 0) return false; + $uri_string = substr($uri_string, 4); + $new_length = strlen($uri_string) - 1; + if ($uri_string[$new_length] != ')') return false; + $uri = trim(substr($uri_string, 0, $new_length)); + + if (!empty($uri) && ($uri[0] == "'" || $uri[0] == '"')) { + $quote = $uri[0]; + $new_length = strlen($uri) - 1; + if ($uri[$new_length] !== $quote) return false; + $uri = substr($uri, 1, $new_length - 1); + } + + $keys = array( '(', ')', ',', ' ', '"', "'"); + $values = array('\\(', '\\)', '\\,', '\\ ', '\\"', "\\'"); + $uri = str_replace($values, $keys, $uri); + + $result = parent::validate($uri, $config, $context); + + if ($result === false) return false; + + // escape necessary characters according to CSS spec + // except for the comma, none of these should appear in the + // URI at all + $result = str_replace($keys, $values, $result); + + return "url($result)"; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/Enum.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/Enum.php new file mode 100644 index 0000000000..5d603ebcc6 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/Enum.php @@ -0,0 +1,65 @@ +valid_values = array_flip($valid_values); + $this->case_sensitive = $case_sensitive; + } + + public function validate($string, $config, $context) { + $string = trim($string); + if (!$this->case_sensitive) { + // we may want to do full case-insensitive libraries + $string = ctype_lower($string) ? 
$string : strtolower($string); + } + $result = isset($this->valid_values[$string]); + + return $result ? $string : false; + } + + /** + * @param $string In form of comma-delimited list of case-insensitive + * valid values. Example: "foo,bar,baz". Prepend "s:" to make + * case sensitive + */ + public function make($string) { + if (strlen($string) > 2 && $string[0] == 's' && $string[1] == ':') { + $string = substr($string, 2); + $sensitive = true; + } else { + $sensitive = false; + } + $values = explode(',', $string); + return new HTMLPurifier_AttrDef_Enum($values, $sensitive); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Bool.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Bool.php new file mode 100644 index 0000000000..e06987eb8d --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Bool.php @@ -0,0 +1,28 @@ +name = $name;} + + public function validate($string, $config, $context) { + if (empty($string)) return false; + return $this->name; + } + + /** + * @param $string Name of attribute + */ + public function make($string) { + return new HTMLPurifier_AttrDef_HTML_Bool($string); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Class.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Class.php new file mode 100644 index 0000000000..370068d975 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Class.php @@ -0,0 +1,34 @@ +getDefinition('HTML')->doctype->name; + if ($name == "XHTML 1.1" || $name == "XHTML 2.0") { + return parent::split($string, $config, $context); + } else { + return preg_split('/\s+/', $string); + } + } + protected function filter($tokens, $config, $context) { + $allowed = $config->get('Attr.AllowedClasses'); + $forbidden = $config->get('Attr.ForbiddenClasses'); + $ret = array(); + foreach ($tokens as $token) { + if ( + ($allowed === null || isset($allowed[$token])) && + !isset($forbidden[$token]) && + // We need this O(n) check 
because of PHP's array + // implementation that casts -0 to 0. + !in_array($token, $ret, true) + ) { + $ret[] = $token; + } + } + return $ret; + } +} diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Color.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Color.php new file mode 100644 index 0000000000..d01e20454e --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Color.php @@ -0,0 +1,32 @@ +get('Core.ColorKeywords'); + + $string = trim($string); + + if (empty($string)) return false; + if (isset($colors[$string])) return $colors[$string]; + if ($string[0] === '#') $hex = substr($string, 1); + else $hex = $string; + + $length = strlen($hex); + if ($length !== 3 && $length !== 6) return false; + if (!ctype_xdigit($hex)) return false; + if ($length === 3) $hex = $hex[0].$hex[0].$hex[1].$hex[1].$hex[2].$hex[2]; + + return "#$hex"; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/FrameTarget.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/FrameTarget.php new file mode 100644 index 0000000000..ae6ea7c01d --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/FrameTarget.php @@ -0,0 +1,21 @@ +valid_values === false) $this->valid_values = $config->get('Attr.AllowedFrameTargets'); + return parent::validate($string, $config, $context); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/ID.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/ID.php new file mode 100644 index 0000000000..81d03762de --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/ID.php @@ -0,0 +1,70 @@ +get('Attr.EnableID')) return false; + + $id = trim($id); // trim it first + + if ($id === '') return false; + + $prefix = $config->get('Attr.IDPrefix'); + if ($prefix !== '') { + $prefix .= $config->get('Attr.IDPrefixLocal'); + // prevent re-appending the prefix + if (strpos($id, $prefix) !== 0) $id = $prefix . 
$id; + } elseif ($config->get('Attr.IDPrefixLocal') !== '') { + trigger_error('%Attr.IDPrefixLocal cannot be used unless '. + '%Attr.IDPrefix is set', E_USER_WARNING); + } + + //if (!$this->ref) { + $id_accumulator =& $context->get('IDAccumulator'); + if (isset($id_accumulator->ids[$id])) return false; + //} + + // we purposely avoid using regex, hopefully this is faster + + if (ctype_alpha($id)) { + $result = true; + } else { + if (!ctype_alpha(@$id[0])) return false; + $trim = trim( // primitive style of regexps, I suppose + $id, + 'A..Za..z0..9:-._' + ); + $result = ($trim === ''); + } + + $regexp = $config->get('Attr.IDBlacklistRegexp'); + if ($regexp && preg_match($regexp, $id)) { + return false; + } + + if (/*!$this->ref && */$result) $id_accumulator->add($id); + + // if no change was made to the ID, return the result + // else, return the new id if stripping whitespace made it + // valid, or return false. + return $result ? $id : false; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Length.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Length.php new file mode 100644 index 0000000000..a242f9c238 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Length.php @@ -0,0 +1,41 @@ + 100) return '100%'; + + return ((string) $points) . '%'; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/LinkTypes.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/LinkTypes.php new file mode 100644 index 0000000000..76d25ed088 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/LinkTypes.php @@ -0,0 +1,53 @@ + 'AllowedRel', + 'rev' => 'AllowedRev' + ); + if (!isset($configLookup[$name])) { + trigger_error('Unrecognized attribute name for link '. + 'relationship.', E_USER_ERROR); + return; + } + $this->name = $configLookup[$name]; + } + + public function validate($string, $config, $context) { + + $allowed = $config->get('Attr.' . 
$this->name); + if (empty($allowed)) return false; + + $string = $this->parseCDATA($string); + $parts = explode(' ', $string); + + // lookup to prevent duplicates + $ret_lookup = array(); + foreach ($parts as $part) { + $part = strtolower(trim($part)); + if (!isset($allowed[$part])) continue; + $ret_lookup[$part] = true; + } + + if (empty($ret_lookup)) return false; + $string = implode(' ', array_keys($ret_lookup)); + + return $string; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/MultiLength.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/MultiLength.php new file mode 100644 index 0000000000..c72fc76e4d --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/MultiLength.php @@ -0,0 +1,41 @@ +split($string, $config, $context); + $tokens = $this->filter($tokens, $config, $context); + if (empty($tokens)) return false; + return implode(' ', $tokens); + + } + + /** + * Splits a space separated list of tokens into its constituent parts. + */ + protected function split($string, $config, $context) { + // OPTIMIZABLE! + // do the preg_match, capture all subpatterns for reformulation + + // we don't support U+00A1 and up codepoints or + // escaping because I don't know how to do that with regexps + // and plus it would complicate optimization efforts (you never + // see that anyway). + $pattern = '/(?:(?<=\s)|\A)'. // look behind for space or string start + '((?:--|-?[A-Za-z_])[A-Za-z_\-0-9]*)'. + '(?:(?=\s)|\z)/'; // look ahead for space or string end + preg_match_all($pattern, $string, $matches); + return $matches[1]; + } + + /** + * Template method for removing certain tokens based on arbitrary criteria. + * @note If we wanted to be really functional, we'd do an array_filter + * with a callback. But... we're not. 
+ */ + protected function filter($tokens, $config, $context) { + return $tokens; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Pixels.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Pixels.php new file mode 100644 index 0000000000..4cb2c1b857 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/HTML/Pixels.php @@ -0,0 +1,48 @@ +max = $max; + } + + public function validate($string, $config, $context) { + + $string = trim($string); + if ($string === '0') return $string; + if ($string === '') return false; + $length = strlen($string); + if (substr($string, $length - 2) == 'px') { + $string = substr($string, 0, $length - 2); + } + if (!is_numeric($string)) return false; + $int = (int) $string; + + if ($int < 0) return '0'; + + // upper-bound value, extremely high values can + // crash operating systems, see + // WARNING, above link WILL crash you if you're using Windows + + if ($this->max !== null && $int > $this->max) return (string) $this->max; + + return (string) $int; + + } + + public function make($string) { + if ($string === '') $max = null; + else $max = (int) $string; + $class = get_class($this); + return new $class($max); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/Integer.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/Integer.php new file mode 100644 index 0000000000..d59738d2a2 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/Integer.php @@ -0,0 +1,73 @@ +negative = $negative; + $this->zero = $zero; + $this->positive = $positive; + } + + public function validate($integer, $config, $context) { + + $integer = $this->parseCDATA($integer); + if ($integer === '') return false; + + // we could possibly simply typecast it to integer, but there are + // certain fringe cases that must not return an integer. 
+ + // clip leading sign + if ( $this->negative && $integer[0] === '-' ) { + $digits = substr($integer, 1); + if ($digits === '0') $integer = '0'; // rm minus sign for zero + } elseif( $this->positive && $integer[0] === '+' ) { + $digits = $integer = substr($integer, 1); // rm unnecessary plus + } else { + $digits = $integer; + } + + // test if it's numeric + if (!ctype_digit($digits)) return false; + + // perform scope tests + if (!$this->zero && $integer == 0) return false; + if (!$this->positive && $integer > 0) return false; + if (!$this->negative && $integer < 0) return false; + + return $integer; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/Lang.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/Lang.php new file mode 100644 index 0000000000..10e6da56db --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/Lang.php @@ -0,0 +1,73 @@ + 8 || !ctype_alnum($subtags[1])) { + return $new_string; + } + if (!ctype_lower($subtags[1])) $subtags[1] = strtolower($subtags[1]); + + $new_string .= '-' . $subtags[1]; + if ($num_subtags == 2) return $new_string; + + // process all other subtags, index 2 and up + for ($i = 2; $i < $num_subtags; $i++) { + $length = strlen($subtags[$i]); + if ($length == 0 || $length > 8 || !ctype_alnum($subtags[$i])) { + return $new_string; + } + if (!ctype_lower($subtags[$i])) { + $subtags[$i] = strtolower($subtags[$i]); + } + $new_string .= '-' . 
$subtags[$i]; + } + + return $new_string; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/Switch.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/Switch.php new file mode 100644 index 0000000000..c9e3ed193e --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/Switch.php @@ -0,0 +1,34 @@ +tag = $tag; + $this->withTag = $with_tag; + $this->withoutTag = $without_tag; + } + + public function validate($string, $config, $context) { + $token = $context->get('CurrentToken', true); + if (!$token || $token->name !== $this->tag) { + return $this->withoutTag->validate($string, $config, $context); + } else { + return $this->withTag->validate($string, $config, $context); + } + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/Text.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/Text.php new file mode 100644 index 0000000000..c6216cc531 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/Text.php @@ -0,0 +1,15 @@ +parseCDATA($string); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI.php new file mode 100644 index 0000000000..01a6d83e95 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI.php @@ -0,0 +1,77 @@ +parser = new HTMLPurifier_URIParser(); + $this->embedsResource = (bool) $embeds_resource; + } + + public function make($string) { + $embeds = (bool) $string; + return new HTMLPurifier_AttrDef_URI($embeds); + } + + public function validate($uri, $config, $context) { + + if ($config->get('URI.Disable')) return false; + + $uri = $this->parseCDATA($uri); + + // parse the URI + $uri = $this->parser->parse($uri); + if ($uri === false) return false; + + // add embedded flag to context for validators + $context->register('EmbeddedURI', $this->embedsResource); + + $ok = false; + do { + + // generic validation + $result = $uri->validate($config, $context); + if (!$result) break; + + // 
chained filtering + $uri_def = $config->getDefinition('URI'); + $result = $uri_def->filter($uri, $config, $context); + if (!$result) break; + + // scheme-specific validation + $scheme_obj = $uri->getSchemeObj($config, $context); + if (!$scheme_obj) break; + if ($this->embedsResource && !$scheme_obj->browsable) break; + $result = $scheme_obj->validate($uri, $config, $context); + if (!$result) break; + + // Post chained filtering + $result = $uri_def->postFilter($uri, $config, $context); + if (!$result) break; + + // survived gauntlet + $ok = true; + + } while (false); + + $context->destroy('EmbeddedURI'); + if (!$ok) return false; + + // back to string + return $uri->toString(); + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/Email.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/Email.php new file mode 100644 index 0000000000..bfee9d166c --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/Email.php @@ -0,0 +1,17 @@ +" + // that needs more percent encoding to be done + if ($string == '') return false; + $string = trim($string); + $result = preg_match('/^[A-Z0-9._%-]+@[A-Z0-9.-]+\.[A-Z]{2,4}$/i', $string); + return $result ? $string : false; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/Host.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/Host.php new file mode 100644 index 0000000000..2156c10c66 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/Host.php @@ -0,0 +1,62 @@ +ipv4 = new HTMLPurifier_AttrDef_URI_IPv4(); + $this->ipv6 = new HTMLPurifier_AttrDef_URI_IPv6(); + } + + public function validate($string, $config, $context) { + $length = strlen($string); + if ($string === '') return ''; + if ($length > 1 && $string[0] === '[' && $string[$length-1] === ']') { + //IPv6 + $ip = substr($string, 1, $length - 2); + $valid = $this->ipv6->validate($ip, $config, $context); + if ($valid === false) return false; + return '['. $valid . 
']'; + } + + // need to do checks on unusual encodings too + $ipv4 = $this->ipv4->validate($string, $config, $context); + if ($ipv4 !== false) return $ipv4; + + // A regular domain name. + + // This breaks I18N domain names, but we don't have proper IRI support, + // so force users to insert Punycode. If there's complaining we'll + // try to fix things into an international friendly form. + + // The productions describing this are: + $a = '[a-z]'; // alpha + $an = '[a-z0-9]'; // alphanum + $and = '[a-z0-9-]'; // alphanum | "-" + // domainlabel = alphanum | alphanum *( alphanum | "-" ) alphanum + $domainlabel = "$an($and*$an)?"; + // toplabel = alpha | alpha *( alphanum | "-" ) alphanum + $toplabel = "$a($and*$an)?"; + // hostname = *( domainlabel "." ) toplabel [ "." ] + $match = preg_match("/^($domainlabel\.)*$toplabel\.?$/i", $string); + if (!$match) return false; + + return $string; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/IPv4.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/IPv4.php new file mode 100644 index 0000000000..ec4cf591b8 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/IPv4.php @@ -0,0 +1,39 @@ +ip4) $this->_loadRegex(); + + if (preg_match('#^' . $this->ip4 . '$#s', $aIP)) + { + return $aIP; + } + + return false; + + } + + /** + * Lazy load function to prevent regex from being stuffed in + * cache. + */ + protected function _loadRegex() { + $oct = '(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])'; // 0-255 + $this->ip4 = "(?:{$oct}\\.{$oct}\\.{$oct}\\.{$oct})"; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/IPv6.php b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/IPv6.php new file mode 100644 index 0000000000..9454e9be50 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrDef/URI/IPv6.php @@ -0,0 +1,99 @@ +ip4) $this->_loadRegex(); + + $original = $aIP; + + $hex = '[0-9a-fA-F]'; + $blk = '(?:' . $hex . 
'{1,4})'; + $pre = '(?:/(?:12[0-8]|1[0-1][0-9]|[1-9][0-9]|[0-9]))'; // /0 - /128 + + // prefix check + if (strpos($aIP, '/') !== false) + { + if (preg_match('#' . $pre . '$#s', $aIP, $find)) + { + $aIP = substr($aIP, 0, 0-strlen($find[0])); + unset($find); + } + else + { + return false; + } + } + + // IPv4-compatiblity check + if (preg_match('#(?<=:'.')' . $this->ip4 . '$#s', $aIP, $find)) + { + $aIP = substr($aIP, 0, 0-strlen($find[0])); + $ip = explode('.', $find[0]); + $ip = array_map('dechex', $ip); + $aIP .= $ip[0] . $ip[1] . ':' . $ip[2] . $ip[3]; + unset($find, $ip); + } + + // compression check + $aIP = explode('::', $aIP); + $c = count($aIP); + if ($c > 2) + { + return false; + } + elseif ($c == 2) + { + list($first, $second) = $aIP; + $first = explode(':', $first); + $second = explode(':', $second); + + if (count($first) + count($second) > 8) + { + return false; + } + + while(count($first) < 8) + { + array_push($first, '0'); + } + + array_splice($first, 8 - count($second), 8, $second); + $aIP = $first; + unset($first,$second); + } + else + { + $aIP = explode(':', $aIP[0]); + } + $c = count($aIP); + + if ($c != 8) + { + return false; + } + + // All the pieces should be 16-bit hex strings. Are they? 
+ foreach ($aIP as $piece) + { + if (!preg_match('#^[0-9a-fA-F]{4}$#s', sprintf('%04s', $piece))) + { + return false; + } + } + + return $original; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform.php new file mode 100644 index 0000000000..e61d3e01b6 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform.php @@ -0,0 +1,56 @@ +confiscateAttr($attr, 'background'); + // some validation should happen here + + $this->prependCSS($attr, "background-image:url($background);"); + + return $attr; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/BdoDir.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/BdoDir.php new file mode 100644 index 0000000000..4d1a05665e --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/BdoDir.php @@ -0,0 +1,19 @@ +get('Attr.DefaultTextDir'); + return $attr; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/BgColor.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/BgColor.php new file mode 100644 index 0000000000..ad3916bb96 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/BgColor.php @@ -0,0 +1,23 @@ +confiscateAttr($attr, 'bgcolor'); + // some validation should happen here + + $this->prependCSS($attr, "background-color:$bgcolor;"); + + return $attr; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/BoolToCSS.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/BoolToCSS.php new file mode 100644 index 0000000000..51159b6715 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/BoolToCSS.php @@ -0,0 +1,36 @@ +attr = $attr; + $this->css = $css; + } + + public function transform($attr, $config, $context) { + if (!isset($attr[$this->attr])) return $attr; + unset($attr[$this->attr]); + $this->prependCSS($attr, $this->css); + return $attr; + } + +} + +// vim: 
et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Border.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Border.php new file mode 100644 index 0000000000..476b0b079b --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Border.php @@ -0,0 +1,18 @@ +confiscateAttr($attr, 'border'); + // some validation should happen here + $this->prependCSS($attr, "border:{$border_width}px solid;"); + return $attr; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/EnumToCSS.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/EnumToCSS.php new file mode 100644 index 0000000000..2a5b4514ab --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/EnumToCSS.php @@ -0,0 +1,58 @@ +attr = $attr; + $this->enumToCSS = $enum_to_css; + $this->caseSensitive = (bool) $case_sensitive; + } + + public function transform($attr, $config, $context) { + + if (!isset($attr[$this->attr])) return $attr; + + $value = trim($attr[$this->attr]); + unset($attr[$this->attr]); + + if (!$this->caseSensitive) $value = strtolower($value); + + if (!isset($this->enumToCSS[$value])) { + return $attr; + } + + $this->prependCSS($attr, $this->enumToCSS[$value]); + + return $attr; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ImgRequired.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ImgRequired.php new file mode 100644 index 0000000000..a219479a02 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ImgRequired.php @@ -0,0 +1,42 @@ +get('Core.RemoveInvalidImg')) return $attr; + $attr['src'] = $config->get('Attr.DefaultInvalidImage'); + $src = false; + } + + if (!isset($attr['alt'])) { + if ($src) { + $alt = $config->get('Attr.DefaultImageAlt'); + if ($alt === null) { + $attr['alt'] = basename($attr['src']); + } else { + $attr['alt'] = $alt; + } + } else { + $attr['alt'] = $config->get('Attr.DefaultInvalidImageAlt'); + } + } + + return $attr; + + } + +} + 
+// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ImgSpace.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ImgSpace.php new file mode 100644 index 0000000000..fd84c10c36 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ImgSpace.php @@ -0,0 +1,44 @@ + array('left', 'right'), + 'vspace' => array('top', 'bottom') + ); + + public function __construct($attr) { + $this->attr = $attr; + if (!isset($this->css[$attr])) { + trigger_error(htmlspecialchars($attr) . ' is not valid space attribute'); + } + } + + public function transform($attr, $config, $context) { + + if (!isset($attr[$this->attr])) return $attr; + + $width = $this->confiscateAttr($attr, $this->attr); + // some validation could happen here + + if (!isset($this->css[$this->attr])) return $attr; + + $style = ''; + foreach ($this->css[$this->attr] as $suffix) { + $property = "margin-$suffix"; + $style .= "$property:{$width}px;"; + } + + $this->prependCSS($attr, $style); + + return $attr; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Input.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Input.php new file mode 100644 index 0000000000..16829552d1 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Input.php @@ -0,0 +1,40 @@ +pixels = new HTMLPurifier_AttrDef_HTML_Pixels(); + } + + public function transform($attr, $config, $context) { + if (!isset($attr['type'])) $t = 'text'; + else $t = strtolower($attr['type']); + if (isset($attr['checked']) && $t !== 'radio' && $t !== 'checkbox') { + unset($attr['checked']); + } + if (isset($attr['maxlength']) && $t !== 'text' && $t !== 'password') { + unset($attr['maxlength']); + } + if (isset($attr['size']) && $t !== 'text' && $t !== 'password') { + $result = $this->pixels->validate($attr['size'], $config, $context); + if ($result === false) unset($attr['size']); + else $attr['size'] = $result; + } + if (isset($attr['src']) && $t !== 'image') { + 
unset($attr['src']); + } + if (!isset($attr['value']) && ($t === 'radio' || $t === 'checkbox')) { + $attr['value'] = ''; + } + return $attr; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Lang.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Lang.php new file mode 100644 index 0000000000..5869e7f820 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Lang.php @@ -0,0 +1,28 @@ +name = $name; + $this->cssName = $css_name ? $css_name : $name; + } + + public function transform($attr, $config, $context) { + if (!isset($attr[$this->name])) return $attr; + $length = $this->confiscateAttr($attr, $this->name); + if(ctype_digit($length)) $length .= 'px'; + $this->prependCSS($attr, $this->cssName . ":$length;"); + return $attr; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Name.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Name.php new file mode 100644 index 0000000000..15315bc735 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Name.php @@ -0,0 +1,21 @@ +get('HTML.Attr.Name.UseCDATA')) return $attr; + if (!isset($attr['name'])) return $attr; + $id = $this->confiscateAttr($attr, 'name'); + if ( isset($attr['id'])) return $attr; + $attr['id'] = $id; + return $attr; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/NameSync.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/NameSync.php new file mode 100644 index 0000000000..a95638c140 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/NameSync.php @@ -0,0 +1,27 @@ +idDef = new HTMLPurifier_AttrDef_HTML_ID(); + } + + public function transform($attr, $config, $context) { + if (!isset($attr['name'])) return $attr; + $name = $attr['name']; + if (isset($attr['id']) && $attr['id'] === $name) return $attr; + $result = $this->idDef->validate($name, $config, $context); + if ($result === false) unset($attr['name']); + else $attr['name'] = 
$result; + return $attr; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/SafeEmbed.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/SafeEmbed.php new file mode 100644 index 0000000000..4da449981f --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/SafeEmbed.php @@ -0,0 +1,15 @@ +uri = new HTMLPurifier_AttrDef_URI(true); // embedded + } + + public function transform($attr, $config, $context) { + // If we add support for other objects, we'll need to alter the + // transforms. + switch ($attr['name']) { + // application/x-shockwave-flash + // Keep this synchronized with Injector/SafeObject.php + case 'allowScriptAccess': + $attr['value'] = 'never'; + break; + case 'allowNetworking': + $attr['value'] = 'internal'; + break; + case 'wmode': + $attr['value'] = 'window'; + break; + case 'movie': + $attr['value'] = $this->uri->validate($attr['value'], $config, $context); + break; + // add other cases to support other param name/value pairs + default: + $attr['name'] = $attr['value'] = null; + } + return $attr; + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ScriptRequired.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ScriptRequired.php new file mode 100644 index 0000000000..4499050a22 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/ScriptRequired.php @@ -0,0 +1,16 @@ + + */ +class HTMLPurifier_AttrTransform_ScriptRequired extends HTMLPurifier_AttrTransform +{ + public function transform($attr, $config, $context) { + if (!isset($attr['type'])) { + $attr['type'] = 'text/javascript'; + } + return $attr; + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Textarea.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Textarea.php new file mode 100644 index 0000000000..81ac3488ba --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTransform/Textarea.php @@ -0,0 +1,18 @@ + + */ +class 
HTMLPurifier_AttrTransform_Textarea extends HTMLPurifier_AttrTransform +{ + + public function transform($attr, $config, $context) { + // Calculated from Firefox + if (!isset($attr['cols'])) $attr['cols'] = '22'; + if (!isset($attr['rows'])) $attr['rows'] = '3'; + return $attr; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrTypes.php b/extlib/HTMLPurifier/HTMLPurifier/AttrTypes.php new file mode 100644 index 0000000000..fc2ea4e588 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrTypes.php @@ -0,0 +1,77 @@ +info['Enum'] = new HTMLPurifier_AttrDef_Enum(); + $this->info['Bool'] = new HTMLPurifier_AttrDef_HTML_Bool(); + + $this->info['CDATA'] = new HTMLPurifier_AttrDef_Text(); + $this->info['ID'] = new HTMLPurifier_AttrDef_HTML_ID(); + $this->info['Length'] = new HTMLPurifier_AttrDef_HTML_Length(); + $this->info['MultiLength'] = new HTMLPurifier_AttrDef_HTML_MultiLength(); + $this->info['NMTOKENS'] = new HTMLPurifier_AttrDef_HTML_Nmtokens(); + $this->info['Pixels'] = new HTMLPurifier_AttrDef_HTML_Pixels(); + $this->info['Text'] = new HTMLPurifier_AttrDef_Text(); + $this->info['URI'] = new HTMLPurifier_AttrDef_URI(); + $this->info['LanguageCode'] = new HTMLPurifier_AttrDef_Lang(); + $this->info['Color'] = new HTMLPurifier_AttrDef_HTML_Color(); + + // unimplemented aliases + $this->info['ContentType'] = new HTMLPurifier_AttrDef_Text(); + $this->info['ContentTypes'] = new HTMLPurifier_AttrDef_Text(); + $this->info['Charsets'] = new HTMLPurifier_AttrDef_Text(); + $this->info['Character'] = new HTMLPurifier_AttrDef_Text(); + + // "proprietary" types + $this->info['Class'] = new HTMLPurifier_AttrDef_HTML_Class(); + + // number is really a positive integer (one or more digits) + // FIXME: ^^ not always, see start and value of list items + $this->info['Number'] = new HTMLPurifier_AttrDef_Integer(false, false, true); + } + + /** + * Retrieves a type + * @param $type String type name + * @return Object AttrDef for type + */ + public 
function get($type) { + + // determine if there is any extra info tacked on + if (strpos($type, '#') !== false) list($type, $string) = explode('#', $type, 2); + else $string = ''; + + if (!isset($this->info[$type])) { + trigger_error('Cannot retrieve undefined attribute type ' . $type, E_USER_ERROR); + return; + } + + return $this->info[$type]->make($string); + + } + + /** + * Sets a new implementation for a type + * @param $type String type name + * @param $impl Object AttrDef for type + */ + public function set($type, $impl) { + $this->info[$type] = $impl; + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/AttrValidator.php b/extlib/HTMLPurifier/HTMLPurifier/AttrValidator.php new file mode 100644 index 0000000000..829a0f8f22 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/AttrValidator.php @@ -0,0 +1,162 @@ +getHTMLDefinition(); + $e =& $context->get('ErrorCollector', true); + + // initialize IDAccumulator if necessary + $ok =& $context->get('IDAccumulator', true); + if (!$ok) { + $id_accumulator = HTMLPurifier_IDAccumulator::build($config, $context); + $context->register('IDAccumulator', $id_accumulator); + } + + // initialize CurrentToken if necessary + $current_token =& $context->get('CurrentToken', true); + if (!$current_token) $context->register('CurrentToken', $token); + + if ( + !$token instanceof HTMLPurifier_Token_Start && + !$token instanceof HTMLPurifier_Token_Empty + ) return $token; + + // create alias to global definition array, see also $defs + // DEFINITION CALL + $d_defs = $definition->info_global_attr; + + // don't update token until the very end, to ensure an atomic update + $attr = $token->attr; + + // do global transformations (pre) + // nothing currently utilizes this + foreach ($definition->info_attr_transform_pre as $transform) { + $attr = $transform->transform($o = $attr, $config, $context); + if ($e) { + if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); + } + } + + // 
do local transformations only applicable to this element (pre) + // ex.

<p align="right"> to <p style="text-align:right;">

+ foreach ($definition->info[$token->name]->attr_transform_pre as $transform) { + $attr = $transform->transform($o = $attr, $config, $context); + if ($e) { + if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); + } + } + + // create alias to this element's attribute definition array, see + // also $d_defs (global attribute definition array) + // DEFINITION CALL + $defs = $definition->info[$token->name]->attr; + + $attr_key = false; + $context->register('CurrentAttr', $attr_key); + + // iterate through all the attribute keypairs + // Watch out for name collisions: $key has previously been used + foreach ($attr as $attr_key => $value) { + + // call the definition + if ( isset($defs[$attr_key]) ) { + // there is a local definition defined + if ($defs[$attr_key] === false) { + // We've explicitly been told not to allow this element. + // This is usually when there's a global definition + // that must be overridden. + // Theoretically speaking, we could have a + // AttrDef_DenyAll, but this is faster! + $result = false; + } else { + // validate according to the element's definition + $result = $defs[$attr_key]->validate( + $value, $config, $context + ); + } + } elseif ( isset($d_defs[$attr_key]) ) { + // there is a global definition defined, validate according + // to the global definition + $result = $d_defs[$attr_key]->validate( + $value, $config, $context + ); + } else { + // system never heard of the attribute? DELETE! + $result = false; + } + + // put the results into effect + if ($result === false || $result === null) { + // this is a generic error message that should replaced + // with more specific ones when possible + if ($e) $e->send(E_ERROR, 'AttrValidator: Attribute removed'); + + // remove the attribute + unset($attr[$attr_key]); + } elseif (is_string($result)) { + // generally, if a substitution is happening, there + // was some sort of implicit correction going on. 
We'll + // delegate it to the attribute classes to say exactly what. + + // simple substitution + $attr[$attr_key] = $result; + } else { + // nothing happens + } + + // we'd also want slightly more complicated substitution + // involving an array as the return value, + // although we're not sure how colliding attributes would + // resolve (certain ones would be completely overriden, + // others would prepend themselves). + } + + $context->destroy('CurrentAttr'); + + // post transforms + + // global (error reporting untested) + foreach ($definition->info_attr_transform_post as $transform) { + $attr = $transform->transform($o = $attr, $config, $context); + if ($e) { + if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); + } + } + + // local (error reporting untested) + foreach ($definition->info[$token->name]->attr_transform_post as $transform) { + $attr = $transform->transform($o = $attr, $config, $context); + if ($e) { + if ($attr != $o) $e->send(E_NOTICE, 'AttrValidator: Attributes transformed', $o, $attr); + } + } + + $token->attr = $attr; + + // destroy CurrentToken if we made it ourselves + if (!$current_token) $context->destroy('CurrentToken'); + + } + + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Bootstrap.php b/extlib/HTMLPurifier/HTMLPurifier/Bootstrap.php new file mode 100644 index 0000000000..559f61a232 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Bootstrap.php @@ -0,0 +1,98 @@ + +if (!defined('PHP_EOL')) { + switch (strtoupper(substr(PHP_OS, 0, 3))) { + case 'WIN': + define('PHP_EOL', "\r\n"); + break; + case 'DAR': + define('PHP_EOL', "\r"); + break; + default: + define('PHP_EOL', "\n"); + } +} + +/** + * Bootstrap class that contains meta-functionality for HTML Purifier such as + * the autoload function. + * + * @note + * This class may be used without any other files from HTML Purifier. 
+ */ +class HTMLPurifier_Bootstrap +{ + + /** + * Autoload function for HTML Purifier + * @param $class Class to load + */ + public static function autoload($class) { + $file = HTMLPurifier_Bootstrap::getPath($class); + if (!$file) return false; + require HTMLPURIFIER_PREFIX . '/' . $file; + return true; + } + + /** + * Returns the path for a specific class. + */ + public static function getPath($class) { + if (strncmp('HTMLPurifier', $class, 12) !== 0) return false; + // Custom implementations + if (strncmp('HTMLPurifier_Language_', $class, 22) === 0) { + $code = str_replace('_', '-', substr($class, 22)); + $file = 'HTMLPurifier/Language/classes/' . $code . '.php'; + } else { + $file = str_replace('_', '/', $class) . '.php'; + } + if (!file_exists(HTMLPURIFIER_PREFIX . '/' . $file)) return false; + return $file; + } + + /** + * "Pre-registers" our autoloader on the SPL stack. + */ + public static function registerAutoload() { + $autoload = array('HTMLPurifier_Bootstrap', 'autoload'); + if ( ($funcs = spl_autoload_functions()) === false ) { + spl_autoload_register($autoload); + } elseif (function_exists('spl_autoload_unregister')) { + $compat = version_compare(PHP_VERSION, '5.1.2', '<=') && + version_compare(PHP_VERSION, '5.1.0', '>='); + foreach ($funcs as $func) { + if (is_array($func)) { + // :TRICKY: There are some compatibility issues and some + // places where we need to error out + $reflector = new ReflectionMethod($func[0], $func[1]); + if (!$reflector->isStatic()) { + throw new Exception(' + HTML Purifier autoloader registrar is not compatible + with non-static object methods due to PHP Bug #44144; + Please do not use HTMLPurifier.autoload.php (or any + file that includes this file); instead, place the code: + spl_autoload_register(array(\'HTMLPurifier_Bootstrap\', \'autoload\')) + after your own autoloaders. 
+ '); + } + // Suprisingly, spl_autoload_register supports the + // Class::staticMethod callback format, although call_user_func doesn't + if ($compat) $func = implode('::', $func); + } + spl_autoload_unregister($func); + } + spl_autoload_register($autoload); + foreach ($funcs as $func) spl_autoload_register($func); + } + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/CSSDefinition.php b/extlib/HTMLPurifier/HTMLPurifier/CSSDefinition.php new file mode 100644 index 0000000000..6a2e6f56d9 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/CSSDefinition.php @@ -0,0 +1,292 @@ +info['text-align'] = new HTMLPurifier_AttrDef_Enum( + array('left', 'right', 'center', 'justify'), false); + + $border_style = + $this->info['border-bottom-style'] = + $this->info['border-right-style'] = + $this->info['border-left-style'] = + $this->info['border-top-style'] = new HTMLPurifier_AttrDef_Enum( + array('none', 'hidden', 'dotted', 'dashed', 'solid', 'double', + 'groove', 'ridge', 'inset', 'outset'), false); + + $this->info['border-style'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_style); + + $this->info['clear'] = new HTMLPurifier_AttrDef_Enum( + array('none', 'left', 'right', 'both'), false); + $this->info['float'] = new HTMLPurifier_AttrDef_Enum( + array('none', 'left', 'right'), false); + $this->info['font-style'] = new HTMLPurifier_AttrDef_Enum( + array('normal', 'italic', 'oblique'), false); + $this->info['font-variant'] = new HTMLPurifier_AttrDef_Enum( + array('normal', 'small-caps'), false); + + $uri_or_none = new HTMLPurifier_AttrDef_CSS_Composite( + array( + new HTMLPurifier_AttrDef_Enum(array('none')), + new HTMLPurifier_AttrDef_CSS_URI() + ) + ); + + $this->info['list-style-position'] = new HTMLPurifier_AttrDef_Enum( + array('inside', 'outside'), false); + $this->info['list-style-type'] = new HTMLPurifier_AttrDef_Enum( + array('disc', 'circle', 'square', 'decimal', 'lower-roman', + 'upper-roman', 'lower-alpha', 'upper-alpha', 'none'), 
false); + $this->info['list-style-image'] = $uri_or_none; + + $this->info['list-style'] = new HTMLPurifier_AttrDef_CSS_ListStyle($config); + + $this->info['text-transform'] = new HTMLPurifier_AttrDef_Enum( + array('capitalize', 'uppercase', 'lowercase', 'none'), false); + $this->info['color'] = new HTMLPurifier_AttrDef_CSS_Color(); + + $this->info['background-image'] = $uri_or_none; + $this->info['background-repeat'] = new HTMLPurifier_AttrDef_Enum( + array('repeat', 'repeat-x', 'repeat-y', 'no-repeat') + ); + $this->info['background-attachment'] = new HTMLPurifier_AttrDef_Enum( + array('scroll', 'fixed') + ); + $this->info['background-position'] = new HTMLPurifier_AttrDef_CSS_BackgroundPosition(); + + $border_color = + $this->info['border-top-color'] = + $this->info['border-bottom-color'] = + $this->info['border-left-color'] = + $this->info['border-right-color'] = + $this->info['background-color'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_Enum(array('transparent')), + new HTMLPurifier_AttrDef_CSS_Color() + )); + + $this->info['background'] = new HTMLPurifier_AttrDef_CSS_Background($config); + + $this->info['border-color'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_color); + + $border_width = + $this->info['border-top-width'] = + $this->info['border-bottom-width'] = + $this->info['border-left-width'] = + $this->info['border-right-width'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_Enum(array('thin', 'medium', 'thick')), + new HTMLPurifier_AttrDef_CSS_Length('0') //disallow negative + )); + + $this->info['border-width'] = new HTMLPurifier_AttrDef_CSS_Multiple($border_width); + + $this->info['letter-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_Enum(array('normal')), + new HTMLPurifier_AttrDef_CSS_Length() + )); + + $this->info['word-spacing'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_Enum(array('normal')), + new 
HTMLPurifier_AttrDef_CSS_Length() + )); + + $this->info['font-size'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_Enum(array('xx-small', 'x-small', + 'small', 'medium', 'large', 'x-large', 'xx-large', + 'larger', 'smaller')), + new HTMLPurifier_AttrDef_CSS_Percentage(), + new HTMLPurifier_AttrDef_CSS_Length() + )); + + $this->info['line-height'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_Enum(array('normal')), + new HTMLPurifier_AttrDef_CSS_Number(true), // no negatives + new HTMLPurifier_AttrDef_CSS_Length('0'), + new HTMLPurifier_AttrDef_CSS_Percentage(true) + )); + + $margin = + $this->info['margin-top'] = + $this->info['margin-bottom'] = + $this->info['margin-left'] = + $this->info['margin-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_CSS_Length(), + new HTMLPurifier_AttrDef_CSS_Percentage(), + new HTMLPurifier_AttrDef_Enum(array('auto')) + )); + + $this->info['margin'] = new HTMLPurifier_AttrDef_CSS_Multiple($margin); + + // non-negative + $padding = + $this->info['padding-top'] = + $this->info['padding-bottom'] = + $this->info['padding-left'] = + $this->info['padding-right'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_CSS_Length('0'), + new HTMLPurifier_AttrDef_CSS_Percentage(true) + )); + + $this->info['padding'] = new HTMLPurifier_AttrDef_CSS_Multiple($padding); + + $this->info['text-indent'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_CSS_Length(), + new HTMLPurifier_AttrDef_CSS_Percentage() + )); + + $trusted_wh = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_CSS_Length('0'), + new HTMLPurifier_AttrDef_CSS_Percentage(true), + new HTMLPurifier_AttrDef_Enum(array('auto')) + )); + $max = $config->get('CSS.MaxImgLength'); + + $this->info['width'] = + $this->info['height'] = + $max === null ? 
+ $trusted_wh : + new HTMLPurifier_AttrDef_Switch('img', + // For img tags: + new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_CSS_Length('0', $max), + new HTMLPurifier_AttrDef_Enum(array('auto')) + )), + // For everyone else: + $trusted_wh + ); + + $this->info['text-decoration'] = new HTMLPurifier_AttrDef_CSS_TextDecoration(); + + $this->info['font-family'] = new HTMLPurifier_AttrDef_CSS_FontFamily(); + + // this could use specialized code + $this->info['font-weight'] = new HTMLPurifier_AttrDef_Enum( + array('normal', 'bold', 'bolder', 'lighter', '100', '200', '300', + '400', '500', '600', '700', '800', '900'), false); + + // MUST be called after other font properties, as it references + // a CSSDefinition object + $this->info['font'] = new HTMLPurifier_AttrDef_CSS_Font($config); + + // same here + $this->info['border'] = + $this->info['border-bottom'] = + $this->info['border-top'] = + $this->info['border-left'] = + $this->info['border-right'] = new HTMLPurifier_AttrDef_CSS_Border($config); + + $this->info['border-collapse'] = new HTMLPurifier_AttrDef_Enum(array( + 'collapse', 'separate')); + + $this->info['caption-side'] = new HTMLPurifier_AttrDef_Enum(array( + 'top', 'bottom')); + + $this->info['table-layout'] = new HTMLPurifier_AttrDef_Enum(array( + 'auto', 'fixed')); + + $this->info['vertical-align'] = new HTMLPurifier_AttrDef_CSS_Composite(array( + new HTMLPurifier_AttrDef_Enum(array('baseline', 'sub', 'super', + 'top', 'text-top', 'middle', 'bottom', 'text-bottom')), + new HTMLPurifier_AttrDef_CSS_Length(), + new HTMLPurifier_AttrDef_CSS_Percentage() + )); + + $this->info['border-spacing'] = new HTMLPurifier_AttrDef_CSS_Multiple(new HTMLPurifier_AttrDef_CSS_Length(), 2); + + // partial support + $this->info['white-space'] = new HTMLPurifier_AttrDef_Enum(array('nowrap')); + + if ($config->get('CSS.Proprietary')) { + $this->doSetupProprietary($config); + } + + if ($config->get('CSS.AllowTricky')) { + $this->doSetupTricky($config); + } + 
+ $allow_important = $config->get('CSS.AllowImportant'); + // wrap all attr-defs with decorator that handles !important + foreach ($this->info as $k => $v) { + $this->info[$k] = new HTMLPurifier_AttrDef_CSS_ImportantDecorator($v, $allow_important); + } + + $this->setupConfigStuff($config); + } + + protected function doSetupProprietary($config) { + // Internet Explorer only scrollbar colors + $this->info['scrollbar-arrow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); + $this->info['scrollbar-base-color'] = new HTMLPurifier_AttrDef_CSS_Color(); + $this->info['scrollbar-darkshadow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); + $this->info['scrollbar-face-color'] = new HTMLPurifier_AttrDef_CSS_Color(); + $this->info['scrollbar-highlight-color'] = new HTMLPurifier_AttrDef_CSS_Color(); + $this->info['scrollbar-shadow-color'] = new HTMLPurifier_AttrDef_CSS_Color(); + + // technically not proprietary, but CSS3, and no one supports it + $this->info['opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); + $this->info['-moz-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); + $this->info['-khtml-opacity'] = new HTMLPurifier_AttrDef_CSS_AlphaValue(); + + // only opacity, for now + $this->info['filter'] = new HTMLPurifier_AttrDef_CSS_Filter(); + + } + + protected function doSetupTricky($config) { + $this->info['display'] = new HTMLPurifier_AttrDef_Enum(array( + 'inline', 'block', 'list-item', 'run-in', 'compact', + 'marker', 'table', 'inline-table', 'table-row-group', + 'table-header-group', 'table-footer-group', 'table-row', + 'table-column-group', 'table-column', 'table-cell', 'table-caption', 'none' + )); + $this->info['visibility'] = new HTMLPurifier_AttrDef_Enum(array( + 'visible', 'hidden', 'collapse' + )); + $this->info['overflow'] = new HTMLPurifier_AttrDef_Enum(array('visible', 'hidden', 'auto', 'scroll')); + } + + + /** + * Performs extra config-based processing. Based off of + * HTMLPurifier_HTMLDefinition. 
+ * @todo Refactor duplicate elements into common class (probably using + * composition, not inheritance). + */ + protected function setupConfigStuff($config) { + + // setup allowed elements + $support = "(for information on implementing this, see the ". + "support forums) "; + $allowed_attributes = $config->get('CSS.AllowedProperties'); + if ($allowed_attributes !== null) { + foreach ($this->info as $name => $d) { + if(!isset($allowed_attributes[$name])) unset($this->info[$name]); + unset($allowed_attributes[$name]); + } + // emit errors + foreach ($allowed_attributes as $name => $d) { + // :TODO: Is this htmlspecialchars() call really necessary? + $name = htmlspecialchars($name); + trigger_error("Style attribute '$name' is not supported $support", E_USER_WARNING); + } + } + + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ChildDef.php b/extlib/HTMLPurifier/HTMLPurifier/ChildDef.php new file mode 100644 index 0000000000..c5d5216dab --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ChildDef.php @@ -0,0 +1,48 @@ +elements; + } + + /** + * Validates nodes according to definition and returns modification. 
+ * + * @param $tokens_of_children Array of HTMLPurifier_Token + * @param $config HTMLPurifier_Config object + * @param $context HTMLPurifier_Context object + * @return bool true to leave nodes as is + * @return bool false to remove parent node + * @return array of replacement child tokens + */ + abstract public function validateChildren($tokens_of_children, $config, $context); +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Chameleon.php b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Chameleon.php new file mode 100644 index 0000000000..15c364ee33 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Chameleon.php @@ -0,0 +1,48 @@ +inline = new HTMLPurifier_ChildDef_Optional($inline); + $this->block = new HTMLPurifier_ChildDef_Optional($block); + $this->elements = $this->block->elements; + } + + public function validateChildren($tokens_of_children, $config, $context) { + if ($context->get('IsInline') === false) { + return $this->block->validateChildren( + $tokens_of_children, $config, $context); + } else { + return $this->inline->validateChildren( + $tokens_of_children, $config, $context); + } + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Custom.php b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Custom.php new file mode 100644 index 0000000000..b68047b4b5 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Custom.php @@ -0,0 +1,90 @@ +dtd_regex = $dtd_regex; + $this->_compileRegex(); + } + /** + * Compiles the PCRE regex from a DTD regex ($dtd_regex to $_pcre_regex) + */ + protected function _compileRegex() { + $raw = str_replace(' ', '', $this->dtd_regex); + if ($raw{0} != '(') { + $raw = "($raw)"; + } + $el = '[#a-zA-Z0-9_.-]+'; + $reg = $raw; + + // COMPLICATED! AND MIGHT BE BUGGY! I HAVE NO CLUE WHAT I'M + // DOING! Seriously: if there's problems, please report them. 
+ + // collect all elements into the $elements array + preg_match_all("/$el/", $reg, $matches); + foreach ($matches[0] as $match) { + $this->elements[$match] = true; + } + + // setup all elements as parentheticals with leading commas + $reg = preg_replace("/$el/", '(,\\0)', $reg); + + // remove commas when they were not solicited + $reg = preg_replace("/([^,(|]\(+),/", '\\1', $reg); + + // remove all non-paranthetical commas: they are handled by first regex + $reg = preg_replace("/,\(/", '(', $reg); + + $this->_pcre_regex = $reg; + } + public function validateChildren($tokens_of_children, $config, $context) { + $list_of_children = ''; + $nesting = 0; // depth into the nest + foreach ($tokens_of_children as $token) { + if (!empty($token->is_whitespace)) continue; + + $is_child = ($nesting == 0); // direct + + if ($token instanceof HTMLPurifier_Token_Start) { + $nesting++; + } elseif ($token instanceof HTMLPurifier_Token_End) { + $nesting--; + } + + if ($is_child) { + $list_of_children .= $token->name . ','; + } + } + // add leading comma to deal with stray comma declarations + $list_of_children = ',' . 
rtrim($list_of_children, ','); + $okay = + preg_match( + '/^,?'.$this->_pcre_regex.'$/', + $list_of_children + ); + + return (bool) $okay; + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Empty.php b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Empty.php new file mode 100644 index 0000000000..13171f6651 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Empty.php @@ -0,0 +1,20 @@ +whitespace) return $tokens_of_children; + else return array(); + } + return $result; + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Required.php b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Required.php new file mode 100644 index 0000000000..4889f249b8 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Required.php @@ -0,0 +1,117 @@ + $x) { + $elements[$i] = true; + if (empty($i)) unset($elements[$i]); // remove blank + } + } + $this->elements = $elements; + } + public $allow_empty = false; + public $type = 'required'; + public function validateChildren($tokens_of_children, $config, $context) { + // Flag for subclasses + $this->whitespace = false; + + // if there are no tokens, delete parent node + if (empty($tokens_of_children)) return false; + + // the new set of children + $result = array(); + + // current depth into the nest + $nesting = 0; + + // whether or not we're deleting a node + $is_deleting = false; + + // whether or not parsed character data is allowed + // this controls whether or not we silently drop a tag + // or generate escaped HTML from it + $pcdata_allowed = isset($this->elements['#PCDATA']); + + // a little sanity check to make sure it's not ALL whitespace + $all_whitespace = true; + + // some configuration + $escape_invalid_children = $config->get('Core.EscapeInvalidChildren'); + + // generator + $gen = new HTMLPurifier_Generator($config, $context); + + foreach ($tokens_of_children as $token) { + if (!empty($token->is_whitespace)) { + $result[] = $token; + continue; + } + 
$all_whitespace = false; // phew, we're not talking about whitespace + + $is_child = ($nesting == 0); + + if ($token instanceof HTMLPurifier_Token_Start) { + $nesting++; + } elseif ($token instanceof HTMLPurifier_Token_End) { + $nesting--; + } + + if ($is_child) { + $is_deleting = false; + if (!isset($this->elements[$token->name])) { + $is_deleting = true; + if ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text) { + $result[] = $token; + } elseif ($pcdata_allowed && $escape_invalid_children) { + $result[] = new HTMLPurifier_Token_Text( + $gen->generateFromToken($token) + ); + } + continue; + } + } + if (!$is_deleting || ($pcdata_allowed && $token instanceof HTMLPurifier_Token_Text)) { + $result[] = $token; + } elseif ($pcdata_allowed && $escape_invalid_children) { + $result[] = + new HTMLPurifier_Token_Text( + $gen->generateFromToken($token) + ); + } else { + // drop silently + } + } + if (empty($result)) return false; + if ($all_whitespace) { + $this->whitespace = true; + return false; + } + if ($tokens_of_children == $result) return true; + return $result; + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ChildDef/StrictBlockquote.php b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/StrictBlockquote.php new file mode 100644 index 0000000000..dfae8a6e5e --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/StrictBlockquote.php @@ -0,0 +1,88 @@ +init($config); + return $this->fake_elements; + } + + public function validateChildren($tokens_of_children, $config, $context) { + + $this->init($config); + + // trick the parent class into thinking it allows more + $this->elements = $this->fake_elements; + $result = parent::validateChildren($tokens_of_children, $config, $context); + $this->elements = $this->real_elements; + + if ($result === false) return array(); + if ($result === true) $result = $tokens_of_children; + + $def = $config->getHTMLDefinition(); + $block_wrap_start = new 
HTMLPurifier_Token_Start($def->info_block_wrapper); + $block_wrap_end = new HTMLPurifier_Token_End( $def->info_block_wrapper); + $is_inline = false; + $depth = 0; + $ret = array(); + + // assuming that there are no comment tokens + foreach ($result as $i => $token) { + $token = $result[$i]; + // ifs are nested for readability + if (!$is_inline) { + if (!$depth) { + if ( + ($token instanceof HTMLPurifier_Token_Text && !$token->is_whitespace) || + (!$token instanceof HTMLPurifier_Token_Text && !isset($this->elements[$token->name])) + ) { + $is_inline = true; + $ret[] = $block_wrap_start; + } + } + } else { + if (!$depth) { + // starting tokens have been inline text / empty + if ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) { + if (isset($this->elements[$token->name])) { + // ended + $ret[] = $block_wrap_end; + $is_inline = false; + } + } + } + } + $ret[] = $token; + if ($token instanceof HTMLPurifier_Token_Start) $depth++; + if ($token instanceof HTMLPurifier_Token_End) $depth--; + } + if ($is_inline) $ret[] = $block_wrap_end; + return $ret; + } + + private function init($config) { + if (!$this->init) { + $def = $config->getHTMLDefinition(); + // allow all inline elements + $this->real_elements = $this->elements; + $this->fake_elements = $def->info_content_sets['Flow']; + $this->fake_elements['#PCDATA'] = true; + $this->init = true; + } + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Table.php b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Table.php new file mode 100644 index 0000000000..34f0227dd2 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ChildDef/Table.php @@ -0,0 +1,142 @@ + true, 'tbody' => true, 'thead' => true, + 'tfoot' => true, 'caption' => true, 'colgroup' => true, 'col' => true); + public function __construct() {} + public function validateChildren($tokens_of_children, $config, $context) { + if (empty($tokens_of_children)) return false; + + // this ensures that the 
loop gets run one last time before closing + // up. It's a little bit of a hack, but it works! Just make sure you + // get rid of the token later. + $tokens_of_children[] = false; + + // only one of these elements is allowed in a table + $caption = false; + $thead = false; + $tfoot = false; + + // as many of these as you want + $cols = array(); + $content = array(); + + $nesting = 0; // current depth so we can determine nodes + $is_collecting = false; // are we globbing together tokens to package + // into one of the collectors? + $collection = array(); // collected nodes + $tag_index = 0; // the first node might be whitespace, + // so this tells us where the start tag is + + foreach ($tokens_of_children as $token) { + $is_child = ($nesting == 0); + + if ($token === false) { + // terminating sequence started + } elseif ($token instanceof HTMLPurifier_Token_Start) { + $nesting++; + } elseif ($token instanceof HTMLPurifier_Token_End) { + $nesting--; + } + + // handle node collection + if ($is_collecting) { + if ($is_child) { + // okay, let's stash the tokens away + // first token tells us the type of the collection + switch ($collection[$tag_index]->name) { + case 'tr': + case 'tbody': + $content[] = $collection; + break; + case 'caption': + if ($caption !== false) break; + $caption = $collection; + break; + case 'thead': + case 'tfoot': + // access the appropriate variable, $thead or $tfoot + $var = $collection[$tag_index]->name; + if ($$var === false) { + $$var = $collection; + } else { + // transmutate the first and less entries into + // tbody tags, and then put into content + $collection[$tag_index]->name = 'tbody'; + $collection[count($collection)-1]->name = 'tbody'; + $content[] = $collection; + } + break; + case 'colgroup': + $cols[] = $collection; + break; + } + $collection = array(); + $is_collecting = false; + $tag_index = 0; + } else { + // add the node to the collection + $collection[] = $token; + } + } + + // terminate + if ($token === false) break; + + 
if ($is_child) { + // determine what we're dealing with + if ($token->name == 'col') { + // the only empty tag in the possie, we can handle it + // immediately + $cols[] = array_merge($collection, array($token)); + $collection = array(); + $tag_index = 0; + continue; + } + switch($token->name) { + case 'caption': + case 'colgroup': + case 'thead': + case 'tfoot': + case 'tbody': + case 'tr': + $is_collecting = true; + $collection[] = $token; + continue; + default: + if (!empty($token->is_whitespace)) { + $collection[] = $token; + $tag_index++; + } + continue; + } + } + } + + if (empty($content)) return false; + + $ret = array(); + if ($caption !== false) $ret = array_merge($ret, $caption); + if ($cols !== false) foreach ($cols as $token_array) $ret = array_merge($ret, $token_array); + if ($thead !== false) $ret = array_merge($ret, $thead); + if ($tfoot !== false) $ret = array_merge($ret, $tfoot); + foreach ($content as $token_array) $ret = array_merge($ret, $token_array); + if (!empty($collection) && $is_collecting == false){ + // grab the trailing space + $ret = array_merge($ret, $collection); + } + + array_pop($tokens_of_children); // remove phantom token + + return ($ret === $tokens_of_children) ? true : $ret; + + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Config.php b/extlib/HTMLPurifier/HTMLPurifier/Config.php new file mode 100644 index 0000000000..a01706043a --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Config.php @@ -0,0 +1,580 @@ +defaultPlist; + $this->plist = new HTMLPurifier_PropertyList($parent); + $this->def = $definition; // keep a copy around for checking + $this->parser = new HTMLPurifier_VarParser_Flexible(); + } + + /** + * Convenience constructor that creates a config object based on a mixed var + * @param mixed $config Variable that defines the state of the config + * object. Can be: a HTMLPurifier_Config() object, + * an array of directives based on loadArray(), + * or a string filename of an ini file. 
+ * @param HTMLPurifier_ConfigSchema Schema object + * @return Configured HTMLPurifier_Config object + */ + public static function create($config, $schema = null) { + if ($config instanceof HTMLPurifier_Config) { + // pass-through + return $config; + } + if (!$schema) { + $ret = HTMLPurifier_Config::createDefault(); + } else { + $ret = new HTMLPurifier_Config($schema); + } + if (is_string($config)) $ret->loadIni($config); + elseif (is_array($config)) $ret->loadArray($config); + return $ret; + } + + /** + * Creates a new config object that inherits from a previous one. + * @param HTMLPurifier_Config $config Configuration object to inherit + * from. + * @return HTMLPurifier_Config object with $config as its parent. + */ + public static function inherit(HTMLPurifier_Config $config) { + return new HTMLPurifier_Config($config->def, $config->plist); + } + + /** + * Convenience constructor that creates a default configuration object. + * @return Default HTMLPurifier_Config object. + */ + public static function createDefault() { + $definition = HTMLPurifier_ConfigSchema::instance(); + $config = new HTMLPurifier_Config($definition); + return $config; + } + + /** + * Retreives a value from the configuration. + * @param $key String key + */ + public function get($key, $a = null) { + if ($a !== null) { + $this->triggerError("Using deprecated API: use \$config->get('$key.$a') instead", E_USER_WARNING); + $key = "$key.$a"; + } + if (!$this->finalized) $this->autoFinalize(); + if (!isset($this->def->info[$key])) { + // can't add % due to SimpleTest bug + $this->triggerError('Cannot retrieve value of undefined directive ' . htmlspecialchars($key), + E_USER_WARNING); + return; + } + if (isset($this->def->info[$key]->isAlias)) { + $d = $this->def->info[$key]; + $this->triggerError('Cannot get value from aliased directive, use real name ' . 
$d->key, + E_USER_ERROR); + return; + } + if ($this->lock) { + list($ns) = explode('.', $key); + if ($ns !== $this->lock) { + $this->triggerError('Cannot get value of namespace ' . $ns . ' when lock for ' . $this->lock . ' is active, this probably indicates a Definition setup method is accessing directives that are not within its namespace', E_USER_ERROR); + return; + } + } + return $this->plist->get($key); + } + + /** + * Retreives an array of directives to values from a given namespace + * @param $namespace String namespace + */ + public function getBatch($namespace) { + if (!$this->finalized) $this->autoFinalize(); + $full = $this->getAll(); + if (!isset($full[$namespace])) { + $this->triggerError('Cannot retrieve undefined namespace ' . htmlspecialchars($namespace), + E_USER_WARNING); + return; + } + return $full[$namespace]; + } + + /** + * Returns a md5 signature of a segment of the configuration object + * that uniquely identifies that particular configuration + * @note Revision is handled specially and is removed from the batch + * before processing! 
+ * @param $namespace Namespace to get serial for + */ + public function getBatchSerial($namespace) { + if (empty($this->serials[$namespace])) { + $batch = $this->getBatch($namespace); + unset($batch['DefinitionRev']); + $this->serials[$namespace] = md5(serialize($batch)); + } + return $this->serials[$namespace]; + } + + /** + * Returns a md5 signature for the entire configuration object + * that uniquely identifies that particular configuration + */ + public function getSerial() { + if (empty($this->serial)) { + $this->serial = md5(serialize($this->getAll())); + } + return $this->serial; + } + + /** + * Retrieves all directives, organized by namespace + * @warning This is a pretty inefficient function, avoid if you can + */ + public function getAll() { + if (!$this->finalized) $this->autoFinalize(); + $ret = array(); + foreach ($this->plist->squash() as $name => $value) { + list($ns, $key) = explode('.', $name, 2); + $ret[$ns][$key] = $value; + } + return $ret; + } + + /** + * Sets a value to configuration. + * @param $key String key + * @param $value Mixed value + */ + public function set($key, $value, $a = null) { + if (strpos($key, '.') === false) { + $namespace = $key; + $directive = $value; + $value = $a; + $key = "$key.$directive"; + $this->triggerError("Using deprecated API: use \$config->set('$key', ...) instead", E_USER_NOTICE); + } else { + list($namespace) = explode('.', $key); + } + if ($this->isFinalized('Cannot set directive after finalization')) return; + if (!isset($this->def->info[$key])) { + $this->triggerError('Cannot set undefined directive ' . htmlspecialchars($key) . ' to value', + E_USER_WARNING); + return; + } + $def = $this->def->info[$key]; + + if (isset($def->isAlias)) { + if ($this->aliasMode) { + $this->triggerError('Double-aliases not allowed, please fix '. + 'ConfigSchema bug with' . 
$key, E_USER_ERROR); + return; + } + $this->aliasMode = true; + $this->set($def->key, $value); + $this->aliasMode = false; + $this->triggerError("$key is an alias, preferred directive name is {$def->key}", E_USER_NOTICE); + return; + } + + // Raw type might be negative when using the fully optimized form + // of stdclass, which indicates allow_null == true + $rtype = is_int($def) ? $def : $def->type; + if ($rtype < 0) { + $type = -$rtype; + $allow_null = true; + } else { + $type = $rtype; + $allow_null = isset($def->allow_null); + } + + try { + $value = $this->parser->parse($value, $type, $allow_null); + } catch (HTMLPurifier_VarParserException $e) { + $this->triggerError('Value for ' . $key . ' is of invalid type, should be ' . HTMLPurifier_VarParser::getTypeName($type), E_USER_WARNING); + return; + } + if (is_string($value) && is_object($def)) { + // resolve value alias if defined + if (isset($def->aliases[$value])) { + $value = $def->aliases[$value]; + } + // check to see if the value is allowed + if (isset($def->allowed) && !isset($def->allowed[$value])) { + $this->triggerError('Value not supported, valid values are: ' . + $this->_listify($def->allowed), E_USER_WARNING); + return; + } + } + $this->plist->set($key, $value); + + // reset definitions if the directives they depend on changed + // this is a very costly process, so it's discouraged + // with finalization + if ($namespace == 'HTML' || $namespace == 'CSS' || $namespace == 'URI') { + $this->definitions[$namespace] = null; + } + + $this->serials[$namespace] = false; + } + + /** + * Convenience function for error reporting + */ + private function _listify($lookup) { + $list = array(); + foreach ($lookup as $name => $b) $list[] = $name; + return implode(', ', $list); + } + + /** + * Retrieves object reference to the HTML definition. + * @param $raw Return a copy that has not been setup yet. Must be + * called before it's been setup, otherwise won't work. 
+ */ + public function getHTMLDefinition($raw = false) { + return $this->getDefinition('HTML', $raw); + } + + /** + * Retrieves object reference to the CSS definition + * @param $raw Return a copy that has not been setup yet. Must be + * called before it's been setup, otherwise won't work. + */ + public function getCSSDefinition($raw = false) { + return $this->getDefinition('CSS', $raw); + } + + /** + * Retrieves a definition + * @param $type Type of definition: HTML, CSS, etc + * @param $raw Whether or not definition should be returned raw + */ + public function getDefinition($type, $raw = false) { + if (!$this->finalized) $this->autoFinalize(); + // temporarily suspend locks, so we can handle recursive definition calls + $lock = $this->lock; + $this->lock = null; + $factory = HTMLPurifier_DefinitionCacheFactory::instance(); + $cache = $factory->create($type, $this); + $this->lock = $lock; + if (!$raw) { + // see if we can quickly supply a definition + if (!empty($this->definitions[$type])) { + if (!$this->definitions[$type]->setup) { + $this->definitions[$type]->setup($this); + $cache->set($this->definitions[$type], $this); + } + return $this->definitions[$type]; + } + // memory check missed, try cache + $this->definitions[$type] = $cache->get($this); + if ($this->definitions[$type]) { + // definition in cache, return it + return $this->definitions[$type]; + } + } elseif ( + !empty($this->definitions[$type]) && + !$this->definitions[$type]->setup + ) { + // raw requested, raw in memory, quick return + return $this->definitions[$type]; + } + // quick checks failed, let's create the object + if ($type == 'HTML') { + $this->definitions[$type] = new HTMLPurifier_HTMLDefinition(); + } elseif ($type == 'CSS') { + $this->definitions[$type] = new HTMLPurifier_CSSDefinition(); + } elseif ($type == 'URI') { + $this->definitions[$type] = new HTMLPurifier_URIDefinition(); + } else { + throw new HTMLPurifier_Exception("Definition of $type type not supported"); + } + // quick 
abort if raw + if ($raw) { + if (is_null($this->get($type . '.DefinitionID'))) { + // fatally error out if definition ID not set + throw new HTMLPurifier_Exception("Cannot retrieve raw version without specifying %$type.DefinitionID"); + } + return $this->definitions[$type]; + } + // set it up + $this->lock = $type; + $this->definitions[$type]->setup($this); + $this->lock = null; + // save in cache + $cache->set($this->definitions[$type], $this); + return $this->definitions[$type]; + } + + /** + * Loads configuration values from an array with the following structure: + * Namespace.Directive => Value + * @param $config_array Configuration associative array + */ + public function loadArray($config_array) { + if ($this->isFinalized('Cannot load directives after finalization')) return; + foreach ($config_array as $key => $value) { + $key = str_replace('_', '.', $key); + if (strpos($key, '.') !== false) { + $this->set($key, $value); + } else { + $namespace = $key; + $namespace_values = $value; + foreach ($namespace_values as $directive => $value) { + $this->set($namespace .'.'. $directive, $value); + } + } + } + } + + /** + * Returns a list of array(namespace, directive) for all directives + * that are allowed in a web-form context as per an allowed + * namespaces/directives list. 
+ * @param $allowed List of allowed namespaces/directives + */ + public static function getAllowedDirectivesForForm($allowed, $schema = null) { + if (!$schema) { + $schema = HTMLPurifier_ConfigSchema::instance(); + } + if ($allowed !== true) { + if (is_string($allowed)) $allowed = array($allowed); + $allowed_ns = array(); + $allowed_directives = array(); + $blacklisted_directives = array(); + foreach ($allowed as $ns_or_directive) { + if (strpos($ns_or_directive, '.') !== false) { + // directive + if ($ns_or_directive[0] == '-') { + $blacklisted_directives[substr($ns_or_directive, 1)] = true; + } else { + $allowed_directives[$ns_or_directive] = true; + } + } else { + // namespace + $allowed_ns[$ns_or_directive] = true; + } + } + } + $ret = array(); + foreach ($schema->info as $key => $def) { + list($ns, $directive) = explode('.', $key, 2); + if ($allowed !== true) { + if (isset($blacklisted_directives["$ns.$directive"])) continue; + if (!isset($allowed_directives["$ns.$directive"]) && !isset($allowed_ns[$ns])) continue; + } + if (isset($def->isAlias)) continue; + if ($directive == 'DefinitionID' || $directive == 'DefinitionRev') continue; + $ret[] = array($ns, $directive); + } + return $ret; + } + + /** + * Loads configuration values from $_GET/$_POST that were posted + * via ConfigForm + * @param $array $_GET or $_POST array to import + * @param $index Index/name that the config variables are in + * @param $allowed List of allowed namespaces/directives + * @param $mq_fix Boolean whether or not to enable magic quotes fix + * @param $schema Instance of HTMLPurifier_ConfigSchema to use, if not global copy + */ + public static function loadArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) { + $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $schema); + $config = HTMLPurifier_Config::create($ret, $schema); + return $config; + } + + /** + * Merges in configuration values from $_GET/$_POST to object. 
NOT STATIC. + * @note Same parameters as loadArrayFromForm + */ + public function mergeArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true) { + $ret = HTMLPurifier_Config::prepareArrayFromForm($array, $index, $allowed, $mq_fix, $this->def); + $this->loadArray($ret); + } + + /** + * Prepares an array from a form into something usable for the more + * strict parts of HTMLPurifier_Config + */ + public static function prepareArrayFromForm($array, $index = false, $allowed = true, $mq_fix = true, $schema = null) { + if ($index !== false) $array = (isset($array[$index]) && is_array($array[$index])) ? $array[$index] : array(); + $mq = $mq_fix && function_exists('get_magic_quotes_gpc') && get_magic_quotes_gpc(); + + $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $schema); + $ret = array(); + foreach ($allowed as $key) { + list($ns, $directive) = $key; + $skey = "$ns.$directive"; + if (!empty($array["Null_$skey"])) { + $ret[$ns][$directive] = null; + continue; + } + if (!isset($array[$skey])) continue; + $value = $mq ? stripslashes($array[$skey]) : $array[$skey]; + $ret[$ns][$directive] = $value; + } + return $ret; + } + + /** + * Loads configuration values from an ini file + * @param $filename Name of ini file + */ + public function loadIni($filename) { + if ($this->isFinalized('Cannot load directives after finalization')) return; + $array = parse_ini_file($filename, true); + $this->loadArray($array); + } + + /** + * Checks whether or not the configuration object is finalized. 
+ * @param $error String error message, or false for no error + */ + public function isFinalized($error = false) { + if ($this->finalized && $error) { + $this->triggerError($error, E_USER_ERROR); + } + return $this->finalized; + } + + /** + * Finalizes configuration only if auto finalize is on and not + * already finalized + */ + public function autoFinalize() { + if ($this->autoFinalize) { + $this->finalize(); + } else { + $this->plist->squash(true); + } + } + + /** + * Finalizes a configuration object, prohibiting further change + */ + public function finalize() { + $this->finalized = true; + unset($this->parser); + } + + /** + * Produces a nicely formatted error message by supplying the + * stack frame information from two levels up and OUTSIDE of + * HTMLPurifier_Config. + */ + protected function triggerError($msg, $no) { + // determine previous stack frame + $backtrace = debug_backtrace(); + if ($this->chatty && isset($backtrace[1])) { + $frame = $backtrace[1]; + $extra = " on line {$frame['line']} in file {$frame['file']}"; + } else { + $extra = ''; + } + trigger_error($msg . $extra, $no); + } + + /** + * Returns a serialized form of the configuration object that can + * be reconstituted. 
+ */ + public function serialize() { + $this->getDefinition('HTML'); + $this->getDefinition('CSS'); + $this->getDefinition('URI'); + return serialize($this); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema.php b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema.php new file mode 100644 index 0000000000..67be5c71fd --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema.php @@ -0,0 +1,158 @@ + array( + * 'Directive' => new stdclass(), + * ) + * ) + * + * The stdclass may have the following properties: + * + * - If isAlias isn't set: + * - type: Integer type of directive, see HTMLPurifier_VarParser for definitions + * - allow_null: If set, this directive allows null values + * - aliases: If set, an associative array of value aliases to real values + * - allowed: If set, a lookup array of allowed (string) values + * - If isAlias is set: + * - namespace: Namespace this directive aliases to + * - name: Directive name this directive aliases to + * + * In certain degenerate cases, stdclass will actually be an integer. In + * that case, the value is equivalent to an stdclass with the type + * property set to the integer. If the integer is negative, type is + * equal to the absolute value of integer, and allow_null is true. + * + * This class is friendly with HTMLPurifier_Config. If you need introspection + * about the schema, you're better of using the ConfigSchema_Interchange, + * which uses more memory but has much richer information. + */ + public $info = array(); + + /** + * Application-wide singleton + */ + static protected $singleton; + + public function __construct() { + $this->defaultPlist = new HTMLPurifier_PropertyList(); + } + + /** + * Unserializes the default ConfigSchema. + */ + public static function makeFromSerial() { + return unserialize(file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema.ser')); + } + + /** + * Retrieves an instance of the application-wide configuration definition. 
+ */ + public static function instance($prototype = null) { + if ($prototype !== null) { + HTMLPurifier_ConfigSchema::$singleton = $prototype; + } elseif (HTMLPurifier_ConfigSchema::$singleton === null || $prototype === true) { + HTMLPurifier_ConfigSchema::$singleton = HTMLPurifier_ConfigSchema::makeFromSerial(); + } + return HTMLPurifier_ConfigSchema::$singleton; + } + + /** + * Defines a directive for configuration + * @warning Will fail of directive's namespace is defined. + * @warning This method's signature is slightly different from the legacy + * define() static method! Beware! + * @param $namespace Namespace the directive is in + * @param $name Key of directive + * @param $default Default value of directive + * @param $type Allowed type of the directive. See + * HTMLPurifier_DirectiveDef::$type for allowed values + * @param $allow_null Whether or not to allow null values + */ + public function add($key, $default, $type, $allow_null) { + $obj = new stdclass(); + $obj->type = is_int($type) ? $type : HTMLPurifier_VarParser::$types[$type]; + if ($allow_null) $obj->allow_null = true; + $this->info[$key] = $obj; + $this->defaults[$key] = $default; + $this->defaultPlist->set($key, $default); + } + + /** + * Defines a directive value alias. + * + * Directive value aliases are convenient for developers because it lets + * them set a directive to several values and get the same result. + * @param $namespace Directive's namespace + * @param $name Name of Directive + * @param $aliases Hash of aliased values to the real alias + */ + public function addValueAliases($key, $aliases) { + if (!isset($this->info[$key]->aliases)) { + $this->info[$key]->aliases = array(); + } + foreach ($aliases as $alias => $real) { + $this->info[$key]->aliases[$alias] = $real; + } + } + + /** + * Defines a set of allowed values for a directive. + * @warning This is slightly different from the corresponding static + * method definition. 
+ * @param $namespace Namespace of directive + * @param $name Name of directive + * @param $allowed Lookup array of allowed values + */ + public function addAllowedValues($key, $allowed) { + $this->info[$key]->allowed = $allowed; + } + + /** + * Defines a directive alias for backwards compatibility + * @param $namespace + * @param $name Directive that will be aliased + * @param $new_namespace + * @param $new_name Directive that the alias will be to + */ + public function addAlias($key, $new_key) { + $obj = new stdclass; + $obj->key = $new_key; + $obj->isAlias = true; + $this->info[$key] = $obj; + } + + /** + * Replaces any stdclass that only has the type property with type integer. + */ + public function postProcess() { + foreach ($this->info as $key => $v) { + if (count((array) $v) == 1) { + $this->info[$key] = $v->type; + } elseif (count((array) $v) == 2 && isset($v->allow_null)) { + $this->info[$key] = -$v->type; + } + } + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php new file mode 100644 index 0000000000..c05668a706 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Builder/ConfigSchema.php @@ -0,0 +1,44 @@ +directives as $d) { + $schema->add( + $d->id->key, + $d->default, + $d->type, + $d->typeAllowsNull + ); + if ($d->allowed !== null) { + $schema->addAllowedValues( + $d->id->key, + $d->allowed + ); + } + foreach ($d->aliases as $alias) { + $schema->addAlias( + $alias->key, + $d->id->key + ); + } + if ($d->valueAliases !== null) { + $schema->addValueAliases( + $d->id->key, + $d->valueAliases + ); + } + } + $schema->postProcess(); + return $schema; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Builder/Xml.php b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Builder/Xml.php new file mode 100644 index 0000000000..244561a372 --- /dev/null +++ 
b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Builder/Xml.php @@ -0,0 +1,106 @@ +startElement('div'); + + $purifier = HTMLPurifier::getInstance(); + $html = $purifier->purify($html); + $this->writeAttribute('xmlns', 'http://www.w3.org/1999/xhtml'); + $this->writeRaw($html); + + $this->endElement(); // div + } + + protected function export($var) { + if ($var === array()) return 'array()'; + return var_export($var, true); + } + + public function build($interchange) { + // global access, only use as last resort + $this->interchange = $interchange; + + $this->setIndent(true); + $this->startDocument('1.0', 'UTF-8'); + $this->startElement('configdoc'); + $this->writeElement('title', $interchange->name); + + foreach ($interchange->directives as $directive) { + $this->buildDirective($directive); + } + + if ($this->namespace) $this->endElement(); // namespace + + $this->endElement(); // configdoc + $this->flush(); + } + + public function buildDirective($directive) { + + // Kludge, although I suppose having a notion of a "root namespace" + // certainly makes things look nicer when documentation is built. + // Depends on things being sorted. 
+ if (!$this->namespace || $this->namespace !== $directive->id->getRootNamespace()) { + if ($this->namespace) $this->endElement(); // namespace + $this->namespace = $directive->id->getRootNamespace(); + $this->startElement('namespace'); + $this->writeAttribute('id', $this->namespace); + $this->writeElement('name', $this->namespace); + } + + $this->startElement('directive'); + $this->writeAttribute('id', $directive->id->toString()); + + $this->writeElement('name', $directive->id->getDirective()); + + $this->startElement('aliases'); + foreach ($directive->aliases as $alias) $this->writeElement('alias', $alias->toString()); + $this->endElement(); // aliases + + $this->startElement('constraints'); + if ($directive->version) $this->writeElement('version', $directive->version); + $this->startElement('type'); + if ($directive->typeAllowsNull) $this->writeAttribute('allow-null', 'yes'); + $this->text($directive->type); + $this->endElement(); // type + if ($directive->allowed) { + $this->startElement('allowed'); + foreach ($directive->allowed as $value => $x) $this->writeElement('value', $value); + $this->endElement(); // allowed + } + $this->writeElement('default', $this->export($directive->default)); + $this->writeAttribute('xml:space', 'preserve'); + if ($directive->external) { + $this->startElement('external'); + foreach ($directive->external as $project) $this->writeElement('project', $project); + $this->endElement(); + } + $this->endElement(); // constraints + + if ($directive->deprecatedVersion) { + $this->startElement('deprecated'); + $this->writeElement('version', $directive->deprecatedVersion); + $this->writeElement('use', $directive->deprecatedUse->toString()); + $this->endElement(); // deprecated + } + + $this->startElement('description'); + $this->writeHTMLDiv($directive->description); + $this->endElement(); // description + + $this->endElement(); // directive + } + +} + +// vim: et sw=4 sts=4 diff --git 
a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Exception.php b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Exception.php new file mode 100644 index 0000000000..2671516c58 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Exception.php @@ -0,0 +1,11 @@ + array(directive info) + */ + public $directives = array(); + + /** + * Adds a directive array to $directives + */ + public function addDirective($directive) { + if (isset($this->directives[$i = $directive->id->toString()])) { + throw new HTMLPurifier_ConfigSchema_Exception("Cannot redefine directive '$i'"); + } + $this->directives[$i] = $directive; + } + + /** + * Convenience function to perform standard validation. Throws exception + * on failed validation. + */ + public function validate() { + $validator = new HTMLPurifier_ConfigSchema_Validator(); + return $validator->validate($this); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Interchange/Directive.php b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Interchange/Directive.php new file mode 100644 index 0000000000..ac8be0d970 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Interchange/Directive.php @@ -0,0 +1,77 @@ + true). + * Null if all values are allowed. + */ + public $allowed; + + /** + * List of aliases for the directive, + * e.g. array(new HTMLPurifier_ConfigSchema_Interchange_Id('Ns', 'Dir'))). + */ + public $aliases = array(); + + /** + * Hash of value aliases, e.g. array('alt' => 'real'). Null if value + * aliasing is disabled (necessary for non-scalar types). + */ + public $valueAliases; + + /** + * Version of HTML Purifier the directive was introduced, e.g. '1.3.1'. + * Null if the directive has always existed. + */ + public $version; + + /** + * ID of directive that supercedes this old directive, is an instance + * of HTMLPurifier_ConfigSchema_Interchange_Id. Null if not deprecated. 
+ */ + public $deprecatedUse; + + /** + * Version of HTML Purifier this directive was deprecated. Null if not + * deprecated. + */ + public $deprecatedVersion; + + /** + * List of external projects this directive depends on, e.g. array('CSSTidy'). + */ + public $external = array(); + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Interchange/Id.php b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Interchange/Id.php new file mode 100644 index 0000000000..b9b3c6f5cf --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Interchange/Id.php @@ -0,0 +1,37 @@ +key = $key; + } + + /** + * @warning This is NOT magic, to ensure that people don't abuse SPL and + * cause problems for PHP 5.0 support. + */ + public function toString() { + return $this->key; + } + + public function getRootNamespace() { + return substr($this->key, 0, strpos($this->key, ".")); + } + + public function getDirective() { + return substr($this->key, strpos($this->key, ".") + 1); + } + + public static function make($id) { + return new HTMLPurifier_ConfigSchema_Interchange_Id($id); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/InterchangeBuilder.php b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/InterchangeBuilder.php new file mode 100644 index 0000000000..785b72ce8e --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/InterchangeBuilder.php @@ -0,0 +1,180 @@ +varParser = $varParser ? $varParser : new HTMLPurifier_VarParser_Native(); + } + + public static function buildFromDirectory($dir = null) { + $builder = new HTMLPurifier_ConfigSchema_InterchangeBuilder(); + $interchange = new HTMLPurifier_ConfigSchema_Interchange(); + return $builder->buildDir($interchange, $dir); + } + + public function buildDir($interchange, $dir = null) { + if (!$dir) $dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier/ConfigSchema/schema'; + if (file_exists($dir . '/info.ini')) { + $info = parse_ini_file($dir . 
'/info.ini'); + $interchange->name = $info['name']; + } + + $files = array(); + $dh = opendir($dir); + while (false !== ($file = readdir($dh))) { + if (!$file || $file[0] == '.' || strrchr($file, '.') !== '.txt') { + continue; + } + $files[] = $file; + } + closedir($dh); + + sort($files); + foreach ($files as $file) { + $this->buildFile($interchange, $dir . '/' . $file); + } + + return $interchange; + } + + public function buildFile($interchange, $file) { + $parser = new HTMLPurifier_StringHashParser(); + $this->build( + $interchange, + new HTMLPurifier_StringHash( $parser->parseFile($file) ) + ); + } + + /** + * Builds an interchange object based on a hash. + * @param $interchange HTMLPurifier_ConfigSchema_Interchange object to build + * @param $hash HTMLPurifier_ConfigSchema_StringHash source data + */ + public function build($interchange, $hash) { + if (!$hash instanceof HTMLPurifier_StringHash) { + $hash = new HTMLPurifier_StringHash($hash); + } + if (!isset($hash['ID'])) { + throw new HTMLPurifier_ConfigSchema_Exception('Hash does not have any ID'); + } + if (strpos($hash['ID'], '.') === false) { + if (count($hash) == 2 && isset($hash['DESCRIPTION'])) { + $hash->offsetGet('DESCRIPTION'); // prevent complaining + } else { + throw new HTMLPurifier_ConfigSchema_Exception('All directives must have a namespace'); + } + } else { + $this->buildDirective($interchange, $hash); + } + $this->_findUnused($hash); + } + + public function buildDirective($interchange, $hash) { + $directive = new HTMLPurifier_ConfigSchema_Interchange_Directive(); + + // These are required elements: + $directive->id = $this->id($hash->offsetGet('ID')); + $id = $directive->id->toString(); // convenience + + if (isset($hash['TYPE'])) { + $type = explode('/', $hash->offsetGet('TYPE')); + if (isset($type[1])) $directive->typeAllowsNull = true; + $directive->type = $type[0]; + } else { + throw new HTMLPurifier_ConfigSchema_Exception("TYPE in directive hash '$id' not defined"); + } + + if 
(isset($hash['DEFAULT'])) { + try { + $directive->default = $this->varParser->parse($hash->offsetGet('DEFAULT'), $directive->type, $directive->typeAllowsNull); + } catch (HTMLPurifier_VarParserException $e) { + throw new HTMLPurifier_ConfigSchema_Exception($e->getMessage() . " in DEFAULT in directive hash '$id'"); + } + } + + if (isset($hash['DESCRIPTION'])) { + $directive->description = $hash->offsetGet('DESCRIPTION'); + } + + if (isset($hash['ALLOWED'])) { + $directive->allowed = $this->lookup($this->evalArray($hash->offsetGet('ALLOWED'))); + } + + if (isset($hash['VALUE-ALIASES'])) { + $directive->valueAliases = $this->evalArray($hash->offsetGet('VALUE-ALIASES')); + } + + if (isset($hash['ALIASES'])) { + $raw_aliases = trim($hash->offsetGet('ALIASES')); + $aliases = preg_split('/\s*,\s*/', $raw_aliases); + foreach ($aliases as $alias) { + $directive->aliases[] = $this->id($alias); + } + } + + if (isset($hash['VERSION'])) { + $directive->version = $hash->offsetGet('VERSION'); + } + + if (isset($hash['DEPRECATED-USE'])) { + $directive->deprecatedUse = $this->id($hash->offsetGet('DEPRECATED-USE')); + } + + if (isset($hash['DEPRECATED-VERSION'])) { + $directive->deprecatedVersion = $hash->offsetGet('DEPRECATED-VERSION'); + } + + if (isset($hash['EXTERNAL'])) { + $directive->external = preg_split('/\s*,\s*/', trim($hash->offsetGet('EXTERNAL'))); + } + + $interchange->addDirective($directive); + } + + /** + * Evaluates an array PHP code string without array() wrapper + */ + protected function evalArray($contents) { + return eval('return array('. $contents .');'); + } + + /** + * Converts an array list into a lookup array. + */ + protected function lookup($array) { + $ret = array(); + foreach ($array as $val) $ret[$val] = true; + return $ret; + } + + /** + * Convenience function that creates an HTMLPurifier_ConfigSchema_Interchange_Id + * object based on a string Id. 
+ */ + protected function id($id) { + return HTMLPurifier_ConfigSchema_Interchange_Id::make($id); + } + + /** + * Triggers errors for any unused keys passed in the hash; such keys + * may indicate typos, missing values, etc. + * @param $hash Instance of ConfigSchema_StringHash to check. + */ + protected function _findUnused($hash) { + $accessed = $hash->getAccessed(); + foreach ($hash as $k => $v) { + if (!isset($accessed[$k])) { + trigger_error("String hash key '$k' not used by builder", E_USER_NOTICE); + } + } + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Validator.php b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Validator.php new file mode 100644 index 0000000000..f374f6a022 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/Validator.php @@ -0,0 +1,206 @@ +parser = new HTMLPurifier_VarParser(); + } + + /** + * Validates a fully-formed interchange object. Throws an + * HTMLPurifier_ConfigSchema_Exception if there's a problem. + */ + public function validate($interchange) { + $this->interchange = $interchange; + $this->aliases = array(); + // PHP is a bit lax with integer <=> string conversions in + // arrays, so we don't use the identical !== comparison + foreach ($interchange->directives as $i => $directive) { + $id = $directive->id->toString(); + if ($i != $id) $this->error(false, "Integrity violation: key '$i' does not match internal id '$id'"); + $this->validateDirective($directive); + } + return true; + } + + /** + * Validates a HTMLPurifier_ConfigSchema_Interchange_Id object. + */ + public function validateId($id) { + $id_string = $id->toString(); + $this->context[] = "id '$id_string'"; + if (!$id instanceof HTMLPurifier_ConfigSchema_Interchange_Id) { + // handled by InterchangeBuilder + $this->error(false, 'is not an instance of HTMLPurifier_ConfigSchema_Interchange_Id'); + } + // keys are now unconstrained (we might want to narrow down to A-Za-z0-9.) 
+ // we probably should check that it has at least one namespace + $this->with($id, 'key') + ->assertNotEmpty() + ->assertIsString(); // implicit assertIsString handled by InterchangeBuilder + array_pop($this->context); + } + + /** + * Validates a HTMLPurifier_ConfigSchema_Interchange_Directive object. + */ + public function validateDirective($d) { + $id = $d->id->toString(); + $this->context[] = "directive '$id'"; + $this->validateId($d->id); + + $this->with($d, 'description') + ->assertNotEmpty(); + + // BEGIN - handled by InterchangeBuilder + $this->with($d, 'type') + ->assertNotEmpty(); + $this->with($d, 'typeAllowsNull') + ->assertIsBool(); + try { + // This also tests validity of $d->type + $this->parser->parse($d->default, $d->type, $d->typeAllowsNull); + } catch (HTMLPurifier_VarParserException $e) { + $this->error('default', 'had error: ' . $e->getMessage()); + } + // END - handled by InterchangeBuilder + + if (!is_null($d->allowed) || !empty($d->valueAliases)) { + // allowed and valueAliases require that we be dealing with + // strings, so check for that early. + $d_int = HTMLPurifier_VarParser::$types[$d->type]; + if (!isset(HTMLPurifier_VarParser::$stringTypes[$d_int])) { + $this->error('type', 'must be a string type when used with allowed or value aliases'); + } + } + + $this->validateDirectiveAllowed($d); + $this->validateDirectiveValueAliases($d); + $this->validateDirectiveAliases($d); + + array_pop($this->context); + } + + /** + * Extra validation if $allowed member variable of + * HTMLPurifier_ConfigSchema_Interchange_Directive is defined. 
+ */ + public function validateDirectiveAllowed($d) { + if (is_null($d->allowed)) return; + $this->with($d, 'allowed') + ->assertNotEmpty() + ->assertIsLookup(); // handled by InterchangeBuilder + if (is_string($d->default) && !isset($d->allowed[$d->default])) { + $this->error('default', 'must be an allowed value'); + } + $this->context[] = 'allowed'; + foreach ($d->allowed as $val => $x) { + if (!is_string($val)) $this->error("value $val", 'must be a string'); + } + array_pop($this->context); + } + + /** + * Extra validation if $valueAliases member variable of + * HTMLPurifier_ConfigSchema_Interchange_Directive is defined. + */ + public function validateDirectiveValueAliases($d) { + if (is_null($d->valueAliases)) return; + $this->with($d, 'valueAliases') + ->assertIsArray(); // handled by InterchangeBuilder + $this->context[] = 'valueAliases'; + foreach ($d->valueAliases as $alias => $real) { + if (!is_string($alias)) $this->error("alias $alias", 'must be a string'); + if (!is_string($real)) $this->error("alias target $real from alias '$alias'", 'must be a string'); + if ($alias === $real) { + $this->error("alias '$alias'", "must not be an alias to itself"); + } + } + if (!is_null($d->allowed)) { + foreach ($d->valueAliases as $alias => $real) { + if (isset($d->allowed[$alias])) { + $this->error("alias '$alias'", 'must not be an allowed value'); + } elseif (!isset($d->allowed[$real])) { + $this->error("alias '$alias'", 'must be an alias to an allowed value'); + } + } + } + array_pop($this->context); + } + + /** + * Extra validation if $aliases member variable of + * HTMLPurifier_ConfigSchema_Interchange_Directive is defined. 
+ */ + public function validateDirectiveAliases($d) { + $this->with($d, 'aliases') + ->assertIsArray(); // handled by InterchangeBuilder + $this->context[] = 'aliases'; + foreach ($d->aliases as $alias) { + $this->validateId($alias); + $s = $alias->toString(); + if (isset($this->interchange->directives[$s])) { + $this->error("alias '$s'", 'collides with another directive'); + } + if (isset($this->aliases[$s])) { + $other_directive = $this->aliases[$s]; + $this->error("alias '$s'", "collides with alias for directive '$other_directive'"); + } + $this->aliases[$s] = $d->id->toString(); + } + array_pop($this->context); + } + + // protected helper functions + + /** + * Convenience function for generating HTMLPurifier_ConfigSchema_ValidatorAtom + * for validating simple member variables of objects. + */ + protected function with($obj, $member) { + return new HTMLPurifier_ConfigSchema_ValidatorAtom($this->getFormattedContext(), $obj, $member); + } + + /** + * Emits an error, providing helpful context. + */ + protected function error($target, $msg) { + if ($target !== false) $prefix = ucfirst($target) . ' in ' . $this->getFormattedContext(); + else $prefix = ucfirst($this->getFormattedContext()); + throw new HTMLPurifier_ConfigSchema_Exception(trim($prefix . ' ' . $msg)); + } + + /** + * Returns a formatted context string. 
+ */ + protected function getFormattedContext() { + return implode(' in ', array_reverse($this->context)); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/ValidatorAtom.php b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/ValidatorAtom.php new file mode 100644 index 0000000000..b95aea18cc --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/ValidatorAtom.php @@ -0,0 +1,66 @@ +context = $context; + $this->obj = $obj; + $this->member = $member; + $this->contents =& $obj->$member; + } + + public function assertIsString() { + if (!is_string($this->contents)) $this->error('must be a string'); + return $this; + } + + public function assertIsBool() { + if (!is_bool($this->contents)) $this->error('must be a boolean'); + return $this; + } + + public function assertIsArray() { + if (!is_array($this->contents)) $this->error('must be an array'); + return $this; + } + + public function assertNotNull() { + if ($this->contents === null) $this->error('must not be null'); + return $this; + } + + public function assertAlnum() { + $this->assertIsString(); + if (!ctype_alnum($this->contents)) $this->error('must be alphanumeric'); + return $this; + } + + public function assertNotEmpty() { + if (empty($this->contents)) $this->error('must not be empty'); + return $this; + } + + public function assertIsLookup() { + $this->assertIsArray(); + foreach ($this->contents as $v) { + if ($v !== true) $this->error('must be a lookup array'); + } + return $this; + } + + protected function error($msg) { + throw new HTMLPurifier_ConfigSchema_Exception(ucfirst($this->member) . ' in ' . $this->context . ' ' . 
$msg); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema.ser b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema.ser new file mode 100644 index 0000000000000000000000000000000000000000..bbf12f9c3e7392aa8143727d2485f6f9ad1f97e1 GIT binary patch literal 12999 zcmeHO+in}l5#3KQw8%>U6QX2iW%@~`L@W{AAkxU@X)z>QRL8>^W-hX{hW|b1RCo0> zheOKykN^Q_V_|8ky1KgV-Bo)#IC(!f`gMAJbypYm!J6XtVV*tM{%ebnvYl zoDGf^<{_!msyzCb3_hIkWBPfc+jo$_-Z6=Llal@};8O$yOR`dS{al*i#rgEy?|tlH7mnxgDp{KIv}$pt(CjHm z?Lv@_z&RH4pOar&L?Sey1@2D=M`QQ-jpZI(7o_6JPt9|6VGDwQo>uY`R|@x+Su#t_ z_~Pi}Y;eq7`yMdLi;wrGNDrIDhG3`r0Vg6Z9#!24`58RlbV&qsANX zg)P@<@^WRfFQ4znfd(0AkO;L8FA6=S@EWMvt;gzJS6<#f{{{94u~KF`mnh+P zo5#C1tNh7auGZB{&;5KeE7!ft=eb!|HqXQrPUs1cd9@&wKKH^eB8~n>Sa^%sZkoz~ zD6e5NLRJi_XgHZTmm9Cvc~K=9)bFf^?i8TY!p^@0q0z7c$Sm%Pd~J%#s=HEa1jA@; zW_h}M18=i(qCTWY;C1pmUM;Uow&VfJ0Y3Lnj*r)3L%KI97uHls(d-SE8YYM*5qa<~ zmemJrVhRfv{KJTEoCNIV`(45vC9Xic!@MxP^X0NIWoe&G`ZBW5S0V(;UqnQVVVQh=EdL5%h$YEh$bNds1j#dB}JZRJRpSf^Vu=~cylTY)a<^GM*1B~ z@*>Hc`*X=?bpGBg0qDgrIyS4gj=w=wc?#|oa(2v}_!}0j>tdZopkn|%;zK=uEN!C8 zNNzOZZy@;f(N5ekQd94Rn7V*rac2@)u6`sD0^i-yPi>(7F46Br{cQlqEQZk%Q_ zEUgd+>Xpf=X^z~peb8?lb38h&MH$NDbW5Ilo>Mx z|1z8B3!5#;2)M7Shqq_^nc>ADl=Eb5d`=iX+H_G+x<0n6>0ZWI(_|Tp^8^_+qH~jH z=ab=hJbXluor9-<$Rs0(zev)qUfCc-94j>;W? zG|1b?rZ{k~ojy*%mgu_trHK*#ldvQxWwiDZBS z>?EqXrT{(CKl6H8&qVTKOb{~5Ev=fGuoi!1abRb4rSX|TF@`r97lFYXV(4|gsMD|% z(sV%9YBwmudQG|HgCcxPP(+UZKsrZqpkYIPYxx%jB?#19pq?N=ek%vqd{Pzlys8Ut zUlMXy_^8s!-wP8?^bHcalG4SeO~?fX|J(J|Fkh`;`H=)12916nSe&qHDa{>W-yJ~e z5yXZH{5aV&_X(^?ek{mDu)3@P#d%Rxxj~MVuaFBRTzr(cPTRz4RH`_EvCYJbq>QXf z?La+314WpGHz_d}7Ks}`Rar+urgh4~N%DXKXU447R1g7jJNp;HV*u%HP_~Ues}SfV z=L=8@?CUOsTp1*4@&&e5W?^g8gkTcBB-;_iMT}^Dpj<#{5s&H%zj3LW*a=j1TUYD4 zLgHUB9JWJ?w<2)m@ovo56oKEX8m##56I4~ySvWZi#DmDEtA1nP)Ra{ZpXcT#AJ6j) z3TUbg)V1pCa;KtSCJ@6n@sh`?f`>WQzyW`iD9=1aQ6-3jxgx-m9~yKVB+E8`HDCQ= zm^|dctv@BDo)E|27k@Ev%uov;r5wT5? 
zF|YcB=vl-ifU6XIZ_2!eEafP2DT2ZyGFxy@=GUATA#q#JE5CHyk?0zvcb$AO=d&_T z5oeVr&+&CF^_<24!RDu}y%|^nbYkX(sZdlpHdq!Ac8hYPunSy4<(?d@fLdp4f}Jni zP4&)Q=5Dty<-TCYO$mo|mvS8Pr@#KRNGcRvhdcSgoQeH# zC*QbjAMWH&`aE&ClZT>@6_{K@e1+!Ax7OGA8pp!$a3@FYdbpFzWBB1t?mg&mCqLZD zd1wDWzmp@Z;cmV5?`n+pI1s+f*~Vgez`Y&SmCYXTIFsNduFvhi88Zf! zFpKDW&}LKI4v5p)65j=q7M-jDdqTO@4#&p7aAUln2-v^2idG?q}u^c9NtbHfH-`~>f!+;zVOMWlbZCuY4|wY?PE5)qxO6wm#7o2{T0s|ULYf2 zyE*SEkcVa$-*zX5lzXe+y{kFz7(Ru*8&c$T)oZQLZvKA7{ot6=^O!gPtSmmwnqTe zT>Z8iod&-x0#A4AUjq*j*e^uj@$brl|C*~dQv%IGohiUUwmdfQ(H<&LKH!zYN5@=2 zBzkh%nk(3b#&ZQQbf&`}X-<2IAtX9R=A1Jb)oH4oYy_ZJ+(3nie(J6kc&88inU1)a z17fBy*>uG1;Do`Ac0wWTxOMw8s<1b(TqoRPAc;hDx;o)r?uyv%g!@3Oj5cj<7kaun z+%yGEhO&8Qmm64;Qql4dr0qtxKLWjiqHqZ9Zj>j}(d%}xcz;W}>2$lz7-n`hr5of| z%m=2olStHD?=~T%1-ID6addN9?fL%=8&J#cGx(Oj_jx+yPF#=rio||ow{CI4I`)g} zzs8tu}*1RILAKbZ9ZX6%ksWZ)^UDFpnT(sVFxZ>?Q2Cf!>RhAB5 z07a^Im`nUh)HLip0?YpxAq+|=0XV6H^A6CO#PIwfcL~boB!-V!2(AKTSc!=11;D{J z2OO|9Ls}?@qil(CSfS9ZEN-BF%t3;^iYAOXwDc<@!cb~l$#bqxO8MV2WH4=D<8)Dd z|DHDh75tlbz2p9yDF*3EnLl)GV&UB%+hH`*2NJ5mn>;zZ>GzIzu5K_kS3_S4>N^&B zr`SKh;qr=d&91+j(~Ye`6r!4J4{y8`nIdqA#1SlIbfcx=j#8>?Xu}%~d_stEoI{5; zJV^J^gSXs(J}W~TK3zhKLW$q~$VN-quV35VQLP}9<(#buqYbHDK8ArHs8pvw0>Yz_ zi9_j%^J=H3A}=}{;@MP-oa$tVKbCr^e}!tkN+R}OyAA};)H`5{Mt2l~!|We6aa)IS zXvav(epX%|Y3XE+hSHvJQAdYNw}xE2lwhwNLoObObs|FQLg!!hGtY?AL9d%*;C9T= z_`M-(PEq(bl`uWwOu+-)l#8Ac-qF!L1ES-BLk#im-G%Mw1+^qXQgGs5t(3Q;P7m;GT?|o@vRUH9+Gk)9&#Z ex0z?zkIyJIzlD6=+JDysjbHrQ5DoYJ{{26FNH2{5 literal 0 HcmV?d00001 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.AllowedClasses.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.AllowedClasses.txt new file mode 100644 index 0000000000..0517fed0a1 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.AllowedClasses.txt @@ -0,0 +1,8 @@ +Attr.AllowedClasses +TYPE: lookup/null +VERSION: 4.0.0 +DEFAULT: null +--DESCRIPTION-- +List of allowed class values in the class attribute. 
By default, this is null, +which means all classes are allowed. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.AllowedFrameTargets.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.AllowedFrameTargets.txt new file mode 100644 index 0000000000..249edd647b --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.AllowedFrameTargets.txt @@ -0,0 +1,12 @@ +Attr.AllowedFrameTargets +TYPE: lookup +DEFAULT: array() +--DESCRIPTION-- +Lookup table of all allowed link frame targets. Some commonly used link +targets include _blank, _self, _parent and _top. Values should be +lowercase, as validation will be done in a case-sensitive manner despite +W3C's recommendation. XHTML 1.0 Strict does not permit the target attribute +so this directive will have no effect in that doctype. XHTML 1.1 does not +enable the Target module by default, you will have to manually enable it +(see the module documentation for more details.) +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRel.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRel.txt new file mode 100644 index 0000000000..9a8fa6a2e2 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRel.txt @@ -0,0 +1,9 @@ +Attr.AllowedRel +TYPE: lookup +VERSION: 1.6.0 +DEFAULT: array() +--DESCRIPTION-- +List of allowed forward document relationships in the rel attribute. Common +values may be nofollow or print. By default, this is empty, meaning that no +document relationships are allowed. 
+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRev.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRev.txt new file mode 100644 index 0000000000..b017883485 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.AllowedRev.txt @@ -0,0 +1,9 @@ +Attr.AllowedRev +TYPE: lookup +VERSION: 1.6.0 +DEFAULT: array() +--DESCRIPTION-- +List of allowed reverse document relationships in the rev attribute. This +attribute is a bit of an edge-case; if you don't know what it is for, stay +away. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.ClassUseCDATA.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.ClassUseCDATA.txt new file mode 100644 index 0000000000..e774b823b1 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.ClassUseCDATA.txt @@ -0,0 +1,19 @@ +Attr.ClassUseCDATA +TYPE: bool/null +DEFAULT: null +VERSION: 4.0.0 +--DESCRIPTION-- +If null, class will auto-detect the doctype and, if matching XHTML 1.1 or +XHTML 2.0, will use the restrictive NMTOKENS specification of class. Otherwise, +it will use a relaxed CDATA definition. If true, the relaxed CDATA definition +is forced; if false, the NMTOKENS definition is forced. To get behavior +of HTML Purifier prior to 4.0.0, set this directive to false. + +Some rational behind the auto-detection: +in previous versions of HTML Purifier, it was assumed that the form of +class was NMTOKENS, as specified by the XHTML Modularization (representing +XHTML 1.1 and XHTML 2.0). The DTDs for HTML 4.01 and XHTML 1.0, however +specify class as CDATA. HTML 5 effectively defines it as CDATA, but +with the additional constraint that each name should be unique (this is not +explicitly outlined in previous specifications). 
+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.DefaultImageAlt.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.DefaultImageAlt.txt new file mode 100644 index 0000000000..533165e175 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.DefaultImageAlt.txt @@ -0,0 +1,11 @@ +Attr.DefaultImageAlt +TYPE: string/null +DEFAULT: null +VERSION: 3.2.0 +--DESCRIPTION-- +This is the content of the alt tag of an image if the user had not +previously specified an alt attribute. This applies to all images without +a valid alt attribute, as opposed to %Attr.DefaultInvalidImageAlt, which +only applies to invalid images, and overrides in the case of an invalid image. +Default behavior with null is to use the basename of the src tag for the alt. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImage.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImage.txt new file mode 100644 index 0000000000..9eb7e38469 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImage.txt @@ -0,0 +1,9 @@ +Attr.DefaultInvalidImage +TYPE: string +DEFAULT: '' +--DESCRIPTION-- +This is the default image an img tag will be pointed to if it does not have +a valid src attribute. In future versions, we may allow the image tag to +be removed completely, but due to design issues, this is not possible right +now. 
+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImageAlt.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImageAlt.txt new file mode 100644 index 0000000000..2f17bf477a --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.DefaultInvalidImageAlt.txt @@ -0,0 +1,8 @@ +Attr.DefaultInvalidImageAlt +TYPE: string +DEFAULT: 'Invalid image' +--DESCRIPTION-- +This is the content of the alt tag of an invalid image if the user had not +previously specified an alt attribute. It has no effect when the image is +valid but there was no alt attribute present. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.DefaultTextDir.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.DefaultTextDir.txt new file mode 100644 index 0000000000..52654b53ae --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.DefaultTextDir.txt @@ -0,0 +1,10 @@ +Attr.DefaultTextDir +TYPE: string +DEFAULT: 'ltr' +--DESCRIPTION-- +Defines the default text direction (ltr or rtl) of the document being +parsed. This generally is the same as the value of the dir attribute in +HTML, or ltr if that is not specified. +--ALLOWED-- +'ltr', 'rtl' +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.EnableID.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.EnableID.txt new file mode 100644 index 0000000000..6440d21032 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.EnableID.txt @@ -0,0 +1,16 @@ +Attr.EnableID +TYPE: bool +DEFAULT: false +VERSION: 1.2.0 +--DESCRIPTION-- +Allows the ID attribute in HTML. This is disabled by default due to the +fact that without proper configuration user input can easily break the +validation of a webpage by specifying an ID that is already on the +surrounding HTML. 
If you don't mind throwing caution to the wind, enable +this directive, but I strongly recommend you also consider blacklisting IDs +you use (%Attr.IDBlacklist) or prefixing all user supplied IDs +(%Attr.IDPrefix). When set to true HTML Purifier reverts to the behavior of +pre-1.2.0 versions. +--ALIASES-- +HTML.EnableAttrID +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.ForbiddenClasses.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.ForbiddenClasses.txt new file mode 100644 index 0000000000..f31d226f58 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.ForbiddenClasses.txt @@ -0,0 +1,8 @@ +Attr.ForbiddenClasses +TYPE: lookup +VERSION: 4.0.0 +DEFAULT: array() +--DESCRIPTION-- +List of forbidden class values in the class attribute. By default, this is +empty, which means that no classes are forbidden. See also %Attr.AllowedClasses. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklist.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklist.txt new file mode 100644 index 0000000000..5f2b5e3d2c --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklist.txt @@ -0,0 +1,5 @@ +Attr.IDBlacklist +TYPE: list +DEFAULT: array() +DESCRIPTION: Array of IDs not allowed in the document. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklistRegexp.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklistRegexp.txt new file mode 100644 index 0000000000..6f5824586e --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.IDBlacklistRegexp.txt @@ -0,0 +1,9 @@ +Attr.IDBlacklistRegexp +TYPE: string/null +VERSION: 1.6.0 +DEFAULT: NULL +--DESCRIPTION-- +PCRE regular expression to be matched against all IDs. If the expression is +matches, the ID is rejected. 
Use this with care: may cause significant +degradation. ID matching is done after all other validation. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefix.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefix.txt new file mode 100644 index 0000000000..cc49d43fd0 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefix.txt @@ -0,0 +1,12 @@ +Attr.IDPrefix +TYPE: string +VERSION: 1.2.0 +DEFAULT: '' +--DESCRIPTION-- +String to prefix to IDs. If you have no idea what IDs your pages may use, +you may opt to simply add a prefix to all user-submitted ID attributes so +that they are still usable, but will not conflict with core page IDs. +Example: setting the directive to 'user_' will result in a user submitted +'foo' to become 'user_foo'. Be sure to set %HTML.EnableAttrID to true +before using this. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefixLocal.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefixLocal.txt new file mode 100644 index 0000000000..2c5924a7ad --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Attr.IDPrefixLocal.txt @@ -0,0 +1,14 @@ +Attr.IDPrefixLocal +TYPE: string +VERSION: 1.2.0 +DEFAULT: '' +--DESCRIPTION-- +Temporary prefix for IDs used in conjunction with %Attr.IDPrefix. If you +need to allow multiple sets of user content on a web page, you may need to +have a separate prefix that changes with each iteration. This way, +separately submitted user content displayed on the same page doesn't +clobber each other. Ideal values are unique identifiers for the content it +represents (i.e. the id of the row in the database). Be sure to add a +separator (like an underscore) at the end. Warning: this directive will +not work unless %Attr.IDPrefix is set to a non-empty value!
+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.AutoParagraph.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.AutoParagraph.txt new file mode 100644 index 0000000000..d5caa1bb97 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.AutoParagraph.txt @@ -0,0 +1,31 @@ +AutoFormat.AutoParagraph +TYPE: bool +VERSION: 2.0.1 +DEFAULT: false +--DESCRIPTION-- + +

+ This directive turns on auto-paragraphing, where double newlines are + converted into paragraphs whenever possible. Auto-paragraphing: +

+
    +
  • Always applies to inline elements or text in the root node,
  • +
  • Applies to inline elements or text with double newlines in nodes + that allow paragraph tags,
  • +
  • Applies to double newlines in paragraph tags
  • +
+

+ p tags must be allowed for this directive to take effect. + We do not use br tags for paragraphing, as that is + semantically incorrect. +

+

+ To prevent auto-paragraphing as a content-producer, refrain from using + double-newlines except to specify a new paragraph or in contexts where + it has special meaning (whitespace usually has no meaning except in + tags like pre, so this should not be difficult.) To prevent + the paragraphing of inline text adjacent to block elements, wrap them + in div tags (the behavior is slightly different outside of + the root node.) +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.Custom.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.Custom.txt new file mode 100644 index 0000000000..2a476481af --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.Custom.txt @@ -0,0 +1,12 @@ +AutoFormat.Custom +TYPE: list +VERSION: 2.0.1 +DEFAULT: array() +--DESCRIPTION-- + +

+ This directive can be used to add custom auto-format injectors. + Specify an array of injector names (class name minus the prefix) + or concrete implementations. Injector class must exist. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.DisplayLinkURI.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.DisplayLinkURI.txt new file mode 100644 index 0000000000..663064a344 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.DisplayLinkURI.txt @@ -0,0 +1,11 @@ +AutoFormat.DisplayLinkURI +TYPE: bool +VERSION: 3.2.0 +DEFAULT: false +--DESCRIPTION-- +

+ This directive turns on the in-text display of URIs in <a> tags, and disables + those links. For example, example becomes + example (http://example.com). +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.Linkify.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.Linkify.txt new file mode 100644 index 0000000000..3a48ba960e --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.Linkify.txt @@ -0,0 +1,12 @@ +AutoFormat.Linkify +TYPE: bool +VERSION: 2.0.1 +DEFAULT: false +--DESCRIPTION-- + +

+ This directive turns on linkification, auto-linking http, ftp and + https URLs. a tags with the href attribute + must be allowed. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.DocURL.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.DocURL.txt new file mode 100644 index 0000000000..db58b13464 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.DocURL.txt @@ -0,0 +1,12 @@ +AutoFormat.PurifierLinkify.DocURL +TYPE: string +VERSION: 2.0.1 +DEFAULT: '#%s' +ALIASES: AutoFormatParam.PurifierLinkifyDocURL +--DESCRIPTION-- +

+ Location of configuration documentation to link to, let %s substitute + into the configuration's namespace and directive names sans the percent + sign. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.txt new file mode 100644 index 0000000000..7996488be0 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.PurifierLinkify.txt @@ -0,0 +1,12 @@ +AutoFormat.PurifierLinkify +TYPE: bool +VERSION: 2.0.1 +DEFAULT: false +--DESCRIPTION-- + +

+ Internal auto-formatter that converts configuration directives in + syntax %Namespace.Directive to links. a tags + with the href attribute must be allowed. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.txt new file mode 100644 index 0000000000..35c393b4e6 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions.txt @@ -0,0 +1,11 @@ +AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions +TYPE: lookup +VERSION: 4.0.0 +DEFAULT: array('td' => true, 'th' => true) +--DESCRIPTION-- +

+ When %AutoFormat.RemoveEmpty and %AutoFormat.RemoveEmpty.RemoveNbsp + are enabled, this directive defines what HTML elements should not be + removed if they have only a non-breaking space in them. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.txt new file mode 100644 index 0000000000..ca17eb1dc4 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.RemoveNbsp.txt @@ -0,0 +1,15 @@ +AutoFormat.RemoveEmpty.RemoveNbsp +TYPE: bool +VERSION: 4.0.0 +DEFAULT: false +--DESCRIPTION-- +

+ When enabled, HTML Purifier will treat any elements that contain only + non-breaking spaces as well as regular whitespace as empty, and remove + them when %AutoFormat.RemoveEmpty is enabled. +

+

+ See %AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions for a list of elements + that don't have this behavior applied to them. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.txt new file mode 100644 index 0000000000..34657ba47b --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/AutoFormat.RemoveEmpty.txt @@ -0,0 +1,46 @@ +AutoFormat.RemoveEmpty +TYPE: bool +VERSION: 3.2.0 +DEFAULT: false +--DESCRIPTION-- +

+ When enabled, HTML Purifier will attempt to remove empty elements that + contribute no semantic information to the document. The following types + of nodes will be removed: +

+
  • + Tags with no attributes and no content, and that are not empty + elements (remove <a></a> but not + <br />), and +
  • +
  • + Tags with no content, except for:
      +
    • The colgroup element, or
    • +
    • + Elements with the id or name attribute, + when those attributes are permitted on those elements. +
    • +
  • +
+

+ Please be very careful when using this functionality; while it may not + seem that empty elements contain useful information, they can alter the + layout of a document given appropriate styling. This directive is most + useful when you are processing machine-generated HTML; please avoid using + it on regular user HTML. +

+

+ Elements that contain only whitespace will be treated as empty. Non-breaking + spaces, however, do not count as whitespace. See + %AutoFormat.RemoveEmpty.RemoveNbsp for alternate behavior. +

+

+ This algorithm is not perfect; you may still notice some empty tags, + particularly if a node had elements, but those elements were later removed + because they were not permitted in that context, or tags that, after + being auto-closed by another tag, were empty. This is for safety reasons + to prevent clever code from breaking validation. The general rule of thumb: + if a tag looked empty on the way in, it will get removed; if HTML Purifier + made it empty, it will stay. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.AllowImportant.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.AllowImportant.txt new file mode 100644 index 0000000000..b324608f76 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.AllowImportant.txt @@ -0,0 +1,8 @@ +CSS.AllowImportant +TYPE: bool +DEFAULT: false +VERSION: 3.1.0 +--DESCRIPTION-- +This parameter determines whether or not !important cascade modifiers should +be allowed in user CSS. If false, !important will stripped. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.AllowTricky.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.AllowTricky.txt new file mode 100644 index 0000000000..748be0eec8 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.AllowTricky.txt @@ -0,0 +1,11 @@ +CSS.AllowTricky +TYPE: bool +DEFAULT: false +VERSION: 3.1.0 +--DESCRIPTION-- +This parameter determines whether or not to allow "tricky" CSS properties and +values. Tricky CSS properties/values can drastically modify page layout or +be used for deceptive practices but do not directly constitute a security risk. +For example, display:none; is considered a tricky property that +will only be allowed if this directive is set to true. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.AllowedProperties.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.AllowedProperties.txt new file mode 100644 index 0000000000..460112ebe0 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.AllowedProperties.txt @@ -0,0 +1,18 @@ +CSS.AllowedProperties +TYPE: lookup/null +VERSION: 3.1.0 +DEFAULT: NULL +--DESCRIPTION-- + +

+ If HTML Purifier's style attributes set is unsatisfactory for your needs, + you can overload it with your own list of properties to allow. Note that this + method is subtractive: it does its job by taking away from HTML Purifier's + usual feature set, so you cannot add a property that HTML Purifier never + supported in the first place. +

+

+ Warning: If another directive conflicts with the + elements here, that directive will win and override. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.DefinitionRev.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.DefinitionRev.txt new file mode 100644 index 0000000000..5cb7dda3ba --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.DefinitionRev.txt @@ -0,0 +1,11 @@ +CSS.DefinitionRev +TYPE: int +VERSION: 2.0.0 +DEFAULT: 1 +--DESCRIPTION-- + +

+ Revision identifier for your custom definition. See + %HTML.DefinitionRev for details. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.MaxImgLength.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.MaxImgLength.txt new file mode 100644 index 0000000000..7a3291470c --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.MaxImgLength.txt @@ -0,0 +1,16 @@ +CSS.MaxImgLength +TYPE: string/null +DEFAULT: '1200px' +VERSION: 3.1.1 +--DESCRIPTION-- +

+ This parameter sets the maximum allowed length on img tags, + effectively the width and height properties. + Only absolute units of measurement (in, pt, pc, mm, cm) and pixels (px) are allowed. This is + in place to prevent imagecrash attacks, disable with null at your own risk. + This directive is similar to %HTML.MaxImgLength, and both should be + concurrently edited, although there are + subtle differences in the input format (the CSS max is a number with + a unit). +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.Proprietary.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.Proprietary.txt new file mode 100644 index 0000000000..148eedb8be --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/CSS.Proprietary.txt @@ -0,0 +1,10 @@ +CSS.Proprietary +TYPE: bool +VERSION: 3.0.0 +DEFAULT: false +--DESCRIPTION-- + +

+ Whether or not to allow safe, proprietary CSS values. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Cache.DefinitionImpl.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Cache.DefinitionImpl.txt new file mode 100644 index 0000000000..c486724c88 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Cache.DefinitionImpl.txt @@ -0,0 +1,14 @@ +Cache.DefinitionImpl +TYPE: string/null +VERSION: 2.0.0 +DEFAULT: 'Serializer' +--DESCRIPTION-- + +This directive defines which method to use when caching definitions, +the complex data-type that makes HTML Purifier tick. Set to null +to disable caching (not recommended, as you will see a definite +performance degradation). + +--ALIASES-- +Core.DefinitionCache +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPath.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPath.txt new file mode 100644 index 0000000000..54036507d6 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Cache.SerializerPath.txt @@ -0,0 +1,13 @@ +Cache.SerializerPath +TYPE: string/null +VERSION: 2.0.0 +DEFAULT: NULL +--DESCRIPTION-- + +

+ Absolute path with no trailing slash to store serialized definitions in. + Default is within the + HTML Purifier library inside DefinitionCache/Serializer. This + path must be writable by the webserver. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyFixLt.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyFixLt.txt new file mode 100644 index 0000000000..568cbf3b32 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.AggressivelyFixLt.txt @@ -0,0 +1,18 @@ +Core.AggressivelyFixLt +TYPE: bool +VERSION: 2.1.0 +DEFAULT: true +--DESCRIPTION-- +

+ This directive enables aggressive pre-filter fixes HTML Purifier can + perform in order to ensure that open angled-brackets do not get killed + during parsing stage. Enabling this will result in two preg_replace_callback + calls and at least two preg_replace calls for every HTML document parsed; + if your users make very well-formed HTML, you can set this directive false. + This has no effect when DirectLex is used. +

+

+ Notice: This directive's default turned from false to true + in HTML Purifier 3.2.0. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.CollectErrors.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.CollectErrors.txt new file mode 100644 index 0000000000..d7317911fa --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.CollectErrors.txt @@ -0,0 +1,12 @@ +Core.CollectErrors +TYPE: bool +VERSION: 2.0.0 +DEFAULT: false +--DESCRIPTION-- + +Whether or not to collect errors found while filtering the document. This +is a useful way to give feedback to your users. Warning: +Currently this feature is very patchy and experimental, with lots of +possible error messages not yet implemented. It will not cause any +problems, but it may not help your users either. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt new file mode 100644 index 0000000000..08b381d34c --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.ColorKeywords.txt @@ -0,0 +1,28 @@ +Core.ColorKeywords +TYPE: hash +VERSION: 2.0.0 +--DEFAULT-- +array ( + 'maroon' => '#800000', + 'red' => '#FF0000', + 'orange' => '#FFA500', + 'yellow' => '#FFFF00', + 'olive' => '#808000', + 'purple' => '#800080', + 'fuchsia' => '#FF00FF', + 'white' => '#FFFFFF', + 'lime' => '#00FF00', + 'green' => '#008000', + 'navy' => '#000080', + 'blue' => '#0000FF', + 'aqua' => '#00FFFF', + 'teal' => '#008080', + 'black' => '#000000', + 'silver' => '#C0C0C0', + 'gray' => '#808080', +) +--DESCRIPTION-- + +Lookup array of color names to six digit hexadecimal number corresponding +to color, with preceding hash mark. Used when parsing colors. 
+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.ConvertDocumentToFragment.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.ConvertDocumentToFragment.txt new file mode 100644 index 0000000000..64b114fce2 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.ConvertDocumentToFragment.txt @@ -0,0 +1,14 @@ +Core.ConvertDocumentToFragment +TYPE: bool +DEFAULT: true +--DESCRIPTION-- + +This parameter determines whether or not the filter should convert +input that is a full document with html and body tags to a fragment +of just the contents of a body tag. This parameter is simply something +HTML Purifier can do during an edge-case: for most inputs, this +processing is not necessary. + +--ALIASES-- +Core.AcceptFullDocuments +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.DirectLexLineNumberSyncInterval.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.DirectLexLineNumberSyncInterval.txt new file mode 100644 index 0000000000..36f16e07ea --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.DirectLexLineNumberSyncInterval.txt @@ -0,0 +1,17 @@ +Core.DirectLexLineNumberSyncInterval +TYPE: int +VERSION: 2.0.0 +DEFAULT: 0 +--DESCRIPTION-- + +

+ Specifies the number of tokens the DirectLex line number tracking + implementations should process before attempting to resynchronize the + current line count by manually counting all previous new-lines. When + at 0, this functionality is disabled. Lower values will decrease + performance, and this is only strictly necessary if the counting + algorithm is buggy (in which case you should report it as a bug). + This has no effect when %Core.MaintainLineNumbers is disabled or DirectLex is + not being used. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.Encoding.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.Encoding.txt new file mode 100644 index 0000000000..8bfb47c3ac --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.Encoding.txt @@ -0,0 +1,15 @@ +Core.Encoding +TYPE: istring +DEFAULT: 'utf-8' +--DESCRIPTION-- +If for some reason you are unable to convert all webpages to UTF-8, you can +use this directive as a stop-gap compatibility change to let HTML Purifier +deal with non UTF-8 input. This technique has notable deficiencies: +absolutely no characters outside of the selected character encoding will be +preserved, not even the ones that have been ampersand escaped (this is due +to a UTF-8 specific feature that automatically resolves all +entities), making it pretty useless for anything except the most I18N-blind +applications, although %Core.EscapeNonASCIICharacters offers fixes this +trouble with another tradeoff. This directive only accepts ISO-8859-1 if +iconv is not enabled. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidChildren.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidChildren.txt new file mode 100644 index 0000000000..4d5b5055cd --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidChildren.txt @@ -0,0 +1,10 @@ +Core.EscapeInvalidChildren +TYPE: bool +DEFAULT: false +--DESCRIPTION-- +When true, a child is found that is not allowed in the context of the +parent element will be transformed into text as if it were ASCII. When +false, that element and all internal tags will be dropped, though text will +be preserved. There is no option for dropping the element but preserving +child nodes. 
+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidTags.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidTags.txt new file mode 100644 index 0000000000..a7a5b249bb --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.EscapeInvalidTags.txt @@ -0,0 +1,7 @@ +Core.EscapeInvalidTags +TYPE: bool +DEFAULT: false +--DESCRIPTION-- +When true, invalid tags will be written back to the document as plain text. +Otherwise, they are silently dropped. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.EscapeNonASCIICharacters.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.EscapeNonASCIICharacters.txt new file mode 100644 index 0000000000..abb499948a --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.EscapeNonASCIICharacters.txt @@ -0,0 +1,13 @@ +Core.EscapeNonASCIICharacters +TYPE: bool +VERSION: 1.4.0 +DEFAULT: false +--DESCRIPTION-- +This directive overcomes a deficiency in %Core.Encoding by blindly +converting all non-ASCII characters into decimal numeric entities before +converting it to its native encoding. This means that even characters that +can be expressed in the non-UTF-8 encoding will be entity-ized, which can +be a real downer for encodings like Big5. It also assumes that the ASCII +repetoire is available, although this is the case for almost all encodings. +Anyway, use UTF-8! +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.HiddenElements.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.HiddenElements.txt new file mode 100644 index 0000000000..915391edb7 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.HiddenElements.txt @@ -0,0 +1,19 @@ +Core.HiddenElements +TYPE: lookup +--DEFAULT-- +array ( + 'script' => true, + 'style' => true, +) +--DESCRIPTION-- + +

+ This directive is a lookup array of elements which should have their + contents removed when they are not allowed by the HTML definition. + For example, the contents of a script tag are not + normally shown in a document, so if script tags are to be removed, + their contents should be removed too. This is opposed to a b + tag, which defines some presentational changes but does not hide its + contents. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.Language.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.Language.txt new file mode 100644 index 0000000000..233fca14f8 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.Language.txt @@ -0,0 +1,10 @@ +Core.Language +TYPE: string +VERSION: 2.0.0 +DEFAULT: 'en' +--DESCRIPTION-- + +ISO 639 language code for localizable things in HTML Purifier to use, +which is mainly error reporting. There is currently only an English (en) +translation, so this directive is currently useless. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.LexerImpl.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.LexerImpl.txt new file mode 100644 index 0000000000..8983e2cca9 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.LexerImpl.txt @@ -0,0 +1,34 @@ +Core.LexerImpl +TYPE: mixed/null +VERSION: 2.0.0 +DEFAULT: NULL +--DESCRIPTION-- + +

+ This parameter determines what lexer implementation can be used. The + valid values are: +

+
+
null
+
+ Recommended, the lexer implementation will be auto-detected based on + your PHP-version and configuration. +
+
string lexer identifier
+
+ This is a slim way of manually overriding the implementation. + Currently recognized values are: DOMLex (the default PHP5 +implementation) + and DirectLex (the default PHP4 implementation). Only use this if + you know what you are doing: usually, the auto-detection will + manage things for cases you aren't even aware of. +
+
object lexer instance
+
+ Super-advanced: you can specify your own, custom, implementation that + implements the interface defined by HTMLPurifier_Lexer. + I may remove this option simply because I don't expect anyone + to use it. +
+
+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.MaintainLineNumbers.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.MaintainLineNumbers.txt new file mode 100644 index 0000000000..eb841a7597 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.MaintainLineNumbers.txt @@ -0,0 +1,16 @@ +Core.MaintainLineNumbers +TYPE: bool/null +VERSION: 2.0.0 +DEFAULT: NULL +--DESCRIPTION-- + +

+ If true, HTML Purifier will add line number information to all tokens. + This is useful when error reporting is turned on, but can result in + significant performance degradation and should not be used when + unnecessary. This directive must be used with the DirectLex lexer, + as the DOMLex lexer does not (yet) support this functionality. + If the value is null, an appropriate value will be selected based + on other configuration. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.RemoveInvalidImg.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.RemoveInvalidImg.txt new file mode 100644 index 0000000000..4070c2a0de --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.RemoveInvalidImg.txt @@ -0,0 +1,12 @@ +Core.RemoveInvalidImg +TYPE: bool +DEFAULT: true +VERSION: 1.3.0 +--DESCRIPTION-- + +

+ This directive enables pre-emptive URI checking in img + tags, as the attribute validation strategy is not authorized to + remove elements from the document. Revert to pre-1.3.0 behavior by setting to false. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.RemoveScriptContents.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.RemoveScriptContents.txt new file mode 100644 index 0000000000..a4cd966df8 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Core.RemoveScriptContents.txt @@ -0,0 +1,12 @@ +Core.RemoveScriptContents +TYPE: bool/null +DEFAULT: NULL +VERSION: 2.0.0 +DEPRECATED-VERSION: 2.1.0 +DEPRECATED-USE: Core.HiddenElements +--DESCRIPTION-- +

+ This directive enables HTML Purifier to remove not only script tags + but all of their contents. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.Custom.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.Custom.txt new file mode 100644 index 0000000000..3db50ef204 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.Custom.txt @@ -0,0 +1,11 @@ +Filter.Custom +TYPE: list +VERSION: 3.1.0 +DEFAULT: array() +--DESCRIPTION-- +

+ This directive can be used to add custom filters; it is nearly the + equivalent of the now deprecated HTMLPurifier->addFilter() + method. Specify an array of concrete implementations. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Escaping.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Escaping.txt new file mode 100644 index 0000000000..16829bcda0 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Escaping.txt @@ -0,0 +1,14 @@ +Filter.ExtractStyleBlocks.Escaping +TYPE: bool +VERSION: 3.0.0 +DEFAULT: true +ALIASES: Filter.ExtractStyleBlocksEscaping, FilterParam.ExtractStyleBlocksEscaping +--DESCRIPTION-- + +

+ Whether or not to escape the dangerous characters <, > and & + as \3C, \3E and \26, respectively. This can be safely set to false + if the contents of StyleBlocks will be placed in an external stylesheet, + where there is no risk of it being interpreted as HTML. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Scope.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Scope.txt new file mode 100644 index 0000000000..7f95f54d12 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.Scope.txt @@ -0,0 +1,29 @@ +Filter.ExtractStyleBlocks.Scope +TYPE: string/null +VERSION: 3.0.0 +DEFAULT: NULL +ALIASES: Filter.ExtractStyleBlocksScope, FilterParam.ExtractStyleBlocksScope +--DESCRIPTION-- + +

+ If you would like users to be able to define external stylesheets, but + only allow them to specify CSS declarations for a specific node and + prevent them from fiddling with other elements, use this directive. + It accepts any valid CSS selector, and will prepend this to any + CSS declaration extracted from the document. For example, if this + directive is set to #user-content and a user uses the + selector a:hover, the final selector will be + #user-content a:hover. +

+

+ The comma shorthand may be used; consider the above example, with + #user-content, #user-content2, the final selector will + be #user-content a:hover, #user-content2 a:hover. +

+

+ Warning: It is possible for users to bypass this measure + using a naughty + selector. This is a bug in CSS Tidy 1.3, not HTML + Purifier, and I am working to get it fixed. Until then, HTML Purifier + performs a basic check to prevent this. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.TidyImpl.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.TidyImpl.txt new file mode 100644 index 0000000000..6c231b2d7f --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.TidyImpl.txt @@ -0,0 +1,16 @@ +Filter.ExtractStyleBlocks.TidyImpl +TYPE: mixed/null +VERSION: 3.1.0 +DEFAULT: NULL +ALIASES: FilterParam.ExtractStyleBlocksTidyImpl +--DESCRIPTION-- +

+ If left NULL, HTML Purifier will attempt to instantiate a csstidy + class to use for internal cleaning. This will usually be good enough. +

+

+ However, for trusted user input, you can set this to false to + disable cleaning. In addition, you can supply your own concrete implementation + of Tidy's interface to use, although I don't know why you'd want to do that. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.txt new file mode 100644 index 0000000000..078d087417 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.ExtractStyleBlocks.txt @@ -0,0 +1,74 @@ +Filter.ExtractStyleBlocks +TYPE: bool +VERSION: 3.1.0 +DEFAULT: false +EXTERNAL: CSSTidy +--DESCRIPTION-- +

+ This directive turns on the style block extraction filter, which removes + style blocks from input HTML, cleans them up with CSSTidy, + and places them in the StyleBlocks context variable, for further + use by you, usually to be placed in an external stylesheet, or a + style block in the head of your document. +

+

+ Sample usage: +

+
';
+?>
+
+
+
+  Filter.ExtractStyleBlocks
+body {color:#F00;} Some text';
+
+    $config = HTMLPurifier_Config::createDefault();
+    $config->set('Filter', 'ExtractStyleBlocks', true);
+    $purifier = new HTMLPurifier($config);
+
+    $html = $purifier->purify($dirty);
+
+    // This implementation writes the stylesheets to the styles/ directory.
+    // You can also echo the styles inside the document, but it's a bit
+    // more difficult to make sure they get interpreted properly by
+    // browsers; try the usual CSS armoring techniques.
+    $styles = $purifier->context->get('StyleBlocks');
+    $dir = 'styles/';
+    if (!is_dir($dir)) mkdir($dir);
+    $hash = sha1($_GET['html']);
+    foreach ($styles as $i => $style) {
+        file_put_contents($name = $dir . $hash . "_$i", $style);
+        echo '';
+    }
+?>
+
+
+  
+ +
+ + +]]>
+

+ Warning: It is possible for a user to mount an + imagecrash attack using this CSS. Counter-measures are difficult; + it is not simply enough to limit the range of CSS lengths (using + relative lengths with many nesting levels allows for large values + to be attained without actually specifying them in the stylesheet), + and the flexible nature of selectors makes it difficult to selectively + disable lengths on image tags (HTML Purifier, however, does disable + CSS width and height in inline styling). There are probably two effective + counter measures: an explicit width and height set to auto in all + images in your document (unlikely) or the disabling of width and + height (somewhat reasonable). Whether or not these measures should be + used is left to the reader. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.YouTube.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.YouTube.txt new file mode 100644 index 0000000000..7fa6536b2c --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Filter.YouTube.txt @@ -0,0 +1,11 @@ +Filter.YouTube +TYPE: bool +VERSION: 3.1.0 +DEFAULT: false +--DESCRIPTION-- +

+ This directive enables YouTube video embedding in HTML Purifier. Check + this document + on embedding videos for more information on what this filter does. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Allowed.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Allowed.txt new file mode 100644 index 0000000000..3e231d2d16 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Allowed.txt @@ -0,0 +1,22 @@ +HTML.Allowed +TYPE: itext/null +VERSION: 2.0.0 +DEFAULT: NULL +--DESCRIPTION-- + +

+ This is a convenience directive that rolls the functionality of + %HTML.AllowedElements and %HTML.AllowedAttributes into one directive. + Specify elements and attributes that are allowed using: + element1[attr1|attr2],element2.... You can also use + newlines instead of commas to separate elements. +

+

+ Warning: + All of the constraints on the component directives are still enforced. + The syntax is a subset of TinyMCE's valid_elements + whitelist: directly copy-pasting it here will probably result in + broken whitelists. If %HTML.AllowedElements or %HTML.AllowedAttributes + are set, this directive has no effect. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.AllowedAttributes.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.AllowedAttributes.txt new file mode 100644 index 0000000000..fcf093f17d --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.AllowedAttributes.txt @@ -0,0 +1,19 @@ +HTML.AllowedAttributes +TYPE: lookup/null +VERSION: 1.3.0 +DEFAULT: NULL +--DESCRIPTION-- + +

+ If HTML Purifier's attribute set is unsatisfactory, overload it! + The syntax is "tag.attr" or "*.attr" for the global attributes + (style, id, class, dir, lang, xml:lang). +

+

+ Warning: If another directive conflicts with the + elements here, that directive will win and override. For + example, %HTML.EnableAttrID will take precedence over *.id in this + directive. You must set that directive to true before you can use + IDs at all. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.AllowedElements.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.AllowedElements.txt new file mode 100644 index 0000000000..888d558196 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.AllowedElements.txt @@ -0,0 +1,18 @@ +HTML.AllowedElements +TYPE: lookup/null +VERSION: 1.3.0 +DEFAULT: NULL +--DESCRIPTION-- +

+ If HTML Purifier's tag set is unsatisfactory for your needs, you + can overload it with your own list of tags to allow. Note that this + method is subtractive: it does its job by taking away from HTML Purifier's + usual feature set, so you cannot add a tag that HTML Purifier never + supported in the first place (like embed, form or head). If you + change this, you probably also want to change %HTML.AllowedAttributes. +

+

+ Warning: If another directive conflicts with the + elements here, that directive will win and override. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.AllowedModules.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.AllowedModules.txt new file mode 100644 index 0000000000..5a59a55c08 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.AllowedModules.txt @@ -0,0 +1,20 @@ +HTML.AllowedModules +TYPE: lookup/null +VERSION: 2.0.0 +DEFAULT: NULL +--DESCRIPTION-- + +

+ A doctype comes with a set of usual modules to use. Without having + to muck about with the doctypes, you can quickly activate or + disable these modules by specifying which modules you wish to allow + with this directive. This is most useful for unit testing specific + modules, although end users may find it useful for their own ends. +

+

+ If you specify a module that does not exist, the manager will silently + fail to use it, so be careful! User-defined modules are not affected + by this directive. Modules defined in %HTML.CoreModules are not + affected by this directive. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Attr.Name.UseCDATA.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Attr.Name.UseCDATA.txt new file mode 100644 index 0000000000..151fb7b826 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Attr.Name.UseCDATA.txt @@ -0,0 +1,11 @@ +HTML.Attr.Name.UseCDATA +TYPE: bool +DEFAULT: false +VERSION: 4.0.0 +--DESCRIPTION-- +The W3C specification DTD defines the name attribute to be CDATA, not ID, due +to limitations of DTD. In certain documents, this relaxed behavior is desired, +whether it is to specify duplicate names, or to specify names that would be +illegal IDs (for example, names that begin with a digit.) Set this configuration +directive to true to use the relaxed parsing rules. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.BlockWrapper.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.BlockWrapper.txt new file mode 100644 index 0000000000..45ae469ec9 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.BlockWrapper.txt @@ -0,0 +1,18 @@ +HTML.BlockWrapper +TYPE: string +VERSION: 1.3.0 +DEFAULT: 'p' +--DESCRIPTION-- + +

+ String name of element to wrap inline elements that are inside a block + context. This only occurs in the children of blockquote in strict mode. +

+

+ Example: by default value, + <blockquote>Foo</blockquote> would become + <blockquote><p>Foo</p></blockquote>. + The <p> tags can be replaced with whatever you desire, + as long as it is a block level element. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.CoreModules.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.CoreModules.txt new file mode 100644 index 0000000000..5246188795 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.CoreModules.txt @@ -0,0 +1,23 @@ +HTML.CoreModules +TYPE: lookup +VERSION: 2.0.0 +--DEFAULT-- +array ( + 'Structure' => true, + 'Text' => true, + 'Hypertext' => true, + 'List' => true, + 'NonXMLCommonAttributes' => true, + 'XMLCommonAttributes' => true, + 'CommonAttributes' => true, +) +--DESCRIPTION-- + +

+ Certain modularized doctypes (XHTML, namely), have certain modules + that must be included for the doctype to be a conforming document + type: put those modules here. By default, XHTML's core modules + are used. You can set this to a blank array to disable core module + protection, but this is not recommended. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.CustomDoctype.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.CustomDoctype.txt new file mode 100644 index 0000000000..a64e3d7c36 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.CustomDoctype.txt @@ -0,0 +1,9 @@ +HTML.CustomDoctype +TYPE: string/null +VERSION: 2.0.1 +DEFAULT: NULL +--DESCRIPTION-- + +A custom doctype for power-users who defined their own document +type. This directive only applies when %HTML.Doctype is blank. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionID.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionID.txt new file mode 100644 index 0000000000..103db754a2 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionID.txt @@ -0,0 +1,33 @@ +HTML.DefinitionID +TYPE: string/null +DEFAULT: NULL +VERSION: 2.0.0 +--DESCRIPTION-- + +

+ Unique identifier for a custom-built HTML definition. If you edit + the raw version of the HTMLDefinition, introducing changes that the + configuration object does not reflect, you must specify this variable. + If you change your custom edits, you should change this directive, or + clear your cache. Example: +

+
+$config = HTMLPurifier_Config::createDefault();
+$config->set('HTML', 'DefinitionID', '1');
+$def = $config->getHTMLDefinition();
+$def->addAttribute('a', 'tabindex', 'Number');
+
+

+ In the above example, the configuration is still at the defaults, but + using the advanced API, an extra attribute has been added. The + configuration object normally has no way of knowing that this change + has taken place, so it needs an extra directive: %HTML.DefinitionID. + If someone else attempts to use the default configuration, these two + pieces of code will not clobber each other in the cache, since one has + an extra directive attached to it. +

+

+ You must specify a value to this directive to use the + advanced API features. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionRev.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionRev.txt new file mode 100644 index 0000000000..229ae0267a --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.DefinitionRev.txt @@ -0,0 +1,16 @@ +HTML.DefinitionRev +TYPE: int +VERSION: 2.0.0 +DEFAULT: 1 +--DESCRIPTION-- + +

+ Revision identifier for your custom definition specified in + %HTML.DefinitionID. This serves the same purpose: uniquely identifying + your custom definition, but this one does so in a chronological + context: revision 3 is more up-to-date than revision 2. Thus, when + this gets incremented, the cache handling is smart enough to clean + up any older revisions of your definition as well as flush the + cache. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Doctype.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Doctype.txt new file mode 100644 index 0000000000..9dab497f2f --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Doctype.txt @@ -0,0 +1,11 @@ +HTML.Doctype +TYPE: string/null +DEFAULT: NULL +--DESCRIPTION-- +Doctype to use during filtering. Technically speaking this is not actually +a doctype (as it does not identify a corresponding DTD), but we are using +this name for sake of simplicity. When non-blank, this will override any +older directives like %HTML.XHTML or %HTML.Strict. +--ALLOWED-- +'HTML 4.01 Transitional', 'HTML 4.01 Strict', 'XHTML 1.0 Transitional', 'XHTML 1.0 Strict', 'XHTML 1.1' +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenAttributes.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenAttributes.txt new file mode 100644 index 0000000000..57358f9bad --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenAttributes.txt @@ -0,0 +1,21 @@ +HTML.ForbiddenAttributes +TYPE: lookup +VERSION: 3.1.0 +DEFAULT: array() +--DESCRIPTION-- +

+ While this directive is similar to %HTML.AllowedAttributes, for + forwards-compatibility with XML, this attribute has a different syntax. Instead of + tag.attr, use tag@attr. To disallow href + attributes in a tags, set this directive to + a@href. You can also disallow an attribute globally with + attr or *@attr (either syntax is fine; the latter + is provided for consistency with %HTML.AllowedAttributes). +

+

+ Warning: This directive complements %HTML.ForbiddenElements; + accordingly, check + out that directive for a discussion of why you + should think twice before using this directive. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenElements.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenElements.txt new file mode 100644 index 0000000000..93a53e14fb --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.ForbiddenElements.txt @@ -0,0 +1,20 @@ +HTML.ForbiddenElements +TYPE: lookup +VERSION: 3.1.0 +DEFAULT: array() +--DESCRIPTION-- +

+ This was, perhaps, the most requested feature ever in HTML + Purifier. Please don't abuse it! This is the logical inverse of + %HTML.AllowedElements, and it will override that directive, or any + other directive. +

+

+ If possible, %HTML.Allowed is recommended over this directive, because it + can sometimes be difficult to tell whether or not you've forbidden all of + the behavior you would like to disallow. If you forbid img + with the expectation of preventing images on your site, you'll be in for + a nasty surprise when people start using the background-image + CSS property. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.MaxImgLength.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.MaxImgLength.txt new file mode 100644 index 0000000000..e424c386ec --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.MaxImgLength.txt @@ -0,0 +1,14 @@ +HTML.MaxImgLength +TYPE: int/null +DEFAULT: 1200 +VERSION: 3.1.1 +--DESCRIPTION-- +

+ This directive controls the maximum number of pixels in the width and + height attributes in img tags. This is + in place to prevent imagecrash attacks, disable with null at your own risk. + This directive is similar to %CSS.MaxImgLength, and both should be + concurrently edited, although there are + subtle differences in the input format (the HTML max is an integer). +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Parent.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Parent.txt new file mode 100644 index 0000000000..62e8e160c7 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Parent.txt @@ -0,0 +1,12 @@ +HTML.Parent +TYPE: string +VERSION: 1.3.0 +DEFAULT: 'div' +--DESCRIPTION-- + +

+ String name of element that HTML fragment passed to library will be + inserted in. An interesting variation would be using span as the + parent element, meaning that only inline tags would be allowed. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Proprietary.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Proprietary.txt new file mode 100644 index 0000000000..dfb720496d --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Proprietary.txt @@ -0,0 +1,12 @@ +HTML.Proprietary +TYPE: bool +VERSION: 3.1.0 +DEFAULT: false +--DESCRIPTION-- +

+ Whether or not to allow proprietary elements and attributes in your + documents, as per HTMLPurifier_HTMLModule_Proprietary. + Warning: This can cause your documents to stop + validating! +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.SafeEmbed.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.SafeEmbed.txt new file mode 100644 index 0000000000..f635a68548 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.SafeEmbed.txt @@ -0,0 +1,14 @@ +HTML.SafeEmbed +TYPE: bool +VERSION: 3.1.1 +DEFAULT: false +--DESCRIPTION-- +

+ Whether or not to permit embed tags in documents, with a number of extra + security features added to prevent script execution. This is similar to + what websites like MySpace do to embed tags. Embed is a proprietary + element and will cause your website to stop validating. You probably want + to enable this with %HTML.SafeObject. + Highly experimental. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.SafeObject.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.SafeObject.txt new file mode 100644 index 0000000000..32967b88fb --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.SafeObject.txt @@ -0,0 +1,14 @@ +HTML.SafeObject +TYPE: bool +VERSION: 3.1.1 +DEFAULT: false +--DESCRIPTION-- +

+ Whether or not to permit object tags in documents, with a number of extra + security features added to prevent script execution. This is similar to + what websites like MySpace do to object tags. You may also want to + enable %HTML.SafeEmbed for maximum interoperability with Internet Explorer, + although embed tags will cause your website to stop validating. + Highly experimental. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Strict.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Strict.txt new file mode 100644 index 0000000000..a8b1de56be --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Strict.txt @@ -0,0 +1,9 @@ +HTML.Strict +TYPE: bool +VERSION: 1.3.0 +DEFAULT: false +DEPRECATED-VERSION: 1.7.0 +DEPRECATED-USE: HTML.Doctype +--DESCRIPTION-- +Determines whether or not to use Transitional (loose) or Strict rulesets. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TidyAdd.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TidyAdd.txt new file mode 100644 index 0000000000..b4c271b7fa --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TidyAdd.txt @@ -0,0 +1,8 @@ +HTML.TidyAdd +TYPE: lookup +VERSION: 2.0.0 +DEFAULT: array() +--DESCRIPTION-- + +Fixes to add to the default set of Tidy fixes as per your level. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TidyLevel.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TidyLevel.txt new file mode 100644 index 0000000000..4186ccd0d1 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TidyLevel.txt @@ -0,0 +1,24 @@ +HTML.TidyLevel +TYPE: string +VERSION: 2.0.0 +DEFAULT: 'medium' +--DESCRIPTION-- + +

General level of cleanliness the Tidy module should enforce. +There are four allowed values:

+
+
none
+
No extra tidying should be done
+
light
+
Only fix elements that would be discarded otherwise due to + lack of support in doctype
+
medium
+
Enforce best practices
+
heavy
+
Transform all deprecated elements and attributes to standards + compliant equivalents
+
+ +--ALLOWED-- +'none', 'light', 'medium', 'heavy' +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TidyRemove.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TidyRemove.txt new file mode 100644 index 0000000000..996762bd1d --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.TidyRemove.txt @@ -0,0 +1,8 @@ +HTML.TidyRemove +TYPE: lookup +VERSION: 2.0.0 +DEFAULT: array() +--DESCRIPTION-- + +Fixes to remove from the default set of Tidy fixes as per your level. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Trusted.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Trusted.txt new file mode 100644 index 0000000000..89133b1a38 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.Trusted.txt @@ -0,0 +1,8 @@ +HTML.Trusted +TYPE: bool +VERSION: 2.0.0 +DEFAULT: false +--DESCRIPTION-- +Indicates whether or not the user input is trusted or not. If the input is +trusted, a more expansive set of allowed tags and attributes will be used. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.XHTML.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.XHTML.txt new file mode 100644 index 0000000000..2a47e384f4 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/HTML.XHTML.txt @@ -0,0 +1,11 @@ +HTML.XHTML +TYPE: bool +DEFAULT: true +VERSION: 1.1.0 +DEPRECATED-VERSION: 1.7.0 +DEPRECATED-USE: HTML.Doctype +--DESCRIPTION-- +Determines whether or not output is XHTML 1.0 or HTML 4.01 flavor. 
+--ALIASES-- +Core.XHTML +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Output.CommentScriptContents.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Output.CommentScriptContents.txt new file mode 100644 index 0000000000..08921fde70 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Output.CommentScriptContents.txt @@ -0,0 +1,10 @@ +Output.CommentScriptContents +TYPE: bool +VERSION: 2.0.0 +DEFAULT: true +--DESCRIPTION-- +Determines whether or not HTML Purifier should attempt to fix up the +contents of script tags for legacy browsers with comments. +--ALIASES-- +Core.CommentScriptContents +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Output.Newline.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Output.Newline.txt new file mode 100644 index 0000000000..79f8ad82cf --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Output.Newline.txt @@ -0,0 +1,13 @@ +Output.Newline +TYPE: string/null +VERSION: 2.0.1 +DEFAULT: NULL +--DESCRIPTION-- + +

+ Newline string to format final output with. If left null, HTML Purifier + will auto-detect the default newline type of the system and use that; + you can manually override it here. Remember, \r\n is Windows, \r + is Mac, and \n is Unix. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Output.SortAttr.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Output.SortAttr.txt new file mode 100644 index 0000000000..232b02362a --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Output.SortAttr.txt @@ -0,0 +1,14 @@ +Output.SortAttr +TYPE: bool +VERSION: 3.2.0 +DEFAULT: false +--DESCRIPTION-- +

+ If true, HTML Purifier will sort attributes by name before writing them back + to the document, converting a tag like: <el b="" a="" c="" /> + to <el a="" b="" c="" />. This is a workaround for + a bug in FCKeditor which causes it to swap attributes order, adding noise + to text diffs. If you're not seeing this bug, chances are, you don't need + this directive. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Output.TidyFormat.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Output.TidyFormat.txt new file mode 100644 index 0000000000..06bab00a0a --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Output.TidyFormat.txt @@ -0,0 +1,25 @@ +Output.TidyFormat +TYPE: bool +VERSION: 1.1.1 +DEFAULT: false +--DESCRIPTION-- +

+ Determines whether or not to run Tidy on the final output for pretty + formatting reasons, such as indentation and wrap. +

+

+ This can greatly improve readability for editors who are hand-editing + the HTML, but is by no means necessary as HTML Purifier has already + fixed all major errors the HTML may have had. Tidy is a non-default + extension, and this directive will silently fail if Tidy is not + available. +

+

+ If you are looking to make the overall look of your page's source + better, I recommend running Tidy on the entire page rather than just + user-content (after all, the indentation relative to the containing + blocks will be incorrect). +

+--ALIASES-- +Core.TidyFormat +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Test.ForceNoIconv.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Test.ForceNoIconv.txt new file mode 100644 index 0000000000..071bc0295d --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/Test.ForceNoIconv.txt @@ -0,0 +1,7 @@ +Test.ForceNoIconv +TYPE: bool +DEFAULT: false +--DESCRIPTION-- +When set to true, HTMLPurifier_Encoder will act as if iconv does not exist +and use only pure PHP implementations. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt new file mode 100644 index 0000000000..98fdfe9222 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.AllowedSchemes.txt @@ -0,0 +1,15 @@ +URI.AllowedSchemes +TYPE: lookup +--DEFAULT-- +array ( + 'http' => true, + 'https' => true, + 'mailto' => true, + 'ftp' => true, + 'nntp' => true, + 'news' => true, +) +--DESCRIPTION-- +Whitelist that defines the schemes that a URI is allowed to have. This +prevents XSS attacks from using pseudo-schemes like javascript or mocha. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Base.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Base.txt new file mode 100644 index 0000000000..876f0680cf --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Base.txt @@ -0,0 +1,17 @@ +URI.Base +TYPE: string/null +VERSION: 2.1.0 +DEFAULT: NULL +--DESCRIPTION-- + +

+ The base URI is the URI of the document this purified HTML will be + inserted into. This information is important if HTML Purifier needs + to calculate absolute URIs from relative URIs, such as when %URI.MakeAbsolute + is on. You may use a non-absolute URI for this value, but behavior + may vary (%URI.MakeAbsolute deals nicely with both absolute and + relative paths, but forwards-compatibility is not guaranteed). + Warning: If set, the scheme on this URI + overrides the one specified by %URI.DefaultScheme. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt new file mode 100644 index 0000000000..728e378cbe --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefaultScheme.txt @@ -0,0 +1,10 @@ +URI.DefaultScheme +TYPE: string +DEFAULT: 'http' +--DESCRIPTION-- + +

+ Defines through what scheme the output will be served, in order to + select the proper object validator when no scheme information is present. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefinitionID.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefinitionID.txt new file mode 100644 index 0000000000..f05312ba86 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefinitionID.txt @@ -0,0 +1,11 @@ +URI.DefinitionID +TYPE: string/null +VERSION: 2.1.0 +DEFAULT: NULL +--DESCRIPTION-- + +

+ Unique identifier for a custom-built URI definition. If you want + to add custom URIFilters, you must specify this value. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefinitionRev.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefinitionRev.txt new file mode 100644 index 0000000000..80cfea93f7 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DefinitionRev.txt @@ -0,0 +1,11 @@ +URI.DefinitionRev +TYPE: int +VERSION: 2.1.0 +DEFAULT: 1 +--DESCRIPTION-- + +

+ Revision identifier for your custom definition. See + %HTML.DefinitionRev for details. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Disable.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Disable.txt new file mode 100644 index 0000000000..71ce025a2d --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Disable.txt @@ -0,0 +1,14 @@ +URI.Disable +TYPE: bool +VERSION: 1.3.0 +DEFAULT: false +--DESCRIPTION-- + +

+ Disables all URIs in all forms. Not sure why you'd want to do that + (after all, the Internet's founded on the notion of a hyperlink). +

+ +--ALIASES-- +Attr.DisableURI +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DisableExternal.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DisableExternal.txt new file mode 100644 index 0000000000..13c122c8ce --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DisableExternal.txt @@ -0,0 +1,11 @@ +URI.DisableExternal +TYPE: bool +VERSION: 1.2.0 +DEFAULT: false +--DESCRIPTION-- +Disables links to external websites. This is a highly effective anti-spam +and anti-pagerank-leech measure, but comes at a hefty price: nolinks or +images outside of your domain will be allowed. Non-linkified URIs will +still be preserved. If you want to be able to link to subdomains or use +absolute URIs, specify %URI.Host for your website. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DisableExternalResources.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DisableExternalResources.txt new file mode 100644 index 0000000000..abcc1efd61 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DisableExternalResources.txt @@ -0,0 +1,13 @@ +URI.DisableExternalResources +TYPE: bool +VERSION: 1.3.0 +DEFAULT: false +--DESCRIPTION-- +Disables the embedding of external resources, preventing users from +embedding things like images from other hosts. This prevents access +tracking (good for email viewers), bandwidth leeching, cross-site request +forging, goatse.cx posting, and other nasties, but also results in a loss +of end-user functionality (they can't directly post a pic they posted from +Flickr anymore). Use it if you don't have a robust user-content moderation +team. 
+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DisableResources.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DisableResources.txt new file mode 100644 index 0000000000..51e6ea91f7 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.DisableResources.txt @@ -0,0 +1,12 @@ +URI.DisableResources +TYPE: bool +VERSION: 1.3.0 +DEFAULT: false +--DESCRIPTION-- + +

+ Disables embedding resources, essentially meaning no pictures. You can + still link to them though. See %URI.DisableExternalResources for why + this might be a good idea. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Host.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Host.txt new file mode 100644 index 0000000000..ee83b121de --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Host.txt @@ -0,0 +1,19 @@ +URI.Host +TYPE: string/null +VERSION: 1.2.0 +DEFAULT: NULL +--DESCRIPTION-- + +

+ Defines the domain name of the server, so we can determine whether + an absolute URI is from your website or not. Not strictly necessary, + as users should be using relative URIs to reference resources on your + website. It will, however, let you use absolute URIs to link to + subdomains of the domain you post here: i.e. example.com will allow + sub.example.com. However, higher up domains will still be excluded: + if you set %URI.Host to sub.example.com, example.com will be blocked. + Note: This directive overrides %URI.Base because + a given page may be on a sub-domain, but you wish HTML Purifier to be + more relaxed and allow some of the parent domains too. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.HostBlacklist.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.HostBlacklist.txt new file mode 100644 index 0000000000..0b6df7625d --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.HostBlacklist.txt @@ -0,0 +1,9 @@ +URI.HostBlacklist +TYPE: list +VERSION: 1.3.0 +DEFAULT: array() +--DESCRIPTION-- +List of strings that are forbidden in the host of any URI. Use it to kill +domain names of spam, etc. Note that it will catch anything in the domain, +so moo.com will catch moo.com.example.com. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.MakeAbsolute.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.MakeAbsolute.txt new file mode 100644 index 0000000000..4214900a59 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.MakeAbsolute.txt @@ -0,0 +1,13 @@ +URI.MakeAbsolute +TYPE: bool +VERSION: 2.1.0 +DEFAULT: false +--DESCRIPTION-- + +

+ Converts all URIs into absolute forms. This is useful when the HTML + being filtered assumes a specific base path, but will actually be + viewed in a different context (and setting an alternate base URI is + not possible). %URI.Base must be set for this directive to work. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt new file mode 100644 index 0000000000..58c81dcc44 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.Munge.txt @@ -0,0 +1,83 @@ +URI.Munge +TYPE: string/null +VERSION: 1.3.0 +DEFAULT: NULL +--DESCRIPTION-- + +

+ Munges all browsable (usually http, https and ftp) + absolute URIs into another URI, usually a URI redirection service. + This directive accepts a URI, formatted with a %s where + the url-encoded original URI should be inserted (sample: + http://www.google.com/url?q=%s). +

+

+ Uses for this directive: +

+
    +
  • + Prevent PageRank leaks, while being fairly transparent + to users (you may also want to add some client side JavaScript to + override the text in the statusbar). Notice: + Many security experts believe that this form of protection does not deter spam-bots. +
  • +
  • + Redirect users to a splash page telling them they are leaving your + website. While this is poor usability practice, it is often mandated + in corporate environments. +
  • +
+

+ Prior to HTML Purifier 3.1.1, this directive also enabled the munging + of browsable external resources, which could break things if your redirection + script was a splash page or used meta tags. To revert to + previous behavior, please use %URI.MungeResources. +

+

+ You may want to also use %URI.MungeSecretKey along with this directive + in order to enforce what URIs your redirector script allows. Open + redirector scripts can be a security risk and negatively affect the + reputation of your domain name. +

+

+ Starting with HTML Purifier 3.1.1, there are also these substitutions: +

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
KeyDescriptionExample <a href="">
%r1 - The URI embeds a resource
(blank) - The URI is merely a link
%nThe name of the tag this URI came froma
%mThe name of the attribute this URI came fromhref
%pThe name of the CSS property this URI came from, or blank if irrelevant
+

+ Admittedly, these letters are somewhat arbitrary; the only stipulation + was that they couldn't be a through f. r is for resource (I would have preferred + e, but you take what you can get), n is for name, m + was picked because it came after n (and I couldn't use a), p is for + property. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.MungeResources.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.MungeResources.txt new file mode 100644 index 0000000000..6fce0fdc37 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.MungeResources.txt @@ -0,0 +1,17 @@ +URI.MungeResources +TYPE: bool +VERSION: 3.1.1 +DEFAULT: false +--DESCRIPTION-- +

+ If true, any URI munging directives like %URI.Munge + will also apply to embedded resources, such as <img src="">. + Be careful enabling this directive if you have a redirector script + that does not use the Location HTTP header; all of your images + and other embedded resources will break. +

+

+ Warning: It is strongly advised you use this in conjunction with + %URI.MungeSecretKey to mitigate the security risk of an open redirector. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.MungeSecretKey.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.MungeSecretKey.txt new file mode 100644 index 0000000000..0d00f62ea8 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.MungeSecretKey.txt @@ -0,0 +1,30 @@ +URI.MungeSecretKey +TYPE: string/null +VERSION: 3.1.1 +DEFAULT: NULL +--DESCRIPTION-- +

+ This directive enables secure checksum generation along with %URI.Munge. + It should be set to a secure key that is not shared with anyone else. + The checksum can be placed in the URI using %t. Use of this checksum + affords an additional level of protection by allowing a redirector + to check if a URI has passed through HTML Purifier with this line: +

+ +
$checksum === sha1($secret_key . ':' . $url)
+ +

+ If the output is TRUE, the redirector script should accept the URI. +

+ +

+ Please note that it would still be possible for an attacker to procure + secure hashes en masse by abusing your website's Preview feature or the + like, but this service affords an additional level of protection + that should be combined with website blacklisting. +

+ +

+ Remember this has no effect if %URI.Munge is not on. +

+--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.OverrideAllowedSchemes.txt b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.OverrideAllowedSchemes.txt new file mode 100644 index 0000000000..23331a4e79 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/URI.OverrideAllowedSchemes.txt @@ -0,0 +1,9 @@ +URI.OverrideAllowedSchemes +TYPE: bool +DEFAULT: true +--DESCRIPTION-- +If this is set to true (which it is by default), you can override +%URI.AllowedSchemes by simply registering a HTMLPurifier_URIScheme to the +registry. If false, you will also have to update that directive in order +to add more schemes. +--# vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/info.ini b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/info.ini new file mode 100644 index 0000000000..5de4505e1b --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ConfigSchema/schema/info.ini @@ -0,0 +1,3 @@ +name = "HTML Purifier" + +; vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ContentSets.php b/extlib/HTMLPurifier/HTMLPurifier/ContentSets.php new file mode 100644 index 0000000000..3b6e96f5f5 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ContentSets.php @@ -0,0 +1,155 @@ + true) indexed by name. + * @note This is in HTMLPurifier_HTMLDefinition->info_content_sets + */ + public $lookup = array(); + + /** + * Synchronized list of defined content sets (keys of info) + */ + protected $keys = array(); + /** + * Synchronized list of defined content values (values of info) + */ + protected $values = array(); + + /** + * Merges in module's content sets, expands identifiers in the content + * sets and populates the keys, values and lookup member variables. 
+ * @param $modules List of HTMLPurifier_HTMLModule + */ + public function __construct($modules) { + if (!is_array($modules)) $modules = array($modules); + // populate content_sets based on module hints + // sorry, no way of overloading + foreach ($modules as $module_i => $module) { + foreach ($module->content_sets as $key => $value) { + $temp = $this->convertToLookup($value); + if (isset($this->lookup[$key])) { + // add it into the existing content set + $this->lookup[$key] = array_merge($this->lookup[$key], $temp); + } else { + $this->lookup[$key] = $temp; + } + } + } + $old_lookup = false; + while ($old_lookup !== $this->lookup) { + $old_lookup = $this->lookup; + foreach ($this->lookup as $i => $set) { + $add = array(); + foreach ($set as $element => $x) { + if (isset($this->lookup[$element])) { + $add += $this->lookup[$element]; + unset($this->lookup[$i][$element]); + } + } + $this->lookup[$i] += $add; + } + } + + foreach ($this->lookup as $key => $lookup) { + $this->info[$key] = implode(' | ', array_keys($lookup)); + } + $this->keys = array_keys($this->info); + $this->values = array_values($this->info); + } + + /** + * Accepts a definition; generates and assigns a ChildDef for it + * @param $def HTMLPurifier_ElementDef reference + * @param $module Module that defined the ElementDef + */ + public function generateChildDef(&$def, $module) { + if (!empty($def->child)) return; // already done! + $content_model = $def->content_model; + if (is_string($content_model)) { + // Assume that $this->keys is alphanumeric + $def->content_model = preg_replace_callback( + '/\b(' . implode('|', $this->keys) . 
')\b/', + array($this, 'generateChildDefCallback'), + $content_model + ); + //$def->content_model = str_replace( + // $this->keys, $this->values, $content_model); + } + $def->child = $this->getChildDef($def, $module); + } + + public function generateChildDefCallback($matches) { + return $this->info[$matches[0]]; + } + + /** + * Instantiates a ChildDef based on content_model and content_model_type + * member variables in HTMLPurifier_ElementDef + * @note This will also defer to modules for custom HTMLPurifier_ChildDef + * subclasses that need content set expansion + * @param $def HTMLPurifier_ElementDef to have ChildDef extracted + * @return HTMLPurifier_ChildDef corresponding to ElementDef + */ + public function getChildDef($def, $module) { + $value = $def->content_model; + if (is_object($value)) { + trigger_error( + 'Literal object child definitions should be stored in '. + 'ElementDef->child not ElementDef->content_model', + E_USER_NOTICE + ); + return $value; + } + switch ($def->content_model_type) { + case 'required': + return new HTMLPurifier_ChildDef_Required($value); + case 'optional': + return new HTMLPurifier_ChildDef_Optional($value); + case 'empty': + return new HTMLPurifier_ChildDef_Empty(); + case 'custom': + return new HTMLPurifier_ChildDef_Custom($value); + } + // defer to its module + $return = false; + if ($module->defines_child_def) { // save a func call + $return = $module->getChildDef($def); + } + if ($return !== false) return $return; + // error-out + trigger_error( + 'Could not determine which ChildDef class to instantiate', + E_USER_ERROR + ); + return false; + } + + /** + * Converts a string list of elements separated by pipes into + * a lookup array. 
+ * @param $string List of elements + * @return Lookup array of elements + */ + protected function convertToLookup($string) { + $array = explode('|', str_replace(' ', '', $string)); + $ret = array(); + foreach ($array as $i => $k) { + $ret[$k] = true; + } + return $ret; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Context.php b/extlib/HTMLPurifier/HTMLPurifier/Context.php new file mode 100644 index 0000000000..9ddf0c5476 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Context.php @@ -0,0 +1,82 @@ +_storage[$name])) { + trigger_error("Name $name produces collision, cannot re-register", + E_USER_ERROR); + return; + } + $this->_storage[$name] =& $ref; + } + + /** + * Retrieves a variable reference from the context. + * @param $name String name + * @param $ignore_error Boolean whether or not to ignore error + */ + public function &get($name, $ignore_error = false) { + if (!isset($this->_storage[$name])) { + if (!$ignore_error) { + trigger_error("Attempted to retrieve non-existent variable $name", + E_USER_ERROR); + } + $var = null; // so we can return by reference + return $var; + } + return $this->_storage[$name]; + } + + /** + * Destorys a variable in the context. + * @param $name String name + */ + public function destroy($name) { + if (!isset($this->_storage[$name])) { + trigger_error("Attempted to destroy non-existent variable $name", + E_USER_ERROR); + return; + } + unset($this->_storage[$name]); + } + + /** + * Checks whether or not the variable exists. 
+ * @param $name String name + */ + public function exists($name) { + return isset($this->_storage[$name]); + } + + /** + * Loads a series of variables from an associative array + * @param $context_array Assoc array of variables to load + */ + public function loadArray($context_array) { + foreach ($context_array as $key => $discard) { + $this->register($key, $context_array[$key]); + } + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Definition.php b/extlib/HTMLPurifier/HTMLPurifier/Definition.php new file mode 100644 index 0000000000..a7408c9749 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Definition.php @@ -0,0 +1,39 @@ +setup) return; + $this->setup = true; + $this->doSetup($config); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache.php b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache.php new file mode 100644 index 0000000000..c6e1e388c6 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache.php @@ -0,0 +1,108 @@ +type = $type; + } + + /** + * Generates a unique identifier for a particular configuration + * @param Instance of HTMLPurifier_Config + */ + public function generateKey($config) { + return $config->version . ',' . // possibly replace with function calls + $config->getBatchSerial($this->type) . ',' . + $config->get($this->type . '.DefinitionRev'); + } + + /** + * Tests whether or not a key is old with respect to the configuration's + * version and revision number. 
+ * @param $key Key to test + * @param $config Instance of HTMLPurifier_Config to test against + */ + public function isOld($key, $config) { + if (substr_count($key, ',') < 2) return true; + list($version, $hash, $revision) = explode(',', $key, 3); + $compare = version_compare($version, $config->version); + // version mismatch, is always old + if ($compare != 0) return true; + // versions match, ids match, check revision number + if ( + $hash == $config->getBatchSerial($this->type) && + $revision < $config->get($this->type . '.DefinitionRev') + ) return true; + return false; + } + + /** + * Checks if a definition's type jives with the cache's type + * @note Throws an error on failure + * @param $def Definition object to check + * @return Boolean true if good, false if not + */ + public function checkDefType($def) { + if ($def->type !== $this->type) { + trigger_error("Cannot use definition of type {$def->type} in cache for {$this->type}"); + return false; + } + return true; + } + + /** + * Adds a definition object to the cache + */ + abstract public function add($def, $config); + + /** + * Unconditionally saves a definition object to the cache + */ + abstract public function set($def, $config); + + /** + * Replace an object in the cache + */ + abstract public function replace($def, $config); + + /** + * Retrieves a definition object from the cache + */ + abstract public function get($config); + + /** + * Removes a definition object to the cache + */ + abstract public function remove($config); + + /** + * Clears all objects from cache + */ + abstract public function flush($config); + + /** + * Clears all expired (older version or revision) objects from cache + * @note Be carefuly implementing this method as flush. Flush must + * not interfere with other Definition types, and cleanup() + * should not be repeatedly called by userland code. 
+ */ + abstract public function cleanup($config); + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Decorator.php b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Decorator.php new file mode 100644 index 0000000000..b0fb6d0cd6 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Decorator.php @@ -0,0 +1,62 @@ +copy(); + // reference is necessary for mocks in PHP 4 + $decorator->cache =& $cache; + $decorator->type = $cache->type; + return $decorator; + } + + /** + * Cross-compatible clone substitute + */ + public function copy() { + return new HTMLPurifier_DefinitionCache_Decorator(); + } + + public function add($def, $config) { + return $this->cache->add($def, $config); + } + + public function set($def, $config) { + return $this->cache->set($def, $config); + } + + public function replace($def, $config) { + return $this->cache->replace($def, $config); + } + + public function get($config) { + return $this->cache->get($config); + } + + public function remove($config) { + return $this->cache->remove($config); + } + + public function flush($config) { + return $this->cache->flush($config); + } + + public function cleanup($config) { + return $this->cache->cleanup($config); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php new file mode 100644 index 0000000000..d4cc35c4bc --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Decorator/Cleanup.php @@ -0,0 +1,43 @@ +definitions[$this->generateKey($config)] = $def; + return $status; + } + + public function set($def, $config) { + $status = parent::set($def, $config); + if ($status) $this->definitions[$this->generateKey($config)] = $def; + return $status; + } + + public function replace($def, $config) { + $status = parent::replace($def, $config); + if ($status) $this->definitions[$this->generateKey($config)] = $def; 
+ return $status; + } + + public function get($config) { + $key = $this->generateKey($config); + if (isset($this->definitions[$key])) return $this->definitions[$key]; + $this->definitions[$key] = parent::get($config); + return $this->definitions[$key]; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Decorator/Template.php.in b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Decorator/Template.php.in new file mode 100644 index 0000000000..21a8fcfda2 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Decorator/Template.php.in @@ -0,0 +1,47 @@ +checkDefType($def)) return; + $file = $this->generateFilePath($config); + if (file_exists($file)) return false; + if (!$this->_prepareDir($config)) return false; + return $this->_write($file, serialize($def)); + } + + public function set($def, $config) { + if (!$this->checkDefType($def)) return; + $file = $this->generateFilePath($config); + if (!$this->_prepareDir($config)) return false; + return $this->_write($file, serialize($def)); + } + + public function replace($def, $config) { + if (!$this->checkDefType($def)) return; + $file = $this->generateFilePath($config); + if (!file_exists($file)) return false; + if (!$this->_prepareDir($config)) return false; + return $this->_write($file, serialize($def)); + } + + public function get($config) { + $file = $this->generateFilePath($config); + if (!file_exists($file)) return false; + return unserialize(file_get_contents($file)); + } + + public function remove($config) { + $file = $this->generateFilePath($config); + if (!file_exists($file)) return false; + return unlink($file); + } + + public function flush($config) { + if (!$this->_prepareDir($config)) return false; + $dir = $this->generateDirectoryPath($config); + $dh = opendir($dir); + while (false !== ($filename = readdir($dh))) { + if (empty($filename)) continue; + if ($filename[0] === '.') continue; + unlink($dir . '/' . 
$filename); + } + } + + public function cleanup($config) { + if (!$this->_prepareDir($config)) return false; + $dir = $this->generateDirectoryPath($config); + $dh = opendir($dir); + while (false !== ($filename = readdir($dh))) { + if (empty($filename)) continue; + if ($filename[0] === '.') continue; + $key = substr($filename, 0, strlen($filename) - 4); + if ($this->isOld($key, $config)) unlink($dir . '/' . $filename); + } + } + + /** + * Generates the file path to the serial file corresponding to + * the configuration and definition name + * @todo Make protected + */ + public function generateFilePath($config) { + $key = $this->generateKey($config); + return $this->generateDirectoryPath($config) . '/' . $key . '.ser'; + } + + /** + * Generates the path to the directory contain this cache's serial files + * @note No trailing slash + * @todo Make protected + */ + public function generateDirectoryPath($config) { + $base = $this->generateBaseDirectoryPath($config); + return $base . '/' . $this->type; + } + + /** + * Generates path to base directory that contains all definition type + * serials + * @todo Make protected + */ + public function generateBaseDirectoryPath($config) { + $base = $config->get('Cache.SerializerPath'); + $base = is_null($base) ? HTMLPURIFIER_PREFIX . '/HTMLPurifier/DefinitionCache/Serializer' : $base; + return $base; + } + + /** + * Convenience wrapper function for file_put_contents + * @param $file File name to write to + * @param $data Data to write into file + * @return Number of bytes written if success, or false if failure. 
+ */ + private function _write($file, $data) { + return file_put_contents($file, $data); + } + + /** + * Prepares the directory that this type stores the serials in + * @return True if successful + */ + private function _prepareDir($config) { + $directory = $this->generateDirectoryPath($config); + if (!is_dir($directory)) { + $base = $this->generateBaseDirectoryPath($config); + if (!is_dir($base)) { + trigger_error('Base directory '.$base.' does not exist, + please create or change using %Cache.SerializerPath', + E_USER_WARNING); + return false; + } elseif (!$this->_testPermissions($base)) { + return false; + } + $old = umask(0022); // disable group and world writes + mkdir($directory); + umask($old); + } elseif (!$this->_testPermissions($directory)) { + return false; + } + return true; + } + + /** + * Tests permissions on a directory and throws out friendly + * error messages and attempts to chmod it itself if possible + */ + private function _testPermissions($dir) { + // early abort, if it is writable, everything is hunky-dory + if (is_writable($dir)) return true; + if (!is_dir($dir)) { + // generally, you'll want to handle this beforehand + // so a more specific error message can be given + trigger_error('Directory '.$dir.' does not exist', + E_USER_WARNING); + return false; + } + if (function_exists('posix_getuid')) { + // POSIX system, we can give more specific advice + if (fileowner($dir) === posix_getuid()) { + // we can chmod it ourselves + chmod($dir, 0755); + return true; + } elseif (filegroup($dir) === posix_getgid()) { + $chmod = '775'; + } else { + // PHP's probably running as nobody, so we'll + // need to give global permissions + $chmod = '777'; + } + trigger_error('Directory '.$dir.' not writable, '. + 'please chmod to ' . $chmod, + E_USER_WARNING); + } else { + // generic error message + trigger_error('Directory '.$dir.' not writable, '. 
+ 'please alter file permissions', + E_USER_WARNING); + } + return false; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Serializer/README b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Serializer/README new file mode 100755 index 0000000000..2e35c1c3d0 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCache/Serializer/README @@ -0,0 +1,3 @@ +This is a dummy file to prevent Git from ignoring this empty directory. + + vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/DefinitionCacheFactory.php b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCacheFactory.php new file mode 100644 index 0000000000..a6ead62818 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/DefinitionCacheFactory.php @@ -0,0 +1,91 @@ + array()); + protected $implementations = array(); + protected $decorators = array(); + + /** + * Initialize default decorators + */ + public function setup() { + $this->addDecorator('Cleanup'); + } + + /** + * Retrieves an instance of global definition cache factory. 
+ */ + public static function instance($prototype = null) { + static $instance; + if ($prototype !== null) { + $instance = $prototype; + } elseif ($instance === null || $prototype === true) { + $instance = new HTMLPurifier_DefinitionCacheFactory(); + $instance->setup(); + } + return $instance; + } + + /** + * Registers a new definition cache object + * @param $short Short name of cache object, for reference + * @param $long Full class name of cache object, for construction + */ + public function register($short, $long) { + $this->implementations[$short] = $long; + } + + /** + * Factory method that creates a cache object based on configuration + * @param $name Name of definitions handled by cache + * @param $config Instance of HTMLPurifier_Config + */ + public function create($type, $config) { + $method = $config->get('Cache.DefinitionImpl'); + if ($method === null) { + return new HTMLPurifier_DefinitionCache_Null($type); + } + if (!empty($this->caches[$method][$type])) { + return $this->caches[$method][$type]; + } + if ( + isset($this->implementations[$method]) && + class_exists($class = $this->implementations[$method], false) + ) { + $cache = new $class($type); + } else { + if ($method != 'Serializer') { + trigger_error("Unrecognized DefinitionCache $method, using Serializer instead", E_USER_WARNING); + } + $cache = new HTMLPurifier_DefinitionCache_Serializer($type); + } + foreach ($this->decorators as $decorator) { + $new_cache = $decorator->decorate($cache); + // prevent infinite recursion in PHP 4 + unset($cache); + $cache = $new_cache; + } + $this->caches[$method][$type] = $cache; + return $this->caches[$method][$type]; + } + + /** + * Registers a decorator to add to all new cache objects + * @param + */ + public function addDecorator($decorator) { + if (is_string($decorator)) { + $class = "HTMLPurifier_DefinitionCache_Decorator_$decorator"; + $decorator = new $class; + } + $this->decorators[$decorator->name] = $decorator; + } + +} + +// vim: et sw=4 sts=4 
diff --git a/extlib/HTMLPurifier/HTMLPurifier/Doctype.php b/extlib/HTMLPurifier/HTMLPurifier/Doctype.php new file mode 100644 index 0000000000..1e3c574c06 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Doctype.php @@ -0,0 +1,60 @@ +renderDoctype. + * If structure changes, please update that function. + */ +class HTMLPurifier_Doctype +{ + /** + * Full name of doctype + */ + public $name; + + /** + * List of standard modules (string identifiers or literal objects) + * that this doctype uses + */ + public $modules = array(); + + /** + * List of modules to use for tidying up code + */ + public $tidyModules = array(); + + /** + * Is the language derived from XML (i.e. XHTML)? + */ + public $xml = true; + + /** + * List of aliases for this doctype + */ + public $aliases = array(); + + /** + * Public DTD identifier + */ + public $dtdPublic; + + /** + * System DTD identifier + */ + public $dtdSystem; + + public function __construct($name = null, $xml = true, $modules = array(), + $tidyModules = array(), $aliases = array(), $dtd_public = null, $dtd_system = null + ) { + $this->name = $name; + $this->xml = $xml; + $this->modules = $modules; + $this->tidyModules = $tidyModules; + $this->aliases = $aliases; + $this->dtdPublic = $dtd_public; + $this->dtdSystem = $dtd_system; + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/DoctypeRegistry.php b/extlib/HTMLPurifier/HTMLPurifier/DoctypeRegistry.php new file mode 100644 index 0000000000..86049e9391 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/DoctypeRegistry.php @@ -0,0 +1,103 @@ +doctypes[$doctype->name] = $doctype; + $name = $doctype->name; + // hookup aliases + foreach ($doctype->aliases as $alias) { + if (isset($this->doctypes[$alias])) continue; + $this->aliases[$alias] = $name; + } + // remove old aliases + if (isset($this->aliases[$name])) unset($this->aliases[$name]); + return $doctype; + } + + /** + * Retrieves reference to a doctype of a certain name + * @note This function 
resolves aliases + * @note When possible, use the more fully-featured make() + * @param $doctype Name of doctype + * @return Editable doctype object + */ + public function get($doctype) { + if (isset($this->aliases[$doctype])) $doctype = $this->aliases[$doctype]; + if (!isset($this->doctypes[$doctype])) { + trigger_error('Doctype ' . htmlspecialchars($doctype) . ' does not exist', E_USER_ERROR); + $anon = new HTMLPurifier_Doctype($doctype); + return $anon; + } + return $this->doctypes[$doctype]; + } + + /** + * Creates a doctype based on a configuration object, + * will perform initialization on the doctype + * @note Use this function to get a copy of doctype that config + * can hold on to (this is necessary in order to tell + * Generator whether or not the current document is XML + * based or not). + */ + public function make($config) { + return clone $this->get($this->getDoctypeFromConfig($config)); + } + + /** + * Retrieves the doctype from the configuration object + */ + public function getDoctypeFromConfig($config) { + // recommended test + $doctype = $config->get('HTML.Doctype'); + if (!empty($doctype)) return $doctype; + $doctype = $config->get('HTML.CustomDoctype'); + if (!empty($doctype)) return $doctype; + // backwards-compatibility + if ($config->get('HTML.XHTML')) { + $doctype = 'XHTML 1.0'; + } else { + $doctype = 'HTML 4.01'; + } + if ($config->get('HTML.Strict')) { + $doctype .= ' Strict'; + } else { + $doctype .= ' Transitional'; + } + return $doctype; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ElementDef.php b/extlib/HTMLPurifier/HTMLPurifier/ElementDef.php new file mode 100644 index 0000000000..aede2c3bb4 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ElementDef.php @@ -0,0 +1,176 @@ +setup(), this array may also + * contain an array at index 0 that indicates which attribute + * collections to load into the full array. 
It may also + * contain string indentifiers in lieu of HTMLPurifier_AttrDef, + * see HTMLPurifier_AttrTypes on how they are expanded during + * HTMLPurifier_HTMLDefinition->setup() processing. + */ + public $attr = array(); + + /** + * Indexed list of tag's HTMLPurifier_AttrTransform to be done before validation + */ + public $attr_transform_pre = array(); + + /** + * Indexed list of tag's HTMLPurifier_AttrTransform to be done after validation + */ + public $attr_transform_post = array(); + + /** + * HTMLPurifier_ChildDef of this tag. + */ + public $child; + + /** + * Abstract string representation of internal ChildDef rules. See + * HTMLPurifier_ContentSets for how this is parsed and then transformed + * into an HTMLPurifier_ChildDef. + * @warning This is a temporary variable that is not available after + * being processed by HTMLDefinition + */ + public $content_model; + + /** + * Value of $child->type, used to determine which ChildDef to use, + * used in combination with $content_model. + * @warning This must be lowercase + * @warning This is a temporary variable that is not available after + * being processed by HTMLDefinition + */ + public $content_model_type; + + + + /** + * Does the element have a content model (#PCDATA | Inline)*? This + * is important for chameleon ins and del processing in + * HTMLPurifier_ChildDef_Chameleon. Dynamically set: modules don't + * have to worry about this one. + */ + public $descendants_are_inline = false; + + /** + * List of the names of required attributes this element has. Dynamically + * populated by HTMLPurifier_HTMLDefinition::getElement + */ + public $required_attr = array(); + + /** + * Lookup table of tags excluded from all descendants of this tag. + * @note SGML permits exclusions for all descendants, but this is + * not possible with DTDs or XML Schemas. 
W3C has elected to + * use complicated compositions of content_models to simulate + * exclusion for children, but we go the simpler, SGML-style + * route of flat-out exclusions, which correctly apply to + * all descendants and not just children. Note that the XHTML + * Modularization Abstract Modules are blithely unaware of such + * distinctions. + */ + public $excludes = array(); + + /** + * This tag is explicitly auto-closed by the following tags. + */ + public $autoclose = array(); + + /** + * Whether or not this is a formatting element affected by the + * "Active Formatting Elements" algorithm. + */ + public $formatting; + + /** + * Low-level factory constructor for creating new standalone element defs + */ + public static function create($content_model, $content_model_type, $attr) { + $def = new HTMLPurifier_ElementDef(); + $def->content_model = $content_model; + $def->content_model_type = $content_model_type; + $def->attr = $attr; + return $def; + } + + /** + * Merges the values of another element definition into this one. + * Values from the new element def take precedence if a value is + * not mergeable. 
+ */ + public function mergeIn($def) { + + // later keys takes precedence + foreach($def->attr as $k => $v) { + if ($k === 0) { + // merge in the includes + // sorry, no way to override an include + foreach ($v as $v2) { + $this->attr[0][] = $v2; + } + continue; + } + if ($v === false) { + if (isset($this->attr[$k])) unset($this->attr[$k]); + continue; + } + $this->attr[$k] = $v; + } + $this->_mergeAssocArray($this->attr_transform_pre, $def->attr_transform_pre); + $this->_mergeAssocArray($this->attr_transform_post, $def->attr_transform_post); + $this->_mergeAssocArray($this->excludes, $def->excludes); + + if(!empty($def->content_model)) { + $this->content_model = + str_replace("#SUPER", $this->content_model, $def->content_model); + $this->child = false; + } + if(!empty($def->content_model_type)) { + $this->content_model_type = $def->content_model_type; + $this->child = false; + } + if(!is_null($def->child)) $this->child = $def->child; + if(!is_null($def->formatting)) $this->formatting = $def->formatting; + if($def->descendants_are_inline) $this->descendants_are_inline = $def->descendants_are_inline; + + } + + /** + * Merges one array into another, removes values which equal false + * @param $a1 Array by reference that is merged into + * @param $a2 Array that merges into $a1 + */ + private function _mergeAssocArray(&$a1, $a2) { + foreach ($a2 as $k => $v) { + if ($v === false) { + if (isset($a1[$k])) unset($a1[$k]); + continue; + } + $a1[$k] = $v; + } + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Encoder.php b/extlib/HTMLPurifier/HTMLPurifier/Encoder.php new file mode 100644 index 0000000000..2b3140caaf --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Encoder.php @@ -0,0 +1,426 @@ + under the + * LGPL license. Notes on what changed are inside, but in general, + * the original code transformed UTF-8 text into an array of integer + * Unicode codepoints. 
Understandably, transforming that back to + * a string would be somewhat expensive, so the function was modded to + * directly operate on the string. However, this discourages code + * reuse, and the logic enumerated here would be useful for any + * function that needs to be able to understand UTF-8 characters. + * As of right now, only smart lossless character encoding converters + * would need that, and I'm probably not going to implement them. + * Once again, PHP 6 should solve all our problems. + */ + public static function cleanUTF8($str, $force_php = false) { + + // UTF-8 validity is checked since PHP 4.3.5 + // This is an optimization: if the string is already valid UTF-8, no + // need to do PHP stuff. 99% of the time, this will be the case. + // The regexp matches the XML char production, as well as well as excluding + // non-SGML codepoints U+007F to U+009F + if (preg_match('/^[\x{9}\x{A}\x{D}\x{20}-\x{7E}\x{A0}-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]*$/Du', $str)) { + return $str; + } + + $mState = 0; // cached expected number of octets after the current octet + // until the beginning of the next UTF8 character sequence + $mUcs4 = 0; // cached Unicode character + $mBytes = 1; // cached expected number of octets in the current sequence + + // original code involved an $out that was an array of Unicode + // codepoints. Instead of having to convert back into UTF-8, we've + // decided to directly append valid UTF-8 characters onto a string + // $out once they're done. $char accumulates raw bytes, while $mUcs4 + // turns into the Unicode code point, so there's some redundancy. + + $out = ''; + $char = ''; + + $len = strlen($str); + for($i = 0; $i < $len; $i++) { + $in = ord($str{$i}); + $char .= $str[$i]; // append byte to char + if (0 == $mState) { + // When mState is zero we expect either a US-ASCII character + // or a multi-octet sequence. + if (0 == (0x80 & ($in))) { + // US-ASCII, pass straight through. 
+ if (($in <= 31 || $in == 127) && + !($in == 9 || $in == 13 || $in == 10) // save \r\t\n + ) { + // control characters, remove + } else { + $out .= $char; + } + // reset + $char = ''; + $mBytes = 1; + } elseif (0xC0 == (0xE0 & ($in))) { + // First octet of 2 octet sequence + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x1F) << 6; + $mState = 1; + $mBytes = 2; + } elseif (0xE0 == (0xF0 & ($in))) { + // First octet of 3 octet sequence + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x0F) << 12; + $mState = 2; + $mBytes = 3; + } elseif (0xF0 == (0xF8 & ($in))) { + // First octet of 4 octet sequence + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x07) << 18; + $mState = 3; + $mBytes = 4; + } elseif (0xF8 == (0xFC & ($in))) { + // First octet of 5 octet sequence. + // + // This is illegal because the encoded codepoint must be + // either: + // (a) not the shortest form or + // (b) outside the Unicode range of 0-0x10FFFF. + // Rather than trying to resynchronize, we will carry on + // until the end of the sequence and let the later error + // handling code catch it. + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 0x03) << 24; + $mState = 4; + $mBytes = 5; + } elseif (0xFC == (0xFE & ($in))) { + // First octet of 6 octet sequence, see comments for 5 + // octet sequence. + $mUcs4 = ($in); + $mUcs4 = ($mUcs4 & 1) << 30; + $mState = 5; + $mBytes = 6; + } else { + // Current octet is neither in the US-ASCII range nor a + // legal first octet of a multi-octet sequence. + $mState = 0; + $mUcs4 = 0; + $mBytes = 1; + $char = ''; + } + } else { + // When mState is non-zero, we expect a continuation of the + // multi-octet sequence + if (0x80 == (0xC0 & ($in))) { + // Legal continuation. + $shift = ($mState - 1) * 6; + $tmp = $in; + $tmp = ($tmp & 0x0000003F) << $shift; + $mUcs4 |= $tmp; + + if (0 == --$mState) { + // End of the multi-octet sequence. mUcs4 now contains + // the final Unicode codepoint to be output + + // Check for illegal sequences and codepoints. 
+ + // From Unicode 3.1, non-shortest form is illegal + if (((2 == $mBytes) && ($mUcs4 < 0x0080)) || + ((3 == $mBytes) && ($mUcs4 < 0x0800)) || + ((4 == $mBytes) && ($mUcs4 < 0x10000)) || + (4 < $mBytes) || + // From Unicode 3.2, surrogate characters = illegal + (($mUcs4 & 0xFFFFF800) == 0xD800) || + // Codepoints outside the Unicode range are illegal + ($mUcs4 > 0x10FFFF) + ) { + + } elseif (0xFEFF != $mUcs4 && // omit BOM + // check for valid Char unicode codepoints + ( + 0x9 == $mUcs4 || + 0xA == $mUcs4 || + 0xD == $mUcs4 || + (0x20 <= $mUcs4 && 0x7E >= $mUcs4) || + // 7F-9F is not strictly prohibited by XML, + // but it is non-SGML, and thus we don't allow it + (0xA0 <= $mUcs4 && 0xD7FF >= $mUcs4) || + (0x10000 <= $mUcs4 && 0x10FFFF >= $mUcs4) + ) + ) { + $out .= $char; + } + // initialize UTF8 cache (reset) + $mState = 0; + $mUcs4 = 0; + $mBytes = 1; + $char = ''; + } + } else { + // ((0xC0 & (*in) != 0x80) && (mState != 0)) + // Incomplete multi-octet sequence. + // used to result in complete fail, but we'll reset + $mState = 0; + $mUcs4 = 0; + $mBytes = 1; + $char =''; + } + } + } + return $out; + } + + /** + * Translates a Unicode codepoint into its corresponding UTF-8 character. + * @note Based on Feyd's function at + * , + * which is in public domain. + * @note While we're going to do code point parsing anyway, a good + * optimization would be to refuse to translate code points that + * are non-SGML characters. However, this could lead to duplication. + * @note This is very similar to the unichr function in + * maintenance/generate-entity-file.php (although this is superior, + * due to its sanity checks). 
+ */ + + // +----------+----------+----------+----------+ + // | 33222222 | 22221111 | 111111 | | + // | 10987654 | 32109876 | 54321098 | 76543210 | bit + // +----------+----------+----------+----------+ + // | | | | 0xxxxxxx | 1 byte 0x00000000..0x0000007F + // | | | 110yyyyy | 10xxxxxx | 2 byte 0x00000080..0x000007FF + // | | 1110zzzz | 10yyyyyy | 10xxxxxx | 3 byte 0x00000800..0x0000FFFF + // | 11110www | 10wwzzzz | 10yyyyyy | 10xxxxxx | 4 byte 0x00010000..0x0010FFFF + // +----------+----------+----------+----------+ + // | 00000000 | 00011111 | 11111111 | 11111111 | Theoretical upper limit of legal scalars: 2097151 (0x001FFFFF) + // | 00000000 | 00010000 | 11111111 | 11111111 | Defined upper limit of legal scalar codes + // +----------+----------+----------+----------+ + + public static function unichr($code) { + if($code > 1114111 or $code < 0 or + ($code >= 55296 and $code <= 57343) ) { + // bits are set outside the "valid" range as defined + // by UNICODE 4.1.0 + return ''; + } + + $x = $y = $z = $w = 0; + if ($code < 128) { + // regular ASCII character + $x = $code; + } else { + // set up bits for UTF-8 + $x = ($code & 63) | 128; + if ($code < 2048) { + $y = (($code & 2047) >> 6) | 192; + } else { + $y = (($code & 4032) >> 6) | 128; + if($code < 65536) { + $z = (($code >> 12) & 15) | 224; + } else { + $z = (($code >> 12) & 63) | 128; + $w = (($code >> 18) & 7) | 240; + } + } + } + // set up the actual character + $ret = ''; + if($w) $ret .= chr($w); + if($z) $ret .= chr($z); + if($y) $ret .= chr($y); + $ret .= chr($x); + + return $ret; + } + + /** + * Converts a string to UTF-8 based on configuration. 
+ */ + public static function convertToUTF8($str, $config, $context) { + $encoding = $config->get('Core.Encoding'); + if ($encoding === 'utf-8') return $str; + static $iconv = null; + if ($iconv === null) $iconv = function_exists('iconv'); + set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); + if ($iconv && !$config->get('Test.ForceNoIconv')) { + $str = iconv($encoding, 'utf-8//IGNORE', $str); + if ($str === false) { + // $encoding is not a valid encoding + restore_error_handler(); + trigger_error('Invalid encoding ' . $encoding, E_USER_ERROR); + return ''; + } + // If the string is bjorked by Shift_JIS or a similar encoding + // that doesn't support all of ASCII, convert the naughty + // characters to their true byte-wise ASCII/UTF-8 equivalents. + $str = strtr($str, HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding)); + restore_error_handler(); + return $str; + } elseif ($encoding === 'iso-8859-1') { + $str = utf8_encode($str); + restore_error_handler(); + return $str; + } + trigger_error('Encoding not supported, please install iconv', E_USER_ERROR); + } + + /** + * Converts a string from UTF-8 based on configuration. + * @note Currently, this is a lossy conversion, with unexpressable + * characters being omitted. 
+ */ + public static function convertFromUTF8($str, $config, $context) { + $encoding = $config->get('Core.Encoding'); + if ($encoding === 'utf-8') return $str; + static $iconv = null; + if ($iconv === null) $iconv = function_exists('iconv'); + if ($escape = $config->get('Core.EscapeNonASCIICharacters')) { + $str = HTMLPurifier_Encoder::convertToASCIIDumbLossless($str); + } + set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler')); + if ($iconv && !$config->get('Test.ForceNoIconv')) { + // Undo our previous fix in convertToUTF8, otherwise iconv will barf + $ascii_fix = HTMLPurifier_Encoder::testEncodingSupportsASCII($encoding); + if (!$escape && !empty($ascii_fix)) { + $clear_fix = array(); + foreach ($ascii_fix as $utf8 => $native) $clear_fix[$utf8] = ''; + $str = strtr($str, $clear_fix); + } + $str = strtr($str, array_flip($ascii_fix)); + // Normal stuff + $str = iconv('utf-8', $encoding . '//IGNORE', $str); + restore_error_handler(); + return $str; + } elseif ($encoding === 'iso-8859-1') { + $str = utf8_decode($str); + restore_error_handler(); + return $str; + } + trigger_error('Encoding not supported', E_USER_ERROR); + } + + /** + * Lossless (character-wise) conversion of HTML to ASCII + * @param $str UTF-8 string to be converted to ASCII + * @returns ASCII encoded string with non-ASCII character entity-ized + * @warning Adapted from MediaWiki, claiming fair use: this is a common + * algorithm. If you disagree with this license fudgery, + * implement it yourself. + * @note Uses decimal numeric entities since they are best supported. + * @note This is a DUMB function: it has no concept of keeping + * character entities that the projected character encoding + * can allow. We could possibly implement a smart version + * but that would require it to also know which Unicode + * codepoints the charset supported (not an easy task). 
+ * @note Sort of with cleanUTF8() but it assumes that $str is + * well-formed UTF-8 + */ + public static function convertToASCIIDumbLossless($str) { + $bytesleft = 0; + $result = ''; + $working = 0; + $len = strlen($str); + for( $i = 0; $i < $len; $i++ ) { + $bytevalue = ord( $str[$i] ); + if( $bytevalue <= 0x7F ) { //0xxx xxxx + $result .= chr( $bytevalue ); + $bytesleft = 0; + } elseif( $bytevalue <= 0xBF ) { //10xx xxxx + $working = $working << 6; + $working += ($bytevalue & 0x3F); + $bytesleft--; + if( $bytesleft <= 0 ) { + $result .= "&#" . $working . ";"; + } + } elseif( $bytevalue <= 0xDF ) { //110x xxxx + $working = $bytevalue & 0x1F; + $bytesleft = 1; + } elseif( $bytevalue <= 0xEF ) { //1110 xxxx + $working = $bytevalue & 0x0F; + $bytesleft = 2; + } else { //1111 0xxx + $working = $bytevalue & 0x07; + $bytesleft = 3; + } + } + return $result; + } + + /** + * This expensive function tests whether or not a given character + * encoding supports ASCII. 7/8-bit encodings like Shift_JIS will + * fail this test, and require special processing. Variable width + * encodings shouldn't ever fail. + * + * @param string $encoding Encoding name to test, as per iconv format + * @param bool $bypass Whether or not to bypass the precompiled arrays. + * @return Array of UTF-8 characters to their corresponding ASCII, + * which can be used to "undo" any overzealous iconv action. 
+     */
+    public static function testEncodingSupportsASCII($encoding, $bypass = false) {
+        // Results of the expensive probe below, keyed by encoding name
+        static $encodings = array();
+        if (!$bypass) {
+            if (isset($encodings[$encoding])) return $encodings[$encoding];
+            $lenc = strtolower($encoding);
+            // Precompiled answers for encodings with known ASCII substitutions
+            switch ($lenc) {
+                case 'shift_jis':
+                    return array("\xC2\xA5" => '\\', "\xE2\x80\xBE" => '~');
+                case 'johab':
+                    return array("\xE2\x82\xA9" => '\\');
+            }
+            if (strpos($lenc, 'iso-8859-') === 0) return array();
+        }
+        $ret = array();
+        set_error_handler(array('HTMLPurifier_Encoder', 'muteErrorHandler'));
+        if (iconv('UTF-8', $encoding, 'a') === false) {
+            // iconv cannot convert to this encoding at all. Pop the muting
+            // handler before bailing out so it does not stay installed
+            // for the caller. Note the return value is false here, not an array.
+            restore_error_handler();
+            return false;
+        }
+        for ($i = 0x20; $i <= 0x7E; $i++) { // all printable ASCII chars
+            $c = chr($i); // UTF-8 char
+            $r = iconv('UTF-8', "$encoding//IGNORE", $c); // initial conversion
+            if (
+                $r === '' ||
+                // This line is needed for iconv implementations that do not
+                // omit characters that do not exist in the target character set
+                ($r === $c && iconv($encoding, 'UTF-8//IGNORE', $r) !== $c)
+            ) {
+                // Reverse engineer: what's the UTF-8 equiv of this byte
+                // sequence? This assumes that there's no variable width
+                // encoding that doesn't support ASCII.
+                $ret[iconv($encoding, 'UTF-8//IGNORE', $c)] = $c;
+            }
+        }
+        restore_error_handler();
+        $encodings[$encoding] = $ret;
+        return $ret;
+    }
+
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/extlib/HTMLPurifier/HTMLPurifier/EntityLookup.php b/extlib/HTMLPurifier/HTMLPurifier/EntityLookup.php
new file mode 100644
index 0000000000..b4dfce94c3
--- /dev/null
+++ b/extlib/HTMLPurifier/HTMLPurifier/EntityLookup.php
@@ -0,0 +1,44 @@
+table = unserialize(file_get_contents($file));
+    }
+
+    /**
+     * Retrieves sole instance of the object.
+     * @param Optional prototype of custom lookup table to overload with.
+ */ + public static function instance($prototype = false) { + // no references, since PHP doesn't copy unless modified + static $instance = null; + if ($prototype) { + $instance = $prototype; + } elseif (!$instance) { + $instance = new HTMLPurifier_EntityLookup(); + $instance->setup(); + } + return $instance; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/EntityLookup/entities.ser b/extlib/HTMLPurifier/HTMLPurifier/EntityLookup/entities.ser new file mode 100644 index 0000000000..f2b8b8f2db --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/EntityLookup/entities.ser @@ -0,0 +1 @@ +a:246:{s:4:"nbsp";s:2:" ";s:5:"iexcl";s:2:"¡";s:4:"cent";s:2:"¢";s:5:"pound";s:2:"£";s:6:"curren";s:2:"¤";s:3:"yen";s:2:"¥";s:6:"brvbar";s:2:"¦";s:4:"sect";s:2:"§";s:3:"uml";s:2:"¨";s:4:"copy";s:2:"©";s:4:"ordf";s:2:"ª";s:5:"laquo";s:2:"«";s:3:"not";s:2:"¬";s:3:"shy";s:2:"­";s:3:"reg";s:2:"®";s:4:"macr";s:2:"¯";s:3:"deg";s:2:"°";s:6:"plusmn";s:2:"±";s:5:"acute";s:2:"´";s:5:"micro";s:2:"µ";s:4:"para";s:2:"¶";s:6:"middot";s:2:"·";s:5:"cedil";s:2:"¸";s:4:"ordm";s:2:"º";s:5:"raquo";s:2:"»";s:6:"iquest";s:2:"¿";s:6:"Agrave";s:2:"À";s:6:"Aacute";s:2:"Á";s:5:"Acirc";s:2:"Â";s:6:"Atilde";s:2:"Ã";s:4:"Auml";s:2:"Ä";s:5:"Aring";s:2:"Å";s:5:"AElig";s:2:"Æ";s:6:"Ccedil";s:2:"Ç";s:6:"Egrave";s:2:"È";s:6:"Eacute";s:2:"É";s:5:"Ecirc";s:2:"Ê";s:4:"Euml";s:2:"Ë";s:6:"Igrave";s:2:"Ì";s:6:"Iacute";s:2:"Í";s:5:"Icirc";s:2:"Î";s:4:"Iuml";s:2:"Ï";s:3:"ETH";s:2:"Ð";s:6:"Ntilde";s:2:"Ñ";s:6:"Ograve";s:2:"Ò";s:6:"Oacute";s:2:"Ó";s:5:"Ocirc";s:2:"Ô";s:6:"Otilde";s:2:"Õ";s:4:"Ouml";s:2:"Ö";s:5:"times";s:2:"×";s:6:"Oslash";s:2:"Ø";s:6:"Ugrave";s:2:"Ù";s:6:"Uacute";s:2:"Ú";s:5:"Ucirc";s:2:"Û";s:4:"Uuml";s:2:"Ü";s:6:"Yacute";s:2:"Ý";s:5:"THORN";s:2:"Þ";s:5:"szlig";s:2:"ß";s:6:"agrave";s:2:"à";s:6:"aacute";s:2:"á";s:5:"acirc";s:2:"â";s:6:"atilde";s:2:"ã";s:4:"auml";s:2:"ä";s:5:"aring";s:2:"å";s:5:"aelig";s:2:"æ";s:6:"ccedil";s:2:"ç";s:6:"egrave";s:2:"è";s:6:"eacute";s:2:"é";s:5:"ecirc"
;s:2:"ê";s:4:"euml";s:2:"ë";s:6:"igrave";s:2:"ì";s:6:"iacute";s:2:"í";s:5:"icirc";s:2:"î";s:4:"iuml";s:2:"ï";s:3:"eth";s:2:"ð";s:6:"ntilde";s:2:"ñ";s:6:"ograve";s:2:"ò";s:6:"oacute";s:2:"ó";s:5:"ocirc";s:2:"ô";s:6:"otilde";s:2:"õ";s:4:"ouml";s:2:"ö";s:6:"divide";s:2:"÷";s:6:"oslash";s:2:"ø";s:6:"ugrave";s:2:"ù";s:6:"uacute";s:2:"ú";s:5:"ucirc";s:2:"û";s:4:"uuml";s:2:"ü";s:6:"yacute";s:2:"ý";s:5:"thorn";s:2:"þ";s:4:"yuml";s:2:"ÿ";s:4:"quot";s:1:""";s:3:"amp";s:1:"&";s:2:"lt";s:1:"<";s:2:"gt";s:1:">";s:4:"apos";s:1:"'";s:5:"OElig";s:2:"Œ";s:5:"oelig";s:2:"œ";s:6:"Scaron";s:2:"Š";s:6:"scaron";s:2:"š";s:4:"Yuml";s:2:"Ÿ";s:4:"circ";s:2:"ˆ";s:5:"tilde";s:2:"˜";s:4:"ensp";s:3:" ";s:4:"emsp";s:3:" ";s:6:"thinsp";s:3:" ";s:4:"zwnj";s:3:"‌";s:3:"zwj";s:3:"‍";s:3:"lrm";s:3:"‎";s:3:"rlm";s:3:"‏";s:5:"ndash";s:3:"–";s:5:"mdash";s:3:"—";s:5:"lsquo";s:3:"‘";s:5:"rsquo";s:3:"’";s:5:"sbquo";s:3:"‚";s:5:"ldquo";s:3:"“";s:5:"rdquo";s:3:"”";s:5:"bdquo";s:3:"„";s:6:"dagger";s:3:"†";s:6:"Dagger";s:3:"‡";s:6:"permil";s:3:"‰";s:6:"lsaquo";s:3:"‹";s:6:"rsaquo";s:3:"›";s:4:"euro";s:3:"€";s:4:"fnof";s:2:"ƒ";s:5:"Alpha";s:2:"Α";s:4:"Beta";s:2:"Β";s:5:"Gamma";s:2:"Γ";s:5:"Delta";s:2:"Δ";s:7:"Epsilon";s:2:"Ε";s:4:"Zeta";s:2:"Ζ";s:3:"Eta";s:2:"Η";s:5:"Theta";s:2:"Θ";s:4:"Iota";s:2:"Ι";s:5:"Kappa";s:2:"Κ";s:6:"Lambda";s:2:"Λ";s:2:"Mu";s:2:"Μ";s:2:"Nu";s:2:"Ν";s:2:"Xi";s:2:"Ξ";s:7:"Omicron";s:2:"Ο";s:2:"Pi";s:2:"Π";s:3:"Rho";s:2:"Ρ";s:5:"Sigma";s:2:"Σ";s:3:"Tau";s:2:"Τ";s:7:"Upsilon";s:2:"Υ";s:3:"Phi";s:2:"Φ";s:3:"Chi";s:2:"Χ";s:3:"Psi";s:2:"Ψ";s:5:"Omega";s:2:"Ω";s:5:"alpha";s:2:"α";s:4:"beta";s:2:"β";s:5:"gamma";s:2:"γ";s:5:"delta";s:2:"δ";s:7:"epsilon";s:2:"ε";s:4:"zeta";s:2:"ζ";s:3:"eta";s:2:"η";s:5:"theta";s:2:"θ";s:4:"iota";s:2:"ι";s:5:"kappa";s:2:"κ";s:6:"lambda";s:2:"λ";s:2:"mu";s:2:"μ";s:2:"nu";s:2:"ν";s:2:"xi";s:2:"ξ";s:7:"omicron";s:2:"ο";s:2:"pi";s:2:"π";s:3:"rho";s:2:"ρ";s:6:"sigmaf";s:2:"ς";s:5:"sigma";s:2:"σ";s:3:"tau";s:2:"τ";s:7:"upsilon";s:2:"υ";s:3:"phi";s:2:"φ";s:3:"chi";s:2:"χ"
;s:3:"psi";s:2:"ψ";s:5:"omega";s:2:"ω";s:8:"thetasym";s:2:"ϑ";s:5:"upsih";s:2:"ϒ";s:3:"piv";s:2:"ϖ";s:4:"bull";s:3:"•";s:6:"hellip";s:3:"…";s:5:"prime";s:3:"′";s:5:"Prime";s:3:"″";s:5:"oline";s:3:"‾";s:5:"frasl";s:3:"⁄";s:6:"weierp";s:3:"℘";s:5:"image";s:3:"ℑ";s:4:"real";s:3:"ℜ";s:5:"trade";s:3:"™";s:7:"alefsym";s:3:"ℵ";s:4:"larr";s:3:"←";s:4:"uarr";s:3:"↑";s:4:"rarr";s:3:"→";s:4:"darr";s:3:"↓";s:4:"harr";s:3:"↔";s:5:"crarr";s:3:"↵";s:4:"lArr";s:3:"⇐";s:4:"uArr";s:3:"⇑";s:4:"rArr";s:3:"⇒";s:4:"dArr";s:3:"⇓";s:4:"hArr";s:3:"⇔";s:6:"forall";s:3:"∀";s:4:"part";s:3:"∂";s:5:"exist";s:3:"∃";s:5:"empty";s:3:"∅";s:5:"nabla";s:3:"∇";s:4:"isin";s:3:"∈";s:5:"notin";s:3:"∉";s:2:"ni";s:3:"∋";s:4:"prod";s:3:"∏";s:3:"sum";s:3:"∑";s:5:"minus";s:3:"−";s:6:"lowast";s:3:"∗";s:5:"radic";s:3:"√";s:4:"prop";s:3:"∝";s:5:"infin";s:3:"∞";s:3:"ang";s:3:"∠";s:3:"and";s:3:"∧";s:2:"or";s:3:"∨";s:3:"cap";s:3:"∩";s:3:"cup";s:3:"∪";s:3:"int";s:3:"∫";s:3:"sim";s:3:"∼";s:4:"cong";s:3:"≅";s:5:"asymp";s:3:"≈";s:2:"ne";s:3:"≠";s:5:"equiv";s:3:"≡";s:2:"le";s:3:"≤";s:2:"ge";s:3:"≥";s:3:"sub";s:3:"⊂";s:3:"sup";s:3:"⊃";s:4:"nsub";s:3:"⊄";s:4:"sube";s:3:"⊆";s:4:"supe";s:3:"⊇";s:5:"oplus";s:3:"⊕";s:6:"otimes";s:3:"⊗";s:4:"perp";s:3:"⊥";s:4:"sdot";s:3:"⋅";s:5:"lceil";s:3:"⌈";s:5:"rceil";s:3:"⌉";s:6:"lfloor";s:3:"⌊";s:6:"rfloor";s:3:"⌋";s:4:"lang";s:3:"〈";s:4:"rang";s:3:"〉";s:3:"loz";s:3:"◊";s:6:"spades";s:3:"♠";s:5:"clubs";s:3:"♣";s:6:"hearts";s:3:"♥";s:5:"diams";s:3:"♦";} \ No newline at end of file diff --git a/extlib/HTMLPurifier/HTMLPurifier/EntityParser.php b/extlib/HTMLPurifier/HTMLPurifier/EntityParser.php new file mode 100644 index 0000000000..8c384472dc --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/EntityParser.php @@ -0,0 +1,144 @@ + '"', + 38 => '&', + 39 => "'", + 60 => '<', + 62 => '>' + ); + + /** + * Stripped entity names to decimal conversion table for special entities. 
+     */
+    protected $_special_ent2dec =
+        array(
+            'quot' => 34,
+            'amp'  => 38,
+            'lt'   => 60,
+            'gt'   => 62
+        );
+
+    /**
+     * Replaces every entity in a string with its parsed equivalent, except
+     * the special entities, which are left exactly as written. This is
+     * meant to be run once, up front, rather than on each parsed piece.
+     *
+     * @param $string String to have non-special entities parsed.
+     * @returns Parsed string.
+     */
+    public function substituteNonSpecialEntities($string) {
+        $callback = array($this, 'nonSpecialEntityCallback');
+        // it will try to detect missing semicolons, but don't rely on it
+        return preg_replace_callback($this->_substituteEntitiesRegex, $callback, $string);
+    }
+
+    /**
+     * Worker for substituteNonSpecialEntities(): decides the replacement for
+     * a single matched entity.
+     *
+     * @param $matches  PCRE matches array, with 0 the entire match, and
+     *                  either index 1, 2 or 3 set with a hex value, dec value,
+     *                  or string (respectively).
+     * @returns Replacement string; the entity itself when it is a special
+     *          entity or a name missing from the lookup table.
+     */
+    protected function nonSpecialEntityCallback($matches) {
+        $entity = $matches[0];
+
+        if (@$entity[1] !== '#') {
+            // Named entity: the big five stay encoded, the rest are looked up
+            if (isset($this->_special_ent2dec[$matches[3]])) return $entity;
+            if (!$this->_entity_lookup) {
+                // table is fetched lazily on first use
+                $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
+            }
+            return isset($this->_entity_lookup->table[$matches[3]])
+                ? $this->_entity_lookup->table[$matches[3]]
+                : $entity;
+        }
+
+        // Numeric entity, hexadecimal (&#x..;) or decimal (&#..;)
+        $code = (@$entity[2] === 'x') ? hexdec($matches[1]) : (int) $matches[2];
+
+        // abort for special characters
+        if (isset($this->_special_dec2str[$code])) return $entity;
+
+        return HTMLPurifier_Encoder::unichr($code);
+    }
+
+    /**
+     * Substitutes only special entities with their parsed equivalents.
+     *
+     * @notice We try to avoid calling this function because otherwise, it
+     *      would have to be called a lot (for every parsed section).
+     *
+     * @param $string String to have special entities parsed.
+     * @returns Parsed string.
+     */
+    public function substituteSpecialEntities($string) {
+        return preg_replace_callback(
+            $this->_substituteEntitiesRegex,
+            array($this, 'specialEntityCallback'),
+            $string);
+    }
+
+    /**
+     * Callback function for substituteSpecialEntities() that does the work.
+     *
+     * This callback has same syntax as nonSpecialEntityCallback().
+     *
+     * @param $matches  PCRE-style matches array, with 0 the entire match, and
+     *                  either index 1, 2 or 3 set with a hex value, dec value,
+     *                  or string (respectively).
+     * @returns Replacement string: the literal character for a special
+     *          entity (numeric or named), otherwise the entity unchanged.
+     */
+    protected function specialEntityCallback($matches) {
+        $entity = $matches[0];
+        $is_num = (@$matches[0][1] === '#');
+        if ($is_num) {
+            $is_hex = (@$entity[2] === 'x');
+            $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
+        } else {
+            // Named entity: $_special_ent2dec only yields the code point, so
+            // resolve it through $_special_dec2str like the numeric form.
+            // Returning the table value directly would turn "&amp;" into "38".
+            if (!isset($this->_special_ent2dec[$matches[3]])) return $entity;
+            $int = $this->_special_ent2dec[$matches[3]];
+        }
+        return isset($this->_special_dec2str[$int]) ?
+            $this->_special_dec2str[$int] :
+            $entity;
+    }
+
+}
+
+// vim: et sw=4 sts=4
diff --git a/extlib/HTMLPurifier/HTMLPurifier/ErrorCollector.php b/extlib/HTMLPurifier/HTMLPurifier/ErrorCollector.php
new file mode 100644
index 0000000000..6713eaf773
--- /dev/null
+++ b/extlib/HTMLPurifier/HTMLPurifier/ErrorCollector.php
@@ -0,0 +1,209 @@
+locale =& $context->get('Locale');
+        $this->context = $context;
+        $this->_current =& $this->_stacks[0];
+        $this->errors =& $this->_stacks[0];
+    }
+
+    /**
+     * Sends an error message to the collector for later use
+     * @param $severity int Error severity, PHP error style (don't use E_USER_)
+     * @param $msg string Error message text
+     * @param $subst1 string First substitution for $msg
+     * @param $subst2 string ...
+     */
+    public function send($severity, $msg) {
+
+        $args = array();
+        if (func_num_args() > 2) {
+            $args = func_get_args();
+            array_shift($args);
+            unset($args[0]);
+        }
+
+        $token = $this->context->get('CurrentToken', true);
+        $line = $token ?
$token->line : $this->context->get('CurrentLine', true); + $col = $token ? $token->col : $this->context->get('CurrentCol', true); + $attr = $this->context->get('CurrentAttr', true); + + // perform special substitutions, also add custom parameters + $subst = array(); + if (!is_null($token)) { + $args['CurrentToken'] = $token; + } + if (!is_null($attr)) { + $subst['$CurrentAttr.Name'] = $attr; + if (isset($token->attr[$attr])) $subst['$CurrentAttr.Value'] = $token->attr[$attr]; + } + + if (empty($args)) { + $msg = $this->locale->getMessage($msg); + } else { + $msg = $this->locale->formatMessage($msg, $args); + } + + if (!empty($subst)) $msg = strtr($msg, $subst); + + // (numerically indexed) + $error = array( + self::LINENO => $line, + self::SEVERITY => $severity, + self::MESSAGE => $msg, + self::CHILDREN => array() + ); + $this->_current[] = $error; + + + // NEW CODE BELOW ... + + $struct = null; + // Top-level errors are either: + // TOKEN type, if $value is set appropriately, or + // "syntax" type, if $value is null + $new_struct = new HTMLPurifier_ErrorStruct(); + $new_struct->type = HTMLPurifier_ErrorStruct::TOKEN; + if ($token) $new_struct->value = clone $token; + if (is_int($line) && is_int($col)) { + if (isset($this->lines[$line][$col])) { + $struct = $this->lines[$line][$col]; + } else { + $struct = $this->lines[$line][$col] = $new_struct; + } + // These ksorts may present a performance problem + ksort($this->lines[$line], SORT_NUMERIC); + } else { + if (isset($this->lines[-1])) { + $struct = $this->lines[-1]; + } else { + $struct = $this->lines[-1] = $new_struct; + } + } + ksort($this->lines, SORT_NUMERIC); + + // Now, check if we need to operate on a lower structure + if (!empty($attr)) { + $struct = $struct->getChild(HTMLPurifier_ErrorStruct::ATTR, $attr); + if (!$struct->value) { + $struct->value = array($attr, 'PUT VALUE HERE'); + } + } + if (!empty($cssprop)) { + $struct = $struct->getChild(HTMLPurifier_ErrorStruct::CSSPROP, $cssprop); + if 
(!$struct->value) { + // if we tokenize CSS this might be a little more difficult to do + $struct->value = array($cssprop, 'PUT VALUE HERE'); + } + } + + // Ok, structs are all setup, now time to register the error + $struct->addError($severity, $msg); + } + + /** + * Retrieves raw error data for custom formatter to use + * @param List of arrays in format of array(line of error, + * error severity, error message, + * recursive sub-errors array) + */ + public function getRaw() { + return $this->errors; + } + + /** + * Default HTML formatting implementation for error messages + * @param $config Configuration array, vital for HTML output nature + * @param $errors Errors array to display; used for recursion. + */ + public function getHTMLFormatted($config, $errors = null) { + $ret = array(); + + $this->generator = new HTMLPurifier_Generator($config, $this->context); + if ($errors === null) $errors = $this->errors; + + // 'At line' message needs to be removed + + // generation code for new structure goes here. It needs to be recursive. + foreach ($this->lines as $line => $col_array) { + if ($line == -1) continue; + foreach ($col_array as $col => $struct) { + $this->_renderStruct($ret, $struct, $line, $col); + } + } + if (isset($this->lines[-1])) { + $this->_renderStruct($ret, $this->lines[-1]); + } + + if (empty($errors)) { + return '

' . $this->locale->getMessage('ErrorCollector: No errors') . '

'; + } else { + return '
  • ' . implode('
  • ', $ret) . '
'; + } + + } + + private function _renderStruct(&$ret, $struct, $line = null, $col = null) { + $stack = array($struct); + $context_stack = array(array()); + while ($current = array_pop($stack)) { + $context = array_pop($context_stack); + foreach ($current->errors as $error) { + list($severity, $msg) = $error; + $string = ''; + $string .= '
'; + // W3C uses an icon to indicate the severity of the error. + $error = $this->locale->getErrorName($severity); + $string .= "$error "; + if (!is_null($line) && !is_null($col)) { + $string .= "Line $line, Column $col: "; + } else { + $string .= 'End of Document: '; + } + $string .= '' . $this->generator->escape($msg) . ' '; + $string .= '
'; + // Here, have a marker for the character on the column appropriate. + // Be sure to clip extremely long lines. + //$string .= '
';
+                //$string .= '';
+                //$string .= '
'; + $ret[] = $string; + } + foreach ($current->children as $type => $array) { + $context[] = $current; + $stack = array_merge($stack, array_reverse($array, true)); + for ($i = count($array); $i > 0; $i--) { + $context_stack[] = $context; + } + } + } + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/ErrorStruct.php b/extlib/HTMLPurifier/HTMLPurifier/ErrorStruct.php new file mode 100644 index 0000000000..9bc8996ec1 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/ErrorStruct.php @@ -0,0 +1,60 @@ +children[$type][$id])) { + $this->children[$type][$id] = new HTMLPurifier_ErrorStruct(); + $this->children[$type][$id]->type = $type; + } + return $this->children[$type][$id]; + } + + public function addError($severity, $message) { + $this->errors[] = array($severity, $message); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Exception.php b/extlib/HTMLPurifier/HTMLPurifier/Exception.php new file mode 100644 index 0000000000..be85b4c560 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Exception.php @@ -0,0 +1,12 @@ +preFilter, + * 2->preFilter, 3->preFilter, purify, 3->postFilter, 2->postFilter, + * 1->postFilter. 
+ * + * @note Methods are not declared abstract as it is perfectly legitimate + * for an implementation not to want anything to happen on a step + */ + +class HTMLPurifier_Filter +{ + + /** + * Name of the filter for identification purposes + */ + public $name; + + /** + * Pre-processor function, handles HTML before HTML Purifier + */ + public function preFilter($html, $config, $context) { + return $html; + } + + /** + * Post-processor function, handles HTML after HTML Purifier + */ + public function postFilter($html, $config, $context) { + return $html; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Filter/ExtractStyleBlocks.php b/extlib/HTMLPurifier/HTMLPurifier/Filter/ExtractStyleBlocks.php new file mode 100644 index 0000000000..bbf78a6630 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Filter/ExtractStyleBlocks.php @@ -0,0 +1,135 @@ + blocks from input HTML, cleans them up + * using CSSTidy, and then places them in $purifier->context->get('StyleBlocks') + * so they can be used elsewhere in the document. + * + * @note + * See tests/HTMLPurifier/Filter/ExtractStyleBlocksTest.php for + * sample usage. + * + * @note + * This filter can also be used on stylesheets not included in the + * document--something purists would probably prefer. 
Just directly + * call HTMLPurifier_Filter_ExtractStyleBlocks->cleanCSS() + */ +class HTMLPurifier_Filter_ExtractStyleBlocks extends HTMLPurifier_Filter +{ + + public $name = 'ExtractStyleBlocks'; + private $_styleMatches = array(); + private $_tidy; + + public function __construct() { + $this->_tidy = new csstidy(); + } + + /** + * Save the contents of CSS blocks to style matches + * @param $matches preg_replace style $matches array + */ + protected function styleCallback($matches) { + $this->_styleMatches[] = $matches[1]; + } + + /** + * Removes inline #isU', array($this, 'styleCallback'), $html); + $style_blocks = $this->_styleMatches; + $this->_styleMatches = array(); // reset + $context->register('StyleBlocks', $style_blocks); // $context must not be reused + if ($this->_tidy) { + foreach ($style_blocks as &$style) { + $style = $this->cleanCSS($style, $config, $context); + } + } + return $html; + } + + /** + * Takes CSS (the stuff found in in a font-family prop). + if ($config->get('Filter.ExtractStyleBlocks.Escaping')) { + $css = str_replace( + array('<', '>', '&'), + array('\3C ', '\3E ', '\26 '), + $css + ); + } + return $css; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Filter/YouTube.php b/extlib/HTMLPurifier/HTMLPurifier/Filter/YouTube.php new file mode 100644 index 0000000000..aca972f6c5 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Filter/YouTube.php @@ -0,0 +1,39 @@ +]+>.+?'. + 'http://www.youtube.com/v/([A-Za-z0-9\-_]+).+?#s'; + $pre_replace = '\1'; + return preg_replace($pre_regex, $pre_replace, $html); + } + + public function postFilter($html, $config, $context) { + $post_regex = '#([A-Za-z0-9\-_]+)#'; + return preg_replace_callback($post_regex, array($this, 'postFilterCallback'), $html); + } + + protected function armorUrl($url) { + return str_replace('--', '--', $url); + } + + protected function postFilterCallback($matches) { + $url = $this->armorUrl($matches[1]); + return ''. + ''. + ''. 
+ ''; + + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Generator.php b/extlib/HTMLPurifier/HTMLPurifier/Generator.php new file mode 100644 index 0000000000..24bd8a54ed --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Generator.php @@ -0,0 +1,183 @@ + tags + */ + private $_scriptFix = false; + + /** + * Cache of HTMLDefinition during HTML output to determine whether or + * not attributes should be minimized. + */ + private $_def; + + /** + * Cache of %Output.SortAttr + */ + private $_sortAttr; + + /** + * Configuration for the generator + */ + protected $config; + + /** + * @param $config Instance of HTMLPurifier_Config + * @param $context Instance of HTMLPurifier_Context + */ + public function __construct($config, $context) { + $this->config = $config; + $this->_scriptFix = $config->get('Output.CommentScriptContents'); + $this->_sortAttr = $config->get('Output.SortAttr'); + $this->_def = $config->getHTMLDefinition(); + $this->_xhtml = $this->_def->doctype->xml; + } + + /** + * Generates HTML from an array of tokens. + * @param $tokens Array of HTMLPurifier_Token + * @param $config HTMLPurifier_Config object + * @return Generated HTML + */ + public function generateFromTokens($tokens) { + if (!$tokens) return ''; + + // Basic algorithm + $html = ''; + for ($i = 0, $size = count($tokens); $i < $size; $i++) { + if ($this->_scriptFix && $tokens[$i]->name === 'script' + && $i + 2 < $size && $tokens[$i+2] instanceof HTMLPurifier_Token_End) { + // script special case + // the contents of the script block must be ONE token + // for this to work. 
+ $html .= $this->generateFromToken($tokens[$i++]); + $html .= $this->generateScriptFromToken($tokens[$i++]); + } + $html .= $this->generateFromToken($tokens[$i]); + } + + // Tidy cleanup + if (extension_loaded('tidy') && $this->config->get('Output.TidyFormat')) { + $tidy = new Tidy; + $tidy->parseString($html, array( + 'indent'=> true, + 'output-xhtml' => $this->_xhtml, + 'show-body-only' => true, + 'indent-spaces' => 2, + 'wrap' => 68, + ), 'utf8'); + $tidy->cleanRepair(); + $html = (string) $tidy; // explicit cast necessary + } + + // Normalize newlines to system defined value + $nl = $this->config->get('Output.Newline'); + if ($nl === null) $nl = PHP_EOL; + if ($nl !== "\n") $html = str_replace("\n", $nl, $html); + return $html; + } + + /** + * Generates HTML from a single token. + * @param $token HTMLPurifier_Token object. + * @return Generated HTML + */ + public function generateFromToken($token) { + if (!$token instanceof HTMLPurifier_Token) { + trigger_error('Cannot generate HTML from non-HTMLPurifier_Token object', E_USER_WARNING); + return ''; + + } elseif ($token instanceof HTMLPurifier_Token_Start) { + $attr = $this->generateAttributes($token->attr, $token->name); + return '<' . $token->name . ($attr ? ' ' : '') . $attr . '>'; + + } elseif ($token instanceof HTMLPurifier_Token_End) { + return 'name . '>'; + + } elseif ($token instanceof HTMLPurifier_Token_Empty) { + $attr = $this->generateAttributes($token->attr, $token->name); + return '<' . $token->name . ($attr ? ' ' : '') . $attr . + ( $this->_xhtml ? ' /': '' ) //
v.
+ . '>'; + + } elseif ($token instanceof HTMLPurifier_Token_Text) { + return $this->escape($token->data, ENT_NOQUOTES); + + } elseif ($token instanceof HTMLPurifier_Token_Comment) { + return ''; + } else { + return ''; + + } + } + + /** + * Special case processor for the contents of script tags + * @warning This runs into problems if there's already a literal + * --> somewhere inside the script contents. + */ + public function generateScriptFromToken($token) { + if (!$token instanceof HTMLPurifier_Token_Text) return $this->generateFromToken($token); + // Thanks + $data = preg_replace('#//\s*$#', '', $token->data); + return ''; + } + + /** + * Generates attribute declarations from attribute array. + * @note This does not include the leading or trailing space. + * @param $assoc_array_of_attributes Attribute array + * @param $element Name of element attributes are for, used to check + * attribute minimization. + * @return Generate HTML fragment for insertion. + */ + public function generateAttributes($assoc_array_of_attributes, $element = false) { + $html = ''; + if ($this->_sortAttr) ksort($assoc_array_of_attributes); + foreach ($assoc_array_of_attributes as $key => $value) { + if (!$this->_xhtml) { + // Remove namespaced attributes + if (strpos($key, ':') !== false) continue; + // Check if we should minimize the attribute: val="val" -> val + if ($element && !empty($this->_def->info[$element]->attr[$key]->minimized)) { + $html .= $key . ' '; + continue; + } + } + $html .= $key.'="'.$this->escape($value).'" '; + } + return rtrim($html); + } + + /** + * Escapes raw text data. + * @todo This really ought to be protected, but until we have a facility + * for properly generating HTML here w/o using tokens, it stays + * public. + * @param $string String data to escape for HTML. + * @param $quote Quoting style, like htmlspecialchars. ENT_NOQUOTES is + * permissible for non-attribute output. + * @return String escaped data. 
+ */ + public function escape($string, $quote = ENT_COMPAT) { + return htmlspecialchars($string, $quote, 'UTF-8'); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLDefinition.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLDefinition.php new file mode 100644 index 0000000000..c99ac11eb2 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLDefinition.php @@ -0,0 +1,420 @@ +getAnonymousModule(); + if (!isset($module->info[$element_name])) { + $element = $module->addBlankElement($element_name); + } else { + $element = $module->info[$element_name]; + } + $element->attr[$attr_name] = $def; + } + + /** + * Adds a custom element to your HTML definition + * @note See HTMLPurifier_HTMLModule::addElement for detailed + * parameter and return value descriptions. + */ + public function addElement($element_name, $type, $contents, $attr_collections, $attributes = array()) { + $module = $this->getAnonymousModule(); + // assume that if the user is calling this, the element + // is safe. This may not be a good idea + $element = $module->addElement($element_name, $type, $contents, $attr_collections, $attributes); + return $element; + } + + /** + * Adds a blank element to your HTML definition, for overriding + * existing behavior + * @note See HTMLPurifier_HTMLModule::addBlankElement for detailed + * parameter and return value descriptions. + */ + public function addBlankElement($element_name) { + $module = $this->getAnonymousModule(); + $element = $module->addBlankElement($element_name); + return $element; + } + + /** + * Retrieves a reference to the anonymous module, so you can + * bust out advanced features without having to make your own + * module. 
+ */ + public function getAnonymousModule() { + if (!$this->_anonModule) { + $this->_anonModule = new HTMLPurifier_HTMLModule(); + $this->_anonModule->name = 'Anonymous'; + } + return $this->_anonModule; + } + + private $_anonModule; + + + // PUBLIC BUT INTERNAL VARIABLES -------------------------------------- + + public $type = 'HTML'; + public $manager; /**< Instance of HTMLPurifier_HTMLModuleManager */ + + /** + * Performs low-cost, preliminary initialization. + */ + public function __construct() { + $this->manager = new HTMLPurifier_HTMLModuleManager(); + } + + protected function doSetup($config) { + $this->processModules($config); + $this->setupConfigStuff($config); + unset($this->manager); + + // cleanup some of the element definitions + foreach ($this->info as $k => $v) { + unset($this->info[$k]->content_model); + unset($this->info[$k]->content_model_type); + } + } + + /** + * Extract out the information from the manager + */ + protected function processModules($config) { + + if ($this->_anonModule) { + // for user specific changes + // this is late-loaded so we don't have to deal with PHP4 + // reference wonky-ness + $this->manager->addModule($this->_anonModule); + unset($this->_anonModule); + } + + $this->manager->setup($config); + $this->doctype = $this->manager->doctype; + + foreach ($this->manager->modules as $module) { + foreach($module->info_tag_transform as $k => $v) { + if ($v === false) unset($this->info_tag_transform[$k]); + else $this->info_tag_transform[$k] = $v; + } + foreach($module->info_attr_transform_pre as $k => $v) { + if ($v === false) unset($this->info_attr_transform_pre[$k]); + else $this->info_attr_transform_pre[$k] = $v; + } + foreach($module->info_attr_transform_post as $k => $v) { + if ($v === false) unset($this->info_attr_transform_post[$k]); + else $this->info_attr_transform_post[$k] = $v; + } + foreach ($module->info_injector as $k => $v) { + if ($v === false) unset($this->info_injector[$k]); + else $this->info_injector[$k] = 
$v; + } + } + + $this->info = $this->manager->getElements(); + $this->info_content_sets = $this->manager->contentSets->lookup; + + } + + /** + * Sets up stuff based on config. We need a better way of doing this. + */ + protected function setupConfigStuff($config) { + + $block_wrapper = $config->get('HTML.BlockWrapper'); + if (isset($this->info_content_sets['Block'][$block_wrapper])) { + $this->info_block_wrapper = $block_wrapper; + } else { + trigger_error('Cannot use non-block element as block wrapper', + E_USER_ERROR); + } + + $parent = $config->get('HTML.Parent'); + $def = $this->manager->getElement($parent, true); + if ($def) { + $this->info_parent = $parent; + $this->info_parent_def = $def; + } else { + trigger_error('Cannot use unrecognized element as parent', + E_USER_ERROR); + $this->info_parent_def = $this->manager->getElement($this->info_parent, true); + } + + // support template text + $support = "(for information on implementing this, see the ". + "support forums) "; + + // setup allowed elements ----------------------------------------- + + $allowed_elements = $config->get('HTML.AllowedElements'); + $allowed_attributes = $config->get('HTML.AllowedAttributes'); // retrieve early + + if (!is_array($allowed_elements) && !is_array($allowed_attributes)) { + $allowed = $config->get('HTML.Allowed'); + if (is_string($allowed)) { + list($allowed_elements, $allowed_attributes) = $this->parseTinyMCEAllowedList($allowed); + } + } + + if (is_array($allowed_elements)) { + foreach ($this->info as $name => $d) { + if(!isset($allowed_elements[$name])) unset($this->info[$name]); + unset($allowed_elements[$name]); + } + // emit errors + foreach ($allowed_elements as $element => $d) { + $element = htmlspecialchars($element); // PHP doesn't escape errors, be careful! 
+ trigger_error("Element '$element' is not supported $support", E_USER_WARNING); + } + } + + // setup allowed attributes --------------------------------------- + + $allowed_attributes_mutable = $allowed_attributes; // by copy! + if (is_array($allowed_attributes)) { + + // This actually doesn't do anything, since we went away from + // global attributes. It's possible that userland code uses + // it, but HTMLModuleManager doesn't! + foreach ($this->info_global_attr as $attr => $x) { + $keys = array($attr, "*@$attr", "*.$attr"); + $delete = true; + foreach ($keys as $key) { + if ($delete && isset($allowed_attributes[$key])) { + $delete = false; + } + if (isset($allowed_attributes_mutable[$key])) { + unset($allowed_attributes_mutable[$key]); + } + } + if ($delete) unset($this->info_global_attr[$attr]); + } + + foreach ($this->info as $tag => $info) { + foreach ($info->attr as $attr => $x) { + $keys = array("$tag@$attr", $attr, "*@$attr", "$tag.$attr", "*.$attr"); + $delete = true; + foreach ($keys as $key) { + if ($delete && isset($allowed_attributes[$key])) { + $delete = false; + } + if (isset($allowed_attributes_mutable[$key])) { + unset($allowed_attributes_mutable[$key]); + } + } + if ($delete) unset($this->info[$tag]->attr[$attr]); + } + } + // emit errors + foreach ($allowed_attributes_mutable as $elattr => $d) { + $bits = preg_split('/[.@]/', $elattr, 2); + $c = count($bits); + switch ($c) { + case 2: + if ($bits[0] !== '*') { + $element = htmlspecialchars($bits[0]); + $attribute = htmlspecialchars($bits[1]); + if (!isset($this->info[$element])) { + trigger_error("Cannot allow attribute '$attribute' if element '$element' is not allowed/supported $support"); + } else { + trigger_error("Attribute '$attribute' in element '$element' not supported $support", + E_USER_WARNING); + } + break; + } + // otherwise fall through + case 1: + $attribute = htmlspecialchars($bits[0]); + trigger_error("Global attribute '$attribute' is not ". 
+ "supported in any elements $support", + E_USER_WARNING); + break; + } + } + + } + + // setup forbidden elements --------------------------------------- + + $forbidden_elements = $config->get('HTML.ForbiddenElements'); + $forbidden_attributes = $config->get('HTML.ForbiddenAttributes'); + + foreach ($this->info as $tag => $info) { + if (isset($forbidden_elements[$tag])) { + unset($this->info[$tag]); + continue; + } + foreach ($info->attr as $attr => $x) { + if ( + isset($forbidden_attributes["$tag@$attr"]) || + isset($forbidden_attributes["*@$attr"]) || + isset($forbidden_attributes[$attr]) + ) { + unset($this->info[$tag]->attr[$attr]); + continue; + } // this segment might get removed eventually + elseif (isset($forbidden_attributes["$tag.$attr"])) { + // $tag.$attr are not user supplied, so no worries! + trigger_error("Error with $tag.$attr: tag.attr syntax not supported for HTML.ForbiddenAttributes; use tag@attr instead", E_USER_WARNING); + } + } + } + foreach ($forbidden_attributes as $key => $v) { + if (strlen($key) < 2) continue; + if ($key[0] != '*') continue; + if ($key[1] == '.') { + trigger_error("Error with $key: *.attr syntax not supported for HTML.ForbiddenAttributes; use attr instead", E_USER_WARNING); + } + } + + // setup injectors ----------------------------------------------------- + foreach ($this->info_injector as $i => $injector) { + if ($injector->checkNeeded($config) !== false) { + // remove injector that does not have it's required + // elements/attributes present, and is thus not needed. + unset($this->info_injector[$i]); + } + } + } + + /** + * Parses a TinyMCE-flavored Allowed Elements and Attributes list into + * separate lists for processing. Format is element[attr1|attr2],element2... 
+ * @warning Although it's largely drawn from TinyMCE's implementation, + * it is different, and you'll probably have to modify your lists + * @param $list String list to parse + * @param array($allowed_elements, $allowed_attributes) + * @todo Give this its own class, probably static interface + */ + public function parseTinyMCEAllowedList($list) { + + $list = str_replace(array(' ', "\t"), '', $list); + + $elements = array(); + $attributes = array(); + + $chunks = preg_split('/(,|[\n\r]+)/', $list); + foreach ($chunks as $chunk) { + if (empty($chunk)) continue; + // remove TinyMCE element control characters + if (!strpos($chunk, '[')) { + $element = $chunk; + $attr = false; + } else { + list($element, $attr) = explode('[', $chunk); + } + if ($element !== '*') $elements[$element] = true; + if (!$attr) continue; + $attr = substr($attr, 0, strlen($attr) - 1); // remove trailing ] + $attr = explode('|', $attr); + foreach ($attr as $key) { + $attributes["$element.$key"] = true; + } + } + + return array($elements, $attributes); + + } + + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule.php new file mode 100644 index 0000000000..072cf68084 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule.php @@ -0,0 +1,244 @@ +info, since the object's data is only info, + * with extra behavior associated with it. + */ + public $attr_collections = array(); + + /** + * Associative array of deprecated tag name to HTMLPurifier_TagTransform + */ + public $info_tag_transform = array(); + + /** + * List of HTMLPurifier_AttrTransform to be performed before validation. + */ + public $info_attr_transform_pre = array(); + + /** + * List of HTMLPurifier_AttrTransform to be performed after validation. + */ + public $info_attr_transform_post = array(); + + /** + * List of HTMLPurifier_Injector to be performed during well-formedness fixing. 
+ * An injector will only be invoked if all of it's pre-requisites are met; + * if an injector fails setup, there will be no error; it will simply be + * silently disabled. + */ + public $info_injector = array(); + + /** + * Boolean flag that indicates whether or not getChildDef is implemented. + * For optimization reasons: may save a call to a function. Be sure + * to set it if you do implement getChildDef(), otherwise it will have + * no effect! + */ + public $defines_child_def = false; + + /** + * Boolean flag whether or not this module is safe. If it is not safe, all + * of its members are unsafe. Modules are safe by default (this might be + * slightly dangerous, but it doesn't make much sense to force HTML Purifier, + * which is based off of safe HTML, to explicitly say, "This is safe," even + * though there are modules which are "unsafe") + * + * @note Previously, safety could be applied at an element level granularity. + * We've removed this ability, so in order to add "unsafe" elements + * or attributes, a dedicated module with this property set to false + * must be used. + */ + public $safe = true; + + /** + * Retrieves a proper HTMLPurifier_ChildDef subclass based on + * content_model and content_model_type member variables of + * the HTMLPurifier_ElementDef class. There is a similar function + * in HTMLPurifier_HTMLDefinition. + * @param $def HTMLPurifier_ElementDef instance + * @return HTMLPurifier_ChildDef subclass + */ + public function getChildDef($def) {return false;} + + // -- Convenience ----------------------------------------------------- + + /** + * Convenience function that sets up a new element + * @param $element Name of element to add + * @param $type What content set should element be registered to? + * Set as false to skip this step. + * @param $contents Allowed children in form of: + * "$content_model_type: $content_model" + * @param $attr_includes What attribute collections to register to + * element? 
+ * @param $attr What unique attributes does the element define? + * @note See ElementDef for in-depth descriptions of these parameters. + * @return Created element definition object, so you + * can set advanced parameters + */ + public function addElement($element, $type, $contents, $attr_includes = array(), $attr = array()) { + $this->elements[] = $element; + // parse content_model + list($content_model_type, $content_model) = $this->parseContents($contents); + // merge in attribute inclusions + $this->mergeInAttrIncludes($attr, $attr_includes); + // add element to content sets + if ($type) $this->addElementToContentSet($element, $type); + // create element + $this->info[$element] = HTMLPurifier_ElementDef::create( + $content_model, $content_model_type, $attr + ); + // literal object $contents means direct child manipulation + if (!is_string($contents)) $this->info[$element]->child = $contents; + return $this->info[$element]; + } + + /** + * Convenience function that creates a totally blank, non-standalone + * element. 
+ * @param $element Name of element to create + * @return Created element + */ + public function addBlankElement($element) { + if (!isset($this->info[$element])) { + $this->elements[] = $element; + $this->info[$element] = new HTMLPurifier_ElementDef(); + $this->info[$element]->standalone = false; + } else { + trigger_error("Definition for $element already exists in module, cannot redefine"); + } + return $this->info[$element]; + } + + /** + * Convenience function that registers an element to a content set + * @param Element to register + * @param Name content set (warning: case sensitive, usually upper-case + * first letter) + */ + public function addElementToContentSet($element, $type) { + if (!isset($this->content_sets[$type])) $this->content_sets[$type] = ''; + else $this->content_sets[$type] .= ' | '; + $this->content_sets[$type] .= $element; + } + + /** + * Convenience function that transforms single-string contents + * into separate content model and content model type + * @param $contents Allowed children in form of: + * "$content_model_type: $content_model" + * @note If contents is an object, an array of two nulls will be + * returned, and the callee needs to take the original $contents + * and use it directly. + */ + public function parseContents($contents) { + if (!is_string($contents)) return array(null, null); // defer + switch ($contents) { + // check for shorthand content model forms + case 'Empty': + return array('empty', ''); + case 'Inline': + return array('optional', 'Inline | #PCDATA'); + case 'Flow': + return array('optional', 'Flow | #PCDATA'); + } + list($content_model_type, $content_model) = explode(':', $contents); + $content_model_type = strtolower(trim($content_model_type)); + $content_model = trim($content_model); + return array($content_model_type, $content_model); + } + + /** + * Convenience function that merges a list of attribute includes into + * an attribute array. 
+ * @param $attr Reference to attr array to modify + * @param $attr_includes Array of includes / string include to merge in + */ + public function mergeInAttrIncludes(&$attr, $attr_includes) { + if (!is_array($attr_includes)) { + if (empty($attr_includes)) $attr_includes = array(); + else $attr_includes = array($attr_includes); + } + $attr[0] = $attr_includes; + } + + /** + * Convenience function that generates a lookup table with boolean + * true as value. + * @param $list List of values to turn into a lookup + * @note You can also pass an arbitrary number of arguments in + * place of the regular argument + * @return Lookup array equivalent of list + */ + public function makeLookup($list) { + if (is_string($list)) $list = func_get_args(); + $ret = array(); + foreach ($list as $value) { + if (is_null($value)) continue; + $ret[$value] = true; + } + return $ret; + } + + /** + * Lazy load construction of the module after determining whether + * or not it's needed, and also when a finalized configuration object + * is available. 
+ * @param $config Instance of HTMLPurifier_Config + */ + public function setup($config) {} + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Bdo.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Bdo.php new file mode 100644 index 0000000000..3d66f1b4e1 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Bdo.php @@ -0,0 +1,31 @@ + array('dir' => false) + ); + + public function setup($config) { + $bdo = $this->addElement( + 'bdo', 'Inline', 'Inline', array('Core', 'Lang'), + array( + 'dir' => 'Enum#ltr,rtl', // required + // The Abstract Module specification has the attribute + // inclusions wrong for bdo: bdo allows Lang + ) + ); + $bdo->attr_transform_post['required-dir'] = new HTMLPurifier_AttrTransform_BdoDir(); + + $this->attr_collections['I18N']['dir'] = 'Enum#ltr,rtl'; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/CommonAttributes.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/CommonAttributes.php new file mode 100644 index 0000000000..7c15da84fc --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/CommonAttributes.php @@ -0,0 +1,26 @@ + array( + 0 => array('Style'), + // 'xml:space' => false, + 'class' => 'Class', + 'id' => 'ID', + 'title' => 'CDATA', + ), + 'Lang' => array(), + 'I18N' => array( + 0 => array('Lang'), // proprietary, for xml:lang/lang + ), + 'Common' => array( + 0 => array('Core', 'I18N') + ) + ); + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Edit.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Edit.php new file mode 100644 index 0000000000..ff93690555 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Edit.php @@ -0,0 +1,38 @@ + 'URI', + // 'datetime' => 'Datetime', // not implemented + ); + $this->addElement('del', 'Inline', $contents, 'Common', $attr); + $this->addElement('ins', 'Inline', $contents, 'Common', $attr); + } + + // HTML 4.01 specifies that ins/del must not contain 
block + // elements when used in an inline context, chameleon is + // a complicated workaround to acheive this effect + + // Inline context ! Block context (exclamation mark is + // separator, see getChildDef for parsing) + + public $defines_child_def = true; + public function getChildDef($def) { + if ($def->content_model_type != 'chameleon') return false; + $value = explode('!', $def->content_model); + return new HTMLPurifier_ChildDef_Chameleon($value[0], $value[1]); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Forms.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Forms.php new file mode 100644 index 0000000000..44c22f6f8b --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Forms.php @@ -0,0 +1,118 @@ + 'Form', + 'Inline' => 'Formctrl', + ); + + public function setup($config) { + $form = $this->addElement('form', 'Form', + 'Required: Heading | List | Block | fieldset', 'Common', array( + 'accept' => 'ContentTypes', + 'accept-charset' => 'Charsets', + 'action*' => 'URI', + 'method' => 'Enum#get,post', + // really ContentType, but these two are the only ones used today + 'enctype' => 'Enum#application/x-www-form-urlencoded,multipart/form-data', + )); + $form->excludes = array('form' => true); + + $input = $this->addElement('input', 'Formctrl', 'Empty', 'Common', array( + 'accept' => 'ContentTypes', + 'accesskey' => 'Character', + 'alt' => 'Text', + 'checked' => 'Bool#checked', + 'disabled' => 'Bool#disabled', + 'maxlength' => 'Number', + 'name' => 'CDATA', + 'readonly' => 'Bool#readonly', + 'size' => 'Number', + 'src' => 'URI#embeds', + 'tabindex' => 'Number', + 'type' => 'Enum#text,password,checkbox,button,radio,submit,reset,file,hidden,image', + 'value' => 'CDATA', + )); + $input->attr_transform_post[] = new HTMLPurifier_AttrTransform_Input(); + + $this->addElement('select', 'Formctrl', 'Required: optgroup | option', 'Common', array( + 'disabled' => 'Bool#disabled', + 'multiple' => 'Bool#multiple', + 
'name' => 'CDATA', + 'size' => 'Number', + 'tabindex' => 'Number', + )); + + $this->addElement('option', false, 'Optional: #PCDATA', 'Common', array( + 'disabled' => 'Bool#disabled', + 'label' => 'Text', + 'selected' => 'Bool#selected', + 'value' => 'CDATA', + )); + // It's illegal for there to be more than one selected, but not + // be multiple. Also, no selected means undefined behavior. This might + // be difficult to implement; perhaps an injector, or a context variable. + + $textarea = $this->addElement('textarea', 'Formctrl', 'Optional: #PCDATA', 'Common', array( + 'accesskey' => 'Character', + 'cols*' => 'Number', + 'disabled' => 'Bool#disabled', + 'name' => 'CDATA', + 'readonly' => 'Bool#readonly', + 'rows*' => 'Number', + 'tabindex' => 'Number', + )); + $textarea->attr_transform_pre[] = new HTMLPurifier_AttrTransform_Textarea(); + + $button = $this->addElement('button', 'Formctrl', 'Optional: #PCDATA | Heading | List | Block | Inline', 'Common', array( + 'accesskey' => 'Character', + 'disabled' => 'Bool#disabled', + 'name' => 'CDATA', + 'tabindex' => 'Number', + 'type' => 'Enum#button,submit,reset', + 'value' => 'CDATA', + )); + + // For exclusions, ideally we'd specify content sets, not literal elements + $button->excludes = $this->makeLookup( + 'form', 'fieldset', // Form + 'input', 'select', 'textarea', 'label', 'button', // Formctrl + 'a' // as per HTML 4.01 spec, this is omitted by modularization + ); + + // Extra exclusion: img usemap="" is not permitted within this element. + // We'll omit this for now, since we don't have any good way of + // indicating it yet. 
+ + // This is HIGHLY user-unfriendly; we need a custom child-def for this + $this->addElement('fieldset', 'Form', 'Custom: (#WS?,legend,(Flow|#PCDATA)*)', 'Common'); + + $label = $this->addElement('label', 'Formctrl', 'Optional: #PCDATA | Inline', 'Common', array( + 'accesskey' => 'Character', + // 'for' => 'IDREF', // IDREF not implemented, cannot allow + )); + $label->excludes = array('label' => true); + + $this->addElement('legend', false, 'Optional: #PCDATA | Inline', 'Common', array( + 'accesskey' => 'Character', + )); + + $this->addElement('optgroup', false, 'Required: option', 'Common', array( + 'disabled' => 'Bool#disabled', + 'label*' => 'Text', + )); + + // Don't forget an injector for . This one's a little complex + // because it maps to multiple elements. + + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Hypertext.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Hypertext.php new file mode 100644 index 0000000000..d7e9bdd27e --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Hypertext.php @@ -0,0 +1,31 @@ +addElement( + 'a', 'Inline', 'Inline', 'Common', + array( + // 'accesskey' => 'Character', + // 'charset' => 'Charset', + 'href' => 'URI', + // 'hreflang' => 'LanguageCode', + 'rel' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rel'), + 'rev' => new HTMLPurifier_AttrDef_HTML_LinkTypes('rev'), + // 'tabindex' => 'Number', + // 'type' => 'ContentType', + ) + ); + $a->formatting = true; + $a->excludes = array('a' => true); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Image.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Image.php new file mode 100644 index 0000000000..948d435bcd --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Image.php @@ -0,0 +1,40 @@ +get('HTML.MaxImgLength'); + $img = $this->addElement( + 'img', 'Inline', 'Empty', 'Common', + array( + 'alt*' => 'Text', + // According to the spec, it's Length, but percents can + // be 
abused, so we allow only Pixels. + 'height' => 'Pixels#' . $max, + 'width' => 'Pixels#' . $max, + 'longdesc' => 'URI', + 'src*' => new HTMLPurifier_AttrDef_URI(true), // embedded + ) + ); + if ($max === null || $config->get('HTML.Trusted')) { + $img->attr['height'] = + $img->attr['width'] = 'Length'; + } + + // kind of strange, but splitting things up would be inefficient + $img->attr_transform_pre[] = + $img->attr_transform_post[] = + new HTMLPurifier_AttrTransform_ImgRequired(); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Legacy.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Legacy.php new file mode 100644 index 0000000000..df33927ba6 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Legacy.php @@ -0,0 +1,143 @@ +addElement('basefont', 'Inline', 'Empty', false, array( + 'color' => 'Color', + 'face' => 'Text', // extremely broad, we should + 'size' => 'Text', // tighten it + 'id' => 'ID' + )); + $this->addElement('center', 'Block', 'Flow', 'Common'); + $this->addElement('dir', 'Block', 'Required: li', 'Common', array( + 'compact' => 'Bool#compact' + )); + $this->addElement('font', 'Inline', 'Inline', array('Core', 'I18N'), array( + 'color' => 'Color', + 'face' => 'Text', // extremely broad, we should + 'size' => 'Text', // tighten it + )); + $this->addElement('menu', 'Block', 'Required: li', 'Common', array( + 'compact' => 'Bool#compact' + )); + + $s = $this->addElement('s', 'Inline', 'Inline', 'Common'); + $s->formatting = true; + + $strike = $this->addElement('strike', 'Inline', 'Inline', 'Common'); + $strike->formatting = true; + + $u = $this->addElement('u', 'Inline', 'Inline', 'Common'); + $u->formatting = true; + + // setup modifications to old elements + + $align = 'Enum#left,right,center,justify'; + + $address = $this->addBlankElement('address'); + $address->content_model = 'Inline | #PCDATA | p'; + $address->content_model_type = 'optional'; + $address->child = false; + + $blockquote = 
$this->addBlankElement('blockquote'); + $blockquote->content_model = 'Flow | #PCDATA'; + $blockquote->content_model_type = 'optional'; + $blockquote->child = false; + + $br = $this->addBlankElement('br'); + $br->attr['clear'] = 'Enum#left,all,right,none'; + + $caption = $this->addBlankElement('caption'); + $caption->attr['align'] = 'Enum#top,bottom,left,right'; + + $div = $this->addBlankElement('div'); + $div->attr['align'] = $align; + + $dl = $this->addBlankElement('dl'); + $dl->attr['compact'] = 'Bool#compact'; + + for ($i = 1; $i <= 6; $i++) { + $h = $this->addBlankElement("h$i"); + $h->attr['align'] = $align; + } + + $hr = $this->addBlankElement('hr'); + $hr->attr['align'] = $align; + $hr->attr['noshade'] = 'Bool#noshade'; + $hr->attr['size'] = 'Pixels'; + $hr->attr['width'] = 'Length'; + + $img = $this->addBlankElement('img'); + $img->attr['align'] = 'Enum#top,middle,bottom,left,right'; + $img->attr['border'] = 'Pixels'; + $img->attr['hspace'] = 'Pixels'; + $img->attr['vspace'] = 'Pixels'; + + // figure out this integer business + + $li = $this->addBlankElement('li'); + $li->attr['value'] = new HTMLPurifier_AttrDef_Integer(); + $li->attr['type'] = 'Enum#s:1,i,I,a,A,disc,square,circle'; + + $ol = $this->addBlankElement('ol'); + $ol->attr['compact'] = 'Bool#compact'; + $ol->attr['start'] = new HTMLPurifier_AttrDef_Integer(); + $ol->attr['type'] = 'Enum#s:1,i,I,a,A'; + + $p = $this->addBlankElement('p'); + $p->attr['align'] = $align; + + $pre = $this->addBlankElement('pre'); + $pre->attr['width'] = 'Number'; + + // script omitted + + $table = $this->addBlankElement('table'); + $table->attr['align'] = 'Enum#left,center,right'; + $table->attr['bgcolor'] = 'Color'; + + $tr = $this->addBlankElement('tr'); + $tr->attr['bgcolor'] = 'Color'; + + $th = $this->addBlankElement('th'); + $th->attr['bgcolor'] = 'Color'; + $th->attr['height'] = 'Length'; + $th->attr['nowrap'] = 'Bool#nowrap'; + $th->attr['width'] = 'Length'; + + $td = $this->addBlankElement('td'); + 
$td->attr['bgcolor'] = 'Color'; + $td->attr['height'] = 'Length'; + $td->attr['nowrap'] = 'Bool#nowrap'; + $td->attr['width'] = 'Length'; + + $ul = $this->addBlankElement('ul'); + $ul->attr['compact'] = 'Bool#compact'; + $ul->attr['type'] = 'Enum#square,disc,circle'; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/List.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/List.php new file mode 100644 index 0000000000..1d15f27293 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/List.php @@ -0,0 +1,35 @@ + 'List'); + + public function setup($config) { + $this->addElement('ol', 'List', 'Required: li', 'Common'); + $this->addElement('ul', 'List', 'Required: li', 'Common'); + $this->addElement('dl', 'List', 'Required: dt | dd', 'Common'); + + $this->addElement('li', false, 'Flow', 'Common'); + + $this->addElement('dd', false, 'Flow', 'Common'); + $this->addElement('dt', false, 'Inline', 'Common'); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Name.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Name.php new file mode 100644 index 0000000000..05694b4504 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Name.php @@ -0,0 +1,21 @@ +addBlankElement($name); + $element->attr['name'] = 'CDATA'; + if (!$config->get('HTML.Attr.Name.UseCDATA')) { + $element->attr_transform_post['NameSync'] = new HTMLPurifier_AttrTransform_NameSync(); + } + } + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php new file mode 100644 index 0000000000..5f1b14abb8 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/NonXMLCommonAttributes.php @@ -0,0 +1,14 @@ + array( + 'lang' => 'LanguageCode', + ) + ); +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Object.php 
b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Object.php new file mode 100644 index 0000000000..193c1011f8 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Object.php @@ -0,0 +1,47 @@ + to cater to legacy browsers: this + * module does not allow this sort of behavior + */ +class HTMLPurifier_HTMLModule_Object extends HTMLPurifier_HTMLModule +{ + + public $name = 'Object'; + public $safe = false; + + public function setup($config) { + + $this->addElement('object', 'Inline', 'Optional: #PCDATA | Flow | param', 'Common', + array( + 'archive' => 'URI', + 'classid' => 'URI', + 'codebase' => 'URI', + 'codetype' => 'Text', + 'data' => 'URI', + 'declare' => 'Bool#declare', + 'height' => 'Length', + 'name' => 'CDATA', + 'standby' => 'Text', + 'tabindex' => 'Number', + 'type' => 'ContentType', + 'width' => 'Length' + ) + ); + + $this->addElement('param', false, 'Empty', false, + array( + 'id' => 'ID', + 'name*' => 'Text', + 'type' => 'Text', + 'value' => 'Text', + 'valuetype' => 'Enum#data,ref,object' + ) + ); + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Presentation.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Presentation.php new file mode 100644 index 0000000000..8ff0b5ed78 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Presentation.php @@ -0,0 +1,36 @@ +addElement('hr', 'Block', 'Empty', 'Common'); + $this->addElement('sub', 'Inline', 'Inline', 'Common'); + $this->addElement('sup', 'Inline', 'Inline', 'Common'); + $b = $this->addElement('b', 'Inline', 'Inline', 'Common'); + $b->formatting = true; + $big = $this->addElement('big', 'Inline', 'Inline', 'Common'); + $big->formatting = true; + $i = $this->addElement('i', 'Inline', 'Inline', 'Common'); + $i->formatting = true; + $small = $this->addElement('small', 'Inline', 'Inline', 'Common'); + $small->formatting = true; + $tt = $this->addElement('tt', 'Inline', 'Inline', 'Common'); + $tt->formatting = true; + } + +} + +// vim: et sw=4 sts=4 
diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Proprietary.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Proprietary.php new file mode 100644 index 0000000000..dd36a3de0e --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Proprietary.php @@ -0,0 +1,33 @@ +addElement('marquee', 'Inline', 'Flow', 'Common', + array( + 'direction' => 'Enum#left,right,up,down', + 'behavior' => 'Enum#alternate', + 'width' => 'Length', + 'height' => 'Length', + 'scrolldelay' => 'Number', + 'scrollamount' => 'Number', + 'loop' => 'Number', + 'bgcolor' => 'Color', + 'hspace' => 'Pixels', + 'vspace' => 'Pixels', + ) + ); + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Ruby.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Ruby.php new file mode 100644 index 0000000000..b26a0a30a0 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Ruby.php @@ -0,0 +1,27 @@ +addElement('ruby', 'Inline', + 'Custom: ((rb, (rt | (rp, rt, rp))) | (rbc, rtc, rtc?))', + 'Common'); + $this->addElement('rbc', false, 'Required: rb', 'Common'); + $this->addElement('rtc', false, 'Required: rt', 'Common'); + $rb = $this->addElement('rb', false, 'Inline', 'Common'); + $rb->excludes = array('ruby' => true); + $rt = $this->addElement('rt', false, 'Inline', 'Common', array('rbspan' => 'Number')); + $rt->excludes = array('ruby' => true); + $this->addElement('rp', false, 'Optional: #PCDATA', 'Common'); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/SafeEmbed.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/SafeEmbed.php new file mode 100644 index 0000000000..8fc03cb1c7 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/SafeEmbed.php @@ -0,0 +1,33 @@ +get('HTML.MaxImgLength'); + $embed = $this->addElement( + 'embed', 'Inline', 'Empty', 'Common', + array( + 'src*' => 'URI#embedded', + 'type' => 'Enum#application/x-shockwave-flash', + 'width' => 'Pixels#' . $max, + 'height' => 'Pixels#' . 
$max, + 'allowscriptaccess' => 'Enum#never', + 'allownetworking' => 'Enum#internal', + 'wmode' => 'Enum#window', + 'name' => 'ID', + ) + ); + $embed->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeEmbed(); + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/SafeObject.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/SafeObject.php new file mode 100644 index 0000000000..33bac00cf2 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/SafeObject.php @@ -0,0 +1,50 @@ +get('HTML.MaxImgLength'); + $object = $this->addElement( + 'object', + 'Inline', + 'Optional: param | Flow | #PCDATA', + 'Common', + array( + // While technically not required by the spec, we're forcing + // it to this value. + 'type' => 'Enum#application/x-shockwave-flash', + 'width' => 'Pixels#' . $max, + 'height' => 'Pixels#' . $max, + 'data' => 'URI#embedded' + ) + ); + $object->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeObject(); + + $param = $this->addElement('param', false, 'Empty', false, + array( + 'id' => 'ID', + 'name*' => 'Text', + 'value' => 'Text' + ) + ); + $param->attr_transform_post[] = new HTMLPurifier_AttrTransform_SafeParam(); + $this->info_injector[] = 'SafeObject'; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Scripting.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Scripting.php new file mode 100644 index 0000000000..cecdea6c30 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Scripting.php @@ -0,0 +1,54 @@ + 'script | noscript', 'Inline' => 'script | noscript'); + public $safe = false; + + public function setup($config) { + // TODO: create custom child-definition for noscript that + // auto-wraps stray #PCDATA in a similar manner to + // blockquote's custom definition (we would use it but + // blockquote's contents are optional while noscript's contents + // are required) + + // TODO: convert this to new syntax, main problem is getting + 
// both content sets working + + // In theory, this could be safe, but I don't see any reason to + // allow it. + $this->info['noscript'] = new HTMLPurifier_ElementDef(); + $this->info['noscript']->attr = array( 0 => array('Common') ); + $this->info['noscript']->content_model = 'Heading | List | Block'; + $this->info['noscript']->content_model_type = 'required'; + + $this->info['script'] = new HTMLPurifier_ElementDef(); + $this->info['script']->attr = array( + 'defer' => new HTMLPurifier_AttrDef_Enum(array('defer')), + 'src' => new HTMLPurifier_AttrDef_URI(true), + 'type' => new HTMLPurifier_AttrDef_Enum(array('text/javascript')) + ); + $this->info['script']->content_model = '#PCDATA'; + $this->info['script']->content_model_type = 'optional'; + $this->info['script']->attr_transform_pre['type'] = + $this->info['script']->attr_transform_post['type'] = + new HTMLPurifier_AttrTransform_ScriptRequired(); + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/StyleAttribute.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/StyleAttribute.php new file mode 100644 index 0000000000..eb78464cc0 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/StyleAttribute.php @@ -0,0 +1,24 @@ + array('style' => false), // see constructor + 'Core' => array(0 => array('Style')) + ); + + public function setup($config) { + $this->attr_collections['Style']['style'] = new HTMLPurifier_AttrDef_CSS(); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tables.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tables.php new file mode 100644 index 0000000000..f314ced3f8 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tables.php @@ -0,0 +1,66 @@ +addElement('caption', false, 'Inline', 'Common'); + + $this->addElement('table', 'Block', + new HTMLPurifier_ChildDef_Table(), 'Common', + array( + 'border' => 'Pixels', + 'cellpadding' => 'Length', + 'cellspacing' => 'Length', + 'frame' => 
'Enum#void,above,below,hsides,lhs,rhs,vsides,box,border', + 'rules' => 'Enum#none,groups,rows,cols,all', + 'summary' => 'Text', + 'width' => 'Length' + ) + ); + + // common attributes + $cell_align = array( + 'align' => 'Enum#left,center,right,justify,char', + 'charoff' => 'Length', + 'valign' => 'Enum#top,middle,bottom,baseline', + ); + + $cell_t = array_merge( + array( + 'abbr' => 'Text', + 'colspan' => 'Number', + 'rowspan' => 'Number', + ), + $cell_align + ); + $this->addElement('td', false, 'Flow', 'Common', $cell_t); + $this->addElement('th', false, 'Flow', 'Common', $cell_t); + + $this->addElement('tr', false, 'Required: td | th', 'Common', $cell_align); + + $cell_col = array_merge( + array( + 'span' => 'Number', + 'width' => 'MultiLength', + ), + $cell_align + ); + $this->addElement('col', false, 'Empty', 'Common', $cell_col); + $this->addElement('colgroup', false, 'Optional: col', 'Common', $cell_col); + + $this->addElement('tbody', false, 'Required: tr', 'Common', $cell_align); + $this->addElement('thead', false, 'Required: tr', 'Common', $cell_align); + $this->addElement('tfoot', false, 'Required: tr', 'Common', $cell_align); + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Target.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Target.php new file mode 100644 index 0000000000..2b844ecc45 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Target.php @@ -0,0 +1,23 @@ +addBlankElement($name); + $e->attr = array( + 'target' => new HTMLPurifier_AttrDef_HTML_FrameTarget() + ); + } + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Text.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Text.php new file mode 100644 index 0000000000..ae77c71886 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Text.php @@ -0,0 +1,71 @@ + 'Heading | Block | Inline' + ); + + public function setup($config) { + + // Inline Phrasal 
------------------------------------------------- + $this->addElement('abbr', 'Inline', 'Inline', 'Common'); + $this->addElement('acronym', 'Inline', 'Inline', 'Common'); + $this->addElement('cite', 'Inline', 'Inline', 'Common'); + $this->addElement('dfn', 'Inline', 'Inline', 'Common'); + $this->addElement('kbd', 'Inline', 'Inline', 'Common'); + $this->addElement('q', 'Inline', 'Inline', 'Common', array('cite' => 'URI')); + $this->addElement('samp', 'Inline', 'Inline', 'Common'); + $this->addElement('var', 'Inline', 'Inline', 'Common'); + + $em = $this->addElement('em', 'Inline', 'Inline', 'Common'); + $em->formatting = true; + + $strong = $this->addElement('strong', 'Inline', 'Inline', 'Common'); + $strong->formatting = true; + + $code = $this->addElement('code', 'Inline', 'Inline', 'Common'); + $code->formatting = true; + + // Inline Structural ---------------------------------------------- + $this->addElement('span', 'Inline', 'Inline', 'Common'); + $this->addElement('br', 'Inline', 'Empty', 'Core'); + + // Block Phrasal -------------------------------------------------- + $this->addElement('address', 'Block', 'Inline', 'Common'); + $this->addElement('blockquote', 'Block', 'Optional: Heading | Block | List', 'Common', array('cite' => 'URI') ); + $pre = $this->addElement('pre', 'Block', 'Inline', 'Common'); + $pre->excludes = $this->makeLookup( + 'img', 'big', 'small', 'object', 'applet', 'font', 'basefont' ); + $this->addElement('h1', 'Heading', 'Inline', 'Common'); + $this->addElement('h2', 'Heading', 'Inline', 'Common'); + $this->addElement('h3', 'Heading', 'Inline', 'Common'); + $this->addElement('h4', 'Heading', 'Inline', 'Common'); + $this->addElement('h5', 'Heading', 'Inline', 'Common'); + $this->addElement('h6', 'Heading', 'Inline', 'Common'); + + // Block Structural ----------------------------------------------- + $p = $this->addElement('p', 'Block', 'Inline', 'Common'); + $p->autoclose = array_flip(array("address", "blockquote", "center", "dir", "div", 
"dl", "fieldset", "ol", "p", "ul")); + + $this->addElement('div', 'Block', 'Flow', 'Common'); + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tidy.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tidy.php new file mode 100644 index 0000000000..21783f18eb --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tidy.php @@ -0,0 +1,207 @@ + 'none', 'light', 'medium', 'heavy'); + + /** + * Default level to place all fixes in. Disabled by default + */ + public $defaultLevel = null; + + /** + * Lists of fixes used by getFixesForLevel(). Format is: + * HTMLModule_Tidy->fixesForLevel[$level] = array('fix-1', 'fix-2'); + */ + public $fixesForLevel = array( + 'light' => array(), + 'medium' => array(), + 'heavy' => array() + ); + + /** + * Lazy load constructs the module by determining the necessary + * fixes to create and then delegating to the populate() function. + * @todo Wildcard matching and error reporting when an added or + * subtracted fix has no effect. + */ + public function setup($config) { + + // create fixes, initialize fixesForLevel + $fixes = $this->makeFixes(); + $this->makeFixesForLevel($fixes); + + // figure out which fixes to use + $level = $config->get('HTML.TidyLevel'); + $fixes_lookup = $this->getFixesForLevel($level); + + // get custom fix declarations: these need namespace processing + $add_fixes = $config->get('HTML.TidyAdd'); + $remove_fixes = $config->get('HTML.TidyRemove'); + + foreach ($fixes as $name => $fix) { + // needs to be refactored a little to implement globbing + if ( + isset($remove_fixes[$name]) || + (!isset($add_fixes[$name]) && !isset($fixes_lookup[$name])) + ) { + unset($fixes[$name]); + } + } + + // populate this module with necessary fixes + $this->populate($fixes); + + } + + /** + * Retrieves all fixes per a level, returning fixes for that specific + * level as well as all levels below it. 
+ * @param $level String level identifier, see $levels for valid values + * @return Lookup up table of fixes + */ + public function getFixesForLevel($level) { + if ($level == $this->levels[0]) { + return array(); + } + $activated_levels = array(); + for ($i = 1, $c = count($this->levels); $i < $c; $i++) { + $activated_levels[] = $this->levels[$i]; + if ($this->levels[$i] == $level) break; + } + if ($i == $c) { + trigger_error( + 'Tidy level ' . htmlspecialchars($level) . ' not recognized', + E_USER_WARNING + ); + return array(); + } + $ret = array(); + foreach ($activated_levels as $level) { + foreach ($this->fixesForLevel[$level] as $fix) { + $ret[$fix] = true; + } + } + return $ret; + } + + /** + * Dynamically populates the $fixesForLevel member variable using + * the fixes array. It may be custom overloaded, used in conjunction + * with $defaultLevel, or not used at all. + */ + public function makeFixesForLevel($fixes) { + if (!isset($this->defaultLevel)) return; + if (!isset($this->fixesForLevel[$this->defaultLevel])) { + trigger_error( + 'Default level ' . $this->defaultLevel . 
' does not exist', + E_USER_ERROR + ); + return; + } + $this->fixesForLevel[$this->defaultLevel] = array_keys($fixes); + } + + /** + * Populates the module with transforms and other special-case code + * based on a list of fixes passed to it + * @param $lookup Lookup table of fixes to activate + */ + public function populate($fixes) { + foreach ($fixes as $name => $fix) { + // determine what the fix is for + list($type, $params) = $this->getFixType($name); + switch ($type) { + case 'attr_transform_pre': + case 'attr_transform_post': + $attr = $params['attr']; + if (isset($params['element'])) { + $element = $params['element']; + if (empty($this->info[$element])) { + $e = $this->addBlankElement($element); + } else { + $e = $this->info[$element]; + } + } else { + $type = "info_$type"; + $e = $this; + } + // PHP does some weird parsing when I do + // $e->$type[$attr], so I have to assign a ref. + $f =& $e->$type; + $f[$attr] = $fix; + break; + case 'tag_transform': + $this->info_tag_transform[$params['element']] = $fix; + break; + case 'child': + case 'content_model_type': + $element = $params['element']; + if (empty($this->info[$element])) { + $e = $this->addBlankElement($element); + } else { + $e = $this->info[$element]; + } + $e->$type = $fix; + break; + default: + trigger_error("Fix type $type not supported", E_USER_ERROR); + break; + } + } + } + + /** + * Parses a fix name and determines what kind of fix it is, as well + * as other information defined by the fix + * @param $name String name of fix + * @return array(string $fix_type, array $fix_parameters) + * @note $fix_parameters is type dependant, see populate() for usage + * of these parameters + */ + public function getFixType($name) { + // parse it + $property = $attr = null; + if (strpos($name, '#') !== false) list($name, $property) = explode('#', $name); + if (strpos($name, '@') !== false) list($name, $attr) = explode('@', $name); + + // figure out the parameters + $params = array(); + if ($name !== '') 
$params['element'] = $name; + if (!is_null($attr)) $params['attr'] = $attr; + + // special case: attribute transform + if (!is_null($attr)) { + if (is_null($property)) $property = 'pre'; + $type = 'attr_transform_' . $property; + return array($type, $params); + } + + // special case: tag transform + if (is_null($property)) { + return array('tag_transform', $params); + } + + return array($property, $params); + + } + + /** + * Defines all fixes the module will perform in a compact + * associative array of fix name to fix implementation. + */ + public function makeFixes() {} + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tidy/Name.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tidy/Name.php new file mode 100644 index 0000000000..61ff85ce2f --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tidy/Name.php @@ -0,0 +1,24 @@ +content_model_type != 'strictblockquote') return parent::getChildDef($def); + return new HTMLPurifier_ChildDef_StrictBlockquote($def->content_model); + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tidy/Transitional.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tidy/Transitional.php new file mode 100644 index 0000000000..9960b1dd10 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/Tidy/Transitional.php @@ -0,0 +1,9 @@ + 'text-align:left;', + 'right' => 'text-align:right;', + 'top' => 'caption-side:top;', + 'bottom' => 'caption-side:bottom;' // not supported by IE + )); + + // @align for img ------------------------------------------------- + $r['img@align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( + 'left' => 'float:left;', + 'right' => 'float:right;', + 'top' => 'vertical-align:top;', + 'middle' => 'vertical-align:middle;', + 'bottom' => 'vertical-align:baseline;', + )); + + // @align for table ----------------------------------------------- + $r['table@align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', 
array( + 'left' => 'float:left;', + 'center' => 'margin-left:auto;margin-right:auto;', + 'right' => 'float:right;' + )); + + // @align for hr ----------------------------------------------- + $r['hr@align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', array( + // we use both text-align and margin because these work + // for different browsers (IE and Firefox, respectively) + // and the melange makes for a pretty cross-compatible + // solution + 'left' => 'margin-left:0;margin-right:auto;text-align:left;', + 'center' => 'margin-left:auto;margin-right:auto;text-align:center;', + 'right' => 'margin-left:auto;margin-right:0;text-align:right;' + )); + + // @align for h1, h2, h3, h4, h5, h6, p, div ---------------------- + // {{{ + $align_lookup = array(); + $align_values = array('left', 'right', 'center', 'justify'); + foreach ($align_values as $v) $align_lookup[$v] = "text-align:$v;"; + // }}} + $r['h1@align'] = + $r['h2@align'] = + $r['h3@align'] = + $r['h4@align'] = + $r['h5@align'] = + $r['h6@align'] = + $r['p@align'] = + $r['div@align'] = + new HTMLPurifier_AttrTransform_EnumToCSS('align', $align_lookup); + + // @bgcolor for table, tr, td, th --------------------------------- + $r['table@bgcolor'] = + $r['td@bgcolor'] = + $r['th@bgcolor'] = + new HTMLPurifier_AttrTransform_BgColor(); + + // @border for img ------------------------------------------------ + $r['img@border'] = new HTMLPurifier_AttrTransform_Border(); + + // @clear for br -------------------------------------------------- + $r['br@clear'] = + new HTMLPurifier_AttrTransform_EnumToCSS('clear', array( + 'left' => 'clear:left;', + 'right' => 'clear:right;', + 'all' => 'clear:both;', + 'none' => 'clear:none;', + )); + + // @height for td, th --------------------------------------------- + $r['td@height'] = + $r['th@height'] = + new HTMLPurifier_AttrTransform_Length('height'); + + // @hspace for img ------------------------------------------------ + $r['img@hspace'] = new 
HTMLPurifier_AttrTransform_ImgSpace('hspace'); + + // @noshade for hr ------------------------------------------------ + // this transformation is not precise but often good enough. + // different browsers use different styles to designate noshade + $r['hr@noshade'] = + new HTMLPurifier_AttrTransform_BoolToCSS( + 'noshade', + 'color:#808080;background-color:#808080;border:0;' + ); + + // @nowrap for td, th --------------------------------------------- + $r['td@nowrap'] = + $r['th@nowrap'] = + new HTMLPurifier_AttrTransform_BoolToCSS( + 'nowrap', + 'white-space:nowrap;' + ); + + // @size for hr -------------------------------------------------- + $r['hr@size'] = new HTMLPurifier_AttrTransform_Length('size', 'height'); + + // @type for li, ol, ul ------------------------------------------- + // {{{ + $ul_types = array( + 'disc' => 'list-style-type:disc;', + 'square' => 'list-style-type:square;', + 'circle' => 'list-style-type:circle;' + ); + $ol_types = array( + '1' => 'list-style-type:decimal;', + 'i' => 'list-style-type:lower-roman;', + 'I' => 'list-style-type:upper-roman;', + 'a' => 'list-style-type:lower-alpha;', + 'A' => 'list-style-type:upper-alpha;' + ); + $li_types = $ul_types + $ol_types; + // }}} + + $r['ul@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ul_types); + $r['ol@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $ol_types, true); + $r['li@type'] = new HTMLPurifier_AttrTransform_EnumToCSS('type', $li_types, true); + + // @vspace for img ------------------------------------------------ + $r['img@vspace'] = new HTMLPurifier_AttrTransform_ImgSpace('vspace'); + + // @width for hr, td, th ------------------------------------------ + $r['td@width'] = + $r['th@width'] = + $r['hr@width'] = new HTMLPurifier_AttrTransform_Length('width'); + + return $r; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/XMLCommonAttributes.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/XMLCommonAttributes.php 
new file mode 100644 index 0000000000..9c0e031984 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModule/XMLCommonAttributes.php @@ -0,0 +1,14 @@ + array( + 'xml:lang' => 'LanguageCode', + ) + ); +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/HTMLModuleManager.php b/extlib/HTMLPurifier/HTMLPurifier/HTMLModuleManager.php new file mode 100644 index 0000000000..f5c4a1d2cb --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/HTMLModuleManager.php @@ -0,0 +1,403 @@ +attrTypes = new HTMLPurifier_AttrTypes(); + $this->doctypes = new HTMLPurifier_DoctypeRegistry(); + + // setup basic modules + $common = array( + 'CommonAttributes', 'Text', 'Hypertext', 'List', + 'Presentation', 'Edit', 'Bdo', 'Tables', 'Image', + 'StyleAttribute', + // Unsafe: + 'Scripting', 'Object', 'Forms', + // Sorta legacy, but present in strict: + 'Name', + ); + $transitional = array('Legacy', 'Target'); + $xml = array('XMLCommonAttributes'); + $non_xml = array('NonXMLCommonAttributes'); + + // setup basic doctypes + $this->doctypes->register( + 'HTML 4.01 Transitional', false, + array_merge($common, $transitional, $non_xml), + array('Tidy_Transitional', 'Tidy_Proprietary'), + array(), + '-//W3C//DTD HTML 4.01 Transitional//EN', + 'http://www.w3.org/TR/html4/loose.dtd' + ); + + $this->doctypes->register( + 'HTML 4.01 Strict', false, + array_merge($common, $non_xml), + array('Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'), + array(), + '-//W3C//DTD HTML 4.01//EN', + 'http://www.w3.org/TR/html4/strict.dtd' + ); + + $this->doctypes->register( + 'XHTML 1.0 Transitional', true, + array_merge($common, $transitional, $xml, $non_xml), + array('Tidy_Transitional', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Name'), + array(), + '-//W3C//DTD XHTML 1.0 Transitional//EN', + 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd' + ); + + $this->doctypes->register( + 'XHTML 1.0 Strict', true, + array_merge($common, $xml, $non_xml), + array('Tidy_Strict', 'Tidy_XHTML', 
'Tidy_Strict', 'Tidy_Proprietary', 'Tidy_Name'), + array(), + '-//W3C//DTD XHTML 1.0 Strict//EN', + 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd' + ); + + $this->doctypes->register( + 'XHTML 1.1', true, + array_merge($common, $xml, array('Ruby')), + array('Tidy_Strict', 'Tidy_XHTML', 'Tidy_Proprietary', 'Tidy_Strict', 'Tidy_Name'), // Tidy_XHTML1_1 + array(), + '-//W3C//DTD XHTML 1.1//EN', + 'http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd' + ); + + } + + /** + * Registers a module to the recognized module list, useful for + * overloading pre-existing modules. + * @param $module Mixed: string module name, with or without + * HTMLPurifier_HTMLModule prefix, or instance of + * subclass of HTMLPurifier_HTMLModule. + * @param $overload Boolean whether or not to overload previous modules. + * If this is not set, and you do overload a module, + * HTML Purifier will complain with a warning. + * @note This function will not call autoload, you must instantiate + * (and thus invoke) autoload outside the method. + * @note If a string is passed as a module name, different variants + * will be tested in this order: + * - Check for HTMLPurifier_HTMLModule_$name + * - Check all prefixes with $name in order they were added + * - Check for literal object name + * - Throw fatal error + * If your object name collides with an internal class, specify + * your module manually. All modules must have been included + * externally: registerModule will not perform inclusions for you! + */ + public function registerModule($module, $overload = false) { + if (is_string($module)) { + // attempt to load the module + $original_module = $module; + $ok = false; + foreach ($this->prefixes as $prefix) { + $module = $prefix . $original_module; + if (class_exists($module)) { + $ok = true; + break; + } + } + if (!$ok) { + $module = $original_module; + if (!class_exists($module)) { + trigger_error($original_module . 
' module does not exist', + E_USER_ERROR); + return; + } + } + $module = new $module(); + } + if (empty($module->name)) { + trigger_error('Module instance of ' . get_class($module) . ' must have name'); + return; + } + if (!$overload && isset($this->registeredModules[$module->name])) { + trigger_error('Overloading ' . $module->name . ' without explicit overload parameter', E_USER_WARNING); + } + $this->registeredModules[$module->name] = $module; + } + + /** + * Adds a module to the current doctype by first registering it, + * and then tacking it on to the active doctype + */ + public function addModule($module) { + $this->registerModule($module); + if (is_object($module)) $module = $module->name; + $this->userModules[] = $module; + } + + /** + * Adds a class prefix that registerModule() will use to resolve a + * string name to a concrete class + */ + public function addPrefix($prefix) { + $this->prefixes[] = $prefix; + } + + /** + * Performs processing on modules, after being called you may + * use getElement() and getElements() + * @param $config Instance of HTMLPurifier_Config + */ + public function setup($config) { + + $this->trusted = $config->get('HTML.Trusted'); + + // generate + $this->doctype = $this->doctypes->make($config); + $modules = $this->doctype->modules; + + // take out the default modules that aren't allowed + $lookup = $config->get('HTML.AllowedModules'); + $special_cases = $config->get('HTML.CoreModules'); + + if (is_array($lookup)) { + foreach ($modules as $k => $m) { + if (isset($special_cases[$m])) continue; + if (!isset($lookup[$m])) unset($modules[$k]); + } + } + + // add proprietary module (this gets special treatment because + // it is completely removed from doctypes, etc.) 
+ if ($config->get('HTML.Proprietary')) { + $modules[] = 'Proprietary'; + } + + // add SafeObject/Safeembed modules + if ($config->get('HTML.SafeObject')) { + $modules[] = 'SafeObject'; + } + if ($config->get('HTML.SafeEmbed')) { + $modules[] = 'SafeEmbed'; + } + + // merge in custom modules + $modules = array_merge($modules, $this->userModules); + + foreach ($modules as $module) { + $this->processModule($module); + $this->modules[$module]->setup($config); + } + + foreach ($this->doctype->tidyModules as $module) { + $this->processModule($module); + $this->modules[$module]->setup($config); + } + + // prepare any injectors + foreach ($this->modules as $module) { + $n = array(); + foreach ($module->info_injector as $i => $injector) { + if (!is_object($injector)) { + $class = "HTMLPurifier_Injector_$injector"; + $injector = new $class; + } + $n[$injector->name] = $injector; + } + $module->info_injector = $n; + } + + // setup lookup table based on all valid modules + foreach ($this->modules as $module) { + foreach ($module->info as $name => $def) { + if (!isset($this->elementLookup[$name])) { + $this->elementLookup[$name] = array(); + } + $this->elementLookup[$name][] = $module->name; + } + } + + // note the different choice + $this->contentSets = new HTMLPurifier_ContentSets( + // content set assembly deals with all possible modules, + // not just ones deemed to be "safe" + $this->modules + ); + $this->attrCollections = new HTMLPurifier_AttrCollections( + $this->attrTypes, + // there is no way to directly disable a global attribute, + // but using AllowedAttributes or simply not including + // the module in your custom doctype should be sufficient + $this->modules + ); + } + + /** + * Takes a module and adds it to the active module collection, + * registering it if necessary. 
+ */ + public function processModule($module) { + if (!isset($this->registeredModules[$module]) || is_object($module)) { + $this->registerModule($module); + } + $this->modules[$module] = $this->registeredModules[$module]; + } + + /** + * Retrieves merged element definitions. + * @return Array of HTMLPurifier_ElementDef + */ + public function getElements() { + + $elements = array(); + foreach ($this->modules as $module) { + if (!$this->trusted && !$module->safe) continue; + foreach ($module->info as $name => $v) { + if (isset($elements[$name])) continue; + $elements[$name] = $this->getElement($name); + } + } + + // remove dud elements, this happens when an element that + // appeared to be safe actually wasn't + foreach ($elements as $n => $v) { + if ($v === false) unset($elements[$n]); + } + + return $elements; + + } + + /** + * Retrieves a single merged element definition + * @param $name Name of element + * @param $trusted Boolean trusted overriding parameter: set to true + * if you want the full version of an element + * @return Merged HTMLPurifier_ElementDef + * @note You may notice that modules are getting iterated over twice (once + * in getElements() and once here). This + * is because + */ + public function getElement($name, $trusted = null) { + + if (!isset($this->elementLookup[$name])) { + return false; + } + + // setup global state variables + $def = false; + if ($trusted === null) $trusted = $this->trusted; + + // iterate through each module that has registered itself to this + // element + foreach($this->elementLookup[$name] as $module_name) { + + $module = $this->modules[$module_name]; + + // refuse to create/merge from a module that is deemed unsafe-- + // pretend the module doesn't exist--when trusted mode is not on. + if (!$trusted && !$module->safe) { + continue; + } + + // clone is used because, ideally speaking, the original + // definition should not be modified. 
Usually, this will + // make no difference, but for consistency's sake + $new_def = clone $module->info[$name]; + + if (!$def && $new_def->standalone) { + $def = $new_def; + } elseif ($def) { + // This will occur even if $new_def is standalone. In practice, + // this will usually result in a full replacement. + $def->mergeIn($new_def); + } else { + // :TODO: + // non-standalone definitions that don't have a standalone + // to merge into could be deferred to the end + continue; + } + + // attribute value expansions + $this->attrCollections->performInclusions($def->attr); + $this->attrCollections->expandIdentifiers($def->attr, $this->attrTypes); + + // descendants_are_inline, for ChildDef_Chameleon + if (is_string($def->content_model) && + strpos($def->content_model, 'Inline') !== false) { + if ($name != 'del' && $name != 'ins') { + // this is for you, ins/del + $def->descendants_are_inline = true; + } + } + + $this->contentSets->generateChildDef($def, $module); + } + + // This can occur if there is a blank definition, but no base to + // mix it in with + if (!$def) return false; + + // add information on required attributes + foreach ($def->attr as $attr_name => $attr_def) { + if ($attr_def->required) { + $def->required_attr[] = $attr_name; + } + } + + return $def; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/IDAccumulator.php b/extlib/HTMLPurifier/HTMLPurifier/IDAccumulator.php new file mode 100644 index 0000000000..73215295a5 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/IDAccumulator.php @@ -0,0 +1,53 @@ +load($config->get('Attr.IDBlacklist')); + return $id_accumulator; + } + + /** + * Add an ID to the lookup table. + * @param $id ID to be added. 
+ * @return Bool status, true if success, false if there's a dupe + */ + public function add($id) { + if (isset($this->ids[$id])) return false; + return $this->ids[$id] = true; + } + + /** + * Load a list of IDs into the lookup table + * @param $array_of_ids Array of IDs to load + * @note This function doesn't care about duplicates + */ + public function load($array_of_ids) { + foreach ($array_of_ids as $id) { + $this->ids[$id] = true; + } + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Injector.php b/extlib/HTMLPurifier/HTMLPurifier/Injector.php new file mode 100644 index 0000000000..5922f81305 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Injector.php @@ -0,0 +1,239 @@ +processToken() + * documentation. + * + * @todo Allow injectors to request a re-run on their output. This + * would help if an operation is recursive. + */ +abstract class HTMLPurifier_Injector +{ + + /** + * Advisory name of injector, this is for friendly error messages + */ + public $name; + + /** + * Instance of HTMLPurifier_HTMLDefinition + */ + protected $htmlDefinition; + + /** + * Reference to CurrentNesting variable in Context. This is an array + * list of tokens that we are currently "inside" + */ + protected $currentNesting; + + /** + * Reference to InputTokens variable in Context. This is an array + * list of the input tokens that are being processed. + */ + protected $inputTokens; + + /** + * Reference to InputIndex variable in Context. This is an integer + * array index for $this->inputTokens that indicates what token + * is currently being processed. + */ + protected $inputIndex; + + /** + * Array of elements and attributes this injector creates and therefore + * need to be allowed by the definition. Takes form of + * array('element' => array('attr', 'attr2'), 'element2') + */ + public $needed = array(); + + /** + * Index of inputTokens to rewind to. + */ + protected $rewind = false; + + /** + * Rewind to a spot to re-perform processing. 
This is useful if you + * deleted a node, and now need to see if this change affected any + * earlier nodes. Rewinding does not affect other injectors, and can + * result in infinite loops if not used carefully. + * @warning HTML Purifier will prevent you from fast-forwarding with this + * function. + */ + public function rewind($index) { + $this->rewind = $index; + } + + /** + * Retrieves rewind, and then unsets it. + */ + public function getRewind() { + $r = $this->rewind; + $this->rewind = false; + return $r; + } + + /** + * Prepares the injector by giving it the config and context objects: + * this allows references to important variables to be made within + * the injector. This function also checks if the HTML environment + * will work with the Injector (see checkNeeded()). + * @param $config Instance of HTMLPurifier_Config + * @param $context Instance of HTMLPurifier_Context + * @return Boolean false if success, string of missing needed element/attribute if failure + */ + public function prepare($config, $context) { + $this->htmlDefinition = $config->getHTMLDefinition(); + // Even though this might fail, some unit tests ignore this and + // still test checkNeeded, so be careful. Maybe get rid of that + // dependency. + $result = $this->checkNeeded($config); + if ($result !== false) return $result; + $this->currentNesting =& $context->get('CurrentNesting'); + $this->inputTokens =& $context->get('InputTokens'); + $this->inputIndex =& $context->get('InputIndex'); + return false; + } + + /** + * This function checks if the HTML environment + * will work with the Injector: if p tags are not allowed, the + * Auto-Paragraphing injector should not be enabled. 
+ * @param $config Instance of HTMLPurifier_Config + * @param $context Instance of HTMLPurifier_Context + * @return Boolean false if success, string of missing needed element/attribute if failure + */ + public function checkNeeded($config) { + $def = $config->getHTMLDefinition(); + foreach ($this->needed as $element => $attributes) { + if (is_int($element)) $element = $attributes; + if (!isset($def->info[$element])) return $element; + if (!is_array($attributes)) continue; + foreach ($attributes as $name) { + if (!isset($def->info[$element]->attr[$name])) return "$element.$name"; + } + } + return false; + } + + /** + * Tests if the context node allows a certain element + * @param $name Name of element to test for + * @return True if element is allowed, false if it is not + */ + public function allowsElement($name) { + if (!empty($this->currentNesting)) { + $parent_token = array_pop($this->currentNesting); + $this->currentNesting[] = $parent_token; + $parent = $this->htmlDefinition->info[$parent_token->name]; + } else { + $parent = $this->htmlDefinition->info_parent_def; + } + if (!isset($parent->child->elements[$name]) || isset($parent->excludes[$name])) { + return false; + } + // check for exclusion + for ($i = count($this->currentNesting) - 2; $i >= 0; $i--) { + $node = $this->currentNesting[$i]; + $def = $this->htmlDefinition->info[$node->name]; + if (isset($def->excludes[$name])) return false; + } + return true; + } + + /** + * Iterator function, which starts with the next token and continues until + * you reach the end of the input tokens. + * @warning Please prevent previous references from interfering with this + * functions by setting $i = null beforehand! + * @param &$i Current integer index variable for inputTokens + * @param &$current Current token variable. 
Do NOT use $token, as that variable is also a reference + */ + protected function forward(&$i, &$current) { + if ($i === null) $i = $this->inputIndex + 1; + else $i++; + if (!isset($this->inputTokens[$i])) return false; + $current = $this->inputTokens[$i]; + return true; + } + + /** + * Similar to _forward, but accepts a third parameter $nesting (which + * should be initialized at 0) and stops when we hit the end tag + * for the node $this->inputIndex starts in. + */ + protected function forwardUntilEndToken(&$i, &$current, &$nesting) { + $result = $this->forward($i, $current); + if (!$result) return false; + if ($nesting === null) $nesting = 0; + if ($current instanceof HTMLPurifier_Token_Start) $nesting++; + elseif ($current instanceof HTMLPurifier_Token_End) { + if ($nesting <= 0) return false; + $nesting--; + } + return true; + } + + /** + * Iterator function, starts with the previous token and continues until + * you reach the beginning of input tokens. + * @warning Please prevent previous references from interfering with this + * functions by setting $i = null beforehand! + * @param &$i Current integer index variable for inputTokens + * @param &$current Current token variable. Do NOT use $token, as that variable is also a reference + */ + protected function backward(&$i, &$current) { + if ($i === null) $i = $this->inputIndex - 1; + else $i--; + if ($i < 0) return false; + $current = $this->inputTokens[$i]; + return true; + } + + /** + * Initializes the iterator at the current position. Use in a do {} while; + * loop to force the _forward and _backward functions to start at the + * current location. + * @warning Please prevent previous references from interfering with this + * functions by setting $i = null beforehand! + * @param &$i Current integer index variable for inputTokens + * @param &$current Current token variable. 
Do NOT use $token, as that variable is also a reference + */ + protected function current(&$i, &$current) { + if ($i === null) $i = $this->inputIndex; + $current = $this->inputTokens[$i]; + } + + /** + * Handler that is called when a text token is processed + */ + public function handleText(&$token) {} + + /** + * Handler that is called when a start or empty token is processed + */ + public function handleElement(&$token) {} + + /** + * Handler that is called when an end token is processed + */ + public function handleEnd(&$token) { + $this->notifyEnd($token); + } + + /** + * Notifier that is called when an end token is processed + * @note This differs from handlers in that the token is read-only + * @deprecated + */ + public function notifyEnd($token) {} + + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Injector/AutoParagraph.php b/extlib/HTMLPurifier/HTMLPurifier/Injector/AutoParagraph.php new file mode 100644 index 0000000000..8cc952549c --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Injector/AutoParagraph.php @@ -0,0 +1,340 @@ +armor['MakeWellFormed_TagClosedError'] = true; + return $par; + } + + public function handleText(&$token) { + $text = $token->data; + // Does the current parent allow

tags? + if ($this->allowsElement('p')) { + if (empty($this->currentNesting) || strpos($text, "\n\n") !== false) { + // Note that we have differing behavior when dealing with text + // in the anonymous root node, or a node inside the document. + // If the text as a double-newline, the treatment is the same; + // if it doesn't, see the next if-block if you're in the document. + + $i = $nesting = null; + if (!$this->forwardUntilEndToken($i, $current, $nesting) && $token->is_whitespace) { + // State 1.1: ... ^ (whitespace, then document end) + // ---- + // This is a degenerate case + } else { + // State 1.2: PAR1 + // ---- + + // State 1.3: PAR1\n\nPAR2 + // ------------ + + // State 1.4:

PAR1\n\nPAR2 (see State 2) + // ------------ + $token = array($this->_pStart()); + $this->_splitText($text, $token); + } + } else { + // State 2:
PAR1... (similar to 1.4) + // ---- + + // We're in an element that allows paragraph tags, but we're not + // sure if we're going to need them. + if ($this->_pLookAhead()) { + // State 2.1:
PAR1PAR1\n\nPAR2 + // ---- + // Note: This will always be the first child, since any + // previous inline element would have triggered this very + // same routine, and found the double newline. One possible + // exception would be a comment. + $token = array($this->_pStart(), $token); + } else { + // State 2.2.1:
PAR1
+ // ---- + + // State 2.2.2:
PAR1PAR1
+ // ---- + } + } + // Is the current parent a

tag? + } elseif ( + !empty($this->currentNesting) && + $this->currentNesting[count($this->currentNesting)-1]->name == 'p' + ) { + // State 3.1: ...

PAR1 + // ---- + + // State 3.2: ...

PAR1\n\nPAR2 + // ------------ + $token = array(); + $this->_splitText($text, $token); + // Abort! + } else { + // State 4.1: ...PAR1 + // ---- + + // State 4.2: ...PAR1\n\nPAR2 + // ------------ + } + } + + public function handleElement(&$token) { + // We don't have to check if we're already in a

tag for block + // tokens, because the tag would have been autoclosed by MakeWellFormed. + if ($this->allowsElement('p')) { + if (!empty($this->currentNesting)) { + if ($this->_isInline($token)) { + // State 1:

... + // --- + + // Check if this token is adjacent to the parent token + // (seek backwards until token isn't whitespace) + $i = null; + $this->backward($i, $prev); + + if (!$prev instanceof HTMLPurifier_Token_Start) { + // Token wasn't adjacent + + if ( + $prev instanceof HTMLPurifier_Token_Text && + substr($prev->data, -2) === "\n\n" + ) { + // State 1.1.4:

PAR1

\n\n + // --- + + // Quite frankly, this should be handled by splitText + $token = array($this->_pStart(), $token); + } else { + // State 1.1.1:

PAR1

+ // --- + + // State 1.1.2:

+ // --- + + // State 1.1.3:
PAR + // --- + } + + } else { + // State 1.2.1:
+ // --- + + // Lookahead to see if

is needed. + if ($this->_pLookAhead()) { + // State 1.3.1:

PAR1\n\nPAR2 + // --- + $token = array($this->_pStart(), $token); + } else { + // State 1.3.2:
PAR1
+ // --- + + // State 1.3.3:
PAR1
\n\n
+ // --- + } + } + } else { + // State 2.3: ...
+ // ----- + } + } else { + if ($this->_isInline($token)) { + // State 3.1: + // --- + // This is where the {p} tag is inserted, not reflected in + // inputTokens yet, however. + $token = array($this->_pStart(), $token); + } else { + // State 3.2:
+ // ----- + } + + $i = null; + if ($this->backward($i, $prev)) { + if ( + !$prev instanceof HTMLPurifier_Token_Text + ) { + // State 3.1.1: ...

{p} + // --- + + // State 3.2.1: ...

+ // ----- + + if (!is_array($token)) $token = array($token); + array_unshift($token, new HTMLPurifier_Token_Text("\n\n")); + } else { + // State 3.1.2: ...

\n\n{p} + // --- + + // State 3.2.2: ...

\n\n
+ // ----- + + // Note: PAR cannot occur because PAR would have been + // wrapped in

tags. + } + } + } + } else { + // State 2.2:

  • + // ---- + + // State 2.4:

    + // --- + } + } + + /** + * Splits up a text in paragraph tokens and appends them + * to the result stream that will replace the original + * @param $data String text data that will be processed + * into paragraphs + * @param $result Reference to array of tokens that the + * tags will be appended onto + * @param $config Instance of HTMLPurifier_Config + * @param $context Instance of HTMLPurifier_Context + */ + private function _splitText($data, &$result) { + $raw_paragraphs = explode("\n\n", $data); + $paragraphs = array(); // without empty paragraphs + $needs_start = false; + $needs_end = false; + + $c = count($raw_paragraphs); + if ($c == 1) { + // There were no double-newlines, abort quickly. In theory this + // should never happen. + $result[] = new HTMLPurifier_Token_Text($data); + return; + } + for ($i = 0; $i < $c; $i++) { + $par = $raw_paragraphs[$i]; + if (trim($par) !== '') { + $paragraphs[] = $par; + } else { + if ($i == 0) { + // Double newline at the front + if (empty($result)) { + // The empty result indicates that the AutoParagraph + // injector did not add any start paragraph tokens. + // This means that we have been in a paragraph for + // a while, and the newline means we should start a new one. + $result[] = new HTMLPurifier_Token_End('p'); + $result[] = new HTMLPurifier_Token_Text("\n\n"); + // However, the start token should only be added if + // there is more processing to be done (i.e. there are + // real paragraphs in here). If there are none, the + // next start paragraph tag will be handled by the + // next call to the injector + $needs_start = true; + } else { + // We just started a new paragraph! + // Reinstate a double-newline for presentation's sake, since + // it was in the source code. + array_unshift($result, new HTMLPurifier_Token_Text("\n\n")); + } + } elseif ($i + 1 == $c) { + // Double newline at the end + // There should be a trailing

    when we're finally done. + $needs_end = true; + } + } + } + + // Check if this was just a giant blob of whitespace. Move this earlier, + // perhaps? + if (empty($paragraphs)) { + return; + } + + // Add the start tag indicated by \n\n at the beginning of $data + if ($needs_start) { + $result[] = $this->_pStart(); + } + + // Append the paragraphs onto the result + foreach ($paragraphs as $par) { + $result[] = new HTMLPurifier_Token_Text($par); + $result[] = new HTMLPurifier_Token_End('p'); + $result[] = new HTMLPurifier_Token_Text("\n\n"); + $result[] = $this->_pStart(); + } + + // Remove trailing start token; Injector will handle this later if + // it was indeed needed. This prevents from needing to do a lookahead, + // at the cost of a lookbehind later. + array_pop($result); + + // If there is no need for an end tag, remove all of it and let + // MakeWellFormed close it later. + if (!$needs_end) { + array_pop($result); // removes \n\n + array_pop($result); // removes

    + } + + } + + /** + * Returns true if passed token is inline (and, ergo, allowed in + * paragraph tags) + */ + private function _isInline($token) { + return isset($this->htmlDefinition->info['p']->child->elements[$token->name]); + } + + /** + * Looks ahead in the token list and determines whether or not we need + * to insert a

    tag. + */ + private function _pLookAhead() { + $this->current($i, $current); + if ($current instanceof HTMLPurifier_Token_Start) $nesting = 1; + else $nesting = 0; + $ok = false; + while ($this->forwardUntilEndToken($i, $current, $nesting)) { + $result = $this->_checkNeedsP($current); + if ($result !== null) { + $ok = $result; + break; + } + } + return $ok; + } + + /** + * Determines if a particular token requires an earlier inline token + * to get a paragraph. This should be used with _forwardUntilEndToken + */ + private function _checkNeedsP($current) { + if ($current instanceof HTMLPurifier_Token_Start){ + if (!$this->_isInline($current)) { + //

    PAR1
    + // ---- + // Terminate early, since we hit a block element + return false; + } + } elseif ($current instanceof HTMLPurifier_Token_Text) { + if (strpos($current->data, "\n\n") !== false) { + //
    PAR1PAR1\n\nPAR2 + // ---- + return true; + } else { + //
    PAR1PAR1... + // ---- + } + } + return null; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Injector/DisplayLinkURI.php b/extlib/HTMLPurifier/HTMLPurifier/Injector/DisplayLinkURI.php new file mode 100644 index 0000000000..9dce9bd085 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Injector/DisplayLinkURI.php @@ -0,0 +1,26 @@ +start->attr['href'])){ + $url = $token->start->attr['href']; + unset($token->start->attr['href']); + $token = array($token, new HTMLPurifier_Token_Text(" ($url)")); + } else { + // nothing to display + } + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Injector/Linkify.php b/extlib/HTMLPurifier/HTMLPurifier/Injector/Linkify.php new file mode 100644 index 0000000000..296dac2829 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Injector/Linkify.php @@ -0,0 +1,46 @@ + array('href')); + + public function handleText(&$token) { + if (!$this->allowsElement('a')) return; + + if (strpos($token->data, '://') === false) { + // our really quick heuristic failed, abort + // this may not work so well if we want to match things like + // "google.com", but then again, most people don't + return; + } + + // there is/are URL(s). 
Let's split the string: + // Note: this regex is extremely permissive + $bits = preg_split('#((?:https?|ftp)://[^\s\'"<>()]+)#S', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); + + $token = array(); + + // $i = index + // $c = count + // $l = is link + for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) { + if (!$l) { + if ($bits[$i] === '') continue; + $token[] = new HTMLPurifier_Token_Text($bits[$i]); + } else { + $token[] = new HTMLPurifier_Token_Start('a', array('href' => $bits[$i])); + $token[] = new HTMLPurifier_Token_Text($bits[$i]); + $token[] = new HTMLPurifier_Token_End('a'); + } + } + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Injector/PurifierLinkify.php b/extlib/HTMLPurifier/HTMLPurifier/Injector/PurifierLinkify.php new file mode 100644 index 0000000000..ad2455a91c --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Injector/PurifierLinkify.php @@ -0,0 +1,45 @@ + array('href')); + + public function prepare($config, $context) { + $this->docURL = $config->get('AutoFormat.PurifierLinkify.DocURL'); + return parent::prepare($config, $context); + } + + public function handleText(&$token) { + if (!$this->allowsElement('a')) return; + if (strpos($token->data, '%') === false) return; + + $bits = preg_split('#%([a-z0-9]+\.[a-z0-9]+)#Si', $token->data, -1, PREG_SPLIT_DELIM_CAPTURE); + $token = array(); + + // $i = index + // $c = count + // $l = is link + for ($i = 0, $c = count($bits), $l = false; $i < $c; $i++, $l = !$l) { + if (!$l) { + if ($bits[$i] === '') continue; + $token[] = new HTMLPurifier_Token_Text($bits[$i]); + } else { + $token[] = new HTMLPurifier_Token_Start('a', + array('href' => str_replace('%s', $bits[$i], $this->docURL))); + $token[] = new HTMLPurifier_Token_Text('%' . 
$bits[$i]); + $token[] = new HTMLPurifier_Token_End('a'); + } + } + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Injector/RemoveEmpty.php b/extlib/HTMLPurifier/HTMLPurifier/Injector/RemoveEmpty.php new file mode 100644 index 0000000000..638bfca03b --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Injector/RemoveEmpty.php @@ -0,0 +1,51 @@ +config = $config; + $this->context = $context; + $this->removeNbsp = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp'); + $this->removeNbspExceptions = $config->get('AutoFormat.RemoveEmpty.RemoveNbsp.Exceptions'); + $this->attrValidator = new HTMLPurifier_AttrValidator(); + } + + public function handleElement(&$token) { + if (!$token instanceof HTMLPurifier_Token_Start) return; + $next = false; + for ($i = $this->inputIndex + 1, $c = count($this->inputTokens); $i < $c; $i++) { + $next = $this->inputTokens[$i]; + if ($next instanceof HTMLPurifier_Token_Text) { + if ($next->is_whitespace) continue; + if ($this->removeNbsp && !isset($this->removeNbspExceptions[$token->name])) { + $plain = str_replace("\xC2\xA0", "", $next->data); + $isWsOrNbsp = $plain === '' || ctype_space($plain); + if ($isWsOrNbsp) continue; + } + } + break; + } + if (!$next || ($next instanceof HTMLPurifier_Token_End && $next->name == $token->name)) { + if ($token->name == 'colgroup') return; + $this->attrValidator->validateToken($token, $this->config, $this->context); + $token->armor['ValidateAttributes'] = true; + if (isset($token->attr['id']) || isset($token->attr['name'])) return; + $token = $i - $this->inputIndex + 1; + for ($b = $this->inputIndex - 1; $b > 0; $b--) { + $prev = $this->inputTokens[$b]; + if ($prev instanceof HTMLPurifier_Token_Text && $prev->is_whitespace) continue; + break; + } + // This is safe because we removed the token that triggered this. 
+ $this->rewind($b - 1); + return; + } + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Injector/SafeObject.php b/extlib/HTMLPurifier/HTMLPurifier/Injector/SafeObject.php new file mode 100644 index 0000000000..3415828685 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Injector/SafeObject.php @@ -0,0 +1,87 @@ + 'never', + 'allowNetworking' => 'internal', + ); + protected $allowedParam = array( + 'wmode' => true, + 'movie' => true, + ); + + public function prepare($config, $context) { + parent::prepare($config, $context); + } + + public function handleElement(&$token) { + if ($token->name == 'object') { + $this->objectStack[] = $token; + $this->paramStack[] = array(); + $new = array($token); + foreach ($this->addParam as $name => $value) { + $new[] = new HTMLPurifier_Token_Empty('param', array('name' => $name, 'value' => $value)); + } + $token = $new; + } elseif ($token->name == 'param') { + $nest = count($this->currentNesting) - 1; + if ($nest >= 0 && $this->currentNesting[$nest]->name === 'object') { + $i = count($this->objectStack) - 1; + if (!isset($token->attr['name'])) { + $token = false; + return; + } + $n = $token->attr['name']; + // We need this fix because YouTube doesn't supply a data + // attribute, which we need if a type is specified. This is + // *very* Flash specific. 
+ if (!isset($this->objectStack[$i]->attr['data']) && $token->attr['name'] == 'movie') { + $this->objectStack[$i]->attr['data'] = $token->attr['value']; + } + // Check if the parameter is the correct value but has not + // already been added + if ( + !isset($this->paramStack[$i][$n]) && + isset($this->addParam[$n]) && + $token->attr['name'] === $this->addParam[$n] + ) { + // keep token, and add to param stack + $this->paramStack[$i][$n] = true; + } elseif (isset($this->allowedParam[$n])) { + // keep token, don't do anything to it + // (could possibly check for duplicates here) + } else { + $token = false; + } + } else { + // not directly inside an object, DENY! + $token = false; + } + } + } + + public function handleEnd(&$token) { + // This is the WRONG way of handling the object and param stacks; + // we should be inserting them directly on the relevant object tokens + // so that the global stack handling handles it. + if ($token->name == 'object') { + array_pop($this->objectStack); + array_pop($this->paramStack); + } + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Language.php b/extlib/HTMLPurifier/HTMLPurifier/Language.php new file mode 100644 index 0000000000..3e2be03b58 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Language.php @@ -0,0 +1,163 @@ +config = $config; + $this->context = $context; + } + + /** + * Loads language object with necessary info from factory cache + * @note This is a lazy loader + */ + public function load() { + if ($this->_loaded) return; + $factory = HTMLPurifier_LanguageFactory::instance(); + $factory->loadLanguage($this->code); + foreach ($factory->keys as $key) { + $this->$key = $factory->cache[$this->code][$key]; + } + $this->_loaded = true; + } + + /** + * Retrieves a localised message. 
+ * @param $key string identifier of message + * @return string localised message + */ + public function getMessage($key) { + if (!$this->_loaded) $this->load(); + if (!isset($this->messages[$key])) return "[$key]"; + return $this->messages[$key]; + } + + /** + * Retrieves a localised error name. + * @param $int integer error number, corresponding to PHP's error + * reporting + * @return string localised message + */ + public function getErrorName($int) { + if (!$this->_loaded) $this->load(); + if (!isset($this->errorNames[$int])) return "[Error: $int]"; + return $this->errorNames[$int]; + } + + /** + * Converts an array list into a string readable representation + */ + public function listify($array) { + $sep = $this->getMessage('Item separator'); + $sep_last = $this->getMessage('Item separator last'); + $ret = ''; + for ($i = 0, $c = count($array); $i < $c; $i++) { + if ($i == 0) { + } elseif ($i + 1 < $c) { + $ret .= $sep; + } else { + $ret .= $sep_last; + } + $ret .= $array[$i]; + } + return $ret; + } + + /** + * Formats a localised message with passed parameters + * @param $key string identifier of message + * @param $args Parameters to substitute in + * @return string localised message + * @todo Implement conditionals? 
Right now, some messages make + * reference to line numbers, but those aren't always available + */ + public function formatMessage($key, $args = array()) { + if (!$this->_loaded) $this->load(); + if (!isset($this->messages[$key])) return "[$key]"; + $raw = $this->messages[$key]; + $subst = array(); + $generator = false; + foreach ($args as $i => $value) { + if (is_object($value)) { + if ($value instanceof HTMLPurifier_Token) { + // factor this out some time + if (!$generator) $generator = $this->context->get('Generator'); + if (isset($value->name)) $subst['$'.$i.'.Name'] = $value->name; + if (isset($value->data)) $subst['$'.$i.'.Data'] = $value->data; + $subst['$'.$i.'.Compact'] = + $subst['$'.$i.'.Serialized'] = $generator->generateFromToken($value); + // a more complex algorithm for compact representation + // could be introduced for all types of tokens. This + // may need to be factored out into a dedicated class + if (!empty($value->attr)) { + $stripped_token = clone $value; + $stripped_token->attr = array(); + $subst['$'.$i.'.Compact'] = $generator->generateFromToken($stripped_token); + } + $subst['$'.$i.'.Line'] = $value->line ? $value->line : 'unknown'; + } + continue; + } elseif (is_array($value)) { + $keys = array_keys($value); + if (array_keys($keys) === $keys) { + // list + $subst['$'.$i] = $this->listify($value); + } else { + // associative array + // no $i implementation yet, sorry + $subst['$'.$i.'.Keys'] = $this->listify($keys); + $subst['$'.$i.'.Values'] = $this->listify(array_values($value)); + } + continue; + } + $subst['$' . 
$i] = $value; + } + return strtr($raw, $subst); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Language/classes/en-x-test.php b/extlib/HTMLPurifier/HTMLPurifier/Language/classes/en-x-test.php new file mode 100644 index 0000000000..d52fcb7ac1 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Language/classes/en-x-test.php @@ -0,0 +1,12 @@ + 'HTML Purifier X' +); + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Language/messages/en-x-testmini.php b/extlib/HTMLPurifier/HTMLPurifier/Language/messages/en-x-testmini.php new file mode 100644 index 0000000000..806c83fbf7 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Language/messages/en-x-testmini.php @@ -0,0 +1,12 @@ + 'HTML Purifier XNone' +); + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Language/messages/en.php b/extlib/HTMLPurifier/HTMLPurifier/Language/messages/en.php new file mode 100644 index 0000000000..aab2e52ebc --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Language/messages/en.php @@ -0,0 +1,62 @@ + 'HTML Purifier', + +// for unit testing purposes +'LanguageFactoryTest: Pizza' => 'Pizza', +'LanguageTest: List' => '$1', +'LanguageTest: Hash' => '$1.Keys; $1.Values', + +'Item separator' => ', ', +'Item separator last' => ' and ', // non-Harvard style + +'ErrorCollector: No errors' => 'No errors detected. 
However, because error reporting is still incomplete, there may have been errors that the error collector was not notified of; please inspect the output HTML carefully.', +'ErrorCollector: At line' => ' at line $line', +'ErrorCollector: Incidental errors' => 'Incidental errors', + +'Lexer: Unclosed comment' => 'Unclosed comment', +'Lexer: Unescaped lt' => 'Unescaped less-than sign (<) should be <', +'Lexer: Missing gt' => 'Missing greater-than sign (>), previous less-than sign (<) should be escaped', +'Lexer: Missing attribute key' => 'Attribute declaration has no key', +'Lexer: Missing end quote' => 'Attribute declaration has no end quote', + +'Strategy_RemoveForeignElements: Tag transform' => '<$1> element transformed into $CurrentToken.Serialized', +'Strategy_RemoveForeignElements: Missing required attribute' => '$CurrentToken.Compact element missing required attribute $1', +'Strategy_RemoveForeignElements: Foreign element to text' => 'Unrecognized $CurrentToken.Serialized tag converted to text', +'Strategy_RemoveForeignElements: Foreign element removed' => 'Unrecognized $CurrentToken.Serialized tag removed', +'Strategy_RemoveForeignElements: Comment removed' => 'Comment containing "$CurrentToken.Data" removed', +'Strategy_RemoveForeignElements: Foreign meta element removed' => 'Unrecognized $CurrentToken.Serialized meta tag and all descendants removed', +'Strategy_RemoveForeignElements: Token removed to end' => 'Tags and text starting from $1 element where removed to end', +'Strategy_RemoveForeignElements: Trailing hyphen in comment removed' => 'Trailing hyphen(s) in comment removed', +'Strategy_RemoveForeignElements: Hyphens in comment collapsed' => 'Double hyphens in comments are not allowed, and were collapsed into single hyphens', + +'Strategy_MakeWellFormed: Unnecessary end tag removed' => 'Unnecessary $CurrentToken.Serialized tag removed', +'Strategy_MakeWellFormed: Unnecessary end tag to text' => 'Unnecessary $CurrentToken.Serialized tag converted to 
text', +'Strategy_MakeWellFormed: Tag auto closed' => '$1.Compact started on line $1.Line auto-closed by $CurrentToken.Compact', +'Strategy_MakeWellFormed: Tag carryover' => '$1.Compact started on line $1.Line auto-continued into $CurrentToken.Compact', +'Strategy_MakeWellFormed: Stray end tag removed' => 'Stray $CurrentToken.Serialized tag removed', +'Strategy_MakeWellFormed: Stray end tag to text' => 'Stray $CurrentToken.Serialized tag converted to text', +'Strategy_MakeWellFormed: Tag closed by element end' => '$1.Compact tag started on line $1.Line closed by end of $CurrentToken.Serialized', +'Strategy_MakeWellFormed: Tag closed by document end' => '$1.Compact tag started on line $1.Line closed by end of document', + +'Strategy_FixNesting: Node removed' => '$CurrentToken.Compact node removed', +'Strategy_FixNesting: Node excluded' => '$CurrentToken.Compact node removed due to descendant exclusion by ancestor element', +'Strategy_FixNesting: Node reorganized' => 'Contents of $CurrentToken.Compact node reorganized to enforce its content model', +'Strategy_FixNesting: Node contents removed' => 'Contents of $CurrentToken.Compact node removed', + +'AttrValidator: Attributes transformed' => 'Attributes on $CurrentToken.Compact transformed from $1.Keys to $2.Keys', +'AttrValidator: Attribute removed' => '$CurrentAttr.Name attribute on $CurrentToken.Compact removed', + +); + +$errorNames = array( + E_ERROR => 'Error', + E_WARNING => 'Warning', + E_NOTICE => 'Notice' +); + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/LanguageFactory.php b/extlib/HTMLPurifier/HTMLPurifier/LanguageFactory.php new file mode 100644 index 0000000000..134ef8c745 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/LanguageFactory.php @@ -0,0 +1,198 @@ +cache[$language_code][$key] = $value + * @value array map + */ + public $cache; + + /** + * Valid keys in the HTMLPurifier_Language object. Designates which + * variables to slurp out of a message file. 
+ * @value array list + */ + public $keys = array('fallback', 'messages', 'errorNames'); + + /** + * Instance of HTMLPurifier_AttrDef_Lang to validate language codes + * @value object HTMLPurifier_AttrDef_Lang + */ + protected $validator; + + /** + * Cached copy of dirname(__FILE__), directory of current file without + * trailing slash + * @value string filename + */ + protected $dir; + + /** + * Keys whose contents are a hash map and can be merged + * @value array lookup + */ + protected $mergeable_keys_map = array('messages' => true, 'errorNames' => true); + + /** + * Keys whose contents are a list and can be merged + * @value array lookup + */ + protected $mergeable_keys_list = array(); + + /** + * Retrieve sole instance of the factory. + * @param $prototype Optional prototype to overload sole instance with, + * or bool true to reset to default factory. + */ + public static function instance($prototype = null) { + static $instance = null; + if ($prototype !== null) { + $instance = $prototype; + } elseif ($instance === null || $prototype == true) { + $instance = new HTMLPurifier_LanguageFactory(); + $instance->setup(); + } + return $instance; + } + + /** + * Sets up the singleton, much like a constructor + * @note Prevents people from getting this outside of the singleton + */ + public function setup() { + $this->validator = new HTMLPurifier_AttrDef_Lang(); + $this->dir = HTMLPURIFIER_PREFIX . '/HTMLPurifier'; + } + + /** + * Creates a language object, handles class fallbacks + * @param $config Instance of HTMLPurifier_Config + * @param $context Instance of HTMLPurifier_Context + * @param $code Code to override configuration with. Private parameter. 
+ */ + public function create($config, $context, $code = false) { + + // validate language code + if ($code === false) { + $code = $this->validator->validate( + $config->get('Core.Language'), $config, $context + ); + } else { + $code = $this->validator->validate($code, $config, $context); + } + if ($code === false) $code = 'en'; // malformed code becomes English + + $pcode = str_replace('-', '_', $code); // make valid PHP classname + static $depth = 0; // recursion protection + + if ($code == 'en') { + $lang = new HTMLPurifier_Language($config, $context); + } else { + $class = 'HTMLPurifier_Language_' . $pcode; + $file = $this->dir . '/Language/classes/' . $code . '.php'; + if (file_exists($file) || class_exists($class, false)) { + $lang = new $class($config, $context); + } else { + // Go fallback + $raw_fallback = $this->getFallbackFor($code); + $fallback = $raw_fallback ? $raw_fallback : 'en'; + $depth++; + $lang = $this->create($config, $context, $fallback); + if (!$raw_fallback) { + $lang->error = true; + } + $depth--; + } + } + + $lang->code = $code; + + return $lang; + + } + + /** + * Returns the fallback language for language + * @note Loads the original language into cache + * @param $code string language code + */ + public function getFallbackFor($code) { + $this->loadLanguage($code); + return $this->cache[$code]['fallback']; + } + + /** + * Loads language into the cache, handles message file and fallbacks + * @param $code string language code + */ + public function loadLanguage($code) { + static $languages_seen = array(); // recursion guard + + // abort if we've already loaded it + if (isset($this->cache[$code])) return; + + // generate filename + $filename = $this->dir . '/Language/messages/' . $code . '.php'; + + // default fallback : may be overwritten by the ensuing include + $fallback = ($code != 'en') ? 
'en' : false; + + // load primary localisation + if (!file_exists($filename)) { + // skip the include: will rely solely on fallback + $filename = $this->dir . '/Language/messages/en.php'; + $cache = array(); + } else { + include $filename; + $cache = compact($this->keys); + } + + // load fallback localisation + if (!empty($fallback)) { + + // infinite recursion guard + if (isset($languages_seen[$code])) { + trigger_error('Circular fallback reference in language ' . + $code, E_USER_ERROR); + $fallback = 'en'; + } + $language_seen[$code] = true; + + // load the fallback recursively + $this->loadLanguage($fallback); + $fallback_cache = $this->cache[$fallback]; + + // merge fallback with current language + foreach ( $this->keys as $key ) { + if (isset($cache[$key]) && isset($fallback_cache[$key])) { + if (isset($this->mergeable_keys_map[$key])) { + $cache[$key] = $cache[$key] + $fallback_cache[$key]; + } elseif (isset($this->mergeable_keys_list[$key])) { + $cache[$key] = array_merge( $fallback_cache[$key], $cache[$key] ); + } + } else { + $cache[$key] = $fallback_cache[$key]; + } + } + + } + + // save to cache for later retrieval + $this->cache[$code] = $cache; + + return; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Length.php b/extlib/HTMLPurifier/HTMLPurifier/Length.php new file mode 100644 index 0000000000..8d2a46b7da --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Length.php @@ -0,0 +1,115 @@ + true, 'ex' => true, 'px' => true, 'in' => true, + 'cm' => true, 'mm' => true, 'pt' => true, 'pc' => true + ); + + /** + * @param number $n Magnitude + * @param string $u Unit + */ + public function __construct($n = '0', $u = false) { + $this->n = (string) $n; + $this->unit = $u !== false ? (string) $u : false; + } + + /** + * @param string $s Unit string, like '2em' or '3.4in' + * @warning Does not perform validation. 
+ */ + static public function make($s) { + if ($s instanceof HTMLPurifier_Length) return $s; + $n_length = strspn($s, '1234567890.+-'); + $n = substr($s, 0, $n_length); + $unit = substr($s, $n_length); + if ($unit === '') $unit = false; + return new HTMLPurifier_Length($n, $unit); + } + + /** + * Validates the number and unit. + */ + protected function validate() { + // Special case: + if ($this->n === '+0' || $this->n === '-0') $this->n = '0'; + if ($this->n === '0' && $this->unit === false) return true; + if (!ctype_lower($this->unit)) $this->unit = strtolower($this->unit); + if (!isset(HTMLPurifier_Length::$allowedUnits[$this->unit])) return false; + // Hack: + $def = new HTMLPurifier_AttrDef_CSS_Number(); + $result = $def->validate($this->n, false, false); + if ($result === false) return false; + $this->n = $result; + return true; + } + + /** + * Returns string representation of number. + */ + public function toString() { + if (!$this->isValid()) return false; + return $this->n . $this->unit; + } + + /** + * Retrieves string numeric magnitude. + */ + public function getN() {return $this->n;} + + /** + * Retrieves string unit. + */ + public function getUnit() {return $this->unit;} + + /** + * Returns true if this length unit is valid. + */ + public function isValid() { + if ($this->isValid === null) $this->isValid = $this->validate(); + return $this->isValid; + } + + /** + * Compares two lengths, and returns 1 if greater, -1 if less and 0 if equal. 
+ * @warning If both values are too large or small, this calculation will + * not work properly + */ + public function compareTo($l) { + if ($l === false) return false; + if ($l->unit !== $this->unit) { + $converter = new HTMLPurifier_UnitConverter(); + $l = $converter->convert($l, $this->unit); + if ($l === false) return false; + } + return $this->n - $l->n; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Lexer.php b/extlib/HTMLPurifier/HTMLPurifier/Lexer.php new file mode 100644 index 0000000000..8cce008d3d --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Lexer.php @@ -0,0 +1,298 @@ +get('Core.LexerImpl'); + } + + $needs_tracking = + $config->get('Core.MaintainLineNumbers') || + $config->get('Core.CollectErrors'); + + $inst = null; + if (is_object($lexer)) { + $inst = $lexer; + } else { + + if (is_null($lexer)) { do { + // auto-detection algorithm + + if ($needs_tracking) { + $lexer = 'DirectLex'; + break; + } + + if ( + class_exists('DOMDocument') && + method_exists('DOMDocument', 'loadHTML') && + !extension_loaded('domxml') + ) { + // check for DOM support, because while it's part of the + // core, it can be disabled compile time. Also, the PECL + // domxml extension overrides the default DOM, and is evil + // and nasty and we shan't bother to support it + $lexer = 'DOMLex'; + } else { + $lexer = 'DirectLex'; + } + + } while(0); } // do..while so we can break + + // instantiate recognized string names + switch ($lexer) { + case 'DOMLex': + $inst = new HTMLPurifier_Lexer_DOMLex(); + break; + case 'DirectLex': + $inst = new HTMLPurifier_Lexer_DirectLex(); + break; + case 'PH5P': + $inst = new HTMLPurifier_Lexer_PH5P(); + break; + default: + throw new HTMLPurifier_Exception("Cannot instantiate unrecognized Lexer type " . 
htmlspecialchars($lexer)); + } + } + + if (!$inst) throw new HTMLPurifier_Exception('No lexer was instantiated'); + + // once PHP DOM implements native line numbers, or we + // hack out something using XSLT, remove this stipulation + if ($needs_tracking && !$inst->tracksLineNumbers) { + throw new HTMLPurifier_Exception('Cannot use lexer that does not support line numbers with Core.MaintainLineNumbers or Core.CollectErrors (use DirectLex instead)'); + } + + return $inst; + + } + + // -- CONVENIENCE MEMBERS --------------------------------------------- + + public function __construct() { + $this->_entity_parser = new HTMLPurifier_EntityParser(); + } + + /** + * Most common entity to raw value conversion table for special entities. + */ + protected $_special_entity2str = + array( + '"' => '"', + '&' => '&', + '<' => '<', + '>' => '>', + ''' => "'", + ''' => "'", + ''' => "'" + ); + + /** + * Parses special entities into the proper characters. + * + * This string will translate escaped versions of the special characters + * into the correct ones. + * + * @warning + * You should be able to treat the output of this function as + * completely parsed, but that's only because all other entities should + * have been handled previously in substituteNonSpecialEntities() + * + * @param $string String character data to be parsed. + * @returns Parsed character data. + */ + public function parseData($string) { + + // following functions require at least one character + if ($string === '') return ''; + + // subtracts amps that cannot possibly be escaped + $num_amp = substr_count($string, '&') - substr_count($string, '& ') - + ($string[strlen($string)-1] === '&' ? 
1 : 0); + + if (!$num_amp) return $string; // abort if no entities + $num_esc_amp = substr_count($string, '&'); + $string = strtr($string, $this->_special_entity2str); + + // code duplication for sake of optimization, see above + $num_amp_2 = substr_count($string, '&') - substr_count($string, '& ') - + ($string[strlen($string)-1] === '&' ? 1 : 0); + + if ($num_amp_2 <= $num_esc_amp) return $string; + + // hmm... now we have some uncommon entities. Use the callback. + $string = $this->_entity_parser->substituteSpecialEntities($string); + return $string; + } + + /** + * Lexes an HTML string into tokens. + * + * @param $string String HTML. + * @return HTMLPurifier_Token array representation of HTML. + */ + public function tokenizeHTML($string, $config, $context) { + trigger_error('Call to abstract class', E_USER_ERROR); + } + + /** + * Translates CDATA sections into regular sections (through escaping). + * + * @param $string HTML string to process. + * @returns HTML with CDATA sections escaped. 
+ */ + protected static function escapeCDATA($string) { + return preg_replace_callback( + '//s', + array('HTMLPurifier_Lexer', 'CDATACallback'), + $string + ); + } + + /** + * Special CDATA case that is especially convoluted for )#si', + array($this, 'scriptCallback'), $html); + } + + $html = $this->normalize($html, $config, $context); + + $cursor = 0; // our location in the text + $inside_tag = false; // whether or not we're parsing the inside of a tag + $array = array(); // result array + + // This is also treated to mean maintain *column* numbers too + $maintain_line_numbers = $config->get('Core.MaintainLineNumbers'); + + if ($maintain_line_numbers === null) { + // automatically determine line numbering by checking + // if error collection is on + $maintain_line_numbers = $config->get('Core.CollectErrors'); + } + + if ($maintain_line_numbers) { + $current_line = 1; + $current_col = 0; + $length = strlen($html); + } else { + $current_line = false; + $current_col = false; + $length = false; + } + $context->register('CurrentLine', $current_line); + $context->register('CurrentCol', $current_col); + $nl = "\n"; + // how often to manually recalculate. This will ALWAYS be right, + // but it's pretty wasteful. Set to 0 to turn off + $synchronize_interval = $config->get('Core.DirectLexLineNumberSyncInterval'); + + $e = false; + if ($config->get('Core.CollectErrors')) { + $e =& $context->get('ErrorCollector'); + } + + // for testing synchronization + $loops = 0; + + while(++$loops) { + + // $cursor is either at the start of a token, or inside of + // a tag (i.e. there was a < immediately before it), as indicated + // by $inside_tag + + if ($maintain_line_numbers) { + + // $rcursor, however, is always at the start of a token. + $rcursor = $cursor - (int) $inside_tag; + + // Column number is cheap, so we calculate it every round. 
+ // We're interested at the *end* of the newline string, so + // we need to add strlen($nl) == 1 to $nl_pos before subtracting it + // from our "rcursor" position. + $nl_pos = strrpos($html, $nl, $rcursor - $length); + $current_col = $rcursor - (is_bool($nl_pos) ? 0 : $nl_pos + 1); + + // recalculate lines + if ( + $synchronize_interval && // synchronization is on + $cursor > 0 && // cursor is further than zero + $loops % $synchronize_interval === 0 // time to synchronize! + ) { + $current_line = 1 + $this->substrCount($html, $nl, 0, $cursor); + } + + } + + $position_next_lt = strpos($html, '<', $cursor); + $position_next_gt = strpos($html, '>', $cursor); + + // triggers on "asdf" but not "asdf " + // special case to set up context + if ($position_next_lt === $cursor) { + $inside_tag = true; + $cursor++; + } + + if (!$inside_tag && $position_next_lt !== false) { + // We are not inside tag and there still is another tag to parse + $token = new + HTMLPurifier_Token_Text( + $this->parseData( + substr( + $html, $cursor, $position_next_lt - $cursor + ) + ) + ); + if ($maintain_line_numbers) { + $token->rawPosition($current_line, $current_col); + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_lt - $cursor); + } + $array[] = $token; + $cursor = $position_next_lt + 1; + $inside_tag = true; + continue; + } elseif (!$inside_tag) { + // We are not inside tag but there are no more tags + // If we're already at the end, break + if ($cursor === strlen($html)) break; + // Create Text of rest of string + $token = new + HTMLPurifier_Token_Text( + $this->parseData( + substr( + $html, $cursor + ) + ) + ); + if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col); + $array[] = $token; + break; + } elseif ($inside_tag && $position_next_gt !== false) { + // We are in tag and it is well formed + // Grab the internals of the tag + $strlen_segment = $position_next_gt - $cursor; + + if ($strlen_segment < 1) { + // there's nothing to process! 
+ $token = new HTMLPurifier_Token_Text('<'); + $cursor++; + continue; + } + + $segment = substr($html, $cursor, $strlen_segment); + + if ($segment === false) { + // somehow, we attempted to access beyond the end of + // the string, defense-in-depth, reported by Nate Abele + break; + } + + // Check if it's a comment + if ( + substr($segment, 0, 3) === '!--' + ) { + // re-determine segment length, looking for --> + $position_comment_end = strpos($html, '-->', $cursor); + if ($position_comment_end === false) { + // uh oh, we have a comment that extends to + // infinity. Can't be helped: set comment + // end position to end of string + if ($e) $e->send(E_WARNING, 'Lexer: Unclosed comment'); + $position_comment_end = strlen($html); + $end = true; + } else { + $end = false; + } + $strlen_segment = $position_comment_end - $cursor; + $segment = substr($html, $cursor, $strlen_segment); + $token = new + HTMLPurifier_Token_Comment( + substr( + $segment, 3, $strlen_segment - 3 + ) + ); + if ($maintain_line_numbers) { + $token->rawPosition($current_line, $current_col); + $current_line += $this->substrCount($html, $nl, $cursor, $strlen_segment); + } + $array[] = $token; + $cursor = $end ? $position_comment_end : $position_comment_end + 3; + $inside_tag = false; + continue; + } + + // Check if it's an end tag + $is_end_tag = (strpos($segment,'/') === 0); + if ($is_end_tag) { + $type = substr($segment, 1); + $token = new HTMLPurifier_Token_End($type); + if ($maintain_line_numbers) { + $token->rawPosition($current_line, $current_col); + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); + } + $array[] = $token; + $inside_tag = false; + $cursor = $position_next_gt + 1; + continue; + } + + // Check leading character is alnum, if not, we may + // have accidently grabbed an emoticon. 
Translate into + // text and go our merry way + if (!ctype_alpha($segment[0])) { + // XML: $segment[0] !== '_' && $segment[0] !== ':' + if ($e) $e->send(E_NOTICE, 'Lexer: Unescaped lt'); + $token = new HTMLPurifier_Token_Text('<'); + if ($maintain_line_numbers) { + $token->rawPosition($current_line, $current_col); + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); + } + $array[] = $token; + $inside_tag = false; + continue; + } + + // Check if it is explicitly self closing, if so, remove + // trailing slash. Remember, we could have a tag like
    , so + // any later token processing scripts must convert improperly + // classified EmptyTags from StartTags. + $is_self_closing = (strrpos($segment,'/') === $strlen_segment-1); + if ($is_self_closing) { + $strlen_segment--; + $segment = substr($segment, 0, $strlen_segment); + } + + // Check if there are any attributes + $position_first_space = strcspn($segment, $this->_whitespace); + + if ($position_first_space >= $strlen_segment) { + if ($is_self_closing) { + $token = new HTMLPurifier_Token_Empty($segment); + } else { + $token = new HTMLPurifier_Token_Start($segment); + } + if ($maintain_line_numbers) { + $token->rawPosition($current_line, $current_col); + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); + } + $array[] = $token; + $inside_tag = false; + $cursor = $position_next_gt + 1; + continue; + } + + // Grab out all the data + $type = substr($segment, 0, $position_first_space); + $attribute_string = + trim( + substr( + $segment, $position_first_space + ) + ); + if ($attribute_string) { + $attr = $this->parseAttributeString( + $attribute_string + , $config, $context + ); + } else { + $attr = array(); + } + + if ($is_self_closing) { + $token = new HTMLPurifier_Token_Empty($type, $attr); + } else { + $token = new HTMLPurifier_Token_Start($type, $attr); + } + if ($maintain_line_numbers) { + $token->rawPosition($current_line, $current_col); + $current_line += $this->substrCount($html, $nl, $cursor, $position_next_gt - $cursor); + } + $array[] = $token; + $cursor = $position_next_gt + 1; + $inside_tag = false; + continue; + } else { + // inside tag, but there's no ending > sign + if ($e) $e->send(E_WARNING, 'Lexer: Missing gt'); + $token = new + HTMLPurifier_Token_Text( + '<' . + $this->parseData( + substr($html, $cursor) + ) + ); + if ($maintain_line_numbers) $token->rawPosition($current_line, $current_col); + // no cursor scroll? Hmm... 
+ $array[] = $token; + break; + } + break; + } + + $context->destroy('CurrentLine'); + $context->destroy('CurrentCol'); + return $array; + } + + /** + * PHP 5.0.x compatible substr_count that implements offset and length + */ + protected function substrCount($haystack, $needle, $offset, $length) { + static $oldVersion; + if ($oldVersion === null) { + $oldVersion = version_compare(PHP_VERSION, '5.1', '<'); + } + if ($oldVersion) { + $haystack = substr($haystack, $offset, $length); + return substr_count($haystack, $needle); + } else { + return substr_count($haystack, $needle, $offset, $length); + } + } + + /** + * Takes the inside of an HTML tag and makes an assoc array of attributes. + * + * @param $string Inside of tag excluding name. + * @returns Assoc array of attributes. + */ + public function parseAttributeString($string, $config, $context) { + $string = (string) $string; // quick typecast + + if ($string == '') return array(); // no attributes + + $e = false; + if ($config->get('Core.CollectErrors')) { + $e =& $context->get('ErrorCollector'); + } + + // let's see if we can abort as quickly as possible + // one equal sign, no spaces => one attribute + $num_equal = substr_count($string, '='); + $has_space = strpos($string, ' '); + if ($num_equal === 0 && !$has_space) { + // bool attribute + return array($string => $string); + } elseif ($num_equal === 1 && !$has_space) { + // only one attribute + list($key, $quoted_value) = explode('=', $string); + $quoted_value = trim($quoted_value); + if (!$key) { + if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); + return array(); + } + if (!$quoted_value) return array($key => ''); + $first_char = @$quoted_value[0]; + $last_char = @$quoted_value[strlen($quoted_value)-1]; + + $same_quote = ($first_char == $last_char); + $open_quote = ($first_char == '"' || $first_char == "'"); + + if ( $same_quote && $open_quote) { + // well behaved + $value = substr($quoted_value, 1, strlen($quoted_value) - 2); + } else { + // not 
well behaved + if ($open_quote) { + if ($e) $e->send(E_ERROR, 'Lexer: Missing end quote'); + $value = substr($quoted_value, 1); + } else { + $value = $quoted_value; + } + } + if ($value === false) $value = ''; + return array($key => $value); + } + + // setup loop environment + $array = array(); // return assoc array of attributes + $cursor = 0; // current position in string (moves forward) + $size = strlen($string); // size of the string (stays the same) + + // if we have unquoted attributes, the parser expects a terminating + // space, so let's guarantee that there's always a terminating space. + $string .= ' '; + + while(true) { + + if ($cursor >= $size) { + break; + } + + $cursor += ($value = strspn($string, $this->_whitespace, $cursor)); + // grab the key + + $key_begin = $cursor; //we're currently at the start of the key + + // scroll past all characters that are the key (not whitespace or =) + $cursor += strcspn($string, $this->_whitespace . '=', $cursor); + + $key_end = $cursor; // now at the end of the key + + $key = substr($string, $key_begin, $key_end - $key_begin); + + if (!$key) { + if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); + $cursor += strcspn($string, $this->_whitespace, $cursor + 1); // prevent infinite loop + continue; // empty key + } + + // scroll past all whitespace + $cursor += strspn($string, $this->_whitespace, $cursor); + + if ($cursor >= $size) { + $array[$key] = $key; + break; + } + + // if the next character is an equal sign, we've got a regular + // pair, otherwise, it's a bool attribute + $first_char = @$string[$cursor]; + + if ($first_char == '=') { + // key="value" + + $cursor++; + $cursor += strspn($string, $this->_whitespace, $cursor); + + if ($cursor === false) { + $array[$key] = ''; + break; + } + + // we might be in front of a quote right now + + $char = @$string[$cursor]; + + if ($char == '"' || $char == "'") { + // it's quoted, end bound is $char + $cursor++; + $value_begin = $cursor; + $cursor = strpos($string, 
$char, $cursor); + $value_end = $cursor; + } else { + // it's not quoted, end bound is whitespace + $value_begin = $cursor; + $cursor += strcspn($string, $this->_whitespace, $cursor); + $value_end = $cursor; + } + + // we reached a premature end + if ($cursor === false) { + $cursor = $size; + $value_end = $cursor; + } + + $value = substr($string, $value_begin, $value_end - $value_begin); + if ($value === false) $value = ''; + $array[$key] = $this->parseData($value); + $cursor++; + + } else { + // boolattr + if ($key !== '') { + $array[$key] = $key; + } else { + // purely theoretical + if ($e) $e->send(E_ERROR, 'Lexer: Missing attribute key'); + } + + } + } + return $array; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Lexer/PEARSax3.php b/extlib/HTMLPurifier/HTMLPurifier/Lexer/PEARSax3.php new file mode 100644 index 0000000000..57cffa82ab --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Lexer/PEARSax3.php @@ -0,0 +1,106 @@ +tokens = array(); + + $string = $this->normalize($string, $config, $context); + + $parser = new XML_HTMLSax3(); + $parser->set_object($this); + $parser->set_element_handler('openHandler','closeHandler'); + $parser->set_data_handler('dataHandler'); + $parser->set_escape_handler('escapeHandler'); + + // doesn't seem to work correctly for attributes + $parser->set_option('XML_OPTION_ENTITIES_PARSED', 1); + + $parser->parse($string); + + return $this->tokens; + + } + + /** + * Open tag event handler, interface is defined by PEAR package. + */ + public function openHandler(&$parser, $name, $attrs, $closed) { + // entities are not resolved in attrs + foreach ($attrs as $key => $attr) { + $attrs[$key] = $this->parseData($attr); + } + if ($closed) { + $this->tokens[] = new HTMLPurifier_Token_Empty($name, $attrs); + } else { + $this->tokens[] = new HTMLPurifier_Token_Start($name, $attrs); + } + return true; + } + + /** + * Close tag event handler, interface is defined by PEAR package. 
+ */ + public function closeHandler(&$parser, $name) { + // HTMLSax3 seems to always send empty tags an extra close tag + // check and ignore if you see it: + // [TESTME] to make sure it doesn't overreach + if ($this->tokens[count($this->tokens)-1] instanceof HTMLPurifier_Token_Empty) { + return true; + } + $this->tokens[] = new HTMLPurifier_Token_End($name); + return true; + } + + /** + * Data event handler, interface is defined by PEAR package. + */ + public function dataHandler(&$parser, $data) { + $this->tokens[] = new HTMLPurifier_Token_Text($data); + return true; + } + + /** + * Escaped text handler, interface is defined by PEAR package. + */ + public function escapeHandler(&$parser, $data) { + if (strpos($data, '--') === 0) { + $this->tokens[] = new HTMLPurifier_Token_Comment($data); + } + // CDATA is handled elsewhere, but if it was handled here: + //if (strpos($data, '[CDATA[') === 0) { + // $this->tokens[] = new HTMLPurifier_Token_Text( + // substr($data, 7, strlen($data) - 9) ); + //} + return true; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Lexer/PH5P.php b/extlib/HTMLPurifier/HTMLPurifier/Lexer/PH5P.php new file mode 100644 index 0000000000..fa1bf973e0 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Lexer/PH5P.php @@ -0,0 +1,3906 @@ +normalize($html, $config, $context); + $new_html = $this->wrapHTML($new_html, $config, $context); + try { + $parser = new HTML5($new_html); + $doc = $parser->save(); + } catch (DOMException $e) { + // Uh oh, it failed. Punt to DirectLex. + $lexer = new HTMLPurifier_Lexer_DirectLex(); + $context->register('PH5PError', $e); // save the error, so we can detect it + return $lexer->tokenizeHTML($html, $config, $context); // use original HTML + } + $tokens = array(); + $this->tokenizeDOM( + $doc->getElementsByTagName('html')->item(0)-> // + getElementsByTagName('body')->item(0)-> // + getElementsByTagName('div')->item(0) //
    + , $tokens); + return $tokens; + } + +} + +/* + +Copyright 2007 Jeroen van der Meer + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +*/ + +class HTML5 { + private $data; + private $char; + private $EOF; + private $state; + private $tree; + private $token; + private $content_model; + private $escape = false; + private $entities = array('AElig;','AElig','AMP;','AMP','Aacute;','Aacute', + 'Acirc;','Acirc','Agrave;','Agrave','Alpha;','Aring;','Aring','Atilde;', + 'Atilde','Auml;','Auml','Beta;','COPY;','COPY','Ccedil;','Ccedil','Chi;', + 'Dagger;','Delta;','ETH;','ETH','Eacute;','Eacute','Ecirc;','Ecirc','Egrave;', + 'Egrave','Epsilon;','Eta;','Euml;','Euml','GT;','GT','Gamma;','Iacute;', + 'Iacute','Icirc;','Icirc','Igrave;','Igrave','Iota;','Iuml;','Iuml','Kappa;', + 'LT;','LT','Lambda;','Mu;','Ntilde;','Ntilde','Nu;','OElig;','Oacute;', + 'Oacute','Ocirc;','Ocirc','Ograve;','Ograve','Omega;','Omicron;','Oslash;', + 'Oslash','Otilde;','Otilde','Ouml;','Ouml','Phi;','Pi;','Prime;','Psi;', + 'QUOT;','QUOT','REG;','REG','Rho;','Scaron;','Sigma;','THORN;','THORN', + 'TRADE;','Tau;','Theta;','Uacute;','Uacute','Ucirc;','Ucirc','Ugrave;', + 'Ugrave','Upsilon;','Uuml;','Uuml','Xi;','Yacute;','Yacute','Yuml;','Zeta;', + 'aacute;','aacute','acirc;','acirc','acute;','acute','aelig;','aelig', + 'agrave;','agrave','alefsym;','alpha;','amp;','amp','and;','ang;','apos;', + 'aring;','aring','asymp;','atilde;','atilde','auml;','auml','bdquo;','beta;', + 'brvbar;','brvbar','bull;','cap;','ccedil;','ccedil','cedil;','cedil', + 'cent;','cent','chi;','circ;','clubs;','cong;','copy;','copy','crarr;', + 'cup;','curren;','curren','dArr;','dagger;','darr;','deg;','deg','delta;', + 'diams;','divide;','divide','eacute;','eacute','ecirc;','ecirc','egrave;', + 'egrave','empty;','emsp;','ensp;','epsilon;','equiv;','eta;','eth;','eth', + 'euml;','euml','euro;','exist;','fnof;','forall;','frac12;','frac12', + 'frac14;','frac14','frac34;','frac34','frasl;','gamma;','ge;','gt;','gt', + 'hArr;','harr;','hearts;','hellip;','iacute;','iacute','icirc;','icirc', + 
'iexcl;','iexcl','igrave;','igrave','image;','infin;','int;','iota;', + 'iquest;','iquest','isin;','iuml;','iuml','kappa;','lArr;','lambda;','lang;', + 'laquo;','laquo','larr;','lceil;','ldquo;','le;','lfloor;','lowast;','loz;', + 'lrm;','lsaquo;','lsquo;','lt;','lt','macr;','macr','mdash;','micro;','micro', + 'middot;','middot','minus;','mu;','nabla;','nbsp;','nbsp','ndash;','ne;', + 'ni;','not;','not','notin;','nsub;','ntilde;','ntilde','nu;','oacute;', + 'oacute','ocirc;','ocirc','oelig;','ograve;','ograve','oline;','omega;', + 'omicron;','oplus;','or;','ordf;','ordf','ordm;','ordm','oslash;','oslash', + 'otilde;','otilde','otimes;','ouml;','ouml','para;','para','part;','permil;', + 'perp;','phi;','pi;','piv;','plusmn;','plusmn','pound;','pound','prime;', + 'prod;','prop;','psi;','quot;','quot','rArr;','radic;','rang;','raquo;', + 'raquo','rarr;','rceil;','rdquo;','real;','reg;','reg','rfloor;','rho;', + 'rlm;','rsaquo;','rsquo;','sbquo;','scaron;','sdot;','sect;','sect','shy;', + 'shy','sigma;','sigmaf;','sim;','spades;','sub;','sube;','sum;','sup1;', + 'sup1','sup2;','sup2','sup3;','sup3','sup;','supe;','szlig;','szlig','tau;', + 'there4;','theta;','thetasym;','thinsp;','thorn;','thorn','tilde;','times;', + 'times','trade;','uArr;','uacute;','uacute','uarr;','ucirc;','ucirc', + 'ugrave;','ugrave','uml;','uml','upsih;','upsilon;','uuml;','uuml','weierp;', + 'xi;','yacute;','yacute','yen;','yen','yuml;','yuml','zeta;','zwj;','zwnj;'); + + const PCDATA = 0; + const RCDATA = 1; + const CDATA = 2; + const PLAINTEXT = 3; + + const DOCTYPE = 0; + const STARTTAG = 1; + const ENDTAG = 2; + const COMMENT = 3; + const CHARACTR = 4; + const EOF = 5; + + public function __construct($data) { + $data = str_replace("\r\n", "\n", $data); + $data = str_replace("\r", null, $data); + + $this->data = $data; + $this->char = -1; + $this->EOF = strlen($data); + $this->tree = new HTML5TreeConstructer; + $this->content_model = self::PCDATA; + + $this->state = 'data'; + + 
while($this->state !== null) { + $this->{$this->state.'State'}(); + } + } + + public function save() { + return $this->tree->save(); + } + + private function char() { + return ($this->char < $this->EOF) + ? $this->data[$this->char] + : false; + } + + private function character($s, $l = 0) { + if($s + $l < $this->EOF) { + if($l === 0) { + return $this->data[$s]; + } else { + return substr($this->data, $s, $l); + } + } + } + + private function characters($char_class, $start) { + return preg_replace('#^(['.$char_class.']+).*#s', '\\1', substr($this->data, $start)); + } + + private function dataState() { + // Consume the next input character + $this->char++; + $char = $this->char(); + + if($char === '&' && ($this->content_model === self::PCDATA || $this->content_model === self::RCDATA)) { + /* U+0026 AMPERSAND (&) + When the content model flag is set to one of the PCDATA or RCDATA + states: switch to the entity data state. Otherwise: treat it as per + the "anything else" entry below. */ + $this->state = 'entityData'; + + } elseif($char === '-') { + /* If the content model flag is set to either the RCDATA state or + the CDATA state, and the escape flag is false, and there are at + least three characters before this one in the input stream, and the + last four characters in the input stream, including this one, are + U+003C LESS-THAN SIGN, U+0021 EXCLAMATION MARK, U+002D HYPHEN-MINUS, + and U+002D HYPHEN-MINUS (""), + set the escape flag to false. */ + if(($this->content_model === self::RCDATA || + $this->content_model === self::CDATA) && $this->escape === true && + $this->character($this->char, 3) === '-->') { + $this->escape = false; + } + + /* In any case, emit the input character as a character token. + Stay in the data state. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => $char + )); + + } elseif($this->char === $this->EOF) { + /* EOF + Emit an end-of-file token. 
*/ + $this->EOF(); + + } elseif($this->content_model === self::PLAINTEXT) { + /* When the content model flag is set to the PLAINTEXT state + THIS DIFFERS GREATLY FROM THE SPEC: Get the remaining characters of + the text and emit it as a character token. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => substr($this->data, $this->char) + )); + + $this->EOF(); + + } else { + /* Anything else + THIS DIFFERS GREATLY FROM THE SPEC: Get as many character that + otherwise would also be treated as a character token and emit it + as a single character token. Stay in the data state. */ + $len = strcspn($this->data, '<&', $this->char); + $char = substr($this->data, $this->char, $len); + $this->char += $len - 1; + + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => $char + )); + + $this->state = 'data'; + } + } + + private function entityDataState() { + // Attempt to consume an entity. + $entity = $this->entity(); + + // If nothing is returned, emit a U+0026 AMPERSAND character token. + // Otherwise, emit the character token that was returned. + $char = (!$entity) ? '&' : $entity; + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => $char + )); + + // Finally, switch to the data state. + $this->state = 'data'; + } + + private function tagOpenState() { + switch($this->content_model) { + case self::RCDATA: + case self::CDATA: + /* If the next input character is a U+002F SOLIDUS (/) character, + consume it and switch to the close tag open state. If the next + input character is not a U+002F SOLIDUS (/) character, emit a + U+003C LESS-THAN SIGN character token and switch to the data + state to process the next input character. 
*/ + if($this->character($this->char + 1) === '/') { + $this->char++; + $this->state = 'closeTagOpen'; + + } else { + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => '<' + )); + + $this->state = 'data'; + } + break; + + case self::PCDATA: + // If the content model flag is set to the PCDATA state + // Consume the next input character: + $this->char++; + $char = $this->char(); + + if($char === '!') { + /* U+0021 EXCLAMATION MARK (!) + Switch to the markup declaration open state. */ + $this->state = 'markupDeclarationOpen'; + + } elseif($char === '/') { + /* U+002F SOLIDUS (/) + Switch to the close tag open state. */ + $this->state = 'closeTagOpen'; + + } elseif(preg_match('/^[A-Za-z]$/', $char)) { + /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z + Create a new start tag token, set its tag name to the lowercase + version of the input character (add 0x0020 to the character's code + point), then switch to the tag name state. (Don't emit the token + yet; further details will be filled in before it is emitted.) */ + $this->token = array( + 'name' => strtolower($char), + 'type' => self::STARTTAG, + 'attr' => array() + ); + + $this->state = 'tagName'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Parse error. Emit a U+003C LESS-THAN SIGN character token and a + U+003E GREATER-THAN SIGN character token. Switch to the data state. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => '<>' + )); + + $this->state = 'data'; + + } elseif($char === '?') { + /* U+003F QUESTION MARK (?) + Parse error. Switch to the bogus comment state. */ + $this->state = 'bogusComment'; + + } else { + /* Anything else + Parse error. Emit a U+003C LESS-THAN SIGN character token and + reconsume the current input character in the data state. 
*/ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => '<' + )); + + $this->char--; + $this->state = 'data'; + } + break; + } + } + + /* Close tag open state, entered from the tag open state once a U+002F SOLIDUS has been seen. In RCDATA/CDATA content the two characters are emitted as literal text unless the name that follows matches the node on top of the tree constructer's stack and is followed by whitespace, ">" or "/"; otherwise the next character either starts an end tag token, returns to the data state, or switches to the bogus comment state. */ + private function closeTagOpenState() { + $next_node = strtolower($this->characters('A-Za-z', $this->char + 1)); + $the_same = count($this->tree->stack) > 0 && $next_node === end($this->tree->stack)->nodeName; + + if(($this->content_model === self::RCDATA || $this->content_model === self::CDATA) && + (!$the_same || ($the_same && (!preg_match('/[\t\n\x0b\x0c >\/]/', + $this->character($this->char + 1 + strlen($next_node))) || $this->EOF === $this->char)))) { + /* If the content model flag is set to the RCDATA or CDATA states then + examine the next few characters. If they do not match the tag name of + the last start tag token emitted (case insensitively), or if they do but + they are not immediately followed by one of the following characters: + * U+0009 CHARACTER TABULATION + * U+000A LINE FEED (LF) + * U+000B LINE TABULATION + * U+000C FORM FEED (FF) + * U+0020 SPACE + * U+003E GREATER-THAN SIGN (>) + * U+002F SOLIDUS (/) + * EOF + ...then there is a parse error. Emit a U+003C LESS-THAN SIGN character + token, a U+002F SOLIDUS character token, and switch to the data state + to process the next input character. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => '</' + )); + + $this->state = 'data'; + + } else { + /* Otherwise, if the content model flag is set to the PCDATA state, + or if the next few characters do match that tag name, consume the + next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[A-Za-z]$/', $char)) { + /* U+0041 LATIN LETTER A through to U+005A LATIN LETTER Z + Create a new end tag token, set its tag name to the lowercase version + of the input character (add 0x0020 to the character's code point), then + switch to the tag name state. (Don't emit the token yet; further details + will be filled in before it is emitted.) 
*/ + $this->token = array( + 'name' => strtolower($char), + 'type' => self::ENDTAG + ); + + $this->state = 'tagName'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Parse error. Switch to the data state. */ + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit a U+003C LESS-THAN SIGN character token and a U+002F + SOLIDUS character token. Reconsume the EOF character in the data state. */ + $this->emitToken(array( + 'type' => self::CHARACTR, + 'data' => '</' + )); + + $this->char--; + $this->state = 'data'; + + } else { + /* Parse error. Switch to the bogus comment state. */ + $this->state = 'bogusComment'; + } + } + } + + private function tagNameState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } elseif($char === '/') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Switch to the before + attribute name state. */ + $this->state = 'beforeAttributeName'; + + } else { + /* Anything else + Append the current input character to the current tag token's tag name. + Stay in the tag name state. 
*/ + $this->token['name'] .= strtolower($char); + $this->state = 'tagName'; + } + } + + /* Before attribute name state: skips whitespace and "/" characters, emits the current tag token on ">" or EOF, and otherwise starts a new attribute whose name begins with the lowercased current character. */ private function beforeAttributeNameState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($char === '/') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Stay in the before + attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Start a new attribute in the current tag token. Set that attribute's + name to the current input character (lowercased), and its value to the + empty string (stored here as null). + Switch to the attribute name state. */ + $this->token['attr'][] = array( + 'name' => strtolower($char), + 'value' => null + ); + + $this->state = 'attributeName'; + } + } + + /* Attribute name state: appends lowercased characters to the name of the last attribute on the current tag token until whitespace, "=", ">", "/" or EOF is reached. */ private function attributeNameState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Switch to the after attribute name state. */ + $this->state = 'afterAttributeName'; + + } elseif($char === '=') { + /* U+003D EQUALS SIGN (=) + Switch to the before attribute value state. 
*/ + $this->state = 'beforeAttributeValue'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($char === '/' && $this->character($this->char + 1) !== '>') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Switch to the before + attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's name. + Stay in the attribute name state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['name'] .= strtolower($char); + + $this->state = 'attributeName'; + } + } + + private function afterAttributeNameState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the after attribute name state. */ + $this->state = 'afterAttributeName'; + + } elseif($char === '=') { + /* U+003D EQUALS SIGN (=) + Switch to the before attribute value state. */ + $this->state = 'beforeAttributeValue'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($char === '/' && $this->character($this->char + 1) !== '>') { + /* U+002F SOLIDUS (/) + Parse error unless this is a permitted slash. Switch to the + before attribute name state. 
*/ + $this->state = 'beforeAttributeName'; + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the EOF + character in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Start a new attribute in the current tag token. Set that attribute's + name to the current input character, and its value to the empty string. + Switch to the attribute name state. */ + $this->token['attr'][] = array( + 'name' => strtolower($char), + 'value' => null + ); + + $this->state = 'attributeName'; + } + } + + private function beforeAttributeValueState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Stay in the before attribute value state. */ + $this->state = 'beforeAttributeValue'; + + } elseif($char === '"') { + /* U+0022 QUOTATION MARK (") + Switch to the attribute value (double-quoted) state. */ + $this->state = 'attributeValueDoubleQuoted'; + + } elseif($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the attribute value (unquoted) state and reconsume + this input character. */ + $this->char--; + $this->state = 'attributeValueUnquoted'; + + } elseif($char === '\'') { + /* U+0027 APOSTROPHE (') + Switch to the attribute value (single-quoted) state. */ + $this->state = 'attributeValueSingleQuoted'; + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Switch to the attribute value (unquoted) state. 
*/ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueUnquoted'; + } + } + + /* Attribute value (double-quoted) state: appends characters to the value of the last attribute on the current tag token until the closing quotation mark; "&" is resolved through entityInAttributeValueState() and EOF emits the tag token. */ private function attributeValueDoubleQuotedState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if($char === '"') { + /* U+0022 QUOTATION MARK (") + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '&') { + /* U+0026 AMPERSAND (&) + Consume an entity through entityInAttributeValueState(), which is + called directly; $this->state is not changed in this branch. */ + $this->entityInAttributeValueState('double'); + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the character + in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Stay in the attribute value (double-quoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueDoubleQuoted'; + } + } + + /* Attribute value (single-quoted) state: same handling as the double-quoted state, but the value is terminated by an apostrophe. */ private function attributeValueSingleQuotedState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if($char === '\'') { + /* U+0027 APOSTROPHE (') + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '&') { + /* U+0026 AMPERSAND (&) + Consume an entity through entityInAttributeValueState(), which is + called directly; $this->state is not changed in this branch. */ + $this->entityInAttributeValueState('single'); + + } elseif($this->char === $this->EOF) { + /* EOF + Parse error. Emit the current tag token. Reconsume the character + in the data state. */ + $this->emitToken($this->token); + + $this->char--; + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Stay in the attribute value (single-quoted) state. 
*/ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueSingleQuoted'; + } + } + + private function attributeValueUnquotedState() { + // Consume the next input character: + $this->char++; + $char = $this->character($this->char); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + /* U+0009 CHARACTER TABULATION + U+000A LINE FEED (LF) + U+000B LINE TABULATION + U+000C FORM FEED (FF) + U+0020 SPACE + Switch to the before attribute name state. */ + $this->state = 'beforeAttributeName'; + + } elseif($char === '&') { + /* U+0026 AMPERSAND (&) + Switch to the entity in attribute value state. */ + $this->entityInAttributeValueState(); + + } elseif($char === '>') { + /* U+003E GREATER-THAN SIGN (>) + Emit the current tag token. Switch to the data state. */ + $this->emitToken($this->token); + $this->state = 'data'; + + } else { + /* Anything else + Append the current input character to the current attribute's value. + Stay in the attribute value (unquoted) state. */ + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + + $this->state = 'attributeValueUnquoted'; + } + } + + private function entityInAttributeValueState() { + // Attempt to consume an entity. + $entity = $this->entity(); + + // If nothing is returned, append a U+0026 AMPERSAND character to the + // current attribute's value. Otherwise, emit the character token that + // was returned. + $char = (!$entity) + ? '&' + : $entity; + + $last = count($this->token['attr']) - 1; + $this->token['attr'][$last]['value'] .= $char; + } + + private function bogusCommentState() { + /* Consume every character up to the first U+003E GREATER-THAN SIGN + character (>) or the end of the file (EOF), whichever comes first. 
Emit + a comment token whose data is the concatenation of all the characters + starting from and including the character that caused the state machine + to switch into the bogus comment state, up to and including the last + consumed character before the U+003E character, if any, or up to the + end of the file otherwise. (If the comment was started by the end of + the file (EOF), the token is empty.) */ + $data = $this->characters('^>', $this->char); + $this->emitToken(array( + 'data' => $data, + 'type' => self::COMMENT + )); + + $this->char += strlen($data); + + /* Switch to the data state. */ + $this->state = 'data'; + + /* If the end of the file was reached, reconsume the EOF character. */ + if($this->char === $this->EOF) { + $this->char = $this->EOF - 1; + } + } + + private function markupDeclarationOpenState() { + /* If the next two characters are both U+002D HYPHEN-MINUS (-) + characters, consume those two characters, create a comment token whose + data is the empty string, and switch to the comment state. */ + if($this->character($this->char + 1, 2) === '--') { + $this->char += 2; + $this->state = 'comment'; + $this->token = array( + 'data' => null, + 'type' => self::COMMENT + ); + + /* Otherwise if the next seven chacacters are a case-insensitive match + for the word "DOCTYPE", then consume those characters and switch to the + DOCTYPE state. */ + } elseif(strtolower($this->character($this->char + 1, 7)) === 'doctype') { + $this->char += 7; + $this->state = 'doctype'; + + /* Otherwise, is is a parse error. Switch to the bogus comment state. + The next character that is consumed, if any, is the first character + that will be in the comment. 
*/ + } else { + $this->char++; + $this->state = 'bogusComment'; + } + } + + private function commentState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + /* U+002D HYPHEN-MINUS (-) */ + if($char === '-') { + /* Switch to the comment dash state */ + $this->state = 'commentDash'; + + /* EOF */ + } elseif($this->char === $this->EOF) { + /* Parse error. Emit the comment token. Reconsume the EOF character + in the data state. */ + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + /* Anything else */ + } else { + /* Append the input character to the comment token's data. Stay in + the comment state. */ + $this->token['data'] .= $char; + } + } + + private function commentDashState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + /* U+002D HYPHEN-MINUS (-) */ + if($char === '-') { + /* Switch to the comment end state */ + $this->state = 'commentEnd'; + + /* EOF */ + } elseif($this->char === $this->EOF) { + /* Parse error. Emit the comment token. Reconsume the EOF character + in the data state. */ + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + /* Anything else */ + } else { + /* Append a U+002D HYPHEN-MINUS (-) character and the input + character to the comment token's data. Switch to the comment state. 
*/ + $this->token['data'] .= '-'.$char; + $this->state = 'comment'; + } + } + + private function commentEndState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($char === '-') { + $this->token['data'] .= '-'; + + } elseif($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + $this->token['data'] .= '--'.$char; + $this->state = 'comment'; + } + } + + private function doctypeState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + $this->state = 'beforeDoctypeName'; + + } else { + $this->char--; + $this->state = 'beforeDoctypeName'; + } + } + + private function beforeDoctypeNameState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + // Stay in the before DOCTYPE name state. 
+ + } elseif(preg_match('/^[a-z]$/', $char)) { + $this->token = array( + 'name' => strtoupper($char), + 'type' => self::DOCTYPE, + 'error' => true + ); + + $this->state = 'doctypeName'; + + } elseif($char === '>') { + $this->emitToken(array( + 'name' => null, + 'type' => self::DOCTYPE, + 'error' => true + )); + + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + $this->emitToken(array( + 'name' => null, + 'type' => self::DOCTYPE, + 'error' => true + )); + + $this->char--; + $this->state = 'data'; + + } else { + $this->token = array( + 'name' => $char, + 'type' => self::DOCTYPE, + 'error' => true + ); + + $this->state = 'doctypeName'; + } + } + + private function doctypeNameState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + $this->state = 'AfterDoctypeName'; + + } elseif($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif(preg_match('/^[a-z]$/', $char)) { + $this->token['name'] .= strtoupper($char); + + } elseif($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + $this->token['name'] .= $char; + } + + $this->token['error'] = ($this->token['name'] === 'HTML') + ? false + : true; + } + + private function afterDoctypeNameState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if(preg_match('/^[\t\n\x0b\x0c ]$/', $char)) { + // Stay in the DOCTYPE name state. 
+ + } elseif($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + $this->token['error'] = true; + $this->state = 'bogusDoctype'; + } + } + + private function bogusDoctypeState() { + /* Consume the next input character: */ + $this->char++; + $char = $this->char(); + + if($char === '>') { + $this->emitToken($this->token); + $this->state = 'data'; + + } elseif($this->char === $this->EOF) { + $this->emitToken($this->token); + $this->char--; + $this->state = 'data'; + + } else { + // Stay in the bogus DOCTYPE state. + } + } + + private function entity() { + $start = $this->char; + + // This section defines how to consume an entity. This definition is + // used when parsing entities in text and in attributes. + + // The behaviour depends on the identity of the next character (the + // one immediately after the U+0026 AMPERSAND character): + + switch($this->character($this->char + 1)) { + // U+0023 NUMBER SIGN (#) + case '#': + + // The behaviour further depends on the character after the + // U+0023 NUMBER SIGN: + switch($this->character($this->char + 1)) { + // U+0078 LATIN SMALL LETTER X + // U+0058 LATIN CAPITAL LETTER X + case 'x': + case 'X': + // Follow the steps below, but using the range of + // characters U+0030 DIGIT ZERO through to U+0039 DIGIT + // NINE, U+0061 LATIN SMALL LETTER A through to U+0066 + // LATIN SMALL LETTER F, and U+0041 LATIN CAPITAL LETTER + // A, through to U+0046 LATIN CAPITAL LETTER F (in other + // words, 0-9, A-F, a-f). + $char = 1; + $char_class = '0-9A-Fa-f'; + break; + + // Anything else + default: + // Follow the steps below, but using the range of + // characters U+0030 DIGIT ZERO through to U+0039 DIGIT + // NINE (i.e. just 0-9). + $char = 0; + $char_class = '0-9'; + break; + } + + // Consume as many characters as match the range of characters + // given above. 
+ $this->char++; + $e_name = $this->characters($char_class, $this->char + $char + 1); + $entity = $this->character($start, $this->char); + $cond = strlen($e_name) > 0; + + // The rest of the parsing happens bellow. + break; + + // Anything else + default: + // Consume the maximum number of characters possible, with the + // consumed characters case-sensitively matching one of the + // identifiers in the first column of the entities table. + $e_name = $this->characters('0-9A-Za-z;', $this->char + 1); + $len = strlen($e_name); + + for($c = 1; $c <= $len; $c++) { + $id = substr($e_name, 0, $c); + $this->char++; + + if(in_array($id, $this->entities)) { + if ($e_name[$c-1] !== ';') { + if ($c < $len && $e_name[$c] == ';') { + $this->char++; // consume extra semicolon + } + } + $entity = $id; + break; + } + } + + $cond = isset($entity); + // The rest of the parsing happens bellow. + break; + } + + if(!$cond) { + // If no match can be made, then this is a parse error. No + // characters are consumed, and nothing is returned. + $this->char = $start; + return false; + } + + // Return a character token for the character corresponding to the + // entity name (as given by the second column of the entities table). 
+ return html_entity_decode('&'.$entity.';', ENT_QUOTES, 'UTF-8'); + } + + private function emitToken($token) { + $emit = $this->tree->emitToken($token); + + if(is_int($emit)) { + $this->content_model = $emit; + + } elseif($token['type'] === self::ENDTAG) { + $this->content_model = self::PCDATA; + } + } + + private function EOF() { + $this->state = null; + $this->tree->emitToken(array( + 'type' => self::EOF + )); + } +} + +class HTML5TreeConstructer { + public $stack = array(); + + private $phase; + private $mode; + private $dom; + private $foster_parent = null; + private $a_formatting = array(); + + private $head_pointer = null; + private $form_pointer = null; + + private $scoping = array('button','caption','html','marquee','object','table','td','th'); + private $formatting = array('a','b','big','em','font','i','nobr','s','small','strike','strong','tt','u'); + private $special = array('address','area','base','basefont','bgsound', + 'blockquote','body','br','center','col','colgroup','dd','dir','div','dl', + 'dt','embed','fieldset','form','frame','frameset','h1','h2','h3','h4','h5', + 'h6','head','hr','iframe','image','img','input','isindex','li','link', + 'listing','menu','meta','noembed','noframes','noscript','ol','optgroup', + 'option','p','param','plaintext','pre','script','select','spacer','style', + 'tbody','textarea','tfoot','thead','title','tr','ul','wbr'); + + // The different phases. + const INIT_PHASE = 0; + const ROOT_PHASE = 1; + const MAIN_PHASE = 2; + const END_PHASE = 3; + + // The different insertion modes for the main phase. + const BEFOR_HEAD = 0; + const IN_HEAD = 1; + const AFTER_HEAD = 2; + const IN_BODY = 3; + const IN_TABLE = 4; + const IN_CAPTION = 5; + const IN_CGROUP = 6; + const IN_TBODY = 7; + const IN_ROW = 8; + const IN_CELL = 9; + const IN_SELECT = 10; + const AFTER_BODY = 11; + const IN_FRAME = 12; + const AFTR_FRAME = 13; + + // The different types of elements. 
+ const SPECIAL = 0; + const SCOPING = 1; + const FORMATTING = 2; + const PHRASING = 3; + + const MARKER = 0; + + public function __construct() { + $this->phase = self::INIT_PHASE; + $this->mode = self::BEFOR_HEAD; + $this->dom = new DOMDocument; + + $this->dom->encoding = 'UTF-8'; + $this->dom->preserveWhiteSpace = true; + $this->dom->substituteEntities = true; + $this->dom->strictErrorChecking = false; + } + + // Process tag tokens + public function emitToken($token) { + switch($this->phase) { + case self::INIT_PHASE: return $this->initPhase($token); break; + case self::ROOT_PHASE: return $this->rootElementPhase($token); break; + case self::MAIN_PHASE: return $this->mainPhase($token); break; + case self::END_PHASE : return $this->trailingEndPhase($token); break; + } + } + + private function initPhase($token) { + /* Initially, the tree construction stage must handle each token + emitted from the tokenisation stage as follows: */ + + /* A DOCTYPE token that is marked as being in error + A comment token + A start tag token + An end tag token + A character token that is not one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE + An end-of-file token */ + if((isset($token['error']) && $token['error']) || + $token['type'] === HTML5::COMMENT || + $token['type'] === HTML5::STARTTAG || + $token['type'] === HTML5::ENDTAG || + $token['type'] === HTML5::EOF || + ($token['type'] === HTML5::CHARACTR && isset($token['data']) && + !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data']))) { + /* This specification does not define how to handle this case. In + particular, user agents may ignore the entirety of this specification + altogether for such documents, and instead invoke special parse modes + with a greater emphasis on backwards compatibility. 
*/ + + $this->phase = self::ROOT_PHASE; + return $this->rootElementPhase($token); + + /* A DOCTYPE token marked as being correct */ + } elseif(isset($token['error']) && !$token['error']) { + /* Append a DocumentType node to the Document node, with the name + attribute set to the name given in the DOCTYPE token (which will be + "HTML"), and the other attributes specific to DocumentType objects + set to null, empty lists, or the empty string as appropriate. */ + $doctype = new DOMDocumentType(null, null, 'HTML'); + + /* Then, switch to the root element phase of the tree construction + stage. */ + $this->phase = self::ROOT_PHASE; + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + } elseif(isset($token['data']) && preg_match('/^[\t\n\x0b\x0c ]+$/', + $token['data'])) { + /* Append that character to the Document node. */ + $text = $this->dom->createTextNode($token['data']); + $this->dom->appendChild($text); + } + } + + private function rootElementPhase($token) { + /* After the initial phase, as each token is emitted from the tokenisation + stage, it must be processed as described in this section. */ + + /* A DOCTYPE token */ + if($token['type'] === HTML5::DOCTYPE) { + // Parse error. Ignore the token. + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the Document object with the data + attribute set to the data given in the comment token. */ + $comment = $this->dom->createComment($token['data']); + $this->dom->appendChild($comment); + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + } elseif($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append that character to the Document node. 
*/ + $text = $this->dom->createTextNode($token['data']); + $this->dom->appendChild($text); + + /* A character token that is not one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED + (FF), or U+0020 SPACE + A start tag token + An end tag token + An end-of-file token */ + } elseif(($token['type'] === HTML5::CHARACTR && + !preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || + $token['type'] === HTML5::STARTTAG || + $token['type'] === HTML5::ENDTAG || + $token['type'] === HTML5::EOF) { + /* Create an HTMLElement node with the tag name html, in the HTML + namespace. Append it to the Document object. Switch to the main + phase and reprocess the current token. */ + $html = $this->dom->createElement('html'); + $this->dom->appendChild($html); + $this->stack[] = $html; + + $this->phase = self::MAIN_PHASE; + return $this->mainPhase($token); + } + } + + private function mainPhase($token) { + /* Tokens in the main phase must be handled as follows: */ + + /* A DOCTYPE token */ + if($token['type'] === HTML5::DOCTYPE) { + // Parse error. Ignore the token. + + /* A start tag token with the tag name "html" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'html') { + /* If this start tag token was not the first start tag token, then + it is a parse error. */ + + /* For each attribute on the token, check to see if the attribute + is already present on the top element of the stack of open elements. + If it is not, add the attribute and its corresponding value to that + element. */ + foreach($token['attr'] as $attr) { + if(!$this->stack[0]->hasAttribute($attr['name'])) { + $this->stack[0]->setAttribute($attr['name'], $attr['value']); + } + } + + /* An end-of-file token */ + } elseif($token['type'] === HTML5::EOF) { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* Anything else. 
*/ + } else { + /* Depends on the insertion mode: */ + switch($this->mode) { + case self::BEFOR_HEAD: return $this->beforeHead($token); break; + case self::IN_HEAD: return $this->inHead($token); break; + case self::AFTER_HEAD: return $this->afterHead($token); break; + case self::IN_BODY: return $this->inBody($token); break; + case self::IN_TABLE: return $this->inTable($token); break; + case self::IN_CAPTION: return $this->inCaption($token); break; + case self::IN_CGROUP: return $this->inColumnGroup($token); break; + case self::IN_TBODY: return $this->inTableBody($token); break; + case self::IN_ROW: return $this->inRow($token); break; + case self::IN_CELL: return $this->inCell($token); break; + case self::IN_SELECT: return $this->inSelect($token); break; + case self::AFTER_BODY: return $this->afterBody($token); break; + case self::IN_FRAME: return $this->inFrameset($token); break; + case self::AFTR_FRAME: return $this->afterFrameset($token); break; + case self::END_PHASE: return $this->trailingEndPhase($token); break; + } + } + } + + private function beforeHead($token) { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data attribute + set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* A start tag token with the tag name "head" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') { + /* Create an element for the token, append the new element to the + current node and push it onto the stack of open elements. 
*/ + $element = $this->insertElement($token); + + /* Set the head element pointer to this new element node. */ + $this->head_pointer = $element; + + /* Change the insertion mode to "in head". */ + $this->mode = self::IN_HEAD; + + /* A start tag token whose tag name is one of: "base", "link", "meta", + "script", "style", "title". Or an end tag with the tag name "html". + Or a character token that is not one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE. Or any other start tag token */ + } elseif($token['type'] === HTML5::STARTTAG || + ($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') || + ($token['type'] === HTML5::CHARACTR && !preg_match('/^[\t\n\x0b\x0c ]$/', + $token['data']))) { + /* Act as if a start tag token with the tag name "head" and no + attributes had been seen, then reprocess the current token. */ + $this->beforeHead(array( + 'name' => 'head', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + return $this->inHead($token); + + /* Any other end tag */ + } elseif($token['type'] === HTML5::ENDTAG) { + /* Parse error. Ignore the token. */ + } + } + + private function inHead($token) { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE. + + THIS DIFFERS FROM THE SPEC: If the current node is either a title, style + or script element, append the character to the current node regardless + of its content. */ + if(($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) || ( + $token['type'] === HTML5::CHARACTR && in_array(end($this->stack)->nodeName, + array('title', 'style', 'script')))) { + /* Append the character to the current node. 
*/ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data attribute + set to the data given in the comment token. */ + $this->insertComment($token['data']); + + } elseif($token['type'] === HTML5::ENDTAG && + in_array($token['name'], array('title', 'style', 'script'))) { + array_pop($this->stack); + return HTML5::PCDATA; + + /* A start tag with the tag name "title" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'title') { + /* Create an element for the token and append the new element to the + node pointed to by the head element pointer, or, if that is null + (innerHTML case), to the current node. */ + if($this->head_pointer !== null) { + $element = $this->insertElement($token, false); + $this->head_pointer->appendChild($element); + + } else { + $element = $this->insertElement($token); + } + + /* Switch the tokeniser's content model flag to the RCDATA state. */ + return HTML5::RCDATA; + + /* A start tag with the tag name "style" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'style') { + /* Create an element for the token and append the new element to the + node pointed to by the head element pointer, or, if that is null + (innerHTML case), to the current node. */ + if($this->head_pointer !== null) { + $element = $this->insertElement($token, false); + $this->head_pointer->appendChild($element); + + } else { + $this->insertElement($token); + } + + /* Switch the tokeniser's content model flag to the CDATA state. */ + return HTML5::CDATA; + + /* A start tag with the tag name "script" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'script') { + /* Create an element for the token. */ + $element = $this->insertElement($token, false); + $this->head_pointer->appendChild($element); + + /* Switch the tokeniser's content model flag to the CDATA state. 
*/ + return HTML5::CDATA; + + /* A start tag with the tag name "base", "link", or "meta" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('base', 'link', 'meta'))) { + /* Create an element for the token and append the new element to the + node pointed to by the head element pointer, or, if that is null + (innerHTML case), to the current node. */ + if($this->head_pointer !== null) { + $element = $this->insertElement($token, false); + $this->head_pointer->appendChild($element); + array_pop($this->stack); + + } else { + $this->insertElement($token); + } + + /* An end tag with the tag name "head" */ + } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'head') { + /* If the current node is a head element, pop the current node off + the stack of open elements. */ + if($this->head_pointer->isSameNode(end($this->stack))) { + array_pop($this->stack); + + /* Otherwise, this is a parse error. */ + } else { + // k + } + + /* Change the insertion mode to "after head". */ + $this->mode = self::AFTER_HEAD; + + /* A start tag with the tag name "head" or an end tag except "html". */ + } elseif(($token['type'] === HTML5::STARTTAG && $token['name'] === 'head') || + ($token['type'] === HTML5::ENDTAG && $token['name'] !== 'html')) { + // Parse error. Ignore the token. + + /* Anything else */ + } else { + /* If the current node is a head element, act as if an end tag + token with the tag name "head" had been seen. */ + if($this->head_pointer->isSameNode(end($this->stack))) { + $this->inHead(array( + 'name' => 'head', + 'type' => HTML5::ENDTAG + )); + + /* Otherwise, change the insertion mode to "after head". */ + } else { + $this->mode = self::AFTER_HEAD; + } + + /* Then, reprocess the current token. 
*/ + return $this->afterHead($token); + } + } + + private function afterHead($token) { + /* Handle the token as follows: */ + + /* A character token that is one of one of U+0009 CHARACTER TABULATION, + U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF), + or U+0020 SPACE */ + if($token['type'] === HTML5::CHARACTR && + preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) { + /* Append the character to the current node. */ + $this->insertText($token['data']); + + /* A comment token */ + } elseif($token['type'] === HTML5::COMMENT) { + /* Append a Comment node to the current node with the data attribute + set to the data given in the comment token. */ + $this->insertComment($token['data']); + + /* A start tag token with the tag name "body" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'body') { + /* Insert a body element for the token. */ + $this->insertElement($token); + + /* Change the insertion mode to "in body". */ + $this->mode = self::IN_BODY; + + /* A start tag token with the tag name "frameset" */ + } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'frameset') { + /* Insert a frameset element for the token. */ + $this->insertElement($token); + + /* Change the insertion mode to "in frameset". */ + $this->mode = self::IN_FRAME; + + /* A start tag token whose tag name is one of: "base", "link", "meta", + "script", "style", "title" */ + } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'], + array('base', 'link', 'meta', 'script', 'style', 'title'))) { + /* Parse error. Switch the insertion mode back to "in head" and + reprocess the token. */ + $this->mode = self::IN_HEAD; + return $this->inHead($token); + + /* Anything else */ + } else { + /* Act as if a start tag token with the tag name "body" and no + attributes had been seen, and then reprocess the current token. 
*/ + $this->afterHead(array( + 'name' => 'body', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + return $this->inBody($token); + } + } + + private function inBody($token) { + /* Handle the token as follows: */ + + switch($token['type']) { + /* A character token */ + case HTML5::CHARACTR: + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Append the token's character to the current node. */ + $this->insertText($token['data']); + break; + + /* A comment token */ + case HTML5::COMMENT: + /* Append a Comment node to the current node with the data + attribute set to the data given in the comment token. */ + $this->insertComment($token['data']); + break; + + case HTML5::STARTTAG: + switch($token['name']) { + /* A start tag token whose tag name is one of: "script", + "style" */ + case 'script': case 'style': + /* Process the token as if the insertion mode had been "in + head". */ + return $this->inHead($token); + break; + + /* A start tag token whose tag name is one of: "base", "link", + "meta", "title" */ + case 'base': case 'link': case 'meta': case 'title': + /* Parse error. Process the token as if the insertion mode + had been "in head". */ + return $this->inHead($token); + break; + + /* A start tag token with the tag name "body" */ + case 'body': + /* Parse error. If the second element on the stack of open + elements is not a body element, or, if the stack of open + elements has only one node on it, then ignore the token. + (innerHTML case) */ + if(count($this->stack) === 1 || $this->stack[1]->nodeName !== 'body') { + // Ignore + + /* Otherwise, for each attribute on the token, check to see + if the attribute is already present on the body element (the + second element) on the stack of open elements. If it is not, + add the attribute and its corresponding value to that + element. 
*/ + } else { + foreach($token['attr'] as $attr) { + if(!$this->stack[1]->hasAttribute($attr['name'])) { + $this->stack[1]->setAttribute($attr['name'], $attr['value']); + } + } + } + break; + + /* A start tag whose tag name is one of: "address", + "blockquote", "center", "dir", "div", "dl", "fieldset", + "listing", "menu", "ol", "p", "ul" */ + case 'address': case 'blockquote': case 'center': case 'dir': + case 'div': case 'dl': case 'fieldset': case 'listing': + case 'menu': case 'ol': case 'p': case 'ul': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been + seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + break; + + /* A start tag whose tag name is "form" */ + case 'form': + /* If the form element pointer is not null, ignore the + token with a parse error. */ + if($this->form_pointer !== null) { + // Ignore. + + /* Otherwise: */ + } else { + /* If the stack of open elements has a p element in + scope, then act as if an end tag with the tag name p + had been seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token, and set the + form element pointer to point to the element created. */ + $element = $this->insertElement($token); + $this->form_pointer = $element; + } + break; + + /* A start tag whose tag name is "li", "dd" or "dt" */ + case 'li': case 'dd': case 'dt': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been + seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + $stack_length = count($this->stack) - 1; + + for($n = $stack_length; 0 <= $n; $n--) { + /* 1. 
Initialise node to be the current node (the + bottommost node of the stack). */ + $stop = false; + $node = $this->stack[$n]; + $cat = $this->getElementCategory($node->tagName); + + /* 2. If node is an li, dd or dt element, then pop all + the nodes from the current node up to node, including + node, then stop this algorithm. */ + if($token['name'] === $node->tagName || ($token['name'] !== 'li' + && ($node->tagName === 'dd' || $node->tagName === 'dt'))) { + for($x = $stack_length; $x >= $n ; $x--) { + array_pop($this->stack); + } + + break; + } + + /* 3. If node is not in the formatting category, and is + not in the phrasing category, and is not an address or + div element, then stop this algorithm. */ + if($cat !== self::FORMATTING && $cat !== self::PHRASING && + $node->tagName !== 'address' && $node->tagName !== 'div') { + break; + } + } + + /* Finally, insert an HTML element with the same tag + name as the token's. */ + $this->insertElement($token); + break; + + /* A start tag token whose tag name is "plaintext" */ + case 'plaintext': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been + seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + return HTML5::PLAINTEXT; + break; + + /* A start tag whose tag name is one of: "h1", "h2", "h3", "h4", + "h5", "h6" */ + case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been seen. 
*/ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* If the stack of open elements has in scope an element whose + tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then + this is a parse error; pop elements from the stack until an + element with one of those tag names has been popped from the + stack. */ + while($this->elementInScope(array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'))) { + array_pop($this->stack); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + break; + + /* A start tag whose tag name is "a" */ + case 'a': + /* If the list of active formatting elements contains + an element whose tag name is "a" between the end of the + list and the last marker on the list (or the start of + the list if there is no marker on the list), then this + is a parse error; act as if an end tag with the tag name + "a" had been seen, then remove that element from the list + of active formatting elements and the stack of open + elements if the end tag didn't already remove it (it + might not have if the element is not in table scope). */ + $leng = count($this->a_formatting); + + for($n = $leng - 1; $n >= 0; $n--) { + if($this->a_formatting[$n] === self::MARKER) { + break; + + } elseif($this->a_formatting[$n]->nodeName === 'a') { + $this->emitToken(array( + 'name' => 'a', + 'type' => HTML5::ENDTAG + )); + break; + } + } + + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $el = $this->insertElement($token); + + /* Add that element to the list of active formatting + elements. 
*/ + $this->a_formatting[] = $el; + break; + + /* A start tag whose tag name is one of: "b", "big", "em", "font", + "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ + case 'b': case 'big': case 'em': case 'font': case 'i': + case 'nobr': case 's': case 'small': case 'strike': + case 'strong': case 'tt': case 'u': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $el = $this->insertElement($token); + + /* Add that element to the list of active formatting + elements. */ + $this->a_formatting[] = $el; + break; + + /* A start tag token whose tag name is "button" */ + case 'button': + /* If the stack of open elements has a button element in scope, + then this is a parse error; act as if an end tag with the tag + name "button" had been seen, then reprocess the token. (We don't + do that. Unnecessary.) */ + if($this->elementInScope('button')) { + $this->inBody(array( + 'name' => 'button', + 'type' => HTML5::ENDTAG + )); + } + + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Insert a marker at the end of the list of active + formatting elements. */ + $this->a_formatting[] = self::MARKER; + break; + + /* A start tag token whose tag name is one of: "marquee", "object" */ + case 'marquee': case 'object': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Insert a marker at the end of the list of active + formatting elements. */ + $this->a_formatting[] = self::MARKER; + break; + + /* A start tag token whose tag name is "xmp" */ + case 'xmp': + /* Reconstruct the active formatting elements, if any. 
*/ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Switch the content model flag to the CDATA state. */ + return HTML5::CDATA; + break; + + /* A start tag whose tag name is "table" */ + case 'table': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Change the insertion mode to "in table". */ + $this->mode = self::IN_TABLE; + break; + + /* A start tag whose tag name is one of: "area", "basefont", + "bgsound", "br", "embed", "img", "param", "spacer", "wbr" */ + case 'area': case 'basefont': case 'bgsound': case 'br': + case 'embed': case 'img': case 'param': case 'spacer': + case 'wbr': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Immediately pop the current node off the stack of open elements. */ + array_pop($this->stack); + break; + + /* A start tag whose tag name is "hr" */ + case 'hr': + /* If the stack of open elements has a p element in scope, + then act as if an end tag with the tag name p had been seen. */ + if($this->elementInScope('p')) { + $this->emitToken(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + } + + /* Insert an HTML element for the token. */ + $this->insertElement($token); + + /* Immediately pop the current node off the stack of open elements. */ + array_pop($this->stack); + break; + + /* A start tag whose tag name is "image" */ + case 'image': + /* Parse error. Change the token's tag name to "img" and + reprocess it. (Don't ask.) 
*/ + $token['name'] = 'img'; + return $this->inBody($token); + break; + + /* A start tag whose tag name is "input" */ + case 'input': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an input element for the token. */ + $element = $this->insertElement($token, false); + + /* If the form element pointer is not null, then associate the + input element with the form element pointed to by the form + element pointer. */ + $this->form_pointer !== null + ? $this->form_pointer->appendChild($element) + : end($this->stack)->appendChild($element); + + /* Pop that input element off the stack of open elements. */ + array_pop($this->stack); + break; + + /* A start tag whose tag name is "isindex" */ + case 'isindex': + /* Parse error. */ + // w/e + + /* If the form element pointer is not null, + then ignore the token. */ + if($this->form_pointer === null) { + /* Act as if a start tag token with the tag name "form" had + been seen (this must be "form", not "body": the matching "form" end tag is emitted at the end of this branch). */ + $this->inBody(array( + 'name' => 'form', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + /* Act as if a start tag token with the tag name "hr" had + been seen. */ + $this->inBody(array( + 'name' => 'hr', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + /* Act as if a start tag token with the tag name "p" had + been seen. */ + $this->inBody(array( + 'name' => 'p', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + /* Act as if a start tag token with the tag name "label" + had been seen. */ + $this->inBody(array( + 'name' => 'label', + 'type' => HTML5::STARTTAG, + 'attr' => array() + )); + + /* Act as if a stream of character tokens had been seen. */ + $this->insertText('This is a searchable index. '.
+ 'Insert your search keywords here: '); + + /* Act as if a start tag token with the tag name "input" + had been seen, with all the attributes from the "isindex" + token, except with the "name" attribute set to the value + "isindex" (ignoring any explicit "name" attribute). */ + $attr = $token['attr']; + $attr[] = array('name' => 'name', 'value' => 'isindex'); + + $this->inBody(array( + 'name' => 'input', + 'type' => HTML5::STARTTAG, + 'attr' => $attr + )); + + /* Act as if a stream of character tokens had been seen + (see below for what they should say). */ + $this->insertText('This is a searchable index. '. + 'Insert your search keywords here: '); + + /* Act as if an end tag token with the tag name "label" + had been seen. */ + $this->inBody(array( + 'name' => 'label', + 'type' => HTML5::ENDTAG + )); + + /* Act as if an end tag token with the tag name "p" had + been seen. */ + $this->inBody(array( + 'name' => 'p', + 'type' => HTML5::ENDTAG + )); + + /* Act as if a start tag token with the tag name "hr" had + been seen (a START tag, mirroring the "hr" emitted before the prompt). */ + $this->inBody(array( + 'name' => 'hr', + 'type' => HTML5::STARTTAG, 'attr' => array() + )); + + /* Act as if an end tag token with the tag name "form" had + been seen. */ + $this->inBody(array( + 'name' => 'form', + 'type' => HTML5::ENDTAG + )); + } + break; + + /* A start tag whose tag name is "textarea" */ + case 'textarea': + $this->insertElement($token); + + /* Switch the tokeniser's content model flag to the + RCDATA state. */ + return HTML5::RCDATA; + break; + + /* A start tag whose tag name is one of: "iframe", "noembed", + "noframes" */ + case 'iframe': case 'noembed': case 'noframes': + $this->insertElement($token); + + /* Switch the tokeniser's content model flag to the CDATA state. */ + return HTML5::CDATA; + break; + + /* A start tag whose tag name is "select" */ + case 'select': + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + /* Insert an HTML element for the token.
*/ + $this->insertElement($token); + + /* Change the insertion mode to "in select". */ + $this->mode = self::IN_SELECT; + break; + + /* A start or end tag whose tag name is one of: "caption", "col", + "colgroup", "frame", "frameset", "head", "option", "optgroup", + "tbody", "td", "tfoot", "th", "thead", "tr". */ + case 'caption': case 'col': case 'colgroup': case 'frame': + case 'frameset': case 'head': case 'option': case 'optgroup': + case 'tbody': case 'td': case 'tfoot': case 'th': case 'thead': + case 'tr': + // Parse error. Ignore the token. + break; + + /* A start or end tag whose tag name is one of: "event-source", + "section", "nav", "article", "aside", "header", "footer", + "datagrid", "command" */ + case 'event-source': case 'section': case 'nav': case 'article': + case 'aside': case 'header': case 'footer': case 'datagrid': + case 'command': + // Work in progress! + break; + + /* A start tag token not covered by the previous entries */ + default: + /* Reconstruct the active formatting elements, if any. */ + $this->reconstructActiveFormattingElements(); + + $this->insertElement($token, true, true); + break; + } + break; + + case HTML5::ENDTAG: + switch($token['name']) { + /* An end tag with the tag name "body" */ + case 'body': + /* If the second element in the stack of open elements is + not a body element, this is a parse error. Ignore the token. + (innerHTML case) */ + if(count($this->stack) < 2 || $this->stack[1]->nodeName !== 'body') { + // Ignore. + + /* If the current node is not the body element, then this + is a parse error. */ + } elseif(end($this->stack)->nodeName !== 'body') { + // Parse error. + } + + /* Change the insertion mode to "after body". */ + $this->mode = self::AFTER_BODY; + break; + + /* An end tag with the tag name "html" */ + case 'html': + /* Act as if an end tag with tag name "body" had been seen, + then, if that token wasn't ignored, reprocess the current + token. 
*/ + $this->inBody(array( + 'name' => 'body', + 'type' => HTML5::ENDTAG + )); + + return $this->afterBody($token); + break; + + /* An end tag whose tag name is one of: "address", "blockquote", + "center", "dir", "div", "dl", "fieldset", "listing", "menu", + "ol", "pre", "ul" */ + case 'address': case 'blockquote': case 'center': case 'dir': + case 'div': case 'dl': case 'fieldset': case 'listing': + case 'menu': case 'ol': case 'pre': case 'ul': + /* If the stack of open elements has an element in scope + with the same tag name as that of the token, then generate + implied end tags. */ + if($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(); + + /* Now, if the current node is not an element with + the same tag name as that of the token, then this + is a parse error. */ + // w/e + + /* If the stack of open elements has an element in + scope with the same tag name as that of the token, + then pop elements from this stack until an element + with that tag name has been popped from the stack. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === $token['name']) { + $n = -1; + } + + array_pop($this->stack); + } + } + break; + + /* An end tag whose tag name is "form" */ + case 'form': + /* If the stack of open elements has an element in scope + with the same tag name as that of the token, then generate + implied end tags. */ + if($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(); + + } + + if(end($this->stack)->nodeName !== $token['name']) { + /* Now, if the current node is not an element with the + same tag name as that of the token, then this is a parse + error. */ + // w/e + + } else { + /* Otherwise, if the current node is an element with + the same tag name as that of the token pop that element + from the stack. */ + array_pop($this->stack); + } + + /* In any case, set the form element pointer to null. 
*/ + $this->form_pointer = null; + break; + + /* An end tag whose tag name is "p" */ + case 'p': + /* If the stack of open elements has a p element in scope, + then generate implied end tags, except for p elements. */ + if($this->elementInScope('p')) { + $this->generateImpliedEndTags(array('p')); + + /* If the current node is not a p element, then this is + a parse error. */ + // k + + /* If the stack of open elements has a p element in + scope, then pop elements from this stack until the stack + no longer has a p element in scope. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->elementInScope('p')) { + array_pop($this->stack); + + } else { + break; + } + } + } + break; + + /* An end tag whose tag name is "dd", "dt", or "li" */ + case 'dd': case 'dt': case 'li': + /* If the stack of open elements has an element in scope + whose tag name matches the tag name of the token, then + generate implied end tags, except for elements with the + same tag name as the token. */ + if($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(array($token['name'])); + + /* If the current node is not an element with the same + tag name as the token, then this is a parse error. */ + // w/e + + /* If the stack of open elements has an element in scope + whose tag name matches the tag name of the token, then + pop elements from this stack until an element with that + tag name has been popped from the stack. */ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === $token['name']) { + $n = -1; + } + + array_pop($this->stack); + } + } + break; + + /* An end tag whose tag name is one of: "h1", "h2", "h3", "h4", + "h5", "h6" */ + case 'h1': case 'h2': case 'h3': case 'h4': case 'h5': case 'h6': + $elements = array('h1', 'h2', 'h3', 'h4', 'h5', 'h6'); + + /* If the stack of open elements has in scope an element whose + tag name is one of "h1", "h2", "h3", "h4", "h5", or "h6", then + generate implied end tags. 
*/ + if($this->elementInScope($elements)) { + $this->generateImpliedEndTags(); + + /* Now, if the current node is not an element with the same + tag name as that of the token, then this is a parse error. */ + // w/e + + /* If the stack of open elements has in scope an element + whose tag name is one of "h1", "h2", "h3", "h4", "h5", or + "h6", then pop elements from the stack until an element + with one of those tag names has been popped from the stack. */ + while($this->elementInScope($elements)) { + array_pop($this->stack); + } + } + break; + + /* An end tag whose tag name is one of: "a", "b", "big", "em", + "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u" */ + case 'a': case 'b': case 'big': case 'em': case 'font': + case 'i': case 'nobr': case 's': case 'small': case 'strike': + case 'strong': case 'tt': case 'u': + /* 1. Let the formatting element be the last element in + the list of active formatting elements that: + * is between the end of the list and the last scope + marker in the list, if any, or the start of the list + otherwise, and + * has the same tag name as the token. + */ + while(true) { + for($a = count($this->a_formatting) - 1; $a >= 0; $a--) { + if($this->a_formatting[$a] === self::MARKER) { + break; + + } elseif($this->a_formatting[$a]->tagName === $token['name']) { + $formatting_element = $this->a_formatting[$a]; + $in_stack = in_array($formatting_element, $this->stack, true); + $fe_af_pos = $a; + break; + } + } + + /* If there is no such node, or, if that node is + also in the stack of open elements but the element + is not in scope, then this is a parse error. Abort + these steps. The token is ignored. */ + if(!isset($formatting_element) || ($in_stack && + !$this->elementInScope($token['name']))) { + break; + + /* Otherwise, if there is such a node, but that node + is not in the stack of open elements, then this is a + parse error; remove the element from the list, and + abort these steps. 
*/ + } elseif(isset($formatting_element) && !$in_stack) { + unset($this->a_formatting[$fe_af_pos]); + $this->a_formatting = array_merge($this->a_formatting); + break; + } + + /* 2. Let the furthest block be the topmost node in the + stack of open elements that is lower in the stack + than the formatting element, and is not an element in + the phrasing or formatting categories. There might + not be one. */ + $fe_s_pos = array_search($formatting_element, $this->stack, true); + $length = count($this->stack); + + for($s = $fe_s_pos + 1; $s < $length; $s++) { + $category = $this->getElementCategory($this->stack[$s]->nodeName); + + if($category !== self::PHRASING && $category !== self::FORMATTING) { + $furthest_block = $this->stack[$s]; break; /* stop at the first (topmost) match; without this the bottommost one wins */ + } + } + + /* 3. If there is no furthest block, then the UA must + skip the subsequent steps and instead just pop all + the nodes from the bottom of the stack of open + elements, from the current node up to the formatting + element, and remove the formatting element from the + list of active formatting elements. */ + if(!isset($furthest_block)) { + for($n = $length - 1; $n >= $fe_s_pos; $n--) { + array_pop($this->stack); + } + + unset($this->a_formatting[$fe_af_pos]); + $this->a_formatting = array_merge($this->a_formatting); + break; + } + + /* 4. Let the common ancestor be the element + immediately above the formatting element in the stack + of open elements. */ + $common_ancestor = $this->stack[$fe_s_pos - 1]; + + /* 5. If the furthest block has a parent node, then + remove the furthest block from its parent node. */ + if($furthest_block->parentNode !== null) { + $furthest_block->parentNode->removeChild($furthest_block); + } + + /* 6. Let a bookmark note the position of the + formatting element in the list of active formatting + elements relative to the elements on either side + of it in the list. */ + $bookmark = $fe_af_pos; + + /* 7. Let node and last node be the furthest block.
+ Follow these steps: */ + $node = $furthest_block; + $last_node = $furthest_block; + + while(true) { + for($n = array_search($node, $this->stack, true) - 1; $n >= 0; $n--) { + /* 7.1 Let node be the element immediately + prior to node in the stack of open elements. */ + $node = $this->stack[$n]; + + /* 7.2 If node is not in the list of active + formatting elements, then remove node from + the stack of open elements and then go back + to step 1. */ + if(!in_array($node, $this->a_formatting, true)) { + unset($this->stack[$n]); + $this->stack = array_merge($this->stack); + + } else { + break; + } + } + + /* 7.3 Otherwise, if node is the formatting + element, then go to the next step in the overall + algorithm. */ + if($node === $formatting_element) { + break; + + /* 7.4 Otherwise, if last node is the furthest + block, then move the aforementioned bookmark to + be immediately after the node in the list of + active formatting elements. */ + } elseif($last_node === $furthest_block) { + $bookmark = array_search($node, $this->a_formatting, true) + 1; + } + + /* 7.5 If node has any children, perform a + shallow clone of node, replace the entry for + node in the list of active formatting elements + with an entry for the clone, replace the entry + for node in the stack of open elements with an + entry for the clone, and let node be the clone. */ + if($node->hasChildNodes()) { + $clone = $node->cloneNode(); + $s_pos = array_search($node, $this->stack, true); + $a_pos = array_search($node, $this->a_formatting, true); + + $this->stack[$s_pos] = $clone; + $this->a_formatting[$a_pos] = $clone; + $node = $clone; + } + + /* 7.6 Insert last node into node, first removing + it from its previous parent node if any. */ + if($last_node->parentNode !== null) { + $last_node->parentNode->removeChild($last_node); + } + + $node->appendChild($last_node); + + /* 7.7 Let last node be node. */ + $last_node = $node; + } + + /* 8. 
Insert whatever last node ended up being in + the previous step into the common ancestor node, + first removing it from its previous parent node if + any. */ + if($last_node->parentNode !== null) { + $last_node->parentNode->removeChild($last_node); + } + + $common_ancestor->appendChild($last_node); + + /* 9. Perform a shallow clone of the formatting + element. */ + $clone = $formatting_element->cloneNode(); + + /* 10. Take all of the child nodes of the furthest + block and append them to the clone created in the + last step. */ + while($furthest_block->hasChildNodes()) { + $child = $furthest_block->firstChild; + $furthest_block->removeChild($child); + $clone->appendChild($child); + } + + /* 11. Append that clone to the furthest block. */ + $furthest_block->appendChild($clone); + + /* 12. Remove the formatting element from the list + of active formatting elements, and insert the clone + into the list of active formatting elements at the + position of the aforementioned bookmark. */ + $fe_af_pos = array_search($formatting_element, $this->a_formatting, true); + unset($this->a_formatting[$fe_af_pos]); + $this->a_formatting = array_merge($this->a_formatting); + + $af_part1 = array_slice($this->a_formatting, 0, $bookmark - 1); + $af_part2 = array_slice($this->a_formatting, $bookmark, count($this->a_formatting)); + $this->a_formatting = array_merge($af_part1, array($clone), $af_part2); + + /* 13. Remove the formatting element from the stack + of open elements, and insert the clone into the stack + of open elements immediately after (i.e. in a more + deeply nested position than) the position of the + furthest block in that stack. 
*/ + $fe_s_pos = array_search($formatting_element, $this->stack, true); + $fb_s_pos = array_search($furthest_block, $this->stack, true); + unset($this->stack[$fe_s_pos]); + + $s_part1 = array_slice($this->stack, 0, $fb_s_pos); + $s_part2 = array_slice($this->stack, $fb_s_pos + 1, count($this->stack)); + $this->stack = array_merge($s_part1, array($clone), $s_part2); + + /* 14. Jump back to step 1 in this series of steps. */ + unset($formatting_element, $fe_af_pos, $fe_s_pos, $furthest_block); + } + break; + + /* An end tag token whose tag name is one of: "button", + "marquee", "object" */ + case 'button': case 'marquee': case 'object': + /* If the stack of open elements has an element in scope whose + tag name matches the tag name of the token, then generate implied + tags. */ + if($this->elementInScope($token['name'])) { + $this->generateImpliedEndTags(); + + /* Now, if the current node is not an element with the same + tag name as the token, then this is a parse error. */ + // k + + /* Now, if the stack of open elements has an element in scope + whose tag name matches the tag name of the token, then pop + elements from the stack until that element has been popped from + the stack, and clear the list of active formatting elements up + to the last marker. 
*/ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === $token['name']) { + $n = -1; + } + + array_pop($this->stack); + } + + $marker = end(array_keys($this->a_formatting, self::MARKER, true)); + + for($n = count($this->a_formatting) - 1; $n > $marker; $n--) { + array_pop($this->a_formatting); + } + } + break; + + /* Or an end tag whose tag name is one of: "area", "basefont", + "bgsound", "br", "embed", "hr", "iframe", "image", "img", + "input", "isindex", "noembed", "noframes", "param", "select", + "spacer", "table", "textarea", "wbr" */ + case 'area': case 'basefont': case 'bgsound': case 'br': + case 'embed': case 'hr': case 'iframe': case 'image': + case 'img': case 'input': case 'isindex': case 'noembed': + case 'noframes': case 'param': case 'select': case 'spacer': + case 'table': case 'textarea': case 'wbr': + // Parse error. Ignore the token. + break; + + /* An end tag token not covered by the previous entries */ + default: + for($n = count($this->stack) - 1; $n >= 0; $n--) { + /* Initialise node to be the current node (the bottommost + node of the stack). */ + $node = end($this->stack); + + /* If node has the same tag name as the end tag token, + then: */ + if($token['name'] === $node->nodeName) { + /* Generate implied end tags. */ + $this->generateImpliedEndTags(); + + /* If the tag name of the end tag token does not + match the tag name of the current node, this is a + parse error. */ + // k + + /* Pop all the nodes from the current node up to + node, including node, then stop this algorithm. */ + for($x = count($this->stack) - $n; $x >= $n; $x--) { + array_pop($this->stack); + } + + } else { + $category = $this->getElementCategory($node); + + if($category !== self::SPECIAL && $category !== self::SCOPING) { + /* Otherwise, if node is in neither the formatting + category nor the phrasing category, then this is a + parse error. Stop this algorithm. The end tag token + is ignored. 
*/
+                                    return false;
+                                }
+                            }
+                        }
+                    break;
+                }
+            break;
+        }
+    }
+
+    /**
+     * Processes one token under the "in table" insertion mode.
+     *
+     * @param array $token Token array; 'type' is always read, 'name' is read
+     *                     for tag tokens and 'data' for character/comment
+     *                     tokens.
+     * @return mixed false when an end/start "table" tag had to be ignored,
+     *               otherwise whatever the delegated handler returned (or
+     *               null when the token was handled here).
+     */
+    private function inTable($token) {
+        $clear = array('html', 'table');
+
+        /* A character token that is one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        if($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Append the character to the current node. */
+            $text = $this->dom->createTextNode($token['data']);
+            end($this->stack)->appendChild($text);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data
+            attribute set to the data given in the comment token. */
+            $comment = $this->dom->createComment($token['data']);
+            end($this->stack)->appendChild($comment);
+
+        /* A start tag whose tag name is "caption" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'caption') {
+            /* Clear the stack back to a table context. */
+            $this->clearStackToTableContext($clear);
+
+            /* Insert a marker at the end of the list of active
+            formatting elements. */
+            $this->a_formatting[] = self::MARKER;
+
+            /* Insert an HTML element for the token, then switch the
+            insertion mode to "in caption". */
+            $this->insertElement($token);
+            $this->mode = self::IN_CAPTION;
+
+        /* A start tag whose tag name is "colgroup" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'colgroup') {
+            /* Clear the stack back to a table context. */
+            $this->clearStackToTableContext($clear);
+
+            /* Insert an HTML element for the token, then switch the
+            insertion mode to "in column group". */
+            $this->insertElement($token);
+            $this->mode = self::IN_CGROUP;
+
+        /* A start tag whose tag name is "col" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'col') {
+            /* Act as if a "colgroup" start tag had been seen, then hand the
+            token to the column group handler. */
+            $this->inTable(array(
+                'name' => 'colgroup',
+                'type' => HTML5::STARTTAG,
+                'attr' => array()
+            ));
+
+            $this->inColumnGroup($token);
+
+        /* A start tag whose tag name is one of: "tbody", "tfoot", "thead" */
+        } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('tbody', 'tfoot', 'thead'), true)) {
+            /* Clear the stack back to a table context. */
+            $this->clearStackToTableContext($clear);
+
+            /* Insert an HTML element for the token, then switch the insertion
+            mode to "in table body". */
+            $this->insertElement($token);
+            $this->mode = self::IN_TBODY;
+
+        /* A start tag whose tag name is one of: "td", "th", "tr" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        in_array($token['name'], array('td', 'th', 'tr'), true)) {
+            /* Act as if a start tag token with the tag name "tbody" had been
+            seen, then reprocess the current token. */
+            $this->inTable(array(
+                'name' => 'tbody',
+                'type' => HTML5::STARTTAG,
+                'attr' => array()
+            ));
+
+            return $this->inTableBody($token);
+
+        /* A start tag whose tag name is "table" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'table') {
+            /* Parse error. Act as if an end tag token with the tag name "table"
+            had been seen, then, if that token wasn't ignored, reprocess the
+            current token. */
+            $closed = $this->inTable(array(
+                'name' => 'table',
+                'type' => HTML5::ENDTAG
+            ));
+
+            /* The end-tag branch below returns false when it ignores the
+            token. Reprocessing in that case would re-enter this branch with
+            the insertion mode unchanged and recurse without end. */
+            if($closed === false) {
+                return false;
+            }
+
+            return $this->mainPhase($token);
+
+        /* An end tag whose tag name is "table" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'table') {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. (innerHTML case) */
+            if(!$this->elementInScope($token['name'], true)) {
+                return false;
+            }
+
+            /* Generate implied end tags. */
+            $this->generateImpliedEndTags();
+
+            /* Now, if the current node is not a table element, then this
+            is a parse error. */
+            // w/e
+
+            /* Pop elements from this stack until a table element has been
+            popped from the stack. */
+            while(true) {
+                $current = end($this->stack)->nodeName;
+                array_pop($this->stack);
+
+                if($current === 'table') {
+                    break;
+                }
+            }
+
+            /* Reset the insertion mode appropriately. */
+            $this->resetInsertionMode();
+
+        /* An end tag whose tag name is one of: "body", "caption", "col",
+        "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('body', 'caption', 'col', 'colgroup', 'html', 'tbody', 'td',
+        'tfoot', 'th', 'thead', 'tr'), true)) {
+            // Parse error. Ignore the token.
+
+        /* Anything else */
+        } else {
+            /* Parse error. Process the token as if the insertion mode was "in
+            body", with the following exception: */
+
+            /* If the current node is a table, tbody, tfoot, thead, or tr
+            element, then, whenever a node would be inserted into the current
+            node, it must instead be inserted into the foster parent element. */
+            if(in_array(end($this->stack)->nodeName,
+            array('table', 'tbody', 'tfoot', 'thead', 'tr'), true)) {
+                /* Locate the last table element in the stack of open
+                elements; $n keeps its index for the fallback below. */
+                for($n = count($this->stack) - 1; $n >= 0; $n--) {
+                    if($this->stack[$n]->nodeName === 'table') {
+                        $table = $this->stack[$n];
+                        break;
+                    }
+                }
+
+                if(!isset($table)) {
+                    /* No table element in the stack (innerHTML case): the
+                    foster parent is the first element in the stack of open
+                    elements (the html element). */
+                    $this->foster_parent = $this->stack[0];
+
+                } elseif($table->parentNode !== null &&
+                $table->parentNode->nodeType === XML_ELEMENT_NODE) {
+                    /* The table has a parent element: that is the foster
+                    parent. */
+                    $this->foster_parent = $table->parentNode;
+
+                } else {
+                    /* The table has no parent, or its parent is not an
+                    element: use the element before the table in the stack
+                    of open elements (falling back to the first entry when
+                    the table itself is first). */
+                    $this->foster_parent = $this->stack[$n > 0 ? $n - 1 : 0];
+                }
+            }
+
+            $this->inBody($token);
+        }
+    }
+
+    /**
+     * Processes one token under the "in caption" insertion mode.
+     *
+     * @param array $token Token array; 'type' is always read, 'name' is read
+     *                     for tag tokens.
+     * @return mixed Result of inTable() when the token is reprocessed there,
+     *               otherwise null.
+     */
+    private function inCaption($token) {
+        /* An end tag whose tag name is "caption" */
+        if($token['type'] === HTML5::ENDTAG && $token['name'] === 'caption') {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. (innerHTML case) */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore
+
+            /* Otherwise: */
+            } else {
+                /* Generate implied end tags. */
+                $this->generateImpliedEndTags();
+
+                /* Now, if the current node is not a caption element, then this
+                is a parse error. */
+                // w/e
+
+                /* Pop elements from this stack until a caption element has
+                been popped from the stack. */
+                while(true) {
+                    $node = end($this->stack)->nodeName;
+                    array_pop($this->stack);
+
+                    if($node === 'caption') {
+                        break;
+                    }
+                }
+
+                /* Clear the list of active formatting elements up to the last
+                marker. */
+                $this->clearTheActiveFormattingElementsUpToTheLastMarker();
+
+                /* Switch the insertion mode to "in table". */
+                $this->mode = self::IN_TABLE;
+            }
+
+        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+        "tbody", "td", "tfoot", "th", "thead", "tr", or an end tag whose tag
+        name is "table" */
+        } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
+        'thead', 'tr'), true)) || ($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'table')) {
+            /* Parse error. Act as if an end tag with the tag name "caption"
+            had been seen, then, if that token wasn't ignored, reprocess the
+            current token. The implied end tag is ignored exactly when no
+            caption is in table scope, so test that up front. */
+            if($this->elementInScope('caption', true)) {
+                $this->inCaption(array(
+                    'name' => 'caption',
+                    'type' => HTML5::ENDTAG
+                ));
+
+                return $this->inTable($token);
+            }
+
+        /* An end tag whose tag name is one of: "body", "col", "colgroup",
+        "html", "tbody", "td", "tfoot", "th", "thead", "tr" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('body', 'col', 'colgroup', 'html', 'tbody', 'td', 'tfoot', 'th',
+        'thead', 'tr'), true)) {
+            // Parse error. Ignore the token.
+
+        /* Anything else */
+        } else {
+            /* Process the token as if the insertion mode was "in body". */
+            $this->inBody($token);
+        }
+    }
+
+    private function inColumnGroup($token) {
+        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        if($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Append the character to the current node. */
+            $text = $this->dom->createTextNode($token['data']);
+            end($this->stack)->appendChild($text);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data
+            attribute set to the data given in the comment token.
*/
+            $comment = $this->dom->createComment($token['data']);
+            end($this->stack)->appendChild($comment);
+
+        /* A start tag whose tag name is "col" */
+        } elseif($token['type'] === HTML5::STARTTAG && $token['name'] === 'col') {
+            /* Insert a col element for the token. Immediately pop the current
+            node off the stack of open elements. */
+            $this->insertElement($token);
+            array_pop($this->stack);
+
+        /* An end tag whose tag name is "colgroup" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'colgroup') {
+            /* If the current node is the root html element, then this is a
+            parse error, ignore the token. (innerHTML case) */
+            if(end($this->stack)->nodeName === 'html') {
+                // Ignore
+
+            /* Otherwise, pop the current node (which will be a colgroup
+            element) from the stack of open elements. Switch the insertion
+            mode to "in table". */
+            } else {
+                array_pop($this->stack);
+                $this->mode = self::IN_TABLE;
+            }
+
+        /* An end tag whose tag name is "col" */
+        } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'col') {
+            /* Parse error. Ignore the token. */
+
+        /* Anything else */
+        } else {
+            /* Act as if an end tag with the tag name "colgroup" had been seen,
+            and then, if that token wasn't ignored, reprocess the current token. */
+            $this->inColumnGroup(array(
+                'name' => 'colgroup',
+                'type' => HTML5::ENDTAG
+            ));
+
+            return $this->inTable($token);
+        }
+    }
+
+    /**
+     * Processes one token under the "in table body" insertion mode.
+     *
+     * @param array $token Token array; 'type' is always read, 'name' is read
+     *                     for tag tokens.
+     * @return mixed Result of the handler the token was reprocessed with
+     *               (inRow() or mainPhase()), otherwise null.
+     */
+    private function inTableBody($token) {
+        $clear = array('tbody', 'tfoot', 'thead', 'html');
+
+        /* A start tag whose tag name is "tr" */
+        if($token['type'] === HTML5::STARTTAG && $token['name'] === 'tr') {
+            /* Clear the stack back to a table body context. */
+            $this->clearStackToTableContext($clear);
+
+            /* Insert a tr element for the token, then switch the insertion
+            mode to "in row". */
+            $this->insertElement($token);
+            $this->mode = self::IN_ROW;
+
+        /* A start tag whose tag name is one of: "th", "td" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        ($token['name'] === 'th' || $token['name'] === 'td')) {
+            /* Parse error. Act as if a start tag with the tag name "tr" had
+            been seen, then reprocess the current token. */
+            $this->inTableBody(array(
+                'name' => 'tr',
+                'type' => HTML5::STARTTAG,
+                'attr' => array()
+            ));
+
+            return $this->inRow($token);
+
+        /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        in_array($token['name'], array('tbody', 'tfoot', 'thead'), true)) {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore
+
+            /* Otherwise: */
+            } else {
+                /* Clear the stack back to a table body context. */
+                $this->clearStackToTableContext($clear);
+
+                /* Pop the current node from the stack of open elements. Switch
+                the insertion mode to "in table". */
+                array_pop($this->stack);
+                $this->mode = self::IN_TABLE;
+            }
+
+        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+        "tbody", "tfoot", "thead", or an end tag whose tag name is "table".
+        (The list previously spelled "tfoot" as "tfoor" and tested the
+        "table" alternative as a start tag.) */
+        } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead'), true)) ||
+        ($token['type'] === HTML5::ENDTAG && $token['name'] === 'table')) {
+            /* If the stack of open elements does not have a tbody, thead, or
+            tfoot element in table scope, this is a parse error. Ignore the
+            token. (innerHTML case) */
+            if(!$this->elementInScope(array('tbody', 'thead', 'tfoot'), true)) {
+                // Ignore.
+
+            /* Otherwise: */
+            } else {
+                /* Clear the stack back to a table body context. */
+                $this->clearStackToTableContext($clear);
+
+                /* Act as if an end tag with the same tag name as the current
+                node ("tbody", "tfoot", or "thead") had been seen, then
+                reprocess the current token. */
+                $this->inTableBody(array(
+                    'name' => end($this->stack)->nodeName,
+                    'type' => HTML5::ENDTAG
+                ));
+
+                return $this->mainPhase($token);
+            }
+
+        /* An end tag whose tag name is one of: "body", "caption", "col",
+        "colgroup", "html", "td", "th", "tr" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th', 'tr'),
+        true)) {
+            /* Parse error. Ignore the token. */
+
+        /* Anything else */
+        } else {
+            /* Process the token as if the insertion mode was "in table". */
+            $this->inTable($token);
+        }
+    }
+
+    /**
+     * Processes one token under the "in row" insertion mode.
+     *
+     * @param array $token Token array; 'type' is always read, 'name' is read
+     *                     for tag tokens.
+     * @return mixed Result of inTableBody() when the token is reprocessed
+     *               after closing the row, otherwise null.
+     */
+    private function inRow($token) {
+        $clear = array('tr', 'html');
+
+        /* A start tag whose tag name is one of: "th", "td" */
+        if($token['type'] === HTML5::STARTTAG &&
+        ($token['name'] === 'th' || $token['name'] === 'td')) {
+            /* Clear the stack back to a table row context. */
+            $this->clearStackToTableContext($clear);
+
+            /* Insert an HTML element for the token, then switch the insertion
+            mode to "in cell". */
+            $this->insertElement($token);
+            $this->mode = self::IN_CELL;
+
+            /* Insert a marker at the end of the list of active formatting
+            elements. */
+            $this->a_formatting[] = self::MARKER;
+
+        /* An end tag whose tag name is "tr" */
+        } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'tr') {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. (innerHTML case) */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore.
+
+            /* Otherwise: */
+            } else {
+                /* Clear the stack back to a table row context. */
+                $this->clearStackToTableContext($clear);
+
+                /* Pop the current node (which will be a tr element) from the
+                stack of open elements. Switch the insertion mode to "in table
+                body". */
+                array_pop($this->stack);
+                $this->mode = self::IN_TBODY;
+            }
+
+        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+        "tbody", "tfoot", "thead", "tr" or an end tag whose tag name is "table" */
+        } elseif(($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('caption', 'col', 'colgroup', 'tbody', 'tfoot', 'thead', 'tr'),
+        true)) || ($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'table')) {
+            /* Act as if an end tag with the tag name "tr" had been seen, then,
+            if that token wasn't ignored, reprocess the current token. The
+            implied end tag is ignored exactly when no tr is in table scope.
+            Closing the row switches to "in table body", so that is the
+            handler the token must be reprocessed with (not "in cell"). */
+            if($this->elementInScope('tr', true)) {
+                $this->inRow(array(
+                    'name' => 'tr',
+                    'type' => HTML5::ENDTAG
+                ));
+
+                return $this->inTableBody($token);
+            }
+
+        /* An end tag whose tag name is one of: "tbody", "tfoot", "thead" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        in_array($token['name'], array('tbody', 'tfoot', 'thead'), true)) {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore.
+
+            /* Otherwise: */
+            } else {
+                /* Otherwise, act as if an end tag with the tag name "tr" had
+                been seen, then reprocess the current token (in "in table
+                body", the mode the implied end tag switches to). */
+                $this->inRow(array(
+                    'name' => 'tr',
+                    'type' => HTML5::ENDTAG
+                ));
+
+                return $this->inTableBody($token);
+            }
+
+        /* An end tag whose tag name is one of: "body", "caption", "col",
+        "colgroup", "html", "td", "th" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('body', 'caption', 'col', 'colgroup', 'html', 'td', 'th'), true)) {
+            /* Parse error. Ignore the token. */
+
+        /* Anything else */
+        } else {
+            /* Process the token as if the insertion mode was "in table". */
+            $this->inTable($token);
+        }
+    }
+
+    /**
+     * Processes one token under the "in cell" insertion mode.
+     *
+     * @param array $token Token array; 'type' is always read, 'name' is read
+     *                     for tag tokens.
+     * @return mixed Result of inRow() when the cell is closed and the token
+     *               reprocessed, otherwise null.
+     */
+    private function inCell($token) {
+        /* An end tag whose tag name is one of: "td", "th" */
+        if($token['type'] === HTML5::ENDTAG &&
+        ($token['name'] === 'td' || $token['name'] === 'th')) {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as that of the token, then this is a
+            parse error and the token must be ignored. */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore.
+
+            /* Otherwise: */
+            } else {
+                /* Generate implied end tags, except for elements with the same
+                tag name as the token. */
+                $this->generateImpliedEndTags(array($token['name']));
+
+                /* Now, if the current node is not an element with the same tag
+                name as the token, then this is a parse error. */
+                // k
+
+                /* Pop elements from this stack until an element with the same
+                tag name as the token has been popped from the stack. */
+                while(true) {
+                    $node = end($this->stack)->nodeName;
+                    array_pop($this->stack);
+
+                    if($node === $token['name']) {
+                        break;
+                    }
+                }
+
+                /* Clear the list of active formatting elements up to the last
+                marker. */
+                $this->clearTheActiveFormattingElementsUpToTheLastMarker();
+
+                /* Switch the insertion mode to "in row". (The current node
+                will be a tr element at this point.) */
+                $this->mode = self::IN_ROW;
+            }
+
+        /* A start tag whose tag name is one of: "caption", "col", "colgroup",
+        "tbody", "td", "tfoot", "th", "thead", "tr". (This branch used to be
+        present twice; the second copy could never be reached.) */
+        } elseif($token['type'] === HTML5::STARTTAG && in_array($token['name'],
+        array('caption', 'col', 'colgroup', 'tbody', 'td', 'tfoot', 'th',
+        'thead', 'tr'), true)) {
+            /* If the stack of open elements does not have a td or th element
+            in table scope, then this is a parse error; ignore the token.
+            (innerHTML case) */
+            if(!$this->elementInScope(array('td', 'th'), true)) {
+                // Ignore.
+
+            /* Otherwise, close the cell (see below) and reprocess the current
+            token. */
+            } else {
+                $this->closeCell();
+                return $this->inRow($token);
+            }
+
+        /* An end tag whose tag name is one of: "body", "caption", "col",
+        "colgroup", "html" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('body', 'caption', 'col', 'colgroup', 'html'), true)) {
+            /* Parse error. Ignore the token. */
+
+        /* An end tag whose tag name is one of: "table", "tbody", "tfoot",
+        "thead", "tr" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('table', 'tbody', 'tfoot', 'thead', 'tr'), true)) {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as that of the token (which can only
+            happen for "tbody", "tfoot" and "thead", or, in the innerHTML case),
+            then this is a parse error and the token must be ignored. */
+            if(!$this->elementInScope($token['name'], true)) {
+                // Ignore.
+
+            /* Otherwise, close the cell (see below) and reprocess the current
+            token. */
+            } else {
+                $this->closeCell();
+                return $this->inRow($token);
+            }
+
+        /* Anything else */
+        } else {
+            /* Process the token as if the insertion mode was "in body". */
+            $this->inBody($token);
+        }
+    }
+
+    /**
+     * Processes one token under the "in select" insertion mode.
+     *
+     * @param array $token Token array; 'type' is always read, 'name' is read
+     *                     for tag tokens and 'data' for character/comment
+     *                     tokens.
+     * @return void
+     */
+    private function inSelect($token) {
+        /* Handle the token as follows: */
+
+        /* A character token */
+        if($token['type'] === HTML5::CHARACTR) {
+            /* Append the token's character to the current node. */
+            $this->insertText($token['data']);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data
+            attribute set to the data given in the comment token. */
+            $this->insertComment($token['data']);
+
+        /* A start tag token whose tag name is "option" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'option') {
+            /* If the current node is an option element, act as if an end tag
+            with the tag name "option" had been seen. */
+            if(end($this->stack)->nodeName === 'option') {
+                $this->inSelect(array(
+                    'name' => 'option',
+                    'type' => HTML5::ENDTAG
+                ));
+            }
+
+            /* Insert an HTML element for the token. */
+            $this->insertElement($token);
+
+        /* A start tag token whose tag name is "optgroup" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'optgroup') {
+            /* If the current node is an option element, act as if an end tag
+            with the tag name "option" had been seen. */
+            if(end($this->stack)->nodeName === 'option') {
+                $this->inSelect(array(
+                    'name' => 'option',
+                    'type' => HTML5::ENDTAG
+                ));
+            }
+
+            /* If the current node is an optgroup element, act as if an end tag
+            with the tag name "optgroup" had been seen. */
+            if(end($this->stack)->nodeName === 'optgroup') {
+                $this->inSelect(array(
+                    'name' => 'optgroup',
+                    'type' => HTML5::ENDTAG
+                ));
+            }
+
+            /* Insert an HTML element for the token. */
+            $this->insertElement($token);
+
+        /* An end tag token whose tag name is "optgroup" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'optgroup') {
+            /* First, if the current node is an option element, and the node
+            immediately before it in the stack of open elements is an optgroup
+            element, then act as if an end tag with the tag name "option" had
+            been seen. */
+            $elements_in_stack = count($this->stack);
+
+            if($elements_in_stack >= 2 &&
+            $this->stack[$elements_in_stack - 1]->nodeName === 'option' &&
+            $this->stack[$elements_in_stack - 2]->nodeName === 'optgroup') {
+                $this->inSelect(array(
+                    'name' => 'option',
+                    'type' => HTML5::ENDTAG
+                ));
+            }
+
+            /* If the current node is an optgroup element, then pop that node
+            from the stack of open elements. Otherwise, this is a parse error,
+            ignore the token. The current node is re-read here because the
+            implied "option" end tag above may have popped the stack, and its
+            nodeName (not the node object) is what must equal "optgroup". */
+            if(end($this->stack)->nodeName === 'optgroup') {
+                array_pop($this->stack);
+            }
+
+        /* An end tag token whose tag name is "option" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'option') {
+            /* If the current node is an option element, then pop that node
+            from the stack of open elements. Otherwise, this is a parse error,
+            ignore the token. */
+            if(end($this->stack)->nodeName === 'option') {
+                array_pop($this->stack);
+            }
+
+        /* An end tag whose tag name is "select" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'select') {
+            /* If the stack of open elements does not have an element in table
+            scope with the same tag name as the token, this is a parse error.
+            Ignore the token. (innerHTML case) */
+            if(!$this->elementInScope($token['name'], true)) {
+                // w/e
+
+            /* Otherwise: */
+            } else {
+                /* Pop elements from the stack of open elements until a select
+                element has been popped from the stack. */
+                while(true) {
+                    $current = end($this->stack)->nodeName;
+                    array_pop($this->stack);
+
+                    if($current === 'select') {
+                        break;
+                    }
+                }
+
+                /* Reset the insertion mode appropriately. */
+                $this->resetInsertionMode();
+            }
+
+        /* A start tag whose tag name is "select". The type is tested before
+        the name so tokens that carry no 'name' key are never indexed. */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'select') {
+            /* Parse error. Act as if the token had been an end tag with the
+            tag name "select" instead. */
+            $this->inSelect(array(
+                'name' => 'select',
+                'type' => HTML5::ENDTAG
+            ));
+
+        /* An end tag whose tag name is one of: "caption", "table", "tbody",
+        "tfoot", "thead", "tr", "td", "th" */
+        } elseif($token['type'] === HTML5::ENDTAG && in_array($token['name'],
+        array('caption', 'table', 'tbody', 'tfoot', 'thead', 'tr', 'td', 'th'),
+        true)) {
+            /* Parse error. */
+            // w/e
+
+            /* If the stack of open elements has an element in table scope with
+            the same tag name as that of the token, then act as if an end tag
+            with the tag name "select" had been seen, and reprocess the token.
+            Otherwise, ignore the token. */
+            if($this->elementInScope($token['name'], true)) {
+                $this->inSelect(array(
+                    'name' => 'select',
+                    'type' => HTML5::ENDTAG
+                ));
+
+                $this->mainPhase($token);
+            }
+
+        /* Anything else */
+        } else {
+            /* Parse error. Ignore the token. */
+        }
+    }
+
+    private function afterBody($token) {
+        /* Handle the token as follows: */
+
+        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        if($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Process the token as it would be processed if the insertion mode
+            was "in body". */
+            $this->inBody($token);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the first element in the stack of open
+            elements (the html element), with the data attribute set to the
+            data given in the comment token.
*/
+            $comment = $this->dom->createComment($token['data']);
+            $this->stack[0]->appendChild($comment);
+
+        /* An end tag with the tag name "html" */
+        } elseif($token['type'] === HTML5::ENDTAG && $token['name'] === 'html') {
+            /* If the parser was originally created in order to handle the
+            setting of an element's innerHTML attribute, this is a parse error;
+            ignore the token. (The element will be an html element in this
+            case.) (innerHTML case) */
+
+            /* Otherwise, switch to the trailing end phase. */
+            $this->phase = self::END_PHASE;
+
+        /* Anything else */
+        } else {
+            /* Parse error. Set the insertion mode to "in body" and reprocess
+            the token. */
+            $this->mode = self::IN_BODY;
+            return $this->inBody($token);
+        }
+    }
+
+    /**
+     * Processes one token under the "in frameset" insertion mode.
+     *
+     * @param array $token Token array; 'type' is always read, 'name' is read
+     *                     for tag tokens and 'data' for character/comment
+     *                     tokens.
+     * @return void
+     */
+    private function inFrameset($token) {
+        /* Handle the token as follows: */
+
+        /* A character token that is one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        if($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Append the character to the current node. */
+            $this->insertText($token['data']);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data
+            attribute set to the data given in the comment token. */
+            $this->insertComment($token['data']);
+
+        /* A start tag with the tag name "frameset". In this and the
+        following branches the type is tested before the name so tokens that
+        carry no 'name' key are never indexed. */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'frameset') {
+            $this->insertElement($token);
+
+        /* An end tag with the tag name "frameset" */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'frameset') {
+            /* If the current node is the root html element, then this is a
+            parse error; ignore the token. (innerHTML case) */
+            if(end($this->stack)->nodeName === 'html') {
+                // Ignore
+
+            } else {
+                /* Otherwise, pop the current node from the stack of open
+                elements. */
+                array_pop($this->stack);
+
+                /* If the current node is no longer a frameset element, then
+                change the insertion mode to "after frameset". While an outer
+                frameset is still open the mode must stay "in frameset" so its
+                remaining frame children are accepted. */
+                if(end($this->stack)->nodeName !== 'frameset') {
+                    $this->mode = self::AFTR_FRAME;
+                }
+            }
+
+        /* A start tag with the tag name "frame" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'frame') {
+            /* Insert an HTML element for the token. */
+            $this->insertElement($token);
+
+            /* Immediately pop the current node off the stack of open elements. */
+            array_pop($this->stack);
+
+        /* A start tag with the tag name "noframes" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'noframes') {
+            /* Process the token as if the insertion mode had been "in body". */
+            $this->inBody($token);
+
+        /* Anything else */
+        } else {
+            /* Parse error. Ignore the token. */
+        }
+    }
+
+    /**
+     * Processes one token under the "after frameset" insertion mode.
+     *
+     * @param array $token Token array; 'type' is always read, 'name' is read
+     *                     for tag tokens and 'data' for character/comment
+     *                     tokens.
+     * @return void
+     */
+    private function afterFrameset($token) {
+        /* Handle the token as follows: */
+
+        /* A character token that is one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        if($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Append the character to the current node. */
+            $this->insertText($token['data']);
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the current node with the data
+            attribute set to the data given in the comment token. */
+            $this->insertComment($token['data']);
+
+        /* An end tag with the tag name "html". The type is tested before the
+        name so tokens that carry no 'name' key are never indexed. */
+        } elseif($token['type'] === HTML5::ENDTAG &&
+        $token['name'] === 'html') {
+            /* Switch to the trailing end phase. */
+            $this->phase = self::END_PHASE;
+
+        /* A start tag with the tag name "noframes" */
+        } elseif($token['type'] === HTML5::STARTTAG &&
+        $token['name'] === 'noframes') {
+            /* Process the token as if the insertion mode had been "in body". */
+            $this->inBody($token);
+
+        /* Anything else */
+        } else {
+            /* Parse error. Ignore the token. */
+        }
+    }
+
+    private function trailingEndPhase($token) {
+        /* After the main phase, as each token is emitted from the tokenisation
+        stage, it must be processed as described in this section. */
+
+        /* A DOCTYPE token */
+        if($token['type'] === HTML5::DOCTYPE) {
+            // Parse error. Ignore the token.
+
+        /* A comment token */
+        } elseif($token['type'] === HTML5::COMMENT) {
+            /* Append a Comment node to the Document object with the data
+            attribute set to the data given in the comment token. */
+            $comment = $this->dom->createComment($token['data']);
+            $this->dom->appendChild($comment);
+
+        /* A character token that is one of one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE */
+        } elseif($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) {
+            /* Process the token as it would be processed in the main phase. */
+            $this->mainPhase($token);
+
+        /* A character token that is not one of U+0009 CHARACTER TABULATION,
+        U+000A LINE FEED (LF), U+000B LINE TABULATION, U+000C FORM FEED (FF),
+        or U+0020 SPACE. Or a start tag token. Or an end tag token. */
+        /* NOTE(review): the character test below repeats the whitespace match
+        of the previous branch, so it can never be true here; it looks like it
+        was meant to be negated - confirm before changing. */
+        } elseif(($token['type'] === HTML5::CHARACTR &&
+        preg_match('/^[\t\n\x0b\x0c ]+$/', $token['data'])) ||
+        $token['type'] === HTML5::STARTTAG || $token['type'] === HTML5::ENDTAG) {
+            /* Parse error. Switch back to the main phase and reprocess the
+            token. */
+            $this->phase = self::MAIN_PHASE;
+            return $this->mainPhase($token);
+
+        /* An end-of-file token */
+        } elseif($token['type'] === HTML5::EOF) {
+            /* OMG DONE!!
*/ + } + } + + private function insertElement($token, $append = true, $check = false) { + // Proprietary workaround for libxml2's limitations with tag names + if ($check) { + // Slightly modified HTML5 tag-name modification, + // removing anything that's not an ASCII letter, digit, or hyphen + $token['name'] = preg_replace('/[^a-z0-9-]/i', '', $token['name']); + // Remove leading hyphens and numbers + $token['name'] = ltrim($token['name'], '-0..9'); + // In theory, this should ever be needed, but just in case + if ($token['name'] === '') $token['name'] = 'span'; // arbitrary generic choice + } + + $el = $this->dom->createElement($token['name']); + + foreach($token['attr'] as $attr) { + if(!$el->hasAttribute($attr['name'])) { + $el->setAttribute($attr['name'], $attr['value']); + } + } + + $this->appendToRealParent($el); + $this->stack[] = $el; + + return $el; + } + + private function insertText($data) { + $text = $this->dom->createTextNode($data); + $this->appendToRealParent($text); + } + + private function insertComment($data) { + $comment = $this->dom->createComment($data); + $this->appendToRealParent($comment); + } + + private function appendToRealParent($node) { + if($this->foster_parent === null) { + end($this->stack)->appendChild($node); + + } elseif($this->foster_parent !== null) { + /* If the foster parent element is the parent element of the + last table element in the stack of open elements, then the new + node must be inserted immediately before the last table element + in the stack of open elements in the foster parent element; + otherwise, the new node must be appended to the foster parent + element. 
*/ + for($n = count($this->stack) - 1; $n >= 0; $n--) { + if($this->stack[$n]->nodeName === 'table' && + $this->stack[$n]->parentNode !== null) { + $table = $this->stack[$n]; + break; + } + } + + if(isset($table) && $this->foster_parent->isSameNode($table->parentNode)) + $this->foster_parent->insertBefore($node, $table); + else + $this->foster_parent->appendChild($node); + + $this->foster_parent = null; + } + } + + private function elementInScope($el, $table = false) { + if(is_array($el)) { + foreach($el as $element) { + if($this->elementInScope($element, $table)) { + return true; + } + } + + return false; + } + + $leng = count($this->stack); + + for($n = 0; $n < $leng; $n++) { + /* 1. Initialise node to be the current node (the bottommost node of + the stack). */ + $node = $this->stack[$leng - 1 - $n]; + + if($node->tagName === $el) { + /* 2. If node is the target node, terminate in a match state. */ + return true; + + } elseif($node->tagName === 'table') { + /* 3. Otherwise, if node is a table element, terminate in a failure + state. */ + return false; + + } elseif($table === true && in_array($node->tagName, array('caption', 'td', + 'th', 'button', 'marquee', 'object'))) { + /* 4. Otherwise, if the algorithm is the "has an element in scope" + variant (rather than the "has an element in table scope" variant), + and node is one of the following, terminate in a failure state. */ + return false; + + } elseif($node === $node->ownerDocument->documentElement) { + /* 5. Otherwise, if node is an html element (root element), terminate + in a failure state. (This can only happen if the node is the topmost + node of the stack of open elements, and prevents the next step from + being invoked if there are no more elements in the stack.) */ + return false; + } + + /* Otherwise, set node to the previous entry in the stack of open + elements and return to step 2. 
(This will never fail, since the loop + will always terminate in the previous step if the top of the stack + is reached.) */ + } + } + + private function reconstructActiveFormattingElements() { + /* 1. If there are no entries in the list of active formatting elements, + then there is nothing to reconstruct; stop this algorithm. */ + $formatting_elements = count($this->a_formatting); + + if($formatting_elements === 0) { + return false; + } + + /* 3. Let entry be the last (most recently added) element in the list + of active formatting elements. */ + $entry = end($this->a_formatting); + + /* 2. If the last (most recently added) entry in the list of active + formatting elements is a marker, or if it is an element that is in the + stack of open elements, then there is nothing to reconstruct; stop this + algorithm. */ + if($entry === self::MARKER || in_array($entry, $this->stack, true)) { + return false; + } + + for($a = $formatting_elements - 1; $a >= 0; true) { + /* 4. If there are no entries before entry in the list of active + formatting elements, then jump to step 8. */ + if($a === 0) { + $step_seven = false; + break; + } + + /* 5. Let entry be the entry one earlier than entry in the list of + active formatting elements. */ + $a--; + $entry = $this->a_formatting[$a]; + + /* 6. If entry is neither a marker nor an element that is also in + thetack of open elements, go to step 4. */ + if($entry === self::MARKER || in_array($entry, $this->stack, true)) { + break; + } + } + + while(true) { + /* 7. Let entry be the element one later than entry in the list of + active formatting elements. */ + if(isset($step_seven) && $step_seven === true) { + $a++; + $entry = $this->a_formatting[$a]; + } + + /* 8. Perform a shallow clone of the element entry to obtain clone. */ + $clone = $entry->cloneNode(); + + /* 9. Append clone to the current node and push it onto the stack + of open elements so that it is the new current node. 
*/ + end($this->stack)->appendChild($clone); + $this->stack[] = $clone; + + /* 10. Replace the entry for entry in the list with an entry for + clone. */ + $this->a_formatting[$a] = $clone; + + /* 11. If the entry for clone in the list of active formatting + elements is not the last entry in the list, return to step 7. */ + if(end($this->a_formatting) !== $clone) { + $step_seven = true; + } else { + break; + } + } + } + + private function clearTheActiveFormattingElementsUpToTheLastMarker() { + /* When the steps below require the UA to clear the list of active + formatting elements up to the last marker, the UA must perform the + following steps: */ + + while(true) { + /* 1. Let entry be the last (most recently added) entry in the list + of active formatting elements. */ + $entry = end($this->a_formatting); + + /* 2. Remove entry from the list of active formatting elements. */ + array_pop($this->a_formatting); + + /* 3. If entry was a marker, then stop the algorithm at this point. + The list has been cleared up to the last marker. */ + if($entry === self::MARKER) { + break; + } + } + } + + private function generateImpliedEndTags($exclude = array()) { + /* When the steps below require the UA to generate implied end tags, + then, if the current node is a dd element, a dt element, an li element, + a p element, a td element, a th element, or a tr element, the UA must + act as if an end tag with the respective tag name had been seen and + then generate implied end tags again. 
*/ + $node = end($this->stack); + $elements = array_diff(array('dd', 'dt', 'li', 'p', 'td', 'th', 'tr'), $exclude); + + while(in_array(end($this->stack)->nodeName, $elements)) { + array_pop($this->stack); + } + } + + private function getElementCategory($node) { + $name = $node->tagName; + if(in_array($name, $this->special)) + return self::SPECIAL; + + elseif(in_array($name, $this->scoping)) + return self::SCOPING; + + elseif(in_array($name, $this->formatting)) + return self::FORMATTING; + + else + return self::PHRASING; + } + + private function clearStackToTableContext($elements) { + /* When the steps above require the UA to clear the stack back to a + table context, it means that the UA must, while the current node is not + a table element or an html element, pop elements from the stack of open + elements. If this causes any elements to be popped from the stack, then + this is a parse error. */ + while(true) { + $node = end($this->stack)->nodeName; + + if(in_array($node, $elements)) { + break; + } else { + array_pop($this->stack); + } + } + } + + private function resetInsertionMode() { + /* 1. Let last be false. */ + $last = false; + $leng = count($this->stack); + + for($n = $leng - 1; $n >= 0; $n--) { + /* 2. Let node be the last node in the stack of open elements. */ + $node = $this->stack[$n]; + + /* 3. If node is the first node in the stack of open elements, then + set last to true. If the element whose innerHTML attribute is being + set is neither a td element nor a th element, then set node to the + element whose innerHTML attribute is being set. (innerHTML case) */ + if($this->stack[0]->isSameNode($node)) { + $last = true; + } + + /* 4. If node is a select element, then switch the insertion mode to + "in select" and abort these steps. (innerHTML case) */ + if($node->nodeName === 'select') { + $this->mode = self::IN_SELECT; + break; + + /* 5. If node is a td or th element, then switch the insertion mode + to "in cell" and abort these steps. 
*/ + } elseif($node->nodeName === 'td' || $node->nodeName === 'th') { + $this->mode = self::IN_CELL; + break; + + /* 6. If node is a tr element, then switch the insertion mode to + "in row" and abort these steps. */ + } elseif($node->nodeName === 'tr') { + $this->mode = self::IN_ROW; + break; + + /* 7. If node is a tbody, thead, or tfoot element, then switch the + insertion mode to "in table body" and abort these steps. */ + } elseif(in_array($node->nodeName, array('tbody', 'thead', 'tfoot'))) { + $this->mode = self::IN_TBODY; + break; + + /* 8. If node is a caption element, then switch the insertion mode + to "in caption" and abort these steps. */ + } elseif($node->nodeName === 'caption') { + $this->mode = self::IN_CAPTION; + break; + + /* 9. If node is a colgroup element, then switch the insertion mode + to "in column group" and abort these steps. (innerHTML case) */ + } elseif($node->nodeName === 'colgroup') { + $this->mode = self::IN_CGROUP; + break; + + /* 10. If node is a table element, then switch the insertion mode + to "in table" and abort these steps. */ + } elseif($node->nodeName === 'table') { + $this->mode = self::IN_TABLE; + break; + + /* 11. If node is a head element, then switch the insertion mode + to "in body" ("in body"! not "in head"!) and abort these steps. + (innerHTML case) */ + } elseif($node->nodeName === 'head') { + $this->mode = self::IN_BODY; + break; + + /* 12. If node is a body element, then switch the insertion mode to + "in body" and abort these steps. */ + } elseif($node->nodeName === 'body') { + $this->mode = self::IN_BODY; + break; + + /* 13. If node is a frameset element, then switch the insertion + mode to "in frameset" and abort these steps. (innerHTML case) */ + } elseif($node->nodeName === 'frameset') { + $this->mode = self::IN_FRAME; + break; + + /* 14. If node is an html element, then: if the head element + pointer is null, switch the insertion mode to "before head", + otherwise, switch the insertion mode to "after head". 
In either + case, abort these steps. (innerHTML case) */ + } elseif($node->nodeName === 'html') { + $this->mode = ($this->head_pointer === null) + ? self::BEFOR_HEAD + : self::AFTER_HEAD; + + break; + + /* 15. If last is true, then set the insertion mode to "in body" + and abort these steps. (innerHTML case) */ + } elseif($last) { + $this->mode = self::IN_BODY; + break; + } + } + } + + private function closeCell() { + /* If the stack of open elements has a td or th element in table scope, + then act as if an end tag token with that tag name had been seen. */ + foreach(array('td', 'th') as $cell) { + if($this->elementInScope($cell, true)) { + $this->inCell(array( + 'name' => $cell, + 'type' => HTML5::ENDTAG + )); + + break; + } + } + } + + public function save() { + return $this->dom; + } +} +?> diff --git a/extlib/HTMLPurifier/HTMLPurifier/PercentEncoder.php b/extlib/HTMLPurifier/HTMLPurifier/PercentEncoder.php new file mode 100644 index 0000000000..a43c44f4c5 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/PercentEncoder.php @@ -0,0 +1,98 @@ +preserve[$i] = true; // digits + for ($i = 65; $i <= 90; $i++) $this->preserve[$i] = true; // upper-case + for ($i = 97; $i <= 122; $i++) $this->preserve[$i] = true; // lower-case + $this->preserve[45] = true; // Dash - + $this->preserve[46] = true; // Period . + $this->preserve[95] = true; // Underscore _ + $this->preserve[126]= true; // Tilde ~ + + // extra letters not to escape + if ($preserve !== false) { + for ($i = 0, $c = strlen($preserve); $i < $c; $i++) { + $this->preserve[ord($preserve[$i])] = true; + } + } + } + + /** + * Our replacement for urlencode, it encodes all non-reserved characters, + * as well as any extra characters that were instructed to be preserved. + * @note + * Assumes that the string has already been normalized, making any + * and all percent escape sequences valid. 
Percents will not be + * re-escaped, regardless of their status in $preserve + * @param $string String to be encoded + * @return Encoded string. + */ + public function encode($string) { + $ret = ''; + for ($i = 0, $c = strlen($string); $i < $c; $i++) { + if ($string[$i] !== '%' && !isset($this->preserve[$int = ord($string[$i])]) ) { + $ret .= '%' . sprintf('%02X', $int); + } else { + $ret .= $string[$i]; + } + } + return $ret; + } + + /** + * Fix up percent-encoding by decoding unreserved characters and normalizing. + * @warning This function is affected by $preserve, even though the + * usual desired behavior is for this not to preserve those + * characters. Be careful when reusing instances of PercentEncoder! + * @param $string String to normalize + */ + public function normalize($string) { + if ($string == '') return ''; + $parts = explode('%', $string); + $ret = array_shift($parts); + foreach ($parts as $part) { + $length = strlen($part); + if ($length < 2) { + $ret .= '%25' . $part; + continue; + } + $encoding = substr($part, 0, 2); + $text = substr($part, 2); + if (!ctype_xdigit($encoding)) { + $ret .= '%25' . $part; + continue; + } + $int = hexdec($encoding); + if (isset($this->preserve[$int])) { + $ret .= chr($int) . $text; + continue; + } + $encoding = strtoupper($encoding); + $ret .= '%' . $encoding . 
$text; + } + return $ret; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Printer.php b/extlib/HTMLPurifier/HTMLPurifier/Printer.php new file mode 100644 index 0000000000..e7eb82e83e --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Printer.php @@ -0,0 +1,176 @@ +getAll(); + $context = new HTMLPurifier_Context(); + $this->generator = new HTMLPurifier_Generator($config, $context); + } + + /** + * Main function that renders object or aspect of that object + * @note Parameters vary depending on printer + */ + // function render() {} + + /** + * Returns a start tag + * @param $tag Tag name + * @param $attr Attribute array + */ + protected function start($tag, $attr = array()) { + return $this->generator->generateFromToken( + new HTMLPurifier_Token_Start($tag, $attr ? $attr : array()) + ); + } + + /** + * Returns an end teg + * @param $tag Tag name + */ + protected function end($tag) { + return $this->generator->generateFromToken( + new HTMLPurifier_Token_End($tag) + ); + } + + /** + * Prints a complete element with content inside + * @param $tag Tag name + * @param $contents Element contents + * @param $attr Tag attributes + * @param $escape Bool whether or not to escape contents + */ + protected function element($tag, $contents, $attr = array(), $escape = true) { + return $this->start($tag, $attr) . + ($escape ? $this->escape($contents) : $contents) . + $this->end($tag); + } + + protected function elementEmpty($tag, $attr = array()) { + return $this->generator->generateFromToken( + new HTMLPurifier_Token_Empty($tag, $attr) + ); + } + + protected function text($text) { + return $this->generator->generateFromToken( + new HTMLPurifier_Token_Text($text) + ); + } + + /** + * Prints a simple key/value row in a table. + * @param $name Key + * @param $value Value + */ + protected function row($name, $value) { + if (is_bool($value)) $value = $value ? 'On' : 'Off'; + return + $this->start('tr') . "\n" . + $this->element('th', $name) . "\n" . 
+ $this->element('td', $value) . "\n" . + $this->end('tr') + ; + } + + /** + * Escapes a string for HTML output. + * @param $string String to escape + */ + protected function escape($string) { + $string = HTMLPurifier_Encoder::cleanUTF8($string); + $string = htmlspecialchars($string, ENT_COMPAT, 'UTF-8'); + return $string; + } + + /** + * Takes a list of strings and turns them into a single list + * @param $array List of strings + * @param $polite Bool whether or not to add an end before the last + */ + protected function listify($array, $polite = false) { + if (empty($array)) return 'None'; + $ret = ''; + $i = count($array); + foreach ($array as $value) { + $i--; + $ret .= $value; + if ($i > 0 && !($polite && $i == 1)) $ret .= ', '; + if ($polite && $i == 1) $ret .= 'and '; + } + return $ret; + } + + /** + * Retrieves the class of an object without prefixes, as well as metadata + * @param $obj Object to determine class of + * @param $prefix Further prefix to remove + */ + protected function getClass($obj, $sec_prefix = '') { + static $five = null; + if ($five === null) $five = version_compare(PHP_VERSION, '5', '>='); + $prefix = 'HTMLPurifier_' . $sec_prefix; + if (!$five) $prefix = strtolower($prefix); + $class = str_replace($prefix, '', get_class($obj)); + $lclass = strtolower($class); + $class .= '('; + switch ($lclass) { + case 'enum': + $values = array(); + foreach ($obj->valid_values as $value => $bool) { + $values[] = $value; + } + $class .= implode(', ', $values); + break; + case 'css_composite': + $values = array(); + foreach ($obj->defs as $def) { + $values[] = $this->getClass($def, $sec_prefix); + } + $class .= implode(', ', $values); + break; + case 'css_multiple': + $class .= $this->getClass($obj->single, $sec_prefix) . ', '; + $class .= $obj->max; + break; + case 'css_denyelementdecorator': + $class .= $this->getClass($obj->def, $sec_prefix) . 
', '; + $class .= $obj->element; + break; + case 'css_importantdecorator': + $class .= $this->getClass($obj->def, $sec_prefix); + if ($obj->allow) $class .= ', !important'; + break; + } + $class .= ')'; + return $class; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Printer/CSSDefinition.php b/extlib/HTMLPurifier/HTMLPurifier/Printer/CSSDefinition.php new file mode 100644 index 0000000000..81f9865901 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Printer/CSSDefinition.php @@ -0,0 +1,38 @@ +def = $config->getCSSDefinition(); + $ret = ''; + + $ret .= $this->start('div', array('class' => 'HTMLPurifier_Printer')); + $ret .= $this->start('table'); + + $ret .= $this->element('caption', 'Properties ($info)'); + + $ret .= $this->start('thead'); + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Property', array('class' => 'heavy')); + $ret .= $this->element('th', 'Definition', array('class' => 'heavy', 'style' => 'width:auto;')); + $ret .= $this->end('tr'); + $ret .= $this->end('thead'); + + ksort($this->def->info); + foreach ($this->def->info as $property => $obj) { + $name = $this->getClass($obj, 'AttrDef_'); + $ret .= $this->row($property, $name); + } + + $ret .= $this->end('table'); + $ret .= $this->end('div'); + + return $ret; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Printer/ConfigForm.css b/extlib/HTMLPurifier/HTMLPurifier/Printer/ConfigForm.css new file mode 100644 index 0000000000..3ff1a88aa4 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Printer/ConfigForm.css @@ -0,0 +1,10 @@ + +.hp-config {} + +.hp-config tbody th {text-align:right; padding-right:0.5em;} +.hp-config thead, .hp-config .namespace {background:#3C578C; color:#FFF;} +.hp-config .namespace th {text-align:center;} +.hp-config .verbose {display:none;} +.hp-config .controls {text-align:center;} + +/* vim: et sw=4 sts=4 */ diff --git a/extlib/HTMLPurifier/HTMLPurifier/Printer/ConfigForm.js 
b/extlib/HTMLPurifier/HTMLPurifier/Printer/ConfigForm.js new file mode 100644 index 0000000000..cba00c9b80 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Printer/ConfigForm.js @@ -0,0 +1,5 @@ +function toggleWriteability(id_of_patient, checked) { + document.getElementById(id_of_patient).disabled = checked; +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Printer/ConfigForm.php b/extlib/HTMLPurifier/HTMLPurifier/Printer/ConfigForm.php new file mode 100644 index 0000000000..02aa656894 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Printer/ConfigForm.php @@ -0,0 +1,368 @@ +docURL = $doc_url; + $this->name = $name; + $this->compress = $compress; + // initialize sub-printers + $this->fields[0] = new HTMLPurifier_Printer_ConfigForm_default(); + $this->fields[HTMLPurifier_VarParser::BOOL] = new HTMLPurifier_Printer_ConfigForm_bool(); + } + + /** + * Sets default column and row size for textareas in sub-printers + * @param $cols Integer columns of textarea, null to use default + * @param $rows Integer rows of textarea, null to use default + */ + public function setTextareaDimensions($cols = null, $rows = null) { + if ($cols) $this->fields['default']->cols = $cols; + if ($rows) $this->fields['default']->rows = $rows; + } + + /** + * Retrieves styling, in case it is not accessible by webserver + */ + public static function getCSS() { + return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.css'); + } + + /** + * Retrieves JavaScript, in case it is not accessible by webserver + */ + public static function getJavaScript() { + return file_get_contents(HTMLPURIFIER_PREFIX . '/HTMLPurifier/Printer/ConfigForm.js'); + } + + /** + * Returns HTML output for a configuration form + * @param $config Configuration object of current form state, or an array + * where [0] has an HTML namespace and [1] is being rendered. + * @param $allowed Optional namespace(s) and directives to restrict form to. 
+ */ + public function render($config, $allowed = true, $render_controls = true) { + if (is_array($config) && isset($config[0])) { + $gen_config = $config[0]; + $config = $config[1]; + } else { + $gen_config = $config; + } + + $this->config = $config; + $this->genConfig = $gen_config; + $this->prepareGenerator($gen_config); + + $allowed = HTMLPurifier_Config::getAllowedDirectivesForForm($allowed, $config->def); + $all = array(); + foreach ($allowed as $key) { + list($ns, $directive) = $key; + $all[$ns][$directive] = $config->get($ns .'.'. $directive); + } + + $ret = ''; + $ret .= $this->start('table', array('class' => 'hp-config')); + $ret .= $this->start('thead'); + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Directive', array('class' => 'hp-directive')); + $ret .= $this->element('th', 'Value', array('class' => 'hp-value')); + $ret .= $this->end('tr'); + $ret .= $this->end('thead'); + foreach ($all as $ns => $directives) { + $ret .= $this->renderNamespace($ns, $directives); + } + if ($render_controls) { + $ret .= $this->start('tbody'); + $ret .= $this->start('tr'); + $ret .= $this->start('td', array('colspan' => 2, 'class' => 'controls')); + $ret .= $this->elementEmpty('input', array('type' => 'submit', 'value' => 'Submit')); + $ret .= '[Reset]'; + $ret .= $this->end('td'); + $ret .= $this->end('tr'); + $ret .= $this->end('tbody'); + } + $ret .= $this->end('table'); + return $ret; + } + + /** + * Renders a single namespace + * @param $ns String namespace name + * @param $directive Associative array of directives to values + */ + protected function renderNamespace($ns, $directives) { + $ret = ''; + $ret .= $this->start('tbody', array('class' => 'namespace')); + $ret .= $this->start('tr'); + $ret .= $this->element('th', $ns, array('colspan' => 2)); + $ret .= $this->end('tr'); + $ret .= $this->end('tbody'); + $ret .= $this->start('tbody'); + foreach ($directives as $directive => $value) { + $ret .= $this->start('tr'); + $ret .= $this->start('th'); + 
if ($this->docURL) { + $url = str_replace('%s', urlencode("$ns.$directive"), $this->docURL); + $ret .= $this->start('a', array('href' => $url)); + } + $attr = array('for' => "{$this->name}:$ns.$directive"); + + // crop directive name if it's too long + if (!$this->compress || (strlen($directive) < $this->compress)) { + $directive_disp = $directive; + } else { + $directive_disp = substr($directive, 0, $this->compress - 2) . '...'; + $attr['title'] = $directive; + } + + $ret .= $this->element( + 'label', + $directive_disp, + // component printers must create an element with this id + $attr + ); + if ($this->docURL) $ret .= $this->end('a'); + $ret .= $this->end('th'); + + $ret .= $this->start('td'); + $def = $this->config->def->info["$ns.$directive"]; + if (is_int($def)) { + $allow_null = $def < 0; + $type = abs($def); + } else { + $type = $def->type; + $allow_null = isset($def->allow_null); + } + if (!isset($this->fields[$type])) $type = 0; // default + $type_obj = $this->fields[$type]; + if ($allow_null) { + $type_obj = new HTMLPurifier_Printer_ConfigForm_NullDecorator($type_obj); + } + $ret .= $type_obj->render($ns, $directive, $value, $this->name, array($this->genConfig, $this->config)); + $ret .= $this->end('td'); + $ret .= $this->end('tr'); + } + $ret .= $this->end('tbody'); + return $ret; + } + +} + +/** + * Printer decorator for directives that accept null + */ +class HTMLPurifier_Printer_ConfigForm_NullDecorator extends HTMLPurifier_Printer { + /** + * Printer being decorated + */ + protected $obj; + /** + * @param $obj Printer to decorate + */ + public function __construct($obj) { + parent::__construct(); + $this->obj = $obj; + } + public function render($ns, $directive, $value, $name, $config) { + if (is_array($config) && isset($config[0])) { + $gen_config = $config[0]; + $config = $config[1]; + } else { + $gen_config = $config; + } + $this->prepareGenerator($gen_config); + + $ret = ''; + $ret .= $this->start('label', array('for' => 
/**
 * Swiss-army knife configuration form field printer
 */
class HTMLPurifier_Printer_ConfigForm_default extends HTMLPurifier_Printer {
    /** Value of the cols attribute on generated textareas */
    public $cols = 18;
    /** Value of the rows attribute on generated textareas */
    public $rows = 5;

    /**
     * Renders the form control for one directive: a select when the
     * definition lists allowed values, a textarea for text-like and
     * collection types, and a plain text input otherwise.
     * @param $ns String namespace name
     * @param $directive String directive name
     * @param $value Current value of the directive
     * @param $name String form name, used as prefix of the name/id attributes
     * @param $config Configuration object, or array where [0] is the config
     *        used to prepare the generator and [1] the config being rendered
     * @return String of generated HTML, or 'Not supported' for mixed types
     */
    public function render($ns, $directive, $value, $name, $config) {
        if (is_array($config) && isset($config[0])) {
            $gen_config = $config[0];
            $config = $config[1];
        } else {
            $gen_config = $config;
        }
        $this->prepareGenerator($gen_config);
        // this should probably be split up a little
        $ret = '';
        $def = $config->def->info["$ns.$directive"];
        if (is_int($def)) {
            $type = abs($def);
        } else {
            $type = $def->type;
        }
        // flatten array values into the textual form shown in a textarea
        if (is_array($value)) {
            switch ($type) {
                case HTMLPurifier_VarParser::LOOKUP:
                    // a lookup keeps its entries in the keys; turn it into
                    // a plain list of those keys...
                    $array = $value;
                    $value = array();
                    foreach ($array as $val => $b) {
                        $value[] = $val;
                    }
                    // ...and intentionally fall through to print it as a list
                case HTMLPurifier_VarParser::ALIST:
                    $value = implode(PHP_EOL, $value);
                    break;
                case HTMLPurifier_VarParser::HASH:
                    // one "key:value" pair per line
                    $nvalue = '';
                    foreach ($value as $i => $v) {
                        $nvalue .= "$i:$v" . PHP_EOL;
                    }
                    $value = $nvalue;
                    break;
                default:
                    $value = '';
            }
        }
        if ($type === HTMLPurifier_VarParser::MIXED) {
            // mixed values have no form representation
            return 'Not supported';
        }
        $attr = array(
            'name' => "$name"."[$ns.$directive]",
            'id' => "$name:$ns.$directive"
        );
        if ($value === null) $attr['disabled'] = 'disabled';
        if (isset($def->allowed)) {
            // enumerated directive: the allowed values are the array keys
            $ret .= $this->start('select', $attr);
            foreach ($def->allowed as $val => $b) {
                $attr = array();
                if ($value == $val) $attr['selected'] = 'selected';
                $ret .= $this->element('option', $val, $attr);
            }
            $ret .= $this->end('select');
        } elseif (
            $type === HTMLPurifier_VarParser::TEXT ||
            $type === HTMLPurifier_VarParser::ITEXT ||
            $type === HTMLPurifier_VarParser::ALIST ||
            $type === HTMLPurifier_VarParser::HASH ||
            $type === HTMLPurifier_VarParser::LOOKUP
        ) {
            $attr['cols'] = $this->cols;
            $attr['rows'] = $this->rows;
            $ret .= $this->start('textarea', $attr);
            $ret .= $this->text($value);
            $ret .= $this->end('textarea');
        } else {
            $attr['value'] = $value;
            $attr['type'] = 'text';
            $ret .= $this->elementEmpty('input', $attr);
        }
        return $ret;
    }
}
$attr['checked'] = 'checked'; + if ($value === null) $attr['disabled'] = 'disabled'; + $ret .= $this->elementEmpty('input', $attr); + + $ret .= $this->start('label', array('for' => "$name:No_$ns.$directive")); + $ret .= $this->element('span', "$ns.$directive:", array('class' => 'verbose')); + $ret .= $this->text(' No'); + $ret .= $this->end('label'); + + $attr = array( + 'type' => 'radio', + 'name' => "$name"."[$ns.$directive]", + 'id' => "$name:No_$ns.$directive", + 'value' => '0' + ); + if ($value === false) $attr['checked'] = 'checked'; + if ($value === null) $attr['disabled'] = 'disabled'; + $ret .= $this->elementEmpty('input', $attr); + + $ret .= $this->end('div'); + + return $ret; + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Printer/HTMLDefinition.php b/extlib/HTMLPurifier/HTMLPurifier/Printer/HTMLDefinition.php new file mode 100644 index 0000000000..8a8f126b81 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Printer/HTMLDefinition.php @@ -0,0 +1,272 @@ +config =& $config; + + $this->def = $config->getHTMLDefinition(); + + $ret .= $this->start('div', array('class' => 'HTMLPurifier_Printer')); + + $ret .= $this->renderDoctype(); + $ret .= $this->renderEnvironment(); + $ret .= $this->renderContentSets(); + $ret .= $this->renderInfo(); + + $ret .= $this->end('div'); + + return $ret; + } + + /** + * Renders the Doctype table + */ + protected function renderDoctype() { + $doctype = $this->def->doctype; + $ret = ''; + $ret .= $this->start('table'); + $ret .= $this->element('caption', 'Doctype'); + $ret .= $this->row('Name', $doctype->name); + $ret .= $this->row('XML', $doctype->xml ? 
'Yes' : 'No'); + $ret .= $this->row('Default Modules', implode(', ', $doctype->modules)); // separator goes first: the legacy (array, separator) order is rejected by PHP 8 + $ret .= $this->row('Default Tidy Modules', implode(', ', $doctype->tidyModules)); + $ret .= $this->end('table'); + return $ret; + } + + + /** + * Renders environment table, which is miscellaneous info + */ + protected function renderEnvironment() { + $def = $this->def; + + $ret = ''; + + $ret .= $this->start('table'); + $ret .= $this->element('caption', 'Environment'); + + $ret .= $this->row('Parent of fragment', $def->info_parent); + $ret .= $this->renderChildren($def->info_parent_def->child); + $ret .= $this->row('Block wrap name', $def->info_block_wrapper); + + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Global attributes'); + $ret .= $this->element('td', $this->listifyAttr($def->info_global_attr),0,0); + $ret .= $this->end('tr'); + + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Tag transforms'); + $list = array(); + foreach ($def->info_tag_transform as $old => $new) { + $new = $this->getClass($new, 'TagTransform_'); + $list[] = "<$old> with $new"; + } + $ret .= $this->element('td', $this->listify($list)); + $ret .= $this->end('tr'); + + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Pre-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_pre)); + $ret .= $this->end('tr'); + + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Post-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->info_attr_transform_post)); + $ret .= $this->end('tr'); + + $ret .= $this->end('table'); + return $ret; + } + + /** + * Renders the Content Sets table + */ + protected function renderContentSets() { + $ret = ''; + $ret .= $this->start('table'); + $ret .= $this->element('caption', 'Content Sets'); + foreach ($this->def->info_content_sets as $name => $lookup) { + $ret .= $this->heavyHeader($name); + $ret .= $this->start('tr'); + $ret .= $this->element('td',
$this->listifyTagLookup($lookup)); + $ret .= $this->end('tr'); + } + $ret .= $this->end('table'); + return $ret; + } + + /** + * Renders the Elements ($info) table + */ + protected function renderInfo() { + $ret = ''; + $ret .= $this->start('table'); + $ret .= $this->element('caption', 'Elements ($info)'); + ksort($this->def->info); + $ret .= $this->heavyHeader('Allowed tags', 2); + $ret .= $this->start('tr'); + $ret .= $this->element('td', $this->listifyTagLookup($this->def->info), array('colspan' => 2)); + $ret .= $this->end('tr'); + foreach ($this->def->info as $name => $def) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', "<$name>", array('class'=>'heavy', 'colspan' => 2)); + $ret .= $this->end('tr'); + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Inline content'); + $ret .= $this->element('td', $def->descendants_are_inline ? 'Yes' : 'No'); + $ret .= $this->end('tr'); + if (!empty($def->excludes)) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Excludes'); + $ret .= $this->element('td', $this->listifyTagLookup($def->excludes)); + $ret .= $this->end('tr'); + } + if (!empty($def->attr_transform_pre)) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Pre-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_pre)); + $ret .= $this->end('tr'); + } + if (!empty($def->attr_transform_post)) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Post-AttrTransform'); + $ret .= $this->element('td', $this->listifyObjectList($def->attr_transform_post)); + $ret .= $this->end('tr'); + } + if (!empty($def->auto_close)) { + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Auto closed by'); + $ret .= $this->element('td', $this->listifyTagLookup($def->auto_close)); + $ret .= $this->end('tr'); + } + $ret .= $this->start('tr'); + $ret .= $this->element('th', 'Allowed attributes'); + $ret .= $this->element('td',$this->listifyAttr($def->attr), array(), 0); + $ret 
.= $this->end('tr'); + + if (!empty($def->required_attr)) { + $ret .= $this->row('Required attributes', $this->listify($def->required_attr)); + } + + $ret .= $this->renderChildren($def->child); + } + $ret .= $this->end('table'); + return $ret; + } + + /** + * Renders a row describing the allowed children of an element + * @param $def HTMLPurifier_ChildDef of pertinent element + */ + protected function renderChildren($def) { + $context = new HTMLPurifier_Context(); + $ret = ''; + $ret .= $this->start('tr'); + $elements = array(); + $attr = array(); + if (isset($def->elements)) { + if ($def->type == 'strictblockquote') { + $def->validateChildren(array(), $this->config, $context); + } + $elements = $def->elements; + } + if ($def->type == 'chameleon') { + $attr['rowspan'] = 2; + } elseif ($def->type == 'empty') { + $elements = array(); + } elseif ($def->type == 'table') { + $elements = array_flip(array('col', 'caption', 'colgroup', 'thead', + 'tfoot', 'tbody', 'tr')); + } + $ret .= $this->element('th', 'Allowed children', $attr); + + if ($def->type == 'chameleon') { + + $ret .= $this->element('td', + 'Block: ' . + $this->escape($this->listifyTagLookup($def->block->elements)),0,0); + $ret .= $this->end('tr'); + $ret .= $this->start('tr'); + $ret .= $this->element('td', + 'Inline: ' . + $this->escape($this->listifyTagLookup($def->inline->elements)),0,0); + + } elseif ($def->type == 'custom') { + + $ret .= $this->element('td', ''.ucfirst($def->type).': ' . + $def->dtd_regex); + + } else { + $ret .= $this->element('td', + ''.ucfirst($def->type).': ' . + $this->escape($this->listifyTagLookup($elements)),0,0); + } + $ret .= $this->end('tr'); + return $ret; + } + + /** + * Listifies a tag lookup table. 
+ * @param $array Tag lookup array in form of array('tagname' => true) + */ + protected function listifyTagLookup($array) { + ksort($array); + $list = array(); + foreach ($array as $name => $discard) { + if ($name !== '#PCDATA' && !isset($this->def->info[$name])) continue; + $list[] = $name; + } + return $this->listify($list); + } + + /** + * Listifies a list of objects by retrieving class names and internal state + * @param $array List of objects + * @todo Also add information about internal state + */ + protected function listifyObjectList($array) { + ksort($array); + $list = array(); + foreach ($array as $discard => $obj) { + $list[] = $this->getClass($obj, 'AttrTransform_'); + } + return $this->listify($list); + } + + /** + * Listifies a hash of attributes to AttrDef classes + * @param $array Array hash in form of array('attrname' => HTMLPurifier_AttrDef) + */ + protected function listifyAttr($array) { + ksort($array); + $list = array(); + foreach ($array as $name => $obj) { + if ($obj === false) continue; + $list[] = "$name = " . $this->getClass($obj, 'AttrDef_') . 
''; + } + return $this->listify($list); + } + + /** + * Creates a heavy header row + */ + protected function heavyHeader($text, $num = 1) { + $ret = ''; + $ret .= $this->start('tr'); + $ret .= $this->element('th', $text, array('colspan' => $num, 'class' => 'heavy')); + $ret .= $this->end('tr'); + return $ret; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/PropertyList.php b/extlib/HTMLPurifier/HTMLPurifier/PropertyList.php new file mode 100644 index 0000000000..2b99fb7bc3 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/PropertyList.php @@ -0,0 +1,86 @@ +parent = $parent; + } + + /** + * Recursively retrieves the value for a key + */ + public function get($name) { + if ($this->has($name)) return $this->data[$name]; + // possible performance bottleneck, convert to iterative if necessary + if ($this->parent) return $this->parent->get($name); + throw new HTMLPurifier_Exception("Key '$name' not found"); + } + + /** + * Sets the value of a key, for this plist + */ + public function set($name, $value) { + $this->data[$name] = $value; + } + + /** + * Returns true if a given key exists + */ + public function has($name) { + return array_key_exists($name, $this->data); + } + + /** + * Resets a value to the value of its parent, usually the default. If + * no value is specified, the entire plist is reset. + */ + public function reset($name = null) { + if ($name === null) $this->data = array(); // strict check: '' and 0 are keys, not "no key given" + else unset($this->data[$name]); + } + + /** + * Squashes this property list and all of its property lists into a single + * array, and returns the array. This value is cached by default. + * @param $force If true, ignores the cache and regenerates the array.
+ */ + public function squash($force = false) { + if ($this->cache !== null && !$force) return $this->cache; + if ($this->parent) { + return $this->cache = array_merge($this->parent->squash($force), $this->data); + } else { + return $this->cache = $this->data; + } + } + + /** + * Returns the parent plist. + */ + public function getParent() { + return $this->parent; + } + + /** + * Sets the parent plist. + */ + public function setParent($plist) { + $this->parent = $plist; + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/PropertyListIterator.php b/extlib/HTMLPurifier/HTMLPurifier/PropertyListIterator.php new file mode 100644 index 0000000000..8f250443e4 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/PropertyListIterator.php @@ -0,0 +1,32 @@ +l = strlen($filter); + $this->filter = $filter; + } + + public function accept() { + $key = $this->getInnerIterator()->key(); + if( strncmp($key, $this->filter, $this->l) !== 0 ) { + return false; + } + return true; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Strategy.php b/extlib/HTMLPurifier/HTMLPurifier/Strategy.php new file mode 100644 index 0000000000..2462865210 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Strategy.php @@ -0,0 +1,26 @@ +strategies as $strategy) { + $tokens = $strategy->execute($tokens, $config, $context); + } + return $tokens; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Strategy/Core.php b/extlib/HTMLPurifier/HTMLPurifier/Strategy/Core.php new file mode 100644 index 0000000000..d90e158606 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Strategy/Core.php @@ -0,0 +1,18 @@ +strategies[] = new HTMLPurifier_Strategy_RemoveForeignElements(); + $this->strategies[] = new HTMLPurifier_Strategy_MakeWellFormed(); + $this->strategies[] = new HTMLPurifier_Strategy_FixNesting(); + $this->strategies[] = new HTMLPurifier_Strategy_ValidateAttributes(); + } + +} + +// vim: et sw=4 sts=4 diff --git 
a/extlib/HTMLPurifier/HTMLPurifier/Strategy/FixNesting.php b/extlib/HTMLPurifier/HTMLPurifier/Strategy/FixNesting.php new file mode 100644 index 0000000000..f81802391b --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Strategy/FixNesting.php @@ -0,0 +1,328 @@ +getHTMLDefinition(); + + // insert implicit "parent" node, will be removed at end. + // DEFINITION CALL + $parent_name = $definition->info_parent; + array_unshift($tokens, new HTMLPurifier_Token_Start($parent_name)); + $tokens[] = new HTMLPurifier_Token_End($parent_name); + + // setup the context variable 'IsInline', for chameleon processing + // is 'false' when we are not inline, 'true' when it must always + // be inline, and an integer when it is inline for a certain + // branch of the document tree + $is_inline = $definition->info_parent_def->descendants_are_inline; + $context->register('IsInline', $is_inline); + + // setup error collector + $e =& $context->get('ErrorCollector', true); + + //####################################################################// + // Loop initialization + + // stack that contains the indexes of all parents, + // $stack[count($stack)-1] being the current parent + $stack = array(); + + // stack that contains all elements that are excluded + // it is organized by parent elements, similar to $stack, + // but it is only populated when an element with exclusions is + // processed, i.e. there won't be empty exclusions. + $exclude_stack = array(); + + // variable that contains the start token while we are processing + // nodes. This enables error reporting to do its job + $start_token = false; + $context->register('CurrentToken', $start_token); + + //####################################################################// + // Loop + + // iterate through all start nodes. 
Determining the start node + // is complicated so it has been omitted from the loop construct + for ($i = 0, $size = count($tokens) ; $i < $size; ) { + + //################################################################// + // Gather information on children + + // child token accumulator + $child_tokens = array(); + + // scroll to the end of this node, report number, and collect + // all children + for ($j = $i, $depth = 0; ; $j++) { + if ($tokens[$j] instanceof HTMLPurifier_Token_Start) { + $depth++; + // skip token assignment on first iteration, this is the + // token we currently are on + if ($depth == 1) continue; + } elseif ($tokens[$j] instanceof HTMLPurifier_Token_End) { + $depth--; + // skip token assignment on last iteration, this is the + // end token of the token we're currently on + if ($depth == 0) break; + } + $child_tokens[] = $tokens[$j]; + } + + // $i is index of start token + // $j is index of end token + + $start_token = $tokens[$i]; // to make token available via CurrentToken + + //################################################################// + // Gather information on parent + + // calculate parent information + if ($count = count($stack)) { + $parent_index = $stack[$count-1]; + $parent_name = $tokens[$parent_index]->name; + if ($parent_index == 0) { + $parent_def = $definition->info_parent_def; + } else { + $parent_def = $definition->info[$parent_name]; + } + } else { + // processing as if the parent were the "root" node + // unknown info, it won't be used anyway, in the future, + // we may want to enforce one element only (this is + // necessary for HTML Purifier to clean entire documents + $parent_index = $parent_name = $parent_def = null; + } + + // calculate context + if ($is_inline === false) { + // check if conditions make it inline + if (!empty($parent_def) && $parent_def->descendants_are_inline) { + $is_inline = $count - 1; + } + } else { + // check if we're out of inline + if ($count === $is_inline) { + $is_inline = false; + } + 
} + + //################################################################// + // Determine whether element is explicitly excluded SGML-style + + // determine whether or not element is excluded by checking all + // parent exclusions. The array should not be very large, two + // elements at most. + $excluded = false; + if (!empty($exclude_stack)) { + foreach ($exclude_stack as $lookup) { + if (isset($lookup[$tokens[$i]->name])) { + $excluded = true; + // no need to continue processing + break; + } + } + } + + //################################################################// + // Perform child validation + + if ($excluded) { + // there is an exclusion, remove the entire node + $result = false; + $excludes = array(); // not used, but good to initialize anyway + } else { + // DEFINITION CALL + if ($i === 0) { + // special processing for the first node + $def = $definition->info_parent_def; + } else { + $def = $definition->info[$tokens[$i]->name]; + + } + + if (!empty($def->child)) { + // have DTD child def validate children + $result = $def->child->validateChildren( + $child_tokens, $config, $context); + } else { + // weird, no child definition, get rid of everything + $result = false; + } + + // determine whether or not this element has any exclusions + $excludes = $def->excludes; + } + + // $result is now a bool or array + + //################################################################// + // Process result by interpreting $result + + if ($result === true || $child_tokens === $result) { + // leave the node as is + + // register start token as a parental node start + $stack[] = $i; + + // register exclusions if there are any + if (!empty($excludes)) $exclude_stack[] = $excludes; + + // move cursor to next possible start node + $i++; + + } elseif($result === false) { + // remove entire node + + if ($e) { + if ($excluded) { + $e->send(E_ERROR, 'Strategy_FixNesting: Node excluded'); + } else { + $e->send(E_ERROR, 'Strategy_FixNesting: Node removed'); + } + } + + // 
calculate length of inner tokens and current tokens + $length = $j - $i + 1; + + // perform removal + array_splice($tokens, $i, $length); + + // update size + $size -= $length; + + // there is no start token to register, + // current node is now the next possible start node + // unless it turns out that we need to do a double-check + + // this is a rought heuristic that covers 100% of HTML's + // cases and 99% of all other cases. A child definition + // that would be tricked by this would be something like: + // ( | a b c) where it's all or nothing. Fortunately, + // our current implementation claims that that case would + // not allow empty, even if it did + if (!$parent_def->child->allow_empty) { + // we need to do a double-check + $i = $parent_index; + array_pop($stack); + } + + // PROJECTED OPTIMIZATION: Process all children elements before + // reprocessing parent node. + + } else { + // replace node with $result + + // calculate length of inner tokens + $length = $j - $i - 1; + + if ($e) { + if (empty($result) && $length) { + $e->send(E_ERROR, 'Strategy_FixNesting: Node contents removed'); + } else { + $e->send(E_WARNING, 'Strategy_FixNesting: Node reorganized'); + } + } + + // perform replacement + array_splice($tokens, $i + 1, $length, $result); + + // update size + $size -= $length; + $size += count($result); + + // register start token as a parental node start + $stack[] = $i; + + // register exclusions if there are any + if (!empty($excludes)) $exclude_stack[] = $excludes; + + // move cursor to next possible start node + $i++; + + } + + //################################################################// + // Scroll to next start node + + // We assume, at this point, that $i is the index of the token + // that is the first possible new start point for a node. + + // Test if the token indeed is a start tag, if not, move forward + // and test again. 
+ $size = count($tokens); + while ($i < $size and !$tokens[$i] instanceof HTMLPurifier_Token_Start) { + if ($tokens[$i] instanceof HTMLPurifier_Token_End) { + // pop a token index off the stack if we ended a node + array_pop($stack); + // pop an exclusion lookup off exclusion stack if + // we ended node and that node had exclusions + if ($i == 0 || $i == $size - 1) { + // use specialized var if it's the super-parent + $s_excludes = $definition->info_parent_def->excludes; + } else { + $s_excludes = $definition->info[$tokens[$i]->name]->excludes; + } + if ($s_excludes) { + array_pop($exclude_stack); + } + } + $i++; + } + + } + + //####################################################################// + // Post-processing + + // remove implicit parent tokens at the beginning and end + array_shift($tokens); + array_pop($tokens); + + // remove context variables + $context->destroy('IsInline'); + $context->destroy('CurrentToken'); + + //####################################################################// + // Return + + return $tokens; + + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Strategy/MakeWellFormed.php b/extlib/HTMLPurifier/HTMLPurifier/Strategy/MakeWellFormed.php new file mode 100644 index 0000000000..feb0c32b45 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Strategy/MakeWellFormed.php @@ -0,0 +1,457 @@ +getHTMLDefinition(); + + // local variables + $generator = new HTMLPurifier_Generator($config, $context); + $escape_invalid_tags = $config->get('Core.EscapeInvalidTags'); + $e = $context->get('ErrorCollector', true); + $t = false; // token index + $i = false; // injector index + $token = false; // the current token + $reprocess = false; // whether or not to reprocess the same token + $stack = array(); + + // member variables + $this->stack =& $stack; + $this->t =& $t; + $this->tokens =& $tokens; + $this->config = $config; + $this->context = $context; + + // context variables + $context->register('CurrentNesting', 
$stack); + $context->register('InputIndex', $t); + $context->register('InputTokens', $tokens); + $context->register('CurrentToken', $token); + + // -- begin INJECTOR -- + + $this->injectors = array(); + + $injectors = $config->getBatch('AutoFormat'); + $def_injectors = $definition->info_injector; + $custom_injectors = $injectors['Custom']; + unset($injectors['Custom']); // special case + foreach ($injectors as $injector => $b) { + // XXX: Fix with a legitimate lookup table of enabled filters + if (strpos($injector, '.') !== false) continue; + $injector = "HTMLPurifier_Injector_$injector"; + if (!$b) continue; + $this->injectors[] = new $injector; + } + foreach ($def_injectors as $injector) { + // assumed to be objects + $this->injectors[] = $injector; + } + foreach ($custom_injectors as $injector) { + if (is_string($injector)) { + $injector = "HTMLPurifier_Injector_$injector"; + $injector = new $injector; + } + $this->injectors[] = $injector; + } + + // give the injectors references to the definition and context + // variables for performance reasons + foreach ($this->injectors as $ix => $injector) { + $error = $injector->prepare($config, $context); + if (!$error) continue; + unset($this->injectors[$ix]); // rm the injector; unset (not array_splice) so the remaining $ix keys of this loop stay valid + trigger_error("Cannot enable {$injector->name} injector because $error is not allowed", E_USER_WARNING); + } + + // -- end INJECTOR -- + + // a note on punting: + // In order to reduce code duplication, whenever some code needs + // to make HTML changes in order to make things "correct", the + // new HTML gets sent through the purifier, regardless of its + // status. This means that if we add a start token, because it + // was totally necessary, we don't have to update nesting; we just + // punt ($reprocess = true; continue;) and it does that for us.
+ + // isset is in loop because $tokens size changes during loop exec + for ( + $t = 0; + $t == 0 || isset($tokens[$t - 1]); + // only increment if we don't need to reprocess + $reprocess ? $reprocess = false : $t++ + ) { + + // check for a rewind + if (is_int($i) && $i >= 0) { + // possibility: disable rewinding if the current token has a + // rewind set on it already. This would offer protection from + // infinite loop, but might hinder some advanced rewinding. + $rewind_to = $this->injectors[$i]->getRewind(); + if (is_int($rewind_to) && $rewind_to < $t) { + if ($rewind_to < 0) $rewind_to = 0; + while ($t > $rewind_to) { + $t--; + $prev = $tokens[$t]; + // indicate that other injectors should not process this token, + // but we need to reprocess it + unset($prev->skip[$i]); + $prev->rewind = $i; + if ($prev instanceof HTMLPurifier_Token_Start) array_pop($this->stack); + elseif ($prev instanceof HTMLPurifier_Token_End) $this->stack[] = $prev->start; + } + } + $i = false; + } + + // handle case of document end + if (!isset($tokens[$t])) { + // kill processing if stack is empty + if (empty($this->stack)) break; + + // peek + $top_nesting = array_pop($this->stack); + $this->stack[] = $top_nesting; + + // send error + if ($e && !isset($top_nesting->armor['MakeWellFormed_TagClosedError'])) { + $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by document end', $top_nesting); + } + + // append, don't splice, since this is the end + $tokens[] = new HTMLPurifier_Token_End($top_nesting->name); + + // punt! + $reprocess = true; + continue; + } + + $token = $tokens[$t]; + + //echo '
    '; printTokens($tokens, $t); printTokens($this->stack); + + // quick-check: if it's not a tag, no need to process + if (empty($token->is_tag)) { + if ($token instanceof HTMLPurifier_Token_Text) { + foreach ($this->injectors as $i => $injector) { + if (isset($token->skip[$i])) continue; + if ($token->rewind !== null && $token->rewind !== $i) continue; + $injector->handleText($token); + $this->processToken($token, $i); + $reprocess = true; + break; + } + } + // another possibility is a comment + continue; + } + + if (isset($definition->info[$token->name])) { + $type = $definition->info[$token->name]->child->type; + } else { + $type = false; // Type is unknown, treat accordingly + } + + // quick tag checks: anything that's *not* an end tag + $ok = false; + if ($type === 'empty' && $token instanceof HTMLPurifier_Token_Start) { + // claims to be a start tag but is empty + $token = new HTMLPurifier_Token_Empty($token->name, $token->attr); + $ok = true; + } elseif ($type && $type !== 'empty' && $token instanceof HTMLPurifier_Token_Empty) { + // claims to be empty but really is a start tag + $this->swap(new HTMLPurifier_Token_End($token->name)); + $this->insertBefore(new HTMLPurifier_Token_Start($token->name, $token->attr)); + // punt (since we had to modify the input stream in a non-trivial way) + $reprocess = true; + continue; + } elseif ($token instanceof HTMLPurifier_Token_Empty) { + // real empty token + $ok = true; + } elseif ($token instanceof HTMLPurifier_Token_Start) { + // start tag + + // ...unless they also have to close their parent + if (!empty($this->stack)) { + + $parent = array_pop($this->stack); + $this->stack[] = $parent; + + if (isset($definition->info[$parent->name])) { + $elements = $definition->info[$parent->name]->child->getAllowedElements($config); + $autoclose = !isset($elements[$token->name]); + } else { + $autoclose = false; + } + + $carryover = false; + if ($autoclose && $definition->info[$parent->name]->formatting) { + $carryover = true; + 
} + + if ($autoclose) { + // errors need to be updated + $new_token = new HTMLPurifier_Token_End($parent->name); + $new_token->start = $parent; + if ($carryover) { + $element = clone $parent; + $element->armor['MakeWellFormed_TagClosedError'] = true; + $element->carryover = true; + $this->processToken(array($new_token, $token, $element)); + } else { + $this->insertBefore($new_token); + } + if ($e && !isset($parent->armor['MakeWellFormed_TagClosedError'])) { + if (!$carryover) { + $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag auto closed', $parent); + } else { + $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag carryover', $parent); + } + } + $reprocess = true; + continue; + } + + } + $ok = true; + } + + if ($ok) { + foreach ($this->injectors as $i => $injector) { + if (isset($token->skip[$i])) continue; + if ($token->rewind !== null && $token->rewind !== $i) continue; + $injector->handleElement($token); + $this->processToken($token, $i); + $reprocess = true; + break; + } + if (!$reprocess) { + // ah, nothing interesting happened; do normal processing + $this->swap($token); + if ($token instanceof HTMLPurifier_Token_Start) { + $this->stack[] = $token; + } elseif ($token instanceof HTMLPurifier_Token_End) { + throw new HTMLPurifier_Exception('Improper handling of end tag in start code; possible error in MakeWellFormed'); + } + } + continue; + } + + // sanity check: we should be dealing with a closing tag + if (!$token instanceof HTMLPurifier_Token_End) { + throw new HTMLPurifier_Exception('Unaccounted for tag token in input stream, bug in HTML Purifier'); + } + + // make sure that we have something open + if (empty($this->stack)) { + if ($escape_invalid_tags) { + if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag to text'); + $this->swap(new HTMLPurifier_Token_Text( + $generator->generateFromToken($token) + )); + } else { + $this->remove(); + if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Unnecessary end tag removed'); + } + $reprocess 
= true; + continue; + } + + // first, check for the simplest case: everything closes neatly. + // Eventually, everything passes through here; if there are problems + // we modify the input stream accordingly and then punt, so that + // the tokens get processed again. + $current_parent = array_pop($this->stack); + if ($current_parent->name == $token->name) { + $token->start = $current_parent; + foreach ($this->injectors as $i => $injector) { + if (isset($token->skip[$i])) continue; + if ($token->rewind !== null && $token->rewind !== $i) continue; + $injector->handleEnd($token); + $this->processToken($token, $i); + $this->stack[] = $current_parent; + $reprocess = true; + break; + } + continue; + } + + // okay, so we're trying to close the wrong tag + + // undo the pop previous pop + $this->stack[] = $current_parent; + + // scroll back the entire nest, trying to find our tag. + // (feature could be to specify how far you'd like to go) + $size = count($this->stack); + // -2 because -1 is the last element, but we already checked that + $skipped_tags = false; + for ($j = $size - 2; $j >= 0; $j--) { + if ($this->stack[$j]->name == $token->name) { + $skipped_tags = array_slice($this->stack, $j); + break; + } + } + + // we didn't find the tag, so remove + if ($skipped_tags === false) { + if ($escape_invalid_tags) { + $this->swap(new HTMLPurifier_Token_Text( + $generator->generateFromToken($token) + )); + if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag to text'); + } else { + $this->remove(); + if ($e) $e->send(E_WARNING, 'Strategy_MakeWellFormed: Stray end tag removed'); + } + $reprocess = true; + continue; + } + + // do errors, in REVERSE $j order: a,b,c with
    + $c = count($skipped_tags); + if ($e) { + for ($j = $c - 1; $j > 0; $j--) { + // notice we exclude $j == 0, i.e. the current ending tag, from + // the errors... + if (!isset($skipped_tags[$j]->armor['MakeWellFormed_TagClosedError'])) { + $e->send(E_NOTICE, 'Strategy_MakeWellFormed: Tag closed by element end', $skipped_tags[$j]); + } + } + } + + // insert tags, in FORWARD $j order: c,b,a with
    + $replace = array($token); + for ($j = 1; $j < $c; $j++) { + // ...as well as from the insertions + $new_token = new HTMLPurifier_Token_End($skipped_tags[$j]->name); + $new_token->start = $skipped_tags[$j]; + array_unshift($replace, $new_token); + if (isset($definition->info[$new_token->name]) && $definition->info[$new_token->name]->formatting) { + $element = clone $skipped_tags[$j]; + $element->carryover = true; + $element->armor['MakeWellFormed_TagClosedError'] = true; + $replace[] = $element; + } + } + $this->processToken($replace); + $reprocess = true; + continue; + } + + $context->destroy('CurrentNesting'); + $context->destroy('InputTokens'); + $context->destroy('InputIndex'); + $context->destroy('CurrentToken'); + + unset($this->injectors, $this->stack, $this->tokens, $this->t); + return $tokens; + } + + /** + * Processes arbitrary token values for complicated substitution patterns. + * In general: + * + * If $token is an array, it is a list of tokens to substitute for the + * current token. These tokens then get individually processed. If there + * is a leading integer in the list, that integer determines how many + * tokens from the stream should be removed. + * + * If $token is a regular token, it is swapped with the current token. + * + * If $token is false, the current token is deleted. + * + * If $token is an integer, that number of tokens (with the first token + * being the current one) will be deleted. + * + * @param $token Token substitution value + * @param $injector Injector that performed the substitution; default is if + * this is not an injector related operation. 
+ */ + protected function processToken($token, $injector = -1) { + + // normalize forms of token + if (is_object($token)) $token = array(1, $token); + if (is_int($token)) $token = array($token); + if ($token === false) $token = array(1); + if (!is_array($token)) throw new HTMLPurifier_Exception('Invalid token type from injector'); + if (!is_int($token[0])) array_unshift($token, 1); + if ($token[0] === 0) throw new HTMLPurifier_Exception('Deleting zero tokens is not valid'); + + // $token is now an array with the following form: + // array(number nodes to delete, new node 1, new node 2, ...) + + $delete = array_shift($token); + $old = array_splice($this->tokens, $this->t, $delete, $token); + + if ($injector > -1) { + // determine appropriate skips + $oldskip = isset($old[0]) ? $old[0]->skip : array(); + foreach ($token as $object) { + $object->skip = $oldskip; + $object->skip[$injector] = true; + } + } + + } + + /** + * Inserts a token before the current token. Cursor now points to this token + */ + private function insertBefore($token) { + array_splice($this->tokens, $this->t, 0, array($token)); + } + + /** + * Removes current token. Cursor now points to new token occupying previously + * occupied space. + */ + private function remove() { + array_splice($this->tokens, $this->t, 1); + } + + /** + * Swap current token with new token. Cursor points to new token (no change). 
+ */ + private function swap($token) { + $this->tokens[$this->t] = $token; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Strategy/RemoveForeignElements.php b/extlib/HTMLPurifier/HTMLPurifier/Strategy/RemoveForeignElements.php new file mode 100644 index 0000000000..cf3a33e406 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Strategy/RemoveForeignElements.php @@ -0,0 +1,171 @@ +getHTMLDefinition(); + $generator = new HTMLPurifier_Generator($config, $context); + $result = array(); + + $escape_invalid_tags = $config->get('Core.EscapeInvalidTags'); + $remove_invalid_img = $config->get('Core.RemoveInvalidImg'); + + // currently only used to determine if comments should be kept + $trusted = $config->get('HTML.Trusted'); + + $remove_script_contents = $config->get('Core.RemoveScriptContents'); + $hidden_elements = $config->get('Core.HiddenElements'); + + // remove script contents compatibility + if ($remove_script_contents === true) { + $hidden_elements['script'] = true; + } elseif ($remove_script_contents === false && isset($hidden_elements['script'])) { + unset($hidden_elements['script']); + } + + $attr_validator = new HTMLPurifier_AttrValidator(); + + // removes tokens until it reaches a closing tag with its value + $remove_until = false; + + // converts comments into text tokens when this is equal to a tag name + $textify_comments = false; + + $token = false; + $context->register('CurrentToken', $token); + + $e = false; + if ($config->get('Core.CollectErrors')) { + $e =& $context->get('ErrorCollector'); + } + + foreach($tokens as $token) { + if ($remove_until) { + if (empty($token->is_tag) || $token->name !== $remove_until) { + continue; + } + } + if (!empty( $token->is_tag )) { + // DEFINITION CALL + + // before any processing, try to transform the element + if ( + isset($definition->info_tag_transform[$token->name]) + ) { + $original_name = $token->name; + // there is a transformation for this tag + // DEFINITION CALL + $token = 
$definition-> + info_tag_transform[$token->name]-> + transform($token, $config, $context); + if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Tag transform', $original_name); + } + + if (isset($definition->info[$token->name])) { + + // mostly everything's good, but + // we need to make sure required attributes are in order + if ( + ($token instanceof HTMLPurifier_Token_Start || $token instanceof HTMLPurifier_Token_Empty) && + $definition->info[$token->name]->required_attr && + ($token->name != 'img' || $remove_invalid_img) // ensure config option still works + ) { + $attr_validator->validateToken($token, $config, $context); + $ok = true; + foreach ($definition->info[$token->name]->required_attr as $name) { + if (!isset($token->attr[$name])) { + $ok = false; + break; + } + } + if (!$ok) { + if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Missing required attribute', $name); + continue; + } + $token->armor['ValidateAttributes'] = true; + } + + if (isset($hidden_elements[$token->name]) && $token instanceof HTMLPurifier_Token_Start) { + $textify_comments = $token->name; + } elseif ($token->name === $textify_comments && $token instanceof HTMLPurifier_Token_End) { + $textify_comments = false; + } + + } elseif ($escape_invalid_tags) { + // invalid tag, generate HTML representation and insert in + if ($e) $e->send(E_WARNING, 'Strategy_RemoveForeignElements: Foreign element to text'); + $token = new HTMLPurifier_Token_Text( + $generator->generateFromToken($token) + ); + } else { + // check if we need to destroy all of the tag's children + // CAN BE GENERICIZED + if (isset($hidden_elements[$token->name])) { + if ($token instanceof HTMLPurifier_Token_Start) { + $remove_until = $token->name; + } elseif ($token instanceof HTMLPurifier_Token_Empty) { + // do nothing: we're still looking + } else { + $remove_until = false; + } + if ($e) $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Foreign meta element removed'); + } else { + if ($e) $e->send(E_ERROR, 
'Strategy_RemoveForeignElements: Foreign element removed'); + } + continue; + } + } elseif ($token instanceof HTMLPurifier_Token_Comment) { + // textify comments in script tags when they are allowed + if ($textify_comments !== false) { + $data = $token->data; + $token = new HTMLPurifier_Token_Text($data); + } elseif ($trusted) { + // keep, but perform comment cleaning + if ($e) { + // perform check whether or not there's a trailing hyphen + if (substr($token->data, -1) == '-') { + $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Trailing hyphen in comment removed'); + } + } + $token->data = rtrim($token->data, '-'); + $found_double_hyphen = false; + while (strpos($token->data, '--') !== false) { + if ($e && !$found_double_hyphen) { + $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Hyphens in comment collapsed'); + } + $found_double_hyphen = true; // prevent double-erroring + $token->data = str_replace('--', '-', $token->data); + } + } else { + // strip comments + if ($e) $e->send(E_NOTICE, 'Strategy_RemoveForeignElements: Comment removed'); + continue; + } + } elseif ($token instanceof HTMLPurifier_Token_Text) { + } else { + continue; + } + $result[] = $token; + } + if ($remove_until && $e) { + // we removed tokens until the end, throw error + $e->send(E_ERROR, 'Strategy_RemoveForeignElements: Token removed to end', $remove_until); + } + + $context->destroy('CurrentToken'); + + return $result; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Strategy/ValidateAttributes.php b/extlib/HTMLPurifier/HTMLPurifier/Strategy/ValidateAttributes.php new file mode 100644 index 0000000000..c3328a9d44 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Strategy/ValidateAttributes.php @@ -0,0 +1,39 @@ +register('CurrentToken', $token); + + foreach ($tokens as $key => $token) { + + // only process tokens that have attributes, + // namely start and empty tags + if (!$token instanceof HTMLPurifier_Token_Start && !$token instanceof 
HTMLPurifier_Token_Empty) continue; + + // skip tokens that are armored + if (!empty($token->armor['ValidateAttributes'])) continue; + + // note that we have no facilities here for removing tokens + $validator->validateToken($token, $config, $context); + + $tokens[$key] = $token; // for PHP 4 + } + $context->destroy('CurrentToken'); + + return $tokens; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/StringHash.php b/extlib/HTMLPurifier/HTMLPurifier/StringHash.php new file mode 100644 index 0000000000..62085c5c2f --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/StringHash.php @@ -0,0 +1,39 @@ +accessed[$index] = true; + return parent::offsetGet($index); + } + + /** + * Returns a lookup array of all array indexes that have been accessed. + * @return Array in form array($index => true). + */ + public function getAccessed() { + return $this->accessed; + } + + /** + * Resets the access array. + */ + public function resetAccessed() { + $this->accessed = array(); + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/StringHashParser.php b/extlib/HTMLPurifier/HTMLPurifier/StringHashParser.php new file mode 100644 index 0000000000..f3e70c712f --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/StringHashParser.php @@ -0,0 +1,110 @@ + 'DefaultKeyValue', + * 'KEY' => 'Value', + * 'KEY2' => 'Value2', + * 'MULTILINE-KEY' => "Multiline\nvalue.\n", + * ) + * + * We use this as an easy to use file-format for configuration schema + * files, but the class itself is usage agnostic. + * + * You can use ---- to forcibly terminate parsing of a single string-hash; + * this marker is used in multi string-hashes to delimit boundaries. + */ +class HTMLPurifier_StringHashParser +{ + + public $default = 'ID'; + + /** + * Parses a file that contains a single string-hash. 
+ */ + public function parseFile($file) { + if (!file_exists($file)) return false; + $fh = fopen($file, 'r'); + if (!$fh) return false; + $ret = $this->parseHandle($fh); + fclose($fh); + return $ret; + } + + /** + * Parses a file that contains multiple string-hashes delimited by '----' + */ + public function parseMultiFile($file) { + if (!file_exists($file)) return false; + $ret = array(); + $fh = fopen($file, 'r'); + if (!$fh) return false; + while (!feof($fh)) { + $ret[] = $this->parseHandle($fh); + } + fclose($fh); + return $ret; + } + + /** + * Internal parser that acepts a file handle. + * @note While it's possible to simulate in-memory parsing by using + * custom stream wrappers, if such a use-case arises we should + * factor out the file handle into its own class. + * @param $fh File handle with pointer at start of valid string-hash + * block. + */ + protected function parseHandle($fh) { + $state = false; + $single = false; + $ret = array(); + do { + $line = fgets($fh); + if ($line === false) break; + $line = rtrim($line, "\n\r"); + if (!$state && $line === '') continue; + if ($line === '----') break; + if (strncmp('--#', $line, 3) === 0) { + // Comment + continue; + } elseif (strncmp('--', $line, 2) === 0) { + // Multiline declaration + $state = trim($line, '- '); + if (!isset($ret[$state])) $ret[$state] = ''; + continue; + } elseif (!$state) { + $single = true; + if (strpos($line, ':') !== false) { + // Single-line declaration + list($state, $line) = explode(':', $line, 2); + $line = trim($line); + } else { + // Use default declaration + $state = $this->default; + } + } + if ($single) { + $ret[$state] = $line; + $single = false; + $state = false; + } else { + $ret[$state] .= "$line\n"; + } + } while (!feof($fh)); + return $ret; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/TagTransform.php b/extlib/HTMLPurifier/HTMLPurifier/TagTransform.php new file mode 100644 index 0000000000..210a447217 --- /dev/null +++ 
b/extlib/HTMLPurifier/HTMLPurifier/TagTransform.php @@ -0,0 +1,36 @@ + 'xx-small', + '1' => 'xx-small', + '2' => 'small', + '3' => 'medium', + '4' => 'large', + '5' => 'x-large', + '6' => 'xx-large', + '7' => '300%', + '-1' => 'smaller', + '-2' => '60%', + '+1' => 'larger', + '+2' => '150%', + '+3' => '200%', + '+4' => '300%' + ); + + public function transform($tag, $config, $context) { + + if ($tag instanceof HTMLPurifier_Token_End) { + $new_tag = clone $tag; + $new_tag->name = $this->transform_to; + return $new_tag; + } + + $attr = $tag->attr; + $prepend_style = ''; + + // handle color transform + if (isset($attr['color'])) { + $prepend_style .= 'color:' . $attr['color'] . ';'; + unset($attr['color']); + } + + // handle face transform + if (isset($attr['face'])) { + $prepend_style .= 'font-family:' . $attr['face'] . ';'; + unset($attr['face']); + } + + // handle size transform + if (isset($attr['size'])) { + // normalize large numbers + if ($attr['size']{0} == '+' || $attr['size']{0} == '-') { + $size = (int) $attr['size']; + if ($size < -2) $attr['size'] = '-2'; + if ($size > 4) $attr['size'] = '+4'; + } else { + $size = (int) $attr['size']; + if ($size > 7) $attr['size'] = '7'; + } + if (isset($this->_size_lookup[$attr['size']])) { + $prepend_style .= 'font-size:' . + $this->_size_lookup[$attr['size']] . ';'; + } + unset($attr['size']); + } + + if ($prepend_style) { + $attr['style'] = isset($attr['style']) ? + $prepend_style . 
$attr['style'] : + $prepend_style; + } + + $new_tag = clone $tag; + $new_tag->name = $this->transform_to; + $new_tag->attr = $attr; + + return $new_tag; + + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/TagTransform/Simple.php b/extlib/HTMLPurifier/HTMLPurifier/TagTransform/Simple.php new file mode 100644 index 0000000000..0e36130f25 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/TagTransform/Simple.php @@ -0,0 +1,35 @@ +transform_to = $transform_to; + $this->style = $style; + } + + public function transform($tag, $config, $context) { + $new_tag = clone $tag; + $new_tag->name = $this->transform_to; + if (!is_null($this->style) && + ($new_tag instanceof HTMLPurifier_Token_Start || $new_tag instanceof HTMLPurifier_Token_Empty) + ) { + $this->prependCSS($new_tag->attr, $this->style); + } + return $new_tag; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Token.php b/extlib/HTMLPurifier/HTMLPurifier/Token.php new file mode 100644 index 0000000000..7900e6cb10 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Token.php @@ -0,0 +1,57 @@ +line = $l; + $this->col = $c; + } + + /** + * Convenience function for DirectLex settings line/col position. 
+ */ + public function rawPosition($l, $c) { + if ($c === -1) $l++; + $this->line = $l; + $this->col = $c; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Token/Comment.php b/extlib/HTMLPurifier/HTMLPurifier/Token/Comment.php new file mode 100644 index 0000000000..dc6bdcabb8 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Token/Comment.php @@ -0,0 +1,22 @@ +data = $data; + $this->line = $line; + $this->col = $col; + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Token/Empty.php b/extlib/HTMLPurifier/HTMLPurifier/Token/Empty.php new file mode 100644 index 0000000000..2a82b47ad1 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Token/Empty.php @@ -0,0 +1,11 @@ +!empty($obj->is_tag) + * without having to use a function call is_a(). + */ + public $is_tag = true; + + /** + * The lower-case name of the tag, like 'a', 'b' or 'blockquote'. + * + * @note Strictly speaking, XML tags are case sensitive, so we shouldn't + * be lower-casing them, but these tokens cater to HTML tags, which are + * insensitive. + */ + public $name; + + /** + * Associative array of the tag's attributes. + */ + public $attr = array(); + + /** + * Non-overloaded constructor, which lower-cases passed tag name. + * + * @param $name String name. + * @param $attr Associative array of attributes. + */ + public function __construct($name, $attr = array(), $line = null, $col = null) { + $this->name = ctype_lower($name) ? 
$name : strtolower($name); + foreach ($attr as $key => $value) { + // normalization only necessary when key is not lowercase + if (!ctype_lower($key)) { + $new_key = strtolower($key); + if (!isset($attr[$new_key])) { + $attr[$new_key] = $attr[$key]; + } + if ($new_key !== $key) { + unset($attr[$key]); + } + } + } + $this->attr = $attr; + $this->line = $line; + $this->col = $col; + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/Token/Text.php b/extlib/HTMLPurifier/HTMLPurifier/Token/Text.php new file mode 100644 index 0000000000..82efd823d6 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/Token/Text.php @@ -0,0 +1,33 @@ +data = $data; + $this->is_whitespace = ctype_space($data); + $this->line = $line; + $this->col = $col; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/TokenFactory.php b/extlib/HTMLPurifier/HTMLPurifier/TokenFactory.php new file mode 100644 index 0000000000..7cf48fb41c --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/TokenFactory.php @@ -0,0 +1,94 @@ +p_start = new HTMLPurifier_Token_Start('', array()); + $this->p_end = new HTMLPurifier_Token_End(''); + $this->p_empty = new HTMLPurifier_Token_Empty('', array()); + $this->p_text = new HTMLPurifier_Token_Text(''); + $this->p_comment= new HTMLPurifier_Token_Comment(''); + } + + /** + * Creates a HTMLPurifier_Token_Start. + * @param $name Tag name + * @param $attr Associative array of attributes + * @return Generated HTMLPurifier_Token_Start + */ + public function createStart($name, $attr = array()) { + $p = clone $this->p_start; + $p->__construct($name, $attr); + return $p; + } + + /** + * Creates a HTMLPurifier_Token_End. + * @param $name Tag name + * @return Generated HTMLPurifier_Token_End + */ + public function createEnd($name) { + $p = clone $this->p_end; + $p->__construct($name); + return $p; + } + + /** + * Creates a HTMLPurifier_Token_Empty. 
+ * @param $name Tag name + * @param $attr Associative array of attributes + * @return Generated HTMLPurifier_Token_Empty + */ + public function createEmpty($name, $attr = array()) { + $p = clone $this->p_empty; + $p->__construct($name, $attr); + return $p; + } + + /** + * Creates a HTMLPurifier_Token_Text. + * @param $data Data of text token + * @return Generated HTMLPurifier_Token_Text + */ + public function createText($data) { + $p = clone $this->p_text; + $p->__construct($data); + return $p; + } + + /** + * Creates a HTMLPurifier_Token_Comment. + * @param $data Data of comment token + * @return Generated HTMLPurifier_Token_Comment + */ + public function createComment($data) { + $p = clone $this->p_comment; + $p->__construct($data); + return $p; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/URI.php b/extlib/HTMLPurifier/HTMLPurifier/URI.php new file mode 100644 index 0000000000..8b50d0d18d --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/URI.php @@ -0,0 +1,173 @@ +scheme = is_null($scheme) || ctype_lower($scheme) ? $scheme : strtolower($scheme); + $this->userinfo = $userinfo; + $this->host = $host; + $this->port = is_null($port) ? 
$port : (int) $port; + $this->path = $path; + $this->query = $query; + $this->fragment = $fragment; + } + + /** + * Retrieves a scheme object corresponding to the URI's scheme/default + * @param $config Instance of HTMLPurifier_Config + * @param $context Instance of HTMLPurifier_Context + * @return Scheme object appropriate for validating this URI + */ + public function getSchemeObj($config, $context) { + $registry = HTMLPurifier_URISchemeRegistry::instance(); + if ($this->scheme !== null) { + $scheme_obj = $registry->getScheme($this->scheme, $config, $context); + if (!$scheme_obj) return false; // invalid scheme, clean it out + } else { + // no scheme: retrieve the default one + $def = $config->getDefinition('URI'); + $scheme_obj = $registry->getScheme($def->defaultScheme, $config, $context); + if (!$scheme_obj) { + // something funky happened to the default scheme object + trigger_error( + 'Default scheme object "' . $def->defaultScheme . '" was not readable', + E_USER_WARNING + ); + return false; + } + } + return $scheme_obj; + } + + /** + * Generic validation method applicable for all schemes. May modify + * this URI in order to get it into a compliant form. + * @param $config Instance of HTMLPurifier_Config + * @param $context Instance of HTMLPurifier_Context + * @return True if validation/filtering succeeds, false if failure + */ + public function validate($config, $context) { + + // ABNF definitions from RFC 3986 + $chars_sub_delims = '!$&\'()*+,;='; + $chars_gen_delims = ':/?#[]@'; + $chars_pchar = $chars_sub_delims . ':@'; + + // validate scheme (MUST BE FIRST!) 
+ if (!is_null($this->scheme) && is_null($this->host)) { + $def = $config->getDefinition('URI'); + if ($def->defaultScheme === $this->scheme) { + $this->scheme = null; + } + } + + // validate host + if (!is_null($this->host)) { + $host_def = new HTMLPurifier_AttrDef_URI_Host(); + $this->host = $host_def->validate($this->host, $config, $context); + if ($this->host === false) $this->host = null; + } + + // validate username + if (!is_null($this->userinfo)) { + $encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . ':'); + $this->userinfo = $encoder->encode($this->userinfo); + } + + // validate port + if (!is_null($this->port)) { + if ($this->port < 1 || $this->port > 65535) $this->port = null; + } + + // validate path + $path_parts = array(); + $segments_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/'); + if (!is_null($this->host)) { + // path-abempty (hier and relative) + $this->path = $segments_encoder->encode($this->path); + } elseif ($this->path !== '' && $this->path[0] === '/') { + // path-absolute (hier and relative) + if (strlen($this->path) >= 2 && $this->path[1] === '/') { + // This shouldn't ever happen! + $this->path = ''; + } else { + $this->path = $segments_encoder->encode($this->path); + } + } elseif (!is_null($this->scheme) && $this->path !== '') { + // path-rootless (hier) + // Short circuit evaluation means we don't need to check nz + $this->path = $segments_encoder->encode($this->path); + } elseif (is_null($this->scheme) && $this->path !== '') { + // path-noscheme (relative) + // (once again, not checking nz) + $segment_nc_encoder = new HTMLPurifier_PercentEncoder($chars_sub_delims . '@'); + $c = strpos($this->path, '/'); + if ($c !== false) { + $this->path = + $segment_nc_encoder->encode(substr($this->path, 0, $c)) . 
+ $segments_encoder->encode(substr($this->path, $c)); + } else { + $this->path = $segment_nc_encoder->encode($this->path); + } + } else { + // path-empty (hier and relative) + $this->path = ''; // just to be safe + } + + // qf = query and fragment + $qf_encoder = new HTMLPurifier_PercentEncoder($chars_pchar . '/?'); + + if (!is_null($this->query)) { + $this->query = $qf_encoder->encode($this->query); + } + + if (!is_null($this->fragment)) { + $this->fragment = $qf_encoder->encode($this->fragment); + } + + return true; + + } + + /** + * Convert URI back to string + * @return String URI appropriate for output + */ + public function toString() { + // reconstruct authority + $authority = null; + if (!is_null($this->host)) { + $authority = ''; + if(!is_null($this->userinfo)) $authority .= $this->userinfo . '@'; + $authority .= $this->host; + if(!is_null($this->port)) $authority .= ':' . $this->port; + } + + // reconstruct the result + $result = ''; + if (!is_null($this->scheme)) $result .= $this->scheme . ':'; + if (!is_null($authority)) $result .= '//' . $authority; + $result .= $this->path; + if (!is_null($this->query)) $result .= '?' . $this->query; + if (!is_null($this->fragment)) $result .= '#' . 
$this->fragment; + + return $result; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/URIDefinition.php b/extlib/HTMLPurifier/HTMLPurifier/URIDefinition.php new file mode 100644 index 0000000000..ea2b8fe245 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/URIDefinition.php @@ -0,0 +1,93 @@ +registerFilter(new HTMLPurifier_URIFilter_DisableExternal()); + $this->registerFilter(new HTMLPurifier_URIFilter_DisableExternalResources()); + $this->registerFilter(new HTMLPurifier_URIFilter_HostBlacklist()); + $this->registerFilter(new HTMLPurifier_URIFilter_MakeAbsolute()); + $this->registerFilter(new HTMLPurifier_URIFilter_Munge()); + } + + public function registerFilter($filter) { + $this->registeredFilters[$filter->name] = $filter; + } + + public function addFilter($filter, $config) { + $r = $filter->prepare($config); + if ($r === false) return; // null is ok, for backwards compat + if ($filter->post) { + $this->postFilters[$filter->name] = $filter; + } else { + $this->filters[$filter->name] = $filter; + } + } + + protected function doSetup($config) { + $this->setupMemberVariables($config); + $this->setupFilters($config); + } + + protected function setupFilters($config) { + foreach ($this->registeredFilters as $name => $filter) { + $conf = $config->get('URI.' . 
$name); + if ($conf !== false && $conf !== null) { + $this->addFilter($filter, $config); + } + } + unset($this->registeredFilters); + } + + protected function setupMemberVariables($config) { + $this->host = $config->get('URI.Host'); + $base_uri = $config->get('URI.Base'); + if (!is_null($base_uri)) { + $parser = new HTMLPurifier_URIParser(); + $this->base = $parser->parse($base_uri); + $this->defaultScheme = $this->base->scheme; + if (is_null($this->host)) $this->host = $this->base->host; + } + if (is_null($this->defaultScheme)) $this->defaultScheme = $config->get('URI.DefaultScheme'); + } + + public function filter(&$uri, $config, $context) { + foreach ($this->filters as $name => $f) { + $result = $f->filter($uri, $config, $context); + if (!$result) return false; + } + return true; + } + + public function postFilter(&$uri, $config, $context) { + foreach ($this->postFilters as $name => $f) { + $result = $f->filter($uri, $config, $context); + if (!$result) return false; + } + return true; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/URIFilter.php b/extlib/HTMLPurifier/HTMLPurifier/URIFilter.php new file mode 100644 index 0000000000..c116f93dff --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/URIFilter.php @@ -0,0 +1,45 @@ +getDefinition('URI')->host; + if ($our_host !== null) $this->ourHostParts = array_reverse(explode('.', $our_host)); + } + public function filter(&$uri, $config, $context) { + if (is_null($uri->host)) return true; + if ($this->ourHostParts === false) return false; + $host_parts = array_reverse(explode('.', $uri->host)); + foreach ($this->ourHostParts as $i => $x) { + if (!isset($host_parts[$i])) return false; + if ($host_parts[$i] != $this->ourHostParts[$i]) return false; + } + return true; + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/URIFilter/DisableExternalResources.php b/extlib/HTMLPurifier/HTMLPurifier/URIFilter/DisableExternalResources.php new file mode 100644 index 
0000000000..881abc43cf --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/URIFilter/DisableExternalResources.php @@ -0,0 +1,12 @@ +get('EmbeddedURI', true)) return true; + return parent::filter($uri, $config, $context); + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/URIFilter/HostBlacklist.php b/extlib/HTMLPurifier/HTMLPurifier/URIFilter/HostBlacklist.php new file mode 100644 index 0000000000..045aa0992c --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/URIFilter/HostBlacklist.php @@ -0,0 +1,21 @@ +blacklist = $config->get('URI.HostBlacklist'); + return true; + } + public function filter(&$uri, $config, $context) { + foreach($this->blacklist as $blacklisted_host_fragment) { + if (strpos($uri->host, $blacklisted_host_fragment) !== false) { + return false; + } + } + return true; + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/URIFilter/MakeAbsolute.php b/extlib/HTMLPurifier/HTMLPurifier/URIFilter/MakeAbsolute.php new file mode 100644 index 0000000000..f46ab2630d --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/URIFilter/MakeAbsolute.php @@ -0,0 +1,114 @@ +getDefinition('URI'); + $this->base = $def->base; + if (is_null($this->base)) { + trigger_error('URI.MakeAbsolute is being ignored due to lack of value for URI.Base configuration', E_USER_WARNING); + return false; + } + $this->base->fragment = null; // fragment is invalid for base URI + $stack = explode('/', $this->base->path); + array_pop($stack); // discard last segment + $stack = $this->_collapseStack($stack); // do pre-parsing + $this->basePathStack = $stack; + return true; + } + public function filter(&$uri, $config, $context) { + if (is_null($this->base)) return true; // abort early + if ( + $uri->path === '' && is_null($uri->scheme) && + is_null($uri->host) && is_null($uri->query) && is_null($uri->fragment) + ) { + // reference to current document + $uri = clone $this->base; + return true; + } + if (!is_null($uri->scheme)) { + // absolute 
URI already: don't change + if (!is_null($uri->host)) return true; + $scheme_obj = $uri->getSchemeObj($config, $context); + if (!$scheme_obj) { + // scheme not recognized + return false; + } + if (!$scheme_obj->hierarchical) { + // non-hierarchal URI with explicit scheme, don't change + return true; + } + // special case: had a scheme but always is hierarchical and had no authority + } + if (!is_null($uri->host)) { + // network path, don't bother + return true; + } + if ($uri->path === '') { + $uri->path = $this->base->path; + } elseif ($uri->path[0] !== '/') { + // relative path, needs more complicated processing + $stack = explode('/', $uri->path); + $new_stack = array_merge($this->basePathStack, $stack); + if ($new_stack[0] !== '' && !is_null($this->base->host)) { + array_unshift($new_stack, ''); + } + $new_stack = $this->_collapseStack($new_stack); + $uri->path = implode('/', $new_stack); + } else { + // absolute path, but still we should collapse + $uri->path = implode('/', $this->_collapseStack(explode('/', $uri->path))); + } + // re-combine + $uri->scheme = $this->base->scheme; + if (is_null($uri->userinfo)) $uri->userinfo = $this->base->userinfo; + if (is_null($uri->host)) $uri->host = $this->base->host; + if (is_null($uri->port)) $uri->port = $this->base->port; + return true; + } + + /** + * Resolve dots and double-dots in a path stack + */ + private function _collapseStack($stack) { + $result = array(); + $is_folder = false; + for ($i = 0; isset($stack[$i]); $i++) { + $is_folder = false; + // absorb an internally duplicated slash + if ($stack[$i] == '' && $i && isset($stack[$i+1])) continue; + if ($stack[$i] == '..') { + if (!empty($result)) { + $segment = array_pop($result); + if ($segment === '' && empty($result)) { + // error case: attempted to back out too far: + // restore the leading slash + $result[] = ''; + } elseif ($segment === '..') { + $result[] = '..'; // cannot remove .. with .. 
+ } + } else { + // relative path, preserve the double-dots + $result[] = '..'; + } + $is_folder = true; + continue; + } + if ($stack[$i] == '.') { + // silently absorb + $is_folder = true; + continue; + } + $result[] = $stack[$i]; + } + if ($is_folder) $result[] = ''; + return $result; + } +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/URIFilter/Munge.php b/extlib/HTMLPurifier/HTMLPurifier/URIFilter/Munge.php new file mode 100644 index 0000000000..efa10a6458 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/URIFilter/Munge.php @@ -0,0 +1,58 @@ +target = $config->get('URI.' . $this->name); + $this->parser = new HTMLPurifier_URIParser(); + $this->doEmbed = $config->get('URI.MungeResources'); + $this->secretKey = $config->get('URI.MungeSecretKey'); + return true; + } + public function filter(&$uri, $config, $context) { + if ($context->get('EmbeddedURI', true) && !$this->doEmbed) return true; + + $scheme_obj = $uri->getSchemeObj($config, $context); + if (!$scheme_obj) return true; // ignore unknown schemes, maybe another postfilter did it + if (is_null($uri->host) || empty($scheme_obj->browsable)) { + return true; + } + // don't redirect if target host is our host + if ($uri->host === $config->getDefinition('URI')->host) { + return true; + } + + $this->makeReplace($uri, $config, $context); + $this->replace = array_map('rawurlencode', $this->replace); + + $new_uri = strtr($this->target, $this->replace); + $new_uri = $this->parser->parse($new_uri); + // don't redirect if the target host is the same as the + // starting host + if ($uri->host === $new_uri->host) return true; + $uri = $new_uri; // overwrite + return true; + } + + protected function makeReplace($uri, $config, $context) { + $string = $uri->toString(); + // always available + $this->replace['%s'] = $string; + $this->replace['%r'] = $context->get('EmbeddedURI', true); + $token = $context->get('CurrentToken', true); + $this->replace['%n'] = $token ? 
$token->name : null; + $this->replace['%m'] = $context->get('CurrentAttr', true); + $this->replace['%p'] = $context->get('CurrentCSSProperty', true); + // not always available + if ($this->secretKey) $this->replace['%t'] = sha1($this->secretKey . ':' . $string); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/URIParser.php b/extlib/HTMLPurifier/HTMLPurifier/URIParser.php new file mode 100644 index 0000000000..7179e4ab89 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/URIParser.php @@ -0,0 +1,70 @@ +percentEncoder = new HTMLPurifier_PercentEncoder(); + } + + /** + * Parses a URI. + * @param $uri string URI to parse + * @return HTMLPurifier_URI representation of URI. This representation has + * not been validated yet and may not conform to RFC. + */ + public function parse($uri) { + + $uri = $this->percentEncoder->normalize($uri); + + // Regexp is as per Appendix B. + // Note that ["<>] are an addition to the RFC's recommended + // characters, because they represent external delimeters. + $r_URI = '!'. + '(([^:/?#"<>]+):)?'. // 2. Scheme + '(//([^/?#"<>]*))?'. // 4. Authority + '([^?#"<>]*)'. // 5. Path + '(\?([^#"<>]*))?'. // 7. Query + '(#([^"<>]*))?'. // 8. Fragment + '!'; + + $matches = array(); + $result = preg_match($r_URI, $uri, $matches); + + if (!$result) return false; // *really* invalid URI + + // seperate out parts + $scheme = !empty($matches[1]) ? $matches[2] : null; + $authority = !empty($matches[3]) ? $matches[4] : null; + $path = $matches[5]; // always present, can be empty + $query = !empty($matches[6]) ? $matches[7] : null; + $fragment = !empty($matches[8]) ? $matches[9] : null; + + // further parse authority + if ($authority !== null) { + $r_authority = "/^((.+?)@)?(\[[^\]]+\]|[^:]*)(:(\d*))?/"; + $matches = array(); + preg_match($r_authority, $authority, $matches); + $userinfo = !empty($matches[1]) ? $matches[2] : null; + $host = !empty($matches[3]) ? $matches[3] : ''; + $port = !empty($matches[4]) ? 
(int) $matches[5] : null; + } else { + $port = $host = $userinfo = null; + } + + return new HTMLPurifier_URI( + $scheme, $userinfo, $host, $port, $path, $query, $fragment); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/URIScheme.php b/extlib/HTMLPurifier/HTMLPurifier/URIScheme.php new file mode 100644 index 0000000000..039710fd15 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/URIScheme.php @@ -0,0 +1,42 @@ +, resolves edge cases + * with making relative URIs absolute + */ + public $hierarchical = false; + + /** + * Validates the components of a URI + * @note This implementation should be called by children if they define + * a default port, as it does port processing. + * @param $uri Instance of HTMLPurifier_URI + * @param $config HTMLPurifier_Config object + * @param $context HTMLPurifier_Context object + * @return Bool success or failure + */ + public function validate(&$uri, $config, $context) { + if ($this->default_port == $uri->port) $uri->port = null; + return true; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/URIScheme/ftp.php b/extlib/HTMLPurifier/HTMLPurifier/URIScheme/ftp.php new file mode 100644 index 0000000000..5849bf7ff0 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/URIScheme/ftp.php @@ -0,0 +1,43 @@ +query = null; + + // typecode check + $semicolon_pos = strrpos($uri->path, ';'); // reverse + if ($semicolon_pos !== false) { + $type = substr($uri->path, $semicolon_pos + 1); // no semicolon + $uri->path = substr($uri->path, 0, $semicolon_pos); + $type_ret = ''; + if (strpos($type, '=') !== false) { + // figure out whether or not the declaration is correct + list($key, $typecode) = explode('=', $type, 2); + if ($key !== 'type') { + // invalid key, tack it back on encoded + $uri->path .= '%3B' . $type; + } elseif ($typecode === 'a' || $typecode === 'i' || $typecode === 'd') { + $type_ret = ";type=$typecode"; + } + } else { + $uri->path .= '%3B' . 
$type; + } + $uri->path = str_replace(';', '%3B', $uri->path); + $uri->path .= $type_ret; + } + + return true; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/URIScheme/http.php b/extlib/HTMLPurifier/HTMLPurifier/URIScheme/http.php new file mode 100644 index 0000000000..b097a31d6a --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/URIScheme/http.php @@ -0,0 +1,20 @@ +userinfo = null; + return true; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/URIScheme/https.php b/extlib/HTMLPurifier/HTMLPurifier/URIScheme/https.php new file mode 100644 index 0000000000..29e380919f --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/URIScheme/https.php @@ -0,0 +1,12 @@ +userinfo = null; + $uri->host = null; + $uri->port = null; + // we need to validate path against RFC 2368's addr-spec + return true; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/URIScheme/news.php b/extlib/HTMLPurifier/HTMLPurifier/URIScheme/news.php new file mode 100644 index 0000000000..f5f54f4f56 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/URIScheme/news.php @@ -0,0 +1,22 @@ +userinfo = null; + $uri->host = null; + $uri->port = null; + $uri->query = null; + // typecode check needed on path + return true; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/URIScheme/nntp.php b/extlib/HTMLPurifier/HTMLPurifier/URIScheme/nntp.php new file mode 100644 index 0000000000..5bf93ea784 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/URIScheme/nntp.php @@ -0,0 +1,20 @@ +userinfo = null; + $uri->query = null; + return true; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/URISchemeRegistry.php b/extlib/HTMLPurifier/HTMLPurifier/URISchemeRegistry.php new file mode 100644 index 0000000000..576bf7b6d1 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/URISchemeRegistry.php @@ -0,0 +1,68 @@ +get('URI.AllowedSchemes'); + if 
(!$config->get('URI.OverrideAllowedSchemes') && + !isset($allowed_schemes[$scheme]) + ) { + return; + } + + if (isset($this->schemes[$scheme])) return $this->schemes[$scheme]; + if (!isset($allowed_schemes[$scheme])) return; + + $class = 'HTMLPurifier_URIScheme_' . $scheme; + if (!class_exists($class)) return; + $this->schemes[$scheme] = new $class(); + return $this->schemes[$scheme]; + } + + /** + * Registers a custom scheme to the cache, bypassing reflection. + * @param $scheme Scheme name + * @param $scheme_obj HTMLPurifier_URIScheme object + */ + public function register($scheme, $scheme_obj) { + $this->schemes[$scheme] = $scheme_obj; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/UnitConverter.php b/extlib/HTMLPurifier/HTMLPurifier/UnitConverter.php new file mode 100644 index 0000000000..545d426220 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/UnitConverter.php @@ -0,0 +1,254 @@ + array( + 'px' => 3, // This is as per CSS 2.1 and Firefox. Your mileage may vary + 'pt' => 4, + 'pc' => 48, + 'in' => 288, + self::METRIC => array('pt', '0.352777778', 'mm'), + ), + self::METRIC => array( + 'mm' => 1, + 'cm' => 10, + self::ENGLISH => array('mm', '2.83464567', 'pt'), + ), + ); + + /** + * Minimum bcmath precision for output. + */ + protected $outputPrecision; + + /** + * Bcmath precision for internal calculations. + */ + protected $internalPrecision; + + /** + * Whether or not BCMath is available + */ + private $bcmath; + + public function __construct($output_precision = 4, $internal_precision = 10, $force_no_bcmath = false) { + $this->outputPrecision = $output_precision; + $this->internalPrecision = $internal_precision; + $this->bcmath = !$force_no_bcmath && function_exists('bcmul'); + } + + /** + * Converts a length object of one unit into another unit. + * @param HTMLPurifier_Length $length + * Instance of HTMLPurifier_Length to convert. You must validate() + * it before passing it here! 
+ * @param string $to_unit + * Unit to convert to. + * @note + * About precision: This conversion function pays very special + * attention to the incoming precision of values and attempts + * to maintain a number of significant figure. Results are + * fairly accurate up to nine digits. Some caveats: + * - If a number is zero-padded as a result of this significant + * figure tracking, the zeroes will be eliminated. + * - If a number contains less than four sigfigs ($outputPrecision) + * and this causes some decimals to be excluded, those + * decimals will be added on. + */ + public function convert($length, $to_unit) { + + if (!$length->isValid()) return false; + + $n = $length->getN(); + $unit = $length->getUnit(); + + if ($n === '0' || $unit === false) { + return new HTMLPurifier_Length('0', false); + } + + $state = $dest_state = false; + foreach (self::$units as $k => $x) { + if (isset($x[$unit])) $state = $k; + if (isset($x[$to_unit])) $dest_state = $k; + } + if (!$state || !$dest_state) return false; + + // Some calculations about the initial precision of the number; + // this will be useful when we need to do final rounding. + $sigfigs = $this->getSigFigs($n); + if ($sigfigs < $this->outputPrecision) $sigfigs = $this->outputPrecision; + + // BCMath's internal precision deals only with decimals. Use + // our default if the initial number has no decimals, or increase + // it by how ever many decimals, thus, the number of guard digits + // will always be greater than or equal to internalPrecision. + $log = (int) floor(log(abs($n), 10)); + $cp = ($log < 0) ? 
$this->internalPrecision - $log : $this->internalPrecision; // internal precision + + for ($i = 0; $i < 2; $i++) { + + // Determine what unit IN THIS SYSTEM we need to convert to + if ($dest_state === $state) { + // Simple conversion + $dest_unit = $to_unit; + } else { + // Convert to the smallest unit, pending a system shift + $dest_unit = self::$units[$state][$dest_state][0]; + } + + // Do the conversion if necessary + if ($dest_unit !== $unit) { + $factor = $this->div(self::$units[$state][$unit], self::$units[$state][$dest_unit], $cp); + $n = $this->mul($n, $factor, $cp); + $unit = $dest_unit; + } + + // Output was zero, so bail out early. Shouldn't ever happen. + if ($n === '') { + $n = '0'; + $unit = $to_unit; + break; + } + + // It was a simple conversion, so bail out + if ($dest_state === $state) { + break; + } + + if ($i !== 0) { + // Conversion failed! Apparently, the system we forwarded + // to didn't have this unit. This should never happen! + return false; + } + + // Pre-condition: $i == 0 + + // Perform conversion to next system of units + $n = $this->mul($n, self::$units[$state][$dest_state][1], $cp); + $unit = self::$units[$state][$dest_state][2]; + $state = $dest_state; + + // One more loop around to convert the unit in the new system. + + } + + // Post-condition: $unit == $to_unit + if ($unit !== $to_unit) return false; + + // Useful for debugging: + //echo "
    n";
    +        //echo "$n\nsigfigs = $sigfigs\nnew_log = $new_log\nlog = $log\nrp = $rp\n
    \n"; + + $n = $this->round($n, $sigfigs); + if (strpos($n, '.') !== false) $n = rtrim($n, '0'); + $n = rtrim($n, '.'); + + return new HTMLPurifier_Length($n, $unit); + } + + /** + * Returns the number of significant figures in a string number. + * @param string $n Decimal number + * @return int number of sigfigs + */ + public function getSigFigs($n) { + $n = ltrim($n, '0+-'); + $dp = strpos($n, '.'); // decimal position + if ($dp === false) { + $sigfigs = strlen(rtrim($n, '0')); + } else { + $sigfigs = strlen(ltrim($n, '0.')); // eliminate extra decimal character + if ($dp !== 0) $sigfigs--; + } + return $sigfigs; + } + + /** + * Adds two numbers, using arbitrary precision when available. + */ + private function add($s1, $s2, $scale) { + if ($this->bcmath) return bcadd($s1, $s2, $scale); + else return $this->scale($s1 + $s2, $scale); + } + + /** + * Multiples two numbers, using arbitrary precision when available. + */ + private function mul($s1, $s2, $scale) { + if ($this->bcmath) return bcmul($s1, $s2, $scale); + else return $this->scale($s1 * $s2, $scale); + } + + /** + * Divides two numbers, using arbitrary precision when available. + */ + private function div($s1, $s2, $scale) { + if ($this->bcmath) return bcdiv($s1, $s2, $scale); + else return $this->scale($s1 / $s2, $scale); + } + + /** + * Rounds a number according to the number of sigfigs it should have, + * using arbitrary precision when available. + */ + private function round($n, $sigfigs) { + $new_log = (int) floor(log(abs($n), 10)); // Number of digits left of decimal - 1 + $rp = $sigfigs - $new_log - 1; // Number of decimal places needed + $neg = $n < 0 ? '-' : ''; // Negative sign + if ($this->bcmath) { + if ($rp >= 0) { + $n = bcadd($n, $neg . '0.' . str_repeat('0', $rp) . '5', $rp + 1); + $n = bcdiv($n, '1', $rp); + } else { + // This algorithm partially depends on the standardized + // form of numbers that comes out of bcmath. + $n = bcadd($n, $neg . '5' . 
str_repeat('0', $new_log - $sigfigs), 0); + $n = substr($n, 0, $sigfigs + strlen($neg)) . str_repeat('0', $new_log - $sigfigs + 1); + } + return $n; + } else { + return $this->scale(round($n, $sigfigs - $new_log - 1), $rp + 1); + } + } + + /** + * Scales a float to $scale digits right of decimal point, like BCMath. + */ + private function scale($r, $scale) { + if ($scale < 0) { + // The f sprintf type doesn't support negative numbers, so we + // need to cludge things manually. First get the string. + $r = sprintf('%.0f', (float) $r); + // Due to floating point precision loss, $r will more than likely + // look something like 4652999999999.9234. We grab one more digit + // than we need to precise from $r and then use that to round + // appropriately. + $precise = (string) round(substr($r, 0, strlen($r) + $scale), -1); + // Now we return it, truncating the zero that was rounded off. + return substr($precise, 0, -1) . str_repeat('0', -$scale + 1); + } + return sprintf('%.' . $scale . 'f', (float) $r); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/VarParser.php b/extlib/HTMLPurifier/HTMLPurifier/VarParser.php new file mode 100644 index 0000000000..68e72ae869 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/VarParser.php @@ -0,0 +1,154 @@ + self::STRING, + 'istring' => self::ISTRING, + 'text' => self::TEXT, + 'itext' => self::ITEXT, + 'int' => self::INT, + 'float' => self::FLOAT, + 'bool' => self::BOOL, + 'lookup' => self::LOOKUP, + 'list' => self::ALIST, + 'hash' => self::HASH, + 'mixed' => self::MIXED + ); + + /** + * Lookup table of types that are string, and can have aliases or + * allowed value lists. + */ + static public $stringTypes = array( + self::STRING => true, + self::ISTRING => true, + self::TEXT => true, + self::ITEXT => true, + ); + + /** + * Validate a variable according to type. Throws + * HTMLPurifier_VarParserException if invalid. + * It may return NULL as a valid type if $allow_null is true. 
+ * + * @param $var Variable to validate + * @param $type Type of variable, see HTMLPurifier_VarParser->types + * @param $allow_null Whether or not to permit null as a value + * @return Validated and type-coerced variable + */ + final public function parse($var, $type, $allow_null = false) { + if (is_string($type)) { + if (!isset(HTMLPurifier_VarParser::$types[$type])) { + throw new HTMLPurifier_VarParserException("Invalid type '$type'"); + } else { + $type = HTMLPurifier_VarParser::$types[$type]; + } + } + $var = $this->parseImplementation($var, $type, $allow_null); + if ($allow_null && $var === null) return null; + // These are basic checks, to make sure nothing horribly wrong + // happened in our implementations. + switch ($type) { + case (self::STRING): + case (self::ISTRING): + case (self::TEXT): + case (self::ITEXT): + if (!is_string($var)) break; + if ($type == self::ISTRING || $type == self::ITEXT) $var = strtolower($var); + return $var; + case (self::INT): + if (!is_int($var)) break; + return $var; + case (self::FLOAT): + if (!is_float($var)) break; + return $var; + case (self::BOOL): + if (!is_bool($var)) break; + return $var; + case (self::LOOKUP): + case (self::ALIST): + case (self::HASH): + if (!is_array($var)) break; + if ($type === self::LOOKUP) { + foreach ($var as $k) if ($k !== true) $this->error('Lookup table contains value other than true'); + } elseif ($type === self::ALIST) { + $keys = array_keys($var); + if (array_keys($keys) !== $keys) $this->error('Indices for list are not uniform'); + } + return $var; + case (self::MIXED): + return $var; + default: + $this->errorInconsistent(get_class($this), $type); + } + $this->errorGeneric($var, $type); + } + + /** + * Actually implements the parsing. Base implementation is to not + * do anything to $var. Subclasses should overload this! + */ + protected function parseImplementation($var, $type, $allow_null) { + return $var; + } + + /** + * Throws an exception. 
+ */ + protected function error($msg) { + throw new HTMLPurifier_VarParserException($msg); + } + + /** + * Throws an inconsistency exception. + * @note This should not ever be called. It would be called if we + * extend the allowed values of HTMLPurifier_VarParser without + * updating subclasses. + */ + protected function errorInconsistent($class, $type) { + throw new HTMLPurifier_Exception("Inconsistency in $class: ".HTMLPurifier_VarParser::getTypeName($type)." not implemented"); + } + + /** + * Generic error for if a type didn't work. + */ + protected function errorGeneric($var, $type) { + $vtype = gettype($var); + $this->error("Expected type ".HTMLPurifier_VarParser::getTypeName($type).", got $vtype"); + } + + static public function getTypeName($type) { + static $lookup; + if (!$lookup) { + // Lazy load the alternative lookup table + $lookup = array_flip(HTMLPurifier_VarParser::$types); + } + if (!isset($lookup[$type])) return 'unknown'; + return $lookup[$type]; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/VarParser/Flexible.php b/extlib/HTMLPurifier/HTMLPurifier/VarParser/Flexible.php new file mode 100644 index 0000000000..c954250e9f --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/VarParser/Flexible.php @@ -0,0 +1,96 @@ + $j) $var[$i] = trim($j); + if ($type === self::HASH) { + // key:value,key2:value2 + $nvar = array(); + foreach ($var as $keypair) { + $c = explode(':', $keypair, 2); + if (!isset($c[1])) continue; + $nvar[$c[0]] = $c[1]; + } + $var = $nvar; + } + } + if (!is_array($var)) break; + $keys = array_keys($var); + if ($keys === array_keys($keys)) { + if ($type == self::ALIST) return $var; + elseif ($type == self::LOOKUP) { + $new = array(); + foreach ($var as $key) { + $new[$key] = true; + } + return $new; + } else break; + } + if ($type === self::LOOKUP) { + foreach ($var as $key => $value) { + $var[$key] = true; + } + } + return $var; + default: + $this->errorInconsistent(__CLASS__, $type); + } + 
$this->errorGeneric($var, $type); + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/VarParser/Native.php b/extlib/HTMLPurifier/HTMLPurifier/VarParser/Native.php new file mode 100644 index 0000000000..b02a6de54c --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/VarParser/Native.php @@ -0,0 +1,26 @@ +evalExpression($var); + } + + protected function evalExpression($expr) { + $var = null; + $result = eval("\$var = $expr;"); + if ($result === false) { + throw new HTMLPurifier_VarParserException("Fatal error in evaluated code"); + } + return $var; + } + +} + +// vim: et sw=4 sts=4 diff --git a/extlib/HTMLPurifier/HTMLPurifier/VarParserException.php b/extlib/HTMLPurifier/HTMLPurifier/VarParserException.php new file mode 100644 index 0000000000..5df3414959 --- /dev/null +++ b/extlib/HTMLPurifier/HTMLPurifier/VarParserException.php @@ -0,0 +1,11 @@ + + Copyright (C) + + This library is free software; you can redistribute it and/or + modify it under the terms of the GNU Lesser General Public + License as published by the Free Software Foundation; either + version 2.1 of the License, or (at your option) any later version. + + This library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public + License along with this library; if not, write to the Free Software + Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the library, if +necessary. 
Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the + library `Frob' (a library for tweaking knobs) written by James Random Hacker. + + , 1 April 1990 + Ty Coon, President of Vice + +That's all there is to it! + + vim: et sw=4 sts=4 From ab4ec095e811f86c50fbc1cd71144b11d6b86d50 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 20 Feb 2010 11:38:05 -0500 Subject: [PATCH 28/62] adjust URI, URL, and location in Ostatus_profile::processPost --- plugins/OStatus/classes/Ostatus_profile.php | 27 ++++++++++++--------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/plugins/OStatus/classes/Ostatus_profile.php b/plugins/OStatus/classes/Ostatus_profile.php index 9f5c605612..4dd5652886 100644 --- a/plugins/OStatus/classes/Ostatus_profile.php +++ b/plugins/OStatus/classes/Ostatus_profile.php @@ -457,28 +457,33 @@ class Ostatus_profile extends Memcached_DataObject $oprofile = $this; } - if ($activity->object->link) { - $sourceUri = $activity->object->link; - } else if (preg_match('!^https?://!', $activity->object->id)) { - $sourceUri = $activity->object->id; - } else { - common_log(LOG_INFO, "OStatus: ignoring post with no source link: id $activity->object->id"); + $sourceUri = $activity->object->id; + + $dupe = Notice::staticGet('uri', $sourceUri); + + if ($dupe) { + common_log(LOG_INFO, "OStatus: ignoring duplicate post: $sourceUri"); return; } - $dupe = Notice::staticGet('uri', $sourceUri); - if ($dupe) { - common_log(LOG_INFO, "OStatus: ignoring duplicate post: $noticeLink"); - return; + $sourceUrl = null; + + if ($activity->object->link) { + $sourceUrl = $activity->object->link; + } else if (preg_match('!^https?://!', $activity->object->id)) { + $sourceUrl = $activity->object->id; } // @fixme sanitize and save HTML content if available + $content = $activity->object->title; $params = array('is_local' => Notice::REMOTE_OMB, + 'url' => $sourceUrl, 'uri' => $sourceUri); - $location = 
$this->getEntryLocation($activity->entry); + $location = $activity->context->location; + if ($location) { $params['lat'] = $location->lat; $params['lon'] = $location->lon; From 866b6470629b570b9f817553f85f6d8e801f0d43 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 20 Feb 2010 11:48:42 -0500 Subject: [PATCH 29/62] add hooks for OStatus notification on subscribe/unsubscribe --- plugins/OStatus/OStatusPlugin.php | 44 +++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/plugins/OStatus/OStatusPlugin.php b/plugins/OStatus/OStatusPlugin.php index e1f3fd9d37..9e6d03177f 100644 --- a/plugins/OStatus/OStatusPlugin.php +++ b/plugins/OStatus/OStatusPlugin.php @@ -335,4 +335,48 @@ class OStatusPlugin extends Plugin common_log(LOG_DEBUG, "No ostatus profile for incoming feed $feedsub->uri"); } } + + function onEndSubscribe($subscriber, $other) + { + $user = User::staticGet('id', $subscriber->id); + + if (empty($user)) { + return true; + } + + $oprofile = Ostatus_profile::staticGet('profile_id', $other->id); + + if (empty($oprofile)) { + return true; + } + + // We have a local user subscribing to a remote profile; make the + // magic happen! + + $oprofile->notify($subscriber, ActivityVerb::FOLLOW); + + return true; + } + + function onEndUnsubscribe($subscriber, $other) + { + $user = User::staticGet('id', $subscriber->id); + + if (empty($user)) { + return true; + } + + $oprofile = Ostatus_profile::staticGet('profile_id', $other->id); + + if (empty($oprofile)) { + return true; + } + + // We have a local user subscribing to a remote profile; make the + // magic happen! + + $oprofile->notify($subscriber, ActivityVerb::UNFOLLOW); + + return true; + } } From 36d21fa7162ca94ce100433da53439a67e815ba1 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 20 Feb 2010 12:03:32 -0500 Subject: [PATCH 30/62] Add events for favor and disfavor Added events to core code for when someone favors or disfavors a notice. 
--- EVENTS.txt | 19 +++++++++++++++++++ classes/Fave.php | 44 +++++++++++++++++++++++++++++++++++++------- 2 files changed, 56 insertions(+), 7 deletions(-) diff --git a/EVENTS.txt b/EVENTS.txt index 90242fa133..c108606ce2 100644 --- a/EVENTS.txt +++ b/EVENTS.txt @@ -729,3 +729,22 @@ StartGetProfileUri: When determining the canonical URI for a given profile EndGetProfileUri: After determining the canonical URI for a given profile - $profile: the current profile - &$uri: the URI + +StartFavorNotice: Saving a notice as a favorite +- $profile: profile of the person faving (can be remote!) +- $notice: notice being faved +- &$fave: Favor object; null to start off with, but feel free to override. + +EndFavorNotice: After saving a notice as a favorite +- $profile: profile of the person faving (can be remote!) +- $notice: notice being faved + +StartDisfavorNotice: Removing a notice from favorites +- $profile: profile of the person faving (can be remote!) +- $notice: notice being faved +- &$result: result of the disfavoring (if you override) + +EndDisfavorNotice: After removing a notice from favorites +- $profile: profile of the person faving (can be remote!) 
+- $notice: notice being faved + diff --git a/classes/Fave.php b/classes/Fave.php index 8113c8e166..0b6eec2bc4 100644 --- a/classes/Fave.php +++ b/classes/Fave.php @@ -21,17 +21,47 @@ class Fave extends Memcached_DataObject /* the code above is auto generated do not remove the tag below */ ###END_AUTOCODE - static function addNew($user, $notice) { - $fave = new Fave(); - $fave->user_id = $user->id; - $fave->notice_id = $notice->id; - if (!$fave->insert()) { - common_log_db_error($fave, 'INSERT', __FILE__); - return false; + static function addNew($profile, $notice) { + + $fave = null; + + if (Event::handle('StartFavorNotice', array($profile, $notice, &$fave))) { + + $fave = new Fave(); + + $fave->user_id = $profile->id; + $fave->notice_id = $notice->id; + + if (!$fave->insert()) { + common_log_db_error($fave, 'INSERT', __FILE__); + return false; + } + + Event::handle('EndFavorNotice', array($profile, $notice)); } + return $fave; } + function delete() + { + $profile = Profile::staticGet('id', $this->user_id); + $notice = Notice::staticGet('id', $this->notice_id); + + $result = null; + + if (Event::handle('StartDisfavorNotice', array($profile, $notice, &$result))) { + + $result = parent::delete(); + + if ($result) { + Event::handle('EndDisfavorNotice', array($profile, $notice)); + } + } + + return $result; + } + function pkeyGet($kv) { return Memcached_DataObject::pkeyGet('Fave', $kv); From 97b01432e7903797e39205e46a51394feac54292 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sat, 20 Feb 2010 10:06:28 -0800 Subject: [PATCH 31/62] drop no-longer-used XML_Feed_Parser extlib package from OStatus plugin --- plugins/OStatus/extlib/README | 9 - plugins/OStatus/extlib/XML/Feed/Parser.php | 351 ------------- .../OStatus/extlib/XML/Feed/Parser/Atom.php | 365 -------------- .../extlib/XML/Feed/Parser/AtomElement.php | 261 ---------- .../extlib/XML/Feed/Parser/Exception.php | 42 -- .../OStatus/extlib/XML/Feed/Parser/RSS09.php | 214 -------- 
.../extlib/XML/Feed/Parser/RSS09Element.php | 62 --- .../OStatus/extlib/XML/Feed/Parser/RSS1.php | 277 ----------- .../OStatus/extlib/XML/Feed/Parser/RSS11.php | 276 ----------- .../extlib/XML/Feed/Parser/RSS11Element.php | 151 ------ .../extlib/XML/Feed/Parser/RSS1Element.php | 116 ----- .../OStatus/extlib/XML/Feed/Parser/RSS2.php | 335 ------------- .../extlib/XML/Feed/Parser/RSS2Element.php | 171 ------- .../OStatus/extlib/XML/Feed/Parser/Type.php | 467 ------------------ .../XML/Feed/samples/atom10-entryonly.xml | 28 -- .../XML/Feed/samples/atom10-example1.xml | 20 - .../XML/Feed/samples/atom10-example2.xml | 45 -- .../extlib/XML/Feed/samples/delicious.feed | 177 ------- .../extlib/XML/Feed/samples/flickr.feed | 184 ------- .../extlib/XML/Feed/samples/grwifi-atom.xml | 7 - .../OStatus/extlib/XML/Feed/samples/hoder.xml | 102 ---- .../XML/Feed/samples/illformed_atom10.xml | 13 - .../XML/Feed/samples/rss091-complete.xml | 47 -- .../XML/Feed/samples/rss091-international.xml | 30 -- .../extlib/XML/Feed/samples/rss091-simple.xml | 15 - .../extlib/XML/Feed/samples/rss092-sample.xml | 103 ---- .../XML/Feed/samples/rss10-example1.xml | 62 --- .../XML/Feed/samples/rss10-example2.xml | 67 --- .../extlib/XML/Feed/samples/rss2sample.xml | 42 -- .../extlib/XML/Feed/samples/sixapart-jp.xml | 226 --------- .../extlib/XML/Feed/samples/technorati.feed | 54 -- .../OStatus/extlib/XML/Feed/schemas/atom.rnc | 338 ------------- .../OStatus/extlib/XML/Feed/schemas/rss10.rnc | 113 ----- .../OStatus/extlib/XML/Feed/schemas/rss11.rnc | 218 -------- .../extlib/xml-feed-parser-bug-16416.patch | 14 - plugins/OStatus/tests/FeedMungerTest.php | 147 ------ 36 files changed, 5149 deletions(-) delete mode 100644 plugins/OStatus/extlib/README delete mode 100755 plugins/OStatus/extlib/XML/Feed/Parser.php delete mode 100644 plugins/OStatus/extlib/XML/Feed/Parser/Atom.php delete mode 100755 plugins/OStatus/extlib/XML/Feed/Parser/AtomElement.php delete mode 100755 
plugins/OStatus/extlib/XML/Feed/Parser/Exception.php delete mode 100755 plugins/OStatus/extlib/XML/Feed/Parser/RSS09.php delete mode 100755 plugins/OStatus/extlib/XML/Feed/Parser/RSS09Element.php delete mode 100755 plugins/OStatus/extlib/XML/Feed/Parser/RSS1.php delete mode 100755 plugins/OStatus/extlib/XML/Feed/Parser/RSS11.php delete mode 100755 plugins/OStatus/extlib/XML/Feed/Parser/RSS11Element.php delete mode 100755 plugins/OStatus/extlib/XML/Feed/Parser/RSS1Element.php delete mode 100644 plugins/OStatus/extlib/XML/Feed/Parser/RSS2.php delete mode 100755 plugins/OStatus/extlib/XML/Feed/Parser/RSS2Element.php delete mode 100644 plugins/OStatus/extlib/XML/Feed/Parser/Type.php delete mode 100755 plugins/OStatus/extlib/XML/Feed/samples/atom10-entryonly.xml delete mode 100755 plugins/OStatus/extlib/XML/Feed/samples/atom10-example1.xml delete mode 100755 plugins/OStatus/extlib/XML/Feed/samples/atom10-example2.xml delete mode 100755 plugins/OStatus/extlib/XML/Feed/samples/delicious.feed delete mode 100755 plugins/OStatus/extlib/XML/Feed/samples/flickr.feed delete mode 100755 plugins/OStatus/extlib/XML/Feed/samples/grwifi-atom.xml delete mode 100755 plugins/OStatus/extlib/XML/Feed/samples/hoder.xml delete mode 100755 plugins/OStatus/extlib/XML/Feed/samples/illformed_atom10.xml delete mode 100755 plugins/OStatus/extlib/XML/Feed/samples/rss091-complete.xml delete mode 100755 plugins/OStatus/extlib/XML/Feed/samples/rss091-international.xml delete mode 100755 plugins/OStatus/extlib/XML/Feed/samples/rss091-simple.xml delete mode 100755 plugins/OStatus/extlib/XML/Feed/samples/rss092-sample.xml delete mode 100755 plugins/OStatus/extlib/XML/Feed/samples/rss10-example1.xml delete mode 100755 plugins/OStatus/extlib/XML/Feed/samples/rss10-example2.xml delete mode 100755 plugins/OStatus/extlib/XML/Feed/samples/rss2sample.xml delete mode 100755 plugins/OStatus/extlib/XML/Feed/samples/sixapart-jp.xml delete mode 100755 plugins/OStatus/extlib/XML/Feed/samples/technorati.feed delete 
mode 100755 plugins/OStatus/extlib/XML/Feed/schemas/atom.rnc delete mode 100755 plugins/OStatus/extlib/XML/Feed/schemas/rss10.rnc delete mode 100755 plugins/OStatus/extlib/XML/Feed/schemas/rss11.rnc delete mode 100644 plugins/OStatus/extlib/xml-feed-parser-bug-16416.patch delete mode 100644 plugins/OStatus/tests/FeedMungerTest.php diff --git a/plugins/OStatus/extlib/README b/plugins/OStatus/extlib/README deleted file mode 100644 index 799b40c478..0000000000 --- a/plugins/OStatus/extlib/README +++ /dev/null @@ -1,9 +0,0 @@ -XML_Feed_Parser 1.0.3 is not currently actively maintained, and has -a nasty bug which breaks getting the feed target link from WordPress -feeds and possibly others that are RSS2-formatted but include an - self-link element as well. - -Patch from this bug report is included: -http://pear.php.net/bugs/bug.php?id=16416 - -If upgrading, be sure that fix is included with the future upgrade! diff --git a/plugins/OStatus/extlib/XML/Feed/Parser.php b/plugins/OStatus/extlib/XML/Feed/Parser.php deleted file mode 100755 index ffe8220a52..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/Parser.php +++ /dev/null @@ -1,351 +0,0 @@ - - * @copyright 2005 James Stewart - * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL - * @version CVS: $Id: Parser.php,v 1.24 2006/08/15 13:04:00 jystewart Exp $ - * @link http://pear.php.net/package/XML_Feed_Parser/ - */ - -/** - * XML_Feed_Parser_Type is an abstract class required by all of our - * feed types. It makes sense to load it here to keep the other files - * clean. - */ -require_once 'XML/Feed/Parser/Type.php'; - -/** - * We will throw exceptions when errors occur. - */ -require_once 'XML/Feed/Parser/Exception.php'; - -/** - * This is the core of the XML_Feed_Parser package. It identifies feed types - * and abstracts access to them. It is an iterator, allowing for easy access - * to the entire feed. 
- * - * @author James Stewart - * @version Release: 1.0.3 - * @package XML_Feed_Parser - */ -class XML_Feed_Parser implements Iterator -{ - /** - * This is where we hold the feed object - * @var Object - */ - private $feed; - - /** - * To allow for extensions, we make a public reference to the feed model - * @var DOMDocument - */ - public $model; - - /** - * A map between entry ID and offset - * @var array - */ - protected $idMappings = array(); - - /** - * A storage space for Namespace URIs. - * @var array - */ - private $feedNamespaces = array( - 'rss2' => array( - 'http://backend.userland.com/rss', - 'http://backend.userland.com/rss2', - 'http://blogs.law.harvard.edu/tech/rss')); - /** - * Detects feed types and instantiate appropriate objects. - * - * Our constructor takes care of detecting feed types and instantiating - * appropriate classes. For now we're going to treat Atom 0.3 as Atom 1.0 - * but raise a warning. I do not intend to introduce full support for - * Atom 0.3 as it has been deprecated, but others are welcome to. - * - * @param string $feed XML serialization of the feed - * @param bool $strict Whether or not to validate the feed - * @param bool $suppressWarnings Trigger errors for deprecated feed types? - * @param bool $tidy Whether or not to try and use the tidy library on input - */ - function __construct($feed, $strict = false, $suppressWarnings = false, $tidy = false) - { - $this->model = new DOMDocument; - if (! $this->model->loadXML($feed)) { - if (extension_loaded('tidy') && $tidy) { - $tidy = new tidy; - $tidy->parseString($feed, - array('input-xml' => true, 'output-xml' => true)); - $tidy->cleanRepair(); - if (! $this->model->loadXML((string) $tidy)) { - throw new XML_Feed_Parser_Exception('Invalid input: this is not ' . 
- 'valid XML'); - } - } else { - throw new XML_Feed_Parser_Exception('Invalid input: this is not valid XML'); - } - - } - - /* detect feed type */ - $doc_element = $this->model->documentElement; - $error = false; - - switch (true) { - case ($doc_element->namespaceURI == 'http://www.w3.org/2005/Atom'): - require_once 'XML/Feed/Parser/Atom.php'; - require_once 'XML/Feed/Parser/AtomElement.php'; - $class = 'XML_Feed_Parser_Atom'; - break; - case ($doc_element->namespaceURI == 'http://purl.org/atom/ns#'): - require_once 'XML/Feed/Parser/Atom.php'; - require_once 'XML/Feed/Parser/AtomElement.php'; - $class = 'XML_Feed_Parser_Atom'; - $error = 'Atom 0.3 deprecated, using 1.0 parser which won\'t provide ' . - 'all options'; - break; - case ($doc_element->namespaceURI == 'http://purl.org/rss/1.0/' || - ($doc_element->hasChildNodes() && $doc_element->childNodes->length > 1 - && $doc_element->childNodes->item(1)->namespaceURI == - 'http://purl.org/rss/1.0/')): - require_once 'XML/Feed/Parser/RSS1.php'; - require_once 'XML/Feed/Parser/RSS1Element.php'; - $class = 'XML_Feed_Parser_RSS1'; - break; - case ($doc_element->namespaceURI == 'http://purl.org/rss/1.1/' || - ($doc_element->hasChildNodes() && $doc_element->childNodes->length > 1 - && $doc_element->childNodes->item(1)->namespaceURI == - 'http://purl.org/rss/1.1/')): - require_once 'XML/Feed/Parser/RSS11.php'; - require_once 'XML/Feed/Parser/RSS11Element.php'; - $class = 'XML_Feed_Parser_RSS11'; - break; - case (($doc_element->hasChildNodes() && $doc_element->childNodes->length > 1 - && $doc_element->childNodes->item(1)->namespaceURI == - 'http://my.netscape.com/rdf/simple/0.9/') || - $doc_element->namespaceURI == 'http://my.netscape.com/rdf/simple/0.9/'): - require_once 'XML/Feed/Parser/RSS09.php'; - require_once 'XML/Feed/Parser/RSS09Element.php'; - $class = 'XML_Feed_Parser_RSS09'; - break; - case ($doc_element->tagName == 'rss' and - $doc_element->hasAttribute('version') && - $doc_element->getAttribute('version') == 
0.91): - $error = 'RSS 0.91 has been superceded by RSS2.0. Using RSS2.0 parser.'; - require_once 'XML/Feed/Parser/RSS2.php'; - require_once 'XML/Feed/Parser/RSS2Element.php'; - $class = 'XML_Feed_Parser_RSS2'; - break; - case ($doc_element->tagName == 'rss' and - $doc_element->hasAttribute('version') && - $doc_element->getAttribute('version') == 0.92): - $error = 'RSS 0.92 has been superceded by RSS2.0. Using RSS2.0 parser.'; - require_once 'XML/Feed/Parser/RSS2.php'; - require_once 'XML/Feed/Parser/RSS2Element.php'; - $class = 'XML_Feed_Parser_RSS2'; - break; - case (in_array($doc_element->namespaceURI, $this->feedNamespaces['rss2']) - || $doc_element->tagName == 'rss'): - if (! $doc_element->hasAttribute('version') || - $doc_element->getAttribute('version') != 2) { - $error = 'RSS version not specified. Parsing as RSS2.0'; - } - require_once 'XML/Feed/Parser/RSS2.php'; - require_once 'XML/Feed/Parser/RSS2Element.php'; - $class = 'XML_Feed_Parser_RSS2'; - break; - default: - throw new XML_Feed_Parser_Exception('Feed type unknown'); - break; - } - - if (! $suppressWarnings && ! empty($error)) { - trigger_error($error, E_USER_WARNING); - } - - /* Instantiate feed object */ - $this->feed = new $class($this->model, $strict); - } - - /** - * Proxy to allow feed element names to be used as method names - * - * For top-level feed elements we will provide access using methods or - * attributes. This function simply passes on a request to the appropriate - * feed type object. - * - * @param string $call - the method being called - * @param array $attributes - */ - function __call($call, $attributes) - { - $attributes = array_pad($attributes, 5, false); - list($a, $b, $c, $d, $e) = $attributes; - return $this->feed->$call($a, $b, $c, $d, $e); - } - - /** - * Proxy to allow feed element names to be used as attribute names - * - * To allow variable-like access to feed-level data we use this - * method. 
It simply passes along to __call() which in turn passes - * along to the relevant object. - * - * @param string $val - the name of the variable required - */ - function __get($val) - { - return $this->feed->$val; - } - - /** - * Provides iteration functionality. - * - * Of course we must be able to iterate... This function simply increases - * our internal counter. - */ - function next() - { - if (isset($this->current_item) && - $this->current_item <= $this->feed->numberEntries - 1) { - ++$this->current_item; - } else if (! isset($this->current_item)) { - $this->current_item = 0; - } else { - return false; - } - } - - /** - * Return XML_Feed_Type object for current element - * - * @return XML_Feed_Parser_Type Object - */ - function current() - { - return $this->getEntryByOffset($this->current_item); - } - - /** - * For iteration -- returns the key for the current stage in the array. - * - * @return int - */ - function key() - { - return $this->current_item; - } - - /** - * For iteration -- tells whether we have reached the - * end. - * - * @return bool - */ - function valid() - { - return $this->current_item < $this->feed->numberEntries; - } - - /** - * For iteration -- resets the internal counter to the beginning. - */ - function rewind() - { - $this->current_item = 0; - } - - /** - * Provides access to entries by ID if one is specified in the source feed. - * - * As well as allowing the items to be iterated over we want to allow - * users to be able to access a specific entry. This is one of two ways of - * doing that, the other being by offset. This method can be quite slow - * if dealing with a large feed that hasn't yet been processed as it - * instantiates objects for every entry until it finds the one needed. 
- * - * @param string $id Valid ID for the given feed format - * @return XML_Feed_Parser_Type|false - */ - function getEntryById($id) - { - if (isset($this->idMappings[$id])) { - return $this->getEntryByOffset($this->idMappings[$id]); - } - - /* - * Since we have not yet encountered that ID, let's go through all the - * remaining entries in order till we find it. - * This is a fairly slow implementation, but it should work. - */ - return $this->feed->getEntryById($id); - } - - /** - * Retrieve entry by numeric offset, starting from zero. - * - * As well as allowing the items to be iterated over we want to allow - * users to be able to access a specific entry. This is one of two ways of - * doing that, the other being by ID. - * - * @param int $offset The position of the entry within the feed, starting from 0 - * @return XML_Feed_Parser_Type|false - */ - function getEntryByOffset($offset) - { - if ($offset < $this->feed->numberEntries) { - if (isset($this->feed->entries[$offset])) { - return $this->feed->entries[$offset]; - } else { - try { - $this->feed->getEntryByOffset($offset); - } catch (Exception $e) { - return false; - } - $id = $this->feed->entries[$offset]->getID(); - $this->idMappings[$id] = $offset; - return $this->feed->entries[$offset]; - } - } else { - return false; - } - } - - /** - * Retrieve version details from feed type class. - * - * @return void - * @author James Stewart - */ - function version() - { - return $this->feed->version; - } - - /** - * Returns a string representation of the feed. 
- * - * @return String - **/ - function __toString() - { - return $this->feed->__toString(); - } -} -?> \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/Parser/Atom.php b/plugins/OStatus/extlib/XML/Feed/Parser/Atom.php deleted file mode 100644 index c7e218a1e6..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/Parser/Atom.php +++ /dev/null @@ -1,365 +0,0 @@ - - * @copyright 2005 James Stewart - * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 - * @version CVS: $Id: Atom.php,v 1.29 2008/03/30 22:00:36 jystewart Exp $ - * @link http://pear.php.net/package/XML_Feed_Parser/ -*/ - -/** - * This is the class that determines how we manage Atom 1.0 feeds - * - * How we deal with constructs: - * date - return as unix datetime for use with the 'date' function unless specified otherwise - * text - return as is. optional parameter will give access to attributes - * person - defaults to name, but parameter based access - * - * @author James Stewart - * @version Release: 1.0.3 - * @package XML_Feed_Parser - */ -class XML_Feed_Parser_Atom extends XML_Feed_Parser_Type -{ - /** - * The URI of the RelaxNG schema used to (optionally) validate the feed - * @var string - */ - private $relax = 'atom.rnc'; - - /** - * We're likely to use XPath, so let's keep it global - * @var DOMXPath - */ - public $xpath; - - /** - * When performing XPath queries we will use this prefix - * @var string - */ - private $xpathPrefix = '//'; - - /** - * The feed type we are parsing - * @var string - */ - public $version = 'Atom 1.0'; - - /** - * The class used to represent individual items - * @var string - */ - protected $itemClass = 'XML_Feed_Parser_AtomElement'; - - /** - * The element containing entries - * @var string - */ - protected $itemElement = 'entry'; - - /** - * Here we map those elements we're not going to handle individually - * to the constructs they are. The optional second parameter in the array - * tells the parser whether to 'fall back' (not apt. 
at the feed level) or - * fail if the element is missing. If the parameter is not set, the function - * will simply return false and leave it to the client to decide what to do. - * @var array - */ - protected $map = array( - 'author' => array('Person'), - 'contributor' => array('Person'), - 'icon' => array('Text'), - 'logo' => array('Text'), - 'id' => array('Text', 'fail'), - 'rights' => array('Text'), - 'subtitle' => array('Text'), - 'title' => array('Text', 'fail'), - 'updated' => array('Date', 'fail'), - 'link' => array('Link'), - 'generator' => array('Text'), - 'category' => array('Category')); - - /** - * Here we provide a few mappings for those very special circumstances in - * which it makes sense to map back to the RSS2 spec. Key is RSS2 version - * value is an array consisting of the equivalent in atom and any attributes - * needed to make the mapping. - * @var array - */ - protected $compatMap = array( - 'guid' => array('id'), - 'links' => array('link'), - 'tags' => array('category'), - 'contributors' => array('contributor')); - - /** - * Our constructor does nothing more than its parent. - * - * @param DOMDocument $xml A DOM object representing the feed - * @param bool (optional) $string Whether or not to validate this feed - */ - function __construct(DOMDocument $model, $strict = false) - { - $this->model = $model; - - if ($strict) { - if (! $this->model->relaxNGValidateSource($this->relax)) { - throw new XML_Feed_Parser_Exception('Failed required validation'); - } - } - - $this->xpath = new DOMXPath($this->model); - $this->xpath->registerNamespace('atom', 'http://www.w3.org/2005/Atom'); - $this->numberEntries = $this->count('entry'); - } - - /** - * Implement retrieval of an entry based on its ID for atom feeds. - * - * This function uses XPath to get the entry based on its ID. 
If DOMXPath::evaluate - * is available, we also use that to store a reference to the entry in the array - * used by getEntryByOffset so that method does not have to seek out the entry - * if it's requested that way. - * - * @param string $id any valid Atom ID. - * @return XML_Feed_Parser_AtomElement - */ - function getEntryById($id) - { - if (isset($this->idMappings[$id])) { - return $this->entries[$this->idMappings[$id]]; - } - - $entries = $this->xpath->query("//atom:entry[atom:id='$id']"); - - if ($entries->length > 0) { - $xmlBase = $entries->item(0)->baseURI; - $entry = new $this->itemClass($entries->item(0), $this, $xmlBase); - - if (in_array('evaluate', get_class_methods($this->xpath))) { - $offset = $this->xpath->evaluate("count(preceding-sibling::atom:entry)", $entries->item(0)); - $this->entries[$offset] = $entry; - } - - $this->idMappings[$id] = $entry; - - return $entry; - } - - } - - /** - * Retrieves data from a person construct. - * - * Get a person construct. We default to the 'name' element but allow - * access to any of the elements. - * - * @param string $method The name of the person construct we want - * @param array $arguments An array which we hope gives a 'param' - * @return string|false - */ - protected function getPerson($method, $arguments) - { - $offset = empty($arguments[0]) ? 0 : $arguments[0]; - $parameter = empty($arguments[1]['param']) ? 'name' : $arguments[1]['param']; - $section = $this->model->getElementsByTagName($method); - - if ($parameter == 'url') { - $parameter = 'uri'; - } - - if ($section->length <= $offset) { - return false; - } - - $param = $section->item($offset)->getElementsByTagName($parameter); - if ($param->length == 0) { - return false; - } - return $param->item(0)->nodeValue; - } - - /** - * Retrieves an element's content where that content is a text construct. - * - * Get a text construct. 
When calling this method, the two arguments - * allowed are 'offset' and 'attribute', so $parser->subtitle() would - * return the content of the element, while $parser->subtitle(false, 'type') - * would return the value of the type attribute. - * - * @todo Clarify overlap with getContent() - * @param string $method The name of the text construct we want - * @param array $arguments An array which we hope gives a 'param' - * @return string - */ - protected function getText($method, $arguments) - { - $offset = empty($arguments[0]) ? 0: $arguments[0]; - $attribute = empty($arguments[1]) ? false : $arguments[1]; - $tags = $this->model->getElementsByTagName($method); - - if ($tags->length <= $offset) { - return false; - } - - $content = $tags->item($offset); - - if (! $content->hasAttribute('type')) { - $content->setAttribute('type', 'text'); - } - $type = $content->getAttribute('type'); - - if (! empty($attribute) and - ! ($method == 'generator' and $attribute == 'name')) { - if ($content->hasAttribute($attribute)) { - return $content->getAttribute($attribute); - } else if ($attribute == 'href' and $content->hasAttribute('uri')) { - return $content->getAttribute('uri'); - } - return false; - } - - return $this->parseTextConstruct($content); - } - - /** - * Extract content appropriately from atom text constructs - * - * Because of different rules applied to the content element and other text - * constructs, they are deployed as separate functions, but they share quite - * a bit of processing. This method performs the core common process, which is - * to apply the rules for different mime types in order to extract the content. 
- * - * @param DOMNode $content the text construct node to be parsed - * @return String - * @author James Stewart - **/ - protected function parseTextConstruct(DOMNode $content) - { - if ($content->hasAttribute('type')) { - $type = $content->getAttribute('type'); - } else { - $type = 'text'; - } - - if (strpos($type, 'text/') === 0) { - $type = 'text'; - } - - switch ($type) { - case 'text': - case 'html': - return $content->textContent; - break; - case 'xhtml': - $container = $content->getElementsByTagName('div'); - if ($container->length == 0) { - return false; - } - $contents = $container->item(0); - if ($contents->hasChildNodes()) { - /* Iterate through, applying xml:base and store the result */ - $result = ''; - foreach ($contents->childNodes as $node) { - $result .= $this->traverseNode($node); - } - return $result; - } - break; - case preg_match('@^[a-zA-Z]+/[a-zA-Z+]*xml@i', $type) > 0: - return $content; - break; - case 'application/octet-stream': - default: - return base64_decode(trim($content->nodeValue)); - break; - } - return false; - } - /** - * Get a category from the entry. - * - * A feed or entry can have any number of categories. A category can have the - * attributes term, scheme and label. - * - * @param string $method The name of the text construct we want - * @param array $arguments An array which we hope gives a 'param' - * @return string - */ - function getCategory($method, $arguments) - { - $offset = empty($arguments[0]) ? 0: $arguments[0]; - $attribute = empty($arguments[1]) ? 'term' : $arguments[1]; - $categories = $this->model->getElementsByTagName('category'); - if ($categories->length <= $offset) { - $category = $categories->item($offset); - if ($category->hasAttribute($attribute)) { - return $category->getAttribute($attribute); - } - } - return false; - } - - /** - * This element must be present at least once with rel="feed". This element may be - * present any number of further times so long as there is no clash. 
If no 'rel' is - * present and we're asked for one, we follow the example of the Universal Feed - * Parser and presume 'alternate'. - * - * @param int $offset the position of the link within the container - * @param string $attribute the attribute name required - * @param array an array of attributes to search by - * @return string the value of the attribute - */ - function getLink($offset = 0, $attribute = 'href', $params = false) - { - if (is_array($params) and !empty($params)) { - $terms = array(); - $alt_predicate = ''; - $other_predicate = ''; - - foreach ($params as $key => $value) { - if ($key == 'rel' && $value == 'alternate') { - $alt_predicate = '[not(@rel) or @rel="alternate"]'; - } else { - $terms[] = "@$key='$value'"; - } - } - if (!empty($terms)) { - $other_predicate = '[' . join(' and ', $terms) . ']'; - } - $query = $this->xpathPrefix . 'atom:link' . $alt_predicate . $other_predicate; - $links = $this->xpath->query($query); - } else { - $links = $this->model->getElementsByTagName('link'); - } - if ($links->length > $offset) { - if ($links->item($offset)->hasAttribute($attribute)) { - $value = $links->item($offset)->getAttribute($attribute); - if ($attribute == 'href') { - $value = $this->addBase($value, $links->item($offset)); - } - return $value; - } else if ($attribute == 'rel') { - return 'alternate'; - } - } - return false; - } -} - -?> \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/Parser/AtomElement.php b/plugins/OStatus/extlib/XML/Feed/Parser/AtomElement.php deleted file mode 100755 index 063ecb6177..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/Parser/AtomElement.php +++ /dev/null @@ -1,261 +0,0 @@ - - * @copyright 2005 James Stewart - * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 - * @version CVS: $Id: AtomElement.php,v 1.19 2007/03/26 12:43:11 jystewart Exp $ - * @link http://pear.php.net/package/XML_Feed_Parser/ - */ - -/** - * This class provides support for atom entries. 
It will usually be called by - * XML_Feed_Parser_Atom with which it shares many methods. - * - * @author James Stewart - * @version Release: 1.0.3 - * @package XML_Feed_Parser - */ -class XML_Feed_Parser_AtomElement extends XML_Feed_Parser_Atom -{ - /** - * This will be a reference to the parent object for when we want - * to use a 'fallback' rule - * @var XML_Feed_Parser_Atom - */ - protected $parent; - - /** - * When performing XPath queries we will use this prefix - * @var string - */ - private $xpathPrefix = ''; - - /** - * xml:base values inherited by the element - * @var string - */ - protected $xmlBase; - - /** - * Here we provide a few mappings for those very special circumstances in - * which it makes sense to map back to the RSS2 spec or to manage other - * compatibilities (eg. with the Univeral Feed Parser). Key is the other version's - * name for the command, value is an array consisting of the equivalent in our atom - * api and any attributes needed to make the mapping. - * @var array - */ - protected $compatMap = array( - 'guid' => array('id'), - 'links' => array('link'), - 'tags' => array('category'), - 'contributors' => array('contributor')); - - /** - * Our specific element map - * @var array - */ - protected $map = array( - 'author' => array('Person', 'fallback'), - 'contributor' => array('Person'), - 'id' => array('Text', 'fail'), - 'published' => array('Date'), - 'updated' => array('Date', 'fail'), - 'title' => array('Text', 'fail'), - 'rights' => array('Text', 'fallback'), - 'summary' => array('Text'), - 'content' => array('Content'), - 'link' => array('Link'), - 'enclosure' => array('Enclosure'), - 'category' => array('Category')); - - /** - * Store useful information for later. 
- * - * @param DOMElement $element - this item as a DOM element - * @param XML_Feed_Parser_Atom $parent - the feed of which this is a member - */ - function __construct(DOMElement $element, $parent, $xmlBase = '') - { - $this->model = $element; - $this->parent = $parent; - $this->xmlBase = $xmlBase; - $this->xpathPrefix = "//atom:entry[atom:id='" . $this->id . "']/"; - $this->xpath = $this->parent->xpath; - } - - /** - * Provides access to specific aspects of the author data for an atom entry - * - * Author data at the entry level is more complex than at the feed level. - * If atom:author is not present for the entry we need to look for it in - * an atom:source child of the atom:entry. If it's not there either, then - * we look to the parent for data. - * - * @param array - * @return string - */ - function getAuthor($arguments) - { - /* Find out which part of the author data we're looking for */ - if (isset($arguments['param'])) { - $parameter = $arguments['param']; - } else { - $parameter = 'name'; - } - - $test = $this->model->getElementsByTagName('author'); - if ($test->length > 0) { - $item = $test->item(0); - return $item->getElementsByTagName($parameter)->item(0)->nodeValue; - } - - $source = $this->model->getElementsByTagName('source'); - if ($source->length > 0) { - $test = $this->model->getElementsByTagName('author'); - if ($test->length > 0) { - $item = $test->item(0); - return $item->getElementsByTagName($parameter)->item(0)->nodeValue; - } - } - return $this->parent->getAuthor($arguments); - } - - /** - * Returns the content of the content element or info on a specific attribute - * - * This element may or may not be present. It cannot be present more than - * once. It may have a 'src' attribute, in which case there's no content - * If not present, then the entry must have link with rel="alternate". - * If there is content we return it, if not and there's a 'src' attribute - * we return the value of that instead. 
The method can take an 'attribute' - * argument, in which case we return the value of that attribute if present. - * eg. $item->content("type") will return the type of the content. It is - * recommended that all users check the type before getting the content to - * ensure that their script is capable of handling the type of returned data. - * (data carried in the content element can be either 'text', 'html', 'xhtml', - * or any standard MIME type). - * - * @return string|false - */ - protected function getContent($method, $arguments = array()) - { - $attribute = empty($arguments[0]) ? false : $arguments[0]; - $tags = $this->model->getElementsByTagName('content'); - - if ($tags->length == 0) { - return false; - } - - $content = $tags->item(0); - - if (! $content->hasAttribute('type')) { - $content->setAttribute('type', 'text'); - } - if (! empty($attribute)) { - return $content->getAttribute($attribute); - } - - $type = $content->getAttribute('type'); - - if (! empty($attribute)) { - if ($content->hasAttribute($attribute)) - { - return $content->getAttribute($attribute); - } - return false; - } - - if ($content->hasAttribute('src')) { - return $content->getAttribute('src'); - } - - return $this->parseTextConstruct($content); - } - - /** - * For compatibility, this method provides a mapping to access enclosures. - * - * The Atom spec doesn't provide for an enclosure element, but it is - * generally supported using the link element with rel='enclosure'. - * - * @param string $method - for compatibility with our __call usage - * @param array $arguments - for compatibility with our __call usage - * @return array|false - */ - function getEnclosure($method, $arguments = array()) - { - $offset = isset($arguments[0]) ? $arguments[0] : 0; - $query = "//atom:entry[atom:id='" . $this->getText('id', false) . - "']/atom:link[@rel='enclosure']"; - - $encs = $this->parent->xpath->query($query); - if ($encs->length > $offset) { - try { - if (! 
$encs->item($offset)->hasAttribute('href')) { - return false; - } - $attrs = $encs->item($offset)->attributes; - $length = $encs->item($offset)->hasAttribute('length') ? - $encs->item($offset)->getAttribute('length') : false; - return array( - 'url' => $attrs->getNamedItem('href')->value, - 'type' => $attrs->getNamedItem('type')->value, - 'length' => $length); - } catch (Exception $e) { - return false; - } - } - return false; - } - - /** - * Get details of this entry's source, if available/relevant - * - * Where an atom:entry is taken from another feed then the aggregator - * is supposed to include an atom:source element which replicates at least - * the atom:id, atom:title, and atom:updated metadata from the original - * feed. Atom:source therefore has a very similar structure to atom:feed - * and if we find it we will return it as an XML_Feed_Parser_Atom object. - * - * @return XML_Feed_Parser_Atom|false - */ - function getSource() - { - $test = $this->model->getElementsByTagName('source'); - if ($test->length == 0) { - return false; - } - $source = new XML_Feed_Parser_Atom($test->item(0)); - } - - /** - * Get the entry as an XML string - * - * Return an XML serialization of the feed, should it be required. Most - * users however, will already have a serialization that they used when - * instantiating the object. 
- * - * @return string XML serialization of element - */ - function __toString() - { - $simple = simplexml_import_dom($this->model); - return $simple->asXML(); - } -} - -?> \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/Parser/Exception.php b/plugins/OStatus/extlib/XML/Feed/Parser/Exception.php deleted file mode 100755 index 1e76e3f850..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/Parser/Exception.php +++ /dev/null @@ -1,42 +0,0 @@ - - * @copyright 2005 James Stewart - * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL - * @version CVS: $Id: Exception.php,v 1.3 2005/11/07 01:52:35 jystewart Exp $ - * @link http://pear.php.net/package/XML_Feed_Parser/ - */ - -/** - * We are extending PEAR_Exception - */ -require_once 'PEAR/Exception.php'; - -/** - * XML_Feed_Parser_Exception is a simple extension of PEAR_Exception, existing - * to help with identification of the source of exceptions. - * - * @author James Stewart - * @version Release: 1.0.3 - * @package XML_Feed_Parser - */ -class XML_Feed_Parser_Exception extends PEAR_Exception -{ - -} - -?> \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/Parser/RSS09.php b/plugins/OStatus/extlib/XML/Feed/Parser/RSS09.php deleted file mode 100755 index 07f38f911e..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/Parser/RSS09.php +++ /dev/null @@ -1,214 +0,0 @@ - - * @copyright 2005 James Stewart - * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 - * @version CVS: $Id: RSS09.php,v 1.5 2006/07/26 21:18:46 jystewart Exp $ - * @link http://pear.php.net/package/XML_Feed_Parser/ - */ - -/** - * This class handles RSS0.9 feeds. 
- * - * @author James Stewart - * @version Release: 1.0.3 - * @package XML_Feed_Parser - * @todo Find a Relax NG URI we can use - */ -class XML_Feed_Parser_RSS09 extends XML_Feed_Parser_Type -{ - /** - * The URI of the RelaxNG schema used to (optionally) validate the feed - * @var string - */ - private $relax = ''; - - /** - * We're likely to use XPath, so let's keep it global - * @var DOMXPath - */ - protected $xpath; - - /** - * The feed type we are parsing - * @var string - */ - public $version = 'RSS 0.9'; - - /** - * The class used to represent individual items - * @var string - */ - protected $itemClass = 'XML_Feed_Parser_RSS09Element'; - - /** - * The element containing entries - * @var string - */ - protected $itemElement = 'item'; - - /** - * Here we map those elements we're not going to handle individually - * to the constructs they are. The optional second parameter in the array - * tells the parser whether to 'fall back' (not apt. at the feed level) or - * fail if the element is missing. If the parameter is not set, the function - * will simply return false and leave it to the client to decide what to do. - * @var array - */ - protected $map = array( - 'title' => array('Text'), - 'link' => array('Text'), - 'description' => array('Text'), - 'image' => array('Image'), - 'textinput' => array('TextInput')); - - /** - * Here we map some elements to their atom equivalents. This is going to be - * quite tricky to pull off effectively (and some users' methods may vary) - * but is worth trying. The key is the atom version, the value is RSS2. - * @var array - */ - protected $compatMap = array( - 'title' => array('title'), - 'link' => array('link'), - 'subtitle' => array('description')); - - /** - * We will be working with multiple namespaces and it is useful to - * keep them together - * @var array - */ - protected $namespaces = array( - 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'); - - /** - * Our constructor does nothing more than its parent. 
- * - * @todo RelaxNG validation - * @param DOMDocument $xml A DOM object representing the feed - * @param bool (optional) $string Whether or not to validate this feed - */ - function __construct(DOMDocument $model, $strict = false) - { - $this->model = $model; - - $this->xpath = new DOMXPath($model); - foreach ($this->namespaces as $key => $value) { - $this->xpath->registerNamespace($key, $value); - } - $this->numberEntries = $this->count('item'); - } - - /** - * Included for compatibility -- will not work with RSS 0.9 - * - * This is not something that will work with RSS0.9 as it does not have - * clear restrictions on the global uniqueness of IDs. - * - * @param string $id any valid ID. - * @return false - */ - function getEntryById($id) - { - return false; - } - - /** - * Get details of the image associated with the feed. - * - * @return array|false an array simply containing the child elements - */ - protected function getImage() - { - $images = $this->model->getElementsByTagName('image'); - if ($images->length > 0) { - $image = $images->item(0); - $details = array(); - if ($image->hasChildNodes()) { - $details = array( - 'title' => $image->getElementsByTagName('title')->item(0)->value, - 'link' => $image->getElementsByTagName('link')->item(0)->value, - 'url' => $image->getElementsByTagName('url')->item(0)->value); - } else { - $details = array('title' => false, - 'link' => false, - 'url' => $image->attributes->getNamedItem('resource')->nodeValue); - } - $details = array_merge($details, - array('description' => false, 'height' => false, 'width' => false)); - if (! empty($details)) { - return $details; - } - } - return false; - } - - /** - * The textinput element is little used, but in the interests of - * completeness we will support it. 
- * - * @return array|false - */ - protected function getTextInput() - { - $inputs = $this->model->getElementsByTagName('textinput'); - if ($inputs->length > 0) { - $input = $inputs->item(0); - $results = array(); - $results['title'] = isset( - $input->getElementsByTagName('title')->item(0)->value) ? - $input->getElementsByTagName('title')->item(0)->value : null; - $results['description'] = isset( - $input->getElementsByTagName('description')->item(0)->value) ? - $input->getElementsByTagName('description')->item(0)->value : null; - $results['name'] = isset( - $input->getElementsByTagName('name')->item(0)->value) ? - $input->getElementsByTagName('name')->item(0)->value : null; - $results['link'] = isset( - $input->getElementsByTagName('link')->item(0)->value) ? - $input->getElementsByTagName('link')->item(0)->value : null; - if (empty($results['link']) && - $input->attributes->getNamedItem('resource')) { - $results['link'] = $input->attributes->getNamedItem('resource')->nodeValue; - } - if (! empty($results)) { - return $results; - } - } - return false; - } - - /** - * Get details of a link from the feed. - * - * In RSS1 a link is a text element but in order to ensure that we resolve - * URLs properly we have a special function for them. 
- * - * @return string - */ - function getLink($offset = 0, $attribute = 'href', $params = false) - { - $links = $this->model->getElementsByTagName('link'); - if ($links->length <= $offset) { - return false; - } - $link = $links->item($offset); - return $this->addBase($link->nodeValue, $link); - } -} - -?> \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/Parser/RSS09Element.php b/plugins/OStatus/extlib/XML/Feed/Parser/RSS09Element.php deleted file mode 100755 index d41f36e8d6..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/Parser/RSS09Element.php +++ /dev/null @@ -1,62 +0,0 @@ - - * @copyright 2005 James Stewart - * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 - * @version CVS: $Id: RSS09Element.php,v 1.4 2006/06/30 17:41:56 jystewart Exp $ - * @link http://pear.php.net/package/XML_Feed_Parser/ - */ - -/* - * This class provides support for RSS 0.9 entries. It will usually be called by - * XML_Feed_Parser_RSS09 with which it shares many methods. - * - * @author James Stewart - * @version Release: 1.0.3 - * @package XML_Feed_Parser - */ -class XML_Feed_Parser_RSS09Element extends XML_Feed_Parser_RSS09 -{ - /** - * This will be a reference to the parent object for when we want - * to use a 'fallback' rule - * @var XML_Feed_Parser_RSS09 - */ - protected $parent; - - /** - * Our specific element map - * @var array - */ - protected $map = array( - 'title' => array('Text'), - 'link' => array('Link')); - - /** - * Store useful information for later. 
- * - * @param DOMElement $element - this item as a DOM element - * @param XML_Feed_Parser_RSS1 $parent - the feed of which this is a member - */ - function __construct(DOMElement $element, $parent, $xmlBase = '') - { - $this->model = $element; - $this->parent = $parent; - } -} - -?> \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/Parser/RSS1.php b/plugins/OStatus/extlib/XML/Feed/Parser/RSS1.php deleted file mode 100755 index 60c9938baa..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/Parser/RSS1.php +++ /dev/null @@ -1,277 +0,0 @@ - - * @copyright 2005 James Stewart - * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 - * @version CVS: $Id: RSS1.php,v 1.10 2006/07/27 13:52:05 jystewart Exp $ - * @link http://pear.php.net/package/XML_Feed_Parser/ - */ - -/** - * This class handles RSS1.0 feeds. - * - * @author James Stewart - * @version Release: 1.0.3 - * @package XML_Feed_Parser - * @todo Find a Relax NG URI we can use - */ -class XML_Feed_Parser_RSS1 extends XML_Feed_Parser_Type -{ - /** - * The URI of the RelaxNG schema used to (optionally) validate the feed - * @var string - */ - private $relax = 'rss10.rnc'; - - /** - * We're likely to use XPath, so let's keep it global - * @var DOMXPath - */ - protected $xpath; - - /** - * The feed type we are parsing - * @var string - */ - public $version = 'RSS 1.0'; - - /** - * The class used to represent individual items - * @var string - */ - protected $itemClass = 'XML_Feed_Parser_RSS1Element'; - - /** - * The element containing entries - * @var string - */ - protected $itemElement = 'item'; - - /** - * Here we map those elements we're not going to handle individually - * to the constructs they are. The optional second parameter in the array - * tells the parser whether to 'fall back' (not apt. at the feed level) or - * fail if the element is missing. If the parameter is not set, the function - * will simply return false and leave it to the client to decide what to do. 
- * @var array - */ - protected $map = array( - 'title' => array('Text'), - 'link' => array('Text'), - 'description' => array('Text'), - 'image' => array('Image'), - 'textinput' => array('TextInput'), - 'updatePeriod' => array('Text'), - 'updateFrequency' => array('Text'), - 'updateBase' => array('Date'), - 'rights' => array('Text'), # dc:rights - 'description' => array('Text'), # dc:description - 'creator' => array('Text'), # dc:creator - 'publisher' => array('Text'), # dc:publisher - 'contributor' => array('Text'), # dc:contributor - 'date' => array('Date') # dc:contributor - ); - - /** - * Here we map some elements to their atom equivalents. This is going to be - * quite tricky to pull off effectively (and some users' methods may vary) - * but is worth trying. The key is the atom version, the value is RSS2. - * @var array - */ - protected $compatMap = array( - 'title' => array('title'), - 'link' => array('link'), - 'subtitle' => array('description'), - 'author' => array('creator'), - 'updated' => array('date')); - - /** - * We will be working with multiple namespaces and it is useful to - * keep them together - * @var array - */ - protected $namespaces = array( - 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', - 'rss' => 'http://purl.org/rss/1.0/', - 'dc' => 'http://purl.org/rss/1.0/modules/dc/', - 'content' => 'http://purl.org/rss/1.0/modules/content/', - 'sy' => 'http://web.resource.org/rss/1.0/modules/syndication/'); - - /** - * Our constructor does nothing more than its parent. - * - * @param DOMDocument $xml A DOM object representing the feed - * @param bool (optional) $string Whether or not to validate this feed - */ - function __construct(DOMDocument $model, $strict = false) - { - $this->model = $model; - if ($strict) { - $validate = $this->model->relaxNGValidate(self::getSchemaDir . - DIRECTORY_SEPARATOR . $this->relax); - if (! 
$validate) { - throw new XML_Feed_Parser_Exception('Failed required validation'); - } - } - - $this->xpath = new DOMXPath($model); - foreach ($this->namespaces as $key => $value) { - $this->xpath->registerNamespace($key, $value); - } - $this->numberEntries = $this->count('item'); - } - - /** - * Allows retrieval of an entry by ID where the rdf:about attribute is used - * - * This is not really something that will work with RSS1 as it does not have - * clear restrictions on the global uniqueness of IDs. We will employ the - * _very_ hit and miss method of selecting entries based on the rdf:about - * attribute. If DOMXPath::evaluate is available, we also use that to store - * a reference to the entry in the array used by getEntryByOffset so that - * method does not have to seek out the entry if it's requested that way. - * - * @param string $id any valid ID. - * @return XML_Feed_Parser_RSS1Element - */ - function getEntryById($id) - { - if (isset($this->idMappings[$id])) { - return $this->entries[$this->idMappings[$id]]; - } - - $entries = $this->xpath->query("//rss:item[@rdf:about='$id']"); - if ($entries->length > 0) { - $classname = $this->itemClass; - $entry = new $classname($entries->item(0), $this); - if (in_array('evaluate', get_class_methods($this->xpath))) { - $offset = $this->xpath->evaluate("count(preceding-sibling::rss:item)", $entries->item(0)); - $this->entries[$offset] = $entry; - } - $this->idMappings[$id] = $entry; - return $entry; - } - return false; - } - - /** - * Get details of the image associated with the feed. 
- * - * @return array|false an array simply containing the child elements - */ - protected function getImage() - { - $images = $this->model->getElementsByTagName('image'); - if ($images->length > 0) { - $image = $images->item(0); - $details = array(); - if ($image->hasChildNodes()) { - $details = array( - 'title' => $image->getElementsByTagName('title')->item(0)->value, - 'link' => $image->getElementsByTagName('link')->item(0)->value, - 'url' => $image->getElementsByTagName('url')->item(0)->value); - } else { - $details = array('title' => false, - 'link' => false, - 'url' => $image->attributes->getNamedItem('resource')->nodeValue); - } - $details = array_merge($details, array('description' => false, 'height' => false, 'width' => false)); - if (! empty($details)) { - return $details; - } - } - return false; - } - - /** - * The textinput element is little used, but in the interests of - * completeness we will support it. - * - * @return array|false - */ - protected function getTextInput() - { - $inputs = $this->model->getElementsByTagName('textinput'); - if ($inputs->length > 0) { - $input = $inputs->item(0); - $results = array(); - $results['title'] = isset( - $input->getElementsByTagName('title')->item(0)->value) ? - $input->getElementsByTagName('title')->item(0)->value : null; - $results['description'] = isset( - $input->getElementsByTagName('description')->item(0)->value) ? - $input->getElementsByTagName('description')->item(0)->value : null; - $results['name'] = isset( - $input->getElementsByTagName('name')->item(0)->value) ? - $input->getElementsByTagName('name')->item(0)->value : null; - $results['link'] = isset( - $input->getElementsByTagName('link')->item(0)->value) ? - $input->getElementsByTagName('link')->item(0)->value : null; - if (empty($results['link']) and - $input->attributes->getNamedItem('resource')) { - $results['link'] = - $input->attributes->getNamedItem('resource')->nodeValue; - } - if (! 
empty($results)) { - return $results; - } - } - return false; - } - - /** - * Employs various techniques to identify the author - * - * Dublin Core provides the dc:creator, dc:contributor, and dc:publisher - * elements for defining authorship in RSS1. We will try each of those in - * turn in order to simulate the atom author element and will return it - * as text. - * - * @return array|false - */ - function getAuthor() - { - $options = array('creator', 'contributor', 'publisher'); - foreach ($options as $element) { - $test = $this->model->getElementsByTagName($element); - if ($test->length > 0) { - return $test->item(0)->value; - } - } - return false; - } - - /** - * Retrieve a link - * - * In RSS1 a link is a text element but in order to ensure that we resolve - * URLs properly we have a special function for them. - * - * @return string - */ - function getLink($offset = 0, $attribute = 'href', $params = false) - { - $links = $this->model->getElementsByTagName('link'); - if ($links->length <= $offset) { - return false; - } - $link = $links->item($offset); - return $this->addBase($link->nodeValue, $link); - } -} - -?> \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/Parser/RSS11.php b/plugins/OStatus/extlib/XML/Feed/Parser/RSS11.php deleted file mode 100755 index 3cd1ef15d8..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/Parser/RSS11.php +++ /dev/null @@ -1,276 +0,0 @@ - - * @copyright 2005 James Stewart - * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 - * @version CVS: $Id: RSS11.php,v 1.6 2006/07/27 13:52:05 jystewart Exp $ - * @link http://pear.php.net/package/XML_Feed_Parser/ - */ - -/** - * This class handles RSS1.1 feeds. 
RSS1.1 is documented at: - * http://inamidst.com/rss1.1/ - * - * @author James Stewart - * @version Release: 1.0.3 - * @package XML_Feed_Parser - * @todo Support for RDF:List - * @todo Ensure xml:lang is accessible to users - */ -class XML_Feed_Parser_RSS11 extends XML_Feed_Parser_Type -{ - /** - * The URI of the RelaxNG schema used to (optionally) validate the feed - * @var string - */ - private $relax = 'rss11.rnc'; - - /** - * We're likely to use XPath, so let's keep it global - * @var DOMXPath - */ - protected $xpath; - - /** - * The feed type we are parsing - * @var string - */ - public $version = 'RSS 1.0'; - - /** - * The class used to represent individual items - * @var string - */ - protected $itemClass = 'XML_Feed_Parser_RSS1Element'; - - /** - * The element containing entries - * @var string - */ - protected $itemElement = 'item'; - - /** - * Here we map those elements we're not going to handle individually - * to the constructs they are. The optional second parameter in the array - * tells the parser whether to 'fall back' (not apt. at the feed level) or - * fail if the element is missing. If the parameter is not set, the function - * will simply return false and leave it to the client to decide what to do. - * @var array - */ - protected $map = array( - 'title' => array('Text'), - 'link' => array('Text'), - 'description' => array('Text'), - 'image' => array('Image'), - 'updatePeriod' => array('Text'), - 'updateFrequency' => array('Text'), - 'updateBase' => array('Date'), - 'rights' => array('Text'), # dc:rights - 'description' => array('Text'), # dc:description - 'creator' => array('Text'), # dc:creator - 'publisher' => array('Text'), # dc:publisher - 'contributor' => array('Text'), # dc:contributor - 'date' => array('Date') # dc:contributor - ); - - /** - * Here we map some elements to their atom equivalents. This is going to be - * quite tricky to pull off effectively (and some users' methods may vary) - * but is worth trying. 
The key is the atom version, the value is RSS2. - * @var array - */ - protected $compatMap = array( - 'title' => array('title'), - 'link' => array('link'), - 'subtitle' => array('description'), - 'author' => array('creator'), - 'updated' => array('date')); - - /** - * We will be working with multiple namespaces and it is useful to - * keep them together. We will retain support for some common RSS1.0 modules - * @var array - */ - protected $namespaces = array( - 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', - 'rss' => 'http://purl.org/net/rss1.1#', - 'dc' => 'http://purl.org/rss/1.0/modules/dc/', - 'content' => 'http://purl.org/rss/1.0/modules/content/', - 'sy' => 'http://web.resource.org/rss/1.0/modules/syndication/'); - - /** - * Our constructor does nothing more than its parent. - * - * @param DOMDocument $xml A DOM object representing the feed - * @param bool (optional) $string Whether or not to validate this feed - */ - function __construct(DOMDocument $model, $strict = false) - { - $this->model = $model; - - if ($strict) { - $validate = $this->model->relaxNGValidate(self::getSchemaDir . - DIRECTORY_SEPARATOR . $this->relax); - if (! $validate) { - throw new XML_Feed_Parser_Exception('Failed required validation'); - } - } - - $this->xpath = new DOMXPath($model); - foreach ($this->namespaces as $key => $value) { - $this->xpath->registerNamespace($key, $value); - } - $this->numberEntries = $this->count('item'); - } - - /** - * Attempts to identify an element by ID given by the rdf:about attribute - * - * This is not really something that will work with RSS1.1 as it does not have - * clear restrictions on the global uniqueness of IDs. We will employ the - * _very_ hit and miss method of selecting entries based on the rdf:about - * attribute. Please note that this is even more hit and miss with RSS1.1 than - * with RSS1.0 since RSS1.1 does not require the rdf:about attribute for items. - * - * @param string $id any valid ID. 
- * @return XML_Feed_Parser_RSS1Element - */ - function getEntryById($id) - { - if (isset($this->idMappings[$id])) { - return $this->entries[$this->idMappings[$id]]; - } - - $entries = $this->xpath->query("//rss:item[@rdf:about='$id']"); - if ($entries->length > 0) { - $classname = $this->itemClass; - $entry = new $classname($entries->item(0), $this); - return $entry; - } - return false; - } - - /** - * Get details of the image associated with the feed. - * - * @return array|false an array simply containing the child elements - */ - protected function getImage() - { - $images = $this->model->getElementsByTagName('image'); - if ($images->length > 0) { - $image = $images->item(0); - $details = array(); - if ($image->hasChildNodes()) { - $details = array( - 'title' => $image->getElementsByTagName('title')->item(0)->value, - 'url' => $image->getElementsByTagName('url')->item(0)->value); - if ($image->getElementsByTagName('link')->length > 0) { - $details['link'] = - $image->getElementsByTagName('link')->item(0)->value; - } - } else { - $details = array('title' => false, - 'link' => false, - 'url' => $image->attributes->getNamedItem('resource')->nodeValue); - } - $details = array_merge($details, - array('description' => false, 'height' => false, 'width' => false)); - if (! empty($details)) { - return $details; - } - } - return false; - } - - /** - * The textinput element is little used, but in the interests of - * completeness we will support it. - * - * @return array|false - */ - protected function getTextInput() - { - $inputs = $this->model->getElementsByTagName('textinput'); - if ($inputs->length > 0) { - $input = $inputs->item(0); - $results = array(); - $results['title'] = isset( - $input->getElementsByTagName('title')->item(0)->value) ? - $input->getElementsByTagName('title')->item(0)->value : null; - $results['description'] = isset( - $input->getElementsByTagName('description')->item(0)->value) ? 
- $input->getElementsByTagName('description')->item(0)->value : null; - $results['name'] = isset( - $input->getElementsByTagName('name')->item(0)->value) ? - $input->getElementsByTagName('name')->item(0)->value : null; - $results['link'] = isset( - $input->getElementsByTagName('link')->item(0)->value) ? - $input->getElementsByTagName('link')->item(0)->value : null; - if (empty($results['link']) and - $input->attributes->getNamedItem('resource')) { - $results['link'] = $input->attributes->getNamedItem('resource')->nodeValue; - } - if (! empty($results)) { - return $results; - } - } - return false; - } - - /** - * Attempts to discern authorship - * - * Dublin Core provides the dc:creator, dc:contributor, and dc:publisher - * elements for defining authorship in RSS1. We will try each of those in - * turn in order to simulate the atom author element and will return it - * as text. - * - * @return array|false - */ - function getAuthor() - { - $options = array('creator', 'contributor', 'publisher'); - foreach ($options as $element) { - $test = $this->model->getElementsByTagName($element); - if ($test->length > 0) { - return $test->item(0)->value; - } - } - return false; - } - - /** - * Retrieve a link - * - * In RSS1 a link is a text element but in order to ensure that we resolve - * URLs properly we have a special function for them. 
- * - * @return string - */ - function getLink($offset = 0, $attribute = 'href', $params = false) - { - $links = $this->model->getElementsByTagName('link'); - if ($links->length <= $offset) { - return false; - } - $link = $links->item($offset); - return $this->addBase($link->nodeValue, $link); - } -} - -?> \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/Parser/RSS11Element.php b/plugins/OStatus/extlib/XML/Feed/Parser/RSS11Element.php deleted file mode 100755 index 75918beda9..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/Parser/RSS11Element.php +++ /dev/null @@ -1,151 +0,0 @@ - - * @copyright 2005 James Stewart - * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 - * @version CVS: $Id: RSS11Element.php,v 1.4 2006/06/30 17:41:56 jystewart Exp $ - * @link http://pear.php.net/package/XML_Feed_Parser/ - */ - -/* - * This class provides support for RSS 1.1 entries. It will usually be called by - * XML_Feed_Parser_RSS11 with which it shares many methods. - * - * @author James Stewart - * @version Release: 1.0.3 - * @package XML_Feed_Parser - */ -class XML_Feed_Parser_RSS11Element extends XML_Feed_Parser_RSS11 -{ - /** - * This will be a reference to the parent object for when we want - * to use a 'fallback' rule - * @var XML_Feed_Parser_RSS1 - */ - protected $parent; - - /** - * Our specific element map - * @var array - */ - protected $map = array( - 'id' => array('Id'), - 'title' => array('Text'), - 'link' => array('Link'), - 'description' => array('Text'), # or dc:description - 'category' => array('Category'), - 'rights' => array('Text'), # dc:rights - 'creator' => array('Text'), # dc:creator - 'publisher' => array('Text'), # dc:publisher - 'contributor' => array('Text'), # dc:contributor - 'date' => array('Date'), # dc:date - 'content' => array('Content') - ); - - /** - * Here we map some elements to their atom equivalents. 
This is going to be - * quite tricky to pull off effectively (and some users' methods may vary) - * but is worth trying. The key is the atom version, the value is RSS1. - * @var array - */ - protected $compatMap = array( - 'content' => array('content'), - 'updated' => array('lastBuildDate'), - 'published' => array('pubdate'), - 'subtitle' => array('description'), - 'updated' => array('date'), - 'author' => array('creator'), - 'contributor' => array('contributor') - ); - - /** - * Store useful information for later. - * - * @param DOMElement $element - this item as a DOM element - * @param XML_Feed_Parser_RSS1 $parent - the feed of which this is a member - */ - function __construct(DOMElement $element, $parent, $xmlBase = '') - { - $this->model = $element; - $this->parent = $parent; - } - - /** - * If an rdf:about attribute is specified, return that as an ID - * - * There is no established way of showing an ID for an RSS1 entry. We will - * simulate it using the rdf:about attribute of the entry element. This cannot - * be relied upon for unique IDs but may prove useful. - * - * @return string|false - */ - function getId() - { - if ($this->model->attributes->getNamedItem('about')) { - return $this->model->attributes->getNamedItem('about')->nodeValue; - } - return false; - } - - /** - * Return the entry's content - * - * The official way to include full content in an RSS1 entry is to use - * the content module's element 'encoded'. Often, however, the 'description' - * element is used instead. We will offer that as a fallback. 
- * - * @return string|false - */ - function getContent() - { - $options = array('encoded', 'description'); - foreach ($options as $element) { - $test = $this->model->getElementsByTagName($element); - if ($test->length == 0) { - continue; - } - if ($test->item(0)->hasChildNodes()) { - $value = ''; - foreach ($test->item(0)->childNodes as $child) { - if ($child instanceof DOMText) { - $value .= $child->nodeValue; - } else { - $simple = simplexml_import_dom($child); - $value .= $simple->asXML(); - } - } - return $value; - } else if ($test->length > 0) { - return $test->item(0)->nodeValue; - } - } - return false; - } - - /** - * How RSS1.1 should support for enclosures is not clear. For now we will return - * false. - * - * @return false - */ - function getEnclosure() - { - return false; - } -} - -?> \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/Parser/RSS1Element.php b/plugins/OStatus/extlib/XML/Feed/Parser/RSS1Element.php deleted file mode 100755 index 8e36d5a9b9..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/Parser/RSS1Element.php +++ /dev/null @@ -1,116 +0,0 @@ - - * @copyright 2005 James Stewart - * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 - * @version CVS: $Id: RSS1Element.php,v 1.6 2006/06/30 17:41:56 jystewart Exp $ - * @link http://pear.php.net/package/XML_Feed_Parser/ - */ - -/* - * This class provides support for RSS 1.0 entries. It will usually be called by - * XML_Feed_Parser_RSS1 with which it shares many methods. 
- * - * @author James Stewart - * @version Release: 1.0.3 - * @package XML_Feed_Parser - */ -class XML_Feed_Parser_RSS1Element extends XML_Feed_Parser_RSS1 -{ - /** - * This will be a reference to the parent object for when we want - * to use a 'fallback' rule - * @var XML_Feed_Parser_RSS1 - */ - protected $parent; - - /** - * Our specific element map - * @var array - */ - protected $map = array( - 'id' => array('Id'), - 'title' => array('Text'), - 'link' => array('Link'), - 'description' => array('Text'), # or dc:description - 'category' => array('Category'), - 'rights' => array('Text'), # dc:rights - 'creator' => array('Text'), # dc:creator - 'publisher' => array('Text'), # dc:publisher - 'contributor' => array('Text'), # dc:contributor - 'date' => array('Date'), # dc:date - 'content' => array('Content') - ); - - /** - * Here we map some elements to their atom equivalents. This is going to be - * quite tricky to pull off effectively (and some users' methods may vary) - * but is worth trying. The key is the atom version, the value is RSS1. - * @var array - */ - protected $compatMap = array( - 'content' => array('content'), - 'updated' => array('lastBuildDate'), - 'published' => array('pubdate'), - 'subtitle' => array('description'), - 'updated' => array('date'), - 'author' => array('creator'), - 'contributor' => array('contributor') - ); - - /** - * Store useful information for later. - * - * @param DOMElement $element - this item as a DOM element - * @param XML_Feed_Parser_RSS1 $parent - the feed of which this is a member - */ - function __construct(DOMElement $element, $parent, $xmlBase = '') - { - $this->model = $element; - $this->parent = $parent; - } - - /** - * If an rdf:about attribute is specified, return it as an ID - * - * There is no established way of showing an ID for an RSS1 entry. We will - * simulate it using the rdf:about attribute of the entry element. This cannot - * be relied upon for unique IDs but may prove useful. 
- * - * @return string|false - */ - function getId() - { - if ($this->model->attributes->getNamedItem('about')) { - return $this->model->attributes->getNamedItem('about')->nodeValue; - } - return false; - } - - /** - * How RSS1 should support for enclosures is not clear. For now we will return - * false. - * - * @return false - */ - function getEnclosure() - { - return false; - } -} - -?> \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/Parser/RSS2.php b/plugins/OStatus/extlib/XML/Feed/Parser/RSS2.php deleted file mode 100644 index 0936bd2f5e..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/Parser/RSS2.php +++ /dev/null @@ -1,335 +0,0 @@ - - * @copyright 2005 James Stewart - * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 - * @version CVS: $Id: RSS2.php,v 1.12 2008/03/08 18:16:45 jystewart Exp $ - * @link http://pear.php.net/package/XML_Feed_Parser/ - */ - -/** - * This class handles RSS2 feeds. - * - * @author James Stewart - * @version Release: 1.0.3 - * @package XML_Feed_Parser - */ -class XML_Feed_Parser_RSS2 extends XML_Feed_Parser_Type -{ - /** - * The URI of the RelaxNG schema used to (optionally) validate the feed - * @var string - */ - private $relax = 'rss20.rnc'; - - /** - * We're likely to use XPath, so let's keep it global - * @var DOMXPath - */ - protected $xpath; - - /** - * The feed type we are parsing - * @var string - */ - public $version = 'RSS 2.0'; - - /** - * The class used to represent individual items - * @var string - */ - protected $itemClass = 'XML_Feed_Parser_RSS2Element'; - - /** - * The element containing entries - * @var string - */ - protected $itemElement = 'item'; - - /** - * Here we map those elements we're not going to handle individually - * to the constructs they are. The optional second parameter in the array - * tells the parser whether to 'fall back' (not apt. at the feed level) or - * fail if the element is missing. 
If the parameter is not set, the function - * will simply return false and leave it to the client to decide what to do. - * @var array - */ - protected $map = array( - 'ttl' => array('Text'), - 'pubDate' => array('Date'), - 'lastBuildDate' => array('Date'), - 'title' => array('Text'), - 'link' => array('Link'), - 'description' => array('Text'), - 'language' => array('Text'), - 'copyright' => array('Text'), - 'managingEditor' => array('Text'), - 'webMaster' => array('Text'), - 'category' => array('Text'), - 'generator' => array('Text'), - 'docs' => array('Text'), - 'ttl' => array('Text'), - 'image' => array('Image'), - 'skipDays' => array('skipDays'), - 'skipHours' => array('skipHours')); - - /** - * Here we map some elements to their atom equivalents. This is going to be - * quite tricky to pull off effectively (and some users' methods may vary) - * but is worth trying. The key is the atom version, the value is RSS2. - * @var array - */ - protected $compatMap = array( - 'title' => array('title'), - 'rights' => array('copyright'), - 'updated' => array('lastBuildDate'), - 'subtitle' => array('description'), - 'date' => array('pubDate'), - 'author' => array('managingEditor')); - - protected $namespaces = array( - 'dc' => 'http://purl.org/rss/1.0/modules/dc/', - 'content' => 'http://purl.org/rss/1.0/modules/content/'); - - /** - * Our constructor does nothing more than its parent. - * - * @param DOMDocument $xml A DOM object representing the feed - * @param bool (optional) $string Whether or not to validate this feed - */ - function __construct(DOMDocument $model, $strict = false) - { - $this->model = $model; - - if ($strict) { - if (! 
$this->model->relaxNGValidate($this->relax)) { - throw new XML_Feed_Parser_Exception('Failed required validation'); - } - } - - $this->xpath = new DOMXPath($this->model); - foreach ($this->namespaces as $key => $value) { - $this->xpath->registerNamespace($key, $value); - } - $this->numberEntries = $this->count('item'); - } - - /** - * Retrieves an entry by ID, if the ID is specified with the guid element - * - * This is not really something that will work with RSS2 as it does not have - * clear restrictions on the global uniqueness of IDs. But we can emulate - * it by allowing access based on the 'guid' element. If DOMXPath::evaluate - * is available, we also use that to store a reference to the entry in the array - * used by getEntryByOffset so that method does not have to seek out the entry - * if it's requested that way. - * - * @param string $id any valid ID. - * @return XML_Feed_Parser_RSS2Element - */ - function getEntryById($id) - { - if (isset($this->idMappings[$id])) { - return $this->entries[$this->idMappings[$id]]; - } - - $entries = $this->xpath->query("//item[guid='$id']"); - if ($entries->length > 0) { - $entry = new $this->itemElement($entries->item(0), $this); - if (in_array('evaluate', get_class_methods($this->xpath))) { - $offset = $this->xpath->evaluate("count(preceding-sibling::item)", $entries->item(0)); - $this->entries[$offset] = $entry; - } - $this->idMappings[$id] = $entry; - return $entry; - } - } - - /** - * Get a category from the element - * - * The category element is a simple text construct which can occur any number - * of times. We allow access by offset or access to an array of results. - * - * @param string $call for compatibility with our overloading - * @param array $arguments - arg 0 is the offset, arg 1 is whether to return as array - * @return string|array|false - */ - function getCategory($call, $arguments = array()) - { - $categories = $this->model->getElementsByTagName('category'); - $offset = empty($arguments[0]) ? 
0 : $arguments[0]; - $array = empty($arguments[1]) ? false : true; - if ($categories->length <= $offset) { - return false; - } - if ($array) { - $list = array(); - foreach ($categories as $category) { - array_push($list, $category->nodeValue); - } - return $list; - } - return $categories->item($offset)->nodeValue; - } - - /** - * Get details of the image associated with the feed. - * - * @return array|false an array simply containing the child elements - */ - protected function getImage() - { - $images = $this->xpath->query("//image"); - if ($images->length > 0) { - $image = $images->item(0); - $desc = $image->getElementsByTagName('description'); - $description = $desc->length ? $desc->item(0)->nodeValue : false; - $heigh = $image->getElementsByTagName('height'); - $height = $heigh->length ? $heigh->item(0)->nodeValue : false; - $widt = $image->getElementsByTagName('width'); - $width = $widt->length ? $widt->item(0)->nodeValue : false; - return array( - 'title' => $image->getElementsByTagName('title')->item(0)->nodeValue, - 'link' => $image->getElementsByTagName('link')->item(0)->nodeValue, - 'url' => $image->getElementsByTagName('url')->item(0)->nodeValue, - 'description' => $description, - 'height' => $height, - 'width' => $width); - } - return false; - } - - /** - * The textinput element is little used, but in the interests of - * completeness... 
- * - * @return array|false - */ - function getTextInput() - { - $inputs = $this->model->getElementsByTagName('input'); - if ($inputs->length > 0) { - $input = $inputs->item(0); - return array( - 'title' => $input->getElementsByTagName('title')->item(0)->value, - 'description' => - $input->getElementsByTagName('description')->item(0)->value, - 'name' => $input->getElementsByTagName('name')->item(0)->value, - 'link' => $input->getElementsByTagName('link')->item(0)->value); - } - return false; - } - - /** - * Utility function for getSkipDays and getSkipHours - * - * This is a general function used by both getSkipDays and getSkipHours. It simply - * returns an array of the values of the children of the appropriate tag. - * - * @param string $tagName The tag name (getSkipDays or getSkipHours) - * @return array|false - */ - protected function getSkips($tagName) - { - $hours = $this->model->getElementsByTagName($tagName); - if ($hours->length == 0) { - return false; - } - $skipHours = array(); - foreach($hours->item(0)->childNodes as $hour) { - if ($hour instanceof DOMElement) { - array_push($skipHours, $hour->nodeValue); - } - } - return $skipHours; - } - - /** - * Retrieve skipHours data - * - * The skiphours element provides a list of hours on which this feed should - * not be checked. We return an array of those hours (integers, 24 hour clock) - * - * @return array - */ - function getSkipHours() - { - return $this->getSkips('skipHours'); - } - - /** - * Retrieve skipDays data - * - * The skipdays element provides a list of days on which this feed should - * not be checked. We return an array of those days. - * - * @return array - */ - function getSkipDays() - { - return $this->getSkips('skipDays'); - } - - /** - * Return content of the little-used 'cloud' element - * - * The cloud element is rarely used. It is designed to provide some details - * of a location to update the feed. 
- * - * @return array an array of the attributes of the element - */ - function getCloud() - { - $cloud = $this->model->getElementsByTagName('cloud'); - if ($cloud->length == 0) { - return false; - } - $cloudData = array(); - foreach ($cloud->item(0)->attributes as $attribute) { - $cloudData[$attribute->name] = $attribute->value; - } - return $cloudData; - } - - /** - * Get link URL - * - * In RSS2 a link is a text element but in order to ensure that we resolve - * URLs properly we have a special function for them. We maintain the - * parameter used by the atom getLink method, though we only use the offset - * parameter. - * - * @param int $offset The position of the link within the feed. Starts from 0 - * @param string $attribute The attribute of the link element required - * @param array $params An array of other parameters. Not used. - * @return string - */ - function getLink($offset, $attribute = 'href', $params = array()) - { - $xPath = new DOMXPath($this->model); - $links = $xPath->query('//link'); - - if ($links->length <= $offset) { - return false; - } - $link = $links->item($offset); - return $this->addBase($link->nodeValue, $link); - } -} - -?> \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/Parser/RSS2Element.php b/plugins/OStatus/extlib/XML/Feed/Parser/RSS2Element.php deleted file mode 100755 index 6edf910dc4..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/Parser/RSS2Element.php +++ /dev/null @@ -1,171 +0,0 @@ - - * @copyright 2005 James Stewart - * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 - * @version CVS: $Id: RSS2Element.php,v 1.11 2006/07/26 21:18:47 jystewart Exp $ - * @link http://pear.php.net/package/XML_Feed_Parser/ - */ - -/** - * This class provides support for RSS 2.0 entries. It will usually be - * called by XML_Feed_Parser_RSS2 with which it shares many methods. 
- * - * @author James Stewart - * @version Release: 1.0.3 - * @package XML_Feed_Parser - */ -class XML_Feed_Parser_RSS2Element extends XML_Feed_Parser_RSS2 -{ - /** - * This will be a reference to the parent object for when we want - * to use a 'fallback' rule - * @var XML_Feed_Parser_RSS2 - */ - protected $parent; - - /** - * Our specific element map - * @var array - */ - protected $map = array( - 'title' => array('Text'), - 'guid' => array('Guid'), - 'description' => array('Text'), - 'author' => array('Text'), - 'comments' => array('Text'), - 'enclosure' => array('Enclosure'), - 'pubDate' => array('Date'), - 'source' => array('Source'), - 'link' => array('Text'), - 'content' => array('Content')); - - /** - * Here we map some elements to their atom equivalents. This is going to be - * quite tricky to pull off effectively (and some users' methods may vary) - * but is worth trying. The key is the atom version, the value is RSS2. - * @var array - */ - protected $compatMap = array( - 'id' => array('guid'), - 'updated' => array('lastBuildDate'), - 'published' => array('pubdate'), - 'guidislink' => array('guid', 'ispermalink'), - 'summary' => array('description')); - - /** - * Store useful information for later. - * - * @param DOMElement $element - this item as a DOM element - * @param XML_Feed_Parser_RSS2 $parent - the feed of which this is a member - */ - function __construct(DOMElement $element, $parent, $xmlBase = '') - { - $this->model = $element; - $this->parent = $parent; - } - - /** - * Get the value of the guid element, if specified - * - * guid is the closest RSS2 has to atom's ID. It is usually but not always a - * URI. The one attribute that RSS2 can posess is 'ispermalink' which specifies - * whether the guid is itself dereferencable. Use of guid is not obligatory, - * but is advisable. To get the guid you would call $item->id() (for atom - * compatibility) or $item->guid(). To check if this guid is a permalink call - * $item->guid("ispermalink"). 
- * - * @param string $method - the method name being called - * @param array $params - parameters required - * @return string the guid or value of ispermalink - */ - protected function getGuid($method, $params) - { - $attribute = (isset($params[0]) and $params[0] == 'ispermalink') ? - true : false; - $tag = $this->model->getElementsByTagName('guid'); - if ($tag->length > 0) { - if ($attribute) { - if ($tag->hasAttribute("ispermalink")) { - return $tag->getAttribute("ispermalink"); - } - } - return $tag->item(0)->nodeValue; - } - return false; - } - - /** - * Access details of file enclosures - * - * The RSS2 spec is ambiguous as to whether an enclosure element must be - * unique in a given entry. For now we will assume it needn't, and allow - * for an offset. - * - * @param string $method - the method being called - * @param array $parameters - we expect the first of these to be our offset - * @return array|false - */ - protected function getEnclosure($method, $parameters) - { - $encs = $this->model->getElementsByTagName('enclosure'); - $offset = isset($parameters[0]) ? $parameters[0] : 0; - if ($encs->length > $offset) { - try { - if (! $encs->item($offset)->hasAttribute('url')) { - return false; - } - $attrs = $encs->item($offset)->attributes; - return array( - 'url' => $attrs->getNamedItem('url')->value, - 'length' => $attrs->getNamedItem('length')->value, - 'type' => $attrs->getNamedItem('type')->value); - } catch (Exception $e) { - return false; - } - } - return false; - } - - /** - * Get the entry source if specified - * - * source is an optional sub-element of item. Like atom:source it tells - * us about where the entry came from (eg. if it's been copied from another - * feed). It is not a rich source of metadata in the same way as atom:source - * and while it would be good to maintain compatibility by returning an - * XML_Feed_Parser_RSS2 element, it makes a lot more sense to return an array. 
- * - * @return array|false - */ - protected function getSource() - { - $get = $this->model->getElementsByTagName('source'); - if ($get->length) { - $source = $get->item(0); - $array = array( - 'content' => $source->nodeValue); - foreach ($source->attributes as $attribute) { - $array[$attribute->name] = $attribute->value; - } - return $array; - } - return false; - } -} - -?> \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/Parser/Type.php b/plugins/OStatus/extlib/XML/Feed/Parser/Type.php deleted file mode 100644 index 75052619bd..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/Parser/Type.php +++ /dev/null @@ -1,467 +0,0 @@ - - * @copyright 2005 James Stewart - * @license http://www.gnu.org/copyleft/lesser.html GNU LGPL 2.1 - * @version CVS: $Id: Type.php,v 1.25 2008/03/08 18:39:09 jystewart Exp $ - * @link http://pear.php.net/package/XML_Feed_Parser/ - */ - -/** - * This abstract class provides some general methods that are likely to be - * implemented exactly the same way for all feed types. - * - * @package XML_Feed_Parser - * @author James Stewart - * @version Release: 1.0.3 - */ -abstract class XML_Feed_Parser_Type -{ - /** - * Where we store our DOM object for this feed - * @var DOMDocument - */ - public $model; - - /** - * For iteration we'll want a count of the number of entries - * @var int - */ - public $numberEntries; - - /** - * Where we store our entry objects once instantiated - * @var array - */ - public $entries = array(); - - /** - * Store mappings between entry IDs and their position in the feed - */ - public $idMappings = array(); - - /** - * Proxy to allow use of element names as method names - * - * We are not going to provide methods for every entry type so this - * function will allow for a lot of mapping. We rely pretty heavily - * on this to handle our mappings between other feed types and atom. 
- * - * @param string $call - the method attempted - * @param array $arguments - arguments to that method - * @return mixed - */ - function __call($call, $arguments = array()) - { - if (! is_array($arguments)) { - $arguments = array(); - } - - if (isset($this->compatMap[$call])) { - $tempMap = $this->compatMap; - $tempcall = array_pop($tempMap[$call]); - if (! empty($tempMap)) { - $arguments = array_merge($arguments, $tempMap[$call]); - } - $call = $tempcall; - } - - /* To be helpful, we allow a case-insensitive search for this method */ - if (! isset($this->map[$call])) { - foreach (array_keys($this->map) as $key) { - if (strtoupper($key) == strtoupper($call)) { - $call = $key; - break; - } - } - } - - if (empty($this->map[$call])) { - return false; - } - - $method = 'get' . $this->map[$call][0]; - if ($method == 'getLink') { - $offset = empty($arguments[0]) ? 0 : $arguments[0]; - $attribute = empty($arguments[1]) ? 'href' : $arguments[1]; - $params = isset($arguments[2]) ? $arguments[2] : array(); - return $this->getLink($offset, $attribute, $params); - } - if (method_exists($this, $method)) { - return $this->$method($call, $arguments); - } - - return false; - } - - /** - * Proxy to allow use of element names as attribute names - * - * For many elements variable-style access will be desirable. This function - * provides for that. - * - * @param string $value - the variable required - * @return mixed - */ - function __get($value) - { - return $this->__call($value, array()); - } - - /** - * Utility function to help us resolve xml:base values - * - * We have other methods which will traverse the DOM and work out the different - * xml:base declarations we need to be aware of. We then need to combine them. - * If a declaration starts with a protocol then we restart the string. If it - * starts with a / then we add on to the domain name. Otherwise we simply tag - * it on to the end. 
- * - * @param string $base - the base to add the link to - * @param string $link - */ - function combineBases($base, $link) - { - if (preg_match('/^[A-Za-z]+:\/\//', $link)) { - return $link; - } else if (preg_match('/^\//', $link)) { - /* Extract domain and suffix link to that */ - preg_match('/^([A-Za-z]+:\/\/.*)?\/*/', $base, $results); - $firstLayer = $results[0]; - return $firstLayer . "/" . $link; - } else if (preg_match('/^\.\.\//', $base)) { - /* Step up link to find place to be */ - preg_match('/^((\.\.\/)+)(.*)$/', $link, $bases); - $suffix = $bases[3]; - $count = preg_match_all('/\.\.\//', $bases[1], $steps); - $url = explode("/", $base); - for ($i = 0; $i <= $count; $i++) { - array_pop($url); - } - return implode("/", $url) . "/" . $suffix; - } else if (preg_match('/^(?!\/$)/', $base)) { - $base = preg_replace('/(.*\/).*$/', '$1', $base) ; - return $base . $link; - } else { - /* Just stick it on the end */ - return $base . $link; - } - } - - /** - * Determine whether we need to apply our xml:base rules - * - * Gets us the xml:base data and then processes that with regard - * to our current link. - * - * @param string - * @param DOMElement - * @return string - */ - function addBase($link, $element) - { - if (preg_match('/^[A-Za-z]+:\/\//', $link)) { - return $link; - } - - return $this->combineBases($element->baseURI, $link); - } - - /** - * Get an entry by its position in the feed, starting from zero - * - * As well as allowing the items to be iterated over we want to allow - * users to be able to access a specific entry. This is one of two ways of - * doing that, the other being by ID. - * - * @param int $offset - * @return XML_Feed_Parser_RSS1Element - */ - function getEntryByOffset($offset) - { - if (! 
isset($this->entries[$offset])) { - $entries = $this->model->getElementsByTagName($this->itemElement); - if ($entries->length > $offset) { - $xmlBase = $entries->item($offset)->baseURI; - $this->entries[$offset] = new $this->itemClass( - $entries->item($offset), $this, $xmlBase); - if ($id = $this->entries[$offset]->id) { - $this->idMappings[$id] = $this->entries[$offset]; - } - } else { - throw new XML_Feed_Parser_Exception('No entries found'); - } - } - - return $this->entries[$offset]; - } - - /** - * Return a date in seconds since epoch. - * - * Get a date construct. We use PHP's strtotime to return it as a unix datetime, which - * is the number of seconds since 1970-01-01 00:00:00. - * - * @link http://php.net/strtotime - * @param string $method The name of the date construct we want - * @param array $arguments Included for compatibility with our __call usage - * @return int|false datetime - */ - protected function getDate($method, $arguments) - { - $time = $this->model->getElementsByTagName($method); - if ($time->length == 0 || empty($time->item(0)->nodeValue)) { - return false; - } - return strtotime($time->item(0)->nodeValue); - } - - /** - * Get a text construct. - * - * @param string $method The name of the text construct we want - * @param array $arguments Included for compatibility with our __call usage - * @return string - */ - protected function getText($method, $arguments = array()) - { - $tags = $this->model->getElementsByTagName($method); - if ($tags->length > 0) { - $value = $tags->item(0)->nodeValue; - return $value; - } - return false; - } - - /** - * Apply various rules to retrieve category data. - * - * There is no single way of declaring a category in RSS1/1.1 as there is in RSS2 - * and Atom. Instead the usual approach is to use the dublin core namespace to - * declare categories. For example delicious use both: - * PEAR and: - * - * to declare a categorisation of 'PEAR'. - * - * We need to be sensitive to this where possible. 
- * - * @param string $call for compatibility with our overloading - * @param array $arguments - arg 0 is the offset, arg 1 is whether to return as array - * @return string|array|false - */ - protected function getCategory($call, $arguments) - { - $categories = $this->model->getElementsByTagName('subject'); - $offset = empty($arguments[0]) ? 0 : $arguments[0]; - $array = empty($arguments[1]) ? false : true; - if ($categories->length <= $offset) { - return false; - } - if ($array) { - $list = array(); - foreach ($categories as $category) { - array_push($list, $category->nodeValue); - } - return $list; - } - return $categories->item($offset)->nodeValue; - } - - /** - * Count occurrences of an element - * - * This function will tell us how many times the element $type - * appears at this level of the feed. - * - * @param string $type the element we want to get a count of - * @return int - */ - protected function count($type) - { - if ($tags = $this->model->getElementsByTagName($type)) { - return $tags->length; - } - return 0; - } - - /** - * Part of our xml:base processing code - * - * We need a couple of methods to access XHTML content stored in feeds. - * This is because we dereference all xml:base references before returning - * the element. This method handles the attributes. - * - * @param DOMElement $node The DOM node we are iterating over - * @return string - */ - function processXHTMLAttributes($node) { - $return = ''; - foreach ($node->attributes as $attribute) { - if ($attribute->name == 'src' or $attribute->name == 'href') { - $attribute->value = $this->addBase(htmlentities($attribute->value, NULL, 'utf-8'), $attribute); - } - if ($attribute->name == 'base') { - continue; - } - $return .= $attribute->name . '="' . htmlentities($attribute->value, NULL, 'utf-8') .'" '; - } - if (! empty($return)) { - return ' ' . trim($return); - } - return ''; - } - - /** - * Convert HTML entities based on the current character set. 
- * - * @param String - * @return String - */ - function processEntitiesForNodeValue($node) - { - if (function_exists('iconv')) { - $current_encoding = $node->ownerDocument->encoding; - $value = iconv($current_encoding, 'UTF-8', $node->nodeValue); - } else if ($current_encoding == 'iso-8859-1') { - $value = utf8_encode($node->nodeValue); - } else { - $value = $node->nodeValue; - } - - $decoded = html_entity_decode($value, NULL, 'UTF-8'); - return htmlentities($decoded, NULL, 'UTF-8'); - } - - /** - * Part of our xml:base processing code - * - * We need a couple of methods to access XHTML content stored in feeds. - * This is because we dereference all xml:base references before returning - * the element. This method recurs through the tree descending from the node - * and builds our string. - * - * @param DOMElement $node The DOM node we are processing - * @return string - */ - function traverseNode($node) - { - $content = ''; - - /* Add the opening of this node to the content */ - if ($node instanceof DOMElement) { - $content .= '<' . $node->tagName . - $this->processXHTMLAttributes($node) . '>'; - } - - /* Process children */ - if ($node->hasChildNodes()) { - foreach ($node->childNodes as $child) { - $content .= $this->traverseNode($child); - } - } - - if ($node instanceof DOMText) { - $content .= $this->processEntitiesForNodeValue($node); - } - - /* Add the closing of this node to the content */ - if ($node instanceof DOMElement) { - $content .= 'tagName . '>'; - } - - return $content; - } - - /** - * Get content from RSS feeds (atom has its own implementation) - * - * The official way to include full content in an RSS1 entry is to use - * the content module's element 'encoded', and RSS2 feeds often duplicate that. - * Often, however, the 'description' element is used instead. We will offer that - * as a fallback. Atom uses its own approach and overrides this method. 
- * - * @return string|false - */ - protected function getContent() - { - $options = array('encoded', 'description'); - foreach ($options as $element) { - $test = $this->model->getElementsByTagName($element); - if ($test->length == 0) { - continue; - } - if ($test->item(0)->hasChildNodes()) { - $value = ''; - foreach ($test->item(0)->childNodes as $child) { - if ($child instanceof DOMText) { - $value .= $child->nodeValue; - } else { - $simple = simplexml_import_dom($child); - $value .= $simple->asXML(); - } - } - return $value; - } else if ($test->length > 0) { - return $test->item(0)->nodeValue; - } - } - return false; - } - - /** - * Checks if this element has a particular child element. - * - * @param String - * @param Integer - * @return bool - **/ - function hasKey($name, $offset = 0) - { - $search = $this->model->getElementsByTagName($name); - return $search->length > $offset; - } - - /** - * Return an XML serialization of the feed, should it be required. Most - * users however, will already have a serialization that they used when - * instantiating the object. - * - * @return string XML serialization of element - */ - function __toString() - { - $simple = simplexml_import_dom($this->model); - return $simple->asXML(); - } - - /** - * Get directory holding RNG schemas. Method is based on that - * found in Contact_AddressBook. - * - * @return string PEAR data directory. - * @access public - * @static - */ - static function getSchemaDir() - { - require_once 'PEAR/Config.php'; - $config = new PEAR_Config; - return $config->get('data_dir') . 
'/XML_Feed_Parser/schemas'; - } -} - -?> \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/samples/atom10-entryonly.xml b/plugins/OStatus/extlib/XML/Feed/samples/atom10-entryonly.xml deleted file mode 100755 index 02e1c58002..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/samples/atom10-entryonly.xml +++ /dev/null @@ -1,28 +0,0 @@ - - - Atom draft-07 snapshot - - - tag:example.org,2003:3.2397 - 2005-07-10T12:29:29Z - 2003-12-13T08:29:29-04:00 - - Mark Pilgrim - http://example.org/ - f8dy@example.com - - - Sam Ruby - - - Joe Gregorio - - -
    -

    [Update: The Atom draft is finished.]

    -
    -
    -
    \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/samples/atom10-example1.xml b/plugins/OStatus/extlib/XML/Feed/samples/atom10-example1.xml deleted file mode 100755 index d181d2b6f8..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/samples/atom10-example1.xml +++ /dev/null @@ -1,20 +0,0 @@ - - - - Example Feed - - 2003-12-13T18:30:02Z - - John Doe - - urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6 - - - Atom-Powered Robots Run Amok - - urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a - 2003-12-13T18:30:02Z - Some text. - - - \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/samples/atom10-example2.xml b/plugins/OStatus/extlib/XML/Feed/samples/atom10-example2.xml deleted file mode 100755 index 98abf9d54f..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/samples/atom10-example2.xml +++ /dev/null @@ -1,45 +0,0 @@ - - - dive into mark - - A <em>lot</em> of effort - went into making this effortless - - 2005-07-31T12:29:29Z - tag:example.org,2003:3 - - - Copyright (c) 2003, Mark Pilgrim - - Example Toolkit - - - Atom draft-07 snapshot - - - tag:example.org,2003:3.2397 - 2005-07-31T12:29:29Z - 2003-12-13T08:29:29-04:00 - - Mark Pilgrim - http://example.org/ - f8dy@example.com - - - Sam Ruby - - - Joe Gregorio - - -
    -

    [Update: The Atom draft is finished.]

    -
    -
    -
    -
    \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/samples/delicious.feed b/plugins/OStatus/extlib/XML/Feed/samples/delicious.feed deleted file mode 100755 index 32f9fa4935..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/samples/delicious.feed +++ /dev/null @@ -1,177 +0,0 @@ - - - - -del.icio.us/tag/greenbelt -http://del.icio.us/tag/greenbelt -Text - - - - - - - - - - - - - - - - -Greenbelt - Homepage Section -http://www.greenbelt.org.uk/ -jonnybaker -2005-05-16T16:30:38Z -greenbelt - - - - - - - - -Greenbelt festival (uk) -http://www.greenbelt.org.uk/ -sssshhhh -2005-05-14T18:19:40Z -audiology festival gigs greenbelt - - - - - - - - - - - -Natuerlichwien.at - Rundumadum -http://www.natuerlichwien.at/rundumadum/dergruenguertel/ -egmilman47 -2005-05-06T21:33:41Z -Austria Vienna Wien greenbelt nature walking - - - - - - - - - - - - - -Tank - GBMediaWiki -http://www.flickerweb.co.uk/wiki/index.php/Tank#Seminars -jystewart -2005-03-21T22:44:11Z -greenbelt - - - - - - - - -Greenbelt homepage -http://www.greenbelt.ca/home.htm -Gooberoo -2005-03-01T22:43:17Z -greenbelt ontario - - - - - - - - - -Pip Wilson bhp ...... 
blog -http://pipwilsonbhp.blogspot.com/ -sssshhhh -2004-12-27T11:20:51Z -Greenbelt friend ideas links thinking weblog - - - - - - - - - - - - - -maggi dawn -http://maggidawn.typepad.com/maggidawn/ -sssshhhh -2004-12-27T11:20:11Z -Greenbelt ideas links thinking weblog - - - - - - - - - - - - -John Davies -http://www.johndavies.org/ -sssshhhh -2004-12-27T11:18:37Z -Greenbelt ideas links thinking weblog - - - - - - - - - - - - -jonnybaker -http://jonnybaker.blogs.com/ -sssshhhh -2004-12-27T11:18:17Z -Greenbelt event ideas links resources thinking weblog youth - - - - - - - - - - - - - - - diff --git a/plugins/OStatus/extlib/XML/Feed/samples/flickr.feed b/plugins/OStatus/extlib/XML/Feed/samples/flickr.feed deleted file mode 100755 index 57e83af572..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/samples/flickr.feed +++ /dev/null @@ -1,184 +0,0 @@ - - - - jamesstewart - Everyone's Tagged Photos - - - A feed of jamesstewart - Everyone's Tagged Photos - 2005-08-01T18:50:26Z - Flickr - - - Oma and James - - - tag:flickr.com,2004:/photo/30367516 - 2005-08-01T18:50:26Z - 2005-08-01T18:50:26Z - <p><a href="http://www.flickr.com/people/30484029@N00/">kstewart</a> posted a photo:</p> - -<p><a href="http://www.flickr.com/photos/30484029@N00/30367516/" title="Oma and James"><img src="http://photos23.flickr.com/30367516_1f685a16e8_m.jpg" width="240" height="180" alt="Oma and James" style="border: 1px solid #000000;" /></a></p> - -<p>I have a beautiful Oma and a gorgeous husband.</p> - - kstewart - http://www.flickr.com/people/30484029@N00/ - - jamesstewart oma stoelfamily - - - - - tag:flickr.com,2004:/photo/21376174 - 2005-06-25T02:00:35Z - 2005-06-25T02:00:35Z - <p><a href="http://www.flickr.com/people/buddscreek/">Lan Rover</a> posted a photo:</p> - -<p><a href="http://www.flickr.com/photos/buddscreek/21376174/" title=""><img src="http://photos17.flickr.com/21376174_4314fd8d5c_m.jpg" width="240" height="160" alt="" style="border: 1px solid #000000;" /></a></p> - -<p>AMA 
Motocross Championship 2005, Budds Creek, Maryland</p> - - Lan Rover - http://www.flickr.com/people/buddscreek/ - - amamotocrosschampionship buddscreek maryland 2005 fathersday motocrossnational rickycarmichael 259 jamesstewart 4 - - - - - tag:flickr.com,2004:/photo/21375650 - 2005-06-25T01:56:24Z - 2005-06-25T01:56:24Z - <p><a href="http://www.flickr.com/people/buddscreek/">Lan Rover</a> posted a photo:</p> - -<p><a href="http://www.flickr.com/photos/buddscreek/21375650/" title=""><img src="http://photos16.flickr.com/21375650_5c60e0dab1_m.jpg" width="240" height="160" alt="" style="border: 1px solid #000000;" /></a></p> - - - - Lan Rover - http://www.flickr.com/people/buddscreek/ - - amamotocrosschampionship buddscreek maryland 2005 fathersday motocrossnational 259 jamesstewart - - - - - tag:flickr.com,2004:/photo/21375345 - 2005-06-25T01:54:11Z - 2005-06-25T01:54:11Z - <p><a href="http://www.flickr.com/people/buddscreek/">Lan Rover</a> posted a photo:</p> - -<p><a href="http://www.flickr.com/photos/buddscreek/21375345/" title=""><img src="http://photos15.flickr.com/21375345_4205fdd22b_m.jpg" width="160" height="240" alt="" style="border: 1px solid #000000;" /></a></p> - - - - Lan Rover - http://www.flickr.com/people/buddscreek/ - - amamotocrosschampionship buddscreek maryland 2005 fathersday motocrossnational 259 jamesstewart - - - Lunch with Kari & James, café in the crypt of St Martin in the fields - - tag:flickr.com,2004:/photo/16516618 - 2005-05-30T21:56:39Z - 2005-05-30T21:56:39Z - <p><a href="http://www.flickr.com/people/fidothe/">fidothe</a> posted a photo:</p> - -<p><a href="http://www.flickr.com/photos/fidothe/16516618/" title="Lunch with Kari &amp; James, café in the crypt of St Martin in the fields"><img src="http://photos14.flickr.com/16516618_afaa4a395e_m.jpg" width="240" height="180" alt="Lunch with Kari &amp; James, café in the crypt of St Martin in the fields" style="border: 1px solid #000000;" /></a></p> - - - - fidothe - 
http://www.flickr.com/people/fidothe/ - - nokia7610 london stmartininthefields clarepatterson jamesstewart parvinstewart jimstewart susanstewart - - - Stewart keeping it low over the obstacle. - - tag:flickr.com,2004:/photo/10224728 - 2005-04-21T07:30:29Z - 2005-04-21T07:30:29Z - <p><a href="http://www.flickr.com/people/pqbon/">pqbon</a> posted a photo:</p> - -<p><a href="http://www.flickr.com/photos/pqbon/10224728/" title="Stewart keeping it low over the obstacle."><img src="http://photos7.flickr.com/10224728_b756341957_m.jpg" width="240" height="180" alt="Stewart keeping it low over the obstacle." style="border: 1px solid #000000;" /></a></p> - - - - pqbon - http://www.flickr.com/people/pqbon/ - - ama hangtown motocross jamesstewart bubba - - - king james stewart - - tag:flickr.com,2004:/photo/7152910 - 2005-03-22T21:53:37Z - 2005-03-22T21:53:37Z - <p><a href="http://www.flickr.com/people/jjlook/">jj look</a> posted a photo:</p> - -<p><a href="http://www.flickr.com/photos/jjlook/7152910/" title="king james stewart"><img src="http://photos7.flickr.com/7152910_a02ab5a750_m.jpg" width="180" height="240" alt="king james stewart" style="border: 1px solid #000000;" /></a></p> - -<p>11th</p> - - jj look - http://www.flickr.com/people/jjlook/ - - dilomar05 eastside austin texas 78702 kingjames stewart jamesstewart borrowed - - - It's a Grind, downtown Grand Rapids (James, Susan, Jim, Harv, Lawson) - - tag:flickr.com,2004:/photo/1586562 - 2004-11-20T09:34:28Z - 2004-11-20T09:34:28Z - <p><a href="http://www.flickr.com/people/fidothe/">fidothe</a> posted a photo:</p> - -<p><a href="http://www.flickr.com/photos/fidothe/1586562/" title="It's a Grind, downtown Grand Rapids (James, Susan, Jim, Harv, Lawson)"><img src="http://photos2.flickr.com/1586562_0bc5313a3e_m.jpg" width="240" height="180" alt="It's a Grind, downtown Grand Rapids (James, Susan, Jim, Harv, Lawson)" style="border: 1px solid #000000;" /></a></p> - - - - fidothe - http://www.flickr.com/people/fidothe/ - - 
holiday grandrapids jamesstewart - - - It's a Grind, downtown Grand Rapids (James, Susan, Jim, Harv, Lawson) - - tag:flickr.com,2004:/photo/1586539 - 2004-11-20T09:28:16Z - 2004-11-20T09:28:16Z - <p><a href="http://www.flickr.com/people/fidothe/">fidothe</a> posted a photo:</p> - -<p><a href="http://www.flickr.com/photos/fidothe/1586539/" title="It's a Grind, downtown Grand Rapids (James, Susan, Jim, Harv, Lawson)"><img src="http://photos2.flickr.com/1586539_c51e5f2e7a_m.jpg" width="240" height="180" alt="It's a Grind, downtown Grand Rapids (James, Susan, Jim, Harv, Lawson)" style="border: 1px solid #000000;" /></a></p> - - - - fidothe - http://www.flickr.com/people/fidothe/ - - holiday grandrapids jamesstewart - - - It's a Grind, James and Jim can't decide) - - tag:flickr.com,2004:/photo/1586514 - 2004-11-20T09:25:05Z - 2004-11-20T09:25:05Z - <p><a href="http://www.flickr.com/people/fidothe/">fidothe</a> posted a photo:</p> - -<p><a href="http://www.flickr.com/photos/fidothe/1586514/" title="It's a Grind, James and Jim can't decide)"><img src="http://photos2.flickr.com/1586514_733c2dfa3e_m.jpg" width="240" height="180" alt="It's a Grind, James and Jim can't decide)" style="border: 1px solid #000000;" /></a></p> - - - - fidothe - http://www.flickr.com/people/fidothe/ - - holiday grandrapids jamesstewart johnkentish - - - \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/samples/grwifi-atom.xml b/plugins/OStatus/extlib/XML/Feed/samples/grwifi-atom.xml deleted file mode 100755 index c351d3c164..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/samples/grwifi-atom.xml +++ /dev/null @@ -1,7 +0,0 @@ - Updates to Grand Rapids WiFi hotspot details 2005-09-01T15:43:01-05:00 WiFi Hotspots in Grand Rapids, MI http://grwifi.net/atom/locations Creative Commons Attribution-NonCommercial-ShareAlike 2.0 http://creativecommons.org/licenses/by-nc-sa/2.0/ Hotspot Details Updated: Sweetwaters http://grwifi.net/location/sweetwaters 2005-09-01T15:43:01-05:00 The 
details of the WiFi hotspot at: Sweetwaters have been updated. Find out more at: -http://grwifi.net/location/sweetwaters James http://jystewart.net james@jystewart.net wifi hotspot Hotspot Details Updated: Common Ground Coffee Shop http://grwifi.net/location/common-ground 2005-09-01T15:42:39-05:00 The details of the WiFi hotspot at: Common Ground Coffee Shop have been updated. Find out more at: -http://grwifi.net/location/common-ground James http://jystewart.net james@jystewart.net wifi hotspot Hotspot Details Updated: Grand Rapids Public Library, Main Branch http://grwifi.net/location/grpl-main-branch 2005-09-01T15:42:20-05:00 The details of the WiFi hotspot at: Grand Rapids Public Library, Main Branch have been updated. Find out more at: -http://grwifi.net/location/grpl-main-branch James http://jystewart.net james@jystewart.net wifi hotspot Hotspot Details Updated: Four Friends Coffee House http://grwifi.net/location/four-friends 2005-09-01T15:41:35-05:00 The details of the WiFi hotspot at: Four Friends Coffee House have been updated. Find out more at: -http://grwifi.net/location/four-friends James http://jystewart.net james@jystewart.net wifi hotspot Hotspot Details Updated: Barnes and Noble, Rivertown Crossings http://grwifi.net/location/barnes-noble-rivertown 2005-09-01T15:40:41-05:00 The details of the WiFi hotspot at: Barnes and Noble, Rivertown Crossings have been updated. Find out more at: -http://grwifi.net/location/barnes-noble-rivertown James http://jystewart.net james@jystewart.net wifi hotspot Hotspot Details Updated: The Boss Sports Bar & Grille http://grwifi.net/location/boss-sports-bar 2005-09-01T15:40:19-05:00 The details of the WiFi hotspot at: The Boss Sports Bar & Grille have been updated. 
Find out more at: -http://grwifi.net/location/boss-sports-bar James http://jystewart.net james@jystewart.net wifi hotspot \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/samples/hoder.xml b/plugins/OStatus/extlib/XML/Feed/samples/hoder.xml deleted file mode 100755 index 0994635707..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/samples/hoder.xml +++ /dev/null @@ -1,102 +0,0 @@ - - - - -Editor: Myself (Persian) -http://editormyself.info -This is a Persian (Farsi) weblog, written by Hossein Derakhshan (aka, Hoder), an Iranian Multimedia designer and a journalist who lives in Toronto since Dec 2000. He also keeps an English weblog with the same name. -en-us -hoder@hotmail.com -2005-10-12T19:45:32-05:00 - -hourly -1 -2000-01-01T12:00+00:00 - - - -لينکدونی‌ | جلسه‌ی امریکن انترپرایز برای تقسیم قومی ایران -http://www.aei.org/events/type.upcoming,eventID.1166,filter.all/event_detail.asp -چطور بعضی‌ها فکر می‌کنند دست راستی‌های آمریکا از خامنه‌ای ملی‌گراترند -14645@http://i.hoder.com/ -iran -2005-10-12T19:45:32-05:00 - - - -لينکدونی‌ | به صبحانه آگهی بدهید -http://www.adbrite.com/mb/commerce/purchase_form.php?opid=24346&afsid=1 -خیلی ارزان و راحت است -14644@http://i.hoder.com/ -media/journalism -2005-10-12T17:23:15-05:00 - - - -لينکدونی‌ | نیروی انتظامی چگونه تابوهای هم‌جنس‌گرایانه را می‌شکند؛ فرنگوپولیس -http://farangeopolis.blogspot.com/2005/10/blog-post_08.html -از پس و پیش و حاشیه‌ی این ماجرا می‌توان یک مستند بی‌نظیر ساخت -14643@http://i.hoder.com/ -soc_popculture -2005-10-12T17:06:40-05:00 - - - -لينکدونی‌ | بازتاب توقیف شد -http://www.baztab.com/news/30201.php -اگر گفتید یک وب‌سایت را چطور توقیف می‌کنند؟ لابد ماوس‌شان را قایم می‌کنند. 
-14642@http://i.hoder.com/ -media/journalism -2005-10-12T14:41:57-05:00 - - - -لينکدونی‌ | رشد وب در سال 2005 از همیشه بیشتر بوده است" بی.بی.سی -http://news.bbc.co.uk/2/hi/technology/4325918.stm - -14640@http://i.hoder.com/ -tech -2005-10-12T13:04:46-05:00 - - - - - -==قرعه کشی گرین کارد به زودی شروع می‌شود== -http://nice.newsxphotos.biz/05/09/2007_dv_lottery_registration_to_begin_oct_5_14589.php - -14613@http://vagrantly.com -ads03 -2005-09-27T04:49:22-05:00 - - - - - - - - -پروژه‌ی هاروارد، قدم دوم -http://editormyself.info/archives/2005/10/051012_014641.shtml -اگر یادتان باشد چند وقت پیش نوشتم که دانشگاه هاروارد پروژه‌ای دارد با نام آواهای جهانی که در آن به وبلاگ‌های غیر انگلیسی‌زبان می‌پردازد. خواشتم که اگر کسی علاقه دارد ایمیل بزند. تعداد زیادی جواب دادند و ابراز علاقه کردند. حالا وقت قدم دوم است.

    - -

    قدم دوم این است که برای اینکه مسوولین پروژه بتوانند تصمیم بگیرند که با چه کسی کار کنند، می‌خواهند نمونه‌ی کارهای علاقمندان مشارکت در این پرزو‌ه را ببینند.

    - -

    برای همین از همه‌ی علاقماندان، حتی کسانی که قبلا اعلام آمادگی نکرده بودند، می‌‌خواهم که یک موضوع رایج این روزهای وبلاگستان فارسی را انتخاب کنند و در هفتصد کلمه، به انگلیسی، بنویسند که وبلاگ‌دارهای درباره‌اش چه می‌گویند. لینک به پنج، شش وبلاگ و بازنویسی آنچه آنها از جنبه‌های گوناگون درباره‌ی آن موضوع نوشته‌اند با نقل قول مستقیم از آنها (البته ترجمه شده از فارسی) کافی است. دو سه جمله هم اول کار توضیح دهید که چرا این موضوع مهم است.

    - -

    متن نمونه را به آدرس ایمیل من hoder@hoder.com و نیز برای افراد زیر تا روز دوشنبه بفرستید:
    -ربکا : rmackinnon@cyber.law.harvard.edu
    -هیثم: haitham.sabbah@gmail.com

    ]]>
    -14641@http://editormyself.info -weblog -2005-10-12T14:04:23-05:00 -
    - - - -
    -
    \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/samples/illformed_atom10.xml b/plugins/OStatus/extlib/XML/Feed/samples/illformed_atom10.xml deleted file mode 100755 index 612186897d..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/samples/illformed_atom10.xml +++ /dev/null @@ -1,13 +0,0 @@ - - - - - Example author - me@example.com - http://example.com/ - - - - - - -Copyright 1997-1999 UserLand Software, Inc. -Thu, 08 Jul 1999 07:00:00 GMT -Thu, 08 Jul 1999 16:20:26 GMT -http://my.userland.com/stories/storyReader$11 -News and commentary from the cross-platform scripting community. -http://www.scripting.com/ -Scripting News - -http://www.scripting.com/ -Scripting News -http://www.scripting.com/gifs/tinyScriptingNews.gif -40 -78 -What is this used for? - -dave@userland.com (Dave Winer) -dave@userland.com (Dave Winer) -en-us - -6 -7 -8 -9 -10 -11 - - -Sunday - -(PICS-1.1 "http://www.rsac.org/ratingsv01.html" l gen true comment "RSACi North America Server" for "http://www.rsac.org" on "1996.04.16T08:15-0500" r (n 0 s 0 v 0 l 0)) - -stuff -http://bar -This is an article about some stuff - - -Search Now! -Enter your search <terms> -find -http://my.site.com/search.cgi - - - \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/samples/rss091-international.xml b/plugins/OStatus/extlib/XML/Feed/samples/rss091-international.xml deleted file mode 100755 index cfe91691f4..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/samples/rss091-international.xml +++ /dev/null @@ -1,30 +0,0 @@ - - - - -膮ŸÛë´é´Ì´×´è´ŒÁ¹´Õ -http://www.mozilla.org -膮ŸÛë´é´Ì´×´è´ŒÁ¹´Õ -ja - -NYÒ™Á¢¸»ÌêÛì15285.25´ƒ´‘Á£´Û´—´ÀÁ¹´ê´Ì´éÒ™Ûì¡êçÒÕ‰ÌêÁ£ -http://www.mozilla.org/status/ -This is an item description... - - -‚§±Çç¡ËßÛÂҏéøÓ¸Á£Ë²®Ÿè†Ûè危ÇÌ’¡Íæ—éøë‡Á£ -http://www.mozilla.org/status/ -This is an item description... - - -ËÜË”ïÌëÈšÁ¢È†Ë§æàÀ豎ˉۂÁ¢Ë‚åܼšÛ˜íËüËÁ£ -http://www.mozilla.org/status/ -This is an item description... 
- - -2000‚øíŠåÁ¢«‘¦éÛ빏ېçéÛ§ÛÂè†ÒæÓ¸Á£Ì¾«…æ—ÕÝéøƒ¸Á£ -http://www.mozilla.org/status/ -This is an item description... - - - \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/samples/rss091-simple.xml b/plugins/OStatus/extlib/XML/Feed/samples/rss091-simple.xml deleted file mode 100755 index f0964a2278..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/samples/rss091-simple.xml +++ /dev/null @@ -1,15 +0,0 @@ - - - - -en -News and commentary from the cross-platform scripting community. -http://www.scripting.com/ -Scripting News - -http://www.scripting.com/ -Scripting News -http://www.scripting.com/gifs/tinyScriptingNews.gif - - - \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/samples/rss092-sample.xml b/plugins/OStatus/extlib/XML/Feed/samples/rss092-sample.xml deleted file mode 100755 index 5d75c352bd..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/samples/rss092-sample.xml +++ /dev/null @@ -1,103 +0,0 @@ - - - - - Dave Winer: Grateful Dead - http://www.scripting.com/blog/categories/gratefulDead.html - A high-fidelity Grateful Dead song every day. This is where we're experimenting with enclosures on RSS news items that download when you're not using your computer. If it works (it will) it will be the end of the Click-And-Wait multimedia experience on the Internet. - Fri, 13 Apr 2001 19:23:02 GMT - http://backend.userland.com/rss092 - dave@userland.com (Dave Winer) - dave@userland.com (Dave Winer) - - - It's been a few days since I added a song to the Grateful Dead channel. Now that there are all these new Radio users, many of whom are tuned into this channel (it's #16 on the hotlist of upstreaming Radio users, there's no way of knowing how many non-upstreaming users are subscribing, have to do something about this..). Anyway, tonight's song is a live version of Weather Report Suite from Dick's Picks Volume 7. It's wistful music. Of course a beautiful song, oft-quoted here on Scripting News. 
<i>A little change, the wind and rain.</i> - - - - - Kevin Drennan started a <a href="http://deadend.editthispage.com/">Grateful Dead Weblog</a>. Hey it's cool, he even has a <a href="http://deadend.editthispage.com/directory/61">directory</a>. <i>A Frontier 7 feature.</i> - Scripting News - - - <a href="http://arts.ucsc.edu/GDead/AGDL/other1.html">The Other One</a>, live instrumental, One From The Vault. Very rhythmic very spacy, you can listen to it many times, and enjoy something new every time. - - - - This is a test of a change I just made. Still diggin.. - - - The HTML rendering almost <a href="http://validator.w3.org/check/referer">validates</a>. Close. Hey I wonder if anyone has ever published a style guide for ALT attributes on images? What are you supposed to say in the ALT attribute? I sure don't know. If you're blind send me an email if u cn rd ths. - - - <a href="http://www.cs.cmu.edu/~mleone/gdead/dead-lyrics/Franklin's_Tower.txt">Franklin's Tower</a>, a live version from One From The Vault. - - - - Moshe Weitzman says Shakedown Street is what I'm lookin for for tonight. I'm listening right now. It's one of my favorites. "Don't tell me this town ain't got no heart." Too bright. I like the jazziness of Weather Report Suite. Dreamy and soft. How about The Other One? "Spanish lady come to me.." - Scripting News - - - <a href="http://www.scripting.com/mp3s/youWinAgain.mp3">The news is out</a>, all over town..<p> -You've been seen, out runnin round. <p> -The lyrics are <a href="http://www.cs.cmu.edu/~mleone/gdead/dead-lyrics/You_Win_Again.txt">here</a>, short and sweet. <p> -<i>You win again!</i> - - - - - <a href="http://www.getlyrics.com/lyrics/grateful-dead/wake-of-the-flood/07.htm">Weather Report Suite</a>: "Winter rain, now tell me why, summers fade, and roses die? The answer came. The wind and rain. Golden hills, now veiled in grey, summer leaves have blown away. Now what remains? The wind and rain." 
- - - - <a href="http://arts.ucsc.edu/gdead/agdl/darkstar.html">Dark Star</a> crashes, pouring its light into ashes. - - - - DaveNet: <a href="http://davenet.userland.com/2001/01/21/theUsBlues">The U.S. Blues</a>. - - - Still listening to the US Blues. <i>"Wave that flag, wave it wide and high.."</i> Mistake made in the 60s. We gave our country to the assholes. Ah ah. Let's take it back. Hey I'm still a hippie. <i>"You could call this song The United States Blues."</i> - - - <a href="http://www.sixties.com/html/garcia_stack_0.html"><img src="http://www.scripting.com/images/captainTripsSmall.gif" height="51" width="42" border="0" hspace="10" vspace="10" align="right"></a>In celebration of today's inauguration, after hearing all those great patriotic songs, America the Beautiful, even The Star Spangled Banner made my eyes mist up. It made my choice of Grateful Dead song of the night realllly easy. Here are the <a href="http://searchlyrics2.homestead.com/gd_usblues.html">lyrics</a>. Click on the audio icon to the left to give it a listen. "Red and white, blue suede shoes, I'm Uncle Sam, how do you do?" It's a different kind of patriotic music, but man I love my country and I love Jerry and the band. <i>I truly do!</i> - - - - Grateful Dead: "Tennessee, Tennessee, ain't no place I'd rather be." - - - - Ed Cone: "Had a nice Deadhead experience with my wife, who never was one but gets the vibe and knows and likes a lot of the music. Somehow she made it to the age of 40 without ever hearing Wharf Rat. We drove to Jersey and back over Christmas with the live album commonly known as Skull and Roses in the CD player much of the way, and it was cool to see her discover one the band's finest moments. That song is unique and underappreciated. Fun to hear that disc again after a few years off -- you get Jerry as blues-guitar hero on Big Railroad Blues and a nice version of Bertha." 
- - - - <a href="http://arts.ucsc.edu/GDead/AGDL/fotd.html">Tonight's Song</a>: "If I get home before daylight I just might get some sleep tonight." - - - - <a href="http://arts.ucsc.edu/GDead/AGDL/uncle.html">Tonight's song</a>: "Come hear Uncle John's Band by the river side. Got some things to talk about here beside the rising tide." - - - - <a href="http://www.cs.cmu.edu/~mleone/gdead/dead-lyrics/Me_and_My_Uncle.txt">Me and My Uncle</a>: "I loved my uncle, God rest his soul, taught me good, Lord, taught me all I know. Taught me so well, I grabbed that gold and I left his dead ass there by the side of the road." - - - - - Truckin, like the doo-dah man, once told me gotta play your hand. Sometimes the cards ain't worth a dime, if you don't lay em down. - - - - Two-Way-Web: <a href="http://www.thetwowayweb.com/payloadsForRss">Payloads for RSS</a>. "When I started talking with Adam late last year, he wanted me to think about high quality video on the Internet, and I totally didn't want to hear about it." - - - A touch of gray, kinda suits you anyway.. - - - - <a href="http://www.sixties.com/html/garcia_stack_0.html"><img src="http://www.scripting.com/images/captainTripsSmall.gif" height="51" width="42" border="0" hspace="10" vspace="10" align="right"></a>In celebration of today's inauguration, after hearing all those great patriotic songs, America the Beautiful, even The Star Spangled Banner made my eyes mist up. It made my choice of Grateful Dead song of the night realllly easy. Here are the <a href="http://searchlyrics2.homestead.com/gd_usblues.html">lyrics</a>. Click on the audio icon to the left to give it a listen. "Red and white, blue suede shoes, I'm Uncle Sam, how do you do?" It's a different kind of patriotic music, but man I love my country and I love Jerry and the band. 
<i>I truly do!</i> - - - - \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/samples/rss10-example1.xml b/plugins/OStatus/extlib/XML/Feed/samples/rss10-example1.xml deleted file mode 100755 index 0edecf58e9..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/samples/rss10-example1.xml +++ /dev/null @@ -1,62 +0,0 @@ - - - - - - XML.com - http://xml.com/pub - - XML.com features a rich mix of information and services - for the XML community. - - - - - - - - - - - - - - - - - XML.com - http://www.xml.com - http://xml.com/universal/images/xml_tiny.gif - - - - Processing Inclusions with XSLT - http://xml.com/pub/2000/08/09/xslt/xslt.html - - Processing document inclusions with general XML tools can be - problematic. This article proposes a way of preserving inclusion - information through SAX-based processing. - - - - - Putting RDF to Work - http://xml.com/pub/2000/08/09/rdfdb/index.html - - Tool and API support for the Resource Description Framework - is slowly coming of age. Edd Dumbill takes a look at RDFDB, - one of the most exciting new RDF toolkits. - - - - - Search XML.com - Search XML.com's XML collection - s - http://search.xml.com - - - \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/samples/rss10-example2.xml b/plugins/OStatus/extlib/XML/Feed/samples/rss10-example2.xml deleted file mode 100755 index 26235f78ff..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/samples/rss10-example2.xml +++ /dev/null @@ -1,67 +0,0 @@ - - - - - - Meerkat - http://meerkat.oreillynet.com - Meerkat: An Open Wire Service - The O'Reilly Network - Rael Dornfest (mailto:rael@oreilly.com) - Copyright © 2000 O'Reilly & Associates, Inc. - 2000-01-01T12:00+00:00 - hourly - 2 - 2000-01-01T12:00+00:00 - - - - - - - - - - - - - - - Meerkat Powered! 
- http://meerkat.oreillynet.com/icons/meerkat-powered.jpg - http://meerkat.oreillynet.com - - - - XML: A Disruptive Technology - http://c.moreover.com/click/here.pl?r123 - - XML is placing increasingly heavy loads on the existing technical - infrastructure of the Internet. - - The O'Reilly Network - Simon St.Laurent (mailto:simonstl@simonstl.com) - Copyright © 2000 O'Reilly & Associates, Inc. - XML - XML.com - NASDAQ - XML - - - - Search Meerkat - Search Meerkat's RSS Database... - s - http://meerkat.oreillynet.com/ - search - regex - - - \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/samples/rss2sample.xml b/plugins/OStatus/extlib/XML/Feed/samples/rss2sample.xml deleted file mode 100755 index 53483cc518..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/samples/rss2sample.xml +++ /dev/null @@ -1,42 +0,0 @@ - - - - Liftoff News - http://liftoff.msfc.nasa.gov/ - Liftoff to Space Exploration. - en-us - Tue, 10 Jun 2003 04:00:00 GMT - Tue, 10 Jun 2003 09:41:01 GMT - http://blogs.law.harvard.edu/tech/rss - Weblog Editor 2.0 - editor@example.com - webmaster@example.com - - Star City - http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp - How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>. - Tue, 03 Jun 2003 09:39:21 GMT - http://liftoff.msfc.nasa.gov/2003/06/03.html#item573 - - - Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st. 
- Fri, 30 May 2003 11:06:42 GMT - http://liftoff.msfc.nasa.gov/2003/05/30.html#item572 - - - The Engine That Does More - http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp - Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that. - Tue, 27 May 2003 08:37:32 GMT - http://liftoff.msfc.nasa.gov/2003/05/27.html#item571 - Test content

    ]]>
    -
    - - Astronauts' Dirty Laundry - http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp - Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options. - Tue, 20 May 2003 08:56:02 GMT - http://liftoff.msfc.nasa.gov/2003/05/20.html#item570 - -
    -
    \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/samples/sixapart-jp.xml b/plugins/OStatus/extlib/XML/Feed/samples/sixapart-jp.xml deleted file mode 100755 index f8a04bba5c..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/samples/sixapart-jp.xml +++ /dev/null @@ -1,226 +0,0 @@ - - - -Six Apart - News -http://www.sixapart.jp/ - -ja -Copyright 2005 -Fri, 07 Oct 2005 19:09:34 +0900 -http://www.movabletype.org/?v=3.2-ja -http://blogs.law.harvard.edu/tech/rss - - -ファイブ・ディーが、Movable Typeでブログプロモーションをスタート -MIYAZAWAblog_banner.jpg
    -ファイブ・ディーは、Movable Typeで構築したプロモーション ブログ『宮沢和史 中南米ツアーblog Latin America 2005』を開設しました。

    - -

    9月21日に開設されたこのブログは、ブラジル、ホンジュラス、ニカラグア、メキシコ、キューバの5か国を巡る「Latin America 2005」ツアーに合わせ、そのツアーの模様を同行マネージャーがレポートしていきます。
    -さらに今月2日からは宮沢和史自身が日々録音した声をPodcastingするという点でも、ブログを使ったユニークなプロモーションとなっています。

    - -

    「宮沢和史 中南米ツアーblog Latin America 2005」

    - -

    ※シックス・アパートではこうしたブログを使ったプロモーションに最適な製品をご用意しております。
    -

    ]]>
    -http://www.sixapart.jp/news/2005/10/07-1909.html -http://www.sixapart.jp/news/2005/10/07-1909.html -news -Fri, 07 Oct 2005 19:09:34 +0900 -
    - -Movable Type 3.2日本語版の提供を開始 -Movable Type Logo

    -

    シックス・アパートは、Movable Type 3.2日本語版の提供を開始いたしました。
    -ベータテストにご協力いただいた多くの皆様に、スタッフ一同、心から感謝いたします。

    -

    製品概要など、詳しくはプレスリリースをご参照下さい。

    -

    ご購入のご検討は、Movable Typeのご購入からどうぞ。

    ]]>
    -http://www.sixapart.jp/news/2005/09/29-1530.html -http://www.sixapart.jp/news/2005/09/29-1530.html -news -Thu, 29 Sep 2005 15:30:00 +0900 -
    - -シックス・アパートが、スパム対策強化の「Movable Type 3.2 日本語版」を提供開始 -<プレスリリース資料>

    - -

    シックス・アパートが、スパム対策強化の「Movable Type 3.2 日本語版」を提供開始 ~ スパムの自動判別機能や新ユーザー・インターフェースで、運用管理の機能を強化 ~

    -

    2005年9月29日
    -シックス・アパート株式会社

    -

    ブログ・ソフトウェア大手のシックス・アパート株式会社(本社:東京都港区、代表取締役:関 信浩)は、「Movable Type(ムーバブル・タイプ) 3.2 日本語版」(URL:http://www.sixapart.jp/movabletype/)を9月29日より提供開始いたします。

    ]]>
    -http://www.sixapart.jp/press_releases/2005/09/29-1529.html -http://www.sixapart.jp/press_releases/2005/09/29-1529.html -Press Releases -Thu, 29 Sep 2005 15:29:00 +0900 -
    - -スタッフを募集しています -シックス・アパートはMovable TypeやTypePadの開発エンジニアなど、スタッフを広く募集しています。具体的な募集職種は次の通りです。

    - - - -

    拡大を続ける、日本のブログ市場を積極的にリードする人材を、シックス・アパートは募集しています。上記以外の職種につきましても、お気軽にお問い合わせください。詳しい募集要項や応募方法については、求人情報のページをご覧ください。
    -

    ]]>
    -http://www.sixapart.jp/news/2005/09/27-0906.html -http://www.sixapart.jp/news/2005/09/27-0906.html -news -Tue, 27 Sep 2005 09:06:10 +0900 -
    - -サイト接続不具合に関するお詫びと復旧のお知らせ -9月24日(土)の14:45ごろから、同日18:30ごろまで、シックス・アパート社のウェブサイトが不安定になっており、断続的に接続できない不具合が発生しておりました。このため、この期間中にウェブサイトの閲覧や製品のダウンロードができませんでした。

    - -

    なお現在は不具合は解消しております。みなさまにご迷惑をおかけしたことをお詫びいたします。

    ]]>
    -http://www.sixapart.jp/news/2005/09/26-1000.html -http://www.sixapart.jp/news/2005/09/26-1000.html -news -Mon, 26 Sep 2005 10:00:56 +0900 -
    - -企業ブログ向けパッケージ「TypePad Promotion」を新発売 -シックス・アパートは、ウェブログ・サービスTypePadの企業ブログ向けパッケージ「TypePad Promotion」(タイプパッド・プロモーションの発売を10月下旬から開始いたします。

    - -

    詳しくは、プレスリリースをご参照下さい。

    ]]>
    -http://www.sixapart.jp/news/2005/09/20-1500.html -http://www.sixapart.jp/news/2005/09/20-1500.html -news -Tue, 20 Sep 2005 15:00:01 +0900 -
    - -シックス・アパートが、法人向けブログパッケージ「TypePad Promotion」を発売 -<プレスリリース資料>
    -印刷用(PDF版)

    - -


    -シックス・アパートが、法人向けブログパッケージ「TypePad Promotion」を発売
    -~PR/IRサイトやキャンペーンサイトなど企業のプロモーションニーズに特化~
    -

    -2005年9月20日
    -シックス・アパート株式会社

    - -

    ブログ・サービス大手のシックス・アパート株式会社(本社:東京都港区、代表取締役:関 信浩)は、法人向けプロモーションブログ・パッケージ「TypePad Promotion(タイプパッド・プロモーション)」(URL:http://www.sixapart.jp/typepad/typepad_promotion.html)を10月下旬より販売開始いたします。

    ]]>
    -http://www.sixapart.jp/press_releases/2005/09/20-1500.html -http://www.sixapart.jp/press_releases/2005/09/20-1500.html -Press Releases -Tue, 20 Sep 2005 15:00:00 +0900 -
    - -Six [days] Apart Week -本日、9月16日はSix Apartの創業者ミナ・トロットの誕生日です。
    -私たちの会社は、創業者のトロット夫妻(ベンとミナ)の誕生日が、6日離れていることからSix [days] Apart →Six Apartという風に名付けられています。本日から22日までの6日間を社名の由来となる Six [days] Apart Weekとして、私たちのプロダクトをご紹介させていただきます。

    - -

    今日は、ブログ・サービスのTypePad(タイプパッド)をご紹介します。
    -tp-logo.gif

    - -

    TypePadは、米国PC MAGAZINE誌の2003年EDITOR'S CHOICE とBEST OF THE YEARに選ばれております。
    -pcmag-ad.gif
    -

    ]]>
    -http://www.sixapart.jp/news/2005/09/16-1941.html -http://www.sixapart.jp/news/2005/09/16-1941.html -news -Fri, 16 Sep 2005 19:41:47 +0900 -
    - -ハイパーワークスが商用フォントを利用できるMovable Typeホスティングサービスを開始 -ソフト開発会社の有限会社ハイパーワークスは、商用フォントなど多彩なフォントをブログ上で利用できるブログ・サービス「Glyph-On!(グリフォン) Movable Type ホスティング サービス」の提供を開始しました。
    -

    ]]>
    -http://www.sixapart.jp/news/2005/09/14-1700.html -http://www.sixapart.jp/news/2005/09/14-1700.html -news -Wed, 14 Sep 2005 17:00:00 +0900 -
    - -Movable Type開発エンジニアの募集 - -勤務形態: フルタイム
    -勤務地: 東京 (赤坂)
    -職種: ソフトウェア・エンジニア
    -職務内容: Movable Typeの開発業務全般
    -募集人数: 若干名 -

    ]]>
    -http://www.sixapart.jp/jobs/2005/09/13-0007.html -http://www.sixapart.jp/jobs/2005/09/13-0007.html -Jobs -Tue, 13 Sep 2005 00:07:00 +0900 -
    - -TypePad開発エンジニアの募集 - -勤務形態: フルタイム
    -勤務地: 東京 (赤坂)
    -職種: アプリケーション・エンジニア
    -職務内容: TypePadのカスタマイズ、周辺開発
    -募集人数: 若干名 -

    ]]>
    -http://www.sixapart.jp/jobs/2005/09/13-0004.html -http://www.sixapart.jp/jobs/2005/09/13-0004.html -Jobs -Tue, 13 Sep 2005 00:04:00 +0900 -
    - -カスタマーサポート・ディレクターの募集 -勤務形態: フルタイム
    -勤務地: 東京(赤坂)
    -職種: カスタマーサポート・ディレクター
    -職務内容: TypePadやMovable Typeのカスタマーサポート業務の統括
    -募集人数: 若干名 -

    -]]>
    -http://www.sixapart.jp/jobs/2005/09/13-0003.html -http://www.sixapart.jp/jobs/2005/09/13-0003.html -Jobs -Tue, 13 Sep 2005 00:03:30 +0900 -
    - -アルバイト(マーケティング・広報アシスタント)の募集 -勤務形態: アルバイト
    -勤務地: 東京(港区)
    -職種:マーケティング・PRのアシスタント業務
    -募集人数: 若干名
    -時給:1000円~(但し、試用期間終了後に応相談)。交通費支給
    -時間:平日10時30分~18時30分まで。週3日以上(応相談)
    -

    ]]>
    -http://www.sixapart.jp/jobs/2005/09/13-0002.html -http://www.sixapart.jp/jobs/2005/09/13-0002.html -Jobs -Tue, 13 Sep 2005 00:02:00 +0900 -
    - -アルバイト(開発アシスタント)の募集 -勤務形態: アルバイト
    -勤務地: 東京(港区)
    -職種: アプリケーション開発のアシスタント業務
    -募集人数: 若干名
    -時給:1000円~(但し、試用期間終了後に応相談)。交通費支給
    -時間:平日10時30分~18時30分まで。週3日以上(応相談) -

    ]]>
    -http://www.sixapart.jp/jobs/2005/09/13-0001.html -http://www.sixapart.jp/jobs/2005/09/13-0001.html -Jobs -Tue, 13 Sep 2005 00:01:00 +0900 -
    - -TypePad Japan がバージョンアップしました。 -「TypePad Japan(タイプパッドジャパン)」において、本日、「TypePad 1.6 日本語版」へのバージョンアップを行いました。最新版となる「TypePad 1.6 日本語版」では、ブログデザインの機能強化、ポッドキャスティング対応、モブログ対応に加え、今回新たに大幅な容量アップが行われております。皆様、新しくなったTypePad Japanにどうぞご期待ください。

    - -

    なお、TypePadの携帯対応強化に関しましては、本日よりTypePad Japanのお客様を対象にオープン・ベータを開始しております。

    - -

    2005年9月5日発表のTypePad日本語版 1.6プレスリリースはこちらをご覧下さい。

    ]]>
    -http://www.sixapart.jp/news/2005/09/12-1953.html -http://www.sixapart.jp/news/2005/09/12-1953.html -news -Mon, 12 Sep 2005 19:53:07 +0900 -
    - - -
    -
    \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/samples/technorati.feed b/plugins/OStatus/extlib/XML/Feed/samples/technorati.feed deleted file mode 100755 index 6274a32cda..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/samples/technorati.feed +++ /dev/null @@ -1,54 +0,0 @@ - - - - [Technorati] Tag results for greenbelt - http://www.technorati.com/tag/greenbelt - Posts tagged with "greenbelt" on Technorati. - Mon, 08 Aug 2005 15:15:08 GMT - greenbelt - 2 - 2 - - Technorati v1.0 - - http://static.technorati.com/pix/logos/logo_reverse_sm.gif - Technorati logo - http://www.technorati.com - - - 1 - 7 - 9 - - support@technorati.com (Technorati Support) - http://blogs.law.harvad.edu/tech/rss - 60 - - Greenbelt - http://maggidawn.typepad.com/maggidawn/2005/07/greenbelt.html - So if the plan goes according to plan (!)... I'll be speaking at Greenbelt at these times: Slot 1... - http://maggidawn.typepad.com/maggidawn/2005/07/greenbelt.html - Mon, 18 Jul 2005 02:11:42 GMT - James - 2005-07-11 02:08:12 - http://www.technorati.com/cosmos/search.html?url=http%3A%2F%2Fmaggidawn.typepad.com%2Fmaggidawn%2F2005%2F07%2Fgreenbelt.html - 190 - 237 - maggi dawn - - - - Walking along the Greenbelt - http://pictureshomeless.blogspot.com/2005/06/walking-along-greenbelt.html - [IMG] Photo of homeless man walking near the greenbelt in Boise, Idaho Tags: photo homeless greenbelt Boise Idaho picture - http://pictureshomeless.blogspot.com/2005/06/walking-along-greenbelt.html - Tue, 28 Jun 2005 01:41:24 GMT - 2005-06-26 17:24:03 - http://www.technorati.com/cosmos/search.html?url=http%3A%2F%2Fpictureshomeless.blogspot.com%2F2005%2F06%2Fwalking-along-greenbelt.html - 2 - 2 - - - - diff --git a/plugins/OStatus/extlib/XML/Feed/schemas/atom.rnc b/plugins/OStatus/extlib/XML/Feed/schemas/atom.rnc deleted file mode 100755 index e662d2626c..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/schemas/atom.rnc +++ /dev/null @@ -1,338 +0,0 @@ -# -*- rnc -*- -# RELAX NG Compact 
Syntax Grammar for the -# Atom Format Specification Version 11 - -namespace atom = "http://www.w3.org/2005/Atom" -namespace xhtml = "http://www.w3.org/1999/xhtml" -namespace s = "http://www.ascc.net/xml/schematron" -namespace local = "" - -start = atomFeed | atomEntry - -# Common attributes - -atomCommonAttributes = - attribute xml:base { atomUri }?, - attribute xml:lang { atomLanguageTag }?, - undefinedAttribute* - -# Text Constructs - -atomPlainTextConstruct = - atomCommonAttributes, - attribute type { "text" | "html" }?, - text - -atomXHTMLTextConstruct = - atomCommonAttributes, - attribute type { "xhtml" }, - xhtmlDiv - -atomTextConstruct = atomPlainTextConstruct | atomXHTMLTextConstruct - -# Person Construct - -atomPersonConstruct = - atomCommonAttributes, - (element atom:name { text } - & element atom:uri { atomUri }? - & element atom:email { atomEmailAddress }? - & extensionElement*) - -# Date Construct - -atomDateConstruct = - atomCommonAttributes, - xsd:dateTime - -# atom:feed - -atomFeed = - [ - s:rule [ - context = "atom:feed" - s:assert [ - test = "atom:author or not(atom:entry[not(atom:author)])" - "An atom:feed must have an atom:author unless all " - ~ "of its atom:entry children have an atom:author." - ] - ] - ] - element atom:feed { - atomCommonAttributes, - (atomAuthor* - & atomCategory* - & atomContributor* - & atomGenerator? - & atomIcon? - & atomId - & atomLink* - & atomLogo? - & atomRights? - & atomSubtitle? - & atomTitle - & atomUpdated - & extensionElement*), - atomEntry* - } - -# atom:entry - -atomEntry = - [ - s:rule [ - context = "atom:entry" - s:assert [ - test = "atom:link[@rel='alternate'] " - ~ "or atom:link[not(@rel)] " - ~ "or atom:content" - "An atom:entry must have at least one atom:link element " - ~ "with a rel attribute of 'alternate' " - ~ "or an atom:content." 
- ] - ] - s:rule [ - context = "atom:entry" - s:assert [ - test = "atom:author or " - ~ "../atom:author or atom:source/atom:author" - "An atom:entry must have an atom:author " - ~ "if its feed does not." - ] - ] - ] - element atom:entry { - atomCommonAttributes, - (atomAuthor* - & atomCategory* - & atomContent? - & atomContributor* - & atomId - & atomLink* - & atomPublished? - & atomRights? - & atomSource? - & atomSummary? - & atomTitle - & atomUpdated - & extensionElement*) - } - -# atom:content - -atomInlineTextContent = - element atom:content { - atomCommonAttributes, - attribute type { "text" | "html" }?, - (text)* - } - -atomInlineXHTMLContent = - element atom:content { - atomCommonAttributes, - attribute type { "xhtml" }, - xhtmlDiv - } - -atomInlineOtherContent = - element atom:content { - atomCommonAttributes, - attribute type { atomMediaType }?, - (text|anyElement)* - } - -atomOutOfLineContent = - element atom:content { - atomCommonAttributes, - attribute type { atomMediaType }?, - attribute src { atomUri }, - empty - } - -atomContent = atomInlineTextContent - | atomInlineXHTMLContent - | atomInlineOtherContent - | atomOutOfLineContent - -# atom:author - -atomAuthor = element atom:author { atomPersonConstruct } - -# atom:category - -atomCategory = - element atom:category { - atomCommonAttributes, - attribute term { text }, - attribute scheme { atomUri }?, - attribute label { text }?, - undefinedContent - } - -# atom:contributor - -atomContributor = element atom:contributor { atomPersonConstruct } - -# atom:generator - -atomGenerator = element atom:generator { - atomCommonAttributes, - attribute uri { atomUri }?, - attribute version { text }?, - text -} - -# atom:icon - -atomIcon = element atom:icon { - atomCommonAttributes, - (atomUri) -} - -# atom:id - -atomId = element atom:id { - atomCommonAttributes, - (atomUri) -} - -# atom:logo - -atomLogo = element atom:logo { - atomCommonAttributes, - (atomUri) -} - -# atom:link - -atomLink = - element atom:link { 
- atomCommonAttributes, - attribute href { atomUri }, - attribute rel { atomNCName | atomUri }?, - attribute type { atomMediaType }?, - attribute hreflang { atomLanguageTag }?, - attribute title { text }?, - attribute length { text }?, - undefinedContent - } - -# atom:published - -atomPublished = element atom:published { atomDateConstruct } - -# atom:rights - -atomRights = element atom:rights { atomTextConstruct } - -# atom:source - -atomSource = - element atom:source { - atomCommonAttributes, - (atomAuthor* - & atomCategory* - & atomContributor* - & atomGenerator? - & atomIcon? - & atomId? - & atomLink* - & atomLogo? - & atomRights? - & atomSubtitle? - & atomTitle? - & atomUpdated? - & extensionElement*) - } - -# atom:subtitle - -atomSubtitle = element atom:subtitle { atomTextConstruct } - -# atom:summary - -atomSummary = element atom:summary { atomTextConstruct } - -# atom:title - -atomTitle = element atom:title { atomTextConstruct } - -# atom:updated - -atomUpdated = element atom:updated { atomDateConstruct } - -# Low-level simple types - -atomNCName = xsd:string { minLength = "1" pattern = "[^:]*" } - -# Whatever a media type is, it contains at least one slash -atomMediaType = xsd:string { pattern = ".+/.+" } - -# As defined in RFC 3066 -atomLanguageTag = xsd:string { - pattern = "[A-Za-z]{1,8}(-[A-Za-z0-9]{1,8})*" -} - -# Unconstrained; it's not entirely clear how IRI fit into -# xsd:anyURI so let's not try to constrain it here -atomUri = text - -# Whatever an email address is, it contains at least one @ -atomEmailAddress = xsd:string { pattern = ".+@.+" } - -# Simple Extension - -simpleExtensionElement = - element * - atom:* { - text - } - -# Structured Extension - -structuredExtensionElement = - element * - atom:* { - (attribute * { text }+, - (text|anyElement)*) - | (attribute * { text }*, - (text?, anyElement+, (text|anyElement)*)) - } - -# Other Extensibility - -extensionElement = - simpleExtensionElement | structuredExtensionElement - -undefinedAttribute 
= - attribute * - (xml:base | xml:lang | local:*) { text } - -undefinedContent = (text|anyForeignElement)* - -anyElement = - element * { - (attribute * { text } - | text - | anyElement)* - } - -anyForeignElement = - element * - atom:* { - (attribute * { text } - | text - | anyElement)* - } - -# XHTML - -anyXHTML = element xhtml:* { - (attribute * { text } - | text - | anyXHTML)* -} - -xhtmlDiv = element xhtml:div { - (attribute * { text } - | text - | anyXHTML)* -} - -# EOF \ No newline at end of file diff --git a/plugins/OStatus/extlib/XML/Feed/schemas/rss10.rnc b/plugins/OStatus/extlib/XML/Feed/schemas/rss10.rnc deleted file mode 100755 index 7250947887..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/schemas/rss10.rnc +++ /dev/null @@ -1,113 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/plugins/OStatus/extlib/XML/Feed/schemas/rss11.rnc b/plugins/OStatus/extlib/XML/Feed/schemas/rss11.rnc deleted file mode 100755 index c8633766f6..0000000000 --- a/plugins/OStatus/extlib/XML/Feed/schemas/rss11.rnc +++ /dev/null @@ -1,218 +0,0 @@ - - - - - - - - http://purl.org/net/rss1.1#Channel - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - http://purl.org/net/rss1.1#title - - - - - - - - - - - - - - http://purl.org/net/rss1.1#link - - - - - - - - - - http://purl.org/net/rss1.1#description - - - - - - - - - - - - - http://purl.org/net/rss1.1#image - - - - - - - - - - - - - - - - - - - - - - - - - http://purl.org/net/rss1.1#url - - - - - - - - - - http://purl.org/net/rss1.1#items - - - - - - - - - - - - - - - - - http://purl.org/net/rss1.1#item - - - - - - - - - - - - - - - - - - - - - - - - - - - - http://purl.org/net/rss1.1#Any - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Resource - - - - - Collection - - - - diff --git 
a/plugins/OStatus/extlib/xml-feed-parser-bug-16416.patch b/plugins/OStatus/extlib/xml-feed-parser-bug-16416.patch deleted file mode 100644 index c53bd97374..0000000000 --- a/plugins/OStatus/extlib/xml-feed-parser-bug-16416.patch +++ /dev/null @@ -1,14 +0,0 @@ -diff --git a/htdocs/lib/pear/XML/Feed/Parser/RSS2.php b/htdocs/lib/pear/XML/Feed/Parser/RSS2.php -index c5d79d1..308a4ab 100644 ---- a/htdocs/lib/pear/XML/Feed/Parser/RSS2.php -+++ b/htdocs/lib/pear/XML/Feed/Parser/RSS2.php -@@ -321,7 +321,8 @@ class XML_Feed_Parser_RSS2 extends XML_Feed_Parser_Type - */ - function getLink($offset, $attribute = 'href', $params = array()) - { -- $links = $this->model->getElementsByTagName('link'); -+ $xPath = new DOMXPath($this->model); -+ $links = $xPath->query('//link'); - - if ($links->length <= $offset) { - return false; diff --git a/plugins/OStatus/tests/FeedMungerTest.php b/plugins/OStatus/tests/FeedMungerTest.php deleted file mode 100644 index 0ce24c9fb8..0000000000 --- a/plugins/OStatus/tests/FeedMungerTest.php +++ /dev/null @@ -1,147 +0,0 @@ -profile(); - - foreach ($expected as $field => $val) { - $this->assertEquals($expected[$field], $profile->$field, "profile->$field"); - } - } - - static public function profileProvider() - { - return array( - array(self::samplefeed(), - array('nickname' => 'leŭksman', // @todo does this need to be asciified? 
- 'fullname' => 'leŭksman', - 'bio' => 'reticula, electronica, & oddities', - 'homepage' => 'http://leuksman.com/log'))); - } - - /** - * @dataProvider noticeProvider - * - */ - public function testNotices($xml, $entryIndex, $expected) - { - $feed = new XML_Feed_Parser($xml, false, false, true); - $entry = $feed->getEntryByOffset($entryIndex); - - $munger = new FeedMunger($feed); - $notice = $munger->noticeFromEntry($entry); - - $this->assertTrue(mb_strlen($notice) <= Notice::maxContent()); - $this->assertEquals($expected, $notice); - } - - static public function noticeProvider() - { - return array( - array('A fairly short titlehttp://example.com/short/link', 0, - 'New post: "A fairly short title" http://example.com/short/link'), - // Requires URL shortening ... - array('A fairly short titlehttp://example.com/but/a/very/long/link/indeed/this/is/far/too/long/for/mere/humans/to/comprehend/oh/my/gosh', 0, - 'New post: "A fairly short title" http://ur1.ca/g2o1'), - array('A fairly long title in this case, which will have to get cut down at some point alongside its very long link. Really who even makes titles this long? It\'s just ridiculous imo...http://example.com/but/a/very/long/link/indeed/this/is/far/too/long/for/mere/humans/to/comprehend/oh/my/gosh', 0, - 'New post: "A fairly long title in this case, which will have to get cut down at some point alongside its very long li…" http://ur1.ca/g2o1'), - // Some real sample feeds - array(self::samplefeed(), 0, - 'New post: "Compiling PHP on Snow Leopard" http://leuksman.com/log/2009/11/12/compiling-php-on-snow-leopard/'), - array(self::samplefeedBlogspot(), 0, - 'New post: "I love posting" http://briontest.blogspot.com/2009/11/i-love-posting.html'), - array(self::samplefeedBlogspot(), 1, - 'New post: "Hey dude" http://briontest.blogspot.com/2009/11/hey-dude.html'), - ); - } - - static protected function samplefeed() - { - $xml = '<' . '?xml version="1.0" encoding="UTF-8"?' . ">\n"; - $samplefeed = $xml . 
<< - - - leŭksman - - http://leuksman.com/log - reticula, electronica, & oddities - - Thu, 12 Nov 2009 17:44:42 +0000 - http://wordpress.org/?v=2.8.6 - en - hourly - 1 - - - Compiling PHP on Snow Leopard - http://leuksman.com/log/2009/11/12/compiling-php-on-snow-leopard/ - http://leuksman.com/log/2009/11/12/compiling-php-on-snow-leopard/#comments - Thu, 12 Nov 2009 17:44:42 +0000 - brion - - - - - http://leuksman.com/log/?p=649 - - If you’ve been having trouble compiling your own PHP installations on Mac OS X 10.6, here’s the secret to making it not suck! After running the configure script, edit the generated Makefile and make these fixes:

    -
      -
    • Find the EXTRA_LIBS definition and add -lresolv to the end
    • -
    • Find the EXE_EXT definition and remove .dSYM
    • -
    -

    Standard make and make install should work from here…

    -

    For reference, here’s the whole configure line I currently use; MySQL is installed from the downloadable installer; other deps from MacPorts:

    -

    ‘./configure’ ‘–prefix=/opt/php52′ ‘–with-mysql=/usr/local/mysql’ ‘–with-zlib’ ‘–with-bz2′ ‘–enable-mbstring’ ‘–enable-exif’ ‘–enable-fastcgi’ ‘–with-xmlrpc’ ‘–with-xsl’ ‘–with-readline=/opt/local’ –without-iconv –with-gd –with-png-dir=/opt/local –with-jpeg-dir=/opt/local –with-curl –with-gettext=/opt/local –with-mysqli=/usr/local/mysql/bin/mysql_config –with-tidy=/opt/local –enable-pcntl –with-openssl

    -]]>
    - http://leuksman.com/log/2009/11/12/compiling-php-on-snow-leopard/feed/ - 0 -
    -
    - -END; - return $samplefeed; - } - - static protected function samplefeedBlogspot() - { - return <<tag:blogger.com,1999:blog-77800835085316971672009-11-19T12:56:11.233-08:00Brion's Cool Test Blogbrionhttp://www.blogger.com/profile/12932299467049762017noreply@blogger.comBlogger2125tag:blogger.com,1999:blog-7780083508531697167.post-84566718790002906772009-11-19T12:55:00.000-08:002009-11-19T12:56:11.241-08:00I love postingIt's pretty awesome, if you like that sort of thing.<div class="blogger-post-footer"><img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7780083508531697167-8456671879000290677?l=briontest.blogspot.com' alt='' /></div>brionhttp://www.blogger.com/profile/12932299467049762017noreply@blogger.com0tag:blogger.com,1999:blog-7780083508531697167.post-82022969178973466332009-11-18T13:52:00.001-08:002009-11-18T13:52:48.444-08:00Hey dudetestingggggggggg<div class="blogger-post-footer"><img width='1' height='1' src='https://blogger.googleusercontent.com/tracker/7780083508531697167-8202296917897346633?l=briontest.blogspot.com' alt='' /></div>brionhttp://www.blogger.com/profile/12932299467049762017noreply@blogger.com0 -END; - } -} From 61a072b3c492fd1b336e84655ffb6a28547acba7 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 20 Feb 2010 13:23:08 -0500 Subject: [PATCH 32/62] Add a library to mint tag URIs We've been making pretty crummy tag: URIs for a while. We should continue to favor HTTP URIs, since it's nice to be able to discover things about an object you've shared the ID of. Where that's not possible, this makes nicer tag URIs. 
--- lib/default.php | 2 +- lib/taguri.php | 96 ++++++++++++++++++++++++++++++++++++++++++++ tests/TagURITest.php | 36 +++++++++++++++++ 3 files changed, 133 insertions(+), 1 deletion(-) create mode 100644 lib/taguri.php create mode 100644 tests/TagURITest.php diff --git a/lib/default.php b/lib/default.php index 4f3ea00f2a..bb7708bfcd 100644 --- a/lib/default.php +++ b/lib/default.php @@ -175,7 +175,7 @@ $default = array('enabled' => false), 'integration' => array('source' => 'StatusNet', # source attribute for Twitter - 'taguri' => $_server.',2009'), # base for tag URIs + 'taguri' => null), # base for tag URIs 'twitter' => array('enabled' => true, 'consumer_key' => null, diff --git a/lib/taguri.php b/lib/taguri.php new file mode 100644 index 0000000000..d8398eded5 --- /dev/null +++ b/lib/taguri.php @@ -0,0 +1,96 @@ +. + * + * @category URI + * @package StatusNet + * @author Evan Prodromou + * @copyright 2010 StatusNet, Inc. + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPLv3 + * @link http://status.net/ + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +/** + * Mint tag: URIs + * + * tag: URIs are unique identifiers according to http://tools.ietf.org/html/rfc4151. + * + * We use them for creating URIs for things that can't be HTTP retrieved. + * + * @category URI + * @package StatusNet + * @author Evan Prodromou + * @license http://www.fsf.org/licensing/licenses/agpl-3.0.html AGPLv3 + * @link http://status.net/ + */ + +class TagURI +{ + /** + * Return the base part of a tag URI + * + * Note: use mint() instead. 
+ * + * @return string Tag URI base to use + */ + + static function base() + { + $base = common_config('integration', 'taguri'); + + if (empty($base)) { + + $base = common_config('site', 'server').','.date('Y-m-d'); + + $pathPart = trim(common_config('site', 'path'), '/'); + + if (!empty($pathPart)) { + $base .= ':'.str_replace('/', ':', $pathPart); + } + } + + return $base; + } + + /** + * Make a new tag URI + * + * Builds the proper base and creates all the parts + * + * @return string minted URI + */ + + static function mint() + { + $base = self::base(); + + $args = func_get_args(); + + $format = array_shift($args); + + $extra = vsprintf($format, $args); + + return 'tag:'.$base.':'.$extra; + } +} diff --git a/tests/TagURITest.php b/tests/TagURITest.php new file mode 100644 index 0000000000..d23f8bfe66 --- /dev/null +++ b/tests/TagURITest.php @@ -0,0 +1,36 @@ +assertEquals($uri, $minted); + } + + static public function provider() + { + return array(array('favorite:%d:%d', + array(1, 3), + 'tag:example.net,'.date('Y-m-d').':apps:statusnet:favorite:1:3')); + } +} + From f891b135fbbf79bee6f9753e6a9cded65f219ab7 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sat, 20 Feb 2010 10:20:42 -0800 Subject: [PATCH 33/62] OStatus: fix regressions in plugin & usersalmon action. Sub/unsub notifications are working again. 
--- plugins/OStatus/OStatusPlugin.php | 22 --- plugins/OStatus/actions/usersalmon.php | 189 +++++++++++++++++++++++++ 2 files changed, 189 insertions(+), 22 deletions(-) diff --git a/plugins/OStatus/OStatusPlugin.php b/plugins/OStatus/OStatusPlugin.php index 9e6d03177f..e78e658a6c 100644 --- a/plugins/OStatus/OStatusPlugin.php +++ b/plugins/OStatus/OStatusPlugin.php @@ -357,26 +357,4 @@ class OStatusPlugin extends Plugin return true; } - - function onEndUnsubscribe($subscriber, $other) - { - $user = User::staticGet('id', $subscriber->id); - - if (empty($user)) { - return true; - } - - $oprofile = Ostatus_profile::staticGet('profile_id', $other->id); - - if (empty($oprofile)) { - return true; - } - - // We have a local user subscribing to a remote profile; make the - // magic happen! - - $oprofile->notify($subscriber, ActivityVerb::UNFOLLOW); - - return true; - } } diff --git a/plugins/OStatus/actions/usersalmon.php b/plugins/OStatus/actions/usersalmon.php index e69de29bb2..4363488ddc 100644 --- a/plugins/OStatus/actions/usersalmon.php +++ b/plugins/OStatus/actions/usersalmon.php @@ -0,0 +1,189 @@ +. + */ + +/** + * @package OStatusPlugin + * @author James Walker + */ + +if (!defined('STATUSNET')) { + exit(1); +} + +class UsersalmonAction extends SalmonAction +{ + function prepare($args) + { + parent::prepare($args); + + $id = $this->trimmed('id'); + + if (!$id) { + $this->clientError(_('No ID.')); + } + + $this->user = User::staticGet('id', $id); + + if (empty($this->user)) { + $this->clientError(_('No such user.')); + } + + return true; + } + + /** + * We've gotten a post event on the Salmon backchannel, probably a reply. + * + * @todo validate if we need to handle this post, then call into + * ostatus_profile's general incoming-post handling. 
+ */ + function handlePost() + { + switch ($this->act->object->type) { + case ActivityObject::ARTICLE: + case ActivityObject::BLOGENTRY: + case ActivityObject::NOTE: + case ActivityObject::STATUS: + case ActivityObject::COMMENT: + break; + default: + throw new ClientException("Can't handle that kind of post."); + } + + // Notice must either be a) in reply to a notice by this user + // or b) to the attention of this user + + $context = $this->act->context; + + if (!empty($context->replyToID)) { + $notice = Notice::staticGet('uri', $context->replyToID); + if (empty($notice)) { + throw new ClientException("In reply to unknown notice"); + } + if ($notice->profile_id != $this->user->id) { + throw new ClientException("In reply to a notice not by this user"); + } + } else if (!empty($context->attention)) { + if (!in_array($context->attention, $this->user->uri)) { + throw new ClientException("To the attention of user(s) not including this one!"); + } + } else { + throw new ClientException("Not to anyone in reply to anything!"); + } + + $profile = $this->ensureProfile(); + // @fixme do something with the post + } + + /** + * We've gotten a follow/subscribe notification from a remote user. + * Save a subscription relationship for them. + */ + + function handleFollow() + { + $oprofile = $this->ensureProfile(); + if ($oprofile) { + common_log(LOG_INFO, "Setting up subscription from remote {$oprofile->uri} to local {$this->user->nickname}"); + $oprofile->subscribeRemoteToLocal($this->user); + } else { + common_log(LOG_INFO, "Can't set up subscription from remote; missing profile."); + } + } + + /** + * We've gotten an unfollow/unsubscribe notification from a remote user. + * Check if we have a subscription relationship for them and kill it. + * + * @fixme probably catch exceptions on fail? 
+ */ + function handleUnfollow() + { + $oprofile = $this->ensureProfile(); + if ($oprofile) { + common_log(LOG_INFO, "Canceling subscription from remote {$oprofile->uri} to local {$this->user->nickname}"); + Subscription::cancel($oprofile->localProfile(), $this->user->getProfile()); + } else { + common_log(LOG_ERR, "Can't cancel subscription from remote, didn't find the profile"); + } + } + + /** + * Remote user likes one of our posts. + * Confirm the post is ours, and save a local favorite event. + */ + + function handleFavorite() + { + // WORST VARIABLE NAME EVER + $object = $this->act->object; + + switch ($this->act->object->type) { + case ActivityObject::ARTICLE: + case ActivityObject::BLOGENTRY: + case ActivityObject::NOTE: + case ActivityObject::STATUS: + case ActivityObject::COMMENT: + break; + default: + throw new ClientException("Can't handle that kind of object for liking/faving."); + } + + $notice = Notice::staticGet('uri', $object->id); + + if (empty($notice)) { + throw new ClientException("Notice with ID $object->id unknown."); + } + + if ($notice->profile_id != $this->user->id) { + throw new ClientException("Notice with ID $object->id not posted by $this->user->id."); + } + + $profile = $this->ensureProfile(); + + $old = Fave::pkeyGet(array('user_id' => $profile->id, + 'notice_id' => $notice->id)); + + if (!empty($old)) { + throw new ClientException("We already know that's a fave!"); + } + + $fave = new Fave(); + + // @fixme need to change this attribute name, maybe references + $fave->user_id = $profile->id; + $fave->notice_id = $notice->id; + + $result = $fave->insert(); + + if (!$result) { + common_log_db_error($fave, 'INSERT', __FILE__); + throw new ServerException('Could not save new favorite.'); + } + } + + /** + * Remote user doesn't like one of our posts after all! + * Confirm the post is ours, and save a local favorite event. 
+ */ + function handleUnfavorite() + { + } + +} From f3b08461bd476d368d444d48025709fb6a111b7d Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 20 Feb 2010 13:31:20 -0500 Subject: [PATCH 34/62] Change to use TagURI::base() instead of common_config() I changed the way that tag: URIs are minted, so we now use the right base. Ideally most of these would use HTTP URIs instead, but for now at least they use the right base. --- actions/apidirectmessage.php | 2 +- actions/apigrouplist.php | 2 +- actions/apigrouplistall.php | 2 +- actions/apitimelinefavorites.php | 2 +- actions/apitimelinefriends.php | 2 +- actions/apitimelinegroup.php | 2 +- actions/apitimelinehome.php | 2 +- actions/apitimelinementions.php | 2 +- actions/apitimelinepublic.php | 2 +- actions/apitimelineretweetedtome.php | 2 +- actions/apitimelineretweetsofme.php | 2 +- actions/apitimelinetag.php | 2 +- actions/apitimelineuser.php | 2 +- actions/twitapisearchatom.php | 4 ++-- lib/api.php | 4 ++-- 15 files changed, 17 insertions(+), 17 deletions(-) diff --git a/actions/apidirectmessage.php b/actions/apidirectmessage.php index 5fbc46518b..5355acf825 100644 --- a/actions/apidirectmessage.php +++ b/actions/apidirectmessage.php @@ -79,7 +79,7 @@ class ApiDirectMessageAction extends ApiAuthAction } $server = common_root_url(); - $taguribase = common_config('integration', 'taguri'); + $taguribase = TagURI::base(); if ($this->arg('sent')) { diff --git a/actions/apigrouplist.php b/actions/apigrouplist.php index 66b67a030e..605b382326 100644 --- a/actions/apigrouplist.php +++ b/actions/apigrouplist.php @@ -93,7 +93,7 @@ class ApiGroupListAction extends ApiBareAuthAction $sitename = common_config('site', 'name'); $title = sprintf(_("%s's groups"), $this->user->nickname); - $taguribase = common_config('integration', 'taguri'); + $taguribase = TagURI::base(); $id = "tag:$taguribase:Groups"; $link = common_local_url( 'usergroups', diff --git a/actions/apigrouplistall.php b/actions/apigrouplistall.php index 
1921c1f193..d2ef2978aa 100644 --- a/actions/apigrouplistall.php +++ b/actions/apigrouplistall.php @@ -88,7 +88,7 @@ class ApiGroupListAllAction extends ApiPrivateAuthAction $sitename = common_config('site', 'name'); $title = sprintf(_("%s groups"), $sitename); - $taguribase = common_config('integration', 'taguri'); + $taguribase = TagURI::base(); $id = "tag:$taguribase:Groups"; $link = common_local_url('groups'); $subtitle = sprintf(_("groups on %s"), $sitename); diff --git a/actions/apitimelinefavorites.php b/actions/apitimelinefavorites.php index f7f900ddfb..c89d02247a 100644 --- a/actions/apitimelinefavorites.php +++ b/actions/apitimelinefavorites.php @@ -110,7 +110,7 @@ class ApiTimelineFavoritesAction extends ApiBareAuthAction $this->user->nickname ); - $taguribase = common_config('integration', 'taguri'); + $taguribase = TagURI::base(); $id = "tag:$taguribase:Favorites:" . $this->user->id; $subtitle = sprintf( diff --git a/actions/apitimelinefriends.php b/actions/apitimelinefriends.php index 0af04fe4fb..2db76857e3 100644 --- a/actions/apitimelinefriends.php +++ b/actions/apitimelinefriends.php @@ -112,7 +112,7 @@ class ApiTimelineFriendsAction extends ApiBareAuthAction $avatar = $profile->getAvatar(AVATAR_PROFILE_SIZE); $sitename = common_config('site', 'name'); $title = sprintf(_("%s and friends"), $this->user->nickname); - $taguribase = common_config('integration', 'taguri'); + $taguribase = TagURI::base(); $id = "tag:$taguribase:FriendsTimeline:" . 
$this->user->id; $subtitle = sprintf( diff --git a/actions/apitimelinegroup.php b/actions/apitimelinegroup.php index 3c74e36b56..1d0c4afdd0 100644 --- a/actions/apitimelinegroup.php +++ b/actions/apitimelinegroup.php @@ -107,7 +107,7 @@ class ApiTimelineGroupAction extends ApiPrivateAuthAction $sitename = common_config('site', 'name'); $avatar = $this->group->homepage_logo; $title = sprintf(_("%s timeline"), $this->group->nickname); - $taguribase = common_config('integration', 'taguri'); + $taguribase = TagURI::base(); $id = "tag:$taguribase:GroupTimeline:" . $this->group->id; $subtitle = sprintf( diff --git a/actions/apitimelinehome.php b/actions/apitimelinehome.php index ae41680702..0c72f4020c 100644 --- a/actions/apitimelinehome.php +++ b/actions/apitimelinehome.php @@ -113,7 +113,7 @@ class ApiTimelineHomeAction extends ApiBareAuthAction $avatar = $profile->getAvatar(AVATAR_PROFILE_SIZE); $sitename = common_config('site', 'name'); $title = sprintf(_("%s and friends"), $this->user->nickname); - $taguribase = common_config('integration', 'taguri'); + $taguribase = TagURI::base(); $id = "tag:$taguribase:HomeTimeline:" . $this->user->id; $subtitle = sprintf( diff --git a/actions/apitimelinementions.php b/actions/apitimelinementions.php index d2e31d0bdd..a39c63346a 100644 --- a/actions/apitimelinementions.php +++ b/actions/apitimelinementions.php @@ -117,7 +117,7 @@ class ApiTimelineMentionsAction extends ApiBareAuthAction _('%1$s / Updates mentioning %2$s'), $sitename, $this->user->nickname ); - $taguribase = common_config('integration', 'taguri'); + $taguribase = TagURI::base(); $id = "tag:$taguribase:Mentions:" . 
$this->user->id; $link = common_local_url( 'replies', diff --git a/actions/apitimelinepublic.php b/actions/apitimelinepublic.php index c1fa72a3ee..1ff0fd2617 100644 --- a/actions/apitimelinepublic.php +++ b/actions/apitimelinepublic.php @@ -109,7 +109,7 @@ class ApiTimelinePublicAction extends ApiPrivateAuthAction $sitename = common_config('site', 'name'); $sitelogo = (common_config('site', 'logo')) ? common_config('site', 'logo') : Theme::path('logo.png'); $title = sprintf(_("%s public timeline"), $sitename); - $taguribase = common_config('integration', 'taguri'); + $taguribase = TagURI::base(); $id = "tag:$taguribase:PublicTimeline"; $link = common_root_url(); $subtitle = sprintf(_("%s updates from everyone!"), $sitename); diff --git a/actions/apitimelineretweetedtome.php b/actions/apitimelineretweetedtome.php index e47bc30b85..73e35c86bf 100644 --- a/actions/apitimelineretweetedtome.php +++ b/actions/apitimelineretweetedtome.php @@ -109,7 +109,7 @@ class ApiTimelineRetweetedToMeAction extends ApiAuthAction $profile = $this->auth_user->getProfile(); $title = sprintf(_("Repeated to %s"), $this->auth_user->nickname); - $taguribase = common_config('integration', 'taguri'); + $taguribase = TagURI::base(); $id = "tag:$taguribase:RepeatedToMe:" . $this->auth_user->id; $link = common_local_url('all', array('nickname' => $this->auth_user->nickname)); diff --git a/actions/apitimelineretweetsofme.php b/actions/apitimelineretweetsofme.php index 26706a75e7..c77912fd0f 100644 --- a/actions/apitimelineretweetsofme.php +++ b/actions/apitimelineretweetsofme.php @@ -112,7 +112,7 @@ class ApiTimelineRetweetsOfMeAction extends ApiAuthAction $profile = $this->auth_user->getProfile(); $title = sprintf(_("Repeats of %s"), $this->auth_user->nickname); - $taguribase = common_config('integration', 'taguri'); + $taguribase = TagURI::base(); $id = "tag:$taguribase:RepeatsOfMe:" . 
$this->auth_user->id; header('Content-Type: application/atom+xml; charset=utf-8'); diff --git a/actions/apitimelinetag.php b/actions/apitimelinetag.php index 5b6ded4c04..a29061fccf 100644 --- a/actions/apitimelinetag.php +++ b/actions/apitimelinetag.php @@ -105,7 +105,7 @@ class ApiTimelineTagAction extends ApiPrivateAuthAction $this->tag, $sitename ); - $taguribase = common_config('integration', 'taguri'); + $taguribase = TagURI::base(); $id = "tag:$taguribase:TagTimeline:".$tag; switch($this->format) { diff --git a/actions/apitimelineuser.php b/actions/apitimelineuser.php index 9f7ec4c236..3e849cc786 100644 --- a/actions/apitimelineuser.php +++ b/actions/apitimelineuser.php @@ -116,7 +116,7 @@ class ApiTimelineUserAction extends ApiBareAuthAction $sitename = common_config('site', 'name'); $title = sprintf(_("%s timeline"), $this->user->nickname); - $taguribase = common_config('integration', 'taguri'); + $taguribase = TagURI::base(); $id = "tag:$taguribase:UserTimeline:" . $this->user->id; $link = common_local_url( 'showstream', diff --git a/actions/twitapisearchatom.php b/actions/twitapisearchatom.php index baed2a0c7c..e389ddec84 100644 --- a/actions/twitapisearchatom.php +++ b/actions/twitapisearchatom.php @@ -245,7 +245,7 @@ class TwitapisearchatomAction extends ApiAction 'xmlns:twitter' => 'http://api.twitter.com/', 'xml:lang' => 'en-US')); // XXX Other locales ? 
- $taguribase = common_config('integration', 'taguri'); + $taguribase = TagURI::base(); $this->element('id', null, "tag:$taguribase:search/$server"); $site_uri = common_path(false); @@ -329,7 +329,7 @@ class TwitapisearchatomAction extends ApiAction $this->elementStart('entry'); - $taguribase = common_config('integration', 'taguri'); + $taguribase = TagURI::base(); $this->element('id', null, "tag:$taguribase:$notice->id"); $this->element('published', null, common_date_w3dtf($notice->created)); diff --git a/lib/api.php b/lib/api.php index 22eef7436d..0bcf4cc21a 100644 --- a/lib/api.php +++ b/lib/api.php @@ -358,7 +358,7 @@ class ApiAction extends Action $entry['link'] = common_local_url('shownotice', array('notice' => $notice->id)); $entry['published'] = common_date_iso8601($notice->created); - $taguribase = common_config('integration', 'taguri'); + $taguribase = TagURI::base(); $entry['id'] = "tag:$taguribase:$entry[link]"; $entry['updated'] = $entry['published']; @@ -802,7 +802,7 @@ class ApiAction extends Action $entry['link'] = common_local_url('showmessage', array('message' => $message->id)); $entry['published'] = common_date_iso8601($message->created); - $taguribase = common_config('integration', 'taguri'); + $taguribase = TagURI::base(); $entry['id'] = "tag:$taguribase:$entry[link]"; $entry['updated'] = $entry['published']; From ea9d6f21ecaff6e26ebe30775d1ddcd6d68d0a11 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sat, 20 Feb 2010 12:46:48 -0800 Subject: [PATCH 35/62] OStatus subscription page fixups; works but needs lots of UI loving! 
- ostatussub via subscribe button now works again (changed to take profile instead of feed, patched up to the new discovery) - added a quickie hack to allow putting your remote profile URI in place of webfinger acct through the remote-sub button (needs to be patched up to do proper discovery via XRDS or a link or something) --- plugins/OStatus/actions/ostatusinit.php | 88 ++++++---- plugins/OStatus/actions/ostatussub.php | 218 ++++++++++++++---------- plugins/OStatus/actions/webfinger.php | 2 +- 3 files changed, 186 insertions(+), 122 deletions(-) diff --git a/plugins/OStatus/actions/ostatusinit.php b/plugins/OStatus/actions/ostatusinit.php index d21774420d..4afde2c36e 100644 --- a/plugins/OStatus/actions/ostatusinit.php +++ b/plugins/OStatus/actions/ostatusinit.php @@ -37,7 +37,7 @@ class OStatusInitAction extends Action parent::prepare($args); if (common_logged_in()) { - $this->clientError(_('You can use the local subscription!')); + $this->clientError(_m('You can use the local subscription!')); return false; } @@ -55,7 +55,7 @@ class OStatusInitAction extends Action /* Use a session token for CSRF protection. */ $token = $this->trimmed('token'); if (!$token || $token != common_session_token()) { - $this->showForm(_('There was a problem with your session token. '. + $this->showForm(_m('There was a problem with your session token. '. 
'Try again, please.')); return; } @@ -73,7 +73,7 @@ class OStatusInitAction extends Action $this->xw->startDocument('1.0', 'UTF-8'); $this->elementStart('html'); $this->elementStart('head'); - $this->element('title', null, _('Subscribe to user')); + $this->element('title', null, _m('Subscribe to user')); $this->elementEnd('head'); $this->elementStart('body'); $this->showContent(); @@ -91,50 +91,78 @@ class OStatusInitAction extends Action 'class' => 'form_settings', 'action' => common_local_url('ostatusinit'))); $this->elementStart('fieldset'); - $this->element('legend', null, sprintf(_('Subscribe to %s'), $this->nickname)); + $this->element('legend', null, sprintf(_m('Subscribe to %s'), $this->nickname)); $this->hidden('token', common_session_token()); $this->elementStart('ul', 'form_data'); $this->elementStart('li', array('id' => 'ostatus_nickname')); - $this->input('nickname', _('User nickname'), $this->nickname, - _('Nickname of the user you want to follow')); + $this->input('nickname', _m('User nickname'), $this->nickname, + _m('Nickname of the user you want to follow')); $this->elementEnd('li'); $this->elementStart('li', array('id' => 'ostatus_profile')); - $this->input('acct', _('Profile Account'), $this->acct, - _('Your account id (i.e. user@identi.ca)')); + $this->input('acct', _m('Profile Account'), $this->acct, + _m('Your account id (i.e. user@identi.ca)')); $this->elementEnd('li'); $this->elementEnd('ul'); - $this->submit('submit', _('Subscribe')); + $this->submit('submit', _m('Subscribe')); $this->elementEnd('fieldset'); $this->elementEnd('form'); } function ostatusConnect() { - $w = new Webfinger; - - $result = $w->lookup($this->acct); - foreach ($result->links as $link) { - if ($link['rel'] == 'http://ostatus.org/schema/1.0/subscribe') { - // We found a URL - let's redirect! 
- - $user = User::staticGet('nickname', $this->nickname); - - $feed_url = common_local_url('ApiTimelineUser', - array('id' => $user->id, - 'format' => 'atom')); - $url = $w->applyTemplate($link['template'], $feed_url); - - common_redirect($url, 303); - } - - } - + $opts = array('allowed_schemes' => array('http', 'https', 'acct')); + if (Validate::uri($this->acct, $opts)) { + $bits = parse_url($this->acct); + if ($bits['scheme'] == 'acct') { + $this->connectWebfinger($bits['path']); + } else { + $this->connectProfile($this->acct); + } + } elseif (strpos('@', $this->acct) !== false) { + $this->connectWebfinger($this->acct); + } } - + + function connectWebfinger($acct) + { + $w = new Webfinger; + + $result = $w->lookup($acct); + if (!$result) { + $this->clientError(_m("Couldn't look up OStatus account profile.")); + } + foreach ($result->links as $link) { + if ($link['rel'] == 'http://ostatus.org/schema/1.0/subscribe') { + // We found a URL - let's redirect! + + $user = User::staticGet('nickname', $this->nickname); + $target_profile = common_local_url('userbyid', array('id' => $user->id)); + + $url = $w->applyTemplate($link['template'], $feed_url); + + common_redirect($url, 303); + } + + } + + } + + function connectProfile($subscriber_profile) + { + $user = User::staticGet('nickname', $this->nickname); + $target_profile = common_local_url('userbyid', array('id' => $user->id)); + + // @fixme hack hack! We should look up the remote sub URL from XRDS + $suburl = preg_replace('!^(.*)/(.*?)$!', '$1/main/ostatussub', $subscriber_profile); + $suburl .= '?profile=' . 
urlencode($target_profile); + + common_redirect($suburl, 303); + } + function title() { - return _('OStatus Connect'); + return _m('OStatus Connect'); } } diff --git a/plugins/OStatus/actions/ostatussub.php b/plugins/OStatus/actions/ostatussub.php index 2391225016..5244031437 100644 --- a/plugins/OStatus/actions/ostatussub.php +++ b/plugins/OStatus/actions/ostatussub.php @@ -1,7 +1,7 @@ + * @maintainer Brion Vibber */ if (!defined('STATUSNET') && !defined('LACONICA')) { exit(1); } class OStatusSubAction extends Action { + protected $profile_uri; + protected $preview; + protected $munger; + + /** + * Title of the page + * + * @return string Title of the page + */ - protected $feedurl; - function title() { - return _m("OStatus Subscribe"); + return _m('Authorize subscription'); } - function handle($args) + /** + * Instructions for use + * + * @return instructions for use + */ + + function getInstructions() { - if ($this->validateFeed()) { - $this->showForm(); - } - - return true; - + return _m('You can subscribe to users from other supported sites. Paste their address or profile URI below:'); } - function showForm($err = null) + function showForm($error=null) { - $this->err = $err; + $this->error = $error; $this->showPage(); } + /** + * Content area of the page + * + * Shows a form for associating a remote OStatus account with this + * StatusNet account. + * + * @return void + */ + function showContent() { + // @fixme is this right place? 
+ if ($this->error) { + $this->text($this->error); + } + $user = common_current_user(); $profile = $user->getProfile(); - $fuser = null; - - $flink = Foreign_link::getByUserID($user->id, FEEDSUB_SERVICE); - - if (!empty($flink)) { - $fuser = $flink->getForeignUser(); - } - $this->elementStart('form', array('method' => 'post', - 'id' => 'form_settings_feedsub', + 'id' => 'ostatus_sub', 'class' => 'form_settings', 'action' => - common_local_url('feedsubsettings'))); + common_local_url('ostatussub'))); $this->hidden('token', common_session_token()); @@ -77,17 +91,47 @@ class OStatusSubAction extends Action $this->elementStart('ul', 'form_data'); $this->elementStart('li'); - $this->input('feedurl', _('Feed URL'), $this->feedurl, _('Enter the URL of a PubSubHubbub-enabled feed')); + $this->input('profile', + _m('Address or profile URL'), + $this->profile_uri, + _m('Enter the profile URL of a PubSubHubbub-enabled feed')); $this->elementEnd('li'); $this->elementEnd('ul'); - $this->submit('subscribe', _m('Subscribe')); + if ($this->preview) { + $this->submit('subscribe', _m('Subscribe')); + } else { + $this->submit('validate', _m('Continue')); + } $this->elementEnd('fieldset'); $this->elementEnd('form'); - $this->previewFeed(); + if ($this->preview) { + $this->previewFeed(); + } + } + + function prepare($args) + { + parent::prepare($args); + $this->profile_uri = $this->arg('profile'); + return true; + } + + function handle($args) + { + parent::handle($args); + if ($_SERVER['REQUEST_METHOD'] == 'POST') { + $this->handlePost(); + } else { + if ($this->profile_uri) { + $this->validateAndPreview(); + } else { + $this->showPage(); + } + } } /** @@ -111,14 +155,15 @@ class OStatusSubAction extends Action return; } - if ($this->arg('subscribe')) { + if ($this->arg('validate')) { + $this->validateAndPreview(); + } else if ($this->arg('subscribe')) { $this->saveFeed(); } else { $this->showForm(_('Unexpected form submission.')); } } - /** * Set up and add a feed * @@ -127,100 
+172,91 @@ class OStatusSubAction extends Action */ function validateFeed() { - $feedurl = $this->trimmed('feed'); + $profile_uri = trim($this->arg('profile')); - if ($feedurl == '') { - $this->showForm(_m('Empty feed URL!')); + if ($profile_uri == '') { + $this->showForm(_m('Empty remote profile URL!')); return; } - $this->feedurl = $feedurl; + $this->profile_uri = $profile_uri; - // Get the canonical feed URI and check it + // @fixme validate, normalize bla bla try { - $discover = new FeedDiscovery(); - $uri = $discover->discoverFromURL($feedurl); + $oprofile = Ostatus_profile::ensureProfile($this->profile_uri); + $this->oprofile = $oprofile; + return true; } catch (FeedSubBadURLException $e) { - $this->showForm(_m('Invalid URL or could not reach server.')); - return false; + $err = _m('Invalid URL or could not reach server.'); } catch (FeedSubBadResponseException $e) { - $this->showForm(_m('Cannot read feed; server returned error.')); - return false; + $err = _m('Cannot read feed; server returned error.'); } catch (FeedSubEmptyException $e) { - $this->showForm(_m('Cannot read feed; server returned an empty page.')); - return false; + $err = _m('Cannot read feed; server returned an empty page.'); } catch (FeedSubBadHTMLException $e) { - $this->showForm(_m('Bad HTML, could not find feed link.')); - return false; + $err = _m('Bad HTML, could not find feed link.'); } catch (FeedSubNoFeedException $e) { - $this->showForm(_m('Could not find a feed linked from this URL.')); - return false; + $err = _m('Could not find a feed linked from this URL.'); } catch (FeedSubUnrecognizedTypeException $e) { - $this->showForm(_m('Not a recognized feed type.')); - return false; + $err = _m('Not a recognized feed type.'); } catch (FeedSubException $e) { // Any new ones we forgot about - $this->showForm(_m('Bad feed URL.')); - return false; + $err = sprintf(_m('Bad feed URL: %s %s'), get_class($e), $e->getMessage()); } - - $this->munger = $discover->feedMunger(); - $this->profile = 
$this->munger->ostatusProfile(); - if ($this->profile->huburi == '') { - $this->showForm(_m('Feed is not PuSH-enabled; cannot subscribe.')); - return false; - } - - return true; + $this->showForm($err); + return false; } function saveFeed() { if ($this->validateFeed()) { $this->preview = true; - $this->profile = Ostatus_profile::ensureProfile($this->munger); - // If not already in use, subscribe to updates via the hub - if ($this->profile->sub_start) { - common_log(LOG_INFO, __METHOD__ . ": double the fun! new sub for {$this->profile->feeduri} last subbed {$this->profile->sub_start}"); - } else { - $ok = $this->profile->subscribe(); - common_log(LOG_INFO, __METHOD__ . ": sub was $ok"); - if (!$ok) { - $this->showForm(_m('Feed subscription failed! Bad response from hub.')); - return; - } - } - // And subscribe the current user to the local profile $user = common_current_user(); - $profile = $this->profile->getProfile(); - - if ($user->isSubscribed($profile)) { - $this->showForm(_m('Already subscribed!')); - } elseif ($user->subscribeTo($profile)) { - $this->showForm(_m('Feed subscribed!')); + + if (!$this->oprofile->subscribe()) { + $this->showForm(_m("Failed to set up server-to-server subscription.")); + return; + } + + if ($this->oprofile->isGroup()) { + $group = $this->oprofile->localGroup(); + if ($user->isMember($group)) { + $this->showForm(_m('Already a member!')); + } elseif (Group_member::join($this->profile->group_id, $user->id)) { + $this->showForm(_m('Joined remote group!')); + } else { + $this->showForm(_m('Remote group join failed!')); + } } else { - $this->showForm(_m('Feed subscription failed!')); + $local = $this->oprofile->localProfile(); + if ($user->isSubscribed($local)) { + $this->showForm(_m('Already subscribed!')); + } elseif ($this->oprofile->subscribeLocalToRemote($user)) { + $this->showForm(_m('Remote user subscribed!')); + } else { + $this->showForm(_m('Remote subscription failed!')); + } } } } - - function previewFeed() + function 
validateAndPreview() { - $profile = $this->munger->ostatusProfile(); - $notice = $this->munger->notice(0, true); // preview - - if ($notice) { - $this->element('b', null, 'Preview of latest post from this feed:'); - - $item = new NoticeList($notice, $this); - $item->show(); - } else { - $this->element('b', null, 'No posts in this feed yet.'); + if ($this->validateFeed()) { + $this->preview = true; + $this->showForm(_m('Previewing feed:')); } } + function previewFeed() + { + $this->text('Profile preview should go here'); + } + function showScripts() + { + parent::showScripts(); + $this->autofocus('feedurl'); + } } diff --git a/plugins/OStatus/actions/webfinger.php b/plugins/OStatus/actions/webfinger.php index 75ba16638b..f4dc61b7d1 100644 --- a/plugins/OStatus/actions/webfinger.php +++ b/plugins/OStatus/actions/webfinger.php @@ -66,7 +66,7 @@ class WebfingerAction extends Action 'href' => $salmon_url); // TODO - finalize where the redirect should go on the publisher - $url = common_local_url('ostatussub') . '?feed={uri}'; + $url = common_local_url('ostatussub') . '?profile={uri}'; $xrd->links[] = array('rel' => 'http://ostatus.org/schema/1.0/subscribe', 'template' => $url ); From 9c2fe8492f7dae183e0369f8d43f124fd80e4433 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sat, 20 Feb 2010 15:56:36 -0800 Subject: [PATCH 36/62] OStatus: send favorite/unfavorite notifications to remote authors --- classes/Notice.php | 32 ++++++++++++++++++++++++++++ plugins/OStatus/OStatusPlugin.php | 35 +++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/classes/Notice.php b/classes/Notice.php index a52cfed70c..8b8f90474d 100644 --- a/classes/Notice.php +++ b/classes/Notice.php @@ -1104,6 +1104,38 @@ class Notice extends Memcached_DataObject return $xs->getString(); } + /** + * Returns an XML string fragment with a reference to a notice as an + * Activity Streams noun object with the given element type. 
+ * + * Assumes that 'activity' namespace has been previously defined. + * + * @param string $element one of 'subject', 'object', 'target' + * @return string + */ + function asActivityNoun($element) + { + $xs = new XMLStringer(true); + + $xs->elementStart('activity:' . $element); + $xs->element('activity:object-type', + null, + 'http://activitystrea.ms/schema/1.0/note'); + $xs->element('id', + null, + $this->uri); + $xs->element('content', + array('type' => 'text/html'), + $this->rendered); + $xs->element('link', + array('type' => 'text/html', + 'rel' => 'permalink', + 'href' => $this->bestUrl())); + $xs->elementEnd('activity:' . $element); + + return $xs->getString(); + } + function bestUrl() { if (!empty($this->url)) { diff --git a/plugins/OStatus/OStatusPlugin.php b/plugins/OStatus/OStatusPlugin.php index e78e658a6c..4cbf78e638 100644 --- a/plugins/OStatus/OStatusPlugin.php +++ b/plugins/OStatus/OStatusPlugin.php @@ -357,4 +357,39 @@ class OStatusPlugin extends Plugin return true; } + + /** + * Notify remote users when their notices get favorited. + * + * @param Profile or User $profile of local user doing the faving + * @param Notice $notice being favored + * @return hook return value + */ + function onEndFavorNotice($profile, Notice $notice) + { + if ($profile instanceof User) { + // @fixme upstream function should clarify its parameters + $profile = $profile->getProfile(); + } + $oprofile = Ostatus_profile::staticGet('profile_id', $notice->profile_id); + if ($oprofile) { + $oprofile->notify($profile, ActivityVerb::FAVORITE, $notice); + } + } + + /** + * Notify remote users when their notices get de-favorited. 
+ * + * @param Profile or User $profile of local user doing the de-faving + * @param Notice $notice being favored + * @return hook return value + */ + function onEndDisfavorNotice(Profile $profile, Notice $notice) + { + $oprofile = Ostatus_profile::staticGet('profile_id', $notice->profile_id); + if ($oprofile) { + $oprofile->notify($profile, ActivityVerb::UNFAVORITE, $notice); + } + } + } From 145a19954f6f993714cb8b65aaf9d54996503664 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sat, 20 Feb 2010 16:45:30 -0800 Subject: [PATCH 37/62] OStatus: Salmon favorite & unfavorite events now handled --- plugins/OStatus/actions/usersalmon.php | 77 +++++++++++++++----------- plugins/OStatus/lib/salmonaction.php | 3 + 2 files changed, 48 insertions(+), 32 deletions(-) diff --git a/plugins/OStatus/actions/usersalmon.php b/plugins/OStatus/actions/usersalmon.php index 4363488ddc..20c6c2942a 100644 --- a/plugins/OStatus/actions/usersalmon.php +++ b/plugins/OStatus/actions/usersalmon.php @@ -131,10 +131,51 @@ class UsersalmonAction extends SalmonAction function handleFavorite() { - // WORST VARIABLE NAME EVER - $object = $this->act->object; + $notice = $this->getNotice($this->act->object); + $profile = $this->ensureProfile()->localProfile(); - switch ($this->act->object->type) { + $old = Fave::pkeyGet(array('user_id' => $profile->id, + 'notice_id' => $notice->id)); + + if (!empty($old)) { + throw new ClientException("We already know that's a fave!"); + } + + if (!Fave::addNew($profile, $notice)) { + throw new ClientException("Could not save new favorite."); + } + } + + /** + * Remote user doesn't like one of our posts after all! + * Confirm the post is ours, and save a local favorite event. 
+ */ + function handleUnfavorite() + { + $notice = $this->getNotice($this->act->object); + $profile = $this->ensureProfile()->localProfile(); + + $fave = Fave::pkeyGet(array('user_id' => $profile->id, + 'notice_id' => $notice->id)); + if (empty($fave)) { + throw new ClientException("Notice wasn't favorited!"); + } + + $fave->delete(); + } + + /** + * @param ActivityObject $object + * @return Notice + * @throws ClientException on invalid input + */ + function getNotice($object) + { + if (!$object) { + throw new ClientException("Can't favorite/unfavorite without an object."); + } + + switch ($object->type) { case ActivityObject::ARTICLE: case ActivityObject::BLOGENTRY: case ActivityObject::NOTE: @@ -155,35 +196,7 @@ class UsersalmonAction extends SalmonAction throw new ClientException("Notice with ID $object->id not posted by $this->user->id."); } - $profile = $this->ensureProfile(); - - $old = Fave::pkeyGet(array('user_id' => $profile->id, - 'notice_id' => $notice->id)); - - if (!empty($old)) { - throw new ClientException("We already know that's a fave!"); - } - - $fave = new Fave(); - - // @fixme need to change this attribute name, maybe references - $fave->user_id = $profile->id; - $fave->notice_id = $notice->id; - - $result = $fave->insert(); - - if (!$result) { - common_log_db_error($fave, 'INSERT', __FILE__); - throw new ServerException('Could not save new favorite.'); - } - } - - /** - * Remote user doesn't like one of our posts after all! - * Confirm the post is ours, and save a local favorite event. 
- */ - function handleUnfavorite() - { + return $notice; } } diff --git a/plugins/OStatus/lib/salmonaction.php b/plugins/OStatus/lib/salmonaction.php index 7085e4583e..c838905077 100644 --- a/plugins/OStatus/lib/salmonaction.php +++ b/plugins/OStatus/lib/salmonaction.php @@ -81,6 +81,9 @@ class SalmonAction extends Action case ActivityVerb::FAVORITE: $this->handleFavorite(); break; + case ActivityVerb::UNFAVORITE: + $this->handleUnfavorite(); + break; case ActivityVerb::FOLLOW: case ActivityVerb::FRIEND: $this->handleFollow(); From 96c6019638f6407b38fbc5a45485a3797af47adb Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 20 Feb 2010 19:58:20 -0500 Subject: [PATCH 38/62] Add support for favor and disfavor notification Added support for favoring and disfavoring in OStatusPlugin. Needed to represent the Notice as an activity:object, so added some code for that in lib/activity.php. Also, made some small changes to OStatusPlugin so it handled having a non-default argument $object correctly. --- plugins/OStatus/OStatusPlugin.php | 50 ++++++++++++++++--- plugins/OStatus/classes/Ostatus_profile.php | 6 +-- plugins/OStatus/lib/activity.php | 55 ++++++++++++++++++++- 3 files changed, 98 insertions(+), 13 deletions(-) diff --git a/plugins/OStatus/OStatusPlugin.php b/plugins/OStatus/OStatusPlugin.php index 9e6d03177f..98e048c14c 100644 --- a/plugins/OStatus/OStatusPlugin.php +++ b/plugins/OStatus/OStatusPlugin.php @@ -255,7 +255,7 @@ class OStatusPlugin extends Plugin { if ($user instanceof Profile) { $profile = $user; - } else if ($user instanceof Profile) { + } else if ($user instanceof User) { $profile = $user->getProfile(); } $oprofile = Ostatus_profile::staticGet('profile_id', $other->id); @@ -353,30 +353,64 @@ class OStatusPlugin extends Plugin // We have a local user subscribing to a remote profile; make the // magic happen! 
- $oprofile->notify($subscriber, ActivityVerb::FOLLOW); + $oprofile->notify($subscriber, ActivityVerb::FOLLOW, $oprofile); return true; } - function onEndUnsubscribe($subscriber, $other) + function onEndFavor($profile, $notice) { - $user = User::staticGet('id', $subscriber->id); + // is the favorer a local user? + + $user = User::staticGet('id', $profile->id); if (empty($user)) { return true; } - $oprofile = Ostatus_profile::staticGet('profile_id', $other->id); + // is the author an OStatus remote user? + + $oprofile = Ostatus_profile::staticGet('profile_id', $notice->profile_id); if (empty($oprofile)) { return true; } - // We have a local user subscribing to a remote profile; make the - // magic happen! + // Local user faved an Ostatus profile's notice; notify them! - $oprofile->notify($subscriber, ActivityVerb::UNFOLLOW); + $obj = ActivityObject::fromNotice($notice); + $oprofile->notify($profile, + ActivityVerb::FAVORITE, + $obj->asString()); + return true; + } + + function onEndDisfavor($profile, $notice) + { + // is the favorer a local user? + + $user = User::staticGet('id', $profile->id); + + if (empty($user)) { + return true; + } + + // is the author an OStatus remote user? + + $oprofile = Ostatus_profile::staticGet('profile_id', $notice->profile_id); + + if (empty($oprofile)) { + return true; + } + + // Local user faved an Ostatus profile's notice; notify them! 
+ + $obj = ActivityObject::fromNotice($notice); + + $oprofile->notify($profile, + ActivityVerb::UNFAVORITE, + $obj->asString()); return true; } } diff --git a/plugins/OStatus/classes/Ostatus_profile.php b/plugins/OStatus/classes/Ostatus_profile.php index 4dd5652886..5bd899bc46 100644 --- a/plugins/OStatus/classes/Ostatus_profile.php +++ b/plugins/OStatus/classes/Ostatus_profile.php @@ -307,7 +307,7 @@ class Ostatus_profile extends Memcached_DataObject * * @param Profile $actor * @param $verb eg Activity::SUBSCRIBE or Activity::JOIN - * @param $object object of the action; if null, the remote entity itself is assumed + * @param string $object object of the action; if null, the remote entity itself is assumed */ public function notify($actor, $verb, $object=null) { @@ -319,7 +319,7 @@ class Ostatus_profile extends Memcached_DataObject throw new ServerException("Invalid actor passed to " . __METHOD__ . ": " . $type); } if ($object == null) { - $object = $this; + $object = $this->asActivityNoun('object'); } if ($this->salmonuri) { $text = 'update'; // @fixme @@ -345,7 +345,7 @@ class Ostatus_profile extends Memcached_DataObject $entry->element('activity:verb', null, $verb); $entry->raw($actor->asAtomAuthor()); $entry->raw($actor->asActivityActor()); - $entry->raw($object->asActivityNoun('object')); + $entry->raw($object); $entry->elementEnd('entry'); $xml = $entry->getString(); diff --git a/plugins/OStatus/lib/activity.php b/plugins/OStatus/lib/activity.php index 5bc8f78e52..6d15f85b0d 100644 --- a/plugins/OStatus/lib/activity.php +++ b/plugins/OStatus/lib/activity.php @@ -199,7 +199,7 @@ class ActivityObject public $link; public $source; - /** + /** * Constructor * * This probably needs to be refactored @@ -209,8 +209,12 @@ class ActivityObject * @param DOMElement $element DOM thing to turn into an Activity thing */ - function __construct($element) + function __construct($element = null) { + if (empty($element)) { + return; + } + $this->element = $element; if 
($element->tagName == 'author') { @@ -279,6 +283,53 @@ class ActivityObject } } } + + static fromNotice($notice) + { + $object = new ActivityObject(); + + $object->type = ActivityObject::NOTE; + + $object->id = $notice->uri; + $object->title = $notice->content; + $object->content = $notice->rendered; + $object->link = $notice->bestUrl(); + + return $object; + } + + function asString($tag='activity:object') + { + $xs = new XMLStringer(true); + + $xs->elementStart($tag); + + $xs->element('activity:object-type', null, $this->type); + + $xs->element(self::ID, null, $this->id); + + if (!empty($this->title)) { + $xs->element(self::TITLE, null, $this->title); + } + + if (!empty($this->summary)) { + $xs->element(self::SUMMARY, null, $this->summary); + } + + if (!empty($this->content)) { + // XXX: assuming HTML content here + $xs->element(self::CONTENT, array('type' => 'html'), $this->content); + } + + if (!empty($this->link)) { + $xs->element('link', array('rel' => 'alternate', 'type' => 'text/html'), + $this->content); + } + + $xs->elementEnd($tag); + + return $xs->getString(); + } } /** From a3de4caf497b5d4a4d34477279fefe4c4d7ad8e2 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 20 Feb 2010 20:31:06 -0500 Subject: [PATCH 39/62] make sure argument to Fave::addNew() is a profile --- actions/favor.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/actions/favor.php b/actions/favor.php index 2aeb1da613..afca9768ae 100644 --- a/actions/favor.php +++ b/actions/favor.php @@ -79,7 +79,7 @@ class FavorAction extends Action $this->clientError(_('This notice is already a favorite!')); return; } - $fave = Fave::addNew($user, $notice); + $fave = Fave::addNew($user->getProfile(), $notice); if (!$fave) { $this->serverError(_('Could not create favorite.')); return; From 0c62c686756de0752cdd44b63c10cf6e4047860f Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 20 Feb 2010 20:34:29 -0500 Subject: [PATCH 40/62] do some double-checks on favor and disfavor 
handlers in OStatusPlugin --- plugins/OStatus/OStatusPlugin.php | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/plugins/OStatus/OStatusPlugin.php b/plugins/OStatus/OStatusPlugin.php index 5081c4d983..29799b0dc1 100644 --- a/plugins/OStatus/OStatusPlugin.php +++ b/plugins/OStatus/OStatusPlugin.php @@ -365,16 +365,21 @@ class OStatusPlugin extends Plugin * @param Notice $notice being favored * @return hook return value */ - function onEndFavorNotice($profile, Notice $notice) + function onEndFavorNotice(Profile $profile, Notice $notice) { - if ($profile instanceof User) { - // @fixme upstream function should clarify its parameters - $profile = $profile->getProfile(); + $user = User::staticGet('id', $profile->id); + + if (empty($user)) { + return true; } + $oprofile = Ostatus_profile::staticGet('profile_id', $notice->profile_id); + if ($oprofile) { $oprofile->notify($profile, ActivityVerb::FAVORITE, $notice); } + + return true; } /** @@ -386,10 +391,18 @@ class OStatusPlugin extends Plugin */ function onEndDisfavorNotice(Profile $profile, Notice $notice) { + $user = User::staticGet('id', $profile->id); + + if (empty($user)) { + return true; + } + $oprofile = Ostatus_profile::staticGet('profile_id', $notice->profile_id); + if ($oprofile) { $oprofile->notify($profile, ActivityVerb::UNFAVORITE, $notice); } - } + return true; + } } From 6f1ccfc5770776c39bfe28543629e63a566ab541 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sat, 20 Feb 2010 20:36:54 -0500 Subject: [PATCH 41/62] Subscription::start() should be enough, right? 
--- plugins/OStatus/actions/usersalmon.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/plugins/OStatus/actions/usersalmon.php b/plugins/OStatus/actions/usersalmon.php index 20c6c2942a..020ca597cd 100644 --- a/plugins/OStatus/actions/usersalmon.php +++ b/plugins/OStatus/actions/usersalmon.php @@ -101,7 +101,8 @@ class UsersalmonAction extends SalmonAction $oprofile = $this->ensureProfile(); if ($oprofile) { common_log(LOG_INFO, "Setting up subscription from remote {$oprofile->uri} to local {$this->user->nickname}"); - $oprofile->subscribeRemoteToLocal($this->user); + Subscription::start($oprofile->localProfile(), + $this->user->getProfile()); } else { common_log(LOG_INFO, "Can't set up subscription from remote; missing profile."); } From 5df61141d28426329d2f4e50e61c131d43d1e405 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sat, 20 Feb 2010 18:25:40 -0800 Subject: [PATCH 42/62] OStatus: fix regressions from merge --- plugins/OStatus/classes/Ostatus_profile.php | 4 ++-- plugins/OStatus/lib/activity.php | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/plugins/OStatus/classes/Ostatus_profile.php b/plugins/OStatus/classes/Ostatus_profile.php index 5bd899bc46..d9cb7a6e1b 100644 --- a/plugins/OStatus/classes/Ostatus_profile.php +++ b/plugins/OStatus/classes/Ostatus_profile.php @@ -319,7 +319,7 @@ class Ostatus_profile extends Memcached_DataObject throw new ServerException("Invalid actor passed to " . __METHOD__ . ": " . 
$type); } if ($object == null) { - $object = $this->asActivityNoun('object'); + $object = $this; } if ($this->salmonuri) { $text = 'update'; // @fixme @@ -345,7 +345,7 @@ class Ostatus_profile extends Memcached_DataObject $entry->element('activity:verb', null, $verb); $entry->raw($actor->asAtomAuthor()); $entry->raw($actor->asActivityActor()); - $entry->raw($object); + $entry->raw($object->asActivityNoun('object')); $entry->elementEnd('entry'); $xml = $entry->getString(); diff --git a/plugins/OStatus/lib/activity.php b/plugins/OStatus/lib/activity.php index 6d15f85b0d..7563488b70 100644 --- a/plugins/OStatus/lib/activity.php +++ b/plugins/OStatus/lib/activity.php @@ -284,7 +284,7 @@ class ActivityObject } } - static fromNotice($notice) + static function fromNotice($notice) { $object = new ActivityObject(); From 229f9bd069a25af9635a9eaaa822a614bf6e9348 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 09:15:57 -0500 Subject: [PATCH 43/62] more complete content retrieval in ActivityObject::__construct() --- plugins/OStatus/lib/activity.php | 67 ++++++++++++++++++++++++++++++-- 1 file changed, 64 insertions(+), 3 deletions(-) diff --git a/plugins/OStatus/lib/activity.php b/plugins/OStatus/lib/activity.php index 6d15f85b0d..5f174d86f6 100644 --- a/plugins/OStatus/lib/activity.php +++ b/plugins/OStatus/lib/activity.php @@ -183,6 +183,8 @@ class ActivityObject const TITLE = 'title'; const SUMMARY = 'summary'; const CONTENT = 'content'; + const TYPE = 'type'; + const SRC = 'src'; const ID = 'id'; const SOURCE = 'source'; @@ -199,7 +201,7 @@ class ActivityObject public $link; public $source; - /** + /** * Constructor * * This probably needs to be refactored @@ -243,10 +245,11 @@ class ActivityObject $this->id = $this->_childContent($element, self::ID); $this->title = $this->_childContent($element, self::TITLE); $this->summary = $this->_childContent($element, self::SUMMARY); - $this->content = $this->_childContent($element, self::CONTENT); $this->source = 
$this->_getSource($element); + $this->content = $this->_getContent($element); + $this->link = ActivityUtils::getPermalink($element); // XXX: grab PoCo stuff @@ -284,7 +287,65 @@ class ActivityObject } } - static fromNotice($notice) + /** + * Get the content of an atom:entry-like object + * + * @param DOMElement $element The element to examine. + * + * @return string unencoded HTML content of the element, like "This -< is HTML." + * + * @todo handle remote content + * @todo handle embedded XML mime types + * @todo handle base64-encoded non-XML and non-text mime types + */ + + private function _getContent($element) + { + $contentEl = ActivityUtils::child($element, self::CONTENT); + + if (!empty($contentEl)) { + + $src = $contentEl->getAttribute(self::SRC); + + if (!empty($src)) { + throw new ClientException(_("Can't handle remote content yet.")); + } + + $type = $contentEl->getAttribute(self::TYPE); + + // slavishly following http://atompub.org/rfc4287.html#rfc.section.4.1.3.3 + + if ($type == 'text') { + return $contentEl->textContent; + } else if ($type == 'html') { + $text = $contentEl->textContent; + return htmlspecialchars_decode($text, ENT_QUOTES); + } else if ($type == 'xhtml') { + $divEl = ActivityUtils::child($contentEl, 'div'); + if (empty($divEl)) { + return null; + } + $doc = $divEl->ownerDocument; + $text = ''; + $children = $divEl->childNodes; + + for ($i = 0; $i < $children->length; $i++) { + $child = $children->item($i); + $text .= $doc->saveXML($child); + } + return trim($text); + } else if (in_array(array('text/xml', 'application/xml'), $type) || + preg_match('#(+|/)xml$#', $type)) { + throw new ClientException(_("Can't handle embedded XML content yet.")); + } else if (strncasecmp($type, 'text/', 5)) { + return $contentEl->textContent; + } else { + throw new ClientException(_("Can't handle embedded Base64 content yet.")); + } + } + } + + static function fromNotice($notice) { $object = new ActivityObject(); From 
6169d8a877fb0e76ee17187f74a6747fe7f2c97d Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 09:16:27 -0500 Subject: [PATCH 44/62] saving notices in salmon actions --- plugins/OStatus/lib/salmonaction.php | 45 ++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/plugins/OStatus/lib/salmonaction.php b/plugins/OStatus/lib/salmonaction.php index c838905077..41e8322e89 100644 --- a/plugins/OStatus/lib/salmonaction.php +++ b/plugins/OStatus/lib/salmonaction.php @@ -231,4 +231,49 @@ class SalmonAction extends Action return null; } } + + function saveNotice() + { + $oprofile = $this->ensureProfile(); + + // Get (safe!) HTML and text versions of the content + + require_once(INSTALLDIR.'/extlib/HTMLPurifier/HTMLPurifier.auto.php'); + + $html = $this->act->object->content; + + $rendered = HTMLPurifier::purify($html); + $content = html_entity_decode(strip_tags($rendered)); + + $options = array('is_local' => Notice::REMOTE_OMB, + 'uri' => $this->act->object->id, + 'url' => $this->act->object->link, + 'rendered' => $rendered); + + if (!empty($this->act->context->location)) { + $options['lat'] = $location->lat; + $options['lon'] = $location->lon; + if ($location->location_id) { + $options['location_ns'] = $location->location_ns; + $options['location_id'] = $location->location_id; + } + } + + if (!empty($this->act->context->replyToID)) { + $orig = Notice::staticGet('uri', + $this->act->context->replyToID); + if (!empty($orig)) { + $options['reply_to'] = $orig->id; + } + } + + if (!empty($this->act->time)) { + $options['created'] = common_sql_time($this->act->time); + } + + return Notice::saveNew($oprofile->profile_id, + $content, + 'ostatus+salmon', + $options); + } } From 9498a164805892a8af17311f7e7697b132524990 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 09:17:00 -0500 Subject: [PATCH 45/62] Notice::saveNew() accepts url and rendered options --- classes/Notice.php | 10 +++++++++- 1 file changed, 9 insertions(+), 1 
deletion(-) diff --git a/classes/Notice.php b/classes/Notice.php index 8b8f90474d..0051cf8851 100644 --- a/classes/Notice.php +++ b/classes/Notice.php @@ -194,6 +194,7 @@ class Notice extends Memcached_DataObject */ static function saveNew($profile_id, $content, $source, $options=null) { $defaults = array('uri' => null, + 'url' => null, 'reply_to' => null, 'repeat_of' => null); @@ -256,9 +257,16 @@ class Notice extends Memcached_DataObject } $notice->content = $final; - $notice->rendered = common_render_content($final, $notice); + + if (!empty($rendered)) { + $notice->rendered = $rendered; + } else { + $notice->rendered = common_render_content($final, $notice); + } + $notice->source = $source; $notice->uri = $uri; + $notice->url = $url; // Handle repeat case From 4a4c34de3221eb8000e911addad039cd170faaa1 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 09:17:35 -0500 Subject: [PATCH 46/62] Save posted notices in usersalmon --- plugins/OStatus/actions/usersalmon.php | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/plugins/OStatus/actions/usersalmon.php b/plugins/OStatus/actions/usersalmon.php index 020ca597cd..12c74798f2 100644 --- a/plugins/OStatus/actions/usersalmon.php +++ b/plugins/OStatus/actions/usersalmon.php @@ -87,8 +87,7 @@ class UsersalmonAction extends SalmonAction throw new ClientException("Not to anyone in reply to anything!"); } - $profile = $this->ensureProfile(); - // @fixme do something with the post + $this->saveNotice(); } /** From 7f471bde231071e3512f203ceca4b830ac2204a1 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 09:17:52 -0500 Subject: [PATCH 47/62] more tests for Activity parsing --- plugins/OStatus/tests/ActivityParseTests.php | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/plugins/OStatus/tests/ActivityParseTests.php b/plugins/OStatus/tests/ActivityParseTests.php index 35b4b0f9d7..d7305dedea 100644 --- a/plugins/OStatus/tests/ActivityParseTests.php +++ 
b/plugins/OStatus/tests/ActivityParseTests.php @@ -22,8 +22,15 @@ class ActivityParseTests extends PHPUnit_Framework_TestCase $act = new Activity($dom->documentElement); $this->assertFalse(empty($act)); + $this->assertEquals($act->time, 1243860840); $this->assertEquals($act->verb, ActivityVerb::POST); + + $this->assertFalse(empty($act->object)); + $this->assertEquals($act->object->title, 'Punctuation Changeset'); + $this->assertEquals($act->object->type, 'http://versioncentral.example.org/activity/changeset'); + $this->assertEquals($act->object->summary, 'Fixing punctuation because it makes it more readable.'); + $this->assertEquals($act->object->id, 'tag:versioncentral.example.org,2009:/change/1643245'); } public function testExample3() @@ -84,6 +91,10 @@ class ActivityParseTests extends PHPUnit_Framework_TestCase $this->assertEquals('http://example.net/conversation/11', $act->context->conversation); $this->assertEquals(array('http://example.net/user/1'), $act->context->attention); + $this->assertFalse(empty($act->object)); + $this->assertEquals($act->object->content, + '@evan now is the time for all good men to come to the aid of their country. #'); + $this->assertFalse(empty($act->actor)); } } From e9d22138efb059fd701d332815d63e65b09c5282 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 09:23:51 -0500 Subject: [PATCH 48/62] permalink on a note represented by rel=alternate --- classes/Notice.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/classes/Notice.php b/classes/Notice.php index 0051cf8851..7e524cacd4 100644 --- a/classes/Notice.php +++ b/classes/Notice.php @@ -1137,7 +1137,7 @@ class Notice extends Memcached_DataObject $this->rendered); $xs->element('link', array('type' => 'text/html', - 'rel' => 'permalink', + 'rel' => 'alternate', 'href' => $this->bestUrl())); $xs->elementEnd('activity:' . 
$element); From ff2efd314f15bf5e4b42fe02d56217a996a7c692 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 10:50:51 -0500 Subject: [PATCH 49/62] let activities generate their own XML --- plugins/OStatus/lib/activity.php | 192 +++++++++++++++++++++---------- 1 file changed, 129 insertions(+), 63 deletions(-) diff --git a/plugins/OStatus/lib/activity.php b/plugins/OStatus/lib/activity.php index 5f174d86f6..5da6c75856 100644 --- a/plugins/OStatus/lib/activity.php +++ b/plugins/OStatus/lib/activity.php @@ -55,6 +55,9 @@ class ActivityUtils const TYPE = 'type'; const HREF = 'href'; + const CONTENT = 'content'; + const SRC = 'src'; + /** * Get the permalink for an Activity object * @@ -139,6 +142,64 @@ class ActivityUtils return $el->textContent; } } + + /** + * Get the content of an atom:entry-like object + * + * @param DOMElement $element The element to examine. + * + * @return string unencoded HTML content of the element, like "This -< is HTML." + * + * @todo handle remote content + * @todo handle embedded XML mime types + * @todo handle base64-encoded non-XML and non-text mime types + */ + + static function getContent($element) + { + $contentEl = ActivityUtils::child($element, self::CONTENT); + + if (!empty($contentEl)) { + + $src = $contentEl->getAttribute(self::SRC); + + if (!empty($src)) { + throw new ClientException(_("Can't handle remote content yet.")); + } + + $type = $contentEl->getAttribute(self::TYPE); + + // slavishly following http://atompub.org/rfc4287.html#rfc.section.4.1.3.3 + + if ($type == 'text') { + return $contentEl->textContent; + } else if ($type == 'html') { + $text = $contentEl->textContent; + return htmlspecialchars_decode($text, ENT_QUOTES); + } else if ($type == 'xhtml') { + $divEl = ActivityUtils::child($contentEl, 'div'); + if (empty($divEl)) { + return null; + } + $doc = $divEl->ownerDocument; + $text = ''; + $children = $divEl->childNodes; + + for ($i = 0; $i < $children->length; $i++) { + $child = 
$children->item($i); + $text .= $doc->saveXML($child); + } + return trim($text); + } else if (in_array(array('text/xml', 'application/xml'), $type) || + preg_match('#(+|/)xml$#', $type)) { + throw new ClientException(_("Can't handle embedded XML content yet.")); + } else if (strncasecmp($type, 'text/', 5)) { + return $contentEl->textContent; + } else { + throw new ClientException(_("Can't handle embedded Base64 content yet.")); + } + } + } } /** @@ -182,9 +243,6 @@ class ActivityObject const TITLE = 'title'; const SUMMARY = 'summary'; - const CONTENT = 'content'; - const TYPE = 'type'; - const SRC = 'src'; const ID = 'id'; const SOURCE = 'source'; @@ -248,7 +306,7 @@ class ActivityObject $this->source = $this->_getSource($element); - $this->content = $this->_getContent($element); + $this->content = ActivityUtils::getContent($element); $this->link = ActivityUtils::getPermalink($element); @@ -287,64 +345,6 @@ class ActivityObject } } - /** - * Get the content of an atom:entry-like object - * - * @param DOMElement $element The element to examine. - * - * @return string unencoded HTML content of the element, like "This -< is HTML." 
- * - * @todo handle remote content - * @todo handle embedded XML mime types - * @todo handle base64-encoded non-XML and non-text mime types - */ - - private function _getContent($element) - { - $contentEl = ActivityUtils::child($element, self::CONTENT); - - if (!empty($contentEl)) { - - $src = $contentEl->getAttribute(self::SRC); - - if (!empty($src)) { - throw new ClientException(_("Can't handle remote content yet.")); - } - - $type = $contentEl->getAttribute(self::TYPE); - - // slavishly following http://atompub.org/rfc4287.html#rfc.section.4.1.3.3 - - if ($type == 'text') { - return $contentEl->textContent; - } else if ($type == 'html') { - $text = $contentEl->textContent; - return htmlspecialchars_decode($text, ENT_QUOTES); - } else if ($type == 'xhtml') { - $divEl = ActivityUtils::child($contentEl, 'div'); - if (empty($divEl)) { - return null; - } - $doc = $divEl->ownerDocument; - $text = ''; - $children = $divEl->childNodes; - - for ($i = 0; $i < $children->length; $i++) { - $child = $children->item($i); - $text .= $doc->saveXML($child); - } - return trim($text); - } else if (in_array(array('text/xml', 'application/xml'), $type) || - preg_match('#(+|/)xml$#', $type)) { - throw new ClientException(_("Can't handle embedded XML content yet.")); - } else if (strncasecmp($type, 'text/', 5)) { - return $contentEl->textContent; - } else { - throw new ClientException(_("Can't handle embedded Base64 content yet.")); - } - } - } - static function fromNotice($notice) { $object = new ActivityObject(); @@ -359,6 +359,18 @@ class ActivityObject return $object; } + static function fromProfile($profile) + { + $object = new ActivityObject(); + + $object->type = ActivityObject::PERSON; + $object->id = $profile->getUri(); + $object->title = $this->getBestName(); + $object->link = $profile->profileurl; + + return $object; + } + function asString($tag='activity:object') { $xs = new XMLStringer(true); @@ -550,6 +562,11 @@ class Activity public $entry; // the source entry public 
$feed; // the source feed + public $summary; // summary of activity + public $content; // HTML content of activity + public $id; // ID of the activity + public $title; // title of the activity + /** * Turns a regular old Atom into a magical activity * @@ -557,8 +574,12 @@ class Activity * @param DOMElement $feed Atom feed, for context */ - function __construct($entry, $feed = null) + function __construct($entry = null, $feed = null) { + if (is_null($entry)) { + return; + } + $this->entry = $entry; $this->feed = $feed; @@ -629,6 +650,10 @@ class Activity if (!empty($targetEl)) { $this->target = new ActivityObject($targetEl); } + + $this->summary = ActivityUtils::childContent($entry, 'summary'); + $this->id = ActivityUtils::childContent($entry, 'id'); + $this->content = ActivityUtils::getContent($entry); } /** @@ -642,6 +667,47 @@ class Activity return null; } + function asString($namespace=false) + { + $xs = new XMLStringer(true); + + if ($namespace) { + $attrs = array('xmlns' => 'http://www.w3.org/2005/Atom', + 'xmlns:activity' => 'http://activitystrea.ms/spec/1.0/', + 'xmlns:ostatus' => 'http://ostatus.org/schema/1.0'); + } else { + $attrs = array(); + } + + $xs->elementStart('entry', $attrs); + + $xs->element('id', null, $this->id); + $xs->element('title', null, $this->title); + $xs->element('published', null, common_date_iso8601($this->time)); + $xs->element('content', array('type' => 'html'), $this->content); + + if (!empty($this->summary)) { + $xs->element('summary', null, $this->summary); + } + + if (!empty($this->link)) { + $xs->element('link', array('rel' => 'alternate', + 'type' => 'text/html'), + $this->link); + } + + // XXX: add context + // XXX: add target + + $xs->raw($this->actor->asString()); + $xs->element('activity:verb', null, $this->verb); + $xs->raw($this->object->asString()); + + $xs->elementEnd('entry'); + + return $xs->getString(); + } + private function _child($element, $tag, $namespace=self::SPEC) { return ActivityUtils::child($element, 
$tag, $namespace); From 4e90bd34e9aff1abd97a92b62fc7c48bf1ee5a9c Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 10:53:11 -0500 Subject: [PATCH 50/62] activity notification in Ostatus_profile --- plugins/OStatus/classes/Ostatus_profile.php | 31 +++++++++++++++------ 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/plugins/OStatus/classes/Ostatus_profile.php b/plugins/OStatus/classes/Ostatus_profile.php index d9cb7a6e1b..55f347a029 100644 --- a/plugins/OStatus/classes/Ostatus_profile.php +++ b/plugins/OStatus/classes/Ostatus_profile.php @@ -305,9 +305,9 @@ class Ostatus_profile extends Memcached_DataObject * Send an Activity Streams notification to the remote Salmon endpoint, * if so configured. * - * @param Profile $actor - * @param $verb eg Activity::SUBSCRIBE or Activity::JOIN - * @param string $object object of the action; if null, the remote entity itself is assumed + * @param Profile $actor Actor who did the activity + * @param string $verb Activity::SUBSCRIBE or Activity::JOIN + * @param Object $object object of the action; must define asActivityNoun($tag) */ public function notify($actor, $verb, $object=null) { @@ -322,11 +322,12 @@ class Ostatus_profile extends Memcached_DataObject $object = $this; } if ($this->salmonuri) { - $text = 'update'; // @fixme - $id = 'tag:' . common_config('site', 'server') . - ':' . $verb . - ':' . $actor->id . - ':' . time(); // @fixme + + $text = 'update'; + $id = TagURI::mint('%s:%s:%s', + $verb, + $actor->getURI(), + common_date_iso8601(date())); // @fixme consolidate all these NS settings somewhere $attributes = array('xmlns' => Activity::ATOM, @@ -356,6 +357,20 @@ class Ostatus_profile extends Memcached_DataObject } } + public function notifyActivity($activity) + { + if ($this->salmonuri) { + + $xml = $activity->asString(); + + $salmon = new Salmon(); // ? 
+ + $salmon->post($this->salmonuri, $xml); + } + + return; + } + function getBestName() { if ($this->isGroup()) { From df7c6b37c8b8f548a56193181acd5b2d8ee9bd9e Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 10:53:32 -0500 Subject: [PATCH 51/62] use notifyActivity() for notifications in OStatusPlugin --- plugins/OStatus/OStatusPlugin.php | 124 ++++++++++++++++++++++++------ 1 file changed, 99 insertions(+), 25 deletions(-) diff --git a/plugins/OStatus/OStatusPlugin.php b/plugins/OStatus/OStatusPlugin.php index 29799b0dc1..3aaa769db4 100644 --- a/plugins/OStatus/OStatusPlugin.php +++ b/plugins/OStatus/OStatusPlugin.php @@ -251,27 +251,47 @@ class OStatusPlugin extends Plugin * @param Profile $other * @return hook return value */ - function onEndUnsubscribe($user, $other) + function onEndUnsubscribe($profile, $other) { - if ($user instanceof Profile) { - $profile = $user; - } else if ($user instanceof User) { - $profile = $user->getProfile(); - } - $oprofile = Ostatus_profile::staticGet('profile_id', $other->id); - if ($oprofile) { - // Notify the remote server of the unsub, if supported. - $oprofile->notify($profile, ActivityVerb::UNFOLLOW, $oprofile); + $user = User::staticGet('id', $profile->id); - // Drop the PuSH subscription if there are no other subscribers. - $sub = new Subscription(); - $sub->subscribed = $other->id; - $sub->limit(1); - if (!$sub->find(true)) { - common_log(LOG_INFO, "Unsubscribing from now-unused feed $oprofile->feeduri"); - $oprofile->unsubscribe(); - } + if (empty($user)) { + return true; } + + $oprofile = Ostatus_profile::staticGet('profile_id', $other->id); + + if (empty($oprofile)) { + return true; + } + + // Drop the PuSH subscription if there are no other subscribers. 
+ + if ($other->subscriberCount() == 0) { + common_log(LOG_INFO, "Unsubscribing from now-unused feed $oprofile->feeduri"); + $oprofile->unsubscribe(); + } + + $act = new Activity(); + + $act->verb = ActivityVerb::UNFOLLOW; + + $act->id = TagURI::mint('unfollow:%d:%d:%s', + $profile->id, + $other->id, + common_date_iso8601(time())); + + $act->time = time(); + $act->title = _("Unfollow"); + $act->content = sprintf(_("%s stopped following %s."), + $profile->getBestName(), + $other->getBestName()); + + $act->actor = ActivityObject::fromProfile($subscriber); + $act->object = ActivityObject::fromProfile($other); + + $oprofile->notifyActivity($act); + return true; } @@ -350,10 +370,25 @@ class OStatusPlugin extends Plugin return true; } - // We have a local user subscribing to a remote profile; make the - // magic happen! + $act = new Activity(); - $oprofile->notify($subscriber, ActivityVerb::FOLLOW, $oprofile); + $act->verb = ActivityVerb::FOLLOW; + + $act->id = TagURI::mint('follow:%d:%d:%s', + $subscriber->id, + $other->id, + common_date_iso8601(time())); + + $act->time = time(); + $act->title = _("Follow"); + $act->content = sprintf(_("%s is now following %s."), + $subscriber->getBestName(), + $other->getBestName()); + + $act->actor = ActivityObject::fromProfile($subscriber); + $act->object = ActivityObject::fromProfile($other); + + $oprofile->notifyActivity($act); return true; } @@ -365,6 +400,7 @@ class OStatusPlugin extends Plugin * @param Notice $notice being favored * @return hook return value */ + function onEndFavorNotice(Profile $profile, Notice $notice) { $user = User::staticGet('id', $profile->id); @@ -375,10 +411,29 @@ class OStatusPlugin extends Plugin $oprofile = Ostatus_profile::staticGet('profile_id', $notice->profile_id); - if ($oprofile) { - $oprofile->notify($profile, ActivityVerb::FAVORITE, $notice); + if (empty($oprofile)) { + return true; } + $act = new Activity(); + + $act->verb = ActivityVerb::FAVORITE; + $act->id = 
TagURI::mint('favor:%d:%d:%s', + $profile->id, + $notice->id, + common_date_iso8601(time())); + + $act->time = time(); + $act->title = _("Favor"); + $act->content = sprintf(_("%s marked notice %s as a favorite."), + $profile->getBestName(), + $notice->uri); + + $act->actor = ActivityObject::fromProfile($profile); + $act->object = ActivityObject::fromNotice($notice); + + $oprofile->notifyActivity($act); + return true; } @@ -389,6 +444,7 @@ class OStatusPlugin extends Plugin * @param Notice $notice being favored * @return hook return value */ + function onEndDisfavorNotice(Profile $profile, Notice $notice) { $user = User::staticGet('id', $profile->id); @@ -399,10 +455,28 @@ class OStatusPlugin extends Plugin $oprofile = Ostatus_profile::staticGet('profile_id', $notice->profile_id); - if ($oprofile) { - $oprofile->notify($profile, ActivityVerb::UNFAVORITE, $notice); + if (empty($oprofile)) { + return true; } + $act = new Activity(); + + $act->verb = ActivityVerb::UNFAVORITE; + $act->id = TagURI::mint('disfavor:%d:%d:%s', + $profile->id, + $notice->id, + common_date_iso8601(time())); + $act->time = time(); + $act->title = _("Disfavor"); + $act->content = sprintf(_("%s marked notice %s as no longer a favorite."), + $profile->getBestName(), + $notice->uri); + + $act->actor = ActivityObject::fromProfile($profile); + $act->object = ActivityObject::fromNotice($notice); + + $oprofile->notifyActivity($act); + return true; } } From 727773cdfa8f229babbfe8a620645f2f4db9bbd8 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 11:24:04 -0500 Subject: [PATCH 52/62] redirect ostatussub if not logged in --- plugins/OStatus/actions/ostatussub.php | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/plugins/OStatus/actions/ostatussub.php b/plugins/OStatus/actions/ostatussub.php index 5244031437..bbbd1b7e69 100644 --- a/plugins/OStatus/actions/ostatussub.php +++ b/plugins/OStatus/actions/ostatussub.php @@ -58,7 +58,6 @@ class 
OStatusSubAction extends Action $this->showPage(); } - /** * Content area of the page * @@ -116,7 +115,18 @@ class OStatusSubAction extends Action function prepare($args) { parent::prepare($args); + + if (!common_logged_in()) { + // XXX: selfURL() didn't work. :< + common_set_returnto($_SERVER['REQUEST_URI']); + if (Event::handle('RedirectToLogin', array($this, null))) { + common_redirect(common_local_url('login'), 303); + } + return false; + } + $this->profile_uri = $this->arg('profile'); + return true; } @@ -173,13 +183,13 @@ class OStatusSubAction extends Action function validateFeed() { $profile_uri = trim($this->arg('profile')); - + if ($profile_uri == '') { $this->showForm(_m('Empty remote profile URL!')); return; } $this->profile_uri = $profile_uri; - + // @fixme validate, normalize bla bla try { $oprofile = Ostatus_profile::ensureProfile($this->profile_uri); From 09e2d181be3b72b1d8134273b9fdfa280895627c Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 12:54:52 -0500 Subject: [PATCH 53/62] remove unused profile code from salmonaction --- plugins/OStatus/lib/salmonaction.php | 71 ---------------------------- 1 file changed, 71 deletions(-) diff --git a/plugins/OStatus/lib/salmonaction.php b/plugins/OStatus/lib/salmonaction.php index 41e8322e89..abd8d4c833 100644 --- a/plugins/OStatus/lib/salmonaction.php +++ b/plugins/OStatus/lib/salmonaction.php @@ -161,77 +161,6 @@ class SalmonAction extends Action return Ostatus_profile::ensureActorProfile($this->act); } - /** - * @fixme merge into Ostatus_profile::ensureActorProfile and friends - */ - function createProfile() - { - $actor = $this->act->actor; - - $profile = new Profile(); - - $profile->nickname = $this->nicknameFromURI($actor->id); - - if (empty($profile->nickname)) { - $profile->nickname = common_nicknamize($actor->title); - } - - $profile->fullname = $actor->title; - $profile->bio = $actor->summary; // XXX: is that right? - $profile->profileurl = $actor->link; // XXX: is that right? 
- $profile->created = common_sql_now(); - - $id = $profile->insert(); - - if (empty($id)) { - common_log_db_error($profile, 'INSERT', __FILE__); - throw new Exception("Couldn't save new profile for $actor->id\n"); - } - - // XXX: add avatars - - $op = new Ostatus_profile(); - - $op->profile_id = $id; - $op->homeuri = $actor->id; - $op->created = $profile->created; - - // XXX: determine feed URI from source or Webfinger or whatever - - $id = $op->insert(); - - if (empty($id)) { - common_log_db_error($op, 'INSERT', __FILE__); - throw new Exception("Couldn't save new ostatus profile for $actor->id\n"); - } - - return $profile; - } - - /** - * @fixme should be merged into Ostatus_profile - */ - function nicknameFromURI($uri) - { - preg_match('/(\w+):/', $uri, $matches); - - $protocol = $matches[1]; - - switch ($protocol) { - case 'acct': - case 'mailto': - if (preg_match("/^$protocol:(.*)?@.*\$/", $uri, $matches)) { - return common_canonical_nickname($matches[1]); - } - return null; - case 'http': - return common_url_to_nickname($uri); - break; - default: - return null; - } - } - function saveNotice() { $oprofile = $this->ensureProfile(); From d372a352ec16f7fabd9059e283c98bd45d7272f0 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 12:55:40 -0500 Subject: [PATCH 54/62] make avatar attribute explicit for ActivityObject --- plugins/OStatus/lib/activity.php | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/OStatus/lib/activity.php b/plugins/OStatus/lib/activity.php index 5da6c75856..ea1303f199 100644 --- a/plugins/OStatus/lib/activity.php +++ b/plugins/OStatus/lib/activity.php @@ -258,6 +258,7 @@ class ActivityObject public $content; public $link; public $source; + public $avatar; /** * Constructor From 1a86bf9c65d2579d9245c6edcc968fed3d674f39 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 12:56:46 -0500 Subject: [PATCH 55/62] try different ways to get a profile from a feed --- plugins/OStatus/classes/Ostatus_profile.php | 
177 +++++++++++++++----- 1 file changed, 132 insertions(+), 45 deletions(-) diff --git a/plugins/OStatus/classes/Ostatus_profile.php b/plugins/OStatus/classes/Ostatus_profile.php index 55f347a029..793d451125 100644 --- a/plugins/OStatus/classes/Ostatus_profile.php +++ b/plugins/OStatus/classes/Ostatus_profile.php @@ -537,18 +537,58 @@ class Ostatus_profile extends Memcached_DataObject throw new FeedSubNoHubException(); } - // Ok this is going to be a terrible hack! - // Won't be suitable for groups, empty feeds, or getting - // info that's only available on the profile page. - $entries = $discover->feed->getElementsByTagNameNS(Activity::ATOM, 'entry'); - if (!$entries || $entries->length == 0) { - throw new FeedSubException('empty feed'); + // Try to get a profile from the feed activity:subject + + $feedEl = $discover->feed->documentElement; + + $subject = ActivityUtils::child($feedEl, Activity::SUBJECT, Activity::SPEC); + + if (!empty($subject)) { + $subjObject = new ActivityObject($subject); + return self::ensureActivityObjectProfile($subjObject, $feeduri, $salmonuri); } - $first = new Activity($entries->item(0), $discover->feed); - return self::ensureActorProfile($first, $feeduri, $salmonuri); + + // Otherwise, try the feed author + + $author = ActivityUtils::child($feedEl, Activity::AUTHOR, Activity::ATOM); + + if (!empty($author)) { + $authorObject = new ActivityObject($author); + return self::ensureActivityObjectProfile($authorObject, $feeduri, $salmonuri); + } + + // Sheesh. Not a very nice feed! Let's try fingerpoken in the + // entries. 
+ + $entries = $discover->feed->getElementsByTagNameNS(Activity::ATOM, 'entry'); + + if (!empty($entries) && $entries->length > 0) { + + $entry = $entries->item(0); + + $actor = ActivityUtils::child($entry, Activity::ACTOR, Activity::SPEC); + + if (!empty($actor)) { + $actorObject = new ActivityObject($actor); + return self::ensureActivityObjectProfile($actorObject, $feeduri, $salmonuri); + + } + + $author = ActivityUtils::child($entry, Activity::AUTHOR, Activity::ATOM); + + if (!empty($author)) { + $authorObject = new ActivityObject($author); + return self::ensureActivityObjectProfile($authorObject, $feeduri, $salmonuri); + } + } + + // XXX: make some educated guesses here + + throw new FeedSubException("Can't find enough profile information to make a feed."); } /** + * * Download and update given avatar image * @param string $url * @throws Exception in various failure cases @@ -581,6 +621,12 @@ class Ostatus_profile extends Memcached_DataObject } } + protected static function getActivityObjectAvatar($object) + { + // XXX: go poke around in the feed + return $object->avatar; + } + /** * Get an appropriate avatar image source URL, if available. 
* @@ -588,6 +634,7 @@ class Ostatus_profile extends Memcached_DataObject * @param DOMElement $feed * @return string */ + protected static function getAvatar($actor, $feed) { $url = ''; @@ -635,11 +682,17 @@ class Ostatus_profile extends Memcached_DataObject * @param string $salmonuri if we already know the salmon return channel URI * @return Ostatus_profile */ + public static function ensureActorProfile($activity, $feeduri=null, $salmonuri=null) { - $profile = self::getActorProfile($activity); + return self::ensureActivityObjectProfile($activity->actor, $feeduri, $salmonuri); + } + + public static function ensureActivityObjectProfile($object, $feeduri=null, $salmonuri=null) + { + $profile = self::getActivityObjectProfile($object); if (!$profile) { - $profile = self::createActorProfile($activity, $feeduri, $salmonuri); + $profile = self::createActivityObjectProfile($object, $feeduri, $salmonuri); } return $profile; } @@ -650,8 +703,18 @@ class Ostatus_profile extends Memcached_DataObject */ protected static function getActorProfile($activity) { - $homeuri = self::getActorProfileURI($activity); - return self::staticGet('uri', $homeuri); + return self::getActivityObjectProfile($activity->actor); + } + + protected static function getActivityObjectProfile($object) + { + $uri = self::getActivityObjectProfileURI($object); + return Ostatus_profile::staticGet('homeuri', $uri); + } + + protected static function getActorProfileURI($activity) + { + return self::getActivityObjectProfileURI($activity->actor); } /** @@ -659,15 +722,14 @@ class Ostatus_profile extends Memcached_DataObject * @return string * @throws ServerException */ - protected static function getActorProfileURI($activity) + protected static function getActivityObjectProfileURI($object) { $opts = array('allowed_schemes' => array('http', 'https')); - $actor = $activity->actor; - if ($actor->id && Validate::uri($actor->id, $opts)) { - return $actor->id; + if ($object->id && Validate::uri($object->id, $opts)) { + 
return $object->id; } - if ($actor->link && Validate::uri($actor->link, $opts)) { - return $actor->link; + if ($object->link && Validate::uri($object->link, $opts)) { + return $object->link; } throw new ServerException("No author ID URI found"); } @@ -675,12 +737,19 @@ class Ostatus_profile extends Memcached_DataObject /** * @fixme validate stuff somewhere */ + protected static function createActorProfile($activity, $feeduri=null, $salmonuri=null) { $actor = $activity->actor; - $homeuri = self::getActorProfileURI($activity); - $nickname = self::getAuthorNick($activity); - $avatar = self::getAvatar($actor, $activity->feed); + + self::createActivityObjectProfile($actor, $feeduri, $salmonuri); + } + + protected static function createActivityObjectProfile($object, $feeduri=null, $salmonuri=null) + { + $homeuri = self::getActivityObjectProfileURI($object); + $nickname = self::getActivityObjectNickname($object); + $avatar = self::getActivityObjectAvatar($object); if (!$homeuri) { common_log(LOG_DEBUG, __METHOD__ . " empty actor profile URI: " . var_export($activity, true)); @@ -703,9 +772,10 @@ class Ostatus_profile extends Memcached_DataObject $profile = new Profile(); $profile->nickname = $nickname; - $profile->fullname = $actor->displayName; - $profile->homepage = $actor->link; // @fixme - $profile->profileurl = $homeuri; + $profile->fullname = $object->title; + $profile->profileurl = $object->link; + $profile->created = common_sql_now(); + // @fixme bio // @fixme tags/categories // @fixme location? @@ -713,6 +783,7 @@ class Ostatus_profile extends Memcached_DataObject // @todo lat/lon/location? $ok = $profile->insert(); + if (!$ok) { throw new ServerException("Can't save local profile"); } @@ -720,16 +791,19 @@ class Ostatus_profile extends Memcached_DataObject // @fixme either need to do feed discovery here // or need to split out some of the feed stuff // so we can leave it empty until later. 
+ $oprofile = new Ostatus_profile(); - $oprofile->uri = $homeuri; - $oprofile->feeduri = $feeduri; - $oprofile->salmonuri = $salmonuri; + + $oprofile->uri = $homeuri; + $oprofile->feeduri = $feeduri; + $oprofile->salmonuri = $salmonuri; $oprofile->profile_id = $profile->id; - $oprofile->created = common_sql_now(); - $oprofile->modified = common_sql_now(); + $oprofile->created = common_sql_now(); + $oprofile->modified = common_sql_now(); $ok = $oprofile->insert(); + if ($ok) { $oprofile->updateAvatar($avatar); return $oprofile; @@ -738,24 +812,37 @@ class Ostatus_profile extends Memcached_DataObject } } - /** - * @fixme move this into Activity? - * @param Activity $activity - * @return string - */ - protected static function getAuthorNick($activity) + protected static function getActivityObjectNickname($object) { - // @fixme not technically part of the actor? - foreach (array($activity->entry, $activity->feed) as $source) { - $author = ActivityUtils::child($source, 'author', Activity::ATOM); - if ($author) { - $name = ActivityUtils::child($author, 'name', Activity::ATOM); - if ($name) { - return trim($name->textContent); - } - } + // XXX: check whatever PoCo calls a nickname first + + $nickname = self::nicknameFromURI($object->id); + + if (empty($nickname)) { + $nickname = common_nicknamize($object->title); } - return false; + + return $nickname; } + protected static function nicknameFromURI($uri) + { + preg_match('/(\w+):/', $uri, $matches); + + $protocol = $matches[1]; + + switch ($protocol) { + case 'acct': + case 'mailto': + if (preg_match("/^$protocol:(.*)?@.*\$/", $uri, $matches)) { + return common_canonical_nickname($matches[1]); + } + return null; + case 'http': + return common_url_to_nickname($uri); + break; + default: + return null; + } + } } From d5ba9e1b10071484ec9bdce9821207a37b446fc8 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 13:03:13 -0500 Subject: [PATCH 56/62] add feed-level author for user atom feeds --- 
lib/atomusernoticefeed.php | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/atomusernoticefeed.php b/lib/atomusernoticefeed.php index 9f224325c6..f71c721fec 100644 --- a/lib/atomusernoticefeed.php +++ b/lib/atomusernoticefeed.php @@ -54,9 +54,15 @@ class AtomUserNoticeFeed extends AtomNoticeFeed * * @return void */ + function __construct($user = null, $indent = true) { parent::__construct($indent); $this->user = $user; + if (!empty($user)) { + $profile = $user->getProfile(); + $this->addAuthor($profile->getBestName(), + $user->uri); + } } function getUser() From 5aeed9e04110c34bca12e601836797afd5acadba Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 13:11:00 -0500 Subject: [PATCH 57/62] add activity:subject to atom feeds --- plugins/OStatus/OStatusPlugin.php | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/plugins/OStatus/OStatusPlugin.php b/plugins/OStatus/OStatusPlugin.php index 3aaa769db4..b5cfb5caec 100644 --- a/plugins/OStatus/OStatusPlugin.php +++ b/plugins/OStatus/OStatusPlugin.php @@ -102,16 +102,21 @@ class OStatusPlugin extends Plugin $id = null; if ($feed instanceof AtomUserNoticeFeed) { - $salmonAction = 'salmon'; - $id = $feed->getUser()->id; + $salmonAction = 'usersalmon'; + $user = $feed->getUser(); + $id = $user->id; + $profile = $user->getProfile(); + $feed->setActivitySubject($profile->asActivityNoun('subject')); } else if ($feed instanceof AtomGroupNoticeFeed) { - $salmonAction = 'salmongroup'; - $id = $feed->getGroup()->id; + $salmonAction = 'groupsalmon'; + $group = $feed->getGroup(); + $id = $group->id; + $feed->setActivitySubject($group->asActivitySubject()); } else { - return; + return true; } - if (!empty($id)) { + if (!empty($id)) { $hub = common_config('ostatus', 'hub'); if (empty($hub)) { // Updates will be handled through our internal PuSH hub. 
@@ -123,6 +128,8 @@ class OStatusPlugin extends Plugin $salmon = common_local_url($salmonAction, array('id' => $id)); $feed->addLink($salmon, array('rel' => 'salmon')); } + + return true; } /** From f83941a67baab8acf3bb381ec7fce5480f9c8626 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 13:31:31 -0500 Subject: [PATCH 58/62] correct name check in ActivityObject::fromProfile() --- plugins/OStatus/lib/activity.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/OStatus/lib/activity.php b/plugins/OStatus/lib/activity.php index ea1303f199..f25a843c36 100644 --- a/plugins/OStatus/lib/activity.php +++ b/plugins/OStatus/lib/activity.php @@ -366,7 +366,7 @@ class ActivityObject $object->type = ActivityObject::PERSON; $object->id = $profile->getUri(); - $object->title = $this->getBestName(); + $object->title = $profile->getBestName(); $object->link = $profile->profileurl; return $object; From 068d8f92200e6fcef31db2fdca956a745422a1ac Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 13:31:58 -0500 Subject: [PATCH 59/62] correct call to time() in Ostatus_profile::notify() --- plugins/OStatus/classes/Ostatus_profile.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/plugins/OStatus/classes/Ostatus_profile.php b/plugins/OStatus/classes/Ostatus_profile.php index 793d451125..aab316c347 100644 --- a/plugins/OStatus/classes/Ostatus_profile.php +++ b/plugins/OStatus/classes/Ostatus_profile.php @@ -327,7 +327,7 @@ class Ostatus_profile extends Memcached_DataObject $id = TagURI::mint('%s:%s:%s', $verb, $actor->getURI(), - common_date_iso8601(date())); + common_date_iso8601(time())); // @fixme consolidate all these NS settings somewhere $attributes = array('xmlns' => Activity::ATOM, From de093537f6c7f3b83817146247fbd9edbed16935 Mon Sep 17 00:00:00 2001 From: Evan Prodromou Date: Sun, 21 Feb 2010 13:32:24 -0500 Subject: [PATCH 60/62] correct actor for OStatusPlugin::onEndUnsubscribe() --- 
plugins/OStatus/OStatusPlugin.php | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/plugins/OStatus/OStatusPlugin.php b/plugins/OStatus/OStatusPlugin.php index b5cfb5caec..b966661db6 100644 --- a/plugins/OStatus/OStatusPlugin.php +++ b/plugins/OStatus/OStatusPlugin.php @@ -294,7 +294,7 @@ class OStatusPlugin extends Plugin $profile->getBestName(), $other->getBestName()); - $act->actor = ActivityObject::fromProfile($subscriber); + $act->actor = ActivityObject::fromProfile($profile); $act->object = ActivityObject::fromProfile($other); $oprofile->notifyActivity($act); @@ -447,8 +447,9 @@ class OStatusPlugin extends Plugin /** * Notify remote users when their notices get de-favorited. * - * @param Profile or User $profile of local user doing the de-faving - * @param Notice $notice being favored + * @param Profile $profile Profile person doing the de-faving + * @param Notice $notice Notice being favored + * * @return hook return value */ From 588fe5d603abe40c45a1147eba18c8b5143babc4 Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sun, 21 Feb 2010 10:48:48 -0800 Subject: [PATCH 61/62] OStatus: debug aid - log the received Salmon post when it can't be parsed properly as an --- plugins/OStatus/lib/salmonaction.php | 1 + 1 file changed, 1 insertion(+) diff --git a/plugins/OStatus/lib/salmonaction.php b/plugins/OStatus/lib/salmonaction.php index abd8d4c833..4e5ed7fe6a 100644 --- a/plugins/OStatus/lib/salmonaction.php +++ b/plugins/OStatus/lib/salmonaction.php @@ -51,6 +51,7 @@ class SalmonAction extends Action if ($dom->documentElement->namespaceURI != Activity::ATOM || $dom->documentElement->localName != 'entry') { + common_log(LOG_DEBUG, "Got invalid Salmon post: $xml"); $this->clientError(_m('Salmon post must be an Atom entry.')); } // XXX: check the signature From 3e7a2a4014dd93637f5a666e238dde13e397523c Mon Sep 17 00:00:00 2001 From: Brion Vibber Date: Sun, 21 Feb 2010 11:11:37 -0800 Subject: [PATCH 62/62] Fix for doc action on systems that 
return false for globbing in a non-existing dir --- actions/doc.php | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/actions/doc.php b/actions/doc.php index eaf4b7df2d..459f5f0968 100644 --- a/actions/doc.php +++ b/actions/doc.php @@ -173,6 +173,10 @@ class DocAction extends Action } $local = glob(INSTALLDIR.'/local/doc-src/'.$this->title.'.*'); + if ($local === false) { + // Some systems return false, others array(), if dir didn't exist. + $local = array(); + } if (count($local) || isset($localDef)) { return $this->negotiateLanguage($local, $localDef); @@ -183,6 +187,9 @@ class DocAction extends Action } $dist = glob(INSTALLDIR.'/doc-src/'.$this->title.'.*'); + if ($dist === false) { + $dist = array(); + } if (count($dist) || isset($distDef)) { return $this->negotiateLanguage($dist, $distDef);