*
* USAGE:
* php www/index.php --moduleName=siteexporter --managerName=siteexporter --action=run --url="user/login"
*/
class SiteExporterMgr extends SGL_Manager
{
    /**
     * Logs the call, runs the parent manager setup and registers the
     * action mapping: every action is paired with 'cliResult'.
     */
    public function __construct()
    {
        SGL::logMessage(null, PEAR_LOG_DEBUG);

        parent::SGL_Manager();

        $this->_aActionsMapping = array(
            'list'          => array('list', 'cliResult'),
            'run'           => array('run', 'cliResult'),
            'runCollection' => array('runCollection', 'cliResult'),
        );
    }

    /**
     * Copies the command line options from the request into the input
     * registry, applying defaults where an option is missing.
     *
     * Defaults: action 'list', ext 'html', dir '/'. The terminal buffer
     * ($input->tty) is initialised with a single newline.
     *
     * @param SGL_Request  $req   source of the options
     * @param SGL_Registry $input receives tty, action, url, baseUrl, ext,
     *                            dir and params
     */
    public function validate(SGL_Request $req, SGL_Registry $input)
    {
        SGL::logMessage(null, PEAR_LOG_DEBUG);

        $this->validated = true;
        $input->tty      = "\n";
        $input->action   = $req->get('action') ? $req->get('action') : 'list';
        $input->url      = $req->get('url');
        $input->baseUrl  = $req->get('baseUrl');
        $input->ext      = $req->get('ext') ? $req->get('ext') : 'html';
        $input->dir      = $req->get('dir') ? $req->get('dir') : '/';
        $input->params   = $req->get('params');
    }

    /**
     * Replaces the terminal buffer with the help text describing the
     * available actions and their options.
     *
     * @param SGL_Registry $input  its tty property is overwritten
     * @param SGL_Output   $output not used
     */
    public function _cmd_list(SGL_Registry $input, SGL_Output $output)
    {
        SGL::logMessage(null, PEAR_LOG_DEBUG);

        // The closing marker stays at column 0 so the heredoc also parses
        // on PHP versions older than 7.3.
        $input->tty = <<<HELP
Available actions:
1. run export single page to file
--url url to export
--baseUrl replace links' base URL with specified value
--ext append file extension to file
(default: html)
--dir links replacement will be limited to specified
subset of URLs (example: /user/)
2. runCollection export collection of urls to files
--baseUrl replace links' base URL with specified value
--ext append file extension to file
(default: html)
--dir links replacement will be limited to specified
subset of URLs (example: /user/)
--params pass parameters to URL collectors
(format: k1:v1::k2:v2)
HELP;
    }

    /**
     * Exports a single page to a static file below SGL_WEB_ROOT.
     *
     * The page is downloaded with wget from
     * {site.baseUrl}/[{site.frontScriptName}/]{url} and stored as
     * SGL_WEB_ROOT/{url}.{ext}. The saved markup is then rewritten:
     *  - the front controller segment is removed from links under
     *    $input->dir (only when a front script name is configured),
     *  - the configured base URL is replaced by $input->baseUrl,
     *  - $input->ext is appended to links under $input->dir.
     *
     * A status line is written to the terminal in every case; when the
     * download or the final write fails, a failure line is printed
     * instead of "Exported to ...".
     *
     * @param SGL_Registry $input  reads url, ext, dir and baseUrl; url is
     *                             normalised in place, tty is flushed
     * @param SGL_Output   $output not used
     */
    public function _cmd_run(SGL_Registry $input, SGL_Output $output)
    {
        SGL::logMessage(null, PEAR_LOG_DEBUG);

        $baseUrl = SGL_Config::get('site.baseUrl');
        $fc      = SGL_Config::get('site.frontScriptName');

        // to map file correctly on file system
        $pageUrl = (string) $input->url;
        if (strpos($pageUrl, '%') !== false) {
            $pageUrl = urldecode($pageUrl);
        }

        // request url
        $input->url = trim($pageUrl, '/');
        $url        = $baseUrl . '/' . ($fc ? $fc . '/' : '') . $input->url;

        // prepare save location
        $saveFile = SGL_WEB_ROOT . '/' . $input->url . '.' . $input->ext;
        $this->_ensureDirIsWriteable(dirname($saveFile));

        // do the job; both arguments are escaped so that spaces or shell
        // metacharacters in the URL cannot alter the command
        $cmd        = 'wget -q -O ' . escapeshellarg($saveFile)
            . ' ' . escapeshellarg($url);
        $aCmdOutput = array();
        $status     = 0;
        exec($cmd, $aCmdOutput, $status);

        // wget creates the target file even when the download fails, so
        // the exit status decides whether there is anything to process
        $html = ($status === 0 && is_readable($saveFile))
            ? file_get_contents($saveFile)
            : false;
        if ($html === false) {
            $input->tty .= "Failed to export $url\n";
            $this->_flush($input->tty);
            return;
        }

        // remove front controller from links
        if ($fc) {
            // groups: 1 = base URL, 2 = directory prefix, 3 = rest of link
            $regex = '@(' . preg_quote($baseUrl, '@') . ')/'
                . preg_quote($fc, '@')
                . '(' . preg_quote($input->dir, '@') . ')(.*?)"@';
            $html = preg_replace($regex, '\\1\\2\\3"', $html);
            $html = str_replace("\"$baseUrl/$fc/\"", "\"$baseUrl\"", $html);
            $html = str_replace("\"$baseUrl/$fc\"", "\"$baseUrl\"", $html);
        }

        // replace base URL
        $newBaseUrl = (string) $input->baseUrl;
        $html       = str_replace($baseUrl, $newBaseUrl, $html);

        // add extension to all links under certain dir; an optional
        // trailing slash is dropped before the extension is appended
        $regex = '@(' . preg_quote($newBaseUrl . $input->dir, '@') . '.*?)/?"@';
        $html  = preg_replace($regex, '\\1.' . $input->ext . '"', $html);

        // output
        if (file_put_contents($saveFile, $html) === false) {
            $input->tty .= "Failed to write $saveFile\n";
        } else {
            $input->tty .= "Exported to $saveFile\n";
        }
        $this->_flush($input->tty);
    }

    /**
     * Exports every URL produced by the configured URL collectors.
     *
     * Collector class names are read as a comma separated list from the
     * 'SiteExporterMgr.strategies' config value. Names starting with
     * 'SGL_' are loaded from this module's lib/UrlCollector directory,
     * any other name is mapped to a file path by replacing underscores
     * with directory separators. Each collector is constructed with the
     * parsed --params and each collected URL is passed to _cmd_run().
     *
     * @param SGL_Registry $input  reads params; url is overwritten for
     *                             every exported page
     * @param SGL_Output   $output handed through to _cmd_run()
     */
    public function _cmd_runCollection(SGL_Registry $input, SGL_Output $output)
    {
        $aUrls       = array();
        $aParams     = $this->_parseParams($input->params);
        $sStrategies = SGL_Config::get('SiteExporterMgr.strategies');

        if ($sStrategies) {
            // collect urls
            $oCollection = new SGL_UrlCollection();
            foreach (explode(',', $sStrategies) as $collectorName) {
                $collectorName = trim($collectorName);
                if ($collectorName === '') {
                    // tolerate stray commas in the config value
                    continue;
                }

                // get path to UrlCollector class
                $aPath = explode('_', $collectorName);
                if ($aPath[0] === 'SGL') {
                    $strategyFile = SGL_MOD_DIR
                        . '/siteexporter/lib/UrlCollector/'
                        . array_pop($aPath) . '.php';
                } else {
                    $strategyFile = implode(DIRECTORY_SEPARATOR, $aPath) . '.php';
                }
                require_once $strategyFile;

                $oCollection->add(new $collectorName($aParams));
            }
            $aUrls = $oCollection->retrieve();
        }

        // export
        foreach ($aUrls as $url) {
            $input->url = $url;
            $this->_cmd_run($input, $output);
        }
    }

    /**
     * Action, which outputs CLI result.
     *
     * Appends a newline to the terminal buffer, prints it and terminates
     * the script.
     *
     * @param SGL_Registry $input
     * @param SGL_Output $output
     */
    public function _cmd_cliResult(SGL_Registry $input, SGL_Output $output)
    {
        SGL::logMessage(null, PEAR_LOG_DEBUG);

        $input->tty .= "\n";
        $this->_flush($input->tty, true);
    }

    /**
     * Send data to terminal.
     *
     * The string is echoed, the output buffer is flushed and the passed
     * variable is reset to an empty string, so the same buffer can be
     * reused for the next message.
     *
     * @param string $string      text to print, emptied on return
     * @param boolean $stopScript when true the script exits after printing
     */
    private function _flush(&$string, $stopScript = false)
    {
        echo $string;
        flush();
        $string = '';

        if ($stopScript) {
            exit;
        }
    }

    /**
     * Creates the directory (including parents) and opens up its
     * permissions when it is not writeable yet.
     *
     * Uses PEAR's System::mkDir with the -p switch, then sets mode 0777.
     *
     * @param string $dir directory path
     */
    private function _ensureDirIsWriteable($dir)
    {
        if (!is_writeable($dir)) {
            require_once 'System.php';
            System::mkDir(array('-p', $dir));

            $mask = umask(0);
            chmod($dir, 0777);
            umask($mask);
        }
    }

    /**
     * Parses the --params option into an associative array.
     *
     * Pairs are separated by '::', key and value by a single ':'
     * (k1:v1::k2:v2). Only the first ':' of a pair splits it, so a value
     * may itself contain single colons. Pairs without a ':' are ignored.
     *
     * @param string $paramString raw option value, may be empty
     *
     * @return array key => value map, empty when nothing could be parsed
     */
    private function _parseParams($paramString)
    {
        $aRet = array();
        if (empty($paramString)) {
            return $aRet;
        }

        foreach (explode('::', $paramString) as $paramKeyValue) {
            $aVar = explode(':', $paramKeyValue, 2);
            if (isset($aVar[1])) {
                $aRet[$aVar[0]] = $aVar[1];
            }
        }
        return $aRet;
    }
}
/**
* @package SGL
* @author Dmitri Lakachauskis
*/
class SGL_UrlCollection
{
    /**
     * Registered collectors, in the order they were added.
     *
     * @var array
     */
    private $_aCollectors = array();

    /**
     * Registers one more URL collector.
     *
     * @param SGL_UrlCollector $oCollector
     */
    public function add(SGL_UrlCollector $oCollector)
    {
        array_push($this->_aCollectors, $oCollector);
    }

    /**
     * Asks every registered collector for its URLs and returns them
     * merged into one array, in registration order.
     *
     * @return array empty when no collector has been added
     */
    public function retrieve()
    {
        // array_merge() starts from an empty array so that the result is
        // still an array when no collector is registered
        $aChunks = array(array());
        foreach ($this->_aCollectors as $oCollector) {
            $aChunks[] = $oCollector->generate();
        }
        return call_user_func_array('array_merge', $aChunks);
    }
}
?>