ez projects / data_import / forum / example source handler / example source handler
You need to be logged in to post messages in the forums. New users may register here.
Member since: Posts: 15 |
Thursday 29 April 2010 6:03:53 am — A custom source handler for importing multiple eZ content objects that have been exported into a single XML source file.
Note 1: This requires a small modification to ImportOperator.php (below) so that you can pass the name of the file to the command line command: #php extension/data_import/scripts/run.php -s <siteaccess> -i ImportOperator -d MyCustomSourceXMLHandler -f <name-of-source-XML-file-without.xml-suffix>
Note 2: This also assumes that your source XML is in the same format as those exports found in the data_import 'examples' folder (or has been transformed using XSL into this format from another XML format, say from the ezxmlexport extension)
Note 3: This only works for 'folder' and 'article' content-classes and doesn't seem to want to import inline images, but I'm working on it.
Note 4: You will of course need to add this class to the ezp_extension.php array in order to eZ to recognise your new class.
ImportOperator.php Modification To come...
Custom Source Handler
<?php
/**
 * Generic import handler for exported eZ content-class objects.
 *
 * Relies on the 'n' export method from ezxmlexport. Heavily modified version
 * of the example script XMLFolders.php.
 *
 * @author  R.Michell April 2010
 * @package data_import
 */
class MyCustomSourceXMLHandler extends XmlHandlerPHP
{
    /** Prefix of the remote id given to every imported object; the source row id is appended. */
    const REMOTE_IDENTIFIER = 'xmlimport_';

    /** Default handler name. */
    public $handlerTitle = 'Generic Handler';

    /** Log file-name. */
    public $logfile = 'data_import.log';

    /**
     * Directory the log file is written to.
     * Was hard-coded inside logger(); the default is that same path.
     * TODO: derive it from the eZ Publish install path instead.
     */
    public $log_dir = '/var/www/htdocs/ez-GeoNet/var/log/';

    /** Source XML root node element. */
    public $root_node = 'all';

    /** Path to the parent dir of the source XML file(s) for import. */
    public $xml_source_path = 'extension/data_import/dataSource/exports';

    /** ezxmlexport uses an export name for the export's parent dir and XML filename. */
    public $xml_source_file;

    /** Fallback to root node ('Main') if a parent_id cannot be found for an imported object. */
    public $parent_id_fallback = 2;

    /**
     * Log targets (directory + file) already attached to the ezcLog mapper.
     * Static because ezcLog::getInstance() is shared by the whole process.
     */
    private static $registered_log_targets = array();

    /** Source field name => eZ attribute identifier, per content class. */
    private static $field_maps = array(
        'folder' => array(
            'shortname'        => 'short_name',
            'showsubitems'     => 'show_children',
            'publishdate'      => 'publish_date',
            'shortdescription' => 'short_description',
        ),
        'article' => array(
            'name'             => 'title',
            'shortname'        => 'short_title',
            'description'      => 'body',
            'publishdate'      => 'publish_date',
            'shortdescription' => 'intro',
        ),
    );

    /** Mapping used for every content class that has no entry in $field_maps. */
    private static $default_field_map = array(
        'shortname'    => 'short_name',
        'showsubitems' => 'show_children',
        'publishdate'  => 'publish_date',
    );

    /**
     * Constructor: records the start of the import in the log.
     *
     * Declared as __construct() because a method named after the class is no
     * longer treated as a constructor by PHP 8. The parent constructor is not
     * called, exactly as before.
     */
    public function __construct()
    {
        $this->writeLog('Starting data_import process', $this->logfile);
    }

    /**
     * Write a message to a log file through the ezcomponents EventLog.
     *
     * The file writer is attached to the shared ezcLog mapper only once per
     * target. Attaching a fresh rule on every call made each message appear
     * once for every call made so far.
     *
     * @param string $message Text to log.
     * @param string $logfile Log file name, relative to $this->log_dir.
     * @return void
     * @todo Relies on presence of ezcomponents - run a check for its existence.
     */
    private function logger($message, $logfile)
    {
        $log = ezcLog::getInstance();

        $target = $this->log_dir . $logfile;
        if (!isset(self::$registered_log_targets[$target])) {
            $writer = new ezcLogUnixFileWriter($this->log_dir, $logfile);
            // The default filter accepts every message; "true" lets later rules run as well.
            $rule = new ezcLogFilterRule(new ezcLogFilter, $writer, true);
            $log->getMapper()->appendRule($rule);
            self::$registered_log_targets[$target] = true;
        }

        $log->log($message, ezcLog::WARNING);
    }

    /**
     * Log a message to $newlogfile, or to $this->logfile when none is given.
     *
     * @param string $message    Text to log.
     * @param string $newlogfile Optional alternative log file name.
     * @return void
     */
    private function writeLog($message, $newlogfile = '')
    {
        $this->logger($message, $newlogfile ? $newlogfile : $this->logfile);
    }

    /**
     * Log a message prefixed with the remote id of the current row.
     *
     * The row id is left empty when no row is available, so this can be used
     * from readData() too.
     * NOTE(review): assumes current_row is unset/null until the parent class
     * starts iterating rows - confirm against XmlHandlerPHP.
     *
     * @param string $message Text to log.
     * @return void
     */
    private function logRowMessage($message)
    {
        $row_id = '';
        if (isset($this->current_row) && is_object($this->current_row)) {
            $row_id = $this->current_row->getAttribute('id');
        }
        $this->writeLog(self::REMOTE_IDENTIFIER . $row_id . ': ' . $message);
    }

    /**
     * Map the current source XML field name to an eZ attribute identifier.
     *
     * The target content class is resolved once per call, so a missing class
     * is logged once rather than twice.
     *
     * @return string The mapped identifier, or the field name itself when unmapped.
     */
    function geteZAttributeIdentifierFromField()
    {
        $field_name = $this->current_field->getAttribute('name');
        $class = $this->getTargetContentClass();

        $map = self::$default_field_map;
        if (is_string($class) && isset(self::$field_maps[$class])) {
            $map = self::$field_maps[$class];
        }

        return isset($map[$field_name]) ? $map[$field_name] : $field_name;
    }

    /**
     * Handles 'stringy' fields (Text, XML etc) before storing them in eZ Publish.
     *
     * @return mixed Unix timestamp for 'publishdate'; converted XML text, or
     *               false on conversion failure, for 'shortdescription' and
     *               'description'; the raw node value for any other field.
     */
    function getValueFromField()
    {
        $field_name = $this->current_field->getAttribute('name');
        $raw_value = $this->current_field->nodeValue;

        switch ($field_name) {
            case 'publishdate':
                return $this->usDateToTimestamp($raw_value);

            case 'shortdescription':
            case 'description':
                return $this->htmlFieldToXmlText($raw_value, $field_name);

            default:
                return $raw_value;
        }
    }

    /**
     * Convert a US formatted date (month/day/year) to a Unix timestamp.
     *
     * Falls back to the current time unless the value consists of exactly
     * three all-digit parts. Non-numeric parts would otherwise reach
     * mktime(), which rejects them with a TypeError on PHP 8. Out-of-range
     * values are still normalised by mktime() as before.
     *
     * @param string $us_formatted_date e.g. "4/29/2010".
     * @return int Unix timestamp at midnight of that day, or time().
     */
    private function usDateToTimestamp($us_formatted_date)
    {
        $parts = array_map('trim', explode('/', $us_formatted_date));
        if (count($parts) != 3) {
            return time();
        }
        foreach ($parts as $part) {
            if (!ctype_digit($part)) {
                return time();
            }
        }

        return mktime(0, 0, 0, (int) $parts[0], (int) $parts[1], (int) $parts[2]);
    }

    /**
     * Convert an HTML field value to eZ XML text.
     *
     * In-page anchors such as <a name="eztoc27594_0_1" id="eztoc27594_0_1"></a>
     * do not get imported, so they are stripped first. The pattern uses [^"]*
     * rather than a greedy .* so that two anchors on one line do not swallow
     * the content between them. The stripping works on a local copy; the
     * source DOM node is left untouched.
     *
     * @param string $html       Raw field content.
     * @param string $field_name Source field name, used in the failure message.
     * @return mixed Result of XmlTextParser::Html2XmlText(), or false on failure.
     */
    private function htmlFieldToXmlText($html, $field_name)
    {
        $stripped = preg_replace('#<a name="eztoc[^"]*" id="eztoc[^"]*"></a>#', '', $html);
        if ($stripped !== null) {
            // preg_replace() yields null on a PCRE error; keep the raw text then.
            $html = $stripped;
        }

        $xml_text_parser = new XmlTextParser();
        $xmltext = $xml_text_parser->Html2XmlText($html);
        if ($xmltext !== false) {
            return $xmltext;
        }

        $this->logRowMessage('Failed to parse XML for attribute: ' . $field_name);
        return false;
    }

    /**
     * Logic where to place the current content node into the content tree.
     *
     * Looks the parent up by prefixed remote id first (conventional method
     * taken from the XMLFolders.php example), then treats the row's parent_id
     * as a plain content-object id.
     *
     * @return mixed main_node_id of the parent object, or $this->parent_id_fallback.
     */
    function getParentNodeId()
    {
        $parent_remote_id = $this->current_row->getAttribute('parent_id');
        if (!$parent_remote_id) {
            return $this->parent_id_fallback;
        }

        $eZ_object = eZContentObject::fetchByRemoteID(self::REMOTE_IDENTIFIER . $parent_remote_id);
        if (!$eZ_object) {
            // Not found under the prefixed remote id: lose the REMOTE_IDENTIFIER
            // and try the value with eZContentObject::fetch().
            $eZ_object = eZContentObject::fetch($parent_remote_id);
        }

        if ($eZ_object) {
            return $eZ_object->attribute('main_node_id');
        }
        return $this->parent_id_fallback;
    }

    /**
     * Remote id for the current row: REMOTE_IDENTIFIER followed by the row's 'id' attribute.
     *
     * @return string
     */
    function getDataRowId()
    {
        return self::REMOTE_IDENTIFIER . $this->current_row->getAttribute('id');
    }

    /**
     * Content class of the current row, read from its 'type' attribute.
     *
     * - Allows the flexibility to extract data from multiple content-classes in a
     *   single source XML file from ezxmlexport.
     * - See comments by Joachim Karl at: http://ez.no/developer/contribs/import_export/data_import
     *
     * @return mixed The class identifier, or false (after logging) when the attribute is empty.
     */
    function getTargetContentClass()
    {
        $type = $this->current_row->getAttribute('type');
        if ($type) {
            return $type;
        }

        $this->logRowMessage('eZ content-class not found. Given class name was: ' . $type);
        return false;
    }

    /**
     * Locate and parse the source XML file.
     *
     * The file is expected at
     * <xml_source_path>/<xml_source_file>/<xml_source_file>.transformed.xml
     *
     * @return mixed Result of parse_xml_document(), or false (after logging)
     *               when the source is not configured or is not a file.
     */
    function readData()
    {
        if (!isset($this->xml_source_path) || !isset($this->xml_source_file)) {
            $this->logRowMessage('Source export file cannot be found or is not set. Please check files/dirs exist and permissions are set correctly' . "\n");
            return false;
        }

        $filename = $this->xml_source_path . '/' . $this->xml_source_file . '/' . $this->xml_source_file . '.transformed.xml';
        if (!is_file($filename)) {
            $this->logRowMessage('Cannot open ' . $filename . ' for reading. Please check files/dirs exist and permissions are set correctly' . "\n");
            return false;
        }

        return $this->parse_xml_document($filename, $this->root_node);
    }

    /**
     * Hook run after an object has been published; nothing to do here.
     *
     * NOTE(review): $force_exit is taken by value, so the assignment below is
     * not visible to the caller - confirm against the parent's signature.
     *
     * @param mixed $eZ_object  The published object.
     * @param mixed $force_exit Overwritten with false locally.
     * @return bool Always true.
     */
    function post_publish_handling( $eZ_object, $force_exit )
    {
        $force_exit = false;
        return true;
    }
}
?> Russell Michell, Wellington, New Zealand. |
You need to be logged in to post messages in the forums. New users may register here.