# Make a structure exporter that works with plone 2.0
from csv import writer, QUOTE_MINIMAL
from xml.dom.minidom import getDOMImplementation
from ConfigParser import ConfigParser
from StringIO import StringIO
import os, shutil
from types import FileType, ListType, TupleType, StringTypes
from Globals import InitializeClass
 
from AccessControl import ClassSecurityInfo, AuthEncoding
from AccessControl.Permissions import use_mailhost_services
from Acquisition import aq_inner
from OFS.SimpleItem import SimpleItem
from OFS.Image import File
from DateTime import DateTime
import time
 
try:
    from Products.CMFCore.permissions import ManagePortal
except ImportError:
    from Products.CMFCore.CMFCorePermissions import ManagePortal
 
try:
    set()
except NameError:
    from sets import Set as set
 
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.CMFCore.utils import UniqueObject
from Products.CMFCore.utils import getToolByName
 
import zLOG
from config import SEPARATOR, PROPS, ALLPROPS, NONATPROPS, TYPEMAP, DEFAULT_ENCODING
from config import BRAIN_METHODS
try:
    from App.special_dtml import DTMLFile
except:
    from Globals import DTMLFile
 
class ContentExporter(UniqueObject, SimpleItem):
    """An old school Product for plone 2 that sucks out content into plone 3
       generic setup style structure. From Andreas Jung's idea
       http://www.zopyx.com/blog/when-the-plone-migration-fails-doing-content-migration-only
       Exports content and folder structure
       Content gets default dublin core and workflow metadata in properties files
       If archetypes are found their schema data is added to the properties.
    """  
 
    try:
        __implements__ = (SimpleItem.__implements__,)
    except:
        pass
 
    meta_type = 'Content Exporter Tool'
    plone_tool = 1
    id = 'portal_exportcontent'
    title = 'Exports content from pre-plone 3 site to generic setup structure folder'
    security = ClassSecurityInfo()
 
    manage_options = ( ({ 'label' : 'Overview', 'action' : 'manage_overview' }
                     ,  { 'label' : 'Export content', 'action' : 'manage_export' }
                     ,
                     )
                     )
 
    #
    #   ZMI methods
    #
    security.declareProtected( ManagePortal, 'manage_overview' )
    manage_overview = PageTemplateFile(os.path.join('www','explainContentMigrator.pt'), globals(),
                                   __name__='manage_overview')
 
    security.declareProtected( ManagePortal, 'manage_export' )
    manage_export = PageTemplateFile(os.path.join('www','exportContentMigrator.pt'), globals(),
                                   __name__='manage_export')
 
    def __init__(self):
        """ Set savepath and log """
        self.savepath = self.get_var_path()
        self.out = []
        self.format = 'CSV'
        self.dom = None
 
    def write_folder_element(self, id, objtype, doc):
        """ XML folders best gathered by type and listed """
        if self.format == 'XML':
            text = doc[0].createTextNode(id)
            name = objtype.lower()
            name = name.replace(' ','')
            child = doc[0].createElement(name)
            child.appendChild(text)
            plural = '%ss' % name
            nodes = doc[0].getElementsByTagName(plural)
            if nodes:
                element = nodes[0]
            else:
                element = doc[0].createElement(plural)
            element.appendChild(child)
            doc[1].appendChild(element)            
        else:
            self.write_element(name=id, data=objtype, doc=doc, space=False)
 
    def write_element(self, name, data, listed=False, doc=None, space=True):
        """ abstract output writing to allow different formats """
        if self.format == 'XML':
            node = None
            if listed:
                if name.endswith('s'):
                    item_name = name[:-1]
                else:
                    item_name = 'item'
                node = doc[0].createElement(item_name)
                for item in data:
                    text = doc[0].createTextNode(item)
                    node.appendChild(text)
            else:
                try:
                    node = doc[0].createTextNode(data)
                except:
                    pass
            if node:
                element = doc[0].createElement(name)
                element.appendChild(node)
                doc[1].appendChild(element)
        else:
            if listed:
                if data:
                    data = SEPARATOR.join(data)
                else:
                    data = ''
            if space:
                doc[1].writerow((name, ' %s' % data))
            else:
                doc[1].writerow((name, data))
            return 
 
    def get_doc(self, delimiter=':'):
        """ Get doc as xml minidom or csv string stream
            and its writer as a tuple  
        """
        if self.format == 'XML':
            if delimiter == ',':
                tag = 'items'
            else:
                tag = 'properties'
            doc = self.dom.createDocument(None, tag, None)
            node = doc.documentElement
            return (doc, node)
        else:
            ostream = StringIO()
            return (ostream, writer(ostream,
                                    delimiter=delimiter,
                                    quoting=QUOTE_MINIMAL))
 
    def get_output(self, doc, headfoot=None):
        """ get output from doc for writing to file
            Unfortunately csv or xml writers both 
            require some minor format and header of footer tweaks
        """
        if self.format == 'XML':
            output = doc[0].toprettyxml(indent="  ")
            if headfoot and headfoot != 'DEFAULT':
                # Add html at end of xml
                # Close and reopen any existing CDATA
                headfoot = headfoot.replace(']]>',']]]]><![CDATA[>')
                # Wrap in CDATA to prevent tags breaking xml validation
                return output.replace('</properties>',
                                      '''<html><![CDATA[\n%s]]></html>
                                         \n</properties>''' % headfoot)
            else:
                return output
        else:
            if headfoot == 'DEFAULT':
                output = "[%s]\n%s" % (headfoot, doc[0].getvalue())
            elif headfoot:
                output = "%s\n\n%s" % (doc[0].getvalue(), headfoot)                
            else:
                output = doc[0].getvalue()
            #if hasattr(os, 'O_BINARY'):
            #    output = output.replace('"\r','')
            #    output = output.replace("\r", '')
            # kill minimal quotes
            output = output.replace('\r', '')
            output = output.replace('"\n', '\n')
            output = output.replace(':" ', ': ')
            return output
        return 'NO FORMAT SUPPLIED'
 
    def get_var_path(self):
        """ Return the path of the 'structure' folder to export into.

            Looked for in this order:
            1. INSTANCE_HOME/var  - old style zope layout
            2. the var folder of the directory that holds 'parts' or
               'buildout-cache' when INSTANCE_HOME is inside one of them
               (buildout, or the test runner where instance home is the
               buildout-cache!), or that directory itself if it has no var
            3. /tmp or \\temp
            4. the temporary folder reported by the tempfile module

            The structure folder itself is not created here.
        """
        home = INSTANCE_HOME
        var = os.path.join(home, 'var')
        if os.path.exists(var):
            return os.path.join(var, 'structure')
        for folder in ['parts', 'buildout-cache']:
            parts = home.find(folder)
            if parts > -1:
                buildout = home[0:parts]
                buildout_var = os.path.join(buildout, 'var')
                if os.path.exists(buildout_var):
                    # Use the var that was found, not INSTANCE_HOME/var
                    # which is already known to be missing
                    return os.path.join(buildout_var, 'structure')
                else:
                    return os.path.join(buildout, 'structure')
        for tmp in ['/tmp', '\\temp']:
            if os.path.exists(tmp):
                return os.path.join(tmp, 'structure')
        # Always hand back a usable path rather than None
        import tempfile
        return os.path.join(tempfile.gettempdir(), 'structure')
 
 
    security.declareProtected( ManagePortal, 'manage_runExport' )
    def manage_runExport(self, portal=None, root=''):
        """ run the export if root submitted -
            option to pass in the portal object so this can be run
            more easily by external scripts
        """
        request = getattr(self, 'REQUEST', {})
        if not root:
            root = request.get('root', '')
        format = request.get('format', 'CSV')
        if root:
            if not portal:
                portal = getToolByName(self, 'portal_url').getPortalObject()
            self.export(root=root,
                        portal=portal,
                        users=request.get('exportusers', None),
                        format = format
                        )
        #if self.dom:
        #    self.dom.unlink()
        if hasattr(request, 'RESPONSE'):
            request.RESPONSE.redirect('manage_export')
 
    security.declareProtected( ManagePortal, 'getLog' )
    def getLog(self):
        """ return the out log file of export actions """
        return self.out 
 
    def write_file(self, path, filename='', data='', modified=None):
        """ Save the file directly to the file system var folder
            If no file or data is supplied this just creates a folder
        """
        currentpath = self.savepath
        for folder in path.split('/'):
            if folder:
                folderpath = os.path.join(currentpath, folder)
            else:
                folderpath = self.savepath
            if folderpath:
                if not os.path.exists(folderpath):        
                    try:
                        os.mkdir(folderpath)
                        self.out.append('Created %s' % folderpath)
                    except:
                        self.out.append('Failed to create or replace ' + folderpath)
                        return
                currentpath = folderpath
 
        if filename:
            if self.format == 'XML':
                if filename.startswith('.'):
                    filename = '%s.xml' % filename
                elif filename.endswith('.html'):
                    filename = filename.replace('.html', '.xml')
            filepath=os.path.join(folderpath, filename)
            if os.path.exists(filepath) and os.path.isfile(filepath):
                os.remove(filepath)
            ofd = None
            try:
                # Treat everything as binary if Windows so line endings dont get tampered with
                if hasattr(os, 'O_BINARY'):
                    ofd = os.open(filepath,os.O_CREAT | os.O_WRONLY | os.O_APPEND | os.O_BINARY)
                else:
                    ofd = os.open(filepath,os.O_CREAT | os.O_WRONLY | os.O_APPEND)
            except:
                self.out.append("Failed to open %s for writing" % filepath)
            if ofd:
                binary = 0
                if type(data) in StringTypes:
                    try:
                        os.write(ofd,data)
                    except UnicodeEncodeError:
                        os.write(ofd, data.encode(DEFAULT_ENCODING))
                    except:
                        binary = 1
                else:
                    binary = 1
                if binary:
                    # Try to cope with string buffers or strings or None
                    try:
                        while data is not None:
                            os.write(ofd, data.data)
                            data = data.next
                    except:
                        try:
                            os.write(ofd, data.data)
                        except:
                            try:
                                data = str(data.data)
                                if data:
                                    os.write(ofd, data)
                            except Exception, error:
                                self.out.append("Sorry failed to write to %s due to %s" % (filepath,error))
                self.set_times(ofd, modified)
                os.close(ofd)
        #debug self.out.append('Wrote %s' % filepath)
        return
 
    def set_times(self, ofd, modified):
        """ Set the last access and modified time for the file system.

            ofd      -- the file to stamp, handed on to os.utime as it is
                        NOTE(review): os.utime wants a path on older
                        pythons, so an open descriptor may be ignored
            modified -- the modification time: an object with a
                        timeTime() method, or a date string that DateTime
                        can parse. Nothing is done if it is empty

            The access time is set to now. This is best effort only, a
            failure leaves the times of the file as they were.
        """
        if not modified:
            return
        try:
            if not hasattr(modified, 'timeTime'):
                # Dates arrive here as strings, turn them back into dates
                modified = DateTime(modified)
            os.utime(ofd, (time.time(), modified.timeTime()))
        except (KeyboardInterrupt, SystemExit):
            raise
        except:
            # Date that cannot be parsed or file that cannot be stamped
            pass
        return
 
    def export_users(self, portal):
        """ Export the members of the portal below /acl_users:
            users, roles and groups as XML files rendered from the
            templates in the xml folder, and one memberdata properties
            file per member listed in a .objects file
        """
        listing = self.get_doc(delimiter=',')
        membership = getToolByName(portal, 'portal_membership')
        memberdata = getToolByName(portal, 'portal_memberdata')
        self.write_file('/acl_users','','')
        path = '/acl_users/portal_memberdata'
        user_info = []
        for user_id in memberdata._members.keys():
            member = membership.getMemberById(user_id)
            if member is None:
                continue
            self.write_element(name=user_id, data='Memberdata',
                               doc=listing, space=False)
            doc = self.get_doc()
            # Stored password first, else the one the member object gives,
            # encrypted if it is not already
            try:
                password = portal.acl_users._user_passwords[user_id]
            except:
                password = member.getPassword()
                if password and not AuthEncoding.is_encrypted(password):
                    password = AuthEncoding.pw_encrypt(password)
            if not password:
                password = 'this user is going to need a password reset'

            user_info.append({'user_id': user_id,
                              'login_name': member.getProperty('login_name',
                                                               user_id),
                              'password_hash': password,
                             })
            # Only properties that have a value are written
            for prop in memberdata.propertyIds():
                data = member.getProperty(prop)
                if data:
                    self.write_element(name=prop, data=data, doc=doc)
            user_output = self.get_output(doc)
            if self.format == 'XML':
                self.write_file(path, '%s.xml' % user_id, user_output)
            else:
                self.write_file(path, user_id, "[DEFAULT]\n%s" % user_output)
            #TODO: add portait export
            #obj = memberdata._getPortrait(user_id)
            #if obj:
            #    self.write_file(path,'%s.jpg' % user_id,obj.data)

        users_xml = DTMLFile(os.path.join('xml', 'zodbusers.xml'),
                             globals()).__of__(portal)
        users_options = {'title': 'source_users', 'users': user_info}
        self.write_file('/acl_users', 'source_users.xml',
                        users_xml(options=users_options))

        roles_xml = DTMLFile(os.path.join('xml', 'zodbroles.xml'),
                             globals()).__of__(portal)
        roles_options = self._getRoleInfo(portal)
        self.write_file('/acl_users', 'portal_role_manager.xml',
                        roles_xml(options=roles_options))

        groups_xml = DTMLFile(os.path.join('xml', 'zodbgroups.xml'),
                              globals()).__of__(portal)
        groups_options = self._getGroupInfo(portal)
        self.write_file('/acl_users', 'source_groups.xml',
                        groups_xml(options=groups_options))

        output = self.get_output(listing)
        # strip Windows line endings
        if hasattr(os, 'O_BINARY'):
            output = output.replace('\r', '')
        self.write_file(path, '.objects', output)
        self.out.append('Exported member data')
 
    def _getRoleInfo(self, portal):
        """ Gather the roles of the portal and who holds them for the
            roles template - as the PluggableAuthService exportimport
            method does, but without needing PAS.
            Returns {'title': ..., 'roles': [one dictionary per role]}
        """
        # Each source of role ids is tried in turn until one works
        try:
            role_ids = portal.acl_users.listRoleInfo()
        except:
            try:
                role_ids = portal.portal_membership.getCandidateLocalRoles(portal)
            except:
                role_ids = portal.__ac_roles__
        try:
            assignments = portal.acl_users._principal_roles.items()
        except:
            assignments = portal.acl_users.getLocalRolesForDisplay(portal)

        roles = [{'role_id': role_id,
                  'title': role_id,
                  'description': '',
                  'principals': self._listRolePrincipals(assignments,
                                                         role_id),
                 } for role_id in role_ids]
        return {'title': 'portal_role_manager', 'roles': roles}
 
    def _listRolePrincipals(self, userroles, role_id):
        """ Does the same as method in PluggableAuthService exportimport
            but doesnt require PAS
        """
        result = []
        for userrole in userroles:
            if role_id in userrole[1]:
                result.append(userrole[0])
        return tuple(result)
 
 
    def _getGroupInfo(self, portal):
        """ Gather the groups of the portal and their members for the
            groups template - as the PluggableAuthService exportimport
            method does, but without needing PAS.
            Returns {'title': ..., 'groups': [one dictionary per group]}
        """
        try:
            group_ids = portal.acl_users.listGroupInfo()
        except:
            group_ids = portal.portal_groups.listGroupIds()
        try:
            memberships = portal.acl_users._principal_groups.items()
        except:
            memberships = None

        groups = []
        for group_id in group_ids:
            if memberships:
                members = self._listRolePrincipals(memberships, group_id)
            else:
                # Nothing to look the members up in, so ask the group
                group = portal.portal_groups.getGroupById(group_id)
                try:
                    members = group.getGroupMembers(group_id)
                except:
                    members = group.getGroupMembers()
            groups.append({'group_id': group_id,
                           'title': group_id,
                           'description': '',
                           'principals': members,
                          })

        return {'title': 'local_roles', 'groups': groups}
 
    def write_folder(self, folder, path, extras=None):
        """ Write the contents of folder out.

            folder -- the container whose objects are exported
            path   -- path of the folder including the leading portal id,
                      which is cut off to get the path in the export
            extras -- extra metadata for the contained objects by id
                      e.g. {obj_id:{meta:'foobar'}, }

            Every object that has type information is exported and listed
            in the .objects file of the folder. Ids starting with '.' and
            python scripts are left out.
        """
        if extras is None:
            extras = {}
        folderdoc = self.get_doc(delimiter=',')
        path = path[self.lenportal:]

        for obj_id in folder.objectIds():
            if obj_id.startswith('.'):
                continue
            obj = getattr(folder, obj_id, None)
            # Test for None: an object that evaluates false, as a
            # container with nothing in it may do, is still content
            if obj is None:
                continue
            # getTypeInfo can return Folder for python scripts so check
            if str(obj) == '<PythonScript at %s>' % obj_id:
                continue
            try:
                objtype = obj.getTypeInfo().getId()
                objtype = TYPEMAP.get(objtype, objtype)
            except:
                # No type information, so not something to export
                objtype = None
            if objtype:
                doc = self.get_doc()
                self.export_object(obj_id, obj, objtype, path, doc,
                                   extras.get(obj_id, {}))
                self.write_folder_element(id=obj_id, objtype=objtype,
                                          doc=folderdoc)
        output = self.get_output(folderdoc)
        # Strip Windows line endings
        if hasattr(os, 'O_BINARY'):
            output = output.replace('\r', '')
        self.write_file(path, '.objects', output)
 
    def export(self, portal, root='/', users='yes', format='CSV', request=None):
        """ Export the content of the portal to the structure folder.
            Based on generic setup folder export to structure -
            See Products.GenericSetup.interfaces.IFilesystemExporter

            portal  -- the portal object to export from
            root    -- path to export from; the portal id is put in
                       front of it if it is missing
            users   -- if true the members are exported as well
            format  -- 'XML', anything else gives the CSV style output
            request -- stands in for REQUEST when this tool has none

            The structure folder is deleted and created again first.
            Returns the log, a list of messages.
        """
        if request is None:
            # A new dictionary each call, it ends up stored on the tool
            request = {}
        self.format = format
        if format == 'XML':
            self.dom = getDOMImplementation()
        self.workflow_tool = getToolByName(portal, 'portal_workflow')
        self.portalname = portal.getId()
        self.lenportal = len(self.portalname) + 1
        self.out = ['Log started at %s' % DateTime()]
        # Give this a request attribute since some methods expect it
        # and it is not available when used via current plone
        if not hasattr(self, 'REQUEST'):
            self.REQUEST = request

        try:
            if os.path.exists(self.savepath):
                shutil.rmtree(self.savepath)
            os.mkdir(self.savepath)
        except:
            # Nowhere to write to, so log it and give up
            self.out.append('Failed to create structure folder in %s' % self.savepath)
            return self.out

        if users:
            try:
                self.export_users(portal)
            except:
                # Content is still worth exporting without the users
                self.out.append('Sorry not all users could be exported.')

        if not root.startswith('/'):
            root = '/%s' % root
        if len(root) > 1 and not root.startswith('/%s/' % self.portalname):
            root = '/%s%s' % (self.portalname, root)
        catalog = getToolByName(portal, 'portal_catalog')
        # Just return everything then filter for folderish later
        results = catalog(path={'query': root})
        self.out.append('Exporting %s content items to zope/var/structure' % len(results))

        if root == '/':
            self.write_folder(portal, '/')
        elif results:
            # root is /portal/id/... so the third item is the id of the
            # object directly in the portal
            rootpath = root.split('/')
            if len(rootpath) > 2:
                rootobj = getattr(portal, rootpath[2], None)
                if rootobj is not None:
                    id = rootobj.getId()
                    objtype = rootobj.getTypeInfo().getId()
                    folderdoc = self.get_doc(delimiter=',')
                    self.write_folder_element(id=id, objtype=objtype,
                                              doc=folderdoc)
                    self.write_file('/',
                                    '.objects',
                                    self.get_output(folderdoc)
                                    )
                    doc = self.get_doc()
                    self.export_object(id, rootobj, objtype, '/', doc)
        for brain in results:
            try:
                obj = brain.getObject(self.REQUEST)
            except:
                obj = None
                self.out.append('Object %s at %s was not traversable' % (brain.getId,
                                                                         brain.getPath()))
            # Test for None so that a folder which evaluates false,
            # as one with nothing in it may do, is still written
            if obj is not None and obj.isPrincipiaFolderish:
                self.write_folder(obj, brain.getPath())

        return self.out
 
 
    def export_object(self, id, obj, objtype, path, doc, extra=None):
        """ Export one object to the file system.

            id      -- id of the object, used for the file name
            obj     -- the object to export
            objtype -- its type name (not used in here)
            path    -- path of its folder in the export
            doc     -- the document tuple its properties are written to
            extra   -- extra metadata for the object
                       e.g. {meta:'foobar', humbug:'mint'}

            What gets written depends on the object:
            - with a data attribute: the data as file 'id' and the
              properties as 'id.ini'
            - folderish: a '.properties' file in the folder 'id'
            - otherwise: the properties and text in one file 'id', but
              only if there is text, and any objects hidden inside are
              written to the folder 'id.content'
        """
        if extra is None:
            extra = {}
        modified = self.properties_marshall(obj, doc)
        # do required fields
        for prop, data in PROPS['fixed'].items():
            # The attribute holding the value can have another name
            attr = PROPS['boolean'].get(prop, prop)
            if hasattr(obj, attr):
                data = bool(getattr(obj, attr, None))
            self.write_element(name=prop, data=data, doc=doc)
        self.workflow(obj, doc)
        # sort out metadata
        self.at_marshall(obj, path, doc)
        # Do any extra metadata
        for key, value in extra.items():
            self.write_element(name=key.lower(),
                               data=value,
                               doc=doc)

        if hasattr(obj,'data'):
            # Binary: the properties and the data go in separate files
            self.write_file(path, id + '.ini', self.get_output(doc, 'DEFAULT'))
            self.write_file(path, id, obj.data, modified)
            return
        if obj.isPrincipiaFolderish:
            # write_file splits its path on '/' whatever the platform
            self.write_file('%s/%s' % (path.rstrip('/'), id), '.properties',
                            self.get_output(doc, 'DEFAULT'))
            return

        try:
            value = obj['text']
        except:
            value = getattr(obj, 'text', '')
        if value:
            if type(value) not in StringTypes:
                value = str(value)
            output = self.get_output(doc, value.replace('\r', ""))
            self.write_file(path, id, output, modified)
        # Handle webpage types which are not isPrincipiaFolderish
        # but are really folders that contain hidden extra items
        if hasattr(obj, 'objectIds'):
            try:
                num_objs = len(obj.objectIds())
            except:
                num_objs = 0
            if num_objs:
                meta = self._brain_metadata(obj)
                self.out.append('Writing files: %s has %s hidden objects' % (id, num_objs))
                filepath = '/%s/%s/%s.content' % (self.portalname, path, id)
                self.write_folder(obj, filepath, meta)
        return

    def _brain_metadata(self, obj):
        """ Get extra metadata for the objects inside obj from the brains
            that the BRAIN_METHODS of obj return.
            Returns {obj_id: {attribute: value}}, empty if there is none """
        meta = {}
        if not BRAIN_METHODS:
            return meta
        for bmethod, battribs in BRAIN_METHODS.items():
            try:
                brains = getattr(obj, bmethod)()
            except:
                # obj does not have this method, or it does not work here
                brains = []
            for brain in brains:
                extras = {}
                for battr in battribs:
                    try:
                        exvalue = getattr(brain, battr)()
                    except:
                        continue
                    # Keep what this attribute gave, if it gave anything
                    if exvalue:
                        extras[battr] = exvalue
                if extras:
                    obj_id = brain.getObject().getId()
                    meta.setdefault(obj_id, {}).update(extras)
        return meta
 
    def workflow(self, obj, doc):
        """ Get list of workflows and matching states as lines fields """
        chain = self.workflow_tool.getChainFor(obj)
        if chain:
            self.write_element(name='workflows', data=chain,
                               listed=True, doc=doc)
            states = []
            for wf_id in chain: 
                states.append(self.workflow_tool.getInfoFor(obj, 'review_state', wf_id))
            self.write_element(name='states', data=states,
                               listed=True, doc=doc)            
        return 
 
    def stringify(self, value):
        """ Return value as a one line string.
            File objects give their data, anything else that is not a
            string goes through str(). Carriage returns are dropped, as
            is one line return at the end, and the line returns that are
            left are replaced by SEPARATOR """
        if isinstance(value, File):
            value = getattr(value, 'data', value)
        if type(value) not in StringTypes:
            value = str(value)
        text = value.replace("\r", "")
        if text.endswith("\n"):
            text = text[:-1]
        return SEPARATOR.join(text.split("\n"))
 
    def at_marshall(self, obj, path, doc):
        """ Check whether object is an archetype and if so
            marshall the fields to properties text and save file field objects.

            obj  -- the object to marshall
            path -- path of its folder in the export, for file fields
            doc  -- the document tuple the fields are written to

            Objects without a Schema are left alone ('' is returned).
            File and image fields are written as the marker 'DATAFILE' if
            obj has a data attribute, else their value is saved as the
            file 'id.fieldname' and the marker 'EXTRAFILE' written.
            Fields whose value is None or cannot be read are left out.
        """
        if not hasattr(obj,'Schema'):
            return ''
        # NOTE(review): this used to try to take the primary field out
        # of the list, but compared its name with the field objects so
        # presumably never matched. The primary field is still marshalled
        # like any other - confirm whether it should be before changing
        for f in obj.Schema().fields():
            name = f.getName()
            try:
                value = obj[name]
            except:
                value = None
            #FIXME: check to see if this file is the data file rather than only
            # doing extra files for objects with no data attribute.
            if f.type in ('file', 'image'):
                if not hasattr(obj, 'data'):
                    filename = '%s.%s' % (obj.getId(), name)
                    self.write_file(path, filename, value)
                    value = 'EXTRAFILE'
                else:
                    value = 'DATAFILE'
            #TODO: Add GSXML style ATReference handling all archetypes in plone 2.1 or later
            if value is None:
                continue
            # data is always set from this field, an empty list included,
            # so what the field before it wrote is never written again
            if type(value) in (ListType, TupleType):
                data = [self.stringify(v) for v in value]
                listed = True
            else:
                data = self.stringify(value)
                listed = False
            self.write_element(name=name, data=data,
                               listed=listed, doc=doc)
        return
 
    def properties_marshall(self, obj, doc):
        """ Pull out dublin core and other basic plone properties.

            obj -- the object to read the properties from
            doc -- the document tuple the properties are written to

            PROPS maps property names to the names of the methods that
            give their values, by kind: string, date, list and user.
            A property whose method is missing is written empty ('None'
            for dates). The values of the user properties are also
            written together as 'creators'. Types in TYPEMAP get the
            attributes named for them in NONATPROPS as well.

            Returns the modification date as a string, or None if the
            object does not give one.
        """
        modified = None
        for prop, method in PROPS['string'].items():
            if hasattr(obj,method):
                data = self.stringify(getattr(obj, method)())
            else:
                data = ''
            self.write_element(name=prop, data=data, doc=doc)
        for prop, method in PROPS['date'].items():
            # Start empty each time so that the value of the property
            # before is never written as this date
            data = ''
            if hasattr(obj, method):
                value = getattr(obj, method)()
                if value is not None:
                    data = str(value)
            if not data:
                data = 'None'
            elif prop == 'modification_date':
                modified = data
            self.write_element(name=prop, data=data, doc=doc)
        for prop, method in PROPS['list'].items():
            me = getattr(obj, method, None)
            if me:
                data = [self.stringify(v) for v in (me() or ())]
                listed = True
            else:
                data = ''
                listed = False
            self.write_element(name=prop, data=data,
                                          listed=listed, doc=doc)
        creators = []
        for prop,method in PROPS['user'].items():
            if hasattr(obj,method):
                data = str(getattr(obj,method)())
                # Each user once, kept in the order they were found
                if data not in creators:
                    creators.append(data)
            else:
                data = ''
            self.write_element(name=prop, data=data, doc=doc)
        if creators:
            self.write_element(name='creators', data=creators,
                               listed=True, doc=doc)
        objtype = obj.getTypeInfo().getId()
        if objtype in TYPEMAP:
            # presumably every type in TYPEMAP is in NONATPROPS too,
            # if one is not there is just nothing more to write for it
            propmap = NONATPROPS.get(objtype, {})
            for prop in propmap.keys():
                if hasattr(obj,prop):
                    data = self.stringify(getattr(obj, prop, ''))
                    self.write_element(name=propmap[prop],
                                       data=data, doc=doc)
        return modified
 
# Let Zope set the class up now that it is complete - presumably this is
# what puts the declarations made on ContentExporter.security into effect
InitializeClass(ContentExporter)