#pylint: disable-msg=C0103
"""
The PublisherWorker does the following:
    a. looks to UserFileCacheEndpoint to get the workflow details
    b. looks to local database to get transferred files
    c. publishes transferred files
There should be one worker per user transfer.
"""
import time
import logging
import os
import datetime
import traceback
import tarfile
import urllib
import json
import types
import re
import uuid
import pprint
from DBSAPI.dbsApi import DbsApi
from WMCore.Database.CMSCouch import CouchServer
from WMCore.Credential.Proxy import Proxy
from AsyncStageOut import getHashLfn
from WMCore.Services.PhEDEx.PhEDEx import PhEDEx
from RestClient.ErrorHandling.RestClientExceptions import HTTPError
import dbs.apis.dbsClient as dbsClient
import pycurl
import cStringIO
from AsyncStageOut import getDNFromUserName
def getProxy(userdn, group, role, defaultDelegation, logger):
    """
    Retrieve a valid proxy for the given user DN.

    Reuses the proxy already cached on disk when it still has more than one
    hour of validity; otherwise renews it through MyProxy and accepts any
    positive remaining lifetime.

    :param userdn: distinguished name of the user
    :param group: VOMS group
    :param role: VOMS role
    :param defaultDelegation: base delegation configuration dict (left unmodified)
    :param logger: logger instance
    :return: tuple (True, proxyPath) on success, (False, None) on failure
    """
    logger.debug("Retrieving proxy for %s" % userdn)
    # Work on a copy so the caller's delegation dict is not mutated
    # (the original code aliased and mutated defaultDelegation in place).
    config = dict(defaultDelegation)
    config['userDN'] = userdn
    config['group'] = group
    config['role'] = role
    proxy = Proxy(config)
    proxyPath = proxy.getProxyFilename(True)
    timeleft = proxy.getTimeLeft(proxyPath)
    # Reuse the cached proxy if it is still valid for at least one hour.
    if timeleft is not None and timeleft > 3600:
        return (True, proxyPath)
    # Otherwise renew via MyProxy; any remaining lifetime is accepted.
    proxyPath = proxy.logonRenewMyProxy()
    timeleft = proxy.getTimeLeft(proxyPath)
    if timeleft is not None and timeleft > 0:
        return (True, proxyPath)
    return (False, None)
class PublisherWorker:
    """
    Worker that publishes the transferred files of one (user, group, role)
    into DBS. One worker is created per user transfer.
    """
    def __init__(self, user, config):
        """
        Store the user and configuration for the worker, resolve the user DN
        and proxy (falling back to the operator proxy when the user proxy is
        not valid) and connect to the files CouchDB database.

        :param user: sequence of (username, group, role)
        :param config: component configuration object
        """
        self.user = user[0]
        self.group = user[1]
        self.role = user[2]
        self.config = config
        self.logger = logging.getLogger('DBSPublisher-Worker-%s' % self.user)
        self.pfn_to_lfn_mapping = {}
        self.max_retry = config.publication_max_retry
        self.uiSetupScript = getattr(self.config, 'UISetupScript', None)
        self.proxyDir = config.credentialDir
        self.myproxyServer = 'myproxy.cern.ch'
        self.init = True
        self.userDN = ''
        # NOTE(review): 'query' is built but never used in this method.
        query = {'group': True,
                 'startkey':[self.user], 'endkey':[self.user, {}, {}]}
        self.logger.debug("Trying to get DN")
        try:
            self.userDN = getDNFromUserName(self.user, self.logger)
        except Exception as ex:
            msg =  "Error retrieving the user DN"
            msg += str(ex)
            msg += str(traceback.format_exc())
            self.logger.error(msg)
            self.init = False
        defaultDelegation = {'logger': self.logger,
                             # It will be moved to be got from couchDB
                             'credServerPath': self.config.credentialDir,
                             'myProxySvr': 'myproxy.cern.ch',
                             'min_time_left': getattr(self.config, 'minTimeLeft', 36000),
                             'serverDN': self.config.serverDN,
                             'uisource': self.uiSetupScript}
        if hasattr(self.config, "cache_area"):
            try:
                # The myproxy account is the host part of the cache area URL.
                defaultDelegation['myproxyAccount'] = re.compile('https?://([^/]*)/.*').findall(self.config.cache_area)[0]
            except IndexError:
                self.logger.error('MyproxyAccount parameter cannot be retrieved from %s' % self.config.cache_area)
        if getattr(self.config, 'serviceCert', None):
            defaultDelegation['server_cert'] = self.config.serviceCert
        if getattr(self.config, 'serviceKey', None):
            defaultDelegation['server_key'] = self.config.serviceKey
        valid = False
        proxy = None
        try:
            if not os.getenv("TEST_ASO"):
                valid, proxy = getProxy(self.userDN, self.group, self.role, defaultDelegation, self.logger)
        except Exception as ex:
            msg =  "Error getting the user proxy"
            msg += str(ex)
            msg += str(traceback.format_exc())
            self.logger.error(msg)
        if valid:
            self.userProxy = proxy
        else:
            # Use the operator's proxy when the user proxy is invalid.
            # This will be moved soon
            self.logger.error('Did not get valid proxy. Setting proxy to ops proxy')
            info = {'server_key': self.config.opsProxy, 'server_cert': self.config.opsProxy, 'logger': self.logger}
            self.logger.info("Ops proxy info: %s" % str(info))
            opsProxy = Proxy({'server_key': self.config.opsProxy, 'server_cert': self.config.opsProxy, 'logger': self.logger})
            self.userDN = opsProxy.getSubject()
            self.userProxy = self.config.opsProxy
        self.cache_area = self.config.cache_area
        os.environ['X509_USER_PROXY'] = self.userProxy
        # If we're just testing publication, we skip the DB connection.
        if os.getenv("TEST_ASO"):
            self.db = None
        else:
            server = CouchServer(dburl=self.config.couch_instance, ckey=self.config.opsProxy, cert=self.config.opsProxy)
            self.db = server.connectDatabase(self.config.files_database)
        self.phedexApi = PhEDEx(responseType='json')
        self.max_files_per_block = self.config.max_files_per_block
        self.block_publication_timeout = self.config.block_closure_timeout
    def __call__(self):
        """
        For each active workflow of this user:
          1. check the number of files in the workflow to publish, if it is < max_files_per_block
          2. check in the workflow if now - last_finished_job > max_publish_time
          3. then call publish, mark_good, mark_failed for each workflow
        """
        # NOTE(review): several `except` clauses below have no matching `try:`
        # and some list-filling statements appear to have been lost — this
        # method is not syntactically valid as-is. Issues flagged inline.
        active_user_workflows = []
        self.lfn_map = {}
        query = {'group':True, 'startkey':[self.user, self.group, self.role], 'endkey':[self.user, self.group, self.role, {}]}
        # NOTE(review): a `try:` is missing before this CouchDB view call.
            active_user_workflows = self.db.loadView('DBSPublisher', 'publish', query)['rows']
        except Exception, e:
            self.logger.error('A problem occured when contacting couchDB to get the list of active WFs: %s' %e)
            self.logger.info('Publications for %s will be retried next time' % self.user)
        self.logger.debug('active user wfs %s' % active_user_workflows)
        self.logger.info('active user wfs %s' %len(active_user_workflows))
        now = time.time()
        last_publication_time = 0
        # Process each active workflow of this user independently.
        for user_wf in active_user_workflows:
            self.forceFailure = False
            self.forcePublication = False
            lfn_ready = []
            active_files = []
            wf_jobs_endtime = []
            workToDo = False
            query = {'reduce':False, 'key': user_wf['key'], 'stale': 'ok'}
            # NOTE(review): a `try:` is missing before this CouchDB view call.
                active_files = self.db.loadView('DBSPublisher', 'publish', query)['rows']
            except Exception, e:
                self.logger.error('A problem occured to retrieve the list of active files for %s: %s' %(self.user, e))
                self.logger.info('Publications of %s will be retried next time' % user_wf['key'])
            self.logger.info('active files of %s: %s' %(user_wf['key'], len(active_files)))
            for file in active_files:
                if file['value'][5]:
                    # Get the list of jobs end_time for each WF.
                    # NOTE(review): the start of this statement (presumably
                    # `wf_jobs_endtime.append(int(time.mktime(time.strptime(`)
                    # is missing.
                                           str(file['value'][5]), '%Y-%m-%d %H:%M:%S'))) \
                                           - time.timezone)
                # To move once the remote stageout from WN is fixed.
                # Map the temp-area LFN back to its final /store location.
                if "/temp/temp" in file['value'][1]:
                    lfn_hash = file['value'][1].replace('store/temp/temp', 'store', 1)
                # NOTE(review): an `else:` is missing here — as written the
                # second replace unconditionally overwrites lfn_hash.
                    lfn_hash = file['value'][1].replace('store/temp', 'store', 1)
                # Strip the hash directory component to recover the user LFN.
                lfn_orig = lfn_hash.replace('.' + file['value'][1].split('.', 1)[1].split('/', 1)[0], '', 1)
                if "/temp/temp" in file['value'][1]:
                    self.lfn_map[lfn_orig] = file['value'][1].replace('temp/temp', 'temp', 1)
                # NOTE(review): an `else:` (and a `lfn_ready.append(lfn_orig)`)
                # appear to be missing around here — lfn_ready is never filled.
                    self.lfn_map[lfn_orig] = file['value'][1]
            self.logger.info('LFNs %s ready in %s' %(len(lfn_ready), user_wf['value']))
            # Check if the workflow is still valid
            if wf_jobs_endtime:
                self.logger.debug('jobs_endtime of %s: %s' % (user_wf, wf_jobs_endtime))
                # Age of the workflow in days since the first recorded job end.
                workflow_duration = (now - wf_jobs_endtime[0]) / 86400
                if workflow_duration > self.config.workflow_expiration_time:
                    self.logger.info('Workflow %s expired since %s days! Force the publication failure.' % (\
                                      user_wf['key'], (workflow_duration - self.config.workflow_expiration_time)))
                    self.forceFailure = True
            # If the number of files < max_files_per_block then check the last publication time
            if user_wf['value'] <= self.max_files_per_block:
                query = {'reduce':True, 'key': user_wf['key']}
                # NOTE(review): a `try:` is missing before this CouchDB view call.
                    last_publication_time = self.db.loadView('DBSPublisher', 'last_publication', query)['rows']
                except Exception, e:
                    self.logger.error('Cannot get last publication time from Couch for %s: %s' %(user_wf['key'], e))
                self.logger.debug('last_publication_time of %s: %s' % (user_wf['key'], last_publication_time))
                if last_publication_time:
                    self.logger.debug('last published block: %s' % last_publication_time[0]['value']['max'])
                    # Seconds elapsed since the last published block.
                    wait_files = now - last_publication_time[0]['value']['max']
                    if wait_files > self.block_publication_timeout:
                        self.logger.info('Forse the publication since the publication %s ago' % wait_files)
                        self.logger.info('Publish number of files minor of max_files_per_block for %s.' % user_wf['key'])
                        self.forcePublication = True
                    # NOTE(review): this `elif` has no body in the source —
                    # presumably a `continue` was lost.
                    elif not self.forceFailure:
            if lfn_ready:
                # NOTE(review): `file` here is the last item of the loop above.
                self.logger.debug("Retrieving SE Name from Phedex %s" %str(file['value'][0]))
                # NOTE(review): a `try:` is missing before the PhEDEx call.
                    seName = self.phedexApi.getNodeSE( str(file['value'][0]) )
                    # NOTE(review): this `if` has no body — presumably `continue`.
                    if not seName:
                except Exception, ex:
                    msg =  "SE of %s cannot be retrieved" % str(file['value'][0])
                    msg += str(ex)
                    msg += str(traceback.format_exc())
                # Publish the ready LFNs, then update their state in CouchDB.
                failed_files, good_files = self.publish( str(file['key'][3]), str(file['value'][2]), \
                                                         self.userDN, str(file['key'][0]), \
                                                         str(file['value'][3]), str(file['value'][4]), \
                                                         str(seName), lfn_ready )
                self.mark_failed( failed_files )
                self.mark_good( good_files )
        self.logger.info('Publications completed')
    def mark_good(self, files=[]):
        """
        Mark the list of files as transferred, i.e. set their
        publication_state to 'published' in the local CouchDB.

        :param files: list of original LFNs previously mapped in self.lfn_map
        """
        # NOTE(review): mutable default argument `files=[]` — fragile, though
        # it is never mutated here.
        last_update = int(time.time())
        for lfn in files:
            # NOTE(review): a `try:` matching the first `except` below is missing.
                data = {}
                data['publication_state'] = 'published'
                data['last_update'] = last_update
                lfn_db = self.lfn_map[lfn]
                # NOTE(review): this backslash continuation is truncated —
                # presumably `getHashLfn(lfn_db)` (the couch doc id) was lost.
                updateUri = "/" + self.db.name + "/_design/DBSPublisher/_update/updateFile/" + \
                updateUri += "?" + urllib.urlencode(data)
                self.db.makeRequest(uri = updateUri, type = "PUT", decode = False)
            except Exception, ex:
                msg =  "Error updating document in couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
        # NOTE(review): this outer `except` has no matching `try:` — a trailing
        # "ensure full commit" block was likely truncated.
        except Exception, ex:
            msg =  "Error commiting documents in couch"
            msg += str(ex)
            msg += str(traceback.format_exc())
    def mark_failed( self, files=[], forceFailure=False ):
        """
        Something failed for these files, so increment the retry count;
        when the retry limit is exceeded or forceFailure is set, mark them
        as permanently failed ('publication_failed').

        :param files: list of original LFNs previously mapped in self.lfn_map
        :param forceFailure: fail immediately regardless of the retry count
        """
        now = str(datetime.datetime.now())
        last_update = int(time.time())
        for lfn in files:
            self.logger.info("Marking failed %s" % lfn)
            data = {}
            lfn_db = self.lfn_map[lfn]
            docId = getHashLfn(lfn_db)
            self.logger.info("Marking failed %s of %s" % (docId, lfn_db))
            # Load document to get the retry_count
            # NOTE(review): a `try:` is missing before this couch read.
                document = self.db.document( docId )
            except Exception, ex:
                msg =  "Error loading document from couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
            # Prepare data to update the document in couch
            if len(document['publication_retry_count']) + 1 > self.max_retry or forceFailure:
                data['publication_state'] = 'publication_failed'
            # NOTE(review): an `else:` is missing here — as written the state
            # is always overwritten with 'publishing'.
                data['publication_state'] = 'publishing'
            data['last_update'] = last_update
            data['retry'] = now
            # Update the document in couch
            # NOTE(review): a `try:` is missing here.
                updateUri = "/" + self.db.name + "/_design/DBSPublisher/_update/updateFile/" + docId
                updateUri += "?" + urllib.urlencode(data)
                self.db.makeRequest(uri = updateUri, type = "PUT", decode = False)
            except Exception, ex:
                msg =  "Error in updating document in couch"
                msg += str(ex)
                msg += str(traceback.format_exc())
        # NOTE(review): outer `except` without a matching `try:` — likely a
        # truncated "ensure full commit" block.
        except Exception, ex:
            msg =  "Error commiting documents in couch"
            msg += str(ex)
            msg += str(traceback.format_exc())
    def publish(self, workflow, dbsurl, userdn, userhn, inputDataset, sourceurl, targetSE, lfn_ready):
        """Perform the data publication of the workflow result.
          :arg str workflow: a workflow name
          :arg str dbsurl: the DBS URL endpoint where to publish
          :arg str userdn: DN of the user owning the output
          :arg str userhn: username of the user
          :arg str inputDataset: input dataset of the task
          :arg str sourceurl: DBS URL the input dataset was read from
          :arg str targetSE: storage element hosting the output files
          :arg list lfn_ready: LFNs ready for publication
          :return: tuple (failed_files, done_files)"""
        try:
            toPublish = self.readToPublishFiles(workflow, userhn, lfn_ready)
        except (tarfile.ReadError, RuntimeError):
            self.logger.error("Unable to read publication description files. ")
            return lfn_ready, []
        if not toPublish:
            if self.forceFailure:
                # No FWJR in the cache for an expired workflow: give up on
                # these files permanently.
                self.logger.error("FWJR seems not in cache! Forcing the failure")
                self.mark_failed(lfn_ready, True)
                return [], []
        self.logger.info("Starting data publication in %s of %s" % ("DBS", str(workflow)))
        failed, done, dbsResults = self.publishInDBS3(userdn=userdn, sourceURL=sourceurl,
                                                 inputDataset=inputDataset, toPublish=toPublish,
                                                 destURL=dbsurl, targetSE=targetSE, workflow=workflow)
        self.logger.debug("DBS publication results %s" % dbsResults)
        return failed, done
    def readToPublishFiles(self, workflow, userhn, lfn_ready):
        """
        Download and read the files describing what needs to be published,
        querying the user cache area for the workflow's EDM output records.

        :return: dict {output_dataset: [file records]} (empty dict on error)
        """
        def decodeAsString(a):
            """
            DBS is stupid and doesn't understand that unicode is a string
            (it checks type(obj) == type('')), so convert as much of the
            decoded JSON to str as possible.
            """
            newDict = {}
            for key, value in a.iteritems():
                if type(key) == types.UnicodeType:
                    key = str(key)
                if type(value) == types.UnicodeType:
                    value = str(value)
                newDict.update({key : value})
            return newDict
        self.logger.info("Starting Read cache for %s..." % workflow)
        buf = cStringIO.StringIO()
        res = []
        # TODO: input sanitization
        url = self.cache_area + '?taskname=' + workflow + '&filetype=EDM'
        # NOTE(review): a `try:` is missing here, and no `c.perform()` /
        # `c.close()` is visible — as written the request is never executed.
            c = pycurl.Curl()
            c.setopt(c.URL, url)
            c.setopt(c.SSL_VERIFYPEER, 0)
            c.setopt(c.SSLKEY, self.userProxy)
            c.setopt(c.SSLCERT, self.userProxy)
            c.setopt(c.WRITEFUNCTION, buf.write)
        except Exception, ex:
            msg =  "Error reading data from cache"
            msg += str(ex)
            msg += str(traceback.format_exc())
            return {}
        self.logger.info("Data read from cache...")
        # NOTE(review): a `try:` is missing before the JSON decoding.
            json_string = buf.getvalue()
            res = json.loads(json_string)
        except Exception, ex:
            msg =  "Error loading results. Trying next time!"
            msg += str(ex)
            msg += str(traceback.format_exc())
            return {}
        self.logger.info("%s records got from cache" % len(res['result']))
        self.logger.debug("results %s..." % res['result'])
        toPublish = {}
        # Group the cache records by output dataset.
        for files in res['result']:
            outdataset = str(files['outdataset'])
            # BUG(review): the condition looks inverted (should be
            # `not toPublish.has_key(outdataset)`) and the
            # `toPublish[outdataset].append(...)` filling the list is missing.
            if toPublish.has_key(outdataset):
                toPublish[outdataset] = []
        if toPublish:
            # NOTE(review): `outdataset` here is the last key of the loop above.
            self.logger.info("cleaning...%s LFN ready from %s" %(len(lfn_ready), len(toPublish[outdataset])))
            fail_files, toPublish = self.clean(lfn_ready, toPublish)
            self.logger.info('to_publish %s files' % len(toPublish))
            self.logger.debug('to_publish %s' % toPublish)
        return toPublish
    def clean(self, lfn_ready, toPublish):
        """
        Clean before publishing: keep only the toPublish entries whose LFN
        is in lfn_ready, rewriting the temp-area LFN to its final /store
        location.

        :return: tuple (fail_files, new_toPublish)
        """
        fail_files = []
        new_toPublish = {}
        files_to_publish = []
        lfn_to_publish = []
        for ready in lfn_ready:
            if toPublish:
                for datasetPath, files in toPublish.iteritems():
                    new_temp_files = []
                    lfn_dict = {}
                    for lfn in files:
                        if lfn['lfn'] == ready:
                            lfn_dict = lfn
                            lfn_dict['lfn'] = lfn['lfn'].replace('store/temp', 'store', 1)
                            # NOTE(review): a `new_temp_files.append(lfn_dict)`
                            # appears to be missing — new_temp_files stays empty.
                # NOTE(review): `datasetPath`/`new_temp_files` leak out of the
                # inner loop, so only the last dataset is examined below.
                if new_temp_files:
                    # BUG(review): condition looks inverted (should handle the
                    # missing key by assigning, present key by extending), and
                    # fail_files is never filled anywhere in this method.
                    if new_toPublish.has_key(datasetPath):
                        new_toPublish[datasetPath] = new_temp_files
        self.logger.info('Cleaning ends')
        return fail_files, new_toPublish
    def format_file_3(self, file, output_config, dataset):
        """
        Format a transferred-file record into the dict expected by the DBS3
        bulk file-insertion API.

        :param file: dict with the FWJR file information (lfn, cksum,
                     inevents, filesize, adler32, parents, runlumi, md5)
        :param output_config: unused here, kept for interface compatibility
        :param dataset: unused here, kept for interface compatibility
        :return: dict in the DBS3 'file' format
        """
        # NOTE: the original dict literal was left unclosed (syntax error);
        # closed here after 'file_parent_list'.
        nf = {'logical_file_name': file['lfn'],
              'file_type': 'EDM',
              'check_sum': unicode(file['cksum']),
              'event_count': file['inevents'],
              'file_size': file['filesize'],
              'adler32': file['adler32'],
              'file_parent_list': [{'file_parent_lfn': i} for i in file['parents']]}
        # Flatten {run: [lumis]} into DBS3's list of per-lumi dicts.
        file_lumi_list = []
        for run, lumis in file['runlumi'].items():
            for lumi in lumis:
                file_lumi_list.append({'lumi_section_num': int(lumi), 'run_num': int(run)})
        nf['file_lumi_list'] = file_lumi_list
        # 'asda' is the silly value that MD5 defaults to; 'NOTSET' likewise.
        # Guard on presence first: the original indexed file['md5'] after a
        # .get() test, raising KeyError when no md5 was produced.
        md5 = file.get("md5")
        if md5 is not None and md5 != "asda" and md5 != "NOTSET":
            nf['md5'] = md5
        return nf
    def migrateDBS3(self, migrateApi, destReadApi, sourceURL, inputDataset):
        """
        Ensure that inputDataset exists in the destination DBS instance,
        submitting a migration request from sourceURL when needed and waiting
        (up to ~300s) for it to complete.

        :return: list of dataset dicts present in the destination DBS, or []
                 when the migration failed or is still in progress.
        """
        # NOTE(review): a `try:` is missing before this listDatasets call.
            existing_datasets = destReadApi.listDatasets(dataset=inputDataset, detail=True)
        except HTTPError, he:
            msg = str(he)
            msg += str(traceback.format_exc())
            self.logger.error("Request to list input dataset %s failed." % inputDataset)
            return []
        should_migrate = False
        if not existing_datasets or (existing_datasets[0]['dataset'] != inputDataset):
            should_migrate = True
            self.logger.debug("Dataset %s must be migrated; not in the destination DBS." % inputDataset)
        if not should_migrate:
            # The dataset exists in the destination; make sure source and destination
            # have the same blocks.
            existing_blocks = set([i['block_name'] for i in destReadApi.listBlocks(dataset=inputDataset)])
            proxy = os.environ.get("SOCKS5_PROXY")
            sourceApi = dbsClient.DbsApi(url=sourceURL, proxy=proxy)
            source_blocks = set([i['block_name'] for i in sourceApi.listBlocks(dataset=inputDataset)])
            blocks_to_migrate = source_blocks - existing_blocks
            # NOTE(review): this statement is truncated — the third format
            # argument (presumably len(source_blocks)) and the closing
            # parentheses were lost.
            self.logger.debug("Dataset %s in destination DBS with %d blocks; %d blocks in source." % (inputDataset, \
                                                                                                      len(existing_blocks), \
            if blocks_to_migrate:
                # BUG(review): logs existing_blocks where blocks_to_migrate is
                # almost certainly intended.
                self.logger.debug("%d blocks (%s) must be migrated to destination dataset %s." % (len(existing_blocks), \
                                                                                                  ", ".join(existing_blocks), \
                                                                                                  inputDataset) )
                should_migrate = True
        if should_migrate:
            data = {'migration_url': sourceURL, 'migration_input': inputDataset}
            self.logger.debug("About to submit migrate request for %s" % str(data))
            # NOTE(review): a `try:` is missing before submitMigration.
                result = migrateApi.submitMigration(data)
            except HTTPError, he:
                if he.msg.find("Requested dataset %s is already in destination" % inputDataset) >= 0:
                    self.logger.info("Migration API believes this dataset has already been migrated.")
                    return destReadApi.listDatasets(dataset=inputDataset, detail=True)
                self.logger.exception("Request to migrate %s failed." % inputDataset)
                return []
            self.logger.info("Result of migration request %s" % str(result))
            id = result.get("migration_details", {}).get("migration_request_id")
            if id == None:
                self.logger.error("Migration request failed to submit.")
                self.logger.error("Migration request results: %s" % str(result))
                return []
            self.logger.debug("Migration ID: %s" % id)
            # Wait for up to 300 seconds, then return to the main loop.  Note we don't
            # fail or cancel anything.  Just retry later.
            # States:
            # 0=PENDING
            # 1=IN PROGRESS
            # 2=COMPLETED
            # *=FAILED
            # 9=Terminally FAILED
            # In the case of failure, we expect the publisher daemon to try again in
            # the future.
            wait_time = 30
            for i in range(10):
                self.logger.debug("About to get migration request for %s." % id)
                status = migrateApi.statusMigration(migration_rqst_id=id)
                state = status[0].get("migration_status")
                self.logger.debug("Migration status: %s" % state)
                # BUG(review): `state != 9 or state != 2` is always True; the
                # intent is to break when the state is terminal (9 or 2) and
                # time.sleep(wait_time) otherwise — the loop body is truncated.
                if state != 9 or state != 2:
            if state == 9:
                self.logger.info("Migration of %s has failed.  Full status: %s" % (inputDataset, str(status)))
                return []
            elif state == 2:
                self.logger.info("Migration of %s is complete." % inputDataset)
                existing_datasets = destReadApi.api.listDatasets(dataset=inputDataset, detail=True, dataset_access_type='*')
            # NOTE(review): an `else:` (timeout branch) is missing here.
                self.logger.info("Migration of %s has taken too long - will delay publication." % inputDataset)
                return []
        return existing_datasets
    def createBulkBlock(self, output_config, processing_era_config, primds_config, \
                        dataset_config, acquisition_era_config, block_config, files):
        """
        Assemble the nested dict expected by the DBS3 bulk block-insertion
        API from the per-entity configuration dicts and the list of files.

        :param output_config: output configuration dict (copied per file)
        :param processing_era_config: processing era dict
        :param primds_config: primary dataset dict
        :param dataset_config: dataset dict
        :param acquisition_era_config: acquisition era dict
        :param block_config: block dict; file_count/block_size are filled in
        :param files: list of file dicts (format_file_3 output); their
                      'file_parent_list' entries are moved to the block level
        :return: the block dump dict
        """
        file_conf_list = []
        file_parent_list = []
        for file in files:
            # One output-config entry per file, tagged with its LFN.
            file_conf = output_config.copy()
            file_conf['lfn'] = file['logical_file_name']
            # NOTE: the original never appended file_conf, leaving
            # file_conf_list always empty — fixed here.
            file_conf_list.append(file_conf)
            # Hoist parentage info to the block level, as DBS3 expects.
            for parent_lfn in file.get('file_parent_list', []):
                file_parent_list.append({'logical_file_name': file['logical_file_name'], \
                                         'parent_logical_file_name': parent_lfn['file_parent_lfn']})
            # pop() instead of del: files without parents carry no
            # 'file_parent_list' key (see the .get default above).
            file.pop('file_parent_list', None)
        blockDump = { \
            'dataset_conf_list': [output_config],
            'file_conf_list': file_conf_list,
            'files': files,
            'processing_era': processing_era_config,
            'primds': primds_config,
            'dataset': dataset_config,
            'acquisition_era': acquisition_era_config,
            'block': block_config,
            'file_parent_list': file_parent_list,
        }
        blockDump['block']['file_count'] = len(files)
        blockDump['block']['block_size'] = sum([int(file[u'file_size']) for file in files])
        return blockDump
    def publishInDBS3(self, userdn, sourceURL, inputDataset, toPublish, destURL, targetSE, workflow):
        """
        Publish files into DBS3.

        :param toPublish: dict {dataset_path: [file records]} from readToPublishFiles
        :return: tuple (failed, published, results)
        """
        publish_next_iteration = []
        failed = []
        published = []
        results = {}
        blockSize = self.max_files_per_block
        # In the case of MC input (or something else which has no 'real' input dataset),
        # we simply call the input dataset by the primary DS name (/foo/).
        noInput = len(inputDataset.split("/")) <= 3
        # Normalize URLs
        # TODO: set WRITE_PATH to "/DBSWriter" once fixed in CS.
        WRITE_PATH = "/DBSWriter/"
        MIGRATE_PATH = "/DBSMigrate"
        READ_PATH = "/DBSReader"
        # Derive the read/migrate endpoints from the writer endpoint.
        if destURL.endswith(WRITE_PATH):
            destReadURL = destURL[:-len(WRITE_PATH)] + READ_PATH
            migrateURL = destURL[:-len(WRITE_PATH)] + MIGRATE_PATH
        # NOTE(review): an `else:` is missing here — as written the writer-URL
        # results are unconditionally overwritten.
            migrateURL = destURL + MIGRATE_PATH
            destReadURL = destURL + READ_PATH
            destURL += WRITE_PATH
        if not sourceURL.endswith(READ_PATH):
            sourceURL += READ_PATH
        self.logger.debug("Migrate datasets")
        proxy = os.environ.get("SOCKS5_PROXY")
        self.logger.debug("Destination API URL: %s" % destURL)
        destApi = dbsClient.DbsApi(url=destURL, proxy=proxy)
        self.logger.debug("Destination read API URL: %s" % destReadURL)
        destReadApi = dbsClient.DbsApi(url=destReadURL, proxy=proxy)
        self.logger.debug("Migration API URL: %s" % migrateURL)
        migrateApi = dbsClient.DbsApi(url=migrateURL, proxy=proxy)
        if not noInput:
            self.logger.debug("Submit migration from source DBS %s to destination %s." % (sourceURL, migrateURL))
            # Submit migration
            # NOTE(review): sourceApi is created but never used below.
            sourceApi = dbsClient.DbsApi(url=sourceURL)
            # NOTE(review): a `try:` is missing here.
                existing_datasets = self.migrateDBS3(migrateApi, destReadApi, sourceURL, inputDataset)
            except Exception, ex:
                msg = str(ex)
                msg += str(traceback.format_exc())
                existing_datasets = []
            if not existing_datasets:
                self.logger.info("Failed to migrate %s from %s to %s; not publishing any files." % \
                                 (inputDataset, sourceURL, migrateURL))
                return [], [], []
            # Get basic data about the parent dataset
            if not (existing_datasets and existing_datasets[0]['dataset'] == inputDataset):
                # NOTE(review): format string has a %s but no argument supplied.
                self.logger.error("Inconsistent state: %s migrated, but listDatasets didn't return any information")
                return [], [], []
            primary_ds_type = existing_datasets[0]['primary_ds_type']
            # There's little chance this is correct, but it's our best guess for now.
            # CRAB2 uses 'crab2_tag' for all cases
            existing_output = destReadApi.listOutputConfigs(dataset=inputDataset)
            if not existing_output:
                # NOTE(review): no early return here — the [0] index below
                # will raise when existing_output is empty.
                self.logger.error("Unable to list output config for input dataset %s." % inputDataset)
            existing_output = existing_output[0]
            global_tag = existing_output['global_tag']
        # NOTE(review): an `else:` (private-MC branch) is missing here.
            self.logger.info("This publication appears to be for private MC.")
            primary_ds_type = 'mc'
            global_tag = 'crab3_tag'
        acquisition_era_name = "CRAB"
        processing_era_config = {'processing_version': 1, 'description': 'CRAB3_processing_era'}
        self.logger.debug("Starting iteratation through files for publication")
        for datasetPath, files in toPublish.iteritems():
            results[datasetPath] = {'files': 0, 'blocks': 0, 'existingFiles': 0,}
            dbsDatasetPath = datasetPath
            # NOTE(review): this `if` has no body — presumably `continue`.
            if not files:
            appName = 'cmsRun'
            appVer  = files[0]["swversion"]
            appFam  = 'output'
            seName  = targetSE
            # The pset hash is the trailing segment of the publish name.
            pset_hash = files[0]['publishname'].split("-")[-1]
            gtag = str(files[0]['globaltag'])
            if gtag == "None":
                gtag = global_tag
            acquisitionera = str(files[0]['acquisitionera'])
            if acquisitionera == "null":
                acquisitionera = acquisition_era_name
            empty, primName, procName, tier =  dbsDatasetPath.split('/')
            primds_config = {'primary_ds_name': primName, 'primary_ds_type': primary_ds_type}
            self.logger.debug("About to insert primary dataset: %s" % str(primds_config))
            # NOTE(review): the destApi.insertPrimaryDataset(primds_config)
            # call appears to be missing between these two log lines.
            self.logger.debug("Successfully inserting primary dataset %s" % primName)
            # Find any files already in the dataset so we can skip them
            # NOTE(review): a `try:` is missing here.
                existingDBSFiles = destApi.listFiles(dataset=dbsDatasetPath)
                existingFiles = [x['logical_file_name'] for x in existingDBSFiles]
                results[datasetPath]['existingFiles'] = len(existingFiles)
            except Exception, ex:
                msg =  "Error when listing files in DBS"
                msg += str(ex)
                msg += str(traceback.format_exc())
                return [], [], []
            workToDo = False
            # Is there anything to do?
            for file in files:
                if not file['lfn'] in existingFiles:
                    workToDo = True
            if not workToDo:
                self.logger.info("Nothing uploaded, %s has these files already or not enough files" % datasetPath)
                # NOTE(review): this loop has no body — presumably
                # `published.append(file['lfn'])` followed by a `continue`.
                for file in files:
            acquisition_era_config = {'acquisition_era_name': acquisitionera, 'start_date': 0}
            # NOTE(review): this dict literal is unclosed — trailing entries
            # and the closing brace were lost.
            output_config = {'release_version': appVer,
                             'pset_hash': pset_hash,
                             'app_name': appName,
                             'output_module_label': 'o', #TODO
                             'global_tag': global_tag,
            self.logger.debug("Published output config.")
            # NOTE(review): this dict literal is unclosed as well.
            dataset_config = {'dataset': dbsDatasetPath,
                              'processed_ds_name': procName,
                              'data_tier_name': tier,
                              'acquisition_era_name': acquisitionera,
                              'dataset_access_type': 'VALID', # TODO
                              'physics_group_name': 'CRAB3',
                              'last_modification_date': int(time.time()),
            self.logger.info("About to insert dataset: %s" % str(dataset_config))
            del dataset_config['acquisition_era_name']
            #dataset_config['acquisition_era'] = acquisition_era_config
            # Convert new files into the DBS3 insertion format, skipping those
            # DBS already knows about.
            dbsFiles = []
            for file in files:
                if not file['lfn'] in existingFiles:
                    dbsFiles.append(self.format_file_3(file, output_config, dbsDatasetPath))
            count = 0
            blockCount = 0
            # Publish as a single block when below the per-block threshold...
            if len(dbsFiles) < self.max_files_per_block:
                block_name = "%s#%s" % (dbsDatasetPath, str(uuid.uuid4()))
                files_to_publish = dbsFiles[count:count+blockSize]
                # NOTE(review): a `try:` is missing here, and the
                # destApi.insertBulkBlock(blockDump) call is not visible.
                    block_config = {'block_name': block_name, 'origin_site_name': seName, 'open_for_writing': 0}
                    self.logger.debug("Inserting files %s into block %s." % ([i['logical_file_name'] for i in files_to_publish], block_name))
                    blockDump = self.createBulkBlock(output_config, processing_era_config, primds_config, dataset_config, \
                                                     acquisition_era_config, block_config, files_to_publish)
                    count += blockSize
                    blockCount += 1
                except Exception, ex:
                    failed += [i['logical_file_name'] for i in files_to_publish]
                    msg =  "Error when publishing"
                    msg += str(ex)
                    msg += str(traceback.format_exc())
            # ...otherwise publish in fixed-size blocks.
            # NOTE(review): an `else:` appears to be missing before this loop.
                while count < len(dbsFiles):
                    block_name = "%s#%s" % (dbsDatasetPath, str(uuid.uuid4()))
                    files_to_publish = dbsFiles[count:count+blockSize]
                    # NOTE(review): a `try:` is missing here.
                        if len(dbsFiles[count:len(dbsFiles)]) < self.max_files_per_block:
                            # NOTE(review): this loop has no body — presumably
                            # the remainder is deferred via
                            # publish_next_iteration.append(...).
                            for file in dbsFiles[count:len(dbsFiles)]:
                            count += blockSize
                            # NOTE(review): a `continue` likely followed here.
                        block_config = {'block_name': block_name, 'origin_site_name': seName, 'open_for_writing': 0}
                        blockDump = self.createBulkBlock(output_config, processing_era_config, primds_config, dataset_config, \
                                                         acquisition_era_config, block_config, files_to_publish)
                        self.logger.debug("Block to insert: %s\n" % pprint.pformat(blockDump))
                        # NOTE(review): the destApi.insertBulkBlock(blockDump)
                        # call is not visible here either.
                        count += blockSize
                        blockCount += 1
                    except Exception, ex:
                        failed += [i['logical_file_name'] for i in files_to_publish]
                        msg =  "Error when publishing (%s) " % ", ".join(failed)
                        msg += str(ex)
                        msg += str(traceback.format_exc())
                        count += blockSize
            results[datasetPath]['files'] = len(dbsFiles) - len(failed)
            results[datasetPath]['blocks'] = blockCount
        # Files that failed or were deferred are not reported as published.
        published = filter(lambda x: x not in failed + publish_next_iteration, published)
        self.logger.info("End of publication status: failed %s, published %s, publish_next_iteration %s, results %s" \
                         % (failed, published, publish_next_iteration, results))
        return failed, published, results