# -*- coding: utf-8 -*- ########################################################################## # # # copyright (c) 2003,2005 ITB, Humboldt-University Berlin # # written by: Raphael Ritz, r.ritz@biologie.hu-berlin.de # # # # modified by LOGILAB S.A. (Paris, FRANCE) - 2005 # # http://www.logilab.fr/ -- mailto:contact@logilab.fr # ########################################################################## """BibliographyFolder main class""" __revision__ = '$Id: $' # Python imports from operator import and_ from types import StringType import sys # Zope imports from AccessControl import ClassSecurityInfo from Acquisition import Acquirer from DocumentTemplate import sequence from Persistence import PersistentMapping from OFS.Folder import Folder from Products.ATContentTypes.content.base import ATCTOrderedFolder from Products.ATContentTypes.content.folder import ( ATBTreeFolder, ATBTreeFolderSchema ) from Products.ATContentTypes.content.schemata import finalizeATCTSchema from zope.interface import implements from zope.event import notify # CMF imports from Products.CMFCore.utils import ( _checkPermission, getToolByName ) from Products.CMFCore.permissions import ( View, ModifyPortalContent, AddPortalContent, ManageProperties ) from Products.Archetypes.utils import shasattr # My imports ;-) from bibliograph.core.utils import _encode, _decode from bibliograph.core.interfaces import IBibliography from bibliograph.core.interfaces import IBibliographyExport from Products.CMFBibliographyAT.DuplicatesCriteria import DuplicatesCriteriaManager from Products.CMFBibliographyAT.config import CMFBAT_USES_LINGUAPLONE from Products.CMFBibliographyAT.config import PROJECTNAME, FOLDER_TYPES from Products.CMFBibliographyAT.interface import IBibliographyFolder from Products.CMFBibliographyAT.interface import ILargeBibliographyFolder from Products.CMFBibliographyAT.events import BibentryImportedEvent from bibliograph.parsing.parsers.base import EntryParseError from Products.CMFBibliographyAT.utils import log if CMFBAT_USES_LINGUAPLONE: from Products.LinguaPlone.public import * else: from Products.Archetypes.public import * BibFolderIdCookingSchema = Schema(( BooleanField('cookIdsOnBibRefCreation', searchable=0, default=True, default_method='getSiteDefaultCookIdsOnBibRefCreation', widget=BooleanWidget( label="ID Cooking on Creation of a Bibliographical Entry", label_msgid="label_bibfolder_cookidsoncreation", description="Cook IDs for bibliographical entries when created "\ "TTW. For this to work, leave new IDs (auto-"\ "generated by Plone - if shown at all) untouched on "\ "creation of a bibliographical entry. If you change "\ "the entry's ID on creation, no ID cooking will "\ "be performed. If not sure what to say here, leave "\ "the default.", description_msgid="help_bibfolder_cookidsoncreation", i18n_domain="cmfbibliographyat", ), ), StringField('referenceIdCookingMethod', searchable=0, default=None, default_method='getSiteDefaultIdCookerId', vocabulary="listEnabledIdCookers", widget=SelectionWidget( label="ID Cooking Method for Bibliographical Entries", description="Specify the algorithm that shall be used for "\ "bibliographical entry ID cooking in this "\ "bibliography folder. If not sure what to say here, "\ "leave the default.", label_msgid="label_bibfolder_idcooking", description_msgid="help_bibfolder_idcooking", condition="python:len(object.portal_bibliography.listIdCookers()) > 1", format="select", i18n_domain="cmfbibliographyat", ), ), BooleanField('cookIdsAfterBibRefEdit', searchable=0, default=True, default_method='getSiteDefaultCookIdsAfterBibRefEdit', widget=BooleanWidget( label="ID Re-Cooking after a Bibliographical Entry has been Edited", label_msgid="label_bibfolder_cookidsonedit", description="Re-cook IDs for bibliographical items after each "\ "edit action. This feature is useful, if you want to "\ "make sure, that bibliographical reference IDs "\ "always stay in sync with the entry's properties "\ "(author names, publication year etc. - depending on "\ "what properties are used for ID cooking). If not "\ "sure what to say here, leave the default.", description_msgid="help_bibfolder_cookidsonedit", i18n_domain="cmfbibliographyat", ), ), )) BibFolderBibrefImportSchema = Schema(( BooleanField('useParserIdsOnImport', searchable=0, default=True, default_method='getSiteDefaultUseParserIdsOnImport', widget=BooleanWidget( label="Use IDs from Import Files on Reference Import", label_msgid="label_bibfolder_usepidsonimport", description="On bibliography import some import formats already offer unique IDs for bibliographical entries. If you want to keep those IDs, tick here. If not sure what to say here, leave the default.", description_msgid="help_bibfolder_usepidsonimport", i18n_domain="cmfbibliographyat", ), ), )) BibFolderPdfManagerSchema = Schema(( LinesField('allowPdfUploadForTypes', searchable=0, default=None, default_method='getSiteDefaultAllowPdfUploadForTypes', vocabulary="listReferenceTypes", widget=InAndOutWidget( label="Allow PDF Upload Only for Selected Types", label_msgid="label_bibfolder_allowpdfuploadfortypes", description="Bibliographical reference types of this bibliography folder will only support upload of publications in PDF format if selected here.", description_msgid="help_bibfolder_allowpdfuploadfortypes", condition="python:object.portal_bibliography.allowPdfUploadPortalPolicy()", i18n_domain="cmfbibliographyat", ), ), BooleanField('synchronizePdfFileAttributes', searchable=0, default=True, default_method='getSiteDefaultSynchronizePdfFileAttributes', widget=BooleanWidget( label="Synchronize PDF File Attributes", label_msgid="label_bibfolder_synchronizepdffileattributes", description="Synchronize some attributes of an uploaded PDF file with the attribute values of the referring bibliographical entry (currently supported: ID, Roles, Creator).", description_msgid="help_bibfolder_synchronizepdffileattributes", condition="python:object.portal_bibliography.allowPdfUploadPortalPolicy()", i18n_domain="cmfbibliographyat", ), ), )) BibFolderDuplicatesManagerSchema = Schema(( BooleanField('enableDuplicatesManager', searchable=0, default=True, default_method='getSiteDefaultEnableDuplicatesManager', widget=BooleanWidget( label="Enable Duplicates Management", label_msgid="label_bibfolder_enableduplicatemanager", description="Enable the Duplicates Management engine of this bibliography folder. If enabled (recommended) duplicate bibliographical references will be treated specially on import, cut, copy and paste.", description_msgid="help_bibfolder_enableduplicatemanager", i18n_domain="cmfbibliographyat", ), ), StringField('duplicatesMatchingPolicy', searchable=0, vocabulary='listDuplicatesMatchingPolicies', default='local', widget=SelectionWidget( label="Duplicates Matching Policy", label_msgid="label_bibfolder_duplicatesmatchingpolicy", description="If duplicates management is enabled, this value will define the search span for matching objects.", description_msgid="help_bibfolder_duplicatesmatchingpolicy", i18n_domain="cmfbibliographyat", ), ), )) BibFolderSchema = ATBTreeFolderSchema.copy() + \ BibFolderIdCookingSchema.copy() + \ BibFolderBibrefImportSchema.copy() + \ BibFolderDuplicatesManagerSchema.copy() + \ BibFolderPdfManagerSchema.copy() + Schema(( BooleanField('searchable', searchable=0, default=True, default_method='getSiteDefaultSearchableBibFolders', widget=BooleanWidget( label="Searchable Bibliography Folder", label_msgid="label_bibfolder_searchable_bibfolder", description="Only if this option is enabled bibliographical entries in this bibliography folder will be found by site users using the bibliography search form. You can also use this option to hide this bibliography folder's bibliographical entries from »(Smart) Bibliography Lists« (if installed).", description_msgid="help_bibfolder_searchable_bibfolder", i18n_domain="cmfbibliographyat", ), ), TextField('intro', searchable=1, default='', default_content_type='text/html', default_output_type='text/x-html-captioned', allowed_content_types=('text/html', 'text/plain',), widget=RichWidget( label="Intro Text", label_msgid="label_bibfolder_intro", description="Adding an introductory text at the top of the folder listing.", description_msgid="help_bibfolder_intro", condition="python:object.portal_bibliography.allow_folder_intro", rows=15, i18n_domain="cmfbibliographyat", ), ), )) BibFolderSchema.moveField('searchable', after='description') DuplicatesBibFolderSchema = ATBTreeFolderSchema.copy() finalizeATCTSchema(BibFolderSchema, folderish=True, moveDiscussion=False) class BaseBibliographyFolder(Acquirer): """base class for containers for bibliographic references """ security = ClassSecurityInfo() author_urls = {} schema = BibFolderSchema _at_rename_after_creation = True implements(IBibliographyExport, IBibliography) def __iter__(self): """XXX[fill me in] """ return self.iterkeys() def iterkeys(self): """XXX[fill me in] """ for each in self.objectIds(): yield each def itervalues(self): """XXX[fill me in] """ for each in self.objectValues(): yield each def iteritems(self): """ Fullfil zope.interface.common.mapping.IIterableMapping contract """ for each in self.objectItems(): yield each security.declareProtected(View, 'isTranslatable') def isTranslatable(self): bib_tool = getToolByName(self, 'portal_bibliography') plone_utils = getToolByName(self, 'plone_utils') return bib_tool.isBibFolderTranslatable() \ and plone_utils.isTranslatable(self) security.declareProtected(View, 'getSiteDefaultAllowPdfUploadForTypes') def getSiteDefaultAllowPdfUploadForTypes(self): """get default values for allowed PDF upload from bibliography tool """ bib_tool = getToolByName(self, 'portal_bibliography') return bib_tool.allowPdfUploadForTypes() security.declareProtected(View, 'getSiteDefaultSearchableBibFolders') def getSiteDefaultSearchableBibFolders(self): """get default value for searchable field of bibfolders """ bib_tool = getToolByName(self, 'portal_bibliography') return bib_tool.searchableBibFolders() security.declareProtected(View, 'ListReferenceTypes') def listReferenceTypes(self): """list available reference types for use in schema field """ bib_tool = getToolByName(self, 'portal_bibliography') return DisplayList([(ref_type, ref_type) for ref_type in bib_tool.getReferenceTypes() ]) security.declareProtected(View, 'getBibReferences') def getBibReferences(self, lazy=False, sort_on='publication_year'): """Returns all of the contained bib references, as catalog brains. """ bibtool = getToolByName(self, 'portal_bibliography') ref_types = bibtool.getReferenceTypes() catalog = getToolByName(self, 'portal_catalog') refs = catalog( portal_type=ref_types, path={'query' : '/'.join(self.getPhysicalPath()), 'level' : 0, 'depth' : 1, }, sort_on=sort_on, sort_order='reverse', ) if lazy: return refs return [brain.getObject() for brain in refs] ## play nice with FTP and WebDAV security.declareProtected(View, 'manage_FTPget') def manage_FTPget(self): """render all references as one BibTeX file""" bibtool = getToolByName(self, 'portal_bibliography') return bibtool.render(self, 'bib') security.declareProtected(View, 'content_type') def content_type(self): """rely on our default rendering 'applicatio/x-bibtex' """ return 'application/x-bibtex' security.declareProtected(View, 'get_size') def get_size(self): """ The number of objects in this folder """ return len(self.objectIds()) security.declareProtected(ModifyPortalContent, 'PUT_factory') def PUT_factory(self, name, typ, body): """Handle HTTP and FTP PUT requests What we need to do here is to return something that later can be called 'PUT' upon as we are in a bibfolder already Also temporarily allow nesting of bibfodlers """ # temporarily allow bibfolders within bibfolders types_tool = getToolByName(self, 'portal_types') for ptype in ['BibliographyFolder', 'LargeBibliographyFolder']: fti = types_tool[ptype] fti.filter_content_types = 0 return BibliographyFolder(name) security.declareProtected(ModifyPortalContent, 'PUT') def PUT(self, REQUEST, RESPONSE): """Handle HTTP and FTP PUT requests """ raw = REQUEST.get('BODY') id = self.getId() parent = self.aq_inner.aq_parent if IBibliographyExport.providedBy(parent): target_folder = parent try: target_folder.manage_delObjects([id]) except AttributeError: pass # don't forget to impose our restriction again types_tool = getToolByName(self, 'portal_types') for ptype in ['BibliographyFolder', 'LargeBibliographyFolder']: fti = types_tool[ptype] fti.filter_content_types = 1 else: target_folder = self target_folder.setTitle(id) target_folder.processImport(raw, id) RESPONSE.setStatus(204) target_folder.reindexObject() return RESPONSE class BaseBibliographyAuthorUrlManager(Acquirer): """Enable management of an {author:authorurl} dictionary """ security = ClassSecurityInfo() security.declareProtected(View, 'AuthorURLs') def AuthorURLs(self): """accessor """ return self.author_urls security.declareProtected(View, 'AuthorUrlList') def AuthorUrlList(self): """list of dictionaries for editing """ link_list = [] keys = self.AuthorURLs().keys() keys.sort() for key in keys: entry = {'key':key, 'value':self.AuthorURLs()[key], } link_list.append(entry) return link_list security.declareProtected(ModifyPortalContent, 'setAuthorURLs') def setAuthorURLs(self, author_dict): """mutator (entire dictionary) """ self.author_urls = author_dict security.declareProtected(ModifyPortalContent, 'addAuthorURLs') def addAuthorURL(self, key='dummy', value=None): """new entry """ self.author_urls[key] = value security.declareProtected(ModifyPortalContent, 'processAuthorUrlList') def processAuthorUrlList(self, link_list): """creates the links dictionary from the list entries """ link_dict = {} for link in link_list: link_dict[link['key']] = link['value'] self.setAuthorURLs(link_dict) # enable look-up by authors security.declareProtected(View, 'getPublicationsByAuthors') def getPublicationsByAuthors(self, search_list, and_flag=0): """returns a filtered list of content values matching the publications of the specified authors authors MUST be specified by first initial plus lastname like in 'J Foo' or ['J Foo', 'J Bar'] """ if type(search_list) == StringType: search_list = [search_list] result = [] bib_tool = getToolByName(self, 'portal_bibliography') ref_types = bib_tool.getReferenceTypes() filter = {'portal_type' : ref_types} for value in self.contentValues(filter=filter): author_list = [] for author in value.getAuthors(): entry = author.get('firstname', ' ')[0] \ + ' ' \ + author.get('lastname', '') author_list.append(entry.strip()) for author in search_list: if author in author_list and value not in result: result.append(value) if and_flag: if value in result: for author in search_list: if author not in author_list: result.remove(value) return result class BaseBibliographyIdCookerManager(Acquirer): """methods that relate to automated ID cooking of bibliographical reference items """ security = ClassSecurityInfo() schema = BibFolderIdCookingSchema security.declarePublic(View, 'getSiteDefaultIdCookerId') def getSiteDefaultIdCookerId(self): bib_tool = getToolByName(self, 'portal_bibliography') return bib_tool.getDefaultIdCooker(with_disabled=False).getId() security.declarePublic(View, 'getSiteDefaultUseParserIdsOnImport') def getSiteDefaultUseParserIdsOnImport(self): bib_tool = getToolByName(self, 'portal_bibliography') return bib_tool.useParserIdsOnImport() security.declarePublic(View, 'getSiteDefaultCookIdsOnBibRefCreation') def getSiteDefaultCookIdsOnBibRefCreation(self): bib_tool = getToolByName(self, 'portal_bibliography') return bib_tool.cookIdsOnBibRefCreation() security.declarePublic(View, 'getSiteDefaultCookIdsAfterBibRefEdit') def getSiteDefaultCookIdsAfterBibRefEdit(self): bib_tool = getToolByName(self, 'portal_bibliography') return bib_tool.cookIdsAfterBibRefEdit() security.declarePublic(View, 'listEnabledIdCookers') def listEnabledIdCookers(self): bib_tool = getToolByName(self, 'portal_bibliography') dl = DisplayList( [(bib_tool.getIdCooker(idcooker_id=cooker_id).getId(), bib_tool.getIdCooker(idcooker_id=cooker_id).Title()) for cooker_id in bib_tool.listIdCookers(with_disabled=False)]) return dl security.declarePublic(AddPortalContent, 'listEnabledIdCookers') def getIdCooker(self): bib_tool = getToolByName(self, 'portal_bibliography') return bib_tool.getIdCooker( idcooker_id=self.getReferenceIdCookingMethod(), with_disabled=True) security.declareProtected(AddPortalContent, 'cookId') def cookId(self, ref): """:param ref: a single bibliography entry """ # invalid entry code : nobody1000 # if entry has no author and no publication_year - invalid entry bib_tool = getToolByName(self, 'portal_bibliography') if type(ref) != type({}): return "nobody1000" newid = bib_tool.cookReferenceId(ref, idcooker_id=self.getReferenceIdCookingMethod(), use_pid_on_import=self.getUseParserIdsOnImport(), with_disabled=True) while self.hasObject(newid): newid = self.nextId(newid) return newid def nextId(self, testid): bib_tool = getToolByName(self, 'portal_bibliography') return bib_tool.nextId(testid) class BaseBibliographyPublicationRanking(Acquirer): """ method that deal with publication ranking """ security = ClassSecurityInfo() security.declareProtected(View, 'Top') def Top(self, number=None, order=None, explicit=0): """ Returns all ranked entries in order of their ranking. If number is set, returns the top 'number' reference objects (or all if 'number' is greater than the number of ranked entries). If 'explicit' is set, only returns the explicitly ranked entries, otherwise the default ordering is used """ if number: number = int(number) top_elements = [] entries = self.listFolderContents() for entry in entries: if getattr(entry, 'rank', None): top_elements.append((entry.rank, entry)) top_elements.sort() result = [entry[1] for entry in top_elements] if result and number: return result[:number] elif result: return result if not explicit: if number: return self.defaultOrder(order)[:number] else: return self.defaultOrder(order) return [] def defaultOrder(self, order=None): """ The entries in default ordering: If no orer is specified, (('publication_year', 'cmp', 'desc'),('Authors', 'nocase', 'asc')) is used. Order must be formated to match the 'DocumentTemplate.sequence' syntax """ raw_list = self.listFolderContents() if order is None: sort_on = (('publication_year', 'cmp', 'desc'), ('Authors', 'nocase', 'asc')) else: sort_on = order return sequence.sort(raw_list, sort_on) security.declareProtected(ModifyPortalContent, 'setTop') def setTop(self, ids=[]): """ sets the publication ranking """ rank = 0 self._resetRanking() for obj_id in ids: obj = getattr(self, obj_id, None) if obj: rank += 1 obj.rank = rank def _resetRanking(self): """ resets the ranking of reference objects """ for obj in self.contentValues(): obj.rank = None class BaseBibliographyImportManager(Acquirer): """ management for bibliography import """ security = ClassSecurityInfo() schema = BibFolderBibrefImportSchema security.declareProtected(AddPortalContent, 'logImportReport') def logImportReport(self, report): """Store the import report. """ if isinstance(report, unicode): report = _encode(report) # finish building and write the report old_report = self.getProperty('import_report', '') report = report + '=' * 30 + '\n' + _encode(_decode(old_report)) self.manage_changeProperties(import_report=report) def initReport(self, origin): # start building the report mtool = getToolByName(self, 'portal_membership') member = mtool.getAuthenticatedMember() member_id = member.getId() fullname = member.getProperty('fullname', None) if not fullname: fullname = 'unknown fullname' import_date = self.ZopeTime() tmp_report = '[%s] Imported by %s (%s) from %s:\n\n' \ % (import_date, member_id, fullname, origin) return tmp_report def wrapupReport(self, tmp_report): # finish building and write the report old_report = self.import_report report = tmp_report \ + '=' * 30 + '\n' \ + old_report self.manage_changeProperties(import_report=report) security.declareProtected(ModifyPortalContent, 'buildReportLine') def buildReportLine(self, import_status, entry, url=None, relations=None): """ format a line to be added in the import report """ if 'authors' in entry: authors = entry['authors'] authors_list = [] for author in authors: firstname = author.get('firstname') middlename = author.get('middlename', '') lastname = author.get('lastname') authors_list.append('%s %s %s' % (firstname, middlename, lastname)) ref_authors = ', '.join(authors_list) else: ref_authors = 'Anonymous' ref_authors = _decode(ref_authors) ref_title = _decode(entry.get('title')) for car in ['\n', '\r', '\t', ' ', ' ']: ref_title = ref_title.replace(car, '') line = u'%s - %s' % (ref_authors, ref_title) if 'publication_year' in entry: year = entry.get('publication_year') try: year = unicode(year) except: # got some "A paraĆ®tre" values... year = entry.get('publication_year').decode('utf-8') line = u'%s (%s)' % (line, year) if import_status == 'OK': line = u'Successfully Imported: %s' % line if import_status == 'ok' and relations: relations = _decode(relations) line = u'%s (Inferred author references: %s)' \ % (line, relations) if import_status == 'ok' and url: line = u'%s [<a href="%s">view</a>], ' \ u'[<a href="%s">edit</a>]' \ % (line, url, url + '/edit') else: line = u'%s: %s' % (import_status, line) return _encode(line + '.\n') security.declareProtected(AddPortalContent, 'processSingleImport') def processSingleImport(self, entry, span_of_search=None, force_to_duplicates=False, skip_matching=False, infer_references=True): """ called for importing a single entry :type entry: dict :param entry: a single bibliography entry :type skip_matching: Boolean :param skip_matching: toogle to skip matching objects (used for force import) :type force_to_duplicates: Boolean :param force_to_duplicates: used for migration returns a tuple description of success of operation (desc, code, obj) eg. ('Failed : this entry', 'FAILED', obj) possible codes: OK, FAILED, (+ duplicate) """ url = None relations = None obj = None if isinstance(entry, EntryParseError): return ('Failed: %s\n' % entry.description, 'FAILED') span_of_search = span_of_search or self.getDuplicatesMatchingPolicy() # import entry as bibref object first try: newid = self.cookId(entry) if newid and newid != "nobody1000": rtype = entry.get('reference_type', 'ArticleReference') del entry['reference_type'] if rtype not in [_.getId() for _ in self.getAllowedTypes()]: return "Error: Content-Type %s is not " % rtype + \ "allowed to create in Folder %s." % self.Type() self.invokeFactory(rtype, newid) obj = getattr(self, newid) obj.edit(**entry) url = obj.absolute_url() if obj.showMemberAuthors() and infer_references: relations = obj.inferAuthorReferences() if self.getEnableDuplicatesManager() \ and (not skip_matching or force_to_duplicates): test, matched_obj = self.isDuplicate(obj, span_of_search) if test or force_to_duplicates: # in any case, we want the duplicate obj to be aware of # local AND global matches if span_of_search != 'global': dummy, matched_obj = self.isDuplicate(obj, 'global') duplicate = self.moveToDuplicatesFolder(obj, matched_obj) notify(BibentryImportedEvent(duplicate, matched_obj)) message = self.REQUEST.get('bibtext_import_message', 'DUPLICATE CREATED') return ('Duplicate: %s\n' % obj.Title() or 'no info', message, duplicate) import_status = 'ok' notify(BibentryImportedEvent(obj, False)) except: # for debugging # XXX shouldn't catch all exceptions # Remove the \n from import_status so that it all appears # on one line in the import_report, which keeps the count # the same as that reported. import_status = "Error type: %s. Error value: %s" \ % (sys.exc_info()[0], sys.exc_info()[1]) report_line = self.buildReportLine(import_status, entry, url=url, relations=relations) return (report_line, import_status, obj) security.declareProtected(AddPortalContent, 'processImport') def processImport(self, source, filename, format=None, return_obs=False, input_encoding='utf-8'): """ main routine to be called for importing entire files from custom code. """ current_report = self.initReport('file %s' % filename) # get parsed entries from the Bibliography Tool bib_tool = getToolByName(self, 'portal_bibliography') entries = bib_tool.getEntries(source, format, filename, input_encoding=input_encoding) obs = [] for entry in entries: if entry.get('title'): infer_references = bib_tool.inferAuthorReferencesAfterImport() upload = self.processSingleImport(entry, infer_references=infer_references) if return_obs: obs.append(upload[2]) current_report = current_report + upload[0] self.logImportReport(current_report) # we have to set a transaction savepoint here bib_tool.transaction_savepoint(optimistic=True) if return_obs: return obs class BaseBibliographyDuplicatesManager(Acquirer): """ duplication handling for bibliography folders """ security = ClassSecurityInfo() _assoc_duplicates_folder = None schema = BibFolderDuplicatesManagerSchema security.declareProtected(View, 'getDuplicatesFolder') def getDuplicatesFolder(self, id='duplicates'): """ Returns a bibliography folder similar container within this bibliography folder for storing duplicate bibliographical entries. This method creates the folder if needed. """ reference_catalog = getToolByName(self, 'reference_catalog') if self._assoc_duplicates_folder is None: tt = getToolByName(self, 'portal_types') fti = tt['DuplicatesBibliographyFolder'] fti._constructInstance(self, id) self[id].setTitle('Pending Duplicate Bibliography Entries') self._assoc_duplicates_folder = self[id].UID() self[id]._assoc_bibliography_folder = self.UID() return reference_catalog.lookupObject(self._assoc_duplicates_folder) security.declarePublic(View, 'getSiteDefaultEnableDuplicatesManager') def getSiteDefaultEnableDuplicatesManager(self): bib_tool = getToolByName(self, 'portal_bibliography') return bib_tool.enableDuplicatesManager() def duplicatesExist(self): """ test if duplicates exist (used for action tab) """ duplicates_folder = None reference_catalog = getToolByName(self, 'reference_catalog') if self._assoc_duplicates_folder is not None: duplicates_folder = reference_catalog.lookupObject(self._assoc_duplicates_folder) if duplicates_folder: return duplicates_folder.contentValues() and True or False return False def getDuplicatesCriteria(self): """ returns rights criteria depending on if they are defined portal or folder wide """ return self.getSelectedCriteria() getCriterias = getDuplicatesCriteria def listDuplicatesMatchingPolicies(self): return DisplayList((('local', 'local_duplicates_matchingpolicy'), ('global', 'global_duplicates_matchingpolicy'))) def isDuplicate(self, bibref_item, span_of_search=None): """ checks if entry is duplicated """ if not self.getEnableDuplicatesManager(): return False, [] span_of_search = span_of_search or self.getDuplicatesMatchingPolicy() all_criteria = self.getDuplicatesCriteria() bib_tool = getToolByName(self, 'portal_bibliography') entry = bib_tool.getEntryDict(bibref_item) ref_types = bib_tool.getReferenceTypes() have = all_criteria.has_key global_tests = [] filter = {'portal_type': ref_types} # get search span from criteria cache_key = 'CMFBibliographyAT_import_duplicate_aqobj' if cache_key in self.REQUEST: acquired_objects = self.REQUEST[cache_key] elif span_of_search == 'local': # local configuration acquired_objects = self.contentValues(filter=filter) self.REQUEST[cache_key] = acquired_objects # print "***Debuging*** found %s" % aquired_objects elif span_of_search == 'global': acquired_objects = [] portal_catalog = getToolByName(self, 'portal_catalog') all_folders = portal_catalog(meta_type=FOLDER_TYPES) for each_result in all_folders: obj = each_result.getObject() acquired_objects += obj.contentValues(filter=filter) self.REQUEST[cache_key] = acquired_objects else: raise ValueError("span of search for duplicates has an " + "invalid value : %s" % span_of_search) bibref_item_uid = bibref_item.UID() for existing_object in acquired_objects: if existing_object.UID() == bibref_item_uid: continue bib_type = entry.get('reference_type', 'ArticleReference') if not have(bib_type): continue criteria = all_criteria[bib_type] if not criteria: return False, [] # authors need special treatment for attribute in criteria: # authors need special treatment if attribute == 'authors': equal = self.compareAuthors(entry, existing_object) if not equal: break else: x = entry.get(attribute, None) try: get_func = getattr(existing_object, 'get' + attribute.capitalize()) except AttributeError: try: get_func = getattr(existing_object, attribute.capitalize()) except AttributeError: # XXX print ? print "can't do get" + attribute.capitalize(), \ 'or', attribute.capitalize() break y = _decode(get_func()) if y and y[-1] == '.': y = y[:-1] if x and x[-1] == '.': x = x[:-1] if x != y: #print "***Debug***: found difference" #print "%s doesn't match %s" % (x, y) break else: global_tests.append((True, existing_object)) matching_objects = [ext_obj for (test, ext_obj) in global_tests \ if test] global_bools = [test for (test, ext_obj) in global_tests] if global_tests: return reduce(and_, global_bools), matching_objects return False, [] def findDuplicated(self, uid=None): reference_catalog = getToolByName(self, 'reference_catalog') if uid and shasattr(self.getDuplicatesFolder(), uid): duplicate = reference_catalog.lookupObject(uid) test, matched_objects = self.isDuplicate(duplicate, span_of_search='global') if test: duplicate.setIs_duplicate_of(matched_objects) return "Matching bibliographical reference of duplicate item " \ "with UID %s updated" % uid elif uid is None: for duplicate in self.getDuplicatesFolder().contentValues(): test, matched_objects = self.isDuplicate(duplicate, span_of_search='global') if test: duplicate.setIs_duplicate_of(matched_objects) def skipDuplicated(self, uid): """ skip function on an entry in _duplicates (simple deletion) :type key: string :param key: identifier for the entry stored in _duplicates """ reference_catalog = getToolByName(self, 'reference_catalog') duplicate_bibref_item = reference_catalog.lookupObject(uid) if duplicate_bibref_item: dupfolder = self.getDuplicatesFolder() dupfolder.manage_delObjects([duplicate_bibref_item.getId()]) else: return "UID %s does not exist - cannot delete referenced " + \ "duplicate entry" % uid def compareAuthors(self, entry, existing): new_last_names = [_decode(a.get('lastname')) for a in entry.get('authors', [])] old_last_names = [_decode(a.get('lastname')) for a in existing.getRawAuthors()] if new_last_names == old_last_names: return True return False def moveToDuplicatesFolder(self, bibref_object, matched_objects): """ moves bibref item to associated Duplicates Bibliography Folder for post-processing :type entry: dict :param entry: a single bibliography entry :type matched_object: AT object :param matched_object: the entry matches this object (already present on site) """ reference_catalog = getToolByName(self, 'reference_catalog') bib_tool = getToolByName(self, 'portal_bibliography') if bibref_object.portal_type in bib_tool.getReferenceTypes(): tt = getToolByName(self, 'portal_types') fti = tt[self.getDuplicatesFolder().portal_type] allowed_types = fti.allowed_content_types fti.allowed_content_types += (bibref_object.portal_type,) bib_tool.transaction_savepoint(optimistic=True) bibref_object.setIs_duplicate_of(matched_objects) bibref_uid = bibref_object.UID() objs = self.manage_cutObjects([bibref_object.getId(), ]) self.getDuplicatesFolder().manage_pasteObjects(objs) bibref_object = reference_catalog.lookupObject(bibref_uid) fti.allowed_content_types = allowed_types return bibref_object return None def forceDuplicated(self, uid): """ force function on an entry in _duplicates (forces import of new entry) """ initReport = self.initReport('duplication management') reference_catalog = getToolByName(self, 'reference_catalog') bib_tool = getToolByName(self, 'portal_bibliography') # process import for duplicated entry duplicate_bibref_item = reference_catalog.lookupObject(uid) duplicate_bibref_item.setIs_duplicate_of(None) duplicates_manager_restore = self.getEnableDuplicatesManager() self.setEnableDuplicatesManager(False) dupfolder = self.getDuplicatesFolder() objs = dupfolder.manage_cutObjects([duplicate_bibref_item.getId(), ]) self.manage_pasteObjects(objs) self.setEnableDuplicatesManager(duplicates_manager_restore) entry = bib_tool.getEntryDict(duplicate_bibref_item) url = duplicate_bibref_item.absolute_url() report_line = self.buildReportLine('forced', entry, url=url, relations=None) current_report = initReport + report_line self.wrapupReport(current_report) return current_report def replaceDuplicated(self, uid): """ replace function on an entry in _duplicates (replaces originial finding with new entry) :type key: string :param key: identifier for the entry stored in _duplicates """ reference_catalog = getToolByName(self, 'reference_catalog') bib_tool = getToolByName(self, 'portal_bibliography') duplicate_bibref_item = reference_catalog.lookupObject(uid) entry = bib_tool.getEntryDict(duplicate_bibref_item) for orig_obj in duplicate_bibref_item.getIs_duplicate_of(): orig_obj.setAuthors(entry.get('authors', [])) orig_obj.edit(**entry) # hmmm... replace means: also replace the PDF file (if any in # associated) if duplicate_bibref_item.getPdf_file(): orig_obj.setPdf_file(duplicate_bibref_item.getPdf_file()) # and if id cooking after bibref edit is enabled, we have to # re-cook the orig_obj's ID if self.getCookIdsAfterBibRefEdit(): orig_obj.bibliography_entry_cookId() # nasty trick, isn't it? the bibref item does not know that it sleeps # in the duplicates bibliography folder... but we do!!! duplicates_folder = duplicate_bibref_item.getBibFolder() duplicates_folder.manage_delObjects([duplicate_bibref_item.getId()]) return 'replace begin implemented' def delayDuplicated(self, uid): """delay function on an entry in _duplicates (does nothing) """ pass security.declarePublic('getDuplicates') def getDuplicates(self, uid=None): """ get a dict with uid: bibref_object entries """ if uid: reference_catalog = getToolByName(self, 'archetypes_tool') return reference_catalog.lookupObject(uid) bib_tool = getToolByName(self, 'portal_bibliography') duplicates_dict = {} cfilter = {'portal_type': bib_tool.getReferenceTypes(), } for obj in self.getDuplicatesFolder().contentValues(filter=cfilter): duplicates_dict[obj.UID()] = obj return duplicates_dict security.declareProtected(ModifyPortalContent, 'handleAction') def handleAction(self, REQUEST=None): """ run through duplicates and determine what to do according to request """ logs = [] seq_of_uids = [] for uid in self.getDuplicates().keys(): seq_of_uids.append(uid) for uid in seq_of_uids: if uid in REQUEST.keys(): action = REQUEST.get(uid) action_func = getattr(self, '%sDuplicated' % action) log_msg = action_func(uid) if log_msg: logs.append(log_msg) def format_log(log): return _encode(_decode(log.replace('\n\n', ' ').replace('\n', ''))) return ', '.join([format_log(log) for log in logs]) security.declareProtected(ModifyPortalContent, 'rescanBibfolderForDuplicates') def updateDuplicatesFolder(self, sort_on='modified', sort_order='reverse', span_of_search=None): """ run through bibliographical entries and check if they are duplicates """ bib_tool = getToolByName(self, 'portal_bibliography') ctool = getToolByName(self, 'portal_catalog') span_of_search = span_of_search or self.getDuplicatesMatchingPolicy() query_bibfolder = { 'sort_on': sort_on, 'portal_type': bib_tool.getReferenceTypes(), 'path': {'depth': 1, 'query': '/'.join(self.getPhysicalPath()), }, } if sort_order: query_bibfolder['sort_order'] = sort_order bibfolder_contentValues = [bibref_item.getObject() for bibref_item in ctool(**query_bibfolder)] # walk through the list of duplicates from end to beginning... bibfolder_contentValues.reverse() count = 0 for bibref_item in bibfolder_contentValues: test, matching_objects = self.isDuplicate(bibref_item, span_of_search=span_of_search) if test: count += 1 self.moveToDuplicatesFolder(bibref_item, matching_objects) return 'Bibliography Duplicate Update: %s bibref items have been '\ 'regarded as duplicate bibliographical entries.' % count class BaseBibliographyPdfFolderManager(Acquirer): security = ClassSecurityInfo() schema = BibFolderPdfManagerSchema _assoc_pdf_folder = None _delete_associated_pdffiles = False _move_associated_pdffiles = False security.declareProtected(View, 'getPdfFolder') def getPdfFolder(self, pdfsid='pdfs'): """ Returns a folder for storing files Creates it if needed """ reference_catalog = getToolByName(self, 'reference_catalog') if self._assoc_pdf_folder is None: if not self.hasObject(pdfsid): tt = getToolByName(self, 'portal_types') fti = tt['PDF Folder'] fti._constructInstance(self, pdfsid) self[pdfsid].setTitle('PDFs') self._assoc_pdf_folder = self[pdfsid].UID() pdf_folder = reference_catalog.lookupObject(self._assoc_pdf_folder) # test, if the returned object really is the pdf folder: if pdf_folder is not None \ and pdf_folder.getPhysicalPath()[:-1] == self.getPhysicalPath(): return pdf_folder else: # BULLSHIT, reference to PDF folder broken. pdf_folder = eval('self.%s' % pdfsid) try: self._assoc_pdf_folder = pdf_folder.UID() except: pass return pdf_folder security.declarePublic(View, 'getSiteDefaultSynchronizePdfFileAttributes') def getSiteDefaultSynchronizePdfFileAttributes(self): bib_tool = getToolByName(self, 'portal_bibliography') return bib_tool.synchronizePdfFileAttributes() class BaseBibliographyObjectManager(Acquirer): security = ClassSecurityInfo() _base_folder_class = Folder def manage_delObjects(self, ids=[], REQUEST=None): self._delete_associated_pdffiles = True return self._base_folder_class.manage_delObjects(self, ids=ids, REQUEST=REQUEST) def manage_cutObjects(self, ids=[], REQUEST=None): self._move_associated_pdffiles = True return self._base_folder_class.manage_cutObjects(self, ids=ids, REQUEST=REQUEST) def manage_renameObject(self, cid, new_id, REQUEST=None): """ also rename PDF file (if any is associated) """ # rename ids in self first self._base_folder_class.manage_renameObject(self, cid, new_id, REQUEST=REQUEST) # then try to rename PDF file ob = self._getOb(new_id) pdf_file = shasattr(ob, 'getPdf_file') and ob.getPdf_file() or None # manage_renameObject is called during setId. thus, whenever setId is # called we do not need to explicitly call the pdffile id cooker, this # is done here!!! if pdf_file and self.getSynchronizePdfFileAttributes(): object.bibliography_pdffile_cookId(with_disabled=True) def manage_pasteObjects(self, cb_copy_data=None, REQUEST=None): ofs_result = self._base_folder_class.manage_pasteObjects(self, cb_copy_data=cb_copy_data, REQUEST=REQUEST) self._move_associated_pdffiles = False # re-cook ids of bibref items and (again) of associated PDF files if # requested in bibfolder settings if self.getCookIdsAfterBibRefEdit(): bib_tool = getToolByName(self, 'portal_bibliography') result = [] for ofs_item in ofs_result: bibref_old_id = ofs_item['id'] bibref_ofstemp_id = ofs_item['new_id'] item = getattr(self, bibref_ofstemp_id) bibref_new_id = bib_tool.cookReferenceId(ref=item, idcooker_id=self.getReferenceIdCookingMethod(), with_disabled=True) if bibref_new_id != 'nobody1000': item.bibliography_entry_cookId(with_disabled=True) # no subsequent PDF file ID cooking needed here, # it will be invoked by manage_renameObject # during bibliography entry ID cooking result.append({'id': bibref_old_id, 'new_id': item.getId(), }) else: result.append({'id': bibref_old_id, 'new_id': bibref_ofstemp_id, }) else: result = ofs_result if self.portal_type in FOLDER_TYPES: # duplicate checking only if we are not yet in the duplicates # folder (or wherever) for ofs_item in result: new_bibref_item_id = ofs_item['new_id'] new_bibref_item = self[new_bibref_item_id] test, matched_objects = self.isDuplicate(new_bibref_item) if test: dummy, matched_objects = self.isDuplicate(new_bibref_item, span_of_search='global') self.moveToDuplicatesFolder(new_bibref_item, matched_objects) # TODO: notify the user that some objects may have been moved to # DuplicatesBibliographyFolder... return result class BibliographyFolder(BaseBibliographyIdCookerManager, BaseBibliographyPdfFolderManager, BaseBibliographyDuplicatesManager, BaseBibliographyImportManager, BaseBibliographyAuthorUrlManager, BaseBibliographyPublicationRanking, BaseBibliographyObjectManager, BaseBibliographyFolder, ATCTOrderedFolder, DuplicatesCriteriaManager): """container for bibliographic references """ _base_folder_class = ATCTOrderedFolder def __init__(self, cid, **kwargs): self._duplicates = PersistentMapping() self._base_folder_class.__init__(self, cid, **kwargs) DuplicatesCriteriaManager.__init__(self) schema = BibFolderSchema archetype_name = "Bibliography Folder" implements(IBibliographyFolder) import_report = '' _properties = ATCTOrderedFolder._properties + \ ({'id': 'import_report', 'type': 'text', 'mode': 'w'},) manage_options = ( ATCTOrderedFolder.manage_options[:2] + DuplicatesCriteriaManager.manage_options + ATCTOrderedFolder.manage_options[2:] ) class LargeBibliographyFolder(BaseBibliographyIdCookerManager, BaseBibliographyPdfFolderManager, BaseBibliographyDuplicatesManager, BaseBibliographyImportManager, BaseBibliographyAuthorUrlManager, BaseBibliographyPublicationRanking, BaseBibliographyObjectManager, BaseBibliographyFolder, ATBTreeFolder, DuplicatesCriteriaManager, ): """container for bibliographic references """ _base_folder_class = ATBTreeFolder def __init__(self, cid, **kwargs): self._duplicates = PersistentMapping() self._base_folder_class.__init__(self, cid, **kwargs) DuplicatesCriteriaManager.__init__(self) schema = BibFolderSchema archetype_name = "Large Bibliography Folder" global_allow = 0 implements(ILargeBibliographyFolder) import_report = '' _properties = ATBTreeFolder._properties + \ ({'id': 'import_report', 'type': 'text', 'mode': 'w'},) manage_options = ( ATBTreeFolder.manage_options[:2] + DuplicatesCriteriaManager.manage_options + ATBTreeFolder.manage_options[2:] ) def itervalues(self): """XXX[fill me in] """ for obj in self._tree.itervalues(): # Wrap the object for security checks and returning... obj = obj.__of__(self) # Check to see if the object has been changed (elsewhere in the # current transaction/request. changed1 = getattr(obj, '_p_changed', False) if _checkPermission(View, obj): yield obj # Only deactivate (and retrieve memory) if the object hasn't been # changed, either before this method was called, or during it. changed2 = getattr(obj, '_p_changed', False) deactivate = not changed1 and not changed2 if deactivate: obj._p_deactivate() def listDAVObjects(self): """This may have unpleasant side-effects! Turn this into a generator that deactivates objects after they have been yield-ed. """ return self.itervalues() class DuplicatesBibliographyFolder(BaseBibliographyIdCookerManager, BaseBibliographyPdfFolderManager, BaseBibliographyObjectManager, BaseBibliographyFolder, ATCTOrderedFolder): """container for duplicates of bibliographic references """ security = ClassSecurityInfo() schema = DuplicatesBibFolderSchema _at_rename_after_creation = True _assoc_bibliography_folder = None def getBibFolder(self): """ returns associated bibliography folder """ ref_catalog = getToolByName(self, 'reference_catalog') return ref_catalog.lookupObject(self._assoc_bibliography_folder) def getPdfFolder(self): ref_catalog = getToolByName(self, 'reference_catalog') bibfolder = ref_catalog.lookupObject(self._assoc_bibliography_folder) if bibfolder is not None: return bibfolder.getPdfFolder() def getDuplicatesFolder(self): return self def getSynchronizePdfFileAttributes(self, **kwargs): return True def getCookIdsAfterBibRefEdit(self, **kwargs): return True def getUseParserIdsOnImport(self, **kwargs): return False def getReferenceIdCookingMethod(self, **kwargs): return 'uid' def manage_beforeDelete(self, item, container): """ do some cleaning up before vanishing forever """ # manage_beforeDelete is deprecated in Zope 2.11+ bibfolder = self.getBibFolder() bibfolder._assoc_duplicates_folder = None ATCTOrderedFolder.manage_beforeDelete(self, item, container) def manage_renameObject(self, cid, new_id, REQUEST=None): """ make sure that we can rename bibref item content types """ tt = getToolByName(self, 'portal_types') bib_tool = getToolByName(self, 'portal_bibliography') fti = tt[self.portal_type] fti.allowed_content_types = tuple(bib_tool.getReferenceTypes()) # BaseBibliographyPdfFolderManager will also call the # ATCTOrderedFolder.manage_renameObject method. # So nothing more to do here... BaseBibliographyObjectManager.manage_renameObject( self, cid, new_id, REQUEST=REQUEST) fti.allowed_content_types = () registerType(BibliographyFolder, PROJECTNAME) registerType(LargeBibliographyFolder, PROJECTNAME) registerType(DuplicatesBibliographyFolder, PROJECTNAME)