from ftw.pdfgenerator import interfaces
from ftw.pdfgenerator.html2latex import wrapper
from ftw.pdfgenerator.html2latex.patterns import DEFAULT_PATTERNS
from ftw.pdfgenerator.html2latex.subconverters import htmlentities
from ftw.pdfgenerator.html2latex.subconverters import hyperlink
from ftw.pdfgenerator.html2latex.subconverters import listing
from ftw.pdfgenerator.html2latex.subconverters import table
from ftw.pdfgenerator.html2latex.subconverters import textformatting
from ftw.pdfgenerator.utils import encode_htmlentities
from ftw.pdfgenerator.utils import xml2htmlentities
from random import choice
from zope.component import adapts
from zope.interface import implements, Interface
import re
 
 
# Marker entries which may appear inside a pattern list. They are not
# patterns themselves: the code in this module skips them while iterating
# over a pattern list and uses their position as insert position for
# custom patterns.
PLACEHOLDERS = (
    interfaces.HTML2LATEX_CUSTOM_PATTERN_PLACEHOLDER,
    interfaces.HTML2LATEX_CUSTOM_PATTERN_PLACEHOLDER_TOP,
    interfaces.HTML2LATEX_CUSTOM_PATTERN_PLACEHOLDER_BOTTOM,
    )

# Placeholder used by ``BasePatternAware._insert_custom_pattern`` when the
# caller does not pass another one.
DEFAULT_PLACEHOLDER = interfaces.HTML2LATEX_CUSTOM_PATTERN_PLACEHOLDER

# Subconverter classes returned by
# ``HTML2LatexConverter.get_default_subconverters`` and registered when a
# ``HTML2LatexConverter`` is instantiated.
DEFAULT_SUBCONVERTERS = (
    table.TableConverter,
    listing.ListConverter,
    htmlentities.HtmlentitiesConverter,
    hyperlink.HyperlinkConverter,
    textformatting.Textformatting,
    )
 
 
class BasePatternAware(object):
    """Base class providing some functions for handling a local patterns
    list.

    ``self.patterns`` is a mutable list. Its entries are either one of the
    ``PLACEHOLDERS`` markers (which only mark insert positions) or pattern
    sequences of the form ``(mode, search, replace[, modifiers])``.
    """

    default_patterns = None

    def __init__(self, patterns):
        self.patterns = patterns

    def register_patterns(self, patterns):
        """Register each pattern of ``patterns`` in the local pattern list.
        """
        for pattern in patterns:
            self._insert_custom_pattern(pattern)

    def register_subconverters(self, subconverters):
        """Register each subconverter class of ``subconverters`` in the
        local pattern list.
        """
        for converter in subconverters:
            self._register_converter(converter)

    def get_subconverter_by_pattern(self, pattern):
        """Return the replace object (the subconverter) of the first
        regexp-function pattern whose search term equals ``pattern``.
        Returns ``None`` when no such pattern is registered.
        """
        for ptn in self.patterns:
            # Placeholder markers are not patterns; skip them the same way
            # the other loops over the pattern list do.
            if ptn in PLACEHOLDERS:
                continue

            if ptn[0] == interfaces.HTML2LATEX_MODE_REGEXP_FUNCTION and \
                    ptn[1] == pattern:
                return ptn[2]

        return None

    def _insert_custom_pattern(self, pattern,
                               placeholder=DEFAULT_PLACEHOLDER,
                               replace=True):
        """Insert a custom pattern into the pattern list.

        If ``replace`` is true and the search term of the pattern
        (``pattern[1]``) already exists, every existing pattern with this
        search term is overwritten in place. Otherwise the pattern is
        inserted right before ``placeholder``.

        When the mode (``pattern[0]``) is a
        ``CustomPatternAtPlaceholderWrapper``, the wrapper is replaced by
        its mode, the pattern is inserted at the placeholder configured on
        the wrapper and existing patterns are never overwritten.

        Raises a ``ValueError`` (from ``list.index``) when the pattern has
        to be inserted but the placeholder is not in the pattern list.
        """

        if isinstance(pattern[0], wrapper.CustomPatternAtPlaceholderWrapper):
            # unwrap: work on a mutable copy so that the passed pattern is
            # not modified
            pattern = list(pattern)
            modeObject = pattern[0]
            pattern[0] = modeObject.mode
            placeholder = modeObject.placeholder
            replace = False

        found = False
        if replace:
            for index, existing in enumerate(self.patterns):
                if existing in PLACEHOLDERS:
                    continue

                if pattern[1] == existing[1]:
                    # overwrite existing pattern
                    self.patterns[index] = pattern
                    found = True

        if not found:
            # pattern will be inserted at the configured placeholder
            self.patterns.insert(self.patterns.index(placeholder), pattern)

    def _register_converter(self, converter_class):
        """Generate a regexp-function pattern from a subconverter class and
        insert it at the placeholder declared by the class.

        The class has to provide the attributes ``pattern`` (search term)
        and ``placeholder``.
        """
        self._insert_custom_pattern(
            pattern=(interfaces.HTML2LATEX_MODE_REGEXP_FUNCTION,
                     converter_class.pattern,
                     converter_class),
            placeholder=converter_class.placeholder)
 
 
class HTML2LatexConverter(BasePatternAware):
    """Multi adapter (context, request, layout) converting HTML to LaTeX.

    The converter keeps the pattern list; each conversion is executed by a
    fresh ``HTML2LatexConvertRunner`` working on a copy of this list.
    """

    implements(interfaces.IHTML2LaTeXConverter)
    adapts(Interface, Interface, interfaces.ILaTeXLayout)

    default_patterns = DEFAULT_PATTERNS

    def __init__(self, context, request, layout):
        self.context, self.request, self.layout = context, request, layout

        # Start with a copy of the class level defaults so that registering
        # patterns on this instance does not touch the shared list.
        initial_patterns = self.__class__.default_patterns[:]
        super(HTML2LatexConverter, self).__init__(patterns=initial_patterns)

        self.register_subconverters(self.get_default_subconverters())

    def get_default_subconverters(self):
        """Return the subconverter classes registered on instantiation."""
        return DEFAULT_SUBCONVERTERS

    def convert(self, html, custom_patterns=None, custom_subconverters=None,
                trim=True):
        """Convert ``html`` to LaTeX and return the result.

        ``custom_patterns`` and ``custom_subconverters`` are registered on
        the runner only, so they are in effect for this call but do not
        change the pattern list of the converter. ``trim`` is passed on to
        the runner.
        """
        runner = HTML2LatexConvertRunner(converter=self,
                                         patterns=self.patterns[:],
                                         html=html,
                                         trim=trim)

        registrations = (
            (runner.register_patterns, custom_patterns),
            (runner.register_subconverters, custom_subconverters),
            )
        for register, items in registrations:
            if items is not None:
                register(items)

        return runner.runner_convert()

    def convert_plain(self, text, **kwargs):
        """Convert plain ``text``: the text is passed through
        ``encode_htmlentities`` and then converted like HTML. Keyword
        arguments are passed on to ``convert()``.
        """
        return self.convert(encode_htmlentities(text), **kwargs)
 
 
class HTML2LatexConvertRunner(BasePatternAware):
    """Executes one single HTML to LaTeX conversion.

    The runner holds the HTML being converted (``self.html``), the pattern
    list for this conversion and the "lockers": a mapping of generated ids
    to the text they temporarily replace in ``self.html``.
    """

    implements(interfaces.IHTML2LaTeXConvertRunner)

    def __init__(self, converter, patterns, html, trim=True):
        """
        Creates a instance for converting html to latex.
        Attention: this instance should only be used ONCE for converting,
        because of the lockers and html instance attributes.
        You can use convert() on this instance, it will be proxied to the
        HTML2LatexConverter instance.

        Raises a ``ValueError`` if ``converter`` does not provide
        ``IHTML2LaTeXConverter``.
        """
        BasePatternAware.__init__(self, patterns)

        if not interfaces.IHTML2LaTeXConverter.providedBy(converter):
            raise ValueError(
                'converter should by a IHTML2LaTeXConverter')
        self.converter = converter

        self.lockers = {}
        self._convert_started = False

        # we use utf8. isinstance() is used so that subclasses of unicode
        # are encoded too.
        if isinstance(html, unicode):
            html = html.encode('utf8')

        # we do not use xmlentities, but htmlentities
        self.html = xml2htmlentities(html)

        # trim ?
        if trim:
            self.html = self.html.strip()

    def lock_chars(self, startPos, endPos):
        """
        Locks a specific part of the html. Other Patterns will not match this
        part of html anymore. This is generally used by SubConverters, if
        they replaced the HTML with latex.
        See replace_and_lock()

        The part ``self.html[startPos:endPos]`` is replaced by a random id,
        the original text is stored in ``self.lockers``. Returns the id.
        """
        id_chars = 'abcdefghijklmnopqrstuvwxyz0123456789'

        # generate new id with same length, but use keys with a length of
        # at least 10 chars for making them unique enough. otherwise the key
        # generation may loop.
        length = max(endPos - startPos, 10)

        while True:
            id_ = ''.join([choice(id_chars) for _i in range(length)])
            # the id must neither be in use nor occur in the html
            if id_ not in self.lockers and id_ not in self.html:
                break

        # lock html (replace with id)
        self.lockers[id_] = self.html[startPos:endPos]
        self.html = self.html[:startPos] + id_ + self.html[endPos:]
        return id_

    def replace(self, startPos, endPos, text):
        """
        Replaces a specific part in the HTML with [text] (e.g. latex code).
        The new text will be replaced by further patterns! Please use
        lock_chars() or replace_and_lock() if you don't want further patterns
        to match and replace.
        Returns the new HTML.
        """
        self.html = self.html[:startPos] + text + self.html[endPos:]
        return self.html

    def replace_and_lock(self, startPos, endPos, text):
        """
        Replaces a specific part in the HTML with [text] and locks it with
        lock_chars() after replacing. Returns the id of the locker.
        See replace() and lock_chars()
        """
        newEndPos = startPos + len(text)
        # replace html
        self.replace(startPos, endPos, text)
        # lock chars
        return self.lock_chars(startPos, newEndPos)

    def _unlock_chars(self):
        """
        Unlocks previously locked HTML (see lock_chars()). This method is
        automatically called by runner_convert() after converting HTML to
        Latex.

        A locked text may itself contain the id of another locker (when a
        part containing an id was locked again). The iteration order of the
        lockers dict is arbitrary, so a single pass could leave such an
        inner id in the result. Therefore the lockers are resolved
        repeatedly until no pending id occurs in the HTML anymore.
        """
        pending = dict(self.lockers)

        while pending:
            present = [id_ for id_ in pending if id_ in self.html]
            if not present:
                # the remaining ids do not occur in the html
                break

            for id_ in present:
                self.html = self.html.replace(id_, pending.pop(id_))

    def convert(self, html, custom_patterns=None, custom_subconverters=None,
                trim=True):
        """Convert a sub-part of the HTML. This initializes another
        runner, converts the HTML with it and returns the result. This is
        necessary for converting HTML parts matched in subconverters.

        The new runner starts with a copy of the pattern list of this
        runner; ``custom_patterns`` and ``custom_subconverters`` are
        registered on the new runner only.
        """

        runner = HTML2LatexConvertRunner(
            converter=self.converter,
            patterns=self.patterns[:],
            html=html,
            trim=trim)

        if custom_patterns is not None:
            runner.register_patterns(custom_patterns)

        if custom_subconverters is not None:
            runner.register_subconverters(custom_subconverters)

        return runner.runner_convert()

    def runner_convert(self):
        """This method does the actual converting. It should never be called
        directly, but through HTML2LatexConverter.convert()

        The patterns are applied in list order, afterwards the locked parts
        are unlocked. Returns the converted text. Raises a ``RuntimeError``
        when called a second time on the same runner.
        """

        if self._convert_started:
            raise RuntimeError(
                'runner_convert() should not be called twice!')
        self._convert_started = True

        for pattern in self.patterns:
            if pattern in PLACEHOLDERS:
                continue

            mode = pattern[0]
            search = pattern[1]
            replace = pattern[2]
            modifiers = ()

            # the fourth item (modifiers) is optional
            if len(pattern) == 4:
                modifiers = pattern[3]

            # replace
            if mode == interfaces.HTML2LATEX_MODE_REPLACE:
                self.html = self.html.replace(search, replace)

            # regexp replace
            elif mode == interfaces.HTML2LATEX_MODE_REGEXP:
                self._replace_regexp(search, replace, modifiers)

            # regexp function
            elif mode == interfaces.HTML2LATEX_MODE_REGEXP_FUNCTION:
                self._replace_regexp_function(search, replace)

        self._unlock_chars()
        return self.html

    def _replace_regexp(self, search, replace, modifiers):
        """Substitute all matches of the regular expression ``search``
        (compiled with ``re.DOTALL``) by ``replace``.

        With the repeat modifier in ``modifiers`` the substitution is
        repeated until the HTML does not change anymore.
        """
        xpr = re.compile(search, re.DOTALL)

        if interfaces.HTML2LATEX_REPEAT_MODIFIER in modifiers:
            # starting with '' means that an empty html is not touched at
            # all in repeat mode
            previous_html = ''

            while previous_html != self.html:
                previous_html = self.html
                self.html = xpr.sub(replace, self.html)

        else:
            self.html = xpr.sub(replace, self.html)

    def _replace_regexp_function(self, search, replace_fun):
        """Call ``replace_fun`` for the matches of the regular expression
        ``search`` (compiled with ``re.DOTALL``).

        ``replace_fun`` is called with this runner, the match object and
        the current HTML. If the result is callable, it is called without
        arguments (presumably this is how subconverter classes registered
        as replace object are instantiated and then executed - confirm
        against the subconverter base class). ``replace_fun`` is expected
        to change the HTML through the runner (e.g. with
        replace_and_lock()).

        Whenever the HTML was changed, searching restarts at the beginning
        since all positions may have moved. A match which did not change
        the HTML is remembered and skipped, the search continues behind
        its start position.
        """
        xpr = re.compile(search, re.DOTALL)
        skipStartPos = set()
        startLimit = 0

        while True:
            previous_html = self.html
            match = xpr.search(self.html, startLimit)

            if not match:
                break

            if match.start() in skipStartPos:
                # already handled without changing the html: continue
                # searching behind it
                startLimit = match.start() + 1

            else:
                skipStartPos.add(match.start())
                obj = replace_fun(self, match, self.html)
                if callable(obj):
                    obj()

            if self.html != previous_html:
                # positions are not valid anymore, start over
                skipStartPos = set()
                startLimit = 0