Did I find the right examples for you? yes no

All Samples(26)  |  Call(21)  |  Derive(0)  |  Import(5)
Parse an input source with HTML text into an Amara Bindery tree

Warning: if you pass a string, make sure it is a byte string, not a Unicode object. You may also want to wrap it with amara.lib.inputsource.text if it is not obviously XML or HTML (for example, if it could be mistaken for a file name).

        def parse(source, prefixes=None, model=None, encoding=None, use_xhtml_ns=False):
            '''
            Parse an input source with HTML text into an Amara Bindery tree

            source - the HTML to parse; it is wrapped with inputsource() and the
                     resulting stream is handed to the html5lib parser
            prefixes - optional namespace prefix mapping; when given (and non-empty)
                       it is applied to the parsed document with set_namespaces
            model - optional bindery model; when given, model.clone is used as the
                    entity factory for the tree builder instead of the default entity
            encoding - passed through to the html5lib parser's parse() call
            use_xhtml_ns - boolean, whether or not to use http://www.w3.org/1999/xhtml
                           (only consulted when PRE_0_90 is false)

            Warning: if you pass a string, you must make sure it's a byte string,
            not a Unicode object.  You might also want to wrap it with
            amara.lib.inputsource.text if it's not obviously XML or HTML (for
            example it could be confused with a file name)

            Returns the parsed document.
            '''
            from amara.lib.util import set_namespaces
            # Example:
            # from amara.bindery import html; doc = html.parse("http://www.hitimewine.net/istar.asp?a=6&id=161153!1247")

            def _pick_entity_factory():
                # Evaluated only when html5lib asks for a tree builder, exactly
                # as in each of the two factory variants below.
                if model:
                    return model.clone
                return entity

            # The tree-builder factory's signature depends on PRE_0_90: the
            # non-PRE_0_90 variant also receives a namespaceHTMLElements argument.
            if not PRE_0_90:
                def get_tree_instance(namespaceHTMLElements, use_xhtml_ns=use_xhtml_ns):
                    return treebuilder(_pick_entity_factory(), use_xhtml_ns)
            else:
                def get_tree_instance():
                    return treebuilder(_pick_entity_factory())

            html_parser = html5lib.HTMLParser(tree=get_tree_instance)
            stream = inputsource(source, None).stream
            doc = html_parser.parse(stream, encoding=encoding)
            if prefixes:
                set_namespaces(doc, prefixes)
            return doc
        


src/a/m/Amara-2.0.0/demo/atomtools.py   Amara(Download)
        if node.xml_select(u'@type = "html"') and node.xml_select(u'string(.)'):
            #unsouped = html.parse('<html xmlns="http://www.w3.org/1999/xhtml">%s</html>'%node.xml_select(u'string(.)').encode('utf-8'), encoding='utf-8')
            unsouped = html.parse('<html>%s</html>'%node.xml_select(u'string(.)').encode('utf-8'), encoding='utf-8')
            unsouped.html.xml_namespaces[None] = XHTML_NAMESPACE
            subtree = element_subtree_iter(unsouped)

src/a/k/Akara-2.0.0a4/lib/demo/moinrest.py   Akara(Download)
from amara import bindery
from amara.writers.struct import structencoder, E, NS, ROOT, RAW
from amara.bindery.html import parse as htmlparse
from amara.bindery.model import examplotron_model, generate_metadata
from amara.lib.util import first_item
        with closing(opener.open(request)) as resp:
            x = resp.read(); resp = x
            doc = htmlparse(resp)
            raise_embedded_error(doc)
 
    try:
        with closing(opener.open(request)) as resp:
            doc = htmlparse(resp)
            raise_embedded_error(doc)
 
        with closing(opener.open(request)) as resp:
            x = resp.read(); resp = x
            doc = htmlparse(resp)
            raise_embedded_error(doc)
 
    try:
        with closing(opener.open(request)) as resp:
            doc = htmlparse(resp)
            raise_embedded_error(doc)
 

src/a/m/Amara-2.0.0/demo/scrapesniff.py   Amara(Download)
    def __init__(self, source, html=False):
        self.records = []
        if html:
            from amara.bindery.html import parse as htmlparse
            self.doc = htmlparse(source)

src/a/k/Akara-2.0.0a4/lib/demo/moincms.py   Akara(Download)
from amara.bindery.model.examplotron import examplotron_model
from amara.writers.struct import *
from amara.bindery.html import parse as htmlparse
from amara.lib import U
from amara.lib.iri import split_fragment, relativize, absolutize
 
 
        doc = htmlparse(resp)
 
    #print (wikibase, outputdir, rewrite)

src/a/k/Akara-2.0.0a4/lib/demo/markuptools.py   Akara(Download)
    curl --request POST --data-binary "<a>one two <b>three four </b><c>five <d>six seven</d> eight</c> nine</a>" --header "Content-Type: application/xml" "http://localhost:8880/akara.tidy"
    '''
    doc = htmldoc.parse(body)
    return doc
 
    max_ = int(max) if max else 500
    if html == 'yes':
        doc = htmldoc.parse(body)
    else:
        doc = amara.parse(body)

src/a/k/Akara-2.0.0a4/lib/demo/xslt.py   Akara(Download)
    '''
    if params.get("tidy") == 'yes':
        doc = html.parse(body)
    else:
        doc = amara.parse(body)

src/a/k/Akara-2.0.0a4/lib/demo/unicodetools.py   Akara(Download)
    query = urllib.urlencode({"q": q})
    search_url = UINFO_SEARCH_URL + query
    doc = html.parse(search_url)
    buf = StringIO()
    structwriter(indent=u"yes", stream=buf).feed(

src/a/m/Amara-2.0.0/demo/rdfascrape.py   Amara(Download)
def rdfascrape(source):
    from amara.lib import inputsource
    source = inputsource(source, None)
    doc = html.parse(source.stream)
    try:

src/a/m/Amara-2.0.0/test/bindery/test_html.py   Amara(Download)
def test_reserved_attributes_page():
    EXPECTED = '<h1 id="akara:metadata">akara:metadata</h1>'
    f = filesource('tagsoup2.html')
    doc = html.parse(f.source)
    #import sys; print >> sys.stderr, [ d.xml_name for d in doc.xml_select(u'//div') ]
def test_reserved_attributes_page_ns():
    EXPECTED = '<h1 xmlns="http://www.w3.org/1999/xhtml" xmlns:h="http://www.w3.org/1999/xhtml" id="akara:metadata">akara:metadata</h1>'
    f = filesource('tagsoup2.html')
    doc = html.parse(f.source, prefixes=XHTML_NSS, use_xhtml_ns=True)
    #import sys; print >> sys.stderr, doc.xml_select(u'*')[0].xml_name
def test_simple_attr_update3():
    EXPECTED = """<html xmlns="http://www.w3.org/1999/xhtml"><head><title>HELLO</title></head><body><p>WORLD</body></html>"""
    doc = html.parse('<n:a xmlns:n="urn:bogus:x" x="1"/>')
    doc.a.x = unicode(int(doc.a.x)+1)
    treecompare.check_xml(doc.xml_encode(), XMLDECL+EXPECTED)
    def test_parse_file(self):
        """Parse ugly HTML file"""
        f = filesource('nastytagsoup1.html')
        doc = html.parse(f.source)
        self.assertEqual(len(doc.xml_children), 1)
    def test_tagsoup1(self):
        """Test RDFa interpretation from tagsoup"""
        f = filesource('tagsouprdfa1.html')
        doc = html.parse(f.source)
        h = doc.xml_select(u'//h1')[0]

src/a/k/Akara-2.0.0a4/lib/util/moin.py   Akara(Download)
from amara.xslt import transform
from amara.writers.struct import *
from amara.bindery.html import parse as htmlparse
from amara.lib import U
from amara.lib.date import timezone, UTC

  1 | 2  Next