Did I find the right examples for you? yes no      Crawl my project      Python Jobs

All Samples(0)  |  Call(0)  |  Derive(0)  |  Import(0)

src/s/t/streamcorpus_pipeline-0.5.23.dev1/streamcorpus_pipeline/_lingpipe.py   streamcorpus_pipeline(Download)
        tok.token_num = self.tok_num
        if 'BYTES' in self.config['offset_types']:
            tok.offsets[OffsetType.BYTES] = Offset(
                type =  OffsetType.BYTES,
                first=self.byte_idx + len(self._input_string[:start].encode('utf-8')),

src/s/t/streamcorpus_pipeline-0.5.23.dev1/streamcorpus_pipeline/_hyperlink_labels.py   streamcorpus_pipeline(Download)
    def __init__(self, *args, **kwargs):
        '''Initialize the stage and resolve which offset type to use.

        All positional and keyword arguments are forwarded unchanged to
        the parent class initializer.  The first entry of
        ``self.config['offset_types']`` is looked up by name as an
        attribute of ``OffsetType`` and stored on ``self.offset_type``.

        Raises whatever the lookups raise if ``offset_types`` is missing
        or empty, or if its first entry is not an ``OffsetType`` attribute.
        '''
        super(hyperlink_labels, self).__init__(*args, **kwargs)
        # Only the first configured offset type is consulted here.
        self.offset_type = getattr(OffsetType, self.config['offset_types'][0])
        if self.offset_type != OffsetType.BYTES:
            # logger.warn is a deprecated alias (removed in Python 3.13);
            # passing the argument separately defers formatting until the
            # record is actually emitted.
            logger.warning('using offset_type other than BYTES: %r',
                           self.offset_type)
        anchors
        '''
        if   self.offset_type == OffsetType.BYTES:
            parser = self.byte_href_anchors
 

src/s/t/streamcorpus_pipeline-0.5.23.dev1/streamcorpus_pipeline/_tokenizer.py   streamcorpus_pipeline(Download)
                label = None
            if label:
                off = label.offsets[OffsetType.BYTES]
                end = max(off.first + off.length, end)
            previous_end = end
                    sentence_pos=sentence_pos,
                )
                tok.offsets[OffsetType.BYTES] = Offset(
                    type=OffsetType.BYTES, 
                    first=sent_start + start,
                    label = None
                if label:
                    off = label.offsets[OffsetType.BYTES]
                    if off.first + off.length > sent_start + start:
                        logger.info('overlapping label: %r' % label.target.target_id)

src/s/t/streamcorpus_pipeline-0.5.23.dev1/streamcorpus_pipeline/tests/test_hyperlink_labels.py   streamcorpus_pipeline(Download)
    elapsed_bytes = time.time() - start
 
    assert si1.body.labels['author'][0].offsets.keys() == [OffsetType.BYTES]
 
    ## run it with regex
    for annotator_id in si1.body.labels:
        for label in si1.body.labels[annotator_id]:
            assert OffsetType.BYTES in label.offsets
            byte_labels.add(label.target.target_id)