Did I find the right examples for you? yes no

All Samples(8)  |  Call(8)  |  Derive(0)  |  Import(0)
Collects a frequency distribution for a single field by reading
the file provided.
Issues:
- has limited checking for records that contain the wrong number of fields

        def get_field_freq(filename,
                           dialect,
                           field_number,
                           max_freq_size=MAX_FREQ_SIZE_DEFAULT):
            """ Collects a frequency distribution for a single field by reading
                the file provided.

                Args:
                - filename: path of the delimited file to read.
                - dialect: csv dialect handed to csv.reader; it must also carry
                  a has_header attribute - when that is true the first record
                  is skipped and not counted.
                - field_number: zero-based position of the field to count.
                - max_freq_size: collection stops as soon as the number of
                  distinct values reaches this size.

                Returns a (freq, truncated, invalid_row_cnt) tuple:
                - freq: defaultdict mapping each stripped field value to the
                  number of records it appeared in.
                - truncated: True if reading stopped early because freq
                  reached max_freq_size.
                - invalid_row_cnt: number of records too short to contain
                  field_number.

                Issues:
                - has limited checking for wrong number of fields in rec
            """
            freq = collections.defaultdict(int)
            truncated = False
            invalid_row_cnt = 0

            # Read inside a with-block so the file handle is closed on every
            # exit path: normal completion, early truncation, or an exception.
            with open(filename, 'r') as infile:
                for rec_cnt, fields in enumerate(csv.reader(infile, dialect=dialect), 1):
                    if rec_cnt == 1 and dialect.has_header:
                        continue
                    try:
                        freq[fields[field_number].strip()] += 1
                    except IndexError:
                        # Record has too few fields - count it rather than fail.
                        invalid_row_cnt += 1
                    if len(freq) >= max_freq_size:
                        # Single-argument parenthesised form prints identically
                        # under the print statement and the print function.
                        print('      WARNING: freq dict is too large - will trunc')
                        truncated = True
                        break

            return freq, truncated, invalid_row_cnt
        


src/d/a/datagristle-0.56/gristle/field_determinator.py   datagristle(Download)
            (self.field_freqs[f_no],
            self.field_trunc[f_no],
            self.field_rows_invalid[f_no]) = miscer.get_field_freq(self.filename,
                                                            self.dialect,
                                                            f_no,

src/d/a/DataGristle-HEAD/gristle/field_determinator.py   DataGristle(Download)
            (self.field_freqs[f_no],
            self.field_trunc[f_no],
            self.field_rows_invalid[f_no]) = miscer.get_field_freq(self.filename,
                                                            self.dialect,
                                                            f_no,

src/d/a/datagristle-0.56/gristle/tests/test_field_misc.py   datagristle(Download)
    def test_misc_b01_truncation(self):
        """ Field 0 with max_freq_size=4 should yield exactly 4 distinct
            values and report that the result was truncated.
        """
        result = mod.get_field_freq(self.test1_fqfn,
                                    self.dialect,
                                    field_number=0,
                                    max_freq_size=4)
        freq, trunc_flag, bad_cnt = result
        assert len(freq) == 4
        assert trunc_flag is True
 
    def test_misc_b02(self):
        (freq, trunc_flag, bad_cnt) = mod.get_field_freq(self.test1_fqfn, 
    def test_misc_b03(self):
        """ Field 2 should collapse into a single distinct value. """
        result = mod.get_field_freq(self.test1_fqfn,
                                    self.dialect,
                                    field_number=2)
        freq, trunc_flag, bad_cnt = result
        # should be 1 x '' x 200 occurrences
        assert len(freq) == 1

src/d/a/DataGristle-HEAD/gristle/tests/test_field_misc.py   DataGristle(Download)
    def test_misc_b01_truncation(self):
        """ Field 0 with max_freq_size=4 should yield exactly 4 distinct
            values and report that the result was truncated.
        """
        result = mod.get_field_freq(self.test1_fqfn,
                                    self.dialect,
                                    field_number=0,
                                    max_freq_size=4)
        freq, trunc_flag, bad_cnt = result
        assert len(freq) == 4
        assert trunc_flag is True
 
    def test_misc_b02(self):
        (freq, trunc_flag, bad_cnt) = mod.get_field_freq(self.test1_fqfn, 
    def test_misc_b03(self):
        """ Field 2 should collapse into a single distinct value. """
        result = mod.get_field_freq(self.test1_fqfn,
                                    self.dialect,
                                    field_number=2)
        freq, trunc_flag, bad_cnt = result
        # should be 1 x '' x 200 occurrences
        assert len(freq) == 1