import re
import csv
 
from django.core.management.base import BaseCommand
from django.conf import settings
from django.db import connection
 
from dcentity.models import Entity
 
 
class Command(BaseCommand):
    args = '<output_file>'
    help = """Sample all entities according to some ruleset.  Requires one
argument: <output_file>, a file to which JSON data containing the sampled
entities will be written.  An optional second argument specifies which sampling
method to use.  It may be one of:
 
    'stratified' (default)
    'uniform'
 
The data will be returned as a CSV file with entity ID's in the first column,
and a few other contextual columns to help with manually verifying that the
file was made correctly.
"""
    def handle(self, *args, **kwargs):
        if len(args) == 0:
            print "Usage: python manage.py wp_sample_entities <output_file>"
            print
            print self.help
            return
 
        self.args = args
 
        outfile = args[0]
        if len(args) > 1:
            method = args[1]
        else:
            method = 'stratified'
 
        methods = {
            'uniform': self.get_uniform_sample,
            'stratified': self.get_stratified_sample,
        }
        samples = methods[method]()
        with open(outfile, 'w') as fh:
            writer = csv.writer(fh)
            for samp in samples:
                # Only id is needed; other fields written for ease of visual
                # inspection
                row = [samp.id, samp.name, samp.type, samp.total]
                writer.writerow(row)
 
    def get_uniform_sample(self, num_samples=300):
        return list(Entity.objects.raw("""
            SELECT m.*, GREATEST(a.contributor_amount, a.recipient_amount) AS total
            FROM matchbox_entity m LEFT JOIN agg_entities a ON m.id=a.entity_id
            WHERE cycle = -1
            ORDER BY random()
            LIMIT """ + str(num_samples)))
 
    def get_stratified_sample(self, samples_per_type=100, amount_divisions=10):
        cursor = connection.cursor()
        samples = []
        for etype in ("individual", "politician", "organization"):
            cursor.execute("""
                SELECT COUNT(*) FROM matchbox_entity m LEFT JOIN agg_entities a ON m.id=a.entity_id
                    WHERE cycle = -1 AND m.type = %s
                """, [etype])
            count = int(cursor.fetchone()[0])
            step = float(count) / amount_divisions
            step_limit = int(step)
            for i in range(amount_divisions):
                step_offset = int(i * step)
                rqs = list(Entity.objects.raw("""
                    SELECT * FROM (
                        SELECT m.*, GREATEST(a.contributor_amount, a.recipient_amount) AS total 
                        FROM matchbox_entity m LEFT JOIN agg_entities a ON m.id=a.entity_id
                        WHERE cycle = -1 AND m.type = %s
                        ORDER BY total LIMIT %i OFFSET %i
                    ) AS sorted ORDER BY random() LIMIT %i
                    """ % ("%s", step_limit, step_offset, amount_divisions),
                    [etype]))
                rqs.sort(key=lambda e: e.total)
                print i, etype, step_limit, step_offset, count, len(rqs)
                samples += rqs
        return samples