# Creates and updates the tag index.
import time
from datetime import datetime, timedelta
import os
import marshal
import sys
import string
from sets import Set
from vars import SYS_PATH
os.environ["DJANGO_SETTINGS_MODULE"] = "news.settings"
sys.path.append(SYS_PATH)

from django.conf import settings
from news.stiri.models import Cluster
from news.engine.functions import list_intersection

class TagIndex(object):
    """Build the tag index from the clusters of the last few days.

    The index is a two-level dictionary: first letter -> tag -> list of
    cluster ids, for example
    ``index['a'] = {'arab': [2323, 2312], 'aroman': [23123, 43423]}``.
    """

    def __init__(self, days_ago=35):
        """Select the recent clusters and build ``letter_dict`` from them.

        ``days_ago`` is the size, in days, of the window (ending now) from
        which clusters are taken.
        """
        self.days_ago = days_ago
        self.now = datetime.now()
        self.start_days_ago = self.now - timedelta(days=self.days_ago)
        self.clusters = self.get_clusters()
        self.letter_dict = self.get_letter_dict(self.clusters)

    def get_clusters(self):
        """Return the clusters whose ``data_articol`` falls inside the window."""
        return Cluster.objects.filter(data_articol__gte=self.start_days_ago)

    def get_letter_dict(self, clusters):
        """Return the index ``{letter: {tag: [cluster.id, ...]}}``.

        Tags come from splitting ``cluster.keys`` on ``'_'``; empty tags and
        tags that do not start with an ASCII lowercase letter are ignored.
        For a given tag, only the first cluster with a particular
        ``cluster.ids`` value is recorded, so clusters sharing the same
        ``ids`` do not produce duplicate entries.
        """
        letter_dict = {}
        # tag -> set of cluster.ids values already recorded for that tag.
        # NOTE(review): assumes cluster.ids is hashable (e.g. a string) --
        # confirm against the Cluster model.
        seen_ids = {}
        for cluster in clusters:
            for key in cluster.keys.split('_'):
                if not key or key[0] not in string.ascii_lowercase:
                    continue
                recorded = seen_ids.setdefault(key, set())
                if cluster.ids in recorded:
                    # Another cluster with the same ids already carries this tag.
                    continue
                recorded.add(cluster.ids)
                tags_for_letter = letter_dict.setdefault(key[0], {})
                tags_for_letter.setdefault(key, []).append(cluster.id)
        return letter_dict

class SaveTagIndex:
    """Write the tag index to disk, one marshal file per first letter."""

    def __init__(self, letter_dict):
        """Store ``letter_dict`` and immediately save it under ``settings.ENGINE_ROOT``."""
        self.letter_dict = letter_dict
        self.root = settings.ENGINE_ROOT
        self.save_letter_dict(self.letter_dict)

    def save_letter_dict(self, letter_dict):
        """Save each letter's tags to ``<root>tag_files/<letter>.txt``.

        ``letter_dict`` maps a letter to a ``{tag: [cluster ids]}`` dict.
        A file that cannot be opened is reported on stdout and skipped; the
        remaining letters are still written.
        """
        for letter, values in letter_dict.items():
            try:
                # Binary mode: marshal produces bytes, and text mode would
                # corrupt them on platforms that translate line endings.
                handle = open(self.root + 'tag_files/%s.txt' % (letter,), 'w+b')
            except IOError:
                print('Nu am putut deschide fisierul %stag_files/%s.txt' % (self.root, letter))
                continue
            try:
                self.save_letter(handle, values)
            finally:
                # Close the handle even if serialisation fails.
                handle.close()

    def save_letter(self, file, values):
        """Replace the content of the open ``file`` with marshalled ``values``.

        ``values`` is a dict such as ``{'abrud': [23, 213], 'arges': [3273, 1212]}``.
        """
        file.truncate(0)
        marshal.dump(values, file)

class LoadTagIndex:
    """Load the per-letter index files and look up cluster ids for tags.

    After construction, ``cluster_ids`` holds the ids of the clusters that
    contain the tags of ``tag_list``, combined with ``operation``
    (``'intersection'`` or ``'union'``).
    """

    def __init__(self, tag_list, operation='intersection'):
        """Load the index files needed by ``tag_list`` and compute ``cluster_ids``.

        Empty tags and tags not starting with an ASCII lowercase letter are
        dropped from ``tag_list`` first.
        """
        self.tag_list, self.operation = tag_list, operation
        self.root = settings.ENGINE_ROOT
        self.file_dict, self.loader_dict = {}, {}
        self.tag_list = [tag for tag in self.tag_list if tag and tag[0] in string.ascii_lowercase]
        self.set_file_dict(self.tag_list)
        self.set_loader_dict(self.tag_list)
        self.cluster_ids = self.get_cluster_ids(self.tag_list)

    def set_file_dict(self, tag_list):
        """Open ``<root>tag_files/<letter>.txt`` for each first letter in ``tag_list``.

        Handles are stored in ``self.file_dict`` keyed by letter. A file that
        cannot be opened is reported on stdout and its letter is left out.
        """
        for tag in tag_list:
            letter = tag[0]
            if letter in self.file_dict:
                continue
            try:
                # Binary mode: the files contain marshal data, not text.
                handle = open(self.root + 'tag_files/%s.txt' % (letter,), 'rb')
            except IOError:
                print('Nu am putut deschide fisierul %stag_files/%s.txt' % (self.root, letter))
            else:
                self.file_dict[letter] = handle

    def set_loader_dict(self, tag_list):
        """Unmarshal the opened files into ``self.loader_dict``, keyed by letter.

        Each handle is closed once read (it stays in ``self.file_dict`` as a
        marker that the letter's file was found), so descriptors do not
        accumulate. Errors raised by ``marshal.load`` propagate.
        """
        for tag in tag_list:
            letter = tag[0]
            if letter in self.loader_dict or letter not in self.file_dict:
                continue
            handle = self.file_dict[letter]
            try:
                self.loader_dict[letter] = marshal.load(handle)
            finally:
                handle.close()

    def get_cluster_ids(self, tag_list):
        """Return the ids of the clusters that contain the tags in ``tag_list``.

        Tags whose letter file was not loaded are skipped. With
        ``operation == 'union'`` the result holds every id once, in
        first-seen order. With ``operation == 'intersection'`` it holds the
        ids common to all considered tags; a tag that is missing from the
        index makes the result empty. Any other operation yields ``[]``.
        """
        union_ids = []
        # None means "no tag considered yet"; an empty list is a real
        # (empty) intersection and must not be restarted by a later tag.
        common_ids = None
        for tag in tag_list:
            letter = tag[0]
            if self.file_dict.get(letter) is None:
                continue
            letter_dict = self.loader_dict.get(letter)
            if letter_dict is None:
                continue
            ids = letter_dict.get(tag, [])
            if self.operation == 'union':
                union_ids.extend(ids)
            elif self.operation == 'intersection':
                if common_ids is None:
                    # Copy so callers cannot mutate the loaded index.
                    common_ids = list(ids)
                elif common_ids and ids:
                    common_ids = list_intersection(common_ids, ids)
                else:
                    # Intersecting with an empty list is empty by definition.
                    common_ids = []
            # Other operations are not supported (yet) and contribute nothing.
        if self.operation == 'union':
            # Remove duplicates while keeping the first occurrence of each id.
            seen = set()
            unique_ids = []
            for cluster_id in union_ids:
                if cluster_id not in seen:
                    seen.add(cluster_id)
                    unique_ids.append(cluster_id)
            return unique_ids
        if self.operation == 'intersection' and common_ids is not None:
            return common_ids
        return []

    def get_letter_values(self, letter):
        """Return the unmarshalled ``{tag: [cluster ids]}`` dict for ``letter``.

        Returns ``None`` (after reporting on stdout) when the letter's file
        cannot be opened.
        """
        try:
            handle = open(self.root + 'tag_files/%s.txt' % (letter,), 'rb')
        except IOError:
            print('Nu am putut deschide fisierul %stag_files/%s.txt' % (self.root, letter))
            return None
        try:
            return marshal.load(handle)
        finally:
            handle.close()

if __name__ == '__main__':
    # Rebuild the tag index from the clusters of the last 35 days, write it
    # to disk and report how long the run took.
    started_at = time.time()
    tag_index = TagIndex(days_ago=35)
    SaveTagIndex(tag_index.letter_dict)
    # Manual check of the loader, kept disabled:
    #   loader = LoadTagIndex(['iliescu'], operation="intersection")
    #   if loader.cluster_ids:
    #       print loader.cluster_ids
    elapsed = time.time() - started_at
    print('Programul a fost executat in %s secunde' % (str(elapsed),))
