# Computes the connections between different keywords, as they were
# saved in the cluster definitions.
import time
import string
from datetime import datetime, timedelta
import os
import marshal
import sys
import calendar
from sets import Set
from vars import SYS_PATH
# Select the Django settings module through the environment; this is done
# before the django / news imports further down in this file.
os.environ["DJANGO_SETTINGS_MODULE"] = "news.settings"
# Extend the import path with SYS_PATH (from vars) -- presumably the directory
# that contains the `news` package; TODO confirm against vars.py.
sys.path.append(SYS_PATH)

from django.conf import settings
from news.stiri.models import Cluster
from news.utils import get_news_logger
from news.engine.tag_index import LoadTagIndex

class Related(object):
    """Compute the keywords most strongly related to a given keyword.

    The relation is derived from the keyword lists saved in the cluster
    definitions (``Cluster.keys``, an underscore-separated string): every
    other keyword found in a cluster that was looked up for the target
    keyword earns points according to its position in that list.

    Attributes set by the constructor:
        keyword: the keyword the relations are computed for.
        limita_cuvinte: maximum number of related keywords to keep.
        cluster_list: clusters returned by ``get_cluster_list``.
        dict_cuvinte: mapping of related keyword -> accumulated score.
        related_kws: the best-scoring related keywords, best first.
    """

    # Score awarded to the first keyword of a cluster; every later position
    # is worth one point less, never dropping below zero.
    PUNCTAJ_MAXIM = 10

    def __init__(self, keyword, limita_cuvinte=10):
        """Look up the clusters for ``keyword`` and rank its related keywords.

        Args:
            keyword: the keyword to find relations for.
            limita_cuvinte: how many related keywords to keep (default 10).
        """
        self.keyword = keyword
        self.limita_cuvinte = limita_cuvinte
        self.cluster_list = self.get_cluster_list(self.keyword)
        self.dict_cuvinte = self.compute_related(self.keyword, self.cluster_list)
        self.related_kws = self.get_most_important(self.dict_cuvinte)

    def get_cluster_list(self, keyword):
        """Return the clusters that contain ``keyword``.

        The cluster ids come from ``LoadTagIndex``; when it reports none, an
        empty list is returned and the database is not queried.
        """
        cluster_list = []
        loader = LoadTagIndex([keyword])
        if loader.cluster_ids:
            cluster_list = Cluster.objects.filter(id__in=loader.cluster_ids)
        return cluster_list

    def compute_related(self, keyword, cluster_list):
        """Score every keyword that appears alongside ``keyword``.

        Each cluster's ``keys`` string is split on ``'_'``. A keyword at
        position ``i`` earns ``max(PUNCTAJ_MAXIM - i, 0)`` points, so the
        first keyword of a cluster is worth the most. Occurrences of
        ``keyword`` itself and empty tokens are ignored.

        Args:
            keyword: the keyword whose relations are being computed.
            cluster_list: iterable of objects exposing a ``keys`` string.

        Returns:
            dict mapping each related keyword to its accumulated score.
        """
        dict_cuvinte = {}
        for cluster in cluster_list:
            for i, cuvant in enumerate(cluster.keys.split('_')):
                # Empty tokens come from an empty ``keys`` string or a
                # doubled underscore; they are not keywords.
                if not cuvant or cuvant == keyword:
                    continue
                # Clamp at zero: a modulo here would wrap around and hand
                # high scores to keywords far down the list.
                punctaj = max(self.PUNCTAJ_MAXIM - i, 0)
                dict_cuvinte[cuvant] = dict_cuvinte.get(cuvant, 0) + punctaj
        return dict_cuvinte

    def get_most_important(self, dict_cuvinte):
        """Return the ``self.limita_cuvinte`` best-scoring keywords.

        Keywords are ordered by score, highest first; equal scores are
        ordered by keyword, descending.

        Args:
            dict_cuvinte: mapping of keyword -> score.

        Returns:
            list of keywords, at most ``self.limita_cuvinte`` long.
        """
        ordonate = sorted(
            dict_cuvinte.items(),
            key=lambda pereche: (pereche[1], pereche[0]),
            reverse=True,
        )
        return [cuvant for cuvant, _ in ordonate[:self.limita_cuvinte]]

if __name__ == '__main__':
    # Manual smoke run: rank the keywords related to a sample keyword and
    # report how long the whole lookup took.
    started_at = time.time()
    sample_keyword = 'basescu'
    related = Related(sample_keyword)
    print(related.related_kws)
    elapsed = str(time.time() - started_at)
    print('Programul a fost executat in %s secunde' % elapsed)
