# Search classes: look up articles by query and group the hits into clusters.
import time
import os
import sys
from sets import Set
from vars import SYS_PATH
os.environ["DJANGO_SETTINGS_MODULE"] = "news.settings"
sys.path.append(SYS_PATH)

from news.stiri.models import Articol

from compute_poisson import LoadPoisson
from create_clusters import LoadClusters
from functions import elimina_sufixe, intersection, list_intersection
from news.utils import get_news_logger

class SearchClusters(object):
    """Search the Poisson word index for the articles matching a query.

    The query is split into words and each word is looked up in the index
    loaded by ``LoadPoisson``. Words missing from the index (or with an
    empty posting list) are ignored. If at least one article contains all
    the remaining words, those articles are the result; otherwise the
    result falls back to the articles containing any of the words.

    A query wrapped in double quotes is an "exact" search: the candidate
    articles are additionally filtered in the database to those whose text
    contains the unquoted query (case-insensitive).

    The search runs in the constructor; the matching article ids are
    available afterwards in ``self.results``.
    """

    def __init__(self, query):
        """Parse ``query``, load the index and run the search."""
        self.query = query
        self.logger = get_news_logger()
        # Must run before get_lista_query(): it strips the surrounding
        # quotes from self.query, so the terms are built from the bare text.
        self.exact_search = self.is_exact_search()
        self.lista_query = self.get_lista_query(self.query)
        poisson_loader = LoadPoisson()
        self.dict_poisson = poisson_loader.dict_poisson
        self.results = self.search(self.lista_query)

    def is_exact_search(self):
        """Return 1 if the query is wrapped in double quotes, else 0.

        Side effect: when the query is quoted, the leading and trailing
        double quotes are stripped from ``self.query``.
        """
        if self.query.startswith('"') and self.query.endswith('"'):
            self.query = self.query.strip('"')
            return 1
        return 0

    def search(self, lista_query):
        """Return the ids of the articles matching the given query terms.

        ``lista_query`` is a list of already normalised terms (see
        ``get_lista_query``). The result is a list of article ids without
        duplicates:

        * the articles containing every indexed term, if there are any;
        * otherwise the articles containing at least one indexed term, in
          first-seen order following the order of the query terms.

        For an exact search the list is then narrowed down to the articles
        whose text contains the query phrase.
        """
        # Posting lists of the terms that are present in the index and
        # occur in at least one article; every other term is ignored.
        posting_lists = []
        for termen in lista_query:
            if termen in self.dict_poisson:
                lista_indecsi = self.dict_poisson[termen]['lista_indecsi']
                if lista_indecsi:
                    posting_lists.append(lista_indecsi)
        if not posting_lists:
            return []

        # Builtin sets replace the deprecated ``sets.Set`` together with
        # ``reduce(intersection, ...)``.
        # NOTE(review): assumes the project's ``intersection`` helper is a
        # plain pairwise set intersection -- confirm against functions.py.
        comune = set(posting_lists[0])
        for lista_indecsi in posting_lists[1:]:
            comune.intersection_update(lista_indecsi)

        if comune:
            # At least one article contains all the searched words.
            return_list = list(comune)
        else:
            # No article contains every word: return the articles in which
            # at least one of the words occurs. Each id is reported once,
            # even when the article contains several of the words, so the
            # cluster sizes computed from this list are not inflated.
            vazute = set()
            return_list = []
            for lista_indecsi in posting_lists:
                for articol_id in lista_indecsi:
                    if articol_id not in vazute:
                        vazute.add(articol_id)
                        return_list.append(articol_id)

        if self.exact_search:
            return_list = self._filter_exact(return_list)
        return return_list

    def _filter_exact(self, articole_ids):
        """Keep only the articles whose text contains the exact query.

        Returns the ids (as stored on the ``Articol`` rows) of the
        candidates whose ``text`` contains ``self.query``, compared
        case-insensitively by the database.
        """
        if not articole_ids:
            # Nothing to filter; skip the database round trip.
            return []
        articole = Articol.objects.filter(
            text__icontains=self.query,
            id__in=[int(articol_id) for articol_id in articole_ids])
        return [articol.id for articol in articole]

    def search_to_clusters(self, lista_indecsi):
        """Turn a list of article ids into the clusters containing them.

        ``lista_indecsi`` is a search result (a list of article ids). The
        clusters are loaded with ``LoadClusters``; every returned cluster
        has its ``'articole_ids'`` reduced to the ids found in
        ``lista_indecsi`` and its ``'lungime'`` set to their count.
        Clusters without any matching article are not returned, and ids
        that belong to no cluster are ignored.

        The cluster objects provided by the loader are modified in place.
        """
        clusters_loader = LoadClusters()
        # Reverse dictionary: article id -> (cluster key, cluster). The key
        # is built from the full, original id list of the cluster, so it
        # has to be computed before the id lists are emptied below.
        cluster_by_article = {}
        for cluster in clusters_loader.lista_clustere:
            cheie = '_'.join([str(item) for item in cluster['articole_ids']])
            for articol_id in cluster['articole_ids']:
                cluster_by_article[articol_id] = (cheie, cluster)
        # Empty every cluster; they are refilled with the matching ids only.
        for _cheie, cluster in cluster_by_article.values():
            cluster['articole_ids'] = []
        clustere_gasite = {}
        for articol_id in lista_indecsi:
            gasit = cluster_by_article.get(articol_id)
            if gasit is None:
                continue
            cheie, cluster = gasit
            cluster['articole_ids'].append(articol_id)
            cluster['lungime'] = len(cluster['articole_ids'])
            clustere_gasite[cheie] = cluster
        return list(clustere_gasite.values())

    def get_lista_query(self, query):
        """Return the lower-cased words of ``query`` with suffixes removed.

        The suffixes are removed with ``elimina_sufixe`` because the words
        in poisson.txt have no suffixes attached.
        """
        return [elimina_sufixe(item).lower() for item in query.split()]
    
if __name__ == "__main__":
    # Manual timing run: measures how long the search itself takes compared
    # with the whole search + clustering + ordering pipeline.
    from news.stiri.views import order_clusters

    start = time.time()
    # Alternative multi-word sample query: '"credite ipotecare"'
    search_obj = SearchClusters('"Basescu"')
    search_seconds = time.time() - start
    print('Inainte de matching %s secunde' % search_seconds)

    clusters = search_obj.search_to_clusters(search_obj.results)
    clusters = order_clusters(clusters)
    total_seconds = time.time() - start
    print('Programul a fost executat in %s secunde' % total_seconds)
    # Share of the total run time spent before the cluster matching step.
    print('Procent = %.2f %%' % (search_seconds * 100.0 / total_seconds))
