# Management script: parses the newspapers, computes Poisson weights, clusters
# and categorizes the articles, then builds the tag index.
import sys
import time
import os
from datetime import datetime
from vars import SYS_PATH
os.environ["DJANGO_SETTINGS_MODULE"] = "news.settings"
sys.path.append(SYS_PATH)

from news.stiri.models import Ziar
from news.engine.parsers import ParseZiar, SaveArticole
from news.engine.compute_poisson import ComputePoisson, SavePoisson
from news.engine.create_clusters import CreateCluster, SaveClusters
from news.engine.categorization import CategorizeKNN
from news.engine.tag_index import TagIndex, SaveTagIndex

# Start-of-run timestamp; only the disabled timing print at the bottom reads it.
started_at = time.time()

# Step 1: parse every newspaper whose `parsare` field equals 1 and hand the
# parsed articles to SaveArticole.
# For a one-off run against a single source, filter on the name instead:
#   Ziar.objects.filter(nume__exact='Observator cultural')
for newspaper in Ziar.objects.filter(parsare__exact=1):
    # Single parenthesised argument: prints the same text as the bare
    # Python 2 print statement.
    print('Parsam %s' % str(newspaper))
    parsed = ParseZiar(newspaper)
    SaveArticole(newspaper, parsed.articole)

# Step 2: index the articles and compute the Poisson weights.
poisson_started_at = time.time()
poisson_calc = ComputePoisson()
word_index = poisson_calc.proceseaza_articole()
poisson_weights = poisson_calc.compute_poisson(word_index)
#print('Am calculat greutatea Poisson in %s secunde' % str(time.time() - poisson_started_at))

# Step 3: clustering, driven by the Poisson weights.
# NOTE(review): the exact meaning of the two thresholds below is defined by
# CreateCluster and is not visible from this script.
clustering_started_at = time.time()
cluster_limit = 10
cluster_simil = 5
clusterer = CreateCluster(poisson_weights, cluster_limit, cluster_simil)
#print('Am calculat clustering in %s secunde' % str(time.time() - clustering_started_at))

# Step 4: categorize the clustered articles.
knn_categorizer = CategorizeKNN(clusterer.lista_clustere)
categorized_clusters = knn_categorizer.categorized_clusters

# Step 5: save the Poisson weights first, then the categorized clusters.
saver = SavePoisson(poisson_weights)
# Disabled alternative that saves the clusters as produced by CreateCluster,
# without categorization:
#saver = SaveClusters(clusterer.lista_clustere)
saver = SaveClusters(categorized_clusters)

# Step 6: create the tag index and save it.
# NOTE(review): days_ago=35 presumably limits the index to the last 35 days;
# confirm against TagIndex.
tag_indexer = TagIndex(days_ago=35)
tag_saver = SaveTagIndex(tag_indexer.letter_dict)

#print('Programul a fost executat in %s secunde' % str(time.time() - started_at))


