Questo sito utilizza cookies solo per scopi di autenticazione sul sito e nient'altro. Nessuna informazione personale viene tracciata. Leggi l'informativa sui cookies.
Username: Password: oppure
Bidplaza Statistics - bpreloaddatabase.py

bpreloaddatabase.py

Caricato da: Piero Tofy
Scarica il programma completo

  1. #!/usr/bin/python
  2. #Author: Piero Toffanin
  3.  
  4. #BidPlaza Statistics 1.0
  5.  
  6. #Module that reloads the offers database
  7.  
  8. import urllib, re, os, string, bprebuildindex
  9.  
  10. bidplazaAuditionUrl = "http://www.bidplaza.it/t_auction_object_ended_view.php?bidauction_id="
  11. bidplazaClosedAuditionsUrl = "http://www.bidplaza.it/t_auctions_ended.php?from="
  12. ClosedAuditionsMultiplier = 40
  13.  
  14. def startDatabaseReloadProcess():
  15.     print "Database reload process started..."
  16.     auditionsIds = getAuditionIDs()
  17.     fetched = []
  18.     for auid in auditionsIds:
  19.         if not auid in fetched:
  20.             fetchAudition(auid)
  21.         fetched.append(auid)
  22.     bprebuildindex.rebuildIndex()
  23.     print "Database reload process completed!"
  24.  
  25. def startDatabaseUpdateProcess():
  26.     print "Database update process started..."
  27.     auditionsIds = getAuditionIDs()
  28.     fetched = []
  29.     for auid in auditionsIds:
  30.         if not auid in fetched and not os.path.exists("odb/" + str(auid)+".dat"):
  31.             fetchAudition(auid)
  32.         fetched.append(auid)
  33.     bprebuildindex.rebuildIndex()
  34.     print "Database update process completed!"
  35.  
  36.  
  37. def fetchAudition(AuditionId):
  38.     print "Fetching data for AuditionId " + str(AuditionId) + "(" + bidplazaAuditionUrl + str(AuditionId) + ")"
  39.     #Fetch data
  40.     data = urllib.urlopen(bidplazaAuditionUrl + str(AuditionId)).read()
  41.  
  42.     #Find audition product name
  43.     audProdName = string.replace(re.findall("<title>([^<]+)</title>",data)[0],"Bidplaza - ","")
  44.  
  45.     #Read regex string from file
  46.     bidplazaOffertsRegex = open("offertsregex.dat").read()
  47.  
  48.     #Find matches
  49.     matches = re.findall(bidplazaOffertsRegex,data)
  50.  
  51.     #Writes them on file
  52.     print "Writing results for AuditionId " + str(AuditionId)
  53.     fd = os.open("odb/" + str(AuditionId)+".dat",os.O_WRONLY | os.O_TRUNC | os.O_CREAT)
  54.  
  55.     #First though we need to find:
  56.     # - Total offerts (real offerts + heuristic ones)
  57.     # - Winning offert (unique and minimum)
  58.     # - Missing data (0 = no, heuristic ones = yes)
  59.     # - Average bet for offert
  60.     # Heuristic = Minimum offert / ((Maximum - Minimum)/Real offerts)
  61.  
  62.     winningoffert = "0"
  63.     for item in matches:
  64.         if item[1] == "1":
  65.             winningoffert = item[0]
  66.             break
  67.     if matches[0][0] == "1":
  68.         heuristic = 0
  69.     else:
  70.         heuristic = int(float(matches[0][0]) / ((float(matches[-1][0]) - float(matches[0][0])) / len(matches)) - 1)
  71.  
  72.     totalOfferts = len(matches) + heuristic
  73.     average = 0
  74.     for item in matches:
  75.         average += int(item[1])
  76.     average = round(float(average) / float(len(matches)),4)
  77.    
  78.     last = 0
  79.     os.write(fd,audProdName + "\n" + str(totalOfferts) + "\n" + winningoffert + "\n" + str(heuristic) + "\n" + str(average) + "\n")
  80.     for offertcount in matches:
  81.         if int(offertcount[0]) > last:
  82.             os.write(fd,offertcount[0] + " " + offertcount[1] + "\n")
  83.             last = int(offertcount[0])
  84.         else:
  85.             #Second column, skip please
  86.             break
  87.     os.close(fd)
  88.  
  89.  
  90.  
  91. def getAuditionIDs():
  92.     c = 0
  93.     validPage = True
  94.     ret = []
  95.     while(validPage):
  96.         print "Fetching IDs from page " + str(c+1) + " ("+str(bidplazaClosedAuditionsUrl + str(ClosedAuditionsMultiplier * c))+")"
  97.         data = urllib.urlopen(bidplazaClosedAuditionsUrl + str(ClosedAuditionsMultiplier * c)).read()
  98.         p = re.findall("t_auction_object_ended_view\.php\?bidauction_id=([0-9]+)",data)
  99.         ret = ret + p
  100.         if len(re.findall("t_auctions_ended\.php\?from=([0-9]+)",data)) == 0:
  101.             validPage = False
  102.         else:
  103.             c+=1
  104.     return ret