#!/usr/bin/python
#Author: Piero Toffanin
#BidPlaza Statistics 1.0
#Reload offers database module
import urllib, re, os, string, bprebuildindex
# Base URL of an ended auction's detail page; the auction ID is appended to it.
# (This module's identifiers call auctions "auditions".)
bidplazaAuditionUrl = "http://www.bidplaza.it/t_auction_object_ended_view.php?bidauction_id="
# Base URL of the paginated list of ended auctions; the "from" offset is appended to it.
bidplazaClosedAuditionsUrl = "http://www.bidplaza.it/t_auctions_ended.php?from="
# Step between consecutive "from" offsets: list page c is requested with
# from = ClosedAuditionsMultiplier * c.
ClosedAuditionsMultiplier = 40
def startDatabaseReloadProcess():
    """Re-fetch every ended auction and rebuild the index.

    Collects the auction IDs with getAuditionIDs(), calls fetchAudition()
    once per distinct ID (in the order the IDs were returned) and finally
    calls bprebuildindex.rebuildIndex().  Existing data files are fetched
    again; see startDatabaseUpdateProcess() for the incremental variant.
    """
    print("Database reload process started...")
    # getAuditionIDs() may return the same ID several times; a set gives
    # constant-time duplicate detection while the loop preserves the order.
    fetched = set()
    for auid in getAuditionIDs():
        if auid in fetched:
            continue
        fetchAudition(auid)
        fetched.add(auid)
    bprebuildindex.rebuildIndex()
    print("Database reload process completed!")
def startDatabaseUpdateProcess():
    """Fetch only the ended auctions that have no data file yet, then rebuild the index.

    Collects the auction IDs with getAuditionIDs() and calls fetchAudition()
    for every distinct ID whose "odb/<id>.dat" file does not exist.
    bprebuildindex.rebuildIndex() is called afterwards in all cases.
    """
    print("Database update process started...")
    # getAuditionIDs() may return the same ID several times; a set gives
    # constant-time duplicate detection while the loop preserves the order.
    fetched = set()
    for auid in getAuditionIDs():
        if auid in fetched or os.path.exists("odb/" + str(auid) + ".dat"):
            # Already handled in this run, or already stored on disk.
            continue
        fetchAudition(auid)
        fetched.add(auid)
    bprebuildindex.rebuildIndex()
    print("Database update process completed!")
def fetchAudition(AuditionId):
    """Download one ended auction page and store its offer statistics.

    The page at bidplazaAuditionUrl + AuditionId is fetched, the product
    name is taken from its <title> (with the "Bidplaza - " prefix removed)
    and the offers are extracted with the regular expression read from
    "offertsregex.dat".  Each match is used as a sequence whose first item
    is the offer value and whose second item is a count; a count of "1"
    marks the winning offer.

    The result is written to "odb/<AuditionId>.dat" as five header lines
    (product name, total offers, winning offer, heuristic offers, average
    count per offer) followed by one "<offer> <count>" line per offer.

    The output file is only opened after everything has been computed, so a
    page that cannot be parsed never leaves an empty or truncated data file
    behind (which the update process would otherwise treat as fetched).

    AuditionId -- auction identifier; it is converted with str() and used
                  both in the URL and in the output file name.

    Raises IndexError when the page has no <title> or the regex matches
    nothing, ZeroDivisionError when the first and last matched offers are
    equal (the heuristic cannot be computed) and ValueError when a matched
    value is not numeric.
    """
    url = bidplazaAuditionUrl + str(AuditionId)
    print("Fetching data for AuditionId " + str(AuditionId) + "(" + url + ")")

    # Fetch data; close the connection even if reading fails.
    response = urllib.urlopen(url)
    try:
        data = response.read()
    finally:
        response.close()

    # Find audition product name
    audProdName = re.findall("<title>([^<]+)</title>", data)[0].replace("Bidplaza - ", "")

    # Read regex string from file
    with open("offertsregex.dat") as regexFile:
        bidplazaOffertsRegex = regexFile.read()

    # Find matches
    matches = re.findall(bidplazaOffertsRegex, data)

    # Before writing we need to find:
    #  - Total offerts (real offerts + heuristic ones)
    #  - Winning offert (unique and minimum)
    #  - Missing data (0 = no, heuristic ones = yes)
    #  - Average bet for offert
    winningoffert = "0"
    for item in matches:
        if item[1] == "1":
            winningoffert = item[0]
            break

    # Heuristic = Minimum offert / ((Maximum - Minimum) / Real offerts)
    lowest = matches[0][0]
    if lowest == "1":
        heuristic = 0
    else:
        spacing = (float(matches[-1][0]) - float(lowest)) / len(matches)
        heuristic = int(float(lowest) / spacing - 1)
    totalOfferts = len(matches) + heuristic

    countSum = sum(int(item[1]) for item in matches)
    average = round(float(countSum) / float(len(matches)), 4)

    lines = [audProdName, str(totalOfferts), winningoffert, str(heuristic), str(average)]
    last = 0
    for item in matches:
        value = int(item[0])
        if value <= last:
            # Offers stopped increasing: this is the second column, skip it.
            break
        lines.append(item[0] + " " + item[1])
        last = value

    # Write the results in one go, only now that nothing can fail anymore
    # except the write itself.
    print("Writing results for AuditionId " + str(AuditionId))
    with open("odb/" + str(AuditionId) + ".dat", "w") as out:
        out.write("\n".join(lines) + "\n")
def getAuditionIDs():
    """Return the IDs of all ended auctions listed on the site.

    Walks the paginated list at bidplazaClosedAuditionsUrl, requesting
    offsets 0, ClosedAuditionsMultiplier, 2 * ClosedAuditionsMultiplier, ...
    and collecting every "bidauction_id" found in the detail-page links.

    The walk stops when a page contains no pagination link, or when a page
    contributes no ID that has not been seen before.  The second condition
    is a safety net: without it the loop never ends if the site keeps
    rendering pagination links on empty or repeated pages.

    Returns a list of ID strings in page order.  The list may contain
    duplicates; callers are expected to skip them.
    """
    idPattern = re.compile(r"t_auction_object_ended_view\.php\?bidauction_id=([0-9]+)")
    pagePattern = re.compile(r"t_auctions_ended\.php\?from=([0-9]+)")

    ret = []
    seen = set()
    c = 0
    while True:
        url = bidplazaClosedAuditionsUrl + str(ClosedAuditionsMultiplier * c)
        print("Fetching IDs from page " + str(c + 1) + " (" + url + ")")

        # Close the connection even if reading fails.
        response = urllib.urlopen(url)
        try:
            data = response.read()
        finally:
            response.close()

        pageIds = idPattern.findall(data)
        ret.extend(pageIds)

        newIds = set(pageIds) - seen
        seen.update(newIds)
        if not newIds or pagePattern.search(data) is None:
            break
        c += 1
    return ret