blob: f57dc859e62928ce475a8bb83e379cca136b1ea1 [file] [log] [blame]
# getbinpkg.py -- Portage binary-package helper functions
# Copyright 2003-2004 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
# $Id$
if not hasattr(__builtins__, "set"):
from sets import Set as set
from output import red, yellow, green
import htmllib,HTMLParser,formatter,sys,os,xpak,time,tempfile,base64,urllib2
try:
import cPickle
except ImportError:
import pickle as cPickle
try:
import ftplib
except SystemExit, e:
raise
except Exception, e:
sys.stderr.write(red("!!! CANNOT IMPORT FTPLIB: ")+str(e)+"\n")
try:
import httplib
except SystemExit, e:
raise
except Exception, e:
sys.stderr.write(red("!!! CANNOT IMPORT HTTPLIB: ")+str(e)+"\n")
def make_metadata_dict(data):
    """(data) --- Turn an in-memory xpak pair into a plain dictionary.

    data is unpacked as a two-item sequence.  Its first item is handed to
    xpak.getindex_mem() to enumerate the entry names, and the complete pair
    is handed to xpak.getitem() to look up each of those names.

    Returns a dict mapping every enumerated name to its looked-up value."""
    # The second item is only unpacked so that a malformed pair still fails
    # here, exactly as before; it is not used directly.
    index_part, _unused_part = data
    return dict([(name, xpak.getitem(data, name))
                 for name in xpak.getindex_mem(index_part)])
class ParseLinks(HTMLParser.HTMLParser):
    """Parser class that overrides HTMLParser to grab all anchors from an html
    page and provide suffix and prefix limitors.

    Every href found on an <a> tag is URL-unquoted and remembered once, in
    the order it first appears in the document."""

    def __init__(self):
        self.PL_anchors = []
        HTMLParser.HTMLParser.__init__(self)

    def get_anchors(self):
        """Return the list of collected anchors (the parser's own list)."""
        return self.PL_anchors

    def get_anchors_by_prefix(self, prefix):
        """Return the distinct anchors that start with prefix, in order."""
        selected = []
        for anchor in self.PL_anchors:
            if anchor.startswith(prefix) and anchor not in selected:
                selected.append(anchor)
        return selected

    def get_anchors_by_suffix(self, suffix):
        """Return the distinct anchors that end with suffix, in order."""
        selected = []
        for anchor in self.PL_anchors:
            if anchor.endswith(suffix) and anchor not in selected:
                selected.append(anchor)
        return selected

    def handle_endtag(self, tag):
        """Closing tags carry no link information, so they are ignored."""
        pass

    def handle_starttag(self, tag, attrs):
        """Record the unquoted href of each <a> tag, skipping duplicates."""
        if tag != "a":
            return
        for name, value in attrs:
            # An attribute written without a value (<a href>) is reported
            # with value None; there is nothing to record for it.
            if name != "href" or value is None:
                continue
            # Unquote *before* the duplicate test: the list stores unquoted
            # names, so comparing the raw value would let escaped links
            # (e.g. "foo%20bar") be added over and over.
            anchor = urllib2.unquote(value)
            if anchor not in self.PL_anchors:
                self.PL_anchors.append(anchor)
def create_conn(baseurl, conn=None):
    """(baseurl,conn) --- Takes a protocol://[user[:pass]@]site[:port]/address
    url and an optional connection.  If a connection is passed in it is
    handed back untouched; otherwise one is opened for http, https or ftp.

    Returns the tuple (conn, protocol, address, http_params, http_headers):
    address is the path part of baseurl ("/" when there is none),
    http_params is always an empty dict, and http_headers carries a Basic
    "Authorization" entry when both a username and a password were given.

    For ftp, a host name ending in "*" turns passive mode off (the "*" is
    stripped before connecting), and the login uses "anonymous" when the url
    names no user.

    Raises ValueError when baseurl has no "://" or when the user:pass part
    has more than one ":", and NotImplementedError for any other protocol
    when a new connection would have to be opened."""
    pieces = baseurl.split("://", 1)
    if len(pieces) != 2:
        raise ValueError("Provided URL does not contain protocol identifier. '%s'" % baseurl)
    protocol, remainder = pieces

    # Everything up to the first "/" is the (possibly credentialed) host.
    segments = remainder.split("/")
    host = segments[0]
    address = "/"
    if len(segments) >= 2:
        address = "/" + "/".join(segments[1:])

    credentials = ["anonymous"]
    if "@" in host:
        login_part, host = host.split("@", 1)
        credentials = login_part.split(":")

    if len(credentials) > 2:
        raise ValueError("Unable to interpret username/password provided.")
    username = credentials[0]
    password = None
    if len(credentials) == 2:
        password = credentials[1]

    http_params = {}
    http_headers = {}
    if username and password:
        token = base64.encodestring("%s:%s" % (username, password))
        # encodestring() appends a newline, which must not end up in a header.
        http_headers["Authorization"] = "Basic %s" % token.replace("\012", "")

    if not conn:
        if protocol == "https":
            conn = httplib.HTTPSConnection(host)
        elif protocol == "http":
            conn = httplib.HTTPConnection(host)
        elif protocol == "ftp":
            passive = 1
            if host[-1] == "*":
                passive = 0
                host = host[:-1]
            conn = ftplib.FTP(host)
            if password:
                conn.login(username, password)
            else:
                sys.stderr.write(yellow(" * No password provided for username")+" '"+str(username)+"'\n\n")
                conn.login(username)
            conn.set_pasv(passive)
            conn.set_debuglevel(0)
        else:
            raise NotImplementedError("%s is not a supported protocol." % protocol)

    return (conn, protocol, address, http_params, http_headers)
def make_ftp_request(conn, address, rest=None, dest=None):
    """(conn,address,rest) --- uses the conn object to request the data
    from address and issuing a rest if it is passed.

    conn    -- object offering voidcmd(), size(), transfercmd() and
               voidresp() the way ftplib.FTP does.
    address -- remote path to retrieve.
    rest    -- restart offset; a negative value counts back from the end of
               the file (clamped to 0).  None retrieves the whole file.
    dest    -- optional file-like object.  When given, received data is
               written to it and "" is returned in place of the data.

    Returns (data, incomplete, ""), where incomplete is true when the number
    of bytes received differs from the reported size minus the restart
    offset (or when the size is unknown).  If a ValueError is raised along
    the way, returns (None, code, text), where code is int() of the first
    four characters of the error text.

    NOTE(review): ftplib's own error classes do not derive from ValueError,
    so FTP protocol errors propagate to the caller -- confirm that is the
    intended contract."""
    try:
        if dest is not None:
            start_pos = dest.tell()

        conn.voidcmd("TYPE I")
        fsize = conn.size(address)

        if rest is not None and rest < 0:
            rest = max(fsize + int(rest), 0)

        if rest is not None:
            mysocket = conn.transfercmd("RETR " + str(address), rest)
        else:
            mysocket = conn.transfercmd("RETR " + str(address))

        # Collect the pieces and join once instead of growing a string.
        chunks = []
        try:
            while 1:
                somedata = mysocket.recv(8192)
                if not somedata:
                    break
                if dest is not None:
                    dest.write(somedata)
                else:
                    chunks.append(somedata)
        finally:
            # Release the data socket even when recv() or write() fails.
            mysocket.close()
        mydata = "".join(chunks)

        if dest is not None:
            # Bytes written by this call: current position minus the start.
            data_size = dest.tell() - start_pos
        else:
            data_size = len(mydata)

        if fsize is None:
            # Size unknown: completeness cannot be confirmed.
            incomplete = True
        else:
            # With a restart offset only the tail of the file is expected.
            incomplete = data_size != (fsize - (rest or 0))

        conn.voidresp()
        conn.voidcmd("TYPE A")
        return mydata, incomplete, ""
    except ValueError:
        e = sys.exc_info()[1]
        return None, int(str(e)[:4]), str(e)
def make_http_request(conn, address, params=None, headers=None, dest=None):
    """(conn,address,params,headers) --- uses the conn object to request
    the data from address, performing Location forwarding and using the
    optional params and headers.

    conn    -- object offering request() and getresponse() the way
               httplib.HTTPConnection does.
    address -- request target; replaced by the Location value on 301/302.
    params  -- passed as the request body argument (defaults to {}).
    headers -- request headers (defaults to {}).
    dest    -- optional file-like object.  When given, the body is streamed
               into it and "" is returned in place of the data.

    Returns (data, 0, "") on a 200 or 206 response.  Returns
    (None, None, message) when sending the request fails, and
    (None, status, message) for any other status, for a redirect without a
    Location header, or when too many redirects are followed."""
    if params is None:
        params = {}
    if headers is None:
        headers = {}

    max_redirects = 10
    redirects = 0
    # Connections opened here while following redirects; the caller never
    # sees them, so they have to be closed before returning.
    opened = []
    try:
        rc = 0
        response = None
        while rc in (0, 301, 302):
            try:
                if rc != 0:
                    conn = create_conn(address)[0]
                    opened.append(conn)
                conn.request("GET", address, params, headers)
            except SystemExit:
                raise
            except Exception:
                e = sys.exc_info()[1]
                return None, None, "Server request failed: " + str(e)
            response = conn.getresponse()
            rc = response.status

            # 301 means that the page address is wrong.
            if rc in (301, 302):
                response.read()  # drain the body of the redirect reply
                # Ask the response for the header instead of splitting the
                # raw header text: the lookup ignores case and the value
                # carries no line terminator.
                location = response.getheader("Location")
                if location:
                    location = location.strip()
                if not location:
                    return None, rc, "Server redirected (" + str(rc) + ") without providing a Location"
                redirects += 1
                if redirects > max_redirects:
                    return None, rc, "Too many redirects (more than " + str(max_redirects) + ")"
                if rc == 301:
                    sys.stderr.write(red("Location has moved: ") + str(location) + "\n")
                else:
                    sys.stderr.write(red("Location has temporarily moved: ") + str(location) + "\n")
                address = location

        if rc not in (200, 206):
            sys.stderr.write(str(response.msg) + "\n")
            sys.stderr.write(response.read() + "\n")
            sys.stderr.write("address: " + address + "\n")
            return None, rc, "Server did not respond successfully (" + str(response.status) + ": " + str(response.reason) + ")"

        if dest is not None:
            # Stream in pieces so a large package is never held in memory.
            while 1:
                chunk = response.read(8192)
                if not chunk:
                    break
                dest.write(chunk)
            return "", 0, ""

        return response.read(), 0, ""
    finally:
        for extra in opened:
            extra.close()
def match_in_array(array, prefix="", suffix="", match_both=1, allow_overlap=0):
    """(array,prefix,suffix,match_both,allow_overlap) --- Return the items
    of array that start with prefix and/or end with suffix, in order.

    match_both    -- when true and both prefix and suffix are given, an item
                     must match both; otherwise matching either is enough.
    allow_overlap -- only meaningful when both are required: unless set, an
                     item must be long enough for prefix and suffix not to
                     share characters.

    An empty prefix or suffix never matches by itself, so calling with
    neither returns an empty list."""
    need_both = bool(match_both and prefix and suffix)
    min_len = len(prefix) + len(suffix)

    matches = []
    for item in array:
        has_prefix = bool(prefix) and item.startswith(prefix)
        has_suffix = bool(suffix) and item.endswith(suffix)
        if need_both:
            if not (has_prefix and has_suffix):
                continue
            # The overlap rule only makes sense when both ends matched; it
            # used to be applied in "either" mode as well, which dropped
            # short items that matched the suffix alone.
            if not allow_overlap and len(item) < min_len:
                continue
        elif not (has_prefix or has_suffix):
            continue
        matches.append(item)
    return matches
def dir_get_list(baseurl, conn=None):
    """(baseurl[,connection]) -- Takes a base url to connect to and read from.
    URL should be in the for <proto>://<site>[:port]<path>
    Connection is used for persistent connection instances.

    Returns the directory listing: for http/https the anchors parsed out of
    the fetched page, for ftp the result of nlst().

    Raises Exception when an http/https request yields no page, and
    TypeError for any other protocol.  A connection opened here is closed
    before returning or raising; one passed in by the caller is left open."""
    if conn:
        keepconnection = 1
    else:
        keepconnection = 0

    conn, protocol, address, params, headers = create_conn(baseurl, conn)

    try:
        if protocol in ("http", "https"):
            page, rc, msg = make_http_request(conn, address, params, headers)
            if not page:
                raise Exception("Unable to get listing: %s %s" % (rc, msg))
            parser = ParseLinks()
            parser.feed(page)
            listing = parser.get_anchors()
        elif protocol == "ftp":
            if address[-1] == '/':
                olddir = conn.pwd()
                conn.cwd(address)
                try:
                    listing = conn.nlst()
                finally:
                    # Put a persistent connection back where it was even
                    # when the listing itself fails.
                    conn.cwd(olddir)
            else:
                listing = conn.nlst(address)
        else:
            raise TypeError("Unknown protocol. '%s'" % protocol)
    finally:
        # Previously skipped on the error paths, leaking the connection.
        if not keepconnection:
            conn.close()

    return listing
def file_get_metadata(baseurl, conn=None, chunk_size=3000):
    """(baseurl[,connection]) -- Takes a base url to connect to and read from.
    URL should be in the for <proto>://<site>[:port]<path>
    Connection is used for persistent connection instances.

    Fetches the last chunk_size bytes of the remote file (an http "Range"
    header, or a negative ftp restart offset), decodes a size from the four
    bytes at data[-8:-4] with xpak.decodeint(), and splits the trailing
    segment of that size with xpak.xsplit_mem().  When the segment does not
    fit into chunk_size the file is requested again with a chunk that is
    large enough.

    Returns whatever xpak.xsplit_mem() produced, or (None, None) when no
    data came back or the split result is empty.  Raises TypeError for an
    unknown protocol.  A connection opened here is closed before returning
    or raising; one passed in by the caller is left open."""
    if conn:
        keepconnection = 1
    else:
        keepconnection = 0

    conn, protocol, address, params, headers = create_conn(baseurl, conn)

    try:
        if protocol in ("http", "https"):
            headers["Range"] = "bytes=-" + str(chunk_size)
            data, rc, msg = make_http_request(conn, address, params, headers)
        elif protocol == "ftp":
            data, rc, msg = make_ftp_request(conn, address, -chunk_size)
        else:
            raise TypeError("Unknown protocol. '%s'" % protocol)

        if not data:
            return None, None

        xpaksize = xpak.decodeint(data[-8:-4])
        if (xpaksize + 8) > chunk_size:
            # The tail fetched so far is too short; ask again over the same
            # connection (passing it keeps the nested call from closing it).
            return file_get_metadata(baseurl, conn, (xpaksize + 8))

        xpak_data = data[len(data) - (xpaksize + 8):-8]
        myid = xpak.xsplit_mem(xpak_data)
        if not myid:
            return None, None
        return myid
    finally:
        # Previously skipped when a request or the xpak parsing raised.
        if not keepconnection:
            conn.close()
def file_get(baseurl, dest, conn=None, fcmd=None):
    """(baseurl,dest,fcmd=) -- Takes a base url to connect to and read from.
    URL should be in the for <proto>://[user[:pass]@]<site>[:port]<path>

    Without fcmd the transfer is delegated to file_get_lib() and its result
    is returned as is.

    With fcmd, the placeholders ${DISTDIR}, ${URI} and ${FILE} are replaced
    by dest, baseurl and the base name of baseurl, the command is split on
    whitespace and run in a forked child with os.execv().  Returns 1 when
    the child exits with status 0 and 0 when it exits with any other status;
    calls sys.exit(1) when the child was ended by a signal."""
    if not fcmd:
        return file_get_lib(baseurl, dest, conn)

    substitutions = (
        ("${DISTDIR}", dest),
        ("${URI}", baseurl),
        ("${FILE}", os.path.basename(baseurl)),
    )
    for placeholder, value in substitutions:
        fcmd = fcmd.replace(placeholder, value)

    argv = fcmd.split()
    program = argv[0]
    # The child sees only the program's base name as argv[0].
    argv[0] = os.path.basename(program)

    child = os.fork()
    if child == 0:
        try:
            os.execv(program, argv)
        except OSError:
            pass
        # Only reached when the exec itself failed.
        sys.stderr.write("!!! Failed to spawn fetcher.\n")
        sys.stderr.flush()
        os._exit(1)

    status = os.waitpid(child, 0)[1]
    # Low byte set: the child did not exit normally.
    if (status & 0xff) != 0:
        sys.stderr.write("Spawned processes caught a signal.\n")
        sys.exit(1)

    # High byte holds the exit code.
    if (status >> 8) != 0:
        sys.stderr.write("Fetcher exited with a failure condition.\n")
        return 0
    return 1
def file_get_lib(baseurl, dest, conn=None):
    """(baseurl[,connection]) -- Takes a base url to connect to and read from.
    URL should be in the for <proto>://<site>[:port]<path>
    Connection is used for persistent connection instances.

    Downloads the file at baseurl into the file-like object dest using
    make_http_request() or make_ftp_request(), after announcing the file
    name on stderr.

    Returns the second element of the request helper's result.  Raises
    TypeError for an unknown protocol.  A connection opened here is closed
    before returning or raising; one passed in by the caller is left open."""
    if conn:
        keepconnection = 1
    else:
        keepconnection = 0

    conn, protocol, address, params, headers = create_conn(baseurl, conn)

    try:
        sys.stderr.write("Fetching '" + str(os.path.basename(address)) + "'\n")
        if protocol in ("http", "https"):
            data, rc, msg = make_http_request(conn, address, params, headers, dest=dest)
        elif protocol == "ftp":
            data, rc, msg = make_ftp_request(conn, address, dest=dest)
        else:
            raise TypeError("Unknown protocol. '%s'" % protocol)
    finally:
        # Previously skipped when the transfer raised, leaking the connection.
        if not keepconnection:
            conn.close()

    return rc
def dir_get_metadata(baseurl, conn=None, chunk_size=3000, verbose=1, usingcache=1, makepickle=None):
"""(baseurl,conn,chunk_size,verbose) --
Build, refresh and return the binary-package metadata known for baseurl.

The on-disk cache /var/cache/edb/remote_metadata.pickle is loaded, the
remote directory is listed, a metadata.idx* index may be downloaded, and
the xpak metadata of every listed *.tbz2 file missing from the cache is
fetched with file_get_metadata().  Cache entries for files that are no
longer listed are removed, and the cache is written back to disk.

baseurl    -- url of the remote package directory; also the cache key.
conn       -- optional existing connection; one opened here is closed at
              the end, one passed in is left open.
chunk_size -- passed through to file_get_metadata().
verbose    -- when true, a message is written for each package whose
              metadata could not be retrieved.
usingcache -- when true, downloading a remote index file is considered.
makepickle -- path that receives a pickle of this baseurl's data; None
              selects "/var/cache/edb/metadata.idx.most_recent".

Returns metadata[baseurl]["data"], a dict keyed by package file name.
Progress and error messages are written to sys.stderr.
"""
# Remember who owns the connection (see the conn.close() at the end).
if not conn:
keepconnection = 0
else:
keepconnection = 1
if makepickle is None:
makepickle = "/var/cache/edb/metadata.idx.most_recent"
conn,protocol,address,params,headers = create_conn(baseurl, conn)
filedict = {}
# Load the local cache; a missing, unreadable or corrupt file simply means
# starting from an empty cache.
try:
metadatafile = open("/var/cache/edb/remote_metadata.pickle")
metadata = cPickle.load(metadatafile)
sys.stderr.write("Loaded metadata pickle.\n")
metadatafile.close()
except (cPickle.UnpicklingError, OSError, IOError, EOFError):
metadata = {}
# Make sure the entry for this baseurl has the keys used below.
# NOTE(review): "unmodified" is initialised here, but the rest of the
# function only reads and writes "modified" -- looks like a key mismatch.
if not metadata.has_key(baseurl):
metadata[baseurl]={}
if not metadata[baseurl].has_key("indexname"):
metadata[baseurl]["indexname"]=""
if not metadata[baseurl].has_key("timestamp"):
metadata[baseurl]["timestamp"]=0
if not metadata[baseurl].has_key("unmodified"):
metadata[baseurl]["unmodified"]=0
if not metadata[baseurl].has_key("data"):
metadata[baseurl]["data"]={}
# List the remote directory and pick out the packages (names ending in
# ".tbz2") and the index files (names starting with "metadata.idx").
filelist = dir_get_list(baseurl, conn)
tbz2list = match_in_array(filelist, suffix=".tbz2")
metalist = match_in_array(filelist, prefix="metadata.idx")
del filelist
# Determine if our metadata file is current.
metalist.sort()
metalist.reverse() # makes the order new-to-old.
havecache=0
for mfile in metalist:
# An index is downloaded when caching is enabled and either its name
# differs from the cached index name or the cached timestamp is more than
# 24 hours (60*60*24 seconds) old.
if usingcache and \
((metadata[baseurl]["indexname"] != mfile) or \
(metadata[baseurl]["timestamp"] < int(time.time()-(60*60*24)))):
# Try to download new cache until we succeed on one.
data=""
# Up to three download attempts, each into a fresh temporary file.
for trynum in [1,2,3]:
mytempfile = tempfile.TemporaryFile()
try:
# No fcmd is given, so file_get() hands the transfer to file_get_lib().
file_get(baseurl+"/"+mfile, mytempfile, conn)
if mytempfile.tell() > len(data):
mytempfile.seek(0)
data = mytempfile.read()
except ValueError, e:
sys.stderr.write("--- "+str(e)+"\n")
if trynum < 3:
sys.stderr.write("Retrying...\n")
mytempfile.close()
continue
# Index names ending in ".gz" are decompressed from the temporary file.
if match_in_array([mfile],suffix=".gz"):
sys.stderr.write("gzip'd\n")
try:
import gzip
mytempfile.seek(0)
gzindex = gzip.GzipFile(mfile[:-3],'rb',9,mytempfile)
data = gzindex.read()
except SystemExit, e:
raise
except Exception, e:
mytempfile.close()
sys.stderr.write("!!! Failed to use gzip: "+str(e)+"\n")
mytempfile.close()
# NOTE(review): SECURITY -- this unpickles bytes that were just downloaded
# from baseurl.  Unpickling can execute arbitrary code, so this is only
# safe when the server and the transport are trusted.
try:
metadata[baseurl]["data"] = cPickle.loads(data)
del data
metadata[baseurl]["indexname"] = mfile
metadata[baseurl]["timestamp"] = int(time.time())
metadata[baseurl]["modified"] = 0 # It's not, right after download.
sys.stderr.write("Pickle loaded.\n")
break
except SystemExit, e:
raise
except Exception, e:
sys.stderr.write("!!! Failed to read data from index: "+str(mfile)+"\n")
sys.stderr.write("!!! "+str(e)+"\n")
# Write the cache to disk; a failure here is reported but not fatal.
try:
metadatafile = open("/var/cache/edb/remote_metadata.pickle", "w+")
cPickle.dump(metadata,metadatafile)
metadatafile.close()
except SystemExit, e:
raise
except Exception, e:
sys.stderr.write("!!! Failed to write binary metadata to disk!\n")
sys.stderr.write("!!! "+str(e)+"\n")
break
# We may have metadata... now we run through the tbz2 list and check.
sys.stderr.write(yellow("cache miss: 'x'")+" --- "+green("cache hit: 'o'")+"\n")
# Base names of every package currently listed on the server; used further
# down to find cache entries that have gone stale.
binpkg_filenames = set()
for x in tbz2list:
x = os.path.basename(x)
binpkg_filenames.add(x)
# NOTE(review): both halves of this condition test the same thing (key
# membership in the data dict); the second one is redundant.
if ((not metadata[baseurl]["data"].has_key(x)) or \
(x not in metadata[baseurl]["data"].keys())):
sys.stderr.write(yellow("x"))
metadata[baseurl]["modified"] = 1
myid = None
# Up to three attempts; after a BadStatusLine a new connection is created
# with create_conn(baseurl).  The previous connection object is not closed
# in that handler.
for retry in xrange(3):
try:
myid = file_get_metadata(
"/".join((baseurl.rstrip("/"), x.lstrip("/"))),
conn, chunk_size)
break
except httplib.BadStatusLine:
# Sometimes this error is thrown from conn.getresponse() in
# make_http_request(). The docstring for this error in
# httplib.py says "Presumably, the server closed the
# connection before sending a valid response".
conn, protocol, address, params, headers = create_conn(
baseurl)
# file_get_metadata() returns (None, None) when nothing usable came back.
if myid and myid[0]:
metadata[baseurl]["data"][x] = make_metadata_dict(myid)
elif verbose:
sys.stderr.write(red("!!! Failed to retrieve metadata on: ")+str(x)+"\n")
else:
sys.stderr.write(green("o"))
# Cleanse stale cache for files that don't exist on the server anymore.
stale_cache = set(metadata[baseurl]["data"]).difference(binpkg_filenames)
if stale_cache:
for x in stale_cache:
del metadata[baseurl]["data"][x]
metadata[baseurl]["modified"] = 1
del stale_cache
del binpkg_filenames
sys.stderr.write("\n")
# Write the results back: the full cache goes to remote_metadata.pickle and
# this baseurl's data goes to the makepickle path.  Failures are reported
# on stderr but do not stop the function from returning the data.
try:
if metadata[baseurl].has_key("modified") and metadata[baseurl]["modified"]:
metadata[baseurl]["timestamp"] = int(time.time())
metadatafile = open("/var/cache/edb/remote_metadata.pickle", "w+")
cPickle.dump(metadata,metadatafile)
metadatafile.close()
if makepickle:
metadatafile = open(makepickle, "w")
cPickle.dump(metadata[baseurl]["data"],metadatafile)
metadatafile.close()
except SystemExit, e:
raise
except Exception, e:
sys.stderr.write("!!! Failed to write binary metadata to disk!\n")
sys.stderr.write("!!! "+str(e)+"\n")
# Only close a connection that was not supplied by the caller.
if not keepconnection:
conn.close()
return metadata[baseurl]["data"]