# blob: 1470076592a92dd99b53fba60a86fcab61803e98 [file] [log] [blame]
# getbinpkg.py -- Portage binary-package helper functions
# Copyright 2003-2004 Gentoo Foundation
# Distributed under the terms of the GNU General Public License v2
# $Id$
from portage.output import red, yellow, green
import portage.xpak
import HTMLParser
import sys
import os
import time
import tempfile
import base64
import urllib2
try:
import cPickle
except ImportError:
import pickle as cPickle
try:
import ftplib
except ImportError, e:
sys.stderr.write(red("!!! CANNOT IMPORT FTPLIB: ")+str(e)+"\n")
try:
import httplib
except ImportError, e:
sys.stderr.write(red("!!! CANNOT IMPORT HTTPLIB: ")+str(e)+"\n")
def make_metadata_dict(data):
    """Turn a split xpak blob into a plain {entry name: entry value} dict.

    data is a two-item sequence; its first item is handed to
    portage.xpak.getindex_mem() to enumerate the entry names, and the whole
    pair is handed to portage.xpak.getitem() to look each entry up.
    (Callers in this file pass the result of file_get_metadata().)
    """
    # Unpacking doubles as a shape check: anything but a pair raises here.
    index_blob, _data_blob = data
    return dict(
        (name, portage.xpak.getitem(data, name))
        for name in portage.xpak.getindex_mem(index_blob))
class ParseLinks(HTMLParser.HTMLParser):
    """HTML parser that collects the target of every anchor on a page.

    Each ``href`` found on an ``<a>`` tag is URL-unquoted and remembered once,
    in document order. The collected links can be fetched as a whole or
    filtered by prefix or suffix.
    """

    def __init__(self):
        # Set up our own state first so it exists whatever the base does.
        self.PL_anchors = []
        HTMLParser.HTMLParser.__init__(self)

    def get_anchors(self):
        """Return the list of collected (unquoted) anchor targets."""
        return self.PL_anchors

    def get_anchors_by_prefix(self, prefix):
        """Return the distinct anchor targets that start with prefix."""
        newlist = []
        for x in self.PL_anchors:
            if x.startswith(prefix) and x not in newlist:
                newlist.append(x[:])
        return newlist

    def get_anchors_by_suffix(self, suffix):
        """Return the distinct anchor targets that end with suffix."""
        newlist = []
        for x in self.PL_anchors:
            if x.endswith(suffix) and x not in newlist:
                newlist.append(x[:])
        return newlist

    def handle_endtag(self, tag):
        """Closing tags carry no link information; ignore them."""
        pass

    def handle_starttag(self, tag, attrs):
        """Record the href of an <a> tag unless it is already known."""
        if tag != "a":
            return
        for name, value in attrs:
            # An attribute written without a value (<a href>) arrives as None.
            if name != "href" or value is None:
                continue
            # Unquote BEFORE the duplicate check: the list stores unquoted
            # names, so comparing the raw value would let "a%20b" in twice.
            anchor = urllib2.unquote(value)
            if anchor not in self.PL_anchors:
                self.PL_anchors.append(anchor)
def create_conn(baseurl, conn=None):
    """Split a URL and, unless a connection is supplied, open one to its host.

    baseurl has the form protocol://[user[:password]@]host[/path]. For ftp,
    a trailing "*" on the host selects active instead of passive mode.

    Returns the 5-tuple (conn, protocol, address, http_params, http_headers):
      conn         -- the connection passed in, or a newly opened one
      protocol     -- the text before "://"
      address      -- the path part of the URL, always starting with "/"
      http_params  -- always an empty dict
      http_headers -- a dict holding a Basic "Authorization" header when both
                      a user name and a password were given, else empty

    Raises ValueError when the URL has no "://" or the user:password part has
    more than one ":". Raises NotImplementedError when a connection has to be
    opened for an unsupported protocol, or for sftp without paramiko. When
    conn is supplied the protocol is not validated.
    """
    parts = baseurl.split("://", 1)
    if len(parts) != 2:
        raise ValueError(
            "Provided URL does not contain protocol identifier. '%s'" % baseurl)
    protocol, remainder = parts

    # Everything up to the first "/" is the host; the rest is the address.
    path_parts = remainder.split("/")
    host = path_parts[0]
    address = "/" + "/".join(path_parts[1:])

    if "@" in host:
        userinfo, host = host.split("@", 1)
        userpass = userinfo.split(":")
    else:
        userpass = ["anonymous"]

    if len(userpass) > 2:
        raise ValueError("Unable to interpret username/password provided.")
    username = userpass[0]
    password = None
    if len(userpass) == 2:
        password = userpass[1]

    http_headers = {}
    http_params = {}
    if username and password:
        # b64encode never inserts line breaks, so no newline stripping needed.
        http_headers["Authorization"] = "Basic %s" % base64.b64encode(
            "%s:%s" % (username, password))

    if not conn:
        if protocol == "https":
            conn = httplib.HTTPSConnection(host)
        elif protocol == "http":
            conn = httplib.HTTPConnection(host)
        elif protocol == "ftp":
            passive = 1
            # endswith() rather than host[-1]: an empty host must not blow up
            # with IndexError before ftplib gets to report the real problem.
            if host.endswith("*"):
                passive = 0
                host = host[:-1]
            conn = ftplib.FTP(host)
            if password:
                conn.login(username, password)
            else:
                sys.stderr.write(yellow(" * No password provided for username")+" '"+str(username)+"'\n\n")
                conn.login(username)
            conn.set_pasv(passive)
            conn.set_debuglevel(0)
        elif protocol == "sftp":
            try:
                import paramiko
            except ImportError:
                raise NotImplementedError(
                    "paramiko must be installed for sftp support")
            t = paramiko.Transport(host)
            t.connect(username=username, password=password)
            conn = paramiko.SFTPClient.from_transport(t)
        else:
            raise NotImplementedError(
                "%s is not a supported protocol." % protocol)

    return (conn, protocol, address, http_params, http_headers)
def make_ftp_request(conn, address, rest=None, dest=None):
    """Download address over an FTP connection, optionally from an offset.

    conn    -- object with the ftplib.FTP methods voidcmd, size, transfercmd
               and voidresp
    address -- remote path to retrieve
    rest    -- byte offset to restart from; a negative value counts back from
               the end of the file (clamped to the start); None downloads
               the whole file without issuing a restart
    dest    -- optional file-like object; when given, the data is written to
               it instead of being returned

    Returns (data, incomplete, ""), where data is the downloaded string ("" if
    dest was used) and incomplete is true when the number of bytes received
    differs from what the reported file size and offset predict. If a
    ValueError is raised along the way, returns (None, code, message), with
    code taken from the first four characters of the message.
    """
    try:
        conn.voidcmd("TYPE I")
        fsize = conn.size(address)

        # Only a real offset may be compared with 0; None means "no restart".
        if rest is not None and rest < 0:
            rest = max(fsize + int(rest), 0)

        if rest is not None:
            mysocket = conn.transfercmd("RETR " + str(address), rest)
        else:
            mysocket = conn.transfercmd("RETR " + str(address))

        chunks = []
        received = 0
        try:
            while 1:
                somedata = mysocket.recv(8192)
                if not somedata:
                    break
                received += len(somedata)
                if dest:
                    dest.write(somedata)
                else:
                    chunks.append(somedata)
        finally:
            # Release the data socket even when recv() or write() fails.
            mysocket.close()

        conn.voidresp()
        conn.voidcmd("TYPE A")

        # A restarted transfer is expected to deliver only the tail.
        if fsize is None:
            incomplete = True
        else:
            incomplete = (fsize - (rest or 0)) != received
        return "".join(chunks), incomplete, ""
    except ValueError:
        # NOTE(review): only ValueError is translated into a return value;
        # confirm whether the FTP library's own error classes should be too.
        e = sys.exc_info()[1]
        return None, int(str(e)[:4]), str(e)
def make_http_request(conn, address, params={}, headers={}, dest=None):
    """Issue an HTTP GET for address on conn, following 301/302 redirects.

    conn    -- object with the httplib connection methods request and
               getresponse
    address -- path to request
    params  -- forwarded as the request body only when non-empty (a GET
               normally has none); never modified here
    headers -- dict of request headers, sent on every hop; never modified
    dest    -- optional file-like object that receives the response body

    Returns a 3-tuple:
      (body, 0, "")          on a 200 or 206 response (body is "" when dest
                             was given)
      (None, status, text)   on any other final status
      (None, None, text)     when sending the request, or connecting to a
                             redirect target, raised an exception

    A redirect whose Location starts with "/" is retried on the same
    connection; any other Location is handed to create_conn(). Connections
    opened here for a redirect are closed before returning; the caller's conn
    is never closed. At most max_redirects hops are followed.
    """
    max_redirects = 10
    redirects = 0
    redirect_conn = None    # connection we opened ourselves, if any
    rc = 0
    response = None
    try:
        while rc in (0, 301, 302):
            try:
                # An empty params container must not be sent as a body.
                conn.request("GET", address, params or None, headers)
            except SystemExit:
                raise
            except Exception:
                return None, None, "Server request failed: " + str(sys.exc_info()[1])
            response = conn.getresponse()
            rc = response.status

            # 301 means that the page address is wrong.
            if rc in (301, 302):
                # The body has to be drained before the connection is reused.
                response.read()
                # getheader() yields the bare value; splitting the raw header
                # dump would leave the line terminator glued to the URL.
                location = response.getheader("Location")
                if not location or redirects >= max_redirects:
                    break
                redirects += 1
                if rc == 301:
                    sys.stderr.write(red("Location has moved: ")+str(location)+"\n")
                else:
                    sys.stderr.write(red("Location has temporarily moved: ")+str(location)+"\n")
                if location.startswith("/"):
                    # Same server, different path.
                    address = location
                    continue
                try:
                    # Request the PATH of the new URL on the new connection.
                    new_conn, ignore, address, ignore, ignore = create_conn(location)
                except SystemExit:
                    raise
                except Exception:
                    return None, None, "Server request failed: " + str(sys.exc_info()[1])
                if redirect_conn is not None:
                    redirect_conn.close()
                conn = redirect_conn = new_conn

        if (rc != 200) and (rc != 206):
            return None,rc,"Server did not respond successfully ("+str(response.status)+": "+str(response.reason)+")"

        if dest:
            # Stream to dest so large files are not held in memory at once.
            while 1:
                chunk = response.read(8192)
                if not chunk:
                    break
                dest.write(chunk)
            return "", 0, ""
        return response.read(), 0, ""
    finally:
        if redirect_conn is not None:
            redirect_conn.close()
def match_in_array(array, prefix="", suffix="", match_both=1, allow_overlap=0):
    """Return the entries of array that match prefix and/or suffix.

    When match_both is true and both prefix and suffix are given, an entry
    must start with prefix AND end with suffix. Otherwise an entry that
    starts with a non-empty prefix is accepted outright, and any remaining
    entry is accepted if it ends with a non-empty suffix.

    Unless allow_overlap is true, the suffix test is only applied to entries
    at least len(prefix) + len(suffix) long, so the two matches cannot share
    characters. With neither prefix nor suffix nothing matches.
    """
    need_both = bool(match_both and prefix and suffix)
    min_len = len(prefix) + len(suffix)

    matches = []
    for item in array:
        has_prefix = bool(prefix) and item.startswith(prefix)

        if not need_both and has_prefix:
            # One criterion is enough and the prefix already fits.
            matches.append(item[:])
            continue
        if need_both and not has_prefix:
            # Both are required but the first is already missing.
            continue
        if not allow_overlap and len(item) < min_len:
            # Too short for prefix and suffix to sit side by side.
            continue
        if suffix and item.endswith(suffix):
            matches.append(item)
    return matches
def dir_get_list(baseurl, conn=None):
    """Return the list of entries found in the remote directory baseurl.

    baseurl -- URL of the form <proto>://<site>[:port]<path>
    conn    -- optional existing connection; when given it is reused and left
               open, otherwise a connection is opened here and closed again
               before returning, also when the listing fails

    For http/https the listing is the set of anchors on the returned page;
    for ftp and sftp it is whatever the server's directory listing returns.

    Raises portage.exception.PortageException when the http(s) request yields
    no page, and TypeError for a protocol this function cannot list.
    """
    keepconnection = 0
    if conn:
        keepconnection = 1

    conn, protocol, address, params, headers = create_conn(baseurl, conn)

    try:
        listing = None
        if protocol in ("http", "https"):
            if not address.endswith("/"):
                # http servers can return a 400 error here
                # if the address doesn't end with a slash.
                address += "/"
            page, rc, msg = make_http_request(conn, address, params, headers)

            if not page:
                import portage.exception
                raise portage.exception.PortageException(
                    "Unable to get listing: %s %s" % (rc,msg))
            parser = ParseLinks()
            parser.feed(page)
            del page
            listing = parser.get_anchors()
        elif protocol == "ftp":
            if address.endswith("/"):
                # List from inside the directory, then restore the old one
                # so a shared connection is left where the caller had it.
                olddir = conn.pwd()
                conn.cwd(address)
                listing = conn.nlst()
                conn.cwd(olddir)
            else:
                listing = conn.nlst(address)
        elif protocol == "sftp":
            listing = conn.listdir(address)
        else:
            raise TypeError("Unknown protocol. '%s'" % protocol)
    finally:
        # Only close what we opened ourselves, whatever happened above.
        if not keepconnection:
            conn.close()

    return listing
def file_get_metadata(baseurl, conn=None, chunk_size=3000):
    """Fetch the xpak metadata stored at the end of a remote file.

    baseurl    -- URL of the form <proto>://<site>[:port]<path>
    conn       -- optional existing connection; when given it is reused and
                  left open, otherwise one is opened here and always closed
    chunk_size -- number of trailing bytes to request first

    Only the last chunk_size bytes are downloaded. The size of the xpak
    segment is decoded from the four bytes that precede the final four; if
    the segment (plus its 8-byte trailer) does not fit in chunk_size, the
    request is repeated once with exactly the required size.

    Returns the result of portage.xpak.xsplit_mem() on the xpak segment, or
    (None, None) when nothing usable was received. Raises TypeError for a
    protocol this function cannot handle.
    """
    keepconnection = 0
    if conn:
        keepconnection = 1

    conn, protocol, address, params, headers = create_conn(baseurl, conn)

    try:
        if protocol in ("http", "https"):
            headers["Range"] = "bytes=-" + str(chunk_size)
            data, rc, msg = make_http_request(conn, address, params, headers)
        elif protocol == "ftp":
            data, rc, msg = make_ftp_request(conn, address, -chunk_size)
        elif protocol == "sftp":
            f = conn.open(address)
            try:
                f.seek(-chunk_size, 2)
                data = f.read()
            finally:
                f.close()
        else:
            raise TypeError("Unknown protocol. '%s'" % protocol)

        # The slicing below needs at least the 8 trailing bytes.
        if not data or len(data) < 8:
            return None, None

        xpaksize = portage.xpak.decodeint(data[-8:-4])
        needed = xpaksize + 8
        if needed > chunk_size:
            # The tail we fetched is too small; ask again for the exact size.
            # conn is passed along, so the nested call leaves it open for us.
            return file_get_metadata(baseurl, conn, needed)
        if len(data) < needed:
            # Short reply: a negative slice start would wrap around and
            # silently hand garbage to the xpak parser.
            return None, None

        xpak_data = data[len(data) - needed:-8]
        del data
        myid = portage.xpak.xsplit_mem(xpak_data)
        if not myid:
            myid = None, None
        return myid
    finally:
        # Only close what we opened ourselves, whatever happened above.
        if not keepconnection:
            conn.close()
def file_get(baseurl, dest, conn=None, fcmd=None):
    """Download baseurl, either in-process or through an external command.

    baseurl -- URL of the form <proto>://[user[:pass]@]<site>[:port]<path>
    dest    -- passed to file_get_lib() as the destination when no fcmd is
               given; otherwise substituted for DISTDIR in fcmd
    conn    -- optional connection, used only when no fcmd is given
    fcmd    -- optional fetch command line; it is split shell-style and the
               variables DISTDIR, URI and FILE are expanded in each word

    Without fcmd the result of file_get_lib() is returned unchanged. With
    fcmd the command is spawned and 1 is returned if it exits with os.EX_OK,
    0 (after a message on stderr) otherwise.
    NOTE(review): the two paths appear to use different success values --
    confirm what callers expect.
    """
    if not fcmd:
        return file_get_lib(baseurl, dest, conn)

    substitutions = {
        "DISTDIR": dest,
        "URI": baseurl,
        "FILE": os.path.basename(baseurl)
    }

    import shlex, StringIO
    from portage.util import varexpand
    from portage.process import spawn

    # Tokenize like a POSIX shell, splitting on whitespace only.
    lexer = shlex.shlex(StringIO.StringIO(fcmd), posix=True)
    lexer.whitespace_split = True
    argv = [varexpand(token, mydict=substitutions) for token in lexer]

    if spawn(argv, env=os.environ.copy()) == os.EX_OK:
        return 1
    sys.stderr.write("Fetcher exited with a failure condition.\n")
    return 0
def file_get_lib(baseurl, dest, conn=None):
    """Download baseurl into dest using the in-process protocol handlers.

    baseurl -- URL of the form <proto>://<site>[:port]<path>
    dest    -- file-like object that receives the downloaded data
    conn    -- optional existing connection; when given it is reused and left
               open, otherwise one is opened here and always closed again

    Returns the status produced by the protocol handler: the second element
    of make_http_request() / make_ftp_request() for http(s) and ftp; for sftp
    0 when the remote file could be opened and 1 when it could not.
    Raises TypeError for a protocol this function cannot handle.
    """
    keepconnection = 0
    if conn:
        keepconnection = 1

    conn, protocol, address, params, headers = create_conn(baseurl, conn)

    try:
        sys.stderr.write("Fetching '" + str(os.path.basename(address) + "'\n"))
        if protocol in ("http", "https"):
            data, rc, msg = make_http_request(conn, address, params, headers, dest=dest)
        elif protocol == "ftp":
            data, rc, msg = make_ftp_request(conn, address, dest=dest)
        elif protocol == "sftp":
            rc = 0
            try:
                f = conn.open(address)
            except SystemExit:
                raise
            except Exception:
                rc = 1
            else:
                try:
                    if dest:
                        # Copy in fixed-size pieces to bound memory use.
                        bufsize = 8192
                        while True:
                            data = f.read(bufsize)
                            if not data:
                                break
                            dest.write(data)
                finally:
                    f.close()
        else:
            raise TypeError("Unknown protocol. '%s'" % protocol)
    finally:
        # Only close what we opened ourselves, whatever happened above.
        if not keepconnection:
            conn.close()

    return rc
def dir_get_metadata(baseurl, conn=None, chunk_size=3000, verbose=1, usingcache=1, makepickle=None):
    """(baseurl,conn,chunk_size,verbose) --
    Return the metadata of every .tbz2 file found in the remote directory
    baseurl, using and refreshing the pickle cache under /var/cache/edb.

    baseurl    -- URL of the remote directory
    conn       -- optional existing connection; when given it is left open
    chunk_size -- passed on to file_get_metadata() for each package
    verbose    -- when true, report packages whose metadata could not be read
    usingcache -- when true, try to download a remote "metadata.idx*" file
                  if the cached index name differs or is older than a day
    makepickle -- path of an extra pickle that receives just the data for
                  baseurl; None selects the default path, any other false
                  value disables it

    Steps: load the local pickle, list the remote directory, optionally
    replace the cached data with a downloaded index, fetch metadata for every
    package that is not cached yet, drop cache entries for packages that are
    no longer listed, and write the cache back. The dict stored under
    metadata[baseurl]["data"] is returned, also on the early-exit paths.
    """
    if not conn:
        keepconnection = 0
    else:
        keepconnection = 1
    cache_path = "/var/cache/edb"
    if makepickle is None:
        makepickle = "/var/cache/edb/metadata.idx.most_recent"
    conn,protocol,address,params,headers = create_conn(baseurl, conn)
    out = sys.stdout
    # Load the cache from the previous run; any read problem means "empty".
    try:
        metadatafile = open("/var/cache/edb/remote_metadata.pickle")
        metadata = cPickle.load(metadatafile)
        out.write("Loaded metadata pickle.\n")
        out.flush()
        metadatafile.close()
    except (cPickle.UnpicklingError, OSError, IOError, EOFError):
        metadata = {}
    # Make sure the per-URL record and all of its fields exist.
    if not metadata.has_key(baseurl):
        metadata[baseurl]={}
    if not metadata[baseurl].has_key("indexname"):
        metadata[baseurl]["indexname"]=""
    if not metadata[baseurl].has_key("timestamp"):
        metadata[baseurl]["timestamp"]=0
    # NOTE(review): "unmodified" is initialised here but the rest of the
    # function reads and writes "modified" -- confirm which key is intended.
    if not metadata[baseurl].has_key("unmodified"):
        metadata[baseurl]["unmodified"]=0
    if not metadata[baseurl].has_key("data"):
        metadata[baseurl]["data"]={}
    # Without write access nothing could be saved, so just serve the cache.
    # NOTE(review): this return (and the one below) skips the conn.close() at
    # the end of the function even when the connection was opened here.
    if not os.access(cache_path, os.W_OK):
        sys.stderr.write("!!! Unable to write binary metadata to disk!\n")
        sys.stderr.write("!!! Permission denied: '%s'\n" % cache_path)
        return metadata[baseurl]["data"]
    import portage.exception
    try:
        filelist = dir_get_list(baseurl, conn)
    except portage.exception.PortageException, e:
        sys.stderr.write("!!! Error connecting to '%s'.\n" % baseurl)
        sys.stderr.write("!!! %s\n" % str(e))
        del e
        return metadata[baseurl]["data"]
    tbz2list = match_in_array(filelist, suffix=".tbz2")
    metalist = match_in_array(filelist, prefix="metadata.idx")
    del filelist
    # Determine if our metadata file is current.
    metalist.sort()
    metalist.reverse() # makes the order new-to-old.
    for mfile in metalist:
        # Refresh when the index name changed or the cache is over 24h old.
        if usingcache and \
            ((metadata[baseurl]["indexname"] != mfile) or \
            (metadata[baseurl]["timestamp"] < int(time.time()-(60*60*24)))):
            # Try to download new cache until we succeed on one.
            data=""
            for trynum in [1,2,3]:
                mytempfile = tempfile.TemporaryFile()
                try:
                    file_get(baseurl+"/"+mfile, mytempfile, conn)
                    # Keep the download only if it is longer than what an
                    # earlier attempt produced.
                    if mytempfile.tell() > len(data):
                        mytempfile.seek(0)
                        data = mytempfile.read()
                except ValueError, e:
                    sys.stderr.write("--- "+str(e)+"\n")
                    if trynum < 3:
                        sys.stderr.write("Retrying...\n")
                    sys.stderr.flush()
                    mytempfile.close()
                    continue
                if match_in_array([mfile],suffix=".gz"):
                    out.write("gzip'd\n")
                    out.flush()
                    try:
                        import gzip
                        mytempfile.seek(0)
                        # mfile[:-3] drops the ".gz" to give the inner name.
                        gzindex = gzip.GzipFile(mfile[:-3],'rb',9,mytempfile)
                        data = gzindex.read()
                    except SystemExit, e:
                        raise
                    except Exception, e:
                        mytempfile.close()
                        sys.stderr.write("!!! Failed to use gzip: "+str(e)+"\n")
                        sys.stderr.flush()
                    mytempfile.close()
                try:
                    # NOTE(review): this unpickles bytes that were downloaded
                    # from the remote server; unpickling untrusted data can
                    # execute arbitrary code -- confirm the server is trusted.
                    metadata[baseurl]["data"] = cPickle.loads(data)
                    del data
                    metadata[baseurl]["indexname"] = mfile
                    metadata[baseurl]["timestamp"] = int(time.time())
                    metadata[baseurl]["modified"] = 0 # It's not, right after download.
                    out.write("Pickle loaded.\n")
                    out.flush()
                    break
                except SystemExit, e:
                    raise
                except Exception, e:
                    sys.stderr.write("!!! Failed to read data from index: "+str(mfile)+"\n")
                    sys.stderr.write("!!! "+str(e)+"\n")
                    sys.stderr.flush()
            # Save the cache after the download attempts, successful or not.
            try:
                metadatafile = open("/var/cache/edb/remote_metadata.pickle", "w+")
                cPickle.dump(metadata,metadatafile)
                metadatafile.close()
            except SystemExit, e:
                raise
            except Exception, e:
                sys.stderr.write("!!! Failed to write binary metadata to disk!\n")
                sys.stderr.write("!!! "+str(e)+"\n")
                sys.stderr.flush()
            # Only the first index file that triggers a refresh is tried.
            break
    # We may have metadata... now we run through the tbz2 list and check.
    class CacheStats(object):
        # Hit/miss counters with a display that refreshes at most once per
        # min_display_latency seconds.
        # time.time is a builtin function, so it stays unbound when reached
        # through an instance: self.time() is simply time.time().
        from time import time
        def __init__(self, out):
            self.misses = 0
            self.hits = 0
            self.last_update = 0
            self.out = out
            self.min_display_latency = 0.2
        def update(self):
            cur_time = self.time()
            if cur_time - self.last_update >= self.min_display_latency:
                self.last_update = cur_time
                self.display()
        def display(self):
            # "\r" returns to the start of the line so the counters are
            # overwritten in place.
            self.out.write("\r"+yellow("cache miss: '"+str(self.misses)+"'")+\
                " --- "+green("cache hit: '"+str(self.hits)+"'"))
            self.out.flush()
    cache_stats = CacheStats(out)
    # Progress updates are only shown on a terminal; the final totals below
    # are written regardless.
    have_tty = out.isatty()
    if have_tty:
        cache_stats.display()
    binpkg_filenames = set()
    for x in tbz2list:
        x = os.path.basename(x)
        binpkg_filenames.add(x)
        if x not in metadata[baseurl]["data"]:
            cache_stats.misses += 1
            if have_tty:
                cache_stats.update()
            metadata[baseurl]["modified"] = 1
            myid = None
            for retry in xrange(3):
                try:
                    myid = file_get_metadata(
                        "/".join((baseurl.rstrip("/"), x.lstrip("/"))),
                        conn, chunk_size)
                    break
                except httplib.BadStatusLine:
                    # Sometimes this error is thrown from conn.getresponse() in
                    # make_http_request(). The docstring for this error in
                    # httplib.py says "Presumably, the server closed the
                    # connection before sending a valid response".
                    # Reconnect and let the loop try again.
                    conn, protocol, address, params, headers = create_conn(
                        baseurl)
            if myid and myid[0]:
                metadata[baseurl]["data"][x] = make_metadata_dict(myid)
            elif verbose:
                sys.stderr.write(red("!!! Failed to retrieve metadata on: ")+str(x)+"\n")
                sys.stderr.flush()
        else:
            cache_stats.hits += 1
            if have_tty:
                cache_stats.update()
    cache_stats.display()
    # Cleanse stale cache for files that don't exist on the server anymore.
    stale_cache = set(metadata[baseurl]["data"]).difference(binpkg_filenames)
    if stale_cache:
        for x in stale_cache:
            del metadata[baseurl]["data"][x]
        metadata[baseurl]["modified"] = 1
    del stale_cache
    del binpkg_filenames
    out.write("\n")
    out.flush()
    # Persist the full cache when something changed, and the per-URL data
    # pickle whenever makepickle names a file.
    try:
        if metadata[baseurl].has_key("modified") and metadata[baseurl]["modified"]:
            metadata[baseurl]["timestamp"] = int(time.time())
            metadatafile = open("/var/cache/edb/remote_metadata.pickle", "w+")
            cPickle.dump(metadata,metadatafile)
            metadatafile.close()
        if makepickle:
            metadatafile = open(makepickle, "w")
            cPickle.dump(metadata[baseurl]["data"],metadatafile)
            metadatafile.close()
    except SystemExit, e:
        raise
    except Exception, e:
        sys.stderr.write("!!! Failed to write binary metadata to disk!\n")
        sys.stderr.write("!!! "+str(e)+"\n")
        sys.stderr.flush()
    # Close the connection only if it was not supplied by the caller.
    if not keepconnection:
        conn.close()
    return metadata[baseurl]["data"]
def _cmp_cpv(d1, d2):
cpv1 = d1["CPV"]
cpv2 = d2["CPV"]
if cpv1 > cpv2:
return 1
elif cpv1 == cpv2:
return 0
else:
return -1
class PackageIndex(object):
    """A package index: one header stanza followed by one stanza per package.

    A stanza is a run of "KEY: value" lines closed by an empty line.

    default_pkg_data -- optional mapping of values assumed for a package when
                        its stanza does not mention the key; such values are
                        filled in on read and left out again on write
    inherited_keys   -- optional sequence of keys a package takes over from
                        the header when absent; on write they are left out
                        when equal to the header's value
    """

    def __init__(self, default_pkg_data=None, inherited_keys=None):
        self._default_pkg_data = default_pkg_data
        self._inherited_keys = inherited_keys
        self.header = {}
        self.packages = []
        # While true, write() refreshes TIMESTAMP and PACKAGES in the header.
        self.modified = True

    def _readpkgindex(self, pkgfile):
        """Read one stanza from pkgfile and return it as a dict."""
        stanza = {}
        for raw in pkgfile:
            text = raw.rstrip("\n")
            if not text:
                # The empty line closes the stanza.
                break
            fields = text.split(":", 1)
            if len(fields) != 2:
                # No colon on this line: skip it.
                continue
            key, value = fields
            # Drop the single character that follows the colon.
            stanza[key] = value[1:]
        return stanza

    def _writepkgindex(self, pkgfile, items):
        """Write (key, value) pairs as one stanza, closed by an empty line."""
        for key, value in items:
            pkgfile.write("%s: %s\n" % (key, value))
        pkgfile.write("\n")

    def read(self, pkgfile):
        """Read the header and then all package stanzas from pkgfile."""
        self.readHeader(pkgfile)
        self.readBody(pkgfile)

    def readHeader(self, pkgfile):
        """Merge the next stanza of pkgfile into self.header."""
        self.header.update(self._readpkgindex(pkgfile))

    def readBody(self, pkgfile):
        """Append every remaining package stanza to self.packages.

        Reading stops at the first empty stanza. Stanzas without a CPV value
        are skipped. Defaults and inherited header values are filled in for
        keys the stanza does not define itself.
        """
        defaults = self._default_pkg_data
        inherited = self._inherited_keys
        while True:
            pkg = self._readpkgindex(pkgfile)
            if not pkg:
                return
            if not pkg.get("CPV"):
                continue
            if defaults:
                for key, value in defaults.items():
                    pkg.setdefault(key, value)
            if inherited:
                for key in inherited:
                    value = self.header.get(key)
                    if value is not None:
                        pkg.setdefault(key, value)
            self.packages.append(pkg)

    def write(self, pkgfile):
        """Write the header and all packages, sorted by CPV, to pkgfile.

        Keys are written in sorted order. For packages, empty values are
        omitted, as are values identical to the inherited header value or to
        the configured default.
        """
        if self.modified:
            self.header["TIMESTAMP"] = str(int(time.time()))
            self.header["PACKAGES"] = str(len(self.packages))
        self._writepkgindex(
            pkgfile, [(key, self.header[key]) for key in sorted(self.header)])

        defaults = self._default_pkg_data
        inherited = self._inherited_keys
        for original in sorted(self.packages, key=lambda pkg: pkg["CPV"]):
            # Work on a copy so the in-memory entry keeps all of its keys.
            pkg = original.copy()
            if inherited:
                for key in inherited:
                    header_value = self.header.get(key)
                    if header_value is not None and header_value == pkg.get(key):
                        del pkg[key]
            if defaults:
                for key, value in defaults.items():
                    if pkg.get(key) == value:
                        pkg.pop(key, None)
            self._writepkgindex(
                pkgfile, [(key, pkg[key]) for key in sorted(pkg) if pkg[key]])