Oliver is presenting a workshop on Data @nalytics at The WNY-CSTA Fall Conference. Hello to all the high-school teachers in attendance!
<!--more-->The solution developed in class
import pickle
import sys
import time

import twitter

from cache import read_cache, write_cache
from keys import consumer_key, consumer_secret, access_token, access_token_secret

# NOTE(review): the text "My complete solution:" was fused into the original
# line at this point. It is presumably a heading for a later listing rather
# than code, so it is preserved here as a comment.

# Build the API client from the credentials imported from the keys module.
api = twitter.Api(
    consumer_key=consumer_key,
    consumer_secret=consumer_secret,
    access_token_key=access_token,
    access_token_secret=access_token_secret,
)

# Print whatever the API reports for the supplied credentials.
print(api.VerifyCredentials())

# Rate limit status
def followers(user_id):
    """Return the follower ids of *user_id*, preferring the cached copy.

    The cache entry is named "<user_id>_followers".  When reading it fails,
    the ids are fetched with api.GetFollowerIDs, after sleeping for the
    number of seconds api.GetSleepTime("followers/ids") reports (if not 0),
    and the result is stored in the cache before being returned.
    """
    cache_key = str(user_id) + "_followers"
    try:
        ids = read_cache(cache_key)
        print("Cached results for " + str(user_id))
    except Exception:
        # Any failure to read the cache is treated as a miss.  Exception
        # (rather than a bare except) lets Ctrl-C / SystemExit propagate.
        sleep_time = api.GetSleepTime("followers/ids")
        if sleep_time != 0:
            print("Goodnight for " + str(sleep_time) + " seconds")
            # The original called a bare sleep(), which is not defined when
            # only `import time` is in scope.
            time.sleep(sleep_time)
        ids = api.GetFollowerIDs(user_id)
        write_cache(cache_key, ids)
    return ids


print(followers(45606271))
import pickle
import sys
import time

import twitter

from keys import consumer_key, consumer_secret, access_token, access_token_secret

# Build the API client from the credentials imported from the keys module.
api = twitter.Api(
    consumer_key=consumer_key,
    consumer_secret=consumer_secret,
    access_token_key=access_token,
    access_token_secret=access_token_secret,
)

# Print whatever the API reports for the supplied credentials.
print(api.VerifyCredentials())

# Rate limit status
print(api.GetRateLimitStatus())

# Average time to sleep per request to avoid exceeding the threshold,
# or 0 if the threshold has been reached
sleep_time = max(
    api.GetAverageSleepTime('followers/ids'),
    api.GetAverageSleepTime('friends/ids'),
)
print("Will probably need to sleep " + str(sleep_time) + " seconds per request")

# Time required to sleep per request if the threshold has been reached,
# or 0 if the threshold has not been reached
print(api.GetSleepTime('followers/ids'))
print(api.GetSleepTime('friends/ids'))

# Smoke test: dump the follower and friend ids of one account.
print(api.GetFollowerIDs(45606271))
print(api.GetFriendIDs(45606271))
def read_cache(cname):
    """Load and return the pickled object stored in ``cache/<cname>``.

    Args:
        cname: name of the cache entry (file name below the cache/ directory,
            relative to the current working directory).

    Returns:
        The unpickled object.

    Raises:
        IOError/OSError: if the cache file cannot be opened (e.g. a miss).
        Any error pickle.load raises for a damaged file.
    """
    path = "cache/" + cname
    # Pickle data is binary: open with "rb" so the bytes are not decoded or
    # newline-translated.  The with-block closes the file; no close() needed.
    # NOTE: pickle.load executes arbitrary code from the file -- only use
    # this on cache files written by this script, never on untrusted data.
    with open(path, "rb") as f:
        return pickle.load(f)
def write_cache(cname, value):
    """Pickle *value* into ``cache/<cname>`` and return *value* unchanged.

    Args:
        cname: name of the cache entry (file name below the cache/ directory,
            relative to the current working directory).
        value: any picklable object.

    Returns:
        The same *value* object, so the call can wrap an expression inline.
    """
    import os

    path = "cache/" + cname
    # Create the cache directory on first use instead of failing on open().
    cache_dir = os.path.dirname(path)
    if not os.path.isdir(cache_dir):
        os.makedirs(cache_dir)
    # Pickle data is binary: "wb" avoids text encoding / newline translation.
    # The with-block closes the file; no explicit close() is needed.
    with open(path, "wb") as f:
        pickle.dump(value, f)
    return value
def sleep_for(time_to_sleep, sleep_interval=15):
    """Sleep for *time_to_sleep* seconds while drawing a text progress bar.

    A row of "_" (one per tick) is printed first; then one "=" is written
    after each slept tick, followed by a final newline.  Nothing is printed
    and no sleeping happens when *time_to_sleep* is zero or negative.

    Args:
        time_to_sleep: total number of seconds to sleep.
        sleep_interval: length of one tick in seconds (default 15).

    Raises:
        ValueError: if *sleep_interval* is not positive.
    """
    import math

    if sleep_interval <= 0:
        raise ValueError("sleep_interval must be positive")
    if time_to_sleep <= 0:
        return

    # Number of ticks the loop below performs.  Same value as the original
    # (t + 14) / 15 integer formula for whole seconds, and also correct for
    # fractional durations.
    ticks = int(math.ceil(float(time_to_sleep) / sleep_interval))
    sys.stdout.write("_" * ticks + "\n")
    sys.stdout.flush()

    remaining = time_to_sleep
    while remaining > 0:
        # The last tick may be shorter than a full interval.
        time.sleep(min(remaining, sleep_interval))
        sys.stdout.write("=")
        sys.stdout.flush()  # show progress immediately
        remaining -= sleep_interval
    sys.stdout.write("\n")
def followers(uid):
    """Return the follower ids of *uid*, preferring the cached copy.

    The cache entry is named "<uid>_followers".  On a cache miss the ids are
    fetched with api.GetFollowerIDs(uid, count=100, total_count=100) after
    waiting out the rate limit, and the result is cached.  If Twitter
    answers "Not authorized.", an empty list is cached and returned.

    Raises:
        twitter.TwitterError: for any API error other than "Not authorized.".
    """
    cache_key = str(uid) + "_followers"
    try:
        ret = read_cache(cache_key)
    except Exception:
        # Any failure to read the cache is treated as a miss.  Exception
        # (rather than a bare except) lets Ctrl-C / SystemExit propagate.
        pass
    else:
        print("followers of user " + str(uid) + " are cached")
        return ret

    sleep_time = api.GetSleepTime('followers/ids')
    if sleep_time > 0:
        # Add a safety margin on top of the reported wait.
        sleep_time += 30
    # NOTE(review): the source's indentation was lost; the message and the
    # sleep_for call are assumed to run unconditionally, as in friends().
    print("Need to fetch followers of " + str(uid) + "; sleeping for " + str(sleep_time))
    sleep_for(sleep_time)

    try:
        return write_cache(
            cache_key,
            api.GetFollowerIDs(uid, count=100, total_count=100)
        )
    except twitter.TwitterError as e:
        print("Caught: " + str(e))
        if str(e) == "Not authorized.":
            # Cache an empty list so this user is not requested again.
            return write_cache(cache_key, [])
        # Bare raise keeps the original traceback.
        raise
def friends(uid):
    """Return the friend ids of *uid*, preferring the cached copy.

    The cache entry is named "<uid>_friends".  On a cache miss the function
    sleeps for the number of seconds api.GetSleepTime('friends/ids')
    reports, fetches the ids with api.GetFriendIDs(uid, count=200), stores
    them in the cache and returns them.
    """
    cache_key = str(uid) + "_friends"
    try:
        ret = read_cache(cache_key)
    except Exception:
        # Any failure to read the cache is treated as a miss.  Exception
        # (rather than a bare except) lets Ctrl-C / SystemExit propagate.
        sleep_time = api.GetSleepTime('friends/ids')
        print("Need to fetch friends of " + str(uid) + "; sleeping for " + str(sleep_time))
        time.sleep(sleep_time)
        return write_cache(
            cache_key,
            api.GetFriendIDs(uid, count=200)
        )
    print("friends of user " + str(uid) + " are cached")
    return ret
def connected(uid):
    """Return the set of user ids considered connected to *uid*.

    Only the followers of *uid* are included; the union with
    set(friends(uid)) is disabled in the source.
    """
    follower_ids = followers(uid)
    return set(follower_ids)
from collections import deque

# Breadth-first crawl of the follower graph, starting from `me` and stopping
# once 300 users have been processed or the queue runs dry.
me = 45606271
completed = set()      # user ids whose connections have been fetched
todo = deque([me])     # FIFO queue of user ids still to visit

while todo and len(completed) < 300:
    # popleft() is O(1); list.pop(0) shifts the whole queue on every call.
    current = todo.popleft()
    if current not in completed:
        connections = connected(current)
        print("Connections for " + str(current) + ": " + str(connections))
        # Queue the followers in the order followers() returns them, so the
        # crawl order does not depend on set iteration order.
        todo.extend(followers(current))
        completed.add(current)

# NOTE(review): indentation was lost in the source; this summary is assumed
# to be printed once, after the crawl finishes.
print("Completed: " + str(completed))
# Build a symmetric adjacency map restricted to the crawled users:
# users[a][b] == 1 whenever b follows a (or a follows b) and both are in
# `completed`.  Every crawled user gets an entry, even with no links.
users = {}
for member in completed:
    mutual = set(followers(member)) & completed
    links = users.setdefault(member, {})
    for other in mutual:
        links[other] = 1
        users.setdefault(other, {})[member] = 1
# Repeatedly relax two-hop paths (source -> mid -> dest) until no distance
# improves or nine rounds have run.  users[a][b] holds the best known path
# length from a to b.
depth = 1
made_a_change = True
while made_a_change and depth < 10:
    made_a_change = False
    print("Running round " + str(depth))
    depth = depth + 1
    for source in completed:
        # Iterate over snapshots of the keys: the body inserts into
        # users[source], and mutating a dict while iterating its live key
        # view raises RuntimeError on Python 3.
        for mid in list(users[source]):
            for dest in list(users[mid]):
                if dest == source:
                    # A path back to the start is not a distance to another
                    # user; recording it (length 2) would skew the averages
                    # computed from these maps.
                    continue
                new_path_length = int(users[source][mid]) + int(users[mid][dest])
                if dest not in users[source] or users[source][dest] > new_path_length:
                    print("Better path ("+str(new_path_length)+" from " + str(source) + " to " + str(dest) + " through " + str(mid))
                    users[source][dest] = new_path_length
                    made_a_change = True
# Report the user (other than `me`) with the smallest average distance to
# the users it can reach.
print(users)

min_dist = float("inf")  # no arbitrary cap on the best average seen so far
min_user = None
for u, distances in users.items():
    if not distances:
        # No known paths: the average is undefined (would divide by zero).
        continue
    tot = sum(distances.values())
    avg = float(tot) / float(len(distances))
    print("Average distance for " + str(u) + " : " + str(avg))
    if avg < min_dist and int(u) != int(me):
        min_dist = avg
        min_user = u

if min_user is None:
    # Nothing to look up; avoid calling the API with user_id=None.
    print("No central user could be determined")
else:
    print("The most central user is "+str(min_user)+" with an average bacon-distance of "+str(min_dist))
    user = api.GetUser(user_id=min_user)
    print("The user's name is "+user.name + " a.k.a. @"+user.screen_name)