CSTA @nalytics Workshop

Oliver is presenting a workshop on Data @nalytics at the WNY-CSTA Fall Conference. Hello to all the high-school teachers in attendance!

<!--more-->

The solution developed in class:

import sys;
import twitter;
import pickle;
import time;
from cache import read_cache, write_cache;
from keys import consumer_key, consumer_secret, access_token, access_token_secret;

# Build the Twitter API client from the four credentials imported from keys.
api = twitter.Api(
    consumer_key=consumer_key,
    consumer_secret=consumer_secret,
    access_token_key=access_token,
    access_token_secret=access_token_secret,
)

# Print the result of the credential check.
print(api.VerifyCredentials())

Rate limit status

def followers(user_id):
    """Return the follower IDs of ``user_id``, using the on-disk cache.

    The cache is consulted first. On a miss the IDs are fetched through the
    module-level ``api`` client (after waiting for however long
    ``api.GetSleepTime`` reports) and stored in the cache before being
    returned.

    Args:
        user_id: Twitter user ID whose followers are wanted.

    Returns:
        Whatever ``api.GetFollowerIDs`` returned for this user, either
        freshly fetched or loaded back from the cache.
    """
    # One key for both read and write; previously the read used
    # "<id>followers" and the write "<id>_followers", so the cache never hit.
    cache_key = str(user_id) + "_followers"
    try:
        follower_ids = read_cache(cache_key)
    except Exception:
        # Any failure to load the entry is treated as a cache miss.
        sleep_time = api.GetSleepTime("followers/ids")
        if sleep_time != 0:
            print("Goodnight for " + str(sleep_time) + " seconds")
            # Only the ``time`` module is imported, so a bare sleep() would
            # raise NameError.
            time.sleep(sleep_time)
        follower_ids = api.GetFollowerIDs(user_id)
        write_cache(cache_key, follower_ids)
    else:
        print("Cached results for " + str(user_id))
    return follower_ids


print(followers(45606271))

My complete solution:
import sys
import twitter
import pickle
import time
# The name must be ``consumer_key``: that is what twitter.Api() is given below.
from keys import consumer_key, consumer_secret, access_token, access_token_secret

# Build the Twitter API client from the four credentials imported from keys.
api = twitter.Api(
    consumer_key=consumer_key,
    consumer_secret=consumer_secret,
    access_token_key=access_token,
    access_token_secret=access_token_secret,
)

# Print the result of the credential check.
print(api.VerifyCredentials())

# Rate limit status
print(api.GetRateLimitStatus())

# Average time to sleep per request to avoid exceeding the threshold,
# or 0 if the threshold has been reached
sleep_time = max(
    api.GetAverageSleepTime(endpoint)
    for endpoint in ('followers/ids', 'friends/ids')
)
print("Will probably need to sleep " + str(sleep_time) + " seconds per request")

# Time required to sleep per request if the threshold has been reached,
# or 0 if the threshold has not been reached
for endpoint in ('followers/ids', 'friends/ids'):
    print(api.GetSleepTime(endpoint))

# Sample lookups for one account: its followers, then its friends.
print(api.GetFollowerIDs(45606271))
print(api.GetFriendIDs(45606271))

def read_cache(cname):
    """Load and return the pickled value stored in ``cache/<cname>``.

    Args:
        cname: File name of the cache entry inside the ``cache`` directory.

    Returns:
        The unpickled object.

    Raises:
        Whatever ``open`` or ``pickle.load`` raise when the entry is missing
        or unreadable; nothing is caught here.
    """
    # NOTE: unpickling can execute arbitrary code, so only load cache files
    # that this script wrote itself.
    # Binary mode: pickle data is bytes, and text mode fails on Python 3.
    with open("cache/" + cname, "rb") as f:
        return pickle.load(f)

def write_cache(cname, value):
    """Pickle ``value`` into ``cache/<cname>`` and return it unchanged.

    The ``cache`` directory is created if it does not exist yet, so the
    first run of the script does not fail on its first write.

    Args:
        cname: File name of the cache entry inside the ``cache`` directory.
        value: Any picklable object.

    Returns:
        ``value``, so the call can be used directly in a ``return``.
    """
    import os

    try:
        os.makedirs("cache")
    except OSError:
        # Already existing is fine; anything else is a real error.
        if not os.path.isdir("cache"):
            raise
    # Binary mode: pickle data is bytes, and text mode fails on Python 3.
    with open("cache/" + cname, "wb") as f:
        pickle.dump(value, f)
    return value

def sleep_for(time_to_sleep):
    """Sleep for ``time_to_sleep`` seconds while drawing a progress bar.

    The wait is split into chunks of at most 15 seconds. A ruler line is
    printed first, then one ``=`` is written after each chunk, and a final
    newline ends the bar. Nothing is printed and no sleep happens when
    ``time_to_sleep`` is zero or negative.

    Args:
        time_to_sleep: Number of seconds to wait.
    """
    if time_to_sleep <= 0:
        return

    sleep_interval = 15
    # Ruler: one character per chunk (ceiling division for integer input).
    # NOTE(review): the ruler character was an empty string in the source,
    # which made this line a no-op; "_" is assumed to be the lost character.
    print("_" * int((time_to_sleep + sleep_interval - 1) / sleep_interval))
    sys.stdout.flush()
    while time_to_sleep > 0:
        time.sleep(min(time_to_sleep, sleep_interval))
        sys.stdout.write("=")
        sys.stdout.flush()
        time_to_sleep -= sleep_interval
    print("")

def followers(uid):
    """Return the follower IDs of ``uid``, using the on-disk cache.

    The cache entry ``"<uid>_followers"`` is tried first. On a miss the IDs
    are requested through the module-level ``api`` client, after waiting out
    any delay reported by ``api.GetSleepTime`` plus 30 seconds, and the
    result is cached before being returned.

    Args:
        uid: Twitter user ID whose followers are wanted.

    Returns:
        The cached or freshly fetched follower IDs. An empty list is cached
        and returned when the API answers "Not authorized.", so the same
        user is not requested again on later runs.

    Raises:
        twitter.TwitterError: For any API error other than "Not authorized.".
    """
    # One key for every read and write; mismatched keys meant fetched
    # results were never found again.
    cache_key = str(uid) + "_followers"

    try:
        cached = read_cache(cache_key)
    except Exception:
        # Any failure to load the entry is a cache miss; fetch below.
        pass
    else:
        print("followers of user " + str(uid) + " are cached")
        return cached

    sleep_time = api.GetSleepTime('followers/ids')
    if sleep_time > 0:
        # Extra margin on top of the reported wait.
        sleep_time += 30
    print("Need to fetch followers of " + str(uid) + "; sleeping for " + str(sleep_time))
    sleep_for(sleep_time)

    try:
        # count/total_count are passed as in the original request;
        # presumably they cap the result at 100 IDs -- confirm against the
        # python-twitter documentation.
        return write_cache(
            cache_key,
            api.GetFollowerIDs(uid, count=100, total_count=100)
        )
    except twitter.TwitterError as e:
        print("Caught: " + str(e))
        if str(e) == "Not authorized.":
            return write_cache(cache_key, [])
        # Bare raise keeps the original traceback.
        raise

def friends(uid):
    """Return the friend IDs of ``uid``, using the on-disk cache.

    The cache entry ``"<uid>_friends"`` is tried first. On a miss the IDs
    are requested through the module-level ``api`` client after sleeping for
    the time reported by ``api.GetSleepTime``, and the result is cached
    before being returned.

    Args:
        uid: Twitter user ID whose friends are wanted.

    Returns:
        The cached or freshly fetched friend IDs.
    """
    cache_key = str(uid) + "_friends"

    try:
        cached = read_cache(cache_key)
    except Exception:
        # Any failure to load the entry is a cache miss; fetch below.
        # Catching Exception (not a bare except) lets Ctrl-C through.
        pass
    else:
        print("friends of user " + str(uid) + " are cached")
        return cached

    sleep_time = api.GetSleepTime('friends/ids')
    print("Need to fetch friends of " + str(uid) + "; sleeping for " + str(sleep_time))
    # time.sleep() rejects negative values, so only sleep for a real wait.
    if sleep_time > 0:
        time.sleep(sleep_time)

    return write_cache(
        cache_key,
        api.GetFriendIDs(uid, count=200)
    )

def connected(uid):
    """Return the set of user IDs connected to ``uid``.

    Only followers are counted at the moment; the union with
    ``friends(uid)`` is disabled.
    """
    linked = set(followers(uid))  # | set(friends(uid))
    return linked

from collections import deque

# Breadth-first crawl of the follower graph starting from ``me``.
me = 45606271
# Stop once this many users have been processed.
max_users = 300
completed = set()
# deque gives O(1) pops from the left; list.pop(0) is O(n).
todo = deque([me])

while todo and len(completed) < max_users:
    # Named ``current`` so the builtin next() is not shadowed.
    current = todo.popleft()
    if current not in completed:
        connections = connected(current)

        print("Connections for " + str(current) + ": " + str(connections))

        # Queue followers in the order the API returned them.
        todo.extend(followers(current))
        completed.add(current)

print("Completed: " + str(completed))

# Build a symmetric distance map over the crawled users:
# users[a][b] == 1 whenever b follows a (or a follows b) and both are in
# ``completed``.
users = {}
for uid in completed:
    neighbours = users.setdefault(uid, {})
    for follower_id in list(set(followers(uid)) & completed):
        neighbours[follower_id] = 1
        users.setdefault(follower_id, {})[uid] = 1

# Repeatedly relax paths: if source->mid->dest is shorter than the known
# source->dest distance (or none is known yet), record it. Stops when a
# full round changes nothing, or after 9 rounds.
depth = 1
made_a_change = True
while made_a_change and depth < 10:
    made_a_change = False
    print("Running round " + str(depth))
    depth = depth + 1
    for source in completed:
        from_source = users[source]
        # Iterate over snapshots of the keys: the body inserts into these
        # dicts, which raises RuntimeError on Python 3 when iterating a
        # live key view.
        for mid in list(from_source):
            for dest in list(users[mid]):
                # A user is at distance 0 from itself; do not record the
                # round trip source->mid->source as a path of length 2,
                # which would skew the per-user averages computed later.
                if dest == source:
                    continue
                new_path_length = from_source[mid] + users[mid][dest]
                if dest not in from_source or from_source[dest] > new_path_length:
                    print("Better path (" + str(new_path_length) + " from " + str(source) + " to " + str(dest) + " through " + str(mid))
                    from_source[dest] = new_path_length
                    made_a_change = True

# Report the user (other than ``me``) with the smallest average distance to
# the users it can reach.
print(users)
# Infinity instead of a magic 1000, so any real average compares smaller.
min_dist = float("inf")
min_user = None

for u, distances in users.items():
    # A user with no recorded distances has no average; skipping it avoids
    # a ZeroDivisionError.
    if not distances:
        continue
    avg = float(sum(distances.values())) / float(len(distances))
    print("Average distance for " + str(u) + " : " + str(avg))
    if avg < min_dist and int(u) != int(me):
        min_dist = avg
        min_user = u

if min_user is None:
    # Nothing to look up; api.GetUser(user_id=None) would be a wasted call.
    print("No central user found")
else:
    print("The most central user is " + str(min_user) + " with an average bacon-distance of " + str(min_dist))
    user = api.GetUser(user_id=min_user)
    print("The user's name is " + user.name + " a.k.a. @" + user.screen_name)