CSTA @nalytics Workshop
Oliver is presenting a workshop on Data @nalytics at the WNY-CSTA Fall Conference. Hello to all the high-school teachers in attendance!
The solution developed in class:
import sys;
import twitter;
import pickle;
import time;
from cache import read_cache, write_cache;
from keys import consumer_key, consumer_secret, access_token, access_token_secret;
# Twitter API client, authenticated with the four credentials imported
# from the local keys module.
api = twitter.Api(
    consumer_key=consumer_key,
    consumer_secret=consumer_secret,
    access_token_key=access_token,
    access_token_secret=access_token_secret,
)
#print api.VerifyCredentials();
# Rate limit status
def followers(user_id):
    """Return the follower ids of *user_id*, consulting the cache first.

    The result is looked up under the cache key ``<user_id>_followers``.
    If that lookup fails for any reason it is treated as a cache miss: the
    ids are fetched through the Twitter API (after waiting out any
    rate-limit delay the API reports) and stored in the cache.

    Args:
        user_id: numeric Twitter user id.

    Returns:
        Whatever ``read_cache`` / ``api.GetFollowerIDs`` return for the user
        (presumably a list of follower ids -- confirm against the library).
    """
    cache_key = str(user_id) + "_followers"
    try:
        result = read_cache(cache_key)
    except Exception:
        # Cache miss (or unreadable entry): fall back to the API.
        # `Exception` rather than a bare except so Ctrl-C still interrupts.
        sleep_time = api.GetSleepTime("followers/ids")
        if sleep_time != 0:
            print("Goodnight for " + str(sleep_time) + " seconds")
            # Only the `time` module is imported, so the call must be
            # qualified; a bare sleep() raised NameError here.
            time.sleep(sleep_time)
        result = api.GetFollowerIDs(user_id)
        write_cache(cache_key, result)
    else:
        # Kept outside the try so a failing print is not mistaken for a miss.
        print("Cached results for " + str(user_id))
    return result
# Demo: show the follower ids of one account.
print(followers(45606271))
My complete solution:
import sys;
import twitter;
import pickle;
import time;
from keys import consumer_key, consumer_secret, access_token, access_token_secret;
# Twitter API client, authenticated with the four credentials imported
# from the local keys module.
api = twitter.Api(
    consumer_key=consumer_key,
    consumer_secret=consumer_secret,
    access_token_key=access_token,
    access_token_secret=access_token_secret,
)
#print api.VerifyCredentials();
# Rate limit status
#print api.GetRateLimitStatus();
# Estimated pause per request: the larger of the average sleep times the
# API reports for the two endpoints. (Per the original note, the average is
# the time to sleep per request to stay under the threshold, or 0 if the
# threshold has been reached.)
sleep_time = max(
    api.GetAverageSleepTime(endpoint)
    for endpoint in ('followers/ids', 'friends/ids')
)
print("Will probably need to sleep " + str(sleep_time) + " seconds per request")
# Time required to sleep per request if the threshold has been reached
# or 0 if the threshold has not been reached
#print api.GetSleepTime('followers/ids')
#print api.GetSleepTime('friends/ids')
#print api.GetFollowerIDs(45606271);
#print api.GetFriendIDs(45606271);
def read_cache(cname):
    """Load and return the pickled value stored in ``cache/<cname>``.

    Args:
        cname: name of the cache entry (a file name inside ``cache/``).

    Returns:
        The object that was pickled into that file.

    Raises:
        IOError / OSError: if the file cannot be opened (e.g. no such entry).
        Any error ``pickle.load`` raises for a truncated or corrupt file.
    """
    # Pickle streams are binary data, so the file is opened in 'rb'; text
    # mode breaks on Python 3 and can corrupt data on Windows.
    # NOTE(review): pickle.load can execute arbitrary code; this is only safe
    # while cache/ contains nothing but files this script wrote itself.
    with open("cache/" + cname, 'rb') as f:
        # The with-block closes the file; no explicit close() is needed.
        return pickle.load(f)
def write_cache(cname, value):
    """Pickle *value* into ``cache/<cname>`` and return it unchanged.

    Returning the value lets callers write ``return write_cache(key, fetch())``.

    Args:
        cname: name of the cache entry (a file name inside ``cache/``).
        value: any picklable object.

    Returns:
        ``value`` itself.

    Raises:
        IOError / OSError: if the directory or file cannot be created.
    """
    # Local import: the file's import block does not bring in os.
    import os

    # Create the cache directory on first use so a fresh checkout does not
    # fail after the (rate-limited) API call has already been spent.
    if not os.path.isdir("cache"):
        os.makedirs("cache")
    # Pickle output is binary; text mode fails on Python 3 and can corrupt
    # data on Windows. The with-block closes the file.
    with open("cache/" + cname, 'wb') as f:
        pickle.dump(value, f)
    return value
def sleep_for(time_to_sleep):
    """Sleep for *time_to_sleep* seconds while drawing a text progress bar.

    Nothing happens (and nothing is printed) unless the duration is positive.
    Otherwise a row of underscores is printed, one per 15-second slice
    (rounded up for whole-second durations), and an ``=`` is written after
    each slice has been slept through, followed by a final newline.

    Args:
        time_to_sleep: duration in seconds.
    """
    if not time_to_sleep > 0:
        return

    tick = 15
    bar_width = int((time_to_sleep + tick - 1) / tick)
    print("_" * bar_width)
    sys.stdout.flush()

    remaining = time_to_sleep
    while remaining > 0:
        # The last slice may be shorter than a full tick.
        time.sleep(min(remaining, tick))
        sys.stdout.write("=")
        sys.stdout.flush()
        remaining -= tick
    print("")
def followers(uid):
    """Return the follower ids of *uid*, using the on-disk cache when possible.

    On a cache miss the ids are fetched through the Twitter API (at most 100,
    per the ``count`` / ``total_count`` arguments) after waiting out any
    rate-limit delay, and the result is cached. A user whose followers cannot
    be read because the API answers "Not authorized." is cached as having no
    followers, so the request is not repeated.

    Args:
        uid: numeric Twitter user id.

    Returns:
        The cached or freshly fetched follower ids (an empty list for
        unauthorized users).

    Raises:
        twitter.TwitterError: for any API error other than "Not authorized.".
    """
    cache_key = str(uid) + "_followers"
    try:
        return read_cache(cache_key)
    except Exception:
        # Any failure to read the entry is treated as a cache miss. The fetch
        # happens below, outside this handler, so API errors are not reported
        # as chained onto the cache error. `Exception` rather than a bare
        # except so Ctrl-C still interrupts.
        pass

    sleep_time = api.GetSleepTime('followers/ids')
    if sleep_time > 0:
        # Pad the wait reported by the API with an extra 30 seconds.
        sleep_time += 30
    print("Need to fetch followers of " + str(uid) + "; sleeping for " + str(sleep_time))
    sleep_for(sleep_time)

    try:
        return write_cache(
            cache_key,
            api.GetFollowerIDs(uid, count=100, total_count=100)
        )
    except twitter.TwitterError as e:
        print("Caught: " + str(e))
        if str(e) == "Not authorized.":
            # Remember the failure as an empty follower list.
            return write_cache(cache_key, [])
        # Bare raise keeps the original traceback.
        raise
#def friends(uid):
# try:
# ret = read_cache(str(uid)+"_friends");
# print "friends of user " + str(uid) + " are cached";
# return ret;
# except:
# sleep_time = api.GetSleepTime('friends/ids')
# print "Need to fetch friends of "+str(uid)+"; sleeping for "+str(sleep_time)
# time.sleep(sleep_time);
# return write_cache(
# str(uid)+"_friends",
# api.GetFriendIDs(uid, count = 200)
# )
def connected(uid):
    """Return the set of user ids connected to *uid*.

    Only followers are counted at present; the friends half of the union
    is disabled.
    """
    follower_ids = followers(uid)
    return set(follower_ids)  # | set(friends(uid))
# Breadth-first crawl of the follower graph, starting from `me` and stopping
# once 300 users have been processed or the queue runs dry.
me = 45606271
completed = set()   # users whose followers have been fetched
todo = [me]         # FIFO queue of users still to visit
while todo and len(completed) < 300:
    # Named `current` so the builtin next() is not shadowed.
    current = todo.pop(0)
    if current not in completed:
        # A single lookup: the earlier extra connected() call only stored an
        # unused copy of the same follower list.
        todo.extend(followers(current))
        completed.add(current)
print("Completed: " + str(completed))
# Build a symmetric distance map restricted to the crawled users:
# whenever f follows u (both in `completed`), users[u][f] and users[f][u]
# are both set to 1.
users = {}
for u in completed:
    overlap = set(followers(u)) & completed
    users.setdefault(u, {})
    for f in overlap:
        users[u][f] = 1
        users.setdefault(f, {})[u] = 1
# Repeatedly shorten the recorded distances: for every source -> mid -> dest
# chain, record the combined length if dest was unreachable so far or the new
# length is shorter. Stops when a full round changes nothing, or after nine
# rounds (depth runs from 1 up to, but not including, 10).
depth = 1
made_a_change = True
while made_a_change and depth < 10:
    made_a_change = False
    print("Running round " + str(depth))
    depth = depth + 1
    for source in completed:
        # Iterate over snapshots of the keys: users[source] gains entries
        # inside these loops, which a live dict view does not allow.
        for mid in list(users[source]):
            for dest in list(users[mid]):
                if dest == source:
                    # source -> mid -> source is a round trip, not a distance
                    # to another user; recording it as a self-distance of 2
                    # would skew the per-user averages computed later.
                    continue
                new_path_length = int(users[source][mid]) + int(users[mid][dest])
                if dest not in users[source] or users[source][dest] > new_path_length:
                    print("Better path ("+str(new_path_length)+" from " + str(source) + " to " + str(dest) + " through " + str(mid))
                    users[source][dest] = new_path_length
                    made_a_change = True
print(users)
# Pick the user (other than `me`) with the smallest average recorded distance
# to the users they can reach, then look up and print that user's name.
min_dist = float("inf")
min_user = None
for u, distances in users.items():
    if not distances:
        # No recorded connections: there is no average to compute
        # (and dividing by len(distances) would raise ZeroDivisionError).
        continue
    tot = sum(distances.values())
    avg = float(tot) / float(len(distances))
    print("Average distance for " + str(u) + " : " + str(avg))
    if avg < min_dist and int(u) != int(me):
        min_dist = avg
        min_user = u

if min_user is None:
    # Nothing to look up; calling GetUser(user_id=None) would only fail.
    print("No central user found: no user other than the starting one has any connections")
else:
    print("The most central user is "+str(min_user)+" with an average bacon-distance of "+str(min_dist))
    user = api.GetUser(user_id=min_user)
    print("The user's name is "+user.name + " a.k.a. @"+user.screen_name)