The Twitter API is an interface to get, alter and insert content on Twitter. This tutorial includes the basic steps one could follow to interact with the API. Contact me for any questions at vasisouv@csd.auth.gr or souvatzisbill@gmail.com
Having a Twitter account is a must for communicating with its API. Skip this step if you already have an account. (Signup link)
Go to apps.twitter.com and create a Twitter Application. An application URL is required, but any valid URL can be entered in the field as a placeholder for later. Also, please provide your mobile phone number if/when asked to do so. An account's application can only communicate with the API when a mobile phone is associated with the account.
After creating your application you will need to generate the API keys that will be used for the communication with the API itself. When inside your application's dashboard, click on the Keys and Access tokens tab, as seen on the image below.
Scroll to the bottom (Your Access Token section) and click Create my access token.
IMPORTANT: You should never reveal your Consumer Secret and Access Token Secret to the public. These keys can be used to make malicious calls to the API on your behalf and result in bans or other restrictions on your account. Also be cautious when uploading your project to GitHub or any other online repository. Preferably avoid hardcoding the keys and save them externally in a text file that stays local-only or is encrypted (for git see: gitignore).
This tutorial makes use of Python and Java to communicate with the Twitter API. The libraries that are used are tweepy (for Python) and Twitter4J (for Java).
These libraries are well-known, tested and you can find plenty of material on the web about them.
It is time to utilize the API keys we just generated. The following code initializes an api object that we will use to communicate with the API. The keys used on the examples below are simple placeholders and should be replaced with your own.
# Initialize the API consumer keys and access tokens
# NOTE: the four values below are placeholders; replace them with the keys
# generated for your own application and keep the two secrets private.
consumer_key = "SORXfCgvpS3wdbMRRNtc2qzfB"
consumer_secret = "GBJkn5LCRuqvHqnogOdJJYKS4lNqVKiTWtOL0xlBKzD3p6uYFY"
access_token = "599269165-blChAStgzrWo9TIPcoUHiqhJTf7RqLxz090HnWjF"
access_token_secret = "RjdUeLOkemTPLObB6dLDsdfwSQk3joyqDKer94g27Qi9J"
# Authenticate tweepy using the keys
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
# Build the client object that the other examples refer to as `api`.
# NOTE(review): the wait_on_rate_limit* flags presumably make tweepy sleep
# (and print a notice) when a rate limit is hit - confirm against the
# installed tweepy version.
api = tweepy.API(auth, wait_on_rate_limit=True, wait_on_rate_limit_notify=True, compression=True)
// Creating a ConfigurationBuilder object to authenticate our application
// NOTE: the four credential strings below are placeholders; replace them with
// the keys generated for your own application and keep the secrets private.
ConfigurationBuilder cb = new ConfigurationBuilder();
cb.setDebugEnabled(true)
.setOAuthConsumerKey("SORXfCgvpS3wdbMRRNtc2qzfB")
.setOAuthConsumerSecret("GBJkn5LCRuqvHqnogOdJJYKS4lNqVKiTWtOL0xlBKzD3p6uYFY")
.setOAuthAccessToken("599269165-blChAStgzrWo9TIPcoUHiqhJTf7RqLxz090HnWjF")
.setOAuthAccessTokenSecret("RjdUeLOkemTPLObB6dLDsdfwSQk3joyqDKer94g27Qi9J");
// Build the Twitter client from the configured credentials
Twitter api = new TwitterFactory(cb.build()).getInstance();
The Streaming API is the best way to get massive amounts of tweets based on specific keywords or location. While not all tweets that get posted on Twitter are caught by the Streaming API, it is still the optimal way to gather tweets because it is not rate limited.
if __name__ == '__main__':
    # Initialize the listener
    listener = TwitterListener()
    stream = tweepy.Stream(auth, listener)

    # Set the parameters for the stream listener
    topics = ["music,guitar,concerts"]
    langs = ["en", "el"]
    # `async` is a reserved word since Python 3.7, so it can be used neither as
    # a variable name nor as a keyword argument; tweepy 3.7+ accepts
    # `is_async` instead.
    is_async = True
    encoding = "utf-8"

    # Create bounding boxes using http://boundingbox.klokantech.com/
    # Copy and paste CSV Raw inside an array as below
    geo_greece = [19.4897460938, 34.7596661247, 28.5205078125, 41.7385284694]
    locations = geo_greece

    # Start Streaming
    # ATTENTION: using both location and topics is not possible
    # Pick ONE of the two filters: a single Stream object is started once, so
    # the two calls are alternatives rather than consecutive steps.
    filter_by_location = False
    if filter_by_location:
        # by location
        stream.filter(languages=langs, locations=locations, is_async=is_async, encoding=encoding)
    else:
        # by topic
        stream.filter(languages=langs, track=topics, is_async=is_async, encoding=encoding)
class TwitterListener(tweepy.StreamListener):
    """Stream listener that decodes every raw message delivered by the stream."""

    # Executed once, when the class body is evaluated (not once per instance).
    print("Starting tweet collection...")

    # The stream listener found a tweet that matches our parameters
    def on_data(self, data):
        """Decode one raw JSON message and extract the tweet text.

        Args:
            data: The raw JSON string handed over by the stream.
        """
        tweet_json = json.loads(data)
        # get any value using tweet_json[*name_of_field*]
        # see: https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/tweet-object for field names
        # Use .get(): the stream may also deliver messages that are not tweets
        # and carry no "text" field; indexing would raise KeyError and abort
        # the stream. tweet_text is None for such messages.
        tweet_text = tweet_json.get("text")

    # An error occurred
    def on_error(self, status):
        """Print the error status reported by the stream."""
        print(status)
/**
 * Entry point of the streaming example: starts a stream that tracks a fixed
 * keyword list. The configuration builder {@code cb} is expected to be
 * declared outside this method.
 */
public static void main(String[] args) throws TwitterException, JSONException {
    final String[] trackedTerms = new String[] {"music,guitar,concert"};
    getTweetsFromStream(trackedTerms, cb);
}
/**
 * Opens a filtered stream and prints the text of every status it delivers.
 *
 * @param keywords terms handed to the filter query's track setting
 * @param cb       configuration builder holding the credentials
 */
private static void getTweetsFromStream(String[] keywords, ConfigurationBuilder cb) {
    final TwitterStream stream = new TwitterStreamFactory(cb.build()).getInstance();

    // Callback object: each stream event is simply reported on stdout.
    final StatusListener printer = new StatusListener() {
        @Override
        public void onStatus(Status status) {
            System.out.println(status.getText());
        }

        @Override
        public void onDeletionNotice(StatusDeletionNotice notice) {
            System.out.println("Got a status deletion notice id:" + notice.getStatusId());
        }

        @Override
        public void onTrackLimitationNotice(int limitedCount) {
            System.out.println("Got track limitation notice:" + limitedCount);
        }

        @Override
        public void onScrubGeo(long userId, long upToStatusId) {
            System.out.println("Got scrub_geo event userId:" + userId + " upToStatusId:" + upToStatusId);
        }

        @Override
        public void onStallWarning(StallWarning stall) {
            System.out.println("Got stall warning:" + stall);
        }

        @Override
        public void onException(Exception failure) {
            failure.printStackTrace();
        }
    };

    final FilterQuery query = new FilterQuery();
    query.language("en");
    // keywords
    query.track(keywords);
    // location: bounding box given as two {longitude, latitude} corner points
    final double[][] greeceBox = {
        {20.26, 34.87},
        {27.16, 41.29}
    };
    query.locations(greeceBox);
    // ATTENTION: using both location and keywords filtering does not work.

    stream.addListener(printer);
    stream.filter(query);
}
Getting the entire timeline of a specific user can be achieved by calling the api using a user's id or screen name property.
if __name__ == '__main__':
    # Aristotle University's Twitter user ID
    user_id = "234343780"
    # Fetch the whole timeline first, then print the text of each tweet.
    statuses = get_user_tweets(user_id)
    for tweet in statuses:
        tweet_fields = tweet._json
        print(tweet_fields["text"])
def get_user_tweets(user_id):
    """Fetch every timeline item the API returns for one user.

    Args:
        user_id: Id of the user whose timeline is requested.

    Returns:
        A list with the status objects yielded by the cursor, in the order
        they were received.
    """
    timeline = []
    cursor = tweepy.Cursor(api.user_timeline, id=user_id)
    for progress, status in enumerate(cursor.items(), start=1):
        timeline.append(status)
        print("Fetched " + str(progress) + " out of all timeline items")
    # The original returned the undefined name `statuses`; the collected
    # items live in `timeline`.
    return timeline
/**
 * Entry point of the timeline example: prints every tweet text fetched for a
 * fixed screen name. The configuration builder {@code cb} is expected to be
 * declared outside this method.
 */
public static void main(String[] args) throws TwitterException, JSONException {
    final String screenName = "Aristoteleio";
    final List<String> fetched = getTweetsOfUser(screenName, cb);
    for (int i = 0; i < fetched.size(); i++) {
        System.out.println(fetched.get(i));
    }
}
/**
 * Collects the text of every tweet the API returns for a user's timeline.
 *
 * Pages are requested one after another until an empty page comes back. If a
 * TwitterException occurs, it is reported and the tweets gathered so far are
 * returned.
 *
 * @param username screen name of the user whose timeline is fetched
 * @param cb       configuration builder holding the credentials
 * @return the tweet texts in the order they were received
 */
private static List<String> getTweetsOfUser(String username, ConfigurationBuilder cb){
    // gets a Twitter instance configured with the supplied credentials
    Twitter api = new TwitterFactory(cb.build()).getInstance();
    List<String> tweets = new ArrayList<String>();
    try {
        List<Status> statuses;
        int pageNum = 1;
        // 200 is the largest page size the user timeline endpoint serves;
        // asking for more does not return more.
        Paging paging = new Paging(pageNum, 200);
        do {
            statuses = api.getUserTimeline(username, paging);
            for (Status status : statuses) {
                tweets.add(status.getText());
            }
            pageNum += 1;
            paging.setPage(pageNum);
            // Space added before "tweets": the message used to read "20tweets so far".
            System.out.println("Fetched " + String.valueOf(tweets.size()) + " tweets so far");
        } while (!statuses.isEmpty());
    } catch (TwitterException te) {
        te.printStackTrace();
        System.out.println("Failed to get timeline: " + te.getMessage());
    }
    return tweets;
}
Getting a user's full profile can be achieved by calling the API using the user's id or screen name property.
if __name__ == '__main__':
    # Aristotle University's Twitter user ID (this snippet used `user_id`
    # without ever defining it)
    user_id = "234343780"
    user_json = get_user(user_id)
    # get_user returns 0 when the lookup fails; only read the profile fields
    # when a user object actually came back.
    if user_json:
        # Access all the information using .*field*
        # https://developer.twitter.com/en/docs/tweets/data-dictionary/overview/user-object
        screen_name = str(user_json.screen_name)
        followers_count = str(user_json.followers_count)
        friends_count = str(user_json.friends_count)
        print("This user has the screen name: " + screen_name)
        print("This user has " + followers_count + " followers")
        print("This user has " + friends_count + " friends")
def get_user(user_id):
    """Look up the full profile of one user.

    Args:
        user_id: Id of the user to look up.

    Returns:
        The user object returned by the API, or 0 when tweepy raises a
        TweepError.
    """
    print("Searching full information for user with id " + str(user_id))
    try:
        return api.get_user(user_id)
    except tweepy.TweepError as tweep_error:
        # A TweepError does not always carry an HTTP response (e.g. connection
        # problems); fall back to the error's own message in that case instead
        # of failing with AttributeError inside the handler.
        response = tweep_error.response
        detail = response.text if response is not None else tweep_error
        print("Error with code : " + str(detail))
        return 0
/**
 * Entry point of the user-lookup example: prints the id of a fixed screen
 * name when the lookup succeeds. The configuration builder {@code cb} is
 * expected to be declared outside this method.
 */
public static void main(String[] args) throws TwitterException, JSONException {
    final String screenName = "Aristoteleio";
    final User found = getUserByScreenName(screenName, cb);
    if (found == null) {
        return;
    }
    final String idText = String.valueOf(found.getId());
    System.out.println("This user's id is " + idText);
}
/**
 * Looks a user up by screen name.
 *
 * @param username screen name to look up
 * @param cb       configuration builder holding the credentials
 * @return the user, or {@code null} when the lookup throws or the returned
 *         user carries no status
 */
private static User getUserByScreenName(String username, ConfigurationBuilder cb) {
    final Twitter client = new TwitterFactory(cb.build()).getInstance();
    final User candidate;
    try {
        candidate = client.showUser(username);
    } catch (TwitterException lookupFailure) {
        lookupFailure.printStackTrace();
        System.out.println("Failed to get user: " + lookupFailure.getMessage());
        return null;
    }
    // the user is protected or non-existent when no status is attached
    return candidate.getStatus() == null ? null : candidate;
}
When referring to friends in the API, friends are all the users this user follows. This part can be tricky since it is rate limited and many users have a huge number of followers or friends. Preferably limit them for lower fetching times.
if __name__ == '__main__':
    # Aristotle University's Twitter user ID
    user_id = "234343780"
    network = get_user_network(user_id)
    # get_user_network returns 0 when the API reports an error; subscripting
    # that value would raise TypeError, so only print a real result.
    if network:
        print(network["friends"])
        print(network["followers"])
def get_user_network(user_id, max_followers=100000, max_friends=100000):
    """Collect the follower ids and friend ids of one user.

    Args:
        user_id: Id of the user to inspect (string or integer).
        max_followers: Paging through followers stops once at least this many
            ids have been collected.
        max_friends: Paging through friends stops once at least this many ids
            have been collected.

    Returns:
        A dict with the keys "id", "followers" and "friends", or 0 when tweepy
        raises a TweepError.
    """
    print("Searching network for user with id " + str(user_id))
    try:
        followers = _collect_ids(api.followers_ids, user_id, max_followers, "Followers")
        print("finished followers")
        friends = _collect_ids(api.friends_ids, user_id, max_friends, "Friends")
        print("finished friends")
    except tweepy.TweepError as tweep_error:
        # A TweepError does not always carry an HTTP response; fall back to
        # the error's own message instead of failing inside the handler.
        response = tweep_error.response
        detail = response.text if response is not None else tweep_error
        print("Error with code : " + str(detail))
        return 0
    # str(user_id): plain concatenation raised TypeError for integer ids,
    # although the first message of this function already accepted them.
    print("User with ID: " + str(user_id) + " has " + str(len(followers)) + " followers and " + str(len(friends)) + " friends")
    return {
        "id": user_id,
        "followers": followers,
        "friends": friends,
    }


def _collect_ids(endpoint, user_id, limit, label):
    """Page through an id-listing endpoint until it ends or `limit` ids are held."""
    collected = []
    for page in tweepy.Cursor(endpoint, id=user_id).pages():
        collected.extend(page)
        if len(collected) >= limit:
            break
        print(label + " so far : " + str(len(collected)))
    return collected
/**
 * Entry point of the network example: prints the friend ids, then the
 * follower ids, then a summary line for a fixed screen name. The
 * configuration builder {@code cb} is expected to be declared outside this
 * method.
 */
public static void main(String[] args) throws TwitterException, JSONException {
    final String screenName = "Aristoteleio";
    final JSONObject network = getUserNetwork(screenName, cb);
    final JSONArray friendIds = network.getJSONArray("friendsIDs");
    final JSONArray followerIds = network.getJSONArray("followersIDs");

    System.out.println("Showing this person's friends");
    int index = 0;
    while (index < friendIds.length()) {
        System.out.println(friendIds.get(index).toString());
        index++;
    }

    System.out.println("Showing this person's followers");
    index = 0;
    while (index < followerIds.length()) {
        System.out.println(followerIds.get(index).toString());
        index++;
    }

    final String friendTotal = String.valueOf(friendIds.length());
    final String followerTotal = String.valueOf(followerIds.length());
    System.out.println("This user has a total of " + friendTotal + " friends"
            + " and " + followerTotal + " followers on Twitter");
}
/**
 * Collects the ids of a user's friends and followers.
 *
 * Before every page request the current rate-limit status of the endpoint is
 * checked with checkLimit, which may sleep. Failures while fetching either
 * list are reported and leave that list with whatever was collected so far.
 *
 * @param username screen name of the user to inspect
 * @param cb       configuration builder holding the credentials
 * @return a JSON object with the keys "username", "friendsIDs" and "followersIDs"
 * @throws JSONException if a value cannot be stored in the result object
 */
private static JSONObject getUserNetwork(String username,ConfigurationBuilder cb) throws JSONException {
    JSONObject obj = new JSONObject();
    obj.put("username", username);
    Twitter api = new TwitterFactory(cb.build()).getInstance();
    List<String> friendsIDs = new ArrayList<String>();
    List<String> followersIDs = new ArrayList<String>();

    // Getting the friends IDs
    try {
        long cursor = -1;
        IDs ids;
        do {
            RateLimitStatus status = api.getRateLimitStatus().get("/friends/ids");
            checkLimit(status);
            ids = api.getFriendsIDs(username, cursor);
            for (long id : ids.getIDs()) {
                friendsIDs.add(String.valueOf(id));
            }
            System.out.println("Friends so far : " + String.valueOf(friendsIDs.size()));
        } while ((cursor = ids.getNextCursor()) != 0);
    } catch (TwitterException te) {
        te.printStackTrace();
        System.out.println("Failed to get friends' ids: " + te.getMessage());
    } catch (InterruptedException e) {
        e.printStackTrace();
        // Restore the interrupt flag so the interruption is not silently lost.
        Thread.currentThread().interrupt();
    }

    // Getting the followers IDs
    try {
        long cursor = -1;
        IDs ids;
        do {
            RateLimitStatus status = api.getRateLimitStatus().get("/followers/ids");
            checkLimit(status);
            ids = api.getFollowersIDs(username, cursor);
            for (long id : ids.getIDs()) {
                followersIDs.add(String.valueOf(id));
            }
            System.out.println("Followers so far : " + String.valueOf(followersIDs.size()));
        } while ((cursor = ids.getNextCursor()) != 0);
    } catch (TwitterException te) {
        te.printStackTrace();
        System.out.println("Failed to get followers' ids: " + te.getMessage());
    } catch (InterruptedException e) {
        e.printStackTrace();
        // Restore the interrupt flag so the interruption is not silently lost.
        Thread.currentThread().interrupt();
    }

    obj.put("friendsIDs", friendsIDs);
    obj.put("followersIDs", followersIDs);
    return obj;
}
/**
 * Sleeps until the rate-limit window resets when no calls remain.
 *
 * @param status rate-limit status of the endpoint about to be called
 * @throws InterruptedException if the thread is interrupted while sleeping
 */
private static void checkLimit(RateLimitStatus status) throws InterruptedException {
    if (status.getRemaining() == 0) {
        // the api is rate limiting the application
        // Clamp at zero: Thread.sleep rejects negative durations with
        // IllegalArgumentException, so a negative reported value must not
        // reach it.
        int timeRemaining = Math.max(0, status.getSecondsUntilReset());
        System.out.println("Rate limit reached! Should wake up in " + String.valueOf(timeRemaining / 60) + " minutes!");
        // 1000L keeps the multiplication in long arithmetic.
        Thread.sleep(1000L * timeRemaining);
        System.out.println("Waking up and resuming!");
    }
}
All the methods have a custom_object parameter which is basically a json object like the following:
# Example record handed to the save_* helpers: a flat dict built from the
# fields of a decoded tweet (`tweet_json`).
custom_object = {
    "id": tweet_json["id_str"],
    "created_at": tweet_json["created_at"],
    "text": tweet_json["text"],
    # id of the tweet's author, read from the nested "user" object
    "by_user": tweet_json["user"]["id_str"]
}
def save_to_txt(custom_object, path="../../output/output.txt"):
    """Append one record to a tab-separated text file.

    Every value of `custom_object` is written in order, each followed by a
    tab, and the record is terminated by a newline. Backslashes, tabs and line
    breaks inside a value are written as escape sequences so that a record
    always occupies exactly one line.

    The previous implementation sliced the repr() of the UTF-8 encoded bytes,
    which turned every non-ASCII character into "\\x.." escapes and crashed
    on values that are not strings. Values are now converted with str() and
    the file is written as UTF-8.

    Args:
        custom_object: Mapping whose values make up the record.
        path: File to append to; defaults to the location used so far.
    """
    fields = [_escape_field(value) for value in custom_object.values()]
    with open(path, "a", encoding="utf-8") as f:
        f.write("".join(field + "\t" for field in fields) + "\n")


def _escape_field(value):
    """Return str(value) with backslash, tab, newline and CR escaped."""
    text = str(value)
    # The backslash must be handled first so later escapes are not doubled.
    for raw, escaped in (("\\", "\\\\"), ("\t", "\\t"), ("\n", "\\n"), ("\r", "\\r")):
        text = text.replace(raw, escaped)
    return text
def save_to_json(custom_object, path="../../output.json"):
    """Append one record to the "tweets" list of a JSON file.

    The file is read, the record is appended and the whole document is
    written back. A missing file is treated as an empty document, so the
    first call creates it. The new document is serialized completely before
    the file is opened for writing, so a record that cannot be serialized
    no longer truncates the data already stored.

    Saving stays best-effort: on failure a message is printed and nothing is
    raised.

    Args:
        custom_object: JSON-serializable record to store.
        path: JSON file to update; defaults to the location used so far.
    """
    try:
        try:
            with open(path, encoding="utf-8") as f:
                data = json.load(f)
        except FileNotFoundError:
            data = {}
        data.setdefault("tweets", []).append(custom_object)
        serialized = json.dumps(data, indent=2)
        with open(path, "w", encoding="utf-8") as f:
            f.write(serialized)
    except (OSError, ValueError, TypeError, AttributeError):
        print("failed to save an entry!")
To save data in MongoDB, I recommend the pymongo library. If you haven't used MongoDB, this is a good starter tutorial.
def save_to_mongo(collection_name, custom_object):
    """Insert one record into a collection of the "twitterapitest" database.

    Args:
        collection_name: Name of the collection to insert into.
        custom_object: Document to insert.

    Returns:
        The id of the inserted document (previously computed but discarded).
    """
    # http://api.mongodb.com/python/current/tutorial.html
    client = MongoClient()
    try:
        collection = client.twitterapitest[collection_name]
        return collection.insert_one(custom_object).inserted_id
    finally:
        # A client is created on every call, so release it on every call too.
        client.close()
/**
 * Inserts one document into a collection of the "twitterapitest" database on
 * localhost:27017.
 *
 * @param collectionName name of the collection to insert into
 * @param obj            document to insert
 * @throws UnknownHostException if the database host cannot be resolved
 */
private static void saveToMongo(String collectionName,BasicDBObject obj) throws UnknownHostException {
    // http://www.mkyong.com/mongodb/java-mongodb-hello-world-example/
    MongoClient mongo = new MongoClient("localhost", 27017);
    try {
        DB db = mongo.getDB("twitterapitest");
        DBCollection table = db.getCollection(collectionName);
        table.insert(obj);
    } finally {
        // A client is created on every call, so release it on every call too.
        mongo.close();
    }
}