View more »
Answered 2 days after Jun 08, 2023

Answer To:

Pratibha answered on Jun 10 2023
35 Votes
Text scraping and Analysis
Text scraping and Analysis
2023-06-10
API Setup
library("rtweet")
## Warning: package 'rtweet' was built under R version 4.2.3
library("base64enc")
library("httpuv")
## Warning: package 'httpuv' was built under R version 4.2.3
library("magrittr")
library("dplyr")
## Warning: package 'dplyr' was built under R version 4.2.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("textdata")
## Warning: package 'textdata' was built under R version 4.2.3
# Authenticate with the Twitter API.
#
# Credentials are read from environment variables (for example set in
# ~/.Renviron) instead of being written into the script, so that secrets are
# never shared along with the code. Any keys that were previously hard-coded
# here must be treated as compromised and revoked in the developer portal.
app <- "GretaProject_2023"
api_key <- Sys.getenv("TWITTER_API_KEY")
api_secret_key <- Sys.getenv("TWITTER_API_SECRET")
acc_token <- Sys.getenv("TWITTER_ACCESS_TOKEN")
acc_secret_token <- Sys.getenv("TWITTER_ACCESS_SECRET")

# Fail early with a clear message rather than with an opaque API error later.
if (!all(nzchar(c(api_key, api_secret_key, acc_token, acc_secret_token)))) {
  stop(
    "Twitter credentials are missing. Set TWITTER_API_KEY, ",
    "TWITTER_API_SECRET, TWITTER_ACCESS_TOKEN and TWITTER_ACCESS_SECRET.",
    call. = FALSE
  )
}

# Generate the token used by the rtweet calls that follow.
# NOTE(review): create_token() is deprecated since rtweet 1.0.0; see
# vignette("auth") for the replacement before upgrading further.
create_token(
  app = app,
  consumer_key = api_key,
  consumer_secret = api_secret_key,
  access_token = acc_token,
  access_secret = acc_secret_token
)
## Warning: `create_token()` was deprecated in rtweet 1.0.0.
## ℹ See vignette('auth') for details
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated
.
## Saving auth to
## 'C:\Users\Pratibha\AppData\Roaming/R/config/R/rtweet/create_token.rds'
1. Followed By Greta
# Download the accounts that Greta Thunberg follows ("friends" in Twitter
# terminology), requesting up to 100,000 of them.
set.seed(123)
greta_friends <- get_friends(
  "GretaThunberg",
  n = 100000
)
# Preview the first six rows of the result.
head(greta_friends, n = 6L)
## # A tibble: 6 × 2
## from_id to_id
##
## 1 GretaThunberg 42643305
## 2 GretaThunberg 1450363558483709954
## 3 GretaThunberg 1663643377215127553
## 4 GretaThunberg 1513242630217519104
## 5 GretaThunberg 1645750061438205952
## 6 GretaThunberg 1461716693437214722
# IDs of the accounts Greta follows
friend_ids <- greta_friends[["to_id"]]
# Profile details (including follower counts) for each of those accounts
friend_info <- lookup_users(friend_ids)
# Drop accounts whose bio mentions "company" or "organization"
# (case-insensitive match on the description text)
mentions_org <- grepl(
  pattern = "company|organization",
  x = friend_info[["description"]],
  ignore.case = TRUE
)
friend_info <- friend_info[!mentions_org, ]
friend_info
## # A tibble: 2,865 × 23
## id id_str name screen_name location derived url description protected
##
## 1 4.26e 7 42643… Hong… honghoangc… "Ho Chi… http… "Environme… FALSE
## 2 1.45e18 14503… RePl… letsreplan… "Europe" http… "We’re a c… FALSE
## 3 1.66e18 16636… Peop… PeopleFFut… "" "" FALSE
## 4 1.51e18 15132… Scie… SR_Netherl… "" http… "Scientist… FALSE
## 5 1.65e18 16457… Frid… F4F_ROSA "Nepal" http… "FFF_South… FALSE
## 6 1.46e18 14617… Nich… OmonukN "Planet… http… "A Climate… FALSE
## 7 1.36e18 13632… Ende… ende_gelan… "Brunsb… http… "Climate j… FALSE
## 8 1.60e18 16023… Kari… k_nuttipil… "" "" FALSE
## 9 1.66e18 16571… XR M… XRMothersUg "Uganda" "We refuse… FALSE
## 10 8.31e 8 83100… Dr A… PerrinAbi "York, … "Molecular… FALSE
## # ℹ 2,855 more rows
## # ℹ 14 more variables: verified , followers_count ,
## # friends_count , listed_count , favourites_count ,
## # statuses_count , created_at , profile_banner_url ,
## # profile_image_url_https , default_profile ,
## # default_profile_image , withheld_in_countries , entities ,
## # withheld_scope
## ℹ Tweets data at tweets_data()
# Rank friends from most to fewest followers and keep the first 12
rank_by_followers <- order(friend_info[["followers_count"]], decreasing = TRUE)
ranked_friends <- friend_info[rank_by_followers, ]
top_friends <- head(ranked_friends, n = 12)
top_friends
## # A tibble: 12 × 23
## id id_str name screen_name location derived url description protected
##
## 1 8.13e5 813286 Bara… BarackObama "Washin… http… "Dad, husb… FALSE
## 2 1.88e7 18839… Nare… narendramo… "India" http… "Prime Min… FALSE
## 3 1.58e7 15846… Elle… EllenDeGen… "Califo… http… "Comedian,… FALSE
## 4 7.59e5 759251 CNN CNN "" http… "It’s our … FALSE
## 5 8.07e5 807095 The … nytimes "New Yo… http… "News tips… FALSE
## 6 4.72e8 47174… PMO … PMOIndia "India" http… "Office of… FALSE
## 7 1.94e7 19397… Opra… Oprah "" http… "" FALSE
## 8 7.42e5 742143 BBC … BBCWorld "London… http… "News, fea… FALSE
## 9 1.81e8 18050… Inst… instagram "" http… "Discover … FALSE
## 10 1.34e9 13398… Hill… HillaryCli… "New Yo… http… "2016 Demo… FALSE
## 11 2.87e7 28706… P!nk Pink "los an… http… "My new al… FALSE
## 12 1.75e7 17471… Nati… NatGeo "Global" http… "Taking ou… FALSE
## # ℹ 14 more variables: verified , followers_count ,
## # friends_count , listed_count , favourites_count ,
## # statuses_count , created_at , profile_banner_url ,
## # profile_image_url_https , default_profile ,
## # default_profile_image , withheld_in_countries , entities ,
## # withheld_scope
## ℹ Tweets data at tweets_data()
# Print summary information about the friends
class(top_friends)
## [1] "users" "tbl_df" "tbl" "data.frame"
# seq_len() yields an empty sequence for a zero-row table, whereas
# 1:nrow(x) would iterate over c(1, 0) and print bogus entries.
for (i in seq_len(nrow(top_friends))) {
  cat("Friend", i, "\n")
  cat("Name:", top_friends$name[i], "\n")
  cat("Followers Count:", top_friends$followers_count[i], "\n")
  cat("Description:", top_friends$description[i], "\n\n")
}
## Friend 1
## Name: Barack Obama
## Followers Count: 132386379
## Description: Dad, husband, President, citizen.
##
## Friend 2
## Name: Narendra Modi
## Followers Count: 89217028
## Description: Prime Minister of India
##
## Friend 3
## Name: Ellen DeGeneres
## Followers Count: 75715030
## Description: Comedian, talk show host and ice road trucker. My tweets are real, and they’re spectacular.
##
## Friend 4
## Name: CNN
## Followers Count: 61425940
## Description: It’s our job to #GoThere & tell the most difficult stories. For breaking news, follow @CNNBRK and download our app https://t.co/ceNBoNi8y6
##
## Friend 5
## Name: The New York Times
## Followers Count: 55066472
## Description: News tips? Share them here: https://t.co/ghL9OoYKMM
##
## Friend 6
## Name: PMO India
## Followers Count: 53268439
## Description: Office of the Prime Minister of India
##
## Friend 7
## Name: Oprah Winfrey
## Followers Count: 42443961
## Description:
##
## Friend 8
## Name: BBC News (World)
## Followers Count: 39877268
## Description: News, features and analysis from the World's newsroom. Breaking news, follow @BBCBreaking. UK news, @BBCNews. Latest sports news @BBCSport
##
## Friend 9
## Name: Instagram
## Followers Count: 33351158
## Description: Discover what's next on Instagram ?✨
##
## Friend 10
## Name: Hillary Clinton
## Followers Count: 31424886
## Description: 2016 Democratic Nominee, SecState, Senator, hair icon. Mom, Wife, Grandma x3, lawyer, advocate, fan of walks in the woods & standing up for our democracy.
##
## Friend 11
## Name: P!nk
## Followers Count: 31117062
## Description: My new album TRUSTFALL out NOW ? Summer Carnival European and North American tickets on sale! ??
##
## Friend 12
## Name: National Geographic
## Followers Count: 28902510
## Description: Taking our understanding and awareness of the world further for more than 130 years
2. Followers of Greta
# Download up to 10,000 of the accounts that follow Greta Thunberg
set.seed(123)
followers <- get_followers(
  "GretaThunberg",
  n = 10000
)
# Preview the first six rows of the result.
head(followers, n = 6L)
## # A tibble: 6 × 2
## from_id to_id
##
## 1 1445129105146621963 GretaThunberg
## 2 1516813445890514955 GretaThunberg
## 3 1448607481421205504 GretaThunberg
## 4 237276143 GretaThunberg
## 5 1542950228042694656 GretaThunberg
## 6 422872595 GretaThunberg
# Look up the profile of every follower ID.
descriptions <- lookup_users(followers$from_id)
library(dplyr)
# Add profile fields to the followers data frame.
# The lookup result is aligned to `followers` by user ID rather than by row
# position: copying columns positionally would fail, or silently attach the
# wrong profile to a follower, whenever the lookup returns fewer rows than
# requested or returns them in a different order. Followers without a match
# get NA in every profile column.
profile_row <- match(
  as.character(followers$from_id),
  as.character(descriptions$id_str)
)
profile_cols <- c(
  "description", "followers_count", "friends_count",
  "name", "screen_name", "location"
)
for (profile_col in profile_cols) {
  followers[[profile_col]] <- descriptions[[profile_col]][profile_row]
}
# Rank by follower count (NA counts sort last) and keep the 12 largest.
followers <- followers[order(followers$followers_count, decreasing = TRUE), ]
top_followers <- head(followers, 12)
# Print a short profile summary for each of the top followers.
# seq_len() yields an empty sequence for a zero-row table, whereas
# 1:nrow(x) would iterate over c(1, 0) and print bogus entries.
for (i in seq_len(nrow(top_followers))) {
  cat("Follower", i, "\n")
  cat("Name:", top_followers$name[i], "\n")
  cat("Screen Name:", top_followers$screen_name[i], "\n")
  cat("Followers Count:", top_followers$followers_count[i], "\n")
  cat("Location:", top_followers$location[i], "\n\n")
  cat("Description:", top_followers$description[i], "\n\n")
}
## Follower 1
## Name: Matthew VanDyke
## Screen Name: Matt_VanDyke
## Followers Count: 513006
## Location: Ukraine
##
## Description: Founder, Sons of Liberty International, #veterans training Ukrainian forces to fight Russia. A 501c3 nonprofit org: https://t.co/iJVW8PgiN9
##
## Follower 2
## Name: Giles Paley-Phillips
## Screen Name: eliistender10
## Followers Count: 407783
## Location: Seaford
##
## Description: I write books, films & produce. Half of @blankpod @forgotpodcast @unquestionpod Guitar in @burnthousemusic Ambassador for @actionaidUK Happily mediocre
##
## Follower 3
## Name: Anton Gerashchenko
## Screen Name: Gerashchenko_en
## Followers Count: 364676
## Location: Ukraine
##
## Description: Ukrainian patriot. Advisor to the Minister of Internal Affairs of Ukraine. Founder of the Institute of the Future. Official enemy of Russian propaganda
##
## Follower 4
## Name: ????? ?????
## Screen Name: pussyrrriot
## Followers Count: 245534
## Location: sugar mommy
##
## Description: spent 2 years in jail for fighting putin / global protest art movement ??? NOT a punk rock band
##
## Follower 5
## Name: Daily Star
## Screen Name: dailystar
## Followers Count: 233401
## Location: London
##
## Description: Home of Fun Stuff! Follow @StarBreaksNews to be the first to know about #BreakingNews
##
## Follower 6
## Name: Andriy Yermak
## Screen Name: AndriyYermak
## Followers Count: 210274
## Location:
##
## Description: Керівник Офісу Президента України / Head of the Office of the President of Ukraine
##
## Follower 7
## Name: ōLand by Overline
## Screen Name: overlinenetwork
## Followers Count: 162873
## Location: Jackson, WY
##
## Description: 2M+ users. Personal ownership is everything. Your crypto, your creativity, and your internet.
##
## Follower 8
## Name: Jonathan “Loda” Berg
## Screen Name: LodaBerg
## Followers Count: 139965
## Location:
##
## Description: CEO of @thealliancegg, TI3 winner, Living legend, bringer of balance.
##
## Follower 9
## Name: Orb Planet ❁ ?? Cͨliͥmͫaͣᴛⷮeͤ ? Blue ?⚓
## Screen Name: OrbPlanet
## Followers Count: 89955
## Location: ? ??ℝ?ℍ ? ?
##
## Description: #ClimateActionNow?#Science?#?????????????
##
## ?#VoteBlue ?#S̅o̅l̅a̅r?#E̷l̷e̷c̷t̷r̷i̷f̷y̷ ⚡
##
## #Renewables?#Nature #Oceans ?#Ecocide?#Pollution
##
## Follower 10
## Name: UkrARMY cats & dogs
## Screen Name: UAarmy_animals
## Followers Count: 87971
## Location: Kyiv
##
## Description: We fight for freedom and for Ukraine. For donates PayPal: vitaliys007@gmail.com and https://t.co/ETJVHY357A
##
## Follower 11
## Name: Vox Populi Noticias
## Screen Name: VoxPopuliNoti
## Followers Count: 73885
## Location: Victoria, Tamaulipas
##
## Description:
##
## Follower 12
## Name: Edinburgh International Book Festival
## Screen Name: edbookfest
## Followers Count: 68609
## Location: Edinburgh
##
## Description: The world's largest public celebration of the written word.
## Next Festival: 12-28 Aug 23, Programme Released: 14 Jun 23,
## Tickets on Sale: 29 Jun 23 #EdBookFest
# Attach rtweet (a no-op if the package is already attached)
library(rtweet)
# Preview the first rows of the top-followers table
head(top_followers)
## # A tibble: 6 × 8
## from_id to_id description followers_count friends_count name screen_name
##
## 1 431071870 Gret… "Founder, … 513006 382398 "Mat… Matt_VanDy…
## 2 23483816 Gret… "I write b… 407783 100186 "Gil… eliistende…
## 3 15054819498… Gret… "Ukrainian… 364676 883 "Ant… Gerashchen…
## 4 2479224200 Gret… "spent 2 y… 245534 1005 "\U0… pussyrrriot
## 5 20442930 Gret… "Home of F… 233401 3861 "Dai… dailystar
## 6 11492710283… Gret… "Керівник … 210274 209 "And… AndriyYerm…
## # ℹ 1 more variable: location
# Retrieve the latest tweets (up to 200 each) of the top followers.
# The timelines are fetched once only: repeating the identical request
# doubles the number of API calls and burns rate limit for no benefit.
tweets <- lapply(top_followers$from_id, function(user_id) {
  get_timeline(user = user_id, n = 200)
})
# Add a follower_id column to each data frame in the list so that every
# tweet can be traced back to its author after the frames are combined.
tweets <- Map(function(df, follower_id) {
  # rep() keeps the assignment valid for a timeline with zero rows
  df$follower_id <- rep(follower_id, nrow(df))
  df
}, tweets, top_followers$from_id)
# Combine all the data frames into a single data...
SOLUTION.PDF

Answer To This Question Is Available To Download

Related Questions & Answers

More Questions »

Submit New Assignment

Copy and Paste Your Assignment Here
April
January
February
March
April
May
June
July
August
September
October
November
December
2025
2025
2026
2027
SunMonTueWedThuFriSat
30
31
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
1
2
3
00:00
00:30
01:00
01:30
02:00
02:30
03:00
03:30
04:00
04:30
05:00
05:30
06:00
06:30
07:00
07:30
08:00
08:30
09:00
09:30
10:00
10:30
11:00
11:30
12:00
12:30
13:00
13:30
14:00
14:30
15:00
15:30
16:00
16:30
17:00
17:30
18:00
18:30
19:00
19:30
20:00
20:30
21:00
21:30
22:00
22:30
23:00
23:30