den #Tatort auf Twitter verfolgen

Am Sonntag habe ich nicht den Tatort geschaut, was mich auf die Frage brachte: Kann ich den Tatort auf Twitter "nachschauen"? Geht das nur mithilfe der Tweets mit dem Hashtag #Tatort? Hier ein Selbstversuch:

Erste Erkenntnis: Es gibt genügend Tweets dazu.

Dann kann ich ja direkt in die Analyse einsteigen. Finde ich mithilfe der meistbenutzten Wörter mehr heraus? Der Tatort scheint wohl in Dresden zu spielen und etwas mit Sanitätern zu tun zu haben:

Vielleicht erfahre ich ja auch etwas aus den häufigsten Bi- und Trigrammen. Hier werden die Häufigkeiten aber schon deutlich geringer. Immerhin erfahre ich, wer mutmaßlich mitspielt, und kann außerdem schlussfolgern, dass der Tatort wohl für viele die Gute-Nacht-Geschichte zu sein scheint.

Und wie kam der Tatort an? Die Rezeption, bzw. der Sentimentwert, ist positiv. Scheint also eine gute Tatort-Folge gewesen zu sein. So gibt es mehr positive als negative Wörter (nach dem Abgleich mit der SentiWS-Bibliothek):

Und war der Tatort spannend? Nun, zumindest kann der zeitliche Verlauf der Tweets im 10-Minuten-Takt auch als eine Art Fieberkurve gelesen werden. Es gab a) viel Vorfreude und b) war der Täter vielleicht ja sogar schon vor 21:00 Uhr identifiziert? Das ist nur eine Mutmaßung, da ich den Tatort tatsächlich nicht gesehen habe.

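Die SentiWS-Bibliothek stammt vom Wortschatz-Projekt der Universität Leipzig:

R. Remus, U. Quasthoff & G. Heyer: SentiWS - a Publicly Available German-language Resource for Sentiment Analysis.
In: Proceedings of the 7th International Language Resources and Evaluation (LREC'10), 2010

D. Goldhahn, T. Eckart & U. Quasthoff: Building Large Monolingual Dictionaries at the Leipzig Corpora Collection: From 100 to 200 Languages.
In: Proceedings of the 8th International Language Resources and Evaluation (LREC'12), 2012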

Und hier noch der Code zu der Analyse:

library(rtweet)
library(tidyr)
library(dplyr)
library(ggplot2)
library(viridis)
library(lubridate)
library(tidytext)
library(stopwords)
library(scales)

# Fetch tweets carrying the hashtag #Tatort: German-language only, retweets
# excluded, with at most 18000 results requested.
tweets_tatort2 <- search_tweets(
  q = "#Tatort",
  n = 18000,
  include_rts = FALSE,
  lang = "de"
)

# Only the timestamp and the tweet text are used by the analysis below.
tweets_data <- select(tweets_tatort2, created_at, text)

# Plot 1 - number of tweets over time
# Tweet timestamps are binned into 1-hour intervals with cut(); the resulting
# factor is converted back to POSIXct so the x axis is a continuous time axis.
# NOTE(review): the bin labels are re-parsed by as.POSIXct() without an
# explicit tz -- confirm the axis shows the intended clock time.
tweets_data %>%
  mutate(date = as.POSIXct(cut(created_at, breaks = "1 hours"))) %>%
  count(date) %>%
  # Keep only the bins from the morning of the broadcast day onwards.
  filter(date >= "2021-02-07 03:00:00") %>%
  ggplot(aes(x = date, y = n)) +
  geom_col() +
  labs(
    title = "Anzahl an Tweets mit dem Hashtag #Tatort",
    subtitle = "in 1h Intervallen",
    caption = "Plot1",
    x = NULL,
    y = NULL
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

# Plot 2 - most frequent terms
# Stop-word table: a handful of corpus-specific tokens (URL fragments, the
# hashtag itself, common filler words) stacked on top of the German list from
# the stopwords package. The `lexicon` column records where each word came from.
# NOTE(review): "fr" is presumably "für" with a lost umlaut -- confirm.
custom_stop_words <- bind_rows(
  tibble(
    word = c(
      "twitter", "t.co", "mal", "fr", "tatort",
      "ja", "https", "http", "dass", "beim"
    ),
    lexicon = "custom"
  ),
  tibble(
    word = stopwords("de"),
    lexicon = "stopwords"
  )
)

# One row per token: number the tweets, split the text into words and remove
# every word that appears in the stop-word table.
tweets_words <- tweets_data %>%
  mutate(tweet_number = row_number()) %>%
  as_tibble() %>%
  unnest_tokens(output = word, input = text) %>%
  anti_join(custom_stop_words, by = "word")

# Horizontal bar chart of the 15 most frequent remaining words; reorder()
# sorts the bars by their count.
tweets_words %>%
  count(word, sort = TRUE) %>%
  slice(1:15) %>%
  mutate(word = reorder(word, n)) %>%
  ggplot(aes(x = word, y = n, fill = word)) +
  geom_col(show.legend = FALSE) +
  scale_fill_viridis(discrete = TRUE, option = "cividis") +
  coord_flip() +
  labs(
    title = "die häufigsten Begriffe",
    subtitle = "in Tweets mit dem Hashtag #Tatort",
    caption = "Plot2",
    x = NULL,
    y = NULL
  ) +
  theme_minimal()

# Plot 3: most frequent bigrams
# Split every tweet into two-word sequences, discard those in which either
# word is a stop word, and glue the remaining pairs back together.
bigrams <- tweets_data %>%
  unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  separate(bigram, c("word1", "word2"), sep = " ") %>%
  filter(
    !word1 %in% custom_stop_words$word,
    !word2 %in% custom_stop_words$word
  ) %>%
  # Drop rows in which either word is missing.
  drop_na(word1, word2) %>%
  unite(bigram, word1, word2, sep = " ") %>%
  # Remove bigrams that contain a standalone number. A logical mask is used
  # instead of negative grep() indices: when nothing matches, grep() returns
  # integer(0), and indexing with -integer(0) selects zero rows, which would
  # silently empty the whole table.
  filter(!grepl("\\b\\d+\\b", bigram))

# Horizontal bar chart of the 15 most frequent bigrams, sorted by count.
bigrams %>%
  count(bigram, sort = TRUE) %>%
  slice(1:15) %>%
  mutate(bigram = reorder(bigram, n)) %>%
  ggplot(aes(x = bigram, y = n, fill = bigram)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  scale_fill_viridis(discrete = TRUE, option = "cividis") +
  theme_minimal() +
  labs(
    x = NULL, y = NULL,
    title = "die häufigsten Bigramme",
    subtitle = "in Tweets mit dem Hashtag #Tatort",
    caption = "Plot 3"
  )

# Plot 4: most frequent trigrams
# Split every tweet into three-word sequences, discard those containing a
# stop word in any position, and glue the remaining triples back together.
# The united column is named `trigrams`, like the table itself.
trigrams <- tweets_data %>%
  unnest_tokens(trigram, text, token = "ngrams", n = 3) %>%
  separate(trigram, c("word1", "word2", "word3"), sep = " ") %>%
  filter(
    !word1 %in% custom_stop_words$word,
    !word2 %in% custom_stop_words$word,
    !word3 %in% custom_stop_words$word
  ) %>%
  # Drop rows in which any of the three words is missing.
  drop_na(word1, word2, word3) %>%
  unite(trigrams, word1, word2, word3, sep = " ") %>%
  # Remove trigrams that contain a standalone number. A logical mask is used
  # instead of negative grep() indices: when nothing matches, grep() returns
  # integer(0), and indexing with -integer(0) selects zero rows, which would
  # silently empty the whole table.
  filter(!grepl("\\b\\d+\\b", trigrams))

# Horizontal bar chart of the 15 most frequent trigrams, sorted by count.
trigrams %>%
  count(trigrams, sort = TRUE) %>%
  slice(1:15) %>%
  mutate(trigrams = reorder(trigrams, n)) %>%
  ggplot(aes(x = trigrams, y = n, fill = trigrams)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  scale_fill_viridis(discrete = TRUE, option = "cividis") +
  theme_minimal() +
  labs(
    x = NULL, y = NULL,
    title = "die häufigsten Trigramme",
    subtitle = "in Tweets mit dem Hashtag #Tatort",
    caption = "Plot 4"
  )

# Sentiment analysis
# Attach the lexicon columns of SentiWS_df to every token; tokens without a
# lexicon entry get NA. SentiWS_df is not created in this script and must
# already exist in the session.
# NOTE(review): unnest_tokens() lower-cases tokens by default -- confirm that
# the `word` column of SentiWS_df is lower-cased as well, otherwise
# capitalised lexicon entries can never match.
tweets_sentiment <- tweets_words %>%
  left_join(SentiWS_df, by = "word")

# Overall balance: count the matched tokens per polarity, put the counts side
# by side, and derive the difference (`sum`) and the ratio (`relation`) of
# positive to negative words. Expects the polarity values "positive" and
# "negative" to both be present.
tweets_sentiment %>%
  drop_na() %>%
  count(Polarität) %>%
  # pivot_wider() replaces the superseded spread(); a named-list fill keeps
  # the count column integer.
  pivot_wider(
    names_from = Polarität,
    values_from = n,
    values_fill = list(n = 0L)
  ) %>%
  mutate(
    sum = positive - negative,
    relation = positive / negative
  ) %>%
  as.data.frame()

# Plot 5 - number of positive and negative words per 10-minute interval
# Tokens with a lexicon match are binned into 10-minute intervals, restricted
# to the evening of the broadcast, and counted per interval and polarity.
# NOTE(review): cut() appears to anchor its bins at the earliest timestamp in
# the data rather than at full 10-minute marks, and the bin labels are
# re-parsed without an explicit tz -- confirm the axis shows what is intended.
tweets_sentiment %>%
  drop_na() %>%
  mutate(date = as.POSIXct(cut(created_at, breaks = "10 min"))) %>%
  filter(
    date >= "2021-02-07 18:00:00",
    date <= "2021-02-07 23:00:00"
  ) %>%
  count(date, Polarität) %>%
  ggplot(aes(x = date, y = n, group = Polarität, colour = Polarität)) +
  geom_line(size = 0.6, alpha = 0.6) +
  scale_colour_brewer(palette = "Set1") +
  labs(
    title = "Anzahl positiver & negativer Wörter",
    subtitle = "in Tweets mit dem Hashtag #Tatort",
    caption = "Plot 5",
    x = NULL,
    y = NULL
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5))

Photo by Andrea Piacquadio from Pexels

aufschrieb

Dieses Blog durchsuchen

den #Tatort auf Twitter verfolgen

Labels

Beliebte Posts aus diesem Blog

Sentiment-Analyse von deutschen Texten in R

Was ist fremd?

Migration und Bevölkerungsentwicklung: Solidarität und Selbsthilfe

die Hot-Dog-Ökonomie

die schöne Welt von Red Bull

Verspargelung der Landschaft