Первое задание (про футболистов)
library(tidyverse)
## ── Attaching packages ───────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.1 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 1.0.0 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.4.0
## Warning: package 'ggplot2' was built under R version 3.5.2
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'tidyr' was built under R version 3.5.2
## Warning: package 'purrr' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
## Warning: package 'forcats' was built under R version 3.5.2
## ── Conflicts ──────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggplot2)
# Task 1: horizontal bar chart of the 30 highest goal counts, filled by club.
json_file <- jsonlite::read_json("fpl_data_2018_2019.json")

# One row per JSON record. "Goals" is extracted as character and converted to
# integer so it can be sorted and plotted numerically. The tibble is built
# directly from the extracted fields, so the raw list is no longer dragged
# along as an extra list-column.
goals_clubs <- tibble(
  Goals = map_chr(json_file, "Goals"),
  Club = map_chr(json_file, "Club")
) %>%
  mutate(new_goals = as.integer(Goals))

# enframe() yields two columns: `name` (the list's element names, or positions
# when the list is unnamed) and `value` (each record's "name" field).
names_set <- json_file %>%
  map("name") %>%
  enframe()

# NOTE(review): the bar labels use `name` from enframe(), not the per-record
# "name" field stored in `value` - confirm this is the intended player label.
names_set %>%
  bind_cols(goals_clubs) %>%
  # Keep only the columns the plot needs.
  select(name, Club, new_goals) %>%
  arrange(desc(new_goals)) %>%
  slice(1:30) %>%
  ggplot(aes(fct_reorder(name, new_goals), new_goals, fill = Club)) +
  geom_col() +
  coord_flip() +
  labs(
    x = "",
    y = "",
    title = "Number of Goals",
    caption = "data from www.premierleague.com"
  )
Второе задание (частотные слова из сборника)
library(gutenbergr)
## Warning: package 'gutenbergr' was built under R version 3.5.2
library(tidytext)
## Warning: package 'tidytext' was built under R version 3.5.2
library(ggplot2)
# Load the stop-word list from CSV; the file has a single character column
# `word`, which is the join key used when filtering token counts.
stop_list <- read_csv("stop_list.csv")
## Parsed with column specification:
## cols(
## word = col_character()
## )
# Print the tibble to inspect the loaded stop words.
stop_list
## # A tibble: 559 x 1
## word
## <chr>
## 1 c
## 2 а
## 3 алло
## 4 без
## 5 белый
## 6 близко
## 7 более
## 8 больше
## 9 большой
## 10 будем
## # … with 549 more rows
# Download Project Gutenberg work no. 37196; its `text` column is tokenised
# in the word-frequency pipeline that follows.
women <- gutenberg_download(37196)
## Determining mirror for Project Gutenberg from http://www.gutenberg.org/robot/harvest
## Using mirror http://aleph.gutenberg.org
# Word frequencies for the downloaded collection: split the text into word
# tokens, count them in descending order, drop every word present in the
# stop list, and draw the first 20 remaining rows as horizontal bars.
women %>%
  unnest_tokens(output = word, input = text) %>%
  count(word, sort = TRUE) %>%
  anti_join(stop_list, by = "word") %>%
  head(20) %>%
  ggplot(mapping = aes(x = fct_reorder(word, n), y = n)) +
  coord_flip() +
  geom_col() +
  labs(
    x = "",
    y = "",
    title = "'Женское международное движение: Сборник статей': частотные слова",
    caption = "source www.gutenberg.org"
  )
Третье задание
library(tidyverse)
library(udpipe)
## Warning: package 'udpipe' was built under R version 3.5.2
# Fetch the Russian SynTagRus UDPipe model into the working directory.
# overwrite = FALSE reuses a model file that is already on disk instead of
# downloading it again on every run.
rumodel <- udpipe_download_model(
  language = "russian-syntagrus",
  overwrite = FALSE
)
## Downloading udpipe model from https://raw.githubusercontent.com/jwijffels/udpipe.models.ud.2.4/master/inst/udpipe-ud-2.4-190531/russian-syntagrus-ud-2.4-190531.udpipe to /Users/veronikafajnberg/R_hwrk14/russian-syntagrus-ud-2.4-190531.udpipe
## Visit https://github.com/jwijffels/udpipe.models.ud.2.4 for model license details
# Task 3: distribution of parts of speech in Project Gutenberg work no. 5316.
tabak <- gutenberg_download(5316)

# Join all text lines into a single string and annotate it with the UDPipe
# model referenced by `rumodel`.
# NOTE(review): the object name `table` is kept so existing references keep
# working; it coincides with the name of base R's table() function.
table <- str_c(tabak$text, collapse = " ") %>%
  udpipe(rumodel)

# Recode the universal POS tags, then count tokens per tag.
# Lemmas are compared exactly: a substring test for "бы" would also hit
# unrelated lemmas that merely contain those letters (e.g. "быстрый",
# "чтобы") and relabel them as particles.
counted_upos <- table %>%
  mutate(upos = case_when(
    lemma == "быть" ~ "VERB", # the verb "быть" is counted as a verb
    lemma == "бы" ~ "PART",   # the particle "бы" is counted as a particle
    upos == "DET" ~ "PART",   # determiners are merged into particles
    TRUE ~ upos               # everything else (including NA) is unchanged
  )) %>%
  count(upos)

# Bar chart of tag counts; fct_reorder() orders the bars by count, so no
# separate sorting step is needed before plotting.
counted_upos %>%
  ggplot(aes(fct_reorder(upos, n), n)) +
  geom_col() +
  labs(x = "", y = "", title = "Красавице, которая нюхала табак: части речи")