## First specify the packages of interest packages = c("tidyverse", "ggplot2", "dplyr", "phonR", "gridExtra", "rlang", "ggpubr", "sf", "sp") ## Now load or install&load all package.check <- lapply( packages, FUN = function(x) { if (!require(x, character.only = TRUE)) { install.packages(x, dependencies = TRUE) library(x, character.only = TRUE) } } ) # function to prep data prep.data <- function(data) { # filter missing data data <- subset(data, F1_20 != "--undefined--") data <- subset(data, F1_50 != "--undefined--") data <- subset(data, F1_80 != "--undefined--") data <- subset(data, F2_20 != "--undefined--") data <- subset(data, F2_50 != "--undefined--") data <- subset(data, F2_80 != "--undefined--") data <- subset(data, F3_20 != "--undefined--") data <- subset(data, F3_50 != "--undefined--") data <- subset(data, F3_80 != "--undefined--") # convert frequencies because of missing data data$F1_20 <- as.numeric(data$F1_20) data$F1_50 <- as.numeric(data$F1_50) data$F1_80 <- as.numeric(data$F1_80) data$F2_20 <- as.numeric(data$F2_20) data$F2_50 <- as.numeric(data$F2_50) data$F2_80 <- as.numeric(data$F2_80) data$F3_20 <- as.numeric(data$F3_20) data$F3_50 <- as.numeric(data$F3_50) data$F3_80 <- as.numeric(data$F3_80) # convert some columns to factors data$phoneme <- as.factor(data$phoneme) data$file <- as.factor(data$file) return(data) } prep.data.diphthongs <- function(data, diphthongs) { data2.grouped <- data %>% filter(substr(phoneme, 1, 2)%in% diphthongs) levels(data2.grouped$phoneme) <- substr(levels(data2.grouped$phoneme), 1, 2) data2.grouped.tall <- data2.grouped %>% # Relevel the vowels mutate(phoneme = factor(phoneme, levels = c("IY", "EH", "AO", "UW", "AW", "IH", "AE", "OW", "AH", "OY", "EY", "AA", "UH", "AY"))) %>% # Summarize by vowel group_by(phoneme) %>% summarize_at(vars(F1_20,F1_50,F1_80,F2_20,F2_50,F2_80), median, na.rm = TRUE) %>% # Turn it into tall data gather("formant_percent", "hz", starts_with("F")) %>% separate(formant_percent, into = c("formant", "percent"), extra = "drop") %>% spread(formant, hz) return(data2.grouped.tall) } formant.means <- function(data) { data$F1.mean <- rowMeans(data[,c('F1_20','F1_50','F1_80')]) data$F2.mean <- rowMeans(data[,c('F2_20','F2_50','F2_80')]) data$F3.mean <- rowMeans(data[,c('F3_20','F3_50','F3_80')]) return(data) } outliers.hampel <- function(data) { data.lower.boundf1 <- median(data$F1.mean) - 3 * mad(data$F1.mean) data.upper.boundf1 <- median(data$F1.mean) + 3 * mad(data$F1.mean) data.outlier.indices <- which(data$F1.mean < data.lower.boundf1 | data$F1.mean > data.upper.boundf1) data.lower.boundf2 <- median(data$F2.mean) - 3 * mad(data$F2.mean) data.upper.boundf2 <- median(data$F2.mean) + 3 * mad(data$F2.mean) data.outlier.indices <- append(data.outlier.indices, which(data$F2.mean < data.lower.boundf2 | data$F2.mean > data.upper.boundf2)) data.lower.boundf3 <- median(data$F3.mean) - 3 * mad(data$F3.mean) data.upper.boundf3 <- median(data$F3.mean) + 3 * mad(data$F3.mean) data.outlier.indices <- append(data.outlier.indices, which(data$F3.mean < data.lower.boundf3 | data$F3.mean > data.upper.boundf3)) # we have to check if the outlier list is empty # don't ask me why that rlang:: is needed, stackoverflow said so ^^ if(rlang::is_empty(data.outlier.indices) == TRUE) { data <- data } else { data <- data[-data.outlier.indices,] } return(data) } normalize.lobanov <- function(data) { # lobanov normalization # group by file only makes sense if different files are different speakers I guess #vowels.ljspeech.lobanov = vowels.ljspeech.grouped.means %>% group_by(file) %>% mutate(f1scale = scale(F1.mean), f2scale = scale(F2.mean), f1lobanov = normLobanov(F1.mean), f2lobanov = normLobanov(F2.mean)) data.lobanov <- data %>% mutate(f1scale = scale(F1.mean), f2scale = scale(F2.mean), f1lobanov = normLobanov(F1.mean), f2lobanov = normLobanov(F2.mean)) return(data.lobanov) } plot.monophthongs <- function(data, means, xmin, xmax, ymin, ymax, title.text) { plot <- ggplot(NULL, aes(x = f2lobanov, y = f1lobanov, color = phoneme, label = phoneme)) + geom_point(data = data, aes(color = phoneme),alpha = 0.1, size = 1) + stat_ellipse(data = data, level = 0.67, size = 2, alpha = 1.0) + geom_label(data = means, aes(x = mean_F2, y = mean_F1), size = 10, alpha = 1.0) + xlim(xmax, xmin) + ylim(ymax, ymin) + scale_color_discrete(breaks = c("IY", "IH", "EY", "EH", "AE", "AA", "AO", "OW", "UH", "UW", "AH")) + guides(color = "black") + theme_classic() + theme(axis.line = element_line(colour = 'black', size = 2), text = element_text(size = 20), plot.title = element_text(face = "bold", hjust = 0.5), legend.position="none", plot.subtitle = element_text(hjust = 0.5)) + labs(title = title.text, subtitle = "Hampel filtered, Lobanov normalized", x = "F2", y = "F1") + #labs(x = "F2", y = "F1") + coord_fixed() return(plot) } plot.diphthongs <- function(data, onset.labels, title.text) { plot <- ggplot(data, aes(F2, F1, color = phoneme, group = phoneme)) + geom_path(arrow = arrow(ends = "last", type = "closed", length = unit(0.1, "inches"))) + geom_label(data = onset.labels, aes(label = phoneme)) + labs(title = title.text, subtitle = "Raw Frequencies")+ scale_x_reverse() + scale_y_reverse() + theme_classic() + theme(axis.line = element_line(colour = 'black', size = 2), text = element_text(size = 20), plot.title = element_text(face = "bold", hjust = 0.5), legend.position="none", plot.subtitle = element_text(hjust = 0.5)) + guides(color = "black") return(plot) } find.hull <- function(dataframe) { dataframe[chull(dataframe$mean_F2, dataframe$mean_F1), ] #dataframe <- rbind(dataframe, dataframe[1,]) #return(dataframe) } find.hull.all <- function(dataframe) { dataframe[chull(dataframe$f2lobanov, dataframe$f1lobanov), ] #dataframe <- rbind(dataframe, dataframe[1,]) #return(dataframe) } # read data from file working.path = "/home/sven/tmp/CIVEMSA/30min_kld_monothongs_proto" # working.path = "/home/sven/tmp/CIVEMSA/kld_30min_proto" # working.path = "/home/sven/tmp/CIVEMSA/data/random_30min" setwd(working.path) data.ljspeech.original <- read.csv('formants_original.csv') data.ljspeech.inferred <- read.csv('formants_inferred.csv') data.ljspeech.original <- prep.data(data.ljspeech.original) data.ljspeech.inferred <- prep.data(data.ljspeech.inferred) # define vowel sets arpabet_vowels = c('AA', 'AE', 'AH', 'AO', 'AW', 'AX', 'AY', 'EH', 'ER', 'EY', 'IH', 'IX', 'IY', 'OW', 'OY', 'UH', 'UW', 'UX') monophthongs = c('AA', 'AE', 'AH', 'AO', 'AX', 'EH', 'ER', 'IH', 'IX', 'IY', 'UH', 'UW', 'UX') diphthongs = c('AW', 'AY', 'EY', 'OW', 'OY') #selected_vowels = c('IH', 'IY') vowels.pinyin = c('a', 'o', 'e', 'u', 'v', 'i') diphthongs.pinyin = c('ei', 'ai', 'ou', 'ao', 've') ipa.symbols = c('ɑ', 'æ', 'ʌ', 'ɔ', 'aʊ', 'ə', 'aɪ', 'ɛ', 'ɝ', 'eɪ', 'ɪ', 'ɨ', 'i', 'oʊ', 'ɔɪ', 'ʊ', 'u', 'ʉ') monophthongs.ipa = c('ɑ', 'æ', 'ʌ', 'ɔ', 'ə', 'ɛ', 'ɝ', 'ɪ', 'ɨ', 'i', 'ʊ', 'u', 'ʉ') # only stressed vowels for GuoYa #vowels.china <- data.china %>% filter(grepl("1", phoneme)) #vowels.china <- vowels.china %>% filter(substr(phoneme, 1, 2)%in% monophthongs) # filter vowels vowels.ljspeech.original <- data.ljspeech.original %>% filter(substr(phoneme, 1, 2)%in% monophthongs) vowels.ljspeech.inferred <- data.ljspeech.inferred %>% filter(substr(phoneme, 1, 2)%in% monophthongs) # re-apply factor levels vowels.ljspeech.original$phoneme <- factor(vowels.ljspeech.original$phoneme) vowels.ljspeech.inferred$phoneme <- factor(vowels.ljspeech.inferred$phoneme) # group vowels (collapse stress) vowels.ljspeech.original.grouped <- vowels.ljspeech.original vowels.ljspeech.inferred.grouped <- vowels.ljspeech.inferred levels(vowels.ljspeech.original.grouped$phoneme) <- substr(levels(vowels.ljspeech.original.grouped$phoneme), 1, 2) levels(vowels.ljspeech.inferred.grouped$phoneme) <- substr(levels(vowels.ljspeech.inferred.grouped$phoneme), 1, 2) # calculate formant means vowels.ljspeech.original.grouped.means <- formant.means(vowels.ljspeech.original.grouped) vowels.ljspeech.inferred.grouped.means <- formant.means(vowels.ljspeech.inferred.grouped) # filter outliers vowels.ljspeech.original.grouped.means <- outliers.hampel(vowels.ljspeech.original.grouped.means) vowels.ljspeech.inferred.grouped.means <- outliers.hampel(vowels.ljspeech.inferred.grouped.means) # Normalize data vowels.ljspeech.original.lobanov <- normalize.lobanov(vowels.ljspeech.original.grouped.means) vowels.ljspeech.inferred.lobanov <- normalize.lobanov(vowels.ljspeech.inferred.grouped.means) # add normalized F2-F1 column #vowels.ljspeech.original.lobanov <- vowels.ljspeech.original.lobanov %>% # mutate(f2f1lobanov = .$f2lobanov - .$f1lobanov) #vowels.ljspeech.inferred.lobanov <- vowels.ljspeech.inferred.lobanov %>% # mutate(f2f1lobanov = .$f2lobanov - .$f1lobanov) # calculate means of lobanov normalized data ljspeech.original.means.lobanov <- vowels.ljspeech.original.lobanov %>% group_by(phoneme) %>% summarise(mean_F1 = mean(f1lobanov), mean_F2 = mean(f2lobanov)) ljspeech.inferred.means.lobanov <- vowels.ljspeech.inferred.lobanov %>% group_by(phoneme) %>% summarise(mean_F1 = mean(f1lobanov), mean_F2 = mean(f2lobanov)) # calculate min and max f1 and f2 to scale the axes. # Note: not sure if this is actually necessary with the current way of plotting. ljspeech.original.x.min <- min(vowels.ljspeech.original.lobanov$f2lobanov) ljspeech.original.x.max <- max(vowels.ljspeech.original.lobanov$f2lobanov) ljspeech.original.y.min <- min(vowels.ljspeech.original.lobanov$f1lobanov) ljspeech.original.y.max <- max(vowels.ljspeech.original.lobanov$f1lobanov) ljspeech.inferred.x.min <- min(vowels.ljspeech.inferred.lobanov$f2lobanov) ljspeech.inferred.x.max <- max(vowels.ljspeech.inferred.lobanov$f2lobanov) ljspeech.inferred.y.min <- min(vowels.ljspeech.inferred.lobanov$f1lobanov) ljspeech.inferred.y.max <- max(vowels.ljspeech.inferred.lobanov$f1lobanov) abs.x.min <- min(c(ljspeech.original.x.min, ljspeech.inferred.x.min)) abs.x.max <- max(c(ljspeech.original.x.max, ljspeech.inferred.x.max)) abs.y.min <- min(c(ljspeech.original.y.min, ljspeech.inferred.y.min)) abs.y.max <- max(c(ljspeech.original.y.max, ljspeech.inferred.y.max)) # plot means only # new df for means #ljspeech.original.means.lobanov.2 <- ljspeech.original.means.lobanov #ljspeech.original.means.lobanov.2$person <- "LJSpeech" #ljspeech.inferred.means.lobanov.2 <- ljspeech.inferred.means.lobanov #ljspeech.inferred.means.lobanov.2$person <- "LJSpeech" # find polygon hulls of lobanov normalized averages hull.ljspeech.original.average <- find.hull(ljspeech.original.means.lobanov[c("phoneme", "mean_F1", "mean_F2")]) hull.ljspeech.original.average <- rbind(hull.ljspeech.original.average, hull.ljspeech.original.average[1,]) hull.ljspeech.inferred.average <- find.hull(ljspeech.inferred.means.lobanov[c("phoneme", "mean_F1", "mean_F2")]) hull.ljspeech.inferred.average <- rbind(hull.ljspeech.inferred.average, hull.ljspeech.inferred.average[1,]) # create df to find hull from hull.ljspeech.original.all <- find.hull.all(as_tibble(vowels.ljspeech.original.lobanov[ , c("phoneme", "f1lobanov", "f2lobanov")])) hull.ljspeech.original.all <- rbind(hull.ljspeech.original.all, hull.ljspeech.original.all[1,]) hull.ljspeech.inferred.all <- find.hull.all(as_tibble(vowels.ljspeech.inferred.lobanov[ , c("phoneme", "f1lobanov", "f2lobanov")])) hull.ljspeech.inferred.all <- rbind(hull.ljspeech.inferred.all, hull.ljspeech.inferred.all[1,]) # quantiles confidence = 0.75 #vowels.ljspeech.original.lobanov.quantile <- vowels.ljspeech.original.lobanov %>% # group_by(phoneme) %>% # filter(f2lobanov > quantile(f2lobanov, (1-confidence)/2), f2lobanov < quantile(f2lobanov, 1-((1-confidence)/2))) %>% # filter(f1lobanov > quantile(f1lobanov, (1-confidence)/2), f1lobanov < quantile(f1lobanov, 1-((1-confidence)/2))) vowels.ljspeech.original.lobanov.quantile <- vowels.ljspeech.original.lobanov %>% group_by(phoneme) %>% filter(f2lobanov > quantile(f2lobanov, (1-confidence)/2), f2lobanov < quantile(f2lobanov, 1-((1-confidence)/2))) %>% filter(f1lobanov > quantile(f1lobanov, (1-confidence)/2), f1lobanov < quantile(f1lobanov, 1-((1-confidence)/2))) hull.ljspeech.original.quantile <- find.hull.all(as_tibble(vowels.ljspeech.original.lobanov.quantile[ , c("phoneme", "f1lobanov", "f2lobanov")])) hull.ljspeech.original.quantile <- rbind(hull.ljspeech.original.quantile, hull.ljspeech.original.quantile[1,]) vowels.ljspeech.inferred.lobanov.quantile <- vowels.ljspeech.inferred.lobanov %>% group_by(phoneme) %>% filter(f2lobanov > quantile(f2lobanov, (1-confidence)/2), f2lobanov < quantile(f2lobanov, 1-((1-confidence)/2))) %>% filter(f1lobanov > quantile(f1lobanov, (1-confidence)/2), f1lobanov < quantile(f1lobanov, 1-((1-confidence)/2))) hull.ljspeech.inferred.quantile <- find.hull.all(as_tibble(vowels.ljspeech.inferred.lobanov.quantile[ , c("phoneme", "f1lobanov", "f2lobanov")])) hull.ljspeech.inferred.quantile <- rbind(hull.ljspeech.inferred.quantile, hull.ljspeech.inferred.quantile[1,]) # create sf polygon ljspeech.original.poly.average <- st_polygon(list(as.matrix(select(hull.ljspeech.original.average, 2:3),ncol=2, byrow=TRUE))) ljspeech.inferred.poly.average <- st_polygon(list(as.matrix(select(hull.ljspeech.inferred.average, 2:3),ncol=2, byrow=TRUE))) poly.intersection.average <- st_intersection(ljspeech.original.poly.average, ljspeech.inferred.poly.average) overlap.percentage.average <- st_area(poly.intersection.average) / (st_area(ljspeech.original.poly.average)/100) ljspeech.original.poly.all <- st_polygon(list(as.matrix(select(hull.ljspeech.original.all, 2:3),ncol=2, byrow=TRUE))) ljspeech.inferred.poly.all <- st_polygon(list(as.matrix(select(hull.ljspeech.inferred.all, 2:3),ncol=2, byrow=TRUE))) poly.intersection.all <- st_intersection(ljspeech.original.poly.all, ljspeech.inferred.poly.all) overlap.percentage.all <- st_area(poly.intersection.all) / (st_area(ljspeech.original.poly.all)/100) ljspeech.original.poly.quantile <- st_polygon(list(as.matrix(select(hull.ljspeech.original.quantile, 2:3),ncol=2, byrow=TRUE))) ljspeech.inferred.poly.quantile <- st_polygon(list(as.matrix(select(hull.ljspeech.inferred.quantile, 2:3),ncol=2, byrow=TRUE))) poly.intersection.quantile <- st_intersection(ljspeech.original.poly.quantile, ljspeech.inferred.poly.quantile) overlap.percentage.quantile <- st_area(poly.intersection.quantile) / (st_area(ljspeech.original.poly.quantile)/100) plot.averages <- ggplot(NULL) + # original geom_point(data = vowels.ljspeech.original.lobanov, aes(x = f2lobanov, y = f1lobanov), alpha = 1, size = 1, color = "green") + geom_polygon(data = hull.ljspeech.original.average, aes(x = mean_F2, y = mean_F1, fill = "original"), alpha = 0.7, show.legend = T) + # geom_polygon(data = hull.ljspeech.original.quantile, aes(x = f2lobanov, y = f1lobanov, fill = "original"), alpha = 0.6) + # inferred geom_point(data = vowels.ljspeech.inferred.lobanov, aes(x = f2lobanov, y = f1lobanov),alpha = 1, size = 1, color = "red") + geom_polygon(data = hull.ljspeech.inferred.average, aes(x = mean_F2, y = mean_F1, fill = "inferred"), alpha = 0.7) + # geom_text(data = hull.ljspeech.inferred.average, aes(x = mean_F2, y = mean_F1, label = phoneme), color = "red", size = 10, alpha = 1) + # geom_polygon(data = hull.ljspeech.inferred.quantile, aes(x = f2lobanov, y = f1lobanov, fill = "inferred"), alpha = 0.6, show.legend = T) + # geom_text(data = hull.ljspeech.inferred.quantile, aes(x = f2lobanov, y = f1lobanov, label = phoneme), color = "red", size = 10, alpha = 1) + # scale xlim(abs.x.max, abs.x.min) + ylim(abs.y.max, abs.y.min) + # theme scale_fill_manual(values = c("red", "green")) + # scale_color_manual(values = c("red", "green")) + #guides(colour = guide_legend(title = "Title")) + #theme_classic() + theme_pubr() + theme(axis.line = element_line(colour = 'black', size = 2), text = element_text(size = 20), plot.title = element_text(size = 100, face = "bold", hjust = 0.5), plot.subtitle = element_text(hjust = 0.5), legend.text= element_text(size = 50)) + labs(title = "Means", x = "F2", y = "F1", fill='') + coord_fixed() plot.quantiles <- ggplot(NULL) + # original geom_point(data = vowels.ljspeech.original.lobanov, aes(x = f2lobanov, y = f1lobanov), alpha = 1, size = 1, color = "green") + # geom_polygon(data = hull.ljspeech.original.average, aes(x = mean_F2, y = mean_F1, fill = "original"), alpha = 0.7, show.legend = T) + geom_polygon(data = hull.ljspeech.original.quantile, aes(x = f2lobanov, y = f1lobanov, fill = "original"), alpha = 0.6) + # inferred geom_point(data = vowels.ljspeech.inferred.lobanov, aes(x = f2lobanov, y = f1lobanov),alpha = 1, size = 1, color = "red") + # geom_polygon(data = hull.ljspeech.inferred.average, aes(x = mean_F2, y = mean_F1, fill = "inferred"), alpha = 0.7) + # geom_text(data = hull.ljspeech.inferred.average, aes(x = mean_F2, y = mean_F1, label = phoneme), color = "red", size = 10, alpha = 1) + geom_polygon(data = hull.ljspeech.inferred.quantile, aes(x = f2lobanov, y = f1lobanov, fill = "inferred"), alpha = 0.6, show.legend = T) + # geom_text(data = hull.ljspeech.inferred.quantile, aes(x = f2lobanov, y = f1lobanov, label = phoneme), color = "red", size = 10, alpha = 1) + # scale xlim(abs.x.max, abs.x.min) + ylim(abs.y.max, abs.y.min) + # theme scale_fill_manual(values = c("red", "green")) + # scale_color_manual(values = c("red", "green")) + #guides(colour = guide_legend(title = "Title")) + #theme_classic() + theme_pubr() + theme(axis.line = element_line(colour = 'black', size = 2), text = element_text(size = 20), plot.title = element_text(size = 100, face = "bold", hjust = 0.5), plot.subtitle = element_text(hjust = 0.5), legend.text = element_text(size = 50)) + labs(title = sprintf("Central %i%%", confidence*100), x = "F2", y = "F1", fill='') + coord_fixed() plot.all <- ggplot(NULL) + # original geom_point(data = vowels.ljspeech.original.lobanov, aes(x = f2lobanov, y = f1lobanov), alpha = 1, size = 1, color = "green") + # geom_polygon(data = hull.ljspeech.original.average, aes(x = mean_F2, y = mean_F1, fill = "original"), alpha = 0.7, show.legend = T) + geom_polygon(data = hull.ljspeech.original.all, aes(x = f2lobanov, y = f1lobanov, fill = "original"), alpha = 0.6) + # inferred geom_point(data = vowels.ljspeech.inferred.lobanov, aes(x = f2lobanov, y = f1lobanov),alpha = 1, size = 1, color = "red") + # geom_polygon(data = hull.ljspeech.inferred.average, aes(x = mean_F2, y = mean_F1, fill = "inferred"), alpha = 0.7) + # geom_text(data = hull.ljspeech.inferred.average, aes(x = mean_F2, y = mean_F1, label = phoneme), color = "red", size = 10, alpha = 1) + geom_polygon(data = hull.ljspeech.inferred.all, aes(x = f2lobanov, y = f1lobanov, fill = "inferred"), alpha = 0.6, show.legend = T) + # geom_text(data = hull.ljspeech.inferred.quantile, aes(x = f2lobanov, y = f1lobanov, label = phoneme), color = "red", size = 10, alpha = 1) + # scale xlim(abs.x.max, abs.x.min) + ylim(abs.y.max, abs.y.min) + # theme scale_fill_manual(values = c("red", "green")) + # scale_color_manual(values = c("red", "green")) + #guides(colour = guide_legend(title = "Title")) + #theme_classic() + theme_pubr() + theme(axis.line = element_line(colour = 'black', size = 2), text = element_text(size = 20), plot.title = element_text(size = 100, face = "bold", hjust = 0.5), plot.subtitle = element_text(hjust = 0.5), legend.text = element_text(size = 50)) + labs(title = "All Data Points", x = "F2", y = "F1", fill='') + coord_fixed() grid.arrange(plot.averages, plot.quantiles, plot.all, ncol = 3) #save grob.vowel.space <- arrangeGrob(plot.averages, plot.quantiles, plot.all, ncol = 3) suppressMessages(ggsave(file="vowel_space.png", grob.vowel.space, scale = 2)) print(eval(sprintf("Overlap average: %f", overlap.percentage.average))) print(eval(sprintf("Overlap all: %f", overlap.percentage.all))) print(eval(sprintf("Overlap %i%% percentile: %f", confidence*100, overlap.percentage.quantile))) print(eval(sprintf("LibreOffice: Overlap average: %f", overlap.percentage.average/100))) print(eval(sprintf("LibreOffice: Overlap all: %f", overlap.percentage.all/100))) print(eval(sprintf("LibreOffice: Overlap %i%% percentile: %f", confidence*100, overlap.percentage.quantile/100))) # , legend.text = element_text(size = 50) plot.monophthongs.byphoneme <- function(data.original, data.inferred, filter.phoneme, x.min, x.max, y.min, y.max, title.text) { plot <-ggplot(NULL) + # original geom_point(data = data.original %>% filter(grepl(filter.phoneme, phoneme)), aes(x = f2lobanov, y = f1lobanov), alpha = 1, size = 1, color = "green") + geom_polygon(data = find.hull.all(data.original %>% filter(grepl(filter.phoneme, phoneme))), aes(x = f2lobanov, y = f1lobanov, fill = "original"), alpha = 0.7, show.legend = T) + #geom_text(data = hull.ljspeech.original.average, aes(x = mean_F2, y = mean_F1, label = phoneme), color = "green", size = 10, alpha = 1) + # inferred geom_point(data = data.inferred %>% filter(grepl(filter.phoneme, phoneme)), aes(x = f2lobanov, y = f1lobanov),alpha = 1, size = 1, color = "red") + geom_polygon(data = find.hull.all(data.inferred %>% filter(grepl(filter.phoneme, phoneme))), aes(x = f2lobanov, y = f1lobanov, fill = "inferred"), alpha = 0.7) + #geom_text(data = hull.ljspeech.inferred.average, aes(x = mean_F2, y = mean_F1, label = phoneme), color = "red", size = 10, alpha = 1) + # scale xlim(x.max, x.min) + ylim(y.max, y.min) + # theme scale_fill_manual(values = c("red", "green")) + theme_pubr() + theme(axis.line = element_line(colour = 'black', size = 2), text = element_text(size = 20), plot.title = element_text(size = 100, face = "bold", hjust = 0.5), legend.text = element_text(size = 50)) + labs(title = title.text, x = "F2", y = "F1", fill='') + coord_fixed() return(plot) } plots.quantiles = list() for(p in levels(vowels.ljspeech.original.lobanov.quantile$phoneme)) { plots.quantiles[[p]] = plot.monophthongs.byphoneme(vowels.ljspeech.original.lobanov.quantile, vowels.ljspeech.inferred.lobanov.quantile, p, abs.x.min, abs.x.max, abs.y.min, abs.y.max, sprintf("%s", p) ) hull.ljspeech.original.tmp <- find.hull.all(as_tibble(vowels.ljspeech.original.lobanov.quantile[ , c("phoneme", "f1lobanov", "f2lobanov")] %>% filter(grepl(p, phoneme)))) hull.ljspeech.original.tmp <- rbind(hull.ljspeech.original.tmp, hull.ljspeech.original.tmp[1,]) hull.ljspeech.inferred.tmp <- find.hull.all(as_tibble(vowels.ljspeech.inferred.lobanov.quantile[ , c("phoneme", "f1lobanov", "f2lobanov")] %>% filter(grepl(p, phoneme)))) hull.ljspeech.inferred.tmp <- rbind(hull.ljspeech.inferred.tmp, hull.ljspeech.inferred.tmp[1,]) ljspeech.original.poly.tmp <- st_polygon(list(as.matrix(select(hull.ljspeech.original.tmp, 2:3),ncol=2, byrow=TRUE))) ljspeech.inferred.poly.tmp <- st_polygon(list(as.matrix(select(hull.ljspeech.inferred.tmp, 2:3),ncol=2, byrow=TRUE))) poly.intersection.tmp <- st_intersection(ljspeech.original.poly.tmp, ljspeech.inferred.poly.tmp) overlap.percentage.tmp <- st_area(poly.intersection.tmp) / (st_area(ljspeech.original.poly.tmp)/100) print(eval(sprintf("Overlap percentage for phoneme %s for %i%% percentile: %f", p, confidence*100, overlap.percentage.tmp))) print(eval(sprintf("LibreOffice: Overlap percentage for phoneme %s for all values: %f", p, overlap.percentage.tmp/ 100))) } #do.call("grid.arrange", c(plots, ncol = 4)) # does exactly the same as next line grid.arrange(grobs = plots.quantiles, ncol = 4) grob.phonemes.quantile <- arrangeGrob(grobs = plots.quantiles, ncol = 4) suppressMessages(ggsave(file="phonemes_percentile.png", grob.phonemes.quantile, scale = 2)) plots.all = list() for(p in levels(vowels.ljspeech.original.lobanov$phoneme)) { plots.all[[p]] = plot.monophthongs.byphoneme(vowels.ljspeech.original.lobanov, vowels.ljspeech.inferred.lobanov, p, abs.x.min, abs.x.max, abs.y.min, abs.y.max, sprintf("%s", p) ) hull.ljspeech.original.tmp <- find.hull.all(as_tibble(vowels.ljspeech.original.lobanov[ , c("phoneme", "f1lobanov", "f2lobanov")] %>% filter(grepl(p, phoneme)))) hull.ljspeech.original.tmp <- rbind(hull.ljspeech.original.tmp, hull.ljspeech.original.tmp[1,]) hull.ljspeech.inferred.tmp <- find.hull.all(as_tibble(vowels.ljspeech.inferred.lobanov[ , c("phoneme", "f1lobanov", "f2lobanov")] %>% filter(grepl(p, phoneme)))) hull.ljspeech.inferred.tmp <- rbind(hull.ljspeech.inferred.tmp, hull.ljspeech.inferred.tmp[1,]) ljspeech.original.poly.tmp <- st_polygon(list(as.matrix(select(hull.ljspeech.original.tmp, 2:3),ncol=2, byrow=TRUE))) ljspeech.inferred.poly.tmp <- st_polygon(list(as.matrix(select(hull.ljspeech.inferred.tmp, 2:3),ncol=2, byrow=TRUE))) poly.intersection.tmp <- st_intersection(ljspeech.original.poly.tmp, ljspeech.inferred.poly.tmp) overlap.percentage.tmp <- st_area(poly.intersection.tmp) / (st_area(ljspeech.original.poly.tmp)/100) print(eval(sprintf("Overlap percentage for phoneme %s for all values: %f", p, overlap.percentage.tmp))) print(eval(sprintf("LibreOffice: Overlap percentage for phoneme %s for all values: %f", p, overlap.percentage.tmp/ 100))) } #do.call("grid.arrange", c(plots, ncol = 4)) # does exactly the same as next line grid.arrange(grobs = plots.all, ncol = 4) grob.phonemes.all <- arrangeGrob(grobs = plots.all, ncol = 4) suppressMessages(ggsave(file="phonemes_all.png", grob.phonemes.all, scale = 2))