forked from SzemesBotond/info-drama
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsurprise-linear-correlation-time.R
More file actions
82 lines (64 loc) · 2.64 KB
/
surprise-linear-correlation-time.R
File metadata and controls
82 lines (64 loc) · 2.64 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# Same procedure as in "surprise-pairwise-from-embedding.R".
# The only difference is that here we take the whole drama instead of comparing character pairs.
# The act-weighting section below shows how the "diff" variable, which is also used in "surprise-pairwise-from-embedding.R", is calculated.
# Unweighted similarity scores.
# For every sentence i, max_together[[i]] is the largest cosine similarity
# between its embedding and the embedding of any EARLIER sentence (j < i).
# Slots for j >= i stay 0, so the score is floored at 0 and the first sentence
# always scores 0.
# NOTE(review): cosine() is not defined in this file; presumably lsa::cosine,
# loaded elsewhere -- confirm.
a1 <- lapply(drama_longsent$embedding_num, as.numeric)
n_sentences <- length(a1)
all_together <- list()
max_together <- vector("list", n_sentences)
# seq_len() (not 1:n) so that an empty drama yields an empty result instead of
# iterating over c(1, 0) and failing on a1[[1]].
for (i in seq_len(n_sentences)) {
  # Start from all zeros, then fill in only the earlier sentences.
  all_together <- as.list(numeric(n_sentences))
  for (j in seq_len(i - 1)) {
    all_together[[j]] <- cosine(a1[[i]], a1[[j]])
  }
  max_together[[i]] <- max(unlist(all_together))
}
# One row per sentence: its position in the drama and its maximum similarity
# to any earlier sentence. seq_along() keeps the index column the same length
# as the scores even when max_together is empty.
together <- tibble(
  sentence_num = seq_along(max_together),
  similarity_score = unlist(max_together)
)
# Scatter plot of each sentence's maximum similarity against its position in
# the drama, overlaid with a linear trend line in red.
ggplot(
  data = together,
  mapping = aes(x = sentence_num, y = similarity_score)
) +
  geom_point(alpha = 0.2) +
  geom_smooth(method = "lm", color = "red") +
  theme_bw()
# Linear model of similarity as a function of sentence position. This is the
# same orientation (y ~ x) as the geom_smooth(method = "lm") trend line in the
# plot, so the reported slope and intercept describe the drawn line.
# Columns are referenced through `data =` rather than `together$...` so the
# coefficient names are clean and predict() works with new data.
fit <- lm(similarity_score ~ sentence_num, data = together)
summary(fit)
## Weight by acts
# Mean of all the per-sentence maximum cosine similarities.
all_mean <- mean(unlist(max_together))
# Number of sentences per act and the cumulative position of each act's last
# sentence.
# NOTE(review): this assumes the rows of drama_longsent are ordered by act and
# that the act labels sort in play order -- confirm.
act_length <- drama_longsent %>%
  count(act)
act_length$pos <- cumsum(act_length$n)
# Leading marker row at position 1, so that row k + 1 describes act k.
act_length <- bind_rows(
  tibble(act = "Beggining", n = 1, pos = 1),
  act_length
)
# Non-overlapping sentence range of every act: act k runs from one past the
# end of act k - 1 to its own last sentence. (Using pos[k]:pos[k + 1] directly
# would count the last sentence of each act again in the following act.)
act_end <- act_length$pos[-1]
act_start <- c(1, head(act_end, -1) + 1)
# Mean of the maximum similarities within each act.
act_mean <- lapply(seq_along(act_end), function(k) {
  mean(unlist(max_together[act_start[k]:act_end[k]]))
})
# Per-act offset: how far each act's mean lies below (+) or above (-) the
# overall mean. NOTE: this variable masks base::diff() for the session.
diff <- all_mean - unlist(act_mean)
# Add the act weights to the model.
# Recomputes the per-sentence maximum cosine similarity to any earlier
# sentence (floored at 0) and shifts each score by the offset of the act the
# sentence belongs to (`diff`, i.e. overall mean minus act mean).
a1 <- lapply(drama_longsent$embedding_num, as.numeric)
n_sentences <- length(a1)
# act_length holds a leading marker row followed by one row per act; `pos` is
# the cumulative position of each act's last sentence. Act k therefore starts
# one past the end of act k - 1, so no sentence belongs to two acts and any
# number of acts is supported.
act_end <- act_length$pos[-1]
act_start <- c(1, head(act_end, -1) + 1)
all_together <- list()
max_together <- vector("list", n_sentences)
for (i in seq_len(n_sentences)) {
  # Start from all zeros, then fill in only the earlier sentences.
  all_together <- as.list(numeric(n_sentences))
  for (j in seq_len(i - 1)) {
    all_together[[j]] <- cosine(a1[[i]], a1[[j]])
  }
  max_together[[i]] <- max(unlist(all_together))
  # Add the weight of the act containing sentence i (exactly one act).
  act_index <- findInterval(i, act_start)
  if (act_index >= 1 && act_index <= length(diff)) {
    max_together[[i]] <- max_together[[i]] + diff[[act_index]]
  }
}