#-------------------------------------------------------------------------------
# INTERRATER RELIABILITY IN OBSERVATIONAL RESEARCH
#-------------------------------------------------------------------------------
# Neadle, Beck & Tennie (2021)
#-------------------------------------------------------------------------------
# Written by:
# Damien Neadle
# Psychology Department, Birmingham City University
#-------------------------------------------------------------------------------

# Load required packages
library(plyr)
library(dplyr)
library(irr)
library(ICC.Sample.Size)
library(DataEditR)
library(caret)

#### Simulating the dataset ####
#NB, if you are using your own data, you do not need to use this section
#skip to sample size calculations
##### making social data #####
# Make the subjects
oranzee_ID <- rep(LETTERS[1:20], each = 20)

# need to make seperately then bind because different numbers of behaviours
category <- rep(1, times = 400)
summary(category)

set.seed(1) # ensure numbers don't change for replication

# set behavioural subcategories - 1-4
sub_category <- floor(runif(400, min=1, max=5))
summary(sub_category)

# set individual behaviours - 1-6
behaviour <- floor(runif(400, min=1, max=7))
summary(behaviour)

# set time start
time_start_sec <- floor(runif(400, min=1, max=600))
summary(time_start_sec)

# set duration
duration_sec <- rpois(n = 400, lambda = 25)
summary(duration_sec)

# set handedness
handedness <- floor(runif(400, min=1, max=3))
summary(handedness)

# set sociality
sociality <- floor(runif(400, min=1, max=3))
summary(sociality)

# Bind together
simulated_data_social <- data.frame(oranzee_ID, category, sub_category,
                                    behaviour, time_start_sec, duration_sec,
                                    handedness, sociality)
head(simulated_data_social)

##### making food-related data #####
# varying the number of observations to make it more realistic

# Make the subjects
oranzee_ID <- rep(LETTERS[1:20], each = 15)

## making social data
# need to make seperately then bind because different numbers of behaviours
category <- rep(2, times = 300)
summary(category)

# set behavioural subcategories - 1-4
sub_category <- floor(runif(300, min=5, max=9))
summary(sub_category)

# set individual behaviours - 1-4
behaviour <- floor(runif(300, min=1, max=5))
summary(behaviour)

# set time start
time_start_sec <- floor(runif(300, min=1, max=600))
summary(time_start_sec)

# set duration
duration_sec <- rpois(n = 300, lambda = 25)
summary(duration_sec)

# set handedness
handedness <- floor(runif(300, min=1, max=3))
summary(handedness)

# set sociality
sociality <- floor(runif(300, min=1, max=3))
summary(sociality)

# Bind food dataset
simulated_data_food <- data.frame(oranzee_ID, category, sub_category,
                                    behaviour, time_start_sec, duration_sec,
                                    handedness, sociality)

head(simulated_data_food)

##### finalising datasets #####

# bind together 2 datasets
simulated_data <- bind_rows(simulated_data_social, simulated_data_food)

# Apply value labels
simulated_data$category <- factor(simulated_data$category,
                    levels = c(1,2),
                    labels = c("Social", "Food-related"))
simulated_data$sub_category <- factor(simulated_data$sub_category,
                                  levels = c(1,2,3,4,5,6,7,8),
                                  labels = c("Play", "Display", "Groom", "Courtship",
                                             "Fruit-Hammer Foraging", "Stick-Based Foraging",
                                             "Anvil Smash", "Rolling Pin Techniques"))

Play<-subset(simulated_data, sub_category=="Play")
Play$behaviour <- factor(Play$behaviour,
                                      levels = c(1,2,3,4,5,6),
                                      labels = c("Fruit-Missile", "Slap-Fight", "Air-Split", "Leaf-Mask",
                                                 "Whistle", "Pebble-Tease"))

Display<-subset(simulated_data, sub_category=="Display")
Display$behaviour <- factor(Display$behaviour,
                         levels = c(1,2,3,4,5,6),
                         labels = c("Stone Drop", "Branch Pull-Release", "Arm-Cross", "Two-Hand-Drum",
                                    "Splash", "Arm-Swing"))

Groom<-subset(simulated_data, sub_category=="Groom")
Groom$behaviour <- factor(Groom$behaviour,
                            levels = c(1,2,3,4,5,6),
                            labels = c("Tool Back-Scratcher", "Hand Back-Scratcher", "Tongue-Bathe", "Tooth-Pick",
                                       "Dirt-Shower", "Ant-Shower"))

Courtship<-subset(simulated_data, sub_category=="Courtship")
Courtship$behaviour <- factor(Courtship$behaviour,
                          levels = c(1,2,3,4,5,6),
                          labels = c("Flower-Offer", "Hand-Stand", "Rope-Swing", "Leaf-Fan",
                                     "Wreath-Clutch", "Ear-Pull"))

Fruit_Hammer<-subset(simulated_data, sub_category=="Fruit-Hammer Foraging")
Fruit_Hammer$behaviour <- factor(Fruit_Hammer$behaviour,
                              levels = c(1,2,3,4),
                              labels = c("Wood-Wood", "Wood-Stone", "Stone-Wood", "Stone-Stone"))

Stick_based<-subset(simulated_data, sub_category=="Stick-Based Foraging")
Stick_based$behaviour <- factor(Stick_based$behaviour,
                                 levels = c(1,2,3,4),
                                 labels = c("Stick-Throw V", "Stick-Throw A", "Fish-Stab", "Hedgehog-Flick"))

Anvil_Smash<-subset(simulated_data, sub_category=="Anvil Smash")
Anvil_Smash$behaviour <- factor(Anvil_Smash$behaviour,
                                levels = c(1,2,3,4),
                                labels = c("Anvil-Smash S", "Anvil-Smash W", "Smash-Ground", "Drop-Ground"))

Rolling_pin<-subset(simulated_data, sub_category=="Rolling Pin Techniques")
Rolling_pin$behaviour <- factor(Rolling_pin$behaviour,
                                levels = c(1,2,3,4),
                                labels = c("Rolling-Wood", "Rolling-Stone", "Rolling-Bone", "Rolling-Other"))

#Bind value label data
simulated_data <- bind_rows(Play, Display, Groom, Courtship, Fruit_Hammer, Stick_based, Anvil_Smash, Rolling_pin)

#continue adding value labels to final variables
simulated_data$handedness <- factor(simulated_data$handedness,
                                  levels = c(1,2),
                                  labels = c("Left", "Right"))

simulated_data$sociality <- factor(simulated_data$sociality,
                                    levels = c(1,2),
                                    labels = c("Asocial", "Social"))

# create time-end variable and move to logical place
simulated_data$time_end_sec <- simulated_data$time_start_sec + simulated_data$duration_sec
simulated_data <- simulated_data[ , c(1,2,3,4,5,9,6,7,8)]

# organise datasets into logical order (subject then timestart)
simulated_data <- simulated_data[order(simulated_data$oranzee_ID, simulated_data$time_start_sec),]

unique(simulated_data$oranzee_ID)

#### Sample size calculations ####
##### Categorical variables #####
###### behaviour (finest grain) ######
#calculate probability of each behaviour
pBehavT <- prop.table(table(simulated_data$behaviour))
#divide by 2 as the dummys mean there is a 50% chance of the behaviour not being present
probability_behaviours <- as.data.frame(pBehavT/2)
#check that this is the case
sum(probability_behaviours$Freq)
#characterise variable to allow no behaviour to be added
probability_behaviours$Var1 <- as.character(probability_behaviours$Var1)
#new row vector
no_behav_vector <- c("No Behaviour", 0.5)
#add new row in
probability_behaviours <- rbind(probability_behaviours, no_behav_vector, stringsAsFactors=FALSE)
#refactorise var1
probability_behaviours$Var1 <- as.factor(probability_behaviours$Var1)
probability_behaviours$Freq <- as.numeric(probability_behaviours$Freq)


#check probability = 1
sum(probability_behaviours$Freq)
#print for power calculation
paste(as.character(probability_behaviours$Freq), collapse=", ")

#sample size required
#irr only allows for 10 categories
length(unique(probability_behaviours$Var1))

# As this data set has too many categories and it becomes meaningless to divide them
# it is possible to devolve the calculation into agree/disagree, which is actually more conservative
# this is shown here:
# 10x possibilities with uneven probabilities = N=75
N2.cohen.kappa(c(0.025, 0.075, 0.01, 0.025, 0.075, 0.02, 0.08, 0.09, 0.1, 0.5), k1=0.78, k0=0.6)
# 2x possibilities (binary outcome) = N=105
# To be safe, this is the one we go with (oversampling)
# expected value taken from average of lead author's past studies, which have been published
# Neadle et al., 2017 and Neadle et al., 2020
N.cohen.kappa(0.5, 0.5, 0.78, 0.6, alpha=0.05, 
              power=0.8, twosided=FALSE)
## User beware then, if you have many categories (>10) you will have to oversample ##

###### sub-category (medium grain) ######
#calculate probability of each behaviour
pSubCatT <- prop.table(table(simulated_data$sub_category))
#divide by 2 as the dummys mean there is a 50% chance of the behaviour not being present
probability_sub_cat <- as.data.frame(pSubCatT/2)
#check that this is the case
sum(probability_sub_cat$Freq)
#characterise variable to allow no behaviour to be added
probability_sub_cat$Var1 <- as.character(probability_sub_cat$Var1)
#add new row in
probability_sub_cat <- rbind(probability_sub_cat, no_behav_vector, stringsAsFactors=FALSE)
#refactorise var1
probability_sub_cat$Var1 <- as.factor(probability_sub_cat$Var1)
probability_sub_cat$Freq <- as.numeric(probability_sub_cat$Freq)


#check probability = 1
sum(probability_sub_cat$Freq)
#print for power calculation
paste(as.character(probability_sub_cat$Freq), collapse=", ")

#check sample size for sub-category (N=74)
N2.cohen.kappa(c(0.0685714285714286, 0.0864285714285714, 0.0628571428571429, 
                 0.0678571428571429, 0.0514285714285714, 0.055, 
                 0.0528571428571429, 0.055, 0.5), k1=0.78, k0=0.6)

###### category (coarse grain) ######
#calculate probability of each behaviour
pCatT <- prop.table(table(simulated_data$category))
#divide by 2 as the dummys mean there is a 50% chance of the behaviour not being present
probability_cat <- as.data.frame(pCatT/2)
#check that this is the case
sum(probability_cat$Freq)
#characterise variable to allow no behaviour to be added
probability_cat$Var1 <- as.character(probability_cat$Var1)
#add new row in
probability_cat <- rbind(probability_cat, no_behav_vector, stringsAsFactors=FALSE)
#refactorise var1
probability_cat$Var1 <- as.factor(probability_cat$Var1)
probability_cat$Freq <- as.numeric(probability_cat$Freq)


#check probability = 1
sum(probability_cat$Freq)
#print for power calculation
paste(as.character(probability_cat$Freq), collapse=", ")

#check sample size for category (N=85)
N2.cohen.kappa(c(0.285714285714286, 0.214285714285714, 0.5), k1=0.78, k0=0.6)

###### handedness ######
#calculate probability of each behaviour
pHandT <- prop.table(table(simulated_data$handedness))
#divide by 2 as the dummys mean there is a 50% chance of the behaviour not being present
probability_hand <- as.data.frame(pHandT/2)
#check that this is the case
sum(probability_hand$Freq)
#characterise variable to allow no behaviour to be added
probability_hand$Var1 <- as.character(probability_hand$Var1)
#add new row in
probability_hand <- rbind(probability_hand, no_behav_vector, stringsAsFactors=FALSE)
#refactorise var1
probability_hand$Var1 <- as.factor(probability_hand$Var1)
probability_hand$Freq <- as.numeric(probability_hand$Freq)


#check probability = 1
sum(probability_hand$Freq)
#print for power calculation
paste(as.character(probability_hand$Freq), collapse=", ")

#check sample size for handedness (N=83)
N2.cohen.kappa(c(0.239285714285714, 0.260714285714286, 0.5), k1=0.78, k0=0.6)

###### sociality ######
#calculate probability of each behaviour
pSocT <- prop.table(table(simulated_data$sociality))
#divide by 2 as the dummys mean there is a 50% chance of the behaviour not being present
probability_sociality <- as.data.frame(pSocT/2)
#check that this is the case
sum(probability_sociality$Freq)
#characterise variable to allow no behaviour to be added
probability_sociality$Var1 <- as.character(probability_sociality$Var1)
#add new row in
probability_sociality <- rbind(probability_sociality, no_behav_vector, stringsAsFactors=FALSE)
#refactorise var1
probability_sociality$Var1 <- as.factor(probability_sociality$Var1)
probability_sociality$Freq <- as.numeric(probability_sociality$Freq)


#check probability = 1
sum(probability_sociality$Freq)
#print for power calculation
paste(as.character(probability_sociality$Freq), collapse=", ")

#check sample size for sub-category (N=83)
N2.cohen.kappa(c(0.257857142857143, 0.242142857142857, 0.5), k1=0.78, k0=0.6)

##### Continuous variables #####
###### time start ######

# Easier to calculate required power for ICC, which is used for continous outcomes
# and produces a result [[1]] but also to see the effect that varying these has [[2]]
# on the required power
# this function needs to be handed the minimum acceptable agreement (p0), and the actual expected agreement (p)
# this is set at p0=.6 (advised by Cohen) - the closer the expected agreement is to this the higher the required sample
# P should be set based on previous experience with coders and/or similar values from literature
# here set at 0.78 gives value of N=65

calculateIccSampleSize(p=0.78,p0=0.60,k=2,alpha=0.05,tails=2,power=0.80,by="p",step=0.05)

####### duration #######

# Same procedure as above and thus same result
calculateIccSampleSize(p=0.78,p0=0.60,k=2,alpha=0.05,tails=2,power=0.80,by="p",step=0.05)

#### Selecting the clips ####

# First, check the average length of the 'live' clips to know how long the 'dummy' clips should be
# in this example, this was set when simulating the data at 25 (actual M=24.63)
mean(simulated_data$duration_sec)

# Assign behaviour ID to each observation
simulated_data <- tibble::rowid_to_column(simulated_data, "Behaviour_ID")

# Select sample for reliability
# size depends on calculations above - select largest number and insert into size argument
# needs to be 50% of that number, if odd round up, to allow for dummy clips
reliability_sample_size <- ceiling(105/2)

reliability_sample <- sample (c((min(simulated_data$Behaviour_ID)):max(simulated_data$Behaviour_ID)), 
                           size=reliability_sample_size, replace =F)
reliability_sample <- sort(reliability_sample, decreasing = F)
#print to hand to slice argument
paste(as.character(reliability_sample), collapse=", ")
#subset the sample
#replace the numbers in this argument with the result of the paste command ^
reliability_master <- simulated_data %>% slice(21, 36, 77, 80, 86, 90, 91, 99, 114, 116, 118, 
                                               121, 126, 130, 138, 140, 155, 165, 183, 184, 204, 
                                               207, 209, 218, 224, 283, 297, 300, 317, 348, 371, 
                                               387, 390, 391, 397, 413, 417, 418, 427, 451, 452, 
                                               461, 468, 494, 522, 554, 558, 601, 629, 630, 659, 
                                               673, 680)
#### Adding in 'dummy' clips ####
dummy_row <- data.frame(99999, "None", "Dummy",
                        "Dummy", "Dummy", 999999, 
                        999999, 999999, "Dummy", 
                        "Dummy")
names(dummy_row) <- c("Behaviour_ID", "oranzee_ID", "category",
                       "sub_category", "behaviour", "time_start_sec", 
                       "time_end_sec", "duration_sec", "handedness", 
                       "sociality")
reliability_master_inc_dummy <- rbind(reliability_master, dummy_row)

#duplicate the dummy entry by number required - 1
reliability_master_inc_dummy <- rbind(reliability_master_inc_dummy, reliability_master_inc_dummy[rep((reliability_sample_size+1), 
                                                                     reliability_sample_size-1), ])
#fix row names
rownames(reliability_master_inc_dummy) <- 1:106

####Preparing data for second coder####


# the number of random numbers to generate = number of observations
total_reliability_files <- length(reliability_master_inc_dummy$Behaviour_ID)
# assign random numbers to name clips
random_behav_ID <- sample (c(1:max(total_reliability_files)), 
                           size=total_reliability_files, replace =F)

# add random ID to dataset and put in position 1 and then order by it for matching up
reliability_master_inc_dummy$random_behav_ID <- random_behav_ID
reliability_master_inc_dummy <- reliability_master_inc_dummy[ , c(11,1,2,3,4,5,6,7,8,9,10)]
reliability_master_inc_dummy <- reliability_master_inc_dummy[order(reliability_master_inc_dummy$random_behav_ID),]

####Intra rater reliability check####
#Blank spreadsheet for main coder
##remove all values
reliability_for_intra_rater <- reliability_master_inc_dummy
reliability_for_intra_rater[which(!is.na(reliability_for_intra_rater), arr.ind = TRUE)] <- NA
reliability_for_intra_rater_blank <- reliability_for_intra_rater
reliability_for_intra_rater_blank[is.na(reliability_for_intra_rater_blank)] <- ""  
reliability_for_intra_rater_blank$random_behav_ID <- reliability_master_inc_dummy$random_behav_ID
rownames(reliability_for_intra_rater_blank) <- 1:106

#create data entry form for coder (can also do in excel this is Rshiny)
write.csv(reliability_for_intra_rater_blank, "reliability_for_intra_rater_blank.csv", row.names=FALSE, na="")
#this code stands alone until line of ### it uses the blank template from the previous row - edit as required
#don't forget to edit the rShiny function

#IMPORTANT: if using this function, the second coder MUST use the SAVE SELECTION TO FILE option!
#you will need to edit these items to include your files, if you need more or less,
#add and delete as required by copying and pasting or simply deleting
#ensure that you 
uni_oranzee <- paste(unique(as.character(simulated_data$oranzee_ID, collapse=", ")))
uni_cat <- paste(unique(as.character(simulated_data$category, collapse=", ")))
uni_sub_cat <- paste(unique(as.character(simulated_data$sub_category, collapse=", ")))
uni_behav <- paste(unique(as.character(simulated_data$behaviour, collapse=", ")))
uni_hand <- paste(unique(as.character(simulated_data$handedness, collapse=", ")))
uni_soc <- paste(unique(as.character(simulated_data$sociality, collapse=", ")))
instruction <- 'https://i.ibb.co/Rpcm2Th/correct-incorrect.png'

# [EWL 2025-04-16] Some users of RStudio report that this function may produce an error when it is  first run.
# For these users this function works when run a second time. 
# Currently we cannot tell if this is an issue with a particular version of RStudio (we are unable to reproduce the error in other code editors)
reliability_for_intra_rater <- data_edit(reliability_for_intra_rater_blank,
                                       col_options = list(oranzee_ID = uni_oranzee, category = uni_cat, sub_category = uni_sub_cat,
                                                          behaviour = uni_behav, handedness = uni_hand, sociality = uni_soc),
                                       col_edit = F,
                                       row_edit = F,
                                       row_names = F,
                                       col_stretch = T,
                                       viewer = "d",
                                       viewer_height = 800,
                                       viewer_width = 1200,
                                       title = "# To save, please use the SAVE SELECTION TO FILE option - left NOT right ",
                                       logo = instruction,
                                       logo_size = 85,
                                       logo_side = "right",
                                       write_fun = "write.csv",
                                       code = "S2_Neadle_et_al_Reliability_coding_second_script.R",
                                       save_as = "reliability_for_intra_rater_completed.csv")


#####Simulate Intra-Rater Data#####
#Creating main coder's intra data
reliability_for_intra_rater <- reliability_master_inc_dummy
reliability_for_intra_rater <- reliability_for_intra_rater[,-c(2)]
# this example simulates a main coder with 90% accuracy, this is arbitary and for simulation purposes only
intra_accuracy <- length(reliability_master_inc_dummy$Behaviour_ID) - round(90*(length(reliability_master_inc_dummy$Behaviour_ID))/100, 0)
#these are the (random) 11 behaviours the main coder coded differently second time round
reliability_for_intra_rater_errors <- reliability_master_inc_dummy[sample(1:nrow(reliability_master_inc_dummy), intra_accuracy), ] 
#this code makes random systematic errors in the data to simulate disagreement
stored_behav_ID <- reliability_for_intra_rater_errors$random_behav_ID
reliability_for_intra_rater_rand_errors <- reliability_master_inc_dummy %>% slice_sample(n = intra_accuracy, replace = T)
reliability_for_intra_rater_rand_errors$random_behav_ID <- stored_behav_ID
reliability_for_intra_rater_rand_errors <- reliability_for_intra_rater_rand_errors[,-c(2)]
ind <- match(reliability_for_intra_rater_rand_errors$random_behav_ID, reliability_for_intra_rater$random_behav_ID)
reliability_for_intra_rater[ind, 2:10] <- reliability_for_intra_rater_rand_errors[2:10]
#Data with simulated errors
head(reliability_for_intra_rater)

#####Intra-rater reliability tests#####
######Categorical tests######
intra_reliability_tests <- data.frame(matrix(ncol = 10 , nrow = 106))
colnames(intra_reliability_tests) <- c('category_main', 'category_second', 'sub_category_main', 
                                 'sub_category_second', 'behaviour_main', 'behaviour_second',
                                 'handedness_main', 'handedness_second', 'sociality_main',
                                 'sociality_second')
#Category
intra_reliability_tests$category_main <- c(reliability_master_inc_dummy$category)
intra_reliability_tests$category_second <- c(reliability_for_intra_rater$category)
kappa2(intra_reliability_tests[,1:2])


#Sub-category
intra_reliability_tests$sub_category_main <- c(reliability_master_inc_dummy$sub_category)
intra_reliability_tests$sub_category_second <- c(reliability_for_intra_rater$sub_category)
kappa2(intra_reliability_tests[,3:4])
agree(intra_reliability_tests[,3:4])
sub_cat_matrix <- confusionMatrix(data = intra_reliability_tests$sub_category_second, 
                                  reference = intra_reliability_tests$sub_category_main)
sub_cat_matrix_table <-as.table(sub_cat_matrix)
write.table(sub_cat_matrix_table, file = "sub_cat_matrix_table.txt", sep = ",", quote = FALSE, row.names = T)


#Behaviour
intra_reliability_tests$behaviour_main <- c(reliability_master_inc_dummy$behaviour)
intra_reliability_tests$behaviour_second <- c(reliability_for_intra_rater$behaviour)
kappa2(intra_reliability_tests[,5:6])


#Handedness
intra_reliability_tests$handedness_main <- c(reliability_master_inc_dummy$handedness)
intra_reliability_tests$handedness_second <- c(reliability_for_intra_rater$handedness)
kappa2(intra_reliability_tests[,7:8])


#Sociality
intra_reliability_tests$sociality_main <- c(reliability_master_inc_dummy$sociality)
intra_reliability_tests$sociality_second <- c(reliability_for_intra_rater$sociality)
kappa2(intra_reliability_tests[,9:10])


######Continuous tests######
#######Start time example ######
#fine grain
intra_reliability_tests$start_fine_main <- c(reliability_master_inc_dummy$time_start_sec)
intra_reliability_tests$start_fine_second <- c(reliability_for_intra_rater$time_start_sec)
icc(intra_reliability_tests[,11:12], model = "oneway", type = 'consistency', unit = 'single')

#coarse grain
#compare first and second coders, and remove negative signs
intra_reliability_tests$start_cont_diff <- intra_reliability_tests$start_fine_main - intra_reliability_tests$start_fine_second
#vector to store loop output
start_agree_binary <- c()
#loop to determine if second and first agree within a margin of error (5s)
for (i in 1:length(intra_reliability_tests$start_cont_diff)) {
  if(intra_reliability_tests$start_cont_diff[i] > 5){ 
    start_agree_binary <- append(start_agree_binary, "too low")
  } 
  else if(intra_reliability_tests$start_cont_diff[i] < -5){
    start_agree_binary <- append(start_agree_binary, "too high")
  }
  else {
    start_agree_binary <- append(start_agree_binary, "Agree")
  }
}

#import second agreement data
intra_reliability_tests$start_agree_binary_second <- as.factor(start_agree_binary)

#compare agreement
start_agree_binary_main <- c()
for (i in 1:length(intra_reliability_tests$start_agree_binary_second)) {
  if(intra_reliability_tests$start_agree_binary_second[i] == "too high"){ 
    start_agree_binary_main <- append(start_agree_binary_main, "too low")
  } 
  else if(intra_reliability_tests$start_agree_binary_second[i] == "too low"){
    start_agree_binary_main <- append(start_agree_binary_main, "too high")
  }
  else {
    start_agree_binary_main <- append(start_agree_binary_main, "Agree")
  }
}
#import main agreement data
intra_reliability_tests$start_agree_binary_main <- as.factor(start_agree_binary_main)

#check kappa
kappa2(intra_reliability_tests[,14:15])
agree(intra_reliability_tests[,14:15])


#######End time######
#fine grain
intra_reliability_tests$end_fine_main <- c(reliability_master_inc_dummy$time_end_sec)
intra_reliability_tests$end_fine_second <- c(reliability_for_intra_rater$time_end_sec)
icc(intra_reliability_tests[,16:17], model = "oneway", type = 'consistency', unit = 'single')


###Back to second coder###
#Blank spreadsheet for second coder
##remove all values
reliability_for_coder <- reliability_master_inc_dummy
reliability_for_coder[which(!is.na(reliability_for_coder), arr.ind = TRUE)] <- NA
reliability_for_coder_blank <- reliability_for_coder
reliability_for_coder_blank[is.na(reliability_for_coder_blank)] <- ""  
reliability_for_coder_blank$random_behav_ID <- reliability_master_inc_dummy$random_behav_ID
rownames(reliability_for_coder) <- 1:106

#create data entry form for coder (can also do in excel this is Rshiny)
write.csv(reliability_for_coder_blank, "reliability_for_coder_blank.csv", row.names=FALSE, na="")
#this code stands alone until line of ### it uses the blank template from the previous row - edit as required
#don't forget to edit the rShiny function

#IMPORTANT: if using this function, the second coder MUST use the SAVE SELECTION TO FILE option!
#you will need to edit these items to include your files, if you need more or less,
#add and delete as required by copying and pasting or simply deleting
#ensure that you 
uni_oranzee <- paste(unique(as.character(simulated_data$oranzee_ID, collapse=", ")))
uni_cat <- paste(unique(as.character(simulated_data$category, collapse=", ")))
uni_sub_cat <- paste(unique(as.character(simulated_data$sub_category, collapse=", ")))
uni_behav <- paste(unique(as.character(simulated_data$behaviour, collapse=", ")))
uni_hand <- paste(unique(as.character(simulated_data$handedness, collapse=", ")))
uni_soc <- paste(unique(as.character(simulated_data$sociality, collapse=", ")))
instruction <- 'https://i.ibb.co/Rpcm2Th/correct-incorrect.png'
reliability_coding_second <- data_edit(reliability_for_coder_blank,
                                      col_options = list(oranzee_ID = uni_oranzee, category = uni_cat, sub_category = uni_sub_cat,
                                      behaviour = uni_behav, handedness = uni_hand, sociality = uni_soc),
                                      col_edit = F,
                                      row_edit = F,
                                      row_names = F,
                                      col_stretch = T,
                                      viewer = "d",
                                      viewer_height = 800,
                                      viewer_width = 1200,
                                      title = "# To save, please use the SAVE SELECTION TO FILE option - left NOT right ",
                                      logo = instruction,
                                      logo_size = 85,
                                      logo_side = "right",
                                      write_fun = "write.csv",
                                      code = "S2_Neadle_et_al_Reliability_coding_second_script.R",
                                      save_as = "reliability_for_coder_completed.csv")

################################################################################

#### Read in second coder's data ####
#####Excel#####
#read in second coders data if made in excel and saved as CSV "reliability_for_coder_completed_excel.csv"
reliability_for_coder_header <- reliability_for_coder %>% slice_sample(n = 1)
reliability_coding_second <- rbind(reliability_for_coder_header, read.csv("reliability_for_coder_completed_excel.csv"))
reliability_coding_second <- reliability_for_coder_completed[-c(1),] 
#####RShiny#####
#read in second coders data if made using R shiny and sent
reliability_coding_second <- read.csv("reliability_for_coder_completed.csv")
reliability_coding_second <- reliability_for_coder_completed[,-c(1)] 


#### Simulating second coder ####
#Creating second coders data
reliability_coding_second <- reliability_master_inc_dummy
reliability_coding_second <- reliability_coding_second[,-c(2)]
# this example simulates a second coder with 65% accuracy, this is arbitary and for simulation purposes only
accuracy <- length(reliability_master_inc_dummy$Behaviour_ID) - round(65*(length(reliability_master_inc_dummy$Behaviour_ID))/100, 0)
#these are the (random) 37 behaviours the second coder made an error on
reliability_second_errors <- reliability_master_inc_dummy[sample(1:nrow(reliability_master_inc_dummy), accuracy), ] 
#this code makes random systematic errors in the data to simulate disagreement
stored_behav_ID <- reliability_second_errors$random_behav_ID
reliability_second_errors_rand_errors <- reliability_master_inc_dummy %>% slice_sample(n = accuracy, replace = T)
reliability_second_errors_rand_errors$random_behav_ID <- stored_behav_ID
reliability_second_errors_rand_errors <- reliability_second_errors_rand_errors[,-c(2)]
ind <- match(reliability_second_errors_rand_errors$random_behav_ID, reliability_coding_second$random_behav_ID)
reliability_coding_second[ind, 2:10] <- reliability_second_errors_rand_errors[2:10]
#Data with simulated errors
head(reliability_coding_second)


####Reliability tests####
#####Categorical tests#####
reliability_tests <- data.frame(matrix(ncol = 10 , nrow = 106))
colnames(reliability_tests) <- c('category_main', 'category_second', 'sub_category_main', 
                                'sub_category_second', 'behaviour_main', 'behaviour_second',
                                'handedness_main', 'handedness_second', 'sociality_main',
                                'sociality_second')
#Category
reliability_tests$category_main <- c(reliability_master_inc_dummy$category)
reliability_tests$category_second <- c(reliability_coding_second$category)
kappa2(reliability_tests[,1:2])


#Sub-category
reliability_tests$sub_category_main <- c(reliability_master_inc_dummy$sub_category)
reliability_tests$sub_category_second <- c(reliability_coding_second$sub_category)
kappa2(reliability_tests[,3:4])
agree(reliability_tests[,3:4])
sub_cat_matrix <- confusionMatrix(data = reliability_tests$sub_category_second, 
                                    reference = reliability_tests$sub_category_main)
sub_cat_matrix_table <-as.table(sub_cat_matrix)
write.table(sub_cat_matrix_table, file = "sub_cat_matrix_table.txt", sep = ",", quote = FALSE, row.names = T)


#Behaviour
reliability_tests$behaviour_main <- c(reliability_master_inc_dummy$behaviour)
reliability_tests$behaviour_second <- c(reliability_coding_second$behaviour)
kappa2(reliability_tests[,5:6])


#Handedness
reliability_tests$handedness_main <- c(reliability_master_inc_dummy$handedness)
reliability_tests$handedness_second <- c(reliability_coding_second$handedness)
kappa2(reliability_tests[,7:8])


#Sociality
reliability_tests$sociality_main <- c(reliability_master_inc_dummy$sociality)
reliability_tests$sociality_second <- c(reliability_coding_second$sociality)
kappa2(reliability_tests[,9:10])


#####Continuous tests#####
######Start time example #####
#fine grain
reliability_tests$start_fine_main <- c(reliability_master_inc_dummy$time_start_sec)
reliability_tests$start_fine_second <- c(reliability_coding_second$time_start_sec)
icc(reliability_tests[,11:12], model = "oneway", type = 'consistency', unit = 'single')

#coarse grain
#compare first and second coders, and remove negative signs
reliability_tests$start_cont_diff <- reliability_tests$start_fine_main - reliability_tests$start_fine_second
#vector to store loop output
start_agree_binary <- c()
#loop to determine if second and first agree within a margin of error (5s)
for (i in 1:length(reliability_tests$start_cont_diff)) {
  if(reliability_tests$start_cont_diff[i] > 5){ 
    start_agree_binary <- append(start_agree_binary, "too low")
  } 
  else if(reliability_tests$start_cont_diff[i] < -5){
    start_agree_binary <- append(start_agree_binary, "too high")
  }
  else {
    start_agree_binary <- append(start_agree_binary, "Agree")
  }
}

#import second agreement data
reliability_tests$start_agree_binary_second <- as.factor(start_agree_binary)

#compare agreement
start_agree_binary_main <- c()
for (i in 1:length(reliability_tests$start_agree_binary_second)) {
  if(reliability_tests$start_agree_binary_second[i] == "too high"){ 
    start_agree_binary_main <- append(start_agree_binary_main, "too low")
  } 
  else if(reliability_tests$start_agree_binary_second[i] == "too low"){
    start_agree_binary_main <- append(start_agree_binary_main, "too high")
  }
  else {
    start_agree_binary_main <- append(start_agree_binary_main, "Agree")
  }
}
#import main agreement data
reliability_tests$start_agree_binary_main <- as.factor(start_agree_binary_main)

#check kappa
kappa2(reliability_tests[,14:15])
agree(reliability_tests[,14:15])