#------------------------------------------------------------------------------- # INTERRATER RELIABILITY IN OBSERVATIONAL RESEARCH #------------------------------------------------------------------------------- # Neadle, Beck & Tennie (2021) #------------------------------------------------------------------------------- # Written by: # Damien Neadle # Psychology Department, Birmingham City University #------------------------------------------------------------------------------- # Load required packages library(plyr) library(dplyr) library(irr) library(ICC.Sample.Size) library(DataEditR) library(caret) #### Simulating the dataset #### #NB, if you are using your own data, you do not need to use this section #skip to sample size calculations ##### making social data ##### # Make the subjects oranzee_ID <- rep(LETTERS[1:20], each = 20) # need to make seperately then bind because different numbers of behaviours category <- rep(1, times = 400) summary(category) set.seed(1) # ensure numbers don't change for replication # set behavioural subcategories - 1-4 sub_category <- floor(runif(400, min=1, max=5)) summary(sub_category) # set individual behaviours - 1-6 behaviour <- floor(runif(400, min=1, max=7)) summary(behaviour) # set time start time_start_sec <- floor(runif(400, min=1, max=600)) summary(time_start_sec) # set duration duration_sec <- rpois(n = 400, lambda = 25) summary(duration_sec) # set handedness handedness <- floor(runif(400, min=1, max=3)) summary(handedness) # set sociality sociality <- floor(runif(400, min=1, max=3)) summary(sociality) # Bind together simulated_data_social <- data.frame(oranzee_ID, category, sub_category, behaviour, time_start_sec, duration_sec, handedness, sociality) head(simulated_data_social) ##### making food-related data ##### # varying the number of observations to make it more realistic # Make the subjects oranzee_ID <- rep(LETTERS[1:20], each = 15) ## making social data # need to make seperately then bind because different numbers of behaviours category <- rep(2, times = 300) summary(category) # set behavioural subcategories - 1-4 sub_category <- floor(runif(300, min=5, max=9)) summary(sub_category) # set individual behaviours - 1-4 behaviour <- floor(runif(300, min=1, max=5)) summary(behaviour) # set time start time_start_sec <- floor(runif(300, min=1, max=600)) summary(time_start_sec) # set duration duration_sec <- rpois(n = 300, lambda = 25) summary(duration_sec) # set handedness handedness <- floor(runif(300, min=1, max=3)) summary(handedness) # set sociality sociality <- floor(runif(300, min=1, max=3)) summary(sociality) # Bind food dataset simulated_data_food <- data.frame(oranzee_ID, category, sub_category, behaviour, time_start_sec, duration_sec, handedness, sociality) head(simulated_data_food) ##### finalising datasets ##### # bind together 2 datasets simulated_data <- bind_rows(simulated_data_social, simulated_data_food) # Apply value labels simulated_data$category <- factor(simulated_data$category, levels = c(1,2), labels = c("Social", "Food-related")) simulated_data$sub_category <- factor(simulated_data$sub_category, levels = c(1,2,3,4,5,6,7,8), labels = c("Play", "Display", "Groom", "Courtship", "Fruit-Hammer Foraging", "Stick-Based Foraging", "Anvil Smash", "Rolling Pin Techniques")) Play<-subset(simulated_data, sub_category=="Play") Play$behaviour <- factor(Play$behaviour, levels = c(1,2,3,4,5,6), labels = c("Fruit-Missile", "Slap-Fight", "Air-Split", "Leaf-Mask", "Whistle", "Pebble-Tease")) Display<-subset(simulated_data, sub_category=="Display") Display$behaviour <- factor(Display$behaviour, levels = c(1,2,3,4,5,6), labels = c("Stone Drop", "Branch Pull-Release", "Arm-Cross", "Two-Hand-Drum", "Splash", "Arm-Swing")) Groom<-subset(simulated_data, sub_category=="Groom") Groom$behaviour <- factor(Groom$behaviour, levels = c(1,2,3,4,5,6), labels = c("Tool Back-Scratcher", "Hand Back-Scratcher", "Tongue-Bathe", "Tooth-Pick", "Dirt-Shower", "Ant-Shower")) Courtship<-subset(simulated_data, sub_category=="Courtship") Courtship$behaviour <- factor(Courtship$behaviour, levels = c(1,2,3,4,5,6), labels = c("Flower-Offer", "Hand-Stand", "Rope-Swing", "Leaf-Fan", "Wreath-Clutch", "Ear-Pull")) Fruit_Hammer<-subset(simulated_data, sub_category=="Fruit-Hammer Foraging") Fruit_Hammer$behaviour <- factor(Fruit_Hammer$behaviour, levels = c(1,2,3,4), labels = c("Wood-Wood", "Wood-Stone", "Stone-Wood", "Stone-Stone")) Stick_based<-subset(simulated_data, sub_category=="Stick-Based Foraging") Stick_based$behaviour <- factor(Stick_based$behaviour, levels = c(1,2,3,4), labels = c("Stick-Throw V", "Stick-Throw A", "Fish-Stab", "Hedgehog-Flick")) Anvil_Smash<-subset(simulated_data, sub_category=="Anvil Smash") Anvil_Smash$behaviour <- factor(Anvil_Smash$behaviour, levels = c(1,2,3,4), labels = c("Anvil-Smash S", "Anvil-Smash W", "Smash-Ground", "Drop-Ground")) Rolling_pin<-subset(simulated_data, sub_category=="Rolling Pin Techniques") Rolling_pin$behaviour <- factor(Rolling_pin$behaviour, levels = c(1,2,3,4), labels = c("Rolling-Wood", "Rolling-Stone", "Rolling-Bone", "Rolling-Other")) #Bind value label data simulated_data <- bind_rows(Play, Display, Groom, Courtship, Fruit_Hammer, Stick_based, Anvil_Smash, Rolling_pin) #continue adding value labels to final variables simulated_data$handedness <- factor(simulated_data$handedness, levels = c(1,2), labels = c("Left", "Right")) simulated_data$sociality <- factor(simulated_data$sociality, levels = c(1,2), labels = c("Asocial", "Social")) # create time-end variable and move to logical place simulated_data$time_end_sec <- simulated_data$time_start_sec + simulated_data$duration_sec simulated_data <- simulated_data[ , c(1,2,3,4,5,9,6,7,8)] # organise datasets into logical order (subject then timestart) simulated_data <- simulated_data[order(simulated_data$oranzee_ID, simulated_data$time_start_sec),] unique(simulated_data$oranzee_ID) #### Sample size calculations #### ##### Categorical variables ##### ###### behaviour (finest grain) ###### #calculate probability of each behaviour pBehavT <- prop.table(table(simulated_data$behaviour)) #divide by 2 as the dummys mean there is a 50% chance of the behaviour not being present probability_behaviours <- as.data.frame(pBehavT/2) #check that this is the case sum(probability_behaviours$Freq) #characterise variable to allow no behaviour to be added probability_behaviours$Var1 <- as.character(probability_behaviours$Var1) #new row vector no_behav_vector <- c("No Behaviour", 0.5) #add new row in probability_behaviours <- rbind(probability_behaviours, no_behav_vector, stringsAsFactors=FALSE) #refactorise var1 probability_behaviours$Var1 <- as.factor(probability_behaviours$Var1) probability_behaviours$Freq <- as.numeric(probability_behaviours$Freq) #check probability = 1 sum(probability_behaviours$Freq) #print for power calculation paste(as.character(probability_behaviours$Freq), collapse=", ") #sample size required #irr only allows for 10 categories length(unique(probability_behaviours$Var1)) # As this data set has too many categories and it becomes meaningless to divide them # it is possible to devolve the calculation into agree/disagree, which is actually more conservative # this is shown here: # 10x possibilities with uneven probabilities = N=75 N2.cohen.kappa(c(0.025, 0.075, 0.01, 0.025, 0.075, 0.02, 0.08, 0.09, 0.1, 0.5), k1=0.78, k0=0.6) # 2x possibilities (binary outcome) = N=105 # To be safe, this is the one we go with (oversampling) # expected value taken from average of lead author's past studies, which have been published # Neadle et al., 2017 and Neadle et al., 2020 N.cohen.kappa(0.5, 0.5, 0.78, 0.6, alpha=0.05, power=0.8, twosided=FALSE) ## User beware then, if you have many categories (>10) you will have to oversample ## ###### sub-category (medium grain) ###### #calculate probability of each behaviour pSubCatT <- prop.table(table(simulated_data$sub_category)) #divide by 2 as the dummys mean there is a 50% chance of the behaviour not being present probability_sub_cat <- as.data.frame(pSubCatT/2) #check that this is the case sum(probability_sub_cat$Freq) #characterise variable to allow no behaviour to be added probability_sub_cat$Var1 <- as.character(probability_sub_cat$Var1) #add new row in probability_sub_cat <- rbind(probability_sub_cat, no_behav_vector, stringsAsFactors=FALSE) #refactorise var1 probability_sub_cat$Var1 <- as.factor(probability_sub_cat$Var1) probability_sub_cat$Freq <- as.numeric(probability_sub_cat$Freq) #check probability = 1 sum(probability_sub_cat$Freq) #print for power calculation paste(as.character(probability_sub_cat$Freq), collapse=", ") #check sample size for sub-category (N=74) N2.cohen.kappa(c(0.0685714285714286, 0.0864285714285714, 0.0628571428571429, 0.0678571428571429, 0.0514285714285714, 0.055, 0.0528571428571429, 0.055, 0.5), k1=0.78, k0=0.6) ###### category (coarse grain) ###### #calculate probability of each behaviour pCatT <- prop.table(table(simulated_data$category)) #divide by 2 as the dummys mean there is a 50% chance of the behaviour not being present probability_cat <- as.data.frame(pCatT/2) #check that this is the case sum(probability_cat$Freq) #characterise variable to allow no behaviour to be added probability_cat$Var1 <- as.character(probability_cat$Var1) #add new row in probability_cat <- rbind(probability_cat, no_behav_vector, stringsAsFactors=FALSE) #refactorise var1 probability_cat$Var1 <- as.factor(probability_cat$Var1) probability_cat$Freq <- as.numeric(probability_cat$Freq) #check probability = 1 sum(probability_cat$Freq) #print for power calculation paste(as.character(probability_cat$Freq), collapse=", ") #check sample size for category (N=85) N2.cohen.kappa(c(0.285714285714286, 0.214285714285714, 0.5), k1=0.78, k0=0.6) ###### handedness ###### #calculate probability of each behaviour pHandT <- prop.table(table(simulated_data$handedness)) #divide by 2 as the dummys mean there is a 50% chance of the behaviour not being present probability_hand <- as.data.frame(pHandT/2) #check that this is the case sum(probability_hand$Freq) #characterise variable to allow no behaviour to be added probability_hand$Var1 <- as.character(probability_hand$Var1) #add new row in probability_hand <- rbind(probability_hand, no_behav_vector, stringsAsFactors=FALSE) #refactorise var1 probability_hand$Var1 <- as.factor(probability_hand$Var1) probability_hand$Freq <- as.numeric(probability_hand$Freq) #check probability = 1 sum(probability_hand$Freq) #print for power calculation paste(as.character(probability_hand$Freq), collapse=", ") #check sample size for handedness (N=83) N2.cohen.kappa(c(0.239285714285714, 0.260714285714286, 0.5), k1=0.78, k0=0.6) ###### sociality ###### #calculate probability of each behaviour pSocT <- prop.table(table(simulated_data$sociality)) #divide by 2 as the dummys mean there is a 50% chance of the behaviour not being present probability_sociality <- as.data.frame(pSocT/2) #check that this is the case sum(probability_sociality$Freq) #characterise variable to allow no behaviour to be added probability_sociality$Var1 <- as.character(probability_sociality$Var1) #add new row in probability_sociality <- rbind(probability_sociality, no_behav_vector, stringsAsFactors=FALSE) #refactorise var1 probability_sociality$Var1 <- as.factor(probability_sociality$Var1) probability_sociality$Freq <- as.numeric(probability_sociality$Freq) #check probability = 1 sum(probability_sociality$Freq) #print for power calculation paste(as.character(probability_sociality$Freq), collapse=", ") #check sample size for sub-category (N=83) N2.cohen.kappa(c(0.257857142857143, 0.242142857142857, 0.5), k1=0.78, k0=0.6) ##### Continuous variables ##### ###### time start ###### # Easier to calculate required power for ICC, which is used for continous outcomes # and produces a result [[1]] but also to see the effect that varying these has [[2]] # on the required power # this function needs to be handed the minimum acceptable agreement (p0), and the actual expected agreement (p) # this is set at p0=.6 (advised by Cohen) - the closer the expected agreement is to this the higher the required sample # P should be set based on previous experience with coders and/or similar values from literature # here set at 0.78 gives value of N=65 calculateIccSampleSize(p=0.78,p0=0.60,k=2,alpha=0.05,tails=2,power=0.80,by="p",step=0.05) ####### duration ####### # Same procedure as above and thus same result calculateIccSampleSize(p=0.78,p0=0.60,k=2,alpha=0.05,tails=2,power=0.80,by="p",step=0.05) #### Selecting the clips #### # First, check the average length of the 'live' clips to know how long the 'dummy' clips should be # in this example, this was set when simulating the data at 25 (actual M=24.63) mean(simulated_data$duration_sec) # Assign behaviour ID to each observation simulated_data <- tibble::rowid_to_column(simulated_data, "Behaviour_ID") # Select sample for reliability # size depends on calculations above - select largest number and insert into size argument # needs to be 50% of that number, if odd round up, to allow for dummy clips reliability_sample_size <- ceiling(105/2) reliability_sample <- sample (c((min(simulated_data$Behaviour_ID)):max(simulated_data$Behaviour_ID)), size=reliability_sample_size, replace =F) reliability_sample <- sort(reliability_sample, decreasing = F) #print to hand to slice argument paste(as.character(reliability_sample), collapse=", ") #subset the sample #replace the numbers in this argument with the result of the paste command ^ reliability_master <- simulated_data %>% slice(21, 36, 77, 80, 86, 90, 91, 99, 114, 116, 118, 121, 126, 130, 138, 140, 155, 165, 183, 184, 204, 207, 209, 218, 224, 283, 297, 300, 317, 348, 371, 387, 390, 391, 397, 413, 417, 418, 427, 451, 452, 461, 468, 494, 522, 554, 558, 601, 629, 630, 659, 673, 680) #### Adding in 'dummy' clips #### dummy_row <- data.frame(99999, "None", "Dummy", "Dummy", "Dummy", 999999, 999999, 999999, "Dummy", "Dummy") names(dummy_row) <- c("Behaviour_ID", "oranzee_ID", "category", "sub_category", "behaviour", "time_start_sec", "time_end_sec", "duration_sec", "handedness", "sociality") reliability_master_inc_dummy <- rbind(reliability_master, dummy_row) #duplicate the dummy entry by number required - 1 reliability_master_inc_dummy <- rbind(reliability_master_inc_dummy, reliability_master_inc_dummy[rep((reliability_sample_size+1), reliability_sample_size-1), ]) #fix row names rownames(reliability_master_inc_dummy) <- 1:106 ####Preparing data for second coder#### # the number of random numbers to generate = number of observations total_reliability_files <- length(reliability_master_inc_dummy$Behaviour_ID) # assign random numbers to name clips random_behav_ID <- sample (c(1:max(total_reliability_files)), size=total_reliability_files, replace =F) # add random ID to dataset and put in position 1 and then order by it for matching up reliability_master_inc_dummy$random_behav_ID <- random_behav_ID reliability_master_inc_dummy <- reliability_master_inc_dummy[ , c(11,1,2,3,4,5,6,7,8,9,10)] reliability_master_inc_dummy <- reliability_master_inc_dummy[order(reliability_master_inc_dummy$random_behav_ID),] ####Intra rater reliability check#### #Blank spreadsheet for main coder ##remove all values reliability_for_intra_rater <- reliability_master_inc_dummy reliability_for_intra_rater[which(!is.na(reliability_for_intra_rater), arr.ind = TRUE)] <- NA reliability_for_intra_rater_blank <- reliability_for_intra_rater reliability_for_intra_rater_blank[is.na(reliability_for_intra_rater_blank)] <- "" reliability_for_intra_rater_blank$random_behav_ID <- reliability_master_inc_dummy$random_behav_ID rownames(reliability_for_intra_rater_blank) <- 1:106 #create data entry form for coder (can also do in excel this is Rshiny) write.csv(reliability_for_intra_rater_blank, "reliability_for_intra_rater_blank.csv", row.names=FALSE, na="") #this code stands alone until line of ### it uses the blank template from the previous row - edit as required #don't forget to edit the rShiny function #IMPORTANT: if using this function, the second coder MUST use the SAVE SELECTION TO FILE option! #you will need to edit these items to include your files, if you need more or less, #add and delete as required by copying and pasting or simply deleting #ensure that you uni_oranzee <- paste(unique(as.character(simulated_data$oranzee_ID, collapse=", "))) uni_cat <- paste(unique(as.character(simulated_data$category, collapse=", "))) uni_sub_cat <- paste(unique(as.character(simulated_data$sub_category, collapse=", "))) uni_behav <- paste(unique(as.character(simulated_data$behaviour, collapse=", "))) uni_hand <- paste(unique(as.character(simulated_data$handedness, collapse=", "))) uni_soc <- paste(unique(as.character(simulated_data$sociality, collapse=", "))) instruction <- 'https://i.ibb.co/Rpcm2Th/correct-incorrect.png' # [EWL 2025-04-16] Some users of RStudio report that this function may produce an error when it is first run. # For these users this function works when run a second time. # Currently we cannot tell if this is an issue with a particular version of RStudio (we are unable to reproduce the error in other code editors) reliability_for_intra_rater <- data_edit(reliability_for_intra_rater_blank, col_options = list(oranzee_ID = uni_oranzee, category = uni_cat, sub_category = uni_sub_cat, behaviour = uni_behav, handedness = uni_hand, sociality = uni_soc), col_edit = F, row_edit = F, row_names = F, col_stretch = T, viewer = "d", viewer_height = 800, viewer_width = 1200, title = "# To save, please use the SAVE SELECTION TO FILE option - left NOT right ", logo = instruction, logo_size = 85, logo_side = "right", write_fun = "write.csv", code = "S2_Neadle_et_al_Reliability_coding_second_script.R", save_as = "reliability_for_intra_rater_completed.csv") #####Simulate Intra-Rater Data##### #Creating main coder's intra data reliability_for_intra_rater <- reliability_master_inc_dummy reliability_for_intra_rater <- reliability_for_intra_rater[,-c(2)] # this example simulates a main coder with 90% accuracy, this is arbitary and for simulation purposes only intra_accuracy <- length(reliability_master_inc_dummy$Behaviour_ID) - round(90*(length(reliability_master_inc_dummy$Behaviour_ID))/100, 0) #these are the (random) 11 behaviours the main coder coded differently second time round reliability_for_intra_rater_errors <- reliability_master_inc_dummy[sample(1:nrow(reliability_master_inc_dummy), intra_accuracy), ] #this code makes random systematic errors in the data to simulate disagreement stored_behav_ID <- reliability_for_intra_rater_errors$random_behav_ID reliability_for_intra_rater_rand_errors <- reliability_master_inc_dummy %>% slice_sample(n = intra_accuracy, replace = T) reliability_for_intra_rater_rand_errors$random_behav_ID <- stored_behav_ID reliability_for_intra_rater_rand_errors <- reliability_for_intra_rater_rand_errors[,-c(2)] ind <- match(reliability_for_intra_rater_rand_errors$random_behav_ID, reliability_for_intra_rater$random_behav_ID) reliability_for_intra_rater[ind, 2:10] <- reliability_for_intra_rater_rand_errors[2:10] #Data with simulated errors head(reliability_for_intra_rater) #####Intra-rater reliability tests##### ######Categorical tests###### intra_reliability_tests <- data.frame(matrix(ncol = 10 , nrow = 106)) colnames(intra_reliability_tests) <- c('category_main', 'category_second', 'sub_category_main', 'sub_category_second', 'behaviour_main', 'behaviour_second', 'handedness_main', 'handedness_second', 'sociality_main', 'sociality_second') #Category intra_reliability_tests$category_main <- c(reliability_master_inc_dummy$category) intra_reliability_tests$category_second <- c(reliability_for_intra_rater$category) kappa2(intra_reliability_tests[,1:2]) #Sub-category intra_reliability_tests$sub_category_main <- c(reliability_master_inc_dummy$sub_category) intra_reliability_tests$sub_category_second <- c(reliability_for_intra_rater$sub_category) kappa2(intra_reliability_tests[,3:4]) agree(intra_reliability_tests[,3:4]) sub_cat_matrix <- confusionMatrix(data = intra_reliability_tests$sub_category_second, reference = intra_reliability_tests$sub_category_main) sub_cat_matrix_table <-as.table(sub_cat_matrix) write.table(sub_cat_matrix_table, file = "sub_cat_matrix_table.txt", sep = ",", quote = FALSE, row.names = T) #Behaviour intra_reliability_tests$behaviour_main <- c(reliability_master_inc_dummy$behaviour) intra_reliability_tests$behaviour_second <- c(reliability_for_intra_rater$behaviour) kappa2(intra_reliability_tests[,5:6]) #Handedness intra_reliability_tests$handedness_main <- c(reliability_master_inc_dummy$handedness) intra_reliability_tests$handedness_second <- c(reliability_for_intra_rater$handedness) kappa2(intra_reliability_tests[,7:8]) #Sociality intra_reliability_tests$sociality_main <- c(reliability_master_inc_dummy$sociality) intra_reliability_tests$sociality_second <- c(reliability_for_intra_rater$sociality) kappa2(intra_reliability_tests[,9:10]) ######Continuous tests###### #######Start time example ###### #fine grain intra_reliability_tests$start_fine_main <- c(reliability_master_inc_dummy$time_start_sec) intra_reliability_tests$start_fine_second <- c(reliability_for_intra_rater$time_start_sec) icc(intra_reliability_tests[,11:12], model = "oneway", type = 'consistency', unit = 'single') #coarse grain #compare first and second coders, and remove negative signs intra_reliability_tests$start_cont_diff <- intra_reliability_tests$start_fine_main - intra_reliability_tests$start_fine_second #vector to store loop output start_agree_binary <- c() #loop to determine if second and first agree within a margin of error (5s) for (i in 1:length(intra_reliability_tests$start_cont_diff)) { if(intra_reliability_tests$start_cont_diff[i] > 5){ start_agree_binary <- append(start_agree_binary, "too low") } else if(intra_reliability_tests$start_cont_diff[i] < -5){ start_agree_binary <- append(start_agree_binary, "too high") } else { start_agree_binary <- append(start_agree_binary, "Agree") } } #import second agreement data intra_reliability_tests$start_agree_binary_second <- as.factor(start_agree_binary) #compare agreement start_agree_binary_main <- c() for (i in 1:length(intra_reliability_tests$start_agree_binary_second)) { if(intra_reliability_tests$start_agree_binary_second[i] == "too high"){ start_agree_binary_main <- append(start_agree_binary_main, "too low") } else if(intra_reliability_tests$start_agree_binary_second[i] == "too low"){ start_agree_binary_main <- append(start_agree_binary_main, "too high") } else { start_agree_binary_main <- append(start_agree_binary_main, "Agree") } } #import main agreement data intra_reliability_tests$start_agree_binary_main <- as.factor(start_agree_binary_main) #check kappa kappa2(intra_reliability_tests[,14:15]) agree(intra_reliability_tests[,14:15]) #######End time###### #fine grain intra_reliability_tests$end_fine_main <- c(reliability_master_inc_dummy$time_end_sec) intra_reliability_tests$end_fine_second <- c(reliability_for_intra_rater$time_end_sec) icc(intra_reliability_tests[,16:17], model = "oneway", type = 'consistency', unit = 'single') ###Back to second coder### #Blank spreadsheet for second coder ##remove all values reliability_for_coder <- reliability_master_inc_dummy reliability_for_coder[which(!is.na(reliability_for_coder), arr.ind = TRUE)] <- NA reliability_for_coder_blank <- reliability_for_coder reliability_for_coder_blank[is.na(reliability_for_coder_blank)] <- "" reliability_for_coder_blank$random_behav_ID <- reliability_master_inc_dummy$random_behav_ID rownames(reliability_for_coder) <- 1:106 #create data entry form for coder (can also do in excel this is Rshiny) write.csv(reliability_for_coder_blank, "reliability_for_coder_blank.csv", row.names=FALSE, na="") #this code stands alone until line of ### it uses the blank template from the previous row - edit as required #don't forget to edit the rShiny function #IMPORTANT: if using this function, the second coder MUST use the SAVE SELECTION TO FILE option! #you will need to edit these items to include your files, if you need more or less, #add and delete as required by copying and pasting or simply deleting #ensure that you uni_oranzee <- paste(unique(as.character(simulated_data$oranzee_ID, collapse=", "))) uni_cat <- paste(unique(as.character(simulated_data$category, collapse=", "))) uni_sub_cat <- paste(unique(as.character(simulated_data$sub_category, collapse=", "))) uni_behav <- paste(unique(as.character(simulated_data$behaviour, collapse=", "))) uni_hand <- paste(unique(as.character(simulated_data$handedness, collapse=", "))) uni_soc <- paste(unique(as.character(simulated_data$sociality, collapse=", "))) instruction <- 'https://i.ibb.co/Rpcm2Th/correct-incorrect.png' reliability_coding_second <- data_edit(reliability_for_coder_blank, col_options = list(oranzee_ID = uni_oranzee, category = uni_cat, sub_category = uni_sub_cat, behaviour = uni_behav, handedness = uni_hand, sociality = uni_soc), col_edit = F, row_edit = F, row_names = F, col_stretch = T, viewer = "d", viewer_height = 800, viewer_width = 1200, title = "# To save, please use the SAVE SELECTION TO FILE option - left NOT right ", logo = instruction, logo_size = 85, logo_side = "right", write_fun = "write.csv", code = "S2_Neadle_et_al_Reliability_coding_second_script.R", save_as = "reliability_for_coder_completed.csv") ################################################################################ #### Read in second coder's data #### #####Excel##### #read in second coders data if made in excel and saved as CSV "reliability_for_coder_completed_excel.csv" reliability_for_coder_header <- reliability_for_coder %>% slice_sample(n = 1) reliability_coding_second <- rbind(reliability_for_coder_header, read.csv("reliability_for_coder_completed_excel.csv")) reliability_coding_second <- reliability_for_coder_completed[-c(1),] #####RShiny##### #read in second coders data if made using R shiny and sent reliability_coding_second <- read.csv("reliability_for_coder_completed.csv") reliability_coding_second <- reliability_for_coder_completed[,-c(1)] #### Simulating second coder #### #Creating second coders data reliability_coding_second <- reliability_master_inc_dummy reliability_coding_second <- reliability_coding_second[,-c(2)] # this example simulates a second coder with 65% accuracy, this is arbitary and for simulation purposes only accuracy <- length(reliability_master_inc_dummy$Behaviour_ID) - round(65*(length(reliability_master_inc_dummy$Behaviour_ID))/100, 0) #these are the (random) 37 behaviours the second coder made an error on reliability_second_errors <- reliability_master_inc_dummy[sample(1:nrow(reliability_master_inc_dummy), accuracy), ] #this code makes random systematic errors in the data to simulate disagreement stored_behav_ID <- reliability_second_errors$random_behav_ID reliability_second_errors_rand_errors <- reliability_master_inc_dummy %>% slice_sample(n = accuracy, replace = T) reliability_second_errors_rand_errors$random_behav_ID <- stored_behav_ID reliability_second_errors_rand_errors <- reliability_second_errors_rand_errors[,-c(2)] ind <- match(reliability_second_errors_rand_errors$random_behav_ID, reliability_coding_second$random_behav_ID) reliability_coding_second[ind, 2:10] <- reliability_second_errors_rand_errors[2:10] #Data with simulated errors head(reliability_coding_second) ####Reliability tests#### #####Categorical tests##### reliability_tests <- data.frame(matrix(ncol = 10 , nrow = 106)) colnames(reliability_tests) <- c('category_main', 'category_second', 'sub_category_main', 'sub_category_second', 'behaviour_main', 'behaviour_second', 'handedness_main', 'handedness_second', 'sociality_main', 'sociality_second') #Category reliability_tests$category_main <- c(reliability_master_inc_dummy$category) reliability_tests$category_second <- c(reliability_coding_second$category) kappa2(reliability_tests[,1:2]) #Sub-category reliability_tests$sub_category_main <- c(reliability_master_inc_dummy$sub_category) reliability_tests$sub_category_second <- c(reliability_coding_second$sub_category) kappa2(reliability_tests[,3:4]) agree(reliability_tests[,3:4]) sub_cat_matrix <- confusionMatrix(data = reliability_tests$sub_category_second, reference = reliability_tests$sub_category_main) sub_cat_matrix_table <-as.table(sub_cat_matrix) write.table(sub_cat_matrix_table, file = "sub_cat_matrix_table.txt", sep = ",", quote = FALSE, row.names = T) #Behaviour reliability_tests$behaviour_main <- c(reliability_master_inc_dummy$behaviour) reliability_tests$behaviour_second <- c(reliability_coding_second$behaviour) kappa2(reliability_tests[,5:6]) #Handedness reliability_tests$handedness_main <- c(reliability_master_inc_dummy$handedness) reliability_tests$handedness_second <- c(reliability_coding_second$handedness) kappa2(reliability_tests[,7:8]) #Sociality reliability_tests$sociality_main <- c(reliability_master_inc_dummy$sociality) reliability_tests$sociality_second <- c(reliability_coding_second$sociality) kappa2(reliability_tests[,9:10]) #####Continuous tests##### ######Start time example ##### #fine grain reliability_tests$start_fine_main <- c(reliability_master_inc_dummy$time_start_sec) reliability_tests$start_fine_second <- c(reliability_coding_second$time_start_sec) icc(reliability_tests[,11:12], model = "oneway", type = 'consistency', unit = 'single') #coarse grain #compare first and second coders, and remove negative signs reliability_tests$start_cont_diff <- reliability_tests$start_fine_main - reliability_tests$start_fine_second #vector to store loop output start_agree_binary <- c() #loop to determine if second and first agree within a margin of error (5s) for (i in 1:length(reliability_tests$start_cont_diff)) { if(reliability_tests$start_cont_diff[i] > 5){ start_agree_binary <- append(start_agree_binary, "too low") } else if(reliability_tests$start_cont_diff[i] < -5){ start_agree_binary <- append(start_agree_binary, "too high") } else { start_agree_binary <- append(start_agree_binary, "Agree") } } #import second agreement data reliability_tests$start_agree_binary_second <- as.factor(start_agree_binary) #compare agreement start_agree_binary_main <- c() for (i in 1:length(reliability_tests$start_agree_binary_second)) { if(reliability_tests$start_agree_binary_second[i] == "too high"){ start_agree_binary_main <- append(start_agree_binary_main, "too low") } else if(reliability_tests$start_agree_binary_second[i] == "too low"){ start_agree_binary_main <- append(start_agree_binary_main, "too high") } else { start_agree_binary_main <- append(start_agree_binary_main, "Agree") } } #import main agreement data reliability_tests$start_agree_binary_main <- as.factor(start_agree_binary_main) #check kappa kappa2(reliability_tests[,14:15]) agree(reliability_tests[,14:15])