--- title: "Numeral Classifiers in Yucatec Maya: Microvariation and syntactic change" subtitle: "Supplementary Material" author: "Barbara Blaha and Stavros Skopeteas" date: "September 28, 2024" output: pdf_document: highlight: tango toc: true toc_depth: 2 number_sections: true df_print: kable fig_width: 4 fig_height: 3 keep_tex: true html_document: default --- # Global ```{r include=FALSE} knitr::opts_chunk$set(message=FALSE,warning=FALSE,attr.source='.numberLines') knitr::opts_chunk$set( fig.width = 12, # Width of the figure in inches fig.height = 9, # Height of the figure in inches dpi = 700 # High resolution ) ``` ## Libraries data processing ```{r, attr.source='.numberLines'} library(readxl) library(writexl) library(reshape2) library(scales) library(dplyr) library(stringr) library(showtext)#fonts library(kableExtra) library(broom) ``` graphs and maps ```{r} library(ggplot2) library(ggmap) library(ggspatial) ``` statistics ```{r} library(mgcv) library(mgcViz) library(itsadug) ``` ## Settings ```{r} #setwd(dirname(rstudioapi::getActiveDocumentContext()$path)) theme_set(theme_bw()) #setting the size of annotations annosize = 6 #downloads google font for graphs font_add_google("Martel Sans", family = "special") showtext_auto() ``` ## Loading data ```{r} load(file='YUC-CL.rda') ``` # Base map This function creates a base map of Yucatán, based on the GADM data (https://gadm.org) ```{r, out.width="80%", warning=FALSE, results= 'hide', fig.cap="Figure: Base map"} yuc.map.bare <- ggplot(data = gadm) + geom_sf(fill = "#faf9f8",color = "grey30",size=.1) + #FloralWhite theme(text = element_text(family = "special"), axis.text = element_text(size = 18, color="grey40"), axis.title = element_text(size = 22))+ coord_sf(xlim = c(-91, -86.7), ylim = c(18.1, 21.8), expand = T)+ scale_x_continuous(labels = abs,breaks = seq(-180,180,by=1), minor_breaks = seq(-180,0,by=.1))+#,breaks = seq(-91,-87,1), scale_y_continuous(labels = abs,breaks = seq(0, 100, by=1), minor_breaks = seq(0,100,by=0.2))+ annotation_north_arrow(location = "br", which_north = "true", height = unit(0.7, "cm"), width = unit(0.7, "cm"), pad_x = unit(0.02, "in"), pad_y = unit(0.1, "in"), style = north_arrow_fancy_orienteering(line_width = 0.4, line_col = "black", fill = c("white", "grey30"), text_col = "black", text_size = 12))+ annotation_scale(location = "br", width_hint = 0.12, line_width = 0.2, bar_cols = c("grey30", "white"), pad_y = unit(0.1, "cm"), pad_x = unit(0.1, "cm"), text_cex = 1, height = unit(0.1, "cm"))+ labs(x="Longitude (°W)",y="Latitude (°N)") yuc.map <- yuc.map.bare+ annotate("text",x=-90.4,y=20.05, label = "Camino\nReal", size = annosize, lineheight = 0.3,family="special")+ annotate("text",x=-88,y=21.1, label = "Northeast", size = annosize, family="special")+ annotate("text",x=-89.9,y=20.8, label = "Nortwest", size = annosize, family="special")+ annotate("text",x=-89.9,y=19.2, label = "Los Ch'e'enes", size = annosize, family="special")+ annotate("text",x=-89.1,y=20, label = "Southern\nYucatán", lineheight = 0.3,size = annosize)+ annotate("text",x=-88.4,y=19.6, label = "Central\nQR", lineheight = 0.3,size = annosize)+ annotate("text",x=-88.3,y=18.2, label = "BELIZE",size = annosize)+ annotate("text",x=-88.6,y=21.3, label = "YUCATAN",size = annosize)+ annotate("text",x=-88.4,y=19, label = "QUINTANA ROO",size = annosize)+ annotate("text",x=-90,y=18.8, label = "CAMPECHE",size = annosize) yuc.map ``` # Functions ## Map graphs The following function creates plots of gradient features on the base map. The maps contain 5 colors on a scale. The use of colors depends on the distribution of the data points in the dataset (see median-based distribution in p1...p5) ```{r} plot.map.gradient <- function(data) { caption <- substitute(data) data$Value.p <- rescale(data$Value.p) p5 <- 1 p4 <- 1-(1-median(data$Value.p))/2 p3 <- median(data$Value.p) p2 <- median(data$Value.p)/2 p1 <- 0 sum.plot <- yuc.map + theme(text = element_text(family = "special",size=14))+ geom_point(data = data, aes(x = Long, y = Lat,fill=Value.p), size=3, alpha=.9, shape=21)+ theme(legend.key.size = unit(0.1, 'lines'), legend.text = element_text(size=18,vjust= 1,lineheight = 0), legend.title = element_text(lineheight = 0.4, size=19,angle=0,hjust= 1), legend.position="top", legend.justification="right", legend.box.background = element_rect(color="grey30", size=.1) )+ scale_fill_gradientn(values=c(p5, p4, p3, p2, p1), colours=c("#663300", "#996633", "#CCFFCC", "#99FF33", "#66CC00"), guide = guide_colourbar(direction = "horizontal", frame.colour="grey", barheight = .7, barwidth = 4, ticks = T, title.position = "left"), breaks=c(0,1), name="% general classifier,\n rescaled to [0,1]" ) print(sum.plot) } ``` ## Statistics ### Plotting GAM coefficients ```{r} my.cex = 2 my.cex.lab = 3 plot.gam <- function(model) { caption = substitute(model) vis.plot <- vis.gam(model, plot.type="contour", color="terrain", too.far=0.15, view=c("Long","Lat"), main="", cex.axis=2, cex.lab=my.cex.lab ) text(-89.8,21, label = "Nortwest",cex=my.cex) text(-90.3,y=20.05, labels = "Camino Real", xpd=TRUE,cex=my.cex) text(-88,21.2, labels = "Northeast", xpd=TRUE,cex=my.cex) text(-89.8,19.2, labels = "Los Ch'e'enes", xpd=TRUE,cex=my.cex) text(x=-89.1,y=20, labels = "Southern Yucatán", xpd=TRUE,cex=my.cex) text(x=-88.4,y=19.6, labels = "Central QR", xpd=TRUE,cex=my.cex) text(-88.3,18.5, labels = "Belize", xpd=TRUE,cex=my.cex) print(vis.plot) } ``` Plotting GAM co-efficients with interactions of the smooth term ```{r} plot.gam.2 <- function(model,condition="") { caption = substitute(model) caption.condition = substitute(condition) vis.plot <- vis.gam(model, plot.type="contour", color="terrain", cond=list(Subtype=condition), too.far=0.15, view=c("Long","Lat"), main="", cex.axis=3, cex.lab=4 ) text(-89.8,21, label = "Nortwest",cex=my.cex) text(-90.3,y=20.05, labels = "Camino Real", xpd=TRUE,cex=my.cex) text(-88,21.2, labels = "Northeast", xpd=TRUE,cex=my.cex) text(-89.8,19.2, labels = "Los Ch'e'enes", xpd=TRUE,cex=my.cex) text(x=-89.1,y=20, labels = "Southern Yucatán", xpd=TRUE,cex=my.cex) text(x=-88.4,y=19.6, labels = "Central QR", xpd=TRUE,cex=my.cex) text(-88.3,18.5, labels = "Belize", xpd=TRUE,cex=my.cex) print(vis.plot) } ``` ### Density graph ```{r} plot.density <- function(data,xlab = "n responses by speaker",bin = 2,xlim1=0,xlim2) { caption <- substitute(data) density.plot <- ggplot(data,aes(Freq)) + geom_histogram(aes(y=..density..),binwidth=bin, colour="black", fill="grey")+ xlim(xlim1,xlim2)+ geom_density(alpha=.1, fill="blue")+ geom_vline(aes(xintercept=mean(Freq)), color="darkred", linetype="dashed", size=1)+ labs(x=paste0(xlab, " (mean = ", round(mean(data$Freq)),")"))+ theme(text = element_text(size = 30, family = "special")) print(density.plot) } ``` ### Scatter plots ```{r} plot.scatter <- function(data) { myplot <- ggplot(result.type.2, aes(x=sum, y=percent,fill=Subtype)) + geom_smooth(method=lm, aes(color=Subtype,fill=Subtype), level=.50,linetype="dotted",se=F)+ geom_jitter(aes(fill=Subtype),size=3, shape=21, width=0.5,height=0.5,alpha=0.7)+ scale_x_continuous(trans = 'log2')+ scale_colour_manual(values=c("grey","black","lightblue"),name = "Measure Type")+ scale_fill_manual(values=c("grey","black","lightblue"),name = "Measure Type")+ ylab("% multiple classifiers")+ xlab("Frequency per classifier (log-scale)")+ theme(axis.text = element_text(lineheight = 0.5, size=20), axis.title = element_text(lineheight = 0.5, size=24))+ theme(legend.text = element_text(lineheight = 0,size=20), legend.title = element_text(size=24))+ theme(legend.position="right", legend.box.background = element_rect(colour = "black")) print(myplot) } ``` # Data processing For the following analysis, all relevant data was merged in the dataframe "dta": ## Labels ### Prompts + **Question_ID**: question identifier + **Question**: Form of the question + **Q_Label**: label of the question (including Question_ID) + **Translation**: translation of the question ### Prompt Groups - **Type**: division of prompts in: sort | measure - **TypePS**: division of prompts in: sort | part | portion & sum - **TypePP**: division of prompts in: sorts | part & portion | portion - **Subtype**: division of prompts in: sorts | part | portion | sum ### Speakers + **Speaker_ID**: speaker identifier + **Birthyear**: year of speaker's birth + **Gender**: M=male, F=female + **Spanish_odds**: Spanish:Maya ratio in speaker's data (in AYM) + **Long.jitter**: latitude of Location with jitter for each speaker (to avoid overplotting speakers of the same location) + **Lat.jitter**: latitude of Location with jitter for each speaker (to avoid overplotting speakers of the same location) ### Locations + **Location**: speaker's place of living + **Lat**: latitude of Location + **Long**: longitude of Location + **Pop.size**: population size of Location + **P.yuc**: p of indigenous speakers in Location ### Responses + **Form**: transcribed response + **Value**: classifier + **Variant**: variant. Annotations of the numeric expression: - **CL.Gen**: general classifier p'éel; - **CL.Spec**: specific classifier, e.g., _túul_ 'CL.ANIM', _kúul_ 'CL.PLANT', _ts'íit_ 'CL.LONG', _maataj_ 'stem' (measure noun from Spanish), _che'_ 'tree', _xéek_ 'CL.FOOT_OF_TREE'; - **CL.Gen CL.Spec**: general classifier followed by specific classifiers; - **NV**: non-valid; - **NV_noData**: no data available; - **NV_noNoun**: no noun available; - **NV_noNum**: no numeric expression, e.g., un hombre rendered as _e máako'_ ; intended numeric expressions translated with the vague quantifier _ya'ab_ 'much/many'; - **NV_noClass**: no classifier available; - **NV_Spanish**: Spanish syntax; - The same lexical item may occur as CL.Spec and as N. It is coded as CL.Spec if it appears in a classifier construction, i.e., it is accompanied by an N without bearing possessive morphology, e.g., _jum p'éel maata wayúum_ (one CL.UNIT stem huaya) 'one stem huaya'; the same lexical item is the head N of an NP if it bears possessive morphology, e.g., _jum p'éel u maata-il wayúum_ (one CL.UNIT A.3 stem-REL huaya) 'one stem of huaya'. - In case of multiple responses: the annotation corresponds to the last valid answer. + **Comment**: explanations for Non-Valid data + **Structure**: structural representation [Annotation of the structure of the numeral expression at issue: - **A**: set A person marker; - **Adj**: Adjective (annotated in cases that it needs to be distinguished from classifiers), e.g., Q167 _um p'ée wolis sakan_ (one CL.UNIT round dough); also deverbal adjectives in _-bil_ and _-Vkbal_, which are not classifiers - **Adj.Size**: size modifier (e.g., chan): only these modifiers are annotated, to test how often they intervene between numerals and classifiers; - **CL.Gen**: general classifier _p'éel_ (CL.UNIT); - **CL.Spec**: specific classifier (sortal/mensural) or measure noun; - **D**: Determiner; - **N**: noun; - **Num**: numeral; - **(Num)**: phonologically zero numeral (only applies to 'one'). - **Rel**: relationalizer _-il/-ij/-i_ (evidence for a possessive construction); - In case of multiple responses: the annotation corresponds to the last valid answer; : preposition; + **Origin**: origin of the classifier (Maya, Spanish) [Origin of classifier: this annotation refers to the leftmost specific classifier: - **Spanish**: Spanish origin, e.g., _maataj_ 'stem', _fiilaj_ 'row'; - **Maya**: Maya origin, e.g., _p'éel_ 'CL.Unit', _túul_ 'CL.Anim', - **NV**: not applicable if no specific classifier appears. ## Data ```{r} kable(head(dta)) %>% kable_styling("striped", full_width = F) %>% scroll_box(width = "100%", height = "400px") ``` ## Sample ```{r} speakers.df <- unique(select(dta,Speaker_ID,Birthyear,Gender)) speakers.n <- nrow(speakers.df) #locations locations.n <- length(unique(dta$Location)) #age speakers.sum <- summary(speakers.df$Birthyear) speakers.gender.sum <- summary(as.factor(speakers.df$Gender)) #summary speakers.summary <- data.frame(info=c("total speakers", "female speakers", "locations", "birthyear: min", "birthyear: max", "birthyear: mean", "birthyear: median"), n=round(c(speakers.n,speakers.gender.sum[2],locations.n, speakers.sum[1],speakers.sum[6],speakers.sum[4],speakers.sum[3]),0)) kable(speakers.summary) %>% kable_styling("striped", full_width = F) ``` ## Responses ```{r} questions.n <- length(unique(dta$Question_ID)) speakers.n <- length(unique(dta$Speaker_ID)) permutations.n <- speakers.n * questions.n responses.n <- nrow(dta) responses.p <- responses.n*100/permutations.n #table responses.summary <- data.frame(info=c("n questions","n speakers","n questions*speakers", "n responses","% responses (out of n permutations)"), n=round(c(questions.n,speakers.n,permutations.n,responses.n,responses.p),0)) kable(responses.summary) %>% kable_styling("striped", full_width = F) ``` ### Non-valid data ```{r} nv <- data.frame(xtabs(~Variant+Comment,dta)) kable(nv) %>% kable_styling("striped", full_width = F) ``` ```{r} nv.n <- sum(nv$Freq) nv.content.n <- sum(nv[grepl("Content at issue", nv$Comment),]$Freq) nv.construction.n <- sum(nv[grepl("Construction at issue", nv$Comment),]$Freq) nv.summary <- data.frame(info=c("n non-valid", "n non-valid: content", "n non-valid: construction"), n=round(c(nv.n,nv.content.n,nv.construction.n),0)) kable(nv.summary) %>% kable_styling("striped", full_width = F) ``` excluding non valid data; dataset contains: ```{r} dta.v1 <- droplevels(subset(dta,Variant != "NV")) nrow(dta.v1) ``` responses with Spanish classifiers ```{r} #spanish dta.spa <- droplevels(subset(dta.v1,Origin == "Spanish")) nrow(dta.spa) ``` excluding responses with Spanish classifiers ```{r} dta.v2 <- droplevels(subset(dta.v1,Origin != "Spanish")) ``` excluding speakers who do not have at least one valid token for sortal and one valid token for mensural classifiers ```{r} speakers.n <- length(unique(dta.v2$Speaker_ID)) v.speaker <- data.frame(xtabs(~Type+Speaker_ID,dta.v2)) v.speaker.cast <- dcast(v.speaker, ... ~ Type,value.var = "Freq") nv.speakers <- v.speaker.cast[v.speaker.cast$sort == 0 | v.speaker.cast$measure == 0, ]$Speaker_ID dta.v <- droplevels(subset(dta.v2,!(Speaker_ID %in% nv.speakers))) ``` ### Valid data n and % valid ```{r} valid.n <- nrow(dta.v) valid.prop <- 100*valid.n/responses.n valid.n ``` n valid responses per question ```{r,fig.fullwidth=T,warning=FALSE, fig.cap="Figure: n valid per question"} questions.valid <- data.frame(xtabs(~Question_ID,dta.v)) summary(questions.valid$Freq) ``` ```{r, out.width="70%", warning=FALSE, results= 'hide', fig.cap="Figure: Histogramm per questions"} plot.density(questions.valid, xlab = "n responses by question",bin = 10,xlim2=180) ``` n valid responses per speaker ```{r} speakers.valid <- data.frame(xtabs(~Speaker_ID,dta.v)) summary(speakers.valid$Freq) ``` ```{r, out.width="70%", warning=FALSE, results= 'hide', fig.cap="Figure: Histogramm per speakers"} plot.density(speakers.valid,xlim2=25) ``` ### classes of prompts Prompt labels are listed for Appendix I ```{r} kable(unique(dta.v[dta.v$Subtype =="sort",]$Question_Label)) %>% kable_styling("striped", full_width = F) kable(unique(dta.v[dta.v$Subtype =="portion",]$Question_Label)) %>% kable_styling("striped", full_width = F) kable(unique(dta.v[dta.v$Subtype =="part",]$Question_Label)) %>% kable_styling("striped", full_width = F) kable(unique(dta.v[dta.v$Subtype =="sum",]$Question_Label)) %>% kable_styling("striped", full_width = F) ``` # Descriptive stats Creating two subsets of the data -- for sortal and mensural classifiers: ```{r} dta.v.sort <- dta.v[dta.v$Type =="sort",] dta.v.meas <- dta.v[dta.v$Type =="measure",] ``` ## Sortal classifiers ### Frequencies reporting frequencies of sortal classifiers ```{r} sortal_table <- dta.v.sort %>% group_by(Variant) %>% summarise(frequency = n()) %>% mutate(percentage = round(frequency*100 / sum(frequency),1)) kable(sortal_table) %>% kable_styling("striped", full_width = F) ``` excluding multiple classifiers from this analysis ```{r} dta.v.sort <- droplevels(subset(dta.v.sort, Variant != "CL.Gen CL.Spec")) dta.v.sort$Variant <- factor(dta.v.sort$Variant, levels = c("CL.Spec","CL.Gen")) ``` ### Use of the general classifier Aggregating by speaker and by location and ploting ```{r} # per speaker dta.v.sort$Substitute.p <- as.numeric(as.factor(dta.v.sort$Variant))-1 dta.v.sort.aggr.speaker <-aggregate(Substitute.p ~ Speaker_ID+Long+Lat, data= dta.v.sort[,c("Speaker_ID","Lat","Long", "Substitute.p")],FUN=mean) # per location dta.v.sort.aggr <-aggregate(Substitute.p ~ Location+Long+Lat, data= dta.v.sort[,c("Location","Lat","Long", "Substitute.p")],FUN=mean) summary(dta.v.sort$Substitute.p) nrow(dta.v.sort.aggr) dta.v.sort.aggr$Value.p <- dta.v.sort.aggr$Substitute.p ``` ```{r, out.width="80%", warning=FALSE, results= 'hide', fig.cap="Figure: Sortal classifiers on base map"} plot.map.gradient(dta.v.sort.aggr) ``` ### List of sortal classifiers ```{r} dta.v.sort$sortalCL <- dta.v.sort$Value dta.v.sort$sortalCL <- gsub(" .*","",dta.v.sort$sortalCL) sortal.classifiers <- as.data.frame(xtabs(~sortalCL, dta.v.sort)) kable(sortal.classifiers) %>% kable_styling("striped", full_width = F) sum(sortal.classifiers$Freq) ``` ## Mensural classifiers ### Data Creating a frequency table for mensural classifiers ```{r} nrow(dta.v.meas) ## prompts mensural_table <- dta.v.meas %>% group_by(Variant) %>% summarise(frequency = n()) %>% mutate(percentage = round(frequency*100 / sum(frequency),1)) kable(mensural_table) %>% kable_styling("striped", full_width = F) ``` Creating a frequency table for mensural classifiers by measure type ```{r} subtype_table <- dta.v.meas %>% group_by(Subtype,Variant) %>% summarise(frequency = n()) subtype.df <- dcast(data.frame(subtype_table), ... ~Variant) subtype.df$sum <- subtype.df$'CL.Gen CL.Spec'+ subtype.df$CL.Spec subtype.df$pCL.Gen <- round(100*subtype.df$'CL.Gen CL.Spec'/subtype.df$sum,1) subtype.df$pCL.Spec <- round(100*subtype.df$CL.Spec/subtype.df$sum,1) kable(subtype.df) %>% kable_styling("striped", full_width = F) ``` ### Use of multiple classifiers Aggregating by speaker and by location and ploting ```{r} dta.v.meas$Variant <- factor(dta.v.meas$Variant, levels = c("CL.Spec","CL.Gen CL.Spec")) dta.v.meas$Insert.p <- as.numeric(dta.v.meas$Variant)-1 dta.v.meas.aggr.speaker <-aggregate(Insert.p ~ Speaker_ID+Long.jitter+Lat.jitter, data= dta.v.meas[,c("Speaker_ID", "Lat.jitter", "Long.jitter", "Insert.p")], FUN=mean) dta.v.meas.aggr <-aggregate(Insert.p ~ Long+Lat, data= dta.v.meas[,c("Lat","Long","Insert.p")], FUN=mean) dta.v.meas.aggr$Value.p <- dta.v.meas.aggr$Insert.p dta.v.meas.aggr.speaker$Value.p <- dta.v.meas.aggr.speaker$Insert.p summary(dta.v.meas.aggr$Value.p) summary(dta.v.meas.aggr.speaker$Value.p) ``` ```{r, out.width="80%", warning=FALSE, results= 'hide', fig.cap="Figure: Mensural classifiers on base map"} plot.map.gradient(dta.v.meas.aggr) ``` Entering the p of substituting sortal by general to the mensural classifier table (for statistic evaluations) ```{r} dta.v.meas <- merge(dta.v.meas, dta.v.sort.aggr.speaker[,c("Speaker_ID","Substitute.p")], by="Speaker_ID", all.x = T) ``` ### List of mensural classifiers Creating lists of classifiers for the Appendix ```{r} dta.v.meas$mensuralCL <- dta.v.meas$Value dta.v.meas$mensuralCL <- str_remove(dta.v.meas$mensuralCL, "p'éel ") dta.v.meas$mensuralCL <- gsub(" .*","",dta.v.meas$mensuralCL) result.type <- as.data.frame(xtabs(~mensuralCL+Variant+Subtype, dta.v.meas)) result.type.2 <- dcast(result.type, mensuralCL + Subtype ~ Variant) result.type.2$sum <- result.type.2$'CL.Gen CL.Spec'+result.type.2$CL.Spec result.type.2$percent <- 100*result.type.2$'CL.Gen CL.Spec'/result.type.2$sum result.type.2 <- droplevels(subset(result.type.2,percent != "NaN")) kable(result.type.2) %>% kable_styling("striped", full_width = F) %>% scroll_box(width = "100%", height = "200px") ``` ### Frequency of mensural in multiple classifiers create scatter plot ```{r, out.width="80%", warning=FALSE, results= 'hide', fig.cap="Figure: Scatter plot for Measure types"} plot.scatter(result.type.2) ``` ## Inferential stats: Generalized additive models (GAM) ### Sortal Classifiers Maximal model: ```{r} gam.sortal.0 = mgcv::gam((Variant=="CL.Gen") ~ Spanish_odds + Birthyear + Pop.size + s(Long, Lat, bs="tp",k=15), data=dta.v.sort,method="ML") print(summary(gam.sortal.0)) ``` Model comparisons: ```{r,fig.width=8, fig.height=5,fig.fullwidth=T,warning=FALSE, fig.cap="Figure: Space coefficients for sortal classifiers"} gam.sortal.1 <- update(gam.sortal.0, . ~ . -Pop.size) compareML(gam.sortal.0,gam.sortal.1,suggest.report = T) gam.sortal.2 <- update(gam.sortal.1, . ~ . -Birthyear) compareML(gam.sortal.1,gam.sortal.2,suggest.report = T) gam.sortal.3 <- update(gam.sortal.2, . ~ . -Spanish_odds) compareML(gam.sortal.2,gam.sortal.3,suggest.report = T) ``` Coefficients of winner model ```{r} print(summary(gam.sortal.2)) ``` Coefficients of smooth term of Space ```{r, out.width="80%", warning=FALSE, results= 'hide', fig.cap="Figure: Space coefficients for Sorts"} plot.gam(gam.sortal.2) ``` ### Mensural classifiers Contrast coding: successive differences of subtypes ```{r} dta.v.meas$Subtype <- factor(dta.v.meas$Subtype) contrasts(dta.v.meas$Subtype) <- MASS::contr.sdif(3) ``` model.0: Maximal model ```{r} gam.mensural.0 = mgcv::gam((Variant=="CL.Gen CL.Spec") ~ Substitute.p + Subtype + Spanish_odds + Birthyear + Pop.size + s(Long, Lat, bs="tp",k=15,by=Subtype), data=dta.v.meas,method="ML") print(summary(gam.mensural.0)) ``` Removing SPACE : MEASURE TYPE ```{r} gam.mensural.0.interaction = mgcv::gam((Variant=="CL.Gen CL.Spec") ~ Substitute.p + Subtype + Spanish_odds + Birthyear + Pop.size + s(Long, Lat, bs="tp",k=15), data=dta.v.meas,method="ML") print(summary(gam.mensural.0.interaction)) ``` Model comparison, winner: model.0 ```{r} compareML(gam.mensural.0,gam.mensural.0.interaction,suggest.report = T) ``` Removing POPULATION SIZE Model comparison: non-significant ```{r} gam.mensural.1 <- update(gam.mensural.0, . ~ . -Pop.size) compareML(gam.mensural.0,gam.mensural.1,suggest.report = T) ``` Removing TIME Model comparison: non-significant ```{r} gam.mensural.2 <- update(gam.mensural.1, . ~ . -Birthyear) compareML(gam.mensural.1,gam.mensural.2,suggest.report = T) ``` Removing SPANISH ODDS Model comparison: significant ```{r} gam.mensural.3 <- update(gam.mensural.2, . ~ . -Spanish_odds) compareML(gam.mensural.2,gam.mensural.3,suggest.report = T)#win 2 ``` Removing MEASURE TYPE Model comparison: significant ```{r} gam.mensural.4 <- update(gam.mensural.2, . ~ . -Subtype) compareML(gam.mensural.2,gam.mensural.4,suggest.report = T)#win 2 ``` Collapsing Parts and Sums Model comparison: significant ```{r} gam.mensural.41 <- update(gam.mensural.4, . ~ . +TypePS) compareML(gam.mensural.2,gam.mensural.41,suggest.report = T)#win 2 ``` Collapsing Parts and Portions Model comparison: significant ```{r} gam.mensural.42 <- update(gam.mensural.4, . ~ . +TypePP) compareML(gam.mensural.2,gam.mensural.42,suggest.report = T)#win 2 ``` Removing Likelihood of Substitution Model comparison: significant ```{r} gam.mensural.5 <- update(gam.mensural.2, . ~ . -Substitute.p) compareML(gam.mensural.2,gam.mensural.5,suggest.report = T) ``` Coefficients of winner model ```{r} summary(gam.mensural.2) ``` Coefficients of the smooth term of space ```{r, out.width="80%", warning=FALSE, results= 'hide', fig.cap="Figure: Space coefficients for Parts"} plot.gam.2(gam.mensural.2,"part") ``` ```{r, out.width="80%", warning=FALSE, results= 'hide',fig.cap="Figure: Space coefficients for Sums"} plot.gam.2(gam.mensural.2,"sum") ``` ```{r, out.width="80%", results= 'hide', warning=FALSE, fig.cap="Figure: Space coefficients for Portions"} plot.gam.2(gam.mensural.2,"portion") ``` # References ## R and R Version ```{r} version[['version.string']] ``` ```{r} print(citation()) ``` ## Packages ```{r} packages <- c("readxl","writexl","reshape2","scales","dplyr", "stringr","showtext","ggplot2","ggmap","ggspatial", "mgcv","mgcViz","itsadug") knitr::write_bib(c(packages), width = 60) ``` --- end of documentation ---