LFQData R6 class
LFQData R6 class
Missing Data Assumptions
The filtering and imputation methods in this package assume that missing values are Missing Completely At Random (MCAR) or Missing At Random (MAR). Abundance-dependent missingness (MNAR), which is common in DDA proteomics, is not modelled. Users should be aware that MNAR can bias fold-change estimates and inflate false discovery rates.
Public fields
config: AnalysisConfiguration
data: data.frame or tibble matching AnalysisConfiguration.
prefix: e.g. "peptide_", "protein_", "compound_"
Methods
Method new()
initialize
Usage
LFQData$new(data, config, prefix = "ms_", setup = FALSE)
Arguments
data: data.frame
config: configuration
prefix: will be used as output prefix
setup: is data setup needed, default = FALSE; if TRUE, calls setup_analysis on data first.
is_pep: todo
Method remove_small_intensities()
Some software reports NAs as 0; such values must be removed from your data.
Method filter_proteins_by_peptide_count()
remove proteins with fewer than X peptides
Method omit_NA()
Omit NA from intensities per hierarchy (e.g. protein or peptide); the idea is to use it for normalization. For instance, if a peptide has a missing value in more than nrNA of the samples within a condition, it will be removed.
Method complete_cases()
some software is reporting NA's as 0, you must remove it from your data
Method summarize_hierarchy()
e.g. number of peptides per protein etc
Method get_Stats()
Get LFQDataStats. For more details see LFQDataStats.
Usage
LFQData$get_Stats(stats = c("everything", "interaction", "all"))
Method filter_difference()
get difference of self with other if other is subset of self
Examples
istar <- sim_lfq_data_peptide_config()
#> creating sampleName from fileName column
#> completing cases
#> completing cases done
#> setup done
lfqdata <- LFQData$new(istar$data, istar$config)
lfqdata$filter_proteins_by_peptide_count()
#> removing proteins with less than: 2 peptpides
#> Column added : nr_peptide_Id_IN_protein_Id
tmp <- lfqdata$to_wide()
testthat::expect_equal(nrow(tmp$data) , nrow(tmp$rowdata))
testthat::expect_equal(ncol(tmp$data) , nrow(tmp$annotation) + ncol(tmp$rowdata))
stopifnot("data.frame" %in% class(tmp$data))
tmp <- lfqdata$to_wide(as.matrix = TRUE)
stopifnot("matrix" %in% class(tmp$data))
stopifnot(lfqdata$is_transformed()==FALSE)
lfqdata$summarize_hierarchy()
#> # A tibble: 6 × 3
#> protein_Id isotopeLabel_n peptide_Id_n
#> <chr> <int> <int>
#> 1 0EfVhX~0087 1 3
#> 2 BEJI92~5282 1 2
#> 3 Fl4JiV~8625 1 4
#> 4 HvIpHG~9079 1 2
#> 5 JcKVfU~9653 1 7
#> 6 SGIVBl~5782 1 6
# filter for missing values
f1 <- lfqdata$omit_NA(nrNA = 0)
#> completing cases
#> Joining with `by = join_by(protein_Id, peptide_Id)`
stopifnot(f1$hierarchy_counts() <= lfqdata$hierarchy_counts())
f2 <- lfqdata$omit_NA(factorDepth = 0)
#> completing cases
#> Joining with `by = join_by(protein_Id, peptide_Id)`
stopifnot(f2$hierarchy_counts() <= lfqdata$hierarchy_counts())
lfqdata$response()
#> [1] "abundance"
lfqdata$rename_response("peptide.intensity")
lfqdata$response()
#> [1] "peptide.intensity"
stopifnot("LFQData" %in% class(lfqdata$get_copy()))
stopifnot("LFQDataTransformer" %in% class(lfqdata$get_Transformer()))
stopifnot("LFQDataStats" %in% class(lfqdata$get_Stats()))
#> completing cases
#> completing cases
stopifnot("LFQDataSummariser" %in% class(lfqdata$get_Summariser()))
stopifnot("LFQDataPlotter" %in% class(lfqdata$get_Plotter()))
stopifnot("LFQDataImp" %in% class(lfqdata$get_Imputer()))
stopifnot("LFQDataAggregator" %in% class(lfqdata$get_Aggregator()))
lfqdata2 <- lfqdata$get_copy()
lfqdata2$data <- lfqdata2$data[1:100,]
res <- lfqdata$filter_difference(lfqdata2)
stopifnot(nrow(res$data) == nrow(lfqdata$data) - 100)
tmp <- lfqdata$get_sample(5, seed = 4)
#> Sampling 5protein_Id
#> Joining with `by = join_by(protein_Id)`
stopifnot(nrow(tmp$hierarchy()) == 5)