diff --git a/tools/mfassignr/help.xml b/tools/mfassignr/help.xml index cb80ad2ff..51209ebd4 100644 --- a/tools/mfassignr/help.xml +++ b/tools/mfassignr/help.xml @@ -32,9 +32,10 @@ The recommended workflow how to run the MFAssignR package is as follows: (3) Use IsoFiltR() to identify potential 13C and 34S isotope masses. (4) Using the S/N threshold, and the two data frames output from IsoFiltR(), run MFAssignCHO() to assign MF with C, H, and O to assess the mass accuracy. (5) Use RecalList() to generate a list of the potential recalibrant series. -(6) After choosing recalibrant series, use Recal() to recalibrate the mass lists. -(7) Assign MF to the recalibrated mass list using MFAssign(). -(8) Check the output plots from MFAssign() to evaluate the quality of the assignments. +(6) Choose the most suitable recalibrant series using FindRecalSeries(). +(7) After choosing recalibrant series, use Recal() to recalibrate the mass lists. +(8) Assign MF to the recalibrated mass list using MFAssign(). +(9) Check the output plots from MFAssign() to evaluate the quality of the assignments. For detailed documentation on the individual steps please see the individual tool wrappers. @@ -49,8 +50,8 @@ KMDnoise is a Kendrick Mass Defect (KMD) approach for the noise estimation. It s Output: -- noise estimate - (this noise level can then be multiplied by the user chosen value (3, 6, 10) in order to set the signal to noise cut for formula assignment.) -- KMD plot - bounds of the noise estimation area are highlighted in red +- noise estimate - this noise level can then be multiplied by the user chosen value (3, 6, 10) in order to set the signal to noise cut for formula assignment. +- KMD plot - bounds of the noise estimation area are highlighted in red. 
@@ -64,7 +65,7 @@ HistNoise function creates a histogram using natural log of the intensity, which Output: - noise estimate - this noise level can then be multiplied by the user chosen value in order to set the signal to noise cut for formula assignment -- Histogram - shows where the cut is being applied123 +- Histogram - shows where the cut is being applied @@ -118,7 +119,7 @@ Output: MFAssignR - RecalList ============================= -This tool is the fifth step of the MFAssignR workflow (MFAssignCHO -> RecalList -> Recal) +This tool is the fifth step of the MFAssignR workflow (MFAssignCHO -> RecalList -> FindRecalSeries) RecalList() function identifies the homologous series that could be used for recalibration. On the input, there is the output from MFAssign() or MFAssignCHO() functions. It returns a dataframe that contains the CH2 homologous series that contain more than 3 members. @@ -127,11 +128,34 @@ Output: - Dataframe that contains the CH2 homologous series that contain more than 3 members. + +MFAssignR - FindRecalSeries +============================= + +This tool is the sixth step of the MFAssignR workflow (RecalList -> FindRecalSeries -> Recal) + +This function takes as input the CH2 homologous recalibration series provided by the RecalList function and tries to find the most suitable combination of series for recalibration based on the following criteria: + +(1) Series should cover the full mass spectral range, +(2) Series should be optimally long and, combined, have a “Tall Peak” at least every 100 m/z, +(3) Abundance score: the higher, the better, +(4) Peak score: the closer to 0, the better, +(5) Peak Distance: the closer to 1, the better, +(6) Series Score: the closer to this value, the better. + +Combinations of 5 series are assembled, scores are computed for the other metrics (for Peak proximity and Peak +distance, an inverted score is computed), and these scores are summed. 
Finally, either as many series as the combination size or the top 10 unique series with the highest scores are returned. + +Output: + +- Dataframe of n or 10 most suitable recalibrant series. + + MFAssignR - Recal ============================= -This tool is the sixth step of the MFAssignR workflow (RecalList -> Recal -> MFAssign) +This tool is the seventh step of the MFAssignR workflow (FindRecalSeries -> Recal -> MFAssign) Recal() function recalibrates the 'Mono' and 'Iso' outputs from the IsoFiltR() function and prepares a dataframe containing chose recalibrants. Also it outputs a plot for the qualitative assessment of recalibrants. The input to the function is output from MFAssign() or MFAssignCHO(). diff --git a/tools/mfassignr/macros.xml b/tools/mfassignr/macros.xml index a8920e439..4a2c6724b 100644 --- a/tools/mfassignr/macros.xml +++ b/tools/mfassignr/macros.xml @@ -1,5 +1,5 @@ - 1.0.3 + 1.1.1 r-mfassignr @@ -96,6 +96,26 @@ help= "Upper limit of molecular mass to be assigned."/> + + + + + + + + + + + @@ -104,7 +124,7 @@ + help= "Isopeaks data frame, the Mono output from IsoFiltR"/> + Selects most suitable series for recalibration + + macros.xml + help.xml + + + topic_3172 + + + operation_3627 + + + + + + + + + dplyr::rename(Series = series) + + write.table(result, file="$final_series", sep="\t", row.names=FALSE) + ]]> + + + + + + + + + + + + + + + + + + + + + @FINDRECALSERIES_HELP@ + + @GENERAL_HELP@ + + + diff --git a/tools/mfassignr/mfassignr_isofiltr.xml b/tools/mfassignr/mfassignr_isofiltr.xml index b5e7a197c..cf0423d23 100644 --- a/tools/mfassignr/mfassignr_isofiltr.xml +++ b/tools/mfassignr/mfassignr_isofiltr.xml @@ -1,7 +1,8 @@ - + Separates likely isotopic masses from monoisotopic masses in a mass list macros.xml + help.xml topic_3172 diff --git a/tools/mfassignr/mfassignr_recal.xml b/tools/mfassignr/mfassignr_recal.xml index cdebd1b63..e5588a272 100644 --- a/tools/mfassignr/mfassignr_recal.xml +++ b/tools/mfassignr/mfassignr_recal.xml @@ -86,8 +86,8 @@ - 
- + + - + diff --git a/tools/mfassignr/mfassignr_snplot.xml b/tools/mfassignr/mfassignr_snplot.xml index 54c80e5bb..ebe0ce563 100644 --- a/tools/mfassignr/mfassignr_snplot.xml +++ b/tools/mfassignr/mfassignr_snplot.xml @@ -1,4 +1,4 @@ - + Noise level assessment using the SNplot function. macros.xml diff --git a/tools/mfassignr/test-data/findrecalseries/selected_series.tabular b/tools/mfassignr/test-data/findrecalseries/selected_series.tabular new file mode 100644 index 000000000..b4d0dbe91 --- /dev/null +++ b/tools/mfassignr/test-data/findrecalseries/selected_series.tabular @@ -0,0 +1,6 @@ +"Series" "total_abundance" "total_series_length" "peak_proximity" "peak_distance_proximity" "series_id" "sum_score" +"O_H_7" 437.136255030871 504.562 129.612788237483 2723.59808058946 "O_H_7 O2_H_6 O2_H_11" 3794.90912385781 +"O2_H_6" 437.136255030871 504.562 129.612788237483 2723.59808058946 "O_H_7 O2_H_6 O2_H_11" 3794.90912385781 +"O2_H_11" 437.136255030871 504.562 129.612788237483 2723.59808058946 "O_H_7 O2_H_6 O2_H_11" 3794.90912385781 +"O4_H_11" 943.304144088114 392.438 134.36084248065 1826.47532994759 "O2_H_6 O2_H_11 O4_H_11" 3296.57831651636 +"O3_H_12" 330.037060987448 364.407 135.12153276257 1826.47538570915 "O2_H_6 O3_H_12 O2_H_11" 2656.04097945917 diff --git a/tools/mfassignr/test-data/recallist/recal_series.tabular b/tools/mfassignr/test-data/recallist/recal_series.tabular index bcb182947..2c82bd06f 100644 --- a/tools/mfassignr/test-data/recallist/recal_series.tabular +++ b/tools/mfassignr/test-data/recallist/recal_series.tabular @@ -1,4 +1,4 @@ -"Series" "Number Observed" "Series Index" "Mass Range" "Tall Peak" "Abundance Score" "Peak Score" "Peak Distance" "Series Score" +"Series" "Number.Observed" "Series.Index" "Mass.Range" "Tall.Peak" "Abundance.Score" "Peak.Score" "Peak.Distance" "Series.Score" "_H_4" 8 54 "121.101-177.164" 121.101133005817 -40.744548219864 0.513008460106113 2.0022239614575 0.6255625 "_H_5" 7 60 "119.085-203.179" 119.085478343665 0 
0.00189700255432302 2.002230609633 1.00095918367347 "_H_6" 9 49 "117.07-173.133" 145.101380561463 -56.0021549559649 0.13569292865165 2.00220483719043 0.556055555555556