OpenMS  2.5.0
OpenSwathBase.h
Go to the documentation of this file.
1 // --------------------------------------------------------------------------
2 // OpenMS -- Open-Source Mass Spectrometry
3 // --------------------------------------------------------------------------
4 // Copyright The OpenMS Team -- Eberhard Karls University Tuebingen,
5 // ETH Zurich, and Freie Universitaet Berlin 2002-2020.
6 //
7 // This software is released under a three-clause BSD license:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in the
12 // documentation and/or other materials provided with the distribution.
13 // * Neither the name of any author or any participating institution
14 // may be used to endorse or promote products derived from this software
15 // without specific prior written permission.
16 // For a full list of authors, refer to the file AUTHORS.
17 // --------------------------------------------------------------------------
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 // ARE DISCLAIMED. IN NO EVENT SHALL ANY OF THE AUTHORS OR THE CONTRIBUTING
22 // INSTITUTIONS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
23 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
24 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
25 // OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
26 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
27 // OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
28 // ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 //
30 // --------------------------------------------------------------------------
31 // $Maintainer: Hannes Roest$
32 // $Authors: Hannes Roest$
33 // --------------------------------------------------------------------------
34 
35 #pragma once
36 
37 // Consumers
40 
41 // Files
45 #include <OpenMS/FORMAT/MzMLFile.h>
54 
55 // Kernel and implementations
61 
62 // Helpers
66 
67 // Algorithms
73 
75 
76 #include <cassert>
77 #include <limits>
78 
79 
81 
82 
83 
84 namespace OpenMS
85 {
86 
88  public TOPPBase
89 {
90 
91 public:
92 
/// Constructor: forwards @p name, @p description and @p official unchanged to the TOPPBase constructor.
/// @p official defaults to true.
93  TOPPOpenSwathBase(String name, String description, bool official = true) :
94  TOPPBase(name, description, official)
95  {
96  }
97 
98 private:
99 
/**
  @brief Load the SWATH maps from the given input file(s) into @p swath_maps.

  SwathFile::loadSplit is used when @p split_file is set or when more than one
  file is given. Otherwise the loader is chosen from the file type derived from
  the name of the single input file (mzML, mzXML or sqMass).

  @param file_list Input file(s). Must not be empty unless @p split_file is set:
         the single-file branch reads file_list[0] without a size check.
  @param split_file Use SwathFile::loadSplit even if only one file is given
  @param tmp Forwarded unchanged to the SwathFile loaders (not used for sqMass)
  @param readoptions Forwarded unchanged to the SwathFile loaders (not used for sqMass)
  @param exp_meta Passed by reference to the selected loader
  @param swath_maps Assigned the maps returned by the selected loader
  @param plugin_consumer Forwarded to SwathFile::loadMzML only; the other loaders do not receive it

  @throws Exception::IllegalArgument if the single input file is not recognized as mzML, mzXML or sqMass
*/
100  void loadSwathFiles_(const StringList& file_list,
101  const bool split_file,
102  const String& tmp,
103  const String& readoptions,
104  boost::shared_ptr<ExperimentalSettings > & exp_meta,
105  std::vector< OpenSwath::SwathMap > & swath_maps,
106  Interfaces::IMSDataConsumer* plugin_consumer)
107  {
108  SwathFile swath_file;
109  swath_file.setLogType(log_type_);
110 
111  if (split_file || file_list.size() > 1)
112  {
113  // TODO cannot use data reduction here any more ...
114  swath_maps = swath_file.loadSplit(file_list, tmp, exp_meta, readoptions);
115  }
116  else
117  {
     // Single-file mode: dispatch on the type derived from the file name only.
118  FileTypes::Type in_file_type = FileHandler::getTypeByFileName(file_list[0]);
119  if (in_file_type == FileTypes::MZML)
120  {
121  swath_maps = swath_file.loadMzML(file_list[0], tmp, exp_meta, readoptions, plugin_consumer);
122  }
123  else if (in_file_type == FileTypes::MZXML)
124  {
125  swath_maps = swath_file.loadMzXML(file_list[0], tmp, exp_meta, readoptions);
126  }
127  else if (in_file_type == FileTypes::SQMASS)
128  {
129  swath_maps = swath_file.loadSqMass(file_list[0], exp_meta);
130  }
131  else
132  {
     // The message lists every type accepted above, including sqMass.
133  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION,
134  "Input file needs to have ending mzML, mzXML or sqMass");
135  }
136  }
137  }
138 
139 protected:
140 
/**
  @brief Load the input files into @p swath_maps and check the resulting extraction windows.

  Steps performed, in order:
  - load the maps through loadSwathFiles_()
  - if @p swath_windows_file is not empty, re-annotate the maps from that file
  - log lower/upper bound and number of spectra of every map at debug level
  - collect the (lower, upper) pairs of all maps not flagged as ms1, sort them
    and compare each window with its predecessor for gaps and overlaps

  @param file_list Input file(s), forwarded to loadSwathFiles_()
  @param exp_meta Forwarded to loadSwathFiles_()
  @param swath_maps Receives the loaded (and optionally re-annotated) maps
  @param split_file Forwarded to loadSwathFiles_()
  @param tmp Forwarded to loadSwathFiles_()
  @param readoptions Forwarded to loadSwathFiles_()
  @param swath_windows_file Optional window definition file; skipped when empty
  @param min_upper_edge_dist Subtracted from the upper bound of the preceding window before comparing
  @param force Passed to the window annotation; also turns a detected gap/overlap from an abort into a warning
  @param sort_swath_maps Passed to the window annotation as its sort flag
  @param sonar If set, the overlap check is skipped (the gap check still runs)
  @param prm If set, both the gap and the overlap check are skipped
  @param plugin_consumer Forwarded to loadSwathFiles_()

  @return false if a gap or an overlap larger than 0.01 was found and @p force is not set, true otherwise
*/
165  bool loadSwathFiles(const StringList& file_list,
166  boost::shared_ptr<ExperimentalSettings >& exp_meta,
167  std::vector< OpenSwath::SwathMap >& swath_maps,
168  const bool split_file,
169  const String& tmp,
170  const String& readoptions,
171  const String& swath_windows_file,
172  const double min_upper_edge_dist,
173  const bool force,
174  const bool sort_swath_maps,
175  const bool sonar,
176  const bool prm,
177  Interfaces::IMSDataConsumer* plugin_consumer = nullptr)
178  {
179  // (i) Load files
180  loadSwathFiles_(file_list, split_file, tmp, readoptions, exp_meta, swath_maps, plugin_consumer);
181 
182  // (ii) Allow the user to specify the SWATH windows
183  if (!swath_windows_file.empty())
184  {
185  SwathWindowLoader::annotateSwathMapsFromFile(swath_windows_file, swath_maps, sort_swath_maps, force);
186  }
187 
     // NOTE(review): sptr is dereferenced for every map here - assumes the loaders never leave it null; confirm.
188  for (Size i = 0; i < swath_maps.size(); i++)
189  {
190  OPENMS_LOG_DEBUG << "Found swath map " << i
191  << " with lower " << swath_maps[i].lower
192  << " and upper " << swath_maps[i].upper
193  << " and " << swath_maps[i].sptr->getNrSpectra()
194  << " spectra." << std::endl;
195  }
196 
197  // (iii) Sanity check: there should be no overlap between the windows:
     // Only maps that are not flagged as ms1 take part in the check.
198  std::vector<std::pair<double, double>> sw_windows;
199  for (Size i = 0; i < swath_maps.size(); i++)
200  {
201  if (!swath_maps[i].ms1)
202  {
203  sw_windows.push_back(std::make_pair(swath_maps[i].lower, swath_maps[i].upper));
204  }
205  }
206  // sort by lower bound (first entry in pair)
207  std::sort(sw_windows.begin(), sw_windows.end());
208 
     // Compare each window with its predecessor in sorted order (starts at 1, so fewer than two windows means no checks).
209  for (Size i = 1; i < sw_windows.size(); i++)
210  {
211  double lower_map_end = sw_windows[i-1].second - min_upper_edge_dist;
212  double upper_map_start = sw_windows[i].first;
213  OPENMS_LOG_DEBUG << "Extraction will go up to " << lower_map_end << " and continue at " << upper_map_start << std::endl;
214 
215  if (prm) {continue;} // skip next step as expect them to overlap and have gaps...
216 
     // Gap: the next window starts more than 0.01 after the (shortened) end of the previous one.
217  if (upper_map_start - lower_map_end > 0.01)
218  {
219  OPENMS_LOG_WARN << "Extraction will have a gap between " << lower_map_end << " and " << upper_map_start << std::endl;
220  if (!force)
221  {
222  OPENMS_LOG_ERROR << "Extraction windows have a gap. Will abort (override with -force)" << std::endl;
223  return false;
224  }
225  }
226 
227  if (sonar) {continue;} // skip next step as expect them to overlap ...
228 
     // Overlap: the (shortened) end of the previous window lies more than 0.01 beyond the start of the next one.
229  if (lower_map_end - upper_map_start > 0.01)
230  {
231  OPENMS_LOG_WARN << "Extraction will overlap between " << lower_map_end << " and " << upper_map_start << "!\n"
232  << "This will lead to multiple extraction of the transitions in the overlapping region "
233  << "which will lead to duplicated output. It is very unlikely that you want this." << "\n"
234  << "Please fix this by providing an appropriate extraction file with -swath_windows_file" << std::endl;
235  if (!force)
236  {
237  OPENMS_LOG_ERROR << "Extraction windows overlap. Will abort (override with -force)" << std::endl;
238  return false;
239  }
240  }
241  }
242  return true;
243  }
244 
/**
  @brief Create the consumer that receives the extracted chromatograms and store it in @p *chromatogramConsumer.

  - @p out_chrom empty: a NoopMSDataWritingConsumer constructed with an empty name
  - @p out_chrom ends in ".sqmass" (compared in lower case): an MSDataSqlConsumer
  - otherwise: a PlainMSDataWritingConsumer with index writing, numpress
    configuration and compression enabled

  The consumer is allocated with new and never deleted in this function;
  presumably the caller is responsible for releasing it - confirm at the call site.

  @param chromatogramConsumer Output: receives the pointer to the newly created consumer
  @param exp_meta Experimental settings copied into the plain writing consumer (dereferenced only in that branch)
  @param transition_exp Only the number of its transitions is used, as the expected chromatogram count
  @param out_chrom Output file name; selects the consumer type as described above

  @note This listing omits original lines 279 and 282. npconfig_mz is used below
        but its declaration is not visible here (presumably it is on line 282).
*/
258  void prepareChromOutput(Interfaces::IMSDataConsumer ** chromatogramConsumer,
259  const boost::shared_ptr<ExperimentalSettings>& exp_meta,
260  const OpenSwath::LightTargetedExperiment& transition_exp,
261  const String& out_chrom)
262  {
263  if (!out_chrom.empty())
264  {
     // toLower() modifies the string in place, so work on a copy and keep out_chrom as given.
265  String tmp = out_chrom;
266  if (tmp.toLower().hasSuffix(".sqmass"))
267  {
268  bool full_meta = false; // can lead to very large files in memory
269  bool lossy_compression = true;
     // NOTE(review): the meaning of the literal 500 is not visible here - check the MSDataSqlConsumer constructor.
270  *chromatogramConsumer = new MSDataSqlConsumer(out_chrom, 500, full_meta, lossy_compression);
271  }
272  else
273  {
274  PlainMSDataWritingConsumer * chromConsumer = new PlainMSDataWritingConsumer(out_chrom);
     // NOTE(review): size() is narrowed to int here, although setExpectedSize takes Size arguments.
275  int expected_chromatograms = transition_exp.transitions.size();
     // Zero spectra expected, one chromatogram per transition.
276  chromConsumer->setExpectedSize(0, expected_chromatograms);
277  chromConsumer->setExperimentalSettings(*exp_meta);
278  chromConsumer->getOptions().setWriteIndex(true); // ensure that we write the index
280 
281  // prepare data structures for lossy compression
283  MSNumpressCoder::NumpressConfig npconfig_int;
284  npconfig_mz.estimate_fixed_point = true; // critical
285  npconfig_int.estimate_fixed_point = true; // critical
286  npconfig_mz.numpressErrorTolerance = -1.0; // skip check, faster
287  npconfig_int.numpressErrorTolerance = -1.0; // skip check, faster
288  npconfig_mz.setCompression("linear");
289  npconfig_int.setCompression("slof");
290  npconfig_mz.linear_fp_mass_acc = 0.05; // set the desired RT accuracy in seconds
291 
     // npconfig_mz configures the mass/time dimension, npconfig_int the intensity dimension.
292  chromConsumer->getOptions().setNumpressConfigurationMassTime(npconfig_mz);
293  chromConsumer->getOptions().setNumpressConfigurationIntensity(npconfig_int);
294  chromConsumer->getOptions().setCompression(true);
295 
296  *chromatogramConsumer = chromConsumer;
297  }
298  }
299  else
300  {
     // No output file requested: hand out a consumer that performs no operation.
301  *chromatogramConsumer = new NoopMSDataWritingConsumer("");
302  }
303  }
304 
/**
  @brief Load a transition list from a TraML, PQP or TSV file into a LightTargetedExperiment.

  @note This listing omits original line 313, which holds the return type, the
        function name (loadTransitionList) and the first parameter, tr_type.

  - TRAML: loaded into a TargetedExperiment, then converted with
    OpenSwathDataAccessHelper::convertTargetedExp
  - PQP: read directly with TransitionPQPFile
  - TSV: read with a TransitionTSVFile configured from @p tsv_reader_param

  @param tr_file Path of the transition file
  @param tsv_reader_param Parameters applied to the TSV reader; used in the TSV branch only

  @return The loaded transitions
  @throws Exception::IllegalArgument if tr_type is none of TRAML, PQP or TSV (an error is logged first)
*/
314  const String& tr_file,
315  const Param& tsv_reader_param)
316  {
317  OpenSwath::LightTargetedExperiment transition_exp;
     // The progress logger uses the log type configured for this tool.
318  ProgressLogger progresslogger;
319  progresslogger.setLogType(log_type_);
320  if (tr_type == FileTypes::TRAML)
321  {
322  progresslogger.startProgress(0, 1, "Load TraML file");
323  TargetedExperiment targeted_exp;
324  TraMLFile().load(tr_file, targeted_exp);
325  OpenSwathDataAccessHelper::convertTargetedExp(targeted_exp, transition_exp);
326  progresslogger.endProgress();
327  }
328  else if (tr_type == FileTypes::PQP)
329  {
330  progresslogger.startProgress(0, 1, "Load PQP file");
331  TransitionPQPFile().convertPQPToTargetedExperiment(tr_file.c_str(), transition_exp);
332  progresslogger.endProgress();
333  }
334  else if (tr_type == FileTypes::TSV)
335  {
336  progresslogger.startProgress(0, 1, "Load TSV file");
337  TransitionTSVFile tsv_reader;
338  tsv_reader.setParameters(tsv_reader_param);
339  tsv_reader.convertTSVToTargetedExperiment(tr_file.c_str(), tr_type, transition_exp);
340  progresslogger.endProgress();
341  }
342  else
343  {
344  OPENMS_LOG_ERROR << "Provide valid TraML, TSV or PQP transition file." << std::endl;
345  throw Exception::IllegalArgument(__FILE__, __LINE__, OPENMS_PRETTY_FUNCTION, "Need to provide valid input file.");
346  }
347  return transition_exp;
348  }
349 
/**
  @brief Obtain the retention time transformation, either from an existing file or by running the iRT workflow.

  @note This listing omits original line 382 (return type, the function name
        performCalibration and the first parameter, trafo_in) and line 421
        (presumably the declaration of wf, which is used below without a visible declaration).

  - trafo_in not empty: the transformation is loaded from that file without
    fitting, then a model is fitted with parameters taken from the tool's
    "model:" section and from @p irt_detection_param
  - otherwise, @p irt_tr_file not empty: the iRT transitions are loaded and
    wf.performRTNormalization() computes the transformation; it is stored to
    @p irt_trafo_out if that name is not empty
  - both empty: a default-constructed TransformationDescription is returned

  @param irt_tr_file Transition file with the iRT peptides; its type is determined with FileHandler::getType
  @param swath_maps Forwarded to performRTNormalization
  @param min_rsq Forwarded to performRTNormalization
  @param min_coverage Forwarded to performRTNormalization
  @param feature_finder_param Forwarded to performRTNormalization
  @param cp_irt Forwarded to performRTNormalization
  @param irt_detection_param Source of "lowess:span", "b_spline:num_nodes" and "alignmentMethod" in the trafo_in branch; otherwise forwarded
  @param calibration_param Forwarded to performRTNormalization
  @param debug_level Forwarded to performRTNormalization
  @param sonar Forwarded to performRTNormalization
  @param load_into_memory Forwarded to performRTNormalization
  @param irt_trafo_out Optional output file for the computed transformation (iRT branch only)
  @param irt_mzml_out Forwarded to performRTNormalization

  @return The loaded or computed transformation
*/
383  String irt_tr_file,
384  std::vector< OpenSwath::SwathMap > & swath_maps,
385  double min_rsq,
386  double min_coverage,
387  const Param& feature_finder_param,
388  const ChromExtractParams& cp_irt,
389  const Param& irt_detection_param,
390  const Param& calibration_param,
391  Size debug_level,
392  bool sonar,
393  bool load_into_memory,
394  const String& irt_trafo_out,
395  const String& irt_mzml_out)
396  {
397  TransformationDescription trafo_rtnorm;
398 
399  if (!trafo_in.empty())
400  {
401  // get read RT normalization file
402  TransformationXMLFile trafoxml;
     // Third argument false: load without fitting; the model is fitted explicitly below.
403  trafoxml.load(trafo_in, trafo_rtnorm, false);
     // Start from the tool's "model:" parameters (prefix removed) and override three entries.
404  Param model_params = getParam_().copy("model:", true);
405  model_params.setValue("symmetric_regression", "false");
406  model_params.setValue("span", irt_detection_param.getValue("lowess:span"));
407  model_params.setValue("num_nodes", irt_detection_param.getValue("b_spline:num_nodes"));
408  String model_type = irt_detection_param.getValue("alignmentMethod");
409  trafo_rtnorm.fitModel(model_type, model_params);
410  }
411  else if (!irt_tr_file.empty())
412  {
413  // Loading iRT file
414  std::cout << "Will load iRT transitions and try to find iRT peptides" << std::endl;
     // NOTE(review): traml is not referenced in the visible lines of this block.
415  TraMLFile traml;
416  FileTypes::Type tr_type = FileHandler::getType(irt_tr_file);
     // The iRT list is always read with the default TSV reader parameters.
417  Param tsv_reader_param = TransitionTSVFile().getDefaults();
418  OpenSwath::LightTargetedExperiment irt_transitions = loadTransitionList(tr_type, irt_tr_file, tsv_reader_param);
419 
420  // perform extraction
422  wf.setLogType(log_type_);
     // im_trafo is passed by reference to performRTNormalization; it is not used afterwards in this function.
423  TransformationDescription im_trafo;
424  trafo_rtnorm = wf.performRTNormalization(irt_transitions, swath_maps, im_trafo,
425  min_rsq, min_coverage,
426  feature_finder_param,
427  cp_irt, irt_detection_param,
428  calibration_param, irt_mzml_out, debug_level, sonar,
429  load_into_memory);
430 
431  if (!irt_trafo_out.empty())
432  {
433  TransformationXMLFile().store(irt_trafo_out, trafo_rtnorm);
434  }
435  }
436  return trafo_rtnorm;
437  }
438 
439 
440 };
441 
442 }
443 
445 
446 
A more convenient string class.
Definition: String.h:58
static void annotateSwathMapsFromFile(const std::string &filename, std::vector< OpenSwath::SwathMap > &swath_maps, bool do_sort, bool force)
Annotate a Swath map using a Swath window file specifying the individual windows. ...
void startProgress(SignedSize begin, SignedSize end, const String &label) const
Initializes the progress display.
void setWriteIndex(bool write_index)
Whether to write an index at the end of the file (e.g. indexedmzML file format)
any TSV file, for example msInspect file or OpenSWATH transition file (see TransitionTSVFile) ...
Definition: FileTypes.h:87
double linear_fp_mass_acc
Desired mass accuracy for *linear* encoding.
Definition: MSNumpressCoder.h:130
bool hasSuffix(const String &string) const
true if String ends with string, false otherwise
String & toLower()
Converts the string to lowercase.
bool estimate_fixed_point
Whether to estimate the fixed point used for encoding (highly recommended)
Definition: MSNumpressCoder.h:122
PeakFileOptions & getOptions()
Get the peak file options.
ChromatogramExtractor parameters.
Definition: OpenSwathWorkflow.h:82
std::vector< OpenSwath::SwathMap > loadMzML(const String &file, const String &tmp, boost::shared_ptr< ExperimentalSettings > &exp_meta, const String &readoptions="normal", Interfaces::IMSDataConsumer *plugin_consumer=nullptr)
Loads a Swath run from a single mzML file.
const DataValue & getValue(const String &key) const
Returns a value of a parameter.
std::vector< OpenSwath::SwathMap > loadMzXML(String file, String tmp, boost::shared_ptr< ExperimentalSettings > &exp_meta, String readoptions="normal")
Loads a Swath run from a single mzXML file.
void convertPQPToTargetedExperiment(const char *filename, OpenMS::TargetedExperiment &targeted_exp, bool legacy_traml_id=false)
Read in a PQP file and construct a targeted experiment (TraML structure)
TOPPOpenSwathBase(String name, String description, bool official=true)
Definition: OpenSwathBase.h:93
double numpressErrorTolerance
Check error tolerance after encoding.
Definition: MSNumpressCoder.h:107
DataProcessing getProcessingInfo_(DataProcessing::ProcessingAction action) const
Returns the data processing information.
OpenSwath::LightTargetedExperiment loadTransitionList(const FileTypes::Type &tr_type, const String &tr_file, const Param &tsv_reader_param)
Loads transition list from TraML / TSV or PQP.
Definition: OpenSwathBase.h:313
void setNumpressConfigurationIntensity(MSNumpressCoder::NumpressConfig config)
Set numpress configuration options for intensity dimension.
void store(String filename, const TransformationDescription &transformation)
Stores the data in a TransformationXML file.
Smoothing of the signal to reduce noise.
Definition: DataProcessing.h:63
Base class for TOPP applications.
Definition: TOPPBase.h:144
static void convertTargetedExp(const OpenMS::TargetedExperiment &transition_exp_, OpenSwath::LightTargetedExperiment &transition_exp)
convert from the OpenMS TargetedExperiment to the LightTargetedExperiment
This class supports reading and writing of OpenSWATH transition lists.
Definition: TransitionTSVFile.h:144
void setExpectedSize(Size expectedSpectra, Size expectedChromatograms) override
Set expected size of spectra and chromatograms to be written.
void setExperimentalSettings(const ExperimentalSettings &exp) override
Set experimental settings for the whole file.
Generic description of a coordinate transformation.
Definition: TransformationDescription.h:61
TransformationDescription performRTNormalization(const OpenSwath::LightTargetedExperiment &irt_transitions, std::vector< OpenSwath::SwathMap > &swath_maps, TransformationDescription &im_trafo, double min_rsq, double min_coverage, const Param &feature_finder_param, const ChromExtractParams &cp_irt, const Param &irt_detection_param, const Param &calibration_param, const String &irt_mzml_out, Size debug_level, bool sonar=false, bool load_into_memory=false)
Perform RT and m/z correction of the input data using RT-normalization peptides.
void setCompression(const std::string &compression)
Set compression using a string mapping to enum NumpressCompression.
Definition: MSNumpressCoder.h:148
#define OPENMS_LOG_WARN
Macro if a warning, a piece of information which should be read by the user, should be logged...
Definition: LogStream.h:460
A data consumer that inserts MS data into a SQLite database.
Definition: MSDataSqlConsumer.h:60
File adapter for HUPO PSI TraML files.
Definition: TraMLFile.h:63
OpenSWATH Peptide Query Parameter (PQP) SQLite DB, see TransitionPQPFile.
Definition: FileTypes.h:104
ProgressLogger::LogType log_type_
Type of progress logging.
Definition: TOPPBase.h:898
virtual void addDataProcessing(DataProcessing d)
Optionally add a data processing method to each chromatogram and spectrum.
void setNumpressConfigurationMassTime(MSNumpressCoder::NumpressConfig config)
Set numpress configuration options for m/z or rt dimension.
This class supports reading and writing of PQP files.
Definition: TransitionPQPFile.h:219
A description of a targeted experiment containing precursor and product ions.
Definition: TargetedExperiment.h:64
void endProgress() const
Ends the progress display.
Main OpenMS namespace.
Definition: FeatureDeconvolution.h:46
Consumer class that performs no operation.
Definition: MSDataWritingConsumer.h:258
SqLite format for mass and chromatograms, see SqMassFile.
Definition: FileTypes.h:103
void setLogType(LogType type) const
Sets the progress log that should be used. The default type is NONE!
Base class for all classes that want to report their progress.
Definition: ProgressLogger.h:54
MzXML file (.mzXML)
Definition: FileTypes.h:64
TraML (HUPO PSI format) for transitions (.traML)
Definition: FileTypes.h:81
void setValue(const String &key, const DataValue &value, const String &description="", const StringList &tags=StringList())
Sets a value.
Param const & getParam_() const
Return all parameters relevant to this TOPP tool.
Execute all steps for retention time and m/z calibration of SWATH-MS data.
Definition: OpenSwathWorkflow.h:235
Definition: OpenSwathBase.h:87
std::vector< OpenSwath::SwathMap > loadSplit(StringList file_list, String tmp, boost::shared_ptr< ExperimentalSettings > &exp_meta, String readoptions="normal")
Loads a Swath run from a list of split mzML files.
bool loadSwathFiles(const StringList &file_list, boost::shared_ptr< ExperimentalSettings > &exp_meta, std::vector< OpenSwath::SwathMap > &swath_maps, const bool split_file, const String &tmp, const String &readoptions, const String &swath_windows_file, const double min_upper_edge_dist, const bool force, const bool sort_swath_maps, const bool sonar, const bool prm, Interfaces::IMSDataConsumer *plugin_consumer=nullptr)
Load the DIA files into internal data structures.
Definition: OpenSwathBase.h:165
The interface of a consumer of spectra and chromatograms.
Definition: IMSDataConsumer.h:69
void convertTSVToTargetedExperiment(const char *filename, FileTypes::Type filetype, OpenMS::TargetedExperiment &targeted_exp)
Read in a tsv/mrm file and construct a targeted experiment (TraML structure)
File adapter for Swath files.
Definition: SwathFile.h:67
Definition: TransitionExperiment.h:207
TransformationDescription performCalibration(String trafo_in, String irt_tr_file, std::vector< OpenSwath::SwathMap > &swath_maps, double min_rsq, double min_coverage, const Param &feature_finder_param, const ChromExtractParams &cp_irt, const Param &irt_detection_param, const Param &calibration_param, Size debug_level, bool sonar, bool load_into_memory, const String &irt_trafo_out, const String &irt_mzml_out)
Perform retention time and m/z calibration.
Definition: OpenSwathBase.h:382
A method or algorithm argument contains illegal values.
Definition: Exception.h:648
const Param & getDefaults() const
Non-mutable access to the default parameters.
Consumer class that writes MS data to disk using the mzML format.
Definition: MSDataWritingConsumer.h:240
static FileTypes::Type getTypeByFileName(const String &filename)
Determines the file type from a file name.
#define OPENMS_LOG_DEBUG
Macro for general debugging information.
Definition: LogStream.h:470
Type
Actual file types enum.
Definition: FileTypes.h:58
Management and storage of parameters / INI files.
Definition: Param.h:73
std::vector< String > StringList
Vector of String.
Definition: ListUtils.h:70
std::vector< OpenSwath::SwathMap > loadSqMass(String file, boost::shared_ptr< ExperimentalSettings > &)
Loads a Swath run from a single sqMass file.
Param copy(const String &prefix, bool remove_prefix=false) const
Returns a new Param object containing all entries that start with prefix.
static FileTypes::Type getType(const String &filename)
Tries to determine the file type (by name or content)
void setParameters(const Param &param)
Sets the parameters.
Configuration class for MSNumpress.
Definition: MSNumpressCoder.h:87
void load(const String &filename, TargetedExperiment &id)
Loads a map from a TraML file.
void prepareChromOutput(Interfaces::IMSDataConsumer **chromatogramConsumer, const boost::shared_ptr< ExperimentalSettings > &exp_meta, const OpenSwath::LightTargetedExperiment &transition_exp, const String &out_chrom)
Prepare chromatogram output.
Definition: OpenSwathBase.h:258
#define OPENMS_LOG_ERROR
Macro to be used if non-fatal errors are reported (processing continues)
Definition: LogStream.h:455
std::vector< LightTransition > transitions
Definition: TransitionExperiment.h:216
Used to load and store TransformationXML files.
Definition: TransformationXMLFile.h:56
void fitModel(const String &model_type, const Param &params=Param())
Fits a model to the data.
MzML file (.mzML)
Definition: FileTypes.h:72
size_t Size
Size type e.g. used as variable which can hold result of size()
Definition: Types.h:127
void loadSwathFiles_(const StringList &file_list, const bool split_file, const String &tmp, const String &readoptions, boost::shared_ptr< ExperimentalSettings > &exp_meta, std::vector< OpenSwath::SwathMap > &swath_maps, Interfaces::IMSDataConsumer *plugin_consumer)
Definition: OpenSwathBase.h:100
void load(const String &filename, TransformationDescription &transformation, bool fit_model=true)
Loads the transformation from a TransformationXML file.
void setCompression(bool compress)