libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
pwizmsrunreader.cpp
Go to the documentation of this file.
1/**
2 * \file pappsomspp/msrun/private/pwizmsrunreader.cpp
3 * \date 29/05/2018
4 * \author Olivier Langella
5 * \brief MS run file reader based on the ProteoWizard library
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2018 Olivier Langella <Olivier.Langella@u-psud.fr>.
10 *
11 * This file is part of the PAPPSOms++ library.
12 *
13 * PAPPSOms++ is free software: you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation, either version 3 of the License, or
16 * (at your option) any later version.
17 *
18 * PAPPSOms++ is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
22 *
23 * You should have received a copy of the GNU General Public License
24 * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
25 *
26 * Contributors:
27 * Olivier Langella <Olivier.Langella@u-psud.fr> - initial API and
28 *implementation
29 ******************************************************************************/
30
31
32#include <QDebug>
33
34#include "pwizmsrunreader.h"
35
36#include <pwiz/data/msdata/DefaultReaderList.hpp>
37
38
39#include "../../utils.h"
40#include "../../pappsoexception.h"
41#include "../../exception/exceptionnotfound.h"
42#include "../../exception/exceptionnotpossible.h"
43
44
45// int pwizMsRunReaderMetaTypeId =
46// qRegisterMetaType<pappso::PwizMsRunReader>("pappso::PwizMsRunReader");
47
48
49namespace pappso
50{
51
53
55 : MsRunReader(msrun_id_csp)
56{
57 // The initialization needs to be done immediately so that we get the pwiz
58 // MsDataPtr corresponding to the right ms_run_id in the parameter. That
59 // pointer will be set to msp_msData.
60
61 initialize();
62}
63
64
// Reads the MS data file with the pwiz default reader list, selects the pwiz
// MSData object matching the run described by mcsp_msRunId, stores it in
// msp_msData and finally sets m_hasScanNumbers.
//
// Run selection: when the run id is empty the file must hold exactly one run
// (ExceptionNotPossible is thrown otherwise); when it is not empty the first
// run whose run.id equals the requested run id is kept.
//
// m_hasScanNumbers is true when pwiz reports the Thermo native ID format or
// when the MS data format is mzXML, false otherwise.
//
// Throws PappsoException when reading the file raises a std::exception.
//
// NOTE(review): this text is a documentation-page source listing; the lines
// numbered 66 (presumably the qualified function name), 69 (initializer of
// file_name_std) and 147 (first line of the statement continued at 148-150)
// are missing from it and must be recovered from the original source file.
65void
67{
68 std::string file_name_std =
70
71 // Make a backup of the current locale
// NOTE(review): setlocale(LC_ALL, "") selects the environment's locale and
// returns its name, it does not merely query the current locale; env_backup
// is not used again in the visible lines -- confirm the intent.
72 std::string env_backup = setlocale(LC_ALL, "");
73 // struct lconv *lc = localeconv();
74
75 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
76 //<< "env_backup=" << env_backup.c_str() << "lc->decimal_point"
77 //<< lc->decimal_point;
78
79 // Now actually search the useful MSDataPtr to the member variable.
80
81 pwiz::msdata::DefaultReaderList defaultReaderList;
82
83 std::vector<pwiz::msdata::MSDataPtr> msDataPtrVector;
84
85 try
86 {
87 defaultReaderList.read(file_name_std, msDataPtrVector);
88 }
89 catch(std::exception &error)
90 {
91 qDebug() << QString("Failed to read the data from file %1")
92 .arg(QString::fromStdString(file_name_std));
93
94 throw(PappsoException(
95 QString("Error reading file %1 in PwizMsRunReader, for msrun %2:\n%3")
96 .arg(mcsp_msRunId->getFileName())
97 .arg(mcsp_msRunId.get()->toString())
98 .arg(error.what())));
99 }
100
101 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
102 //<< "The number of runs is:" << msDataPtrVector.size()
103 //<< "The number of spectra in first run is:"
104 //<< msDataPtrVector.at(0)->run.spectrumListPtr->size();
105
106 // Single-run file handling here.
107
108 // Specific case of the MGF data format: we do not have a run id for that kind
109 // of data. In this case there must be a single run!
110
111 if(mcsp_msRunId->getRunId().isEmpty())
112 {
113 if(msDataPtrVector.size() != 1)
114 throw(
115 ExceptionNotPossible("For the kind of file at hand there can only be "
116 "one run in the file."));
117
118 // At this point we know the single msDataPtr is the one we are looking
119 // for.
120
121 msp_msData = msDataPtrVector.front();
122 }
123 else
124 {
125 // Multi-run file handling here.
126 for(const pwiz::msdata::MSDataPtr &msDataPtr : msDataPtrVector)
127 {
128 qDebug() << "msDataPtr->run.id=" << msDataPtr->run.id.c_str();
129 qDebug() << "mcsp_msRunId->getRunId()=" << mcsp_msRunId->getRunId();
130 qDebug() << "mcsp_msRunId->getXmlId()=" << mcsp_msRunId->getXmlId();
131 qDebug() << "mcsp_msRunId->getSampleName()="
132 << mcsp_msRunId->getSampleName();
133 if(msDataPtr->run.id == mcsp_msRunId->getRunId().toStdString())
134 {
135 msp_msData = msDataPtr;
136
137 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
138 //<< "Found the right MSDataPtr for run id.";
139
140 break;
141 }
142 }
143 }
144
// No run matched the requested run id.
// NOTE(review): the first line of the statement below (listing line 147) is
// missing; the message text suggests an error is reported here.
145 if(msp_msData == nullptr)
146 {
148 QString(
149 "Could not find a pwiz MSDataPtr matching the requested run id : %1")
150 .arg(mcsp_msRunId.get()->toString())));
151 }
152
153
154 // check if this MS run can be used with scan numbers
155 // MS:1000490 Agilent instrument model
156 pwiz::cv::CVID native_id_format =
157 pwiz::msdata::id::getDefaultNativeIDFormat(*msp_msData.get());
158
159 // msp_msData.get()->getDefaultNativeIDFormat();
160
161 if(native_id_format == pwiz::cv::CVID::MS_Thermo_nativeID_format)
162 {
163 m_hasScanNumbers = true;
164 }
165 else
166 {
167 m_hasScanNumbers = false;
168 }
169
// mzXML data are flagged as having scan numbers whatever the native ID format.
170 if(mcsp_msRunId.get()->getMsDataFormat() == MsDataFormat::mzXML)
171 {
172 m_hasScanNumbers = true;
173 }
174}
175
179
180
// Builds an OboPsiModTerm describing the default native ID format that pwiz
// reports for the loaded run (msp_msData).
//
// The term defaults to "MS:1000824" (no nativeID format); it is replaced by
// "MS:1000768" (Thermo nativeID format) when pwiz reports
// MS_Thermo_nativeID_format. No other format is mapped in the visible code.
//
// NOTE(review): this text is a documentation-page source listing; the line
// numbered 182 (presumably the qualified function name and parameter list) is
// missing from it.
// NOTE(review): msp_msData is dereferenced without a null check -- confirm
// that callers only use this after the data have been loaded.
181const OboPsiModTerm
183{
184
185 OboPsiModTerm term;
186
187 term.m_accession = "MS:1000824";
188 term.m_name = "no nativeID format";
189 term.m_definition =
190 "No nativeID format indicates that the file tagged with this term does not "
191 "contain spectra that can have a nativeID format.";
192
193
194 pwiz::cv::CVID cvid =
195 pwiz::msdata::id::getDefaultNativeIDFormat(*(msp_msData.get()));
196
197 switch(cvid)
198 {
199 case pwiz::cv::MS_Thermo_nativeID_format:
200 term.m_accession = "MS:1000768";
201 term.m_name = "Thermo nativeID format";
202 term.m_definition =
203 "Native format defined by controllerType=xsd:nonNegativeInteger "
204 "controllerNumber=xsd:positiveInteger scan=xsd:positiveInteger.";
205 break;
206 default:
207 break;
208 }
209 return term;
210}
211
212pwiz::msdata::SpectrumPtr
213PwizMsRunReader::getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list,
214 std::size_t spectrum_index,
215 bool want_binary_data) const
216{
217 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp;
218
219 try
220 {
221 native_pwiz_spectrum_sp =
222 p_spectrum_list->spectrum(spectrum_index, want_binary_data);
223 }
224 catch(std::runtime_error &error)
225 {
226 qDebug() << "getPwizSpectrumPtr error " << error.what() << " "
227 << typeid(error).name();
228
229 throw ExceptionNotFound(QObject::tr("Pwiz spectrum index %1 not found in "
230 "MS file std::runtime_error :\n%2")
231 .arg(spectrum_index)
232 .arg(error.what()));
233 }
234 catch(std::exception &error)
235 {
236 qDebug() << "getPwizSpectrumPtr error " << error.what()
237 << typeid(error).name();
238
239 throw ExceptionNotFound(
240 QObject::tr("Pwiz spectrum index %1 not found in MS file :\n%2")
241 .arg(spectrum_index)
242 .arg(error.what()));
243 }
244
245 if(native_pwiz_spectrum_sp.get() == nullptr)
246 {
247 throw ExceptionNotFound(
248 QObject::tr(
249 "Pwiz spectrum index %1 not found in MS file : null pointer")
250 .arg(spectrum_index));
251 }
252
253 return native_pwiz_spectrum_sp;
254}
255
256
// Reads the scan start time of the first scan of spectrum_p and stores it, in
// seconds, in qualified_mass_spectrum.
//
// - Unit "second": the value is stored as is.
// - Unit "minute": the value is multiplied by 60.
// - Any other unit: ExceptionNotPossible is thrown.
// - No scan start time at all: -1 is stored for MGF data.
//
// Returns true whenever no exception is thrown.
//
// NOTE(review): this text is a documentation-page source listing; the lines
// numbered 258 (presumably the qualified function name) and 275 (first line
// of the statement continued at 276) are missing from it. The message at 276
// suggests that non-MGF data without a scan start time are reported as an
// error -- confirm against the original source.
// NOTE(review): scans[0] is accessed without checking that the scan list is
// not empty.
257bool
259 pwiz::msdata::Spectrum *spectrum_p,
260 QualifiedMassSpectrum &qualified_mass_spectrum) const
261{
262
263 // We now have to set the retention time at which this mass spectrum
264 // was acquired. This is the scan start time.
265
266 if(!spectrum_p->scanList.scans[0].hasCVParam(
267 pwiz::msdata::MS_scan_start_time))
268 {
269 if(mcsp_msRunId.get()->getMsDataFormat() == MsDataFormat::MGF)
270 { // MGF could not have scan start time
271 qualified_mass_spectrum.setRtInSeconds(-1);
272 }
273 else
274 {
276 "The spectrum has no scan start time value set."));
277 }
278 }
279 else
280 {
281 pwiz::data::CVParam retention_time_cv_param =
282 spectrum_p->scanList.scans[0].cvParam(pwiz::msdata::MS_scan_start_time);
283
284 // Try to get the units of the retention time value.
285
286 std::string unit_name = retention_time_cv_param.unitsName();
287 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
288 //<< "Unit name for the retention time:"
289 //<< QString::fromStdString(unit_name);
290
291 if(unit_name == "second")
292 {
293 qualified_mass_spectrum.setRtInSeconds(
294 retention_time_cv_param.valueAs<double>());
295 }
296 else if(unit_name == "minute")
297 {
298 qualified_mass_spectrum.setRtInSeconds(
299 retention_time_cv_param.valueAs<double>() * 60);
300 }
301 else
302 throw(
303 ExceptionNotPossible("Could not determine the unit for the "
304 "scan start time value."));
305 }
306
307 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
308 //<< "Retention time for spectrum is:"
309 //<< qualified_mass_spectrum.getRtInSeconds();
310
311 // Old version not checking unit (by default unit is minutes for RT,
312 // not seconds)
313 //
314 // pappso_double retentionTime =
315 // QString(spectrum_p->scanList.scans[0]
316 //.cvParam(pwiz::msdata::MS_scan_start_time)
317 //.value.c_str())
318 //.toDouble();
319 // qualified_mass_spectrum.setRtInSeconds(retentionTime);
320
321 return true;
322}
323
324
325bool
327 pwiz::msdata::Spectrum *spectrum_p,
328 QualifiedMassSpectrum &qualified_mass_spectrum) const
329{
330 // Not all the acquisitions have ion mobility data. We need to test
331 // that:
332
333 if(spectrum_p->scanList.scans[0].hasCVParam(
334 pwiz::msdata::MS_ion_mobility_drift_time))
335 {
336
337 // qDebug() << "as strings:"
338 //<< QString::fromStdString(
339 // spectrum_p->scanList.scans[0]
340 //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
341 //.valueAs<std::string>());
342
343 pappso_double driftTime =
344 spectrum_p->scanList.scans[0]
345 .cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
346 .valueAs<double>();
347
348 // qDebug() << "driftTime:" << driftTime;
349
350 // Old version requiring use of QString.
351 // pappso_double driftTime =
352 // QString(spectrum_p->scanList.scans[0]
353 //.cvParam(pwiz::msdata::MS_ion_mobility_drift_time)
354 //.value.c_str())
355 //.toDouble();
356
357 // Now make positively sure that the obtained value is correct.
358 // Note that I suffered a lot with Waters Synapt data that
359 // contained apparently correct drift time XML element that in
360 // fact contained either NaN or inf. When such mass spectra were
361 // encountered, the mz,i data were bogus and crashed the data
362 // loading functions. We just want to skip this kind of bogus mass
363 // spectrum by letting the caller know that the drift time was
364 // bogus ("I" is Filippo Rusconi).
365
366 if(std::isnan(driftTime) || std::isinf(driftTime))
367 {
368 // qDebug() << "detected as nan or inf.";
369
370 return false;
371 }
372 else
373 {
374 // The mzML standard stipulates that drift times are in
375 // milliseconds.
376 qualified_mass_spectrum.setDtInMilliSeconds(driftTime);
377 }
378 }
379 // End of
380 // if(spectrum_p->scanList.scans[0].hasCVParam(
381 // pwiz::msdata::MS_ion_mobility_drift_time))
382 else
383 {
384 // Not a bogus mass spectrum but also not a drift spectrum, set -1
385 // as the drift time value.
386 qualified_mass_spectrum.setDtInMilliSeconds(-1);
387 }
388
389 return true;
390}
391
392
// Converts one pwiz spectrum (spectrum_p) into a QualifiedMassSpectrum that
// carries massSpectrumId. The visible code fills in, in this order:
//  - the MS level;
//  - for each item of the pwiz precursor list: the precursor native id and
//    spectrum index (when the precursor has a spectrum id) and one
//    PrecursorIonData built from the m/z, charge state and peak intensity of
//    the first selected ion;
//  - the retention time and the drift time (processRetentionTime() and
//    processDriftTime());
//  - when want_binary_data is true, the (m/z, intensity) pairs having a
//    non-zero intensity, sorted by m/z for MGF data; otherwise a null mass
//    spectrum pointer.
//
// ok is set to true right before the normal return.
//
// The process locale is switched to "C" when it is not "C" already; the
// restoring call at the end of the function is commented out.
//
// ExceptionNotPossible is thrown when a precursor has no selected ion.
//
// NOTE(review): this text is a documentation-page source listing; the lines
// numbered 393-394 (presumably return type and qualified function name), 426,
// 449, 514, 516, 623, 707 and 715 (first or middle line of the statements
// continued on the following lines) are missing from it and must be recovered
// from the original source file.
// NOTE(review): the return values of processRetentionTime() and
// processDriftTime() are not checked in the visible code.
395 const MassSpectrumId &massSpectrumId,
396 pwiz::msdata::Spectrum *spectrum_p,
397 bool want_binary_data,
398 bool &ok) const
399{
400 // qDebug();
401
402 std::string env;
403 env = setlocale(LC_ALL, "");
404 if(env != "C")
405 setlocale(LC_ALL, "C");
406
407 QualifiedMassSpectrum qualified_mass_spectrum(massSpectrumId);
408
409 try
410 {
411
412 // We want to store the ms level for this spectrum
413
414 int msLevel =
415 (spectrum_p->cvParam(pwiz::msdata::MS_ms_level).valueAs<int>());
416
417 qualified_mass_spectrum.setMsLevel(msLevel);
418
// NOTE(review): the test is negated and checks scans[0], whereas the value is
// read from the spectrum itself -- confirm the intent.
419 if(!spectrum_p->scanList.scans[0].hasCVParam(
420 pwiz::msdata::MS_peak_list_scans))
421 {
422
423 // qDebug() << spectrum_p->cvParam(pwiz::msdata::MS_peak_list_scans)
424 // .valueAs<double>();
425 qualified_mass_spectrum.setParameterValue(
427 spectrum_p->cvParam(pwiz::msdata::MS_peak_list_scans)
428 .valueAs<double>());
429 }
430 // We want to know if this spectrum is a fragmentation spectrum obtained
431 // from a selected precursor ion.
432
433 std::size_t precursor_list_size = spectrum_p->precursors.size();
434
435 // qDebug() << "For spectrum at index:" <<
436 // massSpectrumId.getSpectrumIndex()
437 //<< "msLevel:" << msLevel
438 //<< "with number of precursors:" << precursor_list_size;
439
440 if(precursor_list_size > 0)
441 {
442
443 // Sanity check
444 if(msLevel < 2)
445 {
446 qDebug() << "Going to throw: msLevel cannot be less than two for "
447 "a spectrum that has items in its Precursor list.";
448
450 "msLevel cannot be less than two for "
451 "a spectrum that has items in its Precursor list."));
452 }
453
454 // See what is the first precursor in the list.
455
456 for(auto &precursor : spectrum_p->precursors)
457 {
458
459 // Set this variable ready as we need that default value in
460 // certain circumstances.
461
462 std::size_t precursor_spectrum_index =
463 std::numeric_limits<std::size_t>::max();
464
465 // The spectrum ID of the precursor might be empty.
466
467 if(precursor.spectrumID.empty())
468 {
469 // qDebug() << "The precursor's spectrum ID is empty.";
470
471 if(mcsp_msRunId.get()->getMsDataFormat() == MsDataFormat::MGF)
472 {
473 // qDebug()
474 //<< "Format is MGF, precursor's spectrum ID can be
475 // empty.";
476 }
477 else
478 {
479 // When performing Lumos Fusion fragmentation experiments
480 // in Tune mode and with recording, the first spectrum of
481 // the list is a fragmentation spectrum (ms level 2) that
482 // has no identity for the precursor spectrum because
483 // there is no full scan accquisition.
484 }
485 }
486 // End of
487 // if(precursor.spectrumID.empty())
488 else
489 {
490 // We could get a native precursor spectrum id, so convert
491 // that native id to a spectrum index.
492
493 qualified_mass_spectrum.setPrecursorNativeId(
494 QString::fromStdString(precursor.spectrumID));
495
496 if(qualified_mass_spectrum.getPrecursorNativeId().isEmpty())
497 {
498 // qDebug() << "The native id of the precursor spectrum is
499 // empty.";
500 }
501
502 // Get the spectrum index of the spectrum that contained the
503 // precursor ion.
504
505 precursor_spectrum_index =
506 msp_msData->run.spectrumListPtr->find(precursor.spectrumID);
507
508 // Note that the Mascot MGF format has a peculiar handling of
509 // the precursor ion stuff so we cannot throw.
510 if(precursor_spectrum_index ==
511 msp_msData->run.spectrumListPtr->size())
512 {
513 if(mcsp_msRunId.get()->getMsDataFormat() !=
515 {
517 "Failed to find the index of the "
518 "precursor ion's spectrum."));
519 }
520 }
521
522 qualified_mass_spectrum.setPrecursorSpectrumIndex(
523 precursor_spectrum_index);
524
525 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
526 // "()"
527 //<< "Set the precursor spectrum index to:"
528 //<< qualified_mass_spectrum.getPrecursorSpectrumIndex()
529 //<< "for qualified mass spectrum:"
530 //<< &qualified_mass_spectrum;
531 }
532
533 if(!precursor.selectedIons.size())
534 {
535 qDebug()
536 << "Going to throw The spectrum has msLevel > 1 but the "
537 "precursor ions's selected ions list is empty..";
538
539 throw(
540 ExceptionNotPossible("The spectrum has msLevel > 1 but the "
541 "precursor ions's selected ions "
542 "list is empty."));
543 }
544
// Only the first selected ion of each precursor is used.
545 pwiz::msdata::SelectedIon &ion =
546 *(precursor.selectedIons.begin());
547
548 // selected ion m/z
549
550 pappso_double selected_ion_mz =
551 QString(
552 ion.cvParam(pwiz::cv::MS_selected_ion_m_z).value.c_str())
553 .toDouble();
554
555 // selected ion peak intensity
556 //<cvParam cvRef="MS" accession="MS:1000042"
557 // value="910663.949707031" name="peak intensity"
558 // unitAccession="MS:1000131" unitName="number of detector counts"
559 // unitCvRef="MS" />
560
561 pappso_double selected_ion_peak_intensity =
562 QString(ion.cvParam(pwiz::cv::MS_peak_intensity).value.c_str())
563 .toDouble();
564
565 // charge state
566
567 unsigned int selected_ion_charge_state =
568 QString(ion.cvParam(pwiz::cv::MS_charge_state).value.c_str())
569 .toUInt();
570
571 // At this point we can craft a new PrecursorIonData instance and
572 // push it back to the vector.
573
574 PrecursorIonData precursor_ion_data(selected_ion_mz,
575 selected_ion_charge_state,
576 selected_ion_peak_intensity);
577
578 qualified_mass_spectrum.appendPrecursorIonData(
579 precursor_ion_data);
580
581 // General sum-up
582
583 // qDebug()
584 //<< "Appended new PrecursorIonData:"
585 //<< "mz:"
586 //<< qualified_mass_spectrum.getPrecursorIonData().back().mz
587 //<< "charge:"
588 //<< qualified_mass_spectrum.getPrecursorIonData().back().charge
589 //<< "intensity:"
590 //<< qualified_mass_spectrum.getPrecursorIonData()
591 //.back()
592 //.intensity;
593 }
594 // End of
595 // for(auto &precursor : spectrum_p->precursors)
596 }
597 // End of
598 // if(precursor_list_size > 0)
599 else
600 {
601 // Sanity check
602
603 // Unfortunately, logic here is defeated by some vendors that have
604 // files with MS2 spectra without <precursorList>. Thus we have
605 // spectrum_p->precursors.size() == 0 and msLevel > 1.
606
607 // if(msLevel != 1)
608 //{
609 // throw(
610 // ExceptionNotPossible("msLevel cannot be different than 1 if "
611 //"there is not a single precursor ion."));
612 //}
613 }
614
615 // Sanity check.
616
617 if(precursor_list_size !=
618 qualified_mass_spectrum.getPrecursorIonData().size())
619 {
620 qDebug() << "Going to throw The number of precursors in the file is "
621 "different from the number of precursors in memory.";
622
624 QObject::tr("The number of precursors in the file is different "
625 "from the number of precursors in memory."));
626 }
627
628 // if(precursor_list_size == 1)
629 //{
630 // qDebug() << "Trying to get the mz value of the unique precursor ion:"
631 //<< qualified_mass_spectrum.getPrecursorMz();
632 //}
633
634 processRetentionTime(spectrum_p, qualified_mass_spectrum);
635
636 processDriftTime(spectrum_p, qualified_mass_spectrum);
637
638 // for(pwiz::data::CVParam cv_param : ion.cvParams)
639 //{
640 // pwiz::msdata::CVID param_id = cv_param.cvid;
641 // qDebug() << param_id;
642 // qDebug() << cv_param.cvid.c_str();
643 // qDebug() << cv_param.name().c_str();
644 // qDebug() << cv_param.value.c_str();
645 //}
646
647 if(want_binary_data)
648 {
649
650 // Fill-in MZIntensityPair vector for convenient access to binary
651 // data
652
653 std::vector<pwiz::msdata::MZIntensityPair> pairs;
654 spectrum_p->getMZIntensityPairs(pairs);
655
656 MassSpectrum spectrum;
// tic is accumulated below but only referenced by commented-out debug code.
657 double tic = 0;
658 // std::size_t iterCount = 0;
659
660 // Iterate through the m/z-intensity pairs
661 for(std::vector<pwiz::msdata::MZIntensityPair>::const_iterator
662 it = pairs.begin(),
663 end = pairs.end();
664 it != end;
665 ++it)
666 {
667 //++iterCount;
668
669 // qDebug() << "it->mz " << it->mz << " it->intensity" <<
670 // it->intensity;
671 if(it->intensity)
672 {
673 spectrum.push_back(DataPoint(it->mz, it->intensity));
674 tic += it->intensity;
675 }
676 }
677
678 if(mcsp_msRunId.get()->getMsDataFormat() == MsDataFormat::MGF)
679 {
680 // Sort peaks by mz
681 spectrum.sortMz();
682 }
683
684 // lc = localeconv ();
685 // qDebug() << " env=" << localeconv () << " lc->decimal_point "
686 // << lc->decimal_point;
687 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()
688 // "<< spectrum.size();
689 MassSpectrumSPtr spectrum_sp = spectrum.makeMassSpectrumSPtr();
690 qualified_mass_spectrum.setMassSpectrumSPtr(spectrum_sp);
691
692 // double sumY =
693 // qualified_mass_spectrum.getMassSpectrumSPtr()->sumY(); qDebug()
694 // <<
695 // __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()"
696 //<< "iterCount:" << iterCount << "Spectrum size "
697 //<< spectrum.size() << "with tic:" << tic
698 //<< "and sumY:" << sumY;
699 }
700 else
701 qualified_mass_spectrum.setMassSpectrumSPtr(nullptr);
702 }
703 catch(PappsoException &errorp)
704 {
705 qDebug() << "Going to throw";
706
708 QObject::tr("Error reading data using the proteowizard library: %1")
709 .arg(errorp.qwhat()));
710 }
711 catch(std::exception &error)
712 {
713 qDebug() << "Going to throw";
714
716 QObject::tr("Error reading data using the proteowizard library: %1")
717 .arg(error.what()));
718 }
719
720 // setlocale(LC_ALL, env.c_str());
721
722 ok = true;
723
724 // qDebug() << "QualifiedMassSpectrum: " <<
725 // qualified_mass_spectrum.toString();
726 return qualified_mass_spectrum;
727}
728
729
732 bool want_binary_data,
733 bool &ok) const
734{
735
736 std::string env;
737 env = setlocale(LC_ALL, "");
738 // struct lconv *lc = localeconv();
739
740 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__
741 //<< "env=" << env.c_str()
742 //<< "lc->decimal_point:" << lc->decimal_point;
743
744 setlocale(LC_ALL, "C");
745
746 MassSpectrumId massSpectrumId(mcsp_msRunId);
747
748 if(msp_msData == nullptr)
749 {
750 setlocale(LC_ALL, env.c_str());
751 return (QualifiedMassSpectrum(massSpectrumId));
752 }
753
754 // const bool want_binary_data = true;
755
756 pwiz::msdata::SpectrumListPtr spectrum_list_p =
757 msp_msData->run.spectrumListPtr;
758
759 if(spectrum_index == spectrum_list_p.get()->size())
760 {
761 setlocale(LC_ALL, env.c_str());
762 throw ExceptionNotFound(
763 QObject::tr("The spectrum index cannot be equal to the size of the "
764 "spectrum list."));
765 }
766
767 // At this point we know the spectrum index might be sane, so store it in
768 // the mass spec id object.
769 massSpectrumId.setSpectrumIndex(spectrum_index);
770
771 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
772 getPwizSpectrumPtr(spectrum_list_p.get(), spectrum_index, want_binary_data);
773
774 setlocale(LC_ALL, env.c_str());
775
776 massSpectrumId.setNativeId(
777 QString::fromStdString(native_pwiz_spectrum_sp->id));
778
780 massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
781}
782
783
784bool
785PwizMsRunReader::accept(const QString &file_name) const
786{
787 // We want to know if we can handle the file_name.
788 pwiz::msdata::ReaderList reader_list;
789
790 std::string reader_type = reader_list.identify(file_name.toStdString());
791
792 if(!reader_type.empty())
793 return true;
794
795 return false;
796}
797
798
800PwizMsRunReader::massSpectrumSPtr(std::size_t spectrum_index)
801{
802 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
803 return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumSPtr();
804}
805
807PwizMsRunReader::massSpectrumCstSPtr(std::size_t spectrum_index)
808{
809 // qDebug() << __FILE__ << " " << __FUNCTION__ << " " << __LINE__;
810 return qualifiedMassSpectrum(spectrum_index, true).getMassSpectrumCstSPtr();
811}
812
814PwizMsRunReader::qualifiedMassSpectrum(std::size_t spectrum_index,
815 bool want_binary_data) const
816{
817
818 QualifiedMassSpectrum spectrum;
819 bool ok = false;
820
821 spectrum =
822 qualifiedMassSpectrumFromPwizMSData(spectrum_index, want_binary_data, ok);
823
824 if(mcsp_msRunId->getMsDataFormat() == pappso::MsDataFormat::MGF)
825 {
826 if(spectrum.getRtInSeconds() == 0)
827 {
828 // spectrum = qualifiedMassSpectrumFromPwizMSData(scan_num - 1);
829 }
830 }
831
832 // if(!ok)
833 // qDebug() << "Encountered a mass spectrum for which the status is bad.";
834
835 return spectrum;
836}
837
838
// NOTE(review): this text is a documentation-page source listing with gaps.
// Of the definition that starts at 839 only the return type is left (lines
// 840-844 are missing). Of the definition that starts at 846, the lines 847
// (presumably the qualified function name), 849 (presumably the second
// parameter) and 853 (the only statement besides qDebug()) are missing.
// Recover both from the original source file.
//
// What remains visible of the second definition: it returns nothing, its
// first parameter is a const MsRunReadConfig reference marked
// [[maybe_unused]], and it emits an empty qDebug() message.
839void
845
846void
848 [[maybe_unused]] const MsRunReadConfig &config,
850{
851 qDebug();
852
854}
855
// Iterates over every spectrum of the loaded run, converts each pwiz spectrum
// to a QualifiedMassSpectrum and hands it over to handler.
//
// - handler.needPeakList() decides whether binary data (m/z-intensity) are
//   loaded.
// - handler.spectrumListHasSize() is called once with the number of spectra.
// - The loop stops as soon as handler.shouldStop() returns true.
// - Spectra for which the conversion leaves ok at false are skipped.
// - When ms_level is 0 every spectrum is forwarded, otherwise only spectra
//   of that MS level.
// - handler.loadingEnded() is called after the loop.
//
// The process locale is set to "C" before the loop and put back after it; it
// is not put back when an exception leaves the loop.
//
// NOTE(review): this text is a documentation-page source listing; the lines
// numbered 857 (presumably the qualified function name), 861 (a statement at
// the top of the body) and 947 (right-hand side of the initialisation
// continued at 948) are missing from it.
// NOTE(review): msp_msData is dereferenced without a null check in the
// visible lines.
856void
858 SpectrumCollectionHandlerInterface &handler, unsigned int ms_level)
859{
860
862 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()";
863
864 // We want to iterate in the pwiz-spectrum-list and for each pwiz-spectrum
865 // create a pappso-spectrum (QualifiedMassSpectrum). Once the pappso mass
866 // spectrum has been fully qualified (that is, the member data have been
867 // set), it is transferred to the handler passed as parameter to this
868 // function for the consumer to do what it wants with it.
869
870 // Does the handler consuming the mass spectra read from file want these
871 // mass spectra to hold the binary data arrays (mz/i vectors)?
872
873 const bool want_binary_data = handler.needPeakList();
874
875
876 std::string env;
877 env = setlocale(LC_ALL, "");
878 setlocale(LC_ALL, "C");
879
880
881 // We access the pwiz-mass-spectra via the spectrumListPtr that sits in the
882 // run member of msp_msData.
883
884 pwiz::msdata::SpectrumListPtr spectrum_list_p =
885 msp_msData->run.spectrumListPtr;
886
887 // We'll need it to perform the looping in the spectrum list.
888 std::size_t spectrum_list_size = spectrum_list_p.get()->size();
889
890 // qDebug() << "The spectrum list has size:" << spectrum_list_size;
891
892 // Inform the handler of the spectrum list so that it can handle feedback to
893 // the user.
894 handler.spectrumListHasSize(spectrum_list_size);
895
896 // Iterate in the full list of spectra.
897
898 for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
899 {
900
901 // If the user of this reader instance wants to stop reading the
902 // spectra, then break this loop.
903 if(handler.shouldStop())
904 {
905 qDebug() << "The operation was cancelled. Breaking the loop.";
906 break;
907 }
908
909 // Get the native pwiz-spectrum from the spectrum list.
910 // Note that this pointer is a shared pointer from pwiz.
911
912 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
913 getPwizSpectrumPtr(spectrum_list_p.get(), iter, want_binary_data);
914 /*
915 * we want to load metadata of the spectrum even if it does not contain
916 peaks
917
918 * if(!native_pwiz_spectrum_sp->hasBinaryData())
919 {
920 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
921 "
922 ()"
923 //<< "native pwiz spectrum is empty, continuing.";
924 continue;
925 }
926 */
927
928 // Instantiate the mass spectrum id that will hold critical information
929 // like the the native id string and the spectrum index.
930
931 MassSpectrumId massSpectrumId(mcsp_msRunId, iter /* spectrum index*/);
932
933 // Get the spectrum native id as a QString to store it in the mass
934 // spectrum id class. This is will allow later to refer to the same
935 // spectrum starting back from the file.
936
937 QString native_id = QString::fromStdString(native_pwiz_spectrum_sp->id);
938 massSpectrumId.setNativeId(native_id);
939
940 // Finally, instantiate the qualified mass spectrum with its id. This
941 // function will continue performing pappso-spectrum detailed
942 // qualification.
943
944 bool ok = false;
945
946 QualifiedMassSpectrum qualified_mass_spectrum =
948 massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
949
950 if(!ok)
951 {
952 // qDebug() << "Encountered a mass spectrum for which the returned "
953 //"status is bad.";
954 continue;
955 }
956
957 // Before handing the mass spectrum out to the handler, see if the
958 // native mass spectrum was empty or not.
959
960 // if(!native_pwiz_spectrum_sp->defaultArrayLength)
961 // qDebug() << "The mass spectrum has not defaultArrayLength";
962
963 qualified_mass_spectrum.setEmptyMassSpectrum(
964 !native_pwiz_spectrum_sp->defaultArrayLength);
965
966 // The handler will receive the index of the mass spectrum in the
967 // current run via the mass spectrum id member datum.
968 if(ms_level == 0)
969 {
970 handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
971 }
972 else
973 {
974 if(qualified_mass_spectrum.getMsLevel() == ms_level)
975 {
976 handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
977 }
978 }
979 }
980
981 setlocale(LC_ALL, env.c_str());
982 // End of
983 // for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
984
985 // Now let the loading handler know that the loading of the data has ended.
986 // The handler might need this "signal" to perform additional tasks or to
987 // cleanup cruft.
988
989 // qDebug() << "Loading ended";
990 handler.loadingEnded();
991}
992
993void
996{
997 // pwiz library is not thread safe under MXE
998 QMutexLocker lock(&m_mutex);
999
1000 qDebug();
1001 acquireDevice();
1002 qDebug();
1003 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ << "()";
1004
1005 // We want to iterate in the pwiz-spectrum-list and for each pwiz-spectrum
1006 // create a pappso-spectrum (QualifiedMassSpectrum). Once the pappso mass
1007 // spectrum has been fully qualified (that is, the member data have been
1008 // set), it is transferred to the handler passed as parameter to this
1009 // function for the consumer to do what it wants with it.
1010
1011 // Does the handler consuming the mass spectra read from file want these
1012 // mass spectra to hold the binary data arrays (mz/i vectors)?
1013
1014 const bool want_binary_data = config.needPeakList();
1015
1016
1017 std::string env;
1018 env = setlocale(LC_ALL, "");
1019 setlocale(LC_ALL, "C");
1020
1021
1022 qDebug();
1023 // We access the pwiz-mass-spectra via the spectrumListPtr that sits in the
1024 // run member of msp_msData.
1025
1026 pwiz::msdata::SpectrumListPtr spectrum_list_p =
1027 msp_msData->run.spectrumListPtr;
1028
1029 // We'll need it to perform the looping in the spectrum list.
1030 std::size_t spectrum_list_size = spectrum_list_p.get()->size();
1031
1032 // qDebug() << "The spectrum list has size:" << spectrum_list_size;
1033
1034 // Inform the handler of the spectrum list so that it can handle feedback to
1035 // the user.
1036 handler.spectrumListHasSize(spectrum_list_size);
1037
1038 // Iterate in the full list of spectra.
1039
1040 qDebug();
1041 for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
1042 {
1043
1044
1045 // If the user of this reader instance wants to stop reading the
1046 // spectra, then break this loop.
1047 if(handler.shouldStop())
1048 {
1049 qDebug() << "The operation was cancelled. Breaking the loop.";
1050 break;
1051 }
1052
1053 qDebug();
1054 // Get the native pwiz-spectrum from the spectrum list.
1055 // Note that this pointer is a shared pointer from pwiz.
1056
1057 pwiz::msdata::SpectrumPtr native_pwiz_spectrum_sp =
1058 getPwizSpectrumPtr(spectrum_list_p.get(), iter, want_binary_data);
1059 qDebug();
1060 /*
1061 * we want to load metadata of the spectrum even if it does not contain
1062 peaks
1063
1064 * if(!native_pwiz_spectrum_sp->hasBinaryData())
1065 {
1066 // qDebug() << __FILE__ << "@" << __LINE__ << __FUNCTION__ <<
1067 "
1068 ()"
1069 //<< "native pwiz spectrum is empty, continuing.";
1070 continue;
1071 }
1072 */
1073
1074 // Instantiate the mass spectrum id that will hold critical information
1075 // like the the native id string and the spectrum index.
1076
1077 MassSpectrumId massSpectrumId(mcsp_msRunId, iter /* spectrum index*/);
1078
1079 qDebug();
1080 // Get the spectrum native id as a QString to store it in the mass
1081 // spectrum id class. This is will allow later to refer to the same
1082 // spectrum starting back from the file.
1083
1084 QString native_id = QString::fromStdString(native_pwiz_spectrum_sp->id);
1085 massSpectrumId.setNativeId(native_id);
1086
1087 // Finally, instantiate the qualified mass spectrum with its id. This
1088 // function will continue performing pappso-spectrum detailed
1089 // qualification.
1090
1091 bool ok = false;
1092
1093 QualifiedMassSpectrum qualified_mass_spectrum =
1095 massSpectrumId, native_pwiz_spectrum_sp.get(), want_binary_data, ok);
1096
1097 if(!ok)
1098 {
1099 // qDebug() << "Encountered a mass spectrum for which the returned "
1100 //"status is bad.";
1101 continue;
1102 }
1103
1104 qDebug();
1105 // Before handing the mass spectrum out to the handler, see if the
1106 // native mass spectrum was empty or not.
1107
1108 // if(!native_pwiz_spectrum_sp->defaultArrayLength)
1109 // qDebug() << "The mass spectrum has not defaultArrayLength";
1110
1111 qualified_mass_spectrum.setEmptyMassSpectrum(
1112 !native_pwiz_spectrum_sp->defaultArrayLength);
1113
1114 // The handler will receive the index of the mass spectrum in the
1115 // current run via the mass spectrum id member datum.
1116
1117 qDebug();
1118 if(config.acceptMsLevel(qualified_mass_spectrum.getMsLevel()))
1119 {
1121 qualified_mass_spectrum.getRtInSeconds()))
1122 {
1123 handler.setQualifiedMassSpectrum(qualified_mass_spectrum);
1124 }
1125 }
1126 }
1127
1128 qDebug();
1129 setlocale(LC_ALL, env.c_str());
1130 // End of
1131 // for(std::size_t iter = 0; iter < spectrum_list_size; iter++)
1132
1133 // Now let the loading handler know that the loading of the data has ended.
1134 // The handler might need this "signal" to perform additional tasks or to
1135 // cleanup cruft.
1136
1137 qDebug() << "Loading ended";
1138 handler.loadingEnded();
1139}
1140
// Returns the number of spectra held by the spectrum list of the loaded pwiz
// run (msp_msData->run.spectrumListPtr).
// NOTE(review): this text is a documentation-page source listing; the line
// numbered 1142 (presumably the qualified function name) is missing from it.
// NOTE(review): msp_msData is dereferenced without a null check.
1141std::size_t
1143{
1144 return msp_msData->run.spectrumListPtr.get()->size();
1145}
1146
1147bool
1149{
1150 return m_hasScanNumbers;
1151}
1152
// Drops this reader's reference to the pwiz data by resetting msp_msData to
// nullptr; always returns true.
// NOTE(review): this text is a documentation-page source listing; the line
// numbered 1154 (presumably the qualified function name) is missing from it.
1153bool
1155{
1156 msp_msData = nullptr;
1157 return true;
1158}
1159
// Makes sure the pwiz data are available: initialize() is called when
// msp_msData is null, nothing is done otherwise; always returns true.
// NOTE(review): this text is a documentation-page source listing; the line
// numbered 1161 (presumably the qualified function name) is missing from it.
1160bool
1162{
1163 if(msp_msData == nullptr)
1164 {
1165 initialize();
1166 }
1167 return true;
1168}
1169
1170
1173 std::size_t spectrum_index, pappso::PrecisionPtr precision) const
1174{
1175
1176 QualifiedMassSpectrum mass_spectrum =
1177 qualifiedMassSpectrum(spectrum_index, false);
1178
1179 return newXicCoordSPtrFromQualifiedMassSpectrum(mass_spectrum, precision);
1180}
1181
1184 const pappso::QualifiedMassSpectrum &mass_spectrum,
1185 pappso::PrecisionPtr precision) const
1186{
1187 XicCoordSPtr xic_coord = std::make_shared<XicCoord>();
1188
1189 xic_coord.get()->rtTarget = mass_spectrum.getRtInSeconds();
1190
1191 xic_coord.get()->mzRange = MzRange(mass_spectrum.getPrecursorMz(), precision);
1192
1193 return xic_coord;
1194}
1195
1196} // namespace pappso
void setNativeId(const QString &native_id)
void setSpectrumIndex(std::size_t index)
Class to represent a mass spectrum.
void sortMz()
Sort the DataPoint instances of this spectrum.
MassSpectrumSPtr makeMassSpectrumSPtr() const
bool acceptMsLevel(std::size_t ms_level) const
bool acceptRetentionTimeInSeconds(double retention_time_in_seconds) const
base class to read an MSrun; the only way to build a MsRunReader object is to use the MsRunReaderFactory
Definition msrunreader.h:63
MsRunIdCstSPtr mcsp_msRunId
virtual const QString & qwhat() const
virtual pappso::XicCoordSPtr newXicCoordSPtrFromSpectrumIndex(std::size_t spectrum_index, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum index
pwiz::msdata::MSDataPtr msp_msData
virtual void readSpectrumCollectionByMsLevel(SpectrumCollectionHandlerInterface &handler, unsigned int ms_level) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler by Ms Levels
virtual void readSpectrumCollectionWithMsrunReadConfig(const MsRunReadConfig &config, SpectrumCollectionHandlerInterface &handler)
bool processDriftTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
virtual MassSpectrumCstSPtr massSpectrumCstSPtr(std::size_t spectrum_index) override
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizMSData(std::size_t spectrum_index, bool want_binary_data, bool &ok) const
PwizMsRunReader(MsRunIdCstSPtr &msrun_id_csp)
QualifiedMassSpectrum qualifiedMassSpectrumFromPwizSpectrumPtr(const MassSpectrumId &massSpectrumId, pwiz::msdata::Spectrum *spectrum_p, bool want_binary_data, bool &ok) const
virtual void readSpectrumCollection2(const MsRunReadConfig &config, SpectrumCollectionHandlerInterface &handler) override
virtual QualifiedMassSpectrum qualifiedMassSpectrum(std::size_t spectrum_index, bool want_binary_data=true) const override
get a QualifiedMassSpectrum class given its scan number
virtual bool hasScanNumbers() const override
tells if spectra can be accessed using scan numbers; by default, it returns false. Only overridden func...
bool processRetentionTime(pwiz::msdata::Spectrum *spectrum_p, QualifiedMassSpectrum &qualified_mass_spectrum) const
virtual bool acquireDevice() override
acquire data back end device
virtual void initialize() override
virtual void readSpectrumCollection(SpectrumCollectionHandlerInterface &handler) override
function to visit an MsRunReader and get each Spectrum in a spectrum collection handler
virtual bool accept(const QString &file_name) const override
tells if the reader is able to handle this file; must be implemented by private MS run reader,...
virtual std::size_t spectrumListSize() const override
get the total number of spectra contained in the MSrun data file
pwiz::msdata::SpectrumPtr getPwizSpectrumPtr(pwiz::msdata::SpectrumList *p_spectrum_list, std::size_t spectrum_index, bool want_binary_data) const
virtual bool releaseDevice() override
release data back end device; if the data back end is released, the developer has to use acquireDev...
virtual pappso::XicCoordSPtr newXicCoordSPtrFromQualifiedMassSpectrum(const pappso::QualifiedMassSpectrum &mass_spectrum, pappso::PrecisionPtr precision) const override
get a xic coordinate object from a given spectrum
const OboPsiModTerm getOboPsiModTermNativeIDFormat() const
get OboPsiModTerm corresponding to the nativeID format format of mz data
virtual MassSpectrumSPtr massSpectrumSPtr(std::size_t spectrum_index) override
get a MassSpectrumSPtr class given its spectrum index
Class representing a fully specified mass spectrum.
uint getMsLevel() const
Get the mass spectrum level.
MassSpectrumCstSPtr getMassSpectrumCstSPtr() const
Get the MassSpectrumCstSPtr.
void setPrecursorNativeId(const QString &native_id)
Set the scan native id of the precursor ion.
const std::vector< PrecursorIonData > & getPrecursorIonData() const
void setDtInMilliSeconds(pappso_double rt)
Set the drift time in milliseconds.
const QString & getPrecursorNativeId() const
void appendPrecursorIonData(const PrecursorIonData &precursor_ion_data)
void setMsLevel(uint ms_level)
Set the mass spectrum level.
void setPrecursorSpectrumIndex(std::size_t precursor_scan_num)
Set the scan number of the precursor ion.
pappso_double getPrecursorMz(bool *ok=nullptr) const
get precursor mz
MassSpectrumSPtr getMassSpectrumSPtr() const
Get the MassSpectrumSPtr.
void setParameterValue(QualifiedMassSpectrumParameter parameter, const QVariant &value)
void setMassSpectrumSPtr(MassSpectrumSPtr massSpectrum)
Set the MassSpectrumSPtr.
void setRtInSeconds(pappso_double rt)
Set the retention time in seconds.
pappso_double getRtInSeconds() const
Get the retention time in seconds.
void setEmptyMassSpectrum(bool is_empty_mass_spectrum)
interface to collect spectrums from the MsRunReader class
virtual bool needPeakList() const =0
tells if we need the peak list (if we want the binary data) for each spectrum
virtual void setQualifiedMassSpectrum(const QualifiedMassSpectrum &spectrum)=0
static std::string toUtf8StandardString(const QString &text)
Definition utils.cpp:164
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
@ MGF
Mascot format.
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46
double pappso_double
A type definition for doubles.
Definition types.h:50
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
std::shared_ptr< MassSpectrum > MassSpectrumSPtr
std::shared_ptr< XicCoord > XicCoordSPtr
Definition xiccoord.h:43
MSrun file reader based on the proteowizard library.