libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
msfileaccessor.cpp
Go to the documentation of this file.
1// #include <proteowizard/pwiz/data/msdata/DefaultReaderList.hpp>
2
3#include <QDebug>
4#include <QFile>
5#include <QFileInfo>
6
7
8#include "msfileaccessor.h"
9#include "pwizmsfilereader.h"
10#include "timsmsfilereader.h"
11#include "bafasciifilereader.h"
12#include "xymsfilereader.h"
13
14
15#include "../exception/exceptionnotfound.h"
16#include "../exception/exceptionnotpossible.h"
17#include "../exception/exceptionnotrecognized.h"
18#include "../msrun/msrunid.h"
19#include "../msrun/private/timsframesmsrunreader.h"
20
21#include "../msrun/private/pwizmsrunreader.h"
22#include "../msrun/private/timsmsrunreader.h"
23#include "../msrun/private/timsmsrunreaderms2.h"
24#include "../msrun/bafasciimsrunreader.h"
25#include "../msrun/xymsrunreader.h"
26
27#include "../utils.h"
28
29
30namespace pappso
31{
32
33
// Builds an accessor for the mass spectrometry data file `file_name`.
//
// file_name  : path stored in m_fileName; it must exist on disk.
// xml_prefix : stored in m_xmlPrefix.
//
// Throws ExceptionNotFound (message carries the absolute file path) when the
// file does not exist.
//
// NOTE(review): listing lines 44 and 46 are absent from this view, so only part
// of the m_oboPsiModTermNativeIDFormat initialisation is visible below.
34MsFileAccessor::MsFileAccessor(const QString &file_name,
35 const QString &xml_prefix)
36 : m_fileName(file_name), m_xmlPrefix(xml_prefix)
37{
38 QFile file(file_name);
 // Refuse to build an accessor on a path that does not exist.
39 if(!file.exists())
40 throw(ExceptionNotFound(QObject::tr("File %1 not found.")
41 .arg(QFileInfo(file_name).absoluteFilePath())));
42
43
 // Start with the "no nativeID format" term as the nativeID format term.
45 m_oboPsiModTermNativeIDFormat.m_name = "no nativeID format";
47 "No nativeID format indicates that the file tagged with this term does not "
48 "contain spectra that can have a nativeID format.";
49}
50
51
// Member initialiser list that copies the file name, XML prefix, file format
// and file reader type from `other`.
// NOTE(review): the line carrying the constructor name and parameter list
// (listing line 52) and body line 58 are missing from this view; presumably
// this is the copy constructor - confirm against the header.
53 : m_fileName(other.m_fileName),
54 m_xmlPrefix(other.m_xmlPrefix),
55 m_fileFormat(other.m_fileFormat),
56 m_fileReaderType(other.m_fileReaderType)
57{
59}
60
64
65
// Returns a const reference to the stored file name (m_fileName).
// NOTE(review): the line holding the qualified function name (listing line 67)
// is missing from this view.
66const QString &
68{
69 return m_fileName;
70}
71
72
78
// Returns, by value, an OboPsiModTerm describing the current m_fileFormat.
//
// Each populated branch of the switch fills in m_accession, m_name and
// m_definition of a local term. Branches that only `break`, and the `default`
// branch, leave the term exactly as its default constructor made it.
//
// NOTE(review): the line with the function name and most `case` labels are
// missing from this view, so which MsDataFormat value selects which branch
// cannot be read from here (only `MsDataFormat::xy` is visible).
79const OboPsiModTerm
81{
82 OboPsiModTerm term;
83
84 // is_a: MS:1000560 ! mass spectrometer file format
85 switch(m_fileFormat)
86 {
88 term.m_accession = "MS:1001560";
89 term.m_name = "SCIEX TOF/TOF T2D format";
90 term.m_definition =
91 "Applied Biosystems/MDS Analytical Technologies TOF/TOF instrument "
92 "export format.";
93 break;
95 term.m_accession = "MS:1000562";
96 term.m_name = "ABI WIFF format";
97 term.m_definition = "Applied Biosystems WIFF file format.";
98 break;
100 term.m_accession = "MS:1001509";
101 term.m_name = "Agilent MassHunter format";
102 term.m_definition =
103 "A data file format found in an Agilent MassHunter directory which "
104 "contains raw data acquired by an Agilent mass spectrometer.";
105 break;
107 break;
109 term.m_accession = "MS:1000825";
110 term.m_name = "Bruker FID format";
111 term.m_definition = "Bruker FID file format.";
112 break;
114 term.m_accession = "MS:1002817";
115 term.m_name = "Bruker TDF format";
116 term.m_definition = "Bruker TDF raw file format.";
117 break;
119 term.m_accession = "MS:1000567";
120 term.m_name = "Bruker/Agilent YEP format";
121 term.m_definition = "Bruker/Agilent YEP file format.";
122 break;
124 term.m_accession = "MS:1001062";
125 term.m_name = "Mascot MGF format";
126 term.m_definition = "Mascot MGF file format.";
127 break;
129 break;
131 term.m_accession = "MS:1001881";
132 term.m_name = "mz5 format";
133 term.m_definition = "mz5 file format, modelled after mzML.";
134 break;
136 term.m_accession = "MS:1000584";
137 term.m_name = "mzML format";
138 term.m_definition =
139 "Proteomics Standards Inititative mzML file format.";
140 break;
142 term.m_accession = "MS:1000566";
143 term.m_name = "ISB mzXML format";
144 term.m_definition = "Institute of Systems Biology mzXML file format.";
145 break;
147 break;
149
150 term.m_accession = "MS:1000563";
151 term.m_name = "Thermo RAW format";
152 term.m_definition = "Thermo Scientific RAW file format.";
153 break;
155 break;
157 term.m_accession = "MS:1000526";
158 term.m_name = "Waters raw format";
159 term.m_definition =
160 "Waters data file format found in a Waters RAW directory, generated "
161 "from an MS acquisition.";
162 break;
 // NOTE(review): this branch and the `xy` branch below use the same accession
 // ("MS:1001369") with different names and definitions - confirm this is
 // intended.
164 term.m_accession = "MS:1001369";
165 term.m_name = "BafAscii text format";
166 term.m_definition =
167 "Simple text file format obtained by exporting Bruker Baf to ascii "
168 "using Bruker software";
169 break;
170 case MsDataFormat::xy:
171 term.m_accession = "MS:1001369";
172 term.m_name = "text format";
173 term.m_definition =
174 "Simple text file format of \"m/z<separator>intensity\" value pairs "
175 "for a single mass spectrum, a PMF (or single MS2) search.";
176 break;
177 default:
178 break;
179 }
180
181 return term;
182}
183
184
185const OboPsiModTerm &
192
193
// Lists the MS run ids found in m_fileName by trying the available file
// readers one after the other and returning the result of the first reader
// that reports at least one run.
//
// Order of attempts: PwizMsFileReader, TimsMsFileReader, BafAsciiFileReader,
// XyMsFileReader. On success m_fileFormat is set from the reader that worked.
// When no reader finds a run, the (empty) result of the last attempt is
// returned.
//
// Side effects: writes m_fileFormat, may write m_fileReaderType from the
// preferred-reader map, and for the Tims reader replaces m_fileName with the
// directory that was actually read.
//
// NOTE(review): several lines are missing from this view (the function name
// line, the declarations of `pref`, and listing lines 210, 243, 253, 273, 277
// and 305), so some assignments and one condition are not visible here.
194std::vector<MsRunIdCstSPtr>
196{
197 // qDebug();
198
199 // Try the PwizMsFileReader
200
201 PwizMsFileReader pwiz_ms_file_reader(m_fileName);
202
203 std::vector<MsRunIdCstSPtr> ms_run_ids =
204 pwiz_ms_file_reader.getMsRunIds(m_xmlPrefix);
205 if(ms_run_ids.size())
206 {
207 qDebug() << "Might well be handled using the Pwiz code.";
208
209 m_fileFormat = pwiz_ms_file_reader.getFileFormat();
211
212 // But the user might have configured one preferred reader type.
213
215 if(pref != m_preferredFileReaderTypeMap.end())
216 {
217 m_fileReaderType = pref->second;
218 }
219
220 return ms_run_ids;
221 }
222
223 qDebug() << "The Pwiz reader did not work.";
224
225 // Try the TimsData reader
226
 // The Tims reader is given a directory: when m_fileName is not itself a
 // directory, the absolute path of its parent directory is used instead.
227 QString tims_dir = m_fileName;
228 if(!QFileInfo(tims_dir).isDir())
229 {
230 tims_dir = QFileInfo(m_fileName).absolutePath();
231 }
232
233 TimsMsFileReader tims_file_reader(tims_dir);
234
235 ms_run_ids = tims_file_reader.getMsRunIds(m_xmlPrefix);
236
237 if(ms_run_ids.size())
238 {
239 qDebug() << "Might well be handled using the Bruker code";
240
 // From now on the accessor refers to the directory, not the original path.
241 m_fileName = tims_dir;
242 m_fileFormat = tims_file_reader.getFileFormat();
244
246 if(pref != m_preferredFileReaderTypeMap.end())
247 {
248 m_fileReaderType = pref->second;
249 }
250
251 qDebug() << "Returning Bruker::tims ms run(s)."
252 << "with preferred reader type:"
254
255 return ms_run_ids;
256 }
257
258 qDebug() << "The Tims reader did not work.";
259
260 // Try the Baf->ascii export format from Bruker Compass
261
 // A PappsoException raised by the BafAscii attempt is caught and only
 // logged, so the XY attempt below still runs.
262 try
263 {
264 ms_run_ids.clear();
265 BafAsciiFileReader baf_ascii_ms_file_reader(m_fileName);
266
267 ms_run_ids = baf_ascii_ms_file_reader.getMsRunIds(m_xmlPrefix);
268
269 if(ms_run_ids.size())
270 {
271 qDebug() << "Might well be handled using the BafAscii code";
272
274
275 m_fileFormat = baf_ascii_ms_file_reader.getFileFormat();
276
278 {
279 ms_run_ids.clear();
280 }
281 else
282 {
283 return ms_run_ids;
284 }
285 }
286 }
287 catch(const pappso::PappsoException &error)
288 {
289 qDebug() << "This is not a BafAscii code file" << error.qwhat();
290 }
291
292
293 qDebug() << "The BafAscii reader did not work.";
294
295 // At this point try the XyMsFileReader
296
297 XyMsFileReader xy_ms_file_reader(m_fileName);
298
299 ms_run_ids = xy_ms_file_reader.getMsRunIds(m_xmlPrefix);
300
301 if(ms_run_ids.size())
302 {
303 qDebug() << "Might well be handled using the XY code";
304
306
307 m_fileFormat = xy_ms_file_reader.getFileFormat();
308
309 return ms_run_ids;
310 }
311
312 qDebug() << "The XY reader did not work.";
313
 // No reader recognised the file: ms_run_ids is empty at this point.
314 return ms_run_ids;
315}
316
317
// Records `reader_type` as the preferred reader for `format` in
// m_preferredFileReaderTypeMap, overwriting any preference already stored for
// that format.
// NOTE(review): the line with the function name and the `format` parameter
// (listing line 319) is missing from this view.
318void
320 FileReaderType reader_type)
321{
322 // qDebug();
323
324 auto ret = m_preferredFileReaderTypeMap.insert(
325 std::pair<MsDataFormat, FileReaderType>(format, reader_type));
326
 // insert() leaves an existing entry untouched and reports it through
 // ret.second == false; in that case the stored value is overwritten by hand.
327 if(!ret.second)
328 {
329 // replace
330 ret.first->second = reader_type;
331 }
332}
333
334
// Body that returns the reader type stored for `format` in
// m_preferredFileReaderTypeMap, or m_fileReaderType when the map has no entry
// for that format.
// NOTE(review): the signature lines (listing lines 335-336) are missing from
// this view; presumably this is getpreferredFileReaderType(MsDataFormat format)
// - confirm.
337{
338 // qDebug();
339
340 auto ret = m_preferredFileReaderTypeMap.find(format);
341
342 if(ret != m_preferredFileReaderTypeMap.end())
343 {
344 return ret->second;
345 }
346
 // No explicit preference for this format: fall back to the current reader type.
347 return m_fileReaderType;
348}
349
350
356
357
// Stores `ms_run_id_csp` in mcsp_selectedMsRunId.
// NOTE(review): the line with the function name and parameter list (listing
// line 359) is missing from this view.
358void
360{
361 mcsp_selectedMsRunId = ms_run_id_csp;
362}
363
364
370
// Body that builds a TimsMsRunReaderMs2 on the first MS run found by the Tims
// file reader.
//
// On success m_fileFormat is set from the Tims reader and m_fileName is
// replaced by the directory that was read. When the Tims reader finds no run,
// the else branch raises an error whose message names that directory.
//
// NOTE(review): the signature lines and the `throw` line (listing line 396)
// are missing from this view; presumably this is
// buildTimsMsRunReaderMs2SPtr() - confirm. The exception type is not visible.
373{
374 // try TimsData reader
 // When m_fileName is not itself a directory, the absolute path of its parent
 // directory is handed to the reader instead.
375 QString tims_dir = m_fileName;
376 if(!QFileInfo(tims_dir).isDir())
377 {
378 tims_dir = QFileInfo(m_fileName).absolutePath();
379 }
380 TimsMsFileReader tims_file_reader(tims_dir);
381
382 std::vector<MsRunIdCstSPtr> ms_run_ids =
383 tims_file_reader.getMsRunIds(m_xmlPrefix);
384
385 if(ms_run_ids.size())
386 {
387 // qDebug() << "Might well be handled using the Bruker code";
389 m_fileFormat = tims_file_reader.getFileFormat();
390 m_fileName = tims_dir;
391
 // Only the first run id is used.
392 return std::make_shared<TimsMsRunReaderMs2>(ms_run_ids.front());
393 }
394 else
395 {
397 QObject::tr("Unable to read mz data directory %1 with TimsTOF reader.")
398 .arg(tims_dir)));
399 }
400}
401
402
// Body that returns a run reader for `ms_run_id`, choosing the concrete reader
// class (Pwiz, Xy, Tims, TimsFrames, TimsMs2 or BafAscii) through a chain of
// branches, and throwing PappsoException("No file format was found.") when
// none applies.
//
// The run id must refer to the same file name as this accessor, otherwise an
// error is raised before any reader is built.
//
// NOTE(review): the signature and every branch condition (listing lines 412,
// 416, 420, 424, 430, 436-437, 443, 449, 455 and 464) are missing from this
// view, so what selects each branch cannot be read from here. Presumably this
// is msRunReaderSPtr(MsRunIdCstSPtr ms_run_id) - confirm.
405{
406 // qDebug();
407
408 // We want to return a MsRunReader that accounts for the configuration that
409 // the user might have set.
410
 // NOTE(review): the message below reads "the name file name"; "the same file
 // name" was probably meant.
411 if(m_fileName != ms_run_id->getFileName())
413 QObject::tr("The MsRunId instance must have the name file name as the "
414 "MsFileAccessor.")));
415
417 {
418 // qDebug() << "Returning a PwizMsRunReader.";
419 auto pwiz_reader = std::make_shared<PwizMsRunReader>(ms_run_id);
421 pwiz_reader->getOboPsiModTermNativeIDFormat();
422 return pwiz_reader;
423 }
425 {
426 // qDebug() << "Returning a XyMsRunReader.";
427
428 return std::make_shared<XyMsRunReader>(ms_run_id);
429 }
431 {
432 // qDebug() << "Returning a TimsMsRunReader.";
433
434 return std::make_shared<TimsMsRunReader>(ms_run_id);
435 }
438 {
439 // qDebug() << "Returning a TimsFramesMsRunReader.";
440
441 return std::make_shared<TimsFramesMsRunReader>(ms_run_id);
442 }
444 {
445 // qDebug() << "Returning a TimsMsRunReaderMs2.";
446
447 return std::make_shared<TimsMsRunReaderMs2>(ms_run_id);
448 }
450 {
451 // qDebug() << "Returning a BafAsciiMsRunReader.";
452
453 return std::make_shared<BafAsciiMsRunReader>(ms_run_id);
454 }
456 {
 // Inside this branch the choice depends on the run's own data format:
 // xy data gets the Xy reader, everything else the Pwiz reader.
457 if(ms_run_id.get()->getMsDataFormat() == MsDataFormat::xy)
458 {
459 return std::make_shared<XyMsRunReader>(ms_run_id);
460 }
461 else
462 {
463 auto pwiz_reader = std::make_shared<PwizMsRunReader>(ms_run_id);
465 pwiz_reader->getOboPsiModTermNativeIDFormat();
466 return pwiz_reader;
467 }
468 }
469 else
470 {
471 throw PappsoException(QObject::tr("No file format was found."));
472 }
473
 // NOTE(review): every visible branch above returns or throws, so this
 // statement appears to be unreachable - confirm against the full source.
474 return nullptr;
475}
476
477
// Returns a run reader for the MS run at position `ms_run_id_index` in the
// list produced by getMsRunIds().
//
// getMsRunIds() is called on every invocation. An index at or beyond the end
// of that list raises PappsoException; otherwise the work is delegated to the
// msRunReaderSPtr overload that takes a run id.
//
// NOTE(review): the return-type line (listing line 478) is missing from this
// view.
479MsFileAccessor::msRunReaderSPtr(std::size_t ms_run_id_index)
480{
481 std::vector<MsRunIdCstSPtr> ms_run_ids = getMsRunIds();
482 if(ms_run_id_index >= ms_run_ids.size())
483 throw PappsoException(QObject::tr("MsRunId request out-of-bound error."));
484
485 return msRunReaderSPtr(ms_run_ids.at(ms_run_id_index));
486}
487
488
496
497
503
// Builds a run reader directly from `ms_run_id`, using only the information
// carried by the run id itself (file name and data format).
//
// ms_run_id                  : run to read; its file must exist.
// preferred_file_reader_type : only consulted for the brukerTims format, where
//                              it selects between the Tims, TimsMs2 and
//                              TimsFrames readers (any other value yields the
//                              plain Tims reader).
//
// Throws ExceptionNotFound when the file does not exist and PappsoException
// when the data format is MsDataFormat::unknown. Formats that are not xy,
// brukerBafAscii, unknown or brukerTims are handed to PwizMsRunReader.
//
// NOTE(review): the lines with the return type and function name (listing
// lines 504-505) are missing from this view; presumably this is an overload of
// buildMsRunReaderSPtr - confirm.
506 MsRunIdCstSPtr ms_run_id, pappso::FileReaderType preferred_file_reader_type)
507{
508 QFile file(ms_run_id.get()->getFileName());
509 if(!file.exists())
510 throw(ExceptionNotFound(
511 QObject::tr("unable to build a reader : file %1 not found.")
512 .arg(QFileInfo(ms_run_id.get()->getFileName()).absoluteFilePath())));
513
514 MsDataFormat file_format = ms_run_id.get()->getMsDataFormat();
515
516 if(file_format == MsDataFormat::xy)
517 {
518 // qDebug() << "Returning a XyMsRunReader.";
519
520 return std::make_shared<XyMsRunReader>(ms_run_id);
521 }
522 else if(file_format == MsDataFormat::brukerBafAscii)
523 {
524 // qDebug() << "Returning a BafAsciiMsRunReader.";
525
526 return std::make_shared<BafAsciiMsRunReader>(ms_run_id);
527 }
528 else if(file_format == MsDataFormat::unknown)
529 {
530 throw(PappsoException(
531 QObject::tr("unable to build a reader for %1 : unknown file format")
532 .arg(QFileInfo(ms_run_id.get()->getFileName()).absoluteFilePath())));
533 }
534
535 else if(file_format == MsDataFormat::brukerTims)
536 {
537 if(preferred_file_reader_type == pappso::FileReaderType::tims)
538 {
539 return std::make_shared<TimsMsRunReader>(ms_run_id);
540 }
541 else if(preferred_file_reader_type == pappso::FileReaderType::tims_ms2)
542 {
543 return std::make_shared<TimsMsRunReaderMs2>(ms_run_id);
544 }
545 else if(preferred_file_reader_type == pappso::FileReaderType::tims_frames)
546 {
547 qDebug()
548 << "returning std::make_shared<TimsFramesMsRunReader>(ms_run_id).";
549 return std::make_shared<TimsFramesMsRunReader>(ms_run_id);
550 }
551 // qDebug() << "by default, build a TimsMsRunReader.";
552 return std::make_shared<TimsMsRunReader>(ms_run_id);
553 }
554 else
555 {
556 // qDebug() << "Returning a PwizMsRunReader .";
557 return std::make_shared<PwizMsRunReader>(ms_run_id);
558 }
559}
560
561
// Finds the MS run whose run id equals `run_id` among the runs returned by
// getMsRunIds() and returns a reader for a copy of that run id whose XML id
// has been set to `xml_id`.
//
// When no run matches, `run_id` is empty and the file holds exactly one run,
// that single run is used instead. Otherwise ExceptionNotFound is thrown with
// the requested run id and the absolute path of m_fileName.
//
// NOTE(review): the lines with the return type, function name and the `run_id`
// parameter (listing lines 562-563) are missing from this view.
564 const QString &xml_id)
565{
566 std::vector<MsRunIdCstSPtr> run_list = getMsRunIds();
567 MsRunReaderSPtr reader_sp;
568 for(MsRunIdCstSPtr &original_run_id : run_list)
569 {
570 if(original_run_id.get()->getRunId() == run_id)
571 {
 // The stored run id is const, so work on a copy to set the XML id.
572 MsRunId new_run_id(*original_run_id.get());
573 new_run_id.setXmlId(xml_id);
574
575 return msRunReaderSPtr(std::make_shared<MsRunId>(new_run_id));
576 }
577 }
578
 // Fallback: an empty run_id is accepted when the file contains a single run.
579 if((run_id.isEmpty()) && (run_list.size() == 1))
580 {
581 MsRunId new_run_id(*run_list[0].get());
582 new_run_id.setXmlId(xml_id);
583
584 return msRunReaderSPtr(std::make_shared<MsRunId>(new_run_id));
585 }
586
587
 // NOTE(review): reader_sp is never assigned in this function, so the test
 // below is always true when reached and the final `return reader_sp;` is
 // never executed; the variable and the check could be removed.
588 if(reader_sp == nullptr)
589 {
590 throw(
591 ExceptionNotFound(QObject::tr("run id %1 not found in file %2")
592 .arg(run_id)
593 .arg(QFileInfo(m_fileName).absoluteFilePath())));
594 }
595 return reader_sp;
596}
597
598
599} // namespace pappso
virtual MsDataFormat getFileFormat() override
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
const OboPsiModTerm & getOboPsiModTermNativeIDFormat() const
get OboPsiModTerm corresponding to the nativeID format of mz data
std::map< MsDataFormat, FileReaderType > m_preferredFileReaderTypeMap
MsRunIdCstSPtr getSelectedMsRunId() const
MsRunReaderSPtr msRunReaderSPtr(MsRunIdCstSPtr ms_run_id)
void setPreferredFileReaderType(MsDataFormat format, FileReaderType reader_type)
given an mz format, explicitly set the preferred reader
FileReaderType getpreferredFileReaderType(MsDataFormat format)
MsRunReaderSPtr msRunReaderSPtrForSelectedMsRunId()
FileReaderType getFileReaderType() const
get the file reader type
MsRunIdCstSPtr mcsp_selectedMsRunId
MsDataFormat getFileFormat() const
get the raw format of mz data
std::vector< MsRunIdCstSPtr > getMsRunIds()
OboPsiModTerm m_oboPsiModTermNativeIDFormat
void setSelectedMsRunId(MsRunIdCstSPtr ms_run_id_csp)
const OboPsiModTerm getOboPsiModTermFileFormat() const
get OboPsiModTerm corresponding to the raw format of mz data
FileReaderType m_fileReaderType
MsRunReaderSPtr getMsRunReaderSPtrByRunId(const QString &run_id, const QString &xml_id)
get an msrun reader by finding the run_id in file
MsFileAccessor(const QString &file_name, const QString &xml_prefix)
static MsRunReaderSPtr buildMsRunReaderSPtr(MsRunIdCstSPtr ms_run_id)
get an MsRunReader directly from a valid MsRun ID
TimsMsRunReaderMs2SPtr buildTimsMsRunReaderMs2SPtr()
if possible, builds directly a dedicated Tims TOF tdf file reader
const QString & getFileName() const
MS run identity: MsRunId identifies an MS run with a unique ID (XmlId) and possibly contains informa...
Definition msrunid.h:54
void setXmlId(const QString &xml_id)
set an XML unique identifier for this MsRunId
Definition msrunid.cpp:137
virtual const QString & qwhat() const
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual MsDataFormat getFileFormat() override
virtual MsDataFormat getFileFormat() override
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
static QString fileReaderTypeAsString(FileReaderType file_reader_type)
Definition utils.cpp:496
virtual std::vector< MsRunIdCstSPtr > getMsRunIds(const QString &run_prefix) override
virtual MsDataFormat getFileFormat() override
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< MsRunReader > MsRunReaderSPtr
Definition msrunreader.h:56
MsDataFormat
Definition types.h:120
@ xy
(x,y) format
@ unknown
unknown format
@ SQLite3
SQLite3 format.
@ MGF
Mascot format.
std::shared_ptr< TimsMsRunReaderMs2 > TimsMsRunReaderMs2SPtr
std::shared_ptr< const MsRunId > MsRunIdCstSPtr
Definition msrunid.h:46
FileReaderType
Definition types.h:147
MSrun file reader for native Bruker TimsTOF raw data.