libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
mzintegrationparams.cpp
Go to the documentation of this file.
1/* BEGIN software license
2 *
3 * msXpertSuite - mass spectrometry software suite
4 * -----------------------------------------------
5 * Copyright(C) 2009,...,2018 Filippo Rusconi
6 *
7 * http://www.msxpertsuite.org
8 *
9 * This file is part of the msXpertSuite project.
10 *
11 * The msXpertSuite project is the successor of the massXpert project. This
12 * project now includes various independent modules:
13 *
14 * - massXpert, model polymer chemistries and simulate mass spectrometric data;
15 * - mineXpert, a powerful TIC chromatogram/mass spectrum viewer/miner;
16 *
17 * This program is free software: you can redistribute it and/or modify
18 * it under the terms of the GNU General Public License as published by
19 * the Free Software Foundation, either version 3 of the License, or
20 * (at your option) any later version.
21 *
22 * This program is distributed in the hope that it will be useful,
23 * but WITHOUT ANY WARRANTY; without even the implied warranty of
24 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
25 * GNU General Public License for more details.
26 *
27 * You should have received a copy of the GNU General Public License
28 * along with this program. If not, see <http://www.gnu.org/licenses/>.
29 *
30 * END software license
31 */
32
33
34/////////////////////// StdLib includes
35#include <map>
36#include <cmath>
37
38
39/////////////////////// Qt includes
40#include <QDebug>
41#include <QString>
42#include <QFile>
43#include <QDateTime>
44
45
46/////////////////////// pappsomspp includes
47#include "../../utils.h"
48#include "../../massspectrum/massspectrum.h"
49
50
51/////////////////////// Local includes
52#include "mzintegrationparams.h"
53
54
56 qRegisterMetaType<pappso::MzIntegrationParams>("pappso::MzIntegrationParams");
58 qRegisterMetaType<pappso::MzIntegrationParams *>(
59 "pappso::MzIntegrationParams *");
60
61
62namespace pappso
63{
64
65
66//! Map relating each BinningType value to its textual representation.
//! MzIntegrationParams::toString() looks the current binning type up in this
//! map to print its name.
67std::map<BinningType, QString> binningTypeMap{
68 {BinningType::NONE, "NONE"},
69 {BinningType::DATA_BASED, "DATA_BASED"},
70 {BinningType::ARBITRARY, "ARBITRARY"}};
71
72
78
79
82 BinningType binningType,
83 int decimalPlaces,
84 pappso::PrecisionPtr precisionPtr,
85 int binSizeDivisor,
86 bool removeZeroValDataPoints)
87 : m_smallestMz(minMz),
88 m_greatestMz(maxMz),
89 m_binningType(binningType),
90 m_decimalPlaces(decimalPlaces),
91 mp_precision(precisionPtr),
92 m_binSizeDivisor(binSizeDivisor),
93 m_removeZeroValDataPoints(removeZeroValDataPoints)
94{
95 if(mp_precision == nullptr)
97
98 // qDebug() << "mp_precision:" << mp_precision->toString();
99}
100
101
103 : m_smallestMz(other.m_smallestMz),
104 m_greatestMz(other.m_greatestMz),
105 m_binningType(other.m_binningType),
106 m_decimalPlaces(other.m_decimalPlaces),
107 mp_precision(other.mp_precision),
108 m_binSizeDivisor(other.m_binSizeDivisor),
109 m_removeZeroValDataPoints(other.m_removeZeroValDataPoints)
110{
111 if(mp_precision == nullptr)
113
114 // qDebug() << "mp_precision:" << mp_precision->toString();
115}
116
117
121
122
125{
126 if(this == &other)
127 return *this;
128
132
134
136 if(mp_precision == nullptr)
138
140
142
143 return *this;
144}
145
146
147void
152
153
154void
159
160
166
167
168void
173
174
175void
180
181
187
188
189void
191{
192 m_binningType = binningType;
193}
194
195
201
202
203void
205{
206 m_decimalPlaces = decimal_places;
207}
208
209
210int
215
216void
224
230
231
232void
234{
235 m_binSizeDivisor = divisor;
236}
237
238
239int
244
245
246void
251
252
253bool
258
259
260//! Reset the instance to default values.
//!
//! NOTE(review): the smallest m/z is reset below to
//! std::numeric_limits<double>::min(), whereas isValid(), hasValidMzRange()
//! and toString() all compare m_smallestMz against
//! std::numeric_limits<double>::max(). Also note that, for double, min() is
//! the smallest positive normalized value, not the most negative one. Please
//! confirm which value is meant for m_smallestMz here.
261void
263{
264 m_smallestMz = std::numeric_limits<double>::min();
265 m_greatestMz = std::numeric_limits<double>::min();
267
268 // Special case for this member datum
270
273}
274
275
//! Check the parameters and tell whether they are usable.
//!
//! Each failed check increments a local error counter. When the counter is
//! not zero a debug message is emitted.
//!
//! \return true if no error was counted, false otherwise.
276bool
278{
279 int errors = 0;
280
// NOTE(review): the condition guarding this increment is not visible in this
// listing.
282 errors += 1;
283
// NOTE(review): the condition guarding the block below is not visible in this
// listing.
285 {
286 // qDebug() << "m_smallestMz:" << m_smallestMz;
287 // qDebug() << "smallest is max:" << (m_smallestMz ==
288 // std::numeric_limits<double>::max());
289
 // A smallest m/z still equal to numeric_limits<double>::max() is an error.
290 errors += (m_smallestMz == std::numeric_limits<double>::max() ? 1 : 0);
291
292 // qDebug() << "m_greatestMz:" << m_greatestMz;
293 // qDebug() << "greatest is min:" << (m_greatestMz ==
294 // std::numeric_limits<double>::min());
 // A greatest m/z still equal to numeric_limits<double>::min() is an error.
295 errors += (m_greatestMz == std::numeric_limits<double>::min() ? 1 : 0);
296
297 // if(mp_precision != nullptr)
298 // qDebug() << mp_precision->toString();
299
 // A null precision pointer is an error.
300 errors += (mp_precision == nullptr ? 1 : 0);
301 }
302
303 if(errors)
304 {
305 qDebug()
306 << "The m/z integration parameters are not valid or do not apply...";
307 }
308
309 return !errors;
310}
311
312
//! Tell whether both ends of the m/z range hold a usable value.
//!
//! \return true when m_smallestMz differs from
//! std::numeric_limits<double>::max() and m_greatestMz differs from
//! std::numeric_limits<double>::min(); false otherwise.
//!
//! NOTE(review): presumably these two limits are the "not set yet"
//! placeholders of the members; confirm against the header's defaults.
313bool
315{
316 return (m_smallestMz != std::numeric_limits<double>::max()) &&
317 (m_greatestMz != std::numeric_limits<double>::min());
318}
319
320
//! Create the bins using only the data held in this instance.
//!
//! Three branches are visible: the first returns an empty vector (no
//! binning), the second delegates to createArbitraryBins(), the third aborts
//! with qFatal().
//!
//! NOTE(review): the branch conditions are not visible in this listing;
//! presumably they test m_binningType, the qFatal() branch being the one that
//! would require mass spectral data. Confirm against the original file.
//!
//! \return the vector of bins, empty when no binning is performed.
321std::vector<double>
323{
324
325 // qDebug() << "mp_precision:" << mp_precision->toString();
326
327 std::vector<double> bins;
328
330 {
331 // If no binning is to be performed, fine.
332 return bins;
333 }
335 {
336 // Use only data in the MzIntegrationParams member data.
337 return createArbitraryBins();
338 }
340 {
341 // qDebug();
342
 // This overload receives no mass spectrum: reaching this branch is
 // reported as a programming error.
343 qFatal("Programming error.");
344 }
345
346 return bins;
347}
348
349
//! Create the bins, using mass_spectrum_csp when the bins depend on data.
//!
//! Three branches are visible: the first returns an empty vector (no
//! binning), the second delegates to createArbitraryBins(), the third
//! delegates to createDataBasedBins() with mass_spectrum_csp.
//!
//! NOTE(review): the signature and the branch conditions are not visible in
//! this listing; presumably the conditions test m_binningType. Confirm
//! against the original file.
//!
//! \return the vector of bins, empty when no binning is performed.
350std::vector<double>
352{
353
354 // qDebug();
355
356 std::vector<double> bins;
357
359 {
360 // If no binning is to be performed, fine.
361 return bins;
362 }
364 {
365 // Use only data in the MzIntegrationParams member data.
366 return createArbitraryBins();
367 }
369 {
370 // qDebug();
371
372 // Use the first spectrum to perform the data-based bins
373
374 return createDataBasedBins(mass_spectrum_csp);
375 }
376
377 return bins;
378}
379
380
//! Build the bins from the member data only (no mass spectrum involved).
//!
//! The first bin is min_mz rounded to m_decimalPlaces decimals. Each
//! following bin is the previous bin incremented by
//! mp_precision->delta(previous bin) (divided by m_binSizeDivisor in one of
//! the two branches) and rounded the same way. Bins are appended as long as
//! the previous bin is not greater than max_mz rounded to m_decimalPlaces
//! decimals, which means that the last bin of the returned vector lies beyond
//! that rounded value.
//!
//! NOTE(review): the declarations of min_mz and max_mz, the conditions that
//! select the m_binSizeDivisor branches and the statements that update
//! m_decimalPlaces are not visible in this listing. Presumably min_mz and
//! max_mz are taken from m_smallestMz and m_greatestMz; confirm against the
//! original file.
//!
//! \return the vector of bin m/z values.
381std::vector<double>
383{
384 // Now starts the tricky stuff. Depending on how the binning has been
385 // configured, we need to take diverse actions.
386
387 // qDebug() << "Bin size specification:" << mp_precision->toString();
388
391
392 // qDebug() << "m_smallestMz:" << m_smallestMz
393 //<< "m_greatestMz:" << m_greatestMz;
394
395 // qDebug() << QString::asprintf("min_mz: %.6f\n", min_mz)
396 //<< QString::asprintf("max_mz: %.6f\n", max_mz);
397
398 pappso::pappso_double binSize;
399
401 {
402 double resolution_based_bin_size = mp_precision->delta(min_mz);
403 binSize = resolution_based_bin_size / m_binSizeDivisor;
404
405 // qDebug() << "With res-based bin size, the uncorrected bin size:"
406 //<< resolution_based_bin_size
407 //<< "and the final binSize:" << binSize;
408 }
409 else
410 binSize = mp_precision->delta(min_mz);
411
412 // qDebug() << QString::asprintf(
413 //"binSize is the precision delta for min_mz: %.6f\n", binSize);
414
415 // Only compute the decimal places if they were not configured already.
416 if(m_decimalPlaces == -1)
417 {
418 // qDebug() << "Now checking how many decimal places are needed.";
419
420 // We want as many decimal places as there are 0s between the integral
421 // part of the double and the first non-0 digit. For example, if
422 // binSize is 0.004, zero decimals is 2 and m_decimalPlaces is set to 3,
423 // because we want decimals up to 4 included.
424
426
427 // qDebug() << "With binSize" << binSize
428 //<< " m_decimalPlaces was computed to be:" << m_decimalPlaces;
429 }
430 // else
431 // qDebug() << "m_decimalPlaces: " << m_decimalPlaces;
432
433 // Now that we have defined the value of m_decimalPlaces, let's use that
434 // value.
435
 // Rounding to m_decimalPlaces decimals: scale by 10^m_decimalPlaces,
 // subtract 0.49, take the ceiling, then scale back.
436 double first_mz = ceil((min_mz * std::pow(10, m_decimalPlaces)) - 0.49) /
437 pow(10, m_decimalPlaces);
438 double last_mz =
439 ceil((max_mz * pow(10, m_decimalPlaces)) - 0.49) / pow(10, m_decimalPlaces);
440
441 // qDebug() << "After having accounted for the decimals, new min/max values:"
442 //<< QString::asprintf("Very first data point: %.6f\n", first_mz)
443 //<< QString::asprintf("Very last data point to reach: %.6f\n",
444 // last_mz);
445
446 // Instantiate the vector of mz double_s that we'll feed with the bins.
447
448 std::vector<pappso::pappso_double> bins;
449
450 // Store that very first value for later use in the loop.
451 // The bins are nothing more than:
452 //
453 // 1. The first mz (that is the smallest mz value found in all the spectra).
454 // 2. A sequence of mz values corresponding to that first mz value
455 // incremented by the bin size.
456
457 // Seed the root of the bin vector with the first mz value rounded above as
458 // requested.
459 pappso::pappso_double previous_mz_bin = first_mz;
460
461 bins.push_back(previous_mz_bin);
462
463 // Now continue adding mz values until we have reached the end of the
464 // spectrum, that is the max_mz value, as converted using the decimals to
465 // last_mz.
466
467 // debugCount value used below for debugging purposes.
468 // int debugCount = 0;
469
 // The test is made on the previously stored bin: the bin computed during the
 // last iteration is stored even though it is greater than last_mz.
470 while(previous_mz_bin <= last_mz)
471 {
472
473 // qDebug() << "Now starting the bin creation loop.";
474
475 // Calculate dynamically the precision delta according to the current mz
476 // value.
477
478 // double precision_delta = mp_precision->delta(previous_mz_bin);
479 // qDebug() << "precision_delta: " << precision_delta;
480
481 // In certain circumstances, the bin size is not enough to properly render
482 // hyper-high resolution data (like the theoretical isotopic cluster data
483 // generated in silico). In that case, the bin size, computed using the
484 // precision object, is divided by the m_binSizeDivisor, which normally is
485 // set to 1 as the default, that is, it has no effect.
486
487 double current_mz;
488
490 current_mz = previous_mz_bin +
491 (mp_precision->delta(previous_mz_bin) / m_binSizeDivisor);
492 else
493 current_mz = previous_mz_bin + mp_precision->delta(previous_mz_bin);
494
495 // qDebug() << QString::asprintf(
496 //"previous_mzBin: %.6f and current_mz: %.6f\n",
497 // previous_mz_bin,
498 // current_mz);
499
500 // Now apply on the obtained mz value the decimals that were either set
501 // or computed earlier.
502
503 double current_rounded_mz =
504 ceil((current_mz * pow(10, m_decimalPlaces)) - 0.49) /
505 pow(10, m_decimalPlaces);
506
507 // qDebug() << QString::asprintf(
508 //"current_mz: %.6f and current_rounded_mz: %.6f and previous_mzBin "
509 //": % .6f\n ",
510 // current_mz,
511 // current_rounded_mz,
512 // previous_mz_bin);
513
514 // If rounding makes the new value identical to the previous one, then
515 // that means that we need to decrease roughness.
516
517 if(current_rounded_mz == previous_mz_bin)
518 {
 // NOTE(review): the statement that was on the line above the
 // recomputation is not visible in this listing; judging from the
 // debug message below it presumably increments m_decimalPlaces.
520
521 current_rounded_mz =
522 ceil((current_mz * pow(10, m_decimalPlaces)) - 0.49) /
523 pow(10, m_decimalPlaces);
524
525 // qDebug().noquote()
526 //<< "Had to increment decimal places by one while creating the bins "
527 //"in BinningType::ARBITRARY mode..";
528 }
529
530 bins.push_back(current_rounded_mz);
531
532 // Use the current_rounded_mz value for the storage of the previous mz bin.
533 previous_mz_bin = current_rounded_mz;
534 }
535
536
 // Disabled debugging aid: dumps the bins to a time-stamped file in /tmp.
537#if 0
538
539 QString fileName = "/tmp/massSpecArbitraryBins.txt-at-" +
540 QDateTime::currentDateTime().toString("yyyyMMdd-HH-mm-ss");
541
542 qDebug() << "Writing the list of bins setup in the "
543 "mass spectrum in file "
544 << fileName;
545
546 QFile file(fileName);
547 file.open(QIODevice::WriteOnly);
548
549 QTextStream fileStream(&file);
550
551 for(auto &&bin : bins)
552 fileStream << QString("%1\n").arg(bin, 0, 'f', 10);
553
554 fileStream.flush();
555 file.close();
556
557#endif
558
559 // qDebug() << "Prepared bins with " << bins.size() << "elements."
560 //<< "starting with mz" << bins.front() << "ending with mz"
561 //<< bins.back();
562
563 return bins;
564}
565
566
//! Build the bins from the m/z values found in mass_spectrum_csp.
//!
//! A sorted copy of the spectrum is made, then walked from its second data
//! point to its last one. At each step the start of the current bin is
//! stored and the start of the next bin is computed by adding to it the
//! difference between the current data point m/z and the previous one. When
//! m_decimalPlaces is not -1, the m/z values involved are rounded to that
//! number of decimals. A spectrum of N data points thus yields N-1 bins.
//!
//! \param mass_spectrum_csp spectrum providing the m/z values; it is
//! dereferenced without a null check. A spectrum with fewer than two data
//! points yields an empty vector.
//!
//! \return the vector of bin start m/z values.
//!
//! NOTE(review): the declaration of min_mz is not visible in this listing;
//! presumably it is the m/z of the first data point of the sorted copy.
567std::vector<double>
569 pappso::MassSpectrumCstSPtr mass_spectrum_csp)
570{
571 // qDebug();
572
573 // The bins in *this mass spectrum must be calculated starting from the
574 // data in the mass_spectrum_csp parameter.
575
576 // Instantiate the vector of mz double_s that we'll feed with the bins.
577
578 std::vector<pappso::pappso_double> bins;
579
580 if(mass_spectrum_csp->size() < 2)
581 return bins;
582
583 // Make sure the spectrum is sorted, as this function takes for granted
584 // that the DataPoint instances are sorted in ascending x (== mz) value
585 // order.
586 pappso::MassSpectrum local_mass_spectrum = *mass_spectrum_csp;
587 local_mass_spectrum.sortMz();
588
590
591 // qDebug() << "The min_mz:" << min_mz;
592
593 if(m_decimalPlaces != -1)
594 min_mz = ceil((min_mz * pow(10, m_decimalPlaces)) - 0.49) /
595 pow(10, m_decimalPlaces);
596
597
598 // Two values for the definition of a MassSpectrumBin.
599
600 // The first value of the mz range that defines the bin. This value is part
601 // of the bin.
602 pappso::pappso_double start_mz_in = min_mz;
603
604 // The second value of the mz range that defines the bin. This value is
605 // *not* part of the bin.
 // It is assigned at each loop iteration before being read.
606 pappso::pappso_double end_mz_out;
607
608 std::vector<pappso::DataPoint>::const_iterator it =
609 local_mass_spectrum.begin();
610
611 pappso::pappso_double prev_mz = it->x;
612
613 if(m_decimalPlaces != -1)
614 prev_mz = ceil((prev_mz * pow(10, m_decimalPlaces)) - 0.49) /
615 pow(10, m_decimalPlaces);
616
 // Start the walk at the second data point: the first one only seeds
 // prev_mz.
617 ++it;
618
619 while(it != local_mass_spectrum.end())
620 {
621 pappso::pappso_double next_mz = it->x;
622
623 if(m_decimalPlaces != -1)
624 next_mz = ceil((next_mz * pow(10, m_decimalPlaces)) - 0.49) /
625 pow(10, m_decimalPlaces);
626
627 pappso::pappso_double step = next_mz - prev_mz;
628 end_mz_out = start_mz_in + step;
629
630 if(m_decimalPlaces != -1)
631 end_mz_out = ceil((end_mz_out * pow(10, m_decimalPlaces)) - 0.49) /
632 pow(10, m_decimalPlaces);
633
634 // The data point that is crafted has a 0 y-value. The binning must
635 // indeed not create artificial intensity data.
636
637 // qDebug() << "Pushing back bin:" << start_mz_in << end_mz_out;
638
 // Only the start of the bin is stored; its end becomes the start of the
 // next bin.
639 bins.push_back(start_mz_in);
640
641 // Prepare next bin
642 start_mz_in = end_mz_out;
643
644 // Update prev_mz to be the current one for next iteration.
645 prev_mz = next_mz;
646
647 // Now go to the next DataPoint instance.
648 ++it;
649 }
650
 // Disabled debugging aid. NOTE(review): it iterates over m_bins and reads
 // startMzIn/endMzOut members, not the local bins vector of doubles; it
 // appears to need adapting before being enabled again.
651#if 0
652
653 QString fileName = "/tmp/massSpecDataBasedBins.txt";
654
655 qDebug() << "Writing the list of bins setup in the "
656 "mass spectrum in file "
657 << fileName;
658
659 QFile file(fileName);
660 file.open(QIODevice::WriteOnly);
661
662 QTextStream fileStream(&file);
663
664 for(auto &&bin : m_bins)
665 fileStream << QString("[%1-%2]\n")
666 .arg(bin.startMzIn, 0, 'f', 10)
667 .arg(bin.endMzOut, 0, 'f', 10);
668
669 fileStream.flush();
670 file.close();
671
672 qDebug() << "elements."
673 << "starting with mz" << m_bins.front().startMzIn << "ending with mz"
674 << m_bins.back().endMzOut;
675
676#endif
677
678 return bins;
679}
680
681
//! Craft a multi-line textual description of the integration parameters.
//!
//! \param offset number of times \p spacer is repeated to form the leading
//! indentation of each line.
//! \param spacer string used as the indentation unit; the lines below the
//! title get one more spacer, the bin details get two more.
//!
//! \return the text. The smallest and greatest m/z lines are only output when
//! the corresponding member differs from numeric_limits<double>::max() and
//! numeric_limits<double>::min(), respectively.
//!
//! Aborts with qFatal() if m_binningType is not found in binningTypeMap.
682QString
683MzIntegrationParams::toString(int offset, const QString &spacer) const
684{
685 QString lead;
686
687 for(int iter = 0; iter < offset; ++iter)
688 lead += spacer;
689
690 QString text = lead;
691 text += "m/z integration parameters:\n";
692
 // NOTE(review): the indentation is appended before the test, so it remains
 // in the text (without any newline) when the m/z line itself is skipped.
 // The same applies to the greatest m/z below.
693 text += lead;
694 text += spacer;
695 if(m_smallestMz != std::numeric_limits<double>::max())
696 text.append(
697 QString::asprintf("Smallest (first) m/z: %.6f\n", m_smallestMz));
698
699 text += lead;
700 text += spacer;
701 if(m_greatestMz != std::numeric_limits<double>::min())
702 text.append(QString::asprintf("Greatest (last) m/z: %.6f\n", m_greatestMz));
703
704 text += lead;
705 text += spacer;
706 text.append(QString("Decimal places: %1\n").arg(m_decimalPlaces));
707
708 std::map<BinningType, QString>::iterator it;
709 it = binningTypeMap.find(m_binningType);
710
711 if(it == binningTypeMap.end())
712 qFatal("Programming error.");
713
714 text += lead;
715 text += spacer;
716 text.append(QString("Binning type: %1\n").arg(it->second.toLatin1().data()));
717
718 // Only provide the details relative to the ARBITRARY binning type.
719
 // NOTE(review): the condition guarding the block below is not visible in
 // this listing; judging from the comment above it presumably tests
 // m_binningType against BinningType::ARBITRARY.
721 {
722 text += lead;
723 text += spacer;
724 text += spacer;
725 text.append(QString("Bin nominal size: %1\n")
726 .arg(mp_precision->getNominal(), 0, 'f', 6));
727
728 text += lead;
729 text += spacer;
730 text += spacer;
 // The only place marker of the two format strings below is %2:
 // QString::arg() replaces the lowest-numbered marker, so the value is
 // substituted all the same.
731 text.append(QString("Bin size: %2\n")
732 .arg(mp_precision->toString().toLatin1().data()));
733
734 text += lead;
735 text += spacer;
736 text += spacer;
737 text.append(QString("Bin size divisor: %2\n").arg(m_binSizeDivisor));
738 }
739
740 // Now other data that are independent of the bin settings.
741
742 text += lead;
743 text += spacer;
744 text += QString("Remove 0-val data points: %1\n")
745 .arg(m_removeZeroValDataPoints ? "true" : "false");
746
747 return text;
748}
749
750} // namespace pappso
Class to represent a mass spectrum.
void sortMz()
Sort the DataPoint instances of this spectrum.
The MzIntegrationParams class provides the parameters defining how m/z !
Q_INVOKABLE BinningType getBinningType() const
Q_INVOKABLE int getDecimalPlaces() const
pappso::pappso_double getSmallestMz() const
pappso::pappso_double m_smallestMz
MzIntegrationParams & operator=(const MzIntegrationParams &other)
Q_INVOKABLE pappso::pappso_double getGreatestMz() const
pappso::pappso_double m_greatestMz
Q_INVOKABLE pappso::PrecisionPtr getPrecision() const
std::vector< double > createArbitraryBins()
Q_INVOKABLE void setPrecision(pappso::PrecisionPtr precisionPtr)
Q_INVOKABLE void updateSmallestMz(pappso::pappso_double value)
Q_INVOKABLE void updateGreatestMz(pappso::pappso_double value)
Q_INVOKABLE bool isRemoveZeroValDataPoints() const
Q_INVOKABLE bool isValid() const
Q_INVOKABLE bool hasValidMzRange() const
Q_INVOKABLE QString toString(int offset=0, const QString &spacer=QString()) const
pappso::PrecisionPtr mp_precision
Q_INVOKABLE void setSmallestMz(pappso::pappso_double value)
Q_INVOKABLE void setBinningType(BinningType binningType)
Q_INVOKABLE void reset()
Reset the instance to default values.
std::vector< double > createDataBasedBins(pappso::MassSpectrumCstSPtr massSpectrum)
Q_INVOKABLE void setDecimalPlaces(int decimal_places)
Q_INVOKABLE std::vector< pappso::pappso_double > createBins()
Q_INVOKABLE void setRemoveZeroValDataPoints(bool removeOrNot=true)
Q_INVOKABLE void setGreatestMz(pappso::pappso_double value)
virtual QString toString() const =0
virtual pappso_double getNominal() const final
Definition precision.cpp:65
virtual pappso_double delta(pappso_double value) const =0
virtual PrecisionUnit unit() const =0
static PrecisionPtr getResInstance(pappso_double value)
get a resolution precision pointer
static PrecisionPtr getPpmInstance(pappso_double value)
get a ppm precision pointer
static PrecisionPtr getDaltonInstance(pappso_double value)
get a Dalton precision pointer
static int zeroDecimalsInValue(pappso_double value)
0.11 would return 0 (no empty decimal); 2.001 would return 2; 1000.0001254 would return 3
Definition utils.cpp:103
int mzIntegrationParamsMetaTypeId
int mzIntegrationParamsPtrMetaTypeId
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
BinningType
Type of binning when performing integrations to a mass spectrum.
@ DATA_BASED
binning based on mass spectral data
@ ARBITRARY
binning based on arbitrary bin size value
@ NONE
< no binning
double pappso_double
A type definition for doubles.
Definition types.h:50
std::shared_ptr< const MassSpectrum > MassSpectrumCstSPtr
std::map< BinningType, QString > binningTypeMap
Map relating the BinningType to a textual representation.