ProteoWizard
FeatureDetectorTuningTest.cpp
Go to the documentation of this file.
1//
2// $Id$
3//
4//
5// Original author: Darren Kessner <darren@proteowizard.org>
6//
7// Copyright 2009 Center for Applied Molecular Medicine
8// University of Southern California, Los Angeles, CA
9//
10// Licensed under the Apache License, Version 2.0 (the "License");
11// you may not use this file except in compliance with the License.
12// You may obtain a copy of the License at
13//
14// http://www.apache.org/licenses/LICENSE-2.0
15//
16// Unless required by applicable law or agreed to in writing, software
17// distributed under the License is distributed on an "AS IS" BASIS,
18// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19// See the License for the specific language governing permissions and
20// limitations under the License.
21//
22
23
24#include "PeakExtractor.hpp"
25#include "PeakelGrower.hpp"
26#include "PeakelPicker.hpp"
30#include "boost/filesystem/path.hpp"
32
33using namespace pwiz::util;
34using namespace pwiz::analysis;
35using namespace pwiz::data;
36using namespace pwiz::data::peakdata;
37using namespace pwiz::msdata;
38namespace bfs = boost::filesystem;
39
40
41ostream* os_ = 0;
42
43
44shared_ptr<PeakExtractor> createPeakExtractor()
45{
46 shared_ptr<NoiseCalculator> noiseCalculator(new NoiseCalculator_2Pass);
47
48 PeakFinder_SNR::Config pfsnrConfig;
49 pfsnrConfig.windowRadius = 2;
50 pfsnrConfig.zValueThreshold = 3;
51
52 shared_ptr<PeakFinder> peakFinder(new PeakFinder_SNR(noiseCalculator, pfsnrConfig));
53
55 pfpConfig.windowRadius = 1; // (windowRadius != 1) is not good for real data
56 shared_ptr<PeakFitter> peakFitter(new PeakFitter_Parabola(pfpConfig));
57
58 return shared_ptr<PeakExtractor>(new PeakExtractor(peakFinder, peakFitter));
59}
60
61
63{
64 double rt;
65 SetRetentionTime(double _rt) : rt(_rt) {}
66 void operator()(Peak& peak) {peak.retentionTime = rt;}
67};
68
69
70vector< vector<Peak> > extractPeaks(const MSData& msd, const PeakExtractor& peakExtractor)
71{
72 MSDataCache msdCache;
73 msdCache.open(msd);
74
75 const size_t spectrumCount = msdCache.size();
76 vector< vector<Peak> > result(spectrumCount);
77
78 for (size_t index=0; index<spectrumCount; index++)
79 {
80 const SpectrumInfo& spectrumInfo = msdCache.spectrumInfo(index, true);
81
82 vector<Peak>& peaks = result[index];
83 peakExtractor.extractPeaks(spectrumInfo.data, peaks);
84 for_each(peaks.begin(), peaks.end(), SetRetentionTime(spectrumInfo.retentionTime));
85
86 if (os_)
87 {
88 *os_ << "index: " << index << endl;
89 *os_ << "peaks: " << peaks.size() << endl;
90 copy(peaks.begin(), peaks.end(), ostream_iterator<Peak>(*os_, "\n"));
91 }
92 }
93
94 return result;
95}
96
97
98shared_ptr<PeakelGrower> createPeakelGrower()
99{
101 config.mzTolerance = .01;
102 config.rtTolerance = 10; // seconds
103
104 return shared_ptr<PeakelGrower>(new PeakelGrower_Proximity(config));
105}
106
107
108void print(ostream& os, const string& label, vector<PeakelPtr> v)
109{
110 os << label << ":\n";
111 for (vector<PeakelPtr>::const_iterator it=v.begin(); it!=v.end(); ++it)
112 os << **it << endl;
113}
114
115
116void verifyBombesinPeakels(const PeakelField& peakelField)
117{
118 // TODO: assert # peaks/peakel, verify metadata
119
120 // charge state 2
121
122 vector<PeakelPtr> bombesin_2_0 = peakelField.find(810.41, .01, RTMatches_Contains<Peakel>(1870));
123 if (os_) print(*os_, "bombesin_2_0", bombesin_2_0);
124 unit_assert(bombesin_2_0.size() == 1);
125
126 vector<PeakelPtr> bombesin_2_1 = peakelField.find(810.91, .01, RTMatches_Contains<Peakel>(1870));
127 if (os_) print(*os_, "bombesin_2_1", bombesin_2_1);
128 unit_assert(bombesin_2_1.size() == 1);
129
130 vector<PeakelPtr> bombesin_2_2 = peakelField.find(811.41, .01, RTMatches_Contains<Peakel>(1870));
131 if (os_) print(*os_, "bombesin_2_2", bombesin_2_2);
132 unit_assert(bombesin_2_2.size() == 1);
133
134 vector<PeakelPtr> bombesin_2_3 = peakelField.find(811.91, .01, RTMatches_Contains<Peakel>(1870,10));
135 if (os_) print(*os_, "bombesin_2_3", bombesin_2_3);
136 unit_assert(bombesin_2_3.size() == 1);
137
138 // charge state 3
139
140 vector<PeakelPtr> bombesin_3_0 = peakelField.find(540.61, .01, RTMatches_Contains<Peakel>(1870));
141 if (os_) print(*os_, "bombesin_3_0", bombesin_3_0);
142 unit_assert(bombesin_3_0.size() == 1);
143
144 vector<PeakelPtr> bombesin_3_1 = peakelField.find(540.61 + 1./3., .02, RTMatches_Contains<Peakel>(1865));
145 if (os_) print(*os_, "bombesin_3_1", bombesin_3_1);
146 unit_assert(bombesin_3_1.size() == 1);
147
148 vector<PeakelPtr> bombesin_3_2 = peakelField.find(540.61 + 2./3., .02, RTMatches_Contains<Peakel>(1865,5));
149 if (os_) print(*os_, "bombesin_3_2", bombesin_3_2);
150 unit_assert(bombesin_3_2.size() == 1);
151 // TODO: verify peaks.size() == 1
152}
153
154
155shared_ptr<PeakelPicker> createPeakelPicker()
156{
158 //config.log = os_;
159 config.minMonoisotopicPeakelSize = 2;
160
161 return shared_ptr<PeakelPicker>(new PeakelPicker_Basic(config));
162}
163
164
165void verifyBombesinFeatures(const FeatureField& featureField)
166{
167 const double epsilon = .01;
168
169 const double mz_bomb2 = 810.415;
170 vector<FeaturePtr> bombesin_2_found = featureField.find(mz_bomb2, epsilon,
172 unit_assert(bombesin_2_found.size() == 1);
173 const Feature& bombesin_2 = *bombesin_2_found[0];
174 unit_assert(bombesin_2.charge == 2);
175 unit_assert(bombesin_2.peakels.size() == 5);
176 unit_assert_equal(bombesin_2.peakels[0]->mz, mz_bomb2, epsilon);
177 unit_assert_equal(bombesin_2.peakels[1]->mz, mz_bomb2+.5, epsilon);
178 unit_assert_equal(bombesin_2.peakels[2]->mz, mz_bomb2+1, epsilon);
179 unit_assert_equal(bombesin_2.peakels[3]->mz, mz_bomb2+1.5, epsilon);
180 unit_assert_equal(bombesin_2.peakels[4]->mz, mz_bomb2+2, epsilon);
181 //TODO: verify feature metadata
182
183 const double mz_bomb3 = 540.612;
184 vector<FeaturePtr> bombesin_3_found = featureField.find(mz_bomb3, epsilon,
186 unit_assert(bombesin_3_found.size() == 1);
187 const Feature& bombesin_3 = *bombesin_3_found[0];
188 unit_assert(bombesin_3.charge == 3);
189 unit_assert(bombesin_3.peakels.size() == 3);
190 unit_assert_equal(bombesin_3.peakels[0]->mz, mz_bomb3, epsilon);
191 unit_assert_equal(bombesin_3.peakels[1]->mz, mz_bomb3+1./3, epsilon);
192 unit_assert_equal(bombesin_3.peakels[2]->mz, mz_bomb3+2./3, epsilon);
193 //TODO: verify feature metadata
194}
195
196
197void testBombesin(const string& filename)
198{
199 if (os_) *os_ << "testBombesin()" << endl;
200
201 // open data file and check sanity
202
203 MSDataFile msd(filename);
205 unit_assert(msd.run.spectrumListPtr->size() == 8);
206
207 // instantiate PeakExtractor and extract peaks
208
209 shared_ptr<PeakExtractor> peakExtractor = createPeakExtractor();
210 vector< vector<Peak> > peaks = extractPeaks(msd, *peakExtractor);
211 unit_assert(peaks.size() == 8);
212
213 // grow peakels
214 shared_ptr<PeakelGrower> peakelGrower = createPeakelGrower();
215 PeakelField peakelField;
216 peakelGrower->sowPeaks(peakelField, peaks);
217
218 if (os_) *os_ << "peakelField:\n" << peakelField << endl;
219 verifyBombesinPeakels(peakelField);
220
221 // pick peakels
222
223 shared_ptr<PeakelPicker> peakelPicker = createPeakelPicker();
224 FeatureField featureField;
225 peakelPicker->pick(peakelField, featureField);
226
227 if (os_) *os_ << "featureField:\n" << featureField << endl;
228 verifyBombesinFeatures(featureField);
229}
230
231
232void test(const bfs::path& datadir)
233{
234 testBombesin((datadir / "FeatureDetectorTest_Bombesin.mzML").string());
235}
236
237
238int main(int argc, char* argv[])
239{
240 TEST_PROLOG(argc, argv)
241
242 try
243 {
244 bfs::path datadir = ".";
245
246 for (int i=1; i<argc; i++)
247 {
248 if (!strcmp(argv[i],"-v"))
249 os_ = &cout;
250 else
251 // hack to allow running unit test from a different directory:
252 // Jamfile passes full path to specified input file.
253 // we want the path, so we can ignore filename
254 datadir = bfs::path(argv[i]).branch_path();
255 }
256
257 test(datadir);
258 }
259 catch (exception& e)
260 {
261 TEST_FAILED(e.what())
262 }
263 catch (...)
264 {
265 TEST_FAILED("Caught unknown exception.")
266 }
267
269}
270
shared_ptr< PeakExtractor > createPeakExtractor()
shared_ptr< PeakelGrower > createPeakelGrower()
void testBombesin(const string &filename)
int main(int argc, char *argv[])
shared_ptr< PeakelPicker > createPeakelPicker()
void print(ostream &os, const string &label, vector< PeakelPtr > v)
void verifyBombesinFeatures(const FeatureField &featureField)
void verifyBombesinPeakels(const PeakelField &peakelField)
ostream * os_
vector< vector< Peak > > extractPeaks(const MSData &msd, const PeakExtractor &peakExtractor)
simple memory cache for common MSData info
virtual void open(const DataInfo &dataInfo)
start analysis of the data
const SpectrumInfo & spectrumInfo(size_t index, bool getBinaryData=false)
access to SpectrumInfo with automatic update (open() must be called first)
Class for extracting Peak objects from an array of ordered pairs; in design pattern lingo,...
void extractPeaks(const pwiz::math::OrderedPairContainerRef &pairs, std::vector< Peak > &result) const
PeakFinder implementation based on signal-to-noise ratio.
PeakFitter implementation based on fitting a parabola.
simple PeakelGrower implementation, based on proximity of Peaks
const double epsilon
Definition DiffTest.cpp:41
MZRTField is a std::set of boost::shared_ptrs, stored as a binary tree ordered by LessThan_MZRT.
Definition MZRTField.hpp:95
std::vector< TPtr > find(double mz, MZTolerance mzTolerance, RTMatches matches) const
find all objects with a given m/z, within a given m/z tolerance, satisfying the 'matches' predicate
represents some generic metadata about a peak detected in a signal
predicate returns true iff the object's retention time range contains the specified retention time
std::vector< PeakelPtr > peakels
Definition PeakData.hpp:274
MSData object plus file I/O.
This is the root element of ProteoWizard; it represents the mzML element, defined as: intended to cap...
Definition MSData.hpp:850
Run run
a run in mzML should correspond to a single, consecutive and coherent set of scans on an instrument.
Definition MSData.hpp:886
SpectrumListPtr spectrumListPtr
all mass spectra and the acquisitions underlying them are described and attached here....
Definition MSData.hpp:827
simple structure for holding Spectrum info
std::vector< MZIntensityPair > data
#define unit_assert(x)
Definition unit.hpp:85
#define TEST_EPILOG
Definition unit.hpp:183
#define TEST_FAILED(x)
Definition unit.hpp:177
#define unit_assert_equal(x, y, epsilon)
Definition unit.hpp:99
#define TEST_PROLOG(argc, argv)
Definition unit.hpp:175