libpappsomspp
Library for mass spectrometry
grpexperiment.cpp
Go to the documentation of this file.
1 
2 /*******************************************************************************
3  * Copyright (c) 2015 Olivier Langella <Olivier.Langella@moulon.inra.fr>.
4  *
5  * This file is part of the PAPPSOms++ library.
6  *
7  * PAPPSOms++ is free software: you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation, either version 3 of the License, or
10  * (at your option) any later version.
11  *
12  * PAPPSOms++ is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with PAPPSOms++. If not, see <http://www.gnu.org/licenses/>.
19  *
20  * Contributors:
21  * Olivier Langella <Olivier.Langella@moulon.inra.fr> - initial API and
22  *implementation
23  ******************************************************************************/
24 
25 #include "grpexperiment.h"
26 #include "grpprotein.h"
27 #include "grppeptide.h"
28 
29 #include "grpgroup.h"
30 #include "grpsubgroup.h"
31 #include "../pappsoexception.h"
32 
33 using namespace pappso;
34 
// Constructor body: stores the monitor pointer received as p_monitor in the
// member mp_monitor. No other member is set here.
// NOTE(review): the definition line (listing line 35) is absent from this
// extract; the member index at the end of the page gives it as
// GrpExperiment(GrpGroupingMonitorInterface *p_monitor).
36 {
37  mp_monitor = p_monitor;
38 }
39 
// Empty function body.
// NOTE(review): the definition line (listing line 40) is absent from this
// extract; presumably this is the destructor - confirm against the original
// grpexperiment.cpp.
41 {
42 }
// A void member function whose name line (listing line 44) and only body
// statement (listing line 46) are both absent from this extract.
// NOTE(review): presumably setRemoveNonInformativeSubgroups(bool ok), which is
// listed in the member index at the end of the page - confirm against the
// original file before relying on this.
43 void
45 {
47 }
48 
// Builds a local GrpPeptideSet from the raw pointer held by sp_protein.
// NOTE(review): the name line (listing line 50) and the statement that consumes
// peptide_set (listing line 53) are absent from this extract. The member index
// lists two void functions taking sp_protein
// (addPostGroupingGrpProteinSpRemoval and addPreGroupingGrpProteinSpRemoval);
// which of the two this body belongs to cannot be told from here - confirm.
49 void
51 {
52  GrpPeptideSet peptide_set(sp_protein.get());
54 }
55 
56 
// Builds a local GrpPeptideSet from the raw pointer held by sp_protein.
// NOTE(review): the name line (listing line 58) and the statement that consumes
// peptide_set (listing line 61) are absent from this extract. The member index
// lists two void functions taking sp_protein
// (addPostGroupingGrpProteinSpRemoval and addPreGroupingGrpProteinSpRemoval);
// which of the two this body belongs to cannot be told from here - confirm.
57 void
59 {
60  GrpPeptideSet peptide_set(sp_protein.get());
62 }
63 
// Returns a freshly built vector containing every entry of m_grpGroupSpList,
// in list order, stored as GrpGroupSpConst.
// NOTE(review): the name line (listing line 65) is absent from this extract;
// the member index gives it as
// std::vector< GrpGroupSpConst > getGrpGroupSpList() const.
64 std::vector<GrpGroupSpConst>
66 {
67  std::vector<GrpGroupSpConst> grp_list;
  // each iteration copies the shared pointer into `group` before appending it
68  for(GrpGroupSp group : m_grpGroupSpList)
69  {
70  grp_list.push_back(group);
71  }
72  return grp_list;
73 }
74 
// Returns the protein stored in m_mapProteins under `accession`, registering a
// new GrpProtein(accession, description) when the key is not present yet.
//
// accession    key used in m_mapProteins
// description  only used to build the new protein; when the accession is
//              already known the existing entry is returned unchanged
//
// On a first insertion the shared pointer is appended to m_grpProteinList and
// its raw pointer to m_remainingGrpProteinList.
// NOTE(review): the return-type line (listing line 75) is absent from this
// extract; the member index gives it as GrpProteinSp &.
76 GrpExperiment::getGrpProteinSp(const QString &accession,
77  const QString &description)
78 {
  // the temporary protein and its shared copy are built on every call,
  // whether or not the insertion below succeeds
79  GrpProtein grpProtein(accession, description);
  // std::map::insert does not overwrite: .second is false for a known accession
80  auto insertedPair = m_mapProteins.insert(std::pair<QString, GrpProteinSp>(
81  accession, std::make_shared<GrpProtein>(grpProtein)));
82  if(insertedPair.second)
83  {
84  m_grpProteinList.push_back(insertedPair.first->second);
85  m_remainingGrpProteinList.push_back(insertedPair.first->second.get());
86  }
  // the returned value is the shared pointer stored inside the map node
87  return (insertedPair.first->second);
88 }
89 
// Registers a peptide (sequence + mass) for proteinSp and returns the stored
// shared pointer for that peptide.
//
// proteinSp  protein receiving the peptide; countPlus() is called on it on
//            every invocation, and the stored peptide's raw pointer is pushed
//            into it on every invocation
// sequence   used to construct the GrpPeptide
// mass       used to construct the GrpPeptide and to derive the inner map key
//
// m_mapPeptides is a two-level map: the outer key is the m_sequence member of
// the new GrpPeptide, the inner key is (unsigned long)(mass * 100). A peptide
// is appended to m_grpPeptideList only when the inner insertion created a new
// entry; otherwise the already stored peptide is reused.
// NOTE(review): the return-type and name lines (listing lines 90-91) are absent
// from this extract; the member index gives the signature as
// GrpPeptideSp & setGrpPeptide(const GrpProteinSp &proteinSp,
//                              const QString &sequence, pappso_double mass).
92  const QString &sequence,
93  pappso_double mass)
94 {
95  proteinSp.get()->countPlus();
96  GrpPeptideSp sp_grppeptide =
97  std::make_shared<GrpPeptide>(GrpPeptide(sequence, mass));
98 
  // insert() leaves an existing inner map for this sequence untouched
99  auto insertedPair = m_mapPeptides.insert(
100  std::pair<QString, std::map<unsigned long, GrpPeptideSp>>(
101  sp_grppeptide.get()->m_sequence,
102  std::map<unsigned long, GrpPeptideSp>()));
  // the cast truncates mass * 100, so masses that agree after that truncation
  // share one entry for a given sequence
103  auto secondInsertedPair =
104  insertedPair.first->second.insert(std::pair<unsigned long, GrpPeptideSp>(
105  (unsigned long)(mass * 100), sp_grppeptide));
106  if(secondInsertedPair.second)
107  {
108  m_grpPeptideList.push_back(secondInsertedPair.first->second);
109  }
110  proteinSp.get()->push_back(secondInsertedPair.first->second.get());
111  return (secondInsertedPair.first->second);
112 }
113 
// Runs the grouping pass over m_remainingGrpProteinList.
//
// Visible steps, in order:
//  1. set m_isGroupingStarted and clear the two lookup maps
//     (m_mapPeptides, m_mapProteins);
//  2. strip() every remaining protein and pass each one whose m_count is not
//     zero to addSubGroupSp() as a new sub-group;
//  3. call clear(m_grpGroupSpList) on the local peptide-to-group map;
//  4. remove from m_grpGroupSpList every group for which
//     containsAny(m_grpPostGroupingProteinListRemoval) is true;
//  5. call numbering().
//
// NOTE(review): listing lines 115, 119, 130, 151, 161, 163 and 176 are absent
// from this extract. That covers the function name line, two conditions and
// every statement guarded by the mp_monitor null checks. The function name is
// taken from the log strings below.
114 void
116 {
117  qDebug() << "GrpExperiment::startGrouping begin";
  // NOTE(review): the start of the guarded call (listing line 119) is missing;
  // only its last argument survives on the next line
118  if(mp_monitor != nullptr)
120  m_grpPeptideList.size());
121  m_isGroupingStarted = true;
  // only the lookup maps are emptied; the protein and peptide lists are kept
122  m_mapPeptides.clear();
123  m_mapProteins.clear();
124  qDebug() << "GrpExperiment::startGrouping sort protein list "
125  "m_remainingGrpProteinList.size() "
126  << m_remainingGrpProteinList.size();
127  // m_remainingGrpProteinList.sort();
128  // m_remainingGrpProteinList.unique();
129 
  // NOTE(review): the condition opening this block (listing line 130) is
  // missing; the block itself only holds a TODO and does nothing
131  {
132  // TODO clean protein list to remove contaminant peptides before grouping
133  }
134 
135 
136  GrpMapPeptideToGroup grp_map_peptide_to_group;
137  qDebug() << "GrpExperiment::startGrouping grouping begin";
138  for(auto p_grpProtein : m_remainingGrpProteinList)
139  {
140  p_grpProtein->strip();
141  if(p_grpProtein->m_count == 0)
142  {
143  // no peptides : do not group this protein
144  }
145  else
146  {
147  GrpSubGroupSp grpSubGroupSp =
148  GrpSubGroup(p_grpProtein).makeGrpSubGroupSp();
149 
  // NOTE(review): the statement guarded by this check (listing line 151) is
  // missing, so the addSubGroupSp call below is presumably unconditional -
  // confirm against the original file
150  if(mp_monitor != nullptr)
152  this->addSubGroupSp(grp_map_peptide_to_group, grpSubGroupSp);
153  }
154  }
  // presumably hands the accumulated groups over to m_grpGroupSpList (its size
  // is logged right after) - confirm in GrpMapPeptideToGroup::clear
155  grp_map_peptide_to_group.clear(m_grpGroupSpList);
156  qDebug() << "GrpExperiment::startGrouping grouping end";
157 
158  qDebug() << "GrpExperiment::startGrouping grouping m_grpGroupSpList.size() "
159  << m_grpGroupSpList.size();
160 
  // NOTE(review): the condition (listing line 161) and the body statement
  // (listing line 163) of this block are missing; presumably the optional
  // removeNonInformativeSubGroups() step - confirm
162  {
164  }
165 
166  // post grouping protein group removal
167  // remove any group containing contaminants
168  m_grpGroupSpList.remove_if([this](GrpGroupSp &groupSp) {
169  return (
170  groupSp.get()->containsAny(this->m_grpPostGroupingProteinListRemoval));
171  });
172 
173 
174  numbering();
  // NOTE(review): the guarded statement (listing line 176) is missing
175  if(mp_monitor != nullptr)
177  // GrpGroup(this, *m_remainingGrpProteinList.begin());
178  qDebug() << "GrpExperiment::startGrouping end";
179 }
180 
181 
// Predicate functor: holds a peptide set and reports, for a given group,
// whether that group's containsAny() is true for the held set.
// No use of this type is visible in this extract.
// NOTE(review): the opening line (listing line 182) and the member declaration
// (listing line 196) are absent; the member index lists the member as
// GrpPeptideSet _peptide_set, which would make it a copy - confirm.
183 {
  // initialises _peptide_set from the constructor argument
184  ContainsAny(const GrpPeptideSet &peptide_set) : _peptide_set(peptide_set)
185  {
186  }
187 
188  typedef bool result_type;
189 
  // returns the result of testGroupSp->containsAny(_peptide_set)
190  bool
191  operator()(const GrpGroupSp &testGroupSp)
192  {
193  return testGroupSp.get()->containsAny(_peptide_set);
194  }
195 
197 };
198 
199 
// Places one sub-group into the peptide-to-group map, either as a new group or
// merged into the groups already registered for its peptides.
//
// grp_map_peptide_to_group  map updated in place; per the member index,
//                           getGroupList() "get[s] all groups concerned by a
//                           list of peptides" and set() makes "peptide keys
//                           point on the group"
// grpSubGroupSp             sub-group to dispatch
//
// This function does not touch m_grpGroupSpList (the push_back and remove_if
// on it are commented out); groups live only in the map until the caller
// calls clear() on it.
// NOTE(review): the name line (listing line 201) is absent from this extract;
// the member index gives the signature as
// void addSubGroupSp(GrpMapPeptideToGroup &grp_map_peptide_to_group,
//                    GrpSubGroupSp &grpSubGroupSp) const.
200 void
202  GrpSubGroupSp &grpSubGroupSp) const
203 {
204  qDebug() << "GrpExperiment::addSubGroupSp begin "
205  << grpSubGroupSp.get()->getFirstAccession();
206 
  // collect the groups already registered for this sub-group's peptide set
207  std::list<GrpGroupSp> new_group_list;
208  grp_map_peptide_to_group.getGroupList(grpSubGroupSp.get()->getPeptideSet(),
209  new_group_list);
210 
211  if(new_group_list.size() == 0)
212  {
213  qDebug() << "GrpExperiment::addSubGroupSp create a new group";
214  // create a new group
215  GrpGroupSp sp_group = GrpGroup(grpSubGroupSp).makeGrpGroupSp();
216  // m_grpGroupSpList.push_back(sp_group);
217 
218  grp_map_peptide_to_group.set(grpSubGroupSp.get()->getPeptideSet(),
219  sp_group);
220  }
221  else
222  {
223  qDebug() << "GrpExperiment::addSubGroupSp fusion groupList.size() "
224  << new_group_list.size();
225  // fusion group and add the subgroup
  // the first group of the list is kept and receives the new sub-group
226  auto itGroup = new_group_list.begin();
227  GrpGroupSp p_keepGroup = *itGroup;
228  qDebug() << "GrpExperiment::addSubGroupSp "
229  "p_keepGroup->addSubGroupSp(grpSubGroupSp) "
230  << p_keepGroup.get();
231  p_keepGroup->addSubGroupSp(grpSubGroupSp);
232  grp_map_peptide_to_group.set(grpSubGroupSp.get()->getPeptideSet(),
233  p_keepGroup);
234 
  // every other group is added to the kept one, and its peptide set is
  // re-registered in the map against the kept group
235  itGroup++;
236  while(itGroup != new_group_list.end())
237  {
238  qDebug()
239  << "GrpExperiment::addSubGroupSp p_keepGroup->addGroup(*itGroup) "
240  << itGroup->get();
241  p_keepGroup->addGroup(itGroup->get());
242  grp_map_peptide_to_group.set((*itGroup)->getGrpPeptideSet(),
243  p_keepGroup);
244 
245  // m_grpGroupSpList.remove_if([itGroup](GrpGroupSp & groupSp) {
246  // return (itGroup->get() == groupSp.get()) ;
247  //});
248  itGroup++;
249  }
250  }
251 
252  qDebug() << "GrpExperiment::addSubGroupSp end";
253 }
254 
// Numbers the groups held in m_grpGroupSpList.
// Visible steps: call numbering() on every group, sort the list with the
// groups' operator<, then assign group numbers 1, 2, 3, ... in sorted order
// through setGroupNumber().
// NOTE(review): the name line (listing line 256) and the statement guarded by
// the mp_monitor check (listing line 260) are absent from this extract; the
// function name is taken from the log strings below.
255 void
257 {
258  qDebug() << "GrpExperiment::numbering begin";
  // NOTE(review): the guarded statement is missing, so the loop below is
  // presumably unconditional - confirm against the original file
259  if(mp_monitor != nullptr)
261  for(auto &&group_sp : m_grpGroupSpList)
262  {
263  group_sp.get()->numbering();
264  }
  // ordering compares the pointed-to groups, not the shared pointers
265  m_grpGroupSpList.sort([](GrpGroupSp &first, GrpGroupSp &second) {
266  return ((*first.get()) < (*second.get()));
267  });
  // group numbers start at 1
268  unsigned int i = 1;
269  for(auto &&group_sp : m_grpGroupSpList)
270  {
271  group_sp.get()->setGroupNumber(i);
272  i++;
273  }
274 
275  qDebug() << "GrpExperiment::numbering end";
276 }
277 
// Returns the proteins of m_grpProteinList whose getGroupNumber() is greater
// than zero, in list order, as a new vector of GrpProteinSpConst.
// Throws PappsoException ("unable to get grouped protein list before
// grouping") from the guarded block below.
// NOTE(review): the name line (listing line 279) and the condition guarding
// the throw (listing line 282) are absent from this extract; the member index
// gives the signature as
// std::vector< GrpProteinSpConst > getGrpProteinSpList() const. The condition
// presumably tests m_isGroupingStarted - confirm.
278 std::vector<GrpProteinSpConst>
280 {
281  std::vector<GrpProteinSpConst> grouped_protein_list;
283  {
284  throw PappsoException(
285  QObject::tr("unable to get grouped protein list before grouping"));
286  }
287  for(auto &&protein_sp : m_grpProteinList)
288  {
  // proteins with a group number of 0 are left out of the result
289  if(protein_sp.get()->getGroupNumber() > 0)
290  {
291  grouped_protein_list.push_back(protein_sp);
292  }
293  }
294  return grouped_protein_list;
295 }
296 
// Rebuilds m_grpGroupSpList after asking every group to drop its
// non-informative sub-groups.
//
// The current list is copied and then cleared. For each group of the copy:
//  - if the group's removeNonInformativeSubGroups() returns true, its
//    sub-groups are dispatched again through addSubGroupSp() using a fresh
//    local peptide-to-group map, and clear(m_grpGroupSpList) is then called
//    on that map;
//  - otherwise the group is appended back to m_grpGroupSpList unchanged.
//
// NOTE(review): listing lines 298, 302, 311 and 335 are absent from this
// extract: the function name line and every statement guarded by the
// mp_monitor null checks. The function name is taken from the log strings
// below.
297 void
299 {
300  qDebug() << "GrpExperiment::removeNonInformativeSubGroups begin";
  // NOTE(review): the start of the guarded call is missing; only its argument
  // survives on the next line
301  if(mp_monitor != nullptr)
303  m_grpGroupSpList.size());
304 
  // iterate over a copy so that the member list can be refilled from scratch
305  std::list<GrpGroupSp> old_grp_group_sp_list(m_grpGroupSpList);
306  m_grpGroupSpList.clear();
307  auto it_group = old_grp_group_sp_list.begin();
308  while(it_group != old_grp_group_sp_list.end())
309  {
  // NOTE(review): the guarded statement (listing line 311) is missing, so the
  // if below is presumably not nested under this check - confirm
310  if(mp_monitor != nullptr)
312  if(it_group->get()->removeNonInformativeSubGroups())
313  {
314  // need to regroup it
315  GrpGroupSp old_group_sp = *it_group;
316  GrpMapPeptideToGroup grp_map_peptide_to_group;
317 
  // the sub-group list is copied before being dispatched again
318  std::list<GrpSubGroupSp> dispatch_sub_group_set =
319  old_group_sp.get()->getSubGroupSpList();
320  for(GrpSubGroupSp &grp_subgroup : dispatch_sub_group_set)
321  {
322  addSubGroupSp(grp_map_peptide_to_group, grp_subgroup);
323  }
  // presumably appends the regrouped groups to m_grpGroupSpList - confirm
  // in GrpMapPeptideToGroup::clear
324  grp_map_peptide_to_group.clear(m_grpGroupSpList);
325  }
326  else
327  {
328  qDebug() << "GrpExperiment::removeNonInformativeSubGroups no "
329  "removeNonInformativeSubGroups";
330  m_grpGroupSpList.push_back(*it_group);
331  }
332  it_group++;
333  }
  // NOTE(review): the start of the guarded call (listing line 335) is missing
334  if(mp_monitor != nullptr)
336  m_grpGroupSpList.size());
337 
338  qDebug() << "GrpExperiment::removeNonInformativeSubGroups end";
339 }
void addSubGroupSp(GrpMapPeptideToGroup &grp_map_peptide_to_group, GrpSubGroupSp &grpSubGroupSp) const
std::list< GrpGroupSp > m_grpGroupSpList
Definition: grpexperiment.h:58
GrpPeptideSp & setGrpPeptide(const GrpProteinSp &proteinSp, const QString &sequence, pappso_double mass)
GrpGroupingMonitorInterface * mp_monitor
Definition: grpexperiment.h:44
GrpProteinSp & getGrpProteinSp(const QString &acc, const QString &description)
void addPostGroupingGrpProteinSpRemoval(GrpProteinSp sp_protein)
protein to remove with its entire group after grouping is completed typically : to use with protein c...
bool m_isRemoveNonInformativeSubgroups
Definition: grpexperiment.h:45
std::list< GrpPeptideSp > m_grpPeptideList
Definition: grpexperiment.h:49
std::vector< GrpGroupSpConst > getGrpGroupSpList() const
void setRemoveNonInformativeSubgroups(bool ok)
GrpPeptideSet m_grpPreGroupingProteinListRemoval
Definition: grpexperiment.h:56
GrpExperiment(GrpGroupingMonitorInterface *p_monitor)
void addPreGroupingGrpProteinSpRemoval(GrpProteinSp sp_protein)
protein peptides to remove before grouping typically : remove protein contaminants in special metapro...
std::map< QString, std::map< unsigned long, GrpPeptideSp > > m_mapPeptides
Definition: grpexperiment.h:47
std::list< GrpProteinSp > m_grpProteinList
Definition: grpexperiment.h:50
GrpPeptideSet m_grpPostGroupingProteinListRemoval
Definition: grpexperiment.h:54
std::list< GrpProtein * > m_remainingGrpProteinList
Definition: grpexperiment.h:52
std::vector< GrpProteinSpConst > getGrpProteinSpList() const
std::map< QString, GrpProteinSp > m_mapProteins
Definition: grpexperiment.h:46
GrpGroupSp makeGrpGroupSp()
Definition: grpgroup.cpp:98
virtual void removingNonInformativeSubGroupsInGroup()=0
virtual void startNumberingAllGroups(std::size_t total_number_group)=0
virtual void startRemovingNonInformativeSubGroupsInAllGroups(std::size_t total_number_group)=0
virtual void startGrouping(std::size_t total_number_protein, std::size_t total_number_peptide)=0
virtual void stopRemovingNonInformativeSubGroupsInAllGroups(std::size_t total_number_group)=0
void clear(std::list< GrpGroupSp > &grp_group_list)
void getGroupList(const GrpPeptideSet &peptide_set_in, std::list< GrpGroupSp > &impacted_group_list) const
get all groups concerned by a list of peptides
void set(const GrpPeptideSet &peptide_set_in, GrpGroupSp grp_group)
set peptide keys pointing on the group
unsigned int size() const
Definition: grppeptideset.h:54
void addAll(const GrpPeptideSet &peptideSet)
GrpSubGroupSp makeGrpSubGroupSp()
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition: aa.cpp:39
std::shared_ptr< GrpProtein > GrpProteinSp
Definition: grpprotein.h:37
std::shared_ptr< GrpSubGroup > GrpSubGroupSp
Definition: grpsubgroup.h:39
std::shared_ptr< GrpPeptide > GrpPeptideSp
Definition: grppeptide.h:40
double pappso_double
A type definition for doubles.
Definition: types.h:48
std::shared_ptr< GrpGroup > GrpGroupSp
Definition: grpgroup.h:38
bool operator()(const GrpGroupSp &testGroupSp)
GrpPeptideSet _peptide_set
ContainsAny(const GrpPeptideSet &peptide_set)