libpappsomspp
Library for mass spectrometry
Loading...
Searching...
No Matches
proteinintegercode.cpp
Go to the documentation of this file.
1/**
2 * \file protein/proteinintegercode.cpp
3 * \date 22/05/2023
4 * \author Olivier Langella
5 * \brief transform protein amino acid sequence into vectors of amino acid codes
6 */
7
8/*******************************************************************************
9 * Copyright (c) 2023 Olivier Langella
10 *<Olivier.Langella@universite-paris-saclay.fr>.
11 *
12 * This file is part of PAPPSOms-tools.
13 *
14 * PAPPSOms-tools is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation, either version 3 of the License, or
17 * (at your option) any later version.
18 *
19 * PAPPSOms-tools is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
23 *
24 * You should have received a copy of the GNU General Public License
25 * along with PAPPSOms-tools. If not, see <http://www.gnu.org/licenses/>.
26 *
27 ******************************************************************************/
28
29#include "proteinintegercode.h"
30#include "../exception/exceptionoutofrange.h"
31
32using namespace pappso;
33
35 const AaStringCodec &codec,
36 std::size_t aa_str_max_size)
37{
38 msp_protein = protein;
39
40 if(aa_str_max_size > 7)
41 {
43 QObject::tr("aa_str_max_size exceeds max size"));
44 }
45
46 QString seq_str = protein.get()->getSequence();
47 m_seqAaCode.clear();
48
49 for(const QChar &aa_str : seq_str)
50 {
51 m_seqAaCode.push_back(codec.getAaCode().getAaCode(aa_str.toLatin1()));
52 }
53
54 for(std::size_t i = 2; i <= aa_str_max_size; i++)
55 {
57 }
58}
59
68
72
73std::vector<std::uint32_t>
75 const AaStringCodec &codec, std::size_t fragment_size) const
76{
77 std::vector<std::uint32_t> fragments;
78
79 int max = (m_seqAaCode.size() - fragment_size);
80 if(max < 0)
81 return fragments;
82
83 auto it = m_seqAaCode.begin();
84 for(int i = 0; i <= max; i++)
85 {
86 fragments.push_back(codec.codeLlc(it, fragment_size));
87 it++;
88 }
89
90 return fragments;
91}
92
93
94const std::vector<std::uint32_t> &
96{
97 if(size < 2)
98 {
99
100 throw ExceptionOutOfRange(QObject::tr("size too small"));
101 }
102 std::size_t indice = size - 2;
103 if(indice < m_peptideCodedFragments.size())
104 {
105 return m_peptideCodedFragments.at(indice);
106 }
107
108 throw ExceptionOutOfRange(QObject::tr("size too big"));
109}
110
111
112std::vector<std::pair<std::size_t, std::uint32_t>>
114 const std::vector<uint32_t> &code_list_in) const
115{
116 std::vector<std::pair<std::size_t, std::uint32_t>> return_pos;
117 std::vector<uint32_t> code_list = code_list_in;
118
119 std::sort(code_list.begin(), code_list.end());
120 auto it_end = std::unique(code_list.begin(), code_list.end());
121 for(auto it_code = code_list.begin(); it_code != it_end; it_code++)
122 {
123
124 std::size_t size = 2;
125 for(auto &liste_protein_seq_code : m_peptideCodedFragments)
126 {
127
128 auto it_seq_position = std::find(liste_protein_seq_code.begin(),
129 liste_protein_seq_code.end(),
130 *it_code);
131 while(it_seq_position != liste_protein_seq_code.end())
132 {
133 // found
134 std::size_t position =
135 std::distance(liste_protein_seq_code.begin(), it_seq_position);
136 return_pos.push_back({size, position});
137
138 it_seq_position = std::find(
139 ++it_seq_position, liste_protein_seq_code.end(), *it_code);
140 qDebug();
141 }
142 size++;
143 qDebug();
144 }
145 qDebug();
146 }
147
148 return return_pos;
149}
uint8_t getAaCode(char aa_letter) const
Definition aacode.cpp:81
const AaCode & getAaCode() const
uint32_t codeLlc(const QString &aa_str) const
get the lowest common denominator integer from an amino acid sequence string
const std::vector< std::uint32_t > & getPeptideCodedFragment(std::size_t size) const
std::vector< std::pair< std::size_t, std::uint32_t > > match(const std::vector< uint32_t > &code_list) const
std::vector< std::uint8_t > m_seqAaCode
ProteinIntegerCode(ProteinSp protein, const AaStringCodec &codec, std::size_t aa_str_max_size=5)
std::vector< std::vector< std::uint32_t > > m_peptideCodedFragments
std::vector< std::uint32_t > computePeptideCodeFragments(const AaStringCodec &codec, std::size_t fragment_size) const
tries to keep as much as possible monoisotopes, removing any possible C13 peaks and changes multichar...
Definition aa.cpp:39
std::shared_ptr< const Protein > ProteinSp
shared pointer on a Protein object
Definition protein.h:47
@ max
maximum of intensities
transform protein amino acid sequence into vectors of amino acid codes