forked from zproksi/bpatch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathstreamreplacer.cpp
548 lines (471 loc) · 16.9 KB
/
streamreplacer.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
#include "stdafx.h"
#include "binarylexeme.h"
#include "fileprocessing.h"
#include "streamreplacer.h"
namespace bpatch
{
using namespace std;
//--------------------------------------------------
/// <summary>
/// Replacer to last in chain. All incoming data should be trasferred to a Writer interface
/// </summary>
class WriterReplacer final : public StreamReplacer
{
public:
WriterReplacer(Writer* const pWriter): pWriter_(pWriter) {};
virtual void DoReplacements(const char toProcess, const bool aEod) const override;
virtual void SetNextReplacer(std::unique_ptr<StreamReplacer>&& pNext) override;
protected:
Writer* const pWriter_;
};
void WriterReplacer::DoReplacements(const char toProcess, const bool aEod) const
{
pWriter_->WriteCharacter(toProcess, aEod);
}
void WriterReplacer::SetNextReplacer(std::unique_ptr<StreamReplacer>&&)
{
throw logic_error("Writer Replacer should be unchangeable. Contact with maintainer.");
}
unique_ptr<StreamReplacer> StreamReplacer::ReplacerLastInChain(Writer* const pWriter)
{
return unique_ptr<StreamReplacer>(new WriterReplacer(pWriter));
}
//--------------------------------------------------
/// <summary>
/// common part for set next replacer
/// </summary>
class ReplacerWithNext: public StreamReplacer
{
protected:
/// to pass processing further
std::unique_ptr<StreamReplacer> pNext_;
public:
void SetNextReplacer(std::unique_ptr<StreamReplacer>&& pNext) override
{
std::swap(pNext_, pNext);
}
};
//--------------------------------------------------
struct ReplacerPairHolder
{
ReplacerPairHolder(unique_ptr<AbstractBinaryLexeme>& src, // what to replace
unique_ptr<AbstractBinaryLexeme>& trg) // to what
: src_(src)
, trg_(trg)
{}
unique_ptr<AbstractBinaryLexeme>& src_; // what to replace
unique_ptr<AbstractBinaryLexeme>& trg_; // with what
};
//--------------------------------------------------
class UsualReplacer final : public ReplacerWithNext
{
public:
UsualReplacer(unique_ptr<AbstractBinaryLexeme>& src, // what to replace
unique_ptr<AbstractBinaryLexeme>& trg) // with what
: src_(src->access())
, trg_(trg->access())
{
cachedData_.resize(src_.size());
}
void DoReplacements(const char toProcess, const bool aEod) const override;
protected:
const span<const char>& src_; // what to replace
const span<const char>& trg_; // with what
mutable size_t cachedAmount_ = 0; // we cached this amount of data
// this is used to hold temporary data while the logic is
// looking for the new beginning of the cached value
mutable vector<char> cachedData_;
};
void UsualReplacer::DoReplacements(const char toProcess, const bool aEod) const
{
if (nullptr == pNext_)
{
throw logic_error("Replacement chain has been broken. Communicate with maintainer");
}
// no more data
// just send cached amount
if (aEod)
{
for (size_t i = 0; i < cachedAmount_; ++i)
{
pNext_->DoReplacements(src_[i], false);
}
cachedAmount_ = 0;
pNext_->DoReplacements(toProcess, true);
return;
}
if (src_[cachedAmount_] == toProcess) // check for match
{
if (++cachedAmount_ >= src_.size())
{// send target - do replacement
for (size_t q = 0; q < trg_.size(); ++q) { pNext_->DoReplacements(trg_[q], false); }
cachedAmount_ = 0;
}
return;
}
// here: toProcess is not our char
// lets check for fast track (255/256 probability)
if (0 == cachedAmount_)
{
pNext_->DoReplacements(toProcess, false);
return;
}
// here: We have some cached data
// at least 1 char need to be send further
// remaining cached data including toProcess need to be reprocessed for match
memcpy(cachedData_.data(), src_.data(), cachedAmount_);
cachedData_[cachedAmount_++]= toProcess;
size_t i = 0;
do
{
pNext_->DoReplacements(cachedData_[i++], false); // send 1 byte after another
} while (0 != memcmp(src_.data(), cachedData_.data() + i, --cachedAmount_));
// Everything that was needed has already been sent
// cachedAmount_ is zero or greater
}
/// <summary>
/// creates replacer
/// </summary>
/// <param name="src">what we are going to replace</param>
/// <param name="trg">this is the result of the replacement</param>
/// <returns>Replacer for building replacement chain</returns>
static unique_ptr<StreamReplacer> CreateSimpleReplacer(
unique_ptr<AbstractBinaryLexeme>& src, // what to replace
unique_ptr<AbstractBinaryLexeme>& trg) // with what
{
return unique_ptr<StreamReplacer>(new UsualReplacer(src, trg));
}
//--------------------------------------------------
///
/// |--SRC 1 TRG 1 |
/// O - |-- ... | - o
/// |--SRC N TRG N |
///
class ChoiceReplacer final : public ReplacerWithNext
{
typedef struct
{
span<const char> src_;
span<const char> trg_;
}ChoiceReplacerPair;
public:
/// <summary>
/// creating ChoiceReplacer from provided pairs
/// </summary>
/// <param name="choice">vector os source & target pairs</param>
ChoiceReplacer(StreamReplacerChoice& choice)
{
size_t bufferSize = 0; // to allocate buffer
const size_t sz = choice.size();
rpairs_.resize(sz);
for (size_t i = 0; i < sz; ++i)
{
auto& vPair = choice[i]; // copy from
auto& rpair = rpairs_[i];// copy to
rpair.src_ = vPair.first->access();
const size_t sourceSize = rpair.src_.size();
if (bufferSize < sourceSize)
{
bufferSize = sourceSize; // calculate necessary buffer size
}
rpair.trg_ = vPair.second->access();
}
cachedData_.resize(bufferSize);
}
void DoReplacements(const char toProcess, const bool aEod) const override;
protected:
/// <summary>
/// check for partial or full match of the data from cachedData_
/// with any of lexemes sequentially
/// and provides type of the match and index if found
/// </summary>
/// <param name="indexFrom"> search in pairs from this index</param>
/// <param name="fullOnly"> what type of match we want to find : only full or partial also </param>
/// <returns>bool: partial, bool: full, size_t: index </returns>
tuple<bool, bool, size_t> FindMatch(const size_t indexFrom, const bool fullOnly) const
{
for (size_t i = indexFrom; i < rpairs_.size(); ++i)
{
const auto& srcSpan = rpairs_[i].src_;
const size_t cmpLength = (srcSpan.size() > cachedAmount_) ? cachedAmount_ : srcSpan.size();
if (0 == memcmp(srcSpan.data(), cachedData_.data(), cmpLength))
{ // match
if (cmpLength == srcSpan.size())
{
return {false, true, i};
}
if (!fullOnly)
{
return {true, false, i};
}
}
// continue - no match here
}
return {false, false, 0};
}
/// <summary>
/// Sends target to next replacers, and resets partial match index to zero
/// </summary>
/// <param name="target">the array we need to send</param>
void SendAndResetPartialMatch(const span<const char> target) const
{
for (const char c : target)
{
pNext_->DoReplacements(c, false);
}
indexOfPartialMatch_ = 0;
}
/// <summary>
/// Clean srcMatchedLength bytes of cache from the beginning
/// </summary>
/// <param name="srcMatchedLength">number of bytes we have to clear</param>
void CleanTheCache(size_t srcMatchedLength) const
{
shift_left(cachedData_.data(),
cachedData_.data() + cachedAmount_,
static_cast<std::iterator_traits<decltype(cachedData_.data())>::difference_type>(srcMatchedLength));
cachedAmount_ -= srcMatchedLength;
}
/// <summary>
/// The end of the data sign has been received and the cached data need to be either send or replaced & send
/// </summary>
/// <param name="toProcess">character received along with end of data sign</param>
void DoReplacementsAtTheEndOfTheData(const char toProcess) const
{
while (cachedAmount_ > 0)
{
const auto [partialMatch, fullMatch, matchPairIndex] = FindMatch(indexOfPartialMatch_, true);
if (fullMatch)
{
const auto& rpair = rpairs_[matchPairIndex];
SendAndResetPartialMatch(rpair.trg_);
CleanTheCache(rpair.src_.size());
}
else // No full match -> send 1 char from cache
{
SendAndResetPartialMatch(std::span<char> (cachedData_.data(), 1));
CleanTheCache(1);
}
}
pNext_->DoReplacements(toProcess, true);
}
protected:
// our pairs sorted by priority - only one of them could be replaced for concrete pos
vector<ChoiceReplacerPair> rpairs_;
mutable size_t cachedAmount_ = 0; // we cached this amount of data
mutable size_t indexOfPartialMatch_ = 0; // this index from rpairs_ represents last partial match
// this is used to hold temporary data while the logic is
// looking for the new beginning of the cached value
mutable vector<char> cachedData_;
};
void ChoiceReplacer::DoReplacements(const char toProcess, const bool aEod) const
{
if (nullptr == pNext_)
{
throw logic_error("Replacement chain has been broken. Communicate with maintainer");
}
if (aEod) [[unlikely]]
{
DoReplacementsAtTheEndOfTheData(toProcess);
return;
}
cachedData_[cachedAmount_++] = toProcess;
while (cachedAmount_ > 0)
{
const auto [partialMatch, fullMatch, matchPairIndex] = FindMatch(indexOfPartialMatch_, false);
if (fullMatch)
{
const auto& rpair = rpairs_[matchPairIndex];
SendAndResetPartialMatch(rpair.trg_);
CleanTheCache(rpair.src_.size());
return;
}
if (partialMatch)
{
indexOfPartialMatch_ = matchPairIndex;
return;
}
// No any match -> send 1 char from cache
SendAndResetPartialMatch(std::span<char>(cachedData_.data(), 1));
CleanTheCache(1);
}
}
namespace
{
static std::string_view warningDuplicatePattern("Warning: Duplicate pattern to replace found. Second lexeme will be ignored.");
};
//--------------------------------------------------
/// <summary>
/// replaces for lexemes of the same length
/// </summary>
class UniformLexemeReplacer final : public ReplacerWithNext
{
public:
UniformLexemeReplacer(StreamReplacerChoice& choice, const size_t sz)
: cachedData_(sz)
{
for (AbstractLexemesPair& alpair : choice)
{
const span<const char>& src = alpair.first->access();
const span<const char>& trg = alpair.second->access();
if (auto result = replaceOptions_.insert(
{
string_view(src.data(), src.size()),
string_view(trg.data(), trg.size()),
}); !result.second)
{
cout << coloredconsole::toconsole(warningDuplicatePattern) << endl;
}
}
}
void DoReplacements(const char toProcess, const bool aEod) const override;
protected:
// here we hold pairs of sources and targets
unordered_map<string_view, string_view> replaceOptions_;
mutable size_t cachedAmount_ = 0; // we cache this amount of data in the cachedData_
// this is used to hold temporary data while the logic is
// looking for the new beginning of the cached value
mutable vector<char> cachedData_;
};
void UniformLexemeReplacer::DoReplacements(const char toProcess, const bool aEod) const
{
if (nullptr == pNext_)
{
throw logic_error("Replacement chain has been broken. Communicate with maintainer");
}
// no more data
if (aEod)
{
if (cachedAmount_ > 0)
{
for (size_t q = 0; q < cachedAmount_; ++q) { pNext_->DoReplacements(cachedData_[q], false); }
cachedAmount_ = 0;
}
pNext_->DoReplacements(toProcess, aEod); // send end of the data further
return;
} // if (aEod)
// set buffer of cached at once
char* const& pBuffer = cachedData_.data();
pBuffer[cachedAmount_++] = toProcess;
if (cachedAmount_ >= cachedData_.size())
{
if (const auto it = replaceOptions_.find(string_view(pBuffer, cachedAmount_)); it != replaceOptions_.cend())
{ // found
string_view trg = it->second;
for (size_t q = 0; q < trg.size(); ++q) { pNext_->DoReplacements(trg[q], false); }
cachedAmount_ = 0;
}
else
{ // not found
pNext_->DoReplacements(pBuffer[0], false); // send 1 char
std::shift_left(pBuffer, pBuffer + cachedAmount_--, 1);
}
}
}
//--------------------------------------------------
/// <summary>
/// replaces for lexemes of the same length
/// </summary>
class LexemeOf1Replacer final : public ReplacerWithNext
{
public:
LexemeOf1Replacer(StreamReplacerChoice& choice)
{
for (AbstractLexemesPair& alpair : choice)
{
const size_t index = static_cast<const size_t>(*(reinterpret_cast<const unsigned char*>(alpair.first->access().data())));
if (replaces_[index].present_)
{
cout << coloredconsole::toconsole(warningDuplicatePattern) << endl;
}
else
{
replaces_[index].present_ = true;
replaces_[index].trg_ = alpair.second->access();
}
}
}
void DoReplacements(const char toProcess, const bool aEod) const override;
protected:
struct
{
bool present_ = false; // if this char is present
span<const char> trg_;
} replaces_[256];
};
void LexemeOf1Replacer::DoReplacements(const char toProcess, const bool aEod) const
{
if (nullptr == pNext_)
{
throw logic_error("Replacement chain has been broken. Communicate with maintainer");
}
// no more data
if (aEod)
{
pNext_->DoReplacements(toProcess, aEod);
return;
} // if (aEod)
const size_t index = static_cast<size_t>(*(reinterpret_cast<const unsigned char*>(&toProcess)));
if (replaces_[index].present_)
{
auto& trg = replaces_[index].trg_;
for (size_t q = 0; q < trg.size(); ++q) { pNext_->DoReplacements(trg[q], false); }
}
else
{
pNext_->DoReplacements(toProcess, aEod);
}
}
//--------------------------------------------------
/// <summary>
/// creates replacer for lexemes of the same length
/// since this fact allows to use unordered_map for faster search
/// </summary>
/// <param name="choice">set of pairs of src & trg lexemes - one of which
/// can be processed. The one that was found first.</param>
/// <param name="sz">size of all source lexemes</param>
/// <returns>Replacer for building replacement chain</returns>
unique_ptr<StreamReplacer> CreateEqualLengthReplacer(StreamReplacerChoice& choice, const size_t sz)
{
if (sz == 1)
{
return unique_ptr<StreamReplacer>(new LexemeOf1Replacer(choice));
}
return unique_ptr<StreamReplacer>(new UniformLexemeReplacer(choice, sz));
}
//--------------------------------------------------
/// <summary>
/// creates replacer for choose specific lexeme among others to replace
/// </summary>
/// <param name="choice">set of pairs of src & trg lexemes - one of which
/// can be processed. The one that was found first.</param>
/// <returns>Replacer for building replacement chain</returns>
unique_ptr<StreamReplacer> CreateMultipleReplacer(StreamReplacerChoice& choice)
{
const size_t szSrc = choice.cbegin()->first->access().size(); // save size of the first lexeme
// check for sources of the same length
for (const AbstractLexemesPair& alpair: choice)
{
if (alpair.first->access().size() != szSrc)
{
return unique_ptr<StreamReplacer>(new ChoiceReplacer(choice));
}
}
return CreateEqualLengthReplacer(choice, szSrc); // create optimized replacer for lexemes of the same length
}
//--------------------------------------------------
unique_ptr<StreamReplacer> StreamReplacer::CreateReplacer(StreamReplacerChoice& choice)
{
for (const AbstractLexemesPair& alpair: choice)
{
if (alpair.first->access().empty())
throw logic_error("Pattern to replace cannot be empty");
}
if (choice.size() == 1)
{
const AbstractLexemesPair& alexemesPair = *choice.cbegin();
return CreateSimpleReplacer(alexemesPair.first, alexemesPair.second);
}
return CreateMultipleReplacer(choice);
}
}; // namespace bpatch