本文最后更新于 749 天前,其中的信息可能已经有所发展或是发生改变。
Lab Checkpoint 1: stitching substrings into a byte stream
3 Putting substrings in sequence
存储substring的数据结构选择的是红黑树(std::map
)
key | value |
index | substring |
空间复杂度为$O(\log n)$,插入和删除的时间复杂度为$O(\log n)$,遍历时间复杂度为$O(n)$。
把接收的substring情况先大致分为下面几种,其中先要判断边界条件,窗口外的部分需要丢弃。
对于与capacity有重叠部分的substring简单分成两种情况:
index > first unread
index <= first unread
然后对end index
进行判断,以此确定substring需要的区间。
对该所需区间与已存取的区间进行合并(merge),判断是否能够push substring。若能则更新几个指针参数(first unread, first unacceptable, etc)。
还需要注意的是eof的判断,然后就是实现合并算法和更新数据结构的策略,最后面向测试用例debug了。
StreamReassembler::StreamReassembler(const size_t capacity)
: _first_unread(0), _first_unassembled(0), _first_unacceptable(capacity), _unassembled_bytes(0), _eof_index(-1),
_outside_size(0), _eof(false), _auxiliary_storage({}), _output(capacity), _capacity(capacity) {}
//! \details This function accepts a substring (aka a segment) of bytes,
//! possibly out-of-order, from the logical stream, and assembles any newly
//! contiguous substrings and writes them into the output stream in order.
void StreamReassembler::push_substring(const string &data, const size_t index, const bool eof) {
// DUMMY_CODE(data, index, eof);
// The number of overlap chars.
size_t overlap_size = 0;
// if out of bounds, discard it.
if(index + data.size() < _first_unread || index > _first_unacceptable){
return;
}
// if in unaassembled area, store it.
if(index > _first_unread){
string substring = data.substr(0, min(_first_unacceptable - index, data.size()));
// if never stored before
if(_auxiliary_storage.find(index) == _auxiliary_storage.end()){
_auxiliary_storage[index] = substring;
overlap_size = overlap_length();
if(_auxiliary_storage.find(index) == _auxiliary_storage.end()){ // if deleted after merge
// _unassembled_bytes += _auxiliary_storage[index].size() - _outside_size; // not excuted while testing lab1
_unassembled_bytes += data.size() - overlap_size;
}else{
_unassembled_bytes += _auxiliary_storage[index].size() - overlap_size; // c bcd ->error
}
}else if(substring.size() > _auxiliary_storage[index].size()){ // update better(longer) substring
size_t before_len = _auxiliary_storage[index].size();
_auxiliary_storage[index] = substring;
overlap_size = overlap_length();
_unassembled_bytes += before_len + overlap_size;
}
}
//if includes read & unread area, take unread area.
if(index <= _first_unread){
size_t end_index = min(data.size() - (_first_unread - index), _capacity);
_auxiliary_storage[_first_unread] = data.substr(_first_unread - index, end_index);
size_t len_before = _auxiliary_storage[_first_unread].size();
overlap_size = overlap_length();
_unassembled_bytes += len_before - overlap_size; // index -> _first_unread
}
// judge if it is possible to push substring
map<size_t, string>::const_iterator iter = _auxiliary_storage.begin();
if(iter->first == _first_unread){
string to_be_written = _auxiliary_storage[_first_unread];
size_t accepted_size = _output.write(to_be_written);
_auxiliary_storage.erase(_first_unread); // important!!
_first_unread += accepted_size;
_first_unassembled = _first_unread;
_first_unacceptable = _first_unread + _capacity;
_unassembled_bytes -= accepted_size;
}
if(eof){
_eof_index = index + data.size();
}
if(_eof_index <= _first_unread){
_output.end_input();
}
}
size_t StreamReassembler::unassembled_bytes() const { return _unassembled_bytes; }
bool StreamReassembler::empty() const { return _unassembled_bytes == 0; }
size_t StreamReassembler::overlap_length() {
size_t total_length = 0;
std::map<size_t, string>::const_iterator iter1 = _auxiliary_storage.begin();
std::map<size_t, string>::const_iterator iter2 = ++_auxiliary_storage.begin(); // iter next to iter1
while(iter2 != _auxiliary_storage.end()){
size_t overlap_size;
size_t idx1 = iter1->first, idx2 = iter2->first;
string s1 = iter1->second, s2 = iter2->second;
if(idx1 + s1.size() >= idx2){ // overlap or neighbours
iter2++;
overlap_size = merge_substrings(idx1, idx2, s1, s2); // _auxiliary_storage[idx2] would be erased
if(overlap_size == 0){
if(idx1 + s1.size() == idx2){ // neighbours
continue;
}else{ // previous iter2 has been deleted
iter1++;
if(iter2 != _auxiliary_storage.end()){
iter2++;
}else{
break;
}
}
}else{ // overlap
total_length += overlap_size;
}
}else{
iter1++;
iter2++;
}
}
return total_length;
}
size_t StreamReassembler::merge_substrings(const size_t idx1, const size_t idx2, const string &data1, const string &data2) {
size_t overlap = 0;
// neighbour
if(idx1 + data1.size() == idx2){
string data = data1 + data2;
_auxiliary_storage.erase(idx2);
_auxiliary_storage[idx1] = data;
_outside_size = data2.size();
return overlap;
}
// overlap
// 1. include
if(idx1+ data1.size() >= idx2 + data2.size()){
overlap = data2.size();
_outside_size = 0;
}else{ // 2. intersection
overlap = idx1 + data1.size() - idx2;
_outside_size = data2.size() - overlap;
string data = data1 + data2.substr(overlap, _outside_size);
_auxiliary_storage[idx1] = data;
}
_auxiliary_storage.erase(idx2);
return overlap;
}
更新:将第27行代码更新为第28行代码
修复了下述情况遇到的错误
修复了下述情况遇到的错误
seqno | 2303251161 | 2303251162 | 2303251163 | 2303251164 | 2303251165 | 2303251166 | 2303251167 | 2303251168 | 2303251169 |
---|---|---|---|---|---|---|---|---|---|
e | |||||||||
g | |||||||||
c | |||||||||
a | b | ||||||||
f |