update_engine: Revert "Fix delta generator mishandling of unchanged blocks."

CL:246670 introduced an optimization that would not include the blocks
that aren't written by any operation AND didn't change from the previous
image. These blocks are normally free-space, but were also including
data blocks not moved by other operations in very similar images (like
a delta to itself). While introducing this optimization, that CL also
introduced a bug that fails to generate the payload when the old image
is smaller than the new image.

This patch reverts the CL entirely in M42, merging here the CL:249955
that replaced vector<char> to vector<uint8_t>.

BUG=chromium:477876
TEST=sudo env FEATURES=test emerge update_engine
TEST=Ran delta generator from alex-2913.331.0 to alex-6812.75.2

Change-Id: I5a79038cb0137be266aeb0c9a1d4bdf6f5be3a67
Reviewed-on: https://chromium-review.googlesource.com/266067
Reviewed-by: Matthew Yuan <matthewyuan@chromium.org>
Trybot-Ready: Alex Deymo <deymo@chromium.org>
Tested-by: Alex Deymo <deymo@chromium.org>
diff --git a/update_engine/delta_performer_unittest.cc b/update_engine/delta_performer_unittest.cc
index 4e39c0c..53d9c2a 100644
--- a/update_engine/delta_performer_unittest.cc
+++ b/update_engine/delta_performer_unittest.cc
@@ -568,7 +568,7 @@
     }
 
     if (noop) {
-      EXPECT_EQ(0, manifest.install_operations_size());
+      EXPECT_EQ(1, manifest.install_operations_size());
       EXPECT_EQ(1, manifest.kernel_install_operations_size());
     }
 
diff --git a/update_engine/payload_generator/delta_diff_generator.cc b/update_engine/payload_generator/delta_diff_generator.cc
index 0855232..a1f83a7 100644
--- a/update_engine/payload_generator/delta_diff_generator.cc
+++ b/update_engine/payload_generator/delta_diff_generator.cc
@@ -287,27 +287,24 @@
   uint64_t next_block_;
 };
 
-// Reads blocks from image_path that are not yet marked as being written in the
-// blocks array. These blocks that remain are either unchanged files or
-// non-file-data blocks.  We compare each of them to the old image, and compress
-// the ones that changed into a single REPLACE_BZ operation. This updates a
-// newly created node in the graph to write these blocks and writes the
-// appropriate blob to blobs_fd. Reads and updates blobs_length.
+// Reads blocks from image_path that are not yet marked as being written
+// in the blocks array. These blocks that remain are non-file-data blocks.
+// In the future we might consider intelligent diffing between this data
+// and data in the previous image, but for now we just bzip2 compress it
+// and include it in the update.
+// Creates a new node in the graph to write these blocks and writes the
+// appropriate blob to blobs_fd. Reads and updates blobs_length;
 bool ReadUnwrittenBlocks(const vector<Block>& blocks,
                          int blobs_fd,
                          off_t* blobs_length,
-                         const string& old_image_path,
-                         const string& new_image_path,
+                         const string& image_path,
                          Vertex* vertex) {
   vertex->file_name = "<rootfs-non-file-data>";
 
   DeltaArchiveManifest_InstallOperation* out_op = &vertex->op;
-  int new_image_fd = open(new_image_path.c_str(), O_RDONLY, 000);
-  TEST_AND_RETURN_FALSE_ERRNO(new_image_fd >= 0);
-  ScopedFdCloser new_image_fd_closer(&new_image_fd);
-  int old_image_fd = open(old_image_path.c_str(), O_RDONLY, 000);
-  TEST_AND_RETURN_FALSE_ERRNO(old_image_fd >= 0);
-  ScopedFdCloser old_image_fd_closer(&old_image_fd);
+  int image_fd = open(image_path.c_str(), O_RDONLY, 000);
+  TEST_AND_RETURN_FALSE_ERRNO(image_fd >= 0);
+  ScopedFdCloser image_fd_closer(&image_fd);
 
   string temp_file_path;
   TEST_AND_RETURN_FALSE(utils::MakeTempFile("CrAU_temp_data.XXXXXX",
@@ -328,68 +325,46 @@
   vector<Extent> extents;
   vector<Block>::size_type block_count = 0;
 
-  LOG(INFO) << "Appending unwritten blocks to extents";
+  LOG(INFO) << "Appending left over blocks to extents";
   for (vector<Block>::size_type i = 0; i < blocks.size(); i++) {
     if (blocks[i].writer != Vertex::kInvalidIndex)
       continue;
+    if (blocks[i].reader != Vertex::kInvalidIndex) {
+      graph_utils::AddReadBeforeDep(vertex, blocks[i].reader, i);
+    }
     graph_utils::AppendBlockToExtents(&extents, i);
     block_count++;
   }
 
-  // Code will handle buffers of any size that's a multiple of kBlockSize,
+  // Code will handle 'buf' at any size that's a multiple of kBlockSize,
   // so we arbitrarily set it to 1024 * kBlockSize.
-  chromeos::Blob new_buf(1024 * kBlockSize);
-  chromeos::Blob old_buf(1024 * kBlockSize);
+  chromeos::Blob buf(1024 * kBlockSize);
 
-  LOG(INFO) << "Scanning " << block_count << " unwritten blocks";
-  vector<Extent> changed_extents;
-  vector<Block>::size_type changed_block_count = 0;
+  LOG(INFO) << "Reading left over blocks";
   vector<Block>::size_type blocks_copied_count = 0;
 
-  // For each extent in extents, write the unchanged blocks into BZ2_bzWrite,
-  // which sends it to an output file.  We use the temporary buffers to hold the
-  // old and new data, which may be smaller than the extent, so in that case we
-  // have to loop to get the extent's data (that's the inner while loop).
+  // For each extent in extents, write the data into BZ2_bzWrite which
+  // sends it to an output file.
+  // We use the temporary buffer 'buf' to hold the data, which may be
+  // smaller than the extent, so in that case we have to loop to get
+  // the extent's data (that's the inner while loop).
   for (const Extent& extent : extents) {
     vector<Block>::size_type blocks_read = 0;
     float printed_progress = -1;
     while (blocks_read < extent.num_blocks()) {
-      const uint64_t copy_first_block = extent.start_block() + blocks_read;
       const int copy_block_cnt =
-          min(new_buf.size() / kBlockSize,
+          min(buf.size() / kBlockSize,
               static_cast<chromeos::Blob::size_type>(
                   extent.num_blocks() - blocks_read));
-      const size_t count = copy_block_cnt * kBlockSize;
-      const off_t offset = copy_first_block * kBlockSize;
-      ssize_t rc = pread(new_image_fd, new_buf.data(), count, offset);
+      ssize_t rc = pread(image_fd,
+                         buf.data(),
+                         copy_block_cnt * kBlockSize,
+                         (extent.start_block() + blocks_read) * kBlockSize);
       TEST_AND_RETURN_FALSE_ERRNO(rc >= 0);
-      TEST_AND_RETURN_FALSE(static_cast<size_t>(rc) == count);
-
-      rc = pread(old_image_fd, old_buf.data(), count, offset);
-      TEST_AND_RETURN_FALSE_ERRNO(rc >= 0);
-      TEST_AND_RETURN_FALSE(static_cast<size_t>(rc) == count);
-
-      // Compare each block in the buffer to its counterpart in the old image
-      // and only compress it if its content has changed.
-      int buf_offset = 0;
-      for (int i = 0; i < copy_block_cnt; ++i) {
-        int buf_end_offset = buf_offset + kBlockSize;
-        if (!std::equal(new_buf.begin() + buf_offset,
-                        new_buf.begin() + buf_end_offset,
-                        old_buf.begin() + buf_offset)) {
-          BZ2_bzWrite(&err, bz_file, &new_buf[buf_offset], kBlockSize);
-          TEST_AND_RETURN_FALSE(err == BZ_OK);
-          const uint64_t block_idx = copy_first_block + i;
-          if (blocks[block_idx].reader != Vertex::kInvalidIndex) {
-            graph_utils::AddReadBeforeDep(vertex, blocks[block_idx].reader,
-                                          block_idx);
-          }
-          graph_utils::AppendBlockToExtents(&changed_extents, block_idx);
-          changed_block_count++;
-        }
-        buf_offset = buf_end_offset;
-      }
-
+      TEST_AND_RETURN_FALSE(static_cast<size_t>(rc) ==
+                            copy_block_cnt * kBlockSize);
+      BZ2_bzWrite(&err, bz_file, buf.data(), copy_block_cnt * kBlockSize);
+      TEST_AND_RETURN_FALSE(err == BZ_OK);
       blocks_read += copy_block_cnt;
       blocks_copied_count += copy_block_cnt;
       float current_progress =
@@ -407,13 +382,9 @@
   TEST_AND_RETURN_FALSE_ERRNO(0 == fclose(file));
   file = nullptr;
 
-  LOG(INFO) << "Compressed " << changed_block_count << " blocks ("
-            << block_count - changed_block_count << " blocks unchanged)";
   chromeos::Blob compressed_data;
-  if (changed_block_count > 0) {
-    LOG(INFO) << "Reading compressed data off disk";
-    TEST_AND_RETURN_FALSE(utils::ReadFile(temp_file_path, &compressed_data));
-  }
+  LOG(INFO) << "Reading compressed data off disk";
+  TEST_AND_RETURN_FALSE(utils::ReadFile(temp_file_path, &compressed_data));
   TEST_AND_RETURN_FALSE(unlink(temp_file_path.c_str()) == 0);
 
   // Add node to graph to write these blocks
@@ -423,14 +394,13 @@
   LOG(INFO) << "Rootfs non-data blocks compressed take up "
             << compressed_data.size();
   *blobs_length += compressed_data.size();
-  out_op->set_dst_length(kBlockSize * changed_block_count);
-  DeltaDiffGenerator::StoreExtents(changed_extents,
-                                   out_op->mutable_dst_extents());
+  out_op->set_dst_length(kBlockSize * block_count);
+  DeltaDiffGenerator::StoreExtents(extents, out_op->mutable_dst_extents());
 
   TEST_AND_RETURN_FALSE(utils::WriteAll(blobs_fd,
                                         compressed_data.data(),
                                         compressed_data.size()));
-  LOG(INFO) << "Done processing unwritten blocks";
+  LOG(INFO) << "done with extra blocks";
   return true;
 }
 
@@ -1674,13 +1644,8 @@
       TEST_AND_RETURN_FALSE(ReadUnwrittenBlocks(blocks,
                                                 fd,
                                                 &data_file_size,
-                                                old_image,
                                                 new_image,
                                                 &graph.back()));
-      if (graph.back().op.data_length() == 0) {
-        LOG(INFO) << "No unwritten blocks to write, omitting operation";
-        graph.pop_back();
-      }
 
       // Final scratch block (if there's space)
       if (blocks.size() < (rootfs_partition_size / kBlockSize)) {