patchpanel: mark forwarded traffic with fwmark routing tag

This patch adds mangle PREROUTING iptables rule for marking forwarded
traffic from VMs and containers with the fwmark routing tag.

For ARC++ and ARCVM, the traffic is directly marked with the fwmark
routing tag corresponding to the target output interface.

For other VMs that track the default network, a CONNMARK restore rule
is set to reapply the fwmark routing tag saved in conntrack. A follow-up
patch will add the POSTROUTING rules for setting CONNMARK based on the
output interface selected by routing.

BUG=b:161507671
BUG=b:161508179
TEST=Unit tests. Flashed rammus, checked ARC++ connectivity and Crosvm
connectivity, checked that traffic is correctly tagged in PREROUTING by
observing iptables counters for ARC++.

Change-Id: I648bbd55f2670192a764ce9ba65846912e7eb609
Reviewed-on: https://chromium-review.googlesource.com/c/chromiumos/platform2/+/2377336
Tested-by: Hugo Benichi <hugobenichi@google.com>
Commit-Queue: Hugo Benichi <hugobenichi@google.com>
Reviewed-by: Garrick Evans <garrick@chromium.org>
Reviewed-by: Taoyu Li <taoyl@chromium.org>
diff --git a/patchpanel/datapath.cc b/patchpanel/datapath.cc
index b73ca65..73075bd 100644
--- a/patchpanel/datapath.cc
+++ b/patchpanel/datapath.cc
@@ -51,6 +51,17 @@
   return n;
 }
 
+bool IsValidIpFamily(IpFamily family) {
+  switch (family) {
+    case IPv4:
+    case IPv6:
+    case Dual:
+      return true;
+    default:
+      return false;
+  }
+}
+
 }  // namespace
 
 std::string ArcVethHostName(const std::string& ifname) {
@@ -352,12 +363,24 @@
     LOG(ERROR) << "Failed to enable IP forwarding for " << ext_ifname << "<-"
                << int_ifname;
 
-  // TODO(b/161507671) If ext_ifname is not null, mark egress traffic with the
-  // fwmark routing tag corresponding to |ext_ifname|, and set up strong routing
-  // in ip rule.
+  if (!ext_ifname.empty()) {
+    // If |ext_ifname| is not null, mark egress traffic with the
+    // fwmark routing tag corresponding to |ext_ifname|.
+    if (!ModifyFwmarkRoutingTag("-A", ext_ifname, int_ifname))
+      LOG(ERROR) << "Failed to add PREROUTING fwmark routing tag for "
+                 << ext_ifname << "<-" << int_ifname;
+  } else {
+    // Otherwise if ext_ifname is null, set up a CONNMARK restore rule in
+    // PREROUTING to apply any fwmark routing tag saved for the current
+    // connection, and rely on implicit routing to the default logical network
+    // otherwise.
+    if (!ModifyConnmarkRestore(IpFamily::Dual, "PREROUTING", "-A", int_ifname))
+      LOG(ERROR) << "Failed to add PREROUTING CONNMARK restore rule for "
+                 << int_ifname;
 
-  // TODO(b/161507671) If ext_ifname is null, set up connection tracking for the
-  // current default interface.
+    // TODO(b/161507671) When a VPN service starts, tag new connections with the
+    // fwmark routing tag for VPNs.
+  }
 
   if (!ModifyFwmarkSourceTag("-A", int_ifname, source))
     LOG(ERROR) << "Failed to add PREROUTING fwmark tagging rule for source "
@@ -373,7 +396,11 @@
   StopIpForwarding(IpFamily::IPv4, ext_ifname, int_ifname);
   StopIpForwarding(IpFamily::IPv4, int_ifname, ext_ifname);
   ModifyFwmarkSourceTag("-D", int_ifname, source);
-  // TODO(b/161507671) Remove routing tag marking for egress traffic.
+  if (!ext_ifname.empty()) {
+    ModifyFwmarkRoutingTag("-D", ext_ifname, int_ifname);
+  } else {
+    ModifyConnmarkRestore(IpFamily::Dual, "PREROUTING", "-D", int_ifname);
+  }
 }
 
 bool Datapath::AddInboundIPv4DNAT(const std::string& ifname,
@@ -544,6 +571,51 @@
   process_runner_->ip6("addr", "del", {ipv6_addr, "dev", ifname});
 }
 
+bool Datapath::ModifyConnmarkRestore(IpFamily family,
+                                     const std::string& chain,
+                                     const std::string& op,
+                                     const std::string& iif) {
+  if (chain != "OUTPUT" && (chain != "PREROUTING" || iif.empty())) {
+    LOG(ERROR) << "Invalid arguments chain=" << chain << " iif=" << iif;
+    return false;
+  }
+
+  if (!IsValidIpFamily(family)) {
+    LOG(ERROR) << "Cannot change " << chain << " -j CONNMARK restore-mark"
+               << " for " << iif << ": incorrect IP family " << family;
+    return false;
+  }
+
+  std::vector<std::string> args = {op, chain};
+  if (!iif.empty()) {
+    args.push_back("-i");
+    args.push_back(iif);
+  }
+  args.insert(args.end(), {"-j", "CONNMARK", "--restore-mark", "--mask",
+                          kFwmarkRoutingMask.ToString(), "-w"});
+
+  bool success = true;
+  if (family & IPv4)
+    success &= process_runner_->iptables("mangle", args) == 0;
+  if (family & IPv6)
+    success &= process_runner_->ip6tables("mangle", args) == 0;
+  return success;
+}
+
+bool Datapath::ModifyFwmarkRoutingTag(const std::string& op,
+                                      const std::string& ext_ifname,
+                                      const std::string& int_ifname) {
+  int ifindex = FindIfIndex(ext_ifname);
+  if (ifindex == 0) {
+    PLOG(ERROR) << "if_nametoindex(" << ext_ifname << ") failed";
+    return false;
+  }
+
+  return ModifyFwmarkPrerouting(IpFamily::Dual, op, int_ifname,
+                                Fwmark::FromIfIndex(ifindex),
+                                kFwmarkRoutingMask);
+}
+
 bool Datapath::ModifyFwmarkSourceTag(const std::string& op,
                                      const std::string& iif,
                                      TrafficSource source) {
@@ -564,15 +636,10 @@
     return false;
   }
 
-  switch (family) {
-    case IPv4:
-    case IPv6:
-    case Dual:
-      break;
-    default:
-      LOG(ERROR) << "Cannot change PREROUTING set-fwmark for " << iif
-                 << ": incorrect IP family " << family;
-      return false;
+  if (!IsValidIpFamily(family)) {
+    LOG(ERROR) << "Cannot change PREROUTING set-fwmark for " << iif
+               << ": incorrect IP family " << family;
+    return false;
   }
 
   std::vector<std::string> args = {
@@ -599,16 +666,10 @@
     return false;
   }
 
-  switch (family) {
-    case IPv4:
-    case IPv6:
-    case Dual:
-      break;
-    default:
-
-      LOG(ERROR) << "Cannot change IP forwarding from \"" << iif << "\" to \""
-                 << oif << "\": incorrect IP family " << family;
-      return false;
+  if (!IsValidIpFamily(family)) {
+    LOG(ERROR) << "Cannot change IP forwarding from \"" << iif << "\" to \""
+               << oif << "\": incorrect IP family " << family;
+    return false;
   }
 
   std::vector<std::string> args = {op, "FORWARD"};
@@ -765,4 +826,22 @@
                                kAdbProxyTcpListenPort, ifname);
 }
 
+void Datapath::SetIfnameIndex(const std::string& ifname, int ifindex) {
+  if_nametoindex_[ifname] = ifindex;
+}
+
+int Datapath::FindIfIndex(const std::string& ifname) {
+  uint32_t ifindex = if_nametoindex(ifname.c_str());
+  if (ifindex > 0) {
+    if_nametoindex_[ifname] = ifindex;
+    return ifindex;
+  }
+
+  const auto it = if_nametoindex_.find(ifname);
+  if (it != if_nametoindex_.end())
+    return it->second;
+
+  return 0;
+}
+
 }  // namespace patchpanel
diff --git a/patchpanel/datapath.h b/patchpanel/datapath.h
index b0337e1..c2dcbfd 100644
--- a/patchpanel/datapath.h
+++ b/patchpanel/datapath.h
@@ -235,9 +235,20 @@
   virtual bool AddAdbPortAccessRule(const std::string& ifname);
   virtual void DeleteAdbPortAccessRule(const std::string& ifname);
 
+  // Set or override the interface name to index mapping for |ifname|.
+  // Only used for testing.
+  void SetIfnameIndex(const std::string& ifname, int ifindex);
+
   MinijailedProcessRunner& runner() const;
 
  private:
+  bool ModifyConnmarkRestore(IpFamily family,
+                             const std::string& chain,
+                             const std::string& op,
+                             const std::string& iif);
+  bool ModifyFwmarkRoutingTag(const std::string& op,
+                              const std::string& ext_ifname,
+                              const std::string& int_ifname);
   bool ModifyFwmarkSourceTag(const std::string& op,
                              const std::string& iif,
                              TrafficSource source);
@@ -252,12 +263,22 @@
                           const std::string& iif,
                           const std::string& oif,
                           bool log_failures = true);
+  bool ModifyRtentry(ioctl_req_t op, struct rtentry* route);
+  int FindIfIndex(const std::string& ifname);
 
   MinijailedProcessRunner* process_runner_;
   Firewall* firewall_;
   ioctl_t ioctl_;
 
-  bool ModifyRtentry(ioctl_req_t op, struct rtentry* route);
+  // A map used for remembering the interface index of an interface. This
+  // information is necessary when cleaning up iptables fwmark rules that
+  // directly references the interface index. When removing these rules on
+  // an RTM_DELLINK event, the interface index cannot be retrieved anymore.
+  // A new entry is only added when a new physical device appears, and entries
+  // are not removed.
+  // TODO(b/161507671) Rely on RoutingService to obtain this information once
+  // shill/routing_table.cc has been migrated to patchpanel.
+  std::map<std::string, int> if_nametoindex_;
 
   DISALLOW_COPY_AND_ASSIGN(Datapath);
 };
diff --git a/patchpanel/datapath_test.cc b/patchpanel/datapath_test.cc
index 77ad28b..4df4342 100644
--- a/patchpanel/datapath_test.cc
+++ b/patchpanel/datapath_test.cc
@@ -343,14 +343,26 @@
                                            "-j", "MARK", "--set-mark",
                                            "0x00002000/0x00003f00", "-w"),
                                true, nullptr));
+  EXPECT_CALL(runner, iptables(StrEq("mangle"),
+                               ElementsAre("-A", "PREROUTING", "-i", "arc_eth0",
+                                           "-j", "MARK", "--set-mark",
+                                           "0x03ea0000/0xffff0000", "-w"),
+                               true, nullptr));
   EXPECT_CALL(
       runner,
       ip6tables(StrEq("mangle"),
                 ElementsAre("-A", "PREROUTING", "-i", "arc_eth0", "-j", "MARK",
                             "--set-mark", "0x00002000/0x00003f00", "-w"),
                 true, nullptr));
+  EXPECT_CALL(
+      runner,
+      ip6tables(StrEq("mangle"),
+                ElementsAre("-A", "PREROUTING", "-i", "arc_eth0", "-j", "MARK",
+                            "--set-mark", "0x03ea0000/0xffff0000", "-w"),
+                true, nullptr));
 
   Datapath datapath(&runner, &firewall);
+  datapath.SetIfnameIndex("eth0", 2);
   datapath.StartRoutingDevice("eth0", "arc_eth0", Ipv4Addr(1, 2, 3, 4),
                               TrafficSource::ARC);
 }
@@ -371,11 +383,21 @@
                                            "-j", "MARK", "--set-mark",
                                            "0x00002100/0x00003f00", "-w"),
                                true, nullptr));
+  EXPECT_CALL(runner, iptables(StrEq("mangle"),
+                               ElementsAre("-A", "PREROUTING", "-i", "vmtap0",
+                                           "-j", "CONNMARK", "--restore-mark",
+                                           "--mask", "0xffff0000", "-w"),
+                               true, nullptr));
   EXPECT_CALL(runner, ip6tables(StrEq("mangle"),
                                 ElementsAre("-A", "PREROUTING", "-i", "vmtap0",
                                             "-j", "MARK", "--set-mark",
                                             "0x00002100/0x00003f00", "-w"),
                                 true, nullptr));
+  EXPECT_CALL(runner, ip6tables(StrEq("mangle"),
+                                ElementsAre("-A", "PREROUTING", "-i", "vmtap0",
+                                            "-j", "CONNMARK", "--restore-mark",
+                                            "--mask", "0xffff0000", "-w"),
+                                true, nullptr));
 
   Datapath datapath(&runner, &firewall);
   datapath.StartRoutingDevice("", "vmtap0", Ipv4Addr(1, 2, 3, 4),
@@ -413,14 +435,26 @@
                                            "-j", "MARK", "--set-mark",
                                            "0x00002000/0x00003f00", "-w"),
                                true, nullptr));
+  EXPECT_CALL(runner, iptables(StrEq("mangle"),
+                               ElementsAre("-D", "PREROUTING", "-i", "arc_eth0",
+                                           "-j", "MARK", "--set-mark",
+                                           "0x03ea0000/0xffff0000", "-w"),
+                               true, nullptr));
   EXPECT_CALL(
       runner,
       ip6tables(StrEq("mangle"),
                 ElementsAre("-D", "PREROUTING", "-i", "arc_eth0", "-j", "MARK",
                             "--set-mark", "0x00002000/0x00003f00", "-w"),
                 true, nullptr));
+  EXPECT_CALL(
+      runner,
+      ip6tables(StrEq("mangle"),
+                ElementsAre("-D", "PREROUTING", "-i", "arc_eth0", "-j", "MARK",
+                            "--set-mark", "0x03ea0000/0xffff0000", "-w"),
+                true, nullptr));
 
   Datapath datapath(&runner, &firewall);
+  datapath.SetIfnameIndex("eth0", 2);
   datapath.StopRoutingDevice("eth0", "arc_eth0", Ipv4Addr(1, 2, 3, 4),
                              TrafficSource::ARC);
 }
@@ -441,11 +475,21 @@
                                            "-j", "MARK", "--set-mark",
                                            "0x00002100/0x00003f00", "-w"),
                                true, nullptr));
+  EXPECT_CALL(runner, iptables(StrEq("mangle"),
+                               ElementsAre("-D", "PREROUTING", "-i", "vmtap0",
+                                           "-j", "CONNMARK", "--restore-mark",
+                                           "--mask", "0xffff0000", "-w"),
+                               true, nullptr));
   EXPECT_CALL(runner, ip6tables(StrEq("mangle"),
                                 ElementsAre("-D", "PREROUTING", "-i", "vmtap0",
                                             "-j", "MARK", "--set-mark",
                                             "0x00002100/0x00003f00", "-w"),
                                 true, nullptr));
+  EXPECT_CALL(runner, ip6tables(StrEq("mangle"),
+                                ElementsAre("-D", "PREROUTING", "-i", "vmtap0",
+                                            "-j", "CONNMARK", "--restore-mark",
+                                            "--mask", "0xffff0000", "-w"),
+                                true, nullptr));
 
   Datapath datapath(&runner, &firewall);
   datapath.StopRoutingDevice("", "vmtap0", Ipv4Addr(1, 2, 3, 4),
diff --git a/patchpanel/routing_service.h b/patchpanel/routing_service.h
index e80fcde..74d1789 100644
--- a/patchpanel/routing_service.h
+++ b/patchpanel/routing_service.h
@@ -18,6 +18,16 @@
 
 namespace patchpanel {
 
+// Constant used for establishing a stable mapping between routing table ids
+// and interface indexes. An interface with ifindex 2 will be assigned the
+// routing table with id 1002 by the routing layer. This stable mapping is used
+// for configuring ip rules, iptables fwmark mangle rules, and the
+// accept_ra_rt_table sysctl for all physical interfaces.
+// TODO(b/161507671) Consolidate with shill::kInterfaceTableIdIncrement
+// in platform2/shill/routing_table.cc once routing and ip rule configuration
+// is migrated to patchpanel.
+constexpr const uint32_t kInterfaceTableIdIncrement = 1000;
+
 // The list of all sources of traffic that need to be distinguished
 // for routing or traffic accounting. Currently 6 bits are used for encoding
 // the TrafficSource enum in a fwmark. The enum is split into two groups:local
@@ -124,6 +134,13 @@
     return {
         .policy = static_cast<uint8_t>(source), .legacy = 0, .rt_table_id = 0};
   }
+
+  static Fwmark FromIfIndex(uint32_t ifindex) {
+    uint32_t table_id = ifindex + kInterfaceTableIdIncrement;
+    return {.policy = 0,
+            .legacy = 0,
+            .rt_table_id = static_cast<uint16_t>(table_id)};
+  }
 };
 
 // All local sources