#!/bin/sh
. /etc/utils.sh

# Whenever this script exits (on any path, including the explicit 'exit'
# calls further down), create or truncate /tmp/run/network.init.
# NOTE(review): presumably a marker that other boot scripts wait for --
# confirm who consumes it.
trap ": >/tmp/run/network.init" EXIT

# Declare the experiment names that this script later queries with
# 'experiment <name>'.  SkipBluetoothWifi is also queried below but is not
# registered here.
# NOTE(review): register_experiment is not defined in this file; presumably
# it comes from /etc/utils.sh -- confirm.
register_experiment BigInitRwnd
register_experiment NoSTP
register_experiment TVBoxNoGigEthernet
register_experiment TcpWmem1Meg
register_experiment AggressiveTCP
register_experiment BigSocketBuffers
register_experiment MocaFlowControl


# Pin one interrupt to a set of CPUs.
#   $1: IRQ number
#   $2: CPU bitmask written verbatim to /proc/irq/<irq>/smp_affinity
# An IRQ that has no smp_affinity file is skipped with a note on stderr;
# a failed write is reported on stdout.
irq_affinity() {
  local irq="$1" cpu="$2"
  local knob="/proc/irq/$irq/smp_affinity"

  if [ ! -e "$knob" ]; then
    echo "  ...irq #$irq not present, skipping" >&2
    return
  fi

  echo "$cpu" >"$knob" ||
    echo "Failed to set affinity for IRQ $irq @ CPU $cpu"
}


# Spread interrupt handling across the two CPUs.
#
# Picks a CPU bitmask per device class (serial, ethernet, MoCA, wifi, disk)
# depending on the kind of box, then applies it to the IRQ numbers known for
# the detected SoC via irq_affinity.  Takes no arguments.
#
# If the second CPU cannot be used (taskset with mask 2 fails), nothing is
# changed and the function returns 0 right after logging that fact.
#
# Note: serialcpu, eth0cpu, mocacpu, wificpu and diskcpu are assigned as
# globals, exactly as before.
redistribute_irqs() {
  # if a second CPU isn't available, do nothing.
  echo "distribute IRQ handling between cores"
  if ! taskset 2 true 2>/dev/null; then
    echo "  ...only one CPU available, skipped."
    # Without this return the code below would still try to steer IRQs
    # to a CPU that does not exist.
    return 0
  fi

  # Rule of thumb:
  #  - interrupts are generally by default on first CPU (bitfield=1)
  #  - let's move realtime sensitive stuff to second CPU (bitfield=2)
  #  - non-realtime userspace processes can default to both (bitfield=3)
  if is-storage-box || is-network-box; then
    # Storage box serves MoCA and ethernet simultaneously.
    # - put them both on separate CPUs to maximize throughput.
    # - consider ethernet (not MoCA) to be realtime sensitive, since it
    #   carries the multicast feed.
    # - consider disk to be non-realtime, since it can prefetch/writeback.
    serialcpu=1
    eth0cpu=2
    mocacpu=1
    wificpu=1  # may not actually have wifi
    diskcpu=1
  else
    # TV box handles video interrupts (via miniclient) and
    # (ethernet *or* MoCA) traffic (which carries the video stream), and wifi.
    # Network traffic includes the video stream, but it's TCP, so it should
    # be able to retransmit if packets are lost (unlike multicast on the
    # storage box) so it should be okay to put it on the non-realtime CPU.
    serialcpu=1
    eth0cpu=1
    mocacpu=1
    wificpu=1
    diskcpu=1  # doesn't actually have a disk
  fi

  # The IRQ numbers below are specific to each SoC; the first matching
  # platform wins and unknown platforms are left untouched.
  if grep -q BCM7425 /proc/cpuinfo; then
    irq_affinity 62 $serialcpu
    irq_affinity 18 $eth0cpu
    irq_affinity 19 $eth0cpu
    irq_affinity 20 $mocacpu
    irq_affinity 21 $mocacpu
    irq_affinity 31 $mocacpu
    irq_affinity 34 $wificpu  # bcm wifi, if present
    irq_affinity 42 $diskcpu
  elif grep -q BCM7429 /proc/cpuinfo; then
    irq_affinity 52 $serialcpu
    irq_affinity 16 $eth0cpu
    irq_affinity 17 $eth0cpu
    irq_affinity 18 $mocacpu
    irq_affinity 19 $mocacpu
    irq_affinity 28 $mocacpu
    # Wifi is on one of mmc0 or mmc1, but it has varied between early
    # device models.  Since mmc is pretty fast and seldom used (and we happen
    # to want it on the non-realtime cpu anyway, like wifi), let's just
    # blindly put them both on the wifi cpu.
    irq_affinity 35 $wificpu  # mmc1
    irq_affinity 36 $wificpu  # mmc0
    # Note: no disk present, ignore $diskcpu
  elif grep -q comcerto /proc/iomem; then
    # TODO(apenwarr): we steer *all* PFE network traffic to $eth0cpu.
    #  Ideally we want only multicast to go there, and other traffic to
    #  go to the non-realtime CPU.  There may be some way to do this by
    #  changing PFE somehow.  As is, incoming WAN traffic destined for wifi
    #  could end up sucking all available CPU and interfere with multicast.
    irq_affinity  59 $serialcpu  # serial
    irq_affinity  62 $serialcpu  # i2c (NOR flash)
    irq_affinity  36 $eth0cpu    # *all* PFE
    irq_affinity  77 $mocacpu
    irq_affinity 128 $wificpu    # new boards, ath10k
    irq_affinity 160 $wificpu    # new boards, ath9k
    # The old-board IRQs are expected to be absent on new boards, so the
    # "not present" note that irq_affinity prints on stderr is silenced.
    irq_affinity  96 $wificpu  2>/dev/null  # old boards, ath10k
    irq_affinity 164 $wificpu  2>/dev/null  # old boards, ath9k
    irq_affinity  48 $diskcpu
  elif grep -q LS1024A /proc/cpuinfo; then
    # Optimus with Linux 4.1
    irq_affinity  24 $serialcpu  # serial
    irq_affinity  26 $serialcpu  # i2c (NOR flash)
    irq_affinity  27 $diskcpu
    irq_affinity  28 $wificpu    # ath9k
    irq_affinity  29 $wificpu    # ath10k
    irq_affinity  94 $mocacpu
    irq_affinity  96 $eth0cpu    # *all* PFE
  fi
}

# Print the name of the MoCA network interface on stdout.
get_moca_iface() {
  # TODO(apenwarr): remove this once all devices use 'moca0' as the name.
  # Default is the GFMS100/GFHD100/GFHD200 name; GFRG200 exposes 'moca0'.
  local name="eth1"
  [ ! -e /sys/class/net/moca0 ] || name="moca0"
  echo "$name"
}

# Print the "wifi_addr=<mac>" module parameter for the wifi driver.
#
# Uses MAC_ADDR_WIFI from hnvram when it is set; otherwise derives the
# address by passing MAC_ADDR and the value 3 to mac_addr_increment.
# Prints nothing on stdout (and a warning on stderr) when neither variable
# is available.
get_wifi_macaddr_param() {
  local wifi_mac base_mac

  wifi_mac=$(hnvram -qr MAC_ADDR_WIFI)
  if [ -z "$wifi_mac" ]; then
    base_mac=$(hnvram -qr MAC_ADDR)
    if [ -n "$base_mac" ]; then
      wifi_mac=$(mac_addr_increment "$base_mac" 3)
    else
      echo "$0: WARNING: Box has no MAC_ADDR nor MAC_ADDR_WIFI" >&2
    fi
  fi

  [ -z "$wifi_mac" ] || echo "wifi_addr=$wifi_mac"
}

# Load the wifi kernel modules, rescan PCI where the platform needs it,
# turn on ath9k debug logging and load pktgen.
#
# Does nothing at all when the kernel option wifical is "1" or when the
# SkipBluetoothWifi experiment is active.
load_wifi_drivers() {
  # wifical=1 means Litepoint Wifi/Bluetooth calibration. We cannot load
  # the production firmware at all, or it barfs.
  if [ "$(kernopt wifical)" = "1" ] || experiment SkipBluetoothWifi; then
    return 0
  fi

  # WARNING: Keep ath9k first, ath10k second, other wifi stuff later so that
  # the wifi device ordering is constant.
  modprobe -a ath9k ath10k_pci mwl8k 2>/dev/null

  # Deliberately unquoted below: an empty value must vanish rather than be
  # passed to modprobe as an empty argument.
  local mac_param
  mac_param=$(get_wifi_macaddr_param)
  modprobe mwifiex $mac_param 2>/dev/null

  modprobe -a mwifiex_sdio mwifiex_usb mwifiex_pcie ath9k_htc 2>/dev/null

  # On GFRG250, the Quantenna PCIe module has not initialized by the time the
  # kernel enumerates PCIe devices. For development, also include GFRG200 and
  # GFRG210.
  if contains "GFRG200 GFRG210 GFRG250" "$(cat /etc/platform)"; then
    echo 1 > /sys/bus/pci/rescan
  fi
  # GFHD254 on kernels whose release string does not start with "3":
  # remove the PCI device, wait a second, then rescan the bus.
  if contains "GFHD254" "$(cat /etc/platform)"; then
    if ! startswith "$(uname -r)" "3"; then
      echo 1 > /sys/bus/pci/devices/0000:01:00.0/remove
      sleep 1
      echo 1 > /sys/bus/pci/rescan
    fi
  fi
  modprobe qdpc-host 2>/dev/null

  # Enable wifi beacon and fatal debug logging.
  [ ! -e /sys/module/ath9k/parameters/debug ] ||
    echo 0x00004400 >/sys/module/ath9k/parameters/debug

  # Enable pktgen.
  modprobe pktgen 2>/dev/null
}

# Configure bridge br0: forward delay, spanning tree, bridge priority and
# MAC address, then set the link up.
create_br0() {
  echo -n "Creating bridge br0... "
  brctl setfd br0 1

  # Spanning tree is off for ptp devices and under the NoSTP experiment.
  local stp_mode=on
  if is-ptp || experiment NoSTP; then
    stp_mode=off
  fi
  brctl stp br0 "$stp_mode"

  # Storage and network boxes should have higher precedence (lower value)
  #  when choosing the root of the spanning tree.
  #  The default in Linux is 0x8000.
  local bridge_prio=
  if is-network-box || is-windcharger; then
    bridge_prio=0x7980
  elif is-storage-box; then
    bridge_prio=0x7990
  fi
  [ -z "$bridge_prio" ] || brctl setbridgeprio br0 "$bridge_prio"

  # Copy the MAC address of the first of lan0/eth0 that exists and whose
  # address the bridge accepts.
  for i in lan0 eth0; do
    if [ -e "/sys/class/net/$i/address" ] &&
       ip link set br0 address $(cat "/sys/class/net/$i/address"); then
      break
    fi
  done

  echo "done"
  ip link set br0 up
  ip link set br0 up  # kernel 3.2.26 bug: fails the first time
}

# Configure bridge br1 (forward delay 1, spanning tree off) and set the
# link up.
create_br1() {
  local bridge=br1 attempt

  echo -n "Creating bridge $bridge... "
  brctl setfd "$bridge" 1
  brctl stp "$bridge" off
  echo "done"

  # kernel 3.2.26 bug: fails the first time, so always do it twice.
  for attempt in 1 2; do
    ip link set "$bridge" up
  done
}

# Create the VLAN 0 sub-interface "$MOCAIFC.0" on top of the MoCA interface
# and map each egress priority 0..7 onto the same 802.1p value.
# Reads the global MOCAIFC.
configure_moca_vlan() {
  local vlan_ifc="$MOCAIFC.0"

  # VLAN tag for MoCA interface
  ip link add link $MOCAIFC name "$vlan_ifc" type vlan id 0

  # SO_PRIORITY -> 802.1p mapping
  for prio in 0 1 2 3 4 5 6 7; do
    vconfig set_egress_map "$vlan_ifc" $prio $prio >/dev/null 2>&1
  done
}

# Add network interfaces to a bridge, carrying their IPv4 configuration
# over to the bridge.
#   $1:   bridge name
#   $2..: candidate interfaces given as paths (the caller in this file
#         passes /sys/class/net/... globs); paths that do not exist are
#         skipped, which also discards glob patterns that matched nothing.
# Reads the global MOCAIFC.  The loop variables x and i are not declared
# local before the loops, so they are set in the caller's scope.
add_interfaces_to_bridge() {
  local bridge=$1
  shift

  echo "Adding interfaces to $bridge... "

  # Move any existing network info from added network interfaces to the bridge.
  # Only strictly needed for br0, when NFS booting; this prevents losing the
  # NFS connection in the middle of the script.
  for x in "$@"; do
    [ -e "$x" ] || continue

    # Wait for MAC address.
    # Polls up to 12 times, 0.25s apart, until the helper reports something
    # other than the all-zero address; carries on regardless afterwards.
    # NOTE(review): the helper receives the full path in $x here (the
    # /sys/class/net/ prefix is only stripped below) -- confirm that
    # get_mac_address_for_interface expects a path rather than a bare name.
    for i in $(seq 1 12); do
      if [ "$(get_mac_address_for_interface "$x")" != 00:00:00:00:00:00 ]; then
        break
      fi
      sleep 0.25
    done

    # From here on $x is the bare interface name.
    local x="${x#/sys/class/net/}"
    echo "Adding interface $x..."

    # Everything up to the inner 'done' that ends the last loop is ONE
    # '&&' chain; a failing step abandons the remaining steps for this
    # interface and the outer loop moves on to the next one:
    #   1. set the interface up;
    #   2. add each of its IPv4 addresses to the bridge (the 4th and 6th
    #      whitespace-separated fields of 'ip -o addr show' are used as
    #      address and broadcast);
    #   3. if this is the MoCA interface, switch $x to "$MOCAIFC.0", so
    #      that the remaining steps act on that name instead (an 'if'
    #      whose condition is false returns 0, so the chain continues);
    #   4. add $x to the bridge;
    #   5. re-create each IPv4 route of $x on the bridge with metric 1,
    #      then delete it from $x;
    #   6. remove the IPv4 addresses from $x.
    ip link set $x up &&
    ip -o -f inet addr show $x |
    while read a b c ip d brd junk; do
      ip addr add $ip broadcast $brd dev $bridge
    done &&
    if [ -n "$MOCAIFC" ] && [ $x = $MOCAIFC ]; then
      x="${MOCAIFC}.0"
    fi &&
    brctl addif $bridge $x &&
    ip -o -f inet route list dev $x |
    while read dst routeinfo; do
      ip route add dev $bridge $dst $routeinfo metric 1 &&
      ip route del dev $x $dst $routeinfo
    done &&
    ip -o -f inet addr show $x |
    while read a b c ip junk; do
      ip addr del $ip dev $x
    done
  done
}

# Route 239.0.0.0/8 via br0 and, when the kernel "ip" option names a
# gateway (third colon-separated field), add a default route through it.
# Under the BigInitRwnd experiment the default route gets "initrwnd 1000".
add_multicast_route() {
  echo "Adding RFC2365 multicast route..."
  ip route add 239.0.0.0/8 dev br0

  kernopt ip |
  while IFS=: read ip serverip gatewayip netmask hostname ifc junk; do
    # $ip should be already setup by simpleramfs.
    # TODO(apenwarr): simpleramfs should set up $gatewayip too.
    #  But right now its included networking commands are too limited.
    [ -n "$gatewayip" ] || continue

    echo "IP: adding default gateway '$gatewayip'"
    initrwnd=
    if experiment BigInitRwnd; then
      initrwnd='initrwnd 1000'
    fi
    # $initrwnd is unquoted on purpose: it is zero or two words.
    ip route add default via "$gatewayip" $initrwnd
  done
}

# Bring up an interface that is not part of any bridge.
#   $1: interface name
# Sets the link up; only if that succeeds, adds a default route for the
# gateway named in the kernel "ip" option (if any).  ifup and receive
# coalescing are applied in every case.
if_up_no_bridging() {
  local dev="$1"

  echo "Adding interface $dev..."
  if ip link set "$dev" up; then
    kernopt ip |
    while IFS=: read ip serverip gatewayip netmask hostname ifc junk; do
      # $ip should be already setup by simpleramfs.
      # TODO(apenwarr): simpleramfs should set up $gatewayip too.
      #  But right now its included networking commands are too limited.
      [ -n "$gatewayip" ] || continue
      echo "IP: adding default gateway '$gatewayip'"
      ip route add default via "$gatewayip"
    done
  fi

  echo "Bringing up interface $dev..."
  ifup "$dev"
  enable_rx_coalescing "$dev"
}

# Bring a bridge up.
#   $1: bridge name
bridge_up() {
  local bridge="$1"

  echo "Bringing up the bridge $bridge..."
  # kernel 3.16 behavior, sometimes br0 does not come up: set the link up
  # explicitly before handing over to ifup.
  ip link set dev "$bridge" up
  ifup "$bridge"
}

# Set rx-usecs to 1000 on an interface through ethtool -C.
#   $1: interface name
enable_rx_coalescing() {
  local dev="$1"
  # Enable receive interrupt coalescing, this is a significant
  # performance gain for multicast receive.
  ethtool -C $dev rx-usecs 1000
}

# Turn on the rx, tx, sg, gso and gro offloads of an interface.
#   $1: interface name
enable_hw_offloads() {
  local dev="$1" feature settings=""

  for feature in rx tx sg gso gro; do
    settings="$settings $feature on"
  done

  # Enable hardware offloads, do this at the end to avoid
  # invalid access caused by powered down core
  ethtool -K $dev $settings
}

# Take the WAN port down: ifdown wan0 and wan0.2, then delete the wan0.2
# link.  Does nothing when there is no wan0 interface.
wan0_down() {
  [ -e /sys/class/net/wan0 ] || return 0

  echo "Shutting down the WAN port..."
  local ifc
  for ifc in wan0 wan0.2; do
    ifdown "$ifc"
  done
  ip link del wan0.2
}

# Tear down a bridge.
#   $1: bridge name
#
# Steps, in order:
#   1. if the bridge exists, ifdown it and log that;
#   2. try to remove from the bridge every network interface that has a
#      'device' entry under /sys/class/net/<ifc>/;
#   3. ifdown the bridge (unconditionally, as before);
#   4. delete the bridge itself if it is still present.
# Always returns 0 once the steps have been attempted.
bridge_down() {
  local br="$1" path

  # '-e', not '-n': '-n' on a literal path string is always true, which
  # made this check a no-op and logged a shutdown for bridges that do not
  # exist.
  if [ -e "/sys/class/net/$br/bridge" ]; then
    ifdown "$br"
    echo "Shutting down $br interface"
  fi

  # Glob instead of parsing 'ls'; if nothing matches, the literal pattern
  # fails the -e test and is skipped.
  for path in /sys/class/net/*; do
    [ -e "$path/device" ] || continue
    brctl delif "$br" "${path##*/}"
  done

  ifdown "$br"
  if [ -e "/sys/class/net/$br" ]; then
    brctl delbr "$br"
  fi
}

# Encode a string as base64.
#   $1: the string to encode
# Prints the base64 text followed by a newline on stdout.
#
# The string is handed to python as an argument (sys.argv[1]) rather than
# being pasted into the python source, so quotes and backslashes in it are
# encoded literally instead of breaking (or altering) the program.  The
# snippet runs under both Python 2 and Python 3.
encode()
{
  python -c '
import base64, os, sys
data = sys.argv[1]
if not isinstance(data, bytes):
    # Python 3: argv holds text; recover the original bytes.
    data = os.fsencode(data)
text = base64.b64encode(data)
if not isinstance(text, str):
    # Python 3: b64encode returns bytes.
    text = text.decode("ascii")
sys.stdout.write(text + "\n")
' "$1"
}

# Append ACCEPT rules to the vlan-input chain for TCP traffic arriving on
# sw0.peer.  Does nothing when the sw0.peer interface does not exist.
add_firewall_rules_to_vlan_input() {
  [ -e /sys/class/net/sw0.peer ] || return 0

  local port
  # 80, 443:      open ports for peer to peer configuration control
  # 50000, 50001: ports used by macsecd for peer communication
  for port in 80 443 50000 50001; do
    ip46tables -A vlan-input -i sw0.peer -p tcp --dport $port -j ACCEPT
  done
}

# Load the extra TCP congestion control modules, allow every available
# algorithm, and log the resulting allowed list.
setup_tcp_congestion_control() {
  local available=/proc/sys/net/ipv4/tcp_available_congestion_control
  local allowed=/proc/sys/net/ipv4/tcp_allowed_congestion_control

  # load all algorithms we may want to use
  modprobe -a tcp_acubic tcp_turbulent tcp_transonic

  # make all "available" algorithms "allowed" so any process can use it
  cat "$available" >"$allowed"

  # log
  echo -n "tcp_allowed_congestion_control="
  cat "$allowed"
}

# Entry point: dispatch on the action given as $1 (start, stop,
# restart/reload).  Anything else prints a usage line and exits with 1.
case "$1" in
  start)
    # Fiberjack only brings up loopback and the port named in
    # /etc/config/lanport, then exits; nothing further down runs for it.
    if is-fiberjack; then
      lan_port=$(cat /etc/config/lanport)
      echo "Starting LAN."
      ifup lo
      if [ -n "$lan_port" ]; then
        ifup $lan_port
      fi
      exit 0
    fi

    # MOCAIFC is a global read later by configure_moca_vlan,
    # add_interfaces_to_bridge and the hardware-offload step.
    if is-moca-present; then
      MOCAIFC=$(get_moca_iface)
    fi

    if ! is-spacecast && ! is-ptp; then
      load_wifi_drivers
    fi

    setup_tcp_congestion_control

    echo 1 >/proc/sys/net/ipv4/conf/all/arp_ignore

    # Create a default resolv.conf and /etc/hosts in case DHCP doesn't.
    # (this is always true if using nfsroot).
    # 8.8.8.8 and 8.8.4.4 are well-known IPv4 Google public DNS servers
    # that can be used from anywhere, so it makes a good place to start.
    # 2001:4860:4860::8888 is one of the two IPv6 Google public DNS servers
    # (the other being 2001:4860:4860::8844).
    # DHCP will just overwrite this file when it's ready to go.
    if [ ! -r /etc/resolv.conf ]; then
      # Note: uClibc only reads the first three nameserver entries.
      (echo 'nameserver 8.8.8.8';
       echo 'nameserver 8.8.4.4';
       echo 'nameserver 2001:4860:4860::8888') >/etc/resolv.conf
    fi
    if [ ! -r /etc/hosts ]; then
      echo "127.0.0.1 localhost $(hostname)" >/etc/hosts
    fi

    # Configure dhclient with the right hostname etc.
    setup-dhclient

    # Configure loopback
    ifup lo

    # Quantenna-specific initialization.
    # Finds the lowest N for which no wlanN interface exists yet and writes
    # "add wlanN 3" to the quantenna 'vlan' sysfs file.
    if interface_exists quantenna; then
      ip link set dev quantenna up
      i=0
      while true; do
        if ! interface_exists wlan"$i"; then
          echo "add wlan$i 3" >/sys/class/net/quantenna/vlan
          break
        fi
        i=$((i+1))
      done
    fi

    # GFRG2X0 sets lan0, wan0, and moca0 in pfe kernel module
    # from addrs passed on command line from barebox

    if is-ptp; then
      # set defaults
      ptp-config -d craft_ipaddr 192.168.1.1/24
      ptp-config -d vlan_inband 4091
      ptp-config -d vlan_ooband 50
      ptp-config -d vlan_peer 60
      # TODO(edjames) default should depend on hi/lo radio model
      ptp-config -d local_ipaddr 192.168.2.1/24
      ptp-config -d peer_ipaddr 192.168.2.2/24
      # The password defaults are passed base64-encoded (see encode).
      ptp-config -d password_admin $(encode admin)
      ptp-config -d password_guest $(encode guest)

      # mac addrs
      set_mac_from_hnvram craft0 MAC_ADDR
      set_mac_from_hnvram sw0 MAC_ADDR_WAN

      # craft0 (static ip for UI)
      ptp-config -i craft_ipaddr
      if_up_no_bridging craft0

      # bring up inband vlan (will dhcp)
      ptp-config -i vlan_inband
      # no if_up_no_bridging, inband is in br0

      # bring up ooband vlan (will dhcp)
      ptp-config -i vlan_ooband
      if_up_no_bridging sw0.ooband

      # bring up peer vlan, set our static ip
      ptp-config -i vlan_peer
      ptp-config -i local_ipaddr
      if_up_no_bridging sw0.peer

      add_firewall_rules_to_vlan_input
    fi

    if is-spacecast; then
      # Spacecast uses lan0 directly; no bridges are created.
      if_up_no_bridging lan0

      # NOTE(review): with '2>&1 > /dev/null' stderr is pointed at the
      # original stdout first and only stdout is then discarded -- confirm
      # that keeping the watchdog's stderr is intended.
      (nohup /sbin/lan0_watchdog 2>&1 > /dev/null) &
    else
      # br0 bridges Ethernet, MoCA, and WiFi AP interfaces
      create_br0

      # br1 only bridges captive portal WiFi interfaces
      create_br1

      if is-moca-present; then
        configure_moca_vlan
      fi

      # Globs that match nothing are passed through literally;
      # add_interfaces_to_bridge skips paths that do not exist.
      add_interfaces_to_bridge br0 \
        /sys/class/net/[ea]th[0-9] \
        /sys/class/net/*.inband \
        /sys/class/net/lan[0-9] \
        /sys/class/net/moca[0-9] \
        /sys/class/net/wlan[0-9]

      add_multicast_route

      bridge_up br0
      bridge_up br1
      redistribute_irqs
    fi

    if is-moca-present; then
      echo "starting moca..."
      # Runs in the background; output (stdout and stderr) goes to logos
      # under the name "mocad".
      babysit 30 runmoca 2>&1 | logos mocad &
      if experiment MocaFlowControl; then
        moca-flowctl --enable
      fi
    fi

    if [ -e /sys/class/net/wan0 ]; then
      echo "Bringing up the WAN port..."
      ip link add link wan0 name wan0.2 type vlan id 2
      sysctl net.ipv4.conf.all.forwarding=1
      sysctl net.ipv6.conf.all.forwarding=1
      sysctl net.ipv6.conf.wan0.accept_ra=2
      # NOTE(review): 'wan0/2' presumably addresses the wan0.2 interface,
      # using '/' for the dot inside the interface name -- confirm against
      # the sysctl implementation on the device.
      sysctl net.ipv6.conf.wan0/2.accept_ra=2
      ifup wan0
      ifup wan0.2

      # set the class priority to 802.1p bit mapping.  We don't want
      # pbit=0,1 ever, so force into 2.
      vconfig set_egress_map wan0.2 0 2
      vconfig set_egress_map wan0.2 1 2
      for prio in 2 3 4 5 6 7; do
        vconfig set_egress_map wan0.2 $prio $prio >/dev/null 2>&1
      done

      # Enable receive interrupt coalescing, this is a significant
      # performance gain for multicast receive.
      enable_rx_coalescing wan0
    fi

    if is-tv-box || is-storage-box; then
      # Enable hardware offloads, do this at the end to avoid
      # invalid access caused by powered down core
      enable_hw_offloads eth0
      if is-moca-present; then
        enable_hw_offloads $MOCAIFC
      fi
    fi

    # Everything below is optional tuning, each part gated by an experiment.
    if is-tv-box && experiment TVBoxNoGigEthernet; then
      # advertise only 10 & 100 at half and full duplex.
      ethtool -s eth0 advertise 0xf
    fi
    if is-storage-box && experiment TcpWmem1Meg; then
      # Values from GFRG210, applied to all Storage Boxes.
      echo "4096 16384 1019904" >/proc/sys/net/ipv4/tcp_wmem
    fi
    if experiment AggressiveTCP; then
      # technically only needed for server
      echo 1 >/proc/sys/net/ipv4/tcp_thin_linear_timeouts
      echo 1 >/proc/sys/net/ipv4/tcp_thin_dupack
      if [ -e "/proc/sys/net/ipv4/tcp_min_rtt_wlen" ]; then
        # Only on more recent kernels
        echo 10 >/proc/sys/net/ipv4/tcp_min_rtt_wlen
        echo 4 >/proc/sys/net/ipv4/tcp_early_retrans
      fi

      # technically only needed for client
      echo 0 >/proc/sys/net/ipv4/tcp_moderate_rcvbuf
    fi
    if experiment BigSocketBuffers; then
      # technically only rbuf increase needed on receiver, and only wbuf
      # on sender, but for simplicity just do it everywhere for now.
      # tee writes the same value(s) to every file the glob expands to.
      n=8000000
      echo $n $n $n | tee /proc/sys/net/ipv4/tcp_?mem >/dev/null
      echo $n | tee /proc/sys/net/core/?mem_* >/dev/null
    fi
    ;;
  stop)
    # Nothing is torn down when /tmp/NFS exists.
    # NOTE(review): presumably because the root filesystem is then mounted
    # over the network -- confirm.
    if [ ! -e /tmp/NFS ]; then
      if is-fiberjack; then
        lan_port=$(cat /etc/config/lanport)
        echo "Stopping LAN."
        ifdown lo
        if [ -n "$lan_port" ]; then
          ifdown $lan_port
        fi
        exit 0
      fi

      if is-spacecast; then
        ifdown lan0
        echo "Shutting down lan0 interface"
      else
        wan0_down

        bridge_down br0
        bridge_down br1
      fi

      # Kill mocad and mocactl
      # (the message is printed without a trailing newline)
      if [ -e /dev/bmoca0 ]; then
        echo -n "mocacfg kill"
        pkillwait mocad
      fi
    fi
    exit 0
    ;;
  restart|reload)
    # Re-run this script as two separate processes: stop, then start.
    "$0" stop
    "$0" start
    ;;
  *)
    # NOTE(review): 'reload' is accepted above but is not listed here.
    echo "Usage: $0 {start|stop|restart}"
    exit 1
esac
