mirror of
				https://gitlab.alpinelinux.org/alpine/aports.git
				synced 2025-10-31 08:21:49 +01:00 
			
		
		
		
	
		
			
				
	
	
		
			4309 lines
		
	
	
		
			133 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
			
		
		
	
	
			4309 lines
		
	
	
		
			133 KiB
		
	
	
	
		
			Diff
		
	
	
	
	
	
| diff --git a/Makefile.in b/Makefile.in
 | ||
| index 30ebbfe..0a82c67 100644
 | ||
| --- a/Makefile.in
 | ||
| +++ b/Makefile.in
 | ||
| @@ -1,43 +1,73 @@
 | ||
| -#
 | ||
| +# Edit Makefile.in and run ./configure
 | ||
|  
 | ||
|  KVERSION = @KVERSION@
 | ||
|  KDIR = @KDIR@
 | ||
| +KINSTDIR = $(shell dirname @KDIR@)
 | ||
|  IPTABLES_CFLAGS = @IPTABLES_CFLAGS@
 | ||
|  IPTABLES_MODULES = @IPTABLES_MODULES@
 | ||
| +DEPMOD = depmod -a
 | ||
|  
 | ||
| +# https://www.kernel.org/doc/Documentation/kbuild/modules.txt
 | ||
| +# https://www.kernel.org/doc/Documentation/kbuild/makefiles.txt
 | ||
|  obj-m = ipt_NETFLOW.o
 | ||
|  
 | ||
| -ipt_NETFLOW.ko: ipt_NETFLOW.c ipt_NETFLOW.h
 | ||
| +all: ipt_NETFLOW.ko libipt_NETFLOW.so libip6t_NETFLOW.so
 | ||
| +ipt_NETFLOW.ko: version.h ipt_NETFLOW.c ipt_NETFLOW.h Makefile
 | ||
|  	@echo Compiling for kernel $(KVERSION)
 | ||
|  	make -C $(KDIR) M=$(CURDIR) modules
 | ||
| -all: ipt_NETFLOW.ko libipt_NETFLOW.so
 | ||
| -minstall:
 | ||
| -	make -C $(KDIR) M=$(CURDIR) modules_install
 | ||
| +	@touch $@
 | ||
| +sparse: | version.h ipt_NETFLOW.c ipt_NETFLOW.h Makefile
 | ||
| +	@rm -f ipt_NETFLOW.ko ipt_NETFLOW.o
 | ||
| +	@echo Compiling for kernel $(KVERSION)
 | ||
| +	make -C $(KDIR) M=$(CURDIR) modules C=1
 | ||
| +	@touch ipt_NETFLOW.ko
 | ||
| +minstall: | ipt_NETFLOW.ko
 | ||
| +	make -C $(KDIR) M=$(CURDIR) modules_install INSTALL_MOD_PATH=$(DESTDIR)
 | ||
| +	$(DEPMOD)
 | ||
|  mclean:
 | ||
|  	make -C $(KDIR) M=$(CURDIR) clean
 | ||
|  lclean:
 | ||
|  	-rm -f *.so *_sh.o
 | ||
|  clean: mclean lclean
 | ||
| -	-rm -f *.so *.o modules.order
 | ||
| +	-rm -f *.so *.o modules.order version.h
 | ||
| +
 | ||
| +%_sh.o: libipt_NETFLOW.c
 | ||
| +	gcc -O2 -Wall -Wunused $(IPTABLES_CFLAGS) -fPIC -o $@ -c libipt_NETFLOW.c
 | ||
| +
 | ||
| +%.so: %_sh.o
 | ||
| +	gcc -shared -o $@ $<
 | ||
|  
 | ||
| -libipt_NETFLOW.so: libipt_NETFLOW.c
 | ||
| -	gcc -O2 -Wall -Wunused -I$(KDIR)/include $(IPTABLES_CFLAGS) -fPIC -o libipt_NETFLOW_sh.o -c libipt_NETFLOW.c
 | ||
| -	gcc -shared  -o libipt_NETFLOW.so libipt_NETFLOW_sh.o
 | ||
| +version.h: ipt_NETFLOW.c ipt_NETFLOW.h Makefile
 | ||
| +	@if [ -d .git ] && type git >/dev/null 2>&1; then \
 | ||
| +		 echo "#define GITVERSION \"`git describe --dirty`\""; \
 | ||
| +	fi > version.h
 | ||
|  
 | ||
| -linstall: ipt_NETFLOW.ko libipt_NETFLOW.so
 | ||
| -	cp -a libipt_NETFLOW.so $(IPTABLES_MODULES)
 | ||
| +linstall: | libipt_NETFLOW.so libip6t_NETFLOW.so
 | ||
| +	install -D libipt_NETFLOW.so $(DESTDIR)$(IPTABLES_MODULES)/libipt_NETFLOW.so
 | ||
| +	install -D libip6t_NETFLOW.so $(DESTDIR)$(IPTABLES_MODULES)/libip6t_NETFLOW.so
 | ||
|  
 | ||
|  install: minstall linstall
 | ||
|  
 | ||
| +uninstall:
 | ||
| +	-rm -f $(DESTDIR)$(IPTABLES_MODULES)/libipt_NETFLOW.so
 | ||
| +	-rm -f $(DESTDIR)$(IPTABLES_MODULES)/libip6t_NETFLOW.so
 | ||
| +	-rm -f $(DESTDIR)$(KINSTDIR)/extra/ipt_NETFLOW.ko
 | ||
| +
 | ||
|  Makefile: Makefile.in configure
 | ||
|  	./configure --make
 | ||
|  
 | ||
|  load: all
 | ||
| -	insmod ipt_NETFLOW.ko active_timeout=5
 | ||
| -	iptables -A OUTPUT -d 0/0 -j NETFLOW
 | ||
| -	iptables -A INPUT -d 0/0 -j NETFLOW
 | ||
| +	-insmod ipt_NETFLOW.ko active_timeout=5 protocol=9
 | ||
| +	-iptables -I OUTPUT -j NETFLOW
 | ||
| +	-iptables -I INPUT -j NETFLOW
 | ||
| +	-ip6tables -I OUTPUT -j NETFLOW
 | ||
| +	-ip6tables -I INPUT -j NETFLOW
 | ||
|  
 | ||
|  unload:
 | ||
| -	iptables -D OUTPUT -d 0/0 -j NETFLOW
 | ||
| -	iptables -D INPUT -d 0/0 -j NETFLOW
 | ||
| -	rmmod ipt_NETFLOW.ko
 | ||
| +	-iptables -D OUTPUT -j NETFLOW
 | ||
| +	-iptables -D INPUT -j NETFLOW
 | ||
| +	-ip6tables -D OUTPUT -j NETFLOW
 | ||
| +	-ip6tables -D INPUT -j NETFLOW
 | ||
| +	-rmmod ipt_NETFLOW.ko
 | ||
| +
 | ||
| +reload: unload load
 | ||
| diff --git a/README b/README
 | ||
| index 213f02c..56a4fde 100644
 | ||
| --- a/README
 | ||
| +++ b/README
 | ||
| @@ -1,10 +1,17 @@
 | ||
| -ipt_NETFLOW linux 2.6 kernel module by <abc@telekom.ru> -- 11 Feb 2008
 | ||
| +ipt_NETFLOW linux 2.6.x-3.x kernel module by <abc@telekom.ru> -- 2008-2013.
 | ||
| +
 | ||
| +   High performance NetFlow v5, v9, IPFIX flow data export module for Linux
 | ||
| +   kernel. Supporting IPv4 and IPv6. Created to be useful for highly loaded
 | ||
| +   linux router. It should be used as iptables target. Also can export NAT
 | ||
| +   translation events using NetFlow Event Logging (NEL) for v9, IPFIX, or
 | ||
| +   specially crafted v5 flows.
 | ||
| +
 | ||
|  
 | ||
|  ============================
 | ||
|  = OBTAINING LATEST VERSION =
 | ||
|  ============================
 | ||
|  
 | ||
| -   $ git clone git://ipt-netflow.git.sourceforge.net/gitroot/ipt-netflow/ipt-netflow
 | ||
| +   $ git clone git://git.code.sf.net/p/ipt-netflow/code ipt-netflow
 | ||
|     $ cd ipt-netflow
 | ||
|  
 | ||
|  
 | ||
| @@ -12,94 +19,220 @@ ipt_NETFLOW linux 2.6 kernel module by <abc@telekom.ru> -- 11 Feb 2008
 | ||
|  = INSTALLATION =
 | ||
|  ================
 | ||
|  
 | ||
| -1. Besides kernel you will need iptables/netfilter source matching your
 | ||
| -     installation or just fresh install from there: ftp://ftp.netfilter.org/pub/iptables/snapshot/
 | ||
| -   I have this: ftp://ftp.netfilter.org/pub/iptables/snapshot/iptables-1.3.7-20070329.tar.bz2
 | ||
| -   Unpack it somewhere and build with make.
 | ||
| +   Four easy steps.
 | ||
| +
 | ||
| +** 1. Prepare Kernel source
 | ||
| +
 | ||
| +   If you have package system install kernel-devel package, otherwise install
 | ||
| +   raw kernel source from http://kernel.org matching _exactly_ version of your
 | ||
| +   installed kernel.
 | ||
| +
 | ||
| +   a) What to do for Centos:
 | ||
| +
 | ||
| +      ~# yum install kernel-devel
 | ||
| +
 | ||
| +   b) What to do for Debian:
 | ||
| +
 | ||
| +      ~# apt-get install module-assistant
 | ||
| +      ~# m-a prepare
 | ||
| +
 | ||
| +   c) Otherwise, if you downloaded raw kernel sources don't forget to create
 | ||
| +    .config by copying it from your distribution's kernel. Its copy could reside
 | ||
| +    in /boot or sometimes in /proc, examples:
 | ||
| +
 | ||
| +      kernel-src-dir/# cp /boot/config-`uname -r` .config
 | ||
| +    or
 | ||
| +      kernel-src-dir/# zcat /proc/config.gz > .config
 | ||
| +
 | ||
| +    Assuming you unpacked kernel source into `kernel-src-dir/' directory.
 | ||
| +    Then run:
 | ||
| +
 | ||
| +      kernel-src-dir/# make oldconfig
 | ||
| +
 | ||
| +    After that you'll need to prepare kernel for modules build:
 | ||
| +
 | ||
| +      kernel-src-dir/# make prepare modules_prepare
 | ||
| +
 | ||
| +   Note: Don't try to `make prepare' in Centos kernel-devel package directory
 | ||
| +     (which is usually something like /usr/src/kernels/2.6.32-431.el6.x86_64)
 | ||
| +     as this is wrong and meaningless.
 | ||
| +
 | ||
| +** 2. Prepare Iptables
 | ||
| +
 | ||
| +   Before this step it also would be useful to install pkg-config if don't
 | ||
| +   already have.
 | ||
| +
 | ||
| +   If you have package system just install iptables-devel (or iptables-dev)
 | ||
| +   package, otherwise install iptables source matching version of your
 | ||
| +   installation from ftp://ftp.netfilter.org/pub/iptables/
 | ||
| +
 | ||
| +   a) What to do for Centos:
 | ||
| +
 | ||
| +      # yum install iptables-devel
 | ||
| +
 | ||
| +   b) What to do for Debian:
 | ||
| +
 | ||
| +      # apt-get install iptables-dev pkg-config
 | ||
|  
 | ||
| -2. Run ./configure script and it will create Makefile
 | ||
| +   c) Otherwise, for raw iptables source build it and make install.
 | ||
|  
 | ||
| -3. make all install; depmod
 | ||
| -   This will install kernel module and iptable specific library.
 | ||
| +** 3. Now, to actually build the module run:
 | ||
|  
 | ||
| -Troubleshooting:
 | ||
| -   1) Sometimes you will want to add CC=gcc-3 to make command.
 | ||
| -   Example: make CC=gcc-3.3
 | ||
| +      ~/ipt-netflow# ./configure
 | ||
| +      ~/ipt-netflow# make all install
 | ||
| +      ~/ipt-netflow# depmod
 | ||
|  
 | ||
| -   2) Compile module with actual kernel source compiled.
 | ||
| -   I.e. first compile kernel and boot into it, and then compile module.
 | ||
| +   This will install kernel module and iptables specific library.
 | ||
|  
 | ||
| -   3) For autoloading module after reboot: set net.netflow.destination (or load
 | ||
| -   module, if idestination set on load) after interfaces are up. Becasue module
 | ||
| -   needs exporting interface (usually lo) to establish export connection.
 | ||
| +   Troubleshooting:
 | ||
|  
 | ||
| -4. After this point you should be able to load module
 | ||
| -     and use -j NETFLOW target in your iptables. See next section.
 | ||
| +     a) Sometimes you will want to add CC=gcc-3 to make command.
 | ||
| +     Example: make CC=gcc-3.3
 | ||
| +
 | ||
| +     b) Compile module with actual kernel source compiled.
 | ||
| +     I.e. first compile kernel and boot into it, and then compile module.
 | ||
| +     If you are using kernel-devel package check that its version matches
 | ||
| +     your kernel package.
 | ||
| +
 | ||
| +     c) If you have sources in non-standard places or configure isn't able to
 | ||
| +     find something run ./configure --help to see how to specify paths manually.
 | ||
| +
 | ||
| +** 4. After this point you should be able to load module and
 | ||
| +     use -j NETFLOW target in your iptables. See next section.
 | ||
|  
 | ||
|  
 | ||
|  ===========
 | ||
|  = RUNNING =
 | ||
|  ===========
 | ||
|  
 | ||
| -1. You can load module by insmod like this:
 | ||
| -   # insmod ipt_NETFLOW.ko destination=127.0.0.1:2055 debug=1
 | ||
| +1. You can load module directly by insmod like this:
 | ||
| +
 | ||
| +     # insmod ipt_NETFLOW.ko destination=127.0.0.1:2055 debug=1
 | ||
|  
 | ||
|     Or if properly installed (make install; depmod) by this:
 | ||
| -   # modprobe ipt_NETFLOW destination=127.0.0.1:2055
 | ||
| +
 | ||
| +     # modprobe ipt_NETFLOW destination=127.0.0.1:2055
 | ||
|  
 | ||
|     See, you may add options in insmod/modprobe command line, or add
 | ||
| -     them in /etc/ to modules.conf or modprobe.conf like thus:
 | ||
| -   options ipt_NETFLOW destination=127.0.0.1:2055
 | ||
| +   them in /etc/modprobe.conf or /etc/modprobe.d/ipt_NETFLOW.conf
 | ||
| +   like thus:
 | ||
| +
 | ||
| +     options ipt_NETFLOW destination=127.0.0.1:2055 protocol=9 natevents=1
 | ||
|  
 | ||
|  2. Statistics is in /proc/net/stat/ipt_netflow
 | ||
| -   To view slab statistics: grep ipt_netflow /proc/slabinfo
 | ||
| +   To view boring slab statistics: grep ipt_netflow /proc/slabinfo
 | ||
|  
 | ||
|  3. You can view parameters and control them via sysctl, example:
 | ||
| -   # sysctl -w net.netflow.hashsize=32768
 | ||
|  
 | ||
| -4. Example of directing all traffic into module:
 | ||
| -   # iptables -A FORWARD -j NETFLOW
 | ||
| -   # iptables -A INPUT -j NETFLOW
 | ||
| -   # iptables -A OUTPUT -j NETFLOW
 | ||
| +     # sysctl net.netflow
 | ||
| +     # sysctl net.netflow.hashsize=32768
 | ||
| +
 | ||
| +   Note: For after-reboot configuration I recommend to store module parameters
 | ||
| +   in modprobe configs instead of storing them in /etc/sysctl.conf, as it's
 | ||
| +   less clear when init process will apply sysctl.conf, before of after
 | ||
| +   module's load.
 | ||
| +
 | ||
| +4. Example of directing all IPv4 traffic into the module:
 | ||
| +
 | ||
| +     # iptables -I FORWARD -j NETFLOW
 | ||
| +     # iptables -I INPUT -j NETFLOW
 | ||
| +     # iptables -I OUTPUT -j NETFLOW
 | ||
| +
 | ||
| +   Note: It is preferable (because easier to understand) to _insert_
 | ||
| +   NETFLOW target at the top of the chain, otherwise not all traffic may
 | ||
| +   reach NETFLOW if your iptables configuration is complicated and some
 | ||
| +   other rule inadvertently consumes the traffic (dropping or accepting before
 | ||
| +   NETFLOW is reached). It's always good to test your configuration.
 | ||
| +   Use  iptables -L -nvx  to check pkts/bytes counters on the rules.
 | ||
| +
 | ||
| +5. If you want to account IPv6 traffic you should use protocol 9 or 10.
 | ||
| +   Example of directing all IPv6 traffic into the module:
 | ||
|  
 | ||
| +     # sysctl net.netflow.protocol=10
 | ||
| +     # ip6tables -I FORWARD -j NETFLOW
 | ||
| +     # ip6tables -I INPUT -j NETFLOW
 | ||
| +     # ip6tables -I OUTPUT -j NETFLOW
 | ||
| +
 | ||
| +   Note: First enable right version of protocol and after that add ip6tables
 | ||
| +     rules, otherwise you will get errors in dmesg.
 | ||
| +
 | ||
| +6. If you want to account NAT events (NEL):
 | ||
| +
 | ||
| +     # sysctl net.netflow.natevents=1
 | ||
| +
 | ||
| +   Note that natevents feature is completely independent from traffic accounting
 | ||
| +   (it's using so called conntrack events), thus you don't need to set or change
 | ||
| +   any iptables rules to use that. You may need to enable kernel config option
 | ||
| +   CONFIG_NF_CONNTRACK_EVENTS though (if it isn't already enabled).
 | ||
| +   For details on how they are exported for different protocol versions see
 | ||
| +   below.
 | ||
|  
 | ||
|  ===========
 | ||
|  = OPTIONS =
 | ||
|  ===========
 | ||
|  
 | ||
| +   protocol=5
 | ||
| +     - what version of NetFlow protocol to use. Default is 5.
 | ||
| +       You can choose from 5, 9, or 10 (where 10 is IPFIX). If you plan
 | ||
| +       to account IPv6 traffic you should use protocol 9 or 10 (IPFIX),
 | ||
| +       because NetFlow v5 isn't compatible with IPv6.
 | ||
| +
 | ||
|     destination=127.0.0.1:2055
 | ||
|       - where to export netflow, to this ip address
 | ||
|         You will see this connection in netstat like this:
 | ||
|         udp 0 0 127.0.0.1:32772 127.0.0.1:2055 ESTABLISHED 
 | ||
|  
 | ||
|     destination=127.0.0.1:2055,192.0.0.1:2055
 | ||
| -     - mirror flows to two (can be more) addresses,
 | ||
| -       separate addresses with comma.
 | ||
| +     - mirror flows to two (can be more) addresses, separate addresses
 | ||
| +       with comma.
 | ||
| +
 | ||
| +   natevents=1
 | ||
| +     - Collect and send NAT translation events as NetFlow Event Logging (NEL)
 | ||
| +       for NetFlow v9/IPFIX, or as dummy flows compatible with NetFlow v5.
 | ||
| +       Default is 0 (don't send).
 | ||
| +
 | ||
| +       For NetFlow v5 protocol meaning of fields in dummy flows are such:
 | ||
| +         Src IP, Src Port  is Pre-nat source address.
 | ||
| +         Dst IP, Dst Port  is Post-nat destination address.
 | ||
| +           - These two fields made equal to data flows caught in FORWARD chain.
 | ||
| +	 Nexthop, Src AS  is Post-nat source address for SNAT. Or,
 | ||
| +	 Nexthop, Dst AS  is Pre-nat destination address for DNAT.
 | ||
| +	 TCP Flags is SYN+SCK for start event, RST+FIN for stop event.
 | ||
| +	 Pkt/Traffic size is 0 (zero), so it won't interfere with accounting.
 | ||
|  
 | ||
|     inactive_timeout=15
 | ||
|       - export flow after it's inactive 15 seconds. Default value is 15.
 | ||
|  
 | ||
|     active_timeout=1800
 | ||
| -     - export flow after it's active 1800 seconds (30 minutes). Default value is 1800.
 | ||
| +     - export flow after it's active 1800 seconds (30 minutes). Default value
 | ||
| +       is 1800.
 | ||
| +
 | ||
| +   refresh-rate=20
 | ||
| +     - for NetFlow v9 and IPFIX it's rate how frequently to re-send templates
 | ||
| +       (per packets). You probably don't need to change default (which is 20).
 | ||
| +
 | ||
| +   timeout-rate=30
 | ||
| +     - for NetFlow v9 and IPFIX it's rate when to re-send old templates (in
 | ||
| +       minutes). No need to change it.
 | ||
|  
 | ||
|     debug=0
 | ||
|       - debug level (none).
 | ||
|  
 | ||
|     sndbuf=number
 | ||
| -     - size of output socket buffer in bytes. Recommend you to put
 | ||
| +     - size of output socket buffer in bytes. I recommend you to put
 | ||
|         higher value if you experience netflow packet drops (can be
 | ||
|         seen in statistics as 'sock: fail' number.)
 | ||
|         Default value is system default.
 | ||
|  
 | ||
|     hashsize=number
 | ||
|       - Hash table bucket size. Used for performance tuning.
 | ||
| -       Abstractly speaking, it should be two times bigger than flows
 | ||
| +       Abstractly speaking, it should be minimum two times bigger than flows
 | ||
|         you usually have, but not need to.
 | ||
|         Default is system memory dependent small enough value.
 | ||
|  
 | ||
|     maxflows=2000000
 | ||
| -     - Maximum number of flows to account. It's here to prevent DOS attacks. After
 | ||
| -       this limit reached new flows will not be accounted. Default is
 | ||
| +     - Maximum number of flows to account. It's here to prevent DOS attacks.
 | ||
| +       After this limit reached new flows will not be accounted. Default is
 | ||
|         2000000, zero is unlimited.
 | ||
|  
 | ||
|     aggregation=string..
 | ||
| @@ -130,14 +263,15 @@ Troubleshooting:
 | ||
|  = HOW TO READ STAT =
 | ||
|  ====================
 | ||
|  
 | ||
| -  Statistics is your friend to fine tune and understand netflow module performance.
 | ||
| +  Statistics is your friend to fine tune and understand netflow module
 | ||
| +  performance.
 | ||
|  
 | ||
|    To see stat:
 | ||
|    # cat /proc/net/stat/ipt_netflow
 | ||
|  
 | ||
|    How to interpret the data:
 | ||
|  
 | ||
| -> Flows: active 5187 (peak 83905 reached 0d0h1m ago, maxflows 2000000), mem 283K
 | ||
| +> Flows: active 5187 (peak 83905 reached 0d0h1m ago, maxflows 2000000), mem 283K, worker delay 100/1000.
 | ||
|  
 | ||
|    active X: currently active flows in memory cache.
 | ||
|      - for optimum CPU performance it is recommended to set hash table size to
 | ||
| @@ -146,8 +280,9 @@ Troubleshooting:
 | ||
|    mem XK: how much kilobytes of memory currently taken by active flows.
 | ||
|      - one active flow taking 56 bytes of memory.
 | ||
|      - there is system limit on cache size too.
 | ||
| +  worker delay X/HZ: how frequently exporter scan flows table per second.
 | ||
|  
 | ||
| -> Hash: size 8192 (mem 32K), metric 1.0, 1.0, 1.0, 1.0. MemTraf: 1420 pkt, 364 K (pdu 0, 0).
 | ||
| +> Hash: size 8192 (mem 32K), metric 1.00, [1.00, 1.00, 1.00]. MemTraf: 1420 pkt, 364 K (pdu 0, 0).
 | ||
|  
 | ||
|    Hash: size X: current hash size/limit.
 | ||
|      - you can control this by sysctl net.netflow.hashsize variable.
 | ||
| @@ -156,18 +291,22 @@ Troubleshooting:
 | ||
|      - optimal value is twice of average of active flows.
 | ||
|    mem XK: how much memory occupied by hash table.
 | ||
|      - hash table is fixed size by nature, taking 4 bytes per entry.
 | ||
| -  metric X, X, X, X: how optimal is your hash table being used.
 | ||
| +  metric X, [X, X, X]: how optimal is your hash table being used.
 | ||
|      - lesser value mean more optimal hash table use, min is 1.0.
 | ||
| -    - this is moving average (EWMA) of hash table access divided
 | ||
| -      by match rate (searches / matches) for 4sec, and 1, 5, 15 minutes.
 | ||
| -      Sort of hash table load average.
 | ||
| +    - last three numbers in squares is moving average (EWMA) of hash table
 | ||
| +      access divided by match rate (searches / matches) for 4sec, and 1, 5, and
 | ||
| +      15 minutes. Sort of hash table load average. First value is instantaneous.
 | ||
| +      You can try to increase hashsize if averages more than 1 (increase
 | ||
| +      certainly if >= 2).
 | ||
|    MemTraf: X pkt, X K: how much traffic accounted for flows that are in memory.
 | ||
|      - these flows that are residing in internal hash table.
 | ||
|    pdu X, X: how much traffic in flows preparing to be exported.
 | ||
|      - it is included already in aforementioned MemTraf total.
 | ||
|  
 | ||
| -> Timeout: active 1800, inactive 15. Maxflows 2000000
 | ||
| +> Protocol version 10 (ipfix), refresh-rate 20, timeout-rate 30, (templates 2, active 2). Timeouts: active 5, inactive 15. Maxflows 2000000
 | ||
|  
 | ||
| +  Protocol version currently in use. Refresh-rate and timeout-rate
 | ||
| +      for v9 and IPFIX. Total templates generated and currently active.
 | ||
|    Timeout: active X: how much seconds to wait before exporting active flow.
 | ||
|      - same as sysctl net.netflow.active_timeout variable.
 | ||
|    inactive X: how much seconds to wait before exporting inactive flow.
 | ||
| @@ -180,20 +319,22 @@ Troubleshooting:
 | ||
|  
 | ||
|    - Module throughput values for 1 second, 1 minute, and 5 minutes.
 | ||
|  
 | ||
| -> cpu#  stat: <search found new, trunc frag alloc maxflows>, sock: <ok fail cberr, bytes>, traffic: <pkt, bytes>, drop: <pkt, bytes>
 | ||
| -> cpu0  stat: 980540  10473 180600,    0    0    0    0, sock:   4983 928 0, 7124 K, traffic: 188765, 14 MB, drop: 27863, 1142 K
 | ||
| +> cpu#  stat: <search found new [metric], trunc frag alloc maxflows>, sock: <ok fail cberr, bytes>, traffic: <pkt, bytes>, drop: <pkt, bytes>
 | ||
| +> cpu0  stat: 980540  10473 180600 [1.03],    0    0    0    0, sock:   4983 928 0, 7124 K, traffic: 188765, 14 MB, drop: 27863, 1142 K
 | ||
|  
 | ||
|    cpu#: this is Total and per CPU statistics for:
 | ||
|    stat: <search found new, trunc frag alloc maxflows>: internal stat for:
 | ||
|    search found new: hash table searched, found, and not found counters.
 | ||
| -  trunc: how much truncated packets is ignored
 | ||
| +  [metric]: average hash metric since module load.
 | ||
| +  trunc: how much truncated packets are ignored
 | ||
|      - these are that possible don't have valid IP header.
 | ||
|      - accounted in drop packets counter but not in drop bytes.
 | ||
|    frag: how much fragmented packets have seen.
 | ||
|      - kernel always defragments INPUT/OUTPUT chains for us.
 | ||
|      - these packets are not ignored but not reassembled either, so:
 | ||
| -    - if there is no enough data in fragment (ex. tcp ports) it is considered zero.
 | ||
| -  alloc: how much cache memory allocations is failed.
 | ||
| +    - if there is no enough data in fragment (ex. tcp ports) it is considered
 | ||
| +      zero.
 | ||
| +  alloc: how much cache memory allocations are failed.
 | ||
|      - packets ignored and accounted in drop stat.
 | ||
|      - probably increase system memory if this ever happen.
 | ||
|    maxflows: how much packets ignored on maxflows (maximum active flows reached).
 | ||
| @@ -203,7 +344,8 @@ Troubleshooting:
 | ||
|    sock: <ok fail cberr, bytes>: table of exporting stats for:
 | ||
|    ok: how much Netflow PDUs are exported (i.e. UDP packets sent by module).
 | ||
|    fail: how much socket errors (i.e. packets failed to be sent).
 | ||
| -    - packets dropped and their internal statistics cumulatively accounted in drop stat.
 | ||
| +    - packets dropped and their internal statistics cumulatively accounted in
 | ||
| +      drop stat.
 | ||
|    cberr: how much connection refused ICMP errors we got from export target.
 | ||
|      - probably you not launched collector software on destination,
 | ||
|      - or specified wrong destination address.
 | ||
| @@ -225,20 +367,34 @@ Troubleshooting:
 | ||
|        packet is for new flow but maxflows is already reached,
 | ||
|        all flows in export packets that got socket error.
 | ||
|  
 | ||
| -> sock0: 10.0.0.2:2055, sndbuf 106496, filled 0, peak 106848; err: sndbuf reached 928, other 0
 | ||
| +> Natevents disabled, count start 0, stop 0.
 | ||
| +
 | ||
| +    - Natevents mode disabled or enabled, and how much start or stop events
 | ||
| +      are reported.
 | ||
| +
 | ||
| +> sock0: 10.0.0.2:2055 unconnected (1 attempts).
 | ||
| +
 | ||
| +  If socket is unconnected (for example if module loaded before interfaces is
 | ||
| +  up) it shows how many connection attempts have failed. It will try to connect
 | ||
| +  until success.
 | ||
| +
 | ||
| +> sock0: 10.0.0.2:2055, sndbuf 106496, filled 0, peak 106848; err: sndbuf reached 928, connect 0, other 0
 | ||
|  
 | ||
|    sockX: per destination stats for:
 | ||
|    X.X.X.X:Y: destination ip address and port.
 | ||
|      - controlled by sysctl net.netflow.destination variable.
 | ||
|    sndbuf X: how much data socket can hold in buffers.
 | ||
|      - controlled by sysctl net.netflow.sndbuf variable.
 | ||
| -    - if you have packet drops due to sndbuf reached (error -11) increase this value.
 | ||
| +    - if you have packet drops due to sndbuf reached (error -11) increase this
 | ||
| +      value.
 | ||
|    filled X: how much data in socket buffers right now.
 | ||
|    peak X: peak value of how much data in socket buffers was.
 | ||
|      - you will be interested to keep it below sndbuf value.
 | ||
|    err: how much packets are dropped due to errors.
 | ||
|      - all flows from them will be accounted in drop stat.
 | ||
| -  sndbuf reached X: how much packets dropped due to sndbuf being too small (error -11).
 | ||
| +  sndbuf reached X: how much packets dropped due to sndbuf being too small
 | ||
| +      (error -11).
 | ||
| +  connect X: how many connection attempts have failed.
 | ||
|    other X: dropped due to other possible errors.
 | ||
|  
 | ||
|  > aggr0: ...
 | ||
| diff --git a/README.promisc b/README.promisc
 | ||
| index 60ca922..31d774f 100644
 | ||
| --- a/README.promisc
 | ||
| +++ b/README.promisc
 | ||
| @@ -2,9 +2,14 @@ Hello,
 | ||
|  
 | ||
|  If you wish to account with netflow module traffic mirrored on switch you may follow this example:
 | ||
|  
 | ||
| -**************
 | ||
| -* Solution 1 *
 | ||
| -**************
 | ||
| +
 | ||
| +   Solution 1: General kernel patch.
 | ||
| +   Solution 2: Alternative w/o kernel patch.
 | ||
| +
 | ||
| +
 | ||
| +    **************
 | ||
| +    * Solution 1 *
 | ||
| +    **************
 | ||
|  
 | ||
|  1. Patch your kernel with `raw_promisc.patch' to enable raw table to see promisc traffic.
 | ||
|  
 | ||
| @@ -33,17 +38,7 @@ If you wish to account with netflow module traffic mirrored on switch you may fo
 | ||
|   # /sbin/vconfig add eth1 47
 | ||
|   # /sbin/ifconfig eth1.47 up
 | ||
|  
 | ||
| -5. Recompile ipt_netflow module with #define RAW_PROMISC_HACK uncommented:
 | ||
| -
 | ||
| - Find this line in ipt_NETFLOW.c (should be line 7):
 | ||
| -
 | ||
| -//#define RAW_PROMISC_HACK
 | ||
| -
 | ||
| - And remove two slashes at beginning of the line, so it become like this:
 | ||
| -
 | ||
| -#define RAW_PROMISC_HACK
 | ||
| -
 | ||
| - Re-compile module:
 | ||
| +5. Compile module:
 | ||
|  
 | ||
|   # make clean all install
 | ||
|  
 | ||
| @@ -55,13 +50,14 @@ If you wish to account with netflow module traffic mirrored on switch you may fo
 | ||
|  
 | ||
|   # /sbin/iptables -A PREROUTING -t raw -i eth1.47 -j NETFLOW
 | ||
|  
 | ||
| -
 | ||
|  Voila.
 | ||
|  
 | ||
| +ps. For Debian Squeeze instructions look at raw_promisc_debian_squeeze6.patch
 | ||
|  
 | ||
| -**************
 | ||
| -* Solution 2 *
 | ||
| -**************
 | ||
| +
 | ||
| +    **************
 | ||
| +    * Solution 2 *
 | ||
| +    **************
 | ||
|  
 | ||
|  By Anonymous.
 | ||
|  
 | ||
| @@ -81,4 +77,3 @@ Sometimes you may need to run:
 | ||
|  
 | ||
|  for this scheme to work.
 | ||
|  
 | ||
| -
 | ||
| diff --git a/configure b/configure
 | ||
| index 677dd7f..3f10e2a 100755
 | ||
| --- a/configure
 | ||
| +++ b/configure
 | ||
| @@ -3,7 +3,7 @@
 | ||
|  PATH=$PATH:/bin:/usr/bin:/usr/sbin:/sbin:/usr/local/sbin
 | ||
|  
 | ||
|  error() {
 | ||
| -  echo "! Error: $@"
 | ||
| +  echo -e "! Error: $@"
 | ||
|    exit 1
 | ||
|  }
 | ||
|  
 | ||
| @@ -56,19 +56,20 @@ get_lib_from_lib() {
 | ||
|  }
 | ||
|  
 | ||
|  iptables_inc() {
 | ||
| -  echo -n "Iptables include path: "
 | ||
| +  echo -n "Iptables include flags: "
 | ||
|    if [ "$IPTINC" ]; then
 | ||
| -    echo "$IPTINC (user specified)"
 | ||
|      IPTINC="-I$IPTINC"
 | ||
| +    echo "$IPTINC (user specified)"
 | ||
| +  elif [ "$PKGVER" ]; then
 | ||
| +    IPTINC="$PKGINC"
 | ||
| +    echo "$IPTINC (pkg-config)"
 | ||
| +  elif [ "$NOIPTSRC" ]; then
 | ||
| +    IPTINC=
 | ||
| +    echo "none (default)"
 | ||
|    else
 | ||
| -    if [ "$PKGINC" ]; then
 | ||
| -      IPTINC="$PKGINC"
 | ||
| -      echo "$IPTINC (pkg-config)"
 | ||
| -    else
 | ||
| -      IPTINC="$IPTSRC/include"
 | ||
| -      echo "$IPTINC (from source)"
 | ||
| -      IPTINC="-I$IPTINC"
 | ||
| -    fi
 | ||
| +    IPTINC="$IPTSRC/include"
 | ||
| +    IPTINC="-I$IPTINC"
 | ||
| +    echo "$IPTINC (from source)"
 | ||
|    fi
 | ||
|  }
 | ||
|  
 | ||
| @@ -109,7 +110,16 @@ try_dir2() {
 | ||
|    test -d "$1" && try_dir `dirname $1` && return 0
 | ||
|  }
 | ||
|  
 | ||
| -iptables_ver() {
 | ||
| +check_pkg_config() {
 | ||
| +  test "$PKGWARN" && return 1
 | ||
| +  if ! which pkg-config >/dev/null 2>&1; then
 | ||
| +    echo "! You don't have pkg-config, it may be useful to install it."
 | ||
| +    PKGWARN=1
 | ||
| +    return 1
 | ||
| +  fi
 | ||
| +  return 0
 | ||
| +}
 | ||
| +iptables_find_version() {
 | ||
|    echo -n "Iptables binary version: "
 | ||
|    if [ "$IPTVER" ]; then
 | ||
|      echo "$IPTVER (user specified)"
 | ||
| @@ -121,6 +131,7 @@ iptables_ver() {
 | ||
|      else
 | ||
|        echo "no iptables binary found"
 | ||
|      fi
 | ||
| +    check_pkg_config
 | ||
|      PKGVER=`pkg-config --modversion xtables 2>/dev/null`
 | ||
|      if [ "$PKGVER" ]; then
 | ||
|        IPTVER="$PKGVER"
 | ||
| @@ -131,44 +142,90 @@ iptables_ver() {
 | ||
|    fi
 | ||
|  }
 | ||
|  
 | ||
| -iptables_dir() {
 | ||
| -  test "$IPTINC" && return 1
 | ||
| -  test "$PKGINC" && return 1
 | ||
| -
 | ||
| -  VER="iptables-$IPTVER"
 | ||
| -  if [ "$IPTSRC" ]; then
 | ||
| -    echo "User specified source directory: $IPTSRC"
 | ||
| -    try_dir $IPTSRC || error "Specified directory is not iptables source.."
 | ||
| +compile_libitp_test() {
 | ||
| +  echo -n "Checking for presence of $@... "
 | ||
| +  echo "
 | ||
| +#define __EXPORTED_HEADERS__
 | ||
| +#include <$*>" > test.c
 | ||
| +  gcc -c test.c  >/dev/null 2>&1
 | ||
| +  RET=$?
 | ||
| +  if [ $RET = 0 ]; then
 | ||
| +    echo Yes;
 | ||
|    else
 | ||
| -    echo "Searching for $VER sources.."
 | ||
| -    try_dir "./$VER" && return 0
 | ||
| -    try_dir "../$VER" && return 0
 | ||
| -    try_dir "/usr/src/$VER" && return 0
 | ||
| -    try_dirg "iptables" && return 0
 | ||
| -    try_dirg "../iptables" && return 0
 | ||
| -    try_dirg "/usr/src/iptables" && return 0
 | ||
| -    try_dir2 `locate $VER/extensions | head -1` && return 0
 | ||
| -    error "Can not find iptables source directory, try setting it with --ipt-src="
 | ||
| +    echo No;
 | ||
|    fi
 | ||
| +  rm -f test.c test.o
 | ||
| +  return $RET
 | ||
|  }
 | ||
|  
 | ||
| -iptables_pkg_config() {
 | ||
| +iptables_try_pkgconfig() {
 | ||
|    if [ ! "$PKGVER" ]; then
 | ||
| +    check_pkg_config
 | ||
| +    PKGVER=`pkg-config --modversion xtables 2>/dev/null`
 | ||
| +    TRYPKGVER=`pkg-config --modversion xtables 2>/dev/null`
 | ||
|      echo -n "pkg-config for version $IPTVER exists: "
 | ||
| -    PKGVER=`pkg-config --exact-version=$IPTVER --modversion xtables 2>/dev/null`
 | ||
| +    pkg-config --exact-version=$IPTVER xtables 2>/dev/null
 | ||
|      if [ $? = 0 ]; then
 | ||
|        echo "Yes"
 | ||
| +      PKGVER=$TRYPKGVER
 | ||
|      else
 | ||
| -      echo "No (reported: $PKGVER)"
 | ||
| -      unset PKGVER
 | ||
| +      if [ "$TRYPKGVER" ]; then
 | ||
| +	echo "No (reported: $TRYPKGVER)"
 | ||
| +      else
 | ||
| +	echo "No"
 | ||
| +      fi
 | ||
|      fi
 | ||
|    fi
 | ||
|    if [ "$PKGVER" ]; then
 | ||
| +    check_pkg_config
 | ||
| +    PKGVER=`pkg-config --modversion xtables 2>/dev/null`
 | ||
|      PKGINC=`pkg-config --cflags xtables`
 | ||
|      PKGLIB=`pkg-config --variable=xtlibdir xtables`
 | ||
| -    IPTCFLAGS="-DXTABLES"
 | ||
|    else
 | ||
| -    IPTCFLAGS="-DIPTABLES_VERSION=\\\\\"$IPTVER\\\\\""
 | ||
| +    # Newer versions of iptables should not have -I/kernel/include!
 | ||
| +    # So I assume that newer version will have correct pkg-config set up
 | ||
| +    # and if not, then it's older who need it.
 | ||
| +    IPTCFLAGS="-I$KDIR/include -DIPTABLES_VERSION=\\\\\"$IPTVER\\\\\""
 | ||
| +  fi
 | ||
| +  if compile_libitp_test xtables.h; then
 | ||
| +    IPTCFLAGS="-DXTABLES $IPTCFLAGS"
 | ||
| +  elif ! compile_libitp_test iptables.h; then
 | ||
| +    echo "! Iptables headers not found. You may need to specify --ipt-inc=..."
 | ||
| +    if [ -s /etc/debian_version ]; then
 | ||
| +      echo "! "
 | ||
| +      echo "! Under Debian simply run this:"
 | ||
| +      echo "!   root# apt-get install iptables-dev pkg-config"
 | ||
| +    elif [ -s /etc/redhat-release ]; then
 | ||
| +      echo "! "
 | ||
| +      arch=.`uname -m`
 | ||
| +      echo "! Under Centos simply run this:"
 | ||
| +      echo "!   root# yum install iptables-devel$arch pkgconfig"
 | ||
| +    fi
 | ||
| +    exit 1
 | ||
| +  fi
 | ||
| +
 | ||
| +}
 | ||
| +
 | ||
| +iptables_find_src() {
 | ||
| +  test "$IPTINC" && return 1
 | ||
| +  test "$PKGVER" && return 1
 | ||
| +
 | ||
| +  VER="iptables-$IPTVER"
 | ||
| +  if [ "$IPTSRC" ]; then
 | ||
| +    echo "User specified source directory: $IPTSRC"
 | ||
| +    try_dir $IPTSRC || error "Specified directory is not iptables source.."
 | ||
| +  else
 | ||
| +    echo "Searching for $VER sources.."
 | ||
| +    try_dir "./$VER" && return 0
 | ||
| +    try_dir "../$VER" && return 0
 | ||
| +    try_dir "/usr/src/$VER" && return 0
 | ||
| +    try_dirg "iptables" && return 0
 | ||
| +    try_dirg "../iptables" && return 0
 | ||
| +    try_dirg "/usr/src/iptables" && return 0
 | ||
| +    try_dir2 `locate $VER/extensions 2>/dev/null | head -1` && return 0
 | ||
| +    echo "! Can not find iptables source directory, you may try setting it with --ipt-src="
 | ||
| +    echo "! This is not fatal error, yet. Will be just using default include dir."
 | ||
| +    NOIPTSRC=1
 | ||
|    fi
 | ||
|  }
 | ||
|  
 | ||
| @@ -206,18 +263,110 @@ do
 | ||
|    esac
 | ||
|  done
 | ||
|  
 | ||
| -test "$KVERSION" || KVERSION=`uname -r`
 | ||
| -echo Kernel version: $KVERSION
 | ||
| +kernel_find_version() {
 | ||
| +  KHOW=requested
 | ||
| +  test "$KVERSION" && return 0
 | ||
| +
 | ||
| +  if grep -q '#.*Debian' /proc/version; then
 | ||
| +    KHOW=proc
 | ||
| +    KVERSION=`sed -n 's/.*#.*Debian \([0-9\.]\+\)-.*/\1/p' /proc/version`
 | ||
| +    KLIBMOD=`uname -r`
 | ||
| +  else
 | ||
| +    KHOW=uname
 | ||
| +    KVERSION=`uname -r`
 | ||
| +  fi
 | ||
| +  test "$KDIR" || return 0
 | ||
| +
 | ||
| +  test -s $KDIR/Makefile || return 1
 | ||
| +  test -s $KDIR/include/config/kernel.release || return 1
 | ||
| +  KVERSION=`cat $KDIR/include/config/kernel.release`
 | ||
| +  KHOW=sources
 | ||
| +}
 | ||
| +
 | ||
| +kernel_check_src() {
 | ||
| +  if [ -s "$1/Makefile" ]; then
 | ||
| +    KDIR="$1"
 | ||
| +    return 0
 | ||
| +  fi
 | ||
| +  return 1
 | ||
| +}
 | ||
| +
 | ||
| +kernel_find_source() {
 | ||
| +  KSHOW=requested
 | ||
| +  test "$KDIR" && return 0
 | ||
| +  KSHOW=found
 | ||
| +  kernel_check_src /lib/modules/$KLIBMOD/build && return 0
 | ||
| +  kernel_check_src /lib/modules/$KVERSION/build && return 0
 | ||
| +  kernel_check_src /usr/src/kernels/$KVERSION && return 0
 | ||
| +  kernel_check_src /usr/src/linux-$KVERSION && return 0
 | ||
| +  echo "! Linux source not found. Don't panic. You may specify kernel source"
 | ||
| +  echo "! directory with --kdir=..., or try to install kernel-devel package,"
 | ||
| +  echo "! or just raw sources for linux-$KVERSION from kernel.org."
 | ||
| +  if grep -q -i centos /proc/version 2>/dev/null; then
 | ||
| +    echo "! "
 | ||
| +    arch=.`uname -m`
 | ||
| +    echo "! Under Centos simply run this:"
 | ||
| +    echo "!   root# yum install kernel-devel iptables-devel$arch pkgconfig"
 | ||
| +  fi
 | ||
| +  if grep -q -i debian /proc/version 2>/dev/null; then
 | ||
| +    echo "! "
 | ||
| +    echo "! Under Debian simply run this:"
 | ||
| +    echo "!   root# apt-get install module-assistant iptables-dev pkg-config"
 | ||
| +    echo "!   root# m-a prepare"
 | ||
| +  fi
 | ||
| +  exit 1
 | ||
| +}
 | ||
| +
 | ||
| +kernel_check_consistency() {
 | ||
| +  if test -s $KDIR/include/config/kernel.release; then
 | ||
| +    SRCVER=`cat $KDIR/include/config/kernel.release`
 | ||
| +    test "$KVERSION" != "$SRCVER" && error "$KHOW kernel version ($KVERSION) and $KSHOW version of kernel source ($SRCVER) doesn't match!\n!" \
 | ||
| +      "You may try to specify only kernel source tree with --kdir=$KDIR\n!" \
 | ||
| +      "and configure will pick up version properly."
 | ||
| +  else
 | ||
| +    test -e "$KDIR/.config" || error ".config in kernel source not found, run  make menuconfig  in $KDIR"
 | ||
| +    test -d "$KDIR/include/config" || error "kernel is not prepared, run  make prepare modules_prepare  in $KDIR"
 | ||
| +  fi
 | ||
| +}
 | ||
| +
 | ||
| +kconfig() {
 | ||
| +  KCONFIG=$KDIR/.config
 | ||
| +  if ! grep -q "^$1=" $KCONFIG 2>/dev/null; then
 | ||
| +    if [ "$KCONFIGREPORTED" != true ]; then
 | ||
| +      KCONFIGREPORTED=true
 | ||
| +      echo Kernel config file checked: $KCONFIG
 | ||
| +      echo
 | ||
| +    fi
 | ||
| +    echo "! Attention: $1 is undefined in your kernel configuration"
 | ||
| +    echo "!   Without this option enabled $2 will not work."
 | ||
| +    echo
 | ||
| +  fi
 | ||
| +}
 | ||
| +
 | ||
| +kernel_check_config() {
 | ||
| +  kconfig CONFIG_SYSCTL			"sysctl interface"
 | ||
| +  kconfig CONFIG_PROC_FS		"proc interface"
 | ||
| +  kconfig CONFIG_NF_NAT_NEEDED		"natevents"
 | ||
| +  kconfig CONFIG_NF_CONNTRACK_EVENTS	"natevents"
 | ||
| +  kconfig CONFIG_NF_CONNTRACK_MARK	"connmark tracking"
 | ||
| +  kconfig CONFIG_IPV6			"IPv6"
 | ||
| +  kconfig CONFIG_IP6_NF_IPTABLES	"ip6tables target"
 | ||
| +}
 | ||
|  
 | ||
| -test "$KDIR" || KDIR=/lib/modules/$KVERSION/build
 | ||
| -echo Kernel sources: $KDIR
 | ||
| +kernel_find_version	#KVERSION
 | ||
| +test "$KLIBMOD" || KLIBMOD=$KVERSION
 | ||
| +echo "Kernel version: $KVERSION ($KHOW)"
 | ||
| +kernel_find_source	#KDIR
 | ||
| +echo "Kernel sources: $KDIR ($KSHOW)"
 | ||
| +kernel_check_consistency
 | ||
| +kernel_check_config
 | ||
|  
 | ||
|  test "$IPTBIN" || IPTBIN=`which iptables`
 | ||
|  
 | ||
| -iptables_ver		#IPTVER
 | ||
| -iptables_pkg_config
 | ||
| -iptables_dir		#IPTSRC
 | ||
| -iptables_src_version	#check IPTSRC match to IPTVER
 | ||
| +iptables_find_version	#IPTVER
 | ||
| +iptables_try_pkgconfig	#try to configure from pkg-config
 | ||
| +iptables_find_src	#IPTSRC
 | ||
| +iptables_src_version	#check that IPTSRC match to IPTVER
 | ||
|  iptables_inc		#IPTINC
 | ||
|  iptables_modules	#IPTLIB
 | ||
|  
 | ||
| @@ -225,7 +374,6 @@ REPLACE="\
 | ||
|  s!@KVERSION@!$KVERSION!;\
 | ||
|  s!@KDIR@!$KDIR!;\
 | ||
|  s!@IPTABLES_VERSION@!$IPTVER!;\
 | ||
| -s!@IPTABLES_INCLUDES@!$IPTINC!;\
 | ||
|  s!@IPTABLES_CFLAGS@!$IPTCFLAGS $IPTINC!;\
 | ||
|  s!@IPTABLES_MODULES@!$IPTLIB!"
 | ||
|  
 | ||
| diff --git a/ipt_NETFLOW.c b/ipt_NETFLOW.c
 | ||
| index d4c91e1..ad974c5 100644
 | ||
| --- a/ipt_NETFLOW.c
 | ||
| +++ b/ipt_NETFLOW.c
 | ||
| @@ -1,6 +1,6 @@
 | ||
|  /*
 | ||
|   * This is NetFlow exporting module (NETFLOW target) for linux
 | ||
| - * (c) 2008-2012 <abc@telekom.ru>
 | ||
| + * (c) 2008-2013 <abc@telekom.ru>
 | ||
|   *
 | ||
|   *
 | ||
|   *   This program is free software: you can redistribute it and/or modify
 | ||
| @@ -18,8 +18,6 @@
 | ||
|   *
 | ||
|   */
 | ||
|  
 | ||
| -//#define RAW_PROMISC_HACK
 | ||
| -
 | ||
|  #include <linux/module.h>
 | ||
|  #include <linux/skbuff.h>
 | ||
|  #include <linux/proc_fs.h>
 | ||
| @@ -31,16 +29,26 @@
 | ||
|  #include <linux/icmp.h>
 | ||
|  #include <linux/igmp.h>
 | ||
|  #include <linux/inetdevice.h>
 | ||
| -#include <linux/jhash.h>
 | ||
| +#include <linux/hash.h>
 | ||
| +#include <linux/delay.h>
 | ||
| +#include <linux/spinlock_types.h>
 | ||
|  #include <net/icmp.h>
 | ||
|  #include <net/ip.h>
 | ||
| +#include <net/ipv6.h>
 | ||
|  #include <net/tcp.h>
 | ||
|  #include <net/route.h>
 | ||
| +#include <net/ip6_fib.h>
 | ||
|  #include <net/dst.h>
 | ||
|  #include <linux/netfilter_ipv4/ip_tables.h>
 | ||
| +#if defined(CONFIG_NF_NAT_NEEDED) || defined(CONFIG_NF_CONNTRACK_MARK)
 | ||
| +#include <linux/notifier.h>
 | ||
| +#include <net/netfilter/nf_conntrack.h>
 | ||
| +#include <net/netfilter/nf_conntrack_core.h>
 | ||
| +#endif
 | ||
|  #include <linux/version.h>
 | ||
|  #include <asm/unaligned.h>
 | ||
|  #include "ipt_NETFLOW.h"
 | ||
| +#include "murmur3.h"
 | ||
|  #ifdef CONFIG_BRIDGE_NETFILTER
 | ||
|  #include <linux/netfilter_bridge.h>
 | ||
|  #endif
 | ||
| @@ -74,41 +82,66 @@
 | ||
|  #define ipt_target xt_target
 | ||
|  #endif
 | ||
|  
 | ||
| -#define IPT_NETFLOW_VERSION "1.8"
 | ||
| +#define IPT_NETFLOW_VERSION "1.8.2" /* Note that if you are using git, you
 | ||
| +				       will see version in other format. */
 | ||
| +#include "version.h"
 | ||
| +#ifdef GITVERSION
 | ||
| +#undef IPT_NETFLOW_VERSION
 | ||
| +#define IPT_NETFLOW_VERSION GITVERSION
 | ||
| +#endif
 | ||
|  
 | ||
|  MODULE_LICENSE("GPL");
 | ||
|  MODULE_AUTHOR("<abc@telekom.ru>");
 | ||
|  MODULE_DESCRIPTION("iptables NETFLOW target module");
 | ||
|  MODULE_VERSION(IPT_NETFLOW_VERSION);
 | ||
| +MODULE_ALIAS("ip6t_NETFLOW");
 | ||
|  
 | ||
|  #define DST_SIZE 256
 | ||
|  static char destination_buf[DST_SIZE] = "127.0.0.1:2055";
 | ||
|  static char *destination = destination_buf;
 | ||
| -module_param(destination, charp, 0400);
 | ||
| +module_param(destination, charp, 0444);
 | ||
|  MODULE_PARM_DESC(destination, "export destination ipaddress:port");
 | ||
|  
 | ||
|  static int inactive_timeout = 15;
 | ||
| -module_param(inactive_timeout, int, 0600);
 | ||
| +module_param(inactive_timeout, int, 0644);
 | ||
|  MODULE_PARM_DESC(inactive_timeout, "inactive flows timeout in seconds");
 | ||
|  
 | ||
|  static int active_timeout = 30 * 60;
 | ||
| -module_param(active_timeout, int, 0600);
 | ||
| +module_param(active_timeout, int, 0644);
 | ||
|  MODULE_PARM_DESC(active_timeout, "active flows timeout in seconds");
 | ||
|  
 | ||
|  static int debug = 0;
 | ||
| -module_param(debug, int, 0600);
 | ||
| +module_param(debug, int, 0644);
 | ||
|  MODULE_PARM_DESC(debug, "debug verbosity level");
 | ||
|  
 | ||
|  static int sndbuf;
 | ||
| -module_param(sndbuf, int, 0400);
 | ||
| +module_param(sndbuf, int, 0444);
 | ||
|  MODULE_PARM_DESC(sndbuf, "udp socket SNDBUF size");
 | ||
|  
 | ||
| +static int protocol = 5;
 | ||
| +module_param(protocol, int, 0444);
 | ||
| +MODULE_PARM_DESC(protocol, "netflow protocol version (5, 9, 10)");
 | ||
| +
 | ||
| +static unsigned int refresh_rate = 20;
 | ||
| +module_param(refresh_rate, uint, 0644);
 | ||
| +MODULE_PARM_DESC(refresh_rate, "NetFlow v9/IPFIX refresh rate (packets)");
 | ||
| +
 | ||
| +static unsigned int timeout_rate = 30;
 | ||
| +module_param(timeout_rate, uint, 0644);
 | ||
| +MODULE_PARM_DESC(timeout_rate, "NetFlow v9/IPFIX timeout rate (minutes)");
 | ||
| +
 | ||
| +#ifdef CONFIG_NF_NAT_NEEDED
 | ||
| +static int natevents = 0;
 | ||
| +module_param(natevents, int, 0444);
 | ||
| +MODULE_PARM_DESC(natevents, "send NAT Events");
 | ||
| +#endif
 | ||
| +
 | ||
|  static int hashsize;
 | ||
| -module_param(hashsize, int, 0400);
 | ||
| +module_param(hashsize, int, 0444);
 | ||
|  MODULE_PARM_DESC(hashsize, "hash table size");
 | ||
|  
 | ||
|  static int maxflows = 2000000;
 | ||
| -module_param(maxflows, int, 0600);
 | ||
| +module_param(maxflows, int, 0644);
 | ||
|  MODULE_PARM_DESC(maxflows, "maximum number of flows");
 | ||
|  static int peakflows = 0;
 | ||
|  static unsigned long peakflows_at;
 | ||
| @@ -121,22 +154,52 @@ MODULE_PARM_DESC(aggregation, "aggregation ruleset");
 | ||
|  
 | ||
|  static DEFINE_PER_CPU(struct ipt_netflow_stat, ipt_netflow_stat);
 | ||
|  static LIST_HEAD(usock_list);
 | ||
| -static DEFINE_RWLOCK(sock_lock);
 | ||
| +static DEFINE_MUTEX(sock_lock);
 | ||
|  
 | ||
| +#define LOCK_COUNT (1<<8)
 | ||
| +#define LOCK_COUNT_MASK (LOCK_COUNT-1)
 | ||
| +static spinlock_t htable_locks[LOCK_COUNT] = {
 | ||
| +	[0 ... LOCK_COUNT - 1] = __SPIN_LOCK_UNLOCKED(htable_locks)
 | ||
| +};
 | ||
| +static DEFINE_RWLOCK(htable_rwlock); /* global lock to protect htable_locks change */
 | ||
|  static unsigned int ipt_netflow_hash_rnd;
 | ||
| -struct hlist_head *ipt_netflow_hash __read_mostly; /* hash table memory */
 | ||
| +static struct hlist_head *ipt_netflow_hash __read_mostly; /* hash table memory */
 | ||
|  static unsigned int ipt_netflow_hash_size __read_mostly = 0; /* buckets */
 | ||
|  static LIST_HEAD(ipt_netflow_list); /* all flows */
 | ||
| +static DEFINE_SPINLOCK(hlist_lock); /* should almost always be locked w/o _bh */
 | ||
|  static LIST_HEAD(aggr_n_list);
 | ||
|  static LIST_HEAD(aggr_p_list);
 | ||
|  static DEFINE_RWLOCK(aggr_lock);
 | ||
| +#ifdef CONFIG_NF_NAT_NEEDED
 | ||
| +static LIST_HEAD(nat_list); /* nat events */
 | ||
| +static DEFINE_SPINLOCK(nat_lock);
 | ||
| +static unsigned long nat_events_start = 0;
 | ||
| +static unsigned long nat_events_stop = 0;
 | ||
| +#endif
 | ||
|  static struct kmem_cache *ipt_netflow_cachep __read_mostly; /* ipt_netflow memory */
 | ||
|  static atomic_t ipt_netflow_count = ATOMIC_INIT(0);
 | ||
| -static DEFINE_SPINLOCK(ipt_netflow_lock); /* hash table lock */
 | ||
|  
 | ||
| -static long long pdu_packets = 0, pdu_traf = 0;
 | ||
| -static struct netflow5_pdu pdu;
 | ||
| -static unsigned long pdu_ts_mod;
 | ||
| +static long long pdu_packets = 0, pdu_traf = 0; /* how much accounted traffic in pdu */
 | ||
| +static unsigned int pdu_count = 0;
 | ||
| +static unsigned int pdu_seq = 0;
 | ||
| +static unsigned int pdu_data_records = 0;
 | ||
| +static unsigned int pdu_tpl_records = 0;
 | ||
| +static unsigned long pdu_ts_mod; /* ts of last flow */
 | ||
| +static union {
 | ||
| +	struct netflow5_pdu v5;
 | ||
| +	struct netflow9_pdu v9;
 | ||
| +	struct ipfix_pdu ipfix;
 | ||
| +} pdu;
 | ||
| +static int engine_id = 0; /* Observation Domain */
 | ||
| +static __u8 *pdu_data_used;
 | ||
| +static __u8 *pdu_high_wm; /* high watermark */
 | ||
| +static unsigned int pdu_max_size; /* sizeof pdu */
 | ||
| +static struct flowset_data *pdu_flowset = NULL; /* current data flowset */
 | ||
| +
 | ||
| +static void (*netflow_export_flow)(struct ipt_netflow *nf);
 | ||
| +static void (*netflow_export_pdu)(void); /* called on timeout */
 | ||
| +static void netflow_switch_version(int ver);
 | ||
| +
 | ||
|  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
 | ||
|  static void netflow_work_fn(void *work);
 | ||
|  static DECLARE_WORK(netflow_work, netflow_work_fn, NULL);
 | ||
| @@ -146,19 +209,26 @@ static DECLARE_DELAYED_WORK(netflow_work, netflow_work_fn);
 | ||
|  #endif
 | ||
|  static struct timer_list rate_timer;
 | ||
|  
 | ||
| +#define TCP_SYN_ACK 0x12
 | ||
|  #define TCP_FIN_RST 0x05
 | ||
|  
 | ||
|  static long long sec_prate = 0, sec_brate = 0;
 | ||
|  static long long min_prate = 0, min_brate = 0;
 | ||
|  static long long min5_prate = 0, min5_brate = 0;
 | ||
| -static unsigned int metric = 10, min15_metric = 10, min5_metric = 10, min_metric = 10; /* hash metrics */
 | ||
| +static unsigned int metric = 100, min15_metric = 100, min5_metric = 100, min_metric = 100; /* hash metrics */
 | ||
|  
 | ||
|  static int set_hashsize(int new_size);
 | ||
|  static void destination_removeall(void);
 | ||
|  static int add_destinations(char *ptr);
 | ||
|  static void aggregation_remove(struct list_head *list);
 | ||
|  static int add_aggregation(char *ptr);
 | ||
| -static void netflow_scan_and_export(int flush);
 | ||
| +static int netflow_scan_and_export(int flush);
 | ||
| +enum {
 | ||
| +	DONT_FLUSH, AND_FLUSH
 | ||
| +};
 | ||
| +static int template_ids = FLOWSET_DATA_FIRST;
 | ||
| +static int tpl_count = 0; /* how much active templates */
 | ||
| +
 | ||
|  
 | ||
|  static inline __be32 bits2mask(int bits) {
 | ||
|  	return (bits? 0xffffffff << (32 - bits) : 0);
 | ||
| @@ -175,28 +245,46 @@ static inline int mask2bits(__be32 mask) {
 | ||
|  /* under that lock worker is always stopped and not rescheduled,
 | ||
|   * and we can call worker sub-functions manually */
 | ||
|  static DEFINE_MUTEX(worker_lock);
 | ||
| -static inline void __start_scan_worker(void)
 | ||
| +#define MIN_DELAY 1
 | ||
| +#define MAX_DELAY (HZ / 10)
 | ||
| +static int worker_delay = HZ / 10;
 | ||
| +static inline void _schedule_scan_worker(const int status)
 | ||
|  {
 | ||
| -	schedule_delayed_work(&netflow_work, HZ / 10);
 | ||
| +	/* rudimentary congestion avoidance */
 | ||
| +	if (status > 0)
 | ||
| +		worker_delay -= status;
 | ||
| +	else if (status < 0)
 | ||
| +		worker_delay /= 2;
 | ||
| +	else
 | ||
| +		worker_delay++;
 | ||
| +	if (worker_delay < MIN_DELAY)
 | ||
| +		worker_delay = MIN_DELAY;
 | ||
| +	else if (worker_delay > MAX_DELAY)
 | ||
| +		worker_delay = MAX_DELAY;
 | ||
| +	schedule_delayed_work(&netflow_work, worker_delay);
 | ||
|  }
 | ||
|  
 | ||
| -static inline void start_scan_worker(void)
 | ||
| +/* This is only called soon after pause_scan_worker. */
 | ||
| +static inline void cont_scan_worker(void)
 | ||
|  {
 | ||
| -	__start_scan_worker();
 | ||
| +	_schedule_scan_worker(0);
 | ||
|  	mutex_unlock(&worker_lock);
 | ||
|  }
 | ||
|  
 | ||
| -/* we always stop scanner before write_lock(&sock_lock)
 | ||
| - * to let it never hold that spin lock */
 | ||
| -static inline void __stop_scan_worker(void)
 | ||
| +static inline void _unschedule_scan_worker(void)
 | ||
|  {
 | ||
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)
 | ||
| +	cancel_rearming_delayed_work(&netflow_work);
 | ||
| +#else
 | ||
|  	cancel_delayed_work_sync(&netflow_work);
 | ||
| +#endif
 | ||
|  }
 | ||
|  
 | ||
| -static inline void stop_scan_worker(void)
 | ||
| +/* This is only used for quick pause (in procctl). */
 | ||
| +static inline void pause_scan_worker(void)
 | ||
|  {
 | ||
|  	mutex_lock(&worker_lock);
 | ||
| -	__stop_scan_worker();
 | ||
| +	_unschedule_scan_worker();
 | ||
|  }
 | ||
|  
 | ||
|  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
 | ||
| @@ -223,11 +311,14 @@ static int nf_seq_show(struct seq_file *seq, void *v)
 | ||
|  	int snum = 0;
 | ||
|  	int peak = (jiffies - peakflows_at) / HZ;
 | ||
|  
 | ||
| -	seq_printf(seq, "Flows: active %u (peak %u reached %ud%uh%um ago), mem %uK\n",
 | ||
| +	seq_printf(seq, "ipt_NETFLOW version " IPT_NETFLOW_VERSION ", srcversion %s\n",
 | ||
| +	    THIS_MODULE->srcversion);
 | ||
| +	seq_printf(seq, "Flows: active %u (peak %u reached %ud%uh%um ago), mem %uK, worker delay %d/%d.\n",
 | ||
|  		   nr_flows,
 | ||
|  		   peakflows,
 | ||
|  		   peak / (60 * 60 * 24), (peak / (60 * 60)) % 24, (peak / 60) % 60,
 | ||
| -		   (unsigned int)((nr_flows * sizeof(struct ipt_netflow)) >> 10));
 | ||
| +		   (unsigned int)((nr_flows * sizeof(struct ipt_netflow)) >> 10),
 | ||
| +		   worker_delay, HZ);
 | ||
|  
 | ||
|  	for_each_present_cpu(cpu) {
 | ||
|  		struct ipt_netflow_stat *st = &per_cpu(ipt_netflow_stat, cpu);
 | ||
| @@ -252,93 +343,123 @@ static int nf_seq_show(struct seq_file *seq, void *v)
 | ||
|  	}
 | ||
|  
 | ||
|  #define FFLOAT(x, prec) (int)(x) / prec, (int)(x) % prec
 | ||
| -	seq_printf(seq, "Hash: size %u (mem %uK), metric %d.%d, %d.%d, %d.%d, %d.%d. MemTraf: %llu pkt, %llu K (pdu %llu, %llu).\n",
 | ||
| -		   ipt_netflow_hash_size, 
 | ||
| -		   (unsigned int)((ipt_netflow_hash_size * sizeof(struct hlist_head)) >> 10),
 | ||
| -		   FFLOAT(metric, 10),
 | ||
| -		   FFLOAT(min_metric, 10),
 | ||
| -		   FFLOAT(min5_metric, 10),
 | ||
| -		   FFLOAT(min15_metric, 10),
 | ||
| -		   pkt_total - pkt_out + pdu_packets,
 | ||
| -		   (traf_total - traf_out + pdu_traf) >> 10,
 | ||
| -		   pdu_packets,
 | ||
| -		   pdu_traf);
 | ||
| -
 | ||
| -	seq_printf(seq, "Timeout: active %d, inactive %d. Maxflows %u\n",
 | ||
| -		   active_timeout,
 | ||
| -		   inactive_timeout,
 | ||
| -		   maxflows);
 | ||
| -
 | ||
| -	seq_printf(seq, "Rate: %llu bits/sec, %llu packets/sec; Avg 1 min: %llu bps, %llu pps; 5 min: %llu bps, %llu pps\n",
 | ||
| -		   sec_brate, sec_prate, min_brate, min_prate, min5_brate, min5_prate);
 | ||
| -
 | ||
| -	seq_printf(seq, "cpu#  stat: <search found new, trunc frag alloc maxflows>, sock: <ok fail cberr, bytes>, traffic: <pkt, bytes>, drop: <pkt, bytes>\n");
 | ||
| -
 | ||
| -	seq_printf(seq, "Total stat: %6llu %6llu %6llu, %4u %4u %4u %4u, sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n",
 | ||
| -		   (unsigned long long)searched,
 | ||
| -		   (unsigned long long)found,
 | ||
| -		   (unsigned long long)notfound,
 | ||
| -		   truncated, frags, alloc_err, maxflows_err,
 | ||
| -		   send_success, send_failed, sock_errors,
 | ||
| -		   (unsigned long long)exported_size >> 10,
 | ||
| -		   (unsigned long long)pkt_total, (unsigned long long)traf_total >> 20,
 | ||
| -		   (unsigned long long)pkt_drop, (unsigned long long)traf_drop >> 10);
 | ||
| +	seq_printf(seq, "Hash: size %u (mem %uK), metric %d.%02d [%d.%02d, %d.%02d, %d.%02d]."
 | ||
| +	    " MemTraf: %llu pkt, %llu K (pdu %llu, %llu), Out %llu pkt, %llu K.\n",
 | ||
| +	    ipt_netflow_hash_size,
 | ||
| +	    (unsigned int)((ipt_netflow_hash_size * sizeof(struct hlist_head)) >> 10),
 | ||
| +	    FFLOAT(metric, 100),
 | ||
| +	    FFLOAT(min_metric, 100),
 | ||
| +	    FFLOAT(min5_metric, 100),
 | ||
| +	    FFLOAT(min15_metric, 100),
 | ||
| +	    pkt_total - pkt_out + pdu_packets,
 | ||
| +	    (traf_total - traf_out + pdu_traf) >> 10,
 | ||
| +	    pdu_packets,
 | ||
| +	    pdu_traf,
 | ||
| +	    pkt_out,
 | ||
| +	    traf_out >> 10);
 | ||
| +
 | ||
| +	seq_printf(seq, "Rate: %llu bits/sec, %llu packets/sec;"
 | ||
| +	    " Avg 1 min: %llu bps, %llu pps; 5 min: %llu bps, %llu pps\n",
 | ||
| +	    sec_brate, sec_prate, min_brate, min_prate, min5_brate, min5_prate);
 | ||
| +
 | ||
| +	seq_printf(seq, "cpu#  stat: <search found new [metric], trunc frag alloc maxflows>,"
 | ||
| +	    " sock: <ok fail cberr, bytes>, traffic: <pkt, bytes>, drop: <pkt, bytes>\n");
 | ||
| +
 | ||
| +#define SAFEDIV(x,y) ((y)? ({ u64 __tmp = x; do_div(__tmp, y); (int)__tmp; }) : 0)
 | ||
| +	seq_printf(seq, "Total stat: %6llu %6llu %6llu [%d.%02d], %4u %4u %4u %4u,"
 | ||
| +	    " sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n",
 | ||
| +	    searched,
 | ||
| +	    (unsigned long long)found,
 | ||
| +	    (unsigned long long)notfound,
 | ||
| +	    FFLOAT(SAFEDIV(100LL * (searched + found + notfound), (found + notfound)), 100),
 | ||
| +	    truncated, frags, alloc_err, maxflows_err,
 | ||
| +	    send_success, send_failed, sock_errors,
 | ||
| +	    (unsigned long long)exported_size >> 10,
 | ||
| +	    (unsigned long long)pkt_total, (unsigned long long)traf_total >> 20,
 | ||
| +	    (unsigned long long)pkt_drop, (unsigned long long)traf_drop >> 10);
 | ||
|  
 | ||
|  	if (num_present_cpus() > 1) {
 | ||
|  		for_each_present_cpu(cpu) {
 | ||
|  			struct ipt_netflow_stat *st;
 | ||
|  
 | ||
|  			st = &per_cpu(ipt_netflow_stat, cpu);
 | ||
| -			seq_printf(seq, "cpu%u  stat: %6llu %6llu %6llu, %4u %4u %4u %4u, sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n",
 | ||
| -				   cpu,
 | ||
| -				   (unsigned long long)st->searched,
 | ||
| -				   (unsigned long long)st->found,
 | ||
| -				   (unsigned long long)st->notfound,
 | ||
| -				   st->truncated, st->frags, st->alloc_err, st->maxflows_err,
 | ||
| -				   st->send_success, st->send_failed, st->sock_errors,
 | ||
| -				   (unsigned long long)st->exported_size >> 10,
 | ||
| -				   (unsigned long long)st->pkt_total, (unsigned long long)st->traf_total >> 20,
 | ||
| -				   (unsigned long long)st->pkt_drop, (unsigned long long)st->traf_drop >> 10);
 | ||
| +			seq_printf(seq, "cpu%u  stat: %6llu %6llu %6llu [%d.%02d], %4u %4u %4u %4u,"
 | ||
| +			    " sock: %6u %u %u, %llu K, traffic: %llu, %llu MB, drop: %llu, %llu K\n",
 | ||
| +			    cpu,
 | ||
| +			    (unsigned long long)st->searched,
 | ||
| +			    (unsigned long long)st->found,
 | ||
| +			    (unsigned long long)st->notfound,
 | ||
| +			    FFLOAT(SAFEDIV(100LL * (st->searched + st->found + st->notfound), (st->found + st->notfound)), 100),
 | ||
| +			    st->truncated, st->frags, st->alloc_err, st->maxflows_err,
 | ||
| +			    st->send_success, st->send_failed, st->sock_errors,
 | ||
| +			    (unsigned long long)st->exported_size >> 10,
 | ||
| +			    (unsigned long long)st->pkt_total, (unsigned long long)st->traf_total >> 20,
 | ||
| +			    (unsigned long long)st->pkt_drop, (unsigned long long)st->traf_drop >> 10);
 | ||
|  		}
 | ||
|  	}
 | ||
|  
 | ||
| -	read_lock(&sock_lock);
 | ||
| +	seq_printf(seq, "Protocol version %d", protocol);
 | ||
| +	if (protocol == 10)
 | ||
| +		seq_printf(seq, " (ipfix)");
 | ||
| +	else
 | ||
| +		seq_printf(seq, " (netflow)");
 | ||
| +	if (protocol >= 9)
 | ||
| +		seq_printf(seq, ", refresh-rate %u, timeout-rate %u, (templates %d, active %d)",
 | ||
| +		    refresh_rate, timeout_rate, template_ids - FLOWSET_DATA_FIRST, tpl_count);
 | ||
| +
 | ||
| +	seq_printf(seq, ". Timeouts: active %d, inactive %d. Maxflows %u\n",
 | ||
| +	    active_timeout,
 | ||
| +	    inactive_timeout,
 | ||
| +	    maxflows);
 | ||
| +
 | ||
| +#ifdef CONFIG_NF_NAT_NEEDED
 | ||
| +	seq_printf(seq, "Natevents %s, count start %lu, stop %lu.\n", natevents? "enabled" : "disabled",
 | ||
| +	    nat_events_start, nat_events_stop);
 | ||
| +#endif
 | ||
| +
 | ||
| +	mutex_lock(&sock_lock);
 | ||
|  	list_for_each_entry(usock, &usock_list, list) {
 | ||
| -		struct sock *sk = usock->sock->sk;
 | ||
| -
 | ||
| -		seq_printf(seq, "sock%d: %u.%u.%u.%u:%u, sndbuf %u, filled %u, peak %u; err: sndbuf reached %u, other %u\n",
 | ||
| -			   snum,
 | ||
| -			   usock->ipaddr >> 24,
 | ||
| -			   (usock->ipaddr >> 16) & 255,
 | ||
| -			   (usock->ipaddr >> 8) & 255,
 | ||
| -			   usock->ipaddr & 255,
 | ||
| -			   usock->port,
 | ||
| -			   sk->sk_sndbuf,
 | ||
| -			   atomic_read(&sk->sk_wmem_alloc),
 | ||
| -			   atomic_read(&usock->wmem_peak),
 | ||
| -			   atomic_read(&usock->err_full),
 | ||
| -			   atomic_read(&usock->err_other));
 | ||
| +		seq_printf(seq, "sock%d: %u.%u.%u.%u:%u",
 | ||
| +		    snum,
 | ||
| +		    HIPQUAD(usock->ipaddr),
 | ||
| +		    usock->port);
 | ||
| +		if (usock->sock) {
 | ||
| +			struct sock *sk = usock->sock->sk;
 | ||
| +
 | ||
| +			seq_printf(seq, ", sndbuf %u, filled %u, peak %u;"
 | ||
| +			    " err: sndbuf reached %u, connect %u, other %u\n",
 | ||
| +			    sk->sk_sndbuf,
 | ||
| +			    atomic_read(&sk->sk_wmem_alloc),
 | ||
| +			    atomic_read(&usock->wmem_peak),
 | ||
| +			    atomic_read(&usock->err_full),
 | ||
| +			    atomic_read(&usock->err_connect),
 | ||
| +			    atomic_read(&usock->err_other));
 | ||
| +		} else
 | ||
| +			seq_printf(seq, " unconnected (%u attempts).\n",
 | ||
| +			    atomic_read(&usock->err_connect));
 | ||
|  		snum++;
 | ||
|  	}
 | ||
| -	read_unlock(&sock_lock);
 | ||
| +	mutex_unlock(&sock_lock);
 | ||
|  
 | ||
|  	read_lock_bh(&aggr_lock);
 | ||
|  	snum = 0;
 | ||
|  	list_for_each_entry(aggr_n, &aggr_n_list, list) {
 | ||
| -		seq_printf(seq, "aggr#%d net: match %u.%u.%u.%u/%d strip %d\n",
 | ||
| -			   snum,
 | ||
| -			   HIPQUAD(aggr_n->addr),
 | ||
| -			   mask2bits(aggr_n->mask),
 | ||
| -			   mask2bits(aggr_n->aggr_mask));
 | ||
| +		seq_printf(seq, "aggr#%d net: match %u.%u.%u.%u/%d strip %d (usage %u)\n",
 | ||
| +		    snum,
 | ||
| +		    HIPQUAD(aggr_n->addr),
 | ||
| +		    mask2bits(aggr_n->mask),
 | ||
| +		    mask2bits(aggr_n->aggr_mask),
 | ||
| +		    atomic_read(&aggr_n->usage));
 | ||
|  		snum++;
 | ||
|  	}
 | ||
|  	snum = 0;
 | ||
|  	list_for_each_entry(aggr_p, &aggr_p_list, list) {
 | ||
| -		seq_printf(seq, "aggr#%d port: ports %u-%u replace %u\n",
 | ||
| -			   snum,
 | ||
| -			   aggr_p->port1,
 | ||
| -			   aggr_p->port2,
 | ||
| -			   aggr_p->aggr_port);
 | ||
| +		seq_printf(seq, "aggr#%d port: ports %u-%u replace %u (usage %u)\n",
 | ||
| +		    snum,
 | ||
| +		    aggr_p->port1,
 | ||
| +		    aggr_p->port2,
 | ||
| +		    aggr_p->aggr_port,
 | ||
| +		    atomic_read(&aggr_p->usage));
 | ||
|  		snum++;
 | ||
|  	}
 | ||
|  	read_unlock_bh(&aggr_lock);
 | ||
| @@ -367,8 +488,13 @@ static struct file_operations nf_seq_fops = {
 | ||
|  #define BEFORE2632(x,y)
 | ||
|  #endif
 | ||
|  
 | ||
| +/* PAX need to know that we are allowed to write */
 | ||
| +#ifndef CONSTIFY_PLUGIN
 | ||
| +#define ctl_table_no_const ctl_table
 | ||
| +#endif
 | ||
| +
 | ||
|  /* sysctl /proc/sys/net/netflow */
 | ||
| -static int hsize_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
 | ||
| +static int hsize_procctl(ctl_table_no_const *ctl, int write, BEFORE2632(struct file *filp,)
 | ||
|  			 void __user *buffer, size_t *lenp, loff_t *fpos)
 | ||
|  {
 | ||
|  	void *orig = ctl->data;
 | ||
| @@ -386,20 +512,21 @@ static int hsize_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp
 | ||
|  		return ret;
 | ||
|  }
 | ||
|  
 | ||
| -static int sndbuf_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
 | ||
| +static int sndbuf_procctl(ctl_table_no_const *ctl, int write, BEFORE2632(struct file *filp,)
 | ||
|  			 void __user *buffer, size_t *lenp, loff_t *fpos)
 | ||
|  {
 | ||
|  	int ret;
 | ||
|  	struct ipt_netflow_sock *usock;
 | ||
| -       
 | ||
| -	read_lock(&sock_lock);
 | ||
| +
 | ||
| +	mutex_lock(&sock_lock);
 | ||
|  	if (list_empty(&usock_list)) {
 | ||
| -		read_unlock(&sock_lock);
 | ||
| +		mutex_unlock(&sock_lock);
 | ||
|  		return -ENOENT;
 | ||
|  	}
 | ||
|  	usock = list_first_entry(&usock_list, struct ipt_netflow_sock, list);
 | ||
| -	sndbuf = usock->sock->sk->sk_sndbuf;
 | ||
| -	read_unlock(&sock_lock);
 | ||
| +	if (usock->sock)
 | ||
| +		sndbuf = usock->sock->sk->sk_sndbuf;
 | ||
| +	mutex_unlock(&sock_lock);
 | ||
|  
 | ||
|  	ctl->data = &sndbuf;
 | ||
|  	ret = proc_dointvec(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
 | ||
| @@ -407,13 +534,14 @@ static int sndbuf_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *fil
 | ||
|  		return ret;
 | ||
|  	if (sndbuf < SOCK_MIN_SNDBUF)
 | ||
|  		sndbuf = SOCK_MIN_SNDBUF;
 | ||
| -	stop_scan_worker();
 | ||
| -	write_lock(&sock_lock);
 | ||
| +	pause_scan_worker();
 | ||
| +	mutex_lock(&sock_lock);
 | ||
|  	list_for_each_entry(usock, &usock_list, list) {
 | ||
| -		usock->sock->sk->sk_sndbuf = sndbuf;
 | ||
| +		if (usock->sock)
 | ||
| +			usock->sock->sk->sk_sndbuf = sndbuf;
 | ||
|  	}
 | ||
| -	write_unlock(&sock_lock);
 | ||
| -	start_scan_worker();
 | ||
| +	mutex_unlock(&sock_lock);
 | ||
| +	cont_scan_worker();
 | ||
|  	return ret;
 | ||
|  }
 | ||
|  
 | ||
| @@ -424,10 +552,10 @@ static int destination_procctl(ctl_table *ctl, int write, BEFORE2632(struct file
 | ||
|  
 | ||
|  	ret = proc_dostring(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
 | ||
|  	if (ret >= 0 && write) {
 | ||
| -		stop_scan_worker();
 | ||
| +		pause_scan_worker();
 | ||
|  		destination_removeall();
 | ||
|  		add_destinations(destination_buf);
 | ||
| -		start_scan_worker();
 | ||
| +		cont_scan_worker();
 | ||
|  	}
 | ||
|  	return ret;
 | ||
|  }
 | ||
| @@ -446,13 +574,12 @@ static int aggregation_procctl(ctl_table *ctl, int write, BEFORE2632(struct file
 | ||
|  	return ret;
 | ||
|  }
 | ||
|  
 | ||
| -static int flush_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
 | ||
| +static int flush_procctl(ctl_table_no_const *ctl, int write, BEFORE2632(struct file *filp,)
 | ||
|  			 void __user *buffer, size_t *lenp, loff_t *fpos)
 | ||
|  {
 | ||
|  	int ret;
 | ||
| -	int val;
 | ||
| +	int val = 0;
 | ||
|  
 | ||
| -	val = 0;
 | ||
|  	ctl->data = &val;
 | ||
|  	ret = proc_dointvec(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
 | ||
|  
 | ||
| @@ -461,14 +588,67 @@ static int flush_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp
 | ||
|  
 | ||
|  	if (val > 0) {
 | ||
|  		printk(KERN_INFO "ipt_NETFLOW: forced flush\n");
 | ||
| -		stop_scan_worker();
 | ||
| -		netflow_scan_and_export(1);
 | ||
| -		start_scan_worker();
 | ||
| +		pause_scan_worker();
 | ||
| +		netflow_scan_and_export(AND_FLUSH);
 | ||
| +		cont_scan_worker();
 | ||
| +	}
 | ||
| +
 | ||
| +	return ret;
 | ||
| +}
 | ||
| +
 | ||
| +static int protocol_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
 | ||
| +			 void __user *buffer, size_t *lenp, loff_t *fpos)
 | ||
| +{
 | ||
| +	int ret;
 | ||
| +	int ver = protocol;
 | ||
| +
 | ||
| +	ctl->data = &ver;
 | ||
| +	ret = proc_dointvec(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
 | ||
| +
 | ||
| +	if (!write)
 | ||
| +		return ret;
 | ||
| +
 | ||
| +	switch (ver) {
 | ||
| +		case 5:
 | ||
| +		case 9:
 | ||
| +		case 10:
 | ||
| +			printk(KERN_INFO "ipt_NETFLOW: forced flush (protocol version change)\n");
 | ||
| +			pause_scan_worker();
 | ||
| +			netflow_scan_and_export(AND_FLUSH);
 | ||
| +			netflow_switch_version(ver);
 | ||
| +			cont_scan_worker();
 | ||
| +			break;
 | ||
| +		default:
 | ||
| +			return -EPERM;
 | ||
|  	}
 | ||
|  
 | ||
|  	return ret;
 | ||
|  }
 | ||
|  
 | ||
| +#ifdef CONFIG_NF_NAT_NEEDED
 | ||
| +static void register_ct_events(void);
 | ||
| +static void unregister_ct_events(void);
 | ||
| +static int natevents_procctl(ctl_table *ctl, int write, BEFORE2632(struct file *filp,)
 | ||
| +			 void __user *buffer, size_t *lenp, loff_t *fpos)
 | ||
| +{
 | ||
| +	int ret;
 | ||
| +	int val = natevents;
 | ||
| +
 | ||
| +	ctl->data = &val;
 | ||
| +	ret = proc_dointvec(ctl, write, BEFORE2632(filp,) buffer, lenp, fpos);
 | ||
| +
 | ||
| +	if (!write)
 | ||
| +		return ret;
 | ||
| +
 | ||
| +	if (natevents && !val)
 | ||
| +		unregister_ct_events();
 | ||
| +	else if (!natevents && val)
 | ||
| +		register_ct_events();
 | ||
| +
 | ||
| +	return ret;
 | ||
| +}
 | ||
| +#endif
 | ||
| +
 | ||
|  static struct ctl_table_header *netflow_sysctl_header;
 | ||
|  
 | ||
|  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
 | ||
| @@ -547,6 +727,38 @@ static struct ctl_table netflow_sysctl_table[] = {
 | ||
|  		.maxlen		= sizeof(int),
 | ||
|  		.proc_handler	= &flush_procctl,
 | ||
|  	},
 | ||
| +	{
 | ||
| +		_CTL_NAME(10)
 | ||
| +		.procname	= "protocol",
 | ||
| +		.mode		= 0644,
 | ||
| +		.maxlen		= sizeof(int),
 | ||
| +		.proc_handler	= &protocol_procctl,
 | ||
| +	},
 | ||
| +	{
 | ||
| +		_CTL_NAME(11)
 | ||
| +		.procname	= "refresh-rate",
 | ||
| +		.mode		= 0644,
 | ||
| +		.data		= &refresh_rate,
 | ||
| +		.maxlen		= sizeof(int),
 | ||
| +		.proc_handler	= &proc_dointvec,
 | ||
| +	},
 | ||
| +	{
 | ||
| +		_CTL_NAME(12)
 | ||
| +		.procname	= "timeout-rate",
 | ||
| +		.mode		= 0644,
 | ||
| +		.data		= &timeout_rate,
 | ||
| +		.maxlen		= sizeof(int),
 | ||
| +		.proc_handler	= &proc_dointvec,
 | ||
| +	},
 | ||
| +#ifdef CONFIG_NF_NAT_NEEDED
 | ||
| +	{
 | ||
| +		_CTL_NAME(13)
 | ||
| +		.procname	= "natevents",
 | ||
| +		.mode		= 0644,
 | ||
| +		.maxlen		= sizeof(int),
 | ||
| +		.proc_handler	= &natevents_procctl,
 | ||
| +	},
 | ||
| +#endif
 | ||
|  	{ }
 | ||
|  };
 | ||
|  
 | ||
| @@ -588,18 +800,69 @@ static struct ctl_path netflow_sysctl_path[] = {
 | ||
|  static void sk_error_report(struct sock *sk)
 | ||
|  {
 | ||
|  	/* clear connection refused errors if any */
 | ||
| -	write_lock_bh(&sk->sk_callback_lock);
 | ||
|  	if (debug > 1)
 | ||
| -		printk(KERN_INFO "NETFLOW: socket error <%d>\n", sk->sk_err);
 | ||
| +		printk(KERN_INFO "ipt_NETFLOW: socket error <%d>\n", sk->sk_err);
 | ||
|  	sk->sk_err = 0;
 | ||
|  	NETFLOW_STAT_INC(sock_errors);
 | ||
| -	write_unlock_bh(&sk->sk_callback_lock);
 | ||
|  	return;
 | ||
|  }
 | ||
|  
 | ||
| +static struct socket *_usock_alloc(const __be32 ipaddr, const unsigned short port)
 | ||
| +{
 | ||
| +	struct sockaddr_in sin;
 | ||
| +	struct socket *sock;
 | ||
| +	int error;
 | ||
| +
 | ||
| +	if ((error = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
 | ||
| +		printk(KERN_ERR "ipt_NETFLOW: sock_create_kern error %d\n", -error);
 | ||
| +		return NULL;
 | ||
| +	}
 | ||
| +	sock->sk->sk_allocation = GFP_ATOMIC;
 | ||
| +	sock->sk->sk_prot->unhash(sock->sk); /* hidden from input */
 | ||
| +	sock->sk->sk_error_report = &sk_error_report; /* clear ECONNREFUSED */
 | ||
| +	if (sndbuf)
 | ||
| +		sock->sk->sk_sndbuf = sndbuf;
 | ||
| +	else
 | ||
| +		sndbuf = sock->sk->sk_sndbuf;
 | ||
| +	memset(&sin, 0, sizeof(sin));
 | ||
| +	sin.sin_family      = AF_INET;
 | ||
| +	sin.sin_addr.s_addr = htonl(ipaddr);
 | ||
| +	sin.sin_port        = htons(port);
 | ||
| +	if ((error = sock->ops->connect(sock, (struct sockaddr *)&sin,
 | ||
| +				  sizeof(sin), 0)) < 0) {
 | ||
| +		printk(KERN_ERR "ipt_NETFLOW: error connecting UDP socket %d,"
 | ||
| +		    " don't worry, will try reconnect later.\n", -error);
 | ||
| +		/* ENETUNREACH when no interfaces */
 | ||
| +		sock_release(sock);
 | ||
| +		return NULL;
 | ||
| +	}
 | ||
| +	return sock;
 | ||
| +}
 | ||
| +
 | ||
| +static void usock_connect(struct ipt_netflow_sock *usock, const int sendmsg)
 | ||
| +{
 | ||
| +	usock->sock = _usock_alloc(usock->ipaddr, usock->port);
 | ||
| +	if (usock->sock) {
 | ||
| +		if (sendmsg || debug)
 | ||
| +			printk(KERN_INFO "ipt_NETFLOW: connected %u.%u.%u.%u:%u\n",
 | ||
| +			    HIPQUAD(usock->ipaddr),
 | ||
| +			    usock->port);
 | ||
| +	} else {
 | ||
| +		atomic_inc(&usock->err_connect);
 | ||
| +		if (debug)
 | ||
| +			printk(KERN_INFO "ipt_NETFLOW: connect to %u.%u.%u.%u:%u failed%s.\n",
 | ||
| +			    HIPQUAD(usock->ipaddr),
 | ||
| +			    usock->port,
 | ||
| +			    (sendmsg)? " (pdu lost)" : "");
 | ||
| +	}
 | ||
| +	atomic_set(&usock->wmem_peak, 0);
 | ||
| +	atomic_set(&usock->err_full, 0);
 | ||
| +	atomic_set(&usock->err_other, 0);
 | ||
| +}
 | ||
| +
 | ||
|  // return numbers of sends succeded, 0 if none
 | ||
|  /* only called in scan worker path */
 | ||
| -static int netflow_send_pdu(void *buffer, int len)
 | ||
| +static void netflow_sendmsg(void *buffer, const int len)
 | ||
|  {
 | ||
|  	struct msghdr msg = { .msg_flags = MSG_DONTWAIT|MSG_NOSIGNAL };
 | ||
|  	struct kvec iov = { buffer, len };
 | ||
| @@ -607,9 +870,16 @@ static int netflow_send_pdu(void *buffer, int len)
 | ||
|  	int snum = 0;
 | ||
|  	struct ipt_netflow_sock *usock;
 | ||
|  
 | ||
| +	mutex_lock(&sock_lock);
 | ||
|  	list_for_each_entry(usock, &usock_list, list) {
 | ||
| +		if (!usock->sock)
 | ||
| +			usock_connect(usock, 1);
 | ||
| +		if (!usock->sock) {
 | ||
| +			NETFLOW_STAT_INC_ATOMIC(send_failed);
 | ||
| +			continue;
 | ||
| +		}
 | ||
|  		if (debug)
 | ||
| -			printk(KERN_INFO "netflow_send_pdu: sendmsg(%d, %d) [%u %u]\n",
 | ||
| +			printk(KERN_INFO "netflow_sendmsg: sendmsg(%d, %d) [%u %u]\n",
 | ||
|  			       snum,
 | ||
|  			       len,
 | ||
|  			       atomic_read(&usock->sock->sk->sk_wmem_alloc),
 | ||
| @@ -624,7 +894,7 @@ static int netflow_send_pdu(void *buffer, int len)
 | ||
|  				suggestion = ": increase sndbuf!";
 | ||
|  			} else
 | ||
|  				atomic_inc(&usock->err_other);
 | ||
| -			printk(KERN_ERR "netflow_send_pdu[%d]: sendmsg error %d: data loss %llu pkt, %llu bytes%s\n",
 | ||
| +			printk(KERN_ERR "ipt_NETFLOW: sendmsg[%d] error %d: data loss %llu pkt, %llu bytes%s\n",
 | ||
|  			       snum, ret, pdu_packets, pdu_traf, suggestion);
 | ||
|  		} else {
 | ||
|  			unsigned int wmem = atomic_read(&usock->sock->sk->sk_wmem_alloc);
 | ||
| @@ -636,98 +906,67 @@ static int netflow_send_pdu(void *buffer, int len)
 | ||
|  		}
 | ||
|  		snum++;
 | ||
|  	}
 | ||
| -	return retok;
 | ||
| +	mutex_unlock(&sock_lock);
 | ||
| +	if (retok == 0) {
 | ||
| +		/* not least one send succeded, account stat for dropped packets */
 | ||
| +		NETFLOW_STAT_ADD_ATOMIC(pkt_drop, pdu_packets);
 | ||
| +		NETFLOW_STAT_ADD_ATOMIC(traf_drop, pdu_traf);
 | ||
| +	}
 | ||
|  }
 | ||
|  
 | ||
| -static void usock_free(struct ipt_netflow_sock *usock)
 | ||
| +static void usock_close_free(struct ipt_netflow_sock *usock)
 | ||
|  {
 | ||
| -	printk(KERN_INFO "netflow: remove destination %u.%u.%u.%u:%u (%p)\n",
 | ||
| +	printk(KERN_INFO "ipt_NETFLOW: removed destination %u.%u.%u.%u:%u\n",
 | ||
|  	       HIPQUAD(usock->ipaddr),
 | ||
| -	       usock->port,
 | ||
| -	       usock->sock);
 | ||
| +	       usock->port);
 | ||
|  	if (usock->sock)
 | ||
|  		sock_release(usock->sock);
 | ||
|  	usock->sock = NULL;
 | ||
| -	vfree(usock); 
 | ||
| +	vfree(usock);
 | ||
|  }
 | ||
|  
 | ||
|  static void destination_removeall(void)
 | ||
|  {
 | ||
| -	write_lock(&sock_lock);
 | ||
| +	mutex_lock(&sock_lock);
 | ||
|  	while (!list_empty(&usock_list)) {
 | ||
|  		struct ipt_netflow_sock *usock;
 | ||
|  
 | ||
|  		usock = list_entry(usock_list.next, struct ipt_netflow_sock, list);
 | ||
|  		list_del(&usock->list);
 | ||
| -		write_unlock(&sock_lock);
 | ||
| -		usock_free(usock);
 | ||
| -		write_lock(&sock_lock);
 | ||
| +		mutex_unlock(&sock_lock);
 | ||
| +		usock_close_free(usock);
 | ||
| +		mutex_lock(&sock_lock);
 | ||
|  	}
 | ||
| -	write_unlock(&sock_lock);
 | ||
| +	mutex_unlock(&sock_lock);
 | ||
|  }
 | ||
|  
 | ||
|  static void add_usock(struct ipt_netflow_sock *usock)
 | ||
|  {
 | ||
|  	struct ipt_netflow_sock *sk;
 | ||
|  
 | ||
| -	/* don't need empty sockets */
 | ||
| -	if (!usock->sock) {
 | ||
| -		usock_free(usock);
 | ||
| -		return;
 | ||
| -	}
 | ||
| -
 | ||
| -	write_lock(&sock_lock);
 | ||
| +	mutex_lock(&sock_lock);
 | ||
|  	/* don't need duplicated sockets */
 | ||
|  	list_for_each_entry(sk, &usock_list, list) {
 | ||
|  		if (sk->ipaddr == usock->ipaddr &&
 | ||
|  		    sk->port == usock->port) {
 | ||
| -			write_unlock(&sock_lock);
 | ||
| -			usock_free(usock);
 | ||
| +			mutex_unlock(&sock_lock);
 | ||
| +			usock_close_free(usock);
 | ||
|  			return;
 | ||
|  		}
 | ||
|  	}
 | ||
|  	list_add_tail(&usock->list, &usock_list);
 | ||
| -	printk(KERN_INFO "netflow: added destination %u.%u.%u.%u:%u\n",
 | ||
| +	printk(KERN_INFO "ipt_NETFLOW: added destination %u.%u.%u.%u:%u%s\n",
 | ||
|  	       HIPQUAD(usock->ipaddr),
 | ||
| -	       usock->port);
 | ||
| -	write_unlock(&sock_lock);
 | ||
| -}
 | ||
| -
 | ||
| -static struct socket *usock_alloc(__be32 ipaddr, unsigned short port)
 | ||
| -{
 | ||
| -	struct sockaddr_in sin;
 | ||
| -	struct socket *sock;
 | ||
| -	int error;
 | ||
| -
 | ||
| -	if ((error = sock_create_kern(PF_INET, SOCK_DGRAM, IPPROTO_UDP, &sock)) < 0) {
 | ||
| -		printk(KERN_ERR "netflow: sock_create_kern error %d\n", error);
 | ||
| -		return NULL;
 | ||
| -	}
 | ||
| -	sock->sk->sk_allocation = GFP_ATOMIC;
 | ||
| -	sock->sk->sk_prot->unhash(sock->sk); /* hidden from input */
 | ||
| -	sock->sk->sk_error_report = &sk_error_report; /* clear ECONNREFUSED */
 | ||
| -	if (sndbuf)
 | ||
| -		sock->sk->sk_sndbuf = sndbuf;
 | ||
| -	else
 | ||
| -		sndbuf = sock->sk->sk_sndbuf;
 | ||
| -	memset(&sin, 0, sizeof(sin));
 | ||
| -	sin.sin_family      = AF_INET;
 | ||
| -	sin.sin_addr.s_addr = htonl(ipaddr);
 | ||
| -	sin.sin_port        = htons(port);
 | ||
| -	if ((error = sock->ops->connect(sock, (struct sockaddr *)&sin,
 | ||
| -				  sizeof(sin), 0)) < 0) {
 | ||
| -		printk(KERN_ERR "netflow: error connecting UDP socket %d\n", error);
 | ||
| -		sock_release(sock);
 | ||
| -		return NULL;
 | ||
| -	}
 | ||
| -	return sock;
 | ||
| +	       usock->port,
 | ||
| +	       (!usock->sock)? " (unconnected)" : "");
 | ||
| +	mutex_unlock(&sock_lock);
 | ||
|  }
 | ||
|  
 | ||
|  #define SEPARATORS " ,;\t\n"
 | ||
|  static int add_destinations(char *ptr)
 | ||
|  {
 | ||
|  	while (ptr) {
 | ||
| -		unsigned char ip[4]; 
 | ||
| +		unsigned char ip[4];
 | ||
|  		unsigned short port;
 | ||
|  
 | ||
|  		ptr += strspn(ptr, SEPARATORS);
 | ||
| @@ -737,17 +976,15 @@ static int add_destinations(char *ptr)
 | ||
|  			struct ipt_netflow_sock *usock;
 | ||
|  
 | ||
|  			if (!(usock = vmalloc(sizeof(*usock)))) {
 | ||
| -				printk(KERN_ERR "netflow: can't vmalloc socket\n");
 | ||
| +				printk(KERN_ERR "ipt_NETFLOW: can't vmalloc socket\n");
 | ||
|  				return -ENOMEM;
 | ||
|  			}
 | ||
|  
 | ||
|  			memset(usock, 0, sizeof(*usock));
 | ||
| +			atomic_set(&usock->err_connect, 0);
 | ||
|  			usock->ipaddr = ntohl(*(__be32 *)ip);
 | ||
|  			usock->port = port;
 | ||
| -			usock->sock = usock_alloc(usock->ipaddr, port);
 | ||
| -			atomic_set(&usock->wmem_peak, 0);
 | ||
| -			atomic_set(&usock->err_full, 0);
 | ||
| -			atomic_set(&usock->err_other, 0);
 | ||
| +			usock_connect(usock, 0);
 | ||
|  			add_usock(usock);
 | ||
|  		} else
 | ||
|  			break;
 | ||
| @@ -781,7 +1018,7 @@ static int add_aggregation(char *ptr)
 | ||
|  	LIST_HEAD(old_aggr_list);
 | ||
|  
 | ||
|  	while (ptr && *ptr) {
 | ||
| -		unsigned char ip[4]; 
 | ||
| +		unsigned char ip[4];
 | ||
|  		unsigned int mask;
 | ||
|  		unsigned int port1, port2;
 | ||
|  		unsigned int aggr_to;
 | ||
| @@ -792,16 +1029,16 @@ static int add_aggregation(char *ptr)
 | ||
|  			   ip, ip + 1, ip + 2, ip + 3, &mask, &aggr_to) == 6) {
 | ||
|  
 | ||
|  			if (!(aggr_n = vmalloc(sizeof(*aggr_n)))) {
 | ||
| -				printk(KERN_ERR "netflow: can't vmalloc aggr\n");
 | ||
| +				printk(KERN_ERR "ipt_NETFLOW: can't vmalloc aggr\n");
 | ||
|  				return -ENOMEM;
 | ||
|  			}
 | ||
|  			memset(aggr_n, 0, sizeof(*aggr_n));
 | ||
|  
 | ||
| -			aggr_n->addr = ntohl(*(__be32 *)ip);
 | ||
|  			aggr_n->mask = bits2mask(mask);
 | ||
| +			aggr_n->addr = ntohl(*(__be32 *)ip) & aggr_n->mask;
 | ||
|  			aggr_n->aggr_mask = bits2mask(aggr_to);
 | ||
|  			aggr_n->prefix = mask;
 | ||
| -			printk(KERN_INFO "netflow: add aggregation [%u.%u.%u.%u/%u=%u]\n",
 | ||
| +			printk(KERN_INFO "ipt_NETFLOW: add aggregation [%u.%u.%u.%u/%u=%u]\n",
 | ||
|  			       HIPQUAD(aggr_n->addr), mask, aggr_to);
 | ||
|  			list_add_tail(&aggr_n->list, &new_aggr_n_list);
 | ||
|  
 | ||
| @@ -809,7 +1046,7 @@ static int add_aggregation(char *ptr)
 | ||
|  			   sscanf(ptr, "%u=%u", &port2, &aggr_to) == 2) {
 | ||
|  
 | ||
|  			if (!(aggr_p = vmalloc(sizeof(*aggr_p)))) {
 | ||
| -				printk(KERN_ERR "netflow: can't vmalloc aggr\n");
 | ||
| +				printk(KERN_ERR "ipt_NETFLOW: can't vmalloc aggr\n");
 | ||
|  				return -ENOMEM;
 | ||
|  			}
 | ||
|  			memset(aggr_p, 0, sizeof(*aggr_p));
 | ||
| @@ -817,11 +1054,11 @@ static int add_aggregation(char *ptr)
 | ||
|  			aggr_p->port1 = port1;
 | ||
|  			aggr_p->port2 = port2;
 | ||
|  			aggr_p->aggr_port = aggr_to;
 | ||
| -			printk(KERN_INFO "netflow: add aggregation [%u-%u=%u]\n",
 | ||
| +			printk(KERN_INFO "ipt_NETFLOW: add aggregation [%u-%u=%u]\n",
 | ||
|  			       port1, port2, aggr_to);
 | ||
|  			list_add_tail(&aggr_p->list, &new_aggr_p_list);
 | ||
|  		} else {
 | ||
| -			printk(KERN_ERR "netflow: bad aggregation rule: %s (ignoring)\n", ptr);
 | ||
| +			printk(KERN_ERR "ipt_NETFLOW: bad aggregation rule: %s (ignoring)\n", ptr);
 | ||
|  			break;
 | ||
|  		}
 | ||
|  
 | ||
| @@ -846,17 +1083,23 @@ static int add_aggregation(char *ptr)
 | ||
|  
 | ||
|  static inline u_int32_t hash_netflow(const struct ipt_netflow_tuple *tuple)
 | ||
|  {
 | ||
| -	/* tuple is rounded to u32s */
 | ||
| -	return jhash2((u32 *)tuple, NETFLOW_TUPLE_SIZE, ipt_netflow_hash_rnd) % ipt_netflow_hash_size;
 | ||
| +	return murmur3(tuple, sizeof(struct ipt_netflow_tuple), ipt_netflow_hash_rnd) % ipt_netflow_hash_size;
 | ||
|  }
 | ||
|  
 | ||
|  static struct ipt_netflow *
 | ||
| -ipt_netflow_find(const struct ipt_netflow_tuple *tuple, unsigned int hash)
 | ||
| +ipt_netflow_find(const struct ipt_netflow_tuple *tuple, const unsigned int hash)
 | ||
|  {
 | ||
|  	struct ipt_netflow *nf;
 | ||
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0)
 | ||
| +#define compat_hlist_for_each_entry		      hlist_for_each_entry
 | ||
| +#define compat_hlist_for_each_entry_safe	      hlist_for_each_entry_safe
 | ||
|  	struct hlist_node *pos;
 | ||
| +#else /* since 3.9.0 */
 | ||
| +#define compat_hlist_for_each_entry(a,pos,c,d)	      hlist_for_each_entry(a,c,d)
 | ||
| +#define compat_hlist_for_each_entry_safe(a,pos,c,d,e) hlist_for_each_entry_safe(a,c,d,e)
 | ||
| +#endif
 | ||
|  
 | ||
| -	hlist_for_each_entry(nf, pos, &ipt_netflow_hash[hash], hlist) {
 | ||
| +	compat_hlist_for_each_entry(nf, pos, &ipt_netflow_hash[hash], hlist) {
 | ||
|  		if (ipt_netflow_tuple_equal(tuple, &nf->tuple) &&
 | ||
|  		    nf->nr_bytes < FLOW_FULL_WATERMARK) {
 | ||
|  			NETFLOW_STAT_INC(found);
 | ||
| @@ -868,7 +1111,7 @@ ipt_netflow_find(const struct ipt_netflow_tuple *tuple, unsigned int hash)
 | ||
|  	return NULL;
 | ||
|  }
 | ||
|  
 | ||
| -static struct hlist_head *alloc_hashtable(int size)
 | ||
| +static struct hlist_head *alloc_hashtable(const int size)
 | ||
|  {
 | ||
|  	struct hlist_head *hash;
 | ||
|  
 | ||
| @@ -879,19 +1122,18 @@ static struct hlist_head *alloc_hashtable(int size)
 | ||
|  		for (i = 0; i < size; i++)
 | ||
|  			INIT_HLIST_HEAD(&hash[i]);
 | ||
|  	} else
 | ||
| -		printk(KERN_ERR "netflow: unable to vmalloc hash table.\n");
 | ||
| +		printk(KERN_ERR "ipt_NETFLOW: unable to vmalloc hash table.\n");
 | ||
|  
 | ||
|  	return hash;
 | ||
|  }
 | ||
|  
 | ||
| -static int set_hashsize(int new_size)
 | ||
| +static int set_hashsize(const int new_size)
 | ||
|  {
 | ||
|  	struct hlist_head *new_hash, *old_hash;
 | ||
| -	unsigned int hash;
 | ||
|  	struct ipt_netflow *nf;
 | ||
|  	int rnd;
 | ||
|  
 | ||
| -	printk(KERN_INFO "netflow: allocating new hash table %u -> %u buckets\n",
 | ||
| +	printk(KERN_INFO "ipt_NETFLOW: allocating new hash table %u -> %u buckets\n",
 | ||
|  	       ipt_netflow_hash_size, new_size);
 | ||
|  	new_hash = alloc_hashtable(new_size);
 | ||
|  	if (!new_hash)
 | ||
| @@ -900,19 +1142,24 @@ static int set_hashsize(int new_size)
 | ||
|  	get_random_bytes(&rnd, 4);
 | ||
|  
 | ||
|  	/* rehash */
 | ||
| -	spin_lock_bh(&ipt_netflow_lock);
 | ||
| +	write_lock_bh(&htable_rwlock);
 | ||
|  	old_hash = ipt_netflow_hash;
 | ||
|  	ipt_netflow_hash = new_hash;
 | ||
|  	ipt_netflow_hash_size = new_size;
 | ||
|  	ipt_netflow_hash_rnd = rnd;
 | ||
|  	/* hash_netflow() is dependent on ipt_netflow_hash_* values */
 | ||
| +	spin_lock(&hlist_lock);
 | ||
|  	list_for_each_entry(nf, &ipt_netflow_list, list) {
 | ||
| +		unsigned int hash;
 | ||
| +
 | ||
|  		hash = hash_netflow(&nf->tuple);
 | ||
|  		/* hlist_add_head overwrites hlist pointers for this node
 | ||
|  		 * so it's good */
 | ||
|  		hlist_add_head(&nf->hlist, &new_hash[hash]);
 | ||
| +		nf->lock = &htable_locks[hash & LOCK_COUNT_MASK];
 | ||
|  	}
 | ||
| -	spin_unlock_bh(&ipt_netflow_lock);
 | ||
| +	spin_unlock(&hlist_lock);
 | ||
| +	write_unlock_bh(&htable_rwlock);
 | ||
|  
 | ||
|  	vfree(old_hash);
 | ||
|  
 | ||
| @@ -920,14 +1167,14 @@ static int set_hashsize(int new_size)
 | ||
|  }
 | ||
|  
 | ||
|  static struct ipt_netflow *
 | ||
| -ipt_netflow_alloc(struct ipt_netflow_tuple *tuple)
 | ||
| +ipt_netflow_alloc(const struct ipt_netflow_tuple *tuple)
 | ||
|  {
 | ||
|  	struct ipt_netflow *nf;
 | ||
|  	long count;
 | ||
|  
 | ||
|  	nf = kmem_cache_alloc(ipt_netflow_cachep, GFP_ATOMIC);
 | ||
|  	if (!nf) {
 | ||
| -		printk(KERN_ERR "Can't allocate netflow.\n");
 | ||
| +		printk(KERN_ERR "ipt_NETFLOW: Can't allocate flow.\n");
 | ||
|  		return NULL;
 | ||
|  	}
 | ||
|  
 | ||
| @@ -945,13 +1192,15 @@ ipt_netflow_alloc(struct ipt_netflow_tuple *tuple)
 | ||
|  
 | ||
|  static void ipt_netflow_free(struct ipt_netflow *nf)
 | ||
|  {
 | ||
| +	if (IS_DUMMY_FLOW(nf))
 | ||
| +		return;
 | ||
|  	atomic_dec(&ipt_netflow_count);
 | ||
|  	kmem_cache_free(ipt_netflow_cachep, nf);
 | ||
|  }
 | ||
|  
 | ||
|  static struct ipt_netflow *
 | ||
| -init_netflow(struct ipt_netflow_tuple *tuple,
 | ||
| -	     struct sk_buff *skb, unsigned int hash)
 | ||
| +init_netflow(const struct ipt_netflow_tuple *tuple,
 | ||
| +	     const struct sk_buff *skb, const unsigned int hash)
 | ||
|  {
 | ||
|  	struct ipt_netflow *nf;
 | ||
|  
 | ||
| @@ -959,93 +1208,774 @@ init_netflow(struct ipt_netflow_tuple *tuple,
 | ||
|  	if (!nf)
 | ||
|  		return NULL;
 | ||
|  
 | ||
| +	nf->lock = &htable_locks[hash & LOCK_COUNT_MASK];
 | ||
|  	hlist_add_head(&nf->hlist, &ipt_netflow_hash[hash]);
 | ||
| +	spin_lock(&hlist_lock);
 | ||
|  	list_add(&nf->list, &ipt_netflow_list);
 | ||
| +	spin_unlock(&hlist_lock);
 | ||
|  
 | ||
|  	return nf;
 | ||
|  }
 | ||
|  
 | ||
|  /* cook pdu, send, and clean */
 | ||
|  /* only called in scan worker path */
 | ||
| -static void netflow_export_pdu(void)
 | ||
| +static void netflow_export_pdu_v5(void)
 | ||
|  {
 | ||
|  	struct timeval tv;
 | ||
|  	int pdusize;
 | ||
|  
 | ||
| -	if (!pdu.nr_records)
 | ||
| +	if (!pdu_data_records)
 | ||
|  		return;
 | ||
|  
 | ||
|  	if (debug > 1)
 | ||
| -		printk(KERN_INFO "netflow_export_pdu with %d records\n", pdu.nr_records);
 | ||
| -	do_gettimeofday(&tv);
 | ||
| -
 | ||
| -	pdu.version	= htons(5);
 | ||
| -	pdu.ts_uptime	= htonl(jiffies_to_msecs(jiffies));
 | ||
| -	pdu.ts_usecs	= htonl(tv.tv_sec);
 | ||
| -	pdu.ts_unsecs	= htonl(tv.tv_usec);
 | ||
| -	//pdu.eng_type	= 0;
 | ||
| -	//pdu.eng_id	= 0;
 | ||
| -	//pdu.padding	= 0;
 | ||
| +		printk(KERN_INFO "netflow_export_pdu_v5 with %d records\n", pdu_data_records);
 | ||
|  
 | ||
| -	pdusize = NETFLOW5_HEADER_SIZE + sizeof(struct netflow5_record) * pdu.nr_records;
 | ||
| -
 | ||
| -	/* especially fix nr_records before export */
 | ||
| -	pdu.nr_records	= htons(pdu.nr_records);
 | ||
| +	pdu.v5.version		= htons(5);
 | ||
| +	pdu.v5.nr_records	= htons(pdu_data_records);
 | ||
| +	pdu.v5.ts_uptime	= htonl(jiffies_to_msecs(jiffies));
 | ||
| +	do_gettimeofday(&tv);
 | ||
| +	pdu.v5.ts_usecs		= htonl(tv.tv_sec);
 | ||
| +	pdu.v5.ts_unsecs	= htonl(tv.tv_usec);
 | ||
| +	pdu.v5.seq		= htonl(pdu_seq);
 | ||
| +	//pdu.v5.eng_type	= 0;
 | ||
| +	pdu.v5.eng_id		= engine_id;
 | ||
| +	//pdu.v5.padding	= 0;
 | ||
|  
 | ||
| -	if (netflow_send_pdu(&pdu, pdusize) == 0) {
 | ||
| -		/* not least one send succeded, account stat for dropped packets */
 | ||
| -		NETFLOW_STAT_ADD_ATOMIC(pkt_drop, pdu_packets);
 | ||
| -		NETFLOW_STAT_ADD_ATOMIC(traf_drop, pdu_traf);
 | ||
| -	}
 | ||
| +	pdusize = NETFLOW5_HEADER_SIZE + sizeof(struct netflow5_record) * pdu_data_records;
 | ||
|  
 | ||
| -	pdu.seq = htonl(ntohl(pdu.seq) + ntohs(pdu.nr_records));
 | ||
| +	netflow_sendmsg(&pdu.v5, pdusize);
 | ||
|  
 | ||
| -	pdu.nr_records	= 0;
 | ||
|  	pdu_packets = 0;
 | ||
| -	pdu_traf = 0;
 | ||
| +	pdu_traf    = 0;
 | ||
| +
 | ||
| +	pdu_seq += pdu_data_records;
 | ||
| +	pdu_count++;
 | ||
| +	pdu_data_records = 0;
 | ||
|  }
 | ||
|  
 | ||
|  /* only called in scan worker path */
 | ||
| -static void netflow_export_flow(struct ipt_netflow *nf)
 | ||
| +static void netflow_export_flow_v5(struct ipt_netflow *nf)
 | ||
|  {
 | ||
|  	struct netflow5_record *rec;
 | ||
|  
 | ||
| -	if (debug > 2)
 | ||
| -		printk(KERN_INFO "adding flow to export (%d)\n", pdu.nr_records);
 | ||
| +	if (unlikely(debug > 2))
 | ||
| +		printk(KERN_INFO "adding flow to export (%d)\n", pdu_data_records);
 | ||
|  
 | ||
|  	pdu_packets += nf->nr_packets;
 | ||
|  	pdu_traf += nf->nr_bytes;
 | ||
|  	pdu_ts_mod = jiffies;
 | ||
| -	rec = &pdu.flow[pdu.nr_records++];
 | ||
| +	rec = &pdu.v5.flow[pdu_data_records++];
 | ||
|  
 | ||
|  	/* make V5 flow record */
 | ||
| -	rec->s_addr	= nf->tuple.s_addr;
 | ||
| -	rec->d_addr	= nf->tuple.d_addr;
 | ||
| -	//rec->nexthop	= 0;
 | ||
| +	rec->s_addr	= nf->tuple.src.ip;
 | ||
| +	rec->d_addr	= nf->tuple.dst.ip;
 | ||
| +	rec->nexthop	= nf->nh.ip;
 | ||
|  	rec->i_ifc	= htons(nf->tuple.i_ifc);
 | ||
|  	rec->o_ifc	= htons(nf->o_ifc);
 | ||
|  	rec->nr_packets = htonl(nf->nr_packets);
 | ||
|  	rec->nr_octets	= htonl(nf->nr_bytes);
 | ||
| -	rec->ts_first	= htonl(jiffies_to_msecs(nf->ts_first));
 | ||
| -	rec->ts_last	= htonl(jiffies_to_msecs(nf->ts_last));
 | ||
| +	rec->first_ms	= htonl(jiffies_to_msecs(nf->ts_first));
 | ||
| +	rec->last_ms	= htonl(jiffies_to_msecs(nf->ts_last));
 | ||
|  	rec->s_port	= nf->tuple.s_port;
 | ||
|  	rec->d_port	= nf->tuple.d_port;
 | ||
| -	//rec->reserved	= 0;
 | ||
| +	//rec->reserved	= 0; /* pdu is always zeroized for v5 in netflow_switch_version */
 | ||
|  	rec->tcp_flags	= nf->tcp_flags;
 | ||
|  	rec->protocol	= nf->tuple.protocol;
 | ||
|  	rec->tos	= nf->tuple.tos;
 | ||
| -	//rec->s_as	= 0;
 | ||
| -	//rec->d_as	= 0;
 | ||
| +#ifdef CONFIG_NF_NAT_NEEDED
 | ||
| +	rec->s_as	= nf->s_as;
 | ||
| +	rec->d_as	= nf->d_as;
 | ||
| +#endif
 | ||
|  	rec->s_mask	= nf->s_mask;
 | ||
|  	rec->d_mask	= nf->d_mask;
 | ||
|  	//rec->padding	= 0;
 | ||
|  	ipt_netflow_free(nf);
 | ||
|  
 | ||
| -	if (pdu.nr_records == NETFLOW5_RECORDS_MAX)
 | ||
| +	if (pdu_data_records == NETFLOW5_RECORDS_MAX)
 | ||
| +		netflow_export_pdu_v5();
 | ||
| +}
 | ||
| +
 | ||
| +/* pdu is initially blank, export current pdu, and prepare next for filling. */
 | ||
| +static void netflow_export_pdu_v9(void)
 | ||
| +{
 | ||
| +	struct timeval tv;
 | ||
| +	int pdusize;
 | ||
| +
 | ||
| +	if (pdu_data_used <= pdu.v9.data)
 | ||
| +		return;
 | ||
| +
 | ||
| +	if (debug > 1)
 | ||
| +		printk(KERN_INFO "netflow_export_pdu_v9 with %d records\n",
 | ||
| +		    pdu_data_records + pdu_tpl_records);
 | ||
| +
 | ||
| +	pdu.v9.version		= htons(9);
 | ||
| +	pdu.v9.nr_records	= htons(pdu_data_records + pdu_tpl_records);
 | ||
| +	pdu.v9.sys_uptime_ms	= htonl(jiffies_to_msecs(jiffies));
 | ||
| +	do_gettimeofday(&tv);
 | ||
| +	pdu.v9.export_time_s	= htonl(tv.tv_sec);
 | ||
| +	pdu.v9.seq		= htonl(pdu_seq);
 | ||
| +	pdu.v9.source_id	= engine_id;
 | ||
| +
 | ||
| +	pdusize = pdu_data_used - (unsigned char *)&pdu.v9;
 | ||
| +
 | ||
| +	netflow_sendmsg(&pdu.v9, pdusize);
 | ||
| +
 | ||
| +	pdu_packets = 0;
 | ||
| +	pdu_traf    = 0;
 | ||
| +
 | ||
| +	pdu_seq++;
 | ||
| +	pdu_count++;
 | ||
| +	pdu_data_records = pdu_tpl_records = 0;
 | ||
| +	pdu_data_used = pdu.v9.data;
 | ||
| +	pdu_flowset = NULL;
 | ||
| +}
 | ||
| +
 | ||
| +static void netflow_export_pdu_ipfix(void)
 | ||
| +{
 | ||
| +	struct timeval tv;
 | ||
| +	int pdusize;
 | ||
| +
 | ||
| +	if (pdu_data_used <= pdu.ipfix.data)
 | ||
| +		return;
 | ||
| +
 | ||
| +	if (debug > 1)
 | ||
| +		printk(KERN_INFO "netflow_export_pduX with %d records\n",
 | ||
| +		    pdu_data_records);
 | ||
| +
 | ||
| +	pdu.ipfix.version	= htons(10);
 | ||
| +	do_gettimeofday(&tv);
 | ||
| +	pdu.ipfix.export_time_s	= htonl(tv.tv_sec);
 | ||
| +	pdu.ipfix.seq		= htonl(pdu_seq);
 | ||
| +	pdu.ipfix.odomain_id	= engine_id;
 | ||
| +	pdusize = pdu_data_used - (unsigned char *)&pdu;
 | ||
| +	pdu.ipfix.length	= htons(pdusize);
 | ||
| +
 | ||
| +	netflow_sendmsg(&pdu.ipfix, pdusize);
 | ||
| +
 | ||
| +	pdu_packets = 0;
 | ||
| +	pdu_traf    = 0;
 | ||
| +
 | ||
| +	pdu_seq += pdu_data_records;
 | ||
| +	pdu_count++;
 | ||
| +	pdu_data_records = pdu_tpl_records = 0;
 | ||
| +	pdu_data_used = pdu.ipfix.data;
 | ||
| +	pdu_flowset = NULL;
 | ||
| +}
 | ||
| +
 | ||
| +static inline int pdu_have_space(const size_t size)
 | ||
| +{
 | ||
| +	return ((pdu_data_used + size) <= pdu_high_wm);
 | ||
| +}
 | ||
| +
 | ||
| +static inline unsigned char *pdu_grab_space(const size_t size)
 | ||
| +{
 | ||
| +	unsigned char *ptr = pdu_data_used;
 | ||
| +	pdu_data_used += size;
 | ||
| +	return ptr;
 | ||
| +}
 | ||
| +
 | ||
| +// allocate data space in pdu, or fail if pdu is reallocated.
 | ||
| +static inline unsigned char *pdu_alloc_fail(const size_t size)
 | ||
| +{
 | ||
| +	if (!pdu_have_space(size)) {
 | ||
|  		netflow_export_pdu();
 | ||
| +		return NULL;
 | ||
| +	}
 | ||
| +	return pdu_grab_space(size);
 | ||
| +}
 | ||
| +
 | ||
| +/* doesn't fail, but can provide empty pdu. */
 | ||
| +static unsigned char *pdu_alloc(const size_t size)
 | ||
| +{
 | ||
| +	return pdu_alloc_fail(size) ?: pdu_grab_space(size);
 | ||
| +}
 | ||
| +
 | ||
| +/* global table of sizes of template field types */
 | ||
| +static u_int8_t tpl_element_sizes[] = {
 | ||
| +	[IN_BYTES]	= 4,
 | ||
| +	[IN_PKTS]	= 4,
 | ||
| +	[PROTOCOL]	= 1,
 | ||
| +	[TOS]		= 1,
 | ||
| +	[TCP_FLAGS]	= 1,
 | ||
| +	[L4_SRC_PORT]	= 2,
 | ||
| +	[IPV4_SRC_ADDR]	= 4,
 | ||
| +	[SRC_MASK]	= 1,
 | ||
| +	[INPUT_SNMP]	= 2,
 | ||
| +	[L4_DST_PORT]	= 2,
 | ||
| +	[IPV4_DST_ADDR]	= 4,
 | ||
| +	[DST_MASK]	= 1,
 | ||
| +	[OUTPUT_SNMP]	= 2,
 | ||
| +	[IPV4_NEXT_HOP]	= 4,
 | ||
| +	//[SRC_AS]		= 2,
 | ||
| +	//[DST_AS]		= 2,
 | ||
| +	//[BGP_IPV4_NEXT_HOP]	= 4,
 | ||
| +	//[MUL_DST_PKTS]	= 4,
 | ||
| +	//[MUL_DST_BYTES]	= 4,
 | ||
| +	[LAST_SWITCHED]	= 4,
 | ||
| +	[FIRST_SWITCHED]= 4,
 | ||
| +	[IPV6_SRC_ADDR]	= 16,
 | ||
| +	[IPV6_DST_ADDR]	= 16,
 | ||
| +	[IPV6_FLOW_LABEL] = 3,
 | ||
| +	[ICMP_TYPE]	= 2,
 | ||
| +	[MUL_IGMP_TYPE]	= 1,
 | ||
| +	//[TOTAL_BYTES_EXP]	= 4,
 | ||
| +	//[TOTAL_PKTS_EXP]	= 4,
 | ||
| +	//[TOTAL_FLOWS_EXP]	= 4,
 | ||
| +	[IPV6_NEXT_HOP]			   = 16,
 | ||
| +	[IPV6_OPTION_HEADERS]		   = 2,
 | ||
| +	[commonPropertiesId]		   = 4,
 | ||
| +	[ipv4Options]			   = 4,
 | ||
| +	[tcpOptions]			   = 4,
 | ||
| +	[postNATSourceIPv4Address]	   = 4,
 | ||
| +	[postNATDestinationIPv4Address]	   = 4,
 | ||
| +	[postNAPTSourceTransportPort]	   = 2,
 | ||
| +	[postNAPTDestinationTransportPort] = 2,
 | ||
| +	[natEvent]			   = 1,
 | ||
| +	[postNATSourceIPv6Address]	   = 16,
 | ||
| +	[postNATDestinationIPv6Address]	   = 16,
 | ||
| +	[IPSecSPI]			   = 4,
 | ||
| +	[observationTimeMilliseconds]	   = 8,
 | ||
| +	[observationTimeMicroseconds]	   = 8,
 | ||
| +	[observationTimeNanoseconds]	   = 8,
 | ||
| +};
 | ||
| +
 | ||
| +#define TEMPLATES_HASH_BSIZE	8
 | ||
| +#define TEMPLATES_HASH_SIZE	(1<<TEMPLATES_HASH_BSIZE)
 | ||
| +static struct hlist_head templates_hash[TEMPLATES_HASH_SIZE];
 | ||
| +
 | ||
| +struct base_template {
 | ||
| +	int length; /* number of elements in template */
 | ||
| +	u_int16_t types[]; /* {type, size} pairs */
 | ||
| +};
 | ||
| +
 | ||
| +/* base templates */
 | ||
| +#define BTPL_BASE	0x00000001	/* base stat */
 | ||
| +#define BTPL_IP4	0x00000002	/* IPv4 */
 | ||
| +#define BTPL_MASK4	0x00000004	/* Aggregated */
 | ||
| +#define BTPL_PORTS	0x00000008	/* UDP&TCP */
 | ||
| +#define BTPL_IP6	0x00000010	/* IPv6 */
 | ||
| +#define BTPL_ICMP	0x00000020	/* ICMP */
 | ||
| +#define BTPL_IGMP	0x00000040	/* IGMP */
 | ||
| +#define BTPL_IPSEC	0x00000080	/* AH&ESP */
 | ||
| +#define BTPL_NAT4	0x00000100	/* NAT IPv4 */
 | ||
| +#define BTPL_MARK	0x00000400	/* connmark */
 | ||
| +#define BTPL_LABEL6	0x00000800	/* IPv6 flow label */
 | ||
| +#define BTPL_OPTIONS4	0x00001000	/* IPv4 Options */
 | ||
| +#define BTPL_OPTIONS6	0x00002000	/* IPv6 Options */
 | ||
| +#define BTPL_TCPOPTIONS	0x00004000	/* TCP Options */
 | ||
| +#define BTPL_MAX	32
 | ||
| +
 | ||
| +static struct base_template template_base = {
 | ||
| +	.types = {
 | ||
| +		INPUT_SNMP,
 | ||
| +		OUTPUT_SNMP,
 | ||
| +		IN_PKTS,
 | ||
| +		IN_BYTES,
 | ||
| +		FIRST_SWITCHED,
 | ||
| +		LAST_SWITCHED,
 | ||
| +		PROTOCOL,
 | ||
| +		TOS,
 | ||
| +		0
 | ||
| +	}
 | ||
| +};
 | ||
| +static struct base_template template_ipv4 = {
 | ||
| +	.types = {
 | ||
| +		IPV4_SRC_ADDR,
 | ||
| +		IPV4_DST_ADDR,
 | ||
| +		IPV4_NEXT_HOP,
 | ||
| +		0
 | ||
| +	}
 | ||
| +};
 | ||
| +static struct base_template template_options4 = {
 | ||
| +	.types = { ipv4Options, 0 }
 | ||
| +};
 | ||
| +static struct base_template template_tcpoptions = {
 | ||
| +	.types = { tcpOptions, 0 }
 | ||
| +};
 | ||
| +static struct base_template template_ipv6 = {
 | ||
| +	.types = {
 | ||
| +		IPV6_SRC_ADDR,
 | ||
| +		IPV6_DST_ADDR,
 | ||
| +		IPV6_NEXT_HOP,
 | ||
| +		0
 | ||
| +	}
 | ||
| +};
 | ||
| +static struct base_template template_options6 = {
 | ||
| +	.types = { IPV6_OPTION_HEADERS, 0 }
 | ||
| +};
 | ||
| +static struct base_template template_label6 = {
 | ||
| +	.types = { IPV6_FLOW_LABEL, 0 }
 | ||
| +};
 | ||
| +static struct base_template template_ipv4_mask = {
 | ||
| +	.types = {
 | ||
| +		SRC_MASK,
 | ||
| +		DST_MASK,
 | ||
| +		0
 | ||
| +	}
 | ||
| +};
 | ||
| +static struct base_template template_ports = {
 | ||
| +	.types = {
 | ||
| +		L4_SRC_PORT,
 | ||
| +		L4_DST_PORT,
 | ||
| +		TCP_FLAGS,
 | ||
| +		0
 | ||
| +	}
 | ||
| +};
 | ||
| +static struct base_template template_icmp = {
 | ||
| +	.types = { ICMP_TYPE, 0 }
 | ||
| +};
 | ||
| +static struct base_template template_igmp = {
 | ||
| +	.types = { MUL_IGMP_TYPE, 0 }
 | ||
| +};
 | ||
| +static struct base_template template_ipsec = {
 | ||
| +	.types = { IPSecSPI, 0 }
 | ||
| +};
 | ||
| +static struct base_template template_nat4 = {
 | ||
| +	.types = {
 | ||
| +		observationTimeMilliseconds,
 | ||
| +		IPV4_SRC_ADDR,
 | ||
| +		IPV4_DST_ADDR,
 | ||
| +		postNATSourceIPv4Address,
 | ||
| +		postNATDestinationIPv4Address,
 | ||
| +		L4_SRC_PORT,
 | ||
| +		L4_DST_PORT,
 | ||
| +		postNAPTSourceTransportPort,
 | ||
| +		postNAPTDestinationTransportPort,
 | ||
| +		PROTOCOL,
 | ||
| +		natEvent,
 | ||
| +		0
 | ||
| +	}
 | ||
| +};
 | ||
| +static struct base_template template_mark = {
 | ||
| +	.types = { commonPropertiesId, 0 }
 | ||
| +};
 | ||
| +
 | ||
| +struct data_template {
 | ||
| +	struct hlist_node hlist;
 | ||
| +	int tpl_mask;
 | ||
| +
 | ||
| +	int length; /* number of elements in template */
 | ||
| +	int tpl_size; /* summary size of template with flowset header */
 | ||
| +	int rec_size; /* summary size of all recods of template (w/o flowset header) */
 | ||
| +	int template_id_n; /* assigned from template_ids, network order. */
 | ||
| +	int		exported_cnt;
 | ||
| +	unsigned long	exported_ts; /* jiffies */
 | ||
| +	u_int16_t fields[]; /* {type, size} pairs */
 | ||
| +} __attribute__ ((packed));
 | ||
| +
 | ||
| +#define TPL_FIELD_NSIZE 4 /* one complete template field's network size */
 | ||
| +
 | ||
| +static void free_templates(void)
 | ||
| +{
 | ||
| +	int i;
 | ||
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0)
 | ||
| +	struct hlist_node *pos;
 | ||
| +#endif
 | ||
| +	struct hlist_node *tmp;
 | ||
| +
 | ||
| +	for (i = 0; i < TEMPLATES_HASH_SIZE; i++) {
 | ||
| +		struct hlist_head *thead = &templates_hash[i];
 | ||
| +		struct data_template *tpl;
 | ||
| +
 | ||
| +		compat_hlist_for_each_entry_safe(tpl, pos, tmp, thead, hlist)
 | ||
| +			kfree(tpl);
 | ||
| +		INIT_HLIST_HEAD(thead);
 | ||
| +	}
 | ||
| +	tpl_count = 0;
 | ||
| +}
 | ||
| +
 | ||
| +/* create combined template from mask */
 | ||
| +static struct data_template *get_template(const int tmask)
 | ||
| +{
 | ||
| +	struct base_template *tlist[BTPL_MAX];
 | ||
| +	struct data_template *tpl;
 | ||
| +	int tnum;
 | ||
| +	int length;
 | ||
| +	int i, j, k;
 | ||
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0)
 | ||
| +	struct hlist_node *pos;
 | ||
| +#endif
 | ||
| +	int hash = hash_long(tmask, TEMPLATES_HASH_BSIZE);
 | ||
| +
 | ||
| +	compat_hlist_for_each_entry(tpl, pos, &templates_hash[hash], hlist)
 | ||
| +		if (tpl->tpl_mask == tmask)
 | ||
| +			return tpl;
 | ||
| +
 | ||
| +	tnum = 0;
 | ||
| +	if (tmask & BTPL_IP4) {
 | ||
| +		tlist[tnum++] = &template_ipv4;
 | ||
| +		if (tmask & BTPL_OPTIONS4)
 | ||
| +			tlist[tnum++] = &template_options4;
 | ||
| +		if (tmask & BTPL_MASK4)
 | ||
| +			tlist[tnum++] = &template_ipv4_mask;
 | ||
| +	} else if (tmask & BTPL_IP6) {
 | ||
| +		tlist[tnum++] = &template_ipv6;
 | ||
| +		if (tmask & BTPL_LABEL6)
 | ||
| +			tlist[tnum++] = &template_label6;
 | ||
| +		if (tmask & BTPL_OPTIONS6)
 | ||
| +			tlist[tnum++] = &template_options6;
 | ||
| +	} else if (tmask & BTPL_NAT4)
 | ||
| +		tlist[tnum++] = &template_nat4;
 | ||
| +	if (tmask & BTPL_PORTS)
 | ||
| +		tlist[tnum++] = &template_ports;
 | ||
| +	if (tmask & BTPL_BASE)
 | ||
| +		tlist[tnum++] = &template_base;
 | ||
| +	if (tmask & BTPL_TCPOPTIONS)
 | ||
| +		tlist[tnum++] = &template_tcpoptions;
 | ||
| +	if (tmask & BTPL_ICMP)
 | ||
| +		tlist[tnum++] = &template_icmp;
 | ||
| +	if (tmask & BTPL_IGMP)
 | ||
| +		tlist[tnum++] = &template_igmp;
 | ||
| +	if (tmask & BTPL_IPSEC)
 | ||
| +		tlist[tnum++] = &template_ipsec;
 | ||
| +	if (tmask & BTPL_MARK)
 | ||
| +		tlist[tnum++] = &template_mark;
 | ||
| +
 | ||
| +	/* calc memory size */
 | ||
| +	length = 0;
 | ||
| +	for (i = 0; i < tnum; i++) {
 | ||
| +		if (!tlist[i]->length) {
 | ||
| +			for (k = 0; tlist[i]->types[k]; k++);
 | ||
| +			tlist[i]->length = k;
 | ||
| +		}
 | ||
| +		length += tlist[i]->length;
 | ||
| +	}
 | ||
| +	/* elements are pairs + one termiantor */
 | ||
| +	tpl = kmalloc(sizeof(struct data_template) + (length * 2 + 1) * sizeof(u_int16_t), GFP_KERNEL);
 | ||
| +	if (!tpl) {
 | ||
| +		printk(KERN_ERR "ipt_NETFLOW: unable to kmalloc template.\n");
 | ||
| +		return NULL;
 | ||
| +	}
 | ||
| +	tpl->tpl_mask = tmask;
 | ||
| +	tpl->length = length;
 | ||
| +	tpl->tpl_size = sizeof(struct flowset_template);
 | ||
| +	tpl->rec_size = 0;
 | ||
| +	tpl->template_id_n = htons(template_ids++);
 | ||
| +	tpl->exported_cnt = 0;
 | ||
| +	tpl->exported_ts = 0;
 | ||
| +
 | ||
| +	j = 0;
 | ||
| +	for (i = 0; i < tnum; i++) {
 | ||
| +		struct base_template *btpl = tlist[i];
 | ||
| +
 | ||
| +		for (k = 0; k < btpl->length; k++) {
 | ||
| +			int size;
 | ||
| +			int type = btpl->types[k];
 | ||
| +
 | ||
| +			tpl->fields[j++] = type;
 | ||
| +			size = tpl_element_sizes[type];
 | ||
| +			tpl->fields[j++] = size;
 | ||
| +			tpl->rec_size += size;
 | ||
| +		}
 | ||
| +		tpl->tpl_size += btpl->length * TPL_FIELD_NSIZE;
 | ||
| +	}
 | ||
| +	tpl->fields[j++] = 0;
 | ||
| +
 | ||
| +	hlist_add_head(&tpl->hlist, &templates_hash[hash]);
 | ||
| +	tpl_count++;
 | ||
| +
 | ||
| +	return tpl;
 | ||
| +}
 | ||
| +
 | ||
| +static void pdu_add_template(struct data_template *tpl)
 | ||
| +{
 | ||
| +	int i;
 | ||
| +	unsigned char *ptr;
 | ||
| +	struct flowset_template *ntpl;
 | ||
| +	__be16 *sptr;
 | ||
| +
 | ||
| +	ptr = pdu_alloc(tpl->tpl_size);
 | ||
| +	ntpl = (struct flowset_template *)ptr;
 | ||
| +	ntpl->flowset_id  = protocol == 9? htons(FLOWSET_TEMPLATE) : htons(IPFIX_TEMPLATE);
 | ||
| +	ntpl->length	  = htons(tpl->tpl_size);
 | ||
| +	ntpl->template_id = tpl->template_id_n;
 | ||
| +	ntpl->field_count = htons(tpl->length);
 | ||
| +	ptr += sizeof(struct flowset_template);
 | ||
| +	sptr = (__be16 *)ptr;
 | ||
| +	for (i = 0; ; ) {
 | ||
| +		int type = tpl->fields[i++];
 | ||
| +		if (!type)
 | ||
| +			break;
 | ||
| +		*sptr++ = htons(type);
 | ||
| +		*sptr++ = htons(tpl->fields[i++]);
 | ||
| +	}
 | ||
| +
 | ||
| +	tpl->exported_cnt = pdu_count;
 | ||
| +	tpl->exported_ts = jiffies;
 | ||
| +
 | ||
| +	pdu_flowset = NULL;
 | ||
| +	pdu_tpl_records++;
 | ||
| +}
 | ||
| +
 | ||
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
 | ||
| +static inline s64 portable_ktime_to_ms(const ktime_t kt)
 | ||
| +{
 | ||
| +	struct timeval tv = ktime_to_timeval(kt);
 | ||
| +	return (s64) tv.tv_sec * MSEC_PER_SEC + tv.tv_usec / USEC_PER_MSEC;
 | ||
| +}
 | ||
| +#define ktime_to_ms portable_ktime_to_ms
 | ||
| +#endif
 | ||
| +
 | ||
| +/* encode one field */
 | ||
| +typedef struct in6_addr in6_t;
 | ||
| +static inline void add_ipv4_field(__u8 *ptr, const int type, const struct ipt_netflow *nf)
 | ||
| +{
 | ||
| +	switch (type) {
 | ||
| +		case IN_BYTES:	     *(__be32 *)ptr = htonl(nf->nr_bytes); break;
 | ||
| +		case IN_PKTS:	     *(__be32 *)ptr = htonl(nf->nr_packets); break;
 | ||
| +		case FIRST_SWITCHED: *(__be32 *)ptr = htonl(jiffies_to_msecs(nf->ts_first)); break;
 | ||
| +		case LAST_SWITCHED:  *(__be32 *)ptr = htonl(jiffies_to_msecs(nf->ts_last)); break;
 | ||
| +		case IPV4_SRC_ADDR:  *(__be32 *)ptr = nf->tuple.src.ip; break;
 | ||
| +		case IPV4_DST_ADDR:  *(__be32 *)ptr = nf->tuple.dst.ip; break;
 | ||
| +		case IPV4_NEXT_HOP:  *(__be32 *)ptr = nf->nh.ip; break;
 | ||
| +		case L4_SRC_PORT:    *(__be16 *)ptr = nf->tuple.s_port; break;
 | ||
| +		case L4_DST_PORT:    *(__be16 *)ptr = nf->tuple.d_port; break;
 | ||
| +		case INPUT_SNMP:     *(__be16 *)ptr = htons(nf->tuple.i_ifc); break;
 | ||
| +		case OUTPUT_SNMP:    *(__be16 *)ptr = htons(nf->o_ifc); break;
 | ||
| +		case PROTOCOL:	               *ptr = nf->tuple.protocol; break;
 | ||
| +		case TCP_FLAGS:	               *ptr = nf->tcp_flags; break;
 | ||
| +		case TOS:	               *ptr = nf->tuple.tos; break;
 | ||
| +		case IPV6_SRC_ADDR:   *(in6_t *)ptr = nf->tuple.src.in6; break;
 | ||
| +		case IPV6_DST_ADDR:   *(in6_t *)ptr = nf->tuple.dst.in6; break;
 | ||
| +		case IPV6_NEXT_HOP:   *(in6_t *)ptr = nf->nh.in6; break;
 | ||
| +		case IPV6_FLOW_LABEL:        *ptr++ = nf->flow_label >> 16;
 | ||
| +				     *(__be16 *)ptr = nf->flow_label;
 | ||
| +				      break;
 | ||
| +		case tcpOptions:     *(__be32 *)ptr = htonl(nf->tcpoptions); break;
 | ||
| +		case ipv4Options:    *(__be32 *)ptr = htonl(nf->options); break;
 | ||
| +		case IPV6_OPTION_HEADERS: *(__be16 *)ptr = htons(nf->options); break;
 | ||
| +#ifdef CONFIG_NF_CONNTRACK_MARK
 | ||
| +		case commonPropertiesId:
 | ||
| +				     *(__be32 *)ptr = htonl(nf->mark); break;
 | ||
| +#endif
 | ||
| +		case SRC_MASK:	               *ptr = nf->s_mask; break;
 | ||
| +		case DST_MASK:	               *ptr = nf->d_mask; break;
 | ||
| +		case ICMP_TYPE:	     *(__be16 *)ptr = nf->tuple.d_port; break;
 | ||
| +		case MUL_IGMP_TYPE:            *ptr = nf->tuple.d_port; break;
 | ||
| +#ifdef CONFIG_NF_NAT_NEEDED
 | ||
| +		case postNATSourceIPv4Address:	       *(__be32 *)ptr = nf->nat->post.s_addr; break;
 | ||
| +		case postNATDestinationIPv4Address:    *(__be32 *)ptr = nf->nat->post.d_addr; break;
 | ||
| +		case postNAPTSourceTransportPort:      *(__be16 *)ptr = nf->nat->post.s_port; break;
 | ||
| +		case postNAPTDestinationTransportPort: *(__be16 *)ptr = nf->nat->post.d_port; break;
 | ||
| +		case natEvent:				         *ptr = nf->nat->nat_event; break;
 | ||
| +#endif
 | ||
| +		case IPSecSPI:        *(__u32 *)ptr = (nf->tuple.s_port << 16) | nf->tuple.d_port; break;
 | ||
| +		case observationTimeMilliseconds:
 | ||
| +				      *(__be64 *)ptr = cpu_to_be64(ktime_to_ms(nf->ts_obs)); break;
 | ||
| +		case observationTimeMicroseconds:
 | ||
| +				      *(__be64 *)ptr = cpu_to_be64(ktime_to_us(nf->ts_obs)); break;
 | ||
| +		case observationTimeNanoseconds:
 | ||
| +				      *(__be64 *)ptr = cpu_to_be64(ktime_to_ns(nf->ts_obs)); break;
 | ||
| +		default:
 | ||
| +					memset(ptr, 0, tpl_element_sizes[type]);
 | ||
| +	}
 | ||
| +}
 | ||
| +
 | ||
| +#define PAD_SIZE 4 /* rfc prescribes flowsets to be padded */
 | ||
| +
 | ||
| +/* cache timeout_rate in jiffies */
 | ||
| +static inline unsigned long timeout_rate_j(void)
 | ||
| +{
 | ||
| +	static unsigned int t_rate = 0;
 | ||
| +	static unsigned long t_rate_j = 0;
 | ||
| +
 | ||
| +	if (unlikely(timeout_rate != t_rate)) {
 | ||
| +		struct timeval tv = { .tv_sec = timeout_rate * 60, .tv_usec = 0 };
 | ||
| +
 | ||
| +		t_rate = timeout_rate;
 | ||
| +		t_rate_j = timeval_to_jiffies(&tv);
 | ||
| +	}
 | ||
| +	return t_rate_j;
 | ||
| +}
 | ||
| +
 | ||
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
 | ||
| +#define IPPROTO_UDPLITE 136
 | ||
| +#endif
 | ||
| +
 | ||
| +#ifndef time_is_before_jiffies
 | ||
| +#define time_is_before_jiffies(a) time_after(jiffies, a)
 | ||
| +#endif
 | ||
| +
 | ||
| +static void netflow_export_flow_tpl(struct ipt_netflow *nf)
 | ||
| +{
 | ||
| +	unsigned char *ptr;
 | ||
| +	int i;
 | ||
| +	struct data_template *tpl;
 | ||
| +	int tpl_mask = BTPL_BASE;
 | ||
| +
 | ||
| +	if (unlikely(debug > 2))
 | ||
| +		printk(KERN_INFO "adding flow to export (%d)\n",
 | ||
| +		    pdu_data_records + pdu_tpl_records);
 | ||
| +
 | ||
| +	if (likely(nf->tuple.l3proto == AF_INET)) {
 | ||
| +		tpl_mask |= BTPL_IP4;
 | ||
| +		if (unlikely(nf->options))
 | ||
| +			tpl_mask |= BTPL_OPTIONS4;
 | ||
| +	} else {
 | ||
| +		tpl_mask |= BTPL_IP6;
 | ||
| +		if (unlikely(nf->options))
 | ||
| +			tpl_mask |= BTPL_OPTIONS6;
 | ||
| +		if (unlikely(nf->flow_label))
 | ||
| +			tpl_mask |= BTPL_LABEL6;
 | ||
| +	}
 | ||
| +	if (unlikely(nf->tcpoptions))
 | ||
| +		tpl_mask |= BTPL_TCPOPTIONS;
 | ||
| +	if (unlikely(nf->s_mask || nf->d_mask))
 | ||
| +		tpl_mask |= BTPL_MASK4;
 | ||
| +	if (likely(nf->tuple.protocol == IPPROTO_TCP ||
 | ||
| +		    nf->tuple.protocol == IPPROTO_UDP ||
 | ||
| +		    nf->tuple.protocol == IPPROTO_SCTP ||
 | ||
| +		    nf->tuple.protocol == IPPROTO_UDPLITE))
 | ||
| +		tpl_mask |= BTPL_PORTS;
 | ||
| +	else if (nf->tuple.protocol == IPPROTO_ICMP)
 | ||
| +		tpl_mask |= BTPL_ICMP;
 | ||
| +	else if (nf->tuple.protocol == IPPROTO_IGMP)
 | ||
| +		tpl_mask |= BTPL_IGMP;
 | ||
| +#ifdef CONFIG_NF_CONNTRACK_MARK
 | ||
| +	if (nf->mark)
 | ||
| +		tpl_mask |= BTPL_MARK;
 | ||
| +#endif
 | ||
| +#ifdef CONFIG_NF_NAT_NEEDED
 | ||
| +	if (nf->nat)
 | ||
| +		tpl_mask = BTPL_NAT4;
 | ||
| +#endif
 | ||
| +
 | ||
| +	tpl = get_template(tpl_mask);
 | ||
| +	if (unlikely(!tpl)) {
 | ||
| +		printk(KERN_INFO "ipt_NETFLOW: template allocation failed.\n");
 | ||
| +		NETFLOW_STAT_INC(alloc_err);
 | ||
| +		NETFLOW_STAT_ADD_ATOMIC(pkt_drop, nf->nr_packets);
 | ||
| +		NETFLOW_STAT_ADD_ATOMIC(traf_drop, nf->nr_bytes);
 | ||
| +		ipt_netflow_free(nf);
 | ||
| +		return;
 | ||
| +	}
 | ||
| +
 | ||
| +	if (unlikely(!pdu_flowset ||
 | ||
| +	    pdu_flowset->flowset_id != tpl->template_id_n ||
 | ||
| +	    !(ptr = pdu_alloc_fail(tpl->rec_size)))) {
 | ||
| +
 | ||
| +		/* if there was previous data template we should pad it to 4 bytes */
 | ||
| +		if (pdu_flowset) {
 | ||
| +			int padding = (PAD_SIZE - ntohs(pdu_flowset->length) % PAD_SIZE) % PAD_SIZE;
 | ||
| +			if (padding && (ptr = pdu_alloc_fail(padding))) {
 | ||
| +				pdu_flowset->length = htons(ntohs(pdu_flowset->length) + padding);
 | ||
| +				for (; padding; padding--)
 | ||
| +					*ptr++ = 0;
 | ||
| +			}
 | ||
| +		}
 | ||
| +
 | ||
| +		if (!tpl->exported_ts ||
 | ||
| +		    pdu_count > (tpl->exported_cnt + refresh_rate) ||
 | ||
| +		    time_is_before_jiffies(tpl->exported_ts + timeout_rate_j())) {
 | ||
| +			pdu_add_template(tpl);
 | ||
| +		}
 | ||
| +
 | ||
| +		ptr = pdu_alloc(sizeof(struct flowset_data) + tpl->rec_size);
 | ||
| +		pdu_flowset = (struct flowset_data *)ptr;
 | ||
| +		pdu_flowset->flowset_id = tpl->template_id_n;
 | ||
| +		pdu_flowset->length     = htons(sizeof(struct flowset_data));
 | ||
| +		ptr += sizeof(struct flowset_data);
 | ||
| +	}
 | ||
| +
 | ||
| +	/* encode all fields */
 | ||
| +	for (i = 0; ; ) {
 | ||
| +		int type = tpl->fields[i++];
 | ||
| +
 | ||
| +		if (!type)
 | ||
| +			break;
 | ||
| +		add_ipv4_field(ptr, type, nf);
 | ||
| +		ptr += tpl->fields[i++];
 | ||
| +	}
 | ||
| +
 | ||
| +	pdu_data_records++;
 | ||
| +	pdu_flowset->length = htons(ntohs(pdu_flowset->length) + tpl->rec_size);
 | ||
| +
 | ||
| +	pdu_packets += nf->nr_packets;
 | ||
| +	pdu_traf    += nf->nr_bytes;
 | ||
| +
 | ||
| +	ipt_netflow_free(nf);
 | ||
| +	pdu_ts_mod = jiffies;
 | ||
| +}
 | ||
| +
 | ||
| +static void netflow_switch_version(const int ver)
 | ||
| +{
 | ||
| +	protocol = ver;
 | ||
| +	if (protocol == 5) {
 | ||
| +		memset(&pdu, 0, sizeof(pdu));
 | ||
| +		netflow_export_flow = &netflow_export_flow_v5;
 | ||
| +		netflow_export_pdu = &netflow_export_pdu_v5;
 | ||
| +	} else if (protocol == 9) {
 | ||
| +		pdu_data_used = pdu.v9.data;
 | ||
| +		pdu_max_size = sizeof(pdu.v9);
 | ||
| +		pdu_high_wm = (unsigned char *)&pdu + pdu_max_size;
 | ||
| +		netflow_export_flow = &netflow_export_flow_tpl;
 | ||
| +		netflow_export_pdu = &netflow_export_pdu_v9;
 | ||
| +	} else { /* IPFIX */
 | ||
| +		pdu_data_used = pdu.ipfix.data;
 | ||
| +		pdu_max_size = sizeof(pdu.ipfix);
 | ||
| +		pdu_high_wm = (unsigned char *)&pdu + pdu_max_size;
 | ||
| +		netflow_export_flow = &netflow_export_flow_tpl;
 | ||
| +		netflow_export_pdu = &netflow_export_pdu_ipfix;
 | ||
| +	}
 | ||
| +	if (protocol != 5)
 | ||
| +		free_templates();
 | ||
| +	pdu_data_records = pdu_tpl_records = 0;
 | ||
| +	pdu_flowset = NULL;
 | ||
| +	printk(KERN_INFO "ipt_NETFLOW protocol version %d (%s) enabled.\n",
 | ||
| +	    protocol, protocol == 10? "IPFIX" : "NetFlow");
 | ||
| +}
 | ||
| +
 | ||
| +#ifdef CONFIG_NF_NAT_NEEDED
 | ||
| +static void export_nat_event(struct nat_event *nel)
 | ||
| +{
 | ||
| +	static struct ipt_netflow nf = { { NULL } };
 | ||
| +
 | ||
| +	nf.tuple.l3proto = AF_INET;
 | ||
| +	nf.tuple.protocol = nel->protocol;
 | ||
| +	nf.nat = nel; /* this is also flag of dummy flow */
 | ||
| +	nf.tcp_flags = (nel->nat_event == NAT_DESTROY)? TCP_FIN_RST : TCP_SYN_ACK;
 | ||
| +	if (protocol >= 9) {
 | ||
| +		nf.ts_obs = nel->ts_ktime;
 | ||
| +		nf.tuple.src.ip = nel->pre.s_addr;
 | ||
| +		nf.tuple.dst.ip = nel->pre.d_addr;
 | ||
| +		nf.tuple.s_port = nel->pre.s_port;
 | ||
| +		nf.tuple.d_port = nel->pre.d_port;
 | ||
| +		netflow_export_flow(&nf);
 | ||
| +	} else { /* v5 */
 | ||
| +		/* The weird v5 packet(s).
 | ||
| +		 * src and dst will be same as in data flow from the FORWARD chain
 | ||
| +		 * where src is pre-nat src ip and dst is post-nat dst ip.
 | ||
| +		 * What we lacking here is external src ip for SNAT, or
 | ||
| +		 * pre-nat dst ip for DNAT. We will put this into Nexthop field
 | ||
| +		 * with port into src/dst AS field. tcp_flags will distinguish it's
 | ||
| +		 * start or stop event. Two flows in case of full nat. */
 | ||
| +		nf.tuple.src.ip = nel->pre.s_addr;
 | ||
| +		nf.tuple.s_port = nel->pre.s_port;
 | ||
| +		nf.tuple.dst.ip = nel->post.d_addr;
 | ||
| +		nf.tuple.d_port = nel->post.d_port;
 | ||
| +
 | ||
| +		nf.ts_first = nel->ts_jiffies;
 | ||
| +		nf.ts_last = nel->ts_jiffies;
 | ||
| +		if (nel->pre.s_addr != nel->post.s_addr ||
 | ||
| +		    nel->pre.s_port != nel->post.s_port) {
 | ||
| +			nf.nh.ip = nel->post.s_addr;
 | ||
| +			nf.s_as  = nel->post.s_port;
 | ||
| +			nf.d_as  = 0;
 | ||
| +			netflow_export_flow(&nf);
 | ||
| +		}
 | ||
| +		if (nel->pre.d_addr != nel->post.d_addr ||
 | ||
| +		    nel->pre.d_port != nel->post.d_port) {
 | ||
| +			nf.nh.ip = nel->pre.d_addr;
 | ||
| +			nf.s_as  = 0;
 | ||
| +			nf.d_as  = nel->pre.d_port;
 | ||
| +			netflow_export_flow(&nf);
 | ||
| +		}
 | ||
| +	}
 | ||
| +	kfree(nel);
 | ||
|  }
 | ||
| +#endif /* CONFIG_NF_NAT_NEEDED */
 | ||
|  
 | ||
| -static inline int active_needs_export(struct ipt_netflow *nf, long a_timeout)
 | ||
| +static inline int active_needs_export(const struct ipt_netflow *nf, const long a_timeout)
 | ||
|  {
 | ||
|  	/* active too long, finishing, or having too much bytes */
 | ||
|  	return ((jiffies - nf->ts_first) > a_timeout) ||
 | ||
| @@ -1057,42 +1987,77 @@ static inline int active_needs_export(struct ipt_netflow *nf, long a_timeout)
 | ||
|  
 | ||
|  /* could be called with zero to flush cache and pdu */
 | ||
|  /* this function is guaranteed to be called non-concurrently */
 | ||
| -static void netflow_scan_and_export(int flush)
 | ||
| +/* return -1 is trylockfailed, 0 if nothin gexported, >=1 if exported something */
 | ||
| +static int netflow_scan_and_export(const int flush)
 | ||
|  {
 | ||
|  	long i_timeout = inactive_timeout * HZ;
 | ||
|  	long a_timeout = active_timeout * HZ;
 | ||
| +	int trylock_failed = 0;
 | ||
| +	int pdu_c = pdu_count;
 | ||
|  
 | ||
|  	if (flush)
 | ||
|  		i_timeout = 0;
 | ||
|  
 | ||
| -	spin_lock_bh(&ipt_netflow_lock);
 | ||
| -	while (!list_empty(&ipt_netflow_list)) {
 | ||
| +	local_bh_disable();
 | ||
| +	spin_lock(&hlist_lock);
 | ||
| +	/* This is different order of locking than elsewhere,
 | ||
| +	 * so we trylock&break to avoid deadlock. */
 | ||
| +
 | ||
| +	while (likely(!list_empty(&ipt_netflow_list))) {
 | ||
|  		struct ipt_netflow *nf;
 | ||
| -	       
 | ||
| +
 | ||
| +		/* Last entry, which is usually oldest. */
 | ||
|  		nf = list_entry(ipt_netflow_list.prev, struct ipt_netflow, list);
 | ||
| +		if (!spin_trylock(nf->lock)) {
 | ||
| +			trylock_failed = 1;
 | ||
| +			break;
 | ||
| +		}
 | ||
|  		/* Note: i_timeout checked with >= to allow specifying zero timeout
 | ||
|  		 * to purge all flows on module unload */
 | ||
|  		if (((jiffies - nf->ts_last) >= i_timeout) ||
 | ||
|  		    active_needs_export(nf, a_timeout)) {
 | ||
|  			hlist_del(&nf->hlist);
 | ||
| +			spin_unlock(nf->lock);
 | ||
| +
 | ||
|  			list_del(&nf->list);
 | ||
| +			spin_unlock(&hlist_lock);
 | ||
| +			local_bh_enable();
 | ||
| +
 | ||
|  			NETFLOW_STAT_ADD(pkt_out, nf->nr_packets);
 | ||
|  			NETFLOW_STAT_ADD(traf_out, nf->nr_bytes);
 | ||
| -			spin_unlock_bh(&ipt_netflow_lock);
 | ||
|  			netflow_export_flow(nf);
 | ||
| -			spin_lock_bh(&ipt_netflow_lock);
 | ||
| +
 | ||
| +			local_bh_disable();
 | ||
| +			spin_lock(&hlist_lock);
 | ||
|  		} else {
 | ||
| +			spin_unlock(nf->lock);
 | ||
|  			/* all flows which need to be exported is always at the tail
 | ||
|  			 * so if no more exportable flows we can break */
 | ||
|  			break;
 | ||
|  		}
 | ||
|  	}
 | ||
| -	spin_unlock_bh(&ipt_netflow_lock);
 | ||
| -
 | ||
| +	spin_unlock(&hlist_lock);
 | ||
| +	local_bh_enable();
 | ||
| +
 | ||
| +#ifdef CONFIG_NF_NAT_NEEDED
 | ||
| +	spin_lock_bh(&nat_lock);
 | ||
| +	while (!list_empty(&nat_list)) {
 | ||
| +		struct nat_event *nel;
 | ||
| +
 | ||
| +		nel = list_entry(nat_list.next, struct nat_event, list);
 | ||
| +		list_del(&nel->list);
 | ||
| +		spin_unlock_bh(&nat_lock);
 | ||
| +		export_nat_event(nel);
 | ||
| +		spin_lock_bh(&nat_lock);
 | ||
| +	}
 | ||
| +	spin_unlock_bh(&nat_lock);
 | ||
| +#endif
 | ||
|  	/* flush flows stored in pdu if there no new flows for too long */
 | ||
|  	/* Note: using >= to allow flow purge on zero timeout */
 | ||
|  	if ((jiffies - pdu_ts_mod) >= i_timeout)
 | ||
|  		netflow_export_pdu();
 | ||
| +
 | ||
| +	return trylock_failed? -1 : pdu_count - pdu_c;
 | ||
|  }
 | ||
|  
 | ||
|  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20)
 | ||
| @@ -1101,8 +2066,10 @@ static void netflow_work_fn(void *dummy)
 | ||
|  static void netflow_work_fn(struct work_struct *dummy)
 | ||
|  #endif
 | ||
|  {
 | ||
| -	netflow_scan_and_export(0);
 | ||
| -	__start_scan_worker();
 | ||
| +	int status;
 | ||
| +
 | ||
| +	status = netflow_scan_and_export(DONT_FLUSH);
 | ||
| +	_schedule_scan_worker(status);
 | ||
|  }
 | ||
|  
 | ||
|  #define RATESHIFT 2
 | ||
| @@ -1154,7 +2121,7 @@ static void rate_timer_calc(unsigned long dummy)
 | ||
|  	old_found = found;
 | ||
|  	old_notfound = notfound;
 | ||
|  	/* if there is no access to hash keep rate steady */
 | ||
| -	metric = (dfnd + dnfnd)? 10 * (dsrch + dfnd + dnfnd) / (dfnd + dnfnd) : metric;
 | ||
| +	metric = (dfnd + dnfnd)? 100 * (dsrch + dfnd + dnfnd) / (dfnd + dnfnd) : metric;
 | ||
|  	CALC_RATE(min15_metric, (unsigned long long)metric, 15);
 | ||
|  	CALC_RATE(min5_metric, (unsigned long long)metric, 5);
 | ||
|  	CALC_RATE(min_metric, (unsigned long long)metric, 1);
 | ||
| @@ -1162,6 +2129,262 @@ static void rate_timer_calc(unsigned long dummy)
 | ||
|  	mod_timer(&rate_timer, jiffies + (HZ * SAMPLERATE));
 | ||
|  }
 | ||
|  
 | ||
| +#ifdef CONFIG_NF_NAT_NEEDED
 | ||
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
 | ||
| +static struct nf_ct_event_notifier *saved_event_cb __read_mostly = NULL;
 | ||
| +static int netflow_conntrack_event(const unsigned int events, struct nf_ct_event *item)
 | ||
| +#else
 | ||
| +static int netflow_conntrack_event(struct notifier_block *this, unsigned long events, void *ptr)
 | ||
| +#endif
 | ||
| +{
 | ||
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
 | ||
| +	struct nf_conn *ct = item->ct;
 | ||
| +#else
 | ||
| +	struct nf_conn *ct = (struct nf_conn *)ptr;
 | ||
| +#endif
 | ||
| +	struct nat_event *nel;
 | ||
| +	const struct nf_conntrack_tuple *t;
 | ||
| +	int ret = NOTIFY_DONE;
 | ||
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
 | ||
| +	struct nf_ct_event_notifier *notifier;
 | ||
| +
 | ||
| +	/* Call netlink first. */
 | ||
| +	notifier = rcu_dereference(saved_event_cb);
 | ||
| +	if (likely(notifier))
 | ||
| +		ret = notifier->fcn(events, item);
 | ||
| +#endif
 | ||
| +	if (unlikely(!natevents))
 | ||
| +		return ret;
 | ||
| +
 | ||
| +	if (!(events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED) | (1 << IPCT_DESTROY))))
 | ||
| +		return ret;
 | ||
| +
 | ||
| +	if (!(ct->status & IPS_NAT_MASK))
 | ||
| +		return ret;
 | ||
| +
 | ||
| +	if (unlikely(ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num != AF_INET ||
 | ||
| +		    ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.l3num != AF_INET)) {
 | ||
| +		/* Well, there is no linux NAT for IPv6 anyway. */
 | ||
| +		return ret;
 | ||
| +	}
 | ||
| +
 | ||
| +	if (!(nel = kmalloc(sizeof(struct nat_event), GFP_ATOMIC))) {
 | ||
| +		printk(KERN_ERR "ipt_NETFLOW: can't kmalloc nat event\n");
 | ||
| +		return ret;
 | ||
| +	}
 | ||
| +	memset(nel, 0, sizeof(struct nat_event));
 | ||
| +	nel->ts_ktime = ktime_get_real();
 | ||
| +	nel->ts_jiffies = jiffies;
 | ||
| +	t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
 | ||
| +	nel->protocol = t->dst.protonum;
 | ||
| +	nel->pre.s_addr = t->src.u3.ip;
 | ||
| +	nel->pre.d_addr = t->dst.u3.ip;
 | ||
| +	nel->pre.s_port = t->src.u.all;
 | ||
| +	nel->pre.d_port = t->dst.u.all;
 | ||
| +	t = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
 | ||
| +	/* reply is reversed */
 | ||
| +	nel->post.s_addr = t->dst.u3.ip;
 | ||
| +	nel->post.d_addr = t->src.u3.ip;
 | ||
| +	nel->post.s_port = t->dst.u.all;
 | ||
| +	nel->post.d_port = t->src.u.all;
 | ||
| +	if (events & (1 << IPCT_DESTROY)) {
 | ||
| +		nel->nat_event = NAT_DESTROY;
 | ||
| +		nat_events_stop++;
 | ||
| +	} else {
 | ||
| +		nel->nat_event = NAT_CREATE;
 | ||
| +		nat_events_start++;
 | ||
| +	}
 | ||
| +
 | ||
| +	spin_lock_bh(&nat_lock);
 | ||
| +	list_add_tail(&nel->list, &nat_list);
 | ||
| +	spin_unlock_bh(&nat_lock);
 | ||
| +
 | ||
| +	return ret;
 | ||
| +}
 | ||
| +
 | ||
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31)
 | ||
| +static struct notifier_block ctnl_notifier = {
 | ||
| +	.notifier_call = netflow_conntrack_event
 | ||
| +};
 | ||
| +#else
 | ||
| +static struct nf_ct_event_notifier ctnl_notifier = {
 | ||
| +	.fcn = netflow_conntrack_event
 | ||
| +};
 | ||
| +#endif /* since 2.6.31 */
 | ||
| +#endif /* CONFIG_NF_NAT_NEEDED */
 | ||
| +
 | ||
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23) && \
 | ||
| +    LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
 | ||
| +static bool
 | ||
| +#else
 | ||
| +static int
 | ||
| +#endif
 | ||
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
 | ||
| +netflow_target_check(const char *tablename, const void *entry, const struct xt_target *target,
 | ||
| +    void *targinfo,
 | ||
| +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,18)
 | ||
| +    unsigned int targinfosize,
 | ||
| +#endif
 | ||
| +    unsigned int hook_mask)
 | ||
| +{
 | ||
| +#else
 | ||
| +netflow_target_check(const struct xt_tgchk_param *par)
 | ||
| +{
 | ||
| +	const char *tablename = par->table;
 | ||
| +	const struct xt_target *target = par->target;
 | ||
| +#endif
 | ||
| +	if (strcmp("nat", tablename) == 0) {
 | ||
| +		/* In the nat table we only see single packet per flow, which is useless. */
 | ||
| +		printk(KERN_ERR "%s target: is not valid in %s table\n", target->name, tablename);
 | ||
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35)
 | ||
| +#define CHECK_FAIL	0
 | ||
| +#define CHECK_OK	1
 | ||
| +#else
 | ||
| +#define CHECK_FAIL	-EINVAL
 | ||
| +#define CHECK_OK	0
 | ||
| +#endif
 | ||
| +		return CHECK_FAIL;
 | ||
| +	}
 | ||
| +	if (target->family == AF_INET6 && protocol == 5) {
 | ||
| +		printk(KERN_ERR "ip6tables NETFLOW target is meaningful for protocol 9 or 10 only.\n");
 | ||
| +		return CHECK_FAIL;
 | ||
| +	}
 | ||
| +	return CHECK_OK;
 | ||
| +}
 | ||
| +
 | ||
| +#define SetXBit(x) (0x8000 >> (x)) /* Proper bit for htons later. */
 | ||
| +#ifndef IPPROTO_MH
 | ||
| +#define IPPROTO_MH	135
 | ||
| +#endif
 | ||
| +static inline __u16 observed_hdrs(const __u8 currenthdr)
 | ||
| +{
 | ||
| +	switch (currenthdr) {
 | ||
| +	case IPPROTO_TCP:
 | ||
| +	case IPPROTO_UDP:
 | ||
| +		/* For speed, in case switch is not optimized. */
 | ||
| +		return 0;
 | ||
| +	case IPPROTO_DSTOPTS:  return SetXBit(0);
 | ||
| +	case IPPROTO_HOPOPTS:  return SetXBit(1);
 | ||
| +	case IPPROTO_ROUTING:  return SetXBit(5);
 | ||
| +	case IPPROTO_MH:       return SetXBit(12);
 | ||
| +	case IPPROTO_ESP:      return SetXBit(13);
 | ||
| +	case IPPROTO_AH:       return SetXBit(14);
 | ||
| +	case IPPROTO_COMP:     return SetXBit(15);
 | ||
| +	case IPPROTO_FRAGMENT: /* Handled elsewhere. */
 | ||
| +		/* Next is known headers. */
 | ||
| +	case IPPROTO_ICMPV6:
 | ||
| +	case IPPROTO_UDPLITE:
 | ||
| +	case IPPROTO_IPIP:
 | ||
| +	case IPPROTO_PIM:
 | ||
| +	case IPPROTO_GRE:
 | ||
| +	case IPPROTO_SCTP:
 | ||
| +#ifdef IPPROTO_L2TP
 | ||
| +	case IPPROTO_L2TP:
 | ||
| +#endif
 | ||
| +	case IPPROTO_DCCP:
 | ||
| +	       return 0;
 | ||
| +	}
 | ||
| +	return SetXBit(3); /* Unknown header. */
 | ||
| +}
 | ||
| +
 | ||
| +/* http://www.iana.org/assignments/ip-parameters/ip-parameters.xhtml */
 | ||
| +static const __u8 ip4_opt_table[] = {
 | ||
| +	[7]	= 0,	/* RR */ /* parsed manually becasue of 0 */
 | ||
| +	[134]	= 1,	/* CIPSO */
 | ||
| +	[133]	= 2,	/* E-SEC */
 | ||
| +	[68]	= 3,	/* TS */
 | ||
| +	[131]	= 4,	/* LSR */
 | ||
| +	[130]	= 5,	/* SEC */
 | ||
| +	[1]	= 6,	/* NOP */
 | ||
| +	[0]	= 7,	/* EOOL */
 | ||
| +	[15]	= 8,	/* ENCODE */
 | ||
| +	[142]	= 9,	/* VISA */
 | ||
| +	[205]	= 10,	/* FINN */
 | ||
| +	[12]	= 11,	/* MTUR */
 | ||
| +	[11]	= 12,	/* MTUP */
 | ||
| +	[10]	= 13,	/* ZSU */
 | ||
| +	[137]	= 14,	/* SSR */
 | ||
| +	[136]	= 15,	/* SID */
 | ||
| +	[151]	= 16,	/* DPS */
 | ||
| +	[150]	= 17,	/* NSAPA */
 | ||
| +	[149]	= 18,	/* SDB */
 | ||
| +	[147]	= 19,	/* ADDEXT */
 | ||
| +	[148]	= 20,	/* RTRALT */
 | ||
| +	[82]	= 21,	/* TR */
 | ||
| +	[145]	= 22,	/* EIP */
 | ||
| +	[144]	= 23,	/* IMITD */
 | ||
| +	[30]	= 25,	/* EXP */
 | ||
| +	[94]	= 25,	/* EXP */
 | ||
| +	[158]	= 25,	/* EXP */
 | ||
| +	[222]	= 25,	/* EXP */
 | ||
| +	[25]	= 30,	/* QS */
 | ||
| +	[152]	= 31,	/* UMP */
 | ||
| +};
 | ||
| +/* Parse IPv4 Options array int ipv4Options IPFIX value. */
 | ||
| +static inline __u32 ip4_options(const u_int8_t *p, const unsigned int optsize)
 | ||
| +{
 | ||
| +	__u32 ret = 0;
 | ||
| +	unsigned int i;
 | ||
| +
 | ||
| +	for (i = 0; likely(i < optsize); ) {
 | ||
| +		u_int8_t op = p[i++];
 | ||
| +
 | ||
| +		if (op == 7) /* RR: bit 0 */
 | ||
| +			ret |= 1;
 | ||
| +		else if (likely(op < ARRAY_SIZE(ip4_opt_table))) {
 | ||
| +			/* Btw, IANA doc is messed up in a crazy way:
 | ||
| +			 *   http://www.ietf.org/mail-archive/web/ipfix/current/msg06008.html (2011)
 | ||
| +			 * I decided to follow IANA _text_ description from
 | ||
| +			 *   http://www.iana.org/assignments/ipfix/ipfix.xhtml (2013-09-18)
 | ||
| +			 *
 | ||
| +			 * Set proper bit for htonl later. */
 | ||
| +			if (ip4_opt_table[op])
 | ||
| +				ret |= 1 << (32 - ip4_opt_table[op]);
 | ||
| +		}
 | ||
| +		if (likely(i >= optsize || op == 0))
 | ||
| +			break;
 | ||
| +		else if (unlikely(op == 1))
 | ||
| +			continue;
 | ||
| +		else if (unlikely(p[i] < 2))
 | ||
| +			break;
 | ||
| +		else
 | ||
| +			i += p[i] - 1;
 | ||
| +	}
 | ||
| +	return ret;
 | ||
| +}
 | ||
| +
 | ||
| +#define TCPHDR_MAXSIZE (4 * 15)
 | ||
| +/* List of options: http://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml */
 | ||
| +static inline __u32 tcp_options(const struct sk_buff *skb, const unsigned int ptr, const struct tcphdr *th)
 | ||
| +{
 | ||
| +	const unsigned int optsize = th->doff * 4 - sizeof(struct tcphdr);
 | ||
| +	__u8 _opt[TCPHDR_MAXSIZE];
 | ||
| +	const u_int8_t *p;
 | ||
| +	__u32 ret;
 | ||
| +	unsigned int i;
 | ||
| +
 | ||
| +	p = skb_header_pointer(skb, ptr + sizeof(struct tcphdr), optsize, _opt);
 | ||
| +	if (unlikely(!p))
 | ||
| +		return 0;
 | ||
| +	ret = 0;
 | ||
| +	for (i = 0; likely(i < optsize); ) {
 | ||
| +		u_int8_t opt = p[i++];
 | ||
| +
 | ||
| +		if (likely(opt < 32)) {
 | ||
| +			/* IANA doc is messed up, see above. */
 | ||
| +			ret |= 1 << (32 - opt);
 | ||
| +		}
 | ||
| +		if (likely(i >= optsize || opt == 0))
 | ||
| +			break;
 | ||
| +		else if (unlikely(opt == 1))
 | ||
| +			continue;
 | ||
| +		else if (unlikely(p[i] < 2)) /* "silly options" */
 | ||
| +			break;
 | ||
| +		else
 | ||
| +			i += p[i] - 1;
 | ||
| +	}
 | ||
| +	return ret;
 | ||
| +}
 | ||
|  /* packet receiver */
 | ||
|  static unsigned int netflow_target(
 | ||
|  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
 | ||
| @@ -1192,27 +2415,38 @@ static unsigned int netflow_target(
 | ||
|  		)
 | ||
|  {
 | ||
|  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24)
 | ||
| -	struct sk_buff *skb = *pskb;
 | ||
| +	const struct sk_buff *skb = *pskb;
 | ||
| +#endif
 | ||
| +	union {
 | ||
| +		struct iphdr ip;
 | ||
| +		struct ipv6hdr ip6;
 | ||
| +	} _iph, *iph;
 | ||
| +	unsigned int hash;
 | ||
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
 | ||
| +	const int family = target->family;
 | ||
| +#else
 | ||
| +	const int family = par->family;
 | ||
|  #endif
 | ||
| -	struct iphdr _iph, *iph;
 | ||
|  	struct ipt_netflow_tuple tuple;
 | ||
|  	struct ipt_netflow *nf;
 | ||
|  	__u8 tcp_flags;
 | ||
|  	struct netflow_aggr_n *aggr_n;
 | ||
|  	struct netflow_aggr_p *aggr_p;
 | ||
|  	__u8 s_mask, d_mask;
 | ||
| -	unsigned int hash;
 | ||
| -
 | ||
| -	iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph); //iph = ip_hdr(skb);
 | ||
| -
 | ||
| -	if (iph == NULL) {
 | ||
| +	unsigned int ptr;
 | ||
| +	int fragment;
 | ||
| +	size_t pkt_len;
 | ||
| +	int options = 0;
 | ||
| +	int tcpoptions = 0;
 | ||
| +
 | ||
| +	iph = skb_header_pointer(skb, 0, (likely(family == AF_INET))? sizeof(_iph.ip) : sizeof(_iph.ip6), &iph);
 | ||
| +	if (unlikely(iph == NULL)) {
 | ||
|  		NETFLOW_STAT_INC(truncated);
 | ||
|  		NETFLOW_STAT_INC(pkt_drop);
 | ||
|  		return IPT_CONTINUE;
 | ||
|  	}
 | ||
|  
 | ||
| -	tuple.s_addr	= iph->saddr;
 | ||
| -	tuple.d_addr	= iph->daddr;
 | ||
| +	tuple.l3proto	= family;
 | ||
|  	tuple.s_port	= 0;
 | ||
|  	tuple.d_port	= 0;
 | ||
|  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28)
 | ||
| @@ -1220,30 +2454,118 @@ static unsigned int netflow_target(
 | ||
|  #else
 | ||
|  	tuple.i_ifc	= par->in? par->in->ifindex : -1;
 | ||
|  #endif
 | ||
| -	tuple.protocol	= iph->protocol;
 | ||
| -	tuple.tos	= iph->tos;
 | ||
|  	tcp_flags	= 0; /* Cisco sometimes have TCP ACK for non TCP packets, don't get it */
 | ||
|  	s_mask		= 0;
 | ||
|  	d_mask		= 0;
 | ||
|  
 | ||
| -	if (iph->frag_off & htons(IP_OFFSET))
 | ||
| +	if (likely(family == AF_INET)) {
 | ||
| +		tuple.src	= (union nf_inet_addr){ .ip = iph->ip.saddr };
 | ||
| +		tuple.dst	= (union nf_inet_addr){ .ip = iph->ip.daddr };
 | ||
| +		tuple.tos	= iph->ip.tos;
 | ||
| +		tuple.protocol	= iph->ip.protocol;
 | ||
| +		fragment	= unlikely(iph->ip.frag_off & htons(IP_OFFSET));
 | ||
| +		ptr		= iph->ip.ihl * 4;
 | ||
| +		pkt_len		= ntohs(iph->ip.tot_len);
 | ||
| +
 | ||
| +#define IPHDR_MAXSIZE (4 * 15)
 | ||
| +		if (unlikely(iph->ip.ihl * 4 > sizeof(struct iphdr))) {
 | ||
| +			u_int8_t _opt[IPHDR_MAXSIZE - sizeof(struct iphdr)];
 | ||
| +			const u_int8_t *op;
 | ||
| +			unsigned int optsize = iph->ip.ihl * 4 - sizeof(struct iphdr);
 | ||
| +
 | ||
| +			op = skb_header_pointer(skb, sizeof(struct iphdr), optsize, _opt);
 | ||
| +			if (likely(op))
 | ||
| +				options = ip4_options(op, optsize);
 | ||
| +		}
 | ||
| +	} else {
 | ||
| +		__u8 currenthdr;
 | ||
| +
 | ||
| +		tuple.src.in6	= iph->ip6.saddr;
 | ||
| +		tuple.dst.in6	= iph->ip6.daddr;
 | ||
| +		tuple.tos	= iph->ip6.priority;
 | ||
| +		fragment	= 0;
 | ||
| +		ptr		= sizeof(struct ipv6hdr);
 | ||
| +		pkt_len		= ntohs(iph->ip6.payload_len) + sizeof(struct ipv6hdr);
 | ||
| +
 | ||
| +		currenthdr	= iph->ip6.nexthdr;
 | ||
| +		while (currenthdr != NEXTHDR_NONE && ipv6_ext_hdr(currenthdr)) {
 | ||
| +			struct ipv6_opt_hdr _hdr;
 | ||
| +			const struct ipv6_opt_hdr *hp;
 | ||
| +			unsigned int hdrlen = 0;
 | ||
| +
 | ||
| +			options |= observed_hdrs(currenthdr);
 | ||
| +			hp = skb_header_pointer(skb, ptr, sizeof(_hdr), &_hdr);
 | ||
| +			if (hp == NULL) {
 | ||
| +				/* We have src/dst, so must account something. */
 | ||
| +				tuple.protocol = currenthdr;
 | ||
| +				fragment = 3;
 | ||
| +				goto do_protocols;
 | ||
| +			}
 | ||
| +
 | ||
| +			switch (currenthdr) {
 | ||
| +			case IPPROTO_FRAGMENT: {
 | ||
| +				struct frag_hdr _fhdr;
 | ||
| +				const struct frag_hdr *fh;
 | ||
| +
 | ||
| +				fh = skb_header_pointer(skb, ptr, sizeof(_fhdr),
 | ||
| +						&_fhdr);
 | ||
| +				if (fh == NULL) {
 | ||
| +					tuple.protocol = currenthdr;
 | ||
| +					fragment = 2;
 | ||
| +					goto do_protocols;
 | ||
| +				}
 | ||
| +				fragment = 1;
 | ||
| +#define FRA0 SetXBit(4) /* Fragment header - first fragment */
 | ||
| +#define FRA1 SetXBit(6) /* Fragmentation header - not first fragment */
 | ||
| +				options |= (ntohs(fh->frag_off) & 0xFFF8)? FRA1 : FRA0;
 | ||
| +				hdrlen = 8;
 | ||
| +				break;
 | ||
| +			}
 | ||
| +			case IPPROTO_AH: {
 | ||
| +				struct ip_auth_hdr _hdr, *hp;
 | ||
| +
 | ||
| +				if (likely(hp = skb_header_pointer(skb, ptr, 8, &_hdr))) {
 | ||
| +					tuple.s_port = hp->spi >> 16;
 | ||
| +					tuple.d_port = hp->spi;
 | ||
| +				}
 | ||
| +				hdrlen = (hp->hdrlen + 2) << 2;
 | ||
| +				break;
 | ||
| +			}
 | ||
| +			default:
 | ||
| +				hdrlen = ipv6_optlen(hp);
 | ||
| +			}
 | ||
| +			currenthdr = hp->nexthdr;
 | ||
| +			ptr += hdrlen;
 | ||
| +		}
 | ||
| +		tuple.protocol	= currenthdr;
 | ||
| +		options |= observed_hdrs(currenthdr);
 | ||
| +	}
 | ||
| +
 | ||
| +do_protocols:
 | ||
| +	if (fragment) {
 | ||
| +		/* if conntrack is enabled it should defrag on pre-routing and local-out */
 | ||
|  		NETFLOW_STAT_INC(frags);
 | ||
| -	else {
 | ||
| +	} else {
 | ||
|  		switch (tuple.protocol) {
 | ||
|  		    case IPPROTO_TCP: {
 | ||
|  			struct tcphdr _hdr, *hp;
 | ||
|  
 | ||
| -			if ((hp = skb_header_pointer(skb, iph->ihl * 4, 14, &_hdr))) {
 | ||
| +			if (likely(hp = skb_header_pointer(skb, ptr, 14, &_hdr))) {
 | ||
|  				tuple.s_port = hp->source;
 | ||
|  				tuple.d_port = hp->dest;
 | ||
|  				tcp_flags = (u_int8_t)(ntohl(tcp_flag_word(hp)) >> 16);
 | ||
| +
 | ||
| +				if (unlikely(hp->doff * 4 > sizeof(struct tcphdr)))
 | ||
| +					tcpoptions = tcp_options(skb, ptr, hp);
 | ||
|  			}
 | ||
|  			break;
 | ||
|  		    }
 | ||
| -		    case IPPROTO_UDP: {
 | ||
| +		    case IPPROTO_UDP:
 | ||
| +		    case IPPROTO_UDPLITE:
 | ||
| +		    case IPPROTO_SCTP: {
 | ||
|  			struct udphdr _hdr, *hp;
 | ||
|  
 | ||
| -			if ((hp = skb_header_pointer(skb, iph->ihl * 4, 4, &_hdr))) {
 | ||
| +			if (likely(hp = skb_header_pointer(skb, ptr, 4, &_hdr))) {
 | ||
|  				tuple.s_port = hp->source;
 | ||
|  				tuple.d_port = hp->dest;
 | ||
|  			}
 | ||
| @@ -1252,72 +2574,111 @@ static unsigned int netflow_target(
 | ||
|  		    case IPPROTO_ICMP: {
 | ||
|  			struct icmphdr _hdr, *hp;
 | ||
|  
 | ||
| -			if ((hp = skb_header_pointer(skb, iph->ihl * 4, 2, &_hdr)))
 | ||
| -				tuple.d_port = (hp->type << 8) | hp->code;
 | ||
| +			if (likely(family == AF_INET &&
 | ||
| +				    (hp = skb_header_pointer(skb, ptr, 2, &_hdr))))
 | ||
| +				tuple.d_port = htons((hp->type << 8) | hp->code);
 | ||
|  			break;
 | ||
|  		    }
 | ||
| +		    case IPPROTO_ICMPV6: {
 | ||
| +			    struct icmp6hdr _icmp6h, *ic;
 | ||
| +
 | ||
| +			    if (likely(family == AF_INET6 &&
 | ||
| +					(ic = skb_header_pointer(skb, ptr, 2, &_icmp6h))))
 | ||
| +				    tuple.d_port = htons((ic->icmp6_type << 8) | ic->icmp6_code);
 | ||
| +			    break;
 | ||
| +		    }
 | ||
|  		    case IPPROTO_IGMP: {
 | ||
| -			struct igmphdr *_hdr, *hp;
 | ||
| +			struct igmphdr _hdr, *hp;
 | ||
|  
 | ||
| -			if ((hp = skb_header_pointer(skb, iph->ihl * 4, 1, &_hdr)))
 | ||
| +			if (likely(hp = skb_header_pointer(skb, ptr, 1, &_hdr)))
 | ||
|  				tuple.d_port = hp->type;
 | ||
|  			}
 | ||
|  			break;
 | ||
| +		    case IPPROTO_AH: { /* IPSEC */
 | ||
| +			struct ip_auth_hdr _hdr, *hp;
 | ||
| +
 | ||
| +			if (likely(family == AF_INET && /* For IPv6 it's parsed above. */
 | ||
| +				    (hp = skb_header_pointer(skb, ptr, 8, &_hdr)))) {
 | ||
| +				tuple.s_port = hp->spi >> 16;
 | ||
| +				tuple.d_port = hp->spi;
 | ||
| +			}
 | ||
| +			break;
 | ||
| +		    }
 | ||
| +		    case IPPROTO_ESP: {
 | ||
| +			struct ip_esp_hdr _hdr, *hp;
 | ||
| +
 | ||
| +			if (likely(hp = skb_header_pointer(skb, ptr, 4, &_hdr)))
 | ||
| +				tuple.s_port = hp->spi >> 16;
 | ||
| +				tuple.d_port = hp->spi;
 | ||
| +			}
 | ||
| +			break;
 | ||
|  	       	}
 | ||
|  	} /* not fragmented */
 | ||
|  
 | ||
|  	/* aggregate networks */
 | ||
|  	read_lock_bh(&aggr_lock);
 | ||
| -	list_for_each_entry(aggr_n, &aggr_n_list, list)
 | ||
| -		if ((ntohl(tuple.s_addr) & aggr_n->mask) == aggr_n->addr) {
 | ||
| -			tuple.s_addr &= htonl(aggr_n->aggr_mask);
 | ||
| -			s_mask = aggr_n->prefix;
 | ||
| -			break; 
 | ||
| -		}
 | ||
| -	list_for_each_entry(aggr_n, &aggr_n_list, list)
 | ||
| -		if ((ntohl(tuple.d_addr) & aggr_n->mask) == aggr_n->addr) {
 | ||
| -			tuple.d_addr &= htonl(aggr_n->aggr_mask);
 | ||
| -			d_mask = aggr_n->prefix;
 | ||
| -			break; 
 | ||
| -		}
 | ||
| +	if (family == AF_INET) {
 | ||
| +		list_for_each_entry(aggr_n, &aggr_n_list, list)
 | ||
| +			if (unlikely((ntohl(tuple.src.ip) & aggr_n->mask) == aggr_n->addr)) {
 | ||
| +				tuple.src.ip &= htonl(aggr_n->aggr_mask);
 | ||
| +				s_mask = aggr_n->prefix;
 | ||
| +				atomic_inc(&aggr_n->usage);
 | ||
| +				break;
 | ||
| +			}
 | ||
| +		list_for_each_entry(aggr_n, &aggr_n_list, list)
 | ||
| +			if (unlikely((ntohl(tuple.dst.ip) & aggr_n->mask) == aggr_n->addr)) {
 | ||
| +				tuple.dst.ip &= htonl(aggr_n->aggr_mask);
 | ||
| +				d_mask = aggr_n->prefix;
 | ||
| +				atomic_inc(&aggr_n->usage);
 | ||
| +				break;
 | ||
| +			}
 | ||
| +	}
 | ||
|  
 | ||
| -	/* aggregate ports */
 | ||
| -	list_for_each_entry(aggr_p, &aggr_p_list, list)
 | ||
| -		if (ntohs(tuple.s_port) >= aggr_p->port1 &&
 | ||
| -		    ntohs(tuple.s_port) <= aggr_p->port2) {
 | ||
| -			tuple.s_port = htons(aggr_p->aggr_port);
 | ||
| -			break;
 | ||
| -		}
 | ||
| +	if (tuple.protocol == IPPROTO_TCP ||
 | ||
| +	    tuple.protocol == IPPROTO_UDP ||
 | ||
| +	    tuple.protocol == IPPROTO_SCTP ||
 | ||
| +	    tuple.protocol == IPPROTO_UDPLITE) {
 | ||
| +		/* aggregate ports */
 | ||
| +		list_for_each_entry(aggr_p, &aggr_p_list, list)
 | ||
| +			if (unlikely(ntohs(tuple.s_port) >= aggr_p->port1 &&
 | ||
| +			    ntohs(tuple.s_port) <= aggr_p->port2)) {
 | ||
| +				tuple.s_port = htons(aggr_p->aggr_port);
 | ||
| +				atomic_inc(&aggr_p->usage);
 | ||
| +				break;
 | ||
| +			}
 | ||
|  
 | ||
| -	list_for_each_entry(aggr_p, &aggr_p_list, list)
 | ||
| -		if (ntohs(tuple.d_port) >= aggr_p->port1 &&
 | ||
| -		    ntohs(tuple.d_port) <= aggr_p->port2) {
 | ||
| -			tuple.d_port = htons(aggr_p->aggr_port);
 | ||
| -			break;
 | ||
| -		}
 | ||
| +		list_for_each_entry(aggr_p, &aggr_p_list, list)
 | ||
| +			if (unlikely(ntohs(tuple.d_port) >= aggr_p->port1 &&
 | ||
| +			    ntohs(tuple.d_port) <= aggr_p->port2)) {
 | ||
| +				tuple.d_port = htons(aggr_p->aggr_port);
 | ||
| +				atomic_inc(&aggr_p->usage);
 | ||
| +				break;
 | ||
| +			}
 | ||
| +	}
 | ||
|  	read_unlock_bh(&aggr_lock);
 | ||
|  
 | ||
|  	hash = hash_netflow(&tuple);
 | ||
| -	spin_lock_bh(&ipt_netflow_lock);
 | ||
| +	read_lock_bh(&htable_rwlock);
 | ||
| +	spin_lock(&htable_locks[hash & LOCK_COUNT_MASK]);
 | ||
|  	/* record */
 | ||
|  	nf = ipt_netflow_find(&tuple, hash);
 | ||
| -	if (!nf) {
 | ||
| -		if (maxflows > 0 && atomic_read(&ipt_netflow_count) >= maxflows) {
 | ||
| +	if (unlikely(!nf)) {
 | ||
| +		struct rtable *rt;
 | ||
| +
 | ||
| +		if (unlikely(maxflows > 0 && atomic_read(&ipt_netflow_count) >= maxflows)) {
 | ||
|  			/* This is DOS attack prevention */
 | ||
|  			NETFLOW_STAT_INC(maxflows_err);
 | ||
|  			NETFLOW_STAT_INC(pkt_drop);
 | ||
| -			NETFLOW_STAT_ADD(traf_drop, ntohs(iph->tot_len));
 | ||
| -			spin_unlock_bh(&ipt_netflow_lock);
 | ||
| -			return IPT_CONTINUE;
 | ||
| +			NETFLOW_STAT_ADD(traf_drop, pkt_len);
 | ||
| +			goto unlock_return;
 | ||
|  		}
 | ||
|  
 | ||
|  		nf = init_netflow(&tuple, skb, hash);
 | ||
| -		if (!nf || IS_ERR(nf)) {
 | ||
| +		if (unlikely(!nf || IS_ERR(nf))) {
 | ||
|  			NETFLOW_STAT_INC(alloc_err);
 | ||
|  			NETFLOW_STAT_INC(pkt_drop);
 | ||
| -			NETFLOW_STAT_ADD(traf_drop, ntohs(iph->tot_len));
 | ||
| -			spin_unlock_bh(&ipt_netflow_lock);
 | ||
| -			return IPT_CONTINUE;
 | ||
| +			NETFLOW_STAT_ADD(traf_drop, pkt_len);
 | ||
| +			goto unlock_return;
 | ||
|  		}
 | ||
|  
 | ||
|  		nf->ts_first = jiffies;
 | ||
| @@ -1330,31 +2691,68 @@ static unsigned int netflow_target(
 | ||
|  		nf->s_mask = s_mask;
 | ||
|  		nf->d_mask = d_mask;
 | ||
|  
 | ||
| -		if (debug > 2)
 | ||
| -			printk(KERN_INFO "ipt_netflow: new (%u) %hd:%hd SRC=%u.%u.%u.%u:%u DST=%u.%u.%u.%u:%u\n",
 | ||
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)
 | ||
| +		rt = (struct rtable *)skb->dst;
 | ||
| +#else /* since 2.6.26 */
 | ||
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31)
 | ||
| +		rt = skb->rtable;
 | ||
| +#else /* since 2.6.31 */
 | ||
| +		rt = skb_rtable(skb);
 | ||
| +#endif
 | ||
| +#endif
 | ||
| +		if (likely(family == AF_INET)) {
 | ||
| +			if (rt)
 | ||
| +				nf->nh.ip = rt->rt_gateway;
 | ||
| +		} else {
 | ||
| +			if (rt)
 | ||
| +				nf->nh.in6 = ((struct rt6_info *)rt)->rt6i_gateway;
 | ||
| +			nf->flow_label = (iph->ip6.flow_lbl[0] << 16) |
 | ||
| +			       	(iph->ip6.flow_lbl[1] << 8) | (iph->ip6.flow_lbl[2]);
 | ||
| +		}
 | ||
| +#if 0
 | ||
| +		if (unlikely(debug > 2))
 | ||
| +			printk(KERN_INFO "ipt_NETFLOW: new (%u) %hd:%hd SRC=%u.%u.%u.%u:%u DST=%u.%u.%u.%u:%u\n",
 | ||
|  			       atomic_read(&ipt_netflow_count),
 | ||
|  			       tuple.i_ifc, nf->o_ifc,
 | ||
|  			       NIPQUAD(tuple.s_addr), ntohs(tuple.s_port),
 | ||
|  			       NIPQUAD(tuple.d_addr), ntohs(tuple.d_port));
 | ||
| +#endif
 | ||
|  	} else {
 | ||
|  		/* ipt_netflow_list is sorted by access time:
 | ||
|  		 * most recently accessed flows are at head, old flows remain at tail
 | ||
|  		 * this function bubble up flow to the head */
 | ||
| +		spin_lock(&hlist_lock);
 | ||
|  		list_move(&nf->list, &ipt_netflow_list);
 | ||
| +		spin_unlock(&hlist_lock);
 | ||
|  	}
 | ||
|  
 | ||
| +#ifdef CONFIG_NF_CONNTRACK_MARK
 | ||
| +	{
 | ||
| +		struct nf_conn *ct;
 | ||
| +		enum ip_conntrack_info ctinfo;
 | ||
| +		ct = nf_ct_get(skb, &ctinfo);
 | ||
| +		if (ct)
 | ||
| +			nf->mark = ct->mark;
 | ||
| +	}
 | ||
| +#endif
 | ||
| +
 | ||
|  	nf->nr_packets++;
 | ||
| -	nf->nr_bytes += ntohs(iph->tot_len);
 | ||
| +	nf->nr_bytes += pkt_len;
 | ||
|  	nf->ts_last = jiffies;
 | ||
|  	nf->tcp_flags |= tcp_flags;
 | ||
| +	nf->options |= options;
 | ||
| +	if (tuple.protocol == IPPROTO_TCP)
 | ||
| +		nf->tcpoptions |= tcpoptions;
 | ||
|  
 | ||
|  	NETFLOW_STAT_INC(pkt_total);
 | ||
| -	NETFLOW_STAT_ADD(traf_total, ntohs(iph->tot_len));
 | ||
| +	NETFLOW_STAT_ADD(traf_total, pkt_len);
 | ||
|  
 | ||
| -	if (active_needs_export(nf, active_timeout * HZ)) {
 | ||
| +	if (likely(active_needs_export(nf, active_timeout * HZ))) {
 | ||
|  		/* ok, if this active flow to be exported
 | ||
|  		 * bubble it to the tail */
 | ||
| +		spin_lock(&hlist_lock);
 | ||
|  		list_move_tail(&nf->list, &ipt_netflow_list);
 | ||
| +		spin_unlock(&hlist_lock);
 | ||
|  
 | ||
|  		/* Blog: I thought about forcing timer to wake up sooner if we have
 | ||
|  		 * enough exportable flows, but in fact this doesn't have much sense,
 | ||
| @@ -1363,35 +2761,194 @@ static unsigned int netflow_target(
 | ||
|  		 * limited size). But yes, this is disputable. */
 | ||
|  	}
 | ||
|  
 | ||
| -	spin_unlock_bh(&ipt_netflow_lock);
 | ||
| +unlock_return:
 | ||
| +	spin_unlock(&htable_locks[hash & LOCK_COUNT_MASK]);
 | ||
| +	read_unlock_bh(&htable_rwlock);
 | ||
|  
 | ||
|  	return IPT_CONTINUE;
 | ||
|  }
 | ||
|  
 | ||
| -static struct ipt_target ipt_netflow_reg = {
 | ||
| -	.name		= "NETFLOW",
 | ||
| -	.target		= netflow_target,
 | ||
| -	.family		= AF_INET,
 | ||
| -#ifndef RAW_PROMISC_HACK
 | ||
| -	.table		= "filter",
 | ||
| -#ifndef NF_IP_LOCAL_IN /* 2.6.25 */
 | ||
| -	.hooks		= (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
 | ||
| -				(1 << NF_INET_LOCAL_OUT),
 | ||
| -#else
 | ||
| -	.hooks		= (1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) |
 | ||
| -				(1 << NF_IP_LOCAL_OUT),
 | ||
| -#endif /* NF_IP_LOCAL_IN */
 | ||
| +#ifdef CONFIG_NF_NAT_NEEDED
 | ||
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
 | ||
| +	/* Below 2.6.31 we don't need to handle callback chain manually. */
 | ||
| +
 | ||
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0)
 | ||
| +#define NET_STRUCT struct net *net
 | ||
| +#define NET_ARG net,
 | ||
| +#define nf_conntrack_event_cb net->ct.nf_conntrack_event_cb
 | ||
|  #else
 | ||
| -	.table          = "raw",
 | ||
| -#ifndef NF_IP_LOCAL_IN
 | ||
| -	.hooks          = (1 << NF_INET_LOCAL_IN) | (1 << NF_INET_FORWARD) |
 | ||
| -				(1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_PRE_ROUTING),
 | ||
| +#define NET_STRUCT void
 | ||
| +#define NET_ARG
 | ||
| +#endif
 | ||
| +static int set_notifier_cb(NET_STRUCT)
 | ||
| +{
 | ||
| +	struct nf_ct_event_notifier *notifier;
 | ||
| +
 | ||
| +	notifier = rcu_dereference(nf_conntrack_event_cb);
 | ||
| +	if (notifier == NULL) {
 | ||
| +		/* Polite mode. */
 | ||
| +		nf_conntrack_register_notifier(NET_ARG &ctnl_notifier);
 | ||
| +	} else if (notifier != &ctnl_notifier) {
 | ||
| +		if (!saved_event_cb)
 | ||
| +			saved_event_cb = notifier;
 | ||
| +		else if (saved_event_cb != notifier)
 | ||
| +			printk(KERN_ERR "natevents_net_init: %p != %p (report error.)\n",
 | ||
| +			    saved_event_cb, notifier);
 | ||
| +		rcu_assign_pointer(nf_conntrack_event_cb, &ctnl_notifier);
 | ||
| +	} else
 | ||
| +		printk(KERN_ERR "ipt_NETFLOW: natevents already enabled.\n");
 | ||
| +	return 0;
 | ||
| +}
 | ||
| +static void unset_notifier_cb(NET_STRUCT)
 | ||
| +{
 | ||
| +	struct nf_ct_event_notifier *notifier;
 | ||
| +
 | ||
| +	notifier = rcu_dereference(nf_conntrack_event_cb);
 | ||
| +	if (notifier == &ctnl_notifier) {
 | ||
| +		if (saved_event_cb == NULL)
 | ||
| +			nf_conntrack_unregister_notifier(NET_ARG &ctnl_notifier);
 | ||
| +		else
 | ||
| +			rcu_assign_pointer(nf_conntrack_event_cb, saved_event_cb);
 | ||
| +	} else
 | ||
| +		printk(KERN_ERR "ipt_NETFLOW: natevents already disabled.\n");
 | ||
| +}
 | ||
| +
 | ||
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0)
 | ||
| +#undef nf_conntrack_event_cb
 | ||
| +static struct pernet_operations natevents_net_ops = {
 | ||
| +	.init = set_notifier_cb,
 | ||
| +	.exit = unset_notifier_cb
 | ||
| +};
 | ||
| +#endif
 | ||
| +#endif /* since 2.6.31 */
 | ||
| +
 | ||
| +static DEFINE_MUTEX(events_lock);
 | ||
| +/* Both functions may be called multiple times. */
 | ||
| +static void register_ct_events(void)
 | ||
| +{
 | ||
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
 | ||
| +#define NETLINK_M "nf_conntrack_netlink"
 | ||
| +	struct module *netlink_m;
 | ||
| +	static int referenced = 0;
 | ||
| +#endif
 | ||
| +
 | ||
| +	printk(KERN_INFO "ipt_NETFLOW: enable natevents.\n");
 | ||
| +	mutex_lock(&events_lock);
 | ||
| +
 | ||
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
 | ||
| +	/* Pre-load netlink module who will be first notifier
 | ||
| +	 * user, and then hijack nf_conntrack_event_cb from it. */
 | ||
| +	if (
 | ||
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0)
 | ||
| +	    !rcu_dereference(nf_conntrack_event_cb) ||
 | ||
| +#endif
 | ||
| +	    !(netlink_m = find_module(NETLINK_M))) {
 | ||
| +		printk("Loading " NETLINK_M "\n");
 | ||
| +		request_module(NETLINK_M);
 | ||
| +	}
 | ||
| +	/* Reference netlink module to prevent it's unsafe unload before us. */
 | ||
| +	if (!referenced && (netlink_m = find_module(NETLINK_M))) {
 | ||
| +		referenced++;
 | ||
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35)
 | ||
| +#define use_module ref_module
 | ||
| +#endif
 | ||
| +		use_module(THIS_MODULE, netlink_m);
 | ||
| +	}
 | ||
| +
 | ||
| +	/* Register ct events callback. */
 | ||
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0)
 | ||
| +	register_pernet_subsys(&natevents_net_ops);
 | ||
|  #else
 | ||
| -	.hooks          = (1 << NF_IP_LOCAL_IN) | (1 << NF_IP_FORWARD) |
 | ||
| -				(1 << NF_IP_LOCAL_OUT) | (1 << NF_IP_PRE_ROUTING),
 | ||
| -#endif /* NF_IP_LOCAL_IN */
 | ||
| -#endif /* !RAW_PROMISC_HACK */
 | ||
| -	.me		= THIS_MODULE
 | ||
| +	set_notifier_cb();
 | ||
| +#endif
 | ||
| +#else /* below v2.6.31 */
 | ||
| +	if (!natevents && nf_conntrack_register_notifier(&ctnl_notifier) < 0)
 | ||
| +		printk(KERN_ERR "Can't register conntrack notifier, natevents disabled.\n");
 | ||
| +	else
 | ||
| +#endif
 | ||
| +	natevents = 1;
 | ||
| +	mutex_unlock(&events_lock);
 | ||
| +}
 | ||
| +
 | ||
| +static void unregister_ct_events(void)
 | ||
| +{
 | ||
| +	printk(KERN_INFO "ipt_NETFLOW: disable natevents.\n");
 | ||
| +	mutex_lock(&events_lock);
 | ||
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,31)
 | ||
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,2,0)
 | ||
| +	unregister_pernet_subsys(&natevents_net_ops);
 | ||
| +#else /* < v3.2 */
 | ||
| +	unset_notifier_cb();
 | ||
| +#endif /* v3.2 */
 | ||
| +	rcu_assign_pointer(saved_event_cb, NULL);
 | ||
| +#else /* < v2.6.31 */
 | ||
| +	nf_conntrack_unregister_notifier(&ctnl_notifier);
 | ||
| +#endif
 | ||
| +	natevents = 0;
 | ||
| +	mutex_unlock(&events_lock);
 | ||
| +}
 | ||
| +#endif /* CONFIG_NF_NAT_NEEDED */
 | ||
| +
 | ||
| +#ifndef NF_IP_LOCAL_IN /* 2.6.25 */
 | ||
| +#define NF_IP_PRE_ROUTING	NF_INET_PRE_ROUTING
 | ||
| +#define NF_IP_LOCAL_IN		NF_INET_LOCAL_IN
 | ||
| +#define NF_IP_FORWARD		NF_INET_FORWARD
 | ||
| +#define NF_IP_LOCAL_OUT		NF_INET_LOCAL_OUT
 | ||
| +#define NF_IP_POST_ROUTING	NF_INET_POST_ROUTING
 | ||
| +#endif
 | ||
| +
 | ||
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19)
 | ||
| +/* net/netfilter/x_tables.c */
 | ||
| +static void xt_unregister_targets(struct xt_target *target, unsigned int n)
 | ||
| +{
 | ||
| +	unsigned int i;
 | ||
| +
 | ||
| +	for (i = 0; i < n; i++)
 | ||
| +		xt_unregister_target(&target[i]);
 | ||
| +}
 | ||
| +static int xt_register_targets(struct xt_target *target, unsigned int n)
 | ||
| +{
 | ||
| +	unsigned int i;
 | ||
| +
 | ||
| +	int err = 0;
 | ||
| +	for (i = 0; i < n; i++)
 | ||
| +		if ((err = xt_register_target(&target[i])))
 | ||
| +			goto err;
 | ||
| +	return err;
 | ||
| +err:
 | ||
| +	if (i > 0)
 | ||
| +		xt_unregister_targets(target, i);
 | ||
| +	return err;
 | ||
| +}
 | ||
| +#endif
 | ||
| +
 | ||
| +static struct ipt_target ipt_netflow_reg[] __read_mostly = {
 | ||
| +	{
 | ||
| +		.name		= "NETFLOW",
 | ||
| +		.target		= netflow_target,
 | ||
| +		.checkentry	= netflow_target_check,
 | ||
| +		.family		= AF_INET,
 | ||
| +		.hooks		=
 | ||
| +		       	(1 << NF_IP_PRE_ROUTING) |
 | ||
| +		       	(1 << NF_IP_LOCAL_IN) |
 | ||
| +		       	(1 << NF_IP_FORWARD) |
 | ||
| +			(1 << NF_IP_LOCAL_OUT) |
 | ||
| +			(1 << NF_IP_POST_ROUTING),
 | ||
| +		.me		= THIS_MODULE
 | ||
| +	},
 | ||
| +	{
 | ||
| +		.name		= "NETFLOW",
 | ||
| +		.target		= netflow_target,
 | ||
| +		.checkentry	= netflow_target_check,
 | ||
| +		.family		= AF_INET6,
 | ||
| +		.hooks		=
 | ||
| +		       	(1 << NF_IP_PRE_ROUTING) |
 | ||
| +		       	(1 << NF_IP_LOCAL_IN) |
 | ||
| +		       	(1 << NF_IP_FORWARD) |
 | ||
| +			(1 << NF_IP_LOCAL_OUT) |
 | ||
| +			(1 << NF_IP_POST_ROUTING),
 | ||
| +		.me		= THIS_MODULE
 | ||
| +	},
 | ||
|  };
 | ||
|  
 | ||
|  static int __init ipt_netflow_init(void)
 | ||
| @@ -1399,11 +2956,16 @@ static int __init ipt_netflow_init(void)
 | ||
|  #ifdef CONFIG_PROC_FS
 | ||
|  	struct proc_dir_entry *proc_stat;
 | ||
|  #endif
 | ||
| +	printk(KERN_INFO "ipt_NETFLOW version %s, srcversion %s\n",
 | ||
| +		IPT_NETFLOW_VERSION, THIS_MODULE->srcversion);
 | ||
|  
 | ||
|  	get_random_bytes(&ipt_netflow_hash_rnd, 4);
 | ||
|  
 | ||
|  	/* determine hash size (idea from nf_conntrack_core.c) */
 | ||
|  	if (!hashsize) {
 | ||
| +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,11,0)
 | ||
| +#define num_physpages totalram_pages
 | ||
| +#endif
 | ||
|  		hashsize = (((num_physpages << PAGE_SHIFT) / 16384)
 | ||
|  					 / sizeof(struct hlist_head));
 | ||
|  		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
 | ||
| @@ -1411,8 +2973,7 @@ static int __init ipt_netflow_init(void)
 | ||
|  	}
 | ||
|  	if (hashsize < 16)
 | ||
|  		hashsize = 16;
 | ||
| -	printk(KERN_INFO "ipt_netflow version %s (%u buckets)\n",
 | ||
| -		IPT_NETFLOW_VERSION, hashsize);
 | ||
| +	printk(KERN_INFO "ipt_NETFLOW: hashsize %u\n", hashsize);
 | ||
|  
 | ||
|  	ipt_netflow_hash_size = hashsize;
 | ||
|  	ipt_netflow_hash = alloc_hashtable(ipt_netflow_hash_size);
 | ||
| @@ -1434,12 +2995,18 @@ static int __init ipt_netflow_init(void)
 | ||
|  	}
 | ||
|  
 | ||
|  #ifdef CONFIG_PROC_FS
 | ||
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
 | ||
|  	proc_stat = create_proc_entry("ipt_netflow", S_IRUGO, INIT_NET(proc_net_stat));
 | ||
| +#else
 | ||
| +	proc_stat = proc_create("ipt_netflow", S_IRUGO, INIT_NET(proc_net_stat), &nf_seq_fops);
 | ||
| +#endif
 | ||
|  	if (!proc_stat) {
 | ||
|  		printk(KERN_ERR "Unable to create /proc/net/stat/ipt_netflow entry\n");
 | ||
|  		goto err_free_netflow_slab;
 | ||
|  	}
 | ||
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0)
 | ||
|  	proc_stat->proc_fops = &nf_seq_fops;
 | ||
| +#endif
 | ||
|  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30)
 | ||
|  	proc_stat->owner = THIS_MODULE;
 | ||
|  #endif
 | ||
| @@ -1480,21 +3047,28 @@ static int __init ipt_netflow_init(void)
 | ||
|  	}
 | ||
|  	add_aggregation(aggregation);
 | ||
|  
 | ||
| -	__start_scan_worker();
 | ||
| +	netflow_switch_version(protocol);
 | ||
| +	_schedule_scan_worker(0);
 | ||
|  	setup_timer(&rate_timer, rate_timer_calc, 0);
 | ||
|  	mod_timer(&rate_timer, jiffies + (HZ * SAMPLERATE));
 | ||
|  
 | ||
| -	if (xt_register_target(&ipt_netflow_reg))
 | ||
| +	peakflows_at = jiffies;
 | ||
| +	if (xt_register_targets(ipt_netflow_reg, ARRAY_SIZE(ipt_netflow_reg)))
 | ||
|  		goto err_stop_timer;
 | ||
|  
 | ||
| -	peakflows_at = jiffies;
 | ||
| +#ifdef CONFIG_NF_NAT_NEEDED
 | ||
| +	if (natevents)
 | ||
| +		register_ct_events();
 | ||
| +#endif
 | ||
|  
 | ||
| -	printk(KERN_INFO "ipt_netflow loaded.\n");
 | ||
| +	printk(KERN_INFO "ipt_NETFLOW is loaded.\n");
 | ||
|  	return 0;
 | ||
|  
 | ||
|  err_stop_timer:
 | ||
| -	__stop_scan_worker();
 | ||
| +	_unschedule_scan_worker();
 | ||
| +	netflow_scan_and_export(AND_FLUSH);
 | ||
|  	del_timer_sync(&rate_timer);
 | ||
| +	free_templates();
 | ||
|  	destination_removeall();
 | ||
|  	aggregation_remove(&aggr_n_list);
 | ||
|  	aggregation_remove(&aggr_p_list);
 | ||
| @@ -1506,17 +3080,18 @@ err_free_proc_stat:
 | ||
|  #ifdef CONFIG_PROC_FS
 | ||
|  	remove_proc_entry("ipt_netflow", INIT_NET(proc_net_stat));
 | ||
|  err_free_netflow_slab:
 | ||
| -#endif  
 | ||
| +#endif
 | ||
|  	kmem_cache_destroy(ipt_netflow_cachep);
 | ||
|  err_free_hash:
 | ||
|  	vfree(ipt_netflow_hash);
 | ||
|  err:
 | ||
| +	printk(KERN_INFO "ipt_NETFLOW is not loaded.\n");
 | ||
|  	return -ENOMEM;
 | ||
|  }
 | ||
|  
 | ||
|  static void __exit ipt_netflow_fini(void)
 | ||
|  {
 | ||
| -	printk(KERN_INFO "ipt_netflow unloading..\n");
 | ||
| +	printk(KERN_INFO "ipt_NETFLOW unloading..\n");
 | ||
|  
 | ||
|  #ifdef CONFIG_SYSCTL
 | ||
|  	unregister_sysctl_table(netflow_sysctl_header);
 | ||
| @@ -1524,14 +3099,18 @@ static void __exit ipt_netflow_fini(void)
 | ||
|  #ifdef CONFIG_PROC_FS
 | ||
|  	remove_proc_entry("ipt_netflow", INIT_NET(proc_net_stat));
 | ||
|  #endif
 | ||
| -
 | ||
| -	xt_unregister_target(&ipt_netflow_reg);
 | ||
| -	__stop_scan_worker();
 | ||
| -	netflow_scan_and_export(1);
 | ||
| +	xt_unregister_targets(ipt_netflow_reg, ARRAY_SIZE(ipt_netflow_reg));
 | ||
| +#ifdef CONFIG_NF_NAT_NEEDED
 | ||
| +	if (natevents)
 | ||
| +		unregister_ct_events();
 | ||
| +#endif
 | ||
| +	_unschedule_scan_worker();
 | ||
| +	netflow_scan_and_export(AND_FLUSH);
 | ||
|  	del_timer_sync(&rate_timer);
 | ||
|  
 | ||
|  	synchronize_sched();
 | ||
|  
 | ||
| +	free_templates();
 | ||
|  	destination_removeall();
 | ||
|  	aggregation_remove(&aggr_n_list);
 | ||
|  	aggregation_remove(&aggr_p_list);
 | ||
| @@ -1539,7 +3118,7 @@ static void __exit ipt_netflow_fini(void)
 | ||
|  	kmem_cache_destroy(ipt_netflow_cachep);
 | ||
|  	vfree(ipt_netflow_hash);
 | ||
|  
 | ||
| -	printk(KERN_INFO "ipt_netflow unloaded.\n");
 | ||
| +	printk(KERN_INFO "ipt_NETFLOW unloaded.\n");
 | ||
|  }
 | ||
|  
 | ||
|  module_init(ipt_netflow_init);
 | ||
| diff --git a/ipt_NETFLOW.h b/ipt_NETFLOW.h
 | ||
| index 4a7b645..749f985 100644
 | ||
| --- a/ipt_NETFLOW.h
 | ||
| +++ b/ipt_NETFLOW.h
 | ||
| @@ -35,8 +35,8 @@ struct netflow5_record {
 | ||
|  	__be16		o_ifc;
 | ||
|  	__be32		nr_packets;
 | ||
|  	__be32		nr_octets;
 | ||
| -	__be32		ts_first;
 | ||
| -	__be32		ts_last;
 | ||
| +	__be32		first_ms;
 | ||
| +	__be32		last_ms;
 | ||
|  	__be16		s_port;
 | ||
|  	__be16		d_port;
 | ||
|  	__u8		reserved;
 | ||
| @@ -54,9 +54,9 @@ struct netflow5_record {
 | ||
|  struct netflow5_pdu {
 | ||
|  	__be16			version;
 | ||
|  	__be16			nr_records;
 | ||
| -	__be32			ts_uptime;
 | ||
| -	__be32			ts_usecs;
 | ||
| -	__be32			ts_unsecs;
 | ||
| +	__be32			ts_uptime; /* ms */
 | ||
| +	__be32			ts_usecs;  /* s  */
 | ||
| +	__be32			ts_unsecs; /* ns */
 | ||
|  	__be32			seq;
 | ||
|  	__u8			eng_type;
 | ||
|  	__u8			eng_id;
 | ||
| @@ -65,42 +65,185 @@ struct netflow5_pdu {
 | ||
|  } __attribute__ ((packed));
 | ||
|  #define NETFLOW5_HEADER_SIZE (sizeof(struct netflow5_pdu) - NETFLOW5_RECORDS_MAX * sizeof(struct netflow5_record))
 | ||
|  
 | ||
| +/* NetFlow v9 RFC http://www.ietf.org/rfc/rfc3954.txt */
 | ||
| +enum {
 | ||
| +	IN_BYTES = 1,
 | ||
| +	IN_PKTS = 2,
 | ||
| +	PROTOCOL = 4,
 | ||
| +	TOS = 5,
 | ||
| +	TCP_FLAGS = 6,
 | ||
| +	L4_SRC_PORT = 7,
 | ||
| +	IPV4_SRC_ADDR = 8,
 | ||
| +	SRC_MASK = 9,
 | ||
| +	INPUT_SNMP = 10,
 | ||
| +	L4_DST_PORT = 11,
 | ||
| +	IPV4_DST_ADDR = 12,
 | ||
| +	DST_MASK = 13,
 | ||
| +	OUTPUT_SNMP = 14,
 | ||
| +	IPV4_NEXT_HOP = 15,
 | ||
| +	//SRC_AS = 16,
 | ||
| +	//DST_AS = 17,
 | ||
| +	//BGP_IPV4_NEXT_HOP = 18,
 | ||
| +	//MUL_DST_PKTS = 19,
 | ||
| +	//MUL_DST_BYTES = 20,
 | ||
| +	LAST_SWITCHED = 21,
 | ||
| +	FIRST_SWITCHED = 22,
 | ||
| +	IPV6_SRC_ADDR = 27,
 | ||
| +	IPV6_DST_ADDR = 28,
 | ||
| +	IPV6_FLOW_LABEL = 31,
 | ||
| +	ICMP_TYPE = 32,
 | ||
| +	MUL_IGMP_TYPE = 33,
 | ||
| +	//TOTAL_BYTES_EXP = 40,
 | ||
| +	//TOTAL_PKTS_EXP = 41,
 | ||
| +	//TOTAL_FLOWS_EXP = 42,
 | ||
| +	IPV6_NEXT_HOP = 62,
 | ||
| +	IPV6_OPTION_HEADERS = 64,
 | ||
| +	commonPropertiesId = 137, /* for MARK */
 | ||
| +	ipv4Options = 208,
 | ||
| +	tcpOptions = 209,
 | ||
| +	postNATSourceIPv4Address = 225,
 | ||
| +	postNATDestinationIPv4Address = 226,
 | ||
| +	postNAPTSourceTransportPort = 227,
 | ||
| +	postNAPTDestinationTransportPort = 228,
 | ||
| +	natEvent = 230,
 | ||
| +	postNATSourceIPv6Address = 281,
 | ||
| +	postNATDestinationIPv6Address = 282,
 | ||
| +	IPSecSPI = 295,
 | ||
| +	observationTimeMilliseconds = 323,
 | ||
| +	observationTimeMicroseconds = 324,
 | ||
| +	observationTimeNanoseconds = 325,
 | ||
| +};
 | ||
| +
 | ||
| +enum {
 | ||
| +	FLOWSET_TEMPLATE = 0,
 | ||
| +	FLOWSET_OPTIONS = 1,
 | ||
| +	IPFIX_TEMPLATE = 2,
 | ||
| +	IPFIX_OPTIONS = 3,
 | ||
| +	FLOWSET_DATA_FIRST = 256,
 | ||
| +};
 | ||
| +
 | ||
| +struct flowset_template {
 | ||
| +	__be16	flowset_id;
 | ||
| +	__be16	length;
 | ||
| +	__be16	template_id;
 | ||
| +	__be16	field_count;
 | ||
| +} __attribute__ ((packed));
 | ||
| +
 | ||
| +struct flowset_data {
 | ||
| +	__be16	flowset_id;
 | ||
| +	__be16	length;
 | ||
| +} __attribute__ ((packed));
 | ||
| +
 | ||
| +/* NetFlow v9 packet. */
 | ||
| +struct netflow9_pdu {
 | ||
| +	__be16		version;
 | ||
| +	__be16		nr_records;
 | ||
| +	__be32		sys_uptime_ms;
 | ||
| +	__be32		export_time_s;
 | ||
| +	__be32		seq;
 | ||
| +	__be32		source_id; /* Exporter Observation Domain */
 | ||
| +	__u8		data[1400];
 | ||
| +} __attribute__ ((packed));
 | ||
| +
 | ||
| +/* IPFIX packet. */
 | ||
| +struct ipfix_pdu {
 | ||
| +	__be16		version;
 | ||
| +	__be16		length;
 | ||
| +	__be32		export_time_s;
 | ||
| +	__be32		seq;
 | ||
| +	__be32		odomain_id; /* Observation Domain ID */
 | ||
| +	__u8		data[1400];
 | ||
| +} __attribute__ ((packed));
 | ||
| +
 | ||
| +/* Maximum bytes flow can have, after it's reached flow will become
 | ||
| + * not searchable and will be exported soon. */
 | ||
| +#define FLOW_FULL_WATERMARK 0xffefffff
 | ||
| +
 | ||
| +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)
 | ||
| +union nf_inet_addr {
 | ||
| +	__be32          ip;
 | ||
| +	__be32          ip6[4];
 | ||
| +	struct in_addr  in;
 | ||
| +	struct in6_addr in6;
 | ||
| +};
 | ||
| +#endif
 | ||
| +
 | ||
|  /* hashed data which identify unique flow */
 | ||
| +/* 16+16 + 2+2 + 2+1+1+1 = 41 */
 | ||
|  struct ipt_netflow_tuple {
 | ||
| -	__be32		s_addr;	// Network byte order
 | ||
| -	__be32		d_addr; // -"-
 | ||
| -	__be16		s_port; // -"-
 | ||
| +	union nf_inet_addr src;
 | ||
| +	union nf_inet_addr dst;
 | ||
| +	__be16		s_port; // Network byte order
 | ||
|  	__be16		d_port; // -"-
 | ||
| -	__be16		i_ifc;	// Local byte order
 | ||
| +	__u16		i_ifc;	// Host byte order
 | ||
|  	__u8		protocol;
 | ||
|  	__u8		tos;
 | ||
| +	__u8		l3proto;
 | ||
|  };
 | ||
| -/* tuple size is rounded to u32s */
 | ||
| -#define NETFLOW_TUPLE_SIZE (sizeof(struct ipt_netflow_tuple) / 4)
 | ||
| -
 | ||
| -/* maximum bytes flow can have, after it reached flow become not searchable and will be exported soon */
 | ||
| -#define FLOW_FULL_WATERMARK 0xffefffff
 | ||
|  
 | ||
| -/* flow entry */
 | ||
| +/* hlist[2] + tuple[]: 8+8 + 41 = 57 (less than usual cache line, 64) */
 | ||
|  struct ipt_netflow {
 | ||
|  	struct hlist_node hlist; // hashtable search chain
 | ||
| -	struct list_head list; // all flows chain
 | ||
|  
 | ||
|  	/* unique per flow data (hashed, NETFLOW_TUPLE_SIZE) */
 | ||
|  	struct ipt_netflow_tuple tuple;
 | ||
|  
 | ||
|  	/* volatile data */
 | ||
| -	__be16		o_ifc;
 | ||
| +	union nf_inet_addr nh;
 | ||
| +	__u16		o_ifc;
 | ||
|  	__u8		s_mask;
 | ||
|  	__u8		d_mask;
 | ||
| +	__u8		tcp_flags; /* `OR' of all tcp flags */
 | ||
|  
 | ||
|  	/* flow statistics */
 | ||
|  	u_int32_t	nr_packets;
 | ||
|  	u_int32_t	nr_bytes;
 | ||
| -	unsigned long	ts_first;
 | ||
| -	unsigned long	ts_last;
 | ||
| -	__u8		tcp_flags; /* `OR' of all tcp flags */
 | ||
| +	union {
 | ||
| +		struct {
 | ||
| +			unsigned long first;
 | ||
| +			unsigned long last;
 | ||
| +		} ts;
 | ||
| +		ktime_t	ts_obs;
 | ||
| +	} _ts_un;
 | ||
| +#define ts_first _ts_un.ts.first
 | ||
| +#define ts_last  _ts_un.ts.last
 | ||
| +#define ts_obs   _ts_un.ts_obs
 | ||
| +	u_int32_t	flow_label; /* IPv6 */
 | ||
| +	u_int32_t	options; /* IPv4(16) & IPv6(32) Options */
 | ||
| +	u_int32_t	tcpoptions;
 | ||
| +#ifdef CONFIG_NF_CONNTRACK_MARK
 | ||
| +	u_int32_t	mark; /* Exported as commonPropertiesId */
 | ||
| +#endif
 | ||
| +#ifdef CONFIG_NF_NAT_NEEDED
 | ||
| +	__be32		s_as;
 | ||
| +	__be32		d_as;
 | ||
| +	struct nat_event *nat;
 | ||
| +#endif
 | ||
| +	struct list_head list; // all flows chain
 | ||
| +	spinlock_t	*lock;
 | ||
| +};
 | ||
| +
 | ||
| +#ifdef CONFIG_NF_NAT_NEEDED
 | ||
| +enum {
 | ||
| +	NAT_CREATE, NAT_DESTROY, NAT_POOLEXHAUSTED
 | ||
|  };
 | ||
| +struct nat_event {
 | ||
| +	struct list_head list;
 | ||
| +	struct {
 | ||
| +		__be32	s_addr;
 | ||
| +		__be32	d_addr;
 | ||
| +		__be16	s_port;
 | ||
| +		__be16	d_port;
 | ||
| +	} pre, post;
 | ||
| +	ktime_t		ts_ktime;
 | ||
| +	unsigned long	ts_jiffies;
 | ||
| +	__u8	protocol;
 | ||
| +	__u8	nat_event;
 | ||
| +};
 | ||
| +#define IS_DUMMY_FLOW(nf) (nf->nat)
 | ||
| +#else
 | ||
| +#define IS_DUMMY_FLOW(nf) 0
 | ||
| +#endif
 | ||
|  
 | ||
|  static inline int ipt_netflow_tuple_equal(const struct ipt_netflow_tuple *t1,
 | ||
|  				    const struct ipt_netflow_tuple *t2)
 | ||
| @@ -115,11 +258,13 @@ struct ipt_netflow_sock {
 | ||
|  	unsigned short port;
 | ||
|  	atomic_t wmem_peak;	// sk_wmem_alloc peak value
 | ||
|  	atomic_t err_full;	// socket filled error
 | ||
| +	atomic_t err_connect;	// connect errors
 | ||
|  	atomic_t err_other;	// other socket errors
 | ||
|  };
 | ||
|  
 | ||
|  struct netflow_aggr_n {
 | ||
|  	struct list_head list;
 | ||
| +	atomic_t usage;
 | ||
|  	__u32 mask;
 | ||
|  	__u32 addr;
 | ||
|  	__u32 aggr_mask;
 | ||
| @@ -128,6 +273,7 @@ struct netflow_aggr_n {
 | ||
|  
 | ||
|  struct netflow_aggr_p {
 | ||
|  	struct list_head list;
 | ||
| +	atomic_t usage;
 | ||
|  	__u16 port1;
 | ||
|  	__u16 port2;
 | ||
|  	__u16 aggr_port;
 | ||
| diff --git a/libipt_NETFLOW.c b/libipt_NETFLOW.c
 | ||
| index d85b6d9..a0f9e5d 100644
 | ||
| --- a/libipt_NETFLOW.c
 | ||
| +++ b/libipt_NETFLOW.c
 | ||
| @@ -58,24 +58,24 @@
 | ||
|  #define _IPT_IP struct ipt_ip
 | ||
|  #endif
 | ||
|  
 | ||
| +#ifndef IPTABLES_VERSION
 | ||
| +#define IPTABLES_VERSION XTABLES_VERSION
 | ||
| +#endif
 | ||
| +
 | ||
|  static struct option opts[] = {
 | ||
| -  {0}
 | ||
| +  { 0 }
 | ||
|  };
 | ||
|  
 | ||
|  static void help(void)
 | ||
|  {
 | ||
| -	printf( "NETFLOW target\n");
 | ||
| +	printf("NETFLOW target\n");
 | ||
|  }
 | ||
|  
 | ||
| -//static int parse(int c, char **argv, int invert, unsigned int *flags,
 | ||
| -//      const _IPT_ENTRY *entry,
 | ||
| -//      struct ipt_entry_target **target)
 | ||
|  static int parse(int c, char **argv, int invert, unsigned int *flags,
 | ||
|  	     const _IPT_ENTRY  *entry,
 | ||
|  	     struct ipt_entry_target **targetinfo)
 | ||
|  
 | ||
|  {
 | ||
| -
 | ||
|  	return 1;
 | ||
|  }
 | ||
|  
 | ||
| @@ -95,16 +95,9 @@ static void print(const _IPT_IP *ip,
 | ||
|  }
 | ||
|  
 | ||
|  static struct iptables_target netflow = { 
 | ||
| -#ifdef MOD140
 | ||
| -	.family		= AF_INET,
 | ||
| -#endif
 | ||
|  	.next		= NULL,
 | ||
|  	.name		= "NETFLOW",
 | ||
| -#ifdef XTABLES_VERSION
 | ||
| -	.version	= XTABLES_VERSION,
 | ||
| -#else
 | ||
|  	.version	= IPTABLES_VERSION,
 | ||
| -#endif
 | ||
|  	.size           = IPT_ALIGN(0),
 | ||
|  	.userspacesize  = IPT_ALIGN(0),
 | ||
|  	.help		= &help,
 | ||
| diff --git a/murmur3.h b/murmur3.h
 | ||
| new file mode 100644
 | ||
| index 0000000..57a6006
 | ||
| --- /dev/null
 | ||
| +++ b/murmur3.h
 | ||
| @@ -0,0 +1,42 @@
 | ||
| +/* MurmurHash3, based on https://code.google.com/p/smhasher of Austin Appleby. */
 | ||
| +
 | ||
| +static __always_inline uint32_t rotl32(const uint32_t x, const int8_t r)
 | ||
| +{
 | ||
| +	return (x << r) | (x >> (32 - r));
 | ||
| +}
 | ||
| +
 | ||
| +static __always_inline uint32_t fmix32(register uint32_t h)
 | ||
| +{
 | ||
| +	h ^= h >> 16;
 | ||
| +	h *= 0x85ebca6b;
 | ||
| +	h ^= h >> 13;
 | ||
| +	h *= 0xc2b2ae35;
 | ||
| +	h ^= h >> 16;
 | ||
| +	return h;
 | ||
| +}
 | ||
| +
 | ||
| +static inline uint32_t murmur3(const void *key, const uint32_t len, const uint32_t seed)
 | ||
| +{
 | ||
| +	const uint32_t c1 = 0xcc9e2d51;
 | ||
| +	const uint32_t c2 = 0x1b873593;
 | ||
| +	const uint32_t *blocks;
 | ||
| +	const uint8_t *tail;
 | ||
| +	register uint32_t h1 = seed;
 | ||
| +	uint32_t k1 = 0;
 | ||
| +	uint32_t i;
 | ||
| +
 | ||
| +	blocks = (const uint32_t *)key;
 | ||
| +	for (i = len / 4; i; --i) {
 | ||
| +		h1 ^= rotl32(*blocks++ * c1, 15) * c2;
 | ||
| +		h1 = rotl32(h1, 13) * 5 + 0xe6546b64;
 | ||
| +	}
 | ||
| +	tail = (const uint8_t*)blocks;
 | ||
| +	switch (len & 3) {
 | ||
| +		case 3: k1 ^= tail[2] << 16;
 | ||
| +		case 2: k1 ^= tail[1] << 8;
 | ||
| +		case 1: k1 ^= tail[0];
 | ||
| +			h1 ^= rotl32(k1 * c1, 15) * c2;
 | ||
| +	}
 | ||
| +	return fmix32(h1^ len);
 | ||
| +}
 | ||
| +
 | ||
| diff --git a/raw_promisc_debian_squeeze6.patch b/raw_promisc_debian_squeeze6.patch
 | ||
| new file mode 100644
 | ||
| index 0000000..69d0d35
 | ||
| --- /dev/null
 | ||
| +++ b/raw_promisc_debian_squeeze6.patch
 | ||
| @@ -0,0 +1,37 @@
 | ||
| +
 | ||
| + Short manual and patch for Debian Squeeze
 | ||
| + suggested by Pavel Odintsov:
 | ||
| +
 | ||
| +On Thu, Dec 27, 2012 at 07:46:30PM +0400, Pavel Odintsov wrote:
 | ||
| +>
 | ||
| +> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> Debian Squeeze <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> promisc.
 | ||
| +> 
 | ||
| +> cd /usr/src
 | ||
| +> apt-get install -y dpkg-dev
 | ||
| +> apt-get build-dep  linux-image-2.6.32-5-amd64
 | ||
| +> cd linux-2.6-2.6.32/
 | ||
| +> apt-get source  linux-image-2.6.32-5-amd64
 | ||
| +> 
 | ||
| +> wget .... /root/raw_promisc_debian_squeeze6.patch
 | ||
| +> patch -p1 < raw_promisc_debian_squeeze6.patch
 | ||
| +> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>:
 | ||
| +> debian/rules source
 | ||
| +> 
 | ||
| +> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>:
 | ||
| +> debian/rules binary
 | ||
| +> 
 | ||
| +
 | ||
| +diff -rupN linux-2.6-2.6.32/net/ipv4/ip_input.c linux-2.6-2.6.32_promisc_raw//net/ipv4/ip_input.c
 | ||
| +--- linux-2.6-2.6.32/net/ipv4/ip_input.c	2009-12-03 04:51:21.000000000 +0100
 | ||
| ++++ linux-2.6-2.6.32_promisc_raw//net/ipv4/ip_input.c	2012-06-25 19:13:49.000000000 +0200
 | ||
| +@@ -383,8 +383,8 @@ int ip_rcv(struct sk_buff *skb, struct n
 | ||
| + 	/* When the interface is in promisc. mode, drop all the crap
 | ||
| + 	 * that it receives, do not try to analyse it.
 | ||
| + 	 */
 | ||
| +-	if (skb->pkt_type == PACKET_OTHERHOST)
 | ||
| +-		goto drop;
 | ||
| ++	//if (skb->pkt_type == PACKET_OTHERHOST)
 | ||
| ++	//	goto drop;
 | ||
| + 
 | ||
| + 
 | ||
| + 	IP_UPD_PO_STATS_BH(dev_net(dev), IPSTATS_MIB_IN, skb->len);
 |