diff --git a/extensions/Kbuild b/extensions/Kbuild index 4a5f57c..6274627 100644 --- a/extensions/Kbuild +++ b/extensions/Kbuild @@ -10,6 +10,7 @@ obj-${build_CHAOS} += xt_CHAOS.o obj-${build_CHECKSUM} += xt_CHECKSUM.o obj-${build_DELUDE} += xt_DELUDE.o obj-${build_DHCPMAC} += xt_DHCPMAC.o +obj-${build_DNETMAP} += xt_DNETMAP.o obj-${build_ECHO} += xt_ECHO.o obj-${build_IPMARK} += xt_IPMARK.o obj-${build_LOGMARK} += xt_LOGMARK.o diff --git a/extensions/Mbuild b/extensions/Mbuild index 5a1eeba..c0e7ca3 100644 --- a/extensions/Mbuild +++ b/extensions/Mbuild @@ -5,6 +5,7 @@ obj-${build_CHAOS} += libxt_CHAOS.so obj-${build_CHECKSUM} += libxt_CHECKSUM.so obj-${build_DELUDE} += libxt_DELUDE.so obj-${build_DHCPMAC} += libxt_DHCPMAC.so libxt_dhcpmac.so +obj-${build_DNETMAP} += libxt_DNETMAP.so obj-${build_ECHO} += libxt_ECHO.so obj-${build_IPMARK} += libxt_IPMARK.so obj-${build_LOGMARK} += libxt_LOGMARK.so diff --git a/extensions/libxt_DNETMAP.c b/extensions/libxt_DNETMAP.c new file mode 100644 index 0000000..ee60aef --- /dev/null +++ b/extensions/libxt_DNETMAP.c @@ -0,0 +1,245 @@ +/* Shared library add-on to iptables to add DNETMAP support. + * (C) 2010 Marek Kierdelewicz + * + * uses some code from libipt_NETMAP by: + * Svenning Soerensen + */ + +#include +#include +#include +#include +#include +#include +#include +#include "xt_DNETMAP.h" + +#define MODULENAME "DNETMAP" + +static const struct option DNETMAP_opts[] = { + {"prefix", 1, NULL, 'p'}, + {"reuse", 0, NULL, 'r'}, + {"ttl", 1, NULL, 't'}, + {.name = NULL} +}; + +static void DNETMAP_help(void) +{ + printf(MODULENAME " target options:\n" + " --%s address[/mask]\n" + " Network subnet to map to. If not specified, all existing prefixes are used.\n" + " --%s\n" + " Reuse entry for given prenat-ip from any prefix despite bindings ttl < 0.\n" + " --%s seconds\n" + " Regenerate bindings ttl value to seconds. If negative value is specified,\n" + " bindings ttl is kept unchanged. 
If not specified then default ttl value (600s)\n" + " is used.\n\n", + DNETMAP_opts[0].name, DNETMAP_opts[1].name, + DNETMAP_opts[2].name); +} + +static u_int32_t bits2netmask(int bits) +{ + u_int32_t netmask, bm; + + if (bits >= 32 || bits < 0) + return ~0; + for (netmask = 0, bm = 0x80000000; bits; bits--, bm >>= 1) + netmask |= bm; + return htonl(netmask); +} + +static int netmask2bits(u_int32_t netmask) +{ + u_int32_t bm; + int bits; + + netmask = ntohl(netmask); + for (bits = 0, bm = 0x80000000; netmask & bm; netmask <<= 1) + bits++; + if (netmask) + return -1; /* holes in netmask */ + return bits; +} + +static void DNETMAP_init(struct xt_entry_target *t) +{ + struct xt_DNETMAP_tginfo *tginfo = (struct xt_DNETMAP_tginfo *)&t->data; + struct nf_nat_multi_range *mr = &tginfo->prefix; + + /* Actually, it's 0, but it's ignored at the moment. */ + mr->rangesize = 1; + tginfo->ttl = 0; + tginfo->flags = 0; +} + +/* Parses network address */ +static void parse_prefix(char *arg, struct nf_nat_range *range) +{ + char *slash; + const struct in_addr *ip; + u_int32_t netmask; + unsigned int bits; + + range->flags |= IP_NAT_RANGE_MAP_IPS; + slash = strchr(arg, '/'); + if (slash) + *slash = '\0'; + + ip = xtables_numeric_to_ipaddr(arg); + if (!ip) + xtables_error(PARAMETER_PROBLEM, "Bad IP address \"%s\"\n", + arg); + range->min_ip = ip->s_addr; + if (slash) { + if (strchr(slash + 1, '.')) { + ip = xtables_numeric_to_ipmask(slash + 1); + if (!ip) + xtables_error(PARAMETER_PROBLEM, + "Bad netmask \"%s\"\n", + slash + 1); + netmask = ip->s_addr; + } else { + if (!xtables_strtoui(slash + 1, NULL, &bits, 0, 32)) + xtables_error(PARAMETER_PROBLEM, + "Bad netmask \"%s\"\n", + slash + 1); + netmask = bits2netmask(bits); + } + /* Don't allow /0 (/1 is probably insane, too) */ + if (netmask == 0) + xtables_error(PARAMETER_PROBLEM, "Netmask needed\n"); + /* Mask should be <= then /16 */ + if (bits < 16) + xtables_error(PARAMETER_PROBLEM, + "Max netmask size is /16\n"); + } else + 
netmask = ~0; + + if (range->min_ip & ~netmask) { + if (slash) + *slash = '/'; + xtables_error(PARAMETER_PROBLEM, "Bad network address \"%s\"\n", + arg); + } + range->max_ip = range->min_ip | ~netmask; +} + +static int DNETMAP_parse(int c, char **argv, int invert, unsigned int *flags, + const void *entry, struct xt_entry_target **target) +{ + struct xt_DNETMAP_tginfo *tginfo = + (struct xt_DNETMAP_tginfo *)(*target)->data; + struct nf_nat_multi_range *mr = &tginfo->prefix; + char *end; + + switch (c) { + case 'p': + xtables_param_act(XTF_ONLY_ONCE, MODULENAME, "--prefix", + *flags & XT_DNETMAP_PREFIX); + xtables_param_act(XTF_NO_INVERT, MODULENAME, "--prefix", + invert); + + /* TO-DO use xtables_ipparse_any instead? */ + parse_prefix(optarg, &mr->range[0]); + *flags |= XT_DNETMAP_PREFIX; + tginfo->flags |= XT_DNETMAP_PREFIX; + return 1; + case 'r': + xtables_param_act(XTF_ONLY_ONCE, MODULENAME, "--reuse", + *flags & XT_DNETMAP_REUSE); + xtables_param_act(XTF_NO_INVERT, MODULENAME, "--reuse", invert); + *flags |= XT_DNETMAP_REUSE; + tginfo->flags |= XT_DNETMAP_REUSE; + return 1; + case 't': + xtables_param_act(XTF_ONLY_ONCE, MODULENAME, "--ttl", + *flags & XT_DNETMAP_TTL); + xtables_param_act(XTF_NO_INVERT, MODULENAME, "--ttl", invert); + *flags |= XT_DNETMAP_TTL; + tginfo->flags |= XT_DNETMAP_TTL; + tginfo->ttl = strtol(optarg, &end, 10); + if (*end != '\0') + return 0; + return 1; + default: + return 0; + } +} + +static void DNETMAP_print_addr(const void *ip, + const struct xt_entry_target *target, + int numeric) +{ + struct xt_DNETMAP_tginfo *tginfo = + (struct xt_DNETMAP_tginfo *)&target->data; + const struct nf_nat_multi_range *mr = &tginfo->prefix; + const struct nf_nat_range *r = &mr->range[0]; + struct in_addr a; + int bits; + + a.s_addr = r->min_ip; + printf("%s", xtables_ipaddr_to_numeric(&a)); + a.s_addr = ~(r->min_ip ^ r->max_ip); + bits = netmask2bits(a.s_addr); + if (bits < 0) + printf("/%s", xtables_ipaddr_to_numeric(&a)); + else + printf("/%d", 
bits); +} + +static void DNETMAP_print(const void *ip, const struct xt_entry_target *target, + int numeric) +{ + struct xt_DNETMAP_tginfo *tginfo = + (struct xt_DNETMAP_tginfo *)&target->data; + const __u8 *flags = &tginfo->flags; + + printf("prefix "); + if (*flags & XT_DNETMAP_PREFIX) + DNETMAP_print_addr(ip, target, numeric); + else + printf("any"); + + printf(" reuse %i", (*flags & XT_DNETMAP_REUSE) > 0); + if (*flags & XT_DNETMAP_TTL) + printf(" ttl %i", tginfo->ttl); + else + printf(" ttl default"); +} + +static void DNETMAP_save(const void *ip, const struct xt_entry_target *target) +{ + struct xt_DNETMAP_tginfo *tginfo = + (struct xt_DNETMAP_tginfo *)&target->data; + const __u8 *flags = &tginfo->flags; + + if (*flags & XT_DNETMAP_PREFIX) { + printf("--%s", DNETMAP_opts[0].name); + DNETMAP_print_addr(ip, target, 0); + } + printf(" --reuse %i", *flags & XT_DNETMAP_REUSE); + + /* ommited because default value can change as kernel mod param */ + if (*flags & XT_DNETMAP_TTL) + printf(" --ttl %i", tginfo->ttl); +} + +static struct xtables_target dnetmap_tg_reg = { + .name = MODULENAME, + .version = XTABLES_VERSION, + .family = NFPROTO_IPV4, + .size = XT_ALIGN(sizeof(struct xt_DNETMAP_tginfo)), + .userspacesize = XT_ALIGN(sizeof(struct xt_DNETMAP_tginfo)), + .help = DNETMAP_help, + .init = DNETMAP_init, + .parse = DNETMAP_parse, + .print = DNETMAP_print, + .save = DNETMAP_save, + .extra_opts = DNETMAP_opts, +}; + +static void _init(void) +{ + xtables_register_target(&dnetmap_tg_reg); +} diff --git a/extensions/libxt_DNETMAP.man b/extensions/libxt_DNETMAP.man new file mode 100644 index 0000000..6a0d44b --- /dev/null +++ b/extensions/libxt_DNETMAP.man @@ -0,0 +1,91 @@ +The \fBDNETMAP\fR target allows dynamic two-way 1:1 mapping of IPv4 subnets. +Single rule can map private subnet to shorter public subnet creating and +maintaining unambigeous private-public ip bindings. Second rule can be used to +map new flows to private subnet according to maintained bindings. 
Target allows
+efficient public IPv4 space usage and unambiguous NAT at the same time.
+
+Target can be used only in \fBnat\fR table in \fBPOSTROUTING\fR or \fBOUTPUT\fR
+chains for SNAT and in \fBPREROUTING\fR for DNAT. Only flows directed to bound
+IPs will be DNATed. Packet continues chain traversal if there is no free
+postnat-ip to be assigned to prenat-ip. Default binding \fBttl\fR is \fI10
+minutes\fR and can be changed using \fBdefault_ttl\fR module option. Default ip
+hash size is 256 and can be changed using \fBhash_size\fR module option.
+
+.TP
+\fB\-\-prefix\fR \fIaddr\fR\fB/\fR\fImask\fR
+Network subnet to map to. If not specified, all existing prefixes are used.
+.TP
+\fB\-\-reuse\fR
+Reuse entry for given prenat-ip from any prefix despite bindings ttl < 0.
+.TP
+\fB\-\-ttl\fR \fIseconds\fR
+Regenerate bindings ttl value to \fIseconds\fR. If negative value is specified,
+bindings ttl is kept unchanged. If not specified then default ttl value (600s)
+is used.
+.PP
+\fB* /proc interface\fR
+
+Module creates following entries for each new specified subnet:
+.TP
+\fB/proc/net/xt_DNETMAP/\fR\fIsubnet\fR\fB_\fR\fImask\fR
+Contains binding table for subnet/mask. Each line contains \fBprenat-ip\fR,
+\fBpostnat-ip\fR, \fBttl\fR (seconds till entry times out), \fBlasthit\fR (last
+entry hit in seconds relative to system boot time).
+.TP
+\fB/proc/net/xt_DNETMAP/\fR\fIsubnet\fR\fB_\fR\fImask\fR\fB_stat\fR
+Contains statistics for given subnet/mask. Line contains three
+numerical values separated by spaces. First one is number of currently used
+addresses (bindings with negative ttl excluded), second one is number of all
+usable addresses in subnet and third one is mean \fBttl\fR value for all active
+entries.
+.PP
+Entries are removed if the last iptables rule for a specific subnet is deleted.
+
+\fB* Logging\fR
+
+Module logs binding add/timeout events to klog. This behaviour can be disabled
+using \fBdisable_log\fR module parameter.
+
+\fB* Examples\fR
+
+\fB1.\fR Map subnet 192.168.0.0/24 to subnet 20.0.0.0/26. SNAT only:
+
+iptables -t nat -A POSTROUTING -s 192.168.0.0/24 -j DNETMAP --prefix 20.0.0.0/26
+
+Active hosts from 192.168.0.0/24 subnet are mapped to 20.0.0.0/26. If packet
+from not yet bound prenat-ip hits the rule and there are no free or timed-out
+(ttl<0) entries in prefix 20.0.0.0/26, then notice is logged to klog and chain
+traversal continues. If packet from already bound prenat-ip hits the rule,
+bindings ttl value is regenerated to default_ttl and SNAT is performed.
+
+\fB2.\fR Use of \fB\-\-reuse\fR and \fB\-\-ttl\fR switches, multiple rule
+interaction:
+
+iptables -t nat -A POSTROUTING -s 192.168.0.0/24 -j DNETMAP --prefix
+20.0.0.0/26 --reuse --ttl 200
+
+iptables -t nat -A POSTROUTING -s 192.168.0.0/24 -j DNETMAP --prefix 30.0.0.0/26
+
+Active hosts from 192.168.0.0/24 subnet are mapped to 20.0.0.0/26 with ttl =
+200 seconds. If there are no free addresses in first prefix the next one
+(30.0.0.0/26) is used with default ttl. It's important to note that the first
+rule SNATs all flows whose source IP is already actively (ttl>0) bound to ANY
+prefix. Parameter \fB\-\-reuse\fR makes this functionality work even for
+inactive (ttl<0) entries.
+
+If both subnets are exhausted, then chain traversal continues.
+
+\fB3.\fR Map 192.168.0.0/24 to subnet 20.0.0.0/26 bidirectional way:
+
+iptables -t nat -A POSTROUTING -s 192.168.0.0/24 -j DNETMAP --prefix 20.0.0.0/26
+
+iptables -t nat -A PREROUTING -j DNETMAP
+
+If host 192.168.0.10 generates some traffic, it gets bound to first free IP in
+subnet - 20.0.0.0. Now any traffic directed to 20.0.0.0 gets DNATed to
+192.168.0.10 as long as there's an active (ttl>0) binding. There's no need to
+specify \fB\-\-prefix\fR parameter in PREROUTING rule, because this way it DNATs
+traffic to all active prefixes. You could specify prefix if you'd like to make
+DNAT work for specific prefix only.
+
+.
diff --git a/extensions/xt_DNETMAP.c b/extensions/xt_DNETMAP.c new file mode 100644 index 0000000..2d7eeb6 --- /dev/null +++ b/extensions/xt_DNETMAP.c @@ -0,0 +1,704 @@ +/* DNETMAP - dynamic two-way 1:1 NAT mapping of IPv4 network addresses. + * The mapping can be applied to source (POSTROUTING|OUTPUT) + * or destination (PREROUTING), + */ + +/* (C) 2010 Marek Kierdelewicz + * + * module is dedicated to my wife Eliza and my daughters Jula and Ola :* :* :* + * + * module uses some code and ideas from following modules: + * - "NETMAP" module by Svenning Soerensen + * - "recent" module by Stephen Frost + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "xt_DNETMAP.h" + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Marek Kierdelewicz "); +MODULE_DESCRIPTION( + "Xtables: dynamic two-way 1:1 NAT mapping of IPv4 addresses"); + +static unsigned int default_ttl = 600; +static unsigned int proc_perms = 0644; +static unsigned int proc_uid; +static unsigned int proc_gid; +static unsigned int default_hash_size = 256; +static unsigned int hash_size = 256; +static unsigned int disable_log; +static unsigned int whole_prefix = 1; +module_param(default_ttl, uint, 0400); +MODULE_PARM_DESC(default_ttl, + " default ttl value to be used if rule doesn't specify any (default: 600)"); +module_param(hash_size, uint, 0400); +MODULE_PARM_DESC(hash_size, + " hash size for ip lists, needs to be power of 2 (default: 256)"); +module_param(disable_log, uint, 0400); +MODULE_PARM_DESC(disable_log, + " disables logging of bind/timeout events (default: 0)"); +module_param(whole_prefix, uint, 0400); +MODULE_PARM_DESC(whole_prefix, + " use network and broadcast addresses of specified prefix for bindings (default: 1)"); 
+ +static unsigned int jtimeout; + +struct dnetmap_entry { + struct list_head list; + /* priv2entry */ + struct list_head glist; + /* pub2entry */ + struct list_head grlist; + struct list_head lru_list; + __be32 prenat_addr; + __be32 postnat_addr; + unsigned long stamp; + struct dnetmap_prefix *prefix; +}; + +struct dnetmap_prefix { + struct nf_nat_multi_range_compat prefix; + char prefix_str[16]; + struct list_head list; + unsigned int refcnt; + /* lru entry list */ + struct list_head lru_list; + /* hash based on prenat-ips */ + struct list_head iphash[0]; +}; + +struct dnetmap_net { + struct list_head prefixes; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *xt_dnetmap; +#endif + /* global hash */ + struct list_head *dnetmap_iphash; +}; + +static int dnetmap_net_id; +static inline struct dnetmap_net *dnetmap_pernet(struct net *net) +{ + return net_generic(net, dnetmap_net_id); +} + +static DEFINE_SPINLOCK(dnetmap_lock); +static DEFINE_MUTEX(dnetmap_mutex); + +#ifdef CONFIG_PROC_FS +static const struct file_operations dnetmap_tg_fops; +#endif + +static int dnetmap_stat_proc_read(char __user *buffer, char **start, + off_t offset, int length, int *eof, + void *data); + +static inline unsigned int dnetmap_entry_hash(const __be32 addr) +{ + return ntohl(addr) & (hash_size - 1); +} + +static struct dnetmap_entry *dnetmap_entry_lookup(struct dnetmap_net + *dnetmap_net, + const __be32 addr) +{ + struct dnetmap_entry *e; + unsigned int h; + + h = dnetmap_entry_hash(addr); + + list_for_each_entry(e, &dnetmap_net->dnetmap_iphash[h], glist) + if (memcmp(&e->prenat_addr, &addr, sizeof(e->prenat_addr)) == 0) + return e; + return NULL; +} + +static struct dnetmap_entry *dnetmap_entry_rlookup(struct dnetmap_net + *dnetmap_net, + const __be32 addr) +{ + struct dnetmap_entry *e; + unsigned int h; + + h = dnetmap_entry_hash(addr); + + list_for_each_entry(e, &dnetmap_net->dnetmap_iphash[hash_size + h], + grlist) + if (memcmp(&e->postnat_addr, &addr, sizeof(e->postnat_addr)) == 0) + 
return e; + return NULL; +} + +static struct dnetmap_prefix *dnetmap_prefix_lookup(struct dnetmap_net + *dnetmap_net, + const struct + nf_nat_multi_range_compat + *mr) +{ + struct dnetmap_prefix *p; + + list_for_each_entry(p, &dnetmap_net->prefixes, list) + if (!memcmp(&p->prefix, mr, sizeof(*mr))) + return p; + return NULL; +} + +static void dnetmap_prefix_flush(struct dnetmap_net *dnetmap_net, + struct dnetmap_prefix *p) +{ + struct dnetmap_entry *e, *next; + unsigned int i; + + for (i = 0; i < hash_size; i++) { + list_for_each_entry_safe(e, next, + &dnetmap_net->dnetmap_iphash[i], glist) + if (e->prefix == p) + list_del(&e->glist); + + list_for_each_entry_safe(e, next, + &dnetmap_net-> + dnetmap_iphash[hash_size + i], grlist) + if (e->prefix == p) + list_del(&e->grlist); + + list_for_each_entry_safe(e, next, &p->iphash[i], list) { + list_del(&e->list); + list_del(&e->lru_list); + kfree(e); + } + } +} + +static int dnetmap_tg_check(const struct xt_tgchk_param *par) +{ + struct dnetmap_net *dnetmap_net = dnetmap_pernet(par->net); + const struct xt_DNETMAP_tginfo *tginfo = par->targinfo; + const struct nf_nat_multi_range_compat *mr = &tginfo->prefix; + struct dnetmap_prefix *p; + struct dnetmap_entry *e; +#ifdef CONFIG_PROC_FS + struct proc_dir_entry *pde_data, *pde_stat; + char proc_str_data[20]; + char proc_str_stat[25]; +#endif + int ret = -EINVAL; + int i; + __be32 a; + __u32 ip_min, ip_max, ip; + + /* prefix not specified - no need to do anything */ + if (!(tginfo->flags & XT_DNETMAP_PREFIX)) { + ret = 0; + return ret; + } + + if (!(mr->range[0].flags & IP_NAT_RANGE_MAP_IPS)) { + pr_debug("DNETMAP:check: bad MAP_IPS.\n"); + return -EINVAL; + } + if (mr->rangesize != 1) { + pr_debug("DNETMAP:check: bad rangesize %u.\n", mr->rangesize); + return -EINVAL; + } + + mutex_lock(&dnetmap_mutex); + p = dnetmap_prefix_lookup(dnetmap_net, mr); + + if (p != NULL) { + p->refcnt++; + ret = 0; + goto out; + } + + p = kzalloc(sizeof(*p) + sizeof(struct list_head) * hash_size 
* 2, + GFP_KERNEL); + if (p == NULL) { + ret = -ENOMEM; + goto out; + } + p->refcnt = 1; + memcpy(&p->prefix, mr, sizeof(*mr)); + + INIT_LIST_HEAD(&p->lru_list); + for (i = 0; i < hash_size * 2; i++) + INIT_LIST_HEAD(&p->iphash[i]); + + ip_min = ntohl(mr->range[0].min_ip) + (whole_prefix == 0); + ip_max = ntohl(mr->range[0].max_ip) - (whole_prefix == 0); + + sprintf(p->prefix_str, "%pI4/%i", &mr->range[0].min_ip, + 33 - ffs(~(ip_min ^ ip_max))); +#ifdef CONFIG_PROC_FS + sprintf(proc_str_data, "%pI4_%i", &mr->range[0].min_ip, + 33 - ffs(~(ip_min ^ ip_max))); + sprintf(proc_str_stat, "%pI4_%i_stat", &mr->range[0].min_ip, + 33 - ffs(~(ip_min ^ ip_max))); +#endif + printk(KERN_INFO KBUILD_MODNAME ": new prefix %s\n", p->prefix_str); + + for (ip = ip_min; ip <= ip_max; ip++) { + a = htonl(ip); + e = kmalloc(sizeof(*e), GFP_ATOMIC); + if (e == NULL) + return 0; + e->postnat_addr = a; + e->prenat_addr = 0; + e->stamp = jiffies; + e->prefix = p; + list_add_tail(&e->lru_list, &p->lru_list); + } + +#ifdef CONFIG_PROC_FS + /* data */ + pde_data = + proc_create_data(proc_str_data, proc_perms, dnetmap_net->xt_dnetmap, + &dnetmap_tg_fops, p); + if (pde_data == NULL) { + kfree(p); + ret = -ENOMEM; + goto out; + } + pde_data->uid = proc_uid; + pde_data->gid = proc_gid; + + /* statistics */ + pde_stat = + create_proc_entry(proc_str_stat, proc_perms, + dnetmap_net->xt_dnetmap); + if (pde_stat == NULL) { + kfree(p); + ret = -ENOMEM; + goto out; + } + pde_stat->data = p; + pde_stat->read_proc = dnetmap_stat_proc_read; + pde_stat->uid = proc_uid; + pde_stat->gid = proc_gid; +#endif + + spin_lock_bh(&dnetmap_lock); + list_add_tail(&p->list, &dnetmap_net->prefixes); + spin_unlock_bh(&dnetmap_lock); + ret = 0; + +out: + mutex_unlock(&dnetmap_mutex); + return ret; +} + +static unsigned int +dnetmap_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + struct net *net = dev_net(par->in ? 
par->in : par->out); + struct dnetmap_net *dnetmap_net = dnetmap_pernet(net); + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + __be32 prenat_ip, postnat_ip, prenat_ip_prev; + const struct xt_DNETMAP_tginfo *tginfo = par->targinfo; + const struct nf_nat_multi_range_compat *mr = &tginfo->prefix; + struct nf_nat_range newrange; + struct dnetmap_entry *e; + struct dnetmap_prefix *p; + __s32 jttl; + + NF_CT_ASSERT(par->hooknum == NF_INET_POST_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT || + par->hooknum == NF_INET_PRE_ROUTING); + ct = nf_ct_get(skb, &ctinfo); + + jttl = tginfo->flags & XT_DNETMAP_TTL ? tginfo->ttl * HZ : jtimeout; + + /* in prerouting we try to map postnat-ip to prenat-ip */ + if (par->hooknum == NF_INET_PRE_ROUTING) { + postnat_ip = ip_hdr(skb)->daddr; + + spin_lock_bh(&dnetmap_lock); + + e = dnetmap_entry_rlookup(dnetmap_net, postnat_ip); + + if (e == NULL) + goto no_rev_map; /* no binding found */ + + /* if prefix is specified, we check if + it matches lookedup entry */ + if (tginfo->flags & XT_DNETMAP_PREFIX) { + if (memcmp(mr, &e->prefix, sizeof(*mr))) + goto no_rev_map; + } + /* don't reset ttl if flag is set */ + if (jttl >= 0) { + p = e->prefix; + e->stamp = jiffies + jttl; + list_move_tail(&e->lru_list, &p->lru_list); + } + + spin_unlock_bh(&dnetmap_lock); + + newrange = ((struct nf_nat_range) { + mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, + e->prenat_addr, e->prenat_addr, + mr->range[0].min, mr->range[0].max}); + + /* Hand modified range to generic setup. 
*/ + return nf_nat_setup_info(ct, &newrange, + HOOK2MANIP(par->hooknum)); + + } + + prenat_ip = ip_hdr(skb)->saddr; + spin_lock_bh(&dnetmap_lock); + + p = dnetmap_prefix_lookup(dnetmap_net, mr); + e = dnetmap_entry_lookup(dnetmap_net, prenat_ip); + + if (e == NULL) { /* need for new binding */ + +bind_new_prefix: + e = list_entry(p->lru_list.next, struct dnetmap_entry, + lru_list); + if (e->prenat_addr != 0 && time_before(jiffies, e->stamp)) { + if (!disable_log) + printk(KERN_INFO KBUILD_MODNAME + ": ip %pI4 - no free adresses in prefix %s\n", + &prenat_ip, p->prefix_str); + goto no_free_ip; + } + + postnat_ip = e->postnat_addr; + + if (e->prenat_addr != 0) { + prenat_ip_prev = e->prenat_addr; + if (!disable_log) + printk(KERN_INFO KBUILD_MODNAME + ": timeout binding %pI4 -> %pI4\n", + &prenat_ip_prev, &postnat_ip); + list_del(&e->list); + list_del(&e->glist); + list_del(&e->grlist); + } + + e->prenat_addr = prenat_ip; + e->stamp = jiffies + jttl; + list_move_tail(&e->lru_list, &p->lru_list); + list_add_tail(&e->list, + &p->iphash[dnetmap_entry_hash(prenat_ip)]); + list_add_tail(&e->glist, + &dnetmap_net-> + dnetmap_iphash[dnetmap_entry_hash(prenat_ip)]); + list_add_tail(&e->grlist, + &dnetmap_net->dnetmap_iphash[hash_size + + dnetmap_entry_hash + (postnat_ip)]); + if (!disable_log) + printk(KERN_INFO KBUILD_MODNAME + ": add binding %pI4 -> %pI4\n", &prenat_ip, + &postnat_ip); + + } else { + + if (!(tginfo->flags & XT_DNETMAP_REUSE)) { + if (time_before(e->stamp, jiffies) && p != e->prefix) { + if (!disable_log) + printk(KERN_INFO KBUILD_MODNAME + ": timeout binding %pI4 -> %pI4\n", + &e->prenat_addr, + &e->postnat_addr); + list_del(&e->list); + list_del(&e->glist); + list_del(&e->grlist); + e->prenat_addr = 0; + goto bind_new_prefix; + } + } + /* don't reset ttl if flag is set */ + if (jttl >= 0) { + e->stamp = jiffies + jttl; + p = e->prefix; + list_move_tail(&e->lru_list, &p->lru_list); + } + postnat_ip = e->postnat_addr; + } + + spin_unlock_bh(&dnetmap_lock); 
+ + newrange = ((struct nf_nat_range) { + mr->range[0].flags | IP_NAT_RANGE_MAP_IPS, + postnat_ip, postnat_ip, + mr->range[0].min, mr->range[0].max}); + + /* Hand modified range to generic setup. */ + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); + +no_rev_map: +no_free_ip: + spin_unlock_bh(&dnetmap_lock); + return XT_CONTINUE; + +} + +static void dnetmap_tg_destroy(const struct xt_tgdtor_param *par) +{ + struct dnetmap_net *dnetmap_net = dnetmap_pernet(par->net); + const struct xt_DNETMAP_tginfo *tginfo = par->targinfo; + const struct nf_nat_multi_range_compat *mr = &tginfo->prefix; + struct dnetmap_prefix *p; +#ifdef CONFIG_PROC_FS + char str[25]; +#endif + + if (!(tginfo->flags & XT_DNETMAP_PREFIX)) + return; + + mutex_lock(&dnetmap_mutex); + p = dnetmap_prefix_lookup(dnetmap_net, mr); + if (--p->refcnt == 0) { + spin_lock_bh(&dnetmap_lock); + list_del(&p->list); + spin_unlock_bh(&dnetmap_lock); +#ifdef CONFIG_PROC_FS + sprintf(str, "%pI4_%i", &mr->range[0].min_ip, + 33 - ffs(~(ntohl(mr->range[0].min_ip ^ + mr->range[0].max_ip)))); + remove_proc_entry(str, dnetmap_net->xt_dnetmap); + sprintf(str, "%pI4_%i_stat", &mr->range[0].min_ip, + 33 - ffs(~(ntohl(mr->range[0].min_ip ^ + mr->range[0].max_ip)))); + remove_proc_entry(str, dnetmap_net->xt_dnetmap); +#endif + dnetmap_prefix_flush(dnetmap_net, p); + kfree(p); + } + mutex_unlock(&dnetmap_mutex); +} + +#ifdef CONFIG_PROC_FS +struct dnetmap_iter_state { + const struct dnetmap_prefix *p; + unsigned int bucket; +}; + +static void *dnetmap_seq_start(struct seq_file *seq, loff_t * pos) +__acquires(dnetmap_lock) +{ + struct dnetmap_iter_state *st = seq->private; + const struct dnetmap_prefix *prefix = st->p; + struct dnetmap_entry *e; + loff_t p = *pos; + + spin_lock_bh(&dnetmap_lock); + + list_for_each_entry(e, &prefix->lru_list, lru_list) + if (p-- == 0) + return e; + return NULL; +} + +static void *dnetmap_seq_next(struct seq_file *seq, void *v, loff_t * pos) +{ + struct dnetmap_iter_state *st = 
seq->private; + const struct dnetmap_prefix *prefix = st->p; + const struct dnetmap_entry *e = v; + const struct list_head *head = e->lru_list.next; + + if (head == &prefix->lru_list) + return NULL; + + (*pos)++; + return list_entry(head, struct dnetmap_entry, lru_list); +} + +static void dnetmap_seq_stop(struct seq_file *s, void *v) +__releases(dnetmap_lock) +{ + spin_unlock_bh(&dnetmap_lock); +} + +static int dnetmap_seq_show(struct seq_file *seq, void *v) +{ + const struct dnetmap_entry *e = v; + + seq_printf(seq, "%pI4 -> %pI4 --- ttl: %i lasthit: %lu\n", + &e->prenat_addr, &e->postnat_addr, + (int)(e->stamp - jiffies) / HZ, (e->stamp - jtimeout) / HZ); + return 0; +} + +static const struct seq_operations dnetmap_seq_ops = { + .start = dnetmap_seq_start, + .next = dnetmap_seq_next, + .stop = dnetmap_seq_stop, + .show = dnetmap_seq_show, +}; + +static int dnetmap_seq_open(struct inode *inode, struct file *file) +{ + struct proc_dir_entry *pde = PDE(inode); + struct dnetmap_iter_state *st; + + st = __seq_open_private(file, &dnetmap_seq_ops, sizeof(*st)); + if (st == NULL) + return -ENOMEM; + + st->p = pde->data; + return 0; +} + +static const struct file_operations dnetmap_tg_fops = { + .open = dnetmap_seq_open, + .read = seq_read, + .release = seq_release_private, + .owner = THIS_MODULE, +}; + +/* for statistics */ +static int dnetmap_stat_proc_read(char __user *buffer, char **start, + off_t offset, int length, int *eof, + void *data) +{ + const struct dnetmap_prefix *p = data; + struct dnetmap_entry *e; + unsigned int used, all; + long int ttl, sum_ttl; + + used = 0; + all = 0; + sum_ttl = 0; + + spin_lock_bh(&dnetmap_lock); + + list_for_each_entry(e, &p->lru_list, lru_list) { + + ttl = e->stamp - jiffies; + if (e->prenat_addr != 0 && ttl >= 0) { + used++; + sum_ttl += ttl; + } + all++; + } + + sum_ttl = used > 0 ? 
sum_ttl / (used * HZ) : 0; + sprintf(buffer, "%u %u %li\n", used, all, sum_ttl); + + if (length >= strlen(buffer)) + *eof = true; + + spin_unlock_bh(&dnetmap_lock); + + return strlen(buffer); +} + +static int __net_init dnetmap_proc_net_init(struct net *net) +{ + struct dnetmap_net *dnetmap_net = dnetmap_pernet(net); + + dnetmap_net->xt_dnetmap = proc_mkdir("xt_DNETMAP", net->proc_net); + if (!dnetmap_net->xt_dnetmap) + return -ENOMEM; + return 0; +} + +static void __net_exit dnetmap_proc_net_exit(struct net *net) +{ + proc_net_remove(net, "xt_DNETMAP"); +} +#else +static inline int dnetmap_proc_net_init(struct net *net) +{ + return 0; +} + +static inline void dnetmap_proc_net_exit(struct net *net) +{ +} + +#endif /* CONFIG_PROC_FS */ + +static int __net_init dnetmap_net_init(struct net *net) +{ + struct dnetmap_net *dnetmap_net = dnetmap_pernet(net); + int i; + + dnetmap_net->dnetmap_iphash = + kmalloc(sizeof(struct list_head) * hash_size * 2, GFP_ATOMIC); + if (dnetmap_net->dnetmap_iphash == NULL) + return -ENOMEM; + + INIT_LIST_HEAD(&dnetmap_net->prefixes); + for (i = 0; i < hash_size * 2; i++) + INIT_LIST_HEAD(&dnetmap_net->dnetmap_iphash[i]); + return dnetmap_proc_net_init(net); +} + +static void __net_exit dnetmap_net_exit(struct net *net) +{ + struct dnetmap_net *dnetmap_net = dnetmap_pernet(net); + + BUG_ON(!list_empty(&dnetmap_net->prefixes)); + kfree(dnetmap_net->dnetmap_iphash); + dnetmap_proc_net_exit(net); +} + +static struct pernet_operations dnetmap_net_ops = { + .init = dnetmap_net_init, + .exit = dnetmap_net_exit, + .id = &dnetmap_net_id, + .size = sizeof(struct dnetmap_net), +}; + +static struct xt_target dnetmap_tg_reg __read_mostly = { + .name = "DNETMAP", + .family = NFPROTO_IPV4, + .target = dnetmap_tg, + .targetsize = sizeof(struct xt_DNETMAP_tginfo), + .table = "nat", + .hooks = (1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_PRE_ROUTING), + .checkentry = dnetmap_tg_check, + .destroy = dnetmap_tg_destroy, + .me = 
THIS_MODULE +}; + +static int __init dnetmap_tg_init(void) +{ + int err; + + /* verify parameters */ + if (ffs(hash_size) != fls(hash_size) || hash_size <= 0) { + pr_info("bad hash_size parameter value - using defaults"); + hash_size = default_hash_size; + } + + jtimeout = default_ttl * HZ; + + err = register_pernet_subsys(&dnetmap_net_ops); + if (err) + return err; + + err = xt_register_target(&dnetmap_tg_reg); + if (err) + unregister_pernet_subsys(&dnetmap_net_ops); + + return err; +} + +static void __exit dnetmap_tg_exit(void) +{ + xt_unregister_target(&dnetmap_tg_reg); + unregister_pernet_subsys(&dnetmap_net_ops); +} + +module_init(dnetmap_tg_init); +module_exit(dnetmap_tg_exit); diff --git a/extensions/xt_DNETMAP.h b/extensions/xt_DNETMAP.h new file mode 100644 index 0000000..2a71cb9 --- /dev/null +++ b/extensions/xt_DNETMAP.h @@ -0,0 +1,16 @@ +#ifndef _LINUX_NETFILTER_XT_DNETMAP_H +#define _LINUX_NETFILTER_XT_DNETMAP_H 1 + +enum { + XT_DNETMAP_TTL = 1 << 0, + XT_DNETMAP_REUSE = 1 << 1, + XT_DNETMAP_PREFIX = 1 << 2, +}; + +struct xt_DNETMAP_tginfo { + struct nf_nat_multi_range_compat prefix; + __u8 flags; + __s16 ttl; +}; + +#endif diff --git a/include/net/netfilter/nf_conntrack_tuple.h b/include/net/netfilter/nf_conntrack_tuple.h new file mode 100644 index 0000000..81776e7 --- /dev/null +++ b/include/net/netfilter/nf_conntrack_tuple.h @@ -0,0 +1,215 @@ +/* + * Definitions and Declarations for tuple. + * + * 16 Dec 2003: Yasuyuki Kozakai @USAGI + * - generalize L3 protocol dependent part. + * + * Derived from include/linux/netfiter_ipv4/ip_conntrack_tuple.h + */ + +#ifndef _NF_CONNTRACK_TUPLE_H +#define _NF_CONNTRACK_TUPLE_H + +#include +#include +/*#include */ + +/* A `tuple' is a structure containing the information to uniquely + identify a connection. ie. if two packets have the same tuple, they + are in the same connection; if not, they are not. 
+ + We divide the structure along "manipulatable" and + "non-manipulatable" lines, for the benefit of the NAT code. +*/ + +#define NF_CT_TUPLE_L3SIZE ARRAY_SIZE(((union nf_inet_addr *)NULL)->all) + +/* The protocol-specific manipulable parts of the tuple: always in + network order! */ +union nf_conntrack_man_proto { + /* Add other protocols here. */ + __be16 all; + + struct { + __be16 port; + } tcp; + struct { + __be16 port; + } udp; + struct { + __be16 id; + } icmp; + struct { + __be16 port; + } dccp; + struct { + __be16 port; + } sctp; + struct { + __be16 key; /* GRE key is 32bit, PPtP only uses 16bit */ + } gre; +}; + +/* The manipulable part of the tuple. */ +struct nf_conntrack_man { + union nf_inet_addr u3; + union nf_conntrack_man_proto u; + /* Layer 3 protocol */ + u_int16_t l3num; +}; + +/* This contains the information to distinguish a connection. */ +struct nf_conntrack_tuple { + struct nf_conntrack_man src; + + /* These are the parts of the tuple which are fixed. */ + struct { + union nf_inet_addr u3; + union { + /* Add other protocols here. */ + __be16 all; + + struct { + __be16 port; + } tcp; + struct { + __be16 port; + } udp; + struct { + u_int8_t type, code; + } icmp; + struct { + __be16 port; + } dccp; + struct { + __be16 port; + } sctp; + struct { + __be16 key; + } gre; + } u; + + /* The protocol. 
*/ + u_int8_t protonum; + + /* The direction (for tuplehash) */ + u_int8_t dir; + } dst; +}; + +struct nf_conntrack_tuple_mask { /* mask over the source side only: address and per-protocol field; it has no l3num and no dst member */ + struct { + union nf_inet_addr u3; + union nf_conntrack_man_proto u; + } src; +}; + +#ifdef __KERNEL__ + +static inline void nf_ct_dump_tuple_ip(const struct nf_conntrack_tuple *t) +{ /* Debug aid: printk()s the tuple pointer, protocol number and both IPv4 address:port pairs; the body is compiled out unless DEBUG is defined. */ +#ifdef DEBUG + printk("tuple %p: %u %pI4:%hu -> %pI4:%hu\n", + t, t->dst.protonum, + &t->src.u3.ip, ntohs(t->src.u.all), + &t->dst.u3.ip, ntohs(t->dst.u.all)); +#endif +} + +static inline void nf_ct_dump_tuple_ipv6(const struct nf_conntrack_tuple *t) +{ /* IPv6 counterpart of nf_ct_dump_tuple_ip(); passes the all[] arrays to the %pI6 format and is likewise empty unless DEBUG is defined. */ +#ifdef DEBUG + printk("tuple %p: %u %pI6 %hu -> %pI6 %hu\n", + t, t->dst.protonum, + t->src.u3.all, ntohs(t->src.u.all), + t->dst.u3.all, ntohs(t->dst.u.all)); +#endif +} + +static inline void nf_ct_dump_tuple(const struct nf_conntrack_tuple *t) +{ /* Dispatches on src.l3num to the IPv4 or IPv6 dumper; any other family is silently ignored (no default case). */ + switch (t->src.l3num) { + case AF_INET: + nf_ct_dump_tuple_ip(t); + break; + case AF_INET6: + nf_ct_dump_tuple_ipv6(t); + break; + } +} + +/* If we're the first tuple, it's the original dir. 
*/ +#define NF_CT_DIRECTION(h) \ + ((enum ip_conntrack_dir)(h)->tuple.dst.dir) + +/* Connections have two entries in the hash table: one for each way */ +struct nf_conntrack_tuple_hash { + struct hlist_nulls_node hnnode; + struct nf_conntrack_tuple tuple; +}; + +static inline bool __nf_ct_tuple_src_equal(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2) +{ /* True when the source address, per-protocol field and l3num of both tuples match. */ + return (nf_inet_addr_cmp(&t1->src.u3, &t2->src.u3) && + t1->src.u.all == t2->src.u.all && + t1->src.l3num == t2->src.l3num); +} + +static inline bool __nf_ct_tuple_dst_equal(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2) +{ /* True when the destination address, per-protocol field and protonum match; dst.dir is not part of the comparison. */ + return (nf_inet_addr_cmp(&t1->dst.u3, &t2->dst.u3) && + t1->dst.u.all == t2->dst.u.all && + t1->dst.protonum == t2->dst.protonum); +} + +static inline bool nf_ct_tuple_equal(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2) +{ /* Full tuple equality: source half and destination half must both match. */ + return __nf_ct_tuple_src_equal(t1, t2) && + __nf_ct_tuple_dst_equal(t1, t2); +} + +static inline bool +nf_ct_tuple_mask_equal(const struct nf_conntrack_tuple_mask *m1, + const struct nf_conntrack_tuple_mask *m2) +{ /* True when both masks have the same address and per-protocol field. */ + return (nf_inet_addr_cmp(&m1->src.u3, &m2->src.u3) && + m1->src.u.all == m2->src.u.all); +} + +static inline bool +nf_ct_tuple_src_mask_cmp(const struct nf_conntrack_tuple *t1, + const struct nf_conntrack_tuple *t2, + const struct nf_conntrack_tuple_mask *mask) +{ /* Compares the source halves of t1 and t2 under mask: only bits set in the mask have to agree. Returns true on a match. */ + int count; + + for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) { + if ((t1->src.u3.all[count] ^ t2->src.u3.all[count]) & + mask->src.u3.all[count]) /* XOR exposes differing bits, AND keeps only those the mask cares about */ + return false; + } + + if ((t1->src.u.all ^ t2->src.u.all) & mask->src.u.all) + return false; + + if (t1->src.l3num != t2->src.l3num || + t1->dst.protonum != t2->dst.protonum) /* l3num and protonum are not covered by the mask and must match exactly */ + return false; + + return true; +} + +static inline bool +nf_ct_tuple_mask_cmp(const struct nf_conntrack_tuple *t, + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_tuple_mask *mask) +{ + return nf_ct_tuple_src_mask_cmp(t, tuple, mask) && + __nf_ct_tuple_dst_equal(t, 
tuple); +} +#endif /* __KERNEL__ */ + +#endif /* _NF_CONNTRACK_TUPLE_H */ diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h new file mode 100644 index 0000000..f5f09f0 --- /dev/null +++ b/include/net/netfilter/nf_nat.h @@ -0,0 +1,93 @@ +#ifndef _NF_NAT_H +#define _NF_NAT_H +#include +#include + +#define NF_NAT_MAPPING_TYPE_MAX_NAMELEN 16 + +enum nf_nat_manip_type { /* enumerator order matters: HOOK2MANIP() evaluates to 0 or 1, which lines up with SRC == 0 and DST == 1 */ + IP_NAT_MANIP_SRC, + IP_NAT_MANIP_DST +}; + +/* SRC manip occurs POST_ROUTING or LOCAL_IN */ +#define HOOK2MANIP(hooknum) ((hooknum) != NF_INET_POST_ROUTING && \ + (hooknum) != NF_INET_LOCAL_IN) /* 0 for POST_ROUTING and LOCAL_IN, 1 for every other hook */ + +#define IP_NAT_RANGE_MAP_IPS 1 /* the four IP_NAT_RANGE_* values are distinct bits, OR-ed into nf_nat_range.flags */ +#define IP_NAT_RANGE_PROTO_SPECIFIED 2 +#define IP_NAT_RANGE_PROTO_RANDOM 4 +#define IP_NAT_RANGE_PERSISTENT 8 + +/* NAT sequence number modifications */ +struct nf_nat_seq { + /* position of the last TCP sequence number modification (if any) */ + u_int32_t correction_pos; + + /* sequence number offset before and after last modification */ + int16_t offset_before, offset_after; +}; + +/* Single range specification. */ +struct nf_nat_range { + /* Set to OR of flags above. */ + unsigned int flags; + + /* Inclusive: network order. */ + __be32 min_ip, max_ip; + + /* Inclusive: network order */ + union nf_conntrack_man_proto min, max; +}; + +/* For backwards compat: don't use in modern code. */ +struct nf_nat_multi_range_compat { + unsigned int rangesize; /* Must be 1. */ + + /* hangs off end. */ + struct nf_nat_range range[1]; /* exactly one element is declared, consistent with rangesize being 1 */ +}; + +#ifdef __KERNEL__ +#include +#include +#include + +/* per conntrack: nat application helper private data */ +union nf_conntrack_nat_help { + /* insert nat helper private data here */ + struct nf_nat_pptp nat_pptp_info; +}; + +struct nf_conn; /* forward declaration: only pointers to it are used in this header */ + +/* The structure embedded in the conntrack structure. 
*/ +struct nf_conn_nat { + struct hlist_node bysource; + struct nf_nat_seq seq[IP_CT_DIR_MAX]; /* presumably one entry per conntrack direction - IP_CT_DIR_MAX is defined outside this header */ + struct nf_conn *ct; + union nf_conntrack_nat_help help; +#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ + defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) + int masq_index; /* member exists only when the MASQUERADE target is configured built-in or as a module */ +#endif +}; + +/* Set up the info structure to map into this range. */ +extern unsigned int nf_nat_setup_info(struct nf_conn *ct, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype); + +/* Is this tuple already taken? (not by us)*/ +extern int nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple, + const struct nf_conn *ignored_conntrack); + +static inline struct nf_conn_nat *nfct_nat(const struct nf_conn *ct) +{ /* Returns whatever nf_ct_ext_find() yields for the NF_CT_EXT_NAT extension of ct; callers should presumably expect NULL when none is attached - confirm against nf_ct_ext_find(). */ + return nf_ct_ext_find(ct, NF_CT_EXT_NAT); +} + +#else /* !__KERNEL__: iptables wants this to compile. */ +#define nf_nat_multi_range nf_nat_multi_range_compat /* userspace alias: code written against the old struct name resolves to the compat layout */ +#endif /*__KERNEL__*/ +#endif diff --git a/mconfig b/mconfig index e096018..3c67718 100644 --- a/mconfig +++ b/mconfig @@ -27,3 +27,4 @@ build_lscan=m build_pknock=m build_psd=m build_quota2=m +build_DNETMAP=m