Professional Documents
Culture Documents
Clusters
Jon P. Maloy
jon.maloy@ericsson.com
TIPC Motivation
ForCES
of Network
Devices
NOKIA RESEARCH
CENTER / BOSTON
Efficient Clustering
TIPC
Transparent Inter Process Communication
A transport protocol specialized for single node and cluster
environments
Cluster global Unix sockets with structured addressing scheme
More to come
UDP
Unix Sockets
What We Wanted
Functional addressing
Location transparency
Extend address beyond the local node
NOKIA RESEARCH CENTER / BOSTON
Have failure detection times at millisecond level, at least
A way to know when addresses become available/unavailable
What We Got
Addressing Location Transparency
Performance
And More
Congestion control at three levels
Simple to configure
And More
Network Redundancy
Neighbour Supervision
Scalability
Functional View
Socket API Adapter
Custom API
Adapters
Address Subscription
Address Resolution
Address Table
Distribution
Reliable Multicast
Connection Supervision
Route/Link Selection
Neighbour Detection
Link Establish/Supervision/Failover
Fragmentation/De-fragmentation
Node
Internal
Packet Bundling
Congestion Control
Sequence/Retransmission
Control
Bearer Adapter API
NOKIA RESEARCH CENTER / BOSTON
Ethernet
DCCP
SCTP
TCP
Shared
Memory
Network Topology*
Zone <1>
Cluster <1.1>
Zone <2>
Cluster <2.1>
Cluster <1.2>
Internet/
Intranet
Node <1.2.3>
Slave Node
<2.1.3333>
Server Process,
Partition B
bind(type = foo,
lower=100,
upper=199)
sendto(type = foo,
instance = 33)
foo
, 33
Server Process,
Partition A
bind(type = foo,
lower=0,
upper=99)
//server.c
#defineFOO4711
#defineINSTANCE33
#defineFOO4711
#defineLOWER_BOUND0
#defineUPPER_BOUND99
intmain(intargc,char*argv[],char*dummy[])
{
structsockaddr_tipcsrv_addr;
intsd=socket(AF_TIPC,SOCK_RDM,0);
intmain(intargc,char*argv[],char*dummy[])
{
intsd=socket(AF_TIPC,SOCK_RDM,0);
structsockaddr_tipcpartition_addr,client_addr;
intalen=sizeof(client_addr);
charinbuf[40],outbuf[40]="Uh?";
srv_addr.addrtype=TIPC_ADDR_NAME;
srv_addr.addr.name.name.type=FOO;
srv_addr.addr.name.name.instance=INSTANCE;
srv_addr.addr.name.domain=0;
printf("**TIPCclientprogramstarted**\n\n");
wait_for_server(&srv_addr.addr.name.name,
10000);
/*Sendconnectionless"hello"message:*/
partition_addr.family=AF_TIPC;
partition_addr.addrtype=TIPC_ADDR_NAMESEQ;
partition_addr.addr.nameseq.type=FOO;
partition_addr.addr.nameseq.lower=LOWER_BOUND;
partition_addr.addr.nameseq.upper=UPPER_BOUND;
partition_addr.scope=TIPC_CLUSTER_SCOPE;
printf("**TIPCserverprogramstarted**\n");
charbuf[40]={"HelloWorld"};
/*Makeserveravailable:*/
if(0>sendto(sd,buf,strlen(buf)+1,0,
(structsockaddr*)&srv_addr,
sizeof(srv_addr))){
perror("Client:Failedtosend");
exit(1);
}
if(0!=bind(sd,(structsockaddr*)&partition_addr,
sizeof(partition_addr))){
printf("Server:Failedtobind\n");
exit(1);
}
/*Receivetheacknowledge*/
if(0>=recv(sd,buf,sizeof(buf),0)){
perror("Unexepectedresponse");
exit(1);
}
if(0>=recvfrom(sd,inbuf,sizeof(inbuf),0,
(structsockaddr*)&client_addr,
&alen)){
perror("Unexepectedrecv:");
}
printf("Server:Messagereceived:%s!\n",inbuf);
if(0>sendto(sd,outbuf,strlen(outbuf)+1,0,
(structsockaddr*)&client_addr,
BOSTON
sizeof(client_addr))){
perror("Server:Failedtosend");
}
printf("\n**TIPCserverprogramfinished**\n");
}
/* server.c -- TIPC demo server for the FOO partition [LOWER_BOUND, UPPER_BOUND]. */
#define FOO         4711
#define LOWER_BOUND 0
#define UPPER_BOUND 99

/*
 * Binds a connectionless (SOCK_RDM) TIPC socket to the name sequence
 * {FOO, LOWER_BOUND, UPPER_BOUND} with cluster scope, receives a single
 * message, prints it, and sends the contents of outbuf back to the sender.
 *
 * The command-line parameters are accepted but not used.
 *
 * Returns 0 when the exchange completed. Exits with status 1 if the socket
 * cannot be created or bound, or if no message could be received. A failed
 * reply is reported but does not change the exit status.
 */
int main(int argc, char *argv[], char *dummy[])
{
    struct sockaddr_tipc partition_addr = {0};
    struct sockaddr_tipc client_addr = {0};
    socklen_t alen = sizeof(client_addr);
    char inbuf[40];
    char outbuf[40] = "Uh?";
    ssize_t received;
    int sd;

    (void)argc;
    (void)argv;
    (void)dummy;

    sd = socket(AF_TIPC, SOCK_RDM, 0);
    if (sd < 0) {
        perror("Server: Failed to create socket");
        exit(1);
    }

    partition_addr.family = AF_TIPC;
    partition_addr.addrtype = TIPC_ADDR_NAMESEQ;
    partition_addr.addr.nameseq.type = FOO;
    partition_addr.addr.nameseq.lower = LOWER_BOUND;
    partition_addr.addr.nameseq.upper = UPPER_BOUND;
    partition_addr.scope = TIPC_CLUSTER_SCOPE;

    printf("** TIPC server program started **\n");

    /* Make the server available under its functional address. */
    if (0 != bind(sd, (struct sockaddr *)&partition_addr,
                  sizeof(partition_addr))) {
        perror("Server: Failed to bind");
        exit(1);
    }

    /* Leave room for a terminator: the payload is printed with %s below. */
    received = recvfrom(sd, inbuf, sizeof(inbuf) - 1, 0,
                        (struct sockaddr *)&client_addr, &alen);
    if (received <= 0) {
        perror("Unexpected recv");
        exit(1);
    }
    inbuf[received] = '\0';
    printf("Server: Message received: %s !\n", inbuf);

    /* Reply to the address recvfrom() reported, using the length it set. */
    if (0 > sendto(sd, outbuf, strlen(outbuf) + 1, 0,
                   (struct sockaddr *)&client_addr, alen)) {
        perror("Server: Failed to send");
    }

    printf("\n** TIPC server program finished **\n");
    return 0;
}
/* client.c -- TIPC demo client addressing service {FOO, INSTANCE}. */
#define FOO      4711
#define INSTANCE 33

/*
 * Sends one connectionless "Hello World" message to the TIPC name
 * {FOO, INSTANCE} and prints the first reply received on the same socket.
 *
 * The command-line parameters are accepted but not used.
 *
 * NOTE(review): wait_for_server() is not defined in this listing; a
 * declaration must be in scope for this to compile. The meaning and unit of
 * its second argument (10000) cannot be confirmed from here.
 *
 * Returns 0 when a reply was received. Exits with status 1 if the socket
 * cannot be created, the send fails, or no reply could be received.
 */
int main(int argc, char *argv[], char *dummy[])
{
    char buf[40] = "Hello World";
    struct sockaddr_tipc srv_addr = {0};
    ssize_t received;
    int sd;

    (void)argc;
    (void)argv;
    (void)dummy;

    sd = socket(AF_TIPC, SOCK_RDM, 0);
    if (sd < 0) {
        perror("Client: Failed to create socket");
        exit(1);
    }

    /* The address family must be set explicitly; it was left unset before. */
    srv_addr.family = AF_TIPC;
    srv_addr.addrtype = TIPC_ADDR_NAME;
    srv_addr.addr.name.name.type = FOO;
    srv_addr.addr.name.name.instance = INSTANCE;
    srv_addr.addr.name.domain = 0;

    printf("** TIPC client program started **\n\n");

    wait_for_server(&srv_addr.addr.name.name, 10000);

    /* Send the greeting including its terminating NUL. */
    if (0 > sendto(sd, buf, strlen(buf) + 1, 0,
                   (struct sockaddr *)&srv_addr, sizeof(srv_addr))) {
        perror("Client: Failed to send");
        exit(1);
    }

    /* Leave room for a terminator: the reply is printed with %s below. */
    received = recv(sd, buf, sizeof(buf) - 1, 0);
    if (received <= 0) {
        perror("Unexpected response");
        exit(1);
    }
    buf[received] = '\0';

    printf("Client: Received response: %s \n", buf);
    printf("** TIPC client program finished **\n\n");
    return 0;
}
33
foo,33,1
foo
, 33
,13
3
Server Process,
Partition B
bind(type = foo,
lower=100,
upper=199)
Server Process,
Partition A
bind(type = foo,
lower=0,
upper=99)
//server.c
#defineFOO4711
#defineLOWER_BOUND33
#defineUPPER_BOUND133
#defineFOO4711
#defineLOWER_BOUND0
#defineUPPER_BOUND99
intmain(intargc,char*argv[],char*dummy[])
{
structsockaddr_tipcmcast_group;
intsd=socket(AF_TIPC,SOCK_RDM,0);
intmain(intargc,char*argv[],char*dummy[])
{
intsd=socket(AF_TIPC,SOCK_RDM,0);
structsockaddr_tipcpartition_addr,client_addr;
intalen=sizeof(client_addr);
charinbuf[40],outbuf[40]="Uh?";
mcast_group.addrtype=TIPC_ADDR_NAMESEQ;
mcast_group.addr.name.name.type=FOO;
mcast_group.addr.nameseq.lower=LOWER_BOUND;
mcast_group.addr.nameseq.upper=UPPER_BOUND;
printf("**TIPCclientprogramstarted**\n\n");
wait_for_server(&mcast_group.addr.name.name,
10000);
/*Sendconnectionless"hello"message:*/
printf("**TIPCserverprogramstarted**\n");
/*Makeserveravailable:*/
charbuf[40]={"HelloWorld"};
if(0!=bind(sd,(structsockaddr*)&partition_addr,
sizeof(partition_addr))){
printf("Server:Failedtobind\n");
exit(1);
}
if(0>sendto(sd,buf,strlen(buf)+1,0,
(structsockaddr*)&mcast_group,
sizeof(mcast_group))){
perror("Client:Failedtosend");
exit(1);
}
/*Receiveoneacknowledge*/
if(0>=recv(sd,buf,sizeof(buf),0)){
perror("Unexepectedresponse");
exit(1);
}
NOKIA RESEARCH CENTER
printf("Client: Received response: %s \n",buf);
printf("\n****** TIPC client program finished ******\n");
}
partition_addr.family=AF_TIPC;
partition_addr.addrtype=TIPC_ADDR_NAMESEQ;
partition_addr.addr.nameseq.type=FOO;
partition_addr.addr.nameseq.lower=LOWER_BOUND;
partition_addr.addr.nameseq.upper=UPPER_BOUND;
partition_addr.scope=TIPC_CLUSTER_SCOPE;
if(0>=recvfrom(sd,inbuf,sizeof(inbuf),0,
(structsockaddr*)&client_addr,
&alen)){
perror("Unexepectedrecv:");
}
printf("Server:Messagereceived:%s!\n",inbuf);
if(0>sendto(sd,outbuf,strlen(outbuf)+1,0,
(structsockaddr*)&client_addr,
BOSTON
sizeof(client_addr))){
perror("Server:Failedtosend");
}
printf("\n**TIPCserverprogramfinished**\n");
}
/* Multicast client: addresses every FOO instance in [LOWER_BOUND, UPPER_BOUND]. */
#define FOO         4711
#define LOWER_BOUND 33
#define UPPER_BOUND 133

/*
 * Sends one connectionless "Hello World" message to the TIPC name sequence
 * {FOO, LOWER_BOUND, UPPER_BOUND} and prints the first reply received on the
 * same socket.
 *
 * The command-line parameters are accepted but not used.
 *
 * NOTE(review): wait_for_server() is not defined in this listing; a
 * declaration must be in scope for this to compile. The meaning and unit of
 * its second argument (10000) cannot be confirmed from here.
 *
 * Returns 0 when a reply was received. Exits with status 1 if the socket
 * cannot be created, the send fails, or no reply could be received.
 */
int main(int argc, char *argv[], char *dummy[])
{
    char buf[40] = "Hello World";
    struct sockaddr_tipc mcast_group = {0};
    ssize_t received;
    int sd;

    (void)argc;
    (void)argv;
    (void)dummy;

    sd = socket(AF_TIPC, SOCK_RDM, 0);
    if (sd < 0) {
        perror("Client: Failed to create socket");
        exit(1);
    }

    /* The address family must be set explicitly; it was left unset before. */
    mcast_group.family = AF_TIPC;
    mcast_group.addrtype = TIPC_ADDR_NAMESEQ;
    /* Use the nameseq member throughout, matching the declared addrtype. */
    mcast_group.addr.nameseq.type = FOO;
    mcast_group.addr.nameseq.lower = LOWER_BOUND;
    mcast_group.addr.nameseq.upper = UPPER_BOUND;

    printf("** TIPC client program started **\n\n");

    /*
     * The helper is handed the addr.name.name view of the same union, as in
     * the original call. NOTE(review): this presumably makes it wait on
     * {FOO, LOWER_BOUND} via the union overlay -- confirm against the
     * sockaddr_tipc layout in <linux/tipc.h>.
     */
    wait_for_server(&mcast_group.addr.name.name, 10000);

    /* Send the greeting including its terminating NUL. */
    if (0 > sendto(sd, buf, strlen(buf) + 1, 0,
                   (struct sockaddr *)&mcast_group, sizeof(mcast_group))) {
        perror("Client: Failed to send");
        exit(1);
    }

    /*
     * Receive the first acknowledge only. Leave room for a terminator:
     * the reply is printed with %s below.
     */
    received = recv(sd, buf, sizeof(buf) - 1, 0);
    if (received <= 0) {
        perror("Unexpected response");
        exit(1);
    }
    buf[received] = '\0';

    printf("Client: Received response: %s \n", buf);
    printf("\n****** TIPC client program finished ******\n");
    return 0;
}
bind(type = foo,
lower=100,
upper=199)
foo
, 33
,13
3
Server Process,
Partition A
bind(type = foo,
lower=0,
upper=99)
Node <1.1.1>
Server Process,
Partition B
Client Process
sendto(type = foo,
lower = 33,
upper = 133)
bind(type = foo,
lower=100,
upper=199)
foo
, 33
,13
3
Server Process,
Partition A
bind(type = foo,
lower=0,
upper=99)
Node <1.1.1>
Server Process,
Partition B
Client Process
sendto(type = foo,
lower = 33,
upper = 133)
Node <1.1.3>
foo
, 33
,13
3
bind(type = foo,
lower=100,
upper=199)
Server Process,
Partition A
bind(type = foo,
lower=0,
upper=99)
NOKIA RESEARCH CENTER / BOSTON
Address Binding
Many sockets may bind to same partition
Closest-First or Round-Robin algorithm chosen by client
Server Process,
Partition A
Client Process
sendto(type = foo,
lower = 33,
upper = 133)
bind(type = foo,
lower=0,
upper=99)
foo
, 33
,13
3
Server Process,
Partition A
bind(type = foo,
lower=0,
upper=99)
Address Binding
Many sockets may bind to same partition
Closest-First or Round-Robin algorithm chosen by client
bind(type = foo,
lower=100,
upper=199)
foo
, 33
,13
3
Server Process,
Partition A+B
bind(type = foo,
lower=0,
upper=99)
bind(type=foo,
lower=100,
upper=199)
Address Binding
Many sockets may bind to same partition
Closest-First or Round-Robin algorithm chosen by client
Server Process,
Partition B
bind(type = foo,
lower=100,
upper=199)
foo
, 33
,13
3
Server Process,
Partition A
bind(type = foo,
lower=0,
upper=99)
bind(type=bar,
lower=0,
upper=999)
1
100,
foo,
foo
,0,
99
bind(type = foo,
lower=100,
upper=199)
Server Process,
Partition A
99
bind(type = foo,
lower=0,
upper=99)
Node <1.1.1>
Client Process
subscribe(type = node,
lower = 0x1001000,
upper = 0x1001009)
1
0x100
node,
nod
e,0
x10
01
00 3
bind(type = node,
lower=0x1001003,
upper=0x1001003)
Node <1.1.2>
TIPC
002
bind(type = node,
lower=0x1001002,
upper=0x1001002)
NOKIA RESEARCH CENTER / BOSTON
Connections
Establishment based on functional addressing
Selectable lookup algorithm, partitioning, redundancy etc
Lightweight
End-to-end flow control
SOCK_STREAM/SOCK_SEQPACKET in connection oriented mode
Mutually compatible
Connection Setup
No protocol messages exchanged during setup/shutdown
Only payload carrying messages
Server Process,
Partition B
Client
Process
sendto(type = foo,
instance = 117)
foo,117
Connection Setup
No protocol messages exchanged during setup/shutdown
Only payload carrying messages
Client
Process
Server Process,
Partition B
lconnect(client)
send()
Connection Setup
No protocol messages exchanged during setup/shutdown
Only payload carrying messages
Client
Process
lconnect(server)
Server Process,
Partition B
Connection Shutdown
No protocol messages exchanged during setup/shutdown
Only payload carrying messages
Client
Process
disconnect()
Server Process,
Partition B
Connection Shutdown
No protocol messages exchanged during setup/shutdown
Only payload carrying messages
Client
Process
Server Process,
Partition B
disconnect()
Connection Setup/Shutdown
Well-known TCP-style connect/shutdown, with exchange of SYN and
FIN messages, is available as an alternative
Server Process,
Partition B
Client
Process
connect(type=foo,
instance=117)
SYN(foo,117)
bind()
listen()
accept()
Connection Abortion
Immediate abortion event in case of peer process crash
Client
Process
Server Process,
Partition B
abort
Connection Abortion
Immediate abortion event in case of peer node crash
Node <1.1.5>
Node <1.1.3>
Server Process,
Partition B
Client
Process
abort
Connection Abortion
Immediate abortion event in case of communication failure
Node <1.1.5>
Node <1.1.3>
Server Process,
Partition B
Client
Process
abort
Connection Abortion
Immediate abortion in case of node overload
Node <1.1.5>
Node <1.1.3>
Server Process,
Partition B
Client
Process
abort
Server Process,
Partition B
Acknowledge
Signalling Links
Retransmission protocol and congestion control at signalling link level
Transmitted packets acknowledged/released by any packet from other node
Packet losses detected and retransmission performed earlier
Packets from different sources are bundled in same buffer in case of congestion
Packet flow more traffic driven, no need for timers per socket or message
Node <1.1.5>
Node <1.1.3>
Client
Process
Client
Process
NOKIA RESEARCH CENTER / BOSTON
Server Process,
Partition B
Server Process,
Partition B
Node <1.1.5>
Node <1.1.3>
Client
Process
Client
Process
NOKIA RESEARCH CENTER / BOSTON
Server Process,
Partition B
Server Process,
Partition B
Network Redundancy
Smooth failover in case of single link failure, with no consequences for
user level connections
Node <1.1.5>
Node <1.1.3>
Client
Process
Client
Process
NOKIA RESEARCH CENTER / BOSTON
Server Process,
Partition B
Server Process,
Partition B
Code Status
Initial Release for Linux
Additional Bearers
Distributed netlink ??
NOKIA RESEARCH CENTER / BOSTON
http://tipc.sourceforge.net
QUESTIONS ??