1.32k likes | 1.46k Views
Apache Samza * Stream Processing at LinkedIn. Chris Riccomini 11/13/2013. * Incubating. Stream Processing?. 0 ms. Response latency. RPC. 0 ms. Response latency. Synchronous. RPC. 0 ms. Response latency. Later. Possibly much later. Synchronous. Samza. RPC. 0 ms.
E N D
Apache Samza* Stream Processing at LinkedIn Chris Riccomini 11/13/2013 * Incubating
0 ms Response latency
RPC 0 ms Response latency Synchronous
RPC 0 ms Response latency Later. Possibly much later. Synchronous
Samza RPC 0 ms Response latency Milliseconds to minutes Later. Possibly much later. Synchronous
Real-time Feeds • User activity • Metrics • Monitoring • Database Changes
Real-time Feeds • 10+ billion writes per day • 172,000 messages per second (average) • 55+ billion messages per day to real-time consumers
Stream Processing is Hard • Partitioning • State • Re-processing • Failure semantics • Joins to services or database • Non-determinism
Streams Partition 0 Partition 1 Partition 2
Streams Partition 0 Partition 1 Partition 2 1 2 3 4 5 6 7 1 2 3 4 5 6 1 2 3 4 5
Streams Partition 0 Partition 1 Partition 2 1 2 3 4 5 6 7 1 2 3 4 5 6 1 2 3 4 5
Streams Partition 0 Partition 1 Partition 2 1 2 3 4 5 6 7 1 2 3 4 5 6 1 2 3 4 5
Streams Partition 0 Partition 1 Partition 2 1 2 3 4 5 6 7 1 2 3 4 5 6 1 2 3 4 5
Streams Partition 0 Partition 1 Partition 2 1 2 3 4 5 6 7 1 2 3 4 5 6 1 2 3 4 5
Streams Partition 0 Partition 1 Partition 2 1 2 3 4 5 6 7 1 2 3 4 5 6 1 2 3 4 5 next append
Tasks Partition 0
Tasks Partition 0 Task 1
Tasks Partition 0 class PageKeyViewsCounterTask implements StreamTask { public void process(IncomingMessageEnvelope envelope, MessageCollector collector, TaskCoordinator coordinator) { GenericRecord record = ((GenericRecord) envelope.getMsg()); String pageKey = record.get("page-key").toString(); int newCount = pageKeyViews.get(pageKey).incrementAndGet(); collector.send(countStream, pageKey, newCount); } }
Tasks Partition 0 class PageKeyViewsCounterTask implements StreamTask { public void process(IncomingMessageEnvelope envelope, MessageCollector collector, TaskCoordinator coordinator) { GenericRecord record = ((GenericRecord) envelope.getMsg()); String pageKey = record.get("page-key").toString(); int newCount = pageKeyViews.get(pageKey).incrementAndGet(); collector.send(countStream, pageKey, newCount); } }
Tasks Partition 0 class PageKeyViewsCounterTask implements StreamTask { public void process(IncomingMessageEnvelope envelope, MessageCollector collector, TaskCoordinator coordinator) { GenericRecord record = ((GenericRecord) envelope.getMsg()); String pageKey = record.get("page-key").toString(); int newCount = pageKeyViews.get(pageKey).incrementAndGet(); collector.send(countStream, pageKey, newCount); } }
Tasks Partition 0 class PageKeyViewsCounterTask implements StreamTask { public void process(IncomingMessageEnvelope envelope, MessageCollector collector, TaskCoordinator coordinator) { GenericRecord record = ((GenericRecord) envelope.getMsg()); String pageKey = record.get("page-key").toString(); int newCount = pageKeyViews.get(pageKey).incrementAndGet(); collector.send(countStream, pageKey, newCount); } }
Tasks Partition 0 class PageKeyViewsCounterTask implements StreamTask { public void process(IncomingMessageEnvelope envelope, MessageCollector collector, TaskCoordinator coordinator) { GenericRecord record = ((GenericRecord) envelope.getMsg()); String pageKey = record.get("page-key").toString(); int newCount = pageKeyViews.get(pageKey).incrementAndGet(); collector.send(countStream, pageKey, newCount); } }
Tasks Partition 0 class PageKeyViewsCounterTask implements StreamTask { public void process(IncomingMessageEnvelope envelope, MessageCollector collector, TaskCoordinator coordinator) { GenericRecord record = ((GenericRecord) envelope.getMsg()); String pageKey = record.get("page-key").toString(); int newCount = pageKeyViews.get(pageKey).incrementAndGet(); collector.send(countStream, pageKey, newCount); } }
Tasks Partition 0 class PageKeyViewsCounterTask implements StreamTask { public void process(IncomingMessageEnvelope envelope, MessageCollector collector, TaskCoordinator coordinator) { GenericRecord record = ((GenericRecord) envelope.getMsg()); String pageKey = record.get("page-key").toString(); int newCount = pageKeyViews.get(pageKey).incrementAndGet(); collector.send(countStream, pageKey, newCount); } }
Tasks Partition 0 class PageKeyViewsCounterTask implements StreamTask { public void process(IncomingMessageEnvelope envelope, MessageCollector collector, TaskCoordinator coordinator) { GenericRecord record = ((GenericRecord) envelope.getMsg()); String pageKey = record.get("page-key").toString(); int newCount = pageKeyViews.get(pageKey).incrementAndGet(); collector.send(countStream, pageKey, newCount); } }
Tasks Partition 0 class PageKeyViewsCounterTask implements StreamTask { public void process(IncomingMessageEnvelope envelope, MessageCollector collector, TaskCoordinator coordinator) { GenericRecord record = ((GenericRecord) envelope.getMsg()); String pageKey = record.get("page-key").toString(); int newCount = pageKeyViews.get(pageKey).incrementAndGet(); collector.send(countStream, pageKey, newCount); } }
Tasks Partition 0 class PageKeyViewsCounterTask implements StreamTask { public void process(IncomingMessageEnvelope envelope, MessageCollector collector, TaskCoordinator coordinator) { GenericRecord record = ((GenericRecord) envelope.getMsg()); String pageKey = record.get("page-key").toString(); int newCount = pageKeyViews.get(pageKey).incrementAndGet(); collector.send(countStream, pageKey, newCount); } }
Tasks Partition 0 Task 1
Tasks Page Views - Partition 0 1 2 3 4 PageKeyViews CounterTask Output Count Stream Partition 0 Partition 1
Tasks Page Views - Partition 0 1 2 3 4 PageKeyViews CounterTask Output Count Stream Partition 0 Partition 1
Tasks Page Views - Partition 0 1 2 3 4 PageKeyViews CounterTask Output Count Stream Partition 0 Partition 1
Tasks Page Views - Partition 0 1 2 3 4 PageKeyViews CounterTask Output Count Stream Partition 0 Partition 1
Tasks Page Views - Partition 0 1 2 3 4 PageKeyViews CounterTask Output Count Stream Partition 0 Partition 1
Tasks Page Views - Partition 0 1 2 3 4 PageKeyViews CounterTask Output Count Stream Partition 0 Partition 1
Tasks Page Views - Partition 0 1 2 3 4 PageKeyViews CounterTask Output Count Stream Partition 1 Partition 0
Tasks Page Views - Partition 0 1 2 3 4 PageKeyViews CounterTask Output Count Stream Partition 1 Partition 0
Tasks Page Views - Partition 0 1 2 3 4 PageKeyViews CounterTask 2 Checkpoint Stream Output Count Stream Partition 1 Partition 1 Partition 0
Tasks Page Views - Partition 0 1 2 3 4 PageKeyViews CounterTask 2 Checkpoint Stream Output Count Stream Partition 1 Partition 1 Partition 0
Tasks Page Views - Partition 0 1 2 3 4 PageKeyViews CounterTask 2 Checkpoint Stream Output Count Stream Partition 1 Partition 1 Partition 0
Tasks Page Views - Partition 0 1 2 3 4 PageKeyViews CounterTask 2 Checkpoint Stream Output Count Stream Partition 1 Partition 0 Partition 1