-
Notifications
You must be signed in to change notification settings - Fork 0
/
Top-5-things-every-apache-kafka-developer-should-know.html
817 lines (354 loc) · 36.9 KB
/
Top-5-things-every-apache-kafka-developer-should-know.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
<!DOCTYPE html>
<html class="theme-next muse use-motion" lang="zh-Hans">
<head>
<meta charset="UTF-8"/>
<meta http-equiv="X-UA-Compatible" content="IE=edge" />
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1"/>
<meta name="theme-color" content="#222">
<meta http-equiv="Cache-Control" content="no-transform" />
<meta http-equiv="Cache-Control" content="no-siteapp" />
<link href="/lib/fancybox/source/jquery.fancybox.css?v=2.1.5" rel="stylesheet" type="text/css" />
<link href="/lib/font-awesome/css/font-awesome.min.css?v=4.6.2" rel="stylesheet" type="text/css" />
<link href="/css/main.css?v=5.1.4" rel="stylesheet" type="text/css" />
<link rel="apple-touch-icon" sizes="180x180" href="/images/apple-touch-icon-next.png?v=5.1.4">
<link rel="icon" type="image/png" sizes="32x32" href="/images/favicon-32x32-next.png?v=5.1.4">
<link rel="icon" type="image/png" sizes="16x16" href="/images/favicon-16x16-next.png?v=5.1.4">
<link rel="mask-icon" href="/images/logo.svg?v=5.1.4" color="#222">
<meta name="keywords" content="kafka," />
<link rel="alternate" href="/atom.xml" title="个人博客" type="application/atom+xml" />
<meta name="description" content="top 5 things every apache kafka developer should know 翻译自:https://www.confluent.io/blog/top-5-things-every-apache-kafka-developer-should-know介绍下面5个内容 理解消息传递和持久化保证 学习producer api中的新的粘性分区(learn about t">
<meta property="og:type" content="article">
<meta property="og:title" content="Top 5 things every apache kafka developer should know">
<meta property="og:url" content="https://lightnine/github.io/Top-5-things-every-apache-kafka-developer-should-know.html">
<meta property="og:site_name" content="个人博客">
<meta property="og:description" content="top 5 things every apache kafka developer should know 翻译自:https://www.confluent.io/blog/top-5-things-every-apache-kafka-developer-should-know介绍下面5个内容 理解消息传递和持久化保证 学习producer api中的新的粘性分区(learn about t">
<meta property="og:locale">
<meta property="og:image" content="https://lightnine/Top-5-things-every-apache-kafka-developer-should-know/producer-ack-1024x691.png">
<meta property="og:image" content="https://lightnine/Top-5-things-every-apache-kafka-developer-should-know/in-sync-replicas-1024x710.png">
<meta property="og:image" content="https://lightnine/Top-5-things-every-apache-kafka-developer-should-know/not-enough-replicas-1.png">
<meta property="og:image" content="https://lightnine/Top-5-things-every-apache-kafka-developer-should-know/producer-partition-updated.png">
<meta property="og:image" content="https://lightnine/Top-5-things-every-apache-kafka-developer-should-know/sparse_batches_sent-updated.png">
<meta property="og:image" content="https://lightnine/Top-5-things-every-apache-kafka-developer-should-know/batch-partition-updated.png">
<meta property="og:image" content="https://lightnine/Top-5-things-every-apache-kafka-developer-should-know/six-partitions.png">
<meta property="og:image" content="https://lightnine/Top-5-things-every-apache-kafka-developer-should-know/minus-consumer-2.png">
<meta property="article:published_time" content="2023-12-09T14:20:21.000Z">
<meta property="article:modified_time" content="2024-06-10T07:00:19.016Z">
<meta property="article:author" content="liang">
<meta property="article:tag" content="kafka">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="https://lightnine/Top-5-things-every-apache-kafka-developer-should-know/producer-ack-1024x691.png">
<script type="text/javascript" id="hexo.configurations">
var NexT = window.NexT || {};
var CONFIG = {
root: '',
scheme: 'Muse',
version: '5.1.4',
sidebar: {"position":"left","display":"post","offset":12,"b2t":false,"scrollpercent":false,"onmobile":false},
fancybox: true,
tabs: true,
motion: {"enable":true,"async":false,"transition":{"post_block":"fadeIn","post_header":"slideDownIn","post_body":"slideDownIn","coll_header":"slideLeftIn","sidebar":"slideUpIn"}},
duoshuo: {
userId: '0',
author: '博主'
},
algolia: {
applicationID: '',
apiKey: '',
indexName: '',
hits: {"per_page":10},
labels: {"input_placeholder":"Search for Posts","hits_empty":"We didn't find any results for the search: ${query}","hits_stats":"${hits} results found in ${time} ms"}
}
};
</script>
<link rel="canonical" href="https://lightnine/github.io/Top-5-things-every-apache-kafka-developer-should-know.html"/>
<title>Top 5 things every apache kafka developer should know | 个人博客</title>
<meta name="generator" content="Hexo 7.0.0"></head>
<body itemscope itemtype="http://schema.org/WebPage" lang="zh-Hans">
<div class="container sidebar-position-left page-post-detail">
<div class="headband"></div>
<header id="header" class="header" itemscope itemtype="http://schema.org/WPHeader">
<div class="header-inner"><div class="site-brand-wrapper">
<div class="site-meta ">
<div class="custom-logo-site-title">
<a href="/" class="brand" rel="start">
<span class="logo-line-before"><i></i></span>
<span class="site-title">个人博客</span>
<span class="logo-line-after"><i></i></span>
</a>
</div>
<p class="site-subtitle"></p>
</div>
<div class="site-nav-toggle">
<button>
<span class="btn-bar"></span>
<span class="btn-bar"></span>
<span class="btn-bar"></span>
</button>
</div>
</div>
<nav class="site-nav">
<ul id="menu" class="menu">
<li class="menu-item menu-item-home">
<a href="/" rel="section">
<i class="menu-item-icon fa fa-fw fa-home"></i> <br />
首页
</a>
</li>
<li class="menu-item menu-item-about">
<a href="/about/" rel="section">
<i class="menu-item-icon fa fa-fw fa-user"></i> <br />
关于
</a>
</li>
<li class="menu-item menu-item-tags">
<a href="/tags/" rel="section">
<i class="menu-item-icon fa fa-fw fa-tags"></i> <br />
标签
</a>
</li>
<li class="menu-item menu-item-categories">
<a href="/categories/" rel="section">
<i class="menu-item-icon fa fa-fw fa-th"></i> <br />
分类
</a>
</li>
<li class="menu-item menu-item-archives">
<a href="/archives/" rel="section">
<i class="menu-item-icon fa fa-fw fa-archive"></i> <br />
归档
</a>
</li>
</ul>
</nav>
</div>
</header>
<main id="main" class="main">
<div class="main-inner">
<div class="content-wrap">
<div id="content" class="content">
<div id="posts" class="posts-expand">
<article class="post post-type-normal" itemscope itemtype="http://schema.org/Article">
<div class="post-block">
<link itemprop="mainEntityOfPage" href="https://lightnine/github.io/Top-5-things-every-apache-kafka-developer-should-know.html">
<span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
<meta itemprop="name" content="">
<meta itemprop="description" content="">
<meta itemprop="image" content="/images/avatar.gif">
</span>
<span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
<meta itemprop="name" content="个人博客">
</span>
<header class="post-header">
<h1 class="post-title" itemprop="name headline">Top 5 things every apache kafka developer should know</h1>
<div class="post-meta">
<span class="post-time">
<span class="post-meta-item-icon">
<i class="fa fa-calendar-o"></i>
</span>
<span class="post-meta-item-text">发表于</span>
<time title="创建于" itemprop="dateCreated datePublished" datetime="2023-12-09T22:20:21+08:00">
2023-12-09
</time>
</span>
<span class="post-category" >
<span class="post-meta-divider">|</span>
<span class="post-meta-item-icon">
<i class="fa fa-folder-o"></i>
</span>
<span class="post-meta-item-text">分类于</span>
<span itemprop="about" itemscope itemtype="http://schema.org/Thing">
<a href="/categories/kafka/" itemprop="url" rel="index">
<span itemprop="name">kafka</span>
</a>
</span>
</span>
</div>
</header>
<div class="post-body" itemprop="articleBody">
<h1 id="top-5-things-every-apache-kafka-developer-should-know"><a href="#top-5-things-every-apache-kafka-developer-should-know" class="headerlink" title="top 5 things every apache kafka developer should know"></a>top 5 things every apache kafka developer should know</h1><blockquote>
<p>翻译自:<a href="https://www.confluent.io/blog/top-5-things-every-apache-kafka-developer-should-know">https://www.confluent.io/blog/top-5-things-every-apache-kafka-developer-should-know</a><br>介绍下面5个内容</p>
<ul>
<li>理解消息传递和持久化保证</li>
<li>学习producer api中的新的粘性分区(learn about the new sticky partitioner in the producer API)</li>
<li>利用cooperative rebalancing(协同重平衡)来避免消费者组执行rebalance时的stop the world</li>
<li>掌握常用命令行工具<ul>
<li>kafka console producer</li>
<li>kafka consule consumer</li>
<li>dump log</li>
<li>delete records</li>
</ul>
</li>
<li>使用record headers的能力<ul>
<li>为kafka记录增加headers</li>
<li>检索headers</li>
</ul>
</li>
</ul>
</blockquote>
<h2 id="Tip1:理解消息传递和持久化保证"><a href="#Tip1:理解消息传递和持久化保证" class="headerlink" title="Tip1:理解消息传递和持久化保证"></a>Tip1:理解消息传递和持久化保证</h2><p>针对数据持久化,KafkaProducer提供了不同的配置。acks 配置指定了当生产者接受到多少消息确认后,才认为记录已经成功发送到broker上。kafka提供了以下三种选择:</p>
<ul>
<li>none: 生产者不等待broker的确认,发送消息后就认为已经成功发送到broker上。</li>
<li>one: 生产者等待leader broker的确认(leader broker有一个),一定收到确认,就认为消息发送成功</li>
<li>all: 生产者需要等待所有的ISR(in-sync replicas) broker都确认消息后,才认为消息发送成功。<br>如果需要更到的发送吞吐量,可以损失一定的数据,那么可以使用none或one。而如果应用不能容忍数据丢失,那么可以设置all,但是这样吞吐量会降低。<br>这里需要说明下acks=all的情况。下面的场景描述中,producer都是使用acks=all来发送消息,并且topic副本数是3,一个leader,两个follower。<br>情况1:如果这些副本中的记录偏移量是一致的,那么他们被认为是in-sync的。如下面所示,producer采用acks=all的情况:<img src="/Top-5-things-every-apache-kafka-developer-should-know/producer-ack-1024x691.png" class="" title="producer ack">
情况2:假设由于某些情况(网络分区,负载过高等),导致两个follower没有跟上leader,那么follower就不是in sync的。此时生产者发送消息,那么实际的确认只会有一个。acks=all并不是指定有多少副本必须在in-sync。leader broker始终跟自己是同步的。<img src="/Top-5-things-every-apache-kafka-developer-should-know/in-sync-replicas-1024x710.png" class="" title="in-sync acks">
</li>
</ul>
<p>一般来说,设置acks=all, 我们的要求通常都是所有副本都应该确认,或者至少大量的in sync副本应该确认。如果不是这样,那么应该抛出异常知道所有副本都在in sync中。<br>为了满足这个要求,kafka提供了这样一个配置:min.insync.replicas. 这个配置强制指定多少个副本写成功才被认为真正写成功。需要注意的是,min.insync.replicas配置在是broker或者topic级别,而不是在producer上。min.insync.replicas默认值是1。所以为了避免上面说的情况,在三个副本的情况下,需要将min.insync.replicas设置为2。<br><img src="/Top-5-things-every-apache-kafka-developer-should-know/not-enough-replicas-1.png" class="" title="not-enough-replicas"><br>上图中展示了in sync中的副本不满足min.insync.replicas要求的情况,此时producer发送的消息,leader broker不会将记录添加到log中,而是会抛出NotEnoughReplicasException 或者 NotEnoughReplicasAfterAppendException。副本与leader不一致被认为是一种可以重试的错误,所以producer会重试直到成功或者达到超时时间(默认值两分钟)<a href="https://kafka.apache.org/documentation/#delivery.timeout.ms">delivery.timeout.ms</a>。<br>如果需要非常高的数据持久化保证,那么应该同时设置min.insync.replicas和acks=all.</p>
<h2 id="Tip-2-Learn-about-the-new-sticky-partitioner-in-the-producer-API"><a href="#Tip-2-Learn-about-the-new-sticky-partitioner-in-the-producer-API" class="headerlink" title="Tip 2: Learn about the new sticky partitioner in the producer API"></a>Tip 2: Learn about the new sticky partitioner in the producer API</h2><p>kafka需要partition来提升吞吐量并且将消息均衡到不同的broker上。kafka的消息记录是key/value格式,其中key可以为null。kafka producer在发送消息时,不会立即发送,而是将消息放置到对应的partition batch中(类似缓存),待缓存满了,在一次发送。batch是一种增加网络利用的有效方式。在将消息发送到partition中,通常有三种方式来决定发送到哪个partition上。</p>
<ul>
<li>方式1:在发送消息时,直接指定消息对应的partition。这种情况,producer直接使用这个partition</li>
<li>方式2:如果没有提供partition,消息包含key,那么producer会使用key的hash值来决定partition。</li>
<li>方式3:如果既没有key也没有提供partition信息,那么kafka会使用round-robin的方式将消息发送到不同的partition中。producer会将第一个消息发送到partition 0,第二个消息发送到partition 1,以此类推。</li>
</ul>
<p>下图展示了方式3:<br><img src="/Top-5-things-every-apache-kafka-developer-should-know/producer-partition-updated.png" class="" title="producer-partition-updated"><br>round robin方法对于将消息均衡到不同的partition上工作的很好。但是存在一个缺点,由于producer是依次将消息发送到不同的partition batch中,那么有可能会出现每个partition中的batch都填充不满。比如下面展示的,topic有三个partition。假设应用产生了9条消息,并且消息没有key,所有的消息几乎是同时发送,如下图:<br><img src="/Top-5-things-every-apache-kafka-developer-should-know/sparse_batches_sent-updated.png" class="" title="sparse_batches_sent-updated"><br>9条记录分散到是三个batch中,每个batch有三条。但是如果我们将9条消息放到一个batch中会更好。更少的batch使用更少的网络带宽并且对于broker的负载更小。<br>kafka 2.4.0新增了sticky partitioner approach. 这种方法能够将消息发送到一个partition的batch中直到此batch满了。然后,发送这个batch,sticky partitioner使用下一个partition的batch。如下图展示了使用sticky partitioner的例子:<br><img src="/Top-5-things-every-apache-kafka-developer-should-know/batch-partition-updated.png" class="" title="batch-partition-updated"><br>通过使用sticky partitioner方法,我们减少了请求次数,同时也减少了请求队列上的负载,也减少了系统延迟。需要注意的时,sticky partitioner仍然是将消息均衡放置到不同的partition batch中。可以将这种认为是per-batch round robin 或者 eventually even approach。<br>如果想要更多了解sticky 模式,可以参考<a href="https://www.confluent.io/blog/apache-kafka-producer-improvements-sticky-partitioner/"> Apache Kafka Producer Improvements with the Sticky Partitioner</a></p>
<h2 id="Tip-3-Avoid-“stop-the-world”-consumer-group-rebalances-by-using-cooperative-rebalancing"><a href="#Tip-3-Avoid-“stop-the-world”-consumer-group-rebalances-by-using-cooperative-rebalancing" class="headerlink" title="Tip 3: Avoid “stop-the world” consumer group rebalances by using cooperative rebalancing"></a>Tip 3: Avoid “stop-the world” consumer group rebalances by using cooperative rebalancing</h2><p>kafka是一个分布式系统,而分布式系统中一个重要的事情就是如何处理失败。kafka处理失败的方式之一是使用consumer group,consumer group管理多个consumer。如果其中一个consumer停止,kafka会进行rebalance从而确保另一个consumer能够接管这个工作。<br>从2.4版本开始,kafka引入了一个新的rebalance协议,cooperative rebalancing。在深入了解cooperative rebalancing之前,先来了解一下consumer group的基础。<br>假设一个分布式应用(比如一个微服务的多个副本)有个多个consumer,订阅同一个topic。这些consumer组成了一个consumer group,具有同样的.group.id。在consumer group中的每个consumer负责从一个或多个partition中消费消息。这些partition的分配是由consumer group中的leader进行的。如下图所示:<br><img src="/Top-5-things-every-apache-kafka-developer-should-know/six-partitions.png" class="" title="six-partitions"><br>从图中可以看到,总共有6个partition,在理想的情况下,每个consumer负责消费两个partition。但是如果其中的某个应用失败了或者不能连接网络。那么对应的partition中的消息是不是就不能被消费直到应用恢复?幸运的是,由于consumer rebalancing协议的存在,不会发生这种情况。<br>下图展示了consumer group protocal过程:<br><img src="/Top-5-things-every-apache-kafka-developer-should-know/minus-consumer-2.png" class="" title="minus-consumer-2"><br>如上图,consumer2由于某些原因失败了。group coordinator将它从组中移除然后触发rebalance。rebalance尝试将工作负载在组内所有工作的consumer上进行均衡分布。在这个例子中,consumer2离开了组,rebalance会将consumer2拥有的partition分配给组内其他的consumer。所以对于一个consumer group,如果其中consumer失败了, 那么对于这些partition的处理不会产生影响。<br>但是,默认的rebalance协议有个缺点。在rebalance过程中,每个consumer都会放弃之前获得的partition(这会造成consumer停止消费),知道topic下所有的partition都被重新分配。这种情况被称为stop the world rebalance。为了解决这个问题,依靠ConsumerPartitionAssignor实例,consumer简单的重新获取之前分配的partition,所以在这些partition上仍然能够继续消费。<br>上述描述的实现被称为<a href="https://www.confluent.io/blog/cooperative-rebalancing-in-kafka-streams-consumer-ksqldb/">eager rebalancing</a>, 因为它优先考虑的是针对一个consumer group中,不会有两个consumer同时对于一个partition拥有主权。<br>虽然对于同一个topic下的某个partition不能具有相同的consumer非常重要,但是有一种更好的方法,既能够提供安全性同时还不会暂停处理,既<a href="https://www.confluent.io/blog/incremental-cooperative-rebalancing-in-kafka/">incremental cooperative rebalancing</a>。这个方法在kafka2.3版本的kafka connect中被首次引入,现在已经在consumer group 协议中实现了。利用cooperative 方法,消费者不会在rebalance开始时主动放弃partition的所有权。在cooperative方法中,consumer group中的所有成员会将当前的分配进行编码然后将信息发送到group leader中。group leader决定那个partition需要修改对应的consumer。而不是一开始就完全从新分配。之后第二次rebalance发起,但是这一次,仅仅涉及到那些需要改变所有权的分区。这有可能是撤销不在用的partition或者新增的partition。对于那些没有改变所有权的分区,这些分区中的数据会继续进行处理。<br>这种处理办法解决了stop-the-world,而仅仅是暂停了哪些需要修改所有权分区的消费。这带来了更少的rebalance代驾以及降低了完成rebalance的时间。即使rebalance时间很长也没有关系,因为现在数据仍然被处理。使用CooperativeStickyAssignor能够开启这个功能。<br>如果要开启这个功能,则需要将partition.assignment.strategy设置为使用CooperativeStickyAssignor。这种设置完全是在客户端测,所以仅仅更新客户端版本即可。而在Kafka Stream中,这个功能是默认开启的。</p>
<h2 id="Tip-4:掌握命令行工具"><a href="#Tip-4:掌握命令行工具" class="headerlink" title="Tip 4:掌握命令行工具"></a>Tip 4:掌握命令行工具</h2><p>下面介绍了4种在平时工作中使用最多的工具。</p>
<h3 id="kafka-console-producer"><a href="#kafka-console-producer" class="headerlink" title="kafka console producer"></a>kafka console producer</h3><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 开启发送者程序, 发送的消息只有value,没有key</span></span><br><span class="line">kafka-console-producer --topic <topic> --broker-list <broker-host:port></span><br><span class="line"><span class="comment"># 发送消息,发送的消息包含key 和 value</span></span><br><span class="line">kafka-console-producer --topic <topic> --broker-list <broker-host:port> --property parse.key=<span class="literal">true</span> --property key.separator=<span class="string">":"</span></span><br></pre></td></tr></table></figure>
<h3 id="kafka-console-consumer"><a href="#kafka-console-consumer" class="headerlink" title="kafka console consumer"></a>kafka console consumer</h3><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 消费指定topic中的消息</span></span><br><span class="line">kafka-console-consumer --topic <topic> --bootstrap-server <broker-host:port></span><br><span class="line"><span class="comment"># 指定从开始的地方消费</span></span><br><span class="line">kafka-console-consumer --topic <topic> --bootstrap-server <broker-host:port> --from-beginning</span><br><span class="line"><span class="comment"># 默认情况下consumer只会打印消息的value,如果想要打印消息的key,则输入下面命令</span></span><br><span class="line">kafka-console-consumer --topic <topic> --bootstrap-server <broker-host:port> --property print.key=<span class="literal">true</span> --property key.separator=<span class="string">":"</span></span><br></pre></td></tr></table></figure>
<h3 id="Dump-log"><a href="#Dump-log" class="headerlink" title="Dump log"></a>Dump log</h3><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line"><span class="comment"># 指定打印topic为example-0中的日志,参数--print-data-log表示输出日志</span></span><br><span class="line"><span class="comment"># 不过一般在生产环境中不会使用这个命令</span></span><br><span class="line">kafka-dump-log --print-data-log --files ./var/lib/kafka/data/example-0/00000000000000000000.<span class="built_in">log</span></span><br></pre></td></tr></table></figure>
<h3 id="delete-records"><a href="#delete-records" class="headerlink" title="delete records"></a>delete records</h3><p>kafka提供了配置来控制数据保留,包括时间和数据大小</p>
<ul>
<li>数据保留的时间由 log.retention.hours 控制,默认值是168hour,也就是一周</li>
<li>configuration.log.retention.bytes 控制segment文件最大是多少。默认值是-1, 也就是不限制大小</li>
</ul>
<p>如果想要删除数据,可以使用下述命令:<br><figure class="highlight bash"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br></pre></td><td class="code"><pre><span class="line">kafka-delete-records --bootstrap-server <broker-host:port> \</span><br><span class="line"> --offset-json-file offsets.json</span><br></pre></td></tr></table></figure><br>offsets.json 文件内容如下:<br><figure class="highlight json"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br></pre></td><td class="code"><pre><span class="line"><span class="punctuation">{</span></span><br><span class="line"> <span class="attr">"partitions"</span><span class="punctuation">:</span> <span class="punctuation">[</span></span><br><span class="line"> <span class="punctuation">{</span><span class="attr">"topic"</span><span class="punctuation">:</span> <span class="string">"example"</span><span class="punctuation">,</span> <span class="attr">"partition"</span><span class="punctuation">:</span> <span class="number">0</span><span class="punctuation">,</span> <span class="attr">"offset"</span><span class="punctuation">:</span> <span class="number">-1</span><span class="punctuation">}</span></span><br><span class="line"> <span class="punctuation">]</span><span class="punctuation">,</span></span><br><span class="line"> <span class="attr">"version"</span><span class="punctuation">:</span><span class="number">1</span></span><br><span class="line"> <span class="punctuation">}</span></span><br></pre></td></tr></table></figure><br>参数介绍如下:</p>
<ul>
<li>topic:指定要删除数据对应的topic</li>
<li>partition:指定需要删除数据对应的partition</li>
<li>offset:指定从哪个offset开始删除,注:是删除offset之前的数据。-1表示删除当前HW之前的数据,HW(high watermark)表示能够开始消费的位置</li>
</ul>
<h2 id="Tip5:使用record-headers的能力"><a href="#Tip5:使用record-headers的能力" class="headerlink" title="Tip5:使用record headers的能力"></a>Tip5:使用record headers的能力</h2><p>Record headers可以给kafka消息添加一些元数据,并且不是给消息的key value添加额外的信息。比如如果你想要在消息中嵌入一些信息,如表示消息来源系统,也是是想要增加一些审计功能。<br>为什么不能将这些额外的数据添加到key中。因为给key中添加数据会带来两个潜在的问题</p>
<ol>
<li>首先,如果你使用的是压缩主题,那么给key添加信息会使得消息不正确。这样压缩不会像之前起作用</li>
<li>其次,给key添加额外的信息有可能会影响数据的partition分布</li>
</ol>
<h1 id="回顾"><a href="#回顾" class="headerlink" title="回顾"></a>回顾</h1><p>我们了解了kafka的五个tips,我们理解了下面的知识点</p>
<ol>
<li>消息持久性以及和消息传递之间的关系</li>
<li>producer API中的sticky partitioner</li>
<li>command line tools</li>
<li>record headers的能力</li>
</ol>
</div>
<footer class="post-footer">
<div class="post-tags">
<a href="/tags/kafka/" rel="tag"># kafka</a>
</div>
<div class="post-nav">
<div class="post-nav-next post-nav-item">
<a href="/java-zero-copy.html" rel="next" title="java-zero-copy">
<i class="fa fa-chevron-left"></i> java-zero-copy
</a>
</div>
<span class="post-nav-divider"></span>
<div class="post-nav-prev post-nav-item">
<a href="/redis-study.html" rel="prev" title="redis study">
redis study <i class="fa fa-chevron-right"></i>
</a>
</div>
</div>
</footer>
</div>
</article>
<div class="post-spread">
</div>
</div>
</div>
</div>
<div class="sidebar-toggle">
<div class="sidebar-toggle-line-wrap">
<span class="sidebar-toggle-line sidebar-toggle-line-first"></span>
<span class="sidebar-toggle-line sidebar-toggle-line-middle"></span>
<span class="sidebar-toggle-line sidebar-toggle-line-last"></span>
</div>
</div>
<aside id="sidebar" class="sidebar">
<div class="sidebar-inner">
<ul class="sidebar-nav motion-element">
<li class="sidebar-nav-toc sidebar-nav-active" data-target="post-toc-wrap">
文章目录
</li>
<li class="sidebar-nav-overview" data-target="site-overview-wrap">
站点概览
</li>
</ul>
<section class="site-overview-wrap sidebar-panel">
<div class="site-overview">
<div class="site-author motion-element" itemprop="author" itemscope itemtype="http://schema.org/Person">
<p class="site-author-name" itemprop="name"></p>
<p class="site-description motion-element" itemprop="description"></p>
</div>
<nav class="site-state motion-element">
<div class="site-state-item site-state-posts">
<a href="/archives/%7C%7C%20archive">
<span class="site-state-item-count">37</span>
<span class="site-state-item-name">日志</span>
</a>
</div>
<div class="site-state-item site-state-categories">
<a href="/categories/index.html">
<span class="site-state-item-count">22</span>
<span class="site-state-item-name">分类</span>
</a>
</div>
<div class="site-state-item site-state-tags">
<a href="/tags/index.html">
<span class="site-state-item-count">45</span>
<span class="site-state-item-name">标签</span>
</a>
</div>
</nav>
<div class="feed-link motion-element">
<a href="/atom.xml" rel="alternate">
<i class="fa fa-rss"></i>
RSS
</a>
</div>
</div>
</section>
<!--noindex-->
<section class="post-toc-wrap motion-element sidebar-panel sidebar-panel-active">
<div class="post-toc">
<div class="post-toc-content"><ol class="nav"><li class="nav-item nav-level-1"><a class="nav-link" href="#top-5-things-every-apache-kafka-developer-should-know"><span class="nav-number">1.</span> <span class="nav-text">top 5 things every apache kafka developer should know</span></a><ol class="nav-child"><li class="nav-item nav-level-2"><a class="nav-link" href="#Tip1%EF%BC%9A%E7%90%86%E8%A7%A3%E6%B6%88%E6%81%AF%E4%BC%A0%E9%80%92%E5%92%8C%E6%8C%81%E4%B9%85%E5%8C%96%E4%BF%9D%E8%AF%81"><span class="nav-number">1.1.</span> <span class="nav-text">Tip1:理解消息传递和持久化保证</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#Tip-2-Learn-about-the-new-sticky-partitioner-in-the-producer-API"><span class="nav-number">1.2.</span> <span class="nav-text">Tip 2: Learn about the new sticky partitioner in the producer API</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#Tip-3-Avoid-%E2%80%9Cstop-the-world%E2%80%9D-consumer-group-rebalances-by-using-cooperative-rebalancing"><span class="nav-number">1.3.</span> <span class="nav-text">Tip 3: Avoid “stop-the world” consumer group rebalances by using cooperative rebalancing</span></a></li><li class="nav-item nav-level-2"><a class="nav-link" href="#Tip-4%EF%BC%9A%E6%8E%8C%E6%8F%A1%E5%91%BD%E4%BB%A4%E8%A1%8C%E5%B7%A5%E5%85%B7"><span class="nav-number">1.4.</span> <span class="nav-text">Tip 4:掌握命令行工具</span></a><ol class="nav-child"><li class="nav-item nav-level-3"><a class="nav-link" href="#kafka-console-producer"><span class="nav-number">1.4.1.</span> <span class="nav-text">kafka console producer</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#kafka-console-consumer"><span class="nav-number">1.4.2.</span> <span class="nav-text">kafka console consumer</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#Dump-log"><span class="nav-number">1.4.3.</span> <span class="nav-text">Dump log</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#delete-records"><span class="nav-number">1.4.4.</span> <span class="nav-text">delete records</span></a></li></ol></li><li class="nav-item nav-level-2"><a class="nav-link" href="#Tip5%EF%BC%9A%E4%BD%BF%E7%94%A8record-headers%E7%9A%84%E8%83%BD%E5%8A%9B"><span class="nav-number">1.5.</span> <span class="nav-text">Tip5:使用record headers的能力</span></a></li></ol></li><li class="nav-item nav-level-1"><a class="nav-link" href="#%E5%9B%9E%E9%A1%BE"><span class="nav-number">2.</span> <span class="nav-text">回顾</span></a></li></ol></div>
</div>
</section>
<!--/noindex-->
</div>
</aside>
</div>
</main>
<footer id="footer" class="footer">
<div class="footer-inner">
<div class="copyright">© <span itemprop="copyrightYear">2024</span>
<span class="with-love">
<i class="fa fa-user"></i>
</span>
<span class="author" itemprop="copyrightHolder">liang</span>
</div>
<div class="powered-by">由 <a class="theme-link" target="_blank" href="https://hexo.io">Hexo</a> 强力驱动</div>
<span class="post-meta-divider">|</span>
<div class="theme-info">主题 — <a class="theme-link" target="_blank" href="https://github.com/iissnan/hexo-theme-next">NexT.Muse</a> v5.1.4</div>
</div>
</footer>
<div class="back-to-top">
<i class="fa fa-arrow-up"></i>
</div>
</div>
<script type="text/javascript">
if (Object.prototype.toString.call(window.Promise) !== '[object Function]') {
window.Promise = null;
}
</script>
<script type="text/javascript" src="/lib/jquery/index.js?v=2.1.3"></script>
<script type="text/javascript" src="/lib/fastclick/lib/fastclick.min.js?v=1.0.6"></script>
<script type="text/javascript" src="/lib/jquery_lazyload/jquery.lazyload.js?v=1.9.7"></script>
<script type="text/javascript" src="/lib/velocity/velocity.min.js?v=1.2.1"></script>
<script type="text/javascript" src="/lib/velocity/velocity.ui.min.js?v=1.2.1"></script>
<script type="text/javascript" src="/lib/fancybox/source/jquery.fancybox.pack.js?v=2.1.5"></script>
<script type="text/javascript" src="/js/src/utils.js?v=5.1.4"></script>
<script type="text/javascript" src="/js/src/motion.js?v=5.1.4"></script>
<script type="text/javascript" src="/js/src/scrollspy.js?v=5.1.4"></script>
<script type="text/javascript" src="/js/src/post-details.js?v=5.1.4"></script>
<script type="text/javascript" src="/js/src/bootstrap.js?v=5.1.4"></script>
</body>
</html>