-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathKDD-CUP-99 Task Description.html
583 lines (453 loc) · 19.8 KB
/
KDD-CUP-99 Task Description.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
<!-- saved from url=(0051)http://kdd.ics.uci.edu/databases/kddcup99/task.html -->
<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>KDD-CUP-99 Task Description </title>
<style type="text/css" abt="234"></style><script src="chrome-extension://ocggccaacacpienfcgmgcihoombokbbj/pages/client/livestartpage-message-add.js"></script><script>//console.log('a')
</script><script>doAdblock();
/**
 * Installs the player-swap logic: every <object>/<embed> element whose
 * `data`/`src` URL matches one of the `rules` below has that URL rewritten
 * to the rule's replacement and is then re-inserted into the page.
 *
 * Elements are discovered through a CSS animation named `playerInserted`
 * that is attached to all object/embed elements; the matching
 * animation-start event is what triggers the rewrite.
 *
 * Takes no arguments and returns nothing; it only touches the DOM.
 */
function doAdblock() {
  (function() {
    function A() {}
    A.prototype = {
      // Rule name -> { find: RegExp tested against the player URL,
      //                replace: URL substituted for the matched part }.
      // A rule may also carry an optional `preHandle` function (see replace()).
      rules: {
        'pps_pps': {
          'find': /^http:\/\/www\.iqiyi\.com\/player\/cupid\/common\/pps_flvplay_s\.swf/,
          'replace': 'http://swf.adtchrome.com/pps_20140420.swf'
        },
        '17173_in': {
          'find': /http:\/\/f\.v\.17173cdn\.com\/(\d+\/)?flash\/PreloaderFile(Customer)?\.swf/,
          'replace': "http://swf.adtchrome.com/17173_in_20150522.swf"
        },
        '17173_out': {
          'find': /http:\/\/f\.v\.17173cdn\.com\/(\d+\/)?flash\/PreloaderFileFirstpage\.swf/,
          'replace': "http://swf.adtchrome.com/17173_out_20150522.swf"
        },
        '17173_live': {
          'find': /http:\/\/f\.v\.17173cdn\.com\/(\d+\/)?flash\/Player_stream(_firstpage)?\.swf/,
          'replace': "http://swf.adtchrome.com/17173_stream_20150522.swf"
        },
        '17173_live_out': {
          'find': /http:\/\/f\.v\.17173cdn\.com\/(\d+\/)?flash\/Player_stream_(custom)?Out\.swf/,
          'replace': "http://swf.adtchrome.com/17173.out.Live.swf"
        }
      },

      _done: null,

      // Lazily created per-instance list of elements already processed.
      get done() {
        if (!this._done) {
          this._done = [];
        }
        return this._done;
      },

      // Injects the stylesheet that gives every object/embed element the
      // `playerInserted` animation (vendor-prefixed and unprefixed forms).
      addAnimations: function() {
        var style = document.createElement('style');
        style.type = 'text/css';
        style.innerHTML = 'object,embed{' +
          '-webkit-animation-duration:.001s;-webkit-animation-name:playerInserted;' +
          '-ms-animation-duration:.001s;-ms-animation-name:playerInserted;' +
          '-o-animation-duration:.001s;-o-animation-name:playerInserted;' +
          'animation-duration:.001s;animation-name:playerInserted;}' +
          '@-webkit-keyframes playerInserted{from{opacity:0.99;}to{opacity:1;}}' +
          '@-ms-keyframes playerInserted{from{opacity:0.99;}to{opacity:1;}}' +
          '@-o-keyframes playerInserted{from{opacity:0.99;}to{opacity:1;}}' +
          '@keyframes playerInserted{from{opacity:0.99;}to{opacity:1;}}';
        document.getElementsByTagName('head')[0].appendChild(style);
      },

      // Animation-start listener: reacts only to the marker animation.
      animationsHandler: function(e) {
        if (e.animationName === 'playerInserted') {
          this.replace(e.target);
        }
      },

      // Rewrites `elem` according to the first rule whose `find` matches its
      // player URL (`elem.data`, falling back to `elem.src`).
      replace: function(elem) {
        if (/http:\/\/v\.youku\.com\/v_show\/.*/.test(window.location.href)) {
          // #playerBox may be absent; only skip when it is explicitly
          // tagged player="adt".
          var playerBox = document.getElementById("playerBox");
          var tag = playerBox && playerBox.getAttribute("player");
          if (tag == "adt") {
            console.log("adt adv");
            return;
          }
        }
        if (this.done.indexOf(elem) != -1) return;
        this.done.push(elem);
        var player = elem.data || elem.src;
        if (!player) return;
        var name, rule;
        for (name in this.rules) {
          rule = this.rules[name];
          if (rule['find'].test(player)) {
            if ('function' === typeof rule['preHandle']) {
              rule['preHandle'].call(this, elem, rule['find'], rule['replace'], player);
            } else {
              this.reallyReplace(elem, rule['find'], rule['replace']);
            }
            break; // first matching rule wins
          }
        }
      },

      // Applies the URL substitution to `data` when present, otherwise to
      // `src` (in which case the element is also forced to display:block),
      // then re-inserts the element.
      reallyReplace: function(elem, find, replace) {
        if (elem.data) {
          elem.data = elem.data.replace(find, replace);
        } else if (elem.src) {
          elem.src = elem.src.replace(find, replace);
          elem.style.display = 'block';
        }
        this.reloadPlugin(elem);
      },

      // Swaps `elem` for a deep clone at the same position in its parent.
      // The clone is recorded in `done` so it is not processed a second time.
      reloadPlugin: function(elem) {
        var nextSibling = elem.nextSibling;
        var parentNode = elem.parentNode;
        parentNode.removeChild(elem);
        var newElem = elem.cloneNode(true);
        this.done.push(newElem);
        if (nextSibling) {
          parentNode.insertBefore(newElem, nextSibling);
        } else {
          parentNode.appendChild(newElem);
        }
      },

      // Registers the animation-start listeners and injects the stylesheet.
      init: function() {
        var handler = this.animationsHandler.bind(this);
        // document.body is null while the script runs from <head>; the events
        // bubble, so the root element is an equivalent listening point.
        var root = document.body || document.documentElement;
        root.addEventListener('webkitAnimationStart', handler, false);
        root.addEventListener('msAnimationStart', handler, false);
        root.addEventListener('oAnimationStart', handler, false);
        root.addEventListener('animationstart', handler, false);
        this.addAnimations();
      }
    };
    new A().init();
  })();
}
// 20140730
(function cnbeta() {
  // Nothing to do unless the page URL contains cnbeta.com.
  if (document.URL.indexOf('cnbeta.com') < 0) {
    return;
  }
  // Reset the left margin of every <embed> that is a direct child of a <p>.
  var embeds = document.body.querySelectorAll("p>embed");
  for (var k = 0; k < embeds.length; k++) {
    embeds[k].style.marginLeft = "0px";
  }
})();
//baidu
if (document.URL.indexOf('www.baidu.com') >= 0) {
  if (document && document.getElementsByTagName && document.getElementById && document.body) {
    // Hides two kinds of nodes on the page:
    //  1. div/table descendants of #content_left whose inline style forces
    //     `display: table|block !important`;
    //  2. `.result.c-container` nodes with id="1" whose markup contains '广告'.
    var aa = function() {
      // Write only when the value actually changes, so that re-running after
      // our own style mutations settles instead of looping.
      var hide = function(node) {
        if (node.style.display !== "none") {
          node.style.display = "none";
        }
        if (node.style.visibility !== 'hidden') {
          node.style.visibility = 'hidden';
        }
      };
      var i;
      var all = document.body.querySelectorAll("#content_left div,#content_left table");
      for (i = 0; i < all.length; i++) {
        if (/display:\s?(table|block)\s!important/.test(all[i].getAttribute("style"))) {
          hide(all[i]);
        }
      }
      all = document.body.querySelectorAll('.result.c-container[id="1"]');
      for (i = 0; i < all.length; i++) {
        if (all[i].innerHTML && all[i].innerHTML.indexOf('广告') > -1) {
          hide(all[i]);
        }
      }
    };
    aa();
    // Re-run whenever the results wrapper changes. The wrapper may be missing,
    // and the DOMSubtreeModified mutation event is deprecated, so prefer
    // MutationObserver and keep the old event only as a fallback.
    (function(wrapper) {
      if (!wrapper) {
        return;
      }
      if (typeof MutationObserver === 'function') {
        new MutationObserver(aa).observe(wrapper, {
          childList: true,
          subtree: true,
          attributes: true,
          characterData: true
        });
      } else {
        wrapper.addEventListener('DOMSubtreeModified', aa);
      }
    })(document.getElementById('wrapper_wrapper'));
  }
}
// 20140922
(function kill_360() {
  // On pages whose URL contains so.com, hide the #e_idea_pp element.
  if (document.URL.indexOf('so.com') >= 0) {
    var ideaBox = document.getElementById("e_idea_pp");
    // The element may be absent; the display value must be the string
    // 'none' (the bare identifier `none` is undefined and throws).
    if (ideaBox) {
      ideaBox.style.display = 'none';
    }
  }
})();
// tv.sohu.com: make sure the fee_status=true cookie is present.
if (document.URL.indexOf("tv.sohu.com") >= 0 && document.cookie.indexOf("fee_status=true") == -1) {
  document.cookie = 'fee_status=true';
}
// 56.com: same cookie, same check.
if (document.URL.indexOf("56.com") >= 0 && document.cookie.indexOf("fee_status=true") == -1) {
  document.cookie = 'fee_status=true';
}
// iqiyi.com: set player_forcedType=h5_VOD when missing, then reload the page
// unless localStorage.reloadTime records a reload less than 60000 ms ago.
if (document.URL.indexOf("iqiyi.com") >= 0 && document.cookie.indexOf("player_forcedType=h5_VOD") == -1) {
  document.cookie = 'player_forcedType=h5_VOD';
  if (!localStorage.reloadTime || !(Date.now() - parseInt(localStorage.reloadTime) < 60000)) {
    location.reload();
    localStorage.reloadTime = Date.now();
  } else {
    console.log('no reload');
  }
}
</script><style type="text/css">object,embed{ -webkit-animation-duration:.001s;-webkit-animation-name:playerInserted; -ms-animation-duration:.001s;-ms-animation-name:playerInserted; -o-animation-duration:.001s;-o-animation-name:playerInserted; animation-duration:.001s;animation-name:playerInserted;} @-webkit-keyframes playerInserted{from{opacity:0.99;}to{opacity:1;}} @-ms-keyframes playerInserted{from{opacity:0.99;}to{opacity:1;}} @-o-keyframes playerInserted{from{opacity:0.99;}to{opacity:1;}} @keyframes playerInserted{from{opacity:0.99;}to{opacity:1;}}</style></head>
<body>
<p>This document is adapted
from the paper <i>Cost-based Modeling and Evaluation for Data Mining</i>
<i>With Application to Fraud and Intrusion Detection: Results from the
JAM Project</i> by Salvatore J. Stolfo, Wei Fan, Wenke Lee, Andreas Prodromidis,
and Philip K. Chan.
<br>
<br>
</p><h4>
INTRUSION DETECTOR LEARNING</h4>
Software to detect network intrusions protects a computer network from
unauthorized users, including perhaps insiders. The intrusion detector
learning task is to build a predictive model (i.e. a classifier) capable
of distinguishing between "bad" connections, called intrusions or attacks,
and "good" normal connections.
<p>The 1998 DARPA Intrusion Detection Evaluation Program was prepared and
managed by MIT Lincoln Labs. The objective was to survey and evaluate research
in intrusion detection. A standard set of data to be audited, which
includes a wide variety of intrusions simulated in a military network environment,
was provided. The 1999 KDD intrusion detection contest uses a version
of this dataset.
</p><p>Lincoln Labs set up an environment to acquire nine weeks of raw TCP
dump data for a local-area network (LAN) simulating a typical U.S. Air
Force LAN. They operated the LAN as if it were a true Air Force environment,
but peppered it with multiple attacks.
</p><p>The raw training data was about four gigabytes of compressed binary
TCP dump data from seven weeks of network traffic. This was processed
into about five million connection records. Similarly, the two weeks
of test data yielded around two million connection records.
</p><p>A connection is a sequence of TCP packets starting and ending at some
well-defined times, between which data flows from a source IP address
to a target IP address, and back, under some well-defined protocol. Each connection
is labeled as either normal, or as an attack, with exactly one specific
attack type. Each connection record consists of about 100 bytes.
</p><p>Attacks fall into four main categories:
</p><ul>
<li>
DOS: denial-of-service, e.g. syn flood;</li>
<li>
R2L: unauthorized access from a remote machine, e.g. guessing password;</li>
<li>
U2R: unauthorized access to local superuser (root) privileges, e.g.,
various "buffer overflow" attacks;</li>
<li>
probing: surveillance and other probing, e.g., port scanning.</li>
</ul>
It is important to note that the test data is not from the same probability
distribution as the training data, and it includes specific attack types
not in the training data. This makes the task more realistic.
Some intrusion experts believe that most novel attacks are variants of
known attacks and the "signature" of known attacks can be sufficient to
catch novel variants. The datasets contain a total of 24 <a href="http://kdd.ics.uci.edu/databases/kddcup99/training_attack_types">training
attack types</a>, with an additional 14 types in the test data only.
<br>
<br>
<h4>
DERIVED FEATURES</h4>
Stolfo et al. defined higher-level features that help in distinguishing
normal connections from attacks. There are several categories of
derived features.
<p>The "same host" features examine only the connections in the past
two seconds that have the same destination host as the current connection,
and calculate statistics related to protocol behavior, service, etc.
</p><p>The similar "same service" features examine only the connections in
the past two seconds that have the same service as the current connection.
</p><p>"Same host" and "same service" features are together called time-based
traffic features of the connection records.
</p><p>Some probing attacks scan the hosts (or ports) using a much larger time
interval than two seconds, for example once per minute. Therefore,
connection records were also sorted by destination host, and features were
constructed using a window of 100 connections to the same host instead
of a time window. This yields a set of so-called host-based traffic
features.
</p><p>Unlike most of the DOS and probing attacks, there appear to be no sequential
patterns that are frequent in records of R2L and U2R attacks. This is because
the DOS and probing attacks involve many connections to some host(s) in
a very short period of time, but the R2L and U2R attacks are embedded in
the data portions of packets, and normally involve only a single connection.
</p><p>Useful algorithms for mining the unstructured data portions of packets
automatically are an open research question. Stolfo et al. used domain
knowledge to add features that look for suspicious behavior in the data
portions, such as the number of failed login attempts. These features
are called "content" features.
</p><p>A complete listing of the set of features defined for the connection
records is given in the three tables below. The data schema of the
contest dataset is available in <a href="http://kdd.ics.uci.edu/databases/kddcup99/kddcup.names">machine-readable
form</a>.
<br>
<br>
</p><center><table border="" width="80%" nosave="">
<tbody><tr nosave="">
<td><i>feature name</i></td>
<td nosave=""><i>description </i></td>
<td><i>type</i></td>
</tr>
<tr>
<td>duration </td>
<td>length (number of seconds) of the connection </td>
<td>continuous</td>
</tr>
<tr>
<td>protocol_type </td>
<td>type of the protocol, e.g. tcp, udp, etc. </td>
<td>discrete</td>
</tr>
<tr>
<td>service </td>
<td>network service on the destination, e.g., http, telnet, etc. </td>
<td>discrete</td>
</tr>
<tr>
<td>src_bytes </td>
<td>number of data bytes from source to destination </td>
<td>continuous</td>
</tr>
<tr>
<td>dst_bytes </td>
<td>number of data bytes from destination to source </td>
<td>continuous</td>
</tr>
<tr>
<td>flag </td>
<td>normal or error status of the connection </td>
<td>discrete </td>
</tr>
<tr>
<td>land </td>
<td>1 if connection is from/to the same host/port; 0 otherwise </td>
<td>discrete</td>
</tr>
<tr>
<td>wrong_fragment </td>
<td>number of "wrong" fragments </td>
<td>continuous</td>
</tr>
<tr>
<td>urgent </td>
<td>number of urgent packets </td>
<td>continuous</td>
</tr>
</tbody><caption align="BOTTOM">
<br>Table 1: Basic features of individual TCP connections.</caption>
</table></center>
<center><table border="" width="80%" nosave="">
<tbody><tr>
<td><i>feature name</i></td>
<td><i>description </i></td>
<td><i>type</i></td>
</tr>
<tr>
<td>hot </td>
<td>number of "hot" indicators</td>
<td>continuous</td>
</tr>
<tr>
<td>num_failed_logins </td>
<td>number of failed login attempts </td>
<td>continuous</td>
</tr>
<tr>
<td>logged_in </td>
<td>1 if successfully logged in; 0 otherwise </td>
<td>discrete</td>
</tr>
<tr>
<td>num_compromised </td>
<td>number of "compromised" conditions </td>
<td>continuous</td>
</tr>
<tr>
<td>root_shell </td>
<td>1 if root shell is obtained; 0 otherwise </td>
<td>discrete</td>
</tr>
<tr>
<td>su_attempted </td>
<td>1 if "su root" command attempted; 0 otherwise </td>
<td>discrete</td>
</tr>
<tr>
<td>num_root </td>
<td>number of "root" accesses </td>
<td>continuous</td>
</tr>
<tr>
<td>num_file_creations </td>
<td>number of file creation operations </td>
<td>continuous</td>
</tr>
<tr>
<td>num_shells </td>
<td>number of shell prompts </td>
<td>continuous</td>
</tr>
<tr>
<td>num_access_files </td>
<td>number of operations on access control files </td>
<td>continuous</td>
</tr>
<tr nosave="">
<td>num_outbound_cmds</td>
<td nosave="">number of outbound commands in an ftp session </td>
<td>continuous</td>
</tr>
<tr>
<td>is_hot_login </td>
<td>1 if the login belongs to the "hot" list; 0 otherwise </td>
<td>discrete</td>
</tr>
<tr>
<td>is_guest_login </td>
<td>1 if the login is a "guest" login; 0 otherwise </td>
<td>discrete</td>
</tr>
</tbody><caption align="BOTTOM">
<br>Table 2: Content features within a connection suggested by domain knowledge.</caption>
</table></center>
<center><table border="" width="80%" nosave="">
<tbody><tr>
<td><i>feature name</i></td>
<td><i>description </i></td>
<td><i>type</i></td>
</tr>
<tr>
<td>count </td>
<td>number of connections to the same host as the current connection in
the past two seconds </td>
<td>continuous</td>
</tr>
<tr>
<td></td>
<td><i>Note: The following features refer to these same-host connections.</i></td>
<td></td>
</tr>
<tr>
<td>serror_rate </td>
<td>% of connections that have "SYN" errors </td>
<td>continuous</td>
</tr>
<tr>
<td>rerror_rate </td>
<td>% of connections that have "REJ" errors </td>
<td>continuous</td>
</tr>
<tr>
<td>same_srv_rate </td>
<td>% of connections to the same service </td>
<td>continuous</td>
</tr>
<tr>
<td>diff_srv_rate </td>
<td>% of connections to different services </td>
<td>continuous</td>
</tr>
<tr>
<td>srv_count </td>
<td>number of connections to the same service as the current connection
in the past two seconds </td>
<td>continuous</td>
</tr>
<tr>
<td></td>
<td><i>Note: The following features refer to these same-service connections.</i></td>
<td></td>
</tr>
<tr>
<td>srv_serror_rate </td>
<td>% of connections that have "SYN" errors </td>
<td>continuous</td>
</tr>
<tr>
<td>srv_rerror_rate </td>
<td>% of connections that have "REJ" errors </td>
<td>continuous</td>
</tr>
<tr>
<td>srv_diff_host_rate </td>
<td>% of connections to different hosts </td>
<td>continuous </td>
</tr>
</tbody><caption align="BOTTOM">
<br>Table 3: Traffic features computed using a two-second time window.</caption>
</table></center>
<audio controls="controls" style="display: none;"></audio></body><style type="text/css">#yddContainer{display:block;font-family:Microsoft YaHei;position:relative;width:100%;height:100%;top:-4px;left:-4px;font-size:12px;border:1px solid}#yddTop{display:block;height:22px}#yddTopBorderlr{display:block;position:static;height:17px;padding:2px 28px;line-height:17px;font-size:12px;color:#5079bb;font-weight:bold;border-style:none solid;border-width:1px}#yddTopBorderlr .ydd-sp{position:absolute;top:2px;height:0;overflow:hidden}.ydd-icon{left:5px;width:17px;padding:0px 0px 0px 0px;padding-top:17px;background-position:-16px -44px}.ydd-close{right:5px;width:16px;padding-top:16px;background-position:left -44px}#yddKeyTitle{float:left;text-decoration:none}#yddMiddle{display:block;margin-bottom:10px}.ydd-tabs{display:block;margin:5px 0;padding:0 5px;height:18px;border-bottom:1px solid}.ydd-tab{display:block;float:left;height:18px;margin:0 5px -1px 0;padding:0 4px;line-height:18px;border:1px solid;border-bottom:none}.ydd-trans-container{display:block;line-height:160%}.ydd-trans-container a{text-decoration:none;}#yddBottom{position:absolute;bottom:0;left:0;width:100%;height:22px;line-height:22px;overflow:hidden;background-position:left -22px}.ydd-padding010{padding:0 10px}#yddWrapper{color:#252525;z-index:10001;background:url(chrome-extension://eopjamdnofihpioajgfdikhhbobonhbb/ab20.png);}#yddContainer{background:#fff;border-color:#4b7598}#yddTopBorderlr{border-color:#f0f8fc}#yddWrapper .ydd-sp{background-image:url(chrome-extension://eopjamdnofihpioajgfdikhhbobonhbb/ydd-sprite.png)}#yddWrapper a,#yddWrapper a:hover,#yddWrapper a:visited{color:#50799b}#yddWrapper .ydd-tabs{color:#959595}.ydd-tabs,.ydd-tab{background:#fff;border-color:#d5e7f3}#yddBottom{color:#363636}#yddWrapper{min-width:250px;max-width:400px;}</style></html>