-
Notifications
You must be signed in to change notification settings - Fork 0
/
Intro to CS Python
699 lines (425 loc) · 31 KB
/
Intro to CS Python
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
Input > Procedure > Output
#def procedurename(): #if no inputs; input = argument = operand
# <block of code here>
# return output1, output2 #if you dont return any output, your variables/values will remain unchanged outside this procedure
#and print is not the same as returning a value
#def procedurename(input1, input2, ...):
# <block of code here>
# return output1, output2
# + #can sum values; can concatenate strings
#find second occurrence of a particular string
def find_second(search, target):
first = search.find(target)
second = search.find(target, first + 1) #find target string from 'position of previous target string + 1'
return second
#if <testexpression>: #execute 0 or 1 time
# <execute this block of code if expression above is true>
#else
# <execute this block of code>
#while <testexpression>: #execute 0 or 1 or a no of times
# <execute this block of code while expression above is true>
#while <testexpression>:
# <execute this block of code while expression above is true>
# if <break test>:
# break #ie jump out from the while loop
#<code after while loop>
#if url is not found, return None, 0
def get_next_target(page):
start_link = page.find('<a href=')
# if the link tag sequence is not found, find returns a -1
if start_link == -1:
# return the error codes of None, 0 now and skip the rest!
return None, 0
start_quote = page.find('"', start_link)
end_quote = page.find('"', start_quote + 1)
url = page[start_quote + 1:end_quote]
return url, end_quote
#print all url
def print_all_links(page):
while True:
url, endpos = get_next_target(page)
if url:
print url
page = page[endpos:]
else:
break
#to test
#print_all_links('some link 1', ..., 'some link n')
#print_all_links(get_page('page url 1'))
#assertion #give an assertion error msg if fail
#assert <expression>
# False => fail
assert not dateIsBefore(year2, month2, day2, year1, month1, day1)
days = 0
##return days between 2 dates
# finish your daysBetweenDates
# procedure. It will need to take into account leap years
# in addition to the correct number of days in each month.
def nextDay(year, month, day):
"""Simple version: assume every month has 30 days"""
if day < 30:
return year, month, day + 1
else:
if month == 12:
return year + 1, 1, 1
else:
return year, month + 1, 1
#added daysInMonth
def daysInMonth(year,month):
if month == 1 or month == 3 or month == 5 or month == 7 or month == 8 or month == 10 or month == 12
return 31
else:
if month == 2:
if isLeapYear(year):
return 29
else:
return 28 #elseif month is Feb but not a leap year, return 28
return 30
#added isLeapYear
def isLeapYear(year):
if year % 400 == 0:
return True
if year % 0 == 0:
return False
if year % 4 == 0:
return True
return False #ie else return False
def dateIsBefore(year1, month1, day1, year2, month2, day2):
"""Returns True if year1-month1-day1 is before year2-month2-day2. Otherwise, returns False."""
if year1 < year2:
return True
if year1 == year2:
if month1 < month2:
return True
if month1 == month2:
return day1 < day2
return False
def daysBetweenDates(year1, month1, day1, year2, month2, day2):
"""Returns the number of days between year1/month1/day1
and year2/month2/day2. Assumes inputs are valid dates
in Gregorian calendar."""
# program defensively! Add an assertion if the input is not valid!
assert not dateIsBefore(year2, month2, day2, year1, month1, day1)
days = 0
while dateIsBefore(year1, month1, day1, year2, month2, day2):
year1, month1, day1 = nextDay(year1, month1, day1)
days += 1
return days
def test():
test_cases = [((2012,1,1,2012,2,28), 58),
((2012,1,1,2012,3,1), 60),
((2011,6,30,2012,6,30), 366),
((2011,1,1,2012,8,8), 585 ),
((1900,1,1,1999,12,31), 36523)]
for (args, answer) in test_cases:
result = daysBetweenDates(*args)
if result != answer:
print "Test with data:", args, "failed"
else:
print "Test case passed!"
def mytest():
#tests with 30-day months
assert daysBetweenDates(2017,1,1, 2017,1,1) == 0
assert daysBetweenDates(2017,1,1, 2017,1,2) == 1
assert nextDay(2017,1,1) == (2017,1,2)
assert nextDay(2017,4,30) == (2017,5,1)
assert nextDay(2017,12,31) == (2018,1,1)
assert nextDay(2017,2,28) == (2017,3,1)
assert nextDay(2017,9,30) == (2017,10,1)
assert nextDay(2016,2,28) == (2016,2,29) #leap year
assert daysBetweenDates(2016,1,1, 2017,1,1) == 366 #leap year
assert daysBetweenDates(2017,1,1, 2018,1,1) == 365
assert daysBetweenDates(2017,1,24, 2018,6,29) == 156
print "Tests finished"
test()
#list
list = ['a', 'b', 3]
list[position no]
list[start pos:end pos]
list1 + list2 = [elements in list 1 and 2]
len(listname) #only count outer elements; dont count elements inside the inner list
len('string') #length of a string; no of characters
listname.append('element') #we can append a list within another list
#for loop
#for <name> in <list>:
# <execute block of code>
#print all elements in a list on separate lines
def print_all_elements(listname):
for element in listname: #'element' will be assigned the element at each position of our list
print element
mylist = [1,2,[3,4]]
print_all_elements(mylist)
#1
#2
#[3,4]
#sum all elements in a list
def sum_list(p): #input list p
result = 0
for e in p: #for element in list p
result = result + e
return result
#count no of elements which start with 'U'
def measure_udacity(p):
result = 0
for e in p:
if e[0] == 'U':
result += 1
return result
#return 1st occurrence of target element in a list
#own ans
def find_element(p, t):
i = 0
while i < len(p):
if p[i] == t:
return i
return -1
i += 1
#udacity ans
def find_element(p, t):
i = 0
while i < len(p):
if p[i] == t:
return i
i += 1
return -1
def find_element(p, t):
i = 0
for e in p:
if e == t:
return i
i += 1
return -1
#<listname>.index(<target element>) #returns 1st position of the target element in the list
#<target element> in <listname> #return True/False whether target element is in the list
#<target element> not in <listname>
#find element using list.index(element) and 'target in list'
def find_element(p, t):
if t in p:
return p.index(t)
else:
return -1
#find element using list.index(element) and 'target not in list'
def find_element(p, t):
if t not in p:
return -1
return p.index(t)
#append unique elements from list t onto list s
def union(s, t):
for e in t:
if e not in s:
s.append(e)
return s
#<listname>.pop() #removes and returns the last element
#web crawler for links starting from a seed page
def get_page(url):
# This is a simulated get_page procedure so that you can test your
# code on two pages "http://xkcd.com/353" and "http://xkcd.com/554".
# A procedure which actually grabs a page from the web will be
# introduced in unit 4.
try:
if url == "http://xkcd.com/353":
return '<?xml version="1.0" encoding="utf-8" ?><?xml-stylesheet href="http://imgs.xkcd.com/s/c40a9f8.css" type="text/css" media="screen" ?><!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"><html xmlns="http://www.w3.org/1999/xhtml"> <head> <title>xkcd: Python</title> <link rel="stylesheet" type="text/css" href="http://imgs.xkcd.com/s/c40a9f8.css" media="screen" title="Default" /> <!--[if IE]><link rel="stylesheet" type="text/css" href="http://imgs.xkcd.com/s/ecbbecc.css" media="screen" title="Default" /><![endif]--> <link rel="alternate" type="application/atom+xml" title="Atom 1.0" href="/atom.xml" /> <link rel="alternate" type="application/rss+xml" title="RSS 2.0" href="/rss.xml" /> <link rel="icon" href="http://imgs.xkcd.com/s/919f273.ico" type="image/x-icon" /> <link rel="shortcut icon" href="http://imgs.xkcd.com/s/919f273.ico" type="image/x-icon" /> </head> <body> <div id="container"> <div id="topContainer"> <div id="topLeft" class="dialog"> <div class="hd"><div class="c"></div></div> <div class="bd"> <div class="c"> <div class="s">\t<ul> <li><a href="http://xkcd.com/554"">Archive</a><br /></li>\t <li><a href="http://blag.xkcd.com/">News/Blag</a><br /></li> <li><a href="http://store.xkcd.com/">Store</a><br /></li> <li><a href="/about/">About</a><br /></li> <li><a href="http://forums.xkcd.com/">Forums</a><br /></li> </ul> </div> </div> </div> <div class="ft"><div class="c"></div></div> </div> <div id="topRight" class="dialog"> <div class="hd"><div class="c"></div></div> <div class="bd"> <div class="c"> <div class="s"> <div id="topRightContainer"> <div id="logo"> <a href="/"><img src="http://imgs.xkcd.com/s/9be30a7.png" alt="xkcd.com logo" height="83" width="185"/></a> <h2><br />A webcomic of romance,<br/> sarcasm, math, and language.</h2> <div class="clearleft"></div> <br />XKCD updates every Monday, Wednesday, and Friday. </div> </div> </div> </div> </div> <div class="ft"><div class="c"></div></div> </div> </div> <div id="contentContainer"> <div id="middleContent" class="dialog"> <div class="hd"><div class="c"></div></div> <div class="bd"> <div class="c"> <div class="s"><h1>Python</h1><br/><br /><div class="menuCont"> <ul> <li><a href="/1/">|<</a></li> <li><a href="/352/" accesskey="p">< Prev</a></li> <li><a href="http://dynamic.xkcd.com/random/comic/" id="rnd_btn_t">Random</a></li> <li><a href="/354/" accesskey="n">Next ></a></li> <li><a href="/">>|</a></li> </ul></div><br/><br/><img src="http://imgs.xkcd.com/comics/python.png" title="I wrote 20 short programs in Python yesterday. It was wonderful. Perl, Im leaving you." alt="Python" /><br/><br/><div class="menuCont"> <ul> <li><a href="/1/">|<</a></li> <li><a href="/352/" accesskey="p">< Prev</a></li> <li><a href="http://dynamic.xkcd.com/random/comic/" id="rnd_btn_b">Random</a></li> <li><a href="/354/" accesskey="n">Next ></a></li> <li><a href="/">>|</a></li> </ul></div><h3>Permanent link to this comic: http://xkcd.com/353/</h3><h3>Image URL (for hotlinking/embedding): http://imgs.xkcd.com/comics/python.png</h3><div id="transcript" style="display: none">[[ Guy 1 is talking to Guy 2, who is floating in the sky ]]Guy 1: You39;re flying! How?Guy 2: Python!Guy 2: I learned it last night! Everything is so simple!Guy 2: Hello world is just 39;print "Hello, World!" 39;Guy 1: I dunno... Dynamic typing? Whitespace?Guy 2: Come join us! Programming is fun again! It39;s a whole new world up here!Guy 1: But how are you flying?Guy 2: I just typed 39;import antigravity39;Guy 1: That39;s it?Guy 2: ...I also sampled everything in the medicine cabinet for comparison.Guy 2: But i think this is the python.{{ I wrote 20 short programs in Python yesterday. It was wonderful. Perl, I39;m leaving you. }}</div> </div> </div> </div> <div class="ft"><div class="c"></div></div> </div> <div id="middleFooter" class="dialog"> <div class="hd"><div class="c"></div></div> <div class="bd"> <div class="c"> <div class="s"> <img src="http://imgs.xkcd.com/s/a899e84.jpg" width="520" height="100" alt="Selected Comics" usemap=" comicmap" /> <map name="comicmap"> <area shape="rect" coords="0,0,100,100" href="/150/" alt="Grownups" /> <area shape="rect" coords="104,0,204,100" href="/730/" alt="Circuit Diagram" /> <area shape="rect" coords="208,0,308,100" href="/162/" alt="Angular Momentum" /> <area shape="rect" coords="312,0,412,100" href="/688/" alt="Self-Description" /> <area shape="rect" coords="416,0,520,100" href="/556/" alt="Alternative Energy Revolution" /> </map><br/><br />Search comic titles and transcripts:<br /><script type="text/javascript" src="//www.google.com/jsapi"></script><script type="text/javascript"> google.load(\"search\", \"1\"); google.setOnLoadCallback(function() { google.search.CustomSearchControl.attachAutoCompletion( \"012652707207066138651:zudjtuwe28q\", document.getElementById(\"q\"), \"cse-search-box\"); });</script><form action="//www.google.com/cse" id="cse-search-box"> <div> <input type="hidden" name="cx" value="012652707207066138651:zudjtuwe28q" /> <input type="hidden" name="ie" value="UTF-8" /> <input type="text" name="q" id="q" autocomplete="off" size="31" /> <input type="submit" name="sa" value="Search" /> </div></form><script type="text/javascript" src="//www.google.com/cse/brand?form=cse-search-box&lang=en"></script><a href="/rss.xml">RSS Feed</a> - <a href="/atom.xml">Atom Feed</a><br /> <br/> <div id="comicLinks"> Comics I enjoy:<br/> <a href="http://www.qwantz.com">Dinosaur Comics</a>, <a href="http://www.asofterworld.com">A Softer World</a>, <a href="http://pbfcomics.com/">Perry Bible Fellowship</a>, <a href="http://www.boltcity.com/copper/">Copper</a>, <a href="http://questionablecontent.net/">Questionable Content</a>, <a href="http://achewood.com/">Achewood</a>, <a href="http://wondermark.com/">Wondermark</a>, <a href="http://thisisindexed.com/">Indexed</a>, <a href="http://www.buttercupfestival.com/buttercupfestival.htm">Buttercup Festival</a> </div> <br/> Warning: this comic occasionally contains strong language (which may be unsuitable for children), unusual humor (which may be unsuitable for adults), and advanced mathematics (which may be unsuitable for liberal-arts majors).<br/> <br/> <h4>We did not invent the algorithm. The algorithm consistently finds Jesus. The algorithm killed Jeeves. <br />The algorithm is banned in China. The algorithm is from Jersey. The algorithm constantly finds Jesus.<br />This is not the algorithm. This is close.</h4><br/> <div class="line"></div> <br/> <div id="licenseText"> <!-- <a rel="license" href="http://creativecommons.org/licenses/by-nc/2.5/"><img alt="Creative Commons License" style="border:none" src="http://imgs.xkcd.com/static/somerights20.png" /></a><br/> --> This work is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-nc/2.5/">Creative Commons Attribution-NonCommercial 2.5 License</a>.<!-- <rdf:RDF xmlns="http://web.resource.org/cc/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns "><Work rdf:about=""><dc:creator>Randall Munroe</dc:creator><dcterms:rightsHolder>Randall Munroe</dcterms:rightsHolder><dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage" /><dc:source rdf:resource="http://www.xkcd.com/"/><license rdf:resource="http://creativecommons.org/licenses/by-nc/2.5/" /></Work><License rdf:about="http://creativecommons.org/licenses/by-nc/2.5/"><permits rdf:resource="http://web.resource.org/cc/Reproduction" /><permits rdf:resource="http://web.resource.org/cc/Distribution" /><requires rdf:resource="http://web.resource.org/cc/Notice" /><requires rdf:resource="http://web.resource.org/cc/Attribution" /><prohibits rdf:resource="http://web.resource.org/cc/CommercialUse" /><permits rdf:resource="http://web.resource.org/cc/DerivativeWorks" /></License></rdf:RDF> --> <br/> This means you\"re free to copy and share these comics (but not to sell them). <a href="/license.html">More details</a>.<br/> </div> </div> </div> </div> <div class="ft"><div class="c"></div></div> </div> </div> </div> </body></html> '
elif url == "http://xkcd.com/554":
return '<?xml version="1.0" encoding="utf-8" ?> <?xml-stylesheet href="http://imgs.xkcd.com/s/c40a9f8.css" type="text/css" media="screen" ?> <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd"> <html xmlns="http://www.w3.org/1999/xhtml"> <head> <title>xkcd: Not Enough Work</title> <link rel="stylesheet" type="text/css" href="http://imgs.xkcd.com/s/c40a9f8.css" media="screen" title="Default" /> <!--[if IE]><link rel="stylesheet" type="text/css" href="http://imgs.xkcd.com/s/ecbbecc.css" media="screen" title="Default" /><![endif]--> <link rel="alternate" type="application/atom+xml" title="Atom 1.0" href="/atom.xml" /> <link rel="alternate" type="application/rss+xml" title="RSS 2.0" href="/rss.xml" /> <link rel="icon" href="http://imgs.xkcd.com/s/919f273.ico" type="image/x-icon" /> <link rel="shortcut icon" href="http://imgs.xkcd.com/s/919f273.ico" type="image/x-icon" /> </head> <body> <div id="container"> <div id="topContainer"> <div id="topLeft" class="dialog"> <div class="hd"><div class="c"></div></div> <div class="bd"> <div class="c"> <div class="s"> <ul> <li><a href="/archive/">Archive</a><br /></li> <li><a href="http://blag.xkcd.com/">News/Blag</a><br /></li> <li><a href="http://store.xkcd.com/">Store</a><br /></li> <li><a href="/about/">About</a><br /></li> <li><a href="http://forums.xkcd.com/">Forums</a><br /></li> </ul> </div> </div> </div> <div class="ft"><div class="c"></div></div> </div> <div id="topRight" class="dialog"> <div class="hd"><div class="c"></div></div> <div class="bd"> <div class="c"> <div class="s"> <div id="topRightContainer"> <div id="logo"> <a href="/"><img src="http://imgs.xkcd.com/s/9be30a7.png" alt="xkcd.com logo" height="83" width="185"/></a> <h2><br />A webcomic of romance,<br/> sarcasm, math, and language.</h2> <div class="clearleft"></div> XKCD updates every Monday, Wednesday, and Friday. <br /> Blag: Remember geohashing? <a href="http://blog.xkcd.com/2012/02/27/geohashing-2/">Something pretty cool</a> happened Sunday. </div> </div> </div> </div> </div> <div class="ft"><div class="c"></div></div> </div> </div> <div id="contentContainer"> <div id="middleContent" class="dialog"> <div class="hd"><div class="c"></div></div> <div class="bd"> <div class="c"> <div class="s"> <h1>Not Enough Work</h1><br/> <br /> <div class="menuCont"> <ul> <li><a href="/1/">|<</a></li> <li><a href="/553/" accesskey="p">< Prev</a></li> <li><a href="http://dynamic.xkcd.com/random/comic/" id="rnd_btn_t">Random</a></li> <li><a href="/555/" accesskey="n">Next ></a></li> <li><a href="/">>|</a></li> </ul> </div> <br/> <br/> <img src="http://imgs.xkcd.com/comics/not_enough_work.png" title="It39;s even harder if you39;re an asshole who pronounces <> brackets." alt="Not Enough Work" /><br/> <br/> <div class="menuCont"> <ul> <li><a href="/1/">|<</a></li> <li><a href="/553/" accesskey="p">< Prev</a></li> <li><a href="http://dynamic.xkcd.com/random/comic/" id="rnd_btn_b">Random</a></li> <li><a href="/555/" accesskey="n">Next ></a></li> <li><a href="/">>|</a></li> </ul> </div> <h3>Permanent link to this comic: http://xkcd.com/554/</h3> <h3>Image URL (for hotlinking/embedding): http://imgs.xkcd.com/comics/not_enough_work.png</h3> <div id="transcript" style="display: none">Narration: Signs your coders don39;t have enough work to do: [[A man sitting at his workstation; a female co-worker behind him]] Man: I39;m almost up to my old typing speed in dvorak [[Two men standing by a server rack]] Man 1: Our servers now support gopher. Man 1: Just in case. [[A woman standing near her workstation speaking to a male co-worker]] Woman: Our pages are now HTML, XHTML-STRICT, and haiku-compliant Man: Haiku? Woman: <div class="main"> Woman: <span id="marquee"> Woman: Blog!< span>< div> [[A woman sitting at her workstation]] Woman: Hey! Have you guys seen this webcomic? {{title text: It39;s even harder if you39;re an asshole who pronounces <> brackets.}}</div> </div> </div> </div> <div class="ft"><div class="c"></div></div> </div> <div id="middleFooter" class="dialog"> <div class="hd"><div class="c"></div></div> <div class="bd"> <div class="c"> <div class="s"> <img src="http://imgs.xkcd.com/s/a899e84.jpg" width="520" height="100" alt="Selected Comics" usemap=" comicmap" /> <map name="comicmap"> <area shape="rect" coords="0,0,100,100" href="/150/" alt="Grownups" /> <area shape="rect" coords="104,0,204,100" href="/730/" alt="Circuit Diagram" /> <area shape="rect" coords="208,0,308,100" href="/162/" alt="Angular Momentum" /> <area shape="rect" coords="312,0,412,100" href="/688/" alt="Self-Description" /> <area shape="rect" coords="416,0,520,100" href="/556/" alt="Alternative Energy Revolution" /> </map><br/><br /> Search comic titles and transcripts:<br /> <script type="text/javascript" src="//www.google.com/jsapi"></script> <script type="text/javascript"> google.load("search", "1"); google.search.CustomSearchControl.attachAutoCompletion( "012652707207066138651:zudjtuwe28q", document.getElementById("q"), "cse-search-box"); }); </script> <form action="//www.google.com/cse" id="cse-search-box"> <div> <input type="hidden" name="cx" value="012652707207066138651:zudjtuwe28q" /> <input type="hidden" name="ie" value="UTF-8" /> <input type="text" name="q" id="q" autocomplete="off" size="31" /> <input type="submit" name="sa" value="Search" /> </div> </form> <script type="text/javascript" src="//www.google.com/cse/brand?form=cse-search-box&lang=en"></script> <a href="/rss.xml">RSS Feed</a> - <a href="/atom.xml">Atom Feed</a> <br /> <br/> <div id="comicLinks"> Comics I enjoy:<br/> <a href="http://threewordphrase.com/">Three Word Phrase</a>, <a href="http://oglaf.com/">Oglaf</a> (nsfw), <a href="http://www.smbc-comics.com/">SMBC</a>, <a href="http://www.qwantz.com">Dinosaur Comics</a>, <a href="http://www.asofterworld.com">A Softer World</a>, <a href="http://buttersafe.com/">Buttersafe</a>, <a href="http://pbfcomics.com/">Perry Bible Fellowship</a>, <a href="http://questionablecontent.net/">Questionable Content</a>, <a href="http://www.buttercupfestival.com/buttercupfestival.htm">Buttercup Festival</a> </div> <br/> Warning: this comic occasionally contains strong language (which may be unsuitable for children), unusual humor (which may be unsuitable for adults), and advanced mathematics (which may be unsuitable for liberal-arts majors).<br/> <br/> <h4>We did not invent the algorithm. The algorithm consistently finds Jesus. The algorithm killed Jeeves. <br />The algorithm is banned in China. The algorithm is from Jersey. The algorithm constantly finds Jesus.<br />This is not the algorithm. This is close.</h4><br/> <div class="line"></div> <br/> <div id="licenseText"> <!-- <a rel="license" href="http://creativecommons.org/licenses/by-nc/2.5/"><img alt="Creative Commons License" style="border:none" src="http://imgs.xkcd.com/static/somerights20.png" /></a><br/> --> This work is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-nc/2.5/">Creative Commons Attribution-NonCommercial 2.5 License</a>. <!-- <rdf:RDF xmlns="http://web.resource.org/cc/" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:dcterms="http://purl.org/dc/terms/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns "><Work rdf:about=""><dc:creator>Randall Munroe</dc:creator><dcterms:rightsHolder>Randall Munroe</dcterms:rightsHolder><dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage" /><dc:source rdf:resource="http://www.xkcd.com/"/><license rdf:resource="http://creativecommons.org/licenses/by-nc/2.5/" /></Work><License rdf:about="http://creativecommons.org/licenses/by-nc/2.5/"><permits rdf:resource="http://web.resource.org/cc/Reproduction" /><permits rdf:resource="http://web.resource.org/cc/Distribution" /><requires rdf:resource="http://web.resource.org/cc/Notice" /><requires rdf:resource="http://web.resource.org/cc/Attribution" /><prohibits rdf:resource="http://web.resource.org/cc/CommercialUse" /><permits rdf:resource="http://web.resource.org/cc/DerivativeWorks" /></License></rdf:RDF> --> <br/> This means you"re free to copy and share these comics (but not to sell them). <a href="/license.html">More details</a>.<br/> </div> </div> </div> </div> <div class="ft"><div class="c"></div></div> </div> </div> </div> </body> </html> '
except:
return ""
return ""
def get_next_target(page):
start_link = page.find('<a href=')
if start_link == -1:
return None, 0
start_quote = page.find('"', start_link)
end_quote = page.find('"', start_quote + 1)
url = page[start_quote + 1:end_quote]
return url, end_quote
def union(p,q):
for e in q:
if e not in p:
p.append(e)
def get_all_links(page):
links = []
while True:
url,endpos = get_next_target(page)
if url:
links.append(url)
page = page[endpos:]
else:
break
return links
#depth first search
def crawl_web(seed):
tocrawl = [seed] #seed page as input
crawled = [] #empty list initially
while tocrawl: #while listname = while there are elements in listname
page = tocrawl.pop() #pop allows us to do breadth first search, ie start from the last link on a seed page, then go to the last link of subsequent links,..., then move on to second last link, ... , move on to first link on seed page at the end
if page not in crawled:
union(tocrawl, get_all_links(get_page(page))) #union procedure was defined by us #union links from any new pages we find to the tocrawl list
crawled.append(page)
return crawled
#crawl up to a max no of pages
def crawl_web(seed,max_pages):
tocrawl = [seed]
crawled = []
while tocrawl:
page = tocrawl.pop()
if page not in crawled and len(crawled) < max_pages: #no of crawled pages/elements < the max no of pages you want to crawl
union(tocrawl, get_all_links(get_page(page)))
crawled.append(page)
return crawled
#crawl up to a max depth, ie take the shortest path to get to subsequent webpages
def crawl_web(seed,max_depth):
tocrawl = [seed]
crawled = []
next_depth= []
depth = ()
while tocrawl and depth <= max_depth:
page = tocrawl.pop()
if page not in crawled: #existing page/pages not found in crawled
union(next_depth, get_all_links(get_page(page))) #get all links to subsequent pages and add it to our next_depth list
crawled.append(page) #existing page/pages are added to crawled
if not tocrawl:
tocrawl, next_depth = next_depth, [] #links from next_depth move to tocrawl if havent crawled(above) or not in tocrawl(here)
depth = depth + 1 #next_depth becomes empty after moving; increase depth of next round/iteration of search
return crawled
#sudoku digit checker
correct = [[1,2,3],
[2,3,1],
[3,1,2]]
incorrect = [[1,2,3,4],
[2,3,1,3],
[3,1,2,3],
[4,4,4,4]]
incorrect2 = [[1,2,3,4],
[2,3,1,4],
[4,1,2,3],
[3,4,1,2]]
incorrect3 = [[1,2,3,4,5],
[2,3,1,5,6],
[4,5,2,1,3],
[3,4,5,2,1],
[5,6,4,3,2]]
incorrect4 = [['a','b','c'],
['b','c','a'],
['c','a','b']]
incorrect5 = [ [1, 1.5],
[1.5, 1]]
def check_sudoku(p):
n = len(p) #extract size of grid
digit = 1 #start with 1
while digit <= n: #go through each digit
i=0
while i<n: #go through every row and col
row_count = 0 #initialise counters
col_count = 0
j = 0
while j <= n: # for each entry in ith row/ith col
if p[i][j] == digit: #row i
row_count += 1
if p[j][i] == digit: #col j
col_count += 1
j += 1 #go through every col on row i + every row on col i
if row_count != 1 or col_count != 1:
return False
i += 1 #next set of ith row/col
digit += 1 #next digit; and go through the whole grid again
return True #nothing was wrong
#inner loop looks at 1 digit before going through the whole grid; return False at any time that the digit test in a row/col is False
#then middle loop will increment digit till the last digit; take the number and go through the inner loop
#once digit is greater than the last digit, stop loops and return True
#print check_sudoku(incorrect)
#>>> False
#print check_sudoku(correct)
#>>> True
#print check_sudoku(incorrect2)
#>>> False
#print check_sudoku(incorrect3)
#>>> False
#print check_sudoku(incorrect4)
#>>> False
#print check_sudoku(incorrect5)
#>>> False
#compile a list of (lists of [keyword and list of urls])
# Define a procedure, add_to_index,
# that takes 3 inputs:
# - an index: [[<keyword>,[<url>,...]],...]
# - a keyword: String
# - a url: String
# If the keyword is already
# in the index, add the url
# to the list of urls associated
# with that keyword.
# If the keyword is not in the index,
# add an entry to the index: [keyword,[url]]
index = [] #initialise empty list
def add_to_index(index,keyword,url):
for entry in index:
if entry[0] == keyword: #do not use index[entry][0]
entry[1].append(url) #when keyword == existing entry, only insert url in the 2nd element, ie the url list of the entry
return #return exits from for loop; doesnt end the whole procedure like 'break' does
index.append([keyword,[url]]) #since keyword was not found after running through our index list, add keyword and url as a new entry to our index list
#add_to_index(index,'udacity','http://udacity.com')
#add_to_index(index,'computing','http://acm.org')
#add_to_index(index,'udacity','http://npr.org')
#print index
#>>> [['udacity', ['http://udacity.com', 'http://npr.org']],
#>>> ['computing', ['http://acm.org']]]
#Lookup a keyword from an index list; then return list of url
# Define a procedure, lookup,
# that takes two inputs:
# - an index
# - keyword
# The procedure should return a list
# of the urls associated
# with the keyword. If the keyword
# is not in the index, the procedure
# should return an empty list.
index = [['udacity', ['http://udacity.com', 'http://npr.org']],
['computing', ['http://acm.org']]]
def lookup(index,keyword):
for entry in index:
if entry[0] == keyword:
return entry[1]
return []
print lookup(index,'udacity')
#>>> ['http://udacity.com','http://npr.org']
#stringname.split() #will split all the words in a string into individual elements in a list
#from a web page, add all keywords + url that can appear in a page to our index list
#then look up another webpage and add keywords + url to our index page
# Define a procedure, add_page_to_index,
# that takes three inputs:
# - index
# - url (String)
# - content (String)
# It should update the index to include
# all of the word occurences found in the
# page content by adding the url to the
# word's associated url list.
index = []
def add_to_index(index,keyword,url):
for entry in index:
if entry[0] == keyword:
entry[1].append(url)
return
index.append([keyword,[url]])
def add_page_to_index(index,url,content):
words = content.split()
for word in words:
add_to_index(index,word,url) #use our previously defined procedure
#just modify index; dont need to return anything
add_page_to_index(index,'fake.text',"This is a test")
print index
#>>> [['This', ['fake.text']], ['is', ['fake.text']], ['a', ['fake.text']],
#>>> ['test',['fake.text']]]
#web crawler [L15]
def crawl_web(seed):
tocrawl = [seed]
crawled = []
index = []
while tocrawl: #links on seed page
page = tocrawl.pop() #
if page not in crawled:
content = get_page(page)
add_page_to_index(index,page,content) #for each page, add content to index
union(tocrawl, get_all_links(content))
crawled.append(page)
return index