Skip to content

pkuqiuning/coursera_datascience

Repository files navigation

coursera_datascience

R cheatsheet https://github.com/startupjing/Tech_Notes/blob/master/R/R_language.md

Quick R http://www.statmethods.net/

Compare SQL actions

- SQL: SWGHOL, S front
- R: WSGHOL, S second
- FUNCTION: WGHSOL, S middle

SQL

SELECT summarize(col1, col2) as newcol
INTO results
FROM table
WHERE check(col3, col4) IS TRUE
GROUP BY col5, categorize(col6)
HAVING subcheck(col7, col8) IS TRUE
ORDER BY key(col9) DESC
LIMIT 10;

R

aggregate(select ~ group by)[having, ][order, ][limit]

aggdata <-aggregate(cbind(col1, col2) ~ col5 + categorize(col6), data=table, FUN=summarize)
having <-aggregate(cbind(col7, col8) ~ col5 + categorize(col6), data=table, FUN=subcheck)
order_key <- aggregate(col9 ~ col5 + categorize(col6), data=table, FUN=key)
aggdata[having, ][order(-order_key), ][1:10]

R using data.table

table[where, select|update, group by][having][order by][limit]

table[
	check(col3, col4),
	c('newcol', having_check, key_check) := list(summarize(col1, col2), subcheck(col7, col8), key(col9)), 
	by=(col5, categorize(col6))
	][having_check][order(-key_check)][1:10]

Python using Pandas

table[where].groupby([]).filter(lambda having).apply(select).sort(order by).head(limit)

selected = table[check(table.col3, table.col4)]
results = selected.groupby(['col5', categorize(selected.col6)]).filter(
    lambda t: subcheck(t.col7, t.col8)
  ).apply(
    lambda t:DataFrame(dict(newcol=summarize(t.col1, t.col2),sortkey=key(t.col9)))
  ).sort('sortkey', ascending=False).head(10)

In Program using Collectors

def get:
	[]
	for g in groupby:
		[][][]
		for p:
			if where:
				append(having, select, order by)
		if havings:
			append((selects, order bys))
	return

results = sorted(get)[limit]
def get_answers():
	"""Collect one ``(sortkey, newcol)`` tuple per group that passes ``subcheck``.

	For each group produced by ``generate_group(col5, col6, categorize)``,
	every ``p`` from ``parameter_range(g)`` that satisfies ``check`` (the
	WHERE step) contributes to three per-group collectors: the values to
	summarize, the values for the HAVING test, and the sort-key input.
	Groups whose HAVING values pass ``subcheck`` are summarized and appended
	to the result.

	Returns:
		A list of ``(sortkey, newcol)`` tuples in group order (unsorted).
	"""
	results = []
	for g in generate_group(col5, col6, categorize):
		collector = []
		key_collector = []
		having_collector = []
		for p in parameter_range(g):
			if check(col3(g, p), col4(g, p)):
				# list.append takes exactly one argument, so each pair of
				# column values is stored as a single tuple.
				collector.append((col1(g, p), col2(g, p)))
				having_collector.append((col7(g, p), col8(g, p)))
				key_collector.append(col9(g, p))
		if subcheck(having_collector):
			newcol = summarize(collector)
			sortkey = key(key_collector)
			# sortkey goes first so that plain tuple sorting orders by it.
			results.append((sortkey, newcol))
	return results

# Sort the (sortkey, newcol) tuples in descending order and keep the first 10
# (the ORDER BY ... DESC / LIMIT 10 steps).
results = sorted(get_answers(), reverse=True)[:10]

In Program using Yield

def gen():
	def do_group(g):
		for p:
			if where:
				yield having, select, order by
	for g:
		s, h, o = zip(*do_group(g))
		if having:
			yield select, order by
			

results = sorted(gen)[limit]
def yield_anwsers():
	"""Yield one ``(sortkey, newcol)`` tuple per group that passes ``subcheck``.

	Generator counterpart of the collector-based version: an inner generator
	emits the per-row values of a group, and the outer loop transposes them
	with ``zip`` before applying the HAVING test and the summary.

	Yields:
		``(sortkey, newcol)`` tuples, one per accepted group, in group order.
	"""
	def calculate_group(g):
		"""Yield ``(collector, having, sort_value)`` for each row of *g* that passes ``check``."""
		for p in parameter_range(g):
			if check(col3(g, p), col4(g, p)):
				collector = (col1(g, p), col2(g, p))
				having = (col7(g, p), col8(g, p))
				# Named sort_value so the local does not shadow the
				# ``key`` function used by the outer loop.
				sort_value = col9(g, p)
				yield collector, having, sort_value

	for g in generate_group(col5, col6, categorize):
		rows = list(calculate_group(g))
		if not rows:
			# No row passed ``check``: zip(*rows) would produce nothing and
			# the three-way unpacking below would raise ValueError. A group
			# with no surviving rows contributes no result.
			continue
		# Transpose the row tuples into three parallel column tuples.
		collectors, havings, keys = zip(*rows)
		if subcheck(havings):
			newcol = summarize(collectors)
			sortkey = key(keys)
			yield sortkey, newcol

# Sort the yielded (sortkey, newcol) tuples in descending order and keep the first 10.
results = sorted(yield_anwsers(), reverse=True)[:10]

In Program vector style

def gen():
	for g:
		y=f(g) # where, having, order by
		if having:
			yield select, order by

results = sorted(gen)[limit]
def yield_anwsers():
	"""Yield ``(sortkey, newcol)`` for each group whose HAVING columns pass ``subcheck``.

	Vector-style variant: for each group the columns are built in bulk by the
	``make*`` helpers instead of row by row. The WHERE selection is computed
	once by ``makloc`` and passed to the remaining column builders.
	"""
	for group in generate_group(col5, col6, categorize):
		params = makeps(parameter_range)

		# WHERE: build the two filter columns and derive the selection from them.
		where_a = makecol3(group, params)
		where_b = makecol4(group, params)
		selection = makloc(check, where_a, where_b)

		# Build every remaining column using the selection.
		summary_a = makecol1(group, selection, params)
		summary_b = makecol2(group, selection, params)
		having_a = makecol7(group, selection, params)
		having_b = makecol8(group, selection, params)
		order_col = makecol9(group, selection, params)

		# HAVING: drop the group unless the check passes.
		if not subcheck(having_a, having_b):
			continue

		newcol = summarize(summary_a, summary_b)
		yield key(order_col), newcol

# Sort the yielded (sortkey, newcol) tuples in descending order and keep the first 10.
results = sorted(yield_anwsers(), reverse=True)[:10]

About

No description, website, or topics provided.

Resources

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages