-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest-exe-getStockMajorData.R
71 lines (50 loc) · 2.3 KB
/
test-exe-getStockMajorData.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# Optional extra package library, overridable through the USER_LIB_PATH
# environment variable.
#
# The directory is prepended to the current search path instead of replacing
# it: `.libPaths(new)` rebuilds the path from `new` plus the site/system
# libraries only, so passing just this directory would discard any user
# library that is already configured (and leave no user-level library at all
# when the directory does not exist on this machine).
USER_LIB_PATH <- Sys.getenv(
  "USER_LIB_PATH",
  "C:/Users/chiachi/Documents/R/win-library/3.4"
)
.libPaths(c(USER_LIB_PATH, .libPaths()))
library(httr)
library(XML)
# library(stringr)
#' Fetch the major-broker buy/sell table for one stock from Yahoo! Taiwan.
#'
#' Downloads the Big5-encoded "major" page for `stockId`, takes the first
#' HTML table that has exactly 8 columns and at most 15 rows, and stacks its
#' left half (columns 1-3) on top of its right half (columns 5-7) so that
#' every row is one broker with its buy and sell figures. Columns 4 and 8 of
#' the source table are not part of the result.
#'
#' The date is taken from the first "<digits> /<digits> /<digits>" pattern in
#' the page text; 1911 is added to its first component (presumably because
#' the page prints a Minguo/ROC calendar year -- confirm against the live
#' page).
#'
#' @param stockId Stock identifier; coerced with `as.character()` for the URL
#'   and stored unchanged in the `StockId` column.
#' @return A data.frame with columns `StockId`, `Date` (Date), `Broker`
#'   (factor), `Buy` and `Sell` (integer).
getStockMajorData <- function(stockId) {
  # Connector
  url <- sprintf(
    "http://tw.stock.yahoo.com/d/s/major_%s.html",
    as.character(stockId)
  )
  res <- GET(url)
  # Fail with the HTTP status instead of trying to parse an error page.
  stop_for_status(res)

  # Decode the body once and reuse it for both the table and the date.
  page_text <- content(res, "text", encoding = "big5")
  if (length(page_text) != 1L || is.na(page_text)) {
    stop(
      "Could not decode the response body for stock ", stockId,
      call. = FALSE
    )
  }
  # asText = TRUE: the argument is markup, never a file name or URL.
  html <- htmlParse(page_text, asText = TRUE, encoding = "utf8")

  # Parser: keep the first table with 8 columns and at most 15 rows.
  tables <- readHTMLTable(html)
  n_cols <- vapply(tables, NCOL, integer(1))
  n_rows <- vapply(tables, NROW, integer(1))
  candidates <- tables[n_cols == 8L & n_rows <= 15L]
  if (length(candidates) == 0L) {
    stop(
      "No 8-column table with at most 15 rows found for stock ", stockId,
      call. = FALSE
    )
  }
  raw_table <- candidates[[1]]

  # Extract the date printed on the page.
  date_pattern <- "([0-9]+) /([0-9]+) /([0-9]+)"
  date_match <- regmatches(page_text, regexpr(date_pattern, page_text))
  if (length(date_match) == 0L) {
    stop("No date found on the page for stock ", stockId, call. = FALSE)
  }
  date_parts <- as.numeric(unlist(strsplit(date_match, split = " /")))
  date_parts[1] <- date_parts[1] + 1911
  data_date <- as.Date(paste(date_parts, collapse = "-"))

  # Go through character first so factor columns yield their labels rather
  # than their internal level codes.
  to_int <- function(x) as.integer(as.character(x))

  # Build one half of the wide table with explicit column names, so that
  # stacking the halves does not depend on whatever headers were scraped.
  take_half <- function(cols) {
    data.frame(
      Broker = as.factor(raw_table[[cols[1]]]),
      Buy = to_int(raw_table[[cols[2]]]),
      Sell = to_int(raw_table[[cols[3]]])
    )
  }
  stacked <- rbind(take_half(1:3), take_half(5:7))

  # Convert data to the table layout used in the db.
  data.frame(StockId = stockId, Date = data_date, stacked)
}
# Script entry point: scrape one stock and write the result to a CSV file.
library(readr)

# Stock to fetch; override with the STOCK_ID environment variable.
STOCK_ID <- Sys.getenv("STOCK_ID", unset = "2330")

# Destination file; override with OUTPUT_PATH. When unset, the file is named
# after the stock id and resolved against the current working directory.
OUTPUT_PATH <- Sys.getenv(
  "OUTPUT_PATH",
  unset = sprintf("%s.csv", STOCK_ID)
)

df <- getStockMajorData(STOCK_ID)
write_csv(df, OUTPUT_PATH)