From 9174479008d1968b9418b4041670ed5e57dd1325 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Fri, 26 Jul 2019 18:51:24 +0800 Subject: [PATCH 01/15] predict sql generate odps udf script --- sql/codegen_alps.go | 193 +++++-------------- sql/executor.go | 6 +- sql/expression_resolver.go | 2 +- sql/parser.go | 371 +++++++++++++++++++------------------ sql/sql.y | 15 +- sql/template_alps.go | 163 ++++++++++++++++ 6 files changed, 419 insertions(+), 331 deletions(-) create mode 100644 sql/template_alps.go diff --git a/sql/codegen_alps.go b/sql/codegen_alps.go index e096e30300..ae161d8fbd 100644 --- a/sql/codegen_alps.go +++ b/sql/codegen_alps.go @@ -17,7 +17,6 @@ import ( "bytes" "database/sql" "fmt" - "io" "io/ioutil" "os" "path/filepath" @@ -29,6 +28,9 @@ import ( "sqlflow.org/gomaxcompute" ) +var alpsTrainTemplate = template.Must(template.New("alps_train").Parse(alpsTrainTemplateText)) +var alpsPredTemplate = template.Must(template.New("alps_predict").Parse(alpsPredTemplateText)) + type alpsFiller struct { // Training or Predicting IsTraining bool @@ -40,6 +42,7 @@ type alpsFiller struct { ModelDir string ScratchDir string PredictOutputTable string + PredictInputModel string // Schema & Decode info Fields string @@ -53,6 +56,10 @@ type alpsFiller struct { TrainClause *resolvedTrainClause ExitOnSubmit bool + // Predict + PredictStanderClause string + PredictUDF string + // Feature map FeatureMapTable string FeatureMapPartition string @@ -60,6 +67,11 @@ type alpsFiller struct { // ODPS OdpsConf *gomaxcompute.Config EngineCode string + + // Credential + UserID string + OSSID string + OSSKey string } type alpsFeatureColumn interface { @@ -249,26 +261,27 @@ func newALPSTrainFiller(pr *extendedSelect, db *DB, session *pb.Session) (*alpsF ExitOnSubmit: exitOnSubmit}, nil } -func newALPSPredictFiller(pr *extendedSelect) (*alpsFiller, error) { - return nil, fmt.Errorf("alps predict not supported") -} - -func genALPSFiller(w io.Writer, pr *extendedSelect, db *DB, session *pb.Session) (*alpsFiller, error) { - if pr.train { - return newALPSTrainFiller(pr, db, session) - } - return newALPSPredictFiller(pr) +func newALPSPredictFiller(pr *extendedSelect, session *pb.Session) (*alpsFiller, error) { + return &alpsFiller{ + IsTraining: true, + PredictInputTable: pr.tables[0], + PredictOutputTable: pr.predictClause.into, + PredictUDF: strings.Join(pr.fields.Strings(), " "), + PredictInputModel: pr.predictClause.model, + UserID: session.UserId, + OSSID: pr.attrs["OSS_ID"].String(), + OSSKey: pr.attrs["OSS_KEY"].String(), + }, nil } -func submitALPS(w *PipeWriter, pr *extendedSelect, db *DB, cwd string, session *pb.Session) error { +func alpsTrain(w *PipeWriter, pr *extendedSelect, db *DB, cwd string, session *pb.Session) error { var program bytes.Buffer - - filler, err := genALPSFiller(&program, pr, db, session) + filler, err := newALPSTrainFiller(pr, db, session) if err != nil { return err } - if err = alpsTemplate.Execute(&program, filler); err != nil { + if err = alpsTrainTemplate.Execute(&program, filler); err != nil { return fmt.Errorf("submitALPS: failed executing template: %v", err) } code := program.String() @@ -300,12 +313,32 @@ pip install http://091349.oss-cn-hangzhou-zmf.aliyuncs.com/alps/sqlflow/alps-2.0 if e := cmd.Run(); e != nil { return fmt.Errorf("code %v failed %v", code, e) } - if pr.train { - // TODO(uuleon): save model to DB + // TODO(uuleon): save model to DB + return nil +} + +func alpsPred(w *PipeWriter, pr *extendedSelect, db *DB, cwd string, session *pb.Session) error { + var program bytes.Buffer + filler, err := newALPSPredictFiller(pr, session) + if err != nil { + return err + } + if err = alpsPredTemplate.Execute(&program, filler); err != nil { + return fmt.Errorf("submitALPS: failed executing template: %v", err) } + fmt.Println(program.String()) + _, err = db.Query(program.String()) + fmt.Println(err.Error()) return nil } +func submitALPS(w *PipeWriter, pr *extendedSelect, db *DB, cwd string, session *pb.Session) error { + if pr.train { + return alpsTrain(w, pr, db, cwd, session) + } + return alpsPred(w, pr, db, cwd, session) +} + func (nc *numericColumn) GenerateAlpsCode(metadata *metadata) ([]string, error) { output := make([]string, 0) output = append(output, @@ -418,134 +451,6 @@ func generateAlpsFeatureColumnCode(fcs []featureColumn, metadata *metadata) ([]s return codes, nil } -const alpsTemplateText = ` -# coding: utf-8 -# Copyright (c) Antfin, Inc. All rights reserved. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -import tensorflow as tf - -from alps.conf.closure import Closure -from alps.framework.train.training import build_run_config -from alps.framework.exporter import ExportStrategy -from alps.framework.exporter.arks_exporter import ArksExporter -from alps.client.base import run_experiment, submit_experiment -from alps.framework.engine import LocalEngine, YarnEngine, ResourceConf -from alps.framework.column.column import DenseColumn, SparseColumn, GroupedSparseColumn -from alps.framework.exporter.compare_fn import best_auc_fn -from alps.io import DatasetX -from alps.io.base import OdpsConf, FeatureMap -from alps.framework.experiment import EstimatorBuilder, Experiment, TrainConf, EvalConf, RuntimeConf -from alps.io.reader.odps_reader import OdpsReader - -os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # for debug usage. -#tf.logging.set_verbosity(tf.logging.INFO) - -class SQLFlowEstimatorBuilder(EstimatorBuilder): - def _build(self, experiment, run_config): -{{if ne .FeatureMapTable ""}} - feature_columns = [] - {{.FeatureColumnCode}} -{{end}} -{{if ne .ImportCode ""}} - {{.ImportCode}} -{{end}} - return {{.ModelCreatorCode}} - -if __name__ == "__main__": - odpsConf=OdpsConf( - accessid="{{.OdpsConf.AccessID}}", - accesskey="{{.OdpsConf.AccessKey}}", - endpoint="{{.OdpsConf.Endpoint}}" - ) - - trainDs = DatasetX( - num_epochs={{.TrainClause.Epoch}}, - batch_size={{.TrainClause.BatchSize}}, - shuffle="{{.TrainClause.EnableShuffle}}" == "true", - shuffle_buffer_size={{.TrainClause.ShuffleBufferSize}}, -{{if .TrainClause.EnableCache}} - cache_file={{.TrainClause.CachePath}}, -{{end}} - reader=OdpsReader( - odps=odpsConf, - project="{{.OdpsConf.Project}}", - table="{{.TrainInputTable}}", - # FIXME(typhoonzero): add field_names back if needed. - # field_names={{.Fields}}, - features={{.X}}, - labels={{.Y}}, -{{if ne .FeatureMapTable ""}} - feature_map=FeatureMap(table="{{.FeatureMapTable}}", -{{if ne .FeatureMapPartition ""}} - partition="{{.FeatureMapPartition}}" -{{end}} - ), - flatten_group=True -{{end}} - ), - drop_remainder="{{.TrainClause.DropRemainder}}" == "true" - ) - - evalDs = DatasetX( - num_epochs=1, - batch_size={{.TrainClause.BatchSize}}, - reader=OdpsReader( - odps=odpsConf, - project="{{.OdpsConf.Project}}", - table="{{.EvalInputTable}}", - # FIXME(typhoonzero): add field_names back if needed. - # field_names={{.Fields}}, - features={{.X}}, - labels={{.Y}}, - flatten_group=True - ) - ) - - export_path = "{{.ModelDir}}" -{{if ne .ScratchDir ""}} - runtime_conf = RuntimeConf(model_dir="{{.ScratchDir}}") -{{else}} - runtime_conf = None -{{end}} - experiment = Experiment( - user="shangchun.sun", # TODO(joyyoj) pai will check user name be a valid user, removed later. - engine={{.EngineCode}}, - train=TrainConf(input=trainDs, -{{if (ne .TrainClause.MaxSteps -1)}} - max_steps={{.TrainClause.MaxSteps}}, -{{end}} - ), - eval=EvalConf(input=evalDs, - # FIXME(typhoonzero): Support configure metrics - metrics_set=['accuracy'], -{{if (ne .TrainClause.EvalSteps -1)}} - steps={{.TrainClause.EvalSteps}}, -{{end}} - start_delay_secs={{.TrainClause.EvalStartDelay}}, - throttle_secs={{.TrainClause.EvalThrottle}}, - ), - # FIXME(typhoonzero): Use ExportStrategy.BEST when possible. - exporter=ArksExporter(deploy_path=export_path, strategy=ExportStrategy.LATEST, compare_fn=Closure(best_auc_fn)), - runtime = runtime_conf, - model_builder=SQLFlowEstimatorBuilder()) - - if isinstance(experiment.engine, LocalEngine): - run_experiment(experiment) - else: - if "{{.ExitOnSubmit}}" == "false": - run_experiment(experiment) - else: - submit_experiment(experiment, exit_on_submit=True) -` - -var alpsTemplate = template.Must(template.New("alps").Parse(alpsTemplateText)) - type metadata struct { odpsConfig *gomaxcompute.Config table string diff --git a/sql/executor.go b/sql/executor.go index b2201c3044..887a885807 100644 --- a/sql/executor.go +++ b/sql/executor.go @@ -29,8 +29,10 @@ import ( // Run executes a SQL query and returns a stream of rows or messages func Run(slct string, db *DB, modelDir string, session *pb.Session) *PipeReader { if len(splitExtendedSQL(slct)) == 2 { + fmt.Println("ext sql", slct) return runExtendedSQL(slct, db, modelDir, session) } + fmt.Println("standard sql", slct) return runStandardSQL(slct, db) } @@ -66,7 +68,8 @@ func splitExtendedSQL(slct string) []string { } for i := 1; i < len(typ)-2; i++ { if (typ[i] == TRAIN && typ[i+1] == IDENT && typ[i+2] == WITH) || - (typ[i] == PREDICT && typ[i+1] == IDENT && typ[i+2] == USING) { + (typ[i] == PREDICT && typ[i+1] == IDENT && typ[i+2] == USING) || + (typ[i] == PREDICT && typ[i+1] == IDENT && typ[i+2] == WITH) { return []string{slct[:pos[i-1]], slct[pos[i-1]:]} } } @@ -243,7 +246,6 @@ func runExtendedSQL(slct string, db *DB, modelDir string, session *pb.Session) * defer func(startAt time.Time) { log.Debugf("runExtendedSQL %v finished, elapsed:%v", slct, time.Since(startAt)) }(time.Now()) - pr, e := newParser().Parse(slct) if e != nil { return e diff --git a/sql/expression_resolver.go b/sql/expression_resolver.go index 6b6c0d3664..4616b1bb9d 100644 --- a/sql/expression_resolver.go +++ b/sql/expression_resolver.go @@ -191,7 +191,7 @@ func getEngineSpec(attrs map[string]*attribute) engineSpec { func resolveTrainClause(tc *trainClause) (*resolvedTrainClause, error) { modelName := tc.estimator preMadeModel := !strings.ContainsAny(modelName, ".") - attrs, err := resolveTrainAttribute(&tc.attrs) + attrs, err := resolveTrainAttribute(&tc.trainAttrs) if err != nil { return nil, err } diff --git a/sql/parser.go b/sql/parser.go index 2139e62797..65dbfe869d 100644 --- a/sql/parser.go +++ b/sql/parser.go @@ -77,6 +77,7 @@ func variadic(typ int, op string, ods exprlist) *expr { type extendedSelect struct { extended bool train bool + attrs attrs standardSelect trainClause predictClause @@ -90,11 +91,11 @@ type standardSelect struct { } type trainClause struct { - estimator string - attrs attrs - columns columnClause - label string - save string + estimator string + trainAttrs attrs + columns columnClause + label string + save string } /* If no FOR in the COLUMN, the key is "" */ @@ -104,8 +105,9 @@ type filedClause exprlist type attrs map[string]*expr type predictClause struct { - model string - into string + predAttrs attrs + model string + into string } var parseResult *extendedSelect @@ -120,7 +122,7 @@ func attrsUnion(as1, as2 attrs) attrs { return as1 } -//line sql.y:105 +//line sql.y:107 type sqlSymType struct { yys int val string /* NUMBER, IDENT, STRING, and keywords */ @@ -210,7 +212,7 @@ const sqlEofCode = 1 const sqlErrCode = 2 const sqlInitialStackSize = 16 -//line sql.y:280 +//line sql.y:283 /* Like Lisp's builtin function cdr. */ func (e *expr) cdr() (r []string) { @@ -326,95 +328,95 @@ var sqlExca = [...]int{ const sqlPrivate = 57344 -const sqlLast = 164 +const sqlLast = 167 var sqlAct = [...]int{ - 29, 98, 57, 97, 13, 79, 82, 80, 110, 78, - 92, 79, 22, 38, 90, 36, 56, 37, 49, 50, - 46, 45, 44, 48, 47, 39, 40, 41, 42, 43, - 51, 108, 107, 53, 54, 77, 7, 9, 8, 10, - 11, 89, 65, 66, 67, 68, 69, 70, 71, 72, - 73, 74, 75, 76, 63, 108, 24, 23, 25, 86, - 79, 60, 18, 17, 91, 52, 4, 31, 24, 23, - 25, 30, 41, 42, 43, 27, 62, 85, 32, 31, - 28, 100, 87, 30, 105, 15, 106, 27, 21, 113, - 32, 101, 28, 99, 102, 101, 96, 16, 104, 111, - 24, 23, 25, 39, 40, 41, 42, 43, 109, 101, - 112, 31, 83, 84, 64, 30, 61, 34, 33, 27, - 20, 103, 32, 55, 28, 49, 50, 46, 45, 44, - 48, 47, 39, 40, 41, 42, 43, 46, 45, 44, - 48, 47, 39, 40, 41, 42, 43, 35, 94, 95, - 58, 59, 3, 81, 12, 26, 19, 14, 6, 93, - 88, 5, 2, 1, + 31, 101, 63, 100, 13, 41, 62, 85, 86, 40, + 87, 112, 24, 55, 56, 52, 51, 50, 54, 53, + 45, 46, 47, 48, 49, 96, 86, 90, 86, 69, + 84, 70, 57, 44, 110, 59, 60, 109, 95, 7, + 9, 8, 10, 11, 38, 39, 68, 65, 72, 73, + 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, + 110, 17, 47, 48, 49, 69, 26, 25, 27, 4, + 58, 23, 89, 92, 88, 91, 107, 33, 108, 115, + 103, 32, 26, 25, 27, 29, 67, 113, 34, 93, + 30, 19, 102, 33, 20, 111, 104, 32, 37, 104, + 42, 29, 106, 15, 34, 61, 30, 26, 25, 27, + 71, 104, 114, 66, 43, 16, 18, 36, 33, 35, + 22, 105, 32, 3, 98, 99, 29, 64, 12, 34, + 28, 30, 55, 56, 52, 51, 50, 54, 53, 45, + 46, 47, 48, 49, 52, 51, 50, 54, 53, 45, + 46, 47, 48, 49, 45, 46, 47, 48, 49, 21, + 14, 6, 97, 94, 5, 2, 1, } var sqlPact = [...]int{ - 148, -1000, 31, 68, -1000, 28, 27, 103, 70, 51, - 101, 100, -1000, 131, -23, -19, -1000, -1000, -1000, -25, - -1000, -1000, 105, -1000, -19, -1000, -1000, 51, 46, -1000, - 51, 51, 83, 140, 138, 25, 99, 39, 97, 51, - 51, 51, 51, 51, 51, 51, 51, 51, 51, 51, - 51, -2, -32, -1000, -1000, -1000, -33, 105, 95, 96, - 51, -1000, -1000, 22, -1000, 43, 43, -1000, -1000, -1000, - 76, 76, 76, 76, 76, 115, 115, -1000, -1000, 51, - -1000, 3, -1000, 40, -1000, -27, -1000, 105, 137, 95, - 64, 51, -1000, 107, 64, 67, -1000, 17, -1000, -1000, - -19, -1000, 105, 91, -7, -1000, -1000, 82, 64, -1000, - 72, -1000, -1000, -1000, + 119, -1000, 34, 86, -1000, 26, 81, 103, 53, 90, + 102, 100, -1000, 82, 6, 9, -1000, -1000, -1000, 83, + 97, -5, -1000, -1000, 112, -1000, 9, -1000, -1000, 90, + 51, -1000, 90, 90, 65, 117, -1000, 11, 96, 49, + -9, -1000, 7, -1000, 93, 90, 90, 90, 90, 90, + 90, 90, 90, 90, 90, 90, 90, -7, -34, -1000, + -1000, -1000, -30, 112, 83, 90, -1000, -1000, -10, 83, + 90, -1000, 33, 33, -1000, -1000, -1000, 127, 127, 127, + 127, 127, 122, 122, -1000, -1000, 90, -1000, 27, -12, + -1000, -1000, 112, 112, 113, 63, -1000, 107, 63, 59, + 22, -1000, -1000, 9, -1000, 78, -4, -1000, -1000, 70, + 63, -1000, 62, -1000, -1000, -1000, } var sqlPgo = [...]int{ - 0, 163, 162, 161, 160, 159, 158, 157, 156, 2, - 0, 1, 16, 155, 3, 154, 6, 153, + 0, 166, 165, 164, 163, 162, 161, 160, 159, 2, + 0, 1, 6, 130, 3, 128, 5, 9, } var sqlR1 = [...]int{ 0, 1, 1, 1, 2, 2, 2, 2, 3, 6, - 4, 4, 4, 15, 15, 7, 7, 7, 11, 11, - 11, 14, 14, 5, 5, 8, 8, 16, 17, 17, - 10, 10, 12, 12, 13, 13, 9, 9, 9, 9, + 6, 6, 4, 4, 4, 15, 15, 7, 7, 7, + 11, 11, 11, 14, 14, 5, 5, 8, 8, 16, + 17, 17, 10, 10, 12, 12, 13, 13, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, 9, } var sqlR2 = [...]int{ - 0, 2, 3, 3, 2, 3, 3, 3, 8, 4, - 2, 4, 5, 5, 1, 1, 1, 3, 1, 1, - 1, 1, 3, 2, 2, 1, 3, 3, 1, 3, - 3, 4, 1, 3, 2, 3, 1, 1, 1, 1, - 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 2, 2, + 0, 2, 3, 3, 2, 3, 3, 3, 8, 2, + 3, 3, 2, 4, 5, 5, 1, 1, 1, 3, + 1, 1, 1, 1, 3, 2, 2, 1, 3, 3, + 1, 3, 3, 4, 1, 3, 2, 3, 1, 1, + 1, 1, 3, 3, 1, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 2, 2, } var sqlChk = [...]int{ -1000, -1, -2, 4, 35, -3, -6, 5, 7, 6, - 8, 9, -15, -10, -7, 17, 29, 35, 35, -8, - 17, 18, -9, 18, 17, 19, -13, 36, 41, -10, - 32, 28, 39, 17, 17, 16, 38, 36, 38, 27, - 28, 29, 30, 31, 24, 23, 22, 26, 25, 20, - 21, -9, 19, -9, -9, 40, -12, -9, 10, 13, - 36, 17, 37, -12, 17, -9, -9, -9, -9, -9, - -9, -9, -9, -9, -9, -9, -9, 37, 41, 38, - 40, -17, -16, 17, 17, -12, 37, -9, -4, 38, - 11, 24, 37, -5, 11, 12, -16, -14, -11, 29, - 17, -10, -9, 14, -14, 17, 19, 15, 38, 17, - 15, 17, -11, 17, + 8, 9, -15, -10, -7, 17, 29, 35, 35, 10, + 13, -8, 17, 18, -9, 18, 17, 19, -13, 36, + 41, -10, 32, 28, 39, 17, 17, 16, 38, 36, + -17, -16, 17, 17, 38, 27, 28, 29, 30, 31, + 24, 23, 22, 26, 25, 20, 21, -9, 19, -9, + -9, 40, -12, -9, 10, 36, 17, 37, -12, 38, + 24, 17, -9, -9, -9, -9, -9, -9, -9, -9, + -9, -9, -9, -9, 37, 41, 38, 40, -17, -12, + 37, -16, -9, -9, -4, 11, 37, -5, 11, 12, + -14, -11, 29, 17, -10, 14, -14, 17, 19, 15, + 38, 17, 15, 17, -11, 17, } var sqlDef = [...]int{ 0, -2, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 4, 0, 14, 16, 15, 2, 3, 5, - 25, 6, 7, 36, 37, 38, 39, 0, 0, 42, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 55, 56, 34, 0, 32, 0, 0, - 0, 17, 30, 0, 26, 43, 44, 45, 46, 47, - 48, 49, 50, 51, 52, 53, 54, 40, 41, 0, - 35, 0, 28, 0, 9, 0, 31, 33, 0, 0, - 0, 0, 13, 0, 0, 0, 29, 10, 21, 18, - 19, 20, 27, 0, 0, 23, 24, 0, 0, 8, - 0, 11, 22, 12, + 0, 0, 4, 0, 16, 18, 17, 2, 3, 0, + 0, 5, 27, 6, 7, 38, 39, 40, 41, 0, + 0, 44, 0, 0, 0, 0, 9, 0, 0, 0, + 10, 30, 0, 11, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, + 58, 36, 0, 34, 0, 0, 19, 32, 0, 0, + 0, 28, 45, 46, 47, 48, 49, 50, 51, 52, + 53, 54, 55, 56, 42, 43, 0, 37, 0, 0, + 33, 31, 29, 35, 0, 0, 15, 0, 0, 0, + 12, 23, 20, 21, 22, 0, 0, 25, 26, 0, + 0, 8, 0, 13, 24, 14, } var sqlTok1 = [...]int{ @@ -778,7 +780,7 @@ sqldefault: case 1: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:147 +//line sql.y:149 { parseResult = &extendedSelect{ extended: false, @@ -786,344 +788,357 @@ sqldefault: } case 2: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:152 +//line sql.y:154 { parseResult = &extendedSelect{ extended: true, train: true, + attrs: sqlDollar[2].tran.trainAttrs, standardSelect: sqlDollar[1].slct, trainClause: sqlDollar[2].tran} } case 3: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:159 +//line sql.y:162 { parseResult = &extendedSelect{ extended: true, train: false, + attrs: sqlDollar[2].infr.predAttrs, standardSelect: sqlDollar[1].slct, predictClause: sqlDollar[2].infr} } case 4: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:169 +//line sql.y:173 { sqlVAL.slct.fields = sqlDollar[2].expl } case 5: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:170 +//line sql.y:174 { sqlVAL.slct.tables = sqlDollar[3].tbls } case 6: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:171 +//line sql.y:175 { sqlVAL.slct.limit = sqlDollar[3].val } case 7: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:172 +//line sql.y:176 { sqlVAL.slct.where = sqlDollar[3].expr } case 8: sqlDollar = sqlS[sqlpt-8 : sqlpt+1] -//line sql.y:176 +//line sql.y:180 { sqlVAL.tran.estimator = sqlDollar[2].val - sqlVAL.tran.attrs = sqlDollar[4].atrs + sqlVAL.tran.trainAttrs = sqlDollar[4].atrs sqlVAL.tran.columns = sqlDollar[5].colc sqlVAL.tran.label = sqlDollar[6].labc sqlVAL.tran.save = sqlDollar[8].val } case 9: - sqlDollar = sqlS[sqlpt-4 : sqlpt+1] -//line sql.y:186 + sqlDollar = sqlS[sqlpt-2 : sqlpt+1] +//line sql.y:190 { sqlVAL.infr.into = sqlDollar[2].val - sqlVAL.infr.model = sqlDollar[4].val } case 10: + sqlDollar = sqlS[sqlpt-3 : sqlpt+1] +//line sql.y:191 + { + sqlVAL.infr.predAttrs = sqlDollar[3].atrs + } + case 11: + sqlDollar = sqlS[sqlpt-3 : sqlpt+1] +//line sql.y:192 + { + sqlVAL.infr.model = sqlDollar[3].val + } + case 12: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:193 +//line sql.y:196 { sqlVAL.colc = map[string]exprlist{"feature_columns": sqlDollar[2].expl} } - case 11: + case 13: sqlDollar = sqlS[sqlpt-4 : sqlpt+1] -//line sql.y:194 +//line sql.y:197 { sqlVAL.colc = map[string]exprlist{sqlDollar[4].val: sqlDollar[2].expl} } - case 12: + case 14: sqlDollar = sqlS[sqlpt-5 : sqlpt+1] -//line sql.y:195 +//line sql.y:198 { sqlVAL.colc[sqlDollar[5].val] = sqlDollar[3].expl } - case 13: + case 15: sqlDollar = sqlS[sqlpt-5 : sqlpt+1] -//line sql.y:199 +//line sql.y:202 { sqlVAL.expl = exprlist{sqlDollar[1].expr, atomic(IDENT, "AS"), funcall("", sqlDollar[4].expl)} } - case 14: + case 16: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:202 +//line sql.y:205 { sqlVAL.expl = sqlDollar[1].flds } - case 15: + case 17: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:206 +//line sql.y:209 { sqlVAL.flds = append(sqlVAL.flds, atomic(IDENT, "*")) } - case 16: + case 18: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:207 +//line sql.y:210 { sqlVAL.flds = append(sqlVAL.flds, atomic(IDENT, sqlDollar[1].val)) } - case 17: + case 19: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:208 +//line sql.y:211 { sqlVAL.flds = append(sqlDollar[1].flds, atomic(IDENT, sqlDollar[3].val)) } - case 18: + case 20: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:212 +//line sql.y:215 { sqlVAL.expr = atomic(IDENT, "*") } - case 19: + case 21: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:213 +//line sql.y:216 { sqlVAL.expr = atomic(IDENT, sqlDollar[1].val) } - case 20: + case 22: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:214 +//line sql.y:217 { sqlVAL.expr = sqlDollar[1].expr } - case 21: + case 23: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:218 +//line sql.y:221 { sqlVAL.expl = exprlist{sqlDollar[1].expr} } - case 22: + case 24: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:219 +//line sql.y:222 { sqlVAL.expl = append(sqlDollar[1].expl, sqlDollar[3].expr) } - case 23: + case 25: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:223 +//line sql.y:226 { sqlVAL.labc = sqlDollar[2].val } - case 24: + case 26: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:224 +//line sql.y:227 { sqlVAL.labc = sqlDollar[2].val[1 : len(sqlDollar[2].val)-1] } - case 25: + case 27: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:228 +//line sql.y:231 { sqlVAL.tbls = []string{sqlDollar[1].val} } - case 26: + case 28: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:229 +//line sql.y:232 { sqlVAL.tbls = append(sqlDollar[1].tbls, sqlDollar[3].val) } - case 27: + case 29: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:233 +//line sql.y:236 { sqlVAL.atrs = attrs{sqlDollar[1].val: sqlDollar[3].expr} } - case 28: + case 30: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:237 +//line sql.y:240 { sqlVAL.atrs = sqlDollar[1].atrs } - case 29: + case 31: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:238 +//line sql.y:241 { sqlVAL.atrs = attrsUnion(sqlDollar[1].atrs, sqlDollar[3].atrs) } - case 30: + case 32: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:242 +//line sql.y:245 { sqlVAL.expr = funcall(sqlDollar[1].val, nil) } - case 31: + case 33: sqlDollar = sqlS[sqlpt-4 : sqlpt+1] -//line sql.y:243 +//line sql.y:246 { sqlVAL.expr = funcall(sqlDollar[1].val, sqlDollar[3].expl) } - case 32: + case 34: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:247 +//line sql.y:250 { sqlVAL.expl = exprlist{sqlDollar[1].expr} } - case 33: + case 35: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:248 +//line sql.y:251 { sqlVAL.expl = append(sqlDollar[1].expl, sqlDollar[3].expr) } - case 34: + case 36: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:252 +//line sql.y:255 { sqlVAL.expl = nil } - case 35: + case 37: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:253 +//line sql.y:256 { sqlVAL.expl = sqlDollar[2].expl } - case 36: + case 38: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:257 +//line sql.y:260 { sqlVAL.expr = atomic(NUMBER, sqlDollar[1].val) } - case 37: + case 39: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:258 +//line sql.y:261 { sqlVAL.expr = atomic(IDENT, sqlDollar[1].val) } - case 38: + case 40: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:259 +//line sql.y:262 { sqlVAL.expr = atomic(STRING, sqlDollar[1].val) } - case 39: + case 41: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:260 +//line sql.y:263 { sqlVAL.expr = variadic('[', "square", sqlDollar[1].expl) } - case 40: + case 42: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:261 +//line sql.y:264 { sqlVAL.expr = unary('(', "paren", sqlDollar[2].expr) } - case 41: + case 43: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:262 +//line sql.y:265 { sqlVAL.expr = unary('"', "quota", atomic(STRING, sqlDollar[2].val)) } - case 42: + case 44: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:263 +//line sql.y:266 { sqlVAL.expr = sqlDollar[1].expr } - case 43: + case 45: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:264 +//line sql.y:267 { sqlVAL.expr = binary('+', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } - case 44: + case 46: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:265 +//line sql.y:268 { sqlVAL.expr = binary('-', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } - case 45: + case 47: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:266 +//line sql.y:269 { sqlVAL.expr = binary('*', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } - case 46: + case 48: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:267 +//line sql.y:270 { sqlVAL.expr = binary('/', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } - case 47: + case 49: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:268 +//line sql.y:271 { sqlVAL.expr = binary('%', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } - case 48: + case 50: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:269 +//line sql.y:272 { sqlVAL.expr = binary('=', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } - case 49: + case 51: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:270 +//line sql.y:273 { sqlVAL.expr = binary('<', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } - case 50: + case 52: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:271 +//line sql.y:274 { sqlVAL.expr = binary('>', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } - case 51: + case 53: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:272 +//line sql.y:275 { sqlVAL.expr = binary(LE, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } - case 52: + case 54: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:273 +//line sql.y:276 { sqlVAL.expr = binary(GE, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } - case 53: + case 55: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:274 +//line sql.y:277 { sqlVAL.expr = binary(AND, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } - case 54: + case 56: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:275 +//line sql.y:278 { sqlVAL.expr = binary(OR, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } - case 55: + case 57: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:276 +//line sql.y:279 { sqlVAL.expr = unary(NOT, sqlDollar[1].val, sqlDollar[2].expr) } - case 56: + case 58: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:277 +//line sql.y:280 { sqlVAL.expr = unary('-', sqlDollar[1].val, sqlDollar[2].expr) } diff --git a/sql/sql.y b/sql/sql.y index fd544a90bb..eed7013248 100644 --- a/sql/sql.y +++ b/sql/sql.y @@ -58,6 +58,7 @@ type extendedSelect struct { extended bool train bool + attrs attrs standardSelect trainClause predictClause @@ -72,7 +73,7 @@ type trainClause struct { estimator string - attrs attrs + trainAttrs attrs columns columnClause label string save string @@ -85,6 +86,7 @@ type attrs map[string]*expr type predictClause struct { + predAttrs attrs model string into string } @@ -153,6 +155,7 @@ select_stmt parseResult = &extendedSelect{ extended: true, train: true, + attrs: $2.trainAttrs, standardSelect: $1, trainClause: $2} } @@ -160,6 +163,7 @@ select_stmt parseResult = &extendedSelect{ extended: true, train: false, + attrs: $2.predAttrs, standardSelect: $1, predictClause: $2} } @@ -175,7 +179,7 @@ select train_clause : TRAIN IDENT WITH attrs column_clause label_clause INTO IDENT { $$.estimator = $2 - $$.attrs = $4 + $$.trainAttrs = $4 $$.columns = $5 $$.label = $6 $$.save = $8 @@ -183,10 +187,9 @@ train_clause ; predict_clause -: PREDICT IDENT USING IDENT { - $$.into = $2 - $$.model = $4 -} +: PREDICT IDENT { $$.into = $2 } +| predict_clause WITH attrs { $$.predAttrs = $3 } +| predict_clause USING IDENT { $$.model = $3 } ; column_clause diff --git a/sql/template_alps.go b/sql/template_alps.go new file mode 100644 index 0000000000..ae2ab40a30 --- /dev/null +++ b/sql/template_alps.go @@ -0,0 +1,163 @@ +// Copyright 2019 The SQLFlow Authors. All rights reserved. +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package sql + +const alpsTrainTemplateText = ` +# coding: utf-8 +# Copyright (c) Antfin, Inc. All rights reserved. + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +import tensorflow as tf + +from alps.conf.closure import Closure +from alps.framework.train.training import build_run_config +from alps.framework.exporter import ExportStrategy +from alps.framework.exporter.arks_exporter import ArksExporter +from alps.client.base import run_experiment, submit_experiment +from alps.framework.engine import LocalEngine, YarnEngine, ResourceConf +from alps.framework.column.column import DenseColumn, SparseColumn, GroupedSparseColumn +from alps.framework.exporter.compare_fn import best_auc_fn +from alps.io import DatasetX +from alps.io.base import OdpsConf, FeatureMap +from alps.framework.experiment import EstimatorBuilder, Experiment, TrainConf, EvalConf, RuntimeConf +from alps.io.reader.odps_reader import OdpsReader + +os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # for debug usage. +#tf.logging.set_verbosity(tf.logging.INFO) + +class SQLFlowEstimatorBuilder(EstimatorBuilder): + def _build(self, experiment, run_config): +{{if ne .FeatureMapTable ""}} + feature_columns = [] + {{.FeatureColumnCode}} +{{end}} +{{if ne .ImportCode ""}} + {{.ImportCode}} +{{end}} + return {{.ModelCreatorCode}} + +if __name__ == "__main__": + odpsConf=OdpsConf( + accessid="{{.OdpsConf.AccessID}}", + accesskey="{{.OdpsConf.AccessKey}}", + endpoint="{{.OdpsConf.Endpoint}}" + ) + + trainDs = DatasetX( + num_epochs={{.TrainClause.Epoch}}, + batch_size={{.TrainClause.BatchSize}}, + shuffle="{{.TrainClause.EnableShuffle}}" == "true", + shuffle_buffer_size={{.TrainClause.ShuffleBufferSize}}, +{{if .TrainClause.EnableCache}} + cache_file={{.TrainClause.CachePath}}, +{{end}} + reader=OdpsReader( + odps=odpsConf, + project="{{.OdpsConf.Project}}", + table="{{.TrainInputTable}}", + # FIXME(typhoonzero): add field_names back if needed. + # field_names={{.Fields}}, + features={{.X}}, + labels={{.Y}}, +{{if ne .FeatureMapTable ""}} + feature_map=FeatureMap(table="{{.FeatureMapTable}}", +{{if ne .FeatureMapPartition ""}} + partition="{{.FeatureMapPartition}}" +{{end}} + ), + flatten_group=True +{{end}} + ), + drop_remainder="{{.TrainClause.DropRemainder}}" == "true" + ) + + evalDs = DatasetX( + num_epochs=1, + batch_size={{.TrainClause.BatchSize}}, + reader=OdpsReader( + odps=odpsConf, + project="{{.OdpsConf.Project}}", + table="{{.EvalInputTable}}", + # FIXME(typhoonzero): add field_names back if needed. + # field_names={{.Fields}}, + features={{.X}}, + labels={{.Y}}, + flatten_group=True + ) + ) + + export_path = "{{.ModelDir}}" +{{if ne .ScratchDir ""}} + runtime_conf = RuntimeConf(model_dir="{{.ScratchDir}}") +{{else}} + runtime_conf = None +{{end}} + experiment = Experiment( + user="shangchun.sun", # TODO(joyyoj) pai will check user name be a valid user, removed later. + engine={{.EngineCode}}, + train=TrainConf(input=trainDs, +{{if (ne .TrainClause.MaxSteps -1)}} + max_steps={{.TrainClause.MaxSteps}}, +{{end}} + ), + eval=EvalConf(input=evalDs, + # FIXME(typhoonzero): Support configure metrics + metrics_set=['accuracy'], +{{if (ne .TrainClause.EvalSteps -1)}} + steps={{.TrainClause.EvalSteps}}, +{{end}} + start_delay_secs={{.TrainClause.EvalStartDelay}}, + throttle_secs={{.TrainClause.EvalThrottle}}, + ), + # FIXME(typhoonzero): Use ExportStrategy.BEST when possible. + exporter=ArksExporter(deploy_path=export_path, strategy=ExportStrategy.LATEST, compare_fn=Closure(best_auc_fn)), + runtime = runtime_conf, + model_builder=SQLFlowEstimatorBuilder()) + + if isinstance(experiment.engine, LocalEngine): + run_experiment(experiment) + else: + if "{{.ExitOnSubmit}}" == "false": + run_experiment(experiment) + else: + submit_experiment(experiment, exit_on_submit=True) +` + +const alpsPredTemplateText = ` +set odps.task.major.version=default; +set odps.isolation.session.enable=true; +set odps.service.mode=off; +set odps.instance.priority = 0; +set odps.sql.udf.timeout = 3000; + +set mst.model.path=oss://arks-model/{{.UserID}}/{{.PredictInputModel}}.tar.gz; +set mst.model.name={{.PredictInputModel}}; +set mst.oss.id={{.OSSID}}; +set mst.oss.key={{.OSSKey}}; +set mst.load.feature_map=false; + +set deepbreath.sparse.group.separator=:; +set deepbreath.sparse.separator=,; +set deepbreath.enable.sigmoid=false; +set odps.sql.mapper.split.size=64; + +set alps.custom.output=predictions; + +SELECT {{.PredictUDF}} FROM {{.PredictInputTable}}; +` From 6a47ac4d3bd113df3cb4f0ba09b846e9f724e244 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Fri, 26 Jul 2019 18:53:59 +0800 Subject: [PATCH 02/15] update --- sql/executor.go | 2 -- 1 file changed, 2 deletions(-) diff --git a/sql/executor.go b/sql/executor.go index 887a885807..3762bf850a 100644 --- a/sql/executor.go +++ b/sql/executor.go @@ -29,10 +29,8 @@ import ( // Run executes a SQL query and returns a stream of rows or messages func Run(slct string, db *DB, modelDir string, session *pb.Session) *PipeReader { if len(splitExtendedSQL(slct)) == 2 { - fmt.Println("ext sql", slct) return runExtendedSQL(slct, db, modelDir, session) } - fmt.Println("standard sql", slct) return runStandardSQL(slct, db) } From f64f50e845f5e3163843617f04b3415693412cc3 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Mon, 29 Jul 2019 20:30:58 +0800 Subject: [PATCH 03/15] submit odps script using odpscmd --- scripts/image_build.sh | 8 +++++++- sql/codegen_alps.go | 34 ++++++++++++++++++++++++++++++++-- sql/executor.go | 1 + sql/template_alps.go | 2 +- 4 files changed, 41 insertions(+), 4 deletions(-) diff --git a/scripts/image_build.sh b/scripts/image_build.sh index c10ed88706..d6216cbf29 100644 --- a/scripts/image_build.sh +++ b/scripts/image_build.sh @@ -101,7 +101,13 @@ if [ "${WITH_SQLFLOW_MODELS:-ON}" = "ON" ]; then rm -rf models fi -# 7. Load sqlflow Jupyter magic command automatically. c.f. https://stackoverflow.com/a/32683001. +# 7. Install odpscmd for submitting alps predict job with odps udf script +# TODO(Yancey1989): using gomaxcompute instead of the odpscmd command-line tool. +wget -q http://odps.alibaba-inc.com/official_downloads/odpscmd/0.32.0/odpscmd_public.zip +unzip -qq odpscmd_public.zip -d /usr/local/odpscmd +ln -s /usr/local/odpscmd/bin/odpscmd /usr/local/bin/odpscmd + +# 8. Load sqlflow Jupyter magic command automatically. c.f. https://stackoverflow.com/a/32683001. mkdir -p $IPYTHON_STARTUP mkdir -p /workspace echo 'get_ipython().magic(u"%reload_ext sqlflow.magic")' >> $IPYTHON_STARTUP/00-first.py diff --git a/sql/codegen_alps.go b/sql/codegen_alps.go index ae161d8fbd..c7977f64c7 100644 --- a/sql/codegen_alps.go +++ b/sql/codegen_alps.go @@ -19,6 +19,7 @@ import ( "fmt" "io/ioutil" "os" + "os/exec" "path/filepath" "strconv" "strings" @@ -326,9 +327,38 @@ func alpsPred(w *PipeWriter, pr *extendedSelect, db *DB, cwd string, session *pb if err = alpsPredTemplate.Execute(&program, filler); err != nil { return fmt.Errorf("submitALPS: failed executing template: %v", err) } + + fname := "alps_pre.odps" + filepath := filepath.Join(cwd, fname) + f, err := os.Create(filepath) + if err != nil { + return fmt.Errorf("Create ODPS script failed %v", err) + } + f.WriteString(program.String()) + f.Close() + cw := &logChanWriter{wr: w} fmt.Println(program.String()) - _, err = db.Query(program.String()) - fmt.Println(err.Error()) + _, ok := db.Driver().(*gomaxcompute.Driver) + if !ok { + return fmt.Errorf("Alps Predict only support Maxcompute database driver") + } + + cfg, err := gomaxcompute.ParseDSN(db.dataSourceName) + // FIXME(Yancey1989): using https proto. + fixedEndpoint := strings.Replace(cfg.Endpoint, "https://", "http://", 0) + // TODO(Yancey1989): submit the Maxcompute UDF script using gomaxcompute driver. + cmd := exec.Command("odpscmd", + "-u", cfg.AccessID, + "-p", cfg.AccessKey, + fmt.Sprintf("--endpoint=%s", fixedEndpoint), + fmt.Sprintf("--project=%s", cfg.Project), + "-s", filepath) + cmd.Dir = cwd + cmd.Stdout = cw + cmd.Stderr = cw + if e := cmd.Run(); e != nil { + return fmt.Errorf("submit ODPS script %s failed %v", program.String(), e) + } return nil } diff --git a/sql/executor.go b/sql/executor.go index 3762bf850a..03d79498ee 100644 --- a/sql/executor.go +++ b/sql/executor.go @@ -397,6 +397,7 @@ func pred(pr *extendedSelect, db *DB, cwd string, wr *PipeWriter, modelDir strin if e := genTF(&buf, pr, fts, db); e != nil { return fmt.Errorf("genTF: %v", e) } + fmt.Println(buf.String()) cw := &logChanWriter{wr: wr} defer cw.Close() diff --git a/sql/template_alps.go b/sql/template_alps.go index ae2ab40a30..33d1452846 100644 --- a/sql/template_alps.go +++ b/sql/template_alps.go @@ -159,5 +159,5 @@ set odps.sql.mapper.split.size=64; set alps.custom.output=predictions; -SELECT {{.PredictUDF}} FROM {{.PredictInputTable}}; +CREATE TABLE IF NOT EXISTS {{.PredictOutputTable}} AS SELECT {{.PredictUDF}} FROM {{.PredictInputTable}}; ` From a61d51f2c2a5acb9a36cc8f79d6e9a4fb0bb3afd Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Mon, 29 Jul 2019 21:06:35 +0800 Subject: [PATCH 04/15] fix ci --- sql/codegen.go | 5 ++- sql/executor.go | 1 - sql/parser.go | 96 ++++++++++++++++++++++++------------------------- sql/sql.y | 2 +- 4 files changed, 53 insertions(+), 51 deletions(-) diff --git a/sql/codegen.go b/sql/codegen.go index ac45f818bf..6bdcbee6b9 100644 --- a/sql/codegen.go +++ b/sql/codegen.go @@ -111,7 +111,10 @@ func newFiller(pr *extendedSelect, ds *trainAndValDataset, fts fieldTypes, db *D IsKerasModel: isKerasModel, }, } - for k, v := range pr.attrs { + for k, v := range pr.trainClause.trainAttrs { + r.Attrs[k] = v.String() + } + for k, v := range pr.predictClause.predAttrs { r.Attrs[k] = v.String() } diff --git a/sql/executor.go b/sql/executor.go index c7ea5fd995..8ff1415689 100644 --- a/sql/executor.go +++ b/sql/executor.go @@ -397,7 +397,6 @@ func pred(pr *extendedSelect, db *DB, cwd string, wr *PipeWriter, modelDir strin if e := genTF(&buf, pr, nil, fts, db); e != nil { return fmt.Errorf("genTF: %v", e) } - fmt.Println(buf.String()) cw := &logChanWriter{wr: wr} defer cw.Close() diff --git a/sql/parser.go b/sql/parser.go index 939eb7364e..e1d109ccee 100644 --- a/sql/parser.go +++ b/sql/parser.go @@ -332,43 +332,43 @@ const sqlLast = 167 var sqlAct = [...]int{ - 31, 101, 63, 100, 13, 41, 62, 85, 86, 40, - 87, 112, 24, 55, 56, 52, 51, 50, 54, 53, - 45, 46, 47, 48, 49, 96, 86, 90, 86, 69, - 84, 70, 57, 44, 110, 59, 60, 109, 95, 7, - 9, 8, 10, 11, 38, 39, 68, 65, 72, 73, - 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, - 110, 17, 47, 48, 49, 69, 26, 25, 27, 4, - 58, 23, 89, 92, 88, 91, 107, 33, 108, 115, - 103, 32, 26, 25, 27, 29, 67, 113, 34, 93, - 30, 19, 102, 33, 20, 111, 104, 32, 37, 104, - 42, 29, 106, 15, 34, 61, 30, 26, 25, 27, - 71, 104, 114, 66, 43, 16, 18, 36, 33, 35, - 22, 105, 32, 3, 98, 99, 29, 64, 12, 34, - 28, 30, 55, 56, 52, 51, 50, 54, 53, 45, - 46, 47, 48, 49, 52, 51, 50, 54, 53, 45, - 46, 47, 48, 49, 45, 46, 47, 48, 49, 21, - 14, 6, 97, 94, 5, 2, 1, + 30, 101, 60, 100, 13, 63, 62, 84, 83, 85, + 112, 87, 23, 96, 84, 41, 93, 59, 38, 52, + 53, 49, 48, 47, 51, 50, 42, 43, 44, 45, + 46, 54, 39, 110, 56, 57, 82, 7, 9, 8, + 10, 11, 109, 87, 65, 70, 71, 72, 73, 74, + 75, 76, 77, 78, 79, 80, 81, 68, 25, 24, + 26, 90, 84, 19, 17, 110, 88, 4, 86, 32, + 55, 22, 103, 31, 44, 45, 46, 28, 67, 107, + 33, 108, 29, 89, 102, 18, 15, 91, 115, 113, + 111, 95, 64, 94, 104, 25, 24, 26, 16, 104, + 36, 69, 106, 66, 40, 34, 32, 35, 21, 37, + 31, 104, 114, 105, 28, 98, 99, 33, 58, 29, + 25, 24, 26, 42, 43, 44, 45, 46, 61, 12, + 3, 32, 27, 20, 14, 31, 6, 97, 92, 28, + 5, 2, 33, 1, 29, 52, 53, 49, 48, 47, + 51, 50, 42, 43, 44, 45, 46, 49, 48, 47, + 51, 50, 42, 43, 44, 45, 46, } var sqlPact = [...]int{ - 119, -1000, 34, 86, -1000, 26, 81, 103, 53, 90, - 102, 100, -1000, 82, 6, 9, -1000, -1000, -1000, 83, - 97, -5, -1000, -1000, 112, -1000, 9, -1000, -1000, 90, - 51, -1000, 90, 90, 65, 117, -1000, 11, 96, 49, - -9, -1000, 7, -1000, 93, 90, 90, 90, 90, 90, - 90, 90, 90, 90, 90, 90, 90, -7, -34, -1000, - -1000, -1000, -30, 112, 83, 90, -1000, -1000, -10, 83, - 90, -1000, 33, 33, -1000, -1000, -1000, 127, 127, 127, - 127, 127, 122, 122, -1000, -1000, 90, -1000, 27, -12, - -1000, -1000, 112, 112, 113, 63, -1000, 107, 63, 59, - 22, -1000, -1000, 9, -1000, 78, -4, -1000, -1000, 70, - 63, -1000, 62, -1000, -1000, -1000, + 126, -1000, 32, 69, -1000, 29, 50, 91, 53, 103, + 88, 90, -1000, 93, -20, -4, -1000, -1000, -1000, 87, + -23, -1000, -1000, 125, -1000, -4, -1000, -1000, 103, 51, + -1000, 103, 103, 78, 118, -1000, 75, 8, 86, 41, + -1000, 84, 103, 103, 103, 103, 103, 103, 103, 103, + 103, 103, 103, 103, -1, -33, -1000, -1000, -1000, -31, + 125, 75, -27, -1000, 42, 103, -1000, -1000, 24, -1000, + 45, 45, -1000, -1000, -1000, 96, 96, 96, 96, 96, + 135, 135, -1000, -1000, 103, -1000, 5, 75, 103, -24, + -1000, 125, 104, 55, -1000, 125, -1000, 99, 55, 62, + 27, -1000, -1000, -4, -1000, 73, -5, -1000, -1000, 72, + 55, -1000, 71, -1000, -1000, -1000, } var sqlPgo = [...]int{ - 0, 166, 165, 164, 163, 162, 161, 160, 159, 2, - 0, 1, 6, 130, 3, 128, 5, 9, + 0, 143, 141, 140, 138, 137, 136, 134, 133, 2, + 0, 1, 17, 132, 3, 129, 5, 6, } var sqlR1 = [...]int{ @@ -391,15 +391,15 @@ var sqlR2 = [...]int{ var sqlChk = [...]int{ -1000, -1, -2, 4, 35, -3, -6, 5, 7, 6, - 8, 9, -15, -10, -7, 17, 29, 35, 35, 10, - 13, -8, 17, 18, -9, 18, 17, 19, -13, 36, - 41, -10, 32, 28, 39, 17, 17, 16, 38, 36, - -17, -16, 17, 17, 38, 27, 28, 29, 30, 31, - 24, 23, 22, 26, 25, 20, 21, -9, 19, -9, - -9, 40, -12, -9, 10, 36, 17, 37, -12, 38, - 24, 17, -9, -9, -9, -9, -9, -9, -9, -9, - -9, -9, -9, -9, 37, 41, 38, 40, -17, -12, - 37, -16, -9, -9, -4, 11, 37, -5, 11, 12, + 8, 9, -15, -10, -7, 17, 29, 35, 35, 13, + -8, 17, 18, -9, 18, 17, 19, -13, 36, 41, + -10, 32, 28, 39, 17, 17, 10, 16, 38, 36, + 17, 38, 27, 28, 29, 30, 31, 24, 23, 22, + 26, 25, 20, 21, -9, 19, -9, -9, 40, -12, + -9, 10, -17, -16, 17, 36, 17, 37, -12, 17, + -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, + -9, -9, 37, 41, 38, 40, -17, 38, 24, -12, + 37, -9, -4, 11, -16, -9, 37, -5, 11, 12, -14, -11, 29, 17, -10, 14, -14, 17, 19, 15, 38, 17, 15, 17, -11, 17, } @@ -407,14 +407,14 @@ var sqlDef = [...]int{ 0, -2, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 4, 0, 16, 18, 17, 2, 3, 0, - 0, 5, 27, 6, 7, 38, 39, 40, 41, 0, - 0, 44, 0, 0, 0, 0, 9, 0, 0, 0, - 10, 30, 0, 11, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 57, - 58, 36, 0, 34, 0, 0, 19, 32, 0, 0, - 0, 28, 45, 46, 47, 48, 49, 50, 51, 52, - 53, 54, 55, 56, 42, 43, 0, 37, 0, 0, - 33, 31, 29, 35, 0, 0, 15, 0, 0, 0, + 5, 27, 6, 7, 38, 39, 40, 41, 0, 0, + 44, 0, 0, 0, 0, 9, 0, 0, 0, 0, + 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 57, 58, 36, 0, + 34, 0, 10, 30, 0, 0, 19, 32, 0, 28, + 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, + 55, 56, 42, 43, 0, 37, 0, 0, 0, 0, + 33, 35, 0, 0, 31, 29, 15, 0, 0, 0, 12, 23, 20, 21, 22, 0, 0, 25, 26, 0, 0, 8, 0, 13, 24, 14, } diff --git a/sql/sql.y b/sql/sql.y index f611503dbe..94b20e9ed4 100644 --- a/sql/sql.y +++ b/sql/sql.y @@ -188,7 +188,7 @@ train_clause predict_clause : PREDICT IDENT { $$.into = $2 } -| predict_clause WITH attrs { $$.predAttrs = $3 } +| PREDICT WITH attrs { $$.predAttrs = $3 } | predict_clause USING IDENT { $$.model = $3 } ; From d0aa22a111da96035c4fedf30c8bfd465c666d7a Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Mon, 29 Jul 2019 22:45:07 +0800 Subject: [PATCH 05/15] fix ci --- sql/codegen_alps.go | 6 ++- sql/parser.go | 116 ++++++++++++++++++++++---------------------- sql/sql.y | 2 - 3 files changed, 61 insertions(+), 63 deletions(-) diff --git a/sql/codegen_alps.go b/sql/codegen_alps.go index c7977f64c7..b80e65ff49 100644 --- a/sql/codegen_alps.go +++ b/sql/codegen_alps.go @@ -340,10 +340,12 @@ func alpsPred(w *PipeWriter, pr *extendedSelect, db *DB, cwd string, session *pb fmt.Println(program.String()) _, ok := db.Driver().(*gomaxcompute.Driver) if !ok { - return fmt.Errorf("Alps Predict only support Maxcompute database driver") + return fmt.Errorf("Alps Predict Job only supports Maxcompute database driver") } - cfg, err := gomaxcompute.ParseDSN(db.dataSourceName) + if err != nil { + return fmt.Errorf("Parse Maxcompute DSN failed: %v", err) + } // FIXME(Yancey1989): using https proto. fixedEndpoint := strings.Replace(cfg.Endpoint, "https://", "http://", 0) // TODO(Yancey1989): submit the Maxcompute UDF script using gomaxcompute driver. diff --git a/sql/parser.go b/sql/parser.go index e1d109ccee..9253b9b16e 100644 --- a/sql/parser.go +++ b/sql/parser.go @@ -212,7 +212,7 @@ const sqlEofCode = 1 const sqlErrCode = 2 const sqlInitialStackSize = 16 -//line sql.y:283 +//line sql.y:281 /* Like Lisp's builtin function cdr. */ func (e *expr) cdr() (r []string) { @@ -793,48 +793,46 @@ sqldefault: parseResult = &extendedSelect{ extended: true, train: true, - attrs: sqlDollar[2].tran.trainAttrs, standardSelect: sqlDollar[1].slct, trainClause: sqlDollar[2].tran} } case 3: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:162 +//line sql.y:161 { parseResult = &extendedSelect{ extended: true, train: false, - attrs: sqlDollar[2].infr.predAttrs, standardSelect: sqlDollar[1].slct, predictClause: sqlDollar[2].infr} } case 4: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:173 +//line sql.y:171 { sqlVAL.slct.fields = sqlDollar[2].expl } case 5: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:174 +//line sql.y:172 { sqlVAL.slct.tables = sqlDollar[3].tbls } case 6: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:175 +//line sql.y:173 { sqlVAL.slct.limit = sqlDollar[3].val } case 7: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:176 +//line sql.y:174 { sqlVAL.slct.where = sqlDollar[3].expr } case 8: sqlDollar = sqlS[sqlpt-8 : sqlpt+1] -//line sql.y:180 +//line sql.y:178 { sqlVAL.tran.estimator = sqlDollar[2].val sqlVAL.tran.trainAttrs = sqlDollar[4].atrs @@ -844,301 +842,301 @@ sqldefault: } case 9: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:190 +//line sql.y:188 { sqlVAL.infr.into = sqlDollar[2].val } case 10: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:191 +//line sql.y:189 { sqlVAL.infr.predAttrs = sqlDollar[3].atrs } case 11: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:192 +//line sql.y:190 { sqlVAL.infr.model = sqlDollar[3].val } case 12: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:196 +//line sql.y:194 { sqlVAL.colc = map[string]exprlist{"feature_columns": sqlDollar[2].expl} } case 13: sqlDollar = sqlS[sqlpt-4 : sqlpt+1] -//line sql.y:197 +//line sql.y:195 { sqlVAL.colc = map[string]exprlist{sqlDollar[4].val: sqlDollar[2].expl} } case 14: sqlDollar = sqlS[sqlpt-5 : sqlpt+1] -//line sql.y:198 +//line sql.y:196 { sqlVAL.colc[sqlDollar[5].val] = sqlDollar[3].expl } case 15: sqlDollar = sqlS[sqlpt-5 : sqlpt+1] -//line sql.y:202 +//line sql.y:200 { sqlVAL.expl = exprlist{sqlDollar[1].expr, atomic(IDENT, "AS"), funcall("", sqlDollar[4].expl)} } case 16: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:205 +//line sql.y:203 { sqlVAL.expl = sqlDollar[1].flds } case 17: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:209 +//line sql.y:207 { sqlVAL.flds = append(sqlVAL.flds, atomic(IDENT, "*")) } case 18: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:210 +//line sql.y:208 { sqlVAL.flds = append(sqlVAL.flds, atomic(IDENT, sqlDollar[1].val)) } case 19: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:211 +//line sql.y:209 { sqlVAL.flds = append(sqlDollar[1].flds, atomic(IDENT, sqlDollar[3].val)) } case 20: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:215 +//line sql.y:213 { sqlVAL.expr = atomic(IDENT, "*") } case 21: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:216 +//line sql.y:214 { sqlVAL.expr = atomic(IDENT, sqlDollar[1].val) } case 22: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:217 +//line sql.y:215 { sqlVAL.expr = sqlDollar[1].expr } case 23: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:221 +//line sql.y:219 { sqlVAL.expl = exprlist{sqlDollar[1].expr} } case 24: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:222 +//line sql.y:220 { sqlVAL.expl = append(sqlDollar[1].expl, sqlDollar[3].expr) } case 25: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:226 +//line sql.y:224 { sqlVAL.labc = sqlDollar[2].val } case 26: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:227 +//line sql.y:225 { sqlVAL.labc = sqlDollar[2].val[1 : len(sqlDollar[2].val)-1] } case 27: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:231 +//line sql.y:229 { sqlVAL.tbls = []string{sqlDollar[1].val} } case 28: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:232 +//line sql.y:230 { sqlVAL.tbls = append(sqlDollar[1].tbls, sqlDollar[3].val) } case 29: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:236 +//line sql.y:234 { sqlVAL.atrs = attrs{sqlDollar[1].val: sqlDollar[3].expr} } case 30: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:240 +//line sql.y:238 { sqlVAL.atrs = sqlDollar[1].atrs } case 31: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:241 +//line sql.y:239 { sqlVAL.atrs = attrsUnion(sqlDollar[1].atrs, sqlDollar[3].atrs) } case 32: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:245 +//line sql.y:243 { sqlVAL.expr = funcall(sqlDollar[1].val, nil) } case 33: sqlDollar = sqlS[sqlpt-4 : sqlpt+1] -//line sql.y:246 +//line sql.y:244 { sqlVAL.expr = funcall(sqlDollar[1].val, sqlDollar[3].expl) } case 34: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:250 +//line sql.y:248 { sqlVAL.expl = exprlist{sqlDollar[1].expr} } case 35: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:251 +//line sql.y:249 { sqlVAL.expl = append(sqlDollar[1].expl, sqlDollar[3].expr) } case 36: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:255 +//line sql.y:253 { sqlVAL.expl = nil } case 37: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:256 +//line sql.y:254 { sqlVAL.expl = sqlDollar[2].expl } case 38: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:260 +//line sql.y:258 { sqlVAL.expr = atomic(NUMBER, sqlDollar[1].val) } case 39: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:261 +//line sql.y:259 { sqlVAL.expr = atomic(IDENT, sqlDollar[1].val) } case 40: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:262 +//line sql.y:260 { sqlVAL.expr = atomic(STRING, sqlDollar[1].val) } case 41: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:263 +//line sql.y:261 { sqlVAL.expr = variadic('[', "square", sqlDollar[1].expl) } case 42: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:264 +//line sql.y:262 { sqlVAL.expr = unary('(', "paren", sqlDollar[2].expr) } case 43: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:265 +//line sql.y:263 { sqlVAL.expr = unary('"', "quota", atomic(STRING, sqlDollar[2].val)) } case 44: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:266 +//line sql.y:264 { sqlVAL.expr = sqlDollar[1].expr } case 45: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:267 +//line sql.y:265 { sqlVAL.expr = binary('+', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 46: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:268 +//line sql.y:266 { sqlVAL.expr = binary('-', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 47: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:269 +//line sql.y:267 { sqlVAL.expr = binary('*', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 48: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:270 +//line sql.y:268 { sqlVAL.expr = binary('/', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 49: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:271 +//line sql.y:269 { sqlVAL.expr = binary('%', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 50: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:272 +//line sql.y:270 { sqlVAL.expr = binary('=', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 51: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:273 +//line sql.y:271 { sqlVAL.expr = binary('<', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 52: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:274 +//line sql.y:272 { sqlVAL.expr = binary('>', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 53: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:275 +//line sql.y:273 { sqlVAL.expr = binary(LE, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 54: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:276 +//line sql.y:274 { sqlVAL.expr = binary(GE, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 55: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:277 +//line sql.y:275 { sqlVAL.expr = binary(AND, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 56: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:278 +//line sql.y:276 { sqlVAL.expr = binary(OR, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 57: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:279 +//line sql.y:277 { sqlVAL.expr = unary(NOT, sqlDollar[1].val, sqlDollar[2].expr) } case 58: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:280 +//line sql.y:278 { sqlVAL.expr = unary('-', sqlDollar[1].val, sqlDollar[2].expr) } diff --git a/sql/sql.y b/sql/sql.y index 94b20e9ed4..45d98aa768 100644 --- a/sql/sql.y +++ b/sql/sql.y @@ -155,7 +155,6 @@ select_stmt parseResult = &extendedSelect{ extended: true, train: true, - attrs: $2.trainAttrs, standardSelect: $1, trainClause: $2} } @@ -163,7 +162,6 @@ select_stmt parseResult = &extendedSelect{ extended: true, train: false, - attrs: $2.predAttrs, standardSelect: $1, predictClause: $2} } From bb9e0c3be81c220381be2094ec2e38ee1af00d5d Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Mon, 29 Jul 2019 22:55:42 +0800 Subject: [PATCH 06/15] debug ci --- scripts/image_build.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/scripts/image_build.sh b/scripts/image_build.sh index d6216cbf29..e3c21a2b46 100644 --- a/scripts/image_build.sh +++ b/scripts/image_build.sh @@ -103,8 +103,10 @@ fi # 7. Install odpscmd for submitting alps predict job with odps udf script # TODO(Yancey1989): using gomaxcompute instead of the odpscmd command-line tool. -wget -q http://odps.alibaba-inc.com/official_downloads/odpscmd/0.32.0/odpscmd_public.zip -unzip -qq odpscmd_public.zip -d /usr/local/odpscmd +#wget -q http://odps.alibaba-inc.com/official_downloads/odpscmd/0.32.0/odpscmd_public.zip +#unzip -qq odpscmd_public.zip -d /usr/local/odpscmd +wget http://odps.alibaba-inc.com/official_downloads/odpscmd/0.32.0/odpscmd_public.zip +unzip odpscmd_public.zip -d /usr/local/odpscmd ln -s /usr/local/odpscmd/bin/odpscmd /usr/local/bin/odpscmd # 8. Load sqlflow Jupyter magic command automatically. c.f. https://stackoverflow.com/a/32683001. From 661dc68fe17220a8df30120e21184d9c2b44f080 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Mon, 29 Jul 2019 23:08:26 +0800 Subject: [PATCH 07/15] fix odpscmd link --- scripts/image_build.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/scripts/image_build.sh b/scripts/image_build.sh index e3c21a2b46..75592e7fc0 100644 --- a/scripts/image_build.sh +++ b/scripts/image_build.sh @@ -105,9 +105,10 @@ fi # TODO(Yancey1989): using gomaxcompute instead of the odpscmd command-line tool. #wget -q http://odps.alibaba-inc.com/official_downloads/odpscmd/0.32.0/odpscmd_public.zip #unzip -qq odpscmd_public.zip -d /usr/local/odpscmd -wget http://odps.alibaba-inc.com/official_downloads/odpscmd/0.32.0/odpscmd_public.zip +wget http://docs-aliyun.cn-hangzhou.oss.aliyun-inc.com/assets/attach/119096/cn_zh/1557995455961/odpscmd_public.zip unzip odpscmd_public.zip -d /usr/local/odpscmd ln -s /usr/local/odpscmd/bin/odpscmd /usr/local/bin/odpscmd +rm -rf odpscmd_public.zip # 8. Load sqlflow Jupyter magic command automatically. c.f. https://stackoverflow.com/a/32683001. mkdir -p $IPYTHON_STARTUP From cd001e8775d1ec51ac997149e5bcd730e6754262 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Mon, 29 Jul 2019 23:24:58 +0800 Subject: [PATCH 08/15] fix ci --- scripts/image_build.sh | 6 +- sql/expression_resolver_test.go | 4 +- sql/parser.go | 121 ++++++++++++++++---------------- sql/sql.y | 1 - 4 files changed, 64 insertions(+), 68 deletions(-) diff --git a/scripts/image_build.sh b/scripts/image_build.sh index 75592e7fc0..11c5e48932 100644 --- a/scripts/image_build.sh +++ b/scripts/image_build.sh @@ -103,10 +103,8 @@ fi # 7. Install odpscmd for submitting alps predict job with odps udf script # TODO(Yancey1989): using gomaxcompute instead of the odpscmd command-line tool. -#wget -q http://odps.alibaba-inc.com/official_downloads/odpscmd/0.32.0/odpscmd_public.zip -#unzip -qq odpscmd_public.zip -d /usr/local/odpscmd -wget http://docs-aliyun.cn-hangzhou.oss.aliyun-inc.com/assets/attach/119096/cn_zh/1557995455961/odpscmd_public.zip -unzip odpscmd_public.zip -d /usr/local/odpscmd +wget -q http://docs-aliyun.cn-hangzhou.oss.aliyun-inc.com/assets/attach/119096/cn_zh/1557995455961/odpscmd_public.zip +unzip -qq odpscmd_public.zip -d /usr/local/odpscmd ln -s /usr/local/odpscmd/bin/odpscmd /usr/local/bin/odpscmd rm -rf odpscmd_public.zip diff --git a/sql/expression_resolver_test.go b/sql/expression_resolver_test.go index e2849c9e25..e57aac3e6b 100644 --- a/sql/expression_resolver_test.go +++ b/sql/expression_resolver_test.go @@ -262,7 +262,7 @@ func TestAttrs(t *testing.T) { s := statementWithAttrs("estimator.hidden_units = [10, 20]") r, e := parser.Parse(s) a.NoError(e) - attrs, err := resolveTrainAttribute(&r.attrs) + attrs, err := resolveTrainAttribute(&r.trainAttrs) a.NoError(err) attr := attrs["estimator.hidden_units"] a.Equal("estimator", attr.Prefix) @@ -272,7 +272,7 @@ func TestAttrs(t *testing.T) { s = statementWithAttrs("dataset.name = hello") r, e = parser.Parse(s) a.NoError(e) - attrs, err = resolveTrainAttribute(&r.attrs) + attrs, err = resolveTrainAttribute(&r.trainAttrs) a.NoError(err) attr = attrs["dataset.name"] a.Equal("dataset", attr.Prefix) diff --git a/sql/parser.go b/sql/parser.go index 9253b9b16e..dfa99bba6d 100644 --- a/sql/parser.go +++ b/sql/parser.go @@ -77,7 +77,6 @@ func variadic(typ int, op string, ods exprlist) *expr { type extendedSelect struct { extended bool train bool - attrs attrs standardSelect trainClause predictClause @@ -122,7 +121,7 @@ func attrsUnion(as1, as2 attrs) attrs { return as1 } -//line sql.y:107 +//line sql.y:106 type sqlSymType struct { yys int val string /* NUMBER, IDENT, STRING, and keywords */ @@ -212,7 +211,7 @@ const sqlEofCode = 1 const sqlErrCode = 2 const sqlInitialStackSize = 16 -//line sql.y:281 +//line sql.y:280 /* Like Lisp's builtin function cdr. */ func (e *expr) cdr() (r []string) { @@ -780,7 +779,7 @@ sqldefault: case 1: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:149 +//line sql.y:148 { parseResult = &extendedSelect{ extended: false, @@ -788,7 +787,7 @@ sqldefault: } case 2: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:154 +//line sql.y:153 { parseResult = &extendedSelect{ extended: true, @@ -798,7 +797,7 @@ sqldefault: } case 3: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:161 +//line sql.y:160 { parseResult = &extendedSelect{ extended: true, @@ -808,31 +807,31 @@ sqldefault: } case 4: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:171 +//line sql.y:170 { sqlVAL.slct.fields = sqlDollar[2].expl } case 5: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:172 +//line sql.y:171 { sqlVAL.slct.tables = sqlDollar[3].tbls } case 6: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:173 +//line sql.y:172 { sqlVAL.slct.limit = sqlDollar[3].val } case 7: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:174 +//line sql.y:173 { sqlVAL.slct.where = sqlDollar[3].expr } case 8: sqlDollar = sqlS[sqlpt-8 : sqlpt+1] -//line sql.y:178 +//line sql.y:177 { sqlVAL.tran.estimator = sqlDollar[2].val sqlVAL.tran.trainAttrs = sqlDollar[4].atrs @@ -842,301 +841,301 @@ sqldefault: } case 9: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:188 +//line sql.y:187 { sqlVAL.infr.into = sqlDollar[2].val } case 10: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:189 +//line sql.y:188 { sqlVAL.infr.predAttrs = sqlDollar[3].atrs } case 11: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:190 +//line sql.y:189 { sqlVAL.infr.model = sqlDollar[3].val } case 12: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:194 +//line sql.y:193 { sqlVAL.colc = map[string]exprlist{"feature_columns": sqlDollar[2].expl} } case 13: sqlDollar = sqlS[sqlpt-4 : sqlpt+1] -//line sql.y:195 +//line sql.y:194 { sqlVAL.colc = map[string]exprlist{sqlDollar[4].val: sqlDollar[2].expl} } case 14: sqlDollar = sqlS[sqlpt-5 : sqlpt+1] -//line sql.y:196 +//line sql.y:195 { sqlVAL.colc[sqlDollar[5].val] = sqlDollar[3].expl } case 15: sqlDollar = sqlS[sqlpt-5 : sqlpt+1] -//line sql.y:200 +//line sql.y:199 { sqlVAL.expl = exprlist{sqlDollar[1].expr, atomic(IDENT, "AS"), funcall("", sqlDollar[4].expl)} } case 16: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:203 +//line sql.y:202 { sqlVAL.expl = sqlDollar[1].flds } case 17: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:207 +//line sql.y:206 { sqlVAL.flds = append(sqlVAL.flds, atomic(IDENT, "*")) } case 18: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:208 +//line sql.y:207 { sqlVAL.flds = append(sqlVAL.flds, atomic(IDENT, sqlDollar[1].val)) } case 19: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:209 +//line sql.y:208 { sqlVAL.flds = append(sqlDollar[1].flds, atomic(IDENT, sqlDollar[3].val)) } case 20: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:213 +//line sql.y:212 { sqlVAL.expr = atomic(IDENT, "*") } case 21: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:214 +//line sql.y:213 { sqlVAL.expr = atomic(IDENT, sqlDollar[1].val) } case 22: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:215 +//line sql.y:214 { sqlVAL.expr = sqlDollar[1].expr } case 23: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:219 +//line sql.y:218 { sqlVAL.expl = exprlist{sqlDollar[1].expr} } case 24: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:220 +//line sql.y:219 { sqlVAL.expl = append(sqlDollar[1].expl, sqlDollar[3].expr) } case 25: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:224 +//line sql.y:223 { sqlVAL.labc = sqlDollar[2].val } case 26: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:225 +//line sql.y:224 { sqlVAL.labc = sqlDollar[2].val[1 : len(sqlDollar[2].val)-1] } case 27: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:229 +//line sql.y:228 { sqlVAL.tbls = []string{sqlDollar[1].val} } case 28: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:230 +//line sql.y:229 { sqlVAL.tbls = append(sqlDollar[1].tbls, sqlDollar[3].val) } case 29: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:234 +//line sql.y:233 { sqlVAL.atrs = attrs{sqlDollar[1].val: sqlDollar[3].expr} } case 30: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:238 +//line sql.y:237 { sqlVAL.atrs = sqlDollar[1].atrs } case 31: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:239 +//line sql.y:238 { sqlVAL.atrs = attrsUnion(sqlDollar[1].atrs, sqlDollar[3].atrs) } case 32: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:243 +//line sql.y:242 { sqlVAL.expr = funcall(sqlDollar[1].val, nil) } case 33: sqlDollar = sqlS[sqlpt-4 : sqlpt+1] -//line sql.y:244 +//line sql.y:243 { sqlVAL.expr = funcall(sqlDollar[1].val, sqlDollar[3].expl) } case 34: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:248 +//line sql.y:247 { sqlVAL.expl = exprlist{sqlDollar[1].expr} } case 35: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:249 +//line sql.y:248 { sqlVAL.expl = append(sqlDollar[1].expl, sqlDollar[3].expr) } case 36: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:253 +//line sql.y:252 { sqlVAL.expl = nil } case 37: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:254 +//line sql.y:253 { sqlVAL.expl = sqlDollar[2].expl } case 38: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:258 +//line sql.y:257 { sqlVAL.expr = atomic(NUMBER, sqlDollar[1].val) } case 39: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:259 +//line sql.y:258 { sqlVAL.expr = atomic(IDENT, sqlDollar[1].val) } case 40: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:260 +//line sql.y:259 { sqlVAL.expr = atomic(STRING, sqlDollar[1].val) } case 41: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:261 +//line sql.y:260 { sqlVAL.expr = variadic('[', "square", sqlDollar[1].expl) } case 42: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:262 +//line sql.y:261 { sqlVAL.expr = unary('(', "paren", sqlDollar[2].expr) } case 43: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:263 +//line sql.y:262 { sqlVAL.expr = unary('"', "quota", atomic(STRING, sqlDollar[2].val)) } case 44: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:264 +//line sql.y:263 { sqlVAL.expr = sqlDollar[1].expr } case 45: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:265 +//line sql.y:264 { sqlVAL.expr = binary('+', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 46: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:266 +//line sql.y:265 { sqlVAL.expr = binary('-', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 47: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:267 +//line sql.y:266 { sqlVAL.expr = binary('*', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 48: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:268 +//line sql.y:267 { sqlVAL.expr = binary('/', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 49: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:269 +//line sql.y:268 { sqlVAL.expr = binary('%', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 50: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:270 +//line sql.y:269 { sqlVAL.expr = binary('=', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 51: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:271 +//line sql.y:270 { sqlVAL.expr = binary('<', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 52: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:272 +//line sql.y:271 { sqlVAL.expr = binary('>', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 53: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:273 +//line sql.y:272 { sqlVAL.expr = binary(LE, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 54: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:274 +//line sql.y:273 { sqlVAL.expr = binary(GE, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 55: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:275 +//line sql.y:274 { sqlVAL.expr = binary(AND, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 56: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:276 +//line sql.y:275 { sqlVAL.expr = binary(OR, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 57: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:277 +//line sql.y:276 { sqlVAL.expr = unary(NOT, sqlDollar[1].val, sqlDollar[2].expr) } case 58: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:278 +//line sql.y:277 { sqlVAL.expr = unary('-', sqlDollar[1].val, sqlDollar[2].expr) } diff --git a/sql/sql.y b/sql/sql.y index 45d98aa768..b1b9982cbd 100644 --- a/sql/sql.y +++ b/sql/sql.y @@ -58,7 +58,6 @@ type extendedSelect struct { extended bool train bool - attrs attrs standardSelect trainClause predictClause From 46ff06637a1162c3eea12a96a8ef2a3f379846fe Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Mon, 29 Jul 2019 23:41:38 +0800 Subject: [PATCH 09/15] fix ci --- sql/codegen_alps.go | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/sql/codegen_alps.go b/sql/codegen_alps.go index b80e65ff49..f632c6156b 100644 --- a/sql/codegen_alps.go +++ b/sql/codegen_alps.go @@ -263,6 +263,15 @@ func newALPSTrainFiller(pr *extendedSelect, db *DB, session *pb.Session) (*alpsF } func newALPSPredictFiller(pr *extendedSelect, session *pb.Session) (*alpsFiller, error) { + var ossID, ossKey *expr + var ok bool + if ossID, ok = pr.predAttrs["OSS_ID"]; !ok { + return nil, fmt.Errorf("the ALPS Predict job should specify OSS_ID") + } + if ossKey, ok = pr.predAttrs["OSS_KEY"]; !ok { + return nil, fmt.Errorf("the ALPS Predict job should specify OSS_KEY") + } + return &alpsFiller{ IsTraining: true, PredictInputTable: pr.tables[0], @@ -270,8 +279,8 @@ func newALPSPredictFiller(pr *extendedSelect, session *pb.Session) (*alpsFiller, PredictUDF: strings.Join(pr.fields.Strings(), " "), PredictInputModel: pr.predictClause.model, UserID: session.UserId, - OSSID: pr.attrs["OSS_ID"].String(), - OSSKey: pr.attrs["OSS_KEY"].String(), + OSSID: ossID.String(), + OSSKey: ossKey.String(), }, nil } From f54b04bfb161cd9a99b6ca5308ba841edd65056e Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Mon, 29 Jul 2019 23:56:41 +0800 Subject: [PATCH 10/15] update --- sql/expression_resolver_test.go | 2 +- sql/parser_test.go | 8 ++++---- sql/python/sqlflow_submitter/db.py | 4 ++++ 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/sql/expression_resolver_test.go b/sql/expression_resolver_test.go index e57aac3e6b..99f1825c73 100644 --- a/sql/expression_resolver_test.go +++ b/sql/expression_resolver_test.go @@ -286,7 +286,7 @@ func TestExecResource(t *testing.T) { s := statementWithAttrs("exec.worker_num = 2") r, e := parser.Parse(s) a.NoError(e) - attrs, err := resolveTrainAttribute(&r.attrs) + attrs, err := resolveTrainAttribute(&r.trainAttrs) a.NoError(err) attr := attrs["exec.worker_num"] fmt.Println(attr) diff --git a/sql/parser_test.go b/sql/parser_test.go index 3a01a6ef83..aa12b854bb 100644 --- a/sql/parser_test.go +++ b/sql/parser_test.go @@ -81,8 +81,8 @@ func TestTrainParser(t *testing.T) { a.True(r.extended) a.True(r.train) a.Equal("DNNClassifier", r.estimator) - a.Equal("[10, 20]", r.attrs["hidden_units"].String()) - a.Equal("3", r.attrs["n_classes"].String()) + a.Equal("[10, 20]", r.trainAttrs["hidden_units"].String()) + a.Equal("3", r.trainAttrs["n_classes"].String()) a.Equal(`employee.name`, r.columns["feature_columns"][0].String()) a.Equal(`bucketize(last_name, 1000)`, @@ -101,8 +101,8 @@ func TestMultiColumnTrainParser(t *testing.T) { a.True(r.extended) a.True(r.train) a.Equal("DNNClassifier", r.estimator) - a.Equal("[10, 20]", r.attrs["hidden_units"].String()) - a.Equal("3", r.attrs["n_classes"].String()) + a.Equal("[10, 20]", r.trainAttrs["hidden_units"].String()) + a.Equal("3", r.trainAttrs["n_classes"].String()) a.Equal(`employee.name`, r.columns["feature_columns"][0].String()) a.Equal(`bucketize(last_name, 1000)`, diff --git a/sql/python/sqlflow_submitter/db.py b/sql/python/sqlflow_submitter/db.py index 8376eeab01..fd5554c9bd 100644 --- a/sql/python/sqlflow_submitter/db.py +++ b/sql/python/sqlflow_submitter/db.py @@ -42,6 +42,7 @@ def connect(driver, database, user, password, host, port): def db_generator(driver, conn, statement, feature_column_names, label_column_name, feature_specs, fetch_size=128): + print("-----") def reader(): cursor = conn.cursor() cursor.execute(statement) @@ -54,10 +55,12 @@ def reader(): label_idx = field_names.index(label_column_name) rows = cursor.fetchmany(fetch_size) + print("rows.len", len(rows)) while len(rows) > 0: # NOTE: keep the connection while training or connection will lost if no activities appear. if driver == "mysql" and not conn.is_connected(): conn.ping(True) + print("rows.len", len(rows)) for row in rows: label = row[label_idx] features = [] @@ -83,6 +86,7 @@ def reader(): features.append(cell) yield (tuple(features), [label]) rows = cursor.fetchmany(fetch_size) + print("rows.len", len(rows)) cursor.close() if driver == "maxcompute": From 742c2dd65de758c47186000062ceb315dd7569a9 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Tue, 30 Jul 2019 00:28:11 +0800 Subject: [PATCH 11/15] polish code --- sql/codegen_alps.go | 1 - sql/python/sqlflow_submitter/db.py | 4 ---- 2 files changed, 5 deletions(-) diff --git a/sql/codegen_alps.go b/sql/codegen_alps.go index f632c6156b..5634312175 100644 --- a/sql/codegen_alps.go +++ b/sql/codegen_alps.go @@ -346,7 +346,6 @@ func alpsPred(w *PipeWriter, pr *extendedSelect, db *DB, cwd string, session *pb f.WriteString(program.String()) f.Close() cw := &logChanWriter{wr: w} - fmt.Println(program.String()) _, ok := db.Driver().(*gomaxcompute.Driver) if !ok { return fmt.Errorf("Alps Predict Job only supports Maxcompute database driver") diff --git a/sql/python/sqlflow_submitter/db.py b/sql/python/sqlflow_submitter/db.py index fd5554c9bd..8376eeab01 100644 --- a/sql/python/sqlflow_submitter/db.py +++ b/sql/python/sqlflow_submitter/db.py @@ -42,7 +42,6 @@ def connect(driver, database, user, password, host, port): def db_generator(driver, conn, statement, feature_column_names, label_column_name, feature_specs, fetch_size=128): - print("-----") def reader(): cursor = conn.cursor() cursor.execute(statement) @@ -55,12 +54,10 @@ def reader(): label_idx = field_names.index(label_column_name) rows = cursor.fetchmany(fetch_size) - print("rows.len", len(rows)) while len(rows) > 0: # NOTE: keep the connection while training or connection will lost if no activities appear. if driver == "mysql" and not conn.is_connected(): conn.ping(True) - print("rows.len", len(rows)) for row in rows: label = row[label_idx] features = [] @@ -86,7 +83,6 @@ def reader(): features.append(cell) yield (tuple(features), [label]) rows = cursor.fetchmany(fetch_size) - print("rows.len", len(rows)) cursor.close() if driver == "maxcompute": From 99ef0c4dbd59c437a7fed6e1a37b33587d929c7d Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Tue, 30 Jul 2019 19:38:34 +0800 Subject: [PATCH 12/15] update by comment --- sql/codegen_alps.go | 4 +- sql/parser.go | 305 +++++++++++++++++++++---------------------- sql/parser_test.go | 26 ++-- sql/sql.y | 5 +- sql/template_alps.go | 2 +- 5 files changed, 173 insertions(+), 169 deletions(-) diff --git a/sql/codegen_alps.go b/sql/codegen_alps.go index 5634312175..ea3dc2d080 100644 --- a/sql/codegen_alps.go +++ b/sql/codegen_alps.go @@ -43,7 +43,6 @@ type alpsFiller struct { ModelDir string ScratchDir string PredictOutputTable string - PredictInputModel string // Schema & Decode info Fields string @@ -271,13 +270,14 @@ func newALPSPredictFiller(pr *extendedSelect, session *pb.Session) (*alpsFiller, if ossKey, ok = pr.predAttrs["OSS_KEY"]; !ok { return nil, fmt.Errorf("the ALPS Predict job should specify OSS_KEY") } + modelDir := fmt.Sprintf("oss://arks-model/%s/%s.tar.gz", session.UserId, pr.predictClause.model) return &alpsFiller{ IsTraining: true, PredictInputTable: pr.tables[0], PredictOutputTable: pr.predictClause.into, PredictUDF: strings.Join(pr.fields.Strings(), " "), - PredictInputModel: pr.predictClause.model, + ModelDir: modelDir, UserID: session.UserId, OSSID: ossID.String(), OSSKey: ossKey.String(), diff --git a/sql/parser.go b/sql/parser.go index dfa99bba6d..66efa90aa9 100644 --- a/sql/parser.go +++ b/sql/parser.go @@ -211,7 +211,7 @@ const sqlEofCode = 1 const sqlErrCode = 2 const sqlInitialStackSize = 16 -//line sql.y:280 +//line sql.y:279 /* Like Lisp's builtin function cdr. */ func (e *expr) cdr() (r []string) { @@ -327,95 +327,95 @@ var sqlExca = [...]int{ const sqlPrivate = 57344 -const sqlLast = 167 +const sqlLast = 169 var sqlAct = [...]int{ - 30, 101, 60, 100, 13, 63, 62, 84, 83, 85, - 112, 87, 23, 96, 84, 41, 93, 59, 38, 52, - 53, 49, 48, 47, 51, 50, 42, 43, 44, 45, - 46, 54, 39, 110, 56, 57, 82, 7, 9, 8, - 10, 11, 109, 87, 65, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 68, 25, 24, - 26, 90, 84, 19, 17, 110, 88, 4, 86, 32, - 55, 22, 103, 31, 44, 45, 46, 28, 67, 107, - 33, 108, 29, 89, 102, 18, 15, 91, 115, 113, - 111, 95, 64, 94, 104, 25, 24, 26, 16, 104, - 36, 69, 106, 66, 40, 34, 32, 35, 21, 37, - 31, 104, 114, 105, 28, 98, 99, 33, 58, 29, - 25, 24, 26, 42, 43, 44, 45, 46, 61, 12, - 3, 32, 27, 20, 14, 31, 6, 97, 92, 28, - 5, 2, 33, 1, 29, 52, 53, 49, 48, 47, - 51, 50, 42, 43, 44, 45, 46, 49, 48, 47, - 51, 50, 42, 43, 44, 45, 46, + 29, 101, 57, 100, 13, 83, 82, 80, 79, 81, + 95, 80, 22, 92, 114, 111, 56, 38, 49, 50, + 46, 45, 44, 48, 47, 39, 40, 41, 42, 43, + 51, 88, 80, 53, 54, 78, 18, 112, 112, 94, + 91, 36, 66, 67, 68, 69, 70, 71, 72, 73, + 74, 75, 76, 77, 64, 24, 23, 25, 7, 9, + 8, 10, 11, 37, 91, 61, 31, 86, 17, 93, + 30, 41, 42, 43, 27, 63, 52, 32, 87, 28, + 24, 23, 25, 89, 109, 117, 110, 21, 4, 115, + 113, 31, 106, 104, 84, 30, 105, 99, 104, 27, + 85, 108, 32, 55, 28, 24, 23, 25, 39, 40, + 41, 42, 43, 104, 116, 65, 31, 62, 34, 33, + 30, 20, 35, 107, 27, 60, 58, 32, 59, 28, + 49, 50, 46, 45, 44, 48, 47, 39, 40, 41, + 42, 43, 46, 45, 44, 48, 47, 39, 40, 41, + 42, 43, 103, 15, 97, 98, 3, 12, 26, 19, + 14, 6, 96, 90, 102, 16, 5, 2, 1, } var sqlPact = [...]int{ - 126, -1000, 32, 69, -1000, 29, 50, 91, 53, 103, - 88, 90, -1000, 93, -20, -4, -1000, -1000, -1000, 87, - -23, -1000, -1000, 125, -1000, -4, -1000, -1000, 103, 51, - -1000, 103, 103, 78, 118, -1000, 75, 8, 86, 41, - -1000, 84, 103, 103, 103, 103, 103, 103, 103, 103, - 103, 103, 103, 103, -1, -33, -1000, -1000, -1000, -31, - 125, 75, -27, -1000, 42, 103, -1000, -1000, 24, -1000, - 45, 45, -1000, -1000, -1000, 96, 96, 96, 96, 96, - 135, 135, -1000, -1000, 103, -1000, 5, 75, 103, -24, - -1000, 125, 104, 55, -1000, 125, -1000, 99, 55, 62, - 27, -1000, -1000, -4, -1000, 73, -5, -1000, -1000, 72, - 55, -1000, 71, -1000, -1000, -1000, + 152, -1000, 53, 136, -1000, 33, 1, 104, 69, 88, + 102, 101, -1000, 106, 3, 27, -1000, -1000, -1000, -21, + -1000, -1000, 110, -1000, 27, -1000, -1000, 88, 57, -1000, + 88, 88, 63, 116, 115, 29, 100, 38, 98, 88, + 88, 88, 88, 88, 88, 88, 88, 88, 88, 88, + 88, -2, -33, -1000, -1000, -1000, -31, 110, 77, 83, + 77, 88, -1000, -1000, -6, -1000, 42, 42, -1000, -1000, + -1000, 81, 81, 81, 81, 81, 120, 120, -1000, -1000, + 88, -1000, 2, -1000, 45, -1000, 26, -27, -1000, 110, + 143, 77, 135, 88, 75, -1000, 109, 135, 67, -1000, + 0, -1000, -1000, 27, -1000, 110, -1000, 73, -1, -1000, + -1000, 72, 135, -1000, 68, -1000, -1000, -1000, } var sqlPgo = [...]int{ - 0, 143, 141, 140, 138, 137, 136, 134, 133, 2, - 0, 1, 17, 132, 3, 129, 5, 6, + 0, 168, 167, 166, 163, 162, 161, 160, 159, 2, + 0, 1, 16, 158, 3, 157, 5, 6, } var sqlR1 = [...]int{ 0, 1, 1, 1, 2, 2, 2, 2, 3, 6, - 6, 6, 4, 4, 4, 15, 15, 7, 7, 7, - 11, 11, 11, 14, 14, 5, 5, 8, 8, 16, - 17, 17, 10, 10, 12, 12, 13, 13, 9, 9, + 6, 4, 4, 4, 15, 15, 7, 7, 7, 11, + 11, 11, 14, 14, 5, 5, 8, 8, 16, 17, + 17, 10, 10, 12, 12, 13, 13, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, - 9, 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, } var sqlR2 = [...]int{ - 0, 2, 3, 3, 2, 3, 3, 3, 8, 2, - 3, 3, 2, 4, 5, 5, 1, 1, 1, 3, - 1, 1, 1, 1, 3, 2, 2, 1, 3, 3, - 1, 3, 3, 4, 1, 3, 2, 3, 1, 1, - 1, 1, 3, 3, 1, 3, 3, 3, 3, 3, - 3, 3, 3, 3, 3, 3, 3, 2, 2, + 0, 2, 3, 3, 2, 3, 3, 3, 8, 4, + 6, 2, 4, 5, 5, 1, 1, 1, 3, 1, + 1, 1, 1, 3, 2, 2, 1, 3, 3, 1, + 3, 3, 4, 1, 3, 2, 3, 1, 1, 1, + 1, 3, 3, 1, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 2, 2, } var sqlChk = [...]int{ -1000, -1, -2, 4, 35, -3, -6, 5, 7, 6, - 8, 9, -15, -10, -7, 17, 29, 35, 35, 13, - -8, 17, 18, -9, 18, 17, 19, -13, 36, 41, - -10, 32, 28, 39, 17, 17, 10, 16, 38, 36, - 17, 38, 27, 28, 29, 30, 31, 24, 23, 22, - 26, 25, 20, 21, -9, 19, -9, -9, 40, -12, - -9, 10, -17, -16, 17, 36, 17, 37, -12, 17, - -9, -9, -9, -9, -9, -9, -9, -9, -9, -9, - -9, -9, 37, 41, 38, 40, -17, 38, 24, -12, - 37, -9, -4, 11, -16, -9, 37, -5, 11, 12, - -14, -11, 29, 17, -10, 14, -14, 17, 19, 15, - 38, 17, 15, 17, -11, 17, + 8, 9, -15, -10, -7, 17, 29, 35, 35, -8, + 17, 18, -9, 18, 17, 19, -13, 36, 41, -10, + 32, 28, 39, 17, 17, 16, 38, 36, 38, 27, + 28, 29, 30, 31, 24, 23, 22, 26, 25, 20, + 21, -9, 19, -9, -9, 40, -12, -9, 10, 13, + 10, 36, 17, 37, -12, 17, -9, -9, -9, -9, + -9, -9, -9, -9, -9, -9, -9, -9, 37, 41, + 38, 40, -17, -16, 17, 17, -17, -12, 37, -9, + -4, 38, 11, 24, 13, 37, -5, 11, 12, -16, + -14, -11, 29, 17, -10, -9, 17, 14, -14, 17, + 19, 15, 38, 17, 15, 17, -11, 17, } var sqlDef = [...]int{ 0, -2, 0, 0, 1, 0, 0, 0, 0, 0, - 0, 0, 4, 0, 16, 18, 17, 2, 3, 0, - 5, 27, 6, 7, 38, 39, 40, 41, 0, 0, - 44, 0, 0, 0, 0, 9, 0, 0, 0, 0, - 11, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 57, 58, 36, 0, - 34, 0, 10, 30, 0, 0, 19, 32, 0, 28, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, - 55, 56, 42, 43, 0, 37, 0, 0, 0, 0, - 33, 35, 0, 0, 31, 29, 15, 0, 0, 0, - 12, 23, 20, 21, 22, 0, 0, 25, 26, 0, - 0, 8, 0, 13, 24, 14, + 0, 0, 4, 0, 15, 17, 16, 2, 3, 5, + 26, 6, 7, 37, 38, 39, 40, 0, 0, 43, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 56, 57, 35, 0, 33, 0, 0, + 0, 0, 18, 31, 0, 27, 44, 45, 46, 47, + 48, 49, 50, 51, 52, 53, 54, 55, 41, 42, + 0, 36, 0, 29, 0, 9, 0, 0, 32, 34, + 0, 0, 0, 0, 0, 14, 0, 0, 0, 30, + 11, 22, 19, 20, 21, 28, 10, 0, 0, 24, + 25, 0, 0, 8, 0, 12, 23, 13, } var sqlTok1 = [...]int{ @@ -840,302 +840,299 @@ sqldefault: sqlVAL.tran.save = sqlDollar[8].val } case 9: - sqlDollar = sqlS[sqlpt-2 : sqlpt+1] + sqlDollar = sqlS[sqlpt-4 : sqlpt+1] //line sql.y:187 { sqlVAL.infr.into = sqlDollar[2].val + sqlVAL.infr.model = sqlDollar[4].val } case 10: - sqlDollar = sqlS[sqlpt-3 : sqlpt+1] + sqlDollar = sqlS[sqlpt-6 : sqlpt+1] //line sql.y:188 { - sqlVAL.infr.predAttrs = sqlDollar[3].atrs + sqlVAL.infr.into = sqlDollar[2].val + sqlVAL.infr.predAttrs = sqlDollar[4].atrs + sqlVAL.infr.model = sqlDollar[6].val } case 11: - sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:189 - { - sqlVAL.infr.model = sqlDollar[3].val - } - case 12: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:193 +//line sql.y:192 { sqlVAL.colc = map[string]exprlist{"feature_columns": sqlDollar[2].expl} } - case 13: + case 12: sqlDollar = sqlS[sqlpt-4 : sqlpt+1] -//line sql.y:194 +//line sql.y:193 { sqlVAL.colc = map[string]exprlist{sqlDollar[4].val: sqlDollar[2].expl} } - case 14: + case 13: sqlDollar = sqlS[sqlpt-5 : sqlpt+1] -//line sql.y:195 +//line sql.y:194 { sqlVAL.colc[sqlDollar[5].val] = sqlDollar[3].expl } - case 15: + case 14: sqlDollar = sqlS[sqlpt-5 : sqlpt+1] -//line sql.y:199 +//line sql.y:198 { sqlVAL.expl = exprlist{sqlDollar[1].expr, atomic(IDENT, "AS"), funcall("", sqlDollar[4].expl)} } - case 16: + case 15: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:202 +//line sql.y:201 { sqlVAL.expl = sqlDollar[1].flds } - case 17: + case 16: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:206 +//line sql.y:205 { sqlVAL.flds = append(sqlVAL.flds, atomic(IDENT, "*")) } - case 18: + case 17: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:207 +//line sql.y:206 { sqlVAL.flds = append(sqlVAL.flds, atomic(IDENT, sqlDollar[1].val)) } - case 19: + case 18: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:208 +//line sql.y:207 { sqlVAL.flds = append(sqlDollar[1].flds, atomic(IDENT, sqlDollar[3].val)) } - case 20: + case 19: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:212 +//line sql.y:211 { sqlVAL.expr = atomic(IDENT, "*") } - case 21: + case 20: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:213 +//line sql.y:212 { sqlVAL.expr = atomic(IDENT, sqlDollar[1].val) } - case 22: + case 21: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:214 +//line sql.y:213 { sqlVAL.expr = sqlDollar[1].expr } - case 23: + case 22: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:218 +//line sql.y:217 { sqlVAL.expl = exprlist{sqlDollar[1].expr} } - case 24: + case 23: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:219 +//line sql.y:218 { sqlVAL.expl = append(sqlDollar[1].expl, sqlDollar[3].expr) } - case 25: + case 24: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:223 +//line sql.y:222 { sqlVAL.labc = sqlDollar[2].val } - case 26: + case 25: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:224 +//line sql.y:223 { sqlVAL.labc = sqlDollar[2].val[1 : len(sqlDollar[2].val)-1] } - case 27: + case 26: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:228 +//line sql.y:227 { sqlVAL.tbls = []string{sqlDollar[1].val} } - case 28: + case 27: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:229 +//line sql.y:228 { sqlVAL.tbls = append(sqlDollar[1].tbls, sqlDollar[3].val) } - case 29: + case 28: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:233 +//line sql.y:232 { sqlVAL.atrs = attrs{sqlDollar[1].val: sqlDollar[3].expr} } - case 30: + case 29: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:237 +//line sql.y:236 { sqlVAL.atrs = sqlDollar[1].atrs } - case 31: + case 30: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:238 +//line sql.y:237 { sqlVAL.atrs = attrsUnion(sqlDollar[1].atrs, sqlDollar[3].atrs) } - case 32: + case 31: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:242 +//line sql.y:241 { sqlVAL.expr = funcall(sqlDollar[1].val, nil) } - case 33: + case 32: sqlDollar = sqlS[sqlpt-4 : sqlpt+1] -//line sql.y:243 +//line sql.y:242 { sqlVAL.expr = funcall(sqlDollar[1].val, sqlDollar[3].expl) } - case 34: + case 33: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] -//line sql.y:247 +//line sql.y:246 { sqlVAL.expl = exprlist{sqlDollar[1].expr} } - case 35: + case 34: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:248 +//line sql.y:247 { sqlVAL.expl = append(sqlDollar[1].expl, sqlDollar[3].expr) } - case 36: + case 35: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:252 +//line sql.y:251 { sqlVAL.expl = nil } - case 37: + case 36: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] -//line sql.y:253 +//line sql.y:252 { sqlVAL.expl = sqlDollar[2].expl } + case 37: + sqlDollar = sqlS[sqlpt-1 : sqlpt+1] +//line sql.y:256 + { + sqlVAL.expr = atomic(NUMBER, sqlDollar[1].val) + } case 38: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] //line sql.y:257 { - sqlVAL.expr = atomic(NUMBER, sqlDollar[1].val) + sqlVAL.expr = atomic(IDENT, sqlDollar[1].val) } case 39: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] //line sql.y:258 { - sqlVAL.expr = atomic(IDENT, sqlDollar[1].val) + sqlVAL.expr = atomic(STRING, sqlDollar[1].val) } case 40: sqlDollar = sqlS[sqlpt-1 : sqlpt+1] //line sql.y:259 { - sqlVAL.expr = atomic(STRING, sqlDollar[1].val) + sqlVAL.expr = variadic('[', "square", sqlDollar[1].expl) } case 41: - sqlDollar = sqlS[sqlpt-1 : sqlpt+1] + sqlDollar = sqlS[sqlpt-3 : sqlpt+1] //line sql.y:260 { - sqlVAL.expr = variadic('[', "square", sqlDollar[1].expl) + sqlVAL.expr = unary('(', "paren", sqlDollar[2].expr) } case 42: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] //line sql.y:261 { - sqlVAL.expr = unary('(', "paren", sqlDollar[2].expr) + sqlVAL.expr = unary('"', "quota", atomic(STRING, sqlDollar[2].val)) } case 43: - sqlDollar = sqlS[sqlpt-3 : sqlpt+1] + sqlDollar = sqlS[sqlpt-1 : sqlpt+1] //line sql.y:262 { - sqlVAL.expr = unary('"', "quota", atomic(STRING, sqlDollar[2].val)) + sqlVAL.expr = sqlDollar[1].expr } case 44: - sqlDollar = sqlS[sqlpt-1 : sqlpt+1] + sqlDollar = sqlS[sqlpt-3 : sqlpt+1] //line sql.y:263 { - sqlVAL.expr = sqlDollar[1].expr + sqlVAL.expr = binary('+', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 45: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] //line sql.y:264 { - sqlVAL.expr = binary('+', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) + sqlVAL.expr = binary('-', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 46: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] //line sql.y:265 { - sqlVAL.expr = binary('-', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) + sqlVAL.expr = binary('*', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 47: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] //line sql.y:266 { - sqlVAL.expr = binary('*', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) + sqlVAL.expr = binary('/', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 48: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] //line sql.y:267 { - sqlVAL.expr = binary('/', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) + sqlVAL.expr = binary('%', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 49: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] //line sql.y:268 { - sqlVAL.expr = binary('%', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) + sqlVAL.expr = binary('=', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 50: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] //line sql.y:269 { - sqlVAL.expr = binary('=', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) + sqlVAL.expr = binary('<', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 51: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] //line sql.y:270 { - sqlVAL.expr = binary('<', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) + sqlVAL.expr = binary('>', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 52: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] //line sql.y:271 { - sqlVAL.expr = binary('>', sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) + sqlVAL.expr = binary(LE, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 53: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] //line sql.y:272 { - sqlVAL.expr = binary(LE, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) + sqlVAL.expr = binary(GE, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 54: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] //line sql.y:273 { - sqlVAL.expr = binary(GE, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) + sqlVAL.expr = binary(AND, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 55: sqlDollar = sqlS[sqlpt-3 : sqlpt+1] //line sql.y:274 { - sqlVAL.expr = binary(AND, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) + sqlVAL.expr = binary(OR, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) } case 56: - sqlDollar = sqlS[sqlpt-3 : sqlpt+1] + sqlDollar = sqlS[sqlpt-2 : sqlpt+1] //line sql.y:275 { - sqlVAL.expr = binary(OR, sqlDollar[1].expr, sqlDollar[2].val, sqlDollar[3].expr) + sqlVAL.expr = unary(NOT, sqlDollar[1].val, sqlDollar[2].expr) } case 57: sqlDollar = sqlS[sqlpt-2 : sqlpt+1] //line sql.y:276 - { - sqlVAL.expr = unary(NOT, sqlDollar[1].val, sqlDollar[2].expr) - } - case 58: - sqlDollar = sqlS[sqlpt-2 : sqlpt+1] -//line sql.y:277 { sqlVAL.expr = unary('-', sqlDollar[1].val, sqlDollar[2].expr) } diff --git a/sql/parser_test.go b/sql/parser_test.go index aa12b854bb..20a5c48f16 100644 --- a/sql/parser_test.go +++ b/sql/parser_test.go @@ -55,6 +55,13 @@ INTO sqlflow_models.my_dnn_model; ` testPredictSelect = testStandardSelectStmt + `PREDICT db.table.field USING sqlflow_models.my_dnn_model;` + + testMaxcomputeUDFPredict = ` +SELECT predict_fun(concat(",", col_1, col_2)) AS (info, score) FROM db.table +PREDICT db.predict_result +WITH OSS_KEY=a, OSS_ID=b +USING sqlflow_models.my_model; + ` ) func TestStandardSelect(t *testing.T) { @@ -148,13 +155,14 @@ func TestStandardDropTable(t *testing.T) { func TestSelectMaxcomputeUDF(t *testing.T) { a := assert.New(t) - slct := "SELECT func(func2(\"arg0\", arg1), arg_2) AS (info, score) FROM a_table where a_table.col_1 > 100;" - pr, _ := newParser().Parse(slct) - expFields := []string{ - "func(func2(\"arg0\", arg1), arg_2)", - "AS", - "(info, score)", - } - a.Equal(pr.fields.Strings(), expFields) - a.Equal(pr.tables[0], "a_table") + r, e := newParser().Parse(testMaxcomputeUDFPredict) + a.NoError(e) + a.Equal(3, len(r.fields.Strings())) + a.Equal(r.fields[0].String(), `predict_fun(concat(",", col_1, col_2))`) + a.Equal(r.fields[1].String(), `AS`) + a.Equal(r.fields[2].String(), `(info, score)`) + a.Equal(r.predictClause.into, "db.predict_result") + a.Equal(r.predAttrs["OSS_KEY"].String(), "a") + a.Equal(r.predAttrs["OSS_ID"].String(), "b") + a.Equal(r.predictClause.model, "sqlflow_models.my_model") } diff --git a/sql/sql.y b/sql/sql.y index b1b9982cbd..740c2c41c1 100644 --- a/sql/sql.y +++ b/sql/sql.y @@ -184,9 +184,8 @@ train_clause ; predict_clause -: PREDICT IDENT { $$.into = $2 } -| PREDICT WITH attrs { $$.predAttrs = $3 } -| predict_clause USING IDENT { $$.model = $3 } +: PREDICT IDENT USING IDENT { $$.into = $2; $$.model = $4 } +| PREDICT IDENT WITH attrs USING IDENT { $$.into = $2; $$.predAttrs = $4; $$.model = $6 } ; column_clause diff --git a/sql/template_alps.go b/sql/template_alps.go index 33d1452846..a2530fb13e 100644 --- a/sql/template_alps.go +++ b/sql/template_alps.go @@ -146,7 +146,7 @@ set odps.service.mode=off; set odps.instance.priority = 0; set odps.sql.udf.timeout = 3000; -set mst.model.path=oss://arks-model/{{.UserID}}/{{.PredictInputModel}}.tar.gz; +set mst.model.path={{.ModelDir}}; set mst.model.name={{.PredictInputModel}}; set mst.oss.id={{.OSSID}}; set mst.oss.key={{.OSSKey}}; From 90c298b0dd37875a929bed42ad75f46313c0edf0 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Tue, 30 Jul 2019 19:41:58 +0800 Subject: [PATCH 13/15] code clean up --- sql/codegen_alps.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/sql/codegen_alps.go b/sql/codegen_alps.go index ea3dc2d080..5312c4049e 100644 --- a/sql/codegen_alps.go +++ b/sql/codegen_alps.go @@ -57,8 +57,7 @@ type alpsFiller struct { ExitOnSubmit bool // Predict - PredictStanderClause string - PredictUDF string + PredictUDF string // Feature map FeatureMapTable string From da4afeb8c1df74a50afcf0a4db1cebaa1526b277 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Tue, 30 Jul 2019 22:31:35 +0800 Subject: [PATCH 14/15] update --- sql/codegen_alps.go | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/sql/codegen_alps.go b/sql/codegen_alps.go index ee98dfdb48..abd834c0a9 100644 --- a/sql/codegen_alps.go +++ b/sql/codegen_alps.go @@ -290,9 +290,9 @@ func newALPSPredictFiller(pr *extendedSelect, session *pb.Session) (*alpsFiller, }, nil } -func alpsTrain(w *PipeWriter, pr *extendedSelect, db *DB, cwd string, session *pb.Session) error { +func alpsTrain(w *PipeWriter, pr *extendedSelect, db *DB, cwd string, session *pb.Session, ds *trainAndValDataset) error { var program bytes.Buffer - filler, err := newALPSTrainFiller(pr, db, session) + filler, err := newALPSTrainFiller(pr, db, session, ds) if err != nil { return err } @@ -378,13 +378,6 @@ func alpsPred(w *PipeWriter, pr *extendedSelect, db *DB, cwd string, session *pb return nil } -func submitALPS(w *PipeWriter, pr *extendedSelect, db *DB, cwd string, session *pb.Session) error { - if pr.train { - return alpsTrain(w, pr, db, cwd, session) - } - return alpsPred(w, pr, db, cwd, session) -} - func (nc *numericColumn) GenerateAlpsCode(metadata *metadata) ([]string, error) { output := make([]string, 0) output = append(output, From 76554a2da763c8d8709a18b7db908336c9c4f6c9 Mon Sep 17 00:00:00 2001 From: Yancey1989 Date: Thu, 1 Aug 2019 10:35:18 +0800 Subject: [PATCH 15/15] follow comment --- sql/codegen_alps.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sql/codegen_alps.go b/sql/codegen_alps.go index abd834c0a9..b46cd40d49 100644 --- a/sql/codegen_alps.go +++ b/sql/codegen_alps.go @@ -279,7 +279,7 @@ func newALPSPredictFiller(pr *extendedSelect, session *pb.Session) (*alpsFiller, modelDir := fmt.Sprintf("oss://arks-model/%s/%s.tar.gz", session.UserId, pr.predictClause.model) return &alpsFiller{ - IsTraining: true, + IsTraining: false, PredictInputTable: pr.tables[0], PredictOutputTable: pr.predictClause.into, PredictUDF: strings.Join(pr.fields.Strings(), " "), @@ -349,6 +349,7 @@ func alpsPred(w *PipeWriter, pr *extendedSelect, db *DB, cwd string, session *pb if err != nil { return fmt.Errorf("Create ODPS script failed %v", err) } + defer os.Remove(filepath) f.WriteString(program.String()) f.Close() cw := &logChanWriter{wr: w}