-
Notifications
You must be signed in to change notification settings - Fork 0
/
cloud_Sentiment.json
1 lines (1 loc) · 7.45 KB
/
cloud_Sentiment.json
1
{"properties":{},"description":"","processes":{"python3operator1":{"component":"com.sap.system.python3Operator","metadata":{"label":"Python3 Operator","x":505.99999809265137,"y":40,"height":80,"width":120,"extensible":true,"config":{"script":"import hashlib\nimport json\nimport textblob\nimport pandas as pd\n\ndef force_unicode(m):\n if type(m)==str:\n return m\n if type(m) == bytes:\n try:\n return (m.decode('utf-8'))\n except UnicodeDecodeError:\n ascii = str(m).encode('unicode_escape')\n return ascii.decode('utf-8')\n\ndef parse_review_body(txt):\n \"\"\"\n Parse reviews provided by textual body (one review per line).\n \"\"\"\n lines = txt.splitlines()\n records = []\n for line in lines:\n # forcefully handle encoding issues\n line = force_unicode(line.strip())\n if line == \"\": continue\n records.append(parse_review(line.strip()))\n jsonout = json.dumps(records)\n return jsonout\n\ndef parse_review(line):\n \"\"\"\n Parses a review of format: <PRODUCT-ID> Review: <REVIEW-TEXT>.\n Extracts the following attributes and textual features:\n \n Extracts product ID (ID), text length (LENGTH), text (TEXT), sentiment polarity (POLARITY), and\n sentiment subjectivity (SUBJECTIVITY). Returns information as a dictinary.\n \"\"\"\n #bytes(original, 'utf-8')\n try:\n md5 = hashlib.md5(line.encode('utf-8')).hexdigest()\n rid, text = line.split(\": \", 1)\n tb = textblob.TextBlob(text)\n return { \"PRODUCT_ID\": rid[:7],\n \"MD5\": md5,\n \"LENGTH\": len(text), \n \"TEXT\": force_unicode(line) ,\n \"POLARITY\": tb.polarity,\n \"SUBJECTIVITY\": tb.subjectivity }\n except ValueError as e:\n raise ValueError(\"Line does not match expceted format \\\"<PRODUCT-ID> Review: <REVIEW-TEXT>\\\"; LINE: \\\"%s\\\"; ERROR: %s\" % (line, str(e)))\n except Exception as e:\n # just forward\n raise e\n\n# ////////////////////////////////////////////////////////////\n# Wrap parser in python operator\n# ////////////////////////////////////////////////////////////\n\ndef on_input(msg):\n # inform downstream operators about last file:\n # set message.commit.token = 1 for last file\n commit_token = \"0\"\n if msg.body[\"Attributes\"][\"message.lastBatch\"]:\n commit_token = \"1\"\n\n \n # parse the line-based input \n parsed_as_json = parse_review_body(msg.body[\"Body\"])\n \n output_message = api.Message(parsed_as_json)\n df = pd.read_json(parsed_as_json)\n output=df.to_csv(index=None)\n api.send(\"output\", output)\n\napi.set_port_callback(\"input\", on_input)\n"},"additionalinports":[{"name":"input","type":"message"}],"additionaloutports":[{"name":"output","type":"message"}]}},"listfiles1":{"component":"com.sap.file.list","metadata":{"label":"List Files","x":17,"y":32,"height":80,"width":120,"config":{"filter":"With regular expression","mode":"Once","connection":{"configurationType":"Connection Management","connectionID":"CLOUD_STORAGE"},"path":"/DI_ML/TA/DAT361/Product_Reviews","pattern":".*txt","recursive":true}}},"readfile1":{"component":"com.sap.file.read","metadata":{"label":"Read File","x":201.99999904632568,"y":32,"height":80,"width":120,"config":{}}},"tomessageconverter1":{"component":"com.sap.util.toMessageConverter","metadata":{"label":"ToMessage Converter","x":370.9999990463257,"y":47,"height":50,"width":50,"config":{}}},"saphanaclient1":{"component":"com.sap.hana.client2","metadata":{"label":"SAP HANA Client","x":710.999997138977,"y":32,"height":80,"width":120,"config":{"connection":{"configurationType":"Configuration Manager","connectionID":"HANA_ONPREMISE"},"tableName":"\"DI_DEMO\".\"TA01_PRODUCT_REVIEWS\"","csvHeader":"Use as schema","initTable":"Truncate","tableColumns":[{"name":"\"PRODUCT_ID\"","type":"NVARCHAR","size":7},{"name":"\"MD5\"","type":"NVARCHAR","size":32},{"name":"\"LENGTH\"","type":"INTEGER"},{"name":"\"TEXT\"","type":"NVARCHAR","size":1024},{"name":"\"POLARITY\"","type":"DOUBLE"},{"name":"\"SUBJECTIVITY\"","type":"DOUBLE"}]}}},"graphterminator11":{"component":"com.sap.util.graphTerminator","metadata":{"label":"Graph Terminator","x":1163.9999961853027,"y":32,"height":80,"width":120,"config":{}},"name":"graphterminator1"},"messagecounter1111":{"component":"com.sap.node.counter","metadata":{"label":"Message Counter","x":994.9999961853027,"y":32,"height":80,"width":120,"extensible":true,"config":{"script":"/*\n* Copyright 2019 SAP SE or an SAP affiliate company. All rights reserved.\n*/\nconst SDK = require(\"@sap/vflow-sub-node-sdk\");\nconst operator = SDK.Operator.getInstance();\nconst appLog = operator.applicationLogger;\nlet counter = 0;\n\n// Handler to send message to application log\nconst shutDownHandler = (cb) => {\n appLog.info(\"finish\", \"I01\", \"Application finished without problems.\");\n cb(0);\n};\noperator.addShutdownHandler(shutDownHandler)\n\nappLog.info(\"Start\", \"I00\", \"Application successfully started\");\n\n/**\n * This operator receives messages on port \"in1\",\n * increases a counter and forwards the counter value\n * to port \"out1\".\n */\noperator.getInPort(\"in1\").onMessage((msg) => {\n // The content of the actual message is ignored.\n // We will only count the number of messages here.\n counter++;\n if (counter >=10){\n operator.getOutPort(\"out1\")\n .send(counter.toString());\n }\n\n});\n\n/**\n * A keep alive hook for the node process.\n * from its parent - or by SIGKILL if it don't want to.\n * @param tick length of a heart beat of the operator\n */\nfunction keepAlive(tick) {\n setTimeout(() => {\n keepAlive(tick);\n }, tick);\n}\n\n// keep the operator alive in 2sec ticks\nkeepAlive(1000);\n"}},"name":"messagecounter111"},"tostringconverter1":{"component":"com.sap.util.toStringConverter","metadata":{"label":"ToString Converter","x":895.9999961853027,"y":47,"height":50,"width":50,"config":{}}}},"groups":[{"name":"group1","nodes":["python3operator1"],"metadata":{"description":"Group"},"tags":{"textblob36":"0.12.0","pandas":"1.0.3"}}],"connections":[{"metadata":{"points":"141,63 168.99999952316284,63 168.99999952316284,72 196.99999904632568,72"},"src":{"port":"ref","process":"listfiles1"},"tgt":{"port":"ref","process":"readfile1"}},{"metadata":{"points":"325.9999990463257,63 365.9999990463257,63"},"src":{"port":"file","process":"readfile1"},"tgt":{"port":"inbody","process":"tomessageconverter1"}},{"metadata":{"points":"424.9999990463257,72 452.9999985694885,72 452.9999985694885,80 500.99999809265137,80"},"src":{"port":"out","process":"tomessageconverter1"},"tgt":{"port":"input","process":"python3operator1"}},{"metadata":{"points":"629.9999980926514,80 677.9999976158142,80 677.9999976158142,81 705.999997138977,81"},"src":{"port":"output","process":"python3operator1"},"tgt":{"port":"data","process":"saphanaclient1"}},{"metadata":{"points":"834.999997138977,72 862.9999966621399,72 862.9999966621399,63 890.9999961853027,63"},"src":{"port":"result","process":"saphanaclient1"},"tgt":{"port":"ininterface","process":"tostringconverter1"}},{"metadata":{"points":"949.9999961853027,72 989.9999961853027,72"},"src":{"port":"outstring","process":"tostringconverter1"},"tgt":{"port":"in1","process":"messagecounter1111"}},{"metadata":{"points":"1118.9999961853027,72 1158.9999961853027,72"},"src":{"port":"out1","process":"messagecounter1111"},"tgt":{"port":"stop","process":"graphterminator11"}}],"inports":{},"outports":{}}