|
16 | 16 | },
|
17 | 17 | {
|
18 | 18 | "cell_type": "code",
|
19 |
| - "execution_count": 1, |
| 19 | + "execution_count": 3, |
20 | 20 | "metadata": {},
|
21 | 21 | "outputs": [],
|
22 | 22 | "source": [
|
|
31 | 31 | },
|
32 | 32 | {
|
33 | 33 | "cell_type": "code",
|
34 |
| - "execution_count": 2, |
| 34 | + "execution_count": 4, |
35 | 35 | "metadata": {},
|
36 | 36 | "outputs": [],
|
37 | 37 | "source": [
|
38 |
| - "df = pd.read_csv(\"G:/Shared drives/Unidad Compartida Pachitos/Data Science Projects Pachitos/HackOff-CompanySentiments/archive (2)/Tweets.csv\")\n", |
39 |
| - "df.drop(['tweet_id','airline_sentiment_confidence','negativereason','negativereason_confidence','airline_sentiment_gold','name','negativereason_gold','retweet_count',\n", |
40 |
| - "'tweet_coord','tweet_created','tweet_location','user_timezone','airline'], axis = 1, inplace = True)\n", |
41 |
| - "df = df.rename(columns = {'airline_sentiment':'original'})" |
| 38 | + "#df = pd.read_csv(\"G:/Shared drives/Unidad Compartida Pachitos/Data Science Projects Pachitos/HackOff-CompanySentiments/tesla_sf.csv\")" |
42 | 39 | ]
|
43 | 40 | },
|
44 | 41 | {
|
|
47 | 44 | "metadata": {},
|
48 | 45 | "outputs": [],
|
49 | 46 | "source": [
|
50 |
| - "#data_location = 's3://tweets-hackoff2/Tweets.csv'\n", |
51 |
| - "#df = pd.read_csv(data_location)\n", |
52 |
| - "#df" |
| 47 | + "data_location = 's3://tweets-hackoff2/tesla_sf.csv'\n", |
| 48 | + "df = pd.read_csv(data_location)" |
53 | 49 | ]
|
54 | 50 | },
|
55 | 51 | {
|
56 | 52 | "cell_type": "code",
|
57 |
| - "execution_count": 5, |
| 53 | + "execution_count": 6, |
58 | 54 | "metadata": {},
|
59 | 55 | "outputs": [],
|
60 | 56 | "source": [
|
|
89 | 85 | },
|
90 | 86 | {
|
91 | 87 | "cell_type": "code",
|
92 |
| - "execution_count": 6, |
| 88 | + "execution_count": 7, |
93 | 89 | "metadata": {},
|
94 | 90 | "outputs": [],
|
95 | 91 | "source": [
|
|
98 | 94 | },
|
99 | 95 | {
|
100 | 96 | "cell_type": "code",
|
101 |
| - "execution_count": 7, |
| 97 | + "execution_count": 8, |
102 | 98 | "metadata": {},
|
103 | 99 | "outputs": [
|
104 | 100 | {
|
105 | 101 | "output_type": "execute_result",
|
106 | 102 | "data": {
|
107 | 103 | "text/plain": [
|
108 |
| - "<__main__.SentimentAnalysisTweets at 0x1e7c26cd2c8>" |
| 104 | + "<__main__.SentimentAnalysisTweets at 0x20e6cf84188>" |
109 | 105 | ]
|
110 | 106 | },
|
111 | 107 | "metadata": {},
|
112 |
| - "execution_count": 7 |
| 108 | + "execution_count": 8 |
113 | 109 | }
|
114 | 110 | ],
|
115 | 111 | "source": [
|
|
118 | 114 | },
|
119 | 115 | {
|
120 | 116 | "cell_type": "code",
|
121 |
| - "execution_count": 8, |
| 117 | + "execution_count": 14, |
122 | 118 | "metadata": {},
|
123 | 119 | "outputs": [],
|
124 | 120 | "source": [
|
125 |
| - "df_label = pd.DataFrame()\n", |
126 |
| - "df_label['text'] = tweet_class.tweets_.loc[(tweet_class.tweets_['flag'] == 'negative'),'text']\n", |
| 121 | + "df_label = tweet_class.tweets_.copy()\n", |
| 122 | + "df_label.drop(['tweets_clean','sentiment'], axis=1, inplace=True)\n", |
127 | 123 | "df_label.to_csv('label_tweets.csv', index=False)"
|
128 | 124 | ]
|
129 | 125 | },
|
|
132 | 128 | "execution_count": null,
|
133 | 129 | "metadata": {},
|
134 | 130 | "outputs": [],
|
135 |
| - "source": [] |
| 131 | + "source": [ |
| 132 | + "# instantiate S3 client and upload to s3\n", |
| 133 | + "import boto3\n", |
| 134 | + "\n", |
| 135 | + "s3 = boto3.resource('s3')\n", |
| 136 | + "s3.meta.client.upload_file('label_tweets.csv', 'tweets-hackoff', 'DESIRED_S3_OBJECT_NAME')" |
| 137 | + ] |
136 | 138 | }
|
137 | 139 | ],
|
138 | 140 | "metadata": {
|
|
0 commit comments