|
65 | 65 | "outputs": [],
|
66 | 66 | "source": [
|
67 | 67 | "# Load credentials\n",
|
68 |
| - "dotenv_dir = \"/opt/app-root/src/aicoe-osc-demo\"\n", |
| 68 | + "dotenv_dir = os.environ.get(\n", |
| 69 | + " \"CREDENTIAL_DOTENV_DIR\", os.environ.get(\"PWD\", \"/opt/app-root/src\")\n", |
| 70 | + ")\n", |
69 | 71 | "dotenv_path = pathlib.Path(dotenv_dir) / \"credentials.env\"\n",
|
70 | 72 | "if os.path.exists(dotenv_path):\n",
|
71 | 73 | " load_dotenv(dotenv_path=dotenv_path, override=True)"
|
|
106 | 108 | "source": [
|
107 | 109 | "if os.getenv(\"AUTOMATION\"):\n",
|
108 | 110 | " if not os.path.exists(config.BASE_INFER_KPI_FOLDER):\n",
|
109 |
| - " pathlib.Path(config.BASE_INFER_KPI_FOLDER).mkdir(parents=True, exist_ok=True)" |
| 111 | + " pathlib.Path(config.BASE_INFER_KPI_FOLDER).mkdir(parents=True, exist_ok=True)\n", |
| 112 | + "\n", |
| 113 | + " # Download a sample dataset file from s3\n", |
| 114 | + " s3c.download_files_in_prefix_to_dir(\n", |
| 115 | + " s3_prefix=config.BASE_INFER_KPI_S3_PREFIX,\n", |
| 116 | + " destination_dir=config.BASE_INFER_KPI_FOLDER\n", |
| 117 | + " )" |
110 | 118 | ]
|
111 | 119 | },
|
112 | 120 | {
|
|
151 | 159 | " <tbody>\n",
|
152 | 160 | " <tr>\n",
|
153 | 161 | " <th>0</th>\n",
|
154 |
| - " <td>413749035_Eversource Energy_2019-12-31</td>\n", |
| 162 | + " <td>sustainability-report-2019</td>\n", |
155 | 163 | " <td>In which year was the annual report or the sus...</td>\n",
|
156 | 164 | " <td><NA></td>\n",
|
157 | 165 | " <td>2019</td>\n",
|
158 |
| - " <td>7</td>\n", |
159 |
| - " <td>• Our core utility operations performed very w...</td>\n", |
| 166 | + " <td>3</td>\n", |
| 167 | + " <td>This report focuses on the sustainability topi...</td>\n", |
160 | 168 | " <td>Text</td>\n",
|
161 |
| - " <td>13.372849</td>\n", |
162 |
| - " <td>-10.76948</td>\n", |
163 |
| - " <td>-25.76948</td>\n", |
| 169 | + " <td>12.819071</td>\n", |
| 170 | + " <td>-11.384018</td>\n", |
| 171 | + " <td>-26.384018</td>\n", |
164 | 172 | " </tr>\n",
|
165 | 173 | " <tr>\n",
|
166 | 174 | " <th>1</th>\n",
|
167 |
| - " <td>413749035_Eversource Energy_2019-12-31</td>\n", |
| 175 | + " <td>sustainability-report-2019</td>\n", |
168 | 176 | " <td>In which year was the annual report or the sus...</td>\n",
|
169 | 177 | " <td><NA></td>\n",
|
170 |
| - " <td>2019</td>\n", |
171 |
| - " <td>34</td>\n", |
172 |
| - " <td>The American Council for an Energy-Efficient E...</td>\n", |
| 178 | + " <td>2018</td>\n", |
| 179 | + " <td>7</td>\n", |
| 180 | + " <td>According to IPCC’s 1.5 C report from 2018 and...</td>\n", |
173 | 181 | " <td>Text</td>\n",
|
174 |
| - " <td>12.66205</td>\n", |
175 |
| - " <td>-9.417558</td>\n", |
176 |
| - " <td>-24.417558</td>\n", |
| 182 | + " <td>12.50875</td>\n", |
| 183 | + " <td>-6.967497</td>\n", |
| 184 | + " <td>-21.967497</td>\n", |
177 | 185 | " </tr>\n",
|
178 | 186 | " <tr>\n",
|
179 | 187 | " <th>2</th>\n",
|
180 |
| - " <td>413749035_Eversource Energy_2019-12-31</td>\n", |
| 188 | + " <td>sustainability-report-2019</td>\n", |
181 | 189 | " <td>In which year was the annual report or the sus...</td>\n",
|
182 | 190 | " <td><NA></td>\n",
|
183 | 191 | " <td>2019</td>\n",
|
184 |
| - " <td>12</td>\n", |
185 |
| - " <td>The Eversource Internal Audit Department perfo...</td>\n", |
| 192 | + " <td>26</td>\n", |
| 193 | + " <td>Equinor Sustainability report 2019 High value ...</td>\n", |
186 | 194 | " <td>Text</td>\n",
|
187 |
| - " <td>12.373636</td>\n", |
188 |
| - " <td>-10.899869</td>\n", |
189 |
| - " <td>-25.899869</td>\n", |
| 195 | + " <td>12.427496</td>\n", |
| 196 | + " <td>-9.680325</td>\n", |
| 197 | + " <td>-24.680325</td>\n", |
190 | 198 | " </tr>\n",
|
191 | 199 | " <tr>\n",
|
192 | 200 | " <th>3</th>\n",
|
193 |
| - " <td>413749035_Eversource Energy_2019-12-31</td>\n", |
| 201 | + " <td>sustainability-report-2019</td>\n", |
194 | 202 | " <td>In which year was the annual report or the sus...</td>\n",
|
195 | 203 | " <td><NA></td>\n",
|
196 | 204 | " <td>2019</td>\n",
|
197 |
| - " <td>118</td>\n", |
198 |
| - " <td>These are referenced throughout our 2019 Susta...</td>\n", |
| 205 | + " <td>8</td>\n", |
| 206 | + " <td>Equinor Sustainability report 2019Low carbon —...</td>\n", |
199 | 207 | " <td>Text</td>\n",
|
200 |
| - " <td>12.245757</td>\n", |
201 |
| - " <td>-10.556628</td>\n", |
202 |
| - " <td>-25.556628</td>\n", |
| 208 | + " <td>12.356202</td>\n", |
| 209 | + " <td>-8.748007</td>\n", |
| 210 | + " <td>-23.748007</td>\n", |
203 | 211 | " </tr>\n",
|
204 | 212 | " <tr>\n",
|
205 | 213 | " <th>4</th>\n",
|
206 |
| - " <td>413749035_Eversource Energy_2019-12-31</td>\n", |
| 214 | + " <td>sustainability-report-2019</td>\n", |
207 | 215 | " <td>What is the annual total production from coal?</td>\n",
|
208 | 216 | " <td><NA></td>\n",
|
209 | 217 | " <td>no_answer</td>\n",
|
210 | 218 | " <td><NA></td>\n",
|
211 | 219 | " <td><NA></td>\n",
|
212 | 220 | " <td>Text</td>\n",
|
213 |
| - " <td>2.720188</td>\n", |
| 221 | + " <td>2.840454</td>\n", |
214 | 222 | " <td><NA></td>\n",
|
215 | 223 | " <td><NA></td>\n",
|
216 | 224 | " </tr>\n",
|
|
219 | 227 | "</div>"
|
220 | 228 | ],
|
221 | 229 | "text/plain": [
|
222 |
| - " pdf_name \\\n", |
223 |
| - "0 413749035_Eversource Energy_2019-12-31 \n", |
224 |
| - "1 413749035_Eversource Energy_2019-12-31 \n", |
225 |
| - "2 413749035_Eversource Energy_2019-12-31 \n", |
226 |
| - "3 413749035_Eversource Energy_2019-12-31 \n", |
227 |
| - "4 413749035_Eversource Energy_2019-12-31 \n", |
| 230 | + " pdf_name \\\n", |
| 231 | + "0 sustainability-report-2019 \n", |
| 232 | + "1 sustainability-report-2019 \n", |
| 233 | + "2 sustainability-report-2019 \n", |
| 234 | + "3 sustainability-report-2019 \n", |
| 235 | + "4 sustainability-report-2019 \n", |
228 | 236 | "\n",
|
229 | 237 | " kpi kpi_id answer page \\\n",
|
230 |
| - "0 In which year was the annual report or the sus... <NA> 2019 7 \n", |
231 |
| - "1 In which year was the annual report or the sus... <NA> 2019 34 \n", |
232 |
| - "2 In which year was the annual report or the sus... <NA> 2019 12 \n", |
233 |
| - "3 In which year was the annual report or the sus... <NA> 2019 118 \n", |
| 238 | + "0 In which year was the annual report or the sus... <NA> 2019 3 \n", |
| 239 | + "1 In which year was the annual report or the sus... <NA> 2018 7 \n", |
| 240 | + "2 In which year was the annual report or the sus... <NA> 2019 26 \n", |
| 241 | + "3 In which year was the annual report or the sus... <NA> 2019 8 \n", |
234 | 242 | "4 What is the annual total production from coal? <NA> no_answer <NA> \n",
|
235 | 243 | "\n",
|
236 | 244 | " paragraph source score \\\n",
|
237 |
| - "0 • Our core utility operations performed very w... Text 13.372849 \n", |
238 |
| - "1 The American Council for an Energy-Efficient E... Text 12.66205 \n", |
239 |
| - "2 The Eversource Internal Audit Department perfo... Text 12.373636 \n", |
240 |
| - "3 These are referenced throughout our 2019 Susta... Text 12.245757 \n", |
241 |
| - "4 <NA> Text 2.720188 \n", |
| 245 | + "0 This report focuses on the sustainability topi... Text 12.819071 \n", |
| 246 | + "1 According to IPCC’s 1.5 C report from 2018 and... Text 12.50875 \n", |
| 247 | + "2 Equinor Sustainability report 2019 High value ... Text 12.427496 \n", |
| 248 | + "3 Equinor Sustainability report 2019Low carbon —... Text 12.356202 \n", |
| 249 | + "4 <NA> Text 2.840454 \n", |
242 | 250 | "\n",
|
243 | 251 | " no_ans_score no_answer_score_plus_boost \n",
|
244 |
| - "0 -10.76948 -25.76948 \n", |
245 |
| - "1 -9.417558 -24.417558 \n", |
246 |
| - "2 -10.899869 -25.899869 \n", |
247 |
| - "3 -10.556628 -25.556628 \n", |
| 252 | + "0 -11.384018 -26.384018 \n", |
| 253 | + "1 -6.967497 -21.967497 \n", |
| 254 | + "2 -9.680325 -24.680325 \n", |
| 255 | + "3 -8.748007 -23.748007 \n", |
248 | 256 | "4 <NA> <NA> "
|
249 | 257 | ]
|
250 | 258 | },
|
|
254 | 262 | }
|
255 | 263 | ],
|
256 | 264 | "source": [
|
257 |
| - "# Download a sample dataset file from s3\n", |
258 |
| - "s3c.download_files_in_prefix_to_dir(\n", |
259 |
| - " s3_prefix=config.BASE_INFER_KPI_S3_PREFIX,\n", |
260 |
| - " destination_dir=config.BASE_INFER_KPI_FOLDER\n", |
261 |
| - ")\n", |
262 |
| - "\n", |
263 | 265 | "all_files = glob.glob(str(config.BASE_INFER_KPI_FOLDER / \"*.csv\"))\n",
|
264 | 266 | "list_of_files = []\n",
|
265 | 267 | "\n",
|
|
315 | 317 | "output_type": "stream",
|
316 | 318 | "text": [
|
317 | 319 | "<class 'pandas.core.frame.DataFrame'>\n",
|
318 |
| - "RangeIndex: 689 entries, 0 to 688\n", |
| 320 | + "RangeIndex: 96 entries, 0 to 95\n", |
319 | 321 | "Data columns (total 10 columns):\n",
|
320 | 322 | " # Column Non-Null Count Dtype \n",
|
321 | 323 | "--- ------ -------------- ----- \n",
|
322 |
| - " 0 pdf_name 689 non-null string \n", |
323 |
| - " 1 kpi 689 non-null string \n", |
| 324 | + " 0 pdf_name 96 non-null string \n", |
| 325 | + " 1 kpi 96 non-null string \n", |
324 | 326 | " 2 kpi_id 0 non-null Int64 \n",
|
325 |
| - " 3 answer 689 non-null string \n", |
326 |
| - " 4 page 555 non-null Int64 \n", |
327 |
| - " 5 paragraph 555 non-null string \n", |
328 |
| - " 6 source 689 non-null string \n", |
329 |
| - " 7 score 689 non-null Float64\n", |
330 |
| - " 8 no_ans_score 555 non-null Float64\n", |
331 |
| - " 9 no_answer_score_plus_boost 555 non-null Float64\n", |
| 327 | + " 3 answer 96 non-null string \n", |
| 328 | + " 4 page 79 non-null Int64 \n", |
| 329 | + " 5 paragraph 79 non-null string \n", |
| 330 | + " 6 source 96 non-null string \n", |
| 331 | + " 7 score 96 non-null Float64\n", |
| 332 | + " 8 no_ans_score 79 non-null Float64\n", |
| 333 | + " 9 no_answer_score_plus_boost 79 non-null Float64\n", |
332 | 334 | "dtypes: Float64(3), Int64(2), string(5)\n",
|
333 |
| - "memory usage: 57.3 KB\n" |
| 335 | + "memory usage: 8.1 KB\n" |
334 | 336 | ]
|
335 | 337 | }
|
336 | 338 | ],
|
|
359 | 361 | "name": "stdout",
|
360 | 362 | "output_type": "stream",
|
361 | 363 | "text": [
|
362 |
| - "200\n", |
363 |
| - "200\n", |
364 |
| - "200\n", |
365 |
| - "200\n", |
366 |
| - "200\n", |
367 |
| - "200\n", |
368 |
| - "200\n", |
369 |
| - "200\n", |
370 |
| - "200\n", |
371 | 364 | "200\n"
|
372 | 365 | ]
|
373 | 366 | }
|
|
464 | 457 | {
|
465 | 458 | "data": {
|
466 | 459 | "text/plain": [
|
467 |
| - "['sustainability-report-2019',\n", |
| 460 | + "['90044053_Fisher & Paykel Hl_2017-11-07',\n", |
468 | 461 | " 'In which year was the annual report or the sustainability report published?',\n",
|
469 | 462 | " None,\n",
|
470 |
| - " '2019',\n", |
471 |
| - " 26,\n", |
472 |
| - " 'Equinor Sustainability report 2019 High value — creating shared value',\n", |
| 463 | + " '2017',\n", |
| 464 | + " 2,\n", |
| 465 | + " 'Corporate Responsibility and Sustainability Report 2017Fisher & Paykel Healthcare Corporation Limited',\n", |
473 | 466 | " 'Text',\n",
|
474 |
| - " 12.427505493164062,\n", |
475 |
| - " -9.680328369140623,\n", |
476 |
| - " -24.680328369140625]" |
| 467 | + " 11.549626350402832,\n", |
| 468 | + " -8.787019729614258,\n", |
| 469 | + " -23.787019729614254]" |
477 | 470 | ]
|
478 | 471 | },
|
479 | 472 | "execution_count": 11,
|
|
514 | 507 | "name": "python",
|
515 | 508 | "nbconvert_exporter": "python",
|
516 | 509 | "pygments_lexer": "ipython3",
|
517 |
| - "version": "3.8.6" |
| 510 | + "version": "3.8.8" |
518 | 511 | }
|
519 | 512 | },
|
520 | 513 | "nbformat": 4,
|
|
0 commit comments