|
21 | 21 | {
|
22 | 22 | "cell_type": "markdown",
|
23 | 23 | "id": "ff6f6471-8061-4fdd-85a1-25fdc27c5cf3",
|
24 |
| - "metadata": {}, |
| 24 | + "metadata": { |
| 25 | + "heading_collapsed": true |
| 26 | + }, |
25 | 27 | "source": [
|
26 | 28 | "## Setup"
|
27 | 29 | ]
|
|
30 | 32 | "cell_type": "code",
|
31 | 33 | "execution_count": null,
|
32 | 34 | "id": "033c76fd",
|
33 |
| - "metadata": {}, |
| 35 | + "metadata": { |
| 36 | + "hidden": true |
| 37 | + }, |
34 | 38 | "outputs": [],
|
35 | 39 | "source": [
|
36 | 40 | "#| export\n",
|
|
50 | 54 | "cell_type": "code",
|
51 | 55 | "execution_count": null,
|
52 | 56 | "id": "2795f9fc",
|
53 |
| - "metadata": {}, |
| 57 | + "metadata": { |
| 58 | + "hidden": true |
| 59 | + }, |
54 | 60 | "outputs": [],
|
55 | 61 | "source": [
|
56 | 62 | "#| exports\n",
|
|
73 | 79 | {
|
74 | 80 | "cell_type": "markdown",
|
75 | 81 | "id": "140a35a2",
|
76 |
| - "metadata": {}, |
| 82 | + "metadata": { |
| 83 | + "hidden": true |
| 84 | + }, |
77 | 85 | "source": [
|
78 | 86 | "JSON doesn't map as nicely to XML as the data structure used in `fastcore.xml`, but for simple XML trees it can be convenient -- for example:"
|
79 | 87 | ]
|
|
82 | 90 | "cell_type": "code",
|
83 | 91 | "execution_count": null,
|
84 | 92 | "id": "005a5be4",
|
85 |
| - "metadata": {}, |
| 93 | + "metadata": { |
| 94 | + "hidden": true |
| 95 | + }, |
86 | 96 | "outputs": [
|
87 | 97 | {
|
88 | 98 | "data": {
|
|
119 | 129 | {
|
120 | 130 | "cell_type": "markdown",
|
121 | 131 | "id": "7788c48c",
|
122 |
| - "metadata": {}, |
| 132 | + "metadata": { |
| 133 | + "heading_collapsed": true |
| 134 | + }, |
123 | 135 | "source": [
|
124 | 136 | "## Including documents"
|
125 | 137 | ]
|
126 | 138 | },
|
127 | 139 | {
|
128 | 140 | "cell_type": "markdown",
|
129 | 141 | "id": "479be4c9",
|
130 |
| - "metadata": {}, |
| 142 | + "metadata": { |
| 143 | + "hidden": true |
| 144 | + }, |
131 | 145 | "source": [
|
132 | 146 | "According [to Anthropic](https://docs.anthropic.com/claude/docs/long-context-window-tips), \"*it's essential to structure your prompts in a way that clearly separates the input data from the instructions*\". They recommend using the following format:\n",
|
133 | 147 | "\n",
|
|
153 | 167 | "cell_type": "code",
|
154 | 168 | "execution_count": null,
|
155 | 169 | "id": "a01dc320",
|
156 |
| - "metadata": {}, |
| 170 | + "metadata": { |
| 171 | + "hidden": true |
| 172 | + }, |
157 | 173 | "outputs": [],
|
158 | 174 | "source": [
|
159 | 175 | "#| exports\n",
|
|
163 | 179 | {
|
164 | 180 | "cell_type": "markdown",
|
165 | 181 | "id": "6620a123",
|
166 |
| - "metadata": {}, |
| 182 | + "metadata": { |
| 183 | + "hidden": true |
| 184 | + }, |
167 | 185 | "source": [
|
168 | 186 | "We'll use `doctype` to store our pairs."
|
169 | 187 | ]
|
|
172 | 190 | "cell_type": "code",
|
173 | 191 | "execution_count": null,
|
174 | 192 | "id": "ce853491",
|
175 |
| - "metadata": {}, |
| 193 | + "metadata": { |
| 194 | + "hidden": true |
| 195 | + }, |
176 | 196 | "outputs": [],
|
177 | 197 | "source": [
|
178 | 198 | "#| exports\n",
|
|
186 | 206 | {
|
187 | 207 | "cell_type": "markdown",
|
188 | 208 | "id": "026d3b06",
|
189 |
| - "metadata": {}, |
| 209 | + "metadata": { |
| 210 | + "hidden": true |
| 211 | + }, |
190 | 212 | "source": [
|
191 | 213 | "Since Anthropic's example shows newlines before and after each tag, we'll do the same."
|
192 | 214 | ]
|
|
195 | 217 | "cell_type": "code",
|
196 | 218 | "execution_count": null,
|
197 | 219 | "id": "932e8858",
|
198 |
| - "metadata": {}, |
| 220 | + "metadata": { |
| 221 | + "hidden": true |
| 222 | + }, |
199 | 223 | "outputs": [],
|
200 | 224 | "source": [
|
201 | 225 | "#| exports\n",
|
|
210 | 234 | {
|
211 | 235 | "cell_type": "markdown",
|
212 | 236 | "id": "8800921b",
|
213 |
| - "metadata": {}, |
| 237 | + "metadata": { |
| 238 | + "hidden": true |
| 239 | + }, |
214 | 240 | "source": [
|
215 | 241 | "This is a convenience wrapper to ensure that a `doctype` has the needed information in the right format."
|
216 | 242 | ]
|
|
219 | 245 | "cell_type": "code",
|
220 | 246 | "execution_count": null,
|
221 | 247 | "id": "14f9e185",
|
222 |
| - "metadata": {}, |
| 248 | + "metadata": { |
| 249 | + "hidden": true |
| 250 | + }, |
223 | 251 | "outputs": [
|
224 | 252 | {
|
225 | 253 | "data": {
|
|
241 | 269 | "cell_type": "code",
|
242 | 270 | "execution_count": null,
|
243 | 271 | "id": "d8913bd3",
|
244 |
| - "metadata": {}, |
| 272 | + "metadata": { |
| 273 | + "hidden": true |
| 274 | + }, |
245 | 275 | "outputs": [],
|
246 | 276 | "source": [
|
247 | 277 | "from fastcore.xml import xt"
|
|
251 | 281 | "cell_type": "code",
|
252 | 282 | "execution_count": null,
|
253 | 283 | "id": "3b8e6f87",
|
254 |
| - "metadata": {}, |
| 284 | + "metadata": { |
| 285 | + "hidden": true |
| 286 | + }, |
255 | 287 | "outputs": [],
|
256 | 288 | "source": [
|
257 | 289 | "#| exports\n",
|
|
270 | 302 | "cell_type": "code",
|
271 | 303 | "execution_count": null,
|
272 | 304 | "id": "15e454db",
|
273 |
| - "metadata": {}, |
| 305 | + "metadata": { |
| 306 | + "hidden": true |
| 307 | + }, |
274 | 308 | "outputs": [],
|
275 | 309 | "source": [
|
276 | 310 | "#| exports\n",
|
|
288 | 322 | {
|
289 | 323 | "cell_type": "markdown",
|
290 | 324 | "id": "a8b6ac26",
|
291 |
| - "metadata": {}, |
| 325 | + "metadata": { |
| 326 | + "hidden": true |
| 327 | + }, |
292 | 328 | "source": [
|
293 | 329 | "We can now generate XML for one document in the suggested format:"
|
294 | 330 | ]
|
|
297 | 333 | "cell_type": "code",
|
298 | 334 | "execution_count": null,
|
299 | 335 | "id": "5782369d",
|
300 |
| - "metadata": {}, |
| 336 | + "metadata": { |
| 337 | + "hidden": true |
| 338 | + }, |
301 | 339 | "outputs": [],
|
302 | 340 | "source": [
|
303 | 341 | "dt = mk_doctype(doc)"
|
|
307 | 345 | "cell_type": "code",
|
308 | 346 | "execution_count": null,
|
309 | 347 | "id": "e7ed5a9a",
|
310 |
| - "metadata": {}, |
| 348 | + "metadata": { |
| 349 | + "hidden": true |
| 350 | + }, |
311 | 351 | "outputs": [
|
312 | 352 | {
|
313 | 353 | "name": "stdout",
|
|
333 | 373 | "cell_type": "code",
|
334 | 374 | "execution_count": null,
|
335 | 375 | "id": "ba5ebfab",
|
336 |
| - "metadata": {}, |
| 376 | + "metadata": { |
| 377 | + "hidden": true |
| 378 | + }, |
337 | 379 | "outputs": [],
|
338 | 380 | "source": [
|
339 | 381 | "#| exports\n",
|
|
351 | 393 | {
|
352 | 394 | "cell_type": "markdown",
|
353 | 395 | "id": "85004124",
|
354 |
| - "metadata": {}, |
| 396 | + "metadata": { |
| 397 | + "hidden": true |
| 398 | + }, |
355 | 399 | "source": [
|
356 | 400 | "Putting it all together, we have our final XML format:"
|
357 | 401 | ]
|
|
360 | 404 | "cell_type": "code",
|
361 | 405 | "execution_count": null,
|
362 | 406 | "id": "1dac60f6",
|
363 |
| - "metadata": {}, |
| 407 | + "metadata": { |
| 408 | + "hidden": true |
| 409 | + }, |
364 | 410 | "outputs": [
|
365 | 411 | {
|
366 | 412 | "name": "stdout",
|
|
415 | 461 | {
|
416 | 462 | "cell_type": "markdown",
|
417 | 463 | "id": "65317fc6",
|
418 |
| - "metadata": {}, |
| 464 | + "metadata": { |
| 465 | + "heading_collapsed": true |
| 466 | + }, |
419 | 467 | "source": [
|
420 | 468 | "### File list to context"
|
421 | 469 | ]
|
422 | 470 | },
|
423 | 471 | {
|
424 | 472 | "cell_type": "markdown",
|
425 | 473 | "id": "3778e8ed",
|
426 |
| - "metadata": {}, |
| 474 | + "metadata": { |
| 475 | + "hidden": true |
| 476 | + }, |
427 | 477 | "source": [
|
428 | 478 | "For generating XML context from files, we'll just read them as text and use the file names as `source`."
|
429 | 479 | ]
|
|
432 | 482 | "cell_type": "code",
|
433 | 483 | "execution_count": null,
|
434 | 484 | "id": "0a168636",
|
435 |
| - "metadata": {}, |
| 485 | + "metadata": { |
| 486 | + "hidden": true |
| 487 | + }, |
436 | 488 | "outputs": [],
|
437 | 489 | "source": [
|
438 | 490 | "#| exports\n",
|
|
449 | 501 | "cell_type": "code",
|
450 | 502 | "execution_count": null,
|
451 | 503 | "id": "1bf73d36",
|
452 |
| - "metadata": {}, |
| 504 | + "metadata": { |
| 505 | + "hidden": true |
| 506 | + }, |
453 | 507 | "outputs": [
|
454 | 508 | {
|
455 | 509 | "data": {
|
|
565 | 619 | "@delegates(folder2ctx)\n",
|
566 | 620 | "def folder2ctx_cli(\n",
|
567 | 621 | " folder:str, # Folder name containing files to add to context\n",
|
568 |
| - " prefix:bool=True, # Include Anthropic's suggested prose intro?\n", |
569 | 622 | " **kwargs # Passed to `folder2ctx`\n",
|
570 | 623 | ")->str: # XML for Claude context\n",
|
571 |
| - " return folder2ctx(folder, prefix, **kwargs)" |
| 624 | + " return folder2ctx(folder, **kwargs)" |
572 | 625 | ]
|
573 | 626 | },
|
574 | 627 | {
|
|
596 | 649 | },
|
597 | 650 | {
|
598 | 651 | "cell_type": "code",
|
599 |
| - "execution_count": null, |
| 652 | + "execution_count": 28, |
600 | 653 | "id": "1e9ee5c1",
|
601 | 654 | "metadata": {},
|
602 | 655 | "outputs": [],
|
|
618 | 671 | ],
|
619 | 672 | "metadata": {
|
620 | 673 | "kernelspec": {
|
621 |
| - "display_name": "python3", |
| 674 | + "display_name": "Python 3 (ipykernel)", |
622 | 675 | "language": "python",
|
623 | 676 | "name": "python3"
|
| 677 | + }, |
| 678 | + "language_info": { |
| 679 | + "codemirror_mode": { |
| 680 | + "name": "ipython", |
| 681 | + "version": 3 |
| 682 | + }, |
| 683 | + "file_extension": ".py", |
| 684 | + "mimetype": "text/x-python", |
| 685 | + "name": "python", |
| 686 | + "nbconvert_exporter": "python", |
| 687 | + "pygments_lexer": "ipython3", |
| 688 | + "version": "3.11.8" |
624 | 689 | }
|
625 | 690 | },
|
626 | 691 | "nbformat": 4,
|
|
0 commit comments