|
238 | 238 | "mk_doctype(doc)"
|
239 | 239 | ]
|
240 | 240 | },
|
241 |
| - { |
242 |
| - "cell_type": "code", |
243 |
| - "execution_count": null, |
244 |
| - "id": "d8913bd3", |
245 |
| - "metadata": {}, |
246 |
| - "outputs": [], |
247 |
| - "source": [ |
248 |
| - "from fastcore.xml import ft" |
249 |
| - ] |
250 |
| - }, |
251 |
| - { |
252 |
| - "cell_type": "code", |
253 |
| - "execution_count": null, |
254 |
| - "id": "3b8e6f87", |
255 |
| - "metadata": {}, |
256 |
| - "outputs": [], |
257 |
| - "source": [ |
258 |
| - "#| exports\n", |
259 |
| - "def mk_doc(index:int, # The document index\n", |
260 |
| - " content:str, # The document content\n", |
261 |
| - " source:Optional[str]=None # URL, filename, etc; defaults to `md5(content)` if not provided\n", |
262 |
| - " ) -> tuple:\n", |
263 |
| - " \"Create an `ft` format tuple for a single doc in Anthropic's recommended format\"\n", |
264 |
| - " dt = mk_doctype(content, source)\n", |
265 |
| - " content = ft('document_content', dt.content)\n", |
266 |
| - " source = ft('source', dt.source)\n", |
267 |
| - " return ft('document', source, content, index=index)" |
268 |
| - ] |
269 |
| - }, |
270 | 241 | {
|
271 | 242 | "cell_type": "code",
|
272 | 243 | "execution_count": null,
|
|
277 | 248 | "#| exports\n",
|
278 | 249 | "def mk_doc(index:int, # The document index\n",
|
279 | 250 | " content:str, # The document content\n",
|
280 |
| - " source:Optional[str]=None # URL, filename, etc; defaults to `md5(content)` if not provided\n", |
| 251 | + " source:Optional[str]=None, # URL, filename, etc; defaults to `md5(content)` if not provided\n", |
| 252 | + " **kwargs\n", |
281 | 253 | " ) -> tuple:\n",
|
282 | 254 | " \"Create an `ft` format tuple for a single doc in Anthropic's recommended format\"\n",
|
283 | 255 | " dt = mk_doctype(content, source)\n",
|
284 | 256 | " content = Document_content(dt.content)\n",
|
285 | 257 | " source = Source(dt.source)\n",
|
286 |
| - " return Document(source, content, index=index)" |
| 258 | + " return Document(source, content, index=index, **kwargs)" |
287 | 259 | ]
|
288 | 260 | },
|
289 | 261 | {
|
|
294 | 266 | "We can now generate XML for one document in the suggested format:"
|
295 | 267 | ]
|
296 | 268 | },
|
297 |
| - { |
298 |
| - "cell_type": "code", |
299 |
| - "execution_count": null, |
300 |
| - "id": "5782369d", |
301 |
| - "metadata": {}, |
302 |
| - "outputs": [], |
303 |
| - "source": [ |
304 |
| - "dt = mk_doctype(doc)" |
305 |
| - ] |
306 |
| - }, |
307 | 269 | {
|
308 | 270 | "cell_type": "code",
|
309 | 271 | "execution_count": null,
|
310 | 272 | "id": "e7ed5a9a",
|
311 | 273 | "metadata": {},
|
312 | 274 | "outputs": [
|
313 | 275 | {
|
314 |
| - "name": "stdout", |
315 |
| - "output_type": "stream", |
316 |
| - "text": [ |
317 |
| - "<document index=\"1\">\n", |
318 |
| - " <source>\n", |
319 |
| - "b8898fab\n", |
320 |
| - "\n", |
321 |
| - " <document_content>\n", |
322 |
| - "This is a sample\n", |
323 |
| - "</document_content>\n", |
324 |
| - "</document>\n", |
325 |
| - "\n" |
326 |
| - ] |
| 276 | + "data": { |
| 277 | + "text/markdown": [ |
| 278 | + "```html\n", |
| 279 | + "<document index=\"1\" title=\"test\">\n", |
| 280 | + " <source>\n", |
| 281 | + "b8898fab\n", |
| 282 | + "\n", |
| 283 | + " <document-content>\n", |
| 284 | + "This is a sample\n", |
| 285 | + "</document-content>\n", |
| 286 | + "</document>\n", |
| 287 | + "\n", |
| 288 | + "```" |
| 289 | + ], |
| 290 | + "text/plain": [ |
| 291 | + "document((source(('\\nb8898fab\\n',),{}), document-content(('\\nThis is a sample\\n',),{})),{'index': 1, 'title': 'test'})" |
| 292 | + ] |
| 293 | + }, |
| 294 | + "execution_count": null, |
| 295 | + "metadata": {}, |
| 296 | + "output_type": "execute_result" |
327 | 297 | }
|
328 | 298 | ],
|
329 | 299 | "source": [
|
330 |
| - "print(to_xml(mk_doc(1, doc)))" |
| 300 | + "mk_doc(1, doc, title=\"test\")" |
331 | 301 | ]
|
332 | 302 | },
|
333 | 303 | {
|
|
340 | 310 | "#| exports\n",
|
341 | 311 | "def docs_xml(docs:list[str], # The content of each document\n",
|
342 | 312 | " sources:Optional[list]=None, # URLs, filenames, etc; each one defaults to `md5(content)` if not provided\n",
|
343 |
| - " prefix:bool=True # Include Anthropic's suggested prose intro?\n", |
| 313 | + " prefix:bool=True, # Include Anthropic's suggested prose intro?\n", |
| 314 | + " details:Optional[list]=None # Optional list of dicts with additional attrs for each doc\n", |
344 | 315 | " )->str:\n",
|
345 | 316 | " \"Create an XML string containing `docs` in Anthropic's recommended format\"\n",
|
346 | 317 | " pre = 'Here are some documents for you to reference for your task:\\n\\n' if prefix else ''\n",
|
347 | 318 | " if sources is None: sources = [None]*len(docs)\n",
|
348 |
| - " docs = (mk_doc(i+1, *o) for i,o in enumerate(zip(docs,sources)))\n", |
| 319 | + " if details is None: details = [{}]*len(docs)\n", |
| 320 | + " docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,sources,details)))\n", |
349 | 321 | " return pre + to_xml(Documents(docs))"
|
350 | 322 | ]
|
351 | 323 | },
|
|
374 | 346 | " <source>\n",
|
375 | 347 | "b8898fab\n",
|
376 | 348 | "\n",
|
377 |
| - " <document_content>\n", |
| 349 | + " <document-content>\n", |
378 | 350 | "This is a sample\n",
|
379 |
| - "</document_content>\n", |
| 351 | + "</document-content>\n", |
380 | 352 | " </document>\n",
|
381 | 353 | " <document index=\"2\">\n",
|
382 | 354 | " <source>\n",
|
383 | 355 | "doc.txt\n",
|
384 | 356 | "\n",
|
385 |
| - " <document_content>\n", |
| 357 | + " <document-content>\n", |
386 | 358 | "And another one\n",
|
387 |
| - "</document_content>\n", |
| 359 | + "</document-content>\n", |
388 | 360 | " </document>\n",
|
389 | 361 | "</documents>\n",
|
390 | 362 | "\n"
|
|
462 | 434 | " <document index=\"1\">\n",
|
463 | 435 | " <source>\n",
|
464 | 436 | "samples/sample_core.py\n",
|
465 |
| - "</source>\n", |
466 |
| - " <document_content>\n", |
| 437 | + "\n", |
| 438 | + " <document-content>\n", |
467 | 439 | "import inspect\n",
|
468 | 440 | "empty = inspect.Parameter.empty\n",
|
469 | 441 | "models = 'claude-3-opus-20240229','claude-3-sonnet-20240229','claude-3-haiku-20240307'\n",
|
470 |
| - "</document_content>\n", |
| 442 | + "</document-content>\n", |
471 | 443 | " </document>\n",
|
472 | 444 | " <document index=\"2\">\n",
|
473 | 445 | " <source>\n",
|
474 | 446 | "samples/sample_styles.css\n",
|
475 |
| - "</source>\n", |
476 |
| - " <document_content>\n", |
| 447 | + "\n", |
| 448 | + " <document-content>\n", |
477 | 449 | ".cell { margin-bottom: 1rem; }\n",
|
478 | 450 | ".cell > .sourceCode { margin-bottom: 0; }\n",
|
479 | 451 | ".cell-output > pre { margin-bottom: 0; }\n",
|
480 |
| - "</document_content>\n", |
| 452 | + "</document-content>\n", |
481 | 453 | " </document>\n",
|
482 | 454 | "</documents>\n",
|
483 | 455 | "\n",
|
|
537 | 509 | " <document index=\"1\">\n",
|
538 | 510 | " <source>\n",
|
539 | 511 | "samples/sample_core.py\n",
|
540 |
| - "</source>\n", |
541 |
| - " <document_content>\n", |
| 512 | + "\n", |
| 513 | + " <document-content>\n", |
542 | 514 | "import inspect\n",
|
543 | 515 | "empty = inspect.Parameter.empty\n",
|
544 | 516 | "models = 'claude-3-opus-20240229','claude-3-sonnet-20240229','claude-3-haiku-20240307'\n",
|
545 |
| - "</document_content>\n", |
| 517 | + "</document-content>\n", |
546 | 518 | " </document>\n",
|
547 | 519 | "</documents>\n",
|
548 | 520 | "\n"
|
|
0 commit comments