Skip to content

Commit bb94cf7

Browse files
committed
fixes #8
1 parent e06e897 commit bb94cf7

File tree

6 files changed

+522
-92
lines changed

6 files changed

+522
-92
lines changed

00_xml.ipynb

Lines changed: 42 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -238,35 +238,6 @@
238238
"mk_doctype(doc)"
239239
]
240240
},
241-
{
242-
"cell_type": "code",
243-
"execution_count": null,
244-
"id": "d8913bd3",
245-
"metadata": {},
246-
"outputs": [],
247-
"source": [
248-
"from fastcore.xml import ft"
249-
]
250-
},
251-
{
252-
"cell_type": "code",
253-
"execution_count": null,
254-
"id": "3b8e6f87",
255-
"metadata": {},
256-
"outputs": [],
257-
"source": [
258-
"#| exports\n",
259-
"def mk_doc(index:int, # The document index\n",
260-
" content:str, # The document content\n",
261-
" source:Optional[str]=None # URL, filename, etc; defaults to `md5(content)` if not provided\n",
262-
" ) -> tuple:\n",
263-
" \"Create an `ft` format tuple for a single doc in Anthropic's recommended format\"\n",
264-
" dt = mk_doctype(content, source)\n",
265-
" content = ft('document_content', dt.content)\n",
266-
" source = ft('source', dt.source)\n",
267-
" return ft('document', source, content, index=index)"
268-
]
269-
},
270241
{
271242
"cell_type": "code",
272243
"execution_count": null,
@@ -277,13 +248,14 @@
277248
"#| exports\n",
278249
"def mk_doc(index:int, # The document index\n",
279250
" content:str, # The document content\n",
280-
" source:Optional[str]=None # URL, filename, etc; defaults to `md5(content)` if not provided\n",
251+
" source:Optional[str]=None, # URL, filename, etc; defaults to `md5(content)` if not provided\n",
252+
" **kwargs\n",
281253
" ) -> tuple:\n",
282254
" \"Create an `ft` format tuple for a single doc in Anthropic's recommended format\"\n",
283255
" dt = mk_doctype(content, source)\n",
284256
" content = Document_content(dt.content)\n",
285257
" source = Source(dt.source)\n",
286-
" return Document(source, content, index=index)"
258+
" return Document(source, content, index=index, **kwargs)"
287259
]
288260
},
289261
{
@@ -294,40 +266,38 @@
294266
"We can now generate XML for one document in the suggested format:"
295267
]
296268
},
297-
{
298-
"cell_type": "code",
299-
"execution_count": null,
300-
"id": "5782369d",
301-
"metadata": {},
302-
"outputs": [],
303-
"source": [
304-
"dt = mk_doctype(doc)"
305-
]
306-
},
307269
{
308270
"cell_type": "code",
309271
"execution_count": null,
310272
"id": "e7ed5a9a",
311273
"metadata": {},
312274
"outputs": [
313275
{
314-
"name": "stdout",
315-
"output_type": "stream",
316-
"text": [
317-
"<document index=\"1\">\n",
318-
" <source>\n",
319-
"b8898fab\n",
320-
"\n",
321-
" <document_content>\n",
322-
"This is a sample\n",
323-
"</document_content>\n",
324-
"</document>\n",
325-
"\n"
326-
]
276+
"data": {
277+
"text/markdown": [
278+
"```html\n",
279+
"<document index=\"1\" title=\"test\">\n",
280+
" <source>\n",
281+
"b8898fab\n",
282+
"\n",
283+
" <document-content>\n",
284+
"This is a sample\n",
285+
"</document-content>\n",
286+
"</document>\n",
287+
"\n",
288+
"```"
289+
],
290+
"text/plain": [
291+
"document((source(('\\nb8898fab\\n',),{}), document-content(('\\nThis is a sample\\n',),{})),{'index': 1, 'title': 'test'})"
292+
]
293+
},
294+
"execution_count": null,
295+
"metadata": {},
296+
"output_type": "execute_result"
327297
}
328298
],
329299
"source": [
330-
"print(to_xml(mk_doc(1, doc)))"
300+
"mk_doc(1, doc, title=\"test\")"
331301
]
332302
},
333303
{
@@ -340,12 +310,14 @@
340310
"#| exports\n",
341311
"def docs_xml(docs:list[str], # The content of each document\n",
342312
" sources:Optional[list]=None, # URLs, filenames, etc; each one defaults to `md5(content)` if not provided\n",
343-
" prefix:bool=True # Include Anthropic's suggested prose intro?\n",
313+
" prefix:bool=True, # Include Anthropic's suggested prose intro?\n",
314+
" details:Optional[list]=None # Optional list of dicts with additional attrs for each doc\n",
344315
" )->str:\n",
345316
" \"Create an XML string containing `docs` in Anthropic's recommended format\"\n",
346317
" pre = 'Here are some documents for you to reference for your task:\\n\\n' if prefix else ''\n",
347318
" if sources is None: sources = [None]*len(docs)\n",
348-
" docs = (mk_doc(i+1, *o) for i,o in enumerate(zip(docs,sources)))\n",
319+
" if details is None: details = [{}]*len(docs)\n",
320+
" docs = (mk_doc(i+1, d, s, **kw) for i,(d,s,kw) in enumerate(zip(docs,sources,details)))\n",
349321
" return pre + to_xml(Documents(docs))"
350322
]
351323
},
@@ -374,17 +346,17 @@
374346
" <source>\n",
375347
"b8898fab\n",
376348
"\n",
377-
" <document_content>\n",
349+
" <document-content>\n",
378350
"This is a sample\n",
379-
"</document_content>\n",
351+
"</document-content>\n",
380352
" </document>\n",
381353
" <document index=\"2\">\n",
382354
" <source>\n",
383355
"doc.txt\n",
384356
"\n",
385-
" <document_content>\n",
357+
" <document-content>\n",
386358
"And another one\n",
387-
"</document_content>\n",
359+
"</document-content>\n",
388360
" </document>\n",
389361
"</documents>\n",
390362
"\n"
@@ -462,22 +434,22 @@
462434
" <document index=\"1\">\n",
463435
" <source>\n",
464436
"samples/sample_core.py\n",
465-
"</source>\n",
466-
" <document_content>\n",
437+
"\n",
438+
" <document-content>\n",
467439
"import inspect\n",
468440
"empty = inspect.Parameter.empty\n",
469441
"models = &#x27;claude-3-opus-20240229&#x27;,&#x27;claude-3-sonnet-20240229&#x27;,&#x27;claude-3-haiku-20240307&#x27;\n",
470-
"</document_content>\n",
442+
"</document-content>\n",
471443
" </document>\n",
472444
" <document index=\"2\">\n",
473445
" <source>\n",
474446
"samples/sample_styles.css\n",
475-
"</source>\n",
476-
" <document_content>\n",
447+
"\n",
448+
" <document-content>\n",
477449
".cell { margin-bottom: 1rem; }\n",
478450
".cell &gt; .sourceCode { margin-bottom: 0; }\n",
479451
".cell-output &gt; pre { margin-bottom: 0; }\n",
480-
"</document_content>\n",
452+
"</document-content>\n",
481453
" </document>\n",
482454
"</documents>\n",
483455
"\n",
@@ -537,12 +509,12 @@
537509
" <document index=\"1\">\n",
538510
" <source>\n",
539511
"samples/sample_core.py\n",
540-
"</source>\n",
541-
" <document_content>\n",
512+
"\n",
513+
" <document-content>\n",
542514
"import inspect\n",
543515
"empty = inspect.Parameter.empty\n",
544516
"models = &#x27;claude-3-opus-20240229&#x27;,&#x27;claude-3-sonnet-20240229&#x27;,&#x27;claude-3-haiku-20240307&#x27;\n",
545-
"</document_content>\n",
517+
"</document-content>\n",
546518
" </document>\n",
547519
"</documents>\n",
548520
"\n"

0 commit comments

Comments
 (0)