Skip to content

Commit 8055bdb

Browse files
authored
Merge pull request #9 from AnswerDotAI/dynamic_docs_dir
Allow saving the generated context file into the nbdev docs directory (instead of printing to stdout), and clean up the code
2 parents 5a26fba + dfb74ef commit 8055bdb

File tree

4 files changed

+48
-60
lines changed

4 files changed

+48
-60
lines changed

llms_txt/_modidx.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,11 +6,11 @@
66
'git_url': 'https://github.com/AnswerDotAI/llms-txt',
77
'lib_path': 'llms_txt'},
88
'syms': { 'llms_txt.core': { 'llms_txt.core._doc': ('core.html#_doc', 'llms_txt/core.py'),
9+
'llms_txt.core._local_docs_pth': ('core.html#_local_docs_pth', 'llms_txt/core.py'),
910
'llms_txt.core._parse_links': ('core.html#_parse_links', 'llms_txt/core.py'),
1011
'llms_txt.core._parse_llms': ('core.html#_parse_llms', 'llms_txt/core.py'),
1112
'llms_txt.core._section': ('core.html#_section', 'llms_txt/core.py'),
1213
'llms_txt.core.create_ctx': ('core.html#create_ctx', 'llms_txt/core.py'),
13-
'llms_txt.core.find_root_dir': ('core.html#find_root_dir', 'llms_txt/core.py'),
1414
'llms_txt.core.get_doc_content': ('core.html#get_doc_content', 'llms_txt/core.py'),
1515
'llms_txt.core.get_sizes': ('core.html#get_sizes', 'llms_txt/core.py'),
1616
'llms_txt.core.llms_txt2ctx': ('core.html#llms_txt2ctx', 'llms_txt/core.py'),

llms_txt/core.py

Lines changed: 16 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,8 @@
33
# AUTOGENERATED! DO NOT EDIT! File to edit: ../nbs/01_core.ipynb.
44

55
# %% auto 0
6-
__all__ = ['opt_re', 'named_re', 'search', 'parse_link', 'parse_llms_file', 'find_root_dir', 'get_doc_content', 'mk_ctx',
7-
'get_sizes', 'create_ctx', 'llms_txt2ctx']
6+
__all__ = ['opt_re', 'named_re', 'search', 'parse_link', 'parse_llms_file', 'get_doc_content', 'mk_ctx', 'get_sizes',
7+
'create_ctx', 'llms_txt2ctx']
88

99
# %% ../nbs/01_core.ipynb
1010
import re
@@ -15,6 +15,7 @@
1515
from fastcore.script import *
1616
import httpx
1717
from urllib.parse import urlparse
18+
from nbdev.config import get_config
1819

1920
# %% ../nbs/01_core.ipynb
2021
def opt_re(s):
@@ -68,28 +69,15 @@ def parse_llms_file(txt):
6869
from fastcore.xml import Sections,Project,Doc
6970

7071
# %% ../nbs/01_core.ipynb
71-
def find_root_dir():
72-
"Find the root directory of the nbdev project by looking for settings.ini"
73-
path = Path.cwd()
74-
while path != path.parent:
75-
if (path / 'settings.ini').exists(): return path
76-
path = path.parent
77-
return None
72+
def _local_docs_pth(cfg): return cfg.config_path/'_proc'/cfg.doc_path.name
7873

79-
# %% ../nbs/01_core.ipynb
8074
def get_doc_content(url):
8175
"Fetch content from local file if in nbdev repo."
82-
root_dir = find_root_dir()
83-
if root_dir:
84-
config = Config(root_dir, 'settings.ini')
85-
doc_host = config.get('doc_host')
86-
if doc_host and url.startswith(doc_host):
87-
parsed_url = urlparse(url)
88-
relative_path = parsed_url.path.lstrip('/')
89-
local_path = root_dir / '_docs' / relative_path
90-
if local_path.exists():
91-
with open(local_path, 'r') as f: return f.read()
92-
# If not a local file or file doesn't exist, fetch from URL
76+
cfg = get_config()
77+
if url.startswith(cfg.doc_host):
78+
relative_path = urlparse(url).path.lstrip('/')
79+
local_path = _local_docs_pth(cfg) / relative_path
80+
if local_path.exists(): return local_path.read_text()
9381
return httpx.get(url).text
9482

9583
# %% ../nbs/01_core.ipynb
@@ -131,7 +119,12 @@ def create_ctx(txt, optional=False, n_workers=None):
131119
def llms_txt2ctx(
132120
fname:str, # File name to read
133121
optional:bool_arg=False, # Include 'optional' section?
134-
n_workers:int=None # Number of threads to use for parallel downloading
122+
n_workers:int=None, # Number of threads to use for parallel downloading
123+
save_nbdev_fname:str=None #save output to nbdev `{docs_path}` instead of emitting to stdout
135124
):
136125
"Print a `Project` with a `Section` for each H2 part in file read from `fname`, optionally skipping the 'optional' section."
137-
print(create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers))
126+
ctx = create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers)
127+
if save_nbdev_fname:
128+
cfg = get_config()
129+
(_local_docs_pth(cfg) / save_nbdev_fname).mk_write(ctx)
130+
else: print(ctx)

nbs/01_core.ipynb

Lines changed: 30 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,8 @@
5050
"from fastcore.xml import *\n",
5151
"from fastcore.script import *\n",
5252
"import httpx\n",
53-
"from urllib.parse import urlparse"
53+
"from urllib.parse import urlparse\n",
54+
"from nbdev.config import get_config"
5455
]
5556
},
5657
{
@@ -185,7 +186,7 @@
185186
{
186187
"data": {
187188
"text/plain": [
188-
"{'title': 'FastHTML quick start'}"
189+
"{'title': 'internal docs - ed'}"
189190
]
190191
},
191192
"execution_count": null,
@@ -217,8 +218,7 @@
217218
{
218219
"data": {
219220
"text/plain": [
220-
"{'title': 'FastHTML quick start',\n",
221-
" 'url': 'https://docs.fastht.ml/tutorials/quickstart_for_web_devs.html.md'}"
221+
"{'title': 'internal docs - ed', 'url': 'https://llmstxt.org/ed.html'}"
222222
]
223223
},
224224
"execution_count": null,
@@ -674,42 +674,22 @@
674674
"from fastcore.xml import Sections,Project,Doc"
675675
]
676676
},
677-
{
678-
"cell_type": "code",
679-
"execution_count": null,
680-
"metadata": {},
681-
"outputs": [],
682-
"source": [
683-
"#| export\n",
684-
"def find_root_dir():\n",
685-
" \"Find the root directory of the nbdev project by looking for settings.ini\"\n",
686-
" path = Path.cwd()\n",
687-
" while path != path.parent:\n",
688-
" if (path / 'settings.ini').exists(): return path\n",
689-
" path = path.parent\n",
690-
" return None"
691-
]
692-
},
693677
{
694678
"cell_type": "code",
695679
"execution_count": null,
696680
"metadata": {},
697681
"outputs": [],
698682
"source": [
699683
"#|export\n",
684+
"def _local_docs_pth(cfg): return cfg.config_path/'_proc'/cfg.doc_path.name\n",
685+
"\n",
700686
"def get_doc_content(url):\n",
701687
" \"Fetch content from local file if in nbdev repo.\"\n",
702-
" root_dir = find_root_dir()\n",
703-
" if root_dir:\n",
704-
" config = Config(root_dir, 'settings.ini')\n",
705-
" doc_host = config.get('doc_host')\n",
706-
" if doc_host and url.startswith(doc_host):\n",
707-
" parsed_url = urlparse(url)\n",
708-
" relative_path = parsed_url.path.lstrip('/')\n",
709-
" local_path = root_dir / '_docs' / relative_path\n",
710-
" if local_path.exists():\n",
711-
" with open(local_path, 'r') as f: return f.read()\n",
712-
" # If not a local file or file doesn't exist, fetch from URL\n",
688+
" cfg = get_config()\n",
689+
" if url.startswith(cfg.doc_host):\n",
690+
" relative_path = urlparse(url).path.lstrip('/')\n",
691+
" local_path = _local_docs_pth(cfg) / relative_path\n",
692+
" if local_path.exists(): return local_path.read_text()\n",
713693
" return httpx.get(url).text"
714694
]
715695
},
@@ -797,7 +777,8 @@
797777
{
798778
"data": {
799779
"text/plain": [
800-
"{'docs': {'FastHTML quick start': 27376,\n",
780+
"{'docs': {'internal docs - ed': 34464,\n",
781+
" 'FastHTML quick start': 27376,\n",
801782
" 'HTMX reference': 26427,\n",
802783
" 'Starlette quick guide': 7936},\n",
803784
" 'examples': {'Todo list application': 18558},\n",
@@ -821,7 +802,7 @@
821802
{
822803
"data": {
823804
"text/plain": [
824-
"129814"
805+
"164321"
825806
]
826807
},
827808
"execution_count": null,
@@ -858,10 +839,24 @@
858839
"def llms_txt2ctx(\n",
859840
" fname:str, # File name to read\n",
860841
" optional:bool_arg=False, # Include 'optional' section?\n",
861-
" n_workers:int=None # Number of threads to use for parallel downloading\n",
842+
" n_workers:int=None, # Number of threads to use for parallel downloading\n",
843+
" save_nbdev_fname:str=None #save output to nbdev `{docs_path}` instead of emitting to stdout\n",
862844
"):\n",
863845
" \"Print a `Project` with a `Section` for each H2 part in file read from `fname`, optionally skipping the 'optional' section.\"\n",
864-
" print(create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers))"
846+
" ctx = create_ctx(Path(fname).read_text(), optional=optional, n_workers=n_workers)\n",
847+
" if save_nbdev_fname:\n",
848+
" cfg = get_config()\n",
849+
" (_local_docs_pth(cfg) / save_nbdev_fname).mk_write(ctx)\n",
850+
" else: print(ctx)"
851+
]
852+
},
853+
{
854+
"cell_type": "code",
855+
"execution_count": null,
856+
"metadata": {},
857+
"outputs": [],
858+
"source": [
859+
"Path('/Users/hamel/github/fastcore/_docs/llms-ctx-full.txt').mk_write('hello')"
865860
]
866861
},
867862
{

settings.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ keywords = nbdev jupyter notebook python LLMs NLP
2727
language = English
2828
status = 3
2929
user = AnswerDotAI
30-
requirements = fastcore>=1.7.3 httpx
30+
requirements = fastcore>=1.7.3 httpx nbdev
3131
conda_user = fastai
3232
console_scripts = llms_txt2ctx=llms_txt.core:llms_txt2ctx
3333
readme_nb = index.ipynb

0 commit comments

Comments
 (0)