Skip to content

Commit b8d9363

Browse files
authored
fix: fix Markdown heading boundary probas (#81)
1 parent 620e556 commit b8d9363

File tree

1 file changed

+3
-2
lines changed

1 file changed

+3
-2
lines changed

src/raglite/_split_sentences.py

Lines changed: 3 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -38,10 +38,11 @@ def get_markdown_heading_indexes(doc: str) -> list[tuple[int, int]]:
38 38       # Indicate that each heading is a contiguous sentence by setting the boundary probabilities.
39 39       boundary_probas = np.full(len(doc), np.nan)
40 40       for heading_start, heading_end in headings:
41    -         if heading_start >= 1:
   41 +         if 0 <= heading_start - 1 < len(boundary_probas):
42 42               boundary_probas[heading_start - 1] = 1 # First heading character starts a sentence.
43 43           boundary_probas[heading_start : heading_end - 1] = 0 # Body does not contain boundaries.
44    -         boundary_probas[heading_end - 1] = 1 # Last heading character is the end of a sentence.
   44 +         if 0 <= heading_end - 1 < len(boundary_probas):
   45 +             boundary_probas[heading_end - 1] = 1 # Last heading character is the end of a sentence.
45 46       return boundary_probas
46 47
47 48
0 commit comments

Comments
 (0)