Skip to content

Commit 84bfdc0

Browse files
committed
feat: ignore frontmatter, support mdx
1 parent 9ce7c1f commit 84bfdc0

File tree

9 files changed

+47
-10
lines changed

9 files changed

+47
-10
lines changed

.github/workflows/test.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ jobs:
1616
strategy:
1717
fail-fast: false
1818
matrix:
19-
ruby: ["3.1", "3.2", "3.3"]
19+
ruby: ["3.1", "3.2", "3.3", "3.4"]
2020
steps:
2121
- uses: actions/checkout@v4
2222
- uses: ruby/setup-ruby@v1

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,12 @@
22

33
## master
44

5+
## 0.2.0 (2025-08-05)
6+
7+
- Ignore frontmatter. ([@palkan][])
8+
9+
- Support `.mdx` files. ([@palkan][])
10+
511
## 0.1.2 (2025-01-03)
612

713
- Fix the bug with links including PWD. ([@palkan][])

etc/list_chunks.rb

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
#!/usr/bin/env ruby
2+
# frozen_string_literal: true
3+
4+
# A script to list all chunks in the dataset, printing each chunk's ID and link
5+
6+
require "uptriever"
7+
require "ruby-progressbar"
8+
9+
client = Uptriever::Client.new
10+
11+
chunks = {}
12+
13+
usage = client.usage
14+
15+
puts "Total chunks: #{usage["chunk_count"]}"
16+
17+
progressbar = ProgressBar.create(title: "Scroll chunks", total: usage["chunk_count"])
18+
19+
client.scroll_chunks do |chunk|
20+
progressbar.increment
21+
chunks[chunk["id"]] = chunk
22+
end
23+
24+
chunks.each_value do |chunk|
25+
puts "Chunk [#{chunk["id"]}]: #{chunk["link"]}"
26+
end

lib/uptriever/chunker.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,6 @@ def chunks
4141

4242
private
4343

44-
def chunk_dup = chunk.dup.tap { _1[:chunk_html] = +"" }
44+
def chunk_dup = chunk.dup
4545
end
4646
end

lib/uptriever/client.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ def push_group(group, upsert: true)
2626

2727
def push_chunk(chunk, upsert: true)
2828
chunk[:upsert_by_tracking_id] = upsert
29-
perform_request("/chunk", chunk.to_json)
29+
perform_request("/chunk", chunk.to_json).inspect
3030
end
3131

3232
def scroll_chunks(per_page: 100)

lib/uptriever/document.rb

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
# frozen_string_literal: true
22

33
require "redcarpet"
4+
require "front_matter_parser"
5+
require "front_matter_parser/syntax_parser"
6+
FrontMatterParser::SyntaxParser::Mdx = FrontMatterParser::SyntaxParser::Md
47

58
module Uptriever
69
class Document
@@ -17,9 +20,10 @@ def initialize(id, path, link, tags: nil, groups: nil, weight: 1.0)
1720

1821
def to_html
1922
case File.extname(path)
20-
when ".md"
23+
when ".md", ".mdx"
24+
parsed = FrontMatterParser::Parser.parse_file(path)
2125
markdown = Redcarpet::Markdown.new(Redcarpet::Render::HTML, autolink: true, tables: true)
22-
markdown.render(File.read(path))
26+
markdown.render(parsed.content)
2327
when ".html"
2428
File.read(path)
2529
else
@@ -29,7 +33,7 @@ def to_html
2933

3034
def to_chunk_json
3135
{
32-
chunk_html: to_html,
36+
chunk_html: +to_html,
3337
link:,
3438
tracking_id: id,
3539
weight:

lib/uptriever/version.rb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
# frozen_string_literal: true
22

33
module Uptriever # :nodoc:
4-
VERSION = "0.1.2"
4+
VERSION = "0.2.0"
55
end

test/chunker_test.rb

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
module Uptriever
66
class ChunkerTest < TestCase
77
def setup
8-
@chunk = {chunk_html: "<h1>Title</h1>", link: "http://example.com", tracking_id: "root"}
8+
@chunk = {chunk_html: +"<h1>Title</h1>", link: "http://example.com", tracking_id: "root"}
99
@chunker = Chunker.new(@chunk)
1010
end
1111

@@ -21,7 +21,7 @@ def test_chunks_with_single_h1_html
2121
end
2222

2323
def test_chunks_with_h2_splitting
24-
@chunker.chunk[:chunk_html] = "<h1>Title</h1><h2>Section 1</h2><p>Content 1</p><h2>Section 2</h2><p>Content 2</p>"
24+
@chunker.chunk[:chunk_html] = +"<h1>Title</h1><h2>Section 1</h2><p>Content 1</p><h2>Section 2</h2><p>Content 2</p>"
2525
chunks = @chunker.chunks
2626
assert_equal 3, chunks.size
2727
assert chunks[1][:link].include?("?id=section-1")
@@ -31,7 +31,7 @@ def test_chunks_with_h2_splitting
3131
end
3232

3333
def test_chunks_with_non_h2_content
34-
@chunker.chunk[:chunk_html] = "<h1>Title</h1><p>Intro</p>"
34+
@chunker.chunk[:chunk_html] = +"<h1>Title</h1><p>Intro</p>"
3535
chunks = @chunker.chunks
3636
assert_equal 1, chunks.size
3737
assert chunks.first[:chunk_html].include?("Intro")

uptriever.gemspec

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ Gem::Specification.new do |s|
3434
s.add_dependency "json"
3535
s.add_dependency "optparse"
3636
s.add_dependency "erb"
37+
s.add_dependency "front_matter_parser"
3738

3839
s.add_development_dependency "bundler", ">= 1.15"
3940
s.add_development_dependency "rake", ">= 13.0"

0 commit comments

Comments
 (0)