fix(scraper): additionally grab common words

This commit is contained in:
Matthew Esposito 2024-11-15 16:53:00 -05:00
parent 62717ef6b2
commit 6c64ebd56b
3 changed files with 114 additions and 12 deletions

46
Cargo.lock generated
View file

@ -71,6 +71,12 @@ version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8365de52b16c035ff4fcafe0092ba9390540e3e352870ac09933bebcaa2c8c56"
[[package]]
name = "anyhow"
version = "1.0.93"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c95c10ba0b00a02636238b814946408b1322d5ac4760326e6fb8ec956d85775"
[[package]]
name = "arc-swap"
version = "1.7.1"
@ -307,6 +313,18 @@ version = "0.7.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97"
[[package]]
name = "common-words-all"
version = "0.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "84a6ff47eb813c9e315610ceca0ddd247827e22f2cdadc4189e4676a81470c77"
dependencies = [
"anyhow",
"csv",
"glob",
"serde",
]
[[package]]
name = "cookie"
version = "0.18.1"
@ -370,6 +388,27 @@ dependencies = [
"typenum",
]
[[package]]
name = "csv"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "acdc4883a9c96732e4733212c01447ebd805833b7275a73ca3ee080fd77afdaf"
dependencies = [
"csv-core",
"itoa",
"ryu",
"serde",
]
[[package]]
name = "csv-core"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5efa2b3d7902f4b634a20cae3c9c4e6209dc4779feb6863329607560143efa70"
dependencies = [
"memchr",
]
[[package]]
name = "darling"
version = "0.20.10"
@ -642,6 +681,12 @@ version = "0.31.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f"
[[package]]
name = "glob"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]]
name = "globset"
version = "0.4.15"
@ -1160,6 +1205,7 @@ dependencies = [
"build_html",
"cached",
"clap",
"common-words-all",
"cookie",
"dotenvy",
"fastrand",