From 9d0d15f3f8fe67add8e80aa46708e37f14ba634f Mon Sep 17 00:00:00 2001 From: donpat1to Date: Fri, 21 Nov 2025 00:17:59 +0100 Subject: [PATCH] adding corporate data to webscraper --- .gitignore | 5 +- Cargo.lock | 1301 +++++++++++++++++++++++++++-- Cargo.toml | 44 +- src/config.rs | 38 + src/corporate/mod.rs | 8 + src/corporate/scraper.rs | 116 +++ src/corporate/storage.rs | 64 ++ src/corporate/types.rs | 38 + src/corporate/update.rs | 31 + src/economic/extraction_script.js | 60 ++ src/economic/helpers.rs | 62 ++ src/economic/mod.rs | 11 + src/economic/scraper.rs | 84 ++ src/economic/storage.rs | 113 +++ src/economic/types.rs | 41 + src/economic/update.rs | 116 +++ src/main.rs | 943 +-------------------- src/util.rs | 23 + 18 files changed, 2128 insertions(+), 970 deletions(-) create mode 100644 src/config.rs create mode 100644 src/corporate/mod.rs create mode 100644 src/corporate/scraper.rs create mode 100644 src/corporate/storage.rs create mode 100644 src/corporate/types.rs create mode 100644 src/corporate/update.rs create mode 100644 src/economic/extraction_script.js create mode 100644 src/economic/helpers.rs create mode 100644 src/economic/mod.rs create mode 100644 src/economic/scraper.rs create mode 100644 src/economic/storage.rs create mode 100644 src/economic/types.rs create mode 100644 src/economic/update.rs create mode 100644 src/util.rs diff --git a/.gitignore b/.gitignore index 1e9ae24..8bd3cac 100644 --- a/.gitignore +++ b/.gitignore @@ -24,4 +24,7 @@ target/ /chromedriver-win64/* /economic_events* -/economic_event_changes* \ No newline at end of file +/economic_event_changes* +/corporate_events* +/corporate_prices* +/corporate_event_changes* \ No newline at end of file diff --git a/Cargo.lock b/Cargo.lock index 328f23b..461af89 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,17 +3,22 @@ version = 4 [[package]] -name = "WebScraper" -version = "0.1.0" +name = "adler2" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" + +[[package]] +name = "ahash" +version = "0.8.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ - "anyhow", - "chrono", - "fantoccini", - "futures", - "regex", - "serde", - "serde_json", - "tokio", + "cfg-if", + "getrandom 0.3.4", + "once_cell", + "version_check", + "zerocopy", ] [[package]] @@ -25,6 +30,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + [[package]] name = "android_system_properties" version = "0.1.5" @@ -40,6 +60,19 @@ version = "1.0.100" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" +[[package]] +name = "async-compression" +version = "0.4.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93c1f86859c1af3d514fa19e8323147ff10ea98684e6c7b307912509f50e67b2" +dependencies = [ + "compression-codecs", + "compression-core", + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "atomic-waker" version = "1.1.2" @@ -70,12 +103,39 @@ version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +[[package]] +name = "brotli" +version = "8.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "5.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = "bumpalo" version = "3.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "bytes" version = "1.11.0" @@ -107,10 +167,29 @@ dependencies = [ "iana-time-zone", "js-sys", "num-traits", + "serde", "wasm-bindgen", "windows-link", ] +[[package]] +name = "compression-codecs" +version = "0.4.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "680dc087785c5230f8e8843e2e57ac7c1c90488b6a91b88caa265410568f441b" +dependencies = [ + "brotli", + "compression-core", + "flate2", + "memchr", +] + +[[package]] +name = "compression-core" +version = "0.4.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a9b614a5787ef0c8802a55766480563cb3a93b435898c422ed2a359cf811582" + [[package]] name = "cookie" version = "0.16.2" @@ -123,9 +202,9 @@ dependencies = [ [[package]] name = "cookie" -version = "0.18.1" +version = "0.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ddef33a339a91ea89fb53151bd0a4689cfce27055c291dfa69945475d22c747" +checksum = "7efb37c3e1ccb1ff97164ad95ac1606e8ccd35b3fa0a7d99a304c7f4a428cc24" dependencies = [ "percent-encoding", "time", @@ -148,6 +227,63 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "crc32fast" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9481c1c90cbf2ac953f07c8d4a58aa3945c425b7185c9154d67a65e4230da511" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "crossbeam-deque" +version = "0.8.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51" +dependencies = [ + "crossbeam-epoch", + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-epoch" +version = "0.9.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e" +dependencies = [ + "crossbeam-utils", +] + +[[package]] +name = "crossbeam-utils" +version = "0.8.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28" + +[[package]] +name = "cssparser" +version = "0.31.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5b3df4f93e5fbbe73ec01ec8d3f68bba73107993a5b1e7519273c32db9b0d5be" +dependencies = [ + "cssparser-macros", + "dtoa-short", + "itoa", + "phf 0.11.3", + "smallvec", +] + +[[package]] +name = "cssparser-macros" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13b588ba4ac1a99f7f2964d24b3d896ddc6bf847ee3855dbd4366f058cfcd331" +dependencies = [ + "quote", + "syn", +] + [[package]] name = "deranged" version = "0.5.5" @@ -157,6 +293,17 @@ dependencies = [ "powerfmt", ] +[[package]] +name = "derive_more" +version = "0.99.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6edb4b64a43d977b8e99788fe3a04d483834fba1215a7e02caa415b626497f7f" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "displaydoc" version = "0.2.5" @@ -168,6 +315,48 @@ dependencies = [ "syn", ] +[[package]] +name = "dtoa" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6add3b8cff394282be81f3fc1a0605db594ed69890078ca6e2cab1c408bcf04" + +[[package]] +name = "dtoa-short" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cd1511a7b6a56299bd043a9c167a6d2bfb37bf84a6dfceaba651168adfb43c87" +dependencies = [ + "dtoa", +] + +[[package]] +name = "ego-tree" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "12a0bb14ac04a9fcf170d0bbbef949b44cc492f4452bd20c095636956f653642" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "encoding_rs" +version = "0.8.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" +dependencies = [ + "cfg-if", +] + +[[package]] +name = "equivalent" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" + [[package]] name = "errno" version = "0.3.14" @@ -178,20 +367,38 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "event_backtest_engine" +version = "0.1.0" +dependencies = [ + "anyhow", + "chrono", + "fantoccini", + "futures", + "rayon", + "reqwest", + "scraper", + "serde", + "serde_json", + "tokio", + "tracing", + "tracing-subscriber", +] + [[package]] name = "fantoccini" -version = "0.21.5" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3a6a7a9a454c24453f9807c7f12b37e31ae43f3eb41888ae1f79a9a3e3be3f5" +checksum = "ecae006a07643bcdf9ba8b8a2fe874b98500323ebc83ca9ac7a03824d3e55ee1" dependencies = [ "base64 0.22.1", - "cookie 0.18.1", + "cookie 0.17.0", + "futures-core", "futures-util", - "http 1.3.1", - "http-body-util", - "hyper", - "hyper-tls", - "hyper-util", + "http 0.2.12", + "hyper 0.14.32", + "hyper-rustls 0.24.2", + "hyper-tls 0.5.0", "mime", "openssl", "serde", @@ -214,6 +421,16 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3a3076410a55c90011c298b04d0cfa770b00fa04e1e3c97d3f6c9de105a03844" +[[package]] +name = "flate2" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfe33edd8e85a12a67454e37f8c75e730830d83e313556ab9ebf9ee7fbeb3bfb" +dependencies = [ + "crc32fast", + "miniz_oxide", +] + [[package]] name = "fnv" version = "1.0.7" @@ -244,6 +461,16 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futf" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df420e2e84819663797d1ec6544b13c5be84629e7bb00dc960d6917db2987843" +dependencies = [ + "mac", + "new_debug_unreachable", +] + [[package]] name = "futures" version = "0.3.31" @@ -333,6 +560,35 @@ dependencies = [ "slab", ] +[[package]] +name = "fxhash" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c31b6d751ae2c7f11320402d34e41349dd1016f8d5d45e48c4312bc8625af50c" +dependencies = [ + "byteorder", +] + +[[package]] +name = "getopts" +version = "0.2.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cfe4fbac503b8d1f88e6676011885f34b7174f46e59956bba534ba83abded4df" +dependencies = [ + "unicode-width", +] + +[[package]] +name = "getrandom" +version = "0.2.16" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +dependencies = [ + "cfg-if", + "libc", + "wasi", +] + [[package]] name = "getrandom" version = "0.3.4" @@ -345,6 +601,45 @@ dependencies = [ "wasip2", ] +[[package]] +name = "h2" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3c0b69cfcb4e1b9f1bf2f53f95f766e4661169728ec61cd3fe5a0166f2d1386" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.3.1", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" + +[[package]] +name = "html5ever" +version = "0.27.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c13771afe0e6e846f1e67d038d4cb29998a6779f93c809212e4e9c32efd244d4" +dependencies = [ + "log", + "mac", + "markup5ever", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "http" version = "0.2.12" @@ -367,6 +662,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.1" @@ -386,7 +692,7 @@ dependencies = [ "bytes", "futures-core", "http 1.3.1", - "http-body", + "http-body 1.0.1", "pin-project-lite", ] @@ -396,6 +702,35 @@ version = "1.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" +[[package]] +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "0.14.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] + [[package]] name = "hyper" version = "1.8.1" @@ -406,8 +741,9 @@ dependencies = [ "bytes", "futures-channel", "futures-core", + "h2", "http 1.3.1", - "http-body", + "http-body 1.0.1", "httparse", "itoa", "pin-project-lite", @@ -417,6 +753,51 @@ dependencies = [ "want", ] +[[package]] +name = "hyper-rustls" +version = "0.24.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec3efd23720e2049821a693cbc7e65ea87c72f1c58ff2f9522ff332b1491e590" +dependencies = [ + "futures-util", + "http 0.2.12", + "hyper 0.14.32", + "log", + "rustls 0.21.12", + "rustls-native-certs", + "tokio", + "tokio-rustls 0.24.1", +] + +[[package]] +name = "hyper-rustls" +version = "0.27.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +dependencies = [ + "http 1.3.1", + "hyper 1.8.1", + "hyper-util", + "rustls 0.23.35", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.26.4", + "tower-service", +] + +[[package]] +name = "hyper-tls" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d6183ddfa99b85da61a140bea0efc93fdf56ceaa041b37d553518030827f9905" +dependencies = [ + "bytes", + "hyper 0.14.32", + "native-tls", + "tokio", + "tokio-native-tls", +] + [[package]] name = "hyper-tls" version = "0.6.0" @@ -425,7 +806,7 @@ checksum = "70206fc6890eaca9fde8a0bf71caa2ddfc9fe045ac9e5c70df101a7dbde866e0" dependencies = [ "bytes", "http-body-util", - "hyper", + "hyper 1.8.1", "hyper-util", "native-tls", "tokio", @@ -439,19 +820,24 @@ version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "52e9a2a24dc5c6821e71a7030e1e14b7b632acac55c40e9d2e082c621261bb56" dependencies = [ + "base64 0.22.1", "bytes", "futures-channel", "futures-core", "futures-util", "http 1.3.1", - "http-body", - "hyper", + "http-body 1.0.1", + "hyper 1.8.1", + "ipnet", "libc", + "percent-encoding", "pin-project-lite", - "socket2", + "socket2 0.6.1", + "system-configuration", "tokio", "tower-service", "tracing", + "windows-registry", ] [[package]] @@ -580,6 +966,32 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "indexmap" +version = "2.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0ad4bb2b565bca0645f4d68c5c9af97fba094e9791da685bf83cb5f3ce74acf2" +dependencies = [ + "equivalent", + "hashbrown", +] + +[[package]] +name = "ipnet" +version = "2.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" + +[[package]] +name = "iri-string" +version = "0.7.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f867b9d1d896b67beb18518eda36fdb77a32ea590de864f1325b294a6d14397" +dependencies = [ + "memchr", + "serde", +] + [[package]] name = "itoa" version = "1.0.15" @@ -596,6 +1008,12 @@ dependencies = [ "wasm-bindgen", ] +[[package]] +name = "lazy_static" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" + [[package]] name = "libc" version = "0.2.177" @@ -629,6 +1047,35 @@ version = "0.4.28" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +[[package]] +name = "mac" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c41e0c4fef86961ac6d6f8a82609f55f31b05e4fce149ac5710e439df7619ba4" + +[[package]] +name = "markup5ever" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16ce3abbeba692c8b8441d036ef91aea6df8da2c6b6e21c7e14d3c18e526be45" +dependencies = [ + "log", + "phf 0.11.3", + "phf_codegen 0.11.3", + "string_cache", + "string_cache_codegen", + "tendril", +] + +[[package]] +name = "matchers" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +dependencies = [ + "regex-automata", +] + [[package]] name = "memchr" version = "2.7.6" @@ -641,6 +1088,16 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" +[[package]] +name = "miniz_oxide" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" +dependencies = [ + "adler2", + "simd-adler32", +] + [[package]] name = "mio" version = "1.1.0" @@ -669,6 +1126,21 @@ dependencies = [ "tempfile", ] +[[package]] +name = "new_debug_unreachable" +version = "1.0.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "650eef8c711430f1a879fdd01d4745a7deea475becfb90269c06775983bbf086" + +[[package]] +name = "nu-ansi-term" +version = "0.50.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5" +dependencies = [ + "windows-sys 0.61.2", +] + [[package]] name = "num-conv" version = "0.1.0" @@ -763,6 +1235,96 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "phf" +version = "0.10.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259" +dependencies = [ + "phf_shared 0.10.0", +] + +[[package]] +name = "phf" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd6780a80ae0c52cc120a26a1a42c1ae51b247a253e4e06113d23d2c2edd078" +dependencies = [ + "phf_macros", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf_codegen" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4fb1c3a8bc4dd4e5cfce29b44ffc14bedd2ee294559a294e2a4d4c9e9a6a13cd" +dependencies = [ + "phf_generator 0.10.0", + "phf_shared 0.10.0", +] + +[[package]] +name = "phf_codegen" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aef8048c789fa5e851558d709946d6d79a8ff88c0440c587967f8e94bfb1216a" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", +] + +[[package]] +name = "phf_generator" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6" +dependencies = [ + "phf_shared 0.10.0", + "rand", +] + +[[package]] +name = "phf_generator" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c80231409c20246a13fddb31776fb942c38553c51e871f8cbd687a4cfb5843d" +dependencies = [ + "phf_shared 0.11.3", + "rand", +] + +[[package]] +name = "phf_macros" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f84ac04429c13a7ff43785d75ad27569f2951ce0ffd30a3321230db2fc727216" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "phf_shared" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" +dependencies = [ + "siphasher 0.3.11", +] + +[[package]] +name = "phf_shared" +version = "0.11.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67eabc2ef2a60eb7faa00097bd1ffdb5bd28e62bf39990626a582201b7a754e5" +dependencies = [ + "siphasher 1.0.1", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -796,6 +1358,21 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" +[[package]] +name = "ppv-lite86" +version = "0.2.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9" +dependencies = [ + "zerocopy", +] + +[[package]] +name = "precomputed-hash" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" + [[package]] name = "proc-macro2" version = "1.0.103" @@ -820,6 +1397,56 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "rand" +version = "0.8.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +dependencies = [ + "libc", + "rand_chacha", + "rand_core", +] + +[[package]] +name = "rand_chacha" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88" +dependencies = [ + "ppv-lite86", + "rand_core", +] + +[[package]] +name = "rand_core" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" +dependencies = [ + "getrandom 0.2.16", +] + +[[package]] +name = "rayon" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +dependencies = [ + "either", + "rayon-core", +] + +[[package]] +name = "rayon-core" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "22e18b0f0062d30d4230b2e85ff77fdfe4326feb054b9783a3460d8435c8ab91" +dependencies = [ + "crossbeam-deque", + "crossbeam-utils", +] + [[package]] name = "redox_syscall" version = "0.5.18" @@ -829,18 +1456,6 @@ dependencies = [ "bitflags", ] -[[package]] -name = "regex" -version = "1.12.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" -dependencies = [ - "aho-corasick", - "memchr", - "regex-automata", - "regex-syntax", -] - [[package]] name = "regex-automata" version = "0.4.13" @@ -858,6 +1473,63 @@ version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +[[package]] +name = "reqwest" +version = "0.12.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" +dependencies = [ + "async-compression", + "base64 0.22.1", + "bytes", + "encoding_rs", + "futures-core", + "futures-util", + "h2", + "http 1.3.1", + "http-body 1.0.1", + "http-body-util", + "hyper 1.8.1", + "hyper-rustls 0.27.7", + "hyper-tls 0.6.0", + "hyper-util", + "js-sys", + "log", + "mime", + "native-tls", + "percent-encoding", + "pin-project-lite", + "rustls-pki-types", + "serde", + "serde_json", + "serde_urlencoded", + "sync_wrapper", + "tokio", + "tokio-native-tls", + "tokio-util", + "tower", + "tower-http", + "tower-service", + "url", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + +[[package]] +name = "ring" +version = "0.17.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" +dependencies = [ + "cc", + "cfg-if", + "getrandom 0.2.16", + "libc", + "untrusted", + "windows-sys 0.52.0", +] + [[package]] name = "rustix" version = "1.1.2" @@ -871,6 +1543,82 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "rustls" +version = "0.21.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" +dependencies = [ + "log", + "ring", + "rustls-webpki 0.101.7", + "sct", +] + +[[package]] +name = "rustls" +version = "0.23.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "533f54bc6a7d4f647e46ad909549eda97bf5afc1585190ef692b4286b198bd8f" +dependencies = [ + "once_cell", + "rustls-pki-types", + "rustls-webpki 0.103.8", + "subtle", + "zeroize", +] + +[[package]] +name = "rustls-native-certs" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a9aace74cb666635c918e9c12bc0d348266037aa8eb599b5cba565709a8dff00" +dependencies = [ + "openssl-probe", + "rustls-pemfile", + "schannel", + "security-framework", +] + +[[package]] +name = "rustls-pemfile" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c74cae0a4cf6ccbbf5f359f08efdf8ee7e1dc532573bf0db71968cb56b1448c" +dependencies = [ + "base64 0.21.7", +] + +[[package]] +name = "rustls-pki-types" +version = "1.13.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94182ad936a0c91c324cd46c6511b9510ed16af436d7b5bab34beab0afd55f7a" +dependencies = [ + "zeroize", +] + +[[package]] +name = "rustls-webpki" +version = "0.101.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" +dependencies = [ + "ring", + "untrusted", +] + +[[package]] +name = "rustls-webpki" +version = "0.103.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ffdfa2f5286e2247234e03f680868ac2815974dc39e00ea15adc445d0aafe52" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + [[package]] name = "rustversion" version = "1.0.22" @@ -898,6 +1646,32 @@ version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = "scraper" +version = "0.19.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "761fb705fdf625482d2ed91d3f0559dcfeab2798fe2771c69560a774865d0802" +dependencies = [ + "ahash", + "cssparser", + "ego-tree", + "getopts", + "html5ever", + "once_cell", + "selectors", + "tendril", +] + +[[package]] +name = "sct" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" +dependencies = [ + "ring", + "untrusted", +] + [[package]] name = "security-framework" version = "2.11.1" @@ -921,6 +1695,25 @@ dependencies = [ "libc", ] +[[package]] +name = "selectors" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4eb30575f3638fc8f6815f448d50cb1a2e255b0897985c8c59f4d37b72a07b06" +dependencies = [ + "bitflags", + "cssparser", + "derive_more", + "fxhash", + "log", + "new_debug_unreachable", + "phf 0.10.1", + "phf_codegen 0.10.0", + "precomputed-hash", + "servo_arc", + "smallvec", +] + [[package]] name = "serde" version = "1.0.228" @@ -964,6 +1757,36 @@ dependencies = [ "serde_core", ] +[[package]] +name = "serde_urlencoded" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd" +dependencies = [ + "form_urlencoded", + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "servo_arc" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d036d71a959e00c77a63538b90a6c2390969f9772b096ea837205c6bd0491a44" +dependencies = [ + "stable_deref_trait", +] + +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + [[package]] name = "shlex" version = "1.3.0" @@ -979,6 +1802,24 @@ dependencies = [ "libc", ] +[[package]] +name = "simd-adler32" +version = "0.3.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" + +[[package]] +name = "siphasher" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" + +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "slab" version = "0.4.11" @@ -991,6 +1832,16 @@ version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" +[[package]] +name = "socket2" +version = "0.5.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" +dependencies = [ + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "socket2" version = "0.6.1" @@ -1007,6 +1858,37 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596" +[[package]] +name = "string_cache" +version = "0.8.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf776ba3fa74f83bf4b63c3dcbbf82173db2632ed8452cb2d891d33f459de70f" +dependencies = [ + "new_debug_unreachable", + "parking_lot", + "phf_shared 0.11.3", + "precomputed-hash", + "serde", +] + +[[package]] +name = "string_cache_codegen" +version = "0.5.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c711928715f1fe0fe509c53b43e993a9a557babc2d0a3567d0a3006f1ac931a0" +dependencies = [ + "phf_generator 0.11.3", + "phf_shared 0.11.3", + "proc-macro2", + "quote", +] + +[[package]] +name = "subtle" +version = "2.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" + [[package]] name = "syn" version = "2.0.110" @@ -1018,6 +1900,15 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263" +dependencies = [ + "futures-core", +] + [[package]] name = "synstructure" version = "0.13.2" @@ -1029,6 +1920,27 @@ dependencies = [ "syn", ] +[[package]] +name = "system-configuration" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" +dependencies = [ + "bitflags", + "core-foundation", + "system-configuration-sys", +] + +[[package]] +name = "system-configuration-sys" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e1d1b10ced5ca923a1fcb8d03e96b8d3268065d724548c0211415ff6ac6bac4" +dependencies = [ + "core-foundation-sys", + "libc", +] + [[package]] name = "tempfile" version = "3.23.0" @@ -1036,12 +1948,23 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" dependencies = [ "fastrand", - "getrandom", + "getrandom 0.3.4", "once_cell", "rustix", "windows-sys 0.61.2", ] +[[package]] +name = "tendril" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d24a120c5fc464a3458240ee02c299ebcb9d67b5249c8848b09d639dca8d7bb0" +dependencies = [ + "futf", + "mac", + "utf-8", +] + [[package]] name = "thiserror" version = "1.0.69" @@ -1062,6 +1985,15 @@ dependencies = [ "syn", ] +[[package]] +name = "thread_local" +version = "1.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +dependencies = [ + "cfg-if", +] + [[package]] name = "time" version = "0.3.44" @@ -1115,7 +2047,7 @@ dependencies = [ "parking_lot", "pin-project-lite", "signal-hook-registry", - "socket2", + "socket2 0.6.1", "tokio-macros", "windows-sys 0.61.2", ] @@ -1141,6 +2073,78 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-rustls" +version = "0.24.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c28327cf380ac148141087fbfb9de9d7bd4e84ab5d2c28fbc911d753de8a7081" +dependencies = [ + "rustls 0.21.12", + "tokio", +] + +[[package]] +name = "tokio-rustls" +version = "0.26.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" +dependencies = [ + "rustls 0.23.35", + "tokio", +] + +[[package]] +name = "tokio-util" +version = "0.7.17" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2efa149fe76073d6e8fd97ef4f4eca7b67f599660115591483572e406e165594" +dependencies = [ + "bytes", + "futures-core", + "futures-sink", + "pin-project-lite", + "tokio", +] + +[[package]] +name = "tower" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +dependencies = [ + "futures-core", + "futures-util", + "pin-project-lite", + "sync_wrapper", + "tokio", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-http" +version = "0.6.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +dependencies = [ + "bitflags", + "bytes", + "futures-util", + "http 1.3.1", + "http-body 1.0.1", + "iri-string", + "pin-project-lite", + "tower", + "tower-layer", + "tower-service", +] + +[[package]] +name = "tower-layer" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e" + [[package]] name = "tower-service" version = "0.3.3" @@ -1154,9 +2158,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" dependencies = [ "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "tracing-core" version = "0.1.34" @@ -1164,6 +2180,36 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" dependencies = [ "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex-automata", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", ] [[package]] @@ -1184,6 +2230,18 @@ version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +[[package]] +name = "unicode-width" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b4ac048d71ede7ee76d585517add45da530660ef4390e49b098733c6e897f254" + +[[package]] +name = "untrusted" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" + [[package]] name = "url" version = "2.5.7" @@ -1196,12 +2254,24 @@ dependencies = [ "serde", ] +[[package]] +name = "utf-8" +version = "0.7.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" + [[package]] name = "utf8_iter" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" +[[package]] +name = "valuable" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" + [[package]] name = "vcpkg" version = "0.2.15" @@ -1251,6 +2321,19 @@ dependencies = [ "wasm-bindgen-shared", ] +[[package]] +name = "wasm-bindgen-futures" +version = "0.4.55" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "551f88106c6d5e7ccc7cd9a16f312dd3b5d36ea8b4954304657d5dfba115d4a0" +dependencies = [ + "cfg-if", + "js-sys", + "once_cell", + "wasm-bindgen", + "web-sys", +] + [[package]] name = "wasm-bindgen-macro" version = "0.2.105" @@ -1283,6 +2366,16 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "web-sys" +version = "0.3.82" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a1f95c0d03a47f4ae1f7a64643a6bb97465d9b740f0fa8f90ea33915c99a9a1" +dependencies = [ + "js-sys", + "wasm-bindgen", +] + [[package]] name = "webdriver" version = "0.50.0" @@ -1344,6 +2437,17 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" +[[package]] +name = "windows-registry" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "02752bf7fbdcce7f2a27a742f798510f3e5ad88dbe84871e5168e2120c3d5720" +dependencies = [ + "windows-link", + "windows-result", + "windows-strings", +] + [[package]] name = "windows-result" version = "0.4.1" @@ -1362,13 +2466,22 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets 0.52.6", +] + [[package]] name = "windows-sys" version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" dependencies = [ - "windows-targets", + "windows-targets 0.53.5", ] [[package]] @@ -1380,6 +2493,22 @@ dependencies = [ "windows-link", ] +[[package]] +name = "windows-targets" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" +dependencies = [ + "windows_aarch64_gnullvm 0.52.6", + "windows_aarch64_msvc 0.52.6", + "windows_i686_gnu 0.52.6", + "windows_i686_gnullvm 0.52.6", + "windows_i686_msvc 0.52.6", + "windows_x86_64_gnu 0.52.6", + "windows_x86_64_gnullvm 0.52.6", + "windows_x86_64_msvc 0.52.6", +] + [[package]] name = "windows-targets" version = "0.53.5" @@ -1387,58 +2516,106 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" dependencies = [ "windows-link", - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_gnullvm", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.53.1", + "windows_aarch64_msvc 0.53.1", + "windows_i686_gnu 0.53.1", + "windows_i686_gnullvm 0.53.1", + "windows_i686_msvc 0.53.1", + "windows_x86_64_gnu 0.53.1", + "windows_x86_64_gnullvm 0.53.1", + "windows_x86_64_msvc 0.53.1", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" + [[package]] name = "windows_aarch64_gnullvm" version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53" +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" + [[package]] name = "windows_aarch64_msvc" version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006" +[[package]] +name = "windows_i686_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" + [[package]] name = "windows_i686_gnu" version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3" +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" + [[package]] name = "windows_i686_gnullvm" version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c" +[[package]] +name = "windows_i686_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" + [[package]] name = "windows_i686_msvc" version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2" +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" + [[package]] name = "windows_x86_64_gnu" version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" + [[package]] name = "windows_x86_64_gnullvm" version = "0.53.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1" +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + [[package]] name = "windows_x86_64_msvc" version = "0.53.1" @@ -1480,6 +2657,26 @@ dependencies = [ "synstructure", ] +[[package]] +name = "zerocopy" +version = "0.8.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43fa6694ed34d6e57407afbccdeecfa268c470a7d2a5b0cf49ce9fcc345afb90" +dependencies = [ + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.8.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c640b22cd9817fae95be82f0d2f90b11f7605f6c319d16705c459b27ac2cbc26" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "zerofrom" version = "0.1.6" @@ -1501,6 +2698,12 @@ dependencies = [ "synstructure", ] +[[package]] +name = "zeroize" +version = "1.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" + [[package]] name = "zerotrie" version = "0.2.3" diff --git a/Cargo.toml b/Cargo.toml index 7bfc29d..f46ff06 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,14 +1,40 @@ [package] -name = "WebScraper" +name = "event_backtest_engine" version = "0.1.0" -edition = "2024" +edition = "2021" +authors = ["Your Name "] +description = "High-impact economic & corporate earnings data collector for short-event backtesting (overnight/weekend gaps)" +license = "MIT OR Apache-2.0" +repository = "https://github.com/yourname/event_backtest_engine" +keywords = ["finance", "earnings", "economic-calendar", "backtesting", "quant"] +categories = ["finance", "data-structures", "asynchronous"] +# =================================================================== +# Dependencies +# =================================================================== [dependencies] -fantoccini = { version = "0.21.5", default-features = false, features = ["native-tls"] } -tokio = { version = "1", features = ["full"] } -serde = { version = "1", features = ["derive"] } -anyhow = "1.0" -futures = "0.3" +# Async runtime +tokio = { version = "1.38", features = ["full"] } + +# Web scraping & HTTP +reqwest = { version = "0.12", features = ["json", "gzip", "brotli", "deflate"] } +scraper = "0.19" # HTML parsing for Yahoo earnings pages +fantoccini = { version = "0.20", features = ["rustls-tls"] } # Headless Chrome for finanzen.net + +# Serialization +serde = { version = "1.0", features = ["derive"] } serde_json = "1.0" -chrono = "0.4.42" -regex = "1.0" \ No newline at end of file + +# Date & time +chrono = { version = "0.4", features = ["serde"] } + +# Error handling +anyhow = "1.0" + +# Logging (optional but recommended) +tracing = "0.1" +tracing-subscriber = { version = "0.3", features = ["fmt", "env-filter"] } + +# Parallel processing (for batch tickers) +futures = "0.3" +rayon = "1.10" # optional: for parallel price downloads \ No newline at end of file diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..fc02ec4 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,38 @@ +// src/config.rs +#[derive(Debug, Clone)] +pub struct Config { + // Economic calendar start (usually the earliest available on finanzen.net) + pub economic_start_date: String, // e.g. "2007-02-13" + + // Corporate earnings & price history start + pub corporate_start_date: String, // e.g. "2000-01-01" or "2010-01-01" + + // How far into the future we scrape economic events + pub economic_lookahead_months: u32, // default: 3 +} + +impl Default for Config { + fn default() -> Self { + Self { + economic_start_date: "2007-02-13".to_string(), + corporate_start_date: "2010-01-01".to_string(), + economic_lookahead_months: 3, + } + } +} + +impl Config { + pub fn target_end_date(&self) -> String { + let now = chrono::Local::now().naive_local().date(); + let future = now + chrono::Duration::days(30 * self.economic_lookahead_months as i64); + future.format("%Y-%m-%d").to_string() + } +} + +pub fn get_tickers() -> Vec { + vec![ + "AAPL", "MSFT", "NVDA", "GOOGL", "AMZN", + "TSLA", "META", "JPM", "V", "WMT", + // ... your 100–500 tickers here + ].into_iter().map(String::from).collect() +} \ No newline at end of file diff --git a/src/corporate/mod.rs b/src/corporate/mod.rs new file mode 100644 index 0000000..bf1e410 --- /dev/null +++ b/src/corporate/mod.rs @@ -0,0 +1,8 @@ +// src/corporate/mod.rs +pub mod types; +pub mod scraper; +pub mod storage; +pub mod update; + +pub use types::*; +pub use update::run_full_update; \ No newline at end of file diff --git a/src/corporate/scraper.rs b/src/corporate/scraper.rs new file mode 100644 index 0000000..f7474c5 --- /dev/null +++ b/src/corporate/scraper.rs @@ -0,0 +1,116 @@ +// src/corporate/scraper.rs +use super::types::{CompanyEvent, CompanyPrice}; +use reqwest::Client; +use scraper::{Html, Selector}; +use chrono::{NaiveDate, Datelike}; + +const USER_AGENT: &str = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"; + +pub async fn fetch_earnings_history(ticker: &str) -> anyhow::Result> { + let client = Client::new(); + let url = format!("https://finance.yahoo.com/quote/{ticker}/history?filter=earnings"); + + let text = client + .get(&url) + .header("User-Agent", USER_AGENT) + .send() + .await? + .text() + .await?; + + let document = Html::parse_document(&text); + let row_sel = Selector::parse(r#"table tbody tr"#).unwrap(); + let mut events = Vec::new(); + + for row in document.select(&row_sel) { + let cols: Vec = row.text().map(|s| s.trim().to_string()).collect(); + if cols.len() < 4 { continue; } + + let raw_date = cols[0].split(" - ").next().unwrap_or(&cols[0]); + let date = match parse_yahoo_date(raw_date) { + Ok(d) => d, + Err(_) => continue, + }; + + let eps_forecast = parse_float(&cols[1]); + let eps_actual = parse_float(&cols[2]); + + let surprise_pct = if let (Some(f), Some(a)) = (eps_forecast, eps_actual) { + if f.abs() > 0.001 { Some((a - f) / f.abs() * 100.0) } else { None } + } else { None }; + + let time = if cols[0].contains("After") || cols[0].contains("AMC") { + "AMC".to_string() + } else if cols[0].contains("Before") || cols[0].contains("BMO") { + "BMO".to_string() + } else { + "".to_string() + }; + + events.push(CompanyEvent { + ticker: ticker.to_string(), + date: date.format("%Y-%m-%d").to_string(), + time, + period: cols.get(3).cloned().unwrap_or_default(), + eps_forecast, + eps_actual, + revenue_forecast: None, + revenue_actual: None, + surprise_pct, + source: "Yahoo".to_string(), + }); + } + + Ok(events) +} + +pub async fn fetch_price_history(ticker: &str, start: &str, end: &str) -> anyhow::Result> { + let client = Client::new(); + let start_ts = NaiveDate::parse_from_str(start, "%Y-%m-%d")? + .and_hms_opt(0, 0, 0).unwrap().and_utc() + .timestamp(); + + let end_ts = NaiveDate::parse_from_str(end, "%Y-%m-%d")? + .succ_opt().unwrap() + .and_hms_opt(0, 0, 0).unwrap().and_utc() + .timestamp(); + + let url = format!( + "https://query1.finance.yahoo.com/v7/finance/download/{ticker}?period1={start_ts}&period2={end_ts}&interval=1d&events=history&includeAdjustedClose=true" + ); + + let csv = client + .get(&url) + .header("User-Agent", USER_AGENT) + .send() + .await? + .text() + .await?; + + let mut prices = Vec::new(); + for line in csv.lines().skip(1) { + let cols: Vec<&str> = line.split(',').collect(); + if cols.len() < 7 { continue; } + prices.push(CompanyPrice { + ticker: ticker.to_string(), + date: cols[0].to_string(), + open: cols[1].parse()?, + high: cols[2].parse()?, + low: cols[3].parse()?, + close: cols[4].parse()?, + adj_close: cols[5].parse()?, + volume: cols[6].parse()?, + }); + } + Ok(prices) +} + +fn parse_float(s: &str) -> Option { + s.replace("--", "").replace(",", "").parse::().ok() +} + +fn parse_yahoo_date(s: &str) -> anyhow::Result { + NaiveDate::parse_from_str(s, "%b %d, %Y") + .or_else(|_| NaiveDate::parse_from_str(s, "%B %d, %Y")) + .map_err(|_| anyhow::anyhow!("Bad date: {s}")) +} \ No newline at end of file diff --git a/src/corporate/storage.rs b/src/corporate/storage.rs new file mode 100644 index 0000000..4bcfeca --- /dev/null +++ b/src/corporate/storage.rs @@ -0,0 +1,64 @@ +// src/corporate/storage.rs +use super::types::{CompanyEvent, CompanyPrice}; +use std::collections::{HashMap, HashSet}; +use tokio::fs; +use chrono::{Local, NaiveDate}; + +/// Load all events from disk into a HashMap +async fn load_all_events_map() -> anyhow::Result> { + let mut map = HashMap::new(); + let dir = std::path::Path::new("corporate_events"); + if !dir.exists() { + return Ok(map); + } + + let mut entries = fs::read_dir(dir).await?; + while let Some(entry) = entries.next_entry().await? { + let path = entry.path(); + if path.extension().and_then(|s| s.to_str()) == Some("json") { + let content = fs::read_to_string(&path).await?; + if let Ok(events) = serde_json::from_str::>(&content) { + for event in events { + let key = format!("{}|{}", event.ticker, event.date); + map.insert(key, event); + } + } + } + } + Ok(map) +} + +/// Merge new events with existing ones and save back to disk +pub async fn merge_and_save_events(ticker: &str, new_events: Vec) -> anyhow::Result<()> { + let mut existing = load_all_events_map().await?; + + // Insert or update + for event in new_events { + let key = format!("{}|{}", event.ticker, event.date); + existing.insert(key, event); + } + + // Convert back to Vec and save (simple single file for now) + let all_events: Vec = existing.into_values().collect(); + let dir = std::path::Path::new("corporate_events"); + fs::create_dir_all(dir).await?; + let path = dir.join("all_events.json"); + let json = serde_json::to_string_pretty(&all_events)?; + fs::write(&path, json).await?; + Ok(()) +} + +/// Save price history for a single ticker (overwrite old file) +pub async fn save_prices_for_ticker(ticker: &str, prices: Vec) -> anyhow::Result<()> { + let dir = std::path::Path::new("corporate_prices"); + fs::create_dir_all(dir).await?; + let path = dir.join(format!("{}.json", ticker)); + + // Optional: sort by date + let mut prices = prices; + prices.sort_by_key(|p| p.date.clone()); + + let json = serde_json::to_string_pretty(&prices)?; + fs::write(&path, json).await?; + Ok(()) +} \ No newline at end of file diff --git a/src/corporate/types.rs b/src/corporate/types.rs new file mode 100644 index 0000000..50448cc --- /dev/null +++ b/src/corporate/types.rs @@ -0,0 +1,38 @@ +// src/corporate/types.rs +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct CompanyEvent { + pub ticker: String, + pub date: String, // YYYY-MM-DD + pub time: String, // "AMC", "BMO", "TAS", or "" + pub period: String, // "Q1 2025", "FY 2024" + pub eps_forecast: Option, + pub eps_actual: Option, + pub revenue_forecast: Option, + pub revenue_actual: Option, + pub surprise_pct: Option, // (actual - forecast) / |forecast| + pub source: String, // "Yahoo" +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CompanyPrice { + pub ticker: String, + pub date: String, // YYYY-MM-DD + pub open: f64, + pub high: f64, + pub low: f64, + pub close: f64, + pub adj_close: f64, + pub volume: u64, +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct CompanyEventChange { + pub ticker: String, + pub date: String, + pub field: String, // "time", "eps_forecast", "eps_actual", "new_event" + pub old_value: String, + pub new_value: String, + pub detected_at: String, +} \ No newline at end of file diff --git a/src/corporate/update.rs b/src/corporate/update.rs new file mode 100644 index 0000000..1f1655b --- /dev/null +++ b/src/corporate/update.rs @@ -0,0 +1,31 @@ +// src/corporate/update.rs +use super::{scraper::*, storage::*, types::*}; +use crate::config::Config; + +use chrono::Local; +use std::collections::{HashMap, HashSet}; + + +pub async fn run_full_update(tickers: Vec, config: &Config) -> anyhow::Result<()> { + println!("Updating {} tickers (prices from {})", tickers.len(), config.corporate_start_date); + + let today = chrono::Local::now().format("%Y-%m-%d").to_string(); + + for ticker in tickers { + print!(" → {:6} ", ticker); + + // Earnings + if let Ok(events) = fetch_earnings_history(&ticker).await { + merge_and_save_events(&ticker, events.clone()).await?; + println!("{} earnings", events.len()); + } + + // Prices – now using config.corporate_start_date + if let Ok(prices) = fetch_price_history(&ticker, &config.corporate_start_date, &today).await { + save_prices_for_ticker(&ticker, prices).await?; + } + + tokio::time::sleep(tokio::time::Duration::from_millis(250)).await; + } + Ok(()) +} \ No newline at end of file diff --git a/src/economic/extraction_script.js b/src/economic/extraction_script.js new file mode 100644 index 0000000..cefea1b --- /dev/null +++ b/src/economic/extraction_script.js @@ -0,0 +1,60 @@ +// src/economic/extraction_script.js +const events = []; +let currentDate = ''; + +const rows = document.querySelectorAll('#TeletraderForm table tbody tr'); + +for (let i = 0; i < rows.length; i++) { + const row = rows[i]; + const cells = row.querySelectorAll('td'); + + if (cells.length === 1 && cells[0].colSpan === 9) { + const dateText = cells[0].textContent.trim(); + const monthMap = { + 'Januar': '01', 'Februar': '02', 'März': '03', 'April': '04', + 'Mai': '05', 'Juni': '06', 'Juli': '07', 'August': '08', + 'September': '09', 'Oktober': '10', 'November': '11', 'Dezember': '12' + }; + const match = dateText.match(/(\d{1,2})\.\s+([a-zA-ZäöüßÄÖÜ]+)\s+(\d{4})/); + if (match) { + const day = match[1].padStart(2, '0'); + const month = monthMap[match[2]] || '01'; + const year = match[3]; + currentDate = `${year}-${month}-${day}`; + } else { + currentDate = ''; + } + continue; + } + + if (cells.length >= 8) { + const time = cells[0]?.textContent?.trim() || ''; + const country = cells[2]?.textContent?.trim() || ''; + const eventName = cells[4]?.textContent?.trim() || ''; + if (!time || !country || !eventName) continue; + + const yellowStars = cells[3]?.querySelectorAll('.icon--star.font-color-yellow').length || 0; + if (yellowStars !== 3) continue; + + let description = ''; + if (i + 1 < rows.length) { + const next = rows[i + 1]; + const descP = next.querySelector('p'); + if (descP) description = descP.textContent?.trim() || ''; + } + + events.push({ + country, + date: currentDate, + time, + event: eventName, + actual: cells[7]?.textContent?.trim() || '', + forecast: cells[6]?.textContent?.trim() || '', + previous: cells[5]?.textContent?.trim() || '', + importance: 'High', + description + }); + } +} + +return events; \ No newline at end of file diff --git a/src/economic/helpers.rs b/src/economic/helpers.rs new file mode 100644 index 0000000..e2c10cd --- /dev/null +++ b/src/economic/helpers.rs @@ -0,0 +1,62 @@ +// src/economic/helpers.rs +use super::types::*; +use chrono::{Local, NaiveDate}; +use std::collections::{HashMap, HashSet}; + +pub fn event_key(e: &EconomicEvent) -> String { + format!("{}|{}|{}", e.date, e.time, e.event) +} + +pub fn identity_key(e: &EconomicEvent) -> String { + format!("{}|{}|{}", e.country, e.event, e.date.split('-').take(2).collect::>().join("-")) +} + +pub fn build_identity_lookup(events: &HashMap) -> HashMap { + let mut map = HashMap::new(); + for (k, e) in events { + map.insert(identity_key(e), (k.clone(), e.clone())); + } + map +} + +pub fn build_date_event_lookup( + events: &HashMap, +) -> HashMap> { + let mut map: HashMap> = HashMap::new(); + + for (k, e) in events { + let key = format!("{}|{}|{}", e.country, e.event, e.date); + map.entry(key).or_default().push((k.clone(), e.clone())); + } + map +} + +pub fn detect_changes(old: &EconomicEvent, new: &EconomicEvent, today: &str) -> Vec { + let mut changes = Vec::new(); + let ts = Local::now().format("%Y-%m-%d %H:%M:%S").to_string(); + + if new.date.as_str() <= today { return changes; } + + let fields = [ + ("actual", &old.actual, &new.actual), + ("forecast", &old.forecast, &new.forecast), + ("previous", &old.previous, &new.previous), + ("description", &old.description, &new.description), + ]; + + for (field, old_val, new_val) in fields { + if old_val != new_val { + changes.push(EventChange { + date: new.date.clone(), + event: new.event.clone(), + country: new.country.clone(), + change_type: field.to_string(), + field_changed: field.to_string(), + old_value: old_val.clone(), + new_value: new_val.clone(), + detected_at: ts.clone(), + }); + } + } + changes +} \ No newline at end of file diff --git a/src/economic/mod.rs b/src/economic/mod.rs new file mode 100644 index 0000000..16f2619 --- /dev/null +++ b/src/economic/mod.rs @@ -0,0 +1,11 @@ +// src/economic/mod.rs +pub mod types; +pub mod scraper; +pub mod storage; +pub mod update; +pub mod helpers; + +pub use types::*; +pub use scraper::*; +pub use update::run_full_update; +pub use helpers::*; \ No newline at end of file diff --git a/src/economic/scraper.rs b/src/economic/scraper.rs new file mode 100644 index 0000000..e21b064 --- /dev/null +++ b/src/economic/scraper.rs @@ -0,0 +1,84 @@ +// src/economic/scraper.rs +use super::types::{EconomicEvent, ScrapeResult}; +use fantoccini::Client; +use tokio::time::{sleep, Duration}; +use chrono::{Local, NaiveDate}; + +const EXTRACTION_JS: &str = include_str!("extraction_script.js"); + +pub async fn goto_and_prepare(client: &Client) -> anyhow::Result<()> { + client.goto("https://www.finanzen.net/termine/wirtschaftsdaten/").await?; + dismiss_overlays(client).await?; + + if let Ok(tab) = client.find(fantoccini::Locator::Css(r#"div[data-sg-tab-item="teletrader-dates-three-stars"]"#)).await { + tab.click().await?; + println!("High importance tab selected"); + sleep(Duration::from_secs(2)).await; + } + Ok(()) +} + +pub async fn dismiss_overlays(client: &Client) -> anyhow::Result<()> { + for _ in 0..10 { + let removed: bool = client + .execute( + r#"(() => { + const iframe = document.querySelector('iframe[title="Contentpass First Layer"]'); + if (iframe && iframe.parentNode) { + iframe.parentNode.removeChild(iframe); + return true; + } + return false; + })()"#, + vec![], + ) + .await? + .as_bool() + .unwrap_or(false); + if removed { break; } + sleep(Duration::from_millis(500)).await; + } + Ok(()) +} + +pub async fn set_date_range(client: &Client, start: &str, end: &str) -> anyhow::Result<()> { + let script = format!( + r#" + (() => {{ + const from = document.querySelector('#dtTeletraderFromDate'); + const to = document.querySelector('#dtTeletraderEndDate'); + if (from) {{ from.value = '{}'; from.dispatchEvent(new Event('change', {{bubbles: true}})); }} + if (to) {{ to.value = '{}'; to.dispatchEvent(new Event('change', {{bubbles: true}})); }} + return true; + }})() + "#, + start, end + ); + client.execute(&script, vec![]).await?; + sleep(Duration::from_millis(1200)).await; + Ok(()) +} + +pub async fn extract_events(client: &Client) -> anyhow::Result> { + let result = client.execute(EXTRACTION_JS, vec![]).await?; + let array = result.as_array().ok_or_else(|| anyhow::anyhow!("Expected array"))?; + + let mut events = Vec::with_capacity(array.len()); + for val in array { + if let Some(obj) = val.as_object() { + events.push(EconomicEvent { + country: obj["country"].as_str().unwrap_or("").to_string(), + date: obj["date"].as_str().unwrap_or("").to_string(), + time: obj["time"].as_str().unwrap_or("").to_string(), + event: obj["event"].as_str().unwrap_or("").to_string(), + actual: obj["actual"].as_str().unwrap_or("").to_string(), + forecast: obj["forecast"].as_str().unwrap_or("").to_string(), + previous: obj["previous"].as_str().unwrap_or("").to_string(), + importance: "High".to_string(), + description: obj["description"].as_str().unwrap_or("").to_string(), + }); + } + } + println!("Extracted {} high-impact events", events.len()); + Ok(events) +} \ No newline at end of file diff --git a/src/economic/storage.rs b/src/economic/storage.rs new file mode 100644 index 0000000..5ebe8aa --- /dev/null +++ b/src/economic/storage.rs @@ -0,0 +1,113 @@ +use std::collections::HashMap; + +// src/economic/storage.rs +use super::types::*; +use super::helpers::*; +use tokio::fs; +use chrono::{Local, NaiveDate, Datelike}; + +pub async fn scan_existing_chunks() -> anyhow::Result> { + let dir = std::path::Path::new("economic_events"); + let mut chunks = Vec::new(); + + if dir.exists() { + let mut entries = fs::read_dir(dir).await?; + while let Some(entry) = entries.next_entry().await? { + let path = entry.path(); + if path.extension().map(|e| e == "json").unwrap_or(false) { + if let Some(name) = path.file_name().and_then(|n| n.to_str()) { + if name.starts_with("chunk_") { + if let Some(content) = fs::read_to_string(&path).await.ok() { + if let Ok(events) = serde_json::from_str::>(&content) { + let start = name[6..16].to_string(); + let end = name[17..27].to_string(); + chunks.push(ChunkInfo { start_date: start, end_date: end, path, event_count: events.len() }); + } + } + } + } + } + } + } + chunks.sort_by_key(|c| c.start_date.clone()); + Ok(chunks) +} + +pub async fn load_existing_events(chunks: &[ChunkInfo]) -> anyhow::Result> { + let mut map = HashMap::new(); + for chunk in chunks { + let content = fs::read_to_string(&chunk.path).await?; + let events: Vec = serde_json::from_str(&content)?; + for e in events { + map.insert(event_key(&e), e); + } + } + Ok(map) +} + +pub async fn save_optimized_chunks(events: HashMap) -> anyhow::Result<()> { + let dir = std::path::Path::new("economic_events"); + fs::create_dir_all(dir).await?; + + let mut sorted: Vec<_> = events.into_values().collect(); + sorted.sort_by_key(|e| e.date.clone()); + + let mut chunk = Vec::new(); + let mut start: Option = None; + for e in sorted { + let date = NaiveDate::parse_from_str(&e.date, "%Y-%m-%d")?; + if let Some(s) = start { + if (date - s).num_days() > 100 || chunk.len() >= 500 { + save_chunk(&chunk, dir).await?; + chunk.clear(); + start = Some(date); + } + } else { + start = Some(date); + } + chunk.push(e); + } + if !chunk.is_empty() { + save_chunk(&chunk, dir).await?; + } + Ok(()) +} + +async fn save_chunk(events: &[EconomicEvent], dir: &std::path::Path) -> anyhow::Result<()> { + let start = events.iter().map(|e| &e.date).min().unwrap().clone(); + let end = events.iter().map(|e| &e.date).max().unwrap().clone(); + let path = dir.join(format!("chunk_{}_{}.json", start, end)); + fs::write(&path, serde_json::to_string_pretty(events)?).await?; + Ok(()) +} + +pub async fn save_changes(changes: &[EventChange]) -> anyhow::Result<()> { + if changes.is_empty() { return Ok(()); } + let dir = std::path::Path::new("economic_event_changes"); + fs::create_dir_all(dir).await?; + + let mut by_month: HashMap> = HashMap::new(); + for c in changes { + if let Ok(d) = NaiveDate::parse_from_str(&c.date, "%Y-%m-%d") { + let key = format!("{:02}_{}", d.month(), d.year()); + by_month.entry(key).or_default().push(c.clone()); + } + } + + for (month, list) in by_month { + let path = dir.join(format!("event_changes_{}.json", month)); + let mut all = if path.exists() { + let s = fs::read_to_string(&path).await?; + serde_json::from_str(&s).unwrap_or_default() + } else { vec![] }; + all.extend(list); + fs::write(&path, serde_json::to_string_pretty(&all)?).await?; + } + Ok(()) +} + +pub fn target_end_date() -> String { + let now = Local::now().naive_local().date(); + let future = now + chrono::Duration::days(90); + future.format("%Y-%m-%d").to_string() +} \ No newline at end of file diff --git a/src/economic/types.rs b/src/economic/types.rs new file mode 100644 index 0000000..2934bcd --- /dev/null +++ b/src/economic/types.rs @@ -0,0 +1,41 @@ +// src/economic/types.rs +use serde::{Deserialize, Serialize}; + +#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Hash)] +pub struct EconomicEvent { + pub country: String, + pub date: String, // YYYY-MM-DD + pub time: String, + pub event: String, + pub actual: String, + pub forecast: String, + pub previous: String, + pub importance: String, + pub description: String, +} + +#[derive(Debug, Serialize, Deserialize, Clone)] +pub struct EventChange { + pub date: String, + pub event: String, + pub country: String, + pub change_type: String, // actual|forecast|time|newly_added|removed + pub field_changed: String, + pub old_value: String, + pub new_value: String, + pub detected_at: String, +} + +#[derive(Debug)] +pub struct ChunkInfo { + pub start_date: String, + pub end_date: String, + pub path: std::path::PathBuf, + pub event_count: usize, +} + +#[derive(Debug)] +pub struct ScrapeResult { + pub changes: Vec, + pub removed_keys: std::collections::HashSet, +} \ No newline at end of file diff --git a/src/economic/update.rs b/src/economic/update.rs new file mode 100644 index 0000000..d30b56b --- /dev/null +++ b/src/economic/update.rs @@ -0,0 +1,116 @@ +// src/economic/update.rs +use super::{scraper::*, storage::*, helpers::*, types::*}; +use crate::config::Config; +use chrono::{Local, NaiveDate}; + +pub async fn run_full_update(client: &fantoccini::Client, config: &Config) -> anyhow::Result<()> { + let today_str = chrono::Local::now().date_naive().format("%Y-%m-%d").to_string(); + let end_date = config.target_end_date(); + + let chunks = scan_existing_chunks().await?; + let mut events = load_existing_events(&chunks).await?; + println!("Loaded {} events from {} chunks", events.len(), chunks.len()); + + let start_date = if events.is_empty() { + config.economic_start_date.clone() + } else if events.values().any(|e| e.date >= today_str) { + today_str.clone() + } else { + events.values() + .filter_map(|e| chrono::NaiveDate::parse_from_str(&e.date, "%Y-%m-%d").ok()) + .max() + .and_then(|d| d.succ_opt()) + .map(|d| d.format("%Y-%m-%d").to_string()) + .unwrap_or(today_str.clone()) + }; + + println!("Scraping economic events: {} → {}", start_date, end_date); + + let mut current = start_date; + let mut total_changes = 0; + + while current <= end_date { + set_date_range(client, ¤t, &end_date).await?; + tokio::time::sleep(tokio::time::Duration::from_secs(3)).await; + + let new_events = extract_events(client).await?; + if new_events.is_empty() { break; } + + let result = process_batch(&new_events, &mut events, &today_str); + total_changes += result.changes.len(); + save_changes(&result.changes).await?; + + let next = new_events.iter() + .filter_map(|e| chrono::NaiveDate::parse_from_str(&e.date, "%Y-%m-%d").ok()) + .max() + .and_then(|d| d.succ_opt()) + .map(|d| d.format("%Y-%m-%d").to_string()) + .unwrap_or(end_date.clone()); + + if next > end_date { break; } + current = next; + } + + save_optimized_chunks(events).await?; + println!("Economic update complete — {} changes detected", total_changes); + Ok(()) +} + +pub fn process_batch( + new_events: &[EconomicEvent], + existing: &mut std::collections::HashMap, + today: &str, +) -> ScrapeResult { + let mut changes = Vec::new(); + let mut removed = std::collections::HashSet::new(); + + let identity_map = build_identity_lookup(existing); + let date_map = build_date_event_lookup(existing); + + for new in new_events { + let key = event_key(new); + + if let Some(old) = existing.get(&key) { + changes.extend(detect_changes(old, new, today)); + existing.insert(key, new.clone()); + continue; + } + + let date_key = format!("{}|{}|{}", new.country, new.event, new.date); + if let Some(occurrences) = date_map.get(&date_key) { + if let Some((old_key, old_event)) = occurrences.iter().find(|(k, _)| *k != key) { + if new.date.as_str() > today { + changes.push(EventChange { + date: new.date.clone(), + event: new.event.clone(), + country: new.country.clone(), + change_type: "time".to_string(), + field_changed: "time".to_string(), + old_value: old_event.time.clone(), + new_value: new.time.clone(), + detected_at: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(), + }); + } + removed.insert(old_key.clone()); + existing.remove(old_key); + } + } + + if new.date.as_str() > today { + changes.push(EventChange { + date: new.date.clone(), + event: new.event.clone(), + country: new.country.clone(), + change_type: "newly_added".to_string(), + field_changed: "new_event".to_string(), + old_value: "".to_string(), + new_value: format!("{} @ {}", new.date, new.time), + detected_at: Local::now().format("%Y-%m-%d %H:%M:%S").to_string(), + }); + } + + existing.insert(key, new.clone()); + } + + ScrapeResult { changes, removed_keys: removed } +} \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index 92264b2..057ee06 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,930 +1,51 @@ -use chrono::{Datelike, Local, NaiveDate}; +// src/main.rs +mod economic; +mod corporate; +mod config; +mod util; + use fantoccini::{ClientBuilder, Locator}; -use serde::{Deserialize, Serialize}; -use serde_json::{Map, Value}; -use std::{ - collections::{HashMap, HashSet}, - path::PathBuf, - process::Command, -}; -use tokio::{ - fs, signal, - time::{Duration, sleep}, -}; - -#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq, Hash)] -struct EconomicEvent { - country: String, - date: String, - time: String, - event: String, - actual: String, - forecast: String, - previous: String, - importance: String, - description: String, -} - -#[derive(Debug, Serialize, Deserialize, Clone)] -struct EventChange { - date: String, - event: String, - country: String, - change_type: String, // date | time | forecast | previous | actual | description | newly_added - field_changed: String, - old_value: String, - new_value: String, - detected_at: String, -} - -#[derive(Debug)] -struct ChunkInfo { - start_date: String, - end_date: String, - path: PathBuf, - event_count: usize, -} - -#[derive(Debug)] -struct ScrapeResult { - changes: Vec, - removed_keys: HashSet, // Keys of events that were removed (rescheduled) -} - -fn start_chromedriver(port: u16) -> std::process::Child { - Command::new("chromedriver-win64/chromedriver.exe") - .args(&[format!("--port={}", port)]) - .spawn() - .expect("Failed to start ChromeDriver") -} - -async fn dismiss_overlays(client: &fantoccini::Client) -> anyhow::Result<()> { - for _ in 0..10 { - let removed: bool = client - .execute( - r#"(() => { - const iframe = document.querySelector('iframe[title="Contentpass First Layer"]'); - if (iframe && iframe.parentNode) { - iframe.parentNode.removeChild(iframe); - return true; - } - return false; - })()"#, - vec![], - ) - .await? - .as_bool() - .unwrap_or(false); - - if removed { - break; - } - sleep(Duration::from_millis(500)).await; - } - Ok(()) -} - -async fn extract_all_data_via_js( - client: &fantoccini::Client, -) -> anyhow::Result> { - let extraction_script = r#" - const events = []; - let currentDate = ''; - - const rows = document.querySelectorAll('#TeletraderForm table tbody tr'); - - for (let i = 0; i < rows.length; i++) { - const row = rows[i]; - const cells = row.querySelectorAll('td'); - - if (cells.length === 1 && cells[0].colSpan === 9) { - const dateText = cells[0].textContent.trim(); - - const monthMap = { - 'Januar': '01', 'Februar': '02', 'März': '03', 'April': '04', - 'Mai': '05', 'Juni': '06', 'Juli': '07', 'August': '08', - 'September': '09', 'Oktober': '10', 'November': '11', 'Dezember': '12' - }; - - const dateParts = dateText.match(/(\d{1,2})\.\s+([a-zA-ZäöüßÄÖÜ]+)\s+(\d{4})/); - if (dateParts) { - const day = dateParts[1].padStart(2, '0'); - const germanMonth = dateParts[2]; - const year = dateParts[3]; - const month = monthMap[germanMonth] || '01'; - currentDate = `${year}-${month}-${day}`; - } else { - currentDate = ''; - } - continue; - } - - if (cells.length >= 8) { - const time = cells[0]?.textContent?.trim() || ''; - const country = cells[2]?.textContent?.trim() || ''; - const eventName = cells[4]?.textContent?.trim() || ''; - - if (!time || !country || !eventName) continue; - - const importanceCell = cells[3]; - const yellowStarCount = importanceCell?.querySelectorAll('.icon--star.font-color-yellow').length || 0; - - if (yellowStarCount === 3) { - let description = ''; - if (i + 1 < rows.length) { - const nextRow = rows[i + 1]; - const nextCells = nextRow.querySelectorAll('td'); - if (nextCells.length === 1 || nextCells[0].colSpan === 8) { - const descPara = nextRow.querySelector('p'); - if (descPara) { - description = descPara.textContent?.trim() || ''; - } - } - } - - events.push({ - country: country, - date: currentDate, - time: time, - event: eventName, - actual: cells[7]?.textContent?.trim() || '', - forecast: cells[6]?.textContent?.trim() || '', - previous: cells[5]?.textContent?.trim() || '', - importance: 'High', - description: description - }); - } - } - } - - return events; - "#; - - let result = client.execute(extraction_script, vec![]).await?; - - if let Some(events_array) = result.as_array() { - let mut events = Vec::new(); - for event_value in events_array { - if let Some(event_obj) = event_value.as_object() { - let event = EconomicEvent { - country: event_obj - .get("country") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(), - date: event_obj - .get("date") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(), - time: event_obj - .get("time") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(), - event: event_obj - .get("event") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(), - actual: event_obj - .get("actual") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(), - forecast: event_obj - .get("forecast") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(), - previous: event_obj - .get("previous") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(), - importance: event_obj - .get("importance") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(), - description: event_obj - .get("description") - .and_then(|v| v.as_str()) - .unwrap_or("") - .to_string(), - }; - events.push(event); - } - } - println!("Extracted {} events (3 YELLOW stars ONLY)", events.len()); - return Ok(events); - } - - Ok(vec![]) -} - -async fn set_date_range(client: &fantoccini::Client, start: &str, end: &str) -> anyhow::Result<()> { - let set_dates_script = format!( - r#" - (() => {{ - const fromInput = document.querySelector('#dtTeletraderFromDate'); - const toInput = document.querySelector('#dtTeletraderEndDate'); - - if (fromInput) {{ - fromInput.value = '{}'; - fromInput.dispatchEvent(new Event('input', {{ bubbles: true }})); - fromInput.dispatchEvent(new Event('change', {{ bubbles: true }})); - }} - - if (toInput) {{ - toInput.value = '{}'; - toInput.dispatchEvent(new Event('input', {{ bubbles: true }})); - toInput.dispatchEvent(new Event('change', {{ bubbles: true }})); - }} - - return !!fromInput && !!toInput; - }})() - "#, - start, end - ); - - client.execute(&set_dates_script, vec![]).await?; - sleep(Duration::from_millis(1000)).await; - - Ok(()) -} - -fn parse_date(date: &str) -> Option { - NaiveDate::parse_from_str(date.trim(), "%Y-%m-%d").ok() -} - -fn calculate_next_start_date(events: &[EconomicEvent]) -> anyhow::Result { - let mut dates: Vec<_> = events.iter().filter_map(|e| parse_date(&e.date)).collect(); - - if dates.is_empty() { - return Err(anyhow::anyhow!("No parseable dates found")); - } - - dates.sort(); - let next = dates.last().unwrap().succ_opt().unwrap(); - - Ok(next.format("%Y-%m-%d").to_string()) -} - -/// Storage key: date|time|event (for exact occurrence deduplication) -fn event_lookup_key(event: &EconomicEvent) -> String { - format!("{}|{}|{}", event.date, event.time, event.event) -} - -/// Identity key: country|event|date (to distinguish recurring monthly/quarterly events) -/// This prevents treating December and January releases of the same recurring event as reschedules -fn event_identity_key(event: &EconomicEvent) -> String { - format!("{}|{}|{}", event.country, event.event, event.date) -} - -/// Compare two events and detect changes in future data -fn detect_changes(old: &EconomicEvent, new: &EconomicEvent, now: &str) -> Vec { - let mut changes = Vec::new(); - let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S").to_string(); - - println!( - "🔍 Checking event: {} on {} (now: {})", - new.event, new.date, now - ); - - // Only track changes for future events - if new.date.as_str() <= now { - println!(" ⏭️ Skipped: Event is in the past/today"); - return changes; - } - - println!(" 📅 Event is in the future - checking for changes..."); - - let fields = [ - ("actual", "actual", &old.actual, &new.actual), - ("forecast", "forecast", &old.forecast, &new.forecast), - ("previous", "previous", &old.previous, &new.previous), - ("description", "description", &old.description, &new.description), - ]; - - for (field_name, change_type, old_val, new_val) in fields { - if old_val != new_val { - println!( - " 📝 CHANGE DETECTED in '{}': '{}' -> '{}'", - field_name, old_val, new_val - ); - changes.push(EventChange { - date: new.date.clone(), - event: new.event.clone(), - country: new.country.clone(), - change_type: change_type.to_string(), - field_changed: field_name.to_string(), - old_value: old_val.to_string(), - new_value: new_val.to_string(), - detected_at: timestamp.clone(), - }); - } - } - - if changes.is_empty() { - println!(" ✅ No changes detected"); - } else { - println!(" 🎯 Total changes: {}", changes.len()); - } - - changes -} - -/// Build identity lookup map: finds most recent occurrence of each event by identity -/// Identity now includes date to distinguish recurring events (e.g., monthly GDP releases) -fn build_identity_lookup( - events: &HashMap, -) -> HashMap { - let mut identity_map: HashMap = HashMap::new(); - - for (lookup_key, event) in events { - let identity = event_identity_key(event); - identity_map.insert(identity, (lookup_key.clone(), event.clone())); - } - - identity_map -} - -/// Build a separate lookup for detecting time-only changes (same date, different time) -fn build_date_event_lookup( - events: &HashMap, -) -> HashMap> { - let mut date_event_map: HashMap> = HashMap::new(); - - for (lookup_key, event) in events { - let key = format!("{}|{}|{}", event.country, event.event, event.date); - date_event_map.entry(key).or_default().push((lookup_key.clone(), event.clone())); - } - - date_event_map -} - -/// Scan the economic_events directory for existing chunks -async fn scan_existing_chunks() -> anyhow::Result> { - let events_dir = PathBuf::from("economic_events"); - - if !events_dir.exists() { - fs::create_dir_all(&events_dir).await?; - println!("📁 Created economic_events directory"); - return Ok(vec![]); - } - - let mut chunks = Vec::new(); - let mut entries = fs::read_dir(&events_dir).await?; - - while let Some(entry) = entries.next_entry().await? { - let path = entry.path(); - if path.extension().and_then(|s| s.to_str()) == Some("json") { - if let Some(filename) = path.file_stem().and_then(|s| s.to_str()) { - if let Some(dates) = filename.strip_prefix("chunk_") { - let parts: Vec<&str> = dates.split('_').collect(); - if parts.len() == 2 { - if let Ok(content) = fs::read_to_string(&path).await { - if let Ok(events) = serde_json::from_str::>(&content) - { - chunks.push(ChunkInfo { - start_date: parts[0].to_string(), - end_date: parts[1].to_string(), - path: path.clone(), - event_count: events.len(), - }); - } - } - } - } - } - } - } - - chunks.sort_by(|a, b| a.start_date.cmp(&b.start_date)); - - if !chunks.is_empty() { - println!("\n📊 Found {} existing chunks:", chunks.len()); - for chunk in &chunks { - println!( - " • {} to {} ({} events)", - chunk.start_date, chunk.end_date, chunk.event_count - ); - } - } else { - println!("🔭 No existing chunks found"); - } - - Ok(chunks) -} - -/// Calculate target end date: first day of month, 3 months from now -fn calculate_target_end_date() -> String { - let now = Local::now().naive_local().date(); - let three_months_ahead = if now.month() + 3 > 12 { - NaiveDate::from_ymd_opt(now.year() + 1, (now.month() + 3) % 12, 1) - } else { - NaiveDate::from_ymd_opt(now.year(), now.month() + 3, 1) - } - .unwrap(); - - three_months_ahead.format("%Y-%m-%d").to_string() -} - -/// Load all events from existing chunks into a HashMap -async fn load_existing_events( - chunks: &[ChunkInfo], -) -> anyhow::Result> { - let mut event_map = HashMap::new(); - - for chunk in chunks { - if let Ok(content) = fs::read_to_string(&chunk.path).await { - if let Ok(events) = serde_json::from_str::>(&content) { - for event in events { - event_map.insert(event_lookup_key(&event), event); - } - } - } - } - - println!("📥 Loaded {} events from existing chunks", event_map.len()); - Ok(event_map) -} - -/// Save or append changes to monthly change files -async fn save_changes(changes: &[EventChange]) -> anyhow::Result<()> { - if changes.is_empty() { - println!("ℹ️ No changes to save"); - return Ok(()); - } - - println!("\n💾 Saving {} changes...", changes.len()); - - let changes_dir = PathBuf::from("economic_event_changes"); - fs::create_dir_all(&changes_dir).await?; - - // Group changes by month - let mut changes_by_month: HashMap> = HashMap::new(); - - for change in changes { - if let Some(date) = parse_date(&change.date) { - let month_key = format!("{:02}_{}", date.month(), date.year()); - changes_by_month - .entry(month_key) - .or_default() - .push(change.clone()); - } - } - - println!("📂 Grouped into {} month(s)", changes_by_month.len()); - - // Save each month's changes - for (month_key, month_changes) in changes_by_month { - let filename = format!("event_changes_{}.json", month_key); - let filepath = changes_dir.join(&filename); - - // Load existing changes if file exists - let existing_count = if filepath.exists() { - let content = fs::read_to_string(&filepath).await?; - serde_json::from_str::>(&content) - .unwrap_or_default() - .len() - } else { - 0 - }; - - let mut all_changes = if filepath.exists() { - let content = fs::read_to_string(&filepath).await?; - serde_json::from_str::>(&content).unwrap_or_default() - } else { - Vec::new() - }; - - // Append new changes - all_changes.extend(month_changes.clone()); - - // Save combined changes - let json = serde_json::to_string_pretty(&all_changes)?; - fs::write(&filepath, json).await?; - - println!( - " ✅ {}: {} existing + {} new = {} total changes", - filename, - existing_count, - month_changes.len(), - all_changes.len() - ); - } - - Ok(()) -} - -/// Reorganize events into optimal chunks and save them -async fn save_optimized_chunks(events: HashMap) -> anyhow::Result<()> { - if events.is_empty() { - return Ok(()); - } - - let events_dir = PathBuf::from("economic_events"); - fs::create_dir_all(&events_dir).await?; - - // Convert to sorted vector - let mut all_events: Vec = events.into_values().collect(); - all_events.sort_by(|a, b| a.date.cmp(&b.date)); - - // Group events by date ranges (chunks of ~100 days or similar) - let mut chunks: Vec> = Vec::new(); - let mut current_chunk = Vec::new(); - let mut current_start_date: Option = None; - - for event in all_events { - if let Some(event_date) = parse_date(&event.date) { - if let Some(start) = current_start_date { - // Start new chunk if we've gone 100+ days or have 500+ events - if (event_date - start).num_days() > 100 || current_chunk.len() >= 500 { - chunks.push(current_chunk); - current_chunk = Vec::new(); - current_start_date = Some(event_date); - } - } else { - current_start_date = Some(event_date); - } - current_chunk.push(event); - } - } - - if !current_chunk.is_empty() { - chunks.push(current_chunk); - } - - // Delete old chunk files - let mut entries = fs::read_dir(&events_dir).await?; - while let Some(entry) = entries.next_entry().await? { - let path = entry.path(); - if let Some(filename) = path.file_stem().and_then(|s| s.to_str()) { - if filename.starts_with("chunk_") { - fs::remove_file(&path).await?; - } - } - } - - // Save new optimized chunks - for chunk in chunks { - if chunk.is_empty() { - continue; - } - - let start = chunk - .iter() - .filter_map(|e| parse_date(&e.date)) - .min() - .unwrap() - .format("%Y-%m-%d") - .to_string(); - - let end = chunk - .iter() - .filter_map(|e| parse_date(&e.date)) - .max() - .unwrap() - .format("%Y-%m-%d") - .to_string(); - - let filename = format!("chunk_{}_{}.json", start, end); - let filepath = events_dir.join(&filename); - - let json = serde_json::to_string_pretty(&chunk)?; - fs::write(&filepath, json).await?; - - println!( - "💾 Saved optimized chunk: {} ({} events)", - filename, - chunk.len() - ); - } - - Ok(()) -} - -/// Scrape and update data with change tracking -async fn scrape_and_update( - client: &fantoccini::Client, - start: &str, - end: &str, - existing_events: &mut HashMap, -) -> anyhow::Result { - println!("\n🎯 Scraping range: {} to {}", start, end); - - let mut current_start = start.to_string(); - let mut all_changes = Vec::new(); - let mut all_removed_keys = HashSet::new(); - let now = Local::now() - .naive_local() - .date() - .format("%Y-%m-%d") - .to_string(); - - println!("📅 Current date for comparison: {}", now); - println!("🔍 Starting change detection...\n"); - - loop { - set_date_range(client, ¤t_start, end).await?; - sleep(Duration::from_secs(3)).await; - - let events = extract_all_data_via_js(client).await?; - if events.is_empty() { - println!(" ✅ No more events in this range"); - break; - } - - println!(" 📦 Fetched {} events", events.len()); - - // Build lookups for existing events - let identity_lookup = build_identity_lookup(existing_events); - let date_event_lookup = build_date_event_lookup(existing_events); - let mut events_to_remove: Vec = Vec::new(); - - // Process events: detect changes and update map - let mut new_events_count = 0; - let mut updated_events_count = 0; - let mut time_changed_events_count = 0; - - for new_event in events.clone() { - let lookup_key = event_lookup_key(&new_event); - let identity_key = event_identity_key(&new_event); - - // CASE A: Exact match (same date/time/event) - if let Some(old_event) = existing_events.get(&lookup_key) { - println!("\n 🔎 Comparing existing event:"); - println!(" Event: {}", new_event.event); - println!(" Date: {} | Time: {}", new_event.date, new_event.time); - - let changes = detect_changes(old_event, &new_event, &now); - if !changes.is_empty() { - println!(" ✨ {} change(s) detected and recorded!", changes.len()); - all_changes.extend(changes); - updated_events_count += 1; - } - - // CRITICAL: Always update the event in the map with latest data - existing_events.insert(lookup_key, new_event); - continue; - } - - // CASE B: Check if time changed for same date/event - let date_event_key = format!("{}|{}|{}", new_event.country, new_event.event, new_event.date); - if let Some(existing_occurrences) = date_event_lookup.get(&date_event_key) { - // Find if there's an existing event with different time - if let Some((old_lookup_key, old_event)) = existing_occurrences.iter() - .find(|(key, _)| key != &lookup_key) { - - println!("\n 🕐 TIME CHANGE DETECTED:"); - println!(" Event: {}", new_event.event); - println!(" Date: {}", new_event.date); - println!(" Old time: {} | New time: {}", old_event.time, new_event.time); - - // Track time change - if new_event.date.as_str() > now.as_str() { - let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S").to_string(); - all_changes.push(EventChange { - date: new_event.date.clone(), - event: new_event.event.clone(), - country: new_event.country.clone(), - change_type: "time".to_string(), - field_changed: "time".to_string(), - old_value: old_event.time.clone(), - new_value: new_event.time.clone(), - detected_at: timestamp, - }); - - println!(" 📝 Time change recorded"); - } - - // Check for other field changes too - let field_changes = detect_changes(old_event, &new_event, &now); - if !field_changes.is_empty() { - println!( - " ✨ {} additional field change(s) detected!", - field_changes.len() - ); - all_changes.extend(field_changes); - } - - // Remove old occurrence and add new one - events_to_remove.push(old_lookup_key.clone()); - all_removed_keys.insert(old_lookup_key.clone()); - existing_events.insert(lookup_key, new_event); - time_changed_events_count += 1; - continue; - } - } - - // CASE C: New event - new_events_count += 1; - println!( - " ➕ New event: {} on {} @ {}", - new_event.event, new_event.date, new_event.time - ); - - // Track as newly added if it's a future event - if new_event.date.as_str() > now.as_str() { - let timestamp = Local::now().format("%Y-%m-%d %H:%M:%S").to_string(); - all_changes.push(EventChange { - date: new_event.date.clone(), - event: new_event.event.clone(), - country: new_event.country.clone(), - change_type: "newly_added".to_string(), - field_changed: "new_event".to_string(), - old_value: "".to_string(), - new_value: format!("{} @ {}", new_event.date, new_event.time), - detected_at: timestamp, - }); - } - - existing_events.insert(lookup_key, new_event); - } - - // Remove old occurrences of time-changed events - for key in events_to_remove { - existing_events.remove(&key); - } - - println!("\n 📊 Batch summary:"); - println!(" New events: {}", new_events_count); - println!(" Updated events: {}", updated_events_count); - println!(" Time changed events: {}", time_changed_events_count); - println!(" Changes tracked: {}", all_changes.len()); - - let next = match calculate_next_start_date(&events) { - Ok(n) => n, - Err(_) => { - println!(" ⚠️ Cannot calculate next date, stopping"); - break; - } - }; - - if next > end.to_string() { - println!(" ✅ Reached end of range"); - break; - } - - current_start = next; - sleep(Duration::from_secs(2)).await; - } - - println!("\n🎯 SCRAPE COMPLETE:"); - println!(" Total changes detected: {}", all_changes.len()); - println!(" Total events removed (time changes): {}", all_removed_keys.len()); - - Ok(ScrapeResult { - changes: all_changes, - removed_keys: all_removed_keys, - }) -} - -/// Main logic with intelligent update handling -async fn run_intelligent_update(client: &fantoccini::Client) -> anyhow::Result<()> { - let now = Local::now() - .naive_local() - .date() - .format("%Y-%m-%d") - .to_string(); - let target_end = calculate_target_end_date(); - - println!("📅 Today: {}", now); - println!("🎯 Target end date: {}", target_end); - - // Load existing chunks - let chunks = scan_existing_chunks().await?; - let mut existing_events = load_existing_events(&chunks).await?; - - if existing_events.is_empty() { - // No existing data - full scrape from beginning - println!("\n🔭 No existing data - starting fresh scrape from 2007-02-13"); - let result = - scrape_and_update(client, "2007-02-13", &target_end, &mut existing_events).await?; - save_changes(&result.changes).await?; - save_optimized_chunks(existing_events).await?; - return Ok(()); - } - - // Find date range of existing data - let dates: Vec = existing_events - .values() - .filter_map(|e| parse_date(&e.date)) - .collect(); - - let min_date = dates.iter().min().unwrap().format("%Y-%m-%d").to_string(); - let max_date = dates.iter().max().unwrap().format("%Y-%m-%d").to_string(); - - println!("📊 Existing data range: {} to {}", min_date, max_date); - - // Determine update strategy - if max_date < now { - // Case 1: Data is in the past, need to update from max_date to target - let next_start = parse_date(&max_date) - .and_then(|d| d.succ_opt()) - .map(|d| d.format("%Y-%m-%d").to_string()) - .unwrap_or(max_date); - - println!( - "\n📈 Updating from end of existing data: {} to {}", - next_start, target_end - ); - let result = - scrape_and_update(client, &next_start, &target_end, &mut existing_events).await?; - save_changes(&result.changes).await?; - save_optimized_chunks(existing_events).await?; - } else if max_date >= now { - // Case 2: Data extends to or beyond today, refresh future data - println!( - "\n🔄 Data exists up to today - refreshing future data: {} to {}", - now, target_end - ); - - // CRITICAL FIX: Pass the actual existing_events HashMap directly - // This ensures all updates (including rescheduled events) are properly handled - let result = scrape_and_update(client, &now, &target_end, &mut existing_events).await?; - - save_changes(&result.changes).await?; - - // The existing_events HashMap is already updated in-place by scrape_and_update - // Just save the optimized chunks - save_optimized_chunks(existing_events).await?; - } - - println!("\n✅ Update complete!"); - Ok(()) -} +use tokio::signal; #[tokio::main] async fn main() -> anyhow::Result<()> { - let port = 9515; - let mut chromedriver = start_chromedriver(port); - sleep(Duration::from_secs(1)).await; + // === Ensure data directories exist === + util::ensure_data_dirs().await?; - let caps_value = serde_json::json!({ - "goog:chromeOptions": { - "args": [ - "--disable-gpu", - "--disable-notifications", - "--disable-popup-blocking", - "--disable-blink-features=AutomationControlled", - ], - "excludeSwitches": ["enable-automation"] - } - }); + // === Load configuration === + let config = config::Config::default(); - let caps_map: Map = caps_value - .as_object() - .expect("Capabilities should be a JSON object") - .clone(); + // === Start ChromeDriver === + let mut child = std::process::Command::new("chromedriver-win64/chromedriver.exe") + .args(["--port=9515"]) + .spawn()?; - let mut client = ClientBuilder::native() - .capabilities(caps_map) - .connect(&format!("http://localhost:{}", port)) + let client = ClientBuilder::native() + .connect("http://localhost:9515") .await?; - // Setup graceful shutdown - let shutdown_client = client.clone(); + // Graceful shutdown + let client_clone = client.clone(); tokio::spawn(async move { - signal::ctrl_c().await.expect("Failed to listen for ctrl+c"); - println!("\nCtrl+C received, shutting down..."); - shutdown_client.close().await.ok(); + signal::ctrl_c().await.unwrap(); + client_clone.close().await.ok(); std::process::exit(0); }); - // Navigate to page - let url = "https://www.finanzen.net/termine/wirtschaftsdaten/"; - client.goto(url).await?; + // === Economic Calendar Update === + println!("Updating Economic Calendar (High Impact Only)"); + economic::goto_and_prepare(&client).await?; + economic::run_full_update(&client, &config).await?; - dismiss_overlays(&client).await?; - - // Click high importance tab - if let Ok(tab) = client - .find(Locator::Css( - r#"div[data-sg-tab-item="teletrader-dates-three-stars"]"#, - )) - .await - { - tab.click().await?; - println!("✓ High importance tab clicked"); - sleep(Duration::from_secs(2)).await; - } - - // Run intelligent update - run_intelligent_update(&client).await?; - - // Display final summary - let chunks = scan_existing_chunks().await?; - let final_events = load_existing_events(&chunks).await?; - - println!("\n📊 FINAL SUMMARY:"); - println!(" • Total chunks: {}", chunks.len()); - println!(" • Total events: {}", final_events.len()); + // === Corporate Earnings Update === + println!("\nUpdating Corporate Earnings"); + let tickers = config::get_tickers(); + corporate::run_full_update(tickers, &config).await?; + // === Cleanup === client.close().await?; - chromedriver.kill()?; + child.kill()?; + println!("\nAll data updated successfully!"); Ok(()) } \ No newline at end of file diff --git a/src/util.rs b/src/util.rs new file mode 100644 index 0000000..27e9832 --- /dev/null +++ b/src/util.rs @@ -0,0 +1,23 @@ +// src/util.rs (or put it directly in main.rs if you prefer) +use tokio::fs; +use std::path::Path; + +/// Create the required data folders if they do not exist yet. +pub async fn ensure_data_dirs() -> anyhow::Result<()> { + let dirs = [ + "economic_events", + "economic_event_changes", + "corporate_events", + "corporate_prices", + ]; + + for dir in dirs { + let path = Path::new(dir); + if !path.exists() { + fs::create_dir_all(path).await?; + println!("Created directory: {dir}"); + } + // else → silently continue + } + Ok(()) +} \ No newline at end of file