Added parallelized scraping instances for company Yahoo ticker seeding
This commit is contained in:
@@ -2,6 +2,9 @@
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use fantoccini::{Client, ClientBuilder};
|
||||
use rand::seq::{IndexedRandom, SliceRandom};
|
||||
use rand::rngs::ThreadRng;
|
||||
use rand::Rng; // for the RNG trait
|
||||
use serde_json::{Map, Value};
|
||||
use std::pin::Pin;
|
||||
use std::process::Stdio;
|
||||
@@ -363,6 +366,7 @@ impl ChromeInstance {
|
||||
}
|
||||
|
||||
fn chrome_args(&self) -> Map<String, Value> {
|
||||
let user_agent = Self::chrome_user_agent();
|
||||
let mut args = vec![
|
||||
"--headless=new".to_string(),
|
||||
"--disable-gpu".to_string(),
|
||||
@@ -372,14 +376,14 @@ impl ChromeInstance {
|
||||
"--disable-extensions".to_string(),
|
||||
"--disable-popup-blocking".to_string(),
|
||||
"--disable-notifications".to_string(),
|
||||
"--disable-logging".to_string(),
|
||||
//"--disable-logging".to_string(),
|
||||
"--disable-autofill".to_string(),
|
||||
"--disable-sync".to_string(),
|
||||
"--disable-default-apps".to_string(),
|
||||
"--disable-translate".to_string(),
|
||||
"--window-size=1920,1080".to_string(),
|
||||
//"--window-size=1920,1080".to_string(),
|
||||
"--disable-blink-features=AutomationControlled".to_string(),
|
||||
"--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36".to_string()
|
||||
format!("--user-agent={}", user_agent),
|
||||
];
|
||||
if let Some(ref proxy) = self.proxy_url {
|
||||
let proxy = proxy.clone();
|
||||
@@ -397,6 +401,18 @@ impl ChromeInstance {
|
||||
});
|
||||
caps.as_object().cloned().unwrap()
|
||||
}
|
||||
|
||||
|
||||
pub fn chrome_user_agent() -> &'static str {
|
||||
static UAS: &[&str] = &[
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.6367.91 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/123.0.6312.122 Safari/537.36",
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.129 Safari/537.36",
|
||||
];
|
||||
|
||||
let mut rng = ThreadRng::default(); // non-deprecated RNG
|
||||
*UAS.choose(&mut rng).unwrap()
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_chromedriver_address(line: &str) -> Option<String> {
|
||||
|
||||
Reference in New Issue
Block a user