From 417e825ebafd6fbe54e6b6e2855218a37cf0d7c3 Mon Sep 17 00:00:00 2001
From: Nick Shipp
Date: Thu, 3 May 2018 11:34:06 -0400
Subject: initial commit

---
 Cargo.toml  |  19 +++++
 src/main.rs | 239 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 258 insertions(+)
 create mode 100644 Cargo.toml
 create mode 100644 src/main.rs

diff --git a/Cargo.toml b/Cargo.toml
new file mode 100644
index 0000000..88fd792
--- /dev/null
+++ b/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+name = "download-crates"
+version = "0.1.0"
+authors = ["Nick Shipp"]
+
+[dependencies]
+serde = "1.0"
+serde_derive = "1.0"
+serde_json = "1.0"
+
+glob = "0.2"
+pbr = "1.0"
+reqwest = "0.8"
+clap = "2"
+git2 = "0.7"
+
+[dependencies.semver]
+version = "0.9"
+features = ["serde"]
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..0fcb8f5
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,239 @@
+//! Mirrors crate archives from crates.io into a local directory tree.
+//!
+//! The crates.io index is cloned (or updated) under `OUTPUT/crates.io-index`.
+//! Every crate version listed there maps to
+//! `OUTPUT/api/v1/crates/NAME/VERSION/download`; archives missing from the
+//! mirror are downloaded and their paths recorded in
+//! `OUTPUT/new-files-TIMESTAMP`.
+
+extern crate serde;
+extern crate serde_json;
+#[macro_use] extern crate serde_derive;
+
+extern crate glob;
+extern crate pbr;
+extern crate reqwest;
+extern crate clap;
+extern crate git2;
+
+use glob::{glob_with, MatchOptions};
+use pbr::ProgressBar;
+use reqwest::Client;
+use clap::{Arg, App};
+use git2::Repository;
+
+use std::time::{SystemTime, UNIX_EPOCH};
+use std::fs::{File, create_dir_all};
+use std::io::{BufReader, BufWriter, BufRead, Write};
+use std::path::{Path, PathBuf};
+use std::collections::HashMap;
+use std::error::Error;
+
+/// One dependency of a crate version, as recorded in the crates.io index.
+#[derive(Debug, Deserialize)]
+struct CrateDep {
+    name: String,
+    req: String,
+    features: Vec<String>,
+    optional: bool,
+    default_features: bool,
+    target: Option<String>,
+    kind: Option<String>,
+}
+
+/// One line of an index file: a single published version of a crate.
+#[derive(Debug, Deserialize)]
+struct Crate {
+    name: String,
+    vers: String,
+    deps: Vec<CrateDep>,
+    yanked: bool,
+    cksum: String,
+    features: HashMap<String, Vec<String>>,
+}
+
+/// Parses an index file, one JSON object per line, into the crate versions
+/// it lists.
+///
+/// # Errors
+///
+/// Fails if the file cannot be opened or read, or if a line does not
+/// deserialize into a `Crate`.
+fn read_crate_file<P>(path: P)
+    -> Result<Vec<Crate>, Box<Error>>
+    where P: AsRef<Path>
+{
+    let file = BufReader::new(File::open(path)?);
+
+    file.lines()
+        .map(|line| -> Result<Crate, Box<Error>> {
+            Ok(serde_json::from_str(&line?)?)
+        })
+        .collect()
+}
+
+/// Returns where the archive of `cr` lives in the mirror rooted at `base`:
+/// `base/api/v1/crates/NAME/VERSION/download`.
+///
+/// The version directory is created as a side effect.
+///
+/// # Panics
+///
+/// Panics if that directory cannot be created.
+fn mirror_path<P>(base: P, cr: Crate) -> PathBuf
+    where PathBuf: From<P>
+{
+    let mut path = PathBuf::from(base);
+
+    path.push("api/v1/crates");
+    path.push(&cr.name);
+    path.push(&cr.vers);
+    if ! path.exists() {
+        create_dir_all(&path).expect("unable to create crate directory");
+    }
+    path.push("download");
+
+    path
+}
+
+/// Filter predicate: true when nothing exists at `path` yet.
+fn missing_path(path: &PathBuf) -> bool {
+    ! path.exists()
+}
+
+/// Downloads the archive for every mirror path in `paths` from crates.io.
+///
+/// `base` is the mirror root the paths were built under.  It is stripped off
+/// again to recover the request path, so the mirror does not have to live in
+/// the current directory.
+///
+/// # Errors
+///
+/// Stops at the first path that is not below `base`, is not valid UTF-8,
+/// gets a failed request or unsuccessful status, or cannot be written.
+fn download_crates(base: &Path, paths: &[PathBuf])
+    -> Result<(), Box<Error>>
+{
+    let mut pb = ProgressBar::new(paths.len() as u64);
+    let client = Client::new();
+
+    for path in paths {
+        let rel = path.strip_prefix(base)?;
+        let rel = rel.to_str()
+            .ok_or_else(|| format!("path is not valid UTF-8: {}", rel.display()))?;
+        let uri = format!("https://crates.io/{}", rel);
+        let mut resp = client.get(&uri).send()?;
+
+        // Check the status before creating the file: an empty file left
+        // behind by a failed request would make the crate look mirrored, so
+        // it would never be retried.
+        if ! resp.status().is_success() {
+            return Err(format!("failed to download {}: {}", uri, resp.status()).into());
+        }
+
+        let mut file = BufWriter::new(File::create(path)?);
+        resp.copy_to(&mut file)?;
+        // Flush explicitly; an error from the implicit flush on drop is lost.
+        file.flush()?;
+
+        pb.inc();
+    }
+
+    Ok(())
+}
+
+/// Clones the crates.io index repository into `path`.
+fn clone_index<P>(path: P) -> Result<(), Box<Error>>
+    where P: AsRef<Path>
+{
+    eprintln!("cloning index...");
+    let _repo = Repository::clone("https://github.com/rust-lang/crates.io-index.git", path)?;
+    eprintln!("done");
+    Ok(())
+}
+
+/// Updates the index clone at `path`: fetches `master` from `origin` and
+/// hard-resets the checkout to `origin/master`.
+fn pull_index<P>(path: P) -> Result<(), Box<Error>>
+    where P: AsRef<Path>
+{
+    let repo = Repository::open(path)?;
+    let mut remote = repo.find_remote("origin")?;
+    remote.fetch(&["master"], None, None)?;
+    let oid = repo.refname_to_id("refs/remotes/origin/master")?;
+    let object = repo.find_object(oid, None)?;
+    repo.reset(&object, git2::ResetType::Hard, None)?;
+    Ok(())
+}
+
+/// Updates the index, downloads every archive missing from the mirror and
+/// writes the list of newly fetched files.
+fn main() {
+    let args = App::new("download-crates")
+        .arg(Arg::with_name("OUTPUT")
+             .help("Mirror output directory")
+             .index(1)
+             .takes_value(true))
+        .get_matches();
+
+    let mirror_base = PathBuf::from(args.value_of("OUTPUT").unwrap_or("."));
+
+    if ! mirror_base.exists() {
+        create_dir_all(&mirror_base).expect("unable to create output directory");
+    }
+
+    let index = {
+        let mut p = mirror_base.clone();
+        p.push("crates.io-index");
+        p
+    };
+
+    let indexglob = {
+        let mut p = index.clone();
+        p.push("*");
+        p.push("**");
+        p.push("*");
+        String::from(p.to_str().unwrap())
+    };
+
+    let newfilesfn = {
+        let epoch = SystemTime::now().duration_since(UNIX_EPOCH).unwrap();
+        let mut p = mirror_base.clone();
+        p.push(&format!("new-files-{}", epoch.as_secs()));
+        p
+    };
+
+    let mut newfiles = BufWriter::new(File::create(&newfilesfn).unwrap());
+
+    if ! index.exists() {
+        clone_index(&index).expect("unable to clone index");
+    } else {
+        pull_index(&index).expect("unable to pull index");
+    }
+
+    let opts = MatchOptions {
+        case_sensitive: false,
+        require_literal_separator: false,
+        require_literal_leading_dot: true
+    };
+
+    let paths: Vec<_> = glob_with(&indexglob, &opts).unwrap()
+        .filter_map(|p| p.ok())
+        .filter(|p| p.is_file())
+        .flat_map(|p| read_crate_file(&p).unwrap_or_else(|e| {
+            panic!("unable to read index file {}: {}", p.display(), e)
+        }))
+        .map(|p| mirror_path(&mirror_base, p))
+        .filter(missing_path)
+        .collect();
+
+    download_crates(&mirror_base, &paths).expect("downloading crates failed");
+
+    for path in &paths {
+        writeln!(&mut newfiles, "{}", path.display()).unwrap();
+    }
+    // Flush explicitly; an error from the implicit flush on drop is lost.
+    newfiles.flush().expect("unable to write list of new files");
+
+    println!("List of new files written to {}", newfilesfn.display());
+}
-- 
cgit v1.2.3-54-g00ecf