From ae0fa3fe93b2656473259d1d266809cecc070f1f Mon Sep 17 00:00:00 2001 From: Shunsuke Shibayama Date: Sat, 4 Nov 2023 00:50:55 +0900 Subject: [PATCH] fix: pylyzer integration bug --- crates/els/definition.rs | 2 +- crates/erg_common/io.rs | 8 +- crates/erg_compiler/build_package.rs | 205 +++++++++++++++++++++++- crates/erg_compiler/context/register.rs | 194 +--------------------- 4 files changed, 207 insertions(+), 202 deletions(-) diff --git a/crates/els/definition.rs b/crates/els/definition.rs index 97b2915f6..e1d65c7dc 100644 --- a/crates/els/definition.rs +++ b/crates/els/definition.rs @@ -1,7 +1,7 @@ use erg_common::consts::PYTHON_MODE; use erg_common::traits::Stream; use erg_compiler::artifact::BuildRunnable; -use erg_compiler::context::register::PylyzerStatus; +use erg_compiler::build_package::PylyzerStatus; use erg_compiler::erg_parser::parse::Parsable; use erg_compiler::erg_parser::token::{Token, TokenCategory}; use erg_compiler::hir::{Def, Expr}; diff --git a/crates/erg_common/io.rs b/crates/erg_common/io.rs index 40456f453..cee8f1d2d 100644 --- a/crates/erg_common/io.rs +++ b/crates/erg_common/io.rs @@ -5,7 +5,7 @@ use std::path::{Path, PathBuf}; use std::process; use std::process::Stdio; -use crate::consts::{ERG_MODE, EXPERIMENTAL_MODE}; +use crate::consts::EXPERIMENTAL_MODE; use crate::env::{ erg_path, erg_py_external_lib_path, erg_pystd_path, erg_std_path, python_site_packages, }; @@ -450,10 +450,8 @@ impl Input { } pub fn resolve_py(&self, path: &Path) -> Result { - if ERG_MODE || path.starts_with("./") { - if let Ok(path) = self.resolve_local_py(path) { - return Ok(path); - } + if let Ok(path) = self.resolve_local_py(path) { + return Ok(path); } for sys_path in self.sys_path()? { let mut dir = sys_path; diff --git a/crates/erg_compiler/build_package.rs b/crates/erg_compiler/build_package.rs index e6b393b1e..3cd5f9f97 100644 --- a/crates/erg_compiler/build_package.rs +++ b/crates/erg_compiler/build_package.rs @@ -1,11 +1,22 @@ use std::ffi::OsStr; +use std::fmt; +use std::fs::{metadata, remove_file, File}; +use std::io::{BufRead, BufReader}; use std::marker::PhantomData; +use std::option::Option; use std::path::Path; +use std::path::PathBuf; +use std::process::{Command, Stdio}; +use std::thread::sleep; +use std::time::{Duration, SystemTime}; + +use erg_common::config::ErgMode; use erg_common::config::ErgConfig; use erg_common::consts::ELS; use erg_common::debug_power_assert; use erg_common::dict::Dict; +use erg_common::env::is_std_decl_path; use erg_common::error::MultiErrorDisplay; use erg_common::io::Input; #[allow(unused)] @@ -30,6 +41,106 @@ use crate::ty::ValueObj; use crate::varinfo::VarInfo; use crate::GenericHIRBuilder; +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum CheckStatus { + Succeed, + Failed, + Ongoing, +} + +impl fmt::Display for CheckStatus { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + CheckStatus::Succeed => write!(f, "succeed"), + CheckStatus::Failed => write!(f, "failed"), + CheckStatus::Ongoing => write!(f, "ongoing"), + } + } +} + +impl std::str::FromStr for CheckStatus { + type Err = String; + + fn from_str(s: &str) -> Result { + match s { + "succeed" => Ok(CheckStatus::Succeed), + "failed" => Ok(CheckStatus::Failed), + "ongoing" => Ok(CheckStatus::Ongoing), + _ => Err(format!("invalid status: {s}")), + } + } +} + +/// format: +/// ```python +/// #[pylyzer] succeed foo.py 1234567890 +/// ``` +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct PylyzerStatus { + pub status: CheckStatus, + pub file: PathBuf, + pub timestamp: SystemTime, + pub hash: u64, +} + +impl fmt::Display for PylyzerStatus { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "##[pylyzer] {} {} {} {}", + self.status, + self.file.display(), + self.timestamp + .duration_since(SystemTime::UNIX_EPOCH) + .unwrap() + .as_secs(), + self.hash, + ) + } +} + +impl std::str::FromStr for PylyzerStatus { + type Err = String; + + fn from_str(s: &str) -> Result { + let mut iter = s.split_whitespace(); + let pylyzer = iter.next().ok_or("no pylyzer")?; + if pylyzer != "##[pylyzer]" { + return Err("not pylyzer".to_string()); + } + let status = iter.next().ok_or("no succeed")?; + let status = status.parse()?; + let file = iter.next().ok_or("no file")?; + let file = PathBuf::from(file); + let timestamp = iter.next().ok_or("no timestamp")?; + let timestamp = SystemTime::UNIX_EPOCH + .checked_add(std::time::Duration::from_secs( + timestamp + .parse() + .map_err(|e| format!("timestamp parse error: {e}"))?, + )) + .ok_or("timestamp overflow")?; + let hash = iter.next().ok_or("no hash")?; + let hash = hash.parse().map_err(|e| format!("hash parse error: {e}"))?; + Ok(PylyzerStatus { + status, + file, + timestamp, + hash, + }) + } +} + +enum Availability { + Available, + InProgress, + NotFound, + Unreadable, + OutOfDate, +} + +use Availability::*; + #[derive(Debug)] pub enum ResolveError { CycleDetected { @@ -275,6 +386,79 @@ impl result } + fn analysis_in_progress(path: &Path) -> bool { + let Ok(meta) = metadata(path) else { + return false; + }; + !is_std_decl_path(path) && meta.len() == 0 + } + + fn availability(path: &Path) -> Availability { + let Ok(file) = File::open(path) else { + return Availability::NotFound; + }; + if is_std_decl_path(path) { + return Availability::Available; + } + let mut line = "".to_string(); + let Ok(_) = BufReader::new(file).read_line(&mut line) else { + return Availability::Unreadable; + }; + if line.is_empty() { + return Availability::InProgress; + } + let Ok(status) = line.parse::() else { + return Availability::Available; + }; + let Some(meta) = metadata(&status.file).ok() else { + return Availability::NotFound; + }; + let dummy_hash = meta.len(); + if status.hash != dummy_hash { + Availability::OutOfDate + } else { + Availability::Available + } + } + + fn try_gen_py_decl_file(&self, __name__: &Str) -> Result { + if let Ok(path) = self.cfg.input.resolve_py(Path::new(&__name__[..])) { + if self.cfg.input.path() == path.as_path() { + return Ok(path); + } + let (out, err) = if self.cfg.mode == ErgMode::LanguageServer || self.cfg.quiet_repl { + (Stdio::null(), Stdio::null()) + } else { + (Stdio::inherit(), Stdio::inherit()) + }; + // pylyzer is a static analysis tool for Python (https://github.com/mtshiba/pylyzer). + // It can convert a Python script to an Erg AST for code analysis. + // There is also an option to output the analysis result as `d.er`. Use this if the system have pylyzer installed. + // A type definition file may be generated even if not all type checks succeed. + if let Ok(status) = Command::new("pylyzer") + .arg("--dump-decl") + .arg(path.to_str().unwrap()) + .stdout(out) + .stderr(err) + .spawn() + .and_then(|mut child| child.wait()) + { + if let Some(path) = self.cfg.input.resolve_decl_path(Path::new(&__name__[..])) { + let size = metadata(&path).unwrap().len(); + // if pylyzer crashed + if !status.success() && size == 0 { + // The presence of the decl file indicates that the analysis is in progress or completed, + // so if pylyzer crashes in the middle of the analysis, delete the file. + remove_file(&path).unwrap(); + } else { + return Ok(path); + } + } + } + } + Err(()) + } + fn register(&mut self, expr: &mut Expr, cfg: &ErgConfig) -> ResolveResult<()> { let Expr::Call(call) = expr else { unreachable!() @@ -298,11 +482,26 @@ impl } return Ok(()); } - let import_path = match cfg.input.resolve_path(Path::new(&__name__[..])) { + let path = Path::new(&__name__[..]); + let import_path = match cfg.input.resolve_path(path) { Some(path) => path, None => { - // error will be reported in `Context::import_erg_mod` - return Ok(()); + for _ in 0..600 { + if !Self::analysis_in_progress(path) { + break; + } + sleep(Duration::from_millis(100)); + } + if matches!(Self::availability(path), OutOfDate | NotFound | Unreadable) { + if let Ok(path) = self.try_gen_py_decl_file(__name__) { + path + } else { + return Ok(()); + } + } else { + // error will be reported in `Context::import_erg_mod` + return Ok(()); + } } }; let from_path = NormalizedPathBuf::from(cfg.input.path()); diff --git a/crates/erg_compiler/context/register.rs b/crates/erg_compiler/context/register.rs index 559e8f7eb..651a5dd40 100644 --- a/crates/erg_compiler/context/register.rs +++ b/crates/erg_compiler/context/register.rs @@ -1,16 +1,9 @@ -use std::fmt; -use std::fs::{metadata, remove_file, File}; -use std::io::{BufRead, BufReader}; use std::option::Option; use std::path::{Path, PathBuf}; -use std::process::{Command, Stdio}; -use std::thread::sleep; -use std::time::{Duration, SystemTime}; -use erg_common::config::ErgMode; use erg_common::consts::{ERG_MODE, PYTHON_MODE}; use erg_common::dict::Dict; -use erg_common::env::{is_pystd_main_module, is_std_decl_path}; +use erg_common::env::is_pystd_main_module; use erg_common::erg_util::BUILTIN_ERG_MODS; use erg_common::levenshtein::get_similar_name; use erg_common::pathutil::{DirKind, FileKind}; @@ -56,106 +49,6 @@ pub fn valid_mod_name(name: &str) -> bool { !name.is_empty() && !name.starts_with('/') && name.trim() == name } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub enum CheckStatus { - Succeed, - Failed, - Ongoing, -} - -impl fmt::Display for CheckStatus { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - match self { - CheckStatus::Succeed => write!(f, "succeed"), - CheckStatus::Failed => write!(f, "failed"), - CheckStatus::Ongoing => write!(f, "ongoing"), - } - } -} - -impl std::str::FromStr for CheckStatus { - type Err = String; - - fn from_str(s: &str) -> Result { - match s { - "succeed" => Ok(CheckStatus::Succeed), - "failed" => Ok(CheckStatus::Failed), - "ongoing" => Ok(CheckStatus::Ongoing), - _ => Err(format!("invalid status: {s}")), - } - } -} - -/// format: -/// ```python -/// #[pylyzer] succeed foo.py 1234567890 -/// ``` -#[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct PylyzerStatus { - pub status: CheckStatus, - pub file: PathBuf, - pub timestamp: SystemTime, - pub hash: u64, -} - -impl fmt::Display for PylyzerStatus { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - write!( - f, - "##[pylyzer] {} {} {} {}", - self.status, - self.file.display(), - self.timestamp - .duration_since(SystemTime::UNIX_EPOCH) - .unwrap() - .as_secs(), - self.hash, - ) - } -} - -impl std::str::FromStr for PylyzerStatus { - type Err = String; - - fn from_str(s: &str) -> Result { - let mut iter = s.split_whitespace(); - let pylyzer = iter.next().ok_or("no pylyzer")?; - if pylyzer != "##[pylyzer]" { - return Err("not pylyzer".to_string()); - } - let status = iter.next().ok_or("no succeed")?; - let status = status.parse()?; - let file = iter.next().ok_or("no file")?; - let file = PathBuf::from(file); - let timestamp = iter.next().ok_or("no timestamp")?; - let timestamp = SystemTime::UNIX_EPOCH - .checked_add(std::time::Duration::from_secs( - timestamp - .parse() - .map_err(|e| format!("timestamp parse error: {e}"))?, - )) - .ok_or("timestamp overflow")?; - let hash = iter.next().ok_or("no hash")?; - let hash = hash.parse().map_err(|e| format!("hash parse error: {e}"))?; - Ok(PylyzerStatus { - status, - file, - timestamp, - hash, - }) - } -} - -enum Availability { - Available, - InProgress, - NotFound, - Unreadable, - OutOfDate, -} - -use Availability::*; - const UBAR: &Str = &Str::ever("_"); impl Context { @@ -2264,56 +2157,12 @@ impl Context { get_similar_name(BUILTIN_ERG_MODS.into_iter(), name).map(Str::rc) } - fn analysis_in_progress(path: &Path) -> bool { - let Ok(meta) = metadata(path) else { - return false; - }; - !is_std_decl_path(path) && meta.len() == 0 - } - - fn availability(path: &Path) -> Availability { - let Ok(file) = File::open(path) else { - return Availability::NotFound; - }; - if is_std_decl_path(path) { - return Availability::Available; - } - let mut line = "".to_string(); - let Ok(_) = BufReader::new(file).read_line(&mut line) else { - return Availability::Unreadable; - }; - if line.is_empty() { - return Availability::InProgress; - } - let Ok(status) = line.parse::() else { - return Availability::Available; - }; - let Some(meta) = metadata(&status.file).ok() else { - return Availability::NotFound; - }; - let dummy_hash = meta.len(); - if status.hash != dummy_hash { - Availability::OutOfDate - } else { - Availability::Available - } - } - fn get_decl_path(&self, __name__: &Str, loc: &impl Locational) -> CompileResult { match self.cfg.input.resolve_decl_path(Path::new(&__name__[..])) { Some(path) => { if self.cfg.input.decl_file_is(&path) { return Ok(path); } - for _ in 0..600 { - if !Self::analysis_in_progress(&path) { - break; - } - sleep(Duration::from_millis(100)); - } - if matches!(Self::availability(&path), OutOfDate | NotFound | Unreadable) { - let _ = self.try_gen_py_decl_file(__name__); - } if is_pystd_main_module(path.as_path()) && !BUILTIN_PYTHON_MODS.contains(&&__name__[..]) { @@ -2329,9 +2178,6 @@ impl Context { Ok(path) } None => { - if let Ok(path) = self.try_gen_py_decl_file(__name__) { - return Ok(path); - } let err = TyCheckError::import_error( self.cfg.input.clone(), line!() as usize, @@ -2348,44 +2194,6 @@ impl Context { } } - fn try_gen_py_decl_file(&self, __name__: &Str) -> Result { - if let Ok(path) = self.cfg.input.resolve_py(Path::new(&__name__[..])) { - if self.cfg.input.path() == path.as_path() { - return Ok(path); - } - let (out, err) = if self.cfg.mode == ErgMode::LanguageServer || self.cfg.quiet_repl { - (Stdio::null(), Stdio::null()) - } else { - (Stdio::inherit(), Stdio::inherit()) - }; - // pylyzer is a static analysis tool for Python (https://github.com/mtshiba/pylyzer). - // It can convert a Python script to an Erg AST for code analysis. - // There is also an option to output the analysis result as `d.er`. Use this if the system have pylyzer installed. - // A type definition file may be generated even if not all type checks succeed. - if let Ok(status) = Command::new("pylyzer") - .arg("--dump-decl") - .arg(path.to_str().unwrap()) - .stdout(out) - .stderr(err) - .spawn() - .and_then(|mut child| child.wait()) - { - if let Some(path) = self.cfg.input.resolve_decl_path(Path::new(&__name__[..])) { - let size = metadata(&path).unwrap().len(); - // if pylyzer crashed - if !status.success() && size == 0 { - // The presence of the decl file indicates that the analysis is in progress or completed, - // so if pylyzer crashes in the middle of the analysis, delete the file. - remove_file(&path).unwrap(); - } else { - return Ok(path); - } - } - } - } - Err(()) - } - fn import_py_mod(&self, __name__: &Str, loc: &impl Locational) -> CompileResult { let path = self.get_decl_path(__name__, loc)?; // module itself