commit 0f8550b517: init

21 changed files with 584 additions and 0 deletions

.gitignore (new file, vendored)
@@ -0,0 +1 @@
/target

Cargo.lock (new file, generated)
@@ -0,0 +1,14 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4

[[package]]
name = "mousquet"
version = "0.1.0"

[[package]]
name = "mousquetaire"
version = "0.1.0"
dependencies = [
 "mousquet",
]

Cargo.toml (new file)
@@ -0,0 +1,2 @@
[workspace]
members = ["mousquet", "mousquetaire"]

README.md (new file)
@@ -0,0 +1,35 @@
# Mousquet

Utility for making rough, heuristic estimates of code similarity between
implementations.

The similarity algorithm is based on token sequence matching, like some other
software serving the same purpose. This approach has many limitations but may
fit some use cases.

## Example

```bash
$ mousquetaire 'examples/primes_1.py' 'examples/primes_2.py'
```



## Build

### Dependencies

- cargo
  - Install cargo through rustup
    - `pacman -S rustup`
    - `curl --proto '=https' --tlsv1.2 -sSf 'https://sh.rustup.rs' | sh`
  - Use any toolchain
    - `rustup default stable`

### Compile

```bash
cargo build --release
```

Find the binary at `target/release/mousquetaire`.
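
The README's pitch maps onto a single library call. As a rough usage sketch, assuming the `mousquet` crate introduced in this commit is on the dependency path (the `mousquet/examples/*.rs` files below do the same thing against real fixtures):

```rust
// Sketch: compare two Python sources and print matched character spans.
fn main() {
    let a = "def f(x):\n    return x + 1\n";
    let b = "def g(y):\n    return y + 1\n";
    let sims = mousquet::similarity(mousquet::lang::PYTHON, a, b);
    for m in sims.token_matches {
        // Each Match holds a character range into each source.
        println!("a[{:?}] ~ b[{:?}]", m.0, m.1);
    }
}
```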

assets/screenshot.png (new binary file, 78 KiB, not shown)

mousquet/Cargo.toml (new file)
@@ -0,0 +1,6 @@
[package]
name = "mousquet"
version = "0.1.0"
edition = "2024"

[dependencies]

mousquet/examples/differents.rs (new file)
@@ -0,0 +1,10 @@
fn main() {
	let source_a = include_str!("primes_1.py");
	let source_b = include_str!("primes_2.py");
	let sims = mousquet::similarity(mousquet::lang::PYTHON, source_a, source_b);
	for sim in sims.token_matches {
		let text_in_a = &source_a[sim.0.clone()].to_string().replace("\n", "\\n");
		let text_in_b = &source_b[sim.1.clone()].to_string().replace("\n", "\\n");
		println!("Found similarity {sim:?}\n\ta: '{text_in_a}'\n\tb: '{text_in_b}'");
	}
}

mousquet/examples/primes_1.py (new file)
@@ -0,0 +1,21 @@

# Generates the sequence of prime numbers up to a maximum number.
def primes(max = 999_999_999):
    found = list[int]()  # Known primes for subsequent divisibility checks.
    for value in range(2, max):
        is_prime = True  # Prime until proven otherwise.
        for prime in found:
            if value % prime == 0:
                is_prime = False
                break
        if is_prime:
            yield value
            found.append(value)


def main():
    for p in primes():
        print(p)


if __name__ == "__main__": main()

mousquet/examples/primes_1_ren.py (new file)
@@ -0,0 +1,24 @@
# original file ?


def get_pr(limit = 999_999_999):
    result = list[int]()

    for num in range(2, limit):
        valid = True
        for known in result:
            if num % known == 0:
                valid = False
                break

        if valid:
            yield num
            result.append(num)


def main():
    for num in get_pr():
        print(num)


if __name__ == "__main__": main()

mousquet/examples/primes_2.py (new file)
@@ -0,0 +1,24 @@

from typing import Generator


def prime_numbers(max = 999_999_999):
    def rec_between(value: int, at: int, until: int) -> bool:
        if at >= until: return True
        if value % at == 0: return False
        return rec_between(value, at + 1, until)
    def rec(value: int) -> Generator[int]:
        if value >= max: return
        if rec_between(value, 2, value): yield value
        for r in rec(value + 1): yield r
    for r in rec(2): yield r


def print_all():
    for p in prime_numbers():
        print(p)


if __name__ == "__main__": print_all()

# author: mb

mousquet/examples/renamed.rs (new file)
@@ -0,0 +1,10 @@
fn main() {
	let source_a = include_str!("primes_1.py");
	let source_b = include_str!("primes_1_ren.py");
	let sims = mousquet::similarity(mousquet::lang::PYTHON, source_a, source_b);
	for sim in sims.token_matches {
		let text_in_a = &source_a[sim.0.clone()].to_string().replace("\n", "\\n");
		let text_in_b = &source_b[sim.1.clone()].to_string().replace("\n", "\\n");
		println!("Found similarity {sim:?}\n\ta: '{text_in_a}'\n\tb: '{text_in_b}'");
	}
}

mousquet/examples/same.rs (new file)
@@ -0,0 +1,10 @@
fn main() {
	let source_a = include_str!("primes_1.py");
	let source_b = include_str!("primes_1.py");
	let sims = mousquet::similarity(mousquet::lang::PYTHON, source_a, source_b);
	for sim in sims.token_matches {
		let text_in_a = &source_a[sim.0.clone()].to_string().replace("\n", "\\n");
		let text_in_b = &source_b[sim.1.clone()].to_string().replace("\n", "\\n");
		println!("Found similarity {sim:?}\n\ta: '{text_in_a}'\n\tb: '{text_in_b}'");
	}
}

mousquet/examples/small.py (new file)
@@ -0,0 +1,2 @@
def hello():
    print("Hello World")

mousquet/examples/tokenize.rs (new file)
@@ -0,0 +1,6 @@
fn main() {
	let source = include_str!("small.py");
	let language = mousquet::lang::python::LANG;
	let tokens = (language.tokenizer)(source);
	dbg!(&tokens, tokens.len());
}

mousquet/src/lang.rs (new file)
@@ -0,0 +1,39 @@
use std::{fmt::Debug, ops::Range};

pub mod python;

pub const PYTHON: Lang = python::LANG;
pub const ALL: &[Lang] = &[PYTHON];

#[derive(Debug, Clone, Copy)]
pub struct Lang {
	pub id: &'static str,
	pub tokenizer: fn(&str) -> Vec<Located<Token>>,
	pub ignored_token: &'static [&'static str],
	pub ignored_token_content: &'static [&'static str],
}

pub type Span = Range<usize>;
pub type Located<T> = (Span, T);

#[derive(Clone, PartialEq, Eq, Hash)]
pub struct Token {
	pub kind: &'static str,
	pub content: String,
}

impl Token {
	pub fn of_kind(kind: &'static str) -> impl Fn(&str) -> Token {
		move |content| Token {
			kind,
			content: content.into(),
		}
	}
}

impl Debug for Token {
	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
		let Token { kind, content } = self;
		f.write_fmt(format_args!(r#"Token({kind},"{content}")"#))
	}
}
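
`Token::of_kind` is a curried constructor: it fixes `kind` and hands back a closure that stamps any text with it, which is what lets the tokenizer below write `.map(Token::of_kind("op"))`. A minimal sketch, assuming the crate is importable:

```rust
use mousquet::lang::Token;

fn main() {
    // of_kind("op") is a Fn(&str) -> Token with the kind baked in.
    let op = Token::of_kind("op");
    let (plus, star) = (op("+"), op("*"));
    assert_eq!(plus.kind, "op");
    assert_eq!(star.content, "*");
    println!("{plus:?} {star:?}"); // prints: Token(op,"+") Token(op,"*")
}
```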

mousquet/src/lang/python.rs (new file)
@@ -0,0 +1,171 @@
use crate::lang::{Lang, Located, Token};

pub const LANG: Lang = Lang {
	id: "python",
	tokenizer,
	ignored_token: &["sp"],
	ignored_token_content: &["id"],
};

pub fn tokenizer(text: &str) -> Vec<Located<Token>> {
	Tokenizer(text, text.len()).collect()
}

pub struct Tokenizer<'s>(&'s str, usize);

impl<'s> Iterator for Tokenizer<'s> {
	type Item = Located<Token>;
	fn next(&mut self) -> Option<Self::Item> {
		if self.0.is_empty() {
			return None;
		}
		self.skip_comment();
		let start = self.1 - self.0.len();
		let result = self
			.parse_space()
			.or_else(|| self.parse_op())
			.or_else(|| self.parse_str())
			.or_else(|| self.parse_ident())
			.or_else(|| self.parse_unknown());
		let end = self.1 - self.0.len();
		result.map(|r| ((start..end), r))
	}
}

impl<'s> Tokenizer<'s> {
	fn skip_comment(&mut self) {
		while self.0.starts_with("#") {
			let line_length = self.0.find("\n").unwrap_or(self.0.len());
			self.0 = &self.0[line_length..];
		}
	}

	fn try_take(&mut self, word: &str) -> Option<&str> {
		if self.0.strip_prefix(word).is_some() {
			let (word, rest) = self.0.split_at(word.len());
			self.0 = rest;
			Some(word)
		} else {
			None
		}
	}

	fn parse_space(&mut self) -> Option<Token> {
		[" ", "\n", "\t", "\r"]
			.iter()
			.filter_map(|op| self.try_take(op).map(Token::of_kind("sp")))
			.next()
	}

	fn parse_op(&mut self) -> Option<Token> {
		OPERATORS
			.iter()
			.filter_map(|op| self.try_take(op).map(Token::of_kind("op")))
			.next()
	}

	fn parse_str(&mut self) -> Option<Token> {
		let (open, close) = STR_STARTS.iter().find(|(s, _)| self.0.starts_with(s))?;
		let mut content_length = 0;
		loop {
			let prefix_length = open.len() + content_length;
			let remainder = match self.0.get(prefix_length..) {
				None => break,
				Some("") => break,
				Some(r) => r,
			};
			if remainder.starts_with("\\") {
				content_length += 2;
				continue;
			}
			if remainder.starts_with(close) {
				let length = open.len() + content_length + close.len();
				let content = &self.0[..length];
				return self.try_take(content).map(Token::of_kind("str"));
			}
			content_length += 1;
		}
		None
	}

	fn parse_ident(&mut self) -> Option<Token> {
		let forbidden = " \n\t\r!-@*/&%^+<=>|~()[]{}:;,.";
		let length = self.0.chars().take_while(|c| !forbidden.contains(*c)).count();
		self.try_take(&self.0[..length]).map(|content| {
			let kind = match KEYWORDS.contains(&content) {
				true => "kw",
				false => "id",
			};
			Token::of_kind(kind)(content)
		})
	}

	fn parse_unknown(&mut self) -> Option<Token> {
		let next_break = self.0.find(' ').unwrap_or(self.0.len());
		let content = self.try_take(&self.0[..next_break]).unwrap();
		Some(Token::of_kind("unk")(content))
	}
}

/// Ordered by size then alphabetically.
const OPERATORS: &[&str] = &[
	"**=", //
	"//=", //
	"<<=", //
	">>=", //
	"-=", //
	"!=", //
	"[]", //
	"@=", //
	"**", //
	"*=", //
	"//", //
	"/=", //
	"&=", //
	"%=", //
	"^=", //
	"+=", //
	"<<", //
	"<=", //
	"==", //
	">=", //
	">>", //
	"|=", //
	"-", //
	"@", //
	"*", //
	"/", //
	"&", //
	"%", //
	"^", //
	"+", //
	"<", //
	"=", //
	">", //
	"|", //
	"~", //
	"(", //
	")", //
	"[", //
	"]", //
	"{", //
	"}", //
	":", //
	";", //
	",", //
	".", //
];

const KEYWORDS: &[&str] = &[
	"def", "and", "or", "not", "for", "while", "in", "try", "raise", "except", "yield", "return", "import", "from",
	"as",
];

const STR_STARTS: &[(&str, &str)] = &[
	(r#"r""""#, r#"""""#),
	(r#"b""""#, r#"""""#),
	(r#"""""#, r#"""""#),
	(r#"r""#, r#"""#),
	(r#"b""#, r#"""#),
	(r#"""#, r#"""#),
];
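
One detail worth pausing on: `parse_op` takes the first entry of `OPERATORS` that matches, so the list's size-descending order is what implements maximal munch. If single-character operators came first, `**=` would tokenize as three tokens. A standalone sketch of that failure mode (the `first_op` helper is hypothetical, not part of the crate):

```rust
// Hypothetical helper mirroring parse_op's "first match wins" scan.
fn first_op<'a>(ops: &[&'a str], input: &str) -> Option<&'a str> {
    ops.iter().copied().find(|op| input.starts_with(op))
}

fn main() {
    let longest_first = ["**=", "**", "*"]; // the order OPERATORS uses
    let shortest_first = ["*", "**", "**="];
    assert_eq!(first_op(&longest_first, "**= b"), Some("**="));
    assert_eq!(first_op(&shortest_first, "**= b"), Some("*")); // wrong split
}
```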

mousquet/src/lcs.rs (new file)
@@ -0,0 +1,46 @@
use crate::lang::Span;

pub fn longest_common_section<T: Eq>(a: &[T], b: &[T]) -> Option<(Span, Span)> {
	let max_size = a.len().min(b.len());
	for size in (1..=max_size).rev() {
		for a_start in 0..=(a.len() - size) {
			let a_span = a_start..(a_start + size);
			let a_section = &a[a_span.clone()];
			for b_start in 0..=(b.len() - size) {
				let b_span = b_start..(b_start + size);
				let b_section = &b[b_span.clone()];
				if a_section == b_section {
					return Some((a_span, b_span));
				}
			}
		}
	}
	None
}

#[test]
fn test_longest_common_section() {
	fn illustrate<'a>((a, b): (&'a [i32], &'a [i32]), (sa, sb): (Span, Span)) -> (&'a [i32], &'a [i32]) {
		(&a[sa], &b[sb])
	}

	fn case<const A: usize, const B: usize, const E: usize>(a: [i32; A], b: [i32; B], expected: [i32; E]) {
		let res = longest_common_section(&a, &b).unwrap();
		let ill = illustrate((&a, &b), res);
		let exp: (&[i32], &[i32]) = (&expected, &expected);
		assert_eq!(ill, exp);
	}

	case(
		/*****/ [1, 2, 3, 4, 5, 6, 7, 8, 9],
		/**/ [8, 9, 2, 3, 4],
		/********/ [2, 3, 4],
	);

	case(
		//
		[1, 2, 3, 4, 5, 6],
		[1, 2, 3, 4, 5, 6],
		[1, 2, 3, 4, 5, 6],
	);
}
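
Despite living in a module named `lcs`, `longest_common_section` finds the longest common contiguous run (a substring over tokens, not a subsequence), by brute force: it tries every window size from largest to smallest, so the first hit is guaranteed maximal. A usage sketch on plain integer slices, assuming the crate is importable:

```rust
use mousquet::lcs::longest_common_section;

fn main() {
    let a = [9, 9, 1, 2, 3, 4];
    let b = [1, 2, 3, 7, 7];
    // The longest contiguous run shared by both slices is [1, 2, 3].
    let (span_a, span_b) = longest_common_section(&a, &b).unwrap();
    assert_eq!(&a[span_a], &[1, 2, 3][..]);
    assert_eq!(&b[span_b], &[1, 2, 3][..]);
}
```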

mousquet/src/lib.rs (new file)
@@ -0,0 +1,92 @@
use std::ops::Range;

pub mod lang;
pub mod lcs;

use crate::lang::{Lang, Located, Span, Token};

pub fn similarity(lang: Lang, source_a: &str, source_b: &str) -> Similarity {
	let tokens_a = (lang.tokenizer)(source_a);
	let tokens_b = (lang.tokenizer)(source_b);

	let exact_matches = Vec::new();
	// TODO

	let mut token_matches = Vec::new();
	{
		let tokens_a = tokens_a.clone();
		let tokens_b = tokens_b.clone();
		let (tokens_a, comparables_a) = comparable_parts_of(&lang, &tokens_a);
		let (tokens_b, comparables_b) = comparable_parts_of(&lang, &tokens_b);
		let mut segments_a = vec![(tokens_a, comparables_a)];
		let mut segments_b = vec![(tokens_b, comparables_b)];

		let length_threshold = 6;
		while let Some(biggest_common_segment) = segments_a
			.iter()
			.enumerate()
			.flat_map(|(segment_index_a, (_, segment_a))| {
				segments_b
					.iter()
					.enumerate()
					.filter_map(move |(segment_index_b, (_, segment_b))| {
						let common = lcs::longest_common_section(segment_a, segment_b)?;
						Some(((segment_index_a, segment_index_b), common))
					})
			})
			.filter(|(_, (range_a, _))| range_a.len() > length_threshold)
			.max_by_key(|(_, (range_a, _))| range_a.len())
		{
			let ((segment_index_a, segment_index_b), (token_range_a, token_range_b)) = biggest_common_segment;
			let segment_a = segments_a.remove(segment_index_a);
			let segment_b = segments_b.remove(segment_index_b);

			let (tokens_l, tokens_a, tokens_r) = slice_range(segment_a.0, token_range_a.clone());
			let (compas_l, _comps_a, compas_r) = slice_range(segment_a.1, token_range_a);
			segments_a.extend_from_slice(&[(tokens_l, compas_l), (tokens_r, compas_r)]);

			let (tokens_l, tokens_b, tokens_r) = slice_range(segment_b.0, token_range_b.clone());
			let (compas_l, _comps_b, compas_r) = slice_range(segment_b.1, token_range_b);
			segments_b.extend_from_slice(&[(tokens_l, compas_l), (tokens_r, compas_r)]);

			let (first, last) = (tokens_a.first().unwrap(), tokens_a.last().unwrap());
			let character_span_a = first.0.start..last.0.end;
			let (first, last) = (tokens_b.first().unwrap(), tokens_b.last().unwrap());
			let character_span_b = first.0.start..last.0.end;
			token_matches.push(Match(character_span_a, character_span_b));
		}
	}

	Similarity {
		exact_matches,
		token_matches,
	}
}

fn slice_range<T>(mut items: Vec<T>, range: Span) -> (Vec<T>, Vec<T>, Vec<T>) {
	let end = items.split_off(range.end);
	let middle = items.split_off(range.start);
	let start = items;
	(start, middle, end)
}

type TokenAndContent = (&'static str, Option<String>);
fn comparable_parts_of(lang: &Lang, tokens: &[(Range<usize>, Token)]) -> (Vec<Located<Token>>, Vec<TokenAndContent>) {
	tokens
		.iter()
		.filter_map(|token @ (_, Token { kind, content })| match kind {
			k if lang.ignored_token.contains(k) => None,
			k if lang.ignored_token_content.contains(k) => Some(((token.clone()), (*k, None))),
			k => Some((token.clone(), (*k, Some(content.clone())))),
		})
		.collect()
}

#[derive(Debug, Clone)]
pub struct Similarity {
	pub exact_matches: Vec<Match>,
	pub token_matches: Vec<Match>,
}

#[derive(Debug, Clone)]
pub struct Match(pub Range<usize>, pub Range<usize>);
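
The loop above is greedy: it repeatedly extracts the biggest common run longer than `length_threshold` tokens, then re-inserts the unmatched flanks as fresh segments, so no two reported matches can overlap. `slice_range` is the splitting primitive; a small demonstration of its behavior (a local copy, since the helper is private to the crate):

```rust
// Local copy of lib.rs's private slice_range, for illustration only.
fn slice_range<T>(mut items: Vec<T>, range: std::ops::Range<usize>) -> (Vec<T>, Vec<T>, Vec<T>) {
    let end = items.split_off(range.end);      // items keeps [..range.end]
    let middle = items.split_off(range.start); // items keeps [..range.start]
    (items, middle, end)
}

fn main() {
    let (left, mid, right) = slice_range(vec!['a', 'b', 'c', 'd', 'e'], 1..3);
    assert_eq!((left, mid, right), (vec!['a'], vec!['b', 'c'], vec!['d', 'e']));
}
```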

mousquetaire/Cargo.toml (new file)
@@ -0,0 +1,7 @@
[package]
name = "mousquetaire"
version = "0.1.0"
edition = "2024"

[dependencies]
mousquet = { path = "../mousquet" }

mousquetaire/src/main.rs (new file)
@@ -0,0 +1,61 @@
use std::{env::args, fs};

use mousquet::lang::Span;

pub struct Args {
	file_a: String,
	file_b: String,
}

impl Args {
	pub fn parse() -> Self {
		let [_, file_a, file_b] = args()
			.collect::<Vec<_>>()
			.try_into()
			.expect("Usage: mousquetaire <file_a> <file_b>");
		Self { file_a, file_b }
	}
}

fn main() {
	let Args { file_a, file_b } = Args::parse();
	let source_a = fs::read_to_string(&file_a).unwrap();
	let source_b = fs::read_to_string(&file_b).unwrap();
	let similarities = mousquet::similarity(mousquet::lang::PYTHON, &source_a, &source_b);

	let mut similarities_in_a: Vec<_> = similarities.token_matches.iter().map(|s| s.0.clone()).collect();
	let mut similarities_in_b: Vec<_> = similarities.token_matches.iter().map(|s| s.1.clone()).collect();
	similarities_in_a.sort_by_key(|s| s.start);
	similarities_in_b.sort_by_key(|s| s.start);
	println!();
	print_file_with_similarities(file_a, source_a, similarities_in_a);
	print_file_with_similarities(file_b, source_b, similarities_in_b);
}

fn print_file_with_similarities(file_name: String, file_content: String, sorted_similarities: Vec<Span>) {
	println!("┌────────────────────────────────────────");
	println!("│ File '{file_name}':");
	println!("├────────────────────────────────────────");
	print!("│");
	let mut prev_end = 0;
	for sim in sorted_similarities {
		let before = &file_content[prev_end..sim.start];
		let inside = &file_content[sim.start..sim.end];
		prev_end = sim.end;
		print_formatted_text(before, "│ ", (BLUE, RESET));
		print_formatted_text(inside, "│ ", (YELLOW, RESET));
	}
	print_formatted_text(&file_content[prev_end..], "│ ", (BLUE, RESET));
	println!();
	println!("└────────────────────────────────────────");
}

fn print_formatted_text(text: &str, prefix: &str, color: (&str, &str)) {
	let (col_start, col_end) = color;
	let prefixed = text.replace("\n", &format!("{col_end}\n{prefix}{col_start}"));
	print!("{col_start}{prefixed}{col_end}");
}

const YELLOW: &str = "\x1b[0;33m";
const BLUE: &str = "\x1b[0;34m";
const RESET: &str = "\x1b[0m";
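
`print_formatted_text` keeps the `│ ` gutter uncolored by closing the color before every newline, printing the prefix, and reopening the color afterwards. A minimal sketch of that substitution, outside the binary:

```rust
fn main() {
    let (yellow, reset) = ("\x1b[0;33m", "\x1b[0m");
    // Same replace as print_formatted_text: color off, newline, gutter, color on.
    let out = "one\ntwo".replace('\n', &format!("{reset}\n│ {yellow}"));
    println!("{yellow}{out}{reset}");
}
```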

rustfmt.toml (new file)
@@ -0,0 +1,3 @@

max_width = 120
hard_tabs = true