This commit is contained in:
JOLIMAITRE Matthieu 2024-03-27 02:21:34 +01:00
commit 02515c9675
17 changed files with 1653 additions and 0 deletions

2
.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
/bin
/deno.lock

18
.vscode/c_cpp_properties.json vendored Normal file
View file

@ -0,0 +1,18 @@
{
"configurations": [
{
"name": "linux-gcc-x64",
"includePath": [
"${workspaceFolder}/**"
],
"compilerPath": "/usr/bin/gcc",
"cStandard": "${default}",
"cppStandard": "${default}",
"intelliSenseMode": "linux-gcc-x64",
"compilerArgs": [
""
]
}
],
"version": 4
}

24
.vscode/launch.json vendored Normal file
View file

@ -0,0 +1,24 @@
{
"version": "0.2.0",
"configurations": [
{
"name": "C/C++ Runner: Debug Session",
"type": "cppdbg",
"request": "launch",
"args": [],
"stopAtEntry": false,
"externalConsole": false,
"cwd": "/home/mb/Projects/011-epita-proj/tgr/src",
"program": "/home/mb/Projects/011-epita-proj/tgr/src/build/Debug/outDebug",
"MIMode": "gdb",
"miDebuggerPath": "gdb",
"setupCommands": [
{
"description": "Enable pretty-printing for gdb",
"text": "-enable-pretty-printing",
"ignoreFailures": true
}
]
}
]
}

60
.vscode/settings.json vendored Normal file
View file

@ -0,0 +1,60 @@
{
"deno.enable": true,
"C_Cpp_Runner.cCompilerPath": "gcc",
"C_Cpp_Runner.cppCompilerPath": "g++",
"C_Cpp_Runner.debuggerPath": "gdb",
"C_Cpp_Runner.cStandard": "",
"C_Cpp_Runner.cppStandard": "",
"C_Cpp_Runner.msvcBatchPath": "",
"C_Cpp_Runner.useMsvc": false,
"C_Cpp_Runner.warnings": [
"-Wall",
"-Wextra",
"-Wpedantic",
"-Wshadow",
"-Wformat=2",
"-Wcast-align",
"-Wconversion",
"-Wsign-conversion",
"-Wnull-dereference"
],
"C_Cpp_Runner.msvcWarnings": [
"/W4",
"/permissive-",
"/w14242",
"/w14287",
"/w14296",
"/w14311",
"/w14826",
"/w44062",
"/w44242",
"/w14905",
"/w14906",
"/w14263",
"/w44265",
"/w14928"
],
"C_Cpp_Runner.enableWarnings": true,
"C_Cpp_Runner.warningsAsError": false,
"C_Cpp_Runner.compilerArgs": [],
"C_Cpp_Runner.linkerArgs": [],
"C_Cpp_Runner.includePaths": [],
"C_Cpp_Runner.includeSearch": [
"*",
"**/*"
],
"C_Cpp_Runner.excludeSearch": [
"**/build",
"**/build/**",
"**/.*",
"**/.*/**",
"**/.vscode",
"**/.vscode/**"
],
"C_Cpp_Runner.useAddressSanitizer": false,
"C_Cpp_Runner.useUndefinedSanitizer": false,
"C_Cpp_Runner.useLeakSanitizer": false,
"C_Cpp_Runner.showCompilationTime": false,
"C_Cpp_Runner.useLinkTimeOptimization": false,
"C_Cpp_Runner.msvcSecureNoWarnings": false
}

33
README.md Normal file
View file

@ -0,0 +1,33 @@
# CF Guesser
Clang Format guesser.
## Description
This is a script to find a clang-format configuration similar to the one used to
format a specific source file.
It works by formatting copies of the provided source file with each candidate value of each property and keeping, for every property, the value that modifies the source the least.
## Usage
### Dependencies
- [deno](https://deno.land/)
- [clang-format](https://clang.llvm.org/docs/ClangFormat.html) (comes with most distributions of clang)
```sh
./bin/cfguesser --help
# [cfguesser.ts] Usage: cfguesser <source_file> <config_output>
./bin/cfguesser source.c clang-format
# [cfguesser.ts] reading input from 'source.c'.
# [cfguesser.ts] scoring properties.
# [cfguesser.ts] Filtering guesses.
# [cfguesser.ts] Certainty 100 %
# [cfguesser.ts] writing config to 'clang-format'
```
### Building
`./build.sh`

7
build.sh Executable file
View file

@ -0,0 +1,7 @@
#!/bin/sh
# Builds the cfguesser command-line tool into bin/cfguesser.
# Abort on the first failing command.
set -e
# Work from the directory containing this script, whatever the caller's cwd is.
cd "$(dirname "$(realpath "$0")")"
mkdir -p "bin"
# Drop any previous build output before compiling.
rm -f bin/cfguesser
# -A allows all permissions for the compiled program; -o sets the output path.
deno compile -A -o bin/cfguesser src/cfguesser.ts

6
deno.json Normal file
View file

@ -0,0 +1,6 @@
{
"fmt": {
"useTabs": true,
"lineWidth": 120
}
}

39
src/cfguesser.ts Executable file
View file

@ -0,0 +1,39 @@
#!/bin/env -S deno run -A
import { all_properties } from "./lib/all_properties.ts";
import { ConfigFile } from "./lib/config_file.ts";
import { Guess } from "./lib/guess.ts";
import { log_from } from "./lib/utils.ts";
// Logger tagged with this module's file name, registered at level 0.
const log = log_from(import.meta, 0);

/**
 * CLI entry point.
 *
 * Reads the source file named on the command line, asks every known property
 * for its best guess, assembles the non-null guesses into a config, reports how
 * closely that config reproduces the source, and writes the config to disk.
 */
async function main() {
	const { input_source_path, output_path } = parse_args(Deno.args);

	log(`reading input from '${input_source_path}'.`);
	const source = await Deno.readTextFile(input_source_path);

	log("scoring properties.");
	// All properties are scored concurrently.
	const pending = all_properties().map((property) => property.guess_from_source(source));
	const guesses = await Promise.all(pending);

	log("Filtering guesses.");
	// A null guess means the property could not be decided from this source.
	const kept: Guess[] = [];
	for (const candidate of guesses) {
		if (candidate !== null) kept.push(candidate);
	}

	const guessed_config = ConfigFile.from_guesses(kept);
	const certainty = await guessed_config.calculate_certainty(source);
	log("Certainty", certainty * 100, "%");

	log(`writing config to '${output_path}'`);
	await Deno.writeTextFile(output_path, guessed_config.serialize());
}
/**
 * Extracts the two positional command-line arguments.
 *
 * Prints the usage line and terminates the process when help is requested
 * (`-h` / `--help`, exit status 0) or when fewer than two arguments are given
 * (exit status 1). Arguments beyond the second are ignored.
 *
 * @param args Raw command-line arguments (without the program name).
 * @returns The input source path and the config output path.
 */
function parse_args(args: string[]) {
	const print_usage_and_exit = (exit_code: number): never => {
		log("Usage: cfguesser <source_file> <config_output>");
		Deno.exit(exit_code);
	};
	// Asking for help is not an error, so it must not report a failure status.
	if (args.some((arg) => arg === "-h" || arg === "--help")) print_usage_and_exit(0);
	if (args.length < 2) print_usage_and_exit(1);
	const [input_source_path, output_path] = args;
	return { input_source_path, output_path };
}
// Run the CLI only when this module is the program's entry point, not when it is imported.
if (import.meta.main) await main();

46
src/lib/all_properties.ts Normal file
View file

@ -0,0 +1,46 @@
import { Property } from "./property.ts";
import { z } from "https://deno.land/x/zod@v3.22.4/mod.ts";
import schema from "./data/schema.json" with { type: "json" };
/**
 * Validates one entry of the JSON schema's `properties` map.
 *
 * The entry must match one of the recognised shapes: a `string` (optionally
 * with an `enum` list), an `integer`, a `boolean`, an `object` with
 * `properties`, an `array` with `items`, or a `oneOf` list. `parse` throws
 * when none of them match.
 *
 * @param input Untrusted schema fragment.
 * @returns The validated fragment, typed as the union of the shapes.
 */
function schema_property_parse(input: unknown) {
	const string_shape = z.object({
		type: z.literal("string"),
		enum: z.array(z.string()).or(z.undefined()),
	});
	const integer_shape = z.object({ type: z.literal("integer") });
	const boolean_shape = z.object({ type: z.literal("boolean") });
	const object_shape = z.object({
		type: z.literal("object"),
		properties: z.record(z.string(), z.unknown()),
	});
	const array_shape = z.object({ type: z.literal("array"), items: z.unknown() });
	const one_of_shape = z.object({ oneOf: z.unknown().array() });

	// The shapes are tried in this order.
	const any_shape = string_shape
		.or(integer_shape)
		.or(boolean_shape)
		.or(object_shape)
		.or(array_shape)
		.or(one_of_shape);
	return any_shape.parse(input);
}
// Schema property names that are never turned into guessable properties.
const blacklist = ["Language", "InsertTrailingCommas", "QualifierAlignment"];

/**
 * Builds the list of guessable properties from the bundled JSON schema.
 *
 * - string properties with an `enum` use the enum members as candidates;
 * - boolean properties use `"true"` / `"false"`;
 * - integer properties use a fixed set of sample values.
 *
 * Blacklisted names, `oneOf` entries, and every other shape are skipped.
 *
 * @returns One `Property` per supported schema entry, in schema order.
 */
export function all_properties() {
	const schema_properties = schema.properties as Record<string, unknown>;
	const collected: Property[] = [];
	for (const [name, raw] of Object.entries(schema_properties)) {
		if (blacklist.includes(name)) continue;
		const parsed = schema_property_parse(raw);
		if ("oneOf" in parsed) continue;
		switch (parsed.type) {
			case "string":
				// Free-form strings (no enum) have no finite candidate list.
				if (parsed.enum !== undefined) collected.push(new Property(name, parsed.enum));
				break;
			case "boolean":
				collected.push(new Property(name, ["true", "false"]));
				break;
			case "integer":
				collected.push(new Property(name, ["1", "2", "4", "8", "16", "80", "120"]));
				break;
		}
	}
	return collected;
}
// Smoke test: only checks that all_properties() runs without throwing and prints its result.
Deno.test("test_all_properties", () => {
console.log(all_properties());
});

25
src/lib/config_file.ts Normal file
View file

@ -0,0 +1,25 @@
import { Guess } from "./guess.ts";
import { clang_format } from "./utils.ts";
import { source_distance } from "./source_distance.ts";
/**
 * A clang-format configuration expressed as an ordered list of
 * `[property name, value]` pairs.
 */
export class ConfigFile {
	/** The `[name, value]` pairs making up this configuration. */
	properties: [string, string][];

	constructor(properties: [string, string][]) {
		this.properties = properties;
	}

	/** Builds a configuration holding one entry per guess. */
	static from_guesses(guesses: Guess[]) {
		return new ConfigFile(guesses.map((g) => [g.property.name, g.value]));
	}

	/**
	 * Measures how closely this configuration reproduces `source`.
	 *
	 * The source is formatted with all properties of this configuration and
	 * compared with the original text.
	 *
	 * @param source The original source text.
	 * @returns A ratio in `[0, 1]`; 1 means the formatted text is at distance 0
	 * from the source.
	 */
	async calculate_certainty(source: string) {
		const formatted = await clang_format(source, ...this.properties);
		const distance = source_distance(source, formatted);
		// An empty source would divide by zero and yield NaN.
		if (source.length === 0) return distance === 0 ? 1 : 0;
		const ratio = (source.length - distance) / source.length;
		// The distance is not bounded by the source length, so keep the ratio from going negative.
		return Math.max(0, ratio);
	}

	/** Renders the configuration as one `name: value` line per property. */
	serialize() {
		return this.properties.map(([name, value]) => `${name}: ${value}`).join("\n");
	}
}

1188
src/lib/data/schema.json Normal file

File diff suppressed because it is too large Load diff

5
src/lib/data/schema_update.sh Executable file
View file

@ -0,0 +1,5 @@
#!/bin/sh
# Refreshes schema.json (next to this script) from SchemaStore.
set -e
cd "$(dirname "$(realpath "$0")")"
# wget -O truncates its target before the transfer starts, so a failed download
# would destroy the bundled schema. Download to a temporary file and only
# replace schema.json once the transfer has succeeded.
tmp_schema="./schema.json.tmp"
trap 'rm -f "$tmp_schema"' EXIT
wget -O "$tmp_schema" https://json.schemastore.org/clang-format.json
mv "$tmp_schema" ./schema.json

24
src/lib/guess.ts Normal file
View file

@ -0,0 +1,24 @@
import { source_distance } from "./source_distance.ts";
import { Property } from "./property.ts";
import { clang_format, log_from } from "./utils.ts";
// Logger tagged with this module's file name, using log_from's default level.
export const log = log_from(import.meta);

/**
 * One candidate value for one clang-format property.
 */
export class Guess {
	constructor(public property: Property, public value: string) {}

	/**
	 * Scores this candidate against `source`.
	 *
	 * The source is formatted with only this property/value pair set; the score
	 * is the negated distance between the source and the formatted text, so a
	 * higher score means the candidate changes the source less.
	 *
	 * @param source The original source text.
	 * @returns The score (zero or negative).
	 */
	async test(source: string) {
		const formatted = await clang_format(source, [this.property.name, this.value]);
		log("Scoring", this.property.name, ":", this.value);
		const score = -1 * source_distance(source, formatted);
		log("Scored to", score);
		return score;
	}
}

2
src/lib/lib.ts Normal file
View file

@ -0,0 +1,2 @@
import { log_from } from "./utils.ts";
// Logger tagged with this module's file name, using log_from's default level.
export const log = log_from(import.meta);

33
src/lib/property.ts Normal file
View file

@ -0,0 +1,33 @@
import { Guess } from "./guess.ts";
import { indexed } from "./utils.ts";
/**
 * A clang-format property together with the candidate values to try for it.
 */
export class Property {
	name: string;
	possible_values: string[];

	constructor(name: string, possible_values: string[]) {
		this.name = name;
		this.possible_values = possible_values;
	}

	/**
	 * Picks the candidate value that scores best against `source`.
	 *
	 * All candidates are scored concurrently. When every candidate gets the
	 * same score (or there are no candidates) the property cannot be decided
	 * and `null` is returned.
	 *
	 * @param source The original source text.
	 * @returns The winning guess, or `null` when undecidable.
	 */
	async guess_from_source(source: string) {
		const score_candidate = async (order: number, value: string) => {
			const guess = new Guess(this, value);
			return { order, guess, score: await guess.test(source) };
		};
		const attempts = Array.from(indexed(this.possible_values), ([order, value]) => score_candidate(order, value));
		const outcomes = await Promise.all(attempts);
		if (all_equals(outcomes.map((outcome) => outcome.score))) return null;
		// Highest score wins; on equal scores the candidate listed first wins.
		const winner = outcomes.reduce((lead, next) => {
			if (next.score !== lead.score) return next.score > lead.score ? next : lead;
			return next.order < lead.order ? next : lead;
		});
		return winner.guess;
	}
}
/**
 * Tells whether every element of `arr` is strictly equal (`===`) to the first one.
 * An empty array counts as all-equal.
 */
function all_equals<T>(arr: T[]) {
	const reference = arr[0];
	let position = 0;
	while (position < arr.length) {
		if (arr[position] !== reference) return false;
		position += 1;
	}
	return true;
}

View file

@ -0,0 +1,84 @@
import { assertEquals } from "https://deno.land/std@0.220.1/assert/mod.ts";
import { range } from "./utils.ts";
/**
 * Distance between two source texts, as used for scoring.
 *
 * Currently delegates to `source_distance_wrap`; `source_distance_impl` is the
 * alternative implementation that is not in use.
 */
export function source_distance(a: string, b: string) {
	// Alternative metric kept for reference: source_distance_impl(a, b).
	const measured = source_distance_wrap(a, b);
	return measured;
}
import { distance } from "https://deno.land/x/fastest_levenshtein@1.0.10/mod.ts";
/**
 * Distance between the two texts after passing each through `unify_indents`,
 * computed with the imported `distance` function.
 */
export function source_distance_wrap(a: string, b: string) {
	const normalized_a = unify_indents(a);
	const normalized_b = unify_indents(b);
	return distance(normalized_a, normalized_b);
}
/**
 * Hand-rolled distance: after passing both texts through `unify_indents`,
 * fragments of at least 6 characters common to both are removed; the result
 * is the number of characters left over in both texts, separators excluded.
 */
export function source_distance_impl(a: string, b: string) {
	const [rest_a, rest_b] = remove_common_fragments(unify_indents(a), unify_indents(b), 6);
	// Separator markers stand in for removed fragments and must not be counted.
	const leftover_length = (rest: string) => rest.split(sep_seq).join("").length;
	return leftover_length(rest_a) + leftover_length(rest_b);
}
// Marker inserted where a common fragment was removed; stripped again before leftover lengths are measured.
const sep_seq = "🥖";
/**
 * Normalizes horizontal whitespace so that tabs-vs-spaces and indent-width
 * differences do not count towards the distance between two texts.
 *
 * Every run of tabs and/or spaces is collapsed to a single space. Newlines are
 * left untouched.
 *
 * @param text Text to normalize.
 * @returns The text with each run of tabs/spaces replaced by one space.
 */
function unify_indents(text: string) {
	// A single regex pass reaches the fixpoint directly, without recursion.
	return text.replace(/[\t ]+/g, " ");
}
/**
 * Repeatedly removes fragments shared by `a` and `b` until no common fragment
 * of at least `min_frag_size` characters remains.
 *
 * @returns The two remaining texts as `[rest_a, rest_b]`, with a separator
 * marker wherever a fragment was removed.
 */
function remove_common_fragments(a: string, b: string, min_frag_size: number) {
	let rest_a = a;
	let rest_b = b;
	for (;;) {
		const hit = find_fragment_location(rest_a, rest_b, min_frag_size);
		if (hit === null) return [rest_a, rest_b];
		const [at_a, at_b, length] = hit;
		rest_a = remove_fragment(rest_a, at_a, length);
		rest_b = remove_fragment(rest_b, at_b, length);
	}
}
/**
 * Finds the first pair of positions at which `a` and `b` share a run of at
 * least `min_frag_size` characters.
 *
 * Positions are scanned in order (`a` outermost); the first match is returned,
 * which is not necessarily the longest one.
 *
 * @returns `[start_in_a, start_in_b, shared_length]`, or `null` when there is none.
 */
function find_fragment_location(a: string, b: string, min_frag_size: number) {
	for (let start_a = 0; start_a < a.length; start_a += 1) {
		const tail_a = a.slice(start_a);
		for (let start_b = 0; start_b < b.length; start_b += 1) {
			const shared = common_length(tail_a, b.slice(start_b));
			if (shared < min_frag_size) continue;
			return [start_a, start_b, shared] as const;
		}
	}
	return null;
}
/**
 * Length of the common prefix of `a` and `b`, compared index by index.
 */
function common_length(a: string, b: string) {
	const limit = Math.min(a.length, b.length);
	let shared = 0;
	while (shared < limit && a[shared] === b[shared]) shared += 1;
	return shared;
}
/**
 * Replaces the `size` characters of `text` starting at `start` with the
 * separator marker.
 */
function remove_fragment(text: string, start: number, size: number) {
	const before = text.slice(0, start);
	const after = text.slice(start + size);
	return `${before}${sep_seq}${after}`;
}
// Checks source_distance on the same snippet written in two brace styles.
Deno.test("test_source_distance", () => {
const source_a = `
int main() {
if (a) b;
}`;
const source_b = `
int main()
{
if (a)
{
b;
}
}`;
assertEquals(source_distance(source_a, source_b), 19);
});

57
src/lib/utils.ts Normal file
View file

@ -0,0 +1,57 @@
import { assert, assertEquals } from "https://deno.land/std@0.220.1/assert/mod.ts";
import * as path from "https://deno.land/std@0.220.1/path/mod.ts";
/**
 * Yields the numbers `from`, `from + 1`, ... while they are strictly below `to`.
 * Yields nothing when `from >= to`.
 */
export function* range(from: number, to: number) {
	for (let current = from; current < to; current += 1) yield current;
}
/**
 * Pairs every item of `source` with its zero-based position, yielding
 * `[position, item]` tuples in iteration order.
 */
export function* indexed<T>(source: Iterable<T>) {
	let position = 0;
	for (const item of source) {
		yield [position, item] as const;
		position += 1;
	}
}
// Loggers whose level is above this threshold stay silent.
const LOG_LEVEL = 0;

/**
 * Creates a logger for a module.
 *
 * @param meta The calling module's `import.meta`; its file name becomes the `[tag]` prefix.
 * @param level Level of the logger; messages are dropped when it exceeds `LOG_LEVEL`.
 * @returns A `console.log`-like function that prefixes its output with the tag.
 */
export function log_from(meta: ImportMeta, level = 1) {
	const { pathname } = new URL(meta.url);
	const tag = `[${path.basename(pathname)}]`;
	return (...args: unknown[]) => {
		if (LOG_LEVEL < level) return;
		console.log(tag, ...args);
	};
}
/**
 * Creates a promise together with the function that resolves it.
 *
 * @returns
 * - `promise`: the pending promise;
 * - `resolve`: resolves `promise` with the given item;
 * - `resolved`: live flag, `true` once `resolve` has been called.
 */
export function promise_split<T>() {
	let resolved = false;
	// The Promise executor runs synchronously, so this is assigned before use.
	let resolver!: (item: T) => void;
	const promise = new Promise<T>((res) => {
		resolver = res;
	});
	const resolve = (item: T) => {
		resolved = true;
		resolver(item);
	};
	return {
		promise,
		resolve,
		// A getter is needed: a plain property would freeze the value at `false`.
		get resolved() {
			return resolved;
		},
	};
}
/**
 * Returns a promise that resolves after roughly `ms` milliseconds.
 */
export function sleep(ms: number) {
	return new Promise((wake) => {
		setTimeout(wake, ms);
	});
}
/**
 * Formats `source` by piping it through the `clang-format` executable.
 *
 * @param source Text written to clang-format's standard input.
 * @param style_properties `[name, value]` pairs passed as an inline `--style={...}` argument.
 * @returns clang-format's standard output, decoded as text.
 * @throws Error when the clang-format process does not finish successfully.
 */
export async function clang_format(source: string, ...style_properties: [string, string][]) {
	const pairs = style_properties.map(([name, value]) => name + ": " + value);
	const style_arg = "--style={" + pairs.join(",") + "}";

	const child = new Deno.Command("clang-format", {
		args: ["-", style_arg],
		stdin: "piped",
		stdout: "piped",
	}).spawn();

	// Send the whole source, then close stdin so clang-format sees end of input.
	const stdin_writer = child.stdin.getWriter();
	await stdin_writer.write(new TextEncoder().encode(source));
	await stdin_writer.close();

	const { success, stdout } = await child.output();
	if (!success) throw new Error(`clang-format failed with style_arg: '${style_arg}'`);
	return new TextDecoder().decode(stdout);
}
// Runs clang_format with no style properties and expects this statement to come back unchanged.
Deno.test("try_clang_format", async () => {
const input = "int a = 3;";
const output = await clang_format(input);
assertEquals(input, output);
});