-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Refactoring suggestins #1
base: main
Are you sure you want to change the base?
Changes from 8 commits
bd6665d
61db9df
5716d22
ab653a5
8e48606
c106481
cd240c9
e0a5a24
d753fb8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
venv | ||
dist | ||
*.egg-info | ||
.pytest_cache | ||
__pycache__ |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,25 @@ | ||
[project] | ||
name = "diff-files" | ||
version = "0.1" | ||
description = "Return the difference between FILE1 and FILE2" | ||
readme = "Readme.md" | ||
authors = [{name = "Lucas Payr", email = "orasund@gmail.com"}] | ||
dependencies = [ | ||
"click", | ||
] | ||
|
||
[project.scripts] | ||
diff-files = "diff_files:diff_files" | ||
|
||
[project.optional-dependencies] | ||
test = [ | ||
"pytest", | ||
] | ||
dev = [ | ||
"build", | ||
"pip-tools", | ||
] | ||
|
||
[build-system] | ||
requires = ["setuptools >= 61.0"] | ||
build-backend = "setuptools.build_meta" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,10 @@ | ||
pytest | ||
click | ||
# | ||
# This file is autogenerated by pip-compile with Python 3.11 | ||
# by the following command: | ||
# | ||
# pip-compile --output-file='.\requirements.txt' pyproject.toml | ||
# | ||
click==8.1.7 | ||
# via diff-files (pyproject.toml) | ||
colorama==0.4.6 | ||
# via click |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,106 +1,130 @@ | ||
from typing import cast | ||
from dataclasses import dataclass | ||
from typing import cast, IO | ||
import click | ||
|
||
|
||
@dataclass | ||
class Position: | ||
x: int | ||
y: int | ||
|
||
|
||
@dataclass | ||
class Node: | ||
def __init__(self, next_pos: tuple[int, int] | None, length: int) -> None: | ||
self.next_pos = next_pos | ||
self.length = length | ||
next_pos: Position | ||
length: int | ||
|
||
|
||
def build_subsequence_matrix(l1: list[str], l2: list[str]) -> list[list[Node]]: | ||
# we use the matrix as a graph | ||
# where each node points to one of their neighbors | ||
nodes: list[list[Node]] = [] | ||
|
||
def get_node(pos: tuple[int, int]) -> Node | None: | ||
return nodes[pos[0]][pos[1]] if pos[0] >= 0 and pos[1] >= 0 else None | ||
|
||
def compute_node(i1: int, i2: int) -> Node: | ||
next_pos: tuple[int, int] | None | ||
length: int | ||
|
||
if l1[i1] == l2[i2]: | ||
next_pos = (i1 - 1, i2 - 1) | ||
length = nodes[next_pos[0]][next_pos[1]].length + 1 | ||
else: | ||
candidate1 = (i1, i2 - 1) | ||
candidate2 = (i1 - 1, i2) | ||
node1 = get_node(candidate1) | ||
node2 = get_node(candidate2) | ||
|
||
# find the candidate with the bigger length | ||
if node1 is None and node2 is None: | ||
next_pos = None | ||
length = 0 | ||
elif node2 is not None and (node1 is None or node1.length < node2.length): | ||
next_pos = candidate2 | ||
length = node2.length | ||
else: | ||
next_pos = candidate1 | ||
length = cast(Node, node1).length | ||
@dataclass | ||
class Matrix: | ||
nodes: list[list[Node]] | ||
|
||
return Node(next_pos, length) | ||
def get_node(self, pos: Position) -> Node | None: | ||
if pos.x < 0 or pos.y < 0: | ||
return None | ||
return self.nodes[pos.x][pos.y] | ||
|
||
for i1 in range(len(l1)): | ||
nodes.append([]) | ||
for i2 in range(len(l2)): | ||
nodes[i1].append(compute_node(i1, i2)) | ||
@classmethod | ||
def make(cls, l1: list[str], l2: list[str]) -> "Matrix": | ||
def compute_node(i1: int, i2: int, line1, line2) -> Node: | ||
next_pos: Position | None | ||
length: int | ||
|
||
if line1 == line2: | ||
next_pos = Position(i1 - 1, i2 - 1) | ||
length = result.nodes[next_pos.x][next_pos.y].length + 1 | ||
else: | ||
candidate1 = Position(i1, i2 - 1) | ||
candidate2 = Position(i1 - 1, i2) | ||
node1 = result.get_node(candidate1) | ||
node2 = result.get_node(candidate2) | ||
|
||
# find the candidate with the bigger length | ||
if node1 is None and node2 is None: | ||
next_pos = None | ||
length = 0 | ||
elif node2 is not None and (node1 is None or node1.length < node2.length): | ||
next_pos = candidate2 | ||
length = node2.length | ||
else: | ||
next_pos = candidate1 | ||
length = cast(Node, node1).length | ||
|
||
return Node(next_pos, length) | ||
|
||
result = Matrix([]) | ||
for i1, line1 in enumerate(l1): | ||
result.nodes.append([]) | ||
for i2, line2 in enumerate(l2): | ||
result.nodes[i1].append(compute_node(i1, i2, line1, line2)) | ||
return result | ||
|
||
|
||
def build_subsequence_matrix(l1: list[str], l2: list[str]) -> Matrix: | ||
# we use the matrix as a graph | ||
# where each node points to one of their neighbors | ||
nodes = Matrix.make(l1, l2) | ||
return nodes | ||
|
||
|
||
def compute_common_indices(nodes: list[list[Node]]) -> list[tuple[int, int]]: | ||
pos: tuple[int, int] = (len(nodes) - 1, len(nodes[0]) - 1) | ||
def compute_common_indices(nodes: Matrix) -> list[Position]: | ||
common_indices: list[Position] = [] | ||
if not nodes.nodes: | ||
return common_indices | ||
Orasund marked this conversation as resolved.
Show resolved
Hide resolved
|
||
|
||
pos = Position(len(nodes.nodes) - 1, len(nodes.nodes[0]) - 1) | ||
|
||
def get_node(pos: tuple[int, int]) -> Node: | ||
return nodes[pos[0]][pos[1]] | ||
def get_node(p: Position) -> Node: | ||
Orasund marked this conversation as resolved.
Show resolved
Hide resolved
|
||
result = nodes.get_node(p) | ||
if result is None: | ||
return Node(Position(0, 0), 0) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This here is my "tricky" fix for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm a bit puzzled. This is only possible if The first case occurs if the second file is empty. Second case occurs if I forgot a guard inside |
||
return result | ||
|
||
node: Node = get_node(pos) | ||
commonIndices: list[tuple[int, int]] = [] | ||
|
||
while node.next_pos is not None: | ||
nextNode = get_node(node.next_pos) | ||
next_node = get_node(node.next_pos) | ||
|
||
if nextNode.length < node.length: | ||
commonIndices.append(pos) | ||
if next_node.length < node.length: | ||
common_indices.append(pos) | ||
|
||
pos = node.next_pos | ||
node = nextNode | ||
node = next_node | ||
|
||
commonIndices.reverse() | ||
return commonIndices | ||
common_indices.reverse() | ||
return common_indices | ||
|
||
|
||
def compute_differences( | ||
lines1: list[str], lines2: list[str], commonsIndices: list[tuple[int, int]] | ||
lines1: list[str], lines2: list[str], commons_indices: list[Position] | ||
): | ||
indices: list[tuple[int, int]] = ( | ||
[(-1, -1)] + commonsIndices + [(len(lines1), len(lines2))] | ||
indices: list[Position] = ( | ||
[Position(-1, -1)] + commons_indices + [Position(len(lines1), len(lines2))] | ||
) | ||
|
||
return [ | ||
(lines1[iMin[0] + 1 : iMax[0]], lines2[iMin[1] + 1 : iMax[1]]) | ||
(lines1[iMin.x + 1: iMax.x], lines2[iMin.y + 1: iMax.y]) | ||
for (iMin, iMax) in zip(indices, indices[1:]) | ||
] | ||
|
||
|
||
@click.command() | ||
@click.argument("file1") | ||
@click.argument("file2") | ||
def diff_files(file1: str, file2: str): | ||
@click.argument("file1", type=click.File(mode="r")) | ||
@click.argument("file2", type=click.File(mode="r")) | ||
def diff_files(file1: IO, file2: IO): | ||
"""Return the difference between FILE1 and FILE2""" | ||
lines1: list[str] = open(file1, "r").readlines() | ||
lines2: list[str] = open(file2, "r").readlines() | ||
lines1: list[str] = file1.readlines() | ||
lines2: list[str] = file2.readlines() | ||
|
||
# Algorithm taken from | ||
# https://en.wikipedia.org/wiki/Longest_common_subsequence | ||
# | ||
# 1. Build a graph of subsequences (stores as a matrix) | ||
matrix = build_subsequence_matrix(lines1, lines2) | ||
# 2. Walk the graph and collect all the indecies where the strings match | ||
commonIndices = compute_common_indices(matrix) | ||
common_indices = compute_common_indices(matrix) | ||
# 3. Compute the differences based on the collected indecies | ||
differences = compute_differences(lines1, lines2, commonIndices) | ||
differences = compute_differences(lines1, lines2, common_indices) | ||
|
||
click.echo(differences) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1 @@ | ||
pytest tests/ | ||
pytest tests/ |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,9 +1,41 @@ | ||
import os | ||
|
||
from click.testing import CliRunner | ||
from src.diff_files import diff_files | ||
from diff_files import diff_files, build_subsequence_matrix, compute_common_indices, compute_differences | ||
|
||
|
||
def test_empty(): | ||
os.chdir(os.path.dirname(__file__)) | ||
runner = CliRunner() | ||
result = runner.invoke(diff_files, ["data/empty", "data/empty"]) | ||
assert result.exit_code == 0 | ||
assert result.output == "[([], [])]\n" | ||
|
||
|
||
def test(): | ||
def test_basic(): | ||
os.chdir(os.path.dirname(__file__)) | ||
runner = CliRunner() | ||
result = runner.invoke(diff_files, ["tests/data/file1", "tests/data/file2"]) | ||
expected = "[(['Hello \\n'], []), (['beautiful\\n'], ['little\\n', 'boy\\n', 'lives\\n', 'in\\n', 'his\\n', 'own\\n']), (['\\n'], ['!'])]\n" | ||
result = runner.invoke(diff_files, ["data/file1", "data/file2"]) | ||
expected = ("[" | ||
"(['Hello \\n'], []), " | ||
"(['beautiful\\n'], " | ||
"['little\\n', 'boy\\n', 'lives\\n', 'in\\n', 'his\\n', 'own\\n']), " | ||
"(['\\n'], ['!'])" | ||
"]\n") | ||
assert result.output == expected | ||
|
||
|
||
def test_diff(): | ||
os.chdir(os.path.dirname(__file__)) | ||
with open("data/file1") as f1, open("data/file2") as f2: | ||
lines1 = f1.readlines() | ||
lines2 = f2.readlines() | ||
Orasund marked this conversation as resolved.
Show resolved
Hide resolved
|
||
matrix = build_subsequence_matrix(lines1, lines2) | ||
# 2. Walk the graph and collect all the indecies where the strings match | ||
common_indices = compute_common_indices(matrix) | ||
# 3. Compute the differences based on the collected indecies | ||
differences = compute_differences(lines1, lines2, common_indices) | ||
assert differences == [ | ||
(['Hello \n'], []), | ||
(['beautiful\n'], ['little\n', 'boy\n', 'lives\n', 'in\n', 'his\n', 'own\n']), | ||
(['\n'], ['!'])] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In order for
next_pos.y = -1
we would need to enter this branch withnode1 is None
(as there is a guard inside get_node). However, ifnode1 is None
, then either branch 1 or 2 will be entered depending onnode1 is None
.So im not seeing what should be wrong here.