diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..308b503 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +.idea/ +tests/receipts/ +vendor/ +.DS_Store +composer.lock +examples/test.php \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..103bdca --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2020 Kristian Stöckel + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/README.md b/README.md new file mode 100644 index 0000000..f6a958f --- /dev/null +++ b/README.md @@ -0,0 +1,39 @@ +# Lidl eReceipt Parser + +## Installation + +``` +$ composer require mrkriskrisu/lidl-ereceipt-parser +``` + +```json +{ + "require": { + "mrkriskrisu/lidl-ereceipt-parser": "^0.1" + } +} +``` + +## Example Usage +```php +getTotal() . " Euros."; +``` + +## Requirements +This library requires Tesseract OCR v3.02 or later. + +## Get the eReceipt +To receive the eReceipt you need to download the App "Lidl Plus". 
+At your checkout you have to scan your customer card within the App +and you can download the receipt in the app later. + +## Contribution +I'm glad that you want to help this library to be perfect. +Just do your magic and make a Pull Request. ✨ diff --git a/composer.json b/composer.json new file mode 100644 index 0000000..534207e --- /dev/null +++ b/composer.json @@ -0,0 +1,25 @@ +{ + "name": "mrkriskrisu/lidl-ereceipt-parser", + "description": "Library for parsing digital receipts from Lidl (supermarket)", + "type": "library", + "license": "MIT", + "authors": [ + { + "name": "Kristian Stöckel", + "email": "git@k118.de" + } + ], + "require": { + "php": ">=7.1", + "nesbot/carbon": "^2.38", + "thiagoalessio/tesseract_ocr": "^2.9" + }, + "autoload": { + "psr-4": { + "LidlParser\\": "src/" + } + }, + "require-dev": { + "phpunit/phpunit": "^9" + } +} diff --git a/examples/parse.php b/examples/parse.php new file mode 100644 index 0000000..975c1f8 --- /dev/null +++ b/examples/parse.php @@ -0,0 +1,20 @@ +getTimestamp()->diffForHumans() . ". \n"; + echo "You've bought " . count($receipt->getPositions()) . " Products for a total of " . $receipt->getTotal() . "€. \n"; + +} catch (ReceiptParseException $e) { + echo "There is something weird with the receipt... Maybe it's not compatible?\n"; + echo "Error: " . $e->getMessage(); +} catch (TesseractOcrException $e) { + echo "The given Image cant be read successfully: " . 
$e->getMessage(); +} diff --git a/src/Exception/PositionNotFoundException.php b/src/Exception/PositionNotFoundException.php new file mode 100644 index 0000000..fdb7230 --- /dev/null +++ b/src/Exception/PositionNotFoundException.php @@ -0,0 +1,8 @@ +name; + } + + /** + * The total sum of the position + * @return float + * @throws ReceiptParseException + */ + public function getPriceTotal() + { + if ($this->priceTotal !== NULL) + return $this->priceTotal; + if ($this->priceSingle !== NULL && $this->amount !== NULL) + return $this->priceSingle * $this->amount; + if ($this->priceSingle !== NULL && $this->weight !== NULL) + return $this->priceSingle * $this->weight; + throw new ReceiptParseException(); + } + + /** + * The single value for one unit of the product + * @return float + * @throws ReceiptParseException + */ + public function getPriceSingle() + { + if ($this->priceSingle !== NULL) + return $this->priceSingle; + if ($this->priceTotal !== NULL && $this->amount !== NULL) + return $this->priceTotal / $this->amount; + if ($this->priceTotal !== NULL && $this->weight !== NULL) + return $this->priceTotal / $this->weight; + if ($this->priceTotal !== NULL) + return $this->priceTotal; + throw new ReceiptParseException(); + } + + /** + * The Tax Code of the position (e.g. 
"A" or "B") + * @return string|NULL + */ + public function getTaxCode() + { + return $this->taxCode; + } + + /** + * The weight of the position (if the product is weightable) + * @return float|NULL + */ + public function getWeight() + { + return $this->weight; + } + + /** + * The amount of the position (if the product is countable) + * @return int|NULL + */ + public function getAmount() + { + if ($this->amount === NULL && $this->weight === NULL) + return 1; + return $this->amount; + } + + public function setName(string $name) + { + $this->name = $name; + } + + public function setPriceTotal(float $priceTotal) + { + $this->priceTotal = $priceTotal; + } + + public function setPriceSingle(float $priceSingle) + { + $this->priceSingle = $priceSingle; + } + + public function setTaxCode(string $taxCode) + { + $this->taxCode = $taxCode; + } + + public function setWeight(float $weight) + { + $this->weight = $weight; + } + + public function setAmount(int $amount) + { + $this->amount = $amount; + } + +} \ No newline at end of file diff --git a/src/Receipt.php b/src/Receipt.php new file mode 100644 index 0000000..32229bc --- /dev/null +++ b/src/Receipt.php @@ -0,0 +1,190 @@ +rawReceipt = $ocr->run(); + $this->rawReceipt = str_replace('@', '0', $this->rawReceipt); //Maybe there is a better solution to handle these ocr problem? 
+ $this->explodedReceipt = explode("\n", $this->rawReceipt); + print_r($this->explodedReceipt); + } + + /** + * @return float + * @throws ReceiptParseException + */ + public function getTotal(): float + { + if (preg_match('/zu zahlen (-?\d+,\d{2})/', $this->rawReceipt, $match)) + return (float)str_replace(',', '.', $match[1]); + throw new ReceiptParseException(); + } + + /** + * @return string + * @throws ReceiptParseException + */ + public function getPaymentMethod(): string + { + $next = false; + foreach ($this->explodedReceipt as $row) + if ($next) { + if (!preg_match("/(.*) \d+,\d{2}/", $row, $match)) + throw new ReceiptParseException(); + return $match[1]; + } else if (substr(trim($row), 0, 9) == "zu zahlen") + $next = true; + throw new ReceiptParseException(); + } + + /** + * @return bool + */ + public function hasPayedCashless(): bool + { + return preg_match('/(Kreditkarte|Karte)/', $this->rawReceipt); + } + + /** + * @return Carbon + * @throws ReceiptParseException + */ + public function getTimestamp(): Carbon + { + if (preg_match('/(\d{2}).(\d{2}).(\d{2}) (\d{2}):(\d{2})/', $this->rawReceipt, $match)) + return Carbon::create("20" . 
$match[3], $match[2], $match[1], $match[4], $match[5]); + throw new ReceiptParseException(); + } + + /** + * @return int + * @throws ReceiptParseException + */ + private function getProductStartLine(): int + { + foreach (explode("\n", $this->rawReceipt) as $line => $content) + if (trim($content) == "EUR") + return $line + 1; + throw new ReceiptParseException(); + } + + /** + * @return int + * @throws ReceiptParseException + */ + private function getProductEndLine(): int + { + foreach (explode("\n", $this->rawReceipt) as $line => $content) + if (substr(trim($content), 0, 9) == "zu zahlen") + return $line - 1; + throw new ReceiptParseException(); + } + + /** + * @param string $name + * @return Position + * @throws PositionNotFoundException|ReceiptParseException + */ + public function getPositionByName(string $name): Position + { + foreach ($this->getPositions() as $position) { + if ($position->getName() == $name) + return $position; + } + throw new PositionNotFoundException("Position '$name' not found"); + } + + /** + * TODO: Wiege und mehrzahl + * @return array + * @throws ReceiptParseException + */ + public function getPositions(): array + { + $positions = []; + $lastPosition = NULL; + + for ($lineNr = $this->getProductStartLine(); $lineNr <= $this->getProductEndLine(); $lineNr++) { + //echo $this->explodedReceipt[$lineNr]; + if ($this->isProductLine($lineNr)) { + + if ($lastPosition !== NULL) { + $positions[] = $lastPosition; + $lastPosition = NULL; + } + + if (preg_match('/(.*) (-?\d+,\d{2}) ([A-Z])/', $this->explodedReceipt[$lineNr], $match)) { + $lastPosition = new Position(); + $lastPosition->setName(trim($match[1])); + $lastPosition->setPriceTotal((float)str_replace(',', '.', $match[2])); + $lastPosition->setTaxCode($match[3]); + } elseif (preg_match('/(.*) (-?\d+,\d{2})/', $this->explodedReceipt[$lineNr], $match)) { + $lastPosition = new Position(); + $lastPosition->setName(trim($match[1])); + $lastPosition->setPriceTotal((float)str_replace(',', '.', 
$match[2])); + } else throw new ReceiptParseException("Error while parsing Product line"); + + } /*else if ($this->isAmountLine($lineNr)) { + + if (preg_match('/(-?\d+) Stk x *(-?\d+,\d{2})/', $this->expl_receipt[$lineNr], $match)) { + $lastPosition->setAmount((int)$match[1]); + $lastPosition->setPriceSingle((float)str_replace(',', '.', $match[2])); + } else throw new ReceiptParseException("Error while parsing Amount line"); + + } else if ($this->isWeightLine($lineNr)) { + + if (preg_match('/(-?\d+,\d{3}) kg x *(-?\d+,\d{2}) EUR/', $this->expl_receipt[$lineNr], $match)) { + $lastPosition->setWeight((float)str_replace(',', '.', $match[1])); + $lastPosition->setPriceSingle((float)str_replace(',', '.', $match[2])); + } else if (preg_match('/Handeingabe E-Bon *(-?\d+,\d{3}) kg/', $this->expl_receipt[$lineNr], $match)) { + $lastPosition->setWeight((float)str_replace(',', '.', $match[1])); + } else throw new ReceiptParseException("Error while parsing Weight line"); + + }*/ else throw new ReceiptParseException("Error while parsing unknown receipt line"); + + } + + if ($lastPosition !== NULL) + $positions[] = $lastPosition; + + if (count($positions) == 0) + throw new ReceiptParseException("Cannot parse any products on receipt"); + + return $positions; + } + + private function isWeightLine($lineNr) + { + return false; //TODO: Receipt example needed + return strpos($this->expl_receipt[$lineNr], 'kg') !== false; + } + + private function isAmountLine($lineNr) + { + return false; //TODO: Receipt example needed + return strpos($this->expl_receipt[$lineNr], ' Stk x') !== false; + } + + private function isProductLine($lineNr) + { + return true; //TODO: Receipt example needed + return !$this->isWeightLine($lineNr) && !$this->isAmountLine($lineNr); + } +} \ No newline at end of file diff --git a/src/Shop.php b/src/Shop.php new file mode 100644 index 0000000..ebd75cc --- /dev/null +++ b/src/Shop.php @@ -0,0 +1,45 @@ +name = $name; + $this->address = $address; + $this->postalCode = 
$postalCode; + $this->city = $city; + } + + public function getName(): string + { + return $this->name; + } + + public function getAddress(): string + { + return $this->address; + } + + public function getPostalCode(): string + { + return $this->postalCode; + } + + public function getCity(): string + { + return $this->city; + } +} \ No newline at end of file diff --git a/tests/ReceiptParsingTest.php b/tests/ReceiptParsingTest.php new file mode 100644 index 0000000..79b2be4 --- /dev/null +++ b/tests/ReceiptParsingTest.php @@ -0,0 +1,8 @@ +