From 57d1dc7b9bc50238eeaac4f9c6a0b0d1c80cdc4b Mon Sep 17 00:00:00 2001 From: Yi Ge Date: Sat, 20 Apr 2019 23:21:41 +0200 Subject: [PATCH] add initial models --- .gitignore | 125 ++++++++++++++++++++++++++++++++++++++++++++ Pipfile | 14 +++++ Pipfile.lock | 121 ++++++++++++++++++++++++++++++++++++++++++ videocr/__init__.py | 0 videocr/models.py | 47 +++++++++++++++++ 5 files changed, 307 insertions(+) create mode 100644 .gitignore create mode 100644 Pipfile create mode 100644 Pipfile.lock create mode 100644 videocr/__init__.py create mode 100644 videocr/models.py diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..e8b0168 --- /dev/null +++ b/.gitignore @@ -0,0 +1,125 @@ +.DS_Store + +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +.hypothesis/ +.pytest_cache/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don’t work, or not +# install all needed dependencies. +#Pipfile.lock + +# celery beat schedule file +celerybeat-schedule + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ diff --git a/Pipfile b/Pipfile new file mode 100644 index 0000000..e4e1418 --- /dev/null +++ b/Pipfile @@ -0,0 +1,14 @@ +[[source]] +name = "pypi" +url = "https://pypi.org/simple" +verify_ssl = true + +[dev-packages] + +[packages] +pytesseract = "*" +pillow = "*" +opencv-python = "*" + +[requires] +python_version = "3.7" diff --git a/Pipfile.lock b/Pipfile.lock new file mode 100644 index 0000000..035c89b --- /dev/null +++ b/Pipfile.lock @@ -0,0 +1,121 @@ +{ + "_meta": { + "hash": { + "sha256": "101aa7956bfaa0af4aa5a78b93d953f90855c84fb51aac77ba22f1b4facea2d3" + }, + "pipfile-spec": 6, + "requires": { + "python_version": "3.7" + }, + "sources": [ + { + "name": "pypi", + "url": "https://pypi.org/simple", + "verify_ssl": true + } + ] + }, + "default": { + "numpy": { + "hashes": [ + "sha256:1980f8d84548d74921685f68096911585fee393975f53797614b34d4f409b6da", + "sha256:22752cd809272671b273bb86df0f505f505a12368a3a5fc0aa811c7ece4dfd5c", + "sha256:23cc40313036cffd5d1873ef3ce2e949bdee0646c5d6f375bf7ee4f368db2511", + "sha256:2b0b118ff547fecabc247a2668f48f48b3b1f7d63676ebc5be7352a5fd9e85a5", + "sha256:3a0bd1edf64f6a911427b608a894111f9fcdb25284f724016f34a84c9a3a6ea9", + "sha256:3f25f6c7b0d000017e5ac55977a3999b0b1a74491eacb3c1aa716f0e01f6dcd1", + "sha256:4061c79ac2230594a7419151028e808239450e676c39e58302ad296232e3c2e8", + "sha256:560ceaa24f971ab37dede7ba030fc5d8fa173305d94365f814d9523ffd5d5916", + "sha256:62be044cd58da2a947b7e7b2252a10b42920df9520fc3d39f5c4c70d5460b8ba", + "sha256:6c692e3879dde0b67a9dc78f9bfb6f61c666b4562fd8619632d7043fb5b691b0", + "sha256:6f65e37b5a331df950ef6ff03bd4136b3c0bbcf44d4b8e99135d68a537711b5a", + "sha256:7a78cc4ddb253a55971115f8320a7ce28fd23a065fc33166d601f51760eecfa9", + "sha256:80a41edf64a3626e729a62df7dd278474fc1726836552b67a8c6396fd7e86760", + "sha256:893f4d75255f25a7b8516feb5766c6b63c54780323b9bd4bc51cdd7efc943c73", + "sha256:972ea92f9c1b54cc1c1a3d8508e326c0114aaf0f34996772a30f3f52b73b942f", + "sha256:9f1d4865436f794accdabadc57a8395bd3faa755449b4f65b88b7df65ae05f89", + "sha256:9f4cd7832b35e736b739be03b55875706c8c3e5fe334a06210f1a61e5c2c8ca5", + "sha256:adab43bf657488300d3aeeb8030d7f024fcc86e3a9b8848741ea2ea903e56610", + "sha256:bd2834d496ba9b1bdda3a6cf3de4dc0d4a0e7be306335940402ec95132ad063d", + "sha256:d20c0360940f30003a23c0adae2fe50a0a04f3e48dc05c298493b51fd6280197", + "sha256:d3b3ed87061d2314ff3659bb73896e622252da52558f2380f12c421fbdee3d89", + "sha256:dc235bf29a406dfda5790d01b998a1c01d7d37f449128c0b1b7d1c89a84fae8b", + "sha256:fb3c83554f39f48f3fa3123b9c24aecf681b1c289f9334f8215c1d3c8e2f6e5b" + ], + "version": "==1.16.2" + }, + "opencv-python": { + "hashes": [ + "sha256:1703a296a96d3d46615e5053f224867977accb4240bcaa0fcabcb0768bf5ac13", + "sha256:1777ce7535ee7a1995cae168a107a1320e9df13648b930e72a1a2c2eccd64cda", + "sha256:1e5520482fb18fbd64d079e7f17ac0018f195fd75f6360a53bb82d7903106b50", + "sha256:25522dcf2529614750a71112a6659759080b4bdc2323f19d47f4d895960fd796", + "sha256:2af5f2842ad44c65ae2647377e0ff198719e1a1cfc9c6a19bc0c525c035d4bd8", + "sha256:31ec48d7eca13fc25c287dea7cecab453976e372cad8f50d55c054a247efda21", + "sha256:47cf48ff5dbd554e9f58cc9e98cf0b5de3f6a971172612bffa06bc5fb79ce872", + "sha256:494f98366bb5d6c2ac7e50e6617139f353704fd97a6d12ec9d392e72817d5cb0", + "sha256:4a9845870739e640e3350a8d98d511c92c087fe3d66090e83be7bf94e0ac64f7", + "sha256:4ac29cc0847d948a6636899014e84e165c30cc8779d6218394d44363462a01ce", + "sha256:5857ace03b7854221abf8072462d306c2c2ce4e366190b21d90ee8ee8aaf5bb4", + "sha256:5b4a23d99d5a2874767034466f5a8fd37b9f93ac14955a01b1a208983c76b9ad", + "sha256:734d87a5021c037064beb62133e135e66c7128e401a63b8b842b809ae2093749", + "sha256:78005c1c5d15ef4e32e0f485557bd15b5b6d87f49c19db7fe3e9246a61ebe7e4", + "sha256:81ae2283225c5c52fc3d72debd4241c30ccff2bb922578bf7867f9851cce3acb", + "sha256:88dbf900f297fdae0f62b899d6a784d8868ec2135854c5f8a9abbad00a6f0c5b", + "sha256:8c98ea7b8d327a31cd6028782a06147d0e0329ae8e829e881fb5d02f7ed8aec9", + "sha256:937d4686fef6967921145290f5b50c01c00c5b5d3542a6519e8a85cd88448723", + "sha256:a057958c0e362b3c4f03b9af1cbdb6d5af035fd22ecd7fd794eba8fdeb049eb8", + "sha256:c41eab31fa2c641226c6187caa391a688d064c99f078d604574f1912296b771f", + "sha256:cf4f7e62d1f80d1fa85a1693a3500def5cde54b2b75212b3609e552e4c25acfb", + "sha256:d90d60143e18334330c149f293071c9f2f3c79c896f33dc4ec65099e58baaaa7", + "sha256:db3106b7ca86999a7bd1f2fcc93e49314e5e6e451356774e421a69428df5020b", + "sha256:dbaf264db56f4771dfac6624f438bc4dc670aa94f61a6138848fcab7e9e77380", + "sha256:e65206c4cf651dc9cf0829962fae8bec986767c9f123d6a1ad17f9356bf7257e", + "sha256:eac94ddc78c58e891cff7180274317dad2938a4ddfc6ced1c04846c7f50e77e9", + "sha256:f2e828711f044a965509c862b3a59b3181e9c56c145a950cb53d43fec54e66d2" + ], + "index": "pypi", + "version": "==4.1.0.25" + }, + "pillow": { + "hashes": [ + "sha256:15c056bfa284c30a7f265a41ac4cbbc93bdbfc0dfe0613b9cb8a8581b51a9e55", + "sha256:1a4e06ba4f74494ea0c58c24de2bb752818e9d504474ec95b0aa94f6b0a7e479", + "sha256:1c3c707c76be43c9e99cb7e3d5f1bee1c8e5be8b8a2a5eeee665efbf8ddde91a", + "sha256:1fd0b290203e3b0882d9605d807b03c0f47e3440f97824586c173eca0aadd99d", + "sha256:24114e4a6e1870c5a24b1da8f60d0ba77a0b4027907860188ea82bd3508c80eb", + "sha256:258d886a49b6b058cd7abb0ab4b2b85ce78669a857398e83e8b8e28b317b5abb", + "sha256:33c79b6dd6bc7f65079ab9ca5bebffb5f5d1141c689c9c6a7855776d1b09b7e8", + "sha256:367385fc797b2c31564c427430c7a8630db1a00bd040555dfc1d5c52e39fcd72", + "sha256:3c1884ff078fb8bf5f63d7d86921838b82ed4a7d0c027add773c2f38b3168754", + "sha256:44e5240e8f4f8861d748f2a58b3f04daadab5e22bfec896bf5434745f788f33f", + "sha256:46aa988e15f3ea72dddd81afe3839437b755fffddb5e173886f11460be909dce", + "sha256:74d90d499c9c736d52dd6d9b7221af5665b9c04f1767e35f5dd8694324bd4601", + "sha256:809c0a2ce9032cbcd7b5313f71af4bdc5c8c771cb86eb7559afd954cab82ebb5", + "sha256:85d1ef2cdafd5507c4221d201aaf62fc9276f8b0f71bd3933363e62a33abc734", + "sha256:8c3889c7681af77ecfa4431cd42a2885d093ecb811e81fbe5e203abc07e0995b", + "sha256:9218d81b9fca98d2c47d35d688a0cea0c42fd473159dfd5612dcb0483c63e40b", + "sha256:9aa4f3827992288edd37c9df345783a69ef58bd20cc02e64b36e44bcd157bbf1", + "sha256:9d80f44137a70b6f84c750d11019a3419f409c944526a95219bea0ac31f4dd91", + "sha256:b7ebd36128a2fe93991293f997e44be9286503c7530ace6a55b938b20be288d8", + "sha256:c4c78e2c71c257c136cdd43869fd3d5e34fc2162dc22e4a5406b0ebe86958239", + "sha256:c6a842537f887be1fe115d8abb5daa9bc8cc124e455ff995830cc785624a97af", + "sha256:cf0a2e040fdf5a6d95f4c286c6ef1df6b36c218b528c8a9158ec2452a804b9b8", + "sha256:cfd28aad6fc61f7a5d4ee556a997dc6e5555d9381d1390c00ecaf984d57e4232", + "sha256:dca5660e25932771460d4688ccbb515677caaf8595f3f3240ec16c117deff89a", + "sha256:de7aedc85918c2f887886442e50f52c1b93545606317956d65f342bd81cb4fc3", + "sha256:e6c0bbf8e277b74196e3140c35f9a1ae3eafd818f7f2d3a15819c49135d6c062" + ], + "index": "pypi", + "version": "==6.0.0" + }, + "pytesseract": { + "hashes": [ + "sha256:11c20321595b6e2e904b594633edf1a717212b13bac7512986a2d807b8849770" + ], + "index": "pypi", + "version": "==0.2.6" + } + }, + "develop": {} +} diff --git a/videocr/__init__.py b/videocr/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/videocr/models.py b/videocr/models.py new file mode 100644 index 0000000..16c1137 --- /dev/null +++ b/videocr/models.py @@ -0,0 +1,47 @@ +from __future__ import annotations +from typing import List +from dataclasses import dataclass + + +CONFIDENCE_THRESHOLD = 60 +# predictions with lower confidence will be filtered out + + +@dataclass +class PredictedWord: + __slots__ = 'confidence', 'text' + confidence: int + text: str + + +class PredictedFrame: + words: List[PredictedWord] + + def __init__(self, pred_data: str): + self.words = [] + + block_current = 1 + for line in pred_data.split('\n')[1:]: + tmp = line.split() + if len(tmp) < 12: + # no word is predicted + continue + _, _, block_num, *_, conf, text = tmp + block_num, conf = int(block_num), int(conf) + + # handle line breaks + if block_current < block_num: + block_current = block_num + self.words.append(PredictedWord(0, '\n')) + + if conf >= CONFIDENCE_THRESHOLD: + self.words.append(PredictedWord(conf, text)) + + @property + def confidence(self) -> int: + return sum(word.confidence for word in self.words) + + @property + def text(self) -> str: + return ''.join(word.text + ' ' for word in self.words) +