community/tesseract-ocr: add ocr languages

and training tools/data.
This commit is contained in:
Carlo Landmeter 2017-08-01 14:36:13 +02:00
parent b6189305eb
commit f302893742

View File

@ -2,22 +2,35 @@
# Maintainer: Francesco Colista <fcolista@alpinelinux.org>
# NOTE(review): pkgrel, makedepends and source are each assigned twice below; when the
# file is sourced the later assignment wins. These look like the before/after lines of
# a diff whose +/- markers were dropped — confirm before treating this as a real APKBUILD.
pkgname=tesseract-ocr
pkgver=3.05.01
pkgrel=0
# Version of the separate tessdata archive; used in source= and in the
# tessdata-$_tdver paths read by package() and _lang_data().
_tdver=3.04.00
pkgrel=1
pkgdesc="open source OCR engine"
url="https://github.com/tesseract-ocr/tesseract/releases"
arch="all"
license="Apache"
depends=""
depends_dev=""
makedepends="automake autoconf libtool leptonica-dev pango-dev icu-dev cairo-dev"
makedepends="automake autoconf libtool leptonica-dev pango-dev icu-dev
cairo-dev"
subpackages="$pkgname-dev $pkgname-doc"
source="$pkgname-$pkgver.tar.gz::https://github.com/$pkgname/${pkgname/-*}/archive/$pkgver.tar.gz"
# This form adds a second entry: the tessdata archive for version $_tdver.
source="$pkgname-$pkgver.tar.gz::https://github.com/$pkgname/${pkgname/-*}/archive/$pkgver.tar.gz
https://github.com/tesseract-ocr/tessdata/archive/${_tdver}/tessdata-${_tdver}.tar.gz
"
# Language codes; the loop below adds one data subpackage per code.
_langs="afr ara aze bel ben bul cat ces chi_sim chi_tra chr dan deu ell
enm epo equ est eus fin fra frk frm glg grc heb hin hrv hun ind isl ita
ita_old jpn kan kor lav lit mal mkd mlt msa nld nor pol por ron rus slk
slv spa spa_old sqi srp swa swe tam tel tgl tha tur ukr vie"
# Append "$pkgname-data-<lang>:_lang_data:noarch" to subpackages for every language.
# Presumably abuild's name:split-function:arch form, so _lang_data() builds each one —
# verify against the APKBUILD reference.
for _lang in $_langs; do
subpackages="$subpackages $pkgname-data-$_lang:_lang_data:noarch"
done
# ${pkgname/-*} removes the first "-" and everything after it ("tesseract-ocr" -> "tesseract"),
# so builddir is $srcdir/tesseract-$pkgver.
builddir="$srcdir"/${pkgname/-*}-$pkgver
build() {
cd "$builddir"
./autogen.sh || return 1
./autogen.sh
./configure \
--build=$CBUILD \
--host=$CHOST \
@ -26,15 +39,28 @@ build() {
--mandir=/usr/share/man \
--infodir=/usr/share/info \
--localstatedir=/var \
--disable-static \
--disable-graphics \
|| return 1
make || return 1
--disable-static
make
make training
}
# Install the build into $pkgdir: the regular install target, the training-install
# target, and the eng.* / osd.* files from the unpacked tessdata tree.
package() {
cd "$builddir"
# NOTE(review): the next two lines differ only by "|| return 1" and look like the
# removed/added pair of a diff whose markers were lost. As written, the install
# target runs twice — confirm which line is intended.
make DESTDIR="$pkgdir" install || return 1
make DESTDIR="$pkgdir" install
# Presumably installs the tools produced by "make training" in build().
make DESTDIR="$pkgdir" training-install
# Copy the eng.* and osd.* data files into the main package's usr/share/tessdata.
install -D "$srcdir"/tessdata-$_tdver/eng.* \
"$srcdir"/tessdata-$_tdver/osd.* \
"$pkgdir"/usr/share/tessdata/
}
# NOTE(review): single-entry checksum list. The later sha512sums assignment (after
# _lang_data) replaces it when the file is sourced; this looks like the pre-change
# line of a diff — confirm.
sha512sums="a49c20c98386684cd89582e57b772811204fad8e5ff18214fb0da109f73629c70845054985e31e8deeb49107fbcf56e546aff661f08eb5dd60fbf83dbe976e81 tesseract-ocr-3.05.01.tar.gz"
# Split function shared by every "$pkgname-data-<lang>" subpackage.
#
# Derives the language code by stripping the "$pkgname-data-" prefix from
# $subpkgname, sets the subpackage description and dependency, and moves that
# language's files (<lang>.*) out of the unpacked tessdata tree into
# $subpkgdir/usr/share/tessdata.
#
# Reads:   subpkgname, subpkgdir, pkgname, srcdir, _tdver
# Writes:  pkgdesc, depends (assigned as globals, not locals)
# Returns: the exit status of the final mv
_lang_data() {
	# $pkgname is quoted inside the pattern so it is matched literally.
	local lang="${subpkgname#"$pkgname"-data-}"
	local datadir="$srcdir/tessdata-$_tdver"

	pkgdesc="Tesseract language data for $lang"
	depends="$pkgname"

	mkdir -p "$subpkgdir"/usr/share/tessdata
	# Everything up to the language code is quoted so it can neither be
	# word-split nor globbed; only the trailing ".*" is left to expand.
	mv "$datadir/$lang".* "$subpkgdir"/usr/share/tessdata/
}
# SHA-512 checksums, one per archive named in source= (the tesseract-ocr and tessdata tarballs).
sha512sums="a49c20c98386684cd89582e57b772811204fad8e5ff18214fb0da109f73629c70845054985e31e8deeb49107fbcf56e546aff661f08eb5dd60fbf83dbe976e81 tesseract-ocr-3.05.01.tar.gz
4fbb66137c729e16c7a9e35b09916a45c1bb5ec5a7002a22647e0b10975362cb44c6d6c0c997baf25866f78749ec2d4a86317ec3fb664bd963243e230516d162 tessdata-3.04.00.tar.gz"