# -*- coding: utf-8 -*-
"""Post an image to the OCR HTTP endpoint in REQUEST_URL and print the reply.

The image may be given as an http(s) link, which is forwarded as-is, or as a
local file path, whose bytes are base64-encoded into the request body.
"""

import base64
import contextlib
import json
import os
import ssl

try:
    from urllib.error import HTTPError
    from urllib.request import Request, urlopen
except ImportError:  # Python 2 fallback
    from urllib2 import Request, urlopen, HTTPError

# NOTE(review): this is built with a private ssl helper and, as its name says,
# skips certificate verification, while the request carries the APPCODE
# credential. Prefer ssl.create_default_context() once the runtime's CA store
# is confirmed to work for this host.
context = ssl._create_unverified_context()

# Endpoint that receives the OCR request.
REQUEST_URL = "https://gjbsb.market.alicloudapi.com/ocrservice/advanced"

# Schemes that mark the argument as a remote image link.
_URL_PREFIXES = ("http://", "https://")


def _is_url(img_file):
    """Return True when *img_file* is an http(s) link rather than a local path."""
    return img_file.lower().startswith(_URL_PREFIXES)


def get_img(img_file):
    """Return an image link unchanged, or a local image as a base64 string.

    Args:
        img_file: An ``http://`` / ``https://`` link, or a path to a local
            file (``~`` is expanded).

    Returns:
        The link itself, or the base64 text of the file's bytes.

    Raises:
        IOError / OSError: If the local file cannot be opened.
    """
    # Require a real scheme: a bare "http" prefix would also match local file
    # names such as "http_sample.png" and skip reading them.
    if _is_url(img_file):
        return img_file

    with open(os.path.expanduser(img_file), "rb") as f:  # read raw bytes
        data = f.read()
    # base64 output is pure ASCII, so this decode is safe on Python 2 and 3.
    return base64.b64encode(data).decode("ascii")


def posturl(headers, body):
    """POST *body* as JSON to REQUEST_URL and return the response text.

    Args:
        headers: Mapping of HTTP request headers.
        body: JSON-serializable request payload.

    Returns:
        The response body decoded as UTF-8, or None when the server answers
        with an HTTP error status (the status code and error body are
        printed in that case).
    """
    payload = json.dumps(body).encode("utf-8")
    req = Request(REQUEST_URL, payload, headers)
    try:
        # closing() releases the connection on every path and, unlike a bare
        # ``with urlopen(...)``, also works with the urllib2 fallback.
        with contextlib.closing(urlopen(req, context=context)) as resp:
            return resp.read().decode("utf8")
    except HTTPError as e:
        print(e.code)
        print(e.read().decode("utf8"))
        return None


def request(appcode, img_file, params=None):
    """Send one OCR request for *img_file* and print the raw response.

    Args:
        appcode: Credential placed in the ``Authorization: APPCODE ...`` header.
        img_file: Image link or local image path (see ``get_img``).
        params: Optional dict of extra request options. It is copied, never
            modified.

    Returns:
        Whatever ``posturl`` returned: the response text, or None on an HTTP
        error.
    """
    # Work on a copy so the caller's dict is not polluted with url/img keys.
    body = dict(params) if params else {}

    # Classify the original argument, not the encoded result: base64 text can
    # itself begin with "http" and must still be sent as image data.
    if _is_url(img_file):
        body["url"] = img_file
    else:
        body["img"] = get_img(img_file)

    # Request headers
    headers = {
        'Authorization': 'APPCODE %s' % appcode,
        'Content-Type': 'application/json; charset=UTF-8'
    }

    response = posturl(headers, body)
    print(response)
    return response


def main():
    """Run the sample OCR request with the settings below."""
    # NOTE(review): a credential is committed to source here. It is kept only
    # as a fallback so existing runs behave the same; rotate it and supply the
    # real value through the OCR_APPCODE environment variable.
    appcode = os.environ.get("OCR_APPCODE", "e53874e614ba4e068b1108f8a8a27660")
    img_file = "https://ts1.cn.mm.bing.net/th/id/R-C.6f88b0b0e35cd0aad9e5ad47f2a9d77f?rik=hZhw2jTezUioPA&riu=http%3a%2f%2fwww.52wwz.cn%2fupFiles%2finfoImg%2f2013011670529913.jpg&ehk=sKZ4khHLIx0XQlf7CtgdUzQMT9kFAgD5UpKT1wtB0%2fM%3d&risl=&pid=ImgRaw&r=0"
    params = {
        # Whether to return a confidence score for each recognized line; off by default.
        "prob": False,
        # Whether to enable single-character recognition; off by default.
        "charInfo": False,
        # Whether to enable automatic rotation; off by default.
        "rotate": False,
        # Whether to enable table recognition; off by default.
        "table": False,
        # Order of returned text blocks: false = left-to-right then top-to-bottom,
        # true = top-to-bottom then left-to-right; default false.
        "sortPage": False,
        # Whether to remove seals/stamps; off by default.
        "noStamp": False,
        # Whether to enable figure detection; off by default.
        "figure": False,
        # Whether to return results grouped by line; off by default.
        "row": False,
        # Whether to enable paragraph segmentation; off by default.
        "paragraph": False,
        # Whether to return original coordinates after the image is rotated; off by default.
        "oricoord": True,
    }

    request(appcode, img_file, params)


if __name__ == "__main__":
    main()