📦분석 프로젝트/🐻 이모티콘 트렌드 및 통계 분석

๐Ÿป ์ด๋ชจํ‹ฐ์ฝ˜ ํŠธ๋ Œ๋“œ ๋ฐ ํ†ต๊ณ„ ๋ถ„์„ (2) - tesseract ocr

๋ฐ์ดํ„ฐํŒ์Šค 2024. 10. 11. 18:59

ํŒŒ์ด์ฌ Tesseract OCR ๊ธฐ๋Šฅ์„ ํ™œ์šฉํ•ด ๋ฐ์ดํ„ฐ ํ…์ŠคํŠธํ™”

import pytesseract
import cv2
import os
from PIL import Image

# Batch OCR script: runs Tesseract over every screenshot in one directory and
# appends the recognized text of each image to a single output text file.

# Location of the Tesseract executable (Windows-only install path).
pytesseract.pytesseract.tesseract_cmd = r'C:\Program Files\Tesseract-OCR\tesseract.exe'

# Text file that collects the OCR output of all images.
output_path = "C:\\Users\\user\\Desktop\\sample\\output.txt"

# Directory that holds the image files to process.
path_dir = 'C:\\Users\\user\\Desktop\\sample'

# os.listdir() returns entries in arbitrary order; sort them so the text is
# written to the output file in a reproducible order.
file_list = sorted(os.listdir(path_dir))

# Extensions treated as images. The comparison below is case-insensitive so
# files such as "shot.PNG" or "photo.JPEG" are not silently skipped.
image_extensions = ('.jpg', '.jpeg', '.png')

# Tesseract language codes are the lowercase traineddata names ("eng", "kor").
ocr_lang = 'eng+kor'
# --psm 4: assume a single column of text of variable sizes;
# preserve_interword_spaces keeps the original spacing between words.
ocr_config = '--psm 4 -c preserve_interword_spaces=1'

# UTF-8 is requested explicitly: without it open() uses the locale's default
# encoding, which may be unable to encode characters found in the OCR output.
# The `with` block guarantees the file is closed even if OCR raises.
with open(output_path, "w", encoding="utf-8") as result:
    for file_name in file_list:
        # Skip everything that is not an image (including output.txt itself).
        if not file_name.lower().endswith(image_extensions):
            continue

        img_path = os.path.join(path_dir, file_name)

        # The file may have been removed since the directory was listed.
        if not os.path.exists(img_path):
            print(f"파일을 찾을 수 없습니다: {img_path}")
            continue

        # cv2.imread() signals failure by returning None instead of raising.
        img_cv = cv2.imread(img_path)
        if img_cv is None:
            print(f"이미지를 로드할 수 없습니다: {img_path}")
            continue

        # OpenCV loads pixels in BGR order; convert to RGB before wrapping the
        # array in a Pillow image for pytesseract.
        img_rgb = cv2.cvtColor(img_cv, cv2.COLOR_BGR2RGB)
        img_pil = Image.fromarray(img_rgb)

        # Extract the text and write it followed by a separating newline.
        text = pytesseract.image_to_string(
            img_pil,
            lang=ocr_lang,
            config=ocr_config,
        )
        result.write(text + '\n')

print("추출이 완료되었습니다. 확인 부탁드립니다.")

 

해당 코드를 작성하여 스크린샷을 텍스트로 바꿔보겠습니다.

 

 

 

성능이 좋지 않습니다...

๋‹ค๋ฅธ ์Šคํฌ๋ฆฐ์ƒท์œผ๋กœ๋„ ํ•ด๋ดค๋Š”๋ฐ ์ œ๋Œ€๋กœ ๋ฐ”๊ฟ”์ฃผ์ง€ ์•Š์Šต๋‹ˆ๋‹ค.

chat gpt๋Š” ์™„๋ฒฝํ•˜๊ฒŒ ํ…์ŠคํŠธ๋กœ ๋ฐ”๊ฟ”์ฃผ๋Š”๋ฐ, ๋น ์ง„ ํ…์ŠคํŠธ์™€ ์ž˜๋ชป๋œ ํ…์ŠคํŠธ๊ฐ€ ๋งŽ์Šต๋‹ˆ๋‹ค.

다른 방식을 좀 더 찾아보아 완벽하게 텍스트로 바꿔주는 방식을 택해야 할 것 같습니다.