# upload_exams/read_qr/read_qrcodes.py

from os import chdir, listdir
import numpy as np
from time import gmtime
def regulater(binary_array):
    '''Smooth a binary image with a 5-point (plus-shaped) majority vote.

    Every pixel is replaced by 255 when more than two of the five pixels
    {itself, above, below, left, right} are 255, and by 0 otherwise.
    Neighbours that would fall outside the image are clamped to the nearest
    edge pixel, so border pixels vote with copies of themselves.

    binary_array -- two dimensional array containing only the values 0 and 255.

    Returns a new uint8 array of shape (height, width, 5); the five entries
    along the last axis are identical copies of the voted value (this shape
    is kept for compatibility with existing callers).

    Raises TypeError when the input is not two dimensional or holds any
    value other than 0 or 255.
    '''
    pixels = np.asarray(binary_array)
    if pixels.ndim != 2 or not ((pixels == 0) | (pixels == 255)).all():
        raise TypeError(
            'binary_array is not two dimensional or not binary '
            '(ndim=' + str(pixels.ndim) + ')'
        )
    height, width = pixels.shape
    # Work with 0/1 votes so the sum of five neighbours fits easily in uint8.
    lit = (pixels == 255).astype(np.uint8)
    rows = np.arange(height)
    cols = np.arange(width)
    # Clamped index vectors reproduce the max(0, i-1) / min(n-1, i+1) edge
    # handling.  The left/right terms index the *column* axis; previously the
    # centre pixel was read in their place, which made the filter a no-op.
    votes = (
        lit
        + lit[np.maximum(rows - 1, 0), :]
        + lit[np.minimum(rows + 1, height - 1), :]
        + lit[:, np.maximum(cols - 1, 0)]
        + lit[:, np.minimum(cols + 1, width - 1)]
    )
    majority = np.where(votes > 2, np.uint8(255), np.uint8(0))
    newarray = np.empty((height, width, 5), dtype=np.uint8)
    # Broadcast the single voted plane across the trailing axis of length 5.
    newarray[...] = majority[:, :, np.newaxis]
    return newarray
def read_qrcodes(pdfdoc, inventory, pagenum=0,
                 output_dir='/path/to/directory/to/store/student/quizzes/',
                 weird_dir='/path/to/store/weird/qrs/'):
    '''Split a scanned PDF into one file per page, named via each page's QR code.

    For every page a fixed clip rectangle is rendered at 8x zoom, thresholded
    and cleaned, and then scanned for QR codes in all four 90-degree
    rotations.  When the rotations that decode agree, the first decoded string
    is looked up in the inventory and the page is written under the filename
    found there.  Otherwise the cleaned image is saved and shown, and the
    operator is asked on stdin for the filename.

    note can improve accuracy by focusing on top right corner where the qrcode
    will be. this can be a larger or smaller sized box around the nominal
    position of the qrcode. if larger it will be more robust against feed
    instability in the scanning process but less robust against obfuscating
    marks and clutter

    pdfdoc     -- path of the scanned PDF.
    inventory  -- path of a tab-separated file; column 5 (index 4) is the
                  serial number carried by the QR code and column 6 (index 5)
                  is the filename to write.  Empty lines are skipped.
    pagenum    -- number assigned to the first page; later pages count up.
    output_dir -- directory receiving the single-page PDFs.
    weird_dir  -- directory receiving images of unreadable QR regions.

    Returns (assignment, weirds, pageno): assignment maps page number to the
    decoded QR tuple, weirds lists the page numbers that needed manual entry,
    and pageno is the page number following the last processed page.

    Raises IndexError when a non-empty inventory row has fewer than 6 fields.
    '''
    import csv
    import cv2
    import fitz
    from os.path import join as join_path
    from PIL import Image

    student_inventory = {}
    # newline='' is what the csv module asks for so that quoted fields with
    # embedded newlines are parsed correctly.
    with open(inventory, 'r', newline='') as invent:
        for line in csv.reader(invent, delimiter='\t'):
            if not line:
                continue
            if len(line) < 6:
                raise IndexError('index error csv ' + str(line))
            student_inventory[line[4]] = line[5]
    print('inventory', student_inventory)

    doc = fitz.open(pdfdoc)

    def export_page(page_index, filename, **save_options):
        # Copy one page of doc into a fresh PDF and write it into output_dir.
        single = fitz.open()
        try:
            single.insert_pdf(doc, from_page=page_index, to_page=page_index,
                              start_at=-1, rotate=-1, links=True, annots=True)
            payload = single.tobytes(**save_options)
        finally:
            single.close()
        with open(join_path(output_dir, filename), 'wb') as f:
            f.write(payload)

    try:
        print('doc_length', doc.page_count)
        mat = fitz.Matrix(8.0, 8.0)
        clip = fitz.Rect((480, 5), (630, 120))
        #(650,120)
        qcd = cv2.QRCodeDetector()
        kernel = np.ones((3, 3), dtype=np.uint8)
        pageno = pagenum
        assignment = {}
        weirds = []
        for indx in range(doc.page_count):
            pix = doc[indx].get_pixmap(matrix=mat, clip=clip)
            im = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, pix.n)
            # Reverse the first three channels before the BGR->gray conversion
            # (presumably the pixmap is RGB -- confirm against the scanner output).
            im = np.ascontiguousarray(im[..., [2, 1, 0]])
            gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
            _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
            # Invert so dark marks are foreground, erode then dilate with the
            # 3x3 kernel to drop small specks, and invert back.
            marks = cv2.bitwise_not(thresh)
            marks = cv2.dilate(cv2.erode(marks, kernel, iterations=1), kernel, iterations=1)
            thresh = cv2.bitwise_not(marks)

            # One entry per rotation in which the detector reported success.
            data = []
            for k in range(4):
                dat = qcd.detectAndDecodeMulti(np.rot90(thresh, k))
                if dat[0]:
                    data.append(dat[1])

            if len(set(data)) == 1 and data[0] != ('',):
                print(data)
                assignment[pageno] = data[0]
                try:
                    # Every entry of data is equal here, so read the first one;
                    # indexing data[1] failed whenever only one rotation decoded.
                    newfilename = student_inventory[data[0][0]]
                except KeyError:
                    # Serial number is not in the inventory: page is not written.
                    print('key error', pageno)
                except IndexError:
                    print('index error', pageno, data)
                else:
                    '''it would be good to downsample this for smaller filesize'''
                    export_page(indx, newfilename, garbage=4, deflate=True,
                                deflate_images=True, deflate_fonts=True)
            else:
                weirds.append(pageno)
                print("I can't read the qr code at page number " + str(pageno+1) +", could you enter the filename for me?")
                img = Image.fromarray(thresh.astype('uint8'))
                '''we want to keep an eye on any unreadable qr codes so we can keep track of what goes wrong'''
                img.save(join_path(weird_dir, str(gmtime().tm_yday)+'_'+str(pageno)+'.jpg'))
                img.show()
                newfilename = input('page '+str(pageno+1)+'-->')
                export_page(indx, newfilename)
            pageno += 1
        return (assignment, weirds, pageno)
    finally:
        doc.close()
if __name__ == '__main__':
    # Script entry point: process one scanned quiz PDF against its inventory
    # file, then report the next page number, the page -> QR data mapping and
    # the pages that needed manual filename entry.
    inventory = []  # not read anywhere in this block
    assignment, weirds, pageno = read_qrcodes(
        '/path/to/scanned/quizzes/somefilename.pdf',
        '/path/to/inventory/file/q4_inventory_mat343.txt',
    )
    print('pageno', pageno)
    # Plain-dict copy of the mapping returned by read_qrcodes.
    outp = {page_key: qr_value for page_key, qr_value in assignment.items()}
    print(outp, weirds)