2024-09-19 04:01:15 +00:00
|
|
|
from os import chdir, listdir
|
|
|
|
import numpy as np
|
2024-09-21 23:08:53 +00:00
|
|
|
from time import gmtime
|
|
|
|
|
2024-09-19 04:01:15 +00:00
|
|
|
def regulater(binary_array):
    '''Smooth a 0/255 binary image with a plus-shaped majority vote.

    Every output pixel becomes 255 when more than two of the five pixels
    {itself, above, below, left, right} are 255, and 0 otherwise.  Neighbour
    indices are clamped at the borders, so an edge pixel stands in for its
    own missing neighbour.

    Parameters:
        binary_array: two dimensional numpy array containing only 0 and 255.

    Returns:
        A new ``np.uint8`` array of shape ``(height, width, 5)``; the filtered
        value is repeated along the last axis.
        NOTE(review): the trailing axis of length 5 is inherited from the
        original implementation and kept for compatibility -- confirm that
        callers actually need it.

    Raises:
        TypeError: if the input is not two dimensional or holds values other
            than 0 and 255.
    '''
    if len(binary_array.shape) != 2 or not np.isin(binary_array, (0, 255)).all():
        raise TypeError(
            'binary_array is not two dimensional or not binary: ndim=%d, values=%r'
            % (len(binary_array.shape), np.unique(binary_array).tolist())
        )

    height, width = binary_array.shape
    white = binary_array == 255

    # Clamped neighbour indices reproduce the max(0, i-1) / min(n-1, i+1)
    # border handling and also work for empty images.
    rows = np.arange(height)
    cols = np.arange(width)

    # The left/right terms index the *columns*.  The earlier loop version
    # re-read the centre pixel here, which made the whole filter a no-op.
    votes = (
        white.astype(np.uint8)
        + white[np.maximum(rows - 1, 0), :]
        + white[np.minimum(rows + 1, height - 1), :]
        + white[:, np.maximum(cols - 1, 0)]
        + white[:, np.minimum(cols + 1, width - 1)]
    )

    filtered = np.where(votes > 2, 255, 0).astype(np.uint8)
    return np.repeat(filtered[:, :, np.newaxis], 5, axis=2)
|
|
|
|
|
|
|
|
def _load_inventory(inventory):
    '''Read the tab-separated inventory file into {serial number: filename}.

    Column index 4 of each row is used as the key and column index 5 as the
    value.  Completely empty rows are skipped; any other row with fewer than
    six columns is reported and raises IndexError.
    '''
    import csv

    student_inventory = {}
    with open(inventory, 'r', newline='') as invent:
        for line in csv.reader(invent, delimiter='\t'):
            if not line:
                continue
            if len(line) < 6:
                print('index error csv', line)
                raise IndexError('inventory row has fewer than 6 columns: %r' % (line,))
            student_inventory[line[4]] = line[5]
    return student_inventory


def _single_page_pdf_bytes(doc, page_index):
    '''Return page `page_index` of `doc` as the bytes of a standalone PDF.'''
    import fitz

    newdoc = fitz.open()
    try:
        newdoc.insert_pdf(doc, from_page=page_index, to_page=page_index,
                          start_at=-1, rotate=-1, links=True, annots=True)
        # Lossless clean-up/compression keeps the per-student files small.
        return newdoc.tobytes(garbage=4, deflate=True,
                              deflate_images=True, deflate_fonts=True)
    finally:
        newdoc.close()


def read_qrcodes(pdfdoc, inventory, pagenum=0, *,
                 output_dir='/path/to/directory/to/store/student/quizzes/',
                 weird_dir='/path/to/store/weird/qrs/'):
    '''Split a scanned PDF into one file per page, named via each page's QR code.

    For every page a fixed clip rectangle is rendered, thresholded, cleaned
    with an erode/dilate pass and handed to OpenCV's QR detector in all four
    90-degree rotations.  When the successful rotations agree on a non-empty
    result, the first decoded string is looked up in the inventory and the
    page is written to ``output_dir + <filename from inventory>``.  Otherwise
    the thresholded clip is saved under `weird_dir`, shown to the user, and
    the user is prompted for the filename.

    NOTE: accuracy can be tuned through the clip box around the nominal QR
    position (top right corner).  A larger box is more robust against feed
    instability in the scanning process but less robust against obfuscating
    marks and clutter.

    Parameters:
        pdfdoc: path of the scanned PDF.
        inventory: path of the tab-separated inventory file (column index 4 =
            serial number, column index 5 = output filename).
        pagenum: number assigned to the first page; later pages count up
            from it.
        output_dir: prefix string for the per-page PDFs.  It is concatenated
            with the filename as-is, so it must end with a path separator.
        weird_dir: prefix string for the images of unreadable QR codes (same
            concatenation rule).

    Returns:
        ``(assignment, weirds, pageno)``: `assignment` maps page number to the
        tuple of decoded strings, `weirds` lists the page numbers that needed
        manual entry, and `pageno` is the page number following the last
        processed page.

    Raises:
        IndexError: if a non-empty inventory row has fewer than six columns.
    '''
    import cv2
    import fitz
    from PIL import Image

    student_inventory = _load_inventory(inventory)
    print('inventory', student_inventory)

    mat = fitz.Matrix(8.0, 8.0)               # render the clip at 8x scale
    clip = fitz.Rect((480, 5), (630, 120))    # previously tried: (650,120)
    qcd = cv2.QRCodeDetector()
    kernel = np.ones((3, 3), dtype=np.uint8)  # loop invariant

    pageno = pagenum
    assignment = {}
    weirds = []

    doc = fitz.open(pdfdoc)
    try:
        print('doc_length', doc.page_count)
        for indx in range(doc.page_count):
            pix = doc[indx].get_pixmap(matrix=mat, clip=clip)
            im = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, pix.n)
            # Reverse the first three channels (presumably RGB -> BGR for OpenCV).
            im = np.ascontiguousarray(im[..., [2, 1, 0]])

            gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
            _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
            # Erode then dilate the inverted image to remove small dark specks.
            thresh = cv2.bitwise_not(
                cv2.dilate(
                    cv2.erode(cv2.bitwise_not(thresh), kernel, iterations=1),
                    kernel, iterations=1))

            # One entry per rotation in which the detector reported success.
            data = []
            for k in range(4):
                dat = qcd.detectAndDecodeMulti(np.rot90(thresh, k))
                if dat[0]:
                    # tuple() keeps the entry hashable for the set() below.
                    data.append(tuple(dat[1]))

            if len(set(data)) == 1 and data[0] != ('',):
                print(data)
                assignment[pageno] = data[0]
                try:
                    # All entries are equal here, so use the first one: index 1
                    # does not exist when only a single rotation decoded.
                    newfilename = student_inventory[data[0][0]]
                except KeyError:
                    print('key error', pageno)
                else:
                    # it would be good to downsample this for smaller filesize
                    with open(output_dir + newfilename, 'wb') as f:
                        f.write(_single_page_pdf_bytes(doc, indx))
            else:
                weirds.append(pageno)
                print("I can't read the qr code at page number " + str(pageno+1) +", could you enter the filename for me?")
                img = Image.fromarray(thresh.astype('uint8'))
                # we want to keep an eye on any unreadable qr codes so we can
                # keep track of what goes wrong
                img.save(weird_dir + str(gmtime().tm_yday) + '_' + str(pageno) + '.jpg')
                img.show()
                # An empty answer would make open() target the directory itself.
                newfilename = ''
                while not newfilename:
                    newfilename = input('page '+str(pageno+1)+'-->').strip()
                with open(output_dir + newfilename, 'wb') as f:
                    f.write(_single_page_pdf_bytes(doc, indx))

            pageno += 1
    finally:
        doc.close()

    return (assignment, weirds, pageno)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    # Script entry point: split one scanned quiz PDF using the inventory
    # file, then report the next page number, the decoded QR data per page
    # and the pages that needed manual entry.
    inventory = []

    assignment, weirds, pageno = read_qrcodes(
        '/path/to/scanned/quizzes/somefilename.pdf',
        '/path/to/inventory/file/q4_inventory_mat343.txt',
    )
    print('pageno', pageno)

    # Shallow copy of the page -> decoded-data mapping.
    outp = dict(assignment.items())

    print(outp, weirds)
|
|
|
|
|