from os import chdir, listdir
import os
import time

import numpy as np


def regulater(binary_array):
    """Smooth a binary (0/255) image with a plus-shaped majority filter.

    Each output pixel becomes 255 when more than two of the five pixels in
    its plus-shaped neighbourhood (up, centre, down, left, right) are 255,
    and 0 otherwise.  Neighbours that would fall outside the image are
    clamped to the nearest edge pixel.

    Args:
        binary_array: two-dimensional array containing only 0 and 255.

    Returns:
        A ``uint8`` array of shape ``(height, width, 5)`` in which every one
        of the 5 trailing entries of a pixel holds the same filtered value.

    Raises:
        TypeError: if the input is not two dimensional or holds values other
            than 0 and 255.
    """
    arr = np.asarray(binary_array)
    if arr.ndim != 2 or not np.isin(arr, (0, 255)).all():
        raise TypeError(
            'binary_array is not two dimensional or not binary '
            f'(ndim={arr.ndim}, shape={arr.shape})'
        )

    height, width = arr.shape
    # NOTE(review): the trailing axis of length 5 is inherited from the
    # original implementation and kept so existing callers see the same
    # shape; it looks like a leftover -- confirm whether (h, w) is wanted.
    if arr.size == 0:
        return np.empty((height, width, 5), dtype=np.uint8)

    # Edge padding reproduces the max(0, i-1) / min(n-1, i+1) index clamping.
    lit = np.pad(arr == 255, 1, mode='edge').astype(np.uint8)
    votes = (
        lit[:-2, 1:-1]      # up
        + lit[1:-1, 1:-1]   # centre
        + lit[2:, 1:-1]     # down
        + lit[1:-1, :-2]    # left  (the original re-read the centre here)
        + lit[1:-1, 2:]     # right (the original re-read the centre here)
    )
    majority = np.where(votes > 2, 255, 0).astype(np.uint8)
    return np.repeat(majority[:, :, np.newaxis], 5, axis=2)


def _load_inventory(inventory):
    """Read the tab-separated inventory into a {column 4: column 5} dict."""
    import csv

    student_inventory = {}
    with open(inventory, 'r', newline='') as invent:
        for line in csv.reader(invent, delimiter='\t'):
            if not line:
                # csv yields [] for blank lines; they carry no record.
                continue
            try:
                student_inventory[line[4]] = line[5]
            except IndexError:
                print('index error', line)
                raise
    return student_inventory


def _binarize_qr_region(page, mat, clip):
    """Render the clipped page region and return a cleaned black/white image."""
    import cv2

    pix = page.get_pixmap(matrix=mat, clip=clip)
    im = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.h, pix.w, pix.n)
    # Reverse the first three channels so the BGR->gray conversion applies.
    im = np.ascontiguousarray(im[..., [2, 1, 0]])
    gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
    kernel = np.ones((3, 3), dtype=np.uint8)
    _, thresh = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
    # Erode then dilate on the inverted image: removes dark specks smaller
    # than the kernel while keeping larger dark areas at their size.
    inverted = cv2.bitwise_not(thresh)
    opened = cv2.dilate(cv2.erode(inverted, kernel, iterations=1), kernel, iterations=1)
    return cv2.bitwise_not(opened)


def _decode_rotations(qcd, image):
    """Run the QR detector on all four 90-degree rotations of the image.

    Returns one tuple of decoded strings per rotation in which the detector
    reported success.
    """
    results = []
    for k in range(4):
        rotated = np.ascontiguousarray(np.rot90(image, k))
        found, decoded = qcd.detectAndDecodeMulti(rotated)[:2]
        if found:
            # Tuples are hashable, so the results can be compared via set().
            results.append(tuple(decoded))
    return results


def _write_sheet(doc, first_page, path, **save_options):
    """Copy pages first_page and first_page+1 of doc into a new PDF at path."""
    import fitz

    newdoc = fitz.open()
    try:
        newdoc.insert_pdf(doc, from_page=first_page, to_page=first_page + 1,
                          start_at=-1, rotate=-1, links=True, annots=True)
        payload = newdoc.tobytes(**save_options)
    finally:
        newdoc.close()
    with open(path, 'wb') as f:
        f.write(payload)


def read_qrcodes(pdfdoc, inventory, pagenum=0, *,
                 output_dir='/home/tomtaylor/exams_/quiz3/',
                 weird_dir='/home/tomtaylor/exams_/weird_qrs/'):
    """Split a scanned PDF into two-page sheets named after their QR codes.

    Every even-indexed page is rendered inside a fixed clip rectangle,
    binarized, and scanned for a QR code in all four 90-degree rotations.
    When every successful rotation agrees on a non-empty result, the decoded
    text is looked up in the inventory and the page plus its successor are
    written to ``output_dir`` under the inventory's file name.  Otherwise
    the sheet is written as ``<pageno>.pdf``, the binarized clip is saved to
    ``weird_dir`` and shown on screen.

    Note: accuracy could be improved by focusing on the top right corner
    where the QR code will be.  The box around the nominal QR position can
    be larger or smaller: a larger box is more robust against feed
    instability in the scanning process but less robust against obfuscating
    marks and clutter.

    Args:
        pdfdoc: path of the scanned PDF.
        inventory: path of a tab-separated file; column 4 is the QR text and
            column 5 the output file name.
        pagenum: starting value of the running page counter.
        output_dir: directory receiving the split PDFs.
        weird_dir: directory receiving images of unreadable QR regions.

    Returns:
        ``(assignment, weirds, pageno)``: a dict mapping page counter to the
        tuple of decoded strings, the list of page counters whose QR code
        could not be read consistently, and the final page counter.

    Raises:
        IndexError: if a non-empty inventory row has fewer than six columns.
    """
    import cv2
    import fitz
    from PIL import Image

    student_inventory = _load_inventory(inventory)

    doc = fitz.open(pdfdoc)
    try:
        print('doc_length', doc.page_count)
        mat = fitz.Matrix(8.0, 8.0)
        clip = fitz.Rect((480, 5), (600, 110))
        qcd = cv2.QRCodeDetector()
        pageno = pagenum
        assignment = {}
        weirds = []

        # Only even-indexed pages are scanned for a QR code; each is saved
        # together with the page that follows it.
        for indx in range(0, doc.page_count, 2):
            if indx % 10 == 0:
                print('index', indx)
            thresh = _binarize_qr_region(doc[indx], mat, clip)
            data = _decode_rotations(qcd, thresh)

            # Accept only when all successful rotations agree and the first
            # decoded string is non-empty.
            if len(set(data)) == 1 and data[0] and data[0][0] != '':
                assignment[pageno] = data[0]
                try:
                    newfilename = student_inventory[data[0][0]]
                except KeyError:
                    print('key error', pageno)
                    # NOTE(review): this path advances the counter by 1
                    # while every other path advances by 2 -- kept as in
                    # the original; confirm whether 2 was intended.
                    pageno += 1
                    continue
                _write_sheet(doc, indx, os.path.join(output_dir, newfilename),
                             garbage=4, deflate=True, deflate_images=True,
                             deflate_fonts=True)
            else:
                weirds.append(pageno)
                _write_sheet(doc, indx,
                             os.path.join(output_dir, str(pageno) + '.pdf'))
                img = Image.fromarray(thresh.astype('uint8'))
                # tm_yday lives on struct_time, not on the time module.
                day_of_year = time.localtime().tm_yday
                img.save(os.path.join(
                    weird_dir, str(day_of_year) + '_' + str(pageno) + '.jpg'))
                img.show()
            pageno += 2
    finally:
        doc.close()

    return (assignment, weirds, pageno)


if __name__ == '__main__':
    assignment, weirds, pageno = read_qrcodes(
        '/media/tomtaylor/F680-7D49/20240912114705.pdf',
        '/home/tomtaylor/exams_/inventoryQ3_mat343.txt')
    print('pageno', pageno)
    outp = dict(assignment)
    print(outp, weirds)