Busca correta da unidade consumidora na fatura
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
2025-08-14 11:22:38 -03:00
parent 98c6cf2363
commit b64068cfb6

View File

@@ -29,14 +29,19 @@ def extrair_dados(texto_final):
nota_fiscal = extrair_seguro(r'NOTA FISCAL Nº\s*(\d+)', texto_final)
# --- Unidade Consumidora (UC): 8 a 12 dígitos, SEM hífen ---
uc = extrair_seguro([
r'(\d{7,10}-\d)',
r'UNIDADE\s+CONSUMIDORA\s*[:\-]?\s*(\d{6,})',
r'(\d{6,})\s+FAZENDA',
r'(\d{6,})\s+AVENIDA',
r'(\d{6,})\s+RUA'
r'UNIDADE\s*CONSUMIDORA\D*?(\d{8,12})',
r'\bUC\D*?(\d{8,12})',
r'INSTALA[ÇC][ÃA]O\D*?(\d{8,12})',
], texto_final)
# fallback: maior sequência "solta" de 8 a 10 dígitos sem hífen
if not uc:
seqs = re.findall(r'(?<!\d)(\d{8,10})(?![\d-])', texto_final)
if seqs:
uc = max(seqs, key=len)
logging.debug("TEXTO PDF:\n" + texto_final)
referencia = extrair_seguro([