Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
The diff you're trying to view is too large. We only load the first 3000 changed files.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
.gradle/
gradle/
gradlew*
node_modules/
# generated
build/
out/
Expand Down
201 changes: 201 additions & 0 deletions ScriptFloraBrasil/fetch_taxa.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,201 @@
// fetch_taxa.ts
import fs from "fs";
import path from "path";
import axios from "axios";
import { parse } from "csv-parse";
import pLimit from "p-limit";

// A raw CSV record when parsed without headers (positional columns).
type Row = string[];

// Path to the input CSV (the query-result file provided by the user).
const CSV_PATH = "querySELECT.csv";

// Output directory; created eagerly at module load so later writes succeed.
const OUT_DIR = path.resolve(process.cwd(), "output");
if (!fs.existsSync(OUT_DIR)) fs.mkdirSync(OUT_DIR, { recursive: true });

// Tuning knobs for the fetch loop.
const CONCURRENCY = 5; // number of simultaneous requests
const RETRIES = 3; // retry attempts on failure
const RETRY_DELAY_MS = 1000; // delay between retries
const REQUEST_TIMEOUT = 15000; // axios timeout in ms
const BASE_URL = "https://servicos.jbrj.gov.br/v2/flora/taxon/";

/** Pause helper: resolves after the given number of milliseconds. */
function sleep(ms: number) {
  return new Promise((resolve) => {
    setTimeout(() => {
      resolve(undefined);
    }, ms);
  });
}

/**
 * Reads the input CSV and returns one { genero, especie } pair per row.
 *
 * Strategy:
 * - Parse with `columns: true` so the header row becomes object keys.
 * - If a header looks like "genero"/"especie" (case-insensitive, accented
 *   variants included), use those columns.
 * - Otherwise fall back to the first two columns of each record.
 * - If the header-based parse errors out, re-parse the whole file without
 *   headers and take the first two fields of every row.
 */
async function readCsv(
filePath: string
): Promise<{ genero: string; especie: string }[]> {
return new Promise((resolve, reject) => {
const input = fs.createReadStream(filePath);
const rows: { genero: string; especie: string }[] = [];
const parser = parse({
bom: true,
columns: true, // try the header row as keys; error handler below is the fallback
skip_empty_lines: true,
trim: true,
});

// Tracks whether header-based parsing is still in effect; cleared once a
// record's keys fail to match, which disables the error-path re-parse.
let usedColumns = true;

parser.on("readable", () => {
let record;
while ((record = parser.read())) {
// With columns=true each record is an object keyed by header name.
const keys = Object.keys(record);
// Look for header names that resemble genus/species.
const lowerKeys = keys.map((k) => k.toLowerCase());
const gi = lowerKeys.findIndex(
(k) =>
k.includes("gener") || k.includes("gênero") || k.includes("genero")
);
const ei = lowerKeys.findIndex(
(k) =>
k.includes("espec") ||
k.includes("espécie") ||
k.includes("especie") ||
k.includes("species")
);
if (gi !== -1 && ei !== -1) {
// NOTE(review): throws if a matched cell is undefined on some row —
// confirm the input never has ragged rows.
rows.push({
genero: record[keys[gi]].trim(),
especie: record[keys[ei]].trim(),
});
} else {
// Headers exist but none matched: fall back to the first two columns.
usedColumns = false;
// Convert the record object to an array in key order.
const vals = keys.map((k) => (record[k] ?? "").toString().trim());
rows.push({ genero: vals[0] ?? "", especie: vals[1] ?? "" });
}
}
});

parser.on("error", (err) => {
// Fallback: re-parse the file without the columns option.
if (usedColumns) {
// NOTE(review): this synchronous re-parse keeps any rows already
// collected, and a header row (if present) would be pushed as data
// here — confirm against the actual input format.
parse(fs.readFileSync(filePath), { bom: true }, (err2, data: Row[]) => {
if (err2) return reject(err2);
for (const r of data) {
if (r.length < 2) continue;
rows.push({ genero: r[0].trim(), especie: r[1].trim() });
}
resolve(rows);
});
} else {
reject(err);
}
});

parser.on("end", () => {
resolve(rows);
});

input.pipe(parser);
});
}

/**
 * GETs `url`, retrying on network failures and server-side (5xx) errors.
 *
 * Client errors (4xx — e.g. a 404 for an unknown taxon) are returned
 * immediately: the original code retried them RETRIES times, wasting
 * roughly RETRIES * RETRY_DELAY_MS per miss even though the server's
 * answer cannot change.
 *
 * @param url       Fully-encoded request URL.
 * @param triesLeft Remaining retry attempts (defaults to RETRIES).
 * @returns HTTP status and response body, even for error statuses.
 * @throws  The last axios error when no response was ever received.
 */
async function fetchWithRetries(
  url: string,
  triesLeft = RETRIES
): Promise<{ status: number; data: any }> {
  try {
    const resp = await axios.get(url, { timeout: REQUEST_TIMEOUT });
    return { status: resp.status, data: resp.data };
  } catch (err: any) {
    const status: number | undefined = err?.response?.status;
    // A 4xx means the server answered definitively; retrying is pointless.
    if (status !== undefined && status >= 400 && status < 500) {
      return { status, data: err.response.data };
    }
    if (triesLeft > 0) {
      await sleep(RETRY_DELAY_MS);
      return fetchWithRetries(url, triesLeft - 1);
    }
    // Out of retries: surface whatever response we last saw, if any.
    if (err.response) {
      return { status: err.response.status, data: err.response.data };
    }
    throw err;
  }
}

/**
 * Replaces every character that is not alphanumeric, underscore, hyphen or
 * dot with an underscore, yielding a filesystem-safe file name.
 */
function safeFileName(text: string) {
  const forbidden = /[^a-z0-9_\-\.]/gi;
  return text.replace(forbidden, "_");
}

/**
 * Entry point: loads the CSV, fetches every taxon from the Flora do Brasil
 * service with bounded concurrency, writes one JSON file per taxon, and
 * finishes with a summary.csv describing the outcome of each request.
 */
async function main() {
  console.log("Lendo CSV...", CSV_PATH);
  const items = await readCsv(CSV_PATH);
  console.log(`Linhas carregadas: ${items.length}`);

  const limit = pLimit(CONCURRENCY);
  const summaryRows: string[] = [];
  const header = [
    "genero",
    "especie",
    "taxon_name",
    "url",
    "http_status",
    "output_file",
  ];
  summaryRows.push(header.join(","));

  const tasks: Promise<void>[] = [];
  items.forEach((it, idx) => {
    const task = limit(async () => {
      const genero = (it.genero || "").trim();
      const especie = (it.especie || "").trim();
      // Rows with a missing genus or epithet are recorded and skipped.
      if (!genero || !especie) {
        console.warn(`Linha ${idx + 1}: genero/especie vazio, pulando`);
        summaryRows.push([genero, especie, "", "", "SKIPPED", ""].join(","));
        return;
      }

      const fullName = `${genero} ${especie}`;
      const requestUrl = BASE_URL + encodeURIComponent(fullName);

      try {
        const result = await fetchWithRetries(requestUrl);
        // One pretty-printed JSON file per taxon, suffixed with the row index.
        const fileName = safeFileName(`${genero}_${especie}_${idx + 1}.json`);
        fs.writeFileSync(
          path.join(OUT_DIR, fileName),
          JSON.stringify(result.data, null, 2),
          "utf-8"
        );
        console.log(`[OK] ${fullName} -> ${result.status} -> ${fileName}`);
        summaryRows.push(
          [genero, especie, fullName, requestUrl, String(result.status), fileName].join(",")
        );
      } catch (err: any) {
        console.error(`[ERR] ${fullName}:`, err.message ?? err);
        summaryRows.push(
          [genero, especie, fullName, requestUrl, "ERROR", ""].join(",")
        );
      }
    });
    tasks.push(task);
  });

  await Promise.all(tasks);

  const summaryPath = path.join(OUT_DIR, "summary.csv");
  fs.writeFileSync(summaryPath, summaryRows.join("\n"), "utf-8");
  console.log("Concluído. Saída em:", OUT_DIR);
  console.log("Arquivo resumo:", summaryPath);
}

main().catch((e) => {
  console.error("Erro fatal:", e);
  process.exit(1);
});
162 changes: 162 additions & 0 deletions ScriptFloraBrasil/jsons_to_xlsx.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
// jsons_to_xlsx.js
import fs from "fs";
import path from "path";
import { globSync } from "glob";
import XLSX from "xlsx";

// Directory where fetch_taxa.ts wrote its per-taxon JSON files.
const OUT_DIR = path.resolve(process.cwd(), "output");
// Every *.json in the output directory is treated as a taxon record.
const GLOB_PATTERN = path.join(OUT_DIR, "*.json");
// Destination workbook: one "Taxa" sheet plus an "Authors" sheet.
const REPORT_PATH = path.join(OUT_DIR, "report.xlsx");

/**
 * Coerces any value to a display string: null/undefined become "",
 * strings are trimmed, everything else goes through String().
 */
function safeString(v) {
  if (v == null) {
    return "";
  }
  return typeof v === "string" ? v.trim() : String(v);
}

/**
 * Recovers genus and species from a file name such as
 * "Genus_species_12.json": the extension and the trailing "_<index>"
 * added by the fetch script are stripped, then the remaining words are
 * split on underscores/hyphens. Returns empty strings when fewer than
 * two words remain.
 */
function parseNameFromFilename(filename) {
  const ext = path.extname(filename);
  const withoutIndex = path.basename(filename, ext).replace(/_\d+$/, "");
  const words = withoutIndex
    .replace(/[_\-]+/g, " ")
    .split(/\s+/)
    .filter(Boolean);
  if (words.length < 2) {
    return { genero: "", especie: "" };
  }
  return { genero: words[0], especie: words[1] };
}

/**
 * Reads and parses one JSON file. Returns the parsed value, or null
 * (after logging the problem) when the file cannot be read or is not
 * valid JSON.
 */
function readJsonFile(filePath) {
  try {
    return JSON.parse(fs.readFileSync(filePath, "utf-8"));
  } catch (err) {
    console.error("Erro lendo/parsing JSON:", filePath, err.message);
    return null;
  }
}

/**
 * Splits an authorship string into individual author names.
 * HTML tags and parentheses are stripped first, then the text is split on
 * the usual separators (comma, semicolon, "&", "and", Portuguese "e", "/")
 * and whitespace inside each name is collapsed to single spaces.
 */
function extractAuthorsFromString(authStr) {
  if (!authStr) {
    return [];
  }
  const plain = authStr
    .replace(/<\/?[^>]+(>|$)/g, " ")
    .replace(/[()]/g, " ");
  return plain
    .split(/,|;|&|\band\b|\be\b|\//i)
    .map(p => p.trim())
    .filter(Boolean)
    .map(p => p.replace(/\s+/g, " ").trim());
}

/**
 * Flattens one parsed JSON payload into a spreadsheet row.
 *
 * The service may return either an array or a single object; the taxon
 * record is looked for at data[0].taxon, then data.taxon, then data[0]
 * itself. A genus/epithet missing from the payload is recovered from the
 * "Genus_species_N.json" file name written by the fetch script.
 */
function normalizeRecord(filePath, data) {
  let taxon = null;
  if (Array.isArray(data) && data[0]?.taxon) {
    taxon = data[0].taxon;
  } else if (data?.taxon) {
    taxon = data.taxon;
  } else if (Array.isArray(data) && data.length > 0 && typeof data[0] === "object" && data[0].taxon === undefined) {
    taxon = data[0];
  }

  let genero = taxon?.genus || "";
  let especie = taxon?.specificepithet || "";
  if (!genero || !especie) {
    // Fall back to parsing the file name for whichever part is missing.
    const fromName = parseNameFromFilename(filePath);
    genero = genero || fromName.genero;
    especie = especie || fromName.especie;
  }

  return {
    genero,
    especie,
    scientificname: safeString(taxon?.scientificname),
    taxonid: taxon?.taxonid ?? "",
    nomenclaturalstatus: safeString(taxon?.nomenclaturalstatus),
    taxonomicstatus: safeString(taxon?.taxonomicstatus),
    scientificnameauthorship: safeString(taxon?.scientificnameauthorship),
    source_file: path.basename(filePath),
  };
}

/**
 * Collects the deduplicated, pt-BR-sorted list of author names found in
 * all rows: both the scientificnameauthorship field and any authorship
 * tail that follows the binomial (optionally with one infraspecific word)
 * in scientificname.
 */
function gatherAllAuthors(rows) {
  const authors = new Set();
  const addAll = (names) => {
    for (const name of names) {
      if (name) authors.add(name);
    }
  };

  for (const row of rows) {
    addAll(extractAuthorsFromString(row.scientificnameauthorship || ""));

    const sciName = row.scientificname || "";
    if (sciName) {
      // Capture everything after "Genus [infragenus] epithet" as the tail.
      const match = sciName.match(/^[A-Z][a-zA-Z-]+(?:\s+[a-z-]+)?\s+[a-z-]+(.*)$/);
      if (match && match[1]) {
        const tail = match[1].replace(/[()]/g, " ").trim();
        addAll(extractAuthorsFromString(tail));
      }
    }
  }

  return Array.from(authors).sort((a, b) => a.localeCompare(b, "pt-BR"));
}

/**
 * Entry point: reads every output/*.json written by the fetch script,
 * flattens each into a row, and writes report.xlsx with a "Taxa" sheet
 * plus a deduplicated "Authors" sheet. Exits with status 1 when no JSON
 * files are found.
 */
function main() {
  const files = globSync(GLOB_PATTERN);
  if (files.length === 0) {
    console.error("Nenhum arquivo JSON encontrado em:", OUT_DIR);
    process.exit(1);
  }
  console.log(`Arquivos JSON encontrados: ${files.length}`);

  // Unreadable files still get a placeholder row so they appear in the report.
  const emptyRow = (sourceFile) => ({
    genero: "",
    especie: "",
    scientificname: "",
    taxonid: "",
    nomenclaturalstatus: "",
    taxonomicstatus: "",
    scientificnameauthorship: "",
    source_file: sourceFile,
  });

  const rows = files.map((file) => {
    const data = readJsonFile(file);
    return data ? normalizeRecord(file, data) : emptyRow(path.basename(file));
  });

  const header = [
    "genero",
    "especie",
    "scientificname",
    "taxonid",
    "nomenclaturalstatus",
    "taxonomicstatus",
    "scientificnameauthorship",
    "source_file"
  ];
  const workbook = XLSX.utils.book_new();
  XLSX.utils.book_append_sheet(
    workbook,
    XLSX.utils.json_to_sheet(rows, { header }),
    "Taxa"
  );

  const authorSheet = XLSX.utils.json_to_sheet(
    gatherAllAuthors(rows).map((author) => ({ author })),
    { header: ["author"] }
  );
  XLSX.utils.book_append_sheet(workbook, authorSheet, "Authors");

  XLSX.writeFile(workbook, REPORT_PATH);
  console.log("Planilha gerada em:", REPORT_PATH);
}

main();
Loading