Skip to content

Commit f8fd075

Browse files
Merge pull request #5 from andrewalkermo/main
versão inicial documentada do DPToie-Python
2 parents b1be6a5 + a1e02ab commit f8fd075

25 files changed

+25063
-3889
lines changed

.dockerignore

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Arquivo: .dockerignore
2+
3+
# Ignorar pastas de ambiente virtual
4+
.venv/
5+
venv/
6+
env/
7+
8+
# Ignorar cache do Python
9+
__pycache__/
10+
*.pyc
11+
12+
# Ignorar a pasta e os arquivos de configuração do Git
13+
.git/
14+
.gitignore
15+
16+
# Ignorar arquivos do Docker
17+
Dockerfile
18+
.dockerignore
19+
20+
# Outros
21+
.idea/
22+
.vscode/
23+
dist/
24+
build/
25+
*.egg-info/

.editorconfig

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
root = true
2+
3+
[*]
4+
charset = utf-8
5+
end_of_line = lf
6+
indent_size = 4
7+
indent_style = space
8+
insert_final_newline = false
9+
max_line_length = 120
10+
tab_width = 4
11+
12+
[*.yml]
13+
indent_size = 2
14+
15+
[*.json]
16+
indent_size = 2

.github/workflows/extrai_fatos.yml

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
name: extrai_fatos
2+
3+
on: [push, workflow_dispatch]
4+
5+
jobs:
6+
extrai_fatos:
7+
runs-on: ubuntu-latest
8+
steps:
9+
- uses: actions/checkout@v4
10+
- name: Build container
11+
run: DOCKER_BUILDKIT=1 docker build --tag ptoie_python .
12+
13+
- name: Run all inputs with all configurations
14+
run: |
15+
# Runs src/main.py inside the ptoie_python container for every .conll file in
# inputs/ and every flag combination below, once with the default JSON output
# and once with CSV output. Each result is renamed to
# ./outputs/<input-name>[-<flags>].<json|csv>.
set -euo pipefail
mkdir -p outputs

# Flag combinations to run (the same list is used for JSON and CSV).
FLAGS_LIST=("" "-cc" "-sc" "-hs" "-a" "-a -t" "-cc -sc -hs -a -t")

#######################################
# Convert a flag string into a filename suffix.
# Arguments: $1 - flag string, e.g. "-a -t" (may be empty)
# Outputs:   the suffix on stdout, e.g. "a-t"; an empty line for empty input
#######################################
flags_to_suffix() {
  # Drop leading/trailing dashes and blanks, then collapse every remaining
  # run of blanks/dashes into a single hyphen. POSIX character classes are
  # used instead of the GNU-only \s.
  printf '%s\n' "$1" \
    | sed -E 's/^[[:space:]-]+//; s/[[:space:]-]+$//; s/[[:space:]-]+/-/g'
}

# Number of container runs that exited with a non-zero status.
failures=0

# Only .conll inputs are globbed, so the input type is always conll.
for input_path in inputs/*.conll; do
  # An unmatched glob stays literal; skip it.
  [[ -f "$input_path" ]] || continue
  filename=${input_path##*/}
  # Strip the last extension only (handles names like ceten-200.conll).
  name=${filename%.*}

  for flags in "${FLAGS_LIST[@]}"; do
    # Split the flag string into separate argv words ("" yields no words).
    read -r -a flag_args <<<"$flags"

    # Empty flags => base name only; otherwise append "-<suffix>".
    suffix=$(flags_to_suffix "$flags")
    out_base="./outputs/${name}${suffix:+-${suffix}}"

    for fmt in json csv; do
      label=${fmt^^}
      tmp_out="./outputs/output.${fmt}"

      cmd=(docker run --rm -v "$PWD":/ptoie_python ptoie_python
        poetry run python3 src/main.py -i "$input_path" -it conll)
      if (( ${#flag_args[@]} > 0 )); then
        cmd+=("${flag_args[@]}")
      fi
      # The JSON pass passes no output options; the CSV pass requests its
      # output file and type explicitly.
      if [[ "$fmt" == csv ]]; then
        cmd+=(-o "$tmp_out" -ot csv)
      fi

      echo "[${label}] input=$filename flags=[$flags]"
      # Remove stale output so a failed run cannot be mistaken for a fresh one.
      rm -f -- "$tmp_out"

      # A failed run is logged and counted, but the remaining combinations
      # still run.
      if ! "${cmd[@]}"; then
        echo "${fmt}-run-failed for $filename $flags"
        failures=$((failures + 1))
      fi

      if [[ -f "$tmp_out" ]]; then
        mv -- "$tmp_out" "${out_base}.${fmt}"
      else
        echo "Warning: ${label} output not produced for $filename with flags [$flags]"
      fi
    done
  done
done

# Surface failures as a workflow annotation instead of leaving them buried in
# the log; the step itself still succeeds so later steps are not skipped.
if (( failures > 0 )); then
  echo "::warning::${failures} extraction run(s) exited with a non-zero status"
fi
78+
79+
- name: Upload artifacts
80+
uses: actions/upload-artifact@v4
81+
with:
82+
name: extrai_fatos_outputs
83+
path: |
84+
./outputs

.gitignore

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
.idea
2+
.vscode
3+
.hypothesis
4+
out.txt
5+
/outputs/*
6+
!/outputs/.gitkeep
7+
/models/*
8+
!/models/.gitkeep

Dockerfile

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Image for running the project with Poetry-managed dependencies installed
# into the system interpreter (no virtualenv).
FROM python:3.12-slim

WORKDIR /ptoie_python

# Copy only the dependency manifests first so the dependency layer is cached
# until pyproject.toml or poetry.lock changes.
COPY pyproject.toml poetry.lock /ptoie_python/
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir poetry \
    && poetry config virtualenvs.create false \
    && poetry install --only main --no-root --no-directory

# Copy the rest of the project and install the project itself.
COPY . /ptoie_python
RUN poetry install --only main

# PYTHONPATH is not defined earlier in this Dockerfile, so expanding
# "$PYTHONPATH:" here would only prepend an empty path entry; set the value
# directly instead.
ENV PYTHONPATH="/ptoie_python"

# NOTE(review): the CI workflow runs src/main.py rather than src/noie.py;
# confirm that this default entry point is the intended one.
CMD [ "poetry", "run", "python3", "src/noie.py" ]

0 commit comments

Comments
 (0)