-
Notifications
You must be signed in to change notification settings - Fork 0
forch draft #70
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: latch/3.x.x
Are you sure you want to change the base?
forch draft #70
Changes from all commits
3c7ab97
dcbcd28
000c376
763916f
00802f6
7838be4
b8eba80
c6d109c
c4062b5
80aa48e
052d8f4
3717181
f2632e5
4832468
c9cacf8
00fa840
703f417
5f79539
f85acb8
324e3e1
35d3061
8a18574
d953e0a
afb02f7
af05397
69d50ac
3b5132d
5a35a65
2c45d3a
cbf09af
8edb4cd
bb10e47
83e1ea6
5087298
592fffe
1dbce6a
db76344
b993d60
76f76a3
a1ef374
2828e39
34d5b8c
03630c4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| # syntax = docker/dockerfile:1.4.1 | ||
|
|
||
| from alpine:3.22.0 | ||
|
|
||
| run apk add \ | ||
| bash \ | ||
| curl \ | ||
| openjdk21-jre-headless | ||
|
|
||
| run curl -sSL https://github.com/jqlang/jq/releases/download/jq-1.8.1/jq-linux-amd64 -o /bin/jq | ||
| run chmod +x /bin/jq | ||
|
|
||
| copy ./.nextflow /root/.nextflow | ||
| copy ./nextflow /usr/bin/nextflow |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1 +1 @@ | ||
| v3.0.4 | ||
| v3.0.36 |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,64 @@ | ||
| package nextflow.forch | ||
|
|
||
| import java.nio.file.Path | ||
|
|
||
| import groovy.util.logging.Slf4j | ||
| import nextflow.executor.Executor | ||
| import nextflow.extension.FilesEx | ||
| import nextflow.processor.TaskHandler | ||
| import nextflow.processor.TaskMonitor | ||
| import nextflow.processor.TaskPollingMonitor | ||
| import nextflow.processor.TaskRun | ||
| import nextflow.util.DispatcherClient | ||
| import nextflow.util.ForchClient | ||
| import nextflow.util.Duration | ||
|
|
||
| @Slf4j | ||
| class ForchExecutor extends Executor { | ||
|
|
||
| Path remoteBinDir = null | ||
| private ForchClient forchClient | ||
|
|
||
| @Override | ||
| protected TaskMonitor createTaskMonitor() { | ||
| return TaskPollingMonitor.create(session, name, 100, Duration.of("15s")) | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. del the |
||
| } | ||
|
|
||
| @Override | ||
| protected void register() { | ||
| // todo(ayush): decouple dispatcher and executor | ||
| this.dispatcherClient = new DispatcherClient() | ||
| this.forchClient = new ForchClient() | ||
|
|
||
| this.session.addIgniter { | ||
| this.dispatcherClient.updateExecutionStatus("RUNNING") | ||
| } | ||
|
|
||
| uploadBinDir() | ||
| } | ||
|
|
||
| @Override | ||
| TaskHandler createTaskHandler(TaskRun task) { | ||
| return new ForchTaskHandler(task, remoteBinDir, session, this.forchClient, this.dispatcherClient) | ||
| } | ||
|
|
||
| protected void uploadBinDir() { | ||
| if( session.binDir && !session.binDir.empty() ) { | ||
| def s3 = getTempDir() | ||
| log.info "Uploading local `bin` scripts folder to ${s3.toUriString()}/bin" | ||
| remoteBinDir = FilesEx.copyTo(session.binDir, s3) | ||
| } | ||
| } | ||
|
|
||
| @Override | ||
| void shutdown() { | ||
| def status = session.success ? "SUCCEEDED" : ((session.aborted || session.cancelled) ? "ABORTED" : "FAILED") | ||
| this.dispatcherClient.updateExecutionStatus(status) | ||
|
|
||
| String nfsServerTaskId = System.getenv("nfs_server_task_id") | ||
| if (nfsServerTaskId != null) | ||
| this.forchClient.abortTasks([Integer.parseInt(nfsServerTaskId)]) | ||
|
|
||
| super.shutdown() | ||
| } | ||
| } | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,104 @@ | ||||||
| package nextflow.forch | ||||||
|
|
||||||
| import java.nio.file.Path | ||||||
|
|
||||||
| import nextflow.executor.BashFunLib | ||||||
| import nextflow.executor.SimpleFileCopyStrategy | ||||||
| import nextflow.util.Escape | ||||||
|
|
||||||
| class ForchFileCopyStrategy extends SimpleFileCopyStrategy { | ||||||
|
|
||||||
| @Override | ||||||
| String getBeforeStartScript() { | ||||||
| def lib = new BashFunLib().coreLib() | ||||||
|
|
||||||
| return lib + "\n\n" + """\ | ||||||
| nxf_s5cmd_upload() { | ||||||
| local name=\$1 | ||||||
| local s3path=\$2 | ||||||
| if [[ "\$name" == - ]]; then | ||||||
| echo 's5cmd --no-verify-ssl pipe "\$s3path"' | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. why are we doing |
||||||
| s5cmd --no-verify-ssl pipe "\$s3path" | ||||||
| elif [[ -d "\$name" ]]; then | ||||||
| s5cmd --no-verify-ssl cp "\$name" "\$s3path/" | ||||||
| else | ||||||
| s5cmd --no-verify-ssl cp "\$name" "\$s3path/\$name" | ||||||
| fi | ||||||
| } | ||||||
|
|
||||||
| nxf_s5cmd_download() { | ||||||
| local source=\$1 | ||||||
| local target=\$2 | ||||||
| local file_name=\$(basename \$1) | ||||||
| local is_dir=\$(s5cmd --no-verify-ssl ls \$source | grep -F "DIR \${file_name}/" -c) | ||||||
| if [[ \$is_dir == 1 ]]; then | ||||||
| s5cmd --no-verify-ssl cp "\$source/*" "\$target" | ||||||
| else | ||||||
| s5cmd --no-verify-ssl cp "\$source" "\$target" | ||||||
| fi | ||||||
| } | ||||||
|
|
||||||
|
|
||||||
| """.stripIndent() | ||||||
| } | ||||||
|
|
||||||
| @Override | ||||||
| String getStageInputFilesScript(Map<String, Path> inputFiles) { | ||||||
| def result = 'downloads=(true)\n' | ||||||
| result += super.getStageInputFilesScript(inputFiles) + '\n' | ||||||
| result += 'nxf_parallel "${downloads[@]}"\n' | ||||||
| return result | ||||||
| } | ||||||
|
|
||||||
| @Override | ||||||
| protected String stageInCommand(String source, String target, String mode) { | ||||||
| return "downloads+=(\"nxf_s5cmd_download s3:/${Escape.path(source)} ${Escape.path(target)}\")" | ||||||
| } | ||||||
|
|
||||||
| @Override | ||||||
| String getUnstageOutputFilesScript(List<String> outputFiles, Path targetDir) { | ||||||
| final patterns = normalizeGlobStarPaths(outputFiles) | ||||||
|
|
||||||
| if( !patterns ) | ||||||
| return null | ||||||
|
|
||||||
| final escape = new ArrayList(outputFiles.size()) | ||||||
| for( String it : patterns ) | ||||||
| escape.add( Escape.path(it) ) | ||||||
|
|
||||||
| return """\ | ||||||
| uploads=() | ||||||
| IFS=\$'\\n' | ||||||
| for name in \$(eval "ls -1d ${escape.join(' ')}" | sort | uniq); do | ||||||
| uploads+=("nxf_s5cmd_upload '\$name' s3:/${Escape.path(targetDir)}") | ||||||
| done | ||||||
| unset IFS | ||||||
| nxf_parallel "\${uploads[@]}" | ||||||
| """.stripIndent(true) | ||||||
| } | ||||||
|
|
||||||
| @Override | ||||||
| String touchFile(Path file) { | ||||||
| return "echo start | s5cmd --no-verify-ssl pipe s3:/${Escape.path(file)}" | ||||||
| } | ||||||
|
|
||||||
| @Override | ||||||
| String fileStr( Path path ) { | ||||||
| Escape.path(path.getFileName()) | ||||||
| } | ||||||
|
|
||||||
| @Override | ||||||
| String copyFile( String name, Path target ) { | ||||||
| "s5cmd --no-verify-ssl cp ${Escape.path(name)} s3:/${Escape.path(target)}" | ||||||
| } | ||||||
|
|
||||||
| @Override | ||||||
| String exitFile(Path file) { | ||||||
| return "| s5cmd --no-verify-ssl pipe s3:/${Escape.path(file)} || true" | ||||||
| } | ||||||
|
|
||||||
| @Override | ||||||
| String pipeInputFile(Path file) { | ||||||
| return " < ${Escape.path(file.getFileName())}" | ||||||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Suggested change
perhaps? |
||||||
| } | ||||||
| } | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,140 @@ | ||
| package nextflow.forch | ||
|
|
||
| import nextflow.util.DispatcherClient | ||
| import nextflow.util.ForchClient | ||
|
|
||
| import java.nio.file.Path | ||
|
|
||
| import groovy.util.logging.Slf4j | ||
|
|
||
| import nextflow.Session | ||
|
|
||
| import nextflow.processor.TaskHandler | ||
| import nextflow.processor.TaskRun | ||
| import nextflow.processor.TaskStatus | ||
| import nextflow.script.ProcessConfig | ||
| import nextflow.util.MemoryUnit | ||
|
|
||
| @Slf4j | ||
| class ForchTaskHandler extends TaskHandler { | ||
|
|
||
| ProcessConfig processConfig | ||
| Integer forchTaskId | ||
| Path remoteBinDir = null | ||
| private ForchClient forchClient | ||
| private DispatcherClient dispatcherClient | ||
| Session session | ||
|
|
||
| ForchTaskHandler(TaskRun task, Path remoteBinDir, Session session, ForchClient forchClient, DispatcherClient dispatcherClient) { | ||
| super(task) | ||
|
|
||
| this.processConfig = task.processor.config | ||
| this.remoteBinDir = remoteBinDir | ||
| this.forchClient = forchClient | ||
| this.dispatcherClient = dispatcherClient | ||
|
|
||
| this.session = session | ||
| } | ||
|
|
||
| private String getCurrentStatus() { | ||
| if (this.forchTaskId == null) return | ||
|
|
||
| return this.forchClient.getTaskStatus(this.forchTaskId) | ||
| } | ||
|
|
||
| @Override | ||
| boolean checkIfRunning() { | ||
| def running = this.currentStatus == 'RUNNING' | ||
| if (running) | ||
| status = TaskStatus.RUNNING | ||
| return running | ||
| } | ||
|
|
||
| @Override | ||
| boolean checkIfCompleted() { | ||
| def cur = this.currentStatus | ||
| if (cur != "SUCCEEDED" && cur != "FAILED") return false | ||
|
|
||
| // todo(ayush): single query | ||
| task.exitStatus = this.forchClient.getTaskExitCode(this.forchTaskId) | ||
|
|
||
| // todo(ayush): logs, retries | ||
| task.stdout = "" | ||
| task.stderr = "" | ||
| status = TaskStatus.COMPLETED | ||
| return true | ||
| } | ||
|
|
||
| @Override | ||
| void kill() { | ||
| forchClient.abortTasks([forchTaskId]) | ||
| } | ||
|
|
||
| @Override | ||
| void prepareLauncher() { | ||
| new ForchTaskWrapperBuilder(this.task.toTaskBean()).build() | ||
| } | ||
|
|
||
| @Override | ||
| void submit() { | ||
| int cpus = task.config.getCpus() | ||
| MemoryUnit memory = task.config.getMemory() ?: MemoryUnit.of("2GiB") | ||
|
|
||
| final containerOpts = task.config.getContainerOptionsMap() | ||
|
|
||
| MemoryUnit shm = null; | ||
| if (containerOpts != null && containerOpts.exists("shm-size")) { | ||
| shm = new MemoryUnit(containerOpts.getFirstValue("shm-size") as String) | ||
| } | ||
|
|
||
| // todo(ayush): gpu support | ||
| // AcceleratorResource acc = task.config.getAccelerator() | ||
|
|
||
| def serverIp = System.getenv("latch_internal_nfs_server_ip") | ||
| if (serverIp == null) | ||
| throw new RuntimeException("failed to get server ip") | ||
|
|
||
| String cmd = """\ | ||
| mkdir --parents ${session.baseDir} | ||
|
|
||
| chown -R root:root /usr/bin/mount 2>&1 > /dev/null | ||
|
|
||
| until mount -t nfs4 [${serverIp}]:/ ${session.baseDir} 2>&1 > /dev/null | ||
| do | ||
| sleep 5 | ||
| done | ||
|
|
||
| cat ${task.workDir}/${TaskRun.CMD_RUN} | bash 2>&1 | ||
| """.stripIndent().trim() | ||
|
|
||
| if (remoteBinDir != null) { | ||
| cmd = """\ | ||
| mkdir -p /nextflow-bin | ||
| cp ${remoteBinDir}/* /nextflow-bin | ||
| chmod +x /nextflow-bin/* | ||
| export PATH=/nextflow-bin:\$PATH | ||
| """.stripIndent() + cmd | ||
| } | ||
|
|
||
| List<String> entrypoint = [ | ||
| "/bin/bash", | ||
| "-c", | ||
| cmd, | ||
| ] | ||
|
|
||
| this.forchTaskId = this.forchClient.submitTask( | ||
| this.task.name, | ||
| this.task.container, | ||
| entrypoint, | ||
| cpus, | ||
| memory.bytes, | ||
| shm?.bytes ?: 0 | ||
| ) | ||
|
|
||
| // todo(rahul): put this in a single transaction with submitTask | ||
| this.dispatcherClient.updateForchTaskId( | ||
| this.taskExecutionId, | ||
| this.forchTaskId | ||
| ) | ||
| } | ||
| } |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
you might still want to flush + fsync?