← back to elixir-nx__nx

Function bodies 219 total

All specs Real LLM only Function bodies

encode_local_pid function · cpp · L656-L660 (5 LOC)

exla/c_src/exla/exla.cc

// Serializes a pid by copying the raw bytes of the ErlNifPid struct into a
// std::string of exactly sizeof(ErlNifPid) bytes. `env` is unused.
std::string encode_local_pid(ErlNifEnv *env, ErlNifPid pid) {
  static_cast<void>(env);
  const char *raw_bytes = reinterpret_cast<const char *>(&pid);
  return std::string(raw_bytes, sizeof(ErlNifPid));
}

decode_local_pid function · cpp · L663-L672 (10 LOC)

exla/c_src/exla/exla.cc

// Rebuilds a pid term from a string holding the raw bytes of an ErlNifPid.
//
// Throws std::invalid_argument when the input is not exactly
// sizeof(ErlNifPid) bytes long.
fine::Term decode_local_pid(ErlNifEnv *env, std::string pid_binary) {
  const bool size_matches = pid_binary.size() == sizeof(ErlNifPid);
  if (!size_matches) {
    throw std::invalid_argument("invalid encoded local pid size");
  }

  // Copy the bytes into a properly typed struct before handing it to the NIF
  // API, rather than aliasing the string's storage.
  ErlNifPid decoded;
  std::memcpy(&decoded, pid_binary.data(), sizeof(decoded));
  return fine::Term(enif_make_pid(env, &decoded));
}

start_log_sink function · cpp · L683-L696 (14 LOC)

exla/c_src/exla/exla.cc

// Configures Abseil logging and registers a sink constructed from
// `logger_pid`.
fine::Ok<> start_log_sink(ErlNifEnv *env, ErlNifPid logger_pid) {
  // Besides the sink, Abseil also writes entries at or above a threshold to
  // stderr. kInfinity would silence stderr completely, but errors are kept
  // so that they are still visible if the process crashes right after
  // logging, before Elixir gets a chance to print them.
  absl::SetStderrThreshold(absl::LogSeverityAtLeast::kError);
  absl::InitializeLog();

  // The sink is heap-allocated and handed to Abseil; it is not deleted in
  // this function.
  auto *log_sink = new ExlaLogSink(logger_pid);
  absl::AddLogSink(log_sink);

  return fine::Ok();
}

write_to_pointer function · cpp · L706-L711 (6 LOC)

exla/c_src/exla/exla.cc

// Copies the contents of `data` to the raw memory location `address + offset`.
//
// No validation is performed on `address`, `offset` or the destination size;
// the caller is responsible for making sure the target range is writable.
fine::Ok<> write_to_pointer(ErlNifEnv *env, uint64_t address,
                            ErlNifBinary data, uint64_t offset) {
  uint8_t *destination = reinterpret_cast<uint8_t *>(address) + offset;
  std::memcpy(destination, data.data, data.size);
  return fine::Ok();
}

TrackDeallocation method · cpp · L32-L40 (9 LOC)

exla/c_src/exla/exla_client.cc

// Reports this buffer's on-device size to the owning client as deallocated.
//
// Does nothing when no client is attached or when the size query fails.
void ExlaBuffer::TrackDeallocation() {
  if (!client_) {
    return;
  }

  auto on_device_size = GetOnDeviceSizeInBytes();
  if (!on_device_size.ok()) {
    return;
  }

  client_->TrackBufferDeallocated(device_id(), on_device_size.value());
}

CopyLiteralToBinary function · cpp · L41-L49 (9 LOC)

exla/c_src/exla/exla_client.cc

// Allocates `binary` and fills it with the leading bytes of `literal`.
//
// `size` is the number of bytes requested; a negative value or a value larger
// than the literal falls back to the literal's full byte size.
//
// NOTE(review): the result of enif_alloc_binary is not checked here.
void CopyLiteralToBinary(xla::Literal *literal, ErlNifBinary *binary,
                         exla::int64 size) {
  const exla::int64 literal_bytes = literal->size_bytes();
  const bool out_of_range = size < 0 || size > literal_bytes;
  if (out_of_range) {
    size = literal_bytes;
  }

  enif_alloc_binary(size, binary);
  std::memcpy(binary->data, literal->untyped_data(), size);
}

ToBinary method · cpp · L50-L65 (16 LOC)

exla/c_src/exla/exla_client.cc

// Synchronously converts the buffer to a literal and returns its leading
// bytes as a new binary term allocated in `env`.
//
// `size` is the number of bytes requested; a negative value or a value larger
// than the literal falls back to the literal's full byte size. Any error from
// ToLiteralSync is propagated to the caller.
tsl::StatusOr<ERL_NIF_TERM> ExlaBuffer::ToBinary(ErlNifEnv *env,
                                                 exla::int64 size) {
  EXLA_ASSIGN_OR_RETURN(std::shared_ptr<xla::Literal> literal,
                        buffer_->ToLiteralSync());

  const exla::int64 literal_bytes = literal->size_bytes();
  const bool out_of_range = size < 0 || size > literal_bytes;
  if (out_of_range) {
    size = literal_bytes;
  }

  ERL_NIF_TERM binary_term;
  auto destination = enif_make_new_binary(env, size, &binary_term);
  memcpy(destination, literal->untyped_data(), size);

  return binary_term;
}

Repobility · code-quality intelligence platform · https://repobility.com

ReplaceBuffer method · cpp · L66-L79 (14 LOC)

exla/c_src/exla/exla_client.cc

// Swaps the underlying PjRt buffer for `new_buffer`.
//
// A current buffer that has not been deleted yet is first reported as
// deallocated and deleted. After the swap, the new buffer's on-device size is
// reported as allocated, provided a client is attached, the new buffer is
// non-null and the size query succeeds.
void ExlaBuffer::ReplaceBuffer(std::unique_ptr<xla::PjRtBuffer> new_buffer) {
  const bool has_live_buffer = buffer_ && !buffer_->IsDeleted();
  if (has_live_buffer) {
    TrackDeallocation();
    buffer_->Delete();
  }

  buffer_ = std::move(new_buffer);

  if (!client_ || !buffer_) {
    return;
  }

  auto on_device_size = GetOnDeviceSizeInBytes();
  if (on_device_size.ok()) {
    client_->TrackBufferAllocated(device_id(), on_device_size.value());
  }
}

Deallocate method · cpp · L80-L91 (12 LOC)

exla/c_src/exla/exla_client.cc

// Deletes the underlying PjRt buffer and updates memory tracking.
//
// Returns a FailedPrecondition status if the buffer was already deleted,
// OK otherwise.
tsl::Status ExlaBuffer::Deallocate() {
  if (buffer_->IsDeleted()) {
    return xla::FailedPrecondition(
        "Attempt to deallocate already deallocated buffer.");
  }

  // Tracking has to happen first: it queries the buffer's on-device size,
  // which is done before the buffer is marked as deleted.
  TrackDeallocation();
  buffer_->Delete();
  return tsl::OkStatus();
}

CopyToDevice method · cpp · L92-L110 (19 LOC)

exla/c_src/exla/exla_client.cc

// Copies this buffer into the default memory space of `dst_device` and wraps
// the copy in a new ExlaBuffer resource.
//
// The copy inherits this buffer's client; when a client is attached and the
// size query succeeds, the copy's on-device size is reported as allocated.
// Errors from the memory space lookup or the copy itself are propagated.
tsl::StatusOr<fine::ResourcePtr<ExlaBuffer>>
ExlaBuffer::CopyToDevice(xla::PjRtDevice *dst_device) {
  EXLA_ASSIGN_OR_RETURN(auto target_space, dst_device->default_memory_space());
  EXLA_ASSIGN_OR_RETURN(std::unique_ptr<xla::PjRtBuffer> copied,
                        buffer_->CopyToMemorySpace(target_space));

  auto copy = fine::make_resource<ExlaBuffer>(std::move(copied));
  copy->SetClient(client_);

  if (!client_) {
    return copy;
  }

  auto on_device_size = copy->GetOnDeviceSizeInBytes();
  if (on_device_size.ok()) {
    client_->TrackBufferAllocated(copy->device_id(), on_device_size.value());
  }

  return copy;
}

PjRtBufferFromBinary function · cpp · L119-L153 (35 LOC)

exla/c_src/exla/exla_client.cc

tsl::StatusOr<std::unique_ptr<xla::PjRtBuffer>>
PjRtBufferFromBinary(xla::PjRtClient *client, ERL_NIF_TERM source_term,
                     const xla::Shape &shape, int device_id) {
  // We copy the binary term into a new env and point the buffer to
  // the binary content. Since larger binaries are shared and refcounted
  // this should be zero-copy.

  ErlNifEnv *copy_env = enif_alloc_env();
  ERL_NIF_TERM dest_term = enif_make_copy(copy_env, source_term);

  auto binary = fine::decode<ErlNifBinary>(copy_env, dest_term);

  xla::PjRtClient::HostBufferSemantics semantics =
      xla::PjRtClient::HostBufferSemantics::kImmutableZeroCopy;
  std::function<void()> on_done_with_host_buffer = [copy_env]() {
    enif_free_env(copy_env);
  };

  EXLA_ASSIGN_OR_RETURN(
      xla::PjRtDevice * device,
      client->LookupDevice(xla::PjRtGlobalDeviceId(device_id)));
  EXLA_ASSIGN_OR_RETURN(auto memory_space, device->default_memory_space());
  // Passing std::nullopt should work, but it fails fo

UnpackRunArguments function · cpp · L154-L219 (66 LOC)

exla/c_src/exla/exla_client.cc

tsl::StatusOr<std::vector<std::vector<xla::PjRtBuffer *>>> UnpackRunArguments(
    ErlNifEnv *env, ExlaExecutable::RunArguments arguments,
    std::vector<std::unique_ptr<xla::PjRtBuffer>> &transient_buffers,
    ExlaClient *client, xla::DeviceAssignment device_assignment, int device_id,
    int num_partitions) {
  std::vector<std::vector<xla::PjRtBuffer *>> arg_buffers;
  arg_buffers.reserve(arguments.size());

  int index = 0;

  for (const auto &replica_arguments : arguments) {
    // For automatic SPMD: each input list goes to a different partition device
    // device_assignment is (replica, partition) -> device
    // With num_partitions > 1, we iterate through partitions (replica=0,
    // partition=0..N-1) For replication, we iterate through replicas
    // (replica=0..N-1, partition=0)
    int replica = (num_partitions > 1) ? 0 : index;
    int partition = (num_partitions > 1) ? index : 0;

    auto device =
        device_id >= 0 ? device_id : device_assignment(replica, part

UnpackResult function · cpp · L220-L262 (43 LOC)

exla/c_src/exla/exla_client.cc

ExlaExecutable::RunResult
UnpackResult(ErlNifEnv *env,
             std::vector<std::vector<std::unique_ptr<xla::PjRtBuffer>>> result,
             xla::DeviceAssignment device_assignment, int device_id,
             ExlaClient *client) {
  auto per_replica_results = std::vector<
      std::tuple<std::vector<fine::ResourcePtr<ExlaBuffer>>, int64_t>>();

  for (int i = 0; i < result.size(); i++) {
    auto replica_results = std::vector<fine::ResourcePtr<ExlaBuffer>>();

    int64_t device;
    if (device_id >= 0) {
      device = device_id;
    } else if (device_assignment.computation_count() > 1) {
      // SPMD: results correspond to partitions (replica 0, partition i)
      device = device_assignment(0, i);
    } else {
      // Replication: results correspond to replicas (replica i, partition 0)
      device = device_assignment(i, 0);
    }

    for (auto &pjrt_buf : result.at(i)) {
      pjrt_buf->GetReadyFuture().Await();
      auto result = fine::make_resource<ExlaBuffer>(std::m

Run method · cpp · L263-L390 (128 LOC)

exla/c_src/exla/exla_client.cc

tsl::StatusOr<ExlaExecutable::RunResult>
ExlaExecutable::Run(ErlNifEnv *env, ExlaExecutable::RunArguments arguments,
                    int device_id) {
  xla::ExecuteOptions options;
  // we do not handle multi-device launches at this time, so this must always
  // be set to 0
  options.launch_id = 0;
  // disable strict shape checking which ensures shapes of buffers match exact
  // shape (with layout) expected be compiled executable, we have mismatches
  // on gpu
  options.strict_shape_checking = false;
  // execution mode determines whether or not to launch the executable in the
  // calling thread or in a separate thread, default mode is either-or, here
  // we specify synchronous because the Elixir side ensures execution is always
  // synchronous
  options.execution_mode = xla::ExecuteOptions::ExecutionMode::kSynchronous;

  // the number of replicas will equal the number of devices involved in
  // a pmap, but in all other cases it will be equal to 1
  int num_replicas = exe

BufferFromBinary method · cpp · L394-L410 (17 LOC)

exla/c_src/exla/exla_client.cc

// Creates a device buffer from the binary term `source_term` with the given
// shape on device `device_id` and wraps it in an ExlaBuffer resource.
//
// The new buffer is attached to this client, and its on-device size is
// reported as allocated when the size query succeeds. Errors from
// PjRtBufferFromBinary are propagated.
tsl::StatusOr<fine::ResourcePtr<ExlaBuffer>>
ExlaClient::BufferFromBinary(ERL_NIF_TERM source_term, xla::Shape &shape,
                             int device_id) {
  EXLA_ASSIGN_OR_RETURN(
      auto pjrt_buffer,
      PjRtBufferFromBinary(client(), source_term, shape, device_id));

  auto resource = fine::make_resource<ExlaBuffer>(std::move(pjrt_buffer));
  resource->SetClient(this);

  // Memory tracking is best effort: a failed size query is ignored.
  if (auto on_device_size = resource->GetOnDeviceSizeInBytes();
      on_device_size.ok()) {
    TrackBufferAllocated(device_id, on_device_size.value());
  }

  return resource;
}

Repobility — the code-quality scanner for AI-generated software · https://repobility.com

ExecutableFingerprint function · cpp · L411-L424 (14 LOC)

exla/c_src/exla/exla_client.cc

// Returns the fingerprint of `executable`.
//
// An Unimplemented error from FingerprintExecutable is mapped to
// std::nullopt; any other error status is returned as is.
tsl::StatusOr<std::optional<std::string>>
ExecutableFingerprint(std::unique_ptr<xla::PjRtLoadedExecutable> &executable) {
  auto fingerprint = executable->FingerprintExecutable();

  if (!fingerprint.ok()) {
    const bool unimplemented =
        fingerprint.status().code() == absl::StatusCode::kUnimplemented;
    if (unimplemented) {
      return std::nullopt;
    }
    return fingerprint.status();
  }

  return {fingerprint.value()};
}

DeserializeExecutable method · cpp · L425-L438 (14 LOC)

exla/c_src/exla/exla_client.cc

// Loads an executable from its serialized form and wraps it, together with
// its fingerprint (if any), in an ExlaExecutable resource owned by this
// client. Loading and fingerprinting errors are propagated.
tsl::StatusOr<fine::ResourcePtr<ExlaExecutable>>
ExlaClient::DeserializeExecutable(std::string deserialized_executable) {
  EXLA_ASSIGN_OR_RETURN(
      std::unique_ptr<xla::PjRtLoadedExecutable> loaded,
      client_->LoadSerializedExecutable(deserialized_executable, std::nullopt,
                                        xla::LoadOptions()));

  EXLA_ASSIGN_OR_RETURN(absl::optional<std::string> loaded_fingerprint,
                        ExecutableFingerprint(loaded));

  return fine::make_resource<ExlaExecutable>(
      std::move(loaded), std::move(loaded_fingerprint), this);
}

Compile method · cpp · L439-L466 (28 LOC)

exla/c_src/exla/exla_client.cc

tsl::StatusOr<fine::ResourcePtr<ExlaExecutable>> ExlaClient::Compile(
    mlir::ModuleOp module, std::vector<xla::Shape> argument_layouts,
    xla::ExecutableBuildOptions &options, bool compile_portable_executable) {
  std::vector<xla::Shape> layouts;
  layouts.reserve(argument_layouts.size());
  for (auto shape : argument_layouts) {
    xla::Shape cpy_shape =
        xla::ShapeUtil::MakeShape(shape.element_type(), shape.dimensions());
    xla::LayoutUtil::ClearLayout(&cpy_shape);
    layouts.push_back(cpy_shape);
  }

  xla::CompileOptions compile_opts;
  compile_opts.argument_layouts = layouts;
  compile_opts.parameter_is_tupled_arguments = false;
  compile_opts.executable_build_options = options;
  compile_opts.compile_portable_executable = compile_portable_executable;

  EXLA_ASSIGN_OR_RETURN(
      std::unique_ptr<xla::PjRtLoadedExecutable> executable,
      client_->CompileAndLoad(module, std::move(compile_opts)));
  EXLA_ASSIGN_OR_RETURN(absl::optional<std::string> fingerprint,

TransferToInfeed method · cpp · L467-L515 (49 LOC)

exla/c_src/exla/exla_client.cc

tsl::Status ExlaClient::TransferToInfeed(ErlNifEnv *env,
                                         std::vector<ErlNifBinary> buffer_bins,
                                         std::vector<xla::Shape> shapes,
                                         int device_id) {
  std::vector<const char *> buf_ptrs;
  buf_ptrs.reserve(buffer_bins.size());

  for (const auto &buffer_bin : buffer_bins) {
    const char *data_ptr =
        const_cast<char *>(reinterpret_cast<char *>(buffer_bin.data));
    buf_ptrs.push_back(data_ptr);
  }

  auto shape = xla::ShapeUtil::MakeTupleShape(shapes);

  // Instead of pushing each buffer separately, we create a flat tuple
  // literal and push the whole group of buffers.
  //
  // On the CPU, XLA infeed reads buffers from a queue one at a time [1][2]
  // (or rather, the infeed operation is lowered to multiple queue reads),
  // hence pushing one at a time works fine. Pushing a flat tuple works
  // effectively the same, since it basically adds each element

TransferFromOutfeed method · cpp · L516-L539 (24 LOC)

exla/c_src/exla/exla_client.cc

// Reads one value of the given `shape` from the outfeed of device
// `device_id` and returns its bytes as a new binary term allocated in `env`.
//
// Errors from the device lookup or the outfeed transfer are returned.
tsl::StatusOr<ERL_NIF_TERM> ExlaClient::TransferFromOutfeed(ErlNifEnv *env,
                                                            int device_id,
                                                            xla::Shape &shape) {
  EXLA_ASSIGN_OR_RETURN(
      xla::PjRtDevice * device,
      client_->LookupDevice(xla::PjRtGlobalDeviceId(device_id)));

  // The literal acts as the host-side destination of the transfer.
  auto outfeed_literal = std::make_shared<xla::Literal>(shape);

  auto transfer_status = device->TransferFromOutfeed(outfeed_literal.get());
  if (!transfer_status.ok()) {
    return transfer_status;
  }

  auto byte_count = outfeed_literal->size_bytes();

  ERL_NIF_TERM binary_term;
  auto destination = enif_make_new_binary(env, byte_count, &binary_term);
  memcpy(destination, outfeed_literal->untyped_data(), byte_count);

  return binary_term;
}

GetHostClient function · cpp · L540-L548 (9 LOC)

exla/c_src/exla/exla_client.cc

// Creates a CPU PjRt client with `asynchronous` set to false and wraps it in
// an ExlaClient resource. Client creation errors are propagated.
tsl::StatusOr<fine::ResourcePtr<ExlaClient>> GetHostClient() {
  xla::CpuClientOptions cpu_options;
  cpu_options.asynchronous = false;

  EXLA_ASSIGN_OR_RETURN(std::unique_ptr<xla::PjRtClient> pjrt_client,
                        xla::GetXlaPjrtCpuClient(cpu_options));

  return fine::make_resource<ExlaClient>(std::move(pjrt_client));
}

GetGpuClient function · cpp · L549-L564 (16 LOC)

exla/c_src/exla/exla_client.cc

// Creates a StreamExecutor GPU PjRt client using the given allocator settings
// and wraps it in an ExlaClient resource. Client creation errors are
// propagated.
//
// `memory_fraction`, `preallocate` and `kind` are forwarded unchanged into
// the GPU allocator configuration.
tsl::StatusOr<fine::ResourcePtr<ExlaClient>>
GetGpuClient(double memory_fraction, bool preallocate,
             xla::GpuAllocatorConfig::Kind kind) {
  // Start from value-initialized configs so that every field not set below
  // keeps its default.
  xla::GpuAllocatorConfig allocator_config{};
  allocator_config.kind = kind;
  allocator_config.memory_fraction = memory_fraction;
  allocator_config.preallocate = preallocate;

  xla::GpuClientOptions client_options{};
  client_options.allocator_config = allocator_config;

  EXLA_ASSIGN_OR_RETURN(std::unique_ptr<xla::PjRtClient> pjrt_client,
                        xla::GetStreamExecutorGpuClient(client_options));

  return fine::make_resource<ExlaClient>(std::move(pjrt_client));
}

GetTpuClient function · cpp · L565-L582 (18 LOC)

exla/c_src/exla/exla_client.cc

// Loads and initializes the "tpu" PjRt plugin from libtpu.so, then creates a
// C API client for "TPU" and wraps it in an ExlaClient resource.
//
// A failure in any of the three steps is returned to the caller.
tsl::StatusOr<fine::ResourcePtr<ExlaClient>> GetTpuClient() {
  if (auto plugin = pjrt::LoadPjrtPlugin("tpu", "libtpu.so"); !plugin.ok()) {
    return plugin.status();
  }

  if (tsl::Status init_status = pjrt::InitializePjrtPlugin("tpu");
      !init_status.ok()) {
    return init_status;
  }

  EXLA_ASSIGN_OR_RETURN(std::unique_ptr<xla::PjRtClient> pjrt_client,
                        xla::GetCApiClient("TPU"));

  return fine::make_resource<ExlaClient>(std::move(pjrt_client));
}

All rows above produced by Repobility · https://repobility.com

GetCApiClient function · cpp · L583-L590 (8 LOC)

exla/c_src/exla/exla_client.cc

// Creates a PjRt C API client for `device_type` and wraps it in an ExlaClient
// resource. Client creation errors are propagated.
tsl::StatusOr<fine::ResourcePtr<ExlaClient>>
GetCApiClient(std::string device_type) {
  EXLA_ASSIGN_OR_RETURN(std::unique_ptr<xla::PjRtClient> pjrt_client,
                        xla::GetCApiClient(device_type));

  return fine::make_resource<ExlaClient>(std::move(pjrt_client));
}

TrackBufferAllocated method · cpp · L593-L600 (8 LOC)

exla/c_src/exla/exla_client.cc

// Adds `size` bytes to the per-device and total memory counters and raises
// the recorded peak when the new total exceeds it. Guarded by memory_mutex_.
void ExlaClient::TrackBufferAllocated(int device_id, size_t size) {
  const std::lock_guard<std::mutex> guard(memory_mutex_);

  total_memory_ += size;
  device_memory_[device_id] += size;

  if (peak_memory_ < total_memory_) {
    peak_memory_ = total_memory_;
  }
}

TrackBufferDeallocated method · cpp · L601-L606 (6 LOC)

exla/c_src/exla/exla_client.cc

// Subtracts `size` bytes from the per-device and total memory counters.
// Guarded by memory_mutex_. The recorded peak is left untouched.
void ExlaClient::TrackBufferDeallocated(int device_id, size_t size) {
  const std::lock_guard<std::mutex> guard(memory_mutex_);

  total_memory_ -= size;
  device_memory_[device_id] -= size;
}

GetAllocatedMemory method · cpp · L607-L611 (5 LOC)

exla/c_src/exla/exla_client.cc

// Returns the current value of the total tracked memory counter, read under
// memory_mutex_.
size_t ExlaClient::GetAllocatedMemory() const {
  const std::lock_guard<std::mutex> guard(memory_mutex_);
  return total_memory_;
}

GetPeakMemory method · cpp · L612-L616 (5 LOC)

exla/c_src/exla/exla_client.cc

// Returns the recorded peak of the total tracked memory counter, read under
// memory_mutex_.
size_t ExlaClient::GetPeakMemory() const {
  const std::lock_guard<std::mutex> guard(memory_mutex_);
  return peak_memory_;
}

ResetPeakMemory method · cpp · L617-L621 (5 LOC)

exla/c_src/exla/exla_client.cc

// Resets the recorded peak to the current total tracked memory, under
// memory_mutex_.
void ExlaClient::ResetPeakMemory() {
  const std::lock_guard<std::mutex> guard(memory_mutex_);
  peak_memory_ = total_memory_;
}

GetPerDeviceMemory method · cpp · L622-L626 (5 LOC)

exla/c_src/exla/exla_client.cc

// Returns a copy of the per-device tracked memory map (device id -> bytes),
// taken under memory_mutex_.
std::map<int, size_t> ExlaClient::GetPerDeviceMemory() const {
  const std::lock_guard<std::mutex> guard(memory_mutex_);
  return device_memory_;
}

ExlaBuffer class · c · L26-L59 (34 LOC)

exla/c_src/exla/exla_client.h

class ExlaBuffer {
public:
  ExlaBuffer(std::unique_ptr<xla::PjRtBuffer> buffer);

  int device_id() { return buffer_->device()->id(); }
  xla::PjRtBuffer *buffer() { return buffer_.get(); }
  tsl::StatusOr<fine::ResourcePtr<ExlaBuffer>>
  CopyToDevice(xla::PjRtDevice *dst_device);
  tsl::StatusOr<ERL_NIF_TERM> ToBinary(ErlNifEnv *env, exla::int64 size);
  tsl::Status Deallocate();

  tsl::StatusOr<std::uintptr_t> GetDevicePointer(xla::PjRtClient *client) {
    return client->UnsafeBufferPointer(buffer_.get());
  }

  tsl::StatusOr<size_t> GetOnDeviceSizeInBytes() {
    return buffer_.get()->GetOnDeviceSizeInBytes();
  }

  void SetClient(ExlaClient *client) { client_ = client; }

  // Replace the underlying PjRt buffer with a new one (e.g. an shm-backed
  // view).  The old buffer is deallocated first so XLA can free its memory.
  void ReplaceBuffer(std::unique_ptr<xla::PjRtBuffer> new_buffer);

  ~ExlaBuffer();

private:
  std::unique_ptr<xla::PjRtBuffer> buffer_;
  ExlaClient *clie

Provenance: Repobility (https://repobility.com) — every score reproducible from /scan/

GetDevicePointer method · c · L37-L40 (4 LOC)

exla/c_src/exla/exla_client.h

  // Asks `client` for the raw pointer backing this buffer via
  // UnsafeBufferPointer and returns its result unchanged.
  tsl::StatusOr<std::uintptr_t> GetDevicePointer(xla::PjRtClient *client) {
    xla::PjRtBuffer *raw_buffer = buffer_.get();
    return client->UnsafeBufferPointer(raw_buffer);
  }

GetOnDeviceSizeInBytes method · c · L41-L44 (4 LOC)

exla/c_src/exla/exla_client.h

  // Forwards to the underlying PjRt buffer's GetOnDeviceSizeInBytes and
  // returns its result unchanged.
  tsl::StatusOr<size_t> GetOnDeviceSizeInBytes() {
    return buffer_->GetOnDeviceSizeInBytes();
  }

ExlaExecutable class · c · L60-L89 (30 LOC)

exla/c_src/exla/exla_client.h

class ExlaExecutable {
public:
  using ReplicaArgument = std::variant<fine::ResourcePtr<ExlaBuffer>,
                                       std::tuple<fine::Term, xla::Shape>>;
  using RunArguments = std::vector<std::vector<ReplicaArgument>>;

  using RunReplicaResult =
      std::tuple<std::vector<fine::ResourcePtr<ExlaBuffer>>, int64_t>;
  using RunResult = std::vector<RunReplicaResult>;

  ExlaExecutable(std::unique_ptr<xla::PjRtLoadedExecutable> executable,
                 absl::optional<std::string> fingerprint, ExlaClient *client);

  ~ExlaExecutable();

  xla::PjRtLoadedExecutable *executable() { return executable_.get(); }

  tsl::StatusOr<RunResult> Run(ErlNifEnv *env, RunArguments arguments,
                               int device_id);

  tsl::StatusOr<std::string> SerializeExecutable() {
    return executable_->SerializeExecutable();
  }

private:
  std::unique_ptr<xla::PjRtLoadedExecutable> executable_;
  absl::optional<std::string> fingerprint_;
  ExlaClient *client_;

SerializeExecutable method · c · L80-L83 (4 LOC)

exla/c_src/exla/exla_client.h

  // Forwards to the underlying loaded executable's SerializeExecutable and
  // returns its result unchanged.
  tsl::StatusOr<std::string> SerializeExecutable() {
    xla::PjRtLoadedExecutable *loaded = executable_.get();
    return loaded->SerializeExecutable();
  }

ExlaClient class · c · L90-L136 (47 LOC)

exla/c_src/exla/exla_client.h

class ExlaClient {
public:
  explicit ExlaClient(std::shared_ptr<xla::PjRtClient> client);

  virtual ~ExlaClient() = default;

  xla::PjRtClient *client() { return client_.get(); }

  // Compiles the given computation with the given compile options

  tsl::StatusOr<fine::ResourcePtr<ExlaExecutable>>
  Compile(mlir::ModuleOp computation, std::vector<xla::Shape> argument_layouts,
          xla::ExecutableBuildOptions &options,
          bool compile_portable_executable);

  tsl::StatusOr<fine::ResourcePtr<ExlaBuffer>>
  BufferFromBinary(ERL_NIF_TERM binary_term, xla::Shape &shape, int device_id);

  tsl::StatusOr<fine::ResourcePtr<ExlaExecutable>>
  DeserializeExecutable(std::string serialized_executable);

  // TODO(seanmor5): This is device logic and should be refactored
  tsl::Status TransferToInfeed(ErlNifEnv *env,
                               std::vector<ErlNifBinary> buffer_bins,
                               std::vector<xla::Shape> shapes, int device_id);

  tsl::StatusOr<ERL

get_cuda_ipc_handle function · cpp · L10-L26 (17 LOC)

exla/c_src/exla/exla_cuda.cc

std::optional<std::string> get_cuda_ipc_handle(std::uintptr_t ptr) {
  cudaIpcMemHandle_t ipc_handle;
  cudaError_t status = cudaIpcGetMemHandle(&ipc_handle, reinterpret_cast<void*>(ptr));

  if (status != cudaSuccess) {
    return std::nullopt;
  }

  // Assuming sizeof(cudaIpcMemHandle_t) is constant
  const size_t size = sizeof(cudaIpcMemHandle_t);

  // Copy the memory handle to a buffer
  auto buffer = std::string(reinterpret_cast<const char*>(&ipc_handle), size);

  return buffer;
}

get_pointer_for_ipc_handle function · cpp · L27-L55 (29 LOC)

exla/c_src/exla/exla_cuda.cc

// Opens a CUDA IPC memory handle on the given device and returns the mapped
// device pointer.
//
// Parameters:
//   handle_bin  - raw bytes of a cudaIpcMemHandle_t.
//   handle_size - number of bytes available at `handle_bin`; must equal
//                 sizeof(cudaIpcMemHandle_t).
//   device_id   - CUDA device made current (via cudaSetDevice) before the
//                 handle is opened.
//
// Returns std::nullopt when the handle bytes are missing or have the wrong
// size, or when either CUDA call fails; CUDA failures are also printed.
std::optional<void*> get_pointer_for_ipc_handle(uint8_t* handle_bin, size_t handle_size, int device_id) {
  if (handle_bin == nullptr || handle_size != sizeof(cudaIpcMemHandle_t)) {
    return std::nullopt;
  }

  // Rebuild the opaque handle directly from the caller's bytes; the size was
  // validated above, so a single copy is sufficient.
  cudaIpcMemHandle_t ipc_handle;
  memcpy(&ipc_handle, handle_bin, sizeof(cudaIpcMemHandle_t));

  // The handle has to be opened on the device the caller asked for.
  cudaError_t cuda_status = cudaSetDevice(device_id);
  if (cuda_status != cudaSuccess) {
    printf("Error setting CUDA device: %s\n", cudaGetErrorString(cuda_status));
    return std::nullopt;
  }

  void* ptr = nullptr;
  cuda_status = cudaIpcOpenMemHandle(&ptr, ipc_handle, cudaIpcMemLazyEnablePeerAccess);
  if (cuda_status != cudaSuccess) {
    printf("Error opening CUDA IPC memory handle: %s\n", cudaGetErrorString(cuda_status));
    return std::nullopt;
  }

  return ptr;
}

get_cuda_ipc_handle function · cpp · L57-L59 (3 LOC)

exla/c_src/exla/exla_cuda.cc

// Stub variant: always reports that no IPC handle is available, regardless
// of `ptr`.
std::optional<std::string> get_cuda_ipc_handle(std::uintptr_t ptr) {
  static_cast<void>(ptr);
  return std::optional<std::string>{};
}

Repobility · code-quality intelligence platform · https://repobility.com

get_pointer_for_ipc_handle function · cpp · L60-L63 (4 LOC)

exla/c_src/exla/exla_cuda.cc

// Stub variant: never opens a handle and always returns an empty optional,
// regardless of the arguments.
std::optional<void*> get_pointer_for_ipc_handle(uint8_t* handle_bin, size_t handle_size, int device_id) {
  static_cast<void>(handle_bin);
  static_cast<void>(handle_size);
  static_cast<void>(device_id);
  return std::optional<void*>{};
}

ExlaLogSink class · c · L17-L61 (45 LOC)

exla/c_src/exla/exla_log_sink.h

class ExlaLogSink : public absl::LogSink {
public:
  explicit ExlaLogSink(ErlNifPid sink_pid) : sink_pid_(sink_pid) {}

  void Send(const absl::LogEntry &entry) {
    auto string = std::string(entry.text_message());
    auto filename = std::string(entry.source_filename());
    int64_t line = entry.source_line();
    auto severity = entry.log_severity();

    if (severity == absl::LogSeverity::kFatal) {
      // LOG(FATAL) aborts the program before we are able to send and
      // log the information from Elixir, so we need to get it out
      // there for debugging before everything crashes
      std::cerr << "[FATAL] " << filename << ":" << line << " " << string << "\n";
    }

    auto env = enif_alloc_env();

    auto message = fine::encode(
        env, std::make_tuple(severity_to_atom(severity), string, filename, line));

    enif_send(NULL, &sink_pid_, env, message);

    enif_free_env(env);
  }

private:
  fine::Atom severity_to_atom(absl::LogSeverity severity) {
    switch (sev

Send method · c · L20-L42 (23 LOC)

exla/c_src/exla/exla_log_sink.h

  // Forwards a log entry to the sink process as the message
  // {severity_atom, text, source_filename, source_line}.
  //
  // Fatal entries are additionally written to std::cerr first.
  void Send(const absl::LogEntry &entry) {
    const auto severity = entry.log_severity();
    const int64_t line = entry.source_line();
    const std::string text(entry.text_message());
    const std::string filename(entry.source_filename());

    // LOG(FATAL) aborts the program before the message could be delivered
    // and logged from Elixir, so it is printed right away to keep the
    // information available for debugging.
    if (severity == absl::LogSeverity::kFatal) {
      std::cerr << "[FATAL] " << filename << ":" << line << " " << text << "\n";
    }

    // The message term is built in a dedicated environment that is released
    // once the send has been issued.
    auto msg_env = enif_alloc_env();
    auto payload = fine::encode(
        msg_env, std::make_tuple(severity_to_atom(severity), text, filename, line));

    enif_send(NULL, &sink_pid_, msg_env, payload);
    enif_free_env(msg_env);
  }

severity_to_atom method · c · L43-L58 (16 LOC)

exla/c_src/exla/exla_log_sink.h

private:
  // Maps an Abseil log severity to the atom sent to the sink process.
  // kError and kFatal both map to `error`; kInfo and any severity not listed
  // map to `info`.
  fine::Atom severity_to_atom(absl::LogSeverity severity) {
    switch (severity) {
    case absl::LogSeverity::kWarning:
      return atoms::warning;
    case absl::LogSeverity::kError:
    case absl::LogSeverity::kFatal:
      return atoms::error;
    case absl::LogSeverity::kInfo:
    default:
      return atoms::info;
    }
  }

Op method · cpp · L13-L66 (54 LOC)

exla/c_src/exla/exla_mlir.cc

std::vector<fine::ResourcePtr<mlir::Value>> MLIRFunction::Op(
    std::string op_name, std::vector<fine::ResourcePtr<mlir::Value>> operands,
    std::vector<mlir::Type> result_types,
    std::vector<std::tuple<std::string, mlir::Attribute>> attributes,
    std::vector<fine::ResourcePtr<mlir::Region>> regions) {
  auto builder = module_->builder();
  auto context = builder->getContext();

  auto types_range = mlir::TypeRange{llvm::ArrayRef<mlir::Type>{result_types}};

  auto named_attributes = std::vector<mlir::NamedAttribute>{};
  for (auto const &[key, value] : attributes) {
    auto attribute = builder->getNamedAttr(key, value);
    named_attributes.push_back(attribute);
  }


  auto operand_values = std::vector<mlir::Value>();
  operand_values.reserve(operands.size());
  for (const auto &operand : operands) {
    operand_values.push_back(*operand);
  }

  auto operands_range = mlir::ValueRange{llvm::ArrayRef<mlir::Value>{operand_values}};
  auto attributes_array = llvm::ArrayRef<ml

PushRegion method · cpp · L67-L88 (22 LOC)

exla/c_src/exla/exla_mlir.cc

// Creates a new region with a single block taking one argument per entry of
// `types`, pushes the region onto the region stack and moves the insertion
// point into it.
//
// Returns the region together with resources wrapping the block arguments.
std::tuple<fine::ResourcePtr<mlir::Region>, std::vector<fine::ResourcePtr<mlir::Value>>>
MLIRFunction::PushRegion(std::vector<mlir::Type> types) {
  auto context = module_->builder()->getContext();

  auto new_region = fine::make_resource<mlir::Region>();
  auto &entry_block = new_region->emplaceBlock();

  for (mlir::Type arg_type : types) {
    entry_block.addArgument(arg_type, mlir::UnknownLoc::get(context));
  }

  // Wrap every block argument in a resource so it can be returned.
  std::vector<fine::ResourcePtr<mlir::Value>> block_args;
  for (auto &block_arg : entry_block.getArguments()) {
    block_args.push_back(fine::make_resource<mlir::Value>(block_arg));
  }

  region_stack.push(new_region);
  setInsertionPoint();

  return std::make_tuple(new_region, block_args);
}

PopRegion method · cpp · L89-L93 (5 LOC)

exla/c_src/exla/exla_mlir.cc

// Removes the top entry of the region stack and then updates the builder's
// insertion point via setInsertionPoint().
//
// No emptiness check is made before the pop.
void MLIRFunction::PopRegion() {
  region_stack.pop();
  // Re-point the builder now that the top of the stack has changed.
  setInsertionPoint();
}

setInsertionPoint method · cpp · L94-L101 (8 LOC)

exla/c_src/exla/exla_mlir.cc

// Moves the builder's insertion point to the end of the current block: the
// last block of the region on top of the region stack, or the last block of
// the function body when the stack is empty.
void MLIRFunction::setInsertionPoint() {
  const bool inside_region = region_stack.size() != 0;

  if (inside_region) {
    module_->builder()->setInsertionPointToEnd(&region_stack.top()->back());
    return;
  }

  module_->builder()->setInsertionPointToEnd(&func_->getBody().back());
}

Repobility — the code-quality scanner for AI-generated software · https://repobility.com

CreateFunction method · cpp · L109-L126 (18 LOC)

exla/c_src/exla/exla_mlir.cc

// Creates a function op named `name` with the given argument and result
// types, appends it to the module, adds its entry block and moves the
// builder's insertion point to the start of that block.
//
// The symbol visibility is "public" when `is_public` is true and "nested"
// otherwise.
std::unique_ptr<mlir::func::FuncOp> MLIRModule::CreateFunction(
    std::string name,
    std::vector<mlir::Type> arg_types,
    std::vector<mlir::Type> ret_types,
    bool is_public) {
  const char *visibility = "nested";
  if (is_public) {
    visibility = "public";
  }

  auto signature = builder_->getFunctionType(arg_types, ret_types);
  auto location = builder_->getUnknownLoc();

  auto function = std::make_unique<mlir::func::FuncOp>(
      mlir::func::FuncOp::create(location, name, signature));
  function->setSymVisibility(visibility);

  module_->push_back(*function);
  function->addEntryBlock();
  builder_->setInsertionPointToStart(&function->getBody().front());

  return function;
}

ToString method · cpp · L127-L133 (7 LOC)

exla/c_src/exla/exla_mlir.cc

std::string MLIRModule::ToString() {
  auto output_string = std::string{};
  auto output_stream = llvm::raw_string_ostream{output_string};
  module_->print(output_stream);
  return output_string;
}

ParseType method · cpp · L134-L143 (10 LOC)

exla/c_src/exla/exla_mlir.cc

// Parses `string` as an MLIR type in this module's context.
//
// Throws std::runtime_error, including the offending text, when the parser
// yields a null type.
mlir::Type MLIRModule::ParseType(std::string string) {
  mlir::Type parsed = mlir::parseType(string, context_.get());

  if (!parsed) {
    throw std::runtime_error("unable to parse MLIR type: " + string);
  }

  return parsed;
}

‹ prevpage 2 / 5next ›