// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"
#include "include/hw_ip/mmu/mmu_general.h"

#include <linux/genalloc.h>
#include <linux/slab.h>
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);

static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = NULL;

	hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
				(unsigned long) hop_addr)
		if (hop_addr == pgt_info->shadow_addr)
			break;

	return pgt_info;
}
static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);

	gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr,
			hdev->asic_prop.mmu_hop_table_size);
	hash_del(&pgt_info->node);
	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
	kfree(pgt_info);
}
static u64 alloc_hop(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pgt_info *pgt_info;
	u64 phys_addr, shadow_addr;

	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
	if (!pgt_info)
		return ULLONG_MAX;

	phys_addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool,
					prop->mmu_hop_table_size);
	if (!phys_addr) {
		dev_err(hdev->dev, "failed to allocate page\n");
		goto pool_add_err;
	}

	shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
						GFP_KERNEL);
	if (!shadow_addr)
		goto shadow_err;

	pgt_info->phys_addr = phys_addr;
	pgt_info->shadow_addr = shadow_addr;
	pgt_info->ctx = ctx;
	pgt_info->num_of_ptes = 0;
	hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);

	return shadow_addr;

shadow_err:
	gen_pool_free(hdev->mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size);
pool_add_err:
	kfree(pgt_info);

	return ULLONG_MAX;
}
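
/*
 * Overview of the hop allocation scheme implemented above: each hop is backed
 * by two resources that must stay in sync - a page-table page in device
 * memory, carved out of hdev->mmu_pgt_pool, and a host-side "shadow" copy
 * allocated with kzalloc(). The driver walks and updates the shadow copy, and
 * write_pte()/write_final_pte() below mirror every update into the device
 * page table. ULLONG_MAX is the sentinel returned on allocation failure.
 */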
static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
{
	return ctx->hdev->asic_prop.mmu_pgt_addr +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static inline u64 get_hop0_addr(struct hl_ctx *ctx)
{
	return (u64) (uintptr_t) ctx->hdev->mmu_shadow_hop0 +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}
static inline void flush(struct hl_ctx *ctx)
{
	/* flush all writes from all cores to reach PCI */
	mb();
	ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
}
/* transform the value to physical address when writing to H/W */
static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
	/*
	 * The value to write is actually the address of the next shadow hop +
	 * flags at the 12 LSBs.
	 * Hence in order to get the value to write to the physical PTE, we
	 * clear the 12 LSBs and translate the shadow hop to its associated
	 * physical hop, and add back the original 12 LSBs.
	 */
	u64 phys_val = get_phys_addr(ctx, val & PTE_PHYS_ADDR_MASK) |
				(val & OFFSET_MASK);

	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					phys_val);

	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}
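
/*
 * Illustration of the translation done by write_pte(), with made-up addresses
 * used purely as an example: if the next shadow hop lives at host address
 * 0xffff888012340000 and its pgt_info->phys_addr is 0x80001000, then a value
 * of (0xffff888012340000 | flags) is written to the device PTE as
 * (0x80001000 | flags), while the shadow PTE keeps the original shadow value
 * so later walks can be done entirely from host memory.
 */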
/* do not transform the value to physical address when writing to H/W */
static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
					u64 val)
{
	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					val);
	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}
/* clear the last and present bits */
static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
{
	/* no need to transform the value to physical address */
	write_final_pte(ctx, pte_addr, 0);
}

static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	get_pgt_info(ctx, hop_addr)->num_of_ptes++;
}
/*
 * put_pte - decrement the num of ptes and free the hop if possible
 *
 * @ctx: pointer to the context structure
 * @hop_addr: addr of the hop
 *
 * This function returns the number of ptes left on this hop. If the number is
 * 0, it means the hop was freed.
 */
static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
	int num_of_ptes_left;

	pgt_info->num_of_ptes--;

	/*
	 * Need to save the number of ptes left because free_hop might free
	 * the pgt_info
	 */
	num_of_ptes_left = pgt_info->num_of_ptes;
	if (!num_of_ptes_left)
		free_hop(ctx, hop_addr);

	return num_of_ptes_left;
}
static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
					u64 virt_addr, u64 mask, u64 shift)
{
	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
			((virt_addr & mask) >> shift);
}

static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP0_MASK, HOP0_SHIFT);
}

static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP1_MASK, HOP1_SHIFT);
}

static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP2_MASK, HOP2_SHIFT);
}

static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP3_MASK, HOP3_SHIFT);
}

static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx, u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, HOP4_MASK, HOP4_SHIFT);
}
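
/*
 * The helpers above index into a hop table: (virt_addr & HOPn_MASK) >>
 * HOPn_SHIFT selects the PTE slot for that level, and the slot is scaled by
 * mmu_pte_size. As a rough, illustrative example only (the real masks and
 * shifts come from mmu_general.h): with 8-byte PTEs and a 9-bit index per
 * hop, a virtual address whose hop3 index is 5 would use the PTE at
 * hop_addr + 5 * 8 = hop_addr + 40.
 */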
static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
{
	if (curr_pte & PAGE_PRESENT_MASK)
		return curr_pte & PHYS_ADDR_MASK;
	else
		return ULLONG_MAX;
}
static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
						bool *is_new_hop)
{
	u64 hop_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop_addr == ULLONG_MAX) {
		hop_addr = alloc_hop(ctx);
		*is_new_hop = (hop_addr != ULLONG_MAX);
	}

	return hop_addr;
}
/* translates shadow address inside hop to a physical address */
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
{
	u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
	u64 shadow_hop_addr = shadow_addr & ~page_mask;
	u64 pte_offset = shadow_addr & page_mask;
	u64 phys_hop_addr;

	if (shadow_hop_addr != get_hop0_addr(ctx))
		phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
	else
		phys_hop_addr = get_phys_hop0_addr(ctx);

	return phys_hop_addr + pte_offset;
}
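
/*
 * get_phys_addr() masks with ~(mmu_hop_table_size - 1), which presumes the
 * hop table size is a power of two and that hop tables are allocated aligned
 * to it (the gen_pool is created with __ffs(mmu_hop_table_size) as its
 * allocation order). hop0 is special: it is not tracked in the shadow hash,
 * because every ASID owns a fixed hop0 slot both in the device PGT area
 * (get_phys_hop0_addr) and in the mmu_shadow_hop0 array (get_hop0_addr).
 */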
static int dram_default_mapping_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr, pte_val;
	int rc, i, j, hop3_allocated = 0;

	if ((!hdev->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return 0;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;

	ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
	if (!ctx->dram_default_hops)
		return -ENOMEM;

	hop0_addr = get_hop0_addr(ctx);

	hop1_addr = alloc_hop(ctx);
	if (hop1_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 1\n");
		rc = -ENOMEM;
		goto hop1_err;
	}

	ctx->dram_default_hops[total_hops - 1] = hop1_addr;

	hop2_addr = alloc_hop(ctx);
	if (hop2_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 2\n");
		rc = -ENOMEM;
		goto hop2_err;
	}

	ctx->dram_default_hops[total_hops - 2] = hop2_addr;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		ctx->dram_default_hops[i] = alloc_hop(ctx);
		if (ctx->dram_default_hops[i] == ULLONG_MAX) {
			dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
			rc = -ENOMEM;
			goto hop3_err;
		}
		hop3_allocated++;
	}

	/* need only pte 0 in hops 0 and 1 */
	pte_val = (hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop0_addr, pte_val);

	pte_val = (hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop1_addr, pte_val);
	get_pte(ctx, hop1_addr);

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		pte_val = (ctx->dram_default_hops[i] & PTE_PHYS_ADDR_MASK) |
				PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, pte_val);
		get_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	pte_val = (prop->mmu_dram_default_page_addr & PTE_PHYS_ADDR_MASK) |
			LAST_MASK | PAGE_PRESENT_MASK;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			write_final_pte(ctx, hop3_pte_addr, pte_val);
			get_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	flush(ctx);

	return 0;

hop3_err:
	for (i = 0 ; i < hop3_allocated ; i++)
		free_hop(ctx, ctx->dram_default_hops[i]);

	free_hop(ctx, hop2_addr);
hop2_err:
	free_hop(ctx, hop1_addr);
hop1_err:
	kfree(ctx->dram_default_hops);

	return rc;
}
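
/*
 * Shape of the default DRAM mapping built above, as a worked example using
 * assumed numbers for illustration: with a 64GB
 * dram_size_for_default_page_mapping, 2MB DRAM pages and 512 PTEs per hop,
 * num_of_hop3 = 64GB / 2MB / 512 = 64. The context then holds one hop1, one
 * hop2 whose first 64 PTEs point to the 64 hop3 tables, and every hop3 PTE is
 * a "last" PTE pointing at mmu_dram_default_page_addr, so DRAM addresses that
 * were never explicitly mapped resolve to that single default page.
 */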
static void dram_default_mapping_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr;
	int i, j;

	if ((!hdev->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	hop0_addr = get_hop0_addr(ctx);
	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;
	hop1_addr = ctx->dram_default_hops[total_hops - 1];
	hop2_addr = ctx->dram_default_hops[total_hops - 2];

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			clear_pte(ctx, hop3_pte_addr);
			put_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		clear_pte(ctx, hop2_pte_addr);
		put_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	clear_pte(ctx, hop1_addr);
	put_pte(ctx, hop1_addr);
	clear_pte(ctx, hop0_addr);

	kfree(ctx->dram_default_hops);

	flush(ctx);
}
/*
 * hl_mmu_init() - initialize the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Create a pool of pages for pgt_infos.
 * - Create a shadow table for pgt
 *
 * Return: 0 for success, non-zero for failure.
 */
int hl_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!hdev->mmu_enable)
		return 0;

	/* MMU H/W init was already done in device hw_init() */

	hdev->mmu_pgt_pool =
			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);

	if (!hdev->mmu_pgt_pool) {
		dev_err(hdev->dev, "Failed to create page gen pool\n");
		return -ENOMEM;
	}

	rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr +
			prop->mmu_hop0_tables_total_size,
			prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
			-1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
		goto err_pool_add;
	}

	hdev->mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
						prop->mmu_hop_table_size,
						GFP_KERNEL | __GFP_ZERO);
	if (!hdev->mmu_shadow_hop0) {
		rc = -ENOMEM;
		goto err_pool_add;
	}

	return 0;

err_pool_add:
	gen_pool_destroy(hdev->mmu_pgt_pool);

	return rc;
}
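
/*
 * Layout assumed by hl_mmu_init(), as reflected in the gen_pool setup above:
 * the first mmu_hop0_tables_total_size bytes of the device PGT area at
 * mmu_pgt_addr are reserved for the per-ASID hop0 tables (see
 * get_phys_hop0_addr()), so only the remainder of mmu_pgt_size is handed to
 * mmu_pgt_pool for dynamic hop allocation. The hop0 shadow copies are
 * allocated here as a single max_asid * mmu_hop_table_size array rather than
 * through alloc_hop().
 */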
/*
 * hl_mmu_fini() - release the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Disable MMU in H/W.
 * - Free the pgt_infos pool.
 *
 * All contexts should be freed before calling this function.
 */
void hl_mmu_fini(struct hl_device *hdev)
{
	if (!hdev->mmu_enable)
		return;

	kvfree(hdev->mmu_shadow_hop0);
	gen_pool_destroy(hdev->mmu_pgt_pool);

	/* MMU H/W fini will be done in device hw_fini() */
}
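
/*
 * Expected call ordering, sketched from the comments above and in
 * hl_mmu_ctx_init()/hl_mmu_ctx_fini() (error handling omitted):
 *
 *	hl_mmu_init(hdev);	// once, after device hw_init()
 *	hl_mmu_ctx_init(ctx);	// per context/ASID
 *	...			// hl_mmu_map()/hl_mmu_unmap() under ctx->mmu_lock
 *	hl_mmu_ctx_fini(ctx);	// all contexts torn down first
 *	hl_mmu_fini(hdev);	// then release the module
 */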
/*
 * hl_mmu_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize a mutex to protect the concurrent mapping flow and a hash to hold
 * all the page-table hops related to this context.
 * Return: 0 on success, non-zero otherwise.
 */
int hl_mmu_ctx_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	if (!hdev->mmu_enable)
		return 0;

	mutex_init(&ctx->mmu_lock);
	hash_init(ctx->mmu_phys_hash);
	hash_init(ctx->mmu_shadow_hash);

	return dram_default_mapping_init(ctx);
}
/*
 * hl_mmu_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free any pgts which were not freed yet
 * - Free the mutex
 * - Free DRAM default page mapping hops
 */
void hl_mmu_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info;
	struct hlist_node *tmp;
	int i;

	if (!hdev->mmu_enable)
		return;

	dram_default_mapping_fini(ctx);

	if (!hash_empty(ctx->mmu_shadow_hash))
		dev_err(hdev->dev, "ctx is freed while it has pgts in use\n");

	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
		dev_err(hdev->dev,
			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
		free_hop(ctx, pgt_info->shadow_addr);
	}

	mutex_destroy(&ctx->mmu_lock);
}
static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte;
	bool is_dram_addr, is_huge, clear_hop3 = true;

	is_dram_addr = hl_mem_area_inside_range(virt_addr, PAGE_SIZE_2MB,
				prop->va_space_dram_start_address,
				prop->va_space_dram_end_address);

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop1_addr == ULLONG_MAX)
		goto not_mapped;

	hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop2_addr == ULLONG_MAX)
		goto not_mapped;

	hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop3_addr == ULLONG_MAX)
		goto not_mapped;

	hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	is_huge = curr_pte & LAST_MASK;

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev,
			"DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!is_huge) {
		hop4_addr = get_next_hop_addr(ctx, curr_pte);

		if (hop4_addr == ULLONG_MAX)
			goto not_mapped;

		hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);

		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;

		clear_hop3 = false;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
				PTE_PHYS_ADDR_MASK) | LAST_MASK |
					PAGE_PRESENT_MASK;
		if (curr_pte == default_pte) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		if (!(curr_pte & PAGE_PRESENT_MASK)) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		write_final_pte(ctx, hop3_pte_addr, default_pte);
		put_pte(ctx, hop3_addr);
	} else {
		if (!(curr_pte & PAGE_PRESENT_MASK))
			goto not_mapped;

		if (hop4_addr)
			clear_pte(ctx, hop4_pte_addr);
		else
			clear_pte(ctx, hop3_pte_addr);

		if (hop4_addr && !put_pte(ctx, hop4_addr))
			clear_hop3 = true;

		if (!clear_hop3)
			goto flush;

		clear_pte(ctx, hop3_pte_addr);

		if (put_pte(ctx, hop3_addr))
			goto flush;

		clear_pte(ctx, hop2_pte_addr);

		if (put_pte(ctx, hop2_addr))
			goto flush;

		clear_pte(ctx, hop1_pte_addr);

		if (put_pte(ctx, hop1_addr))
			goto flush;

		clear_pte(ctx, hop0_pte_addr);
	}

flush:
	flush(ctx);

	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
		virt_addr);

	return -EINVAL;
}
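
/*
 * Unmap walks the hops top-down to find the final PTE, then releases
 * bottom-up: clearing a PTE drops its hop's reference via put_pte(), and only
 * when a hop reaches zero references is it freed and the PTE pointing to it
 * in the parent hop cleared in turn. That is why the chain of put_pte() calls
 * above bails out to "flush" as soon as a hop still has live PTEs.
 */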
/*
 * hl_mmu_unmap - unmaps a virtual addr
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virt addr to unmap
 * @page_size: size of the page to unmap
 *
 * This function does the following:
 * - Check that the virt addr is mapped
 * - Unmap the virt addr and free the pgts if possible
 * - Returns 0 on success, -EINVAL if the given addr is not mapped
 *
 * Because this function changes the page tables in the device and because it
 * changes the MMU hash, it must be protected by a lock.
 * However, because it unmaps only a single page, the lock should be
 * implemented in a higher level in order to protect the entire unmapping of
 * the memory area.
 */
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
{
	struct hl_device *hdev = ctx->hdev;
	u64 real_virt_addr;
	u32 real_page_size, npages;
	int i, rc;

	if (!hdev->mmu_enable)
		return 0;

	/*
	 * The H/W handles mapping of 4KB/2MB pages. Hence if the host page
	 * size is bigger, we break it to sub-pages and unmap them separately.
	 */
	if ((page_size % PAGE_SIZE_2MB) == 0) {
		real_page_size = PAGE_SIZE_2MB;
	} else if ((page_size % PAGE_SIZE_4KB) == 0) {
		real_page_size = PAGE_SIZE_4KB;
	} else {
		dev_err(hdev->dev,
			"page size of %u is not 4KB nor 2MB aligned, can't unmap\n",
			page_size);

		return -EFAULT;
	}

	npages = page_size / real_page_size;
	real_virt_addr = virt_addr;

	for (i = 0 ; i < npages ; i++) {
		rc = _hl_mmu_unmap(ctx, real_virt_addr);
		if (rc)
			return rc;

		real_virt_addr += real_page_size;
	}

	return 0;
}
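
/*
 * For example, unmapping a host allocation of page_size = 8MB (which is
 * 2MB-aligned) is broken into npages = 8MB / 2MB = 4 calls to _hl_mmu_unmap(),
 * each advancing real_virt_addr by 2MB; a 64KB host page would instead be
 * split into sixteen 4KB unmaps.
 */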
static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
		u32 page_size)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte = 0;
	bool hop1_new = false, hop2_new = false, hop3_new = false,
		hop4_new = false, is_huge, is_dram_addr;
	int rc = -ENOMEM;

	/*
	 * This mapping function can map a 4KB/2MB page. For a 2MB page there
	 * are only 3 hops rather than 4. Currently the DRAM allocation uses
	 * 2MB pages only but user memory could have been allocated with one of
	 * the two page sizes. Since this is common code for all three cases,
	 * we need this huge page check.
	 */
	is_huge = page_size == PAGE_SIZE_2MB;

	is_dram_addr = hl_mem_area_inside_range(virt_addr, page_size,
				prop->va_space_dram_start_address,
				prop->va_space_dram_end_address);

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
		return -EFAULT;
	}

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, hop0_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
	if (hop1_addr == ULLONG_MAX)
		goto err;

	hop1_pte_addr = get_hop1_pte_addr(ctx, hop1_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
	if (hop2_addr == ULLONG_MAX)
		goto err;

	hop2_pte_addr = get_hop2_pte_addr(ctx, hop2_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
	if (hop3_addr == ULLONG_MAX)
		goto err;

	hop3_pte_addr = get_hop3_pte_addr(ctx, hop3_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	if (!is_huge) {
		hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
		if (hop4_addr == ULLONG_MAX)
			goto err;

		hop4_pte_addr = get_hop4_pte_addr(ctx, hop4_addr, virt_addr);
		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
					PTE_PHYS_ADDR_MASK) | LAST_MASK |
						PAGE_PRESENT_MASK;

		if (curr_pte != default_pte) {
			dev_err(hdev->dev,
				"DRAM: mapping already exists for virt_addr 0x%llx\n",
					virt_addr);
			rc = -EINVAL;
			goto err;
		}

		if (hop1_new || hop2_new || hop3_new || hop4_new) {
			dev_err(hdev->dev,
				"DRAM mapping should not allocate more hops\n");
			rc = -EFAULT;
			goto err;
		}
	} else if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev,
			"mapping already exists for virt_addr 0x%llx\n",
				virt_addr);

		dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
		dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
		dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
		dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);

		if (!is_huge)
			dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
				*(u64 *) (uintptr_t) hop4_pte_addr,
				hop4_pte_addr);

		rc = -EINVAL;
		goto err;
	}

	curr_pte = (phys_addr & PTE_PHYS_ADDR_MASK) | LAST_MASK
			| PAGE_PRESENT_MASK;

	if (is_huge)
		write_final_pte(ctx, hop3_pte_addr, curr_pte);
	else
		write_final_pte(ctx, hop4_pte_addr, curr_pte);

	if (hop1_new) {
		curr_pte =
			(hop1_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop0_pte_addr, curr_pte);
	}
	if (hop2_new) {
		curr_pte =
			(hop2_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop1_pte_addr, curr_pte);
		get_pte(ctx, hop1_addr);
	}
	if (hop3_new) {
		curr_pte =
			(hop3_addr & PTE_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, curr_pte);
		get_pte(ctx, hop2_addr);
	}

	if (!is_huge) {
		if (hop4_new) {
			curr_pte = (hop4_addr & PTE_PHYS_ADDR_MASK) |
					PAGE_PRESENT_MASK;
			write_pte(ctx, hop3_pte_addr, curr_pte);
			get_pte(ctx, hop3_addr);
		}

		get_pte(ctx, hop4_addr);
	} else {
		get_pte(ctx, hop3_addr);
	}

	flush(ctx);

	return 0;

err:
	if (hop4_new)
		free_hop(ctx, hop4_addr);
	if (hop3_new)
		free_hop(ctx, hop3_addr);
	if (hop2_new)
		free_hop(ctx, hop2_addr);
	if (hop1_new)
		free_hop(ctx, hop1_addr);

	return rc;
}
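
/*
 * Reference counting in _hl_mmu_map(): each PTE installed in hops 1-4 adds a
 * reference to the hop that holds it (get_pte()); hop0 is never
 * reference-counted because it is never freed. On any failure before the
 * final PTE is written, only hops that were freshly allocated in this call
 * (the hopN_new flags) are rolled back with free_hop(), since pre-existing
 * hops are still referenced by other mappings.
 */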
/*
 * hl_mmu_map - maps a virtual addr to physical addr
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virt addr to map from
 * @phys_addr: phys addr to map to
 * @page_size: physical page size
 *
 * This function does the following:
 * - Check that the virt addr is not mapped
 * - Allocate pgts as necessary in order to map the virt addr to the phys addr
 * - Returns 0 on success, -EINVAL if addr is already mapped, or -ENOMEM.
 *
 * Because this function changes the page tables in the device and because it
 * changes the MMU hash, it must be protected by a lock.
 * However, because it maps only a single page, the lock should be implemented
 * in a higher level in order to protect the entire mapping of the memory area.
 */
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
{
	struct hl_device *hdev = ctx->hdev;
	u64 real_virt_addr, real_phys_addr;
	u32 real_page_size, npages;
	int i, rc, mapped_cnt = 0;

	if (!hdev->mmu_enable)
		return 0;

	/*
	 * The H/W handles mapping of 4KB/2MB pages. Hence if the host page
	 * size is bigger, we break it to sub-pages and map them separately.
	 */
	if ((page_size % PAGE_SIZE_2MB) == 0) {
		real_page_size = PAGE_SIZE_2MB;
	} else if ((page_size % PAGE_SIZE_4KB) == 0) {
		real_page_size = PAGE_SIZE_4KB;
	} else {
		dev_err(hdev->dev,
			"page size of %u is not 4KB nor 2MB aligned, can't map\n",
			page_size);

		return -EFAULT;
	}

	WARN_ONCE((phys_addr & (real_page_size - 1)),
		"Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size",
		phys_addr, real_page_size);

	npages = page_size / real_page_size;
	real_virt_addr = virt_addr;
	real_phys_addr = phys_addr;

	for (i = 0 ; i < npages ; i++) {
		rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr,
				real_page_size);
		if (rc)
			goto err;

		real_virt_addr += real_page_size;
		real_phys_addr += real_page_size;
		mapped_cnt++;
	}

	return 0;

err:
	real_virt_addr = virt_addr;
	for (i = 0 ; i < mapped_cnt ; i++) {
		if (_hl_mmu_unmap(ctx, real_virt_addr))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap va: 0x%llx\n", real_virt_addr);

		real_virt_addr += real_page_size;
	}

	return rc;
}
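
/*
 * Minimal usage sketch, assuming the caller already translated its buffer
 * into (virt_addr, phys_addr, page_size) tuples; per the comments above, the
 * context's mmu_lock must be held around the whole memory area being mapped:
 *
 *	mutex_lock(&ctx->mmu_lock);
 *	rc = hl_mmu_map(ctx, va, pa, PAGE_SIZE_2MB);
 *	if (!rc)
 *		...	// use the mapping, later hl_mmu_unmap(ctx, va, PAGE_SIZE_2MB)
 *	mutex_unlock(&ctx->mmu_lock);
 */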
/*
 * hl_mmu_swap_out - marks all mapping of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
void hl_mmu_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_swap_in - marks all mapping of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
void hl_mmu_swap_in(struct hl_ctx *ctx)
{

}