// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include "habanalabs.h"
#include "include/hw_ip/mmu/mmu_general.h"

#include <linux/genalloc.h>
#include <linux/slab.h>

static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr);
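
/* Find the pgt_info of the given shadow hop address in the per-context hash */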
static struct pgt_info *get_pgt_info(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = NULL;

	hash_for_each_possible(ctx->mmu_shadow_hash, pgt_info, node,
				(unsigned long) hop_addr)
		if (hop_addr == pgt_info->shadow_addr)
			break;

	return pgt_info;
}
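
/* Return the hop's physical page to the pool and free its shadow copy */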
static void _free_hop(struct hl_ctx *ctx, struct pgt_info *pgt_info)
{
	struct hl_device *hdev = ctx->hdev;

	gen_pool_free(hdev->mmu_pgt_pool, pgt_info->phys_addr,
			hdev->asic_prop.mmu_hop_table_size);
	hash_del(&pgt_info->node);
	kfree((u64 *) (uintptr_t) pgt_info->shadow_addr);
	kfree(pgt_info);
}
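
/* Free a hop identified by its shadow address */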
static void free_hop(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);

	_free_hop(ctx, pgt_info);
}
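
/*
 * Allocate a new hop: a physical page table from the device gen pool plus a
 * zeroed shadow copy in host memory. Returns the shadow address of the new
 * hop, or ULLONG_MAX on failure.
 */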
static u64 alloc_hop(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct pgt_info *pgt_info;
	u64 phys_addr, shadow_addr;

	pgt_info = kmalloc(sizeof(*pgt_info), GFP_KERNEL);
	if (!pgt_info)
		return ULLONG_MAX;

	phys_addr = (u64) gen_pool_alloc(hdev->mmu_pgt_pool,
					prop->mmu_hop_table_size);
	if (!phys_addr) {
		dev_err(hdev->dev, "failed to allocate page\n");
		goto pool_add_err;
	}

	shadow_addr = (u64) (uintptr_t) kzalloc(prop->mmu_hop_table_size,
						GFP_KERNEL);
	if (!shadow_addr)
		goto shadow_err;

	pgt_info->phys_addr = phys_addr;
	pgt_info->shadow_addr = shadow_addr;
	pgt_info->ctx = ctx;
	pgt_info->num_of_ptes = 0;
	hash_add(ctx->mmu_shadow_hash, &pgt_info->node, shadow_addr);

	return shadow_addr;

shadow_err:
	gen_pool_free(hdev->mmu_pgt_pool, phys_addr, prop->mmu_hop_table_size);
pool_add_err:
	kfree(pgt_info);

	return ULLONG_MAX;
}
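
/*
 * hop0 tables are pre-allocated per ASID: the physical copy inside the device
 * page tables area and the shadow copy inside mmu_shadow_hop0.
 */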
static inline u64 get_phys_hop0_addr(struct hl_ctx *ctx)
{
	return ctx->hdev->asic_prop.mmu_pgt_addr +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static inline u64 get_hop0_addr(struct hl_ctx *ctx)
{
	return (u64) (uintptr_t) ctx->hdev->mmu_shadow_hop0 +
			(ctx->asid * ctx->hdev->asic_prop.mmu_hop_table_size);
}

static inline void flush(struct hl_ctx *ctx)
{
	/* flush all writes from all cores to reach PCI */
	mb();
	ctx->hdev->asic_funcs->read_pte(ctx->hdev, get_phys_hop0_addr(ctx));
}

/* transform the value to physical address when writing to H/W */
static inline void write_pte(struct hl_ctx *ctx, u64 shadow_pte_addr, u64 val)
{
	/*
	 * The value to write is actually the address of the next shadow hop +
	 * flags at the 12 LSBs.
	 * Hence in order to get the value to write to the physical PTE, we
	 * clear the 12 LSBs and translate the shadow hop to its associated
	 * physical hop, and add back the original 12 LSBs.
	 */
	u64 phys_val = get_phys_addr(ctx, val & HOP_PHYS_ADDR_MASK) |
				(val & FLAGS_MASK);

	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					phys_val);

	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* do not transform the value to physical address when writing to H/W */
static inline void write_final_pte(struct hl_ctx *ctx, u64 shadow_pte_addr,
					u64 val)
{
	ctx->hdev->asic_funcs->write_pte(ctx->hdev,
					get_phys_addr(ctx, shadow_pte_addr),
					val);
	*(u64 *) (uintptr_t) shadow_pte_addr = val;
}

/* clear the last and present bits */
static inline void clear_pte(struct hl_ctx *ctx, u64 pte_addr)
{
	/* no need to transform the value to physical address */
	write_final_pte(ctx, pte_addr, 0);
}
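
/* Account for a new valid PTE in the given hop */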
static inline void get_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	get_pgt_info(ctx, hop_addr)->num_of_ptes++;
}

/*
 * put_pte - decrement the num of ptes and free the hop if possible
 *
 * @ctx: pointer to the context structure
 * @hop_addr: addr of the hop
 *
 * This function returns the number of ptes left on this hop. If the number is
 * 0, it means the hop was freed.
 */
static inline int put_pte(struct hl_ctx *ctx, u64 hop_addr)
{
	struct pgt_info *pgt_info = get_pgt_info(ctx, hop_addr);
	int num_of_ptes_left;

	pgt_info->num_of_ptes--;

	/*
	 * Need to save the number of ptes left because free_hop might free
	 * the pgt_info
	 */
	num_of_ptes_left = pgt_info->num_of_ptes;
	if (!num_of_ptes_left)
		_free_hop(ctx, pgt_info);

	return num_of_ptes_left;
}
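
/* Return the shadow address of the PTE for virt_addr within the given hop */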
static inline u64 get_hopN_pte_addr(struct hl_ctx *ctx, u64 hop_addr,
					u64 virt_addr, u64 mask, u64 shift)
{
	return hop_addr + ctx->hdev->asic_prop.mmu_pte_size *
			((virt_addr & mask) >> shift);
}

static inline u64 get_hop0_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop0_mask,
					mmu_prop->hop0_shift);
}

static inline u64 get_hop1_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop1_mask,
					mmu_prop->hop1_shift);
}

static inline u64 get_hop2_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop2_mask,
					mmu_prop->hop2_shift);
}

static inline u64 get_hop3_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop3_mask,
					mmu_prop->hop3_shift);
}

static inline u64 get_hop4_pte_addr(struct hl_ctx *ctx,
					struct hl_mmu_properties *mmu_prop,
					u64 hop_addr, u64 vaddr)
{
	return get_hopN_pte_addr(ctx, hop_addr, vaddr, mmu_prop->hop4_mask,
					mmu_prop->hop4_shift);
}
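
/* Return the next hop's address from a PTE, or ULLONG_MAX if not present */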
static inline u64 get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte)
{
	if (curr_pte & PAGE_PRESENT_MASK)
		return curr_pte & HOP_PHYS_ADDR_MASK;
	else
		return ULLONG_MAX;
}
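
/*
 * Like get_next_hop_addr(), but allocate a new hop when the PTE is not
 * present. *is_new_hop reports whether a new hop was allocated.
 */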
static inline u64 get_alloc_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte,
						bool *is_new_hop)
{
	u64 hop_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop_addr == ULLONG_MAX) {
		hop_addr = alloc_hop(ctx);
		*is_new_hop = (hop_addr != ULLONG_MAX);
	}

	return hop_addr;
}

/* translates shadow address inside hop to a physical address */
static inline u64 get_phys_addr(struct hl_ctx *ctx, u64 shadow_addr)
{
	u64 page_mask = (ctx->hdev->asic_prop.mmu_hop_table_size - 1);
	u64 shadow_hop_addr = shadow_addr & ~page_mask;
	u64 pte_offset = shadow_addr & page_mask;
	u64 phys_hop_addr;

	if (shadow_hop_addr != get_hop0_addr(ctx))
		phys_hop_addr = get_pgt_info(ctx, shadow_hop_addr)->phys_addr;
	else
		phys_hop_addr = get_phys_hop0_addr(ctx);

	return phys_hop_addr + pte_offset;
}
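
/*
 * Map the entire DRAM default-page range of this context to the single
 * default DRAM page: build hop1, hop2 and the needed hop3 tables, then point
 * every hop3 PTE at mmu_dram_default_page_addr.
 */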
static int dram_default_mapping_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr, pte_val;
	int rc, i, j, hop3_allocated = 0;

	if ((!hdev->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return 0;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;

	ctx->dram_default_hops = kzalloc(HL_PTE_SIZE * total_hops, GFP_KERNEL);
	if (!ctx->dram_default_hops)
		return -ENOMEM;

	hop0_addr = get_hop0_addr(ctx);

	hop1_addr = alloc_hop(ctx);
	if (hop1_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 1\n");
		rc = -ENOMEM;
		goto hop1_err;
	}

	ctx->dram_default_hops[total_hops - 1] = hop1_addr;

	hop2_addr = alloc_hop(ctx);
	if (hop2_addr == ULLONG_MAX) {
		dev_err(hdev->dev, "failed to alloc hop 2\n");
		rc = -ENOMEM;
		goto hop2_err;
	}

	ctx->dram_default_hops[total_hops - 2] = hop2_addr;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		ctx->dram_default_hops[i] = alloc_hop(ctx);
		if (ctx->dram_default_hops[i] == ULLONG_MAX) {
			dev_err(hdev->dev, "failed to alloc hop 3, i: %d\n", i);
			rc = -ENOMEM;
			goto hop3_err;
		}
		hop3_allocated++;
	}

	/* need only pte 0 in hops 0 and 1 */
	pte_val = (hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop0_addr, pte_val);

	pte_val = (hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
	write_pte(ctx, hop1_addr, pte_val);
	get_pte(ctx, hop1_addr);

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		pte_val = (ctx->dram_default_hops[i] & HOP_PHYS_ADDR_MASK) |
				PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, pte_val);
		get_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	pte_val = (prop->mmu_dram_default_page_addr & HOP_PHYS_ADDR_MASK) |
			LAST_MASK | PAGE_PRESENT_MASK;

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			write_final_pte(ctx, hop3_pte_addr, pte_val);
			get_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	flush(ctx);

	return 0;

hop3_err:
	for (i = 0 ; i < hop3_allocated ; i++)
		free_hop(ctx, ctx->dram_default_hops[i]);

	free_hop(ctx, hop2_addr);
hop2_err:
	free_hop(ctx, hop1_addr);
hop1_err:
	kfree(ctx->dram_default_hops);

	return rc;
}
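
/* Tear down the mapping created by dram_default_mapping_init() */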
static void dram_default_mapping_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	u64 num_of_hop3, total_hops, hop0_addr, hop1_addr, hop2_addr,
		hop2_pte_addr, hop3_pte_addr;
	int i, j;

	if ((!hdev->dram_supports_virtual_memory) ||
			(!hdev->dram_default_page_mapping) ||
			(ctx->asid == HL_KERNEL_ASID_ID))
		return;

	num_of_hop3 = prop->dram_size_for_default_page_mapping;
	do_div(num_of_hop3, prop->dram_page_size);
	do_div(num_of_hop3, PTE_ENTRIES_IN_HOP);

	hop0_addr = get_hop0_addr(ctx);
	/* add hop1 and hop2 */
	total_hops = num_of_hop3 + 2;
	hop1_addr = ctx->dram_default_hops[total_hops - 1];
	hop2_addr = ctx->dram_default_hops[total_hops - 2];

	for (i = 0 ; i < num_of_hop3 ; i++) {
		hop3_pte_addr = ctx->dram_default_hops[i];
		for (j = 0 ; j < PTE_ENTRIES_IN_HOP ; j++) {
			clear_pte(ctx, hop3_pte_addr);
			put_pte(ctx, ctx->dram_default_hops[i]);
			hop3_pte_addr += HL_PTE_SIZE;
		}
	}

	hop2_pte_addr = hop2_addr;
	for (i = 0 ; i < num_of_hop3 ; i++) {
		clear_pte(ctx, hop2_pte_addr);
		put_pte(ctx, hop2_addr);
		hop2_pte_addr += HL_PTE_SIZE;
	}

	clear_pte(ctx, hop1_addr);
	put_pte(ctx, hop1_addr);
	clear_pte(ctx, hop0_addr);

	kfree(ctx->dram_default_hops);

	flush(ctx);
}

/**
 * hl_mmu_init() - initialize the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Create a pool of pages for pgt_infos.
 * - Create a shadow table for pgts.
 *
 * Return: 0 for success, non-zero for failure.
 */
int hl_mmu_init(struct hl_device *hdev)
{
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	int rc;

	if (!hdev->mmu_enable)
		return 0;

	hdev->mmu_pgt_pool =
			gen_pool_create(__ffs(prop->mmu_hop_table_size), -1);

	if (!hdev->mmu_pgt_pool) {
		dev_err(hdev->dev, "Failed to create page gen pool\n");
		return -ENOMEM;
	}

	rc = gen_pool_add(hdev->mmu_pgt_pool, prop->mmu_pgt_addr +
			prop->mmu_hop0_tables_total_size,
			prop->mmu_pgt_size - prop->mmu_hop0_tables_total_size,
			-1);
	if (rc) {
		dev_err(hdev->dev, "Failed to add memory to page gen pool\n");
		goto err_pool_add;
	}

	hdev->mmu_shadow_hop0 = kvmalloc_array(prop->max_asid,
					prop->mmu_hop_table_size,
					GFP_KERNEL | __GFP_ZERO);
	if (!hdev->mmu_shadow_hop0) {
		rc = -ENOMEM;
		goto err_pool_add;
	}

	/* MMU H/W init will be done in device hw_init() */

	return 0;

err_pool_add:
	gen_pool_destroy(hdev->mmu_pgt_pool);

	return rc;
}

/**
 * hl_mmu_fini() - release the MMU module.
 * @hdev: habanalabs device structure.
 *
 * This function does the following:
 * - Disable MMU in H/W.
 * - Free the pgt_infos pool.
 *
 * All contexts should be freed before calling this function.
 */
void hl_mmu_fini(struct hl_device *hdev)
{
	if (!hdev->mmu_enable)
		return;

	/* MMU H/W fini was already done in device hw_fini() */

	kvfree(hdev->mmu_shadow_hop0);
	gen_pool_destroy(hdev->mmu_pgt_pool);
}

/**
 * hl_mmu_ctx_init() - initialize a context for using the MMU module.
 * @ctx: pointer to the context structure to initialize.
 *
 * Initialize a mutex to protect the concurrent mapping flow and a hash to hold
 * all page table hops related to this context.
 * Return: 0 on success, non-zero otherwise.
 */
int hl_mmu_ctx_init(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;

	if (!hdev->mmu_enable)
		return 0;

	mutex_init(&ctx->mmu_lock);
	hash_init(ctx->mmu_phys_hash);
	hash_init(ctx->mmu_shadow_hash);

	return dram_default_mapping_init(ctx);
}

/*
 * hl_mmu_ctx_fini - disable a ctx from using the mmu module
 *
 * @ctx: pointer to the context structure
 *
 * This function does the following:
 * - Free any pgts which were not freed yet
 * - Free the mutex
 * - Free DRAM default page mapping hops
 */
void hl_mmu_ctx_fini(struct hl_ctx *ctx)
{
	struct hl_device *hdev = ctx->hdev;
	struct pgt_info *pgt_info;
	struct hlist_node *tmp;
	int i;

	if (!hdev->mmu_enable)
		return;

	dram_default_mapping_fini(ctx);

	if (!hash_empty(ctx->mmu_shadow_hash))
		dev_err(hdev->dev, "ctx %d is freed while it has pgts in use\n",
			ctx->asid);

	hash_for_each_safe(ctx->mmu_shadow_hash, i, tmp, pgt_info, node) {
		dev_err_ratelimited(hdev->dev,
			"pgt_info of addr 0x%llx of asid %d was not destroyed, num_ptes: %d\n",
			pgt_info->phys_addr, ctx->asid, pgt_info->num_of_ptes);
		_free_hop(ctx, pgt_info);
	}

	mutex_destroy(&ctx->mmu_lock);
}
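
/*
 * Unmap a single page (regular or huge) at virt_addr: walk the shadow hops,
 * clear the leaf PTE (or restore the DRAM default PTE), and release hops that
 * become empty.
 */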
static int _hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, bool is_dram_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte;
	bool is_huge, clear_hop3 = true;

	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop1_addr == ULLONG_MAX)
		goto not_mapped;

	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop2_addr == ULLONG_MAX)
		goto not_mapped;

	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_next_hop_addr(ctx, curr_pte);

	if (hop3_addr == ULLONG_MAX)
		goto not_mapped;

	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);

	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	is_huge = curr_pte & LAST_MASK;

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev,
			"DRAM unmapping should use huge pages only\n");
		return -EFAULT;
	}

	if (!is_huge) {
		hop4_addr = get_next_hop_addr(ctx, curr_pte);

		if (hop4_addr == ULLONG_MAX)
			goto not_mapped;

		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
							virt_addr);

		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;

		clear_hop3 = false;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
				HOP_PHYS_ADDR_MASK) | LAST_MASK |
					PAGE_PRESENT_MASK;
		if (curr_pte == default_pte) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE points to zero page, can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		if (!(curr_pte & PAGE_PRESENT_MASK)) {
			dev_err(hdev->dev,
				"DRAM: hop3 PTE is cleared! can't unmap, va: 0x%llx\n",
					virt_addr);
			goto not_mapped;
		}

		write_final_pte(ctx, hop3_pte_addr, default_pte);
		put_pte(ctx, hop3_addr);
	} else {
		if (!(curr_pte & PAGE_PRESENT_MASK))
			goto not_mapped;

		if (hop4_addr)
			clear_pte(ctx, hop4_pte_addr);
		else
			clear_pte(ctx, hop3_pte_addr);

		if (hop4_addr && !put_pte(ctx, hop4_addr))
			clear_hop3 = true;

		if (!clear_hop3)
			goto flush;

		clear_pte(ctx, hop3_pte_addr);

		if (put_pte(ctx, hop3_addr))
			goto flush;

		clear_pte(ctx, hop2_pte_addr);

		if (put_pte(ctx, hop2_addr))
			goto flush;

		clear_pte(ctx, hop1_pte_addr);

		if (put_pte(ctx, hop1_addr))
			goto flush;

		clear_pte(ctx, hop0_pte_addr);
	}

flush:
	flush(ctx);

	return 0;

not_mapped:
	dev_err(hdev->dev, "virt addr 0x%llx is not mapped to phys addr\n",
		virt_addr);

	return -EINVAL;
}

/*
 * hl_mmu_unmap - unmaps a virtual addr
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virt addr to unmap
 * @page_size: size of the page to unmap
 *
 * This function does the following:
 * - Check that the virt addr is mapped
 * - Unmap the virt addr and free pgts if possible
 * - Returns 0 on success, -EINVAL if the given addr is not mapped
 *
 * Because this function changes the page tables in the device and because it
 * changes the MMU hash, it must be protected by a lock.
 * However, because it unmaps only a single page, the lock should be implemented
 * in a higher level in order to protect the entire unmapping of the memory area.
 */
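
/*
 * Illustrative caller sketch (not taken from this driver): per the note above,
 * a higher layer is expected to serialize the per-page calls, e.g. by holding
 * ctx->mmu_lock around the unmap of a whole area. total_size and offset below
 * are illustrative names only:
 *
 *	mutex_lock(&ctx->mmu_lock);
 *	for (offset = 0 ; offset < total_size ; offset += page_size)
 *		hl_mmu_unmap(ctx, virt_addr + offset, page_size);
 *	mutex_unlock(&ctx->mmu_lock);
 */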
int hl_mmu_unmap(struct hl_ctx *ctx, u64 virt_addr, u32 page_size)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 real_virt_addr;
	u32 real_page_size, npages;
	int i, rc;
	bool is_dram_addr;

	if (!hdev->mmu_enable)
		return 0;

	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
				prop->va_space_dram_start_address,
				prop->va_space_dram_end_address);

	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

	/*
	 * The H/W handles mapping of specific page sizes. Hence if the page
	 * size is bigger, we break it to sub-pages and unmap them separately.
	 */
	if ((page_size % mmu_prop->huge_page_size) == 0) {
		real_page_size = mmu_prop->huge_page_size;
	} else if ((page_size % mmu_prop->page_size) == 0) {
		real_page_size = mmu_prop->page_size;
	} else {
		dev_err(hdev->dev,
			"page size of %u is not %uKB nor %uMB aligned, can't unmap\n",
			page_size,
			mmu_prop->page_size >> 10,
			mmu_prop->huge_page_size >> 20);

		return -EFAULT;
	}

	npages = page_size / real_page_size;
	real_virt_addr = virt_addr;

	for (i = 0 ; i < npages ; i++) {
		rc = _hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr);
		if (rc)
			return rc;

		real_virt_addr += real_page_size;
	}

	return 0;
}
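
/*
 * Map a single page (regular or huge) at virt_addr to phys_addr: walk the
 * hops and allocate missing ones, reject an already existing mapping, then
 * write the leaf PTE and any new hop PTEs.
 */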
static int _hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
		u32 page_size, bool is_dram_addr)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 hop0_addr = 0, hop0_pte_addr = 0,
		hop1_addr = 0, hop1_pte_addr = 0,
		hop2_addr = 0, hop2_pte_addr = 0,
		hop3_addr = 0, hop3_pte_addr = 0,
		hop4_addr = 0, hop4_pte_addr = 0,
		curr_pte = 0;
	bool hop1_new = false, hop2_new = false, hop3_new = false,
		hop4_new = false, is_huge;
	int rc = -ENOMEM;

	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

	/*
	 * This mapping function can map a page or a huge page. For huge page
	 * there are only 3 hops rather than 4. Currently the DRAM allocation
	 * uses huge pages only but user memory could have been allocated with
	 * one of the two page sizes. Since this is a common code for all the
	 * three cases, we need this huge page check.
	 */
	is_huge = page_size == mmu_prop->huge_page_size;

	if (is_dram_addr && !is_huge) {
		dev_err(hdev->dev, "DRAM mapping should use huge pages only\n");
		return -EFAULT;
	}

	hop0_addr = get_hop0_addr(ctx);
	hop0_pte_addr = get_hop0_pte_addr(ctx, mmu_prop, hop0_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop0_pte_addr;

	hop1_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop1_new);
	if (hop1_addr == ULLONG_MAX)
		goto err;

	hop1_pte_addr = get_hop1_pte_addr(ctx, mmu_prop, hop1_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop1_pte_addr;

	hop2_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop2_new);
	if (hop2_addr == ULLONG_MAX)
		goto err;

	hop2_pte_addr = get_hop2_pte_addr(ctx, mmu_prop, hop2_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop2_pte_addr;

	hop3_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop3_new);
	if (hop3_addr == ULLONG_MAX)
		goto err;

	hop3_pte_addr = get_hop3_pte_addr(ctx, mmu_prop, hop3_addr, virt_addr);
	curr_pte = *(u64 *) (uintptr_t) hop3_pte_addr;

	if (!is_huge) {
		hop4_addr = get_alloc_next_hop_addr(ctx, curr_pte, &hop4_new);
		if (hop4_addr == ULLONG_MAX)
			goto err;

		hop4_pte_addr = get_hop4_pte_addr(ctx, mmu_prop, hop4_addr,
							virt_addr);
		curr_pte = *(u64 *) (uintptr_t) hop4_pte_addr;
	}

	if (hdev->dram_default_page_mapping && is_dram_addr) {
		u64 default_pte = (prop->mmu_dram_default_page_addr &
					HOP_PHYS_ADDR_MASK) | LAST_MASK |
						PAGE_PRESENT_MASK;

		if (curr_pte != default_pte) {
			dev_err(hdev->dev,
				"DRAM: mapping already exists for virt_addr 0x%llx\n",
					virt_addr);
			rc = -EINVAL;
			goto err;
		}

		if (hop1_new || hop2_new || hop3_new || hop4_new) {
			dev_err(hdev->dev,
				"DRAM mapping should not allocate more hops\n");
			rc = -EFAULT;
			goto err;
		}
	} else if (curr_pte & PAGE_PRESENT_MASK) {
		dev_err(hdev->dev,
			"mapping already exists for virt_addr 0x%llx\n",
				virt_addr);

		dev_dbg(hdev->dev, "hop0 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop0_pte_addr, hop0_pte_addr);
		dev_dbg(hdev->dev, "hop1 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop1_pte_addr, hop1_pte_addr);
		dev_dbg(hdev->dev, "hop2 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop2_pte_addr, hop2_pte_addr);
		dev_dbg(hdev->dev, "hop3 pte: 0x%llx (0x%llx)\n",
			*(u64 *) (uintptr_t) hop3_pte_addr, hop3_pte_addr);

		if (!is_huge)
			dev_dbg(hdev->dev, "hop4 pte: 0x%llx (0x%llx)\n",
				*(u64 *) (uintptr_t) hop4_pte_addr,
				hop4_pte_addr);

		rc = -EINVAL;
		goto err;
	}

	curr_pte = (phys_addr & HOP_PHYS_ADDR_MASK) | LAST_MASK
			| PAGE_PRESENT_MASK;

	if (is_huge)
		write_final_pte(ctx, hop3_pte_addr, curr_pte);
	else
		write_final_pte(ctx, hop4_pte_addr, curr_pte);

	if (hop1_new) {
		curr_pte =
			(hop1_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop0_pte_addr, curr_pte);
	}
	if (hop2_new) {
		curr_pte =
			(hop2_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop1_pte_addr, curr_pte);
		get_pte(ctx, hop1_addr);
	}
	if (hop3_new) {
		curr_pte =
			(hop3_addr & HOP_PHYS_ADDR_MASK) | PAGE_PRESENT_MASK;
		write_pte(ctx, hop2_pte_addr, curr_pte);
		get_pte(ctx, hop2_addr);
	}

	if (!is_huge) {
		if (hop4_new) {
			curr_pte = (hop4_addr & HOP_PHYS_ADDR_MASK) |
					PAGE_PRESENT_MASK;
			write_pte(ctx, hop3_pte_addr, curr_pte);
			get_pte(ctx, hop3_addr);
		}

		get_pte(ctx, hop4_addr);
	} else {
		get_pte(ctx, hop3_addr);
	}

	flush(ctx);

	return 0;

err:
	if (hop4_new)
		free_hop(ctx, hop4_addr);
	if (hop3_new)
		free_hop(ctx, hop3_addr);
	if (hop2_new)
		free_hop(ctx, hop2_addr);
	if (hop1_new)
		free_hop(ctx, hop1_addr);

	return rc;
}

/*
 * hl_mmu_map - maps a virtual addr to physical addr
 *
 * @ctx: pointer to the context structure
 * @virt_addr: virt addr to map from
 * @phys_addr: phys addr to map to
 * @page_size: physical page size
 *
 * This function does the following:
 * - Check that the virt addr is not mapped
 * - Allocate pgts as necessary in order to map the virt addr to the phys
 * - Returns 0 on success, -EINVAL if addr is already mapped, or -ENOMEM.
 *
 * Because this function changes the page tables in the device and because it
 * changes the MMU hash, it must be protected by a lock.
 * However, because it maps only a single page, the lock should be implemented
 * in a higher level in order to protect the entire mapping of the memory area.
 */
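
/*
 * Illustrative caller sketch (not taken from this driver): as noted above, the
 * caller is expected to hold a higher-level lock, e.g. ctx->mmu_lock, around
 * the mapping of an entire memory area:
 *
 *	mutex_lock(&ctx->mmu_lock);
 *	rc = hl_mmu_map(ctx, virt_addr, phys_addr, page_size);
 *	mutex_unlock(&ctx->mmu_lock);
 */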
int hl_mmu_map(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr, u32 page_size)
{
	struct hl_device *hdev = ctx->hdev;
	struct asic_fixed_properties *prop = &hdev->asic_prop;
	struct hl_mmu_properties *mmu_prop;
	u64 real_virt_addr, real_phys_addr;
	u32 real_page_size, npages;
	int i, rc, mapped_cnt = 0;
	bool is_dram_addr;

	if (!hdev->mmu_enable)
		return 0;

	is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
				prop->va_space_dram_start_address,
				prop->va_space_dram_end_address);

	mmu_prop = is_dram_addr ? &prop->dmmu : &prop->pmmu;

	/*
	 * The H/W handles mapping of specific page sizes. Hence if the page
	 * size is bigger, we break it to sub-pages and map them separately.
	 */
	if ((page_size % mmu_prop->huge_page_size) == 0) {
		real_page_size = mmu_prop->huge_page_size;
	} else if ((page_size % mmu_prop->page_size) == 0) {
		real_page_size = mmu_prop->page_size;
	} else {
		dev_err(hdev->dev,
			"page size of %u is not %uKB nor %uMB aligned, can't map\n",
			page_size,
			mmu_prop->page_size >> 10,
			mmu_prop->huge_page_size >> 20);

		return -EFAULT;
	}

	WARN_ONCE((phys_addr & (real_page_size - 1)),
		"Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size",
		phys_addr, real_page_size);

	npages = page_size / real_page_size;
	real_virt_addr = virt_addr;
	real_phys_addr = phys_addr;

	for (i = 0 ; i < npages ; i++) {
		rc = _hl_mmu_map(ctx, real_virt_addr, real_phys_addr,
				real_page_size, is_dram_addr);
		if (rc)
			goto err;

		real_virt_addr += real_page_size;
		real_phys_addr += real_page_size;
		mapped_cnt++;
	}

	return 0;

err:
	real_virt_addr = virt_addr;
	for (i = 0 ; i < mapped_cnt ; i++) {
		if (_hl_mmu_unmap(ctx, real_virt_addr, is_dram_addr))
			dev_warn_ratelimited(hdev->dev,
				"failed to unmap va: 0x%llx\n", real_virt_addr);

		real_virt_addr += real_page_size;
	}

	return rc;
}

/*
 * hl_mmu_swap_out - marks all mappings of the given ctx as swapped out
 *
 * @ctx: pointer to the context structure
 *
 */
void hl_mmu_swap_out(struct hl_ctx *ctx)
{

}

/*
 * hl_mmu_swap_in - marks all mappings of the given ctx as swapped in
 *
 * @ctx: pointer to the context structure
 *
 */
void hl_mmu_swap_in(struct hl_ctx *ctx)
{

}