// SPDX-License-Identifier: GPL-2.0+
/*
 * TCE helpers for IODA PCI/PCIe on PowerNV platforms
 *
 * Copyright 2018 IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
| |
| #include <linux/kernel.h> |
| #include <linux/iommu.h> |
| |
| #include <asm/iommu.h> |
| #include <asm/tce.h> |
| #include "pci.h" |
| |
| unsigned long pnv_ioda_parse_tce_sizes(struct pnv_phb *phb) |
| { |
| struct pci_controller *hose = phb->hose; |
| struct device_node *dn = hose->dn; |
| unsigned long mask = 0; |
| int i, rc, count; |
| u32 val; |
| |
| count = of_property_count_u32_elems(dn, "ibm,supported-tce-sizes"); |
| if (count <= 0) { |
| mask = SZ_4K | SZ_64K; |
| /* Add 16M for POWER8 by default */ |
| if (cpu_has_feature(CPU_FTR_ARCH_207S) && |
| !cpu_has_feature(CPU_FTR_ARCH_300)) |
| mask |= SZ_16M | SZ_256M; |
| return mask; |
| } |
| |
| for (i = 0; i < count; i++) { |
| rc = of_property_read_u32_index(dn, "ibm,supported-tce-sizes", |
| i, &val); |
| if (rc == 0) |
| mask |= 1ULL << val; |
| } |
| |
| return mask; |
| } |
| |
| void pnv_pci_setup_iommu_table(struct iommu_table *tbl, |
| void *tce_mem, u64 tce_size, |
| u64 dma_offset, unsigned int page_shift) |
| { |
| tbl->it_blocksize = 16; |
| tbl->it_base = (unsigned long)tce_mem; |
| tbl->it_page_shift = page_shift; |
| tbl->it_offset = dma_offset >> tbl->it_page_shift; |
| tbl->it_index = 0; |
| tbl->it_size = tce_size >> 3; |
| tbl->it_busno = 0; |
| tbl->it_type = TCE_PCI; |
| } |
| |
| static __be64 *pnv_alloc_tce_level(int nid, unsigned int shift) |
| { |
| struct page *tce_mem = NULL; |
| __be64 *addr; |
| |
| tce_mem = alloc_pages_node(nid, GFP_ATOMIC | __GFP_NOWARN, |
| shift - PAGE_SHIFT); |
| if (!tce_mem) { |
| pr_err("Failed to allocate a TCE memory, level shift=%d\n", |
| shift); |
| return NULL; |
| } |
| addr = page_address(tce_mem); |
| memset(addr, 0, 1UL << shift); |
| |
| return addr; |
| } |
| |
| static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, |
| unsigned long size, unsigned int levels); |
| |
| static __be64 *pnv_tce(struct iommu_table *tbl, bool user, long idx, bool alloc) |
| { |
| __be64 *tmp = user ? tbl->it_userspace : (__be64 *) tbl->it_base; |
| int level = tbl->it_indirect_levels; |
| const long shift = ilog2(tbl->it_level_size); |
| unsigned long mask = (tbl->it_level_size - 1) << (level * shift); |
| |
| while (level) { |
| int n = (idx & mask) >> (level * shift); |
| unsigned long oldtce, tce = be64_to_cpu(READ_ONCE(tmp[n])); |
| |
| if (!tce) { |
| __be64 *tmp2; |
| |
| if (!alloc) |
| return NULL; |
| |
| tmp2 = pnv_alloc_tce_level(tbl->it_nid, |
| ilog2(tbl->it_level_size) + 3); |
| if (!tmp2) |
| return NULL; |
| |
| tce = __pa(tmp2) | TCE_PCI_READ | TCE_PCI_WRITE; |
| oldtce = be64_to_cpu(cmpxchg(&tmp[n], 0, |
| cpu_to_be64(tce))); |
| if (oldtce) { |
| pnv_pci_ioda2_table_do_free_pages(tmp2, |
| ilog2(tbl->it_level_size) + 3, 1); |
| tce = oldtce; |
| } |
| } |
| |
| tmp = __va(tce & ~(TCE_PCI_READ | TCE_PCI_WRITE)); |
| idx &= ~mask; |
| mask >>= shift; |
| --level; |
| } |
| |
| return tmp + idx; |
| } |
| |
| int pnv_tce_build(struct iommu_table *tbl, long index, long npages, |
| unsigned long uaddr, enum dma_data_direction direction, |
| unsigned long attrs) |
| { |
| u64 proto_tce = iommu_direction_to_tce_perm(direction); |
| u64 rpn = __pa(uaddr) >> tbl->it_page_shift; |
| long i; |
| |
| if (proto_tce & TCE_PCI_WRITE) |
| proto_tce |= TCE_PCI_READ; |
| |
| for (i = 0; i < npages; i++) { |
| unsigned long newtce = proto_tce | |
| ((rpn + i) << tbl->it_page_shift); |
| unsigned long idx = index - tbl->it_offset + i; |
| |
| *(pnv_tce(tbl, false, idx, true)) = cpu_to_be64(newtce); |
| } |
| |
| return 0; |
| } |
| |
| #ifdef CONFIG_IOMMU_API |
| int pnv_tce_xchg(struct iommu_table *tbl, long index, |
| unsigned long *hpa, enum dma_data_direction *direction) |
| { |
| u64 proto_tce = iommu_direction_to_tce_perm(*direction); |
| unsigned long newtce = *hpa | proto_tce, oldtce; |
| unsigned long idx = index - tbl->it_offset; |
| __be64 *ptce = NULL; |
| |
| BUG_ON(*hpa & ~IOMMU_PAGE_MASK(tbl)); |
| |
| if (*direction == DMA_NONE) { |
| ptce = pnv_tce(tbl, false, idx, false); |
| if (!ptce) { |
| *hpa = 0; |
| return 0; |
| } |
| } |
| |
| if (!ptce) { |
| ptce = pnv_tce(tbl, false, idx, true); |
| if (!ptce) |
| return -ENOMEM; |
| } |
| |
| if (newtce & TCE_PCI_WRITE) |
| newtce |= TCE_PCI_READ; |
| |
| oldtce = be64_to_cpu(xchg(ptce, cpu_to_be64(newtce))); |
| *hpa = oldtce & ~(TCE_PCI_READ | TCE_PCI_WRITE); |
| *direction = iommu_tce_direction(oldtce); |
| |
| return 0; |
| } |
| |
| __be64 *pnv_tce_useraddrptr(struct iommu_table *tbl, long index, bool alloc) |
| { |
| if (WARN_ON_ONCE(!tbl->it_userspace)) |
| return NULL; |
| |
| return pnv_tce(tbl, true, index - tbl->it_offset, alloc); |
| } |
| #endif |
| |
| void pnv_tce_free(struct iommu_table *tbl, long index, long npages) |
| { |
| long i; |
| |
| for (i = 0; i < npages; i++) { |
| unsigned long idx = index - tbl->it_offset + i; |
| __be64 *ptce = pnv_tce(tbl, false, idx, false); |
| |
| if (ptce) |
| *ptce = cpu_to_be64(0); |
| else |
| /* Skip the rest of the level */ |
| i |= tbl->it_level_size - 1; |
| } |
| } |
| |
| unsigned long pnv_tce_get(struct iommu_table *tbl, long index) |
| { |
| __be64 *ptce = pnv_tce(tbl, false, index - tbl->it_offset, false); |
| |
| if (!ptce) |
| return 0; |
| |
| return be64_to_cpu(*ptce); |
| } |
| |
| static void pnv_pci_ioda2_table_do_free_pages(__be64 *addr, |
| unsigned long size, unsigned int levels) |
| { |
| const unsigned long addr_ul = (unsigned long) addr & |
| ~(TCE_PCI_READ | TCE_PCI_WRITE); |
| |
| if (levels) { |
| long i; |
| u64 *tmp = (u64 *) addr_ul; |
| |
| for (i = 0; i < size; ++i) { |
| unsigned long hpa = be64_to_cpu(tmp[i]); |
| |
| if (!(hpa & (TCE_PCI_READ | TCE_PCI_WRITE))) |
| continue; |
| |
| pnv_pci_ioda2_table_do_free_pages(__va(hpa), size, |
| levels - 1); |
| } |
| } |
| |
| free_pages(addr_ul, get_order(size << 3)); |
| } |
| |
| void pnv_pci_ioda2_table_free_pages(struct iommu_table *tbl) |
| { |
| const unsigned long size = tbl->it_indirect_levels ? |
| tbl->it_level_size : tbl->it_size; |
| |
| if (!tbl->it_size) |
| return; |
| |
| pnv_pci_ioda2_table_do_free_pages((__be64 *)tbl->it_base, size, |
| tbl->it_indirect_levels); |
| if (tbl->it_userspace) { |
| pnv_pci_ioda2_table_do_free_pages(tbl->it_userspace, size, |
| tbl->it_indirect_levels); |
| } |
| } |
| |
| static __be64 *pnv_pci_ioda2_table_do_alloc_pages(int nid, unsigned int shift, |
| unsigned int levels, unsigned long limit, |
| unsigned long *current_offset, unsigned long *total_allocated) |
| { |
| __be64 *addr, *tmp; |
| unsigned long allocated = 1UL << shift; |
| unsigned int entries = 1UL << (shift - 3); |
| long i; |
| |
| addr = pnv_alloc_tce_level(nid, shift); |
| *total_allocated += allocated; |
| |
| --levels; |
| if (!levels) { |
| *current_offset += allocated; |
| return addr; |
| } |
| |
| for (i = 0; i < entries; ++i) { |
| tmp = pnv_pci_ioda2_table_do_alloc_pages(nid, shift, |
| levels, limit, current_offset, total_allocated); |
| if (!tmp) |
| break; |
| |
| addr[i] = cpu_to_be64(__pa(tmp) | |
| TCE_PCI_READ | TCE_PCI_WRITE); |
| |
| if (*current_offset >= limit) |
| break; |
| } |
| |
| return addr; |
| } |
| |
| long pnv_pci_ioda2_table_alloc_pages(int nid, __u64 bus_offset, |
| __u32 page_shift, __u64 window_size, __u32 levels, |
| bool alloc_userspace_copy, struct iommu_table *tbl) |
| { |
| void *addr, *uas = NULL; |
| unsigned long offset = 0, level_shift, total_allocated = 0; |
| unsigned long total_allocated_uas = 0; |
| const unsigned int window_shift = ilog2(window_size); |
| unsigned int entries_shift = window_shift - page_shift; |
| unsigned int table_shift = max_t(unsigned int, entries_shift + 3, |
| PAGE_SHIFT); |
| const unsigned long tce_table_size = 1UL << table_shift; |
| |
| if (!levels || (levels > POWERNV_IOMMU_MAX_LEVELS)) |
| return -EINVAL; |
| |
| if (!is_power_of_2(window_size)) |
| return -EINVAL; |
| |
| /* Adjust direct table size from window_size and levels */ |
| entries_shift = (entries_shift + levels - 1) / levels; |
| level_shift = entries_shift + 3; |
| level_shift = max_t(unsigned int, level_shift, PAGE_SHIFT); |
| |
| if ((level_shift - 3) * levels + page_shift >= 55) |
| return -EINVAL; |
| |
| /* Allocate TCE table */ |
| addr = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, |
| 1, tce_table_size, &offset, &total_allocated); |
| |
| /* addr==NULL means that the first level allocation failed */ |
| if (!addr) |
| return -ENOMEM; |
| |
| /* |
| * First level was allocated but some lower level failed as |
| * we did not allocate as much as we wanted, |
| * release partially allocated table. |
| */ |
| if (levels == 1 && offset < tce_table_size) |
| goto free_tces_exit; |
| |
| /* Allocate userspace view of the TCE table */ |
| if (alloc_userspace_copy) { |
| offset = 0; |
| uas = pnv_pci_ioda2_table_do_alloc_pages(nid, level_shift, |
| 1, tce_table_size, &offset, |
| &total_allocated_uas); |
| if (!uas) |
| goto free_tces_exit; |
| if (levels == 1 && (offset < tce_table_size || |
| total_allocated_uas != total_allocated)) |
| goto free_uas_exit; |
| } |
| |
| /* Setup linux iommu table */ |
| pnv_pci_setup_iommu_table(tbl, addr, tce_table_size, bus_offset, |
| page_shift); |
| tbl->it_level_size = 1ULL << (level_shift - 3); |
| tbl->it_indirect_levels = levels - 1; |
| tbl->it_userspace = uas; |
| tbl->it_nid = nid; |
| |
| pr_debug("Created TCE table: ws=%08llx ts=%lx @%08llx base=%lx uas=%p levels=%d/%d\n", |
| window_size, tce_table_size, bus_offset, tbl->it_base, |
| tbl->it_userspace, 1, levels); |
| |
| return 0; |
| |
| free_uas_exit: |
| pnv_pci_ioda2_table_do_free_pages(uas, |
| 1ULL << (level_shift - 3), levels - 1); |
| free_tces_exit: |
| pnv_pci_ioda2_table_do_free_pages(addr, |
| 1ULL << (level_shift - 3), levels - 1); |
| |
| return -ENOMEM; |
| } |
| |
| void pnv_pci_unlink_table_and_group(struct iommu_table *tbl, |
| struct iommu_table_group *table_group) |
| { |
| long i; |
| bool found; |
| struct iommu_table_group_link *tgl; |
| |
| if (!tbl || !table_group) |
| return; |
| |
| /* Remove link to a group from table's list of attached groups */ |
| found = false; |
| |
| rcu_read_lock(); |
| list_for_each_entry_rcu(tgl, &tbl->it_group_list, next) { |
| if (tgl->table_group == table_group) { |
| list_del_rcu(&tgl->next); |
| kfree_rcu(tgl, rcu); |
| found = true; |
| break; |
| } |
| } |
| rcu_read_unlock(); |
| |
| if (WARN_ON(!found)) |
| return; |
| |
| /* Clean a pointer to iommu_table in iommu_table_group::tables[] */ |
| found = false; |
| for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { |
| if (table_group->tables[i] == tbl) { |
| iommu_tce_table_put(tbl); |
| table_group->tables[i] = NULL; |
| found = true; |
| break; |
| } |
| } |
| WARN_ON(!found); |
| } |
| |
| long pnv_pci_link_table_and_group(int node, int num, |
| struct iommu_table *tbl, |
| struct iommu_table_group *table_group) |
| { |
| struct iommu_table_group_link *tgl = NULL; |
| |
| if (WARN_ON(!tbl || !table_group)) |
| return -EINVAL; |
| |
| tgl = kzalloc_node(sizeof(struct iommu_table_group_link), GFP_KERNEL, |
| node); |
| if (!tgl) |
| return -ENOMEM; |
| |
| tgl->table_group = table_group; |
| list_add_rcu(&tgl->next, &tbl->it_group_list); |
| |
| table_group->tables[num] = iommu_tce_table_get(tbl); |
| |
| return 0; |
| } |