@@ -1818,3 +1818,172 @@ void uvm_service_block_context_exit(void)
     }
     INIT_LIST_HEAD(&g_cpu_service_block_context_list);
 }
+
+// Get a fault service context from the global list or allocate a new one if
+// there are no available entries.
+static uvm_service_block_context_t *service_block_context_cpu_alloc(void)
+{
+    uvm_service_block_context_t *service_context;
+
+    uvm_spin_lock(&g_cpu_service_block_context_list_lock);
+
+    service_context = list_first_entry_or_null(&g_cpu_service_block_context_list, uvm_service_block_context_t,
+                                               cpu_fault.service_context_list);
+
+    if (service_context)
+        list_del(&service_context->cpu_fault.service_context_list);
+
+    uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
+
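+    // Fall back to a fresh allocation outside the spin lock, since
+    // uvm_kvmalloc() may block and must not be called with the lock held.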
+    if (!service_context)
+        service_context = uvm_kvmalloc(sizeof(*service_context));
+
+    return service_context;
+}
+
+// Put a fault service context in the global list.
+static void service_block_context_cpu_free(uvm_service_block_context_t *service_context)
+{
+    uvm_spin_lock(&g_cpu_service_block_context_list_lock);
+
+    list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
+
+    uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
+}
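+// Contexts are cached on the global list rather than freed, so repeated CPU
+// faults avoid allocator round trips; uvm_service_block_context_exit() above
+// empties the list again at teardown.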
+
+static status uvm_va_space_cpu_fault(uvm_va_space_t *va_space, context ctx, u64 fault_addr,
+                                     bool is_hmm)
+{
+    uvm_va_block_t *va_block;
+    bool is_write = is_write_fault(ctx->frame);
+    NV_STATUS status = uvm_global_get_status();
+    bool tools_enabled;
+    uvm_service_block_context_t *service_context;
+    uvm_global_processor_mask_t gpus_to_check_for_ecc;
+
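+    // Bail out early if a fatal error has already been recorded globally
+    // (e.g. an unrecoverable ECC error).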
+    if (status != NV_OK)
+        goto convert_error;
+
+    service_context = service_block_context_cpu_alloc();
+    if (!service_context) {
+        status = NV_ERR_NO_MEMORY;
+        goto convert_error;
+    }
+
+    service_context->cpu_fault.wakeup_time_stamp = 0;
+    service_context->cpu_fault.ctx = ctx;
+    do {
+        bool do_sleep = false;
+
+        if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
+            NvU64 now = NV_GETTIME();
+            if (now < service_context->cpu_fault.wakeup_time_stamp)
+                do_sleep = true;
+
+            if (do_sleep)
+                uvm_tools_record_throttling_start(va_space, fault_addr, UVM_ID_CPU);
+
+            // Drop the VA space lock while we sleep
+            uvm_va_space_up_read(va_space);
+
+            // The Linux driver uses usleep_range() here because msleep() has
+            // a 20ms granularity and udelay() uses a busy-wait loop, while
+            // usleep_range() uses high-resolution timers and lets the
+            // scheduler coalesce nearby wakeups; this port performs the
+            // delay with kernel_delay().
+            if (do_sleep) {
+                unsigned long nap_us = (service_context->cpu_fault.wakeup_time_stamp - now) / 1000;
+
+                kernel_delay(microseconds(nap_us));
+            }
+        }
+
+        uvm_va_space_down_read(va_space);
+
+        if (do_sleep)
+            uvm_tools_record_throttling_end(va_space, fault_addr, UVM_ID_CPU);
+
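+        // The VA space lock may have been dropped while sleeping above, so
+        // any va_block found in a previous iteration is stale; look the
+        // faulting address up again under the lock.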
+        if (is_hmm) {
+            // Note that normally we should find a va_block for the faulting
+            // address, because the block had to be created when migrating a
+            // page to the GPU and a device-private PTE was inserted into the
+            // CPU page tables in order for migrate_to_ram() to be called.
+            // Not finding it means the PTE was remapped to a different
+            // virtual address with mremap(), so create a new va_block if
+            // needed.
+            status = uvm_hmm_va_block_find_create(va_space,
+                                                  fault_addr,
+                                                  &service_context->block_context.hmm.vma,
+                                                  &va_block);
+            if (status != NV_OK)
+                break;
+
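+            // Bracket the servicing with uvm_hmm_migrate_begin()/_finish()
+            // so that it does not race with other migrations of this
+            // va_block.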
+            status = uvm_hmm_migrate_begin(va_block);
+            if (status != NV_OK)
+                break;
+        }
+        else {
+            status = uvm_va_block_find_create_managed(va_space, fault_addr, &va_block);
+            if (status != NV_OK) {
+                UVM_ASSERT_MSG(status == NV_ERR_NO_MEMORY, "status: %s\n", nvstatusToString(status));
+                break;
+            }
+        }
+
+        // Loop until thrashing goes away.
+        status = uvm_va_block_cpu_fault(va_block, fault_addr, is_write, service_context);
+
+        if (is_hmm)
+            uvm_hmm_migrate_finish(va_block);
+    } while (status == NV_WARN_MORE_PROCESSING_REQUIRED);
+
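+    // NV_ERR_BUSY_RETRY on the HMM path is not fatal: it is mapped to a
+    // successful return below so that the faulting access is simply retried,
+    // and is therefore excluded from fatal-fault reporting.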
+    if (status != NV_OK && !(is_hmm && status == NV_ERR_BUSY_RETRY)) {
+        UvmEventFatalReason reason;
+
+        reason = uvm_tools_status_to_fatal_fault_reason(status);
+        UVM_ASSERT(reason != UvmEventFatalReasonInvalid);
+
+        uvm_tools_record_cpu_fatal_fault(va_space, fault_addr, is_write, reason);
+    }
+
+    tools_enabled = va_space->tools.enabled;
+
+    if (status == NV_OK) {
+        uvm_va_space_global_gpus_in_mask(va_space,
+                                         &gpus_to_check_for_ecc,
+                                         &service_context->cpu_fault.gpus_to_check_for_ecc);
+        uvm_global_mask_retain(&gpus_to_check_for_ecc);
+    }
+
+    uvm_va_space_up_read(va_space);
+
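+    // Check for ECC errors; this cannot be done while holding the va_space
+    // lock, which is why the GPU mask was retained above before dropping it.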
+    if (status == NV_OK) {
+        status = uvm_global_mask_check_ecc_error(&gpus_to_check_for_ecc);
+        uvm_global_mask_release(&gpus_to_check_for_ecc);
+    }
+
+    if (tools_enabled)
+        uvm_tools_flush_events();
+
+    // In the Linux driver, faults that required I/O (pages DMA'ed between
+    // the GPU and host memory) are accounted as major faults, and a process
+    // can query major/minor fault statistics via readproc(); this port does
+    // not propagate that distinction.
+    service_block_context_cpu_free(service_context);
+
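+    // Translate NV_STATUS into the kernel's status type. NV_ERR_BUSY_RETRY
+    // is treated as success so that the faulting access is simply retried;
+    // out-of-memory is propagated, and any other failure is reported as a
+    // SIGBUS condition for the faulting thread.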
+convert_error:
+    switch (status) {
+        case NV_OK:
+        case NV_ERR_BUSY_RETRY:
+            return STATUS_OK;
+        case NV_ERR_NO_MEMORY:
+            return timm_oom;
+        default:
+            return timm("result", "sigbus");
+    }
+}
+
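+// Entry point for CPU faults on managed (non-HMM) allocations. A
+// hypothetical caller in the page-fault path might do something like:
+//
+//     status s = uvm_va_space_cpu_fault_managed(va_space, ctx, fault_addr);
+//     if (!is_ok(s))
+//         ... deliver the failure (e.g. raise SIGBUS) to the faulting thread
+//
+// (illustrative only; the actual fault dispatch lives in the caller)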
+status uvm_va_space_cpu_fault_managed(uvm_va_space_t *va_space, context ctx, u64 vaddr)
+{
+    return uvm_va_space_cpu_fault(va_space, ctx, vaddr, false);
+}