/* arch/tile/include/asm/system.h - kernel/mindspeed - Git at Google */

 /*
  * Copyright 2010 Tilera Corporation. All Rights Reserved.
  *
  *   This program is free software; you can redistribute it and/or
  *   modify it under the terms of the GNU General Public License
  *   as published by the Free Software Foundation, version 2.
  *
  *   This program is distributed in the hope that it will be useful, but
  *   WITHOUT ANY WARRANTY; without even the implied warranty of
  *   MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
  *   NON INFRINGEMENT.  See the GNU General Public License for
  *   more details.
  */

 #ifndef _ASM_TILE_SYSTEM_H
 #define _ASM_TILE_SYSTEM_H

 #ifndef __ASSEMBLY__

 #include <linux/types.h>
 #include <linux/irqflags.h>

 /* NOTE: we can't include <linux/ptrace.h> due to #include dependencies. */
 #include <asm/ptrace.h>

 #include <arch/chip.h>
 #include <arch/sim_def.h>
 #include <arch/spr_def.h>

 /*
  * read_barrier_depends - Flush all pending reads that subsequent reads
  * depend on.
  *
  * No data-dependent reads from memory-like regions are ever reordered
  * over this barrier.  All reads preceding this primitive are guaranteed
  * to access memory (but not necessarily other CPUs' caches) before any
  * reads following this primitive that depend on the data returned by
  * any of the preceding reads.  This primitive is much lighter weight than
  * rmb() on most CPUs, and is never heavier weight than rmb().
  *
  * These ordering constraints are respected by both the local CPU
  * and the compiler.
  *
  * Ordering is not guaranteed by anything other than these primitives,
  * not even by data dependencies.  See the documentation for
  * memory_barrier() for examples and URLs to more information.
  *
  * For example, the following code would force ordering (the initial
  * value of "a" is zero, "b" is one, and "p" is "&a"):
  *
  * <programlisting>
  *	CPU 0				CPU 1
  *
  *	b = 2;
  *	memory_barrier();
  *	p = &b;				q = p;
  *					read_barrier_depends();
  *					d = *q;
  * </programlisting>
  *
  * because the read of "*q" depends on the read of "p" and these
  * two reads are separated by a read_barrier_depends().  However,
  * the following code, with the same initial values for "a" and "b":
  *
  * <programlisting>
  *	CPU 0				CPU 1
  *
  *	a = 2;
  *	memory_barrier();
  *	b = 3;				y = b;
  *					read_barrier_depends();
  *					x = a;
  * </programlisting>
  *
  * does not enforce ordering, since there is no data dependency between
  * the read of "a" and the read of "b".  Therefore, on some CPUs, such
  * as Alpha, "y" could be set to 3 and "x" to 0.  Use rmb()
  * in cases like this where there are no data dependencies.
  */

 /* Defined as an empty statement here: it expands to no code at all. */
 #define read_barrier_depends()	do { } while (0)

 /* Fence primitive; fast_wmb(), fast_rmb() and fast_mb() all expand to it. */
 #define __sync()	__insn_mf()

 /*
  * Read the cycle-counter SPR.  Chips with a split cycle counter read
  * only SPR_CYCLE_LOW; all other chips read the whole SPR_CYCLE register.
  */
 #if CHIP_HAS_SPLIT_CYCLE()
 #define get_cycles_low() __insn_mfspr(SPR_CYCLE_LOW)
 #else
 #define get_cycles_low() __insn_mfspr(SPR_CYCLE)   /* just get all 64 bits */
 #endif

 #if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
 #include <hv/syscall_public.h>
 /*
  * Issue an uncacheable load to each memory controller, then
  * wait until those loads have completed.
  *
  * Only compiled when CHIP_HAS_MF_WAITS_FOR_VICTIMS() is false.
  * Implemented as a single "swint2" instruction with
  * HV_SYS_fence_incoherent passed in r10.
  * NOTE(review): presumably a hypervisor service call, given the HV_SYS_
  * prefix and the <hv/syscall_public.h> include - confirm.
  */
 static inline void __mb_incoherent(void)
 {
 	/* r10 is overwritten by the call; the output value is discarded. */
 	long clobber_r10;
 	/* r0-r9 and r11-r29 are declared clobbered; r10 is the in/out operand. */
 	asm volatile("swint2"
 		     : "=R10" (clobber_r10)
 		     : "R10" (HV_SYS_fence_incoherent)
 		     : "r0", "r1", "r2", "r3", "r4",
 		       "r5", "r6", "r7", "r8", "r9",
 		       "r11", "r12", "r13", "r14",
 		       "r15", "r16", "r17", "r18", "r19",
 		       "r20", "r21", "r22", "r23", "r24",
 		       "r25", "r26", "r27", "r28", "r29");
 }
 #endif

 /*
  * Fence to guarantee visibility of stores to incoherent memory.
  *
  * Always issues __insn_mf() first.  When CHIP_HAS_MF_WAITS_FOR_VICTIMS()
  * is false, it then polls SPR_TILE_WRITE_PENDING (on chips that have it)
  * for up to 400 cycles and returns as soon as the register reads zero.
  * If the poll times out, or the chip has no such register, it falls back
  * to __mb_incoherent().
  */
 static inline void mb_incoherent(void)
 {
 	__insn_mf();

 #if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
 	{
 #if CHIP_HAS_TILE_WRITE_PENDING()
 		const unsigned long timeout_cycles = 400;
 		unsigned long t0 = get_cycles_low();

 		for (;;) {
 			/* Nothing pending any more: the fence is complete. */
 			if (__insn_mfspr(SPR_TILE_WRITE_PENDING) == 0)
 				return;
 			/* Give up polling once the cycle budget is spent. */
 			if ((get_cycles_low() - t0) >= timeout_cycles)
 				break;
 		}
 #endif /* CHIP_HAS_TILE_WRITE_PENDING() */
 		__mb_incoherent();
 	}
 #endif /* !CHIP_HAS_MF_WAITS_FOR_VICTIMS() */
 }

 /*
  * "fast" barriers: write, read and full barriers all expand to __sync();
  * the I/O barrier expands to mb_incoherent().
  */
 #define fast_wmb()	__sync()
 #define fast_rmb()	__sync()
 #define fast_mb()	__sync()
 #define fast_iob()	mb_incoherent()

 /* The generic barrier names are plain aliases for the fast_* versions. */
 #define wmb()		fast_wmb()
 #define rmb()		fast_rmb()
 #define mb()		fast_mb()
 #define iob()		fast_iob()

 /*
  * SMP barriers: with CONFIG_SMP they map to the barriers above; without
  * it they map to barrier(), which is not defined in this header
  * (NOTE(review): presumably the compiler-only barrier - confirm).
  */
 #ifdef CONFIG_SMP
 #define smp_mb()	mb()
 #define smp_rmb()	rmb()
 #define smp_wmb()	wmb()
 #define smp_read_barrier_depends()	read_barrier_depends()
 #else
 #define smp_mb()	barrier()
 #define smp_rmb()	barrier()
 #define smp_wmb()	barrier()
 #define smp_read_barrier_depends()	do { } while (0)
 #endif

 /* Assign value to var, then issue a full mb(). */
 #define set_mb(var, value) \
 	do { var = value; mb(); } while (0)

 /*
  * Pause the DMA engine and static network before task switching.
  *
  * prepare_arch_switch(next) simply forwards to _prepare_arch_switch(),
  * which is only declared here.  The forward declaration of
  * struct task_struct keeps the prototype valid even if no earlier
  * header has declared the tag; without it the tag would be scoped to
  * the parameter list alone and would not match callers' task pointers.
  */
 struct task_struct;
 #define prepare_arch_switch(next) _prepare_arch_switch(next)
 void _prepare_arch_switch(struct task_struct *next);


 /*
  * switch_to(n) should switch tasks to task nr n, first
  * checking that n isn't the current task, in which case it does nothing.
  * The number of callee-saved registers saved on the kernel stack
  * is defined here for use in copy_thread() and must agree with __switch_to().
  *
  * The two constants below sit outside the !__ASSEMBLY__ guard, so they
  * are visible to assembly sources as well as to C.
  */
 #endif /* !__ASSEMBLY__ */
 #define CALLEE_SAVED_FIRST_REG 30
 /* 23 registers (r30..r52) plus one padding slot gives 24. */
 #define CALLEE_SAVED_REGS_COUNT 24   /* r30 to r52, plus an empty to align */
 #ifndef __ASSEMBLY__
 struct task_struct;
 /*
  * switch_to(prev, next, last) calls _switch_to(prev, next) and stores the
  * task pointer it returns in "last".
  */
 #define switch_to(prev, next, last) ((last) = _switch_to((prev), (next)))
 extern struct task_struct *_switch_to(struct task_struct *prev,
 				      struct task_struct *next);

 /*
  * Helper function for _switch_to().  Takes the same prev/next pair plus
  * a new_system_save_k_0 value and returns a task pointer.
  */
 extern struct task_struct *__switch_to(struct task_struct *prev,
 				       struct task_struct *next,
 				       unsigned long new_system_save_k_0);

 /* Address that switched-away from tasks are at. */
 extern unsigned long get_switch_to_pc(void);

 /*
  * On SMP systems, when the scheduler does migration-cost autodetection,
  * it needs a way to flush as much of the CPU's caches as possible:
  *
  * TODO: fill this in!
  *
  * Currently an empty stub: calling it does nothing.
  */
 static inline void sched_cacheflush(void)
 {
 }

 /* Expands to its argument unchanged. */
 #define arch_align_stack(x) (x)

 /*
  * Is the kernel doing fixups of unaligned accesses?  If <0, no kernel
  * intervention occurs and SIGBUS is delivered with no data address
  * info.  If 0, the kernel single-steps the instruction to discover
  * the data address to provide with the SIGBUS.  If 1, the kernel does
  * a fixup.
  */
 extern int unaligned_fixup;

 /* Is the kernel printing on each unaligned fixup? */
 extern int unaligned_printk;

 /* Number of unaligned fixups performed */
 extern unsigned int unaligned_fixup_count;

 /*
  * Init-time routine to do tile-specific per-cpu setup.
  * NOTE(review): the meaning of "boot" is not visible in this header -
  * confirm against the definition.
  */
 void setup_cpu(int boot);

 /* User-level DMA management functions */
 void grant_dma_mpls(void);
 void restrict_dma_mpls(void);

 #ifdef CONFIG_HARDWALL
 /* User-level network management functions */
 void reset_network_state(void);
 void grant_network_mpls(void);
 void restrict_network_mpls(void);
 int hardwall_deactivate(struct task_struct *task);

 /*
  * Hook hardwall code into changes in affinity.
  *
  * If task p has a hardwall (p->thread.hardwall is non-zero) and new_mask
  * differs from its current cpus_allowed mask, call hardwall_deactivate(p);
  * that call's return value is ignored.  Both arguments are parenthesized
  * so that non-trivial argument expressions expand correctly.  Note that
  * p is evaluated more than once.
  */
 #define arch_set_cpus_allowed(p, new_mask) do { \
 	if ((p)->thread.hardwall && \
 	    !cpumask_equal(&(p)->cpus_allowed, (new_mask))) \
 		hardwall_deactivate(p); \
 } while (0)
 #endif

 /*
  * Kernel threads can check to see if they need to migrate their
  * stack whenever they return from a context switch; for user
  * threads, we defer until they are returning to user-space.
  *
  * Steps performed, in order:
  *  1. If prev is in state TASK_DEAD, write SIM_CONTROL_OS_EXIT combined
  *     with prev's pid to SPR_SIM_CONTROL.
  *  2. Always write SIM_CONTROL_OS_SWITCH combined with current's pid to
  *     SPR_SIM_CONTROL.
  *  3. If current has no mm, kstack_hash is false and the thread's
  *     homecache_cpu differs from the running processor, call
  *     homecache_migrate_kthread().
  *
  * kstack_hash, homecache_migrate_kthread() and TASK_DEAD are not declared
  * in this header; they must be in scope wherever the macro is expanded.
  * NOTE(review): the SPR_SIM_CONTROL writes presumably notify a simulator
  * of task exit/switch - confirm against <arch/sim_def.h>.
  */
 #define finish_arch_switch(prev) do {                                     \
 	if (unlikely((prev)->state == TASK_DEAD))                         \
 		__insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT |       \
 			((prev)->pid << _SIM_CONTROL_OPERATOR_BITS));     \
 	__insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_SWITCH |             \
 		(current->pid << _SIM_CONTROL_OPERATOR_BITS));            \
 	if (current->mm == NULL && !kstack_hash &&                        \
 	    current_thread_info()->homecache_cpu != smp_processor_id())   \
 		homecache_migrate_kthread();                              \
 } while (0)

 /*
  * Support function for forking a new task.  Only declared here; at the
  * C level it takes no arguments and returns nothing.
  */
 void ret_from_fork(void);

 /*
  * Called from ret_from_fork() when a new process starts up.  Takes the
  * previous task and returns a task pointer.
  */
 struct task_struct *sim_notify_fork(struct task_struct *prev);

 #endif /* !__ASSEMBLY__ */

 #endif /* _ASM_TILE_SYSTEM_H */
	/*
	* Copyright 2010 Tilera Corporation. All Rights Reserved.
	*
	* This program is free software; you can redistribute it and/or
	* modify it under the terms of the GNU General Public License
	* as published by the Free Software Foundation, version 2.
	*
	* This program is distributed in the hope that it will be useful, but
	* WITHOUT ANY WARRANTY; without even the implied warranty of
	* MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
	* NON INFRINGEMENT. See the GNU General Public License for
	* more details.
	*/

	#ifndef _ASM_TILE_SYSTEM_H
	#define _ASM_TILE_SYSTEM_H

	#ifndef __ASSEMBLY__

	#include <linux/types.h>
	#include <linux/irqflags.h>

	/* NOTE: we can't include <linux/ptrace.h> due to #include dependencies. */
	#include <asm/ptrace.h>

	#include <arch/chip.h>
	#include <arch/sim_def.h>
	#include <arch/spr_def.h>

	/*
	* read_barrier_depends - Flush all pending reads that subsequent reads
	* depend on.
	*
	* No data-dependent reads from memory-like regions are ever reordered
	* over this barrier. All reads preceding this primitive are guaranteed
	* to access memory (but not necessarily other CPUs' caches) before any
	* reads following this primitive that depend on the data returned by
	* any of the preceding reads. This primitive is much lighter weight than
	* rmb() on most CPUs, and is never heavier weight than rmb().
	*
	* These ordering constraints are respected by both the local CPU
	* and the compiler.
	*
	* Ordering is not guaranteed by anything other than these primitives,
	* not even by data dependencies. See the documentation for
	* memory_barrier() for examples and URLs to more information.
	*
	* For example, the following code would force ordering (the initial
	* value of "a" is zero, "b" is one, and "p" is "&a"):
	*
	* <programlisting>
	*	CPU 0				CPU 1
	*
	*	b = 2;
	*	memory_barrier();
	*	p = &b;				q = p;
	*					read_barrier_depends();
	*					d = *q;
	* </programlisting>
	*
	* because the read of "*q" depends on the read of "p" and these
	* two reads are separated by a read_barrier_depends(). However,
	* the following code, with the same initial values for "a" and "b":
	*
	* <programlisting>
	*	CPU 0				CPU 1
	*
	*	a = 2;
	*	memory_barrier();
	*	b = 3;				y = b;
	*					read_barrier_depends();
	*					x = a;
	* </programlisting>
	*
	* does not enforce ordering, since there is no data dependency between
	* the read of "a" and the read of "b". Therefore, on some CPUs, such
	* as Alpha, "y" could be set to 3 and "x" to 0. Use rmb()
	* in cases like this where there are no data dependencies.
	*/

	/* Defined as an empty statement here: it expands to no code at all. */
	#define read_barrier_depends() do { } while (0)

	/* Fence primitive; fast_wmb(), fast_rmb() and fast_mb() all expand to it. */
	#define __sync() __insn_mf()

	/*
	* Read the cycle-counter SPR. Chips with a split cycle counter read
	* only SPR_CYCLE_LOW; all other chips read the whole SPR_CYCLE register.
	*/
	#if CHIP_HAS_SPLIT_CYCLE()
	#define get_cycles_low() __insn_mfspr(SPR_CYCLE_LOW)
	#else
	#define get_cycles_low() __insn_mfspr(SPR_CYCLE) /* just get all 64 bits */
	#endif

	#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
	#include <hv/syscall_public.h>
	/*
	* Issue an uncacheable load to each memory controller, then
	* wait until those loads have completed.
	*
	* Only compiled when CHIP_HAS_MF_WAITS_FOR_VICTIMS() is false.
	* Implemented as a single "swint2" instruction with
	* HV_SYS_fence_incoherent passed in r10.
	* NOTE(review): presumably a hypervisor service call, given the HV_SYS_
	* prefix and the <hv/syscall_public.h> include - confirm.
	*/
	static inline void __mb_incoherent(void)
	{
	/* r10 is overwritten by the call; the output value is discarded. */
	long clobber_r10;
	/* r0-r9 and r11-r29 are declared clobbered; r10 is the in/out operand. */
	asm volatile("swint2"
	: "=R10" (clobber_r10)
	: "R10" (HV_SYS_fence_incoherent)
	: "r0", "r1", "r2", "r3", "r4",
	"r5", "r6", "r7", "r8", "r9",
	"r11", "r12", "r13", "r14",
	"r15", "r16", "r17", "r18", "r19",
	"r20", "r21", "r22", "r23", "r24",
	"r25", "r26", "r27", "r28", "r29");
	}
	#endif

	/*
	 * Fence to guarantee visibility of stores to incoherent memory.
	 *
	 * Always issues __insn_mf() first.  When CHIP_HAS_MF_WAITS_FOR_VICTIMS()
	 * is false, it then polls SPR_TILE_WRITE_PENDING (on chips that have it)
	 * for up to 400 cycles and returns as soon as the register reads zero.
	 * If the poll times out, or the chip has no such register, it falls back
	 * to __mb_incoherent().
	 */
	static inline void mb_incoherent(void)
	{
		__insn_mf();

	#if !CHIP_HAS_MF_WAITS_FOR_VICTIMS()
		{
	#if CHIP_HAS_TILE_WRITE_PENDING()
			const unsigned long timeout_cycles = 400;
			unsigned long t0 = get_cycles_low();

			for (;;) {
				/* Nothing pending any more: the fence is complete. */
				if (__insn_mfspr(SPR_TILE_WRITE_PENDING) == 0)
					return;
				/* Give up polling once the cycle budget is spent. */
				if ((get_cycles_low() - t0) >= timeout_cycles)
					break;
			}
	#endif /* CHIP_HAS_TILE_WRITE_PENDING() */
			__mb_incoherent();
		}
	#endif /* !CHIP_HAS_MF_WAITS_FOR_VICTIMS() */
	}

	/*
	* "fast" barriers: write, read and full barriers all expand to __sync();
	* the I/O barrier expands to mb_incoherent().
	*/
	#define fast_wmb() __sync()
	#define fast_rmb() __sync()
	#define fast_mb() __sync()
	#define fast_iob() mb_incoherent()

	/* The generic barrier names are plain aliases for the fast_* versions. */
	#define wmb() fast_wmb()
	#define rmb() fast_rmb()
	#define mb() fast_mb()
	#define iob() fast_iob()

	/*
	* SMP barriers: with CONFIG_SMP they map to the barriers above; without
	* it they map to barrier(), which is not defined in this header
	* (NOTE(review): presumably the compiler-only barrier - confirm).
	*/
	#ifdef CONFIG_SMP
	#define smp_mb() mb()
	#define smp_rmb() rmb()
	#define smp_wmb() wmb()
	#define smp_read_barrier_depends() read_barrier_depends()
	#else
	#define smp_mb() barrier()
	#define smp_rmb() barrier()
	#define smp_wmb() barrier()
	#define smp_read_barrier_depends() do { } while (0)
	#endif

	/* Assign value to var, then issue a full mb(). */
	#define set_mb(var, value) \
	do { var = value; mb(); } while (0)

	/*
	 * Pause the DMA engine and static network before task switching.
	 *
	 * prepare_arch_switch(next) simply forwards to _prepare_arch_switch(),
	 * which is only declared here.  The forward declaration of
	 * struct task_struct keeps the prototype valid even if no earlier
	 * header has declared the tag; without it the tag would be scoped to
	 * the parameter list alone and would not match callers' task pointers.
	 */
	struct task_struct;
	#define prepare_arch_switch(next) _prepare_arch_switch(next)
	void _prepare_arch_switch(struct task_struct *next);


	/*
	* switch_to(n) should switch tasks to task nr n, first
	* checking that n isn't the current task, in which case it does nothing.
	* The number of callee-saved registers saved on the kernel stack
	* is defined here for use in copy_thread() and must agree with __switch_to().
	*
	* The two constants below sit outside the !__ASSEMBLY__ guard, so they
	* are visible to assembly sources as well as to C.
	*/
	#endif /* !__ASSEMBLY__ */
	#define CALLEE_SAVED_FIRST_REG 30
	/* 23 registers (r30..r52) plus one padding slot gives 24. */
	#define CALLEE_SAVED_REGS_COUNT 24 /* r30 to r52, plus an empty to align */
	#ifndef __ASSEMBLY__
	struct task_struct;

	/*
	 * switch_to(prev, next, last) calls _switch_to(prev, next) and stores the
	 * task pointer it returns in "last".
	 *
	 * Tasks are passed and returned by pointer: struct task_struct is only
	 * forward-declared here, so it cannot be used by value.
	 */
	#define switch_to(prev, next, last) ((last) = _switch_to((prev), (next)))
	extern struct task_struct *_switch_to(struct task_struct *prev,
					      struct task_struct *next);

	/*
	 * Helper function for _switch_to().  Takes the same prev/next pair plus
	 * a new_system_save_k_0 value and returns a task pointer.
	 */
	extern struct task_struct *__switch_to(struct task_struct *prev,
					       struct task_struct *next,
					       unsigned long new_system_save_k_0);

	/* Address that switched-away from tasks are at. */
	extern unsigned long get_switch_to_pc(void);

	/*
	* On SMP systems, when the scheduler does migration-cost autodetection,
	* it needs a way to flush as much of the CPU's caches as possible:
	*
	* TODO: fill this in!
	*
	* Currently an empty stub: calling it does nothing.
	*/
	static inline void sched_cacheflush(void)
	{
	}

	/* Expands to its argument unchanged. */
	#define arch_align_stack(x) (x)

	/*
	* Is the kernel doing fixups of unaligned accesses? If <0, no kernel
	* intervention occurs and SIGBUS is delivered with no data address
	* info. If 0, the kernel single-steps the instruction to discover
	* the data address to provide with the SIGBUS. If 1, the kernel does
	* a fixup.
	*/
	extern int unaligned_fixup;

	/* Is the kernel printing on each unaligned fixup? */
	extern int unaligned_printk;

	/* Number of unaligned fixups performed */
	extern unsigned int unaligned_fixup_count;

	/*
	* Init-time routine to do tile-specific per-cpu setup.
	* NOTE(review): the meaning of "boot" is not visible in this header -
	* confirm against the definition.
	*/
	void setup_cpu(int boot);

	/* User-level DMA management functions */
	void grant_dma_mpls(void);
	void restrict_dma_mpls(void);

	#ifdef CONFIG_HARDWALL
	/* User-level network management functions */
	void reset_network_state(void);
	void grant_network_mpls(void);
	void restrict_network_mpls(void);
	int hardwall_deactivate(struct task_struct *task);

	/*
	 * Hook hardwall code into changes in affinity.
	 *
	 * If task p has a hardwall (p->thread.hardwall is non-zero) and new_mask
	 * differs from its current cpus_allowed mask, call hardwall_deactivate(p);
	 * that call's return value is ignored.  Both arguments are parenthesized
	 * so that non-trivial argument expressions expand correctly.  Note that
	 * p is evaluated more than once.
	 */
	#define arch_set_cpus_allowed(p, new_mask) do { \
		if ((p)->thread.hardwall && \
		    !cpumask_equal(&(p)->cpus_allowed, (new_mask))) \
			hardwall_deactivate(p); \
	} while (0)
	#endif

	/*
	 * Kernel threads can check to see if they need to migrate their
	 * stack whenever they return from a context switch; for user
	 * threads, we defer until they are returning to user-space.
	 *
	 * Steps performed, in order:
	 *  1. If prev is in state TASK_DEAD, write SIM_CONTROL_OS_EXIT OR-ed
	 *     with prev's shifted pid to SPR_SIM_CONTROL.
	 *  2. Always write SIM_CONTROL_OS_SWITCH OR-ed with current's shifted
	 *     pid to SPR_SIM_CONTROL.
	 *  3. If current has no mm, kstack_hash is false and the thread's
	 *     homecache_cpu differs from the running processor, call
	 *     homecache_migrate_kthread().
	 *
	 * kstack_hash, homecache_migrate_kthread() and TASK_DEAD are not declared
	 * in this header; they must be in scope wherever the macro is expanded.
	 */
	#define finish_arch_switch(prev) do { \
		if (unlikely((prev)->state == TASK_DEAD)) \
			__insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_EXIT | \
				((prev)->pid << _SIM_CONTROL_OPERATOR_BITS)); \
		__insn_mtspr(SPR_SIM_CONTROL, SIM_CONTROL_OS_SWITCH | \
			(current->pid << _SIM_CONTROL_OPERATOR_BITS)); \
		if (current->mm == NULL && !kstack_hash && \
		    current_thread_info()->homecache_cpu != smp_processor_id()) \
			homecache_migrate_kthread(); \
	} while (0)

	/*
	 * Support function for forking a new task.  Only declared here; at the
	 * C level it takes no arguments and returns nothing.
	 */
	void ret_from_fork(void);

	/*
	 * Called from ret_from_fork() when a new process starts up.  Takes the
	 * previous task by pointer and returns a task pointer; struct task_struct
	 * is an incomplete type in this header, so it cannot be used by value.
	 */
	struct task_struct *sim_notify_fork(struct task_struct *prev);

	#endif /* !__ASSEMBLY__ */

	#endif /* _ASM_TILE_SYSTEM_H */