% BibTeX bibliography file @InProceedings{acharya97, author = {Acharya, Anurag and Edjlali, Guy and Saltz, Joel}, title = {{The Utility of Exploiting Idle Workstations for Parallel Computation}}, booktitle = {Proceedings of 1997 ACM Sigmetrics International Conference on Measurement and Modeling of Computer Systems}, year = {1997}, month = {June}, address = {Seattle}, keyword = {network of workstations, cluster, scheduling, idle, parallel} } @InProceedings{adve93, author = {Adve, Vikram~S. and Vernon, Mary~K.}, title = {{The Influence of Random Delays on Parallel Execution Times}}, booktitle = {Proceedings of the 1993 ACM SIGMETRICS Conference on Measurement and Modeling of Computer Systems}, year = {1993}, month = {May}, pages = {61-73}, keyword = {scheduling, parallel}, abstract = {Stochastic models are widely used for the performance evaluation of parallel programs and systems. The stochastic assumptions in such models are intended to represent non-deterministic processing requirements as well as random delays due to inter-process communication and resource contention. The authors provide compelling analytical and experimental evidence that in current and foreseeable shared-memory programs, communication delays introduce negligible variance into the execution time between synchronization points. Furthermore, they show using direct measurements of variance that other sources of randomness, particularly non-deterministic computational requirements, also do not introduce significant variance in many programs. They then use two examples to demonstrate the implications of these results for parallel program performance prediction models, as well as for general stochastic models of parallel systems.} } @InProceedings{agrawal85, author = {Agrawal, R. and Ezzat, A.K.}, title = {{Processor Sharing in Nest: A Network of Computer Workstations}}, booktitle = {Proceedings of 1st International Conference on Computer Workstations}, year = {1985}, month = {November}, keyword = {network of workstations, cluster, scheduling}, abstract = {NEST is a research project aimed at creating a computing environment that consists of a network of highly autonomous, yet cooperating personal computer workstations and shared servers. An important aspect of this project is to provide processor sharing by creating a pool of compute servers in the network that may be used by the workstations to supplement their computing needs. Some processors are permanently designated to be the computer servers. In addition, through an advertisement mechanism, any workstation may make itself temporarily available for a specific duration of time to be used as a compute server. The design and implementation of the scheme for augmenting the UNIX system with the execution-location-independent remote execution capability is presented. This capability allows processes to be offloaded to the compute servers and preserves the execution environment of these processes as if they were still executing locally at the originating machine.} } @InProceedings{anderson92, author = {Thomas Anderson and Brian Bershad and Edward Lazowska and Henry Levy}, title = {{Scheduler Activations: Effective Kernel Support for the User-Level Management of Parallelism}}, booktitle = {ACM Transactions on Computer Systems}, year = {1992}, month = {February}, pages = {53-79}, keyword = {operating system, scheduling}, abstract = {Threads are the vehicle for concurrency in many approaches to parallel programming. Threads can be supported either by the operating system kernel or by user-level library code in the application address space, but neither approach has been fully satisfactory. The paper addresses this dilemma. First, the authors argue that the performance of kernel threads is inherently worse than that of user-level threads, rather than this being an artifact of existing implementations; managing parallelism at the user level is essential to high-performance parallel computing. Next, they argue that the problems encountered in integrating user-level threads with other system services is a consequence of the lack of kernel support for user-level threads provided by contemporary multiprocessor operating systems; kernel threads are the wrong abstraction on which to support user-level management of parallelism. Finally, they describe the design, implementation, and performance of a new kernel interface and user-level thread package that together provide the same functionality as kernel threads without compromising the performance and flexibility advantages of user-level management of parallelism.} } @InProceedings{arpaci95:interact, author = {Arpaci, Remzi H. and Dusseau, Andrea C. and Vahdat, Amin M. and Liu, Lok T. and Anderson, Thomas E. and Patterson, David A.}, title = {{The Interaction of Parallel and Sequential Workloads on a Network of Workstations}}, booktitle = {Proceedings of ACM SIGMETRICS'95/PERFORMANCE'95 Joint International Conference on Measurement and Modeling of Computer Systems}, year = {1995}, month = {May}, pages = {267-278}, keyword = {network of workstations, cluster, parallel, scheduling}, abstract = {This paper examines the plausibility of using a network of workstations (NOW) for a mixture of parallel and sequential jobs. Through simulations, our study examines issues that arise when combining these two workloads on a single platform. Starting from a dedicated NOW just for parallel programs, we incrementally relax uniprogramming restrictions until we have a multi-programmed, multi-user NOW for both interactive sequential users and parallel programs. We show that a number of issues associated with the distributed NOW environment (e.g., daemon activity, coscheduling skew) can have a small but noticeable effect on parallel program performance. We also find that efficient migration to idle workstations is necessary to maintain acceptable parallel application performance. Furthermore, we present a methodology for deriving an optimal delay time for recruiting idle machines for use by parallel programs; this recruitment threshold was just 3 minutes for the research cluster we measured. Finally, we quantify the effects of the additional parallel load upon interactive users by keeping track of the potential number of user delays in our simulations. When we limit the maximum number of delays per user, we can still maintain acceptable parallel program performance. In summary, we find that for our workloads a 2:1 rule applies: a NOW cluster of approximately 60 machines can sustain a 32-node parallel workload in addition to the sequential load placed upon it by interactive users.} } @InProceedings{ashok92, author = {Immaneni Ashok and John Zahorjan}, title = {{Scheduling a Mixed Interactive and Batch Workload on a Parallel, Shared-Memory Supercomputer}}, booktitle = {Proceedings of Supercomputing '92}, year = {1992}, month = {November}, pages = {616--625}, keyword = {scheduling, parallel}, abstract = {The authors analyze three approaches to scheduling mixed batch and interactive workloads on a supercomputer: (i) fixed partition, in which memory resources are statically allocated between the workloads: (ii) no partition, in which the interactive workload preempts resources as needed from the batch workload, and (iii) no partition with grouped admission, in which the interactive workload preempts resources only when the number of waiting interactive jobs reaches a threshold value. The authors also investigate the potential benefits of using virtual memory to perform the automatic overlay of jobs too large to fit in the amount of real memory instantaneously available to them. Using analytic tools, they compare the different policies according to the average speedup achieved by the batch workload given that a mean interactive job response time objective must be met by each. They show that, under a wide variety of conditions, fixed partition performs better than the other policies.} } @Article{atallah92, author = {Atallah, Mikhail J. and Black, Christina Lock and Marinescu, Dan C. and Siegel, Howard Jay and Casavant, Thomas L.}, title = {{Models and Algorithms for Co-scheduling Compute-Intensive Tasks on a Network of Workstations}}, journal = {Journal of Parallel and Distrbuted Computing}, year = {1992}, volume = {16}, pages = {319-327}, keyword = {scheduling, parallel, network of workstations, cluster}, abstract = {The problem of using the idle cycles of a number of high performance workstations, interconnected by a high speed network, for solving computationally intensive tasks is discussed. The classes of distributed applications examined require some form of synchronization among the subtasks, hence the need for coscheduling to guarantee that subtasks start at the same time and execute at the same pace on a group of workstations. A model of the system is presented that allows the definition of an objective function to be maximized. Then a quadratic time and linear space algorithm is derived for computing the optimal coschedule, for the given model and class of applications addressed.} } @Article{bernstein71, author = {Bernstein, A.J. and Sharp, J.C.}, title = {{A Policy-Driven Scheduler for a Time Sharing System}}, journal = {Communications of the ACM}, year = {1971}, month = {February}, volume = {14}, number = {2}, pages = {74-78}, keyword = {scheduling, time share, fair}, abstract = {The services received by a process from a time-sharing operating system can be characterized by a resource count Sigma w/sub i/R/sub ij/ where R/sub ij/ is the number of units of service received by process j from resource i and w/sub i/ is the cost per unit of the service. Each class of users can be characterized by a policy function which specifies the amount of service a user who belongs to this class should receive as a function of time. Priority changes dynamically as a function of the difference between the service promised to the user by the policy function and the service user actually receives. A scheduling and swapping algorithm which keeps the resource count of each process above its policy function will provide the specified level of service. Overhead can be reduced by avoiding swaps of processes which have received at least this level of service. The algorithm has been implemented in a general purpose operating system, and it has provided significantly better service to interactive and to batch jobs than the previous scheduler.} } @InProceedings{braams95, author = {Bramms, Jan}, title = {{Batch Class process scheduler for Unix SVR4}}, booktitle = {Proceedings of Sigmetrics '95/ Performance '95}, year = {1995}, month = {May}, pages = {301-302}, note = {Extended Abstract}, keyword = {scheduling, svr4}, abstract = {This paper introduces the BC (Batch Class) process scheduler. It runs batch processes in their own class, with lower priority than the interactive processes. It schedules batch processes, on the basis of the available amounts of resources that are left over from processes in other classes, rather than its own resource usage. The paper also describes an actual implementation of the BC class and an example of the effects of its use.} } @InProceedings{chandra94, author = {Rohit Chandra and Scott Devine and Ben Verghese and Anoop Gupta and Mendel Rosenblum}, title = {{Scheduling and Page Migration for Multiprocessor Computer Servers}}, booktitle = {Proceedings of the 6th International Conference on Architectural Support for Programming Languages and Operating Systems}, year = {1994}, month = {October}, pages = {12--24}, keyword = {parallel, scheduling, migration, shared memory}, abstract = {Several cache coherent shared memory multiprocessors have been developed that are scalable and offer a very tight coupling between the processing resources. They are therefore quite attractive for use as compute servers for multiprogramming and parallel application workloads. Process scheduling and memory management, however, remain challenging due to the distributed main memory found on such machines. The paper examines the effects of OS scheduling and page migration policies on the performance of such compute servers. Our experiments are done on the Stanford DASH, a distributed memory cache coherent multiprocessor. We show that for our multiprogramming workloads consisting of sequential jobs, the traditional Unix scheduling policy does very poorly. In contrast, a policy incorporating cluster and cache affinity along with a simple page migration algorithm offers up to twofold performance improvement. For our workloads consisting of multiple parallel applications, we compare space sharing policies that divide the processors among the applications to time slicing policies such as standard Unix or gang scheduling. We show that space sharing policies can achieve better processor utilization due to the operating point effect, but time slicing policies benefit strongly from user level data distribution. Our initial experience with automatic page migration suggests that policies based only on TLB miss information can be quite effective, and useful for addressing the data distribution problems of space sharing schedulers.} } @InProceedings{chiang94, author = {Su-Hui Chiang and Rajesh K. Mansharamani and Mary K. Vernon}, title = {{Use of Application Characteristics and Limited Preemption for Run-To-Completion Parallel Processor Scheduling Policies}}, booktitle = {Proceedings of the 1994 ACM SIGMETRICS Conference}, year = {1994}, month = {February}, pages = {33--44}, keyword = {parallel, scheduling}, abstract = {The performance potential of run-to-completion (RTC) parallel processor scheduling policies is investigated by examining whether (1) application execution rate characteristics such as average parallelism (avg) and processor working set (pws) and/or (2) limited preemption can be used to improve the performance of these policies. We address the first question by comparing policies (previous as well as new) that differ only in whether or not they use execution rate characteristics and by examining a wider range of the workload parameter space than previous studies. We address the second question by comparing a simple two-level queueing policy with RTC scheduling in the second level queue against RTC policies that don't allow any preemption and against dynamic equiallocation (EQ).} } @TechReport{crovella91, author = {Crovella, Mark and Das, Prakash and Dubnicki, Czarek and LeBlanc, Thomas and Markatos, Evangelos}, title = {{Multiprogramming on Multiprocessors}}, year = {1991}, month = {February}, number = {385}, institution = {University of Rochester, Computer Science Department}, keyword = {parallel, scheduling}, abstract = {Many solutions have been proposed to the problem of multiprogramming a multiprocessor. However, each has limited applicability or fails to address an important source of overhead. In addition, there has been little experimental comparison of the various solutions in the presence of applications with varying degrees of parallelism and synchronization. The authors explore the tradeoffs between three different approaches to multiprogramming a multiprocessor: time-slicing, coscheduling, and dynamic hardware partitions. They implemented applications that vary in the degree of parallelism, and the frequency and type of synchronization. They show that in most cases coscheduling is preferable to time-slicing. They also show that although there are cases where coscheduling is beneficial, dynamic hardware partitions do no worse, and will often do better. They conclude that under most circumstances, hardware partitioning is the best strategy for multiprogramming a multiprocessor, no matter how much parallelism applications employ or how frequently synchronization occurs.} } @TechReport{dusseau95, author = {Dusseau, Andrea C. and Arpaci, Remzi H. and Culler, David E.}, title = {{Re-examining Scheduling and Communication in Parallel Programs}}, year = {1994}, month = {December}, number = {UCB//CSD-95-881}, type = {Computer Science}, institution = {University of California, Berkley}, keyword = {scheduling, communication, parallel, network of workstations, cluster} } @InProceedings{dusseau96:implicit, author = {Dusseau, Andrea C. and Arpaci, Remzi H. and Culler, David E.}, title = {{Effective Distributed Scheduling of Parallel Workloads}}, booktitle = {Proceedings of 1996 ACM Sigmetrics International Conference on Measurement and Modeling of Computer Systems}, year = {1996}, keyword = {scheduling, parallel, network of workstations, cluster, coscheduling}, abstract = {We present a distributed algorithm for time-sharing parallel workloads that is competitive with coscheduling. Implicit scheduling allows each local scheduler in the system to make independent decisions that dynamically coordinate the scheduling of cooperating processes across processors. Of particular importance is the blocking algorithm which decides the action of a process waiting for a communication or synchronization event to complete. Through simulation of bulk-synchronous parallel applications, we find that a simple two-phase fixed-spin blocking algorithm performs well; a two-phase adaptive algorithm that gathers run-time data on barrier wait-times performs slightly better. Our results hold for a range of machine parameters and parallel program characteristics. These findings are in direct contrast to the literature that states explicit coscheduling is necessary for fine-grained programs. We show that the choice of the local scheduler is crucial, with a priority-based scheduler performing two to three times better than a round-robin scheduler. Overall, we find that the performance of implicit scheduling is near that of coscheduling (+/-35%), without the requirement of explicit, global coordination.} } @Article{eager86, author = {Derek L. Eager and Edward D. Lazowska and John Zahorjan}, title = {{Adaptive Load Sharing in Homogeneous Distributed Systems}}, journal = {IEEE Transactions on Software Engineering}, year = {1986}, month = {May}, volume = {12}, number = {5}, pages = {662--675}, keyword = {load balance, parallel, distributed, scheduling}, abstract = {Rather than proposing a specific load sharing policy for implementation, the authors address the more fundamental question of the appropriate level of complexity for load sharing policies. It is shown that extremely simple adaptive load sharing policies, which collect very small amounts of system state information and which use this information in very simple ways, yield dramatic performance improvements. These policies in fact yield performance close to that expected from more complex policies whose viability is questionable. It is concluded that simple policies offer the greatest promise in practice, because of their combination of nearly optimal performance and inherent stability.} } @Article{eager89, author = {Derek L. Eager and John Zahorjan and Edward D. Lazowska}, title = {{Speedup versus Efficiency in Parallel Systems}}, journal = {IEEE Transactions on Computers}, year = {1989}, month = {March}, volume = {38}, number = {3}, pages = {408--23}, keyword = {scheduling, parallel}, abstract = {The tradeoff between speedup and efficiency that is inherent to a software system is investigated. The extent to which this tradeoff is determined by the average parallelism of the software system, as contrasted with other, more detailed, characterizations, is shown. The extent to which both speedup and efficiency can simultaneously be poor is bound: it is shown that for any software system and any number of processors, the sum of the average processor utilization (i.e. efficiency) and the attained fraction of the maximum possible speedup must exceed one. Bounds are given on speedup and efficiency, and on the incremental benefit and cost of allocating additional processors. An explicit formulation, as well as bounds, are given for the location of the knee of the execution time-efficiency profile, where the benefit per unit cost is maximized.} } @InProceedings{efe92, author = {Efe, Kemal and Schaar, Margaret A.}, title = {{Performance of Co-Scheduling on a Network of Workstations}}, booktitle = {Proceedings of the 13th International Conference on Distributed Computing Systems}, year = {1993}, pages = {525-531}, keyword = {network of workstations, cluster, parallel, scheduling}, abstract = {In a set of high performance workstations connected by a network, many workstations may be underutilized by their owners. While each workstation may be primarily responsible for executing its owner's tasks with the highest priority, the unused processing capacity may be made available to computationally intensive tasks submitted externally to the system. Static co-scheduling for such an environment has been considered previously (M.J. Atallah et al., 1991), where the goal was to maximize the speedup by partitioning the task among many workstations. The authors consider the problem from the system point of view, and develop a queuing model and efficient algorithms to minimize the mean response time. The results obtained show that significant improvements in the mean response time can be obtained through co-scheduling over that of the M/M/m system where each task would be assigned to a single workstation as a whole.} } @InProceedings{epema95, author = {Epema, D.H.J.}, title = {{An Analysis of Decay-Usage Scheduling in Multiprocessors}}, booktitle = {Proceedings of Sigmetrics '95/Performance '95}, year = {1995}, month = {May}, pages = {74-85}, organization = {ACM}, address = {Ottawa, Canada}, keyword = {scheduling, parallel, time share}, abstract = {Priority-ageing or decay-usage scheduling is a time-sharing scheduling policy capable of dealing with a workload of both interactive and batch jobs by decreasing the priority of a job when it acquires CPU time, and by increasing its priority when it does not use the CPU. In this paper we deal with a decay-usage scheduling policy in multiprocessor systems modeled after widely used systems. The priority of a job consists of a base priority and a time-dependent part based on processor usage. Because the priorities in our model are time dependent, a queueing-theoretic analysis, for instance for the mean response time, seems impossible. Still, it turns out that as a consequence of the scheduling policy, the shares of available CPU time obtained by jobs converge, and a deterministic analysis for these shares is feasible: for a fixed set of jobs with very large (infinite) processing demands, we derive the relation between their base priorities and their steady-state shares. In addition, we analyze the relation between the values of the parameters of the scheduler and the level of control it can exercise over the steady-state shares. We validate the model by simulations and by measurements of actual systems.} } @InProceedings{evans93, author = {Evans, Steve and Clarke, Kevin and Singleton, Dave and Smaalders, Bart}, title = {{Optimizing Unix Resource Scheduling for User Interaction}}, booktitle = {1993 Summer Usenix}, year = {1993}, month = {June}, pages = {205-218}, organization = {USENIX}, keyword = {scheduling, interactive}, abstract = {Techniques for improving system responsiveness for interactive end users of Unix workstations are explored. After a discussion of the current state of resource scheduling, a model is presented in which dynamic input from the human user is combined with data from user interaction software to supply a centralized manager with the information necessary to determine what processes are involved with interacting with the user at any given moment. This service then communicates this process set information to the kernel, which uses it to manage memory and CPU resource allocation on the behalf of the user. Experience with a prototype of this environment is reported. An argument for an interactive scheduling class is made, along with other infrastructure changes needed to take advantage of it.} } @Article{fayolle80, author = {Fayolle, G. and Mitrani, I. and Iasnogorodski, R.}, title = {{Sharing a Processor Among Many Job Classes}}, journal = {Journal of the Association for Computing Machinery}, year = {1980}, month = {July}, volume = {27}, number = {3}, pages = {519-532}, keyword = {scheduling}, abstract = {A single-server processor-sharing system with M job classes is analyzed in the steady state. The scheduling strategy considered divides the total processor capacity in unequal fractions among the different job classes. More precisely, if there are N/sub j/ jobs of class j in the system, j=1,2,..., M, each class k job receives a fraction g/sub k//( Sigma /sub j=1//sup M/g/sub j/N/sub j/) of the processor capacity. Earlier analyses of this system are shown to be incorrect and new expressions for the conditional expected response times W/sub k/(t) of class k jobs with required service time t are obtained (for general required service time distributions). These yield the asymptotic behavior of W/sub k/(t) as t to infinity and rather simple formulas in the exponential case. The unconditional average response times are also obtained.} } @Article{feitelson90, author = {Feitelson,Dror~G. and Rudolph, Larry}, title = {{Distributed Hierarchical Control for Parallel Processing}}, journal = {IEEE Computer}, year = {1990}, month = {May}, volume = {23}, number = {5}, pages = {65-77}, keyword = {scheduling, parallel, coscheduling}, abstract = {A description is given of a novel design, using a hierarchy of controllers, that effectively controls a multiuser, multiprogrammed parallel system. Such a structure allows dynamic repartitioning according to changing job requirements. The design goals are examined, and the principles of distributed hierarchical control are presented. Control over processors is discussed. Mapping and load balancing with distributed hierarchical control are considered. Support for gang scheduling as well as availability and fault tolerance is addressed. The use of distributed hierarchical control in memory management and I/O is discussed.} } @Article{feitelson92, author = {Feitelson, Dror G. and Rudolph, Larry}, title = {{Gang Scheduling Performance Benefits for Fine-Grained Synchronization}}, journal = {Journal of Parallel and Distributed Computing}, year = {1992}, month = {December}, volume = {16}, number = {4}, pages = {306-18}, keyword = {scheduling, parallel, coscheduling}, abstract = {Multiprogrammed multiprocessors executing fine-grain parallel programs appear to require new scheduling policies. A promising new idea is gang scheduling, where a set of threads are scheduled to execute simultaneously on a set of processors. This has the intuitive appeal of supplying the threads with an environment that is very similar to a dedicated machine. It allows the threads to interact efficiently by using busy waiting, without the risk of waiting for a thread that currently is not running. Without gang scheduling, threads have to block in order to synchronize, thus suffering the overhead of a context switch. While this is tolerable in coarse-grain computations, and might even led to performance benefits if the threads are highly unbalanced, it causes severe performance degradation in the fine-grain case. The authors have developed a model to evaluate the performance of different combinations of synchronization mechanisms and scheduling policies, and validated it by an implementation on the Makbilan multiprocessor. The model leads to the conclusion that gang scheduling is required for efficient fine-grain synchronization on multiprogrammed multiprocessors.} } @Article{feitelson95:runtime, author = {Feitelson,Dror~G. and Rudolph, Larry}, title = {{Coscheduling Based on Run-Time Identification of Activity Working Sets}}, journal = {International Journal of Parallel Programming}, year = {1995}, month = {April}, volume = {23}, number = {2}, pages = {136-160}, keyword = {scheduling, parallel, coscheduling}, abstract = {This paper introduces a method for runtime identification of sets of interacting activities ("working sets") with the purpose of coscheduling them, i.e., scheduling them so that all the activities in the set execute simultaneously on distinct processors. The identification is done by monitoring access rates to shared communication objects: activities that access the same objects at a high rate thereby interact frequently, and therefore would benefit from coscheduling. Simulation results show that coscheduling with our runtime identification scheme can give better performance than uncoordinated scheduling based on a single global activity queue. The finer-grained the interactions among the activities in a working set, the better the performance differential. Moreover, coscheduling based on automatic runtime identification achieves about the same performance as coscheduling based on manual identification of working sets by the programmer.} } @TechReport{feitelson95:survey, author = {Feitelson, Dror G.}, title = {{A Survey of Scheduling in Multiprogrammed Parallel Systems}}, year = {1995}, month = {February}, type = {Research Report RC 19790 (87657)}, institution = {IBM T.J. Watson Research Center}, address = {Yorktown Heights, NY}, keyword = {scheduling, parallel, survey} } @InProceedings{ferguson88, author = {Ferguson, Donald and Yemini, Yechiam and Nikolaou, Christos}, title = {{Microeconomic Algorithms for Load Balancing in Distributed Computer Systems}}, booktitle = {International Conference on Distributed Computer Systems}, year = {1988}, pages = {491-499}, keyword = {scheduling, load balancing, distributed, microeconomic}, abstract = {A novel approach to allocating and sharing communication and computational resources in a distributed system is described. The approach, which is based on concepts drawn from microeconomics, uses algorithms that are competitive rather than cooperative. The effectiveness of these concepts is demonstrated by describing an economy that improves the performance of a distributed system by implementing load balancing. In this economy, competition sets prices for the resources in the system. Jobs complete for the resources by issuing bids, and the resource allocation decisions are made through auctions held by the processors. The benefits of the method include limited complexity and algorithms that are intrinsically decentralized and modular. Simulation studies show that these economies achieve substantial performance benefits.} } @TechReport{fong95, author = {Fong, Liana L. and Squillante, Mark S.}, title = {{Time-Function Scheduling: A General Apporach to Controllable Resource Management}}, year = {1995}, month = {August}, number = {RC 20155 (89194)}, institution = {IBM Research Division, T.J. Watson Research Center}, address = {Yorktown Heights, NY 10598}, keyword = {scheduling, proportional-share} } @InProceedings{ford96, author = {Ford, Bryan and Susarla, Sai}, title = {{CPU Inheritance Scheduling}}, booktitle = {Usenix Association Second Symposium on Operating Systems Design and Implementation (OSDI)}, year = {1996}, pages = {91-105}, keyword = {scheduling, proportional-share}, abstract = {Traditional processor scheduling mechanisms in operating systems (OSs) are fairly rigid, often supporting only one fixed scheduling policy, or, at most, a few "scheduling classes" whose implementations are closely tied together in the OS kernel. This paper presents CPU inheritance scheduling, a novel processor scheduling framework in which arbitrary threads can act as schedulers for other threads. Widely different scheduling policies can be implemented under the framework, and many different policies can coexist in a single system, providing much greater scheduling flexibility. Modular, hierarchical control can be provided over the processor utilization of arbitrary administrative domains, such as processes, jobs, users and groups, and the CPU resources consumed can be accounted for and attributed accurately. Applications, as well as the OS, can implement customized local scheduling policies; the framework ensures that all the different policies work together logically and predictably. As a side effect, the framework also cleanly addresses priority inversion by providing a generalized form of priority inheritance that automatically works within and among diverse scheduling policies. CPU inheritance scheduling extends naturally to multiprocessors, and supports processor management techniques such as processor affinity and scheduler activations. We show that this flexibility can be provided with acceptable overhead in typical environments, depending on factors such as context switch speed and frequency.} } @Article{ghosal91, author = {Ghosal, Dipak and Serazzi, Giuseppe and Tripathi, Satish K.}, title = {{The Processor Working Set and Its Use in Scheduling Multiprocessor Systems}}, journal = {IEEE Transactions on Software Engineering}, year = {1991}, month = {May}, volume = {17}, number = {5}, pages = {443-453}, keyword = {scheduling, parallel}, abstract = {The concept of a processor working set (PWS) as a single value parameter for characterizing the parallel program behavior is introduced. Through detailed experimental studies of different algorithms on a transputer-based multiprocessor machine, it is shown that the PWS is a robust measure for characterizing the workload of a multiprocessor system. It is shown that processor allocation strategies based on the PWS provide significantly better throughput-delay characteristics. The robustness of PWS is further demonstrated by showing that allocation policies that allocate processors more than the PWS are inferior in performance to those that never allocate more than the PWS-even at a moderately low load. Based on the results, a simple static allocation policy that allocates the PWS at low load and adaptively fragments at high load to one processor per job is proposed.} } @InProceedings{goyal96, author = {Goyal, Pawan and Guo, Xingang and Vin, Harrick M.}, title = {{A Hierarchical CPU Scheduler for Multimedia Operating Systems}}, booktitle = {Usenix Association Second Symposium on Operating Systems Design and Implementation (OSDI)}, year = {1996}, pages = {107-121}, keyword = {scheduling, proportional-share}, abstract = {The need to support a variety of hard and soft real-time systems, as well as best-effort applications in a multimedia computing environment requires an operating system framework that: (1) enables different schedulers to be employed for different application classes, and (2) provides protection between the various classes of applications. We argue that these objectives can be achieved by hierarchical partitioning of CPU bandwidth, in which an operating system partitions the CPU bandwidth among various application classes, and each application class, in turn, partitions its allocation (potentially using a different scheduling algorithm) among its sub-classes or applications. We present the start-time fair queuing (SFQ) algorithm, which enables such hierarchical partitioning. We have implemented a hierarchical scheduler in Solaris 2.4. We describe our implementation and demonstrate its suitability for multimedia operating systems.} } @InProceedings{gupta91, author = {Gupta, Anoop and Tucker, Andrew and Urushibara, Shigeru}, title = {{The Impact of Operating System Scheduling Policies and Synchronization Methods on the Performance of Parallel Applications}}, booktitle = {Proceedings of the 1991 ACM SIGMETRICS Conference}, year = {1991}, month = {May}, pages = {120-32}, keyword = {parallel, scheduling, synchronization}, abstract = {Shared-memory multiprocessors are frequently used as compute servers with multiple parallel applications executing at the same time. In such environments, the efficiency of a parallel application can be significantly affected by the operating system scheduling policy. The authors use detailed simulation studies to evaluate the performance of several different scheduling strategies. They also explore tradeoffs between the use of busy-waiting and blocking synchronization primitives and their interactions with the scheduling strategies. A key focus is on the impact of the scheduling strategies on the caching behavior of the applications.} } @Article{hellerstein93, author = {Hellerstein, Joseph L.}, title = {{Achieving Service Rate Objectives with Decay Usage Scheduling}}, journal = {IEEE Transactions on Software Engineering}, year = {1993}, month = {August}, volume = {19}, number = {8}, pages = {813-825}, keyword = {scheduling, time share}, abstract = {Decay usage scheduling is a priority- and usage-based approach to CPU allocation in which preference is given to processes that have consumed little CPU in the recent past. The author develops an analytic model for decay usage schedulers running compute-bound workloads, such as those found in many engineering and scientific environments; the model is validated from measurements of a Unix system. This model is used in two ways. First, ways to parameterize decay usage schedulers are studied to achieve a wide range of service rates. Doing so requires a fine granularity of control and a large range of control. The results show that, for a fixed representation of process priorities a larger range of control makes the granularity of control coarser, and a finer granularity of control decreases the range of control. A second use of the analytic model is to construct a low overhead algorithms for achieving service rate objectives. Existing approaches require adding a feedback loop to the scheduler. This overhead is avoided by exploiting the feedback already present in decay usage schedulers. Using both empirical and analytical techniques, it is shown that the algorithm is effective and that it provides fairness when the system is over- or under-loaded.} } @Article{henry84, author = {Henry, G. J.}, title = {{The Fair Share Scheduler}}, journal = {AT\&T Bell Laboratories Technical Journal}, year = {1984}, month = {October}, volume = {63}, number = {8}, pages = {1845-1857}, keyword = {scheduling, fair}, abstract = {The Fair Share Scheduler (FSS) is a process scheduling scheme within the UNIX operating system that controls the distribution of resources to sets of related processes. This control offers features that are useful to many applications, including user control of service level, execution predictability, fair resource allocation, predictable and fair billing, and load insulation between user communities. The author discusses the concepts of a fair share scheduler, the motivation for and history behind FSS, some practical FSS applications, the user and administrator interfaces to FSS, and the design philosophy of FSS.} } @InProceedings{karlin91, author = {Karlin, A.R. and Li, K. and Manasse, M.S. and Owicki, S.}, title = {Empirical Studies of Competitive Spinning for a Shared-Memory Multiprocessor}, booktitle = {Thirteenth ACM Symposium on Operating Systems Principles}, year = {1991}, month = {October}, keyword = {parallel, shared-memory, synchronization, scheduling}, abstract = {A common operation in multiprocessor programs is acquiring a lock to protect access to shared data. Typically, the requesting thread is blocked if the lock it needs is held by another thread. Alternatively, the thread could spin until the lock is free, or spin for a while and then block. The authors study seven strategies for determining whether and how long to spin before blocking. Of particular interest are competitive strategies, for which the performance can be shown to be no worse than some constant factor times an optimal off-line strategy. The performance of five competitive strategies is compared with that of always blocking, always spinning, or using the optimal off-line algorithm. Measurements of lock-waiting time distributions for five parallel programs were used to compare the cost of synchronization under all the strategies. Additional measurements of elapsed time for some of the programs and strategies allowed assessment of the impact of synchronization strategy on overall program performance. Both types of measurements indicate that the standard blocking strategy performs poorly compared to mixed strategies. Among the mixed strategies studied, adaptive algorithms perform better than non-adaptive ones.} } @Article{kay88, author = {Kay, J. and Lauder, P.}, title = {{A Fair Share Scheduler}}, journal = {Communications of the ACM}, year = {1988}, month = {January}, volume = {31}, number = {1}, pages = {44-55}, keyword = {scheduling, fair}, abstract = {Central-processing-unit schedulers have traditionally allocated resources fairly among processes. By contrast, a fair Share scheduler allocates resources so that users get their fair machine share over a long period. To date, Share has been used exclusively to allocate CPU time, though it takes into account the consumption of all resources. Share may be applicable to the scheduling of resources other than CPU, but for simplicity, the article concentrates on CPU scheduling.} } @Article{kleinrock67, author = {Kleinrock, Leonard}, title = {{Time-shared Systems: A Theoretical Treatment}}, journal = {Journal of the Association for Computing Machinery}, year = {1967}, month = {April}, volume = {14}, number = {2}, pages = {242-261}, keyword = {time share, scheduling} } @InProceedings{konto93, author = {Kontothanassis, Leonidas I. and Wisniewski, Robert W.}, title = {{Using Scheduler Information to Achieve Optimal Barrier Synchronization Performance}}, booktitle = {Proceedings of the ACM SIGPLAN Symposium on Principles and Practice of Parallel Programming}, year = {1993}, month = {May}, keyword = {sychronization, scheduling, parallel}, abstract = {Parallel programs frequently use barriers to synchronize successive steps in an algorithm. In the presence of multiprogramming, the choice of spinning versus blocking barriers can have a significant impact on performance. The authors demonstrate how competitive spinning techniques previously designed for locks can be extended to barriers, and evaluate their performance. They design an additional competitive spinning technique that adapts more quickly in a dynamic environment. They then propose and evaluate a new method that obtains by using scheduler information to decide between spinning and blocking. The schedular information technique makes optimal choices incurring little overhead.} } @InProceedings{lee97, author = {Lee, Walter and Frank, Matthew and Lee, Victor and Mackenzi, Kenneth and Rudolph, Larry}, title = {{Implications of I/O for Gang Scheduled Workloads}}, booktitle = {Proceedings of the IPPS '97 Workshop on Job Scheduling Strategies for Parallel Processing}, year = {1997}, keyword = {scheduling, I/O, coscheduling, parallel} } @InProceedings{leutenegger90, author = {Leutenegger, Scott T. and Vernon, Mary K.}, title = {{The Performance of Multiprogrammed Multiprocessor Scheduling Policies}}, booktitle = {Proceedings of the 1990 ACM SIGMETRICS Conference}, year = {1990}, month = {May}, pages = {226-36}, keyword = {parallel, scheduling}, abstract = {Scheduling policies for general purpose multiprogrammed multiprocessors are not well understood. This paper examines various policies to determine which properties of a scheduling policy are the most significant determinants of performance. The authors compare a more comprehensive set of policies than previous work, including one important scheduling policy that has not previously been examined. They also compare the policies under workloads that they feel are more realistic than previous studies have used. Using these new workloads, they arrive at different conclusions than reported in earlier work. In particular, they find that the 'smallest number of processes first' (SNPF) scheduling discipline performs poorly, even when the number or processes in a job is positively correlated with the total service demand of the job. The authors also find that policies that allocate an equal fraction of the processing power to each job in the system perform better, on the whole, than policies that allocate processing power unequally. Finally, they find that for lock access synchronization, dividing processing power equally among all jobs in the system is a more effective property of a scheduling policy than the property of minimizing synchronization spin-waiting, unless demand for synchronization is extremely high.} } @InProceedings{leutenegger93, author = {Scott T. Leutenegger and Xian-He Sun}, title = {{Distributed Computing Feasibility in a Non-Dedicated Homogenous Distributed System}}, booktitle = {Proceedings of Supercomputing '93}, year = {1993}, pages = {143--152}, keyword = {networks of workstations, clusters, parallel, distributed, scheduling}, abstract = {The authors address the feasibility of a nondedicated parallel processing environment, assuming workstation processes have preemptive priority over parallel tasks. They develop an analytical model to predict parallel job response times. The model provides insight into how significantly the workstation owner interference degrades parallel program performance. A new term, task ratio, which relates the parallel task demand to the mean service demand of nonparallel workstation processes, is introduced. It is proposed that the task ratio is a useful metric for determining how large the demand of a parallel applications must be in order to make efficient use of a nondedicated distributed system.} } @InProceedings{litzkow88, author = {Litzkow, Michael and Livny, Miron and Mutka, Matt}, title = {{Condor - A Hunter of Idle Workstations}}, booktitle = {Proceedings of the 8th International Conference of Distributed Computing Systems}, year = {1988}, month = {June}, pages = {104-111}, keyword = {scheduling, networks of workstations, migration, idle, clusters}, abstract = {The design, implementation, and performance of the Condor scheduling system, which operates in a workstation environment, are presented. The system aims to maximize the utilization of workstations with as little interference as possible between the jobs it schedules and the activities of the people who own workstations. It identifies idle workstations and schedules background jobs on them. When the owner of a workstation resumes activity at a station, Condor checkpoints the remote job running on the station and transfers it to another workstation. The system guarantees that the job will eventually complete, and that very little, if any, work will be performed more than once. A performance profile of the system is presented that is based on data accumulated from 23 stations during one month.} } @InProceedings{lo87, author = {Lo, Shau-Ping and Gligor, Virgil D.}, title = {{A Comparative Analysis of Multiprocessor Scheduling Algorithms}}, booktitle = {Proceedings of the Seventh International Conference on Distributed Computing Systems}, year = {1987}, month = {September}, pages = {356-363}, keyword = {scheduling, parallel}, abstract = {Critical path and group algorithms for scheduling multiprocess computations on multiprocessor systems are defined. The dependencies of these algorithms on the degree and pattern of message interaction are reviewed, and the sensitivity of these dependencies on the length of the pause-wait time and the swap-in/swap-out time is investigated using simulation experiments. The authors explain the notions of the threshold number of processors for multiprocess computations. An analytical procedure is developed to compute a lower bound of the differences in scheduling overhead for the simulated algorithms when the number of processors is large. Thus procedure is used to validate some of the results of the simulation experiments.} } @InBook{malone88, author = {Malone, T. and Fikes, R. and Grant, K. and Howard, M}, title = {{Enterprise: A market-like task scheduler for distributed computing environments}}, year = {1988}, pages = {177-205}, publisher = {North-Holland}, keyword = {scheduling, microeconomic, distributed} } @InProceedings{mccann93, author = {McCann, C. and Vaswani, R. and Zahorjan, John}, title = {{A Dynamic Processor Allocation Policy for Multi-programmed Shared-Memory Multiprocessors}}, booktitle = {ACM Transactions on Computer Systems}, year = {1993}, month = {May}, volume = {11}, pages = {146-178}, keyword = {scheduling, parallel}, abstract = {The authors propose and evaluate empirically the performance of a dynamic processor-scheduling policy for multiprogrammed shared-memory multiprocessors. The policy is dynamic in that it reallocates processors from one parallel job to another based on the currently realized parallelism of those jobs. The policy is suitable for implementation in production systems in that: it interacts well with very efficient user-level thread packages, leaving to them many low-level thread operations that do not require kernel intervention; it deals with thread blocking due to user I/O and page faults it ensures fairness in delivering resources to jobs; its performance, measured in terms of average job response time, is superior to that of previously proposed schedules; it provides good performance to very short, sequential (e.g., interactive) requests.} } @InProceedings{mccann94, author = {Cathy McCann and John Zahorjan}, title = {{Processor Allocation Policies for Message-Passing Parallel Computers}}, booktitle = {Proceedings of the 1994 ACM SIGMETRICS Conference}, year = {1994}, month = {February}, pages = {19--32}, keyword = {scheduling, parallel}, abstract = {When multiple jobs compete for processing resources on a parallel computer, the operating system kernel's processor allocation policy determines how many and which processors to allocate to each. In this paper we investigate the issues involved in constructing a processor allocation policy for large scale, message-passing parallel computers supporting a scientific workload.} } @InProceedings{mccann95, author = {McCann, Cathy and Zahorjan, John}, title = {{Scheduling Memory Constrained Jobs on Distributed Memory Parallel Computers}}, booktitle = {Proceedings of ACM SIGMETRICS'95/PERFORMANCE'95 Joint International Conference on Measurement and Modeling of Computer Systems}, year = {1995}, month = {May}, pages = {208-219}, keyword = {scheduling, parallel, memory}, abstract = {We consider the problem of multiprocessor scheduling of jobs whose memory requirements place lower bounds on the fraction of the machine required in order to execute. We address three primary questions in this work: 1. How can a parallel machine be multiprogrammed with minimal overhead when jobs have minimum memory requirements? 2. To what extent does the inability of an application to repartition its workload during runtime affect the choice of processor allocation policy? 3. How rigid should the system be in attempting to provide equal resource allocation to each runnable job in order to minimise average response time?. This work is applicable both to parallel machines and to networks of workstations supporting parallel applications.} } @InBook{miller88, author = {Miller, Mark and Drexler, K.}, title = {{Markets and computation: Agoric open systems}}, year = {1988}, pages = {133-176}, publisher = {North-Holland}, keyword = {scheduling, microeconomic} } @InProceedings{mitzen96, author = {Mitzenmacher, Michael}, title = {{Load balancing and density dependent jump Markov processes}}, booktitle = {Proceedings of 37th Conference on Foundations of Computer Science}, year = {1996}, month = {October}, pages = {213-222}, keyword = {scheduling, load balance, random}, abstract = {We provide a new approach for analyzing both static and dynamic randomized load balancing strategies. We demonstrate the approach by providing the first analysis of the following model: customers arrive as a Poisson stream of rate lambda /sub n/, lambda <1, at a collection of n servers. Each customer chooses some constant d servers independently and uniformly at random from the n servers, and waits for service at the one with the fewest customers. Customers are served according to the first-in first-out (FIFO) protocol, and the service time for a customer is exponentially distributed with mean 1. We call this problem the supermarket model. We wish to know how the system behaves, and in particular we are interested in the expected time a customer spends in the system in equilibrium. The model provides a good abstraction of a simple, efficient load balancing scheme in the setting where jobs arrive at a large system of parallel processors. This model appears more realistic than similar models studied previously, in that it is both dynamic and open: that is, customers arrive over time, and the number of customers is not fixed.} } @InProceedings{mutka87, author = {Mutka, Matt and Livny, Miron}, title = {{Scheduling Remote Processing Capacity In A Workstation-Processor Bank Network}}, booktitle = {Proceedings of the 7th International Conference on Distributed Computing Systems}, year = {1987}, month = {September}, pages = {2-9}, keyword = {idle, clusters, network of workstations, scheduling, fair}, abstract = {The problem of long-term scheduling of a group of workstations and a processor bank is considered. It is assumed that each workstation is under the full control of its user, whereas the processors that constitute the processor bank are public resources. Therefore, a workstation can be allocated for remote processing only if its user does not perform any local activity. A long-term scheduling algorithm called the up-down algorithm and a set of performance criteria for evaluating these types of scheduling algorithms are presented. Using these criteria and traces of usage patterns of 13 workstations, the algorithm is evaluated, and its efficiency and fairness are demonstrated. The performances of the round-robin and the random algorithms are analyzed using the same criteria and workload, and is shown that the up-down algorithm outperforms the other two. All of the three algorithm provide the same throughput, but the up-down algorithm protects the rights of light users when a few heavy users try to monopolize all free resources. The two other algorithms do not maintain a steady quality of service for light users in the face of an increasing load of heavy users.} } @Article{mutka91, author = {Mutka, Matt M. and Livny, Miron}, title = {{The Available Capacity of a Privately Owned Workstation Environment}}, journal = {Performance Evaluation}, year = {1991}, month = {July}, volume = {12}, number = {4}, pages = {269-84}, keyword = {idle, clusters, network of workstations, scheduling}, abstract = {Powerful workstations interconnected by networks have become widely available as sources of computing cycles. Each workstation is typically owned by a single user in order to provide a high quality of service for the owner. In most cases, an owner does not have computing demands as large as the capacity of the workstation. Therefore, most of the workstations are underutilized. Nevertheless, some users have demands that exceed the capacities of their workstations. In order to effectively share the capacity of workstations, there must be algorithms that allocate available capacity and long periods when owners do not use their stations. To understand the profile of station availability, the authors analyzed the usage patterns of a cluster of workstations. A stochastic model was developed which was based on an analysis of the relative frequency distribution and the correlation of available and non-available interval lengths. This stochastic model is important as a workload generator for the performance evaluation of capacity sharing strategies of a cluster of workstations.} } @InProceedings{naik93, author = {Vijay K. Naik and Sanjeev K. Setia and Mark S. Squillante}, title = {{Performance Analysis of Job Scheduling Policies in Parallel Supercomputing Environments}}, booktitle = {Proceedings of Supercomputing '93}, year = {1993}, month = {November}, pages = {824--833}, keyword = {parallel, scheduling}, abstract = {The authors analyze three general classes of scheduling policies under a workload typical of large-scale scientific computing. These policies differ in the manner in which processors are partitioned among the jobs as well as the way in which jobs are prioritized for execution on the partitions. The results indicate that existing static schemes to not perform well under varying workloads. Adaptive policies tend to make better scheduling decisions, but their ability to adjust to workload changes is limited. Dynamic partitioning policies, on the other hand, yield the best performance and can be tuned to provide desired performance differences among jobs with varying resource demands.} } @Conference{ousterhout82, author = {Ousterhout, John K.}, title = {{Scheduling Techniques for Concurrent Systems}}, booktitle = {Third International Conference on Distributed Computing Systems}, year = {1982}, month = {May}, pages = {22-30}, keyword = {scheduling, coscheduling, parallel}, abstract = {Current operating systems base many of their decisions on the assumption that processes are independent. This paper examines what happens in multiprocessor systems that use interprocess communication extensively. Traditional techniques for short-term scheduling result in serious limitations on interprocessor communication; a two-phase blocking scheme is suggested as a solution to the problem. Long-term scheduling policies that assume process independence result in a form of thrashing when there are groups of cooperating processes. The notion of coscheduling is introduced, and three algorithms are described for achieving it. Simulation results suggest that substantial degrees of coscheduling can be achieved over a variety of conditions using relatively simple methods.} } @InProceedings{peris94, author = {Vinod G.J. Peris and Mark S. Squillante and Vijay K. Naik}, title = {{Analysis of the Impact of Memory in Distributed Parallel Processing Systems}}, booktitle = {Proceedings of the 1994 ACM SIGMETRICS Conference}, year = {1994}, month = {February}, pages = {5--18}, keyword = {scheduling, parallel}, abstract = {We consider an important tradeoff between processor and memory allocation in distributed parallel processing systems. To study this tradeoff, we formulate stochastic models of parallel program behavior, distributed parallel processing environments and memory overheads incurred by parallel programs as a function of their processor allocation. A mathematical analysis of the models is developed, which includes the effects of contention for shared resources caused by paging activity. We conduct a detailed analysis of real large-scale scientific applications and use these results to parameterize our models. Our results show that memory overhead resulting from processor allocation decisions can have a significant effect on system performance in distributed parallel environments, strongly suggesting that memory considerations must be incorporated in the resource allocation policies for parallel systems. We also demonstrate the importance of the inter-locality miss ratio, which is introduced in this paper and analyzed for the first time.} } @InProceedings{sevcik89, author = {Kenneth C. Sevcik}, title = {{Characterizations of Parallelism in Applications and their Use in Scheduling}}, booktitle = {Proceedings of the 1989 ACM SIGMETRICS and PERFORMANCE Conference on Measurement and Modeling of Computer Systems}, year = {1989}, month = {May}, pages = {171--180}, keyword = {scheduling, parallel}, abstract = {Examines the quality of processor allocation decisions under multiprogramming that can be made with several different high-level characterizations of application parallelism. The author demonstrates that decisions based on parallelism characterizations with two to four parameters are superior to those based on single-parameter characterizations (such as fraction sequential or average parallelism). The results are based predominantly on simulation, with some guidance from a simple analytic model.} } @Article{shoch82, author = {John F. Shoch and Jon A. Hupp}, title = {{The Worm Programs: Early Experience with a Distributed Computation}}, journal = {Communications of the ACM}, year = {1982}, month = {March}, volume = {25}, number = {3}, keyword = {scheduling, distributed, economic} } @Article{smith80, author = {Smith, Reid}, title = {{The Contract Net Protocol: High-Level Communication and Control in a Distributed Problem Solver}}, journal = {IEEE Transactions on Computers}, year = {1980}, month = {December}, volume = {C-29}, number = {12}, pages = {1104-1113}, keyword = {distributed, economic, scheduling}, abstract = {The contract net protocol has been developed to specify problem-solving communication and control for nodes in a distributed problem solver. Task distribution is affected by a negotiation process, a discussion carried on between nodes with tasks to be executed and nodes that may be able to execute those tasks. The author presents the specification of the protocol and demonstrates its use in the solution of a problem in distributed sensing. The utility of negotiation as an interaction mechanism is discussed. It can be used to achieve different goals, such as distributing control and data to avoid bottlenecks and enabling a finer degree of control in making resource allocation and focus decisions than is possible with traditional mechanisms.} } @InProceedings{sobalvarro95, author = {Sobalvarro, Patrick G. and Weihl, William E.}, title = {{Demand-based Coscheduling of Parallel Jobs on Multiprogrammed Multiprocessors}}, booktitle = {Proceedings of the IPPS '95 Workshop on Job Scheduling Strategies for Parallel Processing}, year = {1995}, month = {April}, pages = {63-75}, keyword = {parallel, scheduling, coscheduling}, abstract = {We present demand-based coscheduling, a new approach to scheduling parallel computations on multiprogrammed multiprocessors. In demand-based coscheduling, rather than making the pessimistic assumption that all the processes constituting a parallel job must be simultaneously scheduled in order to achieve good performance, we use information about which processes are communicating in order to coschedule only these; the result is more opportunities for coscheduling and fewer preemptions than in more traditional coscheduling schemes. We introduce two particular types of demand-based coscheduling. The first is dynamic coscheduling, which was conceived for use on message-passing architectures. We present an analytical model and a simulation of dynamic coscheduling that show that the algorithm can achieve good performance under pessimistic assumptions. The second is predictive coscheduling, for which we present an algorithm that detects communication by using virtual memory system information on a bus-based shared-memory multiprocessor.} } @Unpublished{sobalvarro97, author = {Sobalvarro, Patrick G. and Pakin, Scott and Weihl, William E. and Chien, Andrew A}, title = {{Dynamic Coscheduling on Workstation Clusters}}, year = {1997}, howpublished = {http://www.research.digital.com/SRC/scheduling}, URL = {http://www.research.digital.com/SRC/scheduling}, keyword = {parallel, scheduling, coscheduling} } @InProceedings{stoica95, author = {Stoica, Ion and Abdel-Wahab, Hussein and Pothen, Alex}, title = {{A Microeconomic Scheduler for Parallel Computers}}, booktitle = {Proceedings of the IPPS '95 Workshop on Job Scheduling Strategies for Parallel Processing}, year = {1995}, month = {April}, pages = {122-135}, keyword = {parallel, scheduling, economic}, abstract = {We describe a scheduler based on the microeconomic paradigm for scheduling on-line a set of parallel jobs in a multiprocessor system. In addition to increasing the system throughput and reducing the response time, we consider fairness in allocating system resources among the users, and provide the user with control over the relative performances of his jobs. Every user has a savings account in which he receives money at a constant rate. To run a job, the user creates an expense account for that job to which he transfers money from his savings account. The job uses the funds in its expense account to obtain the system resources it needs. The share of the system resources allocated to the user is directly related to the rate at which the user receives money; the rate at which the user transfers money into a job expense account controls the job's performance. We prove that starvation is not possible in our model. Simulation results show that our scheduler improves both system and user performances in comparison with two different variable partitioning policies. It is also effective in guaranteeing fairness and providing control over the performance of jobs.} } @InProceedings{stoica96, author = {Stoica, Ion and Abdel-Wahab, Hussein and Jeffay, Kevin and Baruah, Sanjoy and Gehrke, Johannes and Plaxton, C. Greg}, title = {{A Proportional Share Resource Allocation Algorithm for Real-Time, Time-Shared Systems}}, booktitle = {IEEE Real-Time Systems Symposium}, year = {1996}, month = {December}, keyword = {scheduling, proportional-share}, abstract = {We propose and analyze a proportional share resource allocation algorithm for realizing real-time performance in time-shared operating systems. Processes are assigned a weight which determines a share (percentage) of the resource they are to receive. The resource is then allocated in discrete-sized time quanta in such a manner that each process makes progress at a precise, uniform rate. Proportional share allocation algorithms are of interest because: they provide a natural means of seamlessly integrating real and non-real-time processing; they are easy to implement; they provide a simple and effective means of precisely controlling the real-time performance of a process; and they provide a natural means of policing so that processes that use more of a resource than they request have no ill-effect on well-behaved processes. We analyze our algorithm in the context of an idealized system in which a resource is assumed to be granted in arbitrarily small intervals of time and show that our algorithm guarantees that the difference between the service time that a process should receive and the service time it actually receives is optimally bounded by the size of a time quantum. In addition, the algorithm provides support for dynamic operations, such as processes joining or leaving the competition, and for both fractional and non-uniform time quanta. As a proof of concept we have implemented a prototype of a CPU scheduler under FreeBSD. The experimental results shows that our implementation performs within the theoretical bounds and hence supports real-time execution in a general purpose operating system.} } @Article{tucker89, author = {Tucker, Andy and Gupta, Anoop}, title = {Process Control and Scheduling Issues for Multiprogrammed Shared-Memory Multiprocessors}, journal = {Operating Systems Review}, year = {1989}, volume = {23}, number = {5}, pages = {159-66}, keyword = {parallel, scheduling, shared memory}, abstract = {Shared-memory multiprocessors are frequently used in a time-sharing style with multiple parallel applications executing at the same time. In such an environment, where the machine load is continuously varying, the question arises of how an application should maximize its performance while being fair to other users of the system. They first show that if the number of runnable processes belonging to a parallel application significantly exceeds the effective number of physical processors executing it, its performance can be significantly degraded. They can propose a way of controlling the number of runnable processes associated with an application dynamically, to ensure good performance. The optimal number of runnable processes for each application is determined by a centralized server, and applications dynamically suspend or resume processes in order to match that number. A preliminary implementation of the proposed scheme is now running on the Encore Multimax and they show how it helps improve the performance of several applications. In some cases the improvement is more than a factor of two. They also discuss implications of the proposed scheme for multiprocessor schedulers, and how the scheme should interface with parallel programming languages.} } @Article{waldspurger92, author = {Waldspurger, Carl and Hogg, Tad and Huberman, Bernado and Kephart, Jeffrey and Stornetta, Scott}, title = {{Spawn: A Distributed Computational Economy}}, journal = {IEEE Transactions on Software Engineering}, year = {1992}, month = {February}, volume = {18}, number = {2}, pages = {103-117}, keyword = {scheduling, economic, parallel}, abstract = {The authors have designed and implemented an open, market-based computational system called Spawn. The Spawn system utilizes idle computational resources in a distributed network of heterogeneous computer workstations. It supports both coarse-grain concurrent applications and the remote execution of many independent tasks. Using concurrent Monte Carlo simulations as prototypical applications, the authors explore issues of fairness in resource distribution, currency as a form of priority, price equilibria, the dynamics of transients, and scaling to large systems. In addition to serving the practical goal of harnessing idle processor time in a computer network, Spawn has proven to be a valuable experimental workbench for studying computational markets and their dynamics.} } @InProceedings{waldspurger95:lottery, author = {Waldspurger, Carl A. and Weihl, William E.}, title = {{Lottery Scheduling: Flexible Proportional-Share Resource Management}}, booktitle = {First Symposium on Operating Systems Design and Implementation (OSDI)}, year = {1995}, pages = {1-11}, organization = {USENIX Association}, keyword = {scheduling, proportional-share}, abstract = {This paper presents lottery scheduling, a novel randomized resource allocation mechanism. Lottery scheduling provides efficient, responsive control over the relative execution rates of computations. Such control is beyond the capabilities of conventional schedulers, and is desirable in systems that service requests of varying importance, such as databases, media-based applications, and networks. Lottery scheduling also supports modular resource management by enabling concurrent modules to insulate their resource allocation policies from one another. A currency abstraction is introduced to flexibly name, share, and protect resource rights. We also show that lottery scheduling can be generalized to manage many diverse resources, such as I/O bandwidth, memory, and access to locks. We have implemented a prototype lottery scheduler for the Mach 3.0 microkernel, and found that it provides flexible and responsive control over the relative execution rates of a wide range of applications. The overhead imposed by our unoptimized prototype is comparable to that of the standard Mach timesharing policy.} } @PhdThesis{waldspurger95:phdthesis, author = {Waldspurger, Carl A.}, title = {{Lottery and Stride Scheduling: Flexible Proportional-Share Resource Management}}, year = {1995}, month = {September}, school = {Massachusetts Institute of Technology}, note = {Also appears as Technical Report MIT/LCS/TR-667}, keyword = {scheduling, proportional-share} } @TechReport{waldspurger95:stride, author = {Waldspurger, Carl A. and Weihl, William E.}, title = {{Stride Scheduling: Deterministic Proportional-Share Resource Mangement}}, year = {1995}, month = {June}, number = {MIT/LCS/TM-528}, institution = {Massachusetts Institute of Technology}, address = {MIT Laboratory for Computer Science}, keyword = {scheduling, proportional-share} }